Skip to content

Commit 45e3012

Browse files
Added ShardArchiver.py utility:
ShardArchiver is a command line utility that creates lz4 compressed tar archives from complete shards and optionally transfers them to a remote server.
1 parent bd558b9 commit 45e3012

1 file changed

Lines changed: 150 additions & 0 deletions

File tree

python/ShardArchiver.py

Lines changed: 150 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,150 @@
1+
#!/usr/bin/env python
2+
3+
# Copyright (c) 2018 Ripple Labs Inc.
4+
#
5+
# Permission to use, copy, modify, and/or distribute this software for any
6+
# purpose with or without fee is hereby granted, provided that the above
7+
# copyright notice and this permission notice appear in all copies.
8+
#
9+
# THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10+
# WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11+
# MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12+
# ANY SPECIAL , DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13+
# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14+
# ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15+
# OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16+
17+
# Creates lz4 compressed tar archives from complete
18+
# shards and optionally SCP transfers them to a destination
19+
20+
import lz4.frame
21+
import os
22+
import subprocess
23+
import sys
24+
import tarfile
25+
26+
from os import listdir, remove
27+
from os.path import basename, isdir, isfile, join
28+
29+
30+
def validate_args():
    '''Validate command line arguments.

    Reads sys.argv directly. Exactly two arguments (shards directory and
    output directory) or exactly six arguments (those two plus identity
    file, username, host and host directory) are accepted.

    On any validation failure an error message and the usage text are
    printed and the process exits with status 1. Returns None when the
    arguments pass every check.
    '''

    usage = ("usage: shard_archiver <shards_directory> <output_directory>"
             " [<identity_file> <username> <host> <host_directory>]\n"
             "example: shard_archiver.py /db/shards . /.ssh/id_dsa "
             "username domain.com \\\\home\\\\archives\n")

    def _fail(message):
        # Every failure path reports the problem, shows usage and exits
        print(message)
        print(usage)
        sys.exit(1)

    arg_count = len(sys.argv) - 1
    if arg_count not in (2, 6):
        _fail('Invalid number of arguments.\n')

    # Sanity check the shards DB path
    if not isdir(sys.argv[1]):
        _fail('Invalid shards directory.\n')

    # Sanity check the output path
    if not isdir(sys.argv[2]):
        _fail('Invalid output directory.\n')

    # The identity file is only supplied in the six argument form
    if arg_count == 6 and not isfile(sys.argv[3]):
        _fail('Invalid identity file.\n')
62+
63+
64+
def read_chunk(file_object):
    '''Yield successive pieces of up to 16384 units read from file_object.

    Iteration stops as soon as a read returns an empty (falsy) result.
    '''
    piece = file_object.read(16384)
    while piece:
        yield piece
        piece = file_object.read(16384)
70+
71+
72+
def create_lz4(source_file, output_path):
    '''Compress source_file into an lz4 frame written to output_path.

    The source is streamed through read_chunk so the whole file is never
    held in memory. The frame is written with the BLOCKSIZE_MAX1MB block
    size, the maximum compression level and a content checksum.

    Both files are closed on exit, including when an error is raised
    part way through.
    '''
    # Open the source in a context manager so its handle is always released
    with open(source_file, mode='rb') as src:
        with lz4.frame.open(output_path,
                            mode='wb',
                            block_size=lz4.frame.BLOCKSIZE_MAX1MB,
                            compression_level=lz4.frame.COMPRESSIONLEVEL_MAX,
                            content_checksum=True) as f:
            for piece in read_chunk(src):
                f.write(piece)
81+
82+
83+
def _is_complete_shard(shard_dir):
    '''Return True if shard_dir looks like a complete NuDB shard.'''

    # A NuDB complete shard directory
    # should have a maximum of three files
    file_count = sum(1 for name in listdir(shard_dir)
                     if isfile(join(shard_dir, name)))
    if file_count > 3:
        return False

    # If a control file is present
    # the shard is not complete
    if isfile(join(shard_dir, 'control.txt')):
        return False

    # Both the data file and the key file must be present
    return (isfile(join(shard_dir, 'nudb.dat')) and
            isfile(join(shard_dir, 'nudb.key')))


def process(args):
    '''Process shard directory

    args follows the sys.argv layout: args[1] is the shards directory and
    args[2] the output directory. When more than three entries are given,
    args[3] is the SSH identity file, args[4] the username, args[5] the
    host and args[6] the directory on the host.

    Every numerically named sub-directory that holds a complete shard is
    packed into <index>.tar and compressed to <index>.tar.lz4 in the
    output directory. A shard is skipped when its archive already exists,
    either on the host (if one is given) or locally.

    With a host, each new archive is copied over with scp and the local
    copy is removed once the copy succeeds. A failed copy is reported and
    processing continues with the next shard.
    '''

    shards_dir = args[1]
    output_dir = args[2]

    # The remote settings do not change between shards
    remote = len(args) > 3
    identity_file = args[3] if remote else None
    host = args[4] + '@' + args[5] if remote else None

    shard_indexes = [d for d in listdir(shards_dir)
                     if isdir(join(shards_dir, d)) and d.isdigit()]

    for shard_index in shard_indexes:
        archive_name = shard_index + '.tar.lz4'
        tar_path = join(output_dir, shard_index + '.tar')
        lz4_path = join(output_dir, archive_name)

        dst_path = None
        # If host specified, check if the archive exists on it
        if remote:
            dst_path = join(args[6], archive_name).replace('\\', '/')
            if subprocess.call(['ssh', '-i', identity_file, host,
                                'test -e ' + dst_path]) == 0:
                continue
        # Otherwise check if it exists locally
        elif isfile(lz4_path):
            continue

        shard_dir = join(shards_dir, shard_index)
        if not _is_complete_shard(shard_dir):
            continue

        # Create tar file containing shard directory, then compress it
        if isfile(tar_path):
            remove(tar_path)
        try:
            with tarfile.open(tar_path, "w") as tar:
                tar.add(shard_dir, arcname=basename(shard_dir))
            create_lz4(tar_path, lz4_path)
        except BaseException:
            # A partial archive would be mistaken for a finished
            # one by the existence check on the next run
            if isfile(lz4_path):
                remove(lz4_path)
            raise
        finally:
            # The intermediate tar is never needed past this point
            if isfile(tar_path):
                remove(tar_path)

        # If host specified, transfer the archive to it
        if host:
            try:
                subprocess.check_call(['scp', '-i', identity_file, lz4_path,
                                       "%s:%s" % (host, dst_path)])
            except (subprocess.CalledProcessError, OSError):
                # Non-zero scp exit status, or scp could not be started
                print('SCP to host failed')
            else:
                remove(lz4_path)
146+
147+
148+
if __name__ == '__main__':
    # Reject a bad command line before any shard is touched;
    # validate_args exits the process itself on failure
    validate_args()
    process(sys.argv)

0 commit comments

Comments
 (0)