-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathparallelFast.py
More file actions
112 lines (96 loc) · 2.63 KB
/
parallelFast.py
File metadata and controls
112 lines (96 loc) · 2.63 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
'''
This is a python implementation of FastCDC.
Author: Kanishk Tantia, Jonathan Cruz
Date: June 7th, 2016
Pseudocode:
Input: data buffer, src; buffer length, n
Output: chunking breakpoint i
Macro Defined: Mask <-- 0x7
Macro Defined: MinSize <-- 2KB; MaxSize <-- 64KB;
fp <-- 0; i <-- MinSize; NormalSize <-- 8KB;
if n <= MinSize then
return n;
if n >= MaxSize then
n <-- MaxSize;
else if n <= NormalSize then
NormalSize <-- n;
for ; i < n; i++; do
fp = (fp << 1) + Gear[src[i]];
if ! (fp & Mask) then
return i; // if the masked bits are all '0'
return i;
'''
import sys
# Minimum chunk length, counted in blocks: main() does not consider placing a
# breakpoint until more than MIN_SIZE blocks have accumulated in the chunk.
MIN_SIZE = 8
# Maximum chunk length, counted in blocks: main() forces a breakpoint as soon
# as the current chunk holds MAX_SIZE blocks.
MAX_SIZE = 20
# Number of blocks consumed so far in the current chunk (module-level state
# that main() increments per block and zeroes at each breakpoint).
I = 0
# Running gear hash of the current chunk; blockbreak() updates it and main()
# zeroes it at each breakpoint.
HASH = 0
# Gear lookup table indexed by byte value (0-255); populated by gear_gen() or
# loaded by main() from the "randoms" file.
GEAR = []
import random
def gear_gen():
    """Generate a fresh gear table and persist it to the ``randoms`` file.

    Draws 256 random 64-bit integers (one per possible byte value), stores
    them in the module-level ``GEAR`` list and writes them, one decimal value
    per line, to a file named ``randoms`` in the current working directory.
    Any existing ``randoms`` file is overwritten.

    Raises:
        OSError: if the ``randoms`` file cannot be opened or written.
    """
    global GEAR
    table = [random.getrandbits(64) for _ in range(256)]
    # The context manager closes the handle even if a write fails, so the
    # table is flushed to disk before anyone tries to read it back.
    with open("randoms", 'w') as target:
        GEAR = table
        target.writelines("%s\n" % item for item in table)
def blockbreak(LBA):
    """Fold one block address into the running module-level ``HASH``.

    ``LBA`` is anything ``int()`` accepts (typically one line of the input
    file). Its value is rendered as a binary string zero-padded to 40 digits
    and consumed in 8-digit groups, left to right. For every group ``HASH``
    is shifted left by one bit and the ``GEAR`` entry indexed by that group's
    value is added to it. Deciding whether the resulting hash marks a
    breakpoint is left to the caller.
    """
    global HASH
    group_width = 8
    bits = format(int(LBA), '040b')  # zero-padded binary rendering
    for start in range(0, len(bits), group_width):
        gear_index = int(bits[start:start + group_width], 2)
        HASH = HASH << 1
        # GEAR entries may be ints (fresh table) or text lines (loaded from
        # the "randoms" file); int() accepts both.
        HASH = HASH + int(GEAR[gear_index])
def parallelcdc(a0, a1):
    """Placeholder for a parallel chunking entry point (not implemented).

    The definition previously had no body at all, which is a syntax error
    that prevents the whole module from loading. Nothing in this file calls
    it and the meaning of ``a0`` / ``a1`` is not established here, so it
    fails loudly instead of silently returning ``None``.

    Raises:
        NotImplementedError: always.
    """
    raise NotImplementedError("parallelcdc is not implemented")
def main(src):
    """Chunk the block trace in ``src`` and write the breakpoints to disk.

    ``src`` is the path of a text file holding one integer block address per
    line. Each line is folded into the running hash with ``blockbreak()``;
    a breakpoint is recorded at line index ``num`` when the current chunk
    holds more than ``MIN_SIZE`` blocks and either it has reached
    ``MAX_SIZE`` blocks or the three low bits of ``HASH`` are all zero.
    The index of the last line is always recorded as the final breakpoint.

    The gear table is loaded from the ``randoms`` file in the current working
    directory; if that file cannot be read, a new table is generated with
    ``gear_gen()``. The zero-based breakpoint indices are written, one per
    line, to ``FastOutput`` in the current working directory.

    Input: data buffer src

    Raises:
        OSError: if ``src`` cannot be read or ``FastOutput`` cannot be written.
        ValueError: if a line of ``src`` is not an integer.
    """
    global I
    global GEAR
    global HASH

    # Start from a clean chunk state so repeated calls in one process do not
    # inherit the block count / hash left over from a previous run.
    I = 0
    HASH = 0

    try:
        with open("randoms", 'r') as gear_file:
            GEAR = gear_file.readlines()
    except (OSError, UnicodeError):
        # Missing or unreadable table: build (and persist) a new one.
        gear_gen()

    with open(src, 'r') as source_file:
        LBAlist = source_file.readlines()

    breakindices = []
    for num, lba in enumerate(LBAlist):
        blockbreak(lba)
        I += 1
        if I <= MIN_SIZE:
            # Chunk is still too short to be cut.
            continue
        # Cut on a forced maximum length or when the low three hash bits
        # are all zero.
        if I >= MAX_SIZE or HASH & 0x7 == 0:
            breakindices.append(num)
            I = 0
            HASH = 0

    # The end of the input always closes the last chunk. Previously this was
    # only recorded when the tail was no longer than MIN_SIZE blocks, so a
    # longer unterminated tail was silently dropped from the output.
    last_index = len(LBAlist) - 1
    if LBAlist and (not breakindices or breakindices[-1] != last_index):
        breakindices.append(last_index)

    with open("FastOutput", 'w') as outputfile:
        outputfile.writelines("%s\n" % i for i in breakindices)
if __name__ == '__main__':
    # Exactly one argument is expected: the path of the block trace to chunk.
    if len(sys.argv) != 2:
        # sys.exit() with a string prints it to stderr and exits with status
        # 1, so a bad invocation is visible to calling scripts. The usage
        # text names the script as it was actually invoked rather than a
        # hard-coded (and outdated) file name.
        sys.exit("Usage: %s <databuffer>" % sys.argv[0])
    main(sys.argv[1])