-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathxml2strings.py
More file actions
66 lines (54 loc) · 2.16 KB
/
xml2strings.py
File metadata and controls
66 lines (54 loc) · 2.16 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
"""
Looks for all files named strings.xml in the decompiled-APK directory.
Reads each file and collects the name attribute of its <string> elements.
Stores all the name attribute values in a .txt file for each APK.
"""
import subprocess
import re
from xml.etree import ElementTree
def extract_name_attribute_from_xml(xml_string):
    """Return the ``name`` attribute of every ``<string>`` element in an XML document.

    Args:
        xml_string: The XML document text to parse.

    Returns:
        A list of ``name`` attribute values, in document order. The root
        element is included when it is itself a ``<string>``. Elements that
        carry no ``name`` attribute are skipped, so the list never contains
        ``None``.

    Raises:
        xml.etree.ElementTree.ParseError: If ``xml_string`` is not
            well-formed XML.
    """
    tree_root = ElementTree.fromstring(xml_string)
    # Filter on presence of the attribute instead of using .get(): a None
    # entry would break callers that write every returned value to a file.
    return [
        element.attrib['name']
        for element in tree_root.iter('string')
        if 'name' in element.attrib
    ]
# output_path = 'extracted_features/xml2txt/'
def extract_strings_from_xml_in_path(output_path):
    """Write the unique ``<string>`` names of each decompiled APK to a text file.

    Runs ``find`` over the hard-coded decompiled-APK directory to list every
    ``strings.xml`` file, groups the files by APK root (the first eight
    ``/word`` components of the path), collects the ``name`` attributes from
    all files of one APK, and appends them, one per line, to
    ``<output_path><tag><name>.txt``. ``tag`` is ``benign_`` or
    ``malicious_`` depending on which of ``Benign`` / ``Malicious`` occurs in
    the APK root path (``Malicious`` wins when both occur).

    Args:
        output_path: Prefix that is string-concatenated with the output file
            name, so a directory must end with a path separator.

    Raises:
        subprocess.CalledProcessError: If ``find`` exits with a non-zero
            status.
        OSError: If a ``strings.xml`` file or an output file cannot be opened.

    Note:
        Output files are opened in append mode, so running the function twice
        appends the names a second time.
    """
    # An argument list (no shell) runs the same command without handing the
    # string to a shell for interpretation.
    find_output = subprocess.run(
        ['find',
         '/home/cyberian/PycharmProjects/androidFeatureExtraction/apks/decompiled/',
         '-type', 'f', '-name', 'strings.xml'],
        capture_output=True,
        text=True,
        check=True).stdout

    # Raw strings keep the backslashes for the regex engine instead of
    # relying on Python passing unknown escape sequences through.
    apk_root_pattern = re.compile(r"\/\w+\/\w+\/\w+\/\w+\/\w+\/\w+\/\w+\/\w+")
    apk_name_pattern = re.compile(r'(\w+)$|(\w)+(.apk)$')

    # Group every strings.xml under the APK root taken from its own path.
    # The grouping is built once, so nothing is removed from a list while it
    # is being iterated (which used to skip files), and a file can no longer
    # be claimed by a different APK whose root is merely a substring of it.
    files_by_apk = {}
    for file_path in find_output.split('\n'):
        if not file_path:
            print("Probably the end of list!")
            continue
        root_match = apk_root_pattern.search(file_path)
        if root_match is None:
            # Path is shallower than, or shaped differently from, the
            # expected layout; there is no APK root to file it under.
            print("Skipping path with unexpected layout:", file_path)
            continue
        files_by_apk.setdefault(root_match.group(0), []).append(file_path)

    for index, (apk, xml_paths) in enumerate(files_by_apk.items()):
        # Always matches: the APK root ends in a run of word characters.
        name = apk_name_pattern.search(apk).group(0)
        print(index, name)

        # Decide the label before doing any work. Without the final branch
        # an unlabelled APK would either raise NameError or silently reuse
        # the label of the previously processed APK.
        if 'Malicious' in apk:
            tag = 'malicious_'
        elif 'Benign' in apk:
            tag = 'benign_'
        else:
            print("Skipping APK with no Benign/Malicious label:", apk)
            continue

        unique_strings_in_apk = set()
        for file_path in xml_paths:
            with open(file_path, 'r') as strings_xml:
                data = strings_xml.read()
            names = extract_name_attribute_from_xml(data)
            # Drop missing names here as well so writing never sees None.
            unique_strings_in_apk.update(
                entry for entry in names if entry is not None)
        print(unique_strings_in_apk)

        with open(output_path + tag + name + '.txt', 'a+') as f:
            # Sorted so the file content is deterministic across runs.
            f.writelines(
                line + '\n' for line in sorted(unique_strings_in_apk))