-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathclean_filename.py
More file actions
22 lines (16 loc) · 883 Bytes
/
clean_filename.py
File metadata and controls
22 lines (16 loc) · 883 Bytes
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
# From https://gist.github.com/wassname/1393c4a57cfcbf03641dbc31886123b8
import unicodedata
import string
# Characters that are allowed to survive sanitisation: a handful of
# punctuation marks, the space, and the ASCII letters and digits.
valid_filename_chars = "-_.() " + string.ascii_letters + string.digits

# Maximum number of characters kept in a cleaned filename; anything
# beyond this is cut off.
char_limit = 255
def convert(filename, whitelist=valid_filename_chars, replace=''):
    """Return a sanitised, ASCII-only version of *filename*.

    Processing steps, in order:

    1. Every item obtained by iterating *replace* is substituted with an
       underscore.
    2. The text is NFKD-normalised and everything that cannot be encoded
       as ASCII is dropped (e.g. ``'é'`` becomes ``'e'``).
    3. Characters that are not in *whitelist* are removed.
    4. The result is cut to at most ``char_limit`` characters.

    Args:
        filename: The text to clean.
        whitelist: Container of permitted characters. Defaults to
            ``valid_filename_chars``.
        replace: Iterable of substrings to turn into ``'_'`` before
            filtering. Empty by default, so nothing is replaced.

    Returns:
        The cleaned name, never longer than ``char_limit`` characters.
        It can be empty when no character survives the filtering.

    Warns:
        UserWarning: If the cleaned name was longer than ``char_limit``
            and had to be truncated.
    """
    # Imported locally so this function does not rely on a module-level
    # import that the file does not currently have.
    import warnings

    # replace arbitrary things with underscore (default: nothing)
    for r in replace:
        filename = filename.replace(r, '_')

    # keep only valid ascii chars: NFKD splits accented characters into a
    # base character plus combining marks, and the marks are then dropped
    # by the lossy ASCII encode.
    cleaned_filename = (
        unicodedata.normalize('NFKD', filename)
        .encode('ASCII', 'ignore')
        .decode()
    )

    # keep only whitelisted chars
    cleaned_filename = ''.join(c for c in cleaned_filename if c in whitelist)

    if len(cleaned_filename) > char_limit:
        # Use the warnings machinery rather than print() so the message goes
        # to stderr and callers can filter, capture, or escalate it.
        # stacklevel=2 attributes the warning to the caller's line.
        warnings.warn(
            "Warning, filename truncated because it was over {}. Filenames may no longer be unique".format(char_limit),
            stacklevel=2,
        )
    return cleaned_filename[:char_limit]