1-
import argparse
import os
import sys
from argparse import RawTextHelpFormatter
from importlib.metadata import PackageNotFoundError, version

from pywaybackup.helper import url_split, sanitize_filename
118
class Arguments:
    """Command-line interface definition for the wayback machine downloader.

    Instantiating the class builds the argparse parser, parses ``sys.argv``
    and stores the resulting namespace on ``self.args``. When the program is
    started without any arguments the help text is printed instead (argparse
    exits after printing it).
    """

    def __init__(self) -> None:
        """Build the parser and parse the process arguments."""
        # The banner shows the installed package version. Package metadata is
        # missing when running from a plain source checkout, so fall back to a
        # placeholder instead of crashing before any argument is parsed.
        try:
            package_version = version("pywaybackup")
        except PackageNotFoundError:
            package_version = "unknown"

        parser = argparse.ArgumentParser(
            description=f"<<< python-wayback-machine-downloader v{package_version} >>>\n by @bitdruid -> https://github.com/bitdruid",
            formatter_class=RawTextHelpFormatter,
        )

        required = parser.add_argument_group(" required (one exclusive)")
        # Option strings must start with "-"; a leading space makes argparse
        # reject the argument with a ValueError.
        required.add_argument("-u", "--url", type=str, metavar="", help=" url (with subdir/subdomain) to download")
        # Exactly one of the following mode switches has to be given.
        exclusive_required = required.add_mutually_exclusive_group(required=True)
        exclusive_required.add_argument("-a", "--all", action="store_true", help="download snapshots of all timestamps")
        exclusive_required.add_argument("-l", "--last", action="store_true", help="download the last version of each file snapshot")
        exclusive_required.add_argument("-f", "--first", action="store_true", help="download the first version of each file snapshot")
        exclusive_required.add_argument("-s", "--save", action="store_true", help="save a page to the wayback machine")

        optional = parser.add_argument_group("optional query parameters")
        optional.add_argument("-e", "--explicit", action="store_true", help="search only for the explicit given url")
        optional.add_argument("-r", "--range", type=int, metavar="", help="range in years to search")
        optional.add_argument("--start", type=int, metavar="", help="start timestamp format: YYYYMMDDhhmmss")
        optional.add_argument("--end", type=int, metavar="", help="end timestamp format: YYYYMMDDhhmmss")
        # A bare "--limit" (no value) stores the constant True.
        optional.add_argument("--limit", type=int, nargs="?", const=True, metavar="int", help="limit the number of snapshots to download")
        optional.add_argument("--filetype", type=str, metavar="", help="filetypes to download comma separated (js,css,...)")
        optional.add_argument("--statuscode", type=str, metavar="", help="statuscodes to download comma separated (200,404,...)")

        behavior = parser.add_argument_group("manipulate behavior")
        behavior.add_argument("-o", "--output", type=str, metavar="", help="output for all files - defaults to current directory")
        behavior.add_argument("-m", "--metadata", type=str, metavar="", help="change directory for db/cdx/csv/log files")
        behavior.add_argument("-v", "--verbose", action="store_true", help="overwritten by progress - gives detailed output")
        behavior.add_argument("--log", action="store_true", help="save a log file into the output folder")
        behavior.add_argument("--progress", action="store_true", help="show a progress bar")
        behavior.add_argument("--no-redirect", action="store_true", help="do not follow redirects by archive.org")
        behavior.add_argument("--retry", type=int, default=0, metavar="", help="retry failed downloads (opt tries as int, else infinite)")
        behavior.add_argument("--workers", type=int, default=1, metavar="", help="number of workers (simultaneous downloads)")
        behavior.add_argument("--delay", type=int, default=0, metavar="", help="delay between each download in seconds")

        special = parser.add_argument_group("special")
        special.add_argument("--reset", action="store_true", help="reset the job and ignore existing cdx/db/csv files")
        special.add_argument("--keep", action="store_true", help="keep all files after the job finished")

        # If no arguments are given, print the help text.
        args = parser.parse_args(args=None if sys.argv[1:] else ["--help"])

        # These two settings have no command-line switch; they are attached
        # to the namespace with fixed values.
        args.silent = False
        args.debug = True

        self.args = args

    def get_args(self) -> dict:
        """Returns the parsed arguments as a dictionary."""
        return vars(self.args)
0 commit comments