11import os
2+ import threading
23
3- from pywaybackup .db import Database
4+ from pywaybackup .db import Database , select , update , waybackup_snapshots
45from pywaybackup .helper import url_split
56
67
78class Snapshot :
89 """
9- If a relevant property of the snapshot is modified, the change will be pushed to the database .
10+ Represents a single snapshot entry and manages its state and persistence .
1011
11- - _redirect_url
12- - _redirect_timestamp
13- - _response
14- - _file
12+ When a relevant property of the snapshot is modified, the change is automatically
13+ pushed to the database:
14+ - redirect_url
15+ - redirect_timestamp
16+ - response_status
17+ - file
18+
19+ Thread-safe for SQLite operations using a lock.
1520 """
1621
22+ __sqlite_lock = threading .Lock ()
23+
1724 def __init__ (self , db : Database , output : str , mode : str ):
25+ """
26+ Initialize a Snapshot instance and fetch its database row if available.
27+
28+ Args:
29+ db (Database): Database connection/session manager.
30+ output (str): Output directory for downloaded files.
31+ mode (str): Download mode ('first', 'last', or default).
32+ """
1833 self ._db = db
1934 self .output = output
2035 self .mode = mode
@@ -26,52 +41,78 @@ def __init__(self, db: Database, output: str, mode: str):
2641
2742 self ._row = self .fetch ()
2843 if self ._row :
29- self .counter = self ._row ["counter" ]
30- self .timestamp = self ._row ["timestamp" ]
31- self .url_archive = self ._row ["url_archive" ]
32- self .url_origin = self ._row ["url_origin" ]
33- self .redirect_url = self ._row ["redirect_url" ]
34- self .redirect_timestamp = self ._row ["redirect_timestamp" ]
35- self .response_status = self ._row ["response" ]
36- self .file = self ._row ["file" ]
44+ self .scid = self ._row .scid
45+ self .counter = self ._row .counter
46+ self .timestamp = self ._row .timestamp
47+ self .url_archive = self ._row .url_archive
48+ self .url_origin = self ._row .url_origin
49+ self .redirect_url = self ._row .redirect_url
50+ self .redirect_timestamp = self ._row .redirect_timestamp
51+ self .response_status = self ._row .response
52+ self .file = self ._row .file
3753 else :
3854 self .counter = False
3955
def fetch(self):
    """
    Claim the next unprocessed snapshot row and mark it as locked.

    Selects the row with the lowest ``scid`` whose ``response`` is NULL and
    sets its ``response`` to ``"LOCK"`` within a single transaction. Because
    only rows with a NULL response are selected, a row marked this way is
    not returned by a later fetch.

    Concurrency:
        - On SQLite the select-and-mark step runs while holding the
          class-level ``threading.Lock``, so workers sharing this class in
          one process cannot claim the same row.
        - On every other dialect the SELECT is issued with
          ``FOR UPDATE SKIP LOCKED`` so the database row lock keeps
          concurrent workers apart.

    Returns:
        waybackup_snapshots or None: The claimed row, or None when no row
        with a NULL response is left.
    """
    session = self._db.session

    def claim_next_row():
        # NOTE(review): Session.begin() refuses to start when the session
        # already has a transaction in progress - confirm callers always
        # leave the session committed or rolled back before fetching.
        with session.begin():
            candidate = session.execute(
                select(waybackup_snapshots)
                .where(waybackup_snapshots.response.is_(None))
                .order_by(waybackup_snapshots.scid)
                .limit(1)
                .with_for_update(skip_locked=True)
            ).scalar_one_or_none()

            if candidate is not None:
                # Marking happens inside the same transaction as the SELECT,
                # so the claim becomes visible atomically on commit.
                candidate.response = "LOCK"

        return candidate

    if session.bind.dialect.name == "sqlite":
        # The row lock is not relied on for SQLite; serialize through the
        # process-wide lock instead.
        with self.__sqlite_lock:
            return claim_next_row()
    return claim_next_row()
6094
def modify(self, column, value):
    """
    Persist a new value for one column of this snapshot's database row.

    The row is addressed by ``self.scid``. If executing or committing the
    UPDATE fails, the session is rolled back so it stays usable, and the
    original exception is re-raised.

    Args:
        column (str): Attribute name of the column on ``waybackup_snapshots``.
        value: New value to store in that column.

    Raises:
        AttributeError: If ``waybackup_snapshots`` has no attribute named
            ``column`` (raised before any database work is done).
    """
    session = self._db.session
    # Resolve the name to the mapped attribute instead of building SQL text.
    target = getattr(waybackup_snapshots, column)
    statement = (
        update(waybackup_snapshots)
        .where(waybackup_snapshots.scid == self.scid)
        .values({target: value})
    )
    try:
        session.execute(statement)
        session.commit()
    except Exception:
        # Without a rollback the session stays in a failed transaction and
        # every later call on it would fail as well.
        session.rollback()
        raise
68106
69107 def create_output (self ):
70108 """
71- Create a file path for the snapshot.
109+ Generate the file path for the snapshot download .
72110
73- - If MODE_LAST or MODE_FIRST is enabled, the path does not include the timestamp.
74- - Otherwise, include the timestamp in the path.
111+ If mode is 'first' or 'last', the path does not include the timestamp.
112+ Otherwise, the timestamp is included in the path.
113+
114+ Returns:
115+ str: Absolute path to the output file for the snapshot.
75116 """
76117 domain , subdir , filename = url_split (self .url_archive .split ("id_/" )[1 ], index = True )
77118
@@ -86,43 +127,79 @@ def create_output(self):
86127
@property
def redirect_url(self):
    """
    str or None: The redirect URL stored for this snapshot, if any.
    """
    return self._redirect_url

@redirect_url.setter
def redirect_url(self, value):
    """
    Set the redirect URL and push the change to the database.

    A write is skipped only when it would be a no-op, i.e. the stored URL
    is already None and the new value is None as well.

    Args:
        value (str or None): The new redirect URL.
    """
    # Guard on this attribute's own current value (as the response_status
    # setter does). Checking redirect_timestamp here would silently drop a
    # request to clear an existing URL while the timestamp is unset.
    if self._redirect_url is None and value is None:
        return
    self._redirect_url = value
    self.modify(column="redirect_url", value=value)
97147
@property
def redirect_timestamp(self):
    """
    str or None: The timestamp of the redirect stored for this snapshot, if any.
    """
    return self._redirect_timestamp

@redirect_timestamp.setter
def redirect_timestamp(self, value):
    """
    Set the redirect timestamp and push the change to the database.

    A write is skipped only when it would be a no-op, i.e. the stored
    timestamp is already None and the new value is None as well.

    Args:
        value (str or None): The new redirect timestamp.
    """
    # Guard on this attribute's own current value (as the response_status
    # setter does). Checking redirect_url here would silently drop a request
    # to clear an existing timestamp while the URL is unset.
    if self._redirect_timestamp is None and value is None:
        return
    self._redirect_timestamp = value
    self.modify(column="redirect_timestamp", value=value)
108167
@property
def response_status(self):
    """
    str or None: The response/status value held for this snapshot.
    """
    return self._response_status

@response_status.setter
def response_status(self, value):
    """
    Store a new response status and write it to the ``response`` column.

    Nothing is stored or written when both the current status and the new
    value are None.

    Args:
        value (str or None): The new response status.
    """
    stays_unset = value is None and self._response_status is None
    if not stays_unset:
        self._response_status = value
        self.modify(column="response", value=value)
119187
120188 @property
121189 def file (self ):
190+ """
191+ str: The file path for the downloaded snapshot.
192+ """
122193 return self ._file
123194
124195 @file .setter
125196 def file (self , value ):
197+ """
198+ Set the file path and update the database.
199+
200+ Args:
201+ value (str): The new file path.
202+ """
126203 if self .file is None and value is None :
127204 return
128205 self ._file = value
0 commit comments