diff --git a/README.md b/README.md index fe5743b..914ace5 100644 --- a/README.md +++ b/README.md @@ -11,7 +11,7 @@ There are a few assumptions made in this code. 2) It is assumed that the hostname is universally unique and constant. -3) The [embedded software](https://github.com/Project-CETI/whale-tag-embedded/tree/main/packages/ceti-tag-set-hostname) for the whale tags actually sets the hostname that way. +3) The [embedded software](https://github.com/Project-CETI/whale-tag-embedded/blob/c0dcc18b6fca2b4d0e44fc6c8621816daf85554d/overlay/usr/lib/raspi-config/init_resize.sh#L149) for the whale tags actually sets the hostname that way. 4) Whale tags are mechanically isolated to withstand high pressures, so we assume LAN is a WiFi. diff --git a/ceti/whaletag.py b/ceti/whaletag.py index bd00b55..72c24a8 100644 --- a/ceti/whaletag.py +++ b/ceti/whaletag.py @@ -127,39 +127,51 @@ def create_filelist_to_download(hostname): hostname, username=DEFAULT_USERNAME, password=DEFAULT_PASSWORD) + sftp = ssh.open_sftp() # Prepare the local storage to accept the files local_data_folder = os.path.join(LOCAL_DATA_PATH, hostname) if not os.path.exists(local_data_folder): os.makedirs(local_data_folder) - local_files = os.listdir(local_data_folder) - # Check what files are available for download from the tag - # Ignores any folders in tag - remote_data_folder = os.path.normpath("/data") - _, stdout, _ = ssh.exec_command("ls -p " + remote_data_folder + "| grep -v /") + # Find all files recursively in /data, excluding swap and lost+found + remote_data_folder = "/data" + find_command = ( + f"find {remote_data_folder} -type f " + f"-not -path '*/swap/*' " + f"-not -path '*/lost+found/*'" + ) + _, stdout, stderr = ssh.exec_command(find_command) remote_files = stdout.readlines() - # Create the list of files to download - for fname in remote_files: - fname = fname.strip() - if (fname not in local_files): - files_to_download.append( - os.path.join(remote_data_folder, fname)) + # Process each remote file + for remote_path in remote_files: + remote_path = remote_path.strip() + if not remote_path: + continue + + # Get relative path from /data/ (e.g., "logs/syslog.log" or "audio.raw") + relative_path = os.path.relpath(remote_path, remote_data_folder) + local_path = os.path.join(local_data_folder, relative_path) + + # Check if file already exists locally + if not os.path.exists(local_path): + files_to_download.append(remote_path) continue - # Here: the file with this name is already present. - # Compare its hash to the local file. - # If different, lets re-download that file again. - local_sha = sha256sum(os.path.join(local_data_folder, fname)) - _, stdout, _ = ssh.exec_command( - "sha256sum " + os.path.join(remote_data_folder, fname)) - remote_sha = stdout.read().decode("utf-8").split(" ")[0] + # File exists - compare hash to see if it needs re-downloading + try: + local_sha = sha256sum(local_path) + _, stdout, _ = ssh.exec_command(f"sha256sum {remote_path}") + remote_sha = stdout.read().decode("utf-8").split(" ")[0] - if (local_sha != remote_sha): - files_to_download.append( - os.path.join(remote_data_folder, fname)) + if local_sha != remote_sha: + files_to_download.append(remote_path) + except: + # If hash comparison fails, download to be safe + files_to_download.append(remote_path) + sftp.close() finally: ssh.close() return files_to_download @@ -184,10 +196,20 @@ def stop_capture_service(hostname): # Download a file over sftp def download_remote_file(hostname, remote_file): - local_file = os.path.join(LOCAL_DATA_PATH, hostname) - local_file = os.path.join(local_file, os.path.basename(remote_file)) + # Get relative path from /data/ to preserve directory structure + relative_path = os.path.relpath(remote_file, "/data") + + # Build local path maintaining directory structure + local_folder = os.path.join(LOCAL_DATA_PATH, hostname) + local_file = os.path.join(local_folder, relative_path) + + # Create subdirectories if needed + local_dir = os.path.dirname(local_file) + if not os.path.exists(local_dir): + os.makedirs(local_dir, exist_ok=True) + try: - print("Downloading " + remote_file) + print("Downloading " + relative_path) ssh = paramiko.SSHClient() ssh.set_missing_host_key_policy(paramiko.AutoAddPolicy()) ssh.connect(