diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..ed23b19
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,223 @@
+# Python
+__pycache__/
+*.py[cod]
+*$py.class
+*.so
+*.pyc
+*.pyo
+*.pyd
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+pip-wheel-metadata/
+share/python-wheels/
+*.egg-info/
+*.egg/
+.installed.cfg
+*.egg
+MANIFEST
+
+# Virtual environments
+venv/
+env/
+ENV/
+env.bak/
+venv.bak/
+.venv
+
+# IDE
+.vscode/
+*.code-workspace
+.idea/
+*.swp
+*.swo
+*~
+.codebuddy
+.trae/
+
+# OS
+.DS_Store
+.DS_Store?
+._*
+.Spotlight-V100
+.Trashes
+ehthumbs.db
+Thumbs.db
+
+# Folder config file
+Desktop.ini
+
+# Recycle Bin used on file shares
+$RECYCLE.BIN/
+
+# Windows Installer files
+*.cab
+*.msi
+*.msm
+*.msp
+
+# Windows shortcuts
+*.lnk
+
+# Logs
+*.log
+logs/
+
+# Database
+*.db
+*.sqlite
+*.sqlite3
+
+# Environment variables
+.env
+
+# PyInstaller
+*.spec
+
+# Jupyter Notebook
+.ipynb_checkpoints
+
+# Distribution / packaging
+bin/
+test_data/
+
+# Unit test / coverage reports
+.pytest_cache/
+htmlcov/
+.tox/
+.nox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+*.py,cover
+.hypothesis/
+
+# Translations
+*.mo
+*.pot
+
+# Sphinx documentation
+docs/_build/
+
+# PyBuilder
+target/
+
+# IPython
+profile_default/
+ipython_config.py
+
+# pyenv
+.python-version
+
+# pipenv
+Pipfile.lock
+
+# PEP 582; used by e.g. github.com/David-OConnor/pyflow
+__pypackages__/
+
+# Celery stuff
+celerybeat-schedule
+celerybeat.pid
+
+# SageMath parsed files
+*.sage.py
+
+# Spyder project settings
+.spyderproject
+.spyproject
+
+# Rope project settings
+.ropeproject
+
+# mkdocs documentation
+/site
+
+# mypy
+.mypy_cache/
+.dmypy.json
+dmypy.json
+
+# Pyre type checker
+.pyre/
+
+# pytype static type analyzer
+.pytype/
+
+# Cython debug symbols
+cython_debug/
+# End of https://www.toptal.com/developers/gitignore/api/python
+
+# Project-specific
+coldatafresh/
\ No newline at end of file
diff --git "a/.trae/documents/\344\277\256\345\244\215\345\205\250\347\233\230\345\210\267\346\226\260\345\212\237\350\203\275\347\232\204\344\270\232\345\212\241\346\265\201\347\250\213.md" "b/.trae/documents/\344\277\256\345\244\215\345\205\250\347\233\230\345\210\267\346\226\260\345\212\237\350\203\275\347\232\204\344\270\232\345\212\241\346\265\201\347\250\213.md"
new file mode 100644
index 0000000..380d531
--- /dev/null
+++ "b/.trae/documents/\344\277\256\345\244\215\345\205\250\347\233\230\345\210\267\346\226\260\345\212\237\350\203\275\347\232\204\344\270\232\345\212\241\346\265\201\347\250\213.md"
@@ -0,0 +1,78 @@
+## 问题分析
+
+当前的全盘刷新功能存在以下问题:
+1. 没有按照用户期望的业务流程执行
+2. 没有询问用户是否保留已使用空间中的文件
+3. 没有实现文件的临时备份和恢复功能
+4. 没有实现填满可用空间的逻辑
+5. 没有返回累积填入容量和最大写入速度
+
+## 解决方案
+
+根据用户描述的业务流程，我将重新设计和实现全盘刷新功能:
+
+### 1. 重新设计业务流程
+
+```
+1. 获取指定目录的总容量和已使用容量
+2. 询问用户是否保留已使用空间中的文件
+3. 如果保留:
+   a. 将已使用空间中的文件复制到系统临时目录 d:\$aspnmytools
+   b. 删除指定目录下的原文件
+4. 在指定目录下创建工作目录 $aspnmytools
+5. 写入指定单位大小的数据文件(如50GB/个)，直到填满可用空间
+6. 记录累积填入容量和最大写入速度
+7. 删除工作目录 $aspnmytools
+8. 如果保留原文件:
+   a. 将临时目录中的文件复制回原位置
+   b. 删除临时目录 d:\$aspnmytools
+9. 完成全盘刷新
+```
+
+### 2. 修改代码结构
+
+- **新增函数**:
+  - `get_directory_stats(directory)` - 获取目录的总容量和已使用容量
+  - `backup_files(source_dir, backup_dir)` - 备份文件到临时目录
+  - `restore_files(backup_dir, target_dir)` - 从临时目录恢复文件
+  - `fill_available_space(directory, unit_size)` - 填满可用空间
+  - `calculate_max_files(directory, unit_size)` - 计算需要创建的文件数量
+
+- **修改现有函数**:
+  - `full_refresh_file` - 重命名或重构，专注于单个文件的刷新
+  - `execute` - 修改主执行逻辑，添加全盘刷新的新流程
+
+### 3. 实现关键功能
+
+- **空间计算**：使用 `get_disk_space` 函数获取磁盘空间信息
+- **文件复制**：使用 `shutil.copy2` 或类似函数实现文件的完整复制
+- **文件写入**：使用现有的 `continuous_full_refresh_file` 函数写入数据
+- **进度显示**：使用现有的 `Dashboard` 类显示进度
+- **统计信息**：记录写入速度和总容量
+
+### 4. 修改用户交互
+
+- 在主菜单中添加全盘刷新选项
+- 询问用户是否保留已使用空间中的文件
+- 询问用户写入单位大小
+- 显示空间信息和操作确认
+
+## 预期效果
+
+1. 全盘刷新功能按照用户期望的业务流程执行
+2. 用户可以选择是否保留已使用空间中的文件
+3. 实现了文件的临时备份和恢复功能
+4. 实现了填满可用空间的逻辑
+5. 返回累积填入容量和最大写入速度
+6. 提高了固态硬盘的刷新效果
+
+## 实施步骤
+
+1. 分析当前代码结构和功能
+2. 设计新的全盘刷新业务流程
+3. 实现新的函数和修改现有函数
+4. 添加用户交互逻辑
+5. 测试新功能
+6. 优化和调试
+
+这个修改计划将确保全盘刷新功能按照用户期望的业务流程执行，提高固态硬盘的刷新效果。
\ No newline at end of file
diff --git a/README.md b/README.md
index 1ffa507..85bacaa 100644
--- a/README.md
+++ b/README.md
@@ -1,6 +1,52 @@
-# ColDataRefresh
+# ColDataRefresh SSD冷数据维护系统 v5.0.0
+
智能检测固态硬盘的冷数据并解决冷数据掉速问题，带数据校验功能
[English](/README_EN.md)
+- https://aspnmy.blog.csdn.net/article/details/150638290?spm=1011.2415.3001.5331
+
+## SSD固态维护的简单说明
+
+SSD固态维护工具(Rust编译重构版)
+- 模式1,冷数据维护:SSD里面的NAND颗粒是电化学结构、没有磁道的,长时间不通电的区块会因为亏电而写入降速,也就是所谓的掉速。用模式1对超过指定时间未访问的数据区块进行梳理,解决掉速问题。定期维护一般一年一次,日常维护可以6个月一次。模式1不会损失数据。
+
+- 模式2,全盘模式:全盘模式会向盘内写入指定数据量来刷新全盘,此模式下会覆盖数据。此模式可以做芯片的全面加电刷新,也可以当老化程序使用。固态老化挑体质:当你收到一个盘片的时候不知道体质怎样,用模式2向里面写入500GB~1TB的数据,如果不爆盘(爆盘就是指不识别或写保护),这种盘的颗粒体质基本上可以打85分以上(这个打分不是健康度打分),那这盘片的颗粒就值得信赖。
+
+- 模式3,实时TRIM优化:系统默认的TRIM策略是空闲时才进行释放,但有时空闲时固态可能已经拔掉,或者正在执行写入业务导致TRIM优先级不高,或者已经关机,最终TRIM没有执行。
+模式3就是直接跳过系统策略,执行实时TRIM。执行期间不要拔盘或断电,一般需要15-30分钟,盘的容量越大,执行全盘TRIM的时间越长。日常维护3个月一次。
+
+- 以上三种模式可作为常规固态电化学维护手段,除模式1外,其他两种模式都会损失数据。
+
+## v5.0.0 更新内容
+- 完全重写为Rust语言，提供更高的性能和可靠性
+- 保留了所有原有功能，并进行了性能优化
+- 支持并发处理，提高数据刷新速度
+- 增强了跨平台兼容性，支持Windows和Linux
+- 优化了文件系统操作，减少I/O开销
+- 改进了错误处理机制，提高程序稳定性
+- 简化了构建流程，使用Cargo进行依赖管理和构建
+
+## v4.7.0 更新内容（历史版本）
+- 修复了`full_refresh_file`模式，确保正确写入数据到磁盘
+- 修复了PyInstaller构建脚本问题，确保依赖文件正确打包
+- 确保日志保存在程序目录而不是临时目录，方便查询
+- 实现了完整的全盘刷新业务流程，包括文件备份/恢复和空间填充
+- 优化了全盘刷新业务流程：
+  - 先尝试格式化操作，失败后再进行文件删除操作
+  - TRIM操作放在最后一步执行，避免中间流程卡住
+- 添加了自动管理员权限提升，简化用户操作
+- 根据Windows版本优化了TRIM操作：
+  - Windows 11：执行ReTrim + SlabConsolidate + ReTrim组合操作
+  - Windows 10：只执行ReTrim操作
+  - Windows 10以下：使用DeviceIoControl方法执行TRIM操作
+- 添加了重复TRIM操作避免机制，提高效率
+- 增强了TRIM操作的用户提示，告知用户操作详情和注意事项
+- 修复了SSL证书验证失败的问题，确保程序能在各种环境下正常运行
+- 增强了日志记录和错误处理
+
+## v4.4 更新内容（历史版本）
+- 实现真正的TRIM功能：通过操作系统API通知SSD哪些数据块无效，提高写入性能并延长SSD寿命
+- 优化了用户界面和交互体验
+- 增强了跨平台兼容性支持
### 什么是冷数据
冷数据指存放在硬盘上且较长时间（如两年甚至更长）没有进行重新写入或者更新的数据，直观上以文件为单位，实际上反应到物理层面是文件对应的存储单元。通常，长时间存储在硬盘上的文档、视频、音乐、图片等静态资料都是冷数据，甚至操作系统、程序和游戏在较长一段时间内只读取过却没有修改或者更新的任何文件都会在未来"成长为"冷数据（热更新或者增量更新现在已经很成熟，一般来讲系统、游戏、应用的更新只会更新需要修改的部分，不需要修改的部分不会动）。
@@ -22,17 +68,41 @@
### 本工具的特点/与 `DiskFresh`等工具的区别
1. `DiskFresh`也是为处理冷数据而设计的，但是DiskFresh是基于更加底层的磁盘`Sector(扇区)`层面进行全面的覆写。其优点是更为彻底，缺点是刷新时间长，会刷新不必要的非冷数据区块，可能会缩减硬盘寿命，如果需要全盘刷新，`DiskFresh`会是更好的选择；**本工具基于文件系统层面，仅重构检测到的冷数据，可以跳过不必要的文件，并且带有CRC快速校验保证文件安全，尤其适合只需要刷新部分文件（夹）中的冷数据的情形，安全快速，最大限度取得硬盘寿命消耗和性能的平衡点。**
-2. 本工具支持保存文件刷新进度，你可以随时退出并在下次继续数据刷新的操作
-3. 本工具开源。
+2. **实现真正的TRIM功能**：通过操作系统API（Windows的DeviceIoControl和Linux的ioctl）通知SSD哪些数据块无效，提高写入性能并延长SSD寿命
+3. 本工具支持保存文件刷新进度，你可以随时退出并在下次继续数据刷新的操作
+4. 本工具开源。
+5. 
采用Rust语言开发,提供更高的性能和可靠性 ### 如何使用 > **请右键程序 - `以管理员身份运行`**,这是必要的,可以不授予权限,但特定文件可能会访问或者覆写失败。 -1. Releases界面有编译的exe二进制文件,下载双击运行 / 你也可以从python源代码运行(你可以在代码里修改更多的配置) -2. 输入你需要扫描冷数据的目录,如`D:\DL`或者整个硬盘`D:\`(Windows用户可以选中文件夹按`Ctrl+Shift+C`复制目录地址),按下回车 -3. 输入冷数据天数,如`300`,程序会扫描最后一次修改大于300天的文件。(输入0将会扫描目录下所有文件),按下回车程序即可运行。 -4. **重要:如果运行中需要退出程序,请先在控制台按下`Ctrl+C`发送终止命令,否则可能会造成数据丢失!** +1. **从源代码构建**: + - 确保已安装Rust开发环境(推荐使用rustup安装) + - 克隆仓库:`git clone https://github.com/aspnmy/ColDataRefresh.git` + - 切换到5.0分支:`git checkout v5.0` + - 进入项目目录:`cd ColDataRefresh/coldatafresh` + - 构建项目:`cargo build --release` + - 运行程序:`cargo run --release` 或直接运行生成的可执行文件 + +2. 程序提供三种模式: + - 智能模式:自动检测并刷新冷数据,保留文件原始内容 + - 全盘激活冷数据模式:将文件内容替换为特定值,**此模式会导致文件内容丢失,谨慎使用!** + - TRIM模式:通知SSD哪些数据块无效,提高写入性能并延长寿命 + +3. 输入你需要扫描冷数据的目录,如`D:\DL`或者整个硬盘`D:\`(Windows用户可以选中文件夹按`Ctrl+Shift+C`复制目录地址),按下回车 + +4. 输入冷数据天数,如`300`,程序会扫描最后一次修改大于300天的文件。(输入0将会扫描目录下所有文件),按下回车程序即可运行。 + +5. **重要:如果运行中需要退出程序,请先在控制台按下`Ctrl+C`发送终止命令,否则可能会造成数据丢失!** + +### TRIM功能说明 + +TRIM功能是一种高级SSD维护特性,通过通知固态硬盘哪些数据块不再有效,可以显著提高写入性能并延长SSD寿命。本工具实现的TRIM功能: + +- 通过操作系统API直接与SSD通信,比文件系统级别的TRIM更加高效 +- 支持Windows和Linux平台 +- 在数据刷新过程中自动应用TRIM指令到相关数据块 +- 不需要格式化或低级操作,安全可靠 -### 程序截图 Screenshots -![projectimage](./projectimage.png) +> 注意:TRIM功能需要硬件和操作系统支持,请确保您的SSD和操作系统支持TRIM指令。 diff --git a/README_EN.md b/README_EN.md index f58aef0..81a4e37 100644 --- a/README_EN.md +++ b/README_EN.md @@ -1,6 +1,33 @@ -# ColDataRefresh +# ColDataRefresh SSD Cold Data Maintenance System v5.0.0 Intelligently detects cold data on SSD and solves the cold data crash problem with data validation. +## v5.0.0 Update Content +- Completely rewritten in Rust language, providing higher performance and reliability +- Retained all original features with performance optimizations +- Supports concurrent processing to improve data refresh speed +- Enhanced cross-platform compatibility, supporting Windows and Linux +- Optimized file system operations to reduce I/O overhead +- Improved error handling mechanism to enhance program stability +- Simplified build process using Cargo for dependency management and building + +## v4.7.0 Update Content (Historical Version) +- Fixed the `full_refresh_file` mode to ensure data is correctly written to disk +- Fixed PyInstaller build script issues to ensure dependency files are correctly packaged +- Ensured logs are saved in the program directory instead of temporary directories for easier querying +- Implemented a complete full refresh business flow, including file backup/restore and space filling +- Optimized the full refresh business flow: + - First attempts formatting operation, then falls back to file deletion if formatting fails + - TRIM operation is executed at the end to avoid blocking intermediate processes +- Added automatic admin privilege elevation to simplify user operations +- Optimized TRIM operations based on Windows version: + - Windows 11: Executes ReTrim + SlabConsolidate + ReTrim combination operation + - Windows 10: Only executes ReTrim operation + - Windows 10 and below: Uses DeviceIoControl method to execute TRIM operations +- Added duplicate TRIM operation avoidance mechanism to improve efficiency +- Enhanced TRIM operation user prompts to inform users about operation details and precautions +- Fixed SSL certificate verification failure issue to ensure the program can run normally in various environments +- Enhanced logging and error handling + ### What is Cold Data Cold data refers to data that has been stored on the hard drive for a long time (e.g., half a year or even 
longer) and has not been rewritten or updated, which is intuitively expressed in terms of files, but in reality is reflected in the physical level of the corresponding storage unit of the file. Usually, documents, videos, music, pictures and other static data stored on the hard drive for a long time are cold data, and even any files that have been read by the operating system, programmes and games over a long period of time without modification or update will ‘grow’ to be cold data in the future (hot or incremental updates are already very mature nowadays, but they can be used for a long time). Generally speaking, updates to systems, games, and applications will only update the parts that need to be changed, and leave the parts that don't need to be changed untouched). **Note that the formation of cold data is only related to writing, not reading, even if a file is read frequently, but not modified to write, it is possible to become cold data** (this is also the reason why some people react to the slow loading of the games that they often play because of the cold data falling speed). @@ -23,15 +50,38 @@ You can also use this tool, which will automatically determine if your file is c 1. `DiskFresh` is also designed to deal with cold data, but DiskFresh is based on the more underlying `Sector` level of the disc to do a full overwrite. The disadvantage is that it takes a long time to refresh, and will refresh unnecessary non-cold data blocks, which may reduce the life of the hard disc; **This tool is based on the file system level, and only refreshes the detected cold data, and comes with CRC file checksum, which is safer and faster. **, 2. This tool supports saving the file refresh progress, you can exit at any time and continue the data refresh operation the next time 3. This tool is open source. +4. Developed in Rust language, providing higher performance and reliability ### How to use > **Please right click the programme - `Run as administrator` **, this is necessary, you can not grant permission, but specific files may be accessed or overwrite failed. -1. Releases interface has compiled exe binaries, download and double click to run / you can also run from python source code (you can change more configurations in the code) -2. Enter the directory you want to scan for cold data, e.g. `D:\DL` or the whole hard drive `D:\` (Windows users can select the folder and press `Ctrl+Shift+C` to copy the directory address), press enter. -3. Enter the number of days of cold data, e.g. `300`, the programme will scan files that have been last modified more than 300 days ago. (Entering 0 will scan all files in the directory.) Press Enter to run the program. -4. **Important: If you need to exit the programme while it is running, please press `Ctrl+C` on the console first to send the terminate command, otherwise it may cause data loss! **Important. +1. **Build from source code**: + - Ensure Rust development environment is installed (recommended to use rustup for installation) + - Clone the repository: `git clone https://github.com/aspnmy/ColDataRefresh.git` + - Switch to v5.0 branch: `git checkout v5.0` + - Enter project directory: `cd ColDataRefresh/coldatafresh` + - Build the project: `cargo build --release` + - Run the program: `cargo run --release` or directly run the generated executable file + +2. 
The program provides three modes: + - Smart mode: Automatically detects and refreshes cold data, preserving the original file content + - Full disk cold data activation mode: Replaces file content with specific values, **this mode will cause file content loss, use with caution! ** + - TRIM mode: Notifies SSD which data blocks are invalid, improves write performance and extends SSD life + +3. Enter the directory you want to scan for cold data, e.g. `D:\DL` or the whole hard drive `D:\` (Windows users can select the folder and press `Ctrl+Shift+C` to copy the directory address), press enter. + +4. Enter the number of days of cold data, e.g. `300`, the programme will scan files that have been last modified more than 300 days ago. (Entering 0 will scan all files in the directory.) Press Enter to run the program. + +5. **Important: If you need to exit the programme while it is running, please press `Ctrl+C` on the console first to send the terminate command, otherwise it may cause data loss! ** + +### TRIM Function Description + +TRIM is an advanced SSD maintenance feature that can significantly improve write performance and extend SSD life by notifying the solid-state drive which data blocks are no longer valid. The TRIM function implemented by this tool: + +- Communicates directly with SSD via operating system API, more efficient than file system level TRIM +- Supports Windows and Linux platforms +- Automatically applies TRIM commands to relevant data blocks during data refresh +- No formatting or low-level operations required, safe and reliable -### Program screenshots Screenshots -! [projectimage](. /projectimage.png) \ No newline at end of file +> Note: TRIM functionality requires hardware and operating system support, please ensure your SSD and operating system support TRIM commands. 
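To make the OS-level TRIM path described above more concrete, the following is a minimal, hypothetical Rust sketch of issuing a filesystem TRIM request on Linux through the `FITRIM` ioctl (the generic ioctl route the README refers to; `fstrim` uses the same call). It is not taken from this project's code: the `libc` crate dependency, the `FITRIM` constant value (x86_64 encoding), the struct layout, and the example mount point are all assumptions for illustration only.

```rust
// Sketch: ask the kernel to TRIM all free space on a mounted filesystem.
// Assumptions: Linux, x86_64 ioctl encoding, `libc` crate in Cargo.toml,
// and sufficient privileges (FITRIM normally requires root/CAP_SYS_ADMIN).
use std::fs::File;
use std::os::unix::io::AsRawFd;

#[repr(C)]
struct FstrimRange {
    start: u64,  // byte offset to start trimming from
    len: u64,    // number of bytes to consider (u64::MAX = whole filesystem)
    minlen: u64, // ignore free extents smaller than this
}

// _IOWR('X', 121, struct fstrim_range) on x86_64 Linux (assumed value).
const FITRIM: libc::c_ulong = 0xC018_5879;

fn trim_mount(mount_point: &str) -> std::io::Result<u64> {
    // Opening the mount point directory gives a file descriptor on that filesystem.
    let dir = File::open(mount_point)?;
    let mut range = FstrimRange { start: 0, len: u64::MAX, minlen: 0 };
    // The kernel writes the number of bytes actually trimmed back into `range.len`.
    let rc = unsafe { libc::ioctl(dir.as_raw_fd(), FITRIM, &mut range as *mut FstrimRange) };
    if rc != 0 {
        return Err(std::io::Error::last_os_error());
    }
    Ok(range.len)
}

fn main() -> std::io::Result<()> {
    // Example mount point; adjust to the filesystem you want to maintain.
    let trimmed = trim_mount("/")?;
    println!("TRIM reported {} bytes released", trimmed);
    Ok(())
}
```

On Windows, the equivalent request would go through `DeviceIoControl` (or a retrim/optimize pass), as the changelog entries above describe; that path is not sketched here.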
diff --git a/coldatafresh.py b/coldatafresh.py deleted file mode 100644 index 7bc0202..0000000 --- a/coldatafresh.py +++ /dev/null @@ -1,269 +0,0 @@ -import os -import time -import threading -import signal -import json -import zlib -import shutil -import random -from concurrent.futures import ThreadPoolExecutor -from elevate import elevate - -# 全局变量 -LOG_FILE = "refresh_log.json" -BUFFER_SIZE = 4 * 1024 # 缓冲区大小 -ENABLE_MULTITHREADING = False # 设置为 False 时禁用多线程 -THREAD_COUNT = 4 # 线程数 -BENCHMARK_SIZE_GB = 1 # 基准速度测试大小 (GB) -RATIO = 0.3 # 假设基准测试读取值为100MB/s, 若测试文件读取速度为100*0.3 = 30MB/s,则判断为冷数据 -SKIP_SIZE = 1 * 1024**2 #小于1(MB)的文件会被跳过。删除此行或填0则不跳过文件。 -EXIT_FLAG = False # 用于检测是否终止程序,请不要修改这个 - -def signal_handler(sig, frame): - global EXIT_FLAG - print("\nTerminating program...") - EXIT_FLAG = True - -signal.signal(signal.SIGINT, signal_handler) - -def load_log(): - if os.path.exists(LOG_FILE): - with open(LOG_FILE, "r") as f: - return json.load(f) - return {"pending": [], "completed": []} - -def save_log(log): - with open(LOG_FILE, "w") as f: - json.dump(log, f) - -def benchmark_speed(directory, size_in_gb=BENCHMARK_SIZE_GB): - size_in_bytes = size_in_gb * 1024**3 - small_file_sizes = [random.randint(100 * 1024, 10 * 1024**2) for _ in range(10)] # 100KB - 10MB - medium_file_sizes = [random.randint(10 * 1024**2, 100 * 1024**2) for _ in range(10)] # 10MB - 100MB - - benchmark_results = { - "large": {"speed": 0, "file_size": size_in_gb * 1024**3}, - "medium": {"speed": 0, "file_size": sum(medium_file_sizes)}, - "small": {"speed": 0, "file_size": sum(small_file_sizes)}, - } - - # 大文件测试 - try: - benchmark_file = os.path.join(directory, "benchmark_large.bin") - print(f"Benchmarking large file ({size_in_gb}GB)...") - with open(benchmark_file, "wb") as f: - for _ in range(size_in_bytes // BUFFER_SIZE): - f.write(os.urandom(BUFFER_SIZE)) - - start = time.time() - with open(benchmark_file, "rb") as f: - while f.read(BUFFER_SIZE): - pass - elapsed = time.time() - start - benchmark_results["large"]["speed"] = size_in_bytes / elapsed / 1024**2 # MB/s - os.remove(benchmark_file) - except Exception as e: - print(f"Error in large file benchmark: {e}") - - # 中小文件测试 - for category, file_sizes in [("medium", medium_file_sizes), ("small", small_file_sizes)]: - files = [] - try: - # 写入多个文件 - for idx, file_size in enumerate(file_sizes): - file_path = os.path.join(directory, f"benchmark_{category}_{idx}.bin") - with open(file_path, "wb") as f: - f.write(os.urandom(file_size)) - files.append(file_path) - - start = time.time() - for file_path in files: - with open(file_path, "rb") as f: - while f.read(BUFFER_SIZE): - pass - elapsed = time.time() - start - benchmark_results[category]["speed"] = sum(file_sizes) / elapsed / 1024**2 # MB/s - except Exception as e: - print(f"Error in {category} file benchmark: {e}") - finally: - for file_path in files: - if os.path.exists(file_path): - os.remove(file_path) - return benchmark_results - -def refresh_file(file_path, benchmark_speed_results, max_retries=2): - if EXIT_FLAG: - return - - temp_path = file_path + ".temp" - checksum_src = 0 - checksum_dest = 0 - retries = 0 - - try: - file_size = os.path.getsize(file_path) - - # 判断文件大小,选择合适的基准速度 - if file_size > 100 * 1024**2: # 大于100MB,使用大文件基准 - benchmark_speed = benchmark_speed_results["large"]["speed"] - elif file_size > 10 * 1024**2: # 10MB-100MB,使用中等文件基准 - benchmark_speed = benchmark_speed_results["medium"]["speed"] - else: # 小于10MB,使用小文件基准 - benchmark_speed = benchmark_speed_results["small"]["speed"] - - # 如果文件太小,跳过刷新 - if 
file_size <= BUFFER_SIZE: - print(f"Skipping tiny file: {file_path} (size: {file_size} bytes)") - return - - if SKIP_SIZE and file_size <= SKIP_SIZE: - print(f"Skipping tiny file: {file_path} (size: {file_size} bytes)") - return - - file_speed = test_read_speed(file_path) - - if file_speed < benchmark_speed * RATIO: - print(f"Refreshing cold data: {file_path} (read speed: {file_speed:.2f} MB/s, benchmark: {benchmark_speed:.2f} MB/s)") - - # 读取和写入 - while retries < max_retries: - with open(file_path, "rb") as src, open(temp_path, "wb") as dest: - while chunk := src.read(BUFFER_SIZE): - checksum_src = zlib.crc32(chunk, checksum_src) - dest.write(chunk) - checksum_dest = zlib.crc32(chunk, checksum_dest) - - # 校验 - if checksum_src == checksum_dest: - break - else: - retries += 1 - print(f"CRC mismatch, retrying {file_path}... ({retries}/{max_retries})") - os.remove(temp_path) - else: - # 如果多次重试失败,保留源文件并报告损坏 - print(f"Failed to refresh {file_path} after {max_retries} retries. The file might be corrupted.") - return - - # 保留原文件时间 - file_stat = os.stat(file_path) - shutil.move(temp_path, file_path) - os.utime(file_path, (file_stat.st_atime, file_stat.st_mtime)) # 恢复时间戳 - - # 保留原文件夹时间 - if os.path.isdir(file_path): - dir_stat = os.stat(os.path.dirname(file_path)) - os.utime(os.path.dirname(file_path), (dir_stat.st_atime, dir_stat.st_mtime)) # 恢复目录时间戳 - - print(f"File refreshed: {file_path}") - - else: - print(f"Skipping non-cold data: {file_path} (read speed: {file_speed:.2f} MB/s)") - - except Exception as e: - print(f"Error refreshing {file_path}: {e}") - - finally: - if os.path.exists(temp_path): - os.remove(temp_path) - - -# 多线程刷新文件 -def refresh_files(cold_files, benchmark_speed): - log = load_log() - - # 筛选待处理文件 - pending_files = list(set(cold_files) - set(log["completed"])) - log["pending"] = pending_files - save_log(log) - - lock = threading.Lock() - - def worker(file_path): - if EXIT_FLAG: - return - try: - refresh_file(file_path, benchmark_speed) - with lock: - log["completed"].append(file_path) - save_log(log) - except Exception as e: - print(f"Thread error: {e}") - - if ENABLE_MULTITHREADING: - # 使用多线程池 - with ThreadPoolExecutor(max_workers=THREAD_COUNT) as executor: - futures = [executor.submit(worker, file) for file in pending_files] - for future in futures: - if EXIT_FLAG: - break - future.result() - else: - for file_path in pending_files: - if EXIT_FLAG: - break - worker(file_path) - - -def scan_files(directory, min_days_old=30): - now = time.time() - cold_files = [] - - print(f"Scanning files in directory: {directory} for files older than {min_days_old} days...") - for root, _, files in os.walk(directory): - for file in files: - file_path = os.path.join(root, file) - try: - stat = os.stat(file_path) - if (now - stat.st_atime) > min_days_old * 86400: - cold_files.append(file_path) - except Exception as e: - print(f"Error accessing file {file_path}: {e}") - - print(f"Found {len(cold_files)} cold files.") - return cold_files - -def test_read_speed(file_path): - try: - start = time.time() - with open(file_path, "rb") as f: - while f.read(BUFFER_SIZE): - pass - elapsed = time.time() - start - file_size = os.path.getsize(file_path) - read_speed = file_size / elapsed / 1024**2 # MB/s - return read_speed - except Exception as e: - print(f"Error testing read speed for file {file_path}: {e}") - return 0 - -# 主函数 -def main(): - try: - elevate() - except Exception as e: - print("Warning: some files may fail to refresh without granting administrator privileges") - directory = input("Enter 
directory to scan for cold data: ").strip('"') - min_days_old = int(input("Enter minimum days to consider data as cold: ")) - - print("Benchmarking speed for new data...") - benchmark_speed_value = benchmark_speed(directory, BENCHMARK_SIZE_GB) - - print(f"Benchmark read speed for large files: {benchmark_speed_value['large']['speed']:.2f} MB/s") - print(f"Benchmark read speed for medium files: {benchmark_speed_value['medium']['speed']:.2f} MB/s") - print(f"Benchmark read speed for small files: {benchmark_speed_value['small']['speed']:.2f} MB/s") - - - print("Scanning for cold files...") - cold_files = scan_files(directory, min_days_old) - if not cold_files: - print("No cold files found. Exiting.") - return - - print("Refreshing cold files...") - refresh_files(cold_files, benchmark_speed_value) - print("All tasks completed.") - input("Press Enter to exit...") - -if __name__ == "__main__": - main() diff --git a/devrom.ico b/devrom.ico new file mode 100644 index 0000000..c6913e6 Binary files /dev/null and b/devrom.ico differ diff --git a/projectimage.png b/projectimage.png deleted file mode 100644 index 97bc7ef..0000000 Binary files a/projectimage.png and /dev/null differ