#!/usr/bin/env python3
"""Convert every HTML file in ``html_dir`` to a full-page PNG with headless Chrome.

The files are split into batches and each batch is rendered by one worker
process that owns a single Chrome instance.  Run the module as a script to
convert everything matching ``{html_dir}/*.html``; the PNGs are written to
``output_dir`` under the same base name.

Note that ``html_dir`` and ``output_dir`` are relative paths, so they are
resolved against the current working directory, not against this file.
"""
import glob
import io  # NOTE(review): not referenced in this script; kept so the module's imports are unchanged
import os
import time
from functools import partial
from multiprocessing import Pool, cpu_count
from pathlib import Path

# Third-party imports (selenium, webdriver_manager) are done inside the
# functions that talk to the browser, so the pure helpers in this module can be
# imported without a browser stack installed.

# Configuration
html_dir = '../HTML'
output_dir = '../HTML/imgs'
MAX_WORKERS = min(cpu_count(), 4)  # cap the process count to avoid exhausting resources

# Seconds to let scripts/fonts/animations paint after the document has loaded.
# Adjust as needed for pages with heavy dynamic content.
RENDER_SETTLE_SECONDS = 1

_INITIAL_WINDOW_SIZE = (1920, 1080)
_PAGE_LOAD_TIMEOUT_SECONDS = 10
_LAYOUT_TIMEOUT_SECONDS = 2
_LAYOUT_POLL_SECONDS = 0.1

_SUCCESS_PREFIX = "成功"
_FAILURE_PREFIX = "失败"

# Measures the full document, taking the larger of <body> and <html> because
# either one can be the scrolling element depending on the page's CSS.
_PAGE_SIZE_SCRIPT = (
    "return ["
    "Math.max(document.body.scrollWidth, document.documentElement.scrollWidth),"
    "Math.max(document.body.scrollHeight, document.documentElement.scrollHeight)"
    "];"
)


def png_path_for(html_file, out_dir=None):
    """Return the PNG path for *html_file*: same base name, inside *out_dir*.

    *out_dir* defaults to the module-level ``output_dir``.
    """
    if out_dir is None:
        out_dir = output_dir
    stem = os.path.splitext(os.path.basename(html_file))[0]
    return f"{out_dir}/{stem}.png"


def file_url_for(html_file):
    """Return a ``file://`` URL for *html_file* with special characters escaped.

    Building the URL by string concatenation breaks on names containing
    spaces, ``#`` or ``?``; ``Path.as_uri`` percent-encodes them.
    """
    return Path(os.path.abspath(html_file)).as_uri()


def split_into_batches(items, batch_count):
    """Distribute *items* round-robin into at most *batch_count* non-empty lists."""
    batch_count = max(1, min(batch_count, len(items)))
    batches = [items[offset::batch_count] for offset in range(batch_count)]
    return [batch for batch in batches if batch]


def count_outcomes(results):
    """Return ``(succeeded, failed)`` for the status strings in *results*."""
    succeeded = sum(1 for line in results if line.startswith(_SUCCESS_PREFIX))
    return succeeded, len(results) - succeeded


def _failure(html_file, error):
    """Format the status string reported for a file that could not be converted."""
    return f"{_FAILURE_PREFIX}:{html_file} - 错误:{error}"


def _build_chrome_options():
    """Return the Chrome options used for every headless browser instance."""
    from selenium.webdriver.chrome.options import Options

    width, height = _INITIAL_WINDOW_SIZE
    chrome_options = Options()
    chrome_options.add_argument("--headless=new")
    chrome_options.add_argument(f"--window-size={width},{height}")
    chrome_options.add_argument("--hide-scrollbars")
    chrome_options.add_argument("--disable-extensions")
    chrome_options.add_argument("--disable-gpu")
    chrome_options.add_argument("--disable-dev-shm-usage")  # avoid shared-memory problems
    # NOTE(review): --no-sandbox weakens Chrome's isolation; it is normally only
    # needed when running as root or inside a container.
    chrome_options.add_argument("--no-sandbox")
    return chrome_options


def _create_driver(driver_path=None):
    """Start a headless Chrome WebDriver.

    *driver_path* is the chromedriver executable; when ``None`` it is resolved
    (and downloaded if necessary) through ``ChromeDriverManager``.
    """
    from selenium import webdriver
    from selenium.webdriver.chrome.service import Service

    if driver_path is None:
        from webdriver_manager.chrome import ChromeDriverManager

        driver_path = ChromeDriverManager().install()
    return webdriver.Chrome(service=Service(driver_path), options=_build_chrome_options())


def _wait_for_document_ready(driver):
    """Block until ``document.readyState`` is ``complete`` (raises on timeout)."""
    from selenium.webdriver.support.ui import WebDriverWait

    WebDriverWait(driver, _PAGE_LOAD_TIMEOUT_SECONDS).until(
        lambda d: d.execute_script("return document.readyState") == "complete"
    )


def _measure_page(driver):
    """Return the full ``(width, height)`` of the loaded document in pixels."""
    width, height = driver.execute_script(_PAGE_SIZE_SCRIPT)
    return int(width), int(height)


def _wait_for_stable_size(driver):
    """Wait until two consecutive page measurements agree.

    Used after resizing the window: ``readyState`` is already ``complete`` at
    that point, so polling it would return immediately without giving the page
    time to reflow.  This is best effort; a page that never stops changing
    size is captured as-is once the timeout expires.
    """
    from selenium.common.exceptions import TimeoutException
    from selenium.webdriver.support.ui import WebDriverWait

    last_seen = None

    def _unchanged(drv):
        nonlocal last_seen
        current = _measure_page(drv)
        if current == last_seen:
            return True
        last_seen = current
        return False

    try:
        WebDriverWait(
            driver, _LAYOUT_TIMEOUT_SECONDS, poll_frequency=_LAYOUT_POLL_SECONDS
        ).until(_unchanged)
    except TimeoutException:
        pass  # deliberate: take the screenshot with whatever layout we have


def convert_html_to_png(html_file, driver=None):
    """Render one HTML file to a full-page PNG in ``output_dir``.

    Args:
        html_file: Path of the HTML file to render.
        driver: Optional WebDriver to reuse.  When omitted, a private browser
            is started for this call and shut down before returning.

    Returns:
        A status string starting with "成功" on success or "失败" on failure.
        Errors are reported through the return value; nothing is raised.
    """
    owns_driver = driver is None
    try:
        if owns_driver:
            driver = _create_driver()

        output_file = png_path_for(html_file)

        # A reused browser still has the previous page's (possibly huge) window;
        # reset it so this page is not measured against a stale viewport.
        driver.set_window_size(*_INITIAL_WINDOW_SIZE)

        driver.get(file_url_for(html_file))
        _wait_for_document_ready(driver)

        # Real pause for dynamic content.  (implicitly_wait() is not a delay:
        # it only configures the element-lookup timeout.)
        time.sleep(RENDER_SETTLE_SECONDS)

        # Grow the window to the document size so one screenshot covers it all.
        total_width, total_height = _measure_page(driver)
        driver.set_window_size(max(total_width, 1), max(total_height, 1))
        _wait_for_stable_size(driver)

        screenshot = driver.get_screenshot_as_png()
        with open(output_file, 'wb') as f:
            f.write(screenshot)

        return f"{_SUCCESS_PREFIX}:{html_file} -> {output_file}"

    except Exception as e:  # top-level boundary: report per file, keep the batch going
        return _failure(html_file, e)

    finally:
        if owns_driver and driver is not None:
            driver.quit()


def _convert_batch(html_batch, driver_path=None):
    """Convert every file in *html_batch* with one shared browser.

    Runs inside a worker process.  Starting Chrome once per batch instead of
    once per file removes most of the per-file overhead.  Returns one status
    string per input file.
    """
    try:
        driver = _create_driver(driver_path)
    except Exception as e:
        return [_failure(html_file, e) for html_file in html_batch]
    try:
        return [convert_html_to_png(html_file, driver=driver) for html_file in html_batch]
    finally:
        driver.quit()


def main():
    """Convert all HTML files in ``html_dir`` and print a summary."""
    # Everything with side effects lives here rather than at module level:
    # under the "spawn" start method each worker re-imports this module, and
    # would otherwise repeat the directory scan and the banner output.
    os.makedirs(output_dir, exist_ok=True)

    html_files = sorted(glob.glob(f"{html_dir}/*.html"))
    print(f"找到 {len(html_files)} 个 HTML 文件")
    if not html_files:
        return

    batches = split_into_batches(html_files, MAX_WORKERS)
    print(f"使用 {len(batches)} 个并行进程")
    print("开始转换...")

    results = []
    try:
        # Resolve chromedriver once in the parent; concurrent installs from
        # several workers can race on the download cache.
        from webdriver_manager.chrome import ChromeDriverManager

        driver_path = ChromeDriverManager().install()
    except Exception as e:
        results = [_failure(html_file, e) for html_file in html_files]
    else:
        worker = partial(_convert_batch, driver_path=driver_path)
        with Pool(processes=len(batches)) as pool:
            for batch_results in pool.imap_unordered(worker, batches):
                results.extend(batch_results)

    # Show which files failed and why, not just how many.
    for line in results:
        if line.startswith(_FAILURE_PREFIX):
            print(line)

    success_count, fail_count = count_outcomes(results)
    print("\n转换完成:")
    print(f" 成功:{success_count}")
    print(f" 失败:{fail_count}")
    print(f"所有 PNG 图片已保存到 {output_dir} 目录")


if __name__ == '__main__':
    main()