Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
122 changes: 45 additions & 77 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,78 +1,46 @@
# Xcode
#
# gitignore contributors: remember to update Global/Xcode.gitignore, Objective-C.gitignore & Swift.gitignore

## User settings
xcuserdata/
DerivedData/
build
## Obj-C/Swift specific
*.hmap

## App packaging
*.ipa
*.dSYM.zip
*.dSYM

## Playgrounds
timeline.xctimeline
playground.xcworkspace

# Swift Package Manager
#
# Add this line if you want to avoid checking in source code from Swift Package Manager dependencies.
# Packages/
# Package.pins
# Package.resolved
# *.xcodeproj
#
# Xcode automatically generates this directory with a .xcworkspacedata file and xcuserdata
# hence it is not needed unless you have added a package configuration file to your project
# .swiftpm

.build/

# CocoaPods
#
# We recommend against adding the Pods directory to your .gitignore. However
# you should judge for yourself, the pros and cons are mentioned at:
# https://guides.cocoapods.org/using/using-cocoapods.html#should-i-check-the-pods-directory-into-source-control
Pods/
Podfile.lock

# Carthage
#
# Add this line if you want to avoid checking in source code from Carthage dependencies.
# Carthage/Checkouts

Carthage/Build/

# fastlane
#
# It is recommended to not store the screenshots in the git repo.
# Instead, use fastlane to re-generate the screenshots whenever they are needed.
# For more information about the recommended setup visit:
# https://docs.fastlane.tools/best-practices/source-control/#source-control

fastlane/report.xml
fastlane/Preview.html
fastlane/screenshots/**/*.png
fastlane/test_output

# macOS
.DS_Store
.AppleDouble
.LSOverride
._*

# 环境变量文件 - 敏感信息
# Compiled and build artifacts
*.pyc
__pycache__/

# Dependencies
.venv/
venv/
node_modules/

# Logs and temp files
*.log
*.tmp
*.swp

# Environment
.env

# 构建输出
*.app
*.dmg
*.zip
*.pkg

# 公证信息
notarization-info.json
.env.local
*.env.*

# Editors
.vscode/
.idea/

# Coverage
.coverage
coverage/
htmlcov/

# Distribution
dist/
build/
*.egg-info/

# Python specific
*.py[cod]
*$py.class
*.so
.Python
pip-log.txt
pip-delete-this-directory.txt

# System files
.DS_Store
Thumbs.db
100 changes: 65 additions & 35 deletions Python/html_to_png.py
Original file line number Diff line number Diff line change
@@ -1,75 +1,105 @@
#!/usr/bin/env python3
"""HTML to PNG converter - Optimized version with parallel processing"""
import os
import glob
import time
from multiprocessing import Pool, cpu_count
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import WebDriverWait
from webdriver_manager.chrome import ChromeDriverManager
from PIL import Image
import io

# Configuration: input directory, output directory and worker-pool size.
# NOTE(review): this listing appears to interleave both sides of a diff, so
# some statements below occur twice (old and new wording) — confirm which
# side is current before relying on them.
html_dir = '../HTML'
output_dir = '../HTML/imgs'
MAX_WORKERS = min(cpu_count(), 4) # Cap the number of worker processes to avoid exhausting resources

# Make sure the output directory exists
os.makedirs(output_dir, exist_ok=True)

# Collect every HTML file directly inside html_dir
html_files = glob.glob(f"{html_dir}/*.html")

print(f"找到 {len(html_files)} 个HTML文件")

# Chrome options
# NOTE(review): convert_html_to_png builds its own Options object, so this
# module-level copy looks like the pre-change version — verify and remove.
chrome_options = Options()
chrome_options.add_argument("--headless=new") # New headless mode
chrome_options.add_argument("--window-size=1920,1080") # Initial window size, for a high-resolution render
chrome_options.add_argument("--hide-scrollbars") # Hide scrollbars
chrome_options.add_argument("--disable-extensions") # Disable extensions
chrome_options.add_argument("--disable-gpu") # Disable GPU acceleration
# Report how many files were found and how many worker processes will be used.
# NOTE(review): these prints are module-level statements, so they run on every
# import of this module, not only when it is executed as a script.
print(f"找到 {len(html_files)} 个 HTML 文件")
print(f"使用 {MAX_WORKERS} 个并行进程")

# 初始化WebDriver
service = Service(ChromeDriverManager().install())
driver = webdriver.Chrome(service=service, options=chrome_options)

try:
# 遍历每个HTML文件并转换为PNG
for html_file in html_files:
def convert_html_to_png(html_file, render_delay=1.0):
    """Render one HTML file in headless Chrome and save it as a full-page PNG.

    Intended to be called from a worker process: every call creates its own
    WebDriver instance and always quits it, so no browser state is shared
    between calls.

    Args:
        html_file: Path to the HTML file to render.
        render_delay: Seconds to pause after the document reports it has
            loaded, giving dynamically rendered content time to settle.
            Pass 0 to skip the pause.

    Returns:
        A status string that starts with "成功" on success or "失败" on
        failure. Errors are reported through this string instead of being
        raised, so one bad file does not abort the whole batch.
    """
    # Local import: only needed here to build a correctly escaped file:// URI.
    from pathlib import Path

    chrome_options = Options()
    chrome_options.add_argument("--headless=new")
    chrome_options.add_argument("--window-size=1920,1080")
    chrome_options.add_argument("--hide-scrollbars")
    chrome_options.add_argument("--disable-extensions")
    chrome_options.add_argument("--disable-gpu")
    chrome_options.add_argument("--disable-dev-shm-usage")  # avoid shared-memory problems
    chrome_options.add_argument("--no-sandbox")  # avoid sandbox problems

    driver = None
    try:
        # NOTE(review): install() is invoked once per file; consider resolving
        # the driver path once per process if this shows up in profiles.
        service = Service(ChromeDriverManager().install())
        driver = webdriver.Chrome(service=service, options=chrome_options)

        filename = os.path.basename(html_file)
        name_without_ext = os.path.splitext(filename)[0]
        output_file = f"{output_dir}/{name_without_ext}.png"

        print(f"正在转换: {html_file} -> {output_file}")

        # as_uri() percent-encodes spaces, '#', '?' and handles drive letters,
        # which plain "file://" + path concatenation does not.
        driver.get(Path(os.path.abspath(html_file)).as_uri())

        # Explicit wait: block until the document reports it is fully loaded.
        wait = WebDriverWait(driver, 10)
        wait.until(lambda d: d.execute_script("return document.readyState") == "complete")

        # implicitly_wait() only configures the element-lookup timeout and
        # never pauses, so use a real sleep for the extra render time.
        if render_delay > 0:
            time.sleep(render_delay)

        # Measure the full page; take the larger of <body> and <html> so that
        # pages where only one of them reports the scroll size are still covered.
        total_height = driver.execute_script(
            "return Math.max(document.body.scrollHeight, document.documentElement.scrollHeight);"
        )
        total_width = driver.execute_script(
            "return Math.max(document.body.scrollWidth, document.documentElement.scrollWidth);"
        )

        # Grow the window so the whole page fits in a single screenshot.
        driver.set_window_size(total_width, total_height)

        # Re-check the load state after resizing.
        # NOTE(review): readyState is normally already "complete" here, so this
        # does not by itself guarantee that re-layout has finished.
        wait.until(lambda d: d.execute_script("return document.readyState") == "complete")

        screenshot = driver.get_screenshot_as_png()

        with open(output_file, 'wb') as f:
            f.write(screenshot)

        return f"成功:{html_file} -> {output_file}"

    except Exception as e:
        # Worker boundary: turn any failure into a result string for the caller.
        return f"失败:{html_file} - 错误:{e}"

    finally:
        # driver stays None if Chrome failed to start, so guard the quit.
        if driver:
            driver.quit()


print("所有转换任务完成")
if __name__ == '__main__':
    # Script entry point: convert every discovered HTML file in parallel and
    # print a summary of how many conversions succeeded and failed.
    print("开始转换...")

    if html_files:
        # imap_unordered yields each result as soon as its worker finishes;
        # ordering is irrelevant because the results are only counted.
        with Pool(processes=MAX_WORKERS) as pool:
            results = list(pool.imap_unordered(convert_html_to_png, html_files))
    else:
        # Nothing to convert: do not spawn worker processes for an empty batch.
        results = []

    # convert_html_to_png reports success with a string starting with "成功".
    success_count = sum(1 for r in results if r.startswith("成功"))
    fail_count = len(results) - success_count

    print("\n转换完成:")
    print(f" 成功:{success_count}")
    print(f" 失败:{fail_count}")
    print(f"所有 PNG 图片已保存到 {output_dir} 目录")