From e4f149ceea278b668efaff12cfeacf9df4d16962 Mon Sep 17 00:00:00 2001 From: adrienvon Date: Thu, 23 Oct 2025 16:51:32 +0800 Subject: [PATCH 1/2] modified: README.md new file: requirements.txt new file: scripts/README.md new file: scripts/install.ps1 new file: scripts/install.sh --- README.md | 27 +++++ requirements.txt | 16 +++ scripts/README.md | 202 +++++++++++++++++++++++++++++++++++++ scripts/install.ps1 | 205 ++++++++++++++++++++++++++++++++++++++ scripts/install.sh | 236 ++++++++++++++++++++++++++++++++++++++++++++ 5 files changed, 686 insertions(+) create mode 100644 requirements.txt create mode 100644 scripts/README.md create mode 100644 scripts/install.ps1 create mode 100644 scripts/install.sh diff --git a/README.md b/README.md index 1d91b31..a0aa059 100644 --- a/README.md +++ b/README.md @@ -14,6 +14,33 @@ sklearn transformers 4.8.1 +## 安装指南 + +### 快速安装(推荐) + +#### Windows + +```powershell +# CPU 版本 +.\scripts\install.ps1 -device cpu + +# GPU 版本(CUDA 11.1) +.\scripts\install.ps1 -device gpu -cuda_version 111 +``` + +#### Linux / macOS + +```bash +# CPU 版本 +bash scripts/install.sh venv cpu + +# GPU 版本(CUDA 11.1) +bash scripts/install.sh venv gpu 111 +``` + +**⚠️ GPU 用户**: 运行 `nvidia-smi` 查看 CUDA 版本后选择对应脚本参数 +**💡 详细说明**: 查看 [scripts/README.md](scripts/README.md) + ## 中文数据集 从 THUCNews 中随机抽取20万条新闻标题,一共有10个类别:财经、房产、股票、教育、科技、社会、时政、体育、游戏、娱乐,每类2万条标题数据。数据集按如下划分: diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..0d6a56f --- /dev/null +++ b/requirements.txt @@ -0,0 +1,16 @@ +# BERT Text Classification Requirements +# Python 3.7+ +# PyTorch 1.9.0: Install separately based on your device (CPU or GPU) +# See scripts/README.md for installation instructions + +# Core ML libraries +transformers==4.8.1 +tokenizers==0.10.3 + +# Numeric / utils +numpy>=1.18.5,<2.0 +scikit-learn>=0.24.0,<0.25 +tqdm>=4.50.0 + +# Backwards compatibility helpers +typing-extensions>=3.7.4 diff --git a/scripts/README.md b/scripts/README.md new file mode 100644 index 
0000000..a19a770 --- /dev/null +++ b/scripts/README.md @@ -0,0 +1,202 @@ +# Installation Scripts + +自动化安装脚本用于快速安装 BERT Text Classification 的依赖环境。支持两种 Python 环境管理方式:**Venv** 和 **Conda**。 + +## 文件说明 + +- `install.ps1` - Windows PowerShell 安装脚本(支持 Venv 和 Conda) +- `install.sh` - Linux/macOS Bash 安装脚本(支持 Venv 和 Conda) + + +--- + +## 使用方法 + +### 方案 A:Venv(推荐新手) + +#### Windows + +```powershell +# CPU 版本 +.\scripts\install.ps1 -env_type venv -device cpu + +# GPU 版本(CUDA 11.1) +.\scripts\install.ps1 -env_type venv -device gpu -cuda_version 111 + +# 其他 CUDA 版本 +.\scripts\install.ps1 -env_type venv -device gpu -cuda_version 102 # CUDA 10.2 +``` + +#### Linux / macOS + +```bash +# CPU 版本 +bash scripts/install.sh venv cpu + +# GPU 版本(CUDA 11.1) +bash scripts/install.sh venv gpu 111 + +# 其他 CUDA 版本 +bash scripts/install.sh venv gpu 102 # CUDA 10.2 +``` + +**激活环境**: +- Windows: `.\venv\Scripts\Activate.ps1` +- Linux/Mac: `source venv/bin/activate` + +--- + +### 方案 B:Conda(推荐已有 Conda 用户) + +#### Windows + +```powershell +# CPU 版本(环境名: bert_env) +.\scripts\install.ps1 -env_type conda -device cpu + +# GPU 版本(CUDA 11.1) +.\scripts\install.ps1 -env_type conda -device gpu -cuda_version 111 + +# 自定义环境名 +.\scripts\install.ps1 -env_type conda -device cpu -env_name mybert +``` + +#### Linux / macOS + +```bash +# CPU 版本(环境名: bert_env) +bash scripts/install.sh conda cpu + +# GPU 版本(CUDA 11.1) +bash scripts/install.sh conda gpu 111 + +# 自定义环境名 +bash scripts/install.sh conda cpu 111 mybert +``` + +**激活环境**: +```bash +conda activate bert_env # 或自定义的环境名 +conda activate mybert +``` + +--- + +## 脚本参数详解 + +### Windows PowerShell 脚本 + +```powershell +.\scripts\install.ps1 -env_type venv|conda -device cpu|gpu -cuda_version 102|110|111|113 -env_name bert_env +``` + +| 参数 | 默认值 | 说明 | +|------|--------|------| +| `-env_type` | `venv` | 环境管理方式:`venv` 或 `conda` | +| `-device` | `cpu` | 计算设备:`cpu` 或 `gpu` | +| `-cuda_version` | `111` | GPU 版本的 CUDA 版本号 | +| `-env_name` | `bert_env` | Conda 环境名称 | + +### 
Linux/macOS Bash 脚本 + +```bash +bash scripts/install.sh [env_type] [device] [cuda_version] [env_name] +``` + +| 位置 | 默认值 | 说明 | +|------|--------|------| +| $1 | `venv` | 环境管理方式:`venv` 或 `conda` | +| $2 | `cpu` | 计算设备:`cpu` 或 `gpu` | +| $3 | `111` | GPU 版本的 CUDA 版本号 | +| $4 | `bert_env` | Conda 环境名称 | + +--- + +## 故障排除 + +### Windows 执行策略错误 + +如果遇到执行策略错误,运行: + +```powershell +Set-ExecutionPolicy -ExecutionPolicy RemoteSigned -Scope CurrentUser +``` + +### 查询 CUDA 版本 + +GPU 用户可以运行以下命令查看已安装的 CUDA 版本: + +```bash +nvidia-smi +``` + +查看输出中的 "CUDA Version" 字段,选择对应的脚本参数: + +- CUDA 10.2 → 参数 `102` +- CUDA 11.0 → 参数 `110` +- CUDA 11.1 → 参数 `111` +- CUDA 11.3 → 参数 `113` + + +--- + +## 脚本做了什么 + +1. ✅ 创建或检测 Python 环境(Venv 或 Conda) +2. ✅ 激活虚拟环境 +3. ✅ 自动下载并安装正确版本的 PyTorch(CPU 或 GPU) +4. ✅ 安装所有必需的依赖包(使用 `requirements.txt`) +5. ✅ 验证安装成功 +6. ✅ 显示后续步骤提示 + +--- + +## 支持的 PyTorch 版本 + +- **PyTorch 1.9.0**(核心库) +- **torchvision 0.10.0**(图像处理库) + +--- + +## 手动安装 + +### Venv 方式 + +```bash +# 1. 创建虚拟环境 +python3 -m venv venv + +# 2. 激活环境 +# Windows: .\venv\Scripts\Activate.ps1 +# Linux/Mac: source venv/bin/activate + +# 3. 安装 PyTorch(选择 CPU 或 GPU) +pip install torch==1.9.0+cpu -f https://download.pytorch.org/whl/torch_stable.html + +# 或 GPU 版本(CUDA 11.1) +pip install --index-url https://download.pytorch.org/whl/cu111 torch==1.9.0+cu111 -f https://download.pytorch.org/whl/torch_stable.html + +# 4. 安装其他依赖 +pip install -r requirements.txt +``` + +### Conda 方式 + +```bash +# 1. 创建 Conda 环境 +conda create -n bert_env python=3.9 -y + +# 2. 激活环境 +conda activate bert_env + +# 3. 安装 PyTorch(选择 CPU 或 GPU) +conda install pytorch::pytorch torchvision -c pytorch + +# 或指定版本 +pip install torch==1.9.0+cpu -f https://download.pytorch.org/whl/torch_stable.html + +# 4. 
安装其他依赖
+pip install -r requirements.txt
+```
+
+---
diff --git a/scripts/install.ps1 b/scripts/install.ps1
new file mode 100644
index 0000000..a1b63aa
--- /dev/null
+++ b/scripts/install.ps1
@@ -0,0 +1,226 @@
+# Windows Auto-Install Script for BERT Text Classification
+# Supports both Conda and Venv environments
+# Usage: .\install.ps1 -env_type venv -device cpu
+#        .\install.ps1 -env_type conda -device gpu -cuda_version 111
+# Parameters:
+#   -env_type: venv or conda (default: venv)
+#   -device: cpu or gpu (default: cpu)
+#   -cuda_version: 102 (10.2), 110, 111, 113 (default: 111)
+#   -env_name: environment name for conda (default: bert_env)
+#
+# Steps: create and activate the environment, install PyTorch 1.9.0 for the
+# chosen device, install requirements.txt, then import-check the packages.
+# NOTE(review): PyTorch 1.9.0 wheels appear to exist only for cpu/cu102/cu111;
+# confirm that -cuda_version 110 and 113 actually resolve.
+
+param(
+    [ValidateSet("venv", "conda")]
+    [string]$env_type = "venv",
+    [ValidateSet("cpu", "gpu")]
+    [string]$device = "cpu",
+    [ValidateSet("102", "110", "111", "113")]
+    [string]$cuda_version = "111",
+    [string]$env_name = "bert_env"
+)
+
+# requirements.txt sits in the repository root, one level above this script.
+# Resolving it from the script location lets the installer run from any
+# working directory (the usage above runs it from inside scripts\).
+$requirements_file = Join-Path (Split-Path -Parent $PSScriptRoot) "requirements.txt"
+
+# Color output
+function Write-Success {
+    param([string]$message)
+    Write-Host $message -ForegroundColor Green
+}
+
+function Write-Error-Custom {
+    param([string]$message)
+    Write-Host $message -ForegroundColor Red
+}
+
+function Write-Warning-Custom {
+    param([string]$message)
+    Write-Host $message -ForegroundColor Yellow
+}
+
+function Write-Info {
+    param([string]$message)
+    Write-Host $message -ForegroundColor Cyan
+}
+
+# Header
+Write-Host "`n" -ForegroundColor Green
+Write-Host "========================================" -ForegroundColor Green
+Write-Host "BERT Text Classification - Auto Installer" -ForegroundColor Green
+Write-Host "Environment: $env_type | Device: $device" -ForegroundColor Green
+Write-Host "========================================" -ForegroundColor Green
+Write-Host "`n"
+
+# ==================== VENV Setup ====================
+if ($env_type -eq "venv") {
+    Write-Info "[1/4] Setting up Python virtual environment (venv)..."
+
+    # Check Python
+    $python_version = python --version 2>&1
+    if ($LASTEXITCODE -ne 0) {
+        Write-Error-Custom "❌ Python not found! Please install Python 3.7+ first."
+        exit 1
+    }
+    Write-Host "✅ $python_version"
+
+    # Create venv
+    Write-Info "Creating virtual environment 'venv'..."
+    python -m venv venv
+    if ($LASTEXITCODE -ne 0) {
+        Write-Error-Custom "❌ Failed to create virtual environment!"
+        exit 1
+    }
+    Write-Success "✅ Virtual environment created"
+
+    # Activate venv
+    Write-Info "Activating virtual environment..."
+    & .\venv\Scripts\Activate.ps1
+    # Activate.ps1 is a PowerShell script, so it never sets $LASTEXITCODE;
+    # check the invocation status and the variable it exports instead.
+    if ((-not $?) -or (-not $env:VIRTUAL_ENV)) {
+        Write-Error-Custom "❌ Failed to activate virtual environment!"
+        exit 1
+    }
+    Write-Success "✅ Virtual environment activated"
+
+    $pip_cmd = "pip"
+    $python_cmd = "python"
+}
+# ==================== Conda Setup ====================
+else {
+    Write-Info "[1/4] Setting up Conda environment..."
+
+    # Check conda
+    $conda_version = conda --version 2>&1
+    if ($LASTEXITCODE -ne 0) {
+        Write-Error-Custom "❌ Conda not found! Please install Anaconda or Miniconda first."
+        Write-Error-Custom " Download: https://www.anaconda.com/products/miniconda"
+        exit 1
+    }
+    Write-Host "✅ $conda_version"
+
+    # Create conda environment
+    Write-Info "Creating Conda environment '$env_name' with Python 3.9..."
+    conda create -n $env_name python=3.9 -y
+    if ($LASTEXITCODE -ne 0) {
+        Write-Error-Custom "❌ Failed to create Conda environment!"
+        exit 1
+    }
+    Write-Success "✅ Conda environment created"
+
+    # Activate conda environment
+    Write-Info "Activating Conda environment..."
+    conda activate $env_name
+    # The exit code alone can be stale when conda is a shell function, so
+    # also confirm that the requested environment became the active one.
+    if (($LASTEXITCODE -ne 0) -or ($env:CONDA_DEFAULT_ENV -ne $env_name)) {
+        Write-Error-Custom "❌ Failed to activate Conda environment!"
+        exit 1
+    }
+    Write-Success "✅ Conda environment activated: $env_name"
+
+    $pip_cmd = "pip"
+    $python_cmd = "python"
+}
+
+# ==================== Install PyTorch ====================
+Write-Info "`n[2/4] Installing PyTorch..."
+
+
+if ($device -eq "cpu") {
+    Write-Warning-Custom "Installing PyTorch (CPU version)..."
+    & $pip_cmd install torch==1.9.0+cpu torchvision==0.10.0+cpu -f https://download.pytorch.org/whl/torch_stable.html
+    if ($LASTEXITCODE -ne 0) {
+        Write-Error-Custom "❌ PyTorch CPU installation failed!"
+        exit 1
+    }
+}
+else {
+    $cuda_map = @{
+        "102" = @{name = "CUDA 10.2"; url = "cu102" }
+        "110" = @{name = "CUDA 11.0"; url = "cu110" }
+        "111" = @{name = "CUDA 11.1"; url = "cu111" }
+        "113" = @{name = "CUDA 11.3"; url = "cu113" }
+    }
+
+    $cuda_info = $cuda_map[$cuda_version]
+    Write-Warning-Custom "Installing PyTorch with $($cuda_info.name)..."
+
+    # Same form as the CPU branch: the +cuXXX wheels come from the find-links
+    # page while PyPI stays the index, so torchvision's own dependencies
+    # still resolve (--index-url would replace PyPI entirely).
+    & $pip_cmd install "torch==1.9.0+$($cuda_info.url)" "torchvision==0.10.0+$($cuda_info.url)" -f https://download.pytorch.org/whl/torch_stable.html
+    if ($LASTEXITCODE -ne 0) {
+        Write-Error-Custom "❌ PyTorch GPU installation failed!"
+        exit 1
+    }
+}
+
+# Verify PyTorch installation
+Write-Info "`nVerifying PyTorch installation..."
+& $python_cmd -c "import torch; print(f'PyTorch {torch.__version__}'); print(f'CUDA available: {torch.cuda.is_available()}')" 2>&1
+if ($LASTEXITCODE -ne 0) {
+    Write-Error-Custom "❌ PyTorch verification failed!"
+    exit 1
+}
+Write-Success "✅ PyTorch installed successfully"
+
+# ==================== Install Dependencies ====================
+Write-Info "`n[3/4] Installing other dependencies..."
+& $pip_cmd install -r $requirements_file
+if ($LASTEXITCODE -ne 0) {
+    Write-Error-Custom "❌ Dependency installation failed!"
+    exit 1
+}
+Write-Success "✅ All dependencies installed"
+
+# ==================== Verify Installation ====================
+Write-Info "`n[4/4] Verifying installation..."
+& $python_cmd -c @"
+import torch
+import transformers
+import numpy
+import sklearn
+print('✅ All packages imported successfully')
+print(f' - PyTorch: {torch.__version__}')
+print(f' - Transformers: {transformers.__version__}')
+print(f' - NumPy: {numpy.__version__}')
+"@ 2>&1
+
+if ($LASTEXITCODE -ne 0) {
+    Write-Error-Custom "❌ Verification failed!"
+    exit 1
+}
+
+# Success message
+Write-Success "`n========================================"
+Write-Success "✅ Installation Completed Successfully!"
+Write-Success "========================================`n"
+
+# Next steps
+Write-Info "📝 Next Steps:"
+Write-Host " 1. Download BERT model from:"
+Write-Host " https://huggingface.co/bert-base-chinese"
+Write-Host " 2. Place model files in ./pretrained_bert folder"
+Write-Host " 3. Prepare your data in ./data folder"
+
+if ($env_type -eq "venv") {
+    Write-Host " 4. Activate environment: .\venv\Scripts\Activate.ps1"
+}
+else {
+    Write-Host " 4. Activate environment: conda activate $env_name"
+}
+
+Write-Host " 5. Run: python main.py --mode train --data_dir ./data --pretrained_bert_dir ./pretrained_bert"
+
+if ($device -eq "gpu") {
+    Write-Host "`n💡 GPU Mode: Training will be significantly faster!" -ForegroundColor Magenta
+}
+
+Write-Host "`n"
diff --git a/scripts/install.sh b/scripts/install.sh
new file mode 100644
index 0000000..4424294
--- /dev/null
+++ b/scripts/install.sh
@@ -0,0 +1,253 @@
+#!/bin/bash
+# Linux/Mac Auto-Install Script for BERT Text Classification
+# Supports both Conda and Venv environments
+# Usage: bash install.sh venv cpu
+#        bash install.sh conda gpu 111
+# Parameters:
+#   $1: venv or conda (default: venv)
+#   $2: cpu or gpu (default: cpu)
+#   $3: CUDA version - 102 (10.2), 110, 111, 113 (default: 111)
+#   $4: environment name for conda (default: bert_env)
+#
+# Steps: create and activate the environment, install PyTorch 1.9.0 for the
+# chosen device, install requirements.txt, then import-check the packages.
+# NOTE(review): PyTorch 1.9.0 wheels appear to exist only for cpu/cu102/cu111,
+# and the "+cpu" builds only for Linux/Windows; confirm 110/113 and macOS.
+
+set -e # Exit on error
+
+# Colors
+RED='\033[0;31m'
+GREEN='\033[0;32m'
+YELLOW='\033[1;33m'
+CYAN='\033[0;36m'
+NC='\033[0m' # No Color
+
+# Functions
+success() {
+    echo -e "${GREEN}$1${NC}"
+}
+
+# Errors go to stderr so they survive stdout redirection.
+error() {
+    echo -e "${RED}$1${NC}" >&2
+}
+
+warning() {
+    echo -e "${YELLOW}$1${NC}"
+}
+
+info() {
+    echo -e "${CYAN}$1${NC}"
+}
+
+# Parse arguments
+ENV_TYPE=${1:-venv}
+DEVICE=${2:-cpu}
+CUDA_VERSION=${3:-111}
+ENV_NAME=${4:-bert_env}
+
+# requirements.txt sits in the repository root, one level above this script;
+# resolve it from the script location so any working directory works.
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+REQUIREMENTS_FILE="$(dirname "$SCRIPT_DIR")/requirements.txt"
+
+# Validate env_type
+if [[ "$ENV_TYPE" != "venv" && "$ENV_TYPE" != "conda" ]]; then
+    error "❌ Invalid environment type: $ENV_TYPE (must be 'venv' or 'conda')"
+    exit 1
+fi
+
+# Validate device
+if [[ "$DEVICE" != "cpu" && "$DEVICE" != "gpu" ]]; then
+    error "❌ Invalid device: $DEVICE (must be 'cpu' or 'gpu')"
+    exit 1
+fi
+
+# Validate CUDA version
+if [[ "$DEVICE" == "gpu" && ! "$CUDA_VERSION" =~ ^(102|110|111|113)$ ]]; then
+    error "❌ Invalid CUDA version: $CUDA_VERSION"
+    error " Supported: 102 (CUDA 10.2), 110, 111, 113"
+    exit 1
+fi
+
+# Header
+echo ""
+success "========================================"
+success "BERT Text Classification - Auto Installer"
+success "Environment: $ENV_TYPE | Device: $DEVICE"
+success "========================================"
+success ""
+
+# ==================== VENV Setup ====================
+if [ "$ENV_TYPE" = "venv" ]; then
+    info "[1/4] Setting up Python virtual environment (venv)..."
+
+    # Check Python
+    if ! command -v python3 &> /dev/null; then
+        error "❌ Python3 not found! Please install Python 3.7+ first."
+        exit 1
+    fi
+
+    PYTHON_VERSION=$(python3 --version)
+    success "✅ $PYTHON_VERSION"
+
+    # Create venv
+    info "Creating virtual environment 'venv'..."
+    python3 -m venv venv
+    success "✅ Virtual environment created"
+
+    # Activate venv
+    info "Activating virtual environment..."
+    source venv/bin/activate
+    success "✅ Virtual environment activated"
+
+    PIP_CMD="pip"
+    PYTHON_CMD="python"
+
+# ==================== Conda Setup ====================
+else
+    info "[1/4] Setting up Conda environment..."
+
+    # Check conda
+    if ! command -v conda &> /dev/null; then
+        error "❌ Conda not found! Please install Anaconda or Miniconda first."
+        error " Download: https://www.anaconda.com/products/miniconda"
+        exit 1
+    fi
+
+    CONDA_VERSION=$(conda --version)
+    success "✅ $CONDA_VERSION"
+
+    # Create conda environment
+    info "Creating Conda environment '$ENV_NAME' with Python 3.9..."
+    conda create -n "$ENV_NAME" python=3.9 -y
+    success "✅ Conda environment created"
+
+    # Activate conda environment
+    info "Activating Conda environment..."
+    eval "$(conda shell.bash hook)"
+    conda activate "$ENV_NAME"
+    success "✅ Conda environment activated: $ENV_NAME"
+
+    PIP_CMD="pip"
+    PYTHON_CMD="python"
+fi
+
+# ==================== Install PyTorch ====================
+info ""
+info "[2/4] Installing PyTorch..."
+
+# With `set -e` a failing command aborts the script before a following
+# `$?` test can run, so each step is tested directly with `if ! ...`
+# to make sure its error message is actually printed.
+if [ "$DEVICE" = "cpu" ]; then
+    warning "Installing PyTorch (CPU version)..."
+    if ! $PIP_CMD install torch==1.9.0+cpu torchvision==0.10.0+cpu -f https://download.pytorch.org/whl/torch_stable.html; then
+        error "❌ PyTorch CPU installation failed!"
+        exit 1
+    fi
+else
+    # GPU installation
+    case $CUDA_VERSION in
+        102)
+            CUDA_URL="cu102"
+            CUDA_NAME="CUDA 10.2"
+            ;;
+        110)
+            CUDA_URL="cu110"
+            CUDA_NAME="CUDA 11.0"
+            ;;
+        111)
+            CUDA_URL="cu111"
+            CUDA_NAME="CUDA 11.1"
+            ;;
+        113)
+            CUDA_URL="cu113"
+            CUDA_NAME="CUDA 11.3"
+            ;;
+    esac
+
+    warning "Installing PyTorch with $CUDA_NAME..."
+    # Same form as the CPU branch: the +cuXXX wheels come from the find-links
+    # page while PyPI stays the index, so torchvision's own dependencies
+    # still resolve (--index-url would replace PyPI entirely).
+    if ! $PIP_CMD install "torch==1.9.0+$CUDA_URL" "torchvision==0.10.0+$CUDA_URL" -f https://download.pytorch.org/whl/torch_stable.html; then
+        error "❌ PyTorch GPU installation failed!"
+        exit 1
+    fi
+fi
+
+# Verify PyTorch installation
+info ""
+info "Verifying PyTorch installation..."
+if ! $PYTHON_CMD << 'PYTHON_CHECK'
+import torch
+print(f'✅ PyTorch {torch.__version__}')
+print(f'✅ CUDA available: {torch.cuda.is_available()}')
+if torch.cuda.is_available():
+    print(f'✅ CUDA version: {torch.version.cuda}')
+    print(f'✅ GPU device: {torch.cuda.get_device_name(0)}')
+PYTHON_CHECK
+then
+    error "❌ PyTorch verification failed!"
+    exit 1
+fi
+
+success "✅ PyTorch installed successfully"
+
+# ==================== Install Dependencies ====================
+info ""
+info "[3/4] Installing other dependencies..."
+if ! $PIP_CMD install -r "$REQUIREMENTS_FILE"; then
+    error "❌ Dependency installation failed!"
+    exit 1
+fi
+success "✅ All dependencies installed"
+
+# ==================== Verify Installation ====================
+info ""
+info "[4/4] Verifying installation..."
+if ! $PYTHON_CMD << 'VERIFY_CHECK'
+import torch
+import transformers
+import numpy
+import sklearn
+print('✅ All packages imported successfully')
+print(f' - PyTorch: {torch.__version__}')
+print(f' - Transformers: {transformers.__version__}')
+print(f' - NumPy: {numpy.__version__}')
+VERIFY_CHECK
+then
+    error "❌ Verification failed!"
+    exit 1
+fi
+
+# Success message
+echo ""
+success "========================================"
+success "✅ Installation Completed Successfully!"
+success "========================================"
+echo ""
+
+# Next steps
+info "📝 Next Steps:"
+echo " 1. Download BERT model from:"
+echo " https://huggingface.co/bert-base-chinese"
+echo " 2. Place model files in ./pretrained_bert folder"
+echo " 3. Prepare your data in ./data folder"
+
+if [ "$ENV_TYPE" = "venv" ]; then
+    echo " 4. Activate environment: source venv/bin/activate"
+else
+    echo " 4. Activate environment: conda activate $ENV_NAME"
+fi
+
+echo " 5. Run: python main.py --mode train --data_dir ./data --pretrained_bert_dir ./pretrained_bert"
+
+if [ "$DEVICE" = "gpu" ]; then
+    echo ""
+    warning "💡 GPU Mode: Training will be significantly faster!"
+fi
+
+echo ""
From a110cc0717d0f51a534f5673849d1c73b38bedc3 Mon Sep 17 00:00:00 2001
From: adrienvon
Date: Thu, 23 Oct 2025 16:54:39 +0800
Subject: [PATCH 2/2] modified: requirements.txt

---
 requirements.txt | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/requirements.txt b/requirements.txt
index 0d6a56f..ecbc6c5 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,8 +1,3 @@
-# BERT Text Classification Requirements
-# Python 3.7+
-# PyTorch 1.9.0: Install separately based on your device (CPU or GPU)
-# See scripts/README.md for installation instructions
-
 # Core ML libraries
 transformers==4.8.1
 tokenizers==0.10.3