diff --git a/.github/workflows/publish-pypi.yml b/.github/workflows/publish-pypi.yml new file mode 100644 index 0000000..0a08303 --- /dev/null +++ b/.github/workflows/publish-pypi.yml @@ -0,0 +1,43 @@ +name: Publish Python Package + +# This workflow is automatically triggered when a GitHub release is created +# by the main.yml workflow. This ensures that the PyPI package is published +# with the same version as the Docker image and GitHub release. +on: + release: + types: [created] + +jobs: + deploy: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: '3.x' + - name: Set version from tag + run: | + # Extract version from the GitHub release tag (remove 'v' prefix if present) + # This tag is created by the main.yml workflow + VERSION=${GITHUB_REF#refs/tags/} + VERSION=${VERSION#v} + echo "RELEASE_VERSION=$VERSION" >> $GITHUB_ENV + echo "Using version: $VERSION from GitHub release" + - name: Update version in pyproject.toml + run: | + # Use sed to update the version in pyproject.toml to match the GitHub release + sed -i "s/version = \"[0-9]*\.[0-9]*\.[0-9]*\"/version = \"$RELEASE_VERSION\"/" pyproject.toml + cat pyproject.toml | grep version + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install build twine + - name: Build and publish + env: + TWINE_USERNAME: ${{ secrets.PYPI_USERNAME }} + TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }} + run: | + python -m build + twine check dist/* + twine upload dist/* \ No newline at end of file diff --git a/MANIFEST.in b/MANIFEST.in new file mode 100644 index 0000000..ec9e6bc --- /dev/null +++ b/MANIFEST.in @@ -0,0 +1,10 @@ +include LICENSE +include README.md +include cloudproxy/providers/user_data.sh +recursive-exclude cloudproxy-ui * +recursive-exclude tests * +recursive-exclude docs * +recursive-exclude .github * +recursive-exclude venv * +recursive-exclude __pycache__ * +recursive-exclude 
.pytest_cache * \ No newline at end of file diff --git a/README.md b/README.md index 1b50c38..fcd4899 100644 --- a/README.md +++ b/README.md @@ -80,11 +80,51 @@ To get a local copy up and running follow these simple steps. ### Prerequisites -All you need is: -* Docker +You can use CloudProxy in two ways: + +1. **Docker (recommended for running the service)** + * Docker installed on your system + +2. **Python Package (for development or integration)** + * Python 3.9 or higher ### Installation +#### As a Python Package + +CloudProxy is available as a Python package that you can install directly from PyPI: + +```bash +pip install cloudproxy +``` + +Or install from source: + +```bash +# Clone the repository +git clone https://github.com/claffin/cloudproxy.git +cd cloudproxy + +# Install in development mode +pip install -e . + +# Or build and install +pip install build +python -m build +pip install dist/cloudproxy-0.6.23-py3-none-any.whl +``` + +Once installed, you can import and use CloudProxy in your Python applications: + +```python +from cloudproxy.providers import manager +import cloudproxy.main as cloudproxy + +# Setup your environment variables first +# Start the CloudProxy service +cloudproxy.start() +``` + #### Environment variables: Basic authentication is used for proxy access. Configure via environment variables: @@ -157,6 +197,8 @@ proxies = { my_request = requests.get("https://api.ipify.org", proxies=proxies) ``` +For more detailed examples of using CloudProxy as a Python package, see the [Python Package Usage Guide](docs/python-package-usage.md). 
+ ## Multi-Account Provider Support CloudProxy now supports multiple accounts per provider, allowing you to: diff --git a/cloudproxy/__main__.py b/cloudproxy/__main__.py new file mode 100644 index 0000000..775730c --- /dev/null +++ b/cloudproxy/__main__.py @@ -0,0 +1,5 @@ +#!/usr/bin/env python +from cloudproxy.main import start + +if __name__ == "__main__": + start() \ No newline at end of file diff --git a/docs/python-package-usage.md b/docs/python-package-usage.md new file mode 100644 index 0000000..ac76b2e --- /dev/null +++ b/docs/python-package-usage.md @@ -0,0 +1,408 @@ +# Using CloudProxy as a Python Package + +This guide explains how to install and use CloudProxy as a Python package in your own projects. + +## Installation + +CloudProxy can be installed directly from PyPI: + +```bash +pip install cloudproxy +``` + +Or from source: + +```bash +# Clone the repository +git clone https://github.com/claffin/cloudproxy.git +cd cloudproxy + +# Install in development mode +pip install -e . 
+ +# Or build and install +pip install build +python -m build +pip install dist/cloudproxy-0.6.23-py3-none-any.whl +``` + +## Basic Usage + +### Starting the CloudProxy Service + +You can start the CloudProxy service programmatically in your application: + +```python +import os +from cloudproxy.providers import manager +import cloudproxy.main as cloudproxy + +# Set required environment variables +os.environ["PROXY_USERNAME"] = "your_username" +os.environ["PROXY_PASSWORD"] = "your_password" + +# Configure provider(s) +os.environ["DIGITALOCEAN_ENABLED"] = "True" +os.environ["DIGITALOCEAN_ACCESS_TOKEN"] = "your_digitalocean_token" + +# Start the CloudProxy service +cloudproxy.start() +``` + +### Using the API Programmatically + +Instead of starting the full service, you can use CloudProxy's functionality programmatically: + +```python +import os +from cloudproxy.providers import manager + +# Configure credentials +os.environ["DIGITALOCEAN_ENABLED"] = "True" +os.environ["DIGITALOCEAN_ACCESS_TOKEN"] = "your_digitalocean_token" + +# Initialize the provider manager +manager.init_schedule() + +# Get available proxies +proxies = manager.get_all_ips() +print(f"Available proxies: {proxies}") + +# Scale the number of proxies for a specific provider +manager.scaling_handler("digitalocean", min_scaling=3, max_scaling=5) +``` + +## Direct Proxy Access (Without the API) + +You can access and manage proxies directly from your code without starting the HTTP API server: + +```python +import os +import random +from cloudproxy.providers import manager + +# Configure environment +os.environ["PROXY_USERNAME"] = "your_username" +os.environ["PROXY_PASSWORD"] = "your_password" +os.environ["DIGITALOCEAN_ENABLED"] = "True" +os.environ["DIGITALOCEAN_ACCESS_TOKEN"] = "your_digitalocean_token" + +# Initialize CloudProxy infrastructure +manager.init_schedule() + +# Wait for proxies to be ready (simplified example) +import time +print("Waiting for proxies to be provisioned...") +for _ in 
range(30): # Wait up to 5 minutes + proxies = manager.get_all_ips() + if proxies: + break + time.sleep(10) + +# Get all available proxy IPs +all_ips = manager.get_all_ips() +print(f"Available proxies: {all_ips}") + +# Format a random proxy for use with requests +if all_ips: + # Select a random proxy IP + random_ip = random.choice(all_ips) + + # Format as a proxy URL with authentication + proxy_url = f"http://{os.environ['PROXY_USERNAME']}:{os.environ['PROXY_PASSWORD']}@{random_ip}:8899" + + # Use with requests + import requests + proxies = { + "http": proxy_url, + "https": proxy_url + } + response = requests.get("https://api.ipify.org?format=json", proxies=proxies) + print(f"Your IP is: {response.json()['ip']}") +``` + +### Accessing Provider-Specific Proxies + +You can also get proxies from specific providers or provider instances: + +```python +# Get all IPs from a specific provider +do_ips = manager.get_provider_ips("digitalocean") + +# Get IPs from a specific provider instance +do_secondary_ips = manager.get_provider_instance_ips("digitalocean", "secondary") + +# Get all provider configuration +providers_config = manager.get_config() +``` + +### Creating Formatted Proxy URLs + +CloudProxy stores the IPs of the proxy servers, but you need to format them correctly for use: + +```python +def format_proxy_url(ip, username, password, port=8899): + """Format a proxy IP into a URL with authentication""" + return f"http://{username}:{password}@{ip}:{port}" + +# Get a list of all formatted proxy URLs +username = os.environ.get("PROXY_USERNAME") +password = os.environ.get("PROXY_PASSWORD") +all_ips = manager.get_all_ips() + +proxy_urls = [format_proxy_url(ip, username, password) for ip in all_ips] +``` + +### Load Balancing Across Proxies + +You can implement a simple load balancing strategy: + +```python +class ProxyRotator: + """Simple proxy rotator for load balancing""" + def __init__(self, username, password): + self.username = username + self.password = password + 
self.current_index = 0 + self.proxies = [] + self.update_proxies() + + def update_proxies(self): + """Update the list of available proxies""" + all_ips = manager.get_all_ips() + self.proxies = [ + f"http://{self.username}:{self.password}@{ip}:8899" + for ip in all_ips + ] + + def get_next_proxy(self): + """Get the next proxy in the rotation""" + if not self.proxies: + self.update_proxies() + if not self.proxies: + return None + + if self.current_index >= len(self.proxies): + self.current_index = 0 + + proxy = self.proxies[self.current_index] + self.current_index += 1 + return proxy + + def get_proxy_dict(self): + """Get a proxy dictionary for requests""" + proxy = self.get_next_proxy() + if not proxy: + return {} + + return { + "http": proxy, + "https": proxy + } + +# Usage +rotator = ProxyRotator( + username=os.environ.get("PROXY_USERNAME"), + password=os.environ.get("PROXY_PASSWORD") +) + +# Make requests with rotating proxies +for url in urls_to_scrape: + proxies = rotator.get_proxy_dict() + response = requests.get(url, proxies=proxies) + # Process response... 
+``` + +## Integrating with Web Scraping Libraries + +### Using with Requests + +```python +import os +import requests + +# Setup your proxy credentials +username = os.getenv("PROXY_USERNAME", "your_username") +password = os.getenv("PROXY_PASSWORD", "your_password") + +# Function to get a proxy URL from CloudProxy +def get_proxy(): + response = requests.get("http://localhost:8000/random").json() + return response["proxy"]["url"] + +# Use the proxy with requests +def make_proxied_request(url): + proxy_url = get_proxy() + proxies = { + "http": proxy_url, + "https": proxy_url + } + return requests.get(url, proxies=proxies) + +# Example usage +response = make_proxied_request("https://api.ipify.org?format=json") +print(f"IP detected: {response.json()}") +``` + +### Using with Selenium + +```python +from selenium import webdriver +from selenium.webdriver.chrome.options import Options +import requests + +def get_proxy(): + response = requests.get("http://localhost:8000/random").json() + proxy_details = response["proxy"] + return proxy_details["ip"], proxy_details["port"] + +def setup_selenium_with_proxy(): + proxy_ip, proxy_port = get_proxy() + + options = Options() + options.add_argument(f'--proxy-server={proxy_ip}:{proxy_port}') + + # If proxy requires authentication + # You'll need to use a proxy authentication extension or plugin + + driver = webdriver.Chrome(options=options) + return driver + +# Example usage +driver = setup_selenium_with_proxy() +driver.get("https://www.whatismyip.com/") +# The page should show the IP of your proxy +``` + +## Advanced Usage + +### Managing Multiple Provider Instances + +CloudProxy supports multiple instances of the same provider, which allows you to use different API keys or configurations: + +```python +import os +from cloudproxy.providers import manager + +# Setup the first DigitalOcean instance +os.environ["DIGITALOCEAN_ENABLED"] = "True" +os.environ["DIGITALOCEAN_ACCESS_TOKEN"] = "first_token" 
+os.environ["DIGITALOCEAN_DEFAULT_REGION"] = "lon1" +os.environ["DIGITALOCEAN_DEFAULT_MIN_SCALING"] = "2" + +# Setup a second DigitalOcean instance +os.environ["DIGITALOCEAN_SECONDARY_ENABLED"] = "True" +os.environ["DIGITALOCEAN_SECONDARY_ACCESS_TOKEN"] = "second_token" +os.environ["DIGITALOCEAN_SECONDARY_REGION"] = "nyc1" +os.environ["DIGITALOCEAN_SECONDARY_MIN_SCALING"] = "3" + +# Initialize the manager +manager.init_schedule() + +# Get all proxies from the first instance +do_proxies = manager.get_provider_instance_ips("digitalocean", "default") + +# Get all proxies from the second instance +do_secondary_proxies = manager.get_provider_instance_ips("digitalocean", "secondary") +``` + +### Rotating Proxies Automatically + +You can set up automatic proxy rotation by configuring the `AGE_LIMIT` environment variable: + +```python +import os + +# Set proxies to be replaced after 3600 seconds (1 hour) +os.environ["AGE_LIMIT"] = "3600" + +# Then start CloudProxy as usual +from cloudproxy.providers import manager +import cloudproxy.main as cloudproxy + +manager.init_schedule() +cloudproxy.start() +``` + +## Environment Configuration Reference + +### Required Variables + +- `PROXY_USERNAME`, `PROXY_PASSWORD`: Authentication credentials for the proxy servers + - OR `ONLY_HOST_IP=True`: Restrict access to only the host IP + +### Provider-Specific Variables + +#### DigitalOcean +- `DIGITALOCEAN_ENABLED`: Set to "True" to enable +- `DIGITALOCEAN_ACCESS_TOKEN`: Your API token +- `DIGITALOCEAN_DEFAULT_REGION`: Region to deploy in (default: "lon1") +- `DIGITALOCEAN_DEFAULT_SIZE`: Droplet size (default: "s-1vcpu-1gb") +- `DIGITALOCEAN_DEFAULT_MIN_SCALING`: Minimum number of proxies (default: 2) +- `DIGITALOCEAN_DEFAULT_MAX_SCALING`: Maximum number of proxies (default: 2) + +#### AWS +- `AWS_ENABLED`: Set to "True" to enable +- `AWS_ACCESS_KEY`: Your AWS access key +- `AWS_SECRET_KEY`: Your AWS secret key +- `AWS_DEFAULT_REGION`: Region to deploy in (default: "eu-west-2") +- 
`AWS_DEFAULT_SIZE`: Instance type (default: "t2.micro") +- `AWS_DEFAULT_MIN_SCALING`: Minimum number of proxies (default: 2) +- `AWS_DEFAULT_MAX_SCALING`: Maximum number of proxies (default: 2) +- `AWS_DEFAULT_AMI`: AMI ID to use (default varies by region) +- `AWS_DEFAULT_SPOT`: Use spot instances (default: "false") + +#### Google Cloud Platform +- `GCP_ENABLED`: Set to "True" to enable +- `GCP_SERVICE_ACCOUNT`: Your GCP service account JSON +- `GCP_DEFAULT_ZONE`: Zone to deploy in (default: "europe-west2-a") +- `GCP_DEFAULT_REGION`: Region to deploy in (default: "europe-west2") +- `GCP_DEFAULT_SIZE`: Machine type (default: "e2-micro") +- `GCP_DEFAULT_MIN_SCALING`: Minimum number of proxies (default: 2) +- `GCP_DEFAULT_MAX_SCALING`: Maximum number of proxies (default: 2) +- `GCP_DEFAULT_PROJECT`: GCP project ID + +#### Hetzner +- `HETZNER_ENABLED`: Set to "True" to enable +- `HETZNER_API_TOKEN`: Your Hetzner API token +- `HETZNER_DEFAULT_LOCATION`: Location to deploy in (default: "nbg1") +- `HETZNER_DEFAULT_SIZE`: Server type (default: "cx11") +- `HETZNER_DEFAULT_MIN_SCALING`: Minimum number of proxies (default: 2) +- `HETZNER_DEFAULT_MAX_SCALING`: Maximum number of proxies (default: 2) + +## Troubleshooting + +### Common Issues + +#### "No proxies available" error +- Check that you've correctly configured your cloud provider credentials +- Verify that the minimum scaling is set correctly +- Allow enough time for proxy deployment (can take 1-3 minutes) + +#### Authentication failures +- Ensure `PROXY_USERNAME` and `PROXY_PASSWORD` are correctly set +- Avoid special characters in credentials that might cause URL encoding issues + +#### Proxies being blocked +- Consider setting a lower `AGE_LIMIT` (in seconds) so that proxies are rotated more frequently +- Try using different cloud provider regions + +### Logging + +CloudProxy uses the `loguru` library for logging. 
You can configure it in your code: + +```python +import sys +from loguru import logger + +# Configure log level +logger.remove() +logger.add(sys.stderr, level="INFO") # Change to DEBUG for more detailed logs + +# Add file logging +logger.add("cloudproxy.log", rotation="10 MB") + +# Then initialize CloudProxy +from cloudproxy.providers import manager +manager.init_schedule() +``` \ No newline at end of file diff --git a/examples/direct_proxy_access.py b/examples/direct_proxy_access.py new file mode 100644 index 0000000..2f30d9c --- /dev/null +++ b/examples/direct_proxy_access.py @@ -0,0 +1,203 @@ +#!/usr/bin/env python +""" +Example script demonstrating how to use CloudProxy directly without the API +""" +import os +import time +import sys +import random +import requests +from loguru import logger + +# Configure logging +logger.remove() +logger.add(sys.stderr, level="INFO") +logger.add("direct_proxy_example.log", rotation="5 MB") + +def setup_environment(): + """Setup required environment variables""" + # Proxy authentication + os.environ["PROXY_USERNAME"] = "example_user" + os.environ["PROXY_PASSWORD"] = "example_pass" + + # Enable DigitalOcean provider + # Replace with your own API token in production + os.environ["DIGITALOCEAN_ENABLED"] = "True" + os.environ["DIGITALOCEAN_ACCESS_TOKEN"] = "your_digitalocean_token" + os.environ["DIGITALOCEAN_DEFAULT_MIN_SCALING"] = "1" + os.environ["DIGITALOCEAN_DEFAULT_MAX_SCALING"] = "3" + + # Set proxy rotation (optional) - rotate after 1 hour + os.environ["AGE_LIMIT"] = "3600" + + logger.info("Environment variables set up") + +def initialize_cloudproxy(): + """Initialize CloudProxy directly without starting the API server""" + from cloudproxy.providers import manager + + # Initialize the manager, which will start creating proxies based on min_scaling + logger.info("Initializing CloudProxy manager") + manager.init_schedule() + + # Wait for at least one proxy to be available + logger.info("Waiting for proxies to be 
available...") + max_wait = 180 # Maximum wait time of 3 minutes + start_time = time.time() + + while time.time() - start_time < max_wait: + proxies = manager.get_all_ips() + if proxies: + logger.info(f"Proxies are ready: {proxies}") + return True + logger.info("No proxies available yet, waiting 10 seconds...") + time.sleep(10) + + logger.error("Timed out waiting for proxies") + return False + +class ProxyRotator: + """Class to manage and rotate proxies""" + def __init__(self): + from cloudproxy.providers import manager + self.manager = manager + self.username = os.environ.get("PROXY_USERNAME") + self.password = os.environ.get("PROXY_PASSWORD") + self.port = 8899 # Default port for CloudProxy proxies + self.current_index = 0 + self.proxies = [] + self.update_proxies() + + def update_proxies(self): + """Get the latest list of available proxies""" + ips = self.manager.get_all_ips() + self.proxies = [ + f"http://{self.username}:{self.password}@{ip}:{self.port}" + for ip in ips + ] + logger.info(f"Updated proxy list, {len(self.proxies)} proxies available") + + def get_random_proxy(self): + """Get a random proxy from the available proxies""" + if not self.proxies: + self.update_proxies() + + if not self.proxies: + logger.warning("No proxies available") + return None + + return random.choice(self.proxies) + + def get_next_proxy(self): + """Get the next proxy in the rotation""" + if not self.proxies: + self.update_proxies() + + if not self.proxies: + logger.warning("No proxies available") + return None + + if self.current_index >= len(self.proxies): + self.current_index = 0 + + proxy = self.proxies[self.current_index] + self.current_index += 1 + return proxy + + def get_proxy_dict(self, proxy_url=None): + """Convert a proxy URL to a requests proxy dictionary""" + if proxy_url is None: + proxy_url = self.get_next_proxy() + + if not proxy_url: + return {} + + return { + "http": proxy_url, + "https": proxy_url + } + + def get_all_providers(self): + """Get information about 
all providers""" + return self.manager.get_config() + + def scale_provider(self, provider, min_scaling, max_scaling): + """Scale a specific provider""" + self.manager.scaling_handler(provider, min_scaling, max_scaling) + logger.info(f"Scaled {provider} to min:{min_scaling}, max:{max_scaling}") + +def test_requests(rotator): + """Test making requests through the proxy""" + urls = [ + "https://api.ipify.org?format=json", + "https://httpbin.org/ip", + "https://icanhazip.com" + ] + + for url in urls: + try: + # Get a proxy + proxy_dict = rotator.get_proxy_dict() + + if not proxy_dict: + logger.error("No proxy available for testing") + continue + + # Make the request + logger.info(f"Making request to {url} through {proxy_dict['http']}") + response = requests.get(url, proxies=proxy_dict, timeout=10) + + if response.status_code == 200: + logger.info(f"Request successful: {response.text.strip()}") + else: + logger.error(f"Request failed with status code {response.status_code}") + + except Exception as e: + logger.exception(f"Error making request to {url}: {str(e)}") + +def demonstrate_provider_management(rotator): + """Demonstrate managing providers directly""" + # Get all providers + providers = rotator.get_all_providers() + for provider, config in providers.items(): + if config.get("enabled"): + logger.info(f"Provider {provider} is enabled") + logger.info(f" Current scaling: min={config.get('scaling', {}).get('min_scaling')}, max={config.get('scaling', {}).get('max_scaling')}") + logger.info(f" IPs: {config.get('ips', [])}") + + # Scale a provider + logger.info("Scaling DigitalOcean to min:2, max:4") + rotator.scale_provider("digitalocean", 2, 4) + + # Get updated configuration + updated_providers = rotator.get_all_providers() + digitalocean = updated_providers.get("digitalocean", {}) + logger.info(f"Updated DigitalOcean configuration: {digitalocean}") + +def main(): + """Main function""" + logger.info("Starting direct proxy access example") + + # Setup environment 
variables + setup_environment() + + # Initialize CloudProxy without the API server + if not initialize_cloudproxy(): + logger.error("Failed to initialize CloudProxy") + return + + # Create proxy rotator + rotator = ProxyRotator() + + # Test making requests through the proxies + logger.info("Testing requests through proxies") + test_requests(rotator) + + # Demonstrate provider management + logger.info("Demonstrating provider management") + demonstrate_provider_management(rotator) + + logger.info("Example completed") + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/examples/package_usage.py b/examples/package_usage.py new file mode 100644 index 0000000..33895b6 --- /dev/null +++ b/examples/package_usage.py @@ -0,0 +1,181 @@ +#!/usr/bin/env python +""" +Example script demonstrating how to use CloudProxy as a Python package +""" +import os +import time +import sys +import requests +from loguru import logger + +# Configure logging +logger.remove() +logger.add(sys.stderr, level="INFO") +logger.add("cloudproxy_example.log", rotation="5 MB") + +# Set required environment variables +def setup_environment(): + """Setup required environment variables""" + # Proxy authentication + os.environ["PROXY_USERNAME"] = "example_user" + os.environ["PROXY_PASSWORD"] = "example_pass" + + # Enable DigitalOcean provider + # Replace with your own API token in production + os.environ["DIGITALOCEAN_ENABLED"] = "True" + os.environ["DIGITALOCEAN_ACCESS_TOKEN"] = "your_digitalocean_token" + os.environ["DIGITALOCEAN_DEFAULT_MIN_SCALING"] = "1" + os.environ["DIGITALOCEAN_DEFAULT_MAX_SCALING"] = "3" + + # Set proxy rotation (optional) - rotate after 1 hour + os.environ["AGE_LIMIT"] = "3600" + + logger.info("Environment variables set up") + +def initialize_cloudproxy(): + """Initialize CloudProxy programmatically""" + from cloudproxy.providers import manager + + # Initialize the manager, which will start creating proxies based on min_scaling + logger.info("Initializing 
CloudProxy manager") + manager.init_schedule() + + # Wait for at least one proxy to be available + # In production, you might want to implement a retry mechanism or queue + logger.info("Waiting for proxies to be available...") + max_wait = 180 # Maximum wait time of 3 minutes + start_time = time.time() + + while time.time() - start_time < max_wait: + proxies = manager.get_all_ips() + if proxies: + logger.info(f"Proxies are ready: {proxies}") + return True + logger.info("No proxies available yet, waiting 10 seconds...") + time.sleep(10) + + logger.error("Timed out waiting for proxies") + return False + +def start_api_server(): + """Start the FastAPI server to expose the API endpoints""" + import cloudproxy.main as cloudproxy + import threading + + # Start the API server in a background thread + logger.info("Starting CloudProxy API server") + api_thread = threading.Thread(target=cloudproxy.start, daemon=True) + api_thread.start() + + # Give the server time to start + time.sleep(3) + logger.info("API server started") + return api_thread + +def test_proxy(): + """Test a random proxy by making a request to ipify.org""" + try: + # Get a random proxy from the CloudProxy API + response = requests.get("http://localhost:8000/random") + if response.status_code != 200: + logger.error(f"Failed to get a random proxy: {response.text}") + return False + + proxy_data = response.json() + proxy_url = proxy_data["proxy"]["url"] + logger.info(f"Using proxy: {proxy_url}") + + # Use the proxy to make a request to ipify.org + proxies = { + "http": proxy_url, + "https": proxy_url + } + + ip_response = requests.get("https://api.ipify.org?format=json", proxies=proxies) + if ip_response.status_code == 200: + ip_data = ip_response.json() + logger.info(f"Request successful - IP address: {ip_data['ip']}") + return True + else: + logger.error(f"Request failed: {ip_response.status_code}") + return False + + except Exception as e: + logger.exception(f"Error testing proxy: {str(e)}") + return False 
+ +def list_all_proxies(): + """List all available proxies""" + try: + response = requests.get("http://localhost:8000/") + if response.status_code == 200: + proxy_data = response.json() + logger.info(f"Total proxies: {proxy_data['total']}") + for i, proxy in enumerate(proxy_data['proxies']): + logger.info(f"Proxy {i+1}: {proxy['ip']}:{proxy['port']}") + return True + else: + logger.error(f"Failed to get proxies: {response.status_code}") + return False + except Exception as e: + logger.exception(f"Error listing proxies: {str(e)}") + return False + +def programmatic_management(): + """Demonstrate programmatic management of proxies""" + from cloudproxy.providers import manager + + # Get all IPs + all_ips = manager.get_all_ips() + logger.info(f"All IPs: {all_ips}") + + # Get provider-specific IPs + do_ips = manager.get_provider_ips("digitalocean") + logger.info(f"DigitalOcean IPs: {do_ips}") + + # Update scaling + logger.info("Updating scaling for DigitalOcean") + manager.scaling_handler("digitalocean", min_scaling=2, max_scaling=4) + + # Get updated provider configuration + providers = manager.get_config() + do_config = providers.get("digitalocean", {}) + logger.info(f"Updated DigitalOcean configuration: {do_config}") + +def main(): + """Main function""" + logger.info("Starting CloudProxy example script") + + # Setup environment variables + setup_environment() + + # Initialize CloudProxy + if not initialize_cloudproxy(): + logger.error("Failed to initialize CloudProxy") + return + + # Start the API server + api_thread = start_api_server() + + # Run examples + logger.info("Testing proxy functionality") + test_proxy() + + logger.info("Listing all available proxies") + list_all_proxies() + + logger.info("Demonstrating programmatic management") + programmatic_management() + + logger.info("Example completed") + + # Keep the script running to maintain the API server + # In a real application, this would be part of your main program logic + try: + while 
api_thread.is_alive(): + time.sleep(1) + except KeyboardInterrupt: + logger.info("Shutting down") + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..020d8eb --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,62 @@ +[build-system] +requires = ["setuptools>=61.0"] +build-backend = "setuptools.build_meta" + +[project] +name = "cloudproxy" +version = "0.6.23" +authors = [ + { name = "Christian Laffin", email = "christian.laffin@gmail.com" }, +] +description = "A tool to manage cloud-based proxies for scraping" +readme = "README.md" +requires-python = ">=3.9" +license = { file = "LICENSE" } +classifiers = [ + "Programming Language :: Python :: 3", + "License :: OSI Approved :: MIT License", + "Operating System :: OS Independent", + "Topic :: Internet :: Proxy Servers", + "Topic :: System :: Networking", +] +dependencies = [ + "requests>=2.32.2", + "apscheduler>=3.10.4", + "dateparser>=1.2.0", + "fastapi>=0.110.0", + "loguru>=0.7.2", + "python-dotenv>=1.0.1", + "uvicorn>=0.27.1", + "uvicorn-loguru-integration>=0.3.1", + "python-digitalocean>=1.17.0", + "boto3>=1.34.69", + "urllib3>=2.2.2", + "aiofiles>=23.2.1", + "botocore>=1.34.69", + "hcloud>=2.3.0", + "google-api-python-client>=2.122.0", + "anyio>=3.7.1", + "starlette>=0.36.3", +] + +[project.urls] +"Homepage" = "https://github.com/claffin/cloudproxy" +"Bug Tracker" = "https://github.com/claffin/cloudproxy/issues" + +[project.optional-dependencies] +test = [ + "pytest>=8.0.2", + "pytest-cov>=4.1.0", + "pytest-mock>=3.12.0", + "httpx>=0.27.0", +] + +[project.scripts] +cloudproxy = "cloudproxy.main:start" + +[tool.setuptools.packages.find] +where = ["."] +exclude = ["cloudproxy-ui*", "tests*", "docs*", ".github*", "venv*"] + +[tool.setuptools.package-data] +cloudproxy = ["providers/user_data.sh"] \ No newline at end of file diff --git a/setup.py b/setup.py new file mode 100644 index 0000000..57eb283 --- /dev/null +++ b/setup.py 
@@ -0,0 +1,11 @@ +#!/usr/bin/env python +from setuptools import setup + +if __name__ == "__main__": + try: + setup(name="cloudproxy") + except: # noqa + print( + "An error occurred during setup; please ensure that setuptools is installed." + ) + raise \ No newline at end of file