|
1 | 1 | # Bright Data Python SDK Changelog |
2 | 2 |
|
| 3 | +## Version 2.1.0 - Async Mode, API Simplification & Bug Fixes |
| 4 | + |
| 5 | +### ✨ New Features |
| 6 | + |
| 7 | +#### SERP Async Mode |
| 8 | + |
| 9 | +Added non-blocking async mode for SERP API using Bright Data's unblocker endpoints: |
| 10 | + |
| 11 | +```python |
| 12 | +from brightdata import BrightDataClient |
| 13 | + |
| 14 | +async with BrightDataClient() as client: |
| 15 | + # Non-blocking - polls for results |
| 16 | + result = await client.search.google( |
| 17 | + query="python programming", |
| 18 | + mode="async", # Enable async mode |
| 19 | + poll_interval=2, # Check every 2 seconds |
| 20 | + poll_timeout=30 # Give up after 30 seconds |
| 21 | + ) |
| 22 | +``` |
| 23 | + |
| 24 | +**Supported Engines:** Google, Bing, Yandex |
| 25 | + |
| 26 | +**Performance:** SERP async mode typically completes in ~3 seconds. |
| 27 | + |
| 28 | +#### Web Unlocker Async Mode |
| 29 | + |
| 30 | +Added non-blocking async mode for Web Unlocker API: |
| 31 | + |
| 32 | +```python |
| 33 | +async with BrightDataClient() as client: |
| 34 | + result = await client.scrape_url( |
| 35 | + url="https://example.com", |
| 36 | + mode="async", |
| 37 | + poll_interval=5, # Check every 5 seconds |
| 38 | + poll_timeout=180 # Web Unlocker async takes ~2 minutes |
| 39 | + ) |
| 40 | + |
| 41 | + # Batch scraping multiple URLs |
| 42 | + urls = ["https://example.com", "https://example.org"] |
| 43 | + results = await client.scrape_url(url=urls, mode="async", poll_timeout=180) |
| 44 | +``` |
| 45 | + |
| 46 | +**Performance Warning:** Web Unlocker async mode takes ~2 minutes to complete. For faster single-URL scraping, use the default sync mode. |
| 47 | + |
| 48 | +**How async mode works:** |
| 49 | +1. Triggers request to `/unblocker/req` (returns immediately) |
| 50 | +2. Polls `/unblocker/get_result` until ready or timeout |
| 51 | +3. Returns same data structure as sync mode |
| 52 | + |
| 53 | +**Key Benefits:** |
| 54 | +- ✅ Non-blocking requests - continue work while scraping |
| 55 | +- ✅ Batch optimization - trigger multiple URLs, collect later |
| 56 | +- ✅ Same data structure as sync mode |
| 57 | +- ✅ **No extra configuration** - works with existing zones |
| 58 | +- ✅ **No customer_id required** - derived from API token |
| 59 | + |
| 60 | +**See:** [Async Mode Guide](docs/async_mode_guide.md) for detailed usage |
| 61 | + |
| 62 | +### 🐛 Bug Fixes |
| 63 | + |
| 64 | +- **Fixed SyncBrightDataClient**: Removed unused `customer_id` parameter that was incorrectly being passed to `BrightDataClient` |
| 65 | +- **Fixed Web Unlocker async timeout**: Changed default `poll_timeout` from 30s to 180s (Web Unlocker async takes ~145 seconds) |
| 66 | + |
| 67 | +### 🚨 Breaking Changes |
| 68 | + |
| 69 | +#### Removed GenericScraper |
| 70 | +```python |
| 71 | +# OLD (v2.0.0) |
| 72 | +result = await client.scrape.generic.url("https://example.com") |
| 73 | + |
| 74 | +# NEW (v2.1.0) - Use scrape_url() directly |
| 75 | +result = await client.scrape_url("https://example.com") |
| 76 | +``` |
| 77 | + |
| 78 | +#### Async Method Naming Convention |
| 79 | +The `_async` suffix has been removed. Now `method()` is async by default, and `method_sync()` is the synchronous version. |
| 80 | + |
| 81 | +```python |
| 82 | +# OLD (v2.0.0) |
| 83 | +result = await scraper.products_async(url) |
| 84 | +await job.wait_async() |
| 85 | +data = await job.fetch_async() |
| 86 | + |
| 87 | +# NEW (v2.1.0) |
| 88 | +result = await scraper.products(url) |
| 89 | +await job.wait() |
| 90 | +data = await job.fetch() |
| 91 | +``` |
| 92 | + |
| 93 | +#### CLI Command Change |
| 94 | +```bash |
| 95 | +# OLD |
| 96 | +brightdata scrape generic --url https://example.com |
| 97 | + |
| 98 | +# NEW |
| 99 | +brightdata scrape url --url https://example.com |
| 100 | +``` |
| 101 | + |
| 102 | +### ✨ Additional New Features
| 103 | + |
| 104 | +#### Complete SyncBrightDataClient |
| 105 | +Added comprehensive `sync_client.py` with full coverage for all scrapers: |
| 106 | + |
| 107 | +```python |
| 108 | +from brightdata import SyncBrightDataClient |
| 109 | + |
| 110 | +with SyncBrightDataClient() as client: |
| 111 | + # All methods work synchronously |
| 112 | + result = client.scrape.amazon.products(url) |
| 113 | + result = client.scrape.linkedin.profiles(url) |
| 114 | + result = client.search.google("query") |
| 115 | +``` |
| 116 | + |
| 117 | +**Supported sync wrappers:** |
| 118 | +- `SyncAmazonScraper` - products, reviews, sellers (+ trigger/status/fetch) |
| 119 | +- `SyncLinkedInScraper` - profiles, jobs, companies, posts |
| 120 | +- `SyncInstagramScraper` - profiles, posts, comments, reels |
| 121 | +- `SyncFacebookScraper` - posts_by_profile, posts_by_group, comments, reels |
| 122 | +- `SyncChatGPTScraper` - prompt, prompts |
| 123 | +- `SyncSearchService` - google, bing, yandex |
| 124 | +- `SyncCrawlerService` - crawl, scrape |
| 125 | + |
| 126 | +#### Context Manager Enforcement |
| 127 | +Client methods now require proper context manager initialization: |
| 128 | + |
| 129 | +```python |
| 130 | +# Correct usage |
| 131 | +async with BrightDataClient() as client: |
| 132 | + result = await client.scrape_url(url) |
| 133 | + |
| 134 | +# Will raise RuntimeError |
| 135 | +client = BrightDataClient() |
| 136 | +result = await client.scrape_url(url) # Error: not initialized |
| 137 | +``` |
| 138 | + |
| 139 | +### 🔄 Migration Guide |
| 140 | + |
| 141 | +#### Method Renames |
| 142 | +| Old (v2.0.0) | New (v2.1.0) | |
| 143 | +|--------------|--------------| |
| 144 | +| `products_async()` | `products()` | |
| 145 | +| `reviews_async()` | `reviews()` | |
| 146 | +| `profiles_async()` | `profiles()` | |
| 147 | +| `jobs_async()` | `jobs()` | |
| 148 | +| `wait_async()` | `wait()` | |
| 149 | +| `fetch_async()` | `fetch()` | |
| 150 | +| `to_result_async()` | `to_result()` | |
| 151 | +| `status_async()` | `status()` | |
| 152 | +| `scrape.generic.url()` | `scrape_url()` | |
| 153 | + |
| 154 | +#### Quick Migration |
| 155 | +```bash |
| 156 | +# Find and replace in your codebase: |
| 157 | +_async( → (
| 158 | +scrape.generic.url → scrape_url |
| 159 | +``` |
| 160 | + |
| 161 | +### 📚 Documentation |
| 162 | +- Added [Async Mode Guide](docs/async_mode_guide.md) - comprehensive guide to async mode |
| 163 | +- Simplified README with clearer examples |
| 164 | +- Updated all examples and tests to use new naming convention |
| 165 | + |
| 166 | +### 🧪 Testing |
| 167 | +- Added unit tests for `AsyncUnblockerClient` |
| 168 | +- Added integration tests for SERP and Web Unlocker async modes |
| 169 | +- Verified backwards compatibility for unaffected APIs (code not using the renamed/removed methods listed under Breaking Changes works unchanged)
| 170 | + |
| 171 | +--- |
| 172 | + |
3 | 173 | ## Version 2.0.0 - Complete Architecture Rewrite |
4 | 174 |
|
5 | 175 | ### 🚨 Breaking Changes |
@@ -50,14 +220,14 @@ with ThreadPoolExecutor(max_workers=10) as executor: |
50 | 220 |
|
51 | 221 | **New**: Native async/await throughout with sync wrappers |
52 | 222 | ```python |
53 | | -# New approach - native async |
54 | | -async def scrape_async(self, url): |
| 223 | +# New approach - native async (method() is async by default) |
| 224 | +async def products(self, url): |
55 | 225 | async with self.engine: |
56 | 226 | return await self._execute_workflow(...) |
57 | 227 |
|
58 | | -# Sync wrapper for compatibility |
59 | | -def scrape(self, url): |
60 | | - return asyncio.run(self.scrape_async(url)) |
| 228 | +# Sync client uses persistent event loop |
| 229 | +with SyncBrightDataClient() as client: |
| 230 | + result = client.scrape.amazon.products(url) |
61 | 231 | ``` |
62 | 232 |
|
63 | 233 | #### 2. Service-Based Architecture |
@@ -102,11 +272,11 @@ data = await fetch_results(snapshot_id) # Get results |
102 | 272 | #### 2. Manual Job Control |
103 | 273 | ```python |
104 | 274 | # New capability - fine-grained control over scraping jobs |
105 | | -job = await scraper.trigger(url) |
| 275 | +job = await scraper.products_trigger(url) |
106 | 276 | # Do other work... |
107 | | -status = await job.status_async() |
| 277 | +status = await job.status() |
108 | 278 | if status == "ready": |
109 | | - data = await job.fetch_async() |
| 279 | + data = await job.fetch() |
110 | 280 | ``` |
111 | 281 |
|
112 | 282 | #### 3. Type-Safe Payloads (Dataclasses) |
@@ -270,11 +440,11 @@ result = client.scrape(url) |
270 | 440 | # New (async-first) |
271 | 441 | async def main(): |
272 | 442 | async with BrightDataClient(token="...") as client: |
273 | | - result = await client.scrape_url_async(url) |
| 443 | + result = await client.scrape_url(url) |
274 | 444 |
|
275 | | -# Or keep using sync |
276 | | -client = BrightDataClient(token="...") |
277 | | -result = client.scrape_url(url) |
| 445 | +# Or use sync client |
| 446 | +with SyncBrightDataClient(token="...") as client: |
| 447 | + result = client.scrape_url(url) |
278 | 448 | ``` |
279 | 449 |
|
280 | 450 |
|
|
0 commit comments