# OLD (v1.1.3)
from brightdata import bdclient
client = bdclient(api_token="your_token")
# NEW (v2.0.0)
from brightdata import BrightDataClient
client = BrightDataClient(token="your_token")

- Old: Flat API with methods directly on client (`client.scrape()`, `client.search()`)
- New: Hierarchical service-based API (`client.scrape.amazon.products()`, `client.search.google()`)
# OLD
client.scrape_linkedin.profiles(url)
client.search_linkedin.jobs()
# NEW
client.scrape.linkedin.profiles(url)
client.search.linkedin.jobs()

- Old: Raw dictionaries and strings
- New: Structured `ScrapeResult` and `SearchResult` objects with metadata and timing metrics
- Old: Python 3.8+
- New: Python 3.9+ (dropped Python 3.8 support)
Old: Synchronous with ThreadPoolExecutor for concurrency
# Old approach - thread-based parallelism
with ThreadPoolExecutor(max_workers=10) as executor:
    results = executor.map(self.scrape, urls)

New: Native async/await throughout with sync wrappers
# New approach - native async
async def scrape_async(self, url):
async with self.engine:
return await self._execute_workflow(...)
# Sync wrapper for compatibility
def scrape(self, url):
    return asyncio.run(self.scrape_async(url))

Old: Monolithic bdclient class with all methods
New: Layered architecture with specialized services
BrightDataClient
├── scrape (ScrapeService)
│ ├── amazon (AmazonScraper)
│ ├── linkedin (LinkedInScraper)
│ └── instagram (InstagramScraper)
├── search (SearchService)
│ ├── google
│ ├── bing
│ └── yandex
└── crawler (CrawlService)
Old: Direct HTTP requests with immediate responses

New: Trigger/Poll/Fetch workflow for long-running operations
# New workflow pattern
snapshot_id = await trigger(payload) # Start job
status = await poll_until_ready(snapshot_id) # Check progress
data = await fetch_results(snapshot_id)       # Get results

| Platform | Old SDK | New SDK | New Capabilities |
|---|---|---|---|
| Amazon | ❌ | ✅ | Products, Reviews, Sellers (separate datasets) |
| LinkedIn | ✅ Basic | ✅ Full | Enhanced scraping and search methods |
| Instagram | ❌ | ✅ | Profiles, Posts, Comments, Reels |
| Facebook | ❌ | ✅ | Posts, Comments, Groups |
| ChatGPT | ✅ Basic | ✅ Enhanced | Improved prompt interaction |
| Google Search | ✅ | ✅ Enhanced | Dedicated service with better structure |
| Bing/Yandex | ✅ | ✅ Enhanced | Separate service methods |
# New capability - fine-grained control over scraping jobs
job = await scraper.trigger(url)
# Do other work...
status = await job.status_async()
if status == "ready":
    data = await job.fetch_async()

# New - structured payloads with validation
from brightdata import AmazonProductPayload
payload = AmazonProductPayload(
url="https://amazon.com/dp/B123",
reviews_count=100
)
# Old - untyped dictionaries
payload = {"url": "...", "reviews_count": 100}

# New - command-line interface
brightdata scrape amazon products --url https://amazon.com/dp/B123
brightdata search google --query "python sdk"
brightdata crawler discover --url https://example.com --depth 3
# Old - no CLI support

# New - self-registering scrapers
@register("amazon")
class AmazonScraper(BaseWebScraper):
    DATASET_ID = "gd_l7q7dkf244hwxbl93"

- SDK function tracking via stack inspection
- Microsecond-precision timestamps for all operations
- Comprehensive cost tracking per platform
- Detailed timing metrics in results
- Old: New connection per request, basic session management
- New: Advanced connection pooling (100 total, 30 per host) with keep-alive
- Old: Thread-based with GIL limitations
- New: Event loop-based with true async concurrency
- Old: Basic cleanup with requests library
- New: Triple-layer cleanup strategy with context managers and idempotent operations
- Old: No built-in rate limiting
- New: Optional `AsyncLimiter` integration (10 req/sec default)
Removed dependencies:
- beautifulsoup4 - Parsing moved to server-side
- openai - Not needed for ChatGPT scraping

Added dependencies:
- tldextract - Domain extraction for registry
- pydantic - Data validation (optional)
- aiolimiter - Rate limiting support
- click - CLI framework
- aiohttp>=3.8.0 - Core async HTTP client (was using requests for sync)
# Supported in both old and new versions:
BRIGHTDATA_API_TOKEN=token
WEB_UNLOCKER_ZONE=zone
SERP_ZONE=zone
BROWSER_ZONE=zone
BRIGHTDATA_BROWSER_USERNAME=username
BRIGHTDATA_BROWSER_PASSWORD=password
# Note: Rate limiting is NOT configured via environment variable
# It must be set programmatically when creating the client

# Old (v1.1.3)
client = bdclient(
api_token="token", # Required parameter name
auto_create_zones=True, # Default: True
web_unlocker_zone="sdk_unlocker", # Default from env or 'sdk_unlocker'
serp_zone="sdk_serp", # Default from env or 'sdk_serp'
browser_zone="sdk_browser", # Default from env or 'sdk_browser'
browser_username="username",
browser_password="password",
browser_type="playwright",
log_level="INFO",
structured_logging=True,
verbose=False
)
# New (v2.0.0)
client = BrightDataClient(
token="token", # Changed parameter name (was api_token)
customer_id="id", # New parameter (optional)
timeout=30, # New parameter (default: 30)
auto_create_zones=False, # Changed default: now False (was True)
web_unlocker_zone="web_unlocker1", # Changed default name
serp_zone="serp_api1", # Changed default name
browser_zone="browser_api1", # Changed default name
validate_token=False, # New parameter
rate_limit=10, # New parameter (optional)
rate_period=1.0 # New parameter (default: 1.0)
)
# Note: browser credentials and logging config removed from client init

# Old
result = client.scrape(url, zone="my_zone", response_format="json")
# New (minimal change)
result = client.scrape_url(url, zone="my_zone", response_format="json")
# New (recommended - platform-specific)
result = client.scrape.amazon.products(url)

# Old
profiles = client.scrape_linkedin.profiles(url)
jobs = client.search_linkedin.jobs(location="Paris")
# New
profiles = client.scrape.linkedin.profiles(url)
jobs = client.search.linkedin.jobs(location="Paris")

# Old
results = client.search(query, search_engine="google")
# New
results = client.search.google(query)

# Old (sync only)
result = client.scrape(url)
# New (async-first)
async def main():
async with BrightDataClient(token="...") as client:
result = await client.scrape_url_async(url)
# Or keep using sync
client = BrightDataClient(token="...")
    result = client.scrape_url(url)

Version 2.0.0 represents a complete rewrite of the Bright Data Python SDK, not an incremental update. The new architecture prioritizes:
- Modern Python patterns: Async-first with proper resource management
- Developer experience: Hierarchical APIs, type safety, CLI tools
- Production reliability: Comprehensive error handling, telemetry
- Platform coverage: All major platforms with specialized scrapers
- Flexibility: Three levels of control (simple, workflow, manual)
This is a breaking release requiring code changes. The migration effort is justified by:
- 10x improvement in concurrent operation handling
- 50+ new platform-specific methods
- Proper async support for modern applications
- Comprehensive timing and cost tracking
- Future-proof architecture for new platforms
- Update Python to 3.9+
- Update import statements from `bdclient` to `BrightDataClient`
- Migrate to hierarchical API structure
- Update method calls to new naming convention
- Handle new `ScrapeResult`/`SearchResult` return types
- Consider async-first approach for better performance
- Review and update error handling for new exception types
- Test rate limiting configuration if needed
- Validate platform-specific scraper migrations