-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathlambda_function.py
More file actions
154 lines (128 loc) · 4.8 KB
/
lambda_function.py
File metadata and controls
154 lines (128 loc) · 4.8 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
"""
AWS Lambda function for AI SEO optimization - llms.txt checker and generator.
"""
import json
import sys
import os
# Add src directory to path for imports
sys.path.insert(0, os.path.join(os.path.dirname(__file__), 'src'))
from src.llms_txt_checker import LLMSTxtChecker
from src.llms_txt_validator import LLMSTxtValidator
from src.website_crawler import WebsiteCrawler
from src.llms_txt_generator import LLMSTxtGenerator
def lambda_handler(event, context):
    """
    AWS Lambda handler: check, validate, or generate an llms.txt file for a URL.

    Expected event structure:
        {
            "url": "https://example.com",
            "options": {
                "max_pages": 20,  # optional, default 20
                "timeout": 10     # optional, default 10 (seconds)
            }
        }

    Returns an API Gateway-style response dict:
        {
            "statusCode": 200 | 400 | 500,
            "headers": {...},
            "body": JSON string containing url, llms_txt_exists, and either
                    validation details (when llms.txt exists) or generated
                    content (when it does not), plus recommendations;
                    on failure, an "error" message (and exception "type"
                    for unexpected 500s).
        }
    """
    # Shared headers so every response — success or error — is CORS-enabled.
    # Previously only the 200 path set these, leaving browser clients unable
    # to read 400/500 bodies cross-origin.
    headers = {
        'Content-Type': 'application/json',
        'Access-Control-Allow-Origin': '*'
    }

    try:
        # Parse input: API Gateway proxy integration delivers the payload as
        # a JSON string in event['body']; direct invocation passes the dict.
        if isinstance(event.get('body'), str):
            try:
                body = json.loads(event['body'])
            except json.JSONDecodeError:
                # Malformed client input is a 400, not an internal 500.
                return {
                    'statusCode': 400,
                    'headers': headers,
                    'body': json.dumps({
                        'error': 'Request body is not valid JSON'
                    })
                }
        else:
            body = event

        url = body.get('url')
        if not url:
            return {
                'statusCode': 400,
                'headers': headers,
                'body': json.dumps({
                    'error': 'Missing required parameter: url'
                })
            }

        # `or {}` guards against an explicit "options": null in the payload.
        options = body.get('options') or {}
        max_pages = options.get('max_pages', 20)
        timeout = options.get('timeout', 10)

        # Initialize components
        checker = LLMSTxtChecker(timeout=timeout)
        validator = LLMSTxtValidator()
        crawler = WebsiteCrawler(max_pages=max_pages, timeout=timeout)
        generator = LLMSTxtGenerator()

        # Step 1: Check if llms.txt exists at the target site.
        check_result = checker.check_llms_txt(url)
        response_data = {
            'url': url,
            'llms_txt_exists': check_result['exists'],
            'llms_txt_url': check_result.get('url')
        }

        if check_result['exists']:
            # Step 2a: Validate the existing llms.txt and report a score.
            content = check_result.get('content', '')
            validation_result = validator.validate(content)
            response_data['validation'] = {
                'valid': validation_result['valid'],
                'score': validation_result['score'],
                'issues': validation_result['issues'],
                'warnings': validation_result['warnings'],
                'sections_found': validation_result['sections_found'],
                'links_count': len(validation_result['links_found']),
                'has_optional_section': validation_result['has_optional_section']
            }
            response_data['current_llms_txt'] = content

            # Crawl the site to produce improvement recommendations
            # alongside the validation result.
            site_info = crawler.crawl(url)
            response_data['recommendations'] = generator.generate_recommendations(
                site_info,
                validation_result
            )
        else:
            # Step 2b: No llms.txt — crawl the site and generate one.
            site_info = crawler.crawl(url)
            if 'error' in site_info:
                return {
                    'statusCode': 500,
                    'headers': headers,
                    'body': json.dumps({
                        'error': f'Failed to crawl website: {site_info["error"]}'
                    })
                }
            generated_content = generator.generate(site_info)
            response_data['generated_llms_txt'] = generated_content
            response_data['site_info'] = {
                'title': site_info.get('title'),
                'description': site_info.get('description'),
                'documentation_links_found': len(site_info.get('documentation_links', [])),
                'navigation_links_found': len(site_info.get('navigation_links', []))
            }
            response_data['recommendations'] = generator.generate_recommendations(site_info)

        return {
            'statusCode': 200,
            'headers': headers,
            'body': json.dumps(response_data, indent=2)
        }

    except Exception as e:
        # Top-level boundary: report the failure to the caller rather than
        # letting Lambda surface an unhandled invocation error.
        return {
            'statusCode': 500,
            'headers': headers,
            'body': json.dumps({
                'error': str(e),
                'type': type(e).__name__
            })
        }
# Allow running this module directly as a local smoke test.
if __name__ == '__main__':
    sample_event = {'url': 'https://fastht.ml'}
    response = lambda_handler(sample_event, None)
    # Pretty-print the JSON body for readability on the console.
    parsed_body = json.loads(response['body'])
    print(json.dumps(parsed_body, indent=2))