Skip to content

Commit f2b3035

Browse files
committed
fix(#43,#44,#45,#46): state filter, cron status, category dedup & translation
- #43: Use the state query parameter instead of hardcoding 'live'; only apply the deadline filter for live campaigns
- #44: Add /admin/cron-status endpoint exposing last crawl time/count/error for observability (infra fix: set ECS desired_count=1 separately)
- #45: Add DB migration to merge duplicate category rows and delete non-canonical duplicates created by translation code
- #46: Add TranslateCategories method and call it during cron syncCategories to translate all categories missing name_zh via Vertex AI
1 parent 9175c03 commit f2b3035

6 files changed

Lines changed: 175 additions & 6 deletions

File tree

backend/cmd/api/main.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -104,6 +104,7 @@ func main() {
104104

105105
if cronSvc != nil {
106106
api.POST("/admin/backfill", handler.TriggerBackfill(cronSvc))
107+
api.GET("/admin/cron-status", handler.CronStatus(cronSvc))
107108
}
108109
}
109110

backend/internal/db/db.go

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -200,6 +200,41 @@ func Init(cfg *config.Config) error {
200200
}
201201
log.Printf("DB init: deduplicated %d snapshot rows", dedup.RowsAffected)
202202

203+
// Fix duplicate categories: the Vertex AI translation created new category rows
204+
// instead of updating existing ones. Merge name_zh from duplicates into originals
205+
// and delete the duplicates. Duplicates are identified by having the same name
206+
// but different IDs (originals use Kickstarter IDs like "1", "3", etc.).
207+
if err := DB.Exec(`
208+
DO $$
209+
BEGIN
210+
IF EXISTS (SELECT 1 FROM information_schema.tables WHERE table_name = 'categories') THEN
211+
-- Update originals with name_zh from duplicates (match by name, prefer non-empty name_zh)
212+
UPDATE categories orig
213+
SET name_zh = dup.name_zh
214+
FROM categories dup
215+
WHERE orig.name = dup.name
216+
AND orig.id <> dup.id
217+
AND (orig.name_zh IS NULL OR orig.name_zh = '')
218+
AND dup.name_zh IS NOT NULL
219+
AND dup.name_zh <> '';
220+
221+
-- Delete duplicate categories that don't use Kickstarter numeric IDs
222+
-- Original categories have short numeric IDs (1-999), duplicates have longer/encoded IDs
223+
DELETE FROM categories
224+
WHERE id IN (
225+
SELECT c2.id
226+
FROM categories c1
227+
JOIN categories c2 ON c1.name = c2.name AND c1.id <> c2.id
228+
WHERE LENGTH(c1.id) <= 3 AND c1.id ~ '^[0-9]+$'
229+
AND (LENGTH(c2.id) > 3 OR c2.id !~ '^[0-9]+$')
230+
);
231+
END IF;
232+
END
233+
$$;
234+
`).Error; err != nil {
235+
log.Printf("DB init: category dedup warning: %v", err)
236+
}
237+
203238
// NOW run AutoMigrate after all column renames are complete
204239
if err := DB.AutoMigrate(
205240
&model.Campaign{},

backend/internal/handler/campaigns.go

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@ func ListCampaigns(client *service.KickstarterScrapingService) gin.HandlerFunc {
2626
sort := c.DefaultQuery("sort", "trending")
2727
categoryID := c.Query("category_id")
2828
cursor := c.Query("cursor")
29+
state := c.DefaultQuery("state", "live")
2930
limit, _ := strconv.Atoi(c.DefaultQuery("limit", "20"))
3031
if limit > 50 {
3132
limit = 50
@@ -58,10 +59,12 @@ func ListCampaigns(client *service.KickstarterScrapingService) gin.HandlerFunc {
5859
}
5960

6061
var campaigns []model.Campaign
61-
// Filter: state='live' AND deadline >= NOW() to exclude expired campaigns
62-
// Cron only upserts campaigns from discover pages (which only show live ones),
63-
// but never marks rows as ended when they disappear/expire.
64-
q := db.DB.Where("state = 'live' AND deadline >= ?", time.Now()).Offset(offset).Limit(limit + 1)
62+
q := db.DB.Where("state = ?", state).Offset(offset).Limit(limit + 1)
63+
64+
// Only filter by deadline for live campaigns
65+
if state == "live" {
66+
q = q.Where("deadline >= ?", time.Now())
67+
}
6568

6669
// Map sort to DB columns
6770
switch sort {

backend/internal/handler/health.go

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,8 +4,23 @@ import (
44
"net/http"
55

66
"github.com/gin-gonic/gin"
7+
"github.com/kickwatch/backend/internal/service"
78
)
89

910
// Health is the liveness endpoint. It always answers 200 OK with a fixed JSON
// body naming the service.
func Health(c *gin.Context) {
	body := gin.H{
		"status":  "ok",
		"service": "kickwatch-api",
	}
	c.JSON(http.StatusOK, body)
}
13+
14+
func CronStatus(cronSvc *service.CronService) gin.HandlerFunc {
15+
return func(c *gin.Context) {
16+
status := gin.H{
17+
"last_crawl_at": nil,
18+
"last_crawl_count": cronSvc.LastCrawlCount,
19+
"last_crawl_error": cronSvc.LastCrawlError,
20+
}
21+
if !cronSvc.LastCrawlAt.IsZero() {
22+
status["last_crawl_at"] = cronSvc.LastCrawlAt
23+
}
24+
c.JSON(http.StatusOK, status)
25+
}
26+
}

backend/internal/service/cron.go

Lines changed: 28 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,9 @@ type CronService struct {
3131
apnsClient *APNsClient
3232
translator *TranslatorService
3333
scheduler *cron.Cron
34+
LastCrawlAt time.Time
35+
LastCrawlCount int
36+
LastCrawlError string
3437
}
3538

3639
func NewCronService(db *gorm.DB, scrapingService *KickstarterScrapingService, apns *APNsClient, translator *TranslatorService) *CronService {
@@ -79,8 +82,8 @@ func (s *CronService) Stop() {
7982
s.scheduler.Stop()
8083
}
8184

82-
// syncCategories upserts the canonical category list into the DB so that
83-
// clients and alert filters always see the current IDs and subcategories.
85+
// syncCategories upserts the canonical category list into the DB and
86+
// translates any categories missing Chinese translations.
8487
func (s *CronService) syncCategories() {
8588
result := s.db.Clauses(clause.OnConflict{
8689
Columns: []clause.Column{{Name: "id"}},
@@ -91,6 +94,25 @@ func (s *CronService) syncCategories() {
9194
} else {
9295
log.Printf("Cron: synced %d categories", len(kickstarterCategories))
9396
}
97+
98+
// Translate any categories missing name_zh
99+
if s.translator != nil {
100+
var allCats []model.Category
101+
if err := s.db.Find(&allCats).Error; err != nil {
102+
log.Printf("Cron: fetch categories for translation error: %v", err)
103+
return
104+
}
105+
if err := s.translator.TranslateCategories(allCats); err != nil {
106+
log.Printf("Cron: category translation error: %v", err)
107+
return
108+
}
109+
// Update translated categories back to DB
110+
for _, cat := range allCats {
111+
if cat.NameZh != "" {
112+
s.db.Model(&model.Category{}).Where("id = ?", cat.ID).Update("name_zh", cat.NameZh)
113+
}
114+
}
115+
}
94116
}
95117

96118
// crawlSorts defines the sort strategies used in each nightly crawl pass.
@@ -171,6 +193,10 @@ func (s *CronService) RunCrawlNow() error {
171193
}
172194
log.Printf("Cron: crawl done, upserted %d campaigns", upserted)
173195

196+
s.LastCrawlAt = time.Now()
197+
s.LastCrawlCount = upserted
198+
s.LastCrawlError = ""
199+
174200
// Sanity check: a full crawl across all categories should always yield
175201
// at least some campaigns. Zero almost certainly means a parse failure
176202
// (e.g. Kickstarter changed their HTML structure), not a genuinely empty site.

backend/internal/service/translator.go

Lines changed: 89 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -188,6 +188,95 @@ func (t *TranslatorService) translateBatch(model *genai.GenerativeModel, campaig
188188
return nil
189189
}
190190

191+
// TranslateCategories translates category names that are missing Chinese translations.
192+
func (t *TranslatorService) TranslateCategories(categories []model.Category) error {
193+
var untranslated []model.Category
194+
for _, c := range categories {
195+
if c.NameZh == "" {
196+
untranslated = append(untranslated, c)
197+
}
198+
}
199+
if len(untranslated) == 0 {
200+
return nil
201+
}
202+
203+
model := t.client.GenerativeModel("gemini-2.0-flash-001")
204+
model.SetTemperature(0.3)
205+
206+
type catInput struct {
207+
ID string `json:"id"`
208+
Name string `json:"name"`
209+
}
210+
type catOutput struct {
211+
ID string `json:"id"`
212+
NameZh string `json:"name_zh"`
213+
}
214+
215+
const batchSize = 20
216+
for i := 0; i < len(untranslated); i += batchSize {
217+
end := i + batchSize
218+
if end > len(untranslated) {
219+
end = len(untranslated)
220+
}
221+
batch := untranslated[i:end]
222+
223+
inputs := make([]catInput, len(batch))
224+
for j, c := range batch {
225+
inputs[j] = catInput{ID: c.ID, Name: c.Name}
226+
}
227+
inputJSON, _ := json.MarshalIndent(inputs, "", " ")
228+
229+
prompt := fmt.Sprintf(`将以下 Kickstarter 分类名称翻译成中文。输出 JSON 数组,每个元素包含 id 和 name_zh。
230+
231+
输入:
232+
%s
233+
234+
直接输出 JSON 数组,不要有其他文字:`, string(inputJSON))
235+
236+
ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
237+
resp, err := model.GenerateContent(ctx, genai.Text(prompt))
238+
cancel()
239+
if err != nil {
240+
log.Printf("Translator: category batch %d-%d error: %v", i, end-1, err)
241+
continue
242+
}
243+
244+
if len(resp.Candidates) == 0 || len(resp.Candidates[0].Content.Parts) == 0 {
245+
continue
246+
}
247+
248+
responseText := fmt.Sprintf("%v", resp.Candidates[0].Content.Parts[0])
249+
responseText = strings.TrimSpace(responseText)
250+
responseText = strings.TrimPrefix(responseText, "```json")
251+
responseText = strings.TrimPrefix(responseText, "```")
252+
responseText = strings.TrimSuffix(responseText, "```")
253+
responseText = strings.TrimSpace(responseText)
254+
255+
var outputs []catOutput
256+
if err := json.Unmarshal([]byte(responseText), &outputs); err != nil {
257+
log.Printf("Translator: failed to parse category response: %v", err)
258+
continue
259+
}
260+
261+
idMap := make(map[string]string)
262+
for _, out := range outputs {
263+
idMap[out.ID] = out.NameZh
264+
}
265+
266+
for j := range categories {
267+
if zh, ok := idMap[categories[j].ID]; ok && zh != "" {
268+
categories[j].NameZh = zh
269+
}
270+
}
271+
272+
log.Printf("Translator: translated %d category names", len(outputs))
273+
if end < len(untranslated) {
274+
time.Sleep(2 * time.Second)
275+
}
276+
}
277+
return nil
278+
}
279+
191280
// Close releases resources held by the translator.
192281
func (t *TranslatorService) Close() error {
193282
if t.client != nil {

0 commit comments

Comments
 (0)