diff --git a/.github/workflows/daily-update.yml b/.github/workflows/daily-update.yml
new file mode 100644
index 0000000..580bb01
--- /dev/null
+++ b/.github/workflows/daily-update.yml
@@ -0,0 +1,120 @@
+# This workflow scrapes, parses, and uploads data to MongoDB on a daily basis
+
+name: Daily update
+
+on:
+  workflow_dispatch:
+  schedule:
+    - cron: '10 1 * * *'
+  pull_request: # temporary until merged
+
+jobs:
+
+  astra:
+    runs-on: ubuntu-latest
+    steps:
+
+    - name: Install Chromium
+      run: sudo apt-get update && sudo apt-get install -y chromium-browser
+
+    - name: Checkout
+      uses: actions/checkout@v4
+
+    - name: Set up Go
+      uses: actions/setup-go@v5
+      with:
+        go-version: '1.23'
+
+    - name: Fill env for dev
+      env:
+        LOGIN_ASTRA_USERNAME: ${{ secrets.LOGIN_ASTRA_USERNAME }}
+        LOGIN_ASTRA_PASSWORD: ${{ secrets.LOGIN_ASTRA_PASSWORD }}
+        MAZEVO_API_KEY: ${{ secrets.MAZEVO_API_KEY }}
+        MONGODB_URI: ${{ secrets.MONGODB_URI_DEV }}
+      run: |
+        # Fill the template from the environment: awk copies each value literally, whereas a sed replacement would misread '&', '#' or '\' inside a secret
+        awk -F= '$1 ~ /^(LOGIN_ASTRA_USERNAME|LOGIN_ASTRA_PASSWORD|MAZEVO_API_KEY|MONGODB_URI)$/ && NF > 1 { print $1 "=" ENVIRON[$1]; next } { print }' .env.template > .env
+
+    - name: Setup
+      run: make setup
+
+    - name: Check
+      run: make check
+
+    - name: Build
+      run: make build
+
+    - name: Scrape Astra
+      run: ./api-tools -scrape -astra -verbose
+
+#    - name: Parse Astra
+#      run: ./api-tools -parse -astra
+
+#    - name: Upload Astra
+#      run: ./api-tools -upload -events
+
+    - name: Fill env for prod
+      env:
+        LOGIN_ASTRA_USERNAME: ${{ secrets.LOGIN_ASTRA_USERNAME }}
+        LOGIN_ASTRA_PASSWORD: ${{ secrets.LOGIN_ASTRA_PASSWORD }}
+        MAZEVO_API_KEY: ${{ secrets.MAZEVO_API_KEY }}
+        MONGODB_URI: ${{ secrets.MONGODB_URI_PROD }}
+      run: |
+        # Fill the template from the environment: awk copies each value literally, whereas a sed replacement would misread '&', '#' or '\' inside a secret
+        awk -F= '$1 ~ /^(LOGIN_ASTRA_USERNAME|LOGIN_ASTRA_PASSWORD|MAZEVO_API_KEY|MONGODB_URI)$/ && NF > 1 { print $1 "=" ENVIRON[$1]; next } { print }' .env.template > .env
+
+#    - name: Upload Astra
+#      run: ./api-tools -upload -events
+
+  mazevo:
+    runs-on: ubuntu-latest
+    steps:
+
+    - name: Checkout
+      uses: actions/checkout@v4
+
+    - name: Set up Go
+      uses: actions/setup-go@v5
+      with:
+        go-version: '1.23'
+
+    - name: Fill env for dev
+      env:
+        LOGIN_ASTRA_USERNAME: ${{ secrets.LOGIN_ASTRA_USERNAME }}
+        LOGIN_ASTRA_PASSWORD: ${{ secrets.LOGIN_ASTRA_PASSWORD }}
+        MAZEVO_API_KEY: ${{ secrets.MAZEVO_API_KEY }}
+        MONGODB_URI: ${{ secrets.MONGODB_URI_DEV }}
+      run: |
+        # Fill the template from the environment: awk copies each value literally, whereas a sed replacement would misread '&', '#' or '\' inside a secret
+        awk -F= '$1 ~ /^(LOGIN_ASTRA_USERNAME|LOGIN_ASTRA_PASSWORD|MAZEVO_API_KEY|MONGODB_URI)$/ && NF > 1 { print $1 "=" ENVIRON[$1]; next } { print }' .env.template > .env
+
+    - name: Setup
+      run: make setup
+
+    - name: Check
+      run: make check
+
+    - name: Build
+      run: make build
+
+    - name: Scrape Mazevo
+      run: ./api-tools -scrape -mazevo
+
+#    - name: Parse Mazevo
+#      run: ./api-tools -parse -mazevo
+
+#    - name: Upload Mazevo
+#      run: ./api-tools -upload -events
+
+    - name: Fill env for prod
+      env:
+        LOGIN_ASTRA_USERNAME: ${{ secrets.LOGIN_ASTRA_USERNAME }}
+        LOGIN_ASTRA_PASSWORD: ${{ secrets.LOGIN_ASTRA_PASSWORD }}
+        MAZEVO_API_KEY: ${{ secrets.MAZEVO_API_KEY }}
+        MONGODB_URI: ${{ secrets.MONGODB_URI_PROD }}
+      run: |
+        # Fill the template from the environment: awk copies each value literally, whereas a sed replacement would misread '&', '#' or '\' inside a secret
+        awk -F= '$1 ~ /^(LOGIN_ASTRA_USERNAME|LOGIN_ASTRA_PASSWORD|MAZEVO_API_KEY|MONGODB_URI)$/ && NF > 1 { print $1 "=" ENVIRON[$1]; next } { print }' .env.template > .env
+
+#    - name: Upload Mazevo
+#      run: ./api-tools -upload -events
diff --git a/.github/workflows/go.yml b/.github/workflows/go.yml
index 6422e42..ca57a2b 100644
--- a/.github/workflows/go.yml
+++ b/.github/workflows/go.yml
@@ -17,10 +17,10 @@ jobs:
     steps:
 
     - name: Checkout
-      uses: actions/checkout@v3
+      uses: actions/checkout@v4
 
     - name: Set up Go
-      uses: actions/setup-go@v4
+      uses: actions/setup-go@v5
       with:
         go-version: '1.23'
 
diff --git a/scrapers/astra.go b/scrapers/astra.go
index d2cf5b1..b00730e 100644
--- a/scrapers/astra.go
+++ b/scrapers/astra.go
@@ -26,17 +26,21 @@ func ScrapeAstra(outDir string) {
 		log.Panic("Error loading .env file")
 	}
 
+	log.Print("1")
 	// Start chromedp
 	chromedpCtx, cancel := utils.InitChromeDp()
+	log.Print("2")
 
 	// Make output folder
 	err := os.MkdirAll(outDir, 0777)
 	if err != nil {
 		panic(err)
 	}
+	log.Print("3")
 
 	days := "{"       // String JSON for storing results by day
 	firstLoop := true // To avoid adding a comma to the JSON on the first loop
+	log.Print("4")
 
 	// Init http client
 	tr := &http.Transport{
@@ -45,11 +49,13 @@ func ScrapeAstra(outDir string) {
 		DisableCompression: true,
 	}
 	cli := &http.Client{Transport: tr}
+	log.Print("5")
 
 	// Get cookies for auth
 	astraHeaders := utils.RefreshAstraToken(chromedpCtx)
 	time.Sleep(500 * time.Millisecond)
 	cancel() // Don't need chromedp anymore
+	log.Print("6")
 
 	// Starting date
 	date := time.Now()
diff --git a/utils/methods.go b/utils/methods.go
index 2b54eaa..87ce9de 100644
--- a/utils/methods.go
+++ b/utils/methods.go
@@ -104,6 +104,7 @@ func RefreshToken(chromedpCtx context.Context) map[string][]string {
 
 // This function signs into Astra
 func RefreshAstraToken(chromedpCtx context.Context) map[string][]string {
+	log.Print("5.1")
 	// Get username and password
 	username, present := os.LookupEnv("LOGIN_ASTRA_USERNAME")
 	if !present {
@@ -113,6 +114,7 @@ func RefreshAstraToken(chromedpCtx context.Context) map[string][]string {
 	if !present {
 		log.Panic("LOGIN_ASTRA_PASSWORD is missing from .env!")
 	}
+	log.Print("5.2")
 
 	// Sign in
 	VPrintf("Signing in...")
@@ -132,6 +134,7 @@ func RefreshAstraToken(chromedpCtx context.Context) map[string][]string {
 	if err != nil {
 		panic(err)
 	}
+	log.Print("5.3")
 
 	// Save all cookies to string
 	cookieStr := ""
@@ -139,23 +142,33 @@ func RefreshAstraToken(chromedpCtx context.Context) map[string][]string {
 		chromedp.WaitVisible(`body`, chromedp.ByQuery),
 		chromedp.ActionFunc(func(ctx context.Context) error {
 			cookies, err := network.GetCookies().Do(ctx)
+			if err != nil {
+				return err
+			}
 			gotToken := false
 			for _, cookie := range cookies {
+				log.Printf("5.3.1 %s", cookie.Name)
 				cookieStr = fmt.Sprintf("%s%s=%s; ", cookieStr, cookie.Name, cookie.Value)
 				if cookie.Name == "UTXDallas.ASPXFORMSAUTH" {
-					VPrintf("Got new token: PTGSESSID = %s", cookie.Value)
+					VPrintf("Got new token: UTXDallas.ASPXFORMSAUTH")
 					gotToken = true
 				}
+				log.Print("5.3.2")
 			}
+			log.Print(gotToken)
 			if !gotToken {
 				return errors.New("failed to get a new token")
 			}
-			return err
+			log.Print("5.3.3")
+			return nil
 		}),
+		chromedp.WaitVisible(`body`, chromedp.ByQuery),
 	)
+	log.Print("5.3.4")
 	if err != nil {
 		panic(err)
 	}
+	log.Print("5.4")
 
 	// Return headers, copied from a request the actual site made
 	return map[string][]string{