Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
112 changes: 112 additions & 0 deletions .github/workflows/daily-update.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,112 @@
# This workflow scrapes, parses, and uploads data to MongoDB on a daily basis.
#
# Two independent jobs run: `astra` (needs a Chromium browser for the
# chromedp-driven login) and `mazevo`. Each job fills `.env` from the
# repository secrets, builds the `api-tools` binary, and runs the scraper.
# The parse and upload steps are currently commented out.

name: Daily update

on:
  workflow_dispatch:
  schedule:
    # GitHub evaluates cron schedules in UTC: every day at 01:10 UTC.
    - cron: '10 1 * * *'
  pull_request: # temporary until merged

jobs:

  astra:
    runs-on: ubuntu-latest
    steps:

      - name: Install Chromium
        # Refresh the package index first so the install does not fail on a
        # stale mirror listing.
        run: |
          sudo apt-get update
          sudo apt-get install -y chromium-browser

      - name: Checkout
        uses: actions/checkout@v4

      - name: Set up Go
        uses: actions/setup-go@v5
        with:
          go-version: '1.23'

      - name: Fill env for dev
        # Secrets are handed to the script through environment variables
        # rather than being expanded inline, so their contents are never
        # interpreted as shell or sed syntax.
        env:
          LOGIN_ASTRA_USERNAME: ${{ secrets.LOGIN_ASTRA_USERNAME }}
          LOGIN_ASTRA_PASSWORD: ${{ secrets.LOGIN_ASTRA_PASSWORD }}
          MAZEVO_API_KEY: ${{ secrets.MAZEVO_API_KEY }}
          MONGODB_URI: ${{ secrets.MONGODB_URI_DEV }}
        run: |
          cp .env.template .env
          for key in LOGIN_ASTRA_USERNAME LOGIN_ASTRA_PASSWORD MAZEVO_API_KEY MONGODB_URI; do
            # Escape the characters that are special in a sed replacement
            # (backslash, '&', and the '#' delimiter used below).
            value=$(printf '%s' "${!key}" | sed -e 's/[\\&#]/\\&/g')
            sed -i "s#${key}=#${key}=${value}#" .env
          done

      - name: Setup
        run: make setup

      - name: Check
        run: make check

      - name: Build
        run: make build

      - name: Scrape Astra
        run: ./api-tools -scrape -astra -verbose

      # - name: Parse Astra
      #   run: ./api-tools -parse -astra

      # - name: Upload Astra
      #   run: ./api-tools -upload -events

      - name: Fill env for prod
        # Rewrites .env with the production MongoDB URI. No active step
        # consumes it yet; the prod upload below is still commented out.
        env:
          LOGIN_ASTRA_USERNAME: ${{ secrets.LOGIN_ASTRA_USERNAME }}
          LOGIN_ASTRA_PASSWORD: ${{ secrets.LOGIN_ASTRA_PASSWORD }}
          MAZEVO_API_KEY: ${{ secrets.MAZEVO_API_KEY }}
          MONGODB_URI: ${{ secrets.MONGODB_URI_PROD }}
        run: |
          cp .env.template .env
          for key in LOGIN_ASTRA_USERNAME LOGIN_ASTRA_PASSWORD MAZEVO_API_KEY MONGODB_URI; do
            # Escape the characters that are special in a sed replacement
            # (backslash, '&', and the '#' delimiter used below).
            value=$(printf '%s' "${!key}" | sed -e 's/[\\&#]/\\&/g')
            sed -i "s#${key}=#${key}=${value}#" .env
          done

      # - name: Upload Astra
      #   run: ./api-tools -upload -events

  mazevo:
    runs-on: ubuntu-latest
    steps:

      - name: Checkout
        uses: actions/checkout@v4

      - name: Set up Go
        uses: actions/setup-go@v5
        with:
          go-version: '1.23'

      - name: Fill env for dev
        # Secrets are handed to the script through environment variables
        # rather than being expanded inline, so their contents are never
        # interpreted as shell or sed syntax.
        env:
          LOGIN_ASTRA_USERNAME: ${{ secrets.LOGIN_ASTRA_USERNAME }}
          LOGIN_ASTRA_PASSWORD: ${{ secrets.LOGIN_ASTRA_PASSWORD }}
          MAZEVO_API_KEY: ${{ secrets.MAZEVO_API_KEY }}
          MONGODB_URI: ${{ secrets.MONGODB_URI_DEV }}
        run: |
          cp .env.template .env
          for key in LOGIN_ASTRA_USERNAME LOGIN_ASTRA_PASSWORD MAZEVO_API_KEY MONGODB_URI; do
            # Escape the characters that are special in a sed replacement
            # (backslash, '&', and the '#' delimiter used below).
            value=$(printf '%s' "${!key}" | sed -e 's/[\\&#]/\\&/g')
            sed -i "s#${key}=#${key}=${value}#" .env
          done

      - name: Setup
        run: make setup

      - name: Check
        run: make check

      - name: Build
        run: make build

      - name: Scrape Mazevo
        run: ./api-tools -scrape -mazevo

      # - name: Parse Mazevo
      #   run: ./api-tools -parse -mazevo

      # - name: Upload Mazevo
      #   run: ./api-tools -upload -events

      - name: Fill env for prod
        # Rewrites .env with the production MongoDB URI. No active step
        # consumes it yet; the prod upload below is still commented out.
        env:
          LOGIN_ASTRA_USERNAME: ${{ secrets.LOGIN_ASTRA_USERNAME }}
          LOGIN_ASTRA_PASSWORD: ${{ secrets.LOGIN_ASTRA_PASSWORD }}
          MAZEVO_API_KEY: ${{ secrets.MAZEVO_API_KEY }}
          MONGODB_URI: ${{ secrets.MONGODB_URI_PROD }}
        run: |
          cp .env.template .env
          for key in LOGIN_ASTRA_USERNAME LOGIN_ASTRA_PASSWORD MAZEVO_API_KEY MONGODB_URI; do
            # Escape the characters that are special in a sed replacement
            # (backslash, '&', and the '#' delimiter used below).
            value=$(printf '%s' "${!key}" | sed -e 's/[\\&#]/\\&/g')
            sed -i "s#${key}=#${key}=${value}#" .env
          done

      # - name: Upload Mazevo
      #   run: ./api-tools -upload -events
4 changes: 2 additions & 2 deletions .github/workflows/go.yml
Original file line number Diff line number Diff line change
Expand Up @@ -17,10 +17,10 @@ jobs:
steps:

- name: Checkout
uses: actions/checkout@v3
uses: actions/checkout@v4

- name: Set up Go
uses: actions/setup-go@v4
uses: actions/setup-go@v5
with:
go-version: '1.23'

Expand Down
6 changes: 6 additions & 0 deletions scrapers/astra.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,17 +26,21 @@ func ScrapeAstra(outDir string) {
log.Panic("Error loading .env file")
}

log.Print("1")
// Start chromedp
chromedpCtx, cancel := utils.InitChromeDp()
log.Print("2")

// Make output folder
err := os.MkdirAll(outDir, 0777)
if err != nil {
panic(err)
}
log.Print("3")

days := "{" // String JSON for storing results by day
firstLoop := true // To avoid adding a comma to the JSON on the first loop
log.Print("4")

// Init http client
tr := &http.Transport{
Expand All @@ -45,11 +49,13 @@ func ScrapeAstra(outDir string) {
DisableCompression: true,
}
cli := &http.Client{Transport: tr}
log.Print("5")

// Get cookies for auth
astraHeaders := utils.RefreshAstraToken(chromedpCtx)
time.Sleep(500 * time.Millisecond)
cancel() // Don't need chromedp anymore
log.Print("6")

// Starting date
date := time.Now()
Expand Down
17 changes: 15 additions & 2 deletions utils/methods.go
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,7 @@ func RefreshToken(chromedpCtx context.Context) map[string][]string {

// This function signs into Astra
func RefreshAstraToken(chromedpCtx context.Context) map[string][]string {
log.Print("5.1")
// Get username and password
username, present := os.LookupEnv("LOGIN_ASTRA_USERNAME")
if !present {
Expand All @@ -113,6 +114,7 @@ func RefreshAstraToken(chromedpCtx context.Context) map[string][]string {
if !present {
log.Panic("LOGIN_ASTRA_PASSWORD is missing from .env!")
}
log.Print("5.2")

// Sign in
VPrintf("Signing in...")
Expand All @@ -132,30 +134,41 @@ func RefreshAstraToken(chromedpCtx context.Context) map[string][]string {
if err != nil {
panic(err)
}
log.Print("5.3")

// Save all cookies to string
cookieStr := ""
_, err = chromedp.RunResponse(chromedpCtx,
chromedp.WaitVisible(`body`, chromedp.ByQuery),
chromedp.ActionFunc(func(ctx context.Context) error {
cookies, err := network.GetCookies().Do(ctx)
if err != nil {
return err
}
gotToken := false
for _, cookie := range cookies {
log.Printf("5.3.1 %s=%s; ", cookie.Name, cookie.Value)
cookieStr = fmt.Sprintf("%s%s=%s; ", cookieStr, cookie.Name, cookie.Value)
if cookie.Name == "UTXDallas.ASPXFORMSAUTH" {
VPrintf("Got new token: PTGSESSID = %s", cookie.Value)
VPrintf("Got new token: UTXDallas.ASPXFORMSAUTH = %s", cookie.Value)
gotToken = true
}
log.Print("5.3.2")
}
log.Print(gotToken)
if !gotToken {
return errors.New("failed to get a new token")
}
return err
log.Print("5.3.3")
return nil
}),
chromedp.WaitVisible(`body`, chromedp.ByQuery),
)
log.Print("5.3.4")
if err != nil {
panic(err)
}
log.Print("5.4")

// Return headers, copied from a request the actual site made
return map[string][]string{
Expand Down
Loading