From de8313186a726981b4720dafbd0cf073dc498fec Mon Sep 17 00:00:00 2001
From: Max Ostapenko <1611259+max-ostapenko@users.noreply.github.com>
Date: Mon, 4 Aug 2025 02:05:35 +0200
Subject: [PATCH 1/3] all crawl reports

---
Review notes (free-form text between "---" and the diffstat; not part of the
commit message):
 * bytesTotal timeseries: the percentiles now read FLOAT64(summary.bytesTotal).
   The bare `bytesTotal` name was an alias defined only in the `pages` CTE that
   this patch removes.
 * reqCss, reqFont, reqHtml, reqImg, reqJs, reqOther, reqTotal and reqVideo
   histograms: the WHERE predicates are now joined with AND, as in every other
   query added by this patch.
 * Both fixes edit added lines in place, so the hunk line counts are unchanged.
   The blob hash on the "index" line predates them.
 * NOTE(review): the fcp timeseries divides by 1024 while its histogram divides
   by 1000, and the dcl timeseries does not divide by 1000 while its histogram
   does. Left unchanged - confirm the intended units.
 * NOTE(review): the req* histograms do not select `date`, unlike the other
   histograms. Left unchanged - confirm whether consumers need it.

 includes/reports.js | 2810 ++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 2778 insertions(+), 32 deletions(-)

diff --git a/includes/reports.js b/includes/reports.js
index 5d01df6..655c14e 100644
--- a/includes/reports.js
+++ b/includes/reports.js
@@ -2,25 +2,220 @@ const { DataformTemplateBuilder } = require('./constants')
 
 const config = {
   _metrics: {
-    bytesTotal: {
+    bytesCss: { // summary.bytesCss per root page
       SQL: [
         {
           type: 'histogram',
           query: DataformTemplateBuilder.create((ctx, params) => `
-WITH pages AS (
+SELECT
+  *,
+  SUM(pdf) OVER (PARTITION BY client ORDER BY bin) AS cdf
+FROM (
+  SELECT
+    *,
+    volume / SUM(volume) OVER (PARTITION BY client) AS pdf
+  FROM (
+    SELECT
+      date,
+      client,
+      COUNT(0) AS volume,
+      CAST(FLOOR(FLOAT64(summary.bytesCss) / 10240) * 10 AS INT64) AS bin
+    FROM ${ctx.ref('crawl', 'pages')}
+    WHERE
+      date = '${params.date}' AND
+      ${params.devRankFilter} AND
+      ${params.lens.sql} AND
+      is_root_page
+    GROUP BY
+      date,
+      bin,
+      client
+  )
+)
+ORDER BY
+  bin,
+  client
+`)
+        },
+        {
+          type: 'timeseries',
+          query: DataformTemplateBuilder.create((ctx, params) => `
+SELECT
+  date,
+  UNIX_DATE(date) * 1000 * 60 * 60 * 24 AS timestamp,
+  client,
+  ROUND(APPROX_QUANTILES(FLOAT64(summary.bytesCss), 1001)[OFFSET(101)] / 1024, 2) AS p10,
+  ROUND(APPROX_QUANTILES(FLOAT64(summary.bytesCss), 1001)[OFFSET(251)] / 1024, 2) AS p25,
+  ROUND(APPROX_QUANTILES(FLOAT64(summary.bytesCss), 1001)[OFFSET(501)] / 1024, 2) AS p50,
+  ROUND(APPROX_QUANTILES(FLOAT64(summary.bytesCss), 1001)[OFFSET(751)] / 1024, 2) AS p75,
+  ROUND(APPROX_QUANTILES(FLOAT64(summary.bytesCss), 1001)[OFFSET(901)] / 1024, 2) AS p90
+FROM ${ctx.ref('crawl', 'pages')}
+WHERE
+  date = '${params.date}' AND
+  ${params.devRankFilter} AND
+  ${params.lens.sql} AND
+  is_root_page AND
+  FLOAT64(summary.bytesCss) > 0
+GROUP BY
+  date,
+  timestamp,
+  client
+ORDER BY
+  date DESC,
+  client
+`)
+        }
+      ]
+    },
+    ttci: { // lighthouse interactive audit value (numericValue, else rawValue) / 1000
+      SQL: [
+        {
+          type: 'histogram',
+          query: DataformTemplateBuilder.create((ctx, params) => `
+SELECT
+  *,
+  SUM(pdf) OVER (PARTITION BY client ORDER BY bin) AS cdf
+FROM (
+  SELECT
+    *,
+    volume / SUM(volume) OVER (PARTITION BY client) AS pdf
+  FROM (
+    SELECT
+      date,
+      client,
+      COUNT(0) AS volume,
+      CAST(FLOOR(CAST(IFNULL(
+        FLOAT64(lighthouse.audits.interactive.numericValue),
+        IFNULL(
+          FLOAT64(lighthouse.audits['consistently-interactive'].rawValue),
+          FLOAT64(lighthouse.audits.interactive.rawValue)
+        )
+      ) AS FLOAT64) / 1000) AS INT64) AS bin
+    FROM ${ctx.ref('crawl', 'pages')}
+    WHERE
+      date = '${params.date}' AND
+      ${params.devRankFilter} AND
+      ${params.lens.sql} AND
+      is_root_page
+    GROUP BY
+      date,
+      bin,
+      client
+    HAVING
+      bin IS NOT NULL
+  )
+)
+ORDER BY
+  bin,
+  client
+`)
+        },
+        {
+          type: 'timeseries',
+          query: DataformTemplateBuilder.create((ctx, params) => `
+SELECT
+  date,
+  UNIX_DATE(date) * 1000 * 60 * 60 * 24 AS timestamp,
+  client,
+  ROUND(APPROX_QUANTILES(value, 1000)[OFFSET(100)], 2) AS p10,
+  ROUND(APPROX_QUANTILES(value, 1000)[OFFSET(250)], 2) AS p25,
+  ROUND(APPROX_QUANTILES(value, 1000)[OFFSET(500)], 2) AS p50,
+  ROUND(APPROX_QUANTILES(value, 1000)[OFFSET(750)], 2) AS p75,
+  ROUND(APPROX_QUANTILES(value, 1000)[OFFSET(900)], 2) AS p90
+FROM (
   SELECT
-    date,
     client,
-    CAST(FLOOR(FLOAT64(summary.bytesTotal) / 1024 / 100) * 100 AS INT64) AS bin
+    date,
+    IFNULL(
+      FLOAT64(lighthouse.audits.interactive.numericValue),
+      IFNULL(
+        FLOAT64(lighthouse.audits.interactive.rawValue),
+        FLOAT64(lighthouse.audits['consistently-interactive'].rawValue)
+      )
+    ) / 1000 AS value
   FROM ${ctx.ref('crawl', 'pages')}
   WHERE
-    date = '${params.date}'
-    ${params.devRankFilter}
-    ${params.lens.sql}
-    AND is_root_page
-    AND FLOAT64(summary.bytesTotal) > 0
+    date = '${params.date}' AND
+    ${params.devRankFilter} AND
+    ${params.lens.sql} AND
+    is_root_page
 )
-
+GROUP BY
+  date,
+  timestamp,
+  client
+ORDER BY
+  date DESC,
+  client
+`)
+        }
+      ]
+    },
+    pctHttps: { // percent of requests whose url starts with 'https'
+      SQL: [
+        {
+          type: 'timeseries',
+          query: DataformTemplateBuilder.create((ctx, params) => `
+SELECT
+  date,
+  UNIX_DATE(date) * 1000 * 60 * 60 * 24 AS timestamp,
+  client,
+  ROUND(SUM(IF(STARTS_WITH(url, 'https'), 1, 0)) * 100 / COUNT(0), 2) AS percent
+FROM ${ctx.ref('crawl', 'requests')}
+INNER JOIN ${ctx.ref('crawl', 'pages')}
+USING (date, client, is_root_page, rank, page)
+WHERE
+  date = '${params.date}' AND
+  ${params.devRankFilter} AND
+  ${params.lens.sql} AND
+  is_root_page
+GROUP BY
+  date,
+  timestamp,
+  client
+ORDER BY
+  date DESC,
+  client
+`)
+        }
+      ]
+    },
+    storageEstimate: { // root pages with feature id '1371' or feature 'DurableStorageEstimate'
+      SQL: [
+        {
+          type: 'timeseries',
+          query: DataformTemplateBuilder.create((ctx, params) => `
+SELECT
+  date,
+  UNIX_DATE(date) * 1000 * 60 * 60 * 24 AS timestamp,
+  client,
+  SUM(IF(feat.id IS NOT NULL, 1, 0)) AS num_urls,
+  ROUND(SUM(IF(feat.id IS NOT NULL, 1, 0)) / COUNT(0) * 100, 5) AS percent
+FROM ${ctx.ref('crawl', 'pages')}
+LEFT OUTER JOIN UNNEST(features) AS feat
+ON (feat.id = '1371' OR feat.feature = 'DurableStorageEstimate')
+WHERE
+  date = '${params.date}' AND
+  ${params.devRankFilter} AND
+  ${params.lens.sql} AND
+  is_root_page
+GROUP BY
+  date,
+  timestamp,
+  client
+ORDER BY
+  date DESC,
+  client,
+  num_urls DESC
+`)
+        }
+      ]
+    },
+    bootupJs: { // lighthouse bootup-time audit value (numericValue, else rawValue)
+      SQL: [
+        {
+          type: 'histogram',
+          query: DataformTemplateBuilder.create((ctx, params) => `
 SELECT
   *,
   SUM(pdf) OVER (PARTITION BY client ORDER BY bin) AS cdf
@@ -30,14 +225,22 @@ FROM (
     volume / SUM(volume) OVER (PARTITION BY client) AS pdf
   FROM (
     SELECT
-      *,
-      COUNT(0) AS volume
-    FROM pages
-    WHERE bin IS NOT NULL
-    GROUP BY
       date,
       client,
-      bin
+      COUNT(0) AS volume,
+      FLOOR(FLOAT64(IFNULL(lighthouse.audits['bootup-time'].numericValue, lighthouse.audits['bootup-time'].rawValue)) / 100) / 10 AS bin
+    FROM ${ctx.ref('crawl', 'pages')}
+    WHERE
+      date = '${params.date}' AND
+      ${params.devRankFilter} AND
+      ${params.lens.sql} AND
+      is_root_page
+    GROUP BY
+      date,
+      bin,
+      client
+    HAVING
+      bin IS NOT NULL
   )
 )
 ORDER BY
@@ -48,37 +251,2580 @@ ORDER BY
         {
           type: 'timeseries',
           query: DataformTemplateBuilder.create((ctx, params) => `
-WITH pages AS (
+SELECT
+  date,
+  UNIX_DATE(date) * 1000 * 60 * 60 * 24 AS timestamp,
+  client,
+  ROUND(APPROX_QUANTILES(value, 1000)[OFFSET(100)], 2) AS p10,
+  ROUND(APPROX_QUANTILES(value, 1000)[OFFSET(250)], 2) AS p25,
+  ROUND(APPROX_QUANTILES(value, 1000)[OFFSET(500)], 2) AS p50,
+  ROUND(APPROX_QUANTILES(value, 1000)[OFFSET(750)], 2) AS p75,
+  ROUND(APPROX_QUANTILES(value, 1000)[OFFSET(900)], 2) AS p90
+FROM (
   SELECT
     date,
     client,
-    FLOAT64(summary.bytesTotal) AS bytesTotal
+    IFNULL(
+      FLOAT64(lighthouse.audits['bootup-time'].numericValue),
+      FLOAT64(lighthouse.audits['bootup-time'].rawValue)
+    ) / 1000 AS value
   FROM ${ctx.ref('crawl', 'pages')}
   WHERE
-    date = '${params.date}'
-    ${params.devRankFilter}
-    ${params.lens.sql}
-    AND is_root_page
-    AND INT64(summary.bytesTotal) > 0
+    date = '${params.date}' AND
+    ${params.devRankFilter} AND
+    ${params.lens.sql} AND
+    lighthouse IS NOT NULL AND
+    TO_JSON_STRING(lighthouse) != '{}' AND
+    is_root_page
 )
-
+GROUP BY
+  date,
+  timestamp,
+  client
+ORDER BY
+  date DESC,
+  client
+`)
+        }
+      ]
+    },
+    bytesFont: { // summary.bytesFont per root page
+      SQL: [
+        {
+          type: 'histogram',
+          query: DataformTemplateBuilder.create((ctx, params) => `
+SELECT
+  *,
+  SUM(pdf) OVER (PARTITION BY client ORDER BY bin) AS cdf
+FROM (
+  SELECT
+    *,
+    volume / SUM(volume) OVER (PARTITION BY client) AS pdf
+  FROM (
+    SELECT
+      date,
+      client,
+      COUNT(0) AS volume,
+      CAST(FLOOR(FLOAT64(summary.bytesFont) / 10240) * 10 AS INT64) AS bin
+    FROM ${ctx.ref('crawl', 'pages')}
+    WHERE
+      date = '${params.date}' AND
+      ${params.devRankFilter} AND
+      ${params.lens.sql} AND
+      is_root_page
+    GROUP BY
+      date,
+      bin,
+      client
+  )
+)
+ORDER BY
+  bin,
+  client
+`)
+        },
+        {
+          type: 'timeseries',
+          query: DataformTemplateBuilder.create((ctx, params) => `
 SELECT
   date,
-  client,
   UNIX_DATE(date) * 1000 * 60 * 60 * 24 AS timestamp,
-  ROUND(APPROX_QUANTILES(bytesTotal, 1001)[OFFSET(101)] / 1024, 2) AS p10,
-  ROUND(APPROX_QUANTILES(bytesTotal, 1001)[OFFSET(251)] / 1024, 2) AS p25,
-  ROUND(APPROX_QUANTILES(bytesTotal, 1001)[OFFSET(501)] / 1024, 2) AS p50,
-  ROUND(APPROX_QUANTILES(bytesTotal, 1001)[OFFSET(751)] / 1024, 2) AS p75,
-  ROUND(APPROX_QUANTILES(bytesTotal, 1001)[OFFSET(901)] / 1024, 2) AS p90
-FROM pages
+  client,
+  ROUND(APPROX_QUANTILES(FLOAT64(summary.bytesFont), 1001)[OFFSET(101)] / 1024, 2) AS p10,
+  ROUND(APPROX_QUANTILES(FLOAT64(summary.bytesFont), 1001)[OFFSET(251)] / 1024, 2) AS p25,
+  ROUND(APPROX_QUANTILES(FLOAT64(summary.bytesFont), 1001)[OFFSET(501)] / 1024, 2) AS p50,
+  ROUND(APPROX_QUANTILES(FLOAT64(summary.bytesFont), 1001)[OFFSET(751)] / 1024, 2) AS p75,
+  ROUND(APPROX_QUANTILES(FLOAT64(summary.bytesFont), 1001)[OFFSET(901)] / 1024, 2) AS p90
+FROM ${ctx.ref('crawl', 'pages')}
+WHERE
+  date = '${params.date}' AND
+  ${params.devRankFilter} AND
+  ${params.lens.sql} AND
+  is_root_page AND
+  FLOAT64(summary.bytesFont) > 0
 GROUP BY
   date,
+  timestamp,
+  client
+ORDER BY
+  date DESC,
+  client
+`)
+        }
+      ]
+    },
+    bytesHtml: { // summary.bytesHtml per root page
+      SQL: [
+        {
+          type: 'histogram',
+          query: DataformTemplateBuilder.create((ctx, params) => `
+SELECT
+  *,
+  SUM(pdf) OVER (PARTITION BY client ORDER BY bin) AS cdf
+FROM (
+  SELECT
+    *,
+    volume / SUM(volume) OVER (PARTITION BY client) AS pdf
+  FROM (
+    SELECT
+      date,
+      client,
+      COUNT(0) AS volume,
+      CAST(FLOOR(FLOAT64(summary.bytesHtml) / 10240) * 10 AS INT64) AS bin
+    FROM ${ctx.ref('crawl', 'pages')}
+    WHERE
+      date = '${params.date}' AND
+      ${params.devRankFilter} AND
+      ${params.lens.sql} AND
+      is_root_page
+    GROUP BY
+      date,
+      bin,
+      client
+  )
+)
+ORDER BY
+  bin,
+  client
+`)
+        },
+        {
+          type: 'timeseries',
+          query: DataformTemplateBuilder.create((ctx, params) => `
+SELECT
+  date,
+  UNIX_DATE(date) * 1000 * 60 * 60 * 24 AS timestamp,
   client,
-  timestamp
+  ROUND(APPROX_QUANTILES(FLOAT64(summary.bytesHtml), 1001)[OFFSET(101)] / 1024, 2) AS p10,
+  ROUND(APPROX_QUANTILES(FLOAT64(summary.bytesHtml), 1001)[OFFSET(251)] / 1024, 2) AS p25,
+  ROUND(APPROX_QUANTILES(FLOAT64(summary.bytesHtml), 1001)[OFFSET(501)] / 1024, 2) AS p50,
+  ROUND(APPROX_QUANTILES(FLOAT64(summary.bytesHtml), 1001)[OFFSET(751)] / 1024, 2) AS p75,
+  ROUND(APPROX_QUANTILES(FLOAT64(summary.bytesHtml), 1001)[OFFSET(901)] / 1024, 2) AS p90
+FROM ${ctx.ref('crawl', 'pages')}
+WHERE
+  date = '${params.date}' AND
+  ${params.devRankFilter} AND
+  ${params.lens.sql} AND
+  is_root_page AND
+  FLOAT64(summary.bytesHtml) > 0
+GROUP BY
+  date,
+  timestamp,
+  client
+ORDER BY
+  date DESC,
+  client
+`)
+        }
+      ]
+    },
+    bytesImg: { // summary.bytesImg per root page
+      SQL: [
+        {
+          type: 'histogram',
+          query: DataformTemplateBuilder.create((ctx, params) => `
+SELECT
+  *,
+  SUM(pdf) OVER (PARTITION BY client ORDER BY bin) AS cdf
+FROM (
+  SELECT
+    *,
+    volume / SUM(volume) OVER (PARTITION BY client) AS pdf
+  FROM (
+    SELECT
+      date,
+      client,
+      COUNT(0) AS volume,
+      CAST(FLOOR(FLOAT64(summary.bytesImg) / 102400) * 100 AS INT64) AS bin
+    FROM ${ctx.ref('crawl', 'pages')}
+    WHERE
+      date = '${params.date}' AND
+      ${params.devRankFilter} AND
+      ${params.lens.sql} AND
+      is_root_page
+    GROUP BY
+      date,
+      bin,
+      client
+  )
+)
 ORDER BY
+  bin,
+  client
+`)
+        },
+        {
+          type: 'timeseries',
+          query: DataformTemplateBuilder.create((ctx, params) => `
+SELECT
+  date,
+  UNIX_DATE(date) * 1000 * 60 * 60 * 24 AS timestamp,
+  client,
+  ROUND(APPROX_QUANTILES(FLOAT64(summary.bytesImg), 1001)[OFFSET(101)] / 1024, 2) AS p10,
+  ROUND(APPROX_QUANTILES(FLOAT64(summary.bytesImg), 1001)[OFFSET(251)] / 1024, 2) AS p25,
+  ROUND(APPROX_QUANTILES(FLOAT64(summary.bytesImg), 1001)[OFFSET(501)] / 1024, 2) AS p50,
+  ROUND(APPROX_QUANTILES(FLOAT64(summary.bytesImg), 1001)[OFFSET(751)] / 1024, 2) AS p75,
+  ROUND(APPROX_QUANTILES(FLOAT64(summary.bytesImg), 1001)[OFFSET(901)] / 1024, 2) AS p90
+FROM ${ctx.ref('crawl', 'pages')}
+WHERE
+  date = '${params.date}' AND
+  ${params.devRankFilter} AND
+  ${params.lens.sql} AND
+  is_root_page AND
+  FLOAT64(summary.bytesImg) > 0
+GROUP BY
   date,
+  timestamp,
+  client
+ORDER BY
+  date DESC,
   client
+`)
+        }
+      ]
+    },
+    bytesJs: { // summary.bytesJS per root page
+      SQL: [
+        {
+          type: 'histogram',
+          query: DataformTemplateBuilder.create((ctx, params) => `
+SELECT
+  *,
+  SUM(pdf) OVER (PARTITION BY client ORDER BY bin) AS cdf
+FROM (
+  SELECT
+    *,
+    volume / SUM(volume) OVER (PARTITION BY client) AS pdf
+  FROM (
+    SELECT
+      date,
+      client,
+      COUNT(0) AS volume,
+      CAST(FLOOR(FLOAT64(summary.bytesJS) / 10240) * 10 AS INT64) AS bin
+    FROM ${ctx.ref('crawl', 'pages')}
+    WHERE
+      date = '${params.date}' AND
+      ${params.devRankFilter} AND
+      ${params.lens.sql} AND
+      is_root_page
+    GROUP BY
+      date,
+      bin,
+      client
+  )
+)
+ORDER BY
+  bin,
+  client
+`)
+        },
+        {
+          type: 'timeseries',
+          query: DataformTemplateBuilder.create((ctx, params) => `
+SELECT
+  date,
+  UNIX_DATE(date) * 1000 * 60 * 60 * 24 AS timestamp,
+  client,
+  ROUND(APPROX_QUANTILES(FLOAT64(summary.bytesJS), 1001)[OFFSET(101)] / 1024, 2) AS p10,
+  ROUND(APPROX_QUANTILES(FLOAT64(summary.bytesJS), 1001)[OFFSET(251)] / 1024, 2) AS p25,
+  ROUND(APPROX_QUANTILES(FLOAT64(summary.bytesJS), 1001)[OFFSET(501)] / 1024, 2) AS p50,
+  ROUND(APPROX_QUANTILES(FLOAT64(summary.bytesJS), 1001)[OFFSET(751)] / 1024, 2) AS p75,
+  ROUND(APPROX_QUANTILES(FLOAT64(summary.bytesJS), 1001)[OFFSET(901)] / 1024, 2) AS p90
+FROM ${ctx.ref('crawl', 'pages')}
+WHERE
+  date = '${params.date}' AND
+  ${params.devRankFilter} AND
+  ${params.lens.sql} AND
+  is_root_page AND
+  FLOAT64(summary.bytesJS) > 0
+GROUP BY
+  date,
+  timestamp,
+  client
+ORDER BY
+  date DESC,
+  client
+`)
+        }
+      ]
+    },
+    bytesOther: { // summary.bytesOther per root page
+      SQL: [
+        {
+          type: 'histogram',
+          query: DataformTemplateBuilder.create((ctx, params) => `
+SELECT
+  *,
+  SUM(pdf) OVER (PARTITION BY client ORDER BY bin) AS cdf
+FROM (
+  SELECT
+    *,
+    volume / SUM(volume) OVER (PARTITION BY client) AS pdf
+  FROM (
+    SELECT
+      date,
+      client,
+      COUNT(0) AS volume,
+      CAST(FLOOR(FLOAT64(summary.bytesOther) / 10240) * 10 AS INT64) AS bin
+    FROM ${ctx.ref('crawl', 'pages')}
+    WHERE
+      date = '${params.date}' AND
+      ${params.devRankFilter} AND
+      ${params.lens.sql} AND
+      is_root_page
+    GROUP BY
+      date,
+      bin,
+      client
+  )
+)
+ORDER BY
+  bin,
+  client
+`)
+        },
+        {
+          type: 'timeseries',
+          query: DataformTemplateBuilder.create((ctx, params) => `
+SELECT
+  date,
+  UNIX_DATE(date) * 1000 * 60 * 60 * 24 AS timestamp,
+  client,
+  ROUND(APPROX_QUANTILES(FLOAT64(summary.bytesOther), 1001)[OFFSET(101)] / 1024, 2) AS p10,
+  ROUND(APPROX_QUANTILES(FLOAT64(summary.bytesOther), 1001)[OFFSET(251)] / 1024, 2) AS p25,
+  ROUND(APPROX_QUANTILES(FLOAT64(summary.bytesOther), 1001)[OFFSET(501)] / 1024, 2) AS p50,
+  ROUND(APPROX_QUANTILES(FLOAT64(summary.bytesOther), 1001)[OFFSET(751)] / 1024, 2) AS p75,
+  ROUND(APPROX_QUANTILES(FLOAT64(summary.bytesOther), 1001)[OFFSET(901)] / 1024, 2) AS p90
+FROM ${ctx.ref('crawl', 'pages')}
+WHERE
+  date = '${params.date}' AND
+  ${params.devRankFilter} AND
+  ${params.lens.sql} AND
+  is_root_page AND
+  FLOAT64(summary.bytesOther) > 0
+GROUP BY
+  date,
+  timestamp,
+  client
+ORDER BY
+  date DESC,
+  client
+`)
+        }
+      ]
+    },
+    bytesTotal: { // summary.bytesTotal per root page
+      SQL: [
+        {
+          type: 'histogram',
+          query: DataformTemplateBuilder.create((ctx, params) => `
+SELECT
+  *,
+  SUM(pdf) OVER (PARTITION BY client ORDER BY bin) AS cdf
+FROM (
+  SELECT
+    *,
+    volume / SUM(volume) OVER (PARTITION BY client) AS pdf
+  FROM (
+    SELECT
+      date,
+      client,
+      COUNT(0) AS volume,
+      CAST(FLOOR(FLOAT64(summary.bytesTotal) / 1024 / 100) * 100 AS INT64) AS bin
+    FROM ${ctx.ref('crawl', 'pages')}
+    WHERE
+      date = '${params.date}' AND
+      ${params.devRankFilter} AND
+      ${params.lens.sql} AND
+      is_root_page
+    GROUP BY
+      date,
+      bin,
+      client
+  )
+)
+ORDER BY
+  bin,
+  client
+`)
+        },
+        {
+          type: 'timeseries', // reads summary.bytesTotal directly; there is no `pages` CTE alias any more
+          query: DataformTemplateBuilder.create((ctx, params) => `
+SELECT
+  date,
+  UNIX_DATE(date) * 1000 * 60 * 60 * 24 AS timestamp,
+  client,
+  ROUND(APPROX_QUANTILES(FLOAT64(summary.bytesTotal), 1001)[OFFSET(101)] / 1024, 2) AS p10,
+  ROUND(APPROX_QUANTILES(FLOAT64(summary.bytesTotal), 1001)[OFFSET(251)] / 1024, 2) AS p25,
+  ROUND(APPROX_QUANTILES(FLOAT64(summary.bytesTotal), 1001)[OFFSET(501)] / 1024, 2) AS p50,
+  ROUND(APPROX_QUANTILES(FLOAT64(summary.bytesTotal), 1001)[OFFSET(751)] / 1024, 2) AS p75,
+  ROUND(APPROX_QUANTILES(FLOAT64(summary.bytesTotal), 1001)[OFFSET(901)] / 1024, 2) AS p90
+FROM ${ctx.ref('crawl', 'pages')}
+WHERE
+  date = '${params.date}' AND
+  ${params.devRankFilter} AND
+  ${params.lens.sql} AND
+  is_root_page AND
+  INT64(summary.bytesTotal) > 0
+GROUP BY
+  date,
+  timestamp,
+  client
+ORDER BY
+  date DESC,
+  client
+`)
+        }
+      ]
+    },
+    bytesVideo: { // summary.bytesVideo per root page
+      SQL: [
+        {
+          type: 'histogram',
+          query: DataformTemplateBuilder.create((ctx, params) => `
+SELECT
+  *,
+  SUM(pdf) OVER (PARTITION BY client ORDER BY bin) AS cdf
+FROM (
+  SELECT
+    *,
+    volume / SUM(volume) OVER (PARTITION BY client) AS pdf
+  FROM (
+    SELECT
+      date,
+      client,
+      COUNT(0) AS volume,
+      CAST(FLOOR(FLOAT64(summary.bytesVideo) / 10240) * 10 AS INT64) AS bin
+    FROM ${ctx.ref('crawl', 'pages')}
+    WHERE
+      date = '${params.date}' AND
+      ${params.devRankFilter} AND
+      ${params.lens.sql} AND
+      is_root_page
+    GROUP BY
+      date,
+      bin,
+      client
+  )
+)
+ORDER BY
+  bin,
+  client
+`)
+        },
+        {
+          type: 'timeseries',
+          query: DataformTemplateBuilder.create((ctx, params) => `
+SELECT
+  date,
+  UNIX_DATE(date) * 1000 * 60 * 60 * 24 AS timestamp,
+  client,
+  ROUND(APPROX_QUANTILES(FLOAT64(summary.bytesVideo), 1001)[OFFSET(101)] / 1024, 2) AS p10,
+  ROUND(APPROX_QUANTILES(FLOAT64(summary.bytesVideo), 1001)[OFFSET(251)] / 1024, 2) AS p25,
+  ROUND(APPROX_QUANTILES(FLOAT64(summary.bytesVideo), 1001)[OFFSET(501)] / 1024, 2) AS p50,
+  ROUND(APPROX_QUANTILES(FLOAT64(summary.bytesVideo), 1001)[OFFSET(751)] / 1024, 2) AS p75,
+  ROUND(APPROX_QUANTILES(FLOAT64(summary.bytesVideo), 1001)[OFFSET(901)] / 1024, 2) AS p90
+FROM ${ctx.ref('crawl', 'pages')}
+WHERE
+  date = '${params.date}' AND
+  ${params.devRankFilter} AND
+  ${params.lens.sql} AND
+  is_root_page AND
+  FLOAT64(summary.bytesVideo) > 0
+GROUP BY
+  date,
+  timestamp,
+  client
+ORDER BY
+  date DESC,
+  client
+`)
+        }
+      ]
+    },
+    compileJs: { // payload['_cpu.v8.compile'] per root page
+      SQL: [
+        {
+          type: 'histogram',
+          query: DataformTemplateBuilder.create((ctx, params) => `
+SELECT
+  *,
+  SUM(pdf) OVER (PARTITION BY client ORDER BY bin) AS cdf
+FROM (
+  SELECT
+    *,
+    volume / SUM(volume) OVER (PARTITION BY client) AS pdf
+  FROM (
+    SELECT
+      date,
+      client,
+      COUNT(0) AS volume,
+      INT64(payload['_cpu.v8.compile']) AS bin
+    FROM ${ctx.ref('crawl', 'pages')}
+    WHERE
+      date = '${params.date}' AND
+      ${params.devRankFilter} AND
+      ${params.lens.sql} AND
+      is_root_page
+    GROUP BY
+      date,
+      bin,
+      client
+    HAVING
+      bin >= 0
+  )
+)
+ORDER BY
+  bin,
+  client
+`)
+        },
+        {
+          type: 'timeseries',
+          query: DataformTemplateBuilder.create((ctx, params) => `
+SELECT
+  date,
+  UNIX_DATE(date) * 1000 * 60 * 60 * 24 AS timestamp,
+  client,
+  ROUND(APPROX_QUANTILES(value, 1000)[OFFSET(100)], 2) AS p10,
+  ROUND(APPROX_QUANTILES(value, 1000)[OFFSET(250)], 2) AS p25,
+  ROUND(APPROX_QUANTILES(value, 1000)[OFFSET(500)], 2) AS p50,
+  ROUND(APPROX_QUANTILES(value, 1000)[OFFSET(750)], 2) AS p75,
+  ROUND(APPROX_QUANTILES(value, 1000)[OFFSET(900)], 2) AS p90
+FROM (
+  SELECT
+    date,
+    client,
+    INT64(payload['_cpu.v8.compile']) AS value
+  FROM ${ctx.ref('crawl', 'pages')}
+  WHERE
+    date = '${params.date}' AND
+    ${params.devRankFilter} AND
+    ${params.lens.sql} AND
+    is_root_page AND
+    INT64(payload['_cpu.v8.compile']) IS NOT NULL AND
+    INT64(payload['_cpu.v8.compile']) >= 0
+)
+GROUP BY
+  date,
+  timestamp,
+  client
+ORDER BY
+  date DESC,
+  client
+`)
+        }
+      ]
+    },
+    dcl: { // summary.onContentLoaded per root page
+      SQL: [
+        {
+          type: 'histogram',
+          query: DataformTemplateBuilder.create((ctx, params) => `
+SELECT
+  *,
+  SUM(pdf) OVER (PARTITION BY client ORDER BY bin) AS cdf
+FROM (
+  SELECT
+    *,
+    volume / SUM(volume) OVER (PARTITION BY client) AS pdf
+  FROM (
+    SELECT
+      date,
+      client,
+      COUNT(0) AS volume,
+      FLOOR(FLOAT64(summary.onContentLoaded) / 1000) AS bin
+    FROM ${ctx.ref('crawl', 'pages')}
+    WHERE
+      date = '${params.date}' AND
+      ${params.devRankFilter} AND
+      ${params.lens.sql} AND
+      is_root_page AND
+      FLOAT64(summary.onContentLoaded) > 0
+    GROUP BY
+      date,
+      bin,
+      client
+  )
+)
+ORDER BY
+  bin,
+  client
+`)
+        },
+        {
+          type: 'timeseries',
+          query: DataformTemplateBuilder.create((ctx, params) => `
+SELECT
+  date,
+  UNIX_DATE(date) * 1000 * 60 * 60 * 24 AS timestamp,
+  client,
+  ROUND(APPROX_QUANTILES(FLOAT64(summary.onContentLoaded), 1001)[OFFSET(101)], 2) AS p10,
+  ROUND(APPROX_QUANTILES(FLOAT64(summary.onContentLoaded), 1001)[OFFSET(251)], 2) AS p25,
+  ROUND(APPROX_QUANTILES(FLOAT64(summary.onContentLoaded), 1001)[OFFSET(501)], 2) AS p50,
+  ROUND(APPROX_QUANTILES(FLOAT64(summary.onContentLoaded), 1001)[OFFSET(751)], 2) AS p75,
+  ROUND(APPROX_QUANTILES(FLOAT64(summary.onContentLoaded), 1001)[OFFSET(901)], 2) AS p90
+FROM ${ctx.ref('crawl', 'pages')}
+WHERE
+  date = '${params.date}' AND
+  ${params.devRankFilter} AND
+  ${params.lens.sql} AND
+  is_root_page AND
+  FLOAT64(summary.onContentLoaded) > 0
+GROUP BY
+  date,
+  timestamp,
+  client
+ORDER BY
+  date DESC,
+  client
+`)
+        }
+      ]
+    },
+    evalJs: { // request payload['_cpu.EvaluateScript'], requests joined to pages
+      SQL: [
+        {
+          type: 'histogram',
+          query: DataformTemplateBuilder.create((ctx, params) => `
+SELECT
+  *,
+  SUM(pdf) OVER (PARTITION BY client ORDER BY bin) AS cdf
+FROM (
+  SELECT
+    *,
+    volume / SUM(volume) OVER (PARTITION BY client) AS pdf
+  FROM (
+    SELECT
+      date,
+      client,
+      COUNT(0) AS volume,
+      CAST(FLOAT64(r.payload['_cpu.EvaluateScript']) / 20 AS INT64) * 20 AS bin
+    FROM ${ctx.ref('crawl', 'requests')} r
+    INNER JOIN ${ctx.ref('crawl', 'pages')}
+    USING (date, client, is_root_page, rank, page)
+    WHERE
+      date = '${params.date}' AND
+      ${params.devRankFilter} AND
+      ${params.lens.sql} AND
+      is_root_page
+    GROUP BY
+      date,
+      bin,
+      client
+    HAVING
+      bin >= 0
+  )
+)
+ORDER BY
+  bin,
+  client
+`)
+        }
+      ]
+    },
+    fcp: { // payload['_chromeUserTiming.firstContentfulPaint'] per root page
+      SQL: [
+        {
+          type: 'histogram',
+          query: DataformTemplateBuilder.create((ctx, params) => `
+SELECT
+  *,
+  SUM(pdf) OVER (PARTITION BY client ORDER BY bin) AS cdf
+FROM (
+  SELECT
+    *,
+    volume / SUM(volume) OVER (PARTITION BY client) AS pdf
+  FROM (
+    SELECT
+      date,
+      client,
+      COUNT(0) AS volume,
+      CAST(FLOOR(FLOAT64(payload['_chromeUserTiming.firstContentfulPaint']) / 1000) AS INT64) AS bin
+    FROM ${ctx.ref('crawl', 'pages')}
+    WHERE
+      date = '${params.date}' AND
+      ${params.devRankFilter} AND
+      ${params.lens.sql} AND
+      is_root_page
+    GROUP BY
+      date,
+      bin,
+      client
+    HAVING
+      bin >= 0
+  )
+)
+ORDER BY
+  bin,
+  client
+`)
+        },
+        {
+          type: 'timeseries',
+          query: DataformTemplateBuilder.create((ctx, params) => `
+SELECT
+  date,
+  UNIX_DATE(date) * 1000 * 60 * 60 * 24 AS timestamp,
+  client,
+  ROUND(APPROX_QUANTILES(FLOAT64(payload['_chromeUserTiming.firstContentfulPaint']), 1001)[OFFSET(101)] / 1024, 2) AS p10,
+  ROUND(APPROX_QUANTILES(FLOAT64(payload['_chromeUserTiming.firstContentfulPaint']), 1001)[OFFSET(251)] / 1024, 2) AS p25,
+  ROUND(APPROX_QUANTILES(FLOAT64(payload['_chromeUserTiming.firstContentfulPaint']), 1001)[OFFSET(501)] / 1024, 2) AS p50,
+  ROUND(APPROX_QUANTILES(FLOAT64(payload['_chromeUserTiming.firstContentfulPaint']), 1001)[OFFSET(751)] / 1024, 2) AS p75,
+  ROUND(APPROX_QUANTILES(FLOAT64(payload['_chromeUserTiming.firstContentfulPaint']), 1001)[OFFSET(901)] / 1024, 2) AS p90
+FROM ${ctx.ref('crawl', 'pages')}
+WHERE
+  date = '${params.date}' AND
+  ${params.devRankFilter} AND
+  ${params.lens.sql} AND
+  is_root_page
+GROUP BY
+  date,
+  timestamp,
+  client
+HAVING
+  p50 IS NOT NULL
+ORDER BY
+  date DESC,
+  client
+`)
+        }
+      ]
+    },
+    gzipSavings: { // payload._gzip_savings per root page
+      SQL: [
+        {
+          type: 'histogram',
+          query: DataformTemplateBuilder.create((ctx, params) => `
+SELECT
+  *,
+  SUM(pdf) OVER (PARTITION BY client ORDER BY bin) AS cdf
+FROM (
+  SELECT
+    *,
+    volume / SUM(volume) OVER (PARTITION BY client) AS pdf
+  FROM (
+    SELECT
+      date,
+      client,
+      COUNT(0) AS volume,
+      CAST(FLOOR(FLOAT64(payload._gzip_savings) / (1024 * 2)) * 2 AS INT64) AS bin
+    FROM ${ctx.ref('crawl', 'pages')}
+    WHERE
+      date = '${params.date}' AND
+      ${params.devRankFilter} AND
+      ${params.lens.sql} AND
+      is_root_page
+    GROUP BY
+      date,
+      bin,
+      client
+    HAVING
+      bin IS NOT NULL
+  )
+)
+ORDER BY
+  bin,
+  client
+`)
+        },
+        {
+          type: 'timeseries',
+          query: DataformTemplateBuilder.create((ctx, params) => `
+SELECT
+  date,
+  UNIX_DATE(date) * 1000 * 60 * 60 * 24 AS timestamp,
+  client,
+  ROUND(APPROX_QUANTILES(FLOAT64(payload._gzip_savings), 1001)[OFFSET(101)] / 1024, 2) AS p10,
+  ROUND(APPROX_QUANTILES(FLOAT64(payload._gzip_savings), 1001)[OFFSET(251)] / 1024, 2) AS p25,
+  ROUND(APPROX_QUANTILES(FLOAT64(payload._gzip_savings), 1001)[OFFSET(501)] / 1024, 2) AS p50,
+  ROUND(APPROX_QUANTILES(FLOAT64(payload._gzip_savings), 1001)[OFFSET(751)] / 1024, 2) AS p75,
+  ROUND(APPROX_QUANTILES(FLOAT64(payload._gzip_savings), 1001)[OFFSET(901)] / 1024, 2) AS p90
+FROM ${ctx.ref('crawl', 'pages')}
+WHERE
+  date = '${params.date}' AND
+  ${params.devRankFilter} AND
+  ${params.lens.sql} AND
+  is_root_page
+GROUP BY
+  date,
+  timestamp,
+  client
+ORDER BY
+  date DESC,
+  client
+`)
+        }
+      ]
+    },
+    ol: { // summary.onLoad per root page
+      SQL: [
+        {
+          type: 'histogram',
+          query: DataformTemplateBuilder.create((ctx, params) => `
+SELECT
+  *,
+  SUM(pdf) OVER (PARTITION BY client ORDER BY bin) AS cdf
+FROM (
+  SELECT
+    *,
+    volume / SUM(volume) OVER (PARTITION BY client) AS pdf
+  FROM (
+    SELECT
+      date,
+      client,
+      COUNT(0) AS volume,
+      FLOOR(FLOAT64(summary.onLoad) / 1000) AS bin
+    FROM ${ctx.ref('crawl', 'pages')}
+    WHERE
+      date = '${params.date}' AND
+      ${params.devRankFilter} AND
+      ${params.lens.sql} AND
+      is_root_page AND
+      FLOAT64(summary.onLoad) > 0
+    GROUP BY
+      date,
+      bin,
+      client
+  )
+)
+ORDER BY
+  bin,
+  client
+`)
+        },
+        {
+          type: 'timeseries',
+          query: DataformTemplateBuilder.create((ctx, params) => `
+SELECT
+  date,
+  UNIX_DATE(date) * 1000 * 60 * 60 * 24 AS timestamp,
+  client,
+  ROUND(APPROX_QUANTILES(FLOAT64(summary.onLoad), 1001)[OFFSET(101)] / 1000, 2) AS p10,
+  ROUND(APPROX_QUANTILES(FLOAT64(summary.onLoad), 1001)[OFFSET(251)] / 1000, 2) AS p25,
+  ROUND(APPROX_QUANTILES(FLOAT64(summary.onLoad), 1001)[OFFSET(501)] / 1000, 2) AS p50,
+  ROUND(APPROX_QUANTILES(FLOAT64(summary.onLoad), 1001)[OFFSET(751)] / 1000, 2) AS p75,
+  ROUND(APPROX_QUANTILES(FLOAT64(summary.onLoad), 1001)[OFFSET(901)] / 1000, 2) AS p90
+FROM ${ctx.ref('crawl', 'pages')}
+WHERE
+  date = '${params.date}' AND
+  ${params.devRankFilter} AND
+  ${params.lens.sql} AND
+  is_root_page AND
+  FLOAT64(summary.onLoad) > 0
+GROUP BY
+  date,
+  timestamp,
+  client
+ORDER BY
+  date DESC,
+  client
+`)
+        }
+      ]
+    },
+    reqCss: { // summary.reqCss per root page
+      SQL: [
+        {
+          type: 'histogram', // WHERE predicates AND-joined, as in the timeseries query below
+          query: DataformTemplateBuilder.create((ctx, params) => `
+SELECT
+  *,
+  SUM(pdf) OVER (PARTITION BY client ORDER BY bin) AS cdf
+FROM (
+  SELECT
+    *,
+    volume / SUM(volume) OVER (PARTITION BY client) AS pdf
+  FROM (
+    SELECT
+      client,
+      COUNT(0) AS volume,
+      FLOAT64(summary.reqCss) AS bin
+    FROM ${ctx.ref('crawl', 'pages')}
+    WHERE
+      date = '${params.date}' AND
+      ${params.devRankFilter} AND
+      ${params.lens.sql} AND
+      is_root_page
+    GROUP BY
+      bin,
+      client
+  )
+)
+ORDER BY
+  bin,
+  client
+`)
+        },
+        {
+          type: 'timeseries',
+          query: DataformTemplateBuilder.create((ctx, params) => `
+SELECT
+  date,
+  UNIX_DATE(date) * 1000 * 60 * 60 * 24 AS timestamp,
+  client,
+  ROUND(APPROX_QUANTILES(FLOAT64(summary.reqCss), 1001)[OFFSET(101)], 2) AS p10,
+  ROUND(APPROX_QUANTILES(FLOAT64(summary.reqCss), 1001)[OFFSET(251)], 2) AS p25,
+  ROUND(APPROX_QUANTILES(FLOAT64(summary.reqCss), 1001)[OFFSET(501)], 2) AS p50,
+  ROUND(APPROX_QUANTILES(FLOAT64(summary.reqCss), 1001)[OFFSET(751)], 2) AS p75,
+  ROUND(APPROX_QUANTILES(FLOAT64(summary.reqCss), 1001)[OFFSET(901)], 2) AS p90
+FROM ${ctx.ref('crawl', 'pages')}
+WHERE
+  date = '${params.date}' AND
+  ${params.devRankFilter} AND
+  ${params.lens.sql} AND
+  is_root_page AND
+  FLOAT64(summary.reqCss) > 0
+GROUP BY
+  date,
+  timestamp,
+  client
+ORDER BY
+  date DESC,
+  client
+`)
+        }
+      ]
+    },
+    reqFont: { // summary.reqFont per root page
+      SQL: [
+        {
+          type: 'histogram', // WHERE predicates AND-joined, as in the timeseries query below
+          query: DataformTemplateBuilder.create((ctx, params) => `
+SELECT
+  *,
+  SUM(pdf) OVER (PARTITION BY client ORDER BY bin) AS cdf
+FROM (
+  SELECT
+    *,
+    volume / SUM(volume) OVER (PARTITION BY client) AS pdf
+  FROM (
+    SELECT
+      client,
+      COUNT(0) AS volume,
+      FLOAT64(summary.reqFont) AS bin
+    FROM ${ctx.ref('crawl', 'pages')}
+    WHERE
+      date = '${params.date}' AND
+      ${params.devRankFilter} AND
+      ${params.lens.sql} AND
+      is_root_page
+    GROUP BY
+      bin,
+      client
+  )
+)
+ORDER BY
+  bin,
+  client
+`)
+        },
+        {
+          type: 'timeseries',
+          query: DataformTemplateBuilder.create((ctx, params) => `
+SELECT
+  date,
+  UNIX_DATE(date) * 1000 * 60 * 60 * 24 AS timestamp,
+  client,
+  ROUND(APPROX_QUANTILES(FLOAT64(summary.reqFont), 1001)[OFFSET(101)], 2) AS p10,
+  ROUND(APPROX_QUANTILES(FLOAT64(summary.reqFont), 1001)[OFFSET(251)], 2) AS p25,
+  ROUND(APPROX_QUANTILES(FLOAT64(summary.reqFont), 1001)[OFFSET(501)], 2) AS p50,
+  ROUND(APPROX_QUANTILES(FLOAT64(summary.reqFont), 1001)[OFFSET(751)], 2) AS p75,
+  ROUND(APPROX_QUANTILES(FLOAT64(summary.reqFont), 1001)[OFFSET(901)], 2) AS p90
+FROM ${ctx.ref('crawl', 'pages')}
+WHERE
+  date = '${params.date}' AND
+  ${params.devRankFilter} AND
+  ${params.lens.sql} AND
+  is_root_page AND
+  FLOAT64(summary.reqFont) > 0
+GROUP BY
+  date,
+  timestamp,
+  client
+ORDER BY
+  date DESC,
+  client
+`)
+        }
+      ]
+    },
+    reqHtml: { // summary.reqHtml per root page
+      SQL: [
+        {
+          type: 'histogram', // WHERE predicates AND-joined, as in the timeseries query below
+          query: DataformTemplateBuilder.create((ctx, params) => `
+SELECT
+  *,
+  SUM(pdf) OVER (PARTITION BY client ORDER BY bin) AS cdf
+FROM (
+  SELECT
+    *,
+    volume / SUM(volume) OVER (PARTITION BY client) AS pdf
+  FROM (
+    SELECT
+      client,
+      COUNT(0) AS volume,
+      FLOAT64(summary.reqHtml) AS bin
+    FROM ${ctx.ref('crawl', 'pages')}
+    WHERE
+      date = '${params.date}' AND
+      ${params.devRankFilter} AND
+      ${params.lens.sql} AND
+      is_root_page
+    GROUP BY
+      bin,
+      client
+  )
+)
+ORDER BY
+  bin,
+  client
+`)
+        },
+        {
+          type: 'timeseries',
+          query: DataformTemplateBuilder.create((ctx, params) => `
+SELECT
+  date,
+  UNIX_DATE(date) * 1000 * 60 * 60 * 24 AS timestamp,
+  client,
+  ROUND(APPROX_QUANTILES(FLOAT64(summary.reqHtml), 1001)[OFFSET(101)], 2) AS p10,
+  ROUND(APPROX_QUANTILES(FLOAT64(summary.reqHtml), 1001)[OFFSET(251)], 2) AS p25,
+  ROUND(APPROX_QUANTILES(FLOAT64(summary.reqHtml), 1001)[OFFSET(501)], 2) AS p50,
+  ROUND(APPROX_QUANTILES(FLOAT64(summary.reqHtml), 1001)[OFFSET(751)], 2) AS p75,
+  ROUND(APPROX_QUANTILES(FLOAT64(summary.reqHtml), 1001)[OFFSET(901)], 2) AS p90
+FROM ${ctx.ref('crawl', 'pages')}
+WHERE
+  date = '${params.date}' AND
+  ${params.devRankFilter} AND
+  ${params.lens.sql} AND
+  is_root_page AND
+  FLOAT64(summary.reqHtml) > 0
+GROUP BY
+  date,
+  timestamp,
+  client
+ORDER BY
+  date DESC,
+  client
+`)
+        }
+      ]
+    },
+    reqImg: { // summary.reqImg per root page
+      SQL: [
+        {
+          type: 'histogram', // WHERE predicates AND-joined, as in the timeseries query below
+          query: DataformTemplateBuilder.create((ctx, params) => `
+SELECT
+  *,
+  SUM(pdf) OVER (PARTITION BY client ORDER BY bin) AS cdf
+FROM (
+  SELECT
+    *,
+    volume / SUM(volume) OVER (PARTITION BY client) AS pdf
+  FROM (
+    SELECT
+      client,
+      COUNT(0) AS volume,
+      FLOAT64(summary.reqImg) AS bin
+    FROM ${ctx.ref('crawl', 'pages')}
+    WHERE
+      date = '${params.date}' AND
+      ${params.devRankFilter} AND
+      ${params.lens.sql} AND
+      is_root_page
+    GROUP BY
+      bin,
+      client
+  )
+)
+ORDER BY
+  bin,
+  client
+`)
+        },
+        {
+          type: 'timeseries',
+          query: DataformTemplateBuilder.create((ctx, params) => `
+SELECT
+  date,
+  UNIX_DATE(date) * 1000 * 60 * 60 * 24 AS timestamp,
+  client,
+  ROUND(APPROX_QUANTILES(FLOAT64(summary.reqImg), 1001)[OFFSET(101)], 2) AS p10,
+  ROUND(APPROX_QUANTILES(FLOAT64(summary.reqImg), 1001)[OFFSET(251)], 2) AS p25,
+  ROUND(APPROX_QUANTILES(FLOAT64(summary.reqImg), 1001)[OFFSET(501)], 2) AS p50,
+  ROUND(APPROX_QUANTILES(FLOAT64(summary.reqImg), 1001)[OFFSET(751)], 2) AS p75,
+  ROUND(APPROX_QUANTILES(FLOAT64(summary.reqImg), 1001)[OFFSET(901)], 2) AS p90
+FROM ${ctx.ref('crawl', 'pages')}
+WHERE
+  date = '${params.date}' AND
+  ${params.devRankFilter} AND
+  ${params.lens.sql} AND
+  is_root_page AND
+  FLOAT64(summary.reqImg) > 0
+GROUP BY
+  date,
+  timestamp,
+  client
+ORDER BY
+  date DESC,
+  client
+`)
+        }
+      ]
+    },
+    reqJs: { // summary.reqJS per root page
+      SQL: [
+        {
+          type: 'histogram', // WHERE predicates AND-joined, as in the timeseries query below
+          query: DataformTemplateBuilder.create((ctx, params) => `
+SELECT
+  *,
+  SUM(pdf) OVER (PARTITION BY client ORDER BY bin) AS cdf
+FROM (
+  SELECT
+    *,
+    volume / SUM(volume) OVER (PARTITION BY client) AS pdf
+  FROM (
+    SELECT
+      client,
+      COUNT(0) AS volume,
+      FLOAT64(summary.reqJS) AS bin
+    FROM ${ctx.ref('crawl', 'pages')}
+    WHERE
+      date = '${params.date}' AND
+      ${params.devRankFilter} AND
+      ${params.lens.sql} AND
+      is_root_page
+    GROUP BY
+      bin,
+      client
+  )
+)
+ORDER BY
+  bin,
+  client
+`)
+        },
+        {
+          type: 'timeseries',
+          query: DataformTemplateBuilder.create((ctx, params) => `
+SELECT
+  date,
+  UNIX_DATE(date) * 1000 * 60 * 60 * 24 AS timestamp,
+  client,
+  ROUND(APPROX_QUANTILES(FLOAT64(summary.reqJS), 1001)[OFFSET(101)], 2) AS p10,
+  ROUND(APPROX_QUANTILES(FLOAT64(summary.reqJS), 1001)[OFFSET(251)], 2) AS p25,
+  ROUND(APPROX_QUANTILES(FLOAT64(summary.reqJS), 1001)[OFFSET(501)], 2) AS p50,
+  ROUND(APPROX_QUANTILES(FLOAT64(summary.reqJS), 1001)[OFFSET(751)], 2) AS p75,
+  ROUND(APPROX_QUANTILES(FLOAT64(summary.reqJS), 1001)[OFFSET(901)], 2) AS p90
+FROM ${ctx.ref('crawl', 'pages')}
+WHERE
+  date = '${params.date}' AND
+  ${params.devRankFilter} AND
+  ${params.lens.sql} AND
+  is_root_page AND
+  FLOAT64(summary.reqJS) > 0
+GROUP BY
+  date,
+  timestamp,
+  client
+ORDER BY
+  date DESC,
+  client
+`)
+        }
+      ]
+    },
+    reqOther: { // summary.reqOther per root page
+      SQL: [
+        {
+          type: 'histogram', // WHERE predicates AND-joined, as in the timeseries query below
+          query: DataformTemplateBuilder.create((ctx, params) => `
+SELECT
+  *,
+  SUM(pdf) OVER (PARTITION BY client ORDER BY bin) AS cdf
+FROM (
+  SELECT
+    *,
+    volume / SUM(volume) OVER (PARTITION BY client) AS pdf
+  FROM (
+    SELECT
+      client,
+      COUNT(0) AS volume,
+      FLOAT64(summary.reqOther) AS bin
+    FROM ${ctx.ref('crawl', 'pages')}
+    WHERE
+      date = '${params.date}' AND
+      ${params.devRankFilter} AND
+      ${params.lens.sql} AND
+      is_root_page
+    GROUP BY
+      bin,
+      client
+  )
+)
+ORDER BY
+  bin,
+  client
+`)
+        },
+        {
+          type: 'timeseries',
+          query: DataformTemplateBuilder.create((ctx, params) => `
+SELECT
+  date,
+  UNIX_DATE(date) * 1000 * 60 * 60 * 24 AS timestamp,
+  client,
+  ROUND(APPROX_QUANTILES(FLOAT64(summary.reqOther), 1001)[OFFSET(101)], 2) AS p10,
+  ROUND(APPROX_QUANTILES(FLOAT64(summary.reqOther), 1001)[OFFSET(251)], 2) AS p25,
+  ROUND(APPROX_QUANTILES(FLOAT64(summary.reqOther), 1001)[OFFSET(501)], 2) AS p50,
+  ROUND(APPROX_QUANTILES(FLOAT64(summary.reqOther), 1001)[OFFSET(751)], 2) AS p75,
+  ROUND(APPROX_QUANTILES(FLOAT64(summary.reqOther), 1001)[OFFSET(901)], 2) AS p90
+FROM ${ctx.ref('crawl', 'pages')}
+WHERE
+  date = '${params.date}' AND
+  ${params.devRankFilter} AND
+  ${params.lens.sql} AND
+  is_root_page AND
+  FLOAT64(summary.reqOther) > 0
+GROUP BY
+  date,
+  timestamp,
+  client
+ORDER BY
+  date DESC,
+  client
+`)
+        }
+      ]
+    },
+    reqTotal: { // summary.reqTotal per root page
+      SQL: [
+        {
+          type: 'histogram', // WHERE predicates AND-joined, as in the timeseries query below
+          query: DataformTemplateBuilder.create((ctx, params) => `
+SELECT
+  *,
+  SUM(pdf) OVER (PARTITION BY client ORDER BY bin) AS cdf
+FROM (
+  SELECT
+    *,
+    volume / SUM(volume) OVER (PARTITION BY client) AS pdf
+  FROM (
+    SELECT
+      client,
+      COUNT(0) AS volume,
+      FLOOR(FLOAT64(summary.reqTotal) / 10) * 10 AS bin
+    FROM ${ctx.ref('crawl', 'pages')}
+    WHERE
+      date = '${params.date}' AND
+      ${params.devRankFilter} AND
+      ${params.lens.sql} AND
+      is_root_page
+    GROUP BY
+      bin,
+      client
+  )
+)
+ORDER BY
+  bin,
+  client
+`)
+        },
+        {
+          type: 'timeseries',
+          query: DataformTemplateBuilder.create((ctx, params) => `
+SELECT
+  date,
+  UNIX_DATE(date) * 1000 * 60 * 60 * 24 AS timestamp,
+  client,
+  ROUND(APPROX_QUANTILES(FLOAT64(summary.reqTotal), 1001)[OFFSET(101)], 2) AS p10,
+  ROUND(APPROX_QUANTILES(FLOAT64(summary.reqTotal), 1001)[OFFSET(251)], 2) AS p25,
+  ROUND(APPROX_QUANTILES(FLOAT64(summary.reqTotal), 1001)[OFFSET(501)], 2) AS p50,
+  ROUND(APPROX_QUANTILES(FLOAT64(summary.reqTotal), 1001)[OFFSET(751)], 2) AS p75,
+  ROUND(APPROX_QUANTILES(FLOAT64(summary.reqTotal), 1001)[OFFSET(901)], 2) AS p90
+FROM ${ctx.ref('crawl', 'pages')}
+WHERE
+  date = '${params.date}' AND
+  ${params.devRankFilter} AND
+  ${params.lens.sql} AND
+  is_root_page AND
+  FLOAT64(summary.reqTotal) > 0
+GROUP BY
+  date,
+  timestamp,
+  client
+ORDER BY
+  date DESC,
+  client
+`)
+        }
+      ]
+    },
+    reqVideo: { // summary.reqVideo per root page
+      SQL: [
+        {
+          type: 'histogram', // WHERE predicates AND-joined, as in the timeseries query below
+          query: DataformTemplateBuilder.create((ctx, params) => `
+SELECT
+  *,
+  SUM(pdf) OVER (PARTITION BY client ORDER BY bin) AS cdf
+FROM (
+  SELECT
+    *,
+    volume / SUM(volume) OVER (PARTITION BY client) AS pdf
+  FROM (
+    SELECT
+      client,
+      COUNT(0) AS volume,
+      FLOAT64(summary.reqVideo) AS bin
+    FROM ${ctx.ref('crawl', 'pages')}
+    WHERE
+      date = '${params.date}' AND
+      ${params.devRankFilter} AND
+      ${params.lens.sql} AND
+      is_root_page
+    GROUP BY
+      bin,
+      client
+  )
+)
+ORDER BY
+  bin,
+  client
+`)
+        },
+        {
+          type: 'timeseries',
+          query: DataformTemplateBuilder.create((ctx, params) => `
+SELECT
+  date,
+  UNIX_DATE(date) * 1000 * 60 * 60 * 24 AS timestamp,
+  client,
+  ROUND(APPROX_QUANTILES(FLOAT64(summary.reqVideo), 1001)[OFFSET(101)], 2) AS p10,
+  ROUND(APPROX_QUANTILES(FLOAT64(summary.reqVideo), 1001)[OFFSET(251)], 2) AS p25,
+  ROUND(APPROX_QUANTILES(FLOAT64(summary.reqVideo), 1001)[OFFSET(501)], 2) AS p50,
+  ROUND(APPROX_QUANTILES(FLOAT64(summary.reqVideo), 1001)[OFFSET(751)], 2) AS p75,
+  ROUND(APPROX_QUANTILES(FLOAT64(summary.reqVideo), 1001)[OFFSET(901)], 2) AS p90
+FROM ${ctx.ref('crawl', 'pages')}
+WHERE
+  date = '${params.date}' AND
+  ${params.devRankFilter} AND
+  ${params.lens.sql} AND
+  is_root_page AND
+  FLOAT64(summary.reqVideo) > 0
+GROUP BY
+  date,
+  timestamp,
+  client
+ORDER BY
+  date DESC,
+  client
+`)
+        }
+      ]
+    },
+    imgSavings: {
+      SQL: [
+        {
+          type: 'histogram',
+          query: DataformTemplateBuilder.create((ctx, params) => `
+SELECT
+  *,
+  SUM(pdf) OVER (PARTITION BY client ORDER BY bin) AS cdf
+FROM (
+  SELECT
+    *,
+    volume / SUM(volume) OVER (PARTITION BY client) AS pdf
+ FROM ( + SELECT + client, + COUNT(0) AS volume, + CAST(FLOOR(FLOAT64(payload._image_savings) / (1024 * 10)) * 10 AS INT64) AS bin + FROM ${ctx.ref('crawl', 'pages')} + WHERE + date = '${params.date}' AND + ${params.devRankFilter} + ${params.lens.sql} + is_root_page + GROUP BY + bin, + client + HAVING + bin IS NOT NULL + ) +) +ORDER BY + bin, + client +`) + }, + { + type: 'timeseries', + query: DataformTemplateBuilder.create((ctx, params) => ` +SELECT + date, + UNIX_DATE(date) * 1000 * 60 * 60 * 24 AS timestamp, + client, + ROUND(APPROX_QUANTILES(FLOAT64(payload._image_savings), 1001)[OFFSET(101)] / 1024, 2) AS p10, + ROUND(APPROX_QUANTILES(FLOAT64(payload._image_savings), 1001)[OFFSET(251)] / 1024, 2) AS p25, + ROUND(APPROX_QUANTILES(FLOAT64(payload._image_savings), 1001)[OFFSET(501)] / 1024, 2) AS p50, + ROUND(APPROX_QUANTILES(FLOAT64(payload._image_savings), 1001)[OFFSET(751)] / 1024, 2) AS p75, + ROUND(APPROX_QUANTILES(FLOAT64(payload._image_savings), 1001)[OFFSET(901)] / 1024, 2) AS p90 +FROM ${ctx.ref('crawl', 'pages')} +WHERE + is_root_page AND + ${params.devRankFilter} + ${params.lens.sql} + date = '${params.date}' +GROUP BY + date, + timestamp, + client +ORDER BY + date DESC, + client +`) + } + ] + }, + offscreenImages: { + SQL: [ + { + type: 'histogram', + query: DataformTemplateBuilder.create((ctx, params) => ` +SELECT + *, + SUM(pdf) OVER (PARTITION BY client ORDER BY bin) AS cdf +FROM ( + SELECT + *, + volume / SUM(volume) OVER (PARTITION BY client) AS pdf + FROM ( + SELECT + client, + COUNT(0) AS volume, + CAST(FLOOR(IFNULL( + INT64(lighthouse.audits['offscreen-images'].details.overallSavingsBytes), + INT64(lighthouse.audits['offscreen-images'].extendedInfo.value.wastedKb) * 1024 + ) / 10240) * 10 AS INT64) AS bin + FROM ${ctx.ref('crawl', 'pages')} + WHERE + date = '${params.date}' AND + ${params.devRankFilter} + ${params.lens.sql} + is_root_page + GROUP BY + bin, + client + HAVING + bin IS NOT NULL + ) +) +ORDER BY + bin, 
+ client +`) + }, + { + type: 'timeseries', + query: DataformTemplateBuilder.create((ctx, params) => ` +SELECT + date, + UNIX_DATE(date) * 1000 * 60 * 60 * 24 AS timestamp, + client, + ROUND(APPROX_QUANTILES(IFNULL(INT64(lighthouse.audits['offscreen-images'].details.overallSavingsBytes), INT64(lighthouse.audits['offscreen-images'].extendedInfo.value.wastedKb) * 1024), 1001)[OFFSET(101)] / 1024, 2) AS p10, + ROUND(APPROX_QUANTILES(IFNULL(INT64(lighthouse.audits['offscreen-images'].details.overallSavingsBytes), INT64(lighthouse.audits['offscreen-images'].extendedInfo.value.wastedKb) * 1024), 1001)[OFFSET(251)] / 1024, 2) AS p25, + ROUND(APPROX_QUANTILES(IFNULL(INT64(lighthouse.audits['offscreen-images'].details.overallSavingsBytes), INT64(lighthouse.audits['offscreen-images'].extendedInfo.value.wastedKb) * 1024), 1001)[OFFSET(501)] / 1024, 2) AS p50, + ROUND(APPROX_QUANTILES(IFNULL(INT64(lighthouse.audits['offscreen-images'].details.overallSavingsBytes), INT64(lighthouse.audits['offscreen-images'].extendedInfo.value.wastedKb) * 1024), 1001)[OFFSET(751)] / 1024, 2) AS p75, + ROUND(APPROX_QUANTILES(IFNULL(INT64(lighthouse.audits['offscreen-images'].details.overallSavingsBytes), INT64(lighthouse.audits['offscreen-images'].extendedInfo.value.wastedKb) * 1024), 1001)[OFFSET(901)] / 1024, 2) AS p90 +FROM ${ctx.ref('crawl', 'pages')} +WHERE + is_root_page AND + ${params.devRankFilter} + ${params.lens.sql} + date = '${params.date}' +GROUP BY + date, + timestamp, + client +ORDER BY + date DESC, + client +`) + } + ] + }, + optimizedImages: { + SQL: [ + { + type: 'histogram', + query: DataformTemplateBuilder.create((ctx, params) => ` +SELECT + *, + SUM(pdf) OVER (PARTITION BY client ORDER BY bin) AS cdf +FROM ( + SELECT + *, + volume / SUM(volume) OVER (PARTITION BY client) AS pdf + FROM ( + SELECT + client, + COUNT(0) AS volume, + CAST(FLOOR(IFNULL( + INT64(lighthouse.audits['uses-optimized-images'].details.overallSavingsBytes), + 
INT64(lighthouse.audits['uses-optimized-images'].extendedInfo.value.wastedKb) * 1024 + ) / 10240) * 10 AS INT64) AS bin + FROM ${ctx.ref('crawl', 'pages')} + WHERE + date = '${params.date}' AND + ${params.devRankFilter} + ${params.lens.sql} + is_root_page + GROUP BY + bin, + client + HAVING + bin IS NOT NULL + ) +) +ORDER BY + bin, + client +`) + }, + { + type: 'timeseries', + query: DataformTemplateBuilder.create((ctx, params) => ` +SELECT + date, + UNIX_DATE(date) * 1000 * 60 * 60 * 24 AS timestamp, + client, + ROUND(APPROX_QUANTILES(IFNULL(INT64(lighthouse.audits['uses-optimized-images'].details.overallSavingsBytes), INT64(lighthouse.audits['uses-optimized-images'].extendedInfo.value.wastedKb) * 1024), 1001)[OFFSET(101)] / 1024, 2) AS p10, + ROUND(APPROX_QUANTILES(IFNULL(INT64(lighthouse.audits['uses-optimized-images'].details.overallSavingsBytes), INT64(lighthouse.audits['uses-optimized-images'].extendedInfo.value.wastedKb) * 1024), 1001)[OFFSET(251)] / 1024, 2) AS p25, + ROUND(APPROX_QUANTILES(IFNULL(INT64(lighthouse.audits['uses-optimized-images'].details.overallSavingsBytes), INT64(lighthouse.audits['uses-optimized-images'].extendedInfo.value.wastedKb) * 1024), 1001)[OFFSET(501)] / 1024, 2) AS p50, + ROUND(APPROX_QUANTILES(IFNULL(INT64(lighthouse.audits['uses-optimized-images'].details.overallSavingsBytes), INT64(lighthouse.audits['uses-optimized-images'].extendedInfo.value.wastedKb) * 1024), 1001)[OFFSET(751)] / 1024, 2) AS p75, + ROUND(APPROX_QUANTILES(IFNULL(INT64(lighthouse.audits['uses-optimized-images'].details.overallSavingsBytes), INT64(lighthouse.audits['uses-optimized-images'].extendedInfo.value.wastedKb) * 1024), 1001)[OFFSET(901)] / 1024, 2) AS p90 +FROM ${ctx.ref('crawl', 'pages')} +WHERE + date = '${params.date}' AND + ${params.devRankFilter} AND + ${params.lens.sql} AND + is_root_page +GROUP BY + date, + timestamp, + client +ORDER BY + date DESC, + client +`) + } + ] + }, + speedIndex: { + SQL: [ + { + type: 'histogram', + query: 
DataformTemplateBuilder.create((ctx, params) => ` +SELECT + *, + SUM(pdf) OVER (PARTITION BY client ORDER BY bin) AS cdf +FROM ( + SELECT + *, + volume / SUM(volume) OVER (PARTITION BY client) AS pdf + FROM ( + SELECT + client, + COUNT(0) AS volume, + CAST(FLOOR(FLOAT64(payload._SpeedIndex) / (1000)) * 1000 AS INT64) AS bin + FROM ${ctx.ref('crawl', 'pages')} + WHERE + date = '${params.date}' AND + ${params.devRankFilter} + ${params.lens.sql} + is_root_page + GROUP BY + bin, + client + HAVING + bin IS NOT NULL + ) +) +ORDER BY + bin, + client +`) + }, + { + type: 'timeseries', + query: DataformTemplateBuilder.create((ctx, params) => ` +SELECT + date, + UNIX_DATE(date) * 1000 * 60 * 60 * 24 AS timestamp, + client, + ROUND(APPROX_QUANTILES(FLOAT64(payload._SpeedIndex), 1001)[OFFSET(101)] / 1000, 2) AS p10, + ROUND(APPROX_QUANTILES(FLOAT64(payload._SpeedIndex), 1001)[OFFSET(251)] / 1000, 2) AS p25, + ROUND(APPROX_QUANTILES(FLOAT64(payload._SpeedIndex), 1001)[OFFSET(501)] / 1000, 2) AS p50, + ROUND(APPROX_QUANTILES(FLOAT64(payload._SpeedIndex), 1001)[OFFSET(751)] / 1000, 2) AS p75, + ROUND(APPROX_QUANTILES(FLOAT64(payload._SpeedIndex), 1001)[OFFSET(901)] / 1000, 2) AS p90 +FROM ${ctx.ref('crawl', 'pages')} +WHERE + is_root_page AND + ${params.devRankFilter} + ${params.lens.sql} + date = '${params.date}' +GROUP BY + date, + timestamp, + client +ORDER BY + date DESC, + client +`) + } + ] + }, + tcp: { + SQL: [ + { + type: 'histogram', + query: DataformTemplateBuilder.create((ctx, params) => ` +SELECT + *, + SUM(pdf) OVER (PARTITION BY client ORDER BY bin) AS cdf +FROM ( + SELECT + *, + volume / SUM(volume) OVER (PARTITION BY client) AS pdf + FROM ( + SELECT + client, + COUNT(0) AS volume, + INT64(summary._connections) AS bin + FROM ${ctx.ref('crawl', 'pages')} + WHERE + date = '${params.date}' AND + ${params.devRankFilter} + ${params.lens.sql} + is_root_page AND + INT64(summary._connections) > 0 + GROUP BY + bin, + client + ) +) +ORDER BY + bin, + client +`) + } + ] + }, 
+ imgLazy: { + SQL: [ + { + type: 'timeseries', + query: DataformTemplateBuilder.create((ctx, params) => ` +SELECT + date, + UNIX_DATE(date) * 1000 * 60 * 60 * 24 AS timestamp, + client, + ROUND(COUNT(DISTINCT IF(LOWER(LAX_STRING(attr)) = 'lazy', page, NULL)) * 100 / COUNT(DISTINCT page), 2) AS percent +FROM ${ctx.ref('crawl', 'pages')} +LEFT JOIN + UNNEST(JSON_EXTRACT_ARRAY(custom_metrics.other['img-loading-attr'])) AS attr +WHERE + is_root_page AND + ${params.devRankFilter} + ${params.lens.sql} + date > '2016-01-01' +GROUP BY + date, + timestamp, + client +ORDER BY + date DESC, + client +`) + } + ] + }, + h2: { + SQL: [ + { + type: 'timeseries', + query: DataformTemplateBuilder.create((ctx, params) => ` +SELECT + date, + UNIX_DATE(date) * 1000 * 60 * 60 * 24 AS timestamp, + client, + ROUND(SUM(IF(LAX_STRING(r.summary.respHttpVersion) = 'HTTP/2', 1, 0)) * 100 / COUNT(0), 2) AS percent +FROM ${ctx.ref('crawl', 'requests')} r +INNER JOIN ${ctx.ref('crawl', 'pages')} +USING (date, client, is_root_page, rank, page) +WHERE + is_root_page AND + ${params.devRankFilter} + ${params.lens.sql} + date = '${params.date}' +GROUP BY + date, + timestamp, + client +ORDER BY + date DESC, + client +`) + } + ] + }, + h3: { + SQL: [ + { + type: 'timeseries', + query: DataformTemplateBuilder.create((ctx, params) => ` +SELECT + date, + UNIX_DATE(date) * 1000 * 60 * 60 * 24 AS timestamp, + client, + ROUND( + SUM( + IF( + LAX_STRING(r.summary.respHttpVersion) IN ('HTTP/3', 'h3', 'h3-29') OR + REGEXP_EXTRACT(REGEXP_EXTRACT(resp.value, r'(.*)'), r'(.*?)(?:, [^ ]* = .*)?$') LIKE '%h3=%' OR + REGEXP_EXTRACT(REGEXP_EXTRACT(resp.value, r'(.*)'), r'(.*?)(?:, [^ ]* = .*)?$') LIKE '%h3-29=%', + 1, 0 + ) + ) * 100 / COUNT(0), 2 + ) AS percent +FROM ${ctx.ref('crawl', 'requests')} r +LEFT OUTER JOIN +UNNEST(response_headers) AS resp +ON (resp.name = 'alt-svc') +INNER JOIN ${ctx.ref('crawl', 'pages')} +USING (date, client, is_root_page, rank, page) +WHERE + date = '${params.date}' AND + 
${params.devRankFilter} AND + ${params.lens.sql} AND + is_root_page +GROUP BY + date, + timestamp, + client +ORDER BY + date DESC, + client +`) + } + ] + }, + fontDisplay: { + SQL: [ + { + type: 'timeseries', + query: DataformTemplateBuilder.create((ctx, params) => ` +SELECT + date, + UNIX_DATE(date) * 1000 * 60 * 60 * 24 AS timestamp, + client, + ROUND(SUM(IF(LAX_STRING(lighthouse.audits['font-display'].score) IN ('true', '1'), 1, 0)) * 100 / COUNT(0), 2) AS percent +FROM ${ctx.ref('crawl', 'pages')} +WHERE + date = '${params.date}' + ${params.devRankFilter} + ${params.lens.sql} + is_root_page AND + lighthouse IS NOT NULL AND + TO_JSON_STRING(lighthouse) != '{}' AND + LAX_STRING(lighthouse.audits['font-display'].score) IS NOT NULL +GROUP BY + date, + timestamp, + client +ORDER BY + date DESC, + client +`) + } + ] + }, + canonical: { + SQL: [ + { + type: 'timeseries', + query: DataformTemplateBuilder.create((ctx, params) => ` +SELECT + date, + UNIX_DATE(date) * 1000 * 60 * 60 * 24 AS timestamp, + client, + ROUND(SUM(IF(LAX_STRING(lighthouse.audits.canonical.score) IN ('true', '1'), 1, 0)) * 100 / COUNT(0), 2) AS percent +FROM ${ctx.ref('crawl', 'pages')} +WHERE + lighthouse IS NOT NULL AND + TO_JSON_STRING(lighthouse) != '{}' AND + date = '${params.date}' + ${params.devRankFilter} + ${params.lens.sql} + is_root_page +GROUP BY + date, + timestamp, + client +ORDER BY + date DESC, + client +`) + } + ] + }, + a11yButtonName: { + SQL: [ + { + type: 'timeseries', + query: DataformTemplateBuilder.create((ctx, params) => ` +SELECT + date, + UNIX_DATE(date) * 1000 * 60 * 60 * 24 AS timestamp, + client, + ROUND(SUM(IF(LAX_STRING(lighthouse.audits['button-name'].score) IN ('true', '1'), 1, 0)) * 100 / COUNT(0), 2) AS percent +FROM ${ctx.ref('crawl', 'pages')} +WHERE + lighthouse IS NOT NULL AND + TO_JSON_STRING(lighthouse) != '{}' AND + is_root_page AND + ${params.devRankFilter} + ${params.lens.sql} + date = '${params.date}' +GROUP BY + date, + timestamp, + client +ORDER BY + 
date DESC, + client +`) + } + ] + }, + hreflang: { + SQL: [ + { + type: 'timeseries', + query: DataformTemplateBuilder.create((ctx, params) => ` +SELECT + date, + UNIX_DATE(date) * 1000 * 60 * 60 * 24 AS timestamp, + client, + ROUND(SUM(IF(LAX_STRING(lighthouse.audits.hreflang.score) IN ('true', '1'), 1, 0)) * 100 / COUNT(0), 2) AS percent +FROM ${ctx.ref('crawl', 'pages')} +WHERE + lighthouse IS NOT NULL AND + TO_JSON_STRING(lighthouse) != '{}' AND + date = '${params.date}' + ${params.devRankFilter} + ${params.lens.sql} + is_root_page AND + LAX_STRING(lighthouse.audits.hreflang.score) IS NOT NULL +GROUP BY + date, + timestamp, + client +ORDER BY + date DESC, + client +`) + } + ] + }, + numUrls: { + SQL: [ + { + type: 'timeseries', + query: DataformTemplateBuilder.create((ctx, params) => ` +SELECT + date, + UNIX_DATE(date) * 1000 * 60 * 60 * 24 AS timestamp, + client, + COUNT(0) AS urls +FROM ${ctx.ref('crawl', 'pages')} +WHERE + date = '${params.date}' + ${params.devRankFilter} + ${params.lens.sql} + is_root_page +GROUP BY + date, + timestamp, + client +ORDER BY + date DESC, + client +`) + } + ] + }, + contentIndex: { + SQL: [ + { + type: 'timeseries', + query: DataformTemplateBuilder.create((ctx, params) => ` +SELECT + date, + UNIX_DATE(date) * 1000 * 60 * 60 * 24 AS timestamp, + client, + SUM(IF(feat.id IS NOT NULL, 1, 0)) AS num_urls, + ROUND(SUM(IF(feat.id IS NOT NULL, 1, 0)) / COUNT(0) * 100, 5) AS percent +FROM ${ctx.ref('crawl', 'pages')} +LEFT OUTER JOIN UNNEST(features) AS feat +ON (feat.id = '2983' OR feat.feature = 'ContentIndexAdd') +WHERE + date = '${params.date}' + ${params.devRankFilter} + ${params.lens.sql} + is_root_page +GROUP BY + date, + timestamp, + client +ORDER BY + date DESC, + client, + num_urls DESC +`) + } + ] + }, + legible: { + SQL: [ + { + type: 'timeseries', + query: DataformTemplateBuilder.create((ctx, params) => ` +SELECT + date, + UNIX_DATE(date) * 1000 * 60 * 60 * 24 AS timestamp, + client, + 
ROUND(SUM(IF(LAX_STRING(lighthouse.audits['font-size'].score) IN ('true', '1'), 1, 0)) * 100 / COUNT(0), 2) AS percent +FROM ${ctx.ref('crawl', 'pages')} +WHERE + lighthouse IS NOT NULL AND + date = '${params.date}' + ${params.devRankFilter} + ${params.lens.sql} + is_root_page AND + LAX_STRING(lighthouse.audits['font-size'].score) IS NOT NULL +GROUP BY + date, + timestamp, + client +ORDER BY + date DESC, + client +`) + } + ] + }, + a11yColorContrast: { + SQL: [ + { + type: 'timeseries', + query: DataformTemplateBuilder.create((ctx, params) => ` +SELECT + date, + UNIX_DATE(date) * 1000 * 60 * 60 * 24 AS timestamp, + client, + ROUND(SUM(IF(LAX_STRING(lighthouse.audits['color-contrast'].score) IN ('true', '1'), 1, 0)) * 100 / COUNT(0), 2) AS percent +FROM ${ctx.ref('crawl', 'pages')} +WHERE + lighthouse IS NOT NULL AND + TO_JSON_STRING(lighthouse) != '{}' AND + date = '${params.date}' + ${params.devRankFilter} + ${params.lens.sql} + is_root_page +GROUP BY + date, + timestamp, + client +ORDER BY + date DESC, + client +`) + } + ] + }, + a11yImageAlt: { + SQL: [ + { + type: 'timeseries', + query: DataformTemplateBuilder.create((ctx, params) => ` +SELECT + date, + UNIX_DATE(date) * 1000 * 60 * 60 * 24 AS timestamp, + client, + ROUND(SUM(IF(LAX_STRING(lighthouse.audits['image-alt'].score) IN ('true', '1'), 1, 0)) * 100 / COUNT(0), 2) AS percent +FROM ${ctx.ref('crawl', 'pages')} +WHERE + lighthouse IS NOT NULL AND + TO_JSON_STRING(lighthouse) != '{}' AND + date = '${params.date}' + ${params.devRankFilter} + ${params.lens.sql} + is_root_page +GROUP BY + date, + timestamp, + client +ORDER BY + date DESC, + client +`) + } + ] + }, + a11yLabel: { + SQL: [ + { + type: 'timeseries', + query: DataformTemplateBuilder.create((ctx, params) => ` +SELECT + date, + UNIX_DATE(date) * 1000 * 60 * 60 * 24 AS timestamp, + client, + ROUND(SUM(IF(LAX_STRING(lighthouse.audits.label.score) IN ('true', '1'), 1, 0)) * 100 / COUNT(0), 2) AS percent +FROM ${ctx.ref('crawl', 'pages')} +WHERE + 
lighthouse IS NOT NULL AND + TO_JSON_STRING(lighthouse) != '{}' AND + date = '${params.date}' + ${params.devRankFilter} + ${params.lens.sql} + is_root_page +GROUP BY + date, + timestamp, + client +ORDER BY + date DESC, + client +`) + } + ] + }, + a11yLinkName: { + SQL: [ + { + type: 'timeseries', + query: DataformTemplateBuilder.create((ctx, params) => ` +SELECT + date, + UNIX_DATE(date) * 1000 * 60 * 60 * 24 AS timestamp, + client, + ROUND(SUM(IF(LAX_STRING(lighthouse.audits['link-name'].score) IN ('true', '1'), 1, 0)) * 100 / COUNT(0), 2) AS percent +FROM ${ctx.ref('crawl', 'pages')} +WHERE + lighthouse IS NOT NULL AND + TO_JSON_STRING(lighthouse) != '{}' AND + date = '${params.date}' + ${params.devRankFilter} + ${params.lens.sql} + is_root_page +GROUP BY + date, + timestamp, + client +ORDER BY + date DESC, + client +`) + } + ] + }, + a11yScores: { + SQL: [ + { + type: 'timeseries', + query: DataformTemplateBuilder.create((ctx, params) => ` +CREATE TEMPORARY FUNCTION getA11yScore(reportCategories JSON) +RETURNS FLOAT64 DETERMINISTIC +LANGUAGE js AS """ + if(reportCategories) { + return reportCategories.find(i => i.name === 'Accessibility').score; + } +"""; + +SELECT + date, + UNIX_DATE(date) * 1000 * 60 * 60 * 24 AS timestamp, + client, + ROUND(APPROX_QUANTILES(score, 1000)[OFFSET(100)], 2) AS p10, + ROUND(APPROX_QUANTILES(score, 1000)[OFFSET(250)], 2) AS p25, + ROUND(APPROX_QUANTILES(score, 1000)[OFFSET(500)], 2) AS p50, + ROUND(APPROX_QUANTILES(score, 1000)[OFFSET(750)], 2) AS p75, + ROUND(APPROX_QUANTILES(score, 1000)[OFFSET(900)], 2) AS p90 +FROM ( + SELECT + date, + client, + IFNULL(LAX_FLOAT64(lighthouse.categories.accessibility.score) * 100, getA11yScore(lighthouse.reportCategories)) AS score + FROM ${ctx.ref('crawl', 'pages')} + WHERE + lighthouse IS NOT NULL AND + TO_JSON_STRING(lighthouse) != '{}' AND + date = '${params.date}' + ${params.devRankFilter} + ${params.lens.sql} + is_root_page +) +GROUP BY + date, + timestamp, + client +ORDER BY + date DESC, 
+ client +`) + } + ] + }, + asyncClipboardRead: { + SQL: [ + { + type: 'timeseries', + query: DataformTemplateBuilder.create((ctx, params) => ` +SELECT + date, + UNIX_DATE(date) * 1000 * 60 * 60 * 24 AS timestamp, + client, + SUM(IF(feat.id IS NOT NULL, 1, 0)) AS num_urls, + ROUND(SUM(IF(feat.id IS NOT NULL, 1, 0)) / COUNT(0) * 100, 5) AS percent +FROM ${ctx.ref('crawl', 'pages')} +LEFT OUTER JOIN UNNEST(features) AS feat +ON (feat.id = '2369' OR feat.feature = 'AsyncClipboardAPIRead') +WHERE + date = '${params.date}' + ${params.devRankFilter} + ${params.lens.sql} + is_root_page +GROUP BY + date, + timestamp, + client +ORDER BY + date DESC, + client, + num_urls DESC +`) + } + ] + }, + badgeClear: { + SQL: [ + { + type: 'timeseries', + query: DataformTemplateBuilder.create((ctx, params) => ` +SELECT + date, + UNIX_DATE(date) * 1000 * 60 * 60 * 24 AS timestamp, + client, + SUM(IF(feat.id IS NOT NULL, 1, 0)) AS num_urls, + ROUND(SUM(IF(feat.id IS NOT NULL, 1, 0)) / COUNT(0) * 100, 5) AS percent +FROM ${ctx.ref('crawl', 'pages')} +LEFT OUTER JOIN UNNEST(features) AS feat +ON (feat.id = '2727' OR feat.feature = 'BadgeClear') +WHERE + date = '${params.date}' + ${params.devRankFilter} + ${params.lens.sql} + is_root_page +GROUP BY + date, + timestamp, + client +ORDER BY + date DESC, + client, + num_urls DESC +`) + } + ] + }, + badgeSet: { + SQL: [ + { + type: 'timeseries', + query: DataformTemplateBuilder.create((ctx, params) => ` +SELECT + date, + UNIX_DATE(date) * 1000 * 60 * 60 * 24 AS timestamp, + client, + SUM(IF(feat.id IS NOT NULL, 1, 0)) AS num_urls, + ROUND(SUM(IF(feat.id IS NOT NULL, 1, 0)) / COUNT(0) * 100, 5) AS percent +FROM ${ctx.ref('crawl', 'pages')} +LEFT OUTER JOIN UNNEST(features) AS feat +ON (feat.id = '2726' OR feat.feature = 'BadgeSet') +WHERE + date = '${params.date}' + ${params.devRankFilter} + ${params.lens.sql} + is_root_page +GROUP BY + date, + timestamp, + client +ORDER BY + date DESC, + client, + num_urls DESC +`) + } + ] + }, + 
getInstalledRelatedApps: { + SQL: [ + { + type: 'timeseries', + query: DataformTemplateBuilder.create((ctx, params) => ` +SELECT + date, + UNIX_DATE(date) * 1000 * 60 * 60 * 24 AS timestamp, + client, + SUM(IF(feat.id IS NOT NULL, 1, 0)) AS num_urls, + ROUND(SUM(IF(feat.id IS NOT NULL, 1, 0)) / COUNT(0) * 100, 5) AS percent +FROM ${ctx.ref('crawl', 'pages')} +LEFT OUTER JOIN UNNEST(features) AS feat +ON (feat.id = '1870' OR feat.feature = 'V8Navigator_GetInstalledRelatedApps_Method') +WHERE + date = '${params.date}' + ${params.devRankFilter} + ${params.lens.sql} + is_root_page +GROUP BY + date, + timestamp, + client +ORDER BY + date DESC, + client, + num_urls DESC +`) + } + ] + }, + idleDetection: { + SQL: [ + { + type: 'timeseries', + query: DataformTemplateBuilder.create((ctx, params) => ` +SELECT + date, + UNIX_DATE(date) * 1000 * 60 * 60 * 24 AS timestamp, + client, + SUM(IF(feat.id IS NOT NULL, 1, 0)) AS num_urls, + ROUND(SUM(IF(feat.id IS NOT NULL, 1, 0)) / COUNT(0) * 100, 5) AS percent +FROM ${ctx.ref('crawl', 'pages')} +LEFT OUTER JOIN UNNEST(features) AS feat +ON (feat.id = '2834' OR feat.feature = 'IdleDetectionStart') +WHERE + date = '${params.date}' + ${params.devRankFilter} + ${params.lens.sql} + is_root_page +GROUP BY + date, + timestamp, + client +ORDER BY + date DESC, + client, + num_urls DESC +`) + } + ] + }, + linkText: { + SQL: [ + { + type: 'timeseries', + query: DataformTemplateBuilder.create((ctx, params) => ` +SELECT + date, + UNIX_DATE(date) * 1000 * 60 * 60 * 24 AS timestamp, + client, + ROUND(SUM(IF(LAX_STRING(lighthouse.audits['link-text'].score) IN ('true', '1'), 1, 0)) * 100 / COUNT(0), 2) AS percent +FROM ${ctx.ref('crawl', 'pages')} +WHERE + lighthouse IS NOT NULL AND + date = '${params.date}' + ${params.devRankFilter} + ${params.lens.sql} + is_root_page AND + LAX_STRING(lighthouse.audits['link-text'].score) IS NOT NULL +GROUP BY + date, + timestamp, + client +ORDER BY + date DESC, + client +`) + } + ] + }, + notificationTriggers: { + 
SQL: [ + { + type: 'timeseries', + query: DataformTemplateBuilder.create((ctx, params) => ` +SELECT + date, + UNIX_DATE(date) * 1000 * 60 * 60 * 24 AS timestamp, + client, + SUM(IF(feat.id IS NOT NULL, 1, 0)) AS num_urls, + ROUND(SUM(IF(feat.id IS NOT NULL, 1, 0)) / COUNT(0) * 100, 5) AS percent +FROM ${ctx.ref('crawl', 'pages')} +LEFT OUTER JOIN UNNEST(features) AS feat +ON (feat.id = '3017' OR feat.feature = 'NotificationShowTrigger') +WHERE + date = '${params.date}' + ${params.devRankFilter} + ${params.lens.sql} + is_root_page +GROUP BY + date, + timestamp, + client +ORDER BY + date DESC, + client, + num_urls DESC +`) + } + ] + }, + periodicBackgroundSync: { + SQL: [ + { + type: 'timeseries', + query: DataformTemplateBuilder.create((ctx, params) => ` +SELECT + date, + UNIX_DATE(date) * 1000 * 60 * 60 * 24 AS timestamp, + client, + SUM(IF(feat.id IS NOT NULL, 1, 0)) AS num_urls, + ROUND(SUM(IF(feat.id IS NOT NULL, 1, 0)) / COUNT(0) * 100, 5) AS percent +FROM ${ctx.ref('crawl', 'pages')} +LEFT OUTER JOIN UNNEST(features) AS feat +ON (feat.id = '2930' OR feat.feature = 'PeriodicBackgroundSync') +WHERE + date = '${params.date}' + ${params.devRankFilter} + ${params.lens.sql} + is_root_page +GROUP BY + date, + timestamp, + client +ORDER BY + date DESC, + client, + num_urls DESC +`) + } + ] + }, + periodicBackgroundSyncRegister: { + SQL: [ + { + type: 'timeseries', + query: DataformTemplateBuilder.create((ctx, params) => ` +SELECT + date, + UNIX_DATE(date) * 1000 * 60 * 60 * 24 AS timestamp, + client, + SUM(IF(feat.id IS NOT NULL, 1, 0)) AS num_urls, + ROUND(SUM(IF(feat.id IS NOT NULL, 1, 0)) / COUNT(0) * 100, 5) AS percent +FROM ${ctx.ref('crawl', 'pages')} +LEFT OUTER JOIN UNNEST(features) AS feat +ON (feat.id = '2931' OR feat.feature = 'PeriodicBackgroundSyncRegister') +WHERE + date = '${params.date}' + ${params.devRankFilter} + ${params.lens.sql} + is_root_page +GROUP BY + date, + timestamp, + client +ORDER BY + date DESC, + client, + num_urls DESC +`) + } + ] + }, 
+ quicTransport: { + SQL: [ + { + type: 'timeseries', + query: DataformTemplateBuilder.create((ctx, params) => ` +SELECT + date, + UNIX_DATE(date) * 1000 * 60 * 60 * 24 AS timestamp, + client, + SUM(IF(feat.id IS NOT NULL, 1, 0)) AS num_urls, + ROUND(SUM(IF(feat.id IS NOT NULL, 1, 0)) / COUNT(0) * 100, 5) AS percent +FROM ${ctx.ref('crawl', 'pages')} +LEFT OUTER JOIN UNNEST(features) AS feat +ON (feat.id = '3184' OR feat.feature = 'QuicTransport') +WHERE + date = '${params.date}' + ${params.devRankFilter} + ${params.lens.sql} + is_root_page +GROUP BY + date, + timestamp, + client +ORDER BY + date DESC, + client, + num_urls DESC +`) + } + ] + }, + screenWakeLock: { + SQL: [ + { + type: 'timeseries', + query: DataformTemplateBuilder.create((ctx, params) => ` +SELECT + date, + UNIX_DATE(date) * 1000 * 60 * 60 * 24 AS timestamp, + client, + SUM(IF(feat.id IS NOT NULL, 1, 0)) AS num_urls, + ROUND(SUM(IF(feat.id IS NOT NULL, 1, 0)) / COUNT(0) * 100, 5) AS percent +FROM ${ctx.ref('crawl', 'pages')} +LEFT OUTER JOIN UNNEST(features) AS feat +ON (feat.id = '3005' OR feat.feature = 'WakeLockAcquireScreenLock') +WHERE + date = '${params.date}' + ${params.devRankFilter} + ${params.lens.sql} + is_root_page +GROUP BY + date, + timestamp, + client +ORDER BY + date DESC, + client, + num_urls DESC +`) + } + ] + }, + storagePersist: { + SQL: [ + { + type: 'timeseries', + query: DataformTemplateBuilder.create((ctx, params) => ` +SELECT + date, + UNIX_DATE(date) * 1000 * 60 * 60 * 24 AS timestamp, + client, + SUM(IF(feat.id IS NOT NULL, 1, 0)) AS num_urls, + ROUND(SUM(IF(feat.id IS NOT NULL, 1, 0)) / COUNT(0) * 100, 5) AS percent +FROM ${ctx.ref('crawl', 'pages')} +LEFT OUTER JOIN + UNNEST(features) AS feat +ON (feat.id = '3018' OR feat.feature = 'DurableStoragePersist') +WHERE + date = '${params.date}' + ${params.devRankFilter} + ${params.lens.sql} + is_root_page +GROUP BY + date, + timestamp, + client +ORDER BY + date DESC, + client, + num_urls DESC +`) + } + ] + }, + 
swControlledPages: { + SQL: [ + { + type: 'timeseries', + query: DataformTemplateBuilder.create((ctx, params) => ` +SELECT + date, + UNIX_DATE(date) * 1000 * 60 * 60 * 24 AS timestamp, + client, + SUM(IF(feat.id = '990' OR feat.feature = 'ServiceWorkerControlledPage', 1, 0)) AS num_urls, + ROUND(SUM(IF(feat.id = '990' OR feat.feature = 'ServiceWorkerControlledPage', 1, 0)) / COUNT(0) * 100, 5) AS percent +FROM ${ctx.ref('crawl', 'pages')} +LEFT OUTER JOIN + UNNEST(features) AS feat +ON (feat.id = '990' OR feat.feature = 'ServiceWorkerControlledPage') +WHERE + date = '${params.date}' + ${params.devRankFilter} + ${params.lens.sql} + is_root_page +GROUP BY + date, + timestamp, + client +ORDER BY + date DESC, + client, + num_urls DESC +`) + } + ] + }, + webSocketStream: { + SQL: [ + { + type: 'timeseries', + query: DataformTemplateBuilder.create((ctx, params) => ` +SELECT + date, + UNIX_DATE(date) * 1000 * 60 * 60 * 24 AS timestamp, + client, + SUM(IF(feat.id = '3018' OR feat.feature = 'WebSocketStreamConstructor', 1, 0)) AS num_urls, + ROUND(SUM(IF(feat.id = '3018' OR feat.feature = 'WebSocketStreamConstructor', 1, 0)) / COUNT(0) * 100, 5) AS percent +FROM ${ctx.ref('crawl', 'pages')} +LEFT OUTER JOIN + UNNEST(features) AS feat +ON (feat.id = '3018' OR feat.feature = 'WebSocketStreamConstructor') +WHERE + date = '${params.date}' + ${params.devRankFilter} + ${params.lens.sql} + is_root_page +GROUP BY + date, + timestamp, + client +ORDER BY + date DESC, + client, + num_urls DESC `) } ] From 599d92ebd4afa259cb2687798fb77f1bc52c265d Mon Sep 17 00:00:00 2001 From: Max Ostapenko <1611259+max-ostapenko@users.noreply.github.com> Date: Wed, 10 Sep 2025 09:47:53 +0200 Subject: [PATCH 2/3] feat: Add histogram and timeseries data synchronization scripts - Implemented histogram_storage_sync.js for downloading and uploading histogram data to BigQuery. - Created timeseries_storage_sync.js for processing timeseries data with similar functionality. 
- Added package.json to manage dependencies for Google Cloud Storage and BigQuery. --- definitions/output/reports/reports_dynamic.js | 17 +- includes/reports.js | 90 ----- script/histogram_storage_sync.js | 357 ++++++++++++++++++ script/package.json | 7 + script/timeseries_storage_sync.js | 251 ++++++++++++ 5 files changed, 625 insertions(+), 97 deletions(-) create mode 100644 script/histogram_storage_sync.js create mode 100644 script/package.json create mode 100644 script/timeseries_storage_sync.js diff --git a/definitions/output/reports/reports_dynamic.js b/definitions/output/reports/reports_dynamic.js index a4c8b9d..b5cd016 100644 --- a/definitions/output/reports/reports_dynamic.js +++ b/definitions/output/reports/reports_dynamic.js @@ -24,7 +24,7 @@ const EXPORT_CONFIG = { bucket: constants.bucket, storagePath: constants.storagePath, dataset: 'reports', - testSuffix: '.json' + fileFormat: '.json' } // Date range for report generation @@ -54,7 +54,7 @@ function buildExportPath(reportConfig) { throw new Error(`Unknown SQL type: ${sql.type}`) } - return objectPath + EXPORT_CONFIG.testSuffix + return objectPath + EXPORT_CONFIG.fileFormat } /** @@ -74,17 +74,19 @@ function buildExportQuery(reportConfig) { WHERE date = '${date}' AND metric = '${metric.id}' AND lens = '${lens.name}' - ORDER BY bin ASC + ORDER BY client, bin ASC ` } else if (sql.type === 'timeseries') { query = ` SELECT - FORMAT_DATE('%Y_%m_%d', date) AS date, + UNIX_DATE(date) * 1000 * 60 * 60 * 24 AS timestamp, * EXCEPT(date, metric, lens) FROM \`${EXPORT_CONFIG.dataset}.${tableName}\` - WHERE metric = '${metric.id}' + WHERE + date = '${date}' + AND metric = '${metric.id}' AND lens = '${lens.name}' - ORDER BY date DESC + ORDER BY date, client DESC ` } else { throw new Error(`Unknown SQL type: ${sql.type}`) @@ -110,7 +112,7 @@ function createReportConfig(date, metric, sql, lensName, lensSQL) { sql, lens: { name: lensName, sql: lensSQL }, devRankFilter: constants.devRankFilter, - tableName: 
`${metric.id}_${sql.type}` + tableName: sql.type === 'timeseries' ? sql.type : `${metric.id}_${sql.type}` } } @@ -180,6 +182,7 @@ INSERT INTO ${EXPORT_CONFIG.dataset}.${tableName} --*/ SELECT + DATE('${date}') AS date, '${metric.id}' AS metric, '${lens.name}' AS lens, * diff --git a/includes/reports.js b/includes/reports.js index 655c14e..5e0cf87 100644 --- a/includes/reports.js +++ b/includes/reports.js @@ -16,7 +16,6 @@ FROM ( volume / SUM(volume) OVER (PARTITION BY client) AS pdf FROM ( SELECT - date, client, COUNT(0) AS volume, CAST(FLOOR(FLOAT64(summary.bytesCss) / 10240) * 10 AS INT64) AS bin @@ -27,7 +26,6 @@ FROM ( ${params.lens.sql} AND is_root_page GROUP BY - date, bin, client ) @@ -42,7 +40,6 @@ ORDER BY query: DataformTemplateBuilder.create((ctx, params) => ` SELECT date, - UNIX_DATE(date) * 1000 * 60 * 60 * 24 AS timestamp, client, ROUND(APPROX_QUANTILES(FLOAT64(summary.bytesCss), 1001)[OFFSET(101)] / 1024, 2) AS p10, ROUND(APPROX_QUANTILES(FLOAT64(summary.bytesCss), 1001)[OFFSET(251)] / 1024, 2) AS p25, @@ -81,7 +78,6 @@ FROM ( volume / SUM(volume) OVER (PARTITION BY client) AS pdf FROM ( SELECT - date, client, COUNT(0) AS volume, CAST(FLOOR(CAST(IFNULL( @@ -98,7 +94,6 @@ FROM ( ${params.lens.sql} AND is_root_page GROUP BY - date, bin, client HAVING @@ -115,7 +110,6 @@ ORDER BY query: DataformTemplateBuilder.create((ctx, params) => ` SELECT date, - UNIX_DATE(date) * 1000 * 60 * 60 * 24 AS timestamp, client, ROUND(APPROX_QUANTILES(value, 1000)[OFFSET(100)], 2) AS p10, ROUND(APPROX_QUANTILES(value, 1000)[OFFSET(250)], 2) AS p25, @@ -158,7 +152,6 @@ ORDER BY query: DataformTemplateBuilder.create((ctx, params) => ` SELECT date, - UNIX_DATE(date) * 1000 * 60 * 60 * 24 AS timestamp, client, ROUND(SUM(IF(STARTS_WITH(url, 'https'), 1, 0)) * 100 / COUNT(0), 2) AS percent FROM ${ctx.ref('crawl', 'requests')} @@ -187,7 +180,6 @@ ORDER BY query: DataformTemplateBuilder.create((ctx, params) => ` SELECT date, - UNIX_DATE(date) * 1000 * 60 * 60 * 24 AS timestamp, 
client, SUM(IF(feat.id IS NOT NULL, 1, 0)) AS num_urls, ROUND(SUM(IF(feat.id IS NOT NULL, 1, 0)) / COUNT(0) * 100, 5) AS percent @@ -225,7 +217,6 @@ FROM ( volume / SUM(volume) OVER (PARTITION BY client) AS pdf FROM ( SELECT - date, client, COUNT(0) AS volume, FLOOR(FLOAT64(IFNULL(lighthouse.audits['bootup-time'].numericValue, lighthouse.audits['bootup-time'].rawValue)) / 100) / 10 AS bin @@ -236,7 +227,6 @@ FROM ( ${params.lens.sql} AND is_root_page GROUP BY - date, bin, client HAVING @@ -253,7 +243,6 @@ ORDER BY query: DataformTemplateBuilder.create((ctx, params) => ` SELECT date, - UNIX_DATE(date) * 1000 * 60 * 60 * 24 AS timestamp, client, ROUND(APPROX_QUANTILES(value, 1000)[OFFSET(100)], 2) AS p10, ROUND(APPROX_QUANTILES(value, 1000)[OFFSET(250)], 2) AS p25, @@ -302,7 +291,6 @@ FROM ( volume / SUM(volume) OVER (PARTITION BY client) AS pdf FROM ( SELECT - date, client, COUNT(0) AS volume, CAST(FLOOR(FLOAT64(summary.bytesFont) / 10240) * 10 AS INT64) AS bin @@ -313,7 +301,6 @@ FROM ( ${params.lens.sql} AND is_root_page GROUP BY - date, bin, client ) @@ -328,7 +315,6 @@ ORDER BY query: DataformTemplateBuilder.create((ctx, params) => ` SELECT date, - UNIX_DATE(date) * 1000 * 60 * 60 * 24 AS timestamp, client, ROUND(APPROX_QUANTILES(FLOAT64(summary.bytesFont), 1001)[OFFSET(101)] / 1024, 2) AS p10, ROUND(APPROX_QUANTILES(FLOAT64(summary.bytesFont), 1001)[OFFSET(251)] / 1024, 2) AS p25, @@ -367,7 +353,6 @@ FROM ( volume / SUM(volume) OVER (PARTITION BY client) AS pdf FROM ( SELECT - date, client, COUNT(0) AS volume, CAST(FLOOR(FLOAT64(summary.bytesHtml) / 10240) * 10 AS INT64) AS bin @@ -378,7 +363,6 @@ FROM ( ${params.lens.sql} AND is_root_page GROUP BY - date, bin, client ) @@ -393,7 +377,6 @@ ORDER BY query: DataformTemplateBuilder.create((ctx, params) => ` SELECT date, - UNIX_DATE(date) * 1000 * 60 * 60 * 24 AS timestamp, client, ROUND(APPROX_QUANTILES(FLOAT64(summary.bytesHtml), 1001)[OFFSET(101)] / 1024, 2) AS p10, 
ROUND(APPROX_QUANTILES(FLOAT64(summary.bytesHtml), 1001)[OFFSET(251)] / 1024, 2) AS p25, @@ -432,7 +415,6 @@ FROM ( volume / SUM(volume) OVER (PARTITION BY client) AS pdf FROM ( SELECT - date, client, COUNT(0) AS volume, CAST(FLOOR(FLOAT64(summary.bytesImg) / 102400) * 100 AS INT64) AS bin @@ -443,7 +425,6 @@ FROM ( ${params.lens.sql} AND is_root_page GROUP BY - date, bin, client ) @@ -458,7 +439,6 @@ ORDER BY query: DataformTemplateBuilder.create((ctx, params) => ` SELECT date, - UNIX_DATE(date) * 1000 * 60 * 60 * 24 AS timestamp, client, ROUND(APPROX_QUANTILES(FLOAT64(summary.bytesImg), 1001)[OFFSET(101)] / 1024, 2) AS p10, ROUND(APPROX_QUANTILES(FLOAT64(summary.bytesImg), 1001)[OFFSET(251)] / 1024, 2) AS p25, @@ -497,7 +477,6 @@ FROM ( volume / SUM(volume) OVER (PARTITION BY client) AS pdf FROM ( SELECT - date, client, COUNT(0) AS volume, CAST(FLOOR(FLOAT64(summary.bytesJS) / 10240) * 10 AS INT64) AS bin @@ -508,7 +487,6 @@ FROM ( ${params.lens.sql} AND is_root_page GROUP BY - date, bin, client ) @@ -523,7 +501,6 @@ ORDER BY query: DataformTemplateBuilder.create((ctx, params) => ` SELECT date, - UNIX_DATE(date) * 1000 * 60 * 60 * 24 AS timestamp, client, ROUND(APPROX_QUANTILES(FLOAT64(summary.bytesJS), 1001)[OFFSET(101)] / 1024, 2) AS p10, ROUND(APPROX_QUANTILES(FLOAT64(summary.bytesJS), 1001)[OFFSET(251)] / 1024, 2) AS p25, @@ -562,7 +539,6 @@ FROM ( volume / SUM(volume) OVER (PARTITION BY client) AS pdf FROM ( SELECT - date, client, COUNT(0) AS volume, CAST(FLOOR(FLOAT64(summary.bytesOther) / 10240) * 10 AS INT64) AS bin @@ -573,7 +549,6 @@ FROM ( ${params.lens.sql} AND is_root_page GROUP BY - date, bin, client ) @@ -588,7 +563,6 @@ ORDER BY query: DataformTemplateBuilder.create((ctx, params) => ` SELECT date, - UNIX_DATE(date) * 1000 * 60 * 60 * 24 AS timestamp, client, ROUND(APPROX_QUANTILES(FLOAT64(summary.bytesOther), 1001)[OFFSET(101)] / 1024, 2) AS p10, ROUND(APPROX_QUANTILES(FLOAT64(summary.bytesOther), 1001)[OFFSET(251)] / 1024, 2) AS p25, @@ -627,7 
+601,6 @@ FROM ( volume / SUM(volume) OVER (PARTITION BY client) AS pdf FROM ( SELECT - date, client, COUNT(0) AS volume, CAST(FLOOR(FLOAT64(summary.bytesTotal) / 1024 / 100) * 100 AS INT64) AS bin @@ -638,7 +611,6 @@ FROM ( ${params.lens.sql} AND is_root_page GROUP BY - date, bin, client ) @@ -653,7 +625,6 @@ ORDER BY query: DataformTemplateBuilder.create((ctx, params) => ` SELECT date, - UNIX_DATE(date) * 1000 * 60 * 60 * 24 AS timestamp, client, ROUND(APPROX_QUANTILES(bytesTotal, 1001)[OFFSET(101)] / 1024, 2) AS p10, ROUND(APPROX_QUANTILES(bytesTotal, 1001)[OFFSET(251)] / 1024, 2) AS p25, @@ -692,7 +663,6 @@ FROM ( volume / SUM(volume) OVER (PARTITION BY client) AS pdf FROM ( SELECT - date, client, COUNT(0) AS volume, CAST(FLOOR(FLOAT64(summary.bytesVideo) / 10240) * 10 AS INT64) AS bin @@ -703,7 +673,6 @@ FROM ( ${params.lens.sql} AND is_root_page GROUP BY - date, bin, client ) @@ -718,7 +687,6 @@ ORDER BY query: DataformTemplateBuilder.create((ctx, params) => ` SELECT date, - UNIX_DATE(date) * 1000 * 60 * 60 * 24 AS timestamp, client, ROUND(APPROX_QUANTILES(FLOAT64(summary.bytesVideo), 1001)[OFFSET(101)] / 1024, 2) AS p10, ROUND(APPROX_QUANTILES(FLOAT64(summary.bytesVideo), 1001)[OFFSET(251)] / 1024, 2) AS p25, @@ -757,7 +725,6 @@ FROM ( volume / SUM(volume) OVER (PARTITION BY client) AS pdf FROM ( SELECT - date, client, COUNT(0) AS volume, INT64(payload['_cpu.v8.compile']) AS bin @@ -768,7 +735,6 @@ FROM ( ${params.lens.sql} AND is_root_page GROUP BY - date, bin, client HAVING @@ -785,7 +751,6 @@ ORDER BY query: DataformTemplateBuilder.create((ctx, params) => ` SELECT date, - UNIX_DATE(date) * 1000 * 60 * 60 * 24 AS timestamp, client, ROUND(APPROX_QUANTILES(value, 1000)[OFFSET(100)], 2) AS p10, ROUND(APPROX_QUANTILES(value, 1000)[OFFSET(250)], 2) AS p25, @@ -831,7 +796,6 @@ FROM ( volume / SUM(volume) OVER (PARTITION BY client) AS pdf FROM ( SELECT - date, client, COUNT(0) AS volume, FLOOR(FLOAT64(summary.onContentLoaded) / 1000) AS bin @@ -843,7 +807,6 @@ 
FROM ( is_root_page AND FLOAT64(summary.onContentLoaded) > 0 GROUP BY - date, bin, client ) @@ -858,7 +821,6 @@ ORDER BY query: DataformTemplateBuilder.create((ctx, params) => ` SELECT date, - UNIX_DATE(date) * 1000 * 60 * 60 * 24 AS timestamp, client, ROUND(APPROX_QUANTILES(FLOAT64(summary.onContentLoaded), 1001)[OFFSET(101)], 2) AS p10, ROUND(APPROX_QUANTILES(FLOAT64(summary.onContentLoaded), 1001)[OFFSET(251)], 2) AS p25, @@ -897,7 +859,6 @@ FROM ( volume / SUM(volume) OVER (PARTITION BY client) AS pdf FROM ( SELECT - date, client, COUNT(0) AS volume, CAST(FLOAT64(r.payload['_cpu.EvaluateScript']) / 20 AS INT64) * 20 AS bin @@ -910,7 +871,6 @@ FROM ( ${params.lens.sql} AND is_root_page GROUP BY - date, bin, client HAVING @@ -938,7 +898,6 @@ FROM ( volume / SUM(volume) OVER (PARTITION BY client) AS pdf FROM ( SELECT - date, client, COUNT(0) AS volume, CAST(FLOOR(FLOAT64(payload['_chromeUserTiming.firstContentfulPaint']) / 1000) AS INT64) AS bin @@ -949,7 +908,6 @@ FROM ( ${params.lens.sql} AND is_root_page GROUP BY - date, bin, client HAVING @@ -966,7 +924,6 @@ ORDER BY query: DataformTemplateBuilder.create((ctx, params) => ` SELECT date, - UNIX_DATE(date) * 1000 * 60 * 60 * 24 AS timestamp, client, ROUND(APPROX_QUANTILES(FLOAT64(payload['_chromeUserTiming.firstContentfulPaint']), 1001)[OFFSET(101)] / 1024, 2) AS p10, ROUND(APPROX_QUANTILES(FLOAT64(payload['_chromeUserTiming.firstContentfulPaint']), 1001)[OFFSET(251)] / 1024, 2) AS p25, @@ -1006,7 +963,6 @@ FROM ( volume / SUM(volume) OVER (PARTITION BY client) AS pdf FROM ( SELECT - date, client, COUNT(0) AS volume, CAST(FLOOR(FLOAT64(payload._gzip_savings) / (1024 * 2)) * 2 AS INT64) AS bin @@ -1017,7 +973,6 @@ FROM ( ${params.lens.sql} AND is_root_page GROUP BY - date, bin, client HAVING @@ -1034,7 +989,6 @@ ORDER BY query: DataformTemplateBuilder.create((ctx, params) => ` SELECT date, - UNIX_DATE(date) * 1000 * 60 * 60 * 24 AS timestamp, client, ROUND(APPROX_QUANTILES(FLOAT64(payload._gzip_savings), 
1001)[OFFSET(101)] / 1024, 2) AS p10, ROUND(APPROX_QUANTILES(FLOAT64(payload._gzip_savings), 1001)[OFFSET(251)] / 1024, 2) AS p25, @@ -1072,7 +1026,6 @@ FROM ( volume / SUM(volume) OVER (PARTITION BY client) AS pdf FROM ( SELECT - date, client, COUNT(0) AS volume, FLOOR(FLOAT64(summary.onLoad) / 1000) AS bin @@ -1084,7 +1037,6 @@ FROM ( is_root_page AND FLOAT64(summary.onLoad) > 0 GROUP BY - date, bin, client ) @@ -1099,7 +1051,6 @@ ORDER BY query: DataformTemplateBuilder.create((ctx, params) => ` SELECT date, - UNIX_DATE(date) * 1000 * 60 * 60 * 24 AS timestamp, client, ROUND(APPROX_QUANTILES(FLOAT64(summary.onLoad), 1001)[OFFSET(101)] / 1000, 2) AS p10, ROUND(APPROX_QUANTILES(FLOAT64(summary.onLoad), 1001)[OFFSET(251)] / 1000, 2) AS p25, @@ -1162,7 +1113,6 @@ ORDER BY query: DataformTemplateBuilder.create((ctx, params) => ` SELECT date, - UNIX_DATE(date) * 1000 * 60 * 60 * 24 AS timestamp, client, ROUND(APPROX_QUANTILES(FLOAT64(summary.reqCss), 1001)[OFFSET(101)], 2) AS p10, ROUND(APPROX_QUANTILES(FLOAT64(summary.reqCss), 1001)[OFFSET(251)], 2) AS p25, @@ -1225,7 +1175,6 @@ ORDER BY query: DataformTemplateBuilder.create((ctx, params) => ` SELECT date, - UNIX_DATE(date) * 1000 * 60 * 60 * 24 AS timestamp, client, ROUND(APPROX_QUANTILES(FLOAT64(summary.reqFont), 1001)[OFFSET(101)], 2) AS p10, ROUND(APPROX_QUANTILES(FLOAT64(summary.reqFont), 1001)[OFFSET(251)], 2) AS p25, @@ -1288,7 +1237,6 @@ ORDER BY query: DataformTemplateBuilder.create((ctx, params) => ` SELECT date, - UNIX_DATE(date) * 1000 * 60 * 60 * 24 AS timestamp, client, ROUND(APPROX_QUANTILES(FLOAT64(summary.reqHtml), 1001)[OFFSET(101)], 2) AS p10, ROUND(APPROX_QUANTILES(FLOAT64(summary.reqHtml), 1001)[OFFSET(251)], 2) AS p25, @@ -1351,7 +1299,6 @@ ORDER BY query: DataformTemplateBuilder.create((ctx, params) => ` SELECT date, - UNIX_DATE(date) * 1000 * 60 * 60 * 24 AS timestamp, client, ROUND(APPROX_QUANTILES(FLOAT64(summary.reqImg), 1001)[OFFSET(101)], 2) AS p10, 
ROUND(APPROX_QUANTILES(FLOAT64(summary.reqImg), 1001)[OFFSET(251)], 2) AS p25, @@ -1414,7 +1361,6 @@ ORDER BY query: DataformTemplateBuilder.create((ctx, params) => ` SELECT date, - UNIX_DATE(date) * 1000 * 60 * 60 * 24 AS timestamp, client, ROUND(APPROX_QUANTILES(FLOAT64(summary.reqJS), 1001)[OFFSET(101)], 2) AS p10, ROUND(APPROX_QUANTILES(FLOAT64(summary.reqJS), 1001)[OFFSET(251)], 2) AS p25, @@ -1477,7 +1423,6 @@ ORDER BY query: DataformTemplateBuilder.create((ctx, params) => ` SELECT date, - UNIX_DATE(date) * 1000 * 60 * 60 * 24 AS timestamp, client, ROUND(APPROX_QUANTILES(FLOAT64(summary.reqOther), 1001)[OFFSET(101)], 2) AS p10, ROUND(APPROX_QUANTILES(FLOAT64(summary.reqOther), 1001)[OFFSET(251)], 2) AS p25, @@ -1540,7 +1485,6 @@ ORDER BY query: DataformTemplateBuilder.create((ctx, params) => ` SELECT date, - UNIX_DATE(date) * 1000 * 60 * 60 * 24 AS timestamp, client, ROUND(APPROX_QUANTILES(FLOAT64(summary.reqTotal), 1001)[OFFSET(101)], 2) AS p10, ROUND(APPROX_QUANTILES(FLOAT64(summary.reqTotal), 1001)[OFFSET(251)], 2) AS p25, @@ -1603,7 +1547,6 @@ ORDER BY query: DataformTemplateBuilder.create((ctx, params) => ` SELECT date, - UNIX_DATE(date) * 1000 * 60 * 60 * 24 AS timestamp, client, ROUND(APPROX_QUANTILES(FLOAT64(summary.reqVideo), 1001)[OFFSET(101)], 2) AS p10, ROUND(APPROX_QUANTILES(FLOAT64(summary.reqVideo), 1001)[OFFSET(251)], 2) AS p25, @@ -1668,7 +1611,6 @@ ORDER BY query: DataformTemplateBuilder.create((ctx, params) => ` SELECT date, - UNIX_DATE(date) * 1000 * 60 * 60 * 24 AS timestamp, client, ROUND(APPROX_QUANTILES(FLOAT64(payload._image_savings), 1001)[OFFSET(101)] / 1024, 2) AS p10, ROUND(APPROX_QUANTILES(FLOAT64(payload._image_savings), 1001)[OFFSET(251)] / 1024, 2) AS p25, @@ -1736,7 +1678,6 @@ ORDER BY query: DataformTemplateBuilder.create((ctx, params) => ` SELECT date, - UNIX_DATE(date) * 1000 * 60 * 60 * 24 AS timestamp, client, ROUND(APPROX_QUANTILES(IFNULL(INT64(lighthouse.audits['offscreen-images'].details.overallSavingsBytes), 
INT64(lighthouse.audits['offscreen-images'].extendedInfo.value.wastedKb) * 1024), 1001)[OFFSET(101)] / 1024, 2) AS p10, ROUND(APPROX_QUANTILES(IFNULL(INT64(lighthouse.audits['offscreen-images'].details.overallSavingsBytes), INT64(lighthouse.audits['offscreen-images'].extendedInfo.value.wastedKb) * 1024), 1001)[OFFSET(251)] / 1024, 2) AS p25, @@ -1803,7 +1744,6 @@ ORDER BY query: DataformTemplateBuilder.create((ctx, params) => ` SELECT date, - UNIX_DATE(date) * 1000 * 60 * 60 * 24 AS timestamp, client, ROUND(APPROX_QUANTILES(IFNULL(INT64(lighthouse.audits['uses-optimized-images'].details.overallSavingsBytes), INT64(lighthouse.audits['uses-optimized-images'].extendedInfo.value.wastedKb) * 1024), 1001)[OFFSET(101)] / 1024, 2) AS p10, ROUND(APPROX_QUANTILES(IFNULL(INT64(lighthouse.audits['uses-optimized-images'].details.overallSavingsBytes), INT64(lighthouse.audits['uses-optimized-images'].extendedInfo.value.wastedKb) * 1024), 1001)[OFFSET(251)] / 1024, 2) AS p25, @@ -1867,7 +1807,6 @@ ORDER BY query: DataformTemplateBuilder.create((ctx, params) => ` SELECT date, - UNIX_DATE(date) * 1000 * 60 * 60 * 24 AS timestamp, client, ROUND(APPROX_QUANTILES(FLOAT64(payload._SpeedIndex), 1001)[OFFSET(101)] / 1000, 2) AS p10, ROUND(APPROX_QUANTILES(FLOAT64(payload._SpeedIndex), 1001)[OFFSET(251)] / 1000, 2) AS p25, @@ -1934,7 +1873,6 @@ ORDER BY query: DataformTemplateBuilder.create((ctx, params) => ` SELECT date, - UNIX_DATE(date) * 1000 * 60 * 60 * 24 AS timestamp, client, ROUND(COUNT(DISTINCT IF(LOWER(LAX_STRING(attr)) = 'lazy', page, NULL)) * 100 / COUNT(DISTINCT page), 2) AS percent FROM ${ctx.ref('crawl', 'pages')} @@ -1963,7 +1901,6 @@ ORDER BY query: DataformTemplateBuilder.create((ctx, params) => ` SELECT date, - UNIX_DATE(date) * 1000 * 60 * 60 * 24 AS timestamp, client, ROUND(SUM(IF(LAX_STRING(r.summary.respHttpVersion) = 'HTTP/2', 1, 0)) * 100 / COUNT(0), 2) AS percent FROM ${ctx.ref('crawl', 'requests')} r @@ -1992,7 +1929,6 @@ ORDER BY query: 
DataformTemplateBuilder.create((ctx, params) => ` SELECT date, - UNIX_DATE(date) * 1000 * 60 * 60 * 24 AS timestamp, client, ROUND( SUM( @@ -2033,7 +1969,6 @@ ORDER BY query: DataformTemplateBuilder.create((ctx, params) => ` SELECT date, - UNIX_DATE(date) * 1000 * 60 * 60 * 24 AS timestamp, client, ROUND(SUM(IF(LAX_STRING(lighthouse.audits['font-display'].score) IN ('true', '1'), 1, 0)) * 100 / COUNT(0), 2) AS percent FROM ${ctx.ref('crawl', 'pages')} @@ -2063,7 +1998,6 @@ ORDER BY query: DataformTemplateBuilder.create((ctx, params) => ` SELECT date, - UNIX_DATE(date) * 1000 * 60 * 60 * 24 AS timestamp, client, ROUND(SUM(IF(LAX_STRING(lighthouse.audits.canonical.score) IN ('true', '1'), 1, 0)) * 100 / COUNT(0), 2) AS percent FROM ${ctx.ref('crawl', 'pages')} @@ -2092,7 +2026,6 @@ ORDER BY query: DataformTemplateBuilder.create((ctx, params) => ` SELECT date, - UNIX_DATE(date) * 1000 * 60 * 60 * 24 AS timestamp, client, ROUND(SUM(IF(LAX_STRING(lighthouse.audits['button-name'].score) IN ('true', '1'), 1, 0)) * 100 / COUNT(0), 2) AS percent FROM ${ctx.ref('crawl', 'pages')} @@ -2121,7 +2054,6 @@ ORDER BY query: DataformTemplateBuilder.create((ctx, params) => ` SELECT date, - UNIX_DATE(date) * 1000 * 60 * 60 * 24 AS timestamp, client, ROUND(SUM(IF(LAX_STRING(lighthouse.audits.hreflang.score) IN ('true', '1'), 1, 0)) * 100 / COUNT(0), 2) AS percent FROM ${ctx.ref('crawl', 'pages')} @@ -2151,7 +2083,6 @@ ORDER BY query: DataformTemplateBuilder.create((ctx, params) => ` SELECT date, - UNIX_DATE(date) * 1000 * 60 * 60 * 24 AS timestamp, client, COUNT(0) AS urls FROM ${ctx.ref('crawl', 'pages')} @@ -2178,7 +2109,6 @@ ORDER BY query: DataformTemplateBuilder.create((ctx, params) => ` SELECT date, - UNIX_DATE(date) * 1000 * 60 * 60 * 24 AS timestamp, client, SUM(IF(feat.id IS NOT NULL, 1, 0)) AS num_urls, ROUND(SUM(IF(feat.id IS NOT NULL, 1, 0)) / COUNT(0) * 100, 5) AS percent @@ -2209,7 +2139,6 @@ ORDER BY query: DataformTemplateBuilder.create((ctx, params) => ` SELECT date, - 
UNIX_DATE(date) * 1000 * 60 * 60 * 24 AS timestamp, client, ROUND(SUM(IF(LAX_STRING(lighthouse.audits['font-size'].score) IN ('true', '1'), 1, 0)) * 100 / COUNT(0), 2) AS percent FROM ${ctx.ref('crawl', 'pages')} @@ -2238,7 +2167,6 @@ ORDER BY query: DataformTemplateBuilder.create((ctx, params) => ` SELECT date, - UNIX_DATE(date) * 1000 * 60 * 60 * 24 AS timestamp, client, ROUND(SUM(IF(LAX_STRING(lighthouse.audits['color-contrast'].score) IN ('true', '1'), 1, 0)) * 100 / COUNT(0), 2) AS percent FROM ${ctx.ref('crawl', 'pages')} @@ -2267,7 +2195,6 @@ ORDER BY query: DataformTemplateBuilder.create((ctx, params) => ` SELECT date, - UNIX_DATE(date) * 1000 * 60 * 60 * 24 AS timestamp, client, ROUND(SUM(IF(LAX_STRING(lighthouse.audits['image-alt'].score) IN ('true', '1'), 1, 0)) * 100 / COUNT(0), 2) AS percent FROM ${ctx.ref('crawl', 'pages')} @@ -2296,7 +2223,6 @@ ORDER BY query: DataformTemplateBuilder.create((ctx, params) => ` SELECT date, - UNIX_DATE(date) * 1000 * 60 * 60 * 24 AS timestamp, client, ROUND(SUM(IF(LAX_STRING(lighthouse.audits.label.score) IN ('true', '1'), 1, 0)) * 100 / COUNT(0), 2) AS percent FROM ${ctx.ref('crawl', 'pages')} @@ -2325,7 +2251,6 @@ ORDER BY query: DataformTemplateBuilder.create((ctx, params) => ` SELECT date, - UNIX_DATE(date) * 1000 * 60 * 60 * 24 AS timestamp, client, ROUND(SUM(IF(LAX_STRING(lighthouse.audits['link-name'].score) IN ('true', '1'), 1, 0)) * 100 / COUNT(0), 2) AS percent FROM ${ctx.ref('crawl', 'pages')} @@ -2362,7 +2287,6 @@ LANGUAGE js AS """ SELECT date, - UNIX_DATE(date) * 1000 * 60 * 60 * 24 AS timestamp, client, ROUND(APPROX_QUANTILES(score, 1000)[OFFSET(100)], 2) AS p10, ROUND(APPROX_QUANTILES(score, 1000)[OFFSET(250)], 2) AS p25, @@ -2401,7 +2325,6 @@ ORDER BY query: DataformTemplateBuilder.create((ctx, params) => ` SELECT date, - UNIX_DATE(date) * 1000 * 60 * 60 * 24 AS timestamp, client, SUM(IF(feat.id IS NOT NULL, 1, 0)) AS num_urls, ROUND(SUM(IF(feat.id IS NOT NULL, 1, 0)) / COUNT(0) * 100, 5) AS percent @@ 
-2432,7 +2355,6 @@ ORDER BY query: DataformTemplateBuilder.create((ctx, params) => ` SELECT date, - UNIX_DATE(date) * 1000 * 60 * 60 * 24 AS timestamp, client, SUM(IF(feat.id IS NOT NULL, 1, 0)) AS num_urls, ROUND(SUM(IF(feat.id IS NOT NULL, 1, 0)) / COUNT(0) * 100, 5) AS percent @@ -2463,7 +2385,6 @@ ORDER BY query: DataformTemplateBuilder.create((ctx, params) => ` SELECT date, - UNIX_DATE(date) * 1000 * 60 * 60 * 24 AS timestamp, client, SUM(IF(feat.id IS NOT NULL, 1, 0)) AS num_urls, ROUND(SUM(IF(feat.id IS NOT NULL, 1, 0)) / COUNT(0) * 100, 5) AS percent @@ -2494,7 +2415,6 @@ ORDER BY query: DataformTemplateBuilder.create((ctx, params) => ` SELECT date, - UNIX_DATE(date) * 1000 * 60 * 60 * 24 AS timestamp, client, SUM(IF(feat.id IS NOT NULL, 1, 0)) AS num_urls, ROUND(SUM(IF(feat.id IS NOT NULL, 1, 0)) / COUNT(0) * 100, 5) AS percent @@ -2525,7 +2445,6 @@ ORDER BY query: DataformTemplateBuilder.create((ctx, params) => ` SELECT date, - UNIX_DATE(date) * 1000 * 60 * 60 * 24 AS timestamp, client, SUM(IF(feat.id IS NOT NULL, 1, 0)) AS num_urls, ROUND(SUM(IF(feat.id IS NOT NULL, 1, 0)) / COUNT(0) * 100, 5) AS percent @@ -2556,7 +2475,6 @@ ORDER BY query: DataformTemplateBuilder.create((ctx, params) => ` SELECT date, - UNIX_DATE(date) * 1000 * 60 * 60 * 24 AS timestamp, client, ROUND(SUM(IF(LAX_STRING(lighthouse.audits['link-text'].score) IN ('true', '1'), 1, 0)) * 100 / COUNT(0), 2) AS percent FROM ${ctx.ref('crawl', 'pages')} @@ -2585,7 +2503,6 @@ ORDER BY query: DataformTemplateBuilder.create((ctx, params) => ` SELECT date, - UNIX_DATE(date) * 1000 * 60 * 60 * 24 AS timestamp, client, SUM(IF(feat.id IS NOT NULL, 1, 0)) AS num_urls, ROUND(SUM(IF(feat.id IS NOT NULL, 1, 0)) / COUNT(0) * 100, 5) AS percent @@ -2616,7 +2533,6 @@ ORDER BY query: DataformTemplateBuilder.create((ctx, params) => ` SELECT date, - UNIX_DATE(date) * 1000 * 60 * 60 * 24 AS timestamp, client, SUM(IF(feat.id IS NOT NULL, 1, 0)) AS num_urls, ROUND(SUM(IF(feat.id IS NOT NULL, 1, 0)) / COUNT(0) * 
100, 5) AS percent @@ -2647,7 +2563,6 @@ ORDER BY query: DataformTemplateBuilder.create((ctx, params) => ` SELECT date, - UNIX_DATE(date) * 1000 * 60 * 60 * 24 AS timestamp, client, SUM(IF(feat.id IS NOT NULL, 1, 0)) AS num_urls, ROUND(SUM(IF(feat.id IS NOT NULL, 1, 0)) / COUNT(0) * 100, 5) AS percent @@ -2678,7 +2593,6 @@ ORDER BY query: DataformTemplateBuilder.create((ctx, params) => ` SELECT date, - UNIX_DATE(date) * 1000 * 60 * 60 * 24 AS timestamp, client, SUM(IF(feat.id IS NOT NULL, 1, 0)) AS num_urls, ROUND(SUM(IF(feat.id IS NOT NULL, 1, 0)) / COUNT(0) * 100, 5) AS percent @@ -2709,7 +2623,6 @@ ORDER BY query: DataformTemplateBuilder.create((ctx, params) => ` SELECT date, - UNIX_DATE(date) * 1000 * 60 * 60 * 24 AS timestamp, client, SUM(IF(feat.id IS NOT NULL, 1, 0)) AS num_urls, ROUND(SUM(IF(feat.id IS NOT NULL, 1, 0)) / COUNT(0) * 100, 5) AS percent @@ -2740,7 +2653,6 @@ ORDER BY query: DataformTemplateBuilder.create((ctx, params) => ` SELECT date, - UNIX_DATE(date) * 1000 * 60 * 60 * 24 AS timestamp, client, SUM(IF(feat.id IS NOT NULL, 1, 0)) AS num_urls, ROUND(SUM(IF(feat.id IS NOT NULL, 1, 0)) / COUNT(0) * 100, 5) AS percent @@ -2772,7 +2684,6 @@ ORDER BY query: DataformTemplateBuilder.create((ctx, params) => ` SELECT date, - UNIX_DATE(date) * 1000 * 60 * 60 * 24 AS timestamp, client, SUM(IF(feat.id = '990' OR feat.feature = 'ServiceWorkerControlledPage', 1, 0)) AS num_urls, ROUND(SUM(IF(feat.id = '990' OR feat.feature = 'ServiceWorkerControlledPage', 1, 0)) / COUNT(0) * 100, 5) AS percent @@ -2804,7 +2715,6 @@ ORDER BY query: DataformTemplateBuilder.create((ctx, params) => ` SELECT date, - UNIX_DATE(date) * 1000 * 60 * 60 * 24 AS timestamp, client, SUM(IF(feat.id = '3018' OR feat.feature = 'WebSocketStreamConstructor', 1, 0)) AS num_urls, ROUND(SUM(IF(feat.id = '3018' OR feat.feature = 'WebSocketStreamConstructor', 1, 0)) / COUNT(0) * 100, 5) AS percent diff --git a/script/histogram_storage_sync.js b/script/histogram_storage_sync.js new file mode 
100644
index 0000000..e828565
--- /dev/null
+++ b/script/histogram_storage_sync.js
@@ -0,0 +1,357 @@
+import { Storage } from '@google-cloud/storage'
+import { BigQuery } from '@google-cloud/bigquery'
+import { Readable } from 'stream'
+
+// Configuration
+const CONFIG = {
+  storage: { bucket: 'httparchive', prefix: 'reports/' },
+  bigquery: { projectId: 'httparchive', datasetId: 'reports', tableId: 'histogram1' },
+  skipDates: []
+}
+
+const BACKLOG = []
+/*
+
+*/
+
+const storage = new Storage()
+const bigquery = new BigQuery({ projectId: CONFIG.bigquery.projectId })
+
+const lenses = ['', 'drupal/', 'magento/', 'top100k/', 'top10k/', 'top1k/', 'top1m/', 'wordpress/']
+
+// Returns sorted 'YYYY-MM-DD' strings within [startDate, endDate]: the 1st of every month, plus the 15th for years <= 2018. Throws on unparsable or reversed bounds.
+function generateHTTPArchiveDates(startDate, endDate) {
+  const dates = []
+  const start = new Date(startDate)
+  const end = new Date(endDate)
+
+  // Validate dates
+  if (isNaN(start.getTime()) || isNaN(end.getTime())) {
+    throw new Error('Invalid date format. Use YYYY-MM-DD format.')
+  }
+
+  if (start > end) {
+    throw new Error('Start date must be before or equal to end date.')
+  }
+
+  const startYear = start.getUTCFullYear() // date-only ISO strings parse as UTC midnight, so read the fields back in UTC
+  const startMonth = start.getUTCMonth() + 1
+  const endYear = end.getUTCFullYear() // local getters would shift the end month back one in zones behind UTC and drop the last date
+  const endMonth = end.getUTCMonth() + 1
+
+  for (let year = startYear; year <= endYear; year++) {
+    const monthStart = (year === startYear) ? startMonth : 1
+    const monthEnd = (year === endYear) ? endMonth : 12
+
+    for (let month = monthStart; month <= monthEnd; month++) {
+      const monthStr = String(month).padStart(2, '0')
+
+      // Always include 1st of month (string comparison is safe because all operands are zero-padded YYYY-MM-DD)
+      const firstDate = `${year}-${monthStr}-01`
+      if (firstDate >= startDate && firstDate <= endDate) {
+        dates.push(firstDate)
+      }
+
+      // Add 15th for years 2010-2018 (HTTPArchive historical pattern)
+      if (year <= 2018) {
+        const fifteenthDate = `${year}-${monthStr}-15`
+        if (fifteenthDate >= startDate && fifteenthDate <= endDate) {
+          dates.push(fifteenthDate)
+        }
+      }
+    }
+  }
+
+  return dates.sort()
+}
+
+const dates = generateHTTPArchiveDates('2011-06-01', '2025-07-01')
+
+const histogramMetrics = [
+  'bytesCss', 'bytesImg', 'bytesJs', 'bytesOther', 'bytesTotal', 'evalJs', 'gzipSavings', 'speedIndex', 'dcl',
+  'bootupJs', 'bytesFont', 'bytesHtml', 'bytesVideo', 'compileJs', 'fcp', 'imgSavings', 'offscreenImages', 'ol',
+  'optimizedImages', 'reqCss', 'reqFont', 'reqHtml', 'reqImg', 'reqJs', 'reqOther', 'reqTotal', 'reqVideo',
+  'tcp', 'ttci', 'cruxTtfb', 'cruxOl', 'cruxLcp', 'cruxInp', 'cruxFp', 'cruxFcp', 'cruxDcl', 'cruxCls'
+]
+
+const SCHEMA = [
+  { name: 'date', type: 'DATE' },
+  { name: 'lens', type: 'STRING' },
+  { name: 'client', type: 'STRING' },
+  { name: 'metric', type: 'STRING' },
+  { name: 'bin', type: 'FLOAT64' },
+  { name: 'volume', type: 'FLOAT64' },
+  { name: 'cdf', type: 'FLOAT64' },
+  { name: 'pdf', type: 'FLOAT64' }
+]
+
+const downloadObject = async (filename) =>
+  (await storage.bucket(CONFIG.storage.bucket).file(filename).download()).toString()
+
+async function uploadToBigQuery(rows) {
+  return new Promise((resolve, reject) => {
+    const table = bigquery.dataset(CONFIG.bigquery.datasetId).table(CONFIG.bigquery.tableId)
+    const jsonlData = rows.map(row => JSON.stringify(row)).join('\n')
+    const dataStream = Readable.from([jsonlData])
+
+    const writeStream = table.createWriteStream({
+      sourceFormat: 'NEWLINE_DELIMITED_JSON',
+      schema: { fields: SCHEMA },
+      writeDisposition:
'WRITE_APPEND',
+      createDisposition: 'CREATE_IF_NEEDED'
+    })
+
+    writeStream.on('complete', () => {
+      resolve()
+    })
+
+    writeStream.on('error', (error) => {
+      console.error('Upload failed:', error.message)
+      reject(error)
+    })
+
+    dataStream.pipe(writeStream)
+  })
+}
+
+async function downloadAndParseFile(filename, date, lens, metric) {
+  try {
+    const data = await downloadObject(filename)
+    const rows = JSON.parse(data).map(item => ({
+      date,
+      lens: lens.replace('/', ''),
+      client: item.client,
+      metric,
+      bin: item.bin,
+      volume: item.volume,
+      cdf: item.cdf,
+      pdf: item.pdf
+    }))
+
+    return {
+      filename,
+      success: true,
+      rows,
+      rowCount: rows.length,
+      error: null,
+      isNotFound: false
+    }
+  } catch (error) {
+    return {
+      filename,
+      success: false,
+      rows: [],
+      rowCount: 0,
+      error: error.message,
+      isNotFound: error.code === 404 || error.message.includes('No such object')
+    }
+  }
+}
+
+async function processBacklogFile(filename) {
+  // Extract metadata from filename: reports/[lens]/YYYY_MM_DD/metric.json
+  const match = filename.match(/reports\/(?:([^/]+)\/)?(\d{4}_\d{2}_\d{2})\/(.+?)(?:\.json)?$/)
+  if (!match) {
+    console.error(`Invalid backlog filename format: ${filename}`)
+    return { filename, success: false, error: 'Invalid format' }
+  }
+
+  const [, lensPath = '', dateStr, metric] = match
+  const date = dateStr.replace(/_/g, '-')
+  const lens = lensPath
+
+  // Ensure filename has .json extension
+  const fullFilename = filename.endsWith('.json') ? filename : `${filename}.json`
+
+  const result = await downloadAndParseFile(fullFilename, date, lens, metric)
+
+  // For backlog processing, upload immediately (single files)
+  if (result.success && result.rows.length > 0) {
+    try {
+      await uploadToBigQuery(result.rows)
+      return { ...result, uploaded: true }
+    } catch (error) {
+      return { ...result, success: false, error: error.message, uploaded: false }
+    }
+  }
+
+  return result
+}
+
+async function processImportTask(task) {
+  const { date, lens, metric, filename } = task
+  const result = await downloadAndParseFile(filename, date, lens, metric)
+
+  return {
+    ...task,
+    ...result
+  }
+}
+
+async function processBacklog() {
+  if (!BACKLOG || BACKLOG.length === 0) {
+    console.log('No backlog files to process')
+    return
+  }
+
+  console.log(`\nProcessing ${BACKLOG.length} backlog files...`)
+
+  let successCount = 0
+  let failCount = 0
+
+  for (const filename of BACKLOG) {
+    const result = await processBacklogFile(filename)
+
+    if (result.success) {
+      console.log(`✓ ${result.filename} (${result.rowCount} rows)`)
+      successCount++
+    } else {
+      console.log(`✗ ${result.filename}: ${result.error}`)
+      failCount++
+    }
+  }
+
+  console.log(`\nBacklog completed: ${successCount} successful, ${failCount} failed\n`)
+}
+
+async function processDateData(date) {
+  console.log(`\nProcessing date: ${date}`)
+
+  const allRows = []
+  let totalSuccess = 0
+  let totalNotFound = 0
+  let totalErrors = 0
+  let uploadedRows = 0 // stays 0 unless the BigQuery load below succeeds, so the summary never counts rows that were not written
+  const failedTasks = []
+
+  // Process each metric sequentially
+  for (const metric of histogramMetrics) {
+    console.log(` Processing metric: ${metric} (${histogramMetrics.indexOf(metric) + 1}/${histogramMetrics.length})`)
+
+    // Download all lenses for this metric in parallel
+    const lensPromises = lenses.map(async (lens) => {
+      const filename = `${CONFIG.storage.prefix}${lens}${date.replace(/-/g, '_')}/${metric}.json`
+      const task = {
+        date,
+        lens,
+        metric,
+        filename,
+        id: `${date}-${lens || 'all'}-${metric}`
+      }
+
+      return await processImportTask(task)
+    })
+
+    const results = await Promise.allSettled(lensPromises)
+
+    // Process results for this metric
+    let metricSuccess = 0
+    let metricNotFound = 0
+    let metricErrors = 0
+
+    results.forEach((result, index) => {
+      if (result.status === 'fulfilled') {
+        const taskResult = result.value
+        if (taskResult.success) {
+          // Append row by row; push(...rows) can exceed the call-stack argument limit on large arrays
+          for (const row of taskResult.rows) {
+            allRows.push(row)
+          }
+          metricSuccess++
+          totalSuccess++
+        } else if (taskResult.isNotFound) {
+          metricNotFound++
+          totalNotFound++
+        } else {
+          metricErrors++
+          totalErrors++
+          failedTasks.push(taskResult.filename)
+          console.error(` ✗ ${taskResult.id}: ${taskResult.error}`)
+        }
+      } else {
+        metricErrors++
+        totalErrors++
+        const lens = lenses[index]
+        const filename = `${CONFIG.storage.prefix}${lens}${date.replace(/-/g, '_')}/${metric}.json`
+        failedTasks.push(filename)
+        console.error(` ✗ ${date}-${lens || 'all'}-${metric}: ${result.reason?.message || 'Unknown error'}`)
+      }
+    })
+
+    console.log(` ${metricSuccess} success, ${metricNotFound} not found, ${metricErrors} errors`)
+  }
+
+  console.log(` Total files: ${totalSuccess} success, ${totalNotFound} not found, ${totalErrors} errors`)
+
+  // Upload all data for this date in a single operation
+  if (allRows.length > 0) {
+    console.log(` Uploading ${allRows.length.toLocaleString()} rows to BigQuery...`)
+    try {
+      await uploadToBigQuery(allRows)
+      uploadedRows = allRows.length
+      console.log(` ✓ Successfully uploaded all data for ${date}`)
+    } catch (error) {
+      console.error(` ✗ Failed to upload data for ${date}: ${error.message}`)
+      // Add all successful downloads to failed tasks since upload failed
+      for (const lens of lenses) {
+        for (const metric of histogramMetrics) {
+          const filename = `${CONFIG.storage.prefix}${lens}${date.replace(/-/g, '_')}/${metric}.json`
+          if (!failedTasks.includes(filename)) failedTasks.push(filename)
+        }
+      }
+    }
+  } else {
+    console.log(` No data to upload for ${date}`)
+  }
+
+  return {
+    date,
+    successCount: totalSuccess,
+    notFoundCount: totalNotFound,
+    errorCount: totalErrors,
+    totalRows: uploadedRows,
+    failedTasks
+  }
+}
+
+async function importHistogramData() {
+  // Process backlog first
+  await processBacklog()
+
+  console.log(`Processing ${dates.length} dates`)
+
+  let totalSuccess = 0
+  let totalNotFound = 0
+  let totalErrors = 0
+  let totalRows = 0
+  const allFailedTasks = []
+
+  for (const date of dates) {
+    if (CONFIG.skipDates.includes(date)) {
+      console.log(`Skipping date: ${date}`)
+      continue
+    }
+
+    const dateResult = await processDateData(date)
+
+    totalSuccess += dateResult.successCount
+    totalNotFound += dateResult.notFoundCount
+    totalErrors += dateResult.errorCount
+    totalRows += dateResult.totalRows
+    allFailedTasks.push(...dateResult.failedTasks)
+  }
+
+  console.log(`\n=== FINAL SUMMARY ===`)
+  console.log(`Dates processed: ${dates.filter(d => !CONFIG.skipDates.includes(d)).length}`)
+  console.log(`Total files successful: ${totalSuccess}`)
+  console.log(`Total files not found: ${totalNotFound}`)
+  console.log(`Total files with errors: ${totalErrors}`)
+  console.log(`Total rows uploaded: ${totalRows.toLocaleString()}`)
+
+  if (allFailedTasks.length > 0) {
+    console.log(`\n=== FAILED TASKS (for BACKLOG) ===`)
+    allFailedTasks.forEach(filename => console.log(` '${filename}',`))
+  }
+}
+
+importHistogramData().catch(console.error)
diff --git a/script/package.json b/script/package.json
new file mode 100644
index 0000000..dc5df04
--- /dev/null
+++ b/script/package.json
@@ -0,0 +1,7 @@
+{
+  "type": "module",
+  "dependencies": {
+    "@google-cloud/bigquery": "^7.9.1",
+    "@google-cloud/storage": "^7.14.0"
+  }
+}
diff --git a/script/timeseries_storage_sync.js b/script/timeseries_storage_sync.js
new file mode 100644
index 0000000..9fa9d4b
--- /dev/null
+++ b/script/timeseries_storage_sync.js
@@ -0,0 +1,251 @@
+import { Storage } from '@google-cloud/storage'
+import { BigQuery } from '@google-cloud/bigquery'
+import {
Readable } from 'stream' + +const storage = new Storage() +const bucketName = 'httparchive' +const storagePathPrefix = 'reports/' + +const bigquery = new BigQuery({ projectId: 'httparchive' }) +const datasetId = 'reports' +const tableId = 'timeseries' + +const lenses = [ + '', + 'drupal/', + 'magento/', + 'top100k/', + 'top10k/', + 'top1k/', + 'top1m/', + 'wordpress/' +] + +const dates = (function () { + const dates = [] + for (let year = 2016; year <= 2025; year++) { + for (let month = 1; month <= 12; month++) { + dates.push(`${year}_${String(month).padStart(2, '0')}_01`) + if (year <= 2018) { + dates.push(`${year}_${String(month).padStart(2, '0')}_15`) + } + if (year === 2025 && month === 1) { + break + } + } + } + return dates +})() + +const histogramMetrics = new Set([ + 'a11yButtonName', + 'a11yColorContrast', + 'a11yImageAlt', + 'a11yLabel', + 'a11yLinkName', + 'a11yScores', + 'asyncClipboardRead', + 'badgeClear', + 'badgeSet', + 'bootupJs', + 'bytesCss', + 'bytesFont', + 'bytesHtml', + 'bytesImg', + 'bytesJs', + 'bytesOther', + 'bytesTotal', + 'bytesVideo', + 'canonical', + 'contentIndex', + 'cruxFastDcl', + 'cruxFastFcp', + 'cruxFastFp', + 'cruxFastInp', + 'cruxFastLcp', + 'cruxFastOl', + 'cruxFastTtfb', + 'cruxLargeCls', + 'cruxPassesCWV', + 'cruxSlowFcp', + 'cruxSlowInp', + 'cruxSlowLcp', + 'cruxSlowTtfb', + 'cruxSmallCls', + 'dcl', + 'fcp', + 'fontDisplay', + 'getInstalledRelatedApps', + 'gzipSavings', + 'h2', + 'h3', + 'hreflang', + 'idleDetection', + 'imgLazy', + 'imgSavings', + 'legible', + 'linkText', + 'notificationTriggers', + 'numUrls', + 'offscreenImages', + 'ol', + 'optimizedImages', + 'pctHttps', + 'periodicBackgroundSync', + 'periodicBackgroundSyncRegister', + 'quicTransport', + 'reqCss', + 'reqFont', + 'reqHtml', + 'reqImg', + 'reqJs', + 'reqOther', + 'reqTotal', + 'reqVideo', + 'screenWakeLock', + 'speedIndex', + 'storageEstimate', + 'storagePersist', + 'swControlledPages', + 'tcp', + 'ttci', + 'webSocketStream' +]) + +async function 
downloadObject(bucketName, srcFilename) { + const contents = await storage.bucket(bucketName).file(srcFilename).download() + + return contents.toString() +} + +async function ensureTableExists() { + const schema = [ + { name: 'date', type: 'DATE' }, + { name: 'client', type: 'STRING' }, + { name: 'lens', type: 'STRING' }, + { name: 'metric', type: 'STRING' }, + { name: 'percent', type: 'FLOAT64' } + ] + + const table = bigquery.dataset(datasetId).table(tableId) + + try { + const [exists] = await table.exists() + if (!exists) { + console.log(`Creating table ${datasetId}.${tableId}`) + await table.create({ + schema: schema, + location: 'US', + timePartitioning: { + type: 'DAY', + field: 'date' + }, + clustering: { + fields: ['client', 'lens'] + } + }) + console.log(`Table ${datasetId}.${tableId} created successfully with partitioning and clustering`) + } else { + console.log(`Table ${datasetId}.${tableId} already exists`) + } + } catch (error) { + console.error('Error checking/creating table:', error) + throw error + } +} + +async function uploadToBigQuery(rows) { + const schema = [ + { name: 'date', type: 'DATE' }, + { name: 'client', type: 'STRING' }, + { name: 'lens', type: 'STRING' }, + { name: 'metric', type: 'STRING' }, + { name: 'percent', type: 'FLOAT64' } + ] + + return new Promise((resolve, reject) => { + try { + const table = bigquery.dataset(datasetId).table(tableId) + + // Convert rows to JSONL format + const jsonlData = rows.map(row => JSON.stringify(row)).join('\n') + + // Create a readable stream from the JSONL data + const dataStream = Readable.from([jsonlData]) + + // Create write stream with metadata + const writeStream = table.createWriteStream({ + sourceFormat: 'NEWLINE_DELIMITED_JSON', + schema: { + fields: schema + }, + writeDisposition: 'WRITE_APPEND', + createDisposition: 'CREATE_NEVER' // Table should already exist + }) + + // Handle events + writeStream.on('job', (job) => { + console.log(`Write stream job ${job.id} started`) + }) + + 
writeStream.on('complete', (job) => { + //console.log(`Write stream job ${job.id} completed successfully`) + console.log(`Successfully uploaded ${rows.length} rows using write stream`) + resolve(job) + }) + + writeStream.on('error', (error) => { + console.error('Error in write stream:', error) + reject(error) + }) + + // Pipe the data stream to the write stream + dataStream.pipe(writeStream) + + } catch (error) { + console.error('Error setting up write stream:', error) + reject(error) + } + }) +} + +async function importHistogramData() { + // Ensure the destination table exists before importing data + await ensureTableExists() + + for (const metric of histogramMetrics) { + for (const lens of lenses) { + const srcFilename = `${storagePathPrefix}${lens}${metric}.json` + console.log(`Downloading ${srcFilename}`) + + try { + const data = await downloadObject(bucketName, srcFilename) + + const rows = JSON.parse(data).map(data => ({ + date: data.date.replace(/_/g, '-'), + client: data.client, + lens: lens.replace('/', ''), + metric, + percent: data.percent + })) + + console.log(`Uploading ${rows.length} rows to BigQuery`) + + await uploadToBigQuery(rows) + } catch (error) { + if (error.code === 404 || error.message.includes('No such object')) { + console.log(`File not found: ${srcFilename} - skipping`) + continue + } else { + console.error(`Error processing ${srcFilename}:`, error.message) + // Continue with next file instead of stopping + continue + } + } + //break // TEMP: only do first metric + } + //break // TEMP: only do first lens + } +} + +importHistogramData().catch(console.error) From 50ec83fd11dc4e121d10fd72e11f75a8eca1ef0f Mon Sep 17 00:00:00 2001 From: Max Ostapenko <1611259+max-ostapenko@users.noreply.github.com> Date: Wed, 10 Sep 2025 11:18:11 +0200 Subject: [PATCH 3/3] lint --- script/histogram_storage_sync.js | 4 ++-- script/timeseries_storage_sync.js | 16 ---------------- 2 files changed, 2 insertions(+), 18 deletions(-) diff --git 
a/script/histogram_storage_sync.js b/script/histogram_storage_sync.js index e828565..36a7dc9 100644 --- a/script/histogram_storage_sync.js +++ b/script/histogram_storage_sync.js @@ -341,7 +341,7 @@ async function importHistogramData() { allFailedTasks.push(...dateResult.failedTasks) } - console.log(`\n=== FINAL SUMMARY ===`) + console.log('\n=== FINAL SUMMARY ===') console.log(`Dates processed: ${dates.filter(d => !CONFIG.skipDates.includes(d)).length}`) console.log(`Total files successful: ${totalSuccess}`) console.log(`Total files not found: ${totalNotFound}`) @@ -349,7 +349,7 @@ async function importHistogramData() { console.log(`Total rows uploaded: ${totalRows.toLocaleString()}`) if (allFailedTasks.length > 0) { - console.log(`\n=== FAILED TASKS (for BACKLOG) ===`) + console.log('\n=== FAILED TASKS (for BACKLOG) ===') allFailedTasks.forEach(filename => console.log(` '${filename}',`)) } } diff --git a/script/timeseries_storage_sync.js b/script/timeseries_storage_sync.js index 9fa9d4b..fc0b38b 100644 --- a/script/timeseries_storage_sync.js +++ b/script/timeseries_storage_sync.js @@ -21,22 +21,6 @@ const lenses = [ 'wordpress/' ] -const dates = (function () { - const dates = [] - for (let year = 2016; year <= 2025; year++) { - for (let month = 1; month <= 12; month++) { - dates.push(`${year}_${String(month).padStart(2, '0')}_01`) - if (year <= 2018) { - dates.push(`${year}_${String(month).padStart(2, '0')}_15`) - } - if (year === 2025 && month === 1) { - break - } - } - } - return dates -})() - const histogramMetrics = new Set([ 'a11yButtonName', 'a11yColorContrast',