From 1d8d997cb71a954018a00f340c373e7d007e9d1d Mon Sep 17 00:00:00 2001 From: Demenech Date: Wed, 24 Sep 2025 09:34:10 -0300 Subject: [PATCH] feat(datasets): cache all datasets and use the cache on the datasets search page --- components/dataset/search/ListOfDatasets.tsx | 2 +- components/dataset/search/SearchContext.tsx | 20 +++++- lib/data.ts | 69 ++++++++++++++++++++ pages/api/datasets/search.tsx | 24 +++++++ 4 files changed, 111 insertions(+), 4 deletions(-) create mode 100644 lib/data.ts create mode 100644 pages/api/datasets/search.tsx diff --git a/components/dataset/search/ListOfDatasets.tsx b/components/dataset/search/ListOfDatasets.tsx index ad30c2b..8f5d8ce 100644 --- a/components/dataset/search/ListOfDatasets.tsx +++ b/components/dataset/search/ListOfDatasets.tsx @@ -51,7 +51,7 @@ function ListItems() {
- {searchResults?.datasets?.map((dataset) => ( + {searchResults?.results?.map((dataset) => ( ))}
diff --git a/components/dataset/search/SearchContext.tsx b/components/dataset/search/SearchContext.tsx
index 29a29b7..8d7e526 100644
--- a/components/dataset/search/SearchContext.tsx
+++ b/components/dataset/search/SearchContext.tsx
@@ -66,6 +66,7 @@ export const SearchStateProvider = ({
     offset: options.type != "dataset" ? 0 : options.offset,
     type: "dataset",
   };
+  // NOTE: our goal is to get rid of this call
   const {
     data: packageSearchResults,
     isValidating: isLoadingPackageSearchResults,
@@ -77,6 +78,22 @@ export const SearchStateProvider = ({
     { use: [laggy] }
   );
 
+  const { data: cachedDatasets, isValidating: isLoadingCachedDatasets } =
+    useSWR([packagesOptions], async (options) => {
+      const searchParams = new URLSearchParams();
+      searchParams.set("limit", String(options.limit));
+      // `/` binds tighter than `??`, so the fallback must be parenthesised;
+      // otherwise the offset is never divided by the page size.
+      const page = Math.floor((options.offset ?? 0) / options.limit) + 1;
+      searchParams.set("page", String(page));
+      searchParams.set("query", String(options.query));
+      const datasets = await fetch(
+        `/api/datasets/search?${searchParams.toString()}`
+      );
+      const data = await datasets.json();
+      return data;
+    });
+
   const visualizationsOptions = {
     ...options,
     resFormat: [],
@@ -97,11 +114,11 @@ export const SearchStateProvider = ({
   const searchResults =
     options.type === "visualization"
       ? visualizationsSearchResults
-      : packageSearchResults;
+      : cachedDatasets;
   const isLoading =
     options.type === "visualization"
       ? isLoadingVisualizations
-      : isLoadingPackageSearchResults;
+      : isLoadingCachedDatasets;
 
   const packageSearchFacets = packageSearchResults?.search_facets ?? {};
   const visualizationsSearchFacets =
@@ -111,7 +128,6 @@ export const SearchStateProvider = ({
       ? 
visualizationsSearchFacets
       : packageSearchFacets;
 
-
   const value: SearchStateContext = {
     options,
     setOptions: (options) => setQueryParam(options),
diff --git a/lib/data.ts b/lib/data.ts
new file mode 100644
index 0000000..7c0da55
--- /dev/null
+++ b/lib/data.ts
@@ -0,0 +1,92 @@
+import { searchDatasets } from "@/lib/queries/dataset";
+import { Dataset } from "@/schemas/dataset.interface";
+import { unstable_cache } from "next/cache";
+import { z } from "zod";
+
+// FIXME: how can we prevent simultaneous cache revalidations
+// when a cache revalidation is requested while another is already
+// running? Wouldn't happen with revalidate set to false
+/**
+ * Loads every dataset by paging through `searchDatasets` (10 per request,
+ * with empty group/org/tag filters) until a page comes back empty, and
+ * wraps the combined list in `unstable_cache` under the "cached-datasets" key.
+ *
+ * `revalidate` is `false`, so no time-based revalidation is configured here.
+ */
+export const getCachedDatasets = unstable_cache(
+  async () => {
+    console.log("Revalidating datasets cache: ", new Date().getTime());
+    const allDatasets: Dataset[] = [];
+    const limit = 10;
+    let page = 0;
+    while (true) {
+      const pageDatasets = await searchDatasets({
+        limit,
+        offset: limit * page,
+        groups: [],
+        orgs: [],
+        tags: [],
+      });
+
+      if (!pageDatasets?.results?.length) {
+        break;
+      }
+
+      allDatasets.push(...pageDatasets.results);
+      page++;
+    }
+    return allDatasets;
+  },
+  ["cached-datasets"],
+  {
+    revalidate: false, // TODO: what happens if the UI triggers a time-based revalidation?
+  }
+);
+
+/**
+ * Options accepted by the dataset search endpoint.
+ *
+ * Each value is coerced with `Number` before it is range-checked:
+ * `limit` must be between 0 and 25 (default 10) and `page` must be at
+ * least 1 (default 1).
+ */
+export const searchOptionsSchema = z.object({
+  limit: z
+    .preprocess((x) => Number(x), z.number().min(0).max(25))
+    .optional()
+    .default(10),
+  page: z
+    .preprocess((x) => Number(x), z.number().min(1))
+    .optional()
+    .default(1),
+});
+
+/** Validated search options, as produced by `searchOptionsSchema`. */
+type SearchOptions = z.infer<typeof searchOptionsSchema>;
+
+// NOTE: for search, I think we should use a lib like minisearch
+// for the FTS, and use a DTO to return results. We could even
+// cache this list of datasets DTO. This would reduce data transfer
+// and increase performance in the pages that use search
+// The search index can be a module-level singleton, it doesn't have
+// to be cached
+/**
+ * Returns one page of the cached dataset list.
+ *
+ * No filtering is applied yet, so `count` is the size of the whole cache.
+ *
+ * @param options - 1-based `page` and page size `limit`
+ * @returns the datasets on the requested page plus the total count
+ */
+export async function searchCachedDatasets(options: SearchOptions) {
+  const { page, limit } = options;
+  const allDatasets = await getCachedDatasets();
+  const filteredDatasets = allDatasets;
+  // NOTE: maybe https://github.com/itemsapi/itemsjs instead of minisearch ?
+
+  const startIdx = (page - 1) * limit;
+  const endIdx = startIdx + limit;
+  const paginatedDatasets = filteredDatasets.slice(startIdx, endIdx);
+  return { results: paginatedDatasets, count: filteredDatasets.length };
+}
+
diff --git a/pages/api/datasets/search.tsx b/pages/api/datasets/search.tsx
new file mode 100644
index 0000000..480bf5e
--- /dev/null
+++ b/pages/api/datasets/search.tsx
@@ -0,0 +1,36 @@
+import { searchCachedDatasets, searchOptionsSchema } from "@/lib/data";
+import { NextApiRequest, NextApiResponse } from "next";
+import { ZodError } from "zod";
+
+/**
+ * `GET /api/datasets/search`: returns one page of the cached datasets.
+ *
+ * The query string is validated with `searchOptionsSchema`. Responds with
+ * 200 and the search result, 400 when validation fails, 405 for any method
+ * other than GET, and 500 for every other failure.
+ */
+export default async function handler(
+  req: NextApiRequest,
+  res: NextApiResponse
+) {
+  if (req.method === "GET") {
+    try {
+      const validatedOptions = searchOptionsSchema.parse(req.query);
+      const results = await searchCachedDatasets(validatedOptions);
+
+      res.status(200).json(results);
+    } catch (e) {
+      if (e instanceof ZodError) {
+        res.status(400).json({ message: "Validation Error", errors: e.issues });
+      } else {
+        // Non-validation failures used to be swallowed here, which left the
+        // request without any response. Log and answer with a 500 instead.
+        console.error("Dataset search failed:", e);
+        res.status(500).json({ message: "Internal Server Error" });
+      }
+    }
+  } else {
+    res.setHeader("Allow", ["GET"]);
+    res.status(405).end(`Method ${req.method} Not Allowed`);
+  }
+}