From c7f9ee938bba3a238eab8471df6dcf394d8a412d Mon Sep 17 00:00:00 2001 From: Gregg Donovan Date: Wed, 25 Jun 2025 19:56:24 -0400 Subject: [PATCH 1/7] multi-currency example --- multi-currency/README.md | 105 ++ multi-currency/currency.xml | 908 ++++++++++++++++++ multi-currency/currency_rates.jsonl | 30 + multi-currency/currency_xml_to_vespa_docs.py | 33 + multi-currency/generate_price_filter_query.py | 77 ++ multi-currency/items.jsonl | 100 ++ multi-currency/schemas/currency.sd | 8 + multi-currency/schemas/item.sd | 19 + multi-currency/services.xml | 20 + 9 files changed, 1300 insertions(+) create mode 100644 multi-currency/README.md create mode 100644 multi-currency/currency.xml create mode 100644 multi-currency/currency_rates.jsonl create mode 100644 multi-currency/currency_xml_to_vespa_docs.py create mode 100644 multi-currency/generate_price_filter_query.py create mode 100644 multi-currency/items.jsonl create mode 100644 multi-currency/schemas/currency.sd create mode 100644 multi-currency/schemas/item.sd create mode 100644 multi-currency/services.xml diff --git a/multi-currency/README.md b/multi-currency/README.md new file mode 100644 index 000000000..710fdbbc5 --- /dev/null +++ b/multi-currency/README.md @@ -0,0 +1,105 @@ +# Multi-Currency Vespa Application + +This Vespa application demonstrates multi-currency price handling using global documents holding currency conversion rates to USD. +Item prices are stored in their local currencies, but the app can hydrate and rank items based on their USD equivalent prices. +Price range Filtering can be done by using native currency filtering. + +## Architecture + +The application consists of two document types: +- `currency`: Global document storing currency conversion factors to USD +- `item`: Items with prices in different currencies, referencing currency documents + +The `currency.xml` file contains conversion rates between 30 different currencies. + +## Setup + +1. 
**Start Vespa container**: + ```bash + vespa config set target local + docker pull vespaengine/vespa + docker run --detach --name vespa --hostname vespa-container \ + --publish 127.0.0.1:8080:8080 --publish 127.0.0.1:19071:19071 \ + vespaengine/vespa + ``` + +2. **Wait for Vespa to be ready**: + ```bash + vespa status deploy --wait 300 + ``` + +3. **Deploy the application**: + ```bash + vespa deploy --wait 300 + ``` + +## Loading Data + +1. **Feed currency conversion factors**: + +Convert the `currency.xml` file to Vespa documents with conversion factors to USD: +```bash +python3 currency_xml_to_vespa_docs.py | vespa feed - +``` + +The documents look like: +```jsonl +{"put": "id:shopping:currency::usd", "fields": {"factor": 1.0}} +{"put": "id:shopping:currency::aud", "fields": {"factor": 0.67884054}} +{"put": "id:shopping:currency::cad", "fields": {"factor": 0.76028283}} +{"put": "id:shopping:currency::cny", "fields": {"factor": 0.1442157}} +``` + +2. **Feed items with currency references**: + +Feed the sample documents: + +```bash +vespa feed items.jsonl +``` + +The item documents look like: + +```jsonl +{"put": "id:shopping:item::item-1", "fields": {"currency_ref": "id:shopping:currency::usd", "price": 3836, "item_name": "emerald gemstone bracelet"}} +{"put": "id:shopping:item::item-2", "fields": {"currency_ref": "id:shopping:currency::usd", "price": 14, "item_name": "Handmade ceramic ring dish"}} +{"put": "id:shopping:item::item-3", "fields": {"currency_ref": "id:shopping:currency::usd", "price": 45, "item_name": "Handmade wooden cutting board"}} +``` + +## Querying + +1. **View all documents**: + ```bash + vespa visit + ``` + +2. **Query items with currency-based price filtering**: +```bash +vespa query 'select * from item where (currency_ref matches "id:shopping:currency::usd" and price >= 4000.0)' +``` + +3. 
**Filter by all currencies within a range**: + ```bash + vespa query yql="$(python3 generate_price_filter_query.py --min_price 20 --max_price 100 --currency USD)" + ``` + +## Key Features + +- **Global currency documents**: Currency data is replicated across all content nodes +- **Cross-document field import**: Items can access currency factors via `currency_ref.factor` +- **USD price calculation**: Rank profile computes `usd_price: price * currency_factor` + +## Schema Details + +### Currency Schema +- `factor`: Double field representing conversion rate to USD + +### Item Schema +- `item_name`: String field for item description +- `price`: Double field for price in local currency +- `currency_ref`: Reference to currency document +- Imported field: `currency_factor` from referenced currency document + +## TODOs + +- Show how to hydrate a USD price \ No newline at end of file diff --git a/multi-currency/currency.xml b/multi-currency/currency.xml new file mode 100644 index 000000000..5c089ed83 --- /dev/null +++ b/multi-currency/currency.xml @@ -0,0 +1,908 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/multi-currency/currency_rates.jsonl b/multi-currency/currency_rates.jsonl new file mode 100644 index 000000000..14b37763f --- /dev/null +++ b/multi-currency/currency_rates.jsonl @@ -0,0 +1,30 @@ +{"put": "id:mynamespace:currency::usd", "fields": {"factor": 1.0}} +{"put": "id:mynamespace:currency::aud", "fields": {"factor": 0.67884054}} +{"put": "id:mynamespace:currency::cad", "fields": {"factor": 0.76028283}} +{"put": "id:mynamespace:currency::cny", "fields": {"factor": 0.1442157}} +{"put": "id:mynamespace:currency::czk", "fields": {"factor": 0.04914198}} +{"put": "id:mynamespace:currency::dkk", "fields": {"factor": 0.16289298}} +{"put": "id:mynamespace:currency::hkd", "fields": {"factor": 0.13271224}} +{"put": "id:mynamespace:currency::huf", "fields": {"factor": 0.00303453}} +{"put": "id:mynamespace:currency::inr", "fields": {"factor": 0.0120344}} +{"put": "id:mynamespace:currency::idr", "fields": {"factor": 6.354e-05}} +{"put": "id:mynamespace:currency::ils", "fields": {"factor": 0.30717248}} +{"put": "id:mynamespace:currency::jpy", "fields": {"factor": 0.00720471}} +{"put": 
"id:mynamespace:currency::myr", "fields": {"factor": 0.24654832}} +{"put": "id:mynamespace:currency::mxn", "fields": {"factor": 0.055135}} +{"put": "id:mynamespace:currency::mad", "fields": {"factor": 0.11377527}} +{"put": "id:mynamespace:currency::nzd", "fields": {"factor": 0.62968327}} +{"put": "id:mynamespace:currency::nok", "fields": {"factor": 0.10324712}} +{"put": "id:mynamespace:currency::php", "fields": {"factor": 0.01839195}} +{"put": "id:mynamespace:currency::sgd", "fields": {"factor": 0.81559416}} +{"put": "id:mynamespace:currency::vnd", "fields": {"factor": 3.957e-05}} +{"put": "id:mynamespace:currency::zar", "fields": {"factor": 0.05826327}} +{"put": "id:mynamespace:currency::sek", "fields": {"factor": 0.11001705}} +{"put": "id:mynamespace:currency::chf", "fields": {"factor": 1.29600829}} +{"put": "id:mynamespace:currency::thb", "fields": {"factor": 0.03207987}} +{"put": "id:mynamespace:currency::gbp", "fields": {"factor": 1.42450142}} +{"put": "id:mynamespace:currency::twd", "fields": {"factor": 0.03562586}} +{"put": "id:mynamespace:currency::try", "fields": {"factor": 0.02602804}} +{"put": "id:mynamespace:currency::eur", "fields": {"factor": 1.21521449}} +{"put": "id:mynamespace:currency::pln", "fields": {"factor": 0.28613941}} +{"put": "id:mynamespace:currency::brl", "fields": {"factor": 0.18918612}} diff --git a/multi-currency/currency_xml_to_vespa_docs.py b/multi-currency/currency_xml_to_vespa_docs.py new file mode 100644 index 000000000..3202fcf3c --- /dev/null +++ b/multi-currency/currency_xml_to_vespa_docs.py @@ -0,0 +1,33 @@ +import sys +import xml.etree.ElementTree as ET +import json + +def convert_currency_xml_to_vespa_jsonl(xml_file): + # Parse the XML file + tree = ET.parse(xml_file) + root = tree.getroot() + + # Add USD to USD conversion (factor = 1.0) + usd_doc = { + "put": "id:shopping:currency::usd", + "fields": {"factor": 1.0} + } + sys.stdout.write(json.dumps(usd_doc) + '\n') + + # Find all rate elements where 'to' attribute is 'USD' 
+ for rate in root.findall('.//rate[@to="USD"]'): + from_currency = rate.get('from').lower() + factor = float(rate.get('rate')) + + # Create Vespa document + doc = { + "put": f"id:shopping:currency::{from_currency}", + "fields": {"factor": factor} + } + + # Write to stdout + sys.stdout.write(json.dumps(doc) + '\n') + +# Usage +if __name__ == "__main__": + convert_currency_xml_to_vespa_jsonl('currency.xml') \ No newline at end of file diff --git a/multi-currency/generate_price_filter_query.py b/multi-currency/generate_price_filter_query.py new file mode 100644 index 000000000..634c878e4 --- /dev/null +++ b/multi-currency/generate_price_filter_query.py @@ -0,0 +1,77 @@ +import sys +import json +import argparse +import xml.etree.ElementTree as ET +from collections import defaultdict + +def load_conversion_rates(xml_file): + """ + Parses the currency XML file and builds a conversion rate table. + Returns a dictionary of rates and a sorted list of all currencies. + """ + try: + tree = ET.parse(xml_file) + root = tree.getroot() + except (ET.ParseError, FileNotFoundError) as e: + print(f"Error: Could not read or parse the currency XML file '{xml_file}'.\n{e}", file=sys.stderr) + sys.exit(1) + + rates = defaultdict(dict) + all_currencies = set() + + for rate_element in root.findall('.//rate'): + from_curr = rate_element.get('from').upper() + to_curr = rate_element.get('to').upper() + rate_value = float(rate_element.get('rate')) + rates[from_curr][to_curr] = rate_value + all_currencies.add(from_curr) + all_currencies.add(to_curr) + + for currency in all_currencies: + rates[currency][currency] = 1.0 + + return rates, sorted(list(all_currencies)) + +def main(): + """ + Main function to generate the Vespa query. + """ + parser = argparse.ArgumentParser( + description="Generate a Vespa query to filter by price across multiple currencies." 
+ ) + parser.add_argument('--min_price', type=float, required=True, help='Minimum price.') + parser.add_argument('--max_price', type=float, required=True, help='Maximum price.') + parser.add_argument('--currency', type=str, required=True, help='The currency for the given min/max price (e.g., USD).') + parser.add_argument('--currency_file', type=str, default='currency.xml', help='Path to the currency conversion XML file.') + + args = parser.parse_args() + + if args.min_price > args.max_price: + print("Error: min_price cannot be greater than max_price.", file=sys.stderr) + sys.exit(1) + + rates, all_currencies = load_conversion_rates(args.currency_file) + source_currency = args.currency.upper() + + if source_currency not in rates: + print(f"Error: The specified currency '{source_currency}' is not found in the conversion table.", file=sys.stderr) + sys.exit(1) + + or_conditions = [] + for target_currency in all_currencies: + if target_currency in rates[source_currency]: + rate = rates[source_currency][target_currency] + converted_min = args.min_price * rate + converted_max = args.max_price * rate + + yql = f"(currency_ref matches \"id:shopping:currency::{target_currency.lower()}\" and price >= {converted_min} and price <= {converted_max})" + + or_conditions.append(yql) + else: + print(f"Warning: No conversion rate from {source_currency} to {target_currency}. 
Skipping.", file=sys.stderr) + + where = " or ".join(or_conditions) + print(f"select * from item where {where}") + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/multi-currency/items.jsonl b/multi-currency/items.jsonl new file mode 100644 index 000000000..5a2f96ed4 --- /dev/null +++ b/multi-currency/items.jsonl @@ -0,0 +1,100 @@ +{"put": "id:shopping:item::item-1", "fields": {"currency_ref": "id:shopping:currency::usd", "price": 3836, "item_name": "emerald gemstone bracelet"}} +{"put": "id:shopping:item::item-2", "fields": {"currency_ref": "id:shopping:currency::usd", "price": 14, "item_name": "Handmade ceramic ring dish"}} +{"put": "id:shopping:item::item-3", "fields": {"currency_ref": "id:shopping:currency::usd", "price": 45, "item_name": "Handmade wooden cutting board"}} +{"put": "id:shopping:item::item-4", "fields": {"currency_ref": "id:shopping:currency::usd", "price": 34, "item_name": "Personalized wooden cutting board"}} +{"put": "id:shopping:item::item-5", "fields": {"currency_ref": "id:shopping:currency::usd", "price": 7, "item_name": "Vintage letterpress greeting card"}} +{"put": "id:shopping:item::item-6", "fields": {"currency_ref": "id:shopping:currency::usd", "price": 4172, "item_name": "14k gold hammered ring"}} +{"put": "id:shopping:item::item-7", "fields": {"currency_ref": "id:shopping:currency::gbp", "price": 1664, "item_name": "diamond engagement ring"}} +{"put": "id:shopping:item::item-8", "fields": {"currency_ref": "id:shopping:currency::eur", "price": 4027, "item_name": "sapphire drop earrings"}} +{"put": "id:shopping:item::item-9", "fields": {"currency_ref": "id:shopping:currency::usd", "price": 21, "item_name": "Handmade knitted scarf"}} +{"put": "id:shopping:item::item-10", "fields": {"currency_ref": "id:shopping:currency::usd", "price": 55, "item_name": "Personalized letterpress greeting card"}} +{"put": "id:shopping:item::item-11", "fields": {"currency_ref": "id:shopping:currency::usd", "price": 1, 
"item_name": "Minimalist beaded necklace"}} +{"put": "id:shopping:item::item-12", "fields": {"currency_ref": "id:shopping:currency::usd", "price": 24, "item_name": "Handcrafted hand stamped necklace"}} +{"put": "id:shopping:item::item-13", "fields": {"currency_ref": "id:shopping:currency::usd", "price": 49, "item_name": "Artisan crochet baby blanket"}} +{"put": "id:shopping:item::item-14", "fields": {"currency_ref": "id:shopping:currency::usd", "price": 17, "item_name": "Handcrafted macrame wall hanging"}} +{"put": "id:shopping:item::item-15", "fields": {"currency_ref": "id:shopping:currency::gbp", "price": 20, "item_name": "Vintage crochet baby blanket"}} +{"put": "id:shopping:item::item-16", "fields": {"currency_ref": "id:shopping:currency::usd", "price": 29, "item_name": "Minimalist personalized leather wallet"}} +{"put": "id:shopping:item::item-17", "fields": {"currency_ref": "id:shopping:currency::usd", "price": 4241, "item_name": "custom engagement ring"}} +{"put": "id:shopping:item::item-18", "fields": {"currency_ref": "id:shopping:currency::usd", "price": 52, "item_name": "Personalized handmade pottery bowl"}} +{"put": "id:shopping:item::item-19", "fields": {"currency_ref": "id:shopping:currency::usd", "price": 20, "item_name": "Handmade resin keychain"}} +{"put": "id:shopping:item::item-20", "fields": {"currency_ref": "id:shopping:currency::myr", "price": 23, "item_name": "Handcrafted macrame wall hanging"}} +{"put": "id:shopping:item::item-21", "fields": {"currency_ref": "id:shopping:currency::usd", "price": 23, "item_name": "Personalized personalized leather wallet"}} +{"put": "id:shopping:item::item-22", "fields": {"currency_ref": "id:shopping:currency::usd", "price": 63, "item_name": "Handcrafted geode coaster set"}} +{"put": "id:shopping:item::item-23", "fields": {"currency_ref": "id:shopping:currency::usd", "price": 63, "item_name": "Handmade leather journal"}} +{"put": "id:shopping:item::item-24", "fields": {"currency_ref": 
"id:shopping:currency::usd", "price": 3383, "item_name": "18k gold hoop earrings"}} +{"put": "id:shopping:item::item-25", "fields": {"currency_ref": "id:shopping:currency::usd", "price": 45, "item_name": "Rustic beaded necklace"}} +{"put": "id:shopping:item::item-26", "fields": {"currency_ref": "id:shopping:currency::usd", "price": 3408, "item_name": "18k gold hoop earrings"}} +{"put": "id:shopping:item::item-27", "fields": {"currency_ref": "id:shopping:currency::usd", "price": 36, "item_name": "Handmade personalized leather wallet"}} +{"put": "id:shopping:item::item-28", "fields": {"currency_ref": "id:shopping:currency::usd", "price": 41, "item_name": "Rustic beaded necklace"}} +{"put": "id:shopping:item::item-29", "fields": {"currency_ref": "id:shopping:currency::php", "price": 52, "item_name": "Eco-friendly macrame wall hanging"}} +{"put": "id:shopping:item::item-30", "fields": {"currency_ref": "id:shopping:currency::eur", "price": 16, "item_name": "Minimalist beaded necklace"}} +{"put": "id:shopping:item::item-31", "fields": {"currency_ref": "id:shopping:currency::usd", "price": 48, "item_name": "Rustic ceramic mug"}} +{"put": "id:shopping:item::item-32", "fields": {"currency_ref": "id:shopping:currency::eur", "price": 11, "item_name": "Minimalist embroidered tea towel"}} +{"put": "id:shopping:item::item-33", "fields": {"currency_ref": "id:shopping:currency::eur", "price": 20, "item_name": "Handmade polymer clay earrings"}} +{"put": "id:shopping:item::item-34", "fields": {"currency_ref": "id:shopping:currency::usd", "price": 44, "item_name": "Boho ceramic ring dish"}} +{"put": "id:shopping:item::item-35", "fields": {"currency_ref": "id:shopping:currency::usd", "price": 9, "item_name": "Minimalist polymer clay earrings"}} +{"put": "id:shopping:item::item-36", "fields": {"currency_ref": "id:shopping:currency::usd", "price": 28, "item_name": "Artisan ceramic ring dish"}} +{"put": "id:shopping:item::item-37", "fields": {"currency_ref": "id:shopping:currency::usd", 
"price": 17, "item_name": "Eco-friendly beaded necklace"}} +{"put": "id:shopping:item::item-38", "fields": {"currency_ref": "id:shopping:currency::usd", "price": 26, "item_name": "Eco-friendly knitted scarf"}} +{"put": "id:shopping:item::item-39", "fields": {"currency_ref": "id:shopping:currency::usd", "price": 1, "item_name": "Custom polymer clay earrings"}} +{"put": "id:shopping:item::item-40", "fields": {"currency_ref": "id:shopping:currency::usd", "price": 3, "item_name": "Handcrafted leather journal"}} +{"put": "id:shopping:item::item-41", "fields": {"currency_ref": "id:shopping:currency::usd", "price": 20, "item_name": "Minimalist embroidered tea towel"}} +{"put": "id:shopping:item::item-42", "fields": {"currency_ref": "id:shopping:currency::gbp", "price": 44, "item_name": "Boho polymer clay earrings"}} +{"put": "id:shopping:item::item-43", "fields": {"currency_ref": "id:shopping:currency::usd", "price": 27, "item_name": "Boho ceramic ring dish"}} +{"put": "id:shopping:item::item-44", "fields": {"currency_ref": "id:shopping:currency::usd", "price": 30, "item_name": "Personalized polymer clay earrings"}} +{"put": "id:shopping:item::item-45", "fields": {"currency_ref": "id:shopping:currency::usd", "price": 40, "item_name": "Handmade hand-poured soap"}} +{"put": "id:shopping:item::item-46", "fields": {"currency_ref": "id:shopping:currency::usd", "price": 17, "item_name": "Artisan felted wool slippers"}} +{"put": "id:shopping:item::item-47", "fields": {"currency_ref": "id:shopping:currency::usd", "price": 34, "item_name": "Custom personalized leather wallet"}} +{"put": "id:shopping:item::item-48", "fields": {"currency_ref": "id:shopping:currency::usd", "price": 18, "item_name": "Eco-friendly ceramic ring dish"}} +{"put": "id:shopping:item::item-49", "fields": {"currency_ref": "id:shopping:currency::eur", "price": 4, "item_name": "Artisan crochet baby blanket"}} +{"put": "id:shopping:item::item-50", "fields": {"currency_ref": "id:shopping:currency::usd", "price": 
24, "item_name": "Handcrafted leather journal"}} +{"put": "id:shopping:item::item-51", "fields": {"currency_ref": "id:shopping:currency::usd", "price": 4550, "item_name": "emerald gemstone bracelet"}} +{"put": "id:shopping:item::item-52", "fields": {"currency_ref": "id:shopping:currency::usd", "price": 39, "item_name": "Handmade resin keychain"}} +{"put": "id:shopping:item::item-53", "fields": {"currency_ref": "id:shopping:currency::gbp", "price": 35, "item_name": "Rustic felted wool slippers"}} +{"put": "id:shopping:item::item-54", "fields": {"currency_ref": "id:shopping:currency::usd", "price": 30, "item_name": "Minimalist hand-poured soap"}} +{"put": "id:shopping:item::item-55", "fields": {"currency_ref": "id:shopping:currency::gbp", "price": 66, "item_name": "Handmade hand stamped necklace"}} +{"put": "id:shopping:item::item-56", "fields": {"currency_ref": "id:shopping:currency::usd", "price": 36, "item_name": "Custom personalized leather wallet"}} +{"put": "id:shopping:item::item-57", "fields": {"currency_ref": "id:shopping:currency::usd", "price": 2924, "item_name": "14k gold hammered ring"}} +{"put": "id:shopping:item::item-58", "fields": {"currency_ref": "id:shopping:currency::gbp", "price": 45, "item_name": "Artisan felted wool slippers"}} +{"put": "id:shopping:item::item-59", "fields": {"currency_ref": "id:shopping:currency::usd", "price": 64, "item_name": "Artisan ceramic ring dish"}} +{"put": "id:shopping:item::item-60", "fields": {"currency_ref": "id:shopping:currency::usd", "price": 11, "item_name": "Minimalist embroidered tea towel"}} +{"put": "id:shopping:item::item-61", "fields": {"currency_ref": "id:shopping:currency::usd", "price": 14, "item_name": "Eco-friendly handmade pottery bowl"}} +{"put": "id:shopping:item::item-62", "fields": {"currency_ref": "id:shopping:currency::usd", "price": 56, "item_name": "Eco-friendly polymer clay earrings"}} +{"put": "id:shopping:item::item-63", "fields": {"currency_ref": "id:shopping:currency::usd", "price": 
18, "item_name": "Minimalist macrame wall hanging"}} +{"put": "id:shopping:item::item-64", "fields": {"currency_ref": "id:shopping:currency::usd", "price": 4566, "item_name": "18k gold hoop earrings"}} +{"put": "id:shopping:item::item-65", "fields": {"currency_ref": "id:shopping:currency::usd", "price": 30, "item_name": "Eco-friendly personalized leather wallet"}} +{"put": "id:shopping:item::item-66", "fields": {"currency_ref": "id:shopping:currency::usd", "price": 46, "item_name": "Personalized letterpress greeting card"}} +{"put": "id:shopping:item::item-67", "fields": {"currency_ref": "id:shopping:currency::gbp", "price": 43, "item_name": "Artisan knitted scarf"}} +{"put": "id:shopping:item::item-68", "fields": {"currency_ref": "id:shopping:currency::eur", "price": 56, "item_name": "Handmade felted wool slippers"}} +{"put": "id:shopping:item::item-69", "fields": {"currency_ref": "id:shopping:currency::eur", "price": 52, "item_name": "Vintage ceramic succulent planter"}} +{"put": "id:shopping:item::item-70", "fields": {"currency_ref": "id:shopping:currency::usd", "price": 46, "item_name": "Minimalist hand stamped necklace"}} +{"put": "id:shopping:item::item-71", "fields": {"currency_ref": "id:shopping:currency::usd", "price": 1849, "item_name": "diamond engagement ring"}} +{"put": "id:shopping:item::item-72", "fields": {"currency_ref": "id:shopping:currency::usd", "price": 18, "item_name": "Handcrafted hand-poured soap"}} +{"put": "id:shopping:item::item-73", "fields": {"currency_ref": "id:shopping:currency::usd", "price": 42, "item_name": "Eco-friendly embroidered tea towel"}} +{"put": "id:shopping:item::item-74", "fields": {"currency_ref": "id:shopping:currency::usd", "price": 4126, "item_name": "sapphire drop earrings"}} +{"put": "id:shopping:item::item-75", "fields": {"currency_ref": "id:shopping:currency::usd", "price": 35, "item_name": "Personalized polymer clay earrings"}} +{"put": "id:shopping:item::item-76", "fields": {"currency_ref": 
"id:shopping:currency::cad", "price": 3714, "item_name": "custom engagement ring"}} +{"put": "id:shopping:item::item-77", "fields": {"currency_ref": "id:shopping:currency::eur", "price": 10, "item_name": "Handmade handmade pottery bowl"}} +{"put": "id:shopping:item::item-78", "fields": {"currency_ref": "id:shopping:currency::usd", "price": 51, "item_name": "Handcrafted resin keychain"}} +{"put": "id:shopping:item::item-79", "fields": {"currency_ref": "id:shopping:currency::usd", "price": 42, "item_name": "Personalized embroidered tea towel"}} +{"put": "id:shopping:item::item-80", "fields": {"currency_ref": "id:shopping:currency::aud", "price": 31, "item_name": "Personalized ceramic mug"}} +{"put": "id:shopping:item::item-81", "fields": {"currency_ref": "id:shopping:currency::usd", "price": 3876, "item_name": "18k gold hoop earrings"}} +{"put": "id:shopping:item::item-82", "fields": {"currency_ref": "id:shopping:currency::usd", "price": 2519, "item_name": "18k gold hoop earrings"}} +{"put": "id:shopping:item::item-83", "fields": {"currency_ref": "id:shopping:currency::gbp", "price": 27, "item_name": "Boho wooden cutting board"}} +{"put": "id:shopping:item::item-84", "fields": {"currency_ref": "id:shopping:currency::usd", "price": 12, "item_name": "Minimalist hand-poured soap"}} +{"put": "id:shopping:item::item-85", "fields": {"currency_ref": "id:shopping:currency::usd", "price": 33, "item_name": "Custom geode coaster set"}} +{"put": "id:shopping:item::item-86", "fields": {"currency_ref": "id:shopping:currency::usd", "price": 37, "item_name": "Rustic ceramic ring dish"}} +{"put": "id:shopping:item::item-87", "fields": {"currency_ref": "id:shopping:currency::usd", "price": 18, "item_name": "Eco-friendly macrame wall hanging"}} +{"put": "id:shopping:item::item-88", "fields": {"currency_ref": "id:shopping:currency::usd", "price": 47, "item_name": "Vintage leather journal"}} +{"put": "id:shopping:item::item-89", "fields": {"currency_ref": "id:shopping:currency::usd", 
"price": 14, "item_name": "Minimalist personalized leather wallet"}} +{"put": "id:shopping:item::item-90", "fields": {"currency_ref": "id:shopping:currency::usd", "price": 1, "item_name": "Minimalist knitted scarf"}} +{"put": "id:shopping:item::item-91", "fields": {"currency_ref": "id:shopping:currency::usd", "price": 46, "item_name": "Handmade soy wax candle"}} +{"put": "id:shopping:item::item-92", "fields": {"currency_ref": "id:shopping:currency::usd", "price": 24, "item_name": "Vintage handmade pottery bowl"}} +{"put": "id:shopping:item::item-93", "fields": {"currency_ref": "id:shopping:currency::usd", "price": 28, "item_name": "Personalized ceramic ring dish"}} +{"put": "id:shopping:item::item-94", "fields": {"currency_ref": "id:shopping:currency::usd", "price": 17, "item_name": "Eco-friendly leather journal"}} +{"put": "id:shopping:item::item-95", "fields": {"currency_ref": "id:shopping:currency::gbp", "price": 1207, "item_name": "emerald gemstone bracelet"}} +{"put": "id:shopping:item::item-96", "fields": {"currency_ref": "id:shopping:currency::usd", "price": 6, "item_name": "Eco-friendly hand stamped necklace"}} +{"put": "id:shopping:item::item-97", "fields": {"currency_ref": "id:shopping:currency::cad", "price": 49, "item_name": "Handmade macrame wall hanging"}} +{"put": "id:shopping:item::item-98", "fields": {"currency_ref": "id:shopping:currency::usd", "price": 20, "item_name": "Vintage leather journal"}} +{"put": "id:shopping:item::item-99", "fields": {"currency_ref": "id:shopping:currency::usd", "price": 55, "item_name": "Personalized embroidered tea towel"}} +{"put": "id:shopping:item::item-100", "fields": {"currency_ref": "id:shopping:currency::usd", "price": 42, "item_name": "Custom ceramic mug"}} diff --git a/multi-currency/schemas/currency.sd b/multi-currency/schemas/currency.sd new file mode 100644 index 000000000..54a03aa4e --- /dev/null +++ b/multi-currency/schemas/currency.sd @@ -0,0 +1,8 @@ +schema currency { + document currency { + # 
multiplier for converting to USD + field factor type double { + indexing: attribute | summary + } + } +} diff --git a/multi-currency/schemas/item.sd b/multi-currency/schemas/item.sd new file mode 100644 index 000000000..f9a9c61d8 --- /dev/null +++ b/multi-currency/schemas/item.sd @@ -0,0 +1,19 @@ +schema item { + document item { + field item_name type string { + indexing: summary | index + } + field price type double { + indexing: summary | attribute + } + field currency_ref type reference { + indexing: attribute | summary + } + } + import field currency_ref.factor as currency_factor {} + + document-summary default { + summary item_name {} + summary currency_factor {} + } +} \ No newline at end of file diff --git a/multi-currency/services.xml b/multi-currency/services.xml new file mode 100644 index 000000000..03c538963 --- /dev/null +++ b/multi-currency/services.xml @@ -0,0 +1,20 @@ + + + + + + + + + + + + 2 + + + + + + + + From 8965ed664355050c2149a397354810121a6bc8f8 Mon Sep 17 00:00:00 2001 From: Gregg Donovan Date: Thu, 26 Jun 2025 11:49:59 -0400 Subject: [PATCH 2/7] add ranking and summary examples --- multi-currency/README.md | 7 +++++- multi-currency/generate_price_filter_query.py | 3 +-- multi-currency/schemas/item.sd | 23 +++++++++++++++++-- 3 files changed, 28 insertions(+), 5 deletions(-) diff --git a/multi-currency/README.md b/multi-currency/README.md index 710fdbbc5..e288c1131 100644 --- a/multi-currency/README.md +++ b/multi-currency/README.md @@ -80,8 +80,13 @@ vespa query 'select * from item where (currency_ref matches "id:shopping:currenc 3. **Filter by all currencies within a range**: ```bash - vespa query yql="$(python3 generate_price_filter_query.py --min_price 20 --max_price 100 --currency USD)" + vespa query yql="select * from item where $(python3 generate_price_filter_query.py --min_price 20 --max_price 100 --currency USD)" ``` +4. 
**Combine currency filtering with ranking using USD price**: + ```bash + vespa query yql="select * from item where userQuery() AND ($(python3 generate_price_filter_query.py --min_price 20 --max_price 100 --currency USD))" query="vintage" + ``` + ## Key Features diff --git a/multi-currency/generate_price_filter_query.py b/multi-currency/generate_price_filter_query.py index 634c878e4..195645d01 100644 --- a/multi-currency/generate_price_filter_query.py +++ b/multi-currency/generate_price_filter_query.py @@ -70,8 +70,7 @@ def main(): else: print(f"Warning: No conversion rate from {source_currency} to {target_currency}. Skipping.", file=sys.stderr) - where = " or ".join(or_conditions) - print(f"select * from item where {where}") + print(" or ".join(or_conditions)) if __name__ == "__main__": main() \ No newline at end of file diff --git a/multi-currency/schemas/item.sd b/multi-currency/schemas/item.sd index f9a9c61d8..a2516f5cd 100644 --- a/multi-currency/schemas/item.sd +++ b/multi-currency/schemas/item.sd @@ -1,10 +1,11 @@ schema item { document item { field item_name type string { - indexing: summary | index + indexing: index | summary + index: enable-bm25 } field price type double { - indexing: summary | attribute + indexing: attribute | summary } field currency_ref type reference { indexing: attribute | summary @@ -12,6 +13,24 @@ schema item { } import field currency_ref.factor as currency_factor {} + fieldset default { + fields: item_name + } + + rank-profile default { + function usd_price() { + expression: attribute(price) * attribute(currency_factor) + } + first-phase { + expression: bm25(item_name) * log(usd_price) + } + summary-features { + attribute(price) + usd_price + bm25(item_name) + } + } + document-summary default { summary item_name {} summary currency_factor {} From bbbe6aab89cff1adf2fc0cd34e6c90fb3dd3c29a Mon Sep 17 00:00:00 2001 From: Gregg Donovan Date: Thu, 26 Jun 2025 16:25:06 -0400 Subject: [PATCH 3/7] Add a benchmark on 1mm items for filtering 
via price_usd vs using a multi-currency query. --- multi-currency/README.md | 2 +- multi-currency/filtering_perf_benchmark.py | 210 ++++++++++++++++++ multi-currency/generate_price_filter_query.py | 74 +++--- multi-currency/requirements.txt | 2 + multi-currency/schemas/item.sd | 9 +- 5 files changed, 254 insertions(+), 43 deletions(-) create mode 100644 multi-currency/filtering_perf_benchmark.py create mode 100644 multi-currency/requirements.txt diff --git a/multi-currency/README.md b/multi-currency/README.md index e288c1131..55ca467e6 100644 --- a/multi-currency/README.md +++ b/multi-currency/README.md @@ -92,7 +92,7 @@ vespa query 'select * from item where (currency_ref matches "id:shopping:currenc - **Global currency documents**: Currency data is replicated across all content nodes - **Cross-document field import**: Items can access currency factors via `currency_ref.factor` -- **USD price calculation**: Rank profile computes `usd_price: price * currency_factor` +- **USD price calculation**: Rank profile computes `price_usd: price * currency_factor` ## Schema Details diff --git a/multi-currency/filtering_perf_benchmark.py b/multi-currency/filtering_perf_benchmark.py new file mode 100644 index 000000000..b4578e14c --- /dev/null +++ b/multi-currency/filtering_perf_benchmark.py @@ -0,0 +1,210 @@ +# Benchmark filtering native with a single price_usd filter vs. the dynamic OR across multiple currencies. +# If the dynamic OR is fast enough, using the global currency document and runtime conversions is preferable, +# as it removes the need to update price_usd in all non-USD items for every currency conversion rate change. + +# - delete all Vespa documents. +# - feed the currency conversion documents. +# - generate and save 1mm documents with both native and usd prices. +# - feed those docs into vespa +# - generate and save 5,000 random queries with price filters in various currencies and random keywords. +# - run the queries with both native and usd price filters. 
+# - compare the results and timings. + + +# filtering_perf_benchmark.py +# python3 filtering_perf_benchmark.py --help to see options +import numpy as np +import json +import random +import subprocess +import time +from pathlib import Path +from typing import Dict, List +from generate_price_filter_query import load_conversion_rates + +RATE_TABLE, _ = load_conversion_rates("currency.xml") + +def pct(values: list[float], *percents: int) -> list[float]: + """ + Returns the requested percentiles from the input list. + + pct([1, 2, 3, 4], 25, 50, 75) -> [1.75, 2.5, 3.25] + """ + arr = np.asarray(values, dtype=np.float64) + # numpy >= 1.22: use method instead of interpolation + return np.percentile(arr, percents, method="linear").tolist() + +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- + +def _run_cli(cmd: List[str], stdin: bytes | None = None) -> str: + """ + Runs a Vespa CLI command and returns stdout. + Raises `subprocess.CalledProcessError` on failure. + """ + res = subprocess.run(cmd, input=stdin, check=True, text=True, stdout=subprocess.PIPE) + return res.stdout + + +def _feed_jsonl(lines: List[str]) -> None: + """ + Feeds JSONL documents using `vespa feed -`. + """ + data = ("\n".join(lines) + "\n").encode() + _run_cli(["vespa", "feed", "-"], stdin=data) + +def feed_currency_documents(factors: Dict[str, float], ns: str) -> None: + """ + Feeds `currency` documents with conversion factors. + """ + jsonl = [ + json.dumps({"put": f"id:{ns}:currency::{code.lower()}", + "fields": {"factor": factor}}) + for code, factor in factors.items() + ] + _feed_jsonl(jsonl) + +# Price buckets for random price generation. 
+BUCKETS: list[tuple[int,int]] = [(0,5), (5,10), (10,20), (20,40), (40,80), (80,150), (150,300), (300,600), (600,1000), (1000,10000)] + +def random_price_cents() -> int: + bucket_idx = random.randint(0, len(BUCKETS) - 1) + return random.randint(BUCKETS[bucket_idx][0] * 100, BUCKETS[bucket_idx][1] * 100) + + +CURRENCY_PROBS = { + "USD": 0.70304, + "EUR": 0.11294, + "GBP": 0.10312, + "CAD": 0.03630, + "AUD": 0.02066, + "TRY": 0.00474, + "INR": 0.00286, + "PHP": 0.00197, + "VND": 0.00174, + "HKD": 0.00167, + "SEK": 0.00125, + "IDR": 0.00115, + "NZD": 0.00113, + "CHF": 0.00110, + "ILS": 0.00109, + "MAD": 0.00103, + "SGD": 0.00103, + "MYR": 0.00077, + "ZAR": 0.00074, + "MXN": 0.00062, + "DKK": 0.00060, + "NOK": 0.00044, + "TWD": 0.00001, + "PLN": 0.00001, + "THB": 0.00001, + "JPY": 0.00001, + "CZK": 0.00001, + "CNY": 0.00001, +} + +# ---------------------------------------------------------------------- +def random_currency() -> str: + currencies = list(CURRENCY_PROBS.keys()) + weights = list(CURRENCY_PROBS.values()) + return random.choices(currencies, weights=weights, k=1)[0] + +tokens = [f"token{i}" for i in range(1, 1001)] # Example tokens for item titles +def generate_items(): + """ + Generates a list of item documents with random prices and currency references. + This is a placeholder function; replace with actual item generation logic. 
+ """ + items = [] + for i in range(1, 1_000_001): + price_usd = random_price_cents() + currency = random_currency() + item = { + "put": f"id:shopping:item::item-{i}", + "fields": { + "currency_ref": f"id:shopping:currency::{currency.lower()}", + #"item_name": ' '.join(random.sample(tokens, random.randint(1, 5))), # Randomly select 1-5 tokens + "price_usd": price_usd, + "price": RATE_TABLE[("USD", currency.upper())] * price_usd, # Convert to the target currency + } + } + items.append(json.dumps(item)) + return items + + +def vespa_feed(jsonl_file: Path) -> None: + _run_cli(["vespa", "feed", str(jsonl_file)]) + + +# --------------------------------------------------------------------------- +# Querying +# --------------------------------------------------------------------------- + +def query_items(yql: str, hits: int = 100) -> dict: + """ + Executes a Vespa query using the CLI and returns the response JSON. + """ + stdout = _run_cli(["vespa", "query", f"{yql}", f"hits={hits}"]) + return json.loads(stdout) + +def parse_vespa_results(result: dict) -> tuple[int, list[dict]]: + root = result.get('root', {}) + total_hits = root.get('fields', {}).get('totalCount', 0) + top_100_hits = root.get('children', [])[:100] # keep full hit objects + return total_hits, top_100_hits + +if __name__ == "__main__": + + # from vespa.application import Vespa + # + # # delete all Vespa documents. + # app = Vespa(url="localhost", port=8080) + # response = app.delete_all_docs(content_cluster_name="shopping", schema='item') + # print(response) + # + # # generate 1mm sample items with random prices, item_name, and currency. 
+ # items = generate_items() + # # write to a file + # items_file = Path("1mm_items.jsonl") + # items_file.write_text("\n".join(items) + "\n") + # + # vespa_feed(items_file) + + from generate_price_filter_query import generate_price_filter_query + + # record the latency for both queries + lat_multi: list[float] = [] + lat_single: list[float] = [] + + for i in range(1, 1000): + prices = [random_price_cents() for _ in range(2)] + price_usd_min = min(prices) + price_usd_max = max(prices) + currency = random_currency() + rate = RATE_TABLE[("USD", currency.upper())] + min_price = rate * price_usd_min + max_price = rate * price_usd_max + + multi_currency_where=f"select * from item where {generate_price_filter_query(min_price, max_price, currency.lower())}" + single_currency_where=f"select * from item where price_usd >= {price_usd_min} and price_usd <= {price_usd_max}" + + start = time.perf_counter() + multi_currency_results = parse_vespa_results(query_items(multi_currency_where)) + lat_multi.append(time.perf_counter() - start) + + start = time.perf_counter() + single_currency_results = parse_vespa_results(query_items(single_currency_where)) + lat_single.append(time.perf_counter() - start) + + if multi_currency_results[0] != single_currency_results[0]: + print(f"Total hits mismatch: {multi_currency_results[0]} vs {single_currency_results[0]} currency:{currency}, min_price: {min_price}, max_price: {max_price} price_usd_min: {price_usd_min}, price_usd_max: {price_usd_max} rate: {rate}") + else: + print(f"Total hits: {multi_currency_results[0]} currency:{currency}, min_price: {min_price}, max_price: {max_price} price_usd_min: {price_usd_min}, price_usd_max: {price_usd_max} rate: {rate}") + + print(f"latency for multi-currency query: {pct(lat_multi, [25, 50, 75, 90, 95, 99])}") + print(f"latency for price_usd query: {pct(lat_single, [25, 50, 75, 90, 95, 99])}") + + + + diff --git a/multi-currency/generate_price_filter_query.py b/multi-currency/generate_price_filter_query.py 
index 195645d01..2557ff826 100644 --- a/multi-currency/generate_price_filter_query.py +++ b/multi-currency/generate_price_filter_query.py @@ -1,10 +1,8 @@ import sys -import json import argparse import xml.etree.ElementTree as ET -from collections import defaultdict -def load_conversion_rates(xml_file): +def load_conversion_rates(xml_file: str) -> tuple[dict[tuple[str, str], float], set[str]]: """ Parses the currency XML file and builds a conversion rate table. Returns a dictionary of rates and a sorted list of all currencies. @@ -16,23 +14,47 @@ def load_conversion_rates(xml_file): print(f"Error: Could not read or parse the currency XML file '{xml_file}'.\n{e}", file=sys.stderr) sys.exit(1) - rates = defaultdict(dict) - all_currencies = set() + rates: dict[tuple[str, str], float] = {} + all_currencies: set[str] = set() for rate_element in root.findall('.//rate'): - from_curr = rate_element.get('from').upper() - to_curr = rate_element.get('to').upper() - rate_value = float(rate_element.get('rate')) - rates[from_curr][to_curr] = rate_value + from_curr: str = rate_element.get('from').upper() + to_curr: str = rate_element.get('to').upper() + rate_value: float = float(rate_element.get('rate')) + rates[(from_curr,to_curr)] = rate_value all_currencies.add(from_curr) all_currencies.add(to_curr) for currency in all_currencies: - rates[currency][currency] = 1.0 + rates[(currency, currency)] = 1.0 - return rates, sorted(list(all_currencies)) + return rates, all_currencies -def main(): +rates, all_currencies = load_conversion_rates("currency.xml") + +def price_filter(currency: str, min_price: float, max_price: float) -> str: + return f"(currency_ref matches 'id:shopping:currency::{currency.lower()}' and price >= {min_price} and price <= {max_price})" + +def generate_price_filter_query(min_price: float, max_price: float, currency: str) -> str: + if min_price > max_price: + raise ValueError("min_price cannot be greater than max_price.") + + source_currency: str = 
currency.upper() + + or_conditions: list[str] = [] + for target_currency in all_currencies: + if (source_currency, target_currency) in rates: + rate: float = rates[(source_currency, target_currency)] + converted_min: float = min_price * rate + converted_max: float = max_price * rate + + or_conditions.append(price_filter(target_currency, converted_min, converted_max)) + else: + print(f"Warning: No conversion rate from {source_currency} to {target_currency}. Skipping.", file=sys.stderr) + + return " or ".join(or_conditions) + +def main() -> None: """ Main function to generate the Vespa query. """ @@ -42,35 +64,9 @@ def main(): parser.add_argument('--min_price', type=float, required=True, help='Minimum price.') parser.add_argument('--max_price', type=float, required=True, help='Maximum price.') parser.add_argument('--currency', type=str, required=True, help='The currency for the given min/max price (e.g., USD).') - parser.add_argument('--currency_file', type=str, default='currency.xml', help='Path to the currency conversion XML file.') args = parser.parse_args() - - if args.min_price > args.max_price: - print("Error: min_price cannot be greater than max_price.", file=sys.stderr) - sys.exit(1) - - rates, all_currencies = load_conversion_rates(args.currency_file) - source_currency = args.currency.upper() - - if source_currency not in rates: - print(f"Error: The specified currency '{source_currency}' is not found in the conversion table.", file=sys.stderr) - sys.exit(1) - - or_conditions = [] - for target_currency in all_currencies: - if target_currency in rates[source_currency]: - rate = rates[source_currency][target_currency] - converted_min = args.min_price * rate - converted_max = args.max_price * rate - - yql = f"(currency_ref matches \"id:shopping:currency::{target_currency.lower()}\" and price >= {converted_min} and price <= {converted_max})" - - or_conditions.append(yql) - else: - print(f"Warning: No conversion rate from {source_currency} to {target_currency}. 
Skipping.", file=sys.stderr) - - print(" or ".join(or_conditions)) + print(generate_price_filter_query(args.min_price, args.max_price, args.currency)) if __name__ == "__main__": main() \ No newline at end of file diff --git a/multi-currency/requirements.txt b/multi-currency/requirements.txt new file mode 100644 index 000000000..c88f22c81 --- /dev/null +++ b/multi-currency/requirements.txt @@ -0,0 +1,2 @@ +numpy +pyvespa \ No newline at end of file diff --git a/multi-currency/schemas/item.sd b/multi-currency/schemas/item.sd index a2516f5cd..20317366a 100644 --- a/multi-currency/schemas/item.sd +++ b/multi-currency/schemas/item.sd @@ -7,6 +7,9 @@ schema item { field price type double { indexing: attribute | summary } + field price_usd type double { + indexing: attribute | summary + } field currency_ref type reference { indexing: attribute | summary } @@ -18,15 +21,15 @@ schema item { } rank-profile default { - function usd_price() { + function price_usd() { expression: attribute(price) * attribute(currency_factor) } first-phase { - expression: bm25(item_name) * log(usd_price) + expression: bm25(item_name) * log(price_usd) } summary-features { attribute(price) - usd_price + price_usd bm25(item_name) } } From 425ba4e207e31078225a52b3226d8d4c0968975a Mon Sep 17 00:00:00 2001 From: Gregg Donovan Date: Fri, 27 Jun 2025 11:54:37 -0400 Subject: [PATCH 4/7] try fast-search --- multi-currency/currency_xml_to_vespa_docs.py | 12 ++-- multi-currency/filtering_perf_benchmark.py | 58 ++++++++++++-------- multi-currency/schemas/currency.sd | 2 + multi-currency/schemas/item.sd | 7 +++ multi-currency/services.xml | 1 - 5 files changed, 50 insertions(+), 30 deletions(-) diff --git a/multi-currency/currency_xml_to_vespa_docs.py b/multi-currency/currency_xml_to_vespa_docs.py index 3202fcf3c..e10e6beb9 100644 --- a/multi-currency/currency_xml_to_vespa_docs.py +++ b/multi-currency/currency_xml_to_vespa_docs.py @@ -2,7 +2,7 @@ import xml.etree.ElementTree as ET import json -def 
convert_currency_xml_to_vespa_jsonl(xml_file): +def convert_currency_xml_to_vespa_jsonl(xml_file) -> list[str]: # Parse the XML file tree = ET.parse(xml_file) root = tree.getroot() @@ -12,7 +12,7 @@ def convert_currency_xml_to_vespa_jsonl(xml_file): "put": "id:shopping:currency::usd", "fields": {"factor": 1.0} } - sys.stdout.write(json.dumps(usd_doc) + '\n') + currency_rates = [json.dumps(usd_doc) + '\n'] # Find all rate elements where 'to' attribute is 'USD' for rate in root.findall('.//rate[@to="USD"]'): @@ -25,9 +25,11 @@ def convert_currency_xml_to_vespa_jsonl(xml_file): "fields": {"factor": factor} } - # Write to stdout - sys.stdout.write(json.dumps(doc) + '\n') + currency_rates.append(json.dumps(doc)) + + return currency_rates # Usage if __name__ == "__main__": - convert_currency_xml_to_vespa_jsonl('currency.xml') \ No newline at end of file + currency_docs = convert_currency_xml_to_vespa_jsonl('currency.xml') + sys.stdout.write("\n".join(currency_docs) + "\n") \ No newline at end of file diff --git a/multi-currency/filtering_perf_benchmark.py b/multi-currency/filtering_perf_benchmark.py index b4578e14c..5f88c5e24 100644 --- a/multi-currency/filtering_perf_benchmark.py +++ b/multi-currency/filtering_perf_benchmark.py @@ -141,11 +141,11 @@ def vespa_feed(jsonl_file: Path) -> None: # Querying # --------------------------------------------------------------------------- -def query_items(yql: str, hits: int = 100) -> dict: +def query_items(yql: str, ranking: str = "unranked", hits: int = 0) -> dict: """ Executes a Vespa query using the CLI and returns the response JSON. 
""" - stdout = _run_cli(["vespa", "query", f"{yql}", f"hits={hits}"]) + stdout = _run_cli(["vespa", "query", f"{yql}", f"ranking={ranking}", f"hits={hits}"]) return json.loads(stdout) def parse_vespa_results(result: dict) -> tuple[int, list[dict]]: @@ -155,21 +155,22 @@ def parse_vespa_results(result: dict) -> tuple[int, list[dict]]: return total_hits, top_100_hits if __name__ == "__main__": - - # from vespa.application import Vespa - # - # # delete all Vespa documents. - # app = Vespa(url="localhost", port=8080) - # response = app.delete_all_docs(content_cluster_name="shopping", schema='item') - # print(response) - # - # # generate 1mm sample items with random prices, item_name, and currency. - # items = generate_items() - # # write to a file - # items_file = Path("1mm_items.jsonl") - # items_file.write_text("\n".join(items) + "\n") - # - # vespa_feed(items_file) + from vespa.application import Vespa + app = Vespa(url="localhost", port=8080) + _ = app.delete_all_docs(content_cluster_name="shopping", schema='currency') + _ = app.delete_all_docs(content_cluster_name="shopping", schema='item') + + from currency_xml_to_vespa_docs import convert_currency_xml_to_vespa_jsonl + currency_docs = convert_currency_xml_to_vespa_jsonl('currency.xml') + currency_docs_file = Path("currency_docs.jsonl") + currency_docs_file.write_text("\n".join(currency_docs) + "\n") + vespa_feed(currency_docs_file) + + # generate 1mm sample items with random prices, item_name, and currency. 
+ items = generate_items() + items_file = Path("1mm_items.jsonl") + items_file.write_text("\n".join(items) + "\n") + vespa_feed(items_file) from generate_price_filter_query import generate_price_filter_query @@ -189,13 +190,22 @@ def parse_vespa_results(result: dict) -> tuple[int, list[dict]]: multi_currency_where=f"select * from item where {generate_price_filter_query(min_price, max_price, currency.lower())}" single_currency_where=f"select * from item where price_usd >= {price_usd_min} and price_usd <= {price_usd_max}" - start = time.perf_counter() - multi_currency_results = parse_vespa_results(query_items(multi_currency_where)) - lat_multi.append(time.perf_counter() - start) - - start = time.perf_counter() - single_currency_results = parse_vespa_results(query_items(single_currency_where)) - lat_single.append(time.perf_counter() - start) + exec_plan = [ + ("multi", multi_currency_where, lat_multi), + ("single", single_currency_where, lat_single), + ] + random.shuffle(exec_plan) + + # run queries in randomized order + for tag, query_str, lat_list in exec_plan: + start = time.perf_counter() + results = parse_vespa_results(query_items(query_str)) + lat_list.append(time.perf_counter() - start) + + if tag == "multi": + multi_currency_results = results + else: + single_currency_results = results if multi_currency_results[0] != single_currency_results[0]: print(f"Total hits mismatch: {multi_currency_results[0]} vs {single_currency_results[0]} currency:{currency}, min_price: {min_price}, max_price: {max_price} price_usd_min: {price_usd_min}, price_usd_max: {price_usd_max} rate: {rate}") diff --git a/multi-currency/schemas/currency.sd b/multi-currency/schemas/currency.sd index 54a03aa4e..f44ad1a19 100644 --- a/multi-currency/schemas/currency.sd +++ b/multi-currency/schemas/currency.sd @@ -3,6 +3,8 @@ schema currency { # multiplier for converting to USD field factor type double { indexing: attribute | summary + attribute: fast-search + rank: filter } } } diff --git 
a/multi-currency/schemas/item.sd b/multi-currency/schemas/item.sd index 20317366a..a05d3b6f5 100644 --- a/multi-currency/schemas/item.sd +++ b/multi-currency/schemas/item.sd @@ -2,16 +2,23 @@ schema item { document item { field item_name type string { indexing: index | summary + match: text index: enable-bm25 } field price type double { indexing: attribute | summary + attribute: fast-search + rank: filter } field price_usd type double { indexing: attribute | summary + attribute: fast-search + rank: filter } field currency_ref type reference { indexing: attribute | summary + attribute: fast-search + rank: filter } } import field currency_ref.factor as currency_factor {} diff --git a/multi-currency/services.xml b/multi-currency/services.xml index 03c538963..bcba6e9c0 100644 --- a/multi-currency/services.xml +++ b/multi-currency/services.xml @@ -9,7 +9,6 @@ - 2 From 4b6938086a7a0ba99ec945a03c8c09b8c50c3289 Mon Sep 17 00:00:00 2001 From: Gregg Donovan Date: Fri, 27 Jun 2025 16:43:13 -0400 Subject: [PATCH 5/7] interesting: the big OR with each currency being its own sparse field is equally fast as a single price_usd field. 
--- multi-currency/filtering_perf_benchmark.py | 10 +- multi-currency/generate_price_filter_query.py | 2 +- multi-currency/schemas/item.sd | 174 ++++++++++++++++++ multi-currency/services.xml | 1 + 4 files changed, 184 insertions(+), 3 deletions(-) diff --git a/multi-currency/filtering_perf_benchmark.py b/multi-currency/filtering_perf_benchmark.py index 5f88c5e24..7fc452bd5 100644 --- a/multi-currency/filtering_perf_benchmark.py +++ b/multi-currency/filtering_perf_benchmark.py @@ -120,15 +120,19 @@ def generate_items(): for i in range(1, 1_000_001): price_usd = random_price_cents() currency = random_currency() + price_native = RATE_TABLE[("USD", currency.upper())] * price_usd # Convert to the target currency item = { "put": f"id:shopping:item::item-{i}", "fields": { "currency_ref": f"id:shopping:currency::{currency.lower()}", #"item_name": ' '.join(random.sample(tokens, random.randint(1, 5))), # Randomly select 1-5 tokens "price_usd": price_usd, - "price": RATE_TABLE[("USD", currency.upper())] * price_usd, # Convert to the target currency + "price": price_native, } } + if currency.lower() != "usd": + item["fields"][f"price_{currency.lower()}"] = price_native + items.append(json.dumps(item)) return items @@ -145,7 +149,9 @@ def query_items(yql: str, ranking: str = "unranked", hits: int = 0) -> dict: """ Executes a Vespa query using the CLI and returns the response JSON. 
""" - stdout = _run_cli(["vespa", "query", f"{yql}", f"ranking={ranking}", f"hits={hits}"]) + cmd = ["vespa", "query", f"{yql}", f"ranking={ranking}", f"hits={hits}"] + print(' '.join(cmd)) + stdout = _run_cli(cmd) return json.loads(stdout) def parse_vespa_results(result: dict) -> tuple[int, list[dict]]: diff --git a/multi-currency/generate_price_filter_query.py b/multi-currency/generate_price_filter_query.py index 2557ff826..61f5d07ce 100644 --- a/multi-currency/generate_price_filter_query.py +++ b/multi-currency/generate_price_filter_query.py @@ -33,7 +33,7 @@ def load_conversion_rates(xml_file: str) -> tuple[dict[tuple[str, str], float], rates, all_currencies = load_conversion_rates("currency.xml") def price_filter(currency: str, min_price: float, max_price: float) -> str: - return f"(currency_ref matches 'id:shopping:currency::{currency.lower()}' and price >= {min_price} and price <= {max_price})" + return f"(price_{currency.lower()} >= {min_price} and price_{currency.lower()} <= {max_price})" def generate_price_filter_query(min_price: float, max_price: float, currency: str) -> str: if min_price > max_price: diff --git a/multi-currency/schemas/item.sd b/multi-currency/schemas/item.sd index a05d3b6f5..08968199a 100644 --- a/multi-currency/schemas/item.sd +++ b/multi-currency/schemas/item.sd @@ -15,6 +15,180 @@ schema item { attribute: fast-search rank: filter } + field price_aud type double { + indexing: attribute | summary + attribute: fast-search + rank: filter + } + + field price_brl type double { + indexing: attribute | summary + attribute: fast-search + rank: filter + } + + field price_cad type double { + indexing: attribute | summary + attribute: fast-search + rank: filter + } + + field price_chf type double { + indexing: attribute | summary + attribute: fast-search + rank: filter + } + + field price_cny type double { + indexing: attribute | summary + attribute: fast-search + rank: filter + } + + field price_czk type double { + indexing: attribute | 
summary + attribute: fast-search + rank: filter + } + + field price_dkk type double { + indexing: attribute | summary + attribute: fast-search + rank: filter + } + + field price_eur type double { + indexing: attribute | summary + attribute: fast-search + rank: filter + } + + field price_gbp type double { + indexing: attribute | summary + attribute: fast-search + rank: filter + } + + field price_hkd type double { + indexing: attribute | summary + attribute: fast-search + rank: filter + } + + field price_huf type double { + indexing: attribute | summary + attribute: fast-search + rank: filter + } + + field price_idr type double { + indexing: attribute | summary + attribute: fast-search + rank: filter + } + + field price_ils type double { + indexing: attribute | summary + attribute: fast-search + rank: filter + } + + field price_inr type double { + indexing: attribute | summary + attribute: fast-search + rank: filter + } + + field price_jpy type double { + indexing: attribute | summary + attribute: fast-search + rank: filter + } + + field price_mad type double { + indexing: attribute | summary + attribute: fast-search + rank: filter + } + + field price_mxn type double { + indexing: attribute | summary + attribute: fast-search + rank: filter + } + + field price_myr type double { + indexing: attribute | summary + attribute: fast-search + rank: filter + } + + field price_nok type double { + indexing: attribute | summary + attribute: fast-search + rank: filter + } + + field price_nzd type double { + indexing: attribute | summary + attribute: fast-search + rank: filter + } + + field price_php type double { + indexing: attribute | summary + attribute: fast-search + rank: filter + } + + field price_pln type double { + indexing: attribute | summary + attribute: fast-search + rank: filter + } + + field price_sgd type double { + indexing: attribute | summary + attribute: fast-search + rank: filter + } + + field price_sek type double { + indexing: attribute | summary + 
+ attribute: fast-search + rank: filter + } + + field price_thb type double { + indexing: attribute | summary + attribute: fast-search + rank: filter + } + + field price_try type double { + indexing: attribute | summary + attribute: fast-search + rank: filter + } + + field price_twd type double { + indexing: attribute | summary + attribute: fast-search + rank: filter + } + + field price_vnd type double { + indexing: attribute | summary + attribute: fast-search + rank: filter + } + + field price_zar type double { + indexing: attribute | summary + attribute: fast-search + rank: filter + } + field currency_ref type reference { indexing: attribute | summary attribute: fast-search diff --git a/multi-currency/services.xml b/multi-currency/services.xml index bcba6e9c0..ca391d5c6 100644 --- a/multi-currency/services.xml +++ b/multi-currency/services.xml @@ -9,6 +9,7 @@ + 1 From 51e9765068c0f418c4f644ab2d48fe323df5f1d8 Mon Sep 17 00:00:00 2001 From: Gregg Donovan Date: Sat, 28 Jun 2025 08:18:54 -0400 Subject: [PATCH 6/7] Switch to PyVespa for sending queries. This clarified the performance issues. price_usd queries now take ~6ms. The other approaches are around 10-12ms, except for using 'matches' with the currency_ref id which takes about 80ms. 
--- multi-currency/README.md | 4 - multi-currency/currency_xml_to_vespa_docs.py | 21 ++++- multi-currency/filtering_perf_benchmark.py | 85 +++++++++---------- multi-currency/generate_price_filter_query.py | 34 ++++++-- multi-currency/schemas/currency.sd | 11 +++ multi-currency/schemas/item.sd | 2 + 6 files changed, 99 insertions(+), 58 deletions(-) diff --git a/multi-currency/README.md b/multi-currency/README.md index 55ca467e6..445fb86eb 100644 --- a/multi-currency/README.md +++ b/multi-currency/README.md @@ -104,7 +104,3 @@ vespa query 'select * from item where (currency_ref matches "id:shopping:currenc - `price`: Double field for price in local currency - `currency_ref`: Reference to currency document - Imported field: `currency_factor` from referenced currency document - -## TODOs - -- Show how to hydrate a USD price \ No newline at end of file diff --git a/multi-currency/currency_xml_to_vespa_docs.py b/multi-currency/currency_xml_to_vespa_docs.py index e10e6beb9..9a30898af 100644 --- a/multi-currency/currency_xml_to_vespa_docs.py +++ b/multi-currency/currency_xml_to_vespa_docs.py @@ -2,11 +2,22 @@ import xml.etree.ElementTree as ET import json + +def parse_currencies(root) -> set[str]: + currencies = [] + for currency in root.findall('.//currency'): + currencies.append(currency.get('code')) + + return sorted(currencies) + + def convert_currency_xml_to_vespa_jsonl(xml_file) -> list[str]: # Parse the XML file tree = ET.parse(xml_file) root = tree.getroot() + currencies = parse_currencies(root) + # Add USD to USD conversion (factor = 1.0) usd_doc = { "put": "id:shopping:currency::usd", @@ -16,13 +27,17 @@ def convert_currency_xml_to_vespa_jsonl(xml_file) -> list[str]: # Find all rate elements where 'to' attribute is 'USD' for rate in root.findall('.//rate[@to="USD"]'): - from_currency = rate.get('from').lower() + currency = rate.get('from').lower() factor = float(rate.get('rate')) # Create Vespa document doc = { - "put": f"id:shopping:currency::{from_currency}", 
- "fields": {"factor": factor} + "put": f"id:shopping:currency::{currency}", + "fields": { + "code": currency, + "idx": currencies.index(currency.upper()), + "factor": factor, + } } currency_rates.append(json.dumps(doc)) diff --git a/multi-currency/filtering_perf_benchmark.py b/multi-currency/filtering_perf_benchmark.py index 7fc452bd5..f424e27ab 100644 --- a/multi-currency/filtering_perf_benchmark.py +++ b/multi-currency/filtering_perf_benchmark.py @@ -19,9 +19,13 @@ import subprocess import time from pathlib import Path -from typing import Dict, List + +from vespa.application import Vespa +from vespa.io import VespaQueryResponse + from generate_price_filter_query import load_conversion_rates + RATE_TABLE, _ = load_conversion_rates("currency.xml") def pct(values: list[float], *percents: int) -> list[float]: @@ -38,7 +42,7 @@ def pct(values: list[float], *percents: int) -> list[float]: # Helpers # --------------------------------------------------------------------------- -def _run_cli(cmd: List[str], stdin: bytes | None = None) -> str: +def _run_cli(cmd: list[str], stdin: bytes | None = None) -> str: """ Runs a Vespa CLI command and returns stdout. Raises `subprocess.CalledProcessError` on failure. @@ -47,14 +51,14 @@ def _run_cli(cmd: List[str], stdin: bytes | None = None) -> str: return res.stdout -def _feed_jsonl(lines: List[str]) -> None: +def _feed_jsonl(lines: list[str]) -> None: """ Feeds JSONL documents using `vespa feed -`. """ data = ("\n".join(lines) + "\n").encode() _run_cli(["vespa", "feed", "-"], stdin=data) -def feed_currency_documents(factors: Dict[str, float], ns: str) -> None: +def feed_currency_documents(factors: dict[str, float], ns: str) -> None: """ Feeds `currency` documents with conversion factors. 
""" @@ -125,7 +129,6 @@ def generate_items(): "put": f"id:shopping:item::item-{i}", "fields": { "currency_ref": f"id:shopping:currency::{currency.lower()}", - #"item_name": ' '.join(random.sample(tokens, random.randint(1, 5))), # Randomly select 1-5 tokens "price_usd": price_usd, "price": price_native, } @@ -141,42 +144,26 @@ def vespa_feed(jsonl_file: Path) -> None: _run_cli(["vespa", "feed", str(jsonl_file)]) -# --------------------------------------------------------------------------- -# Querying -# --------------------------------------------------------------------------- - -def query_items(yql: str, ranking: str = "unranked", hits: int = 0) -> dict: - """ - Executes a Vespa query using the CLI and returns the response JSON. - """ - cmd = ["vespa", "query", f"{yql}", f"ranking={ranking}", f"hits={hits}"] - print(' '.join(cmd)) - stdout = _run_cli(cmd) - return json.loads(stdout) - -def parse_vespa_results(result: dict) -> tuple[int, list[dict]]: - root = result.get('root', {}) - total_hits = root.get('fields', {}).get('totalCount', 0) - top_100_hits = root.get('children', [])[:100] # keep full hit objects - return total_hits, top_100_hits - if __name__ == "__main__": - from vespa.application import Vespa - app = Vespa(url="localhost", port=8080) - _ = app.delete_all_docs(content_cluster_name="shopping", schema='currency') - _ = app.delete_all_docs(content_cluster_name="shopping", schema='item') - - from currency_xml_to_vespa_docs import convert_currency_xml_to_vespa_jsonl - currency_docs = convert_currency_xml_to_vespa_jsonl('currency.xml') - currency_docs_file = Path("currency_docs.jsonl") - currency_docs_file.write_text("\n".join(currency_docs) + "\n") - vespa_feed(currency_docs_file) - - # generate 1mm sample items with random prices, item_name, and currency. 
- items = generate_items() - items_file = Path("1mm_items.jsonl") - items_file.write_text("\n".join(items) + "\n") - vespa_feed(items_file) + reindex = True + if reindex: + docker_feed = Vespa(url="localhost", port=8080) + _ = docker_feed.delete_all_docs(content_cluster_name="shopping", schema='currency') + _ = docker_feed.delete_all_docs(content_cluster_name="shopping", schema='item') + + from currency_xml_to_vespa_docs import convert_currency_xml_to_vespa_jsonl + currency_docs = convert_currency_xml_to_vespa_jsonl('currency.xml') + currency_docs_file = Path("currency_docs.jsonl") + currency_docs_file.write_text("\n".join(currency_docs) + "\n") + vespa_feed(currency_docs_file) + + # generate 1mm sample items with random prices, item_name, and currency. + items = generate_items() + items_file = Path("1mm_items.jsonl") + items_file.write_text("\n".join(items) + "\n") + vespa_feed(items_file) + + app = Vespa(url="http://localhost", port=8080) from generate_price_filter_query import generate_price_filter_query @@ -205,18 +192,24 @@ def parse_vespa_results(result: dict) -> tuple[int, list[dict]]: # run queries in randomized order for tag, query_str, lat_list in exec_plan: start = time.perf_counter() - results = parse_vespa_results(query_items(query_str)) - lat_list.append(time.perf_counter() - start) + r: VespaQueryResponse = app.query({ + "yql": query_str, + "ranking": "unranked", + "hits": 0, + }) + latency = time.perf_counter() - start + lat_list.append(latency) + results: int = r.number_documents_retrieved if tag == "multi": multi_currency_results = results else: single_currency_results = results - if multi_currency_results[0] != single_currency_results[0]: - print(f"Total hits mismatch: {multi_currency_results[0]} vs {single_currency_results[0]} currency:{currency}, min_price: {min_price}, max_price: {max_price} price_usd_min: {price_usd_min}, price_usd_max: {price_usd_max} rate: {rate}") - else: - print(f"Total hits: {multi_currency_results[0]} 
currency:{currency}, min_price: {min_price}, max_price: {max_price} price_usd_min: {price_usd_min}, price_usd_max: {price_usd_max} rate: {rate}") + #if multi_currency_results != single_currency_results: + # print(f"Latency: multi={lat_multi[-1]} single={lat_single[-1]}. Total hits mismatch: {multi_currency_results} vs {single_currency_results} currency:{currency}, min_price: {min_price}, max_price: {max_price} price_usd_min: {price_usd_min}, price_usd_max: {price_usd_max} rate: {rate}") + #else: + # print(f"Latency: multi={lat_multi[-1]} single={lat_single[-1]}. Total hits: {multi_currency_results} currency:{currency}, min_price: {min_price}, max_price: {max_price} price_usd_min: {price_usd_min}, price_usd_max: {price_usd_max} rate: {rate}") print(f"latency for multi-currency query: {pct(lat_multi, [25, 50, 75, 90, 95, 99])}") print(f"latency for price_usd query: {pct(lat_single, [25, 50, 75, 90, 95, 99])}") diff --git a/multi-currency/generate_price_filter_query.py b/multi-currency/generate_price_filter_query.py index 61f5d07ce..e04093e43 100644 --- a/multi-currency/generate_price_filter_query.py +++ b/multi-currency/generate_price_filter_query.py @@ -2,6 +2,7 @@ import argparse import xml.etree.ElementTree as ET + def load_conversion_rates(xml_file: str) -> tuple[dict[tuple[str, str], float], set[str]]: """ Parses the currency XML file and builds a conversion rate table. 
@@ -21,19 +22,38 @@ def load_conversion_rates(xml_file: str) -> tuple[dict[tuple[str, str], float], from_curr: str = rate_element.get('from').upper() to_curr: str = rate_element.get('to').upper() rate_value: float = float(rate_element.get('rate')) - rates[(from_curr,to_curr)] = rate_value + rates[(from_curr, to_curr)] = rate_value all_currencies.add(from_curr) all_currencies.add(to_curr) + all_currencies = sorted(all_currencies) + for currency in all_currencies: rates[(currency, currency)] = 1.0 return rates, all_currencies + rates, all_currencies = load_conversion_rates("currency.xml") + +def currency_idx(currency: str) -> int: + return all_currencies.index(currency.upper()) + + def price_filter(currency: str, min_price: float, max_price: float) -> str: - return f"(price_{currency.lower()} >= {min_price} and price_{currency.lower()} <= {max_price})" + # This is fast, too. Only ~2x slower than one price_usd query + return f"(currency_idx = {currency_idx(currency)} and price >= {min_price} and price <= {max_price})" + + # 'matches' is roughly 80ms slower. How to speed this up? + # return f"(currency_ref matches \"id:shopping:currency::{currency.lower()}\" and price >= {min_price} and price <= {max_price})" + + # TODO: how can we get exact matches for currency_code to work? + # return f"(currency_code = \"{currency.lower()}\" and price >= {min_price} and price <= {max_price})" + + # using a field for each currency is fast. 1.75x the price_usd query + # return f"(price_{currency.lower()} >= {min_price} and price_{currency.lower()} <= {max_price})" + def generate_price_filter_query(min_price: float, max_price: float, currency: str) -> str: if min_price > max_price: @@ -50,10 +70,12 @@ def generate_price_filter_query(min_price: float, max_price: float, currency: st or_conditions.append(price_filter(target_currency, converted_min, converted_max)) else: - print(f"Warning: No conversion rate from {source_currency} to {target_currency}. 
Skipping.", file=sys.stderr) + print(f"Warning: No conversion rate from {source_currency} to {target_currency}. Skipping.", + file=sys.stderr) return " or ".join(or_conditions) + def main() -> None: """ Main function to generate the Vespa query. @@ -63,10 +85,12 @@ def main() -> None: ) parser.add_argument('--min_price', type=float, required=True, help='Minimum price.') parser.add_argument('--max_price', type=float, required=True, help='Maximum price.') - parser.add_argument('--currency', type=str, required=True, help='The currency for the given min/max price (e.g., USD).') + parser.add_argument('--currency', type=str, required=True, + help='The currency for the given min/max price (e.g., USD).') args = parser.parse_args() print(generate_price_filter_query(args.min_price, args.max_price, args.currency)) + if __name__ == "__main__": - main() \ No newline at end of file + main() diff --git a/multi-currency/schemas/currency.sd b/multi-currency/schemas/currency.sd index f44ad1a19..cc841c12f 100644 --- a/multi-currency/schemas/currency.sd +++ b/multi-currency/schemas/currency.sd @@ -1,5 +1,16 @@ schema currency { document currency { + field idx type byte { + indexing: summary | attribute + attribute: fast-search + rank: filter + } + + field code type string { + indexing: attribute | summary + attribute: fast-search + } + # multiplier for converting to USD field factor type double { indexing: attribute | summary diff --git a/multi-currency/schemas/item.sd b/multi-currency/schemas/item.sd index 08968199a..03ba08965 100644 --- a/multi-currency/schemas/item.sd +++ b/multi-currency/schemas/item.sd @@ -195,6 +195,8 @@ schema item { rank: filter } } + import field currency_ref.code as currency_code {} + import field currency_ref.idx as currency_idx {} import field currency_ref.factor as currency_factor {} fieldset default { From 5cbca295fbed4b9c15445adf831c4d8a5cdc0ad6 Mon Sep 17 00:00:00 2001 From: Kristian Aune Date: Tue, 1 Jul 2025 14:24:32 +0200 Subject: [PATCH 7/7] move 
to examples directory --- {multi-currency => examples/multi-currency}/README.md | 0 {multi-currency => examples/multi-currency}/currency.xml | 0 {multi-currency => examples/multi-currency}/currency_rates.jsonl | 0 .../multi-currency}/currency_xml_to_vespa_docs.py | 0 .../multi-currency}/filtering_perf_benchmark.py | 0 .../multi-currency}/generate_price_filter_query.py | 0 {multi-currency => examples/multi-currency}/items.jsonl | 0 {multi-currency => examples/multi-currency}/requirements.txt | 0 {multi-currency => examples/multi-currency}/schemas/currency.sd | 0 {multi-currency => examples/multi-currency}/schemas/item.sd | 0 {multi-currency => examples/multi-currency}/services.xml | 0 11 files changed, 0 insertions(+), 0 deletions(-) rename {multi-currency => examples/multi-currency}/README.md (100%) rename {multi-currency => examples/multi-currency}/currency.xml (100%) rename {multi-currency => examples/multi-currency}/currency_rates.jsonl (100%) rename {multi-currency => examples/multi-currency}/currency_xml_to_vespa_docs.py (100%) rename {multi-currency => examples/multi-currency}/filtering_perf_benchmark.py (100%) rename {multi-currency => examples/multi-currency}/generate_price_filter_query.py (100%) rename {multi-currency => examples/multi-currency}/items.jsonl (100%) rename {multi-currency => examples/multi-currency}/requirements.txt (100%) rename {multi-currency => examples/multi-currency}/schemas/currency.sd (100%) rename {multi-currency => examples/multi-currency}/schemas/item.sd (100%) rename {multi-currency => examples/multi-currency}/services.xml (100%) diff --git a/multi-currency/README.md b/examples/multi-currency/README.md similarity index 100% rename from multi-currency/README.md rename to examples/multi-currency/README.md diff --git a/multi-currency/currency.xml b/examples/multi-currency/currency.xml similarity index 100% rename from multi-currency/currency.xml rename to examples/multi-currency/currency.xml diff --git 
a/multi-currency/currency_rates.jsonl b/examples/multi-currency/currency_rates.jsonl similarity index 100% rename from multi-currency/currency_rates.jsonl rename to examples/multi-currency/currency_rates.jsonl diff --git a/multi-currency/currency_xml_to_vespa_docs.py b/examples/multi-currency/currency_xml_to_vespa_docs.py similarity index 100% rename from multi-currency/currency_xml_to_vespa_docs.py rename to examples/multi-currency/currency_xml_to_vespa_docs.py diff --git a/multi-currency/filtering_perf_benchmark.py b/examples/multi-currency/filtering_perf_benchmark.py similarity index 100% rename from multi-currency/filtering_perf_benchmark.py rename to examples/multi-currency/filtering_perf_benchmark.py diff --git a/multi-currency/generate_price_filter_query.py b/examples/multi-currency/generate_price_filter_query.py similarity index 100% rename from multi-currency/generate_price_filter_query.py rename to examples/multi-currency/generate_price_filter_query.py diff --git a/multi-currency/items.jsonl b/examples/multi-currency/items.jsonl similarity index 100% rename from multi-currency/items.jsonl rename to examples/multi-currency/items.jsonl diff --git a/multi-currency/requirements.txt b/examples/multi-currency/requirements.txt similarity index 100% rename from multi-currency/requirements.txt rename to examples/multi-currency/requirements.txt diff --git a/multi-currency/schemas/currency.sd b/examples/multi-currency/schemas/currency.sd similarity index 100% rename from multi-currency/schemas/currency.sd rename to examples/multi-currency/schemas/currency.sd diff --git a/multi-currency/schemas/item.sd b/examples/multi-currency/schemas/item.sd similarity index 100% rename from multi-currency/schemas/item.sd rename to examples/multi-currency/schemas/item.sd diff --git a/multi-currency/services.xml b/examples/multi-currency/services.xml similarity index 100% rename from multi-currency/services.xml rename to examples/multi-currency/services.xml