From 5c36a11cd09da365b558e3249a0e4f94ab885ff6 Mon Sep 17 00:00:00 2001 From: Eugene Yang Date: Tue, 28 Dec 2021 17:39:40 -0500 Subject: [PATCH 1/3] add page for Patapsco --- generate.py | 4 +++ templates/patapsco.html | 59 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 63 insertions(+) create mode 100644 templates/patapsco.html diff --git a/generate.py b/generate.py index d91d9e0b4..86cf61707 100644 --- a/generate.py +++ b/generate.py @@ -396,6 +396,7 @@ def generate_index(out_dir, version, top_level_map):
  • ir_datasets SIGIR resource paper
  • Using ir_datasets with… PyTerrier · +Patapsco · ir-measures · trec_eval · Experimaestro @@ -615,6 +616,9 @@ def hlb(c): template = Template(filename=os.path.join("templates", "pyterrier.html")) with page_template('pyterrier.html', out_dir, version, title='PyTerrier & ir_datasets', include_irds_title=False) as out: out.write(template.render(hl=hl)) + template = Template(filename=os.path.join("templates", "patapsco.html")) + with page_template('patapsco.html', out_dir, version, title='Patapsco & ir_datasets', include_irds_title=False) as out: + out.write(template.render(hl=hl)) template = Template(filename=os.path.join("templates", "ir-measures.html")) with page_template('ir-measures.html', out_dir, version, title='ir_measures & ir_datasets', include_irds_title=False) as out: out.write(template.render(hl=hl, hlb=hlb)) diff --git a/templates/patapsco.html b/templates/patapsco.html new file mode 100644 index 000000000..aaaaf2442 --- /dev/null +++ b/templates/patapsco.html @@ -0,0 +1,59 @@ +

    +Patapsco is a framework for running cross-language +information retrieval (CLIR) experiments developed by the Human Language +Technology Center of Excellence (HLTCOE) at Johns Hopkins University. +

    + +

    +To get started with Patapsco, see this guide. +

    + +

    Basic Usage

    + +

    +Patapsco specify the source of the collection via config files or config dictionary in Python. +Please see this +example config file for reference. +

    + +

    +For both documents, topics and scores sections, use irds +as the format in the input to tell Pataspco to use ir_datasets and +specify the dataset name at path. The lang value has to match the language +information provided by ir_datasets. +

    + +${hl(''' +documents: + input: + format: irds + lang: zho + path: clirmatrix/zh/bi139-base/en/dev + process: + inherit: text + output: true + +topics: + input: + format: irds + lang: eng + source: original + encoding: utf8 + path: clirmatrix/zh/bi139-base/en/dev + +score: + input: + format: irds + path: clirmatrix/zh/bi139-base/en/dev +''')} + +

    +This YAML config file can also be specified as a Python dictionary. Please refer to the +documentation of Pataspco for further information. +

    + +

    Further Information

    + + From ff0785f3e44b5cf250f524cda5b52cb723dd2d06 Mon Sep 17 00:00:00 2001 From: Eugene Yang Date: Tue, 28 Dec 2021 17:41:37 -0500 Subject: [PATCH 2/3] fix typo --- templates/patapsco.html | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/templates/patapsco.html b/templates/patapsco.html index aaaaf2442..75deb7c4e 100644 --- a/templates/patapsco.html +++ b/templates/patapsco.html @@ -11,15 +11,15 @@

    Basic Usage

    -Patapsco specify the source of the collection via config files or config dictionary in Python. +Patapsco specifies the source of the collection via config files or config dictionaries in Python. Please see this example config file for reference.

    For both documents, topics and scores sections, use irds -as the format in the input to tell Pataspco to use ir_datasets and -specify the dataset name at path. The lang value has to match the language +as the format in the input subsection to tell Pataspco to use ir_datasets +and specify the dataset name at path. The lang value has to match the language information provided by ir_datasets.

    From 4aa98cbbe7500e13a73a90f1f6cea54fa87467f4 Mon Sep 17 00:00:00 2001 From: Sean MacAvaney Date: Wed, 29 Dec 2021 10:28:38 +0000 Subject: [PATCH 3/3] Update patapsco.html --- templates/patapsco.html | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/templates/patapsco.html b/templates/patapsco.html index 75deb7c4e..42d2b1a72 100644 --- a/templates/patapsco.html +++ b/templates/patapsco.html @@ -20,7 +20,9 @@

    Basic Usage

    For both documents, topics and scores sections, use irds as the format in the input subsection to tell Pataspco to use ir_datasets and specify the dataset name at path. The lang value has to match the language -information provided by ir_datasets. +information provided by ir_datasets through dataset.docs_lang() and +dataset.queries_lang(). Note that Patapsco uses the three-letter ISO 639-3 language codes, +whereas ir_datasets provides two-letter ISO 639-1 language codes.

    ${hl(''' @@ -52,6 +54,19 @@

    Basic Usage

    documentation of Pataspco for further information.

    + + + + + + + + + + + +
    Patapsco's...Corresponds to...Notes
    documentsdocs
    documents.input.pathdataset's ID
    documents.input.langdataset.docs_lang()Need to convert from ISO 639-1 to ISO 639-3
    documents.process.inheritthe doc's field representing the text to use
    topicsqueries
    topics.input.pathdataset's ID
    topics.input.langdataset.queries_lang()Need to convert from ISO 639-1 to ISO 639-3
    scoreqrels
    score.input.pathdataset's ID
    +

    Further Information