diff --git a/generate.py b/generate.py index d91d9e0b4..86cf61707 100644 --- a/generate.py +++ b/generate.py @@ -396,6 +396,7 @@ def generate_index(out_dir, version, top_level_map):
  • ir_datasets SIGIR resource paper
  • Using ir_datasets with… PyTerrier · +Patapsco · ir-measures · trec_eval · Experimaestro @@ -615,6 +616,9 @@ def hlb(c): template = Template(filename=os.path.join("templates", "pyterrier.html")) with page_template('pyterrier.html', out_dir, version, title='PyTerrier & ir_datasets', include_irds_title=False) as out: out.write(template.render(hl=hl)) + template = Template(filename=os.path.join("templates", "patapsco.html")) + with page_template('patapsco.html', out_dir, version, title='Patapsco & ir_datasets', include_irds_title=False) as out: + out.write(template.render(hl=hl)) template = Template(filename=os.path.join("templates", "ir-measures.html")) with page_template('ir-measures.html', out_dir, version, title='ir_measures & ir_datasets', include_irds_title=False) as out: out.write(template.render(hl=hl, hlb=hlb)) diff --git a/templates/patapsco.html b/templates/patapsco.html new file mode 100644 index 000000000..42d2b1a72 --- /dev/null +++ b/templates/patapsco.html @@ -0,0 +1,74 @@ +

    +Patapsco is a framework for running cross-language +information retrieval (CLIR) experiments developed by the Human Language +Technology Center of Excellence (HLTCOE) at Johns Hopkins University. +

    + +

    +To get started with Patapsco, see this guide. +

    + +

    Basic Usage

    + +

    +Patapsco specifies the source of the collection via config files or config dictionaries in Python. +Please see this +example config file for reference. +

    + +

    +For each of the documents, topics, and score sections, use irds +as the format in the input subsection to tell Patapsco to use ir_datasets, +and specify the dataset name in path. The lang value has to match the language +information provided by ir_datasets through dataset.docs_lang() and +dataset.queries_lang(). Note that Patapsco uses three-letter ISO 639-3 language codes, +whereas ir_datasets provides two-letter ISO 639-1 language codes. +

    + +${hl(''' +documents: + input: + format: irds + lang: zho + path: clirmatrix/zh/bi139-base/en/dev + process: + inherit: text + output: true + +topics: + input: + format: irds + lang: eng + source: original + encoding: utf8 + path: clirmatrix/zh/bi139-base/en/dev + +score: + input: + format: irds + path: clirmatrix/zh/bi139-base/en/dev +''')} + +

    +This YAML config file can also be specified as a Python dictionary. Please refer to the +documentation of Patapsco for further information. +

    + + + + + + + + + + + + +
    Patapsco's...Corresponds to...Notes
    documentsdocs
    documents.input.pathdataset's ID
    documents.input.langdataset.docs_lang()Need to convert from ISO 639-1 to ISO 639-3
    documents.process.inheritthe doc's field representing the text to use
    topicsqueries
    topics.input.pathdataset's ID
    topics.input.langdataset.queries_lang()Need to convert from ISO 639-1 to ISO 639-3
    scoreqrels
    score.input.pathdataset's ID
    + +

    Further Information

    + +