diff --git a/README.md b/README.md index 4008a49..3b7bfe4 100644 --- a/README.md +++ b/README.md @@ -75,7 +75,7 @@ The `result` is a pandas DataFrame containing the mapped IDs (see below), while ## Retrieving Information -All [supported return fields](https://david-araripe.github.io/UniProtMapper/stable/field_reference.html#supported-fields) are both accessible through the attribute `ProtMapper.fields_table`: +A DataFrame with the [supported return fields](https://david-araripe.github.io/UniProtMapper/stable/field_reference.html#supported-fields) is accessible through the attribute `ProtMapper.fields_table`: ```Python from UniProtMapper import ProtMapper @@ -86,11 +86,11 @@ df.head() ``` | | label | returned_field | field_type | has_full_version | type | |---:|:---------------------|:-----------------|:-----------------|:-------------------|:--------------| -| 0 | Entry | accession | Names & Taxonomy | yes | uniprot_field | -| 1 | Entry Name | id | Names & Taxonomy | yes | uniprot_field | -| 2 | Gene Names | gene_names | Names & Taxonomy | yes | uniprot_field | -| 3 | Gene Names (primary) | gene_primary | Names & Taxonomy | yes | uniprot_field | -| 4 | Gene Names (synonym) | gene_synonym | Names & Taxonomy | yes | uniprot_field | +| 0 | Entry | accession | Names & Taxonomy | - | uniprot_field | +| 1 | Entry Name | id | Names & Taxonomy | - | uniprot_field | +| 2 | Gene Names | gene_names | Names & Taxonomy | - | uniprot_field | +| 3 | Gene Names (primary) | gene_primary | Names & Taxonomy | - | uniprot_field | +| 4 | Gene Names (synonym) | gene_synonym | Names & Taxonomy | - | uniprot_field | From the DataFrame, all `return_field` entries can be used to access UniProt data programmatically: @@ -105,6 +105,23 @@ result, failed = mapper.get(["Q02880"], fields=fields) >>> Fetched: 1 / 1 ``` +Further, for the cross-referenced fields that have `has_full_version` set to `yes`, returning the same field with extra information is supported by passing `_full`, such as 
`xref_pdb_full`. + +All available return fields are also accessible through the attribute `ProtMapper.supported_return_fields`: + +```python +from UniProtMapper import ProtMapper +mapper = ProtMapper() +print(mapper.supported_return_fields) + +>>> ['accession', +>>> 'id', +>>> 'gene_names', +>>> ... +>>> 'xref_smart_full', +>>> 'xref_supfam_full'] +``` + ## Field-based Querying UniProtMapper supports complex field-based protein queries using boolean operators (AND, OR, NOT) through the `uniprotkb_fields` module. This allows you to create sophisticated searches combining multiple criteria. For example: diff --git a/docs/source/field_reference.rst b/docs/source/field_reference.rst index 66ed74f..cdbba76 100644 --- a/docs/source/field_reference.rst +++ b/docs/source/field_reference.rst @@ -24,10 +24,10 @@ The supported return fields are listed below. The columns contain different info - **label**: The label used by UniProt to represent this field. Also used as column names on the `pd.DataFrame` returned from `get` methods implemented on both APIs. - **returned_field**: Name used to specify which information to retrieve by the APIs. For examples, check below. - **field_type**: The category of the field, as listed above under `Field Categories`. Note that for `type=='cross_reference'`, the field_type is the category of the cross-referenced database. -- **has_full_version**: Always `yes` for `type=='uniprot_field'`. Is used by UniProt to indicate whether a cross-referenced database is fully integrated. +- **has_full_version**: Not available for `type=='uniprot_field'`. If `yes`, a "full" version of the return field is accessible by using ``_full``. - **type**: Either "uniprot_field" or "cross_reference". The former indicates a field that is directly related to the protein, while the latter indicates a field that is a cross-reference to another database and not native to UniProt. 
-For more up-to-date information on `has_full_version` of cross-referenced fields, check the official UniProt documentation: `Return Fields `_ +For more up-to-date information on `has_full_version` of cross-referenced fields, check the official UniProt documentation: `Return Fields `_. In case of discrepancies, issues or pull requests are welcome! .. csv-table:: Supported Return Fields :header-rows: 1 diff --git a/src/UniProtMapper/idmapping_api.py b/src/UniProtMapper/idmapping_api.py index 5afd476..85ea0b2 100644 --- a/src/UniProtMapper/idmapping_api.py +++ b/src/UniProtMapper/idmapping_api.py @@ -60,20 +60,6 @@ def __init__( backoff_factor, api_url, ) - self.default_fields = ( - "accession", - "id", - "gene_names", - "protein_name", - "organism_name", - "organism_id", - "go_id", - "go_p", - "go_c", - "go_f", - "cc_subcellular_location", - "sequence", - ) @property def _supported_dbs(self) -> list: @@ -170,7 +156,7 @@ def get_id_mapping_results_search(self, fields: str, url: str, compressed: bool) def get( self, ids: Union[List[str], str], - fields: Optional[Union[str, List]] = "default", + fields: Optional[Union[str, List]] = None, from_db: str = "UniProtKB_AC-ID", to_db: str = "UniProtKB-Swiss-Prot", compressed: bool = True, @@ -181,9 +167,10 @@ def get( Args: ids: list of IDs to be mapped or single string. - fields: list of UniProt fields to be retrieved. If None, will return the API's - default fields. `Note:` parameter not supported for datasets that aren't - strictly UniProtKB, e.g.: UniParc, UniRef... Defaults to None. + fields: list of UniProt return fields to be retrieved. If None, will return the + API's default fields. `default` can also be passed to access `self.default_fields`. + **Note** parameter not supported for datasets that aren't strictly UniProtKB, + e.g.: UniParc, UniRef... Defaults to None. from_db: database for the ids. Defaults to "UniProtKB_AC-ID". to_db: UniProtDB to query to. For reviewed-only accessions, use default. 
If you want to include unreviewed accessions, use "UniProtKB". Defaults to @@ -207,10 +194,10 @@ def get( fields = self.default_fields else: fields = np.char.lower(np.array(fields)) - if not np.isin(fields, self.fields_table["returned_field"]).all(): + if not np.isin(fields, self.supported_return_fields).all(): raise ValueError( "Invalid fields. Valid fields are: " - f"{self.fields_table['returned_field'].values}" + f"{self.supported_return_fields}" ) if to_db not in ["UniProtKB-Swiss-Prot", "UniProtKB"]: if fields is not None: diff --git a/src/UniProtMapper/interface.py b/src/UniProtMapper/interface.py index 0c1d5e1..710f04d 100644 --- a/src/UniProtMapper/interface.py +++ b/src/UniProtMapper/interface.py @@ -22,6 +22,22 @@ class BaseUniProt(ABC): - BaseUniProt -> ProtKB (UniProtKB API) """ + fields_table = read_fields_table() + default_fields = ( + "accession", + "id", + "gene_names", + "protein_name", + "organism_name", + "organism_id", + "go_id", + "go_p", + "go_c", + "go_f", + "cc_subcellular_location", + "sequence", + ) + def __init__( self, pooling_interval: int = 3, @@ -44,10 +60,20 @@ def __init__( self.session = requests.Session() self._setup_session() self._re_next_link = re.compile(r'<(.+)>; rel="next"') + self._cached_supported_return_fields = None @property - def fields_table(self) -> None: - return read_fields_table() + def supported_return_fields(self) -> list: + """Return a list of the supported fields in UniProtKB & ID mapping API.""" + if self._cached_supported_return_fields is None: + full_version_fields = ( + self.fields_table.query('has_full_version == "yes"')["returned_field"] + + "_full" + ).tolist() + self._cached_supported_return_fields = ( + self.fields_table["returned_field"].tolist() + full_version_fields + ) + return self._cached_supported_return_fields def _setup_retries(self, total_retries, backoff_factor) -> None: return Retry( diff --git a/src/UniProtMapper/resources/uniprot_return_fields.csv 
b/src/UniProtMapper/resources/uniprot_return_fields.csv index 968d92c..38a504a 100644 --- a/src/UniProtMapper/resources/uniprot_return_fields.csv +++ b/src/UniProtMapper/resources/uniprot_return_fields.csv @@ -1,115 +1,115 @@ label,returned_field,field_type,has_full_version,type -Entry,accession,Names & Taxonomy,yes,uniprot_field -Entry Name,id,Names & Taxonomy,yes,uniprot_field -Gene Names,gene_names,Names & Taxonomy,yes,uniprot_field -Gene Names (primary),gene_primary,Names & Taxonomy,yes,uniprot_field -Gene Names (synonym),gene_synonym,Names & Taxonomy,yes,uniprot_field -Gene Names (ordered locus),gene_oln,Names & Taxonomy,yes,uniprot_field -Gene Names (ORF),gene_orf,Names & Taxonomy,yes,uniprot_field -Organism,organism_name,Names & Taxonomy,yes,uniprot_field -Organism (ID),organism_id,Names & Taxonomy,yes,uniprot_field -Protein names,protein_name,Names & Taxonomy,yes,uniprot_field -Proteomes,xref_proteomes,Names & Taxonomy,yes,uniprot_field -Taxonomic lineage,lineage,Names & Taxonomy,yes,uniprot_field -Taxonomic lineage (IDs),lineage_ids,Names & Taxonomy,yes,uniprot_field -Virus hosts,virus_hosts,Names & Taxonomy,yes,uniprot_field -Alternative products,cc_alternative_products,Sequences,yes,uniprot_field -Alternative sequence,ft_var_seq,Sequences,yes,uniprot_field -Erroneous gene model prediction,error_gmodel_pred,Sequences,yes,uniprot_field -Fragment,fragment,Sequences,yes,uniprot_field -Gene encoded by,organelle,Sequences,yes,uniprot_field -Length,length,Sequences,yes,uniprot_field -Mass,mass,Sequences,yes,uniprot_field -Mass spectrometry,cc_mass_spectrometry,Sequences,yes,uniprot_field -Natural variant,ft_variant,Sequences,yes,uniprot_field -Non-adjacent residues,ft_non_cons,Sequences,yes,uniprot_field -Non-standard residue,ft_non_std,Sequences,yes,uniprot_field -Non-terminal residue,ft_non_ter,Sequences,yes,uniprot_field -Polymorphism,cc_polymorphism,Sequences,yes,uniprot_field -RNA editing,cc_rna_editing,Sequences,yes,uniprot_field 
-Sequence,sequence,Sequences,yes,uniprot_field -Sequence caution,cc_sequence_caution,Sequences,yes,uniprot_field -Sequence conflict,ft_conflict,Sequences,yes,uniprot_field -Sequence uncertainty,ft_unsure,Sequences,yes,uniprot_field -Sequence version,sequence_version,Sequences,yes,uniprot_field -Absorption,absorption,Function,yes,uniprot_field -Active site,ft_act_site,Function,yes,uniprot_field -Activity regulation,cc_activity_regulation,Function,yes,uniprot_field -Binding site,ft_binding,Function,yes,uniprot_field -Catalytic activity,cc_catalytic_activity,Function,yes,uniprot_field -Cofactor,cc_cofactor,Function,yes,uniprot_field -DNA binding,ft_dna_bind,Function,yes,uniprot_field -EC number,ec,Function,yes,uniprot_field -Function [CC],cc_function,Function,yes,uniprot_field -Kinetics,kinetics,Function,yes,uniprot_field -Pathway,cc_pathway,Function,yes,uniprot_field -pH dependence,ph_dependence,Function,yes,uniprot_field -Redox potential,redox_potential,Function,yes,uniprot_field -Rhea ID,rhea,Function,yes,uniprot_field -Site,ft_site,Function,yes,uniprot_field -Temperature dependence,temp_dependence,Function,yes,uniprot_field -Annotation,annotation_score,Miscellaneous,yes,uniprot_field -Caution,cc_caution,Miscellaneous,yes,uniprot_field -Comment Count,comment_count,Miscellaneous,yes,uniprot_field -Features,feature_count,Miscellaneous,yes,uniprot_field -Keyword ID,keywordid,Miscellaneous,yes,uniprot_field -Keywords,keyword,Miscellaneous,yes,uniprot_field -Miscellaneous [CC],cc_miscellaneous,Miscellaneous,yes,uniprot_field -Protein existence,protein_existence,Miscellaneous,yes,uniprot_field -Reviewed,reviewed,Miscellaneous,yes,uniprot_field -Tools,tools,Miscellaneous,yes,uniprot_field -UniParc,uniparc_id,Miscellaneous,yes,uniprot_field -Interacts with,cc_interaction,Interaction,yes,uniprot_field -Subunit structure [CC],cc_subunit,Interaction,yes,uniprot_field -Developmental stage,cc_developmental_stage,Expression,yes,uniprot_field 
-Induction,cc_induction,Expression,yes,uniprot_field -Tissue specificity,cc_tissue_specificity,Expression,yes,uniprot_field -Gene Ontology (biological process),go_p,Gene Ontology (GO),yes,uniprot_field -Gene Ontology (cellular component),go_c,Gene Ontology (GO),yes,uniprot_field -Gene Ontology (GO),go,Gene Ontology (GO),yes,uniprot_field -Gene Ontology (molecular function),go_f,Gene Ontology (GO),yes,uniprot_field -Gene Ontology IDs,go_id,Gene Ontology (GO),yes,uniprot_field -Allergenic properties,cc_allergen,Pathology & Biotech,yes,uniprot_field -Biotechnological use,cc_biotechnology,Pathology & Biotech,yes,uniprot_field -Disruption phenotype,cc_disruption_phenotype,Pathology & Biotech,yes,uniprot_field -Involvement in disease,cc_disease,Pathology & Biotech,yes,uniprot_field -Mutagenesis,ft_mutagen,Pathology & Biotech,yes,uniprot_field -Pharmaceutical use,cc_pharmaceutical,Pathology & Biotech,yes,uniprot_field -Toxic dose,cc_toxic_dose,Pathology & Biotech,yes,uniprot_field -Intramembrane,ft_intramem,Subcellular location,yes,uniprot_field -Subcellular location [CC],cc_subcellular_location,Subcellular location,yes,uniprot_field -Topological domain,ft_topo_dom,Subcellular location,yes,uniprot_field -Transmembrane,ft_transmem,Subcellular location,yes,uniprot_field -Chain,ft_chain,PTM / Processsing,yes,uniprot_field -Cross-link,ft_crosslnk,PTM / Processsing,yes,uniprot_field -Disulfide bond,ft_disulfid,PTM / Processsing,yes,uniprot_field -Glycosylation,ft_carbohyd,PTM / Processsing,yes,uniprot_field -Initiator methionine,ft_init_met,PTM / Processsing,yes,uniprot_field -Lipidation,ft_lipid,PTM / Processsing,yes,uniprot_field -Modified residue,ft_mod_res,PTM / Processsing,yes,uniprot_field -Peptide,ft_peptide,PTM / Processsing,yes,uniprot_field -Post-translational modification,cc_ptm,PTM / Processsing,yes,uniprot_field -Propeptide,ft_propep,PTM / Processsing,yes,uniprot_field -Signal peptide,ft_signal,PTM / Processsing,yes,uniprot_field -Transit peptide,ft_transit,PTM / 
Processsing,yes,uniprot_field -3D,structure_3d,Structure,yes,uniprot_field -Beta strand,ft_strand,Structure,yes,uniprot_field -Helix,ft_helix,Structure,yes,uniprot_field -Turn,ft_turn,Structure,yes,uniprot_field -PubMed ID,lit_pubmed_id,Publications,yes,uniprot_field -Date of creation,date_created,Date of,yes,uniprot_field -Date of last modification,date_modified,Date of,yes,uniprot_field -Date of last sequence modification,date_sequence_modified,Date of,yes,uniprot_field -Entry version,version,Date of,yes,uniprot_field -Coiled coil,ft_coiled,Family & Domains,yes,uniprot_field -Compositional bias,ft_compbias,Family & Domains,yes,uniprot_field -Domain[CC],cc_domain,Family & Domains,yes,uniprot_field -Domain[FT],ft_domain,Family & Domains,yes,uniprot_field -Motif,ft_motif,Family & Domains,yes,uniprot_field -Protein families,protein_families,Family & Domains,yes,uniprot_field -Region,ft_region,Family & Domains,yes,uniprot_field -Repeat,ft_repeat,Family & Domains,yes,uniprot_field -Zinc finger,ft_zn_fing,Family & Domains,yes,uniprot_field +Entry,accession,Names & Taxonomy,-,uniprot_field +Entry Name,id,Names & Taxonomy,-,uniprot_field +Gene Names,gene_names,Names & Taxonomy,-,uniprot_field +Gene Names (primary),gene_primary,Names & Taxonomy,-,uniprot_field +Gene Names (synonym),gene_synonym,Names & Taxonomy,-,uniprot_field +Gene Names (ordered locus),gene_oln,Names & Taxonomy,-,uniprot_field +Gene Names (ORF),gene_orf,Names & Taxonomy,-,uniprot_field +Organism,organism_name,Names & Taxonomy,-,uniprot_field +Organism (ID),organism_id,Names & Taxonomy,-,uniprot_field +Protein names,protein_name,Names & Taxonomy,-,uniprot_field +Proteomes,xref_proteomes,Names & Taxonomy,-,uniprot_field +Taxonomic lineage,lineage,Names & Taxonomy,-,uniprot_field +Taxonomic lineage (IDs),lineage_ids,Names & Taxonomy,-,uniprot_field +Virus hosts,virus_hosts,Names & Taxonomy,-,uniprot_field +Alternative products,cc_alternative_products,Sequences,-,uniprot_field +Alternative 
sequence,ft_var_seq,Sequences,-,uniprot_field +Erroneous gene model prediction,error_gmodel_pred,Sequences,-,uniprot_field +Fragment,fragment,Sequences,-,uniprot_field +Gene encoded by,organelle,Sequences,-,uniprot_field +Length,length,Sequences,-,uniprot_field +Mass,mass,Sequences,-,uniprot_field +Mass spectrometry,cc_mass_spectrometry,Sequences,-,uniprot_field +Natural variant,ft_variant,Sequences,-,uniprot_field +Non-adjacent residues,ft_non_cons,Sequences,-,uniprot_field +Non-standard residue,ft_non_std,Sequences,-,uniprot_field +Non-terminal residue,ft_non_ter,Sequences,-,uniprot_field +Polymorphism,cc_polymorphism,Sequences,-,uniprot_field +RNA editing,cc_rna_editing,Sequences,-,uniprot_field +Sequence,sequence,Sequences,-,uniprot_field +Sequence caution,cc_sequence_caution,Sequences,-,uniprot_field +Sequence conflict,ft_conflict,Sequences,-,uniprot_field +Sequence uncertainty,ft_unsure,Sequences,-,uniprot_field +Sequence version,sequence_version,Sequences,-,uniprot_field +Absorption,absorption,Function,-,uniprot_field +Active site,ft_act_site,Function,-,uniprot_field +Activity regulation,cc_activity_regulation,Function,-,uniprot_field +Binding site,ft_binding,Function,-,uniprot_field +Catalytic activity,cc_catalytic_activity,Function,-,uniprot_field +Cofactor,cc_cofactor,Function,-,uniprot_field +DNA binding,ft_dna_bind,Function,-,uniprot_field +EC number,ec,Function,-,uniprot_field +Function [CC],cc_function,Function,-,uniprot_field +Kinetics,kinetics,Function,-,uniprot_field +Pathway,cc_pathway,Function,-,uniprot_field +pH dependence,ph_dependence,Function,-,uniprot_field +Redox potential,redox_potential,Function,-,uniprot_field +Rhea ID,rhea,Function,-,uniprot_field +Site,ft_site,Function,-,uniprot_field +Temperature dependence,temp_dependence,Function,-,uniprot_field +Annotation,annotation_score,Miscellaneous,-,uniprot_field +Caution,cc_caution,Miscellaneous,-,uniprot_field +Comment Count,comment_count,Miscellaneous,-,uniprot_field 
+Features,feature_count,Miscellaneous,-,uniprot_field +Keyword ID,keywordid,Miscellaneous,-,uniprot_field +Keywords,keyword,Miscellaneous,-,uniprot_field +Miscellaneous [CC],cc_miscellaneous,Miscellaneous,-,uniprot_field +Protein existence,protein_existence,Miscellaneous,-,uniprot_field +Reviewed,reviewed,Miscellaneous,-,uniprot_field +Tools,tools,Miscellaneous,-,uniprot_field +UniParc,uniparc_id,Miscellaneous,-,uniprot_field +Interacts with,cc_interaction,Interaction,-,uniprot_field +Subunit structure [CC],cc_subunit,Interaction,-,uniprot_field +Developmental stage,cc_developmental_stage,Expression,-,uniprot_field +Induction,cc_induction,Expression,-,uniprot_field +Tissue specificity,cc_tissue_specificity,Expression,-,uniprot_field +Gene Ontology (biological process),go_p,Gene Ontology (GO),-,uniprot_field +Gene Ontology (cellular component),go_c,Gene Ontology (GO),-,uniprot_field +Gene Ontology (GO),go,Gene Ontology (GO),-,uniprot_field +Gene Ontology (molecular function),go_f,Gene Ontology (GO),-,uniprot_field +Gene Ontology IDs,go_id,Gene Ontology (GO),-,uniprot_field +Allergenic properties,cc_allergen,Pathology & Biotech,-,uniprot_field +Biotechnological use,cc_biotechnology,Pathology & Biotech,-,uniprot_field +Disruption phenotype,cc_disruption_phenotype,Pathology & Biotech,-,uniprot_field +Involvement in disease,cc_disease,Pathology & Biotech,-,uniprot_field +Mutagenesis,ft_mutagen,Pathology & Biotech,-,uniprot_field +Pharmaceutical use,cc_pharmaceutical,Pathology & Biotech,-,uniprot_field +Toxic dose,cc_toxic_dose,Pathology & Biotech,-,uniprot_field +Intramembrane,ft_intramem,Subcellular location,-,uniprot_field +Subcellular location [CC],cc_subcellular_location,Subcellular location,-,uniprot_field +Topological domain,ft_topo_dom,Subcellular location,-,uniprot_field +Transmembrane,ft_transmem,Subcellular location,-,uniprot_field +Chain,ft_chain,PTM / Processsing,-,uniprot_field +Cross-link,ft_crosslnk,PTM / Processsing,-,uniprot_field +Disulfide 
bond,ft_disulfid,PTM / Processsing,-,uniprot_field +Glycosylation,ft_carbohyd,PTM / Processsing,-,uniprot_field +Initiator methionine,ft_init_met,PTM / Processsing,-,uniprot_field +Lipidation,ft_lipid,PTM / Processsing,-,uniprot_field +Modified residue,ft_mod_res,PTM / Processsing,-,uniprot_field +Peptide,ft_peptide,PTM / Processsing,-,uniprot_field +Post-translational modification,cc_ptm,PTM / Processsing,-,uniprot_field +Propeptide,ft_propep,PTM / Processsing,-,uniprot_field +Signal peptide,ft_signal,PTM / Processsing,-,uniprot_field +Transit peptide,ft_transit,PTM / Processsing,-,uniprot_field +3D,structure_3d,Structure,-,uniprot_field +Beta strand,ft_strand,Structure,-,uniprot_field +Helix,ft_helix,Structure,-,uniprot_field +Turn,ft_turn,Structure,-,uniprot_field +PubMed ID,lit_pubmed_id,Publications,-,uniprot_field +Date of creation,date_created,Date of,-,uniprot_field +Date of last modification,date_modified,Date of,-,uniprot_field +Date of last sequence modification,date_sequence_modified,Date of,-,uniprot_field +Entry version,version,Date of,-,uniprot_field +Coiled coil,ft_coiled,Family & Domains,-,uniprot_field +Compositional bias,ft_compbias,Family & Domains,-,uniprot_field +Domain[CC],cc_domain,Family & Domains,-,uniprot_field +Domain[FT],ft_domain,Family & Domains,-,uniprot_field +Motif,ft_motif,Family & Domains,-,uniprot_field +Protein families,protein_families,Family & Domains,-,uniprot_field +Region,ft_region,Family & Domains,-,uniprot_field +Repeat,ft_repeat,Family & Domains,-,uniprot_field +Zinc finger,ft_zn_fing,Family & Domains,-,uniprot_field CCDS,xref_ccds,Sequences,no,cross_reference EMBL,xref_embl,Sequences,yes,cross_reference PIR,xref_pir,Sequences,yes,cross_reference diff --git a/src/UniProtMapper/uniprotkb_api.py b/src/UniProtMapper/uniprotkb_api.py index 55131d3..1b4de01 100644 --- a/src/UniProtMapper/uniprotkb_api.py +++ b/src/UniProtMapper/uniprotkb_api.py @@ -4,6 +4,7 @@ from logging import info from typing import Generator, List, 
Optional, Tuple, Union +import numpy as np import pandas as pd import requests from tqdm import tqdm @@ -138,7 +139,7 @@ def _get_batches( def get( self, query: Union[QueryBuilder, str], - fields: Optional[List[str]] = None, + fields: Optional[Union[str, List]] = None, include_isoform: bool = False, compressed: bool = False, size: int = 500, @@ -149,7 +150,11 @@ def get( An example of this would be: Args: - fields: string or QueryBuilder object with the fields to retrieve. + query: Query string or QueryBuilder object (UniProtMapper.uniprotkb_fields). + fields: list of UniProt return fields to be retrieved. If None, will return the + API's default fields. `default` can also be passed to access `self.default_fields`. + **Note** parameter not supported for datasets that aren't strictly UniProtKB, + e.g.: UniParc, UniRef... Defaults to None. include_isoform: Whether to include isoforms. Defaults to False compressed: Whether to request compressed response. Defaults to False size: Batch size for pagination. Defaults to 500 @@ -157,6 +162,16 @@ def get( Returns: - DataFrame with the retrieved data """ + if fields is not None: + if fields == "default": + fields = self.default_fields + else: + fields = np.char.lower(np.array(fields)) + if not np.isin(fields, self.supported_return_fields).all(): + raise ValueError( + "Invalid fields. Valid fields are: " + f"{self.supported_return_fields}" + ) if fields is None: info( f"No fields provided. Using default fields: {', '.join(self.default_fields)}"