diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index ce31dd61..536eb0ad 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -9,21 +9,32 @@ on: jobs: test: runs-on: ${{ matrix.os }} - env: - NO_ET: 1 - DATACITE_DEV_PASSWORD: ${{ secrets.DATACITE_DEV_PASSWORD }} + defaults: + run: + shell: bash strategy: fail-fast: false matrix: - os: - - windows-latest - - ubuntu-latest - - macos-latest - python: - - 3.9 - - '3.10' - - '3.11' - - '3.12' + os: [windows-latest, ubuntu-latest, macos-latest] + python: ['3.9', '3.10', '3.11', '3.12'] + vendored_env: [unvendored] + include: + - os: ubuntu-latest + python: '3.9' + vendored_env: dandi + instance_name: DANDI + instance_identifier: 'RRID:SCR_017571' + doi_prefix: '10.80507' + - os: ubuntu-latest + python: '3.9' + vendored_env: ember-dandi + instance_name: EMBER-DANDI + instance_identifier: 'RRID:SCR_026700' + doi_prefix: '10.82754' + - os: ubuntu-latest + python: '3.9' + vendored_env: ember-dandi-no-doi + instance_name: EMBER-DANDI steps: - name: Set up environment uses: actions/checkout@v5 @@ -42,7 +53,36 @@ jobs: python -m pip install --upgrade pip wheel python -m pip install --upgrade tox + # Set only if matrix.instance_name is defined + - name: Set DANDI_INSTANCE_NAME + if: ${{ matrix.instance_name }} + run: echo "DANDI_INSTANCE_NAME=${{ matrix.instance_name }}" >> "$GITHUB_ENV" + + # Set only if matrix.instance_identifier is defined + - name: Set DANDI_INSTANCE_IDENTIFIER + if: ${{ matrix.instance_identifier }} + run: echo "DANDI_INSTANCE_IDENTIFIER=${{ matrix.instance_identifier }}" >> "$GITHUB_ENV" + + # Set only if matrix.doi_prefix is defined + - name: Set DANDI_DOI_PREFIX + if: ${{ matrix.doi_prefix }} + run: echo "DANDI_DOI_PREFIX=${{ matrix.doi_prefix }}" >> "$GITHUB_ENV" + + - name: Set DANDI DataCite credentials + if: ${{ matrix.vendored_env == 'dandi' }} + run: | + echo "DATACITE_DEV_LOGIN=${{ secrets.DATACITE_DEV_DANDI_LOGIN }}" >> "$GITHUB_ENV" + echo 
"DATACITE_DEV_PASSWORD=${{ secrets.DATACITE_DEV_DANDI_PASSWORD }}" >> "$GITHUB_ENV" + + - name: Set EMBER DataCite credentials + if: ${{ matrix.vendored_env == 'ember-dandi' }} + run: | + echo "DATACITE_DEV_LOGIN=${{ secrets.DATACITE_DEV_EMBER_LOGIN }}" >> "$GITHUB_ENV" + echo "DATACITE_DEV_PASSWORD=${{ secrets.DATACITE_DEV_EMBER_PASSWORD }}" >> "$GITHUB_ENV" + - name: Run all tests + env: + NO_ET: 1 run: tox -e py -- -s --cov-report=xml - name: Upload coverage to Codecov diff --git a/README.md b/README.md index d80d8df8..1086dcd3 100644 --- a/README.md +++ b/README.md @@ -30,6 +30,27 @@ Important files in this repository include: - [metadata.py](./dandischema/metadata.py) - contains functions for validating, migrating, and aggregating metadata - [datacite package](./dandischema/datacite) - contains functions for converting Dandiset metadata to DataCite metadata +## Customization with Vendor Information + +The DANDI metadata models defined in this library can be customized with vendor-specific information. +The parameters of the customization are defined by the fields of the `Config` class in +[dandischema/conf.py](./dandischema/conf.py). The `Config` class is a subclass of +[`pydantic_settings.BaseSettings`](https://docs.pydantic.dev/latest/concepts/pydantic_settings/), +and the values of the fields in an instance of the `Config` class can be set through environment +variables and `.env` files, as documented in +[the Pydantic Settings documentation](https://docs.pydantic.dev/latest/concepts/pydantic_settings/). +Specifically, + +- The value of a field is set from an environment variable with the same name, case-insensitively, + as one of the aliases of the field. For example, the `instance_name` field can be set from + the `DANDI_INSTANCE_NAME` or `DJANGO_DANDI_INSTANCE_NAME` environment variable. +- A value of a complex type (e.g., `list`, `set`, `dict`) should be expressed as a JSON-encoded string + in an environment variable. 
For example, the value for the `licenses` field, which is of + type `set`, can be set from the `DANDI_LICENSES` environment variable defined as the following: + ```shell + export DANDI_LICENSES='["spdx:CC0-1.0", "spdx:CC-BY-4.0"]' + ``` + ## Resources * To learn how to interact with the DANDI archive, diff --git a/dandischema/__init__.py b/dandischema/__init__.py index f2cce782..c2eb4e56 100644 --- a/dandischema/__init__.py +++ b/dandischema/__init__.py @@ -1,4 +1,3 @@ -__all__ = ["__version__", "migrate", "validate"] +__all__ = ["__version__"] from ._version import __version__ -from .metadata import migrate, validate diff --git a/dandischema/_resources/spdx_license_ids.json b/dandischema/_resources/spdx_license_ids.json new file mode 100644 index 00000000..51263914 --- /dev/null +++ b/dandischema/_resources/spdx_license_ids.json @@ -0,0 +1,709 @@ +{ + "source": { + "version": "3.27.0", + "release_date": "2025-07-01T00:00:00Z", + "url": "https://raw.githubusercontent.com/spdx/license-list-data/refs/tags/v3.27.0/json/licenses.json", + "reference": "https://spdx.org/licenses/" + }, + "license_ids": [ + "0BSD", + "3D-Slicer-1.0", + "AAL", + "Abstyles", + "AdaCore-doc", + "Adobe-2006", + "Adobe-Display-PostScript", + "Adobe-Glyph", + "Adobe-Utopia", + "ADSL", + "AFL-1.1", + "AFL-1.2", + "AFL-2.0", + "AFL-2.1", + "AFL-3.0", + "Afmparse", + "AGPL-1.0", + "AGPL-1.0-only", + "AGPL-1.0-or-later", + "AGPL-3.0", + "AGPL-3.0-only", + "AGPL-3.0-or-later", + "Aladdin", + "AMD-newlib", + "AMDPLPA", + "AML", + "AML-glslang", + "AMPAS", + "ANTLR-PD", + "ANTLR-PD-fallback", + "any-OSI", + "any-OSI-perl-modules", + "Apache-1.0", + "Apache-1.1", + "Apache-2.0", + "APAFML", + "APL-1.0", + "App-s2p", + "APSL-1.0", + "APSL-1.1", + "APSL-1.2", + "APSL-2.0", + "Arphic-1999", + "Artistic-1.0", + "Artistic-1.0-cl8", + "Artistic-1.0-Perl", + "Artistic-2.0", + "Artistic-dist", + "Aspell-RU", + "ASWF-Digital-Assets-1.0", + "ASWF-Digital-Assets-1.1", + "Baekmuk", + "Bahyph", + "Barr", + 
"bcrypt-Solar-Designer", + "Beerware", + "Bitstream-Charter", + "Bitstream-Vera", + "BitTorrent-1.0", + "BitTorrent-1.1", + "blessing", + "BlueOak-1.0.0", + "Boehm-GC", + "Boehm-GC-without-fee", + "Borceux", + "Brian-Gladman-2-Clause", + "Brian-Gladman-3-Clause", + "BSD-1-Clause", + "BSD-2-Clause", + "BSD-2-Clause-Darwin", + "BSD-2-Clause-first-lines", + "BSD-2-Clause-FreeBSD", + "BSD-2-Clause-NetBSD", + "BSD-2-Clause-Patent", + "BSD-2-Clause-pkgconf-disclaimer", + "BSD-2-Clause-Views", + "BSD-3-Clause", + "BSD-3-Clause-acpica", + "BSD-3-Clause-Attribution", + "BSD-3-Clause-Clear", + "BSD-3-Clause-flex", + "BSD-3-Clause-HP", + "BSD-3-Clause-LBNL", + "BSD-3-Clause-Modification", + "BSD-3-Clause-No-Military-License", + "BSD-3-Clause-No-Nuclear-License", + "BSD-3-Clause-No-Nuclear-License-2014", + "BSD-3-Clause-No-Nuclear-Warranty", + "BSD-3-Clause-Open-MPI", + "BSD-3-Clause-Sun", + "BSD-4-Clause", + "BSD-4-Clause-Shortened", + "BSD-4-Clause-UC", + "BSD-4.3RENO", + "BSD-4.3TAHOE", + "BSD-Advertising-Acknowledgement", + "BSD-Attribution-HPND-disclaimer", + "BSD-Inferno-Nettverk", + "BSD-Protection", + "BSD-Source-beginning-file", + "BSD-Source-Code", + "BSD-Systemics", + "BSD-Systemics-W3Works", + "BSL-1.0", + "BUSL-1.1", + "bzip2-1.0.5", + "bzip2-1.0.6", + "C-UDA-1.0", + "CAL-1.0", + "CAL-1.0-Combined-Work-Exception", + "Caldera", + "Caldera-no-preamble", + "Catharon", + "CATOSL-1.1", + "CC-BY-1.0", + "CC-BY-2.0", + "CC-BY-2.5", + "CC-BY-2.5-AU", + "CC-BY-3.0", + "CC-BY-3.0-AT", + "CC-BY-3.0-AU", + "CC-BY-3.0-DE", + "CC-BY-3.0-IGO", + "CC-BY-3.0-NL", + "CC-BY-3.0-US", + "CC-BY-4.0", + "CC-BY-NC-1.0", + "CC-BY-NC-2.0", + "CC-BY-NC-2.5", + "CC-BY-NC-3.0", + "CC-BY-NC-3.0-DE", + "CC-BY-NC-4.0", + "CC-BY-NC-ND-1.0", + "CC-BY-NC-ND-2.0", + "CC-BY-NC-ND-2.5", + "CC-BY-NC-ND-3.0", + "CC-BY-NC-ND-3.0-DE", + "CC-BY-NC-ND-3.0-IGO", + "CC-BY-NC-ND-4.0", + "CC-BY-NC-SA-1.0", + "CC-BY-NC-SA-2.0", + "CC-BY-NC-SA-2.0-DE", + "CC-BY-NC-SA-2.0-FR", + "CC-BY-NC-SA-2.0-UK", + 
"CC-BY-NC-SA-2.5", + "CC-BY-NC-SA-3.0", + "CC-BY-NC-SA-3.0-DE", + "CC-BY-NC-SA-3.0-IGO", + "CC-BY-NC-SA-4.0", + "CC-BY-ND-1.0", + "CC-BY-ND-2.0", + "CC-BY-ND-2.5", + "CC-BY-ND-3.0", + "CC-BY-ND-3.0-DE", + "CC-BY-ND-4.0", + "CC-BY-SA-1.0", + "CC-BY-SA-2.0", + "CC-BY-SA-2.0-UK", + "CC-BY-SA-2.1-JP", + "CC-BY-SA-2.5", + "CC-BY-SA-3.0", + "CC-BY-SA-3.0-AT", + "CC-BY-SA-3.0-DE", + "CC-BY-SA-3.0-IGO", + "CC-BY-SA-4.0", + "CC-PDDC", + "CC-PDM-1.0", + "CC-SA-1.0", + "CC0-1.0", + "CDDL-1.0", + "CDDL-1.1", + "CDL-1.0", + "CDLA-Permissive-1.0", + "CDLA-Permissive-2.0", + "CDLA-Sharing-1.0", + "CECILL-1.0", + "CECILL-1.1", + "CECILL-2.0", + "CECILL-2.1", + "CECILL-B", + "CECILL-C", + "CERN-OHL-1.1", + "CERN-OHL-1.2", + "CERN-OHL-P-2.0", + "CERN-OHL-S-2.0", + "CERN-OHL-W-2.0", + "CFITSIO", + "check-cvs", + "checkmk", + "ClArtistic", + "Clips", + "CMU-Mach", + "CMU-Mach-nodoc", + "CNRI-Jython", + "CNRI-Python", + "CNRI-Python-GPL-Compatible", + "COIL-1.0", + "Community-Spec-1.0", + "Condor-1.1", + "copyleft-next-0.3.0", + "copyleft-next-0.3.1", + "Cornell-Lossless-JPEG", + "CPAL-1.0", + "CPL-1.0", + "CPOL-1.02", + "Cronyx", + "Crossword", + "CryptoSwift", + "CrystalStacker", + "CUA-OPL-1.0", + "Cube", + "curl", + "cve-tou", + "D-FSL-1.0", + "DEC-3-Clause", + "diffmark", + "DL-DE-BY-2.0", + "DL-DE-ZERO-2.0", + "DOC", + "DocBook-DTD", + "DocBook-Schema", + "DocBook-Stylesheet", + "DocBook-XML", + "Dotseqn", + "DRL-1.0", + "DRL-1.1", + "DSDP", + "dtoa", + "dvipdfm", + "ECL-1.0", + "ECL-2.0", + "eCos-2.0", + "EFL-1.0", + "EFL-2.0", + "eGenix", + "Elastic-2.0", + "Entessa", + "EPICS", + "EPL-1.0", + "EPL-2.0", + "ErlPL-1.1", + "etalab-2.0", + "EUDatagrid", + "EUPL-1.0", + "EUPL-1.1", + "EUPL-1.2", + "Eurosym", + "Fair", + "FBM", + "FDK-AAC", + "Ferguson-Twofish", + "Frameworx-1.0", + "FreeBSD-DOC", + "FreeImage", + "FSFAP", + "FSFAP-no-warranty-disclaimer", + "FSFUL", + "FSFULLR", + "FSFULLRSD", + "FSFULLRWD", + "FSL-1.1-ALv2", + "FSL-1.1-MIT", + "FTL", + "Furuseth", + "fwlw", + 
"Game-Programming-Gems", + "GCR-docs", + "GD", + "generic-xts", + "GFDL-1.1", + "GFDL-1.1-invariants-only", + "GFDL-1.1-invariants-or-later", + "GFDL-1.1-no-invariants-only", + "GFDL-1.1-no-invariants-or-later", + "GFDL-1.1-only", + "GFDL-1.1-or-later", + "GFDL-1.2", + "GFDL-1.2-invariants-only", + "GFDL-1.2-invariants-or-later", + "GFDL-1.2-no-invariants-only", + "GFDL-1.2-no-invariants-or-later", + "GFDL-1.2-only", + "GFDL-1.2-or-later", + "GFDL-1.3", + "GFDL-1.3-invariants-only", + "GFDL-1.3-invariants-or-later", + "GFDL-1.3-no-invariants-only", + "GFDL-1.3-no-invariants-or-later", + "GFDL-1.3-only", + "GFDL-1.3-or-later", + "Giftware", + "GL2PS", + "Glide", + "Glulxe", + "GLWTPL", + "gnuplot", + "GPL-1.0", + "GPL-1.0+", + "GPL-1.0-only", + "GPL-1.0-or-later", + "GPL-2.0", + "GPL-2.0+", + "GPL-2.0-only", + "GPL-2.0-or-later", + "GPL-2.0-with-autoconf-exception", + "GPL-2.0-with-bison-exception", + "GPL-2.0-with-classpath-exception", + "GPL-2.0-with-font-exception", + "GPL-2.0-with-GCC-exception", + "GPL-3.0", + "GPL-3.0+", + "GPL-3.0-only", + "GPL-3.0-or-later", + "GPL-3.0-with-autoconf-exception", + "GPL-3.0-with-GCC-exception", + "Graphics-Gems", + "gSOAP-1.3b", + "gtkbook", + "Gutmann", + "HaskellReport", + "HDF5", + "hdparm", + "HIDAPI", + "Hippocratic-2.1", + "HP-1986", + "HP-1989", + "HPND", + "HPND-DEC", + "HPND-doc", + "HPND-doc-sell", + "HPND-export-US", + "HPND-export-US-acknowledgement", + "HPND-export-US-modify", + "HPND-export2-US", + "HPND-Fenneberg-Livingston", + "HPND-INRIA-IMAG", + "HPND-Intel", + "HPND-Kevlin-Henney", + "HPND-Markus-Kuhn", + "HPND-merchantability-variant", + "HPND-MIT-disclaimer", + "HPND-Netrek", + "HPND-Pbmplus", + "HPND-sell-MIT-disclaimer-xserver", + "HPND-sell-regexpr", + "HPND-sell-variant", + "HPND-sell-variant-MIT-disclaimer", + "HPND-sell-variant-MIT-disclaimer-rev", + "HPND-UC", + "HPND-UC-export-US", + "HTMLTIDY", + "IBM-pibs", + "ICU", + "IEC-Code-Components-EULA", + "IJG", + "IJG-short", + "ImageMagick", + 
"iMatix", + "Imlib2", + "Info-ZIP", + "Inner-Net-2.0", + "InnoSetup", + "Intel", + "Intel-ACPI", + "Interbase-1.0", + "IPA", + "IPL-1.0", + "ISC", + "ISC-Veillard", + "Jam", + "JasPer-2.0", + "jove", + "JPL-image", + "JPNIC", + "JSON", + "Kastrup", + "Kazlib", + "Knuth-CTAN", + "LAL-1.2", + "LAL-1.3", + "Latex2e", + "Latex2e-translated-notice", + "Leptonica", + "LGPL-2.0", + "LGPL-2.0+", + "LGPL-2.0-only", + "LGPL-2.0-or-later", + "LGPL-2.1", + "LGPL-2.1+", + "LGPL-2.1-only", + "LGPL-2.1-or-later", + "LGPL-3.0", + "LGPL-3.0+", + "LGPL-3.0-only", + "LGPL-3.0-or-later", + "LGPLLR", + "Libpng", + "libpng-1.6.35", + "libpng-2.0", + "libselinux-1.0", + "libtiff", + "libutil-David-Nugent", + "LiLiQ-P-1.1", + "LiLiQ-R-1.1", + "LiLiQ-Rplus-1.1", + "Linux-man-pages-1-para", + "Linux-man-pages-copyleft", + "Linux-man-pages-copyleft-2-para", + "Linux-man-pages-copyleft-var", + "Linux-OpenIB", + "LOOP", + "LPD-document", + "LPL-1.0", + "LPL-1.02", + "LPPL-1.0", + "LPPL-1.1", + "LPPL-1.2", + "LPPL-1.3a", + "LPPL-1.3c", + "lsof", + "Lucida-Bitmap-Fonts", + "LZMA-SDK-9.11-to-9.20", + "LZMA-SDK-9.22", + "Mackerras-3-Clause", + "Mackerras-3-Clause-acknowledgment", + "magaz", + "mailprio", + "MakeIndex", + "man2html", + "Martin-Birgmeier", + "McPhee-slideshow", + "metamail", + "Minpack", + "MIPS", + "MirOS", + "MIT", + "MIT-0", + "MIT-advertising", + "MIT-Click", + "MIT-CMU", + "MIT-enna", + "MIT-feh", + "MIT-Festival", + "MIT-Khronos-old", + "MIT-Modern-Variant", + "MIT-open-group", + "MIT-testregex", + "MIT-Wu", + "MITNFA", + "MMIXware", + "Motosoto", + "MPEG-SSG", + "mpi-permissive", + "mpich2", + "MPL-1.0", + "MPL-1.1", + "MPL-2.0", + "MPL-2.0-no-copyleft-exception", + "mplus", + "MS-LPL", + "MS-PL", + "MS-RL", + "MTLL", + "MulanPSL-1.0", + "MulanPSL-2.0", + "Multics", + "Mup", + "NAIST-2003", + "NASA-1.3", + "Naumen", + "NBPL-1.0", + "NCBI-PD", + "NCGL-UK-2.0", + "NCL", + "NCSA", + "Net-SNMP", + "NetCDF", + "Newsletr", + "NGPL", + "ngrep", + "NICTA-1.0", + "NIST-PD", + 
"NIST-PD-fallback", + "NIST-Software", + "NLOD-1.0", + "NLOD-2.0", + "NLPL", + "Nokia", + "NOSL", + "Noweb", + "NPL-1.0", + "NPL-1.1", + "NPOSL-3.0", + "NRL", + "NTIA-PD", + "NTP", + "NTP-0", + "Nunit", + "O-UDA-1.0", + "OAR", + "OCCT-PL", + "OCLC-2.0", + "ODbL-1.0", + "ODC-By-1.0", + "OFFIS", + "OFL-1.0", + "OFL-1.0-no-RFN", + "OFL-1.0-RFN", + "OFL-1.1", + "OFL-1.1-no-RFN", + "OFL-1.1-RFN", + "OGC-1.0", + "OGDL-Taiwan-1.0", + "OGL-Canada-2.0", + "OGL-UK-1.0", + "OGL-UK-2.0", + "OGL-UK-3.0", + "OGTSL", + "OLDAP-1.1", + "OLDAP-1.2", + "OLDAP-1.3", + "OLDAP-1.4", + "OLDAP-2.0", + "OLDAP-2.0.1", + "OLDAP-2.1", + "OLDAP-2.2", + "OLDAP-2.2.1", + "OLDAP-2.2.2", + "OLDAP-2.3", + "OLDAP-2.4", + "OLDAP-2.5", + "OLDAP-2.6", + "OLDAP-2.7", + "OLDAP-2.8", + "OLFL-1.3", + "OML", + "OpenPBS-2.3", + "OpenSSL", + "OpenSSL-standalone", + "OpenVision", + "OPL-1.0", + "OPL-UK-3.0", + "OPUBL-1.0", + "OSET-PL-2.1", + "OSL-1.0", + "OSL-1.1", + "OSL-2.0", + "OSL-2.1", + "OSL-3.0", + "PADL", + "Parity-6.0.0", + "Parity-7.0.0", + "PDDL-1.0", + "PHP-3.0", + "PHP-3.01", + "Pixar", + "pkgconf", + "Plexus", + "pnmstitch", + "PolyForm-Noncommercial-1.0.0", + "PolyForm-Small-Business-1.0.0", + "PostgreSQL", + "PPL", + "PSF-2.0", + "psfrag", + "psutils", + "Python-2.0", + "Python-2.0.1", + "python-ldap", + "Qhull", + "QPL-1.0", + "QPL-1.0-INRIA-2004", + "radvd", + "Rdisc", + "RHeCos-1.1", + "RPL-1.1", + "RPL-1.5", + "RPSL-1.0", + "RSA-MD", + "RSCPL", + "Ruby", + "Ruby-pty", + "SAX-PD", + "SAX-PD-2.0", + "Saxpath", + "SCEA", + "SchemeReport", + "Sendmail", + "Sendmail-8.23", + "Sendmail-Open-Source-1.1", + "SGI-B-1.0", + "SGI-B-1.1", + "SGI-B-2.0", + "SGI-OpenGL", + "SGP4", + "SHL-0.5", + "SHL-0.51", + "SimPL-2.0", + "SISSL", + "SISSL-1.2", + "SL", + "Sleepycat", + "SMAIL-GPL", + "SMLNJ", + "SMPPL", + "SNIA", + "snprintf", + "SOFA", + "softSurfer", + "Soundex", + "Spencer-86", + "Spencer-94", + "Spencer-99", + "SPL-1.0", + "ssh-keyscan", + "SSH-OpenSSH", + "SSH-short", + "SSLeay-standalone", + 
"SSPL-1.0", + "StandardML-NJ", + "SugarCRM-1.1.3", + "SUL-1.0", + "Sun-PPP", + "Sun-PPP-2000", + "SunPro", + "SWL", + "swrule", + "Symlinks", + "TAPR-OHL-1.0", + "TCL", + "TCP-wrappers", + "TermReadKey", + "TGPPL-1.0", + "ThirdEye", + "threeparttable", + "TMate", + "TORQUE-1.1", + "TOSL", + "TPDL", + "TPL-1.0", + "TrustedQSL", + "TTWL", + "TTYP0", + "TU-Berlin-1.0", + "TU-Berlin-2.0", + "Ubuntu-font-1.0", + "UCAR", + "UCL-1.0", + "ulem", + "UMich-Merit", + "Unicode-3.0", + "Unicode-DFS-2015", + "Unicode-DFS-2016", + "Unicode-TOU", + "UnixCrypt", + "Unlicense", + "Unlicense-libtelnet", + "Unlicense-libwhirlpool", + "UPL-1.0", + "URT-RLE", + "Vim", + "VOSTROM", + "VSL-1.0", + "W3C", + "W3C-19980720", + "W3C-20150513", + "w3m", + "Watcom-1.0", + "Widget-Workshop", + "Wsuipa", + "WTFPL", + "wwl", + "wxWindows", + "X11", + "X11-distribute-modifications-variant", + "X11-swapped", + "Xdebug-1.03", + "Xerox", + "Xfig", + "XFree86-1.1", + "xinetd", + "xkeyboard-config-Zinoviev", + "xlock", + "Xnet", + "xpp", + "XSkat", + "xzoom", + "YPL-1.0", + "YPL-1.1", + "Zed", + "Zeeff", + "Zend-2.0", + "Zimbra-1.3", + "Zimbra-1.4", + "Zlib", + "zlib-acknowledgement", + "ZPL-1.1", + "ZPL-2.0", + "ZPL-2.1" + ] +} diff --git a/dandischema/conf.py b/dandischema/conf.py new file mode 100644 index 00000000..3fb77dc4 --- /dev/null +++ b/dandischema/conf.py @@ -0,0 +1,332 @@ +# This file defines the configuration for the DANDI schema + +from __future__ import annotations + +from datetime import datetime +from enum import Enum +from importlib.resources import files +import logging +from typing import TYPE_CHECKING, Annotated, Any, Optional, Union + +from pydantic import ( + AliasChoices, + AnyHttpUrl, + AnyUrl, + BaseModel, + Field, + StringConstraints, + model_validator, +) +from pydantic.fields import FieldInfo +from pydantic_settings import ( + BaseSettings, + PydanticBaseSettingsSource, + SettingsConfigDict, +) +from typing_extensions import Self + +_MODELS_MODULE_NAME = 
"dandischema.models" +"""The full import name of the module containing the DANDI Pydantic models""" + +INSTANCE_IDENTIFIER_PATTERN = r"RRID:\S+" +""" +The pattern of the ID identifying the DANDI service instance + +Note +---- + This pattern currently only allows Research Resource Identifiers (RRIDs). +""" + +UNVENDORED_ID_PATTERN = r"[A-Z][-A-Z]*" +UNVENDORED_DOI_PREFIX_PATTERN = r"10\.\d{4,}" + +logger = logging.getLogger(__name__) + +DEFAULT_INSTANCE_NAME = "DANDI-ADHOC" +""" +The default name of the DANDI instance +""" + + +class SpdxLicenseListInfo(BaseModel): + """ + Represents information about the SPDX License List. + """ + + version: str + release_date: datetime + url: AnyUrl + reference: AnyUrl = AnyUrl("https://spdx.org/licenses/") + + +class SpdxLicenseIdList(BaseModel): + """ + Represents a list of SPDX license IDs. + """ + + source: SpdxLicenseListInfo + license_ids: list[str] + + +_license_id_file_path = files(__package__) / "_resources" / "spdx_license_ids.json" + +_spdx_license_id_list = SpdxLicenseIdList.model_validate_json( + _license_id_file_path.read_text() +) + +if TYPE_CHECKING: + # This is just a placeholder for static type checking + class License(Enum): + ... # fmt: skip + +else: + License = Enum( + "License", + [("spdx:" + id_,) * 2 for id_ in _spdx_license_id_list.license_ids], + ) + + +class Config(BaseSettings): + """ + Configuration for the DANDI schema + + Note + ---- + Since this class is subclass of `pydantic.BaseSettings`, each field of an + instance of this class can be populated from an environment variable of the + same name prefixed with the prefix defined in `model_config` with the name + of the environment variable interpreted **case-insensitively**. 
+ For details, see https://docs.pydantic.dev/latest/concepts/pydantic_settings/ + """ + + model_config = SettingsConfigDict( + # TODO: currently `validate_by_name` is set to `False` because of the limitation + # imposed by a bug, https://github.com/pydantic/pydantic/issues/12191, at + # Pydantic. Once that bug is fixed, we should consider setting + # `validate_by_name` to `True` and redefine the fields to allow population + # of field values by field names. + validate_by_name=False, + validate_by_alias=True, + ) + + instance_name: Annotated[ + str, + StringConstraints(pattern=rf"^{UNVENDORED_ID_PATTERN}$"), + Field( + validation_alias=AliasChoices( + "dandi_instance_name", "django_dandi_instance_name" + ) + ), + ] = DEFAULT_INSTANCE_NAME + """Name of the DANDI instance""" + + instance_identifier: Optional[ + Annotated[ + str, + StringConstraints(pattern=rf"^{INSTANCE_IDENTIFIER_PATTERN}$"), + ] + ] = Field( + default=None, + validation_alias=AliasChoices( + "dandi_instance_identifier", "django_dandi_instance_identifier" + ), + ) + """ + ID identifying the DANDI service instance + + Note + ---- + This field currently only accepts Research Resource Identifiers (RRIDs). 
+ """ + + instance_url: Optional[AnyHttpUrl] = Field( + default=None, + validation_alias=AliasChoices("dandi_instance_url", "django_dandi_web_app_url"), + ) + """ + The URL of the DANDI instance + """ + + doi_prefix: Optional[ + Annotated[str, StringConstraints(pattern=rf"^{UNVENDORED_DOI_PREFIX_PATTERN}$")] + ] = Field( + default=None, + validation_alias=AliasChoices( + "dandi_doi_prefix", "django_dandi_doi_api_prefix" + ), + ) + """ + The DOI prefix at DataCite + """ + + licenses: set[License] = Field( + default={License("spdx:CC0-1.0"), License("spdx:CC-BY-4.0")}, + validation_alias=AliasChoices("dandi_licenses", "django_dandi_licenses"), + ) + """ + Set of licenses to be supported by the DANDI instance + + Currently, the values for this set must be the identifier of a license in the + list at https://spdx.org/licenses/ prefixed with "spdx:" when set with the + corresponding environment variable. E.g. + + ```shell + export DANDI_LICENSES='["spdx:CC0-1.0", "spdx:CC-BY-4.0"]' + ``` + """ + + @model_validator(mode="after") + def _ensure_non_none_instance_identifier_if_non_none_doi_prefix( + self, + ) -> Self: + """ + Ensure that if `doi_prefix` is not `None`, then `instance_identifier` + must not be `None`. + """ + + if self.doi_prefix is not None and self.instance_identifier is None: + raise ValueError( + "If `doi_prefix` is set (not `None`), " + "`instance_identifier` must also be set." + ) + return self + + # This is a workaround for the limitation imposed by the bug at + # https://github.com/pydantic/pydantic/issues/12191 mentioned above. + # TODO: This will no longer be needed once that bug is fixed and + # should be removed along with other workarounds in this model because + # of that bug. 
+ @classmethod + def settings_customise_sources( + cls, + settings_cls: type[BaseSettings], + init_settings: PydanticBaseSettingsSource, + env_settings: PydanticBaseSettingsSource, + dotenv_settings: PydanticBaseSettingsSource, + file_secret_settings: PydanticBaseSettingsSource, + ) -> tuple[PydanticBaseSettingsSource, ...]: + def wrap(source: PydanticBaseSettingsSource) -> PydanticBaseSettingsSource: + class Wrapped(PydanticBaseSettingsSource): + def get_field_value( + self, field: FieldInfo, field_name: str + ) -> tuple[Any, str, bool]: + raise NotImplementedError( + "If this method is ever called, there is a bug" + ) + + def __call__(self) -> dict[str, Any]: + result = source().copy() + for field_name in cls.model_fields: + if field_name in result: + alias = f"dandi_{field_name}" + # This overwrites the `alias` key if it already exists + result[alias] = result[field_name] + del result[field_name] + return result + + return Wrapped(settings_cls) + + return ( + wrap(init_settings), + env_settings, + dotenv_settings, + file_secret_settings, + ) + + +_instance_config = Config() # Initial value is set by env vars alone +""" +Configuration of the DANDI instance + +This configuration holds the information used to customize the DANDI schema to a +specific vendor, but it should not be accessed directly. Use `get_instance_config()` +to obtain its value and `set_instance_config()` to set its value. +""" + + +def get_instance_config() -> Config: + """ + Get the configuration of the DANDI instance + + This configuration holds the information used to customize the DANDI schema to a + specific vendor. 
+ + Returns + ------- + Config + The configuration of the DANDI instance + """ + return _instance_config.model_copy(deep=True) + + + def set_instance_config( + config: Optional[Union[Config, dict]] = None, /, **kwargs: Any + ) -> None: + """ + Set the DANDI instance configuration returned by `get_instance_config()` + + This setting is done by creating a new instance of `Config` with the positional + argument of type `Config` or `dict` or the keyword + arguments passed to this function and overwriting the existing one. + + Parameters + ---------- + config : Optional[Union[Config, dict]], optional + An instance of `Config` or a dictionary with the configuration. If an instance + of `Config` is provided, a copy will be made to use to set the DANDI instance + configuration. If a dictionary is provided, it will be validated and converted + to an instance of `Config`. If this argument is provided, no keyword arguments + should be provided. Defaults to `None`. + **kwargs + Keyword arguments to pass to `Config.model_validate()` to create a new + instance of `Config` to set the DANDI instance configuration. + + Raises + ------ + ValueError + If both a non-none positional argument and keyword arguments are provided + + Note + ---- + Use this function to override the initial configuration set by the environment + variables. + + This function should be called before importing `dandischema.models` or the + new configuration will not have any effect in the models defined in + `dandischema.models`. + + """ + if config is not None and kwargs: + raise ValueError( + "Either a positional argument or a set of keyword arguments should be " + "provided, but not both." 
+ ) + + import sys + + global _instance_config + + if config is not None: + if isinstance(config, Config): + new_config = config.model_copy(deep=True) + else: + new_config = Config.model_validate(config) + else: + new_config = Config.model_validate(kwargs) + + if _MODELS_MODULE_NAME in sys.modules: + if new_config != _instance_config: + logger.warning( + f"`{_MODELS_MODULE_NAME}` is already imported. Resetting the DANDI " + f"instance configuration to a different value will not have any affect " + f"in the models defined in `{_MODELS_MODULE_NAME}`." + ) + else: + logger.debug( + f"`{_MODELS_MODULE_NAME}` is already imported. Attempt to " + f"reset the DANDI instance configuration to the same value by " + f"keyword argument has been ignored." + ) + return + + _instance_config = new_config diff --git a/dandischema/datacite/__init__.py b/dandischema/datacite/__init__.py index c9756c66..72536821 100644 --- a/dandischema/datacite/__init__.py +++ b/dandischema/datacite/__init__.py @@ -13,6 +13,8 @@ from jsonschema import Draft7Validator +from dandischema.conf import get_instance_config + from ..models import ( NAME_PATTERN, LicenseType, @@ -117,6 +119,9 @@ def to_datacite( publish: bool = False, ) -> dict: """Convert published Dandiset metadata to Datacite""" + + instance_config = get_instance_config() + if not isinstance(meta, PublishedDandiset): meta = PublishedDandiset(**meta) @@ -142,13 +147,21 @@ def to_datacite( attributes["descriptions"] = [ {"description": meta.description, "descriptionType": "Abstract"} ] + + # Populate publisher info attributes["publisher"] = { - "name": "DANDI Archive", - "schemeUri": "https://scicrunch.org/resolver/", - "publisherIdentifier": "https://scicrunch.org/resolver/RRID:SCR_017571", - "publisherIdentifierScheme": "RRID", + "name": f"{instance_config.instance_name} Archive", "lang": "en", } + if instance_config.instance_identifier: + attributes["publisher"].update( + { + "schemeUri": "https://scicrunch.org/resolver/", + 
"publisherIdentifier": f"https://scicrunch.org/resolver/{instance_config.instance_identifier}", + "publisherIdentifierScheme": "RRID", + } + ) + attributes["publicationYear"] = str(meta.datePublished.year) # not sure about it dandi-api had "resourceTypeGeneral": "NWB" attributes["types"] = { diff --git a/dandischema/datacite/tests/test_datacite.py b/dandischema/datacite/tests/test_datacite.py index dc62e951..65e12e29 100644 --- a/dandischema/datacite/tests/test_datacite.py +++ b/dandischema/datacite/tests/test_datacite.py @@ -1,7 +1,6 @@ from enum import Enum import json import os -from pathlib import Path import random from typing import TYPE_CHECKING, Any, Dict, Tuple, cast @@ -9,6 +8,7 @@ import pytest import requests +from dandischema.conf import get_instance_config from dandischema.models import ( LicenseType, PublishedDandiset, @@ -16,11 +16,21 @@ ResourceType, RoleType, ) -import dandischema.tests -from dandischema.tests.utils import _basic_publishmeta, skipif_no_network +from dandischema.tests.utils import ( + DANDISET_METADATA_DIR, + DOI_PREFIX, + INSTANCE_NAME, + basic_publishmeta, + skipif_no_datacite_auth, + skipif_no_doi_prefix, + skipif_no_network, + skipif_no_test_dandiset_metadata_dir, +) from .. import _get_datacite_schema, _licenses_to_rights_list, to_datacite +_INSTANCE_CONFIG = get_instance_config() + class TestLicensesToRightsList: """ @@ -144,7 +154,7 @@ def datacite_post(datacite: dict, doi: str) -> None: "https://api.test.datacite.org/dois", json=datacite, headers={"Content-Type": "application/vnd.api+json"}, - auth=("DARTLIB.DANDI", os.environ["DATACITE_DEV_PASSWORD"]), + auth=(os.environ["DATACITE_DEV_LOGIN"], os.environ["DATACITE_DEV_PASSWORD"]), ) rp.raise_for_status() @@ -160,7 +170,7 @@ def _clean_doi(doi: str) -> None: """Remove doi. 
Status code is ignored""" requests.delete( f"https://api.test.datacite.org/dois/{doi}", - auth=("DARTLIB.DANDI", os.environ["DATACITE_DEV_PASSWORD"]), + auth=(os.environ["DATACITE_DEV_LOGIN"], os.environ["DATACITE_DEV_PASSWORD"]), ) @@ -172,7 +182,7 @@ def schema() -> Any: @pytest.fixture(scope="function") def metadata_basic() -> Dict[str, Any]: dandi_id_noprefix = f"000{random.randrange(100, 999)}" - dandi_id = f"DANDI:{dandi_id_noprefix}" + dandi_id = f"{INSTANCE_NAME}:{dandi_id_noprefix}" version = "0.0.0" # meta data without doi, datePublished and publishedBy meta_dict = { @@ -215,19 +225,17 @@ def metadata_basic() -> Dict[str, Any]: @skipif_no_network -@pytest.mark.skipif( - not os.getenv("DATACITE_DEV_PASSWORD"), reason="no datacite password available" -) +@skipif_no_datacite_auth +@skipif_no_doi_prefix +@skipif_no_test_dandiset_metadata_dir @pytest.mark.parametrize("dandi_id", ["000004", "000008"]) def test_datacite(dandi_id: str, schema: Any) -> None: """checking to_datacite for a specific datasets""" + assert DOI_PREFIX is not None + # reading metadata taken from exemplary dandisets and saved in json files - with ( - Path(dandischema.tests.__file__).with_name("data") - / "metadata" - / f"meta_{dandi_id}.json" - ).open() as f: + with (DANDISET_METADATA_DIR / f"meta_{dandi_id}.json").open() as f: meta_js = json.load(f) version = "0.0.0" @@ -237,7 +245,11 @@ def test_datacite(dandi_id: str, schema: Any) -> None: # updating with basic fields required for PublishDandiset meta_js.update( - _basic_publishmeta(dandi_id.replace("000", str(random.randrange(100, 999)))) + basic_publishmeta( + INSTANCE_NAME, + dandi_id.replace("000", str(random.randrange(100, 999))), + prefix=DOI_PREFIX, + ) ) meta = PublishedDandiset(**meta_js) @@ -265,8 +277,9 @@ def test_datacite(dandi_id: str, schema: Any) -> None: "publisher": ( None, { - "name": "DANDI Archive", - "publisherIdentifier": "https://scicrunch.org/resolver/RRID:SCR_017571", + "name": 
f"{_INSTANCE_CONFIG.instance_name} Archive", + "publisherIdentifier": f"https://scicrunch.org/resolver/" + f"{_INSTANCE_CONFIG.instance_identifier}", "publisherIdentifierScheme": "RRID", "schemeUri": "https://scicrunch.org/resolver/", "lang": "en", @@ -473,9 +486,8 @@ def test_datacite(dandi_id: str, schema: Any) -> None: ), ], ) -@pytest.mark.skipif( - not os.getenv("DATACITE_DEV_PASSWORD"), reason="no datacite password available" -) +@skipif_no_datacite_auth +@skipif_no_doi_prefix def test_dandimeta_datacite( schema: Any, metadata_basic: Dict[str, Any], @@ -487,10 +499,14 @@ def test_dandimeta_datacite( posting datacite object and checking the status code """ + assert DOI_PREFIX is not None + dandi_id = metadata_basic["identifier"] dandi_id_noprefix = dandi_id.split(":")[1] - metadata_basic.update(_basic_publishmeta(dandi_id=dandi_id_noprefix)) + metadata_basic.update( + basic_publishmeta(INSTANCE_NAME, dandi_id=dandi_id_noprefix, prefix=DOI_PREFIX) + ) metadata_basic.update(additional_meta) # creating and validating datacite objects @@ -519,11 +535,16 @@ def test_dandimeta_datacite( datacite_post(datacite, metadata_basic["doi"]) +@skipif_no_doi_prefix def test_datacite_publish(metadata_basic: Dict[str, Any]) -> None: + assert DOI_PREFIX is not None + dandi_id = metadata_basic["identifier"] dandi_id_noprefix = dandi_id.split(":")[1] version = metadata_basic["version"] - metadata_basic.update(_basic_publishmeta(dandi_id=dandi_id_noprefix)) + metadata_basic.update( + basic_publishmeta(INSTANCE_NAME, dandi_id=dandi_id_noprefix, prefix=DOI_PREFIX) + ) # creating and validating datacite objects datacite = to_datacite(metadata_basic, publish=True, validate=True) @@ -531,7 +552,7 @@ def test_datacite_publish(metadata_basic: Dict[str, Any]) -> None: assert datacite == { # 'data': {} "data": { - "id": f"10.80507/dandi.{dandi_id_noprefix}/{version}", + "id": f"{DOI_PREFIX}/{INSTANCE_NAME.lower()}.{dandi_id_noprefix}/{version}", "type": "dois", "attributes": { "event": 
"publish", @@ -561,7 +582,10 @@ def test_datacite_publish(metadata_basic: Dict[str, Any]) -> None: "descriptions": [ {"description": "testing", "descriptionType": "Abstract"} ], - "doi": f"10.80507/dandi.{dandi_id_noprefix}/{version}", + "doi": ( + f"{DOI_PREFIX}/" + f"{INSTANCE_NAME.lower()}.{dandi_id_noprefix}/{version}" + ), "alternateIdentifiers": [ { "alternateIdentifier": f"https://identifiers.org/{dandi_id}/{version}", @@ -577,8 +601,9 @@ def test_datacite_publish(metadata_basic: Dict[str, Any]) -> None: ], "publicationYear": "1970", "publisher": { - "name": "DANDI Archive", - "publisherIdentifier": "https://scicrunch.org/resolver/RRID:SCR_017571", + "name": f"{_INSTANCE_CONFIG.instance_name} Archive", + "publisherIdentifier": f"https://scicrunch.org/resolver/" + f"{_INSTANCE_CONFIG.instance_identifier}", "publisherIdentifierScheme": "RRID", "schemeUri": "https://scicrunch.org/resolver/", "lang": "en", @@ -638,9 +663,7 @@ def test_datacite_publish(metadata_basic: Dict[str, Any]) -> None: ), ], ) -@pytest.mark.skipif( - not os.getenv("DATACITE_DEV_PASSWORD"), reason="no datacite password available" -) +@skipif_no_doi_prefix def test_datacite_related_res_url( metadata_basic: Dict[str, Any], related_res_url: Dict[str, Any], @@ -650,10 +673,14 @@ def test_datacite_related_res_url( checking if urls provided in the relatedResource.identifier could be translated to DOI for some websites: e.g. 
bioarxiv.org, doi.org """ + assert DOI_PREFIX is not None + dandi_id = metadata_basic["identifier"] dandi_id_noprefix = dandi_id.split(":")[1] - metadata_basic.update(_basic_publishmeta(dandi_id=dandi_id_noprefix)) + metadata_basic.update( + basic_publishmeta(INSTANCE_NAME, dandi_id=dandi_id_noprefix, prefix=DOI_PREFIX) + ) metadata_basic["relatedResource"] = [related_res_url] # creating and validating datacite objects diff --git a/dandischema/metadata.py b/dandischema/metadata.py index 5be9294b..ebf68e41 100644 --- a/dandischema/metadata.py +++ b/dandischema/metadata.py @@ -91,7 +91,7 @@ def generate_context() -> dict: "@id": cast(str, field.json_schema_extra["nskey"]) + ":" + name } else: - fields[name] = {"@id": "dandi:" + name} + fields[name] = {"@id": f"{models.DANDI_NSKEY}:{name}"} # The annotation without the top-level optional stripped_annotation = strip_top_level_optional(field.annotation) @@ -123,8 +123,8 @@ def generate_context() -> dict: for item in models.DigestType: fields[item.value] = {"@id": item.value, "@nest": "digest"} - fields["Dandiset"] = "dandi:Dandiset" - fields["Asset"] = "dandi:Asset" + fields["Dandiset"] = f"{models.DANDI_NSKEY}:Dandiset" + fields["Asset"] = f"{models.DANDI_NSKEY}:Asset" fields = {k: fields[k] for k in sorted(fields)} field_preamble.update(**fields) return {"@context": field_preamble} diff --git a/dandischema/models.py b/dandischema/models.py index 637a804d..b35275fe 100644 --- a/dandischema/models.py +++ b/dandischema/models.py @@ -2,9 +2,9 @@ from datetime import date, datetime from enum import Enum -import os import re from typing import ( + TYPE_CHECKING, Annotated, Any, Dict, @@ -28,7 +28,6 @@ GetJsonSchemaHandler, SerializerFunctionWrapHandler, StringConstraints, - TypeAdapter, ValidationInfo, field_serializer, field_validator, @@ -38,6 +37,13 @@ from pydantic_core import CoreSchema from zarr_checksum.checksum import InvalidZarrChecksum, ZarrDirectoryDigest +from dandischema.conf import ( + DEFAULT_INSTANCE_NAME, + 
UNVENDORED_DOI_PREFIX_PATTERN, + UNVENDORED_ID_PATTERN, + get_instance_config, +) + from .consts import DANDI_SCHEMA_VERSION from .digests.dandietag import DandiETag from .types import ByteSizeJsonSchema @@ -50,16 +56,29 @@ else: _has_anys = True -# Use DJANGO_DANDI_WEB_APP_URL to point to a specific deployment. -DANDI_INSTANCE_URL: Optional[str] -try: - DANDI_INSTANCE_URL = os.environ["DJANGO_DANDI_WEB_APP_URL"] -except KeyError: - DANDI_INSTANCE_URL = None - DANDI_INSTANCE_URL_PATTERN = ".*" -else: - # Ensure no trailing / for consistency - DANDI_INSTANCE_URL_PATTERN = re.escape(DANDI_INSTANCE_URL.rstrip("/")) +# Load needed configurations into constants +_INSTANCE_CONFIG = get_instance_config() + +# Regex pattern for the prefix of identifiers +ID_PATTERN = ( + _INSTANCE_CONFIG.instance_name + if _INSTANCE_CONFIG.instance_name != DEFAULT_INSTANCE_NAME + else UNVENDORED_ID_PATTERN +) + +# The pattern that a DOI prefix of a dandiset must conform to +DOI_PREFIX_PATTERN = ( + re.escape(_INSTANCE_CONFIG.doi_prefix) + if _INSTANCE_CONFIG.doi_prefix is not None + else UNVENDORED_DOI_PREFIX_PATTERN +) + +# The pattern of the DANDI instance URL +DANDI_INSTANCE_URL_PATTERN = ( + ".*" + if _INSTANCE_CONFIG.instance_url is None + else re.escape(str(_INSTANCE_CONFIG.instance_url).rstrip("/")) +) NAME_PATTERN = r"^([\w\s\-\.']+),\s+([\w\s\-\.']+)$" UUID_PATTERN = ( @@ -67,8 +86,17 @@ ) ASSET_UUID_PATTERN = r"^dandiasset:" + UUID_PATTERN VERSION_PATTERN = r"\d{6}/\d+\.\d+\.\d+" -DANDI_DOI_PATTERN = rf"^10.(48324|80507)/dandi\.{VERSION_PATTERN}" -DANDI_PUBID_PATTERN = rf"^DANDI:{VERSION_PATTERN}" +_INNER_DANDI_DOI_PATTERN = ( + rf"{DOI_PREFIX_PATTERN}/{ID_PATTERN.lower()}\.{VERSION_PATTERN}" +) +DANDI_DOI_PATTERN = ( + rf"^{_INNER_DANDI_DOI_PATTERN}$" + if _INSTANCE_CONFIG.doi_prefix is not None + else rf"^({_INNER_DANDI_DOI_PATTERN}|)$" # This matches an empty string as well +) +DANDI_PUBID_PATTERN = rf"^{ID_PATTERN}:{VERSION_PATTERN}$" +DANDI_NSKEY = "dandi" # Namespace for 
DANDI ontology + PUBLISHED_VERSION_URL_PATTERN = ( rf"^{DANDI_INSTANCE_URL_PATTERN}/dandiset/{VERSION_PATTERN}$" ) @@ -85,19 +113,39 @@ def diff_models(model1: M, model2: M) -> None: print(f"{field} is different") +if TYPE_CHECKING: + # This is just a placeholder for static type checking + class LicenseType(Enum): + ... # fmt: skip + +else: + LicenseType = Enum( + "LicenseType", + [(license_.name, license_.value) for license_ in _INSTANCE_CONFIG.licenses], + ) + r""" + An enumeration of supported licenses + + The value of each member is a string that matches the regex pattern of + `^([^:\s]+):(\S+)$` in which the first group matches the license scheme such + as `"spdx"`, and the second group matches the license identifier such as + `"CC-BY-4.0"`. + """ + + class AccessType(Enum): """An enumeration of access status options""" #: The dandiset is openly accessible - OpenAccess = "dandi:OpenAccess" + OpenAccess = f"{DANDI_NSKEY}:OpenAccess" #: The dandiset is embargoed - EmbargoedAccess = "dandi:EmbargoedAccess" + EmbargoedAccess = f"{DANDI_NSKEY}:EmbargoedAccess" """ Uncomment when restricted access is implemented: #: The dandiset is restricted - RestrictedAccess = "dandi:RestrictedAccess" + RestrictedAccess = f"{DANDI_NSKEY}:RestrictedAccess" """ @@ -105,45 +153,38 @@ class DigestType(Enum): """An enumeration of checksum types""" #: MD5 checksum - md5 = "dandi:md5" + md5 = f"{DANDI_NSKEY}:md5" #: SHA1 checksum - sha1 = "dandi:sha1" + sha1 = f"{DANDI_NSKEY}:sha1" #: SHA2-256 checksum - sha2_256 = "dandi:sha2-256" + sha2_256 = f"{DANDI_NSKEY}:sha2-256" #: SHA3-256 checksum - sha3_256 = "dandi:sha3-256" + sha3_256 = f"{DANDI_NSKEY}:sha3-256" #: BLAKE2B-256 checksum - blake2b_256 = "dandi:blake2b-256" + blake2b_256 = f"{DANDI_NSKEY}:blake2b-256" #: BLAKE3-256 checksum - blake3 = "dandi:blake3" + blake3 = f"{DANDI_NSKEY}:blake3" #: S3-style ETag - dandi_etag = "dandi:dandi-etag" + dandi_etag = f"{DANDI_NSKEY}:dandi-etag" #: DANDI Zarr checksum - dandi_zarr_checksum = 
"dandi:dandi-zarr-checksum" + dandi_zarr_checksum = f"{DANDI_NSKEY}:dandi-zarr-checksum" class IdentifierType(Enum): """An enumeration of identifiers""" - doi = "dandi:doi" - orcid = "dandi:orcid" - ror = "dandi:ror" - dandi = "dandi:dandi" - rrid = "dandi:rrid" - - -class LicenseType(Enum): - """An enumeration of supported licenses""" - - CC0_10 = "spdx:CC0-1.0" - CC_BY_40 = "spdx:CC-BY-4.0" + doi = f"{DANDI_NSKEY}:doi" + orcid = f"{DANDI_NSKEY}:orcid" + ror = f"{DANDI_NSKEY}:ror" + dandi = f"{DANDI_NSKEY}:dandi" + rrid = f"{DANDI_NSKEY}:rrid" class RelationType(Enum): @@ -256,19 +297,19 @@ class ParticipantRelationType(Enum): """An enumeration of participant relations""" #: Indicates that A is a child of B - isChildOf = "dandi:isChildOf" + isChildOf = f"{DANDI_NSKEY}:isChildOf" #: Indicates that A is a parent of B - isParentOf = "dandi:isParentOf" + isParentOf = f"{DANDI_NSKEY}:isParentOf" #: Indicates that A is a sibling of B - isSiblingOf = "dandi:isSiblingOf" + isSiblingOf = f"{DANDI_NSKEY}:isSiblingOf" #: Indicates that A is a monozygotic twin of B - isMonozygoticTwinOf = "dandi:isMonozygoticTwinOf" + isMonozygoticTwinOf = f"{DANDI_NSKEY}:isMonozygoticTwinOf" #: Indicates that A is a dizygotic twin of B - isDizygoticTwinOf = "dandi:isDizygoticTwinOf" + isDizygoticTwinOf = f"{DANDI_NSKEY}:isDizygoticTwinOf" class RoleType(Enum): @@ -487,10 +528,10 @@ class AgeReferenceType(Enum): """An enumeration of age reference""" #: Age since Birth - BirthReference = "dandi:BirthReference" + BirthReference = f"{DANDI_NSKEY}:BirthReference" #: Age of a pregnancy (https://en.wikipedia.org/wiki/Gestational_age) - GestationalReference = "dandi:GestationalReference" + GestationalReference = f"{DANDI_NSKEY}:GestationalReference" class DandiBaseModel(BaseModel): @@ -704,7 +745,7 @@ class BaseType(DandiBaseModel): schemaKey: str = Field( "BaseType", validate_default=True, json_schema_extra={"readOnly": True} ) - _ldmeta = {"rdfs:subClassOf": ["prov:Entity", "schema:Thing"], 
"nskey": "dandi"} + _ldmeta = {"rdfs:subClassOf": ["prov:Entity", "schema:Thing"], "nskey": DANDI_NSKEY} @classmethod def __get_pydantic_json_schema__( @@ -782,7 +823,7 @@ class Disorder(BaseType): None, title="Dates of diagnosis", description="Dates of diagnosis", - json_schema_extra={"nskey": "dandi", "rangeIncludes": "schema:Date"}, + json_schema_extra={"nskey": DANDI_NSKEY, "rangeIncludes": "schema:Date"}, ) schemaKey: Literal["Disorder"] = Field( "Disorder", validate_default=True, json_schema_extra={"readOnly": True} @@ -879,13 +920,13 @@ class Contributor(DandiBaseModel): title="Include contributor in citation", description="A flag to indicate whether a contributor should be included " "when generating a citation for the item.", - json_schema_extra={"nskey": "dandi"}, + json_schema_extra={"nskey": DANDI_NSKEY}, ) awardNumber: Optional[Identifier] = Field( None, title="Identifier for an award", description="Identifier associated with a sponsored or gift award.", - json_schema_extra={"nskey": "dandi"}, + json_schema_extra={"nskey": DANDI_NSKEY}, ) schemaKey: Literal["Contributor", "Organization", "Person"] = Field( "Contributor", validate_default=True, json_schema_extra={"readOnly": True} @@ -916,7 +957,7 @@ class Organization(Contributor): title="Include contributor in citation", description="A flag to indicate whether a contributor should be included " "when generating a citation for the item", - json_schema_extra={"nskey": "dandi"}, + json_schema_extra={"nskey": DANDI_NSKEY}, ) contactPoint: Optional[List[ContactPoint]] = Field( None, @@ -929,7 +970,7 @@ class Organization(Contributor): ) _ldmeta = { "rdfs:subClassOf": ["schema:Organization", "prov:Organization"], - "nskey": "dandi", + "nskey": DANDI_NSKEY, } @@ -950,7 +991,7 @@ class Affiliation(DandiBaseModel): _ldmeta = { "rdfs:subClassOf": ["schema:Organization", "prov:Organization"], - "nskey": "dandi", + "nskey": DANDI_NSKEY, } @@ -977,7 +1018,10 @@ class Person(Contributor): "Person", 
validate_default=True, json_schema_extra={"readOnly": True} ) - _ldmeta = {"rdfs:subClassOf": ["schema:Person", "prov:Person"], "nskey": "dandi"} + _ldmeta = { + "rdfs:subClassOf": ["schema:Person", "prov:Person"], + "nskey": DANDI_NSKEY, + } class Software(DandiBaseModel): @@ -1001,7 +1045,7 @@ class Software(DandiBaseModel): _ldmeta = { "rdfs:subClassOf": ["schema:SoftwareApplication", "prov:Software"], - "nskey": "dandi", + "nskey": DANDI_NSKEY, } @@ -1022,7 +1066,7 @@ class Agent(DandiBaseModel): _ldmeta = { "rdfs:subClassOf": ["prov:Agent"], - "nskey": "dandi", + "nskey": DANDI_NSKEY, } @@ -1043,7 +1087,7 @@ class EthicsApproval(DandiBaseModel): "EthicsApproval", validate_default=True, json_schema_extra={"readOnly": True} ) - _ldmeta = {"rdfs:subClassOf": ["schema:Thing", "prov:Entity"], "nskey": "dandi"} + _ldmeta = {"rdfs:subClassOf": ["schema:Thing", "prov:Entity"], "nskey": DANDI_NSKEY} class Resource(DandiBaseModel): @@ -1060,19 +1104,19 @@ class Resource(DandiBaseModel): None, title="Name of the repository", description="Name of the repository in which the resource is housed.", - json_schema_extra={"nskey": "dandi"}, + json_schema_extra={"nskey": DANDI_NSKEY}, ) relation: RelationType = Field( title="Resource relation", description="Indicates how the resource is related to the dataset. 
" "This relation should satisfy: dandiset resource.", - json_schema_extra={"nskey": "dandi"}, + json_schema_extra={"nskey": DANDI_NSKEY}, ) resourceType: Optional[ResourceType] = Field( default=None, title="Resource type", description="The type of resource.", - json_schema_extra={"nskey": "dandi"}, + json_schema_extra={"nskey": DANDI_NSKEY}, ) schemaKey: Literal["Resource"] = Field( @@ -1083,7 +1127,7 @@ class Resource(DandiBaseModel): "rdfs:subClassOf": ["schema:CreativeWork", "prov:Entity"], "rdfs:comment": "A resource related to the project (e.g., another " "dataset, publication, Webpage)", - "nskey": "dandi", + "nskey": DANDI_NSKEY, } @model_validator(mode="after") @@ -1100,7 +1144,7 @@ class AccessRequirements(DandiBaseModel): status: AccessType = Field( title="Access status", description="The access status of the item.", - json_schema_extra={"nskey": "dandi"}, + json_schema_extra={"nskey": DANDI_NSKEY}, ) contactPoint: Optional[ContactPoint] = Field( None, @@ -1118,7 +1162,7 @@ class AccessRequirements(DandiBaseModel): description="Date on which embargo ends.", json_schema_extra={ "readOnly": True, - "nskey": "dandi", + "nskey": DANDI_NSKEY, "rangeIncludes": "schema:Date", }, ) @@ -1128,7 +1172,7 @@ class AccessRequirements(DandiBaseModel): json_schema_extra={"readOnly": True}, ) - _ldmeta = {"rdfs:subClassOf": ["schema:Thing", "prov:Entity"], "nskey": "dandi"} + _ldmeta = {"rdfs:subClassOf": ["schema:Thing", "prov:Entity"], "nskey": DANDI_NSKEY} @model_validator(mode="after") def open_or_embargoed(self) -> "AccessRequirements": @@ -1181,7 +1225,7 @@ class AssetsSummary(DandiBaseModel): _ldmeta = { "rdfs:subClassOf": ["schema:CreativeWork", "prov:Entity"], - "nskey": "dandi", + "nskey": DANDI_NSKEY, } @@ -1206,7 +1250,7 @@ class Equipment(DandiBaseModel): _ldmeta = { "rdfs:subClassOf": ["schema:CreativeWork", "prov:Entity"], - "nskey": "dandi", + "nskey": DANDI_NSKEY, } @@ -1249,7 +1293,10 @@ class Activity(DandiBaseModel): "Activity", validate_default=True, 
json_schema_extra={"readOnly": True} ) - _ldmeta = {"rdfs:subClassOf": ["prov:Activity", "schema:Thing"], "nskey": "dandi"} + _ldmeta = { + "rdfs:subClassOf": ["prov:Activity", "schema:Thing"], + "nskey": DANDI_NSKEY, + } class Project(Activity): @@ -1301,7 +1348,7 @@ class Locus(DandiBaseModel): schemaKey: Literal["Locus"] = Field( "Locus", validate_default=True, json_schema_extra={"readOnly": True} ) - _ldmeta = {"nskey": "dandi"} + _ldmeta = {"nskey": DANDI_NSKEY} class Allele(DandiBaseModel): @@ -1314,7 +1361,7 @@ class Allele(DandiBaseModel): schemaKey: Literal["Allele"] = Field( "Allele", validate_default=True, json_schema_extra={"readOnly": True} ) - _ldmeta = {"nskey": "dandi"} + _ldmeta = {"nskey": DANDI_NSKEY} class GenotypeInfo(DandiBaseModel): @@ -1328,7 +1375,7 @@ class GenotypeInfo(DandiBaseModel): schemaKey: Literal["GenotypeInfo"] = Field( "GenotypeInfo", validate_default=True, json_schema_extra={"readOnly": True} ) - _ldmeta = {"nskey": "dandi"} + _ldmeta = {"nskey": DANDI_NSKEY} class RelatedParticipant(DandiBaseModel): @@ -1350,7 +1397,7 @@ class RelatedParticipant(DandiBaseModel): description="Indicates how the current participant or subject is related " "to the other participant or subject. 
This relation should " "satisfy: Participant/Subject relatedParticipant/Subject.", - json_schema_extra={"nskey": "dandi"}, + json_schema_extra={"nskey": DANDI_NSKEY}, ) schemaKey: Literal["RelatedParticipant"] = Field( "RelatedParticipant", @@ -1362,7 +1409,7 @@ class RelatedParticipant(DandiBaseModel): "rdfs:subClassOf": ["schema:CreativeWork", "prov:Entity"], "rdfs:comment": "Another participant or subject related to the current " "participant or subject (e.g., another parent, sibling, child).", - "nskey": "dandi", + "nskey": DANDI_NSKEY, } @@ -1376,57 +1423,59 @@ class Participant(DandiBaseModel): identifier: Identifier = Field(json_schema_extra={"nskey": "schema"}) altName: Optional[List[Identifier]] = Field( - None, json_schema_extra={"nskey": "dandi"} + None, json_schema_extra={"nskey": DANDI_NSKEY} ) strain: Optional[StrainType] = Field( None, description="Identifier for the strain of the participant or subject.", - json_schema_extra={"nskey": "dandi"}, + json_schema_extra={"nskey": DANDI_NSKEY}, ) cellLine: Optional[Identifier] = Field( None, description="Cell line associated with the participant or subject.", - json_schema_extra={"nskey": "dandi"}, + json_schema_extra={"nskey": DANDI_NSKEY}, + ) + vendor: Optional[Organization] = Field( + None, json_schema_extra={"nskey": DANDI_NSKEY} ) - vendor: Optional[Organization] = Field(None, json_schema_extra={"nskey": "dandi"}) age: Optional[PropertyValue] = Field( None, description="A representation of age using ISO 8601 duration. This " "should include a valueReference if anything other than " "date of birth is used.", - json_schema_extra={"nskey": "dandi", "rangeIncludes": "schema:Duration"}, + json_schema_extra={"nskey": DANDI_NSKEY, "rangeIncludes": "schema:Duration"}, ) sex: Optional[SexType] = Field( None, description="Identifier for sex of the participant or subject if " "available. (e.g. 
from OBI)", - json_schema_extra={"nskey": "dandi"}, + json_schema_extra={"nskey": DANDI_NSKEY}, ) genotype: Optional[Union[List[GenotypeInfo], Identifier]] = Field( None, description="Genotype descriptor of participant or subject if available", - json_schema_extra={"nskey": "dandi"}, + json_schema_extra={"nskey": DANDI_NSKEY}, ) species: Optional[SpeciesType] = Field( None, description="An identifier indicating the taxonomic classification of " "the participant or subject.", - json_schema_extra={"nskey": "dandi"}, + json_schema_extra={"nskey": DANDI_NSKEY}, ) disorder: Optional[List[Disorder]] = Field( None, description="Any current diagnosed disease or disorder associated with " "the participant or subject.", - json_schema_extra={"nskey": "dandi"}, + json_schema_extra={"nskey": DANDI_NSKEY}, ) relatedParticipant: Optional[List[RelatedParticipant]] = Field( None, description="Information about related participants or subjects in a " "study or across studies.", - json_schema_extra={"nskey": "dandi"}, + json_schema_extra={"nskey": DANDI_NSKEY}, ) sameAs: Optional[List[Identifier]] = Field( None, @@ -1440,7 +1489,7 @@ class Participant(DandiBaseModel): _ldmeta = { "rdfs:subClassOf": ["prov:Agent"], "rdfs:label": "Information about the participant or subject.", - "nskey": "dandi", + "nskey": DANDI_NSKEY, } @@ -1450,18 +1499,18 @@ class BioSample(DandiBaseModel): identifier: Identifier = Field(json_schema_extra={"nskey": "schema"}) sampleType: SampleType = Field( description="Identifier for the sample characteristics (e.g., from OBI, Encode).", - json_schema_extra={"nskey": "dandi"}, + json_schema_extra={"nskey": DANDI_NSKEY}, ) assayType: Optional[List[AssayType]] = Field( None, description="Identifier for the assay(s) used (e.g., OBI).", - json_schema_extra={"nskey": "dandi"}, + json_schema_extra={"nskey": DANDI_NSKEY}, ) anatomy: Optional[List[Anatomy]] = Field( None, description="Identifier for what organ the sample belongs " "to. 
Use the most specific descriptor from sources such as UBERON.", - json_schema_extra={"nskey": "dandi"}, + json_schema_extra={"nskey": DANDI_NSKEY}, ) wasDerivedFrom: Optional[List["BioSample"]] = Field( @@ -1487,7 +1536,7 @@ class BioSample(DandiBaseModel): _ldmeta = { "rdfs:subClassOf": ["schema:Thing", "prov:Entity"], "rdfs:label": "Information about the biosample.", - "nskey": "dandi", + "nskey": DANDI_NSKEY, } @@ -1539,7 +1588,7 @@ class CommonModel(DandiBaseModel): studyTarget: Optional[List[str]] = Field( None, description="Objectives or specific questions of the study.", - json_schema_extra={"nskey": "dandi"}, + json_schema_extra={"nskey": DANDI_NSKEY}, ) license: Optional[List[LicenseType]] = Field( None, @@ -1552,10 +1601,10 @@ class CommonModel(DandiBaseModel): None, description="A list of persistent URLs describing the protocol (e.g. " "protocols.io, or other DOIs).", - json_schema_extra={"nskey": "dandi"}, + json_schema_extra={"nskey": DANDI_NSKEY}, ) ethicsApproval: Optional[List[EthicsApproval]] = Field( - None, title="Ethics approvals", json_schema_extra={"nskey": "dandi"} + None, title="Ethics approvals", json_schema_extra={"nskey": DANDI_NSKEY} ) keywords: Optional[List[str]] = Field( None, @@ -1565,14 +1614,14 @@ class CommonModel(DandiBaseModel): acknowledgement: Optional[str] = Field( None, description="Any acknowledgments not covered by contributors or external resources.", - json_schema_extra={"nskey": "dandi"}, + json_schema_extra={"nskey": DANDI_NSKEY}, ) # Linking to this dandiset or the larger thing access: List[AccessRequirements] = Field( title="Access information", default_factory=lambda: [AccessRequirements(status=AccessType.OpenAccess)], - json_schema_extra={"nskey": "dandi", "readOnly": True}, + json_schema_extra={"nskey": DANDI_NSKEY, "readOnly": True}, ) url: Optional[AnyHttpUrl] = Field( None, @@ -1580,18 +1629,12 @@ class CommonModel(DandiBaseModel): json_schema_extra={"readOnly": True, "nskey": "schema"}, ) repository: 
Optional[AnyHttpUrl] = Field( - # mypy doesn't like using a string as the default for an AnyHttpUrl - # attribute, so we have to convert it to an AnyHttpUrl: - ( - TypeAdapter(AnyHttpUrl).validate_python(DANDI_INSTANCE_URL) - if DANDI_INSTANCE_URL is not None - else None - ), + default=_INSTANCE_CONFIG.instance_url, description="location of the item", - json_schema_extra={"nskey": "dandi", "readOnly": True}, + json_schema_extra={"nskey": DANDI_NSKEY, "readOnly": True}, ) relatedResource: Optional[List[Resource]] = Field( - None, json_schema_extra={"nskey": "dandi"} + None, json_schema_extra={"nskey": DANDI_NSKEY} ) wasGeneratedBy: Optional[Sequence[Activity]] = Field( @@ -1622,14 +1665,16 @@ def contributor_musthave_contact( id: str = Field( description="Uniform resource identifier", - pattern=r"^(dandi|DANDI):\d{6}(/(draft|\d+\.\d+\.\d+))$", + pattern=( + rf"^({ID_PATTERN}|{ID_PATTERN.lower()}):\d{{6}}(/(draft|\d+\.\d+\.\d+))$" + ), json_schema_extra={"readOnly": True}, ) identifier: DANDI = Field( title="Dandiset identifier", description="A Dandiset identifier that can be resolved by identifiers.org.", - pattern=r"^DANDI:\d{6}$", + pattern=rf"^{ID_PATTERN}:\d{{6}}$", json_schema_extra={"readOnly": True, "nskey": "schema"}, ) name: str = Field( @@ -1674,12 +1719,12 @@ def contributor_musthave_contact( # From assets assetsSummary: AssetsSummary = Field( - json_schema_extra={"nskey": "dandi", "readOnly": True} + json_schema_extra={"nskey": DANDI_NSKEY, "readOnly": True} ) # From server (requested by users even for drafts) manifestLocation: List[AnyHttpUrl] = Field( - min_length=1, json_schema_extra={"nskey": "dandi", "readOnly": True} + min_length=1, json_schema_extra={"nskey": DANDI_NSKEY, "readOnly": True} ) version: str = Field(json_schema_extra={"nskey": "schema", "readOnly": True}) @@ -1698,7 +1743,7 @@ def contributor_musthave_contact( _ldmeta = { "rdfs:subClassOf": ["schema:Dataset", "prov:Entity"], "rdfs:label": "Information about the dataset", - "nskey": 
"dandi", + "nskey": DANDI_NSKEY, } @@ -1714,9 +1759,9 @@ class BareAsset(CommonModel): ) digest: Dict[DigestType, str] = Field( title="A map of dandi digests to their values", - json_schema_extra={"nskey": "dandi"}, + json_schema_extra={"nskey": DANDI_NSKEY}, ) - path: str = Field(json_schema_extra={"nskey": "dandi"}) + path: str = Field(json_schema_extra={"nskey": DANDI_NSKEY}) dateModified: Optional[datetime] = Field( None, @@ -1725,21 +1770,23 @@ class BareAsset(CommonModel): ) blobDateModified: Optional[datetime] = Field( None, - json_schema_extra={"nskey": "dandi"}, + json_schema_extra={"nskey": DANDI_NSKEY}, title="Asset file modification date and time.", ) # overload to restrict with max_items=1 access: List[AccessRequirements] = Field( title="Access information", default_factory=lambda: [AccessRequirements(status=AccessType.OpenAccess)], - json_schema_extra={"nskey": "dandi"}, + json_schema_extra={"nskey": DANDI_NSKEY}, max_length=1, ) # this is from C2M2 level 1 - using EDAM vocabularies - in our case we would # need to come up with things for neurophys # TODO: waiting on input - dataType: Optional[AnyHttpUrl] = Field(None, json_schema_extra={"nskey": "dandi"}) + dataType: Optional[AnyHttpUrl] = Field( + None, json_schema_extra={"nskey": DANDI_NSKEY} + ) sameAs: Optional[List[AnyHttpUrl]] = Field( None, json_schema_extra={"nskey": "schema"} @@ -1747,7 +1794,7 @@ class BareAsset(CommonModel): # TODO approach: Optional[List[ApproachType]] = Field( - None, json_schema_extra={"readOnly": True, "nskey": "dandi"} + None, json_schema_extra={"readOnly": True, "nskey": DANDI_NSKEY} ) measurementTechnique: Optional[List[MeasurementTechniqueType]] = Field( None, json_schema_extra={"readOnly": True, "nskey": "schema"} @@ -1779,7 +1826,7 @@ class BareAsset(CommonModel): _ldmeta = { "rdfs:subClassOf": ["schema:CreativeWork", "prov:Entity"], "rdfs:label": "Information about the asset", - "nskey": "dandi", + "nskey": DANDI_NSKEY, } @field_validator("digest") @@ -1837,7 
+1884,7 @@ class Asset(BareAsset): class Publishable(DandiBaseModel): publishedBy: Union[AnyHttpUrl, PublishActivity] = Field( description="The URL should contain the provenance of the publishing process.", - json_schema_extra={"readOnly": True, "nskey": "dandi"}, + json_schema_extra={"readOnly": True, "nskey": DANDI_NSKEY}, ) datePublished: datetime = Field( json_schema_extra={"readOnly": True, "nskey": "schema"} @@ -1847,18 +1894,29 @@ class Publishable(DandiBaseModel): ) +_doi_field_kwargs: dict[str, Any] = { + "title": "DOI", + "pattern": DANDI_DOI_PATTERN, + "json_schema_extra": {"readOnly": True, "nskey": DANDI_NSKEY}, +} +if _INSTANCE_CONFIG.doi_prefix is None: + _doi_field_kwargs["default"] = "" + + class PublishedDandiset(Dandiset, Publishable): id: str = Field( description="Uniform resource identifier.", pattern=DANDI_PUBID_PATTERN, json_schema_extra={"readOnly": True}, ) + doi: str = Field(**_doi_field_kwargs) + """ + The DOI of the published Dandiset + + The value of the empty string indicates that there is no DOI for the published + Dandiset. 
+ """ - doi: str = Field( - title="DOI", - pattern=DANDI_DOI_PATTERN, - json_schema_extra={"readOnly": True, "nskey": "dandi"}, - ) url: AnyHttpUrl = Field( description="Permalink to the Dandiset.", json_schema_extra={"readOnly": True, "nskey": "schema"}, diff --git a/dandischema/tests/conftest.py b/dandischema/tests/conftest.py index d507def0..3449cd63 100644 --- a/dandischema/tests/conftest.py +++ b/dandischema/tests/conftest.py @@ -1,7 +1,12 @@ import os -from typing import Iterator +import sys +from typing import Generator, Iterator +from pydantic import ConfigDict, TypeAdapter, ValidationError import pytest +from typing_extensions import TypedDict + +from dandischema.conf import Config @pytest.fixture(scope="session", autouse=True) @@ -13,3 +18,103 @@ def disable_http() -> Iterator[None]: yield else: yield + + +_CONFIG_PARAMS = list(Config.model_fields) +"""Configuration parameters of the `dandischema` package""" +# noinspection PyTypedDict +_ENV_DICT = TypedDict( # type: ignore[misc] + "_ENV_DICT", {fname: str for fname in _CONFIG_PARAMS}, total=False +) +_ENV_DICT.__pydantic_config__ = ConfigDict( # type: ignore[attr-defined] + # Values have to be strictly of type `str` + strict=True, + # Keys not listed are not allowed + extra="forbid", +) +_ENV_DICT_ADAPTER = TypeAdapter(_ENV_DICT) + + +@pytest.fixture +def clear_dandischema_modules_and_set_env_vars( + request: pytest.FixtureRequest, monkeypatch: pytest.MonkeyPatch +) -> Generator[None, None, None]: + """ + This fixture clears all `dandischema` modules from `sys.modules` and sets + environment variables that configure the `dandischema` package. + + With this fixture, tests can import `dandischema` modules cleanly in an environment + defined by the provided values for the environment variables. + + This fixture expects values for the environment variables to be passed indirectly + from the calling test function using `request.param`. 
`request.param` should be a + `dict[str, str]` consisting of keys that are a subset of the fields of + `dandischema.conf.Config`. Each value in the dictionary will be used to set an + environment variable with a name that is the same as its key but in upper case and + prefixed with "DANDI_". + + Example usage: + ```python + @pytest.mark.parametrize( + "clear_dandischema_modules_and_set_env_vars", + [ + {}, + { + "instance_name": "DANDI", + "doi_prefix": "10.48324", + }, + { + "instance_name": "EMBER-DANDI", + "doi_prefix": "10.60533", + } + ], + indirect=True, + ) + def test_foo(clear_dandischema_modules_and_set_env_vars): + # Your test code here + ... + ``` + + Note + ---- + When this fixture is torn down, it restores the original `sys.modules` and undoes + the environment variable changes made. + + The user of this fixture needs to ensure that no other threads besides the calling + thread of this fixture are modifying `sys.modules` during the execution of this + fixture, which should be a common situation. + """ + # Check if the calling test has passed valid `indirect` arguments + ev = ValueError( + "The calling test must use the `indirect` parameter to pass " + "a `dict[str, str]` for setting environment variables."
+ ) + if not hasattr(request, "param"): + raise ev + try: + _ENV_DICT_ADAPTER.validate_python(request.param) + except ValidationError as e: + raise ev from e + + modules = sys.modules + modules_original = modules.copy() + + # Remove all dandischema modules from sys.modules + for name in list(modules): + if name.startswith("dandischema.") or name == "dandischema": + del modules[name] + + # Monkey patch environment variables with arguments from the calling test + for p in _CONFIG_PARAMS: + if p in request.param: + monkeypatch.setenv(f"DANDI_{p.upper()}", request.param[p]) + else: + monkeypatch.delenv(f"DANDI_{p.upper()}", raising=False) + + yield + + # Restore the original modules + for name in list(modules): + if name not in modules_original: + del modules[name] + modules.update(modules_original) diff --git a/dandischema/tests/data/metadata/DANDI-ADHOC/meta_000004.json b/dandischema/tests/data/metadata/DANDI-ADHOC/meta_000004.json new file mode 100644 index 00000000..20f554fc --- /dev/null +++ b/dandischema/tests/data/metadata/DANDI-ADHOC/meta_000004.json @@ -0,0 +1,429 @@ +{ + "id": "DANDI-ADHOC:000004/draft", + "schemaKey": "Dandiset", + "schemaVersion": "0.4.4", + "name": "A NWB-based dataset and processing pipeline of human single-neuron activity during a declarative memory task", + "description": "A challenge for data sharing in systems neuroscience is the multitude of different data formats used. Neurodata Without Borders: Neurophysiology 2.0 (NWB:N) has emerged as a standardized data format for the storage of cellular-level data together with meta-data, stimulus information, and behavior. A key next step to facilitate NWB:N adoption is to provide easy to use processing pipelines to import/export data from/to NWB:N. Here, we present a NWB-formatted dataset of 1863 single neurons recorded from the medial temporal lobes of 59 human subjects undergoing intracranial monitoring while they performed a recognition memory task. 
We provide code to analyze and export/import stimuli, behavior, and electrophysiological recordings to/from NWB in both MATLAB and Python. The data files are NWB:N compliant, which affords interoperability between programming languages and operating systems. This combined data and code release is a case study for how to utilize NWB:N for human single-neuron recordings and enables easy reuse of this hard-to-obtain data for both teaching and research on the mechanisms of human memory.", + "contributor": [ + { + "schemaKey": "Person", + "identifier": "0000-0003-0161-4007", + "name": "Chandravadia, Nand", + "email": "nandc10@gmail.com", + "roleName": [ + "dcite:Author", + "dcite:ContactPerson", + "dcite:DataCurator", + "dcite:DataManager", + "dcite:FormalAnalysis", + "dcite:Investigation", + "dcite:Maintainer", + "dcite:Methodology", + "dcite:ProjectLeader", + "dcite:ProjectManager", + "dcite:ProjectMember", + "dcite:Researcher", + "dcite:Software", + "dcite:Validation", + "dcite:Visualization" + ], + "includeInCitation": true, + "affiliation": [ + { + "schemaKey": "Affiliation", + "name": "Department of Neurosurgery, Cedars-Sinai Medical Center, Los Angeles, CA, USA" + } + ] + }, + { + "schemaKey": "Person", + "name": "Liang, Dehua", + "email": "liang134@mail.chapman.edu", + "roleName": [ + "dcite:Author", + "dcite:Methodology", + "dcite:ProjectMember", + "dcite:Software", + "dcite:Validation" + ], + "includeInCitation": true, + "affiliation": [ + { + "schemaKey": "Affiliation", + "name": "Institute for Interdisciplinary Brain and Behavioral Sciences, Crean College of Health and Behavioral Sciences, Schmid College of Science and Technology, Chapman University, Orange, CA, USA" + } + ] + }, + { + "schemaKey": "Person", + "identifier": "0000-0002-4319-7689", + "name": "Schjetnan, Andrea Gomez Palacio", + "email": "Andrea.Schjetan@uhnresearch.ca", + "roleName": [ + "dcite:Author", + "dcite:DataCollector", + "dcite:ProjectMember", + "dcite:Validation" + ], + 
"includeInCitation": true, + "affiliation": [ + { + "schemaKey": "Affiliation", + "name": "Krembil Brain Institute, Toronto Western Hospital, Toronto, Canada" + } + ] + }, + { + "schemaKey": "Person", + "identifier": "0000-0002-9207-7069", + "name": "Carlson, April", + "email": "april.carlson@tufts.edu", + "roleName": [ + "dcite:Author", + "dcite:DataCurator", + "dcite:ProjectMember", + "dcite:Validation" + ], + "includeInCitation": true, + "affiliation": [ + { + "schemaKey": "Affiliation", + "name": "Department of Neurosurgery, Cedars-Sinai Medical Center, Los Angeles, CA, USA" + } + ] + }, + { + "schemaKey": "Person", + "name": "Faraut, Mailys", + "email": "mailyscm.faraut@gmail.com", + "roleName": [ + "dcite:Author", + "dcite:DataCollector", + "dcite:ProjectMember", + "dcite:Validation" + ], + "includeInCitation": true, + "affiliation": [ + { + "schemaKey": "Affiliation", + "name": "Department of Neurosurgery, Cedars-Sinai Medical Center, Los Angeles, CA, USA" + } + ] + }, + { + "schemaKey": "Person", + "name": "Chung, Jeffrey M.", + "email": "Jeffrey.Chung@cshs.org", + "roleName": [ + "dcite:Author", + "dcite:ProjectMember", + "dcite:Validation" + ], + "includeInCitation": true, + "affiliation": [ + { + "schemaKey": "Affiliation", + "name": "Department of Neurology, Cedars-Sinai Medical Center, Los Angeles, CA, USA" + } + ] + }, + { + "schemaKey": "Person", + "name": "Reed, Chrystal M.", + "email": "Chrystal.Reed@csmc.edu", + "roleName": [ + "dcite:Author", + "dcite:ProjectMember", + "dcite:Validation" + ], + "includeInCitation": true, + "affiliation": [ + { + "schemaKey": "Affiliation", + "name": "Department of Neurology, Cedars-Sinai Medical Center, Los Angeles, CA, USA" + } + ] + }, + { + "schemaKey": "Person", + "name": "Dichter, Ben", + "email": "ben.dichter@gmail.com", + "roleName": [ + "dcite:Author", + "dcite:Software", + "dcite:ProjectMember", + "dcite:Validation" + ], + "includeInCitation": true, + "affiliation": [ + { + "schemaKey": "Affiliation", + 
"name": "Biological Systems & Engineering Division, Lawrence Berkeley National Laboratory, Berkeley, CA, USA" + }, + { + "schemaKey": "Affiliation", + "name": "Department of Neurosurgery, Stanford University, Stanford, CA, USA" + } + ] + }, + { + "schemaKey": "Person", + "name": "Maoz, Uri", + "email": "maoz.uri@gmail.com", + "roleName": [ + "dcite:Author", + "dcite:Conceptualization", + "dcite:ProjectMember", + "dcite:Validation" + ], + "includeInCitation": true, + "affiliation": [ + { + "schemaKey": "Affiliation", + "name": "Institute for Interdisciplinary Brain and Behavioral Sciences, Crean College of Health and Behavioral Sciences, Schmid College of Science and Technology, Chapman University, Orange, CA, USA" + }, + { + "schemaKey": "Affiliation", + "name": "Division of Biology and Biological Engineering, California Institute of Technology, Pasadena, CA, USA" + } + ] + }, + { + "schemaKey": "Person", + "name": "Kalia, Suneil K.", + "email": "suneil.kalia@uhn.ca", + "roleName": [ + "dcite:Author", + "dcite:ProjectMember", + "dcite:Validation" + ], + "includeInCitation": true, + "affiliation": [ + { + "schemaKey": "Affiliation", + "name": "Division of Neurosurgery, Department of Surgery, University of Toronto, Toronto, Canada" + }, + { + "schemaKey": "Affiliation", + "name": "Krembil Brain Institute, Toronto Western Hospital, Toronto, Canada" + } + ] + }, + { + "schemaKey": "Person", + "name": "Valiante, Taufik A.", + "email": "Taufik.Valiante@uhn.ca", + "roleName": [ + "dcite:Author", + "dcite:ProjectMember", + "dcite:Validation" + ], + "includeInCitation": true, + "affiliation": [ + { + "schemaKey": "Affiliation", + "name": "Krembil Brain Institute, Toronto Western Hospital, Toronto, Canada" + }, + { + "schemaKey": "Affiliation", + "name": "Division of Neurosurgery, Department of Surgery, University of Toronto, Toronto, Canada" + } + ] + }, + { + "schemaKey": "Person", + "name": "Mamelak, Adam N.", + "email": "Adam.Mamelak@cshs.org", + "roleName": [ + 
"dcite:Author", + "dcite:ProjectMember", + "dcite:Validation" + ], + "includeInCitation": true, + "affiliation": [ + { + "schemaKey": "Affiliation", + "name": "Department of Neurosurgery, Cedars-Sinai Medical Center, Los Angeles, CA, USA" + } + ] + }, + { + "schemaKey": "Person", + "identifier": "0000-0002-9207-7069", + "name": "Rutishauser, Ueli", + "email": "Ueli.Rutishauser@cshs.org", + "roleName": [ + "dcite:Author", + "dcite:Conceptualization", + "dcite:FundingAcquisition", + "dcite:ProjectMember", + "dcite:Resources", + "dcite:Software", + "dcite:Supervision", + "dcite:Validation" + ], + "includeInCitation": true, + "affiliation": [ + { + "schemaKey": "Affiliation", + "name": "Department of Neurosurgery, Cedars-Sinai Medical Center, Los Angeles, CA, USA" + }, + { + "schemaKey": "Affiliation", + "name": "Department of Neurology, Cedars-Sinai Medical Center, Los Angeles, CA, USA" + }, + { + "schemaKey": "Affiliation", + "name": "Division of Biology and Biological Engineering, California Institute of Technology, Pasadena, CA, USA" + }, + { + "schemaKey": "Affiliation", + "name": "Computational and Neural Systems Program, California Institute of Technology, Pasadena, CA, USA" + }, + { + "schemaKey": "Affiliation", + "name": "Center for Neural Science and Medicine, Department of Biomedical Science, Cedars-Sinai Medical Center, Los Angeles, CA, USA" + } + ] + }, + { + "schemaKey": "Organization", + "name": "Stroke, National Institute of Neurological Disorders and", + "roleName": [ + "dcite:Sponsor" + ], + "includeInCitation": false, + "awardNumber": "U01NS103792" + }, + { + "schemaKey": "Organization", + "name": "Foundation, National Science", + "roleName": [ + "dcite:Sponsor" + ], + "includeInCitation": false, + "awardNumber": "1554105" + }, + { + "schemaKey": "Organization", + "name": "Health, National Institute of Mental", + "roleName": [ + "dcite:Sponsor" + ], + "includeInCitation": false, + "awardNumber": "R01MH110831" + }, + { + "schemaKey": "Organization", + 
"name": "Neuroscience, McKnight Endowment for", + "roleName": [ + "dcite:Sponsor" + ], + "includeInCitation": false + }, + { + "schemaKey": "Organization", + "name": "Foundation, NARSAD Young Investigator grant from the Brain & Behavior Research", + "roleName": [ + "dcite:Sponsor" + ], + "includeInCitation": false + }, + { + "schemaKey": "Organization", + "name": "Foundation, Kavli", + "roleName": [ + "dcite:Sponsor" + ], + "includeInCitation": false + }, + { + "schemaKey": "Organization", + "name": "initiative, BRAIN", + "roleName": [ + "dcite:Sponsor" + ], + "includeInCitation": false, + "awardNumber": "U19NS104590" + } + ], + "about": [ + { + "schemaKey": "GenericType", + "name": "Medial Temporal Lobe" + } + ], + "license": [ + "spdx:CC-BY-4.0" + ], + "keywords": [ + "cognitive neuroscience", + "data standardization", + "decision making", + "declarative memory", + "neurophysiology", + "neurosurgery", + "NWB", + "open source", + "single-neurons" + ], + "access": [ + { + "schemaKey": "AccessRequirements", + "status": "dandi:OpenAccess" + } + ], + "url": "https://dandiarchive.org/dandiset/000004/draft", + "repository": "https://dandiarchive.org/", + "relatedResource": [ + { + "schemaKey": "Resource", + "identifier": "DOI:10.17605/OSF.IO/HV7JA", + "name": "A NWB-based Dataset and Processing Pipeline of Human Single-Neuron Activity During a Declarative Memory Task", + "url": "https://osf.io/hv7ja/", + "repository": "Open Science Framework", + "relation": "dcite:IsDerivedFrom" + }, + { + "schemaKey": "Resource", + "identifier": "DOI:10.1038/s41597-020-0415-9", + "url": "https://www.nature.com/articles/s41597-020-0415-9", + "relation": "dcite:IsDescribedBy" + } + ], + "identifier": "DANDI-ADHOC:000004", + "citation": "Chandravadia, Nand; Liang, Dehua; Schjetnan, Andrea Gomez Palacio; Carlson, April; Faraut, Mailys; Chung, Jeffrey M.; Reed, Chrystal M.; Dichter, Ben; Maoz, Uri; Kalia, Suneil K.; Valiante, Taufik A.; Mamelak, Adam N.; Rutishauser, Ueli (2021) A NWB-based 
dataset and processing pipeline of human single-neuron activity during a declarative memory task. Online: https://dandiarchive.org/000004/draft", + "assetsSummary": { + "schemaKey": "AssetsSummary", + "numberOfBytes": 10, + "numberOfFiles": 1, + "dataStandard": [ + { + "schemaKey": "StandardsType", + "name": "NWB" + } + ], + "approach": [ + { + "schemaKey": "ApproachType", + "name": "electrophysiology" + } + ], + "measurementTechnique": [ + { + "schemaKey": "MeasurementTechniqueType", + "name": "two-photon microscopy technique" + } + ], + "species": [ + { + "schemaKey": "SpeciesType", + "name": "Human" + } + ] + }, + "manifestLocation": [ + "https://api.dandiarchive.org/api/dandisets/000004/versions/draft/assets/" + ], + "version": "draft" +} diff --git a/dandischema/tests/data/metadata/DANDI-ADHOC/meta_000004old.json b/dandischema/tests/data/metadata/DANDI-ADHOC/meta_000004old.json new file mode 100644 index 00000000..c4ad1ee7 --- /dev/null +++ b/dandischema/tests/data/metadata/DANDI-ADHOC/meta_000004old.json @@ -0,0 +1,474 @@ +{ + "id": "DANDI-ADHOC:000004/draft", + "url": "https://dandiarchive.org/dandiset/000004/draft", + "name": "A NWB-based dataset and processing pipeline of human single-neuron activity during a declarative memory task", + "about": [ + { + "name": "Right Temporal Lobe", + "identifier": "http://purl.obolibrary.org/obo/UBERON_0002809" + }, + { + "name": "Medial Temporal Lobe" + } + ], + "access": [ + { + "status": "dandi:OpenAccess", + "contactPoint": { + "schemaKey": "ContactPoint" + } + } + ], + "license": [ + "spdx:CC-BY-4.0" + ], + "version": "draft", + "@context": "https://raw.githubusercontent.com/dandi/schema/master/releases/0.4.4/context.json", + "citation": "Chandravadia, Nand; Liang, Dehua; Schjetnan, Andrea Gomez Palacio; Carlson, April; Faraut, Mailys; Chung, Jeffrey M.; Reed, Chrystal M.; Dichter, Ben; Maoz, Uri; Kalia, Suneil K.; Valiante, Taufik A.; Mamelak, Adam N.; Rutishauser, Ueli (2021) A NWB-based dataset and processing 
pipeline of human single-neuron activity during a declarative memory task (Version draft) [Data set]. DANDI archive. https://dandiarchive.org/dandiset/000004/draft", + "keywords": [ + "cognitive neuroscience", + "data standardization", + "decision making", + "declarative memory", + "neurophysiology", + "neurosurgery", + "NWB", + "open source", + "single-neurons" + ], + "protocol": [], + "identifier": "DANDI-ADHOC:000004", + "repository": "https://dandiarchive.org/", + "contributor": [ + { + "name": "Chandravadia, Nand", + "email": "nandc10@gmail.com", + "roleName": [ + "dcite:Author", + "dcite:ContactPerson", + "dcite:DataCurator", + "dcite:DataManager", + "dcite:FormalAnalysis", + "dcite:Investigation", + "dcite:Maintainer", + "dcite:Methodology", + "dcite:ProjectLeader", + "dcite:ProjectManager", + "dcite:ProjectMember", + "dcite:Researcher", + "dcite:Software", + "dcite:Validation", + "dcite:Visualization" + ], + "schemaKey": "Person", + "identifier": "0000-0003-0161-4007", + "affiliation": [ + { + "name": "Department of Neurosurgery, Cedars-Sinai Medical Center, Los Angeles, CA, USA", + "schemaKey": "Affiliation", + "includeInCitation": false + } + ], + "includeInCitation": true + }, + { + "name": "Liang, Dehua", + "email": "liang134@mail.chapman.edu", + "roleName": [ + "dcite:Author", + "dcite:Methodology", + "dcite:ProjectMember", + "dcite:Software", + "dcite:Validation" + ], + "schemaKey": "Person", + "affiliation": [ + { + "name": "Institute for Interdisciplinary Brain and Behavioral Sciences, Crean College of Health and Behavioral Sciences, Schmid College of Science and Technology, Chapman University, Orange, CA, USA", + "schemaKey": "Affiliation", + "includeInCitation": false + } + ], + "includeInCitation": true + }, + { + "name": "Schjetnan, Andrea Gomez Palacio", + "email": "Andrea.Schjetan@uhnresearch.ca", + "roleName": [ + "dcite:Author", + "dcite:DataCollector", + "dcite:ProjectMember", + "dcite:Validation" + ], + "schemaKey": "Person", + 
"identifier": "0000-0002-4319-7689", + "affiliation": [ + { + "name": "Krembil Brain Institute, Toronto Western Hospital, Toronto, Canada", + "schemaKey": "Affiliation", + "includeInCitation": false + } + ], + "includeInCitation": true + }, + { + "name": "Carlson, April", + "email": "april.carlson@tufts.edu", + "roleName": [ + "dcite:Author", + "dcite:DataCurator", + "dcite:ProjectMember", + "dcite:Validation" + ], + "schemaKey": "Person", + "identifier": "0000-0002-9207-7069", + "affiliation": [ + { + "name": "Department of Neurosurgery, Cedars-Sinai Medical Center, Los Angeles, CA, USA", + "schemaKey": "Affiliation", + "includeInCitation": false + } + ], + "includeInCitation": true + }, + { + "name": "Faraut, Mailys", + "email": "mailyscm.faraut@gmail.com", + "roleName": [ + "dcite:Author", + "dcite:DataCollector", + "dcite:ProjectMember", + "dcite:Validation" + ], + "schemaKey": "Person", + "affiliation": [ + { + "name": "Department of Neurosurgery, Cedars-Sinai Medical Center, Los Angeles, CA, USA", + "schemaKey": "Affiliation", + "includeInCitation": false + } + ], + "includeInCitation": true + }, + { + "name": "Chung, Jeffrey M.", + "email": "Jeffrey.Chung@cshs.org", + "roleName": [ + "dcite:Author", + "dcite:ProjectMember", + "dcite:Validation" + ], + "schemaKey": "Person", + "affiliation": [ + { + "name": "Department of Neurology, Cedars-Sinai Medical Center, Los Angeles, CA, USA", + "schemaKey": "Affiliation", + "includeInCitation": false + } + ], + "includeInCitation": true + }, + { + "name": "Reed, Chrystal M.", + "email": "Chrystal.Reed@csmc.edu", + "roleName": [ + "dcite:Author", + "dcite:ProjectMember", + "dcite:Validation" + ], + "schemaKey": "Person", + "affiliation": [ + { + "name": "Department of Neurology, Cedars-Sinai Medical Center, Los Angeles, CA, USA", + "schemaKey": "Affiliation", + "includeInCitation": false + } + ], + "includeInCitation": true + }, + { + "name": "Dichter, Ben", + "email": "ben.dichter@gmail.com", + "roleName": [ + 
"dcite:Author", + "dcite:Software", + "dcite:ProjectMember", + "dcite:Validation" + ], + "schemaKey": "Person", + "affiliation": [ + { + "name": "Biological Systems & Engineering Division, Lawrence Berkeley National Laboratory, Berkeley, CA, USA", + "schemaKey": "Affiliation", + "includeInCitation": false + }, + { + "name": "Department of Neurosurgery, Stanford University, Stanford, CA, USA", + "schemaKey": "Affiliation", + "includeInCitation": false + } + ], + "includeInCitation": true + }, + { + "name": "Maoz, Uri", + "email": "maoz.uri@gmail.com", + "roleName": [ + "dcite:Author", + "dcite:Conceptualization", + "dcite:ProjectMember", + "dcite:Validation" + ], + "schemaKey": "Person", + "affiliation": [ + { + "name": "Institute for Interdisciplinary Brain and Behavioral Sciences, Crean College of Health and Behavioral Sciences, Schmid College of Science and Technology, Chapman University, Orange, CA, USA", + "schemaKey": "Affiliation", + "includeInCitation": false + }, + { + "name": "Division of Biology and Biological Engineering, California Institute of Technology, Pasadena, CA, USA", + "schemaKey": "Affiliation", + "includeInCitation": false + } + ], + "includeInCitation": true + }, + { + "name": "Kalia, Suneil K.", + "email": "suneil.kalia@uhn.ca", + "roleName": [ + "dcite:Author", + "dcite:ProjectMember", + "dcite:Validation" + ], + "schemaKey": "Person", + "affiliation": [ + { + "name": "Division of Neurosurgery, Department of Surgery, University of Toronto, Toronto, Canada", + "schemaKey": "Affiliation", + "includeInCitation": false + }, + { + "name": "Krembil Brain Institute, Toronto Western Hospital, Toronto, Canada", + "schemaKey": "Affiliation", + "includeInCitation": false + } + ], + "includeInCitation": true + }, + { + "name": "Valiante, Taufik A.", + "email": "Taufik.Valiante@uhn.ca", + "roleName": [ + "dcite:Author", + "dcite:ProjectMember", + "dcite:Validation" + ], + "schemaKey": "Person", + "affiliation": [ + { + "name": "Krembil Brain Institute, 
Toronto Western Hospital, Toronto, Canada", + "schemaKey": "Affiliation", + "includeInCitation": false + }, + { + "name": "Division of Neurosurgery, Department of Surgery, University of Toronto, Toronto, Canada", + "schemaKey": "Affiliation", + "includeInCitation": false + } + ], + "includeInCitation": true + }, + { + "name": "Mamelak, Adam N.", + "email": "Adam.Mamelak@cshs.org", + "roleName": [ + "dcite:Author", + "dcite:ProjectMember", + "dcite:Validation" + ], + "schemaKey": "Person", + "affiliation": [ + { + "name": "Department of Neurosurgery, Cedars-Sinai Medical Center, Los Angeles, CA, USA", + "schemaKey": "Affiliation", + "includeInCitation": false + } + ], + "includeInCitation": true + }, + { + "name": "Rutishauser, Ueli", + "email": "Ueli.Rutishauser@cshs.org", + "roleName": [ + "dcite:Author", + "dcite:Conceptualization", + "dcite:FundingAcquisition", + "dcite:ProjectMember", + "dcite:Resources", + "dcite:Software", + "dcite:Supervision", + "dcite:Validation" + ], + "schemaKey": "Person", + "identifier": "0000-0002-9207-7069", + "affiliation": [ + { + "name": "Department of Neurosurgery, Cedars-Sinai Medical Center, Los Angeles, CA, USA", + "schemaKey": "Affiliation", + "includeInCitation": false + }, + { + "name": "Department of Neurology, Cedars-Sinai Medical Center, Los Angeles, CA, USA", + "schemaKey": "Affiliation", + "includeInCitation": false + }, + { + "name": "Division of Biology and Biological Engineering, California Institute of Technology, Pasadena, CA, USA", + "schemaKey": "Affiliation", + "includeInCitation": false + }, + { + "name": "Computational and Neural Systems Program, California Institute of Technology, Pasadena, CA, USA", + "schemaKey": "Affiliation", + "includeInCitation": false + }, + { + "name": "Center for Neural Science and Medicine, Department of Biomedical Science, Cedars-Sinai Medical Center, Los Angeles, CA, USA", + "schemaKey": "Affiliation", + "includeInCitation": false + } + ], + "includeInCitation": true + }, + { + 
"name": "National Institute of Neurological Disorders and Stroke", + "roleName": [ + "dcite:Sponsor" + ], + "schemaKey": "Organization", + "awardNumber": "U01NS103792", + "contactPoint": [], + "includeInCitation": false + }, + { + "name": "National Science Foundation", + "roleName": [ + "dcite:Sponsor" + ], + "schemaKey": "Organization", + "awardNumber": "1554105", + "contactPoint": [], + "includeInCitation": false + }, + { + "name": "National Institute of Mental Health", + "roleName": [ + "dcite:Sponsor" + ], + "schemaKey": "Organization", + "awardNumber": "R01MH110831", + "contactPoint": [], + "includeInCitation": false + }, + { + "name": "McKnight Endowment for Neuroscience", + "roleName": [ + "dcite:Sponsor" + ], + "schemaKey": "Organization", + "contactPoint": [], + "includeInCitation": false + }, + { + "name": "NARSAD Young Investigator grant from the Brain & Behavior Research Foundation", + "roleName": [ + "dcite:Sponsor" + ], + "schemaKey": "Organization", + "contactPoint": [], + "includeInCitation": false + }, + { + "name": "Kavli Foundation", + "roleName": [ + "dcite:Sponsor" + ], + "schemaKey": "Organization", + "contactPoint": [], + "includeInCitation": false + }, + { + "name": "BRAIN initiative", + "roleName": [ + "dcite:Sponsor" + ], + "schemaKey": "Organization", + "awardNumber": "U19NS104590", + "contactPoint": [], + "includeInCitation": false + } + ], + "description": "A challenge for data sharing in systems neuroscience is the multitude of different data formats used. Neurodata Without Borders: Neurophysiology 2.0 (NWB:N) has emerged as a standardized data format for the storage of cellular-level data together with meta-data, stimulus information, and behavior. A key next step to facilitate NWB:N adoption is to provide easy to use processing pipelines to import/export data from/to NWB:N. 
Here, we present a NWB-formatted dataset of 1863 single neurons recorded from the medial temporal lobes of 59 human subjects undergoing intracranial monitoring while they performed a recognition memory task. We provide code to analyze and export/import stimuli, behavior, and electrophysiological recordings to/from NWB in both MATLAB and Python. The data files are NWB:N compliant, which affords interoperability between programming languages and operating systems. This combined data and code release is a case study for how to utilize NWB:N for human single-neuron recordings and enables easy reuse of this hard-to-obtain data for both teaching and research on the mechanisms of human memory.", + "studyTarget": [], + "assetsSummary": { + "species": [ + { + "name": "Human", + "schemaKey": "SpeciesType", + "identifier": "http://purl.obolibrary.org/obo/NCBITaxon_9606" + } + ], + "approach": [ + { + "name": "electrophysiological approach", + "schemaKey": "ApproachType" + } + ], + "dataStandard": [ + { + "name": "Neurodata Without Borders (NWB)", + "schemaKey": "StandardsType", + "identifier": "RRID:SCR_015242" + } + ], + "numberOfBytes": 6197474020, + "numberOfFiles": 87, + "numberOfSubjects": 59, + "variableMeasured": [ + "Units", + "ElectrodeGroup" + ], + "measurementTechnique": [ + { + "name": "spike sorting technique", + "schemaKey": "MeasurementTechniqueType" + }, + { + "name": "surgical technique", + "schemaKey": "MeasurementTechniqueType" + } + ] + }, + "schemaVersion": "0.4.4", + "ethicsApproval": [], + "wasGeneratedBy": [], + "relatedResource": [ + { + "url": "https://osf.io/hv7ja/", + "name": "A NWB-based Dataset and Processing Pipeline of Human Single-Neuron Activity During a Declarative Memory Task", + "relation": "dcite:IsDerivedFrom", + "identifier": "DOI:10.17605/OSF.IO/HV7JA", + "repository": "Open Science Framework" + }, + { + "url": "https://www.nature.com/articles/s41597-020-0415-9", + "relation": "dcite:IsDescribedBy", + "identifier": 
"DOI:10.1038/s41597-020-0415-9" + } + ], + "manifestLocation": [ + "https://api.dandiarchive.org/api/dandisets/000004/versions/draft/assets/" + ] +} diff --git a/dandischema/tests/data/metadata/DANDI-ADHOC/meta_000008.json b/dandischema/tests/data/metadata/DANDI-ADHOC/meta_000008.json new file mode 100644 index 00000000..75715701 --- /dev/null +++ b/dandischema/tests/data/metadata/DANDI-ADHOC/meta_000008.json @@ -0,0 +1,216 @@ +{ + "id": "DANDI-ADHOC:000008/draft", + "schemaKey": "Dandiset", + "schemaVersion": "0.4.0", + "name": "Phenotypic variation within and across transcriptomic cell types in mouse motor cortex", + "description": "Data from the Tolias Lab shared in the BICCN project", + "contributor": [ + { + "schemaKey": "Person", + "name": "Scala, Federico", + "email": "fscala@example.com", + "roleName": [ + "dcite:DataCollector", + "dcite:Author", + "dcite:ContactPerson" + ], + "includeInCitation": true + }, + { + "schemaKey": "Person", + "identifier": "0000-0002-5639-7209", + "name": "Kobak, Dmitry", + "roleName": [ + "dcite:Author" + ], + "includeInCitation": true + }, + { + "schemaKey": "Person", + "identifier": "0000-0003-4458-117X", + "name": "Bernabucci, Matteo", + "roleName": [ + "dcite:Author" + ], + "includeInCitation": true + }, + { + "schemaKey": "Person", + "name": "Bernaerts, Yves", + "roleName": [ + "dcite:Author" + ], + "includeInCitation": true + }, + { + "schemaKey": "Person", + "identifier": "0000-0003-1963-8285", + "name": "Cadwell, Cathryn Rene", + "roleName": [ + "dcite:Author" + ], + "includeInCitation": true + }, + { + "schemaKey": "Person", + "name": "Castro, Jesus Ramon", + "roleName": [ + "dcite:Author" + ], + "includeInCitation": true + }, + { + "schemaKey": "Person", + "identifier": "0000-0002-4922-8781", + "name": "Hartmanis, Leonard", + "roleName": [ + "dcite:Author" + ], + "includeInCitation": true + }, + { + "schemaKey": "Person", + "identifier": "0000-0001-8066-1383", + "name": "Jiang, Xiaolong", + "roleName": [ + 
"dcite:Author" + ], + "includeInCitation": true + }, + { + "schemaKey": "Person", + "identifier": "0000-0001-9532-788X", + "name": "Laturnus, Sophie", + "roleName": [ + "dcite:Author" + ], + "includeInCitation": true + }, + { + "schemaKey": "Person", + "name": "Miranda, Elanine", + "roleName": [ + "dcite:Author" + ], + "includeInCitation": true + }, + { + "schemaKey": "Person", + "identifier": "0000-0001-8736-527X", + "name": "Mulherkar, Shalaka", + "roleName": [ + "dcite:Author" + ], + "includeInCitation": true + }, + { + "schemaKey": "Person", + "name": "Tan, Zheng Huan", + "roleName": [ + "dcite:Author" + ], + "includeInCitation": true + }, + { + "schemaKey": "Person", + "identifier": "0000-0002-9361-5607", + "name": "Yao, Zizhen", + "roleName": [ + "dcite:Author" + ], + "includeInCitation": true + }, + { + "schemaKey": "Person", + "identifier": "0000-0002-0326-5878", + "name": "Zeng, Hongkui", + "roleName": [ + "dcite:Author" + ], + "includeInCitation": true + }, + { + "schemaKey": "Person", + "identifier": "0000-0001-6473-1740", + "name": "Sandberg, Rickard", + "roleName": [ + "dcite:Author" + ], + "includeInCitation": true + }, + { + "schemaKey": "Person", + "identifier": "0000-0002-0199-4727", + "name": "Berens, Philipp", + "roleName": [ + "dcite:Author" + ], + "includeInCitation": true + }, + { + "schemaKey": "Person", + "identifier": "0000-0002-4305-6376", + "name": "Tolias, Andreas Savas", + "email": "atolias@example.com", + "roleName": [ + "dcite:Author", + "dcite:ContactPerson" + ], + "includeInCitation": true + } + ], + "license": [ + "spdx:CC-BY-4.0" + ], + "access": [ + { + "schemaKey": "AccessRequirements", + "status": "dandi:OpenAccess" + } + ], + "url": "https://dandiarchive.org/dandiset/000008/draft", + "repository": "https://dandiarchive.org/", + "relatedResource": [ + { + "schemaKey": "Resource", + "identifier": "doi:10.1101/2020.02.03.929158", + "url": "https://www.biorxiv.org/content/10.1101/2020.02.03.929158v1.full", + "relation": 
"dcite:IsDescribedBy" + } + ], + "identifier": "DANDI-ADHOC:000008", + "citation": "Scala, Federico; Kobak, Dmitry; Bernabucci, Matteo; Bernaerts, Yves; Cadwell, Cathryn Rene; Castro, Jesus Ramon; Hartmanis, Leonard; Jiang, Xiaolong; Laturnus, Sophie; Miranda, Elanine; Mulherkar, Shalaka; Tan, Zheng Huan; Yao, Zizhen; Zeng, Hongkui; Sandberg, Rickard; Berens, Philipp; Tolias, Andreas Savas (2021) Phenotypic variation within and across transcriptomic cell types in mouse motor cortex. Online: https://dandiarchive.org/000008/draft", + "assetsSummary": { + "schemaKey": "AssetsSummary", + "numberOfBytes": 10, + "numberOfFiles": 1, + "dataStandard": [ + { + "schemaKey": "StandardsType", + "name": "NWB" + } + ], + "approach": [ + { + "schemaKey": "ApproachType", + "name": "electrophysiology" + } + ], + "measurementTechnique": [ + { + "schemaKey": "MeasurementTechniqueType", + "name": "two-photon microscopy technique" + } + ], + "species": [ + { + "schemaKey": "SpeciesType", + "name": "Human" + } + ] + }, + "manifestLocation": [ + "https://api.dandiarchive.org/api/dandisets/000008/versions/draft/assets/" + ], + "version": "draft" +} diff --git a/dandischema/tests/data/metadata/meta_000004.json b/dandischema/tests/data/metadata/DANDI/meta_000004.json similarity index 100% rename from dandischema/tests/data/metadata/meta_000004.json rename to dandischema/tests/data/metadata/DANDI/meta_000004.json diff --git a/dandischema/tests/data/metadata/meta_000004old.json b/dandischema/tests/data/metadata/DANDI/meta_000004old.json similarity index 100% rename from dandischema/tests/data/metadata/meta_000004old.json rename to dandischema/tests/data/metadata/DANDI/meta_000004old.json diff --git a/dandischema/tests/data/metadata/meta_000008.json b/dandischema/tests/data/metadata/DANDI/meta_000008.json similarity index 100% rename from dandischema/tests/data/metadata/meta_000008.json rename to dandischema/tests/data/metadata/DANDI/meta_000008.json diff --git 
a/dandischema/tests/data/metadata/EMBER-DANDI/meta_000004.json b/dandischema/tests/data/metadata/EMBER-DANDI/meta_000004.json new file mode 100644 index 00000000..7691e2ef --- /dev/null +++ b/dandischema/tests/data/metadata/EMBER-DANDI/meta_000004.json @@ -0,0 +1,429 @@ +{ + "id": "EMBER-DANDI:000004/draft", + "schemaKey": "Dandiset", + "schemaVersion": "0.4.4", + "name": "A NWB-based dataset and processing pipeline of human single-neuron activity during a declarative memory task", + "description": "A challenge for data sharing in systems neuroscience is the multitude of different data formats used. Neurodata Without Borders: Neurophysiology 2.0 (NWB:N) has emerged as a standardized data format for the storage of cellular-level data together with meta-data, stimulus information, and behavior. A key next step to facilitate NWB:N adoption is to provide easy to use processing pipelines to import/export data from/to NWB:N. Here, we present a NWB-formatted dataset of 1863 single neurons recorded from the medial temporal lobes of 59 human subjects undergoing intracranial monitoring while they performed a recognition memory task. We provide code to analyze and export/import stimuli, behavior, and electrophysiological recordings to/from NWB in both MATLAB and Python. The data files are NWB:N compliant, which affords interoperability between programming languages and operating systems. 
This combined data and code release is a case study for how to utilize NWB:N for human single-neuron recordings and enables easy reuse of this hard-to-obtain data for both teaching and research on the mechanisms of human memory.", + "contributor": [ + { + "schemaKey": "Person", + "identifier": "0000-0003-0161-4007", + "name": "Chandravadia, Nand", + "email": "nandc10@gmail.com", + "roleName": [ + "dcite:Author", + "dcite:ContactPerson", + "dcite:DataCurator", + "dcite:DataManager", + "dcite:FormalAnalysis", + "dcite:Investigation", + "dcite:Maintainer", + "dcite:Methodology", + "dcite:ProjectLeader", + "dcite:ProjectManager", + "dcite:ProjectMember", + "dcite:Researcher", + "dcite:Software", + "dcite:Validation", + "dcite:Visualization" + ], + "includeInCitation": true, + "affiliation": [ + { + "schemaKey": "Affiliation", + "name": "Department of Neurosurgery, Cedars-Sinai Medical Center, Los Angeles, CA, USA" + } + ] + }, + { + "schemaKey": "Person", + "name": "Liang, Dehua", + "email": "liang134@mail.chapman.edu", + "roleName": [ + "dcite:Author", + "dcite:Methodology", + "dcite:ProjectMember", + "dcite:Software", + "dcite:Validation" + ], + "includeInCitation": true, + "affiliation": [ + { + "schemaKey": "Affiliation", + "name": "Institute for Interdisciplinary Brain and Behavioral Sciences, Crean College of Health and Behavioral Sciences, Schmid College of Science and Technology, Chapman University, Orange, CA, USA" + } + ] + }, + { + "schemaKey": "Person", + "identifier": "0000-0002-4319-7689", + "name": "Schjetnan, Andrea Gomez Palacio", + "email": "Andrea.Schjetan@uhnresearch.ca", + "roleName": [ + "dcite:Author", + "dcite:DataCollector", + "dcite:ProjectMember", + "dcite:Validation" + ], + "includeInCitation": true, + "affiliation": [ + { + "schemaKey": "Affiliation", + "name": "Krembil Brain Institute, Toronto Western Hospital, Toronto, Canada" + } + ] + }, + { + "schemaKey": "Person", + "identifier": "0000-0002-9207-7069", + "name": "Carlson, April", + 
"email": "april.carlson@tufts.edu", + "roleName": [ + "dcite:Author", + "dcite:DataCurator", + "dcite:ProjectMember", + "dcite:Validation" + ], + "includeInCitation": true, + "affiliation": [ + { + "schemaKey": "Affiliation", + "name": "Department of Neurosurgery, Cedars-Sinai Medical Center, Los Angeles, CA, USA" + } + ] + }, + { + "schemaKey": "Person", + "name": "Faraut, Mailys", + "email": "mailyscm.faraut@gmail.com", + "roleName": [ + "dcite:Author", + "dcite:DataCollector", + "dcite:ProjectMember", + "dcite:Validation" + ], + "includeInCitation": true, + "affiliation": [ + { + "schemaKey": "Affiliation", + "name": "Department of Neurosurgery, Cedars-Sinai Medical Center, Los Angeles, CA, USA" + } + ] + }, + { + "schemaKey": "Person", + "name": "Chung, Jeffrey M.", + "email": "Jeffrey.Chung@cshs.org", + "roleName": [ + "dcite:Author", + "dcite:ProjectMember", + "dcite:Validation" + ], + "includeInCitation": true, + "affiliation": [ + { + "schemaKey": "Affiliation", + "name": "Department of Neurology, Cedars-Sinai Medical Center, Los Angeles, CA, USA" + } + ] + }, + { + "schemaKey": "Person", + "name": "Reed, Chrystal M.", + "email": "Chrystal.Reed@csmc.edu", + "roleName": [ + "dcite:Author", + "dcite:ProjectMember", + "dcite:Validation" + ], + "includeInCitation": true, + "affiliation": [ + { + "schemaKey": "Affiliation", + "name": "Department of Neurology, Cedars-Sinai Medical Center, Los Angeles, CA, USA" + } + ] + }, + { + "schemaKey": "Person", + "name": "Dichter, Ben", + "email": "ben.dichter@gmail.com", + "roleName": [ + "dcite:Author", + "dcite:Software", + "dcite:ProjectMember", + "dcite:Validation" + ], + "includeInCitation": true, + "affiliation": [ + { + "schemaKey": "Affiliation", + "name": "Biological Systems & Engineering Division, Lawrence Berkeley National Laboratory, Berkeley, CA, USA" + }, + { + "schemaKey": "Affiliation", + "name": "Department of Neurosurgery, Stanford University, Stanford, CA, USA" + } + ] + }, + { + "schemaKey": "Person", 
+ "name": "Maoz, Uri", + "email": "maoz.uri@gmail.com", + "roleName": [ + "dcite:Author", + "dcite:Conceptualization", + "dcite:ProjectMember", + "dcite:Validation" + ], + "includeInCitation": true, + "affiliation": [ + { + "schemaKey": "Affiliation", + "name": "Institute for Interdisciplinary Brain and Behavioral Sciences, Crean College of Health and Behavioral Sciences, Schmid College of Science and Technology, Chapman University, Orange, CA, USA" + }, + { + "schemaKey": "Affiliation", + "name": "Division of Biology and Biological Engineering, California Institute of Technology, Pasadena, CA, USA" + } + ] + }, + { + "schemaKey": "Person", + "name": "Kalia, Suneil K.", + "email": "suneil.kalia@uhn.ca", + "roleName": [ + "dcite:Author", + "dcite:ProjectMember", + "dcite:Validation" + ], + "includeInCitation": true, + "affiliation": [ + { + "schemaKey": "Affiliation", + "name": "Division of Neurosurgery, Department of Surgery, University of Toronto, Toronto, Canada" + }, + { + "schemaKey": "Affiliation", + "name": "Krembil Brain Institute, Toronto Western Hospital, Toronto, Canada" + } + ] + }, + { + "schemaKey": "Person", + "name": "Valiante, Taufik A.", + "email": "Taufik.Valiante@uhn.ca", + "roleName": [ + "dcite:Author", + "dcite:ProjectMember", + "dcite:Validation" + ], + "includeInCitation": true, + "affiliation": [ + { + "schemaKey": "Affiliation", + "name": "Krembil Brain Institute, Toronto Western Hospital, Toronto, Canada" + }, + { + "schemaKey": "Affiliation", + "name": "Division of Neurosurgery, Department of Surgery, University of Toronto, Toronto, Canada" + } + ] + }, + { + "schemaKey": "Person", + "name": "Mamelak, Adam N.", + "email": "Adam.Mamelak@cshs.org", + "roleName": [ + "dcite:Author", + "dcite:ProjectMember", + "dcite:Validation" + ], + "includeInCitation": true, + "affiliation": [ + { + "schemaKey": "Affiliation", + "name": "Department of Neurosurgery, Cedars-Sinai Medical Center, Los Angeles, CA, USA" + } + ] + }, + { + "schemaKey": 
"Person", + "identifier": "0000-0002-9207-7069", + "name": "Rutishauser, Ueli", + "email": "Ueli.Rutishauser@cshs.org", + "roleName": [ + "dcite:Author", + "dcite:Conceptualization", + "dcite:FundingAcquisition", + "dcite:ProjectMember", + "dcite:Resources", + "dcite:Software", + "dcite:Supervision", + "dcite:Validation" + ], + "includeInCitation": true, + "affiliation": [ + { + "schemaKey": "Affiliation", + "name": "Department of Neurosurgery, Cedars-Sinai Medical Center, Los Angeles, CA, USA" + }, + { + "schemaKey": "Affiliation", + "name": "Department of Neurology, Cedars-Sinai Medical Center, Los Angeles, CA, USA" + }, + { + "schemaKey": "Affiliation", + "name": "Division of Biology and Biological Engineering, California Institute of Technology, Pasadena, CA, USA" + }, + { + "schemaKey": "Affiliation", + "name": "Computational and Neural Systems Program, California Institute of Technology, Pasadena, CA, USA" + }, + { + "schemaKey": "Affiliation", + "name": "Center for Neural Science and Medicine, Department of Biomedical Science, Cedars-Sinai Medical Center, Los Angeles, CA, USA" + } + ] + }, + { + "schemaKey": "Organization", + "name": "Stroke, National Institute of Neurological Disorders and", + "roleName": [ + "dcite:Sponsor" + ], + "includeInCitation": false, + "awardNumber": "U01NS103792" + }, + { + "schemaKey": "Organization", + "name": "Foundation, National Science", + "roleName": [ + "dcite:Sponsor" + ], + "includeInCitation": false, + "awardNumber": "1554105" + }, + { + "schemaKey": "Organization", + "name": "Health, National Institute of Mental", + "roleName": [ + "dcite:Sponsor" + ], + "includeInCitation": false, + "awardNumber": "R01MH110831" + }, + { + "schemaKey": "Organization", + "name": "Neuroscience, McKnight Endowment for", + "roleName": [ + "dcite:Sponsor" + ], + "includeInCitation": false + }, + { + "schemaKey": "Organization", + "name": "Foundation, NARSAD Young Investigator grant from the Brain & Behavior Research", + "roleName": [ + 
"dcite:Sponsor" + ], + "includeInCitation": false + }, + { + "schemaKey": "Organization", + "name": "Foundation, Kavli", + "roleName": [ + "dcite:Sponsor" + ], + "includeInCitation": false + }, + { + "schemaKey": "Organization", + "name": "initiative, BRAIN", + "roleName": [ + "dcite:Sponsor" + ], + "includeInCitation": false, + "awardNumber": "U19NS104590" + } + ], + "about": [ + { + "schemaKey": "GenericType", + "name": "Medial Temporal Lobe" + } + ], + "license": [ + "spdx:CC-BY-4.0" + ], + "keywords": [ + "cognitive neuroscience", + "data standardization", + "decision making", + "declarative memory", + "neurophysiology", + "neurosurgery", + "NWB", + "open source", + "single-neurons" + ], + "access": [ + { + "schemaKey": "AccessRequirements", + "status": "dandi:OpenAccess" + } + ], + "url": "https://dandiarchive.org/dandiset/000004/draft", + "repository": "https://dandiarchive.org/", + "relatedResource": [ + { + "schemaKey": "Resource", + "identifier": "DOI:10.17605/OSF.IO/HV7JA", + "name": "A NWB-based Dataset and Processing Pipeline of Human Single-Neuron Activity During a Declarative Memory Task", + "url": "https://osf.io/hv7ja/", + "repository": "Open Science Framework", + "relation": "dcite:IsDerivedFrom" + }, + { + "schemaKey": "Resource", + "identifier": "DOI:10.1038/s41597-020-0415-9", + "url": "https://www.nature.com/articles/s41597-020-0415-9", + "relation": "dcite:IsDescribedBy" + } + ], + "identifier": "EMBER-DANDI:000004", + "citation": "Chandravadia, Nand; Liang, Dehua; Schjetnan, Andrea Gomez Palacio; Carlson, April; Faraut, Mailys; Chung, Jeffrey M.; Reed, Chrystal M.; Dichter, Ben; Maoz, Uri; Kalia, Suneil K.; Valiante, Taufik A.; Mamelak, Adam N.; Rutishauser, Ueli (2021) A NWB-based dataset and processing pipeline of human single-neuron activity during a declarative memory task. 
Online: https://dandiarchive.org/000004/draft", + "assetsSummary": { + "schemaKey": "AssetsSummary", + "numberOfBytes": 10, + "numberOfFiles": 1, + "dataStandard": [ + { + "schemaKey": "StandardsType", + "name": "NWB" + } + ], + "approach": [ + { + "schemaKey": "ApproachType", + "name": "electrophysiology" + } + ], + "measurementTechnique": [ + { + "schemaKey": "MeasurementTechniqueType", + "name": "two-photon microscopy technique" + } + ], + "species": [ + { + "schemaKey": "SpeciesType", + "name": "Human" + } + ] + }, + "manifestLocation": [ + "https://api.dandiarchive.org/api/dandisets/000004/versions/draft/assets/" + ], + "version": "draft" +} diff --git a/dandischema/tests/data/metadata/EMBER-DANDI/meta_000004old.json b/dandischema/tests/data/metadata/EMBER-DANDI/meta_000004old.json new file mode 100644 index 00000000..302ee8ef --- /dev/null +++ b/dandischema/tests/data/metadata/EMBER-DANDI/meta_000004old.json @@ -0,0 +1,474 @@ +{ + "id": "EMBER-DANDI:000004/draft", + "url": "https://dandiarchive.org/dandiset/000004/draft", + "name": "A NWB-based dataset and processing pipeline of human single-neuron activity during a declarative memory task", + "about": [ + { + "name": "Right Temporal Lobe", + "identifier": "http://purl.obolibrary.org/obo/UBERON_0002809" + }, + { + "name": "Medial Temporal Lobe" + } + ], + "access": [ + { + "status": "dandi:OpenAccess", + "contactPoint": { + "schemaKey": "ContactPoint" + } + } + ], + "license": [ + "spdx:CC-BY-4.0" + ], + "version": "draft", + "@context": "https://raw.githubusercontent.com/dandi/schema/master/releases/0.4.4/context.json", + "citation": "Chandravadia, Nand; Liang, Dehua; Schjetnan, Andrea Gomez Palacio; Carlson, April; Faraut, Mailys; Chung, Jeffrey M.; Reed, Chrystal M.; Dichter, Ben; Maoz, Uri; Kalia, Suneil K.; Valiante, Taufik A.; Mamelak, Adam N.; Rutishauser, Ueli (2021) A NWB-based dataset and processing pipeline of human single-neuron activity during a declarative memory task (Version draft) [Data set]. 
DANDI archive. https://dandiarchive.org/dandiset/000004/draft", + "keywords": [ + "cognitive neuroscience", + "data standardization", + "decision making", + "declarative memory", + "neurophysiology", + "neurosurgery", + "NWB", + "open source", + "single-neurons" + ], + "protocol": [], + "identifier": "EMBER-DANDI:000004", + "repository": "https://dandiarchive.org/", + "contributor": [ + { + "name": "Chandravadia, Nand", + "email": "nandc10@gmail.com", + "roleName": [ + "dcite:Author", + "dcite:ContactPerson", + "dcite:DataCurator", + "dcite:DataManager", + "dcite:FormalAnalysis", + "dcite:Investigation", + "dcite:Maintainer", + "dcite:Methodology", + "dcite:ProjectLeader", + "dcite:ProjectManager", + "dcite:ProjectMember", + "dcite:Researcher", + "dcite:Software", + "dcite:Validation", + "dcite:Visualization" + ], + "schemaKey": "Person", + "identifier": "0000-0003-0161-4007", + "affiliation": [ + { + "name": "Department of Neurosurgery, Cedars-Sinai Medical Center, Los Angeles, CA, USA", + "schemaKey": "Affiliation", + "includeInCitation": false + } + ], + "includeInCitation": true + }, + { + "name": "Liang, Dehua", + "email": "liang134@mail.chapman.edu", + "roleName": [ + "dcite:Author", + "dcite:Methodology", + "dcite:ProjectMember", + "dcite:Software", + "dcite:Validation" + ], + "schemaKey": "Person", + "affiliation": [ + { + "name": "Institute for Interdisciplinary Brain and Behavioral Sciences, Crean College of Health and Behavioral Sciences, Schmid College of Science and Technology, Chapman University, Orange, CA, USA", + "schemaKey": "Affiliation", + "includeInCitation": false + } + ], + "includeInCitation": true + }, + { + "name": "Schjetnan, Andrea Gomez Palacio", + "email": "Andrea.Schjetan@uhnresearch.ca", + "roleName": [ + "dcite:Author", + "dcite:DataCollector", + "dcite:ProjectMember", + "dcite:Validation" + ], + "schemaKey": "Person", + "identifier": "0000-0002-4319-7689", + "affiliation": [ + { + "name": "Krembil Brain Institute, Toronto Western 
Hospital, Toronto, Canada", + "schemaKey": "Affiliation", + "includeInCitation": false + } + ], + "includeInCitation": true + }, + { + "name": "Carlson, April", + "email": "april.carlson@tufts.edu", + "roleName": [ + "dcite:Author", + "dcite:DataCurator", + "dcite:ProjectMember", + "dcite:Validation" + ], + "schemaKey": "Person", + "identifier": "0000-0002-9207-7069", + "affiliation": [ + { + "name": "Department of Neurosurgery, Cedars-Sinai Medical Center, Los Angeles, CA, USA", + "schemaKey": "Affiliation", + "includeInCitation": false + } + ], + "includeInCitation": true + }, + { + "name": "Faraut, Mailys", + "email": "mailyscm.faraut@gmail.com", + "roleName": [ + "dcite:Author", + "dcite:DataCollector", + "dcite:ProjectMember", + "dcite:Validation" + ], + "schemaKey": "Person", + "affiliation": [ + { + "name": "Department of Neurosurgery, Cedars-Sinai Medical Center, Los Angeles, CA, USA", + "schemaKey": "Affiliation", + "includeInCitation": false + } + ], + "includeInCitation": true + }, + { + "name": "Chung, Jeffrey M.", + "email": "Jeffrey.Chung@cshs.org", + "roleName": [ + "dcite:Author", + "dcite:ProjectMember", + "dcite:Validation" + ], + "schemaKey": "Person", + "affiliation": [ + { + "name": "Department of Neurology, Cedars-Sinai Medical Center, Los Angeles, CA, USA", + "schemaKey": "Affiliation", + "includeInCitation": false + } + ], + "includeInCitation": true + }, + { + "name": "Reed, Chrystal M.", + "email": "Chrystal.Reed@csmc.edu", + "roleName": [ + "dcite:Author", + "dcite:ProjectMember", + "dcite:Validation" + ], + "schemaKey": "Person", + "affiliation": [ + { + "name": "Department of Neurology, Cedars-Sinai Medical Center, Los Angeles, CA, USA", + "schemaKey": "Affiliation", + "includeInCitation": false + } + ], + "includeInCitation": true + }, + { + "name": "Dichter, Ben", + "email": "ben.dichter@gmail.com", + "roleName": [ + "dcite:Author", + "dcite:Software", + "dcite:ProjectMember", + "dcite:Validation" + ], + "schemaKey": "Person", + 
"affiliation": [ + { + "name": "Biological Systems & Engineering Division, Lawrence Berkeley National Laboratory, Berkeley, CA, USA", + "schemaKey": "Affiliation", + "includeInCitation": false + }, + { + "name": "Department of Neurosurgery, Stanford University, Stanford, CA, USA", + "schemaKey": "Affiliation", + "includeInCitation": false + } + ], + "includeInCitation": true + }, + { + "name": "Maoz, Uri", + "email": "maoz.uri@gmail.com", + "roleName": [ + "dcite:Author", + "dcite:Conceptualization", + "dcite:ProjectMember", + "dcite:Validation" + ], + "schemaKey": "Person", + "affiliation": [ + { + "name": "Institute for Interdisciplinary Brain and Behavioral Sciences, Crean College of Health and Behavioral Sciences, Schmid College of Science and Technology, Chapman University, Orange, CA, USA", + "schemaKey": "Affiliation", + "includeInCitation": false + }, + { + "name": "Division of Biology and Biological Engineering, California Institute of Technology, Pasadena, CA, USA", + "schemaKey": "Affiliation", + "includeInCitation": false + } + ], + "includeInCitation": true + }, + { + "name": "Kalia, Suneil K.", + "email": "suneil.kalia@uhn.ca", + "roleName": [ + "dcite:Author", + "dcite:ProjectMember", + "dcite:Validation" + ], + "schemaKey": "Person", + "affiliation": [ + { + "name": "Division of Neurosurgery, Department of Surgery, University of Toronto, Toronto, Canada", + "schemaKey": "Affiliation", + "includeInCitation": false + }, + { + "name": "Krembil Brain Institute, Toronto Western Hospital, Toronto, Canada", + "schemaKey": "Affiliation", + "includeInCitation": false + } + ], + "includeInCitation": true + }, + { + "name": "Valiante, Taufik A.", + "email": "Taufik.Valiante@uhn.ca", + "roleName": [ + "dcite:Author", + "dcite:ProjectMember", + "dcite:Validation" + ], + "schemaKey": "Person", + "affiliation": [ + { + "name": "Krembil Brain Institute, Toronto Western Hospital, Toronto, Canada", + "schemaKey": "Affiliation", + "includeInCitation": false + }, + { + 
"name": "Division of Neurosurgery, Department of Surgery, University of Toronto, Toronto, Canada", + "schemaKey": "Affiliation", + "includeInCitation": false + } + ], + "includeInCitation": true + }, + { + "name": "Mamelak, Adam N.", + "email": "Adam.Mamelak@cshs.org", + "roleName": [ + "dcite:Author", + "dcite:ProjectMember", + "dcite:Validation" + ], + "schemaKey": "Person", + "affiliation": [ + { + "name": "Department of Neurosurgery, Cedars-Sinai Medical Center, Los Angeles, CA, USA", + "schemaKey": "Affiliation", + "includeInCitation": false + } + ], + "includeInCitation": true + }, + { + "name": "Rutishauser, Ueli", + "email": "Ueli.Rutishauser@cshs.org", + "roleName": [ + "dcite:Author", + "dcite:Conceptualization", + "dcite:FundingAcquisition", + "dcite:ProjectMember", + "dcite:Resources", + "dcite:Software", + "dcite:Supervision", + "dcite:Validation" + ], + "schemaKey": "Person", + "identifier": "0000-0002-9207-7069", + "affiliation": [ + { + "name": "Department of Neurosurgery, Cedars-Sinai Medical Center, Los Angeles, CA, USA", + "schemaKey": "Affiliation", + "includeInCitation": false + }, + { + "name": "Department of Neurology, Cedars-Sinai Medical Center, Los Angeles, CA, USA", + "schemaKey": "Affiliation", + "includeInCitation": false + }, + { + "name": "Division of Biology and Biological Engineering, California Institute of Technology, Pasadena, CA, USA", + "schemaKey": "Affiliation", + "includeInCitation": false + }, + { + "name": "Computational and Neural Systems Program, California Institute of Technology, Pasadena, CA, USA", + "schemaKey": "Affiliation", + "includeInCitation": false + }, + { + "name": "Center for Neural Science and Medicine, Department of Biomedical Science, Cedars-Sinai Medical Center, Los Angeles, CA, USA", + "schemaKey": "Affiliation", + "includeInCitation": false + } + ], + "includeInCitation": true + }, + { + "name": "National Institute of Neurological Disorders and Stroke", + "roleName": [ + "dcite:Sponsor" + ], + 
"schemaKey": "Organization", + "awardNumber": "U01NS103792", + "contactPoint": [], + "includeInCitation": false + }, + { + "name": "National Science Foundation", + "roleName": [ + "dcite:Sponsor" + ], + "schemaKey": "Organization", + "awardNumber": "1554105", + "contactPoint": [], + "includeInCitation": false + }, + { + "name": "National Institute of Mental Health", + "roleName": [ + "dcite:Sponsor" + ], + "schemaKey": "Organization", + "awardNumber": "R01MH110831", + "contactPoint": [], + "includeInCitation": false + }, + { + "name": "McKnight Endowment for Neuroscience", + "roleName": [ + "dcite:Sponsor" + ], + "schemaKey": "Organization", + "contactPoint": [], + "includeInCitation": false + }, + { + "name": "NARSAD Young Investigator grant from the Brain & Behavior Research Foundation", + "roleName": [ + "dcite:Sponsor" + ], + "schemaKey": "Organization", + "contactPoint": [], + "includeInCitation": false + }, + { + "name": "Kavli Foundation", + "roleName": [ + "dcite:Sponsor" + ], + "schemaKey": "Organization", + "contactPoint": [], + "includeInCitation": false + }, + { + "name": "BRAIN initiative", + "roleName": [ + "dcite:Sponsor" + ], + "schemaKey": "Organization", + "awardNumber": "U19NS104590", + "contactPoint": [], + "includeInCitation": false + } + ], + "description": "A challenge for data sharing in systems neuroscience is the multitude of different data formats used. Neurodata Without Borders: Neurophysiology 2.0 (NWB:N) has emerged as a standardized data format for the storage of cellular-level data together with meta-data, stimulus information, and behavior. A key next step to facilitate NWB:N adoption is to provide easy to use processing pipelines to import/export data from/to NWB:N. Here, we present a NWB-formatted dataset of 1863 single neurons recorded from the medial temporal lobes of 59 human subjects undergoing intracranial monitoring while they performed a recognition memory task. 
We provide code to analyze and export/import stimuli, behavior, and electrophysiological recordings to/from NWB in both MATLAB and Python. The data files are NWB:N compliant, which affords interoperability between programming languages and operating systems. This combined data and code release is a case study for how to utilize NWB:N for human single-neuron recordings and enables easy reuse of this hard-to-obtain data for both teaching and research on the mechanisms of human memory.", + "studyTarget": [], + "assetsSummary": { + "species": [ + { + "name": "Human", + "schemaKey": "SpeciesType", + "identifier": "http://purl.obolibrary.org/obo/NCBITaxon_9606" + } + ], + "approach": [ + { + "name": "electrophysiological approach", + "schemaKey": "ApproachType" + } + ], + "dataStandard": [ + { + "name": "Neurodata Without Borders (NWB)", + "schemaKey": "StandardsType", + "identifier": "RRID:SCR_015242" + } + ], + "numberOfBytes": 6197474020, + "numberOfFiles": 87, + "numberOfSubjects": 59, + "variableMeasured": [ + "Units", + "ElectrodeGroup" + ], + "measurementTechnique": [ + { + "name": "spike sorting technique", + "schemaKey": "MeasurementTechniqueType" + }, + { + "name": "surgical technique", + "schemaKey": "MeasurementTechniqueType" + } + ] + }, + "schemaVersion": "0.4.4", + "ethicsApproval": [], + "wasGeneratedBy": [], + "relatedResource": [ + { + "url": "https://osf.io/hv7ja/", + "name": "A NWB-based Dataset and Processing Pipeline of Human Single-Neuron Activity During a Declarative Memory Task", + "relation": "dcite:IsDerivedFrom", + "identifier": "DOI:10.17605/OSF.IO/HV7JA", + "repository": "Open Science Framework" + }, + { + "url": "https://www.nature.com/articles/s41597-020-0415-9", + "relation": "dcite:IsDescribedBy", + "identifier": "DOI:10.1038/s41597-020-0415-9" + } + ], + "manifestLocation": [ + "https://api.dandiarchive.org/api/dandisets/000004/versions/draft/assets/" + ] +} diff --git a/dandischema/tests/data/metadata/EMBER-DANDI/meta_000008.json 
b/dandischema/tests/data/metadata/EMBER-DANDI/meta_000008.json new file mode 100644 index 00000000..84f49a44 --- /dev/null +++ b/dandischema/tests/data/metadata/EMBER-DANDI/meta_000008.json @@ -0,0 +1,216 @@ +{ + "id": "EMBER-DANDI:000008/draft", + "schemaKey": "Dandiset", + "schemaVersion": "0.4.0", + "name": "Phenotypic variation within and across transcriptomic cell types in mouse motor cortex", + "description": "Data from the Tolias Lab shared in the BICCN project", + "contributor": [ + { + "schemaKey": "Person", + "name": "Scala, Federico", + "email": "fscala@example.com", + "roleName": [ + "dcite:DataCollector", + "dcite:Author", + "dcite:ContactPerson" + ], + "includeInCitation": true + }, + { + "schemaKey": "Person", + "identifier": "0000-0002-5639-7209", + "name": "Kobak, Dmitry", + "roleName": [ + "dcite:Author" + ], + "includeInCitation": true + }, + { + "schemaKey": "Person", + "identifier": "0000-0003-4458-117X", + "name": "Bernabucci, Matteo", + "roleName": [ + "dcite:Author" + ], + "includeInCitation": true + }, + { + "schemaKey": "Person", + "name": "Bernaerts, Yves", + "roleName": [ + "dcite:Author" + ], + "includeInCitation": true + }, + { + "schemaKey": "Person", + "identifier": "0000-0003-1963-8285", + "name": "Cadwell, Cathryn Rene", + "roleName": [ + "dcite:Author" + ], + "includeInCitation": true + }, + { + "schemaKey": "Person", + "name": "Castro, Jesus Ramon", + "roleName": [ + "dcite:Author" + ], + "includeInCitation": true + }, + { + "schemaKey": "Person", + "identifier": "0000-0002-4922-8781", + "name": "Hartmanis, Leonard", + "roleName": [ + "dcite:Author" + ], + "includeInCitation": true + }, + { + "schemaKey": "Person", + "identifier": "0000-0001-8066-1383", + "name": "Jiang, Xiaolong", + "roleName": [ + "dcite:Author" + ], + "includeInCitation": true + }, + { + "schemaKey": "Person", + "identifier": "0000-0001-9532-788X", + "name": "Laturnus, Sophie", + "roleName": [ + "dcite:Author" + ], + "includeInCitation": true + }, + { + 
"schemaKey": "Person", + "name": "Miranda, Elanine", + "roleName": [ + "dcite:Author" + ], + "includeInCitation": true + }, + { + "schemaKey": "Person", + "identifier": "0000-0001-8736-527X", + "name": "Mulherkar, Shalaka", + "roleName": [ + "dcite:Author" + ], + "includeInCitation": true + }, + { + "schemaKey": "Person", + "name": "Tan, Zheng Huan", + "roleName": [ + "dcite:Author" + ], + "includeInCitation": true + }, + { + "schemaKey": "Person", + "identifier": "0000-0002-9361-5607", + "name": "Yao, Zizhen", + "roleName": [ + "dcite:Author" + ], + "includeInCitation": true + }, + { + "schemaKey": "Person", + "identifier": "0000-0002-0326-5878", + "name": "Zeng, Hongkui", + "roleName": [ + "dcite:Author" + ], + "includeInCitation": true + }, + { + "schemaKey": "Person", + "identifier": "0000-0001-6473-1740", + "name": "Sandberg, Rickard", + "roleName": [ + "dcite:Author" + ], + "includeInCitation": true + }, + { + "schemaKey": "Person", + "identifier": "0000-0002-0199-4727", + "name": "Berens, Philipp", + "roleName": [ + "dcite:Author" + ], + "includeInCitation": true + }, + { + "schemaKey": "Person", + "identifier": "0000-0002-4305-6376", + "name": "Tolias, Andreas Savas", + "email": "atolias@example.com", + "roleName": [ + "dcite:Author", + "dcite:ContactPerson" + ], + "includeInCitation": true + } + ], + "license": [ + "spdx:CC-BY-4.0" + ], + "access": [ + { + "schemaKey": "AccessRequirements", + "status": "dandi:OpenAccess" + } + ], + "url": "https://dandiarchive.org/dandiset/000008/draft", + "repository": "https://dandiarchive.org/", + "relatedResource": [ + { + "schemaKey": "Resource", + "identifier": "doi:10.1101/2020.02.03.929158", + "url": "https://www.biorxiv.org/content/10.1101/2020.02.03.929158v1.full", + "relation": "dcite:IsDescribedBy" + } + ], + "identifier": "EMBER-DANDI:000008", + "citation": "Scala, Federico; Kobak, Dmitry; Bernabucci, Matteo; Bernaerts, Yves; Cadwell, Cathryn Rene; Castro, Jesus Ramon; Hartmanis, Leonard; Jiang, Xiaolong; 
Laturnus, Sophie; Miranda, Elanine; Mulherkar, Shalaka; Tan, Zheng Huan; Yao, Zizhen; Zeng, Hongkui; Sandberg, Rickard; Berens, Philipp; Tolias, Andreas Savas (2021) Phenotypic variation within and across transcriptomic cell types in mouse motor cortex. Online: https://dandiarchive.org/000008/draft", + "assetsSummary": { + "schemaKey": "AssetsSummary", + "numberOfBytes": 10, + "numberOfFiles": 1, + "dataStandard": [ + { + "schemaKey": "StandardsType", + "name": "NWB" + } + ], + "approach": [ + { + "schemaKey": "ApproachType", + "name": "electrophysiology" + } + ], + "measurementTechnique": [ + { + "schemaKey": "MeasurementTechniqueType", + "name": "two-photon microscopy technique" + } + ], + "species": [ + { + "schemaKey": "SpeciesType", + "name": "Human" + } + ] + }, + "manifestLocation": [ + "https://api.dandiarchive.org/api/dandisets/000008/versions/draft/assets/" + ], + "version": "draft" +} diff --git a/dandischema/tests/test_conf.py b/dandischema/tests/test_conf.py new file mode 100644 index 00000000..433f16d1 --- /dev/null +++ b/dandischema/tests/test_conf.py @@ -0,0 +1,476 @@ +import json +import logging +from pathlib import Path +from typing import Optional, Union + +from pydantic import ValidationError +import pytest + + +def test_get_instance_config() -> None: + from dandischema.conf import _instance_config, get_instance_config + + obtained_config = get_instance_config() + + assert obtained_config == _instance_config + assert ( + obtained_config is not _instance_config + ), "`get_instance_config` should return a copy of the instance config" + + +FOO_CONFIG_DICT = { + "instance_name": "FOO", + "instance_identifier": "RRID:ABC_123456", + "instance_url": "https://dandiarchive.org/", + "doi_prefix": "10.1234", + "licenses": ["spdx:AdaCore-doc", "spdx:AGPL-3.0-or-later", "spdx:NBPL-1.0"], +} + +# Same as `FOO_CONFIG_DICT` but with the field aliases instead of the field names being +# the keys +FOO_CONFIG_DICT_WITH_ALIASES = {f"dandi_{k}": v for k, v in 
FOO_CONFIG_DICT.items()} + +FOO_CONFIG_ENV_VARS = { + k: v if k != "licenses" else json.dumps(v) for k, v in FOO_CONFIG_DICT.items() +} + + +class TestConfig: + @pytest.mark.parametrize( + "clear_dandischema_modules_and_set_env_vars", [{}], indirect=True + ) + @pytest.mark.usefixtures("clear_dandischema_modules_and_set_env_vars") + @pytest.mark.parametrize( + "instance_name", + ["DANDI-ADHOC", "DANDI-TEST", "DANDI", "DANDI--TEST", "DANDI-TE-ST"], + ) + def test_valid_instance_name(self, instance_name: str) -> None: + """ + Test instantiating `dandischema.conf.Config` with a valid instance name + """ + from dandischema.conf import Config + + Config(instance_name=instance_name) + + @pytest.mark.parametrize( + "clear_dandischema_modules_and_set_env_vars", [{}], indirect=True + ) + @pytest.mark.usefixtures("clear_dandischema_modules_and_set_env_vars") + @pytest.mark.parametrize("instance_name", ["-DANDI", "dandi", "DANDI0", "DANDI*"]) + def test_invalid_instance_name(self, instance_name: str) -> None: + """ + Test instantiating `dandischema.conf.Config` with an invalid instance name + """ + from dandischema.conf import Config + + with pytest.raises(ValidationError) as exc_info: + Config(instance_name=instance_name) + + assert len(exc_info.value.errors()) == 1 + assert exc_info.value.errors()[0]["loc"] == ("dandi_instance_name",) + + @pytest.mark.parametrize( + "clear_dandischema_modules_and_set_env_vars", [{}], indirect=True + ) + @pytest.mark.usefixtures("clear_dandischema_modules_and_set_env_vars") + @pytest.mark.parametrize( + "instance_identifier", [None, "RRID:ABC_123456", "RRID:SCR_1234567891234"] + ) + def test_valid_instance_identifier( + self, instance_identifier: Optional[str] + ) -> None: + """ + Test instantiating `dandischema.conf.Config` with a valid instance identifier + """ + from dandischema.conf import Config + + Config(instance_identifier=instance_identifier) + + @pytest.mark.parametrize( + "clear_dandischema_modules_and_set_env_vars", [{}], 
indirect=True + ) + @pytest.mark.usefixtures("clear_dandischema_modules_and_set_env_vars") + @pytest.mark.parametrize("instance_identifier", ["", "RRID:AB C", "ID:ABC_123456"]) + def test_invalid_instance_identifier(self, instance_identifier: str) -> None: + """ + Test instantiating `dandischema.conf.Config` with an invalid instance identifier + """ + from dandischema.conf import Config + + with pytest.raises(ValidationError) as exc_info: + Config(instance_identifier=instance_identifier) + + assert len(exc_info.value.errors()) == 1 + assert exc_info.value.errors()[0]["loc"] == ("dandi_instance_identifier",) + + @pytest.mark.parametrize( + "clear_dandischema_modules_and_set_env_vars", [{}], indirect=True + ) + @pytest.mark.usefixtures("clear_dandischema_modules_and_set_env_vars") + def test_without_instance_identifier_with_doi_prefix(self) -> None: + """ + Test instantiating `dandischema.conf.Config` without an instance identifier + when a DOI prefix is provided + """ + from dandischema.conf import Config + + with pytest.raises( + ValidationError, match="`instance_identifier` must also be set." 
+ ): + Config(doi_prefix="10.1234") + + @pytest.mark.parametrize( + "clear_dandischema_modules_and_set_env_vars", [{}], indirect=True + ) + @pytest.mark.usefixtures("clear_dandischema_modules_and_set_env_vars") + @pytest.mark.parametrize( + "doi_prefix", ["10.1234", "10.5678", "10.12345678", "10.987654321"] + ) + def test_valid_doi_prefix(self, doi_prefix: str) -> None: + """ + Test instantiating `dandischema.conf.Config` with a valid DOI prefix + """ + from dandischema.conf import Config + + Config( + # Instance identifier must be provided if doi_prefix is provided + instance_identifier="RRID:SCR_017571", + doi_prefix=doi_prefix, + ) + + @pytest.mark.parametrize( + "clear_dandischema_modules_and_set_env_vars", [{}], indirect=True + ) + @pytest.mark.usefixtures("clear_dandischema_modules_and_set_env_vars") + @pytest.mark.parametrize("doi_prefix", ["1234", ".1234", "1.1234", "10.123"]) + def test_invalid_doi_prefix(self, doi_prefix: str) -> None: + """ + Test instantiating `dandischema.conf.Config` with an invalid DOI prefix + """ + from dandischema.conf import Config + + with pytest.raises(ValidationError) as exc_info: + Config( + # Instance identifier must be provided if doi_prefix is provided + instance_identifier="RRID:SCR_017571", + doi_prefix=doi_prefix, + ) + + assert len(exc_info.value.errors()) == 1 + assert exc_info.value.errors()[0]["loc"] == ("dandi_doi_prefix",) + + @pytest.mark.parametrize( + "clear_dandischema_modules_and_set_env_vars", [{}], indirect=True + ) + @pytest.mark.usefixtures("clear_dandischema_modules_and_set_env_vars") + @pytest.mark.parametrize( + "licenses", + [ + [], + ["spdx:AGPL-1.0-only"], + ["spdx:AGPL-1.0-only", "spdx:LOOP", "spdx:SPL-1.0", "spdx:LOOP"], + set(), + {"spdx:AGPL-1.0-only"}, + {"spdx:AGPL-1.0-only", "spdx:LOOP", "spdx:SPL-1.0"}, + ], + ) + def test_valid_licenses_by_args(self, licenses: Union[list[str], set[str]]) -> None: + """ + Test instantiating `dandischema.conf.Config` with a valid list/set of licenses + as 
argument. + """ + from dandischema.conf import Config, License + + # noinspection PyTypeChecker + config = Config(licenses=licenses) + + assert config.licenses == {License(license_) for license_ in set(licenses)} + + @pytest.mark.parametrize( + ("clear_dandischema_modules_and_set_env_vars", "licenses"), + [ + ({"licenses": "[]"}, set()), + ( + {"licenses": '["spdx:AGPL-1.0-only"]'}, + {"spdx:AGPL-1.0-only"}, + ), + ( + { + "licenses": '["spdx:AGPL-1.0-only", "spdx:LOOP", "spdx:SPL-1.0", "spdx:LOOP"]' + }, + {"spdx:AGPL-1.0-only", "spdx:LOOP", "spdx:SPL-1.0", "spdx:LOOP"}, + ), + ], + indirect=["clear_dandischema_modules_and_set_env_vars"], + ) + def test_valid_licenses_by_env_var( + self, clear_dandischema_modules_and_set_env_vars: None, licenses: set[str] + ) -> None: + """ + Test instantiating `dandischema.conf.Config` with a valid array of licenses, + in JSON format, as an environment variable. + """ + from dandischema.conf import Config, License + + # noinspection PyTypeChecker + config = Config() + + assert config.licenses == {License(license_) for license_ in licenses} + + @pytest.mark.parametrize( + "clear_dandischema_modules_and_set_env_vars", [{}], indirect=True + ) + @pytest.mark.usefixtures("clear_dandischema_modules_and_set_env_vars") + @pytest.mark.parametrize( + "licenses", + [ + {"AGPL-1.0-only"}, + {"spdx:AGPL-1.0-only", "spdx:NOT-A-LICENSE", "spdx:SPL-1.0"}, + ], + ) + def test_invalid_licenses_by_args(self, licenses: set[str]) -> None: + """ + Test instantiating `dandischema.conf.Config` with an invalid list/set of + licenses as an argument + """ + from dandischema.conf import Config + + with pytest.raises(ValidationError) as exc_info: + # noinspection PyTypeChecker + Config(licenses=licenses) + + assert len(exc_info.value.errors()) == 1 + assert exc_info.value.errors()[0]["loc"][:-1] == ("dandi_licenses",) + + @pytest.mark.parametrize( + "clear_dandischema_modules_and_set_env_vars", + [ + {}, + {"instance_name": "BAR"}, + {"instance_name": "BAZ", 
"instance_url": "https://www.example.com/"}, + ], + indirect=True, + ) + @pytest.mark.parametrize( + "config_dict", [FOO_CONFIG_DICT, FOO_CONFIG_DICT_WITH_ALIASES] + ) + def test_init_by_kwargs( + self, clear_dandischema_modules_and_set_env_vars: None, config_dict: dict + ) -> None: + """ + Test instantiating `Config` using keyword arguments + + The kwargs are expected to override any environment variables + """ + from dandischema.conf import Config + + config = Config.model_validate(config_dict) + config_json_dump = config.model_dump(mode="json") + + assert config_json_dump.keys() == FOO_CONFIG_DICT.keys() + for k, v in FOO_CONFIG_DICT.items(): + if k == "licenses": + assert sorted(config_json_dump[k]) == sorted(v) + else: + assert config_json_dump[k] == v + + @pytest.mark.parametrize( + "clear_dandischema_modules_and_set_env_vars", + [ + {}, + ], + indirect=True, + ) + def test_init_by_field_names_through_dotenv( + self, + clear_dandischema_modules_and_set_env_vars: None, + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, + ) -> None: + """ + Test instantiating `Config` using a dotenv file with field names as keys + + The initialization is expected to fail because the proper keys are the aliases + when using environment variables or dotenv files. 
+ """ + from dandischema.conf import Config + + dotenv_file_name = "test.env" + dotenv_file_path = tmp_path / dotenv_file_name + + # Write a dotenv file with a field name as key + dotenv_file_path.write_text("instance_name=DANDI-TEST") + + monkeypatch.chdir(tmp_path) + + with pytest.raises(ValidationError) as exc_info: + # noinspection PyArgumentList + Config(_env_file=dotenv_file_name) + + errors = exc_info.value.errors() + assert len(errors) == 1 + + assert errors[0]["type"] == "extra_forbidden" + + @pytest.mark.parametrize( + "clear_dandischema_modules_and_set_env_vars", + [ + {}, + ], + indirect=True, + ) + def test_round_trip(self, clear_dandischema_modules_and_set_env_vars: None) -> None: + """ + Test that a `Config` instance can be round-tripped through JSON serialization + and deserialization without loss of information. + """ + from dandischema.conf import Config + + config_original = Config.model_validate(FOO_CONFIG_DICT) + config_original_str = config_original.model_dump_json() + + config_reconstituted = Config.model_validate_json(config_original_str) + + assert ( + config_reconstituted == config_original + ), "Round-trip of `Config` instance failed" + + +class TestSetInstanceConfig: + @pytest.mark.parametrize( + ("arg", "kwargs"), + [ + (FOO_CONFIG_DICT, {"instance_name": "BAR"}), + ( + FOO_CONFIG_DICT, + {"instance_name": "Baz", "key": "value"}, + ), + ], + ) + def test_invalid_args(self, arg: dict, kwargs: dict) -> None: + """ + Test that `set_instance_config` raises a `ValueError` when called with both + a non-none positional argument and one or more keyword arguments. 
+ """ + from dandischema.conf import Config, set_instance_config + + # Loop over arg in different types/forms + for arg_ in (arg, Config.model_validate(arg)): + with pytest.raises(ValueError, match="not both"): + set_instance_config(arg_, **kwargs) + + @pytest.mark.parametrize( + ("clear_dandischema_modules_and_set_env_vars", "arg", "kwargs"), + [ + ({}, FOO_CONFIG_DICT, {}), + ({}, FOO_CONFIG_DICT, {}), + ({}, None, FOO_CONFIG_DICT), + ], + indirect=["clear_dandischema_modules_and_set_env_vars"], + ) + def test_before_models_import( + self, + clear_dandischema_modules_and_set_env_vars: None, + arg: Optional[dict], + kwargs: dict, + ) -> None: + """ + Test setting the instance configuration before importing `dandischema.models`. + """ + + # Import entities in `dandischema.conf` after clearing dandischema modules + from dandischema.conf import Config, get_instance_config, set_instance_config + + # Loop over arg in different types/forms + for arg_ in (arg, Config.model_validate(arg)) if arg is not None else (arg,): + set_instance_config(arg_, **kwargs) + assert get_instance_config() == Config.model_validate( + FOO_CONFIG_DICT + ), "Configuration values are not set to the expected values" + + @pytest.mark.parametrize( + "clear_dandischema_modules_and_set_env_vars", + [FOO_CONFIG_ENV_VARS], + indirect=True, + ) + def test_after_models_import_same_config( + self, + clear_dandischema_modules_and_set_env_vars: None, + caplog: pytest.LogCaptureFixture, + ) -> None: + """ + Test setting the instance configuration after importing `dandischema.models` + with the same configuration. 
+ """ + from dandischema.conf import Config, get_instance_config, set_instance_config + + # Make sure the `dandischema.models` module is imported before calling + # `set_instance_config` + import dandischema.models # noqa: F401 + + initial_config = get_instance_config() + + caplog.clear() + caplog.set_level(logging.DEBUG, logger="dandischema.conf") + set_instance_config(**FOO_CONFIG_DICT) + + assert ( + len(caplog.records) == 1 + ), "There should be only one log record from logger `dandischema.conf`" + + record_tuple = caplog.record_tuples[0] + assert record_tuple[:-1] == ("dandischema.conf", logging.DEBUG) + assert ( + "reset the DANDI instance configuration to the same value" + in record_tuple[2] + ) + + assert ( + get_instance_config() + == initial_config + == Config.model_validate(FOO_CONFIG_DICT) + ), "Configuration values should remain the same" + + @pytest.mark.parametrize( + "clear_dandischema_modules_and_set_env_vars", + [FOO_CONFIG_ENV_VARS], + indirect=True, + ) + def test_after_models_import_different_config( + self, + clear_dandischema_modules_and_set_env_vars: None, + caplog: pytest.LogCaptureFixture, + ) -> None: + """ + Test setting the instance configuration after importing `dandischema.models` + with a different configuration. 
+ """ + from dandischema.conf import Config, get_instance_config, set_instance_config + + # Make sure the `dandischema.models` module is imported before calling + # `set_instance_config` + import dandischema.models # noqa: F401 + + new_config_dict = { + "instance_name": "BAR", + "doi_prefix": "10.5678", + } + + # noinspection DuplicatedCode + caplog.clear() + caplog.set_level(logging.DEBUG, logger="dandischema.conf") + set_instance_config(**new_config_dict) + + assert ( + len(caplog.records) == 1 + ), "There should be only one log record from logger `dandischema.conf`" + record_tuple = caplog.record_tuples[0] + assert record_tuple[:-1] == ( + "dandischema.conf", + logging.WARNING, + ) + assert "different value will not have any affect" in record_tuple[2] + + assert get_instance_config() == Config.model_validate( + new_config_dict + ), "Configuration values should be set to the new values" diff --git a/dandischema/tests/test_metadata.py b/dandischema/tests/test_metadata.py index ce10b104..8aca6abe 100644 --- a/dandischema/tests/test_metadata.py +++ b/dandischema/tests/test_metadata.py @@ -12,7 +12,15 @@ from dandischema.models import Asset, Dandiset, PublishedAsset, PublishedDandiset from dandischema.utils import TransitionalGenerateJsonSchema, jsonschema_validator -from .utils import skipif_no_network +from .utils import ( + DANDISET_METADATA_DIR, + DOI_PREFIX, + INSTANCE_NAME, + METADATA_DIR, + skipif_instance_name_not_dandi, + skipif_no_network, + skipif_no_test_dandiset_metadata_dir, +) from ..consts import DANDI_SCHEMA_VERSION from ..exceptions import JsonschemaValidationError, PydanticValidationError from ..metadata import ( @@ -27,8 +35,6 @@ validate, ) -METADATA_DIR = Path(__file__).with_name("data") / "metadata" - @pytest.fixture(scope="module") def schema_dir(tmp_path_factory: pytest.TempPathFactory) -> Path: @@ -45,8 +51,9 @@ def test_asset(schema_dir: Path) -> None: validate(data_as_dict) +@skipif_no_test_dandiset_metadata_dir def 
test_dandiset(schema_dir: Path) -> None: - with (METADATA_DIR / "meta_000004.json").open() as fp: + with (DANDISET_METADATA_DIR / "meta_000004.json").open() as fp: data_as_dict = json.load(fp) data_as_dict["schemaVersion"] = DANDI_SCHEMA_VERSION _validate_dandiset_json(data_as_dict, schema_dir) @@ -59,11 +66,16 @@ def test_id(schema_dir: Path) -> None: @skipif_no_network +@skipif_no_test_dandiset_metadata_dir def test_pydantic_validation(schema_dir: Path) -> None: - with (METADATA_DIR / "meta_000004.json").open() as fp: + with (DANDISET_METADATA_DIR / "meta_000004.json").open() as fp: data_as_dict = json.load(fp) data_as_dict["schemaVersion"] = "0.4.4" - validate(data_as_dict, schema_key="Dandiset", json_validation=True) + if INSTANCE_NAME == "DANDI": + # This is run only when the DANDI instance is `"DANDI"` + # since the JSON schema at `0.4.4` is hardcoded to only + # for an instance named `DANDI` + validate(data_as_dict, schema_key="Dandiset", json_validation=True) data_as_dict["schemaVersion"] = DANDI_SCHEMA_VERSION validate(data_as_dict, schema_key="Dandiset", json_validation=True) validate(data_as_dict["about"][0]) @@ -72,6 +84,9 @@ def test_pydantic_validation(schema_dir: Path) -> None: @skipif_no_network +# Skip for when the instance being tested is not `DANDI` since the JSON schema +# version at `0.4.4` and `0.6.0` is hardcoded to only for an instance named `DANDI` +@skipif_instance_name_not_dandi def test_json_schemakey_validation() -> None: with pytest.raises(JsonschemaValidationError) as exc: validate( @@ -120,20 +135,24 @@ def test_mismatch_key(schema_version: str, schema_key: str) -> None: {"schemaKey": "Dandiset"}, "PublishedDandiset", { - "assetsSummary", - "citation", - "contributor", - "datePublished", - "description", - "doi", - "id", - "identifier", - "license", - "manifestLocation", - "name", - "publishedBy", - "url", - "version", + e + for e in [ + "assetsSummary", + "citation", + "contributor", + "datePublished", + "description", + "doi", + 
"id", + "identifier", + "license", + "manifestLocation", + "name", + "publishedBy", + "url", + "version", + ] + if DOI_PREFIX is not None or e != "doi" }, ), ( @@ -143,20 +162,24 @@ def test_mismatch_key(schema_version: str, schema_key: str) -> None: }, "PublishedDandiset", { - "assetsSummary", - "citation", - "contributor", - "datePublished", - "description", - "doi", - "id", - "identifier", - "license", - "manifestLocation", - "name", - "publishedBy", - "url", - "version", + e + for e in [ + "assetsSummary", + "citation", + "contributor", + "datePublished", + "description", + "doi", + "id", + "identifier", + "license", + "manifestLocation", + "name", + "publishedBy", + "url", + "version", + ] + if DOI_PREFIX is not None or e != "doi" }, ), ( @@ -173,19 +196,23 @@ def test_mismatch_key(schema_version: str, schema_key: str) -> None: }, "PublishedDandiset", { - "assetsSummary", - "citation", - "datePublished", - "description", - "doi", - "id", - "identifier", - "license", - "manifestLocation", - "name", - "publishedBy", - "url", - "version", + e + for e in [ + "assetsSummary", + "citation", + "datePublished", + "description", + "doi", + "id", + "identifier", + "license", + "manifestLocation", + "name", + "publishedBy", + "url", + "version", + ] + if DOI_PREFIX is not None or e != "doi" }, ), ( @@ -304,7 +331,7 @@ def test_requirements( ( { "schemaKey": "Dandiset", - "identifier": "DANDI:000000", + "identifier": f"{INSTANCE_NAME}:000000", "schemaVersion": "0.4.4", }, None, @@ -328,16 +355,19 @@ def test_missing_ok( @skipif_no_network def test_missing_ok_error() -> None: - with pytest.raises(JsonschemaValidationError): - validate( - { - "schemaKey": "Dandiset", - "identifier": "000000", - "schemaVersion": "0.4.4", - }, - json_validation=True, - missing_ok=True, - ) + if INSTANCE_NAME == "DANDI": + # Skip for when the instance being tested is not `DANDI` since the JSON schema + # version at `0.4.4` is hardcoded to only for an instance named `DANDI` + with 
pytest.raises(JsonschemaValidationError): + validate( + { + "schemaKey": "Dandiset", + "identifier": "000000", + "schemaVersion": "0.4.4", + }, + json_validation=True, + missing_ok=True, + ) with pytest.raises(PydanticValidationError): validate( { @@ -409,8 +439,13 @@ def test_migrate_value_errors_lesser_target(monkeypatch: pytest.MonkeyPatch) -> @skipif_no_network +@skipif_no_test_dandiset_metadata_dir +# Skip for instance name not being DANDI because JSON schema version at `0.4.4`, the +# schema version of the metadata in `meta_000004old.json`, is hardcoded to only for +# an DANDI instance named `DANDI` +@skipif_instance_name_not_dandi def test_migrate_044(schema_dir: Path) -> None: - with (METADATA_DIR / "meta_000004old.json").open() as fp: + with (DANDISET_METADATA_DIR / "meta_000004old.json").open() as fp: data_as_dict = json.load(fp) with pytest.raises(ValueError): validate(data_as_dict) @@ -586,12 +621,17 @@ def test_aggregate_nonsupported(version: str) -> None: @skipif_no_network def test_validate_older() -> None: - with pytest.raises(ValueError): - validate( - {"schemaVersion": "0.5.2", "schemaKey": "Anykey"}, json_validation=True - ) - with pytest.raises(JsonschemaValidationError): - validate({"schemaVersion": "0.5.2", "schemaKey": "Asset"}, json_validation=True) + if INSTANCE_NAME == "DANDI": + # Skip for when the instance being tested is not `DANDI` since the JSON schema + # version at `0.5.2` is hardcoded to only for an instance named `DANDI` + with pytest.raises(ValueError): + validate( + {"schemaVersion": "0.5.2", "schemaKey": "Anykey"}, json_validation=True + ) + with pytest.raises(JsonschemaValidationError): + validate( + {"schemaVersion": "0.5.2", "schemaKey": "Asset"}, json_validation=True + ) with pytest.raises(JsonschemaValidationError): validate( {"schemaVersion": DANDI_SCHEMA_VERSION, "schemaKey": "Asset"}, diff --git a/dandischema/tests/test_models.py b/dandischema/tests/test_models.py index e7463349..8bb87ccb 100644 --- 
a/dandischema/tests/test_models.py +++ b/dandischema/tests/test_models.py @@ -5,10 +5,12 @@ import anys import pydantic -from pydantic import Field, ValidationError +from pydantic import BaseModel, ConfigDict, Field, ValidationError import pytest -from .utils import _basic_publishmeta +from dandischema.conf import get_instance_config + +from .utils import DOI_PREFIX, INSTANCE_NAME, basic_publishmeta, skipif_no_doi_prefix from .. import models from ..models import ( DANDI_INSTANCE_URL_PATTERN, @@ -36,6 +38,8 @@ ) from ..utils import TransitionalGenerateJsonSchema +_INSTANCE_CONFIG = get_instance_config() + @pytest.mark.parametrize( ("y_type", "anys_value"), @@ -353,10 +357,7 @@ def test_asset_digest() -> None: ), ( LicenseType, - { - "CC0_10": "spdx:CC0-1.0", - "CC_BY_40": "spdx:CC-BY-4.0", - }, + {member.name: member.value for member in _INSTANCE_CONFIG.licenses}, ), ( IdentifierType, @@ -401,12 +402,16 @@ def test_autogenerated_titles() -> None: assert schema["$defs"]["PropertyValue"]["title"] == "Property Value" +@skipif_no_doi_prefix def test_dandimeta_1() -> None: """checking basic metadata for publishing""" + + assert DOI_PREFIX is not None + # metadata without doi, datePublished and publishedBy meta_dict: Dict[str, Any] = { - "identifier": "DANDI:999999", - "id": "DANDI:999999/draft", + "identifier": f"{INSTANCE_NAME}:999999", + "id": f"{INSTANCE_NAME}:999999/draft", "version": "1.0.0", "name": "testing dataset", "description": "testing", @@ -462,7 +467,7 @@ def test_dandimeta_1() -> None: ("doi",): ErrDetail(type="missing", msg=None), } - assert len(exc.value.errors()) == 6 + assert len(exc.value.errors()) == len(expected_errors) for err in exc.value.errors(): err_loc = err["loc"] assert err_loc in expected_errors @@ -472,20 +477,25 @@ def test_dandimeta_1() -> None: assert err["msg"] == expected_errors[err_loc].msg assert set([el["loc"][0] for el in exc.value.errors()]) == { - "assetsSummary", - "datePublished", - "publishedBy", - "doi", - "url", - "id", + 
e + for e in [ + "assetsSummary", + "datePublished", + "publishedBy", + "doi", + "url", + "id", + ] } # after adding basic meta required to publish: doi, datePublished, publishedBy, assetsSummary, # so PublishedDandiset should work meta_dict["url"] = "https://dandiarchive.org/dandiset/999999/0.0.0" - meta_dict["id"] = "DANDI:999999/0.0.0" + meta_dict["id"] = f"{INSTANCE_NAME}:999999/0.0.0" meta_dict["version"] = "0.0.0" - meta_dict.update(_basic_publishmeta(dandi_id="999999")) + meta_dict.update( + basic_publishmeta(INSTANCE_NAME, dandi_id="999999", prefix=DOI_PREFIX) + ) meta_dict["assetsSummary"].update(**{"numberOfBytes": 1, "numberOfFiles": 1}) # Test that releaseNotes is optional (can be omitted) @@ -780,3 +790,196 @@ def test_with_email(self, roles: List[RoleType]) -> None: Test creating a `Contributor` instance with an email """ Contributor(email="nemo@dandiarchive.org", roleName=roles) + + +def _get_field_pattern( + field_name: str, + model: Type[BaseModel], +) -> str: + """ + Get the regex pattern for a field in a Pydantic model. + + Parameters + ---------- + field_name : str + The name of the field to get the pattern for. + model : Type[BaseModel] + The Pydantic model class. + + Returns + ------- + str + The regex pattern for the field. 
+ """ + if field_name not in model.model_fields: + raise ValueError(f"Field '{field_name}' not found in model '{model.__name__}'") + + field = model.model_fields[field_name] + for data in field.metadata: + if hasattr(data, "pattern"): + assert isinstance(data.pattern, str) + return data.pattern + else: + raise ValueError( + f"field `{field_name}` in model `{model.__name__}` has no pattern " + f"constraint" + ) + + +@pytest.mark.parametrize( + ( + "clear_dandischema_modules_and_set_env_vars", + # "exp" means "expected" in the following names + "exp_id_pattern", + "exp_doi_prefix_pattern", + "valid_vendored_fields", + "invalid_vendored_fields", + ), + [ + # === DANDI DANDI instance test cases === + # Without any environment variables set. dandischema is unvendorized. + ( + {}, + r"[A-Z][-A-Z]*", + r"10\.\d{4,}", + { + "dandiset_id": "DANDI-ADHOC:001350/draft", + "dandiset_identifier": "DANDI-ADHOC:001350", + "published_dandiset_id": "DANDI-ADHOC:001350/0.250511.1527", + "published_dandiset_doi": "", + }, + { + "dandiset_id": "45:001350/draft", # Invalid id prefix + "dandiset_identifier": "DANDI-ADHOC:001350", + "published_dandiset_id": "DANDI-ADHOC:001350/0.250511.1527", + "published_dandiset_doi": "", + }, + ), + ( + { + "instance_name": "DANDI", + "instance_identifier": "RRID:ABC_123456", + "doi_prefix": "10.48324", + }, + "DANDI", + r"10\.48324", + { + "dandiset_id": "DANDI:001425/draft", + "dandiset_identifier": "DANDI:001425", + "published_dandiset_id": "DANDI:001425/0.250514.0602", + "published_dandiset_doi": "10.48324/dandi.001425/0.250514.0602", + }, + { + "dandiset_id": "DANDI:001425/draft", + "dandiset_identifier": "DANDI:001425", + "published_dandiset_id": "DANDI:001425/0.250514.0602", + # Invalid registrant code in the DOI prefix + "published_dandiset_doi": "10.1234/dandi.001425/0.250514.0602", + }, + ), + ( + { + "instance_name": "DANDI", + }, + "DANDI", + r"10\.\d{4,}", + { + "dandiset_id": "DANDI:001425/draft", + "dandiset_identifier": "DANDI:001425", 
+ "published_dandiset_id": "DANDI:001425/0.250514.0602", + "published_dandiset_doi": "10.48324/dandi.001425/0.250514.0602", + }, + { + "dandiset_id": "DANDI:001425/draft", + "dandiset_identifier": "DANDI:001425", + # Not matching the `ID_PATTERN` regex + "published_dandiset_id": "DANDI3:001425/0.250514.0602", + "published_dandiset_doi": "10.48324/dandi.001425/0.250514.0602", + }, + ), + # === EMBER DANDI instance test cases === + # Without any environment variables set. dandischema is unvendorized. + ( + {}, + r"[A-Z][-A-Z]*", + r"10\.\d{4,}", + { + "dandiset_id": "DANDI-ADHOC:000005/draft", + "dandiset_identifier": "ABC:000005", + "published_dandiset_id": "DANDI-ADHOC:000005/0.250404.1839", + "published_dandiset_doi": "10.60533/ember-dandi.000005/0.250404.1839", + }, + { + "dandiset_id": "DANDI-ADHOC:000005/draft", + "dandiset_identifier": "ABC:000005", + "published_dandiset_id": "DANDI-ADHOC:000005/0.250404.1839", + # Invalid registrant code in the DOI prefix + "published_dandiset_doi": "10.605/ember-dandi.000005/0.250404.1839", + }, + ), + ( + { + "instance_name": "EMBER-DANDI", + "instance_identifier": "RRID:ABC_123456", + "doi_prefix": "10.60533", + }, + "EMBER-DANDI", + r"10\.60533", + { + "dandiset_id": "EMBER-DANDI:000005/draft", + "dandiset_identifier": "EMBER-DANDI:000005", + "published_dandiset_id": "EMBER-DANDI:000005/0.250404.1839", + "published_dandiset_doi": "10.60533/ember-dandi.000005/0.250404.1839", + }, + { + "dandiset_id": "EMBER-DANDI:000005/draft", + "dandiset_identifier": "EMBER-DANDI:000005", + # Invalid id prefix + "published_dandiset_id": "EM:000005/0.250404.1839", + "published_dandiset_doi": "10.60533/ember-dandi.000005/0.250404.1839", + }, + ), + ], + indirect=["clear_dandischema_modules_and_set_env_vars"], +) +def test_vendorization( + clear_dandischema_modules_and_set_env_vars: None, + exp_id_pattern: str, + exp_doi_prefix_pattern: str, + # Fields that are valid for the vendorization + valid_vendored_fields: dict[str, str], + # Fields 
that are invalid for the vendorization + invalid_vendored_fields: dict[str, str], +) -> None: + """ + Test the vendorization of the DANDI schema + """ + import dandischema.models as models_ + + assert models_.ID_PATTERN == exp_id_pattern + assert models_.DOI_PREFIX_PATTERN == exp_doi_prefix_pattern + + class VendoredFieldModel(BaseModel): + """ + A model consisting of fields with vendorized patterns in `dandischema.models` + """ + + dandiset_id: str = Field(pattern=_get_field_pattern("id", models_.Dandiset)) + dandiset_identifier: str = Field( + pattern=_get_field_pattern("identifier", models_.Dandiset) + ) + published_dandiset_id: str = Field( + pattern=_get_field_pattern("id", models_.PublishedDandiset) + ) + published_dandiset_doi: str = Field( + pattern=_get_field_pattern("doi", models_.PublishedDandiset) + ) + + model_config = ConfigDict(strict=True) + + # Validate the valid vendored fields against the vendored patterns + VendoredFieldModel.model_validate(valid_vendored_fields) + + # Validate the invalid vendored fields against the vendored patterns + with pytest.raises(ValidationError): + VendoredFieldModel.model_validate(invalid_vendored_fields) diff --git a/dandischema/tests/utils.py b/dandischema/tests/utils.py index 5e76d5da..76391762 100644 --- a/dandischema/tests/utils.py +++ b/dandischema/tests/utils.py @@ -1,16 +1,47 @@ from datetime import datetime import os +from pathlib import Path from typing import Any, Dict import pytest +from dandischema.conf import get_instance_config + +_INSTANCE_CONFIG = get_instance_config() +INSTANCE_NAME = _INSTANCE_CONFIG.instance_name +DOI_PREFIX = _INSTANCE_CONFIG.doi_prefix + +METADATA_DIR = Path(__file__).with_name("data") / "metadata" +DANDISET_METADATA_DIR = METADATA_DIR / INSTANCE_NAME + + +skipif_no_datacite_auth = pytest.mark.skipif( + not os.getenv("DATACITE_DEV_LOGIN") or not os.getenv("DATACITE_DEV_PASSWORD"), + reason="no non-empty datacite login and password provided", +) + +skipif_no_doi_prefix = 
pytest.mark.skipif( + DOI_PREFIX is None, reason="DOI_PREFIX is not set" +) + skipif_no_network = pytest.mark.skipif( bool(os.environ.get("DANDI_TESTS_NONETWORK")), reason="no network settings" ) +skipif_no_test_dandiset_metadata_dir = pytest.mark.skipif( + not DANDISET_METADATA_DIR.is_dir(), + reason=f"No test Dandiset metadata directory for a DANDI instance named " + f"{INSTANCE_NAME} exists", +) + + +skipif_instance_name_not_dandi = pytest.mark.skipif( + INSTANCE_NAME != "DANDI", reason='The DANDI instance\'s name is not "DANDI"' +) + -def _basic_publishmeta( - dandi_id: str, version: str = "0.0.0", prefix: str = "10.80507" +def basic_publishmeta( + instance_name: str, dandi_id: str, version: str = "0.0.0", prefix: str = "10.80507" ) -> Dict[str, Any]: """Return extra metadata required by PublishedDandiset @@ -35,6 +66,6 @@ def _basic_publishmeta( "schemaKey": "PublishActivity", }, "version": version, - "doi": f"{prefix}/dandi.{dandi_id}/{version}", + "doi": f"{prefix}/{instance_name.lower()}.{dandi_id}/{version}", } return publish_meta diff --git a/setup.cfg b/setup.cfg index 7fc2607e..07e5cd8b 100644 --- a/setup.cfg +++ b/setup.cfg @@ -30,6 +30,7 @@ python_requires = >=3.9 install_requires = jsonschema[format] packaging>=14.0 + pydantic-settings pydantic[email] ~= 2.4 requests zarr_checksum diff --git a/tools/fetch_spdx_licenses.py b/tools/fetch_spdx_licenses.py new file mode 100644 index 00000000..2431c2f5 --- /dev/null +++ b/tools/fetch_spdx_licenses.py @@ -0,0 +1,71 @@ +# This script is for constructing the SPDX License ID list from a SPDX License List +# JSON file available at a specific URL. +# The `licenses_source_url` is to be modified to point to the desired version of the +# SPDX License List JSON file. 
+ +from datetime import datetime +from importlib.resources import as_file, files + +from pydantic import AnyUrl, BaseModel, Field +from requests import get + +from dandischema.conf import SpdxLicenseIdList, SpdxLicenseListInfo + +licenses_source_url = ( + "https://raw.githubusercontent.com/spdx/license-list-data" + "/refs/tags/v3.27.0/json/licenses.json" +) + + +class SpdxLicense(BaseModel): + """ + Represent a license in the SPDX License List, https://spdx.org/licenses/. + + Notes + ---- + An object of this class is loaded from the JSON version of the list at + https://github.com/spdx/license-list-data/blob/main/json/licenses.json + at a specific version, e.g., "3.27.0" + """ + + license_id: str = Field(validation_alias="licenseId") + + +class SpdxLicenseList(BaseModel): + """ + Represents the SPDX License List, https://spdx.org/licenses/. + + Notes + ---- + The resulting object is a representation of the JSON version of the list at + https://github.com/spdx/license-list-data/blob/main/json/licenses.json + at a specific version, e.g., "3.27.0" + + """ + + license_list_version: str = Field(validation_alias="licenseListVersion") + licenses: list[SpdxLicense] + release_date: datetime = Field(validation_alias="releaseDate") + + +resp = get(licenses_source_url, timeout=30.0) +resp.raise_for_status() +spdx_license_list = SpdxLicenseList.model_validate_json(resp.text) + +spdx_license_id_list = SpdxLicenseIdList( + source=SpdxLicenseListInfo( + version=spdx_license_list.license_list_version, + release_date=spdx_license_list.release_date, + url=AnyUrl(licenses_source_url), + ), + license_ids=[license_.license_id for license_ in spdx_license_list.licenses], +) + + +license_id_file_path = files("dandischema") / "_resources" / "spdx_license_ids.json" + + +with as_file(license_id_file_path) as license_id_file_path_writable: + license_id_file_path_writable.write_text( + spdx_license_id_list.model_dump_json(indent=2) + ) diff --git a/tox.ini b/tox.ini index c32f3b96..af9e7536 
100644 --- a/tox.ini +++ b/tox.ini @@ -5,7 +5,8 @@ isolated_build = True [testenv] extras = test passenv = - DANDI_TESTS_NONETWORK + DANDI_* + DATACITE_DEV_LOGIN DATACITE_DEV_PASSWORD NO_ET commands =