diff --git a/.readthedocs.yml b/.readthedocs.yml index cadaedc1448..792a9f05119 100644 --- a/.readthedocs.yml +++ b/.readthedocs.yml @@ -5,9 +5,9 @@ formats: - pdf build: - os: ubuntu-22.04 + os: ubuntu-24.04 tools: - python: "3.10" + python: "3.12" apt_packages: - graphviz diff --git a/doc/release-notes/cors-filter-fix.md b/doc/release-notes/cors-filter-fix.md new file mode 100644 index 00000000000..3b69d8e7d0f --- /dev/null +++ b/doc/release-notes/cors-filter-fix.md @@ -0,0 +1,3 @@ +## CORS Filter Fix + +Fixed an inconsistency where the `CorsFilter` was not always being invoked when accessing `/api/...` endpoints, preventing these endpoints from being used from webapps even when CORS was properly configured. diff --git a/doc/sphinx-guides/requirements.txt b/doc/sphinx-guides/requirements.txt index 9c74ed75f6d..0422dd03ca1 100755 --- a/doc/sphinx-guides/requirements.txt +++ b/doc/sphinx-guides/requirements.txt @@ -13,4 +13,6 @@ sphinx-tabs==3.4.5 sphinxcontrib-jquery Sphinx-Substitution-Extensions==2025.1.2 -semver>=3,<4 \ No newline at end of file +semver>=3,<4 + +sphinx-reredirects==1.1.0 \ No newline at end of file diff --git a/doc/sphinx-guides/source/_static/installation/cors/cors.json b/doc/sphinx-guides/source/_static/installation/cors/cors.json new file mode 100644 index 00000000000..941eee1109f --- /dev/null +++ b/doc/sphinx-guides/source/_static/installation/cors/cors.json @@ -0,0 +1,10 @@ +{ + "CORSRules": [ + { + "AllowedOrigins": ["*"], + "AllowedHeaders": ["*"], + "AllowedMethods": ["PUT", "GET"], + "ExposeHeaders": ["ETag", "Accept-Ranges", "Content-Encoding", "Content-Range"] + } + ] +} \ No newline at end of file diff --git a/doc/sphinx-guides/source/_static/installation/cors/cors.xml b/doc/sphinx-guides/source/_static/installation/cors/cors.xml new file mode 100644 index 00000000000..98e461c3ffb --- /dev/null +++ b/doc/sphinx-guides/source/_static/installation/cors/cors.xml @@ -0,0 +1,13 @@ + + + + * + * + PUT + GET + ETag + Accept-Ranges + 
Content-Encoding + Content-Range + + \ No newline at end of file diff --git a/doc/sphinx-guides/source/admin/big-data-administration.rst b/doc/sphinx-guides/source/admin/big-data-administration.rst index c4a98a6987a..882edbbf9e7 100644 --- a/doc/sphinx-guides/source/admin/big-data-administration.rst +++ b/doc/sphinx-guides/source/admin/big-data-administration.rst @@ -77,7 +77,7 @@ Benefits: S3 offers several advantages over file storage: Challenges: -- One additional step that is required to enable direct uploads via a Dataverse installation and for direct download to work with previewers and direct upload to work with DVWebloader (:ref:`folder-upload`) is to allow cross site (CORS) requests on your S3 store. +- One additional step that is required to enable direct uploads via a Dataverse installation and for direct download to work with previewers and direct upload to work with DVWebloader (:ref:`folder-upload`) is to allow :ref:`cross site (CORS) requests on your S3 store `. - Cost: S3 offers a pricing model that allows you to pay for the storage and transfer of data based on current usage (versus long term demand) but commercial providers charge more per TB than the equivalent cost of a local disk (though commercial S3 storage is cheaper than commercial file storage). There can also be egress and other charges. Overall, S3 storage is generally more expensive than local file storage but cheaper than cloud file storage. diff --git a/doc/sphinx-guides/source/api/external-tools.rst b/doc/sphinx-guides/source/api/external-tools.rst index 7a708accd3a..c583c9516cc 100644 --- a/doc/sphinx-guides/source/api/external-tools.rst +++ b/doc/sphinx-guides/source/api/external-tools.rst @@ -12,9 +12,13 @@ Introduction External tools are additional applications the user can access or open from your Dataverse installation to preview, explore, and manipulate data files and datasets. The term "external" is used to indicate that the tool is not part of the main Dataverse Software. .. 
note:: - Browser-based tools must have CORS explicitly enabled via :ref:`dataverse.cors.origin `. List every origin that will host your tool (or use ``*`` when a wildcard is acceptable). If an origin is not listed, the browser will block that tool's API requests even if the tool page itself loads. + Browser-based tools require CORS explicitly enabled in Dataverse. See :ref:`dataverse.cors` for details. -Once you have created the external tool itself (which is most of the work!), you need to teach a Dataverse installation how to construct URLs that your tool needs to operate. For example, if you've deployed your tool to fabulousfiletool.com your tool might want the ID of a file and the siteUrl of the Dataverse installation like this: https://fabulousfiletool.com?fileId=42&siteUrl=https://demo.dataverse.org + List every origin that will host your tool (or use ``*`` when a wildcard is acceptable and no authentication is required). + If an origin is not listed, the browser will block that tool's API requests even if the tool page itself loads. + +Once you have created the external tool itself (which is most of the work!), you need to teach a Dataverse installation how to construct URLs that your tool needs to operate. +For example, if you've deployed your tool to *fabulousfiletool.com* your tool might want the ID of a file and the siteUrl of the Dataverse installation like this: *https://fabulousfiletool.com?fileId=42&siteUrl=https://demo.dataverse.org* In short, you will be creating a manifest in JSON format that describes not only how to construct URLs for your tool, but also what types of files your tool operates on, where it should appear in the Dataverse installation web interfaces, etc. 
diff --git a/doc/sphinx-guides/source/api/intro.rst b/doc/sphinx-guides/source/api/intro.rst index 8eb11798dd7..f100fec0a04 100755 --- a/doc/sphinx-guides/source/api/intro.rst +++ b/doc/sphinx-guides/source/api/intro.rst @@ -207,11 +207,11 @@ Please note that some APIs are only documented in other guides that are more sui - Installation Guide - :doc:`/installation/config` + - :doc:`/installation/big-data-support` - Developer Guide - :doc:`/developers/aux-file-support` - - :doc:`/developers/big-data-support` - :doc:`/developers/dataset-migration-api` - :doc:`/developers/dataset-semantic-metadata-api` - :doc:`/developers/s3-direct-upload-api` diff --git a/doc/sphinx-guides/source/conf.py b/doc/sphinx-guides/source/conf.py index b6aadae5761..6285558f572 100755 --- a/doc/sphinx-guides/source/conf.py +++ b/doc/sphinx-guides/source/conf.py @@ -47,6 +47,7 @@ 'myst_parser', 'sphinx_tabs.tabs', 'sphinx_substitution_extensions', + 'sphinx_reredirects', ] # Add any paths that contain templates here, relative to this directory. @@ -78,6 +79,12 @@ # for a list of supported languages. 
language = 'en' +# Redirects for pages that have been moved +# See https://documatt.com/sphinx-reredirects/usage for detailed information +redirects = { + 'developers/big-data-support': '../installation/big-data-support.html', +} + # There are two options for replacing |today|: either, you set today to some # non-false value, then it is used: #today = '' diff --git a/doc/sphinx-guides/source/developers/index.rst b/doc/sphinx-guides/source/developers/index.rst index 28b1fbaae82..6b7994e2884 100755 --- a/doc/sphinx-guides/source/developers/index.rst +++ b/doc/sphinx-guides/source/developers/index.rst @@ -37,7 +37,6 @@ Developer Guide remote-users geospatial selinux - big-data-support aux-file-support s3-direct-upload-api globus-api diff --git a/doc/sphinx-guides/source/developers/big-data-support.rst b/doc/sphinx-guides/source/installation/big-data-support.rst similarity index 85% rename from doc/sphinx-guides/source/developers/big-data-support.rst rename to doc/sphinx-guides/source/installation/big-data-support.rst index 7077fdfcd19..8c70a42b9f5 100644 --- a/doc/sphinx-guides/source/developers/big-data-support.rst +++ b/doc/sphinx-guides/source/installation/big-data-support.rst @@ -49,47 +49,74 @@ The following features are disabled when S3 direct upload is enabled. - Creation of NcML auxiliary files (See :ref:`netcdf-and-hdf5`.) - Extraction of a geospatial bounding box from NetCDF and HDF5 files (see :ref:`netcdf-and-hdf5`) unless :ref:`dataverse.netcdf.geo-extract-s3-direct-upload` is set to true. + .. _cors-s3-bucket: Allow CORS for S3 Buckets ~~~~~~~~~~~~~~~~~~~~~~~~~ -**IMPORTANT:** One additional step that is required to enable direct uploads via a Dataverse installation and for direct download to work with previewers and direct upload to work with dvwebloader (:ref:`folder-upload`) is to allow cross site (CORS) requests on your S3 store. 
-The example below shows how to enable CORS rules (to support upload and download) on a bucket using the AWS CLI command line tool. Note that you may want to limit the AllowedOrigins and/or AllowedHeaders further. https://github.com/gdcc/dataverse-previewers/wiki/Using-Previewers-with-download-redirects-from-S3 has some additional information about doing this. +**IMPORTANT:** This additional step of allowing cross-site requests to your S3 buckets is required to enable direct uploads via a Dataverse installation, direct download to work with previewers, or direct upload to work with *dvwebloader* (:ref:`folder-upload`). + +To successfully enable direct uploads (e.g. :ref:`folder-upload`) or direct downloads (e.g. consumed by previewers), you must both: +* Enable CORS in Dataverse (see :ref:`dataverse.cors`). +* Configure a matching/compatible CORS policy on each S3 bucket (and any CDN/proxy in front of it) that will be used. -Dataverse itself will only emit the necessary ``Access-Control-*`` headers to browsers when CORS has been explicitly enabled via the JVM/MicroProfile setting :ref:`dataverse.cors.origin `. You must both: +**NOTE:** Make sure the bucket's CORS configuration ``AllowedOrigins`` is at least as permissive as the origins you configure in :ref:`dataverse.cors.origin`. +If the bucket allows the wildcard ``*`` but the Dataverse application only allows a subset, the browser will still enforce the more restrictive application response! -* Configure an appropriate ``dataverse.cors.origin`` value (single origin, comma-separated list, or ``*``) on the Dataverse application server; and -* Configure a matching/compatible CORS policy on each S3 bucket (and any CDN/proxy in front of it) that will be used for direct upload or for redirect (download-redirect) operations consumed by previewers. 
+Detailed information for the most common S3 admin tools around CORS: -If you specify multiple origins in ``dataverse.cors.origin`` Dataverse will echo back the requesting origin (when it matches) and will include ``Vary: Origin`` so that shared caches do not serve one origin's response to another. If you configure ``*`` Dataverse will respond with ``Access-Control-Allow-Origin: *`` (note that browsers will not allow credentialed requests with a wildcard). +- `AWS `_ +- `Minio mc `_ +- `s3cmd `_ -Make sure the bucket CORS configuration ``AllowedOrigins`` is at least as permissive as the origins you configure in ``dataverse.cors.origin``. If the bucket allows ``*`` but the Dataverse application only allows a subset, the browser will still enforce the more restrictive application response. +Get Current CORS Policy on Bucket ++++++++++++++++++++++++++++++++++ If you'd like to check the CORS configuration on your bucket before making changes: -``aws s3api get-bucket-cors --bucket `` +.. tabs:: + .. group-tab:: AWS CLI + :code:`aws s3api get-bucket-cors --bucket ` + + .. group-tab:: Minio Client (mc) + :code:`mc cors get /` + +Set CORS Policy on Bucket ++++++++++++++++++++++++++ + +The examples below show how to enable CORS rules (to support upload and download) on a bucket. + +**Note:** You may want to limit the ``AllowedOrigins`` and/or ``AllowedHeaders`` further. +`GDCC/dataverse-previewers `_ has some additional information about doing this. + +Both JSON and XML formats are explained in detail in `AWS Docs `_. + +.. tabs:: + .. group-tab:: AWS CLI + Create a file :download:`cors.json ` as follows: + + .. literalinclude:: /_static/installation/cors/cors.json + :name: aws-cors + :language: json + + Proceed with making the changes: + + :code:`aws s3api put-bucket-cors --bucket --cors-configuration file://cors.json` -To proceed with making changes: + Alternatively, you can enable CORS using the AWS S3 web interface, using json-encoded rules as in the example above. 
-``aws s3api put-bucket-cors --bucket  --cors-configuration file://cors.json`` + .. group-tab:: Minio Client (mc) + Create a file :download:`cors.xml ` as follows: -with the contents of the file cors.json as follows: + .. literalinclude:: /_static/installation/cors/cors.xml + :name: xml-cors + :language: xml -.. code-block:: json + Proceed with making the changes: - { + :code:`mc cors set / ./cors.xml` - "CORSRules": [ - { - "AllowedOrigins": ["*"], - "AllowedHeaders": ["*"], - "AllowedMethods": ["PUT", "GET"], - "ExposeHeaders": ["ETag", "Accept-Ranges", "Content-Encoding", "Content-Range"] - } - ] - } -Alternatively, you can enable CORS using the AWS S3 web interface, using json-encoded rules as in the example above. .. _s3-tags-and-direct-upload: diff --git a/doc/sphinx-guides/source/installation/config.rst b/doc/sphinx-guides/source/installation/config.rst index 15ae99f7f50..e7da01ccafb 100644 --- a/doc/sphinx-guides/source/installation/config.rst +++ b/doc/sphinx-guides/source/installation/config.rst @@ -217,6 +217,36 @@ Dataverse installations are explicity set to "Lax" out of the box by the install To inspect cookie attributes like SameSite, you can use ``curl -s -I http://localhost:8080 | grep JSESSIONID``, for example, looking for the "Set-Cookie" header. + +.. _dataverse.cors: + +Cross-Origin Resource Sharing (CORS) +++++++++++++++++++++++++++++++++++++ + +For any Dataverse installation using or planning to use advanced features like big data support or previewers, dealing with CORS is unavoidable. 
+ +To understand what CORS is all about and how it works, the following are recommended reads: + +- https://developer.mozilla.org/en-US/docs/Web/HTTP/Guides/CORS +- https://corsfix.com/cors-headers +- https://www.caduh.com/blog/understanding-cors +- https://medium.com/@roelljr/demystifying-cors-its-just-http-headers-i-promise-4a02caf460fa + +To learn how to configure the Dataverse application to send CORS headers to browsers, these JVM options are relevant: + +- :ref:`dataverse.cors.origin` +- :ref:`dataverse.cors.methods` +- :ref:`dataverse.cors.headers.allow` +- :ref:`dataverse.cors.headers.expose` + +Dataverse will only emit the necessary ``Access-Control-*`` headers to browsers when CORS has been explicitly enabled via the JVM option :ref:`dataverse.cors.origin `. + +For any resources to be integrated with Dataverse, find documentation how to set up CORS rules on their end at: + +- :ref:`Big Data: CORS for S3 buckets ` +- `GDCC/dataverse-previewers `_ + + .. _ongoing-security: Ongoing Security of Your Installation @@ -1514,7 +1544,7 @@ In addition to having the type "remote" and requiring a label, Trusted Remote St These and other available options are described in the table below. Trusted remote stores can range from being a static trusted website to a sophisticated service managing access requests and logging activity -and/or managing access to a secure enclave. See :doc:`/admin/big-data-administration` (specifically :ref:`remote-stores`) and :doc:`/developers/big-data-support` for additional information on how to use a trusted remote store. For specific remote stores, consult their documentation when configuring the remote store in your Dataverse installation. +and/or managing access to a secure enclave. See :doc:`/admin/big-data-administration` (specifically :ref:`remote-stores`) and :doc:`/installation/big-data-support` for additional information on how to use a trusted remote store. 
For specific remote stores, consult their documentation when configuring the remote store in your Dataverse installation. Note that in the current implementation, activities where Dataverse needs access to data bytes, e.g. to create thumbnails or validate hash values at publication will fail if a remote store does not allow Dataverse access. Implementers of such trusted remote stores should consider using Dataverse's settings to disable ingest, validation of files at publication, etc. as needed. @@ -1548,7 +1578,7 @@ Globus Storage ++++++++++++++ Globus stores allow Dataverse to manage files stored in Globus endpoints or to reference files in remote Globus endpoints, with users leveraging Globus to transfer files to/from Dataverse (rather than using HTTP/HTTPS). -See :doc:`/developers/big-data-support` for additional information on how to use a globus store. Consult the `Globus documentation `_ for information about using Globus and configuring Globus endpoints. +See :doc:`/installation/big-data-support` for additional information on how to use a globus store. Consult the `Globus documentation `_ for information about using Globus and configuring Globus endpoints. In addition to having the type "globus" and requiring a label, Globus Stores share many options with Trusted Remote Stores and options to specify and access a Globus endpoint(s). As with Remote Stores, Globus Stores also use a baseStore - a file, s3, or swift store that can be used to store additional ancillary dataset files (e.g. metadata exports, thumbnails, auxiliary files, etc.). These and other available options are described in the table below. @@ -2670,7 +2700,7 @@ to avoid filled up disks, aid in performance, etc. This directory is used for a to final storage location and/or ingest. 3. ``/googlecloudkey.json`` used with :ref:`Google Cloud Configuration` for BagIt exports. This location is deprecated and might be refactored into a distinct setting in the future. -4. 
The experimental DCM feature for :doc:`../developers/big-data-support` is able to trigger imports for externally +4. The experimental DCM feature for :doc:`/installation/big-data-support` is able to trigger imports for externally uploaded files in a directory tree at ``//`` under certain conditions. This directory may also be used by file stores for :ref:`permanent file storage `, but this is controlled by other, store-specific settings. @@ -3766,21 +3796,38 @@ dataverse.search.default-service Experimental. See :doc:`/developers/search-services`. -.. _dataverse.cors: - -CORS Settings -+++++++++++++ - -The following settings control Cross-Origin Resource Sharing (CORS) for your Dataverse installation. - .. _dataverse.cors.origin: dataverse.cors.origin +++++++++++++++++++++ -Allowed origins for CORS requests. If this setting is not defined, CORS headers are not added. Set to ``*`` to allow all origins (note that browsers will not allow credentialed requests with ``*``) or provide a comma-separated list of explicit origins. +Allowed origins for CORS requests. + +Default: ``*`` (allow all) -Multiple origins can be specified as a comma-separated list (whitespace is ignored): +.. warning:: | If this setting is not explicitly configured, no CORS headers at all are added to responses. + | The default policy (see all CORS related settings) is still being enforced! + +.. list-table:: + :align: left + :widths: 10 10 80 + :header-rows: 1 + :stub-columns: 1 + + * - Type + - Value/Example + - Description + * - Wildcard + - ``*`` + - - Allow access from all origins. + - Response header echoes ``Access-Control-Allow-Origin: *`` + - Browsers will not allow credentialed requests with this setting. + * - List of Origins + - ``https://example.org, https://example.com`` + - - Comma separated, white space ignored. + - Single matching request ``Origin`` header echoed as response header ``Access-Control-Allow-Origin``. 
+ - ``Vary: Origin`` header added to support correct proxy/CDN caching. + - Use ``${dataverse.siteurl}`` to dynamically add the installation's URL to the list. Example: @@ -3788,18 +3835,14 @@ Example: Can also be set via any `supported MicroProfile Config API source`_, e.g. the environment variable ``DATAVERSE_CORS_ORIGIN``. -Behavior: - -* When a list of origins is configured, Dataverse echoes the single matching request ``Origin`` value in ``Access-Control-Allow-Origin`` and adds ``Vary: Origin`` to support correct proxy/CDN caching. -* When ``*`` is configured, ``Access-Control-Allow-Origin: *`` is sent and ``Vary`` is not modified. - .. _dataverse.cors.methods: dataverse.cors.methods ++++++++++++++++++++++ -Allowed HTTP methods for CORS requests. The default when this setting is missing is "GET,POST,OPTIONS,PUT,DELETE". -Multiple methods can be specified as a comma-separated list. +Allowed HTTP methods for CORS requests as a comma separated list. Whitespace is ignored. + +Default: ``GET,POST,OPTIONS,PUT,DELETE`` Example: @@ -3812,8 +3855,9 @@ Can also be set via any `supported MicroProfile Config API source`_, e.g. the en dataverse.cors.headers.allow ++++++++++++++++++++++++++++ -Allowed headers for CORS requests. The default when this setting is missing is "Accept,Content-Type,X-Dataverse-key,Range". -Multiple headers can be specified as a comma-separated list. +Allowed headers for CORS requests as a comma separated list. Whitespace is ignored. + +Default: ``Accept, Content-Type, X-Dataverse-key, Range`` Example: @@ -3826,8 +3870,9 @@ Can also be set via any `supported MicroProfile Config API source`_, e.g. the en dataverse.cors.headers.expose +++++++++++++++++++++++++++++ -Headers to expose in CORS responses. The default when this setting is missing is "Accept-Ranges,Content-Range,Content-Encoding". -Multiple headers can be specified as a comma-separated list. +Headers to expose in CORS responses as a comma separated list. Whitespace is ignored. 
+ +Default: ``Accept-Ranges, Content-Range, Content-Encoding`` Example: diff --git a/doc/sphinx-guides/source/installation/index.rst b/doc/sphinx-guides/source/installation/index.rst index a0a88700d3d..bdfb4cc8037 100755 --- a/doc/sphinx-guides/source/installation/index.rst +++ b/doc/sphinx-guides/source/installation/index.rst @@ -16,6 +16,7 @@ Installation Guide prerequisites installation-main config + big-data-support upgrading shibboleth oauth2 diff --git a/src/main/java/edu/harvard/iq/dataverse/filter/CorsFilter.java b/src/main/java/edu/harvard/iq/dataverse/filter/CorsFilter.java index d7f14fff245..a27996e47e6 100644 --- a/src/main/java/edu/harvard/iq/dataverse/filter/CorsFilter.java +++ b/src/main/java/edu/harvard/iq/dataverse/filter/CorsFilter.java @@ -9,6 +9,7 @@ import edu.harvard.iq.dataverse.settings.JvmSettings; import edu.harvard.iq.dataverse.util.ListSplitUtil; +import jakarta.servlet.DispatcherType; import jakarta.servlet.Filter; import jakarta.servlet.FilterChain; import jakarta.servlet.FilterConfig; @@ -27,11 +28,22 @@ * 1. Reads CORS configuration from JVM settings (dataverse.cors.*). See the Dataverse Configuration Guide for more details. * 2. Determines whether CORS should be allowed based on these settings. * 3. If CORS is allowed, it adds the appropriate CORS headers to all HTTP responses. The JVMSettings allow customization of the header contents if desired. - * + * + * The broader dispatcher set is intentional: + * - REQUEST applies CORS to direct client requests. + * - FORWARD covers internal forwards, including API paths rewritten by + * {@link edu.harvard.iq.dataverse.api.ApiRouter} from {@code /api/...} to {@code /api/v1/...}. + * - ERROR ensures error responses also carry CORS headers, so browser clients can read error details. + * - ASYNC keeps behavior consistent for asynchronous servlet/JAX-RS processing. + * * The filter is applied to all paths ("/*") in the application. 
*/ - -@WebFilter("/*") +@WebFilter(value = "/*", dispatcherTypes = { + DispatcherType.REQUEST, + DispatcherType.FORWARD, + DispatcherType.ERROR, + DispatcherType.ASYNC +}) public class CorsFilter implements Filter { private boolean allowCors; diff --git a/src/test/java/edu/harvard/iq/dataverse/api/CorsIT.java b/src/test/java/edu/harvard/iq/dataverse/api/CorsIT.java new file mode 100644 index 00000000000..f3504a4fb4a --- /dev/null +++ b/src/test/java/edu/harvard/iq/dataverse/api/CorsIT.java @@ -0,0 +1,96 @@ +package edu.harvard.iq.dataverse.api; + +import io.restassured.RestAssured; +import io.restassured.response.Response; +import java.util.Arrays; +import java.util.HashSet; +import java.util.List; +import java.util.Locale; +import java.util.Set; +import java.util.stream.Collectors; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.ValueSource; + +import static io.restassured.RestAssured.given; +import static org.hamcrest.Matchers.anyOf; +import static org.hamcrest.Matchers.blankOrNullString; +import static org.hamcrest.Matchers.is; +import static org.hamcrest.Matchers.not; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertTrue; + +/** + * Integration tests for CORS headers on API endpoints. These tests verify that the expected CORS + * headers are present and contain the correct values for preflight OPTIONS requests to key + * API endpoints. + * + * For this to work CORS has to be enabled. Eg. in docker-compose-dev.yml add + * DATAVERSE_CORS_ORIGIN: "*" + * env to `dev_dataverse`. 
+ */ +class CorsIT { + private static final String ORIGIN_NULL = "null"; + + private final List expectedCorsMethods = List.of("GET", "POST", "PUT", "DELETE", "OPTIONS"); + private final List expectedCorsAllowHeaders = List.of("Accept", "Content-Type", "X-Dataverse-key", "Range"); + private final List expectedCorsExposeHeaders = List.of("Accept-Ranges", "Content-Range", "Content-Encoding"); + + @BeforeAll + static void setUp() { + RestAssured.baseURI = UtilIT.getRestAssuredBaseUri(); + } + + @ParameterizedTest(name = "CORS preflight headers on {0}") + @ValueSource(strings = { + "/api/dataverses/root/datasets", + "/api/v1/dataverses/root/datasets", + "/page_doesnt_exist", + "/dvn/api/data-deposit/v1.1/swordv2/collection/dataverse/root" + }) + void testPreflightOptionsCorsHeaders(String path) { + Response response = + given() + .header("Accept", "*/*") + .header("Accept-Language", "en-US,en;q=0.9,es;q=0.8,hu;q=0.7") + .header("Access-Control-Request-Headers", "content-type,x-dataverse-key") + .header("Access-Control-Request-Method", "POST") + .header("Origin", ORIGIN_NULL) + .header("User-Agent", "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/144.0.0.0 Safari/537.36") + .when() + .options(path) + .then() + .log().ifValidationFails() + .statusCode(anyOf(is(200), is(204))) + .header("Access-Control-Allow-Methods", not(blankOrNullString())) + .header("Access-Control-Allow-Headers", not(blankOrNullString())) + .header("Access-Control-Expose-Headers", not(blankOrNullString())) + .extract() + .response(); + + assertHeaderSetEquals("Access-Control-Allow-Methods", expectedCorsMethods, response); + assertHeaderSetEquals("Access-Control-Allow-Headers", expectedCorsAllowHeaders, response); + assertHeaderSetEquals("Access-Control-Expose-Headers", expectedCorsExposeHeaders, response); + } + + private static void assertHeaderSetEquals(String headerName, List expectedTokens, Response response) { + String headerValue = 
response.getHeader(headerName); + assertTrue(headerValue != null && !headerValue.isBlank(), "Missing header: " + headerName); + Set actual = normalizeTokens(headerValue); + Set expected = expectedTokens.stream() + .map(CorsIT::normalizeToken) + .collect(Collectors.toCollection(HashSet::new)); + assertEquals(expected, actual, "Unexpected value for header: " + headerName); + } + + private static Set normalizeTokens(String headerValue) { + return Arrays.stream(headerValue.split(",")) + .map(CorsIT::normalizeToken) + .filter(token -> !token.isEmpty()) + .collect(Collectors.toCollection(HashSet::new)); + } + + private static String normalizeToken(String value) { + return value == null ? "" : value.trim().toLowerCase(Locale.ROOT); + } +} diff --git a/tests/integration-tests.txt b/tests/integration-tests.txt index 51253928df9..c271657eaac 100644 --- a/tests/integration-tests.txt +++ b/tests/integration-tests.txt @@ -1 +1 @@ -DataversesIT,DatasetsIT,SwordIT,AdminIT,BuiltinUsersIT,UsersIT,UtilIT,ConfirmEmailIT,FileMetadataIT,FilesIT,SearchIT,InReviewWorkflowIT,HarvestingServerIT,HarvestingClientsIT,MoveIT,MakeDataCountApiIT,FileTypeDetectionIT,EditDDIIT,ExternalToolsIT,AccessIT,DuplicateFilesIT,DownloadFilesIT,LinkIT,DeleteUsersIT,DeactivateUsersIT,AuxiliaryFilesIT,InvalidCharactersIT,LicensesIT,NotificationsIT,BagIT,MetadataBlocksIT,NetcdfIT,SignpostingIT,FitsIT,LogoutIT,DataRetrieverApiIT,ProvIT,S3AccessIT,OpenApiIT,InfoIT,DatasetFieldsIT,SavedSearchIT,DatasetTypesIT,DataverseFeaturedItemsIT,SendFeedbackApiIT,CustomizationIT,JsonLDExportIT,WorkflowsIT,LDNInboxIT,LocalContextsIT 
+DataversesIT,DatasetsIT,SwordIT,AdminIT,BuiltinUsersIT,UsersIT,UtilIT,ConfirmEmailIT,FileMetadataIT,FilesIT,SearchIT,InReviewWorkflowIT,HarvestingServerIT,HarvestingClientsIT,MoveIT,MakeDataCountApiIT,FileTypeDetectionIT,EditDDIIT,ExternalToolsIT,AccessIT,DuplicateFilesIT,DownloadFilesIT,LinkIT,DeleteUsersIT,DeactivateUsersIT,AuxiliaryFilesIT,InvalidCharactersIT,LicensesIT,NotificationsIT,BagIT,MetadataBlocksIT,NetcdfIT,SignpostingIT,FitsIT,LogoutIT,DataRetrieverApiIT,ProvIT,S3AccessIT,OpenApiIT,InfoIT,DatasetFieldsIT,SavedSearchIT,DatasetTypesIT,DataverseFeaturedItemsIT,SendFeedbackApiIT,CustomizationIT,JsonLDExportIT,WorkflowsIT,LDNInboxIT,LocalContextsIT,CorsIT