From 76920321792e0dcd881d79a4e0e0777e2c1971fa Mon Sep 17 00:00:00 2001 From: "renovate[bot]" <29139614+renovate[bot]@users.noreply.github.com> Date: Fri, 18 Jul 2025 22:24:10 +0000 Subject: [PATCH 01/51] chore(deps): update dependency commons-io:commons-io to v2.20.0 --- build.gradle | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/build.gradle b/build.gradle index ae540446..31bcf53e 100644 --- a/build.gradle +++ b/build.gradle @@ -52,7 +52,7 @@ dependencies { // http client implementation group: 'org.apache.httpcomponents', name: 'httpclient', version: '4.5.14' // common file system operations - implementation group: 'commons-io', name: 'commons-io', version: '2.19.0' + implementation group: 'commons-io', name: 'commons-io', version: '2.20.0' // read from and write to zip files implementation group: 'net.lingala.zip4j', name: 'zip4j', version: '2.11.5' // compare json documents in tests From a850f650dc730c7dbf1531dee93cd98351a60031 Mon Sep 17 00:00:00 2001 From: "renovate[bot]" <29139614+renovate[bot]@users.noreply.github.com> Date: Wed, 9 Jul 2025 19:58:52 +0000 Subject: [PATCH 02/51] chore(deps): update dependency commons-validator:commons-validator to v1.10.0 --- build.gradle | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/build.gradle b/build.gradle index 31bcf53e..55db11d1 100644 --- a/build.gradle +++ b/build.gradle @@ -58,7 +58,7 @@ dependencies { // compare json documents in tests implementation 'com.github.fslev:json-compare:7.0' // url validator - implementation group: 'commons-validator', name: 'commons-validator', version: '1.9.0' + implementation group: 'commons-validator', name: 'commons-validator', version: '1.10.0' // logging implementation group: 'org.slf4j', name: 'slf4j-jdk14', version: '2.0.17' // JSON-LD, Zenodo mapping From 76669b32dec67f17ddeb44a530f6e4e2a702a7cb Mon Sep 17 00:00:00 2001 From: "renovate[bot]" <29139614+renovate[bot]@users.noreply.github.com> Date: Fri, 4 Jul 2025 15:10:06 +0000 Subject: 
[PATCH 03/51] chore(deps): update dependency gradle to v8.14.3 --- gradle/wrapper/gradle-wrapper.properties | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gradle/wrapper/gradle-wrapper.properties b/gradle/wrapper/gradle-wrapper.properties index ff23a68d..d4081da4 100644 --- a/gradle/wrapper/gradle-wrapper.properties +++ b/gradle/wrapper/gradle-wrapper.properties @@ -1,6 +1,6 @@ distributionBase=GRADLE_USER_HOME distributionPath=wrapper/dists -distributionUrl=https\://services.gradle.org/distributions/gradle-8.14.2-bin.zip +distributionUrl=https\://services.gradle.org/distributions/gradle-8.14.3-bin.zip networkTimeout=10000 validateDistributionUrl=true zipStoreBase=GRADLE_USER_HOME From 0573f84adea0a426f44af7d4647afbe04d5098f3 Mon Sep 17 00:00:00 2001 From: "renovate[bot]" <29139614+renovate[bot]@users.noreply.github.com> Date: Sat, 28 Jun 2025 05:43:53 +0000 Subject: [PATCH 04/51] chore(deps): update dependency com.networknt:json-schema-validator to v1.5.8 --- build.gradle | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/build.gradle b/build.gradle index 55db11d1..e93088a2 100644 --- a/build.gradle +++ b/build.gradle @@ -64,7 +64,7 @@ dependencies { // JSON-LD, Zenodo mapping implementation group: 'com.apicatalog', name: 'titanium-json-ld', version: '1.6.0' // metadata validation, profiles based on JSON schema - implementation group: "com.networknt", name: "json-schema-validator", version: "1.5.7" + implementation group: "com.networknt", name: "json-schema-validator", version: "1.5.8" implementation 'org.glassfish:jakarta.json:2.0.1' //JTE for template processing implementation('gg.jte:jte:3.2.1') From 42f4186a599cd496c2943adcbec931f052f6cd3d Mon Sep 17 00:00:00 2001 From: "renovate[bot]" <29139614+renovate[bot]@users.noreply.github.com> Date: Wed, 18 Jun 2025 11:09:31 +0000 Subject: [PATCH 05/51] chore(deps): update plugin io.freefair.maven-publish-java to v8.14 --- build.gradle | 2 +- 1 file changed, 1 insertion(+), 1 
deletion(-) diff --git a/build.gradle b/build.gradle index e93088a2..f6ff51c2 100644 --- a/build.gradle +++ b/build.gradle @@ -13,7 +13,7 @@ plugins { // Publishing of JAR to Nexus instances (e.g., OSSRH) // https://github.com/gradle-nexus/publish-plugin id "io.github.gradle-nexus.publish-plugin" version "2.0.0" - id "io.freefair.maven-publish-java" version "8.13.1" + id "io.freefair.maven-publish-java" version "8.14" } group 'edu.kit.datamanager' From 92bcf09a58d88c7763ebba3dc5d6b2d8366f1431 Mon Sep 17 00:00:00 2001 From: "renovate[bot]" <29139614+renovate[bot]@users.noreply.github.com> Date: Fri, 18 Jul 2025 20:10:07 +0000 Subject: [PATCH 06/51] chore(deps): update jacksonversion to v2.19.2 --- build.gradle | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/build.gradle b/build.gradle index f6ff51c2..29386fca 100644 --- a/build.gradle +++ b/build.gradle @@ -38,7 +38,7 @@ repositories { } ext { - jacksonVersion = '2.19.0' + jacksonVersion = '2.19.2' } dependencies { From bfb85d7a55e297b266be63909f977008a45eaaca Mon Sep 17 00:00:00 2001 From: "renovate[bot]" <29139614+renovate[bot]@users.noreply.github.com> Date: Mon, 21 Jul 2025 10:33:32 +0000 Subject: [PATCH 07/51] chore(deps): update dependency org.junit:junit-bom to v5.13.4 --- build.gradle | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/build.gradle b/build.gradle index 29386fca..c03911a1 100644 --- a/build.gradle +++ b/build.gradle @@ -43,7 +43,7 @@ ext { dependencies { // JUnit setup for testing - testImplementation(platform("org.junit:junit-bom:5.13.0")) + testImplementation(platform("org.junit:junit-bom:5.13.4")) testImplementation('org.junit.jupiter:junit-jupiter') testRuntimeOnly('org.junit.platform:junit-platform-launcher') // JSON object mapping / (de-)serialization From f711e677f3a5b8decda6a7b94d69bac63d30af2d Mon Sep 17 00:00:00 2001 From: "renovate[bot]" <29139614+renovate[bot]@users.noreply.github.com> Date: Sun, 31 Aug 2025 09:15:10 +0000 Subject: [PATCH 08/51] 
chore(deps): update jacksonversion to v2.20.0 --- build.gradle | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/build.gradle b/build.gradle index c03911a1..3fd00661 100644 --- a/build.gradle +++ b/build.gradle @@ -38,7 +38,7 @@ repositories { } ext { - jacksonVersion = '2.19.2' + jacksonVersion = '2.20.0' } dependencies { From 0f144c63818a48bafe1ec3c24540aa3d715e0228 Mon Sep 17 00:00:00 2001 From: "renovate[bot]" <29139614+renovate[bot]@users.noreply.github.com> Date: Thu, 21 Aug 2025 05:48:58 +0000 Subject: [PATCH 09/51] chore(deps): update actions/setup-java action to v5 --- .github/workflows/gradle.yml | 2 +- .github/workflows/publishRelease.yml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/gradle.yml b/.github/workflows/gradle.yml index fe8a5ce3..79eac069 100644 --- a/.github/workflows/gradle.yml +++ b/.github/workflows/gradle.yml @@ -27,7 +27,7 @@ jobs: steps: - uses: actions/checkout@v4 - name: Set up openJDK version - uses: actions/setup-java@v4 + uses: actions/setup-java@v5 with: java-version: ${{ matrix.jdk }} distribution: 'zulu' diff --git a/.github/workflows/publishRelease.yml b/.github/workflows/publishRelease.yml index b70d034c..11006f24 100644 --- a/.github/workflows/publishRelease.yml +++ b/.github/workflows/publishRelease.yml @@ -10,7 +10,7 @@ jobs: steps: - uses: actions/checkout@v4 - name: Set up Java - uses: actions/setup-java@v4 + uses: actions/setup-java@v5 with: java-version: 21 distribution: 'zulu' # openjdk From 80c29035c0662a37e704824f8c6d19f05f236bd2 Mon Sep 17 00:00:00 2001 From: "renovate[bot]" <29139614+renovate[bot]@users.noreply.github.com> Date: Mon, 18 Aug 2025 21:05:57 +0000 Subject: [PATCH 10/51] chore(deps): update plugin io.freefair.maven-publish-java to v8.14.2 --- build.gradle | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/build.gradle b/build.gradle index 3fd00661..11a4a465 100644 --- a/build.gradle +++ b/build.gradle @@ -13,7 +13,7 @@ plugins { // 
Publishing of JAR to Nexus instances (e.g., OSSRH) // https://github.com/gradle-nexus/publish-plugin id "io.github.gradle-nexus.publish-plugin" version "2.0.0" - id "io.freefair.maven-publish-java" version "8.14" + id "io.freefair.maven-publish-java" version "8.14.2" } group 'edu.kit.datamanager' From 901f8012060165e16abfbf628f00cf6ba5e90c17 Mon Sep 17 00:00:00 2001 From: "renovate[bot]" <29139614+renovate[bot]@users.noreply.github.com> Date: Mon, 11 Aug 2025 14:12:19 +0000 Subject: [PATCH 11/51] chore(deps): update actions/checkout action to v5 --- .github/workflows/codeql-analysis.yml | 2 +- .github/workflows/gradle.yml | 2 +- .github/workflows/publishRelease.yml | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/codeql-analysis.yml b/.github/workflows/codeql-analysis.yml index 95eef02a..c09d909e 100644 --- a/.github/workflows/codeql-analysis.yml +++ b/.github/workflows/codeql-analysis.yml @@ -38,7 +38,7 @@ jobs: steps: - name: Checkout repository - uses: actions/checkout@v4 + uses: actions/checkout@v5 # Initializes the CodeQL tools for scanning. 
- name: Initialize CodeQL diff --git a/.github/workflows/gradle.yml b/.github/workflows/gradle.yml index 79eac069..314f91ac 100644 --- a/.github/workflows/gradle.yml +++ b/.github/workflows/gradle.yml @@ -25,7 +25,7 @@ jobs: runs-on: ${{ matrix.os }} steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@v5 - name: Set up openJDK version uses: actions/setup-java@v5 with: diff --git a/.github/workflows/publishRelease.yml b/.github/workflows/publishRelease.yml index 11006f24..dedd9bce 100644 --- a/.github/workflows/publishRelease.yml +++ b/.github/workflows/publishRelease.yml @@ -8,7 +8,7 @@ jobs: publish: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@v5 - name: Set up Java uses: actions/setup-java@v5 with: From a0e31788d6590acf25f5143e8fae1fb004710187 Mon Sep 17 00:00:00 2001 From: "renovate[bot]" <29139614+renovate[bot]@users.noreply.github.com> Date: Fri, 1 Aug 2025 18:13:01 +0000 Subject: [PATCH 12/51] chore(deps): update dependency com.github.fslev:json-compare to v7.1 --- build.gradle | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/build.gradle b/build.gradle index 11a4a465..94538473 100644 --- a/build.gradle +++ b/build.gradle @@ -56,7 +56,7 @@ dependencies { // read from and write to zip files implementation group: 'net.lingala.zip4j', name: 'zip4j', version: '2.11.5' // compare json documents in tests - implementation 'com.github.fslev:json-compare:7.0' + implementation 'com.github.fslev:json-compare:7.1' // url validator implementation group: 'commons-validator', name: 'commons-validator', version: '1.10.0' // logging From f87beea0753e57e82f5cdf199787e7720f7b3924 Mon Sep 17 00:00:00 2001 From: "renovate[bot]" <29139614+renovate[bot]@users.noreply.github.com> Date: Mon, 27 Oct 2025 19:01:25 +0000 Subject: [PATCH 13/51] chore(deps): update actions/upload-artifact action to v5 --- .github/workflows/gradle.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/.github/workflows/gradle.yml b/.github/workflows/gradle.yml index 314f91ac..6bb82775 100644 --- a/.github/workflows/gradle.yml +++ b/.github/workflows/gradle.yml @@ -39,7 +39,7 @@ jobs: run: ./gradlew -Dprofile=release build - name: Upload (test) reports as artifact on GitHub on manual runs if: github.event_name == 'workflow_dispatch' - uses: actions/upload-artifact@v4 + uses: actions/upload-artifact@v5 with: name: test-report ${{ matrix.os }} JDK ${{ matrix.jdk }} path: build/reports From ac1ae172a2e559ff7fc7bfa2a667a4a458838b09 Mon Sep 17 00:00:00 2001 From: "renovate[bot]" <29139614+renovate[bot]@users.noreply.github.com> Date: Tue, 14 Oct 2025 02:07:48 +0000 Subject: [PATCH 14/51] chore(deps): update dependency jacoco to v0.8.14 --- build.gradle | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/build.gradle b/build.gradle index 94538473..7adde30a 100644 --- a/build.gradle +++ b/build.gradle @@ -189,7 +189,7 @@ jacocoTestReport { } jacoco { - toolVersion = "0.8.13" + toolVersion = "0.8.14" } // maxParallelForks(2) From c928ed5b68260c75d9c9c7792e0546c1b4995557 Mon Sep 17 00:00:00 2001 From: "renovate[bot]" <29139614+renovate[bot]@users.noreply.github.com> Date: Mon, 13 Oct 2025 17:35:40 +0000 Subject: [PATCH 15/51] chore(deps): update github/codeql-action action to v4 --- .github/workflows/codeql-analysis.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/codeql-analysis.yml b/.github/workflows/codeql-analysis.yml index c09d909e..b9a11d34 100644 --- a/.github/workflows/codeql-analysis.yml +++ b/.github/workflows/codeql-analysis.yml @@ -42,7 +42,7 @@ jobs: # Initializes the CodeQL tools for scanning. - name: Initialize CodeQL - uses: github/codeql-action/init@v3 + uses: github/codeql-action/init@v4 with: languages: ${{ matrix.language }} # If you wish to specify custom queries, you can do so here or in a config file. 
@@ -56,7 +56,7 @@ jobs: # Autobuild attempts to build any compiled languages (C/C++, C#, or Java). # If this step fails, then you should remove it and run the build manually (see below) - name: Autobuild - uses: github/codeql-action/autobuild@v3 + uses: github/codeql-action/autobuild@v4 # Command-line programs to run using the OS shell. # See https://docs.github.com/en/actions/using-workflows/workflow-syntax-for-github-actions#jobsjob_idstepsrun @@ -69,4 +69,4 @@ jobs: # ./location_of_script_within_repo/buildscript.sh - name: Perform CodeQL Analysis - uses: github/codeql-action/analyze@v3 + uses: github/codeql-action/analyze@v4 From 3a446e178062843982be6b444ff7c5ce81f55677 Mon Sep 17 00:00:00 2001 From: "renovate[bot]" <29139614+renovate[bot]@users.noreply.github.com> Date: Wed, 8 Oct 2025 21:15:30 +0000 Subject: [PATCH 16/51] chore(deps): update dependency com.apicatalog:titanium-json-ld to v1.7.0 --- build.gradle | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/build.gradle b/build.gradle index 7adde30a..5d5e63da 100644 --- a/build.gradle +++ b/build.gradle @@ -62,7 +62,7 @@ dependencies { // logging implementation group: 'org.slf4j', name: 'slf4j-jdk14', version: '2.0.17' // JSON-LD, Zenodo mapping - implementation group: 'com.apicatalog', name: 'titanium-json-ld', version: '1.6.0' + implementation group: 'com.apicatalog', name: 'titanium-json-ld', version: '1.7.0' // metadata validation, profiles based on JSON schema implementation group: "com.networknt", name: "json-schema-validator", version: "1.5.8" implementation 'org.glassfish:jakarta.json:2.0.1' From 4679880892c7220c6a2ec2c5bdc66e7d34dbb6a2 Mon Sep 17 00:00:00 2001 From: "renovate[bot]" <29139614+renovate[bot]@users.noreply.github.com> Date: Sat, 4 Oct 2025 12:59:38 +0000 Subject: [PATCH 17/51] chore(deps): update gradle/actions action to v5 --- .github/workflows/gradle.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/gradle.yml 
b/.github/workflows/gradle.yml index 6bb82775..008461e5 100644 --- a/.github/workflows/gradle.yml +++ b/.github/workflows/gradle.yml @@ -34,7 +34,7 @@ jobs: - name: Install Dependencies run: npm install -g ro-crate-html-js - name: Setup Gradle - uses: gradle/actions/setup-gradle@v4 + uses: gradle/actions/setup-gradle@v5 - name: Build and Test with Gradle run: ./gradlew -Dprofile=release build - name: Upload (test) reports as artifact on GitHub on manual runs From d7b3bb54e9073ea5bd131f91678997b835e26ace Mon Sep 17 00:00:00 2001 From: "renovate[bot]" <29139614+renovate[bot]@users.noreply.github.com> Date: Sat, 27 Sep 2025 01:34:43 +0000 Subject: [PATCH 18/51] chore(deps): update plugin io.freefair.maven-publish-java to v9 --- build.gradle | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/build.gradle b/build.gradle index 5d5e63da..62beb407 100644 --- a/build.gradle +++ b/build.gradle @@ -13,7 +13,7 @@ plugins { // Publishing of JAR to Nexus instances (e.g., OSSRH) // https://github.com/gradle-nexus/publish-plugin id "io.github.gradle-nexus.publish-plugin" version "2.0.0" - id "io.freefair.maven-publish-java" version "8.14.2" + id "io.freefair.maven-publish-java" version "9.0.0" } group 'edu.kit.datamanager' From 89cbca57caebf5c67653d0c263fe28c2cfd20bf3 Mon Sep 17 00:00:00 2001 From: "renovate[bot]" <29139614+renovate[bot]@users.noreply.github.com> Date: Mon, 3 Nov 2025 18:14:52 +0000 Subject: [PATCH 19/51] chore(deps): update dependency org.junit:junit-bom to v5.14.1 --- build.gradle | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/build.gradle b/build.gradle index 62beb407..15be1bdf 100644 --- a/build.gradle +++ b/build.gradle @@ -43,7 +43,7 @@ ext { dependencies { // JUnit setup for testing - testImplementation(platform("org.junit:junit-bom:5.13.4")) + testImplementation(platform("org.junit:junit-bom:5.14.1")) testImplementation('org.junit.jupiter:junit-jupiter') testRuntimeOnly('org.junit.platform:junit-platform-launcher') // JSON 
object mapping / (de-)serialization From 32feded9c0fa71b341991297f7d715e65d01df5f Mon Sep 17 00:00:00 2001 From: "renovate[bot]" <29139614+renovate[bot]@users.noreply.github.com> Date: Sat, 13 Sep 2025 23:30:40 +0000 Subject: [PATCH 20/51] chore(deps): update dependency com.networknt:json-schema-validator to v1.5.9 --- build.gradle | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/build.gradle b/build.gradle index 15be1bdf..7921bb78 100644 --- a/build.gradle +++ b/build.gradle @@ -64,7 +64,7 @@ dependencies { // JSON-LD, Zenodo mapping implementation group: 'com.apicatalog', name: 'titanium-json-ld', version: '1.7.0' // metadata validation, profiles based on JSON schema - implementation group: "com.networknt", name: "json-schema-validator", version: "1.5.8" + implementation group: "com.networknt", name: "json-schema-validator", version: "1.5.9" implementation 'org.glassfish:jakarta.json:2.0.1' //JTE for template processing implementation('gg.jte:jte:3.2.1') From b6cc350df221231670fa54350692381c059c4721 Mon Sep 17 00:00:00 2001 From: "renovate[bot]" <29139614+renovate[bot]@users.noreply.github.com> Date: Wed, 5 Nov 2025 16:55:16 +0000 Subject: [PATCH 21/51] chore(deps): update jacksonversion to v2.20.1 --- build.gradle | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/build.gradle b/build.gradle index 7921bb78..dabb332f 100644 --- a/build.gradle +++ b/build.gradle @@ -38,7 +38,7 @@ repositories { } ext { - jacksonVersion = '2.20.0' + jacksonVersion = '2.20.1' } dependencies { From b7e4d010f26661fe00b7023d5bb9a8317ff9dc4e Mon Sep 17 00:00:00 2001 From: "renovate[bot]" <29139614+renovate[bot]@users.noreply.github.com> Date: Wed, 5 Nov 2025 17:10:17 +0000 Subject: [PATCH 22/51] chore(deps): update dependency org.junit:junit-bom to v6 --- build.gradle | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/build.gradle b/build.gradle index dabb332f..e965fd9a 100644 --- a/build.gradle +++ b/build.gradle @@ -43,7 +43,7 @@ ext { 
dependencies { // JUnit setup for testing - testImplementation(platform("org.junit:junit-bom:5.14.1")) + testImplementation(platform("org.junit:junit-bom:6.0.1")) testImplementation('org.junit.jupiter:junit-jupiter') testRuntimeOnly('org.junit.platform:junit-platform-launcher') // JSON object mapping / (de-)serialization From 2e31dce3bc337f474841dfb5790330a7d2c9e4ce Mon Sep 17 00:00:00 2001 From: "renovate[bot]" <29139614+renovate[bot]@users.noreply.github.com> Date: Sat, 8 Nov 2025 05:58:29 +0000 Subject: [PATCH 23/51] chore(deps): update dependency commons-io:commons-io to v2.21.0 --- build.gradle | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/build.gradle b/build.gradle index e965fd9a..3005bbed 100644 --- a/build.gradle +++ b/build.gradle @@ -52,7 +52,7 @@ dependencies { // http client implementation group: 'org.apache.httpcomponents', name: 'httpclient', version: '4.5.14' // common file system operations - implementation group: 'commons-io', name: 'commons-io', version: '2.20.0' + implementation group: 'commons-io', name: 'commons-io', version: '2.21.0' // read from and write to zip files implementation group: 'net.lingala.zip4j', name: 'zip4j', version: '2.11.5' // compare json documents in tests From a3f54a49c7b31271aef7d1de5d018284943cd899 Mon Sep 17 00:00:00 2001 From: Andreas Pfeil Date: Thu, 2 Oct 2025 14:52:28 +0200 Subject: [PATCH 24/51] docs: document members concerning the improved removal speed --- .../kit/datamanager/ro_crate/entities/AbstractEntity.java | 5 +++++ .../datamanager/ro_crate/entities/data/DataSetEntity.java | 4 ++++ .../edu/kit/datamanager/ro_crate/payload/RoCratePayload.java | 5 +++++ 3 files changed, 14 insertions(+) diff --git a/src/main/java/edu/kit/datamanager/ro_crate/entities/AbstractEntity.java b/src/main/java/edu/kit/datamanager/ro_crate/entities/AbstractEntity.java index 38afcf4e..3b51367f 100644 --- a/src/main/java/edu/kit/datamanager/ro_crate/entities/AbstractEntity.java +++ 
b/src/main/java/edu/kit/datamanager/ro_crate/entities/AbstractEntity.java @@ -49,6 +49,11 @@ public class AbstractEntity { private static final EntityValidation entityValidation = new EntityValidation(new JsonSchemaValidation()); + /** + * This set contains all the ids of the entities that are linked by + * this entity. This information is provided to crate payloads to make + * the removal of entities faster. + */ @JsonIgnore private final Set linkedTo; diff --git a/src/main/java/edu/kit/datamanager/ro_crate/entities/data/DataSetEntity.java b/src/main/java/edu/kit/datamanager/ro_crate/entities/data/DataSetEntity.java index 2ef078ff..04912b05 100644 --- a/src/main/java/edu/kit/datamanager/ro_crate/entities/data/DataSetEntity.java +++ b/src/main/java/edu/kit/datamanager/ro_crate/entities/data/DataSetEntity.java @@ -18,6 +18,10 @@ public class DataSetEntity extends DataEntity { public static final String TYPE = "Dataset"; + /** + * The hasPart property is used to indicate that the described resource is a + * composite resource, and to point to the parts that it includes. + */ @JsonSerialize(using = HasPartSerializer.class) @JsonInclude(JsonInclude.Include.NON_EMPTY) public Set hasPart; diff --git a/src/main/java/edu/kit/datamanager/ro_crate/payload/RoCratePayload.java b/src/main/java/edu/kit/datamanager/ro_crate/payload/RoCratePayload.java index f5f09363..bc76d6a2 100644 --- a/src/main/java/edu/kit/datamanager/ro_crate/payload/RoCratePayload.java +++ b/src/main/java/edu/kit/datamanager/ro_crate/payload/RoCratePayload.java @@ -20,6 +20,11 @@ public class RoCratePayload implements CratePayload { private final HashMap dataEntities; private final HashMap contextualEntities; + + /** + * A map containing for each entity id a set of ids of entities that are linked to it. + * This is used to make the removal of entities from the crate faster. 
+ */ private final HashMap> associatedItems; /** From 1887d4615af93598fc1e93fcb33acbee21089d8d Mon Sep 17 00:00:00 2001 From: Andreas Pfeil Date: Thu, 2 Oct 2025 14:58:01 +0200 Subject: [PATCH 25/51] refactor: deprecate hasInHasPart method and introduce hasPart method for clarity --- .../ro_crate/entities/data/DataSetEntity.java | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/src/main/java/edu/kit/datamanager/ro_crate/entities/data/DataSetEntity.java b/src/main/java/edu/kit/datamanager/ro_crate/entities/data/DataSetEntity.java index 04912b05..90584993 100644 --- a/src/main/java/edu/kit/datamanager/ro_crate/entities/data/DataSetEntity.java +++ b/src/main/java/edu/kit/datamanager/ro_crate/entities/data/DataSetEntity.java @@ -45,10 +45,28 @@ public void addToHasPart(String id) { this.hasPart.add(id); } + /** + * Check if the hasPart property contains a specific id. + * + * @deprecated use {@link #hasPart(String)} instead. + * + * @param id the id to check for + * @return true if the id is present, false otherwise + */ + @Deprecated(forRemoval = true) public boolean hasInHasPart(String id) { return this.hasPart.contains(id); } + /** + * Check if the hasPart property contains a specific id. 
+ * @param id the id to check for + * @return true if the id is present, false otherwise + */ + public boolean hasPart(String id) { + return this.hasPart.contains(id); + } + abstract static class AbstractDataSetBuilder> extends AbstractDataEntityBuilder { From e4c68a86b1653dba2fd74021fd86fea39ade98b3 Mon Sep 17 00:00:00 2001 From: Andreas Pfeil Date: Thu, 2 Oct 2025 14:59:23 +0200 Subject: [PATCH 26/51] chore: fix typo in parameter name in setHasPart method --- .../kit/datamanager/ro_crate/entities/data/DataSetEntity.java | 4 ++-- .../datamanager/ro_crate/entities/data/DataSetEntityTest.java | 2 +- .../ro_crate/entities/data/RootDataEntityTest.java | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/main/java/edu/kit/datamanager/ro_crate/entities/data/DataSetEntity.java b/src/main/java/edu/kit/datamanager/ro_crate/entities/data/DataSetEntity.java index 90584993..31c2fb7a 100644 --- a/src/main/java/edu/kit/datamanager/ro_crate/entities/data/DataSetEntity.java +++ b/src/main/java/edu/kit/datamanager/ro_crate/entities/data/DataSetEntity.java @@ -76,8 +76,8 @@ public AbstractDataSetBuilder() { this.hasPart = new HashSet<>(); } - public T setHasPart(Set hastPart) { - this.hasPart = hastPart; + public T setHasPart(Set hasPart) { + this.hasPart = hasPart; return self(); } diff --git a/src/test/java/edu/kit/datamanager/ro_crate/entities/data/DataSetEntityTest.java b/src/test/java/edu/kit/datamanager/ro_crate/entities/data/DataSetEntityTest.java index b6d51243..5ae4142d 100644 --- a/src/test/java/edu/kit/datamanager/ro_crate/entities/data/DataSetEntityTest.java +++ b/src/test/java/edu/kit/datamanager/ro_crate/entities/data/DataSetEntityTest.java @@ -72,7 +72,7 @@ void testDirWithHasPartDeserialization() throws IOException { .addToHasPart(second_content) .build(); - assertTrue(dir.hasInHasPart(id)); + assertTrue(dir.hasPart(id)); HelpFunctions.compareEntityWithFile(dir, "/json/entities/data/directoryWeb.json"); } diff --git 
a/src/test/java/edu/kit/datamanager/ro_crate/entities/data/RootDataEntityTest.java b/src/test/java/edu/kit/datamanager/ro_crate/entities/data/RootDataEntityTest.java index b846ef87..76033f70 100644 --- a/src/test/java/edu/kit/datamanager/ro_crate/entities/data/RootDataEntityTest.java +++ b/src/test/java/edu/kit/datamanager/ro_crate/entities/data/RootDataEntityTest.java @@ -46,8 +46,8 @@ void testSerialization() throws IOException { .addAuthor("a2") .build(); - assertTrue(rootDataEntity.hasInHasPart(id1)); - assertTrue(rootDataEntity.hasInHasPart(id2)); + assertTrue(rootDataEntity.hasPart(id1)); + assertTrue(rootDataEntity.hasPart(id2)); HelpFunctions.compareEntityWithFile(rootDataEntity, "/json/entities/data/root.json"); } From 185767d6d41d532409645ce9a761369ff6ed10b5 Mon Sep 17 00:00:00 2001 From: Andreas Pfeil Date: Fri, 10 Oct 2025 17:25:42 +0200 Subject: [PATCH 27/51] test: sketch hierarchy creation api --- .../edu/kit/datamanager/ro_crate/RoCrate.java | 159 ++++++-- .../crate/HierarchyRecognitionConfig.java | 98 +++++ .../crate/HierarchyRecognitionResult.java | 341 +++++++++++++++++ .../AutomaticHierarchyRecognitionTest.java | 359 ++++++++++++++++++ 4 files changed, 915 insertions(+), 42 deletions(-) create mode 100644 src/main/java/edu/kit/datamanager/ro_crate/crate/HierarchyRecognitionConfig.java create mode 100644 src/main/java/edu/kit/datamanager/ro_crate/crate/HierarchyRecognitionResult.java create mode 100644 src/test/java/edu/kit/datamanager/ro_crate/crate/AutomaticHierarchyRecognitionTest.java diff --git a/src/main/java/edu/kit/datamanager/ro_crate/RoCrate.java b/src/main/java/edu/kit/datamanager/ro_crate/RoCrate.java index 8ebae01e..5f43796e 100644 --- a/src/main/java/edu/kit/datamanager/ro_crate/RoCrate.java +++ b/src/main/java/edu/kit/datamanager/ro_crate/RoCrate.java @@ -4,14 +4,14 @@ import com.fasterxml.jackson.databind.JsonNode; import com.fasterxml.jackson.databind.ObjectMapper; import com.fasterxml.jackson.databind.node.ObjectNode; - import 
edu.kit.datamanager.ro_crate.context.CrateMetadataContext; import edu.kit.datamanager.ro_crate.context.RoCrateMetadataContext; +import edu.kit.datamanager.ro_crate.crate.HierarchyRecognitionConfig; +import edu.kit.datamanager.ro_crate.crate.HierarchyRecognitionResult; import edu.kit.datamanager.ro_crate.entities.AbstractEntity; import edu.kit.datamanager.ro_crate.entities.contextual.ContextualEntity; import edu.kit.datamanager.ro_crate.entities.contextual.JsonDescriptor; import edu.kit.datamanager.ro_crate.entities.data.DataEntity; - import edu.kit.datamanager.ro_crate.entities.data.RootDataEntity; import edu.kit.datamanager.ro_crate.externalproviders.dataentities.ImportFromDataCite; import edu.kit.datamanager.ro_crate.objectmapper.MyObjectMapper; @@ -23,7 +23,6 @@ import edu.kit.datamanager.ro_crate.special.JsonUtilFunctions; import edu.kit.datamanager.ro_crate.validation.JsonSchemaValidation; import edu.kit.datamanager.ro_crate.validation.Validator; - import java.io.File; import java.net.URI; import java.util.*; @@ -105,12 +104,12 @@ public ContextualEntity getJsonDescriptor() { public void setJsonDescriptor(ContextualEntity jsonDescriptor) { this.jsonDescriptor = jsonDescriptor; } - + @Override public RootDataEntity getRootDataEntity() { return rootDataEntity; } - + @Override public void setRootDataEntity(RootDataEntity rootDataEntity) { this.rootDataEntity = rootDataEntity; @@ -123,8 +122,7 @@ public RoCrate() { this.roCratePayload = new RoCratePayload(); this.untrackedFiles = new HashSet<>(); this.metadataContext = new RoCrateMetadataContext(); - rootDataEntity = new RootDataEntity.RootDataEntityBuilder() - .build(); + rootDataEntity = new RootDataEntity.RootDataEntityBuilder().build(); jsonDescriptor = new JsonDescriptor(); } @@ -150,12 +148,12 @@ public Optional getVersion() { JsonNode conformsTo = this.jsonDescriptor.getProperty("conformsTo"); if (conformsTo.isArray()) { return StreamSupport.stream(conformsTo.spliterator(), false) - 
.filter(TreeNode::isObject) - .map(obj -> obj.path("@id").asText()) - .map(CrateVersion::fromSpecUri) - .filter(Optional::isPresent) - .map(Optional::get) - .findFirst(); + .filter(TreeNode::isObject) + .map(obj -> obj.path("@id").asText()) + .map(CrateVersion::fromSpecUri) + .filter(Optional::isPresent) + .map(Optional::get) + .findFirst(); } else if (conformsTo.isObject()) { return CrateVersion.fromSpecUri(conformsTo.get("@id").asText()); } else { @@ -168,9 +166,9 @@ public Collection getProfiles() { JsonNode conformsTo = this.jsonDescriptor.getProperty("conformsTo"); if (conformsTo.isArray()) { return StreamSupport.stream(conformsTo.spliterator(), false) - .filter(TreeNode::isObject) - .map(obj -> obj.path("@id").asText()) - .collect(Collectors.toSet()); + .filter(TreeNode::isObject) + .map(obj -> obj.path("@id").asText()) + .collect(Collectors.toSet()); } else { return Collections.emptySet(); } @@ -184,11 +182,19 @@ public String getJsonMetadata() { node.setAll(this.metadataContext.getContextJsonEntity()); var graph = objectMapper.createArrayNode(); - ObjectNode root = objectMapper.convertValue(this.rootDataEntity, ObjectNode.class); + ObjectNode root = objectMapper.convertValue( + this.rootDataEntity, + ObjectNode.class + ); graph.add(root); - graph.add(objectMapper.convertValue(this.jsonDescriptor, JsonNode.class)); - if (this.roCratePayload != null && this.roCratePayload.getEntitiesMetadata() != null) { + graph.add( + objectMapper.convertValue(this.jsonDescriptor, JsonNode.class) + ); + if ( + this.roCratePayload != null && + this.roCratePayload.getEntitiesMetadata() != null + ) { graph.addAll(this.roCratePayload.getEntitiesMetadata()); } node.set("@graph", graph); @@ -248,8 +254,14 @@ public void deleteEntityById(String entityId) { // remove from the root data entity hasPart this.rootDataEntity.removeFromHasPart(entityId); // remove from the root entity and the file descriptor - JsonUtilFunctions.removeFieldsWith(entityId, 
this.rootDataEntity.getProperties()); - JsonUtilFunctions.removeFieldsWith(entityId, this.jsonDescriptor.getProperties()); + JsonUtilFunctions.removeFieldsWith( + entityId, + this.rootDataEntity.getProperties() + ); + JsonUtilFunctions.removeFieldsWith( + entityId, + this.jsonDescriptor.getProperties() + ); } @Override @@ -268,7 +280,9 @@ public void deleteUrlFromContext(String key) { } @Override - public void addFromCollection(Collection entities) { + public void addFromCollection( + Collection entities + ) { this.roCratePayload.addEntities(entities); } @@ -282,6 +296,38 @@ public Collection getUntrackedFiles() { return this.untrackedFiles; } + /** + * Automatically recognizes hierarchical file structure from DataEntity IDs + * and connects them using hasPart relationships. + *

+ * WARNING: This will not change existing hasPart relationships. + * Only processes IDs that appear to be relative file paths. + * + * @param alsoAddIsPartOf if true, also adds isPartOf relationships from child to parent + * @return result object containing information about what was processed, as well as potential errors. + */ + public HierarchyRecognitionResult createDataEntityFileStructure(boolean alsoAddIsPartOf) { + HierarchyRecognitionConfig config = + new HierarchyRecognitionConfig().setInverseRelationships( + alsoAddIsPartOf + ); + return this.createDataEntityFileStructure(config); + } + + /** + * Automatically recognizes hierarchical file structure from DataEntity IDs + * and connects them using hasPart relationships with fine-grained configuration. + * + * @param config configuration object specifying how the recognition should behave + * @return result object containing information about what was processed, as well as potential errors. + */ + public HierarchyRecognitionResult createDataEntityFileStructure( + HierarchyRecognitionConfig config + ) { + // TODO: Implement the actual hierarchy recognition logic + throw new UnsupportedOperationException("Not implemented yet"); + } + /** * The inner class builder for the easier creation of a ROCrate. */ @@ -306,13 +352,18 @@ public static class RoCrateBuilder { * @param datePublished the published date of the crate. * @param licenseId the license identifier of the crate. 
*/ - public RoCrateBuilder(String name, String description, String datePublished, String licenseId) { + public RoCrateBuilder( + String name, + String description, + String datePublished, + String licenseId + ) { this.payload = new RoCratePayload(); this.metadataContext = new RoCrateMetadataContext(); this.rootDataEntity = new RootDataEntity.RootDataEntityBuilder() - .addProperty("name", name) - .addProperty(PROPERTY_DESCRIPTION, description) - .build(); + .addProperty("name", name) + .addProperty(PROPERTY_DESCRIPTION, description) + .build(); this.setLicense(licenseId); this.addDatePublishedWithExceptions(datePublished); } @@ -325,13 +376,18 @@ public RoCrateBuilder(String name, String description, String datePublished, Str * @param datePublished the published date of the crate. * @param license the license entity of the crate. */ - public RoCrateBuilder(String name, String description, String datePublished, ContextualEntity license) { + public RoCrateBuilder( + String name, + String description, + String datePublished, + ContextualEntity license + ) { this.payload = new RoCratePayload(); this.metadataContext = new RoCrateMetadataContext(); this.rootDataEntity = new RootDataEntity.RootDataEntityBuilder() - .addProperty("name", name) - .addProperty(PROPERTY_DESCRIPTION, description) - .build(); + .addProperty("name", name) + .addProperty(PROPERTY_DESCRIPTION, description) + .build(); this.setLicense(license); this.addDatePublishedWithExceptions(datePublished); } @@ -343,8 +399,7 @@ public RoCrateBuilder(String name, String description, String datePublished, Con public RoCrateBuilder() { this.payload = new RoCratePayload(); this.metadataContext = new RoCrateMetadataContext(); - rootDataEntity = new RootDataEntity.RootDataEntityBuilder() - .build(); + rootDataEntity = new RootDataEntity.RootDataEntityBuilder().build(); } /** @@ -399,7 +454,9 @@ public RoCrateBuilder addDataEntity(DataEntity dataEntity) { return this; } - public RoCrateBuilder 
addContextualEntity(ContextualEntity contextualEntity) { + public RoCrateBuilder addContextualEntity( + ContextualEntity contextualEntity + ) { this.metadataContext.checkEntity(contextualEntity); this.payload.addContextualEntity(contextualEntity); return this; @@ -430,7 +487,8 @@ public RoCrateBuilder setLicense(ContextualEntity license) { * @return the builder */ public RoCrateBuilder setLicense(String licenseId) { - ContextualEntity licenseEntity = new ContextualEntity.ContextualEntityBuilder() + ContextualEntity licenseEntity = + new ContextualEntity.ContextualEntityBuilder() .setId(licenseId) .build(); this.setLicense(licenseEntity); @@ -445,8 +503,12 @@ public RoCrateBuilder setLicense(String licenseId) { * @return this builder * @throws IllegalArgumentException if format is not ISO 8601 */ - public RoCrateBuilder addDatePublishedWithExceptions(String dateValue) throws IllegalArgumentException { - this.rootDataEntity.addDateTimePropertyWithExceptions("datePublished", dateValue); + public RoCrateBuilder addDatePublishedWithExceptions(String dateValue) + throws IllegalArgumentException { + this.rootDataEntity.addDateTimePropertyWithExceptions( + "datePublished", + dateValue + ); return this; } @@ -460,7 +522,10 @@ public RoCrateBuilder addUrlToContext(java.lang.String url) { return this; } - public RoCrateBuilder addValuePairToContext(java.lang.String key, java.lang.String value) { + public RoCrateBuilder addValuePairToContext( + java.lang.String key, + java.lang.String value + ) { this.metadataContext.addToContext(key, value); return this; } @@ -507,7 +572,12 @@ public BuilderWithDraftFeatures() { /** * @see RoCrateBuilder#RoCrateBuilder(String, String, String, String) */ - public BuilderWithDraftFeatures(String name, String description, String datePublished, String licenseId) { + public BuilderWithDraftFeatures( + String name, + String description, + String datePublished, + String licenseId + ) { super(name, description, datePublished, licenseId); } @@ -515,7 
+585,12 @@ public BuilderWithDraftFeatures(String name, String description, String datePubl * @see RoCrateBuilder#RoCrateBuilder(String, String, String, * ContextualEntity) */ - public BuilderWithDraftFeatures(String name, String description, String datePublished, ContextualEntity licenseId) { + public BuilderWithDraftFeatures( + String name, + String description, + String datePublished, + ContextualEntity licenseId + ) { super(name, description, datePublished, licenseId); } @@ -540,9 +615,9 @@ public BuilderWithDraftFeatures(RoCrate crate) { */ public BuilderWithDraftFeatures alsoConformsTo(URI specification) { descriptorBuilder - .addConformsTo(specification) - // usage of a draft feature results in draft version numbers of the crate - .setVersion(CrateVersion.LATEST_UNSTABLE); + .addConformsTo(specification) + // usage of a draft feature results in draft version numbers of the crate + .setVersion(CrateVersion.LATEST_UNSTABLE); return this; } } diff --git a/src/main/java/edu/kit/datamanager/ro_crate/crate/HierarchyRecognitionConfig.java b/src/main/java/edu/kit/datamanager/ro_crate/crate/HierarchyRecognitionConfig.java new file mode 100644 index 00000000..dec46d6e --- /dev/null +++ b/src/main/java/edu/kit/datamanager/ro_crate/crate/HierarchyRecognitionConfig.java @@ -0,0 +1,98 @@ +package edu.kit.datamanager.ro_crate.crate; + +/** + * Configuration class for automatic hierarchy recognition functionality. + * This class provides fine-grained control over how the hierarchy recognition + * algorithm behaves using a fluent interface with setter methods. + */ +public class HierarchyRecognitionConfig { + + /** + * Whether missing intermediate folder entities should be automatically created. + *

+ * Default: false (only connect existing entities) + */ + public boolean createMissingIntermediateEntities = false; + + /** + * Whether isPartOf relationships should be added in addition to hasPart. + *

+ * Default: false (only add hasPart relationships) + */ + public boolean setInverseRelationships = false; + + /** + * Whether new hasPart relationships are added to the existing ones (false) + * or existing relations are removed beforehand (true). + *

+ * Default: false (keep relations) + */ + public boolean removeExistingConnections = false; + + /** + * Creates a new configuration with default values. + */ + public HierarchyRecognitionConfig() { + // All defaults are set via field initializers + } + + /** + * Sets whether missing intermediate folder entities should be automatically created. + * + * @param create true to create missing DataSetEntity instances for intermediate folders + * @return this configuration object for method chaining + */ + public HierarchyRecognitionConfig createMissingIntermediateEntities( + boolean create + ) { + this.createMissingIntermediateEntities = create; + return this; + } + + /** + * Sets whether isPartOf relationships should be added in addition to hasPart. + * + * @param addIsPartOf true to add bidirectional relationships + * @return this configuration object for method chaining + */ + public HierarchyRecognitionConfig setInverseRelationships( + boolean addIsPartOf + ) { + this.setInverseRelationships = addIsPartOf; + return this; + } + + /** + * Sets whether new hasPart relationships are added to the existing ones (false) + * or existing relations are removed beforehand (true). + * + * @param removeExistingConnections true to remove existing connections + * @return this configuration object for method chaining + */ + public HierarchyRecognitionConfig removeExistingConnections( + boolean removeExistingConnections + ) { + this.removeExistingConnections = removeExistingConnections; + return this; + } + + /** + * Creates a configuration with default sensible values.
+ * @return default configuration + */ + public static HierarchyRecognitionConfig defaultConfig() { + return new HierarchyRecognitionConfig(); + } + + @Override + public String toString() { + return ( + "HierarchyRecognitionConfig{" + + "createMissingIntermediateEntities=" + + createMissingIntermediateEntities + + ", addIsPartOfRelationships=" + + setInverseRelationships + + '}' + ); + } +} diff --git a/src/main/java/edu/kit/datamanager/ro_crate/crate/HierarchyRecognitionResult.java b/src/main/java/edu/kit/datamanager/ro_crate/crate/HierarchyRecognitionResult.java new file mode 100644 index 00000000..5f9e3bd4 --- /dev/null +++ b/src/main/java/edu/kit/datamanager/ro_crate/crate/HierarchyRecognitionResult.java @@ -0,0 +1,341 @@ +package edu.kit.datamanager.ro_crate.crate; + +import edu.kit.datamanager.ro_crate.entities.data.DataEntity; +import java.util.*; + +/** + * Result class containing information about the automatic hierarchy recognition operation. + * This class provides details about what was processed, created, and any issues encountered + * during the hierarchy recognition process. Always contains complete information about the + * operation result, including success/failure and any errors encountered. 
+ */ +public class HierarchyRecognitionResult { + + private final Set createdEntities; + private final Map> processedRelationships; + private final Set skippedEntities; + private final List warnings; + private final List errors; + private final boolean successful; + + private HierarchyRecognitionResult(Builder builder) { + this.createdEntities = Collections.unmodifiableSet( + new HashSet<>(builder.createdEntities) + ); + this.processedRelationships = Collections.unmodifiableMap( + new HashMap<>(builder.processedRelationships) + ); + this.skippedEntities = Collections.unmodifiableSet( + new HashSet<>(builder.skippedEntities) + ); + this.warnings = Collections.unmodifiableList( + new ArrayList<>(builder.warnings) + ); + this.errors = Collections.unmodifiableList( + new ArrayList<>(builder.errors) + ); + // If there are any errors, the operation is not successful + this.successful = builder.successful && builder.errors.isEmpty(); + } + + /** + * Gets the entities that were automatically created during the process. + * These are typically intermediate folder entities that were missing. + * + * @return set of created entities + */ + public Set getCreatedEntities() { + return createdEntities; + } + + /** + * Gets the parent-child relationships that were processed. + * The map contains parent entity IDs as keys and sets of child entity IDs as values. + * + * @return map of processed relationships + */ + public Map> getProcessedRelationships() { + return processedRelationships; + } + + /** + * Gets the entities that were skipped during processing. + * These might include entities with non-file-path IDs or entities + * that couldn't be processed for other reasons. + * + * @return set of skipped entities + */ + public Set getSkippedEntities() { + return skippedEntities; + } + + /** + * Gets any warnings generated during the process. + * Warnings indicate potential issues that didn't prevent the operation + * from completing but might need attention. 
+ * + * @return list of warning messages + */ + public List getWarnings() { + return warnings; + } + + /** + * Gets any errors that occurred during the process. + * Errors indicate problems that prevented the operation from + * completing successfully or caused it to fail. + * + * @return list of error messages + */ + public List getErrors() { + return errors; + } + + /** + * Indicates whether the operation completed successfully. + * Even successful operations might have warnings or skipped entities. + * + * @return true if the operation completed successfully + */ + public boolean isSuccessful() { + return successful; + } + + /** + * Gets the total number of relationships that were established. + * + * @return total count of parent-child relationships + */ + public int getTotalRelationshipsCount() { + return processedRelationships + .values() + .stream() + .mapToInt(Set::size) + .sum(); + } + + /** + * Checks if any entities were created during the process. + * + * @return true if entities were created + */ + public boolean hasCreatedEntities() { + return !createdEntities.isEmpty(); + } + + /** + * Checks if any entities were skipped during the process. + * + * @return true if entities were skipped + */ + public boolean hasSkippedEntities() { + return !skippedEntities.isEmpty(); + } + + /** + * Checks if any warnings were generated during the process. + * + * @return true if warnings exist + */ + public boolean hasWarnings() { + return !warnings.isEmpty(); + } + + /** + * Checks if any errors occurred during the process. + * + * @return true if errors exist + */ + public boolean hasErrors() { + return !errors.isEmpty(); + } + + /** + * Creates a new builder for constructing HierarchyRecognitionResult instances. + * + * @return new builder instance + */ + public static Builder builder() { + return new Builder(); + } + + /** + * Builder class for creating HierarchyRecognitionResult instances. 
+ */ + public static class Builder { + + private final Set createdEntities = new HashSet<>(); + private final Map> processedRelationships = + new HashMap<>(); + private final Set skippedEntities = new HashSet<>(); + private final List warnings = new ArrayList<>(); + private final List errors = new ArrayList<>(); + private boolean successful = true; + + /** + * Adds an entity that was created during the process. + * + * @param entity the created entity + * @return this builder + */ + public Builder addCreatedEntity(DataEntity entity) { + this.createdEntities.add(entity); + return this; + } + + /** + * Adds multiple entities that were created during the process. + * + * @param entities the created entities + * @return this builder + */ + public Builder addCreatedEntities(Collection entities) { + this.createdEntities.addAll(entities); + return this; + } + + /** + * Adds a processed parent-child relationship. + * + * @param parentId the parent entity ID + * @param childId the child entity ID + * @return this builder + */ + public Builder addProcessedRelationship( + String parentId, + String childId + ) { + this.processedRelationships.computeIfAbsent(parentId, k -> + new HashSet<>() + ).add(childId); + return this; + } + + /** + * Adds multiple processed relationships for a parent. + * + * @param parentId the parent entity ID + * @param childIds the child entity IDs + * @return this builder + */ + public Builder addProcessedRelationships( + String parentId, + Collection childIds + ) { + this.processedRelationships.computeIfAbsent(parentId, k -> + new HashSet<>() + ).addAll(childIds); + return this; + } + + /** + * Adds an entity that was skipped during processing. + * + * @param entity the skipped entity + * @return this builder + */ + public Builder addSkippedEntity(DataEntity entity) { + this.skippedEntities.add(entity); + return this; + } + + /** + * Adds multiple entities that were skipped during processing. 
+ * + * @param entities the skipped entities + * @return this builder + */ + public Builder addSkippedEntities(Collection entities) { + this.skippedEntities.addAll(entities); + return this; + } + + /** + * Adds a warning message. + * + * @param warning the warning message + * @return this builder + */ + public Builder addWarning(String warning) { + this.warnings.add(warning); + return this; + } + + /** + * Adds multiple warning messages. + * + * @param warnings the warning messages + * @return this builder + */ + public Builder addWarnings(Collection warnings) { + this.warnings.addAll(warnings); + return this; + } + + /** + * Adds an error message and marks the operation as unsuccessful. + * + * @param error the error message + * @return this builder + */ + public Builder addError(String error) { + this.errors.add(error); + this.successful = false; + return this; + } + + /** + * Adds multiple error messages and marks the operation as unsuccessful. + * + * @param errors the error messages + * @return this builder + */ + public Builder addErrors(Collection errors) { + this.errors.addAll(errors); + if (!errors.isEmpty()) { + this.successful = false; + } + return this; + } + + /** + * Sets whether the operation was successful. + * + * @param successful true if successful + * @return this builder + */ + public Builder setSuccessful(boolean successful) { + this.successful = successful; + return this; + } + + /** + * Builds the result object. 
+ * + * @return the constructed HierarchyRecognitionResult + */ + public HierarchyRecognitionResult build() { + return new HierarchyRecognitionResult(this); + } + } + + @Override + public String toString() { + return ( + "HierarchyRecognitionResult{" + + "successful=" + + successful + + ", createdEntities=" + + createdEntities.size() + + ", processedRelationships=" + + getTotalRelationshipsCount() + + ", skippedEntities=" + + skippedEntities.size() + + ", warnings=" + + warnings.size() + + ", errors=" + + errors.size() + + '}' + ); + } +} diff --git a/src/test/java/edu/kit/datamanager/ro_crate/crate/AutomaticHierarchyRecognitionTest.java b/src/test/java/edu/kit/datamanager/ro_crate/crate/AutomaticHierarchyRecognitionTest.java new file mode 100644 index 00000000..6c78a641 --- /dev/null +++ b/src/test/java/edu/kit/datamanager/ro_crate/crate/AutomaticHierarchyRecognitionTest.java @@ -0,0 +1,359 @@ +package edu.kit.datamanager.ro_crate.crate; + +import static org.junit.jupiter.api.Assertions.*; + +import edu.kit.datamanager.ro_crate.RoCrate; +import edu.kit.datamanager.ro_crate.entities.data.DataEntity; +import edu.kit.datamanager.ro_crate.entities.data.DataSetEntity; +import edu.kit.datamanager.ro_crate.entities.data.FileEntity; +import java.nio.file.Paths; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; + +/** + * Automatic Hierarchy Recognition - API Tests & Usage Examples + */ +public class AutomaticHierarchyRecognitionTest { + + private RoCrate crate; + + @BeforeEach + void setUp() { + crate = new RoCrate.RoCrateBuilder( + "Test Crate", + "A crate for testing hierarchy recognition", + "2024", + "https://creativecommons.org/licenses/by/4.0/" + ).build(); + } + + /** + * One-directional recognition in simple hierarchy. 
+ */ + @Test + void givenFilesInFolderHierarchy_whenRecognizeStructure_thenEstablishesParentChildRelations() { + // Given: A crate with files and folders in a hierarchy + FileEntity file1 = new FileEntity.FileEntityBuilder() + .setId("data/raw/experiment1.csv") + .setLocationWithExceptions(Paths.get("test1.csv")) + .build(); + + FileEntity file2 = new FileEntity.FileEntityBuilder() + .setId("data/processed/results.txt") + .setLocationWithExceptions(Paths.get("test2.txt")) + .build(); + + DataSetEntity dataFolder = new DataSetEntity.DataSetBuilder() + .setId("data/") + .addProperty("name", "Data Directory") + .build(); + + DataSetEntity rawFolder = new DataSetEntity.DataSetBuilder() + .setId("data/raw/") + .addProperty("name", "Raw Data") + .build(); + + DataSetEntity processedFolder = new DataSetEntity.DataSetBuilder() + .setId("data/processed/") + .addProperty("name", "Processed Data") + .build(); + + crate.addDataEntity(file1); + crate.addDataEntity(file2); + crate.addDataEntity(dataFolder); + crate.addDataEntity(rawFolder); + crate.addDataEntity(processedFolder); + + // When: We automatically recognize hierarchy + crate.createDataEntityFileStructure(false); + + // Then: Hierarchy should be established + assertTrue(dataFolder.hasPart("data/raw/")); + assertTrue(dataFolder.hasPart("data/processed/")); + assertTrue(rawFolder.hasPart("data/raw/experiment1.csv")); + assertTrue(processedFolder.hasPart("data/processed/results.txt")); + + // Root should only contain top-level entities + assertTrue(crate.getRootDataEntity().hasPart("data/")); + assertFalse( + crate.getRootDataEntity().hasPart("data/raw/experiment1.csv") + ); + } + + /** + * Adding bidirectional relationships. 
+ */ + @Test + void givenFileInFolder_whenRecognizeWithIsPartOf_thenCreatesBidirectionalRelations() { + FileEntity file = new FileEntity.FileEntityBuilder() + .setId("folder/file.txt") + .setLocationWithExceptions(Paths.get("test.txt")) + .build(); + + DataSetEntity folder = new DataSetEntity.DataSetBuilder() + .setId("folder/") + .build(); + + crate.addDataEntity(file); + crate.addDataEntity(folder); + + // When: We enable isPartOf relationships + crate.createDataEntityFileStructure(true); + + // Then: Both hasPart and isPartOf should be set + assertTrue(folder.hasPart("folder/file.txt")); + assertEquals("folder/", file.getProperties().get("isPartOf").asText()); + } + + /** + * Advanced configuration with missing-folder-creation enabled. + */ + @Test + void givenDeepNestedPathWithMissingIntermediates_whenRecognizeWithCreateMissing_thenCreatesAllIntermediateEntities() { + FileEntity file = new FileEntity.FileEntityBuilder() + .setId("data/deep/nested/file.txt") + .setLocationWithExceptions(Paths.get("test.txt")) + .build(); + + crate.addDataEntity(file); + + // When: We configure to create missing intermediate entities + HierarchyRecognitionConfig config = new HierarchyRecognitionConfig() + .createMissingIntermediateEntities(true) + .setInverseRelationships(true) + .removeExistingConnections(true); + + HierarchyRecognitionResult result = crate.createDataEntityFileStructure( + config + ); + + // Then: Missing intermediate entities should be created + assertTrue(result.isSuccessful()); + assertNotNull(crate.getDataEntityById("data/")); + assertNotNull(crate.getDataEntityById("data/deep/")); + assertNotNull(crate.getDataEntityById("data/deep/nested/")); + + // And hierarchy should be established + assertTrue( + ((DataSetEntity) crate.getDataEntityById("data/")).hasPart( + "data/deep/" + ) + ); + assertTrue( + ((DataSetEntity) crate.getDataEntityById("data/deep/")).hasPart( + "data/deep/nested/" + ) + ); + assertTrue( + ((DataSetEntity) crate.getDataEntityById( +
"data/deep/nested/" + )).hasPart("data/deep/nested/file.txt") + ); + } + + /** + * Removing existing manual relationships. + */ + @Test + void givenFolderWithExistingRelations_whenRecognizeWithRemoveExisting_thenKeepsOnlyNewRelations() { + FileEntity file1 = new FileEntity.FileEntityBuilder() + .setId("folder/file1.txt") + .setLocationWithExceptions(Paths.get("test1.txt")) + .build(); + + FileEntity file2 = new FileEntity.FileEntityBuilder() + .setId("folder/file2.txt") + .setLocationWithExceptions(Paths.get("test2.txt")) + .build(); + + DataSetEntity folder = new DataSetEntity.DataSetBuilder() + .setId("folder/") + .addToHasPart("manually-added-entity") + .build(); + + crate.addDataEntity(file1); + crate.addDataEntity(file2); + crate.addDataEntity(folder); + + // When: We remove existing relationships before recognizing the hierarchy + HierarchyRecognitionConfig config = + new HierarchyRecognitionConfig().removeExistingConnections(true); + + crate.createDataEntityFileStructure(config); + + // Then: Only the newly recognized relationships should exist + assertFalse(folder.hasPart("manually-added-entity")); + assertTrue(folder.hasPart("folder/file1.txt")); + assertTrue(folder.hasPart("folder/file2.txt")); + } + + /** + * Default behavior keeps existing relationships.
+ */ + @Test + void givenFolderWithExistingRelations_whenRecognizeWithDefaultBehavior_thenKeepsExistingRelations() { + FileEntity file = new FileEntity.FileEntityBuilder() + .setId("folder/file.txt") + .setLocationWithExceptions(Paths.get("test.txt")) + .build(); + + DataSetEntity folder = new DataSetEntity.DataSetBuilder() + .setId("folder/") + .addToHasPart("manually-added-entity") + .build(); + + crate.addDataEntity(file); + crate.addDataEntity(folder); + + // When: We use default behavior (keep existing) + crate.createDataEntityFileStructure(false); + + // Then: Both existing and new relationships should exist + assertTrue(folder.hasPart("manually-added-entity")); + assertTrue(folder.hasPart("folder/file.txt")); + } + + /** + * Test skipping non-file-path IDs + */ + @Test + void givenMixOfFilePathsUrlsAndDois_whenRecognizeStructure_thenProcessesOnlyFilePaths() { + FileEntity localFile = new FileEntity.FileEntityBuilder() + .setId("folder/file.txt") + .setLocationWithExceptions(Paths.get("test.txt")) + .build(); + + DataEntity remoteEntity = new DataEntity.DataEntityBuilder() + .setId("https://example.com/remote-file.txt") + .build(); + + DataEntity doiEntity = new DataEntity.DataEntityBuilder() + .setId("doi:10.1234/example") + .build(); + + DataSetEntity folder = new DataSetEntity.DataSetBuilder() + .setId("folder/") + .build(); + + crate.addDataEntity(localFile); + crate.addDataEntity(remoteEntity); + crate.addDataEntity(doiEntity); + crate.addDataEntity(folder); + + // When: We recognize hierarchy + crate.createDataEntityFileStructure(false); + + // Then: Only local file paths should be processed + assertTrue(folder.hasPart("folder/file.txt")); + assertFalse(folder.hasPart("https://example.com/remote-file.txt")); + assertFalse(folder.hasPart("doi:10.1234/example")); + + // Remote and DOI entities should remain in root + assertTrue( + crate + .getRootDataEntity() + .hasPart("https://example.com/remote-file.txt") + ); +
assertTrue(crate.getRootDataEntity().hasPart("doi:10.1234/example")); + } + + /** + * Test error handling with circular references + */ + @Test + void givenEntitiesWithCircularPathReferences_whenRecognizeStructure_thenHandlesGracefullyWithoutException() { + // This would be a malformed crate, but we should handle it gracefully + DataSetEntity folder1 = new DataSetEntity.DataSetBuilder() + .setId("folder1/") + .build(); + + DataSetEntity folder2 = new DataSetEntity.DataSetBuilder() + .setId("folder1/folder2/") + .build(); + + // Manually create circular reference in IDs (this is contrived but tests the logic) + DataEntity circularEntity = new DataEntity.DataEntityBuilder() + .setId("folder1/folder2/../../../folder1/file.txt") // resolves to folder1/file.txt + .build(); + + crate.addDataEntity(folder1); + crate.addDataEntity(folder2); + crate.addDataEntity(circularEntity); + + // When/Then: Should handle gracefully + assertDoesNotThrow(() -> { + // When: Default configuration for hierarchy recognition + HierarchyRecognitionResult result = + crate.createDataEntityFileStructure( + new HierarchyRecognitionConfig() + ); + // Then: Does not throw exception or error. 
+ assertTrue(result.isSuccessful()); + }); + } + + /** + * Test validation before any changes are made + */ + @Test + void givenInvalidEntityData_whenRecognizeStructure_thenFailsWithoutMakingChanges() { + // Given: A file appears to be inside another file (invalid hierarchy) + FileEntity parentFile = new FileEntity.FileEntityBuilder() + .setId("document.pdf") + .setLocationWithExceptions(Paths.get("document.pdf")) + .build(); + + FileEntity childFile = new FileEntity.FileEntityBuilder() + .setId("document.pdf/embedded_data.txt") // Invalid: file inside a file + .setLocationWithExceptions(Paths.get("embedded.txt")) + .build(); + + crate.addDataEntity(parentFile); + crate.addDataEntity(childFile); + + // When: We try to recognize hierarchy + HierarchyRecognitionConfig config = new HierarchyRecognitionConfig(); + HierarchyRecognitionResult result = crate.createDataEntityFileStructure( + config + ); + + // Then: Should fail without making any changes + assertFalse(result.isSuccessful()); + assertTrue(result.hasErrors()); + + // Original state should be preserved + assertTrue(crate.getRootDataEntity().hasPart("")); // Original relationship intact + } + + /** + * Test result object provides useful information + */ + @Test + void givenFileRequiringIntermediateCreation_whenRecognizeStructure_thenReturnsDetailedOperationInfo() { + FileEntity file = new FileEntity.FileEntityBuilder() + .setId("folder/file.txt") + .setLocationWithExceptions(Paths.get("test.txt")) + .build(); + + crate.addDataEntity(file); + + HierarchyRecognitionConfig config = + new HierarchyRecognitionConfig().createMissingIntermediateEntities( + true + ); + + // When: We recognize hierarchy + HierarchyRecognitionResult result = crate.createDataEntityFileStructure( + config + ); + + // Then: Result should provide useful information + assertTrue(result.isSuccessful()); + HierarchyRecognitionResult info = result; + + assertEquals(1, info.getCreatedEntities().size()); // "folder/" was created + assertEquals(1, 
info.getProcessedRelationships().size()); // folder -> file relationship + assertTrue(info.getSkippedEntities().isEmpty()); // no entities skipped + assertTrue(info.getWarnings().isEmpty()); // no warnings + } +} From faad861bfefd86895a504fed723b3d7be97aad10 Mon Sep 17 00:00:00 2001 From: Andreas Pfeil Date: Fri, 10 Oct 2025 23:48:26 +0200 Subject: [PATCH 28/51] feat: file hierarchy creation --- build.gradle | 8 +- .../edu/kit/datamanager/ro_crate/Crate.java | 24 ++ .../edu/kit/datamanager/ro_crate/RoCrate.java | 33 +-- .../ro_crate/crate/HierarchyRecognition.java | 209 ++++++++++++++++++ .../ro_crate/util/FileSystemUtil.java | 48 ++++ .../AutomaticHierarchyRecognitionTest.java | 22 +- 6 files changed, 311 insertions(+), 33 deletions(-) create mode 100644 src/main/java/edu/kit/datamanager/ro_crate/crate/HierarchyRecognition.java diff --git a/build.gradle b/build.gradle index 3005bbed..8ac1e3cf 100644 --- a/build.gradle +++ b/build.gradle @@ -16,7 +16,7 @@ plugins { id "io.freefair.maven-publish-java" version "9.0.0" } -group 'edu.kit.datamanager' +group = 'edu.kit.datamanager' description = "A library for easy creation and modification of valid RO-Crates." 
println "Running gradle version: $gradle.gradleVersion" @@ -24,8 +24,10 @@ println "Building ${name} version: ${version}" println "JDK version: ${JavaVersion.current()}" println "Profile (system property): ${System.getProperty('profile')}" -sourceCompatibility = JavaVersion.VERSION_17 -targetCompatibility = JavaVersion.VERSION_17 +java { + sourceCompatibility = JavaVersion.VERSION_17 + targetCompatibility = JavaVersion.VERSION_17 +} if (JavaVersion.current() == JavaVersion.VERSION_17) { println "Setting encoding to UTF-8 manually" diff --git a/src/main/java/edu/kit/datamanager/ro_crate/Crate.java b/src/main/java/edu/kit/datamanager/ro_crate/Crate.java index 782ff601..7c2873f3 100644 --- a/src/main/java/edu/kit/datamanager/ro_crate/Crate.java +++ b/src/main/java/edu/kit/datamanager/ro_crate/Crate.java @@ -7,6 +7,8 @@ import java.util.Set; import edu.kit.datamanager.ro_crate.context.CrateMetadataContext; +import edu.kit.datamanager.ro_crate.crate.HierarchyRecognitionConfig; +import edu.kit.datamanager.ro_crate.crate.HierarchyRecognitionResult; import edu.kit.datamanager.ro_crate.entities.AbstractEntity; import edu.kit.datamanager.ro_crate.entities.contextual.ContextualEntity; import edu.kit.datamanager.ro_crate.entities.data.DataEntity; @@ -135,4 +137,26 @@ public interface Crate { void deleteUrlFromContext(String url); Collection getUntrackedFiles(); + + /** + * Automatically recognizes hierarchical file structure from DataEntity IDs + * and connects them using hasPart relationships. + *

+ * WARNING: This will not change existing hasPart relationships. + * + * @param addInverseRelationships if true, also adds isPartOf relationships from child to parent + * @return result object containing information about what was processed, as well as potential errors. + */ + HierarchyRecognitionResult createDataEntityFileStructure(boolean addInverseRelationships); + + /** + * Automatically recognizes hierarchical file structure from DataEntity IDs + * and connects them using hasPart relationships with fine-grained configuration. + *

+ * Note: Only processes IDs that appear to be relative file paths. + * + * @param config configuration object specifying how the recognition should behave + * @return result object containing information about what was processed, as well as potential errors. + */ + HierarchyRecognitionResult createDataEntityFileStructure(HierarchyRecognitionConfig config); } diff --git a/src/main/java/edu/kit/datamanager/ro_crate/RoCrate.java b/src/main/java/edu/kit/datamanager/ro_crate/RoCrate.java index 5f43796e..f5ea03b7 100644 --- a/src/main/java/edu/kit/datamanager/ro_crate/RoCrate.java +++ b/src/main/java/edu/kit/datamanager/ro_crate/RoCrate.java @@ -6,6 +6,7 @@ import com.fasterxml.jackson.databind.node.ObjectNode; import edu.kit.datamanager.ro_crate.context.CrateMetadataContext; import edu.kit.datamanager.ro_crate.context.RoCrateMetadataContext; +import edu.kit.datamanager.ro_crate.crate.HierarchyRecognition; import edu.kit.datamanager.ro_crate.crate.HierarchyRecognitionConfig; import edu.kit.datamanager.ro_crate.crate.HierarchyRecognitionResult; import edu.kit.datamanager.ro_crate.entities.AbstractEntity; @@ -296,36 +297,20 @@ public Collection getUntrackedFiles() { return this.untrackedFiles; } - /** - * Automatically recognizes hierarchical file structure from DataEntity IDs - * and connects them using hasPart relationships. - *

- * WARNING: This will not change existing hasPart relationships. - * Only processes IDs that appear to be relative file paths. - * - * @param alsoAddIsPartOf if true, also adds isPartOf relationships from child to parent - * @return result object containing information about what was processed, as well as potential errors. - */ - public HierarchyRecognitionResult createDataEntityFileStructure(boolean alsoAddIsPartOf) { - HierarchyRecognitionConfig config = - new HierarchyRecognitionConfig().setInverseRelationships( - alsoAddIsPartOf - ); + @Override + public HierarchyRecognitionResult createDataEntityFileStructure( + boolean addInverseRelationships + ) { + HierarchyRecognitionConfig config = new HierarchyRecognitionConfig() + .setInverseRelationships(addInverseRelationships); return this.createDataEntityFileStructure(config); } - /** - * Automatically recognizes hierarchical file structure from DataEntity IDs - * and connects them using hasPart relationships with fine-grained configuration. - * - * @param config configuration object specifying how the recognition should behave - * @return result object containing information about what was processed, as well as potential errors. 
- */ + @Override public HierarchyRecognitionResult createDataEntityFileStructure( HierarchyRecognitionConfig config ) { - // TODO: Implement the actual hierarchy recognition logic - throw new UnsupportedOperationException("Not implemented yet"); + return new HierarchyRecognition(this, config).buildHierarchy(); } /** diff --git a/src/main/java/edu/kit/datamanager/ro_crate/crate/HierarchyRecognition.java b/src/main/java/edu/kit/datamanager/ro_crate/crate/HierarchyRecognition.java new file mode 100644 index 00000000..8f92d68d --- /dev/null +++ b/src/main/java/edu/kit/datamanager/ro_crate/crate/HierarchyRecognition.java @@ -0,0 +1,209 @@ +package edu.kit.datamanager.ro_crate.crate; + +import edu.kit.datamanager.ro_crate.Crate; +import edu.kit.datamanager.ro_crate.entities.data.DataEntity; +import edu.kit.datamanager.ro_crate.entities.data.DataSetEntity; +import edu.kit.datamanager.ro_crate.util.FileSystemUtil; + +import java.util.HashMap; +import java.util.HashSet; +import java.util.Map; +import java.util.Set; + +public class HierarchyRecognition { + protected final Crate crate; + protected final HierarchyRecognitionConfig config; + + public HierarchyRecognition(Crate crate, HierarchyRecognitionConfig config) { + this.crate = crate; + this.config = config; + } + + public HierarchyRecognitionResult buildHierarchy() { + HierarchyRecognitionResult.Builder resultBuilder = + new HierarchyRecognitionResult.Builder(); + + try { + // Get all data entities to process + Set allEntities = this.crate.getAllDataEntities(); + allEntities.add(crate.getRootDataEntity()); + + // Filter entities with file path IDs (not URLs, DOIs, etc.) 
+ Map pathEntities = new HashMap<>(); + for (DataEntity entity : allEntities) { + String id = entity.getId(); + if (FileSystemUtil.isFilePath(id)) { + pathEntities.put(id, entity); + } else { + resultBuilder.addSkippedEntity(entity); + } + } + + + // Validate hierarchy before making changes + if (!HierarchyRecognition.validateHierarchy(pathEntities, resultBuilder)) { + return resultBuilder.build(); + } + + // Create missing intermediate entities if configured + if (config.createMissingIntermediateEntities) { + this.createMissingIntermediateEntities(pathEntities, resultBuilder); + } + + // Clear existing relationships if configured + if (config.removeExistingConnections) { + this.clearExistingRelationships(pathEntities); + } + + // Build hierarchy relationships + this.buildHierarchyRelationships(pathEntities, config, resultBuilder); + + return resultBuilder.build(); + } catch (Exception e) { + resultBuilder.addError( + "Unexpected error during hierarchy recognition: " + + e.getMessage() + ); + return resultBuilder.build(); + } + } + + /** + * Validates that the hierarchy is consistent (no files containing other files/folders). 
+ * + * @param pathEntities map of path IDs to DataEntities + * @param resultBuilder builder to collect errors + * @return true if valid, false if invalid hierarchy detected + */ + protected static boolean validateHierarchy( + Map pathEntities, + HierarchyRecognitionResult.Builder resultBuilder + ) { + for (Map.Entry entry : pathEntities.entrySet()) { + String childId = entry.getKey(); + String parentPath = FileSystemUtil.getParentPath(childId); + if (parentPath == null || parentPath.equals("./")) { + continue; + } + + // Check both with and without trailing slash since files don't have slash but folders do + DataEntity parentEntity = pathEntities.get(parentPath); + if (parentEntity == null) { + parentEntity = pathEntities.get(parentPath + "/"); + } + + if (parentEntity == null) { + continue; + } + + // Check for invalid hierarchy: file cannot contain another file/folder + if (parentEntity.getTypes().contains("File")) { + resultBuilder.addError( + "Invalid hierarchy: file '" + + parentEntity.getId() + + "' cannot contain '" + + childId + + "'" + ); + return false; + } + } + return true; + } + + /** + * Creates missing intermediate DataSetEntity instances for folder paths. 
+ * + * @param pathEntities map of path IDs to DataEntities + * @param resultBuilder builder to collect created entities + */ + protected void createMissingIntermediateEntities( + Map pathEntities, + HierarchyRecognitionResult.Builder resultBuilder + ) { + Set missingPaths = new HashSet<>(); + + // Find all missing intermediate paths + for (String path : pathEntities.keySet()) { + String parentPath = FileSystemUtil.getParentPath(path); + while (parentPath != null && !parentPath.equals("./")) { + String folderPath = parentPath + "/"; + final boolean containsParent = pathEntities.containsKey(parentPath); + final boolean containsFolder = pathEntities.containsKey(folderPath); + if (!containsParent && !containsFolder) { + missingPaths.add(folderPath); + } + parentPath = FileSystemUtil.getParentPath(parentPath); + } + } + + // Create missing DataSetEntity instances + for (String missingPath : missingPaths) { + DataSetEntity newEntity = new DataSetEntity.DataSetBuilder() + .setId(missingPath) + .addProperty("name", "Auto-generated folder: " + missingPath) + .build(); + + this.crate.addDataEntity(newEntity); + pathEntities.put(missingPath, newEntity); + resultBuilder.addCreatedEntity(newEntity); + } + } + + protected void buildHierarchyRelationships( + Map pathEntities, + HierarchyRecognitionConfig config, + HierarchyRecognitionResult.Builder resultBuilder + ) { + for (Map.Entry entry : pathEntities.entrySet()) { + String childId = entry.getKey(); + DataEntity childEntity = entry.getValue(); + String parentPath = FileSystemUtil.getParentPath(childId); + if (parentPath == null) { + continue; + } + + // Check both with and without trailing slash since files don't have slash but folders do + DataEntity parentEntity = pathEntities.get(parentPath); + String actualParentId = parentPath; + + if (parentEntity == null) { + parentEntity = pathEntities.get(parentPath + "/"); + actualParentId = parentPath + "/"; + } + + if (parentEntity == null) { + continue; + } + + // Add hasPart 
relationship + if (parentEntity instanceof DataSetEntity) { + ((DataSetEntity) parentEntity).addToHasPart(childId); + resultBuilder.addProcessedRelationship( + actualParentId, + childId + ); + } + + // Add isPartOf relationship if configured + if (config.setInverseRelationships) { + childEntity.addProperty("isPartOf", actualParentId); + } + + // Remove from root if it has a parent that is not root + if (!parentPath.equals("./")) { + this.crate.getRootDataEntity().removeFromHasPart(childId); + } + } + } + + protected void clearExistingRelationships( + Map pathEntities + ) { + for (DataEntity entity : pathEntities.values()) { + if (entity instanceof DataSetEntity) { + ((DataSetEntity) entity).hasPart.clear(); + } + } + } +} diff --git a/src/main/java/edu/kit/datamanager/ro_crate/util/FileSystemUtil.java b/src/main/java/edu/kit/datamanager/ro_crate/util/FileSystemUtil.java index e4d75442..ca6756f7 100644 --- a/src/main/java/edu/kit/datamanager/ro_crate/util/FileSystemUtil.java +++ b/src/main/java/edu/kit/datamanager/ro_crate/util/FileSystemUtil.java @@ -12,6 +12,54 @@ private FileSystemUtil() { // Utility class, no instantiation } + /** + * Checks if the given ID appears to be a file path. + *

+ * Specifically excludes IDs starting with "doi:", "http", or "https". + * + * @param id the ID to check + * @return true if it looks like a file path, false otherwise + */ + public static boolean isFilePath(String id) { + return !( + id.startsWith("doi:") && + id.startsWith("http") && + id.startsWith("https") + ); + } + + /** + * Gets the parent path of a given path. + * @param path the path to evaluate. + * @return the parent path, or null if no parent exists. + */ + public static String getParentPath(String path) { + if (path == null || path.equals("./") || path.isEmpty()) { + return null; + } + + // Normalize path - remove trailing slash for consistency + String normalizedPath = path.endsWith("/") + ? path.substring(0, path.length() - 1) + : path; + + int lastSlash = normalizedPath.lastIndexOf('/'); + if (lastSlash == -1) { + return "./"; // Root directory + } + + String parentPath = normalizedPath.substring(0, lastSlash); + + // If parent is empty, it's root + if (parentPath.isEmpty()) { + return "./"; + } + + // For validation, we need to check both with and without trailing slash + // since files don't have trailing slash but folders do + return parentPath; + } + /** * Removes a specific set of given file extensions from a file name, if present. * The extensions are case-insensitive. Given "ELN", "eln" or "Eln" will also match. 
diff --git a/src/test/java/edu/kit/datamanager/ro_crate/crate/AutomaticHierarchyRecognitionTest.java b/src/test/java/edu/kit/datamanager/ro_crate/crate/AutomaticHierarchyRecognitionTest.java index 6c78a641..59038a87 100644 --- a/src/test/java/edu/kit/datamanager/ro_crate/crate/AutomaticHierarchyRecognitionTest.java +++ b/src/test/java/edu/kit/datamanager/ro_crate/crate/AutomaticHierarchyRecognitionTest.java @@ -74,10 +74,9 @@ void givenFilesInFolderHierarchy_whenRecognizeStructure_thenEstablishesParentChi assertTrue(processedFolder.hasPart("data/processed/results.txt")); // Root should only contain top-level entities - assertTrue(crate.getRootDataEntity().hasPart("data/")); - assertFalse( - crate.getRootDataEntity().hasPart("data/raw/experiment1.csv") - ); + var root = crate.getRootDataEntity(); + assertTrue(root.hasPart("data/")); + assertEquals(1, root.hasPart.size()); } /** @@ -103,6 +102,11 @@ void givenFileInFolder_whenRecognizeWithIsPartOf_thenCreatesBidirectionalRelatio // Then: Both hasPart and isPartOf should be set assertTrue(folder.hasPart("folder/file.txt")); assertEquals("folder/", file.getProperties().get("isPartOf").asText()); + // same for root! 
+ var root = crate.getRootDataEntity(); + assertTrue(root.hasPart("folder/")); + assertEquals(1, root.hasPart.size()); + assertEquals(root.getId(), folder.getProperties().get("isPartOf").asText("")); } /** @@ -225,10 +229,12 @@ void givenMixOfFilePathsUrlsAndDois_whenRecognizeStructure_thenProcessesOnlyFile DataEntity remoteEntity = new DataEntity.DataEntityBuilder() .setId("https://example.com/remote-file.txt") + .addType("File") .build(); DataEntity doiEntity = new DataEntity.DataEntityBuilder() .setId("doi:10.1234/example") + .addType("CreativeWork") .build(); DataSetEntity folder = new DataSetEntity.DataSetBuilder() @@ -274,6 +280,7 @@ void givenEntitiesWithCircularPathReferences_whenRecognizeStructure_thenHandlesG // Manually create circular reference in IDs (this is contrived but tests the logic) DataEntity circularEntity = new DataEntity.DataEntityBuilder() .setId("folder1/folder2/../../../folder1/file.txt") // resolves to folder1/file.txt + .addType("File") .build(); crate.addDataEntity(folder1); @@ -322,7 +329,10 @@ void givenInvalidEntityData_whenRecognizeStructure_thenFailsWithoutMakingChanges assertTrue(result.hasErrors()); // Original state should be preserved - assertTrue(crate.getRootDataEntity().hasPart("")); // Original relationship intact + assertTrue(crate.getRootDataEntity().hasPart("document.pdf")); + assertTrue( + crate.getRootDataEntity().hasPart("document.pdf/embedded_data.txt") + ); } /** @@ -352,7 +362,7 @@ void givenFileRequiringIntermediateCreation_whenRecognizeStructure_thenReturnsDe HierarchyRecognitionResult info = result; assertEquals(1, info.getCreatedEntities().size()); // "folder/" was created - assertEquals(1, info.getProcessedRelationships().size()); // folder -> file relationship + assertEquals(2, info.getProcessedRelationships().size()); // root -> folder -> file relationship assertTrue(info.getSkippedEntities().isEmpty()); // no entities skipped assertTrue(info.getWarnings().isEmpty()); // no warnings } From 
727ac5c9f27c84ef008eed44c5ea8dde4e892ea4 Mon Sep 17 00:00:00 2001 From: Andreas Pfeil Date: Tue, 4 Nov 2025 15:52:10 +0100 Subject: [PATCH 29/51] feat: addDataEntity connecting to a given entity ID instead of root --- .../edu/kit/datamanager/ro_crate/Crate.java | 10 +++ .../edu/kit/datamanager/ro_crate/RoCrate.java | 78 ++++++++++++++++++- 2 files changed, 86 insertions(+), 2 deletions(-) diff --git a/src/main/java/edu/kit/datamanager/ro_crate/Crate.java b/src/main/java/edu/kit/datamanager/ro_crate/Crate.java index 7c2873f3..51c411f3 100644 --- a/src/main/java/edu/kit/datamanager/ro_crate/Crate.java +++ b/src/main/java/edu/kit/datamanager/ro_crate/Crate.java @@ -122,6 +122,16 @@ public interface Crate { */ void addDataEntity(DataEntity entity); + /** + * Adds a data entity to the crate with a specified parent ID. + *

+ * Consider using + * @param entity the DataEntity to add to this crate. + * @param parentId the ID of the parent entity. Must not be null. + * @throws IllegalArgumentException if parentId is null or not found, or not a DataEntity. + */ + void addDataEntity(DataEntity entity, String parentId) throws IllegalArgumentException; + void addContextualEntity(ContextualEntity entity); void deleteEntityById(String entityId); diff --git a/src/main/java/edu/kit/datamanager/ro_crate/RoCrate.java b/src/main/java/edu/kit/datamanager/ro_crate/RoCrate.java index f5ea03b7..2ac6dc48 100644 --- a/src/main/java/edu/kit/datamanager/ro_crate/RoCrate.java +++ b/src/main/java/edu/kit/datamanager/ro_crate/RoCrate.java @@ -13,6 +13,7 @@ import edu.kit.datamanager.ro_crate.entities.contextual.ContextualEntity; import edu.kit.datamanager.ro_crate.entities.contextual.JsonDescriptor; import edu.kit.datamanager.ro_crate.entities.data.DataEntity; +import edu.kit.datamanager.ro_crate.entities.data.DataSetEntity; import edu.kit.datamanager.ro_crate.entities.data.RootDataEntity; import edu.kit.datamanager.ro_crate.externalproviders.dataentities.ImportFromDataCite; import edu.kit.datamanager.ro_crate.objectmapper.MyObjectMapper; @@ -242,6 +243,42 @@ public void addDataEntity(DataEntity entity) { this.rootDataEntity.addToHasPart(entity.getId()); } + @Override + public void addDataEntity(DataEntity entity, String parentId) + throws IllegalArgumentException { + if (parentId == null) { + throw new IllegalArgumentException("Parent ID is null."); + } + + DataEntity parentEntity = this.getDataEntityById(parentId); + if (parentEntity == null) { + throw new IllegalArgumentException( + "Parent ID not found in the crate." + ); + } + + if (parentEntity.getTypes().contains("File")) { + throw new IllegalArgumentException( + "Parent entity cannot be a File." 
+ ); + } + + if (!parentEntity.getTypes().contains("Dataset")) { + throw new IllegalArgumentException( + "Parent entity must be a Dataset in order to contain another DataEntity as a part." + ); + } + + this.metadataContext.checkEntity(entity); + + if (parentEntity instanceof DataSetEntity) { + ((DataSetEntity) parentEntity).addToHasPart(entity.getId()); + } else { + parentEntity.addProperty("hasPart", entity.getId()); + } + this.roCratePayload.addDataEntity(entity); + } + @Override public void addContextualEntity(ContextualEntity entity) { this.metadataContext.checkEntity(entity); @@ -301,8 +338,10 @@ public Collection getUntrackedFiles() { public HierarchyRecognitionResult createDataEntityFileStructure( boolean addInverseRelationships ) { - HierarchyRecognitionConfig config = new HierarchyRecognitionConfig() - .setInverseRelationships(addInverseRelationships); + HierarchyRecognitionConfig config = + new HierarchyRecognitionConfig().setInverseRelationships( + addInverseRelationships + ); return this.createDataEntityFileStructure(config); } @@ -439,6 +478,41 @@ public RoCrateBuilder addDataEntity(DataEntity dataEntity) { return this; } + public void addDataEntity(DataEntity entity, String parentId) + throws IllegalArgumentException { + if (parentId == null) { + throw new IllegalArgumentException("Parent ID is null."); + } + + DataEntity parentEntity = this.getDataEntityById(parentId); + if (parentEntity == null) { + throw new IllegalArgumentException( + "Parent ID not found in the crate." + ); + } + + if (parentEntity.getTypes().contains("File")) { + throw new IllegalArgumentException( + "Parent entity cannot be a File." + ); + } + + if (!parentEntity.getTypes().contains("Dataset")) { + throw new IllegalArgumentException( + "Parent entity must be a Dataset in order to contain another DataEntity as a part." 
+ ); + } + + this.metadataContext.checkEntity(entity); + + if (parentEntity instanceof DataSetEntity) { + ((DataSetEntity) parentEntity).addToHasPart(entity.getId()); + } else { + parentEntity.addProperty("hasPart", entity.getId()); + } + this.roCratePayload.addDataEntity(entity); + } + public RoCrateBuilder addContextualEntity( ContextualEntity contextualEntity ) { From 1dbc1def3971e810434be4052191bc4ef57ca65d Mon Sep 17 00:00:00 2001 From: Andreas Pfeil Date: Wed, 5 Nov 2025 12:00:55 +0100 Subject: [PATCH 30/51] feat: ignore empty strings as entity types --- .../edu/kit/datamanager/ro_crate/entities/AbstractEntity.java | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/main/java/edu/kit/datamanager/ro_crate/entities/AbstractEntity.java b/src/main/java/edu/kit/datamanager/ro_crate/entities/AbstractEntity.java index 3b51367f..14171c0d 100644 --- a/src/main/java/edu/kit/datamanager/ro_crate/entities/AbstractEntity.java +++ b/src/main/java/edu/kit/datamanager/ro_crate/entities/AbstractEntity.java @@ -443,7 +443,9 @@ public T addType(String type) { if (this.types == null) { this.types = new HashSet<>(); } - this.types.add(type); + if (type != null || !type.isEmpty()) { + this.types.add(type); + } return self(); } From af85c119ede64f3dcb481b7d8bdff1749bf6b3c1 Mon Sep 17 00:00:00 2001 From: Andreas Pfeil Date: Wed, 5 Nov 2025 12:05:02 +0100 Subject: [PATCH 31/51] fix: deserialize entity types properly into types set for consistent access --- .../kit/datamanager/ro_crate/entities/AbstractEntity.java | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/src/main/java/edu/kit/datamanager/ro_crate/entities/AbstractEntity.java b/src/main/java/edu/kit/datamanager/ro_crate/entities/AbstractEntity.java index 14171c0d..f8823a40 100644 --- a/src/main/java/edu/kit/datamanager/ro_crate/entities/AbstractEntity.java +++ b/src/main/java/edu/kit/datamanager/ro_crate/entities/AbstractEntity.java @@ -629,6 +629,13 @@ public T setAllUnsafe(ObjectNode properties) { 
// This will currently only print errors. AbstractEntity.entityValidation.entityValidation(properties); this.properties = properties; + JsonNode typeProps = properties.path("@type"); + if (typeProps.isArray()) { + typeProps.valueStream() + .forEach(value -> this.addType(value.asText())); + } else if (typeProps.isTextual()) { + this.addType(typeProps.asText()); + } this.relatedItems.addAll(JsonUtilFunctions.getIdPropertiesFromJsonNode(properties)); return self(); } From bbdf8956f53bfe0a133d1d2bfea75ae3d4b074b8 Mon Sep 17 00:00:00 2001 From: Andreas Pfeil Date: Wed, 5 Nov 2025 12:18:42 +0100 Subject: [PATCH 32/51] fix: deserialize DataSetEntity properly Previously, Dataset entities were just deserialized into DataEntity instances, making casting impossible. Now we can cast them or retrieve them, and take advantage of their specializations. --- .../edu/kit/datamanager/ro_crate/Crate.java | 8 +++ .../edu/kit/datamanager/ro_crate/RoCrate.java | 9 ++++ .../ro_crate/reader/CrateReader.java | 51 ++++++++++++------- 3 files changed, 51 insertions(+), 17 deletions(-) diff --git a/src/main/java/edu/kit/datamanager/ro_crate/Crate.java b/src/main/java/edu/kit/datamanager/ro_crate/Crate.java index 51c411f3..3ecf1e32 100644 --- a/src/main/java/edu/kit/datamanager/ro_crate/Crate.java +++ b/src/main/java/edu/kit/datamanager/ro_crate/Crate.java @@ -12,6 +12,7 @@ import edu.kit.datamanager.ro_crate.entities.AbstractEntity; import edu.kit.datamanager.ro_crate.entities.contextual.ContextualEntity; import edu.kit.datamanager.ro_crate.entities.data.DataEntity; +import edu.kit.datamanager.ro_crate.entities.data.DataSetEntity; import edu.kit.datamanager.ro_crate.entities.data.RootDataEntity; import edu.kit.datamanager.ro_crate.preview.CratePreview; import edu.kit.datamanager.ro_crate.special.CrateVersion; @@ -107,6 +108,13 @@ public interface Crate { DataEntity getDataEntityById(java.lang.String id); + /** + * Gets a data set entity by its ID.
+ * @param id the ID of the data set entity + * @return the DataSetEntity with the specified ID or empty if not found + */ + Optional getDataSetById(String id); + Set getAllDataEntities(); ContextualEntity getContextualEntityById(java.lang.String id); diff --git a/src/main/java/edu/kit/datamanager/ro_crate/RoCrate.java b/src/main/java/edu/kit/datamanager/ro_crate/RoCrate.java index 2ac6dc48..81e8dd1f 100644 --- a/src/main/java/edu/kit/datamanager/ro_crate/RoCrate.java +++ b/src/main/java/edu/kit/datamanager/ro_crate/RoCrate.java @@ -208,6 +208,15 @@ public DataEntity getDataEntityById(java.lang.String id) { return this.roCratePayload.getDataEntityById(id); } + @Override + public Optional getDataSetById(String id) { + DataEntity data = this.roCratePayload.getDataEntityById(id); + if (data instanceof DataSetEntity) { + return Optional.of((DataSetEntity) data); + } + return Optional.empty(); + } + @Override public Set getAllDataEntities() { return new HashSet<>(this.roCratePayload.getAllDataEntities()); diff --git a/src/main/java/edu/kit/datamanager/ro_crate/reader/CrateReader.java b/src/main/java/edu/kit/datamanager/ro_crate/reader/CrateReader.java index 271725b5..15cae146 100644 --- a/src/main/java/edu/kit/datamanager/ro_crate/reader/CrateReader.java +++ b/src/main/java/edu/kit/datamanager/ro_crate/reader/CrateReader.java @@ -8,6 +8,7 @@ import edu.kit.datamanager.ro_crate.context.RoCrateMetadataContext; import edu.kit.datamanager.ro_crate.entities.contextual.ContextualEntity; import edu.kit.datamanager.ro_crate.entities.data.DataEntity; +import edu.kit.datamanager.ro_crate.entities.data.DataSetEntity; import edu.kit.datamanager.ro_crate.entities.data.RootDataEntity; import edu.kit.datamanager.ro_crate.special.IdentifierUtils; import edu.kit.datamanager.ro_crate.special.JsonUtilFunctions; @@ -114,31 +115,47 @@ private RoCrate rebuildCrate(ObjectNode metadataJson, File files, HashSet dataEntityIds = getDataEntityIds(root, graph); - for (JsonNode entityJson : graph) { 
- String eId = unpackId(entityJson); - if (dataEntityIds.contains(eId)) { + } + RootDataEntity root = crate.getRootDataEntity(); + boolean rootExtractionSuccessful = root != null; + + if (rootExtractionSuccessful) { + Set dataEntityIds = getDataEntityIds(root, graph); + for (JsonNode entityJson : graph) { + String eId = unpackId(entityJson); + ObjectNode properties = entityJson.deepCopy(); + boolean isDataEntity = dataEntityIds.contains(eId); + + if (isDataEntity) { + DataEntity data = null; + boolean isDataSet = properties.path("@type").asText().equals("Dataset") + || properties.path("@type").valueStream() + .anyMatch(typeString -> typeString.asText().equals("Dataset")); + if (isDataSet) { + data = new DataSetEntity.DataSetBuilder() + .setAllUnsafe(properties) + .build(); + } else { // data entity - DataEntity.DataEntityBuilder dataEntity = new DataEntity.DataEntityBuilder() - .setAllUnsafe(entityJson.deepCopy()); + DataEntity.DataEntityBuilder builder = new DataEntity.DataEntityBuilder() + .setAllUnsafe(properties); // Handle data entities with corresponding file checkFolderHasFile(entityJson.get(PROP_ID).asText(), files).ifPresent(file -> { usedFiles.add(file.getPath()); - dataEntity.setLocationWithExceptions(file.toPath()) + builder.setLocationWithExceptions(file.toPath()) .setId(file.getName()); }); - - crate.addDataEntityWithoutRootHasPart(dataEntity.build()); - } else { - // contextual entity - crate.addContextualEntity( - new ContextualEntity.ContextualEntityBuilder() - .setAllUnsafe(entityJson.deepCopy()) - .build()); + data = builder.build(); } + crate.addDataEntityWithoutRootHasPart(data); + + } else { + // contextual entity + crate.addContextualEntity( + new ContextualEntity.ContextualEntityBuilder() + .setAllUnsafe(properties) + .build()); } } } From e672f8fcfe4bd9aa1fed292b5aeb9331df3bc0c1 Mon Sep 17 00:00:00 2001 From: Andreas Pfeil Date: Wed, 5 Nov 2025 12:19:34 +0100 Subject: [PATCH 33/51] chore: cleanup --- 
src/main/java/edu/kit/datamanager/ro_crate/Crate.java | 7 ++++++- src/main/java/edu/kit/datamanager/ro_crate/RoCrate.java | 2 +- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/src/main/java/edu/kit/datamanager/ro_crate/Crate.java b/src/main/java/edu/kit/datamanager/ro_crate/Crate.java index 3ecf1e32..058346d1 100644 --- a/src/main/java/edu/kit/datamanager/ro_crate/Crate.java +++ b/src/main/java/edu/kit/datamanager/ro_crate/Crate.java @@ -106,7 +106,12 @@ public interface Crate { String getJsonMetadata(); - DataEntity getDataEntityById(java.lang.String id); + /** + * Gets a data entity by its ID. + * @param id the ID of the data entity + * @return the DataEntity with the specified ID or null if not found + */ + DataEntity getDataEntityById(String id); /** * Gets a data set entity by its ID. diff --git a/src/main/java/edu/kit/datamanager/ro_crate/RoCrate.java b/src/main/java/edu/kit/datamanager/ro_crate/RoCrate.java index 81e8dd1f..63f74e1c 100644 --- a/src/main/java/edu/kit/datamanager/ro_crate/RoCrate.java +++ b/src/main/java/edu/kit/datamanager/ro_crate/RoCrate.java @@ -204,7 +204,7 @@ public String getJsonMetadata() { } @Override - public DataEntity getDataEntityById(java.lang.String id) { + public DataEntity getDataEntityById(String id) { return this.roCratePayload.getDataEntityById(id); } From af62864eb161e4ca59ef5bc5da7555d88e35631b Mon Sep 17 00:00:00 2001 From: Andreas Pfeil Date: Wed, 5 Nov 2025 12:20:20 +0100 Subject: [PATCH 34/51] fix: allow DataEntity to connect to root explicitly --- src/main/java/edu/kit/datamanager/ro_crate/RoCrate.java | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/main/java/edu/kit/datamanager/ro_crate/RoCrate.java b/src/main/java/edu/kit/datamanager/ro_crate/RoCrate.java index 63f74e1c..cf4326a4 100644 --- a/src/main/java/edu/kit/datamanager/ro_crate/RoCrate.java +++ b/src/main/java/edu/kit/datamanager/ro_crate/RoCrate.java @@ -259,7 +259,10 @@ public void addDataEntity(DataEntity entity, 
String parentId) throw new IllegalArgumentException("Parent ID is null."); } - DataEntity parentEntity = this.getDataEntityById(parentId); + DataEntity parentEntity = parentId.equals("./") + ? this.getRootDataEntity() + : this.getDataEntityById(parentId); + if (parentEntity == null) { throw new IllegalArgumentException( "Parent ID not found in the crate." From 784a211ffd1b0bb706af36590887343c1c29a62a Mon Sep 17 00:00:00 2001 From: Andreas Pfeil Date: Wed, 5 Nov 2025 12:21:55 +0100 Subject: [PATCH 35/51] chore: fixup compilation in RoCrateBuilder::addDataEntity --- src/main/java/edu/kit/datamanager/ro_crate/RoCrate.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/main/java/edu/kit/datamanager/ro_crate/RoCrate.java b/src/main/java/edu/kit/datamanager/ro_crate/RoCrate.java index cf4326a4..1549bc5d 100644 --- a/src/main/java/edu/kit/datamanager/ro_crate/RoCrate.java +++ b/src/main/java/edu/kit/datamanager/ro_crate/RoCrate.java @@ -496,7 +496,7 @@ public void addDataEntity(DataEntity entity, String parentId) throw new IllegalArgumentException("Parent ID is null."); } - DataEntity parentEntity = this.getDataEntityById(parentId); + DataEntity parentEntity = this.payload.getDataEntityById(parentId); if (parentEntity == null) { throw new IllegalArgumentException( "Parent ID not found in the crate." 
@@ -522,7 +522,7 @@ public void addDataEntity(DataEntity entity, String parentId) } else { parentEntity.addProperty("hasPart", entity.getId()); } - this.roCratePayload.addDataEntity(entity); + this.payload.addDataEntity(entity); } public RoCrateBuilder addContextualEntity( From 55f57ba071224a8f12f8554980faf539eab239d8 Mon Sep 17 00:00:00 2001 From: Andreas Pfeil Date: Wed, 5 Nov 2025 12:22:31 +0100 Subject: [PATCH 36/51] test: add several tests for addDataEntity --- .../ro_crate/crate/HasPartTest.java | 102 ++++++++++++++++++ 1 file changed, 102 insertions(+) create mode 100644 src/test/java/edu/kit/datamanager/ro_crate/crate/HasPartTest.java diff --git a/src/test/java/edu/kit/datamanager/ro_crate/crate/HasPartTest.java b/src/test/java/edu/kit/datamanager/ro_crate/crate/HasPartTest.java new file mode 100644 index 00000000..17ea83d3 --- /dev/null +++ b/src/test/java/edu/kit/datamanager/ro_crate/crate/HasPartTest.java @@ -0,0 +1,102 @@ +package edu.kit.datamanager.ro_crate.crate; + +import edu.kit.datamanager.ro_crate.Crate; +import edu.kit.datamanager.ro_crate.RoCrate; +import edu.kit.datamanager.ro_crate.entities.data.DataSetEntity; +import edu.kit.datamanager.ro_crate.entities.data.FileEntity; +import edu.kit.datamanager.ro_crate.reader.Readers; +import edu.kit.datamanager.ro_crate.writer.Writers; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.io.TempDir; + +import java.io.IOException; +import java.nio.file.Path; + +import static org.junit.jupiter.api.Assertions.*; + +/** + * Test similar to AutomaticHierarchyRecognitionTest + */ +public class HasPartTest { + + private RoCrate crate; + + @BeforeEach + void setUp() { + crate = new RoCrate.RoCrateBuilder( + "Test Crate", + "HasPartTest", + "2025", + "https://creativecommons.org/licenses/by/4.0/" + ).build(); + } + + @Test + public void givenEmptyCrate_whenAddingWithConnection_thenThrowsException() { + // Given empty crate (default) + // ... 
+ // When adding entity with connection, Throws Exception + FileEntity d = new FileEntity.FileEntityBuilder().build(); + assertThrows(IllegalArgumentException.class, () -> this.crate.addDataEntity(d, "nonexitent")); + } + + @Test + public void givenEmptyCrate_whenAddingToRoot_thenConnectionExists() { + // Given empty crate (default) + // ... + // When adding entity to root + final String id = "d"; + FileEntity d = new FileEntity.FileEntityBuilder() + .setId(id) + .build(); + this.crate.addDataEntity(d, "./"); + // Then root added entity with hasPart + assertTrue(this.crate.getRootDataEntity().hasPart(id)); + assertNotNull(this.crate.getEntityById(id)); + } + + @Test + public void givenCrateWithFolder_whenAddingToFolder_thenConnectionExists() { + // Given crate with folder + this.crate = new RoCrate.RoCrateBuilder() + .addDataEntity(new DataSetEntity.DataSetBuilder().setId("./folder").build()) + .build(); + // When adding entity to folder + String dataId = "d"; + FileEntity d = new FileEntity.FileEntityBuilder() + .setId(dataId) + .build(); + this.crate.addDataEntity(d, "./folder"); + // Then this connection exists + // Cast required because type was not yet serialized and is not yet in properties. 
+ assertTrue(((DataSetEntity) this.crate.getDataEntityById("./folder")).hasPart(dataId)); + assertNotNull(this.crate.getEntityById(dataId)); + } + + @Test + public void givenCrateFromDisk_whenAddingToFolder_thenConnectionExists( + @TempDir Path path + ) throws IOException { + // Given crate from disk + String folderId = "./folder/"; + this.crate = new RoCrate.RoCrateBuilder() + .addDataEntity(new DataSetEntity.DataSetBuilder().setId(folderId).build()) + .build(); + + Writers.newFolderWriter().save(this.crate, path.toString()); + this.crate = Readers.newFolderReader().readCrate(path.toString()); + + // When adding entity to folder + String dataId = "d"; + FileEntity d = new FileEntity.FileEntityBuilder() + .setId(dataId) + .build(); + this.crate.addDataEntity(d, folderId); + // Then this connection exists + // Note how the types are loaded when deserializing. Alternatively, you can find them in their properties. + assertTrue(this.crate.getDataEntityById(folderId).getTypes().contains("Dataset")); + // Note how you can cast an entity to a dataSetEntity. + assertTrue(this.crate.getDataSetById(folderId).orElseThrow().hasPart(dataId)); + } +} \ No newline at end of file From 140717ff70d49425e7a41eff8176f5cb648adfc2 Mon Sep 17 00:00:00 2001 From: Andreas Pfeil Date: Wed, 5 Nov 2025 12:42:45 +0100 Subject: [PATCH 37/51] test: add several tests for addDataEntity, splitting between the builder version and the crate version. 
--- .../edu/kit/datamanager/ro_crate/RoCrate.java | 5 +- .../ro_crate/crate/HasPartTest.java | 241 ++++++++++++------ 2 files changed, 173 insertions(+), 73 deletions(-) diff --git a/src/main/java/edu/kit/datamanager/ro_crate/RoCrate.java b/src/main/java/edu/kit/datamanager/ro_crate/RoCrate.java index 1549bc5d..05822b22 100644 --- a/src/main/java/edu/kit/datamanager/ro_crate/RoCrate.java +++ b/src/main/java/edu/kit/datamanager/ro_crate/RoCrate.java @@ -496,7 +496,10 @@ public void addDataEntity(DataEntity entity, String parentId) throw new IllegalArgumentException("Parent ID is null."); } - DataEntity parentEntity = this.payload.getDataEntityById(parentId); + DataEntity parentEntity = parentId.equals("./") + ? this.rootDataEntity + : this.payload.getDataEntityById(parentId); + if (parentEntity == null) { throw new IllegalArgumentException( "Parent ID not found in the crate." diff --git a/src/test/java/edu/kit/datamanager/ro_crate/crate/HasPartTest.java b/src/test/java/edu/kit/datamanager/ro_crate/crate/HasPartTest.java index 17ea83d3..0dcdece3 100644 --- a/src/test/java/edu/kit/datamanager/ro_crate/crate/HasPartTest.java +++ b/src/test/java/edu/kit/datamanager/ro_crate/crate/HasPartTest.java @@ -7,6 +7,8 @@ import edu.kit.datamanager.ro_crate.reader.Readers; import edu.kit.datamanager.ro_crate.writer.Writers; import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.DisplayName; +import org.junit.jupiter.api.Nested; import org.junit.jupiter.api.Test; import org.junit.jupiter.api.io.TempDir; @@ -20,83 +22,178 @@ */ public class HasPartTest { - private RoCrate crate; + @Nested + @DisplayName("Test for crate (not the builder!) 
addDataEntity(DataEntity, String)") + class CrateHasPartTest { + private RoCrate crate; - @BeforeEach - void setUp() { - crate = new RoCrate.RoCrateBuilder( - "Test Crate", - "HasPartTest", - "2025", - "https://creativecommons.org/licenses/by/4.0/" - ).build(); - } + @BeforeEach + void setUp() { + crate = new RoCrate.RoCrateBuilder( + "Test Crate", + "HasPartTest", + "2025", + "https://creativecommons.org/licenses/by/4.0/" + ).build(); + } - @Test - public void givenEmptyCrate_whenAddingWithConnection_thenThrowsException() { - // Given empty crate (default) - // ... - // When adding entity with connection, Throws Exception - FileEntity d = new FileEntity.FileEntityBuilder().build(); - assertThrows(IllegalArgumentException.class, () -> this.crate.addDataEntity(d, "nonexitent")); - } + @Test + public void givenEmptyCrate_whenAddingWithConnection_thenThrowsException() { + // Given empty crate (default) + // ... + // When adding entity with connection, Throws Exception + FileEntity d = new FileEntity.FileEntityBuilder().build(); + assertThrows(IllegalArgumentException.class, () -> this.crate.addDataEntity(d, "nonexitent")); + } - @Test - public void givenEmptyCrate_whenAddingToRoot_thenConnectionExists() { - // Given empty crate (default) - // ... - // When adding entity to root - final String id = "d"; - FileEntity d = new FileEntity.FileEntityBuilder() - .setId(id) - .build(); - this.crate.addDataEntity(d, "./"); - // Then root added entity with hasPart - assertTrue(this.crate.getRootDataEntity().hasPart(id)); - assertNotNull(this.crate.getEntityById(id)); - } + @Test + public void givenEmptyCrate_whenAddingToRoot_thenConnectionExists() { + // Given empty crate (default) + // ... 
+ // When adding entity to root + final String id = "d"; + FileEntity d = new FileEntity.FileEntityBuilder() + .setId(id) + .build(); + this.crate.addDataEntity(d, "./"); + // Then root added entity with hasPart + assertTrue(this.crate.getRootDataEntity().hasPart(id)); + assertNotNull(this.crate.getEntityById(id)); + } + + @Test + public void givenCrateWithFolder_whenAddingToFolder_thenConnectionExists() { + // Given crate with folder + this.crate = new RoCrate.RoCrateBuilder() + .addDataEntity(new DataSetEntity.DataSetBuilder().setId("./folder").build()) + .build(); + // When adding entity to folder + String dataId = "d"; + FileEntity d = new FileEntity.FileEntityBuilder() + .setId(dataId) + .build(); + this.crate.addDataEntity(d, "./folder"); + // Then this connection exists + // Cast required because type was not yet serialized and is not yet in properties. + assertTrue(((DataSetEntity) this.crate.getDataEntityById("./folder")).hasPart(dataId)); + assertNotNull(this.crate.getEntityById(dataId)); + } - @Test - public void givenCrateWithFolder_whenAddingToFolder_thenConnectionExists() { - // Given crate with folder - this.crate = new RoCrate.RoCrateBuilder() - .addDataEntity(new DataSetEntity.DataSetBuilder().setId("./folder").build()) - .build(); - // When adding entity to folder - String dataId = "d"; - FileEntity d = new FileEntity.FileEntityBuilder() - .setId(dataId) - .build(); - this.crate.addDataEntity(d, "./folder"); - // Then this connection exists - // Cast required because type was not yet serialized and is not yet in properties. 
- assertTrue(((DataSetEntity) this.crate.getDataEntityById("./folder")).hasPart(dataId)); - assertNotNull(this.crate.getEntityById(dataId)); + @Test + public void givenCrateFromDisk_whenAddingToFolder_thenConnectionExists( + @TempDir Path path + ) throws IOException { + // Given crate from disk + String folderId = "./folder/"; + this.crate = new RoCrate.RoCrateBuilder() + .addDataEntity(new DataSetEntity.DataSetBuilder().setId(folderId).build()) + .build(); + + Writers.newFolderWriter().save(this.crate, path.toString()); + this.crate = Readers.newFolderReader().readCrate(path.toString()); + + // When adding entity to folder + String dataId = "d"; + FileEntity d = new FileEntity.FileEntityBuilder() + .setId(dataId) + .build(); + this.crate.addDataEntity(d, folderId); + // Then this connection exists + // Note how the types are loaded when deserializing. Alternatively, you can find them in their properties. + assertTrue(this.crate.getDataEntityById(folderId).getTypes().contains("Dataset")); + // Note how you can cast an entity to a dataSetEntity. + assertTrue(this.crate.getDataSetById(folderId).orElseThrow().hasPart(dataId)); + } } - @Test - public void givenCrateFromDisk_whenAddingToFolder_thenConnectionExists( - @TempDir Path path - ) throws IOException { - // Given crate from disk - String folderId = "./folder/"; - this.crate = new RoCrate.RoCrateBuilder() - .addDataEntity(new DataSetEntity.DataSetBuilder().setId(folderId).build()) - .build(); - - Writers.newFolderWriter().save(this.crate, path.toString()); - this.crate = Readers.newFolderReader().readCrate(path.toString()); - - // When adding entity to folder - String dataId = "d"; - FileEntity d = new FileEntity.FileEntityBuilder() - .setId(dataId) - .build(); - this.crate.addDataEntity(d, folderId); - // Then this connection exists - // Note how the types are loaded when deserializing. Alternatively, you can find them in their properties. 
- assertTrue(this.crate.getDataEntityById(folderId).getTypes().contains("Dataset")); - // Note how you can cast an entity to a dataSetEntity. - assertTrue(this.crate.getDataSetById(folderId).orElseThrow().hasPart(dataId)); + @Nested + @DisplayName("Testing the builder addDataEntity(DataEntity, String)") + class BuilderHasPartTest { + private RoCrate.RoCrateBuilder builder; + + @BeforeEach + void setUp() { + builder = new RoCrate.RoCrateBuilder( + "Test Crate", + "HasPartTest", + "2025", + "https://creativecommons.org/licenses/by/4.0/" + ); + } + + @Test + public void givenEmptyCrate_whenAddingWithConnection_thenThrowsException() { + // Given empty crate (default) + // ... + // When adding entity with connection, Throws Exception + FileEntity d = new FileEntity.FileEntityBuilder().build(); + assertThrows(IllegalArgumentException.class, () -> this.builder.addDataEntity(d, "nonexitent")); + } + + @Test + public void givenEmptyCrate_whenAddingToRoot_thenConnectionExists() { + // Given empty crate (default) + // ... + // When adding entity to root + final String id = "d"; + FileEntity d = new FileEntity.FileEntityBuilder() + .setId(id) + .build(); + this.builder.addDataEntity(d, "./"); + // Then root added entity with hasPart + Crate crate = this.builder.build(); + assertTrue(crate.getRootDataEntity().hasPart(id)); + assertNotNull(crate.getEntityById(id)); + } + + @Test + public void givenCrateWithFolder_whenAddingToFolder_thenConnectionExists() { + // Given crate with folder + this.builder.addDataEntity( + new DataSetEntity.DataSetBuilder() + .setId("./folder") + .build() + ); + // When adding entity to folder + String dataId = "d"; + FileEntity d = new FileEntity.FileEntityBuilder() + .setId(dataId) + .build(); + this.builder.addDataEntity(d, "./folder"); + // Then this connection exists + Crate crate = this.builder.build(); + // Cast required because type was not yet serialized and is not yet in properties. 
+ assertTrue(((DataSetEntity) crate.getDataEntityById("./folder")).hasPart(dataId)); + assertNotNull(crate.getEntityById(dataId)); + } + + @Test + public void givenCrateFromDisk_whenAddingToFolder_thenConnectionExists( + @TempDir Path path + ) throws IOException { + // Given crate from disk + String folderId = "./folder/"; + Crate crate = this.builder.addDataEntity( + new DataSetEntity.DataSetBuilder() + .setId(folderId) + .build() + ) + .build(); + + Writers.newFolderWriter().save(crate, path.toString()); + Crate read = Readers.newFolderReader().readCrate(path.toString()); + + // When adding entity to folder + String dataId = "d"; + FileEntity d = new FileEntity.FileEntityBuilder() + .setId(dataId) + .build(); + read.addDataEntity(d, folderId); + // Then this connection exists + // Note how the types are loaded when deserializing. Alternatively, you can find them in their properties. + assertTrue(read.getDataEntityById(folderId).getTypes().contains("Dataset")); + // Note how you can cast an entity to a dataSetEntity. 
+ assertTrue(read.getDataSetById(folderId).orElseThrow().hasPart(dataId)); + } } } \ No newline at end of file From 39073858bdf816dbb161e865787a00a774bcd348 Mon Sep 17 00:00:00 2001 From: Andreas Pfeil Date: Wed, 5 Nov 2025 13:04:25 +0100 Subject: [PATCH 38/51] cleanup: simplify HierarchyRecognitionResult --- .../ro_crate/crate/HierarchyRecognition.java | 35 +- .../crate/HierarchyRecognitionResult.java | 338 ++---------------- .../AutomaticHierarchyRecognitionTest.java | 9 +- 3 files changed, 45 insertions(+), 337 deletions(-) diff --git a/src/main/java/edu/kit/datamanager/ro_crate/crate/HierarchyRecognition.java b/src/main/java/edu/kit/datamanager/ro_crate/crate/HierarchyRecognition.java index 8f92d68d..7fc2d49a 100644 --- a/src/main/java/edu/kit/datamanager/ro_crate/crate/HierarchyRecognition.java +++ b/src/main/java/edu/kit/datamanager/ro_crate/crate/HierarchyRecognition.java @@ -20,8 +20,7 @@ public HierarchyRecognition(Crate crate, HierarchyRecognitionConfig config) { } public HierarchyRecognitionResult buildHierarchy() { - HierarchyRecognitionResult.Builder resultBuilder = - new HierarchyRecognitionResult.Builder(); + HierarchyRecognitionResult result = new HierarchyRecognitionResult(); try { // Get all data entities to process @@ -35,19 +34,19 @@ public HierarchyRecognitionResult buildHierarchy() { if (FileSystemUtil.isFilePath(id)) { pathEntities.put(id, entity); } else { - resultBuilder.addSkippedEntity(entity); + result.addSkippedEntity(entity); } } // Validate hierarchy before making changes - if (!HierarchyRecognition.validateHierarchy(pathEntities, resultBuilder)) { - return resultBuilder.build(); + if (!HierarchyRecognition.validateHierarchy(pathEntities, result)) { + return result; } // Create missing intermediate entities if configured if (config.createMissingIntermediateEntities) { - this.createMissingIntermediateEntities(pathEntities, resultBuilder); + this.createMissingIntermediateEntities(pathEntities, result); } // Clear existing relationships 
if configured @@ -56,15 +55,15 @@ public HierarchyRecognitionResult buildHierarchy() { } // Build hierarchy relationships - this.buildHierarchyRelationships(pathEntities, config, resultBuilder); + this.buildHierarchyRelationships(pathEntities, config, result); - return resultBuilder.build(); + return result; } catch (Exception e) { - resultBuilder.addError( + result.addError( "Unexpected error during hierarchy recognition: " + e.getMessage() ); - return resultBuilder.build(); + return result; } } @@ -72,12 +71,12 @@ public HierarchyRecognitionResult buildHierarchy() { * Validates that the hierarchy is consistent (no files containing other files/folders). * * @param pathEntities map of path IDs to DataEntities - * @param resultBuilder builder to collect errors + * @param result builder to collect errors * @return true if valid, false if invalid hierarchy detected */ protected static boolean validateHierarchy( Map pathEntities, - HierarchyRecognitionResult.Builder resultBuilder + HierarchyRecognitionResult result ) { for (Map.Entry entry : pathEntities.entrySet()) { String childId = entry.getKey(); @@ -98,7 +97,7 @@ protected static boolean validateHierarchy( // Check for invalid hierarchy: file cannot contain another file/folder if (parentEntity.getTypes().contains("File")) { - resultBuilder.addError( + result.addError( "Invalid hierarchy: file '" + parentEntity.getId() + "' cannot contain '" + @@ -115,11 +114,11 @@ protected static boolean validateHierarchy( * Creates missing intermediate DataSetEntity instances for folder paths. 
* * @param pathEntities map of path IDs to DataEntities - * @param resultBuilder builder to collect created entities + * @param result builder to collect created entities */ protected void createMissingIntermediateEntities( Map pathEntities, - HierarchyRecognitionResult.Builder resultBuilder + HierarchyRecognitionResult result ) { Set missingPaths = new HashSet<>(); @@ -146,14 +145,14 @@ protected void createMissingIntermediateEntities( this.crate.addDataEntity(newEntity); pathEntities.put(missingPath, newEntity); - resultBuilder.addCreatedEntity(newEntity); + result.addCreatedEntity(newEntity); } } protected void buildHierarchyRelationships( Map pathEntities, HierarchyRecognitionConfig config, - HierarchyRecognitionResult.Builder resultBuilder + HierarchyRecognitionResult result ) { for (Map.Entry entry : pathEntities.entrySet()) { String childId = entry.getKey(); @@ -179,7 +178,7 @@ protected void buildHierarchyRelationships( // Add hasPart relationship if (parentEntity instanceof DataSetEntity) { ((DataSetEntity) parentEntity).addToHasPart(childId); - resultBuilder.addProcessedRelationship( + result.addProcessedRelationship( actualParentId, childId ); diff --git a/src/main/java/edu/kit/datamanager/ro_crate/crate/HierarchyRecognitionResult.java b/src/main/java/edu/kit/datamanager/ro_crate/crate/HierarchyRecognitionResult.java index 5f9e3bd4..ad6708db 100644 --- a/src/main/java/edu/kit/datamanager/ro_crate/crate/HierarchyRecognitionResult.java +++ b/src/main/java/edu/kit/datamanager/ro_crate/crate/HierarchyRecognitionResult.java @@ -1,6 +1,8 @@ package edu.kit.datamanager.ro_crate.crate; import edu.kit.datamanager.ro_crate.entities.data.DataEntity; +import edu.kit.datamanager.ro_crate.entities.data.DataSetEntity; + import java.util.*; /** @@ -9,333 +11,41 @@ * during the hierarchy recognition process. Always contains complete information about the * operation result, including success/failure and any errors encountered. 
*/ -public class HierarchyRecognitionResult { - - private final Set createdEntities; - private final Map> processedRelationships; - private final Set skippedEntities; - private final List warnings; - private final List errors; - private final boolean successful; - - private HierarchyRecognitionResult(Builder builder) { - this.createdEntities = Collections.unmodifiableSet( - new HashSet<>(builder.createdEntities) - ); - this.processedRelationships = Collections.unmodifiableMap( - new HashMap<>(builder.processedRelationships) - ); - this.skippedEntities = Collections.unmodifiableSet( - new HashSet<>(builder.skippedEntities) - ); - this.warnings = Collections.unmodifiableList( - new ArrayList<>(builder.warnings) - ); - this.errors = Collections.unmodifiableList( - new ArrayList<>(builder.errors) - ); - // If there are any errors, the operation is not successful - this.successful = builder.successful && builder.errors.isEmpty(); - } - - /** - * Gets the entities that were automatically created during the process. - * These are typically intermediate folder entities that were missing. - * - * @return set of created entities - */ - public Set getCreatedEntities() { - return createdEntities; - } - - /** - * Gets the parent-child relationships that were processed. - * The map contains parent entity IDs as keys and sets of child entity IDs as values. - * - * @return map of processed relationships - */ - public Map> getProcessedRelationships() { - return processedRelationships; - } - - /** - * Gets the entities that were skipped during processing. - * These might include entities with non-file-path IDs or entities - * that couldn't be processed for other reasons. 
- * - * @return set of skipped entities - */ - public Set getSkippedEntities() { - return skippedEntities; +public record HierarchyRecognitionResult( + Set createdEntities, + Map> processedRelationships, + Set skippedEntities, + List warnings, + List errors +) { + HierarchyRecognitionResult() { + this(new HashSet<>(), new HashMap<>(), new HashSet<>(), new ArrayList<>(), new ArrayList<>()); } /** - * Gets any warnings generated during the process. - * Warnings indicate potential issues that didn't prevent the operation - * from completing but might need attention. + * Whether there were no errors during the hierarchy recognition operation. * - * @return list of warning messages - */ - public List getWarnings() { - return warnings; - } - - /** - * Gets any errors that occurred during the process. - * Errors indicate problems that prevented the operation from - * completing successfully or caused it to fail. - * - * @return list of error messages - */ - public List getErrors() { - return errors; - } - - /** - * Indicates whether the operation completed successfully. - * Even successful operations might have warnings or skipped entities. - * - * @return true if the operation completed successfully + * @return true if the operation completed successfully (no errors), false otherwise. */ public boolean isSuccessful() { - return successful; + return this.errors.isEmpty(); } - /** - * Gets the total number of relationships that were established. - * - * @return total count of parent-child relationships - */ - public int getTotalRelationshipsCount() { - return processedRelationships - .values() - .stream() - .mapToInt(Set::size) - .sum(); + public void addSkippedEntity(DataEntity entity) { + this.skippedEntities.add(entity); } - /** - * Checks if any entities were created during the process. 
- * - * @return true if entities were created - */ - public boolean hasCreatedEntities() { - return !createdEntities.isEmpty(); + public void addError(String errorMessage) { + this.errors.add(errorMessage); } - /** - * Checks if any entities were skipped during the process. - * - * @return true if entities were skipped - */ - public boolean hasSkippedEntities() { - return !skippedEntities.isEmpty(); - } - - /** - * Checks if any warnings were generated during the process. - * - * @return true if warnings exist - */ - public boolean hasWarnings() { - return !warnings.isEmpty(); - } - - /** - * Checks if any errors occurred during the process. - * - * @return true if errors exist - */ - public boolean hasErrors() { - return !errors.isEmpty(); - } - - /** - * Creates a new builder for constructing HierarchyRecognitionResult instances. - * - * @return new builder instance - */ - public static Builder builder() { - return new Builder(); - } - - /** - * Builder class for creating HierarchyRecognitionResult instances. - */ - public static class Builder { - - private final Set createdEntities = new HashSet<>(); - private final Map> processedRelationships = - new HashMap<>(); - private final Set skippedEntities = new HashSet<>(); - private final List warnings = new ArrayList<>(); - private final List errors = new ArrayList<>(); - private boolean successful = true; - - /** - * Adds an entity that was created during the process. - * - * @param entity the created entity - * @return this builder - */ - public Builder addCreatedEntity(DataEntity entity) { - this.createdEntities.add(entity); - return this; - } - - /** - * Adds multiple entities that were created during the process. - * - * @param entities the created entities - * @return this builder - */ - public Builder addCreatedEntities(Collection entities) { - this.createdEntities.addAll(entities); - return this; - } - - /** - * Adds a processed parent-child relationship. 
- * - * @param parentId the parent entity ID - * @param childId the child entity ID - * @return this builder - */ - public Builder addProcessedRelationship( - String parentId, - String childId - ) { - this.processedRelationships.computeIfAbsent(parentId, k -> - new HashSet<>() - ).add(childId); - return this; - } - - /** - * Adds multiple processed relationships for a parent. - * - * @param parentId the parent entity ID - * @param childIds the child entity IDs - * @return this builder - */ - public Builder addProcessedRelationships( - String parentId, - Collection childIds - ) { - this.processedRelationships.computeIfAbsent(parentId, k -> - new HashSet<>() - ).addAll(childIds); - return this; - } - - /** - * Adds an entity that was skipped during processing. - * - * @param entity the skipped entity - * @return this builder - */ - public Builder addSkippedEntity(DataEntity entity) { - this.skippedEntities.add(entity); - return this; - } - - /** - * Adds multiple entities that were skipped during processing. - * - * @param entities the skipped entities - * @return this builder - */ - public Builder addSkippedEntities(Collection entities) { - this.skippedEntities.addAll(entities); - return this; - } - - /** - * Adds a warning message. - * - * @param warning the warning message - * @return this builder - */ - public Builder addWarning(String warning) { - this.warnings.add(warning); - return this; - } - - /** - * Adds multiple warning messages. - * - * @param warnings the warning messages - * @return this builder - */ - public Builder addWarnings(Collection warnings) { - this.warnings.addAll(warnings); - return this; - } - - /** - * Adds an error message and marks the operation as unsuccessful. - * - * @param error the error message - * @return this builder - */ - public Builder addError(String error) { - this.errors.add(error); - this.successful = false; - return this; - } - - /** - * Adds multiple error messages and marks the operation as unsuccessful. 
- * - * @param errors the error messages - * @return this builder - */ - public Builder addErrors(Collection errors) { - this.errors.addAll(errors); - if (!errors.isEmpty()) { - this.successful = false; - } - return this; - } - - /** - * Sets whether the operation was successful. - * - * @param successful true if successful - * @return this builder - */ - public Builder setSuccessful(boolean successful) { - this.successful = successful; - return this; - } - - /** - * Builds the result object. - * - * @return the constructed HierarchyRecognitionResult - */ - public HierarchyRecognitionResult build() { - return new HierarchyRecognitionResult(this); - } + public void addCreatedEntity(DataSetEntity newEntity) { + this.createdEntities.add(newEntity); } - @Override - public String toString() { - return ( - "HierarchyRecognitionResult{" + - "successful=" + - successful + - ", createdEntities=" + - createdEntities.size() + - ", processedRelationships=" + - getTotalRelationshipsCount() + - ", skippedEntities=" + - skippedEntities.size() + - ", warnings=" + - warnings.size() + - ", errors=" + - errors.size() + - '}' - ); + public void addProcessedRelationship(String from, String to) { + this.processedRelationships + .computeIfAbsent(from, k -> new HashSet<>()) + .add(to); } } diff --git a/src/test/java/edu/kit/datamanager/ro_crate/crate/AutomaticHierarchyRecognitionTest.java b/src/test/java/edu/kit/datamanager/ro_crate/crate/AutomaticHierarchyRecognitionTest.java index 59038a87..cb21b969 100644 --- a/src/test/java/edu/kit/datamanager/ro_crate/crate/AutomaticHierarchyRecognitionTest.java +++ b/src/test/java/edu/kit/datamanager/ro_crate/crate/AutomaticHierarchyRecognitionTest.java @@ -326,7 +326,6 @@ void givenInvalidEntityData_whenRecognizeStructure_thenFailsWithoutMakingChanges // Then: Should fail without making any changes assertFalse(result.isSuccessful()); - assertTrue(result.hasErrors()); // Original state should be preserved 
assertTrue(crate.getRootDataEntity().hasPart("document.pdf")); @@ -361,9 +360,9 @@ void givenFileRequiringIntermediateCreation_whenRecognizeStructure_thenReturnsDe assertTrue(result.isSuccessful()); HierarchyRecognitionResult info = result; - assertEquals(1, info.getCreatedEntities().size()); // "folder/" was created - assertEquals(2, info.getProcessedRelationships().size()); // root -> folder -> file relationship - assertTrue(info.getSkippedEntities().isEmpty()); // no entities skipped - assertTrue(info.getWarnings().isEmpty()); // no warnings + assertEquals(1, info.createdEntities().size()); // "folder/" was created + assertEquals(2, info.processedRelationships().size()); // root -> folder -> file relationship + assertTrue(info.skippedEntities().isEmpty()); // no entities skipped + assertTrue(info.warnings().isEmpty()); // no warnings } } From 79e0bb112b418581402f84d6af19967a12557ef7 Mon Sep 17 00:00:00 2001 From: Andreas Pfeil Date: Wed, 5 Nov 2025 13:25:56 +0100 Subject: [PATCH 39/51] cleanup: simplify HierarchyRecognitionConfig --- .../edu/kit/datamanager/ro_crate/RoCrate.java | 6 +- .../ro_crate/crate/HierarchyRecognition.java | 6 +- .../crate/HierarchyRecognitionConfig.java | 109 +++++------------- .../AutomaticHierarchyRecognitionTest.java | 25 ++-- 4 files changed, 43 insertions(+), 103 deletions(-) diff --git a/src/main/java/edu/kit/datamanager/ro_crate/RoCrate.java b/src/main/java/edu/kit/datamanager/ro_crate/RoCrate.java index 05822b22..d6072446 100644 --- a/src/main/java/edu/kit/datamanager/ro_crate/RoCrate.java +++ b/src/main/java/edu/kit/datamanager/ro_crate/RoCrate.java @@ -350,10 +350,8 @@ public Collection getUntrackedFiles() { public HierarchyRecognitionResult createDataEntityFileStructure( boolean addInverseRelationships ) { - HierarchyRecognitionConfig config = - new HierarchyRecognitionConfig().setInverseRelationships( - addInverseRelationships - ); + HierarchyRecognitionConfig config = new HierarchyRecognitionConfig() + 
.withSetInverseRelationships(addInverseRelationships); return this.createDataEntityFileStructure(config); } diff --git a/src/main/java/edu/kit/datamanager/ro_crate/crate/HierarchyRecognition.java b/src/main/java/edu/kit/datamanager/ro_crate/crate/HierarchyRecognition.java index 7fc2d49a..5c2c4bd2 100644 --- a/src/main/java/edu/kit/datamanager/ro_crate/crate/HierarchyRecognition.java +++ b/src/main/java/edu/kit/datamanager/ro_crate/crate/HierarchyRecognition.java @@ -45,12 +45,12 @@ public HierarchyRecognitionResult buildHierarchy() { } // Create missing intermediate entities if configured - if (config.createMissingIntermediateEntities) { + if (config.createMissingIntermediateEntities()) { this.createMissingIntermediateEntities(pathEntities, result); } // Clear existing relationships if configured - if (config.removeExistingConnections) { + if (config.removeExistingConnections()) { this.clearExistingRelationships(pathEntities); } @@ -185,7 +185,7 @@ protected void buildHierarchyRelationships( } // Add isPartOf relationship if configured - if (config.setInverseRelationships) { + if (config.createInverseRelationships()) { childEntity.addProperty("isPartOf", actualParentId); } diff --git a/src/main/java/edu/kit/datamanager/ro_crate/crate/HierarchyRecognitionConfig.java b/src/main/java/edu/kit/datamanager/ro_crate/crate/HierarchyRecognitionConfig.java index dec46d6e..ce602e5e 100644 --- a/src/main/java/edu/kit/datamanager/ro_crate/crate/HierarchyRecognitionConfig.java +++ b/src/main/java/edu/kit/datamanager/ro_crate/crate/HierarchyRecognitionConfig.java @@ -1,98 +1,43 @@ package edu.kit.datamanager.ro_crate.crate; /** - * Configuration class for automatic hierarchy recognition functionality. - * This class provides fine-grained control over how the hierarchy recognition - * algorithm behaves using a fluent interface with setter methods. + * Configuration class for automatic hierarchy recognition. + * This class provides control over how the hierarchy recognition. + * + *

    + *
  • createMissingIntermediateEntities: Whether missing intermediate folder entities should be automatically created. Default: false
  • + *
  • createInverseRelationships: Whether isPartOf relationships should be added in addition to hasPart. Default: false
  • + *
  • removeExistingConnections: Whether new hasPart relationships are added alongside existing ones (false) or existing relations are removed beforehand (true). Default: false
  • + *
*/ -public class HierarchyRecognitionConfig { - - /** - * Whether missing intermediate folder entities should be automatically created. - *

- * Default: false (only connect existing entities) - */ - public boolean createMissingIntermediateEntities = false; - - /** - * Whether isPartOf relationships should be added in addition to hasPart. - *

- * Default: false (only add hasPart relationships) - */ - public boolean setInverseRelationships = false; - - /** - * Whether hasPart relationships should be added (false) - * or remove existing relations in beforehand (true). - *

- * Default: false (keep relations) - */ - public boolean removeExistingConnections = false; - +public record HierarchyRecognitionConfig( + boolean createMissingIntermediateEntities, + boolean createInverseRelationships, + boolean removeExistingConnections +) { /** * Creates a new configuration with default values. + *

+ * Default values: + *

    + *
  • createMissingIntermediateEntities: false
  • + *
  • createInverseRelationships: false
  • + *
  • removeExistingConnections: false
  • + *
*/ public HierarchyRecognitionConfig() { - // All defaults are set via field initializers - } - - /** - * Sets whether missing intermediate folder entities should be automatically created. - * - * @param create true to create missing DataSetEntity instances for intermediate folders - * @return this configuration object for method chaining - */ - public HierarchyRecognitionConfig createMissingIntermediateEntities( - boolean create - ) { - this.createMissingIntermediateEntities = create; - return this; - } - - /** - * Sets whether isPartOf relationships should be added in addition to hasPart. - * - * @param addIsPartOf true to add bidirectional relationships - * @return this configuration object for method chaining - */ - public HierarchyRecognitionConfig setInverseRelationships( - boolean addIsPartOf - ) { - this.setInverseRelationships = addIsPartOf; - return this; + this(false, false, false); } - /** - * Whether hasPart relationships should be added (false) - * or remove existing relations in beforehand (true). - * - * @param removeExistingConnections true to remove existing connections - * @return this configuration object for method chaining - */ - public HierarchyRecognitionConfig removeExistingConnections( - boolean removeExistingConnections - ) { - this.removeExistingConnections = removeExistingConnections; - return this; + public HierarchyRecognitionConfig withCreateMissingIntermediateEntities(boolean value) { + return new HierarchyRecognitionConfig(value, this.createInverseRelationships, this.removeExistingConnections); } - /** - * Creates a configuration with default sensible values. 
- * @return default configuration - */ - public static HierarchyRecognitionConfig defaultConfig() { - return new HierarchyRecognitionConfig(); + public HierarchyRecognitionConfig withSetInverseRelationships(boolean value) { + return new HierarchyRecognitionConfig(this.createMissingIntermediateEntities, value, this.removeExistingConnections); } - @Override - public String toString() { - return ( - "HierarchyRecognitionConfig{" + - "createMissingIntermediateEntities=" + - createMissingIntermediateEntities + - ", addIsPartOfRelationships=" + - setInverseRelationships + - '}' - ); + public HierarchyRecognitionConfig withRemoveExistingConnections(boolean value) { + return new HierarchyRecognitionConfig(this.createMissingIntermediateEntities, this.createInverseRelationships, value); } } diff --git a/src/test/java/edu/kit/datamanager/ro_crate/crate/AutomaticHierarchyRecognitionTest.java b/src/test/java/edu/kit/datamanager/ro_crate/crate/AutomaticHierarchyRecognitionTest.java index cb21b969..ac10fac6 100644 --- a/src/test/java/edu/kit/datamanager/ro_crate/crate/AutomaticHierarchyRecognitionTest.java +++ b/src/test/java/edu/kit/datamanager/ro_crate/crate/AutomaticHierarchyRecognitionTest.java @@ -123,9 +123,9 @@ void givenDeepNestedPathWithMissingIntermediates_whenRecognizeWithCreateMissing_ // When: We configure to create missing intermediate entities HierarchyRecognitionConfig config = new HierarchyRecognitionConfig() - .createMissingIntermediateEntities(true) - .setInverseRelationships(true) - .removeExistingConnections(true); + .withCreateMissingIntermediateEntities(true) + .withSetInverseRelationships(true) + .withRemoveExistingConnections(true); HierarchyRecognitionResult result = crate.createDataEntityFileStructure( config @@ -180,8 +180,8 @@ void givenFolderWithExistingRelations_whenRecognizeWithRemoveExisting_thenKeepsO crate.addDataEntity(folder); // When: We merge with existing relationships - HierarchyRecognitionConfig config = - new 
HierarchyRecognitionConfig().removeExistingConnections(true); + HierarchyRecognitionConfig config = new HierarchyRecognitionConfig() + .withRemoveExistingConnections(true); crate.createDataEntityFileStructure(config); @@ -346,10 +346,8 @@ void givenFileRequiringIntermediateCreation_whenRecognizeStructure_thenReturnsDe crate.addDataEntity(file); - HierarchyRecognitionConfig config = - new HierarchyRecognitionConfig().createMissingIntermediateEntities( - true - ); + HierarchyRecognitionConfig config = new HierarchyRecognitionConfig() + .withCreateMissingIntermediateEntities(true); // When: We recognize hierarchy HierarchyRecognitionResult result = crate.createDataEntityFileStructure( @@ -358,11 +356,10 @@ void givenFileRequiringIntermediateCreation_whenRecognizeStructure_thenReturnsDe // Then: Result should provide useful information assertTrue(result.isSuccessful()); - HierarchyRecognitionResult info = result; - assertEquals(1, info.createdEntities().size()); // "folder/" was created - assertEquals(2, info.processedRelationships().size()); // root -> folder -> file relationship - assertTrue(info.skippedEntities().isEmpty()); // no entities skipped - assertTrue(info.warnings().isEmpty()); // no warnings + assertEquals(1, result.createdEntities().size()); // "folder/" was created + assertEquals(2, result.processedRelationships().size()); // root -> folder -> file relationship + assertTrue(result.skippedEntities().isEmpty()); // no entities skipped + assertTrue(result.warnings().isEmpty()); // no warnings } } From 35ba4bef54db7213dd5b52f23fb43370c3eb7779 Mon Sep 17 00:00:00 2001 From: Andreas Pfeil Date: Wed, 5 Nov 2025 13:29:34 +0100 Subject: [PATCH 40/51] chore: ignore language server related files and directories --- .gitignore | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/.gitignore b/.gitignore index 2e6ac719..dc7ed9cd 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,8 @@ +.classpath +.project +.settings +.tmp + ### VSCode ### .vscode/* 
#!.vscode/settings.json From 0b53863dd8147b10b5f78f308200a279f661e1fe Mon Sep 17 00:00:00 2001 From: Andreas Pfeil Date: Wed, 5 Nov 2025 13:37:24 +0100 Subject: [PATCH 41/51] cleanup: remove unnecessary java.lang before String types --- src/main/java/edu/kit/datamanager/ro_crate/Crate.java | 4 ++-- src/main/java/edu/kit/datamanager/ro_crate/RoCrate.java | 7 ++----- 2 files changed, 4 insertions(+), 7 deletions(-) diff --git a/src/main/java/edu/kit/datamanager/ro_crate/Crate.java b/src/main/java/edu/kit/datamanager/ro_crate/Crate.java index 058346d1..f1cd3c65 100644 --- a/src/main/java/edu/kit/datamanager/ro_crate/Crate.java +++ b/src/main/java/edu/kit/datamanager/ro_crate/Crate.java @@ -122,11 +122,11 @@ public interface Crate { Set getAllDataEntities(); - ContextualEntity getContextualEntityById(java.lang.String id); + ContextualEntity getContextualEntityById(String id); Set getAllContextualEntities(); - AbstractEntity getEntityById(java.lang.String id); + AbstractEntity getEntityById(String id); /** * Adds a data entity to the crate. 
diff --git a/src/main/java/edu/kit/datamanager/ro_crate/RoCrate.java b/src/main/java/edu/kit/datamanager/ro_crate/RoCrate.java index d6072446..59fa360b 100644 --- a/src/main/java/edu/kit/datamanager/ro_crate/RoCrate.java +++ b/src/main/java/edu/kit/datamanager/ro_crate/RoCrate.java @@ -589,15 +589,12 @@ public RoCrateBuilder setContext(CrateMetadataContext context) { return this; } - public RoCrateBuilder addUrlToContext(java.lang.String url) { + public RoCrateBuilder addUrlToContext(String url) { this.metadataContext.addToContextFromUrl(url); return this; } - public RoCrateBuilder addValuePairToContext( - java.lang.String key, - java.lang.String value - ) { + public RoCrateBuilder addValuePairToContext(String key, String value) { this.metadataContext.addToContext(key, value); return this; } From 26d9bfde10c380b975b1cf6c0c049b16663dff61 Mon Sep 17 00:00:00 2001 From: Andreas Pfeil Date: Wed, 5 Nov 2025 16:28:06 +0100 Subject: [PATCH 42/51] fix(test): tests indicated misleadingly that we properly deserialize "hasPart" --- .../ro_crate/crate/HasPartTest.java | 34 ++++++++++--------- 1 file changed, 18 insertions(+), 16 deletions(-) diff --git a/src/test/java/edu/kit/datamanager/ro_crate/crate/HasPartTest.java b/src/test/java/edu/kit/datamanager/ro_crate/crate/HasPartTest.java index 0dcdece3..44f11cae 100644 --- a/src/test/java/edu/kit/datamanager/ro_crate/crate/HasPartTest.java +++ b/src/test/java/edu/kit/datamanager/ro_crate/crate/HasPartTest.java @@ -80,7 +80,7 @@ public void givenCrateWithFolder_whenAddingToFolder_thenConnectionExists() { } @Test - public void givenCrateFromDisk_whenAddingToFolder_thenConnectionExists( + public void givenCrateWithFolderWithFile_whenReadingFromDisk_thenConnectionExists( @TempDir Path path ) throws IOException { // Given crate from disk @@ -89,20 +89,21 @@ public void givenCrateFromDisk_whenAddingToFolder_thenConnectionExists( .addDataEntity(new DataSetEntity.DataSetBuilder().setId(folderId).build()) .build(); - 
Writers.newFolderWriter().save(this.crate, path.toString()); - this.crate = Readers.newFolderReader().readCrate(path.toString()); - // When adding entity to folder String dataId = "d"; FileEntity d = new FileEntity.FileEntityBuilder() .setId(dataId) .build(); this.crate.addDataEntity(d, folderId); + + Writers.newFolderWriter().save(this.crate, path.toString()); + Crate read = Readers.newFolderReader().readCrate(path.toString()); + // Then this connection exists // Note how the types are loaded when deserializing. Alternatively, you can find them in their properties. - assertTrue(this.crate.getDataEntityById(folderId).getTypes().contains("Dataset")); + assertTrue(read.getDataEntityById(folderId).getTypes().contains("Dataset")); // Note how you can cast an entity to a dataSetEntity. - assertTrue(this.crate.getDataSetById(folderId).orElseThrow().hasPart(dataId)); + assertTrue(read.getDataSetById(folderId).orElseThrow().hasPart(dataId)); } } @@ -168,27 +169,28 @@ public void givenCrateWithFolder_whenAddingToFolder_thenConnectionExists() { } @Test - public void givenCrateFromDisk_whenAddingToFolder_thenConnectionExists( + public void givenCrateWithFolderWithFile_whenReadingFromDisk_thenConnectionExists( @TempDir Path path ) throws IOException { // Given crate from disk String folderId = "./folder/"; - Crate crate = this.builder.addDataEntity( - new DataSetEntity.DataSetBuilder() - .setId(folderId) - .build() - ) - .build(); + this.builder.addDataEntity( + new DataSetEntity.DataSetBuilder() + .setId(folderId) + .build() + ); - Writers.newFolderWriter().save(crate, path.toString()); - Crate read = Readers.newFolderReader().readCrate(path.toString()); // When adding entity to folder String dataId = "d"; FileEntity d = new FileEntity.FileEntityBuilder() .setId(dataId) .build(); - read.addDataEntity(d, folderId); + this.builder.addDataEntity(d, folderId); + + Writers.newFolderWriter().save(this.builder.build(), path.toString()); + Crate read = 
Readers.newFolderReader().readCrate(path.toString()); + // Then this connection exists // Note how the types are loaded when deserializing. Alternatively, you can find them in their properties. assertTrue(read.getDataEntityById(folderId).getTypes().contains("Dataset")); From c07728dd2220fecbd0729a0e483b4b569cc9a156 Mon Sep 17 00:00:00 2001 From: Andreas Pfeil Date: Wed, 5 Nov 2025 16:31:06 +0100 Subject: [PATCH 43/51] fix: deserialize hasPart for DataSetEntitys into member field --- .../ro_crate/entities/data/DataSetEntity.java | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/src/main/java/edu/kit/datamanager/ro_crate/entities/data/DataSetEntity.java b/src/main/java/edu/kit/datamanager/ro_crate/entities/data/DataSetEntity.java index 31c2fb7a..7f711efe 100644 --- a/src/main/java/edu/kit/datamanager/ro_crate/entities/data/DataSetEntity.java +++ b/src/main/java/edu/kit/datamanager/ro_crate/entities/data/DataSetEntity.java @@ -1,8 +1,10 @@ package edu.kit.datamanager.ro_crate.entities.data; import com.fasterxml.jackson.annotation.JsonInclude; +import com.fasterxml.jackson.databind.JsonNode; import com.fasterxml.jackson.databind.annotation.JsonSerialize; +import com.fasterxml.jackson.databind.node.ObjectNode; import edu.kit.datamanager.ro_crate.entities.serializers.HasPartSerializer; import java.util.HashSet; @@ -97,6 +99,17 @@ public T addToHasPart(String dataEntity) { return self(); } + @Override + public T setAllUnsafe(ObjectNode properties) { + super.setAllUnsafe(properties); + JsonNode hasPart = properties.path("hasPart"); + this.hasPart.add(hasPart.asText()); + hasPart.valueStream().forEach( + value -> this.hasPart.add(value.asText()) + ); + return self(); + } + @Override public abstract DataSetEntity build(); } From a2c0350139f678cbd3b9ee72d48ce3a157c6fafd Mon Sep 17 00:00:00 2001 From: Andreas Pfeil Date: Wed, 5 Nov 2025 16:33:49 +0100 Subject: [PATCH 44/51] fix: avoid adding blank strings as hasPart IDs to DataSetEntitys --- 
.../ro_crate/entities/data/DataSetEntity.java | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/src/main/java/edu/kit/datamanager/ro_crate/entities/data/DataSetEntity.java b/src/main/java/edu/kit/datamanager/ro_crate/entities/data/DataSetEntity.java index 7f711efe..64a78bba 100644 --- a/src/main/java/edu/kit/datamanager/ro_crate/entities/data/DataSetEntity.java +++ b/src/main/java/edu/kit/datamanager/ro_crate/entities/data/DataSetEntity.java @@ -9,6 +9,7 @@ import java.util.HashSet; import java.util.Set; +import java.util.stream.Collectors; /** * A helping class for the creating of Data entities of type Dataset. @@ -35,7 +36,9 @@ public class DataSetEntity extends DataEntity { */ public DataSetEntity(AbstractDataSetBuilder entityBuilder) { super(entityBuilder); - this.hasPart = entityBuilder.hasPart; + this.hasPart = entityBuilder.hasPart.stream() + .filter(s -> !s.isBlank()) + .collect(Collectors.toSet()); this.addType(TYPE); } @@ -44,7 +47,9 @@ public void removeFromHasPart(String str) { } public void addToHasPart(String id) { - this.hasPart.add(id); + if (id != null && !id.isEmpty()) { + this.hasPart.add(id); + } } /** From 36a5c07c9312f622f19f176adaaf7be8df6a1fa4 Mon Sep 17 00:00:00 2001 From: Andreas Pfeil Date: Wed, 5 Nov 2025 16:37:27 +0100 Subject: [PATCH 45/51] docs: rephase DataSet related Javadocs more concise and informative --- .../datamanager/ro_crate/entities/data/DataSetEntity.java | 7 +++++-- .../edu/kit/datamanager/ro_crate/crate/HasPartTest.java | 3 ++- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/src/main/java/edu/kit/datamanager/ro_crate/entities/data/DataSetEntity.java b/src/main/java/edu/kit/datamanager/ro_crate/entities/data/DataSetEntity.java index 64a78bba..f6d5a160 100644 --- a/src/main/java/edu/kit/datamanager/ro_crate/entities/data/DataSetEntity.java +++ b/src/main/java/edu/kit/datamanager/ro_crate/entities/data/DataSetEntity.java @@ -22,8 +22,11 @@ public class DataSetEntity extends DataEntity 
{ public static final String TYPE = "Dataset"; /** - * The hasPart property is used to indicate that the described resource is a - * composite resource, and to point to the parts that it includes. + * Points to the parts of this dataset. + *

+ * This will be serialized to and deserialized from the "hasPart" property + * and exists for convenience to represent the additional capabilities of + * a DataSetEntity over a normal DataEntity. */ @JsonSerialize(using = HasPartSerializer.class) @JsonInclude(JsonInclude.Include.NON_EMPTY) diff --git a/src/test/java/edu/kit/datamanager/ro_crate/crate/HasPartTest.java b/src/test/java/edu/kit/datamanager/ro_crate/crate/HasPartTest.java index 44f11cae..49811fc7 100644 --- a/src/test/java/edu/kit/datamanager/ro_crate/crate/HasPartTest.java +++ b/src/test/java/edu/kit/datamanager/ro_crate/crate/HasPartTest.java @@ -18,7 +18,8 @@ import static org.junit.jupiter.api.Assertions.*; /** - * Test similar to AutomaticHierarchyRecognitionTest + * Tests related to the addDataEntity(DataEntity, String) method and the hasPart + * property. */ public class HasPartTest { From dc4f4773eb21f58b83b47ef70db5ff9af1dec8da Mon Sep 17 00:00:00 2001 From: Andreas Pfeil Date: Mon, 10 Nov 2025 11:34:44 +0100 Subject: [PATCH 46/51] refactor: rename hierarchy module and move tests accordingly --- src/main/java/edu/kit/datamanager/ro_crate/Crate.java | 4 ++-- src/main/java/edu/kit/datamanager/ro_crate/RoCrate.java | 6 +++--- .../ro_crate/{crate => hierarchy}/HierarchyRecognition.java | 2 +- .../{crate => hierarchy}/HierarchyRecognitionConfig.java | 2 +- .../{crate => hierarchy}/HierarchyRecognitionResult.java | 2 +- .../HierarchyRecognitionTest.java} | 5 +++-- 6 files changed, 11 insertions(+), 10 deletions(-) rename src/main/java/edu/kit/datamanager/ro_crate/{crate => hierarchy}/HierarchyRecognition.java (99%) rename src/main/java/edu/kit/datamanager/ro_crate/{crate => hierarchy}/HierarchyRecognitionConfig.java (97%) rename src/main/java/edu/kit/datamanager/ro_crate/{crate => hierarchy}/HierarchyRecognitionResult.java (97%) rename src/test/java/edu/kit/datamanager/ro_crate/{crate/AutomaticHierarchyRecognitionTest.java => hierarchy/HierarchyRecognitionTest.java} (99%) diff --git 
a/src/main/java/edu/kit/datamanager/ro_crate/Crate.java b/src/main/java/edu/kit/datamanager/ro_crate/Crate.java index f1cd3c65..89428d0b 100644 --- a/src/main/java/edu/kit/datamanager/ro_crate/Crate.java +++ b/src/main/java/edu/kit/datamanager/ro_crate/Crate.java @@ -7,8 +7,8 @@ import java.util.Set; import edu.kit.datamanager.ro_crate.context.CrateMetadataContext; -import edu.kit.datamanager.ro_crate.crate.HierarchyRecognitionConfig; -import edu.kit.datamanager.ro_crate.crate.HierarchyRecognitionResult; +import edu.kit.datamanager.ro_crate.hierarchy.HierarchyRecognitionConfig; +import edu.kit.datamanager.ro_crate.hierarchy.HierarchyRecognitionResult; import edu.kit.datamanager.ro_crate.entities.AbstractEntity; import edu.kit.datamanager.ro_crate.entities.contextual.ContextualEntity; import edu.kit.datamanager.ro_crate.entities.data.DataEntity; diff --git a/src/main/java/edu/kit/datamanager/ro_crate/RoCrate.java b/src/main/java/edu/kit/datamanager/ro_crate/RoCrate.java index 59fa360b..cb960b4a 100644 --- a/src/main/java/edu/kit/datamanager/ro_crate/RoCrate.java +++ b/src/main/java/edu/kit/datamanager/ro_crate/RoCrate.java @@ -6,9 +6,9 @@ import com.fasterxml.jackson.databind.node.ObjectNode; import edu.kit.datamanager.ro_crate.context.CrateMetadataContext; import edu.kit.datamanager.ro_crate.context.RoCrateMetadataContext; -import edu.kit.datamanager.ro_crate.crate.HierarchyRecognition; -import edu.kit.datamanager.ro_crate.crate.HierarchyRecognitionConfig; -import edu.kit.datamanager.ro_crate.crate.HierarchyRecognitionResult; +import edu.kit.datamanager.ro_crate.hierarchy.HierarchyRecognition; +import edu.kit.datamanager.ro_crate.hierarchy.HierarchyRecognitionConfig; +import edu.kit.datamanager.ro_crate.hierarchy.HierarchyRecognitionResult; import edu.kit.datamanager.ro_crate.entities.AbstractEntity; import edu.kit.datamanager.ro_crate.entities.contextual.ContextualEntity; import edu.kit.datamanager.ro_crate.entities.contextual.JsonDescriptor; diff --git 
a/src/main/java/edu/kit/datamanager/ro_crate/crate/HierarchyRecognition.java b/src/main/java/edu/kit/datamanager/ro_crate/hierarchy/HierarchyRecognition.java similarity index 99% rename from src/main/java/edu/kit/datamanager/ro_crate/crate/HierarchyRecognition.java rename to src/main/java/edu/kit/datamanager/ro_crate/hierarchy/HierarchyRecognition.java index 5c2c4bd2..46a278c7 100644 --- a/src/main/java/edu/kit/datamanager/ro_crate/crate/HierarchyRecognition.java +++ b/src/main/java/edu/kit/datamanager/ro_crate/hierarchy/HierarchyRecognition.java @@ -1,4 +1,4 @@ -package edu.kit.datamanager.ro_crate.crate; +package edu.kit.datamanager.ro_crate.hierarchy; import edu.kit.datamanager.ro_crate.Crate; import edu.kit.datamanager.ro_crate.entities.data.DataEntity; diff --git a/src/main/java/edu/kit/datamanager/ro_crate/crate/HierarchyRecognitionConfig.java b/src/main/java/edu/kit/datamanager/ro_crate/hierarchy/HierarchyRecognitionConfig.java similarity index 97% rename from src/main/java/edu/kit/datamanager/ro_crate/crate/HierarchyRecognitionConfig.java rename to src/main/java/edu/kit/datamanager/ro_crate/hierarchy/HierarchyRecognitionConfig.java index ce602e5e..c37372d0 100644 --- a/src/main/java/edu/kit/datamanager/ro_crate/crate/HierarchyRecognitionConfig.java +++ b/src/main/java/edu/kit/datamanager/ro_crate/hierarchy/HierarchyRecognitionConfig.java @@ -1,4 +1,4 @@ -package edu.kit.datamanager.ro_crate.crate; +package edu.kit.datamanager.ro_crate.hierarchy; /** * Configuration class for automatic hierarchy recognition. 
diff --git a/src/main/java/edu/kit/datamanager/ro_crate/crate/HierarchyRecognitionResult.java b/src/main/java/edu/kit/datamanager/ro_crate/hierarchy/HierarchyRecognitionResult.java similarity index 97% rename from src/main/java/edu/kit/datamanager/ro_crate/crate/HierarchyRecognitionResult.java rename to src/main/java/edu/kit/datamanager/ro_crate/hierarchy/HierarchyRecognitionResult.java index ad6708db..ce4eab50 100644 --- a/src/main/java/edu/kit/datamanager/ro_crate/crate/HierarchyRecognitionResult.java +++ b/src/main/java/edu/kit/datamanager/ro_crate/hierarchy/HierarchyRecognitionResult.java @@ -1,4 +1,4 @@ -package edu.kit.datamanager.ro_crate.crate; +package edu.kit.datamanager.ro_crate.hierarchy; import edu.kit.datamanager.ro_crate.entities.data.DataEntity; import edu.kit.datamanager.ro_crate.entities.data.DataSetEntity; diff --git a/src/test/java/edu/kit/datamanager/ro_crate/crate/AutomaticHierarchyRecognitionTest.java b/src/test/java/edu/kit/datamanager/ro_crate/hierarchy/HierarchyRecognitionTest.java similarity index 99% rename from src/test/java/edu/kit/datamanager/ro_crate/crate/AutomaticHierarchyRecognitionTest.java rename to src/test/java/edu/kit/datamanager/ro_crate/hierarchy/HierarchyRecognitionTest.java index ac10fac6..e4c82e36 100644 --- a/src/test/java/edu/kit/datamanager/ro_crate/crate/AutomaticHierarchyRecognitionTest.java +++ b/src/test/java/edu/kit/datamanager/ro_crate/hierarchy/HierarchyRecognitionTest.java @@ -1,4 +1,4 @@ -package edu.kit.datamanager.ro_crate.crate; +package edu.kit.datamanager.ro_crate.hierarchy; import static org.junit.jupiter.api.Assertions.*; @@ -7,13 +7,14 @@ import edu.kit.datamanager.ro_crate.entities.data.DataSetEntity; import edu.kit.datamanager.ro_crate.entities.data.FileEntity; import java.nio.file.Paths; + import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; /** * Automatic Hierarchy Recognition - API Tests & Usage Examples */ -public class AutomaticHierarchyRecognitionTest { +public class 
HierarchyRecognitionTest { private RoCrate crate; From eef989c58be857a2d01afc55f5523caecde88709 Mon Sep 17 00:00:00 2001 From: Andreas Pfeil Date: Mon, 10 Nov 2025 11:50:14 +0100 Subject: [PATCH 47/51] deprecate: announce deprecation of unsafe addFromCollection method We still use it internally, but I plan to replace it at some point in future. We should be able to generate at least basically valid entities from external providers. And if not we can still work around this with a custom, private subclass of RoCrate. --- src/main/java/edu/kit/datamanager/ro_crate/Crate.java | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/src/main/java/edu/kit/datamanager/ro_crate/Crate.java b/src/main/java/edu/kit/datamanager/ro_crate/Crate.java index 89428d0b..652ccdcc 100644 --- a/src/main/java/edu/kit/datamanager/ro_crate/Crate.java +++ b/src/main/java/edu/kit/datamanager/ro_crate/Crate.java @@ -151,6 +151,15 @@ public interface Crate { void setUntrackedFiles(Collection files); + /** + * Unsafely adds a collection of entities to the crate. + *

+ * WARNING: This method does not perform any checks and may lead to an inconsistent crate state. + * + * @param entities the collection of entities to add + * @deprecated use individual add methods to ensure crate consistency. If you really need an unchecked method, consider creating a subclass or contact us at our issue tracker so we can discuss replacements before removal. + */ + @Deprecated(forRemoval = true) void addFromCollection(Collection entities); void addItemFromDataCite(String locationUrl); From c23df4f72f871a81b693bd949385e728c0c6e099 Mon Sep 17 00:00:00 2001 From: Andreas Pfeil Date: Mon, 10 Nov 2025 12:07:16 +0100 Subject: [PATCH 48/51] docs: add package documentation for the hierarchy recognition feature --- .../kit/datamanager/ro_crate/hierarchy/package-info.java | 9 +++++++++ 1 file changed, 9 insertions(+) create mode 100644 src/main/java/edu/kit/datamanager/ro_crate/hierarchy/package-info.java diff --git a/src/main/java/edu/kit/datamanager/ro_crate/hierarchy/package-info.java b/src/main/java/edu/kit/datamanager/ro_crate/hierarchy/package-info.java new file mode 100644 index 00000000..5d8f119f --- /dev/null +++ b/src/main/java/edu/kit/datamanager/ro_crate/hierarchy/package-info.java @@ -0,0 +1,9 @@ +/** + * This package contains classes to enable automatic recognition and construction of + * file and folder hierarchies within RO-Crates. + *

+ * The main functionality is provided by the {@link edu.kit.datamanager.ro_crate.hierarchy.HierarchyRecognition} + * class, which analyzes the paths of data entities and establishes appropriate + * "hasPart" and "isPartOf" relationships to reflect the underlying file system structure. + */ +package edu.kit.datamanager.ro_crate.hierarchy; \ No newline at end of file From 59f10e16fabb25582da9375c0b3bb2ee428f3057 Mon Sep 17 00:00:00 2001 From: Andreas Pfeil Date: Mon, 10 Nov 2025 12:30:22 +0100 Subject: [PATCH 49/51] fix: only add entity types which are not null AND not empty Co-authored-by: coderabbitai[bot] <136622811+coderabbitai[bot]@users.noreply.github.com> --- .../edu/kit/datamanager/ro_crate/entities/AbstractEntity.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/edu/kit/datamanager/ro_crate/entities/AbstractEntity.java b/src/main/java/edu/kit/datamanager/ro_crate/entities/AbstractEntity.java index f8823a40..e978b199 100644 --- a/src/main/java/edu/kit/datamanager/ro_crate/entities/AbstractEntity.java +++ b/src/main/java/edu/kit/datamanager/ro_crate/entities/AbstractEntity.java @@ -443,7 +443,7 @@ public T addType(String type) { if (this.types == null) { this.types = new HashSet<>(); } - if (type != null || !type.isEmpty()) { + if (type != null && !type.isEmpty()) { this.types.add(type); } return self(); From ee6d01481ff452ca758dfa405934c8dbeaeba937 Mon Sep 17 00:00:00 2001 From: Andreas Pfeil Date: Mon, 10 Nov 2025 12:43:28 +0100 Subject: [PATCH 50/51] fix: avoid reading empty strings to hasPart --- .../ro_crate/entities/data/DataSetEntity.java | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/src/main/java/edu/kit/datamanager/ro_crate/entities/data/DataSetEntity.java b/src/main/java/edu/kit/datamanager/ro_crate/entities/data/DataSetEntity.java index f6d5a160..8030c04c 100644 --- a/src/main/java/edu/kit/datamanager/ro_crate/entities/data/DataSetEntity.java +++ 
b/src/main/java/edu/kit/datamanager/ro_crate/entities/data/DataSetEntity.java @@ -111,10 +111,16 @@ public T addToHasPart(String dataEntity) { public T setAllUnsafe(ObjectNode properties) { super.setAllUnsafe(properties); JsonNode hasPart = properties.path("hasPart"); - this.hasPart.add(hasPart.asText()); - hasPart.valueStream().forEach( - value -> this.hasPart.add(value.asText()) - ); + String txt = hasPart.asText(); + if (!txt.isBlank()) { + this.hasPart.add(txt); + } + hasPart.valueStream() + .map(JsonNode::asText) + .filter(value -> !value.isBlank()) + .forEach( + value -> this.hasPart.add(value) + ); return self(); } From 6a21367d1fa440893a257e7c747697063c99def8 Mon Sep 17 00:00:00 2001 From: Andreas Pfeil Date: Mon, 10 Nov 2025 12:54:10 +0100 Subject: [PATCH 51/51] fix: error in local file path detection --- .../edu/kit/datamanager/ro_crate/util/FileSystemUtil.java | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/main/java/edu/kit/datamanager/ro_crate/util/FileSystemUtil.java b/src/main/java/edu/kit/datamanager/ro_crate/util/FileSystemUtil.java index ca6756f7..82c39529 100644 --- a/src/main/java/edu/kit/datamanager/ro_crate/util/FileSystemUtil.java +++ b/src/main/java/edu/kit/datamanager/ro_crate/util/FileSystemUtil.java @@ -21,10 +21,10 @@ private FileSystemUtil() { * @return true if it looks like a file path, false otherwise */ public static boolean isFilePath(String id) { - return !( - id.startsWith("doi:") && - id.startsWith("http") && - id.startsWith("https") + return id != null && !( + id.startsWith("doi:") || + id.startsWith("http://") || + id.startsWith("https://") ); }