diff --git a/.github/workflows/validate-xml.yml b/.github/workflows/validate-xml.yml new file mode 100644 index 00000000..67479ace --- /dev/null +++ b/.github/workflows/validate-xml.yml @@ -0,0 +1,28 @@ +name: Validate TEI XML + +on: + push: + branches: [master] + workflow_dispatch: + +jobs: + validate: + runs-on: ubuntu-latest + + steps: + - name: Checkout repository + uses: actions/checkout@v3 + + - name: Install xmllint + run: sudo apt-get install -y libxml2-utils + + - name: Run validation script + run: ./tests/test_valid_xml.sh + + - name: Commit badge if changed + run: | + git config --global user.name "GitHub Action" + git config --global user.email "action@github.com" + git add tests/xml_validation_badge.svg + git diff --cached --quiet || git commit -m "Update XML validation badge" + git push diff --git a/README.md b/README.md index 0b9063af..99b8f01e 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,7 @@ # Coptic Scriptorium - Corpora +![XML Validation](./tests/xml_validation_badge.svg) + This is the public repository for Coptic SCRIPTORIUM corpora. The documents are available in multiple formats: CoNLL-U, relANNIS, PAULA XML, TEI XML, and TreeTagger SGML (`*.tt`). The `*.tt` files generally contain the most complete representations of document annotations, though note that corpus level metadata is only included in the PAULA XML and relANNIS versions. Corpora can be searched, viewed, and queried with complex queries http://data.copticscriptorium.org. 
#!/bin/bash
#
# Runs xmllint over every *.xml file in the repository and writes a badge
# (xml_validation_badge.svg, next to this script) showing the percentage of
# files that xmllint accepted.
#
# Usage:  ./tests/test_valid_xml.sh
# Needs:  xmllint (Debian/Ubuntu package: libxml2-utils)
# Status: 0 once the badge has been written; 1 if xmllint is missing or no
#         XML files were found (the badge is left untouched in both cases).

set -u

# Directory containing this script; the badge is written here.
TEST_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"

# Repository root: the parent of the script directory.
BASE_DIR="$(dirname "$TEST_DIR")"

readonly BADGE_FILE="$TEST_DIR/xml_validation_badge.svg"
readonly LABEL_WIDTH=70

#######################################
# Counts the XML files under a directory and how many of them xmllint accepts.
# Globals:   total, valid (written)
# Arguments: $1 - directory to search recursively
# Outputs:   one "<path>: invalid" line on stdout per rejected file
#######################################
validate_tree() {
  local dir=$1
  local file
  total=0
  valid=0

  # NUL-delimited read: paths with spaces or glob characters stay in one piece
  # (the previous `for file in $(find ...)` split them into separate words).
  while IFS= read -r -d '' file; do
    [[ -f "$file" ]] || continue
    total=$((total + 1))

    # TODO Use the TEI schema to validate the XML file
    #   xmllint --noout --schema "$SCHEMA" "$file"
    if xmllint --noout "$file" 2>/dev/null; then
      valid=$((valid + 1))
    else
      printf '%s: invalid\n' "$file"
    fi
  done < <(find "$dir" -name '*.xml' -print0)
}

#######################################
# Prints the integer percentage (rounded down) of valid files.
# Arguments: $1 - number of valid files, $2 - total number of files (> 0)
# Returns:   1 without output when the total is not positive
#######################################
percent_valid() {
  local ok=$1 all=$2
  (( all > 0 )) || return 1
  printf '%s\n' "$(( 100 * ok / all ))"
}

#######################################
# Prints the badge colour for a percentage:
# green from 99, yellow from 90, red below that.
# Arguments: $1 - integer percentage
#######################################
badge_color() {
  local pct=$1
  if (( pct >= 99 )); then
    printf '%s\n' '#4c1'     # green
  elif (( pct >= 90 )); then
    printf '%s\n' '#dfb317'  # yellow
  else
    printf '%s\n' '#e05d44'  # red
  fi
}

#######################################
# Prints the badge SVG on stdout.
# Arguments: $1 - label text, $2 - message text, $3 - message background colour
#######################################
render_badge() {
  local label=$1 message=$2 color=$3
  # The message box grows with the text: 7 units per character plus padding.
  local message_width=$(( ${#message} * 7 + 20 ))
  local total_width=$(( LABEL_WIDTH + message_width ))
  local label_x=$(( LABEL_WIDTH / 2 ))
  local message_x=$(( LABEL_WIDTH + message_width / 2 ))

  # NOTE(review): the markup below is a reconstruction in the common
  # flat-badge layout (title, gradient, clip path, three rects, two texts);
  # confirm the attribute values against the intended badge design.
  cat <<EOF
<svg xmlns="http://www.w3.org/2000/svg" width="$total_width" height="20" role="img" aria-label="$label: $message">
  <title>$label: $message</title>
  <linearGradient id="s" x2="0" y2="100%">
    <stop offset="0" stop-color="#bbb" stop-opacity=".1"/>
    <stop offset="1" stop-opacity=".1"/>
  </linearGradient>
  <clipPath id="r">
    <rect width="$total_width" height="20" rx="3" fill="#fff"/>
  </clipPath>
  <g clip-path="url(#r)">
    <rect width="$LABEL_WIDTH" height="20" fill="#555"/>
    <rect x="$LABEL_WIDTH" width="$message_width" height="20" fill="$color"/>
    <rect width="$total_width" height="20" fill="url(#s)"/>
  </g>
  <g fill="#fff" text-anchor="middle" font-family="Verdana,Geneva,DejaVu Sans,sans-serif" font-size="11">
    <text x="$label_x" y="14">$label</text>
    <text x="$message_x" y="14">$message</text>
  </g>
</svg>
EOF
}

#######################################
# Validates the repository and refreshes the badge file.
# Globals:   BASE_DIR, BADGE_FILE (read); total, valid (written)
# Returns:   0 after writing the badge, 1 if nothing could be validated
#######################################
main() {
  # Without xmllint every file would be counted as invalid and a misleading
  # 0% badge would be published, so stop before touching anything.
  if ! command -v xmllint >/dev/null 2>&1; then
    printf 'xmllint not found; install libxml2-utils\n' >&2
    return 1
  fi

  validate_tree "$BASE_DIR"

  # No files means there is no percentage to show; keep the existing badge.
  if (( total == 0 )); then
    echo "No XML files found."
    return 1
  fi

  local percentage
  percentage=$(percent_valid "$valid" "$total") || return 1
  echo "Valid XML files: $valid/$total ($percentage%)"

  render_badge "XML Valid" "${percentage}%" "$(badge_color "$percentage")" \
    > "$BADGE_FILE"
}

main