diff --git a/.github/workflows/ci-build-manual-onnx.yml b/.github/workflows/ci-build-manual-onnx.yml new file mode 100644 index 0000000000..dd03ee2903 --- /dev/null +++ b/.github/workflows/ci-build-manual-onnx.yml @@ -0,0 +1,79 @@ +name: Build and push an ONNX docker image + +# This workflow builds the lightweight ONNX/Wapiti-only Docker image +# (no Python/DeLFT/TensorFlow dependencies) + +on: + push: + branches: + - feature/onnx-models + workflow_dispatch: + inputs: + custom_tag: + type: string + description: Docker image tag + required: true + default: "latest-onnx" + +jobs: + build: + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v5 + with: + fetch-tags: true + fetch-depth: 0 + - name: Set up JDK 21 + uses: actions/setup-java@v5 + with: + java-version: '21' + distribution: 'temurin' + cache: 'gradle' + - name: Build with Gradle + run: ./gradlew build -x test + + docker-build-onnx: + needs: [ build ] + runs-on: ubuntu-latest + + steps: + - name: Create more disk space + run: sudo rm -rf /usr/share/dotnet && sudo rm -rf /opt/ghc && sudo rm -rf "/usr/local/share/boost" && sudo rm -rf "$AGENT_TOOLSDIRECTORY" + - uses: actions/checkout@v5 + - name: Build and push + id: docker_build + uses: mr-smithers-excellent/docker-build-push@v5 + with: + username: ${{ secrets.DOCKERHUB_USERNAME_LFOPPIANO }} + password: ${{ secrets.DOCKERHUB_TOKEN_LFOPPIANO }} + image: lfoppiano/grobid + registry: docker.io + pushImage: true + tags: latest-onnx, ${{ github.event.inputs.custom_tag || github.sha }} + dockerfile: Dockerfile.onnx + - name: Image digest + run: echo ${{ steps.docker_build.outputs.digest }} + - name: Docker Image Summary + run: | + echo "## 🐳 Docker Image Uploaded Successfully" >> $GITHUB_STEP_SUMMARY + echo "" >> $GITHUB_STEP_SUMMARY + echo "**Image Details:**" >> $GITHUB_STEP_SUMMARY + echo "- **Registry:** docker.io" >> $GITHUB_STEP_SUMMARY + echo "- **Image:** lfoppiano/grobid" >> $GITHUB_STEP_SUMMARY + echo "- **Type:** ONNX/Wapiti only (lightweight, 
no Python/DeLFT)" >> $GITHUB_STEP_SUMMARY + echo "- **Tags:**" >> $GITHUB_STEP_SUMMARY + echo " - \`latest-onnx\`" >> $GITHUB_STEP_SUMMARY + echo " - \`${{ github.event.inputs.custom_tag || github.sha }}\`" >> $GITHUB_STEP_SUMMARY + echo "- **Digest:** \`${{ steps.docker_build.outputs.digest }}\`" >> $GITHUB_STEP_SUMMARY + echo "" >> $GITHUB_STEP_SUMMARY + echo "**Features:**" >> $GITHUB_STEP_SUMMARY + echo "- ONNX Runtime for deep learning models (CPU only)" >> $GITHUB_STEP_SUMMARY + echo "- Wapiti CRF for traditional models" >> $GITHUB_STEP_SUMMARY + echo "- No Python, TensorFlow, or DeLFT dependencies" >> $GITHUB_STEP_SUMMARY + echo "" >> $GITHUB_STEP_SUMMARY + echo "**Usage:**" >> $GITHUB_STEP_SUMMARY + echo "\`\`\`bash" >> $GITHUB_STEP_SUMMARY + echo "docker pull lfoppiano/grobid:latest-onnx" >> $GITHUB_STEP_SUMMARY + echo "docker run -t --rm --init -p 8070:8070 -p 8071:8071 lfoppiano/grobid:latest-onnx" >> $GITHUB_STEP_SUMMARY + echo "\`\`\`" >> $GITHUB_STEP_SUMMARY diff --git a/.gitignore b/.gitignore index 35526b5247..498d6a05cd 100644 --- a/.gitignore +++ b/.gitignore @@ -89,3 +89,5 @@ Dockerfile.dataseer Dockerfile.software Dockerfile.datastet .run + +.kotlin \ No newline at end of file diff --git a/Dockerfile.crf b/Dockerfile.crf index 2199240af8..5982cc37d1 100644 --- a/Dockerfile.crf +++ b/Dockerfile.crf @@ -49,8 +49,9 @@ RUN rm -rf grobid-home/lib/lin-32 RUN rm -rf grobid-home/lib/win-* RUN rm -rf grobid-home/lib/mac-64 -# cleaning Delft models -RUN rm -rf grobid-home/models/*-BidLSTM_CRF* +# cleaning Delft and ONNX models +RUN rm -rf grobid-home/models/*-BidLSTM* +RUN rm -rf grobid-home/models/*.onnx ENV GROBID_SERVICE_OPTS="-Djava.library.path=grobid-home/lib/lin-64:grobid-home/lib/lin-64/jep" diff --git a/Dockerfile.onnx b/Dockerfile.onnx new file mode 100644 index 0000000000..764ebf9d1e --- /dev/null +++ b/Dockerfile.onnx @@ -0,0 +1,117 @@ +## Docker GROBID image using ONNX models and/or Wapiti CRF models +## This is a lightweight image without 
Python/TensorFlow/DeLFT/JEP dependencies +## Uses ONNX Runtime (CPU only) + +## See https://grobid.readthedocs.io/en/latest/Grobid-docker/ + +## usage example with version 0.8.0: +## docker build -t grobid/grobid:0.8.0-onnx --build-arg GROBID_VERSION=0.8.0 --file Dockerfile.onnx . + +## run: +## docker run -t --rm --init -p 8070:8070 -p 8071:8071 grobid/grobid:0.8.0-onnx + +# ------------------- +# build builder image +# ------------------- + +FROM eclipse-temurin:21-jdk AS builder + +USER root + +RUN apt-get update && \ + apt-get -y upgrade && \ + apt-get -y --no-install-recommends install unzip git python3 python3-pip + +WORKDIR /opt/grobid-source + +# gradle +COPY gradle/ ./gradle/ +COPY gradlew ./ +COPY gradle.properties ./ +COPY build.gradle ./ +COPY settings.gradle ./ + +# git +COPY .git/ ./.git + +# source +COPY grobid-home/ ./grobid-home/ +COPY grobid-core/ ./grobid-core/ +COPY grobid-service/ ./grobid-service/ +COPY grobid-trainer/ ./grobid-trainer/ + +# cleaning unused native libraries before packaging +RUN rm -rf grobid-home/pdf2xml +RUN rm -rf grobid-home/pdfalto/lin-32 +RUN rm -rf grobid-home/pdfalto/mac-64 +RUN rm -rf grobid-home/pdfalto/mac_arm-64 +RUN rm -rf grobid-home/pdfalto/win-* +RUN rm -rf grobid-home/lib/lin-32 +RUN rm -rf grobid-home/lib/win-* +RUN rm -rf grobid-home/lib/mac-64 +RUN rm -rf grobid-home/lib/lin-64/jep + +# Use ONNX configuration (no DeLFT models) +RUN rm grobid-home/config/grobid.yaml && \ + mv grobid-home/config/grobid-onnx.yaml grobid-home/config/grobid.yaml + +RUN ./gradlew clean assemble --no-daemon --info --stacktrace + +# Preload embeddings in raw float32 format for ONNX inference +# Using standalone script that doesn't require DeLFT +RUN pip3 install --no-cache-dir --break-system-packages lmdb requests +COPY grobid-home/scripts/preload_embeddings_standalone.py . +COPY grobid-home/config/resources-registry.json . 
+RUN python3 preload_embeddings_standalone.py --registry ./resources-registry.json + +WORKDIR /opt/grobid +RUN unzip -o /opt/grobid-source/grobid-service/build/distributions/grobid-service-*.zip && \ + mv grobid-service* grobid-service +RUN unzip -o /opt/grobid-source/grobid-home/build/distributions/grobid-home-*.zip && \ + chmod -R 755 /opt/grobid/grobid-home/pdfalto + +# Move preloaded embeddings to final location +RUN mkdir -p /opt/grobid/data/db && \ + mv /opt/grobid-source/data/db/* /opt/grobid/data/db/ + +# ------------------- +# build runtime image +# ------------------- + +FROM eclipse-temurin:21-jre + +# setting locale +ENV LANG=C.UTF-8 + +# Install minimal runtime dependencies +RUN apt-get update && \ + apt-get -y --no-install-recommends install \ + libxml2 libfontconfig \ + && apt-get clean \ + && rm -rf /var/lib/apt/lists/* + +WORKDIR /opt/grobid + +COPY --from=builder /opt/grobid . + +# Add Tini +ENV TINI_VERSION=v0.19.0 +ADD https://github.com/krallin/tini/releases/download/${TINI_VERSION}/tini /tini +RUN chmod +x /tini +ENTRYPOINT ["/tini", "-s", "--"] + +WORKDIR /opt/grobid + +ENV JAVA_OPTS="-Xmx4g --add-opens=java.base/java.lang=ALL-UNNAMED --add-opens=java.base/java.nio=ALL-UNNAMED --add-opens=java.base/sun.nio.ch=ALL-UNNAMED --add-opens=java.base/java.io=ALL-UNNAMED" + + +CMD ["./grobid-service/bin/grobid-service"] + +ARG GROBID_VERSION + +LABEL \ + authors="The contributors" \ + org.label-schema.name="GROBID" \ + org.label-schema.description="Image with GROBID service (ONNX/Wapiti only, no DeLFT)" \ + org.label-schema.url="https://github.com/kermitt2/grobid" \ + org.label-schema.version=${GROBID_VERSION} diff --git a/build.gradle b/build.gradle index 7311b05eb9..bb778ba54a 100644 --- a/build.gradle +++ b/build.gradle @@ -24,7 +24,7 @@ def getJavaLibraryPath = { } } else if (Os.isFamily(Os.FAMILY_UNIX)) { jepLocalLibraries = "${rootProject.rootDir.getAbsolutePath()}/grobid-home/lib/lin-64/jep:" + - 
"${rootProject.rootDir.getAbsolutePath()}/grobid-home/lib/lin-64" + "${rootProject.rootDir.getAbsolutePath()}/grobid-home/lib/lin-64" } else { throw new RuntimeException("Unsupported platform!") } @@ -210,15 +210,22 @@ subprojects { if (JavaVersion.current().compareTo(JavaVersion.VERSION_1_8) > 0) { jvmArgs "--add-opens", "java.base/java.util.stream=ALL-UNNAMED", - "--add-opens", "java.base/java.io=ALL-UNNAMED", - "--add-opens", "java.xml/jdk.xml.internal=ALL-UNNAMED", - // PowerMock compatibility with Java 21 - TODO: Replace PowerMock with Mockito/JUnit Jupiter - "--add-opens", "java.base/java.lang=ALL-UNNAMED", - "--add-opens", "java.base/java.util=ALL-UNNAMED" + "--add-opens", "java.base/java.io=ALL-UNNAMED", + "--add-opens", "java.xml/jdk.xml.internal=ALL-UNNAMED", + // PowerMock compatibility with Java 21 - TODO: Replace PowerMock with Mockito/JUnit Jupiter + "--add-opens", "java.base/java.lang=ALL-UNNAMED", + "--add-opens", "java.base/java.util=ALL-UNNAMED" } - + systemProperty "java.library.path", "${javaLibraryPath}" } + + tasks.withType(Test).configureEach { + // Needed by some native libs (e.g. onnxruntime) under Java 17+ / 21 when + // accessing direct buffer internals. 
+ jvmArgs '--add-opens=java.base/java.nio=ALL-UNNAMED', + '--add-opens=java.base/sun.nio.ch=ALL-UNNAMED' // Required for LMDB + } } /** SUBPROJECTS **/ @@ -259,6 +266,21 @@ project("grobid-core") { implementation 'org.apache.opennlp:opennlp-tools:1.9.4' implementation group: 'org.jruby', name: 'jruby-complete', version: '9.4.12.1' + // ONNX Runtime for model inference + // GPU version (onnxruntime_gpu) only supports Linux with NVIDIA CUDA + // CPU version (onnxruntime) supports all platforms including macOS ARM64 + // if (System.getProperty("os.name").toLowerCase().contains("linux")) { + // implementation 'com.microsoft.onnxruntime:onnxruntime_gpu:1.23.2' + // } else { + implementation 'com.microsoft.onnxruntime:onnxruntime:1.23.2' + // } + + // LMDB for word embeddings lookup (0.9.0+ includes Apple Silicon native libs) + implementation 'org.lmdbjava:lmdbjava:0.9.2' + + // JSON parsing (for CRF params and vocab) + implementation 'com.google.code.gson:gson:2.10.1' + shadedLib "org.apache.lucene:lucene-analyzers-common:4.5.1" } @@ -299,18 +321,18 @@ project("grobid-core") { processResources { filesMatching(["grobid-version.txt", "grobid-revision.txt"]) { expand( - project_version: project.property('version') ?: "unknown", - project_revision: rootProject.ext.gitRevision + project_version: project.property('version') ?: "unknown", + project_revision: rootProject.ext.gitRevision ) } } tasks.register( - "install", - { - dependsOn publishToMavenLocal - dependsOn 'shadowJar' - } + "install", + { + dependsOn publishToMavenLocal + dependsOn 'shadowJar' + } ) } @@ -336,8 +358,8 @@ project("grobid-home") { } } + import org.apache.tools.ant.taskdefs.condition.Os -import org.jetbrains.kotlin.gradle.tasks.KotlinCompile project(":grobid-service") { apply plugin: 'application' @@ -353,7 +375,9 @@ project(":grobid-service") { def javaLibraryPath = getJavaLibraryPath() if (JavaVersion.current().compareTo(JavaVersion.VERSION_1_8) > 0) { - jvmArgs "--add-opens", 
"java.base/java.lang=ALL-UNNAMED" + jvmArgs "--add-opens", "java.base/java.lang=ALL-UNNAMED", + "--add-opens", "java.base/java.nio=ALL-UNNAMED", // Required for LMDB + "--add-opens", "java.base/sun.nio.ch=ALL-UNNAMED" // Required for LMDB } workingDir = rootProject.rootDir @@ -490,32 +514,32 @@ project(":grobid-trainer") { } def trainerTasks = [ - "train_name_header" : "org.grobid.trainer.NameHeaderTrainer", - "train_name_citation" : "org.grobid.trainer.NameCitationTrainer", - "train_affiliation_address" : "org.grobid.trainer.AffiliationAddressTrainer", - "train_shorttext" : "org.grobid.trainer.ShorttextTrainer", - "train_figure" : "org.grobid.trainer.FigureTrainer", - "train_table" : "org.grobid.trainer.TableTrainer", - "train_citation" : "org.grobid.trainer.CitationTrainer", - "train_date" : "org.grobid.trainer.DateTrainer", - "train_reference_segmentation" : "org.grobid.trainer.ReferenceSegmenterTrainer", - "train_ebook_model" : "org.grobid.trainer.EbookTrainer", - "train_patent_citation" : "org.grobid.trainer.PatentParserTrainer", - "train_funding_acknowledgement": "org.grobid.trainer.FundingAcknowledgementTrainer" + "train_name_header" : "org.grobid.trainer.NameHeaderTrainer", + "train_name_citation" : "org.grobid.trainer.NameCitationTrainer", + "train_affiliation_address" : "org.grobid.trainer.AffiliationAddressTrainer", + "train_shorttext" : "org.grobid.trainer.ShorttextTrainer", + "train_figure" : "org.grobid.trainer.FigureTrainer", + "train_table" : "org.grobid.trainer.TableTrainer", + "train_citation" : "org.grobid.trainer.CitationTrainer", + "train_date" : "org.grobid.trainer.DateTrainer", + "train_reference_segmentation" : "org.grobid.trainer.ReferenceSegmenterTrainer", + "train_ebook_model" : "org.grobid.trainer.EbookTrainer", + "train_patent_citation" : "org.grobid.trainer.PatentParserTrainer", + "train_funding_acknowledgement": "org.grobid.trainer.FundingAcknowledgementTrainer" ] def complexTrainerTasks = [ - "train_header" : 
["org.grobid.trainer.HeaderTrainer", ""], - "train_header_article_light" : ["org.grobid.trainer.HeaderTrainer", "article/light"], - "train_header_article_light_ref" : ["org.grobid.trainer.HeaderTrainer", "article/light-ref"], - "train_header_ietf" : ["org.grobid.trainer.HeaderTrainer", "sdo/ietf"], - "train_segmentation" : ["org.grobid.trainer.SegmentationTrainer", ""], - "train_segmentation_article_light" : ["org.grobid.trainer.SegmentationTrainer", "article/light"], - "train_segmentation_article_light_ref": ["org.grobid.trainer.SegmentationTrainer", "article/light-ref"], - "train_segmentation_ietf" : ["org.grobid.trainer.SegmentationTrainer", "sdo/ietf"], - "train_fulltext" : ["org.grobid.trainer.FulltextTrainer", ""], - "train_fulltext_article_light" : ["org.grobid.trainer.FulltextTrainer", "article/light"], - "train_fulltext_article_light_ref" : ["org.grobid.trainer.FulltextTrainer", "article/light-ref"], + "train_header" : ["org.grobid.trainer.HeaderTrainer", ""], + "train_header_article_light" : ["org.grobid.trainer.HeaderTrainer", "article/light"], + "train_header_article_light_ref" : ["org.grobid.trainer.HeaderTrainer", "article/light-ref"], + "train_header_ietf" : ["org.grobid.trainer.HeaderTrainer", "sdo/ietf"], + "train_segmentation" : ["org.grobid.trainer.SegmentationTrainer", ""], + "train_segmentation_article_light" : ["org.grobid.trainer.SegmentationTrainer", "article/light"], + "train_segmentation_article_light_ref": ["org.grobid.trainer.SegmentationTrainer", "article/light-ref"], + "train_segmentation_ietf" : ["org.grobid.trainer.SegmentationTrainer", "sdo/ietf"], + "train_fulltext" : ["org.grobid.trainer.FulltextTrainer", ""], + "train_fulltext_article_light" : ["org.grobid.trainer.FulltextTrainer", "article/light"], + "train_fulltext_article_light_ref" : ["org.grobid.trainer.FulltextTrainer", "article/light-ref"], ] def libraries = "" @@ -527,7 +551,7 @@ project(":grobid-trainer") { } } else if (Os.isFamily(Os.FAMILY_UNIX)) { libraries = 
"${file("../grobid-home/lib/lin-64/jep").absolutePath}:" + - "${file("../grobid-home/lib/lin-64").absolutePath}:" + "${file("../grobid-home/lib/lin-64").absolutePath}:" } else { throw new RuntimeException("Unsupported platform!") } @@ -577,12 +601,18 @@ project(":grobid-trainer") { classpath = sourceSets.main.runtimeClasspath args 'nlm', getArg('p2t', '.'), getArg('run', '0'), getArg('fileRatio', '1.0'), getArg('flavor', '') def javaLibraryPath = getJavaLibraryPath() + // Add additional lib paths for native libraries (LMDB) + def lmdbLibPath = Os.isFamily(Os.FAMILY_MAC) ? + ":/opt/homebrew/lib:/usr/local/lib" : + Os.isFamily(Os.FAMILY_UNIX) ? ":/usr/lib/x86_64-linux-gnu:/usr/lib64:/usr/local/lib" : "" if (JavaVersion.current().compareTo(JavaVersion.VERSION_1_8) > 0) { - jvmArgs '-Xmx3072m', "--add-opens", "java.base/java.lang=ALL-UNNAMED" + jvmArgs '-Xmx3072m', "--add-opens", "java.base/java.lang=ALL-UNNAMED", + "--add-opens", "java.base/java.nio=ALL-UNNAMED", // Required for LMDB + "--add-opens", "java.base/sun.nio.ch=ALL-UNNAMED" // Required for LMDB } else { jvmArgs '-Xmx3072m' } - systemProperty "java.library.path", "${javaLibraryPath}" + systemProperty "java.library.path", "${javaLibraryPath}${lmdbLibPath}" } task(teiEval, dependsOn: 'classes', type: JavaExec, group: 'modelevaluation') { @@ -590,13 +620,18 @@ project(":grobid-trainer") { classpath = sourceSets.main.runtimeClasspath args 'tei', getArg('p2t', '.'), getArg('run', '0'), getArg('fileRatio', '1.0'), getArg('flavor', '') def javaLibraryPath = getJavaLibraryPath() - + // Add additional lib paths for native libraries (LMDB) + def lmdbLibPath = Os.isFamily(Os.FAMILY_MAC) ? + ":/opt/homebrew/lib:/usr/local/lib" : + Os.isFamily(Os.FAMILY_UNIX) ? 
":/usr/lib/x86_64-linux-gnu:/usr/lib64:/usr/local/lib" : "" if (JavaVersion.current().compareTo(JavaVersion.VERSION_1_8) > 0) { - jvmArgs '-Xmx3072m', "--add-opens", "java.base/java.lang=ALL-UNNAMED" + jvmArgs '-Xmx3072m', "--add-opens", "java.base/java.lang=ALL-UNNAMED", + "--add-opens", "java.base/java.nio=ALL-UNNAMED", // Required for LMDB + "--add-opens", "java.base/sun.nio.ch=ALL-UNNAMED" // Required for LMDB } else { jvmArgs '-Xmx3072m' } - systemProperty "java.library.path", "${javaLibraryPath}" + systemProperty "java.library.path", "${javaLibraryPath}${lmdbLibPath}" } task(PrepareDOIMatching, dependsOn: 'classes', type: JavaExec, group: 'modelevaluation') { @@ -604,14 +639,18 @@ project(":grobid-trainer") { classpath = sourceSets.main.runtimeClasspath args 'data', getArg('p2t', '.') def javaLibraryPath = getJavaLibraryPath() - + // Add additional lib paths for native libraries (LMDB) + def lmdbLibPath = Os.isFamily(Os.FAMILY_MAC) ? + ":/opt/homebrew/lib:/usr/local/lib" : + Os.isFamily(Os.FAMILY_UNIX) ? ":/usr/lib/x86_64-linux-gnu:/usr/lib64:/usr/local/lib" : "" if (JavaVersion.current().compareTo(JavaVersion.VERSION_1_8) > 0) { - jvmArgs '-Xmx3072m', "--add-opens", "java.base/java.lang=ALL-UNNAMED" + jvmArgs '-Xmx3072m', "--add-opens", "java.base/java.lang=ALL-UNNAMED", + "--add-opens", "java.base/java.nio=ALL-UNNAMED", // Required for LMDB + "--add-opens", "java.base/sun.nio.ch=ALL-UNNAMED" // Required for LMDB } else { jvmArgs '-Xmx3072m' } - - systemProperty "java.library.path", "${javaLibraryPath}" + systemProperty "java.library.path", "${javaLibraryPath}${lmdbLibPath}" } task(EvaluateDOIMatching, dependsOn: 'classes', type: JavaExec, group: 'modelevaluation') { @@ -620,14 +659,18 @@ project(":grobid-trainer") { args 'eval', getArg('p2t', '.') def javaLibraryPath = getJavaLibraryPath() - + // Add additional lib paths for native libraries (LMDB) + def lmdbLibPath = Os.isFamily(Os.FAMILY_MAC) ? 
+ ":/opt/homebrew/lib:/usr/local/lib" : + Os.isFamily(Os.FAMILY_UNIX) ? ":/usr/lib/x86_64-linux-gnu:/usr/lib64:/usr/local/lib" : "" if (JavaVersion.current().compareTo(JavaVersion.VERSION_1_8) > 0) { - jvmArgs '-Xmx3072m', "--add-opens", "java.base/java.lang=ALL-UNNAMED" + jvmArgs '-Xmx3072m', "--add-opens", "java.base/java.lang=ALL-UNNAMED", + "--add-opens", "java.base/java.nio=ALL-UNNAMED", // Required for LMDB + "--add-opens", "java.base/sun.nio.ch=ALL-UNNAMED" // Required for LMDB } else { jvmArgs '-Xmx3072m' } - - systemProperty "java.library.path", "${javaLibraryPath}" + systemProperty "java.library.path", "${javaLibraryPath}${lmdbLibPath}" } } @@ -664,7 +707,7 @@ tasks.register("codeCoverageReport", JacocoReport) { /** COVERALLS **/ coveralls { - sourceDirs = files(subprojects.findAll { it.plugins.hasPlugin('java') }.sourceSets.main.allSource.srcDirs).files.absolutePath + sourceDirs = files(subprojects.findAll { it.plugins.hasPlugin('java') }.sourceSets.main.allSource.srcDirs).files.absolutePath } tasks.coveralls { diff --git a/doc/benchmarks/Benchmarking-biorxiv.md b/doc/benchmarks/Benchmarking-biorxiv.md index 8e60478dba..7dd67baa89 100644 --- a/doc/benchmarks/Benchmarking-biorxiv.md +++ b/doc/benchmarks/Benchmarking-biorxiv.md @@ -40,75 +40,75 @@ Evaluation on 2000 random PDF files out of 1998 PDF (ratio 1.0). **Field-level results** -| label | precision | recall | f1 | support | -|-----------------------------|-----------|-----------|----------|---------| -| abstract | 2.42 | 2.36 | 2.39 | 1990 | -| authors | 85.12 | 84.39 | 84.75 | 1999 | -| first_author | 96.92 | 96.19 | 96.56 | 1997 | -| keywords | 58.13 | 59.24 | 58.68 | 839 | -| title | 77.33 | 76.6 | 76.97 | 2000 | -| | | | | | -| **all fields (micro avg.)** | **65** | **64.41** | **64.7** | 8825 | -| all fields (macro avg.) 
| 63.98 | 63.76 | 63.87 | 8825 | - -#### Soft Matching (ignoring punctuation, case and space characters mismatches) - -**Field-level results** - | label | precision | recall | f1 | support | |-----------------------------|-----------|-----------|-----------|---------| -| abstract | 60.57 | 59.2 | 59.87 | 1990 | -| authors | 85.52 | 84.79 | 85.15 | 1999 | -| first_author | 97.12 | 96.39 | 96.76 | 1997 | -| keywords | 63.27 | 64.48 | 63.87 | 839 | -| title | 79.45 | 78.7 | 79.08 | 2000 | +| abstract | 2.26 | 2.21 | 2.24 | 1990 | +| authors | 84.13 | 83.54 | 83.84 | 1999 | +| first_author | 96.17 | 95.59 | 95.88 | 1997 | +| keywords | 49.6 | 51.61 | 50.58 | 839 | +| title | 76.04 | 75.55 | 75.8 | 2000 | | | | | | | -| **all fields (micro avg.)** | **79.05** | **78.33** | **78.69** | 8825 | -| all fields (macro avg.) | 77.19 | 76.71 | 76.95 | 8825 | +| **all fields (micro avg.)** | **63.44** | **63.08** | **63.26** | 8825 | +| all fields (macro avg.) | 61.64 | 61.7 | 61.67 | 8825 | -#### Levenshtein Matching (Minimum Levenshtein distance at 0.8) +#### Soft Matching (ignoring punctuation, case and space characters mismatches) **Field-level results** | label | precision | recall | f1 | support | |-----------------------------|-----------|-----------|----------|---------| -| abstract | 80.72 | 78.89 | 79.8 | 1990 | -| authors | 92.63 | 91.85 | 92.24 | 1999 | -| first_author | 97.38 | 96.64 | 97.01 | 1997 | -| keywords | 79.3 | 80.81 | 80.05 | 839 | -| title | 91.97 | 91.1 | 91.53 | 2000 | +| abstract | 57.63 | 56.33 | 56.98 | 1990 | +| authors | 84.53 | 83.94 | 84.24 | 1999 | +| first_author | 96.32 | 95.74 | 96.03 | 1997 | +| keywords | 54.07 | 56.26 | 55.14 | 839 | +| title | 78.41 | 77.9 | 78.15 | 2000 | | | | | | | -| **all fields (micro avg.)** | **89.61** | **88.79** | **89.2** | 8825 | -| all fields (macro avg.) | 88.4 | 87.86 | 88.13 | 8825 | +| **all fields (micro avg.)** | **76.82** | **76.39** | **76.6** | 8825 | +| all fields (macro avg.) 
| 74.19 | 74.03 | 74.11 | 8825 | -#### Ratcliff/Obershelp Matching (Minimum Ratcliff/Obershelp similarity at 0.95) +#### Levenshtein Matching (Minimum Levenshtein distance at 0.8) **Field-level results** | label | precision | recall | f1 | support | |-----------------------------|-----------|-----------|-----------|---------| -| abstract | 77.22 | 75.48 | 76.34 | 1990 | -| authors | 88.7 | 87.94 | 88.32 | 1999 | -| first_author | 96.92 | 96.19 | 96.56 | 1997 | -| keywords | 70.99 | 72.35 | 71.66 | 839 | -| title | 87.63 | 86.8 | 87.21 | 2000 | +| abstract | 79.13 | 77.34 | 78.22 | 1990 | +| authors | 92.04 | 91.4 | 91.72 | 1999 | +| first_author | 96.62 | 96.04 | 96.33 | 1997 | +| keywords | 73.88 | 76.88 | 75.35 | 839 | +| title | 91.6 | 91 | 91.3 | 2000 | | | | | | | -| **all fields (micro avg.)** | **86.04** | **85.26** | **85.65** | 8825 | -| all fields (macro avg.) | 84.29 | 83.75 | 84.02 | 8825 | +| **all fields (micro avg.)** | **88.31** | **87.81** | **88.06** | 8825 | +| all fields (macro avg.) | 86.65 | 86.53 | 86.58 | 8825 | + +#### Ratcliff/Obershelp Matching (Minimum Ratcliff/Obershelp similarity at 0.95) + +**Field-level results** + +| label | precision | recall | f1 | support | +|-----------------------------|-----------|-----------|--------|---------| +| abstract | 75.48 | 73.77 | 74.61 | 1990 | +| authors | 87.76 | 87.14 | 87.45 | 1999 | +| first_author | 96.17 | 95.59 | 95.88 | 1997 | +| keywords | 62.08 | 64.6 | 63.32 | 839 | +| title | 87.12 | 86.55 | 86.83 | 2000 | +| | | | | | +| **all fields (micro avg.)** | **84.24** | **83.76** | **84** | 8825 | +| all fields (macro avg.) 
| 81.72 | 81.53 | 81.62 | 8825 | #### Instance-level results ``` -Total expected instances: 2000 -Total correct instances: 40 (strict) -Total correct instances: 728 (soft) -Total correct instances: 1237 (Levenshtein) -Total correct instances: 1066 (ObservedRatcliffObershelp) - -Instance-level recall: 2 (strict) -Instance-level recall: 36.4 (soft) -Instance-level recall: 61.85 (Levenshtein) -Instance-level recall: 53.3 (RatcliffObershelp) +Total expected instances: 2000 +Total correct instances: 37 (strict) +Total correct instances: 641 (soft) +Total correct instances: 1176 (Levenshtein) +Total correct instances: 978 (ObservedRatcliffObershelp) + +Instance-level recall: 1.85 (strict) +Instance-level recall: 32.05 (soft) +Instance-level recall: 58.8 (Levenshtein) +Instance-level recall: 48.9 (RatcliffObershelp) ``` ## Citation metadata @@ -121,20 +121,20 @@ Evaluation on 2000 random PDF files out of 1998 PDF (ratio 1.0). | label | precision | recall | f1 | support | |-----------------------------|-----------|-----------|-----------|---------| -| authors | 88.2 | 83.12 | 85.58 | 97183 | -| date | 91.71 | 86.15 | 88.84 | 97630 | -| doi | 70.86 | 83.85 | 76.81 | 16894 | -| first_author | 95.08 | 89.53 | 92.22 | 97183 | -| inTitle | 82.9 | 79.31 | 81.06 | 96430 | -| issue | 94.35 | 91.93 | 93.13 | 30312 | -| page | 94.99 | 78.22 | 85.79 | 88597 | -| pmcid | 66.44 | 86.12 | 75.01 | 807 | -| pmid | 69.99 | 84.57 | 76.59 | 2093 | -| title | 84.9 | 83.42 | 84.16 | 92463 | -| volume | 96.27 | 95.07 | 95.66 | 87709 | +| authors | 87.25 | 81.11 | 84.07 | 97183 | +| date | 90.77 | 84.01 | 87.26 | 97630 | +| doi | 68.31 | 75.43 | 71.69 | 16894 | +| first_author | 94.26 | 87.54 | 90.78 | 97183 | +| inTitle | 82.48 | 78.11 | 80.23 | 96430 | +| issue | 92.34 | 84.86 | 88.44 | 30312 | +| page | 94.38 | 77.78 | 85.28 | 88597 | +| pmcid | 66.24 | 82.65 | 73.54 | 807 | +| pmid | 68.23 | 81.56 | 74.3 | 2093 | +| title | 84.61 | 82.04 | 83.3 | 92463 | +| volume | 95.36 | 93.56 | 94.45 | 
87709 | | | | | | | -| **all fields (micro avg.)** | **89.87** | **85.21** | **87.48** | 707301 | -| all fields (macro avg.) | 85.06 | 85.57 | 84.99 | 707301 | +| **all fields (micro avg.)** | **89.09** | **83.26** | **86.08** | 707301 | +| all fields (macro avg.) | 84.02 | 82.6 | 83.03 | 707301 | #### Soft Matching (ignoring punctuation, case and space characters mismatches) @@ -142,20 +142,20 @@ Evaluation on 2000 random PDF files out of 1998 PDF (ratio 1.0). | label | precision | recall | f1 | support | |-----------------------------|-----------|-----------|-----------|---------| -| authors | 89.35 | 84.21 | 86.71 | 97183 | -| date | 91.71 | 86.15 | 88.84 | 97630 | -| doi | 75.34 | 89.16 | 81.67 | 16894 | -| first_author | 95.51 | 89.93 | 92.64 | 97183 | -| inTitle | 92.37 | 88.38 | 90.33 | 96430 | -| issue | 94.35 | 91.93 | 93.13 | 30312 | -| page | 94.99 | 78.22 | 85.79 | 88597 | -| pmcid | 75.72 | 98.14 | 85.48 | 807 | -| pmid | 74.42 | 89.92 | 81.44 | 2093 | -| title | 93.25 | 91.63 | 92.43 | 92463 | -| volume | 96.27 | 95.07 | 95.66 | 87709 | +| authors | 88.39 | 82.17 | 85.16 | 97183 | +| date | 90.77 | 84.01 | 87.26 | 97630 | +| doi | 72.82 | 80.41 | 76.43 | 16894 | +| first_author | 94.67 | 87.92 | 91.17 | 97183 | +| inTitle | 91.9 | 87.03 | 89.4 | 96430 | +| issue | 92.34 | 84.86 | 88.44 | 30312 | +| page | 94.38 | 77.78 | 85.28 | 88597 | +| pmcid | 75.17 | 93.8 | 83.46 | 807 | +| pmid | 72.7 | 86.91 | 79.17 | 2093 | +| title | 92.89 | 90.07 | 91.46 | 92463 | +| volume | 95.36 | 93.56 | 94.45 | 87709 | | | | | | | -| **all fields (micro avg.)** | **92.69** | **87.88** | **90.22** | 707301 | -| all fields (macro avg.) | 88.48 | 89.34 | 88.56 | 707301 | +| **all fields (micro avg.)** | **91.88** | **85.87** | **88.78** | 707301 | +| all fields (macro avg.) | 87.4 | 86.23 | 86.52 | 707301 | #### Levenshtein Matching (Minimum Levenshtein distance at 0.8) @@ -163,20 +163,20 @@ Evaluation on 2000 random PDF files out of 1998 PDF (ratio 1.0). 
| label | precision | recall | f1 | support | |-----------------------------|-----------|-----------|-----------|---------| -| authors | 94.61 | 89.16 | 91.81 | 97183 | -| date | 91.71 | 86.15 | 88.84 | 97630 | -| doi | 77.58 | 91.81 | 84.1 | 16894 | -| first_author | 95.66 | 90.08 | 92.78 | 97183 | -| inTitle | 93.36 | 89.32 | 91.29 | 96430 | -| issue | 94.35 | 91.93 | 93.13 | 30312 | -| page | 94.99 | 78.22 | 85.79 | 88597 | -| pmcid | 75.72 | 98.14 | 85.48 | 807 | -| pmid | 74.42 | 89.92 | 81.44 | 2093 | -| title | 96.08 | 94.41 | 95.24 | 92463 | -| volume | 96.27 | 95.07 | 95.66 | 87709 | +| authors | 93.71 | 87.12 | 90.29 | 97183 | +| date | 90.77 | 84.01 | 87.26 | 97630 | +| doi | 76.99 | 85.02 | 80.81 | 16894 | +| first_author | 94.82 | 88.06 | 91.31 | 97183 | +| inTitle | 92.84 | 87.92 | 90.31 | 96430 | +| issue | 92.34 | 84.86 | 88.44 | 30312 | +| page | 94.38 | 77.78 | 85.28 | 88597 | +| pmcid | 75.17 | 93.8 | 83.46 | 807 | +| pmid | 72.74 | 86.96 | 79.22 | 2093 | +| title | 95.79 | 92.88 | 94.31 | 92463 | +| volume | 95.36 | 93.56 | 94.45 | 87709 | | | | | | | -| **all fields (micro avg.)** | **94.01** | **89.14** | **91.51** | 707301 | -| all fields (macro avg.) | 89.52 | 90.38 | 89.6 | 707301 | +| **all fields (micro avg.)** | **93.27** | **87.17** | **90.12** | 707301 | +| all fields (macro avg.) | 88.63 | 87.45 | 87.74 | 707301 | #### Ratcliff/Obershelp Matching (Minimum Ratcliff/Obershelp similarity at 0.95) @@ -184,73 +184,73 @@ Evaluation on 2000 random PDF files out of 1998 PDF (ratio 1.0). 
| label | precision | recall | f1 | support | |-----------------------------|-----------|-----------|-----------|---------| -| authors | 91.57 | 86.3 | 88.85 | 97183 | -| date | 91.71 | 86.15 | 88.84 | 97630 | -| doi | 76.04 | 89.98 | 82.42 | 16894 | -| first_author | 95.13 | 89.58 | 92.27 | 97183 | -| inTitle | 91.13 | 87.19 | 89.11 | 96430 | -| issue | 94.35 | 91.93 | 93.13 | 30312 | -| page | 94.99 | 78.22 | 85.79 | 88597 | -| pmcid | 66.44 | 86.12 | 75.01 | 807 | -| pmid | 69.99 | 84.57 | 76.59 | 2093 | -| title | 95.41 | 93.75 | 94.57 | 92463 | -| volume | 96.27 | 95.07 | 95.66 | 87709 | +| authors | 90.59 | 84.22 | 87.29 | 97183 | +| date | 90.77 | 84.01 | 87.26 | 97630 | +| doi | 74.72 | 82.51 | 78.42 | 16894 | +| first_author | 94.3 | 87.58 | 90.82 | 97183 | +| inTitle | 90.71 | 85.9 | 88.24 | 96430 | +| issue | 92.34 | 84.86 | 88.44 | 30312 | +| page | 94.38 | 77.78 | 85.28 | 88597 | +| pmcid | 66.24 | 82.65 | 73.54 | 807 | +| pmid | 68.23 | 81.56 | 74.3 | 2093 | +| title | 95.12 | 92.24 | 93.66 | 92463 | +| volume | 95.36 | 93.56 | 94.45 | 87709 | | | | | | | -| **all fields (micro avg.)** | **93.05** | **88.22** | **90.57** | 707301 | -| all fields (macro avg.) | 87.55 | 88.08 | 87.48 | 707301 | +| **all fields (micro avg.)** | **92.3** | **86.26** | **89.18** | 707301 | +| all fields (macro avg.) 
| 86.62 | 85.17 | 85.61 | 707301 | #### Instance-level results ``` -Total expected instances: 98799 -Total extracted instances: 97808 -Total correct instances: 43695 (strict) -Total correct instances: 54689 (soft) -Total correct instances: 58863 (Levenshtein) -Total correct instances: 55597 (RatcliffObershelp) +Total expected instances: 98799 +Total extracted instances: 98373 +Total correct instances: 41161 (strict) +Total correct instances: 51566 (soft) +Total correct instances: 55887 (Levenshtein) +Total correct instances: 52758 (RatcliffObershelp) -Instance-level precision: 44.67 (strict) -Instance-level precision: 55.91 (soft) -Instance-level precision: 60.18 (Levenshtein) -Instance-level precision: 56.84 (RatcliffObershelp) +Instance-level precision: 41.84 (strict) +Instance-level precision: 52.42 (soft) +Instance-level precision: 56.81 (Levenshtein) +Instance-level precision: 53.63 (RatcliffObershelp) -Instance-level recall: 44.23 (strict) -Instance-level recall: 55.35 (soft) -Instance-level recall: 59.58 (Levenshtein) -Instance-level recall: 56.27 (RatcliffObershelp) +Instance-level recall: 41.66 (strict) +Instance-level recall: 52.19 (soft) +Instance-level recall: 56.57 (Levenshtein) +Instance-level recall: 53.4 (RatcliffObershelp) -Instance-level f-score: 44.45 (strict) -Instance-level f-score: 55.63 (soft) -Instance-level f-score: 59.88 (Levenshtein) -Instance-level f-score: 56.56 (RatcliffObershelp) +Instance-level f-score: 41.75 (strict) +Instance-level f-score: 52.31 (soft) +Instance-level f-score: 56.69 (Levenshtein) +Instance-level f-score: 53.51 (RatcliffObershelp) -Matching 1 : 79152 +Matching 1 : 77036 -Matching 2 : 4442 +Matching 2 : 4315 -Matching 3 : 4360 +Matching 3 : 4784 -Matching 4 : 2101 +Matching 4 : 2655 -Total matches : 90055 +Total matches : 88790 ``` #### Citation context resolution ``` -Total expected references: 98797 - 49.4 references per article -Total predicted references: 97808 - 48.9 references per article +Total expected 
references: 98797 - 49.4 references per article +Total predicted references: 98373 - 49.19 references per article -Total expected citation contexts: 142862 - 71.43 citation contexts per article -Total predicted citation contexts: 134498 - 67.25 citation contexts per article +Total expected citation contexts: 142862 - 71.43 citation contexts per article +Total predicted citation contexts: 132881 - 66.44 citation contexts per article -Total correct predicted citation contexts: 115971 - 57.99 citation contexts per article -Total wrong predicted citation contexts: 18527 (wrong callout matching, callout missing in NLM, or matching with a bib. ref. not aligned with a bib.ref. in NLM) +Total correct predicted citation contexts: 113221 - 56.61 citation contexts per article +Total wrong predicted citation contexts: 19660 (wrong callout matching, callout missing in NLM, or matching with a bib. ref. not aligned with a bib.ref. in NLM) -Precision citation contexts: 86.23 -Recall citation contexts: 81.18 -fscore citation contexts: 83.62 +Precision citation contexts: 85.2 +Recall citation contexts: 79.25 +fscore citation contexts: 82.12 ``` ## Fulltext structures @@ -268,17 +268,17 @@ Evaluation on 2000 random PDF files out of 1998 PDF (ratio 1.0). 
| label | precision | recall | f1 | support | |-----------------------------|-----------|-----------|-----------|---------| -| availability_stmt | 29.61 | 25.56 | 27.44 | 446 | -| figure_title | 4.29 | 2.34 | 3.03 | 22978 | -| funding_stmt | 3.46 | 22.95 | 6.01 | 745 | -| reference_citation | 72.02 | 70.94 | 71.48 | 147470 | -| reference_figure | 70.41 | 77.14 | 73.62 | 47984 | -| reference_table | 45.65 | 86.74 | 59.82 | 5957 | -| section_title | 71.35 | 69.91 | 70.62 | 32398 | +| availability_stmt | 29.05 | 25.34 | 27.07 | 446 | +| figure_title | 4.3 | 2.35 | 3.04 | 22978 | +| funding_stmt | 3.73 | 23.29 | 6.43 | 747 | +| reference_citation | 71.97 | 70.64 | 71.3 | 147470 | +| reference_figure | 70.38 | 77.13 | 73.6 | 47984 | +| reference_table | 45.62 | 86.64 | 59.76 | 5957 | +| section_title | 71.29 | 69.91 | 70.59 | 32398 | | table_title | 7.41 | 2.7 | 3.96 | 3925 | | | | | | | -| **all fields (micro avg.)** | **65.48** | **65.06** | **65.27** | 261903 | -| all fields (macro avg.) | 38.02 | 44.79 | 39.5 | 261903 | +| **all fields (micro avg.)** | **65.49** | **64.88** | **65.19** | 261905 | +| all fields (macro avg.) | 37.97 | 44.75 | 39.47 | 261905 | #### Soft Matching (ignoring punctuation, case and space characters mismatches) @@ -286,26 +286,26 @@ Evaluation on 2000 random PDF files out of 1998 PDF (ratio 1.0). 
| label | precision | recall | f1 | support | |-----------------------------|-----------|-----------|-----------|---------| -| availability_stmt | 50.65 | 43.72 | 46.93 | 446 | -| figure_title | 68.34 | 37.29 | 48.25 | 22978 | -| funding_stmt | 3.68 | 24.43 | 6.39 | 745 | -| reference_citation | 84.34 | 83.08 | 83.7 | 147470 | -| reference_figure | 71.05 | 77.84 | 74.29 | 47984 | -| reference_table | 46.07 | 87.53 | 60.36 | 5957 | -| section_title | 76.91 | 75.37 | 76.13 | 32398 | -| table_title | 82.8 | 30.17 | 44.22 | 3925 | +| availability_stmt | 49.87 | 43.5 | 46.47 | 446 | +| figure_title | 68.24 | 37.29 | 48.23 | 22978 | +| funding_stmt | 3.96 | 24.77 | 6.83 | 747 | +| reference_citation | 84.28 | 82.73 | 83.5 | 147470 | +| reference_figure | 71.02 | 77.83 | 74.27 | 47984 | +| reference_table | 46.03 | 87.43 | 60.31 | 5957 | +| section_title | 76.85 | 75.36 | 76.1 | 32398 | +| table_title | 82.73 | 30.14 | 44.18 | 3925 | | | | | | | -| **all fields (micro avg.)** | **76.72** | **76.22** | **76.47** | 261903 | -| all fields (macro avg.) | 60.48 | 57.43 | 55.04 | 261903 | +| **all fields (micro avg.)** | **76.73** | **76.02** | **76.38** | 261905 | +| all fields (macro avg.) | 60.37 | 57.38 | 54.99 | 261905 | **Document-level ratio results** | label | precision | recall | f1 | support | |-----------------------------|-----------|-----------|-----------|---------| -| availability_stmt | 84.8 | 86.32 | 85.56 | 446 | +| availability_stmt | 84.2 | 87.22 | 85.68 | 446 | | | | | | | -| **all fields (micro avg.)** | **84.8** | **86.32** | **85.56** | 446 | -| all fields (macro avg.) | 84.8 | 86.32 | 85.56 | 446 | +| **all fields (micro avg.)** | **84.2** | **87.22** | **85.68** | 446 | +| all fields (macro avg.) 
| 84.2 | 87.22 | 85.68 | 446 | -Evaluation metrics produced in 1607.353 seconds +Evaluation metrics produced in 1598.033 seconds diff --git a/doc/benchmarks/Benchmarking-elife.md b/doc/benchmarks/Benchmarking-elife.md index da17249502..b5bf3e6376 100644 --- a/doc/benchmarks/Benchmarking-elife.md +++ b/doc/benchmarks/Benchmarking-elife.md @@ -41,13 +41,13 @@ Evaluation on 984 random PDF files out of 982 PDF (ratio 1.0). | label | precision | recall | f1 | support | |-----------------------------|-----------|-----------|-----------|---------| -| abstract | 9.53 | 9.25 | 9.39 | 984 | -| authors | 74.79 | 73.96 | 74.37 | 983 | -| first_author | 92.59 | 91.65 | 92.12 | 982 | -| title | 86.93 | 85.16 | 86.04 | 984 | +| abstract | 9.56 | 9.25 | 9.4 | 984 | +| authors | 18.51 | 18.21 | 18.36 | 983 | +| first_author | 54.91 | 54.07 | 54.49 | 982 | +| title | 72.93 | 71.75 | 72.34 | 984 | | | | | | | -| **all fields (micro avg.)** | **66.17** | **64.99** | **65.57** | 3933 | -| all fields (macro avg.) | 65.96 | 65 | 65.48 | 3933 | +| **all fields (micro avg.)** | **39.1** | **38.32** | **38.71** | 3933 | +| all fields (macro avg.) | 38.98 | 38.32 | 38.65 | 3933 | #### Soft Matching (ignoring punctuation, case and space characters mismatches) @@ -55,13 +55,13 @@ Evaluation on 984 random PDF files out of 982 PDF (ratio 1.0). | label | precision | recall | f1 | support | |-----------------------------|-----------|-----------|-----------|---------| -| abstract | 22.3 | 21.65 | 21.97 | 984 | -| authors | 75.1 | 74.26 | 74.68 | 983 | -| first_author | 92.59 | 91.65 | 92.12 | 982 | -| title | 94.92 | 92.99 | 93.94 | 984 | +| abstract | 24.16 | 23.37 | 23.76 | 984 | +| authors | 18.82 | 18.51 | 18.67 | 983 | +| first_author | 54.91 | 54.07 | 54.49 | 982 | +| title | 81.3 | 79.98 | 80.64 | 984 | | | | | | | -| **all fields (micro avg.)** | **71.4** | **70.12** | **70.75** | 3933 | -| all fields (macro avg.) 
| 71.23 | 70.14 | 70.68 | 3933 | +| **all fields (micro avg.)** | **44.89** | **43.99** | **44.43** | 3933 | +| all fields (macro avg.) | 44.8 | 43.99 | 44.39 | 3933 | #### Levenshtein Matching (Minimum Levenshtein distance at 0.8) @@ -69,13 +69,13 @@ Evaluation on 984 random PDF files out of 982 PDF (ratio 1.0). | label | precision | recall | f1 | support | |-----------------------------|-----------|-----------|-----------|---------| -| abstract | 47.43 | 46.04 | 46.73 | 984 | -| authors | 88.68 | 87.69 | 88.18 | 983 | -| first_author | 92.9 | 91.96 | 92.43 | 982 | -| title | 96.37 | 94.41 | 95.38 | 984 | +| abstract | 48.63 | 47.05 | 47.83 | 984 | +| authors | 53.57 | 52.7 | 53.13 | 983 | +| first_author | 55.33 | 54.48 | 54.9 | 982 | +| title | 94.32 | 92.78 | 93.55 | 984 | | | | | | | -| **all fields (micro avg.)** | **81.47** | **80.02** | **80.73** | 3933 | -| all fields (macro avg.) | 81.35 | 80.02 | 80.68 | 3933 | +| **all fields (micro avg.)** | **63.03** | **61.76** | **62.39** | 3933 | +| all fields (macro avg.) | 62.96 | 61.75 | 62.35 | 3933 | #### Ratcliff/Obershelp Matching (Minimum Ratcliff/Obershelp similarity at 0.95) @@ -83,27 +83,27 @@ Evaluation on 984 random PDF files out of 982 PDF (ratio 1.0). | label | precision | recall | f1 | support | |-----------------------------|-----------|-----------|-----------|---------| -| abstract | 44.5 | 43.19 | 43.84 | 984 | -| authors | 80.35 | 79.45 | 79.9 | 983 | -| first_author | 92.59 | 91.65 | 92.12 | 982 | -| title | 96.37 | 94.41 | 95.38 | 984 | +| abstract | 46.32 | 44.82 | 45.56 | 984 | +| authors | 29.27 | 28.79 | 29.03 | 983 | +| first_author | 54.91 | 54.07 | 54.49 | 982 | +| title | 90.39 | 88.92 | 89.65 | 984 | | | | | | | -| **all fields (micro avg.)** | **78.57** | **77.17** | **77.86** | 3933 | -| all fields (macro avg.) | 78.45 | 77.18 | 77.81 | 3933 | +| **all fields (micro avg.)** | **55.27** | **54.16** | **54.71** | 3933 | +| all fields (macro avg.) 
| 55.22 | 54.15 | 54.68 | 3933 | #### Instance-level results ``` -Total expected instances: 984 -Total correct instances: 74 (strict) -Total correct instances: 196 (soft) -Total correct instances: 381 (Levenshtein) -Total correct instances: 338 (ObservedRatcliffObershelp) - -Instance-level recall: 7.52 (strict) -Instance-level recall: 19.92 (soft) -Instance-level recall: 38.72 (Levenshtein) -Instance-level recall: 34.35 (RatcliffObershelp) +Total expected instances: 984 +Total correct instances: 9 (strict) +Total correct instances: 23 (soft) +Total correct instances: 119 (Levenshtein) +Total correct instances: 76 (ObservedRatcliffObershelp) + +Instance-level recall: 0.91 (strict) +Instance-level recall: 2.34 (soft) +Instance-level recall: 12.09 (Levenshtein) +Instance-level recall: 7.72 (RatcliffObershelp) ``` ## Citation metadata @@ -116,17 +116,17 @@ Evaluation on 984 random PDF files out of 982 PDF (ratio 1.0). | label | precision | recall | f1 | support | |-----------------------------|-----------|-----------|-----------|---------| -| authors | 79.43 | 78.36 | 78.89 | 63265 | -| date | 95.89 | 94.19 | 95.03 | 63662 | -| first_author | 94.83 | 93.51 | 94.17 | 63265 | -| inTitle | 95.82 | 94.87 | 95.34 | 63213 | -| issue | 2 | 75 | 3.9 | 16 | -| page | 96.28 | 95.44 | 95.86 | 53375 | -| title | 90.27 | 90.87 | 90.57 | 62044 | -| volume | 97.88 | 98.39 | 98.14 | 61049 | +| authors | 77.14 | 75.27 | 76.2 | 63265 | +| date | 94.9 | 91.62 | 93.23 | 63662 | +| first_author | 92.48 | 90.1 | 91.27 | 63265 | +| inTitle | 94.56 | 92.81 | 93.68 | 63213 | +| issue | 1.58 | 81.25 | 3.09 | 16 | +| page | 95.15 | 93.33 | 94.23 | 53375 | +| title | 89.52 | 88.7 | 89.11 | 62044 | +| volume | 97.08 | 96.22 | 96.65 | 61049 | | | | | | | -| **all fields (micro avg.)** | **92.7** | **92.13** | **92.42** | 429889 | -| all fields (macro avg.) | 81.55 | 90.08 | 81.49 | 429889 | +| **all fields (micro avg.)** | **91.28** | **89.61** | **90.43** | 429889 | +| all fields (macro avg.) 
| 80.3 | 88.66 | 79.68 | 429889 | #### Soft Matching (ignoring punctuation, case and space characters mismatches) @@ -134,17 +134,17 @@ Evaluation on 984 random PDF files out of 982 PDF (ratio 1.0). | label | precision | recall | f1 | support | |-----------------------------|-----------|-----------|-----------|---------| -| authors | 79.57 | 78.49 | 79.03 | 63265 | -| date | 95.89 | 94.19 | 95.03 | 63662 | -| first_author | 94.91 | 93.59 | 94.25 | 63265 | -| inTitle | 96.29 | 95.35 | 95.82 | 63213 | -| issue | 2 | 75 | 3.9 | 16 | -| page | 96.28 | 95.44 | 95.86 | 53375 | -| title | 95.94 | 96.58 | 96.26 | 62044 | -| volume | 97.88 | 98.39 | 98.14 | 61049 | +| authors | 77.28 | 75.4 | 76.33 | 63265 | +| date | 94.9 | 91.62 | 93.23 | 63662 | +| first_author | 92.55 | 90.17 | 91.35 | 63265 | +| inTitle | 95.02 | 93.25 | 94.13 | 63213 | +| issue | 1.58 | 81.25 | 3.09 | 16 | +| page | 95.15 | 93.33 | 94.23 | 53375 | +| title | 95.17 | 94.3 | 94.73 | 62044 | +| volume | 97.08 | 96.22 | 96.65 | 61049 | | | | | | | -| **all fields (micro avg.)** | **93.63** | **93.06** | **93.34** | 429889 | -| all fields (macro avg.) | 82.35 | 90.88 | 82.28 | 429889 | +| **all fields (micro avg.)** | **92.2** | **90.51** | **91.35** | 429889 | +| all fields (macro avg.) | 81.09 | 89.44 | 80.47 | 429889 | #### Levenshtein Matching (Minimum Levenshtein distance at 0.8) @@ -152,17 +152,17 @@ Evaluation on 984 random PDF files out of 982 PDF (ratio 1.0). 
| label | precision | recall | f1 | support | |-----------------------------|-----------|-----------|-----------|---------| -| authors | 93.33 | 92.07 | 92.7 | 63265 | -| date | 95.89 | 94.19 | 95.03 | 63662 | -| first_author | 95.36 | 94.03 | 94.69 | 63265 | -| inTitle | 96.62 | 95.67 | 96.14 | 63213 | -| issue | 2 | 75 | 3.9 | 16 | -| page | 96.28 | 95.44 | 95.86 | 53375 | -| title | 97.69 | 98.34 | 98.01 | 62044 | -| volume | 97.88 | 98.39 | 98.14 | 61049 | +| authors | 91.16 | 88.94 | 90.04 | 63265 | +| date | 94.9 | 91.62 | 93.23 | 63662 | +| first_author | 92.98 | 90.59 | 91.77 | 63265 | +| inTitle | 95.35 | 93.58 | 94.46 | 63213 | +| issue | 1.58 | 81.25 | 3.09 | 16 | +| page | 95.15 | 93.33 | 94.23 | 53375 | +| title | 97.34 | 96.45 | 96.89 | 62044 | +| volume | 97.08 | 96.22 | 96.65 | 61049 | | | | | | | -| **all fields (micro avg.)** | **96.01** | **95.42** | **95.71** | 429889 | -| all fields (macro avg.) | 84.38 | 92.89 | 84.31 | 429889 | +| **all fields (micro avg.)** | **94.65** | **92.92** | **93.78** | 429889 | +| all fields (macro avg.) | 83.19 | 91.5 | 82.54 | 429889 | #### Ratcliff/Obershelp Matching (Minimum Ratcliff/Obershelp similarity at 0.95) @@ -170,70 +170,70 @@ Evaluation on 984 random PDF files out of 982 PDF (ratio 1.0). 
| label | precision | recall | f1 | support | |-----------------------------|-----------|-----------|-----------|---------| -| authors | 86.75 | 85.57 | 86.16 | 63265 | -| date | 95.89 | 94.19 | 95.03 | 63662 | -| first_author | 94.84 | 93.53 | 94.18 | 63265 | -| inTitle | 96.3 | 95.35 | 95.82 | 63213 | -| issue | 2 | 75 | 3.9 | 16 | -| page | 96.28 | 95.44 | 95.86 | 53375 | -| title | 97.54 | 98.18 | 97.86 | 62044 | -| volume | 97.88 | 98.39 | 98.14 | 61049 | +| authors | 84.33 | 82.28 | 83.29 | 63265 | +| date | 94.9 | 91.62 | 93.23 | 63662 | +| first_author | 92.49 | 90.11 | 91.29 | 63265 | +| inTitle | 95.04 | 93.28 | 94.15 | 63213 | +| issue | 1.58 | 81.25 | 3.09 | 16 | +| page | 95.15 | 93.33 | 94.23 | 53375 | +| title | 97.08 | 96.19 | 96.63 | 62044 | +| volume | 97.08 | 96.22 | 96.65 | 61049 | | | | | | | -| **all fields (micro avg.)** | **94.9** | **94.32** | **94.61** | 429889 | -| all fields (macro avg.) | 83.43 | 91.96 | 83.37 | 429889 | +| **all fields (micro avg.)** | **93.5** | **91.79** | **92.64** | 429889 | +| all fields (macro avg.) 
| 82.2 | 90.53 | 81.57 | 429889 | #### Instance-level results ``` -Total expected instances: 63664 -Total extracted instances: 66152 -Total correct instances: 42401 (strict) -Total correct instances: 45243 (soft) -Total correct instances: 52907 (Levenshtein) -Total correct instances: 49502 (RatcliffObershelp) +Total expected instances: 63664 +Total extracted instances: 68032 +Total correct instances: 40158 (strict) +Total correct instances: 42810 (soft) +Total correct instances: 50131 (Levenshtein) +Total correct instances: 46885 (RatcliffObershelp) -Instance-level precision: 64.1 (strict) -Instance-level precision: 68.39 (soft) -Instance-level precision: 79.98 (Levenshtein) -Instance-level precision: 74.83 (RatcliffObershelp) +Instance-level precision: 59.03 (strict) +Instance-level precision: 62.93 (soft) +Instance-level precision: 73.69 (Levenshtein) +Instance-level precision: 68.92 (RatcliffObershelp) -Instance-level recall: 66.6 (strict) -Instance-level recall: 71.07 (soft) -Instance-level recall: 83.1 (Levenshtein) -Instance-level recall: 77.76 (RatcliffObershelp) +Instance-level recall: 63.08 (strict) +Instance-level recall: 67.24 (soft) +Instance-level recall: 78.74 (Levenshtein) +Instance-level recall: 73.64 (RatcliffObershelp) -Instance-level f-score: 65.32 (strict) -Instance-level f-score: 69.7 (soft) -Instance-level f-score: 81.51 (Levenshtein) -Instance-level f-score: 76.26 (RatcliffObershelp) +Instance-level f-score: 60.99 (strict) +Instance-level f-score: 65.01 (soft) +Instance-level f-score: 76.13 (Levenshtein) +Instance-level f-score: 71.2 (RatcliffObershelp) -Matching 1 : 58715 +Matching 1 : 57007 -Matching 2 : 1019 +Matching 2 : 1057 -Matching 3 : 1252 +Matching 3 : 1499 -Matching 4 : 368 +Matching 4 : 731 -Total matches : 61354 +Total matches : 60294 ``` #### Citation context resolution ``` -Total expected references: 63664 - 64.7 references per article -Total predicted references: 66152 - 67.23 references per article +Total expected references: 
63664 - 64.7 references per article +Total predicted references: 68032 - 69.14 references per article -Total expected citation contexts: 109022 - 110.79 citation contexts per article -Total predicted citation contexts: 99982 - 101.61 citation contexts per article +Total expected citation contexts: 109022 - 110.79 citation contexts per article +Total predicted citation contexts: 98768 - 100.37 citation contexts per article -Total correct predicted citation contexts: 96212 - 97.78 citation contexts per article -Total wrong predicted citation contexts: 3770 (wrong callout matching, callout missing in NLM, or matching with a bib. ref. not aligned with a bib.ref. in NLM) +Total correct predicted citation contexts: 93697 - 95.22 citation contexts per article +Total wrong predicted citation contexts: 5071 (wrong callout matching, callout missing in NLM, or matching with a bib. ref. not aligned with a bib.ref. in NLM) -Precision citation contexts: 96.23 -Recall citation contexts: 88.25 -fscore citation contexts: 92.07 +Precision citation contexts: 94.87 +Recall citation contexts: 85.94 +fscore citation contexts: 90.18 ``` ## Fulltext structures @@ -251,43 +251,43 @@ Evaluation on 984 random PDF files out of 982 PDF (ratio 1.0). 
| label | precision | recall | f1 | support | |-----------------------------|-----------|-----------|-----------|---------| -| availability_stmt | 29.21 | 27.86 | 28.52 | 585 | +| availability_stmt | 29.11 | 27.86 | 28.47 | 585 | | figure_title | 0.07 | 0.02 | 0.03 | 31718 | -| funding_stmt | 6.18 | 29.53 | 10.22 | 921 | -| reference_citation | 57.06 | 55.97 | 56.51 | 108949 | -| reference_figure | 58.42 | 51.02 | 54.47 | 68926 | -| reference_table | 71.83 | 73.46 | 72.63 | 2381 | -| section_title | 82.81 | 77.25 | 79.93 | 21831 | +| funding_stmt | 7.11 | 23.45 | 10.91 | 921 | +| reference_citation | 56.96 | 55.81 | 56.38 | 108949 | +| reference_figure | 58.41 | 51.02 | 54.46 | 68926 | +| reference_table | 71.71 | 73.46 | 72.57 | 2381 | +| section_title | 82.82 | 77.27 | 79.95 | 21831 | | table_title | 0 | 0 | 0 | 1925 | | | | | | | -| **all fields (micro avg.)** | **56.03** | **48.56** | **52.03** | 237236 | -| all fields (macro avg.) | 38.2 | 39.39 | 37.79 | 237236 | +| **all fields (micro avg.)** | **56.3** | **48.46** | **52.09** | 237236 | +| all fields (macro avg.) | 38.27 | 38.61 | 37.85 | 237236 | #### Soft Matching (ignoring punctuation, case and space characters mismatches) **Field-level results** -| label | precision | recall | f1 | support | -|-----------------------------|-----------|-----------|----------|---------| -| availability_stmt | 40.68 | 38.8 | 39.72 | 585 | -| figure_title | 49.75 | 16 | 24.21 | 31718 | -| funding_stmt | 6.18 | 29.53 | 10.22 | 921 | -| reference_citation | 93.6 | 91.81 | 92.7 | 108949 | -| reference_figure | 58.71 | 51.27 | 54.73 | 68926 | -| reference_table | 71.91 | 73.54 | 72.72 | 2381 | -| section_title | 83.85 | 78.21 | 80.93 | 21831 | -| table_title | 94.26 | 28.16 | 43.36 | 1925 | -| | | | | | -| **all fields (micro avg.)** | **77.97** | **67.57** | **72.4** | 237236 | -| all fields (macro avg.) 
| 62.37 | 50.92 | 52.32 | 237236 | - -**Document-level ratio results** - | label | precision | recall | f1 | support | |-----------------------------|-----------|-----------|-----------|---------| -| availability_stmt | 96.88 | 95.38 | 96.12 | 585 | +| availability_stmt | 40 | 38.29 | 39.13 | 585 | +| figure_title | 49.52 | 16.04 | 24.24 | 31718 | +| funding_stmt | 7.11 | 23.45 | 10.91 | 921 | +| reference_citation | 93.51 | 91.61 | 92.55 | 108949 | +| reference_figure | 58.69 | 51.27 | 54.73 | 68926 | +| reference_table | 71.83 | 73.58 | 72.7 | 2381 | +| section_title | 83.86 | 78.23 | 80.95 | 21831 | +| table_title | 94.25 | 28.1 | 43.3 | 1925 | | | | | | | -| **all fields (micro avg.)** | **96.88** | **95.38** | **96.12** | 585 | -| all fields (macro avg.) | 96.88 | 95.38 | 96.12 | 585 | +| **all fields (micro avg.)** | **78.38** | **67.46** | **72.51** | 237236 | +| all fields (macro avg.) | 62.35 | 50.07 | 52.31 | 237236 | + +**Document-level ratio results** + +| label | precision | recall | f1 | support | +|-----------------------------|-----------|-----------|----------|---------| +| availability_stmt | 96.89 | 95.73 | 96.3 | 585 | +| | | | | | +| **all fields (micro avg.)** | **96.89** | **95.73** | **96.3** | 585 | +| all fields (macro avg.) | 96.89 | 95.73 | 96.3 | 585 | -Evaluation metrics produced in 1348.778 seconds +Evaluation metrics produced in 1353.877 seconds diff --git a/doc/benchmarks/Benchmarking-plos.md b/doc/benchmarks/Benchmarking-plos.md index 0bd2c68a3e..e6601603e6 100644 --- a/doc/benchmarks/Benchmarking-plos.md +++ b/doc/benchmarks/Benchmarking-plos.md @@ -41,44 +41,44 @@ Evaluation on 1000 random PDF files out of 998 PDF (ratio 1.0). 
| label | precision | recall | f1 | support | |-----------------------------|-----------|-----------|-----------|---------| -| abstract | 13.33 | 13.33 | 13.33 | 960 | -| authors | 99.07 | 99.07 | 99.07 | 969 | -| first_author | 99.28 | 99.28 | 99.28 | 969 | +| abstract | 12.53 | 12.81 | 12.67 | 960 | +| authors | 50.77 | 50.77 | 50.77 | 969 | +| first_author | 82.77 | 82.77 | 82.77 | 969 | | keywords | 0 | 0 | 0 | 0 | -| title | 95.97 | 95.3 | 95.63 | 1000 | +| title | 67.14 | 66.8 | 66.97 | 1000 | | | | | | | -| **all fields (micro avg.)** | **77.18** | **77.04** | **77.11** | 3898 | -| all fields (macro avg.) | 76.91 | 76.75 | 76.83 | 3898 | +| **all fields (micro avg.)** | **53.26** | **53.49** | **53.37** | 3898 | +| all fields (macro avg.) | 53.3 | 53.29 | 53.29 | 3898 | #### Soft Matching (ignoring punctuation, case and space characters mismatches) **Field-level results** -| label | precision | recall | f1 | support | -|-----------------------------|-----------|-----------|----------|---------| -| abstract | 50.52 | 50.52 | 50.52 | 960 | -| authors | 99.07 | 99.07 | 99.07 | 969 | -| first_author | 99.28 | 99.28 | 99.28 | 969 | -| keywords | 0 | 0 | 0 | 0 | -| title | 99.6 | 98.9 | 99.25 | 1000 | -| | | | | | -| **all fields (micro avg.)** | **87.28** | **87.12** | **87.2** | 3898 | -| all fields (macro avg.) | 87.12 | 86.94 | 87.03 | 3898 | +| label | precision | recall | f1 | support | +|-----------------------------|-----------|-----------|-----------|---------| +| abstract | 46.33 | 47.4 | 46.86 | 960 | +| authors | 52.32 | 52.32 | 52.32 | 969 | +| first_author | 82.97 | 82.97 | 82.97 | 969 | +| keywords | 0 | 0 | 0 | 0 | +| title | 81.51 | 81.1 | 81.3 | 1000 | +| | | | | | +| **all fields (micro avg.)** | **65.82** | **66.11** | **65.97** | 3898 | +| all fields (macro avg.) 
| 65.78 | 65.95 | 65.86 | 3898 | #### Levenshtein Matching (Minimum Levenshtein distance at 0.8) **Field-level results** -| label | precision | recall | f1 | support | -|-----------------------------|-----------|-----------|----------|---------| -| abstract | 76.67 | 76.67 | 76.67 | 960 | -| authors | 99.48 | 99.48 | 99.48 | 969 | -| first_author | 99.38 | 99.38 | 99.38 | 969 | -| keywords | 0 | 0 | 0 | 0 | -| title | 99.7 | 99 | 99.35 | 1000 | -| | | | | | -| **all fields (micro avg.)** | **93.88** | **93.71** | **93.8** | 3898 | -| all fields (macro avg.) | 93.81 | 93.63 | 93.72 | 3898 | +| label | precision | recall | f1 | support | +|-----------------------------|-----------|-----------|-----------|---------| +| abstract | 70.37 | 71.98 | 71.16 | 960 | +| authors | 81.01 | 81.01 | 81.01 | 969 | +| first_author | 84.42 | 84.42 | 84.42 | 969 | +| keywords | 0 | 0 | 0 | 0 | +| title | 97.39 | 96.9 | 97.14 | 1000 | +| | | | | | +| **all fields (micro avg.)** | **83.35** | **83.71** | **83.53** | 3898 | +| all fields (macro avg.) | 83.3 | 83.58 | 83.43 | 3898 | #### Ratcliff/Obershelp Matching (Minimum Ratcliff/Obershelp similarity at 0.95) @@ -86,28 +86,28 @@ Evaluation on 1000 random PDF files out of 998 PDF (ratio 1.0). | label | precision | recall | f1 | support | |-----------------------------|-----------|-----------|-----------|---------| -| abstract | 66.56 | 66.56 | 66.56 | 960 | -| authors | 99.38 | 99.38 | 99.38 | 969 | -| first_author | 99.28 | 99.28 | 99.28 | 969 | +| abstract | 62.22 | 63.65 | 62.92 | 960 | +| authors | 62.33 | 62.33 | 62.33 | 969 | +| first_author | 82.87 | 82.87 | 82.87 | 969 | | keywords | 0 | 0 | 0 | 0 | -| title | 99.7 | 99 | 99.35 | 1000 | +| title | 92.66 | 92.2 | 92.43 | 1000 | | | | | | | -| **all fields (micro avg.)** | **91.34** | **91.17** | **91.26** | 3898 | -| all fields (macro avg.) | 91.23 | 91.06 | 91.14 | 3898 | +| **all fields (micro avg.)** | **75.1** | **75.42** | **75.26** | 3898 | +| all fields (macro avg.) 
| 75.02 | 75.26 | 75.14 | 3898 | #### Instance-level results ``` -Total expected instances: 1000 -Total correct instances: 142 (strict) -Total correct instances: 491 (soft) -Total correct instances: 729 (Levenshtein) -Total correct instances: 641 (ObservedRatcliffObershelp) - -Instance-level recall: 14.2 (strict) -Instance-level recall: 49.1 (soft) -Instance-level recall: 72.9 (Levenshtein) -Instance-level recall: 64.1 (RatcliffObershelp) +Total expected instances: 1000 +Total correct instances: 58 (strict) +Total correct instances: 227 (soft) +Total correct instances: 504 (Levenshtein) +Total correct instances: 365 (ObservedRatcliffObershelp) + +Instance-level recall: 5.8 (strict) +Instance-level recall: 22.7 (soft) +Instance-level recall: 50.4 (Levenshtein) +Instance-level recall: 36.5 (RatcliffObershelp) ``` ## Citation metadata @@ -120,17 +120,17 @@ Evaluation on 1000 random PDF files out of 998 PDF (ratio 1.0). | label | precision | recall | f1 | support | |-----------------------------|-----------|-----------|-----------|---------| -| authors | 81.18 | 78.43 | 79.78 | 44770 | -| date | 84.64 | 81.25 | 82.91 | 45457 | -| first_author | 91.49 | 88.36 | 89.9 | 44770 | -| inTitle | 81.69 | 83.58 | 82.62 | 42795 | -| issue | 93.63 | 92.71 | 93.17 | 18983 | -| page | 93.72 | 77.57 | 84.88 | 40844 | -| title | 59.97 | 60.48 | 60.23 | 43101 | -| volume | 95.91 | 96.12 | 96.02 | 40458 | +| authors | 81.1 | 77.96 | 79.49 | 44770 | +| date | 84.27 | 80.33 | 82.25 | 45457 | +| first_author | 91.33 | 87.77 | 89.51 | 44770 | +| inTitle | 81.71 | 83.51 | 82.6 | 42795 | +| issue | 91.98 | 89.6 | 90.77 | 18983 | +| page | 93.59 | 77.77 | 84.95 | 40844 | +| title | 59.92 | 59.92 | 59.92 | 43101 | +| volume | 94.88 | 95.2 | 95.04 | 40458 | | | | | | | -| **all fields (micro avg.)** | **84.25** | **81.46** | **82.83** | 321178 | -| all fields (macro avg.) 
| 85.28 | 82.31 | 83.69 | 321178 | +| **all fields (micro avg.)** | **83.93** | **80.82** | **82.35** | 321178 | +| all fields (macro avg.) | 84.85 | 81.51 | 83.07 | 321178 | #### Soft Matching (ignoring punctuation, case and space characters mismatches) @@ -138,17 +138,17 @@ Evaluation on 1000 random PDF files out of 998 PDF (ratio 1.0). | label | precision | recall | f1 | support | |-----------------------------|-----------|-----------|-----------|---------| -| authors | 81.5 | 78.74 | 80.09 | 44770 | -| date | 84.64 | 81.25 | 82.91 | 45457 | -| first_author | 91.71 | 88.57 | 90.11 | 44770 | -| inTitle | 85.52 | 87.51 | 86.5 | 42795 | -| issue | 93.63 | 92.71 | 93.17 | 18983 | -| page | 93.72 | 77.57 | 84.88 | 40844 | -| title | 91.98 | 92.75 | 92.37 | 43101 | -| volume | 95.91 | 96.12 | 96.02 | 40458 | +| authors | 81.43 | 78.28 | 79.83 | 44770 | +| date | 84.27 | 80.33 | 82.25 | 45457 | +| first_author | 91.56 | 87.99 | 89.74 | 44770 | +| inTitle | 85.55 | 87.44 | 86.48 | 42795 | +| issue | 91.98 | 89.6 | 90.77 | 18983 | +| page | 93.59 | 77.77 | 84.95 | 40844 | +| title | 91.97 | 91.96 | 91.96 | 43101 | +| volume | 94.88 | 95.2 | 95.04 | 40458 | | | | | | | -| **all fields (micro avg.)** | **89.34** | **86.38** | **87.84** | 321178 | -| all fields (macro avg.) | 89.83 | 86.9 | 88.26 | 321178 | +| **all fields (micro avg.)** | **89.02** | **85.72** | **87.34** | 321178 | +| all fields (macro avg.) | 89.4 | 86.07 | 87.63 | 321178 | #### Levenshtein Matching (Minimum Levenshtein distance at 0.8) @@ -156,88 +156,88 @@ Evaluation on 1000 random PDF files out of 998 PDF (ratio 1.0). 
| label | precision | recall | f1 | support | |-----------------------------|-----------|-----------|-----------|---------| -| authors | 90.66 | 87.59 | 89.09 | 44770 | -| date | 84.64 | 81.25 | 82.91 | 45457 | -| first_author | 92.25 | 89.09 | 90.64 | 44770 | -| inTitle | 86.46 | 88.48 | 87.46 | 42795 | -| issue | 93.63 | 92.71 | 93.17 | 18983 | -| page | 93.72 | 77.57 | 84.88 | 40844 | -| title | 94.58 | 95.38 | 94.98 | 43101 | -| volume | 95.91 | 96.12 | 96.02 | 40458 | +| authors | 90.55 | 87.04 | 88.76 | 44770 | +| date | 84.27 | 80.33 | 82.25 | 45457 | +| first_author | 92.09 | 88.5 | 90.26 | 44770 | +| inTitle | 86.33 | 88.23 | 87.27 | 42795 | +| issue | 91.98 | 89.6 | 90.77 | 18983 | +| page | 93.59 | 77.77 | 84.95 | 40844 | +| title | 94.58 | 94.58 | 94.58 | 43101 | +| volume | 94.88 | 95.2 | 95.04 | 40458 | | | | | | | -| **all fields (micro avg.)** | **91.19** | **88.17** | **89.65** | 321178 | -| all fields (macro avg.) | 91.48 | 88.52 | 89.89 | 321178 | +| **all fields (micro avg.)** | **90.84** | **87.47** | **89.12** | 321178 | +| all fields (macro avg.) | 91.03 | 87.66 | 89.24 | 321178 | #### Ratcliff/Obershelp Matching (Minimum Ratcliff/Obershelp similarity at 0.95) **Field-level results** -| label | precision | recall | f1 | support | -|-----------------------------|-----------|-----------|----------|---------| -| authors | 84.94 | 82.07 | 83.48 | 44770 | -| date | 84.64 | 81.25 | 82.91 | 45457 | -| first_author | 91.49 | 88.36 | 89.9 | 44770 | -| inTitle | 85.17 | 87.15 | 86.15 | 42795 | -| issue | 93.63 | 92.71 | 93.17 | 18983 | -| page | 93.72 | 77.57 | 84.88 | 40844 | -| title | 93.97 | 94.76 | 94.36 | 43101 | -| volume | 95.91 | 96.12 | 96.02 | 40458 | -| | | | | | -| **all fields (micro avg.)** | **90.02** | **87.04** | **88.5** | 321178 | -| all fields (macro avg.) 
| 90.43 | 87.5 | 88.86 | 321178 | +| label | precision | recall | f1 | support | +|-----------------------------|-----------|-----------|-----------|---------| +| authors | 84.87 | 81.59 | 83.2 | 44770 | +| date | 84.27 | 80.33 | 82.25 | 45457 | +| first_author | 91.33 | 87.77 | 89.51 | 44770 | +| inTitle | 85.17 | 87.05 | 86.1 | 42795 | +| issue | 91.98 | 89.6 | 90.77 | 18983 | +| page | 93.59 | 77.77 | 84.95 | 40844 | +| title | 93.9 | 93.9 | 93.9 | 43101 | +| volume | 94.88 | 95.2 | 95.04 | 40458 | +| | | | | | +| **all fields (micro avg.)** | **89.68** | **86.35** | **87.99** | 321178 | +| all fields (macro avg.) | 90 | 86.65 | 88.22 | 321178 | #### Instance-level results ``` -Total expected instances: 48449 -Total extracted instances: 48221 -Total correct instances: 13495 (strict) -Total correct instances: 22265 (soft) -Total correct instances: 24914 (Levenshtein) -Total correct instances: 23267 (RatcliffObershelp) +Total expected instances: 48449 +Total extracted instances: 48947 +Total correct instances: 13305 (strict) +Total correct instances: 21985 (soft) +Total correct instances: 24553 (Levenshtein) +Total correct instances: 22959 (RatcliffObershelp) -Instance-level precision: 27.99 (strict) -Instance-level precision: 46.17 (soft) -Instance-level precision: 51.67 (Levenshtein) -Instance-level precision: 48.25 (RatcliffObershelp) +Instance-level precision: 27.18 (strict) +Instance-level precision: 44.92 (soft) +Instance-level precision: 50.16 (Levenshtein) +Instance-level precision: 46.91 (RatcliffObershelp) -Instance-level recall: 27.85 (strict) -Instance-level recall: 45.96 (soft) -Instance-level recall: 51.42 (Levenshtein) -Instance-level recall: 48.02 (RatcliffObershelp) +Instance-level recall: 27.46 (strict) +Instance-level recall: 45.38 (soft) +Instance-level recall: 50.68 (Levenshtein) +Instance-level recall: 47.39 (RatcliffObershelp) -Instance-level f-score: 27.92 (strict) -Instance-level f-score: 46.06 (soft) -Instance-level f-score: 51.54 
(Levenshtein) -Instance-level f-score: 48.14 (RatcliffObershelp) +Instance-level f-score: 27.32 (strict) +Instance-level f-score: 45.15 (soft) +Instance-level f-score: 50.42 (Levenshtein) +Instance-level f-score: 47.15 (RatcliffObershelp) -Matching 1 : 35376 +Matching 1 : 34852 -Matching 2 : 1259 +Matching 2 : 1274 -Matching 3 : 3266 +Matching 3 : 3388 -Matching 4 : 1799 +Matching 4 : 2026 -Total matches : 41700 +Total matches : 41540 ``` #### Citation context resolution ``` -Total expected references: 48449 - 48.45 references per article -Total predicted references: 48221 - 48.22 references per article +Total expected references: 48449 - 48.45 references per article +Total predicted references: 48947 - 48.95 references per article -Total expected citation contexts: 69755 - 69.75 citation contexts per article -Total predicted citation contexts: 73164 - 73.16 citation contexts per article +Total expected citation contexts: 69755 - 69.75 citation contexts per article +Total predicted citation contexts: 71896 - 71.9 citation contexts per article -Total correct predicted citation contexts: 56709 - 56.71 citation contexts per article -Total wrong predicted citation contexts: 16455 (wrong callout matching, callout missing in NLM, or matching with a bib. ref. not aligned with a bib.ref. in NLM) +Total correct predicted citation contexts: 55389 - 55.39 citation contexts per article +Total wrong predicted citation contexts: 16507 (wrong callout matching, callout missing in NLM, or matching with a bib. ref. not aligned with a bib.ref. in NLM) -Precision citation contexts: 77.51 -Recall citation contexts: 81.3 -fscore citation contexts: 79.36 +Precision citation contexts: 77.04 +Recall citation contexts: 79.41 +fscore citation contexts: 78.2 ``` ## Fulltext structures @@ -253,47 +253,46 @@ Evaluation on 1000 random PDF files out of 998 PDF (ratio 1.0). 
**Field-level results** -| label | precision | recall | f1 | support | -|-----------------------------|-----------|-----------|-----------|---------| -| availability_stmt | 54 | 51.99 | 52.98 | 779 | -| figure_title | 0.2 | 0.1 | 0.13 | 8943 | -| funding_stmt | 5.47 | 30.72 | 9.28 | 1507 | -| reference_citation | 87.96 | 94.35 | 91.04 | 69741 | -| reference_figure | 74.18 | 85.72 | 79.53 | 11010 | -| reference_table | 70.28 | 94.3 | 80.54 | 5159 | -| section_title | 72.63 | 66.19 | 69.26 | 17540 | -| table_title | 0 | 0 | 0 | 6092 | -| | | | | | -| **all fields (micro avg.)** | **74.06** | **76.67** | **75.34** | 120771 | -| all fields (macro avg.) | 45.59 | 52.92 | 47.85 | 120771 | +| label | precision | recall | f1 | support | +|-----------------------------|-----------|----------|----------|---------| +| availability_stmt | 46.79 | 44.93 | 45.84 | 779 | +| figure_title | 0.22 | 0.11 | 0.15 | 8943 | +| funding_stmt | 5.77 | 26.74 | 9.49 | 1507 | +| reference_citation | 87.92 | 94.06 | 90.89 | 69741 | +| reference_figure | 74.2 | 85.74 | 79.55 | 11010 | +| reference_table | 70.27 | 94.28 | 80.52 | 5159 | +| section_title | 72.65 | 66.18 | 69.27 | 17540 | +| table_title | 0.12 | 0.02 | 0.03 | 6092 | +| | | | | | +| **all fields (micro avg.)** | **74.82** | **76.4** | **75.6** | 120771 | +| all fields (macro avg.) 
| 44.74 | 51.51 | 46.97 | 120771 | #### Soft Matching (ignoring punctuation, case and space characters mismatches) **Field-level results** -| label | precision | recall | f1 | support | -|-----------------------------|-----------|----------|-----------|---------| -| availability_stmt | 79.73 | 76.77 | 78.22 | 779 | -| figure_title | 90.96 | 45.79 | 60.91 | 8943 | -| funding_stmt | 6.99 | 39.28 | 11.87 | 1507 | -| reference_citation | 87.96 | 94.36 | 91.05 | 69741 | -| reference_figure | 74.42 | 86 | 79.8 | 11010 | -| reference_table | 70.44 | 94.51 | 80.72 | 5159 | -| section_title | 78.4 | 71.45 | 74.76 | 17540 | -| table_title | 53.33 | 7.5 | 13.15 | 6092 | -| | | | | | -| **all fields (micro avg.)** | **78.73** | **81.5** | **80.09** | 120771 | -| all fields (macro avg.) | 67.78 | 64.46 | 61.31 | 120771 | +| label | precision | recall | f1 | support | +|-----------------------------|-----------|-----------|-----------|---------| +| availability_stmt | 68.58 | 65.85 | 67.19 | 779 | +| figure_title | 90.88 | 45.78 | 60.89 | 8943 | +| funding_stmt | 7.26 | 33.64 | 11.94 | 1507 | +| reference_citation | 87.93 | 94.06 | 90.89 | 69741 | +| reference_figure | 74.44 | 86.02 | 79.81 | 11010 | +| reference_table | 70.43 | 94.5 | 80.71 | 5159 | +| section_title | 78.43 | 71.44 | 74.77 | 17540 | +| table_title | 53.5 | 7.52 | 13.18 | 6092 | +| | | | | | +| **all fields (micro avg.)** | **79.51** | **81.19** | **80.34** | 120771 | +| all fields (macro avg.) | 66.43 | 62.35 | 59.92 | 120771 | **Document-level ratio results** -| label | precision | recall | f1 | support | -|-----------------------------|-----------|-----------|----------|---------| -| availability_stmt | 100 | 96.28 | 98.1 | 779 | -| | | | | | -| **all fields (micro avg.)** | **100** | **96.28** | **98.1** | 779 | -| all fields (macro avg.) 
| 100 | 96.28 | 98.1 | 779 | - -Evaluation metrics produced in 795.257 seconds +| label | precision | recall | f1 | support | +|-----------------------------|-----------|-----------|-----------|---------| +| availability_stmt | 99.6 | 96.02 | 97.78 | 779 | +| | | | | | +| **all fields (micro avg.)** | **99.6** | **96.02** | **97.78** | 779 | +| all fields (macro avg.) | 99.6 | 96.02 | 97.78 | 779 | +Evaluation metrics produced in 777.894 seconds diff --git a/doc/benchmarks/Benchmarking-pmc.md b/doc/benchmarks/Benchmarking-pmc.md index 4f428bd1bc..cbf044c891 100644 --- a/doc/benchmarks/Benchmarking-pmc.md +++ b/doc/benchmarks/Benchmarking-pmc.md @@ -41,14 +41,14 @@ Evaluation on 1943 random PDF files out of 1941 PDF (ratio 1.0). | label | precision | recall | f1 | support | |-----------------------------|-----------|-----------|-----------|---------| -| abstract | 16.89 | 16.54 | 16.71 | 1911 | -| authors | 92.78 | 92.63 | 92.7 | 1941 | -| first_author | 96.8 | 96.65 | 96.73 | 1941 | -| keywords | 65.58 | 63.91 | 64.73 | 1380 | -| title | 84.46 | 84.2 | 84.33 | 1943 | +| abstract | 16.21 | 15.91 | 16.06 | 1911 | +| authors | 61.02 | 60.74 | 60.88 | 1941 | +| first_author | 88.51 | 88.1 | 88.3 | 1941 | +| keywords | 46.36 | 40.14 | 43.03 | 1380 | +| title | 70.06 | 69.12 | 69.59 | 1943 | | | | | | | -| **all fields (micro avg.)** | **72.08** | **71.39** | **71.73** | 9116 | -| all fields (macro avg.) | 71.3 | 70.79 | 71.04 | 9116 | +| **all fields (micro avg.)** | **57.51** | **55.84** | **56.66** | 9116 | +| all fields (macro avg.) | 56.43 | 54.8 | 55.57 | 9116 | #### Soft Matching (ignoring punctuation, case and space characters mismatches) @@ -56,14 +56,14 @@ Evaluation on 1943 random PDF files out of 1941 PDF (ratio 1.0). 
| label | precision | recall | f1 | support | |-----------------------------|-----------|-----------|-----------|---------| -| abstract | 63.98 | 62.64 | 63.3 | 1911 | -| authors | 94.74 | 94.59 | 94.66 | 1941 | -| first_author | 97.21 | 97.06 | 97.14 | 1941 | -| keywords | 74.2 | 72.32 | 73.25 | 1380 | -| title | 92 | 91.71 | 91.86 | 1943 | +| abstract | 59.25 | 58.14 | 58.69 | 1911 | +| authors | 61.23 | 60.95 | 61.09 | 1941 | +| first_author | 88.61 | 88.2 | 88.41 | 1941 | +| keywords | 53.72 | 46.52 | 49.86 | 1380 | +| title | 77.99 | 76.94 | 77.46 | 1943 | | | | | | | -| **all fields (micro avg.)** | **85.25** | **84.43** | **84.84** | 9116 | -| all fields (macro avg.) | 84.43 | 83.66 | 84.04 | 9116 | +| **all fields (micro avg.)** | **69.4** | **67.39** | **68.38** | 9116 | +| all fields (macro avg.) | 68.16 | 66.15 | 67.1 | 9116 | #### Levenshtein Matching (Minimum Levenshtein distance at 0.8) @@ -71,14 +71,14 @@ Evaluation on 1943 random PDF files out of 1941 PDF (ratio 1.0). | label | precision | recall | f1 | support | |-----------------------------|-----------|-----------|-----------|---------| -| abstract | 90.86 | 88.96 | 89.9 | 1911 | -| authors | 96.65 | 96.5 | 96.57 | 1941 | -| first_author | 97.47 | 97.32 | 97.4 | 1941 | -| keywords | 84.61 | 82.46 | 83.52 | 1380 | -| title | 98.24 | 97.94 | 98.09 | 1943 | +| abstract | 86.29 | 84.67 | 85.47 | 1911 | +| authors | 72.15 | 71.82 | 71.99 | 1941 | +| first_author | 88.98 | 88.56 | 88.77 | 1941 | +| keywords | 74.9 | 64.86 | 69.51 | 1380 | +| title | 92.12 | 90.89 | 91.5 | 1943 | | | | | | | -| **all fields (micro avg.)** | **94.17** | **93.28** | **93.72** | 9116 | -| all fields (macro avg.) | 93.57 | 92.64 | 93.1 | 9116 | +| **all fields (micro avg.)** | **83.52** | **81.09** | **82.28** | 9116 | +| all fields (macro avg.) 
| 82.89 | 80.16 | 81.45 | 9116 | #### Ratcliff/Obershelp Matching (Minimum Ratcliff/Obershelp similarity at 0.95) @@ -86,28 +86,28 @@ Evaluation on 1943 random PDF files out of 1941 PDF (ratio 1.0). | label | precision | recall | f1 | support | |-----------------------------|-----------|-----------|-----------|---------| -| abstract | 87.07 | 85.24 | 86.14 | 1911 | -| authors | 95.67 | 95.52 | 95.59 | 1941 | -| first_author | 96.8 | 96.65 | 96.73 | 1941 | -| keywords | 79.93 | 77.9 | 78.9 | 1380 | -| title | 96.18 | 95.88 | 96.03 | 1943 | +| abstract | 82.35 | 80.8 | 81.56 | 1911 | +| authors | 64.34 | 64.04 | 64.19 | 1941 | +| first_author | 88.51 | 88.1 | 88.3 | 1941 | +| keywords | 63.68 | 55.14 | 59.11 | 1380 | +| title | 86.8 | 85.64 | 86.22 | 1943 | | | | | | | -| **all fields (micro avg.)** | **91.89** | **91.02** | **91.45** | 9116 | -| all fields (macro avg.) | 91.13 | 90.24 | 90.68 | 9116 | +| **all fields (micro avg.)** | **78.21** | **75.93** | **77.05** | 9116 | +| all fields (macro avg.) | 77.14 | 74.74 | 75.88 | 9116 | #### Instance-level results ``` -Total expected instances: 1943 -Total correct instances: 216 (strict) -Total correct instances: 906 (soft) -Total correct instances: 1445 (Levenshtein) -Total correct instances: 1297 (ObservedRatcliffObershelp) - -Instance-level recall: 11.12 (strict) -Instance-level recall: 46.63 (soft) -Instance-level recall: 74.37 (Levenshtein) -Instance-level recall: 66.75 (RatcliffObershelp) +Total expected instances: 1943 +Total correct instances: 112 (strict) +Total correct instances: 483 (soft) +Total correct instances: 915 (Levenshtein) +Total correct instances: 734 (ObservedRatcliffObershelp) + +Instance-level recall: 5.76 (strict) +Instance-level recall: 24.86 (soft) +Instance-level recall: 47.09 (Levenshtein) +Instance-level recall: 37.78 (RatcliffObershelp) ``` ## Citation metadata @@ -120,17 +120,17 @@ Evaluation on 1943 random PDF files out of 1941 PDF (ratio 1.0). 
| label | precision | recall | f1 | support | |-----------------------------|-----------|-----------|-----------|---------| -| authors | 83.11 | 75.94 | 79.36 | 85778 | -| date | 94.69 | 83.83 | 88.93 | 87067 | -| first_author | 89.85 | 82.09 | 85.8 | 85778 | -| inTitle | 73.27 | 71.45 | 72.35 | 81007 | -| issue | 91.43 | 87.44 | 89.39 | 16635 | -| page | 94.68 | 83.31 | 88.63 | 80501 | -| title | 79.78 | 74.95 | 77.29 | 80736 | -| volume | 96.17 | 89.37 | 92.64 | 80067 | +| authors | 82.48 | 74.66 | 78.37 | 85778 | +| date | 94.36 | 82.52 | 88.05 | 87067 | +| first_author | 89.17 | 80.66 | 84.7 | 85778 | +| inTitle | 73.04 | 70.81 | 71.9 | 81007 | +| issue | 89.71 | 84.04 | 86.78 | 16635 | +| page | 94.22 | 83.04 | 88.28 | 80501 | +| title | 79.48 | 74.13 | 76.71 | 80736 | +| volume | 95.28 | 88.56 | 91.8 | 80067 | | | | | | | -| **all fields (micro avg.)** | **87.32** | **80.34** | **83.69** | 597569 | -| all fields (macro avg.) | 87.87 | 81.05 | 84.3 | 597569 | +| **all fields (micro avg.)** | **86.79** | **79.33** | **82.89** | 597569 | +| all fields (macro avg.) | 87.22 | 79.8 | 83.32 | 597569 | #### Soft Matching (ignoring punctuation, case and space characters mismatches) @@ -138,17 +138,17 @@ Evaluation on 1943 random PDF files out of 1941 PDF (ratio 1.0). 
| label | precision | recall | f1 | support | |-----------------------------|-----------|-----------|-----------|---------| -| authors | 83.58 | 76.37 | 79.81 | 85778 | -| date | 94.69 | 83.83 | 88.93 | 87067 | -| first_author | 90.02 | 82.24 | 85.96 | 85778 | -| inTitle | 85.03 | 82.92 | 83.97 | 81007 | -| issue | 91.43 | 87.44 | 89.39 | 16635 | -| page | 94.68 | 83.31 | 88.63 | 80501 | -| title | 91.55 | 86.01 | 88.69 | 80736 | -| volume | 96.17 | 89.37 | 92.64 | 80067 | +| authors | 82.96 | 75.09 | 78.83 | 85778 | +| date | 94.36 | 82.52 | 88.05 | 87067 | +| first_author | 89.33 | 80.81 | 84.86 | 85778 | +| inTitle | 84.75 | 82.16 | 83.43 | 81007 | +| issue | 89.71 | 84.04 | 86.78 | 16635 | +| page | 94.22 | 83.04 | 88.28 | 80501 | +| title | 91.07 | 84.94 | 87.89 | 80736 | +| volume | 95.28 | 88.56 | 91.8 | 80067 | | | | | | | -| **all fields (micro avg.)** | **90.73** | **83.48** | **86.95** | 597569 | -| all fields (macro avg.) | 90.9 | 83.94 | 87.25 | 597569 | +| **all fields (micro avg.)** | **90.16** | **82.41** | **86.11** | 597569 | +| all fields (macro avg.) | 90.21 | 82.65 | 86.24 | 597569 | #### Levenshtein Matching (Minimum Levenshtein distance at 0.8) @@ -156,17 +156,17 @@ Evaluation on 1943 random PDF files out of 1941 PDF (ratio 1.0). 
| label | precision | recall | f1 | support | |-----------------------------|-----------|-----------|-----------|---------| -| authors | 89.29 | 81.59 | 85.27 | 85778 | -| date | 94.69 | 83.83 | 88.93 | 87067 | -| first_author | 90.24 | 82.44 | 86.17 | 85778 | -| inTitle | 86.28 | 84.14 | 85.2 | 81007 | -| issue | 91.43 | 87.44 | 89.39 | 16635 | -| page | 94.68 | 83.31 | 88.63 | 80501 | -| title | 93.9 | 88.22 | 90.97 | 80736 | -| volume | 96.17 | 89.37 | 92.64 | 80067 | +| authors | 88.75 | 80.33 | 84.33 | 85778 | +| date | 94.36 | 82.52 | 88.05 | 87067 | +| first_author | 89.55 | 81 | 85.06 | 85778 | +| inTitle | 86.03 | 83.41 | 84.7 | 81007 | +| issue | 89.71 | 84.04 | 86.78 | 16635 | +| page | 94.22 | 83.04 | 88.28 | 80501 | +| title | 93.59 | 87.29 | 90.33 | 80736 | +| volume | 95.28 | 88.56 | 91.8 | 80067 | | | | | | | -| **all fields (micro avg.)** | **92.07** | **84.72** | **88.24** | 597569 | -| all fields (macro avg.) | 92.09 | 85.04 | 88.4 | 597569 | +| **all fields (micro avg.)** | **91.55** | **83.68** | **87.44** | 597569 | +| all fields (macro avg.) | 91.44 | 83.77 | 87.42 | 597569 | #### Ratcliff/Obershelp Matching (Minimum Ratcliff/Obershelp similarity at 0.95) @@ -174,70 +174,70 @@ Evaluation on 1943 random PDF files out of 1941 PDF (ratio 1.0). 
| label | precision | recall | f1 | support | |-----------------------------|-----------|-----------|-----------|---------| -| authors | 86.05 | 78.63 | 82.18 | 85778 | -| date | 94.69 | 83.83 | 88.93 | 87067 | -| first_author | 89.87 | 82.1 | 85.81 | 85778 | -| inTitle | 83.59 | 81.52 | 82.55 | 81007 | -| issue | 91.43 | 87.44 | 89.39 | 16635 | -| page | 94.68 | 83.31 | 88.63 | 80501 | -| title | 93.5 | 87.84 | 90.58 | 80736 | -| volume | 96.17 | 89.37 | 92.64 | 80067 | +| authors | 85.4 | 77.3 | 81.15 | 85778 | +| date | 94.36 | 82.52 | 88.05 | 87067 | +| first_author | 89.18 | 80.67 | 84.71 | 85778 | +| inTitle | 83.31 | 80.77 | 82.02 | 81007 | +| issue | 89.71 | 84.04 | 86.78 | 16635 | +| page | 94.22 | 83.04 | 88.28 | 80501 | +| title | 93.09 | 86.82 | 89.85 | 80736 | +| volume | 95.28 | 88.56 | 91.8 | 80067 | | | | | | | -| **all fields (micro avg.)** | **91.12** | **83.84** | **87.33** | 597569 | -| all fields (macro avg.) | 91.25 | 84.26 | 87.59 | 597569 | +| **all fields (micro avg.)** | **90.56** | **82.77** | **86.49** | 597569 | +| all fields (macro avg.) 
| 90.57 | 82.97 | 86.58 | 597569 | #### Instance-level results ``` -Total expected instances: 90125 -Total extracted instances: 85141 -Total correct instances: 38534 (strict) -Total correct instances: 50633 (soft) -Total correct instances: 55471 (Levenshtein) -Total correct instances: 52032 (RatcliffObershelp) +Total expected instances: 90125 +Total extracted instances: 86125 +Total correct instances: 37732 (strict) +Total correct instances: 49502 (soft) +Total correct instances: 54240 (Levenshtein) +Total correct instances: 50880 (RatcliffObershelp) -Instance-level precision: 45.26 (strict) -Instance-level precision: 59.47 (soft) -Instance-level precision: 65.15 (Levenshtein) -Instance-level precision: 61.11 (RatcliffObershelp) +Instance-level precision: 43.81 (strict) +Instance-level precision: 57.48 (soft) +Instance-level precision: 62.98 (Levenshtein) +Instance-level precision: 59.08 (RatcliffObershelp) -Instance-level recall: 42.76 (strict) -Instance-level recall: 56.18 (soft) -Instance-level recall: 61.55 (Levenshtein) -Instance-level recall: 57.73 (RatcliffObershelp) +Instance-level recall: 41.87 (strict) +Instance-level recall: 54.93 (soft) +Instance-level recall: 60.18 (Levenshtein) +Instance-level recall: 56.45 (RatcliffObershelp) -Instance-level f-score: 43.97 (strict) -Instance-level f-score: 57.78 (soft) -Instance-level f-score: 63.3 (Levenshtein) -Instance-level f-score: 59.37 (RatcliffObershelp) +Instance-level f-score: 42.82 (strict) +Instance-level f-score: 56.17 (soft) +Instance-level f-score: 61.55 (Levenshtein) +Instance-level f-score: 57.74 (RatcliffObershelp) -Matching 1 : 67991 +Matching 1 : 66746 -Matching 2 : 4123 +Matching 2 : 4079 -Matching 3 : 1868 +Matching 3 : 2288 -Matching 4 : 661 +Matching 4 : 986 -Total matches : 74643 +Total matches : 74099 ``` #### Citation context resolution ``` -Total expected references: 90125 - 46.38 references per article -Total predicted references: 85141 - 43.82 references per article +Total expected 
references: 90125 - 46.38 references per article +Total predicted references: 86125 - 44.33 references per article -Total expected citation contexts: 139835 - 71.97 citation contexts per article -Total predicted citation contexts: 114496 - 58.93 citation contexts per article +Total expected citation contexts: 139835 - 71.97 citation contexts per article +Total predicted citation contexts: 112840 - 58.08 citation contexts per article -Total correct predicted citation contexts: 96976 - 49.91 citation contexts per article -Total wrong predicted citation contexts: 17520 (wrong callout matching, callout missing in NLM, or matching with a bib. ref. not aligned with a bib.ref. in NLM) +Total correct predicted citation contexts: 95258 - 49.03 citation contexts per article +Total wrong predicted citation contexts: 17582 (wrong callout matching, callout missing in NLM, or matching with a bib. ref. not aligned with a bib.ref. in NLM) -Precision citation contexts: 84.7 -Recall citation contexts: 69.35 -fscore citation contexts: 76.26 +Precision citation contexts: 84.42 +Recall citation contexts: 68.12 +fscore citation contexts: 75.4 ``` ## Fulltext structures @@ -253,17 +253,17 @@ Evaluation on 1943 random PDF files out of 1941 PDF (ratio 1.0). **Field-level results** -| label | precision | recall | f1 | support | -|-----------------------------|-----------|----------|-----------|---------| -| figure_title | 31.53 | 26.55 | 28.82 | 7281 | -| reference_citation | 58.14 | 58.76 | 58.45 | 134196 | -| reference_figure | 60.59 | 68.27 | 64.2 | 19330 | -| reference_table | 82.87 | 89.52 | 86.06 | 7327 | -| section_title | 73.58 | 67.75 | 70.55 | 27619 | -| table_title | 67.76 | 49.58 | 57.26 | 3971 | -| | | | | | -| **all fields (micro avg.)** | **60.68** | **60.7** | **60.69** | 199724 | -| all fields (macro avg.) 
| 62.41 | 60.07 | 60.89 | 199724 | +| label | precision | recall | f1 | support | +|-----------------------------|-----------|-----------|-----------|---------| +| figure_title | 31.63 | 26.59 | 28.89 | 7281 | +| reference_citation | 58.1 | 58.67 | 58.38 | 134196 | +| reference_figure | 60.63 | 68.29 | 64.23 | 19330 | +| reference_table | 82.88 | 89.57 | 86.09 | 7327 | +| section_title | 73.73 | 67.79 | 70.64 | 27619 | +| table_title | 67.71 | 49.58 | 57.25 | 3971 | +| | | | | | +| **all fields (micro avg.)** | **60.67** | **60.65** | **60.66** | 199724 | +| all fields (macro avg.) | 62.45 | 60.08 | 60.91 | 199724 | #### Soft Matching (ignoring punctuation, case and space characters mismatches) @@ -271,15 +271,15 @@ Evaluation on 1943 random PDF files out of 1941 PDF (ratio 1.0). | label | precision | recall | f1 | support | |-----------------------------|-----------|-----------|-----------|---------| -| figure_title | 79.55 | 66.98 | 72.73 | 7281 | -| reference_citation | 62.42 | 63.09 | 62.75 | 134196 | -| reference_figure | 61.09 | 68.84 | 64.73 | 19330 | -| reference_table | 83.04 | 89.71 | 86.25 | 7327 | -| section_title | 79.09 | 72.82 | 75.83 | 27619 | -| table_title | 94.22 | 68.95 | 79.63 | 3971 | +| figure_title | 79.43 | 66.78 | 72.56 | 7281 | +| reference_citation | 62.35 | 62.96 | 62.65 | 134196 | +| reference_figure | 61.12 | 68.85 | 64.76 | 19330 | +| reference_table | 83.05 | 89.76 | 86.28 | 7327 | +| section_title | 79.24 | 72.85 | 75.91 | 27619 | +| table_title | 94.19 | 68.98 | 79.63 | 3971 | | | | | | | -| **all fields (micro avg.)** | **66.2** | **66.22** | **66.21** | 199724 | -| all fields (macro avg.) | 76.57 | 71.73 | 73.65 | 199724 | +| **all fields (micro avg.)** | **66.17** | **66.14** | **66.16** | 199724 | +| all fields (macro avg.) | 76.56 | 71.7 | 73.63 | 199724 | **Document-level ratio results** @@ -289,4 +289,4 @@ Evaluation on 1943 random PDF files out of 1941 PDF (ratio 1.0). 
| **all fields (micro avg.)** | **0** | **0** | **0** | 0 | | all fields (macro avg.) | 0 | 0 | 0 | 0 | -Evaluation metrics produced in 1311.519 seconds +Evaluation metrics produced in 1263.288 seconds diff --git a/grobid-core/src/main/java/org/grobid/core/engines/CitationParser.java b/grobid-core/src/main/java/org/grobid/core/engines/CitationParser.java index 24f5cbcf12..8b606efa68 100755 --- a/grobid-core/src/main/java/org/grobid/core/engines/CitationParser.java +++ b/grobid-core/src/main/java/org/grobid/core/engines/CitationParser.java @@ -59,33 +59,34 @@ public CitationParser(EngineParsers parsers) { /** * Process one single raw reference string - */ + */ public BiblioItem processingString(String input, int consolidate) { List inputs = new ArrayList<>(); - input = TextUtilities.removeLeadingAndTrailingChars(input, "[({.,])}: \n"," \n"); + input = TextUtilities.removeLeadingAndTrailingChars(input, "[({.,])}: \n", " \n"); inputs.add(input); List result = processingStringMultiple(inputs, consolidate); - if (result != null && result.size()>0) + if (result != null && result.size() > 0) return result.get(0); else return null; } /** - * Process a list of raw reference strings by taking advantage of batch processing + * Process a list of raw reference strings by taking advantage of batch + * processing * when a DeLFT deep learning model is used - */ + */ public List processingStringMultiple(List inputs, int consolidate) { if (inputs == null || inputs.size() == 0) return null; List> tokenList = new ArrayList<>(); - for(String input : inputs) { - if (StringUtils.isBlank(input)) + for (String input : inputs) { + if (StringUtils.isBlank(input)) tokenList.add(new ArrayList()); else { // some cleaning input = UnicodeUtil.normaliseText(input); - input = TextUtilities.removeLeadingAndTrailingChars(input, "[({.,])}: \n"," \n"); + input = TextUtilities.removeLeadingAndTrailingChars(input, "[({.,])}: \n", " \n"); List tokens = analyzer.tokenizeWithLayoutToken(input); tokens = 
analyzer.retokenizeSubdigitsFromLayoutToken(tokens); tokenList.add(tokens); @@ -99,7 +100,7 @@ public List processingStringMultiple(List inputs, int consol for (BiblioItem result : results) { if (result != null) { String localInput = inputs.get(i); - localInput = TextUtilities.removeLeadingAndTrailingChars(localInput, "[({.,])}: \n"," \n"); + localInput = TextUtilities.removeLeadingAndTrailingChars(localInput, "[({.,])}: \n", " \n"); result.setReference(localInput); } i++; @@ -110,21 +111,22 @@ public List processingStringMultiple(List inputs, int consol /** * Process one single raw reference string tokenized as layout objects - */ + */ public BiblioItem processingLayoutToken(List tokens, int consolidate) { List> tokenList = new ArrayList<>(); tokenList.add(tokens); List result = processingLayoutTokenMultiple(tokenList, consolidate); - if (result != null && result.size()>0) + if (result != null && result.size() > 0) return result.get(0); else return null; } /** - * Process a list of raw reference string, each one tokenized as layout objects, and taking advantage + * Process a list of raw reference string, each one tokenized as layout objects, + * and taking advantage * of batch processing when a DeLFT deep learning model is used - */ + */ public List processingLayoutTokenMultiple(List> tokenList, int consolidate) { if (tokenList == null || tokenList.size() == 0) return null; @@ -132,7 +134,7 @@ public List processingLayoutTokenMultiple(List> to StringBuilder featuredInput = new StringBuilder(); int p = 0; - for(List tokens : tokenList) { + for (List tokens : tokenList) { tokenList.set(p, analyzer.retokenizeSubdigitsFromLayoutToken(tokens)); p++; } @@ -151,9 +153,9 @@ public List processingLayoutTokenMultiple(List> to List urlPositions = lexicon.tokenPositionsUrlPattern(tokens); try { - String featuredBlock = FeaturesVectorCitation.addFeaturesCitation(tokens, null, journalsPositions, - abbrevJournalsPositions, conferencesPositions, publishersPositions, 
locationsPositions, - collaborationsPositions, identifiersPositions, urlPositions); + String featuredBlock = FeaturesVectorCitation.addFeaturesCitation(tokens, null, journalsPositions, + abbrevJournalsPositions, conferencesPositions, publishersPositions, locationsPositions, + collaborationsPositions, identifiersPositions, urlPositions); featuredInput.append(featuredBlock); featuredInput.append("\n\n"); @@ -161,8 +163,8 @@ public List processingLayoutTokenMultiple(List> to LOGGER.error("An exception occured while adding features for processing a citation.", e); } } - - if (featuredInput.toString().length() == 0) + + if (featuredInput.toString().length() == 0) return null; String allRes = null; @@ -176,23 +178,39 @@ public List processingLayoutTokenMultiple(List> to if (allRes == null || allRes.length() == 0) return null; - String[] resBlocks = allRes.split("\n\n"); + // Use regex to split on one or more consecutive empty lines, and filter empty + // blocks + String[] resBlocksRaw = allRes.split("\n\n+"); + List resBlocksList = new ArrayList<>(); + for (String block : resBlocksRaw) { + String trimmed = block.trim(); + if (!trimmed.isEmpty()) { + resBlocksList.add(trimmed); + } + } + String[] resBlocks = resBlocksList.toArray(new String[0]); int i = 0; for (List tokens : tokenList) { if (CollectionUtils.isEmpty(tokens)) results.add(null); else { + if (i >= resBlocks.length) { + LOGGER.warn("Mismatch between tokenList size and result blocks: expected more blocks"); + results.add(null); + continue; + } String res = resBlocks[i]; i++; BiblioItem resCitation = resultExtractionLayoutTokens(res, true, tokens); - + // post-processing (additional field parsing and cleaning) if (resCitation != null) { BiblioItem.cleanTitles(resCitation); resCitation.setOriginalAuthors(resCitation.getAuthors()); try { - resCitation.setFullAuthors(parsers.getAuthorParser().processingCitation(resCitation.getAuthors())); + resCitation + 
.setFullAuthors(parsers.getAuthorParser().processingCitation(resCitation.getAuthors())); } catch (Exception e) { LOGGER.error("An exception occured when processing author names of a citation.", e); } @@ -230,7 +248,8 @@ public List processingLayoutTokenMultiple(List> to // editors (they are human persons in theory) resCitation.setOriginalEditors(resCitation.getEditors()); try { - resCitation.setFullEditors(parsers.getAuthorParser().processingCitation(resCitation.getEditors())); + resCitation + .setFullEditors(parsers.getAuthorParser().processingCitation(resCitation.getEditors())); } catch (Exception e) { LOGGER.error("An exception occured when processing editor names of a citation.", e); } @@ -244,7 +263,8 @@ public List processingLayoutTokenMultiple(List> to return results; } - public List processingReferenceSection(String referenceTextBlock, ReferenceSegmenter referenceSegmenter) { + public List processingReferenceSection(String referenceTextBlock, + ReferenceSegmenter referenceSegmenter) { List segm = referenceSegmenter.extract(referenceTextBlock); List results = new ArrayList<>(); @@ -255,7 +275,7 @@ public List processingReferenceSection(String referenceTextBlock, Re if (ref.getTokens() == null || ref.getTokens().size() == 0) continue; List localTokens = ref.getTokens(); - localTokens = TextUtilities.removeLeadingAndTrailingCharsLayoutTokens(localTokens, "[({.,])}: \n"," \n"); + localTokens = TextUtilities.removeLeadingAndTrailingCharsLayoutTokens(localTokens, "[({.,])}: \n", " \n"); allRefBlocks.add(localTokens); } @@ -269,13 +289,13 @@ public List processingReferenceSection(String referenceTextBlock, Re if ((bib != null) && !bib.rejectAsReference()) { BibDataSet bds = new BibDataSet(); String localLabel = ref.getLabel(); - if (localLabel != null && localLabel.length()>0) { + if (localLabel != null && localLabel.length() > 0) { // cleaning the label for matching localLabel = TextUtilities.removeLeadingAndTrailingChars(localLabel, "([{<,. 
\n", ")}]>,.: \n"); } String localRef = ref.getReferenceText(); - localRef = TextUtilities.removeLeadingAndTrailingChars(localRef, "[({.,])}: \n"," \n"); + localRef = TextUtilities.removeLeadingAndTrailingChars(localRef, "[({.,])}: \n", " \n"); bds.setRefSymbol(localLabel); bib.setReference(localRef); bds.setResBib(bib); @@ -287,7 +307,8 @@ public List processingReferenceSection(String referenceTextBlock, Re return results; } - public List processingReferenceSection(Document doc, ReferenceSegmenter referenceSegmenter, int consolidate) { + public List processingReferenceSection(Document doc, ReferenceSegmenter referenceSegmenter, + int consolidate) { List results = new ArrayList<>(); String referencesStr = doc.getDocumentPartText(SegmentationLabels.REFERENCES); @@ -308,71 +329,75 @@ public List processingReferenceSection(Document doc, ReferenceSegmen cntManager.i(CitationParserCounters.SEGMENTED_REFERENCES, references.size()); } - // consolidation: if selected, it is NOT done individually for each citation but + // consolidation: if selected, it is NOT done individually for each citation but // in a second stage for all citations if (references != null) { - /*List refTexts = new ArrayList<>(); - for (LabeledReferenceResult ref : references) { - // paranoiac check - if (ref == null) - continue; - - String localRef = ref.getReferenceText(); - localRef = TextUtilities.removeLeadingAndTrailingChars(localRef, "[({.,])}: \n"," \n"); - refTexts.add(localRef); - } - List bibList = processingStringMultiple(refTexts, 0);*/ + /* + * List refTexts = new ArrayList<>(); + * for (LabeledReferenceResult ref : references) { + * // paranoiac check + * if (ref == null) + * continue; + * + * String localRef = ref.getReferenceText(); + * localRef = TextUtilities.removeLeadingAndTrailingChars(localRef, + * "[({.,])}: \n"," \n"); + * refTexts.add(localRef); + * } + * List bibList = processingStringMultiple(refTexts, 0); + */ List> allRefBlocks = new ArrayList<>(); for 
(LabeledReferenceResult ref : references) { // paranoiac check - if (ref == null) + if (ref == null) continue; List localTokens = ref.getTokens(); - localTokens = TextUtilities.removeLeadingAndTrailingCharsLayoutTokens(localTokens, "[({.,])}: \n"," \n"); + localTokens = TextUtilities.removeLeadingAndTrailingCharsLayoutTokens(localTokens, "[({.,])}: \n", + " \n"); allRefBlocks.add(localTokens); } List bibList = processingLayoutTokenMultiple(allRefBlocks, 0); - if (bibList != null && bibList.size()>0) { + if (bibList != null && bibList.size() > 0) { int i = 0; for (LabeledReferenceResult ref : references) { // paranoiac check - if (ref == null) + if (ref == null) continue; - //BiblioItem bib = processingString(ref.getReferenceText(), 0); + // BiblioItem bib = processingString(ref.getReferenceText(), 0); BiblioItem bib = bibList.get(i); i++; - if (bib == null) + if (bib == null) continue; // check if we have an interesting url annotation over this bib. ref. List refTokens = ref.getTokens(); if ((refTokens != null) && (refTokens.size() > 0)) { List localPages = new ArrayList(); - for(LayoutToken token : refTokens) { + for (LayoutToken token : refTokens) { if (!localPages.contains(token.getPage())) { localPages.add(token.getPage()); } } - for(PDFAnnotation annotation : doc.getPDFAnnotations()) { - if (annotation.getType() != Type.URI) + for (PDFAnnotation annotation : doc.getPDFAnnotations()) { + if (annotation.getType() != Type.URI) continue; if (!localPages.contains(annotation.getPageNumber())) continue; - for(LayoutToken token : refTokens) { + for (LayoutToken token : refTokens) { if (annotation.cover(token)) { // annotation covers tokens, let's look at the href String uri = annotation.getDestination(); // is it a DOI? 
Matcher doiMatcher = TextUtilities.DOIPattern.matcher(uri); - if (doiMatcher.find()) { - // the BiblioItem setter will take care of the prefix and doi cleaninng + if (doiMatcher.find()) { + // the BiblioItem setter will take care of the prefix and doi cleaning bib.setDOI(uri); } - // TBD: is it something else? + // TBD: is it something else? } } } @@ -381,13 +406,14 @@ public List processingReferenceSection(Document doc, ReferenceSegmen if (!bib.rejectAsReference()) { BibDataSet bds = new BibDataSet(); String localLabel = ref.getLabel(); - if (localLabel != null && localLabel.length()>0) { + if (localLabel != null && localLabel.length() > 0) { // cleaning the label for matching - localLabel = TextUtilities.removeLeadingAndTrailingChars(localLabel, "([{<,. \n", ")}]>,.: \n"); + localLabel = TextUtilities.removeLeadingAndTrailingChars(localLabel, "([{<,. \n", + ")}]>,.: \n"); } String localRef = ref.getReferenceText(); - localRef = TextUtilities.removeLeadingAndTrailingChars(localRef, "[({.,])}: \n"," \n"); + localRef = TextUtilities.removeLeadingAndTrailingChars(localRef, "[({.,])}: \n", " \n"); bds.setRefSymbol(localLabel); bds.setResBib(bib); @@ -404,16 +430,16 @@ public List processingReferenceSection(Document doc, ReferenceSegmen if (consolidate != 0) { Consolidation consolidator = Consolidation.getInstance(); if (consolidator.getCntManager() == null) - consolidator.setCntManager(cntManager); - Map resConsolidation = null; + consolidator.setCntManager(cntManager); + Map resConsolidation = null; try { resConsolidation = consolidator.consolidate(results); - } catch(Exception e) { + } catch (Exception e) { throw new GrobidException( - "An exception occured while running consolidation on bibliographical references.", e); - } + "An exception occured while running consolidation on bibliographical references.", e); + } if (resConsolidation != null) { - for(int i=0; i processingReferenceSection(File input, - ReferenceSegmenter referenceSegmenter, - int consolidate) { +
ReferenceSegmenter referenceSegmenter, + int consolidate) { DocumentSource documentSource = DocumentSource.fromPdf(input); return processingReferenceSection(documentSource, referenceSegmenter, consolidate); } public List processingReferenceSection(File input, - String md5Str, - ReferenceSegmenter referenceSegmenter, - int consolidate) { + String md5Str, + ReferenceSegmenter referenceSegmenter, + int consolidate) { DocumentSource documentSource = DocumentSource.fromPdf(input); documentSource.setMD5(md5Str); return processingReferenceSection(documentSource, referenceSegmenter, consolidate); } public List processingReferenceSection(DocumentSource documentSource, - ReferenceSegmenter referenceSegmenter, - int consolidate) { + ReferenceSegmenter referenceSegmenter, + int consolidate) { List results; try { Document doc = parsers.getSegmentationParser().processing(documentSource, @@ -466,7 +492,6 @@ public List processingReferenceSection(DocumentSource documentSource return results; } - /** * Extract results from a labeled sequence. 
* @@ -476,8 +501,8 @@ public List processingReferenceSection(DocumentSource documentSource * @return biblio item */ public BiblioItem resultExtractionLayoutTokens(String result, - boolean volumePostProcess, - List tokenizations) { + boolean volumePostProcess, + List tokenizations) { BiblioItem biblio = new BiblioItem(); TaggingLabel lastClusterLabel = null; @@ -494,10 +519,12 @@ public BiblioItem resultExtractionLayoutTokens(String result, TaggingLabel clusterLabel = cluster.getTaggingLabel(); Engine.getCntManager().i(clusterLabel); - //String clusterContent = LayoutTokensUtil.normalizeText(LayoutTokensUtil.toText(cluster.concatTokens())); - //String clusterContent = LayoutTokensUtil.toText(cluster.concatTokens()); + // String clusterContent = + // LayoutTokensUtil.normalizeText(LayoutTokensUtil.toText(cluster.concatTokens())); + // String clusterContent = LayoutTokensUtil.toText(cluster.concatTokens()); String clusterContent = LayoutTokensUtil.normalizeDehyphenizeText(cluster.concatTokens()); - //String clusterNonDehypenizedContent = LayoutTokensUtil.toText(cluster.concatTokens()); + // String clusterNonDehypenizedContent = + // LayoutTokensUtil.toText(cluster.concatTokens()); if (clusterLabel.equals(TaggingLabels.CITATION_TITLE)) { if (biblio.getTitle() == null) biblio.setTitle(clusterContent); @@ -555,7 +582,7 @@ else if (biblio.getSerieTitle().length() >= clusterContent.length()) { } else if (clusterLabel.equals(TaggingLabels.CITATION_JOURNAL)) { if (biblio.getJournal() == null) { biblio.setJournal(clusterContent); - }else if (biblio.getJournal().length() >= clusterContent.length()) { + } else if (biblio.getJournal().length() >= clusterContent.length()) { biblio.setNoteOrConcatenateIfNotEmpty(clusterContent); } else { biblio.setNoteOrConcatenateIfNotEmpty(biblio.getJournal()); @@ -577,7 +604,7 @@ else if (biblio.getSerieTitle().length() >= clusterContent.length()) { } else { biblio.setInstitution(clusterContent); } - } else if 
(clusterLabel.equals(TaggingLabels.CITATION_NOTE)) { + } else if (clusterLabel.equals(TaggingLabels.CITATION_NOTE)) { biblio.setNoteOrConcatenateIfNotEmpty(clusterContent); } else if (clusterLabel.equals(TaggingLabels.CITATION_PUBNUM)) { String clusterNonDehypenizedContent = LayoutTokensUtil.toText(cluster.concatTokens()); @@ -587,7 +614,7 @@ else if (biblio.getSerieTitle().length() >= clusterContent.length()) { String clusterNonDehypenizedContent = LayoutTokensUtil.toText(cluster.concatTokens()); biblio.setWeb(clusterNonDehypenizedContent); } - } + } return biblio; } @@ -601,21 +628,21 @@ else if (biblio.getSerieTitle().length() >= clusterContent.length()) { */ public BiblioItem consolidateCitation(BiblioItem resCitation, String rawCitation, int consolidate) { if (consolidate == 0) { - // no consolidation + // no consolidation return resCitation; } Consolidation consolidator = null; try { consolidator = Consolidation.getInstance(); if (consolidator.getCntManager() == null) - consolidator.setCntManager(cntManager); + consolidator.setCntManager(cntManager); List biblios = new ArrayList(); BibDataSet theBib = new BibDataSet(); theBib.setResBib(resCitation); biblios.add(theBib); - Map bibis = consolidator.consolidate(biblios); + Map bibis = consolidator.consolidate(biblios); - //BiblioItem bibo = consolidator.consolidate(resCitation, rawCitation); + // BiblioItem bibo = consolidator.consolidate(resCitation, rawCitation); BiblioItem bibo = bibis.get(0); if (bibo != null) { if (consolidate == 1) @@ -627,7 +654,7 @@ else if (consolidate == 2) LOGGER.error("An exception occurred while running bibliographical data consolidation.", e); throw new GrobidException( "An exception occurred while running bibliographical data consolidation.", e); - } + } return resCitation; } @@ -675,12 +702,11 @@ public StringBuilder trainingExtraction(List inputs) { urlPositions = lexicon.tokenPositionsUrlPattern(tokenizations); String ress = 
FeaturesVectorCitation.addFeaturesCitation(tokenizations, - null, journalsPositions, abbrevJournalsPositions, - conferencesPositions, publishersPositions, locationsPositions, + null, journalsPositions, abbrevJournalsPositions, + conferencesPositions, publishersPositions, locationsPositions, collaborationsPositions, identifiersPositions, urlPositions); String res = label(ress); - String lastTag = null; String lastTag0; String currentTag0 = null; @@ -697,7 +723,7 @@ public StringBuilder trainingExtraction(List inputs) { if (tok.length() == 0) { // new citation - //buffer.append("/t\n"); + // buffer.append("/t\n"); start = true; continue; } @@ -710,7 +736,7 @@ public StringBuilder trainingExtraction(List inputs) { String s = stt.nextToken().trim(); if (i == 0) { s2 = TextUtilities.HTMLEncode(s); - //s2 = s; + // s2 = s; boolean strop = false; while ((!strop) && (p < tokenizations.size())) { @@ -725,7 +751,7 @@ public StringBuilder trainingExtraction(List inputs) { } } else if (i == ll - 1) { s1 = s; - } + } i++; } @@ -750,7 +776,7 @@ public StringBuilder trainingExtraction(List inputs) { } } - //tagClosed = lastTag0 != null && + // tagClosed = lastTag0 != null && if ((lastTag0 != null) && (currentTag0 != null)) testClosingTag(buffer, currentTag0, lastTag0); @@ -760,52 +786,53 @@ public StringBuilder trainingExtraction(List inputs) { } if (output == null) { output = writeField(s1, lastTag0, s2, "", "", addSpace, 0); - } + } if (output == null) { output = writeField(s1, lastTag0, s2, "", "", addSpace, 0); - } + } if (output == null) { output = writeField(s1, lastTag0, s2, "<series>", "<title level=\"s\">", addSpace, 0); - } + } if (output == null) { output = writeField(s1, lastTag0, s2, "<booktitle>", "<title level=\"m\">", addSpace, 0); - } + } if (output == null) { output = writeField(s1, lastTag0, s2, "<date>", "<date>", addSpace, 0); - } + } if (output == null) { output = writeField(s1, lastTag0, s2, "<volume>", "<biblScope unit=\"volume\">", addSpace, 0); - } + } 
if (output == null) { output = writeField(s1, lastTag0, s2, "<publisher>", "<publisher>", addSpace, 0); - } + } if (output == null) { output = writeField(s1, lastTag0, s2, "<location>", "<pubPlace>", addSpace, 0); - } + } if (output == null) { output = writeField(s1, lastTag0, s2, "<editor>", "<editor>", addSpace, 0); - } + } if (output == null) { output = writeField(s1, lastTag0, s2, "<pages>", "<biblScope unit=\"page\">", addSpace, 0); - } + } if (output == null) { output = writeField(s1, lastTag0, s2, "<tech>", "<note type=\"report\">", addSpace, 0); - } + } if (output == null) { output = writeField(s1, lastTag0, s2, "<issue>", "<biblScope unit=\"issue\">", addSpace, 0); - } + } if (output == null) { output = writeField(s1, lastTag0, s2, "<web>", "<ptr type=\"web\">", addSpace, 0); - } + } if (output == null) { output = writeField(s1, lastTag0, s2, "<note>", "<note>", addSpace, 0); - } + } if (output == null) { output = writeField(s1, lastTag0, s2, "<institution>", "<orgName>", addSpace, 0); - } + } if (output == null) { - output = writeField(s1, lastTag0, s2, "<collaboration>", "<orgName type=\"collaboration\">", addSpace, 0); - } + output = writeField(s1, lastTag0, s2, "<collaboration>", "<orgName type=\"collaboration\">", + addSpace, 0); + } if (output == null) { String localTag = null; String cleanS2 = StringUtils.normalizeSpace(s2); @@ -820,16 +847,16 @@ public StringBuilder trainingExtraction(List<String> inputs) { if (arxivMatcher.find()) localTag = "<idno type=\"arXiv\">"; } - + if (localTag == null) { Matcher pmidMatcher = TextUtilities.pmidPattern.matcher(cleanS2); - if (pmidMatcher.find()) + if (pmidMatcher.find()) localTag = "<idno type=\"PMID\">"; } if (localTag == null) { Matcher pmcidMatcher = TextUtilities.pmcidPattern.matcher(cleanS2); - if (pmcidMatcher.find()) + if (pmcidMatcher.find()) localTag = "<idno type=\"PMC\">"; } @@ -851,7 +878,7 @@ public StringBuilder trainingExtraction(List<String> inputs) { localTag = "<idno>"; output = 
writeField(s1, lastTag0, s2, "<pubnum>", localTag, addSpace, 0); - } + } if (output != null) { buffer.append(output); lastTag = s1; @@ -871,7 +898,7 @@ public StringBuilder trainingExtraction(List<String> inputs) { buffer.append("</bibl>\n"); } } - + } catch (Exception e) { throw new GrobidException("An exception occured while running Grobid.", e); } @@ -879,7 +906,7 @@ public StringBuilder trainingExtraction(List<String> inputs) { } private String writeField(String s1, String lastTag0, String s2, - String field, String outField, boolean addSpace, int nbIndent) { + String field, String outField, boolean addSpace, int nbIndent) { String result = null; if ((s1.equals(field)) || (s1.equals("I-" + field))) { if (s1.equals(lastTag0) || s1.equals("I-" + lastTag0)) { @@ -889,9 +916,11 @@ private String writeField(String s1, String lastTag0, String s2, result = s2; } else { result = ""; - /*for (int i = 0; i < nbIndent; i++) { - result += "\t"; - }*/ + /* + * for (int i = 0; i < nbIndent; i++) { + * result += "\t"; + * } + */ if (addSpace) { result += " " + outField + s2; } else { @@ -902,7 +931,8 @@ private String writeField(String s1, String lastTag0, String s2, return result; } - private boolean writeField2(StringBuilder buffer, String s1, String lastTag0, String s2, String field, String outField, boolean addSpace) { + private boolean writeField2(StringBuilder buffer, String s1, String lastTag0, String s2, String field, + String outField, boolean addSpace) { boolean result = false; if ((s1.equals(field)) || (s1.equals("I-" + field))) { result = true; @@ -922,7 +952,7 @@ private boolean writeField2(StringBuilder buffer, String s1, String lastTag0, St } private boolean testClosingTag(StringBuilder buffer, String currentTag0, - String lastTag0) { + String lastTag0) { boolean res = false; if (!currentTag0.equals(lastTag0)) { res = true; diff --git a/grobid-core/src/main/java/org/grobid/core/engines/HeaderParser.java 
b/grobid-core/src/main/java/org/grobid/core/engines/HeaderParser.java index 14b76a04a3..9f14077598 100755 --- a/grobid-core/src/main/java/org/grobid/core/engines/HeaderParser.java +++ b/grobid-core/src/main/java/org/grobid/core/engines/HeaderParser.java @@ -340,7 +340,10 @@ public String processingHeaderSection( // copyrights/license identification if (StringUtils.isNotBlank(resHeader.getCopyright())) { - if (GrobidProperties.getGrobidEngineName("copyright").equals("delft")) { + if ( + GrobidProperties.getGrobidEngineName("copyright").equals("delft") + || GrobidProperties.getGrobidEngineName("copyright").equals("onnx") + ){ CopyrightsLicense copyrightsLicense = LicenseClassifier.getInstance().classify(resHeader.getCopyright()); if (copyrightsLicense != null) resHeader.setCopyrightsLicense(copyrightsLicense); @@ -579,35 +582,10 @@ else if (previousLineStartX - lineStartX > characterWidth) continue; } - features = new FeaturesVectorHeader(); - features.token = token; - features.string = text; + features = FeaturesVectorHeader.fromLayoutToken(token); if (newline) features.lineStatus = "LINESTART"; - - Matcher m0 = featureFactory.isPunct.matcher(text); - if (m0.find()) { - features.punctType = "PUNCT"; - } - if (text.equals("(") || text.equals("[")) { - features.punctType = "OPENBRACKET"; - - } else if (text.equals(")") || text.equals("]")) { - features.punctType = "ENDBRACKET"; - - } else if (text.equals(".")) { - features.punctType = "DOT"; - - } else if (text.equals(",")) { - features.punctType = "COMMA"; - - } else if (text.equals("-")) { - features.punctType = "HYPHEN"; - - } else if (text.equals("\"") || text.equals("\'") || text.equals("`")) { - features.punctType = "QUOTE"; - } if (n == startIndex) { // beginning of block @@ -677,43 +655,6 @@ else if (features.blockStatus == null) features.alignmentStatus = "ALIGNEDLEFT"; } - if (text.length() == 1) { - features.singleChar = true; - } - - if (Character.isUpperCase(text.charAt(0))) { - features.capitalisation = 
"INITCAP"; - } - - if (featureFactory.test_all_capital(text)) { - features.capitalisation = "ALLCAP"; - } - - if (featureFactory.test_digit(text)) { - features.digit = "CONTAINSDIGITS"; - } - - Matcher m = featureFactory.isDigit.matcher(text); - if (m.find()) { - features.digit = "ALLDIGIT"; - } - - if (featureFactory.test_common(text)) { - features.commonName = true; - } - - if (featureFactory.test_names(text)) { - features.properName = true; - } - - if (featureFactory.test_month(text)) { - features.month = true; - } - - Matcher m2 = featureFactory.year.matcher(text); - if (m2.find()) { - features.year = true; - } // check token offsets for email and http address, or known location if (locationPositions != null) { @@ -775,21 +716,6 @@ else if (features.blockStatus == null) /*if (token.isSuperscript()) features.superscript = true;*/ - if (token.isBold()) - features.bold = true; - - if (token.isItalic()) - features.italic = true; - - if (features.capitalisation == null) - features.capitalisation = "NOCAPS"; - - if (features.digit == null) - features.digit = "NODIGIT"; - - if (features.punctType == null) - features.punctType = "NOPUNCT"; - /*if (spacingPreviousBlock != 0.0) { features.spacingWithPreviousBlock = featureFactory .linearScaling(spacingPreviousBlock-doc.getMinBlockSpacing(), doc.getMaxBlockSpacing()-doc.getMinBlockSpacing(), NBBINS_SPACE); diff --git a/grobid-core/src/main/java/org/grobid/core/engines/LicenseClassifier.java b/grobid-core/src/main/java/org/grobid/core/engines/LicenseClassifier.java index 0672d994e8..0f46baf38a 100644 --- a/grobid-core/src/main/java/org/grobid/core/engines/LicenseClassifier.java +++ b/grobid-core/src/main/java/org/grobid/core/engines/LicenseClassifier.java @@ -8,8 +8,8 @@ import org.grobid.core.data.CopyrightsLicense; import org.grobid.core.data.CopyrightsLicense.CopyrightsOwner; import org.grobid.core.data.CopyrightsLicense.License; -import org.grobid.core.utilities.GrobidProperties; -import 
org.grobid.core.jni.DeLFTClassifierModel; +import org.grobid.core.engines.tagging.ClassifierFactory; +import org.grobid.core.engines.tagging.GenericClassifier; import com.fasterxml.jackson.core.*; import com.fasterxml.jackson.databind.*; @@ -22,11 +22,11 @@ public class LicenseClassifier { private static final Logger LOGGER = LoggerFactory.getLogger(LicenseClassifier.class); // multi-class/multi-label classifier - private DeLFTClassifierModel classifierCopyrightsOwner = null; - private DeLFTClassifierModel classifierLicense = null; + private GenericClassifier classifierCopyrightsOwner = null; + private GenericClassifier classifierLicense = null; // binary classifiers to be added if used - private Boolean useBinary = false; + private Boolean useBinary = false; private JsonParser parser; @@ -51,12 +51,13 @@ private static synchronized void getNewInstance() { } private LicenseClassifier() { - this.classifierCopyrightsOwner = new DeLFTClassifierModel("copyright", GrobidProperties.getDelftArchitecture("copyright")); - this.classifierLicense = new DeLFTClassifierModel("license", GrobidProperties.getDelftArchitecture("license")); + this.classifierCopyrightsOwner = ClassifierFactory.getClassifier("copyright"); + this.classifierLicense = ClassifierFactory.getClassifier("license"); } /** * Classify a simple piece of text + * * @return list of predicted labels/scores pairs */ public CopyrightsLicense classify(String text) throws Exception { @@ -69,13 +70,14 @@ public CopyrightsLicense classify(String text) throws Exception { /** * Classify an array of texts + * * @return list of predicted labels/scores pairs for each text */ public List<CopyrightsLicense> classify(List<String> texts) throws Exception { if (CollectionUtils.isEmpty(texts)) return null; - LOGGER.info("classify: " + texts.size()); + LOGGER.debug("classify: " + texts.size()); String copyrightOwnerAsJson = this.classifierCopyrightsOwner.classify(texts); String licencesAsJson = 
this.classifierLicense.classify(texts); @@ -92,11 +94,11 @@ protected static List<CopyrightsLicense> extractResults(String copyrightOwnerAsJ JsonNode root_copyrights = mapper.readTree(copyrightOwnerAsJson); JsonNode root_licenses = mapper.readTree(licencesAsJson); - int entityRank =0; + int entityRank = 0; JsonNode classificationsNodeCopyrights = root_copyrights.findPath("classifications"); JsonNode classificationsNodeLicenses = root_licenses.findPath("classifications"); - if ((classificationsNodeCopyrights != null) && (!classificationsNodeCopyrights.isMissingNode()) && - (classificationsNodeLicenses != null) && (!classificationsNodeLicenses.isMissingNode())) { + if ((classificationsNodeCopyrights != null) && (!classificationsNodeCopyrights.isMissingNode()) && + (classificationsNodeLicenses != null) && (!classificationsNodeLicenses.isMissingNode())) { Iterator<JsonNode> ite1 = classificationsNodeCopyrights.elements(); Iterator<JsonNode> ite2 = classificationsNodeLicenses.elements(); while (ite1.hasNext()) { @@ -106,7 +108,7 @@ protected static List<CopyrightsLicense> extractResults(String copyrightOwnerAsJ List<String> owners = CopyrightsLicense.copyrightOwners; List<Double> scoreFields = new ArrayList<>(); - for(String fieldOwners : owners) { + for (String fieldOwners : owners) { JsonNode fieldNode = classificationsNode.findPath(fieldOwners); double scoreField = 0.0; if ((fieldNode != null) && (!fieldNode.isMissingNode())) { @@ -119,7 +121,7 @@ protected static List<CopyrightsLicense> extractResults(String copyrightOwnerAsJ double scoreUndecided = 0.0; int rank = 0; for (Double scoreField : scoreFields) { - if (scoreField>0.5 && scoreField > bestProb) { + if (scoreField > 0.5 && scoreField > bestProb) { owner = CopyrightsOwner.valueOf(owners.get(rank).toUpperCase()); bestProb = scoreField; } @@ -142,7 +144,7 @@ protected static List<CopyrightsLicense> extractResults(String copyrightOwnerAsJ List<String> licenses = CopyrightsLicense.licenses; scoreFields = new 
ArrayList<>(); - for(String fieldLicenses : licenses) { + for (String fieldLicenses : licenses) { JsonNode fieldNode = classificationsNode.findPath(fieldLicenses); double scoreField = 0.0; if ((fieldNode != null) && (!fieldNode.isMissingNode())) { @@ -155,7 +157,7 @@ protected static List<CopyrightsLicense> extractResults(String copyrightOwnerAsJ License license = null; rank = 0; for (Double scoreField : scoreFields) { - if (scoreField>0.5 && scoreField > bestProb) { + if (scoreField > 0.5 && scoreField > bestProb) { String valueLicense = licenses.get(rank); valueLicense = valueLicense.replace("-", ""); license = License.valueOf(valueLicense.toUpperCase()); @@ -178,10 +180,10 @@ protected static List<CopyrightsLicense> extractResults(String copyrightOwnerAsJ entityRank++; } } - } catch(JsonProcessingException e) { + } catch (JsonProcessingException e) { LOGGER.error("failed to parse JSON copyrights/licenses classification result", e); } - + return results; } diff --git a/grobid-core/src/main/java/org/grobid/core/engines/ReferenceSegmenterParser.java b/grobid-core/src/main/java/org/grobid/core/engines/ReferenceSegmenterParser.java index 6a2d8e35b4..568e1c01a6 100644 --- a/grobid-core/src/main/java/org/grobid/core/engines/ReferenceSegmenterParser.java +++ b/grobid-core/src/main/java/org/grobid/core/engines/ReferenceSegmenterParser.java @@ -3,16 +3,14 @@ import com.google.common.base.Function; import com.google.common.collect.Lists; import com.google.common.collect.Sets; -import org.grobid.core.GrobidModels; +import org.apache.commons.lang3.tuple.Pair; import org.grobid.core.document.Document; import org.grobid.core.document.DocumentPiece; import org.grobid.core.document.DocumentPointer; import org.grobid.core.engines.citations.LabeledReferenceResult; import org.grobid.core.engines.citations.ReferenceSegmenter; import org.grobid.core.engines.label.SegmentationLabels; -import org.grobid.core.engines.label.TaggingLabels; import 
org.grobid.core.engines.tagging.GenericTaggerUtils; -import org.grobid.core.engines.tagging.GrobidCRFEngine; import org.grobid.core.exceptions.GrobidException; import org.grobid.core.features.FeatureFactory; import org.grobid.core.features.FeaturesVectorReferenceSegmenter; @@ -21,232 +19,87 @@ import org.grobid.core.tokenization.LabeledTokensContainer; import org.grobid.core.tokenization.TaggingTokenSynchronizer; import org.grobid.core.utilities.BoundingBoxCalculator; -import org.grobid.core.utilities.GrobidProperties; import org.grobid.core.utilities.TextUtilities; import org.grobid.core.utilities.Triple; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import java.util.ArrayList; -import java.util.Collections; -import java.util.Iterator; -import java.util.List; -import java.util.SortedSet; +import java.util.*; import java.util.regex.Matcher; -import org.apache.commons.lang3.tuple.Pair; +import static org.grobid.core.GrobidModels.REFERENCE_SEGMENTER; public class ReferenceSegmenterParser extends AbstractParser implements ReferenceSegmenter { - private static final Logger LOGGER = LoggerFactory.getLogger(ReferenceSegmenterParser.class); + private static final Logger LOGGER = LoggerFactory.getLogger(ReferenceSegmenterParser.class); // projection scale for line length private static final int LINESCALE = 10; protected ReferenceSegmenterParser() { - super(GrobidModels.REFERENCE_SEGMENTER); + super(REFERENCE_SEGMENTER); } - @Override - public List<LabeledReferenceResult> extract(String referenceBlock) { - Document res = Document.createFromText(referenceBlock); + @Override + public List<LabeledReferenceResult> extract(String referenceBlock) { + Document res = Document.createFromText(referenceBlock); - DocumentPiece piece = new DocumentPiece( - new DocumentPointer(0, 0, 0), - new DocumentPointer(0, res.getTokenizations().size() - 1, res.getTokenizations().size() - 1)); + DocumentPiece piece = new DocumentPiece( + new DocumentPointer(0, 0, 0), + new 
DocumentPointer(0, res.getTokenizations().size() - 1, res.getTokenizations().size() - 1)); - return extract(res, Sets.newTreeSet(Collections.singletonList(piece)), false); - } + return extract(res, Sets.newTreeSet(Collections.singletonList(piece)), false); + } - /** + /** * * @param doc Document object - * @return <reference_label, reference_string> Note, that label is null when no label was detected - * example: <"[1]", "Hu W., Barkana, R., & Gruzinov A. Phys. Rev. Lett. 85, 1158"> + * @return <reference_label, reference_string> Note, that label is null when no + * label was detected + * example: <"[1]", "Hu W., Barkana, R., & Gruzinov A. Phys. Rev. + * Lett. 85, 1158"> */ - public List<LabeledReferenceResult> extract(Document doc) { - return extract(doc, false); - } - - public List<LabeledReferenceResult> extract(Document doc, boolean training) { - SortedSet<DocumentPiece> referencesParts = doc.getDocumentPart(SegmentationLabels.REFERENCES); - return extract(doc, referencesParts, training); - } - - public List<LabeledReferenceResult> extract(Document doc, SortedSet<DocumentPiece> referencesParts, boolean training) { - - Pair<String,List<LayoutToken>> featSeg = getReferencesSectionFeatured(doc, referencesParts); - String res; - List<LayoutToken> tokenizationsReferences; - if (featSeg == null) { - return null; - } - // if featSeg is null, it usually means that no reference segment is found in the - // document segmentation - String featureVector = featSeg.getLeft(); - tokenizationsReferences = featSeg.getRight(); - try { - - // to support long sequence in case of RNN usage we segment in pieces of less than the - // max_sequence_length and quite significantly overlapping - // this does not apply to CRF which can process "infinite" input sequence - // this is relevant to the reference segmenter RNN model, which is position-free in its - // application, but could not be generalized to other RNN or transformer model long inputs - if 
(GrobidProperties.getGrobidEngine(GrobidModels.REFERENCE_SEGMENTER) == GrobidCRFEngine.DELFT) { - String[] featureVectorLines = featureVector.split("\n"); - -/*for(LayoutToken token : tokenizationsReferences) { -System.out.print(token.getText()); -} -System.out.println("\n"); -System.out.println("total input lines: " + featureVectorLines.length + " - " + tokenizationsReferences.size() + " tokens");*/ - - int originalMaxSequence = 2000; - if (GrobidProperties.getInstance().getDelftRuntimeMaxSequenceLength(GrobidModels.REFERENCE_SEGMENTER.getModelName()) != -1) { - originalMaxSequence = GrobidProperties.getInstance().getDelftRuntimeMaxSequenceLength(GrobidModels.REFERENCE_SEGMENTER.getModelName()); - } - - if (featureVectorLines.length < originalMaxSequence || originalMaxSequence < 600) { - // if the input is lower than max sequence length, not need to segment - // if the max sequence length is too small, e.g. transformer, we won't be able to manage - // overlaps adapted to references - res = label(featureVector); - } else { - // we adjust max sequence value to take into account 500 token lines overlap - int maxSequence = Math.max(500, originalMaxSequence - 1000); - -//System.out.println("originalMaxSequence: " + originalMaxSequence + " / maxSequence adjusted to: " + maxSequence); - - List<List<String>> featureVectorPieces = new ArrayList<>(); - // segment the input vectors in overlapping sequences, according to the model max_sequence_length parameter - for(int i=0; (i*maxSequence) < featureVectorLines.length; i++) { - int lowerBound = i*maxSequence; - // overlapping: this localRes has 500 extra lines after the normal end - int upperBound = Math.min( ((i+1)*maxSequence)+500, featureVectorLines.length ); - if (featureVectorLines.length - lowerBound < originalMaxSequence) - upperBound = featureVectorLines.length; - -//System.out.println("lowerBound: " + lowerBound + " - upperBound: " + upperBound); - List<String> featureVectorPiece = new ArrayList<>(); - for(int 
j=lowerBound; j<upperBound; j++) - featureVectorPiece.add(featureVectorLines[j]); - featureVectorPieces.add(featureVectorPiece); - - if (upperBound == featureVectorLines.length) - break; - } - -/*System.out.println("featureVectorPieces.size(): " + featureVectorPieces.size()); -for(List<String> featureVectorPiece : featureVectorPieces) { -System.out.println(featureVectorPiece.size()); -}*/ - // label every pieces in batch - List<String> allRes = new ArrayList<>(); - List<String> allVectors = new ArrayList<>(); - for(List<String> featureVectorPiece : featureVectorPieces) { - StringBuilder localFeatureVector = new StringBuilder(); - for(int j=0; j<featureVectorPiece.size(); j++) { - localFeatureVector.append(featureVectorPiece.get(j)).append("\n"); - } - allVectors.add(localFeatureVector.toString()); - } - - // parallel labeling of the input segments - String fullRes = label(allVectors); - - // segment this result to get back the input chunk alignment (with extra 500 overlaping lines) - String[] fullResLines = fullRes.split("\n"); - int pos = 0; - for(List<String> featureVectorPiece : featureVectorPieces) { - StringBuilder localRes = new StringBuilder(); - int localSize = featureVectorPiece.size(); - for(int i=pos; i<pos+localSize; i++) { - localRes.append(fullResLines[i]).append("\n"); - } - allRes.add(localRes.toString()); - pos += localSize; - } - - // combine results and reconnect smoothly overlaps - StringBuilder resBuilder = new StringBuilder(); - int previousTransitionPos = 0; - for(int i=0; i<allRes.size(); i++) { - String localRes = allRes.get(i); - String[] localResLines = localRes.split("\n"); -//System.out.println("localResLines.length: " + localResLines.length); - int transitionPos = localResLines.length; - if (i != allRes.size()-1) { - // in the trailing redundant part (500 last lines), we identify the line index - // of the last "closing" label, this is the point where we will reconnect the - // labeled segments to avoid breaking a labeled field - - 
for(int k=localResLines.length-1; k>=0; k--) { - if (localResLines.length-k == 500) { - // this is the max overlap, we don't go beyond! - transitionPos = k; - break; - } - - String line = localResLines[k]; - if (line.endsWith(TaggingLabels.GROBID_START_ENTITY_LABEL_PREFIX+"<label>") || - line.endsWith(TaggingLabels.GROBID_START_ENTITY_LABEL_PREFIX+"<reference>")) { - // we can stop the line before this one - transitionPos = k; - break; - } - } - } - // else: we are at the last chunk, so we take the content until the very end - -//System.out.println("previousTransitionPos: " + previousTransitionPos); -//System.out.println("transitionPos: " + transitionPos + "\n"); - - List<String> selectedlocalResLines = new ArrayList<>(); - for(int j= previousTransitionPos; j<transitionPos; j++) { - if (j == previousTransitionPos && previousTransitionPos != 0) { - // we want to be sure to have a starting label - String localLine = localResLines[j]; - if (localLine.indexOf(TaggingLabels.GROBID_START_ENTITY_LABEL_PREFIX) == -1) { - localLine = localLine.replace("<label>", TaggingLabels.GROBID_START_ENTITY_LABEL_PREFIX+"<label>"); - localLine = localLine.replace("<reference>", TaggingLabels.GROBID_START_ENTITY_LABEL_PREFIX+"<reference>"); - } - selectedlocalResLines.add(localLine); - } else if (j == previousTransitionPos && previousTransitionPos == 0 && i != 0) { - // previousTransitionPos is 0 and we are not at the first segment: we had a non overlapping - // transition, we might want to avoid a starting label at this point - String localLine = localResLines[j]; - if (localLine.indexOf(TaggingLabels.GROBID_START_ENTITY_LABEL_PREFIX) != -1) { - localLine = localLine.replace(TaggingLabels.GROBID_START_ENTITY_LABEL_PREFIX+"<label>", "<label>"); - localLine = localLine.replace(TaggingLabels.GROBID_START_ENTITY_LABEL_PREFIX+"<reference>", "<reference>"); - } - selectedlocalResLines.add(localLine); - } else { - selectedlocalResLines.add(localResLines[j]); - } - } - for(String localResLine 
: selectedlocalResLines) - resBuilder.append(localResLine).append("\n"); - - previousTransitionPos = transitionPos-maxSequence; - } - res = resBuilder.toString(); - } - } else - res = label(featureVector); - } - catch(Exception e) { - throw new GrobidException("Labeling in ReferenceSegmenter fails.", e); - } - if (res == null) { - return null; - } - - // if we extract for generating training data, we also give back the used features + public List<LabeledReferenceResult> extract(Document doc) { + return extract(doc, false); + } + + public List<LabeledReferenceResult> extract(Document doc, boolean training) { + SortedSet<DocumentPiece> referencesParts = doc.getDocumentPart(SegmentationLabels.REFERENCES); + return extract(doc, referencesParts, training); + } + + public List<LabeledReferenceResult> extract(Document doc, SortedSet<DocumentPiece> referencesParts, + boolean training) { + + Pair<String, List<LayoutToken>> featSeg = getReferencesSectionFeatured(doc, referencesParts); + String res; + List<LayoutToken> tokenizationsReferences; + if (featSeg == null) { + return null; + } + // if featSeg is null, it usually means that no reference segment is found in + // the document segmentation + String featureVector = featSeg.getLeft(); + tokenizationsReferences = featSeg.getRight(); + try { + res = label(featureVector); + } catch (Exception e) { + throw new GrobidException("Labeling in ReferenceSegmenter fails.", e); + } + if (res == null) { + return null; + } + + // if we extract for generating training data, we also give back the used + // features List<Triple<String, String, String>> labeled = GenericTaggerUtils.getTokensWithLabelsAndFeatures(res, training); return getExtractionResult(tokenizationsReferences, labeled); } - private List<LabeledReferenceResult> getExtractionResult(List<LayoutToken> tokenizations, List<Triple<String, String, String>> labeled) { + private List<LabeledReferenceResult> getExtractionResult( + List<LayoutToken> tokenizations, + 
List<Triple<String, String, String>> labeled + ) { final List<LabeledReferenceResult> resultList = new ArrayList<>(); final StringBuilder reference = new StringBuilder(); final List<LayoutToken> referenceTokens = new ArrayList<>(); @@ -255,23 +108,22 @@ private List<LabeledReferenceResult> getExtractionResult(List<LayoutToken> token TaggingTokenSynchronizer synchronizer = new TaggingTokenSynchronizer(null, labeled, tokenizations); - Function<LabeledTokensContainer, Void> function = new Function<LabeledTokensContainer, Void>() { - @Override public Void apply(LabeledTokensContainer container) { - features.append(container.getFeatureString()); - features.append('\n'); - if (container.isBeginning()) { - if (reference.length() != 0) { - resultList.add(new LabeledReferenceResult(referenceLabel.length() == 0 ? null : - referenceLabel.toString().trim(), reference.toString().trim(), Lists.newArrayList(referenceTokens), - features.toString(), BoundingBoxCalculator.calculate(referenceTokens))); - reference.setLength(0); - referenceLabel.setLength(0); - features.setLength(0); - referenceTokens.clear(); - } + Function<LabeledTokensContainer, Void> function = container -> { + features.append(container.getFeatureString()); + features.append('\n'); + if (container.isBeginning()) { + if (reference.length() != 0) { + resultList.add(new LabeledReferenceResult( + referenceLabel.length() == 0 ? null : referenceLabel.toString().trim(), + reference.toString().trim(), Lists.newArrayList(referenceTokens), + features.toString(), BoundingBoxCalculator.calculate(referenceTokens))); + reference.setLength(0); + referenceLabel.setLength(0); + features.setLength(0); + referenceTokens.clear(); } - return null; } + return null; }; Iterator<LabeledTokensContainer> iterator = synchronizer.iterator(); @@ -306,8 +158,9 @@ private List<LabeledReferenceResult> getExtractionResult(List<LayoutToken> token // Handle last one. 
if (!iterator.hasNext()) { - resultList.add(new LabeledReferenceResult(referenceLabel.length() == 0 ? null : - referenceLabel.toString().trim(), reference.toString().trim(), + resultList.add(new LabeledReferenceResult( + referenceLabel.length() == 0 ? null : referenceLabel.toString().trim(), + reference.toString().trim(), referenceTokens, features.toString(), BoundingBoxCalculator.calculate(referenceTokens))); reference.setLength(0); @@ -318,190 +171,184 @@ private List<LabeledReferenceResult> getExtractionResult(List<LayoutToken> token return resultList; } - public Pair<String,String> createTrainingData(Document doc, int id) { - SortedSet<DocumentPiece> referencesParts = doc.getDocumentPart(SegmentationLabels.REFERENCES); - Pair<String,List<LayoutToken>> featSeg = getReferencesSectionFeatured(doc, referencesParts); - String res; - List<LayoutToken> tokenizations; - if (featSeg == null) { - return null; - } - // if featSeg is null, it usually means that no reference segment is found in the - // document segmentation - String featureVector = featSeg.getLeft(); - tokenizations = featSeg.getRight(); - try { - res = label(featureVector); - } - catch(Exception e) { - throw new GrobidException("Sequence labeling in ReferenceSegmenter fails.", e); - } - if (res == null) { - return null; - } + public Pair<String, String> createTrainingData(Document doc, int id) { + SortedSet<DocumentPiece> referencesParts = doc.getDocumentPart(SegmentationLabels.REFERENCES); + Pair<String, List<LayoutToken>> featSeg = getReferencesSectionFeatured(doc, referencesParts); + String res; + List<LayoutToken> tokenizations; + if (featSeg == null) { + return null; + } + // if featSeg is null, it usually means that no reference segment is found in + // the + // document segmentation + String featureVector = featSeg.getLeft(); + tokenizations = featSeg.getRight(); + try { + res = label(featureVector); + } catch (Exception e) { + throw new GrobidException("Sequence labeling in ReferenceSegmenter 
fails.", e); + } + if (res == null) { + return null; + } List<Pair<String, String>> labeled = GenericTaggerUtils.getTokensAndLabels(res); StringBuilder sb = new StringBuilder(); - //noinspection StringConcatenationInsideStringBufferAppend - sb.append("<tei xml:space=\"preserve\">\n" + - " <teiHeader>\n" + - " <fileDesc xml:id=\"_" + id + "\"/>\n" + - " </teiHeader>\n" + - " <text xml:lang=\"en\">\n" + - " <listBibl>\n"); - - int tokPtr = 0; - boolean addSpace = false; - boolean addEOL = false; - String lastTag = null; - boolean refOpen = false; - for (Pair<String, String> l : labeled) { + // noinspection StringConcatenationInsideStringBufferAppend + sb.append("<tei xml:space=\"preserve\">\n" + + " <teiHeader>\n" + + " <fileDesc xml:id=\"_" + id + "\"/>\n" + + " </teiHeader>\n" + + " <text xml:lang=\"en\">\n" + + " <listBibl>\n"); + + int tokPtr = 0; + boolean addSpace = false; + boolean addEOL = false; + String lastTag = null; + boolean refOpen = false; + for (Pair<String, String> l : labeled) { String tok = l.getLeft(); String label = l.getRight(); - int tokPtr2 = tokPtr; - for(; tokPtr2 < tokenizations.size(); tokPtr2++) { + int tokPtr2 = tokPtr; + for (; tokPtr2 < tokenizations.size(); tokPtr2++) { if (tokenizations.get(tokPtr2).t().equals(" ")) { - addSpace = true; - } - else if (tokenizations.get(tokPtr2).t().equals("\n") || - tokenizations.get(tokPtr).t().equals("\r") ) { - addEOL = true; - } - else { - break; - } + addSpace = true; + } else if (tokenizations.get(tokPtr2).t().equals("\n") || + tokenizations.get(tokPtr).t().equals("\r")) { + addEOL = true; + } else { + break; + } } - tokPtr = tokPtr2; + tokPtr = tokPtr2; if (tokPtr >= tokenizations.size()) { - LOGGER.error("Implementation error: Reached the end of tokenizations, but current token is " + tok); - // we add a space to avoid concatenated text - addSpace = true; + LOGGER.error("Implementation error: Reached the end of tokenizations, but current token is " + tok); + // we add a space to avoid 
concatenated text + addSpace = true; + } else { + String tokenizationToken = tokenizations.get(tokPtr).getText(); + + if ((tokPtr != tokenizations.size()) && !tokenizationToken.equals(tok)) { + // and we add a space by default to avoid concatenated text + addSpace = true; + if (!tok.startsWith(tokenizationToken)) { + // this is a very exceptional case due to a sequence of accent/diacresis, in + // this case we skip + // a shift in the tokenizations list and continue on the basis of the labeled + // token + // we check one ahead + tokPtr++; + tokenizationToken = tokenizations.get(tokPtr).getText(); + if (!tok.equals(tokenizationToken)) { + // we try another position forward (second hope!) + tokPtr++; + tokenizationToken = tokenizations.get(tokPtr).getText(); + if (!tok.equals(tokenizationToken)) { + // we try another position forward (last hope!) + tokPtr++; + tokenizationToken = tokenizations.get(tokPtr).getText(); + if (!tok.equals(tokenizationToken)) { + // we return to the initial position + tokPtr = tokPtr - 3; + tokenizationToken = tokenizations.get(tokPtr).getText(); + LOGGER.error("Implementation error, tokens out of sync: " + + tokenizationToken + " != " + tok + ", at position " + tokPtr); + } + } + } + } + // note: if the above condition is true, this is an exceptional case due to a + // sequence of accent/diacresis and we can go on as a full string match + } } - else { - String tokenizationToken = tokenizations.get(tokPtr).getText(); - - if ((tokPtr != tokenizations.size()) && !tokenizationToken.equals(tok)) { - // and we add a space by default to avoid concatenated text - addSpace = true; - if (!tok.startsWith(tokenizationToken)) { - // this is a very exceptional case due to a sequence of accent/diacresis, in this case we skip - // a shift in the tokenizations list and continue on the basis of the labeled token - // we check one ahead - tokPtr++; - tokenizationToken = tokenizations.get(tokPtr).getText(); - if (!tok.equals(tokenizationToken)) { - // we 
try another position forward (second hope!) - tokPtr++; - tokenizationToken = tokenizations.get(tokPtr).getText(); - if (!tok.equals(tokenizationToken)) { - // we try another position forward (last hope!) - tokPtr++; - tokenizationToken = tokenizations.get(tokPtr).getText(); - if (!tok.equals(tokenizationToken)) { - // we return to the initial position - tokPtr = tokPtr-3; - tokenizationToken = tokenizations.get(tokPtr).getText(); - LOGGER.error("Implementation error, tokens out of sync: " + - tokenizationToken + " != " + tok + ", at position " + tokPtr); - } - } - } - } - // note: if the above condition is true, this is an exceptional case due to a - // sequence of accent/diacresis and we can go on as a full string match - } - } - - String plainLabel = GenericTaggerUtils.getPlainLabel(label); - - boolean tagClosed = (lastTag != null) && testClosingTag(sb, label, lastTag, addSpace, addEOL); - - if (tagClosed) { - addSpace = false; - addEOL = false; - } - if (tagClosed && lastTag.equals("<reference>")) { - refOpen = false; - } - String output; - String field; - if (refOpen) { - field = "<label>"; - } - else { - field = "<bibl><label>"; - } - output = writeField(label, lastTag, tok, "<label>", field, addSpace, addEOL, 2); - if (output != null) { - sb.append(output); - refOpen = true; - } - else { - if (refOpen) { - field = ""; - } - else { - field = "<bibl>"; - } - output = writeField(label, lastTag, tok, "<reference>", field, addSpace, addEOL, 2); - if (output != null) { - sb.append(output); - refOpen= true; - } - else { - output = writeField(label, lastTag, tok, "<other>", "", addSpace, addEOL, 2); - if (output != null) { - sb.append(output); - refOpen = false; - } - } - } - - lastTag = plainLabel; - addSpace = false; - addEOL = false; + + String plainLabel = GenericTaggerUtils.getPlainLabel(label); + + boolean tagClosed = (lastTag != null) && testClosingTag(sb, label, lastTag, addSpace, addEOL); + + if (tagClosed) { + addSpace = false; + addEOL = false; + } + if 
(tagClosed && lastTag.equals("<reference>")) { + refOpen = false; + } + String output; + String field; + if (refOpen) { + field = "<label>"; + } else { + field = "<bibl><label>"; + } + output = writeField(label, lastTag, tok, "<label>", field, addSpace, addEOL, 2); + if (output != null) { + sb.append(output); + refOpen = true; + } else { + if (refOpen) { + field = ""; + } else { + field = "<bibl>"; + } + output = writeField(label, lastTag, tok, "<reference>", field, addSpace, addEOL, 2); + if (output != null) { + sb.append(output); + refOpen = true; + } else { + output = writeField(label, lastTag, tok, "<other>", "", addSpace, addEOL, 2); + if (output != null) { + sb.append(output); + refOpen = false; + } + } + } + + lastTag = plainLabel; + addSpace = false; + addEOL = false; tokPtr++; } - if (refOpen) { - sb.append("</bibl>"); - } + if (refOpen) { + sb.append("</bibl>"); + } sb.append("\n </listBibl>\n" + - " </text>\n" + - "</tei>\n"); + " </text>\n" + + "</tei>\n"); - return Pair.of(sb.toString(), featureVector); + return Pair.of(sb.toString(), featureVector); } - - private boolean testClosingTag(StringBuilder buffer, + private boolean testClosingTag(StringBuilder buffer, String currentTag, String lastTag, - boolean addSpace, - boolean addEOL) { + boolean addSpace, + boolean addEOL) { boolean res = false; if (!currentTag.equals(lastTag)) { res = true; // we close the current tag if (lastTag.equals("<other>")) { - if (addEOL) + if (addEOL) buffer.append("<lb/>"); - if (addSpace) + if (addSpace) buffer.append(" "); buffer.append("\n"); } else if (lastTag.equals("<label>")) { - buffer.append("</label>"); - if (addEOL) + buffer.append("</label>"); + if (addEOL) buffer.append("<lb/>"); - if (addSpace) + if (addSpace) buffer.append(" "); } else if (lastTag.equals("<reference>")) { - if (addEOL) + if (addEOL) buffer.append("<lb/>"); - if (addSpace) + if (addSpace) buffer.append(" "); buffer.append("</bibl>\n"); } else { @@ -517,42 +364,40 @@ private String 
writeField(String currentTag, String field, String outField, boolean addSpace, - boolean addEOL, - int nbIndent) { + boolean addEOL, + int nbIndent) { String result = null; if (currentTag.endsWith(field)) { if (currentTag.endsWith("<other>")) { result = ""; - if (currentTag.equals("I-<other>")) { - result += "\n"; - for (int i = 0; i < nbIndent; i++) { - result += " "; - } - } - if (addEOL) + if (currentTag.equals("I-<other>")) { + result += "\n"; + for (int i = 0; i < nbIndent; i++) { + result += " "; + } + } + if (addEOL) result += "<lb/>"; - if (addSpace) + if (addSpace) result += " "; result += TextUtilities.HTMLEncode(token); - } - else if ((lastTag != null) && currentTag.endsWith(lastTag)) { + } else if ((lastTag != null) && currentTag.endsWith(lastTag)) { result = ""; - if (addEOL) + if (addEOL) result += "<lb/>"; - if (addSpace) + if (addSpace) result += " "; - if (currentTag.startsWith("I-")) - result += outField; + if (currentTag.startsWith("I-")) + result += outField; result += TextUtilities.HTMLEncode(token); - } - else { + } else { result = ""; - if (outField.length() > 0) { - for (int i = 0; i < nbIndent; i++) { - result += " "; - } - } - if (addEOL) + if (outField.length() > 0) { + for (int i = 0; i < nbIndent; i++) { + result += " "; + } + } + if (addEOL) result += "<lb/>"; if (addSpace) result += " "; @@ -562,138 +407,136 @@ else if ((lastTag != null) && currentTag.endsWith(lastTag)) { return result; } - static public Pair<String,List<LayoutToken>> getReferencesSectionFeatured(Document doc, - SortedSet<DocumentPiece> referencesParts) { - if ((referencesParts == null) || (referencesParts.size() == 0)) { - return null; - } - FeatureFactory featureFactory = FeatureFactory.getInstance(); - List<Block> blocks = doc.getBlocks(); - if ( (blocks == null) || blocks.size() == 0) { - return null; - } - - StringBuilder citations = new StringBuilder(); + static public Pair<String, List<LayoutToken>> getReferencesSectionFeatured(Document doc, + 
SortedSet<DocumentPiece> referencesParts) { + if ((referencesParts == null) || (referencesParts.size() == 0)) { + return null; + } + FeatureFactory featureFactory = FeatureFactory.getInstance(); + List<Block> blocks = doc.getBlocks(); + if ((blocks == null) || blocks.size() == 0) { + return null; + } + + StringBuilder citations = new StringBuilder(); boolean newline; int n; // overall token number - FeaturesVectorReferenceSegmenter features; - FeaturesVectorReferenceSegmenter previousFeatures = null; - boolean endblock; - boolean startblock; - //int mm = 0; // token position in the sentence + FeaturesVectorReferenceSegmenter features; + FeaturesVectorReferenceSegmenter previousFeatures = null; + boolean endblock; + boolean startblock; + // int mm = 0; // token position in the sentence int nn; // token position in the line - double lineStartX = Double.NaN; - boolean indented = false; + double lineStartX = Double.NaN; + boolean indented = false; - List<LayoutToken> tokenizationsReferences = new ArrayList<LayoutToken>(); - List<LayoutToken> tokenizations = doc.getTokenizations(); + List<LayoutToken> tokenizationsReferences = new ArrayList<LayoutToken>(); + List<LayoutToken> tokenizations = doc.getTokenizations(); - int maxLineLength = 1; - //List<Integer> lineLengths = new ArrayList<Integer>(); - int currentLineLength = 0; - //int lineIndex = 0; + int maxLineLength = 1; + // List<Integer> lineLengths = new ArrayList<Integer>(); + int currentLineLength = 0; + // int lineIndex = 0; - // we calculate current max line length and intialize the body tokenization structure - for(DocumentPiece docPiece : referencesParts) { - DocumentPointer dp1 = docPiece.getLeft(); - DocumentPointer dp2 = docPiece.getRight(); + // we calculate current max line length and intialize the body tokenization + // structure + for (DocumentPiece docPiece : referencesParts) { + DocumentPointer dp1 = docPiece.getLeft(); + DocumentPointer dp2 = docPiece.getRight(); int tokens = dp1.getTokenDocPos(); 
int tokene = dp2.getTokenDocPos(); for (int i = tokens; i <= tokene; i++) { tokenizationsReferences.add(tokenizations.get(i)); - currentLineLength += tokenizations.get(i).getText().length(); - if (tokenizations.get(i).t().equals("\n") || tokenizations.get(i).t().equals("\r") ) { - //lineLengths.add(currentLineLength); - if (currentLineLength > maxLineLength) - maxLineLength = currentLineLength; - currentLineLength = 0; - } + currentLineLength += tokenizations.get(i).getText().length(); + if (tokenizations.get(i).t().equals("\n") || tokenizations.get(i).t().equals("\r")) { + // lineLengths.add(currentLineLength); + if (currentLineLength > maxLineLength) + maxLineLength = currentLineLength; + currentLineLength = 0; + } } - } - - for(DocumentPiece docPiece : referencesParts) { - DocumentPointer dp1 = docPiece.getLeft(); - DocumentPointer dp2 = docPiece.getRight(); - -/*for(int i=dp1.getTokenDocPos(); i<dp2.getTokenDocPos(); i++) { - System.out.print(tokenizations.get(i)); -} -System.out.println(""); -*/ - //currentLineLength = lineLengths.get(lineIndex); - nn = 0; - int tokenIndex = 0; - int blockIndex = dp1.getBlockPtr(); - Block block = null; - List<LayoutToken> tokens; - boolean previousNewline = true; - currentLineLength = 0; - String currentLineProfile = null; + } + + for (DocumentPiece docPiece : referencesParts) { + DocumentPointer dp1 = docPiece.getLeft(); + DocumentPointer dp2 = docPiece.getRight(); + + /* + * for(int i=dp1.getTokenDocPos(); i<dp2.getTokenDocPos(); i++) { + * System.out.print(tokenizations.get(i)); + * } + * System.out.println(""); + */ + // currentLineLength = lineLengths.get(lineIndex); + nn = 0; + int tokenIndex = 0; + int blockIndex = dp1.getBlockPtr(); + Block block = null; + List<LayoutToken> tokens; + boolean previousNewline = true; + currentLineLength = 0; + String currentLineProfile = null; for (n = dp1.getTokenDocPos(); n <= dp2.getTokenDocPos(); n++) { String text = tokenizations.get(n).getText(); - if (text == null) { - continue; 
- } - - // set corresponding block - if ( (block != null) && (n > block.getEndToken()) ) { - blockIndex++; - tokenIndex = 0; - currentLineLength = 0; - currentLineProfile = null; - } - - if (blockIndex<blocks.size()) { - block = blocks.get(blockIndex); - if (n == block.getStartToken()) { - startblock = true; - endblock = false; - } - else if (n == block.getEndToken()) { - startblock = false; - endblock = true; - } - else { - startblock = false; - endblock = false; - } - } - else { - block = null; - startblock = false; - endblock = false; - } - // set corresponding token - if (block != null) - tokens = block.getTokens(); - else - tokens = null; - - if (text.equals("\n") || text.equals("\r")) { - previousNewline = true; - nn = 0; - currentLineLength = 0; - currentLineProfile = null; - //lineIndex++; - //currentLineLength = lineLengths.get(lineIndex); + if (text == null) { continue; } - else { + + // set corresponding block + if ((block != null) && (n > block.getEndToken())) { + blockIndex++; + tokenIndex = 0; + currentLineLength = 0; + currentLineProfile = null; + } + + if (blockIndex < blocks.size()) { + block = blocks.get(blockIndex); + if (n == block.getStartToken()) { + startblock = true; + endblock = false; + } else if (n == block.getEndToken()) { + startblock = false; + endblock = true; + } else { + startblock = false; + endblock = false; + } + } else { + block = null; + startblock = false; + endblock = false; + } + // set corresponding token + if (block != null) + tokens = block.getTokens(); + else + tokens = null; + + if (text.equals("\n") || text.equals("\r")) { + previousNewline = true; + nn = 0; + currentLineLength = 0; + currentLineProfile = null; + // lineIndex++; + // currentLineLength = lineLengths.get(lineIndex); + continue; + } else { newline = false; - nn += text.length(); // +1 for segmentation symbol - } + nn += text.length(); // +1 for segmentation symbol + } - if (text.equals(" ") || text.equals("\t")) { + if (text.equals(" ") || 
text.equals("\t")) { nn++; continue; - } + } - if (text.trim().length() == 0) { - continue; - } + if (text.trim().length() == 0) { + continue; + } LayoutToken token = null; if (tokens != null) { @@ -711,23 +554,25 @@ else if (n == block.getEndToken()) { if (previousNewline) { newline = true; previousNewline = false; - if (token != null && previousFeatures != null) { - double previousLineStartX = lineStartX; + if (token != null && previousFeatures != null) { + double previousLineStartX = lineStartX; lineStartX = token.getX(); double characterWidth = token.width / token.getText().length(); - if (!Double.isNaN(previousLineStartX)) { - // Indentation if line start is > 1 character width to the right of previous line start + if (!Double.isNaN(previousLineStartX)) { + // Indentation if line start is > 1 character width to the right of previous + // line start if (lineStartX - previousLineStartX > characterWidth) - indented = true; - // Indentation ends if line start is > 1 character width to the left of previous line start + indented = true; + // Indentation ends if line start is > 1 character width to the left of previous + // line start else if (previousLineStartX - lineStartX > characterWidth) indented = false; // Otherwise indentation is unchanged - } - } + } + } } - if (TextUtilities.filterLine(text)) { + if (TextUtilities.filterLine(text)) { continue; } @@ -762,53 +607,50 @@ else if (previousLineStartX - lineStartX > characterWidth) } - if ( (n == 0) || (previousNewline) ) { + if ((n == 0) || (previousNewline)) { features.lineStatus = "LINESTART"; - if (n == 0) - features.blockStatus = "BLOCKSTART"; - nn = 0; + if (n == 0) + features.blockStatus = "BLOCKSTART"; + nn = 0; } if (indented) { - features.alignmentStatus = "LINEINDENT"; - } - else { - features.alignmentStatus = "ALIGNEDLEFT"; + features.alignmentStatus = "LINEINDENT"; + } else { + features.alignmentStatus = "ALIGNEDLEFT"; } - { + { // look ahead... 
boolean endline = true; int ii = 1; boolean endloop = false; - String accumulated = text; + String accumulated = text; while ((n + ii < tokenizations.size()) && (!endloop)) { String tok = tokenizations.get(n + ii).getText(); if (tok != null) { - if (currentLineProfile == null) - accumulated += tok; + if (currentLineProfile == null) + accumulated += tok; if (tok.equals("\n") || tok.equals("\r")) { endloop = true; - if (currentLineLength ==0) { - currentLineLength = accumulated.length(); - } - if (currentLineProfile == null) { - currentLineProfile = TextUtilities.punctuationProfile(accumulated); - } - } - else if (!tok.equals(" ") && !tok.equals("\t")) { - endline = false; - } - else { + if (currentLineLength == 0) { + currentLineLength = accumulated.length(); + } + if (currentLineProfile == null) { + currentLineProfile = TextUtilities.punctuationProfile(accumulated); + } + } else if (!tok.equals(" ") && !tok.equals("\t")) { + endline = false; + } else { if (TextUtilities.filterLine(tok)) { endloop = true; - if (currentLineLength ==0) { - currentLineLength = accumulated.length(); - } - if (currentLineProfile == null) { - currentLineProfile = TextUtilities.punctuationProfile(accumulated); - } + if (currentLineLength == 0) { + currentLineLength = accumulated.length(); + } + if (currentLineProfile == null) { + currentLineProfile = TextUtilities.punctuationProfile(accumulated); + } } } } @@ -818,23 +660,22 @@ else if (!tok.equals(" ") && !tok.equals("\t")) { endline = true; } - if (endline && (block != null) && (n+ii == block.getEndToken())) { - endblock = true; - } + if (endline && (block != null) && (n + ii == block.getEndToken())) { + endblock = true; + } ii++; } if ((!endline) && !(newline)) { features.lineStatus = "LINEIN"; - } - else if (!newline) { + } else if (!newline) { features.lineStatus = "LINEEND"; previousNewline = true; } - if (startblock) { - features.blockStatus = "BLOCKSTART"; - } + if (startblock) { + features.blockStatus = "BLOCKSTART"; + } if 
((!endblock) && (features.blockStatus == null)) features.blockStatus = "BLOCKIN"; else if (features.blockStatus == null) { @@ -890,10 +731,10 @@ else if (features.blockStatus == null) { features.http = true; } - if ( (token != null) && (token.isBold()) ) + if ((token != null) && (token.isBold())) features.bold = true; - if ( (token != null) && (token.isItalic()) ) + if ((token != null) && (token.isItalic())) features.italic = true; if (features.capitalisation == null) @@ -904,24 +745,24 @@ else if (features.blockStatus == null) { if (features.punctType == null) features.punctType = "NOPUNCT"; -//System.out.println(nn + "\t" + currentLineLength + "\t" + maxLineLength); + // System.out.println(nn + "\t" + currentLineLength + "\t" + maxLineLength); features.lineLength = featureFactory - .linearScaling(currentLineLength, maxLineLength, LINESCALE); + .linearScaling(currentLineLength, maxLineLength, LINESCALE); - features.relativePosition = featureFactory - .linearScaling(nn, currentLineLength, LINESCALE); + features.relativePosition = featureFactory + .linearScaling(nn, currentLineLength, LINESCALE); - features.punctuationProfile = currentLineProfile; + features.punctuationProfile = currentLineProfile; if (previousFeatures != null) citations.append(previousFeatures.printVector()); - //mm++; + // mm++; previousFeatures = features; - } - } - if (previousFeatures != null) - citations.append(previousFeatures.printVector()); + } + } + if (previousFeatures != null) + citations.append(previousFeatures.printVector()); - return Pair.of(citations.toString(), tokenizationsReferences); - } + return Pair.of(citations.toString(), tokenizationsReferences); + } } diff --git a/grobid-core/src/main/java/org/grobid/core/engines/tagging/ClassifierFactory.java b/grobid-core/src/main/java/org/grobid/core/engines/tagging/ClassifierFactory.java new file mode 100644 index 0000000000..06a9c42b5a --- /dev/null +++ b/grobid-core/src/main/java/org/grobid/core/engines/tagging/ClassifierFactory.java 
@@ -0,0 +1,99 @@ +package org.grobid.core.engines.tagging; + +import org.grobid.core.engines.tagging.delft.OnnxClassificationModel; +import org.grobid.core.jni.DeLFTClassifierModel; +import org.grobid.core.utilities.GrobidProperties; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.File; +import java.nio.file.Path; +import java.util.HashMap; +import java.util.Map; + +/** + * Factory for text classification models. + * + * Supports both JEP-based DeLFT classifiers and ONNX-based classifiers. + * The engine is selected based on grobid.yaml configuration. + */ +public class ClassifierFactory { + + private static final Logger LOGGER = LoggerFactory.getLogger(ClassifierFactory.class); + + private static Map<String, GenericClassifier> cache = new HashMap<>(); + + private ClassifierFactory() { + } + + /** + * Get a classifier for the given model name. + * + * @param modelName The model name (e.g., "copyright", "license") + * @return A GenericClassifier instance + */ + public static synchronized GenericClassifier getClassifier(String modelName) { + GrobidCRFEngine engine = GrobidProperties.getGrobidEngine(modelName); + String architecture; + if (engine == GrobidCRFEngine.ONNX) { + architecture = GrobidProperties.getOnnxArchitecture(modelName); + } else { + architecture = GrobidProperties.getDelftArchitecture(modelName); + } + return getClassifier(modelName, architecture); + } + + /** + * Get a classifier for the given model name and architecture. 
+ * + * @param modelName The model name (e.g., "copyright", "license") + * @param architecture The DeLFT architecture (e.g., "gru") + * @return A GenericClassifier instance + */ + public static synchronized GenericClassifier getClassifier(String modelName, String architecture) { + GenericClassifier classifier = cache.get(modelName); + if (classifier == null) { + GrobidCRFEngine engine = GrobidProperties.getGrobidEngine(modelName); + + if (engine == null) { + throw new IllegalStateException("No engine configured for model: " + modelName); + } + + switch (engine) { + case DELFT: + LOGGER.info("Creating DeLFT classifier for model: {}", modelName); + classifier = new DeLFTClassifierModel(modelName, architecture); + break; + case ONNX: + LOGGER.info("Creating ONNX classifier for model: {}", modelName); + try { + // Model path: {grobid-home}/models/{modelName}-{architecture}.onnx/ + File modelPath = GrobidProperties.getModelPath(); + Path modelDir = modelPath.toPath().resolve(modelName + "-" + architecture + ".onnx"); + classifier = new OnnxClassificationModel(modelDir); + } catch (Exception e) { + throw new RuntimeException("Failed to load ONNX classification model: " + modelName, e); + } + break; + default: + throw new IllegalStateException("Unsupported engine for classification: " + engine); + } + cache.put(modelName, classifier); + } + return classifier; + } + + /** + * Clear the classifier cache. 
+ */ + public static synchronized void clearCache() { + for (GenericClassifier classifier : cache.values()) { + try { + classifier.close(); + } catch (Exception e) { + LOGGER.warn("Error closing classifier", e); + } + } + cache.clear(); + } +} diff --git a/grobid-core/src/main/java/org/grobid/core/engines/tagging/GenericClassifier.java b/grobid-core/src/main/java/org/grobid/core/engines/tagging/GenericClassifier.java new file mode 100644 index 0000000000..0ab27d5f53 --- /dev/null +++ b/grobid-core/src/main/java/org/grobid/core/engines/tagging/GenericClassifier.java @@ -0,0 +1,19 @@ +package org.grobid.core.engines.tagging; + +import java.io.Closeable; +import java.util.List; + +/** + * Common interface for text classification models. + * Supports both JEP-based DeLFT classifiers and ONNX-based classifiers. + */ +public interface GenericClassifier extends Closeable { + + /** + * Classify texts in batch. + * + * @param texts List of texts to classify + * @return JSON string with classification results in DeLFT format + */ + String classify(List<String> texts) throws Exception; +} diff --git a/grobid-core/src/main/java/org/grobid/core/engines/tagging/GrobidCRFEngine.java b/grobid-core/src/main/java/org/grobid/core/engines/tagging/GrobidCRFEngine.java index 0c0014a4c8..28ce7998ff 100644 --- a/grobid-core/src/main/java/org/grobid/core/engines/tagging/GrobidCRFEngine.java +++ b/grobid-core/src/main/java/org/grobid/core/engines/tagging/GrobidCRFEngine.java @@ -3,12 +3,13 @@ import java.util.Arrays; /** - * Sequence labeling engine in GROBID + * Sequence labeling engine in GROBID */ public enum GrobidCRFEngine { WAPITI("wapiti"), CRFPP("crf"), DELFT("delft"), + ONNX("onnx"), DUMMY("dummy"); private final String ext; @@ -36,5 +37,4 @@ public static GrobidCRFEngine get(String name) { "', possible values are: " + Arrays.toString(values())); } - } diff --git a/grobid-core/src/main/java/org/grobid/core/engines/tagging/OnnxTagger.java 
b/grobid-core/src/main/java/org/grobid/core/engines/tagging/OnnxTagger.java new file mode 100644 index 0000000000..5ce2f12b9a --- /dev/null +++ b/grobid-core/src/main/java/org/grobid/core/engines/tagging/OnnxTagger.java @@ -0,0 +1,68 @@ +package org.grobid.core.engines.tagging; + +import com.google.common.base.Joiner; +import org.grobid.core.GrobidModel; +import org.grobid.core.engines.tagging.delft.OnnxSequenceLabellingModel; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.File; +import java.io.IOException; + +/** + * ONNX-based tagger for sequence labeling. + * Uses DeLFT models exported to ONNX format with CRF decoding in pure Java. + * + * This tagger provides a pure Java alternative to the DeLFT tagger, which + * requires + * Python/JEP at runtime. ONNX models are loaded from directories with the + * naming + * convention: {model-name}-{architecture}.onnx + */ +public class OnnxTagger implements GenericTagger { + + private static final Logger LOGGER = LoggerFactory.getLogger(OnnxTagger.class); + + private final OnnxSequenceLabellingModel model; + private final GrobidModel grobidModel; + + /** + * Create an ONNX tagger for the given model. 
+ * + * @param grobidModel The GROBID model to use + * @param architecture The DeLFT architecture (e.g., "BidLSTM_CRF_FEATURES") - + * not used, included for API compatibility + */ + public OnnxTagger(GrobidModel grobidModel, String architecture) { + this.grobidModel = grobidModel; + + // Model path is now correctly resolved by GrobidProperties.getModelPath() for + // ONNX engine + File modelDir = new File(grobidModel.getModelPath()); + + LOGGER.info("Loading ONNX model from: {}", modelDir); + + try { + this.model = new OnnxSequenceLabellingModel(modelDir.toPath()); + } catch (Exception e) { + throw new RuntimeException("Failed to load ONNX model: " + modelDir, e); + } + } + + @Override + public String label(Iterable<String> data) { + return label(Joiner.on('\n').join(data)); + } + + @Override + public String label(String data) { + return model.labelGrobidInput(data); + } + + @Override + public void close() throws IOException { + if (model != null) { + model.close(); + } + } +} diff --git a/grobid-core/src/main/java/org/grobid/core/engines/tagging/TaggerFactory.java b/grobid-core/src/main/java/org/grobid/core/engines/tagging/TaggerFactory.java index 639b1011fc..7b7440108e 100644 --- a/grobid-core/src/main/java/org/grobid/core/engines/tagging/TaggerFactory.java +++ b/grobid-core/src/main/java/org/grobid/core/engines/tagging/TaggerFactory.java @@ -14,7 +14,7 @@ /** * Factory for a sequence labelling, aka a tagger, instance. 
- * Supported implementations are CRF (CRFPP, Wapiti) and Deep Learning (DeLFT) + * Supported implementations are CRF (CRFPP, Wapiti) and Deep Learning (DeLFT) * */ public class TaggerFactory { @@ -22,7 +22,8 @@ public class TaggerFactory { private static Map<GrobidModel, GenericTagger> cache = new HashMap<>(); - private TaggerFactory() {} + private TaggerFactory() { + } public static synchronized GenericTagger getTagger(GrobidModel model) { return getTagger(model, GrobidProperties.getGrobidEngine(model), GrobidProperties.getDelftArchitecture(model)); @@ -35,11 +36,11 @@ public static synchronized GenericTagger getTagger(GrobidModel model, GrobidCRFE public static synchronized GenericTagger getTagger(GrobidModel model, GrobidCRFEngine engine, String architecture) { GenericTagger t = cache.get(model); if (t == null) { - if(model.equals(GrobidModels.DUMMY)) { + if (model.equals(GrobidModels.DUMMY)) { return new DummyTagger(model); } - if(engine != null) { + if (engine != null) { switch (engine) { case CRFPP: t = new CRFPPTagger(model); @@ -50,12 +51,17 @@ public static synchronized GenericTagger getTagger(GrobidModel model, GrobidCRFE case DELFT: t = new DeLFTTagger(model, architecture); break; + case ONNX: + t = new OnnxTagger(model, GrobidProperties.getOnnxArchitecture(model)); + break; default: - throw new IllegalStateException("Unsupported Grobid sequence labelling engine: " + engine.getExt()); + throw new IllegalStateException( + "Unsupported Grobid sequence labelling engine: " + engine.getExt()); } cache.put(model, t); } else { - throw new IllegalStateException("Unsupported or null Grobid sequence labelling engine: " + engine.getExt()); + throw new IllegalStateException( + "Unsupported or null Grobid sequence labelling engine: " + engine.getExt()); } } return t; diff --git a/grobid-core/src/main/java/org/grobid/core/engines/tagging/delft/CRFDecoder.java b/grobid-core/src/main/java/org/grobid/core/engines/tagging/delft/CRFDecoder.java new file mode 100644 index 
0000000000..02d95e29b7 --- /dev/null +++ b/grobid-core/src/main/java/org/grobid/core/engines/tagging/delft/CRFDecoder.java @@ -0,0 +1,186 @@ +package org.grobid.core.engines.tagging.delft; + +import com.google.gson.Gson; +import com.google.gson.JsonObject; + +import java.io.FileReader; +import java.io.IOException; +import java.nio.file.Path; +import java.util.Arrays; + +/** + * CRF Viterbi decoder for use with ONNX-exported DeLFT models. + * + * Performs Viterbi decoding given emission scores and learned transition + * matrices. + * The transition matrices are loaded from JSON exported by Python. + */ +public class CRFDecoder { + + private final int numTags; + private final float[][] transitions; // [from_tag][to_tag] + private final float[] startTransitions; // [num_tags] + private final float[] endTransitions; // [num_tags] + + /** + * Create a CRF decoder with learned parameters. + * + * @param transitions Transition matrix [from_tag][to_tag] + * @param startTransitions Start transition scores + * @param endTransitions End transition scores + */ + public CRFDecoder(float[][] transitions, float[] startTransitions, float[] endTransitions) { + this.numTags = transitions.length; + this.transitions = transitions; + this.startTransitions = startTransitions; + this.endTransitions = endTransitions; + } + + /** + * Load CRF parameters from JSON file exported by Python. 
+ * + * @param jsonPath Path to crf_params.json + * @return CRFDecoder instance + */ + public static CRFDecoder fromJson(Path jsonPath) throws IOException { + Gson gson = new Gson(); + try (FileReader reader = new FileReader(jsonPath.toFile())) { + JsonObject json = gson.fromJson(reader, JsonObject.class); + + // Parse transitions [num_tags][num_tags] + double[][] transitionsDouble = gson.fromJson(json.get("transitions"), double[][].class); + float[][] transitions = toFloatArray2D(transitionsDouble); + + // Parse start transitions [num_tags] + double[] startDouble = gson.fromJson(json.get("startTransitions"), double[].class); + float[] startTransitions = toFloatArray(startDouble); + + // Parse end transitions [num_tags] + double[] endDouble = gson.fromJson(json.get("endTransitions"), double[].class); + float[] endTransitions = toFloatArray(endDouble); + + return new CRFDecoder(transitions, startTransitions, endTransitions); + } + } + + private static float[] toFloatArray(double[] doubles) { + float[] floats = new float[doubles.length]; + for (int i = 0; i < doubles.length; i++) { + floats[i] = (float) doubles[i]; + } + return floats; + } + + private static float[][] toFloatArray2D(double[][] doubles) { + float[][] floats = new float[doubles.length][]; + for (int i = 0; i < doubles.length; i++) { + floats[i] = toFloatArray(doubles[i]); + } + return floats; + } + + /** + * Decode the best tag sequence using Viterbi algorithm. + * + * @param emissions Emission scores from the model [seq_len][num_tags] + * @param mask Mask indicating valid tokens (true = valid). Can be null. 
+ * @return Best tag sequence as array of tag indices + */ + public int[] decode(float[][] emissions, boolean[] mask) { + int seqLength = emissions.length; + + if (mask == null) { + mask = new boolean[seqLength]; + Arrays.fill(mask, true); + } + + // Find actual sequence length (excluding padding) + int actualLength = 0; + for (int i = 0; i < seqLength; i++) { + if (mask[i]) + actualLength = i + 1; + } + + if (actualLength == 0) { + return new int[0]; + } + + // Viterbi score matrix [seq_len][num_tags] + float[][] viterbiScore = new float[actualLength][numTags]; + + // Backpointer matrix [seq_len][num_tags] + int[][] backpointers = new int[actualLength][numTags]; + + // Initialize first position with start transitions + emissions + for (int tag = 0; tag < numTags; tag++) { + viterbiScore[0][tag] = startTransitions[tag] + emissions[0][tag]; + } + + // Forward pass + for (int t = 1; t < actualLength; t++) { + for (int currentTag = 0; currentTag < numTags; currentTag++) { + float bestScore = Float.NEGATIVE_INFINITY; + int bestPrevTag = 0; + + for (int prevTag = 0; prevTag < numTags; prevTag++) { + float score = viterbiScore[t - 1][prevTag] + + transitions[prevTag][currentTag] + + emissions[t][currentTag]; + + if (score > bestScore) { + bestScore = score; + bestPrevTag = prevTag; + } + } + + viterbiScore[t][currentTag] = bestScore; + backpointers[t][currentTag] = bestPrevTag; + } + } + + // Add end transitions to find best final tag + float bestFinalScore = Float.NEGATIVE_INFINITY; + int bestLastTag = 0; + + for (int tag = 0; tag < numTags; tag++) { + float score = viterbiScore[actualLength - 1][tag] + endTransitions[tag]; + if (score > bestFinalScore) { + bestFinalScore = score; + bestLastTag = tag; + } + } + + // Backtrack to find best sequence + int[] bestPath = new int[actualLength]; + bestPath[actualLength - 1] = bestLastTag; + + for (int t = actualLength - 2; t >= 0; t--) { + bestPath[t] = backpointers[t + 1][bestPath[t + 1]]; + } + + return bestPath; + } + + /** 
+ * Decode multiple sequences (batch processing). + * + * @param emissions Batch of emission scores [batch_size][seq_len][num_tags] + * @param masks Masks for each sequence. Can be null. + * @return Array of best tag sequences + */ + public int[][] decodeBatch(float[][][] emissions, boolean[][] masks) { + int batchSize = emissions.length; + int[][] results = new int[batchSize][]; + + for (int i = 0; i < batchSize; i++) { + boolean[] mask = (masks != null) ? masks[i] : null; + results[i] = decode(emissions[i], mask); + } + + return results; + } + + public int getNumTags() { + return numTags; + } +} diff --git a/grobid-core/src/main/java/org/grobid/core/engines/tagging/delft/OnnxClassificationModel.java b/grobid-core/src/main/java/org/grobid/core/engines/tagging/delft/OnnxClassificationModel.java new file mode 100644 index 0000000000..aad802fec9 --- /dev/null +++ b/grobid-core/src/main/java/org/grobid/core/engines/tagging/delft/OnnxClassificationModel.java @@ -0,0 +1,235 @@ +package org.grobid.core.engines.tagging.delft; + +import ai.onnxruntime.OrtException; +import com.google.gson.Gson; +import com.google.gson.JsonArray; +import com.google.gson.JsonObject; +import org.grobid.core.analyzers.GrobidAnalyzer; +import org.grobid.core.engines.tagging.GenericClassifier; +import org.grobid.core.layout.LayoutToken; +import org.grobid.core.utilities.GrobidProperties; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.FileReader; +import java.io.IOException; +import java.nio.file.Path; +import java.time.LocalDateTime; +import java.time.format.DateTimeFormatter; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +/** + * ONNX-based text classification model. + * + * Provides pure Java inference for classification models exported from DeLFT, + * eliminating the need for JEP/Python at runtime. 
+ * + * Model directory structure: + * - classifier.onnx : The ONNX model + * - config.json : Model config (maxlen, wordEmbeddingSize, embeddingsName) + * - labels.json : Label mappings (labels array, indexToLabel) + */ +public class OnnxClassificationModel implements GenericClassifier { + + private static final Logger LOGGER = LoggerFactory.getLogger(OnnxClassificationModel.class); + + private final OnnxClassificationRunner modelRunner; + private final WordEmbeddings embeddings; + private final String[] labels; + private final Map<String, Integer> labelToIndex; + private final int maxlen; + private final int embeddingSize; + private final String modelName; + + /** + * Load an ONNX classification model from a directory. + * + * @param modelDir Directory containing classifier.onnx, config.json, + * labels.json + */ + public OnnxClassificationModel(Path modelDir) throws IOException, OrtException { + Gson gson = new Gson(); + + // Read config.json + Path configPath = modelDir.resolve("config.json"); + JsonObject config; + try (FileReader reader = new FileReader(configPath.toFile())) { + config = gson.fromJson(reader, JsonObject.class); + } + + this.modelName = config.get("modelName").getAsString(); + this.maxlen = config.get("maxlen").getAsInt(); + this.embeddingSize = config.get("wordEmbeddingSize").getAsInt(); + String embeddingsName = config.get("embeddingsName").getAsString(); + + // Read labels.json + Path labelsPath = modelDir.resolve("labels.json"); + JsonObject labelsJson; + try (FileReader reader = new FileReader(labelsPath.toFile())) { + labelsJson = gson.fromJson(reader, JsonObject.class); + } + + JsonArray labelsArray = labelsJson.getAsJsonArray("labels"); + this.labels = new String[labelsArray.size()]; + for (int i = 0; i < labelsArray.size(); i++) { + this.labels[i] = labelsArray.get(i).getAsString(); + } + + JsonObject labelToIndexJson = labelsJson.getAsJsonObject("labelToIndex"); + this.labelToIndex = new HashMap<>(); + for (String label : 
labelToIndexJson.keySet()) { + this.labelToIndex.put(label, labelToIndexJson.get(label).getAsInt()); + } + + // Load embeddings from DeLFT path + String delftPath = GrobidProperties.getDeLFTFilePath(); + Path embeddingsPath = Path.of(delftPath, "data", "db", embeddingsName); + + LOGGER.info("Loading ONNX classification model from: {}", modelDir); + LOGGER.info("Loading embeddings from: {}", embeddingsPath); + + // Load ONNX model via runner + this.modelRunner = new OnnxClassificationRunner(modelDir.resolve("classifier.onnx")); + + // Load embeddings + this.embeddings = WordEmbeddings.getInstance(embeddingsPath, embeddingSize); + + LOGGER.info("ONNX classification model {} loaded", modelName); + LOGGER.info("Labels: {}", String.join(", ", labels)); + } + + /** + * Classify texts and return results in DeLFT JSON format. + * + * @param texts List of texts to classify + * @return JSON string matching DeLFT classifier output format + */ + @Override + public String classify(List<String> texts) throws Exception { + if (texts == null || texts.isEmpty()) { + return null; + } + + int batchSize = texts.size(); + float[][][] batchEmbeddings = new float[batchSize][maxlen][embeddingSize]; + + // Process each text + for (int b = 0; b < batchSize; b++) { + String text = texts.get(b); + + // Tokenize using GrobidAnalyzer (same as sequence labeling) + List<LayoutToken> tokens = GrobidAnalyzer.getInstance().tokenizeWithLayoutToken(text); + List<String> words = new ArrayList<>(); + for (LayoutToken token : tokens) { + String txt = token.getText(); + if (txt != null && !txt.trim().isEmpty()) { + words.add(txt); + } + } + + // Get embeddings for words + String[] wordArray = words.toArray(new String[0]); + float[][] wordEmbs = embeddings.getEmbeddings(wordArray); + + // Copy to batch (pad/truncate to maxlen) + int numTokens = Math.min(wordEmbs.length, maxlen); + for (int i = 0; i < numTokens; i++) { + batchEmbeddings[b][i] = wordEmbs[i]; + } + // Rest is zero-padded (default float array 
initialization) + } + + // Run inference + float[][] predictions = runInference(batchEmbeddings); + + // Format as DeLFT JSON + return formatAsJson(texts, predictions); + } + + /** + * Run ONNX inference. + * + * @param embeddingsInput Input embeddings [batch, maxlen, embeddingSize] + * @return Predictions [batch, numClasses] (softmax-normalized probabilities) + */ + private float[][] runInference(float[][][] embeddingsInput) throws OrtException { + // Delegate to runner for ONNX inference + float[][] logits = modelRunner.runInference(embeddingsInput); + + // Apply softmax to convert logits to probabilities + int batchSize = logits.length; + int numClasses = logits[0].length; + float[][] predictions = new float[batchSize][numClasses]; + for (int b = 0; b < batchSize; b++) { + predictions[b] = softmax(logits[b]); + } + + return predictions; + } + + /** + * Apply softmax activation to convert logits to probabilities. + * For single-label classification (copyright, license). + */ + private float[] softmax(float[] logits) { + float max = Float.NEGATIVE_INFINITY; + for (float v : logits) { + if (v > max) + max = v; + } + + float sum = 0.0f; + float[] exp = new float[logits.length]; + for (int i = 0; i < logits.length; i++) { + exp[i] = (float) Math.exp(logits[i] - max); // subtract max for numerical stability + sum += exp[i]; + } + + float[] probs = new float[logits.length]; + for (int i = 0; i < logits.length; i++) { + probs[i] = exp[i] / sum; + } + return probs; + } + + /** + * Format predictions as DeLFT-compatible JSON. 
+ */ + private String formatAsJson(List<String> texts, float[][] predictions) { + JsonObject root = new JsonObject(); + + JsonArray classifications = new JsonArray(); + for (int i = 0; i < texts.size(); i++) { + JsonObject classificationEntry = new JsonObject(); + classificationEntry.addProperty("text", texts.get(i)); + + // Add probability for each label + for (int j = 0; j < labels.length; j++) { + classificationEntry.addProperty(labels[j], predictions[i][j]); + } + + classifications.add(classificationEntry); + } + + root.add("classifications", classifications); + root.addProperty("model", modelName); + root.addProperty("software", "DeLFT"); + root.addProperty("date", LocalDateTime.now().format(DateTimeFormatter.ISO_LOCAL_DATE_TIME)); + + Gson gson = new Gson(); + return gson.toJson(root); + } + + @Override + public void close() throws IOException { + if (modelRunner != null) { + modelRunner.close(); + } + if (embeddings != null) { + embeddings.close(); + } + } +} diff --git a/grobid-core/src/main/java/org/grobid/core/engines/tagging/delft/OnnxClassificationRunner.java b/grobid-core/src/main/java/org/grobid/core/engines/tagging/delft/OnnxClassificationRunner.java new file mode 100644 index 0000000000..38f7f05393 --- /dev/null +++ b/grobid-core/src/main/java/org/grobid/core/engines/tagging/delft/OnnxClassificationRunner.java @@ -0,0 +1,137 @@ +package org.grobid.core.engines.tagging.delft; + +import ai.onnxruntime.*; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.Closeable; +import java.nio.FloatBuffer; +import java.nio.file.Path; +import java.util.HashMap; +import java.util.Map; + +/** + * ONNX Runtime wrapper for running DeLFT classification models. + * + * Handles low-level ONNX operations: loading models, creating tensors, + * running inference, and managing resources. The higher-level + * OnnxClassificationModel handles embeddings, tokenization, and output + * formatting. 
+ */ +public class OnnxClassificationRunner implements Closeable { + + private static final Logger LOGGER = LoggerFactory.getLogger(OnnxClassificationRunner.class); + + private final OrtEnvironment env; + private final OrtSession session; + private final String inputName; + + /** + * Load an ONNX classification model. + * + * @param modelPath Path to the .onnx file + */ + public OnnxClassificationRunner(Path modelPath) throws OrtException { + this.env = OrtEnvironment.getEnvironment(); + + OrtSession.SessionOptions options = new OrtSession.SessionOptions(); + options.setOptimizationLevel(OrtSession.SessionOptions.OptLevel.ALL_OPT); + + // Configure threading for optimal CPU inference performance + // Since GROBID manages concurrency at the worker level (e.g., 10 concurrent + // workers), use single-threaded inference per session to avoid CPU + // oversubscription + options.setIntraOpNumThreads(1); + + // interOpNumThreads: threads for parallel execution of multiple operators + // Set to 1 since GROBID manages concurrency at a higher level + options.setInterOpNumThreads(1); + + // Use sequential execution mode (vs parallel) since GROBID handles parallelism + options.setExecutionMode(OrtSession.SessionOptions.ExecutionMode.SEQUENTIAL); + + this.session = env.createSession(modelPath.toString(), options); + + // Get the first input name (classification models typically have one input) + this.inputName = session.getInputNames().iterator().next(); + + LOGGER.info("Loaded ONNX classification model from {} (single-threaded, sequential mode)", modelPath); + LOGGER.info("Input names: {}", session.getInputNames()); + LOGGER.info("Output names: {}", session.getOutputNames()); + } + + /** + * Run inference on embeddings input. 
+ * + * @param embeddings Input embeddings [batch, maxlen, embeddingSize] + * @return Raw logits [batch, numClasses] (not yet softmax-normalized) + */ + public float[][] runInference(float[][][] embeddings) throws OrtException { + int batchSize = embeddings.length; + int maxlen = embeddings[0].length; + int embeddingSize = embeddings[0][0].length; + + // Flatten embeddings for ONNX tensor + float[] flat = flatten3D(embeddings); + + // Create input tensor + OnnxTensor inputTensor = OnnxTensor.createTensor(env, + FloatBuffer.wrap(flat), + new long[] { batchSize, maxlen, embeddingSize }); + + Map<String, OnnxTensor> inputs = new HashMap<>(); + inputs.put(inputName, inputTensor); + + try (OrtSession.Result result = session.run(inputs)) { + // Get output - should be [batch, numClasses] (raw logits) + OnnxTensor outputTensor = (OnnxTensor) result.get(0); + long[] shape = outputTensor.getInfo().getShape(); + int numClasses = (int) shape[1]; + + float[] outputFlat = outputTensor.getFloatBuffer().array(); + + // Reshape to 2D + float[][] logits = new float[batchSize][numClasses]; + int idx = 0; + for (int b = 0; b < batchSize; b++) { + for (int c = 0; c < numClasses; c++) { + logits[b][c] = outputFlat[idx++]; + } + } + + return logits; + } finally { + inputTensor.close(); + } + } + + /** + * Flatten 3D array to 1D for ONNX tensor creation. 
+ */ + private float[] flatten3D(float[][][] arr) { + int d1 = arr.length; + int d2 = arr[0].length; + int d3 = arr[0][0].length; + float[] flat = new float[d1 * d2 * d3]; + int idx = 0; + for (int i = 0; i < d1; i++) { + for (int j = 0; j < d2; j++) { + for (int k = 0; k < d3; k++) { + flat[idx++] = arr[i][j][k]; + } + } + } + return flat; + } + + @Override + public void close() { + try { + if (session != null) { + session.close(); + } + } catch (Exception e) { + LOGGER.warn("Error closing ONNX session", e); + } + } +} diff --git a/grobid-core/src/main/java/org/grobid/core/engines/tagging/delft/OnnxSequenceLabellingModel.java b/grobid-core/src/main/java/org/grobid/core/engines/tagging/delft/OnnxSequenceLabellingModel.java new file mode 100644 index 0000000000..7db6e1ea51 --- /dev/null +++ b/grobid-core/src/main/java/org/grobid/core/engines/tagging/delft/OnnxSequenceLabellingModel.java @@ -0,0 +1,767 @@ +package org.grobid.core.engines.tagging.delft; + +import ai.onnxruntime.OrtException; +import com.google.gson.Gson; +import com.google.gson.JsonObject; +import org.grobid.core.layout.LayoutToken; +import org.grobid.core.engines.label.TaggingLabels; +import org.grobid.core.utilities.GrobidProperties; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.Closeable; +import java.io.FileReader; +import java.io.IOException; +import java.nio.file.Path; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; + +/** + * ONNX Runtime wrapper for running DeLFT encoder models. 
+ */ +public class OnnxSequenceLabellingModel implements Closeable { + + private static final Logger LOGGER = LoggerFactory.getLogger(OnnxSequenceLabellingModel.class); + + private final OnnxSequenceLabellingRunner modelRunner; + private final CRFDecoder crfDecoder; + private final Preprocessor preprocessor; + private final WordEmbeddings embeddings; + private final int maxSeqLength; + + public OnnxSequenceLabellingModel(Path modelDir) throws IOException, OrtException { + // Read config.json + Gson gson = new Gson(); + Path configPath = modelDir.resolve("config.json"); + JsonObject config; + try (FileReader reader = new FileReader(configPath.toFile())) { + config = gson.fromJson(reader, JsonObject.class); + } + + int embeddingSize = config.get("wordEmbeddingSize").getAsInt(); + this.maxSeqLength = config.get("maxSequenceLength").getAsInt(); + String embeddingsName = config.get("embeddingsName").getAsString(); + + // Embeddings are stored under delft installation path: + // {delft}/data/db/{name} + String delftPath = GrobidProperties.getDeLFTFilePath(); + Path embeddingsPath = Path.of(delftPath, "data", "db", embeddingsName); + + LOGGER.info("Loading ONNX model from: {}", modelDir); + LOGGER.info("Loading embeddings from: {}", embeddingsPath); + + // Load components + this.modelRunner = new OnnxSequenceLabellingRunner(modelDir.resolve("encoder.onnx")); + this.crfDecoder = CRFDecoder.fromJson(modelDir.resolve("crf_params.json")); + this.preprocessor = Preprocessor.fromJson(modelDir.resolve("vocab.json")); + this.embeddings = WordEmbeddings.getInstance(embeddingsPath, embeddingSize); + + LOGGER.info("DeLFT model loaded from {}", modelDir); + LOGGER.info("Model has features: {}", preprocessor.hasFeatures()); + } + + /** + * Load a DeLFT model from exported directory. 
+ * + * @param modelDir Directory containing encoder.onnx, crf_params.json, + * vocab.json + * @param embeddingsPath Path to LMDB embeddings database + * @param embeddingSize Dimension of word embeddings + * @param maxSeqLength Maximum sequence length + */ + public OnnxSequenceLabellingModel(Path modelDir, Path embeddingsPath, int embeddingSize, int maxSeqLength) + throws IOException, OrtException { + + this.maxSeqLength = maxSeqLength; + + // Load components + this.modelRunner = new OnnxSequenceLabellingRunner(modelDir.resolve("encoder.onnx")); + this.crfDecoder = CRFDecoder.fromJson(modelDir.resolve("crf_params.json")); + this.preprocessor = Preprocessor.fromJson(modelDir.resolve("vocab.json")); + this.embeddings = WordEmbeddings.getInstance(embeddingsPath, embeddingSize); + + LOGGER.info("DeLFT model loaded from {}", modelDir); + LOGGER.info("Model has features: {}", preprocessor.hasFeatures()); + } + + /** + * Annotate text with sequence labels (no features). + * + * @param text Input text + * @return Annotation result + */ + public AnnotationResult annotate(String text) throws OrtException { + List<LayoutToken> tokens = preprocessor.tokenize(text); + String[] words = new String[tokens.size()]; + for (int i = 0; i < tokens.size(); i++) { + words[i] = tokens.get(i).getText(); + } + + return annotateTokens(words, null); + } + + /** + * Annotate tokens with features. + * + * For BidLSTM_CRF_FEATURES models, features must be provided. + * Each row in features corresponds to a token, with one value per feature + * column. + * + * @param tokens Array of token strings + * @param features Feature values per token [numTokens][numFeatures], can be + * null for non-feature models + * @return Annotation result + */ + public AnnotationResult annotateTokens(String[] tokens, String[][] features) throws OrtException { + int numTokens = Math.min(tokens.length, maxSeqLength); + if (tokens.length > maxSeqLength) { + LOGGER.warn("Input sequence length {} exceeds maxSeqLength {}. 
Truncating.", tokens.length, maxSeqLength); + } + + if (numTokens == 0) { + return new AnnotationResult(null, new String[0], new String[0]); + } + + // Truncate to max sequence length + String[] words = new String[numTokens]; + System.arraycopy(tokens, 0, words, 0, numTokens); + + // Get embeddings [seq_len][embed_size] + float[][] wordEmbs = embeddings.getEmbeddings(words); + + // Pad to maxSeqLength + float[][] paddedEmbs = new float[maxSeqLength][embeddings.getEmbeddingSize()]; + for (int i = 0; i < numTokens; i++) { + paddedEmbs[i] = wordEmbs[i]; + } + + // Get char indices [seq_len][max_char] + List<LayoutToken> layoutTokens = new ArrayList<>(); + for (String word : words) { + LayoutToken lt = new LayoutToken(); + lt.setText(word); + layoutTokens.add(lt); + } + long[][] charIndices = preprocessor.tokensToCharIndices(layoutTokens, maxSeqLength); + + // Create batch of 1 + float[][][] batchEmbs = new float[][][] { paddedEmbs }; + long[][][] batchChars = new long[][][] { charIndices }; + + // Handle features + long[][][] batchFeatures = null; + if (preprocessor.hasFeatures() && features != null) { + long[][] featureIndices = preprocessor.tokensToFeatureIndices(features, maxSeqLength); + batchFeatures = new long[][][] { featureIndices }; + } + + // Run model + float[][][] emissions = modelRunner.runInference(batchEmbs, batchChars, batchFeatures); + + // CRF decode + boolean[] mask = preprocessor.createMask(numTokens, maxSeqLength); + int[] tagIndices = crfDecoder.decode(emissions[0], mask); + + // Convert to tag names (only for actual tokens) + String[] tags = new String[numTokens]; + for (int i = 0; i < numTokens; i++) { + tags[i] = delft2grobidLabel( + preprocessor.getTagIndex().getOrDefault(tagIndices[i], TaggingLabels.IOB_OTHER_LABEL)); + } + + return new AnnotationResult(String.join(" ", words), words, tags); + } + + /** + * Annotate multiple token sequences in a single batch inference call. + * All sequences are padded to maxSeqLength. 
+ * + * @param tokensBatch Array of token arrays [batchSize][numTokens] + * @param featuresBatch Feature values per token + * [batchSize][numTokens][numFeatures], can be null + * @return Array of AnnotationResult, one per input sequence + */ + public AnnotationResult[] annotateTokensBatch(String[][] tokensBatch, String[][][] featuresBatch) + throws OrtException { + int batchSize = tokensBatch.length; + + if (batchSize == 0) { + return new AnnotationResult[0]; + } + + // Prepare arrays to store actual token counts for each sequence + int[] numTokensPerSeq = new int[batchSize]; + + // Prepare batch data + float[][][] batchEmbs = new float[batchSize][maxSeqLength][embeddings.getEmbeddingSize()]; + long[][][] batchChars = new long[batchSize][maxSeqLength][]; + long[][][] batchFeatures = null; + + if (preprocessor.hasFeatures() && featuresBatch != null) { + batchFeatures = new long[batchSize][][]; + } + + for (int b = 0; b < batchSize; b++) { + String[] tokens = tokensBatch[b]; + int numTokens = Math.min(tokens.length, maxSeqLength); + if (tokens.length > maxSeqLength) { + LOGGER.warn("Input sequence length {} exceeds maxSeqLength {}. 
Truncating.", tokens.length, + maxSeqLength); + } + numTokensPerSeq[b] = numTokens; + + if (numTokens == 0) { + // Initialize with zeros for empty sequences + for (int i = 0; i < maxSeqLength; i++) { + batchEmbs[b][i] = new float[embeddings.getEmbeddingSize()]; + batchChars[b][i] = new long[preprocessor.getMaxCharLength()]; + } + if (batchFeatures != null) { + batchFeatures[b] = new long[maxSeqLength][preprocessor.getNumFeatures()]; + } + continue; + } + + // Truncate to max sequence length + String[] words = new String[numTokens]; + System.arraycopy(tokens, 0, words, 0, numTokens); + + // Get embeddings and pad + float[][] wordEmbs = embeddings.getEmbeddings(words); + for (int i = 0; i < numTokens; i++) { + batchEmbs[b][i] = wordEmbs[i]; + } + for (int i = numTokens; i < maxSeqLength; i++) { + batchEmbs[b][i] = new float[embeddings.getEmbeddingSize()]; + } + + // Get char indices + List<LayoutToken> layoutTokens = new ArrayList<>(); + for (String word : words) { + LayoutToken lt = new LayoutToken(); + lt.setText(word); + layoutTokens.add(lt); + } + batchChars[b] = preprocessor.tokensToCharIndices(layoutTokens, maxSeqLength); + + // Handle features + if (batchFeatures != null) { + if (featuresBatch != null && featuresBatch[b] != null) { + batchFeatures[b] = preprocessor.tokensToFeatureIndices(featuresBatch[b], maxSeqLength); + } else { + batchFeatures[b] = new long[maxSeqLength][preprocessor.getNumFeatures()]; + } + } + } + + // Run batch inference + float[][][] emissions = modelRunner.runInference(batchEmbs, batchChars, batchFeatures); + + // Decode each sequence + AnnotationResult[] results = new AnnotationResult[batchSize]; + for (int b = 0; b < batchSize; b++) { + int numTokens = numTokensPerSeq[b]; + + if (numTokens == 0) { + results[b] = new AnnotationResult(null, new String[0], new String[0]); + continue; + } + + // CRF decode + boolean[] mask = preprocessor.createMask(numTokens, maxSeqLength); + int[] tagIndices = crfDecoder.decode(emissions[b], mask); + + 
// Convert to tag names (only for actual tokens) + String[] tags = new String[numTokens]; + String[] words = new String[numTokens]; + System.arraycopy(tokensBatch[b], 0, words, 0, numTokens); + + for (int i = 0; i < numTokens; i++) { + tags[i] = delft2grobidLabel( + preprocessor.getTagIndex().getOrDefault(tagIndices[i], TaggingLabels.IOB_OTHER_LABEL)); + } + + results[b] = new AnnotationResult(String.join(" ", words), words, tags); + } + + return results; + } + + /** + * Check if this model requires features. + */ + public boolean hasFeatures() { + return preprocessor.hasFeatures(); + } + + /** + * Get the number of features expected per token (0 if no features). + */ + public int getNumFeatures() { + return preprocessor.getNumFeatures(); + } + + /** + * Maximum sequence length supported by the model. + * <p> + * For the DeLFT-exported ONNX bundles shipped with GROBID, this value is read + * from the accompanying {@code config.json} (field {@code maxSequenceLength}). + */ + public int getMaxSeqLength() { + return maxSeqLength; + } + + /** + * Read the maximum sequence length from a DeLFT ONNX bundle directory. + * <p> + * This method only parses {@code config.json} and does not load ONNX Runtime, + * embeddings (LMDB) or any other native dependency. + * + * @param modelDir directory containing {@code config.json} + * @return maxSequenceLength from config.json + */ + public static int readMaxSequenceLength(Path modelDir) throws IOException { + Gson gson = new Gson(); + Path configPath = modelDir.resolve("config.json"); + try (FileReader reader = new FileReader(configPath.toFile())) { + JsonObject config = gson.fromJson(reader, JsonObject.class); + return config.get("maxSequenceLength").getAsInt(); + } + } + + /** + * Annotate multiple texts in batch. 
+ */ + public List<AnnotationResult> annotateBatch(List<String> texts) throws OrtException { + List<AnnotationResult> results = new ArrayList<>(); + for (String text : texts) { + results.add(annotate(text)); + } + return results; + } + + /** + * Label GROBID-formatted input data with support for long sequences. + * + * For sequences exceeding maxSeqLength, this method chunks the sequence, + * runs inference on each chunk independently, and concatenates the results. + * This matches the behavior of DeLFT's Python grobidTagger.tag() method. + * + * Input format: token\tfeature1\tfeature2\t...\n + * Output format: token\tfeature1\tfeature2\t...\tlabel\n + * + * @param data GROBID feature data + * @return Labeled output in GROBID format + */ + public String labelGrobidInput(String data) { + try { + // Parse input into sequences (separated by empty lines) + List<List<String>> sequences = new ArrayList<>(); + List<String> currentSequence = new ArrayList<>(); + + String[] lines = data.split("\n", -1); // -1 to keep trailing empty strings + + for (String line : lines) { + if (line.trim().isEmpty()) { + if (!currentSequence.isEmpty()) { + sequences.add(currentSequence); + currentSequence = new ArrayList<>(); + } + } else { + currentSequence.add(line); + } + } + // Don't forget the last sequence if it doesn't end with empty line + if (!currentSequence.isEmpty()) { + sequences.add(currentSequence); + } + + if (sequences.isEmpty()) { + return ""; + } + + // Process each sequence independently (with chunking if needed) + List<List<String>> allSequenceLabels = new ArrayList<>(); + + for (List<String> sequence : sequences) { + List<String> sequenceLabels = labelSequenceWithChunking(sequence); + allSequenceLabels.add(sequenceLabels); + } + + // Rebuild output with original structure + StringBuilder output = new StringBuilder(); + int seqIdx = 0; + int tokenInSeqIdx = 0; + boolean inSequence = false; + + for (String line : lines) { + if (line.trim().isEmpty()) { + if (inSequence && 
seqIdx < sequences.size()) { + // End of a sequence - add separator + output.append("\n"); + seqIdx++; + tokenInSeqIdx = 0; + inSequence = false; + } + } else { + inSequence = true; + if (seqIdx < allSequenceLabels.size() && + tokenInSeqIdx < allSequenceLabels.get(seqIdx).size()) { + String label = allSequenceLabels.get(seqIdx).get(tokenInSeqIdx); + output.append(line).append("\t").append(label).append("\n"); + tokenInSeqIdx++; + } + } + } + + return output.toString(); + } catch (OrtException e) { + throw new RuntimeException("ONNX inference failed", e); + } + } + + /** + * Label a single sequence, chunking if it exceeds maxSeqLength. + * + * @param sequenceLines Lines of the sequence (token\tfeatures format) + * @return List of labels, one per token + */ + private List<String> labelSequenceWithChunking(List<String> sequenceLines) throws OrtException { + // Parse tokens and features from the sequence + List<String[]> tokensWithFeatures = new ArrayList<>(); + for (String line : sequenceLines) { + String[] parts = line.split("[\\t\\s]+"); + tokensWithFeatures.add(parts); + } + + int totalTokens = tokensWithFeatures.size(); + + // If sequence fits in one chunk, process directly + if (totalTokens <= maxSeqLength) { + return labelTokensWithFeatures(tokensWithFeatures); + } + + // Collect all chunks for batch processing + List<List<String[]>> chunks = new ArrayList<>(); + int offset = 0; + + while (offset < totalTokens) { + int chunkEnd = Math.min(offset + maxSeqLength, totalTokens); + chunks.add(new ArrayList<>(tokensWithFeatures.subList(offset, chunkEnd))); + offset = chunkEnd; + } + + // Convert chunks to batch arrays + int batchSize = chunks.size(); + String[][] tokensBatch = new String[batchSize][]; + String[][][] featuresBatch = new String[batchSize][][]; + + List<Integer> featuresIndices = preprocessor.getFeaturesIndices(); + + for (int i = 0; i < batchSize; i++) { + List<String[]> chunk = chunks.get(i); + tokensBatch[i] = new String[chunk.size()]; + 
featuresBatch[i] = null; + + for (int j = 0; j < chunk.size(); j++) { + String[] parts = chunk.get(j); + tokensBatch[i][j] = parts[0]; + + if (featuresIndices != null && !featuresIndices.isEmpty()) { + if (featuresBatch[i] == null) { + featuresBatch[i] = new String[chunk.size()][featuresIndices.size()]; + } + + // We need to map the features from the input line to the features expected by + // the model + // The input line parts[0] is the token, parts[1]... are features + // featuresIndices contains the 0-based index of the feature in the training + // data + // We assume the input data has the same structure as training data (Grobid + // structure) + + for (int k = 0; k < featuresIndices.size(); k++) { + int featureIndex = featuresIndices.get(k); + // IMPORTANT: featuresIndices values (e.g., 9, 10, ...) are 1-based indices + // into Python's features array, which excludes the token. + // In Python: features = pieces[1:], so features[9] = pieces[10]. + // In Java: parts[0] is token, so we need parts[featureIndex + 1] to match. + int adjustedIndex = featureIndex + 1; + + if (adjustedIndex < parts.length) { + featuresBatch[i][j][k] = parts[adjustedIndex]; + } else { + // Missing feature in input + featuresBatch[i][j][k] = "0"; + } + } + } + } + } + + // Run batch inference + AnnotationResult[] results = annotateTokensBatch(tokensBatch, featuresBatch); + + // Combine results + List<String> allLabels = new ArrayList<>(); + for (AnnotationResult result : results) { + allLabels.addAll(Arrays.asList(result.getLabels())); + } + + return allLabels; + } + + /** + * Label a list of tokens with features. + * + * @param tokensWithFeatures Each element is [token, feature1, feature2, ...] 
+ * @return List of labels + */ + private List<String> labelTokensWithFeatures(List<String[]> tokensWithFeatures) throws OrtException { + if (tokensWithFeatures.isEmpty()) { + return new ArrayList<>(); + } + + // Extract tokens and selected features (using featuresIndices) + String[] tokens = new String[tokensWithFeatures.size()]; + String[][] features = null; + List<Integer> featuresIndices = preprocessor.getFeaturesIndices(); + + for (int i = 0; i < tokensWithFeatures.size(); i++) { + String[] parts = tokensWithFeatures.get(i); + tokens[i] = parts[0]; + + // Extract only the features specified in featuresIndices + if (featuresIndices != null && !featuresIndices.isEmpty()) { + if (features == null) { + features = new String[tokensWithFeatures.size()][featuresIndices.size()]; + } + for (int k = 0; k < featuresIndices.size(); k++) { + int featureIndex = featuresIndices.get(k); + // IMPORTANT: featuresIndices values (e.g., 9, 10, ...) are 1-based indices + // into Python's features array, which excludes the token. + // In Python: features = pieces[1:], so features[9] = pieces[10]. + // In Java: parts[0] is token, so we need parts[featureIndex + 1] to match. 
+ int adjustedIndex = featureIndex + 1; + if (adjustedIndex < parts.length) { + features[i][k] = parts[adjustedIndex]; + } else { + features[i][k] = "0"; + } + } + } + } + + // Run annotation + AnnotationResult result = annotateTokens(tokens, features); + return Arrays.asList(result.getLabels()); + } + + @Override + public void close() { + if (modelRunner != null) + modelRunner.close(); + if (embeddings != null) + embeddings.close(); + } + + private static String delft2grobidLabel(String label) { + if (label.equals(TaggingLabels.IOB_OTHER_LABEL) || label.trim().equals("<PAD>")) { + return TaggingLabels.OTHER_LABEL; + } else if (label.startsWith(TaggingLabels.IOB_START_ENTITY_LABEL_PREFIX)) { + return label.replace(TaggingLabels.IOB_START_ENTITY_LABEL_PREFIX, + TaggingLabels.GROBID_START_ENTITY_LABEL_PREFIX); + } else if (label.startsWith(TaggingLabels.IOB_INSIDE_LABEL_PREFIX)) { + return label.replace(TaggingLabels.IOB_INSIDE_LABEL_PREFIX, + TaggingLabels.GROBID_INSIDE_ENTITY_LABEL_PREFIX); + } + return label; + } + + /** + * Annotation result containing tokens and labels. + */ + public static class AnnotationResult { + private final String text; + private final String[] tokens; + private final String[] labels; + + public AnnotationResult(String text, String[] tokens, String[] labels) { + this.text = text; + this.tokens = tokens; + this.labels = labels; + } + + public String getText() { + return text; + } + + public String[] getTokens() { + return tokens; + } + + public String[] getLabels() { + return labels; + } + + /** + * Represents an extracted entity with its label and text. 
+ */ + public static class Entity { + public final String label; + public final String text; + public final int startToken; + public final int endToken; + + public Entity(String label, String text, int startToken, int endToken) { + this.label = label; + this.text = text; + this.startToken = startToken; + this.endToken = endToken; + } + } + + /** + * Extract entities from BIO-tagged sequence. + * Groups consecutive tokens with the same label into entities. + */ + public List<Entity> extractEntities() { + List<Entity> entities = new ArrayList<>(); + if (tokens == null || tokens.length == 0) { + return entities; + } + + String currentLabel = null; + int startIdx = -1; + StringBuilder currentText = new StringBuilder(); + + for (int i = 0; i < tokens.length; i++) { + String label = labels[i]; + String baseLabel = getBaseLabel(label); + + // Logic for Grobid-like labels (I-<label> is start, <label> is inside) + boolean isO = label.equals(TaggingLabels.OTHER_LABEL) || label.equals("O"); + boolean isBegin = label.startsWith(TaggingLabels.GROBID_START_ENTITY_LABEL_PREFIX); + boolean isInside = !isO && !isBegin; + + if (isBegin || (isInside && !baseLabel.equals(currentLabel))) { + // Save previous entity if exists + if (currentLabel != null) { + entities.add(new Entity(currentLabel, currentText.toString().trim(), startIdx, i - 1)); + } + // Start new entity + currentLabel = baseLabel; + startIdx = i; + currentText = new StringBuilder(tokens[i]); + } else if (isInside && baseLabel.equals(currentLabel)) { + // Continue current entity + currentText.append(" ").append(tokens[i]); + } else if (isO) { + // End current entity if exists + if (currentLabel != null) { + entities.add(new Entity(currentLabel, currentText.toString().trim(), startIdx, i - 1)); + currentLabel = null; + startIdx = -1; + currentText = new StringBuilder(); + } + } + } + + // Don't forget last entity + if (currentLabel != null) { + entities.add(new Entity(currentLabel, currentText.toString().trim(), startIdx, 
tokens.length - 1)); + } + + return entities; + } + + /** + * Get base label without B-/I- prefix (also removes angle brackets). + */ + private String getBaseLabel(String label) { + String base = label; + if (label.startsWith(TaggingLabels.GROBID_START_ENTITY_LABEL_PREFIX)) { // I- + base = label.substring(TaggingLabels.GROBID_START_ENTITY_LABEL_PREFIX.length()); + } else if (label.startsWith(TaggingLabels.IOB_START_ENTITY_LABEL_PREFIX)) { // B- + base = label.substring(TaggingLabels.IOB_START_ENTITY_LABEL_PREFIX.length()); + } else if (label.startsWith(TaggingLabels.IOB_INSIDE_LABEL_PREFIX)) { // I- (original IOB) + base = label.substring(TaggingLabels.IOB_INSIDE_LABEL_PREFIX.length()); + } + + // Remove angle brackets if present (e.g., <title> -> title) + if (base.startsWith("<") && base.endsWith(">")) { + return base.substring(1, base.length() - 1); + } + return base; + } + + /** + * Format entities as XML-like string. + * E.g., "<title>Analysis of 10,478 cancer genomesBen + * Kinnersley" + */ + public String toXmlString() { + List entities = extractEntities(); + StringBuilder sb = new StringBuilder(); + for (Entity entity : entities) { + sb.append("<").append(entity.label).append(">") + .append(entity.text) + .append(""); + } + return sb.toString(); + } + + public String toJson() { + List entities = extractEntities(); + + StringBuilder sb = new StringBuilder(); + sb.append("{\n"); + sb.append(" \"text\": \"").append(escapeJson(text)).append("\",\n"); + sb.append(" \"tokens\": ["); + for (int i = 0; i < tokens.length; i++) { + if (i > 0) + sb.append(", "); + sb.append("\"").append(escapeJson(tokens[i])).append("\""); + } + sb.append("],\n"); + sb.append(" \"labels\": ["); + for (int i = 0; i < labels.length; i++) { + if (i > 0) + sb.append(", "); + sb.append("\"").append(labels[i]).append("\""); + } + sb.append("],\n"); + + // Add XML-formatted entities + sb.append(" \"entitiesXml\": \"").append(escapeJson(toXmlString())).append("\",\n"); + + // Add structured 
entities list + sb.append(" \"entities\": ["); + for (int i = 0; i < entities.size(); i++) { + Entity e = entities.get(i); + if (i > 0) + sb.append(", "); + sb.append("\n {\"label\": \"").append(e.label) + .append("\", \"text\": \"").append(escapeJson(e.text)) + .append("\", \"start\": ").append(e.startToken) + .append(", \"end\": ").append(e.endToken).append("}"); + } + if (!entities.isEmpty()) { + sb.append("\n "); + } + sb.append("]\n}"); + return sb.toString(); + } + + private String escapeJson(String s) { + if (s == null) + return ""; + return s.replace("\\", "\\\\") + .replace("\"", "\\\"") + .replace("\n", "\\n") + .replace("\r", "\\r") + .replace("\t", "\\t"); + } + } +} diff --git a/grobid-core/src/main/java/org/grobid/core/engines/tagging/delft/OnnxSequenceLabellingRunner.java b/grobid-core/src/main/java/org/grobid/core/engines/tagging/delft/OnnxSequenceLabellingRunner.java new file mode 100644 index 0000000000..59b9f2ad07 --- /dev/null +++ b/grobid-core/src/main/java/org/grobid/core/engines/tagging/delft/OnnxSequenceLabellingRunner.java @@ -0,0 +1,226 @@ +package org.grobid.core.engines.tagging.delft; + +import ai.onnxruntime.*; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.Closeable; +import java.nio.FloatBuffer; +import java.nio.LongBuffer; +import java.nio.file.Path; +import java.util.HashMap; +import java.util.Map; + +/** + * ONNX Runtime wrapper for running DeLFT encoder models. + */ +public class OnnxSequenceLabellingRunner implements Closeable { + + private static final Logger LOGGER = LoggerFactory.getLogger(OnnxSequenceLabellingRunner.class); + + private final OrtEnvironment env; + private final OrtSession session; + private final boolean hasFeatures; + + /** + * Load an ONNX model. 
+ * + * @param modelPath Path to the .onnx file + */ + public OnnxSequenceLabellingRunner(Path modelPath) throws OrtException { + this.env = OrtEnvironment.getEnvironment(); + + OrtSession.SessionOptions options = new OrtSession.SessionOptions(); + options.setOptimizationLevel(OrtSession.SessionOptions.OptLevel.ALL_OPT); + + // Configure threading for optimal CPU inference performance + // Since GROBID manages concurrency at the worker level (e.g., 10 concurrent + // workers), + // use single-threaded inference per session to avoid CPU oversubscription + options.setIntraOpNumThreads(1); + + // interOpNumThreads: threads for parallel execution of multiple operators + // Set to 1 since GROBID manages concurrency at a higher level + options.setInterOpNumThreads(1); + + // Use sequential execution mode (vs parallel) since GROBID handles parallelism + options.setExecutionMode(OrtSession.SessionOptions.ExecutionMode.SEQUENTIAL); + + this.session = env.createSession(modelPath.toString(), options); + + // Check if model has features input + this.hasFeatures = session.getInputNames().contains("features_input"); + + LOGGER.info("Loaded ONNX model from {} (single-threaded, sequential mode)", modelPath); + LOGGER.info("Input names: {}", session.getInputNames()); + LOGGER.info("Output names: {}", session.getOutputNames()); + } + + /** + * Run inference to get emission scores. + * + * @param wordEmbeddings Word embeddings [batch, seq_len, embed_size] + * @param charIndices Character indices [batch, seq_len, max_char] + * @return Emission scores [batch, seq_len, num_tags] + */ + public float[][][] runInference(float[][][] wordEmbeddings, long[][][] charIndices) throws OrtException { + return runInference(wordEmbeddings, charIndices, null); + } + + /** + * Run inference with optional features. 
+ * + * @param wordEmbeddings Word embeddings [batch, seq_len, embed_size] + * @param charIndices Character indices [batch, seq_len, max_char] + * @param featureIndices Optional feature indices [batch, seq_len, num_features] + * @return Emission scores [batch, seq_len, num_tags] + */ + public float[][][] runInference( + float[][][] wordEmbeddings, + long[][][] charIndices, + long[][][] featureIndices) throws OrtException { + + int batchSize = wordEmbeddings.length; + int seqLen = wordEmbeddings[0].length; + int embedSize = wordEmbeddings[0][0].length; + int maxChar = charIndices[0][0].length; + + // Create input tensors + Map inputs = new HashMap<>(); + + // Word embeddings tensor + float[] wordFlat = flatten3D(wordEmbeddings); + OnnxTensor wordTensor = OnnxTensor.createTensor(env, + FloatBuffer.wrap(wordFlat), + new long[] { batchSize, seqLen, embedSize }); + inputs.put("word_input", wordTensor); + + // Char indices tensor + long[] charFlat = flatten3DLong(charIndices); + OnnxTensor charTensor = OnnxTensor.createTensor(env, + LongBuffer.wrap(charFlat), + new long[] { batchSize, seqLen, maxChar }); + inputs.put("char_input", charTensor); + + // Features tensor (if model supports and provided) + if (hasFeatures && featureIndices != null) { + int numFeatures = featureIndices[0][0].length; + long[] featFlat = flatten3DLong(featureIndices); + OnnxTensor featTensor = OnnxTensor.createTensor(env, + LongBuffer.wrap(featFlat), + new long[] { batchSize, seqLen, numFeatures }); + inputs.put("features_input", featTensor); + } + + // Run inference + try (OrtSession.Result result = session.run(inputs)) { + // Get output tensor + OnnxTensor outputTensor = (OnnxTensor) result.get("emissions").get(); + + // Get shape [batch, seq_len, num_tags] + long[] shape = outputTensor.getInfo().getShape(); + int numTags = (int) shape[2]; + + // Copy output to array + float[] outputFlat = outputTensor.getFloatBuffer().array(); + + // Reshape to 3D + float[][][] emissions = new 
float[batchSize][seqLen][numTags]; + int idx = 0; + for (int b = 0; b < batchSize; b++) { + for (int s = 0; s < seqLen; s++) { + for (int t = 0; t < numTags; t++) { + emissions[b][s][t] = outputFlat[idx++]; + } + } + } + + return emissions; + } finally { + // Clean up input tensors + for (OnnxTensor tensor : inputs.values()) { + tensor.close(); + } + } + } + + /** + * Try to infer a fixed maximum sequence length from the ONNX model input tensor + * shape. + *

+ * This only works when the exported model declares a static seq_len dimension + * (e.g. [batch, 3000, embed]). If the model uses a dynamic dimension (often + * reported as -1 / symbolic), this method returns -1. + */ + public int inferMaxSequenceLength() { + try { + NodeInfo inputInfo = session.getInputInfo().get("word_input"); + if (inputInfo == null) { + return -1; + } + TensorInfo tensorInfo = (TensorInfo) inputInfo.getInfo(); + long[] shape = tensorInfo.getShape(); + if (shape == null || shape.length < 2) { + return -1; + } + long seqLen = shape[1]; + if (seqLen <= 0) { + return -1; + } + if (seqLen > Integer.MAX_VALUE) { + return -1; + } + return (int) seqLen; + } catch (Exception e) { + // Be conservative: inference should never fail because of this helper. + return -1; + } + } + + private float[] flatten3D(float[][][] arr) { + int d1 = arr.length; + int d2 = arr[0].length; + int d3 = arr[0][0].length; + float[] flat = new float[d1 * d2 * d3]; + int idx = 0; + for (int i = 0; i < d1; i++) { + for (int j = 0; j < d2; j++) { + for (int k = 0; k < d3; k++) { + flat[idx++] = arr[i][j][k]; + } + } + } + return flat; + } + + private long[] flatten3DLong(long[][][] arr) { + int d1 = arr.length; + int d2 = arr[0].length; + int d3 = arr[0][0].length; + long[] flat = new long[d1 * d2 * d3]; + int idx = 0; + for (int i = 0; i < d1; i++) { + for (int j = 0; j < d2; j++) { + for (int k = 0; k < d3; k++) { + flat[idx++] = arr[i][j][k]; + } + } + } + return flat; + } + + public boolean hasFeatures() { + return hasFeatures; + } + + @Override + public void close() { + try { + if (session != null) { + session.close(); + } + } catch (Exception e) { + LOGGER.warn("Error closing ONNX session", e); + } + } +} diff --git a/grobid-core/src/main/java/org/grobid/core/engines/tagging/delft/Preprocessor.java b/grobid-core/src/main/java/org/grobid/core/engines/tagging/delft/Preprocessor.java new file mode 100644 index 0000000000..afd27bb76d --- /dev/null +++ 
b/grobid-core/src/main/java/org/grobid/core/engines/tagging/delft/Preprocessor.java @@ -0,0 +1,298 @@ +package org.grobid.core.engines.tagging.delft; + +import com.google.gson.Gson; +import com.google.gson.JsonArray; +import com.google.gson.JsonElement; +import com.google.gson.JsonObject; +import org.grobid.core.analyzers.GrobidAnalyzer; +import org.grobid.core.layout.LayoutToken; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.FileReader; +import java.io.IOException; +import java.nio.file.Path; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +/** + * Preprocessor for converting text to model inputs. + * + * Uses GrobidAnalyzer for tokenization and converts to character indices. + * Supports feature preprocessing for BidLSTM_CRF_FEATURES models. + */ +public class Preprocessor { + + private static final Logger LOGGER = LoggerFactory.getLogger(Preprocessor.class); + + private final Map charVocab; + private final Map tagIndex; + private final int maxCharLength; + private final int padIndex; + private final int unkIndex; + + // Feature preprocessing support + private final List featuresIndices; + private final Map> featuresMapToIndex; + private final boolean hasFeatures; + + // Character encoding support - when false, return all-zero char indices + // This matches Python's DataLoader behavior when return_chars=False + private final boolean returnChars; + + /** + * Create preprocessor with vocabularies (no features, with char encoding). + */ + public Preprocessor(Map charVocab, Map tagIndex, int maxCharLength) { + this(charVocab, tagIndex, maxCharLength, null, null, true); + } + + /** + * Create preprocessor with vocabularies and features support. 
+ */ + public Preprocessor(Map charVocab, Map tagIndex, int maxCharLength, + List featuresIndices, Map> featuresMapToIndex) { + this(charVocab, tagIndex, maxCharLength, featuresIndices, featuresMapToIndex, true); + } + + /** + * Create preprocessor with all options. + * + * @param returnChars If false, tokensToCharIndices returns all zeros (matching + * Python's return_chars=False) + */ + public Preprocessor(Map charVocab, Map tagIndex, int maxCharLength, + List featuresIndices, Map> featuresMapToIndex, boolean returnChars) { + this.charVocab = charVocab; + this.tagIndex = tagIndex; + this.maxCharLength = maxCharLength; + this.padIndex = charVocab.getOrDefault("", 0); + this.unkIndex = charVocab.getOrDefault("", 1); + this.featuresIndices = featuresIndices; + this.featuresMapToIndex = featuresMapToIndex; + this.hasFeatures = featuresIndices != null && !featuresIndices.isEmpty(); + this.returnChars = returnChars; + } + + /** + * Load preprocessor from vocab.json exported by Python. + */ + public static Preprocessor fromJson(Path vocabPath) throws IOException { + Gson gson = new Gson(); + try (FileReader reader = new FileReader(vocabPath.toFile())) { + JsonObject json = gson.fromJson(reader, JsonObject.class); + + // Parse char vocab + Map charVocabDouble = gson.fromJson(json.get("charVocab"), HashMap.class); + Map charVocab = new HashMap<>(); + for (Map.Entry entry : charVocabDouble.entrySet()) { + charVocab.put(entry.getKey(), entry.getValue().intValue()); + } + + // Parse tag index (index -> tag name) + Map tagIndexStr = gson.fromJson(json.get("tagIndex"), HashMap.class); + Map tagIndex = new HashMap<>(); + for (Map.Entry entry : tagIndexStr.entrySet()) { + tagIndex.put(Integer.parseInt(entry.getKey()), entry.getValue()); + } + + int maxCharLength = json.get("maxCharLength").getAsInt(); + + // Parse feature mappings if present + List featuresIndices = null; + Map> featuresMapToIndex = null; + + if (json.has("featuresIndices") && 
!json.get("featuresIndices").isJsonNull()) { + JsonArray indicesArray = json.getAsJsonArray("featuresIndices"); + featuresIndices = new ArrayList<>(); + for (JsonElement el : indicesArray) { + featuresIndices.add(el.getAsInt()); + } + + LOGGER.info("Loaded {} feature indices", featuresIndices.size()); + } + + if (json.has("featuresMapToIndex") && !json.get("featuresMapToIndex").isJsonNull()) { + JsonObject fmti = json.getAsJsonObject("featuresMapToIndex"); + featuresMapToIndex = new HashMap<>(); + + for (String featureIdxStr : fmti.keySet()) { + Integer featureIdx = Integer.parseInt(featureIdxStr); + JsonObject valueMap = fmti.getAsJsonObject(featureIdxStr); + Map innerMap = new HashMap<>(); + + for (String valueName : valueMap.keySet()) { + innerMap.put(valueName, valueMap.get(valueName).getAsInt()); + } + + featuresMapToIndex.put(featureIdx, innerMap); + } + + LOGGER.info("Loaded feature vocabulary with {} feature columns", featuresMapToIndex.size()); + } + + // Parse returnChars flag (defaults to true for backward compatibility) + boolean returnChars = true; + if (json.has("returnChars") && !json.get("returnChars").isJsonNull()) { + returnChars = json.get("returnChars").getAsBoolean(); + } + LOGGER.info("Loaded returnChars={}", returnChars); + + return new Preprocessor(charVocab, tagIndex, maxCharLength, featuresIndices, featuresMapToIndex, + returnChars); + } + } + + /** + * Tokenize text using GrobidAnalyzer. + * Filters out whitespace-only tokens to match Python DeLFT behavior. 
+ * + * @param text Input text + * @return List of tokens (excluding whitespace-only tokens) + */ + public List tokenize(String text) { + List allTokens = GrobidAnalyzer.getInstance().tokenizeWithLayoutToken(text); + List filtered = new ArrayList<>(); + for (LayoutToken token : allTokens) { + String txt = token.getText(); + // Filter out whitespace-only tokens + if (txt != null && !txt.trim().isEmpty()) { + filtered.add(token); + } + } + return filtered; + } + + /** + * Convert tokens to character indices. + * + * If returnChars is false, returns all-zero array to match Python's + * DataLoader behavior when return_chars=False. + * + * @param tokens List of tokens + * @param seqLength Padded sequence length + * @return Character indices [seq_len][max_char_length] + */ + public long[][] tokensToCharIndices(List tokens, int seqLength) { + long[][] charIndices = new long[seqLength][maxCharLength]; + + // If returnChars is false, return all-zero array (matches Python's DataLoader) + if (!returnChars) { + return charIndices; + } + + for (int i = 0; i < Math.min(tokens.size(), seqLength); i++) { + String word = tokens.get(i).getText(); + for (int j = 0; j < Math.min(word.length(), maxCharLength); j++) { + String ch = String.valueOf(word.charAt(j)); + charIndices[i][j] = charVocab.getOrDefault(ch, unkIndex); + } + // Rest is padded with 0 (default) + } + + return charIndices; + } + + /** + * Convert tokens to string array. + */ + public String[] tokensToStrings(List tokens) { + String[] strings = new String[tokens.size()]; + for (int i = 0; i < tokens.size(); i++) { + strings[i] = tokens.get(i).getText(); + } + return strings; + } + + /** + * Convert tag indices to tag names. + */ + public String[] indicesToTags(int[] indices) { + String[] tags = new String[indices.length]; + for (int i = 0; i < indices.length; i++) { + tags[i] = tagIndex.getOrDefault(indices[i], "O"); + } + return tags; + } + + /** + * Create mask for valid tokens. 
+ */ + public boolean[] createMask(int numTokens, int seqLength) { + boolean[] mask = new boolean[seqLength]; + for (int i = 0; i < Math.min(numTokens, seqLength); i++) { + mask[i] = true; + } + return mask; + } + + public int getMaxCharLength() { + return maxCharLength; + } + + public Map getTagIndex() { + return tagIndex; + } + + /** + * Check if this preprocessor supports features. + */ + public boolean hasFeatures() { + return hasFeatures; + } + + /** + * Get the number of features expected per token. + */ + public int getNumFeatures() { + return featuresIndices != null ? featuresIndices.size() : 0; + } + + /** + * Get the feature indices (column positions in training data). + */ + public List getFeaturesIndices() { + return featuresIndices; + } + + /** + * Convert feature strings to indices for ONNX model input. + * + * Features are provided as a 2D array: [numTokens][numFeatures] + * where each element is the feature value string (e.g., "ALLCAP", "LINESTART"). + * + * @param features Feature strings per token [numTokens][numFeatures] + * @param seqLength Padded sequence length + * @return Feature indices [seqLength][numFeatures] ready for ONNX model + */ + public long[][] tokensToFeatureIndices(String[][] features, int seqLength) { + if (!hasFeatures) { + return null; + } + + int numFeatures = featuresIndices.size(); + long[][] result = new long[seqLength][numFeatures]; + + int numTokens = Math.min(features.length, seqLength); + + for (int tokenIdx = 0; tokenIdx < numTokens; tokenIdx++) { + for (int featIdx = 0; featIdx < numFeatures; featIdx++) { + if (featIdx < features[tokenIdx].length) { + String featureValue = features[tokenIdx][featIdx]; + Integer originalColumnIdx = featuresIndices.get(featIdx); + + // Look up the index for this feature value using the original column vocabulary + Map valueMap = featuresMapToIndex.get(originalColumnIdx); + if (valueMap != null && featureValue != null) { + result[tokenIdx][featIdx] = valueMap.getOrDefault(featureValue, 
0); + } + // Default is 0 (padding) + } + } + } + + return result; + } +} diff --git a/grobid-core/src/main/java/org/grobid/core/engines/tagging/delft/WordEmbeddings.java b/grobid-core/src/main/java/org/grobid/core/engines/tagging/delft/WordEmbeddings.java new file mode 100644 index 0000000000..a88bc2a514 --- /dev/null +++ b/grobid-core/src/main/java/org/grobid/core/engines/tagging/delft/WordEmbeddings.java @@ -0,0 +1,480 @@ +package org.grobid.core.engines.tagging.delft; + +import com.google.common.cache.Cache; +import com.google.common.cache.CacheBuilder; +import org.lmdbjava.*; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.Closeable; +import java.io.IOException; +import java.nio.ByteBuffer; +import java.nio.ByteOrder; +import java.nio.charset.StandardCharsets; +import java.nio.file.Files; +import java.nio.file.Path; +import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.Executors; +import java.util.concurrent.ScheduledExecutorService; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicInteger; +import java.util.concurrent.atomic.AtomicLong; + +/** + * Word embeddings lookup using LMDB database. + * + * Reads embeddings from LMDB where values are raw float32 arrays + * (little-endian). + * Use convert_lmdb_embeddings.py to convert from pickled numpy format. + * + * This class is a singleton per embeddings path - multiple models using the + * same embeddings (e.g., glove-840B) share a single LMDB connection. + * Use {@link #getInstance(Path, int)} to obtain instances. 
+ */ +public class WordEmbeddings implements Closeable { + + private static final Logger LOGGER = LoggerFactory.getLogger(WordEmbeddings.class); + + // Singleton registry keyed by absolute path + private static final ConcurrentHashMap INSTANCES = new ConcurrentHashMap<>(); + + // Default max cache size: 200K entries (~240MB for 300-dim embeddings) + private static final int DEFAULT_CACHE_SIZE = 200_000; + + private final Env env; + private final Dbi dbi; + private final int embeddingSize; + private final float[] zeroVector; + private final AtomicInteger refCount = new AtomicInteger(0); // Track how many models are using this instance + + // LRU cache for embeddings - eliminates repeated LMDB lookups (using Guava) + private final Cache cache; + + // Instrumentation for LMDB access pattern analysis + private final AtomicLong totalLookups = new AtomicLong(); + private final AtomicLong cacheHits = new AtomicLong(); + private final AtomicLong totalLookupTimeNs = new AtomicLong(); + private final AtomicLong misses = new AtomicLong(); // Words not found in DB + private final ConcurrentHashMap uniqueWords = new ConcurrentHashMap<>(); + private final ScheduledExecutorService statsScheduler; + private final String dbName; + + /** + * Get a shared WordEmbeddings instance for the given path. + * + * Multiple models using the same embeddings path will share a single LMDB + * connection, reducing resource usage and reader slot contention. 
+ * + * @param dbPath Path to the LMDB database directory + * @param embeddingSize Dimension of the embeddings + * @return Shared WordEmbeddings instance + * @throws IOException if the database cannot be opened + */ + public static WordEmbeddings getInstance(Path dbPath, int embeddingSize) throws IOException { + String key = dbPath.toAbsolutePath().toString(); + + WordEmbeddings instance = INSTANCES.get(key); + if (instance != null) { + instance.refCount.incrementAndGet(); + LOGGER.debug("Reusing existing WordEmbeddings instance for {} (refCount={})", + key, instance.refCount.get()); + return instance; + } + + // Double-checked locking for thread-safe lazy initialization + synchronized (INSTANCES) { + instance = INSTANCES.get(key); + if (instance == null) { + instance = new WordEmbeddings(dbPath, embeddingSize); + INSTANCES.put(key, instance); + LOGGER.info("Created new WordEmbeddings singleton for {}", key); + } + instance.refCount.incrementAndGet(); + return instance; + } + } + + /** + * Private constructor - use {@link #getInstance(Path, int)} instead. 
+ * + * @param dbPath Path to the LMDB database directory + * @param embeddingSize Dimension of the embeddings + * @throws IOException if the database cannot be opened (missing path, LMDB + * error, or native library issue) + */ + private WordEmbeddings(Path dbPath, int embeddingSize) throws IOException { + this.embeddingSize = embeddingSize; + this.zeroVector = new float[embeddingSize]; + this.dbName = dbPath.getFileName().toString(); + + // Initialize Guava Cache with LRU eviction (max 200K entries ~240MB for + // 300-dim) + CacheBuilder cacheBuilder = CacheBuilder.newBuilder() + .maximumSize(DEFAULT_CACHE_SIZE); + + // Only enable cache statistics when debug logging is enabled (avoid overhead in + // production) + if (LOGGER.isDebugEnabled()) { + cacheBuilder.recordStats(); + } + this.cache = cacheBuilder.build(); + + // Check if path exists before trying to open + if (!Files.exists(dbPath)) { + throw new IOException("Embeddings database not found: " + dbPath.toAbsolutePath() + + "\nPlease provide a valid path to an LMDB embeddings database."); + } + if (!Files.isDirectory(dbPath)) { + throw new IOException("Embeddings path is not a directory: " + dbPath.toAbsolutePath() + + "\nLMDB databases are directories containing 'data.mdb' and 'lock.mdb' files."); + } + + try { + // Open LMDB environment with increased reader slots for high concurrency + this.env = Env.create() + .setMapSize(10_000_000_000L) // 10GB max + .setMaxReaders(512) // Support high concurrency (default is 126) + .setMaxDbs(1) + .open(dbPath.toFile()); + + // Open the default database + this.dbi = env.openDbi((String) null, DbiFlags.MDB_CREATE); + } catch (LmdbException e) { + throw new IOException("Failed to open LMDB database at " + dbPath.toAbsolutePath() + + ": " + e.getMessage(), e); + } catch (UnsatisfiedLinkError e) { + throw new IOException("LMDB native library failed to load. " + + "Ensure lmdbjava dependency includes native libraries for your platform. 
" + + "Error: " + e.getMessage(), e); + } + + // Validate that the database contains raw float32 format (not pickled numpy) + validateEmbeddingFormat(dbPath); + + // Start the stats logging scheduler only when debug logging is enabled + if (LOGGER.isDebugEnabled()) { + this.statsScheduler = Executors.newSingleThreadScheduledExecutor(r -> { + Thread t = new Thread(r, "lmdb-stats-" + dbName); + t.setDaemon(true); + return t; + }); + statsScheduler.scheduleAtFixedRate(this::logStats, 30, 30, TimeUnit.SECONDS); + LOGGER.info("Opened LMDB database at {} (debug stats logging enabled every 30s)", dbPath); + } else { + this.statsScheduler = null; + LOGGER.info("Opened LMDB database at {}", dbPath); + } + } + + /** + * Log accumulated statistics about LMDB access patterns. + * Called every 30 seconds by the stats scheduler. + */ + private void logStats() { + long lookups = totalLookups.get(); + if (lookups == 0) { + return; // Don't log if no activity + } + + long timeNs = totalLookupTimeNs.get(); + long missCount = misses.get(); + long hits = cacheHits.get(); + int uniqueCount = uniqueWords.size(); + long cacheSize = cache.size(); + + // Calculate metrics + double cacheHitRate = lookups > 0 ? 100.0 * hits / lookups : 0.0; + double dbHitRate = lookups > 0 ? 100.0 * (lookups - missCount) / lookups : 0.0; + double repeatRatio = uniqueCount > 0 ? (double) lookups / uniqueCount : 0.0; + long lmdbLookups = lookups - hits; + double avgLmdbTimeMs = lmdbLookups > 0 ? (timeNs / (double) lmdbLookups) / 1_000_000.0 : 0.0; + + LOGGER.debug("LMDB [{}] stats: {} lookups, cache hit: {}% ({} entries), " + + "DB hit: {}%, repeat ratio: {}x, avg LMDB lookup: {}ms", + dbName, lookups, + String.format("%.1f", cacheHitRate), cacheSize, + String.format("%.1f", dbHitRate), + String.format("%.1f", repeatRatio), + String.format("%.3f", avgLmdbTimeMs)); + } + + /** + * Look up embedding for a word. 
+ * + * @param word The word to look up + * @return Embedding vector, or zero vector if not found + * @throws RuntimeException if LMDB database access fails + */ + public float[] getEmbedding(String word) { + // Normalize digits to "0" like Python's _normalize_num + String normalizedWord = normalizeNum(word); + + byte[] keyBytes = normalizedWord.getBytes(StandardCharsets.UTF_8); + ByteBuffer keyBuffer = ByteBuffer.allocateDirect(keyBytes.length); + keyBuffer.put(keyBytes).flip(); + + // Retry logic for LMDB BadReaderLockException under high concurrency + int maxRetries = 3; + int retryDelayMs = 10; + + for (int attempt = 0; attempt < maxRetries; attempt++) { + try (Txn txn = env.txnRead()) { + ByteBuffer valueBuffer = dbi.get(txn, keyBuffer); + + if (valueBuffer == null) { + // Word not found, return zero vector + return zeroVector.clone(); + } + + // Parse float array from raw bytes (little-endian float32) + valueBuffer.order(ByteOrder.LITTLE_ENDIAN); + float[] embedding = new float[embeddingSize]; + for (int i = 0; i < embeddingSize; i++) { + embedding[i] = valueBuffer.getFloat(); + } + return embedding; + } catch (Txn.BadReaderLockException e) { + if (attempt < maxRetries - 1) { + LOGGER.debug("LMDB reader lock contention (attempt {}), retrying", attempt + 1); + try { + Thread.sleep(retryDelayMs); + retryDelayMs *= 2; + } catch (InterruptedException ie) { + Thread.currentThread().interrupt(); + throw new RuntimeException("Interrupted during LMDB retry", ie); + } + } else { + throw new RuntimeException( + "LMDB error after " + maxRetries + " retries for word '" + word + "': " + e.getMessage(), + e); + } + } catch (LmdbException e) { + throw new RuntimeException( + "LMDB database error during embedding lookup for word '" + word + "': " + e.getMessage(), e); + } + } + + throw new RuntimeException("LMDB lookup failed after retries for word '" + word + "'"); + } + + /** + * Normalize digits in a word to "0" (matches Python's _normalize_num). 
+ * This is needed because the model was trained with this normalization. + * + * @param word Input word + * @return Word with all digits replaced by "0" + */ + private String normalizeNum(String word) { + StringBuilder sb = new StringBuilder(); + for (char c : word.toCharArray()) { + if (Character.isDigit(c)) { + sb.append('0'); + } else { + sb.append(c); + } + } + return sb.toString(); + } + + /** + * Look up embeddings for a sequence of words. + * + * Uses a single LMDB read transaction for all lookups to avoid + * exhausting reader slots under high concurrency. + * + * @param words Array of words + * @return 2D array [seq_len][embedding_size] + * @throws RuntimeException if LMDB database access fails + */ + public float[][] getEmbeddings(String[] words) { + float[][] result = new float[words.length][embeddingSize]; + + // Retry logic for LMDB BadReaderLockException under high concurrency + int maxRetries = 3; + int retryDelayMs = 10; + + for (int attempt = 0; attempt < maxRetries; attempt++) { + try (Txn txn = env.txnRead()) { + for (int i = 0; i < words.length; i++) { + result[i] = getEmbeddingWithTxn(words[i], txn); + } + return result; // Success, return immediately + } catch (Txn.BadReaderLockException e) { + // Reader slot contention under high concurrency - retry after brief delay + if (attempt < maxRetries - 1) { + LOGGER.debug("LMDB reader lock contention (attempt {}), retrying after {}ms", + attempt + 1, retryDelayMs); + try { + Thread.sleep(retryDelayMs); + retryDelayMs *= 2; // Exponential backoff + } catch (InterruptedException ie) { + Thread.currentThread().interrupt(); + throw new RuntimeException("Interrupted during LMDB retry", ie); + } + } else { + throw new RuntimeException( + "LMDB database error after " + maxRetries + " retries: " + e.getMessage(), e); + } + } catch (LmdbException e) { + throw new RuntimeException( + "LMDB database error during batch embedding lookup: " + e.getMessage(), e); + } + } + + // Should not reach here, but satisfy 
compiler + throw new RuntimeException("LMDB lookup failed after retries"); + } + + /** + * Look up embedding for a word using an existing transaction. + * + * @param word The word to look up + * @param txn Active read transaction + * @return Embedding vector, or zero vector if not found + */ + private float[] getEmbeddingWithTxn(String word, Txn txn) { + // Normalize digits to "0" like Python's _normalize_num + String normalizedWord = normalizeNum(word); + + // Track lookups and unique words + totalLookups.incrementAndGet(); + uniqueWords.putIfAbsent(normalizedWord, Boolean.TRUE); + + // Check cache first - avoids LMDB disk I/O for repeated words + float[] cached = cache.getIfPresent(normalizedWord); + if (cached != null) { + cacheHits.incrementAndGet(); + return cached; // Return cached copy (no clone needed - embeddings are read-only) + } + + // Cache miss - look up in LMDB + long startNs = System.nanoTime(); + + byte[] keyBytes = normalizedWord.getBytes(StandardCharsets.UTF_8); + ByteBuffer keyBuffer = ByteBuffer.allocateDirect(keyBytes.length); + keyBuffer.put(keyBytes).flip(); + + ByteBuffer valueBuffer = dbi.get(txn, keyBuffer); + + totalLookupTimeNs.addAndGet(System.nanoTime() - startNs); + + if (valueBuffer == null) { + // Word not found, cache and return zero vector + misses.incrementAndGet(); + float[] zero = zeroVector.clone(); + cache.put(normalizedWord, zero); + return zero; + } + + // Parse float array from raw bytes (little-endian float32) + valueBuffer.order(ByteOrder.LITTLE_ENDIAN); + float[] embedding = new float[embeddingSize]; + for (int i = 0; i < embeddingSize; i++) { + embedding[i] = valueBuffer.getFloat(); + } + + // Store in cache for future lookups + cache.put(normalizedWord, embedding); + return embedding; + } + + /** + * Check if a word exists in the database. 
+ * + * @throws RuntimeException if LMDB database access fails + */ + public boolean contains(String word) { + byte[] keyBytes = word.getBytes(StandardCharsets.UTF_8); + ByteBuffer keyBuffer = ByteBuffer.allocateDirect(keyBytes.length); + keyBuffer.put(keyBytes).flip(); + + try (Txn txn = env.txnRead()) { + return dbi.get(txn, keyBuffer) != null; + } catch (LmdbException e) { + throw new RuntimeException("LMDB database error checking word '" + word + "': " + e.getMessage(), e); + } + } + + public int getEmbeddingSize() { + return embeddingSize; + } + + /** + * Validate that the embeddings database contains raw float32 format. + * + * If the database contains pickled numpy arrays (the old DeLFT format), + * the bytes will be interpreted as garbage floats with extreme values. + * This validation fails fast at startup with a clear error message. + * + * @param dbPath Path to the database (for error messages) + * @throws IOException if validation fails + */ + private void validateEmbeddingFormat(Path dbPath) throws IOException { + // Common test words that should exist in any GloVe/word2vec vocabulary + String[] testWords = { "the", "and", "of", "to", "in" }; + final float MAX_VALID_VALUE = 10.0f; // GloVe values are typically < 5 + + for (String testWord : testWords) { + if (contains(testWord)) { + float[] embedding = getEmbedding(testWord); + + for (int i = 0; i < embedding.length; i++) { + float value = embedding[i]; + + if (Float.isNaN(value) || Float.isInfinite(value) || Math.abs(value) > MAX_VALID_VALUE) { + close(); // Clean up before throwing + throw new IOException( + "Embeddings database at " + dbPath.toAbsolutePath() + " appears to contain " + + "pickled numpy format instead of raw float32.\n" + + "Found invalid embedding value for word '" + testWord + "': " + + (Float.isNaN(value) ? "NaN" : Float.isInfinite(value) ? 
"Infinity" : value) + + " at index " + i + ".\n" + + "Please regenerate embeddings using:\n" + + " python3 grobid-home/scripts/preload_embeddings.py --embedding glove-840B"); + } + } + + LOGGER.debug("Embeddings format validation passed for word '{}'", testWord); + return; // Validation passed for one word, that's enough + } + } + + LOGGER.warn("Could not validate embeddings format - none of the test words found in database"); + } + + @Override + public void close() { + // Reference counting: only close if this is the last reference + int remaining = refCount.decrementAndGet(); + if (remaining > 0) { + LOGGER.debug("WordEmbeddings [{}] close called, but {} references remain", dbName, remaining); + return; + } + + // Remove from singleton registry + INSTANCES.values().remove(this); + LOGGER.info("Closing WordEmbeddings singleton for {}", dbName); + + // Shutdown stats scheduler first + if (statsScheduler != null) { + statsScheduler.shutdown(); + try { + if (!statsScheduler.awaitTermination(5, TimeUnit.SECONDS)) { + statsScheduler.shutdownNow(); + } + } catch (InterruptedException e) { + statsScheduler.shutdownNow(); + Thread.currentThread().interrupt(); + } + // Log final stats before closing + logStats(); + } + + if (dbi != null) { + dbi.close(); + } + if (env != null) { + env.close(); + } + } +} diff --git a/grobid-core/src/main/java/org/grobid/core/features/FeaturesVectorHeader.java b/grobid-core/src/main/java/org/grobid/core/features/FeaturesVectorHeader.java index 2ef1859577..361cb02edb 100755 --- a/grobid-core/src/main/java/org/grobid/core/features/FeaturesVectorHeader.java +++ b/grobid-core/src/main/java/org/grobid/core/features/FeaturesVectorHeader.java @@ -52,6 +52,94 @@ public class FeaturesVectorHeader { public boolean largerThanAverageFont = false; //public boolean superscript = false; + public static FeaturesVectorHeader fromLayoutToken(LayoutToken token) { + FeaturesVectorHeader features = new FeaturesVectorHeader(); + FeatureFactory featureFactory = 
FeatureFactory.getInstance(); + + String text = token.getText(); + features.token = token; + features.string = text; + + Matcher m0 = featureFactory.isPunct.matcher(text); + + if (m0.find()) { + features.punctType = "PUNCT"; + } + if (text.equals("(") || text.equals("[")) { + features.punctType = "OPENBRACKET"; + + } else if (text.equals(")") || text.equals("]")) { + features.punctType = "ENDBRACKET"; + + } else if (text.equals(".")) { + features.punctType = "DOT"; + + } else if (text.equals(",")) { + features.punctType = "COMMA"; + + } else if (text.equals("-")) { + features.punctType = "HYPHEN"; + + } else if (text.equals("\"") || text.equals("\'") || text.equals("`")) { + features.punctType = "QUOTE"; + } + + if (text.length() == 1) { + features.singleChar = true; + } + + if (Character.isUpperCase(text.charAt(0))) { + features.capitalisation = "INITCAP"; + } + + if (featureFactory.test_all_capital(text)) { + features.capitalisation = "ALLCAP"; + } + + if (featureFactory.test_digit(text)) { + features.digit = "CONTAINSDIGITS"; + } + + Matcher m = featureFactory.isDigit.matcher(text); + if (m.find()) { + features.digit = "ALLDIGIT"; + } + + if (featureFactory.test_common(text)) { + features.commonName = true; + } + + if (featureFactory.test_names(text)) { + features.properName = true; + } + + if (featureFactory.test_month(text)) { + features.month = true; + } + + Matcher m2 = featureFactory.year.matcher(text); + if (m2.find()) { + features.year = true; + } + + if (token.isBold()) + features.bold = true; + + if (token.isItalic()) + features.italic = true; + + if (features.capitalisation == null) + features.capitalisation = "NOCAPS"; + + if (features.digit == null) + features.digit = "NODIGIT"; + + if (features.punctType == null) + features.punctType = "NOPUNCT"; + + return features; + } + public String printVector() { if (string == null) return null; if (string.length() == 0) return null; diff --git 
a/grobid-core/src/main/java/org/grobid/core/jni/DeLFTClassifierModel.java b/grobid-core/src/main/java/org/grobid/core/jni/DeLFTClassifierModel.java index 21f2255b12..c530a8ffe3 100644 --- a/grobid-core/src/main/java/org/grobid/core/jni/DeLFTClassifierModel.java +++ b/grobid-core/src/main/java/org/grobid/core/jni/DeLFTClassifierModel.java @@ -18,10 +18,11 @@ import java.util.regex.Matcher; import java.util.regex.Pattern; -public class DeLFTClassifierModel { +public class DeLFTClassifierModel implements org.grobid.core.engines.tagging.GenericClassifier { public static final Logger LOGGER = LoggerFactory.getLogger(DeLFTClassifierModel.class); - // Exploit JNI CPython interpreter to execute load and execute a DeLFT deep learning model + // Exploit JNI CPython interpreter to execute load and execute a DeLFT deep + // learning model private String modelName; private String architecture; @@ -29,8 +30,10 @@ public DeLFTClassifierModel(String model, String architecture) { this.modelName = model; this.architecture = architecture; try { - LOGGER.info("Loading DeLFT classification model for " + this.modelName + " in " + GrobidProperties.getInstance().getModelPath()); - JEPThreadPoolClassifier.getInstance().run(new InitModel(this.modelName, GrobidProperties.getInstance().getModelPath(), this.architecture)); + LOGGER.info("Loading DeLFT classification model for " + this.modelName + " in " + + GrobidProperties.getInstance().getModelPath()); + JEPThreadPoolClassifier.getInstance().run( + new InitModel(this.modelName, GrobidProperties.getInstance().getModelPath(), this.architecture)); } catch (InterruptedException | RuntimeException e) { LOGGER.error("DeLFT model " + this.modelName + " initialization failed", e); } @@ -60,12 +63,12 @@ public void run() { GrobidProperties.getInstance(); if (GrobidProperties.getDelftRuntimeMaxSequenceLength(this.modelName) != -1) { jep.eval(this.modelName + ".config.max_sequence_length=" + - 
GrobidProperties.getDelftRuntimeMaxSequenceLength(this.modelName)); + GrobidProperties.getDelftRuntimeMaxSequenceLength(this.modelName)); } if (GrobidProperties.getDelftRuntimeBatchSize(this.modelName) != -1) { jep.eval(this.modelName + ".config.batch_size=" + - GrobidProperties.getDelftRuntimeBatchSize(this.modelName)); + GrobidProperties.getDelftRuntimeBatchSize(this.modelName)); } } catch (JepException e) { @@ -80,32 +83,30 @@ private class ClassificationTask implements Callable { private String modelName; public ClassificationTask(String modelName, List data) { - //System.out.println("label thread: " + Thread.currentThread().getId()); + // System.out.println("label thread: " + Thread.currentThread().getId()); this.modelName = modelName; this.data = data; } private void setJepStringValueWithFileFallback( - Jep jep, String name, List values - ) throws JepException, IOException { + Jep jep, String name, List values) throws JepException, IOException { try { jep.set(name, values); - // convert PyJList to a normal python list (necessary for Hugging Face transformer tokenizer input) + // convert PyJList to a normal python list (necessary for Hugging Face + // transformer tokenizer input) jep.eval(name + " = list(" + name + ")"); } catch (JepException e) { // we have normally the Java List as a PyJList in python, which should - // be equivalent to a normal python list + // be equivalent to a normal python list File tempFile = IOUtilities.newTempFile(name, ".data"); LOGGER.debug( - "Falling back to file {} due to exception: {}", - tempFile, e.toString() - ); + "Falling back to file {} due to exception: {}", + tempFile, e.toString()); IOUtilities.writeListInFile(tempFile.getAbsolutePath(), values, "|"); jep.eval("from pathlib import Path"); jep.eval( - name + " = Path('" + tempFile.getAbsolutePath() + - "').read_text(encoding='utf-8').split(\"|\")" - ); + name + " = Path('" + tempFile.getAbsolutePath() + + "').read_text(encoding='utf-8').split(\"|\")"); 
tempFile.delete(); } } @@ -119,8 +120,10 @@ public String call() { this.setJepStringValueWithFileFallback(jep, "input", this.data); String model_variable = this.modelName.replace("-", "_"); jep.eval("jsondict = " + model_variable + ".predict(input, 'json', use_main_thread_only=True)"); - //jep.eval("print(json.dumps(jsondict, sort_keys=False, indent=4, ensure_ascii=False))"); - Object objectResult = jep.getValue("json.dumps(jsondict, sort_keys=True, indent=4, ensure_ascii=False)"); + // jep.eval("print(json.dumps(jsondict, sort_keys=False, indent=4, + // ensure_ascii=False))"); + Object objectResult = jep + .getValue("json.dumps(jsondict, sort_keys=True, indent=4, ensure_ascii=False)"); results = (String) objectResult; @@ -139,7 +142,7 @@ public String call() { LOGGER.error("DeLFT model classification via JEP failed", e); } } - //System.out.println(labelledData.toString()); + // System.out.println(labelledData.toString()); return results; } } @@ -161,8 +164,10 @@ public String classify(List data) { } /** - * Training via JNI CPython interpreter (JEP). It appears that after some epochs, the JEP thread - * usually hangs... Possibly issues with IO threads at the level of JEP (output not consumed because + * Training via JNI CPython interpreter (JEP). It appears that after some + * epochs, the JEP thread + * usually hangs... Possibly issues with IO threads at the level of JEP (output + * not consumed because * of \r and no end of line?). 
*/ public static void trainJNI(String modelName, File trainingData, File outputModel) { @@ -170,7 +175,7 @@ public static void trainJNI(String modelName, File trainingData, File outputMode LOGGER.info("Train DeLFT classification model " + modelName + "..."); GrobidProperties.getInstance(); JEPThreadPoolClassifier.getInstance().run( - new TrainTask(modelName, trainingData, GrobidProperties.getModelPath())); + new TrainTask(modelName, trainingData, GrobidProperties.getModelPath())); } catch (InterruptedException e) { LOGGER.error("Train DeLFT classification model " + modelName + " task failed", e); } @@ -184,8 +189,9 @@ private static class TrainTask implements Runnable { private boolean incremental; public TrainTask(String modelName, File trainPath, File modelPath) { - //public TrainTask(String modelName, File trainPath, File modelPath, String architecture, boolean incremental) { - //System.out.println("train thread: " + Thread.currentThread().getId()); + // public TrainTask(String modelName, File trainPath, File modelPath, String + // architecture, boolean incremental) { + // System.out.println("train thread: " + Thread.currentThread().getId()); this.modelName = modelName; this.trainPath = trainPath; this.modelPath = modelPath; @@ -199,7 +205,8 @@ public void run() { try { // load data // to be reviewed for classification - jep.eval("x_all, y_all, f_all = load_data_and_labels_crf_file('" + this.trainPath.getAbsolutePath() + "')"); + jep.eval("x_all, y_all, f_all = load_data_and_labels_crf_file('" + this.trainPath.getAbsolutePath() + + "')"); jep.eval("x_train, x_valid, y_train, y_valid = train_test_split(x_all, y_all, test_size=0.1)"); jep.eval("print(len(x_train), 'train sequences')"); jep.eval("print(len(x_valid), 'validation sequences')"); @@ -213,38 +220,41 @@ public void run() { String localArgs = ""; if (GrobidProperties.getDelftTrainingMaxSequenceLength(this.modelName) != -1) localArgs += ", maxlen=" + - 
GrobidProperties.getDelftTrainingMaxSequenceLength(this.modelName); + GrobidProperties.getDelftTrainingMaxSequenceLength(this.modelName); if (GrobidProperties.getDelftTrainingBatchSize(this.modelName) != -1) localArgs += ", batch_size=" + - GrobidProperties.getDelftTrainingBatchSize(this.modelName); + GrobidProperties.getDelftTrainingBatchSize(this.modelName); if (GrobidProperties.getDelftTranformer(modelName) != null) { localArgs += ", transformer=" + - GrobidProperties.getDelftTranformer(modelName); + GrobidProperties.getDelftTranformer(modelName); } // init model to be trained if (this.architecture == null) { jep.eval("model = Classifier('" + this.modelName + - "', max_epoch=100, recurrent_dropout=0.50, embeddings_name='glove-840B', use_ELMo=" + useELMo + localArgs + ")"); + "', max_epoch=100, recurrent_dropout=0.50, embeddings_name='glove-840B', use_ELMo=" + + useELMo + localArgs + ")"); } else { jep.eval("model = Classifier('" + this.modelName + - "', max_epoch=100, recurrent_dropout=0.50, embeddings_name='glove-840B', use_ELMo=" + useELMo + localArgs + - ", architecture='" + architecture + "')"); + "', max_epoch=100, recurrent_dropout=0.50, embeddings_name='glove-840B', use_ELMo=" + + useELMo + localArgs + + ", architecture='" + architecture + "')"); } // actual training if (incremental) { // if incremental training, we need to load the existing model if (this.modelPath != null && - this.modelPath.exists() && - !this.modelPath.isDirectory()) { + this.modelPath.exists() && + !this.modelPath.isDirectory()) { jep.eval("model.load('" + this.modelPath.getAbsolutePath() + "')"); jep.eval("model.train(x_train, y_train, x_valid, y_valid, incremental=True)"); } else { - throw new GrobidException("the path to the model to be used for starting incremental training is invalid: " + - this.modelPath.getAbsolutePath()); + throw new GrobidException( + "the path to the model to be used for starting incremental training is invalid: " + + this.modelPath.getAbsolutePath()); } } 
else jep.eval("model.train(x_train, y_train, x_valid, y_valid)"); @@ -276,18 +286,20 @@ public void run() { } /** - * Train with an external process rather than with JNI, this approach appears to be more stable for the - * training process (JNI approach hangs after a while) and does not raise any runtime/integration issues. + * Train with an external process rather than with JNI, this approach appears to + * be more stable for the + * training process (JNI approach hangs after a while) and does not raise any + * runtime/integration issues. */ public static void train(String modelName, File trainingData, File outputModel) { try { LOGGER.info("Train DeLFT model " + modelName + "..."); List command = Arrays.asList("python3", - "dataseerClassifier.py", - modelName, - "train", - "--input", trainingData.getAbsolutePath(), - "--output", GrobidProperties.getInstance().getModelPath().getAbsolutePath()); + "dataseerClassifier.py", + modelName, + "train", + "--input", trainingData.getAbsolutePath(), + "--output", GrobidProperties.getInstance().getModelPath().getAbsolutePath()); if (GrobidProperties.getInstance().useELMo(modelName)) { command.add("--use-ELMo"); } @@ -296,14 +308,13 @@ public static void train(String modelName, File trainingData, File outputModel) File delftPath = new File(GrobidProperties.getInstance().getDeLFTFilePath()); pb.directory(delftPath); Process process = pb.start(); - //pb.inheritIO(); - CustomStreamGobbler customStreamGobbler = - new CustomStreamGobbler(process.getInputStream(), System.out); + // pb.inheritIO(); + CustomStreamGobbler customStreamGobbler = new CustomStreamGobbler(process.getInputStream(), System.out); Executors.newSingleThreadExecutor().submit(customStreamGobbler); SimpleStreamGobbler streamGobbler = new SimpleStreamGobbler(process.getErrorStream(), System.err::println); Executors.newSingleThreadExecutor().submit(streamGobbler); int exitCode = process.waitFor(); - //assert exitCode == 0; + // assert exitCode == 0; } catch 
(IOException e) { LOGGER.error("IO error when training DeLFT classification model " + modelName, e); } catch (InterruptedException e) { @@ -335,8 +346,10 @@ public void run() { String model_variable = null; try { model_variable = this.modelName.replace("-", "_"); - // We don't close the JEP instance here because it might be reused by other models - // The JEP instance will be closed when the application shuts down or when explicitly requested + // We don't close the JEP instance here because it might be reused by other + // models + // The JEP instance will be closed when the application shuts down or when + // explicitly requested } catch (JepException e) { LOGGER.error("Closing DeLFT classification model failed", e); } finally { @@ -363,12 +376,13 @@ public SimpleStreamGobbler(InputStream inputStream, Consumer consumer) { @Override public void run() { new BufferedReader(new InputStreamReader(inputStream)).lines() - .forEach(consumer); + .forEach(consumer); } } /** - * This is a custom gobbler that reproduces correctly the Keras training progress bar + * This is a custom gobbler that reproduces correctly the Keras training + * progress bar * by injecting a \r for progress line updates. 
*/ private static class CustomStreamGobbler implements Runnable { diff --git a/grobid-core/src/main/java/org/grobid/core/main/LibraryLoader.java b/grobid-core/src/main/java/org/grobid/core/main/LibraryLoader.java index 7b8e5d933d..8a638709a1 100755 --- a/grobid-core/src/main/java/org/grobid/core/main/LibraryLoader.java +++ b/grobid-core/src/main/java/org/grobid/core/main/LibraryLoader.java @@ -40,38 +40,44 @@ public static void load() { LOGGER.debug(getLibraryFolder()); Set distinctModels = GrobidProperties.getInstance().getDistinctModels(); - for(GrobidCRFEngine distinctModel : distinctModels) { + for (GrobidCRFEngine distinctModel : distinctModels) { if (distinctModel != GrobidCRFEngine.CRFPP && - distinctModel != GrobidCRFEngine.WAPITI && - distinctModel != GrobidCRFEngine.DELFT) { + distinctModel != GrobidCRFEngine.WAPITI && + distinctModel != GrobidCRFEngine.DELFT && + distinctModel != GrobidCRFEngine.ONNX && + distinctModel != GrobidCRFEngine.DUMMY) { throw new IllegalStateException("Unsupported sequence labelling engine: " + distinctModel); } } File libraryFolder = new File(getLibraryFolder()); if (!libraryFolder.exists() || !libraryFolder.isDirectory()) { - LOGGER.error("Unable to find a native sequence labelling library: Folder " + libraryFolder + " does not exist"); + LOGGER.error("Unable to find a native sequence labelling library: Folder " + libraryFolder + + " does not exist"); throw new RuntimeException( - "Unable to find a native sequence labelling library: Folder " + libraryFolder + " does not exist"); + "Unable to find a native sequence labelling library: Folder " + libraryFolder + + " does not exist"); } if (CollectionUtils.containsAny(distinctModels, Collections.singletonList(GrobidCRFEngine.CRFPP))) { - File[] files = libraryFolder.listFiles(file -> file.getName().toLowerCase().startsWith(CRFPP_NATIVE_LIB_NAME)); + File[] files = libraryFolder + .listFiles(file -> file.getName().toLowerCase().startsWith(CRFPP_NATIVE_LIB_NAME)); if 
(ArrayUtils.isEmpty(files)) { LOGGER.error("Unable to find a native CRF++ library: No files starting with " - + CRFPP_NATIVE_LIB_NAME - + " are in folder " + libraryFolder); - throw new RuntimeException( - "Unable to find a native CRF++ library: No files starting with " + CRFPP_NATIVE_LIB_NAME + " are in folder " + libraryFolder); + throw new RuntimeException( + "Unable to find a native CRF++ library: No files starting with " + + CRFPP_NATIVE_LIB_NAME + + " are in folder " + libraryFolder); } if (files.length > 1) { - LOGGER.error("Unable to load a native CRF++ library: More than 1 library exists in " + libraryFolder); + LOGGER.error( + "Unable to load a native CRF++ library: More than 1 library exists in " + libraryFolder); throw new RuntimeException( - "Unable to load a native CRF++ library: More than 1 library exists in " + libraryFolder); + "Unable to load a native CRF++ library: More than 1 library exists in " + libraryFolder); } String libPath = files[0].getAbsolutePath(); @@ -82,11 +88,11 @@ public static void load() { } catch (Exception e) { LOGGER.error("Unable to load a native CRF++ library, although it was found under path " + libPath); throw new RuntimeException( - "Unable to load a native CRF++ library, although it was found under path " + libPath, e); + "Unable to load a native CRF++ library, although it was found under path " + libPath, e); } } - - if (CollectionUtils.containsAny(distinctModels, Collections.singletonList(GrobidCRFEngine.WAPITI))) { + + if (CollectionUtils.containsAny(distinctModels, Collections.singletonList(GrobidCRFEngine.WAPITI))) { File[] wapitiLibFiles = libraryFolder.listFiles(new FilenameFilter() { @Override public boolean accept(File dir, String name) { @@ -99,10 +105,13 @@ public boolean accept(File dir, String name) { } else { LOGGER.info("Loading Wapiti native library..."); if (CollectionUtils.containsAny(distinctModels, Collections.singletonList(GrobidCRFEngine.DELFT))) { - // if DeLFT will be used, we must not load 
libstdc++, it would create a conflict with tensorflow libstdc++ version + // if DeLFT will be used, we must not load libstdc++, it would create a conflict + // with tensorflow libstdc++ version // so we temporary rename the lib so that it is not loaded in this case - // note that we know that, in this case, the local lib can be ignored because as DeFLT and tensorflow are installed - // we are sure that a compatible libstdc++ lib is installed on the system and can be dynamically loaded + // note that we know that, in this case, the local lib can be ignored because as + // DeFLT and tensorflow are installed + // we are sure that a compatible libstdc++ lib is installed on the system and + // can be dynamically loaded String libstdcppPath = libraryFolder.getAbsolutePath() + File.separator + "libstdc++.so.6"; File libstdcppFile = new File(libstdcppPath); @@ -123,18 +132,22 @@ public boolean accept(File dir, String name) { } finally { if (CollectionUtils.containsAny(distinctModels, Arrays.asList(GrobidCRFEngine.DELFT))) { // restore libstdc++ - String libstdcppPathNew = libraryFolder.getAbsolutePath() + File.separator + "libstdc++.so.6.new"; + String libstdcppPathNew = libraryFolder.getAbsolutePath() + File.separator + + "libstdc++.so.6.new"; File libstdcppFileNew = new File(libstdcppPathNew); if (libstdcppFileNew.exists()) { - File libstdcppFile = new File(libraryFolder.getAbsolutePath() + File.separator + "libstdc++.so.6"); + File libstdcppFile = new File( + libraryFolder.getAbsolutePath() + File.separator + "libstdc++.so.6"); libstdcppFileNew.renameTo(libstdcppFile); } // restore libgcc - String libgccPathNew = libraryFolder.getAbsolutePath() + File.separator + "libgcc_s.so.1.new"; + String libgccPathNew = libraryFolder.getAbsolutePath() + File.separator + + "libgcc_s.so.1.new"; File libgccFileNew = new File(libgccPathNew); if (libgccFileNew.exists()) { - File libgccFile = new File(libraryFolder.getAbsolutePath() + File.separator + "libgcc_s.so.1"); + File libgccFile 
= new File( + libraryFolder.getAbsolutePath() + File.separator + "libgcc_s.so.1"); libgccFileNew.renameTo(libgccFile); } } @@ -144,8 +157,10 @@ public boolean accept(File dir, String name) { if (CollectionUtils.containsAny(distinctModels, Collections.singletonList(GrobidCRFEngine.DELFT))) { LOGGER.info("Loading JEP native library for DeLFT... " + libraryFolder.getAbsolutePath()); - // actual loading will be made at JEP initialization, so we just need to add the path in the - // java.library.path (JEP will anyway try to load from java.library.path, so explicit file + // actual loading will be made at JEP initialization, so we just need to add the + // path in the + // java.library.path (JEP will anyway try to load from java.library.path, so + // explicit file // loading here will not help) try { @@ -192,8 +207,8 @@ public static void addLibraryPath(String pathToAdd) throws Exception { public static String getLibraryFolder() { GrobidProperties.getInstance(); - return String.format("%s" + File.separator + "%s", - GrobidProperties.getNativeLibraryPath().getAbsolutePath(), - Utilities.getOsNameAndArch()); + return String.format("%s" + File.separator + "%s", + GrobidProperties.getNativeLibraryPath().getAbsolutePath(), + Utilities.getOsNameAndArch()); } } diff --git a/grobid-core/src/main/java/org/grobid/core/utilities/GrobidConfig.java b/grobid-core/src/main/java/org/grobid/core/utilities/GrobidConfig.java index 986c2645e4..b3e1216c8c 100644 --- a/grobid-core/src/main/java/org/grobid/core/utilities/GrobidConfig.java +++ b/grobid-core/src/main/java/org/grobid/core/utilities/GrobidConfig.java @@ -3,7 +3,7 @@ import java.util.List; /** - * This class is a bean for the YAML configuation of the GROBID instance. + * This class is a bean for the YAML configuation of the GROBID instance. 
* */ public class GrobidConfig { @@ -21,12 +21,12 @@ public static class GrobidParameters { public String languageDetectorFactory; public String sentenceDetectorFactory; - - public int concurrency = 10; + + public int concurrency = 10; public int poolMaxWait = 1; - - public DelftParameters delft; - public WapitiParameters wapiti; + + public DelftParameters delft; + public WapitiParameters wapiti; public List models; } @@ -61,7 +61,7 @@ public static class HostParameters { public String url; public int timeoutSec = 60; } - + public static class DelftParameters { /** * Generic parameters relative to the DeLFT engine @@ -107,11 +107,21 @@ public static class DelftModelParameterSet { public int batch_size = -1; } - public static class ModelParameters { - public String name; /* name of model */ - public String engine; /* value wapiti or delft */ + public static class OnnxModelParameters { + /** + * Parameters relative to a specific ONNX model + */ + public String architecture; + public int max_sequence_length = -1; + public int batch_size = -1; + } + + public static class ModelParameters { + public String name; /* name of model */ + public String engine; /* value wapiti, delft, or onnx */ public WapitiModelParameters wapiti; public DelftModelParameters delft; + public OnnxModelParameters onnx; } } \ No newline at end of file diff --git a/grobid-core/src/main/java/org/grobid/core/utilities/GrobidProperties.java b/grobid-core/src/main/java/org/grobid/core/utilities/GrobidProperties.java index a9d44a8508..62a391706b 100644 --- a/grobid-core/src/main/java/org/grobid/core/utilities/GrobidProperties.java +++ b/grobid-core/src/main/java/org/grobid/core/utilities/GrobidProperties.java @@ -30,14 +30,18 @@ import com.fasterxml.jackson.databind.DeserializationFeature; /** - * This class provide methods to set/load/access grobid config value from a yaml config file loaded + * This class provide methods to set/load/access grobid config value from a yaml + * config file loaded * in 
the class {@link GrobidConfig}. * - * New yaml parameters and former properties should be equivalent via this class. We keep the - * class name "GrobidProperties" for compatibility with Grobid modules and other Java applications + * New yaml parameters and former properties should be equivalent via this + * class. We keep the + * class name "GrobidProperties" for compatibility with Grobid modules and other + * Java applications * using Grobid as a library. * - * to be done: having parameters that can be overridden by a system property having a compatible name. + * to be done: having parameters that can be overridden by a system property + * having a compatible name. */ public class GrobidProperties { public static final Logger LOGGER = LoggerFactory.getLogger(GrobidProperties.class); @@ -82,7 +86,8 @@ public class GrobidProperties { private static String GROBID_VERSION = null; /** - * Returns an instance of {@link GrobidProperties} object. If no one is set, then + * Returns an instance of {@link GrobidProperties} object. If no one is set, + * then * it creates one */ public static GrobidProperties getInstance() { @@ -94,7 +99,8 @@ public static GrobidProperties getInstance() { } /** - * Returns an instance of {@link GrobidProperties} object based on a custom grobid-home directory. + * Returns an instance of {@link GrobidProperties} object based on a custom + * grobid-home directory. * If no one is set, then it creates one. 
*/ public static GrobidProperties getInstance(GrobidHomeFinder grobidHomeFinder) { @@ -172,14 +178,15 @@ public static void setGrobidHome(final String pGROBID_HOME_PATH) { grobidHome = new File(pGROBID_HOME_PATH); // exception if prop file does not exist if (!grobidHome.exists()) { - throw new GrobidPropertyException("Could not read GROBID_HOME, the directory '" + pGROBID_HOME_PATH + "' does not exist."); + throw new GrobidPropertyException( + "Could not read GROBID_HOME, the directory '" + pGROBID_HOME_PATH + "' does not exist."); } try { grobidHome = grobidHome.getCanonicalFile(); } catch (IOException e) { throw new GrobidPropertyException("Cannot set grobid home path to the given one '" + pGROBID_HOME_PATH - + "', because it does not exist."); + + "', because it does not exist."); } } @@ -216,14 +223,16 @@ public static void setGrobidConfigPath(final String pGrobidConfigPath) { File grobidConfigPath = new File(pGrobidConfigPath); // exception if config file does not exist if (!grobidConfigPath.exists()) { - throw new GrobidPropertyException("Cannot read GROBID yaml config file, the file '" + pGrobidConfigPath + "' does not exist."); + throw new GrobidPropertyException( + "Cannot read GROBID yaml config file, the file '" + pGrobidConfigPath + "' does not exist."); } try { GROBID_CONFIG_PATH = grobidConfigPath.getCanonicalFile(); } catch (IOException e) { - throw new GrobidPropertyException("Cannot set grobid yaml config file path to the given one '" + pGrobidConfigPath - + "', because it does not exist."); + throw new GrobidPropertyException( + "Cannot set grobid yaml config file path to the given one '" + pGrobidConfigPath + + "', because it does not exist."); } } @@ -247,14 +256,18 @@ public GrobidProperties() { mapper.disable(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES); grobidConfig = mapper.readValue(GROBID_CONFIG_PATH, GrobidConfig.class); } catch (IOException exp) { - throw new GrobidPropertyException("Cannot open GROBID config yaml file at location 
'" + GROBID_CONFIG_PATH.getAbsolutePath() - + "'", exp); + throw new GrobidPropertyException( + "Cannot open GROBID config yaml file at location '" + GROBID_CONFIG_PATH.getAbsolutePath() + + "'", + exp); } catch (Exception exp) { - throw new GrobidPropertyException("Cannot open GROBID config yaml file " + getGrobidConfigPath().getAbsolutePath(), exp); + throw new GrobidPropertyException( + "Cannot open GROBID config yaml file " + getGrobidConfigPath().getAbsolutePath(), exp); } - //Map configParametersViaEnvironment = getEnvironmentVariableOverrides(System.getenv()); - //this.setEnvironmentConfigParameter(configParametersViaEnvironment); + // Map configParametersViaEnvironment = + // getEnvironmentVariableOverrides(System.getenv()); + // this.setEnvironmentConfigParameter(configParametersViaEnvironment); initializeTmpPath(); // TBD: tmp to be created @@ -266,7 +279,7 @@ public GrobidProperties() { * Create a map between model names and associated parameters */ private static void createModelMap() { - for(ModelParameters modelParameter : grobidConfig.grobid.models) { + for (ModelParameters modelParameter : grobidConfig.grobid.models) { if (modelMap == null) modelMap = new TreeMap<>(); modelMap.put(modelParameter.name, modelParameter); @@ -295,7 +308,8 @@ private void initializeTmpPath() { } /** - * Return the distinct values of all the engines that are specified in the the model map + * Return the distinct values of all the engines that are specified in the the + * model map */ public static Set getDistinctModels() { Set distinctModels = new HashSet<>(); @@ -313,6 +327,16 @@ public static Set getDistinctModels() { return distinctModels; } + /** + * Return all model names from the configuration + */ + public static Set getModelNames() { + if (modelMap == null) { + return Collections.emptySet(); + } + return modelMap.keySet(); + } + /** * Returns the current version of GROBID * @@ -391,7 +415,8 @@ public static File getNativeLibraryPath() { } /** - * Returns the 
installation path of DeLFT if set, null otherwise. It is required for using + * Returns the installation path of DeLFT if set, null otherwise. It is required + * for using * a Deep Learning sequence labelling engine. * * @return path to the folder that contains the local install of DeLFT @@ -470,7 +495,8 @@ public static void setCrossrefMailto(final String mailto) { * @return string of the email parameter to be used for requesting crossref */ public static String getCrossrefMailto() { - if (grobidConfig.grobid.consolidation.crossref.mailto == null || grobidConfig.grobid.consolidation.crossref.mailto.trim().length() == 0) + if (grobidConfig.grobid.consolidation.crossref.mailto == null + || grobidConfig.grobid.consolidation.crossref.mailto.trim().length() == 0) return null; else return grobidConfig.grobid.consolidation.crossref.mailto; @@ -478,8 +504,10 @@ public static String getCrossrefMailto() { /** * Set the Crossref Metadata Plus authorization token to be used for Crossref - * requests for the subscribers of this service. This token will ensure that said - * requests get directed to a pool of machines that are reserved for "Plus" SLA users. + * requests for the subscribers of this service. This token will ensure that + * said + * requests get directed to a pool of machines that are reserved for "Plus" SLA + * users. * * @param token authorization token to be used for requesting crossref */ @@ -489,13 +517,16 @@ public static void setCrossrefToken(final String token) { /** * Get the Crossref Metadata Plus authorization token to be used for Crossref - * requests for the subscribers of this service. This token will ensure that said - * requests get directed to a pool of machines that are reserved for "Plus" SLA users. + * requests for the subscribers of this service. This token will ensure that + * said + * requests get directed to a pool of machines that are reserved for "Plus" SLA + * users. 
* * @return authorization token to be used for requesting crossref */ public static String getCrossrefToken() { - if (grobidConfig.grobid.consolidation.crossref.token == null || grobidConfig.grobid.consolidation.crossref.token.trim().length() == 0) + if (grobidConfig.grobid.consolidation.crossref.token == null + || grobidConfig.grobid.consolidation.crossref.token.trim().length() == 0) return null; else return grobidConfig.grobid.consolidation.crossref.token; @@ -508,8 +539,8 @@ public static String getCrossrefToken() { */ public static void setProxyPort(int port) { grobidConfig.grobid.proxy.port = port; - System.setProperty("http.proxyPort", ""+port); - System.setProperty("https.proxyPort", ""+port); + System.setProperty("http.proxyPort", "" + port); + System.setProperty("https.proxyPort", "" + port); } public static Integer getPdfaltoMemoryLimitMb() { @@ -524,16 +555,20 @@ public static Integer getPdfaltoTimeoutMs() { return grobidConfig.grobid.pdf.pdfalto.timeoutSec * 1000; } - /*public static Integer getNBThreads() { - Integer nbThreadsConfig = Integer.valueOf(grobidConfig.grobid.wapiti.nbThreads); - if (nbThreadsConfig.intValue() == 0) { - return Integer.valueOf(Runtime.getRuntime().availableProcessors()); - } - return nbThreadsConfig; - }*/ + /* + * public static Integer getNBThreads() { + * Integer nbThreadsConfig = + * Integer.valueOf(grobidConfig.grobid.wapiti.nbThreads); + * if (nbThreadsConfig.intValue() == 0) { + * return Integer.valueOf(Runtime.getRuntime().availableProcessors()); + * } + * return nbThreadsConfig; + * } + */ /** - * Returns the number of threads to be used when training with CRF Wapiti, given in the grobid config file. + * Returns the number of threads to be used when training with CRF Wapiti, given + * in the grobid config file. * * @return number of threads */ @@ -556,13 +591,16 @@ public static Integer getPdfTokensMax() { } /** - * Sets the number of threads for training a Wapiti model, given in the grobid config file. 
+ * Sets the number of threads for training a Wapiti model, given in the grobid + * config file. * * @param nbThreads umber of threads */ - /*public static void setNBThreads(int nbThreads) { - grobidConfig.grobid.wapiti.nbThreads = nbThreads; - }*/ + /* + * public static void setNBThreads(int nbThreads) { + * grobidConfig.grobid.wapiti.nbThreads = nbThreads; + * } + */ public static void setWapitiNbThreads(int nbThreads) { grobidConfig.grobid.wapiti.nbThreads = nbThreads; } @@ -578,9 +616,11 @@ public static String getLanguageDetectorFactory() { /** * Sets if a language id shall be used, given in the grobid-property file. */ - /*public static void setUseLanguageId(final String useLanguageId) { - setPropertyValue(GrobidPropertyKeys.PROP_USE_LANG_ID, useLanguageId); - }*/ + /* + * public static void setUseLanguageId(final String useLanguageId) { + * setPropertyValue(GrobidPropertyKeys.PROP_USE_LANG_ID, useLanguageId); + * } + */ public static String getSentenceDetectorFactory() { String factoryClassName = grobidConfig.grobid.sentenceDetectorFactory; @@ -599,8 +639,8 @@ public static void loadPdfaltoPath() { pathToPdfalto = new File(grobidHome.getPath(), pathName); if (!pathToPdfalto.exists()) { throw new GrobidPropertyException( - "Path to pdfalto doesn't exists. " + - "Please set the path to pdfalto in the config file"); + "Path to pdfalto doesn't exists. 
" + + "Please set the path to pdfalto in the config file"); } pathToPdfalto = new File(pathToPdfalto, Utilities.getOsNameAndArch()); @@ -622,11 +662,11 @@ public static ModelParameters getGrobidModelParameters(final String modelName) { // if we have a flavor of the model, we can fall back to the configuration // of the parent model String fallBackModelName = modelName; - while(param == null) { + while (param == null) { LOGGER.debug("No configuration parameter defined for model " + modelName); int ind = fallBackModelName.lastIndexOf("-"); if (ind != -1) { - fallBackModelName = modelName.substring(0,ind); + fallBackModelName = modelName.substring(0, ind); } else { return null; } @@ -655,21 +695,40 @@ public static GrobidCRFEngine getGrobidEngine(final GrobidModel model) { } public static File getModelPath(final GrobidModel model) { - if (modelMap.get(model.getModelName()) == null) { - // model is either: - // - a flavor without config, but that should fallback to the parent model config - // if no specific config exists. 
If it is the case, the model path is infered - // from the flavor model name - // - a normal model not specified in the config, so returning null - - if (getGrobidModelParameters(model.getModelName()) == null) { - return null; - } + if (getGrobidModelParameters(model.getModelName()) == null) { + return null; + } + + GrobidCRFEngine engine = getGrobidEngine(model); + if (engine == null) { + return null; + } + + switch (engine) { + case ONNX: + String onnxArch = getOnnxArchitecture(model); + if (onnxArch == null) + return null; + String onnxDirName = model.getModelName() + "-" + onnxArch + ".onnx"; + return new File(getModelPath(), onnxDirName); + + case DELFT: + String delftArch = getDelftArchitecture(model); + String delftDirName = model.getModelName(); + if (delftArch != null) { + delftDirName += "-" + delftArch; + } + if (useELMo(model.getModelName()) && !model.getModelName().toLowerCase().contains("bert")) { + delftDirName += "-with_ELMo"; + } + return new File(getModelPath(), delftDirName); + + default: // WAPITI, CRFPP, DUMMY + String extension = engine.getExt(); + return new File(getGrobidHome(), FOLDER_NAME_MODELS + File.separator + + model.getFolderName() + File.separator + + FILE_NAME_MODEL + "." + extension); } - String extension = getGrobidEngine(model).getExt(); - return new File(getGrobidHome(), FOLDER_NAME_MODELS + File.separator - + model.getFolderName() + File.separator - + FILE_NAME_MODEL + "." 
+ extension); } public static File getModelPath() { @@ -682,10 +741,10 @@ public static File getTemplatePath(final File resourcesDir, final GrobidModel mo return null; File theFile = new File(resourcesDir, "dataset/" + model.getFolderName() - + "/crfpp-templates/" + model.getTemplateName()); + + "/crfpp-templates/" + model.getTemplateName()); if (!theFile.exists()) { theFile = new File("resources/dataset/" + model.getFolderName() - + "/crfpp-templates/" + model.getTemplateName()); + + "/crfpp-templates/" + model.getTemplateName()); } return theFile; } @@ -753,6 +812,7 @@ public static void setConsolidationService(String service) { /** * Get the Crossref timeout in seconds for consolidation service requests. + * * @return timeout in seconds */ public static int getCrossrefConsolidationTimeout() { @@ -765,6 +825,7 @@ public static int getCrossrefConsolidationTimeout() { /** * Get the Glutton timeout in seconds for consolidation service requests. + * * @return timeout in seconds */ public static int getGluttonConsolidationTimeout() { @@ -779,7 +840,7 @@ public static int getGluttonConsolidationTimeout() { * Returns if the execution context is stand alone or server. * * @return the context of execution. Return false if the property value is - * null or empty. + * null or empty. */ public static boolean isContextExecutionServer() { return contextExecutionServer; @@ -883,8 +944,9 @@ public static String getDelftTranformer(final String modelName) { } /** - * Return -1 if not set in the configuration and the default DeLFT value will be used in this case. - */ + * Return -1 if not set in the configuration and the default DeLFT value will be + * used in this case. 
+ */ public static int getDelftTrainingMaxSequenceLength(final String modelName) { ModelParameters param = getGrobidModelParameters(modelName); if (param == null) { @@ -906,8 +968,9 @@ public static int getDelftTrainingMaxSequenceLength(final String modelName) { } /** - * Return -1 if not set in the configuration and the default DeLFT value will be used in this case. - */ + * Return -1 if not set in the configuration and the default DeLFT value will be + * used in this case. + */ public static int getDelftRuntimeMaxSequenceLength(final String modelName) { ModelParameters param = getGrobidModelParameters(modelName); if (param == null) { @@ -929,8 +992,9 @@ public static int getDelftRuntimeMaxSequenceLength(final String modelName) { } /** - * Return -1 if not set in the configuration and the default DeLFT value will be used in this case. - */ + * Return -1 if not set in the configuration and the default DeLFT value will be + * used in this case. + */ public static int getDelftTrainingBatchSize(final String modelName) { ModelParameters param = getGrobidModelParameters(modelName); if (param == null) { @@ -952,8 +1016,9 @@ public static int getDelftTrainingBatchSize(final String modelName) { } /** - * Return -1 if not set in the configuration and the default DeLFT value will be used in this case. - */ + * Return -1 if not set in the configuration and the default DeLFT value will be + * used in this case. 
+ */ public static int getDelftRuntimeBatchSize(final String modelName) { ModelParameters param = getGrobidModelParameters(modelName); if (param == null) { @@ -976,10 +1041,68 @@ public static int getDelftRuntimeBatchSize(final String modelName) { public static String getDelftArchitecture(final GrobidModel model) { return getDelftArchitecture(model.getModelName()); - } + } - /*protected static Map getEnvironmentVariableOverrides(Map environmentVariablesMap) { - EnvironmentVariableProperties envParameters = new EnvironmentVariableProperties(environmentVariablesMap, "(grobid__).+"); - return envParameters.getConfigParameters(); - }*/ + public static String getOnnxArchitecture(final String modelName) { + ModelParameters param = getGrobidModelParameters(modelName); + if (param == null) { + LOGGER.debug("No configuration parameter defined for model " + modelName); + return null; + } + GrobidConfig.OnnxModelParameters onnxParam = param.onnx; + if (onnxParam != null && onnxParam.architecture != null) { + return onnxParam.architecture; + } + // Fallback to DeLFT architecture if ONNX not specified + return getDelftArchitecture(modelName); + } + + public static String getOnnxArchitecture(final GrobidModel model) { + return getOnnxArchitecture(model.getModelName()); + } + + /** + * Return -1 if not set in the configuration. + * When -1 is returned, the value should be read from the model's config.json. + */ + public static int getOnnxRuntimeMaxSequenceLength(final String modelName) { + ModelParameters param = getGrobidModelParameters(modelName); + if (param == null) { + LOGGER.debug("No configuration parameter defined for model " + modelName); + return -1; + } + GrobidConfig.OnnxModelParameters onnxParam = param.onnx; + if (onnxParam == null) { + LOGGER.debug("No configuration parameter defined for ONNX engine for model " + modelName); + return -1; + } + return onnxParam.max_sequence_length; + } + + /** + * Return -1 if not set in the configuration. 
+ */ + public static int getOnnxRuntimeBatchSize(final String modelName) { + ModelParameters param = getGrobidModelParameters(modelName); + if (param == null) { + LOGGER.debug("No configuration parameter defined for model " + modelName); + return -1; + } + GrobidConfig.OnnxModelParameters onnxParam = param.onnx; + if (onnxParam == null) { + LOGGER.debug("No configuration parameter defined for ONNX engine for model " + modelName); + return -1; + } + return onnxParam.batch_size; + } + + /* + * protected static Map + * getEnvironmentVariableOverrides(Map environmentVariablesMap) + * { + * EnvironmentVariableProperties envParameters = new + * EnvironmentVariableProperties(environmentVariablesMap, "(grobid__).+"); + * return envParameters.getConfigParameters(); + * } + */ } diff --git a/grobid-core/src/main/java/org/grobid/core/utilities/TestEngineUtils.java b/grobid-core/src/main/java/org/grobid/core/utilities/TestEngineUtils.java new file mode 100644 index 0000000000..a28503a8ea --- /dev/null +++ b/grobid-core/src/main/java/org/grobid/core/utilities/TestEngineUtils.java @@ -0,0 +1,64 @@ +package org.grobid.core.utilities; + +import org.grobid.core.engines.tagging.GrobidCRFEngine; +import org.grobid.core.factory.AbstractEngineFactory; + +/** + * Test utilities to make Grobid tests independent of the active grobid.yaml. + * + * A number of unit/integration tests assume Wapiti-style CRF models. + * When grobid.yaml is configured to use DeLFT or ONNX for some models + * (e.g. header, reference-segmenter, citation), these tests can fail. + */ +public class TestEngineUtils { + + private static volatile boolean wapitiForced = false; + + private TestEngineUtils() { + // utility class + } + + /** + * Initialize Grobid and force all configured model engines to Wapiti. + * + * Should be called in {@code @BeforeClass} before any Engine/Parser is created. 
+ */ + public static void initGrobidForceWapiti() { + // Ensure config/modelMap is loaded + GrobidProperties.getInstance(); + + if (!wapitiForced) { + synchronized (TestEngineUtils.class) { + if (!wapitiForced) { + forceAllModelsToWapiti(); + wapitiForced = true; + } + } + } + + // Continue with the normal initialization path + AbstractEngineFactory.init(); + } + + private static void forceAllModelsToWapiti() { + // Iterate over all model names from the configuration (not just enum values) + // to ensure all models including citation, header, reference-segmenter etc. are forced to wapiti + for (String modelName : GrobidProperties.getModelNames()) { + GrobidConfig.ModelParameters current = GrobidProperties.getGrobidModelParameters(modelName); + if (current == null) { + continue; + } + + // Overwrite engine selection only + GrobidConfig.ModelParameters override = new GrobidConfig.ModelParameters(); + override.name = current.name; + override.engine = GrobidCRFEngine.WAPITI.name().toLowerCase(); + override.wapiti = current.wapiti; + override.delft = current.delft; + override.onnx = current.onnx; + + GrobidProperties.addModel(override); + } + } +} + diff --git a/grobid-core/src/main/resources/large_sequence.txt b/grobid-core/src/main/resources/large_sequence.txt new file mode 100644 index 0000000000..83dbdf4e10 --- /dev/null +++ b/grobid-core/src/main/resources/large_sequence.txt @@ -0,0 +1,6747 @@ +12 12 1 12 12 12 2 12 12 12 LINESTART ALIGNEDLEFT NOCAPS ALLDIGIT 0 0 0 0 0 0 0 0 NOPUNCT 0 8 BLOCKSTART 10 0 +. . . . . . . . . . LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 0 8 BLOCKIN 10 0 +3 3 3 3 3 3 3 3 3 3 LINEIN ALIGNEDLEFT NOCAPS ALLDIGIT 1 0 0 0 0 0 0 0 NOPUNCT 0 8 BLOCKIN 10 0 +. . . . . . . . . . 
LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 0 8 BLOCKIN 10 0 +4 4 4 4 4 4 4 4 4 4 LINEIN ALIGNEDLEFT NOCAPS ALLDIGIT 1 0 0 0 0 0 0 0 NOPUNCT 0 8 BLOCKIN 10 0 +Proposed proposed P Pr Pro Prop d ed sed osed LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 1 8 BLOCKIN 10 0 +frameworks frameworks f fr fra fram s ks rks orks LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 3 8 BLOCKIN 10 0 +/ / / / / / / / / / LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 NOPUNCT 3 8 BLOCKIN 10 0 +techniques techniques t te tec tech s es ues ques LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 4 8 BLOCKIN 10 0 +for for f fo for for r or for for LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 4 8 BLOCKIN 10 0 +Ethical ethical E Et Eth Ethi l al cal ical LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 5 8 BLOCKIN 10 0 +Fine fine F Fi Fin Fine e ne ine Fine LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 1 0 0 0 0 0 NOPUNCT 6 8 BLOCKIN 10 0 +- - - - - - - - - - LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 HYPHEN 6 8 BLOCKIN 10 0 +Tuning tuning T Tu Tun Tuni g ng ing ning LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 7 8 BLOCKIN 10 0 +. . . . . . . . . . LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 7 8 BLOCKIN 10 0 +. . . . . . . . . . LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 7 8 BLOCKIN 10 0 +. . . . . . . . . . LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 8 8 BLOCKIN 10 0 +. . . . . . . . . . LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 8 8 BLOCKIN 10 0 +. . . . . . . . . . LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 8 8 BLOCKIN 10 0 +. . . . . . . . . . LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 9 8 BLOCKIN 10 0 +. . . . . . . . . . LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 9 8 BLOCKIN 10 0 +. . . . . . . . . . LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 9 8 BLOCKIN 10 0 +. . . . . . . . . . 
LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 10 8 BLOCKIN 10 0 +. . . . . . . . . . LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 10 8 BLOCKIN 10 0 +. . . . . . . . . . LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 10 8 BLOCKIN 10 0 +. . . . . . . . . . LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 10 8 BLOCKIN 10 0 +. . . . . . . . . . LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 10 8 BLOCKIN 10 0 +. . . . . . . . . . LINEEND ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 10 8 BLOCKIN 10 0 +12 12 1 12 12 12 2 12 12 12 LINESTART ALIGNEDLEFT NOCAPS ALLDIGIT 0 0 0 0 0 0 0 0 NOPUNCT 0 9 BLOCKIN 10 0 +. . . . . . . . . . LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 0 9 BLOCKIN 10 0 +4 4 4 4 4 4 4 4 4 4 LINEIN ALIGNEDLEFT NOCAPS ALLDIGIT 1 0 0 0 0 0 0 0 NOPUNCT 0 9 BLOCKIN 10 0 +Integration integration I In Int Inte n on ion tion LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 1 0 0 0 0 0 NOPUNCT 1 9 BLOCKIN 10 0 +with with w wi wit with h th ith with LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 2 9 BLOCKIN 10 0 +Emerging emerging E Em Eme Emer g ng ing ging LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 3 9 BLOCKIN 10 0 +Technologies technologies T Te Tec Tech s es ies gies LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 4 9 BLOCKIN 10 0 +. . . . . . . . . . LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 4 9 BLOCKIN 10 0 +. . . . . . . . . . LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 5 9 BLOCKIN 10 0 +. . . . . . . . . . LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 5 9 BLOCKIN 10 0 +. . . . . . . . . . LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 5 9 BLOCKIN 10 0 +. . . . . . . . . . LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 6 9 BLOCKIN 10 0 +. . . . . . . . . . LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 6 9 BLOCKIN 10 0 +. . . . . . . . . . LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 6 9 BLOCKIN 10 0 +. . . . . . . . . . 
LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 7 9 BLOCKIN 10 0 +. . . . . . . . . . LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 7 9 BLOCKIN 10 0 +. . . . . . . . . . LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 7 9 BLOCKIN 10 0 +. . . . . . . . . . LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 7 9 BLOCKIN 10 0 +. . . . . . . . . . LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 8 9 BLOCKIN 10 0 +. . . . . . . . . . LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 8 9 BLOCKIN 10 0 +. . . . . . . . . . LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 8 9 BLOCKIN 10 0 +. . . . . . . . . . LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 9 9 BLOCKIN 10 0 +. . . . . . . . . . LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 9 9 BLOCKIN 10 0 +. . . . . . . . . . LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 9 9 BLOCKIN 10 0 +. . . . . . . . . . LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 10 9 BLOCKIN 10 0 +. . . . . . . . . . LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 10 9 BLOCKIN 10 0 +. . . . . . . . . . LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 10 9 BLOCKIN 10 0 +. . . . . . . . . . LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 10 9 BLOCKIN 10 0 +. . . . . . . . . . LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 10 9 BLOCKIN 10 0 +. . . . . . . . . . LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 10 9 BLOCKIN 10 0 +. . . . . . . . . . LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 10 9 BLOCKIN 10 0 +. . . . . . . . . . LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 10 9 BLOCKIN 10 0 +. . . . . . . . . . LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 10 9 BLOCKIN 10 0 +. . . . . . . . . . LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 10 9 BLOCKIN 10 0 +. . . . . . . . . . 
LINEEND ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 10 9 BLOCKIN 10 0 +12 12 1 12 12 12 2 12 12 12 LINESTART LINEINDENT NOCAPS ALLDIGIT 0 0 0 0 0 0 0 0 NOPUNCT 0 9 BLOCKIN 10 0 +. . . . . . . . . . LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 0 9 BLOCKIN 10 0 +4 4 4 4 4 4 4 4 4 4 LINEIN LINEINDENT NOCAPS ALLDIGIT 1 0 0 0 0 0 0 0 NOPUNCT 0 9 BLOCKIN 10 0 +. . . . . . . . . . LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 0 9 BLOCKIN 10 0 +1 1 1 1 1 1 1 1 1 1 LINEIN LINEINDENT NOCAPS ALLDIGIT 1 0 0 0 0 0 0 0 NOPUNCT 0 9 BLOCKIN 10 0 +Opportunities opportunities O Op Opp Oppo s es ies ties LINEIN LINEINDENT INITCAP NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 2 9 BLOCKIN 10 0 +. . . . . . . . . . LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 2 9 BLOCKIN 10 0 +. . . . . . . . . . LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 2 9 BLOCKIN 10 0 +. . . . . . . . . . LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 3 9 BLOCKIN 10 0 +. . . . . . . . . . LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 3 9 BLOCKIN 10 0 +. . . . . . . . . . LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 3 9 BLOCKIN 10 0 +. . . . . . . . . . LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 3 9 BLOCKIN 10 0 +. . . . . . . . . . LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 4 9 BLOCKIN 10 0 +. . . . . . . . . . LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 4 9 BLOCKIN 10 0 +. . . . . . . . . . LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 4 9 BLOCKIN 10 0 +. . . . . . . . . . LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 5 9 BLOCKIN 10 0 +. . . . . . . . . . LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 5 9 BLOCKIN 10 0 +. . . . . . . . . . LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 5 9 BLOCKIN 10 0 +. . . . . . . . . . LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 6 9 BLOCKIN 10 0 +. . . . . . . . . . LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 6 9 BLOCKIN 10 0 +. . . . . . . . . . 
LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 6 9 BLOCKIN 10 0 +. . . . . . . . . . LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 6 9 BLOCKIN 10 0 +. . . . . . . . . . LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 7 9 BLOCKIN 10 0 +. . . . . . . . . . LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 7 9 BLOCKIN 10 0 +. . . . . . . . . . LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 7 9 BLOCKIN 10 0 +. . . . . . . . . . LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 8 9 BLOCKIN 10 0 +. . . . . . . . . . LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 8 9 BLOCKIN 10 0 +. . . . . . . . . . LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 8 9 BLOCKIN 10 0 +. . . . . . . . . . LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 9 9 BLOCKIN 10 0 +. . . . . . . . . . LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 9 9 BLOCKIN 10 0 +. . . . . . . . . . LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 9 9 BLOCKIN 10 0 +. . . . . . . . . . LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 9 9 BLOCKIN 10 0 +. . . . . . . . . . LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 10 9 BLOCKIN 10 0 +. . . . . . . . . . LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 10 9 BLOCKIN 10 0 +. . . . . . . . . . LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 10 9 BLOCKIN 10 0 +. . . . . . . . . . LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 10 9 BLOCKIN 10 0 +. . . . . . . . . . LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 10 9 BLOCKIN 10 0 +. . . . . . . . . . LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 10 9 BLOCKIN 10 0 +. . . . . . . . . . LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 10 9 BLOCKIN 10 0 +. . . . . . . . . . LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 10 9 BLOCKIN 10 0 +. . . . . . . . . . LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 10 9 BLOCKIN 10 0 +. . . . . . . . . . LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 10 9 BLOCKIN 10 0 +. . . . 
. . . . . . LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 10 9 BLOCKIN 10 0 +. . . . . . . . . . LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 10 9 BLOCKIN 10 0 +. . . . . . . . . . LINEEND LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 10 9 BLOCKIN 10 0 +12 12 1 12 12 12 2 12 12 12 LINESTART LINEINDENT NOCAPS ALLDIGIT 0 0 0 0 0 0 0 0 NOPUNCT 0 9 BLOCKIN 10 0 +. . . . . . . . . . LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 0 9 BLOCKIN 10 0 +4 4 4 4 4 4 4 4 4 4 LINEIN LINEINDENT NOCAPS ALLDIGIT 1 0 0 0 0 0 0 0 NOPUNCT 0 9 BLOCKIN 10 0 +. . . . . . . . . . LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 0 9 BLOCKIN 10 0 +2 2 2 2 2 2 2 2 2 2 LINEIN LINEINDENT NOCAPS ALLDIGIT 1 0 0 0 0 0 0 0 NOPUNCT 0 9 BLOCKIN 10 0 +Challenges challenges C Ch Cha Chal s es ges nges LINEIN LINEINDENT INITCAP NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 1 9 BLOCKIN 10 0 +. . . . . . . . . . LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 2 9 BLOCKIN 10 0 +. . . . . . . . . . LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 2 9 BLOCKIN 10 0 +. . . . . . . . . . LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 2 9 BLOCKIN 10 0 +. . . . . . . . . . LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 3 9 BLOCKIN 10 0 +. . . . . . . . . . LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 3 9 BLOCKIN 10 0 +. . . . . . . . . . LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 3 9 BLOCKIN 10 0 +. . . . . . . . . . LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 3 9 BLOCKIN 10 0 +. . . . . . . . . . LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 4 9 BLOCKIN 10 0 +. . . . . . . . . . LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 4 9 BLOCKIN 10 0 +. . . . . . . . . . LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 4 9 BLOCKIN 10 0 +. . . . . . . . . . LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 5 9 BLOCKIN 10 0 +. . . . . . . . . . LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 5 9 BLOCKIN 10 0 +. . . . . . . . . . 
LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 5 9 BLOCKIN 10 0 +. . . . . . . . . . LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 6 9 BLOCKIN 10 0 +. . . . . . . . . . LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 6 9 BLOCKIN 10 0 +. . . . . . . . . . LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 6 9 BLOCKIN 10 0 +. . . . . . . . . . LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 6 9 BLOCKIN 10 0 +. . . . . . . . . . LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 7 9 BLOCKIN 10 0 +. . . . . . . . . . LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 7 9 BLOCKIN 10 0 +. . . . . . . . . . LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 7 9 BLOCKIN 10 0 +. . . . . . . . . . LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 8 9 BLOCKIN 10 0 +. . . . . . . . . . LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 8 9 BLOCKIN 10 0 +. . . . . . . . . . LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 8 9 BLOCKIN 10 0 +. . . . . . . . . . LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 9 9 BLOCKIN 10 0 +. . . . . . . . . . LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 9 9 BLOCKIN 10 0 +. . . . . . . . . . LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 9 9 BLOCKIN 10 0 +. . . . . . . . . . LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 10 9 BLOCKIN 10 0 +. . . . . . . . . . LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 10 9 BLOCKIN 10 0 +. . . . . . . . . . LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 10 9 BLOCKIN 10 0 +. . . . . . . . . . LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 10 9 BLOCKIN 10 0 +. . . . . . . . . . LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 10 9 BLOCKIN 10 0 +. . . . . . . . . . LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 10 9 BLOCKIN 10 0 +. . . . . . . . . . LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 10 9 BLOCKIN 10 0 +. . . . . . . . . . LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 10 9 BLOCKIN 10 0 +. . . . . 
. . . . . LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 10 9 BLOCKIN 10 0 +. . . . . . . . . . LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 10 9 BLOCKIN 10 0 +. . . . . . . . . . LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 10 9 BLOCKIN 10 0 +. . . . . . . . . . LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 10 9 BLOCKIN 10 0 +. . . . . . . . . . LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 10 9 BLOCKIN 10 0 +. . . . . . . . . . LINEEND LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 10 9 BLOCKIN 10 0 +12 12 1 12 12 12 2 12 12 12 LINESTART ALIGNEDLEFT NOCAPS ALLDIGIT 0 0 0 0 0 0 0 0 NOPUNCT 0 10 BLOCKIN 10 0 +. . . . . . . . . . LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 0 10 BLOCKIN 10 0 +5 5 5 5 5 5 5 5 5 5 LINEIN ALIGNEDLEFT NOCAPS ALLDIGIT 1 0 0 0 0 0 0 0 NOPUNCT 0 10 BLOCKIN 10 0 +Future future F Fu Fut Futu e re ure ture LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 1 10 BLOCKIN 10 0 +Research research R Re Res Rese h ch rch arch LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 1 0 0 0 0 0 NOPUNCT 2 10 BLOCKIN 10 0 +Areas areas A Ar Are Area s as eas reas LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 2 10 BLOCKIN 10 0 +. . . . . . . . . . LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 3 10 BLOCKIN 10 0 +. . . . . . . . . . LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 3 10 BLOCKIN 10 0 +. . . . . . . . . . LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 3 10 BLOCKIN 10 0 +. . . . . . . . . . LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 3 10 BLOCKIN 10 0 +. . . . . . . . . . LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 4 10 BLOCKIN 10 0 +. . . . . . . . . . LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 4 10 BLOCKIN 10 0 +. . . . . . . . . . LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 4 10 BLOCKIN 10 0 +. . . . . . . . . . LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 5 10 BLOCKIN 10 0 +. . . . . . . . . . 
LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 5 10 BLOCKIN 10 0 +. . . . . . . . . . LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 5 10 BLOCKIN 10 0 +. . . . . . . . . . LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 5 10 BLOCKIN 10 0 +. . . . . . . . . . LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 6 10 BLOCKIN 10 0 +. . . . . . . . . . LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 6 10 BLOCKIN 10 0 +. . . . . . . . . . LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 6 10 BLOCKIN 10 0 +. . . . . . . . . . LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 7 10 BLOCKIN 10 0 +. . . . . . . . . . LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 7 10 BLOCKIN 10 0 +. . . . . . . . . . LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 7 10 BLOCKIN 10 0 +. . . . . . . . . . LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 7 10 BLOCKIN 10 0 +. . . . . . . . . . LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 8 10 BLOCKIN 10 0 +. . . . . . . . . . LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 8 10 BLOCKIN 10 0 +. . . . . . . . . . LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 8 10 BLOCKIN 10 0 +. . . . . . . . . . LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 9 10 BLOCKIN 10 0 +. . . . . . . . . . LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 9 10 BLOCKIN 10 0 +. . . . . . . . . . LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 9 10 BLOCKIN 10 0 +. . . . . . . . . . LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 10 10 BLOCKIN 10 0 +. . . . . . . . . . LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 10 10 BLOCKIN 10 0 +. . . . . . . . . . LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 10 10 BLOCKIN 10 0 +. . . . . . . . . . LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 10 10 BLOCKIN 10 0 +. . . . . . . . . . LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 10 10 BLOCKIN 10 0 +. . . . . . . . . . 
LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 10 10 BLOCKIN 10 0 +. . . . . . . . . . LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 10 10 BLOCKIN 10 0 +. . . . . . . . . . LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 10 10 BLOCKIN 10 0 +. . . . . . . . . . LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 10 10 BLOCKIN 10 0 +. . . . . . . . . . LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 10 10 BLOCKIN 10 0 +. . . . . . . . . . LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 10 10 BLOCKIN 10 0 +. . . . . . . . . . LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 10 10 BLOCKIN 10 0 +. . . . . . . . . . LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 10 10 BLOCKIN 10 0 +. . . . . . . . . . LINEEND ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 10 10 BLOCKEND 10 0 +Glossary glossary G Gl Glo Glos y ry ary sary LINESTART ALIGNEDLEFT INITCAP NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 8 0 BLOCKSTART no 0 +Bibliography bibliography B Bi Bib Bibl y hy phy aphy LINESTART ALIGNEDLEFT INITCAP NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 8 1 BLOCKSTART no 0 +[ [ [ [ [ [ [ [ [ [ LINESTART LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 OPENBRACKET 0 8 BLOCKSTART 10 0 +1 1 1 1 1 1 1 1 1 1 LINEIN LINEINDENT NOCAPS ALLDIGIT 1 0 0 0 0 0 0 0 NOPUNCT 0 8 BLOCKIN 10 0 +] ] ] ] ] ] ] ] ] ] LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 ENDBRACKET 0 8 BLOCKIN 10 0 +N n N N N N N N N N LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 NOPUNCT 0 8 BLOCKIN 10 0 +- - - - - - - - - - LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 HYPHEN 0 8 BLOCKIN 10 0 +gram gram g gr gra gram m am ram gram LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 1 8 BLOCKIN 10 0 +language language l la lan lang e ge age uage LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 2 8 BLOCKIN 10 0 +models models m mo mod mode s ls els dels LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 3 8 BLOCKIN 10 0 +. . . . . . . . . . 
LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 3 8 BLOCKIN 10 0 +https https h ht htt http s ps tps ttps LINEIN LINEINDENT NOCAPS NODIGIT 0 0 0 0 0 0 0 1 NOPUNCT 3 8 BLOCKIN 10 0 +: : : : : : : : : : LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 PUNCT 4 8 BLOCKIN 10 0 +/ / / / / / / / / / LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 NOPUNCT 4 8 BLOCKIN 10 0 +/ / / / / / / / / / LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 NOPUNCT 4 8 BLOCKIN 10 0 +web web w we web web b eb web web LINEIN LINEINDENT NOCAPS NODIGIT 0 1 1 0 0 0 0 0 NOPUNCT 4 8 BLOCKIN 10 0 +. . . . . . . . . . LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 4 8 BLOCKIN 10 0 +stanford stanford s st sta stan d rd ord ford LINEIN LINEINDENT NOCAPS NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 5 8 BLOCKIN 10 0 +. . . . . . . . . . LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 5 8 BLOCKIN 10 0 +edu edu e ed edu edu u du edu edu LINEIN LINEINDENT NOCAPS NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 6 8 BLOCKIN 10 0 +/ / / / / / / / / / LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 NOPUNCT 6 8 BLOCKIN 10 0 +~jurafsky ~jurafsky ~ ~j ~ju ~jur y ky sky fsky LINEIN LINEINDENT NOCAPS NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 7 8 BLOCKIN 10 0 +/ / / / / / / / / / LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 NOPUNCT 7 8 BLOCKIN 10 0 +slp3 slp3 s sl slp slp3 3 p3 lp3 slp3 LINEIN LINEINDENT NOCAPS CONTAINSDIGITS 0 0 0 0 0 0 0 0 NOPUNCT 7 8 BLOCKIN 10 0 +/ / / / / / / / / / LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 NOPUNCT 7 8 BLOCKIN 10 0 +3 3 3 3 3 3 3 3 3 3 LINEIN LINEINDENT NOCAPS ALLDIGIT 1 0 0 0 0 0 0 0 NOPUNCT 8 8 BLOCKIN 10 0 +. . . . . . . . . . LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 8 8 BLOCKIN 10 0 +pdf pdf p pd pdf pdf f df pdf pdf LINEIN LINEINDENT NOCAPS NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 8 8 BLOCKIN 10 0 +. . . . . . . . . . 
LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 8 8 BLOCKIN 10 0 +[ [ [ [ [ [ [ [ [ [ LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 OPENBRACKET 8 8 BLOCKIN 10 0 +Accessed accessed A Ac Acc Acce d ed sed ssed LINEIN LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 9 8 BLOCKIN 10 0 +01 01 0 01 01 01 1 01 01 01 LINEIN LINEINDENT NOCAPS ALLDIGIT 0 0 0 0 0 0 0 0 NOPUNCT 10 8 BLOCKIN 10 0 +- - - - - - - - - - LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 HYPHEN 10 8 BLOCKIN 10 0 +07 07 0 07 07 07 7 07 07 07 LINEIN LINEINDENT NOCAPS ALLDIGIT 0 0 0 0 0 0 0 0 NOPUNCT 10 8 BLOCKIN 10 0 +- - - - - - - - - - LINEEND LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 HYPHEN 10 8 BLOCKIN 10 0 +2024 2024 2 20 202 2024 4 24 024 2024 LINESTART LINEINDENT NOCAPS ALLDIGIT 0 0 0 0 0 1 0 0 NOPUNCT 5 0 BLOCKIN 2 0 +] ] ] ] ] ] ] ] ] ] LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 ENDBRACKET 6 0 BLOCKIN 2 0 +. . . . . . . . . . LINEEND LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 7 0 BLOCKEND 2 0 +[ [ [ [ [ [ [ [ [ [ LINESTART ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 OPENBRACKET 0 6 BLOCKSTART 10 0 +2 2 2 2 2 2 2 2 2 2 LINEIN ALIGNEDLEFT NOCAPS ALLDIGIT 1 0 0 0 0 0 0 0 NOPUNCT 0 6 BLOCKIN 10 0 +] ] ] ] ] ] ] ] ] ] LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 ENDBRACKET 0 6 BLOCKIN 10 0 +Anis anis A An Ani Anis s is nis Anis LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 1 6 BLOCKIN 10 0 +Koubaa koubaa K Ko Kou Koub a aa baa ubaa LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 2 6 BLOCKIN 10 0 +. . . . . . . . . . 
LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 2 6 BLOCKIN 10 0 +Gpt gpt G Gp Gpt Gpt t pt Gpt Gpt LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 3 6 BLOCKIN 10 0 +- - - - - - - - - - LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 HYPHEN 3 6 BLOCKIN 10 0 +4 4 4 4 4 4 4 4 4 4 LINEIN ALIGNEDLEFT NOCAPS ALLDIGIT 1 0 0 0 0 0 0 0 NOPUNCT 3 6 BLOCKIN 10 0 +vs vs v vs vs vs s vs vs vs LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 4 6 BLOCKIN 10 0 +. . . . . . . . . . LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 4 6 BLOCKIN 10 0 +gpt gpt g gp gpt gpt t pt gpt gpt LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 5 6 BLOCKIN 10 0 +- - - - - - - - - - LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 HYPHEN 5 6 BLOCKIN 10 0 +3 3 3 3 3 3 3 3 3 3 LINEIN ALIGNEDLEFT NOCAPS ALLDIGIT 1 0 0 0 0 0 0 0 NOPUNCT 5 6 BLOCKIN 10 0 +. . . . . . . . . . LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 5 6 BLOCKIN 10 0 +5 5 5 5 5 5 5 5 5 5 LINEIN ALIGNEDLEFT NOCAPS ALLDIGIT 1 0 0 0 0 0 0 0 NOPUNCT 5 6 BLOCKIN 10 0 +: : : : : : : : : : LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 PUNCT 6 6 BLOCKIN 10 0 +A a A A A A A A A A LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 1 0 0 0 0 0 NOPUNCT 6 6 BLOCKIN 10 0 +concise concise c co con conc e se ise cise LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 7 6 BLOCKIN 10 0 +showdown showdown s sh sho show n wn own down LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 9 6 BLOCKIN 10 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 9 6 BLOCKIN 10 0 +04 04 0 04 04 04 4 04 04 04 LINEIN ALIGNEDLEFT NOCAPS ALLDIGIT 0 0 0 0 0 0 0 0 NOPUNCT 10 6 BLOCKIN 10 0 +2023 2023 2 20 202 2023 3 23 023 2023 LINEIN ALIGNEDLEFT NOCAPS ALLDIGIT 0 0 0 0 0 1 0 0 NOPUNCT 10 6 BLOCKIN 10 0 +. . . . . . . . . . 
LINEEND ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 10 6 BLOCKEND 10 0 +[ [ [ [ [ [ [ [ [ [ LINESTART ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 OPENBRACKET 0 8 BLOCKSTART 6 0 +3 3 3 3 3 3 3 3 3 3 LINEIN ALIGNEDLEFT NOCAPS ALLDIGIT 1 0 0 0 0 0 0 0 NOPUNCT 0 8 BLOCKIN 6 0 +] ] ] ] ] ] ] ] ] ] LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 ENDBRACKET 0 8 BLOCKIN 6 0 +Timo timo T Ti Tim Timo o mo imo Timo LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 0 8 BLOCKIN 6 0 +Kaufmann kaufmann K Ka Kau Kauf n nn ann mann LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 2 8 BLOCKIN 6 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 2 8 BLOCKIN 6 0 +Paul paul P Pa Pau Paul l ul aul Paul LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 2 8 BLOCKIN 6 0 +Weng weng W We Wen Weng g ng eng Weng LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 3 8 BLOCKIN 6 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 3 8 BLOCKIN 6 0 +Viktor viktor V Vi Vik Vikt r or tor ktor LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 4 8 BLOCKIN 6 0 +Bengs bengs B Be Ben Beng s gs ngs engs LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 5 8 BLOCKIN 6 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 5 8 BLOCKIN 6 0 +and and a an and and d nd and and LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 5 8 BLOCKIN 6 0 +Eyke eyke E Ey Eyk Eyke e ke yke Eyke LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 6 8 BLOCKIN 6 0 +Hüllermeier hüllermeier H Hü Hül Hüll r er ier eier LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 7 8 BLOCKIN 6 0 +. . . . . . . . . . 
LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 7 8 BLOCKIN 6 0 +A a A A A A A A A A LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 1 0 0 0 0 0 NOPUNCT 8 8 BLOCKIN 6 0 +survey survey s su sur surv y ey vey rvey LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 9 8 BLOCKIN 6 0 +of of o of of of f of of of LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 9 8 BLOCKIN 6 0 +reinforcement reinforcement r re rei rein t nt ent ment LINEEND ALIGNEDLEFT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 10 8 BLOCKIN 6 0 +learning learning l le lea lear g ng ing ning LINESTART LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 2 3 BLOCKIN 2 0 +from from f fr fro from m om rom from LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 3 3 BLOCKIN 2 0 +human human h hu hum huma n an man uman LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 5 3 BLOCKIN 2 0 +feedback feedback f fe fee feed k ck ack back LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 8 3 BLOCKIN 2 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 8 3 BLOCKIN 2 0 +2024 2024 2 20 202 2024 4 24 024 2024 LINEIN LINEINDENT NOCAPS ALLDIGIT 0 0 0 0 0 1 0 0 NOPUNCT 10 3 BLOCKIN 2 0 +. . . . . . . . . . 
LINEEND LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 10 3 BLOCKEND 2 0 +[ [ [ [ [ [ [ [ [ [ LINESTART ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 OPENBRACKET 0 9 BLOCKSTART 10 0 +4 4 4 4 4 4 4 4 4 4 LINEIN ALIGNEDLEFT NOCAPS ALLDIGIT 1 0 0 0 0 0 0 0 NOPUNCT 0 9 BLOCKIN 10 0 +] ] ] ] ] ] ] ] ] ] LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 ENDBRACKET 0 9 BLOCKIN 10 0 +Yu yu Y Yu Yu Yu u Yu Yu Yu LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 0 9 BLOCKIN 10 0 +- - - - - - - - - - LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 HYPHEN 0 9 BLOCKIN 10 0 +Chu chu C Ch Chu Chu u hu Chu Chu LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 1 9 BLOCKIN 10 0 +Chang chang C Ch Cha Chan g ng ang hang LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 1 9 BLOCKIN 10 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 1 9 BLOCKIN 10 0 +Xu xu X Xu Xu Xu u Xu Xu Xu LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 2 9 BLOCKIN 10 0 +Wang wang W Wa Wan Wang g ng ang Wang LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 3 9 BLOCKIN 10 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 3 9 BLOCKIN 10 0 +Jindong jindong J Ji Jin Jind g ng ong dong LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 4 9 BLOCKIN 10 0 +Wang wang W Wa Wan Wang g ng ang Wang LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 4 9 BLOCKIN 10 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 4 9 BLOCKIN 10 0 +Yuanyi yuanyi Y Yu Yua Yuan i yi nyi anyi LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 5 9 BLOCKIN 10 0 +Wu wu W Wu Wu Wu u Wu Wu Wu LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 6 9 BLOCKIN 10 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 6 9 BLOCKIN 10 0 +Kaijie kaijie K Ka Kai Kaij e ie jie ijie LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 6 9 BLOCKIN 10 0 +Zhu zhu Z Zh Zhu Zhu 
u hu Zhu Zhu LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 7 9 BLOCKIN 10 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 7 9 BLOCKIN 10 0 +Hao hao H Ha Hao Hao o ao Hao Hao LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 8 9 BLOCKIN 10 0 +Chen chen C Ch Che Chen n en hen Chen LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 8 9 BLOCKIN 10 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 8 9 BLOCKIN 10 0 +Linyi linyi L Li Lin Liny i yi nyi inyi LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 9 9 BLOCKIN 10 0 +Yang yang Y Ya Yan Yang g ng ang Yang LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 10 9 BLOCKIN 10 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 10 9 BLOCKIN 10 0 +Xiaoyuan xiaoyuan X Xi Xia Xiao n an uan yuan LINEEND ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 10 9 BLOCKIN 10 0 +Yi yi Y Yi Yi Yi i Yi Yi Yi LINESTART LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 0 8 BLOCKIN 9 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 0 8 BLOCKIN 9 0 +Cunxiang cunxiang C Cu Cun Cunx g ng ang iang LINEIN LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 1 8 BLOCKIN 9 0 +Wang wang W Wa Wan Wang g ng ang Wang LINEIN LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 2 8 BLOCKIN 9 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 2 8 BLOCKIN 9 0 +Yidong yidong Y Yi Yid Yido g ng ong dong LINEIN LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 3 8 BLOCKIN 9 0 +Wang wang W Wa Wan Wang g ng ang Wang LINEIN LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 3 8 BLOCKIN 9 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 3 8 BLOCKIN 9 0 +Weirong weirong W We Wei Weir g ng ong rong LINEIN LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 4 8 BLOCKIN 9 0 +Ye ye Y Ye Ye Ye e Ye Ye Ye LINEIN LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 
NOPUNCT 5 8 BLOCKIN 9 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 5 8 BLOCKIN 9 0 +Yue yue Y Yu Yue Yue e ue Yue Yue LINEIN LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 5 8 BLOCKIN 9 0 +Zhang zhang Z Zh Zha Zhan g ng ang hang LINEIN LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 6 8 BLOCKIN 9 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 6 8 BLOCKIN 9 0 +Yi yi Y Yi Yi Yi i Yi Yi Yi LINEIN LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 7 8 BLOCKIN 9 0 +Chang chang C Ch Cha Chan g ng ang hang LINEIN LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 8 8 BLOCKIN 9 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 8 8 BLOCKIN 9 0 +Philip philip P Ph Phi Phil p ip lip ilip LINEIN LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 9 8 BLOCKIN 9 0 +S s S S S S S S S S LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 NOPUNCT 9 8 BLOCKIN 9 0 +. . . . . . . . . . LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 9 8 BLOCKIN 9 0 +Yu yu Y Yu Yu Yu u Yu Yu Yu LINEIN LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 9 8 BLOCKIN 9 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 10 8 BLOCKIN 9 0 +Qian qian Q Qi Qia Qian n an ian Qian LINEIN LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 10 8 BLOCKIN 9 0 +Yang yang Y Ya Yan Yang g ng ang Yang LINEIN LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 10 8 BLOCKIN 9 0 +, , , , , , , , , , LINEEND LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 10 8 BLOCKIN 9 0 +and and a an and and d nd and and LINESTART LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 0 9 BLOCKIN 2 0 +Xingxu xingxu X Xi Xin Xing u xu gxu ngxu LINEIN LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 1 9 BLOCKIN 2 0 +Xie xie X Xi Xie Xie e ie Xie Xie LINEIN LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 1 9 BLOCKIN 2 0 +. . . . . . . . . . 
LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 1 9 BLOCKIN 2 0 +A a A A A A A A A A LINEIN LINEINDENT ALLCAP NODIGIT 1 0 1 0 0 0 0 0 NOPUNCT 2 9 BLOCKIN 2 0 +survey survey s su sur surv y ey vey rvey LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 2 9 BLOCKIN 2 0 +on on o on on on n on on on LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 3 9 BLOCKIN 2 0 +evaluation evaluation e ev eva eval n on ion tion LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 4 9 BLOCKIN 2 0 +of of o of of of f of of of LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 4 9 BLOCKIN 2 0 +large large l la lar larg e ge rge arge LINEIN LINEINDENT NOCAPS NODIGIT 0 1 1 0 0 0 0 0 NOPUNCT 5 9 BLOCKIN 2 0 +language language l la lan lang e ge age uage LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 6 9 BLOCKIN 2 0 +models models m mo mod mode s ls els dels LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 7 9 BLOCKIN 2 0 +. . . . . . . . . . LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 7 9 BLOCKIN 2 0 +ACM acm A AC ACM ACM M CM ACM ACM LINEIN LINEINDENT ALLCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 8 9 BLOCKIN 2 0 +Transactions transactions T Tr Tra Tran s ns ons ions LINEIN LINEINDENT INITCAP NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 9 9 BLOCKIN 2 0 +on on o on on on n on on on LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 9 9 BLOCKIN 2 0 +Intelligent intelligent I In Int Inte t nt ent gent LINEEND LINEINDENT INITCAP NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 10 9 BLOCKIN 2 0 +Systems systems S Sy Sys Syst s ms ems tems LINESTART LINEINDENT INITCAP NODIGIT 0 1 1 0 0 0 0 0 NOPUNCT 1 3 BLOCKIN 5 0 +and and a an and and d nd and and LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 2 3 BLOCKIN 5 0 +Technology technology T Te Tec Tech y gy ogy logy LINEIN LINEINDENT INITCAP NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 5 3 BLOCKIN 5 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 6 3 BLOCKIN 5 0 +15 15 1 15 15 15 5 15 15 15 LINEIN 
LINEINDENT NOCAPS ALLDIGIT 0 0 0 0 0 0 0 0 NOPUNCT 7 3 BLOCKIN 5 0 +: : : : : : : : : : LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 PUNCT 7 3 BLOCKIN 5 0 +1 1 1 1 1 1 1 1 1 1 LINEIN LINEINDENT NOCAPS ALLDIGIT 1 0 0 0 0 0 0 0 NOPUNCT 7 3 BLOCKIN 5 0 +- - - - - - - - - - LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 HYPHEN 8 3 BLOCKIN 5 0 +45 45 4 45 45 45 5 45 45 45 LINEIN LINEINDENT NOCAPS ALLDIGIT 0 0 0 0 0 0 0 0 NOPUNCT 8 3 BLOCKIN 5 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 9 3 BLOCKIN 5 0 +2023 2023 2 20 202 2023 3 23 023 2023 LINEIN LINEINDENT NOCAPS ALLDIGIT 0 0 0 0 0 1 0 0 NOPUNCT 10 3 BLOCKIN 5 0 +. . . . . . . . . . LINEEND LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 10 3 BLOCKEND 5 0 +[ [ [ [ [ [ [ [ [ [ LINESTART ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 OPENBRACKET 0 9 BLOCKSTART 6 0 +5 5 5 5 5 5 5 5 5 5 LINEIN ALIGNEDLEFT NOCAPS ALLDIGIT 1 0 0 0 0 0 0 0 NOPUNCT 0 9 BLOCKIN 6 0 +] ] ] ] ] ] ] ] ] ] LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 ENDBRACKET 0 9 BLOCKIN 6 0 +Ahtsham ahtsham A Ah Aht Ahts m am ham sham LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 1 9 BLOCKIN 6 0 +Zafar zafar Z Za Zaf Zafa r ar far afar LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 2 9 BLOCKIN 6 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 2 9 BLOCKIN 6 0 +Venkatesh venkatesh V Ve Ven Venk h sh esh tesh LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 3 9 BLOCKIN 6 0 +Balavadhani balavadhani B Ba Bal Bala i ni ani hani LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 4 9 BLOCKIN 6 0 +Parthasarathy parthasarathy P Pa Par Part y hy thy athy LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 6 9 BLOCKIN 6 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 6 9 BLOCKIN 6 0 +Chan chan C Ch Cha Chan n an han Chan LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 6 9 BLOCKIN 6 0 +Le le L Le Le Le e Le Le Le 
LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 7 9 BLOCKIN 6 0 +Van van V Va Van Van n an Van Van LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 1 0 0 0 0 0 NOPUNCT 7 9 BLOCKIN 6 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 8 9 BLOCKIN 6 0 +Saad saad S Sa Saa Saad d ad aad Saad LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 8 9 BLOCKIN 6 0 +Shahid shahid S Sh Sha Shah d id hid ahid LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 9 9 BLOCKIN 6 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 9 9 BLOCKIN 6 0 +Aafaq aafaq A Aa Aaf Aafa q aq faq afaq LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 10 9 BLOCKIN 6 0 +Iqbal iqbal I Iq Iqb Iqba l al bal qbal LINEEND ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 10 9 BLOCKIN 6 0 +Khan khan K Kh Kha Khan n an han Khan LINESTART LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 0 9 BLOCKIN 3 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 0 9 BLOCKIN 3 0 +and and a an and and d nd and and LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 1 9 BLOCKIN 3 0 +Arsalan arsalan A Ar Ars Arsa n an lan alan LINEIN LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 1 9 BLOCKIN 3 0 +Shahid shahid S Sh Sha Shah d id hid ahid LINEIN LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 2 9 BLOCKIN 3 0 +. . . . . . . . . . 
LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 2 9 BLOCKIN 3 0 +Building building B Bu Bui Buil g ng ing ding LINEIN LINEINDENT INITCAP NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 3 9 BLOCKIN 3 0 +trust trust t tr tru trus t st ust rust LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 4 9 BLOCKIN 3 0 +in in i in in in n in in in LINEIN LINEINDENT NOCAPS NODIGIT 0 1 1 0 0 0 0 0 NOPUNCT 4 9 BLOCKIN 3 0 +conversational conversational c co con conv l al nal onal LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 6 9 BLOCKIN 3 0 +ai ai a ai ai ai i ai ai ai LINEIN LINEINDENT NOCAPS NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 6 9 BLOCKIN 3 0 +: : : : : : : : : : LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 PUNCT 7 9 BLOCKIN 3 0 +A a A A A A A A A A LINEIN LINEINDENT ALLCAP NODIGIT 1 0 1 0 0 0 0 0 NOPUNCT 7 9 BLOCKIN 3 0 +review review r re rev revi w ew iew view LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 8 9 BLOCKIN 3 0 +and and a an and and d nd and and LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 8 9 BLOCKIN 3 0 +solution solution s so sol solu n on ion tion LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 9 9 BLOCKIN 3 0 +architecture architecture a ar arc arch e re ure ture LINEEND LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 10 9 BLOCKIN 3 0 +using using u us usi usin g ng ing sing LINESTART LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 0 9 BLOCKIN 6 0 +large large l la lar larg e ge rge arge LINEIN LINEINDENT NOCAPS NODIGIT 0 1 1 0 0 0 0 0 NOPUNCT 1 9 BLOCKIN 6 0 +language language l la lan lang e ge age uage LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 2 9 BLOCKIN 6 0 +models models m mo mod mode s ls els dels LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 3 9 BLOCKIN 6 0 +and and a an and and d nd and and LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 3 9 BLOCKIN 6 0 +knowledge knowledge k kn kno know e ge dge edge LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 4 9 BLOCKIN 6 0 
+graphs graphs g gr gra grap s hs phs aphs LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 5 9 BLOCKIN 6 0 +. . . . . . . . . . LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 5 9 BLOCKIN 6 0 +Big big B Bi Big Big g ig Big Big LINEIN LINEINDENT INITCAP NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 6 9 BLOCKIN 6 0 +Data data D Da Dat Data a ta ata Data LINEIN LINEINDENT INITCAP NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 7 9 BLOCKIN 6 0 +and and a an and and d nd and and LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 7 9 BLOCKIN 6 0 +Cognitive cognitive C Co Cog Cogn e ve ive tive LINEIN LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 8 9 BLOCKIN 6 0 +Computing computing C Co Com Comp g ng ing ting LINEIN LINEINDENT INITCAP NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 9 9 BLOCKIN 6 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 10 9 BLOCKIN 6 0 +8 8 8 8 8 8 8 8 8 8 LINEIN LINEINDENT NOCAPS ALLDIGIT 1 0 0 0 0 0 0 0 NOPUNCT 10 9 BLOCKIN 6 0 +( ( ( ( ( ( ( ( ( ( LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 OPENBRACKET 10 9 BLOCKIN 6 0 +6 6 6 6 6 6 6 6 6 6 LINEIN LINEINDENT NOCAPS ALLDIGIT 1 0 0 0 0 0 0 0 NOPUNCT 10 9 BLOCKIN 6 0 +) ) ) ) ) ) ) ) ) ) LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 ENDBRACKET 10 9 BLOCKIN 6 0 +: : : : : : : : : : LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 PUNCT 10 9 BLOCKIN 6 0 +70 70 7 70 70 70 0 70 70 70 LINEIN LINEINDENT NOCAPS ALLDIGIT 0 0 0 0 0 0 0 0 NOPUNCT 10 9 BLOCKIN 6 0 +, , , , , , , , , , LINEEND LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 10 9 BLOCKIN 6 0 +2024 2024 2 20 202 2024 4 24 024 2024 LINESTART LINEINDENT NOCAPS ALLDIGIT 0 0 0 0 0 1 0 0 NOPUNCT 5 0 BLOCKIN 1 0 +. . . . . . . . . . 
LINEEND LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 7 0 BLOCKEND 1 0 +[ [ [ [ [ [ [ [ [ [ LINESTART ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 OPENBRACKET 0 9 BLOCKSTART 10 0 +6 6 6 6 6 6 6 6 6 6 LINEIN ALIGNEDLEFT NOCAPS ALLDIGIT 1 0 0 0 0 0 0 0 NOPUNCT 0 9 BLOCKIN 10 0 +] ] ] ] ] ] ] ] ] ] LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 ENDBRACKET 0 9 BLOCKIN 10 0 +Zhibo zhibo Z Zh Zhi Zhib o bo ibo hibo LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 1 9 BLOCKIN 10 0 +Chu chu C Ch Chu Chu u hu Chu Chu LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 1 9 BLOCKIN 10 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 1 9 BLOCKIN 10 0 +Shiwen shiwen S Sh Shi Shiw n en wen iwen LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 2 9 BLOCKIN 10 0 +Ni ni N Ni Ni Ni i Ni Ni Ni LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 2 9 BLOCKIN 10 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 3 9 BLOCKIN 10 0 +Zichong zichong Z Zi Zic Zich g ng ong hong LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 4 9 BLOCKIN 10 0 +Wang wang W Wa Wan Wang g ng ang Wang LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 4 9 BLOCKIN 10 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 4 9 BLOCKIN 10 0 +Xi xi X Xi Xi Xi i Xi Xi Xi LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 5 9 BLOCKIN 10 0 +Feng feng F Fe Fen Feng g ng eng Feng LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 5 9 BLOCKIN 10 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 5 9 BLOCKIN 10 0 +Min min M Mi Min Min n in Min Min LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 6 9 BLOCKIN 10 0 +Yang yang Y Ya Yan Yang g ng ang Yang LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 7 9 BLOCKIN 10 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 7 9 BLOCKIN 10 0 +and and a an and and 
d nd and and LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 7 9 BLOCKIN 10 0 +Wenbin wenbin W We Wen Wenb n in bin nbin LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 8 9 BLOCKIN 10 0 +Zhang zhang Z Zh Zha Zhan g ng ang hang LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 9 9 BLOCKIN 10 0 +. . . . . . . . . . LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 9 9 BLOCKIN 10 0 +History history H Hi His Hist y ry ory tory LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 10 9 BLOCKIN 10 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 10 9 BLOCKIN 10 0 +develop develop d de dev deve p op lop elop LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 10 9 BLOCKIN 10 0 +- - - - - - - - - - LINEEND ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 HYPHEN 10 9 BLOCKIN 10 0 +ment ment m me men ment t nt ent ment LINESTART LINEINDENT NOCAPS NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 0 7 BLOCKIN 4 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 0 7 BLOCKIN 4 0 +and and a an and and d nd and and LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 1 7 BLOCKIN 4 0 +principles principles p pr pri prin s es les ples LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 2 7 BLOCKIN 4 0 +of of o of of of f of of of LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 3 7 BLOCKIN 4 0 +large large l la lar larg e ge rge arge LINEIN LINEINDENT NOCAPS NODIGIT 0 1 1 0 0 0 0 0 NOPUNCT 4 7 BLOCKIN 4 0 +language language l la lan lang e ge age uage LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 5 7 BLOCKIN 4 0 +models models m mo mod mode s ls els dels LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 6 7 BLOCKIN 4 0 +- - - - - - - - - - LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 HYPHEN 6 7 BLOCKIN 4 0 +an an a an an an n an an an LINEIN LINEINDENT NOCAPS NODIGIT 0 1 1 0 0 0 0 0 NOPUNCT 7 7 BLOCKIN 4 0 +introductory introductory i in int intr y ry ory tory LINEIN 
LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 8 7 BLOCKIN 4 0 +survey survey s su sur surv y ey vey rvey LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 9 7 BLOCKIN 4 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 10 7 BLOCKIN 4 0 +2024 2024 2 20 202 2024 4 24 024 2024 LINEIN LINEINDENT NOCAPS ALLDIGIT 0 0 0 0 0 1 0 0 NOPUNCT 10 7 BLOCKIN 4 0 +. . . . . . . . . . LINEEND LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 10 7 BLOCKEND 4 0 +[ [ [ [ [ [ [ [ [ [ LINESTART ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 OPENBRACKET 0 9 BLOCKSTART 7 0 +7 7 7 7 7 7 7 7 7 7 LINEIN ALIGNEDLEFT NOCAPS ALLDIGIT 1 0 0 0 0 0 0 0 NOPUNCT 0 9 BLOCKIN 7 0 +] ] ] ] ] ] ] ] ] ] LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 ENDBRACKET 0 9 BLOCKIN 7 0 +Tomas tomas T To Tom Toma s as mas omas LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 1 9 BLOCKIN 7 0 +Mikolov mikolov M Mi Mik Miko v ov lov olov LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 1 9 BLOCKIN 7 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 2 9 BLOCKIN 7 0 +Kai kai K Ka Kai Kai i ai Kai Kai LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 2 9 BLOCKIN 7 0 +Chen chen C Ch Che Chen n en hen Chen LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 3 9 BLOCKIN 7 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 3 9 BLOCKIN 7 0 +Greg greg G Gr Gre Greg g eg reg Greg LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 3 9 BLOCKIN 7 0 +Corrado corrado C Co Cor Corr o do ado rado LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 4 9 BLOCKIN 7 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 4 9 BLOCKIN 7 0 +and and a an and and d nd and and LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 5 9 BLOCKIN 7 0 +Jeffrey jeffrey J Je Jef Jeff y ey rey frey LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 6 9 BLOCKIN 7 0 +Dean dean D De 
Dea Dean n an ean Dean LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 1 0 0 0 0 0 NOPUNCT 6 9 BLOCKIN 7 0 +. . . . . . . . . . LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 6 9 BLOCKIN 7 0 +Efficient efficient E Ef Eff Effi t nt ent ient LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 8 9 BLOCKIN 7 0 +estimation estimation e es est esti n on ion tion LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 9 9 BLOCKIN 7 0 +of of o of of of f of of of LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 9 9 BLOCKIN 7 0 +word word w wo wor word d rd ord word LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 1 1 0 0 0 0 0 NOPUNCT 10 9 BLOCKIN 7 0 +represen represen r re rep repr n en sen esen LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 10 9 BLOCKIN 7 0 +- - - - - - - - - - LINEEND ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 HYPHEN 10 9 BLOCKIN 7 0 +tations tations t ta tat tati s ns ons ions LINESTART LINEINDENT NOCAPS NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 2 3 BLOCKIN 2 0 +in in i in in in n in in in LINEIN LINEINDENT NOCAPS NODIGIT 0 1 1 0 0 0 0 0 NOPUNCT 3 3 BLOCKIN 2 0 +vector vector v ve vec vect r or tor ctor LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 5 3 BLOCKIN 2 0 +space space s sp spa spac e ce ace pace LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 8 3 BLOCKIN 2 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 8 3 BLOCKIN 2 0 +2013 2013 2 20 201 2013 3 13 013 2013 LINEIN LINEINDENT NOCAPS ALLDIGIT 0 0 0 0 0 1 0 0 NOPUNCT 10 3 BLOCKIN 2 0 +. . . . . . . . . . 
LINEEND LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 10 3 BLOCKEND 2 0 +[ [ [ [ [ [ [ [ [ [ LINESTART ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 OPENBRACKET 0 9 BLOCKSTART 8 0 +8 8 8 8 8 8 8 8 8 8 LINEIN ALIGNEDLEFT NOCAPS ALLDIGIT 1 0 0 0 0 0 0 0 NOPUNCT 0 9 BLOCKIN 8 0 +] ] ] ] ] ] ] ] ] ] LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 ENDBRACKET 0 9 BLOCKIN 8 0 +Alec alec A Al Ale Alec c ec lec Alec LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 0 9 BLOCKIN 8 0 +Radford radford R Ra Rad Radf d rd ord ford LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 1 9 BLOCKIN 8 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 1 9 BLOCKIN 8 0 +Jeff jeff J Je Jef Jeff f ff eff Jeff LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 2 9 BLOCKIN 8 0 +Wu wu W Wu Wu Wu u Wu Wu Wu LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 3 9 BLOCKIN 8 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 3 9 BLOCKIN 8 0 +Rewon rewon R Re Rew Rewo n on won ewon LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 3 9 BLOCKIN 8 0 +Child child C Ch Chi Chil d ld ild hild LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 1 0 0 0 0 0 NOPUNCT 4 9 BLOCKIN 8 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 4 9 BLOCKIN 8 0 +David david D Da Dav Davi d id vid avid LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 5 9 BLOCKIN 8 0 +Luan luan L Lu Lua Luan n an uan Luan LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 6 9 BLOCKIN 8 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 6 9 BLOCKIN 8 0 +Dario dario D Da Dar Dari o io rio ario LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 6 9 BLOCKIN 8 0 +Amodei amodei A Am Amo Amod i ei dei odei LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 7 9 BLOCKIN 8 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 7 9 BLOCKIN 8 0 +and and a an 
and and d nd and and LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 8 9 BLOCKIN 8 0 +Ilya ilya I Il Ily Ilya a ya lya Ilya LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 8 9 BLOCKIN 8 0 +Sutskever sutskever S Su Sut Suts r er ver ever LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 10 9 BLOCKIN 8 0 +. . . . . . . . . . LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 10 9 BLOCKIN 8 0 +Language language L La Lan Lang e ge age uage LINEEND ALIGNEDLEFT INITCAP NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 10 9 BLOCKIN 8 0 +models models m mo mod mode s ls els dels LINESTART LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 1 4 BLOCKIN 2 0 +are are a ar are are e re are are LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 2 4 BLOCKIN 2 0 +unsupervised unsupervised u un uns unsu d ed sed ised LINEIN LINEINDENT NOCAPS NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 4 4 BLOCKIN 2 0 +multitask multitask m mu mul mult k sk ask task LINEIN LINEINDENT NOCAPS NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 7 4 BLOCKIN 2 0 +learners learners l le lea lear s rs ers ners LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 9 4 BLOCKIN 2 0 +. . . . . . . . . . LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 9 4 BLOCKIN 2 0 +2019 2019 2 20 201 2019 9 19 019 2019 LINEIN LINEINDENT NOCAPS ALLDIGIT 0 0 0 0 0 1 0 0 NOPUNCT 10 4 BLOCKIN 2 0 +. . . . . . . . . . 
LINEEND LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 10 4 BLOCKEND 2 0 +[ [ [ [ [ [ [ [ [ [ LINESTART ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 OPENBRACKET 0 9 BLOCKSTART 9 0 +9 9 9 9 9 9 9 9 9 9 LINEIN ALIGNEDLEFT NOCAPS ALLDIGIT 1 0 0 0 0 0 0 0 NOPUNCT 0 9 BLOCKIN 9 0 +] ] ] ] ] ] ] ] ] ] LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 ENDBRACKET 0 9 BLOCKIN 9 0 +Jacob jacob J Ja Jac Jaco b ob cob acob LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 1 9 BLOCKIN 9 0 +Devlin devlin D De Dev Devl n in lin vlin LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 1 9 BLOCKIN 9 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 1 9 BLOCKIN 9 0 +Ming ming M Mi Min Ming g ng ing Ming LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 2 9 BLOCKIN 9 0 +- - - - - - - - - - LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 HYPHEN 2 9 BLOCKIN 9 0 +Wei wei W We Wei Wei i ei Wei Wei LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 2 9 BLOCKIN 9 0 +Chang chang C Ch Cha Chan g ng ang hang LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 3 9 BLOCKIN 9 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 3 9 BLOCKIN 9 0 +Kenton kenton K Ke Ken Kent n on ton nton LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 4 9 BLOCKIN 9 0 +Lee lee L Le Lee Lee e ee Lee Lee LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 1 0 0 0 0 0 NOPUNCT 5 9 BLOCKIN 9 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 5 9 BLOCKIN 9 0 +and and a an and and d nd and and LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 5 9 BLOCKIN 9 0 +Kristina kristina K Kr Kri Kris a na ina tina LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 6 9 BLOCKIN 9 0 +Toutanova toutanova T To Tou Tout a va ova nova LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 7 9 BLOCKIN 9 0 +. . . . . . . . . . 
LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 7 9 BLOCKIN 9 0 +Bert bert B Be Ber Bert t rt ert Bert LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 8 9 BLOCKIN 9 0 +: : : : : : : : : : LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 PUNCT 8 9 BLOCKIN 9 0 +Pre pre P Pr Pre Pre e re Pre Pre LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 9 9 BLOCKIN 9 0 +- - - - - - - - - - LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 HYPHEN 9 9 BLOCKIN 9 0 +training training t tr tra trai g ng ing ning LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 1 1 0 0 0 0 0 NOPUNCT 10 9 BLOCKIN 9 0 +of of o of of of f of of of LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 10 9 BLOCKIN 9 0 +deep deep d de dee deep p ep eep deep LINEEND ALIGNEDLEFT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 10 9 BLOCKIN 9 0 +bidirectional bidirectional b bi bid bidi l al nal onal LINESTART LINEINDENT NOCAPS NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 2 5 BLOCKIN 2 0 +transformers transformers t tr tra tran s rs ers mers LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 4 5 BLOCKIN 2 0 +for for f fo for for r or for for LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 5 5 BLOCKIN 2 0 +language language l la lan lang e ge age uage LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 6 5 BLOCKIN 2 0 +understanding understanding u un und unde g ng ing ding LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 9 5 BLOCKIN 2 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 9 5 BLOCKIN 2 0 +2019 2019 2 20 201 2019 9 19 019 2019 LINEIN LINEINDENT NOCAPS ALLDIGIT 0 0 0 0 0 1 0 0 NOPUNCT 10 5 BLOCKIN 2 0 +. . . . . . . . . . 
LINEEND LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 10 5 BLOCKEND 2 0 +[ [ [ [ [ [ [ [ [ [ LINESTART ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 OPENBRACKET 0 8 BLOCKSTART 7 0 +10 10 1 10 10 10 0 10 10 10 LINEIN ALIGNEDLEFT NOCAPS ALLDIGIT 0 0 0 0 0 0 0 0 NOPUNCT 0 8 BLOCKIN 7 0 +] ] ] ] ] ] ] ] ] ] LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 ENDBRACKET 0 8 BLOCKIN 7 0 +Aakanksha aakanksha A Aa Aak Aaka a ha sha ksha LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 1 8 BLOCKIN 7 0 +Chowdhery chowdhery C Ch Cho Chow y ry ery hery LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 2 8 BLOCKIN 7 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 2 8 BLOCKIN 7 0 +Sharan sharan S Sh Sha Shar n an ran aran LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 3 8 BLOCKIN 7 0 +Narang narang N Na Nar Nara g ng ang rang LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 4 8 BLOCKIN 7 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 4 8 BLOCKIN 7 0 +Jacob jacob J Ja Jac Jaco b ob cob acob LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 5 8 BLOCKIN 7 0 +Devlin devlin D De Dev Devl n in lin vlin LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 6 8 BLOCKIN 7 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 6 8 BLOCKIN 7 0 +Maarten maarten M Ma Maa Maar n en ten rten LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 7 8 BLOCKIN 7 0 +Bosma bosma B Bo Bos Bosm a ma sma osma LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 8 8 BLOCKIN 7 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 8 8 BLOCKIN 7 0 +Gaurav gaurav G Ga Gau Gaur v av rav urav LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 9 8 BLOCKIN 7 0 +Mishra mishra M Mi Mis Mish a ra hra shra LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 10 8 BLOCKIN 7 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 
0 0 0 0 0 0 0 COMMA 10 8 BLOCKIN 7 0 +Adam adam A Ad Ada Adam m am dam Adam LINEEND ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 10 8 BLOCKIN 7 0 +Roberts roberts R Ro Rob Robe s ts rts erts LINESTART LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 0 8 BLOCKIN 6 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 0 8 BLOCKIN 6 0 +Paul paul P Pa Pau Paul l ul aul Paul LINEIN LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 1 8 BLOCKIN 6 0 +Barham barham B Ba Bar Barh m am ham rham LINEIN LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 2 8 BLOCKIN 6 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 2 8 BLOCKIN 6 0 +Hyung hyung H Hy Hyu Hyun g ng ung yung LINEIN LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 3 8 BLOCKIN 6 0 +Won won W Wo Won Won n on Won Won LINEIN LINEINDENT INITCAP NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 3 8 BLOCKIN 6 0 +Chung chung C Ch Chu Chun g ng ung hung LINEIN LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 4 8 BLOCKIN 6 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 4 8 BLOCKIN 6 0 +Charles charles C Ch Cha Char s es les rles LINEIN LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 5 8 BLOCKIN 6 0 +Sutton sutton S Su Sut Sutt n on ton tton LINEIN LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 6 8 BLOCKIN 6 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 6 8 BLOCKIN 6 0 +Sebastian sebastian S Se Seb Seba n an ian tian LINEIN LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 8 8 BLOCKIN 6 0 +Gehrmann gehrmann G Ge Geh Gehr n nn ann mann LINEIN LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 9 8 BLOCKIN 6 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 9 8 BLOCKIN 6 0 +Parker parker P Pa Par Park r er ker rker LINEIN LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 10 8 BLOCKIN 6 0 +Schuh schuh S Sc Sch Schu h uh huh chuh LINEIN LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 
NOPUNCT 10 8 BLOCKIN 6 0 +, , , , , , , , , , LINEEND LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 10 8 BLOCKIN 6 0 +Kensen kensen K Ke Ken Kens n en sen nsen LINESTART LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 0 8 BLOCKIN 6 0 +Shi shi S Sh Shi Shi i hi Shi Shi LINEIN LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 1 8 BLOCKIN 6 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 1 8 BLOCKIN 6 0 +Sasha sasha S Sa Sas Sash a ha sha asha LINEIN LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 2 8 BLOCKIN 6 0 +Tsvyashchenko tsvyashchenko T Ts Tsv Tsvy o ko nko enko LINEIN LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 3 8 BLOCKIN 6 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 3 8 BLOCKIN 6 0 +Joshua joshua J Jo Jos Josh a ua hua shua LINEIN LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 4 8 BLOCKIN 6 0 +Maynez maynez M Ma May Mayn z ez nez ynez LINEIN LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 5 8 BLOCKIN 6 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 5 8 BLOCKIN 6 0 +Abhishek abhishek A Ab Abh Abhi k ek hek shek LINEIN LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 6 8 BLOCKIN 6 0 +Rao rao R Ra Rao Rao o ao Rao Rao LINEIN LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 7 8 BLOCKIN 6 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 7 8 BLOCKIN 6 0 +Parker parker P Pa Par Park r er ker rker LINEIN LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 8 8 BLOCKIN 6 0 +Barnes barnes B Ba Bar Barn s es nes rnes LINEIN LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 9 8 BLOCKIN 6 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 9 8 BLOCKIN 6 0 +Yi yi Y Yi Yi Yi i Yi Yi Yi LINEIN LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 9 8 BLOCKIN 6 0 +Tay tay T Ta Tay Tay y ay Tay Tay LINEIN LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 10 8 BLOCKIN 6 0 +, , , , , , , , , , LINEIN 
LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 10 8 BLOCKIN 6 0 +Noam noam N No Noa Noam m am oam Noam LINEEND LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 10 8 BLOCKIN 6 0 +Shazeer shazeer S Sh Sha Shaz r er eer zeer LINESTART LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 0 8 BLOCKIN 6 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 0 8 BLOCKIN 6 0 +Vinodkumar vinodkumar V Vi Vin Vino r ar mar umar LINEIN LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 2 8 BLOCKIN 6 0 +Prabhakaran prabhakaran P Pr Pra Prab n an ran aran LINEIN LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 3 8 BLOCKIN 6 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 3 8 BLOCKIN 6 0 +Emily emily E Em Emi Emil y ly ily mily LINEIN LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 4 8 BLOCKIN 6 0 +Reif reif R Re Rei Reif f if eif Reif LINEIN LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 5 8 BLOCKIN 6 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 5 8 BLOCKIN 6 0 +Nan nan N Na Nan Nan n an Nan Nan LINEIN LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 5 8 BLOCKIN 6 0 +Du du D Du Du Du u Du Du Du LINEIN LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 6 8 BLOCKIN 6 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 6 8 BLOCKIN 6 0 +Ben ben B Be Ben Ben n en Ben Ben LINEIN LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 7 8 BLOCKIN 6 0 +Hutchinson hutchinson H Hu Hut Hutc n on son nson LINEIN LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 8 8 BLOCKIN 6 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 8 8 BLOCKIN 6 0 +Reiner reiner R Re Rei Rein r er ner iner LINEIN LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 9 8 BLOCKIN 6 0 +Pope pope P Po Pop Pope e pe ope Pope LINEIN LINEINDENT INITCAP NODIGIT 0 1 1 0 0 0 0 0 NOPUNCT 10 8 BLOCKIN 6 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 10 8 
BLOCKIN 6 0 +James james J Ja Jam Jame s es mes ames LINEEND LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 10 8 BLOCKIN 6 0 +Bradbury bradbury B Br Bra Brad y ry ury bury LINESTART LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 0 8 BLOCKIN 8 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 1 8 BLOCKIN 8 0 +Jacob jacob J Ja Jac Jaco b ob cob acob LINEIN LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 1 8 BLOCKIN 8 0 +Austin austin A Au Aus Aust n in tin stin LINEIN LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 2 8 BLOCKIN 8 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 2 8 BLOCKIN 8 0 +Michael michael M Mi Mic Mich l el ael hael LINEIN LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 3 8 BLOCKIN 8 0 +Isard isard I Is Isa Isar d rd ard sard LINEIN LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 4 8 BLOCKIN 8 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 4 8 BLOCKIN 8 0 +Guy guy G Gu Guy Guy y uy Guy Guy LINEIN LINEINDENT INITCAP NODIGIT 0 1 1 0 0 0 0 0 NOPUNCT 5 8 BLOCKIN 8 0 +Gur gur G Gu Gur Gur r ur Gur Gur LINEIN LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 5 8 BLOCKIN 8 0 +- - - - - - - - - - LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 HYPHEN 5 8 BLOCKIN 8 0 +Ari ari A Ar Ari Ari i ri Ari Ari LINEIN LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 6 8 BLOCKIN 8 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 6 8 BLOCKIN 8 0 +Pengcheng pengcheng P Pe Pen Peng g ng eng heng LINEIN LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 7 8 BLOCKIN 8 0 +Yin yin Y Yi Yin Yin n in Yin Yin LINEIN LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 8 8 BLOCKIN 8 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 8 8 BLOCKIN 8 0 +Toju toju T To Toj Toju u ju oju Toju LINEIN LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 8 8 BLOCKIN 8 0 +Duke duke D Du Duk Duke e ke uke Duke LINEIN 
LINEINDENT INITCAP NODIGIT 0 1 1 0 0 0 0 0 NOPUNCT 9 8 BLOCKIN 8 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 9 8 BLOCKIN 8 0 +Anselm anselm A An Ans Anse m lm elm selm LINEIN LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 10 8 BLOCKIN 8 0 +Lev lev L Le Lev Lev v ev Lev Lev LINEIN LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 10 8 BLOCKIN 8 0 +- - - - - - - - - - LINEEND LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 HYPHEN 10 8 BLOCKIN 8 0 +skaya skaya s sk ska skay a ya aya kaya LINESTART LINEINDENT NOCAPS NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 0 8 BLOCKIN 6 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 0 8 BLOCKIN 6 0 +Sanjay sanjay S Sa San Sanj y ay jay njay LINEIN LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 1 8 BLOCKIN 6 0 +Ghemawat ghemawat G Gh Ghe Ghem t at wat awat LINEIN LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 2 8 BLOCKIN 6 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 2 8 BLOCKIN 6 0 +Sunipa sunipa S Su Sun Suni a pa ipa nipa LINEIN LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 3 8 BLOCKIN 6 0 +Dev dev D De Dev Dev v ev Dev Dev LINEIN LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 4 8 BLOCKIN 6 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 4 8 BLOCKIN 6 0 +Henryk henryk H He Hen Henr k yk ryk nryk LINEIN LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 5 8 BLOCKIN 6 0 +Michalewski michalewski M Mi Mic Mich i ki ski wski LINEIN LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 6 8 BLOCKIN 6 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 6 8 BLOCKIN 6 0 +Xavier xavier X Xa Xav Xavi r er ier vier LINEIN LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 7 8 BLOCKIN 6 0 +Garcia garcia G Ga Gar Garc a ia cia rcia LINEIN LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 8 8 BLOCKIN 6 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 8 8 BLOCKIN 6 
0 +Vedant vedant V Ve Ved Veda t nt ant dant LINEIN LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 9 8 BLOCKIN 6 0 +Misra misra M Mi Mis Misr a ra sra isra LINEIN LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 10 8 BLOCKIN 6 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 10 8 BLOCKIN 6 0 +Kevin kevin K Ke Kev Kevi n in vin evin LINEEND LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 10 8 BLOCKIN 6 0 +Robinson robinson R Ro Rob Robi n on son nson LINESTART LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 0 8 BLOCKIN 7 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 0 8 BLOCKIN 7 0 +Liam liam L Li Lia Liam m am iam Liam LINEIN LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 1 8 BLOCKIN 7 0 +Fedus fedus F Fe Fed Fedu s us dus edus LINEIN LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 2 8 BLOCKIN 7 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 2 8 BLOCKIN 7 0 +Denny denny D De Den Denn y ny nny enny LINEIN LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 3 8 BLOCKIN 7 0 +Zhou zhou Z Zh Zho Zhou u ou hou Zhou LINEIN LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 3 8 BLOCKIN 7 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 4 8 BLOCKIN 7 0 +Daphne daphne D Da Dap Daph e ne hne phne LINEIN LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 4 8 BLOCKIN 7 0 +Ippolito ippolito I Ip Ipp Ippo o to ito lito LINEIN LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 5 8 BLOCKIN 7 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 6 8 BLOCKIN 7 0 +David david D Da Dav Davi d id vid avid LINEIN LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 6 8 BLOCKIN 7 0 +Luan luan L Lu Lua Luan n an uan Luan LINEIN LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 7 8 BLOCKIN 7 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 7 8 BLOCKIN 7 0 +Hyeontaek hyeontaek H Hy Hye Hyeo k ek aek taek 
LINEIN LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 8 8 BLOCKIN 7 0 +Lim lim L Li Lim Lim m im Lim Lim LINEIN LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 9 8 BLOCKIN 7 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 9 8 BLOCKIN 7 0 +Barret barret B Ba Bar Barr t et ret rret LINEIN LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 10 8 BLOCKIN 7 0 +Zoph zoph Z Zo Zop Zoph h ph oph Zoph LINEIN LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 10 8 BLOCKIN 7 0 +, , , , , , , , , , LINEEND LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 10 8 BLOCKIN 7 0 +Alexander alexander A Al Ale Alex r er der nder LINESTART LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 0 8 BLOCKIN 6 0 +Spiridonov spiridonov S Sp Spi Spir v ov nov onov LINEIN LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 2 8 BLOCKIN 6 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 2 8 BLOCKIN 6 0 +Ryan ryan R Ry Rya Ryan n an yan Ryan LINEIN LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 3 8 BLOCKIN 6 0 +Sepassi sepassi S Se Sep Sepa i si ssi assi LINEIN LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 4 8 BLOCKIN 6 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 4 8 BLOCKIN 6 0 +David david D Da Dav Davi d id vid avid LINEIN LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 4 8 BLOCKIN 6 0 +Dohan dohan D Do Doh Doha n an han ohan LINEIN LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 5 8 BLOCKIN 6 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 5 8 BLOCKIN 6 0 +Shivani shivani S Sh Shi Shiv i ni ani vani LINEIN LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 6 8 BLOCKIN 6 0 +Agrawal agrawal A Ag Agr Agra l al wal awal LINEIN LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 7 8 BLOCKIN 6 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 7 8 BLOCKIN 6 0 +Mark mark M Ma Mar Mark k rk ark Mark LINEIN LINEINDENT INITCAP NODIGIT 0 1 1 
0 0 0 0 0 NOPUNCT 8 8 BLOCKIN 6 0 +Omernick omernick O Om Ome Omer k ck ick nick LINEIN LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 9 8 BLOCKIN 6 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 9 8 BLOCKIN 6 0 +Andrew andrew A An And Andr w ew rew drew LINEIN LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 10 8 BLOCKIN 6 0 +M m M M M M M M M M LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 NOPUNCT 10 8 BLOCKIN 6 0 +. . . . . . . . . . LINEEND LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 10 8 BLOCKIN 6 0 +Dai dai D Da Dai Dai i ai Dai Dai LINESTART LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 0 8 BLOCKIN 6 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 0 8 BLOCKIN 6 0 +Thanumalayan thanumalayan T Th Tha Than n an yan ayan LINEIN LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 1 8 BLOCKIN 6 0 +Sankaranarayana sankaranarayana S Sa San Sank a na ana yana LINEIN LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 3 8 BLOCKIN 6 0 +Pillai pillai P Pi Pil Pill i ai lai llai LINEIN LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 4 8 BLOCKIN 6 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 4 8 BLOCKIN 6 0 +Marie marie M Ma Mar Mari e ie rie arie LINEIN LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 5 8 BLOCKIN 6 0 +Pellat pellat P Pe Pel Pell t at lat llat LINEIN LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 6 8 BLOCKIN 6 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 6 8 BLOCKIN 6 0 +Aitor aitor A Ai Ait Aito r or tor itor LINEIN LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 7 8 BLOCKIN 6 0 +Lewkowycz lewkowycz L Le Lew Lewk z cz ycz wycz LINEIN LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 8 8 BLOCKIN 6 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 8 8 BLOCKIN 6 0 +Erica erica E Er Eri Eric a ca ica rica LINEIN LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 9 8 BLOCKIN 6 0 
+Moreira moreira M Mo Mor More a ra ira eira LINEIN LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 10 8 BLOCKIN 6 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 10 8 BLOCKIN 6 0 +Re re R Re Re Re e Re Re Re LINEIN LINEINDENT INITCAP NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 10 8 BLOCKIN 6 0 +- - - - - - - - - - LINEEND LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 HYPHEN 10 8 BLOCKIN 6 0 +won won w wo won won n on won won LINESTART LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 0 8 BLOCKIN 6 0 +Child child C Ch Chi Chil d ld ild hild LINEIN LINEINDENT INITCAP NODIGIT 0 1 1 0 0 0 0 0 NOPUNCT 1 8 BLOCKIN 6 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 1 8 BLOCKIN 6 0 +Oleksandr oleksandr O Ol Ole Olek r dr ndr andr LINEIN LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 2 8 BLOCKIN 6 0 +Polozov polozov P Po Pol Polo v ov zov ozov LINEIN LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 3 8 BLOCKIN 6 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 3 8 BLOCKIN 6 0 +Katherine katherine K Ka Kat Kath e ne ine rine LINEIN LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 4 8 BLOCKIN 6 0 +Lee lee L Le Lee Lee e ee Lee Lee LINEIN LINEINDENT INITCAP NODIGIT 0 1 1 0 0 0 0 0 NOPUNCT 5 8 BLOCKIN 6 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 5 8 BLOCKIN 6 0 +Zongwei zongwei Z Zo Zon Zong i ei wei gwei LINEIN LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 6 8 BLOCKIN 6 0 +Zhou zhou Z Zh Zho Zhou u ou hou Zhou LINEIN LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 6 8 BLOCKIN 6 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 6 8 BLOCKIN 6 0 +Xuezhi xuezhi X Xu Xue Xuez i hi zhi ezhi LINEIN LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 7 8 BLOCKIN 6 0 +Wang wang W Wa Wan Wang g ng ang Wang LINEIN LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 8 8 BLOCKIN 6 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 
0 0 0 0 0 0 COMMA 8 8 BLOCKIN 6 0 +Brennan brennan B Br Bre Bren n an nan nnan LINEIN LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 9 8 BLOCKIN 6 0 +Saeta saeta S Sa Sae Saet a ta eta aeta LINEIN LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 10 8 BLOCKIN 6 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 10 8 BLOCKIN 6 0 +Mark mark M Ma Mar Mark k rk ark Mark LINEEND LINEINDENT INITCAP NODIGIT 0 1 1 0 0 0 0 0 NOPUNCT 10 8 BLOCKIN 6 0 +Diaz diaz D Di Dia Diaz z az iaz Diaz LINESTART LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 0 9 BLOCKIN 8 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 0 9 BLOCKIN 8 0 +Orhan orhan O Or Orh Orha n an han rhan LINEIN LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 1 9 BLOCKIN 8 0 +Firat firat F Fi Fir Fira t at rat irat LINEIN LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 2 9 BLOCKIN 8 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 2 9 BLOCKIN 8 0 +Michele michele M Mi Mic Mich e le ele hele LINEIN LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 3 9 BLOCKIN 8 0 +Catasta catasta C Ca Cat Cata a ta sta asta LINEIN LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 4 9 BLOCKIN 8 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 4 9 BLOCKIN 8 0 +Jason jason J Ja Jas Jaso n on son ason LINEIN LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 4 9 BLOCKIN 8 0 +Wei wei W We Wei Wei i ei Wei Wei LINEIN LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 5 9 BLOCKIN 8 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 5 9 BLOCKIN 8 0 +Kathy kathy K Ka Kat Kath y hy thy athy LINEIN LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 6 9 BLOCKIN 8 0 +Meier meier M Me Mei Meie r er ier eier LINEIN LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 6 9 BLOCKIN 8 0 +- - - - - - - - - - LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 HYPHEN 7 9 BLOCKIN 8 0 +Hellstern hellstern H 
He Hel Hell n rn ern tern LINEIN LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 8 9 BLOCKIN 8 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 8 9 BLOCKIN 8 0 +Douglas douglas D Do Dou Doug s as las glas LINEIN LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 9 9 BLOCKIN 8 0 +Eck eck E Ec Eck Eck k ck Eck Eck LINEIN LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 9 9 BLOCKIN 8 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 9 9 BLOCKIN 8 0 +Jeff jeff J Je Jef Jeff f ff eff Jeff LINEIN LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 10 9 BLOCKIN 8 0 +Dean dean D De Dea Dean n an ean Dean LINEIN LINEINDENT INITCAP NODIGIT 0 1 1 0 0 0 0 0 NOPUNCT 10 9 BLOCKIN 8 0 +, , , , , , , , , , LINEEND LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 10 9 BLOCKIN 8 0 +Slav slav S Sl Sla Slav v av lav Slav LINESTART LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 0 8 BLOCKIN 5 0 +Petrov petrov P Pe Pet Petr v ov rov trov LINEIN LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 1 8 BLOCKIN 5 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 1 8 BLOCKIN 5 0 +and and a an and and d nd and and LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 2 8 BLOCKIN 5 0 +Noah noah N No Noa Noah h ah oah Noah LINEIN LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 2 8 BLOCKIN 5 0 +Fiedel fiedel F Fi Fie Fied l el del edel LINEIN LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 3 8 BLOCKIN 5 0 +. . . . . . . . . . 
LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 3 8 BLOCKIN 5 0 +Palm palm P Pa Pal Palm m lm alm Palm LINEIN LINEINDENT INITCAP NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 4 8 BLOCKIN 5 0 +: : : : : : : : : : LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 PUNCT 4 8 BLOCKIN 5 0 +Scaling scaling S Sc Sca Scal g ng ing ling LINEIN LINEINDENT INITCAP NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 5 8 BLOCKIN 5 0 +language language l la lan lang e ge age uage LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 7 8 BLOCKIN 5 0 +modeling modeling m mo mod mode g ng ing ling LINEIN LINEINDENT NOCAPS NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 8 8 BLOCKIN 5 0 +with with w wi wit with h th ith with LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 8 8 BLOCKIN 5 0 +pathways pathways p pa pat path s ys ays ways LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 10 8 BLOCKIN 5 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 10 8 BLOCKIN 5 0 +2022 2022 2 20 202 2022 2 22 022 2022 LINEIN LINEINDENT NOCAPS ALLDIGIT 0 0 0 0 0 1 0 0 NOPUNCT 10 8 BLOCKIN 5 0 +. . . . . . . . . . 
LINEEND LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 10 8 BLOCKEND 5 0 +[ [ [ [ [ [ [ [ [ [ LINESTART ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 OPENBRACKET 0 9 BLOCKSTART 8 0 +11 11 1 11 11 11 1 11 11 11 LINEIN ALIGNEDLEFT NOCAPS ALLDIGIT 0 0 0 0 0 0 0 0 NOPUNCT 0 9 BLOCKIN 8 0 +] ] ] ] ] ] ] ] ] ] LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 ENDBRACKET 0 9 BLOCKIN 8 0 +Hugo hugo H Hu Hug Hugo o go ugo Hugo LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 1 9 BLOCKIN 8 0 +Touvron touvron T To Tou Touv n on ron vron LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 1 9 BLOCKIN 8 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 2 9 BLOCKIN 8 0 +Thibaut thibaut T Th Thi Thib t ut aut baut LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 2 9 BLOCKIN 8 0 +Lavril lavril L La Lav Lavr l il ril vril LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 3 9 BLOCKIN 8 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 3 9 BLOCKIN 8 0 +Gautier gautier G Ga Gau Gaut r er ier tier LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 4 9 BLOCKIN 8 0 +Izacard izacard I Iz Iza Izac d rd ard card LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 5 9 BLOCKIN 8 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 5 9 BLOCKIN 8 0 +Xavier xavier X Xa Xav Xavi r er ier vier LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 6 9 BLOCKIN 8 0 +Martinet martinet M Ma Mar Mart t et net inet LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 7 9 BLOCKIN 8 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 7 9 BLOCKIN 8 0 +Marie marie M Ma Mar Mari e ie rie arie LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 8 9 BLOCKIN 8 0 +- - - - - - - - - - LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 HYPHEN 8 9 BLOCKIN 8 0 +Anne anne A An Ann Anne e ne nne Anne LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 
0 0 NOPUNCT 8 9 BLOCKIN 8 0 +Lachaux lachaux L La Lac Lach x ux aux haux LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 9 9 BLOCKIN 8 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 9 9 BLOCKIN 8 0 +Timothée timothée T Ti Tim Timo e ée hée thée LINEEND ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 10 9 BLOCKIN 8 0 +Lacroix lacroix L La Lac Lacr x ix oix roix LINESTART LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 0 9 BLOCKIN 6 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 0 9 BLOCKIN 6 0 +Baptiste baptiste B Ba Bap Bapt e te ste iste LINEIN LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 1 9 BLOCKIN 6 0 +Rozière rozière R Ro Roz Rozi e re ère ière LINEIN LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 2 9 BLOCKIN 6 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 2 9 BLOCKIN 6 0 +Naman naman N Na Nam Nama n an man aman LINEIN LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 3 9 BLOCKIN 6 0 +Goyal goyal G Go Goy Goya l al yal oyal LINEIN LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 4 9 BLOCKIN 6 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 4 9 BLOCKIN 6 0 +Eric eric E Er Eri Eric c ic ric Eric LINEIN LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 5 9 BLOCKIN 6 0 +Hambro hambro H Ha Ham Hamb o ro bro mbro LINEIN LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 6 9 BLOCKIN 6 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 6 9 BLOCKIN 6 0 +Faisal faisal F Fa Fai Fais l al sal isal LINEIN LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 6 9 BLOCKIN 6 0 +Azhar azhar A Az Azh Azha r ar har zhar LINEIN LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 7 9 BLOCKIN 6 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 7 9 BLOCKIN 6 0 +Aurelien aurelien A Au Aur Aure n en ien lien LINEIN LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 8 9 BLOCKIN 6 0 
+Rodriguez rodriguez R Ro Rod Rodr z ez uez guez LINEIN LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 10 9 BLOCKIN 6 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 10 9 BLOCKIN 6 0 +Armand armand A Ar Arm Arma d nd and mand LINEEND LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 10 9 BLOCKIN 6 0 +Joulin joulin J Jo Jou Joul n in lin ulin LINESTART LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 0 8 BLOCKIN 4 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 0 8 BLOCKIN 4 0 +Edouard edouard E Ed Edo Edou d rd ard uard LINEIN LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 1 8 BLOCKIN 4 0 +Grave grave G Gr Gra Grav e ve ave rave LINEIN LINEINDENT INITCAP NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 2 8 BLOCKIN 4 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 2 8 BLOCKIN 4 0 +and and a an and and d nd and and LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 3 8 BLOCKIN 4 0 +Guillaume guillaume G Gu Gui Guil e me ume aume LINEIN LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 4 8 BLOCKIN 4 0 +Lample lample L La Lam Lamp e le ple mple LINEIN LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 5 8 BLOCKIN 4 0 +. . . . . . . . . . 
LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 5 8 BLOCKIN 4 0 +Llama llama L Ll Lla Llam a ma ama lama LINEIN LINEINDENT INITCAP NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 6 8 BLOCKIN 4 0 +: : : : : : : : : : LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 PUNCT 6 8 BLOCKIN 4 0 +Open open O Op Ope Open n en pen Open LINEIN LINEINDENT INITCAP NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 6 8 BLOCKIN 4 0 +and and a an and and d nd and and LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 7 8 BLOCKIN 4 0 +efficient efficient e ef eff effi t nt ent ient LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 8 8 BLOCKIN 4 0 +foundation foundation f fo fou foun n on ion tion LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 9 8 BLOCKIN 4 0 +language language l la lan lang e ge age uage LINEEND LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 10 8 BLOCKIN 4 0 +models models m mo mod mode s ls els dels LINESTART LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 4 1 BLOCKIN 2 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 4 1 BLOCKIN 2 0 +2023 2023 2 20 202 2023 3 23 023 2023 LINEIN LINEINDENT NOCAPS ALLDIGIT 0 0 0 0 0 1 0 0 NOPUNCT 8 1 BLOCKIN 2 0 +. . . . . . . . . . 
LINEEND LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 9 1 BLOCKEND 2 0 +[ [ [ [ [ [ [ [ [ [ LINESTART ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 OPENBRACKET 1 0 BLOCKSTART 2 0 +12 12 1 12 12 12 2 12 12 12 LINEIN ALIGNEDLEFT NOCAPS ALLDIGIT 0 0 0 0 0 0 0 0 NOPUNCT 3 0 BLOCKIN 2 0 +] ] ] ] ] ] ] ] ] ] LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 ENDBRACKET 4 0 BLOCKIN 2 0 +The the T Th The The e he The The LINEEND ALIGNEDLEFT INITCAP NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 9 0 BLOCKIN 2 0 +art art a ar art art t rt art art LINESTART LINEINDENT NOCAPS NODIGIT 0 1 1 0 0 0 0 0 NOPUNCT 6 0 BLOCKIN no 0 +of of o of of of f of of of LINESTART LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 5 0 BLOCKIN no 0 +fine fine f fi fin fine e ne ine fine LINESTART LINEINDENT NOCAPS NODIGIT 0 1 1 0 0 0 0 0 NOPUNCT 3 1 BLOCKIN 1 0 +- - - - - - - - - - LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 HYPHEN 3 1 BLOCKIN 1 0 +tuning tuning t tu tun tuni g ng ing ning LINEEND LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 8 1 BLOCKIN 1 0 +large large l la lar larg e ge rge arge LINESTART LINEINDENT NOCAPS NODIGIT 0 1 1 0 0 0 0 0 NOPUNCT 7 0 BLOCKIN no 0 +language language l la lan lang e ge age uage LINESTART LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 8 0 BLOCKIN no 0 +models models m mo mod mode s ls els dels LINESTART LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 6 0 BLOCKIN 1 0 +, , , , , , , , , , LINEEND LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 7 0 BLOCKIN 1 0 +explained explained e ex exp expl d ed ned ined LINESTART LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 8 1 BLOCKIN no 0 +in in i in in in n in in in LINESTART LINEINDENT NOCAPS NODIGIT 0 1 1 0 0 0 0 0 NOPUNCT 5 0 BLOCKIN no 0 +depth depth d de dep dept h th pth epth LINESTART ALIGNEDLEFT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 7 0 BLOCKIN no 0 +- - - - - - - - - - LINESTART LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 HYPHEN 5 0 BLOCKIN 1 0 +linkedin linkedin l li lin link n in din edin LINESTART 
LINEINDENT NOCAPS NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 5 1 BLOCKIN 2 0 +. . . . . . . . . . LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 6 1 BLOCKIN 2 0 +com com c co com com m om com com LINEIN LINEINDENT NOCAPS NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 8 1 BLOCKIN 2 0 +. . . . . . . . . . LINEEND LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 8 1 BLOCKIN 2 0 +https https h ht htt http s ps tps ttps LINESTART LINEINDENT NOCAPS NODIGIT 0 0 0 0 0 0 0 1 NOPUNCT 1 3 BLOCKIN 7 0 +: : : : : : : : : : LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 PUNCT 1 3 BLOCKIN 7 0 +/ / / / / / / / / / LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 NOPUNCT 2 3 BLOCKIN 7 0 +/ / / / / / / / / / LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 NOPUNCT 2 3 BLOCKIN 7 0 +www www w ww www www w ww www www LINEIN LINEINDENT NOCAPS NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 3 3 BLOCKIN 7 0 +. . . . . . . . . . LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 3 3 BLOCKIN 7 0 +linkedin linkedin l li lin link n in din edin LINEIN LINEINDENT NOCAPS NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 6 3 BLOCKIN 7 0 +. . . . . . . . . . 
LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 6 3 BLOCKIN 7 0 +com com c co com com m om com com LINEIN LINEINDENT NOCAPS NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 7 3 BLOCKIN 7 0 +/ / / / / / / / / / LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 NOPUNCT 7 3 BLOCKIN 7 0 +pulse pulse p pu pul puls e se lse ulse LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 9 3 BLOCKIN 7 0 +/ / / / / / / / / / LINEEND LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 NOPUNCT 9 3 BLOCKIN 7 0 +art art a ar art art t rt art art LINESTART ALIGNEDLEFT NOCAPS NODIGIT 0 1 1 0 0 0 0 0 NOPUNCT 0 6 BLOCKIN 10 0 +- - - - - - - - - - LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 HYPHEN 0 6 BLOCKIN 10 0 +fine fine f fi fin fine e ne ine fine LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 1 1 0 0 0 0 0 NOPUNCT 1 6 BLOCKIN 10 0 +- - - - - - - - - - LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 HYPHEN 1 6 BLOCKIN 10 0 +tuning tuning t tu tun tuni g ng ing ning LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 2 6 BLOCKIN 10 0 +- - - - - - - - - - LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 HYPHEN 2 6 BLOCKIN 10 0 +large large l la lar larg e ge rge arge LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 1 1 0 0 0 0 0 NOPUNCT 2 6 BLOCKIN 10 0 +- - - - - - - - - - LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 HYPHEN 3 6 BLOCKIN 10 0 +language language l la lan lang e ge age uage LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 4 6 BLOCKIN 10 0 +- - - - - - - - - - LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 HYPHEN 4 6 BLOCKIN 10 0 +models models m mo mod mode s ls els dels LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 5 6 BLOCKIN 10 0 +- - - - - - - - - - LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 HYPHEN 5 6 BLOCKIN 10 0 +explained explained e ex exp expl d ed ned ined LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 6 6 BLOCKIN 10 0 +- - - - - - - - - - LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 HYPHEN 6 6 BLOCKIN 10 0 +depth depth d de dep dept h 
th pth epth LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 7 6 BLOCKIN 10 0 +- - - - - - - - - - LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 HYPHEN 7 6 BLOCKIN 10 0 +cherickal cherickal c ch che cher l al kal ckal LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 8 6 BLOCKIN 10 0 +- - - - - - - - - - LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 HYPHEN 8 6 BLOCKIN 10 0 +giavc giavc g gi gia giav c vc avc iavc LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 9 6 BLOCKIN 10 0 +. . . . . . . . . . LINEEND ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 9 6 BLOCKIN 10 0 +[ [ [ [ [ [ [ [ [ [ LINESTART LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 OPENBRACKET 0 1 BLOCKIN 1 0 +Accessed accessed A Ac Acc Acce d ed sed ssed LINEEND LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 8 1 BLOCKIN 1 0 +01 01 0 01 01 01 1 01 01 01 LINESTART ALIGNEDLEFT NOCAPS ALLDIGIT 0 0 0 0 0 0 0 0 NOPUNCT 1 1 BLOCKIN 4 0 +- - - - - - - - - - LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 HYPHEN 2 1 BLOCKIN 4 0 +07 07 0 07 07 07 7 07 07 07 LINEIN ALIGNEDLEFT NOCAPS ALLDIGIT 0 0 0 0 0 0 0 0 NOPUNCT 3 1 BLOCKIN 4 0 +- - - - - - - - - - LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 HYPHEN 4 1 BLOCKIN 4 0 +2024 2024 2 20 202 2024 4 24 024 2024 LINEIN ALIGNEDLEFT NOCAPS ALLDIGIT 0 0 0 0 0 1 0 0 NOPUNCT 7 1 BLOCKIN 4 0 +] ] ] ] ] ] ] ] ] ] LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 ENDBRACKET 7 1 BLOCKIN 4 0 +. . . . . . . . . . 
LINEEND ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 8 1 BLOCKEND 4 0 +[ [ [ [ [ [ [ [ [ [ LINESTART ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 OPENBRACKET 0 8 BLOCKSTART 8 0 +13 13 1 13 13 13 3 13 13 13 LINEIN ALIGNEDLEFT NOCAPS ALLDIGIT 0 0 0 0 0 0 0 0 NOPUNCT 0 8 BLOCKIN 8 0 +] ] ] ] ] ] ] ] ] ] LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 ENDBRACKET 0 8 BLOCKIN 8 0 +Humza humza H Hu Hum Humz a za mza umza LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 1 8 BLOCKIN 8 0 +Naveed naveed N Na Nav Nave d ed eed veed LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 2 8 BLOCKIN 8 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 2 8 BLOCKIN 8 0 +Asad asad A As Asa Asad d ad sad Asad LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 2 8 BLOCKIN 8 0 +Ullah ullah U Ul Ull Ulla h ah lah llah LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 3 8 BLOCKIN 8 0 +Khan khan K Kh Kha Khan n an han Khan LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 4 8 BLOCKIN 8 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 4 8 BLOCKIN 8 0 +Shi shi S Sh Shi Shi i hi Shi Shi LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 4 8 BLOCKIN 8 0 +Qiu qiu Q Qi Qiu Qiu u iu Qiu Qiu LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 5 8 BLOCKIN 8 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 5 8 BLOCKIN 8 0 +Muhammad muhammad M Mu Muh Muha d ad mad mmad LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 6 8 BLOCKIN 8 0 +Saqib saqib S Sa Saq Saqi b ib qib aqib LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 7 8 BLOCKIN 8 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 7 8 BLOCKIN 8 0 +Saeed saeed S Sa Sae Saee d ed eed aeed LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 8 8 BLOCKIN 8 0 +Anwar anwar A An Anw Anwa r ar war nwar LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 
NOPUNCT 9 8 BLOCKIN 8 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 9 8 BLOCKIN 8 0 +Muhammad muhammad M Mu Muh Muha d ad mad mmad LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 10 8 BLOCKIN 8 0 +Usman usman U Us Usm Usma n an man sman LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 10 8 BLOCKIN 8 0 +, , , , , , , , , , LINEEND ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 10 8 BLOCKIN 8 0 +Naveed naveed N Na Nav Nave d ed eed veed LINESTART LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 0 9 BLOCKIN 4 0 +Akhtar akhtar A Ak Akh Akht r ar tar htar LINEIN LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 1 9 BLOCKIN 4 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 1 9 BLOCKIN 4 0 +Nick nick N Ni Nic Nick k ck ick Nick LINEIN LINEINDENT INITCAP NODIGIT 0 1 1 0 0 0 0 0 NOPUNCT 2 9 BLOCKIN 4 0 +Barnes barnes B Ba Bar Barn s es nes rnes LINEIN LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 3 9 BLOCKIN 4 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 3 9 BLOCKIN 4 0 +and and a an and and d nd and and LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 3 9 BLOCKIN 4 0 +Ajmal ajmal A Aj Ajm Ajma l al mal jmal LINEIN LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 4 9 BLOCKIN 4 0 +Mian mian M Mi Mia Mian n an ian Mian LINEIN LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 5 9 BLOCKIN 4 0 +. . . . . . . . . . 
LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 5 9 BLOCKIN 4 0 +A a A A A A A A A A LINEIN LINEINDENT ALLCAP NODIGIT 1 0 1 0 0 0 0 0 NOPUNCT 5 9 BLOCKIN 4 0 +comprehensive comprehensive c co com comp e ve ive sive LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 6 9 BLOCKIN 4 0 +overview overview o ov ove over w ew iew view LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 8 9 BLOCKIN 4 0 +of of o of of of f of of of LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 8 9 BLOCKIN 4 0 +large large l la lar larg e ge rge arge LINEIN LINEINDENT NOCAPS NODIGIT 0 1 1 0 0 0 0 0 NOPUNCT 9 9 BLOCKIN 4 0 +language language l la lan lang e ge age uage LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 10 9 BLOCKIN 4 0 +models models m mo mod mode s ls els dels LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 10 9 BLOCKIN 4 0 +, , , , , , , , , , LINEEND LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 10 9 BLOCKIN 4 0 +2024 2024 2 20 202 2024 4 24 024 2024 LINESTART LINEINDENT NOCAPS ALLDIGIT 0 0 0 0 0 1 0 0 NOPUNCT 5 0 BLOCKIN 1 0 +. . . . . . . . . . 
LINEEND LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 7 0 BLOCKEND 1 0 +[ [ [ [ [ [ [ [ [ [ LINESTART ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 OPENBRACKET 0 3 BLOCKSTART 5 0 +14 14 1 14 14 14 4 14 14 14 LINEIN ALIGNEDLEFT NOCAPS ALLDIGIT 0 0 0 0 0 0 0 0 NOPUNCT 0 3 BLOCKIN 5 0 +] ] ] ] ] ] ] ] ] ] LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 ENDBRACKET 1 3 BLOCKIN 5 0 +Jeff jeff J Je Jef Jeff f ff eff Jeff LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 2 3 BLOCKIN 5 0 +Li li L Li Li Li i Li Li Li LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 3 3 BLOCKIN 5 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 4 3 BLOCKIN 5 0 +MBA mba M MB MBA MBA A BA MBA MBA LINEIN ALIGNEDLEFT ALLCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 5 3 BLOCKIN 5 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 5 3 BLOCKIN 5 0 +PMP pmp P PM PMP PMP P MP PMP PMP LINEIN ALIGNEDLEFT ALLCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 7 3 BLOCKIN 5 0 +on on o on on on n on on on LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 8 3 BLOCKIN 5 0 +LinkedIn linkedin L Li Lin Link n In dIn edIn LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 10 3 BLOCKIN 5 0 +: : : : : : : : : : LINEEND ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 PUNCT 10 3 BLOCKIN 5 0 +Fine fine F Fi Fin Fine e ne ine Fine LINESTART LINEINDENT INITCAP NODIGIT 0 1 1 0 0 0 0 0 NOPUNCT 0 4 BLOCKIN 2 0 +- - - - - - - - - - LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 HYPHEN 1 4 BLOCKIN 2 0 +tuning tuning t tu tun tuni g ng ing ning LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 2 4 BLOCKIN 2 0 +versus versus v ve ver vers s us sus rsus LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 4 4 BLOCKIN 2 0 +RAG rag R RA RAG RAG G AG RAG RAG LINEIN LINEINDENT ALLCAP NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 5 4 BLOCKIN 2 0 +in in i in in in n in in in LINEIN LINEINDENT NOCAPS NODIGIT 0 1 1 0 0 0 0 0 NOPUNCT 6 4 BLOCKIN 2 0 +Generative generative G Ge 
Gen Gene e ve ive tive LINEIN LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 9 4 BLOCKIN 2 0 +AI ai A AI AI AI I AI AI AI LINEIN LINEINDENT ALLCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 10 4 BLOCKIN 2 0 +Ap ap A Ap Ap Ap p Ap Ap Ap LINEIN LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 10 4 BLOCKIN 2 0 +- - - - - - - - - - LINEEND LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 HYPHEN 10 4 BLOCKIN 2 0 +plications plications p pl pli plic s ns ons ions LINESTART ALIGNEDLEFT NOCAPS NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 2 3 BLOCKIN 3 0 +Architecture architecture A Ar Arc Arch e re ure ture LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 6 3 BLOCKIN 3 0 +- - - - - - - - - - LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 HYPHEN 6 3 BLOCKIN 3 0 +linkedin linkedin l li lin link n in din edin LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 8 3 BLOCKIN 3 0 +. . . . . . . . . . LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 9 3 BLOCKIN 3 0 +com com c co com com m om com com LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 9 3 BLOCKIN 3 0 +. . . . . . . . . . LINEEND ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 10 3 BLOCKIN 3 0 +https https h ht htt http s ps tps ttps LINESTART LINEINDENT NOCAPS NODIGIT 0 0 0 0 0 0 0 1 NOPUNCT 1 3 BLOCKIN 7 0 +: : : : : : : : : : LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 PUNCT 1 3 BLOCKIN 7 0 +/ / / / / / / / / / LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 NOPUNCT 1 3 BLOCKIN 7 0 +/ / / / / / / / / / LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 NOPUNCT 1 3 BLOCKIN 7 0 +www www w ww www www w ww www www LINEIN LINEINDENT NOCAPS NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 2 3 BLOCKIN 7 0 +. . . . . . . . . . LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 2 3 BLOCKIN 7 0 +linkedin linkedin l li lin link n in din edin LINEIN LINEINDENT NOCAPS NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 4 3 BLOCKIN 7 0 +. . . . . . . . . . 
LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 5 3 BLOCKIN 7 0 +com com c co com com m om com com LINEIN LINEINDENT NOCAPS NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 5 3 BLOCKIN 7 0 +/ / / / / / / / / / LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 NOPUNCT 6 3 BLOCKIN 7 0 +posts posts p po pos post s ts sts osts LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 7 3 BLOCKIN 7 0 +/ / / / / / / / / / LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 NOPUNCT 7 3 BLOCKIN 7 0 +xjeffli_ xjeffli_ x xj xje xjef _ i_ li_ fli_ LINEEND LINEINDENT NOCAPS NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 9 3 BLOCKIN 7 0 +fine fine f fi fin fine e ne ine fine LINESTART ALIGNEDLEFT NOCAPS NODIGIT 0 1 1 0 0 0 0 0 NOPUNCT 0 8 BLOCKIN 10 0 +- - - - - - - - - - LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 HYPHEN 0 8 BLOCKIN 10 0 +tuning tuning t tu tun tuni g ng ing ning LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 1 8 BLOCKIN 10 0 +- - - - - - - - - - LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 HYPHEN 1 8 BLOCKIN 10 0 +versus versus v ve ver vers s us sus rsus LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 2 8 BLOCKIN 10 0 +- - - - - - - - - - LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 HYPHEN 2 8 BLOCKIN 10 0 +rag rag r ra rag rag g ag rag rag LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 2 8 BLOCKIN 10 0 +- - - - - - - - - - LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 HYPHEN 2 8 BLOCKIN 10 0 +in in i in in in n in in in LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 1 1 0 0 0 0 0 NOPUNCT 2 8 BLOCKIN 10 0 +- - - - - - - - - - LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 HYPHEN 2 8 BLOCKIN 10 0 +generative generative g ge gen gene e ve ive tive LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 4 8 BLOCKIN 10 0 +- - - - - - - - - - LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 HYPHEN 4 8 BLOCKIN 10 0 +ai ai a ai ai ai i ai ai ai LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 4 8 BLOCKIN 10 0 +- - - - - - - - - - LINEIN 
ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 HYPHEN 4 8 BLOCKIN 10 0 +applications applications a ap app appl s ns ons ions LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 1 1 0 0 0 0 0 NOPUNCT 5 8 BLOCKIN 10 0 +- - - - - - - - - - LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 HYPHEN 5 8 BLOCKIN 10 0 +activity activity a ac act acti y ty ity vity LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 6 8 BLOCKIN 10 0 +- - - - - - - - - - LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 HYPHEN 6 8 BLOCKIN 10 0 +7189276988690382848 7189276988690382848 7 71 718 7189 8 48 848 2848 LINEIN ALIGNEDLEFT NOCAPS ALLDIGIT 0 0 0 0 0 1 0 0 NOPUNCT 9 8 BLOCKIN 10 0 +- - - - - - - - - - LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 HYPHEN 9 8 BLOCKIN 10 0 +- - - - - - - - - - LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 HYPHEN 9 8 BLOCKIN 10 0 +vxT vxt v vx vxT vxT T xT vxT vxT LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 9 8 BLOCKIN 10 0 +. . . . . . . . . . LINEEND ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 9 8 BLOCKIN 10 0 +[ [ [ [ [ [ [ [ [ [ LINESTART ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 OPENBRACKET 0 2 BLOCKIN 5 0 +Accessed accessed A Ac Acc Acce d ed sed ssed LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 3 2 BLOCKIN 5 0 +01 01 0 01 01 01 1 01 01 01 LINEIN ALIGNEDLEFT NOCAPS ALLDIGIT 0 0 0 0 0 0 0 0 NOPUNCT 5 2 BLOCKIN 5 0 +- - - - - - - - - - LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 HYPHEN 5 2 BLOCKIN 5 0 +08 08 0 08 08 08 8 08 08 08 LINEIN ALIGNEDLEFT NOCAPS ALLDIGIT 0 0 0 0 0 0 0 0 NOPUNCT 6 2 BLOCKIN 5 0 +- - - - - - - - - - LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 HYPHEN 7 2 BLOCKIN 5 0 +2024 2024 2 20 202 2024 4 24 024 2024 LINEIN ALIGNEDLEFT NOCAPS ALLDIGIT 0 0 0 0 0 1 0 0 NOPUNCT 8 2 BLOCKIN 5 0 +] ] ] ] ] ] ] ] ] ] LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 ENDBRACKET 9 2 BLOCKIN 5 0 +. . . . . . . . . . 
LINEEND ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 9 2 BLOCKEND 5 0 +[ [ [ [ [ [ [ [ [ [ LINESTART ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 OPENBRACKET 0 8 BLOCKSTART 9 0 +15 15 1 15 15 15 5 15 15 15 LINEIN ALIGNEDLEFT NOCAPS ALLDIGIT 0 0 0 0 0 0 0 0 NOPUNCT 0 8 BLOCKIN 9 0 +] ] ] ] ] ] ] ] ] ] LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 ENDBRACKET 0 8 BLOCKIN 9 0 +Tingfeng tingfeng T Ti Tin Ting g ng eng feng LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 1 8 BLOCKIN 9 0 +Hui hui H Hu Hui Hui i ui Hui Hui LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 2 8 BLOCKIN 9 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 2 8 BLOCKIN 9 0 +Zhenyu zhenyu Z Zh Zhe Zhen u yu nyu enyu LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 3 8 BLOCKIN 9 0 +Zhang zhang Z Zh Zha Zhan g ng ang hang LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 3 8 BLOCKIN 9 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 4 8 BLOCKIN 9 0 +Shuohuan shuohuan S Sh Shu Shuo n an uan huan LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 5 8 BLOCKIN 9 0 +Wang wang W Wa Wan Wang g ng ang Wang LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 5 8 BLOCKIN 9 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 5 8 BLOCKIN 9 0 +Weiran weiran W We Wei Weir n an ran iran LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 6 8 BLOCKIN 9 0 +Xu xu X Xu Xu Xu u Xu Xu Xu LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 7 8 BLOCKIN 9 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 7 8 BLOCKIN 9 0 +Yu yu Y Yu Yu Yu u Yu Yu Yu LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 7 8 BLOCKIN 9 0 +Sun sun S Su Sun Sun n un Sun Sun LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 1 0 0 0 0 0 NOPUNCT 8 8 BLOCKIN 9 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 8 8 BLOCKIN 9 0 +and and a an and 
and d nd and and LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 9 8 BLOCKIN 9 0 +Hua hua H Hu Hua Hua a ua Hua Hua LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 9 8 BLOCKIN 9 0 +Wu wu W Wu Wu Wu u Wu Wu Wu LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 10 8 BLOCKIN 9 0 +. . . . . . . . . . LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 10 8 BLOCKIN 9 0 +Hft hft H Hf Hft Hft t ft Hft Hft LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 10 8 BLOCKIN 9 0 +: : : : : : : : : : LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 PUNCT 10 8 BLOCKIN 9 0 +Half half H Ha Hal Half f lf alf Half LINEEND ALIGNEDLEFT INITCAP NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 10 8 BLOCKIN 9 0 +fine fine f fi fin fine e ne ine fine LINESTART LINEINDENT NOCAPS NODIGIT 0 1 1 0 0 0 0 0 NOPUNCT 0 7 BLOCKIN 6 0 +- - - - - - - - - - LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 HYPHEN 0 7 BLOCKIN 6 0 +tuning tuning t tu tun tuni g ng ing ning LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 1 7 BLOCKIN 6 0 +for for f fo for for r or for for LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 2 7 BLOCKIN 6 0 +large large l la lar larg e ge rge arge LINEIN LINEINDENT NOCAPS NODIGIT 0 1 1 0 0 0 0 0 NOPUNCT 2 7 BLOCKIN 6 0 +language language l la lan lang e ge age uage LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 4 7 BLOCKIN 6 0 +models models m mo mod mode s ls els dels LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 5 7 BLOCKIN 6 0 +. . . . . . . . . . 
LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 5 7 BLOCKIN 6 0 +arXiv arxiv a ar arX arXi v iv Xiv rXiv LINEIN LINEINDENT NOCAPS NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 6 7 BLOCKIN 6 0 +preprint preprint p pr pre prep t nt int rint LINEIN LINEINDENT NOCAPS NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 7 7 BLOCKIN 6 0 +arXiv arxiv a ar arX arXi v iv Xiv rXiv LINEIN LINEINDENT NOCAPS NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 8 7 BLOCKIN 6 0 +: : : : : : : : : : LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 PUNCT 8 7 BLOCKIN 6 0 +2404 2404 2 24 240 2404 4 04 404 2404 LINEIN LINEINDENT NOCAPS ALLDIGIT 0 0 0 0 0 1 0 0 NOPUNCT 8 7 BLOCKIN 6 0 +. . . . . . . . . . LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 9 7 BLOCKIN 6 0 +18466 18466 1 18 184 1846 6 66 466 8466 LINEIN LINEINDENT NOCAPS ALLDIGIT 0 0 0 0 0 1 0 0 NOPUNCT 9 7 BLOCKIN 6 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 9 7 BLOCKIN 6 0 +2024 2024 2 20 202 2024 4 24 024 2024 LINEIN LINEINDENT NOCAPS ALLDIGIT 0 0 0 0 0 1 0 0 NOPUNCT 10 7 BLOCKIN 6 0 +. . . . . . . . . . 
LINEEND LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 10 7 BLOCKEND 6 0 +[ [ [ [ [ [ [ [ [ [ LINESTART ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 OPENBRACKET 0 9 BLOCKSTART 9 0 +16 16 1 16 16 16 6 16 16 16 LINEIN ALIGNEDLEFT NOCAPS ALLDIGIT 0 0 0 0 0 0 0 0 NOPUNCT 0 9 BLOCKIN 9 0 +] ] ] ] ] ] ] ] ] ] LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 ENDBRACKET 0 9 BLOCKIN 9 0 +Rion rion R Ri Rio Rion n on ion Rion LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 1 9 BLOCKIN 9 0 +Snow snow S Sn Sno Snow w ow now Snow LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 1 0 0 0 0 0 NOPUNCT 1 9 BLOCKIN 9 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 1 9 BLOCKIN 9 0 +Brendan brendan B Br Bre Bren n an dan ndan LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 2 9 BLOCKIN 9 0 +O o O O O O O O O O LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 NOPUNCT 2 9 BLOCKIN 9 0 +' ' ' ' ' ' ' ' ' ' LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 QUOTE 3 9 BLOCKIN 9 0 +Connor connor C Co Con Conn r or nor nnor LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 3 9 BLOCKIN 9 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 3 9 BLOCKIN 9 0 +Dan dan D Da Dan Dan n an Dan Dan LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 4 9 BLOCKIN 9 0 +Jurafsky jurafsky J Ju Jur Jura y ky sky fsky LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 5 9 BLOCKIN 9 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 5 9 BLOCKIN 9 0 +and and a an and and d nd and and LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 5 9 BLOCKIN 9 0 +Andrew andrew A An And Andr w ew rew drew LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 6 9 BLOCKIN 9 0 +Y y Y Y Y Y Y Y Y Y LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 NOPUNCT 7 9 BLOCKIN 9 0 +Ng ng N Ng Ng Ng g Ng Ng Ng LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 7 9 BLOCKIN 9 0 +. . . . . . . . . . 
LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 7 9 BLOCKIN 9 0 +Cheap cheap C Ch Che Chea p ap eap heap LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 8 9 BLOCKIN 9 0 +and and a an and and d nd and and LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 8 9 BLOCKIN 9 0 +fast fast f fa fas fast t st ast fast LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 9 9 BLOCKIN 9 0 +- - - - - - - - - - LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 HYPHEN 9 9 BLOCKIN 9 0 +but but b bu but but t ut but but LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 9 9 BLOCKIN 9 0 +is is i is is is s is is is LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 10 9 BLOCKIN 9 0 +it it i it it it t it it it LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 10 9 BLOCKIN 9 0 +good good g go goo good d od ood good LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 1 1 0 0 0 0 0 NOPUNCT 10 9 BLOCKIN 9 0 +? ? ? ? ? ? ? ? ? ? LINEEND ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 PUNCT 10 9 BLOCKIN 9 0 +evaluating evaluating e ev eva eval g ng ing ting LINESTART LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 1 9 BLOCKIN 2 0 +non non n no non non n on non non LINEIN LINEINDENT NOCAPS NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 1 9 BLOCKIN 2 0 +- - - - - - - - - - LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 HYPHEN 1 9 BLOCKIN 2 0 +expert expert e ex exp expe t rt ert pert LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 2 9 BLOCKIN 2 0 +annotations annotations a an ann anno s ns ons ions LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 3 9 BLOCKIN 2 0 +for for f fo for for r or for for LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 4 9 BLOCKIN 2 0 +natural natural n na nat natu l al ral ural LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 4 9 BLOCKIN 2 0 +language language l la lan lang e ge age uage LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 5 9 BLOCKIN 2 0 +tasks tasks t ta tas task s ks sks asks LINEIN 
LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 6 9 BLOCKIN 2 0 +. . . . . . . . . . LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 6 9 BLOCKIN 2 0 +In in I In In In n In In In LINEIN LINEINDENT INITCAP NODIGIT 0 1 1 0 0 0 0 0 NOPUNCT 7 9 BLOCKIN 2 0 +Proceedings proceedings P Pr Pro Proc s gs ngs ings LINEIN LINEINDENT INITCAP NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 8 9 BLOCKIN 2 0 +of of o of of of f of of of LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 8 9 BLOCKIN 2 0 +the the t th the the e he the the LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 9 9 BLOCKIN 2 0 +Conference conference C Co Con Conf e ce nce ence LINEIN LINEINDENT INITCAP NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 10 9 BLOCKIN 2 0 +on on o on on on n on on on LINEEND LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 10 9 BLOCKIN 2 0 +Empirical empirical E Em Emp Empi l al cal ical LINESTART LINEINDENT INITCAP NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 1 7 BLOCKIN 6 0 +Methods methods M Me Met Meth s ds ods hods LINEIN LINEINDENT INITCAP NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 2 7 BLOCKIN 6 0 +in in i in in in n in in in LINEIN LINEINDENT NOCAPS NODIGIT 0 1 1 0 0 0 0 0 NOPUNCT 2 7 BLOCKIN 6 0 +Natural natural N Na Nat Natu l al ral ural LINEIN LINEINDENT INITCAP NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 3 7 BLOCKIN 6 0 +Language language L La Lan Lang e ge age uage LINEIN LINEINDENT INITCAP NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 5 7 BLOCKIN 6 0 +Processing processing P Pr Pro Proc g ng ing sing LINEIN LINEINDENT INITCAP NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 6 7 BLOCKIN 6 0 +( ( ( ( ( ( ( ( ( ( LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 OPENBRACKET 7 7 BLOCKIN 6 0 +EMNLP emnlp E EM EMN EMNL P LP NLP MNLP LINEIN LINEINDENT ALLCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 7 7 BLOCKIN 6 0 +) ) ) ) ) ) ) ) ) ) LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 ENDBRACKET 7 7 BLOCKIN 6 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 7 7 BLOCKIN 6 0 +pages pages p pa pag page s es ges ages LINEIN LINEINDENT 
NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 8 7 BLOCKIN 6 0 +254 254 2 25 254 254 4 54 254 254 LINEIN LINEINDENT NOCAPS ALLDIGIT 0 0 0 0 0 0 0 0 NOPUNCT 9 7 BLOCKIN 6 0 +- - - - - - - - - - LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 HYPHEN 9 7 BLOCKIN 6 0 +263 263 2 26 263 263 3 63 263 263 LINEIN LINEINDENT NOCAPS ALLDIGIT 0 0 0 0 0 0 0 0 NOPUNCT 9 7 BLOCKIN 6 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 10 7 BLOCKIN 6 0 +2008 2008 2 20 200 2008 8 08 008 2008 LINEIN LINEINDENT NOCAPS ALLDIGIT 0 0 0 0 0 1 0 0 NOPUNCT 10 7 BLOCKIN 6 0 +. . . . . . . . . . LINEEND LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 10 7 BLOCKEND 6 0 +[ [ [ [ [ [ [ [ [ [ LINESTART ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 OPENBRACKET 0 9 BLOCKSTART 7 0 +17 17 1 17 17 17 7 17 17 17 LINEIN ALIGNEDLEFT NOCAPS ALLDIGIT 0 0 0 0 0 0 0 0 NOPUNCT 0 9 BLOCKIN 7 0 +] ] ] ] ] ] ] ] ] ] LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 ENDBRACKET 0 9 BLOCKIN 7 0 +Alexander alexander A Al Ale Alex r er der nder LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 1 9 BLOCKIN 7 0 +Ratner ratner R Ra Rat Ratn r er ner tner LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 2 9 BLOCKIN 7 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 2 9 BLOCKIN 7 0 +Stephen stephen S St Ste Step n en hen phen LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 3 9 BLOCKIN 7 0 +H h H H H H H H H H LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 NOPUNCT 3 9 BLOCKIN 7 0 +Bach bach B Ba Bac Bach h ch ach Bach LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 4 9 BLOCKIN 7 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 4 9 BLOCKIN 7 0 +Henry henry H He Hen Henr y ry nry enry LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 5 9 BLOCKIN 7 0 +Ehrenberg ehrenberg E Eh Ehr Ehre g rg erg berg LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 6 9 BLOCKIN 7 0 +, , , , , , , , , , LINEIN 
ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 6 9 BLOCKIN 7 0 +Jason jason J Ja Jas Jaso n on son ason LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 7 9 BLOCKIN 7 0 +Fries fries F Fr Fri Frie s es ies ries LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 1 0 0 0 0 0 NOPUNCT 8 9 BLOCKIN 7 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 8 9 BLOCKIN 7 0 +Sen sen S Se Sen Sen n en Sen Sen LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 8 9 BLOCKIN 7 0 +Wu wu W Wu Wu Wu u Wu Wu Wu LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 9 9 BLOCKIN 7 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 9 9 BLOCKIN 7 0 +and and a an and and d nd and and LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 9 9 BLOCKIN 7 0 +Christopher christopher C Ch Chr Chri r er her pher LINEEND ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 10 9 BLOCKIN 7 0 +Ré ré R Ré Ré Ré é Ré Ré Ré LINESTART LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 0 8 BLOCKIN 3 0 +. . . . . . . . . . 
LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 0 8 BLOCKIN 3 0 +Snorkel snorkel S Sn Sno Snor l el kel rkel LINEIN LINEINDENT INITCAP NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 1 8 BLOCKIN 3 0 +: : : : : : : : : : LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 PUNCT 1 8 BLOCKIN 3 0 +Rapid rapid R Ra Rap Rapi d id pid apid LINEIN LINEINDENT INITCAP NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 2 8 BLOCKIN 3 0 +training training t tr tra trai g ng ing ning LINEIN LINEINDENT NOCAPS NODIGIT 0 1 1 0 0 0 0 0 NOPUNCT 3 8 BLOCKIN 3 0 +data data d da dat data a ta ata data LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 3 8 BLOCKIN 3 0 +creation creation c cr cre crea n on ion tion LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 4 8 BLOCKIN 3 0 +with with w wi wit with h th ith with LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 5 8 BLOCKIN 3 0 +weak weak w we wea weak k ak eak weak LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 6 8 BLOCKIN 3 0 +supervision supervision s su sup supe n on ion sion LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 7 8 BLOCKIN 3 0 +. . . . . . . . . . 
LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 7 8 BLOCKIN 3 0 +In in I In In In n In In In LINEIN LINEINDENT INITCAP NODIGIT 0 1 1 0 0 0 0 0 NOPUNCT 8 8 BLOCKIN 3 0 +Proceedings proceedings P Pr Pro Proc s gs ngs ings LINEIN LINEINDENT INITCAP NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 9 8 BLOCKIN 3 0 +of of o of of of f of of of LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 10 8 BLOCKIN 3 0 +the the t th the the e he the the LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 10 8 BLOCKIN 3 0 +VLDB vldb V VL VLD VLDB B DB LDB VLDB LINEEND LINEINDENT ALLCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 10 8 BLOCKIN 3 0 +Endowment endowment E En End Endo t nt ent ment LINESTART LINEINDENT INITCAP NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 2 4 BLOCKIN 5 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 2 4 BLOCKIN 5 0 +volume volume v vo vol volu e me ume lume LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 4 4 BLOCKIN 5 0 +11 11 1 11 11 11 1 11 11 11 LINEIN LINEINDENT NOCAPS ALLDIGIT 0 0 0 0 0 0 0 0 NOPUNCT 5 4 BLOCKIN 5 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 5 4 BLOCKIN 5 0 +pages pages p pa pag page s es ges ages LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 6 4 BLOCKIN 5 0 +269 269 2 26 269 269 9 69 269 269 LINEIN LINEINDENT NOCAPS ALLDIGIT 0 0 0 0 0 0 0 0 NOPUNCT 7 4 BLOCKIN 5 0 +- - - - - - - - - - LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 HYPHEN 8 4 BLOCKIN 5 0 +282 282 2 28 282 282 2 82 282 282 LINEIN LINEINDENT NOCAPS ALLDIGIT 0 0 0 0 0 0 0 0 NOPUNCT 8 4 BLOCKIN 5 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 9 4 BLOCKIN 5 0 +2017 2017 2 20 201 2017 7 17 017 2017 LINEIN LINEINDENT NOCAPS ALLDIGIT 0 0 0 0 0 1 0 0 NOPUNCT 10 4 BLOCKIN 5 0 +. . . . . . . . . . 
LINEEND LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 10 4 BLOCKEND 5 0 +[ [ [ [ [ [ [ [ [ [ LINESTART ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 OPENBRACKET 0 9 BLOCKSTART 8 0 +18 18 1 18 18 18 8 18 18 18 LINEIN ALIGNEDLEFT NOCAPS ALLDIGIT 0 0 0 0 0 0 0 0 NOPUNCT 0 9 BLOCKIN 8 0 +] ] ] ] ] ] ] ] ] ] LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 ENDBRACKET 0 9 BLOCKIN 8 0 +Liang liang L Li Lia Lian g ng ang iang LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 1 9 BLOCKIN 8 0 +Ding ding D Di Din Ding g ng ing Ding LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 1 9 BLOCKIN 8 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 1 9 BLOCKIN 8 0 +Philipp philipp P Ph Phi Phil p pp ipp lipp LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 2 9 BLOCKIN 8 0 +Gentner gentner G Ge Gen Gent r er ner tner LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 3 9 BLOCKIN 8 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 3 9 BLOCKIN 8 0 +Artur artur A Ar Art Artu r ur tur rtur LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 4 9 BLOCKIN 8 0 +Duda duda D Du Dud Duda a da uda Duda LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 5 9 BLOCKIN 8 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 5 9 BLOCKIN 8 0 +Vaibhav vaibhav V Va Vai Vaib v av hav bhav LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 6 9 BLOCKIN 8 0 +Sangtani sangtani S Sa San Sang i ni ani tani LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 7 9 BLOCKIN 8 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 7 9 BLOCKIN 8 0 +Dominik dominik D Do Dom Domi k ik nik inik LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 8 9 BLOCKIN 8 0 +Ziegler ziegler Z Zi Zie Zieg r er ler gler LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 9 9 BLOCKIN 8 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 
0 0 COMMA 9 9 BLOCKIN 8 0 +Max max M Ma Max Max x ax Max Max LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 10 9 BLOCKIN 8 0 +Hennen hennen H He Hen Henn n en nen nnen LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 10 9 BLOCKIN 8 0 +, , , , , , , , , , LINEEND ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 10 9 BLOCKIN 8 0 +Siddharth siddharth S Si Sid Sidd h th rth arth LINESTART LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 0 9 BLOCKIN 2 0 +Jain jain J Ja Jai Jain n in ain Jain LINEIN LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 1 9 BLOCKIN 2 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 1 9 BLOCKIN 2 0 +and and a an and and d nd and and LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 2 9 BLOCKIN 2 0 +Roland roland R Ro Rol Rola d nd and land LINEIN LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 3 9 BLOCKIN 2 0 +Werthschützky werthschützky W We Wer Wert y ky zky tzky LINEIN LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 4 9 BLOCKIN 2 0 +. . . . . . . . . . 
LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 4 9 BLOCKIN 2 0 +Automatic automatic A Au Aut Auto c ic tic atic LINEIN LINEINDENT INITCAP NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 5 9 BLOCKIN 2 0 +data data d da dat data a ta ata data LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 6 9 BLOCKIN 2 0 +labeling labeling l la lab labe g ng ing ling LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 7 9 BLOCKIN 2 0 +for for f fo for for r or for for LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 8 9 BLOCKIN 2 0 +supervised supervised s su sup supe d ed sed ised LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 9 9 BLOCKIN 2 0 +learning learning l le lea lear g ng ing ning LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 10 9 BLOCKIN 2 0 +with with w wi wit with h th ith with LINEEND LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 10 9 BLOCKIN 2 0 +applications applications a ap app appl s ns ons ions LINESTART LINEINDENT NOCAPS NODIGIT 0 1 1 0 0 0 0 0 NOPUNCT 1 9 BLOCKIN 5 0 +to to t to to to o to to to LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 1 9 BLOCKIN 5 0 +visual visual v vi vis visu l al ual sual LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 2 9 BLOCKIN 5 0 +inspection inspection i in ins insp n on ion tion LINEIN LINEINDENT NOCAPS NODIGIT 0 1 1 0 0 0 0 0 NOPUNCT 3 9 BLOCKIN 5 0 +of of o of of of f of of of LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 4 9 BLOCKIN 5 0 +mixed mixed m mi mix mixe d ed xed ixed LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 4 9 BLOCKIN 5 0 +- - - - - - - - - - LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 HYPHEN 4 9 BLOCKIN 5 0 +plastic plastic p pl pla plas c ic tic stic LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 5 9 BLOCKIN 5 0 +waste waste w wa was wast e te ste aste LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 6 9 BLOCKIN 5 0 +. . . . . . . . . . 
LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 6 9 BLOCKIN 5 0 +Journal journal J Jo Jou Jour l al nal rnal LINEIN LINEINDENT INITCAP NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 7 9 BLOCKIN 5 0 +of of o of of of f of of of LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 7 9 BLOCKIN 5 0 +Cleaner cleaner C Cl Cle Clea r er ner aner LINEIN LINEINDENT INITCAP NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 8 9 BLOCKIN 5 0 +Production production P Pr Pro Prod n on ion tion LINEIN LINEINDENT INITCAP NODIGIT 0 1 1 0 0 0 0 0 NOPUNCT 9 9 BLOCKIN 5 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 9 9 BLOCKIN 5 0 +234 234 2 23 234 234 4 34 234 234 LINEIN LINEINDENT NOCAPS ALLDIGIT 0 0 0 0 0 0 0 0 NOPUNCT 10 9 BLOCKIN 5 0 +: : : : : : : : : : LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 PUNCT 10 9 BLOCKIN 5 0 +1033 1033 1 10 103 1033 3 33 033 1033 LINEIN LINEINDENT NOCAPS ALLDIGIT 0 0 0 0 0 1 0 0 NOPUNCT 10 9 BLOCKIN 5 0 +- - - - - - - - - - LINEEND LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 HYPHEN 10 9 BLOCKIN 5 0 +1044 1044 1 10 104 1044 4 44 044 1044 LINESTART LINEINDENT NOCAPS ALLDIGIT 0 0 0 0 0 1 0 0 NOPUNCT 3 1 BLOCKIN 2 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 3 1 BLOCKIN 2 0 +2019 2019 2 20 201 2019 9 19 019 2019 LINEIN LINEINDENT NOCAPS ALLDIGIT 0 0 0 0 0 1 0 0 NOPUNCT 8 1 BLOCKIN 2 0 +. . . . . . . . . . 
LINEEND LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 9 1 BLOCKEND 2 0 +[ [ [ [ [ [ [ [ [ [ LINESTART ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 OPENBRACKET 0 9 BLOCKSTART 7 0 +19 19 1 19 19 19 9 19 19 19 LINEIN ALIGNEDLEFT NOCAPS ALLDIGIT 0 0 0 0 0 0 0 0 NOPUNCT 0 9 BLOCKIN 7 0 +] ] ] ] ] ] ] ] ] ] LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 ENDBRACKET 0 9 BLOCKIN 7 0 +Tomas tomas T To Tom Toma s as mas omas LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 1 9 BLOCKIN 7 0 +Mikolov mikolov M Mi Mik Miko v ov lov olov LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 1 9 BLOCKIN 7 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 2 9 BLOCKIN 7 0 +Kai kai K Ka Kai Kai i ai Kai Kai LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 2 9 BLOCKIN 7 0 +Chen chen C Ch Che Chen n en hen Chen LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 3 9 BLOCKIN 7 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 3 9 BLOCKIN 7 0 +Greg greg G Gr Gre Greg g eg reg Greg LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 3 9 BLOCKIN 7 0 +Corrado corrado C Co Cor Corr o do ado rado LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 4 9 BLOCKIN 7 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 4 9 BLOCKIN 7 0 +and and a an and and d nd and and LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 5 9 BLOCKIN 7 0 +Jeffrey jeffrey J Je Jef Jeff y ey rey frey LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 6 9 BLOCKIN 7 0 +Dean dean D De Dea Dean n an ean Dean LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 1 0 0 0 0 0 NOPUNCT 6 9 BLOCKIN 7 0 +. . . . . . . . . . 
LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 6 9 BLOCKIN 7 0 +Efficient efficient E Ef Eff Effi t nt ent ient LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 8 9 BLOCKIN 7 0 +estimation estimation e es est esti n on ion tion LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 9 9 BLOCKIN 7 0 +of of o of of of f of of of LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 9 9 BLOCKIN 7 0 +word word w wo wor word d rd ord word LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 1 1 0 0 0 0 0 NOPUNCT 10 9 BLOCKIN 7 0 +represen represen r re rep repr n en sen esen LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 10 9 BLOCKIN 7 0 +- - - - - - - - - - LINEEND ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 HYPHEN 10 9 BLOCKIN 7 0 +tations tations t ta tat tati s ns ons ions LINESTART LINEINDENT NOCAPS NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 0 9 BLOCKIN 1 0 +in in i in in in n in in in LINEIN LINEINDENT NOCAPS NODIGIT 0 1 1 0 0 0 0 0 NOPUNCT 1 9 BLOCKIN 1 0 +vector vector v ve vec vect r or tor ctor LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 1 9 BLOCKIN 1 0 +space space s sp spa spac e ce ace pace LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 2 9 BLOCKIN 1 0 +. . . . . . . . . . 
LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 2 9 BLOCKIN 1 0 +In in I In In In n In In In LINEIN LINEINDENT INITCAP NODIGIT 0 1 1 0 0 0 0 0 NOPUNCT 3 9 BLOCKIN 1 0 +Proceedings proceedings P Pr Pro Proc s gs ngs ings LINEIN LINEINDENT INITCAP NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 4 9 BLOCKIN 1 0 +of of o of of of f of of of LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 4 9 BLOCKIN 1 0 +the the t th the the e he the the LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 5 9 BLOCKIN 1 0 +International international I In Int Inte l al nal onal LINEIN LINEINDENT INITCAP NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 6 9 BLOCKIN 1 0 +Conference conference C Co Con Conf e ce nce ence LINEIN LINEINDENT INITCAP NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 7 9 BLOCKIN 1 0 +on on o on on on n on on on LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 8 9 BLOCKIN 1 0 +Learning learning L Le Lea Lear g ng ing ning LINEIN LINEINDENT INITCAP NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 9 9 BLOCKIN 1 0 +Representations representations R Re Rep Repr s ns ons ions LINEEND LINEINDENT INITCAP NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 10 9 BLOCKIN 1 0 +( ( ( ( ( ( ( ( ( ( LINESTART LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 OPENBRACKET 0 1 BLOCKIN 4 0 +ICLR iclr I IC ICL ICLR R LR CLR ICLR LINEIN LINEINDENT ALLCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 3 1 BLOCKIN 4 0 +) ) ) ) ) ) ) ) ) ) LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 ENDBRACKET 4 1 BLOCKIN 4 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 4 1 BLOCKIN 4 0 +2013 2013 2 20 201 2013 3 13 013 2013 LINEIN LINEINDENT NOCAPS ALLDIGIT 0 0 0 0 0 1 0 0 NOPUNCT 8 1 BLOCKIN 4 0 +. . . . . . . . . . 
LINEEND LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 9 1 BLOCKEND 4 0 +[ [ [ [ [ [ [ [ [ [ LINESTART ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 OPENBRACKET 0 9 BLOCKSTART 6 0 +20 20 2 20 20 20 0 20 20 20 LINEIN ALIGNEDLEFT NOCAPS ALLDIGIT 0 0 0 0 0 0 0 0 NOPUNCT 0 9 BLOCKIN 6 0 +] ] ] ] ] ] ] ] ] ] LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 ENDBRACKET 0 9 BLOCKIN 6 0 +Jeffrey jeffrey J Je Jef Jeff y ey rey frey LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 1 9 BLOCKIN 6 0 +Pennington pennington P Pe Pen Penn n on ton gton LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 2 9 BLOCKIN 6 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 2 9 BLOCKIN 6 0 +Richard richard R Ri Ric Rich d rd ard hard LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 3 9 BLOCKIN 6 0 +Socher socher S So Soc Soch r er her cher LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 4 9 BLOCKIN 6 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 4 9 BLOCKIN 6 0 +and and a an and and d nd and and LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 4 9 BLOCKIN 6 0 +Christopher christopher C Ch Chr Chri r er her pher LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 6 9 BLOCKIN 6 0 +D d D D D D D D D D LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 NOPUNCT 6 9 BLOCKIN 6 0 +Manning manning M Ma Man Mann g ng ing ning LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 1 0 0 0 0 0 NOPUNCT 7 9 BLOCKIN 6 0 +. . . . . . . . . . 
LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 7 9 BLOCKIN 6 0 +Glove glove G Gl Glo Glov e ve ove love LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 8 9 BLOCKIN 6 0 +: : : : : : : : : : LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 PUNCT 8 9 BLOCKIN 6 0 +Global global G Gl Glo Glob l al bal obal LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 9 9 BLOCKIN 6 0 +vectors vectors v ve vec vect s rs ors tors LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 10 9 BLOCKIN 6 0 +for for f fo for for r or for for LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 10 9 BLOCKIN 6 0 +word word w wo wor word d rd ord word LINEEND ALIGNEDLEFT NOCAPS NODIGIT 0 1 1 0 0 0 0 0 NOPUNCT 10 9 BLOCKIN 6 0 +representation representation r re rep repr n on ion tion LINESTART LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 1 9 BLOCKIN 1 0 +. . . . . . . . . . LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 1 9 BLOCKIN 1 0 +In in I In In In n In In In LINEIN LINEINDENT INITCAP NODIGIT 0 1 1 0 0 0 0 0 NOPUNCT 1 9 BLOCKIN 1 0 +Proceedings proceedings P Pr Pro Proc s gs ngs ings LINEIN LINEINDENT INITCAP NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 3 9 BLOCKIN 1 0 +of of o of of of f of of of LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 3 9 BLOCKIN 1 0 +the the t th the the e he the the LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 4 9 BLOCKIN 1 0 +2014 2014 2 20 201 2014 4 14 014 2014 LINEIN LINEINDENT NOCAPS ALLDIGIT 0 0 0 0 0 1 0 0 NOPUNCT 4 9 BLOCKIN 1 0 +Conference conference C Co Con Conf e ce nce ence LINEIN LINEINDENT INITCAP NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 6 9 BLOCKIN 1 0 +on on o on on on n on on on LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 6 9 BLOCKIN 1 0 +Empirical empirical E Em Emp Empi l al cal ical LINEIN LINEINDENT INITCAP NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 7 9 BLOCKIN 1 0 +Methods methods M Me Met Meth s ds ods hods LINEIN LINEINDENT INITCAP NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 8 9 BLOCKIN 1 0 +in in i 
in in in n in in in LINEIN LINEINDENT NOCAPS NODIGIT 0 1 1 0 0 0 0 0 NOPUNCT 9 9 BLOCKIN 1 0 +Natural natural N Na Nat Natu l al ral ural LINEIN LINEINDENT INITCAP NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 10 9 BLOCKIN 1 0 +Language language L La Lan Lang e ge age uage LINEEND LINEINDENT INITCAP NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 10 9 BLOCKIN 1 0 +Processing processing P Pr Pro Proc g ng ing sing LINESTART LINEINDENT INITCAP NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 2 4 BLOCKIN 6 0 +( ( ( ( ( ( ( ( ( ( LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 OPENBRACKET 2 4 BLOCKIN 6 0 +EMNLP emnlp E EM EMN EMNL P LP NLP MNLP LINEIN LINEINDENT ALLCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 4 4 BLOCKIN 6 0 +) ) ) ) ) ) ) ) ) ) LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 ENDBRACKET 4 4 BLOCKIN 6 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 4 4 BLOCKIN 6 0 +pages pages p pa pag page s es ges ages LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 6 4 BLOCKIN 6 0 +1532 1532 1 15 153 1532 2 32 532 1532 LINEIN LINEINDENT NOCAPS ALLDIGIT 0 0 0 0 0 1 0 0 NOPUNCT 7 4 BLOCKIN 6 0 +- - - - - - - - - - LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 HYPHEN 7 4 BLOCKIN 6 0 +1543 1543 1 15 154 1543 3 43 543 1543 LINEIN LINEINDENT NOCAPS ALLDIGIT 0 0 0 0 0 1 0 0 NOPUNCT 8 4 BLOCKIN 6 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 8 4 BLOCKIN 6 0 +2014 2014 2 20 201 2014 4 14 014 2014 LINEIN LINEINDENT NOCAPS ALLDIGIT 0 0 0 0 0 1 0 0 NOPUNCT 10 4 BLOCKIN 6 0 +. . . . . . . . . . 
LINEEND LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 10 4 BLOCKEND 6 0 +[ [ [ [ [ [ [ [ [ [ LINESTART ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 OPENBRACKET 0 9 BLOCKSTART 5 0 +21 21 2 21 21 21 1 21 21 21 LINEIN ALIGNEDLEFT NOCAPS ALLDIGIT 0 0 0 0 0 0 0 0 NOPUNCT 0 9 BLOCKIN 5 0 +] ] ] ] ] ] ] ] ] ] LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 ENDBRACKET 0 9 BLOCKIN 5 0 +Rico rico R Ri Ric Rico o co ico Rico LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 1 9 BLOCKIN 5 0 +Sennrich sennrich S Se Sen Senn h ch ich rich LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 2 9 BLOCKIN 5 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 2 9 BLOCKIN 5 0 +Barry barry B Ba Bar Barr y ry rry arry LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 2 9 BLOCKIN 5 0 +Haddow haddow H Ha Had Hadd w ow dow ddow LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 3 9 BLOCKIN 5 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 3 9 BLOCKIN 5 0 +and and a an and and d nd and and LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 4 9 BLOCKIN 5 0 +Alexandra alexandra A Al Ale Alex a ra dra ndra LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 5 9 BLOCKIN 5 0 +Birch birch B Bi Bir Birc h ch rch irch LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 1 0 0 0 0 0 NOPUNCT 6 9 BLOCKIN 5 0 +. . . . . . . . . . 
LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 6 9 BLOCKIN 5 0 +Improving improving I Im Imp Impr g ng ing ving LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 7 9 BLOCKIN 5 0 +neural neural n ne neu neur l al ral ural LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 8 9 BLOCKIN 5 0 +machine machine m ma mac mach e ne ine hine LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 8 9 BLOCKIN 5 0 +translation translation t tr tra tran n on ion tion LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 10 9 BLOCKIN 5 0 +models models m mo mod mode s ls els dels LINEEND ALIGNEDLEFT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 10 9 BLOCKIN 5 0 +with with w wi wit with h th ith with LINESTART LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 0 9 BLOCKIN 2 0 +monolingual monolingual m mo mon mono l al ual gual LINEIN LINEINDENT NOCAPS NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 1 9 BLOCKIN 2 0 +data data d da dat data a ta ata data LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 2 9 BLOCKIN 2 0 +. . . . . . . . . . 
LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 2 9 BLOCKIN 2 0 +Proceedings proceedings P Pr Pro Proc s gs ngs ings LINEIN LINEINDENT INITCAP NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 3 9 BLOCKIN 2 0 +of of o of of of f of of of LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 4 9 BLOCKIN 2 0 +the the t th the the e he the the LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 4 9 BLOCKIN 2 0 +54th 54th 5 54 54t 54th h th 4th 54th LINEIN LINEINDENT NOCAPS CONTAINSDIGITS 0 0 0 0 0 0 0 0 NOPUNCT 5 9 BLOCKIN 2 0 +Annual annual A An Ann Annu l al ual nual LINEIN LINEINDENT INITCAP NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 6 9 BLOCKIN 2 0 +Meeting meeting M Me Mee Meet g ng ing ting LINEIN LINEINDENT INITCAP NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 7 9 BLOCKIN 2 0 +of of o of of of f of of of LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 7 9 BLOCKIN 2 0 +the the t th the the e he the the LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 8 9 BLOCKIN 2 0 +Association association A As Ass Asso n on ion tion LINEIN LINEINDENT INITCAP NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 9 9 BLOCKIN 2 0 +for for f fo for for r or for for LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 10 9 BLOCKIN 2 0 +Computa computa C Co Com Comp a ta uta puta LINEIN LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 10 9 BLOCKIN 2 0 +- - - - - - - - - - LINEEND LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 HYPHEN 10 9 BLOCKIN 2 0 +tional tional t ti tio tion l al nal onal LINESTART LINEINDENT NOCAPS NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 0 6 BLOCKIN 7 0 +Linguistics linguistics L Li Lin Ling s cs ics tics LINEIN LINEINDENT INITCAP NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 2 6 BLOCKIN 7 0 +( ( ( ( ( ( ( ( ( ( LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 OPENBRACKET 3 6 BLOCKIN 7 0 +Volume volume V Vo Vol Volu e me ume lume LINEIN LINEINDENT INITCAP NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 4 6 BLOCKIN 7 0 +1 1 1 1 1 1 1 1 1 1 LINEIN LINEINDENT NOCAPS ALLDIGIT 1 0 0 0 0 0 0 0 NOPUNCT 4 6 BLOCKIN 7 0 +: : : : : : : : : : 
LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 PUNCT 5 6 BLOCKIN 7 0 +Long long L Lo Lon Long g ng ong Long LINEIN LINEINDENT INITCAP NODIGIT 0 1 1 0 0 0 0 0 NOPUNCT 5 6 BLOCKIN 7 0 +Papers papers P Pa Pap Pape s rs ers pers LINEIN LINEINDENT INITCAP NODIGIT 0 1 1 0 0 0 0 0 NOPUNCT 7 6 BLOCKIN 7 0 +) ) ) ) ) ) ) ) ) ) LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 ENDBRACKET 7 6 BLOCKIN 7 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 7 6 BLOCKIN 7 0 +pages pages p pa pag page s es ges ages LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 8 6 BLOCKIN 7 0 +86 86 8 86 86 86 6 86 86 86 LINEIN LINEINDENT NOCAPS ALLDIGIT 0 0 0 0 0 0 0 0 NOPUNCT 9 6 BLOCKIN 7 0 +- - - - - - - - - - LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 HYPHEN 9 6 BLOCKIN 7 0 +96 96 9 96 96 96 6 96 96 96 LINEIN LINEINDENT NOCAPS ALLDIGIT 0 0 0 0 0 0 0 0 NOPUNCT 9 6 BLOCKIN 7 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 9 6 BLOCKIN 7 0 +2016 2016 2 20 201 2016 6 16 016 2016 LINEIN LINEINDENT NOCAPS ALLDIGIT 0 0 0 0 0 1 0 0 NOPUNCT 10 6 BLOCKIN 7 0 +. . . . . . . . . . 
LINEEND LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 10 6 BLOCKEND 7 0 +[ [ [ [ [ [ [ [ [ [ LINESTART ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 OPENBRACKET 0 9 BLOCKSTART 9 0 +22 22 2 22 22 22 2 22 22 22 LINEIN ALIGNEDLEFT NOCAPS ALLDIGIT 0 0 0 0 0 0 0 0 NOPUNCT 0 9 BLOCKIN 9 0 +] ] ] ] ] ] ] ] ] ] LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 ENDBRACKET 0 9 BLOCKIN 9 0 +Javid javid J Ja Jav Javi d id vid avid LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 1 9 BLOCKIN 9 0 +Ebrahimi ebrahimi E Eb Ebr Ebra i mi imi himi LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 2 9 BLOCKIN 9 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 2 9 BLOCKIN 9 0 +Anyi anyi A An Any Anyi i yi nyi Anyi LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 2 9 BLOCKIN 9 0 +Rao rao R Ra Rao Rao o ao Rao Rao LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 3 9 BLOCKIN 9 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 3 9 BLOCKIN 9 0 +Daniel daniel D Da Dan Dani l el iel niel LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 4 9 BLOCKIN 9 0 +Lowd lowd L Lo Low Lowd d wd owd Lowd LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 5 9 BLOCKIN 9 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 5 9 BLOCKIN 9 0 +and and a an and and d nd and and LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 5 9 BLOCKIN 9 0 +Dejing dejing D De Dej Deji g ng ing jing LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 6 9 BLOCKIN 9 0 +Dou dou D Do Dou Dou u ou Dou Dou LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 7 9 BLOCKIN 9 0 +. . . . . . . . . . 
LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 7 9 BLOCKIN 9 0 +Hotflip hotflip H Ho Hot Hotf p ip lip flip LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 8 9 BLOCKIN 9 0 +: : : : : : : : : : LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 PUNCT 8 9 BLOCKIN 9 0 +White white W Wh Whi Whit e te ite hite LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 1 0 0 0 0 0 NOPUNCT 8 9 BLOCKIN 9 0 +- - - - - - - - - - LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 HYPHEN 9 9 BLOCKIN 9 0 +box box b bo box box x ox box box LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 1 1 0 0 0 0 0 NOPUNCT 9 9 BLOCKIN 9 0 +adversarial adversarial a ad adv adve l al ial rial LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 10 9 BLOCKIN 9 0 +ex ex e ex ex ex x ex ex ex LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 10 9 BLOCKIN 9 0 +- - - - - - - - - - LINEEND ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 HYPHEN 10 9 BLOCKIN 9 0 +amples amples a am amp ampl s es les ples LINESTART LINEINDENT NOCAPS NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 0 9 BLOCKIN 1 0 +for for f fo for for r or for for LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 1 9 BLOCKIN 1 0 +text text t te tex text t xt ext text LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 1 9 BLOCKIN 1 0 +classification classification c cl cla clas n on ion tion LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 3 9 BLOCKIN 1 0 +. . . . . . . . . . 
LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 3 9 BLOCKIN 1 0 +In in I In In In n In In In LINEIN LINEINDENT INITCAP NODIGIT 0 1 1 0 0 0 0 0 NOPUNCT 3 9 BLOCKIN 1 0 +Proceedings proceedings P Pr Pro Proc s gs ngs ings LINEIN LINEINDENT INITCAP NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 5 9 BLOCKIN 1 0 +of of o of of of f of of of LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 5 9 BLOCKIN 1 0 +the the t th the the e he the the LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 6 9 BLOCKIN 1 0 +56th 56th 5 56 56t 56th h th 6th 56th LINEIN LINEINDENT NOCAPS CONTAINSDIGITS 0 0 0 0 0 0 0 0 NOPUNCT 6 9 BLOCKIN 1 0 +Annual annual A An Ann Annu l al ual nual LINEIN LINEINDENT INITCAP NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 7 9 BLOCKIN 1 0 +Meeting meeting M Me Mee Meet g ng ing ting LINEIN LINEINDENT INITCAP NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 8 9 BLOCKIN 1 0 +of of o of of of f of of of LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 8 9 BLOCKIN 1 0 +the the t th the the e he the the LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 9 9 BLOCKIN 1 0 +Association association A As Ass Asso n on ion tion LINEIN LINEINDENT INITCAP NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 10 9 BLOCKIN 1 0 +for for f fo for for r or for for LINEEND LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 10 9 BLOCKIN 1 0 +Computational computational C Co Com Comp l al nal onal LINESTART LINEINDENT INITCAP NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 1 6 BLOCKIN 7 0 +Linguistics linguistics L Li Lin Ling s cs ics tics LINEIN LINEINDENT INITCAP NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 3 6 BLOCKIN 7 0 +( ( ( ( ( ( ( ( ( ( LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 OPENBRACKET 4 6 BLOCKIN 7 0 +Volume volume V Vo Vol Volu e me ume lume LINEIN LINEINDENT INITCAP NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 4 6 BLOCKIN 7 0 +2 2 2 2 2 2 2 2 2 2 LINEIN LINEINDENT NOCAPS ALLDIGIT 1 0 0 0 0 0 0 0 NOPUNCT 5 6 BLOCKIN 7 0 +: : : : : : : : : : LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 PUNCT 5 6 BLOCKIN 7 0 +Short short S Sh Sho 
Shor t rt ort hort LINEIN LINEINDENT INITCAP NODIGIT 0 1 1 0 0 0 0 0 NOPUNCT 6 6 BLOCKIN 7 0 +Papers papers P Pa Pap Pape s rs ers pers LINEIN LINEINDENT INITCAP NODIGIT 0 1 1 0 0 0 0 0 NOPUNCT 7 6 BLOCKIN 7 0 +) ) ) ) ) ) ) ) ) ) LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 ENDBRACKET 7 6 BLOCKIN 7 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 7 6 BLOCKIN 7 0 +pages pages p pa pag page s es ges ages LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 8 6 BLOCKIN 7 0 +31 31 3 31 31 31 1 31 31 31 LINEIN LINEINDENT NOCAPS ALLDIGIT 0 0 0 0 0 0 0 0 NOPUNCT 9 6 BLOCKIN 7 0 +- - - - - - - - - - LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 HYPHEN 9 6 BLOCKIN 7 0 +36 36 3 36 36 36 6 36 36 36 LINEIN LINEINDENT NOCAPS ALLDIGIT 0 0 0 0 0 0 0 0 NOPUNCT 9 6 BLOCKIN 7 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 9 6 BLOCKIN 7 0 +2017 2017 2 20 201 2017 7 17 017 2017 LINEIN LINEINDENT NOCAPS ALLDIGIT 0 0 0 0 0 1 0 0 NOPUNCT 10 6 BLOCKIN 7 0 +. . . . . . . . . . 
LINEEND LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 10 6 BLOCKEND 7 0 +[ [ [ [ [ [ [ [ [ [ LINESTART ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 OPENBRACKET 0 9 BLOCKSTART 8 0 +23 23 2 23 23 23 3 23 23 23 LINEIN ALIGNEDLEFT NOCAPS ALLDIGIT 0 0 0 0 0 0 0 0 NOPUNCT 0 9 BLOCKIN 8 0 +] ] ] ] ] ] ] ] ] ] LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 ENDBRACKET 0 9 BLOCKIN 8 0 +Tom tom T To Tom Tom m om Tom Tom LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 0 9 BLOCKIN 8 0 +B b B B B B B B B B LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 NOPUNCT 1 9 BLOCKIN 8 0 +Brown brown B Br Bro Brow n wn own rown LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 1 0 0 0 0 0 NOPUNCT 1 9 BLOCKIN 8 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 2 9 BLOCKIN 8 0 +Benjamin benjamin B Be Ben Benj n in min amin LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 3 9 BLOCKIN 8 0 +Mann mann M Ma Man Mann n nn ann Mann LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 3 9 BLOCKIN 8 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 3 9 BLOCKIN 8 0 +Nick nick N Ni Nic Nick k ck ick Nick LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 1 0 0 0 0 0 NOPUNCT 4 9 BLOCKIN 8 0 +Ryder ryder R Ry Ryd Ryde r er der yder LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 5 9 BLOCKIN 8 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 5 9 BLOCKIN 8 0 +Melanie melanie M Me Mel Mela e ie nie anie LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 6 9 BLOCKIN 8 0 +Subbiah subbiah S Su Sub Subb h ah iah biah LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 7 9 BLOCKIN 8 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 7 9 BLOCKIN 8 0 +Jared jared J Ja Jar Jare d ed red ared LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 7 9 BLOCKIN 8 0 +D d D D D D D D D D LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 NOPUNCT 8 9 BLOCKIN 8 0 +Kaplan 
kaplan K Ka Kap Kapl n an lan plan LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 8 9 BLOCKIN 8 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 9 9 BLOCKIN 8 0 +Prafulla prafulla P Pr Pra Praf a la lla ulla LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 10 9 BLOCKIN 8 0 +Dhariwal dhariwal D Dh Dha Dhar l al wal iwal LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 10 9 BLOCKIN 8 0 +, , , , , , , , , , LINEEND ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 10 9 BLOCKIN 8 0 +Arvind arvind A Ar Arv Arvi d nd ind vind LINESTART LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 0 8 BLOCKIN 5 0 +Neelakantan neelakantan N Ne Nee Neel n an tan ntan LINEIN LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 2 8 BLOCKIN 5 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 2 8 BLOCKIN 5 0 +Pranav pranav P Pr Pra Pran v av nav anav LINEIN LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 3 8 BLOCKIN 5 0 +Shyam shyam S Sh Shy Shya m am yam hyam LINEIN LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 3 8 BLOCKIN 5 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 3 8 BLOCKIN 5 0 +Girish girish G Gi Gir Giri h sh ish rish LINEIN LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 4 8 BLOCKIN 5 0 +Sastry sastry S Sa Sas Sast y ry try stry LINEIN LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 5 8 BLOCKIN 5 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 5 8 BLOCKIN 5 0 +Amanda amanda A Am Ama Aman a da nda anda LINEIN LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 6 8 BLOCKIN 5 0 +Askell askell A As Ask Aske l ll ell kell LINEIN LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 7 8 BLOCKIN 5 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 7 8 BLOCKIN 5 0 +et et e et et et t et et et LINEIN LINEINDENT NOCAPS NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 8 8 BLOCKIN 5 0 +al al a al al al l al al al LINEIN 
LINEINDENT NOCAPS NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 8 8 BLOCKIN 5 0 +. . . . . . . . . . LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 8 8 BLOCKIN 5 0 +Language language L La Lan Lang e ge age uage LINEIN LINEINDENT INITCAP NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 9 8 BLOCKIN 5 0 +models models m mo mod mode s ls els dels LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 10 8 BLOCKIN 5 0 +are are a ar are are e re are are LINEEND LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 10 8 BLOCKIN 5 0 +few few f fe few few w ew few few LINESTART LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 0 5 BLOCKIN 6 0 +- - - - - - - - - - LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 HYPHEN 0 5 BLOCKIN 6 0 +shot shot s sh sho shot t ot hot shot LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 1 5 BLOCKIN 6 0 +learners learners l le lea lear s rs ers ners LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 3 5 BLOCKIN 6 0 +. . . . . . . . . . LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 3 5 BLOCKIN 6 0 +arXiv arxiv a ar arX arXi v iv Xiv rXiv LINEIN LINEINDENT NOCAPS NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 4 5 BLOCKIN 6 0 +preprint preprint p pr pre prep t nt int rint LINEIN LINEINDENT NOCAPS NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 6 5 BLOCKIN 6 0 +arXiv arxiv a ar arX arXi v iv Xiv rXiv LINEIN LINEINDENT NOCAPS NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 7 5 BLOCKIN 6 0 +: : : : : : : : : : LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 PUNCT 7 5 BLOCKIN 6 0 +2005 2005 2 20 200 2005 5 05 005 2005 LINEIN LINEINDENT NOCAPS ALLDIGIT 0 0 0 0 0 1 0 0 NOPUNCT 8 5 BLOCKIN 6 0 +. . . . . . . . . . LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 8 5 BLOCKIN 6 0 +14165 14165 1 14 141 1416 5 65 165 4165 LINEIN LINEINDENT NOCAPS ALLDIGIT 0 0 0 0 0 1 0 0 NOPUNCT 9 5 BLOCKIN 6 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 9 5 BLOCKIN 6 0 +2020 2020 2 20 202 2020 0 20 020 2020 LINEIN LINEINDENT NOCAPS ALLDIGIT 0 0 0 0 0 1 0 0 NOPUNCT 10 5 BLOCKIN 6 0 +. . 
. . . . . . . . LINEEND LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 10 5 BLOCKEND 6 0 +[ [ [ [ [ [ [ [ [ [ LINESTART ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 OPENBRACKET 0 8 BLOCKSTART 7 0 +24 24 2 24 24 24 4 24 24 24 LINEIN ALIGNEDLEFT NOCAPS ALLDIGIT 0 0 0 0 0 0 0 0 NOPUNCT 0 8 BLOCKIN 7 0 +] ] ] ] ] ] ] ] ] ] LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 ENDBRACKET 0 8 BLOCKIN 7 0 +Tianyu tianyu T Ti Tia Tian u yu nyu anyu LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 1 8 BLOCKIN 7 0 +Gao gao G Ga Gao Gao o ao Gao Gao LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 1 8 BLOCKIN 7 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 1 8 BLOCKIN 7 0 +Adam adam A Ad Ada Adam m am dam Adam LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 2 8 BLOCKIN 7 0 +Fisch fisch F Fi Fis Fisc h ch sch isch LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 3 8 BLOCKIN 7 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 3 8 BLOCKIN 7 0 +and and a an and and d nd and and LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 4 8 BLOCKIN 7 0 +Danqi danqi D Da Dan Danq i qi nqi anqi LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 4 8 BLOCKIN 7 0 +Chen chen C Ch Che Chen n en hen Chen LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 5 8 BLOCKIN 7 0 +. . . . . . . . . . 
LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 5 8 BLOCKIN 7 0 +Making making M Ma Mak Maki g ng ing king LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 6 8 BLOCKIN 7 0 +pre pre p pr pre pre e re pre pre LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 6 8 BLOCKIN 7 0 +- - - - - - - - - - LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 HYPHEN 7 8 BLOCKIN 7 0 +trained trained t tr tra trai d ed ned ined LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 7 8 BLOCKIN 7 0 +language language l la lan lang e ge age uage LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 8 8 BLOCKIN 7 0 +models models m mo mod mode s ls els dels LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 9 8 BLOCKIN 7 0 +better better b be bet bett r er ter tter LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 10 8 BLOCKIN 7 0 +few few f fe few few w ew few few LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 10 8 BLOCKIN 7 0 +- - - - - - - - - - LINEEND ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 HYPHEN 10 8 BLOCKIN 7 0 +shot shot s sh sho shot t ot hot shot LINESTART LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 0 9 BLOCKIN 1 0 +learners learners l le lea lear s rs ers ners LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 1 9 BLOCKIN 1 0 +. . . . . . . . . . 
LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 1 9 BLOCKIN 1 0 +In in I In In In n In In In LINEIN LINEINDENT INITCAP NODIGIT 0 1 1 0 0 0 0 0 NOPUNCT 2 9 BLOCKIN 1 0 +Proceedings proceedings P Pr Pro Proc s gs ngs ings LINEIN LINEINDENT INITCAP NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 3 9 BLOCKIN 1 0 +of of o of of of f of of of LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 3 9 BLOCKIN 1 0 +the the t th the the e he the the LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 4 9 BLOCKIN 1 0 +59th 59th 5 59 59t 59th h th 9th 59th LINEIN LINEINDENT NOCAPS CONTAINSDIGITS 0 0 0 0 0 0 0 0 NOPUNCT 4 9 BLOCKIN 1 0 +Annual annual A An Ann Annu l al ual nual LINEIN LINEINDENT INITCAP NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 5 9 BLOCKIN 1 0 +Meeting meeting M Me Mee Meet g ng ing ting LINEIN LINEINDENT INITCAP NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 6 9 BLOCKIN 1 0 +of of o of of of f of of of LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 7 9 BLOCKIN 1 0 +the the t th the the e he the the LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 7 9 BLOCKIN 1 0 +Association association A As Ass Asso n on ion tion LINEIN LINEINDENT INITCAP NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 9 9 BLOCKIN 1 0 +for for f fo for for r or for for LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 9 9 BLOCKIN 1 0 +Computational computational C Co Com Comp l al nal onal LINEEND LINEINDENT INITCAP NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 10 9 BLOCKIN 1 0 +Linguistics linguistics L Li Lin Ling s cs ics tics LINESTART LINEINDENT INITCAP NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 1 9 BLOCKIN 1 0 +and and a an and and d nd and and LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 1 9 BLOCKIN 1 0 +the the t th the the e he the the LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 2 9 BLOCKIN 1 0 +11th 11th 1 11 11t 11th h th 1th 11th LINEIN LINEINDENT NOCAPS CONTAINSDIGITS 0 0 0 0 0 0 0 0 NOPUNCT 2 9 BLOCKIN 1 0 +International international I In Int Inte l al nal onal LINEIN LINEINDENT INITCAP NODIGIT 0 
0 1 0 0 0 0 0 NOPUNCT 4 9 BLOCKIN 1 0 +Joint joint J Jo Joi Join t nt int oint LINEIN LINEINDENT INITCAP NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 5 9 BLOCKIN 1 0 +Conference conference C Co Con Conf e ce nce ence LINEIN LINEINDENT INITCAP NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 6 9 BLOCKIN 1 0 +on on o on on on n on on on LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 6 9 BLOCKIN 1 0 +Natural natural N Na Nat Natu l al ral ural LINEIN LINEINDENT INITCAP NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 7 9 BLOCKIN 1 0 +Language language L La Lan Lang e ge age uage LINEIN LINEINDENT INITCAP NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 8 9 BLOCKIN 1 0 +Processing processing P Pr Pro Proc g ng ing sing LINEIN LINEINDENT INITCAP NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 10 9 BLOCKIN 1 0 +( ( ( ( ( ( ( ( ( ( LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 OPENBRACKET 10 9 BLOCKIN 1 0 +Volume volume V Vo Vol Volu e me ume lume LINEEND LINEINDENT INITCAP NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 10 9 BLOCKIN 1 0 +1 1 1 1 1 1 1 1 1 1 LINESTART LINEINDENT NOCAPS ALLDIGIT 1 0 0 0 0 0 0 0 NOPUNCT 0 3 BLOCKIN 6 0 +: : : : : : : : : : LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 PUNCT 0 3 BLOCKIN 6 0 +Long long L Lo Lon Long g ng ong Long LINEIN LINEINDENT INITCAP NODIGIT 0 1 1 0 0 0 0 0 NOPUNCT 1 3 BLOCKIN 6 0 +Papers papers P Pa Pap Pape s rs ers pers LINEIN LINEINDENT INITCAP NODIGIT 0 1 1 0 0 0 0 0 NOPUNCT 3 3 BLOCKIN 6 0 +) ) ) ) ) ) ) ) ) ) LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 ENDBRACKET 4 3 BLOCKIN 6 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 4 3 BLOCKIN 6 0 +pages pages p pa pag page s es ges ages LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 6 3 BLOCKIN 6 0 +3816 3816 3 38 381 3816 6 16 816 3816 LINEIN LINEINDENT NOCAPS ALLDIGIT 0 0 0 0 0 0 0 0 NOPUNCT 7 3 BLOCKIN 6 0 +- - - - - - - - - - LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 HYPHEN 7 3 BLOCKIN 6 0 +3830 3830 3 38 383 3830 0 30 830 3830 LINEIN LINEINDENT NOCAPS ALLDIGIT 0 0 0 0 0 0 0 0 NOPUNCT 8 3 
BLOCKIN 6 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 9 3 BLOCKIN 6 0 +2021 2021 2 20 202 2021 1 21 021 2021 LINEIN LINEINDENT NOCAPS ALLDIGIT 0 0 0 0 0 1 0 0 NOPUNCT 10 3 BLOCKIN 6 0 +. . . . . . . . . . LINEEND LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 10 3 BLOCKEND 6 0 +[ [ [ [ [ [ [ [ [ [ LINESTART ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 OPENBRACKET 0 9 BLOCKSTART 8 0 +25 25 2 25 25 25 5 25 25 25 LINEIN ALIGNEDLEFT NOCAPS ALLDIGIT 0 0 0 0 0 0 0 0 NOPUNCT 0 9 BLOCKIN 8 0 +] ] ] ] ] ] ] ] ] ] LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 ENDBRACKET 0 9 BLOCKIN 8 0 +Steven steven S St Ste Stev n en ven even LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 1 9 BLOCKIN 8 0 +Feng feng F Fe Fen Feng g ng eng Feng LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 1 9 BLOCKIN 8 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 2 9 BLOCKIN 8 0 +Varun varun V Va Var Varu n un run arun LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 2 9 BLOCKIN 8 0 +Gangal gangal G Ga Gan Gang l al gal ngal LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 3 9 BLOCKIN 8 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 3 9 BLOCKIN 8 0 +Jinjun jinjun J Ji Jin Jinj n un jun njun LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 4 9 BLOCKIN 8 0 +Wei wei W We Wei Wei i ei Wei Wei LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 5 9 BLOCKIN 8 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 5 9 BLOCKIN 8 0 +Yashvardhan yashvardhan Y Ya Yas Yash n an han dhan LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 6 9 BLOCKIN 8 0 +Chandrasekhar chandrasekhar C Ch Cha Chan r ar har khar LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 8 9 BLOCKIN 8 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 8 9 BLOCKIN 8 0 +Yichong yichong Y Yi Yic Yich g ng ong hong LINEIN 
ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 9 9 BLOCKIN 8 0 +Chen chen C Ch Che Chen n en hen Chen LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 9 9 BLOCKIN 8 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 9 9 BLOCKIN 8 0 +Dani dani D Da Dan Dani i ni ani Dani LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 10 9 BLOCKIN 8 0 +He he H He He He e He He He LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 10 9 BLOCKIN 8 0 +, , , , , , , , , , LINEEND ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 10 9 BLOCKIN 8 0 +Shuyang shuyang S Sh Shu Shuy g ng ang yang LINESTART LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 0 9 BLOCKIN 5 0 +Huang huang H Hu Hua Huan g ng ang uang LINEIN LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 1 9 BLOCKIN 5 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 1 9 BLOCKIN 5 0 +Faisal faisal F Fa Fai Fais l al sal isal LINEIN LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 2 9 BLOCKIN 5 0 +Ladhak ladhak L La Lad Ladh k ak hak dhak LINEIN LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 3 9 BLOCKIN 5 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 3 9 BLOCKIN 5 0 +Jiao jiao J Ji Jia Jiao o ao iao Jiao LINEIN LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 3 9 BLOCKIN 5 0 +Lee lee L Le Lee Lee e ee Lee Lee LINEIN LINEINDENT INITCAP NODIGIT 0 1 1 0 0 0 0 0 NOPUNCT 4 9 BLOCKIN 5 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 4 9 BLOCKIN 5 0 +Xinyi xinyi X Xi Xin Xiny i yi nyi inyi LINEIN LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 5 9 BLOCKIN 5 0 +Li li L Li Li Li i Li Li Li LINEIN LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 5 9 BLOCKIN 5 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 5 9 BLOCKIN 5 0 +et et e et et et t et et et LINEIN LINEINDENT NOCAPS NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 6 9 BLOCKIN 5 0 +al al a al al al l al al al 
LINEIN LINEINDENT NOCAPS NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 6 9 BLOCKIN 5 0 +. . . . . . . . . . LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 6 9 BLOCKIN 5 0 +A a A A A A A A A A LINEIN LINEINDENT ALLCAP NODIGIT 1 0 1 0 0 0 0 0 NOPUNCT 6 9 BLOCKIN 5 0 +survey survey s su sur surv y ey vey rvey LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 7 9 BLOCKIN 5 0 +of of o of of of f of of of LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 8 9 BLOCKIN 5 0 +data data d da dat data a ta ata data LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 8 9 BLOCKIN 5 0 +augmentation augmentation a au aug augm n on ion tion LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 10 9 BLOCKIN 5 0 +approaches approaches a ap app appr s es hes ches LINEEND LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 10 9 BLOCKIN 5 0 +for for f fo for for r or for for LINESTART LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 0 4 BLOCKIN 5 0 +nlp nlp n nl nlp nlp p lp nlp nlp LINEIN LINEINDENT NOCAPS NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 1 4 BLOCKIN 5 0 +. . . . . . . . . . LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 1 4 BLOCKIN 5 0 +arXiv arxiv a ar arX arXi v iv Xiv rXiv LINEIN LINEINDENT NOCAPS NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 3 4 BLOCKIN 5 0 +preprint preprint p pr pre prep t nt int rint LINEIN LINEINDENT NOCAPS NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 5 4 BLOCKIN 5 0 +arXiv arxiv a ar arX arXi v iv Xiv rXiv LINEIN LINEINDENT NOCAPS NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 6 4 BLOCKIN 5 0 +: : : : : : : : : : LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 PUNCT 6 4 BLOCKIN 5 0 +2106 2106 2 21 210 2106 6 06 106 2106 LINEIN LINEINDENT NOCAPS ALLDIGIT 0 0 0 0 0 1 0 0 NOPUNCT 7 4 BLOCKIN 5 0 +. . . . . . . . . . 
LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 7 4 BLOCKIN 5 0 +07499 07499 0 07 074 0749 9 99 499 7499 LINEIN LINEINDENT NOCAPS ALLDIGIT 0 0 0 0 0 0 0 0 NOPUNCT 8 4 BLOCKIN 5 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 9 4 BLOCKIN 5 0 +2021 2021 2 20 202 2021 1 21 021 2021 LINEIN LINEINDENT NOCAPS ALLDIGIT 0 0 0 0 0 1 0 0 NOPUNCT 10 4 BLOCKIN 5 0 +. . . . . . . . . . LINEEND LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 10 4 BLOCKEND 5 0 +[ [ [ [ [ [ [ [ [ [ LINESTART ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 OPENBRACKET 0 9 BLOCKSTART 8 0 +26 26 2 26 26 26 6 26 26 26 LINEIN ALIGNEDLEFT NOCAPS ALLDIGIT 0 0 0 0 0 0 0 0 NOPUNCT 0 9 BLOCKIN 8 0 +] ] ] ] ] ] ] ] ] ] LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 ENDBRACKET 0 9 BLOCKIN 8 0 +Suchin suchin S Su Suc Such n in hin chin LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 1 9 BLOCKIN 8 0 +Gururangan gururangan G Gu Gur Guru n an gan ngan LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 2 9 BLOCKIN 8 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 2 9 BLOCKIN 8 0 +Ana ana A An Ana Ana a na Ana Ana LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 3 9 BLOCKIN 8 0 +Marasović marasović M Ma Mar Mara ć ić vić ović LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 4 9 BLOCKIN 8 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 4 9 BLOCKIN 8 0 +Swabha swabha S Sw Swa Swab a ha bha abha LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 5 9 BLOCKIN 8 0 +Swayamdipta swayamdipta S Sw Swa Sway a ta pta ipta LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 6 9 BLOCKIN 8 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 6 9 BLOCKIN 8 0 +Kyle kyle K Ky Kyl Kyle e le yle Kyle LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 7 9 BLOCKIN 8 0 +Lo lo L Lo Lo Lo o Lo Lo Lo LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 7 9 
BLOCKIN 8 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 7 9 BLOCKIN 8 0 +Iz iz I Iz Iz Iz z Iz Iz Iz LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 8 9 BLOCKIN 8 0 +Beltagy beltagy B Be Bel Belt y gy agy tagy LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 9 9 BLOCKIN 8 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 9 9 BLOCKIN 8 0 +Doug doug D Do Dou Doug g ug oug Doug LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 10 9 BLOCKIN 8 0 +Downey downey D Do Dow Down y ey ney wney LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 10 9 BLOCKIN 8 0 +, , , , , , , , , , LINEEND ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 10 9 BLOCKIN 8 0 +and and a an and and d nd and and LINESTART LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 0 8 BLOCKIN 4 0 +Noah noah N No Noa Noah h ah oah Noah LINEIN LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 1 8 BLOCKIN 4 0 +A a A A A A A A A A LINEIN LINEINDENT ALLCAP NODIGIT 1 0 1 0 0 0 0 0 NOPUNCT 1 8 BLOCKIN 4 0 +Smith smith S Sm Smi Smit h th ith mith LINEIN LINEINDENT INITCAP NODIGIT 0 1 1 0 0 0 0 0 NOPUNCT 2 8 BLOCKIN 4 0 +. . . . . . . . . . 
LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 2 8 BLOCKIN 4 0 +Don don D Do Don Don n on Don Don LINEIN LINEINDENT INITCAP NODIGIT 0 1 1 0 0 0 0 0 NOPUNCT 2 8 BLOCKIN 4 0 +' ' ' ' ' ' ' ' ' ' LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 QUOTE 2 8 BLOCKIN 4 0 +t t t t t t t t t t LINEIN LINEINDENT NOCAPS NODIGIT 1 0 0 0 0 0 0 0 NOPUNCT 3 8 BLOCKIN 4 0 +stop stop s st sto stop p op top stop LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 3 8 BLOCKIN 4 0 +pretraining pretraining p pr pre pret g ng ing ning LINEIN LINEINDENT NOCAPS NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 5 8 BLOCKIN 4 0 +: : : : : : : : : : LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 PUNCT 5 8 BLOCKIN 4 0 +Adapt adapt A Ad Ada Adap t pt apt dapt LINEIN LINEINDENT INITCAP NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 6 8 BLOCKIN 4 0 +language language l la lan lang e ge age uage LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 7 8 BLOCKIN 4 0 +models models m mo mod mode s ls els dels LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 8 8 BLOCKIN 4 0 +to to t to to to o to to to LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 8 8 BLOCKIN 4 0 +domains domains d do dom doma s ns ins ains LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 9 8 BLOCKIN 4 0 +and and a an and and d nd and and LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 10 8 BLOCKIN 4 0 +tasks tasks t ta tas task s ks sks asks LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 10 8 BLOCKIN 4 0 +. . . . . . . . . . 
LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 10 8 BLOCKIN 4 0 +In in I In In In n In In In LINEEND LINEINDENT INITCAP NODIGIT 0 1 1 0 0 0 0 0 NOPUNCT 10 8 BLOCKIN 4 0 +Proceedings proceedings P Pr Pro Proc s gs ngs ings LINESTART LINEINDENT INITCAP NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 1 9 BLOCKIN 1 0 +of of o of of of f of of of LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 1 9 BLOCKIN 1 0 +the the t th the the e he the the LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 2 9 BLOCKIN 1 0 +58th 58th 5 58 58t 58th h th 8th 58th LINEIN LINEINDENT NOCAPS CONTAINSDIGITS 0 0 0 0 0 0 0 0 NOPUNCT 2 9 BLOCKIN 1 0 +Annual annual A An Ann Annu l al ual nual LINEIN LINEINDENT INITCAP NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 3 9 BLOCKIN 1 0 +Meeting meeting M Me Mee Meet g ng ing ting LINEIN LINEINDENT INITCAP NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 4 9 BLOCKIN 1 0 +of of o of of of f of of of LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 4 9 BLOCKIN 1 0 +the the t th the the e he the the LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 5 9 BLOCKIN 1 0 +Association association A As Ass Asso n on ion tion LINEIN LINEINDENT INITCAP NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 6 9 BLOCKIN 1 0 +for for f fo for for r or for for LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 7 9 BLOCKIN 1 0 +Computational computational C Co Com Comp l al nal onal LINEIN LINEINDENT INITCAP NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 8 9 BLOCKIN 1 0 +Linguistics linguistics L Li Lin Ling s cs ics tics LINEIN LINEINDENT INITCAP NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 10 9 BLOCKIN 1 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 10 9 BLOCKIN 1 0 +pages pages p pa pag page s es ges ages LINEEND LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 10 9 BLOCKIN 1 0 +8342 8342 8 83 834 8342 2 42 342 8342 LINESTART LINEINDENT NOCAPS ALLDIGIT 0 0 0 0 0 0 0 0 NOPUNCT 2 1 BLOCKIN 3 0 +- - - - - - - - - - LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 HYPHEN 2 1 BLOCKIN 3 0 +8360 
8360 8 83 836 8360 0 60 360 8360 LINEIN LINEINDENT NOCAPS ALLDIGIT 0 0 0 0 0 0 0 0 NOPUNCT 5 1 BLOCKIN 3 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 5 1 BLOCKIN 3 0 +2020 2020 2 20 202 2020 0 20 020 2020 LINEIN LINEINDENT NOCAPS ALLDIGIT 0 0 0 0 0 1 0 0 NOPUNCT 8 1 BLOCKIN 3 0 +. . . . . . . . . . LINEEND LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 9 1 BLOCKEND 3 0 +[ [ [ [ [ [ [ [ [ [ LINESTART ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 OPENBRACKET 0 9 BLOCKSTART 7 0 +27 27 2 27 27 27 7 27 27 27 LINEIN ALIGNEDLEFT NOCAPS ALLDIGIT 0 0 0 0 0 0 0 0 NOPUNCT 0 9 BLOCKIN 7 0 +] ] ] ] ] ] ] ] ] ] LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 ENDBRACKET 0 9 BLOCKIN 7 0 +Emily emily E Em Emi Emil y ly ily mily LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 1 9 BLOCKIN 7 0 +M m M M M M M M M M LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 NOPUNCT 1 9 BLOCKIN 7 0 +Bender bender B Be Ben Bend r er der nder LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 2 9 BLOCKIN 7 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 2 9 BLOCKIN 7 0 +Timnit timnit T Ti Tim Timn t it nit mnit LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 3 9 BLOCKIN 7 0 +Gebru gebru G Ge Geb Gebr u ru bru ebru LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 4 9 BLOCKIN 7 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 4 9 BLOCKIN 7 0 +Angelina angelina A An Ang Ange a na ina lina LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 5 9 BLOCKIN 7 0 +McMillan mcmillan M Mc McM McMi n an lan llan LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 6 9 BLOCKIN 7 0 +- - - - - - - - - - LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 HYPHEN 6 9 BLOCKIN 7 0 +Major major M Ma Maj Majo r or jor ajor LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 1 0 0 0 0 0 NOPUNCT 6 9 BLOCKIN 7 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 6 9 
BLOCKIN 7 0 +and and a an and and d nd and and LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 7 9 BLOCKIN 7 0 +Shmargaret shmargaret S Sh Shm Shma t et ret aret LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 8 9 BLOCKIN 7 0 +Shmitchell shmitchell S Sh Shm Shmi l ll ell hell LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 10 9 BLOCKIN 7 0 +. . . . . . . . . . LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 10 9 BLOCKIN 7 0 +On on O On On On n On On On LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 10 9 BLOCKIN 7 0 +the the t th the the e he the the LINEEND ALIGNEDLEFT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 10 9 BLOCKIN 7 0 +dangers dangers d da dan dang s rs ers gers LINESTART LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 0 8 BLOCKIN 2 0 +of of o of of of f of of of LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 1 8 BLOCKIN 2 0 +stochastic stochastic s st sto stoc c ic tic stic LINEIN LINEINDENT NOCAPS NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 2 8 BLOCKIN 2 0 +parrots parrots p pa par parr s ts ots rots LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 3 8 BLOCKIN 2 0 +: : : : : : : : : : LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 PUNCT 3 8 BLOCKIN 2 0 +Can can C Ca Can Can n an Can Can LINEIN LINEINDENT INITCAP NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 4 8 BLOCKIN 2 0 +language language l la lan lang e ge age uage LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 5 8 BLOCKIN 2 0 +models models m mo mod mode s ls els dels LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 6 8 BLOCKIN 2 0 +be be b be be be e be be be LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 6 8 BLOCKIN 2 0 +too too t to too too o oo too too LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 7 8 BLOCKIN 2 0 +big big b bi big big g ig big big LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 7 8 BLOCKIN 2 0 +? ? ? ? ? ? ? ? ? ? 
LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 PUNCT 7 8 BLOCKIN 2 0 +Proceedings proceedings P Pr Pro Proc s gs ngs ings LINEIN LINEINDENT INITCAP NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 9 8 BLOCKIN 2 0 +of of o of of of f of of of LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 9 8 BLOCKIN 2 0 +the the t th the the e he the the LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 10 8 BLOCKIN 2 0 +2021 2021 2 20 202 2021 1 21 021 2021 LINEIN LINEINDENT NOCAPS ALLDIGIT 0 0 0 0 0 1 0 0 NOPUNCT 10 8 BLOCKIN 2 0 +ACM acm A AC ACM ACM M CM ACM ACM LINEEND LINEINDENT ALLCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 10 8 BLOCKIN 2 0 +Conference conference C Co Con Conf e ce nce ence LINESTART LINEINDENT INITCAP NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 1 7 BLOCKIN 6 0 +on on o on on on n on on on LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 1 7 BLOCKIN 6 0 +Fairness fairness F Fa Fai Fair s ss ess ness LINEIN LINEINDENT INITCAP NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 3 7 BLOCKIN 6 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 3 7 BLOCKIN 6 0 +Accountability accountability A Ac Acc Acco y ty ity lity LINEIN LINEINDENT INITCAP NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 5 7 BLOCKIN 6 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 5 7 BLOCKIN 6 0 +and and a an and and d nd and and LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 5 7 BLOCKIN 6 0 +Transparency transparency T Tr Tra Tran y cy ncy ency LINEIN LINEINDENT INITCAP NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 7 7 BLOCKIN 6 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 7 7 BLOCKIN 6 0 +pages pages p pa pag page s es ges ages LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 8 7 BLOCKIN 6 0 +610 610 6 61 610 610 0 10 610 610 LINEIN LINEINDENT NOCAPS ALLDIGIT 0 0 0 0 0 0 0 0 NOPUNCT 9 7 BLOCKIN 6 0 +- - - - - - - - - - LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 HYPHEN 9 7 BLOCKIN 6 0 +623 623 6 62 623 623 3 23 623 623 LINEIN LINEINDENT 
NOCAPS ALLDIGIT 0 0 0 0 0 0 0 0 NOPUNCT 9 7 BLOCKIN 6 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 9 7 BLOCKIN 6 0 +2021 2021 2 20 202 2021 1 21 021 2021 LINEIN LINEINDENT NOCAPS ALLDIGIT 0 0 0 0 0 1 0 0 NOPUNCT 10 7 BLOCKIN 6 0 +. . . . . . . . . . LINEEND LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 10 7 BLOCKEND 6 0 +[ [ [ [ [ [ [ [ [ [ LINESTART ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 OPENBRACKET 0 10 BLOCKSTART 5 0 +28 28 2 28 28 28 8 28 28 28 LINEIN ALIGNEDLEFT NOCAPS ALLDIGIT 0 0 0 0 0 0 0 0 NOPUNCT 0 10 BLOCKIN 5 0 +] ] ] ] ] ] ] ] ] ] LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 ENDBRACKET 0 10 BLOCKIN 5 0 +Reuben reuben R Re Reu Reub n en ben uben LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 1 10 BLOCKIN 5 0 +Binns binns B Bi Bin Binn s ns nns inns LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 1 10 BLOCKIN 5 0 +. . . . . . . . . . LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 1 10 BLOCKIN 5 0 +Fairness fairness F Fa Fai Fair s ss ess ness LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 2 10 BLOCKIN 5 0 +in in i in in in n in in in LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 1 1 0 0 0 0 0 NOPUNCT 3 10 BLOCKIN 5 0 +machine machine m ma mac mach e ne ine hine LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 4 10 BLOCKIN 5 0 +learning learning l le lea lear g ng ing ning LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 5 10 BLOCKIN 5 0 +: : : : : : : : : : LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 PUNCT 5 10 BLOCKIN 5 0 +Lessons lessons L Le Les Less s ns ons sons LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 6 10 BLOCKIN 5 0 +from from f fr fro from m om rom from LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 6 10 BLOCKIN 5 0 +political political p po pol poli l al cal ical LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 7 10 BLOCKIN 5 0 +philosophy philosophy p ph phi phil y hy phy ophy LINEIN ALIGNEDLEFT NOCAPS NODIGIT 
0 0 1 0 0 0 0 0 NOPUNCT 8 10 BLOCKIN 5 0 +. . . . . . . . . . LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 8 10 BLOCKIN 5 0 +Proceedings proceedings P Pr Pro Proc s gs ngs ings LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 10 10 BLOCKIN 5 0 +of of o of of of f of of of LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 10 10 BLOCKIN 5 0 +the the t th the the e he the the LINEEND ALIGNEDLEFT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 10 10 BLOCKIN 5 0 +2018 2018 2 20 201 2018 8 18 018 2018 LINESTART LINEINDENT NOCAPS ALLDIGIT 0 0 0 0 0 1 0 0 NOPUNCT 0 8 BLOCKIN 6 0 +Conference conference C Co Con Conf e ce nce ence LINEIN LINEINDENT INITCAP NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 1 8 BLOCKIN 6 0 +on on o on on on n on on on LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 2 8 BLOCKIN 6 0 +Fairness fairness F Fa Fai Fair s ss ess ness LINEIN LINEINDENT INITCAP NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 3 8 BLOCKIN 6 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 3 8 BLOCKIN 6 0 +Accountability accountability A Ac Acc Acco y ty ity lity LINEIN LINEINDENT INITCAP NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 5 8 BLOCKIN 6 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 5 8 BLOCKIN 6 0 +and and a an and and d nd and and LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 6 8 BLOCKIN 6 0 +Transparency transparency T Tr Tra Tran y cy ncy ency LINEIN LINEINDENT INITCAP NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 7 8 BLOCKIN 6 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 8 8 BLOCKIN 6 0 +pages pages p pa pag page s es ges ages LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 8 8 BLOCKIN 6 0 +149 149 1 14 149 149 9 49 149 149 LINEIN LINEINDENT NOCAPS ALLDIGIT 0 0 0 0 0 0 0 0 NOPUNCT 9 8 BLOCKIN 6 0 +- - - - - - - - - - LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 HYPHEN 9 8 BLOCKIN 6 0 +159 159 1 15 159 159 9 59 159 159 LINEIN LINEINDENT NOCAPS ALLDIGIT 0 0 0 0 0 0 0 0 NOPUNCT 
9 8 BLOCKIN 6 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 10 8 BLOCKIN 6 0 +2018 2018 2 20 201 2018 8 18 018 2018 LINEIN LINEINDENT NOCAPS ALLDIGIT 0 0 0 0 0 1 0 0 NOPUNCT 10 8 BLOCKIN 6 0 +. . . . . . . . . . LINEEND LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 10 8 BLOCKEND 6 0 +[ [ [ [ [ [ [ [ [ [ LINESTART ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 OPENBRACKET 0 8 BLOCKSTART 6 0 +29 29 2 29 29 29 9 29 29 29 LINEIN ALIGNEDLEFT NOCAPS ALLDIGIT 0 0 0 0 0 0 0 0 NOPUNCT 0 8 BLOCKIN 6 0 +] ] ] ] ] ] ] ] ] ] LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 ENDBRACKET 0 8 BLOCKIN 6 0 +Sebastian sebastian S Se Seb Seba n an ian tian LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 1 8 BLOCKIN 6 0 +Ruder ruder R Ru Rud Rude r er der uder LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 2 8 BLOCKIN 6 0 +. . . . . . . . . . LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 2 8 BLOCKIN 6 0 +The the T Th The The e he The The LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 3 8 BLOCKIN 6 0 +stanford stanford s st sta stan d rd ord ford LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 4 8 BLOCKIN 6 0 +natural natural n na nat natu l al ral ural LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 5 8 BLOCKIN 6 0 +language language l la lan lang e ge age uage LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 6 8 BLOCKIN 6 0 +inference inference i in inf infe e ce nce ence LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 7 8 BLOCKIN 6 0 +( ( ( ( ( ( ( ( ( ( LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 OPENBRACKET 7 8 BLOCKIN 6 0 +snli snli s sn snl snli i li nli snli LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 8 8 BLOCKIN 6 0 +) ) ) ) ) ) ) ) ) ) LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 ENDBRACKET 8 8 BLOCKIN 6 0 +corpus corpus c co cor corp s us pus rpus LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 9 8 BLOCKIN 6 0 +. . . . . . . . . . 
LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 9 8 BLOCKIN 6 0 +arXiv arxiv a ar arX arXi v iv Xiv rXiv LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 9 8 BLOCKIN 6 0 +preprint preprint p pr pre prep t nt int rint LINEEND ALIGNEDLEFT NOCAPS NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 10 8 BLOCKIN 6 0 +arXiv arxiv a ar arX arXi v iv Xiv rXiv LINESTART LINEINDENT NOCAPS NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 2 2 BLOCKIN 4 0 +: : : : : : : : : : LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 PUNCT 2 2 BLOCKIN 4 0 +1807 1807 1 18 180 1807 7 07 807 1807 LINEIN LINEINDENT NOCAPS ALLDIGIT 0 0 0 0 0 1 0 0 NOPUNCT 4 2 BLOCKIN 4 0 +. . . . . . . . . . LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 4 2 BLOCKIN 4 0 +03519 03519 0 03 035 0351 9 19 519 3519 LINEIN LINEINDENT NOCAPS ALLDIGIT 0 0 0 0 0 0 0 0 NOPUNCT 6 2 BLOCKIN 4 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 6 2 BLOCKIN 4 0 +2021 2021 2 20 202 2021 1 21 021 2021 LINEIN LINEINDENT NOCAPS ALLDIGIT 0 0 0 0 0 1 0 0 NOPUNCT 9 2 BLOCKIN 4 0 +. . . . . . . . . . 
LINEEND LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 9 2 BLOCKEND 4 0 +[ [ [ [ [ [ [ [ [ [ LINESTART ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 OPENBRACKET 0 9 BLOCKSTART 8 0 +30 30 3 30 30 30 0 30 30 30 LINEIN ALIGNEDLEFT NOCAPS ALLDIGIT 0 0 0 0 0 0 0 0 NOPUNCT 0 9 BLOCKIN 8 0 +] ] ] ] ] ] ] ] ] ] LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 ENDBRACKET 0 9 BLOCKIN 8 0 +Pradeep pradeep P Pr Pra Prad p ep eep deep LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 1 9 BLOCKIN 8 0 +Rajan rajan R Ra Raj Raja n an jan ajan LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 2 9 BLOCKIN 8 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 2 9 BLOCKIN 8 0 +Krishna krishna K Kr Kri Kris a na hna shna LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 3 9 BLOCKIN 8 0 +Vyas vyas V Vy Vya Vyas s as yas Vyas LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 3 9 BLOCKIN 8 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 3 9 BLOCKIN 8 0 +Rajiv rajiv R Ra Raj Raji v iv jiv ajiv LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 4 9 BLOCKIN 8 0 +Bansal bansal B Ba Ban Bans l al sal nsal LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 5 9 BLOCKIN 8 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 5 9 BLOCKIN 8 0 +Ranjan ranjan R Ra Ran Ranj n an jan njan LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 6 9 BLOCKIN 8 0 +Sharma sharma S Sh Sha Shar a ma rma arma LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 7 9 BLOCKIN 8 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 7 9 BLOCKIN 8 0 +and and a an and and d nd and and LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 7 9 BLOCKIN 8 0 +Shubhranshu shubhranshu S Sh Shu Shub u hu shu nshu LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 9 9 BLOCKIN 8 0 +Mukherjee mukherjee M Mu Muk Mukh e ee jee rjee LINEIN ALIGNEDLEFT 
INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 10 9 BLOCKIN 8 0 +. . . . . . . . . . LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 10 9 BLOCKIN 8 0 +Ma ma M Ma Ma Ma a Ma Ma Ma LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 10 9 BLOCKIN 8 0 +- - - - - - - - - - LINEEND ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 HYPHEN 10 9 BLOCKIN 8 0 +chine chine c ch chi chin e ne ine hine LINESTART LINEINDENT NOCAPS NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 0 7 BLOCKIN 8 0 +learning learning l le lea lear g ng ing ning LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 1 7 BLOCKIN 8 0 +for for f fo for for r or for for LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 2 7 BLOCKIN 8 0 +data data d da dat data a ta ata data LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 3 7 BLOCKIN 8 0 +preprocessing preprocessing p pr pre prep g ng ing sing LINEIN LINEINDENT NOCAPS NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 5 7 BLOCKIN 8 0 +. . . . . . . . . . LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 5 7 BLOCKIN 8 0 +Journal journal J Jo Jou Jour l al nal rnal LINEIN LINEINDENT INITCAP NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 6 7 BLOCKIN 8 0 +of of o of of of f of of of LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 7 7 BLOCKIN 8 0 +Big big B Bi Big Big g ig Big Big LINEIN LINEINDENT INITCAP NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 7 7 BLOCKIN 8 0 +Data data D Da Dat Data a ta ata Data LINEIN LINEINDENT INITCAP NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 8 7 BLOCKIN 8 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 8 7 BLOCKIN 8 0 +6 6 6 6 6 6 6 6 6 6 LINEIN LINEINDENT NOCAPS ALLDIGIT 1 0 0 0 0 0 0 0 NOPUNCT 8 7 BLOCKIN 8 0 +( ( ( ( ( ( ( ( ( ( LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 OPENBRACKET 9 7 BLOCKIN 8 0 +1 1 1 1 1 1 1 1 1 1 LINEIN LINEINDENT NOCAPS ALLDIGIT 1 0 0 0 0 0 0 0 NOPUNCT 9 7 BLOCKIN 8 0 +) ) ) ) ) ) ) ) ) ) LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 ENDBRACKET 9 7 BLOCKIN 8 0 +: : : : : : : : : : LINEIN LINEINDENT ALLCAP 
NODIGIT 1 0 0 0 0 0 0 0 PUNCT 9 7 BLOCKIN 8 0 +1 1 1 1 1 1 1 1 1 1 LINEIN LINEINDENT NOCAPS ALLDIGIT 1 0 0 0 0 0 0 0 NOPUNCT 9 7 BLOCKIN 8 0 +- - - - - - - - - - LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 HYPHEN 9 7 BLOCKIN 8 0 +25 25 2 25 25 25 5 25 25 25 LINEIN LINEINDENT NOCAPS ALLDIGIT 0 0 0 0 0 0 0 0 NOPUNCT 10 7 BLOCKIN 8 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 10 7 BLOCKIN 8 0 +2019 2019 2 20 201 2019 9 19 019 2019 LINEIN LINEINDENT NOCAPS ALLDIGIT 0 0 0 0 0 1 0 0 NOPUNCT 10 7 BLOCKIN 8 0 +. . . . . . . . . . LINEEND LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 10 7 BLOCKEND 8 0 +[ [ [ [ [ [ [ [ [ [ LINESTART ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 OPENBRACKET 0 9 BLOCKSTART 7 0 +31 31 3 31 31 31 1 31 31 31 LINEIN ALIGNEDLEFT NOCAPS ALLDIGIT 0 0 0 0 0 0 0 0 NOPUNCT 0 9 BLOCKIN 7 0 +] ] ] ] ] ] ] ] ] ] LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 ENDBRACKET 0 9 BLOCKIN 7 0 +Nitesh nitesh N Ni Nit Nite h sh esh tesh LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 1 9 BLOCKIN 7 0 +V v V V V V V V V V LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 NOPUNCT 1 9 BLOCKIN 7 0 +Chawla chawla C Ch Cha Chaw a la wla awla LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 2 9 BLOCKIN 7 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 2 9 BLOCKIN 7 0 +Kevin kevin K Ke Kev Kevi n in vin evin LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 3 9 BLOCKIN 7 0 +W w W W W W W W W W LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 NOPUNCT 3 9 BLOCKIN 7 0 +Bowyer bowyer B Bo Bow Bowy r er yer wyer LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 4 9 BLOCKIN 7 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 4 9 BLOCKIN 7 0 +Lawrence lawrence L La Law Lawr e ce nce ence LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 5 9 BLOCKIN 7 0 +O o O O O O O O O O LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 NOPUNCT 5 9 
BLOCKIN 7 0 +Hall hall H Ha Hal Hall l ll all Hall LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 1 0 0 0 0 0 NOPUNCT 6 9 BLOCKIN 7 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 6 9 BLOCKIN 7 0 +and and a an and and d nd and and LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 6 9 BLOCKIN 7 0 +W w W W W W W W W W LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 NOPUNCT 7 9 BLOCKIN 7 0 +Philip philip P Ph Phi Phil p ip lip ilip LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 8 9 BLOCKIN 7 0 +Kegelmeyer kegelmeyer K Ke Keg Kege r er yer eyer LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 9 9 BLOCKIN 7 0 +. . . . . . . . . . LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 9 9 BLOCKIN 7 0 +Smote smote S Sm Smo Smot e te ote mote LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 10 9 BLOCKIN 7 0 +: : : : : : : : : : LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 PUNCT 10 9 BLOCKIN 7 0 +synthetic synthetic s sy syn synt c ic tic etic LINEEND ALIGNEDLEFT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 10 9 BLOCKIN 7 0 +minority minority m mi min mino y ty ity rity LINESTART LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 0 9 BLOCKIN 7 0 +over over o ov ove over r er ver over LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 1 9 BLOCKIN 7 0 +- - - - - - - - - - LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 HYPHEN 1 9 BLOCKIN 7 0 +sampling sampling s sa sam samp g ng ing ling LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 2 9 BLOCKIN 7 0 +technique technique t te tec tech e ue que ique LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 3 9 BLOCKIN 7 0 +. . . . . . . . . . 
LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 3 9 BLOCKIN 7 0 +Journal journal J Jo Jou Jour l al nal rnal LINEIN LINEINDENT INITCAP NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 4 9 BLOCKIN 7 0 +of of o of of of f of of of LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 4 9 BLOCKIN 7 0 +Artificial artificial A Ar Art Arti l al ial cial LINEIN LINEINDENT INITCAP NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 6 9 BLOCKIN 7 0 +Intelligence intelligence I In Int Inte e ce nce ence LINEIN LINEINDENT INITCAP NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 7 9 BLOCKIN 7 0 +Research research R Re Res Rese h ch rch arch LINEIN LINEINDENT INITCAP NODIGIT 0 1 1 0 0 0 0 0 NOPUNCT 8 9 BLOCKIN 7 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 8 9 BLOCKIN 7 0 +16 16 1 16 16 16 6 16 16 16 LINEIN LINEINDENT NOCAPS ALLDIGIT 0 0 0 0 0 0 0 0 NOPUNCT 9 9 BLOCKIN 7 0 +: : : : : : : : : : LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 PUNCT 9 9 BLOCKIN 7 0 +321 321 3 32 321 321 1 21 321 321 LINEIN LINEINDENT NOCAPS ALLDIGIT 0 0 0 0 0 0 0 0 NOPUNCT 9 9 BLOCKIN 7 0 +- - - - - - - - - - LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 HYPHEN 9 9 BLOCKIN 7 0 +357 357 3 35 357 357 7 57 357 357 LINEIN LINEINDENT NOCAPS ALLDIGIT 0 0 0 0 0 0 0 0 NOPUNCT 9 9 BLOCKIN 7 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 10 9 BLOCKIN 7 0 +2002 2002 2 20 200 2002 2 02 002 2002 LINEIN LINEINDENT NOCAPS ALLDIGIT 0 0 0 0 0 1 0 0 NOPUNCT 10 9 BLOCKIN 7 0 +. . . . . . . . . . 
LINEEND LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 10 9 BLOCKEND 7 0 +[ [ [ [ [ [ [ [ [ [ LINESTART ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 OPENBRACKET 0 8 BLOCKSTART 3 0 +32 32 3 32 32 32 2 32 32 32 LINEIN ALIGNEDLEFT NOCAPS ALLDIGIT 0 0 0 0 0 0 0 0 NOPUNCT 0 8 BLOCKIN 3 0 +] ] ] ] ] ] ] ] ] ] LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 ENDBRACKET 0 8 BLOCKIN 3 0 +Connor connor C Co Con Conn r or nor nnor LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 1 8 BLOCKIN 3 0 +Shorten shorten S Sh Sho Shor n en ten rten LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 2 8 BLOCKIN 3 0 +and and a an and and d nd and and LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 2 8 BLOCKIN 3 0 +Taghi taghi T Ta Tag Tagh i hi ghi aghi LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 3 8 BLOCKIN 3 0 +M m M M M M M M M M LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 NOPUNCT 3 8 BLOCKIN 3 0 +Khoshgoftaar khoshgoftaar K Kh Kho Khos r ar aar taar LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 5 8 BLOCKIN 3 0 +. . . . . . . . . . 
LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 5 8 BLOCKIN 3 0 +A a A A A A A A A A LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 1 0 0 0 0 0 NOPUNCT 5 8 BLOCKIN 3 0 +survey survey s su sur surv y ey vey rvey LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 6 8 BLOCKIN 3 0 +on on o on on on n on on on LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 7 8 BLOCKIN 3 0 +image image i im ima imag e ge age mage LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 7 8 BLOCKIN 3 0 +data data d da dat data a ta ata data LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 8 8 BLOCKIN 3 0 +augmentation augmentation a au aug augm n on ion tion LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 10 8 BLOCKIN 3 0 +for for f fo for for r or for for LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 10 8 BLOCKIN 3 0 +deep deep d de dee deep p ep eep deep LINEEND ALIGNEDLEFT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 10 8 BLOCKIN 3 0 +learning learning l le lea lear g ng ing ning LINESTART LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 1 4 BLOCKIN 8 0 +. . . . . . . . . . 
LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 1 4 BLOCKIN 8 0 +Journal journal J Jo Jou Jour l al nal rnal LINEIN LINEINDENT INITCAP NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 3 4 BLOCKIN 8 0 +of of o of of of f of of of LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 4 4 BLOCKIN 8 0 +Big big B Bi Big Big g ig Big Big LINEIN LINEINDENT INITCAP NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 5 4 BLOCKIN 8 0 +Data data D Da Dat Data a ta ata Data LINEIN LINEINDENT INITCAP NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 6 4 BLOCKIN 8 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 6 4 BLOCKIN 8 0 +6 6 6 6 6 6 6 6 6 6 LINEIN LINEINDENT NOCAPS ALLDIGIT 1 0 0 0 0 0 0 0 NOPUNCT 7 4 BLOCKIN 8 0 +( ( ( ( ( ( ( ( ( ( LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 OPENBRACKET 7 4 BLOCKIN 8 0 +1 1 1 1 1 1 1 1 1 1 LINEIN LINEINDENT NOCAPS ALLDIGIT 1 0 0 0 0 0 0 0 NOPUNCT 7 4 BLOCKIN 8 0 +) ) ) ) ) ) ) ) ) ) LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 ENDBRACKET 8 4 BLOCKIN 8 0 +: : : : : : : : : : LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 PUNCT 8 4 BLOCKIN 8 0 +1 1 1 1 1 1 1 1 1 1 LINEIN LINEINDENT NOCAPS ALLDIGIT 1 0 0 0 0 0 0 0 NOPUNCT 8 4 BLOCKIN 8 0 +- - - - - - - - - - LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 HYPHEN 8 4 BLOCKIN 8 0 +48 48 4 48 48 48 8 48 48 48 LINEIN LINEINDENT NOCAPS ALLDIGIT 0 0 0 0 0 0 0 0 NOPUNCT 9 4 BLOCKIN 8 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 9 4 BLOCKIN 8 0 +2019 2019 2 20 201 2019 9 19 019 2019 LINEIN LINEINDENT NOCAPS ALLDIGIT 0 0 0 0 0 1 0 0 NOPUNCT 10 4 BLOCKIN 8 0 +. . . . . . . . . . 
LINEEND LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 10 4 BLOCKEND 8 0 +[ [ [ [ [ [ [ [ [ [ LINESTART ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 OPENBRACKET 0 8 BLOCKSTART 7 0 +33 33 3 33 33 33 3 33 33 33 LINEIN ALIGNEDLEFT NOCAPS ALLDIGIT 0 0 0 0 0 0 0 0 NOPUNCT 0 8 BLOCKIN 7 0 +] ] ] ] ] ] ] ] ] ] LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 ENDBRACKET 0 8 BLOCKIN 7 0 +Alexander alexander A Al Ale Alex r er der nder LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 1 8 BLOCKIN 7 0 +Ratner ratner R Ra Rat Ratn r er ner tner LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 2 8 BLOCKIN 7 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 2 8 BLOCKIN 7 0 +Henry henry H He Hen Henr y ry nry enry LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 3 8 BLOCKIN 7 0 +Ehrenberg ehrenberg E Eh Ehr Ehre g rg erg berg LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 4 8 BLOCKIN 7 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 4 8 BLOCKIN 7 0 +Zeshan zeshan Z Ze Zes Zesh n an han shan LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 5 8 BLOCKIN 7 0 +Hussain hussain H Hu Hus Huss n in ain sain LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 6 8 BLOCKIN 7 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 6 8 BLOCKIN 7 0 +Jared jared J Ja Jar Jare d ed red ared LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 7 8 BLOCKIN 7 0 +Dunnmon dunnmon D Du Dun Dunn n on mon nmon LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 8 8 BLOCKIN 7 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 8 8 BLOCKIN 7 0 +and and a an and and d nd and and LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 9 8 BLOCKIN 7 0 +Christopher christopher C Ch Chr Chri r er her pher LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 10 8 BLOCKIN 7 0 +Ré ré R Ré Ré Ré é Ré Ré Ré LINEIN ALIGNEDLEFT 
INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 10 8 BLOCKIN 7 0 +. . . . . . . . . . LINEEND ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 10 8 BLOCKIN 7 0 +Snorkel snorkel S Sn Sno Snor l el kel rkel LINESTART LINEINDENT INITCAP NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 0 9 BLOCKIN 3 0 +: : : : : : : : : : LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 PUNCT 0 9 BLOCKIN 3 0 +Rapid rapid R Ra Rap Rapi d id pid apid LINEIN LINEINDENT INITCAP NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 1 9 BLOCKIN 3 0 +training training t tr tra trai g ng ing ning LINEIN LINEINDENT NOCAPS NODIGIT 0 1 1 0 0 0 0 0 NOPUNCT 2 9 BLOCKIN 3 0 +data data d da dat data a ta ata data LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 3 9 BLOCKIN 3 0 +creation creation c cr cre crea n on ion tion LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 4 9 BLOCKIN 3 0 +with with w wi wit with h th ith with LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 4 9 BLOCKIN 3 0 +weak weak w we wea weak k ak eak weak LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 5 9 BLOCKIN 3 0 +supervision supervision s su sup supe n on ion sion LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 6 9 BLOCKIN 3 0 +. . . . . . . . . . 
LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 6 9 BLOCKIN 3 0 +Proceedings proceedings P Pr Pro Proc s gs ngs ings LINEIN LINEINDENT INITCAP NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 8 9 BLOCKIN 3 0 +of of o of of of f of of of LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 8 9 BLOCKIN 3 0 +the the t th the the e he the the LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 9 9 BLOCKIN 3 0 +VLDB vldb V VL VLD VLDB B DB LDB VLDB LINEIN LINEINDENT ALLCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 9 9 BLOCKIN 3 0 +Endowment endowment E En End Endo t nt ent ment LINEIN LINEINDENT INITCAP NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 10 9 BLOCKIN 3 0 +, , , , , , , , , , LINEEND LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 10 9 BLOCKIN 3 0 +11 11 1 11 11 11 1 11 11 11 LINESTART LINEINDENT NOCAPS ALLDIGIT 0 0 0 0 0 0 0 0 NOPUNCT 0 2 BLOCKIN 6 0 +( ( ( ( ( ( ( ( ( ( LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 OPENBRACKET 1 2 BLOCKIN 6 0 +3 3 3 3 3 3 3 3 3 3 LINEIN LINEINDENT NOCAPS ALLDIGIT 1 0 0 0 0 0 0 0 NOPUNCT 1 2 BLOCKIN 6 0 +) ) ) ) ) ) ) ) ) ) LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 ENDBRACKET 2 2 BLOCKIN 6 0 +: : : : : : : : : : LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 PUNCT 2 2 BLOCKIN 6 0 +269 269 2 26 269 269 9 69 269 269 LINEIN LINEINDENT NOCAPS ALLDIGIT 0 0 0 0 0 0 0 0 NOPUNCT 4 2 BLOCKIN 6 0 +- - - - - - - - - - LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 HYPHEN 4 2 BLOCKIN 6 0 +282 282 2 28 282 282 2 82 282 282 LINEIN LINEINDENT NOCAPS ALLDIGIT 0 0 0 0 0 0 0 0 NOPUNCT 5 2 BLOCKIN 6 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 6 2 BLOCKIN 6 0 +2020 2020 2 20 202 2020 0 20 020 2020 LINEIN LINEINDENT NOCAPS ALLDIGIT 0 0 0 0 0 1 0 0 NOPUNCT 9 2 BLOCKIN 6 0 +. . . . . . . . . . 
LINEEND LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 9 2 BLOCKEND 6 0 +[ [ [ [ [ [ [ [ [ [ LINESTART ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 OPENBRACKET 0 9 BLOCKSTART 6 0 +34 34 3 34 34 34 4 34 34 34 LINEIN ALIGNEDLEFT NOCAPS ALLDIGIT 0 0 0 0 0 0 0 0 NOPUNCT 0 9 BLOCKIN 6 0 +] ] ] ] ] ] ] ] ] ] LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 ENDBRACKET 0 9 BLOCKIN 6 0 +Solon solon S So Sol Solo n on lon olon LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 1 9 BLOCKIN 6 0 +Barocas barocas B Ba Bar Baro s as cas ocas LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 2 9 BLOCKIN 6 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 2 9 BLOCKIN 6 0 +Moritz moritz M Mo Mor Mori z tz itz ritz LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 2 9 BLOCKIN 6 0 +Hardt hardt H Ha Har Hard t dt rdt ardt LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 3 9 BLOCKIN 6 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 3 9 BLOCKIN 6 0 +and and a an and and d nd and and LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 4 9 BLOCKIN 6 0 +Arvind arvind A Ar Arv Arvi d nd ind vind LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 5 9 BLOCKIN 6 0 +Narayanan narayanan N Na Nar Nara n an nan anan LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 6 9 BLOCKIN 6 0 +. . . . . . . . . . 
LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 6 9 BLOCKIN 6 0 +Fairness fairness F Fa Fai Fair s ss ess ness LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 7 9 BLOCKIN 6 0 +in in i in in in n in in in LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 1 1 0 0 0 0 0 NOPUNCT 7 9 BLOCKIN 6 0 +machine machine m ma mac mach e ne ine hine LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 8 9 BLOCKIN 6 0 +learning learning l le lea lear g ng ing ning LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 9 9 BLOCKIN 6 0 +: : : : : : : : : : LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 PUNCT 9 9 BLOCKIN 6 0 +Lessons lessons L Le Les Less s ns ons sons LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 10 9 BLOCKIN 6 0 +from from f fr fro from m om rom from LINEEND ALIGNEDLEFT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 10 9 BLOCKIN 6 0 +political political p po pol poli l al cal ical LINESTART LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 0 9 BLOCKIN 3 0 +philosophy philosophy p ph phi phil y hy phy ophy LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 2 9 BLOCKIN 3 0 +. . . . . . . . . . 
LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 2 9 BLOCKIN 3 0 +In in I In In In n In In In LINEIN LINEINDENT INITCAP NODIGIT 0 1 1 0 0 0 0 0 NOPUNCT 2 9 BLOCKIN 3 0 +Proceedings proceedings P Pr Pro Proc s gs ngs ings LINEIN LINEINDENT INITCAP NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 4 9 BLOCKIN 3 0 +of of o of of of f of of of LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 4 9 BLOCKIN 3 0 +the the t th the the e he the the LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 4 9 BLOCKIN 3 0 +2017 2017 2 20 201 2017 7 17 017 2017 LINEIN LINEINDENT NOCAPS ALLDIGIT 0 0 0 0 0 1 0 0 NOPUNCT 5 9 BLOCKIN 3 0 +ACM acm A AC ACM ACM M CM ACM ACM LINEIN LINEINDENT ALLCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 6 9 BLOCKIN 3 0 +on on o on on on n on on on LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 6 9 BLOCKIN 3 0 +Conference conference C Co Con Conf e ce nce ence LINEIN LINEINDENT INITCAP NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 7 9 BLOCKIN 3 0 +on on o on on on n on on on LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 8 9 BLOCKIN 3 0 +Fairness fairness F Fa Fai Fair s ss ess ness LINEIN LINEINDENT INITCAP NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 9 9 BLOCKIN 3 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 9 9 BLOCKIN 3 0 +Accountability accountability A Ac Acc Acco y ty ity lity LINEIN LINEINDENT INITCAP NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 10 9 BLOCKIN 3 0 +, , , , , , , , , , LINEEND LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 10 9 BLOCKIN 3 0 +and and a an and and d nd and and LINESTART LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 0 3 BLOCKIN 4 0 +Transparency transparency T Tr Tra Tran y cy ncy ency LINEIN LINEINDENT INITCAP NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 4 3 BLOCKIN 4 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 4 3 BLOCKIN 4 0 +pages pages p pa pag page s es ges ages LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 6 3 BLOCKIN 4 0 +149 149 1 14 149 149 9 49 149 149 LINEIN LINEINDENT 
NOCAPS ALLDIGIT 0 0 0 0 0 0 0 0 NOPUNCT 7 3 BLOCKIN 4 0 +- - - - - - - - - - LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 HYPHEN 7 3 BLOCKIN 4 0 +159 159 1 15 159 159 9 59 159 159 LINEIN LINEINDENT NOCAPS ALLDIGIT 0 0 0 0 0 0 0 0 NOPUNCT 8 3 BLOCKIN 4 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 8 3 BLOCKIN 4 0 +2017 2017 2 20 201 2017 7 17 017 2017 LINEIN LINEINDENT NOCAPS ALLDIGIT 0 0 0 0 0 1 0 0 NOPUNCT 10 3 BLOCKIN 4 0 +. . . . . . . . . . LINEEND LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 10 3 BLOCKEND 4 0 +[ [ [ [ [ [ [ [ [ [ LINESTART ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 OPENBRACKET 0 9 BLOCKSTART 8 0 +35 35 3 35 35 35 5 35 35 35 LINEIN ALIGNEDLEFT NOCAPS ALLDIGIT 0 0 0 0 0 0 0 0 NOPUNCT 0 9 BLOCKIN 8 0 +] ] ] ] ] ] ] ] ] ] LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 ENDBRACKET 0 9 BLOCKIN 8 0 +Thomas thomas T Th Tho Thom s as mas omas LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 1 9 BLOCKIN 8 0 +Wolf wolf W Wo Wol Wolf f lf olf Wolf LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 1 0 0 0 0 0 NOPUNCT 1 9 BLOCKIN 8 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 1 9 BLOCKIN 8 0 +Lysandre lysandre L Ly Lys Lysa e re dre ndre LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 3 9 BLOCKIN 8 0 +Debut debut D De Deb Debu t ut but ebut LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 3 9 BLOCKIN 8 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 3 9 BLOCKIN 8 0 +Victor victor V Vi Vic Vict r or tor ctor LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 1 0 0 0 0 0 NOPUNCT 4 9 BLOCKIN 8 0 +Sanh sanh S Sa San Sanh h nh anh Sanh LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 5 9 BLOCKIN 8 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 5 9 BLOCKIN 8 0 +Julien julien J Ju Jul Juli n en ien lien LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 6 9 BLOCKIN 8 0 +Chaumond chaumond C Ch Cha Chau d 
nd ond mond LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 7 9 BLOCKIN 8 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 7 9 BLOCKIN 8 0 +Clement clement C Cl Cle Clem t nt ent ment LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 8 9 BLOCKIN 8 0 +Delangue delangue D De Del Dela e ue gue ngue LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 9 9 BLOCKIN 8 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 9 9 BLOCKIN 8 0 +Anthony anthony A An Ant Anth y ny ony hony LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 10 9 BLOCKIN 8 0 +Moi moi M Mo Moi Moi i oi Moi Moi LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 10 9 BLOCKIN 8 0 +, , , , , , , , , , LINEEND ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 10 9 BLOCKIN 8 0 +Pierric pierric P Pi Pie Pier c ic ric rric LINESTART LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 0 9 BLOCKIN 9 0 +Cistac cistac C Ci Cis Cist c ac tac stac LINEIN LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 1 9 BLOCKIN 9 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 1 9 BLOCKIN 9 0 +Tim tim T Ti Tim Tim m im Tim Tim LINEIN LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 2 9 BLOCKIN 9 0 +Rault rault R Ra Rau Raul t lt ult ault LINEIN LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 2 9 BLOCKIN 9 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 3 9 BLOCKIN 9 0 +Rémi rémi R Ré Rém Rémi i mi émi Rémi LINEIN LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 3 9 BLOCKIN 9 0 +Louf louf L Lo Lou Louf f uf ouf Louf LINEIN LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 4 9 BLOCKIN 9 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 4 9 BLOCKIN 9 0 +Morgan morgan M Mo Mor Morg n an gan rgan LINEIN LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 5 9 BLOCKIN 9 0 +Funtowicz funtowicz F Fu Fun Funt z cz icz wicz LINEIN LINEINDENT INITCAP 
NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 6 9 BLOCKIN 9 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 6 9 BLOCKIN 9 0 +et et e et et et t et et et LINEIN LINEINDENT NOCAPS NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 6 9 BLOCKIN 9 0 +al al a al al al l al al al LINEIN LINEINDENT NOCAPS NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 7 9 BLOCKIN 9 0 +. . . . . . . . . . LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 7 9 BLOCKIN 9 0 +Transformers transformers T Tr Tra Tran s rs ers mers LINEIN LINEINDENT INITCAP NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 8 9 BLOCKIN 9 0 +: : : : : : : : : : LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 PUNCT 9 9 BLOCKIN 9 0 +State state S St Sta Stat e te ate tate LINEIN LINEINDENT INITCAP NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 9 9 BLOCKIN 9 0 +- - - - - - - - - - LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 HYPHEN 9 9 BLOCKIN 9 0 +of of o of of of f of of of LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 10 9 BLOCKIN 9 0 +- - - - - - - - - - LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 HYPHEN 10 9 BLOCKIN 9 0 +the the t th the the e he the the LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 10 9 BLOCKIN 9 0 +- - - - - - - - - - LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 HYPHEN 10 9 BLOCKIN 9 0 +art art a ar art art t rt art art LINEEND LINEINDENT NOCAPS NODIGIT 0 1 1 0 0 0 0 0 NOPUNCT 10 9 BLOCKIN 9 0 +natural natural n na nat natu l al ral ural LINESTART LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 0 9 BLOCKIN 1 0 +language language l la lan lang e ge age uage LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 1 9 BLOCKIN 1 0 +processing processing p pr pro proc g ng ing sing LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 2 9 BLOCKIN 1 0 +. . . . . . . . . . 
LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 3 9 BLOCKIN 1 0 +Proceedings proceedings P Pr Pro Proc s gs ngs ings LINEIN LINEINDENT INITCAP NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 4 9 BLOCKIN 1 0 +of of o of of of f of of of LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 4 9 BLOCKIN 1 0 +the the t th the the e he the the LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 5 9 BLOCKIN 1 0 +2020 2020 2 20 202 2020 0 20 020 2020 LINEIN LINEINDENT NOCAPS ALLDIGIT 0 0 0 0 0 1 0 0 NOPUNCT 5 9 BLOCKIN 1 0 +Conference conference C Co Con Conf e ce nce ence LINEIN LINEINDENT INITCAP NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 7 9 BLOCKIN 1 0 +on on o on on on n on on on LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 7 9 BLOCKIN 1 0 +Empirical empirical E Em Emp Empi l al cal ical LINEIN LINEINDENT INITCAP NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 8 9 BLOCKIN 1 0 +Methods methods M Me Met Meth s ds ods hods LINEIN LINEINDENT INITCAP NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 9 9 BLOCKIN 1 0 +in in i in in in n in in in LINEIN LINEINDENT NOCAPS NODIGIT 0 1 1 0 0 0 0 0 NOPUNCT 10 9 BLOCKIN 1 0 +Natural natural N Na Nat Natu l al ral ural LINEEND LINEINDENT INITCAP NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 10 9 BLOCKIN 1 0 +Language language L La Lan Lang e ge age uage LINESTART LINEINDENT INITCAP NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 1 6 BLOCKIN 5 0 +Processing processing P Pr Pro Proc g ng ing sing LINEIN LINEINDENT INITCAP NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 3 6 BLOCKIN 5 0 +: : : : : : : : : : LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 PUNCT 3 6 BLOCKIN 5 0 +System system S Sy Sys Syst m em tem stem LINEIN LINEINDENT INITCAP NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 4 6 BLOCKIN 5 0 +Demonstrations demonstrations D De Dem Demo s ns ons ions LINEIN LINEINDENT INITCAP NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 7 6 BLOCKIN 5 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 7 6 BLOCKIN 5 0 +pages pages p pa pag page s es ges ages LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 8 6 
BLOCKIN 5 0 +38 38 3 38 38 38 8 38 38 38 LINEIN LINEINDENT NOCAPS ALLDIGIT 0 0 0 0 0 0 0 0 NOPUNCT 8 6 BLOCKIN 5 0 +- - - - - - - - - - LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 HYPHEN 9 6 BLOCKIN 5 0 +45 45 4 45 45 45 5 45 45 45 LINEIN LINEINDENT NOCAPS ALLDIGIT 0 0 0 0 0 0 0 0 NOPUNCT 9 6 BLOCKIN 5 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 9 6 BLOCKIN 5 0 +2020 2020 2 20 202 2020 0 20 020 2020 LINEIN LINEINDENT NOCAPS ALLDIGIT 0 0 0 0 0 1 0 0 NOPUNCT 10 6 BLOCKIN 5 0 +. . . . . . . . . . LINEEND LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 10 6 BLOCKEND 5 0 +[ [ [ [ [ [ [ [ [ [ LINESTART ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 OPENBRACKET 0 8 BLOCKSTART 8 0 +36 36 3 36 36 36 6 36 36 36 LINEIN ALIGNEDLEFT NOCAPS ALLDIGIT 0 0 0 0 0 0 0 0 NOPUNCT 0 8 BLOCKIN 8 0 +] ] ] ] ] ] ] ] ] ] LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 ENDBRACKET 0 8 BLOCKIN 8 0 +Adam adam A Ad Ada Adam m am dam Adam LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 1 8 BLOCKIN 8 0 +Paszke paszke P Pa Pas Pasz e ke zke szke LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 1 8 BLOCKIN 8 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 2 8 BLOCKIN 8 0 +Sam sam S Sa Sam Sam m am Sam Sam LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 2 8 BLOCKIN 8 0 +Gross gross G Gr Gro Gros s ss oss ross LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 1 0 0 0 0 0 NOPUNCT 3 8 BLOCKIN 8 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 3 8 BLOCKIN 8 0 +Francisco francisco F Fr Fra Fran o co sco isco LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 4 8 BLOCKIN 8 0 +Massa massa M Ma Mas Mass a sa ssa assa LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 5 8 BLOCKIN 8 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 5 8 BLOCKIN 8 0 +Adam adam A Ad Ada Adam m am dam Adam LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 6 8 
BLOCKIN 8 0 +Lerer lerer L Le Ler Lere r er rer erer LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 7 8 BLOCKIN 8 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 7 8 BLOCKIN 8 0 +James james J Ja Jam Jame s es mes ames LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 7 8 BLOCKIN 8 0 +Bradbury bradbury B Br Bra Brad y ry ury bury LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 9 8 BLOCKIN 8 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 9 8 BLOCKIN 8 0 +Gregory gregory G Gr Gre Greg y ry ory gory LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 10 8 BLOCKIN 8 0 +Chanan chanan C Ch Cha Chan n an nan anan LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 10 8 BLOCKIN 8 0 +, , , , , , , , , , LINEEND ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 10 8 BLOCKIN 8 0 +Trevor trevor T Tr Tre Trev r or vor evor LINESTART LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 0 9 BLOCKIN 7 0 +Killeen killeen K Ki Kil Kill n en een leen LINEIN LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 1 9 BLOCKIN 7 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 1 9 BLOCKIN 7 0 +Zeming zeming Z Ze Zem Zemi g ng ing ming LINEIN LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 2 9 BLOCKIN 7 0 +Lin lin L Li Lin Lin n in Lin Lin LINEIN LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 2 9 BLOCKIN 7 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 3 9 BLOCKIN 7 0 +Natalia natalia N Na Nat Nata a ia lia alia LINEIN LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 3 9 BLOCKIN 7 0 +Gimelshein gimelshein G Gi Gim Gime n in ein hein LINEIN LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 5 9 BLOCKIN 7 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 5 9 BLOCKIN 7 0 +Luca luca L Lu Luc Luca a ca uca Luca LINEIN LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 5 9 BLOCKIN 7 0 +Antiga antiga 
A An Ant Anti a ga iga tiga LINEIN LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 6 9 BLOCKIN 7 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 6 9 BLOCKIN 7 0 +et et e et et et t et et et LINEIN LINEINDENT NOCAPS NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 7 9 BLOCKIN 7 0 +al al a al al al l al al al LINEIN LINEINDENT NOCAPS NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 7 9 BLOCKIN 7 0 +. . . . . . . . . . LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 7 9 BLOCKIN 7 0 +Pytorch pytorch P Py Pyt Pyto h ch rch orch LINEIN LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 8 9 BLOCKIN 7 0 +: : : : : : : : : : LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 PUNCT 8 9 BLOCKIN 7 0 +An an A An An An n An An An LINEIN LINEINDENT INITCAP NODIGIT 0 1 1 0 0 0 0 0 NOPUNCT 9 9 BLOCKIN 7 0 +imperative imperative i im imp impe e ve ive tive LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 10 9 BLOCKIN 7 0 +style style s st sty styl e le yle tyle LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 10 9 BLOCKIN 7 0 +, , , , , , , , , , LINEEND LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 10 9 BLOCKIN 7 0 +high high h hi hig high h gh igh high LINESTART LINEINDENT NOCAPS NODIGIT 0 1 1 0 0 0 0 0 NOPUNCT 0 9 BLOCKIN 4 0 +- - - - - - - - - - LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 HYPHEN 0 9 BLOCKIN 4 0 +performance performance p pe per perf e ce nce ance LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 1 9 BLOCKIN 4 0 +deep deep d de dee deep p ep eep deep LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 2 9 BLOCKIN 4 0 +learning learning l le lea lear g ng ing ning LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 3 9 BLOCKIN 4 0 +library library l li lib libr y ry ary rary LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 4 9 BLOCKIN 4 0 +. . . . . . . . . . 
LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 4 9 BLOCKIN 4 0 +Advances advances A Ad Adv Adva s es ces nces LINEIN LINEINDENT INITCAP NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 5 9 BLOCKIN 4 0 +in in i in in in n in in in LINEIN LINEINDENT NOCAPS NODIGIT 0 1 1 0 0 0 0 0 NOPUNCT 5 9 BLOCKIN 4 0 +Neural neural N Ne Neu Neur l al ral ural LINEIN LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 6 9 BLOCKIN 4 0 +Information information I In Inf Info n on ion tion LINEIN LINEINDENT INITCAP NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 8 9 BLOCKIN 4 0 +Processing processing P Pr Pro Proc g ng ing sing LINEIN LINEINDENT INITCAP NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 9 9 BLOCKIN 4 0 +Systems systems S Sy Sys Syst s ms ems tems LINEIN LINEINDENT INITCAP NODIGIT 0 1 1 0 0 0 0 0 NOPUNCT 10 9 BLOCKIN 4 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 10 9 BLOCKIN 4 0 +32 32 3 32 32 32 2 32 32 32 LINEIN LINEINDENT NOCAPS ALLDIGIT 0 0 0 0 0 0 0 0 NOPUNCT 10 9 BLOCKIN 4 0 +, , , , , , , , , , LINEEND LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 10 9 BLOCKIN 4 0 +2019 2019 2 20 201 2019 9 19 019 2019 LINESTART LINEINDENT NOCAPS ALLDIGIT 0 0 0 0 0 1 0 0 NOPUNCT 5 0 BLOCKIN 1 0 +. . . . . . . . . . 
LINEEND LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 7 0 BLOCKEND 1 0 +[ [ [ [ [ [ [ [ [ [ LINESTART ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 OPENBRACKET 0 9 BLOCKSTART 8 0 +37 37 3 37 37 37 7 37 37 37 LINEIN ALIGNEDLEFT NOCAPS ALLDIGIT 0 0 0 0 0 0 0 0 NOPUNCT 0 9 BLOCKIN 8 0 +] ] ] ] ] ] ] ] ] ] LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 ENDBRACKET 0 9 BLOCKIN 8 0 +Martín martín M Ma Mar Mart n ín tín rtín LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 1 9 BLOCKIN 8 0 +Abadi abadi A Ab Aba Abad i di adi badi LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 1 9 BLOCKIN 8 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 2 9 BLOCKIN 8 0 +Ashish ashish A As Ash Ashi h sh ish hish LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 2 9 BLOCKIN 8 0 +Agarwal agarwal A Ag Aga Agar l al wal rwal LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 3 9 BLOCKIN 8 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 3 9 BLOCKIN 8 0 +Paul paul P Pa Pau Paul l ul aul Paul LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 4 9 BLOCKIN 8 0 +Barham barham B Ba Bar Barh m am ham rham LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 5 9 BLOCKIN 8 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 5 9 BLOCKIN 8 0 +Eugene eugene E Eu Eug Euge e ne ene gene LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 6 9 BLOCKIN 8 0 +Brevdo brevdo B Br Bre Brev o do vdo evdo LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 7 9 BLOCKIN 8 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 7 9 BLOCKIN 8 0 +Zhifeng zhifeng Z Zh Zhi Zhif g ng eng feng LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 8 9 BLOCKIN 8 0 +Chen chen C Ch Che Chen n en hen Chen LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 8 9 BLOCKIN 8 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 
COMMA 8 9 BLOCKIN 8 0 +Craig craig C Cr Cra Crai g ig aig raig LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 9 9 BLOCKIN 8 0 +Citro citro C Ci Cit Citr o ro tro itro LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 10 9 BLOCKIN 8 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 10 9 BLOCKIN 8 0 +Greg greg G Gr Gre Greg g eg reg Greg LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 10 9 BLOCKIN 8 0 +S s S S S S S S S S LINEEND ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 NOPUNCT 10 9 BLOCKIN 8 0 +Corrado corrado C Co Cor Corr o do ado rado LINESTART LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 0 8 BLOCKIN 7 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 0 8 BLOCKIN 7 0 +Andy andy A An And Andy y dy ndy Andy LINEIN LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 1 8 BLOCKIN 7 0 +Davis davis D Da Dav Davi s is vis avis LINEIN LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 2 8 BLOCKIN 7 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 2 8 BLOCKIN 7 0 +Jeffrey jeffrey J Je Jef Jeff y ey rey frey LINEIN LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 3 8 BLOCKIN 7 0 +Dean dean D De Dea Dean n an ean Dean LINEIN LINEINDENT INITCAP NODIGIT 0 1 1 0 0 0 0 0 NOPUNCT 4 8 BLOCKIN 7 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 4 8 BLOCKIN 7 0 +Matthieu matthieu M Ma Mat Matt u eu ieu hieu LINEIN LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 5 8 BLOCKIN 7 0 +Devin devin D De Dev Devi n in vin evin LINEIN LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 6 8 BLOCKIN 7 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 6 8 BLOCKIN 7 0 +et et e et et et t et et et LINEIN LINEINDENT NOCAPS NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 6 8 BLOCKIN 7 0 +al al a al al al l al al al LINEIN LINEINDENT NOCAPS NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 7 8 BLOCKIN 7 0 +. . . . . . . . . . 
LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 7 8 BLOCKIN 7 0 +Tensorflow tensorflow T Te Ten Tens w ow low flow LINEIN LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 8 8 BLOCKIN 7 0 +: : : : : : : : : : LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 PUNCT 8 8 BLOCKIN 7 0 +Large large L La Lar Larg e ge rge arge LINEIN LINEINDENT INITCAP NODIGIT 0 1 1 0 0 0 0 0 NOPUNCT 9 8 BLOCKIN 7 0 +- - - - - - - - - - LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 HYPHEN 9 8 BLOCKIN 7 0 +scale scale s sc sca scal e le ale cale LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 10 8 BLOCKIN 7 0 +machine machine m ma mac mach e ne ine hine LINEEND LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 10 8 BLOCKIN 7 0 +learning learning l le lea lear g ng ing ning LINESTART LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 0 8 BLOCKIN 5 0 +on on o on on on n on on on LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 1 8 BLOCKIN 5 0 +heterogeneous heterogeneous h he het hete s us ous eous LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 3 8 BLOCKIN 5 0 +distributed distributed d di dis dist d ed ted uted LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 4 8 BLOCKIN 5 0 +systems systems s sy sys syst s ms ems tems LINEIN LINEINDENT NOCAPS NODIGIT 0 1 1 0 0 0 0 0 NOPUNCT 5 8 BLOCKIN 5 0 +. . . . . . . . . . LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 5 8 BLOCKIN 5 0 +arXiv arxiv a ar arX arXi v iv Xiv rXiv LINEIN LINEINDENT NOCAPS NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 6 8 BLOCKIN 5 0 +preprint preprint p pr pre prep t nt int rint LINEIN LINEINDENT NOCAPS NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 7 8 BLOCKIN 5 0 +arXiv arxiv a ar arX arXi v iv Xiv rXiv LINEIN LINEINDENT NOCAPS NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 8 8 BLOCKIN 5 0 +: : : : : : : : : : LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 PUNCT 8 8 BLOCKIN 5 0 +1603 1603 1 16 160 1603 3 03 603 1603 LINEIN LINEINDENT NOCAPS ALLDIGIT 0 0 0 0 0 1 0 0 NOPUNCT 9 8 BLOCKIN 5 0 +. . . . . . . . . . 
LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 9 8 BLOCKIN 5 0 +04467 04467 0 04 044 0446 7 67 467 4467 LINEIN LINEINDENT NOCAPS ALLDIGIT 0 0 0 0 0 0 0 0 NOPUNCT 9 8 BLOCKIN 5 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 9 8 BLOCKIN 5 0 +2015 2015 2 20 201 2015 5 15 015 2015 LINEIN LINEINDENT NOCAPS ALLDIGIT 0 0 0 0 0 1 0 0 NOPUNCT 10 8 BLOCKIN 5 0 +. . . . . . . . . . LINEEND LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 10 8 BLOCKEND 5 0 +[ [ [ [ [ [ [ [ [ [ LINESTART ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 OPENBRACKET 0 9 BLOCKSTART 9 0 +38 38 3 38 38 38 8 38 38 38 LINEIN ALIGNEDLEFT NOCAPS ALLDIGIT 0 0 0 0 0 0 0 0 NOPUNCT 0 9 BLOCKIN 9 0 +] ] ] ] ] ] ] ] ] ] LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 ENDBRACKET 0 9 BLOCKIN 9 0 +Jacob jacob J Ja Jac Jaco b ob cob acob LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 1 9 BLOCKIN 9 0 +Devlin devlin D De Dev Devl n in lin vlin LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 1 9 BLOCKIN 9 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 2 9 BLOCKIN 9 0 +Ming ming M Mi Min Ming g ng ing Ming LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 2 9 BLOCKIN 9 0 +- - - - - - - - - - LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 HYPHEN 2 9 BLOCKIN 9 0 +Wei wei W We Wei Wei i ei Wei Wei LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 3 9 BLOCKIN 9 0 +Chang chang C Ch Cha Chan g ng ang hang LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 3 9 BLOCKIN 9 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 3 9 BLOCKIN 9 0 +Kenton kenton K Ke Ken Kent n on ton nton LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 4 9 BLOCKIN 9 0 +Lee lee L Le Lee Lee e ee Lee Lee LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 1 0 0 0 0 0 NOPUNCT 5 9 BLOCKIN 9 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 5 9 BLOCKIN 9 0 +and and a an and and d nd and and 
LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 5 9 BLOCKIN 9 0 +Kristina kristina K Kr Kri Kris a na ina tina LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 6 9 BLOCKIN 9 0 +Toutanova toutanova T To Tou Tout a va ova nova LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 7 9 BLOCKIN 9 0 +. . . . . . . . . . LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 7 9 BLOCKIN 9 0 +Bert bert B Be Ber Bert t rt ert Bert LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 8 9 BLOCKIN 9 0 +: : : : : : : : : : LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 PUNCT 8 9 BLOCKIN 9 0 +Pre pre P Pr Pre Pre e re Pre Pre LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 9 9 BLOCKIN 9 0 +- - - - - - - - - - LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 HYPHEN 9 9 BLOCKIN 9 0 +training training t tr tra trai g ng ing ning LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 1 1 0 0 0 0 0 NOPUNCT 10 9 BLOCKIN 9 0 +of of o of of of f of of of LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 10 9 BLOCKIN 9 0 +deep deep d de dee deep p ep eep deep LINEEND ALIGNEDLEFT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 10 9 BLOCKIN 9 0 +bidirectional bidirectional b bi bid bidi l al nal onal LINESTART LINEINDENT NOCAPS NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 1 9 BLOCKIN 5 0 +transformers transformers t tr tra tran s rs ers mers LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 2 9 BLOCKIN 5 0 +for for f fo for for r or for for LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 3 9 BLOCKIN 5 0 +language language l la lan lang e ge age uage LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 4 9 BLOCKIN 5 0 +understanding understanding u un und unde g ng ing ding LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 6 9 BLOCKIN 5 0 +. . . . . . . . . . 
LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 6 9 BLOCKIN 5 0 +arXiv arxiv a ar arX arXi v iv Xiv rXiv LINEIN LINEINDENT NOCAPS NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 6 9 BLOCKIN 5 0 +preprint preprint p pr pre prep t nt int rint LINEIN LINEINDENT NOCAPS NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 7 9 BLOCKIN 5 0 +arXiv arxiv a ar arX arXi v iv Xiv rXiv LINEIN LINEINDENT NOCAPS NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 8 9 BLOCKIN 5 0 +: : : : : : : : : : LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 PUNCT 8 9 BLOCKIN 5 0 +1810 1810 1 18 181 1810 0 10 810 1810 LINEIN LINEINDENT NOCAPS ALLDIGIT 0 0 0 0 0 1 0 0 NOPUNCT 9 9 BLOCKIN 5 0 +. . . . . . . . . . LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 9 9 BLOCKIN 5 0 +04805 04805 0 04 048 0480 5 05 805 4805 LINEIN LINEINDENT NOCAPS ALLDIGIT 0 0 0 0 0 0 0 0 NOPUNCT 9 9 BLOCKIN 5 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 9 9 BLOCKIN 5 0 +2018 2018 2 20 201 2018 8 18 018 2018 LINEIN LINEINDENT NOCAPS ALLDIGIT 0 0 0 0 0 1 0 0 NOPUNCT 10 9 BLOCKIN 5 0 +. . . . . . . . . . 
LINEEND LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 10 9 BLOCKEND 5 0 +[ [ [ [ [ [ [ [ [ [ LINESTART ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 OPENBRACKET 0 9 BLOCKSTART 9 0 +39 39 3 39 39 39 9 39 39 39 LINEIN ALIGNEDLEFT NOCAPS ALLDIGIT 0 0 0 0 0 0 0 0 NOPUNCT 0 9 BLOCKIN 9 0 +] ] ] ] ] ] ] ] ] ] LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 ENDBRACKET 0 9 BLOCKIN 9 0 +Yinhan yinhan Y Yi Yin Yinh n an han nhan LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 1 9 BLOCKIN 9 0 +Liu liu L Li Liu Liu u iu Liu Liu LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 1 9 BLOCKIN 9 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 1 9 BLOCKIN 9 0 +Myle myle M My Myl Myle e le yle Myle LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 2 9 BLOCKIN 9 0 +Ott ott O Ot Ott Ott t tt Ott Ott LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 3 9 BLOCKIN 9 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 3 9 BLOCKIN 9 0 +Naman naman N Na Nam Nama n an man aman LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 3 9 BLOCKIN 9 0 +Goyal goyal G Go Goy Goya l al yal oyal LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 4 9 BLOCKIN 9 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 4 9 BLOCKIN 9 0 +Jingfei jingfei J Ji Jin Jing i ei fei gfei LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 5 9 BLOCKIN 9 0 +Du du D Du Du Du u Du Du Du LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 6 9 BLOCKIN 9 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 6 9 BLOCKIN 9 0 +Mandar mandar M Ma Man Mand r ar dar ndar LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 7 9 BLOCKIN 9 0 +Joshi joshi J Jo Jos Josh i hi shi oshi LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 7 9 BLOCKIN 9 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 7 9 BLOCKIN 9 0 +Danqi danqi 
D Da Dan Danq i qi nqi anqi LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 8 9 BLOCKIN 9 0 +Chen chen C Ch Che Chen n en hen Chen LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 9 9 BLOCKIN 9 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 9 9 BLOCKIN 9 0 +Omer omer O Om Ome Omer r er mer Omer LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 10 9 BLOCKIN 9 0 +Levy levy L Le Lev Levy y vy evy Levy LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 1 0 0 0 0 0 NOPUNCT 10 9 BLOCKIN 9 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 10 9 BLOCKIN 9 0 +Mike mike M Mi Mik Mike e ke ike Mike LINEEND ALIGNEDLEFT INITCAP NODIGIT 0 1 1 0 0 0 0 0 NOPUNCT 10 9 BLOCKIN 9 0 +Lewis lewis L Le Lew Lewi s is wis ewis LINESTART LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 0 9 BLOCKIN 4 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 0 9 BLOCKIN 4 0 +Luke luke L Lu Luk Luke e ke uke Luke LINEIN LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 1 9 BLOCKIN 4 0 +Zettlemoyer zettlemoyer Z Ze Zet Zett r er yer oyer LINEIN LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 2 9 BLOCKIN 4 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 2 9 BLOCKIN 4 0 +and and a an and and d nd and and LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 3 9 BLOCKIN 4 0 +Veselin veselin V Ve Ves Vese n in lin elin LINEIN LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 4 9 BLOCKIN 4 0 +Stoyanov stoyanov S St Sto Stoy v ov nov anov LINEIN LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 5 9 BLOCKIN 4 0 +. . . . . . . . . . 
LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 5 9 BLOCKIN 4 0 +Roberta roberta R Ro Rob Robe a ta rta erta LINEIN LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 6 9 BLOCKIN 4 0 +: : : : : : : : : : LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 PUNCT 6 9 BLOCKIN 4 0 +A a A A A A A A A A LINEIN LINEINDENT ALLCAP NODIGIT 1 0 1 0 0 0 0 0 NOPUNCT 6 9 BLOCKIN 4 0 +robustly robustly r ro rob robu y ly tly stly LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 7 9 BLOCKIN 4 0 +optimized optimized o op opt opti d ed zed ized LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 8 9 BLOCKIN 4 0 +bert bert b be ber bert t rt ert bert LINEIN LINEINDENT NOCAPS NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 9 9 BLOCKIN 4 0 +pretraining pretraining p pr pre pret g ng ing ning LINEEND LINEINDENT NOCAPS NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 10 9 BLOCKIN 4 0 +approach approach a ap app appr h ch ach oach LINESTART LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 1 4 BLOCKIN 5 0 +. . . . . . . . . . LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 1 4 BLOCKIN 5 0 +arXiv arxiv a ar arX arXi v iv Xiv rXiv LINEIN LINEINDENT NOCAPS NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 3 4 BLOCKIN 5 0 +preprint preprint p pr pre prep t nt int rint LINEIN LINEINDENT NOCAPS NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 5 4 BLOCKIN 5 0 +arXiv arxiv a ar arX arXi v iv Xiv rXiv LINEIN LINEINDENT NOCAPS NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 6 4 BLOCKIN 5 0 +: : : : : : : : : : LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 PUNCT 6 4 BLOCKIN 5 0 +1907 1907 1 19 190 1907 7 07 907 1907 LINEIN LINEINDENT NOCAPS ALLDIGIT 0 0 0 0 0 1 0 0 NOPUNCT 7 4 BLOCKIN 5 0 +. . . . . . . . . . 
LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 7 4 BLOCKIN 5 0 +11692 11692 1 11 116 1169 2 92 692 1692 LINEIN LINEINDENT NOCAPS ALLDIGIT 0 0 0 0 0 1 0 0 NOPUNCT 8 4 BLOCKIN 5 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 9 4 BLOCKIN 5 0 +2019 2019 2 20 201 2019 9 19 019 2019 LINEIN LINEINDENT NOCAPS ALLDIGIT 0 0 0 0 0 1 0 0 NOPUNCT 10 4 BLOCKIN 5 0 +. . . . . . . . . . LINEEND LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 10 4 BLOCKEND 5 0 +[ [ [ [ [ [ [ [ [ [ LINESTART ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 OPENBRACKET 0 9 BLOCKSTART 9 0 +40 40 4 40 40 40 0 40 40 40 LINEIN ALIGNEDLEFT NOCAPS ALLDIGIT 0 0 0 0 0 0 0 0 NOPUNCT 0 9 BLOCKIN 9 0 +] ] ] ] ] ] ] ] ] ] LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 ENDBRACKET 0 9 BLOCKIN 9 0 +Sheng sheng S Sh She Shen g ng eng heng LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 1 9 BLOCKIN 9 0 +Shen shen S Sh She Shen n en hen Shen LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 1 9 BLOCKIN 9 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 1 9 BLOCKIN 9 0 +Zhewei zhewei Z Zh Zhe Zhew i ei wei ewei LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 2 9 BLOCKIN 9 0 +Dong dong D Do Don Dong g ng ong Dong LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 3 9 BLOCKIN 9 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 3 9 BLOCKIN 9 0 +Xiaocheng xiaocheng X Xi Xia Xiao g ng eng heng LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 4 9 BLOCKIN 9 0 +Ye ye Y Ye Ye Ye e Ye Ye Ye LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 5 9 BLOCKIN 9 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 5 9 BLOCKIN 9 0 +Linjian linjian L Li Lin Linj n an ian jian LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 6 9 BLOCKIN 9 0 +Ma ma M Ma Ma Ma a Ma Ma Ma LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 6 9 BLOCKIN 9 0 +, , , , , , , , 
, , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 6 9 BLOCKIN 9 0 +Zhewei zhewei Z Zh Zhe Zhew i ei wei ewei LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 7 9 BLOCKIN 9 0 +Li li L Li Li Li i Li Li Li LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 7 9 BLOCKIN 9 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 8 9 BLOCKIN 9 0 +Zirui zirui Z Zi Zir Ziru i ui rui irui LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 8 9 BLOCKIN 9 0 +Wang wang W Wa Wan Wang g ng ang Wang LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 9 9 BLOCKIN 9 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 9 9 BLOCKIN 9 0 +Samyam samyam S Sa Sam Samy m am yam myam LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 10 9 BLOCKIN 9 0 +Rajbhan rajbhan R Ra Raj Rajb n an han bhan LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 10 9 BLOCKIN 9 0 +- - - - - - - - - - LINEEND ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 HYPHEN 10 9 BLOCKIN 9 0 +dari dari d da dar dari i ri ari dari LINESTART LINEINDENT NOCAPS NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 0 9 BLOCKIN 5 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 0 9 BLOCKIN 5 0 +Yuxiong yuxiong Y Yu Yux Yuxi g ng ong iong LINEIN LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 1 9 BLOCKIN 5 0 +Wang wang W Wa Wan Wang g ng ang Wang LINEIN LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 2 9 BLOCKIN 5 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 2 9 BLOCKIN 5 0 +and and a an and and d nd and and LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 2 9 BLOCKIN 5 0 +Zhen zhen Z Zh Zhe Zhen n en hen Zhen LINEIN LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 3 9 BLOCKIN 5 0 +Yang yang Y Ya Yan Yang g ng ang Yang LINEIN LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 4 9 BLOCKIN 5 0 +. . . . . . . . . . 
LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 4 9 BLOCKIN 5 0 +Q q Q Q Q Q Q Q Q Q LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 NOPUNCT 4 9 BLOCKIN 5 0 +- - - - - - - - - - LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 HYPHEN 4 9 BLOCKIN 5 0 +bert bert b be ber bert t rt ert bert LINEIN LINEINDENT NOCAPS NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 5 9 BLOCKIN 5 0 +: : : : : : : : : : LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 PUNCT 5 9 BLOCKIN 5 0 +Hessian hessian H He Hes Hess n an ian sian LINEIN LINEINDENT INITCAP NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 6 9 BLOCKIN 5 0 +based based b ba bas base d ed sed ased LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 6 9 BLOCKIN 5 0 +ultra ultra u ul ult ultr a ra tra ltra LINEIN LINEINDENT NOCAPS NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 7 9 BLOCKIN 5 0 +low low l lo low low w ow low low LINEIN LINEINDENT NOCAPS NODIGIT 0 1 1 0 0 0 0 0 NOPUNCT 8 9 BLOCKIN 5 0 +precision precision p pr pre prec n on ion sion LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 9 9 BLOCKIN 5 0 +quantization quantization q qu qua quan n on ion tion LINEIN LINEINDENT NOCAPS NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 10 9 BLOCKIN 5 0 +of of o of of of f of of of LINEEND LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 10 9 BLOCKIN 5 0 +bert bert b be ber bert t rt ert bert LINESTART LINEINDENT NOCAPS NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 0 9 BLOCKIN 8 0 +. . . . . . . . . . 
LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 0 9 BLOCKIN 8 0 +Proceedings proceedings P Pr Pro Proc s gs ngs ings LINEIN LINEINDENT INITCAP NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 1 9 BLOCKIN 8 0 +of of o of of of f of of of LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 2 9 BLOCKIN 8 0 +the the t th the the e he the the LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 2 9 BLOCKIN 8 0 +AAAI aaai A AA AAA AAAI I AI AAI AAAI LINEIN LINEINDENT ALLCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 3 9 BLOCKIN 8 0 +Conference conference C Co Con Conf e ce nce ence LINEIN LINEINDENT INITCAP NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 4 9 BLOCKIN 8 0 +on on o on on on n on on on LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 5 9 BLOCKIN 8 0 +Artificial artificial A Ar Art Arti l al ial cial LINEIN LINEINDENT INITCAP NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 6 9 BLOCKIN 8 0 +Intelligence intelligence I In Int Inte e ce nce ence LINEIN LINEINDENT INITCAP NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 7 9 BLOCKIN 8 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 8 9 BLOCKIN 8 0 +34 34 3 34 34 34 4 34 34 34 LINEIN LINEINDENT NOCAPS ALLDIGIT 0 0 0 0 0 0 0 0 NOPUNCT 8 9 BLOCKIN 8 0 +( ( ( ( ( ( ( ( ( ( LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 OPENBRACKET 8 9 BLOCKIN 8 0 +05 05 0 05 05 05 5 05 05 05 LINEIN LINEINDENT NOCAPS ALLDIGIT 0 0 0 0 0 0 0 0 NOPUNCT 8 9 BLOCKIN 8 0 +) ) ) ) ) ) ) ) ) ) LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 ENDBRACKET 8 9 BLOCKIN 8 0 +: : : : : : : : : : LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 PUNCT 9 9 BLOCKIN 8 0 +8815 8815 8 88 881 8815 5 15 815 8815 LINEIN LINEINDENT NOCAPS ALLDIGIT 0 0 0 0 0 0 0 0 NOPUNCT 9 9 BLOCKIN 8 0 +- - - - - - - - - - LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 HYPHEN 9 9 BLOCKIN 8 0 +8821 8821 8 88 882 8821 1 21 821 8821 LINEIN LINEINDENT NOCAPS ALLDIGIT 0 0 0 0 0 0 0 0 NOPUNCT 10 9 BLOCKIN 8 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 10 9 BLOCKIN 8 0 
+2020 2020 2 20 202 2020 0 20 020 2020 LINEIN LINEINDENT NOCAPS ALLDIGIT 0 0 0 0 0 1 0 0 NOPUNCT 10 9 BLOCKIN 8 0 +. . . . . . . . . . LINEEND LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 10 9 BLOCKEND 8 0 +[ [ [ [ [ [ [ [ [ [ LINESTART ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 OPENBRACKET 0 9 BLOCKSTART 8 0 +41 41 4 41 41 41 1 41 41 41 LINEIN ALIGNEDLEFT NOCAPS ALLDIGIT 0 0 0 0 0 0 0 0 NOPUNCT 0 9 BLOCKIN 8 0 +] ] ] ] ] ] ] ] ] ] LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 ENDBRACKET 0 9 BLOCKIN 8 0 +Alec alec A Al Ale Alec c ec lec Alec LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 1 9 BLOCKIN 8 0 +Radford radford R Ra Rad Radf d rd ord ford LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 1 9 BLOCKIN 8 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 2 9 BLOCKIN 8 0 +Jeffrey jeffrey J Je Jef Jeff y ey rey frey LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 2 9 BLOCKIN 8 0 +Wu wu W Wu Wu Wu u Wu Wu Wu LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 3 9 BLOCKIN 8 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 3 9 BLOCKIN 8 0 +Rewon rewon R Re Rew Rewo n on won ewon LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 4 9 BLOCKIN 8 0 +Child child C Ch Chi Chil d ld ild hild LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 1 0 0 0 0 0 NOPUNCT 4 9 BLOCKIN 8 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 4 9 BLOCKIN 8 0 +David david D Da Dav Davi d id vid avid LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 5 9 BLOCKIN 8 0 +Luan luan L Lu Lua Luan n an uan Luan LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 6 9 BLOCKIN 8 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 6 9 BLOCKIN 8 0 +Dario dario D Da Dar Dari o io rio ario LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 7 9 BLOCKIN 8 0 +Amodei amodei A Am Amo Amod i ei dei odei LINEIN ALIGNEDLEFT INITCAP NODIGIT 
0 0 0 0 0 0 0 0 NOPUNCT 7 9 BLOCKIN 8 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 7 9 BLOCKIN 8 0 +and and a an and and d nd and and LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 8 9 BLOCKIN 8 0 +Ilya ilya I Il Ily Ilya a ya lya Ilya LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 9 9 BLOCKIN 8 0 +Sutskever sutskever S Su Sut Suts r er ver ever LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 10 9 BLOCKIN 8 0 +. . . . . . . . . . LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 10 9 BLOCKIN 8 0 +Language language L La Lan Lang e ge age uage LINEEND ALIGNEDLEFT INITCAP NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 10 9 BLOCKIN 8 0 +models models m mo mod mode s ls els dels LINESTART LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 0 6 BLOCKIN 7 0 +are are a ar are are e re are are LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 1 6 BLOCKIN 7 0 +unsupervised unsupervised u un uns unsu d ed sed ised LINEIN LINEINDENT NOCAPS NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 3 6 BLOCKIN 7 0 +multitask multitask m mu mul mult k sk ask task LINEIN LINEINDENT NOCAPS NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 5 6 BLOCKIN 7 0 +learners learners l le lea lear s rs ers ners LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 6 6 BLOCKIN 7 0 +. . . . . . . . . . 
LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 6 6 BLOCKIN 7 0 +OpenAI openai O Op Ope Open I AI nAI enAI LINEIN LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 7 6 BLOCKIN 7 0 +Blog blog B Bl Blo Blog g og log Blog LINEIN LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 8 6 BLOCKIN 7 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 8 6 BLOCKIN 7 0 +1 1 1 1 1 1 1 1 1 1 LINEIN LINEINDENT NOCAPS ALLDIGIT 1 0 0 0 0 0 0 0 NOPUNCT 9 6 BLOCKIN 7 0 +( ( ( ( ( ( ( ( ( ( LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 OPENBRACKET 9 6 BLOCKIN 7 0 +8 8 8 8 8 8 8 8 8 8 LINEIN LINEINDENT NOCAPS ALLDIGIT 1 0 0 0 0 0 0 0 NOPUNCT 9 6 BLOCKIN 7 0 +) ) ) ) ) ) ) ) ) ) LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 ENDBRACKET 9 6 BLOCKIN 7 0 +: : : : : : : : : : LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 PUNCT 9 6 BLOCKIN 7 0 +9 9 9 9 9 9 9 9 9 9 LINEIN LINEINDENT NOCAPS ALLDIGIT 1 0 0 0 0 0 0 0 NOPUNCT 9 6 BLOCKIN 7 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 9 6 BLOCKIN 7 0 +2019 2019 2 20 201 2019 9 19 019 2019 LINEIN LINEINDENT NOCAPS ALLDIGIT 0 0 0 0 0 1 0 0 NOPUNCT 10 6 BLOCKIN 7 0 +. . . . . . . . . . 
LINEEND LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 10 6 BLOCKEND 7 0 +[ [ [ [ [ [ [ [ [ [ LINESTART ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 OPENBRACKET 0 9 BLOCKSTART 7 0 +42 42 4 42 42 42 2 42 42 42 LINEIN ALIGNEDLEFT NOCAPS ALLDIGIT 0 0 0 0 0 0 0 0 NOPUNCT 0 9 BLOCKIN 7 0 +] ] ] ] ] ] ] ] ] ] LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 ENDBRACKET 0 9 BLOCKIN 7 0 +Timnit timnit T Ti Tim Timn t it nit mnit LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 1 9 BLOCKIN 7 0 +Gebru gebru G Ge Geb Gebr u ru bru ebru LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 1 9 BLOCKIN 7 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 2 9 BLOCKIN 7 0 +Jamie jamie J Ja Jam Jami e ie mie amie LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 2 9 BLOCKIN 7 0 +Morgenstern morgenstern M Mo Mor Morg n rn ern tern LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 4 9 BLOCKIN 7 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 4 9 BLOCKIN 7 0 +Briana briana B Br Bri Bria a na ana iana LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 5 9 BLOCKIN 7 0 +Vecchione vecchione V Ve Vec Vecc e ne one ione LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 6 9 BLOCKIN 7 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 6 9 BLOCKIN 7 0 +Jennifer jennifer J Je Jen Jenn r er fer ifer LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 7 9 BLOCKIN 7 0 +Wortman wortman W Wo Wor Wort n an man tman LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 8 9 BLOCKIN 7 0 +Vaughan vaughan V Va Vau Vaug n an han ghan LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 9 9 BLOCKIN 7 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 9 9 BLOCKIN 7 0 +Hanna hanna H Ha Han Hann a na nna anna LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 9 9 BLOCKIN 7 0 +Wallach wallach W Wa Wal Wall h ch ach lach 
LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 10 9 BLOCKIN 7 0 +, , , , , , , , , , LINEEND ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 10 9 BLOCKIN 7 0 +Hal hal H Ha Hal Hal l al Hal Hal LINESTART LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 0 8 BLOCKIN 4 0 +Daumé daumé D Da Dau Daum é mé umé aumé LINEIN LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 1 8 BLOCKIN 4 0 +III iii I II III III I II III III LINEIN LINEINDENT ALLCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 1 8 BLOCKIN 4 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 1 8 BLOCKIN 4 0 +and and a an and and d nd and and LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 2 8 BLOCKIN 4 0 +Kate kate K Ka Kat Kate e te ate Kate LINEIN LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 3 8 BLOCKIN 4 0 +Crawford crawford C Cr Cra Craw d rd ord ford LINEIN LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 4 8 BLOCKIN 4 0 +. . . . . . . . . . LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 4 8 BLOCKIN 4 0 +Datasheets datasheets D Da Dat Data s ts ets eets LINEIN LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 5 8 BLOCKIN 4 0 +for for f fo for for r or for for LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 6 8 BLOCKIN 4 0 +datasets datasets d da dat data s ts ets sets LINEIN LINEINDENT NOCAPS NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 7 8 BLOCKIN 4 0 +. . . . . . . . . . 
LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 7 8 BLOCKIN 4 0 +Communications communications C Co Com Comm s ns ons ions LINEIN LINEINDENT INITCAP NODIGIT 0 1 1 0 0 0 0 0 NOPUNCT 9 8 BLOCKIN 4 0 +of of o of of of f of of of LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 9 8 BLOCKIN 4 0 +the the t th the the e he the the LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 10 8 BLOCKIN 4 0 +ACM acm A AC ACM ACM M CM ACM ACM LINEIN LINEINDENT ALLCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 10 8 BLOCKIN 4 0 +, , , , , , , , , , LINEEND LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 10 8 BLOCKIN 4 0 +64 64 6 64 64 64 4 64 64 64 LINESTART LINEINDENT NOCAPS ALLDIGIT 0 0 0 0 0 0 0 0 NOPUNCT 0 2 BLOCKIN 6 0 +( ( ( ( ( ( ( ( ( ( LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 OPENBRACKET 1 2 BLOCKIN 6 0 +12 12 1 12 12 12 2 12 12 12 LINEIN LINEINDENT NOCAPS ALLDIGIT 0 0 0 0 0 0 0 0 NOPUNCT 2 2 BLOCKIN 6 0 +) ) ) ) ) ) ) ) ) ) LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 ENDBRACKET 2 2 BLOCKIN 6 0 +: : : : : : : : : : LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 PUNCT 3 2 BLOCKIN 6 0 +86 86 8 86 86 86 6 86 86 86 LINEIN LINEINDENT NOCAPS ALLDIGIT 0 0 0 0 0 0 0 0 NOPUNCT 4 2 BLOCKIN 6 0 +- - - - - - - - - - LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 HYPHEN 4 2 BLOCKIN 6 0 +92 92 9 92 92 92 2 92 92 92 LINEIN LINEINDENT NOCAPS ALLDIGIT 0 0 0 0 0 0 0 0 NOPUNCT 5 2 BLOCKIN 6 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 6 2 BLOCKIN 6 0 +2021 2021 2 20 202 2021 1 21 021 2021 LINEIN LINEINDENT NOCAPS ALLDIGIT 0 0 0 0 0 1 0 0 NOPUNCT 9 2 BLOCKIN 6 0 +. . . . . . . . . . 
LINEEND LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 9 2 BLOCKEND 6 0 +[ [ [ [ [ [ [ [ [ [ LINESTART ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 OPENBRACKET 0 9 BLOCKSTART 5 0 +43 43 4 43 43 43 3 43 43 43 LINEIN ALIGNEDLEFT NOCAPS ALLDIGIT 0 0 0 0 0 0 0 0 NOPUNCT 0 9 BLOCKIN 5 0 +] ] ] ] ] ] ] ] ] ] LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 ENDBRACKET 0 9 BLOCKIN 5 0 +Diederik diederik D Di Die Died k ik rik erik LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 1 9 BLOCKIN 5 0 +P p P P P P P P P P LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 NOPUNCT 1 9 BLOCKIN 5 0 +Kingma kingma K Ki Kin King a ma gma ngma LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 2 9 BLOCKIN 5 0 +and and a an and and d nd and and LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 3 9 BLOCKIN 5 0 +Jimmy jimmy J Ji Jim Jimm y my mmy immy LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 3 9 BLOCKIN 5 0 +Ba ba B Ba Ba Ba a Ba Ba Ba LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 4 9 BLOCKIN 5 0 +. . . . . . . . . . LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 4 9 BLOCKIN 5 0 +Adam adam A Ad Ada Adam m am dam Adam LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 4 9 BLOCKIN 5 0 +: : : : : : : : : : LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 PUNCT 5 9 BLOCKIN 5 0 +A a A A A A A A A A LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 1 0 0 0 0 0 NOPUNCT 5 9 BLOCKIN 5 0 +method method m me met meth d od hod thod LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 6 9 BLOCKIN 5 0 +for for f fo for for r or for for LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 6 9 BLOCKIN 5 0 +stochastic stochastic s st sto stoc c ic tic stic LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 7 9 BLOCKIN 5 0 +optimization optimization o op opt opti n on ion tion LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 9 9 BLOCKIN 5 0 +. . . . . . . . . . 
LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 9 9 BLOCKIN 5 0 +arXiv arxiv a ar arX arXi v iv Xiv rXiv LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 10 9 BLOCKIN 5 0 +preprint preprint p pr pre prep t nt int rint LINEEND ALIGNEDLEFT NOCAPS NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 10 9 BLOCKIN 5 0 +arXiv arxiv a ar arX arXi v iv Xiv rXiv LINESTART LINEINDENT NOCAPS NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 2 2 BLOCKIN 4 0 +: : : : : : : : : : LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 PUNCT 2 2 BLOCKIN 4 0 +1412 1412 1 14 141 1412 2 12 412 1412 LINEIN LINEINDENT NOCAPS ALLDIGIT 0 0 0 0 0 1 0 0 NOPUNCT 4 2 BLOCKIN 4 0 +. . . . . . . . . . LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 4 2 BLOCKIN 4 0 +6980 6980 6 69 698 6980 0 80 980 6980 LINEIN LINEINDENT NOCAPS ALLDIGIT 0 0 0 0 0 0 0 0 NOPUNCT 6 2 BLOCKIN 4 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 6 2 BLOCKIN 4 0 +2014 2014 2 20 201 2014 4 14 014 2014 LINEIN LINEINDENT NOCAPS ALLDIGIT 0 0 0 0 0 1 0 0 NOPUNCT 9 2 BLOCKIN 4 0 +. . . . . . . . . . 
LINEEND LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 9 2 BLOCKEND 4 0 +[ [ [ [ [ [ [ [ [ [ LINESTART ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 OPENBRACKET 0 9 BLOCKSTART 8 0 +44 44 4 44 44 44 4 44 44 44 LINEIN ALIGNEDLEFT NOCAPS ALLDIGIT 0 0 0 0 0 0 0 0 NOPUNCT 0 9 BLOCKIN 8 0 +] ] ] ] ] ] ] ] ] ] LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 ENDBRACKET 0 9 BLOCKIN 8 0 +Norman norman N No Nor Norm n an man rman LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 1 9 BLOCKIN 8 0 +P p P P P P P P P P LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 NOPUNCT 1 9 BLOCKIN 8 0 +Jouppi jouppi J Jo Jou Joup i pi ppi uppi LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 2 9 BLOCKIN 8 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 2 9 BLOCKIN 8 0 +Cliff cliff C Cl Cli Clif f ff iff liff LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 1 0 0 0 0 0 NOPUNCT 3 9 BLOCKIN 8 0 +Young young Y Yo You Youn g ng ung oung LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 1 0 0 0 0 0 NOPUNCT 3 9 BLOCKIN 8 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 3 9 BLOCKIN 8 0 +Nishant nishant N Ni Nis Nish t nt ant hant LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 4 9 BLOCKIN 8 0 +Patil patil P Pa Pat Pati l il til atil LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 5 9 BLOCKIN 8 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 5 9 BLOCKIN 8 0 +David david D Da Dav Davi d id vid avid LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 6 9 BLOCKIN 8 0 +Patterson patterson P Pa Pat Patt n on son rson LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 7 9 BLOCKIN 8 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 7 9 BLOCKIN 8 0 +Gaurav gaurav G Ga Gau Gaur v av rav urav LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 8 9 BLOCKIN 8 0 +Agrawal agrawal A Ag Agr Agra l al wal awal LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 
0 0 NOPUNCT 9 9 BLOCKIN 8 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 9 9 BLOCKIN 8 0 +Raminder raminder R Ra Ram Rami r er der nder LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 10 9 BLOCKIN 8 0 +Bajwa bajwa B Ba Baj Bajw a wa jwa ajwa LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 10 9 BLOCKIN 8 0 +, , , , , , , , , , LINEEND ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 10 9 BLOCKIN 8 0 +Sarah sarah S Sa Sar Sara h ah rah arah LINESTART LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 0 9 BLOCKIN 6 0 +Bates bates B Ba Bat Bate s es tes ates LINEIN LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 1 9 BLOCKIN 6 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 1 9 BLOCKIN 6 0 +Suresh suresh S Su Sur Sure h sh esh resh LINEIN LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 2 9 BLOCKIN 6 0 +Bhatia bhatia B Bh Bha Bhat a ia tia atia LINEIN LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 2 9 BLOCKIN 6 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 3 9 BLOCKIN 6 0 +Nan nan N Na Nan Nan n an Nan Nan LINEIN LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 3 9 BLOCKIN 6 0 +Boden boden B Bo Bod Bode n en den oden LINEIN LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 4 9 BLOCKIN 6 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 4 9 BLOCKIN 6 0 +Al al A Al Al Al l Al Al Al LINEIN LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 4 9 BLOCKIN 6 0 +Borchers borchers B Bo Bor Borc s rs ers hers LINEIN LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 5 9 BLOCKIN 6 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 5 9 BLOCKIN 6 0 +et et e et et et t et et et LINEIN LINEINDENT NOCAPS NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 6 9 BLOCKIN 6 0 + +] ] ] ] ] ] ] ] ] ] LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 ENDBRACKET 0 9 BLOCKIN 8 0 +Thomas thomas T Th Tho Thom s as mas omas LINEIN 
ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 1 9 BLOCKIN 8 0 +Wolf wolf W Wo Wol Wolf f lf olf Wolf LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 1 0 0 0 0 0 NOPUNCT 1 9 BLOCKIN 8 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 1 9 BLOCKIN 8 0 +Lysandre lysandre L Ly Lys Lysa e re dre ndre LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 3 9 BLOCKIN 8 0 +Debut debut D De Deb Debu t ut but ebut LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 3 9 BLOCKIN 8 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 3 9 BLOCKIN 8 0 +Victor victor V Vi Vic Vict r or tor ctor LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 1 0 0 0 0 0 NOPUNCT 4 9 BLOCKIN 8 0 +Sanh sanh S Sa San Sanh h nh anh Sanh LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 5 9 BLOCKIN 8 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 5 9 BLOCKIN 8 0 +Julien julien J Ju Jul Juli n en ien lien LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 6 9 BLOCKIN 8 0 +Chaumond chaumond C Ch Cha Chau d nd ond mond LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 7 9 BLOCKIN 8 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 7 9 BLOCKIN 8 0 +Clement clement C Cl Cle Clem t nt ent ment LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 8 9 BLOCKIN 8 0 +Delangue delangue D De Del Dela e ue gue ngue LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 9 9 BLOCKIN 8 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 9 9 BLOCKIN 8 0 +Anthony anthony A An Ant Anth y ny ony hony LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 10 9 BLOCKIN 8 0 +Moi moi M Mo Moi Moi i oi Moi Moi LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 10 9 BLOCKIN 8 0 +, , , , , , , , , , LINEEND ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 10 9 BLOCKIN 8 0 +Pierric pierric P Pi Pie Pier c ic ric rric LINESTART LINEINDENT INITCAP 
NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 0 9 BLOCKIN 9 0 +Cistac cistac C Ci Cis Cist c ac tac stac LINEIN LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 1 9 BLOCKIN 9 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 1 9 BLOCKIN 9 0 +Tim tim T Ti Tim Tim m im Tim Tim LINEIN LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 2 9 BLOCKIN 9 0 +Rault rault R Ra Rau Raul t lt ult ault LINEIN LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 2 9 BLOCKIN 9 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 3 9 BLOCKIN 9 0 +Rémi rémi R Ré Rém Rémi i mi émi Rémi LINEIN LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 3 9 BLOCKIN 9 0 +Louf louf L Lo Lou Louf f uf ouf Louf LINEIN LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 4 9 BLOCKIN 9 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 4 9 BLOCKIN 9 0 +Morgan morgan M Mo Mor Morg n an gan rgan LINEIN LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 5 9 BLOCKIN 9 0 +Funtowicz funtowicz F Fu Fun Funt z cz icz wicz LINEIN LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 6 9 BLOCKIN 9 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 6 9 BLOCKIN 9 0 +et et e et et et t et et et LINEIN LINEINDENT NOCAPS NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 6 9 BLOCKIN 9 0 +al al a al al al l al al al LINEIN LINEINDENT NOCAPS NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 7 9 BLOCKIN 9 0 +. . . . . . . . . . 
LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 7 9 BLOCKIN 9 0 +Transformers transformers T Tr Tra Tran s rs ers mers LINEIN LINEINDENT INITCAP NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 8 9 BLOCKIN 9 0 +: : : : : : : : : : LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 PUNCT 9 9 BLOCKIN 9 0 +State state S St Sta Stat e te ate tate LINEIN LINEINDENT INITCAP NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 9 9 BLOCKIN 9 0 +- - - - - - - - - - LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 HYPHEN 9 9 BLOCKIN 9 0 +of of o of of of f of of of LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 10 9 BLOCKIN 9 0 +- - - - - - - - - - LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 HYPHEN 10 9 BLOCKIN 9 0 +the the t th the the e he the the LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 10 9 BLOCKIN 9 0 +- - - - - - - - - - LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 HYPHEN 10 9 BLOCKIN 9 0 +art art a ar art art t rt art art LINEEND LINEINDENT NOCAPS NODIGIT 0 1 1 0 0 0 0 0 NOPUNCT 10 9 BLOCKIN 9 0 +natural natural n na nat natu l al ral ural LINESTART LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 0 9 BLOCKIN 1 0 +language language l la lan lang e ge age uage LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 1 9 BLOCKIN 1 0 +processing processing p pr pro proc g ng ing sing LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 2 9 BLOCKIN 1 0 +. . . . . . . . . . 
LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 3 9 BLOCKIN 1 0 +Proceedings proceedings P Pr Pro Proc s gs ngs ings LINEIN LINEINDENT INITCAP NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 4 9 BLOCKIN 1 0 +of of o of of of f of of of LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 4 9 BLOCKIN 1 0 +the the t th the the e he the the LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 5 9 BLOCKIN 1 0 +2020 2020 2 20 202 2020 0 20 020 2020 LINEIN LINEINDENT NOCAPS ALLDIGIT 0 0 0 0 0 1 0 0 NOPUNCT 5 9 BLOCKIN 1 0 +Conference conference C Co Con Conf e ce nce ence LINEIN LINEINDENT INITCAP NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 7 9 BLOCKIN 1 0 +on on o on on on n on on on LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 7 9 BLOCKIN 1 0 +Empirical empirical E Em Emp Empi l al cal ical LINEIN LINEINDENT INITCAP NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 8 9 BLOCKIN 1 0 +Methods methods M Me Met Meth s ds ods hods LINEIN LINEINDENT INITCAP NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 9 9 BLOCKIN 1 0 +in in i in in in n in in in LINEIN LINEINDENT NOCAPS NODIGIT 0 1 1 0 0 0 0 0 NOPUNCT 10 9 BLOCKIN 1 0 +Natural natural N Na Nat Natu l al ral ural LINEEND LINEINDENT INITCAP NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 10 9 BLOCKIN 1 0 +Language language L La Lan Lang e ge age uage LINESTART LINEINDENT INITCAP NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 1 6 BLOCKIN 5 0 +Processing processing P Pr Pro Proc g ng ing sing LINEIN LINEINDENT INITCAP NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 3 6 BLOCKIN 5 0 +: : : : : : : : : : LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 PUNCT 3 6 BLOCKIN 5 0 +System system S Sy Sys Syst m em tem stem LINEIN LINEINDENT INITCAP NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 4 6 BLOCKIN 5 0 +Demonstrations demonstrations D De Dem Demo s ns ons ions LINEIN LINEINDENT INITCAP NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 7 6 BLOCKIN 5 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 7 6 BLOCKIN 5 0 +pages pages p pa pag page s es ges ages LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 8 6 
BLOCKIN 5 0 +38 38 3 38 38 38 8 38 38 38 LINEIN LINEINDENT NOCAPS ALLDIGIT 0 0 0 0 0 0 0 0 NOPUNCT 8 6 BLOCKIN 5 0 +- - - - - - - - - - LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 HYPHEN 9 6 BLOCKIN 5 0 +45 45 4 45 45 45 5 45 45 45 LINEIN LINEINDENT NOCAPS ALLDIGIT 0 0 0 0 0 0 0 0 NOPUNCT 9 6 BLOCKIN 5 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 9 6 BLOCKIN 5 0 +2020 2020 2 20 202 2020 0 20 020 2020 LINEIN LINEINDENT NOCAPS ALLDIGIT 0 0 0 0 0 1 0 0 NOPUNCT 10 6 BLOCKIN 5 0 +. . . . . . . . . . LINEEND LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 10 6 BLOCKEND 5 0 +[ [ [ [ [ [ [ [ [ [ LINESTART ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 OPENBRACKET 0 8 BLOCKSTART 8 0 +36 36 3 36 36 36 6 36 36 36 LINEIN ALIGNEDLEFT NOCAPS ALLDIGIT 0 0 0 0 0 0 0 0 NOPUNCT 0 8 BLOCKIN 8 0 +] ] ] ] ] ] ] ] ] ] LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 ENDBRACKET 0 8 BLOCKIN 8 0 +Adam adam A Ad Ada Adam m am dam Adam LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 1 8 BLOCKIN 8 0 +Paszke paszke P Pa Pas Pasz e ke zke szke LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 1 8 BLOCKIN 8 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 2 8 BLOCKIN 8 0 +Sam sam S Sa Sam Sam m am Sam Sam LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 2 8 BLOCKIN 8 0 +Gross gross G Gr Gro Gros s ss oss ross LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 1 0 0 0 0 0 NOPUNCT 3 8 BLOCKIN 8 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 3 8 BLOCKIN 8 0 +Francisco francisco F Fr Fra Fran o co sco isco LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 4 8 BLOCKIN 8 0 +Massa massa M Ma Mas Mass a sa ssa assa LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 5 8 BLOCKIN 8 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 5 8 BLOCKIN 8 0 +Adam adam A Ad Ada Adam m am dam Adam LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 6 8 
BLOCKIN 8 0 +Lerer lerer L Le Ler Lere r er rer erer LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 7 8 BLOCKIN 8 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 7 8 BLOCKIN 8 0 +James james J Ja Jam Jame s es mes ames LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 7 8 BLOCKIN 8 0 +Bradbury bradbury B Br Bra Brad y ry ury bury LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 9 8 BLOCKIN 8 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 9 8 BLOCKIN 8 0 +Gregory gregory G Gr Gre Greg y ry ory gory LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 10 8 BLOCKIN 8 0 +Chanan chanan C Ch Cha Chan n an nan anan LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 10 8 BLOCKIN 8 0 +, , , , , , , , , , LINEEND ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 10 8 BLOCKIN 8 0 +Trevor trevor T Tr Tre Trev r or vor evor LINESTART LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 0 9 BLOCKIN 7 0 +Killeen killeen K Ki Kil Kill n en een leen LINEIN LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 1 9 BLOCKIN 7 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 1 9 BLOCKIN 7 0 +Zeming zeming Z Ze Zem Zemi g ng ing ming LINEIN LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 2 9 BLOCKIN 7 0 +Lin lin L Li Lin Lin n in Lin Lin LINEIN LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 2 9 BLOCKIN 7 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 3 9 BLOCKIN 7 0 +Natalia natalia N Na Nat Nata a ia lia alia LINEIN LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 3 9 BLOCKIN 7 0 +Gimelshein gimelshein G Gi Gim Gime n in ein hein LINEIN LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 5 9 BLOCKIN 7 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 5 9 BLOCKIN 7 0 +Luca luca L Lu Luc Luca a ca uca Luca LINEIN LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 5 9 BLOCKIN 7 0 +Antiga antiga 
A An Ant Anti a ga iga tiga LINEIN LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 6 9 BLOCKIN 7 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 6 9 BLOCKIN 7 0 +et et e et et et t et et et LINEIN LINEINDENT NOCAPS NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 7 9 BLOCKIN 7 0 +al al a al al al l al al al LINEIN LINEINDENT NOCAPS NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 7 9 BLOCKIN 7 0 +. . . . . . . . . . LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 7 9 BLOCKIN 7 0 +Pytorch pytorch P Py Pyt Pyto h ch rch orch LINEIN LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 8 9 BLOCKIN 7 0 +: : : : : : : : : : LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 PUNCT 8 9 BLOCKIN 7 0 +An an A An An An n An An An LINEIN LINEINDENT INITCAP NODIGIT 0 1 1 0 0 0 0 0 NOPUNCT 9 9 BLOCKIN 7 0 +imperative imperative i im imp impe e ve ive tive LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 10 9 BLOCKIN 7 0 +style style s st sty styl e le yle tyle LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 10 9 BLOCKIN 7 0 +, , , , , , , , , , LINEEND LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 10 9 BLOCKIN 7 0 +high high h hi hig high h gh igh high LINESTART LINEINDENT NOCAPS NODIGIT 0 1 1 0 0 0 0 0 NOPUNCT 0 9 BLOCKIN 4 0 +- - - - - - - - - - LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 HYPHEN 0 9 BLOCKIN 4 0 +performance performance p pe per perf e ce nce ance LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 1 9 BLOCKIN 4 0 +deep deep d de dee deep p ep eep deep LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 2 9 BLOCKIN 4 0 +learning learning l le lea lear g ng ing ning LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 3 9 BLOCKIN 4 0 +library library l li lib libr y ry ary rary LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 4 9 BLOCKIN 4 0 +. . . . . . . . . . 
LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 4 9 BLOCKIN 4 0 +Advances advances A Ad Adv Adva s es ces nces LINEIN LINEINDENT INITCAP NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 5 9 BLOCKIN 4 0 +in in i in in in n in in in LINEIN LINEINDENT NOCAPS NODIGIT 0 1 1 0 0 0 0 0 NOPUNCT 5 9 BLOCKIN 4 0 +Neural neural N Ne Neu Neur l al ral ural LINEIN LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 6 9 BLOCKIN 4 0 +Information information I In Inf Info n on ion tion LINEIN LINEINDENT INITCAP NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 8 9 BLOCKIN 4 0 +Processing processing P Pr Pro Proc g ng ing sing LINEIN LINEINDENT INITCAP NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 9 9 BLOCKIN 4 0 +Systems systems S Sy Sys Syst s ms ems tems LINEIN LINEINDENT INITCAP NODIGIT 0 1 1 0 0 0 0 0 NOPUNCT 10 9 BLOCKIN 4 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 10 9 BLOCKIN 4 0 +32 32 3 32 32 32 2 32 32 32 LINEIN LINEINDENT NOCAPS ALLDIGIT 0 0 0 0 0 0 0 0 NOPUNCT 10 9 BLOCKIN 4 0 +, , , , , , , , , , LINEEND LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 10 9 BLOCKIN 4 0 +2019 2019 2 20 201 2019 9 19 019 2019 LINESTART LINEINDENT NOCAPS ALLDIGIT 0 0 0 0 0 1 0 0 NOPUNCT 5 0 BLOCKIN 1 0 +. . . . . . . . . . 
LINEEND LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 7 0 BLOCKEND 1 0 +[ [ [ [ [ [ [ [ [ [ LINESTART ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 OPENBRACKET 0 9 BLOCKSTART 8 0 +37 37 3 37 37 37 7 37 37 37 LINEIN ALIGNEDLEFT NOCAPS ALLDIGIT 0 0 0 0 0 0 0 0 NOPUNCT 0 9 BLOCKIN 8 0 +] ] ] ] ] ] ] ] ] ] LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 ENDBRACKET 0 9 BLOCKIN 8 0 +Martín martín M Ma Mar Mart n ín tín rtín LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 1 9 BLOCKIN 8 0 +Abadi abadi A Ab Aba Abad i di adi badi LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 1 9 BLOCKIN 8 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 2 9 BLOCKIN 8 0 +Ashish ashish A As Ash Ashi h sh ish hish LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 2 9 BLOCKIN 8 0 +Agarwal agarwal A Ag Aga Agar l al wal rwal LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 3 9 BLOCKIN 8 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 3 9 BLOCKIN 8 0 +Paul paul P Pa Pau Paul l ul aul Paul LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 4 9 BLOCKIN 8 0 +Barham barham B Ba Bar Barh m am ham rham LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 5 9 BLOCKIN 8 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 5 9 BLOCKIN 8 0 +Eugene eugene E Eu Eug Euge e ne ene gene LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 6 9 BLOCKIN 8 0 +Brevdo brevdo B Br Bre Brev o do vdo evdo LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 7 9 BLOCKIN 8 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 7 9 BLOCKIN 8 0 +Zhifeng zhifeng Z Zh Zhi Zhif g ng eng feng LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 8 9 BLOCKIN 8 0 +Chen chen C Ch Che Chen n en hen Chen LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 8 9 BLOCKIN 8 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 
COMMA 8 9 BLOCKIN 8 0 +Craig craig C Cr Cra Crai g ig aig raig LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 9 9 BLOCKIN 8 0 +Citro citro C Ci Cit Citr o ro tro itro LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 10 9 BLOCKIN 8 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 10 9 BLOCKIN 8 0 +Greg greg G Gr Gre Greg g eg reg Greg LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 10 9 BLOCKIN 8 0 +S s S S S S S S S S LINEEND ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 NOPUNCT 10 9 BLOCKIN 8 0 +Corrado corrado C Co Cor Corr o do ado rado LINESTART LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 0 8 BLOCKIN 7 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 0 8 BLOCKIN 7 0 +Andy andy A An And Andy y dy ndy Andy LINEIN LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 1 8 BLOCKIN 7 0 +Davis davis D Da Dav Davi s is vis avis LINEIN LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 2 8 BLOCKIN 7 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 2 8 BLOCKIN 7 0 +Jeffrey jeffrey J Je Jef Jeff y ey rey frey LINEIN LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 3 8 BLOCKIN 7 0 +Dean dean D De Dea Dean n an ean Dean LINEIN LINEINDENT INITCAP NODIGIT 0 1 1 0 0 0 0 0 NOPUNCT 4 8 BLOCKIN 7 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 4 8 BLOCKIN 7 0 +Matthieu matthieu M Ma Mat Matt u eu ieu hieu LINEIN LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 5 8 BLOCKIN 7 0 +Devin devin D De Dev Devi n in vin evin LINEIN LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 6 8 BLOCKIN 7 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 6 8 BLOCKIN 7 0 +et et e et et et t et et et LINEIN LINEINDENT NOCAPS NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 6 8 BLOCKIN 7 0 +al al a al al al l al al al LINEIN LINEINDENT NOCAPS NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 7 8 BLOCKIN 7 0 +. . . . . . . . . . 
LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 7 8 BLOCKIN 7 0 +Tensorflow tensorflow T Te Ten Tens w ow low flow LINEIN LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 8 8 BLOCKIN 7 0 +: : : : : : : : : : LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 PUNCT 8 8 BLOCKIN 7 0 +Large large L La Lar Larg e ge rge arge LINEIN LINEINDENT INITCAP NODIGIT 0 1 1 0 0 0 0 0 NOPUNCT 9 8 BLOCKIN 7 0 +- - - - - - - - - - LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 HYPHEN 9 8 BLOCKIN 7 0 +scale scale s sc sca scal e le ale cale LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 10 8 BLOCKIN 7 0 +machine machine m ma mac mach e ne ine hine LINEEND LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 10 8 BLOCKIN 7 0 +learning learning l le lea lear g ng ing ning LINESTART LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 0 8 BLOCKIN 5 0 +on on o on on on n on on on LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 1 8 BLOCKIN 5 0 +heterogeneous heterogeneous h he het hete s us ous eous LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 3 8 BLOCKIN 5 0 +distributed distributed d di dis dist d ed ted uted LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 4 8 BLOCKIN 5 0 +systems systems s sy sys syst s ms ems tems LINEIN LINEINDENT NOCAPS NODIGIT 0 1 1 0 0 0 0 0 NOPUNCT 5 8 BLOCKIN 5 0 +. . . . . . . . . . LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 5 8 BLOCKIN 5 0 +arXiv arxiv a ar arX arXi v iv Xiv rXiv LINEIN LINEINDENT NOCAPS NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 6 8 BLOCKIN 5 0 +preprint preprint p pr pre prep t nt int rint LINEIN LINEINDENT NOCAPS NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 7 8 BLOCKIN 5 0 +arXiv arxiv a ar arX arXi v iv Xiv rXiv LINEIN LINEINDENT NOCAPS NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 8 8 BLOCKIN 5 0 +: : : : : : : : : : LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 PUNCT 8 8 BLOCKIN 5 0 +1603 1603 1 16 160 1603 3 03 603 1603 LINEIN LINEINDENT NOCAPS ALLDIGIT 0 0 0 0 0 1 0 0 NOPUNCT 9 8 BLOCKIN 5 0 +. . . . . . . . . . 
LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 9 8 BLOCKIN 5 0 +04467 04467 0 04 044 0446 7 67 467 4467 LINEIN LINEINDENT NOCAPS ALLDIGIT 0 0 0 0 0 0 0 0 NOPUNCT 9 8 BLOCKIN 5 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 9 8 BLOCKIN 5 0 +2015 2015 2 20 201 2015 5 15 015 2015 LINEIN LINEINDENT NOCAPS ALLDIGIT 0 0 0 0 0 1 0 0 NOPUNCT 10 8 BLOCKIN 5 0 +. . . . . . . . . . LINEEND LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 10 8 BLOCKEND 5 0 +[ [ [ [ [ [ [ [ [ [ LINESTART ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 OPENBRACKET 0 9 BLOCKSTART 9 0 +38 38 3 38 38 38 8 38 38 38 LINEIN ALIGNEDLEFT NOCAPS ALLDIGIT 0 0 0 0 0 0 0 0 NOPUNCT 0 9 BLOCKIN 9 0 +] ] ] ] ] ] ] ] ] ] LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 ENDBRACKET 0 9 BLOCKIN 9 0 +Jacob jacob J Ja Jac Jaco b ob cob acob LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 1 9 BLOCKIN 9 0 +Devlin devlin D De Dev Devl n in lin vlin LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 1 9 BLOCKIN 9 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 2 9 BLOCKIN 9 0 +Ming ming M Mi Min Ming g ng ing Ming LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 2 9 BLOCKIN 9 0 +- - - - - - - - - - LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 HYPHEN 2 9 BLOCKIN 9 0 +Wei wei W We Wei Wei i ei Wei Wei LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 3 9 BLOCKIN 9 0 +Chang chang C Ch Cha Chan g ng ang hang LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 3 9 BLOCKIN 9 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 3 9 BLOCKIN 9 0 +Kenton kenton K Ke Ken Kent n on ton nton LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 4 9 BLOCKIN 9 0 +Lee lee L Le Lee Lee e ee Lee Lee LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 1 0 0 0 0 0 NOPUNCT 5 9 BLOCKIN 9 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 5 9 BLOCKIN 9 0 +and and a an and and d nd and and 
LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 5 9 BLOCKIN 9 0 +Kristina kristina K Kr Kri Kris a na ina tina LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 6 9 BLOCKIN 9 0 +Toutanova toutanova T To Tou Tout a va ova nova LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 7 9 BLOCKIN 9 0 +. . . . . . . . . . LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 7 9 BLOCKIN 9 0 +Bert bert B Be Ber Bert t rt ert Bert LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 8 9 BLOCKIN 9 0 +: : : : : : : : : : LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 PUNCT 8 9 BLOCKIN 9 0 +Pre pre P Pr Pre Pre e re Pre Pre LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 9 9 BLOCKIN 9 0 +- - - - - - - - - - LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 HYPHEN 9 9 BLOCKIN 9 0 +training training t tr tra trai g ng ing ning LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 1 1 0 0 0 0 0 NOPUNCT 10 9 BLOCKIN 9 0 +of of o of of of f of of of LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 10 9 BLOCKIN 9 0 +deep deep d de dee deep p ep eep deep LINEEND ALIGNEDLEFT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 10 9 BLOCKIN 9 0 +bidirectional bidirectional b bi bid bidi l al nal onal LINESTART LINEINDENT NOCAPS NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 1 9 BLOCKIN 5 0 +transformers transformers t tr tra tran s rs ers mers LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 2 9 BLOCKIN 5 0 +for for f fo for for r or for for LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 3 9 BLOCKIN 5 0 +language language l la lan lang e ge age uage LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 4 9 BLOCKIN 5 0 +understanding understanding u un und unde g ng ing ding LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 6 9 BLOCKIN 5 0 +. . . . . . . . . . 
LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 6 9 BLOCKIN 5 0 +arXiv arxiv a ar arX arXi v iv Xiv rXiv LINEIN LINEINDENT NOCAPS NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 6 9 BLOCKIN 5 0 +preprint preprint p pr pre prep t nt int rint LINEIN LINEINDENT NOCAPS NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 7 9 BLOCKIN 5 0 +arXiv arxiv a ar arX arXi v iv Xiv rXiv LINEIN LINEINDENT NOCAPS NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 8 9 BLOCKIN 5 0 +: : : : : : : : : : LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 PUNCT 8 9 BLOCKIN 5 0 +1810 1810 1 18 181 1810 0 10 810 1810 LINEIN LINEINDENT NOCAPS ALLDIGIT 0 0 0 0 0 1 0 0 NOPUNCT 9 9 BLOCKIN 5 0 +. . . . . . . . . . LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 9 9 BLOCKIN 5 0 +04805 04805 0 04 048 0480 5 05 805 4805 LINEIN LINEINDENT NOCAPS ALLDIGIT 0 0 0 0 0 0 0 0 NOPUNCT 9 9 BLOCKIN 5 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 9 9 BLOCKIN 5 0 +2018 2018 2 20 201 2018 8 18 018 2018 LINEIN LINEINDENT NOCAPS ALLDIGIT 0 0 0 0 0 1 0 0 NOPUNCT 10 9 BLOCKIN 5 0 +. . . . . . . . . . 
LINEEND LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 10 9 BLOCKEND 5 0 +[ [ [ [ [ [ [ [ [ [ LINESTART ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 OPENBRACKET 0 9 BLOCKSTART 9 0 +39 39 3 39 39 39 9 39 39 39 LINEIN ALIGNEDLEFT NOCAPS ALLDIGIT 0 0 0 0 0 0 0 0 NOPUNCT 0 9 BLOCKIN 9 0 +] ] ] ] ] ] ] ] ] ] LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 ENDBRACKET 0 9 BLOCKIN 9 0 +Yinhan yinhan Y Yi Yin Yinh n an han nhan LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 1 9 BLOCKIN 9 0 +Liu liu L Li Liu Liu u iu Liu Liu LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 1 9 BLOCKIN 9 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 1 9 BLOCKIN 9 0 +Myle myle M My Myl Myle e le yle Myle LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 2 9 BLOCKIN 9 0 +Ott ott O Ot Ott Ott t tt Ott Ott LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 3 9 BLOCKIN 9 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 3 9 BLOCKIN 9 0 +Naman naman N Na Nam Nama n an man aman LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 3 9 BLOCKIN 9 0 +Goyal goyal G Go Goy Goya l al yal oyal LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 4 9 BLOCKIN 9 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 4 9 BLOCKIN 9 0 +Jingfei jingfei J Ji Jin Jing i ei fei gfei LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 5 9 BLOCKIN 9 0 +Du du D Du Du Du u Du Du Du LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 6 9 BLOCKIN 9 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 6 9 BLOCKIN 9 0 +Mandar mandar M Ma Man Mand r ar dar ndar LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 7 9 BLOCKIN 9 0 +Joshi joshi J Jo Jos Josh i hi shi oshi LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 7 9 BLOCKIN 9 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 7 9 BLOCKIN 9 0 +Danqi danqi 
D Da Dan Danq i qi nqi anqi LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 8 9 BLOCKIN 9 0 +Chen chen C Ch Che Chen n en hen Chen LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 9 9 BLOCKIN 9 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 9 9 BLOCKIN 9 0 +Omer omer O Om Ome Omer r er mer Omer LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 10 9 BLOCKIN 9 0 +Levy levy L Le Lev Levy y vy evy Levy LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 1 0 0 0 0 0 NOPUNCT 10 9 BLOCKIN 9 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 10 9 BLOCKIN 9 0 +Mike mike M Mi Mik Mike e ke ike Mike LINEEND ALIGNEDLEFT INITCAP NODIGIT 0 1 1 0 0 0 0 0 NOPUNCT 10 9 BLOCKIN 9 0 +Lewis lewis L Le Lew Lewi s is wis ewis LINESTART LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 0 9 BLOCKIN 4 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 0 9 BLOCKIN 4 0 +Luke luke L Lu Luk Luke e ke uke Luke LINEIN LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 1 9 BLOCKIN 4 0 +Zettlemoyer zettlemoyer Z Ze Zet Zett r er yer oyer LINEIN LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 2 9 BLOCKIN 4 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 2 9 BLOCKIN 4 0 +and and a an and and d nd and and LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 3 9 BLOCKIN 4 0 +Veselin veselin V Ve Ves Vese n in lin elin LINEIN LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 4 9 BLOCKIN 4 0 +Stoyanov stoyanov S St Sto Stoy v ov nov anov LINEIN LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 5 9 BLOCKIN 4 0 +. . . . . . . . . . 
LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 5 9 BLOCKIN 4 0 +Roberta roberta R Ro Rob Robe a ta rta erta LINEIN LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 6 9 BLOCKIN 4 0 +: : : : : : : : : : LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 PUNCT 6 9 BLOCKIN 4 0 +A a A A A A A A A A LINEIN LINEINDENT ALLCAP NODIGIT 1 0 1 0 0 0 0 0 NOPUNCT 6 9 BLOCKIN 4 0 +robustly robustly r ro rob robu y ly tly stly LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 7 9 BLOCKIN 4 0 +optimized optimized o op opt opti d ed zed ized LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 8 9 BLOCKIN 4 0 +bert bert b be ber bert t rt ert bert LINEIN LINEINDENT NOCAPS NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 9 9 BLOCKIN 4 0 +pretraining pretraining p pr pre pret g ng ing ning LINEEND LINEINDENT NOCAPS NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 10 9 BLOCKIN 4 0 +approach approach a ap app appr h ch ach oach LINESTART LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 1 4 BLOCKIN 5 0 +. . . . . . . . . . LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 1 4 BLOCKIN 5 0 +arXiv arxiv a ar arX arXi v iv Xiv rXiv LINEIN LINEINDENT NOCAPS NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 3 4 BLOCKIN 5 0 +preprint preprint p pr pre prep t nt int rint LINEIN LINEINDENT NOCAPS NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 5 4 BLOCKIN 5 0 +arXiv arxiv a ar arX arXi v iv Xiv rXiv LINEIN LINEINDENT NOCAPS NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 6 4 BLOCKIN 5 0 +: : : : : : : : : : LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 PUNCT 6 4 BLOCKIN 5 0 +1907 1907 1 19 190 1907 7 07 907 1907 LINEIN LINEINDENT NOCAPS ALLDIGIT 0 0 0 0 0 1 0 0 NOPUNCT 7 4 BLOCKIN 5 0 +. . . . . . . . . . 
LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 7 4 BLOCKIN 5 0 +11692 11692 1 11 116 1169 2 92 692 1692 LINEIN LINEINDENT NOCAPS ALLDIGIT 0 0 0 0 0 1 0 0 NOPUNCT 8 4 BLOCKIN 5 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 9 4 BLOCKIN 5 0 +2019 2019 2 20 201 2019 9 19 019 2019 LINEIN LINEINDENT NOCAPS ALLDIGIT 0 0 0 0 0 1 0 0 NOPUNCT 10 4 BLOCKIN 5 0 +. . . . . . . . . . LINEEND LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 10 4 BLOCKEND 5 0 +[ [ [ [ [ [ [ [ [ [ LINESTART ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 OPENBRACKET 0 9 BLOCKSTART 9 0 +40 40 4 40 40 40 0 40 40 40 LINEIN ALIGNEDLEFT NOCAPS ALLDIGIT 0 0 0 0 0 0 0 0 NOPUNCT 0 9 BLOCKIN 9 0 +] ] ] ] ] ] ] ] ] ] LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 ENDBRACKET 0 9 BLOCKIN 9 0 +Sheng sheng S Sh She Shen g ng eng heng LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 1 9 BLOCKIN 9 0 +Shen shen S Sh She Shen n en hen Shen LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 1 9 BLOCKIN 9 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 1 9 BLOCKIN 9 0 +Zhewei zhewei Z Zh Zhe Zhew i ei wei ewei LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 2 9 BLOCKIN 9 0 +Dong dong D Do Don Dong g ng ong Dong LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 3 9 BLOCKIN 9 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 3 9 BLOCKIN 9 0 +Xiaocheng xiaocheng X Xi Xia Xiao g ng eng heng LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 4 9 BLOCKIN 9 0 +Ye ye Y Ye Ye Ye e Ye Ye Ye LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 5 9 BLOCKIN 9 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 5 9 BLOCKIN 9 0 +Linjian linjian L Li Lin Linj n an ian jian LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 6 9 BLOCKIN 9 0 +Ma ma M Ma Ma Ma a Ma Ma Ma LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 6 9 BLOCKIN 9 0 +, , , , , , , , 
, , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 6 9 BLOCKIN 9 0 +Zhewei zhewei Z Zh Zhe Zhew i ei wei ewei LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 7 9 BLOCKIN 9 0 +Li li L Li Li Li i Li Li Li LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 7 9 BLOCKIN 9 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 8 9 BLOCKIN 9 0 +Zirui zirui Z Zi Zir Ziru i ui rui irui LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 8 9 BLOCKIN 9 0 +Wang wang W Wa Wan Wang g ng ang Wang LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 9 9 BLOCKIN 9 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 9 9 BLOCKIN 9 0 +Samyam samyam S Sa Sam Samy m am yam myam LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 10 9 BLOCKIN 9 0 +Rajbhan rajbhan R Ra Raj Rajb n an han bhan LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 10 9 BLOCKIN 9 0 +- - - - - - - - - - LINEEND ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 HYPHEN 10 9 BLOCKIN 9 0 +dari dari d da dar dari i ri ari dari LINESTART LINEINDENT NOCAPS NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 0 9 BLOCKIN 5 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 0 9 BLOCKIN 5 0 +Yuxiong yuxiong Y Yu Yux Yuxi g ng ong iong LINEIN LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 1 9 BLOCKIN 5 0 +Wang wang W Wa Wan Wang g ng ang Wang LINEIN LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 2 9 BLOCKIN 5 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 2 9 BLOCKIN 5 0 +and and a an and and d nd and and LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 2 9 BLOCKIN 5 0 +Zhen zhen Z Zh Zhe Zhen n en hen Zhen LINEIN LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 3 9 BLOCKIN 5 0 +Yang yang Y Ya Yan Yang g ng ang Yang LINEIN LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 4 9 BLOCKIN 5 0 +. . . . . . . . . . 
LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 4 9 BLOCKIN 5 0 +Q q Q Q Q Q Q Q Q Q LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 NOPUNCT 4 9 BLOCKIN 5 0 +- - - - - - - - - - LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 HYPHEN 4 9 BLOCKIN 5 0 +bert bert b be ber bert t rt ert bert LINEIN LINEINDENT NOCAPS NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 5 9 BLOCKIN 5 0 +: : : : : : : : : : LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 PUNCT 5 9 BLOCKIN 5 0 +Hessian hessian H He Hes Hess n an ian sian LINEIN LINEINDENT INITCAP NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 6 9 BLOCKIN 5 0 +based based b ba bas base d ed sed ased LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 6 9 BLOCKIN 5 0 +ultra ultra u ul ult ultr a ra tra ltra LINEIN LINEINDENT NOCAPS NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 7 9 BLOCKIN 5 0 +low low l lo low low w ow low low LINEIN LINEINDENT NOCAPS NODIGIT 0 1 1 0 0 0 0 0 NOPUNCT 8 9 BLOCKIN 5 0 +precision precision p pr pre prec n on ion sion LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 9 9 BLOCKIN 5 0 +quantization quantization q qu qua quan n on ion tion LINEIN LINEINDENT NOCAPS NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 10 9 BLOCKIN 5 0 +of of o of of of f of of of LINEEND LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 10 9 BLOCKIN 5 0 +bert bert b be ber bert t rt ert bert LINESTART LINEINDENT NOCAPS NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 0 9 BLOCKIN 8 0 +. . . . . . . . . . 
LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 0 9 BLOCKIN 8 0 +Proceedings proceedings P Pr Pro Proc s gs ngs ings LINEIN LINEINDENT INITCAP NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 1 9 BLOCKIN 8 0 +of of o of of of f of of of LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 2 9 BLOCKIN 8 0 +the the t th the the e he the the LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 2 9 BLOCKIN 8 0 +AAAI aaai A AA AAA AAAI I AI AAI AAAI LINEIN LINEINDENT ALLCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 3 9 BLOCKIN 8 0 +Conference conference C Co Con Conf e ce nce ence LINEIN LINEINDENT INITCAP NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 4 9 BLOCKIN 8 0 +on on o on on on n on on on LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 5 9 BLOCKIN 8 0 +Artificial artificial A Ar Art Arti l al ial cial LINEIN LINEINDENT INITCAP NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 6 9 BLOCKIN 8 0 +Intelligence intelligence I In Int Inte e ce nce ence LINEIN LINEINDENT INITCAP NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 7 9 BLOCKIN 8 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 8 9 BLOCKIN 8 0 +34 34 3 34 34 34 4 34 34 34 LINEIN LINEINDENT NOCAPS ALLDIGIT 0 0 0 0 0 0 0 0 NOPUNCT 8 9 BLOCKIN 8 0 +( ( ( ( ( ( ( ( ( ( LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 OPENBRACKET 8 9 BLOCKIN 8 0 +05 05 0 05 05 05 5 05 05 05 LINEIN LINEINDENT NOCAPS ALLDIGIT 0 0 0 0 0 0 0 0 NOPUNCT 8 9 BLOCKIN 8 0 +) ) ) ) ) ) ) ) ) ) LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 ENDBRACKET 8 9 BLOCKIN 8 0 +: : : : : : : : : : LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 PUNCT 9 9 BLOCKIN 8 0 +8815 8815 8 88 881 8815 5 15 815 8815 LINEIN LINEINDENT NOCAPS ALLDIGIT 0 0 0 0 0 0 0 0 NOPUNCT 9 9 BLOCKIN 8 0 +- - - - - - - - - - LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 HYPHEN 9 9 BLOCKIN 8 0 +8821 8821 8 88 882 8821 1 21 821 8821 LINEIN LINEINDENT NOCAPS ALLDIGIT 0 0 0 0 0 0 0 0 NOPUNCT 10 9 BLOCKIN 8 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 10 9 BLOCKIN 8 0 
+2020 2020 2 20 202 2020 0 20 020 2020 LINEIN LINEINDENT NOCAPS ALLDIGIT 0 0 0 0 0 1 0 0 NOPUNCT 10 9 BLOCKIN 8 0 +. . . . . . . . . . LINEEND LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 10 9 BLOCKEND 8 0 +[ [ [ [ [ [ [ [ [ [ LINESTART ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 OPENBRACKET 0 9 BLOCKSTART 8 0 +41 41 4 41 41 41 1 41 41 41 LINEIN ALIGNEDLEFT NOCAPS ALLDIGIT 0 0 0 0 0 0 0 0 NOPUNCT 0 9 BLOCKIN 8 0 +] ] ] ] ] ] ] ] ] ] LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 ENDBRACKET 0 9 BLOCKIN 8 0 +Alec alec A Al Ale Alec c ec lec Alec LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 1 9 BLOCKIN 8 0 +Radford radford R Ra Rad Radf d rd ord ford LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 1 9 BLOCKIN 8 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 2 9 BLOCKIN 8 0 +Jeffrey jeffrey J Je Jef Jeff y ey rey frey LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 2 9 BLOCKIN 8 0 +Wu wu W Wu Wu Wu u Wu Wu Wu LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 3 9 BLOCKIN 8 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 3 9 BLOCKIN 8 0 +Rewon rewon R Re Rew Rewo n on won ewon LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 4 9 BLOCKIN 8 0 +Child child C Ch Chi Chil d ld ild hild LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 1 0 0 0 0 0 NOPUNCT 4 9 BLOCKIN 8 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 4 9 BLOCKIN 8 0 +David david D Da Dav Davi d id vid avid LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 5 9 BLOCKIN 8 0 +Luan luan L Lu Lua Luan n an uan Luan LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 6 9 BLOCKIN 8 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 6 9 BLOCKIN 8 0 +Dario dario D Da Dar Dari o io rio ario LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 7 9 BLOCKIN 8 0 +Amodei amodei A Am Amo Amod i ei dei odei LINEIN ALIGNEDLEFT INITCAP NODIGIT 
0 0 0 0 0 0 0 0 NOPUNCT 7 9 BLOCKIN 8 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 7 9 BLOCKIN 8 0 +and and a an and and d nd and and LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 8 9 BLOCKIN 8 0 +Ilya ilya I Il Ily Ilya a ya lya Ilya LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 9 9 BLOCKIN 8 0 +Sutskever sutskever S Su Sut Suts r er ver ever LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 10 9 BLOCKIN 8 0 +. . . . . . . . . . LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 10 9 BLOCKIN 8 0 +Language language L La Lan Lang e ge age uage LINEEND ALIGNEDLEFT INITCAP NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 10 9 BLOCKIN 8 0 +models models m mo mod mode s ls els dels LINESTART LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 0 6 BLOCKIN 7 0 +are are a ar are are e re are are LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 1 6 BLOCKIN 7 0 +unsupervised unsupervised u un uns unsu d ed sed ised LINEIN LINEINDENT NOCAPS NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 3 6 BLOCKIN 7 0 +multitask multitask m mu mul mult k sk ask task LINEIN LINEINDENT NOCAPS NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 5 6 BLOCKIN 7 0 +learners learners l le lea lear s rs ers ners LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 6 6 BLOCKIN 7 0 +. . . . . . . . . . 
LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 6 6 BLOCKIN 7 0 +OpenAI openai O Op Ope Open I AI nAI enAI LINEIN LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 7 6 BLOCKIN 7 0 +Blog blog B Bl Blo Blog g og log Blog LINEIN LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 8 6 BLOCKIN 7 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 8 6 BLOCKIN 7 0 +1 1 1 1 1 1 1 1 1 1 LINEIN LINEINDENT NOCAPS ALLDIGIT 1 0 0 0 0 0 0 0 NOPUNCT 9 6 BLOCKIN 7 0 +( ( ( ( ( ( ( ( ( ( LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 OPENBRACKET 9 6 BLOCKIN 7 0 +8 8 8 8 8 8 8 8 8 8 LINEIN LINEINDENT NOCAPS ALLDIGIT 1 0 0 0 0 0 0 0 NOPUNCT 9 6 BLOCKIN 7 0 +) ) ) ) ) ) ) ) ) ) LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 ENDBRACKET 9 6 BLOCKIN 7 0 +: : : : : : : : : : LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 PUNCT 9 6 BLOCKIN 7 0 +9 9 9 9 9 9 9 9 9 9 LINEIN LINEINDENT NOCAPS ALLDIGIT 1 0 0 0 0 0 0 0 NOPUNCT 9 6 BLOCKIN 7 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 9 6 BLOCKIN 7 0 +2019 2019 2 20 201 2019 9 19 019 2019 LINEIN LINEINDENT NOCAPS ALLDIGIT 0 0 0 0 0 1 0 0 NOPUNCT 10 6 BLOCKIN 7 0 +. . . . . . . . . . 
LINEEND LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 10 6 BLOCKEND 7 0 +[ [ [ [ [ [ [ [ [ [ LINESTART ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 OPENBRACKET 0 9 BLOCKSTART 7 0 +42 42 4 42 42 42 2 42 42 42 LINEIN ALIGNEDLEFT NOCAPS ALLDIGIT 0 0 0 0 0 0 0 0 NOPUNCT 0 9 BLOCKIN 7 0 +] ] ] ] ] ] ] ] ] ] LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 ENDBRACKET 0 9 BLOCKIN 7 0 +Timnit timnit T Ti Tim Timn t it nit mnit LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 1 9 BLOCKIN 7 0 +Gebru gebru G Ge Geb Gebr u ru bru ebru LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 1 9 BLOCKIN 7 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 2 9 BLOCKIN 7 0 +Jamie jamie J Ja Jam Jami e ie mie amie LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 2 9 BLOCKIN 7 0 +Morgenstern morgenstern M Mo Mor Morg n rn ern tern LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 4 9 BLOCKIN 7 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 4 9 BLOCKIN 7 0 +Briana briana B Br Bri Bria a na ana iana LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 5 9 BLOCKIN 7 0 +Vecchione vecchione V Ve Vec Vecc e ne one ione LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 6 9 BLOCKIN 7 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 6 9 BLOCKIN 7 0 +Jennifer jennifer J Je Jen Jenn r er fer ifer LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 7 9 BLOCKIN 7 0 +Wortman wortman W Wo Wor Wort n an man tman LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 8 9 BLOCKIN 7 0 +Vaughan vaughan V Va Vau Vaug n an han ghan LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 9 9 BLOCKIN 7 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 9 9 BLOCKIN 7 0 +Hanna hanna H Ha Han Hann a na nna anna LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 9 9 BLOCKIN 7 0 +Wallach wallach W Wa Wal Wall h ch ach lach 
LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 10 9 BLOCKIN 7 0 +, , , , , , , , , , LINEEND ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 10 9 BLOCKIN 7 0 +Hal hal H Ha Hal Hal l al Hal Hal LINESTART LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 0 8 BLOCKIN 4 0 +Daumé daumé D Da Dau Daum é mé umé aumé LINEIN LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 1 8 BLOCKIN 4 0 +III iii I II III III I II III III LINEIN LINEINDENT ALLCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 1 8 BLOCKIN 4 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 1 8 BLOCKIN 4 0 +and and a an and and d nd and and LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 2 8 BLOCKIN 4 0 +Kate kate K Ka Kat Kate e te ate Kate LINEIN LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 3 8 BLOCKIN 4 0 +Crawford crawford C Cr Cra Craw d rd ord ford LINEIN LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 4 8 BLOCKIN 4 0 +. . . . . . . . . . LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 4 8 BLOCKIN 4 0 +Datasheets datasheets D Da Dat Data s ts ets eets LINEIN LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 5 8 BLOCKIN 4 0 +for for f fo for for r or for for LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 6 8 BLOCKIN 4 0 +datasets datasets d da dat data s ts ets sets LINEIN LINEINDENT NOCAPS NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 7 8 BLOCKIN 4 0 +. . . . . . . . . . 
LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 7 8 BLOCKIN 4 0 +Communications communications C Co Com Comm s ns ons ions LINEIN LINEINDENT INITCAP NODIGIT 0 1 1 0 0 0 0 0 NOPUNCT 9 8 BLOCKIN 4 0 +of of o of of of f of of of LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 9 8 BLOCKIN 4 0 +the the t th the the e he the the LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 10 8 BLOCKIN 4 0 +ACM acm A AC ACM ACM M CM ACM ACM LINEIN LINEINDENT ALLCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 10 8 BLOCKIN 4 0 +, , , , , , , , , , LINEEND LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 10 8 BLOCKIN 4 0 +64 64 6 64 64 64 4 64 64 64 LINESTART LINEINDENT NOCAPS ALLDIGIT 0 0 0 0 0 0 0 0 NOPUNCT 0 2 BLOCKIN 6 0 +( ( ( ( ( ( ( ( ( ( LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 OPENBRACKET 1 2 BLOCKIN 6 0 +12 12 1 12 12 12 2 12 12 12 LINEIN LINEINDENT NOCAPS ALLDIGIT 0 0 0 0 0 0 0 0 NOPUNCT 2 2 BLOCKIN 6 0 +) ) ) ) ) ) ) ) ) ) LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 ENDBRACKET 2 2 BLOCKIN 6 0 +: : : : : : : : : : LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 PUNCT 3 2 BLOCKIN 6 0 +86 86 8 86 86 86 6 86 86 86 LINEIN LINEINDENT NOCAPS ALLDIGIT 0 0 0 0 0 0 0 0 NOPUNCT 4 2 BLOCKIN 6 0 +- - - - - - - - - - LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 HYPHEN 4 2 BLOCKIN 6 0 +92 92 9 92 92 92 2 92 92 92 LINEIN LINEINDENT NOCAPS ALLDIGIT 0 0 0 0 0 0 0 0 NOPUNCT 5 2 BLOCKIN 6 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 6 2 BLOCKIN 6 0 +2021 2021 2 20 202 2021 1 21 021 2021 LINEIN LINEINDENT NOCAPS ALLDIGIT 0 0 0 0 0 1 0 0 NOPUNCT 9 2 BLOCKIN 6 0 +. . . . . . . . . . 
LINEEND LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 9 2 BLOCKEND 6 0 +[ [ [ [ [ [ [ [ [ [ LINESTART ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 OPENBRACKET 0 9 BLOCKSTART 5 0 +43 43 4 43 43 43 3 43 43 43 LINEIN ALIGNEDLEFT NOCAPS ALLDIGIT 0 0 0 0 0 0 0 0 NOPUNCT 0 9 BLOCKIN 5 0 +] ] ] ] ] ] ] ] ] ] LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 ENDBRACKET 0 9 BLOCKIN 5 0 +Diederik diederik D Di Die Died k ik rik erik LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 1 9 BLOCKIN 5 0 +P p P P P P P P P P LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 NOPUNCT 1 9 BLOCKIN 5 0 +Kingma kingma K Ki Kin King a ma gma ngma LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 2 9 BLOCKIN 5 0 +and and a an and and d nd and and LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 3 9 BLOCKIN 5 0 +Jimmy jimmy J Ji Jim Jimm y my mmy immy LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 3 9 BLOCKIN 5 0 +Ba ba B Ba Ba Ba a Ba Ba Ba LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 4 9 BLOCKIN 5 0 +. . . . . . . . . . LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 4 9 BLOCKIN 5 0 +Adam adam A Ad Ada Adam m am dam Adam LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 4 9 BLOCKIN 5 0 +: : : : : : : : : : LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 PUNCT 5 9 BLOCKIN 5 0 +A a A A A A A A A A LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 1 0 0 0 0 0 NOPUNCT 5 9 BLOCKIN 5 0 +method method m me met meth d od hod thod LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 6 9 BLOCKIN 5 0 +for for f fo for for r or for for LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 6 9 BLOCKIN 5 0 +stochastic stochastic s st sto stoc c ic tic stic LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 7 9 BLOCKIN 5 0 +optimization optimization o op opt opti n on ion tion LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 9 9 BLOCKIN 5 0 +. . . . . . . . . . 
LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 9 9 BLOCKIN 5 0 +arXiv arxiv a ar arX arXi v iv Xiv rXiv LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 10 9 BLOCKIN 5 0 +preprint preprint p pr pre prep t nt int rint LINEEND ALIGNEDLEFT NOCAPS NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 10 9 BLOCKIN 5 0 +arXiv arxiv a ar arX arXi v iv Xiv rXiv LINESTART LINEINDENT NOCAPS NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 2 2 BLOCKIN 4 0 +: : : : : : : : : : LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 PUNCT 2 2 BLOCKIN 4 0 +1412 1412 1 14 141 1412 2 12 412 1412 LINEIN LINEINDENT NOCAPS ALLDIGIT 0 0 0 0 0 1 0 0 NOPUNCT 4 2 BLOCKIN 4 0 +. . . . . . . . . . LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 4 2 BLOCKIN 4 0 +6980 6980 6 69 698 6980 0 80 980 6980 LINEIN LINEINDENT NOCAPS ALLDIGIT 0 0 0 0 0 0 0 0 NOPUNCT 6 2 BLOCKIN 4 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 6 2 BLOCKIN 4 0 +2014 2014 2 20 201 2014 4 14 014 2014 LINEIN LINEINDENT NOCAPS ALLDIGIT 0 0 0 0 0 1 0 0 NOPUNCT 9 2 BLOCKIN 4 0 +. . . . . . . . . . 
LINEEND LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 9 2 BLOCKEND 4 0 +[ [ [ [ [ [ [ [ [ [ LINESTART ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 OPENBRACKET 0 9 BLOCKSTART 8 0 +44 44 4 44 44 44 4 44 44 44 LINEIN ALIGNEDLEFT NOCAPS ALLDIGIT 0 0 0 0 0 0 0 0 NOPUNCT 0 9 BLOCKIN 8 0 +] ] ] ] ] ] ] ] ] ] LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 ENDBRACKET 0 9 BLOCKIN 8 0 +Norman norman N No Nor Norm n an man rman LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 1 9 BLOCKIN 8 0 +P p P P P P P P P P LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 NOPUNCT 1 9 BLOCKIN 8 0 +Jouppi jouppi J Jo Jou Joup i pi ppi uppi LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 2 9 BLOCKIN 8 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 2 9 BLOCKIN 8 0 +Cliff cliff C Cl Cli Clif f ff iff liff LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 1 0 0 0 0 0 NOPUNCT 3 9 BLOCKIN 8 0 +Young young Y Yo You Youn g ng ung oung LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 1 0 0 0 0 0 NOPUNCT 3 9 BLOCKIN 8 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 3 9 BLOCKIN 8 0 +Nishant nishant N Ni Nis Nish t nt ant hant LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 4 9 BLOCKIN 8 0 +Patil patil P Pa Pat Pati l il til atil LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 5 9 BLOCKIN 8 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 5 9 BLOCKIN 8 0 +David david D Da Dav Davi d id vid avid LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 6 9 BLOCKIN 8 0 +Patterson patterson P Pa Pat Patt n on son rson LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 7 9 BLOCKIN 8 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 7 9 BLOCKIN 8 0 +Gaurav gaurav G Ga Gau Gaur v av rav urav LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 8 9 BLOCKIN 8 0 +Agrawal agrawal A Ag Agr Agra l al wal awal LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 
0 0 NOPUNCT 9 9 BLOCKIN 8 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 9 9 BLOCKIN 8 0 +Raminder raminder R Ra Ram Rami r er der nder LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 10 9 BLOCKIN 8 0 +Bajwa bajwa B Ba Baj Bajw a wa jwa ajwa LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 10 9 BLOCKIN 8 0 +, , , , , , , , , , LINEEND ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 10 9 BLOCKIN 8 0 +Sarah sarah S Sa Sar Sara h ah rah arah LINESTART LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 0 9 BLOCKIN 6 0 +Bates bates B Ba Bat Bate s es tes ates LINEIN LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 1 9 BLOCKIN 6 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 1 9 BLOCKIN 6 0 +Suresh suresh S Su Sur Sure h sh esh resh LINEIN LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 2 9 BLOCKIN 6 0 +Bhatia bhatia B Bh Bha Bhat a ia tia atia LINEIN LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 2 9 BLOCKIN 6 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 3 9 BLOCKIN 6 0 +Nan nan N Na Nan Nan n an Nan Nan LINEIN LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 3 9 BLOCKIN 6 0 +Boden boden B Bo Bod Bode n en den oden LINEIN LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 4 9 BLOCKIN 6 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 4 9 BLOCKIN 6 0 +Al al A Al Al Al l Al Al Al LINEIN LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 4 9 BLOCKIN 6 0 +Borchers borchers B Bo Bor Borc s rs ers hers LINEIN LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 5 9 BLOCKIN 6 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 5 9 BLOCKIN 6 0 +et et e et et et t et et et LINEIN LINEINDENT NOCAPS NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 6 9 BLOCKIN 6 0 +al al a al al al l al al al LINEIN LINEINDENT NOCAPS NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 6 9 BLOCKIN 6 0 +. . . . . . . . . . 
LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 6 9 BLOCKIN 6 0 +In in I In In In n In In In LINEIN LINEINDENT INITCAP NODIGIT 0 1 1 0 0 0 0 0 NOPUNCT 7 9 BLOCKIN 6 0 +- - - - - - - - - - LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 HYPHEN 7 9 BLOCKIN 6 0 +datacenter datacenter d da dat data r er ter nter LINEIN LINEINDENT NOCAPS NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 8 9 BLOCKIN 6 0 +performance performance p pe per perf e ce nce ance LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 9 9 BLOCKIN 6 0 +analysis analysis a an ana anal s is sis ysis LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 10 9 BLOCKIN 6 0 +of of o of of of f of of of LINEEND LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 10 9 BLOCKIN 6 0 +a a a a a a a a a a LINESTART LINEINDENT NOCAPS NODIGIT 1 0 1 0 0 0 0 0 NOPUNCT 0 9 BLOCKIN 1 0 +tensor tensor t te ten tens r or sor nsor LINEIN LINEINDENT NOCAPS NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 0 9 BLOCKIN 1 0 +processing processing p pr pro proc g ng ing sing LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 2 9 BLOCKIN 1 0 +unit unit u un uni unit t it nit unit LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 2 9 BLOCKIN 1 0 +. . . . . . . . . . 
LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 2 9 BLOCKIN 1 0 +Proceedings proceedings P Pr Pro Proc s gs ngs ings LINEIN LINEINDENT INITCAP NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 4 9 BLOCKIN 1 0 +of of o of of of f of of of LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 4 9 BLOCKIN 1 0 +the the t th the the e he the the LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 5 9 BLOCKIN 1 0 +44th 44th 4 44 44t 44th h th 4th 44th LINEIN LINEINDENT NOCAPS CONTAINSDIGITS 0 0 0 0 0 0 0 0 NOPUNCT 5 9 BLOCKIN 1 0 +Annual annual A An Ann Annu l al ual nual LINEIN LINEINDENT INITCAP NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 6 9 BLOCKIN 1 0 +International international I In Int Inte l al nal onal LINEIN LINEINDENT INITCAP NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 8 9 BLOCKIN 1 0 +Symposium symposium S Sy Sym Symp m um ium sium LINEIN LINEINDENT INITCAP NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 9 9 BLOCKIN 1 0 +on on o on on on n on on on LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 10 9 BLOCKIN 1 0 +Computer computer C Co Com Comp r er ter uter LINEEND LINEINDENT INITCAP NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 10 9 BLOCKIN 1 0 +Architecture architecture A Ar Arc Arch e re ure ture LINESTART LINEINDENT INITCAP NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 3 3 BLOCKIN 4 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 3 3 BLOCKIN 4 0 +pages pages p pa pag page s es ges ages LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 6 3 BLOCKIN 4 0 +1 1 1 1 1 1 1 1 1 1 LINEIN LINEINDENT NOCAPS ALLDIGIT 1 0 0 0 0 0 0 0 NOPUNCT 6 3 BLOCKIN 4 0 +- - - - - - - - - - LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 HYPHEN 7 3 BLOCKIN 4 0 +12 12 1 12 12 12 2 12 12 12 LINEIN LINEINDENT NOCAPS ALLDIGIT 0 0 0 0 0 0 0 0 NOPUNCT 7 3 BLOCKIN 4 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 8 3 BLOCKIN 4 0 +2017 2017 2 20 201 2017 7 17 017 2017 LINEIN LINEINDENT NOCAPS ALLDIGIT 0 0 0 0 0 1 0 0 NOPUNCT 10 3 BLOCKIN 4 0 +. . . . . . . . . . 
LINEEND LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 10 3 BLOCKEND 4 0 +[ [ [ [ [ [ [ [ [ [ LINESTART ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 OPENBRACKET 0 9 BLOCKSTART 8 0 +45 45 4 45 45 45 5 45 45 45 LINEIN ALIGNEDLEFT NOCAPS ALLDIGIT 0 0 0 0 0 0 0 0 NOPUNCT 0 9 BLOCKIN 8 0 +] ] ] ] ] ] ] ] ] ] LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 ENDBRACKET 0 9 BLOCKIN 8 0 +Martín martín M Ma Mar Mart n ín tín rtín LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 1 9 BLOCKIN 8 0 +Abadi abadi A Ab Aba Abad i di adi badi LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 1 9 BLOCKIN 8 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 2 9 BLOCKIN 8 0 +Paul paul P Pa Pau Paul l ul aul Paul LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 2 9 BLOCKIN 8 0 +Barham barham B Ba Bar Barh m am ham rham LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 3 9 BLOCKIN 8 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 3 9 BLOCKIN 8 0 +Jianmin jianmin J Ji Jia Jian n in min nmin LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 4 9 BLOCKIN 8 0 +Chen chen C Ch Che Chen n en hen Chen LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 5 9 BLOCKIN 8 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 5 9 BLOCKIN 8 0 +Zhifeng zhifeng Z Zh Zhi Zhif g ng eng feng LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 6 9 BLOCKIN 8 0 +Chen chen C Ch Che Chen n en hen Chen LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 6 9 BLOCKIN 8 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 6 9 BLOCKIN 8 0 +Andy andy A An And Andy y dy ndy Andy LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 7 9 BLOCKIN 8 0 +Davis davis D Da Dav Davi s is vis avis LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 8 9 BLOCKIN 8 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 8 9 
BLOCKIN 8 0 +Jeffrey jeffrey J Je Jef Jeff y ey rey frey LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 9 9 BLOCKIN 8 0 +Dean dean D De Dea Dean n an ean Dean LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 1 0 0 0 0 0 NOPUNCT 10 9 BLOCKIN 8 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 10 9 BLOCKIN 8 0 +Matthieu matthieu M Ma Mat Matt u eu ieu hieu LINEEND ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 10 9 BLOCKIN 8 0 +Devin devin D De Dev Devi n in vin evin LINESTART LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 0 9 BLOCKIN 7 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 0 9 BLOCKIN 7 0 +Sanjay sanjay S Sa San Sanj y ay jay njay LINEIN LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 1 9 BLOCKIN 7 0 +Ghemawat ghemawat G Gh Ghe Ghem t at wat awat LINEIN LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 2 9 BLOCKIN 7 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 2 9 BLOCKIN 7 0 +Geoffrey geoffrey G Ge Geo Geof y ey rey frey LINEIN LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 3 9 BLOCKIN 7 0 +Irving irving I Ir Irv Irvi g ng ing ving LINEIN LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 4 9 BLOCKIN 7 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 4 9 BLOCKIN 7 0 +Michael michael M Mi Mic Mich l el ael hael LINEIN LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 5 9 BLOCKIN 7 0 +Isard isard I Is Isa Isar d rd ard sard LINEIN LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 5 9 BLOCKIN 7 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 6 9 BLOCKIN 7 0 +et et e et et et t et et et LINEIN LINEINDENT NOCAPS NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 6 9 BLOCKIN 7 0 +al al a al al al l al al al LINEIN LINEINDENT NOCAPS NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 6 9 BLOCKIN 7 0 +. . . . . . . . . . 
LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 6 9 BLOCKIN 7 0 +Tensorflow tensorflow T Te Ten Tens w ow low flow LINEIN LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 8 9 BLOCKIN 7 0 +: : : : : : : : : : LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 PUNCT 8 9 BLOCKIN 7 0 +A a A A A A A A A A LINEIN LINEINDENT ALLCAP NODIGIT 1 0 1 0 0 0 0 0 NOPUNCT 8 9 BLOCKIN 7 0 +system system s sy sys syst m em tem stem LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 9 9 BLOCKIN 7 0 +for for f fo for for r or for for LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 9 9 BLOCKIN 7 0 +large large l la lar larg e ge rge arge LINEIN LINEINDENT NOCAPS NODIGIT 0 1 1 0 0 0 0 0 NOPUNCT 10 9 BLOCKIN 7 0 +- - - - - - - - - - LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 HYPHEN 10 9 BLOCKIN 7 0 +scale scale s sc sca scal e le ale cale LINEEND LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 10 9 BLOCKIN 7 0 +machine machine m ma mac mach e ne ine hine LINESTART LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 0 8 BLOCKIN 1 0 +learning learning l le lea lear g ng ing ning LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 1 8 BLOCKIN 1 0 +. . . . . . . . . . 
LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 2 8 BLOCKIN 1 0 +12th 12th 1 12 12t 12th h th 2th 12th LINEIN LINEINDENT NOCAPS CONTAINSDIGITS 0 0 0 0 0 0 0 0 NOPUNCT 2 8 BLOCKIN 1 0 +USENIX usenix U US USE USEN X IX NIX ENIX LINEIN LINEINDENT ALLCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 3 8 BLOCKIN 1 0 +Symposium symposium S Sy Sym Symp m um ium sium LINEIN LINEINDENT INITCAP NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 4 8 BLOCKIN 1 0 +on on o on on on n on on on LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 5 8 BLOCKIN 1 0 +Operating operating O Op Ope Oper g ng ing ting LINEIN LINEINDENT INITCAP NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 6 8 BLOCKIN 1 0 +Systems systems S Sy Sys Syst s ms ems tems LINEIN LINEINDENT INITCAP NODIGIT 0 1 1 0 0 0 0 0 NOPUNCT 7 8 BLOCKIN 1 0 +Design design D De Des Desi n gn ign sign LINEIN LINEINDENT INITCAP NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 8 8 BLOCKIN 1 0 +and and a an and and d nd and and LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 9 8 BLOCKIN 1 0 +Implementation implementation I Im Imp Impl n on ion tion LINEEND LINEINDENT INITCAP NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 10 8 BLOCKIN 1 0 +( ( ( ( ( ( ( ( ( ( LINESTART LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 OPENBRACKET 0 3 BLOCKIN 6 0 +OSDI osdi O OS OSD OSDI I DI SDI OSDI LINEIN LINEINDENT ALLCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 1 3 BLOCKIN 6 0 +16 16 1 16 16 16 6 16 16 16 LINEIN LINEINDENT NOCAPS ALLDIGIT 0 0 0 0 0 0 0 0 NOPUNCT 2 3 BLOCKIN 6 0 +) ) ) ) ) ) ) ) ) ) LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 ENDBRACKET 3 3 BLOCKIN 6 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 3 3 BLOCKIN 6 0 +pages pages p pa pag page s es ges ages LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 5 3 BLOCKIN 6 0 +265 265 2 26 265 265 5 65 265 265 LINEIN LINEINDENT NOCAPS ALLDIGIT 0 0 0 0 0 0 0 0 NOPUNCT 6 3 BLOCKIN 6 0 +- - - - - - - - - - LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 HYPHEN 7 3 BLOCKIN 6 0 +283 283 2 28 283 283 3 83 283 283 LINEIN 
LINEINDENT NOCAPS ALLDIGIT 0 0 0 0 0 0 0 0 NOPUNCT 8 3 BLOCKIN 6 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 8 3 BLOCKIN 6 0 +2016 2016 2 20 201 2016 6 16 016 2016 LINEIN LINEINDENT NOCAPS ALLDIGIT 0 0 0 0 0 1 0 0 NOPUNCT 10 3 BLOCKIN 6 0 +. . . . . . . . . . LINEEND LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 10 3 BLOCKEND 6 0 +[ [ [ [ [ [ [ [ [ [ LINESTART ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 OPENBRACKET 0 9 BLOCKSTART 7 0 +46 46 4 46 46 46 6 46 46 46 LINEIN ALIGNEDLEFT NOCAPS ALLDIGIT 0 0 0 0 0 0 0 0 NOPUNCT 0 9 BLOCKIN 7 0 +] ] ] ] ] ] ] ] ] ] LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 ENDBRACKET 0 9 BLOCKIN 7 0 +Mohammad mohammad M Mo Moh Moha d ad mad mmad LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 1 9 BLOCKIN 7 0 +Shoeybi shoeybi S Sh Sho Shoe i bi ybi eybi LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 2 9 BLOCKIN 7 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 2 9 BLOCKIN 7 0 +Mostofa mostofa M Mo Mos Most a fa ofa tofa LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 3 9 BLOCKIN 7 0 +Patwary patwary P Pa Pat Patw y ry ary wary LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 4 9 BLOCKIN 7 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 4 9 BLOCKIN 7 0 +Raghavendra raghavendra R Ra Rag Ragh a ra dra ndra LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 5 9 BLOCKIN 7 0 +Puri puri P Pu Pur Puri i ri uri Puri LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 6 9 BLOCKIN 7 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 6 9 BLOCKIN 7 0 +Patrick patrick P Pa Pat Patr k ck ick rick LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 7 9 BLOCKIN 7 0 +LeGresley legresley L Le LeG LeGr y ey ley sley LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 8 9 BLOCKIN 7 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 8 
9 BLOCKIN 7 0 +Jared jared J Ja Jar Jare d ed red ared LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 9 9 BLOCKIN 7 0 +Casper casper C Ca Cas Casp r er per sper LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 10 9 BLOCKIN 7 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 10 9 BLOCKIN 7 0 +and and a an and and d nd and and LINEEND ALIGNEDLEFT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 10 9 BLOCKIN 7 0 +Bryan bryan B Br Bry Brya n an yan ryan LINESTART LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 0 8 BLOCKIN 4 0 +Catanzaro catanzaro C Ca Cat Cata o ro aro zaro LINEIN LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 1 8 BLOCKIN 4 0 +. . . . . . . . . . LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 1 8 BLOCKIN 4 0 +Megatron megatron M Me Meg Mega n on ron tron LINEIN LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 2 8 BLOCKIN 4 0 +- - - - - - - - - - LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 HYPHEN 3 8 BLOCKIN 4 0 +lm lm l lm lm lm m lm lm lm LINEIN LINEINDENT NOCAPS NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 3 8 BLOCKIN 4 0 +: : : : : : : : : : LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 PUNCT 3 8 BLOCKIN 4 0 +Training training T Tr Tra Trai g ng ing ning LINEIN LINEINDENT INITCAP NODIGIT 0 1 1 0 0 0 0 0 NOPUNCT 4 8 BLOCKIN 4 0 +multi multi m mu mul mult i ti lti ulti LINEIN LINEINDENT NOCAPS NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 5 8 BLOCKIN 4 0 +- - - - - - - - - - LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 HYPHEN 5 8 BLOCKIN 4 0 +billion billion b bi bil bill n on ion lion LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 6 8 BLOCKIN 4 0 +parameter parameter p pa par para r er ter eter LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 7 8 BLOCKIN 4 0 +language language l la lan lang e ge age uage LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 8 8 BLOCKIN 4 0 +models models m mo mod mode s ls els dels LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 9 8 BLOCKIN 4 0 
+using using u us usi usin g ng ing sing LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 10 8 BLOCKIN 4 0 +model model m mo mod mode l el del odel LINEEND LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 10 8 BLOCKIN 4 0 +parallelism parallelism p pa par para m sm ism lism LINESTART LINEINDENT NOCAPS NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 2 5 BLOCKIN 5 0 +. . . . . . . . . . LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 2 5 BLOCKIN 5 0 +arXiv arxiv a ar arX arXi v iv Xiv rXiv LINEIN LINEINDENT NOCAPS NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 3 5 BLOCKIN 5 0 +preprint preprint p pr pre prep t nt int rint LINEIN LINEINDENT NOCAPS NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 5 5 BLOCKIN 5 0 +arXiv arxiv a ar arX arXi v iv Xiv rXiv LINEIN LINEINDENT NOCAPS NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 6 5 BLOCKIN 5 0 +: : : : : : : : : : LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 PUNCT 6 5 BLOCKIN 5 0 +1909 1909 1 19 190 1909 9 09 909 1909 LINEIN LINEINDENT NOCAPS ALLDIGIT 0 0 0 0 0 1 0 0 NOPUNCT 7 5 BLOCKIN 5 0 +. . . . . . . . . . LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 7 5 BLOCKIN 5 0 +08053 08053 0 08 080 0805 3 53 053 8053 LINEIN LINEINDENT NOCAPS ALLDIGIT 0 0 0 0 0 0 0 0 NOPUNCT 8 5 BLOCKIN 5 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 9 5 BLOCKIN 5 0 +2019 2019 2 20 201 2019 9 19 019 2019 LINEIN LINEINDENT NOCAPS ALLDIGIT 0 0 0 0 0 1 0 0 NOPUNCT 10 5 BLOCKIN 5 0 +. . . . . . . . . . 
LINEEND LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 10 5 BLOCKEND 5 0 +[ [ [ [ [ [ [ [ [ [ LINESTART ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 OPENBRACKET 0 9 BLOCKSTART 8 0 +47 47 4 47 47 47 7 47 47 47 LINEIN ALIGNEDLEFT NOCAPS ALLDIGIT 0 0 0 0 0 0 0 0 NOPUNCT 0 9 BLOCKIN 8 0 +] ] ] ] ] ] ] ] ] ] LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 ENDBRACKET 0 9 BLOCKIN 8 0 +Yang yang Y Ya Yan Yang g ng ang Yang LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 1 9 BLOCKIN 8 0 +You you Y Yo You You u ou You You LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 1 9 BLOCKIN 8 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 1 9 BLOCKIN 8 0 +Jing jing J Ji Jin Jing g ng ing Jing LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 2 9 BLOCKIN 8 0 +Li li L Li Li Li i Li Li Li LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 2 9 BLOCKIN 8 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 2 9 BLOCKIN 8 0 +Sashank sashank S Sa Sas Sash k nk ank hank LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 3 9 BLOCKIN 8 0 +Reddi reddi R Re Red Redd i di ddi eddi LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 4 9 BLOCKIN 8 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 4 9 BLOCKIN 8 0 +Jonathan jonathan J Jo Jon Jona n an han than LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 5 9 BLOCKIN 8 0 +Hseu hseu H Hs Hse Hseu u eu seu Hseu LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 6 9 BLOCKIN 8 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 6 9 BLOCKIN 8 0 +Sanjiv sanjiv S Sa San Sanj v iv jiv njiv LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 6 9 BLOCKIN 8 0 +Kumar kumar K Ku Kum Kuma r ar mar umar LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 7 9 BLOCKIN 8 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 7 9 BLOCKIN 8 0 
+Srinadh srinadh S Sr Sri Srin h dh adh nadh LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 8 9 BLOCKIN 8 0 +Bhojanapalli bhojanapalli B Bh Bho Bhoj i li lli alli LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 10 9 BLOCKIN 8 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 10 9 BLOCKIN 8 0 +Xiaodan xiaodan X Xi Xia Xiao n an dan odan LINEEND ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 10 9 BLOCKIN 8 0 +Song song S So Son Song g ng ong Song LINESTART LINEINDENT INITCAP NODIGIT 0 1 1 0 0 0 0 0 NOPUNCT 0 9 BLOCKIN 5 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 0 9 BLOCKIN 5 0 +James james J Ja Jam Jame s es mes ames LINEIN LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 1 9 BLOCKIN 5 0 +Demmel demmel D De Dem Demm l el mel mmel LINEIN LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 2 9 BLOCKIN 5 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 2 9 BLOCKIN 5 0 +Cho cho C Ch Cho Cho o ho Cho Cho LINEIN LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 2 9 BLOCKIN 5 0 +- - - - - - - - - - LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 HYPHEN 2 9 BLOCKIN 5 0 +Jui jui J Ju Jui Jui i ui Jui Jui LINEIN LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 3 9 BLOCKIN 5 0 +Hsieh hsieh H Hs Hsi Hsie h eh ieh sieh LINEIN LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 3 9 BLOCKIN 5 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 4 9 BLOCKIN 5 0 +and and a an and and d nd and and LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 4 9 BLOCKIN 5 0 +Payal payal P Pa Pay Paya l al yal ayal LINEIN LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 5 9 BLOCKIN 5 0 +Yadollahpour yadollahpour Y Ya Yad Yado r ur our pour LINEIN LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 6 9 BLOCKIN 5 0 +. . . . . . . . . . 
LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 6 9 BLOCKIN 5 0 +Large large L La Lar Larg e ge rge arge LINEIN LINEINDENT INITCAP NODIGIT 0 1 1 0 0 0 0 0 NOPUNCT 7 9 BLOCKIN 5 0 +batch batch b ba bat batc h ch tch atch LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 8 9 BLOCKIN 5 0 +optimization optimization o op opt opti n on ion tion LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 9 9 BLOCKIN 5 0 +for for f fo for for r or for for LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 10 9 BLOCKIN 5 0 +deep deep d de dee deep p ep eep deep LINEEND LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 10 9 BLOCKIN 5 0 +learning learning l le lea lear g ng ing ning LINESTART LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 1 7 BLOCKIN 6 0 +: : : : : : : : : : LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 PUNCT 1 7 BLOCKIN 6 0 +Training training T Tr Tra Trai g ng ing ning LINEIN LINEINDENT INITCAP NODIGIT 0 1 1 0 0 0 0 0 NOPUNCT 2 7 BLOCKIN 6 0 +bert bert b be ber bert t rt ert bert LINEIN LINEINDENT NOCAPS NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 3 7 BLOCKIN 6 0 +in in i in in in n in in in LINEIN LINEINDENT NOCAPS NODIGIT 0 1 1 0 0 0 0 0 NOPUNCT 3 7 BLOCKIN 6 0 +76 76 7 76 76 76 6 76 76 76 LINEIN LINEINDENT NOCAPS ALLDIGIT 0 0 0 0 0 0 0 0 NOPUNCT 4 7 BLOCKIN 6 0 +minutes minutes m mi min minu s es tes utes LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 5 7 BLOCKIN 6 0 +. . . . . . . . . . 
LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 5 7 BLOCKIN 6 0 +arXiv arxiv a ar arX arXi v iv Xiv rXiv LINEIN LINEINDENT NOCAPS NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 6 7 BLOCKIN 6 0 +preprint preprint p pr pre prep t nt int rint LINEIN LINEINDENT NOCAPS NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 7 7 BLOCKIN 6 0 +arXiv arxiv a ar arX arXi v iv Xiv rXiv LINEIN LINEINDENT NOCAPS NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 8 7 BLOCKIN 6 0 +: : : : : : : : : : LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 PUNCT 8 7 BLOCKIN 6 0 +1904 1904 1 19 190 1904 4 04 904 1904 LINEIN LINEINDENT NOCAPS ALLDIGIT 0 0 0 0 0 1 0 0 NOPUNCT 9 7 BLOCKIN 6 0 +. . . . . . . . . . LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 9 7 BLOCKIN 6 0 +00962 00962 0 00 009 0096 2 62 962 0962 LINEIN LINEINDENT NOCAPS ALLDIGIT 0 0 0 0 0 0 0 0 NOPUNCT 9 7 BLOCKIN 6 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 10 7 BLOCKIN 6 0 +2019 2019 2 20 201 2019 9 19 019 2019 LINEIN LINEINDENT NOCAPS ALLDIGIT 0 0 0 0 0 1 0 0 NOPUNCT 10 7 BLOCKIN 6 0 +. . . . . . . . . . 
LINEEND LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 10 7 BLOCKEND 6 0 +[ [ [ [ [ [ [ [ [ [ LINESTART ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 OPENBRACKET 0 7 BLOCKSTART 7 0 +48 48 4 48 48 48 8 48 48 48 LINEIN ALIGNEDLEFT NOCAPS ALLDIGIT 0 0 0 0 0 0 0 0 NOPUNCT 0 7 BLOCKIN 7 0 +] ] ] ] ] ] ] ] ] ] LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 ENDBRACKET 0 7 BLOCKIN 7 0 +Ian ian I Ia Ian Ian n an Ian Ian LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 1 7 BLOCKIN 7 0 +Goodfellow goodfellow G Go Goo Good w ow low llow LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 2 7 BLOCKIN 7 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 2 7 BLOCKIN 7 0 +Yoshua yoshua Y Yo Yos Yosh a ua hua shua LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 3 7 BLOCKIN 7 0 +Bengio bengio B Be Ben Beng o io gio ngio LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 4 7 BLOCKIN 7 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 4 7 BLOCKIN 7 0 +and and a an and and d nd and and LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 5 7 BLOCKIN 7 0 +Aaron aaron A Aa Aar Aaro n on ron aron LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 6 7 BLOCKIN 7 0 +Courville courville C Co Cou Cour e le lle ille LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 7 7 BLOCKIN 7 0 +. . . . . . . . . . LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 7 7 BLOCKIN 7 0 +Deep deep D De Dee Deep p ep eep Deep LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 8 7 BLOCKIN 7 0 +learning learning l le lea lear g ng ing ning LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 10 7 BLOCKIN 7 0 +. . . . . . . . . . LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 10 7 BLOCKIN 7 0 +2016 2016 2 20 201 2016 6 16 016 2016 LINEIN ALIGNEDLEFT NOCAPS ALLDIGIT 0 0 0 0 0 1 0 0 NOPUNCT 10 7 BLOCKIN 7 0 +. . . . . . . . . . 
LINEEND ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 10 7 BLOCKEND 7 0 +[ [ [ [ [ [ [ [ [ [ LINESTART ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 OPENBRACKET 0 9 BLOCKSTART 5 0 +49 49 4 49 49 49 9 49 49 49 LINEIN ALIGNEDLEFT NOCAPS ALLDIGIT 0 0 0 0 0 0 0 0 NOPUNCT 0 9 BLOCKIN 5 0 +] ] ] ] ] ] ] ] ] ] LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 ENDBRACKET 0 9 BLOCKIN 5 0 +James james J Ja Jam Jame s es mes ames LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 1 9 BLOCKIN 5 0 +Bergstra bergstra B Be Ber Berg a ra tra stra LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 2 9 BLOCKIN 5 0 +and and a an and and d nd and and LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 2 9 BLOCKIN 5 0 +Yoshua yoshua Y Yo Yos Yosh a ua hua shua LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 3 9 BLOCKIN 5 0 +Bengio bengio B Be Ben Beng o io gio ngio LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 4 9 BLOCKIN 5 0 +. . . . . . . . . . LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 4 9 BLOCKIN 5 0 +Random random R Ra Ran Rand m om dom ndom LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 5 9 BLOCKIN 5 0 +search search s se sea sear h ch rch arch LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 5 9 BLOCKIN 5 0 +for for f fo for for r or for for LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 6 9 BLOCKIN 5 0 +hyper hyper h hy hyp hype r er per yper LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 7 9 BLOCKIN 5 0 +- - - - - - - - - - LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 HYPHEN 7 9 BLOCKIN 5 0 +parameter parameter p pa par para r er ter eter LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 8 9 BLOCKIN 5 0 +optimization optimization o op opt opti n on ion tion LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 9 9 BLOCKIN 5 0 +. . . . . . . . . . 
LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 9 9 BLOCKIN 5 0 +Journal journal J Jo Jou Jour l al nal rnal LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 10 9 BLOCKIN 5 0 +of of o of of of f of of of LINEEND ALIGNEDLEFT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 10 9 BLOCKIN 5 0 +Machine machine M Ma Mac Mach e ne ine hine LINESTART LINEINDENT INITCAP NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 1 4 BLOCKIN 7 0 +Learning learning L Le Lea Lear g ng ing ning LINEIN LINEINDENT INITCAP NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 3 4 BLOCKIN 7 0 +Research research R Re Res Rese h ch rch arch LINEIN LINEINDENT INITCAP NODIGIT 0 1 1 0 0 0 0 0 NOPUNCT 5 4 BLOCKIN 7 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 5 4 BLOCKIN 7 0 +13 13 1 13 13 13 3 13 13 13 LINEIN LINEINDENT NOCAPS ALLDIGIT 0 0 0 0 0 0 0 0 NOPUNCT 6 4 BLOCKIN 7 0 +( ( ( ( ( ( ( ( ( ( LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 OPENBRACKET 6 4 BLOCKIN 7 0 +2 2 2 2 2 2 2 2 2 2 LINEIN LINEINDENT NOCAPS ALLDIGIT 1 0 0 0 0 0 0 0 NOPUNCT 6 4 BLOCKIN 7 0 +) ) ) ) ) ) ) ) ) ) LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 ENDBRACKET 7 4 BLOCKIN 7 0 +: : : : : : : : : : LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 PUNCT 7 4 BLOCKIN 7 0 +281 281 2 28 281 281 1 81 281 281 LINEIN LINEINDENT NOCAPS ALLDIGIT 0 0 0 0 0 0 0 0 NOPUNCT 7 4 BLOCKIN 7 0 +- - - - - - - - - - LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 HYPHEN 8 4 BLOCKIN 7 0 +305 305 3 30 305 305 5 05 305 305 LINEIN LINEINDENT NOCAPS ALLDIGIT 0 0 0 0 0 0 0 0 NOPUNCT 8 4 BLOCKIN 7 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 8 4 BLOCKIN 7 0 +2012 2012 2 20 201 2012 2 12 012 2012 LINEIN LINEINDENT NOCAPS ALLDIGIT 0 0 0 0 0 1 0 0 NOPUNCT 10 4 BLOCKIN 7 0 +. . . . . . . . . . 
LINEEND LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 10 4 BLOCKEND 7 0 +[ [ [ [ [ [ [ [ [ [ LINESTART ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 OPENBRACKET 0 9 BLOCKSTART 7 0 +50 50 5 50 50 50 0 50 50 50 LINEIN ALIGNEDLEFT NOCAPS ALLDIGIT 0 0 0 0 0 0 0 0 NOPUNCT 0 9 BLOCKIN 7 0 +] ] ] ] ] ] ] ] ] ] LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 ENDBRACKET 0 9 BLOCKIN 7 0 +Frank frank F Fr Fra Fran k nk ank rank LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 1 0 0 0 0 0 NOPUNCT 1 9 BLOCKIN 7 0 +Hutter hutter H Hu Hut Hutt r er ter tter LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 1 9 BLOCKIN 7 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 2 9 BLOCKIN 7 0 +Lars lars L La Lar Lars s rs ars Lars LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 2 9 BLOCKIN 7 0 +Kotthoff kotthoff K Ko Kot Kott f ff off hoff LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 3 9 BLOCKIN 7 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 3 9 BLOCKIN 7 0 +and and a an and and d nd and and LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 4 9 BLOCKIN 7 0 +Joaquin joaquin J Jo Joa Joaq n in uin quin LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 5 9 BLOCKIN 7 0 +Vanschoren vanschoren V Va Van Vans n en ren oren LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 6 9 BLOCKIN 7 0 +. . . . . . . . . . 
LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 6 9 BLOCKIN 7 0 +Automated automated A Au Aut Auto d ed ted ated LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 7 9 BLOCKIN 7 0 +Machine machine M Ma Mac Mach e ne ine hine LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 8 9 BLOCKIN 7 0 +Learning learning L Le Lea Lear g ng ing ning LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 9 9 BLOCKIN 7 0 +: : : : : : : : : : LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 PUNCT 9 9 BLOCKIN 7 0 +Methods methods M Me Met Meth s ds ods hods LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 10 9 BLOCKIN 7 0 +, , , , , , , , , , LINEEND ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 10 9 BLOCKIN 7 0 +Systems systems S Sy Sys Syst s ms ems tems LINESTART LINEINDENT INITCAP NODIGIT 0 1 1 0 0 0 0 0 NOPUNCT 1 4 BLOCKIN 4 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 1 4 BLOCKIN 4 0 +Challenges challenges C Ch Cha Chal s es ges nges LINEIN LINEINDENT INITCAP NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 4 4 BLOCKIN 4 0 +. . . . . . . . . . LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 4 4 BLOCKIN 4 0 +Springer springer S Sp Spr Spri r er ger nger LINEIN LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 6 4 BLOCKIN 4 0 +Nature nature N Na Nat Natu e re ure ture LINEIN LINEINDENT INITCAP NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 8 4 BLOCKIN 4 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 8 4 BLOCKIN 4 0 +2019 2019 2 20 201 2019 9 19 019 2019 LINEIN LINEINDENT NOCAPS ALLDIGIT 0 0 0 0 0 1 0 0 NOPUNCT 10 4 BLOCKIN 4 0 +. . . . . . . . . . 
LINEEND LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 10 4 BLOCKEND 4 0 +[ [ [ [ [ [ [ [ [ [ LINESTART ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 OPENBRACKET 0 9 BLOCKSTART 10 0 +51 51 5 51 51 51 1 51 51 51 LINEIN ALIGNEDLEFT NOCAPS ALLDIGIT 0 0 0 0 0 0 0 0 NOPUNCT 0 9 BLOCKIN 10 0 +] ] ] ] ] ] ] ] ] ] LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 ENDBRACKET 0 9 BLOCKIN 10 0 +Lutz lutz L Lu Lut Lutz z tz utz Lutz LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 0 9 BLOCKIN 10 0 +Prechelt prechelt P Pr Pre Prec t lt elt helt LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 1 9 BLOCKIN 10 0 +. . . . . . . . . . LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 2 9 BLOCKIN 10 0 +Early early E Ea Ear Earl y ly rly arly LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 1 0 0 0 0 0 NOPUNCT 2 9 BLOCKIN 10 0 +stopping stopping s st sto stop g ng ing ping LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 3 9 BLOCKIN 10 0 +- - - - - - - - - - LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 HYPHEN 3 9 BLOCKIN 10 0 +but but b bu but but t ut but but LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 4 9 BLOCKIN 10 0 +when when w wh whe when n en hen when LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 4 9 BLOCKIN 10 0 +? ? ? ? ? ? ? ? ? ? 
LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 PUNCT 4 9 BLOCKIN 10 0 +Neural neural N Ne Neu Neur l al ral ural LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 5 9 BLOCKIN 10 0 +Networks networks N Ne Net Netw s ks rks orks LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 1 0 0 0 0 0 NOPUNCT 6 9 BLOCKIN 10 0 +: : : : : : : : : : LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 PUNCT 6 9 BLOCKIN 10 0 +Tricks tricks T Tr Tri Tric s ks cks icks LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 7 9 BLOCKIN 10 0 +of of o of of of f of of of LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 7 9 BLOCKIN 10 0 +the the t th the the e he the the LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 8 9 BLOCKIN 10 0 +trade trade t tr tra trad e de ade rade LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 8 9 BLOCKIN 10 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 9 9 BLOCKIN 10 0 +pages pages p pa pag page s es ges ages LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 9 9 BLOCKIN 10 0 +55 55 5 55 55 55 5 55 55 55 LINEIN ALIGNEDLEFT NOCAPS ALLDIGIT 0 0 0 0 0 0 0 0 NOPUNCT 10 9 BLOCKIN 10 0 +- - - - - - - - - - LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 HYPHEN 10 9 BLOCKIN 10 0 +69 69 6 69 69 69 9 69 69 69 LINEIN ALIGNEDLEFT NOCAPS ALLDIGIT 0 0 0 0 0 0 0 0 NOPUNCT 10 9 BLOCKIN 10 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 10 9 BLOCKIN 10 0 +1998 1998 1 19 199 1998 8 98 998 1998 LINEIN ALIGNEDLEFT NOCAPS ALLDIGIT 0 0 0 0 0 1 0 0 NOPUNCT 10 9 BLOCKIN 10 0 +. . . . . . . . . . 
LINEEND ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 10 9 BLOCKEND 10 0 +[ [ [ [ [ [ [ [ [ [ LINESTART ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 OPENBRACKET 0 9 BLOCKSTART 4 0 +52 52 5 52 52 52 2 52 52 52 LINEIN ALIGNEDLEFT NOCAPS ALLDIGIT 0 0 0 0 0 0 0 0 NOPUNCT 0 9 BLOCKIN 4 0 +] ] ] ] ] ] ] ] ] ] LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 ENDBRACKET 0 9 BLOCKIN 4 0 +Alexander alexander A Al Ale Alex r er der nder LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 1 9 BLOCKIN 4 0 +Sergeev sergeev S Se Ser Serg v ev eev geev LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 2 9 BLOCKIN 4 0 +and and a an and and d nd and and LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 3 9 BLOCKIN 4 0 +Mike mike M Mi Mik Mike e ke ike Mike LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 1 0 0 0 0 0 NOPUNCT 3 9 BLOCKIN 4 0 +Del del D De Del Del l el Del Del LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 4 9 BLOCKIN 4 0 +Balso balso B Ba Bal Bals o so lso also LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 4 9 BLOCKIN 4 0 +. . . . . . . . . . 
LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 5 9 BLOCKIN 4 0 +Horovod horovod H Ho Hor Horo d od vod ovod LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 5 9 BLOCKIN 4 0 +: : : : : : : : : : LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 PUNCT 6 9 BLOCKIN 4 0 +fast fast f fa fas fast t st ast fast LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 6 9 BLOCKIN 4 0 +and and a an and and d nd and and LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 7 9 BLOCKIN 4 0 +easy easy e ea eas easy y sy asy easy LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 7 9 BLOCKIN 4 0 +distributed distributed d di dis dist d ed ted uted LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 9 9 BLOCKIN 4 0 +deep deep d de dee deep p ep eep deep LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 9 9 BLOCKIN 4 0 +learning learning l le lea lear g ng ing ning LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 10 9 BLOCKIN 4 0 +in in i in in in n in in in LINEEND ALIGNEDLEFT NOCAPS NODIGIT 0 1 1 0 0 0 0 0 NOPUNCT 10 9 BLOCKIN 4 0 +tensorflow tensorflow t te ten tens w ow low flow LINESTART LINEINDENT NOCAPS NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 1 5 BLOCKIN 5 0 +. . . . . . . . . . LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 2 5 BLOCKIN 5 0 +arXiv arxiv a ar arX arXi v iv Xiv rXiv LINEIN LINEINDENT NOCAPS NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 3 5 BLOCKIN 5 0 +preprint preprint p pr pre prep t nt int rint LINEIN LINEINDENT NOCAPS NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 5 5 BLOCKIN 5 0 +arXiv arxiv a ar arX arXi v iv Xiv rXiv LINEIN LINEINDENT NOCAPS NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 6 5 BLOCKIN 5 0 +: : : : : : : : : : LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 PUNCT 6 5 BLOCKIN 5 0 +1802 1802 1 18 180 1802 2 02 802 1802 LINEIN LINEINDENT NOCAPS ALLDIGIT 0 0 0 0 0 1 0 0 NOPUNCT 7 5 BLOCKIN 5 0 +. . . . . . . . . . 
LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 7 5 BLOCKIN 5 0 +05799 05799 0 05 057 0579 9 99 799 5799 LINEIN LINEINDENT NOCAPS ALLDIGIT 0 0 0 0 0 0 0 0 NOPUNCT 8 5 BLOCKIN 5 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 9 5 BLOCKIN 5 0 +2018 2018 2 20 201 2018 8 18 018 2018 LINEIN LINEINDENT NOCAPS ALLDIGIT 0 0 0 0 0 1 0 0 NOPUNCT 10 5 BLOCKIN 5 0 +. . . . . . . . . . LINEEND LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 10 5 BLOCKEND 5 0 +[ [ [ [ [ [ [ [ [ [ LINESTART ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 OPENBRACKET 0 9 BLOCKSTART 8 0 +53 53 5 53 53 53 3 53 53 53 LINEIN ALIGNEDLEFT NOCAPS ALLDIGIT 0 0 0 0 0 0 0 0 NOPUNCT 0 9 BLOCKIN 8 0 +] ] ] ] ] ] ] ] ] ] LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 ENDBRACKET 0 9 BLOCKIN 8 0 +Samyam samyam S Sa Sam Samy m am yam myam LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 1 9 BLOCKIN 8 0 +Rajbhandari rajbhandari R Ra Raj Rajb i ri ari dari LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 2 9 BLOCKIN 8 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 2 9 BLOCKIN 8 0 +Jeff jeff J Je Jef Jeff f ff eff Jeff LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 3 9 BLOCKIN 8 0 +Rasley rasley R Ra Ras Rasl y ey ley sley LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 4 9 BLOCKIN 8 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 4 9 BLOCKIN 8 0 +Olatunji olatunji O Ol Ola Olat i ji nji unji LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 5 9 BLOCKIN 8 0 +Ruwase ruwase R Ru Ruw Ruwa e se ase wase LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 6 9 BLOCKIN 8 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 6 9 BLOCKIN 8 0 +and and a an and and d nd and and LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 6 9 BLOCKIN 8 0 +Yuxiong yuxiong Y Yu Yux Yuxi g ng ong iong LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 
7 9 BLOCKIN 8 0 +He he H He He He e He He He LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 8 9 BLOCKIN 8 0 +. . . . . . . . . . LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 8 9 BLOCKIN 8 0 +Deepspeed deepspeed D De Dee Deep d ed eed peed LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 9 9 BLOCKIN 8 0 +: : : : : : : : : : LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 PUNCT 9 9 BLOCKIN 8 0 +Extreme extreme E Ex Ext Extr e me eme reme LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 10 9 BLOCKIN 8 0 +- - - - - - - - - - LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 HYPHEN 10 9 BLOCKIN 8 0 +scale scale s sc sca scal e le ale cale LINEEND ALIGNEDLEFT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 10 9 BLOCKIN 8 0 +model model m mo mod mode l el del odel LINESTART LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 0 6 BLOCKIN 5 0 +training training t tr tra trai g ng ing ning LINEIN LINEINDENT NOCAPS NODIGIT 0 1 1 0 0 0 0 0 NOPUNCT 2 6 BLOCKIN 5 0 +for for f fo for for r or for for LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 2 6 BLOCKIN 5 0 +everyone everyone e ev eve ever e ne one yone LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 4 6 BLOCKIN 5 0 +. . . . . . . . . . LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 4 6 BLOCKIN 5 0 +arXiv arxiv a ar arX arXi v iv Xiv rXiv LINEIN LINEINDENT NOCAPS NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 5 6 BLOCKIN 5 0 +preprint preprint p pr pre prep t nt int rint LINEIN LINEINDENT NOCAPS NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 6 6 BLOCKIN 5 0 +arXiv arxiv a ar arX arXi v iv Xiv rXiv LINEIN LINEINDENT NOCAPS NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 7 6 BLOCKIN 5 0 +: : : : : : : : : : LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 PUNCT 8 6 BLOCKIN 5 0 +2007 2007 2 20 200 2007 7 07 007 2007 LINEIN LINEINDENT NOCAPS ALLDIGIT 0 0 0 0 0 1 0 0 NOPUNCT 8 6 BLOCKIN 5 0 +. . . . . . . . . . 
LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 8 6 BLOCKIN 5 0 +04822 04822 0 04 048 0482 2 22 822 4822 LINEIN LINEINDENT NOCAPS ALLDIGIT 0 0 0 0 0 0 0 0 NOPUNCT 9 6 BLOCKIN 5 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 9 6 BLOCKIN 5 0 +2020 2020 2 20 202 2020 0 20 020 2020 LINEIN LINEINDENT NOCAPS ALLDIGIT 0 0 0 0 0 1 0 0 NOPUNCT 10 6 BLOCKIN 5 0 +. . . . . . . . . . LINEEND LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 10 6 BLOCKEND 5 0 +[ [ [ [ [ [ [ [ [ [ LINESTART ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 OPENBRACKET 0 9 BLOCKSTART 8 0 +54 54 5 54 54 54 4 54 54 54 LINEIN ALIGNEDLEFT NOCAPS ALLDIGIT 0 0 0 0 0 0 0 0 NOPUNCT 0 9 BLOCKIN 8 0 +] ] ] ] ] ] ] ] ] ] LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 ENDBRACKET 0 9 BLOCKIN 8 0 +Paulius paulius P Pa Pau Paul s us ius lius LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 1 9 BLOCKIN 8 0 +Micikevicius micikevicius M Mi Mic Mici s us ius cius LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 2 9 BLOCKIN 8 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 2 9 BLOCKIN 8 0 +Sharan sharan S Sh Sha Shar n an ran aran LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 3 9 BLOCKIN 8 0 +Narang narang N Na Nar Nara g ng ang rang LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 4 9 BLOCKIN 8 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 4 9 BLOCKIN 8 0 +Jonah jonah J Jo Jon Jona h ah nah onah LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 5 9 BLOCKIN 8 0 +Alben alben A Al Alb Albe n en ben lben LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 5 9 BLOCKIN 8 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 6 9 BLOCKIN 8 0 +Gregory gregory G Gr Gre Greg y ry ory gory LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 6 9 BLOCKIN 8 0 +Diamos diamos D Di Dia Diam s os mos amos LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 
0 NOPUNCT 7 9 BLOCKIN 8 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 7 9 BLOCKIN 8 0 +Erich erich E Er Eri Eric h ch ich rich LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 8 9 BLOCKIN 8 0 +Elsen elsen E El Els Else n en sen lsen LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 9 9 BLOCKIN 8 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 9 9 BLOCKIN 8 0 +David david D Da Dav Davi d id vid avid LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 10 9 BLOCKIN 8 0 +Garcia garcia G Ga Gar Garc a ia cia rcia LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 10 9 BLOCKIN 8 0 +, , , , , , , , , , LINEEND ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 10 9 BLOCKIN 8 0 +Boris boris B Bo Bor Bori s is ris oris LINESTART LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 0 8 BLOCKIN 5 0 +Ginsburg ginsburg G Gi Gin Gins g rg urg burg LINEIN LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 1 8 BLOCKIN 5 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 1 8 BLOCKIN 5 0 +Michael michael M Mi Mic Mich l el ael hael LINEIN LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 2 8 BLOCKIN 5 0 +Houston houston H Ho Hou Hous n on ton ston LINEIN LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 3 8 BLOCKIN 5 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 3 8 BLOCKIN 5 0 +Oleksii oleksii O Ol Ole Olek i ii sii ksii LINEIN LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 4 8 BLOCKIN 5 0 +Kuchaiev kuchaiev K Ku Kuc Kuch v ev iev aiev LINEIN LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 5 8 BLOCKIN 5 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 5 8 BLOCKIN 5 0 +Ganesh ganesh G Ga Gan Gane h sh esh nesh LINEIN LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 6 8 BLOCKIN 5 0 +Venkatesh venkatesh V Ve Ven Venk h sh esh tesh LINEIN LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 7 8 
BLOCKIN 5 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 8 8 BLOCKIN 5 0 +et et e et et et t et et et LINEIN LINEINDENT NOCAPS NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 8 8 BLOCKIN 5 0 +al al a al al al l al al al LINEIN LINEINDENT NOCAPS NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 8 8 BLOCKIN 5 0 +. . . . . . . . . . LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 9 8 BLOCKIN 5 0 +Mixed mixed M Mi Mix Mixe d ed xed ixed LINEIN LINEINDENT INITCAP NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 9 8 BLOCKIN 5 0 +precision precision p pr pre prec n on ion sion LINEEND LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 10 8 BLOCKIN 5 0 +training training t tr tra trai g ng ing ning LINESTART LINEINDENT NOCAPS NODIGIT 0 1 1 0 0 0 0 0 NOPUNCT 1 4 BLOCKIN 5 0 +. . . . . . . . . . LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 1 4 BLOCKIN 5 0 +arXiv arxiv a ar arX arXi v iv Xiv rXiv LINEIN LINEINDENT NOCAPS NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 3 4 BLOCKIN 5 0 +preprint preprint p pr pre prep t nt int rint LINEIN LINEINDENT NOCAPS NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 5 4 BLOCKIN 5 0 +arXiv arxiv a ar arX arXi v iv Xiv rXiv LINEIN LINEINDENT NOCAPS NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 6 4 BLOCKIN 5 0 +: : : : : : : : : : LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 PUNCT 6 4 BLOCKIN 5 0 +1710 1710 1 17 171 1710 0 10 710 1710 LINEIN LINEINDENT NOCAPS ALLDIGIT 0 0 0 0 0 1 0 0 NOPUNCT 7 4 BLOCKIN 5 0 +. . . . . . . . . . LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 7 4 BLOCKIN 5 0 +03740 03740 0 03 037 0374 0 40 740 3740 LINEIN LINEINDENT NOCAPS ALLDIGIT 0 0 0 0 0 0 0 0 NOPUNCT 8 4 BLOCKIN 5 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 9 4 BLOCKIN 5 0 +2018 2018 2 20 201 2018 8 18 018 2018 LINEIN LINEINDENT NOCAPS ALLDIGIT 0 0 0 0 0 1 0 0 NOPUNCT 10 4 BLOCKIN 5 0 +. . . . . . . . . . 
LINEEND LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 10 4 BLOCKEND 5 0 +[ [ [ [ [ [ [ [ [ [ LINESTART ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 OPENBRACKET 0 9 BLOCKSTART 9 0 +55 55 5 55 55 55 5 55 55 55 LINEIN ALIGNEDLEFT NOCAPS ALLDIGIT 0 0 0 0 0 0 0 0 NOPUNCT 0 9 BLOCKIN 9 0 +] ] ] ] ] ] ] ] ] ] LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 ENDBRACKET 0 9 BLOCKIN 9 0 +Karan karan K Ka Kar Kara n an ran aran LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 1 9 BLOCKIN 9 0 +Singhal singhal S Si Sin Sing l al hal ghal LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 2 9 BLOCKIN 9 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 2 9 BLOCKIN 9 0 +Tao tao T Ta Tao Tao o ao Tao Tao LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 2 9 BLOCKIN 9 0 +Tu tu T Tu Tu Tu u Tu Tu Tu LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 3 9 BLOCKIN 9 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 3 9 BLOCKIN 9 0 +Juraj juraj J Ju Jur Jura j aj raj uraj LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 4 9 BLOCKIN 9 0 +Gottweis gottweis G Go Got Gott s is eis weis LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 5 9 BLOCKIN 9 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 5 9 BLOCKIN 9 0 +Rory rory R Ro Ror Rory y ry ory Rory LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 5 9 BLOCKIN 9 0 +Sayres sayres S Sa Say Sayr s es res yres LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 6 9 BLOCKIN 9 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 6 9 BLOCKIN 9 0 +Ellery ellery E El Ell Elle y ry ery lery LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 7 9 BLOCKIN 9 0 +Wulczyn wulczyn W Wu Wul Wulc n yn zyn czyn LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 8 9 BLOCKIN 9 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 8 9 BLOCKIN 9 
0 +Le le L Le Le Le e Le Le Le LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 9 9 BLOCKIN 9 0 +Hou hou H Ho Hou Hou u ou Hou Hou LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 9 9 BLOCKIN 9 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 9 9 BLOCKIN 9 0 +Kevin kevin K Ke Kev Kevi n in vin evin LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 10 9 BLOCKIN 9 0 +Clark clark C Cl Cla Clar k rk ark lark LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 10 9 BLOCKIN 9 0 +, , , , , , , , , , LINEEND ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 10 9 BLOCKIN 9 0 +Stephen stephen S St Ste Step n en hen phen LINESTART LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 0 8 BLOCKIN 6 0 +Pfohl pfohl P Pf Pfo Pfoh l hl ohl fohl LINEIN LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 1 8 BLOCKIN 6 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 1 8 BLOCKIN 6 0 +Heather heather H He Hea Heat r er her ther LINEIN LINEINDENT INITCAP NODIGIT 0 1 1 0 0 0 0 0 NOPUNCT 2 8 BLOCKIN 6 0 +Cole cole C Co Col Cole e le ole Cole LINEIN LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 3 8 BLOCKIN 6 0 +- - - - - - - - - - LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 HYPHEN 3 8 BLOCKIN 6 0 +Lewis lewis L Le Lew Lewi s is wis ewis LINEIN LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 4 8 BLOCKIN 6 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 4 8 BLOCKIN 6 0 +Darlene darlene D Da Dar Darl e ne ene lene LINEIN LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 5 8 BLOCKIN 6 0 +Neal neal N Ne Nea Neal l al eal Neal LINEIN LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 5 8 BLOCKIN 6 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 6 8 BLOCKIN 6 0 +Mike mike M Mi Mik Mike e ke ike Mike LINEIN LINEINDENT INITCAP NODIGIT 0 1 1 0 0 0 0 0 NOPUNCT 6 8 BLOCKIN 6 0 +Schaekermann schaekermann S Sc Sch Scha n nn ann mann LINEIN 
LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 8 8 BLOCKIN 6 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 8 8 BLOCKIN 6 0 +Amy amy A Am Amy Amy y my Amy Amy LINEIN LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 9 8 BLOCKIN 6 0 +Wang wang W Wa Wan Wang g ng ang Wang LINEIN LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 9 8 BLOCKIN 6 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 9 8 BLOCKIN 6 0 +Mohamed mohamed M Mo Moh Moha d ed med amed LINEEND LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 10 8 BLOCKIN 6 0 +Amin amin A Am Ami Amin n in min Amin LINESTART LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 0 8 BLOCKIN 6 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 0 8 BLOCKIN 6 0 +Sami sami S Sa Sam Sami i mi ami Sami LINEIN LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 1 8 BLOCKIN 6 0 +Lachgar lachgar L La Lac Lach r ar gar hgar LINEIN LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 2 8 BLOCKIN 6 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 2 8 BLOCKIN 6 0 +Philip philip P Ph Phi Phil p ip lip ilip LINEIN LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 3 8 BLOCKIN 6 0 +Mansfield mansfield M Ma Man Mans d ld eld ield LINEIN LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 4 8 BLOCKIN 6 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 4 8 BLOCKIN 6 0 +Sushant sushant S Su Sus Sush t nt ant hant LINEIN LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 5 8 BLOCKIN 6 0 +Prakash prakash P Pr Pra Prak h sh ash kash LINEIN LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 6 8 BLOCKIN 6 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 6 8 BLOCKIN 6 0 +Bradley bradley B Br Bra Brad y ey ley dley LINEIN LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 7 8 BLOCKIN 6 0 +Green green G Gr Gre Gree n en een reen LINEIN LINEINDENT INITCAP NODIGIT 0 1 1 0 0 0 0 0 
NOPUNCT 8 8 BLOCKIN 6 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 8 8 BLOCKIN 6 0 +Ewa ewa E Ew Ewa Ewa a wa Ewa Ewa LINEIN LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 9 8 BLOCKIN 6 0 +Dominowska dominowska D Do Dom Domi a ka ska wska LINEIN LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 10 8 BLOCKIN 6 0 +, , , , , , , , , , LINEEND LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 10 8 BLOCKIN 6 0 +Blaise blaise B Bl Bla Blai e se ise aise LINESTART LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 0 8 BLOCKIN 6 0 +Aguera aguera A Ag Agu Ague a ra era uera LINEIN LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 1 8 BLOCKIN 6 0 +y y y y y y y y y y LINEIN LINEINDENT NOCAPS NODIGIT 1 0 0 0 0 0 0 0 NOPUNCT 1 8 BLOCKIN 6 0 +Arcas arcas A Ar Arc Arca s as cas rcas LINEIN LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 2 8 BLOCKIN 6 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 2 8 BLOCKIN 6 0 +Nenad nenad N Ne Nen Nena d ad nad enad LINEIN LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 3 8 BLOCKIN 6 0 +Tomasev tomasev T To Tom Toma v ev sev asev LINEIN LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 4 8 BLOCKIN 6 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 4 8 BLOCKIN 6 0 +Yun yun Y Yu Yun Yun n un Yun Yun LINEIN LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 5 8 BLOCKIN 6 0 +Liu liu L Li Liu Liu u iu Liu Liu LINEIN LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 5 8 BLOCKIN 6 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 5 8 BLOCKIN 6 0 +Renee renee R Re Ren Rene e ee nee enee LINEIN LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 6 8 BLOCKIN 6 0 +Wong wong W Wo Won Wong g ng ong Wong LINEIN LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 7 8 BLOCKIN 6 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 7 8 BLOCKIN 6 0 +Christopher christopher C Ch Chr Chri r er her pher LINEIN 
LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 8 8 BLOCKIN 6 0 +Semturs semturs S Se Sem Semt s rs urs turs LINEIN LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 10 8 BLOCKIN 6 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 10 8 BLOCKIN 6 0 +S s S S S S S S S S LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 NOPUNCT 10 8 BLOCKIN 6 0 +. . . . . . . . . . LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 10 8 BLOCKIN 6 0 +Sara sara S Sa Sar Sara a ra ara Sara LINEEND LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 10 8 BLOCKIN 6 0 +Mahdavi mahdavi M Ma Mah Mahd i vi avi davi LINESTART LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 0 8 BLOCKIN 7 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 0 8 BLOCKIN 7 0 +Joelle joelle J Jo Joe Joel e le lle elle LINEIN LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 1 8 BLOCKIN 7 0 +Barral barral B Ba Bar Barr l al ral rral LINEIN LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 2 8 BLOCKIN 7 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 2 8 BLOCKIN 7 0 +Dale dale D Da Dal Dale e le ale Dale LINEIN LINEINDENT INITCAP NODIGIT 0 1 1 0 0 0 0 0 NOPUNCT 3 8 BLOCKIN 7 0 +Webster webster W We Web Webs r er ter ster LINEIN LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 4 8 BLOCKIN 7 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 4 8 BLOCKIN 7 0 +Greg greg G Gr Gre Greg g eg reg Greg LINEIN LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 5 8 BLOCKIN 7 0 +S s S S S S S S S S LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 NOPUNCT 5 8 BLOCKIN 7 0 +. . . . . . . . . . 
LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 5 8 BLOCKIN 7 0 +Corrado corrado C Co Cor Corr o do ado rado LINEIN LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 6 8 BLOCKIN 7 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 6 8 BLOCKIN 7 0 +Yossi yossi Y Yo Yos Yoss i si ssi ossi LINEIN LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 7 8 BLOCKIN 7 0 +Matias matias M Ma Mat Mati s as ias tias LINEIN LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 8 8 BLOCKIN 7 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 8 8 BLOCKIN 7 0 +Shekoofeh shekoofeh S Sh She Shek h eh feh ofeh LINEIN LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 9 8 BLOCKIN 7 0 +Azizi azizi A Az Azi Aziz i zi izi zizi LINEIN LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 10 8 BLOCKIN 7 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 10 8 BLOCKIN 7 0 +Alan alan A Al Ala Alan n an lan Alan LINEEND LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 10 8 BLOCKIN 7 0 +Karthikesalingam karthikesalingam K Ka Kar Kart m am gam ngam LINESTART LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 1 8 BLOCKIN 3 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 1 8 BLOCKIN 3 0 +and and a an and and d nd and and LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 2 8 BLOCKIN 3 0 +Vivek vivek V Vi Viv Vive k ek vek ivek LINEIN LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 3 8 BLOCKIN 3 0 +Natarajan natarajan N Na Nat Nata n an jan ajan LINEIN LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 4 8 BLOCKIN 3 0 +. . . . . . . . . . 
LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 4 8 BLOCKIN 3 0 +Towards towards T To Tow Towa s ds rds ards LINEIN LINEINDENT INITCAP NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 5 8 BLOCKIN 3 0 +expert expert e ex exp expe t rt ert pert LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 6 8 BLOCKIN 3 0 +- - - - - - - - - - LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 HYPHEN 6 8 BLOCKIN 3 0 +level level l le lev leve l el vel evel LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 6 8 BLOCKIN 3 0 +medical medical m me med medi l al cal ical LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 7 8 BLOCKIN 3 0 +question question q qu que ques n on ion tion LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 8 8 BLOCKIN 3 0 +answering answering a an ans answ g ng ing ring LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 10 8 BLOCKIN 3 0 +with with w wi wit with h th ith with LINEEND LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 10 8 BLOCKIN 3 0 +large large l la lar larg e ge rge arge LINESTART LINEINDENT NOCAPS NODIGIT 0 1 1 0 0 0 0 0 NOPUNCT 1 2 BLOCKIN 2 0 +language language l la lan lang e ge age uage LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 5 2 BLOCKIN 2 0 +models models m mo mod mode s ls els dels LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 7 2 BLOCKIN 2 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 8 2 BLOCKIN 2 0 +2023 2023 2 20 202 2023 3 23 023 2023 LINEIN LINEINDENT NOCAPS ALLDIGIT 0 0 0 0 0 1 0 0 NOPUNCT 10 2 BLOCKIN 2 0 +. . . . . . . . . . 
LINEEND LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 10 2 BLOCKEND 2 0 +[ [ [ [ [ [ [ [ [ [ LINESTART ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 OPENBRACKET 0 9 BLOCKSTART 8 0 +56 56 5 56 56 56 6 56 56 56 LINEIN ALIGNEDLEFT NOCAPS ALLDIGIT 0 0 0 0 0 0 0 0 NOPUNCT 0 9 BLOCKIN 8 0 +] ] ] ] ] ] ] ] ] ] LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 ENDBRACKET 0 9 BLOCKIN 8 0 +Hongyang hongyang H Ho Hon Hong g ng ang yang LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 1 9 BLOCKIN 8 0 +Yang yang Y Ya Yan Yang g ng ang Yang LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 2 9 BLOCKIN 8 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 2 9 BLOCKIN 8 0 +Xiao xiao X Xi Xia Xiao o ao iao Xiao LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 2 9 BLOCKIN 8 0 +- - - - - - - - - - LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 HYPHEN 2 9 BLOCKIN 8 0 +Yang yang Y Ya Yan Yang g ng ang Yang LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 3 9 BLOCKIN 8 0 +Liu liu L Li Liu Liu u iu Liu Liu LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 3 9 BLOCKIN 8 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 3 9 BLOCKIN 8 0 +and and a an and and d nd and and LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 4 9 BLOCKIN 8 0 +Christina christina C Ch Chr Chri a na ina tina LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 5 9 BLOCKIN 8 0 +Dan dan D Da Dan Dan n an Dan Dan LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 6 9 BLOCKIN 8 0 +Wang wang W Wa Wan Wang g ng ang Wang LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 6 9 BLOCKIN 8 0 +. . . . . . . . . . 
LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 6 9 BLOCKIN 8 0 +Fingpt fingpt F Fi Fin Fing t pt gpt ngpt LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 7 9 BLOCKIN 8 0 +: : : : : : : : : : LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 PUNCT 7 9 BLOCKIN 8 0 +Open open O Op Ope Open n en pen Open LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 8 9 BLOCKIN 8 0 +- - - - - - - - - - LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 HYPHEN 8 9 BLOCKIN 8 0 +source source s so sou sour e ce rce urce LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 9 9 BLOCKIN 8 0 +financial financial f fi fin fina l al ial cial LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 10 9 BLOCKIN 8 0 +large large l la lar larg e ge rge arge LINEEND ALIGNEDLEFT NOCAPS NODIGIT 0 1 1 0 0 0 0 0 NOPUNCT 10 9 BLOCKIN 8 0 +language language l la lan lang e ge age uage LINESTART LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 3 2 BLOCKIN 2 0 +models models m mo mod mode s ls els dels LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 6 2 BLOCKIN 2 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 7 2 BLOCKIN 2 0 +2023 2023 2 20 202 2023 3 23 023 2023 LINEIN LINEINDENT NOCAPS ALLDIGIT 0 0 0 0 0 1 0 0 NOPUNCT 9 2 BLOCKIN 2 0 +. . . . . . . . . . 
LINEEND LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 10 2 BLOCKEND 2 0 +[ [ [ [ [ [ [ [ [ [ LINESTART ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 OPENBRACKET 0 9 BLOCKSTART 10 0 +57 57 5 57 57 57 7 57 57 57 LINEIN ALIGNEDLEFT NOCAPS ALLDIGIT 0 0 0 0 0 0 0 0 NOPUNCT 0 9 BLOCKIN 10 0 +] ] ] ] ] ] ] ] ] ] LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 ENDBRACKET 0 9 BLOCKIN 10 0 +Zhi zhi Z Zh Zhi Zhi i hi Zhi Zhi LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 0 9 BLOCKIN 10 0 +Zhou zhou Z Zh Zho Zhou u ou hou Zhou LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 1 9 BLOCKIN 10 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 1 9 BLOCKIN 10 0 +Jiang jiang J Ji Jia Jian g ng ang iang LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 2 9 BLOCKIN 10 0 +- - - - - - - - - - LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 HYPHEN 2 9 BLOCKIN 10 0 +Xin xin X Xi Xin Xin n in Xin Xin LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 2 9 BLOCKIN 10 0 +Shi shi S Sh Shi Shi i hi Shi Shi LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 3 9 BLOCKIN 10 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 3 9 BLOCKIN 10 0 +Peng peng P Pe Pen Peng g ng eng Peng LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 4 9 BLOCKIN 10 0 +- - - - - - - - - - LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 HYPHEN 4 9 BLOCKIN 10 0 +Xiao xiao X Xi Xia Xiao o ao iao Xiao LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 4 9 BLOCKIN 10 0 +Song song S So Son Song g ng ong Song LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 1 0 0 0 0 0 NOPUNCT 5 9 BLOCKIN 10 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 5 9 BLOCKIN 10 0 +Xiao xiao X Xi Xia Xiao o ao iao Xiao LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 6 9 BLOCKIN 10 0 +- - - - - - - - - - LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 HYPHEN 6 9 BLOCKIN 10 0 +Wen wen W We Wen Wen n 
en Wen Wen LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 6 9 BLOCKIN 10 0 +Yang yang Y Ya Yan Yang g ng ang Yang LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 7 9 BLOCKIN 10 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 7 9 BLOCKIN 10 0 +Yi yi Y Yi Yi Yi i Yi Yi Yi LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 7 9 BLOCKIN 10 0 +- - - - - - - - - - LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 HYPHEN 7 9 BLOCKIN 10 0 +Xuan xuan X Xu Xua Xuan n an uan Xuan LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 8 9 BLOCKIN 10 0 +Jin jin J Ji Jin Jin n in Jin Jin LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 8 9 BLOCKIN 10 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 8 9 BLOCKIN 10 0 +Lan lan L La Lan Lan n an Lan Lan LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 9 9 BLOCKIN 10 0 +- - - - - - - - - - LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 HYPHEN 9 9 BLOCKIN 10 0 +Zhe zhe Z Zh Zhe Zhe e he Zhe Zhe LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 9 9 BLOCKIN 10 0 +Guo guo G Gu Guo Guo o uo Guo Guo LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 10 9 BLOCKIN 10 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 10 9 BLOCKIN 10 0 +and and a an and and d nd and and LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 10 9 BLOCKIN 10 0 +Yu yu Y Yu Yu Yu u Yu Yu Yu LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 10 9 BLOCKIN 10 0 +- - - - - - - - - - LINEEND ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 HYPHEN 10 9 BLOCKIN 10 0 +Feng feng F Fe Fen Feng g ng eng Feng LINESTART LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 0 7 BLOCKIN 5 0 +Li li L Li Li Li i Li Li Li LINEIN LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 0 7 BLOCKIN 5 0 +. . . . . . . . . . 
LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 1 7 BLOCKIN 5 0 +Lawgpt lawgpt L La Law Lawg t pt gpt wgpt LINEIN LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 2 7 BLOCKIN 5 0 +: : : : : : : : : : LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 PUNCT 2 7 BLOCKIN 5 0 +A a A A A A A A A A LINEIN LINEINDENT ALLCAP NODIGIT 1 0 1 0 0 0 0 0 NOPUNCT 2 7 BLOCKIN 5 0 +chinese chinese c ch chi chin e se ese nese LINEIN LINEINDENT NOCAPS NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 3 7 BLOCKIN 5 0 +legal legal l le leg lega l al gal egal LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 4 7 BLOCKIN 5 0 +knowledge knowledge k kn kno know e ge dge edge LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 5 7 BLOCKIN 5 0 +- - - - - - - - - - LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 HYPHEN 6 7 BLOCKIN 5 0 +enhanced enhanced e en enh enha d ed ced nced LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 7 7 BLOCKIN 5 0 +large large l la lar larg e ge rge arge LINEIN LINEINDENT NOCAPS NODIGIT 0 1 1 0 0 0 0 0 NOPUNCT 7 7 BLOCKIN 5 0 +language language l la lan lang e ge age uage LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 9 7 BLOCKIN 5 0 +model model m mo mod mode l el del odel LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 10 7 BLOCKIN 5 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 10 7 BLOCKIN 5 0 +2024 2024 2 20 202 2024 4 24 024 2024 LINEIN LINEINDENT NOCAPS ALLDIGIT 0 0 0 0 0 1 0 0 NOPUNCT 10 7 BLOCKIN 5 0 +. . . . . . . . . . 
LINEEND LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 10 7 BLOCKEND 5 0 +[ [ [ [ [ [ [ [ [ [ LINESTART ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 OPENBRACKET 0 9 BLOCKSTART 10 0 +58 58 5 58 58 58 8 58 58 58 LINEIN ALIGNEDLEFT NOCAPS ALLDIGIT 0 0 0 0 0 0 0 0 NOPUNCT 0 9 BLOCKIN 10 0 +] ] ] ] ] ] ] ] ] ] LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 ENDBRACKET 0 9 BLOCKIN 10 0 +Linqing linqing L Li Lin Linq g ng ing qing LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 1 9 BLOCKIN 10 0 +Chen chen C Ch Che Chen n en hen Chen LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 1 9 BLOCKIN 10 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 2 9 BLOCKIN 10 0 +Weilei weilei W We Wei Weil i ei lei ilei LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 2 9 BLOCKIN 10 0 +Wang wang W Wa Wan Wang g ng ang Wang LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 3 9 BLOCKIN 10 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 3 9 BLOCKIN 10 0 +Zilong zilong Z Zi Zil Zilo g ng ong long LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 4 9 BLOCKIN 10 0 +Bai bai B Ba Bai Bai i ai Bai Bai LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 4 9 BLOCKIN 10 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 5 9 BLOCKIN 10 0 +Peng peng P Pe Pen Peng g ng eng Peng LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 5 9 BLOCKIN 10 0 +Xu xu X Xu Xu Xu u Xu Xu Xu LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 6 9 BLOCKIN 10 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 6 9 BLOCKIN 10 0 +Yan yan Y Ya Yan Yan n an Yan Yan LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 6 9 BLOCKIN 10 0 +Fang fang F Fa Fan Fang g ng ang Fang LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 7 9 BLOCKIN 10 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 7 9 BLOCKIN 10 0 
+Jie jie J Ji Jie Jie e ie Jie Jie LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 7 9 BLOCKIN 10 0 +Fang fang F Fa Fan Fang g ng ang Fang LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 8 9 BLOCKIN 10 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 8 9 BLOCKIN 10 0 +Wentao wentao W We Wen Went o ao tao ntao LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 9 9 BLOCKIN 10 0 +Wu wu W Wu Wu Wu u Wu Wu Wu LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 9 9 BLOCKIN 10 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 10 9 BLOCKIN 10 0 +Lizhi lizhi L Li Liz Lizh i hi zhi izhi LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 10 9 BLOCKIN 10 0 +Zhou zhou Z Zh Zho Zhou u ou hou Zhou LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 10 9 BLOCKIN 10 0 +, , , , , , , , , , LINEEND ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 10 9 BLOCKIN 10 0 +Ruiji ruiji R Ru Rui Ruij i ji iji uiji LINESTART LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 0 8 BLOCKIN 8 0 +Zhang zhang Z Zh Zha Zhan g ng ang hang LINEIN LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 1 8 BLOCKIN 8 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 1 8 BLOCKIN 8 0 +Yubin yubin Y Yu Yub Yubi n in bin ubin LINEIN LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 2 8 BLOCKIN 8 0 +Xia xia X Xi Xia Xia a ia Xia Xia LINEIN LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 2 8 BLOCKIN 8 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 2 8 BLOCKIN 8 0 +Chaobo chaobo C Ch Cha Chao o bo obo aobo LINEIN LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 3 8 BLOCKIN 8 0 +Xu xu X Xu Xu Xu u Xu Xu Xu LINEIN LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 4 8 BLOCKIN 8 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 4 8 BLOCKIN 8 0 +Ran ran R Ra Ran Ran n an Ran Ran LINEIN LINEINDENT INITCAP NODIGIT 0 0 1 0 
0 0 0 0 NOPUNCT 4 8 BLOCKIN 8 0 +Hu hu H Hu Hu Hu u Hu Hu Hu LINEIN LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 5 8 BLOCKIN 8 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 5 8 BLOCKIN 8 0 +Licong licong L Li Lic Lico g ng ong cong LINEIN LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 6 8 BLOCKIN 8 0 +Xu xu X Xu Xu Xu u Xu Xu Xu LINEIN LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 6 8 BLOCKIN 8 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 6 8 BLOCKIN 8 0 +Qijun qijun Q Qi Qij Qiju n un jun ijun LINEIN LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 7 8 BLOCKIN 8 0 +Cai cai C Ca Cai Cai i ai Cai Cai LINEIN LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 8 8 BLOCKIN 8 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 8 8 BLOCKIN 8 0 +Haoran haoran H Ha Hao Haor n an ran oran LINEIN LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 9 8 BLOCKIN 8 0 +Hua hua H Hu Hua Hua a ua Hua Hua LINEIN LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 9 8 BLOCKIN 8 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 9 8 BLOCKIN 8 0 +Jing jing J Ji Jin Jing g ng ing Jing LINEIN LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 10 8 BLOCKIN 8 0 +Sun sun S Su Sun Sun n un Sun Sun LINEIN LINEINDENT INITCAP NODIGIT 0 1 1 0 0 0 0 0 NOPUNCT 10 8 BLOCKIN 8 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 10 8 BLOCKIN 8 0 +Jin jin J Ji Jin Jin n in Jin Jin LINEEND LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 10 8 BLOCKIN 8 0 +Liu liu L Li Liu Liu u iu Liu Liu LINESTART LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 0 8 BLOCKIN 9 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 0 8 BLOCKIN 9 0 +Tian tian T Ti Tia Tian n an ian Tian LINEIN LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 1 8 BLOCKIN 9 0 +Qiu qiu Q Qi Qiu Qiu u iu Qiu Qiu LINEIN LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 
0 0 NOPUNCT 1 8 BLOCKIN 9 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 1 8 BLOCKIN 9 0 +Haowen haowen H Ha Hao Haow n en wen owen LINEIN LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 2 8 BLOCKIN 9 0 +Liu liu L Li Liu Liu u iu Liu Liu LINEIN LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 3 8 BLOCKIN 9 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 3 8 BLOCKIN 9 0 +Meng meng M Me Men Meng g ng eng Meng LINEIN LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 3 8 BLOCKIN 9 0 +Hu hu H Hu Hu Hu u Hu Hu Hu LINEIN LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 4 8 BLOCKIN 9 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 4 8 BLOCKIN 9 0 +Xiuwen xiuwen X Xi Xiu Xiuw n en wen uwen LINEIN LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 5 8 BLOCKIN 9 0 +Li li L Li Li Li i Li Li Li LINEIN LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 5 8 BLOCKIN 9 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 5 8 BLOCKIN 9 0 +Fei fei F Fe Fei Fei i ei Fei Fei LINEIN LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 6 8 BLOCKIN 9 0 +Gao gao G Ga Gao Gao o ao Gao Gao LINEIN LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 6 8 BLOCKIN 9 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 7 8 BLOCKIN 9 0 +Yufu yufu Y Yu Yuf Yufu u fu ufu Yufu LINEIN LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 7 8 BLOCKIN 9 0 +Wang wang W Wa Wan Wang g ng ang Wang LINEIN LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 8 8 BLOCKIN 9 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 8 8 BLOCKIN 9 0 +Lin lin L Li Lin Lin n in Lin Lin LINEIN LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 9 8 BLOCKIN 9 0 +Tie tie T Ti Tie Tie e ie Tie Tie LINEIN LINEINDENT INITCAP NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 9 8 BLOCKIN 9 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 9 8 BLOCKIN 9 0 
+Chaochao chaochao C Ch Cha Chao o ao hao chao LINEIN LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 10 8 BLOCKIN 9 0 +Wang wang W Wa Wan Wang g ng ang Wang LINEIN LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 10 8 BLOCKIN 9 0 +, , , , , , , , , , LINEEND LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 10 8 BLOCKIN 9 0 +Jianping jianping J Ji Jia Jian g ng ing ping LINESTART LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 0 9 BLOCKIN 8 0 +Lu lu L Lu Lu Lu u Lu Lu Lu LINEIN LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 1 9 BLOCKIN 8 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 1 9 BLOCKIN 8 0 +Cheng cheng C Ch Che Chen g ng eng heng LINEIN LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 2 9 BLOCKIN 8 0 +Sun sun S Su Sun Sun n un Sun Sun LINEIN LINEINDENT INITCAP NODIGIT 0 1 1 0 0 0 0 0 NOPUNCT 2 9 BLOCKIN 8 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 2 9 BLOCKIN 8 0 +Yixin yixin Y Yi Yix Yixi n in xin ixin LINEIN LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 3 9 BLOCKIN 8 0 +Wang wang W Wa Wan Wang g ng ang Wang LINEIN LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 4 9 BLOCKIN 8 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 4 9 BLOCKIN 8 0 +Shengjie shengjie S Sh She Shen e ie jie gjie LINEIN LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 5 9 BLOCKIN 8 0 +Yang yang Y Ya Yan Yang g ng ang Yang LINEIN LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 5 9 BLOCKIN 8 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 5 9 BLOCKIN 8 0 +Yuancheng yuancheng Y Yu Yua Yuan g ng eng heng LINEIN LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 7 9 BLOCKIN 8 0 +Li li L Li Li Li i Li Li Li LINEIN LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 7 9 BLOCKIN 8 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 7 9 BLOCKIN 8 0 +Lu lu L Lu Lu Lu u Lu Lu Lu LINEIN LINEINDENT INITCAP NODIGIT 0 1 0 0 
0 0 0 0 NOPUNCT 8 9 BLOCKIN 8 0 +Jin jin J Ji Jin Jin n in Jin Jin LINEIN LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 8 9 BLOCKIN 8 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 8 9 BLOCKIN 8 0 +Lisha lisha L Li Lis Lish a ha sha isha LINEIN LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 9 9 BLOCKIN 8 0 +Zhang zhang Z Zh Zha Zhan g ng ang hang LINEIN LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 10 9 BLOCKIN 8 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 10 9 BLOCKIN 8 0 +Fu fu F Fu Fu Fu u Fu Fu Fu LINEIN LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 10 9 BLOCKIN 8 0 +Bian bian B Bi Bia Bian n an ian Bian LINEIN LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 10 9 BLOCKIN 8 0 +, , , , , , , , , , LINEEND LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 10 9 BLOCKIN 8 0 +Zhongkai zhongkai Z Zh Zho Zhon i ai kai gkai LINESTART LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 0 8 BLOCKIN 5 0 +Ye ye Y Ye Ye Ye e Ye Ye Ye LINEIN LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 1 8 BLOCKIN 5 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 1 8 BLOCKIN 5 0 +Lidong lidong L Li Lid Lido g ng ong dong LINEIN LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 2 8 BLOCKIN 5 0 +Pei pei P Pe Pei Pei i ei Pei Pei LINEIN LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 2 8 BLOCKIN 5 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 2 8 BLOCKIN 5 0 +and and a an and and d nd and and LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 3 8 BLOCKIN 5 0 +Changyang changyang C Ch Cha Chan g ng ang yang LINEIN LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 4 8 BLOCKIN 5 0 +Tu tu T Tu Tu Tu u Tu Tu Tu LINEIN LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 5 8 BLOCKIN 5 0 +. . . . . . . . . . 
LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 5 8 BLOCKIN 5 0 +Pharmagpt pharmagpt P Ph Pha Phar t pt gpt agpt LINEIN LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 6 8 BLOCKIN 5 0 +: : : : : : : : : : LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 PUNCT 6 8 BLOCKIN 5 0 +Domain domain D Do Dom Doma n in ain main LINEIN LINEINDENT INITCAP NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 7 8 BLOCKIN 5 0 +- - - - - - - - - - LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 HYPHEN 7 8 BLOCKIN 5 0 +specific specific s sp spe spec c ic fic ific LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 8 8 BLOCKIN 5 0 +large large l la lar larg e ge rge arge LINEIN LINEINDENT NOCAPS NODIGIT 0 1 1 0 0 0 0 0 NOPUNCT 9 8 BLOCKIN 5 0 +language language l la lan lang e ge age uage LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 10 8 BLOCKIN 5 0 +models models m mo mod mode s ls els dels LINEEND LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 10 8 BLOCKIN 5 0 +for for f fo for for r or for for LINESTART LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 0 4 BLOCKIN 3 0 +bio bio b bi bio bio o io bio bio LINEIN LINEINDENT NOCAPS NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 1 4 BLOCKIN 3 0 +- - - - - - - - - - LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 HYPHEN 2 4 BLOCKIN 3 0 +pharmaceutical pharmaceutical p ph pha phar l al cal ical LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 5 4 BLOCKIN 3 0 +and and a an and and d nd and and LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 6 4 BLOCKIN 3 0 +chemistry chemistry c ch che chem y ry try stry LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 8 4 BLOCKIN 3 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 8 4 BLOCKIN 3 0 +2024 2024 2 20 202 2024 4 24 024 2024 LINEIN LINEINDENT NOCAPS ALLDIGIT 0 0 0 0 0 1 0 0 NOPUNCT 10 4 BLOCKIN 3 0 +. . . . . . . . . . 
LINEEND LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 10 4 BLOCKEND 3 0 +[ [ [ [ [ [ [ [ [ [ LINESTART ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 OPENBRACKET 0 9 BLOCKSTART 9 0 +59 59 5 59 59 59 9 59 59 59 LINEIN ALIGNEDLEFT NOCAPS ALLDIGIT 0 0 0 0 0 0 0 0 NOPUNCT 0 9 BLOCKIN 9 0 +] ] ] ] ] ] ] ] ] ] LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 ENDBRACKET 0 9 BLOCKIN 9 0 +Writer writer W Wr Wri Writ r er ter iter LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 1 9 BLOCKIN 9 0 +Engineering engineering E En Eng Engi g ng ing ring LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 2 9 BLOCKIN 9 0 +team team t te tea team m am eam team LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 3 9 BLOCKIN 9 0 +. . . . . . . . . . LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 3 9 BLOCKIN 9 0 +Palmyra palmyra P Pa Pal Palm a ra yra myra LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 4 9 BLOCKIN 9 0 +- - - - - - - - - - LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 HYPHEN 4 9 BLOCKIN 9 0 +Fin fin F Fi Fin Fin n in Fin Fin LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 4 9 BLOCKIN 9 0 +- - - - - - - - - - LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 HYPHEN 4 9 BLOCKIN 9 0 +70B 70b 7 70 70B 70B B 0B 70B 70B LINEIN ALIGNEDLEFT ALLCAP CONTAINSDIGITS 0 0 0 0 0 0 0 0 NOPUNCT 5 9 BLOCKIN 9 0 +- - - - - - - - - - LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 HYPHEN 5 9 BLOCKIN 9 0 +32k 32k 3 32 32k 32k k 2k 32k 32k LINEIN ALIGNEDLEFT NOCAPS CONTAINSDIGITS 0 0 0 0 0 0 0 0 NOPUNCT 5 9 BLOCKIN 9 0 +: : : : : : : : : : LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 PUNCT 5 9 BLOCKIN 9 0 +a a a a a a a a a a LINEIN ALIGNEDLEFT NOCAPS NODIGIT 1 0 1 0 0 0 0 0 NOPUNCT 5 9 BLOCKIN 9 0 +powerful powerful p po pow powe l ul ful rful LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 6 9 BLOCKIN 9 0 +LLM llm L LL LLM LLM M LM LLM LLM LINEIN ALIGNEDLEFT ALLCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 7 9 BLOCKIN 9 0 
+designed designed d de des desi d ed ned gned LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 8 9 BLOCKIN 9 0 +for for f fo for for r or for for LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 9 9 BLOCKIN 9 0 +Finance finance F Fi Fin Fina e ce nce ance LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 10 9 BLOCKIN 9 0 +. . . . . . . . . . LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 10 9 BLOCKIN 9 0 +https https h ht htt http s ps tps ttps LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 0 0 0 0 0 0 1 NOPUNCT 10 9 BLOCKIN 9 0 +: : : : : : : : : : LINEEND ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 PUNCT 10 9 BLOCKIN 9 0 +/ / / / / / / / / / LINESTART LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 NOPUNCT 0 2 BLOCKIN 6 0 +/ / / / / / / / / / LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 NOPUNCT 0 2 BLOCKIN 6 0 +dev dev d de dev dev v ev dev dev LINEIN LINEINDENT NOCAPS NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 2 2 BLOCKIN 6 0 +. . . . . . . . . . LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 2 2 BLOCKIN 6 0 +writer writer w wr wri writ r er ter iter LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 4 2 BLOCKIN 6 0 +. . . . . . . . . . LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 5 2 BLOCKIN 6 0 +com com c co com com m om com com LINEIN LINEINDENT NOCAPS NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 6 2 BLOCKIN 6 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 6 2 BLOCKIN 6 0 +2024 2024 2 20 202 2024 4 24 024 2024 LINEIN LINEINDENT NOCAPS ALLDIGIT 0 0 0 0 0 1 0 0 NOPUNCT 9 2 BLOCKIN 6 0 +. . . . . . . . . . 
LINEEND LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 9 2 BLOCKEND 6 0 +[ [ [ [ [ [ [ [ [ [ LINESTART ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 OPENBRACKET 0 9 BLOCKSTART 9 0 +60 60 6 60 60 60 0 60 60 60 LINEIN ALIGNEDLEFT NOCAPS ALLDIGIT 0 0 0 0 0 0 0 0 NOPUNCT 0 9 BLOCKIN 9 0 +] ] ] ] ] ] ] ] ] ] LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 ENDBRACKET 0 9 BLOCKIN 9 0 +Zeyu zeyu Z Ze Zey Zeyu u yu eyu Zeyu LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 0 9 BLOCKIN 9 0 +Han han H Ha Han Han n an Han Han LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 1 9 BLOCKIN 9 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 1 9 BLOCKIN 9 0 +Chao chao C Ch Cha Chao o ao hao Chao LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 2 9 BLOCKIN 9 0 +Gao gao G Ga Gao Gao o ao Gao Gao LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 2 9 BLOCKIN 9 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 2 9 BLOCKIN 9 0 +Jinyang jinyang J Ji Jin Jiny g ng ang yang LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 3 9 BLOCKIN 9 0 +Liu liu L Li Liu Liu u iu Liu Liu LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 4 9 BLOCKIN 9 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 4 9 BLOCKIN 9 0 +Jeff jeff J Je Jef Jeff f ff eff Jeff LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 4 9 BLOCKIN 9 0 +Zhang zhang Z Zh Zha Zhan g ng ang hang LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 5 9 BLOCKIN 9 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 5 9 BLOCKIN 9 0 +and and a an and and d nd and and LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 6 9 BLOCKIN 9 0 +Sai sai S Sa Sai Sai i ai Sai Sai LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 6 9 BLOCKIN 9 0 +Qian qian Q Qi Qia Qian n an ian Qian LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 7 9 BLOCKIN 9 0 +Zhang 
zhang Z Zh Zha Zhan g ng ang hang LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 7 9 BLOCKIN 9 0 +. . . . . . . . . . LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 7 9 BLOCKIN 9 0 +Parameter parameter P Pa Par Para r er ter eter LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 8 9 BLOCKIN 9 0 +- - - - - - - - - - LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 HYPHEN 9 9 BLOCKIN 9 0 +efficient efficient e ef eff effi t nt ent ient LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 9 9 BLOCKIN 9 0 +fine fine f fi fin fine e ne ine fine LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 1 1 0 0 0 0 0 NOPUNCT 10 9 BLOCKIN 9 0 +- - - - - - - - - - LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 HYPHEN 10 9 BLOCKIN 9 0 +tuning tuning t tu tun tuni g ng ing ning LINEEND ALIGNEDLEFT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 10 9 BLOCKIN 9 0 +for for f fo for for r or for for LINESTART LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 0 4 BLOCKIN 3 0 +large large l la lar larg e ge rge arge LINEIN LINEINDENT NOCAPS NODIGIT 0 1 1 0 0 0 0 0 NOPUNCT 2 4 BLOCKIN 3 0 +models models m mo mod mode s ls els dels LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 3 4 BLOCKIN 3 0 +: : : : : : : : : : LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 PUNCT 3 4 BLOCKIN 3 0 +A a A A A A A A A A LINEIN LINEINDENT ALLCAP NODIGIT 1 0 1 0 0 0 0 0 NOPUNCT 4 4 BLOCKIN 3 0 +comprehensive comprehensive c co com comp e ve ive sive LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 7 4 BLOCKIN 3 0 +survey survey s su sur surv y ey vey rvey LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 9 4 BLOCKIN 3 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 9 4 BLOCKIN 3 0 +2024 2024 2 20 202 2024 4 24 024 2024 LINEIN LINEINDENT NOCAPS ALLDIGIT 0 0 0 0 0 1 0 0 NOPUNCT 10 4 BLOCKIN 3 0 +. . . . . . . . . . 
LINEEND LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 10 4 BLOCKEND 3 0 +[ [ [ [ [ [ [ [ [ [ LINESTART ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 OPENBRACKET 0 9 BLOCKSTART 8 0 +61 61 6 61 61 61 1 61 61 61 LINEIN ALIGNEDLEFT NOCAPS ALLDIGIT 0 0 0 0 0 0 0 0 NOPUNCT 0 9 BLOCKIN 8 0 +] ] ] ] ] ] ] ] ] ] LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 ENDBRACKET 0 9 BLOCKIN 8 0 +Lin lin L Li Lin Lin n in Lin Lin LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 0 9 BLOCKIN 8 0 +Tian tian T Ti Tia Tian n an ian Tian LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 1 9 BLOCKIN 8 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 1 9 BLOCKIN 8 0 +Xiuzhen xiuzhen X Xi Xiu Xiuz n en hen zhen LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 2 9 BLOCKIN 8 0 +Zhang zhang Z Zh Zha Zhan g ng ang hang LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 3 9 BLOCKIN 8 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 3 9 BLOCKIN 8 0 +and and a an and and d nd and and LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 3 9 BLOCKIN 8 0 +Jey jey J Je Jey Jey y ey Jey Jey LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 4 9 BLOCKIN 8 0 +Han han H Ha Han Han n an Han Han LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 4 9 BLOCKIN 8 0 +Lau lau L La Lau Lau u au Lau Lau LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 5 9 BLOCKIN 8 0 +. . . . . . . . . . 
LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 5 9 BLOCKIN 8 0 +Metatroll metatroll M Me Met Meta l ll oll roll LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 6 9 BLOCKIN 8 0 +: : : : : : : : : : LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 PUNCT 6 9 BLOCKIN 8 0 +Few few F Fe Few Few w ew Few Few LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 7 9 BLOCKIN 8 0 +- - - - - - - - - - LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 HYPHEN 7 9 BLOCKIN 8 0 +shot shot s sh sho shot t ot hot shot LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 7 9 BLOCKIN 8 0 +detection detection d de det dete n on ion tion LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 8 9 BLOCKIN 8 0 +of of o of of of f of of of LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 9 9 BLOCKIN 8 0 +state state s st sta stat e te ate tate LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 10 9 BLOCKIN 8 0 +- - - - - - - - - - LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 HYPHEN 10 9 BLOCKIN 8 0 +sponsored sponsored s sp spo spon d ed red ored LINEEND ALIGNEDLEFT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 10 9 BLOCKIN 8 0 +trolls trolls t tr tro trol s ls lls olls LINESTART LINEINDENT NOCAPS NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 0 8 BLOCKIN 4 0 +with with w wi wit with h th ith with LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 1 8 BLOCKIN 4 0 +transformer transformer t tr tra tran r er mer rmer LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 2 8 BLOCKIN 4 0 +adapters adapters a ad ada adap s rs ers ters LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 3 8 BLOCKIN 4 0 +. . . . . . . . . . 
LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 3 8 BLOCKIN 4 0 +In in I In In In n In In In LINEIN LINEINDENT INITCAP NODIGIT 0 1 1 0 0 0 0 0 NOPUNCT 4 8 BLOCKIN 4 0 +Proceedings proceedings P Pr Pro Proc s gs ngs ings LINEIN LINEINDENT INITCAP NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 5 8 BLOCKIN 4 0 +of of o of of of f of of of LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 6 8 BLOCKIN 4 0 +the the t th the the e he the the LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 6 8 BLOCKIN 4 0 +ACM acm A AC ACM ACM M CM ACM ACM LINEIN LINEINDENT ALLCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 7 8 BLOCKIN 4 0 +Web web W We Web Web b eb Web Web LINEIN LINEINDENT INITCAP NODIGIT 0 1 1 0 0 0 0 0 NOPUNCT 7 8 BLOCKIN 4 0 +Conference conference C Co Con Conf e ce nce ence LINEIN LINEINDENT INITCAP NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 9 8 BLOCKIN 4 0 +2023 2023 2 20 202 2023 3 23 023 2023 LINEIN LINEINDENT NOCAPS ALLDIGIT 0 0 0 0 0 1 0 0 NOPUNCT 9 8 BLOCKIN 4 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 10 8 BLOCKIN 4 0 +WWW www W WW WWW WWW W WW WWW WWW LINEIN LINEINDENT ALLCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 10 8 BLOCKIN 4 0 +' ' ' ' ' ' ' ' ' ' LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 QUOTE 10 8 BLOCKIN 4 0 +23 23 2 23 23 23 3 23 23 23 LINEIN LINEINDENT NOCAPS ALLDIGIT 0 0 0 0 0 0 0 0 NOPUNCT 10 8 BLOCKIN 4 0 +. . . . . . . . . . LINEEND LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 10 8 BLOCKIN 4 0 +ACM acm A AC ACM ACM M CM ACM ACM LINESTART LINEINDENT ALLCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 1 1 BLOCKIN 2 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 2 1 BLOCKIN 2 0 +April april A Ap Apr Apri l il ril pril LINEIN LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 1 0 NOPUNCT 6 1 BLOCKIN 2 0 +2023 2023 2 20 202 2023 3 23 023 2023 LINEIN LINEINDENT NOCAPS ALLDIGIT 0 0 0 0 0 1 0 0 NOPUNCT 9 1 BLOCKIN 2 0 +. . . . . . . . . . 
LINEEND LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 10 1 BLOCKEND 2 0 +[ [ [ [ [ [ [ [ [ [ LINESTART ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 OPENBRACKET 0 9 BLOCKSTART 10 0 +62 62 6 62 62 62 2 62 62 62 LINEIN ALIGNEDLEFT NOCAPS ALLDIGIT 0 0 0 0 0 0 0 0 NOPUNCT 0 9 BLOCKIN 10 0 +] ] ] ] ] ] ] ] ] ] LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 ENDBRACKET 0 9 BLOCKIN 10 0 +Edward edward E Ed Edw Edwa d rd ard ward LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 1 9 BLOCKIN 10 0 +J j J J J J J J J J LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 NOPUNCT 1 9 BLOCKIN 10 0 +. . . . . . . . . . LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 1 9 BLOCKIN 10 0 +Hu hu H Hu Hu Hu u Hu Hu Hu LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 2 9 BLOCKIN 10 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 2 9 BLOCKIN 10 0 +Yelong yelong Y Ye Yel Yelo g ng ong long LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 2 9 BLOCKIN 10 0 +Shen shen S Sh She Shen n en hen Shen LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 3 9 BLOCKIN 10 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 3 9 BLOCKIN 10 0 +Phillip phillip P Ph Phi Phil p ip lip llip LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 4 9 BLOCKIN 10 0 +Wallis wallis W Wa Wal Wall s is lis llis LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 5 9 BLOCKIN 10 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 5 9 BLOCKIN 10 0 +Zeyuan zeyuan Z Ze Zey Zeyu n an uan yuan LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 6 9 BLOCKIN 10 0 +Allen allen A Al All Alle n en len llen LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 6 9 BLOCKIN 10 0 +- - - - - - - - - - LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 HYPHEN 7 9 BLOCKIN 10 0 +Zhu zhu Z Zh Zhu Zhu u hu Zhu Zhu LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 7 9 BLOCKIN 10 0 +, , , 
, , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 7 9 BLOCKIN 10 0 +Yuanzhi yuanzhi Y Yu Yua Yuan i hi zhi nzhi LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 8 9 BLOCKIN 10 0 +Li li L Li Li Li i Li Li Li LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 8 9 BLOCKIN 10 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 8 9 BLOCKIN 10 0 +Shean shean S Sh She Shea n an ean hean LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 9 9 BLOCKIN 10 0 +Wang wang W Wa Wan Wang g ng ang Wang LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 10 9 BLOCKIN 10 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 10 9 BLOCKIN 10 0 +Lu lu L Lu Lu Lu u Lu Lu Lu LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 10 9 BLOCKIN 10 0 +Wang wang W Wa Wan Wang g ng ang Wang LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 10 9 BLOCKIN 10 0 +, , , , , , , , , , LINEEND ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 10 9 BLOCKIN 10 0 +and and a an and and d nd and and LINESTART LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 0 7 BLOCKIN 5 0 +Weizhu weizhu W We Wei Weiz u hu zhu izhu LINEIN LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 1 7 BLOCKIN 5 0 +Chen chen C Ch Che Chen n en hen Chen LINEIN LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 2 7 BLOCKIN 5 0 +. . . . . . . . . . 
LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 2 7 BLOCKIN 5 0 +Lora lora L Lo Lor Lora a ra ora Lora LINEIN LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 3 7 BLOCKIN 5 0 +: : : : : : : : : : LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 PUNCT 3 7 BLOCKIN 5 0 +Low low L Lo Low Low w ow Low Low LINEIN LINEINDENT INITCAP NODIGIT 0 1 1 0 0 0 0 0 NOPUNCT 3 7 BLOCKIN 5 0 +- - - - - - - - - - LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 HYPHEN 4 7 BLOCKIN 5 0 +rank rank r ra ran rank k nk ank rank LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 4 7 BLOCKIN 5 0 +adaptation adaptation a ad ada adap n on ion tion LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 6 7 BLOCKIN 5 0 +of of o of of of f of of of LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 6 7 BLOCKIN 5 0 +large large l la lar larg e ge rge arge LINEIN LINEINDENT NOCAPS NODIGIT 0 1 1 0 0 0 0 0 NOPUNCT 7 7 BLOCKIN 5 0 +language language l la lan lang e ge age uage LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 8 7 BLOCKIN 5 0 +models models m mo mod mode s ls els dels LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 10 7 BLOCKIN 5 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 10 7 BLOCKIN 5 0 +2021 2021 2 20 202 2021 1 21 021 2021 LINEIN LINEINDENT NOCAPS ALLDIGIT 0 0 0 0 0 1 0 0 NOPUNCT 10 7 BLOCKIN 5 0 +. . . . . . . . . . 
LINEEND LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 10 7 BLOCKEND 5 0 +[ [ [ [ [ [ [ [ [ [ LINESTART ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 OPENBRACKET 0 2 BLOCKSTART 3 0 +63 63 6 63 63 63 3 63 63 63 LINEIN ALIGNEDLEFT NOCAPS ALLDIGIT 0 0 0 0 0 0 0 0 NOPUNCT 1 2 BLOCKIN 3 0 +] ] ] ] ] ] ] ] ] ] LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 ENDBRACKET 1 2 BLOCKIN 3 0 +PhD phd P Ph PhD PhD D hD PhD PhD LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 3 2 BLOCKIN 3 0 +Sebastian sebastian S Se Seb Seba n an ian tian LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 6 2 BLOCKIN 3 0 +Raschka raschka R Ra Ras Rasc a ka hka chka LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 10 2 BLOCKIN 3 0 +. . . . . . . . . . LINEEND ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 10 2 BLOCKIN 3 0 +Practical practical P Pr Pra Prac l al cal ical LINESTART LINEINDENT INITCAP NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 1 5 BLOCKIN 2 0 +Tips tips T Ti Tip Tips s ps ips Tips LINEIN LINEINDENT INITCAP NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 2 5 BLOCKIN 2 0 +for for f fo for for r or for for LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 3 5 BLOCKIN 2 0 +Finetuning finetuning F Fi Fin Fine g ng ing ning LINEIN LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 5 5 BLOCKIN 2 0 +LLMs llms L LL LLM LLMs s Ms LMs LLMs LINEIN LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 6 5 BLOCKIN 2 0 +Using using U Us Usi Usin g ng ing sing LINEIN LINEINDENT INITCAP NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 7 5 BLOCKIN 2 0 +LoRA lora L Lo LoR LoRA A RA oRA LoRA LINEIN LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 8 5 BLOCKIN 2 0 +( ( ( ( ( ( ( ( ( ( LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 OPENBRACKET 9 5 BLOCKIN 2 0 +Low low L Lo Low Low w ow Low Low LINEIN LINEINDENT INITCAP NODIGIT 0 1 1 0 0 0 0 0 NOPUNCT 10 5 BLOCKIN 2 0 +- - - - - - - - - - LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 HYPHEN 10 5 BLOCKIN 2 0 +Rank rank R Ra Ran Rank k nk ank Rank LINEEND LINEINDENT 
INITCAP NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 10 5 BLOCKIN 2 0 +Adaptation adaptation A Ad Ada Adap n on ion tion LINESTART ALIGNEDLEFT INITCAP NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 1 8 BLOCKIN 10 0 +) ) ) ) ) ) ) ) ) ) LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 ENDBRACKET 1 8 BLOCKIN 10 0 +- - - - - - - - - - LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 HYPHEN 1 8 BLOCKIN 10 0 +magazine magazine m ma mag maga e ne ine zine LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 2 8 BLOCKIN 10 0 +. . . . . . . . . . LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 2 8 BLOCKIN 10 0 +sebastianraschka sebastianraschka s se seb seba a ka hka chka LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 4 8 BLOCKIN 10 0 +. . . . . . . . . . LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 4 8 BLOCKIN 10 0 +com com c co com com m om com com LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 5 8 BLOCKIN 10 0 +. . . . . . . . . . LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 5 8 BLOCKIN 10 0 +https https h ht htt http s ps tps ttps LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 0 0 0 0 0 0 1 NOPUNCT 5 8 BLOCKIN 10 0 +: : : : : : : : : : LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 PUNCT 6 8 BLOCKIN 10 0 +/ / / / / / / / / / LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 NOPUNCT 6 8 BLOCKIN 10 0 +/ / / / / / / / / / LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 NOPUNCT 6 8 BLOCKIN 10 0 +magazine magazine m ma mag maga e ne ine zine LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 7 8 BLOCKIN 10 0 +. . . . . . . . . . LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 7 8 BLOCKIN 10 0 +sebastianraschka sebastianraschka s se seb seba a ka hka chka LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 9 8 BLOCKIN 10 0 +. . . . . . . . . . 
LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 9 8 BLOCKIN 10 0 +com com c co com com m om com com LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 9 8 BLOCKIN 10 0 +/ / / / / / / / / / LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 NOPUNCT 9 8 BLOCKIN 10 0 +p p p p p p p p p p LINEIN ALIGNEDLEFT NOCAPS NODIGIT 1 0 0 0 0 0 0 0 NOPUNCT 9 8 BLOCKIN 10 0 +/ / / / / / / / / / LINEEND ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 NOPUNCT 10 8 BLOCKIN 10 0 +practical practical p pr pra prac l al cal ical LINESTART ALIGNEDLEFT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 1 5 BLOCKIN 10 0 +- - - - - - - - - - LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 HYPHEN 1 5 BLOCKIN 10 0 +tips tips t ti tip tips s ps ips tips LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 2 5 BLOCKIN 10 0 +- - - - - - - - - - LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 HYPHEN 2 5 BLOCKIN 10 0 +for for f fo for for r or for for LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 3 5 BLOCKIN 10 0 +- - - - - - - - - - LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 HYPHEN 3 5 BLOCKIN 10 0 +finetuning finetuning f fi fin fine g ng ing ning LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 4 5 BLOCKIN 10 0 +- - - - - - - - - - LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 HYPHEN 5 5 BLOCKIN 10 0 +llms llms l ll llm llms s ms lms llms LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 5 5 BLOCKIN 10 0 +. . . . . . . . . . 
LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 5 5 BLOCKIN 10 0 +[ [ [ [ [ [ [ [ [ [ LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 OPENBRACKET 6 5 BLOCKIN 10 0 +Accessed accessed A Ac Acc Acce d ed sed ssed LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 7 5 BLOCKIN 10 0 +01 01 0 01 01 01 1 01 01 01 LINEIN ALIGNEDLEFT NOCAPS ALLDIGIT 0 0 0 0 0 0 0 0 NOPUNCT 8 5 BLOCKIN 10 0 +- - - - - - - - - - LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 HYPHEN 8 5 BLOCKIN 10 0 +08 08 0 08 08 08 8 08 08 08 LINEIN ALIGNEDLEFT NOCAPS ALLDIGIT 0 0 0 0 0 0 0 0 NOPUNCT 8 5 BLOCKIN 10 0 +- - - - - - - - - - LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 HYPHEN 9 5 BLOCKIN 10 0 +2024 2024 2 20 202 2024 4 24 024 2024 LINEIN ALIGNEDLEFT NOCAPS ALLDIGIT 0 0 0 0 0 1 0 0 NOPUNCT 9 5 BLOCKIN 10 0 +] ] ] ] ] ] ] ] ] ] LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 ENDBRACKET 9 5 BLOCKIN 10 0 +. . . . . . . . . . LINEEND ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 10 5 BLOCKEND 10 0 +[ [ [ [ [ [ [ [ [ [ LINESTART ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 OPENBRACKET 0 9 BLOCKSTART 7 0 +64 64 6 64 64 64 4 64 64 64 LINEIN ALIGNEDLEFT NOCAPS ALLDIGIT 0 0 0 0 0 0 0 0 NOPUNCT 0 9 BLOCKIN 7 0 +] ] ] ] ] ] ] ] ] ] LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 ENDBRACKET 0 9 BLOCKIN 7 0 +Tim tim T Ti Tim Tim m im Tim Tim LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 0 9 BLOCKIN 7 0 +Dettmers dettmers D De Det Dett s rs ers mers LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 1 9 BLOCKIN 7 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 1 9 BLOCKIN 7 0 +Artidoro artidoro A Ar Art Arti o ro oro doro LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 2 9 BLOCKIN 7 0 +Pagnoni pagnoni P Pa Pag Pagn i ni oni noni LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 3 9 BLOCKIN 7 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 3 9 BLOCKIN 7 0 +Ari ari A Ar Ari Ari i ri 
Ari Ari LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 4 9 BLOCKIN 7 0 +Holtzman holtzman H Ho Hol Holt n an man zman LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 5 9 BLOCKIN 7 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 5 9 BLOCKIN 7 0 +and and a an and and d nd and and LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 5 9 BLOCKIN 7 0 +Luke luke L Lu Luk Luke e ke uke Luke LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 6 9 BLOCKIN 7 0 +Zettlemoyer zettlemoyer Z Ze Zet Zett r er yer oyer LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 7 9 BLOCKIN 7 0 +. . . . . . . . . . LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 7 9 BLOCKIN 7 0 +Qlora qlora Q Ql Qlo Qlor a ra ora lora LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 8 9 BLOCKIN 7 0 +: : : : : : : : : : LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 PUNCT 8 9 BLOCKIN 7 0 +Efficient efficient E Ef Eff Effi t nt ent ient LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 9 9 BLOCKIN 7 0 +finetuning finetuning f fi fin fine g ng ing ning LINEEND ALIGNEDLEFT NOCAPS NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 10 9 BLOCKIN 7 0 +of of o of of of f of of of LINESTART LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 0 2 BLOCKIN 2 0 +quantized quantized q qu qua quan d ed zed ized LINEIN LINEINDENT NOCAPS NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 5 2 BLOCKIN 2 0 +llms llms l ll llm llms s ms lms llms LINEIN LINEINDENT NOCAPS NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 7 2 BLOCKIN 2 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 7 2 BLOCKIN 2 0 +2023 2023 2 20 202 2023 3 23 023 2023 LINEIN LINEINDENT NOCAPS ALLDIGIT 0 0 0 0 0 1 0 0 NOPUNCT 10 2 BLOCKIN 2 0 +. . . . . . . . . . 
LINEEND LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 10 2 BLOCKEND 2 0 +[ [ [ [ [ [ [ [ [ [ LINESTART ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 OPENBRACKET 0 8 BLOCKSTART 10 0 +65 65 6 65 65 65 5 65 65 65 LINEIN ALIGNEDLEFT NOCAPS ALLDIGIT 0 0 0 0 0 0 0 0 NOPUNCT 0 8 BLOCKIN 10 0 +] ] ] ] ] ] ] ] ] ] LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 ENDBRACKET 0 8 BLOCKIN 10 0 +What what W Wh Wha What t at hat What LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 1 8 BLOCKIN 10 0 +is is i is is is s is is is LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 1 8 BLOCKIN 10 0 +QLoRa qlora Q QL QLo QLoR a Ra oRa LoRa LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 2 8 BLOCKIN 10 0 +? ? ? ? ? ? ? ? ? ? LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 PUNCT 2 8 BLOCKIN 10 0 +- - - - - - - - - - LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 HYPHEN 2 8 BLOCKIN 10 0 +Analytics analytics A An Ana Anal s cs ics tics LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 3 8 BLOCKIN 10 0 +Vidhya vidhya V Vi Vid Vidh a ya hya dhya LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 4 8 BLOCKIN 10 0 +- - - - - - - - - - LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 HYPHEN 5 8 BLOCKIN 10 0 +community community c co com comm y ty ity nity LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 6 8 BLOCKIN 10 0 +. . . . . . . . . . LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 6 8 BLOCKIN 10 0 +analyticsvidhya analyticsvidhya a an ana anal a ya hya dhya LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 7 8 BLOCKIN 10 0 +. . . . . . . . . . LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 7 8 BLOCKIN 10 0 +com com c co com com m om com com LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 8 8 BLOCKIN 10 0 +. . . . . . . . . . 
LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 8 8 BLOCKIN 10 0 +https https h ht htt http s ps tps ttps LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 0 0 0 0 0 0 1 NOPUNCT 9 8 BLOCKIN 10 0 +: : : : : : : : : : LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 PUNCT 9 8 BLOCKIN 10 0 +/ / / / / / / / / / LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 NOPUNCT 9 8 BLOCKIN 10 0 +/ / / / / / / / / / LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 NOPUNCT 9 8 BLOCKIN 10 0 +community community c co com comm y ty ity nity LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 10 8 BLOCKIN 10 0 +. . . . . . . . . . LINEEND ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 10 8 BLOCKIN 10 0 +analyticsvidhya analyticsvidhya a an ana anal a ya hya dhya LINESTART LINEINDENT NOCAPS NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 1 8 BLOCKIN 10 0 +. . . . . . . . . . LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 1 8 BLOCKIN 10 0 +com com c co com com m om com com LINEIN LINEINDENT NOCAPS NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 2 8 BLOCKIN 10 0 +/ / / / / / / / / / LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 NOPUNCT 2 8 BLOCKIN 10 0 +c c c c c c c c c c LINEIN LINEINDENT NOCAPS NODIGIT 1 0 0 0 0 0 0 0 NOPUNCT 2 8 BLOCKIN 10 0 +/ / / / / / / / / / LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 NOPUNCT 2 8 BLOCKIN 10 0 +generative generative g ge gen gene e ve ive tive LINEIN LINEINDENT NOCAPS NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 3 8 BLOCKIN 10 0 +- - - - - - - - - - LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 HYPHEN 3 8 BLOCKIN 10 0 +ai ai a ai ai ai i ai ai ai LINEIN LINEINDENT NOCAPS NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 4 8 BLOCKIN 10 0 +- - - - - - - - - - LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 HYPHEN 4 8 BLOCKIN 10 0 +tech tech t te tec tech h ch ech tech LINEIN LINEINDENT NOCAPS NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 4 8 BLOCKIN 10 0 +- - - - - - - - - - LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 HYPHEN 4 8 BLOCKIN 10 0 +discussion discussion d di dis disc n on ion sion LINEIN 
LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 6 8 BLOCKIN 10 0 +/ / / / / / / / / / LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 NOPUNCT 6 8 BLOCKIN 10 0 +what what w wh wha what t at hat what LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 6 8 BLOCKIN 10 0 +- - - - - - - - - - LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 HYPHEN 6 8 BLOCKIN 10 0 +is is i is is is s is is is LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 7 8 BLOCKIN 10 0 +- - - - - - - - - - LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 HYPHEN 7 8 BLOCKIN 10 0 +qlora qlora q ql qlo qlor a ra ora lora LINEIN LINEINDENT NOCAPS NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 7 8 BLOCKIN 10 0 +. . . . . . . . . . LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 7 8 BLOCKIN 10 0 +[ [ [ [ [ [ [ [ [ [ LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 OPENBRACKET 8 8 BLOCKIN 10 0 +Accessed accessed A Ac Acc Acce d ed sed ssed LINEIN LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 9 8 BLOCKIN 10 0 +01 01 0 01 01 01 1 01 01 01 LINEIN LINEINDENT NOCAPS ALLDIGIT 0 0 0 0 0 0 0 0 NOPUNCT 9 8 BLOCKIN 10 0 +- - - - - - - - - - LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 HYPHEN 9 8 BLOCKIN 10 0 +08 08 0 08 08 08 8 08 08 08 LINEIN LINEINDENT NOCAPS ALLDIGIT 0 0 0 0 0 0 0 0 NOPUNCT 10 8 BLOCKIN 10 0 +- - - - - - - - - - LINEEND LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 HYPHEN 10 8 BLOCKIN 10 0 +2024 2024 2 20 202 2024 4 24 024 2024 LINESTART LINEINDENT NOCAPS ALLDIGIT 0 0 0 0 0 1 0 0 NOPUNCT 5 0 BLOCKIN 2 0 +] ] ] ] ] ] ] ] ] ] LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 ENDBRACKET 6 0 BLOCKIN 2 0 +. . . . . . . . . . 
LINEEND LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 7 0 BLOCKEND 2 0 +[ [ [ [ [ [ [ [ [ [ LINESTART ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 OPENBRACKET 0 8 BLOCKSTART 10 0 +66 66 6 66 66 66 6 66 66 66 LINEIN ALIGNEDLEFT NOCAPS ALLDIGIT 0 0 0 0 0 0 0 0 NOPUNCT 0 8 BLOCKIN 10 0 +] ] ] ] ] ] ] ] ] ] LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 ENDBRACKET 0 8 BLOCKIN 10 0 +Shih shih S Sh Shi Shih h ih hih Shih LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 1 8 BLOCKIN 10 0 +- - - - - - - - - - LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 HYPHEN 1 8 BLOCKIN 10 0 +Yang yang Y Ya Yan Yang g ng ang Yang LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 1 8 BLOCKIN 10 0 +Liu liu L Li Liu Liu u iu Liu Liu LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 2 8 BLOCKIN 10 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 2 8 BLOCKIN 10 0 +Chien chien C Ch Chi Chie n en ien hien LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 3 8 BLOCKIN 10 0 +- - - - - - - - - - LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 HYPHEN 3 8 BLOCKIN 10 0 +Yi yi Y Yi Yi Yi i Yi Yi Yi LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 3 8 BLOCKIN 10 0 +Wang wang W Wa Wan Wang g ng ang Wang LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 3 8 BLOCKIN 10 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 4 8 BLOCKIN 10 0 +Hongxu hongxu H Ho Hon Hong u xu gxu ngxu LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 4 8 BLOCKIN 10 0 +Yin yin Y Yi Yin Yin n in Yin Yin LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 5 8 BLOCKIN 10 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 5 8 BLOCKIN 10 0 +Pavlo pavlo P Pa Pav Pavl o lo vlo avlo LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 6 8 BLOCKIN 10 0 +Molchanov molchanov M Mo Mol Molc v ov nov anov LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 7 8 BLOCKIN 
10 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 7 8 BLOCKIN 10 0 +Yu yu Y Yu Yu Yu u Yu Yu Yu LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 8 8 BLOCKIN 10 0 +- - - - - - - - - - LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 HYPHEN 8 8 BLOCKIN 10 0 +Chiang chiang C Ch Chi Chia g ng ang iang LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 8 8 BLOCKIN 10 0 +Frank frank F Fr Fra Fran k nk ank rank LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 1 0 0 0 0 0 NOPUNCT 9 8 BLOCKIN 10 0 +Wang wang W Wa Wan Wang g ng ang Wang LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 10 8 BLOCKIN 10 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 10 8 BLOCKIN 10 0 +Kwang kwang K Kw Kwa Kwan g ng ang wang LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 10 8 BLOCKIN 10 0 +- - - - - - - - - - LINEEND ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 HYPHEN 10 8 BLOCKIN 10 0 +Ting ting T Ti Tin Ting g ng ing Ting LINESTART LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 0 7 BLOCKIN 8 0 +Cheng cheng C Ch Che Chen g ng eng heng LINEIN LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 1 7 BLOCKIN 8 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 1 7 BLOCKIN 8 0 +and and a an and and d nd and and LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 2 7 BLOCKIN 8 0 +Min min M Mi Min Min n in Min Min LINEIN LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 2 7 BLOCKIN 8 0 +- - - - - - - - - - LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 HYPHEN 2 7 BLOCKIN 8 0 +Hung hung H Hu Hun Hung g ng ung Hung LINEIN LINEINDENT INITCAP NODIGIT 0 1 1 0 0 0 0 0 NOPUNCT 3 7 BLOCKIN 8 0 +Chen chen C Ch Che Chen n en hen Chen LINEIN LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 3 7 BLOCKIN 8 0 +. . . . . . . . . . 
LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 4 7 BLOCKIN 8 0 +Dora dora D Do Dor Dora a ra ora Dora LINEIN LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 4 7 BLOCKIN 8 0 +: : : : : : : : : : LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 PUNCT 4 7 BLOCKIN 8 0 +Weight weight W We Wei Weig t ht ght ight LINEIN LINEINDENT INITCAP NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 5 7 BLOCKIN 8 0 +- - - - - - - - - - LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 HYPHEN 6 7 BLOCKIN 8 0 +decomposed decomposed d de dec deco d ed sed osed LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 7 7 BLOCKIN 8 0 +low low l lo low low w ow low low LINEIN LINEINDENT NOCAPS NODIGIT 0 1 1 0 0 0 0 0 NOPUNCT 7 7 BLOCKIN 8 0 +- - - - - - - - - - LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 HYPHEN 7 7 BLOCKIN 8 0 +rank rank r ra ran rank k nk ank rank LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 8 7 BLOCKIN 8 0 +adaptation adaptation a ad ada adap n on ion tion LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 9 7 BLOCKIN 8 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 10 7 BLOCKIN 8 0 +2024 2024 2 20 202 2024 4 24 024 2024 LINEIN LINEINDENT NOCAPS ALLDIGIT 0 0 0 0 0 1 0 0 NOPUNCT 10 7 BLOCKIN 8 0 +. . . . . . . . . . 
LINEEND LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 10 7 BLOCKEND 8 0 +[ [ [ [ [ [ [ [ [ [ LINESTART ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 OPENBRACKET 0 5 BLOCKSTART 4 0 +67 67 6 67 67 67 7 67 67 67 LINEIN ALIGNEDLEFT NOCAPS ALLDIGIT 0 0 0 0 0 0 0 0 NOPUNCT 0 5 BLOCKIN 4 0 +] ] ] ] ] ] ] ] ] ] LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 ENDBRACKET 0 5 BLOCKIN 4 0 +Apple apple A Ap App Appl e le ple pple LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 1 0 0 0 0 0 NOPUNCT 1 5 BLOCKIN 4 0 +intelligence intelligence i in int inte e ce nce ence LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 4 5 BLOCKIN 4 0 +foundation foundation f fo fou foun n on ion tion LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 6 5 BLOCKIN 4 0 +language language l la lan lang e ge age uage LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 7 5 BLOCKIN 4 0 +models models m mo mod mode s ls els dels LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 9 5 BLOCKIN 4 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 9 5 BLOCKIN 4 0 +2024 2024 2 20 202 2024 4 24 024 2024 LINEIN ALIGNEDLEFT NOCAPS ALLDIGIT 0 0 0 0 0 1 0 0 NOPUNCT 10 5 BLOCKIN 4 0 +. . . . . . . . . . 
LINEEND ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 10 5 BLOCKEND 4 0 +[ [ [ [ [ [ [ [ [ [ LINESTART ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 OPENBRACKET 0 8 BLOCKSTART 9 0 +68 68 6 68 68 68 8 68 68 68 LINEIN ALIGNEDLEFT NOCAPS ALLDIGIT 0 0 0 0 0 0 0 0 NOPUNCT 0 8 BLOCKIN 9 0 +] ] ] ] ] ] ] ] ] ] LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 ENDBRACKET 0 8 BLOCKIN 9 0 +Tingfeng tingfeng T Ti Tin Ting g ng eng feng LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 1 8 BLOCKIN 9 0 +Hui hui H Hu Hui Hui i ui Hui Hui LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 2 8 BLOCKIN 9 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 2 8 BLOCKIN 9 0 +Zhenyu zhenyu Z Zh Zhe Zhen u yu nyu enyu LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 3 8 BLOCKIN 9 0 +Zhang zhang Z Zh Zha Zhan g ng ang hang LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 3 8 BLOCKIN 9 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 4 8 BLOCKIN 9 0 +Shuohuan shuohuan S Sh Shu Shuo n an uan huan LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 5 8 BLOCKIN 9 0 +Wang wang W Wa Wan Wang g ng ang Wang LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 5 8 BLOCKIN 9 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 5 8 BLOCKIN 9 0 +Weiran weiran W We Wei Weir n an ran iran LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 6 8 BLOCKIN 9 0 +Xu xu X Xu Xu Xu u Xu Xu Xu LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 7 8 BLOCKIN 9 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 7 8 BLOCKIN 9 0 +Yu yu Y Yu Yu Yu u Yu Yu Yu LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 7 8 BLOCKIN 9 0 +Sun sun S Su Sun Sun n un Sun Sun LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 1 0 0 0 0 0 NOPUNCT 8 8 BLOCKIN 9 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 8 8 BLOCKIN 9 0 +and and a an 
and and d nd and and LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 9 8 BLOCKIN 9 0 +Hua hua H Hu Hua Hua a ua Hua Hua LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 9 8 BLOCKIN 9 0 +Wu wu W Wu Wu Wu u Wu Wu Wu LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 10 8 BLOCKIN 9 0 +. . . . . . . . . . LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 10 8 BLOCKIN 9 0 +Hft hft H Hf Hft Hft t ft Hft Hft LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 10 8 BLOCKIN 9 0 +: : : : : : : : : : LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 PUNCT 10 8 BLOCKIN 9 0 +Half half H Ha Hal Half f lf alf Half LINEEND ALIGNEDLEFT INITCAP NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 10 8 BLOCKIN 9 0 +fine fine f fi fin fine e ne ine fine LINESTART LINEINDENT NOCAPS NODIGIT 0 1 1 0 0 0 0 0 NOPUNCT 0 4 BLOCKIN 3 0 +- - - - - - - - - - LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 HYPHEN 1 4 BLOCKIN 3 0 +tuning tuning t tu tun tuni g ng ing ning LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 2 4 BLOCKIN 3 0 +for for f fo for for r or for for LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 3 4 BLOCKIN 3 0 +large large l la lar larg e ge rge arge LINEIN LINEINDENT NOCAPS NODIGIT 0 1 1 0 0 0 0 0 NOPUNCT 5 4 BLOCKIN 3 0 +language language l la lan lang e ge age uage LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 7 4 BLOCKIN 3 0 +models models m mo mod mode s ls els dels LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 8 4 BLOCKIN 3 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 9 4 BLOCKIN 3 0 +2024 2024 2 20 202 2024 4 24 024 2024 LINEIN LINEINDENT NOCAPS ALLDIGIT 0 0 0 0 0 1 0 0 NOPUNCT 10 4 BLOCKIN 3 0 +. . . . . . . . . . 
LINEEND LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 10 4 BLOCKEND 3 0 +[ [ [ [ [ [ [ [ [ [ LINESTART ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 OPENBRACKET 0 8 BLOCKSTART 8 0 +69 69 6 69 69 69 9 69 69 69 LINEIN ALIGNEDLEFT NOCAPS ALLDIGIT 0 0 0 0 0 0 0 0 NOPUNCT 0 8 BLOCKIN 8 0 +] ] ] ] ] ] ] ] ] ] LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 ENDBRACKET 0 8 BLOCKIN 8 0 +Johnny johnny J Jo Joh John y ny nny hnny LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 1 8 BLOCKIN 8 0 +Li li L Li Li Li i Li Li Li LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 1 8 BLOCKIN 8 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 1 8 BLOCKIN 8 0 +Saksham saksham S Sa Sak Saks m am ham sham LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 2 8 BLOCKIN 8 0 +Consul consul C Co Con Cons l ul sul nsul LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 3 8 BLOCKIN 8 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 3 8 BLOCKIN 8 0 +Eda eda E Ed Eda Eda a da Eda Eda LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 4 8 BLOCKIN 8 0 +Zhou zhou Z Zh Zho Zhou u ou hou Zhou LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 4 8 BLOCKIN 8 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 5 8 BLOCKIN 8 0 +James james J Ja Jam Jame s es mes ames LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 5 8 BLOCKIN 8 0 +Wong wong W Wo Won Wong g ng ong Wong LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 6 8 BLOCKIN 8 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 6 8 BLOCKIN 8 0 +Naila naila N Na Nai Nail a la ila aila LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 7 8 BLOCKIN 8 0 +Farooqui farooqui F Fa Far Faro i ui qui oqui LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 8 8 BLOCKIN 8 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 8 8 BLOCKIN 8 0 
+Yuxin yuxin Y Yu Yux Yuxi n in xin uxin LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 9 8 BLOCKIN 8 0 +Ye ye Y Ye Ye Ye e Ye Ye Ye LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 9 8 BLOCKIN 8 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 9 8 BLOCKIN 8 0 +Nithyashree nithyashree N Ni Nit Nith e ee ree hree LINEEND ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 10 8 BLOCKIN 8 0 +Manohar manohar M Ma Man Mano r ar har ohar LINESTART LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 0 8 BLOCKIN 6 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 0 8 BLOCKIN 6 0 +Zhuxiaona zhuxiaona Z Zh Zhu Zhux a na ona aona LINEIN LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 2 8 BLOCKIN 6 0 +Wei wei W We Wei Wei i ei Wei Wei LINEIN LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 2 8 BLOCKIN 6 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 2 8 BLOCKIN 6 0 +Tian tian T Ti Tia Tian n an ian Tian LINEIN LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 3 8 BLOCKIN 6 0 +Wu wu W Wu Wu Wu u Wu Wu Wu LINEIN LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 3 8 BLOCKIN 6 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 4 8 BLOCKIN 6 0 +Ben ben B Be Ben Ben n en Ben Ben LINEIN LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 4 8 BLOCKIN 6 0 +Echols echols E Ec Ech Echo s ls ols hols LINEIN LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 5 8 BLOCKIN 6 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 5 8 BLOCKIN 6 0 +Sharon sharon S Sh Sha Shar n on ron aron LINEIN LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 6 8 BLOCKIN 6 0 +Zhou zhou Z Zh Zho Zhou u ou hou Zhou LINEIN LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 7 8 BLOCKIN 6 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 7 8 BLOCKIN 6 0 +and and a an and and d nd and and LINEIN LINEINDENT NOCAPS NODIGIT 0 
0 1 0 0 0 0 0 NOPUNCT 7 8 BLOCKIN 6 0 +Gregory gregory G Gr Gre Greg y ry ory gory LINEIN LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 8 8 BLOCKIN 6 0 +Diamos diamos D Di Dia Diam s os mos amos LINEIN LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 9 8 BLOCKIN 6 0 +. . . . . . . . . . LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 9 8 BLOCKIN 6 0 +Banishing banishing B Ba Ban Bani g ng ing hing LINEEND LINEINDENT INITCAP NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 10 8 BLOCKIN 6 0 +llm llm l ll llm llm m lm llm llm LINESTART LINEINDENT NOCAPS NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 0 5 BLOCKIN 2 0 +hallucinations hallucinations h ha hal hall s ns ons ions LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 3 5 BLOCKIN 2 0 +requires requires r re req requ s es res ires LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 4 5 BLOCKIN 2 0 +rethinking rethinking r re ret reth g ng ing king LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 6 5 BLOCKIN 2 0 +generalization generalization g ge gen gene n on ion tion LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 9 5 BLOCKIN 2 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 9 5 BLOCKIN 2 0 +2024 2024 2 20 202 2024 4 24 024 2024 LINEIN LINEINDENT NOCAPS ALLDIGIT 0 0 0 0 0 1 0 0 NOPUNCT 10 5 BLOCKIN 2 0 +. . . . . . . . . . LINEEND LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 10 5 BLOCKEND 2 0 +[ [ [ [ [ [ [ [ [ [ LINESTART ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 OPENBRACKET 0 9 BLOCKSTART 8 0 +70 70 7 70 70 70 0 70 70 70 LINEIN ALIGNEDLEFT NOCAPS ALLDIGIT 0 0 0 0 0 0 0 0 NOPUNCT 0 9 BLOCKIN 8 0 +] ] ] ] ] ] ] ] ] ] LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 ENDBRACKET 0 9 BLOCKIN 8 0 +Albert albert A Al Alb Albe t rt ert bert LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 1 9 BLOCKIN 8 0 +Q q Q Q Q Q Q Q Q Q LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 NOPUNCT 1 9 BLOCKIN 8 0 +. . . . . . . . . . 
LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 1 9 BLOCKIN 8 0 +Jiang jiang J Ji Jia Jian g ng ang iang LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 2 9 BLOCKIN 8 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 2 9 BLOCKIN 8 0 +Alexandre alexandre A Al Ale Alex e re dre ndre LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 3 9 BLOCKIN 8 0 +Sablayrolles sablayrolles S Sa Sab Sabl s es les lles LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 5 9 BLOCKIN 8 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 5 9 BLOCKIN 8 0 +Antoine antoine A An Ant Anto e ne ine oine LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 6 9 BLOCKIN 8 0 +Roux roux R Ro Rou Roux x ux oux Roux LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 6 9 BLOCKIN 8 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 6 9 BLOCKIN 8 0 +Arthur arthur A Ar Art Arth r ur hur thur LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 7 9 BLOCKIN 8 0 +Mensch mensch M Me Men Mens h ch sch nsch LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 8 9 BLOCKIN 8 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 8 9 BLOCKIN 8 0 +Blanche blanche B Bl Bla Blan e he che nche LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 9 9 BLOCKIN 8 0 +Savary savary S Sa Sav Sava y ry ary vary LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 10 9 BLOCKIN 8 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 10 9 BLOCKIN 8 0 +Chris chris C Ch Chr Chri s is ris hris LINEEND ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 10 9 BLOCKIN 8 0 +Bamford bamford B Ba Bam Bamf d rd ord ford LINESTART LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 0 8 BLOCKIN 5 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 0 8 BLOCKIN 5 0 +Devendra devendra D De Dev Deve a ra dra ndra 
LINEIN LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 2 8 BLOCKIN 5 0 +Singh singh S Si Sin Sing h gh ngh ingh LINEIN LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 2 8 BLOCKIN 5 0 +Chaplot chaplot C Ch Cha Chap t ot lot plot LINEIN LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 3 8 BLOCKIN 5 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 3 8 BLOCKIN 5 0 +Diego diego D Di Die Dieg o go ego iego LINEIN LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 4 8 BLOCKIN 5 0 +de de d de de de e de de de LINEIN LINEINDENT NOCAPS NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 5 8 BLOCKIN 5 0 +las las l la las las s as las las LINEIN LINEINDENT NOCAPS NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 5 8 BLOCKIN 5 0 +Casas casas C Ca Cas Casa s as sas asas LINEIN LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 6 8 BLOCKIN 5 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 6 8 BLOCKIN 5 0 +Emma emma E Em Emm Emma a ma mma Emma LINEIN LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 7 8 BLOCKIN 5 0 +Bou bou B Bo Bou Bou u ou Bou Bou LINEIN LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 7 8 BLOCKIN 5 0 +Hanna hanna H Ha Han Hann a na nna anna LINEIN LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 8 8 BLOCKIN 5 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 8 8 BLOCKIN 5 0 +Florian florian F Fl Flo Flor n an ian rian LINEIN LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 9 8 BLOCKIN 5 0 +Bressand bressand B Br Bre Bres d nd and sand LINEIN LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 10 8 BLOCKIN 5 0 +, , , , , , , , , , LINEEND LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 10 8 BLOCKIN 5 0 +Gianna gianna G Gi Gia Gian a na nna anna LINESTART LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 0 9 BLOCKIN 6 0 +Lengyel lengyel L Le Len Leng l el yel gyel LINEIN LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 1 9 BLOCKIN 6 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 
0 0 0 COMMA 1 9 BLOCKIN 6 0 +Guillaume guillaume G Gu Gui Guil e me ume aume LINEIN LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 2 9 BLOCKIN 6 0 +Bour bour B Bo Bou Bour r ur our Bour LINEIN LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 3 9 BLOCKIN 6 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 3 9 BLOCKIN 6 0 +Guillaume guillaume G Gu Gui Guil e me ume aume LINEIN LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 4 9 BLOCKIN 6 0 +Lample lample L La Lam Lamp e le ple mple LINEIN LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 5 9 BLOCKIN 6 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 5 9 BLOCKIN 6 0 +Lélio lélio L Lé Lél Léli o io lio élio LINEIN LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 6 9 BLOCKIN 6 0 +Renard renard R Re Ren Rena d rd ard nard LINEIN LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 7 9 BLOCKIN 6 0 +Lavaud lavaud L La Lav Lava d ud aud vaud LINEIN LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 8 9 BLOCKIN 6 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 8 9 BLOCKIN 6 0 +Lucile lucile L Lu Luc Luci e le ile cile LINEIN LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 9 9 BLOCKIN 6 0 +Saulnier saulnier S Sa Sau Saul r er ier nier LINEIN LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 10 9 BLOCKIN 6 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 10 9 BLOCKIN 6 0 +Marie marie M Ma Mar Mari e ie rie arie LINEIN LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 10 9 BLOCKIN 6 0 +- - - - - - - - - - LINEEND LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 HYPHEN 10 9 BLOCKIN 6 0 +Anne anne A An Ann Anne e ne nne Anne LINESTART LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 0 8 BLOCKIN 5 0 +Lachaux lachaux L La Lac Lach x ux aux haux LINEIN LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 1 8 BLOCKIN 5 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 1 8 BLOCKIN 5 0 
+Pierre pierre P Pi Pie Pier e re rre erre LINEIN LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 2 8 BLOCKIN 5 0 +Stock stock S St Sto Stoc k ck ock tock LINEIN LINEINDENT INITCAP NODIGIT 0 1 1 0 0 0 0 0 NOPUNCT 3 8 BLOCKIN 5 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 3 8 BLOCKIN 5 0 +Sandeep sandeep S Sa San Sand p ep eep deep LINEIN LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 4 8 BLOCKIN 5 0 +Subramanian subramanian S Su Sub Subr n an ian nian LINEIN LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 5 8 BLOCKIN 5 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 5 8 BLOCKIN 5 0 +Sophia sophia S So Sop Soph a ia hia phia LINEIN LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 6 8 BLOCKIN 5 0 +Yang yang Y Ya Yan Yang g ng ang Yang LINEIN LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 7 8 BLOCKIN 5 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 7 8 BLOCKIN 5 0 +Szymon szymon S Sz Szy Szym n on mon ymon LINEIN LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 8 8 BLOCKIN 5 0 +Antoniak antoniak A An Ant Anto k ak iak niak LINEIN LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 9 8 BLOCKIN 5 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 9 8 BLOCKIN 5 0 +Teven teven T Te Tev Teve n en ven even LINEIN LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 10 8 BLOCKIN 5 0 +Le le L Le Le Le e Le Le Le LINEEND LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 10 8 BLOCKIN 5 0 +Scao scao S Sc Sca Scao o ao cao Scao LINESTART LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 0 9 BLOCKIN 6 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 0 9 BLOCKIN 6 0 +Théophile théophile T Th Thé Théo e le ile hile LINEIN LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 1 9 BLOCKIN 6 0 +Gervet gervet G Ge Ger Gerv t et vet rvet LINEIN LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 2 9 BLOCKIN 6 0 +, , , , , , , , 
, , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 2 9 BLOCKIN 6 0 +Thibaut thibaut T Th Thi Thib t ut aut baut LINEIN LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 3 9 BLOCKIN 6 0 +Lavril lavril L La Lav Lavr l il ril vril LINEIN LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 4 9 BLOCKIN 6 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 4 9 BLOCKIN 6 0 +Thomas thomas T Th Tho Thom s as mas omas LINEIN LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 5 9 BLOCKIN 6 0 +Wang wang W Wa Wan Wang g ng ang Wang LINEIN LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 6 9 BLOCKIN 6 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 6 9 BLOCKIN 6 0 +Timothée timothée T Ti Tim Timo e ée hée thée LINEIN LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 7 9 BLOCKIN 6 0 +Lacroix lacroix L La Lac Lacr x ix oix roix LINEIN LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 8 9 BLOCKIN 6 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 8 9 BLOCKIN 6 0 +and and a an and and d nd and and LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 8 9 BLOCKIN 6 0 +William william W Wi Wil Will m am iam liam LINEIN LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 9 9 BLOCKIN 6 0 +El el E El El El l El El El LINEIN LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 10 9 BLOCKIN 6 0 +Sayed sayed S Sa Say Saye d ed yed ayed LINEIN LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 10 9 BLOCKIN 6 0 +. . . . . . . . . . 
LINEEND LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 10 9 BLOCKIN 6 0 +Mixtral mixtral M Mi Mix Mixt l al ral tral LINESTART LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 2 2 BLOCKIN 2 0 +of of o of of of f of of of LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 4 2 BLOCKIN 2 0 +experts experts e ex exp expe s ts rts erts LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 7 2 BLOCKIN 2 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 7 2 BLOCKIN 2 0 +2024 2024 2 20 202 2024 4 24 024 2024 LINEIN LINEINDENT NOCAPS ALLDIGIT 0 0 0 0 0 1 0 0 NOPUNCT 10 2 BLOCKIN 2 0 +. . . . . . . . . . LINEEND LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 10 2 BLOCKEND 2 0 +[ [ [ [ [ [ [ [ [ [ LINESTART ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 OPENBRACKET 0 1 BLOCKSTART 2 0 +71 71 7 71 71 71 1 71 71 71 LINEIN ALIGNEDLEFT NOCAPS ALLDIGIT 0 0 0 0 0 0 0 0 NOPUNCT 2 1 BLOCKIN 2 0 +] ] ] ] ] ] ] ] ] ] LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 ENDBRACKET 2 1 BLOCKIN 2 0 +Applying applying A Ap App Appl g ng ing ying LINEEND ALIGNEDLEFT INITCAP NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 9 1 BLOCKIN 2 0 +Mixture mixture M Mi Mix Mixt e re ure ture LINESTART LINEINDENT INITCAP NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 7 0 BLOCKIN no 0 +of of o of of of f of of of LINESTART LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 5 0 BLOCKIN no 0 +Experts experts E Ex Exp Expe s ts rts erts LINESTART LINEINDENT INITCAP NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 7 0 BLOCKIN no 0 +in in i in in in n in in in LINESTART LINEINDENT NOCAPS NODIGIT 0 1 1 0 0 0 0 0 NOPUNCT 5 0 BLOCKIN no 0 +LLM llm L LL LLM LLM M LM LLM LLM LINESTART LINEINDENT ALLCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 6 0 BLOCKIN no 0 +Architectures architectures A Ar Arc Arch s es res ures LINESTART LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 8 1 BLOCKIN no 0 +- - - - - - - - - - LINESTART LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 HYPHEN 5 0 BLOCKIN 1 0 +NVIDIA nvidia N NV NVI NVID A IA DIA IDIA LINESTART 
LINEINDENT ALLCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 7 0 BLOCKIN no 0 +Techni techni T Te Tec Tech i ni hni chni LINESTART LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 7 0 BLOCKIN 1 0 +- - - - - - - - - - LINEEND LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 HYPHEN 8 0 BLOCKIN 1 0 +cal cal c ca cal cal l al cal cal LINESTART ALIGNEDLEFT NOCAPS NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 6 0 BLOCKIN no 0 +Blog blog B Bl Blo Blog g og log Blog LINESTART LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 6 0 BLOCKIN no 0 +- - - - - - - - - - LINESTART LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 HYPHEN 5 0 BLOCKIN 1 0 +developer developer d de dev deve r er per oper LINESTART LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 3 2 BLOCKIN 3 0 +. . . . . . . . . . LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 4 2 BLOCKIN 3 0 +nvidia nvidia n nv nvi nvid a ia dia idia LINEIN LINEINDENT NOCAPS NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 6 2 BLOCKIN 3 0 +. . . . . . . . . . LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 7 2 BLOCKIN 3 0 +com com c co com com m om com com LINEIN LINEINDENT NOCAPS NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 8 2 BLOCKIN 3 0 +. . . . . . . . . . LINEEND LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 9 2 BLOCKIN 3 0 +https https h ht htt http s ps tps ttps LINESTART LINEINDENT NOCAPS NODIGIT 0 0 0 0 0 0 0 1 NOPUNCT 1 3 BLOCKIN 7 0 +: : : : : : : : : : LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 PUNCT 1 3 BLOCKIN 7 0 +/ / / / / / / / / / LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 NOPUNCT 1 3 BLOCKIN 7 0 +/ / / / / / / / / / LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 NOPUNCT 2 3 BLOCKIN 7 0 +developer developer d de dev deve r er per oper LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 4 3 BLOCKIN 7 0 +. . . . . . . . . . LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 5 3 BLOCKIN 7 0 +nvidia nvidia n nv nvi nvid a ia dia idia LINEIN LINEINDENT NOCAPS NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 6 3 BLOCKIN 7 0 +. . . . . . . . . . 
LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 6 3 BLOCKIN 7 0 +com com c co com com m om com com LINEIN LINEINDENT NOCAPS NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 7 3 BLOCKIN 7 0 +/ / / / / / / / / / LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 NOPUNCT 8 3 BLOCKIN 7 0 +blog blog b bl blo blog g og log blog LINEIN LINEINDENT NOCAPS NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 9 3 BLOCKIN 7 0 +/ / / / / / / / / / LINEEND LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 NOPUNCT 9 3 BLOCKIN 7 0 +applying applying a ap app appl g ng ing ying LINESTART ALIGNEDLEFT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 1 7 BLOCKIN 10 0 +- - - - - - - - - - LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 HYPHEN 1 7 BLOCKIN 10 0 +mixture mixture m mi mix mixt e re ure ture LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 2 7 BLOCKIN 10 0 +- - - - - - - - - - LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 HYPHEN 2 7 BLOCKIN 10 0 +of of o of of of f of of of LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 2 7 BLOCKIN 10 0 +- - - - - - - - - - LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 HYPHEN 2 7 BLOCKIN 10 0 +experts experts e ex exp expe s ts rts erts LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 3 7 BLOCKIN 10 0 +- - - - - - - - - - LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 HYPHEN 3 7 BLOCKIN 10 0 +in in i in in in n in in in LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 1 1 0 0 0 0 0 NOPUNCT 4 7 BLOCKIN 10 0 +- - - - - - - - - - LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 HYPHEN 4 7 BLOCKIN 10 0 +llm llm l ll llm llm m lm llm llm LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 4 7 BLOCKIN 10 0 +- - - - - - - - - - LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 HYPHEN 4 7 BLOCKIN 10 0 +architectures architectures a ar arc arch s es res ures LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 6 7 BLOCKIN 10 0 +/ / / / / / / / / / LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 NOPUNCT 6 7 BLOCKIN 10 0 +. . . . . . . . . . 
LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 6 7 BLOCKIN 10 0 +[ [ [ [ [ [ [ [ [ [ LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 OPENBRACKET 7 7 BLOCKIN 10 0 +Accessed accessed A Ac Acc Acce d ed sed ssed LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 8 7 BLOCKIN 10 0 +01 01 0 01 01 01 1 01 01 01 LINEIN ALIGNEDLEFT NOCAPS ALLDIGIT 0 0 0 0 0 0 0 0 NOPUNCT 8 7 BLOCKIN 10 0 +- - - - - - - - - - LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 HYPHEN 8 7 BLOCKIN 10 0 +08 08 0 08 08 08 8 08 08 08 LINEIN ALIGNEDLEFT NOCAPS ALLDIGIT 0 0 0 0 0 0 0 0 NOPUNCT 9 7 BLOCKIN 10 0 +- - - - - - - - - - LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 HYPHEN 9 7 BLOCKIN 10 0 +2024 2024 2 20 202 2024 4 24 024 2024 LINEIN ALIGNEDLEFT NOCAPS ALLDIGIT 0 0 0 0 0 1 0 0 NOPUNCT 9 7 BLOCKIN 10 0 +] ] ] ] ] ] ] ] ] ] LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 ENDBRACKET 9 7 BLOCKIN 10 0 +. . . . . . . . . . LINEEND ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 10 7 BLOCKEND 10 0 +[ [ [ [ [ [ [ [ [ [ LINESTART ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 OPENBRACKET 0 9 BLOCKSTART 9 0 +72 72 7 72 72 72 2 72 72 72 LINEIN ALIGNEDLEFT NOCAPS ALLDIGIT 0 0 0 0 0 0 0 0 NOPUNCT 0 9 BLOCKIN 9 0 +] ] ] ] ] ] ] ] ] ] LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 ENDBRACKET 0 9 BLOCKIN 9 0 +Junlin junlin J Ju Jun Junl n in lin nlin LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 1 9 BLOCKIN 9 0 +Wang wang W Wa Wan Wang g ng ang Wang LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 1 9 BLOCKIN 9 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 1 9 BLOCKIN 9 0 +Jue jue J Ju Jue Jue e ue Jue Jue LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 2 9 BLOCKIN 9 0 +Wang wang W Wa Wan Wang g ng ang Wang LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 3 9 BLOCKIN 9 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 3 9 BLOCKIN 9 0 +Ben ben B Be Ben Ben n en Ben Ben LINEIN 
ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 3 9 BLOCKIN 9 0 +Athiwaratkun athiwaratkun A At Ath Athi n un kun tkun LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 5 9 BLOCKIN 9 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 5 9 BLOCKIN 9 0 +Ce ce C Ce Ce Ce e Ce Ce Ce LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 5 9 BLOCKIN 9 0 +Zhang zhang Z Zh Zha Zhan g ng ang hang LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 6 9 BLOCKIN 9 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 6 9 BLOCKIN 9 0 +and and a an and and d nd and and LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 6 9 BLOCKIN 9 0 +James james J Ja Jam Jame s es mes ames LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 7 9 BLOCKIN 9 0 +Zou zou Z Zo Zou Zou u ou Zou Zou LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 8 9 BLOCKIN 9 0 +. . . . . . . . . . LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 8 9 BLOCKIN 9 0 +Mixture mixture M Mi Mix Mixt e re ure ture LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 9 9 BLOCKIN 9 0 +- - - - - - - - - - LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 HYPHEN 9 9 BLOCKIN 9 0 +of of o of of of f of of of LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 9 9 BLOCKIN 9 0 +- - - - - - - - - - LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 HYPHEN 9 9 BLOCKIN 9 0 +agents agents a ag age agen s ts nts ents LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 10 9 BLOCKIN 9 0 +enhances enhances e en enh enha s es ces nces LINEEND ALIGNEDLEFT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 10 9 BLOCKIN 9 0 +large large l la lar larg e ge rge arge LINESTART LINEINDENT NOCAPS NODIGIT 0 1 1 0 0 0 0 0 NOPUNCT 1 4 BLOCKIN 2 0 +language language l la lan lang e ge age uage LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 3 4 BLOCKIN 2 0 +model model m mo mod mode l el del odel LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 
0 0 0 0 0 NOPUNCT 5 4 BLOCKIN 2 0 +capabilities capabilities c ca cap capa s es ies ties LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 8 4 BLOCKIN 2 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 8 4 BLOCKIN 2 0 +2024 2024 2 20 202 2024 4 24 024 2024 LINEIN LINEINDENT NOCAPS ALLDIGIT 0 0 0 0 0 1 0 0 NOPUNCT 10 4 BLOCKIN 2 0 +. . . . . . . . . . LINEEND LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 10 4 BLOCKEND 2 0 +[ [ [ [ [ [ [ [ [ [ LINESTART ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 OPENBRACKET 0 9 BLOCKSTART 7 0 +73 73 7 73 73 73 3 73 73 73 LINEIN ALIGNEDLEFT NOCAPS ALLDIGIT 0 0 0 0 0 0 0 0 NOPUNCT 0 9 BLOCKIN 7 0 +] ] ] ] ] ] ] ] ] ] LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 ENDBRACKET 0 9 BLOCKIN 7 0 +John john J Jo Joh John n hn ohn John LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 0 9 BLOCKIN 7 0 +Schulman schulman S Sc Sch Schu n an man lman LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 1 9 BLOCKIN 7 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 2 9 BLOCKIN 7 0 +Filip filip F Fi Fil Fili p ip lip ilip LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 2 9 BLOCKIN 7 0 +Wolski wolski W Wo Wol Wols i ki ski lski LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 3 9 BLOCKIN 7 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 3 9 BLOCKIN 7 0 +Prafulla prafulla P Pr Pra Praf a la lla ulla LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 4 9 BLOCKIN 7 0 +Dhariwal dhariwal D Dh Dha Dhar l al wal iwal LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 5 9 BLOCKIN 7 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 5 9 BLOCKIN 7 0 +Alec alec A Al Ale Alec c ec lec Alec LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 6 9 BLOCKIN 7 0 +Radford radford R Ra Rad Radf d rd ord ford LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 7 9 BLOCKIN 7 0 +, , 
, , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 7 9 BLOCKIN 7 0 +and and a an and and d nd and and LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 7 9 BLOCKIN 7 0 +Oleg oleg O Ol Ole Oleg g eg leg Oleg LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 8 9 BLOCKIN 7 0 +Klimov klimov K Kl Kli Klim v ov mov imov LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 9 9 BLOCKIN 7 0 +. . . . . . . . . . LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 9 9 BLOCKIN 7 0 +Proximal proximal P Pr Pro Prox l al mal imal LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 10 9 BLOCKIN 7 0 +policy policy p po pol poli y cy icy licy LINEEND ALIGNEDLEFT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 10 9 BLOCKIN 7 0 +optimization optimization o op opt opti n on ion tion LINESTART LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 3 3 BLOCKIN 2 0 +algorithms algorithms a al alg algo s ms hms thms LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 7 3 BLOCKIN 2 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 7 3 BLOCKIN 2 0 +2017 2017 2 20 201 2017 7 17 017 2017 LINEIN LINEINDENT NOCAPS ALLDIGIT 0 0 0 0 0 1 0 0 NOPUNCT 9 3 BLOCKIN 2 0 +. . . . . . . . . . 
LINEEND LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 10 3 BLOCKEND 2 0 +[ [ [ [ [ [ [ [ [ [ LINESTART ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 OPENBRACKET 0 9 BLOCKSTART 8 0 +74 74 7 74 74 74 4 74 74 74 LINEIN ALIGNEDLEFT NOCAPS ALLDIGIT 0 0 0 0 0 0 0 0 NOPUNCT 0 9 BLOCKIN 8 0 +] ] ] ] ] ] ] ] ] ] LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 ENDBRACKET 0 9 BLOCKIN 8 0 +Rafael rafael R Ra Raf Rafa l el ael fael LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 1 9 BLOCKIN 8 0 +Rafailov rafailov R Ra Raf Rafa v ov lov ilov LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 2 9 BLOCKIN 8 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 2 9 BLOCKIN 8 0 +Archit archit A Ar Arc Arch t it hit chit LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 3 9 BLOCKIN 8 0 +Sharma sharma S Sh Sha Shar a ma rma arma LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 4 9 BLOCKIN 8 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 4 9 BLOCKIN 8 0 +Eric eric E Er Eri Eric c ic ric Eric LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 4 9 BLOCKIN 8 0 +Mitchell mitchell M Mi Mit Mitc l ll ell hell LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 5 9 BLOCKIN 8 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 5 9 BLOCKIN 8 0 +Stefano stefano S St Ste Stef o no ano fano LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 6 9 BLOCKIN 8 0 +Ermon ermon E Er Erm Ermo n on mon rmon LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 7 9 BLOCKIN 8 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 7 9 BLOCKIN 8 0 +Christopher christopher C Ch Chr Chri r er her pher LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 9 9 BLOCKIN 8 0 +D d D D D D D D D D LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 NOPUNCT 9 9 BLOCKIN 8 0 +. . . . . . . . . . 
LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 9 9 BLOCKIN 8 0 +Manning manning M Ma Man Mann g ng ing ning LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 1 0 0 0 0 0 NOPUNCT 10 9 BLOCKIN 8 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 10 9 BLOCKIN 8 0 +and and a an and and d nd and and LINEEND ALIGNEDLEFT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 10 9 BLOCKIN 8 0 +Chelsea chelsea C Ch Che Chel a ea sea lsea LINESTART LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 0 9 BLOCKIN 3 0 +Finn finn F Fi Fin Finn n nn inn Finn LINEIN LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 1 9 BLOCKIN 3 0 +. . . . . . . . . . LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 1 9 BLOCKIN 3 0 +Direct direct D Di Dir Dire t ct ect rect LINEIN LINEINDENT INITCAP NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 2 9 BLOCKIN 3 0 +preference preference p pr pre pref e ce nce ence LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 3 9 BLOCKIN 3 0 +optimization optimization o op opt opti n on ion tion LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 5 9 BLOCKIN 3 0 +: : : : : : : : : : LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 PUNCT 5 9 BLOCKIN 3 0 +Your your Y Yo You Your r ur our Your LINEIN LINEINDENT INITCAP NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 5 9 BLOCKIN 3 0 +language language l la lan lang e ge age uage LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 6 9 BLOCKIN 3 0 +model model m mo mod mode l el del odel LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 7 9 BLOCKIN 3 0 +is is i is is is s is is is LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 8 9 BLOCKIN 3 0 +secretly secretly s se sec secr y ly tly etly LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 9 9 BLOCKIN 3 0 +a a a a a a a a a a LINEIN LINEINDENT NOCAPS NODIGIT 1 0 1 0 0 0 0 0 NOPUNCT 9 9 BLOCKIN 3 0 +reward reward r re rew rewa d rd ard ward LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 10 9 BLOCKIN 3 0 +model model m mo mod mode l el del 
odel LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 10 9 BLOCKIN 3 0 +, , , , , , , , , , LINEEND LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 10 9 BLOCKIN 3 0 +2024 2024 2 20 202 2024 4 24 024 2024 LINESTART LINEINDENT NOCAPS ALLDIGIT 0 0 0 0 0 1 0 0 NOPUNCT 5 0 BLOCKIN 1 0 +. . . . . . . . . . LINEEND LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 7 0 BLOCKEND 1 0 +[ [ [ [ [ [ [ [ [ [ LINESTART ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 OPENBRACKET 0 9 BLOCKSTART 10 0 +75 75 7 75 75 75 5 75 75 75 LINEIN ALIGNEDLEFT NOCAPS ALLDIGIT 0 0 0 0 0 0 0 0 NOPUNCT 0 9 BLOCKIN 10 0 +] ] ] ] ] ] ] ] ] ] LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 ENDBRACKET 0 9 BLOCKIN 10 0 +Shusheng shusheng S Sh Shu Shus g ng eng heng LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 1 9 BLOCKIN 10 0 +Xu xu X Xu Xu Xu u Xu Xu Xu LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 1 9 BLOCKIN 10 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 1 9 BLOCKIN 10 0 +Wei wei W We Wei Wei i ei Wei Wei LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 2 9 BLOCKIN 10 0 +Fu fu F Fu Fu Fu u Fu Fu Fu LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 2 9 BLOCKIN 10 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 2 9 BLOCKIN 10 0 +Jiaxuan jiaxuan J Ji Jia Jiax n an uan xuan LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 3 9 BLOCKIN 10 0 +Gao gao G Ga Gao Gao o ao Gao Gao LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 4 9 BLOCKIN 10 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 4 9 BLOCKIN 10 0 +Wenjie wenjie W We Wen Wenj e ie jie njie LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 5 9 BLOCKIN 10 0 +Ye ye Y Ye Ye Ye e Ye Ye Ye LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 5 9 BLOCKIN 10 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 5 9 BLOCKIN 10 0 +Weilin weilin W We Wei Weil n 
in lin ilin LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 6 9 BLOCKIN 10 0 +Liu liu L Li Liu Liu u iu Liu Liu LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 7 9 BLOCKIN 10 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 7 9 BLOCKIN 10 0 +Zhiyu zhiyu Z Zh Zhi Zhiy u yu iyu hiyu LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 8 9 BLOCKIN 10 0 +Mei mei M Me Mei Mei i ei Mei Mei LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 8 9 BLOCKIN 10 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 8 9 BLOCKIN 10 0 +Guangju guangju G Gu Gua Guan u ju gju ngju LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 9 9 BLOCKIN 10 0 +Wang wang W Wa Wan Wang g ng ang Wang LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 10 9 BLOCKIN 10 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 10 9 BLOCKIN 10 0 +Chao chao C Ch Cha Chao o ao hao Chao LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 10 9 BLOCKIN 10 0 +Yu yu Y Yu Yu Yu u Yu Yu Yu LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 10 9 BLOCKIN 10 0 +, , , , , , , , , , LINEEND ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 10 9 BLOCKIN 10 0 +and and a an and and d nd and and LINESTART LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 0 7 BLOCKIN 4 0 +Yi yi Y Yi Yi Yi i Yi Yi Yi LINEIN LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 0 7 BLOCKIN 4 0 +Wu wu W Wu Wu Wu u Wu Wu Wu LINEIN LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 1 7 BLOCKIN 4 0 +. . . . . . . . . . 
LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 1 7 BLOCKIN 4 0 +Is is I Is Is Is s Is Is Is LINEIN LINEINDENT INITCAP NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 1 7 BLOCKIN 4 0 +dpo dpo d dp dpo dpo o po dpo dpo LINEIN LINEINDENT NOCAPS NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 2 7 BLOCKIN 4 0 +superior superior s su sup supe r or ior rior LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 3 7 BLOCKIN 4 0 +to to t to to to o to to to LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 4 7 BLOCKIN 4 0 +ppo ppo p pp ppo ppo o po ppo ppo LINEIN LINEINDENT NOCAPS NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 4 7 BLOCKIN 4 0 +for for f fo for for r or for for LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 5 7 BLOCKIN 4 0 +llm llm l ll llm llm m lm llm llm LINEIN LINEINDENT NOCAPS NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 6 7 BLOCKIN 4 0 +alignment alignment a al ali alig t nt ent ment LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 7 7 BLOCKIN 4 0 +? ? ? ? ? ? ? ? ? ? LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 PUNCT 7 7 BLOCKIN 4 0 +a a a a a a a a a a LINEIN LINEINDENT NOCAPS NODIGIT 1 0 1 0 0 0 0 0 NOPUNCT 7 7 BLOCKIN 4 0 +comprehensive comprehensive c co com comp e ve ive sive LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 9 7 BLOCKIN 4 0 +study study s st stu stud y dy udy tudy LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 10 7 BLOCKIN 4 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 10 7 BLOCKIN 4 0 +2024 2024 2 20 202 2024 4 24 024 2024 LINEIN LINEINDENT NOCAPS ALLDIGIT 0 0 0 0 0 1 0 0 NOPUNCT 10 7 BLOCKIN 4 0 +. . . . . . . . . . 
LINEEND LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 10 7 BLOCKEND 4 0 +[ [ [ [ [ [ [ [ [ [ LINESTART ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 OPENBRACKET 0 9 BLOCKSTART 6 0 +76 76 7 76 76 76 6 76 76 76 LINEIN ALIGNEDLEFT NOCAPS ALLDIGIT 0 0 0 0 0 0 0 0 NOPUNCT 0 9 BLOCKIN 6 0 +] ] ] ] ] ] ] ] ] ] LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 ENDBRACKET 0 9 BLOCKIN 6 0 +Jiwoo jiwoo J Ji Jiw Jiwo o oo woo iwoo LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 1 9 BLOCKIN 6 0 +Hong hong H Ho Hon Hong g ng ong Hong LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 1 9 BLOCKIN 6 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 1 9 BLOCKIN 6 0 +Noah noah N No Noa Noah h ah oah Noah LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 2 9 BLOCKIN 6 0 +Lee lee L Le Lee Lee e ee Lee Lee LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 1 0 0 0 0 0 NOPUNCT 3 9 BLOCKIN 6 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 3 9 BLOCKIN 6 0 +and and a an and and d nd and and LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 3 9 BLOCKIN 6 0 +James james J Ja Jam Jame s es mes ames LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 4 9 BLOCKIN 6 0 +Thorne thorne T Th Tho Thor e ne rne orne LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 5 9 BLOCKIN 6 0 +. . . . . . . . . . 
LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 5 9 BLOCKIN 6 0 +Orpo orpo O Or Orp Orpo o po rpo Orpo LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 6 9 BLOCKIN 6 0 +: : : : : : : : : : LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 PUNCT 6 9 BLOCKIN 6 0 +Monolithic monolithic M Mo Mon Mono c ic hic thic LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 7 9 BLOCKIN 6 0 +preference preference p pr pre pref e ce nce ence LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 8 9 BLOCKIN 6 0 +optimization optimization o op opt opti n on ion tion LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 10 9 BLOCKIN 6 0 +without without w wi wit with t ut out hout LINEEND ALIGNEDLEFT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 10 9 BLOCKIN 6 0 +reference reference r re ref refe e ce nce ence LINESTART LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 1 5 BLOCKIN 5 0 +model model m mo mod mode l el del odel LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 2 5 BLOCKIN 5 0 +. . . . . . . . . . LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 2 5 BLOCKIN 5 0 +arXiv arxiv a ar arX arXi v iv Xiv rXiv LINEIN LINEINDENT NOCAPS NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 4 5 BLOCKIN 5 0 +preprint preprint p pr pre prep t nt int rint LINEIN LINEINDENT NOCAPS NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 5 5 BLOCKIN 5 0 +arXiv arxiv a ar arX arXi v iv Xiv rXiv LINEIN LINEINDENT NOCAPS NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 7 5 BLOCKIN 5 0 +: : : : : : : : : : LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 PUNCT 7 5 BLOCKIN 5 0 +2403 2403 2 24 240 2403 3 03 403 2403 LINEIN LINEINDENT NOCAPS ALLDIGIT 0 0 0 0 0 1 0 0 NOPUNCT 8 5 BLOCKIN 5 0 +. . . . . . . . . . 
LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 8 5 BLOCKIN 5 0 +07691 07691 0 07 076 0769 1 91 691 7691 LINEIN LINEINDENT NOCAPS ALLDIGIT 0 0 0 0 0 0 0 0 NOPUNCT 9 5 BLOCKIN 5 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 9 5 BLOCKIN 5 0 +2024 2024 2 20 202 2024 4 24 024 2024 LINEIN LINEINDENT NOCAPS ALLDIGIT 0 0 0 0 0 1 0 0 NOPUNCT 10 5 BLOCKIN 5 0 +. . . . . . . . . . LINEEND LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 10 5 BLOCKEND 5 0 +[ [ [ [ [ [ [ [ [ [ LINESTART ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 OPENBRACKET 0 8 BLOCKSTART 6 0 +77 77 7 77 77 77 7 77 77 77 LINEIN ALIGNEDLEFT NOCAPS ALLDIGIT 0 0 0 0 0 0 0 0 NOPUNCT 0 8 BLOCKIN 6 0 +] ] ] ] ] ] ] ] ] ] LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 ENDBRACKET 0 8 BLOCKIN 6 0 +Jiwoo jiwoo J Ji Jiw Jiwo o oo woo iwoo LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 1 8 BLOCKIN 6 0 +Hong hong H Ho Hon Hong g ng ong Hong LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 1 8 BLOCKIN 6 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 1 8 BLOCKIN 6 0 +Noah noah N No Noa Noah h ah oah Noah LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 2 8 BLOCKIN 6 0 +Lee lee L Le Lee Lee e ee Lee Lee LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 1 0 0 0 0 0 NOPUNCT 3 8 BLOCKIN 6 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 3 8 BLOCKIN 6 0 +and and a an and and d nd and and LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 3 8 BLOCKIN 6 0 +James james J Ja Jam Jame s es mes ames LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 4 8 BLOCKIN 6 0 +Thorne thorne T Th Tho Thor e ne rne orne LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 5 8 BLOCKIN 6 0 +. . . . . . . . . . 
LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 5 8 BLOCKIN 6 0 +Orpo orpo O Or Orp Orpo o po rpo Orpo LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 6 8 BLOCKIN 6 0 +evaluation evaluation e ev eva eval n on ion tion LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 7 8 BLOCKIN 6 0 +: : : : : : : : : : LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 PUNCT 7 8 BLOCKIN 6 0 +Performance performance P Pe Per Perf e ce nce ance LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 8 8 BLOCKIN 6 0 +on on o on on on n on on on LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 9 8 BLOCKIN 6 0 +alpacaeval alpacaeval a al alp alpa l al val eval LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 10 8 BLOCKIN 6 0 +and and a an and and d nd and and LINEEND ALIGNEDLEFT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 10 8 BLOCKIN 6 0 +mt mt m mt mt mt t mt mt mt LINESTART LINEINDENT NOCAPS NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 0 3 BLOCKIN 4 0 +- - - - - - - - - - LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 HYPHEN 0 3 BLOCKIN 4 0 +bench bench b be ben benc h ch nch ench LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 2 3 BLOCKIN 4 0 +. . . . . . . . . . LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 2 3 BLOCKIN 4 0 +Papers papers P Pa Pap Pape s rs ers pers LINEIN LINEINDENT INITCAP NODIGIT 0 1 1 0 0 0 0 0 NOPUNCT 4 3 BLOCKIN 4 0 +with with w wi wit with h th ith with LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 6 3 BLOCKIN 4 0 +Code code C Co Cod Code e de ode Code LINEIN LINEINDENT INITCAP NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 8 3 BLOCKIN 4 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 8 3 BLOCKIN 4 0 +2024 2024 2 20 202 2024 4 24 024 2024 LINEIN LINEINDENT NOCAPS ALLDIGIT 0 0 0 0 0 1 0 0 NOPUNCT 10 3 BLOCKIN 4 0 +. . . . . . . . . . 
LINEEND LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 10 3 BLOCKEND 4 0 +[ [ [ [ [ [ [ [ [ [ LINESTART ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 OPENBRACKET 0 9 BLOCKSTART 10 0 +78 78 7 78 78 78 8 78 78 78 LINEIN ALIGNEDLEFT NOCAPS ALLDIGIT 0 0 0 0 0 0 0 0 NOPUNCT 0 9 BLOCKIN 10 0 +] ] ] ] ] ] ] ] ] ] LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 ENDBRACKET 0 9 BLOCKIN 10 0 +What what W Wh Wha What t at hat What LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 1 9 BLOCKIN 10 0 +are are a ar are are e re are are LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 1 9 BLOCKIN 10 0 +the the t th the the e he the the LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 2 9 BLOCKIN 10 0 +most most m mo mos most t st ost most LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 2 9 BLOCKIN 10 0 +effective effective e ef eff effe e ve ive tive LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 3 9 BLOCKIN 10 0 +techniques techniques t te tec tech s es ues ques LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 5 9 BLOCKIN 10 0 +for for f fo for for r or for for LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 5 9 BLOCKIN 10 0 +pruning pruning p pr pru prun g ng ing ning LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 6 9 BLOCKIN 10 0 +ai ai a ai ai ai i ai ai ai LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 6 9 BLOCKIN 10 0 +models models m mo mod mode s ls els dels LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 7 9 BLOCKIN 10 0 +? ? ? ? ? ? ? ? ? ? LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 PUNCT 7 9 BLOCKIN 10 0 +- - - - - - - - - - LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 HYPHEN 8 9 BLOCKIN 10 0 +linkedin linkedin l li lin link n in din edin LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 9 9 BLOCKIN 10 0 +. . . . . . . . . . 
LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 9 9 BLOCKIN 10 0 +com com c co com com m om com com LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 9 9 BLOCKIN 10 0 +. . . . . . . . . . LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 9 9 BLOCKIN 10 0 +https https h ht htt http s ps tps ttps LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 0 0 0 0 0 0 1 NOPUNCT 10 9 BLOCKIN 10 0 +: : : : : : : : : : LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 PUNCT 10 9 BLOCKIN 10 0 +/ / / / / / / / / / LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 NOPUNCT 10 9 BLOCKIN 10 0 +/ / / / / / / / / / LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 NOPUNCT 10 9 BLOCKIN 10 0 +www www w ww www www w ww www www LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 10 9 BLOCKIN 10 0 +. . . . . . . . . . LINEEND ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 10 9 BLOCKIN 10 0 +linkedin linkedin l li lin link n in din edin LINESTART LINEINDENT NOCAPS NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 0 8 BLOCKIN 10 0 +. . . . . . . . . . 
LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 1 8 BLOCKIN 10 0 +com com c co com com m om com com LINEIN LINEINDENT NOCAPS NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 1 8 BLOCKIN 10 0 +/ / / / / / / / / / LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 NOPUNCT 1 8 BLOCKIN 10 0 +advice advice a ad adv advi e ce ice vice LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 2 8 BLOCKIN 10 0 +/ / / / / / / / / / LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 NOPUNCT 2 8 BLOCKIN 10 0 +3 3 3 3 3 3 3 3 3 3 LINEIN LINEINDENT NOCAPS ALLDIGIT 1 0 0 0 0 0 0 0 NOPUNCT 2 8 BLOCKIN 10 0 +/ / / / / / / / / / LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 NOPUNCT 2 8 BLOCKIN 10 0 +what what w wh wha what t at hat what LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 3 8 BLOCKIN 10 0 +- - - - - - - - - - LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 HYPHEN 3 8 BLOCKIN 10 0 +most most m mo mos most t st ost most LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 3 8 BLOCKIN 10 0 +- - - - - - - - - - LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 HYPHEN 3 8 BLOCKIN 10 0 +effective effective e ef eff effe e ve ive tive LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 4 8 BLOCKIN 10 0 +- - - - - - - - - - LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 HYPHEN 4 8 BLOCKIN 10 0 +techniques techniques t te tec tech s es ues ques LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 6 8 BLOCKIN 10 0 +- - - - - - - - - - LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 HYPHEN 6 8 BLOCKIN 10 0 +pruning pruning p pr pru prun g ng ing ning LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 7 8 BLOCKIN 10 0 +- - - - - - - - - - LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 HYPHEN 7 8 BLOCKIN 10 0 +0mlef 0mlef 0 0m 0ml 0mle f ef lef mlef LINEIN LINEINDENT NOCAPS CONTAINSDIGITS 0 0 0 0 0 0 0 0 NOPUNCT 7 8 BLOCKIN 10 0 +. . . . . . . . . . 
LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 7 8 BLOCKIN 10 0 +[ [ [ [ [ [ [ [ [ [ LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 OPENBRACKET 8 8 BLOCKIN 10 0 +Accessed accessed A Ac Acc Acce d ed sed ssed LINEIN LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 9 8 BLOCKIN 10 0 +05 05 0 05 05 05 5 05 05 05 LINEIN LINEINDENT NOCAPS ALLDIGIT 0 0 0 0 0 0 0 0 NOPUNCT 9 8 BLOCKIN 10 0 +- - - - - - - - - - LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 HYPHEN 9 8 BLOCKIN 10 0 +07 07 0 07 07 07 7 07 07 07 LINEIN LINEINDENT NOCAPS ALLDIGIT 0 0 0 0 0 0 0 0 NOPUNCT 10 8 BLOCKIN 10 0 +- - - - - - - - - - LINEEND LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 HYPHEN 10 8 BLOCKIN 10 0 +2024 2024 2 20 202 2024 4 24 024 2024 LINESTART LINEINDENT NOCAPS ALLDIGIT 0 0 0 0 0 1 0 0 NOPUNCT 5 0 BLOCKIN 2 0 +] ] ] ] ] ] ] ] ] ] LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 ENDBRACKET 6 0 BLOCKIN 2 0 +. . . . . . . . . . LINEEND LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 7 0 BLOCKEND 2 0 +[ [ [ [ [ [ [ [ [ [ LINESTART ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 OPENBRACKET 0 8 BLOCKSTART 8 0 +79 79 7 79 79 79 9 79 79 79 LINEIN ALIGNEDLEFT NOCAPS ALLDIGIT 0 0 0 0 0 0 0 0 NOPUNCT 0 8 BLOCKIN 8 0 +] ] ] ] ] ] ] ] ] ] LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 ENDBRACKET 0 8 BLOCKIN 8 0 +Boxin boxin B Bo Box Boxi n in xin oxin LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 1 8 BLOCKIN 8 0 +Wang wang W Wa Wan Wang g ng ang Wang LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 1 8 BLOCKIN 8 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 1 8 BLOCKIN 8 0 +Weixin weixin W We Wei Weix n in xin ixin LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 2 8 BLOCKIN 8 0 +Chen chen C Ch Che Chen n en hen Chen LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 3 8 BLOCKIN 8 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 3 8 BLOCKIN 8 0 +Hengzhi hengzhi H He Hen Heng i hi zhi gzhi 
LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 4 8 BLOCKIN 8 0 +Pei pei P Pe Pei Pei i ei Pei Pei LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 5 8 BLOCKIN 8 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 5 8 BLOCKIN 8 0 +Chulin chulin C Ch Chu Chul n in lin ulin LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 6 8 BLOCKIN 8 0 +Xie xie X Xi Xie Xie e ie Xie Xie LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 6 8 BLOCKIN 8 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 6 8 BLOCKIN 8 0 +Mintong mintong M Mi Min Mint g ng ong tong LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 7 8 BLOCKIN 8 0 +Kang kang K Ka Kan Kang g ng ang Kang LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 8 8 BLOCKIN 8 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 8 8 BLOCKIN 8 0 +Chenhui chenhui C Ch Che Chen i ui hui nhui LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 9 8 BLOCKIN 8 0 +Zhang zhang Z Zh Zha Zhan g ng ang hang LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 10 8 BLOCKIN 8 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 10 8 BLOCKIN 8 0 +Chejian chejian C Ch Che Chej n an ian jian LINEEND ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 10 8 BLOCKIN 8 0 +Xu xu X Xu Xu Xu u Xu Xu Xu LINESTART LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 0 8 BLOCKIN 8 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 0 8 BLOCKIN 8 0 +Zidi zidi Z Zi Zid Zidi i di idi Zidi LINEIN LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 0 8 BLOCKIN 8 0 +Xiong xiong X Xi Xio Xion g ng ong iong LINEIN LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 1 8 BLOCKIN 8 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 1 8 BLOCKIN 8 0 +Ritik ritik R Ri Rit Riti k ik tik itik LINEIN LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 2 8 
BLOCKIN 8 0 +Dutta dutta D Du Dut Dutt a ta tta utta LINEIN LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 3 8 BLOCKIN 8 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 3 8 BLOCKIN 8 0 +Rylan rylan R Ry Ryl Ryla n an lan ylan LINEIN LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 4 8 BLOCKIN 8 0 +Schaeffer schaeffer S Sc Sch Scha r er fer ffer LINEIN LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 5 8 BLOCKIN 8 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 5 8 BLOCKIN 8 0 +Sang sang S Sa San Sang g ng ang Sang LINEIN LINEINDENT INITCAP NODIGIT 0 1 1 0 0 0 0 0 NOPUNCT 6 8 BLOCKIN 8 0 +T t T T T T T T T T LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 NOPUNCT 6 8 BLOCKIN 8 0 +. . . . . . . . . . LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 6 8 BLOCKIN 8 0 +Truong truong T Tr Tru Truo g ng ong uong LINEIN LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 7 8 BLOCKIN 8 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 7 8 BLOCKIN 8 0 +Simran simran S Si Sim Simr n an ran mran LINEIN LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 8 8 BLOCKIN 8 0 +Arora arora A Ar Aro Aror a ra ora rora LINEIN LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 9 8 BLOCKIN 8 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 9 8 BLOCKIN 8 0 +Mantas mantas M Ma Man Mant s as tas ntas LINEIN LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 10 8 BLOCKIN 8 0 +Mazeika mazeika M Ma Maz Maze a ka ika eika LINEIN LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 10 8 BLOCKIN 8 0 +, , , , , , , , , , LINEEND LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 10 8 BLOCKIN 8 0 +Dan dan D Da Dan Dan n an Dan Dan LINESTART LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 0 8 BLOCKIN 7 0 +Hendrycks hendrycks H He Hen Hend s ks cks ycks LINEIN LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 1 8 BLOCKIN 7 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 
1 0 0 0 0 0 0 0 COMMA 1 8 BLOCKIN 7 0 +Zinan zinan Z Zi Zin Zina n an nan inan LINEIN LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 2 8 BLOCKIN 7 0 +Lin lin L Li Lin Lin n in Lin Lin LINEIN LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 3 8 BLOCKIN 7 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 3 8 BLOCKIN 7 0 +Yu yu Y Yu Yu Yu u Yu Yu Yu LINEIN LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 3 8 BLOCKIN 7 0 +Cheng cheng C Ch Che Chen g ng eng heng LINEIN LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 4 8 BLOCKIN 7 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 4 8 BLOCKIN 7 0 +Sanmi sanmi S Sa San Sanm i mi nmi anmi LINEIN LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 5 8 BLOCKIN 7 0 +Koyejo koyejo K Ko Koy Koye o jo ejo yejo LINEIN LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 6 8 BLOCKIN 7 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 6 8 BLOCKIN 7 0 +Dawn dawn D Da Daw Dawn n wn awn Dawn LINEIN LINEINDENT INITCAP NODIGIT 0 1 1 0 0 0 0 0 NOPUNCT 6 8 BLOCKIN 7 0 +Song song S So Son Song g ng ong Song LINEIN LINEINDENT INITCAP NODIGIT 0 1 1 0 0 0 0 0 NOPUNCT 7 8 BLOCKIN 7 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 7 8 BLOCKIN 7 0 +and and a an and and d nd and and LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 8 8 BLOCKIN 7 0 +Bo bo B Bo Bo Bo o Bo Bo Bo LINEIN LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 8 8 BLOCKIN 7 0 +Li li L Li Li Li i Li Li Li LINEIN LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 9 8 BLOCKIN 7 0 +. . . . . . . . . . 
LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 9 8 BLOCKIN 7 0 +Decodingtrust decodingtrust D De Dec Deco t st ust rust LINEIN LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 10 8 BLOCKIN 7 0 +: : : : : : : : : : LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 PUNCT 10 8 BLOCKIN 7 0 +A a A A A A A A A A LINEEND LINEINDENT ALLCAP NODIGIT 1 0 1 0 0 0 0 0 NOPUNCT 10 8 BLOCKIN 7 0 +comprehensive comprehensive c co com comp e ve ive sive LINESTART LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 1 6 BLOCKIN 2 0 +assessment assessment a as ass asse t nt ent ment LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 3 6 BLOCKIN 2 0 +of of o of of of f of of of LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 4 6 BLOCKIN 2 0 +trustworthiness trustworthiness t tr tru trus s ss ess ness LINEIN LINEINDENT NOCAPS NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 6 6 BLOCKIN 2 0 +in in i in in in n in in in LINEIN LINEINDENT NOCAPS NODIGIT 0 1 1 0 0 0 0 0 NOPUNCT 7 6 BLOCKIN 2 0 +gpt gpt g gp gpt gpt t pt gpt gpt LINEIN LINEINDENT NOCAPS NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 8 6 BLOCKIN 2 0 +models models m mo mod mode s ls els dels LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 9 6 BLOCKIN 2 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 9 6 BLOCKIN 2 0 +2024 2024 2 20 202 2024 4 24 024 2024 LINEIN LINEINDENT NOCAPS ALLDIGIT 0 0 0 0 0 1 0 0 NOPUNCT 10 6 BLOCKIN 2 0 +. . . . . . . . . . 
LINEEND LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 10 6 BLOCKEND 2 0 +[ [ [ [ [ [ [ [ [ [ LINESTART ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 OPENBRACKET 0 9 BLOCKSTART 8 0 +80 80 8 80 80 80 0 80 80 80 LINEIN ALIGNEDLEFT NOCAPS ALLDIGIT 0 0 0 0 0 0 0 0 NOPUNCT 0 9 BLOCKIN 8 0 +] ] ] ] ] ] ] ] ] ] LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 ENDBRACKET 0 9 BLOCKIN 8 0 +Hakan hakan H Ha Hak Haka n an kan akan LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 1 9 BLOCKIN 8 0 +Inan inan I In Ina Inan n an nan Inan LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 1 9 BLOCKIN 8 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 1 9 BLOCKIN 8 0 +Kartikeya kartikeya K Ka Kar Kart a ya eya keya LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 2 9 BLOCKIN 8 0 +Upasani upasani U Up Upa Upas i ni ani sani LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 3 9 BLOCKIN 8 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 3 9 BLOCKIN 8 0 +Jianfeng jianfeng J Ji Jia Jian g ng eng feng LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 4 9 BLOCKIN 8 0 +Chi chi C Ch Chi Chi i hi Chi Chi LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 5 9 BLOCKIN 8 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 5 9 BLOCKIN 8 0 +Rashi rashi R Ra Ras Rash i hi shi ashi LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 6 9 BLOCKIN 8 0 +Rungta rungta R Ru Run Rung a ta gta ngta LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 7 9 BLOCKIN 8 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 7 9 BLOCKIN 8 0 +Krithika krithika K Kr Kri Krit a ka ika hika LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 8 9 BLOCKIN 8 0 +Iyer iyer I Iy Iye Iyer r er yer Iyer LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 8 9 BLOCKIN 8 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 
COMMA 8 9 BLOCKIN 8 0 +Yuning yuning Y Yu Yun Yuni g ng ing ning LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 9 9 BLOCKIN 8 0 +Mao mao M Ma Mao Mao o ao Mao Mao LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 10 9 BLOCKIN 8 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 10 9 BLOCKIN 8 0 +Michael michael M Mi Mic Mich l el ael hael LINEEND ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 10 9 BLOCKIN 8 0 +Tontchev tontchev T To Ton Tont v ev hev chev LINESTART LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 0 8 BLOCKIN 7 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 1 8 BLOCKIN 7 0 +Qing qing Q Qi Qin Qing g ng ing Qing LINEIN LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 1 8 BLOCKIN 7 0 +Hu hu H Hu Hu Hu u Hu Hu Hu LINEIN LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 2 8 BLOCKIN 7 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 2 8 BLOCKIN 7 0 +Brian brian B Br Bri Bria n an ian rian LINEIN LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 3 8 BLOCKIN 7 0 +Fuller fuller F Fu Ful Full r er ler ller LINEIN LINEINDENT INITCAP NODIGIT 0 1 1 0 0 0 0 0 NOPUNCT 3 8 BLOCKIN 7 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 4 8 BLOCKIN 7 0 +Davide davide D Da Dav Davi e de ide vide LINEIN LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 4 8 BLOCKIN 7 0 +Testuggine testuggine T Te Tes Test e ne ine gine LINEIN LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 6 8 BLOCKIN 7 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 6 8 BLOCKIN 7 0 +and and a an and and d nd and and LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 6 8 BLOCKIN 7 0 +Madian madian M Ma Mad Madi n an ian dian LINEIN LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 7 8 BLOCKIN 7 0 +Khabsa khabsa K Kh Kha Khab a sa bsa absa LINEIN LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 8 8 BLOCKIN 7 0 +. . . . 
. . . . . . LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 8 8 BLOCKIN 7 0 +Llama llama L Ll Lla Llam a ma ama lama LINEIN LINEINDENT INITCAP NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 9 8 BLOCKIN 7 0 +guard guard g gu gua guar d rd ard uard LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 10 8 BLOCKIN 7 0 +: : : : : : : : : : LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 PUNCT 10 8 BLOCKIN 7 0 +Llm llm L Ll Llm Llm m lm Llm Llm LINEIN LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 10 8 BLOCKIN 7 0 +- - - - - - - - - - LINEEND LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 HYPHEN 10 8 BLOCKIN 7 0 +based based b ba bas base d ed sed ased LINESTART LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 0 6 BLOCKIN 4 0 +input input i in inp inpu t ut put nput LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 1 6 BLOCKIN 4 0 +- - - - - - - - - - LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 HYPHEN 2 6 BLOCKIN 4 0 +output output o ou out outp t ut put tput LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 2 6 BLOCKIN 4 0 +safeguard safeguard s sa saf safe d rd ard uard LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 4 6 BLOCKIN 4 0 +for for f fo for for r or for for LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 5 6 BLOCKIN 4 0 +human human h hu hum huma n an man uman LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 6 6 BLOCKIN 4 0 +- - - - - - - - - - LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 HYPHEN 6 6 BLOCKIN 4 0 +ai ai a ai ai ai i ai ai ai LINEIN LINEINDENT NOCAPS NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 7 6 BLOCKIN 4 0 +conversations conversations c co con conv s ns ons ions LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 9 6 BLOCKIN 4 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 9 6 BLOCKIN 4 0 +2023 2023 2 20 202 2023 3 23 023 2023 LINEIN LINEINDENT NOCAPS ALLDIGIT 0 0 0 0 0 1 0 0 NOPUNCT 10 6 BLOCKIN 4 0 +. . . . . . . . . . 
LINEEND LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 10 6 BLOCKEND 4 0 +[ [ [ [ [ [ [ [ [ [ LINESTART ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 OPENBRACKET 0 9 BLOCKSTART 8 0 +81 81 8 81 81 81 1 81 81 81 LINEIN ALIGNEDLEFT NOCAPS ALLDIGIT 0 0 0 0 0 0 0 0 NOPUNCT 0 9 BLOCKIN 8 0 +] ] ] ] ] ] ] ] ] ] LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 ENDBRACKET 0 9 BLOCKIN 8 0 +Wenjun wenjun W We Wen Wenj n un jun njun LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 1 9 BLOCKIN 8 0 +Zeng zeng Z Ze Zen Zeng g ng eng Zeng LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 1 9 BLOCKIN 8 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 1 9 BLOCKIN 8 0 +Yuchi yuchi Y Yu Yuc Yuch i hi chi uchi LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 2 9 BLOCKIN 8 0 +Liu liu L Li Liu Liu u iu Liu Liu LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 3 9 BLOCKIN 8 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 3 9 BLOCKIN 8 0 +Ryan ryan R Ry Rya Ryan n an yan Ryan LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 3 9 BLOCKIN 8 0 +Mullins mullins M Mu Mul Mull s ns ins lins LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 4 9 BLOCKIN 8 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 4 9 BLOCKIN 8 0 +Ludovic ludovic L Lu Lud Ludo c ic vic ovic LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 5 9 BLOCKIN 8 0 +Peran peran P Pe Per Pera n an ran eran LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 6 9 BLOCKIN 8 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 6 9 BLOCKIN 8 0 +Joe joe J Jo Joe Joe e oe Joe Joe LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 7 9 BLOCKIN 8 0 +Fernandez fernandez F Fe Fer Fern z ez dez ndez LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 8 9 BLOCKIN 8 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 8 9 
BLOCKIN 8 0 +Hamza hamza H Ha Ham Hamz a za mza amza LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 9 9 BLOCKIN 8 0 +Harkous harkous H Ha Har Hark s us ous kous LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 10 9 BLOCKIN 8 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 10 9 BLOCKIN 8 0 +Karthik karthik K Ka Kar Kart k ik hik thik LINEEND ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 10 9 BLOCKIN 8 0 +Narasimhan narasimhan N Na Nar Nara n an han mhan LINESTART LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 1 8 BLOCKIN 5 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 1 8 BLOCKIN 5 0 +Drew drew D Dr Dre Drew w ew rew Drew LINEIN LINEINDENT INITCAP NODIGIT 0 1 1 0 0 0 0 0 NOPUNCT 1 8 BLOCKIN 5 0 +Proud proud P Pr Pro Prou d ud oud roud LINEIN LINEINDENT INITCAP NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 2 8 BLOCKIN 5 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 2 8 BLOCKIN 5 0 +Piyush piyush P Pi Piy Piyu h sh ush yush LINEIN LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 3 8 BLOCKIN 5 0 +Kumar kumar K Ku Kum Kuma r ar mar umar LINEIN LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 4 8 BLOCKIN 5 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 4 8 BLOCKIN 5 0 +Bhaktipriya bhaktipriya B Bh Bha Bhak a ya iya riya LINEIN LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 6 8 BLOCKIN 5 0 +Radharapu radharapu R Ra Rad Radh u pu apu rapu LINEIN LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 7 8 BLOCKIN 5 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 7 8 BLOCKIN 5 0 +Olivia olivia O Ol Oli Oliv a ia via ivia LINEIN LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 8 8 BLOCKIN 5 0 +Sturman sturman S St Stu Stur n an man rman LINEIN LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 9 8 BLOCKIN 5 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 9 8 BLOCKIN 5 0 
+and and a an and and d nd and and LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 10 8 BLOCKIN 5 0 +Oscar oscar O Os Osc Osca r ar car scar LINEEND LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 10 8 BLOCKIN 5 0 +Wahltinez wahltinez W Wa Wah Wahl z ez nez inez LINESTART LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 1 7 BLOCKIN 4 0 +. . . . . . . . . . LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 1 7 BLOCKIN 4 0 +Shieldgemma shieldgemma S Sh Shi Shie a ma mma emma LINEIN LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 2 7 BLOCKIN 4 0 +: : : : : : : : : : LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 PUNCT 3 7 BLOCKIN 4 0 +Generative generative G Ge Gen Gene e ve ive tive LINEIN LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 4 7 BLOCKIN 4 0 +ai ai a ai ai ai i ai ai ai LINEIN LINEINDENT NOCAPS NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 5 7 BLOCKIN 4 0 +content content c co con cont t nt ent tent LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 6 7 BLOCKIN 4 0 +moderation moderation m mo mod mode n on ion tion LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 7 7 BLOCKIN 4 0 +based based b ba bas base d ed sed ased LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 8 7 BLOCKIN 4 0 +on on o on on on n on on on LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 9 7 BLOCKIN 4 0 +gemma gemma g ge gem gemm a ma mma emma LINEIN LINEINDENT NOCAPS NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 9 7 BLOCKIN 4 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 10 7 BLOCKIN 4 0 +2024 2024 2 20 202 2024 4 24 024 2024 LINEIN LINEINDENT NOCAPS ALLDIGIT 0 0 0 0 0 1 0 0 NOPUNCT 10 7 BLOCKIN 4 0 +. . . . . . . . . . 
LINEEND LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 10 7 BLOCKEND 4 0 +[ [ [ [ [ [ [ [ [ [ LINESTART ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 OPENBRACKET 0 9 BLOCKSTART 8 0 +82 82 8 82 82 82 2 82 82 82 LINEIN ALIGNEDLEFT NOCAPS ALLDIGIT 0 0 0 0 0 0 0 0 NOPUNCT 0 9 BLOCKIN 8 0 +] ] ] ] ] ] ] ] ] ] LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 ENDBRACKET 0 9 BLOCKIN 8 0 +Seungju seungju S Se Seu Seun u ju gju ngju LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 1 9 BLOCKIN 8 0 +Han han H Ha Han Han n an Han Han LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 1 9 BLOCKIN 8 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 1 9 BLOCKIN 8 0 +Kavel kavel K Ka Kav Kave l el vel avel LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 2 9 BLOCKIN 8 0 +Rao rao R Ra Rao Rao o ao Rao Rao LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 3 9 BLOCKIN 8 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 3 9 BLOCKIN 8 0 +Allyson allyson A Al All Ally n on son yson LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 4 9 BLOCKIN 8 0 +Ettinger ettinger E Et Ett Etti r er ger nger LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 5 9 BLOCKIN 8 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 5 9 BLOCKIN 8 0 +Liwei liwei L Li Liw Liwe i ei wei iwei LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 5 9 BLOCKIN 8 0 +Jiang jiang J Ji Jia Jian g ng ang iang LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 6 9 BLOCKIN 8 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 6 9 BLOCKIN 8 0 +Bill bill B Bi Bil Bill l ll ill Bill LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 1 0 0 0 0 0 NOPUNCT 7 9 BLOCKIN 8 0 +Yuchen yuchen Y Yu Yuc Yuch n en hen chen LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 8 9 BLOCKIN 8 0 +Lin lin L Li Lin Lin n in Lin Lin LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 
NOPUNCT 8 9 BLOCKIN 8 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 8 9 BLOCKIN 8 0 +Nathan nathan N Na Nat Nath n an han than LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 9 9 BLOCKIN 8 0 +Lambert lambert L La Lam Lamb t rt ert bert LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 10 9 BLOCKIN 8 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 10 9 BLOCKIN 8 0 +Yejin yejin Y Ye Yej Yeji n in jin ejin LINEEND ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 10 9 BLOCKIN 8 0 +Choi choi C Ch Cho Choi i oi hoi Choi LINESTART LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 0 9 BLOCKIN 6 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 0 9 BLOCKIN 6 0 +and and a an and and d nd and and LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 1 9 BLOCKIN 6 0 +Nouha nouha N No Nou Nouh a ha uha ouha LINEIN LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 1 9 BLOCKIN 6 0 +Dziri dziri D Dz Dzi Dzir i ri iri ziri LINEIN LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 2 9 BLOCKIN 6 0 +. . . . . . . . . . 
LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 2 9 BLOCKIN 6 0 +Wildguard wildguard W Wi Wil Wild d rd ard uard LINEIN LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 3 9 BLOCKIN 6 0 +: : : : : : : : : : LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 PUNCT 3 9 BLOCKIN 6 0 +Open open O Op Ope Open n en pen Open LINEIN LINEINDENT INITCAP NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 4 9 BLOCKIN 6 0 +one one o on one one e ne one one LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 5 9 BLOCKIN 6 0 +- - - - - - - - - - LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 HYPHEN 5 9 BLOCKIN 6 0 +stop stop s st sto stop p op top stop LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 5 9 BLOCKIN 6 0 +moderation moderation m mo mod mode n on ion tion LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 6 9 BLOCKIN 6 0 +tools tools t to too tool s ls ols ools LINEIN LINEINDENT NOCAPS NODIGIT 0 1 1 0 0 0 0 0 NOPUNCT 7 9 BLOCKIN 6 0 +for for f fo for for r or for for LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 8 9 BLOCKIN 6 0 +safety safety s sa saf safe y ty ety fety LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 8 9 BLOCKIN 6 0 +risks risks r ri ris risk s ks sks isks LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 9 9 BLOCKIN 6 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 9 9 BLOCKIN 6 0 +jailbreaks jailbreaks j ja jai jail s ks aks eaks LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 10 9 BLOCKIN 6 0 +, , , , , , , , , , LINEEND LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 10 9 BLOCKIN 6 0 +and and a an and and d nd and and LINESTART LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 1 2 BLOCKIN 2 0 +refusals refusals r re ref refu s ls als sals LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 4 2 BLOCKIN 2 0 +of of o of of of f of of of LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 5 2 BLOCKIN 2 0 +llms llms l ll llm llms s ms lms llms LINEIN LINEINDENT NOCAPS 
NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 7 2 BLOCKIN 2 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 8 2 BLOCKIN 2 0 +2024 2024 2 20 202 2024 4 24 024 2024 LINEIN LINEINDENT NOCAPS ALLDIGIT 0 0 0 0 0 1 0 0 NOPUNCT 10 2 BLOCKIN 2 0 +. . . . . . . . . . LINEEND LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 10 2 BLOCKEND 2 0 +[ [ [ [ [ [ [ [ [ [ LINESTART ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 OPENBRACKET 0 1 BLOCKSTART 2 0 +83 83 8 83 83 83 3 83 83 83 LINEIN ALIGNEDLEFT NOCAPS ALLDIGIT 0 0 0 0 0 0 0 0 NOPUNCT 2 1 BLOCKIN 2 0 +] ] ] ] ] ] ] ] ] ] LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 ENDBRACKET 3 1 BLOCKIN 2 0 +Vishal vishal V Vi Vis Vish l al hal shal LINEEND ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 9 1 BLOCKIN 2 0 +Mysore mysore M My Mys Myso e re ore sore LINESTART LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 6 0 BLOCKIN 1 0 +. . . . . . . . . . LINEEND LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 7 0 BLOCKIN 1 0 +LLM llm L LL LLM LLM M LM LLM LLM LINESTART LINEINDENT ALLCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 6 0 BLOCKIN no 0 +Deployment deployment D De Dep Depl t nt ent ment LINESTART LINEINDENT INITCAP NODIGIT 0 1 1 0 0 0 0 0 NOPUNCT 8 1 BLOCKIN no 0 +Strategies strategies S St Str Stra s es ies gies LINESTART LINEINDENT INITCAP NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 8 1 BLOCKIN no 0 +: : : : : : : : : : LINESTART LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 PUNCT 3 0 BLOCKIN 1 0 +Its its I It Its Its s ts Its Its LINESTART LINEINDENT INITCAP NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 6 0 BLOCKIN no 0 +not not n no not not t ot not not LINESTART LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 6 0 BLOCKIN no 0 +Magic magic M Ma Mag Magi c ic gic agic LINESTART ALIGNEDLEFT INITCAP NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 7 0 BLOCKIN no 0 +, , , , , , , , , , LINESTART LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 3 0 BLOCKIN 1 0 +Its its I It Its Its s ts Its Its LINESTART LINEINDENT INITCAP NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 6 0 BLOCKIN no 
0 +Logic logic L Lo Log Logi c ic gic ogic LINESTART LINEINDENT INITCAP NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 6 0 BLOCKIN 1 0 +! ! ! ! ! ! ! ! ! ! LINEEND LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 NOPUNCT 7 0 BLOCKIN 1 0 +- - - - - - - - - - LINESTART LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 HYPHEN 5 0 BLOCKIN 1 0 +visrow visrow v vi vis visr w ow row srow LINESTART LINEINDENT NOCAPS NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 6 0 BLOCKIN 1 0 +. . . . . . . . . . LINEEND LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 7 0 BLOCKIN 1 0 +https https h ht htt http s ps tps ttps LINESTART LINEINDENT NOCAPS NODIGIT 0 0 0 0 0 0 0 1 NOPUNCT 1 2 BLOCKIN 7 0 +: : : : : : : : : : LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 PUNCT 2 2 BLOCKIN 7 0 +/ / / / / / / / / / LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 NOPUNCT 2 2 BLOCKIN 7 0 +/ / / / / / / / / / LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 NOPUNCT 2 2 BLOCKIN 7 0 +medium medium m me med medi m um ium dium LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 4 2 BLOCKIN 7 0 +. . . . . . . . . . 
LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 5 2 BLOCKIN 7 0 +com com c co com com m om com com LINEIN LINEINDENT NOCAPS NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 6 2 BLOCKIN 7 0 +/ / / / / / / / / / LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 NOPUNCT 6 2 BLOCKIN 7 0 +@ @ @ @ @ @ @ @ @ @ LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 NOPUNCT 6 2 BLOCKIN 7 0 +visrow visrow v vi vis visr w ow row srow LINEIN LINEINDENT NOCAPS NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 8 2 BLOCKIN 7 0 +/ / / / / / / / / / LINEEND LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 NOPUNCT 9 2 BLOCKIN 7 0 +llm llm l ll llm llm m lm llm llm LINESTART ALIGNEDLEFT NOCAPS NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 0 6 BLOCKIN 9 0 +- - - - - - - - - - LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 HYPHEN 0 6 BLOCKIN 9 0 +deployment deployment d de dep depl t nt ent ment LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 1 1 0 0 0 0 0 NOPUNCT 2 6 BLOCKIN 9 0 +- - - - - - - - - - LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 HYPHEN 2 6 BLOCKIN 9 0 +strategies strategies s st str stra s es ies gies LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 3 6 BLOCKIN 9 0 +- - - - - - - - - - LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 HYPHEN 4 6 BLOCKIN 9 0 +its its i it its its s ts its its LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 4 6 BLOCKIN 9 0 +- - - - - - - - - - LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 HYPHEN 4 6 BLOCKIN 9 0 +not not n no not not t ot not not LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 5 6 BLOCKIN 9 0 +- - - - - - - - - - LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 HYPHEN 5 6 BLOCKIN 9 0 +magic magic m ma mag magi c ic gic agic LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 6 6 BLOCKIN 9 0 +- - - - - - - - - - LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 HYPHEN 6 6 BLOCKIN 9 0 +its its i it its its s ts its its LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 6 6 BLOCKIN 9 0 +- - - - - - - - - - LINEIN ALIGNEDLEFT ALLCAP NODIGIT 
1 0 0 0 0 0 0 0 HYPHEN 6 6 BLOCKIN 9 0 +logic logic l lo log logi c ic gic ogic LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 7 6 BLOCKIN 9 0 +- - - - - - - - - - LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 HYPHEN 7 6 BLOCKIN 9 0 +71d5f32ac2b4 71d5f32ac2b4 7 71 71d 71d5 4 b4 2b4 c2b4 LINEIN ALIGNEDLEFT NOCAPS CONTAINSDIGITS 0 0 0 0 0 0 0 0 NOPUNCT 9 6 BLOCKIN 9 0 +. . . . . . . . . . LINEEND ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 9 6 BLOCKIN 9 0 +[ [ [ [ [ [ [ [ [ [ LINESTART LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 OPENBRACKET 0 1 BLOCKIN 3 0 +Accessed accessed A Ac Acc Acce d ed sed ssed LINEIN LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 5 1 BLOCKIN 3 0 +07 07 0 07 07 07 7 07 07 07 LINEIN LINEINDENT NOCAPS ALLDIGIT 0 0 0 0 0 0 0 0 NOPUNCT 7 1 BLOCKIN 3 0 +- - - - - - - - - - LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 HYPHEN 8 1 BLOCKIN 3 0 +08 08 0 08 08 08 8 08 08 08 LINEIN LINEINDENT NOCAPS ALLDIGIT 0 0 0 0 0 0 0 0 NOPUNCT 9 1 BLOCKIN 3 0 +- - - - - - - - - - LINEEND LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 HYPHEN 10 1 BLOCKIN 3 0 +2024 2024 2 20 202 2024 4 24 024 2024 LINESTART ALIGNEDLEFT NOCAPS ALLDIGIT 0 0 0 0 0 1 0 0 NOPUNCT 5 0 BLOCKIN 2 0 +] ] ] ] ] ] ] ] ] ] LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 ENDBRACKET 6 0 BLOCKIN 2 0 +. . . . . . . . . . 
LINEEND ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 7 0 BLOCKEND 2 0 +[ [ [ [ [ [ [ [ [ [ LINESTART ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 OPENBRACKET 0 9 BLOCKSTART 9 0 +84 84 8 84 84 84 4 84 84 84 LINEIN ALIGNEDLEFT NOCAPS ALLDIGIT 0 0 0 0 0 0 0 0 NOPUNCT 0 9 BLOCKIN 9 0 +] ] ] ] ] ] ] ] ] ] LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 ENDBRACKET 0 9 BLOCKIN 9 0 +Woosuk woosuk W Wo Woo Woos k uk suk osuk LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 1 9 BLOCKIN 9 0 +Kwon kwon K Kw Kwo Kwon n on won Kwon LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 1 9 BLOCKIN 9 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 1 9 BLOCKIN 9 0 +Zhuohan zhuohan Z Zh Zhu Zhuo n an han ohan LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 2 9 BLOCKIN 9 0 +Li li L Li Li Li i Li Li Li LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 3 9 BLOCKIN 9 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 3 9 BLOCKIN 9 0 +Siyuan siyuan S Si Siy Siyu n an uan yuan LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 4 9 BLOCKIN 9 0 +Zhuang zhuang Z Zh Zhu Zhua g ng ang uang LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 5 9 BLOCKIN 9 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 5 9 BLOCKIN 9 0 +Ying ying Y Yi Yin Ying g ng ing Ying LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 5 9 BLOCKIN 9 0 +Sheng sheng S Sh She Shen g ng eng heng LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 6 9 BLOCKIN 9 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 6 9 BLOCKIN 9 0 +Lianmin lianmin L Li Lia Lian n in min nmin LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 7 9 BLOCKIN 9 0 +Zheng zheng Z Zh Zhe Zhen g ng eng heng LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 8 9 BLOCKIN 9 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 8 9 BLOCKIN 9 
0 +Cody cody C Co Cod Cody y dy ody Cody LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 9 9 BLOCKIN 9 0 +Hao hao H Ha Hao Hao o ao Hao Hao LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 9 9 BLOCKIN 9 0 +Yu yu Y Yu Yu Yu u Yu Yu Yu LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 10 9 BLOCKIN 9 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 10 9 BLOCKIN 9 0 +Joseph joseph J Jo Jos Jose h ph eph seph LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 10 9 BLOCKIN 9 0 +E e E E E E E E E E LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 NOPUNCT 10 9 BLOCKIN 9 0 +. . . . . . . . . . LINEEND ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 10 9 BLOCKIN 9 0 +Gonzalez gonzalez G Go Gon Gonz z ez lez alez LINESTART LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 0 8 BLOCKIN 3 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 0 8 BLOCKIN 3 0 +Hao hao H Ha Hao Hao o ao Hao Hao LINEIN LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 1 8 BLOCKIN 3 0 +Zhang zhang Z Zh Zha Zhan g ng ang hang LINEIN LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 2 8 BLOCKIN 3 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 2 8 BLOCKIN 3 0 +and and a an and and d nd and and LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 2 8 BLOCKIN 3 0 +Ion ion I Io Ion Ion n on Ion Ion LINEIN LINEINDENT INITCAP NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 3 8 BLOCKIN 3 0 +Stoica stoica S St Sto Stoi a ca ica oica LINEIN LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 4 8 BLOCKIN 3 0 +. . . . . . . . . . 
LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 4 8 BLOCKIN 3 0 +Efficient efficient E Ef Eff Effi t nt ent ient LINEIN LINEINDENT INITCAP NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 5 8 BLOCKIN 3 0 +memory memory m me mem memo y ry ory mory LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 6 8 BLOCKIN 3 0 +management management m ma man mana t nt ent ment LINEIN LINEINDENT NOCAPS NODIGIT 0 1 1 0 0 0 0 0 NOPUNCT 7 8 BLOCKIN 3 0 +for for f fo for for r or for for LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 8 8 BLOCKIN 3 0 +large large l la lar larg e ge rge arge LINEIN LINEINDENT NOCAPS NODIGIT 0 1 1 0 0 0 0 0 NOPUNCT 9 8 BLOCKIN 3 0 +language language l la lan lang e ge age uage LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 10 8 BLOCKIN 3 0 +model model m mo mod mode l el del odel LINEEND LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 10 8 BLOCKIN 3 0 +serving serving s se ser serv g ng ing ving LINESTART LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 1 3 BLOCKIN 2 0 +with with w wi wit with h th ith with LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 3 3 BLOCKIN 2 0 +pagedattention pagedattention p pa pag page n on ion tion LINEIN LINEINDENT NOCAPS NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 8 3 BLOCKIN 2 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 8 3 BLOCKIN 2 0 +2023 2023 2 20 202 2023 3 23 023 2023 LINEIN LINEINDENT NOCAPS ALLDIGIT 0 0 0 0 0 1 0 0 NOPUNCT 10 3 BLOCKIN 2 0 +. . . . . . . . . . 
LINEEND LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 10 3 BLOCKEND 2 0 +[ [ [ [ [ [ [ [ [ [ LINESTART ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 OPENBRACKET 0 8 BLOCKSTART 4 0 +85 85 8 85 85 85 5 85 85 85 LINEIN ALIGNEDLEFT NOCAPS ALLDIGIT 0 0 0 0 0 0 0 0 NOPUNCT 0 8 BLOCKIN 4 0 +] ] ] ] ] ] ] ] ] ] LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 ENDBRACKET 0 8 BLOCKIN 4 0 +Preprocess preprocess P Pr Pre Prep s ss ess cess LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 1 8 BLOCKIN 4 0 +and and a an and and d nd and and LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 2 8 BLOCKIN 4 0 +fine fine f fi fin fine e ne ine fine LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 1 1 0 0 0 0 0 NOPUNCT 2 8 BLOCKIN 4 0 +- - - - - - - - - - LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 HYPHEN 3 8 BLOCKIN 4 0 +tune tune t tu tun tune e ne une tune LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 3 8 BLOCKIN 4 0 +llms llms l ll llm llms s ms lms llms LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 4 8 BLOCKIN 4 0 +quickly quickly q qu qui quic y ly kly ckly LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 5 8 BLOCKIN 4 0 +and and a an and and d nd and and LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 5 8 BLOCKIN 4 0 +cost cost c co cos cost t st ost cost LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 6 8 BLOCKIN 4 0 +- - - - - - - - - - LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 HYPHEN 6 8 BLOCKIN 4 0 +effectively effectively e ef eff effe y ly ely vely LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 7 8 BLOCKIN 4 0 +using using u us usi usin g ng ing sing LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 8 8 BLOCKIN 4 0 +amazon amazon a am ama amaz n on zon azon LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 9 8 BLOCKIN 4 0 +emr emr e em emr emr r mr emr emr LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 9 8 BLOCKIN 4 0 +serverless serverless s se ser serv s ss ess 
less LINEEND ALIGNEDLEFT NOCAPS NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 10 8 BLOCKIN 4 0 +and and a an and and d nd and and LINESTART LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 0 3 BLOCKIN 4 0 +amazon amazon a am ama amaz n on zon azon LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 2 3 BLOCKIN 4 0 +sagemaker sagemaker s sa sag sage r er ker aker LINEIN LINEINDENT NOCAPS NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 5 3 BLOCKIN 4 0 +- - - - - - - - - - LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 HYPHEN 6 3 BLOCKIN 4 0 +aws aws a aw aws aws s ws aws aws LINEIN LINEINDENT NOCAPS NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 7 3 BLOCKIN 4 0 +. . . . . . . . . . LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 7 3 BLOCKIN 4 0 +amazon amazon a am ama amaz n on zon azon LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 8 3 BLOCKIN 4 0 +. . . . . . . . . . LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 9 3 BLOCKIN 4 0 +com com c co com com m om com com LINEIN LINEINDENT NOCAPS NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 10 3 BLOCKIN 4 0 +. . . . . . . . . . LINEEND LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 10 3 BLOCKIN 4 0 +https https h ht htt http s ps tps ttps LINESTART LINEINDENT NOCAPS NODIGIT 0 0 0 0 0 0 0 1 NOPUNCT 1 3 BLOCKIN 9 0 +: : : : : : : : : : LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 PUNCT 1 3 BLOCKIN 9 0 +/ / / / / / / / / / LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 NOPUNCT 1 3 BLOCKIN 9 0 +/ / / / / / / / / / LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 NOPUNCT 2 3 BLOCKIN 9 0 +aws aws a aw aws aws s ws aws aws LINEIN LINEINDENT NOCAPS NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 2 3 BLOCKIN 9 0 +. . . . . . . . . . LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 3 3 BLOCKIN 9 0 +amazon amazon a am ama amaz n on zon azon LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 4 3 BLOCKIN 9 0 +. . . . . . . . . . 
LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 4 3 BLOCKIN 9 0 +com com c co com com m om com com LINEIN LINEINDENT NOCAPS NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 5 3 BLOCKIN 9 0 +/ / / / / / / / / / LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 NOPUNCT 5 3 BLOCKIN 9 0 +blogs blogs b bl blo blog s gs ogs logs LINEIN LINEINDENT NOCAPS NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 7 3 BLOCKIN 9 0 +/ / / / / / / / / / LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 NOPUNCT 7 3 BLOCKIN 9 0 +big big b bi big big g ig big big LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 8 3 BLOCKIN 9 0 +- - - - - - - - - - LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 HYPHEN 8 3 BLOCKIN 9 0 +data data d da dat data a ta ata data LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 9 3 BLOCKIN 9 0 +/ / / / / / / / / / LINEEND LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 NOPUNCT 9 3 BLOCKIN 9 0 +preprocess preprocess p pr pre prep s ss ess cess LINESTART ALIGNEDLEFT NOCAPS NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 1 9 BLOCKIN 10 0 +- - - - - - - - - - LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 HYPHEN 1 9 BLOCKIN 10 0 +and and a an and and d nd and and LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 1 9 BLOCKIN 10 0 +- - - - - - - - - - LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 HYPHEN 1 9 BLOCKIN 10 0 + +] ] ] ] ] ] ] ] ] ] LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 ENDBRACKET 0 9 BLOCKIN 6 0 +Jiwoo jiwoo J Ji Jiw Jiwo o oo woo iwoo LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 1 9 BLOCKIN 6 0 +Hong hong H Ho Hon Hong g ng ong Hong LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 1 9 BLOCKIN 6 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 1 9 BLOCKIN 6 0 +Noah noah N No Noa Noah h ah oah Noah LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 2 9 BLOCKIN 6 0 +Lee lee L Le Lee Lee e ee Lee Lee LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 1 0 0 0 0 0 NOPUNCT 3 9 BLOCKIN 6 0 +, , , , , , , , , , LINEIN 
ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 3 9 BLOCKIN 6 0 +and and a an and and d nd and and LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 3 9 BLOCKIN 6 0 +James james J Ja Jam Jame s es mes ames LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 4 9 BLOCKIN 6 0 +Thorne thorne T Th Tho Thor e ne rne orne LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 5 9 BLOCKIN 6 0 +. . . . . . . . . . LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 5 9 BLOCKIN 6 0 +Orpo orpo O Or Orp Orpo o po rpo Orpo LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 6 9 BLOCKIN 6 0 +: : : : : : : : : : LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 PUNCT 6 9 BLOCKIN 6 0 +Monolithic monolithic M Mo Mon Mono c ic hic thic LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 7 9 BLOCKIN 6 0 +preference preference p pr pre pref e ce nce ence LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 8 9 BLOCKIN 6 0 +optimization optimization o op opt opti n on ion tion LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 10 9 BLOCKIN 6 0 +without without w wi wit with t ut out hout LINEEND ALIGNEDLEFT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 10 9 BLOCKIN 6 0 +reference reference r re ref refe e ce nce ence LINESTART LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 1 5 BLOCKIN 5 0 +model model m mo mod mode l el del odel LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 2 5 BLOCKIN 5 0 +. . . . . . . . . . 
LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 2 5 BLOCKIN 5 0 +arXiv arxiv a ar arX arXi v iv Xiv rXiv LINEIN LINEINDENT NOCAPS NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 4 5 BLOCKIN 5 0 +preprint preprint p pr pre prep t nt int rint LINEIN LINEINDENT NOCAPS NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 5 5 BLOCKIN 5 0 +arXiv arxiv a ar arX arXi v iv Xiv rXiv LINEIN LINEINDENT NOCAPS NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 7 5 BLOCKIN 5 0 +: : : : : : : : : : LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 PUNCT 7 5 BLOCKIN 5 0 +2403 2403 2 24 240 2403 3 03 403 2403 LINEIN LINEINDENT NOCAPS ALLDIGIT 0 0 0 0 0 1 0 0 NOPUNCT 8 5 BLOCKIN 5 0 +. . . . . . . . . . LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 8 5 BLOCKIN 5 0 +07691 07691 0 07 076 0769 1 91 691 7691 LINEIN LINEINDENT NOCAPS ALLDIGIT 0 0 0 0 0 0 0 0 NOPUNCT 9 5 BLOCKIN 5 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 9 5 BLOCKIN 5 0 +2024 2024 2 20 202 2024 4 24 024 2024 LINEIN LINEINDENT NOCAPS ALLDIGIT 0 0 0 0 0 1 0 0 NOPUNCT 10 5 BLOCKIN 5 0 +. . . . . . . . . . 
LINEEND LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 10 5 BLOCKEND 5 0 +[ [ [ [ [ [ [ [ [ [ LINESTART ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 OPENBRACKET 0 8 BLOCKSTART 6 0 +77 77 7 77 77 77 7 77 77 77 LINEIN ALIGNEDLEFT NOCAPS ALLDIGIT 0 0 0 0 0 0 0 0 NOPUNCT 0 8 BLOCKIN 6 0 +] ] ] ] ] ] ] ] ] ] LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 ENDBRACKET 0 8 BLOCKIN 6 0 +Jiwoo jiwoo J Ji Jiw Jiwo o oo woo iwoo LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 1 8 BLOCKIN 6 0 +Hong hong H Ho Hon Hong g ng ong Hong LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 1 8 BLOCKIN 6 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 1 8 BLOCKIN 6 0 +Noah noah N No Noa Noah h ah oah Noah LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 2 8 BLOCKIN 6 0 +Lee lee L Le Lee Lee e ee Lee Lee LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 1 0 0 0 0 0 NOPUNCT 3 8 BLOCKIN 6 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 3 8 BLOCKIN 6 0 +and and a an and and d nd and and LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 3 8 BLOCKIN 6 0 +James james J Ja Jam Jame s es mes ames LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 4 8 BLOCKIN 6 0 +Thorne thorne T Th Tho Thor e ne rne orne LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 5 8 BLOCKIN 6 0 +. . . . . . . . . . 
LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 5 8 BLOCKIN 6 0 +Orpo orpo O Or Orp Orpo o po rpo Orpo LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 6 8 BLOCKIN 6 0 +evaluation evaluation e ev eva eval n on ion tion LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 7 8 BLOCKIN 6 0 +: : : : : : : : : : LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 PUNCT 7 8 BLOCKIN 6 0 +Performance performance P Pe Per Perf e ce nce ance LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 8 8 BLOCKIN 6 0 +on on o on on on n on on on LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 9 8 BLOCKIN 6 0 +alpacaeval alpacaeval a al alp alpa l al val eval LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 10 8 BLOCKIN 6 0 +and and a an and and d nd and and LINEEND ALIGNEDLEFT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 10 8 BLOCKIN 6 0 +mt mt m mt mt mt t mt mt mt LINESTART LINEINDENT NOCAPS NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 0 3 BLOCKIN 4 0 +- - - - - - - - - - LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 HYPHEN 0 3 BLOCKIN 4 0 +bench bench b be ben benc h ch nch ench LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 2 3 BLOCKIN 4 0 +. . . . . . . . . . LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 2 3 BLOCKIN 4 0 +Papers papers P Pa Pap Pape s rs ers pers LINEIN LINEINDENT INITCAP NODIGIT 0 1 1 0 0 0 0 0 NOPUNCT 4 3 BLOCKIN 4 0 +with with w wi wit with h th ith with LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 6 3 BLOCKIN 4 0 +Code code C Co Cod Code e de ode Code LINEIN LINEINDENT INITCAP NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 8 3 BLOCKIN 4 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 8 3 BLOCKIN 4 0 +2024 2024 2 20 202 2024 4 24 024 2024 LINEIN LINEINDENT NOCAPS ALLDIGIT 0 0 0 0 0 1 0 0 NOPUNCT 10 3 BLOCKIN 4 0 +. . . . . . . . . . 
LINEEND LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 10 3 BLOCKEND 4 0 +[ [ [ [ [ [ [ [ [ [ LINESTART ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 OPENBRACKET 0 9 BLOCKSTART 10 0 +78 78 7 78 78 78 8 78 78 78 LINEIN ALIGNEDLEFT NOCAPS ALLDIGIT 0 0 0 0 0 0 0 0 NOPUNCT 0 9 BLOCKIN 10 0 +] ] ] ] ] ] ] ] ] ] LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 ENDBRACKET 0 9 BLOCKIN 10 0 +What what W Wh Wha What t at hat What LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 1 9 BLOCKIN 10 0 +are are a ar are are e re are are LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 1 9 BLOCKIN 10 0 +the the t th the the e he the the LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 2 9 BLOCKIN 10 0 +most most m mo mos most t st ost most LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 2 9 BLOCKIN 10 0 +effective effective e ef eff effe e ve ive tive LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 3 9 BLOCKIN 10 0 +techniques techniques t te tec tech s es ues ques LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 5 9 BLOCKIN 10 0 +for for f fo for for r or for for LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 5 9 BLOCKIN 10 0 +pruning pruning p pr pru prun g ng ing ning LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 6 9 BLOCKIN 10 0 +ai ai a ai ai ai i ai ai ai LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 6 9 BLOCKIN 10 0 +models models m mo mod mode s ls els dels LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 7 9 BLOCKIN 10 0 +? ? ? ? ? ? ? ? ? ? LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 PUNCT 7 9 BLOCKIN 10 0 +- - - - - - - - - - LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 HYPHEN 8 9 BLOCKIN 10 0 +linkedin linkedin l li lin link n in din edin LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 9 9 BLOCKIN 10 0 +. . . . . . . . . . 
LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 9 9 BLOCKIN 10 0 +com com c co com com m om com com LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 9 9 BLOCKIN 10 0 +. . . . . . . . . . LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 9 9 BLOCKIN 10 0 +https https h ht htt http s ps tps ttps LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 0 0 0 0 0 0 1 NOPUNCT 10 9 BLOCKIN 10 0 +: : : : : : : : : : LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 PUNCT 10 9 BLOCKIN 10 0 +/ / / / / / / / / / LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 NOPUNCT 10 9 BLOCKIN 10 0 +/ / / / / / / / / / LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 NOPUNCT 10 9 BLOCKIN 10 0 +www www w ww www www w ww www www LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 10 9 BLOCKIN 10 0 +. . . . . . . . . . LINEEND ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 10 9 BLOCKIN 10 0 +linkedin linkedin l li lin link n in din edin LINESTART LINEINDENT NOCAPS NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 0 8 BLOCKIN 10 0 +. . . . . . . . . . 
LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 1 8 BLOCKIN 10 0 +com com c co com com m om com com LINEIN LINEINDENT NOCAPS NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 1 8 BLOCKIN 10 0 +/ / / / / / / / / / LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 NOPUNCT 1 8 BLOCKIN 10 0 +advice advice a ad adv advi e ce ice vice LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 2 8 BLOCKIN 10 0 +/ / / / / / / / / / LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 NOPUNCT 2 8 BLOCKIN 10 0 +3 3 3 3 3 3 3 3 3 3 LINEIN LINEINDENT NOCAPS ALLDIGIT 1 0 0 0 0 0 0 0 NOPUNCT 2 8 BLOCKIN 10 0 +/ / / / / / / / / / LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 NOPUNCT 2 8 BLOCKIN 10 0 +what what w wh wha what t at hat what LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 3 8 BLOCKIN 10 0 +- - - - - - - - - - LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 HYPHEN 3 8 BLOCKIN 10 0 +most most m mo mos most t st ost most LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 3 8 BLOCKIN 10 0 +- - - - - - - - - - LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 HYPHEN 3 8 BLOCKIN 10 0 +effective effective e ef eff effe e ve ive tive LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 4 8 BLOCKIN 10 0 +- - - - - - - - - - LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 HYPHEN 4 8 BLOCKIN 10 0 +techniques techniques t te tec tech s es ues ques LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 6 8 BLOCKIN 10 0 +- - - - - - - - - - LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 HYPHEN 6 8 BLOCKIN 10 0 +pruning pruning p pr pru prun g ng ing ning LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 7 8 BLOCKIN 10 0 +- - - - - - - - - - LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 HYPHEN 7 8 BLOCKIN 10 0 +0mlef 0mlef 0 0m 0ml 0mle f ef lef mlef LINEIN LINEINDENT NOCAPS CONTAINSDIGITS 0 0 0 0 0 0 0 0 NOPUNCT 7 8 BLOCKIN 10 0 +. . . . . . . . . . 
LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 7 8 BLOCKIN 10 0 +[ [ [ [ [ [ [ [ [ [ LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 OPENBRACKET 8 8 BLOCKIN 10 0 +Accessed accessed A Ac Acc Acce d ed sed ssed LINEIN LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 9 8 BLOCKIN 10 0 +05 05 0 05 05 05 5 05 05 05 LINEIN LINEINDENT NOCAPS ALLDIGIT 0 0 0 0 0 0 0 0 NOPUNCT 9 8 BLOCKIN 10 0 +- - - - - - - - - - LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 HYPHEN 9 8 BLOCKIN 10 0 +07 07 0 07 07 07 7 07 07 07 LINEIN LINEINDENT NOCAPS ALLDIGIT 0 0 0 0 0 0 0 0 NOPUNCT 10 8 BLOCKIN 10 0 +- - - - - - - - - - LINEEND LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 HYPHEN 10 8 BLOCKIN 10 0 +2024 2024 2 20 202 2024 4 24 024 2024 LINESTART LINEINDENT NOCAPS ALLDIGIT 0 0 0 0 0 1 0 0 NOPUNCT 5 0 BLOCKIN 2 0 +] ] ] ] ] ] ] ] ] ] LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 ENDBRACKET 6 0 BLOCKIN 2 0 +. . . . . . . . . . LINEEND LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 7 0 BLOCKEND 2 0 +[ [ [ [ [ [ [ [ [ [ LINESTART ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 OPENBRACKET 0 8 BLOCKSTART 8 0 +79 79 7 79 79 79 9 79 79 79 LINEIN ALIGNEDLEFT NOCAPS ALLDIGIT 0 0 0 0 0 0 0 0 NOPUNCT 0 8 BLOCKIN 8 0 +] ] ] ] ] ] ] ] ] ] LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 ENDBRACKET 0 8 BLOCKIN 8 0 +Boxin boxin B Bo Box Boxi n in xin oxin LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 1 8 BLOCKIN 8 0 +Wang wang W Wa Wan Wang g ng ang Wang LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 1 8 BLOCKIN 8 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 1 8 BLOCKIN 8 0 +Weixin weixin W We Wei Weix n in xin ixin LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 2 8 BLOCKIN 8 0 +Chen chen C Ch Che Chen n en hen Chen LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 3 8 BLOCKIN 8 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 3 8 BLOCKIN 8 0 +Hengzhi hengzhi H He Hen Heng i hi zhi gzhi 
LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 4 8 BLOCKIN 8 0 +Pei pei P Pe Pei Pei i ei Pei Pei LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 5 8 BLOCKIN 8 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 5 8 BLOCKIN 8 0 +Chulin chulin C Ch Chu Chul n in lin ulin LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 6 8 BLOCKIN 8 0 +Xie xie X Xi Xie Xie e ie Xie Xie LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 6 8 BLOCKIN 8 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 6 8 BLOCKIN 8 0 +Mintong mintong M Mi Min Mint g ng ong tong LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 7 8 BLOCKIN 8 0 +Kang kang K Ka Kan Kang g ng ang Kang LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 8 8 BLOCKIN 8 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 8 8 BLOCKIN 8 0 +Chenhui chenhui C Ch Che Chen i ui hui nhui LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 9 8 BLOCKIN 8 0 +Zhang zhang Z Zh Zha Zhan g ng ang hang LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 10 8 BLOCKIN 8 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 10 8 BLOCKIN 8 0 +Chejian chejian C Ch Che Chej n an ian jian LINEEND ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 10 8 BLOCKIN 8 0 +Xu xu X Xu Xu Xu u Xu Xu Xu LINESTART LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 0 8 BLOCKIN 8 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 0 8 BLOCKIN 8 0 +Zidi zidi Z Zi Zid Zidi i di idi Zidi LINEIN LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 0 8 BLOCKIN 8 0 +Xiong xiong X Xi Xio Xion g ng ong iong LINEIN LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 1 8 BLOCKIN 8 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 1 8 BLOCKIN 8 0 +Ritik ritik R Ri Rit Riti k ik tik itik LINEIN LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 2 8 
BLOCKIN 8 0 +Dutta dutta D Du Dut Dutt a ta tta utta LINEIN LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 3 8 BLOCKIN 8 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 3 8 BLOCKIN 8 0 +Rylan rylan R Ry Ryl Ryla n an lan ylan LINEIN LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 4 8 BLOCKIN 8 0 +Schaeffer schaeffer S Sc Sch Scha r er fer ffer LINEIN LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 5 8 BLOCKIN 8 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 5 8 BLOCKIN 8 0 +Sang sang S Sa San Sang g ng ang Sang LINEIN LINEINDENT INITCAP NODIGIT 0 1 1 0 0 0 0 0 NOPUNCT 6 8 BLOCKIN 8 0 +T t T T T T T T T T LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 NOPUNCT 6 8 BLOCKIN 8 0 +. . . . . . . . . . LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 6 8 BLOCKIN 8 0 +Truong truong T Tr Tru Truo g ng ong uong LINEIN LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 7 8 BLOCKIN 8 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 7 8 BLOCKIN 8 0 +Simran simran S Si Sim Simr n an ran mran LINEIN LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 8 8 BLOCKIN 8 0 +Arora arora A Ar Aro Aror a ra ora rora LINEIN LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 9 8 BLOCKIN 8 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 9 8 BLOCKIN 8 0 +Mantas mantas M Ma Man Mant s as tas ntas LINEIN LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 10 8 BLOCKIN 8 0 +Mazeika mazeika M Ma Maz Maze a ka ika eika LINEIN LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 10 8 BLOCKIN 8 0 +, , , , , , , , , , LINEEND LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 10 8 BLOCKIN 8 0 +Dan dan D Da Dan Dan n an Dan Dan LINESTART LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 0 8 BLOCKIN 7 0 +Hendrycks hendrycks H He Hen Hend s ks cks ycks LINEIN LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 1 8 BLOCKIN 7 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 
1 0 0 0 0 0 0 0 COMMA 1 8 BLOCKIN 7 0 +Zinan zinan Z Zi Zin Zina n an nan inan LINEIN LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 2 8 BLOCKIN 7 0 +Lin lin L Li Lin Lin n in Lin Lin LINEIN LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 3 8 BLOCKIN 7 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 3 8 BLOCKIN 7 0 +Yu yu Y Yu Yu Yu u Yu Yu Yu LINEIN LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 3 8 BLOCKIN 7 0 +Cheng cheng C Ch Che Chen g ng eng heng LINEIN LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 4 8 BLOCKIN 7 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 4 8 BLOCKIN 7 0 +Sanmi sanmi S Sa San Sanm i mi nmi anmi LINEIN LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 5 8 BLOCKIN 7 0 +Koyejo koyejo K Ko Koy Koye o jo ejo yejo LINEIN LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 6 8 BLOCKIN 7 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 6 8 BLOCKIN 7 0 +Dawn dawn D Da Daw Dawn n wn awn Dawn LINEIN LINEINDENT INITCAP NODIGIT 0 1 1 0 0 0 0 0 NOPUNCT 6 8 BLOCKIN 7 0 +Song song S So Son Song g ng ong Song LINEIN LINEINDENT INITCAP NODIGIT 0 1 1 0 0 0 0 0 NOPUNCT 7 8 BLOCKIN 7 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 7 8 BLOCKIN 7 0 +and and a an and and d nd and and LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 8 8 BLOCKIN 7 0 +Bo bo B Bo Bo Bo o Bo Bo Bo LINEIN LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 8 8 BLOCKIN 7 0 +Li li L Li Li Li i Li Li Li LINEIN LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 9 8 BLOCKIN 7 0 +. . . . . . . . . . 
LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 9 8 BLOCKIN 7 0 +Decodingtrust decodingtrust D De Dec Deco t st ust rust LINEIN LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 10 8 BLOCKIN 7 0 +: : : : : : : : : : LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 PUNCT 10 8 BLOCKIN 7 0 +A a A A A A A A A A LINEEND LINEINDENT ALLCAP NODIGIT 1 0 1 0 0 0 0 0 NOPUNCT 10 8 BLOCKIN 7 0 +comprehensive comprehensive c co com comp e ve ive sive LINESTART LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 1 6 BLOCKIN 2 0 +assessment assessment a as ass asse t nt ent ment LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 3 6 BLOCKIN 2 0 +of of o of of of f of of of LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 4 6 BLOCKIN 2 0 +trustworthiness trustworthiness t tr tru trus s ss ess ness LINEIN LINEINDENT NOCAPS NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 6 6 BLOCKIN 2 0 +in in i in in in n in in in LINEIN LINEINDENT NOCAPS NODIGIT 0 1 1 0 0 0 0 0 NOPUNCT 7 6 BLOCKIN 2 0 +gpt gpt g gp gpt gpt t pt gpt gpt LINEIN LINEINDENT NOCAPS NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 8 6 BLOCKIN 2 0 +models models m mo mod mode s ls els dels LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 9 6 BLOCKIN 2 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 9 6 BLOCKIN 2 0 +2024 2024 2 20 202 2024 4 24 024 2024 LINEIN LINEINDENT NOCAPS ALLDIGIT 0 0 0 0 0 1 0 0 NOPUNCT 10 6 BLOCKIN 2 0 +. . . . . . . . . . 
LINEEND LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 10 6 BLOCKEND 2 0 +[ [ [ [ [ [ [ [ [ [ LINESTART ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 OPENBRACKET 0 9 BLOCKSTART 8 0 +80 80 8 80 80 80 0 80 80 80 LINEIN ALIGNEDLEFT NOCAPS ALLDIGIT 0 0 0 0 0 0 0 0 NOPUNCT 0 9 BLOCKIN 8 0 +] ] ] ] ] ] ] ] ] ] LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 ENDBRACKET 0 9 BLOCKIN 8 0 +Hakan hakan H Ha Hak Haka n an kan akan LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 1 9 BLOCKIN 8 0 +Inan inan I In Ina Inan n an nan Inan LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 1 9 BLOCKIN 8 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 1 9 BLOCKIN 8 0 +Kartikeya kartikeya K Ka Kar Kart a ya eya keya LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 2 9 BLOCKIN 8 0 +Upasani upasani U Up Upa Upas i ni ani sani LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 3 9 BLOCKIN 8 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 3 9 BLOCKIN 8 0 +Jianfeng jianfeng J Ji Jia Jian g ng eng feng LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 4 9 BLOCKIN 8 0 +Chi chi C Ch Chi Chi i hi Chi Chi LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 5 9 BLOCKIN 8 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 5 9 BLOCKIN 8 0 +Rashi rashi R Ra Ras Rash i hi shi ashi LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 6 9 BLOCKIN 8 0 +Rungta rungta R Ru Run Rung a ta gta ngta LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 7 9 BLOCKIN 8 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 7 9 BLOCKIN 8 0 +Krithika krithika K Kr Kri Krit a ka ika hika LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 8 9 BLOCKIN 8 0 +Iyer iyer I Iy Iye Iyer r er yer Iyer LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 8 9 BLOCKIN 8 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 
COMMA 8 9 BLOCKIN 8 0 +Yuning yuning Y Yu Yun Yuni g ng ing ning LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 9 9 BLOCKIN 8 0 +Mao mao M Ma Mao Mao o ao Mao Mao LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 10 9 BLOCKIN 8 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 10 9 BLOCKIN 8 0 +Michael michael M Mi Mic Mich l el ael hael LINEEND ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 10 9 BLOCKIN 8 0 +Tontchev tontchev T To Ton Tont v ev hev chev LINESTART LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 0 8 BLOCKIN 7 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 1 8 BLOCKIN 7 0 +Qing qing Q Qi Qin Qing g ng ing Qing LINEIN LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 1 8 BLOCKIN 7 0 +Hu hu H Hu Hu Hu u Hu Hu Hu LINEIN LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 2 8 BLOCKIN 7 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 2 8 BLOCKIN 7 0 +Brian brian B Br Bri Bria n an ian rian LINEIN LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 3 8 BLOCKIN 7 0 +Fuller fuller F Fu Ful Full r er ler ller LINEIN LINEINDENT INITCAP NODIGIT 0 1 1 0 0 0 0 0 NOPUNCT 3 8 BLOCKIN 7 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 4 8 BLOCKIN 7 0 +Davide davide D Da Dav Davi e de ide vide LINEIN LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 4 8 BLOCKIN 7 0 +Testuggine testuggine T Te Tes Test e ne ine gine LINEIN LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 6 8 BLOCKIN 7 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 6 8 BLOCKIN 7 0 +and and a an and and d nd and and LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 6 8 BLOCKIN 7 0 +Madian madian M Ma Mad Madi n an ian dian LINEIN LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 7 8 BLOCKIN 7 0 +Khabsa khabsa K Kh Kha Khab a sa bsa absa LINEIN LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 8 8 BLOCKIN 7 0 +. . . . 
. . . . . . LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 8 8 BLOCKIN 7 0 +Llama llama L Ll Lla Llam a ma ama lama LINEIN LINEINDENT INITCAP NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 9 8 BLOCKIN 7 0 +guard guard g gu gua guar d rd ard uard LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 10 8 BLOCKIN 7 0 +: : : : : : : : : : LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 PUNCT 10 8 BLOCKIN 7 0 +Llm llm L Ll Llm Llm m lm Llm Llm LINEIN LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 10 8 BLOCKIN 7 0 +- - - - - - - - - - LINEEND LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 HYPHEN 10 8 BLOCKIN 7 0 +based based b ba bas base d ed sed ased LINESTART LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 0 6 BLOCKIN 4 0 +input input i in inp inpu t ut put nput LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 1 6 BLOCKIN 4 0 +- - - - - - - - - - LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 HYPHEN 2 6 BLOCKIN 4 0 +output output o ou out outp t ut put tput LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 2 6 BLOCKIN 4 0 +safeguard safeguard s sa saf safe d rd ard uard LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 4 6 BLOCKIN 4 0 +for for f fo for for r or for for LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 5 6 BLOCKIN 4 0 +human human h hu hum huma n an man uman LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 6 6 BLOCKIN 4 0 +- - - - - - - - - - LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 HYPHEN 6 6 BLOCKIN 4 0 +ai ai a ai ai ai i ai ai ai LINEIN LINEINDENT NOCAPS NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 7 6 BLOCKIN 4 0 +conversations conversations c co con conv s ns ons ions LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 9 6 BLOCKIN 4 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 9 6 BLOCKIN 4 0 +2023 2023 2 20 202 2023 3 23 023 2023 LINEIN LINEINDENT NOCAPS ALLDIGIT 0 0 0 0 0 1 0 0 NOPUNCT 10 6 BLOCKIN 4 0 +. . . . . . . . . . 
LINEEND LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 10 6 BLOCKEND 4 0 +[ [ [ [ [ [ [ [ [ [ LINESTART ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 OPENBRACKET 0 9 BLOCKSTART 8 0 +81 81 8 81 81 81 1 81 81 81 LINEIN ALIGNEDLEFT NOCAPS ALLDIGIT 0 0 0 0 0 0 0 0 NOPUNCT 0 9 BLOCKIN 8 0 +] ] ] ] ] ] ] ] ] ] LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 ENDBRACKET 0 9 BLOCKIN 8 0 +Wenjun wenjun W We Wen Wenj n un jun njun LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 1 9 BLOCKIN 8 0 +Zeng zeng Z Ze Zen Zeng g ng eng Zeng LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 1 9 BLOCKIN 8 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 1 9 BLOCKIN 8 0 +Yuchi yuchi Y Yu Yuc Yuch i hi chi uchi LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 2 9 BLOCKIN 8 0 +Liu liu L Li Liu Liu u iu Liu Liu LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 3 9 BLOCKIN 8 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 3 9 BLOCKIN 8 0 +Ryan ryan R Ry Rya Ryan n an yan Ryan LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 3 9 BLOCKIN 8 0 +Mullins mullins M Mu Mul Mull s ns ins lins LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 4 9 BLOCKIN 8 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 4 9 BLOCKIN 8 0 +Ludovic ludovic L Lu Lud Ludo c ic vic ovic LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 5 9 BLOCKIN 8 0 +Peran peran P Pe Per Pera n an ran eran LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 6 9 BLOCKIN 8 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 6 9 BLOCKIN 8 0 +Joe joe J Jo Joe Joe e oe Joe Joe LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 7 9 BLOCKIN 8 0 +Fernandez fernandez F Fe Fer Fern z ez dez ndez LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 8 9 BLOCKIN 8 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 8 9 
BLOCKIN 8 0 +Hamza hamza H Ha Ham Hamz a za mza amza LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 9 9 BLOCKIN 8 0 +Harkous harkous H Ha Har Hark s us ous kous LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 10 9 BLOCKIN 8 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 10 9 BLOCKIN 8 0 +Karthik karthik K Ka Kar Kart k ik hik thik LINEEND ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 10 9 BLOCKIN 8 0 +Narasimhan narasimhan N Na Nar Nara n an han mhan LINESTART LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 1 8 BLOCKIN 5 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 1 8 BLOCKIN 5 0 +Drew drew D Dr Dre Drew w ew rew Drew LINEIN LINEINDENT INITCAP NODIGIT 0 1 1 0 0 0 0 0 NOPUNCT 1 8 BLOCKIN 5 0 +Proud proud P Pr Pro Prou d ud oud roud LINEIN LINEINDENT INITCAP NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 2 8 BLOCKIN 5 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 2 8 BLOCKIN 5 0 +Piyush piyush P Pi Piy Piyu h sh ush yush LINEIN LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 3 8 BLOCKIN 5 0 +Kumar kumar K Ku Kum Kuma r ar mar umar LINEIN LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 4 8 BLOCKIN 5 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 4 8 BLOCKIN 5 0 +Bhaktipriya bhaktipriya B Bh Bha Bhak a ya iya riya LINEIN LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 6 8 BLOCKIN 5 0 +Radharapu radharapu R Ra Rad Radh u pu apu rapu LINEIN LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 7 8 BLOCKIN 5 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 7 8 BLOCKIN 5 0 +Olivia olivia O Ol Oli Oliv a ia via ivia LINEIN LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 8 8 BLOCKIN 5 0 +Sturman sturman S St Stu Stur n an man rman LINEIN LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 9 8 BLOCKIN 5 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 9 8 BLOCKIN 5 0 
+and and a an and and d nd and and LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 10 8 BLOCKIN 5 0 +Oscar oscar O Os Osc Osca r ar car scar LINEEND LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 10 8 BLOCKIN 5 0 +Wahltinez wahltinez W Wa Wah Wahl z ez nez inez LINESTART LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 1 7 BLOCKIN 4 0 +. . . . . . . . . . LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 1 7 BLOCKIN 4 0 +Shieldgemma shieldgemma S Sh Shi Shie a ma mma emma LINEIN LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 2 7 BLOCKIN 4 0 +: : : : : : : : : : LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 PUNCT 3 7 BLOCKIN 4 0 +Generative generative G Ge Gen Gene e ve ive tive LINEIN LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 4 7 BLOCKIN 4 0 +ai ai a ai ai ai i ai ai ai LINEIN LINEINDENT NOCAPS NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 5 7 BLOCKIN 4 0 +content content c co con cont t nt ent tent LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 6 7 BLOCKIN 4 0 +moderation moderation m mo mod mode n on ion tion LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 7 7 BLOCKIN 4 0 +based based b ba bas base d ed sed ased LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 8 7 BLOCKIN 4 0 +on on o on on on n on on on LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 9 7 BLOCKIN 4 0 +gemma gemma g ge gem gemm a ma mma emma LINEIN LINEINDENT NOCAPS NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 9 7 BLOCKIN 4 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 10 7 BLOCKIN 4 0 +2024 2024 2 20 202 2024 4 24 024 2024 LINEIN LINEINDENT NOCAPS ALLDIGIT 0 0 0 0 0 1 0 0 NOPUNCT 10 7 BLOCKIN 4 0 +. . . . . . . . . . 
LINEEND LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 10 7 BLOCKEND 4 0 +[ [ [ [ [ [ [ [ [ [ LINESTART ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 OPENBRACKET 0 9 BLOCKSTART 8 0 +82 82 8 82 82 82 2 82 82 82 LINEIN ALIGNEDLEFT NOCAPS ALLDIGIT 0 0 0 0 0 0 0 0 NOPUNCT 0 9 BLOCKIN 8 0 +] ] ] ] ] ] ] ] ] ] LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 ENDBRACKET 0 9 BLOCKIN 8 0 +Seungju seungju S Se Seu Seun u ju gju ngju LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 1 9 BLOCKIN 8 0 +Han han H Ha Han Han n an Han Han LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 1 9 BLOCKIN 8 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 1 9 BLOCKIN 8 0 +Kavel kavel K Ka Kav Kave l el vel avel LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 2 9 BLOCKIN 8 0 +Rao rao R Ra Rao Rao o ao Rao Rao LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 3 9 BLOCKIN 8 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 3 9 BLOCKIN 8 0 +Allyson allyson A Al All Ally n on son yson LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 4 9 BLOCKIN 8 0 +Ettinger ettinger E Et Ett Etti r er ger nger LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 5 9 BLOCKIN 8 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 5 9 BLOCKIN 8 0 +Liwei liwei L Li Liw Liwe i ei wei iwei LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 5 9 BLOCKIN 8 0 +Jiang jiang J Ji Jia Jian g ng ang iang LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 6 9 BLOCKIN 8 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 6 9 BLOCKIN 8 0 +Bill bill B Bi Bil Bill l ll ill Bill LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 1 0 0 0 0 0 NOPUNCT 7 9 BLOCKIN 8 0 +Yuchen yuchen Y Yu Yuc Yuch n en hen chen LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 8 9 BLOCKIN 8 0 +Lin lin L Li Lin Lin n in Lin Lin LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 
NOPUNCT 8 9 BLOCKIN 8 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 8 9 BLOCKIN 8 0 +Nathan nathan N Na Nat Nath n an han than LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 9 9 BLOCKIN 8 0 +Lambert lambert L La Lam Lamb t rt ert bert LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 10 9 BLOCKIN 8 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 10 9 BLOCKIN 8 0 +Yejin yejin Y Ye Yej Yeji n in jin ejin LINEEND ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 10 9 BLOCKIN 8 0 +Choi choi C Ch Cho Choi i oi hoi Choi LINESTART LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 0 9 BLOCKIN 6 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 0 9 BLOCKIN 6 0 +and and a an and and d nd and and LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 1 9 BLOCKIN 6 0 +Nouha nouha N No Nou Nouh a ha uha ouha LINEIN LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 1 9 BLOCKIN 6 0 +Dziri dziri D Dz Dzi Dzir i ri iri ziri LINEIN LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 2 9 BLOCKIN 6 0 +. . . . . . . . . . 
LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 2 9 BLOCKIN 6 0 +Wildguard wildguard W Wi Wil Wild d rd ard uard LINEIN LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 3 9 BLOCKIN 6 0 +: : : : : : : : : : LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 PUNCT 3 9 BLOCKIN 6 0 +Open open O Op Ope Open n en pen Open LINEIN LINEINDENT INITCAP NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 4 9 BLOCKIN 6 0 +one one o on one one e ne one one LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 5 9 BLOCKIN 6 0 +- - - - - - - - - - LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 HYPHEN 5 9 BLOCKIN 6 0 +stop stop s st sto stop p op top stop LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 5 9 BLOCKIN 6 0 +moderation moderation m mo mod mode n on ion tion LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 6 9 BLOCKIN 6 0 +tools tools t to too tool s ls ols ools LINEIN LINEINDENT NOCAPS NODIGIT 0 1 1 0 0 0 0 0 NOPUNCT 7 9 BLOCKIN 6 0 +for for f fo for for r or for for LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 8 9 BLOCKIN 6 0 +safety safety s sa saf safe y ty ety fety LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 8 9 BLOCKIN 6 0 +risks risks r ri ris risk s ks sks isks LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 9 9 BLOCKIN 6 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 9 9 BLOCKIN 6 0 +jailbreaks jailbreaks j ja jai jail s ks aks eaks LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 10 9 BLOCKIN 6 0 +, , , , , , , , , , LINEEND LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 10 9 BLOCKIN 6 0 +and and a an and and d nd and and LINESTART LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 1 2 BLOCKIN 2 0 +refusals refusals r re ref refu s ls als sals LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 4 2 BLOCKIN 2 0 +of of o of of of f of of of LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 5 2 BLOCKIN 2 0 +llms llms l ll llm llms s ms lms llms LINEIN LINEINDENT NOCAPS 
NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 7 2 BLOCKIN 2 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 8 2 BLOCKIN 2 0 +2024 2024 2 20 202 2024 4 24 024 2024 LINEIN LINEINDENT NOCAPS ALLDIGIT 0 0 0 0 0 1 0 0 NOPUNCT 10 2 BLOCKIN 2 0 +. . . . . . . . . . LINEEND LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 10 2 BLOCKEND 2 0 +[ [ [ [ [ [ [ [ [ [ LINESTART ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 OPENBRACKET 0 1 BLOCKSTART 2 0 +83 83 8 83 83 83 3 83 83 83 LINEIN ALIGNEDLEFT NOCAPS ALLDIGIT 0 0 0 0 0 0 0 0 NOPUNCT 2 1 BLOCKIN 2 0 +] ] ] ] ] ] ] ] ] ] LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 ENDBRACKET 3 1 BLOCKIN 2 0 +Vishal vishal V Vi Vis Vish l al hal shal LINEEND ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 9 1 BLOCKIN 2 0 +Mysore mysore M My Mys Myso e re ore sore LINESTART LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 6 0 BLOCKIN 1 0 +. . . . . . . . . . LINEEND LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 7 0 BLOCKIN 1 0 +LLM llm L LL LLM LLM M LM LLM LLM LINESTART LINEINDENT ALLCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 6 0 BLOCKIN no 0 +Deployment deployment D De Dep Depl t nt ent ment LINESTART LINEINDENT INITCAP NODIGIT 0 1 1 0 0 0 0 0 NOPUNCT 8 1 BLOCKIN no 0 +Strategies strategies S St Str Stra s es ies gies LINESTART LINEINDENT INITCAP NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 8 1 BLOCKIN no 0 +: : : : : : : : : : LINESTART LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 PUNCT 3 0 BLOCKIN 1 0 +Its its I It Its Its s ts Its Its LINESTART LINEINDENT INITCAP NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 6 0 BLOCKIN no 0 +not not n no not not t ot not not LINESTART LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 6 0 BLOCKIN no 0 +Magic magic M Ma Mag Magi c ic gic agic LINESTART ALIGNEDLEFT INITCAP NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 7 0 BLOCKIN no 0 +, , , , , , , , , , LINESTART LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 3 0 BLOCKIN 1 0 +Its its I It Its Its s ts Its Its LINESTART LINEINDENT INITCAP NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 6 0 BLOCKIN no 
0 +Logic logic L Lo Log Logi c ic gic ogic LINESTART LINEINDENT INITCAP NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 6 0 BLOCKIN 1 0 +! ! ! ! ! ! ! ! ! ! LINEEND LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 NOPUNCT 7 0 BLOCKIN 1 0 +- - - - - - - - - - LINESTART LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 HYPHEN 5 0 BLOCKIN 1 0 +visrow visrow v vi vis visr w ow row srow LINESTART LINEINDENT NOCAPS NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 6 0 BLOCKIN 1 0 +. . . . . . . . . . LINEEND LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 7 0 BLOCKIN 1 0 +https https h ht htt http s ps tps ttps LINESTART LINEINDENT NOCAPS NODIGIT 0 0 0 0 0 0 0 1 NOPUNCT 1 2 BLOCKIN 7 0 +: : : : : : : : : : LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 PUNCT 2 2 BLOCKIN 7 0 +/ / / / / / / / / / LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 NOPUNCT 2 2 BLOCKIN 7 0 +/ / / / / / / / / / LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 NOPUNCT 2 2 BLOCKIN 7 0 +medium medium m me med medi m um ium dium LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 4 2 BLOCKIN 7 0 +. . . . . . . . . . 
LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 5 2 BLOCKIN 7 0 +com com c co com com m om com com LINEIN LINEINDENT NOCAPS NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 6 2 BLOCKIN 7 0 +/ / / / / / / / / / LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 NOPUNCT 6 2 BLOCKIN 7 0 +@ @ @ @ @ @ @ @ @ @ LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 NOPUNCT 6 2 BLOCKIN 7 0 +visrow visrow v vi vis visr w ow row srow LINEIN LINEINDENT NOCAPS NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 8 2 BLOCKIN 7 0 +/ / / / / / / / / / LINEEND LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 NOPUNCT 9 2 BLOCKIN 7 0 +llm llm l ll llm llm m lm llm llm LINESTART ALIGNEDLEFT NOCAPS NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 0 6 BLOCKIN 9 0 +- - - - - - - - - - LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 HYPHEN 0 6 BLOCKIN 9 0 +deployment deployment d de dep depl t nt ent ment LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 1 1 0 0 0 0 0 NOPUNCT 2 6 BLOCKIN 9 0 +- - - - - - - - - - LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 HYPHEN 2 6 BLOCKIN 9 0 +strategies strategies s st str stra s es ies gies LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 3 6 BLOCKIN 9 0 +- - - - - - - - - - LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 HYPHEN 4 6 BLOCKIN 9 0 +its its i it its its s ts its its LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 4 6 BLOCKIN 9 0 +- - - - - - - - - - LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 HYPHEN 4 6 BLOCKIN 9 0 +not not n no not not t ot not not LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 5 6 BLOCKIN 9 0 +- - - - - - - - - - LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 HYPHEN 5 6 BLOCKIN 9 0 +magic magic m ma mag magi c ic gic agic LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 6 6 BLOCKIN 9 0 +- - - - - - - - - - LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 HYPHEN 6 6 BLOCKIN 9 0 +its its i it its its s ts its its LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 6 6 BLOCKIN 9 0 +- - - - - - - - - - LINEIN ALIGNEDLEFT ALLCAP NODIGIT 
1 0 0 0 0 0 0 0 HYPHEN 6 6 BLOCKIN 9 0 +logic logic l lo log logi c ic gic ogic LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 7 6 BLOCKIN 9 0 +- - - - - - - - - - LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 HYPHEN 7 6 BLOCKIN 9 0 +71d5f32ac2b4 71d5f32ac2b4 7 71 71d 71d5 4 b4 2b4 c2b4 LINEIN ALIGNEDLEFT NOCAPS CONTAINSDIGITS 0 0 0 0 0 0 0 0 NOPUNCT 9 6 BLOCKIN 9 0 +. . . . . . . . . . LINEEND ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 9 6 BLOCKIN 9 0 +[ [ [ [ [ [ [ [ [ [ LINESTART LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 OPENBRACKET 0 1 BLOCKIN 3 0 +Accessed accessed A Ac Acc Acce d ed sed ssed LINEIN LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 5 1 BLOCKIN 3 0 +07 07 0 07 07 07 7 07 07 07 LINEIN LINEINDENT NOCAPS ALLDIGIT 0 0 0 0 0 0 0 0 NOPUNCT 7 1 BLOCKIN 3 0 +- - - - - - - - - - LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 HYPHEN 8 1 BLOCKIN 3 0 +08 08 0 08 08 08 8 08 08 08 LINEIN LINEINDENT NOCAPS ALLDIGIT 0 0 0 0 0 0 0 0 NOPUNCT 9 1 BLOCKIN 3 0 +- - - - - - - - - - LINEEND LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 HYPHEN 10 1 BLOCKIN 3 0 +2024 2024 2 20 202 2024 4 24 024 2024 LINESTART ALIGNEDLEFT NOCAPS ALLDIGIT 0 0 0 0 0 1 0 0 NOPUNCT 5 0 BLOCKIN 2 0 +] ] ] ] ] ] ] ] ] ] LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 ENDBRACKET 6 0 BLOCKIN 2 0 +. . . . . . . . . . 
LINEEND ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 7 0 BLOCKEND 2 0 +[ [ [ [ [ [ [ [ [ [ LINESTART ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 OPENBRACKET 0 9 BLOCKSTART 9 0 +84 84 8 84 84 84 4 84 84 84 LINEIN ALIGNEDLEFT NOCAPS ALLDIGIT 0 0 0 0 0 0 0 0 NOPUNCT 0 9 BLOCKIN 9 0 +] ] ] ] ] ] ] ] ] ] LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 ENDBRACKET 0 9 BLOCKIN 9 0 +Woosuk woosuk W Wo Woo Woos k uk suk osuk LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 1 9 BLOCKIN 9 0 +Kwon kwon K Kw Kwo Kwon n on won Kwon LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 1 9 BLOCKIN 9 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 1 9 BLOCKIN 9 0 +Zhuohan zhuohan Z Zh Zhu Zhuo n an han ohan LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 2 9 BLOCKIN 9 0 +Li li L Li Li Li i Li Li Li LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 3 9 BLOCKIN 9 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 3 9 BLOCKIN 9 0 +Siyuan siyuan S Si Siy Siyu n an uan yuan LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 4 9 BLOCKIN 9 0 +Zhuang zhuang Z Zh Zhu Zhua g ng ang uang LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 5 9 BLOCKIN 9 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 5 9 BLOCKIN 9 0 +Ying ying Y Yi Yin Ying g ng ing Ying LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 5 9 BLOCKIN 9 0 +Sheng sheng S Sh She Shen g ng eng heng LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 6 9 BLOCKIN 9 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 6 9 BLOCKIN 9 0 +Lianmin lianmin L Li Lia Lian n in min nmin LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 7 9 BLOCKIN 9 0 +Zheng zheng Z Zh Zhe Zhen g ng eng heng LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 8 9 BLOCKIN 9 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 8 9 BLOCKIN 9 
0 +Cody cody C Co Cod Cody y dy ody Cody LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 9 9 BLOCKIN 9 0 +Hao hao H Ha Hao Hao o ao Hao Hao LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 9 9 BLOCKIN 9 0 +Yu yu Y Yu Yu Yu u Yu Yu Yu LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 10 9 BLOCKIN 9 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 10 9 BLOCKIN 9 0 +Joseph joseph J Jo Jos Jose h ph eph seph LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 10 9 BLOCKIN 9 0 +E e E E E E E E E E LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 NOPUNCT 10 9 BLOCKIN 9 0 +. . . . . . . . . . LINEEND ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 10 9 BLOCKIN 9 0 +Gonzalez gonzalez G Go Gon Gonz z ez lez alez LINESTART LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 0 8 BLOCKIN 3 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 0 8 BLOCKIN 3 0 +Hao hao H Ha Hao Hao o ao Hao Hao LINEIN LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 1 8 BLOCKIN 3 0 +Zhang zhang Z Zh Zha Zhan g ng ang hang LINEIN LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 2 8 BLOCKIN 3 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 2 8 BLOCKIN 3 0 +and and a an and and d nd and and LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 2 8 BLOCKIN 3 0 +Ion ion I Io Ion Ion n on Ion Ion LINEIN LINEINDENT INITCAP NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 3 8 BLOCKIN 3 0 +Stoica stoica S St Sto Stoi a ca ica oica LINEIN LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 4 8 BLOCKIN 3 0 +. . . . . . . . . . 
LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 4 8 BLOCKIN 3 0 +Efficient efficient E Ef Eff Effi t nt ent ient LINEIN LINEINDENT INITCAP NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 5 8 BLOCKIN 3 0 +memory memory m me mem memo y ry ory mory LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 6 8 BLOCKIN 3 0 +management management m ma man mana t nt ent ment LINEIN LINEINDENT NOCAPS NODIGIT 0 1 1 0 0 0 0 0 NOPUNCT 7 8 BLOCKIN 3 0 +for for f fo for for r or for for LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 8 8 BLOCKIN 3 0 +large large l la lar larg e ge rge arge LINEIN LINEINDENT NOCAPS NODIGIT 0 1 1 0 0 0 0 0 NOPUNCT 9 8 BLOCKIN 3 0 +language language l la lan lang e ge age uage LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 10 8 BLOCKIN 3 0 +model model m mo mod mode l el del odel LINEEND LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 10 8 BLOCKIN 3 0 +serving serving s se ser serv g ng ing ving LINESTART LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 1 3 BLOCKIN 2 0 +with with w wi wit with h th ith with LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 3 3 BLOCKIN 2 0 +pagedattention pagedattention p pa pag page n on ion tion LINEIN LINEINDENT NOCAPS NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 8 3 BLOCKIN 2 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 8 3 BLOCKIN 2 0 +2023 2023 2 20 202 2023 3 23 023 2023 LINEIN LINEINDENT NOCAPS ALLDIGIT 0 0 0 0 0 1 0 0 NOPUNCT 10 3 BLOCKIN 2 0 +. . . . . . . . . . 
LINEEND LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 10 3 BLOCKEND 2 0 +[ [ [ [ [ [ [ [ [ [ LINESTART ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 OPENBRACKET 0 8 BLOCKSTART 4 0 +85 85 8 85 85 85 5 85 85 85 LINEIN ALIGNEDLEFT NOCAPS ALLDIGIT 0 0 0 0 0 0 0 0 NOPUNCT 0 8 BLOCKIN 4 0 +] ] ] ] ] ] ] ] ] ] LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 ENDBRACKET 0 8 BLOCKIN 4 0 +Preprocess preprocess P Pr Pre Prep s ss ess cess LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 1 8 BLOCKIN 4 0 +and and a an and and d nd and and LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 2 8 BLOCKIN 4 0 +fine fine f fi fin fine e ne ine fine LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 1 1 0 0 0 0 0 NOPUNCT 2 8 BLOCKIN 4 0 +- - - - - - - - - - LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 HYPHEN 3 8 BLOCKIN 4 0 +tune tune t tu tun tune e ne une tune LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 3 8 BLOCKIN 4 0 +llms llms l ll llm llms s ms lms llms LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 4 8 BLOCKIN 4 0 +quickly quickly q qu qui quic y ly kly ckly LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 5 8 BLOCKIN 4 0 +and and a an and and d nd and and LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 5 8 BLOCKIN 4 0 +cost cost c co cos cost t st ost cost LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 6 8 BLOCKIN 4 0 +- - - - - - - - - - LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 HYPHEN 6 8 BLOCKIN 4 0 +effectively effectively e ef eff effe y ly ely vely LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 7 8 BLOCKIN 4 0 +using using u us usi usin g ng ing sing LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 8 8 BLOCKIN 4 0 +amazon amazon a am ama amaz n on zon azon LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 9 8 BLOCKIN 4 0 +emr emr e em emr emr r mr emr emr LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 9 8 BLOCKIN 4 0 +serverless serverless s se ser serv s ss ess 
less LINEEND ALIGNEDLEFT NOCAPS NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 10 8 BLOCKIN 4 0 +and and a an and and d nd and and LINESTART LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 0 3 BLOCKIN 4 0 +amazon amazon a am ama amaz n on zon azon LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 2 3 BLOCKIN 4 0 +sagemaker sagemaker s sa sag sage r er ker aker LINEIN LINEINDENT NOCAPS NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 5 3 BLOCKIN 4 0 +- - - - - - - - - - LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 HYPHEN 6 3 BLOCKIN 4 0 +aws aws a aw aws aws s ws aws aws LINEIN LINEINDENT NOCAPS NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 7 3 BLOCKIN 4 0 +. . . . . . . . . . LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 7 3 BLOCKIN 4 0 +amazon amazon a am ama amaz n on zon azon LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 8 3 BLOCKIN 4 0 +. . . . . . . . . . LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 9 3 BLOCKIN 4 0 +com com c co com com m om com com LINEIN LINEINDENT NOCAPS NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 10 3 BLOCKIN 4 0 +. . . . . . . . . . LINEEND LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 10 3 BLOCKIN 4 0 +https https h ht htt http s ps tps ttps LINESTART LINEINDENT NOCAPS NODIGIT 0 0 0 0 0 0 0 1 NOPUNCT 1 3 BLOCKIN 9 0 +: : : : : : : : : : LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 PUNCT 1 3 BLOCKIN 9 0 +/ / / / / / / / / / LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 NOPUNCT 1 3 BLOCKIN 9 0 +/ / / / / / / / / / LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 NOPUNCT 2 3 BLOCKIN 9 0 +aws aws a aw aws aws s ws aws aws LINEIN LINEINDENT NOCAPS NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 2 3 BLOCKIN 9 0 +. . . . . . . . . . LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 3 3 BLOCKIN 9 0 +amazon amazon a am ama amaz n on zon azon LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 4 3 BLOCKIN 9 0 +. . . . . . . . . . 
LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 4 3 BLOCKIN 9 0 +com com c co com com m om com com LINEIN LINEINDENT NOCAPS NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 5 3 BLOCKIN 9 0 +/ / / / / / / / / / LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 NOPUNCT 5 3 BLOCKIN 9 0 +blogs blogs b bl blo blog s gs ogs logs LINEIN LINEINDENT NOCAPS NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 7 3 BLOCKIN 9 0 +/ / / / / / / / / / LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 NOPUNCT 7 3 BLOCKIN 9 0 +big big b bi big big g ig big big LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 8 3 BLOCKIN 9 0 +- - - - - - - - - - LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 HYPHEN 8 3 BLOCKIN 9 0 +data data d da dat data a ta ata data LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 9 3 BLOCKIN 9 0 +/ / / / / / / / / / LINEEND LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 NOPUNCT 9 3 BLOCKIN 9 0 +preprocess preprocess p pr pre prep s ss ess cess LINESTART ALIGNEDLEFT NOCAPS NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 1 9 BLOCKIN 10 0 +- - - - - - - - - - LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 HYPHEN 1 9 BLOCKIN 10 0 +and and a an and and d nd and and LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 1 9 BLOCKIN 10 0 +- - - - - - - - - - LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 HYPHEN 1 9 BLOCKIN 10 0 +fine fine f fi fin fine e ne ine fine LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 1 1 0 0 0 0 0 NOPUNCT 1 9 BLOCKIN 10 0 +- - - - - - - - - - LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 HYPHEN 2 9 BLOCKIN 10 0 +tune tune t tu tun tune e ne une tune LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 2 9 BLOCKIN 10 0 +- - - - - - - - - - LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 HYPHEN 2 9 BLOCKIN 10 0 +llms llms l ll llm llms s ms lms llms LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 3 9 BLOCKIN 10 0 +- - - - - - - - - - LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 HYPHEN 3 9 BLOCKIN 10 0 +quickly quickly q qu qui quic y ly kly ckly LINEIN 
ALIGNEDLEFT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 3 9 BLOCKIN 10 0 +- - - - - - - - - - LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 HYPHEN 3 9 BLOCKIN 10 0 +and and a an and and d nd and and LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 4 9 BLOCKIN 10 0 +- - - - - - - - - - LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 HYPHEN 4 9 BLOCKIN 10 0 +cost cost c co cos cost t st ost cost LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 4 9 BLOCKIN 10 0 +- - - - - - - - - - LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 HYPHEN 4 9 BLOCKIN 10 0 +effectively effectively e ef eff effe y ly ely vely LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 6 9 BLOCKIN 10 0 +- - - - - - - - - - LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 HYPHEN 6 9 BLOCKIN 10 0 +using using u us usi usin g ng ing sing LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 6 9 BLOCKIN 10 0 +- - - - - - - - - - LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 HYPHEN 6 9 BLOCKIN 10 0 +amazon amazon a am ama amaz n on zon azon LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 7 9 BLOCKIN 10 0 +- - - - - - - - - - LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 HYPHEN 7 9 BLOCKIN 10 0 +emr emr e em emr emr r mr emr emr LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 7 9 BLOCKIN 10 0 +- - - - - - - - - - LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 HYPHEN 7 9 BLOCKIN 10 0 +serverless serverless s se ser serv s ss ess less LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 8 9 BLOCKIN 10 0 +- - - - - - - - - - LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 HYPHEN 9 9 BLOCKIN 10 0 +and and a an and and d nd and and LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 9 9 BLOCKIN 10 0 +- - - - - - - - - - LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 HYPHEN 9 9 BLOCKIN 10 0 +ama ama a am ama ama a ma ama ama LINEEND ALIGNEDLEFT NOCAPS NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 9 9 BLOCKIN 10 0 +[ [ [ [ [ [ [ [ [ [ LINESTART 
ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 OPENBRACKET 0 2 BLOCKIN 5 0 +Accessed accessed A Ac Acc Acce d ed sed ssed LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 3 2 BLOCKIN 5 0 +06 06 0 06 06 06 6 06 06 06 LINEIN ALIGNEDLEFT NOCAPS ALLDIGIT 0 0 0 0 0 0 0 0 NOPUNCT 5 2 BLOCKIN 5 0 +- - - - - - - - - - LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 HYPHEN 5 2 BLOCKIN 5 0 +08 08 0 08 08 08 8 08 08 08 LINEIN ALIGNEDLEFT NOCAPS ALLDIGIT 0 0 0 0 0 0 0 0 NOPUNCT 6 2 BLOCKIN 5 0 +- - - - - - - - - - LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 HYPHEN 7 2 BLOCKIN 5 0 +2024 2024 2 20 202 2024 4 24 024 2024 LINEIN ALIGNEDLEFT NOCAPS ALLDIGIT 0 0 0 0 0 1 0 0 NOPUNCT 8 2 BLOCKIN 5 0 +] ] ] ] ] ] ] ] ] ] LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 ENDBRACKET 9 2 BLOCKIN 5 0 +. . . . . . . . . . LINEEND ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 9 2 BLOCKEND 5 0 +[ [ [ [ [ [ [ [ [ [ LINESTART ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 OPENBRACKET 0 9 BLOCKSTART 10 0 +86 86 8 86 86 86 6 86 86 86 LINEIN ALIGNEDLEFT NOCAPS ALLDIGIT 0 0 0 0 0 0 0 0 NOPUNCT 0 9 BLOCKIN 10 0 +] ] ] ] ] ] ] ] ] ] LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 ENDBRACKET 0 9 BLOCKIN 10 0 +Nvidia nvidia N Nv Nvi Nvid a ia dia idia LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 1 9 BLOCKIN 10 0 +nemo nemo n ne nem nemo o mo emo nemo LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 1 9 BLOCKIN 10 0 +build build b bu bui buil d ld ild uild LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 2 9 BLOCKIN 10 0 +and and a an and and d nd and and LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 3 9 BLOCKIN 10 0 +customize customize c cu cus cust e ze ize mize LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 4 9 BLOCKIN 10 0 +your your y yo you your r ur our your LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 4 9 BLOCKIN 10 0 +own own o ow own own n wn own own LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 5 9 
BLOCKIN 10 0 +llms llms l ll llm llms s ms lms llms LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 5 9 BLOCKIN 10 0 +( ( ( ( ( ( ( ( ( ( LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 OPENBRACKET 6 9 BLOCKIN 10 0 +with with w wi wit with h th ith with LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 6 9 BLOCKIN 10 0 +tutorial tutorial t tu tut tuto l al ial rial LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 7 9 BLOCKIN 10 0 +) ) ) ) ) ) ) ) ) ) LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 ENDBRACKET 7 9 BLOCKIN 10 0 +- - - - - - - - - - LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 HYPHEN 8 9 BLOCKIN 10 0 +run run r ru run run n un run run LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 8 9 BLOCKIN 10 0 +. . . . . . . . . . LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 8 9 BLOCKIN 10 0 +ai ai a ai ai ai i ai ai ai LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 8 9 BLOCKIN 10 0 +. . . . . . . . . . LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 8 9 BLOCKIN 10 0 +https https h ht htt http s ps tps ttps LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 0 0 0 0 0 0 1 NOPUNCT 9 9 BLOCKIN 10 0 +: : : : : : : : : : LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 PUNCT 9 9 BLOCKIN 10 0 +/ / / / / / / / / / LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 NOPUNCT 9 9 BLOCKIN 10 0 +/ / / / / / / / / / LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 NOPUNCT 9 9 BLOCKIN 10 0 +www www w ww www www w ww www www LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 10 9 BLOCKIN 10 0 +. . . . . . . . . . LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 10 9 BLOCKIN 10 0 +run run r ru run run n un run run LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 10 9 BLOCKIN 10 0 +. . . . . . . . . . 
LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 10 9 BLOCKIN 10 0 +ai ai a ai ai ai i ai ai ai LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 10 9 BLOCKIN 10 0 +/ / / / / / / / / / LINEEND ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 NOPUNCT 10 9 BLOCKIN 10 0 +guides guides g gu gui guid s es des ides LINESTART LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 0 6 BLOCKIN 10 0 +/ / / / / / / / / / LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 NOPUNCT 1 6 BLOCKIN 10 0 +ai ai a ai ai ai i ai ai ai LINEIN LINEINDENT NOCAPS NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 1 6 BLOCKIN 10 0 +- - - - - - - - - - LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 HYPHEN 1 6 BLOCKIN 10 0 +open open o op ope open n en pen open LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 2 6 BLOCKIN 10 0 +- - - - - - - - - - LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 HYPHEN 2 6 BLOCKIN 10 0 +source source s so sou sour e ce rce urce LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 3 6 BLOCKIN 10 0 +- - - - - - - - - - LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 HYPHEN 3 6 BLOCKIN 10 0 +projects projects p pr pro proj s ts cts ects LINEIN LINEINDENT NOCAPS NODIGIT 0 1 1 0 0 0 0 0 NOPUNCT 4 6 BLOCKIN 10 0 +/ / / / / / / / / / LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 NOPUNCT 4 6 BLOCKIN 10 0 +nvidia nvidia n nv nvi nvid a ia dia idia LINEIN LINEINDENT NOCAPS NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 5 6 BLOCKIN 10 0 +- - - - - - - - - - LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 HYPHEN 5 6 BLOCKIN 10 0 +nemo nemo n ne nem nemo o mo emo nemo LINEIN LINEINDENT NOCAPS NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 6 6 BLOCKIN 10 0 +. . . . . . . . . . 
LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 6 6 BLOCKIN 10 0 +[ [ [ [ [ [ [ [ [ [ LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 OPENBRACKET 6 6 BLOCKIN 10 0 +Accessed accessed A Ac Acc Acce d ed sed ssed LINEIN LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 7 6 BLOCKIN 10 0 +07 07 0 07 07 07 7 07 07 07 LINEIN LINEINDENT NOCAPS ALLDIGIT 0 0 0 0 0 0 0 0 NOPUNCT 8 6 BLOCKIN 10 0 +- - - - - - - - - - LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 HYPHEN 8 6 BLOCKIN 10 0 +08 08 0 08 08 08 8 08 08 08 LINEIN LINEINDENT NOCAPS ALLDIGIT 0 0 0 0 0 0 0 0 NOPUNCT 8 6 BLOCKIN 10 0 +- - - - - - - - - - LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 HYPHEN 9 6 BLOCKIN 10 0 +2024 2024 2 20 202 2024 4 24 024 2024 LINEIN LINEINDENT NOCAPS ALLDIGIT 0 0 0 0 0 1 0 0 NOPUNCT 9 6 BLOCKIN 10 0 +] ] ] ] ] ] ] ] ] ] LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 ENDBRACKET 9 6 BLOCKIN 10 0 +. . . . . . . . . . LINEEND LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 10 6 BLOCKEND 10 0 +[ [ [ [ [ [ [ [ [ [ LINESTART ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 OPENBRACKET 0 8 BLOCKSTART 10 0 +87 87 8 87 87 87 7 87 87 87 LINEIN ALIGNEDLEFT NOCAPS ALLDIGIT 0 0 0 0 0 0 0 0 NOPUNCT 0 8 BLOCKIN 10 0 +] ] ] ] ] ] ] ] ] ] LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 ENDBRACKET 0 8 BLOCKIN 10 0 +Nvidia nvidia N Nv Nvi Nvid a ia dia idia LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 1 8 BLOCKIN 10 0 +. . . . . . . . . . LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 1 8 BLOCKIN 10 0 +What what W Wh Wha What t at hat What LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 2 8 BLOCKIN 10 0 +is is i is is is s is is is LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 2 8 BLOCKIN 10 0 +nvidia nvidia n nv nvi nvid a ia dia idia LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 3 8 BLOCKIN 10 0 +nemo nemo n ne nem nemo o mo emo nemo LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 4 8 BLOCKIN 10 0 +? ? ? ? ? ? ? ? ? ? 
LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 PUNCT 4 8 BLOCKIN 10 0 +https https h ht htt http s ps tps ttps LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 0 0 0 0 0 0 1 NOPUNCT 5 8 BLOCKIN 10 0 +: : : : : : : : : : LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 PUNCT 5 8 BLOCKIN 10 0 +/ / / / / / / / / / LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 NOPUNCT 5 8 BLOCKIN 10 0 +/ / / / / / / / / / LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 NOPUNCT 5 8 BLOCKIN 10 0 +www www w ww www www w ww www www LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 5 8 BLOCKIN 10 0 +. . . . . . . . . . LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 5 8 BLOCKIN 10 0 +nvidia nvidia n nv nvi nvid a ia dia idia LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 6 8 BLOCKIN 10 0 +. . . . . . . . . . LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 6 8 BLOCKIN 10 0 +com com c co com com m om com com LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 6 8 BLOCKIN 10 0 +/ / / / / / / / / / LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 NOPUNCT 7 8 BLOCKIN 10 0 +en en e en en en n en en en LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 7 8 BLOCKIN 10 0 +- - - - - - - - - - LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 HYPHEN 7 8 BLOCKIN 10 0 +us us u us us us s us us us LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 1 1 0 0 0 0 0 NOPUNCT 7 8 BLOCKIN 10 0 +/ / / / / / / / / / LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 NOPUNCT 7 8 BLOCKIN 10 0 +ai ai a ai ai ai i ai ai ai LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 7 8 BLOCKIN 10 0 +- - - - - - - - - - LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 HYPHEN 8 8 BLOCKIN 10 0 +data data d da dat data a ta ata data LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 8 8 BLOCKIN 10 0 +- - - - - - - - - - LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 HYPHEN 8 8 BLOCKIN 10 0 +science science s sc sci scie e ce nce ence LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 
NOPUNCT 9 8 BLOCKIN 10 0 +/ / / / / / / / / / LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 NOPUNCT 9 8 BLOCKIN 10 0 +products products p pr pro prod s ts cts ucts LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 10 8 BLOCKIN 10 0 +/ / / / / / / / / / LINEEND ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 NOPUNCT 10 8 BLOCKIN 10 0 +nemo nemo n ne nem nemo o mo emo nemo LINESTART LINEINDENT NOCAPS NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 5 0 BLOCKIN 2 0 +/ / / / / / / / / / LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 NOPUNCT 6 0 BLOCKIN 2 0 +. . . . . . . . . . LINEEND LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 7 0 BLOCKEND 2 0 +[ [ [ [ [ [ [ [ [ [ LINESTART ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 OPENBRACKET 0 9 BLOCKSTART 6 0 +88 88 8 88 88 88 8 88 88 88 LINEIN ALIGNEDLEFT NOCAPS ALLDIGIT 0 0 0 0 0 0 0 0 NOPUNCT 0 9 BLOCKIN 6 0 +] ] ] ] ] ] ] ] ] ] LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 ENDBRACKET 0 9 BLOCKIN 6 0 +Gemini gemini G Ge Gem Gemi i ni ini mini LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 1 9 BLOCKIN 6 0 +Team team T Te Tea Team m am eam Team LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 1 9 BLOCKIN 6 0 +and and a an and and d nd and and LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 2 9 BLOCKIN 6 0 +Rohan rohan R Ro Roh Roha n an han ohan LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 3 9 BLOCKIN 6 0 +Anil anil A An Ani Anil l il nil Anil LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 3 9 BLOCKIN 6 0 +et et e et et et t et et et LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 4 9 BLOCKIN 6 0 +al al a al al al l al al al LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 4 9 BLOCKIN 6 0 +. . . . . . . . . . 
LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 4 9 BLOCKIN 6 0 +Gemini gemini G Ge Gem Gemi i ni ini mini LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 5 9 BLOCKIN 6 0 +: : : : : : : : : : LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 PUNCT 5 9 BLOCKIN 6 0 +A a A A A A A A A A LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 1 0 0 0 0 0 NOPUNCT 5 9 BLOCKIN 6 0 +family family f fa fam fami y ly ily mily LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 6 9 BLOCKIN 6 0 +of of o of of of f of of of LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 6 9 BLOCKIN 6 0 +highly highly h hi hig high y ly hly ghly LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 7 9 BLOCKIN 6 0 +capable capable c ca cap capa e le ble able LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 8 9 BLOCKIN 6 0 +multimodal multimodal m mu mul mult l al dal odal LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 9 9 BLOCKIN 6 0 +models models m mo mod mode s ls els dels LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 10 9 BLOCKIN 6 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 10 9 BLOCKIN 6 0 +2024 2024 2 20 202 2024 4 24 024 2024 LINEIN ALIGNEDLEFT NOCAPS ALLDIGIT 0 0 0 0 0 1 0 0 NOPUNCT 10 9 BLOCKIN 6 0 +. . . . . . . . . . 
LINEEND ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 10 9 BLOCKEND 6 0 +[ [ [ [ [ [ [ [ [ [ LINESTART ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 OPENBRACKET 0 9 BLOCKSTART 10 0 +89 89 8 89 89 89 9 89 89 89 LINEIN ALIGNEDLEFT NOCAPS ALLDIGIT 0 0 0 0 0 0 0 0 NOPUNCT 0 9 BLOCKIN 10 0 +] ] ] ] ] ] ] ] ] ] LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 ENDBRACKET 0 9 BLOCKIN 10 0 +Yizhang yizhang Y Yi Yiz Yizh g ng ang hang LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 1 9 BLOCKIN 10 0 +Jin jin J Ji Jin Jin n in Jin Jin LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 1 9 BLOCKIN 10 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 1 9 BLOCKIN 10 0 +Jian jian J Ji Jia Jian n an ian Jian LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 2 9 BLOCKIN 10 0 +Li li L Li Li Li i Li Li Li LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 2 9 BLOCKIN 10 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 3 9 BLOCKIN 10 0 +Yexin yexin Y Ye Yex Yexi n in xin exin LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 3 9 BLOCKIN 10 0 +Liu liu L Li Liu Liu u iu Liu Liu LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 4 9 BLOCKIN 10 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 4 9 BLOCKIN 10 0 +Tianjun tianjun T Ti Tia Tian n un jun njun LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 5 9 BLOCKIN 10 0 +Gu gu G Gu Gu Gu u Gu Gu Gu LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 5 9 BLOCKIN 10 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 5 9 BLOCKIN 10 0 +Kai kai K Ka Kai Kai i ai Kai Kai LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 6 9 BLOCKIN 10 0 +Wu wu W Wu Wu Wu u Wu Wu Wu LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 6 9 BLOCKIN 10 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 6 9 BLOCKIN 10 0 +Zhengkai zhengkai Z Zh 
Zhe Zhen i ai kai gkai LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 7 9 BLOCKIN 10 0 +Jiang jiang J Ji Jia Jian g ng ang iang LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 8 9 BLOCKIN 10 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 8 9 BLOCKIN 10 0 +Muyang muyang M Mu Muy Muya g ng ang yang LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 9 9 BLOCKIN 10 0 +He he H He He He e He He He LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 9 9 BLOCKIN 10 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 9 9 BLOCKIN 10 0 +Bo bo B Bo Bo Bo o Bo Bo Bo LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 10 9 BLOCKIN 10 0 +Zhao zhao Z Zh Zha Zhao o ao hao Zhao LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 10 9 BLOCKIN 10 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 10 9 BLOCKIN 10 0 +Xin xin X Xi Xin Xin n in Xin Xin LINEEND ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 10 9 BLOCKIN 10 0 +Tan tan T Ta Tan Tan n an Tan Tan LINESTART LINEINDENT INITCAP NODIGIT 0 1 1 0 0 0 0 0 NOPUNCT 0 8 BLOCKIN 5 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 0 8 BLOCKIN 5 0 +Zhenye zhenye Z Zh Zhe Zhen e ye nye enye LINEIN LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 1 8 BLOCKIN 5 0 +Gan gan G Ga Gan Gan n an Gan Gan LINEIN LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 1 8 BLOCKIN 5 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 2 8 BLOCKIN 5 0 +Yabiao yabiao Y Ya Yab Yabi o ao iao biao LINEIN LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 2 8 BLOCKIN 5 0 +Wang wang W Wa Wan Wang g ng ang Wang LINEIN LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 3 8 BLOCKIN 5 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 3 8 BLOCKIN 5 0 +Chengjie chengjie C Ch Che Chen e ie jie gjie LINEIN LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 
NOPUNCT 4 8 BLOCKIN 5 0 +Wang wang W Wa Wan Wang g ng ang Wang LINEIN LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 5 8 BLOCKIN 5 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 5 8 BLOCKIN 5 0 +and and a an and and d nd and and LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 6 8 BLOCKIN 5 0 +Lizhuang lizhuang L Li Liz Lizh g ng ang uang LINEIN LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 7 8 BLOCKIN 5 0 +Ma ma M Ma Ma Ma a Ma Ma Ma LINEIN LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 7 8 BLOCKIN 5 0 +. . . . . . . . . . LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 7 8 BLOCKIN 5 0 +Efficient efficient E Ef Eff Effi t nt ent ient LINEIN LINEINDENT INITCAP NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 9 8 BLOCKIN 5 0 +multimodal multimodal m mu mul mult l al dal odal LINEIN LINEINDENT NOCAPS NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 10 8 BLOCKIN 5 0 +large large l la lar larg e ge rge arge LINEEND LINEINDENT NOCAPS NODIGIT 0 1 1 0 0 0 0 0 NOPUNCT 10 8 BLOCKIN 5 0 +language language l la lan lang e ge age uage LINESTART LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 2 3 BLOCKIN 3 0 +models models m mo mod mode s ls els dels LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 4 3 BLOCKIN 3 0 +: : : : : : : : : : LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 PUNCT 5 3 BLOCKIN 3 0 +A a A A A A A A A A LINEIN LINEINDENT ALLCAP NODIGIT 1 0 1 0 0 0 0 0 NOPUNCT 5 3 BLOCKIN 3 0 +survey survey s su sur surv y ey vey rvey LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 8 3 BLOCKIN 3 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 8 3 BLOCKIN 3 0 +2024 2024 2 20 202 2024 4 24 024 2024 LINEIN LINEINDENT NOCAPS ALLDIGIT 0 0 0 0 0 1 0 0 NOPUNCT 10 3 BLOCKIN 3 0 +. . . . . . . . . . 
LINEEND LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 10 3 BLOCKEND 3 0 +[ [ [ [ [ [ [ [ [ [ LINESTART ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 OPENBRACKET 0 9 BLOCKSTART 8 0 +90 90 9 90 90 90 0 90 90 90 LINEIN ALIGNEDLEFT NOCAPS ALLDIGIT 0 0 0 0 0 0 0 0 NOPUNCT 0 9 BLOCKIN 8 0 +] ] ] ] ] ] ] ] ] ] LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 ENDBRACKET 0 9 BLOCKIN 8 0 +Alec alec A Al Ale Alec c ec lec Alec LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 1 9 BLOCKIN 8 0 +Radford radford R Ra Rad Radf d rd ord ford LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 1 9 BLOCKIN 8 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 2 9 BLOCKIN 8 0 +Jong jong J Jo Jon Jong g ng ong Jong LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 2 9 BLOCKIN 8 0 +Wook wook W Wo Woo Wook k ok ook Wook LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 3 9 BLOCKIN 8 0 +Kim kim K Ki Kim Kim m im Kim Kim LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 3 9 BLOCKIN 8 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 3 9 BLOCKIN 8 0 +Chris chris C Ch Chr Chri s is ris hris LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 4 9 BLOCKIN 8 0 +Hallacy hallacy H Ha Hal Hall y cy acy lacy LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 5 9 BLOCKIN 8 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 5 9 BLOCKIN 8 0 +Aditya aditya A Ad Adi Adit a ya tya itya LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 6 9 BLOCKIN 8 0 +Ramesh ramesh R Ra Ram Rame h sh esh mesh LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 7 9 BLOCKIN 8 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 7 9 BLOCKIN 8 0 +Gabriel gabriel G Ga Gab Gabr l el iel riel LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 8 9 BLOCKIN 8 0 +Goh goh G Go Goh Goh h oh Goh Goh LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 
0 NOPUNCT 8 9 BLOCKIN 8 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 9 9 BLOCKIN 8 0 +Sandhini sandhini S Sa San Sand i ni ini hini LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 10 9 BLOCKIN 8 0 +Agarwal agarwal A Ag Aga Agar l al wal rwal LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 10 9 BLOCKIN 8 0 +, , , , , , , , , , LINEEND ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 10 9 BLOCKIN 8 0 +Girish girish G Gi Gir Giri h sh ish rish LINESTART LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 0 9 BLOCKIN 6 0 +Sastry sastry S Sa Sas Sast y ry try stry LINEIN LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 1 9 BLOCKIN 6 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 1 9 BLOCKIN 6 0 +Amanda amanda A Am Ama Aman a da nda anda LINEIN LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 2 9 BLOCKIN 6 0 +Askell askell A As Ask Aske l ll ell kell LINEIN LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 3 9 BLOCKIN 6 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 3 9 BLOCKIN 6 0 +Pamela pamela P Pa Pam Pame a la ela mela LINEIN LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 4 9 BLOCKIN 6 0 +Mishkin mishkin M Mi Mis Mish n in kin hkin LINEIN LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 5 9 BLOCKIN 6 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 5 9 BLOCKIN 6 0 +Jack jack J Ja Jac Jack k ck ack Jack LINEIN LINEINDENT INITCAP NODIGIT 0 1 1 0 0 0 0 0 NOPUNCT 5 9 BLOCKIN 6 0 +Clark clark C Cl Cla Clar k rk ark lark LINEIN LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 6 9 BLOCKIN 6 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 6 9 BLOCKIN 6 0 +Gretchen gretchen G Gr Gre Gret n en hen chen LINEIN LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 7 9 BLOCKIN 6 0 +Krueger krueger K Kr Kru Krue r er ger eger LINEIN LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 8 9 BLOCKIN 6 
0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 8 9 BLOCKIN 6 0 +and and a an and and d nd and and LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 9 9 BLOCKIN 6 0 +Ilya ilya I Il Ily Ilya a ya lya Ilya LINEIN LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 9 9 BLOCKIN 6 0 +Sutskever sutskever S Su Sut Suts r er ver ever LINEIN LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 10 9 BLOCKIN 6 0 +. . . . . . . . . . LINEEND LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 10 9 BLOCKIN 6 0 +Learning learning L Le Lea Lear g ng ing ning LINESTART LINEINDENT INITCAP NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 1 7 BLOCKIN 2 0 +transferable transferable t tr tra tran e le ble able LINEIN LINEINDENT NOCAPS NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 2 7 BLOCKIN 2 0 +visual visual v vi vis visu l al ual sual LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 3 7 BLOCKIN 2 0 +models models m mo mod mode s ls els dels LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 4 7 BLOCKIN 2 0 +from from f fr fro from m om rom from LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 5 7 BLOCKIN 2 0 +natural natural n na nat natu l al ral ural LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 6 7 BLOCKIN 2 0 +language language l la lan lang e ge age uage LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 8 7 BLOCKIN 2 0 +supervision supervision s su sup supe n on ion sion LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 9 7 BLOCKIN 2 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 9 7 BLOCKIN 2 0 +2021 2021 2 20 202 2021 1 21 021 2021 LINEIN LINEINDENT NOCAPS ALLDIGIT 0 0 0 0 0 1 0 0 NOPUNCT 10 7 BLOCKIN 2 0 +. . . . . . . . . . 
LINEEND LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 10 7 BLOCKEND 2 0 +[ [ [ [ [ [ [ [ [ [ LINESTART ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 OPENBRACKET 0 8 BLOCKSTART 8 0 +91 91 9 91 91 91 1 91 91 91 LINEIN ALIGNEDLEFT NOCAPS ALLDIGIT 0 0 0 0 0 0 0 0 NOPUNCT 0 8 BLOCKIN 8 0 +] ] ] ] ] ] ] ] ] ] LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 ENDBRACKET 0 8 BLOCKIN 8 0 +Haokun haokun H Ha Hao Haok n un kun okun LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 1 8 BLOCKIN 8 0 +Liu liu L Li Liu Liu u iu Liu Liu LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 1 8 BLOCKIN 8 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 1 8 BLOCKIN 8 0 +Derek derek D De Der Dere k ek rek erek LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 2 8 BLOCKIN 8 0 +Tam tam T Ta Tam Tam m am Tam Tam LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 3 8 BLOCKIN 8 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 3 8 BLOCKIN 8 0 +Mohammed mohammed M Mo Moh Moha d ed med mmed LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 4 8 BLOCKIN 8 0 +Muqeeth muqeeth M Mu Muq Muqe h th eth eeth LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 5 8 BLOCKIN 8 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 5 8 BLOCKIN 8 0 +Jay jay J Ja Jay Jay y ay Jay Jay LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 6 8 BLOCKIN 8 0 +Mohta mohta M Mo Moh Moht a ta hta ohta LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 6 8 BLOCKIN 8 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 7 8 BLOCKIN 8 0 +Tenghao tenghao T Te Ten Teng o ao hao ghao LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 8 8 BLOCKIN 8 0 +Huang huang H Hu Hua Huan g ng ang uang LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 8 8 BLOCKIN 8 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 8 8 BLOCKIN 8 
0 +Mohit mohit M Mo Moh Mohi t it hit ohit LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 9 8 BLOCKIN 8 0 +Bansal bansal B Ba Ban Bans l al sal nsal LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 10 8 BLOCKIN 8 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 10 8 BLOCKIN 8 0 +and and a an and and d nd and and LINEEND ALIGNEDLEFT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 10 8 BLOCKIN 8 0 +Colin colin C Co Col Coli n in lin olin LINESTART LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 0 9 BLOCKIN 6 0 +Raffel raffel R Ra Raf Raff l el fel ffel LINEIN LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 1 9 BLOCKIN 6 0 +. . . . . . . . . . LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 1 9 BLOCKIN 6 0 +Few few F Fe Few Few w ew Few Few LINEIN LINEINDENT INITCAP NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 1 9 BLOCKIN 6 0 +- - - - - - - - - - LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 HYPHEN 2 9 BLOCKIN 6 0 +shot shot s sh sho shot t ot hot shot LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 2 9 BLOCKIN 6 0 +parameter parameter p pa par para r er ter eter LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 3 9 BLOCKIN 6 0 +- - - - - - - - - - LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 HYPHEN 3 9 BLOCKIN 6 0 +efficient efficient e ef eff effi t nt ent ient LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 4 9 BLOCKIN 6 0 +fine fine f fi fin fine e ne ine fine LINEIN LINEINDENT NOCAPS NODIGIT 0 1 1 0 0 0 0 0 NOPUNCT 5 9 BLOCKIN 6 0 +- - - - - - - - - - LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 HYPHEN 5 9 BLOCKIN 6 0 +tuning tuning t tu tun tuni g ng ing ning LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 5 9 BLOCKIN 6 0 +is is i is is is s is is is LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 6 9 BLOCKIN 6 0 +better better b be bet bett r er ter tter LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 7 9 BLOCKIN 6 0 +and and a an and and d nd and and LINEIN 
LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 7 9 BLOCKIN 6 0 +cheaper cheaper c ch che chea r er per aper LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 8 9 BLOCKIN 6 0 +than than t th tha than n an han than LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 9 9 BLOCKIN 6 0 +in in i in in in n in in in LINEIN LINEINDENT NOCAPS NODIGIT 0 1 1 0 0 0 0 0 NOPUNCT 9 9 BLOCKIN 6 0 +- - - - - - - - - - LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 HYPHEN 9 9 BLOCKIN 6 0 +context context c co con cont t xt ext text LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 10 9 BLOCKIN 6 0 +learn learn l le lea lear n rn arn earn LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 10 9 BLOCKIN 6 0 +- - - - - - - - - - LINEEND LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 HYPHEN 10 9 BLOCKIN 6 0 +ing ing i in ing ing g ng ing ing LINESTART LINEINDENT NOCAPS NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 2 1 BLOCKIN 2 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 3 1 BLOCKIN 2 0 +2022 2022 2 20 202 2022 2 22 022 2022 LINEIN LINEINDENT NOCAPS ALLDIGIT 0 0 0 0 0 1 0 0 NOPUNCT 8 1 BLOCKIN 2 0 +. . . . . . . . . . 
LINEEND LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 9 1 BLOCKEND 2 0 +[ [ [ [ [ [ [ [ [ [ LINESTART ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 OPENBRACKET 0 9 BLOCKSTART 7 0 +92 92 9 92 92 92 2 92 92 92 LINEIN ALIGNEDLEFT NOCAPS ALLDIGIT 0 0 0 0 0 0 0 0 NOPUNCT 0 9 BLOCKIN 7 0 +] ] ] ] ] ] ] ] ] ] LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 ENDBRACKET 0 9 BLOCKIN 7 0 +Mojtaba mojtaba M Mo Moj Mojt a ba aba taba LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 1 9 BLOCKIN 7 0 +Valipour valipour V Va Val Vali r ur our pour LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 2 9 BLOCKIN 7 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 2 9 BLOCKIN 7 0 +Mehdi mehdi M Me Meh Mehd i di hdi ehdi LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 3 9 BLOCKIN 7 0 +Rezagholizadeh rezagholizadeh R Re Rez Reza h eh deh adeh LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 5 9 BLOCKIN 7 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 5 9 BLOCKIN 7 0 +Ivan ivan I Iv Iva Ivan n an van Ivan LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 5 9 BLOCKIN 7 0 +Kobyzev kobyzev K Ko Kob Koby v ev zev yzev LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 6 9 BLOCKIN 7 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 6 9 BLOCKIN 7 0 +and and a an and and d nd and and LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 7 9 BLOCKIN 7 0 +Ali ali A Al Ali Ali i li Ali Ali LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 7 9 BLOCKIN 7 0 +Ghodsi ghodsi G Gh Gho Ghod i si dsi odsi LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 8 9 BLOCKIN 7 0 +. . . . . . . . . . 
LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 8 9 BLOCKIN 7 0 +Dylora dylora D Dy Dyl Dylo a ra ora lora LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 9 9 BLOCKIN 7 0 +: : : : : : : : : : LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 PUNCT 9 9 BLOCKIN 7 0 +Parameter parameter P Pa Par Para r er ter eter LINEEND ALIGNEDLEFT INITCAP NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 10 9 BLOCKIN 7 0 +efficient efficient e ef eff effi t nt ent ient LINESTART LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 0 8 BLOCKIN 5 0 +tuning tuning t tu tun tuni g ng ing ning LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 1 8 BLOCKIN 5 0 +of of o of of of f of of of LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 2 8 BLOCKIN 5 0 +pre pre p pr pre pre e re pre pre LINEIN LINEINDENT NOCAPS NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 2 8 BLOCKIN 5 0 +- - - - - - - - - - LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 HYPHEN 2 8 BLOCKIN 5 0 +trained trained t tr tra trai d ed ned ined LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 3 8 BLOCKIN 5 0 +models models m mo mod mode s ls els dels LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 4 8 BLOCKIN 5 0 +using using u us usi usin g ng ing sing LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 5 8 BLOCKIN 5 0 +dynamic dynamic d dy dyn dyna c ic mic amic LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 6 8 BLOCKIN 5 0 +search search s se sea sear h ch rch arch LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 7 8 BLOCKIN 5 0 +- - - - - - - - - - LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 HYPHEN 7 8 BLOCKIN 5 0 +free free f fr fre free e ee ree free LINEIN LINEINDENT NOCAPS NODIGIT 0 1 1 0 0 0 0 0 NOPUNCT 7 8 BLOCKIN 5 0 +low low l lo low low w ow low low LINEIN LINEINDENT NOCAPS NODIGIT 0 1 1 0 0 0 0 0 NOPUNCT 8 8 BLOCKIN 5 0 +- - - - - - - - - - LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 HYPHEN 8 8 BLOCKIN 5 0 +rank rank r ra ran rank k nk ank rank LINEIN LINEINDENT 
NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 8 8 BLOCKIN 5 0 +adaptation adaptation a ad ada adap n on ion tion LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 10 8 BLOCKIN 5 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 10 8 BLOCKIN 5 0 +2023 2023 2 20 202 2023 3 23 023 2023 LINEIN LINEINDENT NOCAPS ALLDIGIT 0 0 0 0 0 1 0 0 NOPUNCT 10 8 BLOCKIN 5 0 +. . . . . . . . . . LINEEND LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 10 8 BLOCKEND 5 0 +[ [ [ [ [ [ [ [ [ [ LINESTART ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 OPENBRACKET 0 9 BLOCKSTART 10 0 +93 93 9 93 93 93 3 93 93 93 LINEIN ALIGNEDLEFT NOCAPS ALLDIGIT 0 0 0 0 0 0 0 0 NOPUNCT 0 9 BLOCKIN 10 0 +] ] ] ] ] ] ] ] ] ] LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 ENDBRACKET 0 9 BLOCKIN 10 0 +Longteng longteng L Lo Lon Long g ng eng teng LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 1 9 BLOCKIN 10 0 +Zhang zhang Z Zh Zha Zhan g ng ang hang LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 2 9 BLOCKIN 10 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 2 9 BLOCKIN 10 0 +Lin lin L Li Lin Lin n in Lin Lin LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 2 9 BLOCKIN 10 0 +Zhang zhang Z Zh Zha Zhan g ng ang hang LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 3 9 BLOCKIN 10 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 3 9 BLOCKIN 10 0 +Shaohuai shaohuai S Sh Sha Shao i ai uai huai LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 4 9 BLOCKIN 10 0 +Shi shi S Sh Shi Shi i hi Shi Shi LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 5 9 BLOCKIN 10 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 5 9 BLOCKIN 10 0 +Xiaowen xiaowen X Xi Xia Xiao n en wen owen LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 6 9 BLOCKIN 10 0 +Chu chu C Ch Chu Chu u hu Chu Chu LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 6 9 
BLOCKIN 10 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 6 9 BLOCKIN 10 0 +and and a an and and d nd and and LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 7 9 BLOCKIN 10 0 +Bo bo B Bo Bo Bo o Bo Bo Bo LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 7 9 BLOCKIN 10 0 +Li li L Li Li Li i Li Li Li LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 8 9 BLOCKIN 10 0 +. . . . . . . . . . LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 8 9 BLOCKIN 10 0 +Lora lora L Lo Lor Lora a ra ora Lora LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 8 9 BLOCKIN 10 0 +- - - - - - - - - - LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 HYPHEN 8 9 BLOCKIN 10 0 +fa fa f fa fa fa a fa fa fa LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 9 9 BLOCKIN 10 0 +: : : : : : : : : : LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 PUNCT 9 9 BLOCKIN 10 0 +Memory memory M Me Mem Memo y ry ory mory LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 10 9 BLOCKIN 10 0 +- - - - - - - - - - LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 HYPHEN 10 9 BLOCKIN 10 0 +efficient efficient e ef eff effi t nt ent ient LINEEND ALIGNEDLEFT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 10 9 BLOCKIN 10 0 +low low l lo low low w ow low low LINESTART LINEINDENT NOCAPS NODIGIT 0 1 1 0 0 0 0 0 NOPUNCT 0 6 BLOCKIN 4 0 +- - - - - - - - - - LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 HYPHEN 0 6 BLOCKIN 4 0 +rank rank r ra ran rank k nk ank rank LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 1 6 BLOCKIN 4 0 +adaptation adaptation a ad ada adap n on ion tion LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 3 6 BLOCKIN 4 0 +for for f fo for for r or for for LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 3 6 BLOCKIN 4 0 +large large l la lar larg e ge rge arge LINEIN LINEINDENT NOCAPS NODIGIT 0 1 1 0 0 0 0 0 NOPUNCT 4 6 BLOCKIN 4 0 +language language l la lan lang e ge age uage LINEIN LINEINDENT 
NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 6 6 BLOCKIN 4 0 +models models m mo mod mode s ls els dels LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 7 6 BLOCKIN 4 0 +fine fine f fi fin fine e ne ine fine LINEIN LINEINDENT NOCAPS NODIGIT 0 1 1 0 0 0 0 0 NOPUNCT 8 6 BLOCKIN 4 0 +- - - - - - - - - - LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 HYPHEN 8 6 BLOCKIN 4 0 +tuning tuning t tu tun tuni g ng ing ning LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 9 6 BLOCKIN 4 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 9 6 BLOCKIN 4 0 +2023 2023 2 20 202 2023 3 23 023 2023 LINEIN LINEINDENT NOCAPS ALLDIGIT 0 0 0 0 0 1 0 0 NOPUNCT 10 6 BLOCKIN 4 0 +. . . . . . . . . . LINEEND LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 10 6 BLOCKEND 4 0 +[ [ [ [ [ [ [ [ [ [ LINESTART ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 OPENBRACKET 0 9 BLOCKSTART 8 0 +94 94 9 94 94 94 4 94 94 94 LINEIN ALIGNEDLEFT NOCAPS ALLDIGIT 0 0 0 0 0 0 0 0 NOPUNCT 0 9 BLOCKIN 8 0 +] ] ] ] ] ] ] ] ] ] LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 ENDBRACKET 0 9 BLOCKIN 8 0 +Qiong qiong Q Qi Qio Qion g ng ong iong LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 1 9 BLOCKIN 8 0 +Wu wu W Wu Wu Wu u Wu Wu Wu LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 1 9 BLOCKIN 8 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 1 9 BLOCKIN 8 0 +Weihao weihao W We Wei Weih o ao hao ihao LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 2 9 BLOCKIN 8 0 +Ye ye Y Ye Ye Ye e Ye Ye Ye LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 2 9 BLOCKIN 8 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 2 9 BLOCKIN 8 0 +Yiyi yiyi Y Yi Yiy Yiyi i yi iyi Yiyi LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 3 9 BLOCKIN 8 0 +Zhou zhou Z Zh Zho Zhou u ou hou Zhou LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 4 9 BLOCKIN 8 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT 
ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 4 9 BLOCKIN 8 0 +Xiaoshuai xiaoshuai X Xi Xia Xiao i ai uai huai LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 5 9 BLOCKIN 8 0 +Sun sun S Su Sun Sun n un Sun Sun LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 1 0 0 0 0 0 NOPUNCT 5 9 BLOCKIN 8 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 5 9 BLOCKIN 8 0 +and and a an and and d nd and and LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 6 9 BLOCKIN 8 0 +Rongrong rongrong R Ro Ron Rong g ng ong rong LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 7 9 BLOCKIN 8 0 +Ji ji J Ji Ji Ji i Ji Ji Ji LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 7 9 BLOCKIN 8 0 +. . . . . . . . . . LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 7 9 BLOCKIN 8 0 +Not not N No Not Not t ot Not Not LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 8 9 BLOCKIN 8 0 +all all a al all all l ll all all LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 8 9 BLOCKIN 8 0 +attention attention a at att atte n on ion tion LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 10 9 BLOCKIN 8 0 +is is i is is is s is is is LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 10 9 BLOCKIN 8 0 +needed needed n ne nee need d ed ded eded LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 10 9 BLOCKIN 8 0 +: : : : : : : : : : LINEEND ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 PUNCT 10 9 BLOCKIN 8 0 +Parameter parameter P Pa Par Para r er ter eter LINESTART LINEINDENT INITCAP NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 0 9 BLOCKIN 3 0 +and and a an and and d nd and and LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 1 9 BLOCKIN 3 0 +computation computation c co com comp n on ion tion LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 2 9 BLOCKIN 3 0 +efficient efficient e ef eff effi t nt ent ient LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 3 9 BLOCKIN 3 0 +transfer transfer t tr tra tran r er fer sfer 
LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 4 9 BLOCKIN 3 0 +learning learning l le lea lear g ng ing ning LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 5 9 BLOCKIN 3 0 +for for f fo for for r or for for LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 6 9 BLOCKIN 3 0 +multi multi m mu mul mult i ti lti ulti LINEIN LINEINDENT NOCAPS NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 7 9 BLOCKIN 3 0 +- - - - - - - - - - LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 HYPHEN 7 9 BLOCKIN 3 0 +modal modal m mo mod moda l al dal odal LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 7 9 BLOCKIN 3 0 +large large l la lar larg e ge rge arge LINEIN LINEINDENT NOCAPS NODIGIT 0 1 1 0 0 0 0 0 NOPUNCT 8 9 BLOCKIN 3 0 +language language l la lan lang e ge age uage LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 9 9 BLOCKIN 3 0 +models models m mo mod mode s ls els dels LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 10 9 BLOCKIN 3 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 10 9 BLOCKIN 3 0 +2024 2024 2 20 202 2024 4 24 024 2024 LINEIN LINEINDENT NOCAPS ALLDIGIT 0 0 0 0 0 1 0 0 NOPUNCT 10 9 BLOCKIN 3 0 +. . . . . . . . . . 
LINEEND LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 10 9 BLOCKEND 3 0 +[ [ [ [ [ [ [ [ [ [ LINESTART ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 OPENBRACKET 0 8 BLOCKSTART 10 0 +95 95 9 95 95 95 5 95 95 95 LINEIN ALIGNEDLEFT NOCAPS ALLDIGIT 0 0 0 0 0 0 0 0 NOPUNCT 0 8 BLOCKIN 10 0 +] ] ] ] ] ] ] ] ] ] LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 ENDBRACKET 0 8 BLOCKIN 10 0 +Shibo shibo S Sh Shi Shib o bo ibo hibo LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 1 8 BLOCKIN 10 0 +Jie jie J Ji Jie Jie e ie Jie Jie LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 1 8 BLOCKIN 10 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 1 8 BLOCKIN 10 0 +Yehui yehui Y Ye Yeh Yehu i ui hui ehui LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 2 8 BLOCKIN 10 0 +Tang tang T Ta Tan Tang g ng ang Tang LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 1 0 0 0 0 0 NOPUNCT 3 8 BLOCKIN 10 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 3 8 BLOCKIN 10 0 +Ning ning N Ni Nin Ning g ng ing Ning LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 3 8 BLOCKIN 10 0 +Ding ding D Di Din Ding g ng ing Ding LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 4 8 BLOCKIN 10 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 4 8 BLOCKIN 10 0 +Zhi zhi Z Zh Zhi Zhi i hi Zhi Zhi LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 5 8 BLOCKIN 10 0 +- - - - - - - - - - LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 HYPHEN 5 8 BLOCKIN 10 0 +Hong hong H Ho Hon Hong g ng ong Hong LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 5 8 BLOCKIN 10 0 +Deng deng D De Den Deng g ng eng Deng LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 6 8 BLOCKIN 10 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 6 8 BLOCKIN 10 0 +Kai kai K Ka Kai Kai i ai Kai Kai LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 7 8 BLOCKIN 10 0 +Han 
han H Ha Han Han n an Han Han LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 7 8 BLOCKIN 10 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 7 8 BLOCKIN 10 0 +and and a an and and d nd and and LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 8 8 BLOCKIN 10 0 +Yunhe yunhe Y Yu Yun Yunh e he nhe unhe LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 9 8 BLOCKIN 10 0 +Wang wang W Wa Wan Wang g ng ang Wang LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 9 8 BLOCKIN 10 0 +. . . . . . . . . . LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 9 8 BLOCKIN 10 0 +Memory memory M Me Mem Memo y ry ory mory LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 10 8 BLOCKIN 10 0 +- - - - - - - - - - LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 HYPHEN 10 8 BLOCKIN 10 0 +space space s sp spa spac e ce ace pace LINEEND ALIGNEDLEFT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 10 8 BLOCKIN 10 0 +visual visual v vi vis visu l al ual sual LINESTART LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 0 6 BLOCKIN 4 0 +prompting prompting p pr pro prom g ng ing ting LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 2 6 BLOCKIN 4 0 +for for f fo for for r or for for LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 3 6 BLOCKIN 4 0 +efficient efficient e ef eff effi t nt ent ient LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 4 6 BLOCKIN 4 0 +vision vision v vi vis visi n on ion sion LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 6 6 BLOCKIN 4 0 +- - - - - - - - - - LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 HYPHEN 6 6 BLOCKIN 4 0 +language language l la lan lang e ge age uage LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 7 6 BLOCKIN 4 0 +fine fine f fi fin fine e ne ine fine LINEIN LINEINDENT NOCAPS NODIGIT 0 1 1 0 0 0 0 0 NOPUNCT 8 6 BLOCKIN 4 0 +- - - - - - - - - - LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 HYPHEN 8 6 BLOCKIN 4 0 +tuning tuning t tu tun 
tuni g ng ing ning LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 9 6 BLOCKIN 4 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 9 6 BLOCKIN 4 0 +2024 2024 2 20 202 2024 4 24 024 2024 LINEIN LINEINDENT NOCAPS ALLDIGIT 0 0 0 0 0 1 0 0 NOPUNCT 10 6 BLOCKIN 4 0 +. . . . . . . . . . LINEEND LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 10 6 BLOCKEND 4 0 +[ [ [ [ [ [ [ [ [ [ LINESTART ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 OPENBRACKET 0 9 BLOCKSTART 8 0 +96 96 9 96 96 96 6 96 96 96 LINEIN ALIGNEDLEFT NOCAPS ALLDIGIT 0 0 0 0 0 0 0 0 NOPUNCT 0 9 BLOCKIN 8 0 +] ] ] ] ] ] ] ] ] ] LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 ENDBRACKET 0 9 BLOCKIN 8 0 +Kai kai K Ka Kai Kai i ai Kai Kai LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 0 9 BLOCKIN 8 0 +Lv lv L Lv Lv Lv v Lv Lv Lv LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 1 9 BLOCKIN 8 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 1 9 BLOCKIN 8 0 +Yuqing yuqing Y Yu Yuq Yuqi g ng ing qing LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 2 9 BLOCKIN 8 0 +Yang yang Y Ya Yan Yang g ng ang Yang LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 2 9 BLOCKIN 8 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 2 9 BLOCKIN 8 0 +Tengxiao tengxiao T Te Ten Teng o ao iao xiao LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 4 9 BLOCKIN 8 0 +Liu liu L Li Liu Liu u iu Liu Liu LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 4 9 BLOCKIN 8 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 4 9 BLOCKIN 8 0 +Qinghui qinghui Q Qi Qin Qing i ui hui ghui LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 5 9 BLOCKIN 8 0 +Gao gao G Ga Gao Gao o ao Gao Gao LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 6 9 BLOCKIN 8 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 6 9 BLOCKIN 8 0 +Qipeng qipeng Q Qi 
Qip Qipe g ng eng peng LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 7 9 BLOCKIN 8 0 +Guo guo G Gu Guo Guo o uo Guo Guo LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 7 9 BLOCKIN 8 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 7 9 BLOCKIN 8 0 +and and a an and and d nd and and LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 8 9 BLOCKIN 8 0 +Xipeng xipeng X Xi Xip Xipe g ng eng peng LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 8 9 BLOCKIN 8 0 +Qiu qiu Q Qi Qiu Qiu u iu Qiu Qiu LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 9 9 BLOCKIN 8 0 +. . . . . . . . . . LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 9 9 BLOCKIN 8 0 +Full full F Fu Ful Full l ll ull Full LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 10 9 BLOCKIN 8 0 +parameter parameter p pa par para r er ter eter LINEEND ALIGNEDLEFT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 10 9 BLOCKIN 8 0 +fine fine f fi fin fine e ne ine fine LINESTART LINEINDENT NOCAPS NODIGIT 0 1 1 0 0 0 0 0 NOPUNCT 0 6 BLOCKIN 3 0 +- - - - - - - - - - LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 HYPHEN 0 6 BLOCKIN 3 0 +tuning tuning t tu tun tuni g ng ing ning LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 1 6 BLOCKIN 3 0 +for for f fo for for r or for for LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 2 6 BLOCKIN 3 0 +large large l la lar larg e ge rge arge LINEIN LINEINDENT NOCAPS NODIGIT 0 1 1 0 0 0 0 0 NOPUNCT 3 6 BLOCKIN 3 0 +language language l la lan lang e ge age uage LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 4 6 BLOCKIN 3 0 +models models m mo mod mode s ls els dels LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 5 6 BLOCKIN 3 0 +with with w wi wit with h th ith with LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 6 6 BLOCKIN 3 0 +limited limited l li lim limi d ed ted ited LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 8 6 BLOCKIN 3 0 +resources 
resources r re res reso s es ces rces LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 9 6 BLOCKIN 3 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 9 6 BLOCKIN 3 0 +2024 2024 2 20 202 2024 4 24 024 2024 LINEIN LINEINDENT NOCAPS ALLDIGIT 0 0 0 0 0 1 0 0 NOPUNCT 10 6 BLOCKIN 3 0 +. . . . . . . . . . LINEEND LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 10 6 BLOCKEND 3 0 +[ [ [ [ [ [ [ [ [ [ LINESTART ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 OPENBRACKET 0 9 BLOCKSTART 9 0 +97 97 9 97 97 97 7 97 97 97 LINEIN ALIGNEDLEFT NOCAPS ALLDIGIT 0 0 0 0 0 0 0 0 NOPUNCT 0 9 BLOCKIN 9 0 +] ] ] ] ] ] ] ] ] ] LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 ENDBRACKET 0 9 BLOCKIN 9 0 +Sadhika sadhika S Sa Sad Sadh a ka ika hika LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 1 9 BLOCKIN 9 0 +Malladi malladi M Ma Mal Mall i di adi ladi LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 2 9 BLOCKIN 9 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 2 9 BLOCKIN 9 0 +Tianyu tianyu T Ti Tia Tian u yu nyu anyu LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 3 9 BLOCKIN 9 0 +Gao gao G Ga Gao Gao o ao Gao Gao LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 3 9 BLOCKIN 9 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 3 9 BLOCKIN 9 0 +Eshaan eshaan E Es Esh Esha n an aan haan LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 4 9 BLOCKIN 9 0 +Nichani nichani N Ni Nic Nich i ni ani hani LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 5 9 BLOCKIN 9 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 5 9 BLOCKIN 9 0 +Alex alex A Al Ale Alex x ex lex Alex LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 6 9 BLOCKIN 9 0 +Damian damian D Da Dam Dami n an ian mian LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 7 9 BLOCKIN 9 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 
0 0 0 COMMA 7 9 BLOCKIN 9 0 +Jason jason J Ja Jas Jaso n on son ason LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 8 9 BLOCKIN 9 0 +D d D D D D D D D D LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 NOPUNCT 8 9 BLOCKIN 9 0 +. . . . . . . . . . LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 8 9 BLOCKIN 9 0 +Lee lee L Le Lee Lee e ee Lee Lee LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 1 0 0 0 0 0 NOPUNCT 9 9 BLOCKIN 9 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 9 9 BLOCKIN 9 0 +Danqi danqi D Da Dan Danq i qi nqi anqi LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 10 9 BLOCKIN 9 0 +Chen chen C Ch Che Chen n en hen Chen LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 10 9 BLOCKIN 9 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 10 9 BLOCKIN 9 0 +and and a an and and d nd and and LINEEND ALIGNEDLEFT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 10 9 BLOCKIN 9 0 +Sanjeev sanjeev S Sa San Sanj v ev eev jeev LINESTART LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 0 7 BLOCKIN 4 0 +Arora arora A Ar Aro Aror a ra ora rora LINEIN LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 1 7 BLOCKIN 4 0 +. . . . . . . . . . 
LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 1 7 BLOCKIN 4 0 +Fine fine F Fi Fin Fine e ne ine Fine LINEIN LINEINDENT INITCAP NODIGIT 0 1 1 0 0 0 0 0 NOPUNCT 2 7 BLOCKIN 4 0 +- - - - - - - - - - LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 HYPHEN 2 7 BLOCKIN 4 0 +tuning tuning t tu tun tuni g ng ing ning LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 3 7 BLOCKIN 4 0 +language language l la lan lang e ge age uage LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 5 7 BLOCKIN 4 0 +models models m mo mod mode s ls els dels LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 6 7 BLOCKIN 4 0 +with with w wi wit with h th ith with LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 6 7 BLOCKIN 4 0 +just just j ju jus just t st ust just LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 7 7 BLOCKIN 4 0 +forward forward f fo for forw d rd ard ward LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 8 7 BLOCKIN 4 0 +passes passes p pa pas pass s es ses sses LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 9 7 BLOCKIN 4 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 10 7 BLOCKIN 4 0 +2024 2024 2 20 202 2024 4 24 024 2024 LINEIN LINEINDENT NOCAPS ALLDIGIT 0 0 0 0 0 1 0 0 NOPUNCT 10 7 BLOCKIN 4 0 +. . . . . . . . . . 
LINEEND LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 10 7 BLOCKEND 4 0 +[ [ [ [ [ [ [ [ [ [ LINESTART ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 OPENBRACKET 0 9 BLOCKSTART 9 0 +98 98 9 98 98 98 8 98 98 98 LINEIN ALIGNEDLEFT NOCAPS ALLDIGIT 0 0 0 0 0 0 0 0 NOPUNCT 0 9 BLOCKIN 9 0 +] ] ] ] ] ] ] ] ] ] LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 ENDBRACKET 0 9 BLOCKIN 9 0 +Gang gang G Ga Gan Gang g ng ang Gang LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 1 9 BLOCKIN 9 0 +Liu liu L Li Liu Liu u iu Liu Liu LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 1 9 BLOCKIN 9 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 1 9 BLOCKIN 9 0 +Jinlong jinlong J Ji Jin Jinl g ng ong long LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 2 9 BLOCKIN 9 0 +He he H He He He e He He He LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 3 9 BLOCKIN 9 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 3 9 BLOCKIN 9 0 +Pengfei pengfei P Pe Pen Peng i ei fei gfei LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 4 9 BLOCKIN 9 0 +Li li L Li Li Li i Li Li Li LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 4 9 BLOCKIN 9 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 4 9 BLOCKIN 9 0 +Genrong genrong G Ge Gen Genr g ng ong rong LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 5 9 BLOCKIN 9 0 +He he H He He He e He He He LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 6 9 BLOCKIN 9 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 6 9 BLOCKIN 9 0 +Zhaolin zhaolin Z Zh Zha Zhao n in lin olin LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 7 9 BLOCKIN 9 0 +Chen chen C Ch Che Chen n en hen Chen LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 7 9 BLOCKIN 9 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 7 9 BLOCKIN 9 0 +and and a an and and d 
nd and and LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 8 9 BLOCKIN 9 0 +Shenjun shenjun S Sh She Shen n un jun njun LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 9 9 BLOCKIN 9 0 +Zhong zhong Z Zh Zho Zhon g ng ong hong LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 10 9 BLOCKIN 9 0 +. . . . . . . . . . LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 10 9 BLOCKIN 9 0 +Pefomed pefomed P Pe Pef Pefo d ed med omed LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 10 9 BLOCKIN 9 0 +: : : : : : : : : : LINEEND ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 PUNCT 10 9 BLOCKIN 9 0 +Parameter parameter P Pa Par Para r er ter eter LINESTART LINEINDENT INITCAP NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 0 9 BLOCKIN 3 0 +efficient efficient e ef eff effi t nt ent ient LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 2 9 BLOCKIN 3 0 +fine fine f fi fin fine e ne ine fine LINEIN LINEINDENT NOCAPS NODIGIT 0 1 1 0 0 0 0 0 NOPUNCT 2 9 BLOCKIN 3 0 +- - - - - - - - - - LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 HYPHEN 2 9 BLOCKIN 3 0 +tuning tuning t tu tun tuni g ng ing ning LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 3 9 BLOCKIN 3 0 +of of o of of of f of of of LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 3 9 BLOCKIN 3 0 +multimodal multimodal m mu mul mult l al dal odal LINEIN LINEINDENT NOCAPS NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 5 9 BLOCKIN 3 0 +large large l la lar larg e ge rge arge LINEIN LINEINDENT NOCAPS NODIGIT 0 1 1 0 0 0 0 0 NOPUNCT 5 9 BLOCKIN 3 0 +language language l la lan lang e ge age uage LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 6 9 BLOCKIN 3 0 +models models m mo mod mode s ls els dels LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 7 9 BLOCKIN 3 0 +for for f fo for for r or for for LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 8 9 BLOCKIN 3 0 +medical medical m me med medi l al cal ical LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 9 9 BLOCKIN 3 
0 +imaging imaging i im ima imag g ng ing ging LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 10 9 BLOCKIN 3 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 10 9 BLOCKIN 3 0 +2024 2024 2 20 202 2024 4 24 024 2024 LINEIN LINEINDENT NOCAPS ALLDIGIT 0 0 0 0 0 1 0 0 NOPUNCT 10 9 BLOCKIN 3 0 +. . . . . . . . . . LINEEND LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 10 9 BLOCKEND 3 0 +[ [ [ [ [ [ [ [ [ [ LINESTART ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 OPENBRACKET 0 9 BLOCKSTART 9 0 +99 99 9 99 99 99 9 99 99 99 LINEIN ALIGNEDLEFT NOCAPS ALLDIGIT 0 0 0 0 0 0 0 0 NOPUNCT 0 9 BLOCKIN 9 0 +] ] ] ] ] ] ] ] ] ] LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 ENDBRACKET 0 9 BLOCKIN 9 0 +Wei wei W We Wei Wei i ei Wei Wei LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 0 9 BLOCKIN 9 0 +- - - - - - - - - - LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 HYPHEN 1 9 BLOCKIN 9 0 +Ning ning N Ni Nin Ning g ng ing Ning LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 1 9 BLOCKIN 9 0 +Hsu hsu H Hs Hsu Hsu u su Hsu Hsu LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 1 9 BLOCKIN 9 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 2 9 BLOCKIN 9 0 +Benjamin benjamin B Be Ben Benj n in min amin LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 3 9 BLOCKIN 9 0 +Bolte bolte B Bo Bol Bolt e te lte olte LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 3 9 BLOCKIN 9 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 3 9 BLOCKIN 9 0 +Yao yao Y Ya Yao Yao o ao Yao Yao LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 4 9 BLOCKIN 9 0 +- - - - - - - - - - LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 HYPHEN 4 9 BLOCKIN 9 0 +Hung hung H Hu Hun Hung g ng ung Hung LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 1 0 0 0 0 0 NOPUNCT 4 9 BLOCKIN 9 0 +Hubert hubert H Hu Hub Hube t rt ert bert LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 
5 9 BLOCKIN 9 0 +Tsai tsai T Ts Tsa Tsai i ai sai Tsai LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 6 9 BLOCKIN 9 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 6 9 BLOCKIN 9 0 +Kushal kushal K Ku Kus Kush l al hal shal LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 7 9 BLOCKIN 9 0 +Lakhotia lakhotia L La Lak Lakh a ia tia otia LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 8 9 BLOCKIN 9 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 8 9 BLOCKIN 9 0 +Ruslan ruslan R Ru Rus Rusl n an lan slan LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 9 9 BLOCKIN 9 0 +Salakhutdinov salakhutdinov S Sa Sal Sala v ov nov inov LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 10 9 BLOCKIN 9 0 +, , , , , , , , , , LINEEND ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 10 9 BLOCKIN 9 0 +and and a an and and d nd and and LINESTART LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 0 8 BLOCKIN 3 0 +Abdelrahman abdelrahman A Ab Abd Abde n an man hman LINEIN LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 1 8 BLOCKIN 3 0 +Mohamed mohamed M Mo Moh Moha d ed med amed LINEIN LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 2 8 BLOCKIN 3 0 +. . . . . . . . . . 
LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 2 8 BLOCKIN 3 0 +Hubert hubert H Hu Hub Hube t rt ert bert LINEIN LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 3 8 BLOCKIN 3 0 +: : : : : : : : : : LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 PUNCT 3 8 BLOCKIN 3 0 +Self self S Se Sel Self f lf elf Self LINEIN LINEINDENT INITCAP NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 4 8 BLOCKIN 3 0 +- - - - - - - - - - LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 HYPHEN 4 8 BLOCKIN 3 0 +supervised supervised s su sup supe d ed sed ised LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 5 8 BLOCKIN 3 0 +speech speech s sp spe spee h ch ech eech LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 6 8 BLOCKIN 3 0 +representation representation r re rep repr n on ion tion LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 8 8 BLOCKIN 3 0 +learning learning l le lea lear g ng ing ning LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 9 8 BLOCKIN 3 0 +by by b by by by y by by by LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 9 8 BLOCKIN 3 0 +masked masked m ma mas mask d ed ked sked LINEEND LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 10 8 BLOCKIN 3 0 +prediction prediction p pr pre pred n on ion tion LINESTART LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 2 3 BLOCKIN 2 0 +of of o of of of f of of of LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 4 3 BLOCKIN 2 0 +hidden hidden h hi hid hidd n en den dden LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 6 3 BLOCKIN 2 0 +units units u un uni unit s ts its nits LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 8 3 BLOCKIN 2 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 8 3 BLOCKIN 2 0 +2021 2021 2 20 202 2021 1 21 021 2021 LINEIN LINEINDENT NOCAPS ALLDIGIT 0 0 0 0 0 1 0 0 NOPUNCT 10 3 BLOCKIN 2 0 +. . . . . . . . . . 
LINEEND LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 10 3 BLOCKEND 2 0 +[ [ [ [ [ [ [ [ [ [ LINESTART ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 OPENBRACKET 0 9 BLOCKSTART 8 0 +100 100 1 10 100 100 0 00 100 100 LINEIN ALIGNEDLEFT NOCAPS ALLDIGIT 0 0 0 0 0 0 0 0 NOPUNCT 0 9 BLOCKIN 8 0 +] ] ] ] ] ] ] ] ] ] LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 ENDBRACKET 0 9 BLOCKIN 8 0 +Alexei alexei A Al Ale Alex i ei xei exei LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 1 9 BLOCKIN 8 0 +Baevski baevski B Ba Bae Baev i ki ski vski LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 2 9 BLOCKIN 8 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 2 9 BLOCKIN 8 0 +Henry henry H He Hen Henr y ry nry enry LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 3 9 BLOCKIN 8 0 +Zhou zhou Z Zh Zho Zhou u ou hou Zhou LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 3 9 BLOCKIN 8 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 3 9 BLOCKIN 8 0 +Abdelrahman abdelrahman A Ab Abd Abde n an man hman LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 5 9 BLOCKIN 8 0 +Mohamed mohamed M Mo Moh Moha d ed med amed LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 5 9 BLOCKIN 8 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 6 9 BLOCKIN 8 0 +and and a an and and d nd and and LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 6 9 BLOCKIN 8 0 +Michael michael M Mi Mic Mich l el ael hael LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 7 9 BLOCKIN 8 0 +Auli auli A Au Aul Auli i li uli Auli LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 8 9 BLOCKIN 8 0 +. . . . . . . . . . 
LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 8 9 BLOCKIN 8 0 +wav2vec wav2vec w wa wav wav2 c ec vec 2vec LINEIN ALIGNEDLEFT NOCAPS CONTAINSDIGITS 0 0 0 0 0 0 0 0 NOPUNCT 9 9 BLOCKIN 8 0 +2 2 2 2 2 2 2 2 2 2 LINEIN ALIGNEDLEFT NOCAPS ALLDIGIT 1 0 0 0 0 0 0 0 NOPUNCT 9 9 BLOCKIN 8 0 +. . . . . . . . . . LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 9 9 BLOCKIN 8 0 +0 0 0 0 0 0 0 0 0 0 LINEIN ALIGNEDLEFT NOCAPS ALLDIGIT 1 0 0 0 0 0 0 0 NOPUNCT 9 9 BLOCKIN 8 0 +: : : : : : : : : : LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 PUNCT 9 9 BLOCKIN 8 0 +A a A A A A A A A A LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 1 0 0 0 0 0 NOPUNCT 10 9 BLOCKIN 8 0 +framework framework f fr fra fram k rk ork work LINEEND ALIGNEDLEFT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 10 9 BLOCKIN 8 0 +for for f fo for for r or for for LINESTART LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 0 6 BLOCKIN 3 0 +self self s se sel self f lf elf self LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 1 6 BLOCKIN 3 0 +- - - - - - - - - - LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 HYPHEN 1 6 BLOCKIN 3 0 +supervised supervised s su sup supe d ed sed ised LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 3 6 BLOCKIN 3 0 +learning learning l le lea lear g ng ing ning LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 4 6 BLOCKIN 3 0 +of of o of of of f of of of LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 5 6 BLOCKIN 3 0 +speech speech s sp spe spee h ch ech eech LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 6 6 BLOCKIN 3 0 +representations representations r re rep repr s ns ons ions LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 9 6 BLOCKIN 3 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 9 6 BLOCKIN 3 0 +2020 2020 2 20 202 2020 0 20 020 2020 LINEIN LINEINDENT NOCAPS ALLDIGIT 0 0 0 0 0 1 0 0 NOPUNCT 10 6 BLOCKIN 3 0 +. . . . . . . . . . 
LINEEND LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 10 6 BLOCKEND 3 0 +[ [ [ [ [ [ [ [ [ [ LINESTART ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 OPENBRACKET 0 1 BLOCKSTART 2 0 +101 101 1 10 101 101 1 01 101 101 LINEIN ALIGNEDLEFT NOCAPS ALLDIGIT 0 0 0 0 0 0 0 0 NOPUNCT 2 1 BLOCKIN 2 0 +] ] ] ] ] ] ] ] ] ] LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 ENDBRACKET 3 1 BLOCKIN 2 0 +Deepak deepak D De Dee Deep k ak pak epak LINEEND ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 9 1 BLOCKIN 2 0 +Babu babu B Ba Bab Babu u bu abu Babu LINESTART LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 6 0 BLOCKIN no 0 +P p P P P P P P P P LINESTART LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 NOPUNCT 3 0 BLOCKIN no 0 +R r R R R R R R R R LINESTART LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 NOPUNCT 2 0 BLOCKIN 1 0 +. . . . . . . . . . LINEEND LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 5 0 BLOCKIN 1 0 +Audio audio A Au Aud Audi o io dio udio LINESTART LINEINDENT INITCAP NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 7 0 BLOCKIN no 0 +language language l la lan lang e ge age uage LINESTART LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 8 0 BLOCKIN no 0 +models models m mo mod mode s ls els dels LINESTART LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 7 0 BLOCKIN no 0 +and and a an and and d nd and and LINESTART LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 6 0 BLOCKIN no 0 +multimodal multimodal m mu mul mult l al dal odal LINESTART LINEINDENT NOCAPS NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 8 1 BLOCKIN no 0 +ar ar a ar ar ar r ar ar ar LINESTART LINEINDENT NOCAPS NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 5 0 BLOCKIN 1 0 +- - - - - - - - - - LINEEND LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 HYPHEN 7 0 BLOCKIN 1 0 +chitecture chitecture c ch chi chit e re ure ture LINESTART ALIGNEDLEFT NOCAPS NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 8 1 BLOCKIN no 0 +- - - - - - - - - - LINESTART LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 HYPHEN 5 0 BLOCKIN 1 0 +prdeepak prdeepak p pr prd prde k ak pak epak LINESTART 
LINEINDENT NOCAPS NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 5 1 BLOCKIN 2 0 +. . . . . . . . . . LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 5 1 BLOCKIN 2 0 +babu babu b ba bab babu u bu abu babu LINEIN LINEINDENT NOCAPS NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 8 1 BLOCKIN 2 0 +. . . . . . . . . . LINEEND LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 8 1 BLOCKIN 2 0 +https https h ht htt http s ps tps ttps LINESTART LINEINDENT NOCAPS NODIGIT 0 0 0 0 0 0 0 1 NOPUNCT 1 3 BLOCKIN 8 0 +: : : : : : : : : : LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 PUNCT 1 3 BLOCKIN 8 0 +/ / / / / / / / / / LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 NOPUNCT 1 3 BLOCKIN 8 0 +/ / / / / / / / / / LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 NOPUNCT 2 3 BLOCKIN 8 0 +medium medium m me med medi m um ium dium LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 3 3 BLOCKIN 8 0 +. . . . . . . . . . LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 4 3 BLOCKIN 8 0 +com com c co com com m om com com LINEIN LINEINDENT NOCAPS NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 5 3 BLOCKIN 8 0 +/ / / / / / / / / / LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 NOPUNCT 5 3 BLOCKIN 8 0 +@ @ @ @ @ @ @ @ @ @ LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 NOPUNCT 5 3 BLOCKIN 8 0 +prdeepak prdeepak p pr prd prde k ak pak epak LINEIN LINEINDENT NOCAPS NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 7 3 BLOCKIN 8 0 +. . . . . . . . . . 
LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 8 3 BLOCKIN 8 0 +babu babu b ba bab babu u bu abu babu LINEIN LINEINDENT NOCAPS NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 9 3 BLOCKIN 8 0 +/ / / / / / / / / / LINEEND LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 NOPUNCT 9 3 BLOCKIN 8 0 +audio audio a au aud audi o io dio udio LINESTART ALIGNEDLEFT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 0 6 BLOCKIN 7 0 +- - - - - - - - - - LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 HYPHEN 0 6 BLOCKIN 7 0 +language language l la lan lang e ge age uage LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 2 6 BLOCKIN 7 0 +- - - - - - - - - - LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 HYPHEN 2 6 BLOCKIN 7 0 +models models m mo mod mode s ls els dels LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 3 6 BLOCKIN 7 0 +- - - - - - - - - - LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 HYPHEN 3 6 BLOCKIN 7 0 +and and a an and and d nd and and LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 3 6 BLOCKIN 7 0 +- - - - - - - - - - LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 HYPHEN 4 6 BLOCKIN 7 0 +multimodal multimodal m mu mul mult l al dal odal LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 5 6 BLOCKIN 7 0 +- - - - - - - - - - LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 HYPHEN 5 6 BLOCKIN 7 0 +architecture architecture a ar arc arch e re ure ture LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 7 6 BLOCKIN 7 0 +- - - - - - - - - - LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 HYPHEN 7 6 BLOCKIN 7 0 +1cdd90f46fac 1cdd90f46fac 1 1c 1cd 1cdd c ac fac 6fac LINEIN ALIGNEDLEFT NOCAPS CONTAINSDIGITS 0 0 0 0 0 0 0 0 NOPUNCT 9 6 BLOCKIN 7 0 +. . . . . . . . . . 
LINEEND ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 9 6 BLOCKIN 7 0 +[ [ [ [ [ [ [ [ [ [ LINESTART LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 OPENBRACKET 0 1 BLOCKIN 3 0 +Accessed accessed A Ac Acc Acce d ed sed ssed LINEIN LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 5 1 BLOCKIN 3 0 +19 19 1 19 19 19 9 19 19 19 LINEIN LINEINDENT NOCAPS ALLDIGIT 0 0 0 0 0 0 0 0 NOPUNCT 7 1 BLOCKIN 3 0 +- - - - - - - - - - LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 HYPHEN 8 1 BLOCKIN 3 0 +07 07 0 07 07 07 7 07 07 07 LINEIN LINEINDENT NOCAPS ALLDIGIT 0 0 0 0 0 0 0 0 NOPUNCT 9 1 BLOCKIN 3 0 +- - - - - - - - - - LINEEND LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 HYPHEN 10 1 BLOCKIN 3 0 +2024 2024 2 20 202 2024 4 24 024 2024 LINESTART ALIGNEDLEFT NOCAPS ALLDIGIT 0 0 0 0 0 1 0 0 NOPUNCT 5 0 BLOCKIN 2 0 +] ] ] ] ] ] ] ] ] ] LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 ENDBRACKET 6 0 BLOCKIN 2 0 +. . . . . . . . . . LINEEND ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 7 0 BLOCKEND 2 0 +[ [ [ [ [ [ [ [ [ [ LINESTART ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 OPENBRACKET 0 9 BLOCKSTART 8 0 +102 102 1 10 102 102 2 02 102 102 LINEIN ALIGNEDLEFT NOCAPS ALLDIGIT 0 0 0 0 0 0 0 0 NOPUNCT 0 9 BLOCKIN 8 0 +] ] ] ] ] ] ] ] ] ] LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 ENDBRACKET 0 9 BLOCKIN 8 0 +Paul paul P Pa Pau Paul l ul aul Paul LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 1 9 BLOCKIN 8 0 +K k K K K K K K K K LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 NOPUNCT 1 9 BLOCKIN 8 0 +. . . . . . . . . . 
LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 1 9 BLOCKIN 8 0 +Rubenstein rubenstein R Ru Rub Rube n in ein tein LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 2 9 BLOCKIN 8 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 2 9 BLOCKIN 8 0 +Chulayuth chulayuth C Ch Chu Chul h th uth yuth LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 4 9 BLOCKIN 8 0 +Asawaroengchai asawaroengchai A As Asa Asaw i ai hai chai LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 5 9 BLOCKIN 8 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 5 9 BLOCKIN 8 0 +Duc duc D Du Duc Duc c uc Duc Duc LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 6 9 BLOCKIN 8 0 +Dung dung D Du Dun Dung g ng ung Dung LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 1 0 0 0 0 0 NOPUNCT 6 9 BLOCKIN 8 0 +Nguyen nguyen N Ng Ngu Nguy n en yen uyen LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 7 9 BLOCKIN 8 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 7 9 BLOCKIN 8 0 +Ankur ankur A An Ank Anku r ur kur nkur LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 8 9 BLOCKIN 8 0 +Bapna bapna B Ba Bap Bapn a na pna apna LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 9 9 BLOCKIN 8 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 9 9 BLOCKIN 8 0 +Zalán zalán Z Za Zal Zalá n án lán alán LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 10 9 BLOCKIN 8 0 +Borsos borsos B Bo Bor Bors s os sos rsos LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 10 9 BLOCKIN 8 0 +, , , , , , , , , , LINEEND ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 10 9 BLOCKIN 8 0 +Félix félix F Fé Fél Féli x ix lix élix LINESTART LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 0 8 BLOCKIN 5 0 +de de d de de de e de de de LINEIN LINEINDENT NOCAPS NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 0 8 BLOCKIN 5 0 +Chaumont chaumont C Ch Cha Chau t nt ont mont 
LINEIN LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 2 8 BLOCKIN 5 0 +Quitry quitry Q Qu Qui Quit y ry try itry LINEIN LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 2 8 BLOCKIN 5 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 3 8 BLOCKIN 5 0 +Peter peter P Pe Pet Pete r er ter eter LINEIN LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 3 8 BLOCKIN 5 0 +Chen chen C Ch Che Chen n en hen Chen LINEIN LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 4 8 BLOCKIN 5 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 4 8 BLOCKIN 5 0 +Dalia dalia D Da Dal Dali a ia lia alia LINEIN LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 5 8 BLOCKIN 5 0 +El el E El El El l El El El LINEIN LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 5 8 BLOCKIN 5 0 +Badawy badawy B Ba Bad Bada y wy awy dawy LINEIN LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 6 8 BLOCKIN 5 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 6 8 BLOCKIN 5 0 +Wei wei W We Wei Wei i ei Wei Wei LINEIN LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 7 8 BLOCKIN 5 0 +Han han H Ha Han Han n an Han Han LINEIN LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 7 8 BLOCKIN 5 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 8 8 BLOCKIN 5 0 +Eugene eugene E Eu Eug Euge e ne ene gene LINEIN LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 8 8 BLOCKIN 5 0 +Kharitonov kharitonov K Kh Kha Khar v ov nov onov LINEIN LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 10 8 BLOCKIN 5 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 10 8 BLOCKIN 5 0 +Hannah hannah H Ha Han Hann h ah nah nnah LINEEND LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 10 8 BLOCKIN 5 0 +Muckenhirn muckenhirn M Mu Muc Muck n rn irn hirn LINESTART LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 1 8 BLOCKIN 6 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 
1 8 BLOCKIN 6 0 +Dirk dirk D Di Dir Dirk k rk irk Dirk LINEIN LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 1 8 BLOCKIN 6 0 +Padfield padfield P Pa Pad Padf d ld eld ield LINEIN LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 2 8 BLOCKIN 6 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 3 8 BLOCKIN 6 0 +James james J Ja Jam Jame s es mes ames LINEIN LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 3 8 BLOCKIN 6 0 +Qin qin Q Qi Qin Qin n in Qin Qin LINEIN LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 4 8 BLOCKIN 6 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 4 8 BLOCKIN 6 0 +Danny danny D Da Dan Dann y ny nny anny LINEIN LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 5 8 BLOCKIN 6 0 +Rozenberg rozenberg R Ro Roz Roze g rg erg berg LINEIN LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 6 8 BLOCKIN 6 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 6 8 BLOCKIN 6 0 +Tara tara T Ta Tar Tara a ra ara Tara LINEIN LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 7 8 BLOCKIN 6 0 +Sainath sainath S Sa Sai Sain h th ath nath LINEIN LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 8 8 BLOCKIN 6 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 8 8 BLOCKIN 6 0 +Johan johan J Jo Joh Joha n an han ohan LINEIN LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 9 8 BLOCKIN 6 0 +Schalkwyk schalkwyk S Sc Sch Scha k yk wyk kwyk LINEIN LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 10 8 BLOCKIN 6 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 10 8 BLOCKIN 6 0 +Matt matt M Ma Mat Matt t tt att Matt LINEEND LINEINDENT INITCAP NODIGIT 0 1 1 0 0 0 0 0 NOPUNCT 10 8 BLOCKIN 6 0 +Sharifi sharifi S Sh Sha Shar i fi ifi rifi LINESTART LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 0 9 BLOCKIN 5 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 0 9 BLOCKIN 5 0 +Michelle michelle M Mi 
Mic Mich e le lle elle LINEIN LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 1 9 BLOCKIN 5 0 +Tadmor tadmor T Ta Tad Tadm r or mor dmor LINEIN LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 2 9 BLOCKIN 5 0 +Ramanovich ramanovich R Ra Ram Rama h ch ich vich LINEIN LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 3 9 BLOCKIN 5 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 4 9 BLOCKIN 5 0 +Marco marco M Ma Mar Marc o co rco arco LINEIN LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 4 9 BLOCKIN 5 0 +Tagliasacchi tagliasacchi T Ta Tag Tagl i hi chi cchi LINEIN LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 6 9 BLOCKIN 5 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 6 9 BLOCKIN 5 0 +Alexandru alexandru A Al Ale Alex u ru dru ndru LINEIN LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 7 9 BLOCKIN 5 0 +Tudor tudor T Tu Tud Tudo r or dor udor LINEIN LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 8 9 BLOCKIN 5 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 8 9 BLOCKIN 5 0 +Mihajlo mihajlo M Mi Mih Miha o lo jlo ajlo LINEIN LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 9 9 BLOCKIN 5 0 +Velimirović velimirović V Ve Vel Veli ć ić vić ović LINEIN LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 10 9 BLOCKIN 5 0 +, , , , , , , , , , LINEEND LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 10 9 BLOCKIN 5 0 +Damien damien D Da Dam Dami n en ien mien LINESTART LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 0 8 BLOCKIN 6 0 +Vincent vincent V Vi Vin Vinc t nt ent cent LINEIN LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 1 8 BLOCKIN 6 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 1 8 BLOCKIN 6 0 +Jiahui jiahui J Ji Jia Jiah i ui hui ahui LINEIN LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 2 8 BLOCKIN 6 0 +Yu yu Y Yu Yu Yu u Yu Yu Yu LINEIN LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 3 8 BLOCKIN 6 0 +, , , , , 
, , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 3 8 BLOCKIN 6 0 +Yongqiang yongqiang Y Yo Yon Yong g ng ang iang LINEIN LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 4 8 BLOCKIN 6 0 +Wang wang W Wa Wan Wang g ng ang Wang LINEIN LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 4 8 BLOCKIN 6 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 5 8 BLOCKIN 6 0 +Vicky vicky V Vi Vic Vick y ky cky icky LINEIN LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 5 8 BLOCKIN 6 0 +Zayats zayats Z Za Zay Zaya s ts ats yats LINEIN LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 6 8 BLOCKIN 6 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 6 8 BLOCKIN 6 0 +Neil neil N Ne Nei Neil l il eil Neil LINEIN LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 7 8 BLOCKIN 6 0 +Zeghidour zeghidour Z Ze Zeg Zegh r ur our dour LINEIN LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 8 8 BLOCKIN 6 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 8 8 BLOCKIN 6 0 +Yu yu Y Yu Yu Yu u Yu Yu Yu LINEIN LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 9 8 BLOCKIN 6 0 +Zhang zhang Z Zh Zha Zhan g ng ang hang LINEIN LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 9 8 BLOCKIN 6 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 10 8 BLOCKIN 6 0 +Zhishuai zhishuai Z Zh Zhi Zhis i ai uai huai LINEEND LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 10 8 BLOCKIN 6 0 +Zhang zhang Z Zh Zha Zhan g ng ang hang LINESTART LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 0 9 BLOCKIN 4 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 0 9 BLOCKIN 4 0 +Lukas lukas L Lu Luk Luka s as kas ukas LINEIN LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 1 9 BLOCKIN 4 0 +Zilka zilka Z Zi Zil Zilk a ka lka ilka LINEIN LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 2 9 BLOCKIN 4 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 
0 0 0 COMMA 2 9 BLOCKIN 4 0 +and and a an and and d nd and and LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 2 9 BLOCKIN 4 0 +Christian christian C Ch Chr Chri n an ian tian LINEIN LINEINDENT INITCAP NODIGIT 0 1 1 0 0 0 0 0 NOPUNCT 3 9 BLOCKIN 4 0 +Frank frank F Fr Fra Fran k nk ank rank LINEIN LINEINDENT INITCAP NODIGIT 0 1 1 0 0 0 0 0 NOPUNCT 4 9 BLOCKIN 4 0 +. . . . . . . . . . LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 4 9 BLOCKIN 4 0 +Audiopalm audiopalm A Au Aud Audi m lm alm palm LINEIN LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 5 9 BLOCKIN 4 0 +: : : : : : : : : : LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 PUNCT 6 9 BLOCKIN 4 0 +A a A A A A A A A A LINEIN LINEINDENT ALLCAP NODIGIT 1 0 1 0 0 0 0 0 NOPUNCT 6 9 BLOCKIN 4 0 +large large l la lar larg e ge rge arge LINEIN LINEINDENT NOCAPS NODIGIT 0 1 1 0 0 0 0 0 NOPUNCT 7 9 BLOCKIN 4 0 +language language l la lan lang e ge age uage LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 8 9 BLOCKIN 4 0 +model model m mo mod mode l el del odel LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 8 9 BLOCKIN 4 0 +that that t th tha that t at hat that LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 9 9 BLOCKIN 4 0 +can can c ca can can n an can can LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 10 9 BLOCKIN 4 0 +speak speak s sp spe spea k ak eak peak LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 10 9 BLOCKIN 4 0 +and and a an and and d nd and and LINEEND LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 10 9 BLOCKIN 4 0 +listen listen l li lis list n en ten sten LINESTART LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 4 1 BLOCKIN 2 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 4 1 BLOCKIN 2 0 +2023 2023 2 20 202 2023 3 23 023 2023 LINEIN LINEINDENT NOCAPS ALLDIGIT 0 0 0 0 0 1 0 0 NOPUNCT 8 1 BLOCKIN 2 0 +. . . . . . . . . . 
LINEEND LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 9 1 BLOCKEND 2 0 +[ [ [ [ [ [ [ [ [ [ LINESTART ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 OPENBRACKET 0 9 BLOCKSTART 7 0 +103 103 1 10 103 103 3 03 103 103 LINEIN ALIGNEDLEFT NOCAPS ALLDIGIT 0 0 0 0 0 0 0 0 NOPUNCT 0 9 BLOCKIN 7 0 +] ] ] ] ] ] ] ] ] ] LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 ENDBRACKET 0 9 BLOCKIN 7 0 +Zalán zalán Z Za Zal Zalá n án lán alán LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 1 9 BLOCKIN 7 0 +Borsos borsos B Bo Bor Bors s os sos rsos LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 2 9 BLOCKIN 7 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 2 9 BLOCKIN 7 0 +Raphaël raphaël R Ra Rap Raph l ël aël haël LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 3 9 BLOCKIN 7 0 +Marinier marinier M Ma Mar Mari r er ier nier LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 4 9 BLOCKIN 7 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 4 9 BLOCKIN 7 0 +Damien damien D Da Dam Dami n en ien mien LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 5 9 BLOCKIN 7 0 +Vincent vincent V Vi Vin Vinc t nt ent cent LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 5 9 BLOCKIN 7 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 6 9 BLOCKIN 7 0 +Eugene eugene E Eu Eug Euge e ne ene gene LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 6 9 BLOCKIN 7 0 +Kharitonov kharitonov K Kh Kha Khar v ov nov onov LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 8 9 BLOCKIN 7 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 8 9 BLOCKIN 7 0 +Olivier olivier O Ol Oli Oliv r er ier vier LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 9 9 BLOCKIN 7 0 +Pietquin pietquin P Pi Pie Piet n in uin quin LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 10 9 BLOCKIN 7 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT 
ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 10 9 BLOCKIN 7 0 +Matt matt M Ma Mat Matt t tt att Matt LINEEND ALIGNEDLEFT INITCAP NODIGIT 0 1 1 0 0 0 0 0 NOPUNCT 10 9 BLOCKIN 7 0 +Sharifi sharifi S Sh Sha Shar i fi ifi rifi LINESTART LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 0 9 BLOCKIN 6 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 0 9 BLOCKIN 6 0 +Dominik dominik D Do Dom Domi k ik nik inik LINEIN LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 1 9 BLOCKIN 6 0 +Roblek roblek R Ro Rob Robl k ek lek blek LINEIN LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 2 9 BLOCKIN 6 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 2 9 BLOCKIN 6 0 +Olivier olivier O Ol Oli Oliv r er ier vier LINEIN LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 3 9 BLOCKIN 6 0 +Teboul teboul T Te Teb Tebo l ul oul boul LINEIN LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 4 9 BLOCKIN 6 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 4 9 BLOCKIN 6 0 +David david D Da Dav Davi d id vid avid LINEIN LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 5 9 BLOCKIN 6 0 +Grangier grangier G Gr Gra Gran r er ier gier LINEIN LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 6 9 BLOCKIN 6 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 6 9 BLOCKIN 6 0 +Marco marco M Ma Mar Marc o co rco arco LINEIN LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 7 9 BLOCKIN 6 0 +Tagliasacchi tagliasacchi T Ta Tag Tagl i hi chi cchi LINEIN LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 8 9 BLOCKIN 6 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 8 9 BLOCKIN 6 0 +and and a an and and d nd and and LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 9 9 BLOCKIN 6 0 +Neil neil N Ne Nei Neil l il eil Neil LINEIN LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 9 9 BLOCKIN 6 0 +Zeghidour zeghidour Z Ze Zeg Zegh r ur our dour LINEIN LINEINDENT INITCAP 
NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 10 9 BLOCKIN 6 0 +. . . . . . . . . . LINEEND LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 10 9 BLOCKIN 6 0 +Audiolm audiolm A Au Aud Audi m lm olm iolm LINESTART LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 1 6 BLOCKIN 3 0 +: : : : : : : : : : LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 PUNCT 1 6 BLOCKIN 3 0 +a a a a a a a a a a LINEIN LINEINDENT NOCAPS NODIGIT 1 0 1 0 0 0 0 0 NOPUNCT 1 6 BLOCKIN 3 0 +language language l la lan lang e ge age uage LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 3 6 BLOCKIN 3 0 +modeling modeling m mo mod mode g ng ing ling LINEIN LINEINDENT NOCAPS NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 4 6 BLOCKIN 3 0 +approach approach a ap app appr h ch ach oach LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 6 6 BLOCKIN 3 0 +to to t to to to o to to to LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 6 6 BLOCKIN 3 0 +audio audio a au aud audi o io dio udio LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 7 6 BLOCKIN 3 0 +generation generation g ge gen gene n on ion tion LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 9 6 BLOCKIN 3 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 9 6 BLOCKIN 3 0 +2023 2023 2 20 202 2023 3 23 023 2023 LINEIN LINEINDENT NOCAPS ALLDIGIT 0 0 0 0 0 1 0 0 NOPUNCT 10 6 BLOCKIN 3 0 +. . . . . . . . . . 
LINEEND LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 10 6 BLOCKEND 3 0 +[ [ [ [ [ [ [ [ [ [ LINESTART ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 OPENBRACKET 0 8 BLOCKSTART 8 0 +104 104 1 10 104 104 4 04 104 104 LINEIN ALIGNEDLEFT NOCAPS ALLDIGIT 0 0 0 0 0 0 0 0 NOPUNCT 0 8 BLOCKIN 8 0 +] ] ] ] ] ] ] ] ] ] LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 ENDBRACKET 0 8 BLOCKIN 8 0 +Humza humza H Hu Hum Humz a za mza umza LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 1 8 BLOCKIN 8 0 +Naveed naveed N Na Nav Nave d ed eed veed LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 2 8 BLOCKIN 8 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 2 8 BLOCKIN 8 0 +Asad asad A As Asa Asad d ad sad Asad LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 2 8 BLOCKIN 8 0 +Ullah ullah U Ul Ull Ulla h ah lah llah LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 3 8 BLOCKIN 8 0 +Khan khan K Kh Kha Khan n an han Khan LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 4 8 BLOCKIN 8 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 4 8 BLOCKIN 8 0 +Shi shi S Sh Shi Shi i hi Shi Shi LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 5 8 BLOCKIN 8 0 +Qiu qiu Q Qi Qiu Qiu u iu Qiu Qiu LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 5 8 BLOCKIN 8 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 5 8 BLOCKIN 8 0 +Muhammad muhammad M Mu Muh Muha d ad mad mmad LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 6 8 BLOCKIN 8 0 +Saqib saqib S Sa Saq Saqi b ib qib aqib LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 7 8 BLOCKIN 8 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 7 8 BLOCKIN 8 0 +Saeed saeed S Sa Sae Saee d ed eed aeed LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 8 8 BLOCKIN 8 0 +Anwar anwar A An Anw Anwa r ar war nwar LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 
0 NOPUNCT 9 8 BLOCKIN 8 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 9 8 BLOCKIN 8 0 +Muhammad muhammad M Mu Muh Muha d ad mad mmad LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 10 8 BLOCKIN 8 0 +Usman usman U Us Usm Usma n an man sman LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 10 8 BLOCKIN 8 0 +, , , , , , , , , , LINEEND ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 10 8 BLOCKIN 8 0 +Naveed naveed N Na Nav Nave d ed eed veed LINESTART LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 0 9 BLOCKIN 4 0 +Akhtar akhtar A Ak Akh Akht r ar tar htar LINEIN LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 1 9 BLOCKIN 4 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 1 9 BLOCKIN 4 0 +Nick nick N Ni Nic Nick k ck ick Nick LINEIN LINEINDENT INITCAP NODIGIT 0 1 1 0 0 0 0 0 NOPUNCT 2 9 BLOCKIN 4 0 +Barnes barnes B Ba Bar Barn s es nes rnes LINEIN LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 3 9 BLOCKIN 4 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 3 9 BLOCKIN 4 0 +and and a an and and d nd and and LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 3 9 BLOCKIN 4 0 +Ajmal ajmal A Aj Ajm Ajma l al mal jmal LINEIN LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 4 9 BLOCKIN 4 0 +Mian mian M Mi Mia Mian n an ian Mian LINEIN LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 5 9 BLOCKIN 4 0 +. . . . . . . . . . 
LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 5 9 BLOCKIN 4 0 +A a A A A A A A A A LINEIN LINEINDENT ALLCAP NODIGIT 1 0 1 0 0 0 0 0 NOPUNCT 5 9 BLOCKIN 4 0 +comprehensive comprehensive c co com comp e ve ive sive LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 6 9 BLOCKIN 4 0 +overview overview o ov ove over w ew iew view LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 8 9 BLOCKIN 4 0 +of of o of of of f of of of LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 8 9 BLOCKIN 4 0 +large large l la lar larg e ge rge arge LINEIN LINEINDENT NOCAPS NODIGIT 0 1 1 0 0 0 0 0 NOPUNCT 9 9 BLOCKIN 4 0 +language language l la lan lang e ge age uage LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 10 9 BLOCKIN 4 0 +models models m mo mod mode s ls els dels LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 10 9 BLOCKIN 4 0 +, , , , , , , , , , LINEEND LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 10 9 BLOCKIN 4 0 +2024 2024 2 20 202 2024 4 24 024 2024 LINESTART LINEINDENT NOCAPS ALLDIGIT 0 0 0 0 0 1 0 0 NOPUNCT 5 0 BLOCKIN 1 0 +. . . . . . . . . . 
LINEEND LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 7 0 BLOCKEND 1 0 +[ [ [ [ [ [ [ [ [ [ LINESTART ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 OPENBRACKET 0 9 BLOCKSTART 6 0 +105 105 1 10 105 105 5 05 105 105 LINEIN ALIGNEDLEFT NOCAPS ALLDIGIT 0 0 0 0 0 0 0 0 NOPUNCT 0 9 BLOCKIN 6 0 +] ] ] ] ] ] ] ] ] ] LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 ENDBRACKET 0 9 BLOCKIN 6 0 +Fine fine F Fi Fin Fine e ne ine Fine LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 1 0 0 0 0 0 NOPUNCT 1 9 BLOCKIN 6 0 +- - - - - - - - - - LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 HYPHEN 1 9 BLOCKIN 6 0 +tune tune t tu tun tune e ne une tune LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 1 9 BLOCKIN 6 0 +llama llama l ll lla llam a ma ama lama LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 2 9 BLOCKIN 6 0 +2 2 2 2 2 2 2 2 2 2 LINEIN ALIGNEDLEFT NOCAPS ALLDIGIT 1 0 0 0 0 0 0 0 NOPUNCT 2 9 BLOCKIN 6 0 +with with w wi wit with h th ith with LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 3 9 BLOCKIN 6 0 +lora lora l lo lor lora a ra ora lora LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 4 9 BLOCKIN 6 0 +: : : : : : : : : : LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 PUNCT 4 9 BLOCKIN 6 0 +Customizing customizing C Cu Cus Cust g ng ing zing LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 5 9 BLOCKIN 6 0 +a a a a a a a a a a LINEIN ALIGNEDLEFT NOCAPS NODIGIT 1 0 1 0 0 0 0 0 NOPUNCT 5 9 BLOCKIN 6 0 +large large l la lar larg e ge rge arge LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 1 1 0 0 0 0 0 NOPUNCT 6 9 BLOCKIN 6 0 +language language l la lan lang e ge age uage LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 7 9 BLOCKIN 6 0 +model model m mo mod mode l el del odel LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 8 9 BLOCKIN 6 0 +for for f fo for for r or for for LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 8 9 BLOCKIN 6 0 +question question q qu que ques n on ion tion LINEIN ALIGNEDLEFT NOCAPS NODIGIT 
0 0 1 0 0 0 0 0 NOPUNCT 9 9 BLOCKIN 6 0 +- - - - - - - - - - LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 HYPHEN 10 9 BLOCKIN 6 0 +answering answering a an ans answ g ng ing ring LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 10 9 BLOCKIN 6 0 +- - - - - - - - - - LINEEND ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 HYPHEN 10 9 BLOCKIN 6 0 +rocm rocm r ro roc rocm m cm ocm rocm LINESTART LINEINDENT NOCAPS NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 0 8 BLOCKIN 10 0 +. . . . . . . . . . LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 0 8 BLOCKIN 10 0 +blogs blogs b bl blo blog s gs ogs logs LINEIN LINEINDENT NOCAPS NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 1 8 BLOCKIN 10 0 +. . . . . . . . . . LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 1 8 BLOCKIN 10 0 +amd amd a am amd amd d md amd amd LINEIN LINEINDENT NOCAPS NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 1 8 BLOCKIN 10 0 +. . . . . . . . . . LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 1 8 BLOCKIN 10 0 +com com c co com com m om com com LINEIN LINEINDENT NOCAPS NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 2 8 BLOCKIN 10 0 +. . . . . . . . . . LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 2 8 BLOCKIN 10 0 +https https h ht htt http s ps tps ttps LINEIN LINEINDENT NOCAPS NODIGIT 0 0 0 0 0 0 0 1 NOPUNCT 3 8 BLOCKIN 10 0 +: : : : : : : : : : LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 PUNCT 3 8 BLOCKIN 10 0 +/ / / / / / / / / / LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 NOPUNCT 3 8 BLOCKIN 10 0 +/ / / / / / / / / / LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 NOPUNCT 3 8 BLOCKIN 10 0 +rocm rocm r ro roc rocm m cm ocm rocm LINEIN LINEINDENT NOCAPS NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 3 8 BLOCKIN 10 0 +. . . . . . . . . . LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 4 8 BLOCKIN 10 0 +blogs blogs b bl blo blog s gs ogs logs LINEIN LINEINDENT NOCAPS NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 4 8 BLOCKIN 10 0 +. . . . . . . . . . 
LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 4 8 BLOCKIN 10 0 +amd amd a am amd amd d md amd amd LINEIN LINEINDENT NOCAPS NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 5 8 BLOCKIN 10 0 +. . . . . . . . . . LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 5 8 BLOCKIN 10 0 +com com c co com com m om com com LINEIN LINEINDENT NOCAPS NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 5 8 BLOCKIN 10 0 +/ / / / / / / / / / LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 NOPUNCT 5 8 BLOCKIN 10 0 +artificial artificial a ar art arti l al ial cial LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 6 8 BLOCKIN 10 0 +- - - - - - - - - - LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 HYPHEN 6 8 BLOCKIN 10 0 +intelligence intelligence i in int inte e ce nce ence LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 8 8 BLOCKIN 10 0 +/ / / / / / / / / / LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 NOPUNCT 8 8 BLOCKIN 10 0 +llama2 llama2 l ll lla llam 2 a2 ma2 ama2 LINEIN LINEINDENT NOCAPS CONTAINSDIGITS 0 0 0 0 0 0 0 0 NOPUNCT 9 8 BLOCKIN 10 0 +- - - - - - - - - - LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 HYPHEN 9 8 BLOCKIN 10 0 +lora lora l lo lor lora a ra ora lora LINEIN LINEINDENT NOCAPS NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 9 8 BLOCKIN 10 0 +/ / / / / / / / / / LINEEND LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 NOPUNCT 9 8 BLOCKIN 10 0 +README readme R RE REA READ E ME DME ADME LINESTART LINEINDENT ALLCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 1 3 BLOCKIN 7 0 +. . . . . . . . . . LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 1 3 BLOCKIN 7 0 +html html h ht htm html l ml tml html LINEIN LINEINDENT NOCAPS NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 2 3 BLOCKIN 7 0 +. . . . . . . . . . 
LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 3 3 BLOCKIN 7 0 +[ [ [ [ [ [ [ [ [ [ LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 OPENBRACKET 4 3 BLOCKIN 7 0 +Accessed accessed A Ac Acc Acce d ed sed ssed LINEIN LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 6 3 BLOCKIN 7 0 +15 15 1 15 15 15 5 15 15 15 LINEIN LINEINDENT NOCAPS ALLDIGIT 0 0 0 0 0 0 0 0 NOPUNCT 7 3 BLOCKIN 7 0 +- - - - - - - - - - LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 HYPHEN 7 3 BLOCKIN 7 0 +07 07 0 07 07 07 7 07 07 07 LINEIN LINEINDENT NOCAPS ALLDIGIT 0 0 0 0 0 0 0 0 NOPUNCT 8 3 BLOCKIN 7 0 +- - - - - - - - - - LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 HYPHEN 8 3 BLOCKIN 7 0 +2024 2024 2 20 202 2024 4 24 024 2024 LINEIN LINEINDENT NOCAPS ALLDIGIT 0 0 0 0 0 1 0 0 NOPUNCT 9 3 BLOCKIN 7 0 +] ] ] ] ] ] ] ] ] ] LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 ENDBRACKET 9 3 BLOCKIN 7 0 +. . . . . . . . . . LINEEND LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 10 3 BLOCKEND 7 0 +[ [ [ [ [ [ [ [ [ [ LINESTART ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 OPENBRACKET 0 2 BLOCKSTART 3 0 +106 106 1 10 106 106 6 06 106 106 LINEIN ALIGNEDLEFT NOCAPS ALLDIGIT 0 0 0 0 0 0 0 0 NOPUNCT 1 2 BLOCKIN 3 0 +] ] ] ] ] ] ] ] ] ] LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 ENDBRACKET 2 2 BLOCKIN 3 0 +Aayush aayush A Aa Aay Aayu h sh ush yush LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 5 2 BLOCKIN 3 0 +Mittal mittal M Mi Mit Mitt l al tal ttal LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 9 2 BLOCKIN 3 0 +. . . . . . . . . . 
LINEEND ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 10 2 BLOCKIN 3 0 +Understanding understanding U Un Und Unde g ng ing ding LINESTART LINEINDENT INITCAP NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 4 3 BLOCKIN 2 0 +llm llm l ll llm llm m lm llm llm LINEIN LINEINDENT NOCAPS NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 5 3 BLOCKIN 2 0 +fine fine f fi fin fine e ne ine fine LINEIN LINEINDENT NOCAPS NODIGIT 0 1 1 0 0 0 0 0 NOPUNCT 7 3 BLOCKIN 2 0 +- - - - - - - - - - LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 HYPHEN 7 3 BLOCKIN 2 0 +tuning tuning t tu tun tuni g ng ing ning LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 9 3 BLOCKIN 2 0 +: : : : : : : : : : LINEEND LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 PUNCT 10 3 BLOCKIN 2 0 +Tailoring tailoring T Ta Tai Tail g ng ing ring LINESTART LINEINDENT INITCAP NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 3 2 BLOCKIN 1 0 +large large l la lar larg e ge rge arge LINEIN LINEINDENT NOCAPS NODIGIT 0 1 1 0 0 0 0 0 NOPUNCT 5 2 BLOCKIN 1 0 +language language l la lan lang e ge age uage LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 8 2 BLOCKIN 1 0 +mod mod m mo mod mod d od mod mod LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 10 2 BLOCKIN 1 0 +- - - - - - - - - - LINEEND LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 HYPHEN 10 2 BLOCKIN 1 0 +els els e el els els s ls els els LINESTART ALIGNEDLEFT NOCAPS NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 0 4 BLOCKIN 3 0 +to to t to to to o to to to LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 1 4 BLOCKIN 3 0 +your your y yo you your r ur our your LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 2 4 BLOCKIN 3 0 +unique unique u un uni uniq e ue que ique LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 4 4 BLOCKIN 3 0 +requirements requirements r re req requ s ts nts ents LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 7 4 BLOCKIN 3 0 +- - - - - - - - - - LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 HYPHEN 7 4 BLOCKIN 3 0 +linkedin linkedin l li lin link n in 
din edin LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 9 4 BLOCKIN 3 0 +. . . . . . . . . . LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 9 4 BLOCKIN 3 0 +com com c co com com m om com com LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 10 4 BLOCKIN 3 0 +. . . . . . . . . . LINEEND ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 10 4 BLOCKIN 3 0 +https https h ht htt http s ps tps ttps LINESTART LINEINDENT NOCAPS NODIGIT 0 0 0 0 0 0 0 1 NOPUNCT 2 2 BLOCKIN 6 0 +: : : : : : : : : : LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 PUNCT 2 2 BLOCKIN 6 0 +/ / / / / / / / / / LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 NOPUNCT 3 2 BLOCKIN 6 0 +/ / / / / / / / / / LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 NOPUNCT 3 2 BLOCKIN 6 0 +www www w ww www www w ww www www LINEIN LINEINDENT NOCAPS NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 4 2 BLOCKIN 6 0 +. . . . . . . . . . LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 5 2 BLOCKIN 6 0 +unite unite u un uni unit e te ite nite LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 7 2 BLOCKIN 6 0 +. . . . . . . . . . 
LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 7 2 BLOCKIN 6 0 +ai ai a ai ai ai i ai ai ai LINEIN LINEINDENT NOCAPS NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 8 2 BLOCKIN 6 0 +/ / / / / / / / / / LINEEND LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 NOPUNCT 9 2 BLOCKIN 6 0 +understanding understanding u un und unde g ng ing ding LINESTART ALIGNEDLEFT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 1 8 BLOCKIN 10 0 +- - - - - - - - - - LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 HYPHEN 1 8 BLOCKIN 10 0 +llm llm l ll llm llm m lm llm llm LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 1 8 BLOCKIN 10 0 +- - - - - - - - - - LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 HYPHEN 1 8 BLOCKIN 10 0 +fine fine f fi fin fine e ne ine fine LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 1 1 0 0 0 0 0 NOPUNCT 2 8 BLOCKIN 10 0 +- - - - - - - - - - LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 HYPHEN 2 8 BLOCKIN 10 0 +tuning tuning t tu tun tuni g ng ing ning LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 3 8 BLOCKIN 10 0 +- - - - - - - - - - LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 HYPHEN 3 8 BLOCKIN 10 0 +tailoring tailoring t ta tai tail g ng ing ring LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 4 8 BLOCKIN 10 0 +- - - - - - - - - - LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 HYPHEN 4 8 BLOCKIN 10 0 +large large l la lar larg e ge rge arge LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 1 1 0 0 0 0 0 NOPUNCT 4 8 BLOCKIN 10 0 +- - - - - - - - - - LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 HYPHEN 5 8 BLOCKIN 10 0 +language language l la lan lang e ge age uage LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 5 8 BLOCKIN 10 0 +- - - - - - - - - - LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 HYPHEN 5 8 BLOCKIN 10 0 +models models m mo mod mode s ls els dels LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 6 8 BLOCKIN 10 0 +- - - - - - - - - - LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 HYPHEN 6 8 BLOCKIN 10 0 +to to t to to 
to o to to to LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 6 8 BLOCKIN 10 0 +- - - - - - - - - - LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 HYPHEN 7 8 BLOCKIN 10 0 +your your y yo you your r ur our your LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 7 8 BLOCKIN 10 0 +- - - - - - - - - - LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 HYPHEN 7 8 BLOCKIN 10 0 +unique unique u un uni uniq e ue que ique LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 8 8 BLOCKIN 10 0 +- - - - - - - - - - LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 HYPHEN 8 8 BLOCKIN 10 0 +requirements requirements r re req requ s ts nts ents LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 9 8 BLOCKIN 10 0 +. . . . . . . . . . LINEEND ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 9 8 BLOCKIN 10 0 +[ [ [ [ [ [ [ [ [ [ LINESTART ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 OPENBRACKET 0 2 BLOCKIN 5 0 +Accessed accessed A Ac Acc Acce d ed sed ssed LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 3 2 BLOCKIN 5 0 +11 11 1 11 11 11 1 11 11 11 LINEIN ALIGNEDLEFT NOCAPS ALLDIGIT 0 0 0 0 0 0 0 0 NOPUNCT 5 2 BLOCKIN 5 0 +- - - - - - - - - - LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 HYPHEN 5 2 BLOCKIN 5 0 +07 07 0 07 07 07 7 07 07 07 LINEIN ALIGNEDLEFT NOCAPS ALLDIGIT 0 0 0 0 0 0 0 0 NOPUNCT 6 2 BLOCKIN 5 0 +- - - - - - - - - - LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 HYPHEN 7 2 BLOCKIN 5 0 +2024 2024 2 20 202 2024 4 24 024 2024 LINEIN ALIGNEDLEFT NOCAPS ALLDIGIT 0 0 0 0 0 1 0 0 NOPUNCT 8 2 BLOCKIN 5 0 +] ] ] ] ] ] ] ] ] ] LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 ENDBRACKET 9 2 BLOCKIN 5 0 +. . . . . . . . . . 
LINEEND ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 9 2 BLOCKEND 5 0 +[ [ [ [ [ [ [ [ [ [ LINESTART ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 OPENBRACKET 0 9 BLOCKSTART 8 0 +107 107 1 10 107 107 7 07 107 107 LINEIN ALIGNEDLEFT NOCAPS ALLDIGIT 0 0 0 0 0 0 0 0 NOPUNCT 0 9 BLOCKIN 8 0 +] ] ] ] ] ] ] ] ] ] LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 ENDBRACKET 0 9 BLOCKIN 8 0 +Alan alan A Al Ala Alan n an lan Alan LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 1 9 BLOCKIN 8 0 +Ansell ansell A An Ans Anse l ll ell sell LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 1 9 BLOCKIN 8 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 2 9 BLOCKIN 8 0 +Ivan ivan I Iv Iva Ivan n an van Ivan LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 2 9 BLOCKIN 8 0 +Vulić vulić V Vu Vul Vuli ć ić lić ulić LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 3 9 BLOCKIN 8 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 3 9 BLOCKIN 8 0 +Hannah hannah H Ha Han Hann h ah nah nnah LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 4 9 BLOCKIN 8 0 +Sterz sterz S St Ste Ster z rz erz terz LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 5 9 BLOCKIN 8 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 5 9 BLOCKIN 8 0 +Anna anna A An Ann Anna a na nna Anna LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 5 9 BLOCKIN 8 0 +Korhonen korhonen K Ko Kor Korh n en nen onen LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 6 9 BLOCKIN 8 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 6 9 BLOCKIN 8 0 +and and a an and and d nd and and LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 7 9 BLOCKIN 8 0 +Edoardo edoardo E Ed Edo Edoa o do rdo ardo LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 8 9 BLOCKIN 8 0 +M m M M M M M M M M LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 NOPUNCT 8 
9 BLOCKIN 8 0 +. . . . . . . . . . LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 8 9 BLOCKIN 8 0 +Ponti ponti P Po Pon Pont i ti nti onti LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 9 9 BLOCKIN 8 0 +. . . . . . . . . . LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 9 9 BLOCKIN 8 0 +Scaling scaling S Sc Sca Scal g ng ing ling LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 10 9 BLOCKIN 8 0 +sparse sparse s sp spa spar e se rse arse LINEEND ALIGNEDLEFT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 10 9 BLOCKIN 8 0 +fine fine f fi fin fine e ne ine fine LINESTART LINEINDENT NOCAPS NODIGIT 0 1 1 0 0 0 0 0 NOPUNCT 0 4 BLOCKIN 3 0 +- - - - - - - - - - LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 HYPHEN 1 4 BLOCKIN 3 0 +tuning tuning t tu tun tuni g ng ing ning LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 2 4 BLOCKIN 3 0 +to to t to to to o to to to LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 3 4 BLOCKIN 3 0 +large large l la lar larg e ge rge arge LINEIN LINEINDENT NOCAPS NODIGIT 0 1 1 0 0 0 0 0 NOPUNCT 4 4 BLOCKIN 3 0 +language language l la lan lang e ge age uage LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 7 4 BLOCKIN 3 0 +models models m mo mod mode s ls els dels LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 8 4 BLOCKIN 3 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 9 4 BLOCKIN 3 0 +2024 2024 2 20 202 2024 4 24 024 2024 LINEIN LINEINDENT NOCAPS ALLDIGIT 0 0 0 0 0 1 0 0 NOPUNCT 10 4 BLOCKIN 3 0 +. . . . . . . . . . 
LINEEND LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 10 4 BLOCKEND 3 0 +[ [ [ [ [ [ [ [ [ [ LINESTART ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 OPENBRACKET 0 9 BLOCKSTART 10 0 +108 108 1 10 108 108 8 08 108 108 LINEIN ALIGNEDLEFT NOCAPS ALLDIGIT 0 0 0 0 0 0 0 0 NOPUNCT 0 9 BLOCKIN 10 0 +] ] ] ] ] ] ] ] ] ] LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 ENDBRACKET 0 9 BLOCKIN 10 0 +Xinyu xinyu X Xi Xin Xiny u yu nyu inyu LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 1 9 BLOCKIN 10 0 +Lin lin L Li Lin Lin n in Lin Lin LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 1 9 BLOCKIN 10 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 1 9 BLOCKIN 10 0 +Wenjie wenjie W We Wen Wenj e ie jie njie LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 2 9 BLOCKIN 10 0 +Wang wang W Wa Wan Wang g ng ang Wang LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 3 9 BLOCKIN 10 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 3 9 BLOCKIN 10 0 +Yongqi yongqi Y Yo Yon Yong i qi gqi ngqi LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 4 9 BLOCKIN 10 0 +Li li L Li Li Li i Li Li Li LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 4 9 BLOCKIN 10 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 4 9 BLOCKIN 10 0 +Shuo shuo S Sh Shu Shuo o uo huo Shuo LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 5 9 BLOCKIN 10 0 +Yang yang Y Ya Yan Yang g ng ang Yang LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 6 9 BLOCKIN 10 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 6 9 BLOCKIN 10 0 +Fuli fuli F Fu Ful Fuli i li uli Fuli LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 6 9 BLOCKIN 10 0 +Feng feng F Fe Fen Feng g ng eng Feng LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 7 9 BLOCKIN 10 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 7 9 BLOCKIN 
10 0 +Yinwei yinwei Y Yi Yin Yinw i ei wei nwei LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 8 9 BLOCKIN 10 0 +Wei wei W We Wei Wei i ei Wei Wei LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 8 9 BLOCKIN 10 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 9 9 BLOCKIN 10 0 +and and a an and and d nd and and LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 9 9 BLOCKIN 10 0 +Tat tat T Ta Tat Tat t at Tat Tat LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 10 9 BLOCKIN 10 0 +- - - - - - - - - - LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 HYPHEN 10 9 BLOCKIN 10 0 +Seng seng S Se Sen Seng g ng eng Seng LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 10 9 BLOCKIN 10 0 +Chua chua C Ch Chu Chua a ua hua Chua LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 10 9 BLOCKIN 10 0 +. . . . . . . . . . LINEEND ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 10 9 BLOCKIN 10 0 +Data data D Da Dat Data a ta ata Data LINESTART LINEINDENT INITCAP NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 0 6 BLOCKIN 5 0 +- - - - - - - - - - LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 HYPHEN 0 6 BLOCKIN 5 0 +efficient efficient e ef eff effi t nt ent ient LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 2 6 BLOCKIN 5 0 +fine fine f fi fin fine e ne ine fine LINEIN LINEINDENT NOCAPS NODIGIT 0 1 1 0 0 0 0 0 NOPUNCT 3 6 BLOCKIN 5 0 +- - - - - - - - - - LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 HYPHEN 3 6 BLOCKIN 5 0 +tuning tuning t tu tun tuni g ng ing ning LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 4 6 BLOCKIN 5 0 +for for f fo for for r or for for LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 5 6 BLOCKIN 5 0 +llm llm l ll llm llm m lm llm llm LINEIN LINEINDENT NOCAPS NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 5 6 BLOCKIN 5 0 +- - - - - - - - - - LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 HYPHEN 5 6 BLOCKIN 5 0 +based based b ba bas base d ed sed ased LINEIN LINEINDENT NOCAPS 
NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 6 6 BLOCKIN 5 0 +recommendation recommendation r re rec reco n on ion tion LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 9 6 BLOCKIN 5 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 9 6 BLOCKIN 5 0 +2024 2024 2 20 202 2024 4 24 024 2024 LINEIN LINEINDENT NOCAPS ALLDIGIT 0 0 0 0 0 1 0 0 NOPUNCT 10 6 BLOCKIN 5 0 +. . . . . . . . . . LINEEND LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 10 6 BLOCKEND 5 0 +[ [ [ [ [ [ [ [ [ [ LINESTART ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 OPENBRACKET 0 9 BLOCKSTART 9 0 +109 109 1 10 109 109 9 09 109 109 LINEIN ALIGNEDLEFT NOCAPS ALLDIGIT 0 0 0 0 0 0 0 0 NOPUNCT 0 9 BLOCKIN 9 0 +] ] ] ] ] ] ] ] ] ] LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 ENDBRACKET 0 9 BLOCKIN 9 0 +Yue yue Y Yu Yue Yue e ue Yue Yue LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 1 9 BLOCKIN 9 0 +Liu liu L Li Liu Liu u iu Liu Liu LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 1 9 BLOCKIN 9 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 1 9 BLOCKIN 9 0 +Shihao shihao S Sh Shi Shih o ao hao ihao LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 2 9 BLOCKIN 9 0 +Zhu zhu Z Zh Zhu Zhu u hu Zhu Zhu LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 3 9 BLOCKIN 9 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 3 9 BLOCKIN 9 0 +Jun jun J Ju Jun Jun n un Jun Jun LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 1 0 NOPUNCT 3 9 BLOCKIN 9 0 +Xia xia X Xi Xia Xia a ia Xia Xia LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 4 9 BLOCKIN 9 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 4 9 BLOCKIN 9 0 +Yingwei yingwei Y Yi Yin Ying i ei wei gwei LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 5 9 BLOCKIN 9 0 +Ma ma M Ma Ma Ma a Ma Ma Ma LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 5 9 BLOCKIN 9 0 +, , , , , , , , , , LINEIN 
ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 5 9 BLOCKIN 9 0 +Jian jian J Ji Jia Jian n an ian Jian LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 6 9 BLOCKIN 9 0 +Ma ma M Ma Ma Ma a Ma Ma Ma LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 6 9 BLOCKIN 9 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 6 9 BLOCKIN 9 0 +Wenliang wenliang W We Wen Wenl g ng ang iang LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 8 9 BLOCKIN 9 0 +Zhong zhong Z Zh Zho Zhon g ng ong hong LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 8 9 BLOCKIN 9 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 8 9 BLOCKIN 9 0 +Xinwang xinwang X Xi Xin Xinw g ng ang wang LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 9 9 BLOCKIN 9 0 +Liu liu L Li Liu Liu u iu Liu Liu LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 10 9 BLOCKIN 9 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 10 9 BLOCKIN 9 0 +Guannan guannan G Gu Gua Guan n an nan nnan LINEEND ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 10 9 BLOCKIN 9 0 +Zhang zhang Z Zh Zha Zhan g ng ang hang LINESTART LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 0 9 BLOCKIN 5 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 0 9 BLOCKIN 5 0 +and and a an and and d nd and and LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 1 9 BLOCKIN 5 0 +Kejun kejun K Ke Kej Keju n un jun ejun LINEIN LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 1 9 BLOCKIN 5 0 +Zhang zhang Z Zh Zha Zhan g ng ang hang LINEIN LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 2 9 BLOCKIN 5 0 +. . . . . . . . . . 
LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 2 9 BLOCKIN 5 0 +End end E En End End d nd End End LINEIN LINEINDENT INITCAP NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 3 9 BLOCKIN 5 0 +- - - - - - - - - - LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 HYPHEN 3 9 BLOCKIN 5 0 +to to t to to to o to to to LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 3 9 BLOCKIN 5 0 +- - - - - - - - - - LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 HYPHEN 3 9 BLOCKIN 5 0 +end end e en end end d nd end end LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 3 9 BLOCKIN 5 0 +learnable learnable l le lea lear e le ble able LINEIN LINEINDENT NOCAPS NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 5 9 BLOCKIN 5 0 +clustering clustering c cl clu clus g ng ing ring LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 6 9 BLOCKIN 5 0 +for for f fo for for r or for for LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 6 9 BLOCKIN 5 0 +intent intent i in int inte t nt ent tent LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 7 9 BLOCKIN 5 0 +learning learning l le lea lear g ng ing ning LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 8 9 BLOCKIN 5 0 +in in i in in in n in in in LINEIN LINEINDENT NOCAPS NODIGIT 0 1 1 0 0 0 0 0 NOPUNCT 9 9 BLOCKIN 5 0 +recommendation recommendation r re rec reco n on ion tion LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 10 9 BLOCKIN 5 0 +, , , , , , , , , , LINEEND LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 10 9 BLOCKIN 5 0 +2024 2024 2 20 202 2024 4 24 024 2024 LINESTART LINEINDENT NOCAPS ALLDIGIT 0 0 0 0 0 1 0 0 NOPUNCT 5 0 BLOCKIN 1 0 +. . . . . . . . . . 
LINEEND LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 7 0 BLOCKEND 1 0 +[ [ [ [ [ [ [ [ [ [ LINESTART ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 OPENBRACKET 0 9 BLOCKSTART 9 0 +110 110 1 11 110 110 0 10 110 110 LINEIN ALIGNEDLEFT NOCAPS ALLDIGIT 0 0 0 0 0 0 0 0 NOPUNCT 0 9 BLOCKIN 9 0 +] ] ] ] ] ] ] ] ] ] LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 ENDBRACKET 0 9 BLOCKIN 9 0 +Haoran haoran H Ha Hao Haor n an ran oran LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 1 9 BLOCKIN 9 0 +Li li L Li Li Li i Li Li Li LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 1 9 BLOCKIN 9 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 1 9 BLOCKIN 9 0 +Xinyuan xinyuan X Xi Xin Xiny n an uan yuan LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 2 9 BLOCKIN 9 0 +Zhao zhao Z Zh Zha Zhao o ao hao Zhao LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 3 9 BLOCKIN 9 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 3 9 BLOCKIN 9 0 +Dadi dadi D Da Dad Dadi i di adi Dadi LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 4 9 BLOCKIN 9 0 +Guo guo G Gu Guo Guo o uo Guo Guo LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 4 9 BLOCKIN 9 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 4 9 BLOCKIN 9 0 +Hanlin hanlin H Ha Han Hanl n in lin nlin LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 5 9 BLOCKIN 9 0 +Gu gu G Gu Gu Gu u Gu Gu Gu LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 5 9 BLOCKIN 9 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 6 9 BLOCKIN 9 0 +Ziqian ziqian Z Zi Ziq Ziqi n an ian qian LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 6 9 BLOCKIN 9 0 +Zeng zeng Z Ze Zen Zeng g ng eng Zeng LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 7 9 BLOCKIN 9 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 7 9 BLOCKIN 9 0 +Yuxing yuxing Y 
Yu Yux Yuxi g ng ing xing LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 8 9 BLOCKIN 9 0 +Han han H Ha Han Han n an Han Han LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 8 9 BLOCKIN 9 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 8 9 BLOCKIN 9 0 +Yangqiu yangqiu Y Ya Yan Yang u iu qiu gqiu LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 9 9 BLOCKIN 9 0 +Song song S So Son Song g ng ong Song LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 1 0 0 0 0 0 NOPUNCT 10 9 BLOCKIN 9 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 10 9 BLOCKIN 9 0 +Lixin lixin L Li Lix Lixi n in xin ixin LINEEND ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 10 9 BLOCKIN 9 0 +Fan fan F Fa Fan Fan n an Fan Fan LINESTART LINEINDENT INITCAP NODIGIT 0 1 1 0 0 0 0 0 NOPUNCT 0 8 BLOCKIN 3 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 0 8 BLOCKIN 3 0 +and and a an and and d nd and and LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 0 8 BLOCKIN 3 0 +Qiang qiang Q Qi Qia Qian g ng ang iang LINEIN LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 1 8 BLOCKIN 3 0 +Yang yang Y Ya Yan Yang g ng ang Yang LINEIN LINEINDENT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 2 8 BLOCKIN 3 0 +. . . . . . . . . . 
LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 2 8 BLOCKIN 3 0 +Federated federated F Fe Fed Fede d ed ted ated LINEIN LINEINDENT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 3 8 BLOCKIN 3 0 +domain domain d do dom doma n in ain main LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 4 8 BLOCKIN 3 0 +- - - - - - - - - - LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 HYPHEN 4 8 BLOCKIN 3 0 +specific specific s sp spe spec c ic fic ific LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 5 8 BLOCKIN 3 0 +knowledge knowledge k kn kno know e ge dge edge LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 6 8 BLOCKIN 3 0 +transfer transfer t tr tra tran r er fer sfer LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 7 8 BLOCKIN 3 0 +on on o on on on n on on on LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 8 8 BLOCKIN 3 0 +large large l la lar larg e ge rge arge LINEIN LINEINDENT NOCAPS NODIGIT 0 1 1 0 0 0 0 0 NOPUNCT 9 8 BLOCKIN 3 0 +language language l la lan lang e ge age uage LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 10 8 BLOCKIN 3 0 +models models m mo mod mode s ls els dels LINEEND LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 10 8 BLOCKIN 3 0 +using using u us usi usin g ng ing sing LINESTART LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 1 2 BLOCKIN 2 0 +synthetic synthetic s sy syn synt c ic tic etic LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 5 2 BLOCKIN 2 0 +data data d da dat data a ta ata data LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 7 2 BLOCKIN 2 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 7 2 BLOCKIN 2 0 +2024 2024 2 20 202 2024 4 24 024 2024 LINEIN LINEINDENT NOCAPS ALLDIGIT 0 0 0 0 0 1 0 0 NOPUNCT 10 2 BLOCKIN 2 0 +. . . . . . . . . . 
LINEEND LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 10 2 BLOCKEND 2 0 +[ [ [ [ [ [ [ [ [ [ LINESTART ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 OPENBRACKET 0 9 BLOCKSTART 7 0 +111 111 1 11 111 111 1 11 111 111 LINEIN ALIGNEDLEFT NOCAPS ALLDIGIT 0 0 0 0 0 0 0 0 NOPUNCT 0 9 BLOCKIN 7 0 +] ] ] ] ] ] ] ] ] ] LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 ENDBRACKET 0 9 BLOCKIN 7 0 +Aleksander aleksander A Al Ale Alek r er der nder LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 1 9 BLOCKIN 7 0 +Madry madry M Ma Mad Madr y ry dry adry LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 2 9 BLOCKIN 7 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 2 9 BLOCKIN 7 0 +Aleksandar aleksandar A Al Ale Alek r ar dar ndar LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 3 9 BLOCKIN 7 0 +Makelov makelov M Ma Mak Make v ov lov elov LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 4 9 BLOCKIN 7 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 4 9 BLOCKIN 7 0 +Ludwig ludwig L Lu Lud Ludw g ig wig dwig LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 5 9 BLOCKIN 7 0 +Schmidt schmidt S Sc Sch Schm t dt idt midt LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 6 9 BLOCKIN 7 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 6 9 BLOCKIN 7 0 +Dimitris dimitris D Di Dim Dimi s is ris tris LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 7 9 BLOCKIN 7 0 +Tsipras tsipras T Ts Tsi Tsip s as ras pras LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 8 9 BLOCKIN 7 0 +, , , , , , , , , , LINEIN ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 8 9 BLOCKIN 7 0 +and and a an and and d nd and and LINEIN ALIGNEDLEFT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 9 9 BLOCKIN 7 0 +Adrian adrian A Ad Adr Adri n an ian rian LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 1 0 0 0 0 0 0 NOPUNCT 10 9 BLOCKIN 7 0 +Vladu vladu V Vl Vla Vlad u du adu ladu 
LINEIN ALIGNEDLEFT INITCAP NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 10 9 BLOCKIN 7 0 +. . . . . . . . . . LINEEND ALIGNEDLEFT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 10 9 BLOCKIN 7 0 +Towards towards T To Tow Towa s ds rds ards LINESTART LINEINDENT INITCAP NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 1 6 BLOCKIN 2 0 +deep deep d de dee deep p ep eep deep LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 1 6 BLOCKIN 2 0 +learning learning l le lea lear g ng ing ning LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 3 6 BLOCKIN 2 0 +models models m mo mod mode s ls els dels LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 4 6 BLOCKIN 2 0 +resistant resistant r re res resi t nt ant tant LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 6 6 BLOCKIN 2 0 +to to t to to to o to to to LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 6 6 BLOCKIN 2 0 +adversarial adversarial a ad adv adve l al ial rial LINEIN LINEINDENT NOCAPS NODIGIT 0 0 0 0 0 0 0 0 NOPUNCT 8 6 BLOCKIN 2 0 +attacks attacks a at att atta s ks cks acks LINEIN LINEINDENT NOCAPS NODIGIT 0 0 1 0 0 0 0 0 NOPUNCT 9 6 BLOCKIN 2 0 +, , , , , , , , , , LINEIN LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 COMMA 9 6 BLOCKIN 2 0 +2019 2019 2 20 201 2019 9 19 019 2019 LINEIN LINEINDENT NOCAPS ALLDIGIT 0 0 0 0 0 1 0 0 NOPUNCT 10 6 BLOCKIN 2 0 +. . . . . . . . . . 
LINEEND LINEINDENT ALLCAP NODIGIT 1 0 0 0 0 0 0 0 DOT 10 6 BLOCKEND 2 0 + diff --git a/grobid-core/src/test/java/org/grobid/core/GrobidModelsTest.java b/grobid-core/src/test/java/org/grobid/core/GrobidModelsTest.java index e28b62ca56..79c9198d03 100644 --- a/grobid-core/src/test/java/org/grobid/core/GrobidModelsTest.java +++ b/grobid-core/src/test/java/org/grobid/core/GrobidModelsTest.java @@ -1,5 +1,6 @@ package org.grobid.core; +import org.grobid.core.utilities.TestEngineUtils; import org.grobid.core.utilities.GrobidConfig; import org.grobid.core.utilities.GrobidProperties; import org.junit.BeforeClass; @@ -12,6 +13,7 @@ public class GrobidModelsTest { @BeforeClass public static void setInitialContext() throws Exception { + TestEngineUtils.initGrobidForceWapiti(); GrobidProperties.getInstance(); } @@ -24,7 +26,7 @@ public void testGrobidModelsEnum_StandardModel_affiliation() throws Exception { assertThat(model.getModelName(), is("affiliation-address")); assertThat(model.getTemplateName(), is("affiliation-address.template")); String[] splittedPath = model.getModelPath().split("[/\\\\]"); - //assertThat(splittedPath[splittedPath.length - 1], is("model.wapiti")); + assertThat(splittedPath[splittedPath.length - 1], is("model.wapiti")); assertThat(splittedPath[splittedPath.length - 2], is("affiliation-address")); assertThat(splittedPath[splittedPath.length - 3], is("models")); } @@ -37,7 +39,7 @@ public void testGrobidModelsEnum_StandardModel_name() throws Exception { assertThat(model.getModelName(), is("header")); assertThat(model.getTemplateName(), is("header.template")); String[] splittedPath = model.getModelPath().split("[/\\\\]"); - //assertThat(splittedPath[splittedPath.length - 1], is("model.wapiti")); + assertThat(splittedPath[splittedPath.length - 1], is("model.wapiti")); assertThat(splittedPath[splittedPath.length - 2], is("header")); assertThat(splittedPath[splittedPath.length - 4], is("grobid-home")); } @@ -56,7 +58,7 @@ public void 
testGrobidModelsEnum_CustomModel_shouldBeConfiguredBeforeHand() thro assertThat(model.getTemplateName(), is("myDreamModel.template")); String[] tokenizePath = model.getModelPath().split("[/\\\\]"); - //assertThat(tokenizePath[tokenizePath.length - 1], is("model.wapiti")); + assertThat(tokenizePath[tokenizePath.length - 1], is("model.wapiti")); assertThat(tokenizePath[tokenizePath.length - 2], is("myDreamModel")); assertThat(tokenizePath[tokenizePath.length - 3], is("models")); assertThat(tokenizePath[tokenizePath.length - 4], is("grobid-home")); diff --git a/grobid-core/src/test/java/org/grobid/core/data/AffiliationTest.java b/grobid-core/src/test/java/org/grobid/core/data/AffiliationTest.java new file mode 100644 index 0000000000..e69de29bb2 diff --git a/grobid-core/src/test/java/org/grobid/core/data/FigureTest.java b/grobid-core/src/test/java/org/grobid/core/data/FigureTest.java new file mode 100644 index 0000000000..e69de29bb2 diff --git a/grobid-core/src/test/java/org/grobid/core/data/FunderTest.java b/grobid-core/src/test/java/org/grobid/core/data/FunderTest.java new file mode 100644 index 0000000000..e69de29bb2 diff --git a/grobid-core/src/test/java/org/grobid/core/data/FundingTest.java b/grobid-core/src/test/java/org/grobid/core/data/FundingTest.java new file mode 100644 index 0000000000..e69de29bb2 diff --git a/grobid-core/src/test/java/org/grobid/core/data/PersonTest.java b/grobid-core/src/test/java/org/grobid/core/data/PersonTest.java new file mode 100644 index 0000000000..9a432c0aee --- /dev/null +++ b/grobid-core/src/test/java/org/grobid/core/data/PersonTest.java @@ -0,0 +1,234 @@ +package org.grobid.core.data; + +import org.junit.Before; +import org.junit.Test; + +import java.util.Arrays; +import java.util.List; + +import static org.hamcrest.CoreMatchers.*; +import static org.hamcrest.MatcherAssert.assertThat; +import static org.hamcrest.Matchers.hasSize; + +public class PersonTest { + + private Person person; + + @Before + public void setUp() { + 
person = new Person(); + } + + @Test + public void testSetFirstName() { + person.setFirstName("John"); + assertThat(person.getFirstName(), is("John")); + } + + @Test + public void testSetLastName() { + person.setLastName("Doe"); + assertThat(person.getLastName(), is("Doe")); + } + + @Test + public void testSetMiddleName() { + person.setMiddleName("Michael"); + assertThat(person.getMiddleName(), is("Michael")); + } + + @Test + public void testSetRawName() { + person.setRawName("John M. Doe"); + assertThat(person.getRawName(), is("John M. Doe")); + } + + @Test + public void testSetTitle_withParentheses_shouldRemoveThem() { + person.setTitle("(Prof.)"); + assertThat(person.getTitle(), is("Prof.")); + } + + @Test + public void testSetTitle_withMultipleParentheses_shouldRemoveAll() { + person.setTitle("((Dr.))"); + assertThat(person.getTitle(), is("Dr.")); + } + + @Test + public void testSetTitle_withoutParentheses() { + person.setTitle("Dr."); + assertThat(person.getTitle(), is("Dr.")); + } + + @Test + public void testSetSuffix() { + person.setSuffix("Jr."); + assertThat(person.getSuffix(), is("Jr.")); + } + + @Test + public void testSetCorresp() { + person.setCorresp(true); + assertThat(person.getCorresp(), is(true)); + } + + @Test + public void testSetORCID_withHttpPrefix_shouldRemovePrefix() { + person.setORCID("http://orcid.org/0000-0001-2345-6789"); + assertThat(person.getORCID(), is("0000-0001-2345-6789")); + } + + @Test + public void testSetORCID_withHttpsPrefix_shouldRemovePrefix() { + person.setORCID("https://orcid.org/0000-0001-2345-6789"); + assertThat(person.getORCID(), is("0000-0001-2345-6789")); + } + + @Test + public void testSetORCID_withoutPrefix() { + person.setORCID("0000-0001-2345-6789"); + assertThat(person.getORCID(), is("0000-0001-2345-6789")); + } + + @Test + public void testSetORCID_null_shouldNotThrow() { + person.setORCID(null); + assertThat(person.getORCID(), is(nullValue())); + } + + @Test + public void testSetEmail() { + 
person.setEmail("john.doe@example.com"); + assertThat(person.getEmail(), is("john.doe@example.com")); + } + + @Test + public void testNotNull_allFieldsNull_shouldReturnFalse() { + assertThat(person.notNull(), is(false)); + } + + @Test + public void testNotNull_withFirstName_shouldReturnTrue() { + person.setFirstName("John"); + assertThat(person.notNull(), is(true)); + } + + @Test + public void testNotNull_withLastName_shouldReturnTrue() { + person.setLastName("Doe"); + assertThat(person.notNull(), is(true)); + } + + @Test + public void testNotNull_withMiddleName_shouldReturnTrue() { + person.setMiddleName("Michael"); + assertThat(person.notNull(), is(true)); + } + + @Test + public void testNotNull_withTitle_shouldReturnTrue() { + person.setTitle("Dr."); + assertThat(person.notNull(), is(true)); + } + + @Test + public void testAddAffiliationMarker() { + person.addAffiliationMarker("1"); + person.addAffiliationMarker("2"); + + List markers = person.getAffiliationMarkers(); + assertThat(markers, hasSize(2)); + assertThat(markers.get(0), is("1")); + assertThat(markers.get(1), is("2")); + } + + @Test + public void testAddMarker_shouldRemoveSpaces() { + person.addMarker("1 2"); + + List markers = person.getMarkers(); + assertThat(markers, hasSize(1)); + assertThat(markers.get(0), is("12")); + } + + @Test + public void testAddAffiliation() { + Affiliation affiliation = new Affiliation(); + affiliation.setName("MIT"); + + person.addAffiliation(affiliation); + + List affiliations = person.getAffiliations(); + assertThat(affiliations, hasSize(1)); + assertThat(affiliations.get(0).getName(), is("MIT")); + } + + @Test + public void testClonePerson() { + person.setFirstName("John"); + person.setLastName("Doe"); + person.setMiddleName("Michael"); + person.setEmail("john@example.com"); + person.setORCID("0000-0001-2345-6789"); + person.setCorresp(true); + + Person cloned = person.clonePerson(); + + assertThat(cloned.getFirstName(), is("John")); + assertThat(cloned.getLastName(), 
is("Doe")); + assertThat(cloned.getMiddleName(), is("Michael")); + assertThat(cloned.getEmail(), is("john@example.com")); + assertThat(cloned.getORCID(), is("0000-0001-2345-6789")); + assertThat(cloned.getCorresp(), is(true)); + } + + @Test + public void testSetAffiliationBlocks() { + List blocks = Arrays.asList("block1", "block2"); + person.setAffiliationBlocks(blocks); + + assertThat(person.getAffiliationBlocks(), is(blocks)); + } + + @Test + public void testAddAffiliationBlocks() { + person.addAffiliationBlocks("block1"); + person.addAffiliationBlocks("block2"); + + List blocks = person.getAffiliationBlocks(); + assertThat(blocks, hasSize(2)); + } + + @Test + public void testSetMarkers() { + List markers = Arrays.asList("1", "2", "3"); + person.setMarkers(markers); + + assertThat(person.getMarkers(), is(markers)); + } + + @Test + public void testSetAffiliationMarkers() { + List markers = Arrays.asList("a", "b"); + person.setAffiliationMarkers(markers); + + assertThat(person.getAffiliationMarkers(), is(markers)); + } + + @Test + public void testSetAffiliations() { + Affiliation aff1 = new Affiliation(); + aff1.setName("MIT"); + Affiliation aff2 = new Affiliation(); + aff2.setName("Stanford"); + + List affiliations = Arrays.asList(aff1, aff2); + person.setAffiliations(affiliations); + + assertThat(person.getAffiliations(), hasSize(2)); + assertThat(person.getAffiliations().get(0).getName(), is("MIT")); + assertThat(person.getAffiliations().get(1).getName(), is("Stanford")); + } +} + diff --git a/grobid-core/src/test/java/org/grobid/core/data/TableTest.java b/grobid-core/src/test/java/org/grobid/core/data/TableTest.java new file mode 100644 index 0000000000..e69de29bb2 diff --git a/grobid-core/src/test/java/org/grobid/core/document/TEIFormatterIntegrationTest.java b/grobid-core/src/test/java/org/grobid/core/document/TEIFormatterIntegrationTest.java index 159a254e18..2fa793b584 100644 --- 
a/grobid-core/src/test/java/org/grobid/core/document/TEIFormatterIntegrationTest.java +++ b/grobid-core/src/test/java/org/grobid/core/document/TEIFormatterIntegrationTest.java @@ -3,8 +3,7 @@ import org.grobid.core.data.Note; import org.grobid.core.engines.EngineParsers; import org.grobid.core.engines.config.GrobidAnalysisConfig; -import org.grobid.core.main.LibraryLoader; -import org.grobid.core.utilities.GrobidProperties; +import org.grobid.core.utilities.TestEngineUtils; import org.junit.BeforeClass; import org.junit.Test; @@ -12,15 +11,14 @@ import java.util.List; import static org.hamcrest.CoreMatchers.is; +import static org.hamcrest.MatcherAssert.assertThat; import static org.hamcrest.Matchers.hasSize; -import static org.junit.Assert.assertThat; public class TEIFormatterIntegrationTest { @BeforeClass public static void setInitialContext() throws Exception { - GrobidProperties.getInstance(); - LibraryLoader.load(); + TestEngineUtils.initGrobidForceWapiti(); } @Test @@ -31,10 +29,10 @@ public void testGetTeiNotes() throws Exception { List teiNotes = new TEIFormatter(null, null).getTeiNotes(doc); - /*assertThat(teiNotes, hasSize(1)); - assertThat(teiNotes.get(0).getText(), is(" http://wikipedia.org ")); + assertThat(teiNotes, hasSize(1)); + assertThat(teiNotes.get(0).getText(), is("http://wikipedia.org ")); assertThat(teiNotes.get(0).getLabel(), is("1")); - assertThat(teiNotes.get(0).getPageNumber(), is(1));*/ + assertThat(teiNotes.get(0).getPageNumber(), is(1)); } } \ No newline at end of file diff --git a/grobid-core/src/test/java/org/grobid/core/engines/DateParserIntegrationTest.java b/grobid-core/src/test/java/org/grobid/core/engines/DateParserIntegrationTest.java index faf07269ec..b74b436400 100644 --- a/grobid-core/src/test/java/org/grobid/core/engines/DateParserIntegrationTest.java +++ b/grobid-core/src/test/java/org/grobid/core/engines/DateParserIntegrationTest.java @@ -1,7 +1,7 @@ package org.grobid.core.engines; import org.grobid.core.data.Date; 
-import org.grobid.core.factory.AbstractEngineFactory; +import org.grobid.core.utilities.TestEngineUtils; import org.junit.*; import java.util.Arrays; @@ -21,13 +21,7 @@ public class DateParserIntegrationTest { @BeforeClass public static void setInitialContext() throws Exception { -// MockContext.setInitialContext(); - AbstractEngineFactory.init(); - } - - @AfterClass - public static void destroyInitialContext() throws Exception { -// MockContext.destroyInitialContext(); + TestEngineUtils.initGrobidForceWapiti(); } @Before diff --git a/grobid-core/src/test/java/org/grobid/core/engines/SegmentationTest.java b/grobid-core/src/test/java/org/grobid/core/engines/SegmentationTest.java index a09ac407d9..fccc5ce675 100644 --- a/grobid-core/src/test/java/org/grobid/core/engines/SegmentationTest.java +++ b/grobid-core/src/test/java/org/grobid/core/engines/SegmentationTest.java @@ -3,7 +3,7 @@ import org.grobid.core.document.Document; import org.grobid.core.document.DocumentSource; import org.grobid.core.engines.config.GrobidAnalysisConfig; -import org.grobid.core.factory.AbstractEngineFactory; +import org.grobid.core.utilities.TestEngineUtils; import org.junit.AfterClass; import org.junit.Before; import org.junit.BeforeClass; @@ -21,7 +21,7 @@ public class SegmentationTest { @BeforeClass public static void setInitialContext() throws Exception { // MockContext.setInitialContext(); - AbstractEngineFactory.init(); + TestEngineUtils.initGrobidForceWapiti(); } @AfterClass diff --git a/grobid-core/src/test/java/org/grobid/core/engines/patent/ReferenceExtractorTest.java b/grobid-core/src/test/java/org/grobid/core/engines/patent/ReferenceExtractorTest.java index fc64da641d..abb010a382 100644 --- a/grobid-core/src/test/java/org/grobid/core/engines/patent/ReferenceExtractorTest.java +++ b/grobid-core/src/test/java/org/grobid/core/engines/patent/ReferenceExtractorTest.java @@ -10,8 +10,7 @@ import org.grobid.core.data.BibDataSet; import org.grobid.core.data.PatentItem; -import 
org.grobid.core.factory.AbstractEngineFactory; -import org.grobid.core.utilities.counters.GrobidTimer; +import org.grobid.core.utilities.TestEngineUtils; import org.junit.AfterClass; import org.junit.BeforeClass; import org.junit.Ignore; @@ -26,7 +25,7 @@ public class ReferenceExtractorTest { @BeforeClass public static void setInitialContext() throws Exception { - AbstractEngineFactory.init(); + TestEngineUtils.initGrobidForceWapiti(); } @AfterClass diff --git a/grobid-core/src/test/java/org/grobid/core/engines/tagging/delft/CRFDecoderTest.java b/grobid-core/src/test/java/org/grobid/core/engines/tagging/delft/CRFDecoderTest.java new file mode 100644 index 0000000000..dacaac654d --- /dev/null +++ b/grobid-core/src/test/java/org/grobid/core/engines/tagging/delft/CRFDecoderTest.java @@ -0,0 +1,220 @@ +package org.grobid.core.engines.tagging.delft; + +import org.junit.Before; +import org.junit.Test; + +import static org.junit.Assert.*; + +/** + * Unit tests for CRFDecoder Viterbi decoding. + * + * Tests verify: + * - Basic Viterbi decoding finds optimal path + * - Mask handling for variable length sequences + * - Transition matrix usage + * - Start/end transitions + * - Batch decoding + */ +public class CRFDecoderTest { + + private CRFDecoder decoder; + + @Before + public void setUp() { + // Create a simple 3-tag CRF decoder (O, B-TITLE, I-TITLE) + // Transition matrix [from_tag][to_tag] + float[][] transitions = { + // to: O, B-TITLE, I-TITLE + { 0.5f, 0.3f, -1.0f }, // from O: prefer O or B-TITLE, penalize I-TITLE + { 0.2f, 0.1f, 0.6f }, // from B-TITLE: prefer I-TITLE + { 0.3f, 0.1f, 0.5f } // from I-TITLE: prefer continuing I-TITLE + }; + + // Start transitions: prefer starting with O or B-TITLE + float[] startTransitions = { 0.5f, 0.4f, -1.0f }; + + // End transitions: all tags can end + float[] endTransitions = { 0.0f, 0.0f, 0.0f }; + + decoder = new CRFDecoder(transitions, startTransitions, endTransitions); + } + + @Test + public void testGetNumTags() { + 
assertEquals(3, decoder.getNumTags()); + } + + /** + * Test basic decoding with strong emissions for each tag. + */ + @Test + public void testDecode_followsStrongEmissions() { + // Emissions strongly favor: [O, B-TITLE, I-TITLE, O] + float[][] emissions = { + { 2.0f, -1.0f, -1.0f }, // Position 0: strongly O + { -1.0f, 2.0f, -1.0f }, // Position 1: strongly B-TITLE + { -1.0f, -1.0f, 2.0f }, // Position 2: strongly I-TITLE + { 2.0f, -1.0f, -1.0f }, // Position 3: strongly O + }; + + int[] result = decoder.decode(emissions, null); + + assertEquals(4, result.length); + assertEquals(0, result[0]); // O + assertEquals(1, result[1]); // B-TITLE + assertEquals(2, result[2]); // I-TITLE + assertEquals(0, result[3]); // O + } + + /** + * Test that transitions influence decoding when emissions are ambiguous. + */ + @Test + public void testDecode_transitionsInfluenceDecoding() { + // Emissions are all equal - transitions should decide + float[][] emissions = { + { 0.0f, 0.0f, 0.0f }, // Position 0: ambiguous + { 0.0f, 0.0f, 0.0f }, // Position 1: ambiguous + }; + + int[] result = decoder.decode(emissions, null); + + assertEquals(2, result.length); + // With our transition matrix, starting with O is preferred + // (startTransitions[0]=0.5) + // and O->O has good transition (0.5) + assertEquals(0, result[0]); // Should start with O + } + + /** + * Test that I-TITLE cannot start a sequence (penalized by startTransitions). + */ + @Test + public void testDecode_cannotStartWithContinuation() { + // Position 0 emissions favor I-TITLE, but start transitions penalize it + float[][] emissions = { + { 0.0f, 0.0f, 0.5f }, // Slightly favor I-TITLE + }; + + int[] result = decoder.decode(emissions, null); + + // Should NOT be I-TITLE (index 2) because start transitions penalize it + assertNotEquals(2, result[0]); + } + + /** + * Test decoding with mask - only valid positions are decoded. 
+ */ + @Test + public void testDecode_respectsMask() { + float[][] emissions = { + { 2.0f, -1.0f, -1.0f }, // Position 0: O + { -1.0f, 2.0f, -1.0f }, // Position 1: B-TITLE + { -1.0f, -1.0f, 2.0f }, // Position 2: I-TITLE (masked out) + { 2.0f, -1.0f, -1.0f }, // Position 3: O (masked out) + }; + + boolean[] mask = { true, true, false, false }; + + int[] result = decoder.decode(emissions, mask); + + // Only first 2 positions should be decoded + assertEquals(2, result.length); + assertEquals(0, result[0]); // O + assertEquals(1, result[1]); // B-TITLE + } + + /** + * Test decoding empty sequence (all masked out). + */ + @Test + public void testDecode_emptySequence() { + float[][] emissions = { + { 2.0f, -1.0f, -1.0f }, + { -1.0f, 2.0f, -1.0f }, + }; + + boolean[] mask = { false, false }; + + int[] result = decoder.decode(emissions, mask); + + assertEquals(0, result.length); + } + + /** + * Test batch decoding. + */ + @Test + public void testDecodeBatch() { + float[][][] emissions = { + // Sequence 0: [O, B-TITLE] + { + { 2.0f, -1.0f, -1.0f }, + { -1.0f, 2.0f, -1.0f }, + }, + // Sequence 1: [B-TITLE, I-TITLE] + { + { -1.0f, 2.0f, -1.0f }, + { -1.0f, -1.0f, 2.0f }, + } + }; + + int[][] results = decoder.decodeBatch(emissions, null); + + assertEquals(2, results.length); + + assertEquals(2, results[0].length); + assertEquals(0, results[0][0]); // O + assertEquals(1, results[0][1]); // B-TITLE + + assertEquals(2, results[1].length); + assertEquals(1, results[1][0]); // B-TITLE + assertEquals(2, results[1][1]); // I-TITLE + } + + /** + * Test batch decoding with masks. 
+ */ + @Test + public void testDecodeBatch_withMasks() { + float[][][] emissions = { + // Sequence 0: 3 positions, but only 2 valid + { + { 2.0f, -1.0f, -1.0f }, + { -1.0f, 2.0f, -1.0f }, + { -1.0f, -1.0f, 2.0f }, + }, + // Sequence 1: 3 positions, only 1 valid + { + { 2.0f, -1.0f, -1.0f }, + { -1.0f, 2.0f, -1.0f }, + { -1.0f, -1.0f, 2.0f }, + } + }; + + boolean[][] masks = { + { true, true, false }, + { true, false, false } + }; + + int[][] results = decoder.decodeBatch(emissions, masks); + + assertEquals(2, results[0].length); // 2 valid positions + assertEquals(1, results[1].length); // 1 valid position + } + + /** + * Test single position decoding. + */ + @Test + public void testDecode_singlePosition() { + float[][] emissions = { + { -1.0f, 2.0f, -1.0f }, // Only B-TITLE + }; + + int[] result = decoder.decode(emissions, null); + + assertEquals(1, result.length); + assertEquals(1, result[0]); // B-TITLE + } +} diff --git a/grobid-core/src/test/java/org/grobid/core/engines/tagging/delft/HeaderOnnxIntegrationTest.java b/grobid-core/src/test/java/org/grobid/core/engines/tagging/delft/HeaderOnnxIntegrationTest.java new file mode 100644 index 0000000000..e500c532ce --- /dev/null +++ b/grobid-core/src/test/java/org/grobid/core/engines/tagging/delft/HeaderOnnxIntegrationTest.java @@ -0,0 +1,212 @@ +package org.grobid.core.engines.tagging.delft; + +import ai.onnxruntime.OrtException; +import org.apache.commons.lang3.StringUtils; +import org.grobid.core.GrobidModels; +import org.grobid.core.analyzers.GrobidAnalyzer; +import org.grobid.core.features.FeaturesVectorHeader; +import org.grobid.core.layout.LayoutToken; +import org.grobid.core.utilities.GrobidProperties; +import org.junit.*; + +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Path; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; +import java.util.stream.Collectors; + +import static org.hamcrest.CoreMatchers.is; +import static 
org.hamcrest.CoreMatchers.notNullValue; +import static org.hamcrest.MatcherAssert.assertThat; +import static org.hamcrest.Matchers.greaterThan; +import static org.hamcrest.Matchers.lessThan; +import static org.junit.Assume.assumeTrue; + +/** + * Integration test for Header ONNX model. + * + * This test verifies that the ONNX-based header model can be loaded and + * run inference correctly. It helps diagnose issues with: + * - ONNX Runtime native library loading + * - LMDB embeddings compatibility + * - CRF decoder functionality + * - End-to-end model inference + * + * Prerequisites: + * - ONNX header model at grobid-home/models/header-BidLSTM_CRF_FEATURES.onnx/ + * - Embeddings preloaded using: python3 + * grobid-home/scripts/preload_embeddings.py --embedding glove-840B + */ +public class HeaderOnnxIntegrationTest { + + private static final String ARCHITECTURE = "BidLSTM_CRF_FEATURES"; + + private static Path modelPath; + private static Path embeddingsPath; + private OnnxSequenceLabellingModel model; + + @BeforeClass + public static void setUpClass() { + // Initialize GROBID properties + GrobidProperties.getInstance(); + + // Get model path + String modelName = GrobidModels.HEADER.getModelName(); + String grobidHome = GrobidProperties.getGrobidHome().getAbsolutePath(); + modelPath = Path.of(grobidHome, "models", modelName + "-" + ARCHITECTURE + ".onnx"); + + // Get embeddings path + String delftPath = GrobidProperties.getDeLFTFilePath(); + embeddingsPath = Path.of(delftPath, "data", "db", "glove-840B"); + } + + @Before + public void setUp() throws IOException, OrtException { + // Skip test if model is not available + assumeTrue("ONNX model not found at " + modelPath + + ". Please ensure the ONNX header model is installed.", + Files.exists(modelPath) && Files.isDirectory(modelPath)); + + // Skip test if embeddings are not available + assumeTrue("Embeddings not found at " + embeddingsPath + + ". 
Please run: python3 grobid-home/scripts/preload_embeddings.py --embedding glove-840B", + Files.exists(embeddingsPath) && Files.isDirectory(embeddingsPath)); + + // Load model + model = new OnnxSequenceLabellingModel(modelPath); + } + + @After + public void tearDown() { + if (model != null) { + model.close(); + } + } + + @Test + public void testModelCanBeLoaded() { + assertThat(model, is(notNullValue())); + assertThat("Model should have features", model.hasFeatures(), is(true)); + assertThat("Model should have > 0 features", model.getNumFeatures(), greaterThan(0)); + } + + @Test + public void testMaxSequenceLength() { + int maxSeqLength = model.getMaxSeqLength(); + assertThat("Max sequence length should be positive", maxSeqLength, greaterThan(0)); + } + + @Test + public void testAnnotateSimpleHeader() throws OrtException { + String input = "Deep Learning for Natural Language Processing John Smith MIT"; + + List allTokens = GrobidAnalyzer.getInstance().tokenizeWithLayoutToken(input); + List filtered = allTokens.stream() + .filter(token -> StringUtils.isNotBlank(token.getText())) + .collect(Collectors.toList()); + + String[] words = new String[filtered.size()]; + String[][] features = new String[filtered.size()][model.getNumFeatures()]; + for (int i = 0; i < filtered.size(); i++) { + words[i] = filtered.get(i).getText(); + FeaturesVectorHeader featuresVectorHeader = FeaturesVectorHeader.fromLayoutToken(filtered.get(i)); + features[i] = featuresVectorHeader.printVector().split("\n"); + } + + OnnxSequenceLabellingModel.AnnotationResult result = model.annotateTokens(words, features); + + assertThat(result, is(notNullValue())); + assertThat(result.getTokens(), is(notNullValue())); + assertThat(result.getLabels(), is(notNullValue())); + assertThat("Tokens and labels should have same length", + result.getTokens().length, is(result.getLabels().length)); + assertThat(result.getLabels().length, greaterThan(0)); + + long otherLabel = Arrays.stream(result.getLabels()).filter(v 
-> v.equalsIgnoreCase("")).count(); + + assertThat(otherLabel, lessThan((long) result.getLabels().length)); + } + + @Test + public void testLabelGrobidInput() { + // Create a simple GROBID-formatted input with features + // Format: token\tfeature1\tfeature2\t... + StringBuilder input = new StringBuilder(); + + // Simulate header features (token + 22 features based on config.json) + // Features indices 9-30 = 22 features + String[] tokens = { "Deep", "Learning", "for", "NLP" }; + for (String token : tokens) { + input.append(token); + // Add dummy features (GROBID uses various binary and categorical features) + for (int i = 0; i < 22; i++) { + input.append("\t").append("NOFEAT"); + } + input.append("\n"); + } + + String result = model.labelGrobidInput(input.toString()); + + assertThat(result, is(notNullValue())); + assertThat("Result should not be empty", result.length(), greaterThan(0)); + + // Verify that each line has the original content plus a label + String[] resultLines = result.trim().split("\n"); + assertThat("Result should have same number of lines as input", + resultLines.length, is(tokens.length)); + + for (String line : resultLines) { + String[] parts = line.split("\t"); + // Should have: token + 22 features + 1 label = 24 parts + assertThat("Each line should have at least 24 parts (token + features + label)", + parts.length, greaterThan(23)); + } + } + + @Test + public void testAnnotateTokensWithFeatures() throws OrtException { + // Test token annotation with explicit features + String[] tokens = { "Analysis", "of", "Cancer", "Genomes" }; + // 22 features per token + String[][] features = new String[tokens.length][22]; + for (int i = 0; i < tokens.length; i++) { + for (int j = 0; j < 22; j++) { + features[i][j] = "NOFEAT"; + } + } + + OnnxSequenceLabellingModel.AnnotationResult result = model.annotateTokens(tokens, features); + + assertThat(result, is(notNullValue())); + assertThat(result.getLabels().length, is(tokens.length)); + + 
assertThat(result.getLabels().length, greaterThan(0)); + + long otherLabel = Arrays.stream(result.getLabels()).filter(v -> v.equalsIgnoreCase("")).count(); + + assertThat(otherLabel, lessThan((long) result.getLabels().length)); + } + + @Test + public void testLabelMultipleSequences() { + // Test with multiple sequences separated by empty lines + StringBuilder input = new StringBuilder(); + + // First sequence + input.append(FeaturesVectorHeader.fromLayoutToken(new LayoutToken("Title")).printVector()); + input.append(FeaturesVectorHeader.fromLayoutToken(new LayoutToken("Text")).printVector()); + input.append("\n"); // Empty line = sequence separator + + // Second sequence + input.append(FeaturesVectorHeader.fromLayoutToken(new LayoutToken("Author")).printVector()); + input.append(FeaturesVectorHeader.fromLayoutToken(new LayoutToken("Name")).printVector()); + + String result = model.labelGrobidInput(input.toString()); + + assertThat(result, is(notNullValue())); + // Result should contain labeled tokens + assertThat("Result should contain labeled output", result.length(), greaterThan(0)); + } +} diff --git a/grobid-core/src/test/java/org/grobid/core/engines/tagging/delft/OnnxSequenceLabellingModelTest.java b/grobid-core/src/test/java/org/grobid/core/engines/tagging/delft/OnnxSequenceLabellingModelTest.java new file mode 100644 index 0000000000..155769d88e --- /dev/null +++ b/grobid-core/src/test/java/org/grobid/core/engines/tagging/delft/OnnxSequenceLabellingModelTest.java @@ -0,0 +1,287 @@ +package org.grobid.core.engines.tagging.delft; + +import org.junit.Test; + +import static org.junit.Assert.*; + +/** + * Unit tests for OnnxSequenceLabellingModel sequence chunking functionality. + */ +public class OnnxSequenceLabellingModelTest { + + /** + * Test that input is correctly split into sequences at empty lines. 
+ */ + @Test + public void testSequenceSplitting() { + String input = "token1\tfeature1\ntoken2\tfeature2\n\ntoken3\tfeature3\ntoken4\tfeature4\n"; + + int sequenceCount = countSequences(input); + + assertEquals(2, sequenceCount); + } + + /** + * Test chunking calculation for large sequences. + */ + @Test + public void testChunkingCalculation() { + int totalTokens = 6748; // Size of large_sequence.txt + int maxSeqLength = 512; + + int expectedChunks = (int) Math.ceil((double) totalTokens / maxSeqLength); + + assertEquals(14, expectedChunks); + } + + /** + * Test that features are correctly parsed from tab-separated lines. + */ + @Test + public void testFeatureParsing() { + String line = "token\tf1\tf2\tf3\tf4"; + String[] parts = line.split("[\\t\\s]+"); + + assertEquals(5, parts.length); + } + + // Helper to count sequences in input + private int countSequences(String input) { + String[] lines = input.split("\n", -1); + int count = 0; + boolean inSequence = false; + + for (String line : lines) { + if (line.trim().isEmpty()) { + if (inSequence) { + count++; + inSequence = false; + } + } else { + inSequence = true; + } + } + if (inSequence) + count++; + + return count; + } + + /** + * Test that feature extraction uses the correct column offset. + * + * Python stores features as: features = pieces[1:] + * So features[9] = pieces[10] + * + * Java must use parts[featureIndex + 1] to get the same value. + * This test verifies the +1 offset logic is correct. + */ + @Test + public void testFeatureExtractionOffset() { + // Simulate Grobid input line with 31 columns (token + 30 features) + // Column 10 = "BLOCKSTART" (0-indexed, so it's pieces[10] in Python) + String line = "token col1 col2 col3 col4 col5 col6 col7 col8 col9 BLOCKSTART LINESTART ALIGNEDLEFT " + + "col13 col14 col15 col16 col17 col18 col19 col20 col21 col22 col23 col24 col25 col26 col27 col28 col29 col30"; + String[] parts = line.split("[\\t\\s]+"); + + // featuresIndices = [9, 10, 11, ...] 
(Python's 1-based feature indices) + // When we access featureIndex=9, we should get "BLOCKSTART" which is at + // parts[10] + int featureIndex = 9; + int adjustedIndex = featureIndex + 1; // This is the fix! + + assertEquals("BLOCKSTART", parts[adjustedIndex]); + assertEquals("LINESTART", parts[10 + 1]); + assertEquals("ALIGNEDLEFT", parts[11 + 1]); + } + + /** + * Test the compact feature extraction - verifying we extract only + * the columns specified in featuresIndices. + */ + @Test + public void testCompactFeatureExtraction() { + String line = "token a b c d e f g h i BLOCKSTART LINESTART ALIGNEDLEFT m n o p q r s t u v w x y z"; + String[] parts = line.split("[\\t\\s]+"); + + // featuresIndices = [9, 10, 11] (select columns 9, 10, 11 from Python's + // features array) + java.util.List featuresIndices = java.util.Arrays.asList(9, 10, 11); + + // Extract features using the +1 offset + String[] extractedFeatures = new String[featuresIndices.size()]; + for (int k = 0; k < featuresIndices.size(); k++) { + int featureIndex = featuresIndices.get(k); + int adjustedIndex = featureIndex + 1; + if (adjustedIndex < parts.length) { + extractedFeatures[k] = parts[adjustedIndex]; + } else { + extractedFeatures[k] = "0"; + } + } + + // Verify we got the correct values + assertEquals("BLOCKSTART", extractedFeatures[0]); + assertEquals("LINESTART", extractedFeatures[1]); + assertEquals("ALIGNEDLEFT", extractedFeatures[2]); + } + + /** + * Test that missing features (when adjustedIndex >= parts.length) default to + * "0". 
+ */ + @Test + public void testMissingFeatures() { + String line = "token col1 col2"; // Only 3 columns + String[] parts = line.split("[\\t\\s]+"); + + java.util.List featuresIndices = java.util.Arrays.asList(9, 10, 11); + + String[] extractedFeatures = new String[featuresIndices.size()]; + for (int k = 0; k < featuresIndices.size(); k++) { + int featureIndex = featuresIndices.get(k); + int adjustedIndex = featureIndex + 1; + if (adjustedIndex < parts.length) { + extractedFeatures[k] = parts[adjustedIndex]; + } else { + extractedFeatures[k] = "0"; // Default for missing + } + } + + // All should be "0" since parts only has 3 elements + assertEquals("0", extractedFeatures[0]); + assertEquals("0", extractedFeatures[1]); + assertEquals("0", extractedFeatures[2]); + } + + // ========================================================================= + // Label Conversion Tests (delft2grobidLabel logic) + // ========================================================================= + + /** + * Test IOB "O" label converts to GROBID "O" (OTHER_LABEL). + */ + @Test + public void testLabelConversion_OLabel() { + String result = convertLabel("O"); + assertEquals("O", result); + } + + /** + * Test IOB "B-" prefix converts to GROBID "I-" prefix. + */ + @Test + public void testLabelConversion_BPrefix() { + String result = convertLabel("B-title"); + assertEquals("I-", result); + } + + /** + * Test IOB "I-" prefix converts to GROBID "<" ... ">" format. + */ + @Test + public void testLabelConversion_IPrefix() { + String result = convertLabel("I-title"); + assertEquals("<title>", result); + } + + /** + * Test <PAD> label converts to "O". 
+ */ + @Test + public void testLabelConversion_PADLabel() { + String result = convertLabel("<PAD>"); + assertEquals("O", result); + } + + // Helper to simulate delft2grobidLabel + private String convertLabel(String label) { + if (label.equals("O") || label.trim().equals("<PAD>")) { + return "O"; + } else if (label.startsWith("B-")) { + return label.replace("B-", "I-<") + ">"; + } else if (label.startsWith("I-")) { + return "<" + label.substring(2) + ">"; + } + return label; + } + + // ========================================================================= + // Sequence Chunking Tests + // ========================================================================= + + /** + * Test chunking calculation for sequences that fit in one chunk. + */ + @Test + public void testChunking_fitsInOneChunk() { + int totalTokens = 400; + int maxSeqLength = 512; + + int numChunks = (int) Math.ceil((double) totalTokens / maxSeqLength); + + assertEquals(1, numChunks); + } + + /** + * Test chunk boundaries are calculated correctly. + */ + @Test + public void testChunkBoundaries() { + int totalTokens = 1000; + int maxSeqLength = 512; + + java.util.List<int[]> chunks = new java.util.ArrayList<>(); + int offset = 0; + while (offset < totalTokens) { + int chunkEnd = Math.min(offset + maxSeqLength, totalTokens); + chunks.add(new int[] { offset, chunkEnd }); + offset = chunkEnd; + } + + assertEquals(2, chunks.size()); + + // First chunk: [0, 512) + assertEquals(0, chunks.get(0)[0]); + assertEquals(512, chunks.get(0)[1]); + + // Second chunk: [512, 1000) + assertEquals(512, chunks.get(1)[0]); + assertEquals(1000, chunks.get(1)[1]); + } + + /** + * Test parsing input into sequences separated by empty lines. 
+ */ + @Test + public void testParseSequences() { + String input = "token1\tfeature1\ntoken2\tfeature2\n\ntoken3\tfeature3\n"; + + java.util.List<java.util.List<String>> sequences = parseSequences(input); + + assertEquals(2, sequences.size()); + assertEquals(2, sequences.get(0).size()); // First sequence: 2 tokens + assertEquals(1, sequences.get(1).size()); // Second sequence: 1 token + } + + // Helper to parse sequences + private java.util.List<java.util.List<String>> parseSequences(String input) { + java.util.List<java.util.List<String>> sequences = new java.util.ArrayList<>(); + java.util.List<String> current = new java.util.ArrayList<>(); + + for (String line : input.split("\n", -1)) { + if (line.trim().isEmpty()) { + if (!current.isEmpty()) { + sequences.add(current); + current = new java.util.ArrayList<>(); + } + } else { + current.add(line); + } + } + if (!current.isEmpty()) { + sequences.add(current); + } + return sequences; + } +} diff --git a/grobid-core/src/test/java/org/grobid/core/engines/tagging/delft/PreprocessingComparisonTest.java b/grobid-core/src/test/java/org/grobid/core/engines/tagging/delft/PreprocessingComparisonTest.java new file mode 100644 index 0000000000..e76a4d0279 --- /dev/null +++ b/grobid-core/src/test/java/org/grobid/core/engines/tagging/delft/PreprocessingComparisonTest.java @@ -0,0 +1,159 @@ +package org.grobid.core.engines.tagging.delft; + +import com.google.gson.Gson; +import com.google.gson.GsonBuilder; +import org.junit.BeforeClass; +import org.junit.Test; + +import java.io.*; +import java.nio.file.*; +import java.util.*; + +import static org.junit.Assert.*; + +/** + * Test to compare Java preprocessing with Python preprocessing. + * + * This test loads the same input file (example.txt from delft) and + * produces preprocessing output in JSON format that can be compared + * with the Python output. 
+ */ +public class PreprocessingComparisonTest { + + private static final String DELFT_PATH = System.getProperty("user.home") + "/development/github/delft"; + private static final String MODEL_DIR = DELFT_PATH + "/exported_models/header-BidLSTM_CRF_FEATURES.onnx"; + private static final String INPUT_FILE = DELFT_PATH + "/example.txt"; + private static final String OUTPUT_FILE = DELFT_PATH + "/java_preprocessing.json"; + + private static Preprocessor preprocessor; + + @BeforeClass + public static void setup() throws IOException { + Path vocabPath = Paths.get(MODEL_DIR, "vocab.json"); + if (!Files.exists(vocabPath)) { + System.out.println("SKIP: vocab.json not found at " + vocabPath); + return; + } + preprocessor = Preprocessor.fromJson(vocabPath); + } + + /** + * Test that generates preprocessing output for comparison with Python. + * Run with: mvn test -Dtest=PreprocessingComparisonTest#testExportPreprocessing + */ + @Test + public void testExportPreprocessing() throws IOException { + if (preprocessor == null) { + System.out.println("SKIP: Preprocessor not loaded"); + return; + } + + // Read input file + String content = Files.readString(Paths.get(INPUT_FILE)); + String[] lines = content.split("\n", -1); + + // Parse tokens and features + List<String> tokens = new ArrayList<>(); + List<String[]> tokenFeatures = new ArrayList<>(); + + for (String line : lines) { + if (line.trim().isEmpty()) + continue; + String[] parts = line.split("[\\t\\s]+"); + if (parts.length > 0) { + tokens.add(parts[0]); + tokenFeatures.add(parts); + } + } + + int limit = Math.min(50, tokens.size()); + + // Build output + Map<String, Object> output = new LinkedHashMap<>(); + + // Config + Map<String, Object> config = new LinkedHashMap<>(); + config.put("max_char", preprocessor.getMaxCharLength()); + config.put("num_features", preprocessor.getNumFeatures()); + config.put("features_indices", preprocessor.getFeaturesIndices()); + output.put("config", config); + + // Tokens + output.put("tokens", 
tokens.subList(0, limit)); + + // Char indices + List<long[]> charIndices = new ArrayList<>(); + for (int i = 0; i < limit; i++) { + // Create LayoutToken for this token + org.grobid.core.layout.LayoutToken lt = new org.grobid.core.layout.LayoutToken(); + lt.setText(tokens.get(i)); + List<org.grobid.core.layout.LayoutToken> tokenList = Collections.singletonList(lt); + + // Get char indices (returned as [seq][char], we only have 1 token so take [0]) + long[][] indices = preprocessor.tokensToCharIndices(tokenList, 1); + charIndices.add(indices[0]); + } + output.put("char_indices", charIndices); + + // Feature indices + if (preprocessor.hasFeatures() && preprocessor.getFeaturesIndices() != null) { + List<Integer> featuresIndices = preprocessor.getFeaturesIndices(); + List<List<Map<String, Object>>> featureOutput = new ArrayList<>(); + + for (int i = 0; i < limit; i++) { + String[] parts = tokenFeatures.get(i); + List<Map<String, Object>> tokenFeatList = new ArrayList<>(); + + // Build feature array for tokensToFeatureIndices + String[][] singleTokenFeatures = new String[1][featuresIndices.size()]; + + for (int j = 0; j < featuresIndices.size(); j++) { + int featureColumn = featuresIndices.get(j); + // Match the fix: use featureColumn + 1 since Python's features = pieces[1:] + int adjustedColumn = featureColumn + 1; + String featureValue = adjustedColumn < parts.length ? parts[adjustedColumn] : null; + singleTokenFeatures[0][j] = featureValue; + } + + // Get mapped indices + long[][] mappedIndices = preprocessor.tokensToFeatureIndices(singleTokenFeatures, 1); + + for (int j = 0; j < featuresIndices.size(); j++) { + int featureColumn = featuresIndices.get(j); + String featureValue = singleTokenFeatures[0][j]; + long mappedIndex = mappedIndices != null ? 
mappedIndices[0][j] : 0; + + Map<String, Object> featInfo = new LinkedHashMap<>(); + featInfo.put("featureColumn", featureColumn); + featInfo.put("featureValue", featureValue); + featInfo.put("mappedIndex", mappedIndex); + tokenFeatList.add(featInfo); + } + + featureOutput.add(tokenFeatList); + } + output.put("feature_indices", featureOutput); + } + + // Write output + Gson gson = new GsonBuilder().setPrettyPrinting().create(); + String json = gson.toJson(output); + Files.writeString(Paths.get(OUTPUT_FILE), json); + + System.out.println("Java preprocessing output written to: " + OUTPUT_FILE); + System.out.println("\nSample of first 5 tokens:"); + for (int i = 0; i < Math.min(5, limit); i++) { + System.out.println("\nToken " + i + ": '" + tokens.get(i) + "'"); + System.out.print(" Char indices (first 10): ["); + long[] chars = charIndices.get(i); + for (int j = 0; j < Math.min(10, chars.length); j++) { + if (j > 0) + System.out.print(", "); + System.out.print(chars[j]); + } + System.out.println("]"); + } + + assertTrue("Output file should exist", Files.exists(Paths.get(OUTPUT_FILE))); + } +} diff --git a/grobid-core/src/test/java/org/grobid/core/engines/tagging/delft/PreprocessorTest.java b/grobid-core/src/test/java/org/grobid/core/engines/tagging/delft/PreprocessorTest.java new file mode 100644 index 0000000000..4cf33a4696 --- /dev/null +++ b/grobid-core/src/test/java/org/grobid/core/engines/tagging/delft/PreprocessorTest.java @@ -0,0 +1,247 @@ +package org.grobid.core.engines.tagging.delft; + +import org.junit.Before; +import org.junit.Test; + +import java.util.*; + +import static org.hamcrest.MatcherAssert.assertThat; +import static org.hamcrest.Matchers.*; +import static org.junit.Assert.*; + +/** + * Unit tests for Preprocessor, particularly the feature extraction logic. + * + * These tests validate that: + * 1. Feature indices map correctly to vocabulary lookups + * 2. 
The compact features array structure works as expected + */ +public class PreprocessorTest { + + private Preprocessor preprocessor; + + // Mock vocabulary and feature mappings + private Map<String, Integer> charVocab; + private Map<Integer, String> tagIndex; + private List<Integer> featuresIndices; + private Map<Integer, Map<String, Integer>> featuresMapToIndex; + + @Before + public void setUp() { + // Simple char vocabulary + charVocab = new HashMap<>(); + charVocab.put("<PAD>", 0); + charVocab.put("<UNK>", 1); + charVocab.put("a", 2); + charVocab.put("b", 3); + charVocab.put("c", 4); + + // Simple tag index + tagIndex = new HashMap<>(); + tagIndex.put(0, "<PAD>"); + tagIndex.put(1, "O"); + tagIndex.put(2, "B-TITLE"); + tagIndex.put(3, "I-TITLE"); + + // Feature indices matching Python's 1-based indexing (columns 9, 10, 11 in + // file) + featuresIndices = Arrays.asList(9, 10, 11); + + // Feature vocabularies for each column + featuresMapToIndex = new HashMap<>(); + + // Column 9 vocabulary (e.g., BLOCK features) + Map<String, Integer> col9Vocab = new HashMap<>(); + col9Vocab.put("BLOCKSTART", 1); + col9Vocab.put("BLOCKIN", 2); + col9Vocab.put("BLOCKEND", 3); + featuresMapToIndex.put(9, col9Vocab); + + // Column 10 vocabulary (e.g., LINE features) + Map<String, Integer> col10Vocab = new HashMap<>(); + col10Vocab.put("LINESTART", 1); + col10Vocab.put("LINEIN", 2); + col10Vocab.put("LINEEND", 3); + featuresMapToIndex.put(10, col10Vocab); + + // Column 11 vocabulary (e.g., ALIGN features) + Map<String, Integer> col11Vocab = new HashMap<>(); + col11Vocab.put("ALIGNEDLEFT", 1); + col11Vocab.put("ALIGNEDRIGHT", 2); + col11Vocab.put("CENTERED", 3); + featuresMapToIndex.put(11, col11Vocab); + + preprocessor = new Preprocessor(charVocab, tagIndex, 30, featuresIndices, featuresMapToIndex); + } + + @Test + public void testHasFeatures_returnsTrue() { + assertTrue(preprocessor.hasFeatures()); + } + + @Test + public void testGetFeaturesIndices() { + List<Integer> indices = 
preprocessor.getFeaturesIndices(); + assertEquals(3, indices.size()); + assertEquals(Integer.valueOf(9), indices.get(0)); + assertEquals(Integer.valueOf(10), indices.get(1)); + assertEquals(Integer.valueOf(11), indices.get(2)); + } + + @Test + public void testGetNumFeatures() { + assertEquals(3, preprocessor.getNumFeatures()); + } + + /** + * Test that tokensToFeatureIndices correctly maps feature values to vocabulary + * indices. + * + * The features array is a COMPACT structure where: + * - features[tokenIdx][0] contains the value for column featuresIndices[0] + * (column 9) + * - features[tokenIdx][1] contains the value for column featuresIndices[1] + * (column 10) + * etc. + */ + @Test + public void testTokensToFeatureIndices_mapsCorrectly() { + // Compact features array: features[token][featureSlot] + // Slot 0 = value for column 9, Slot 1 = value for column 10, Slot 2 = value for + // column 11 + String[][] features = { + { "BLOCKSTART", "LINESTART", "ALIGNEDLEFT" }, // Token 0 + { "BLOCKIN", "LINEIN", "ALIGNEDRIGHT" }, // Token 1 + }; + + long[][] result = preprocessor.tokensToFeatureIndices(features, 2); + + assertNotNull(result); + assertEquals(2, result.length); // seqLength = 2 + assertEquals(3, result[0].length); // 3 features + + // Token 0: BLOCKSTART(1), LINESTART(1), ALIGNEDLEFT(1) + assertEquals(1, result[0][0]); // BLOCKSTART -> 1 + assertEquals(1, result[0][1]); // LINESTART -> 1 + assertEquals(1, result[0][2]); // ALIGNEDLEFT -> 1 + + // Token 1: BLOCKIN(2), LINEIN(2), ALIGNEDRIGHT(2) + assertEquals(2, result[1][0]); // BLOCKIN -> 2 + assertEquals(2, result[1][1]); // LINEIN -> 2 + assertEquals(2, result[1][2]); // ALIGNEDRIGHT -> 2 + } + + @Test + public void testTokensToFeatureIndices_unknownValueMapsToZero() { + String[][] features = { + { "UNKNOWN_BLOCK", "LINESTART", "ALIGNEDLEFT" }, + }; + + long[][] result = preprocessor.tokensToFeatureIndices(features, 1); + + // Unknown value maps to 0 (default) + assertEquals(0, result[0][0]); // 
UNKNOWN_BLOCK -> 0 + assertEquals(1, result[0][1]); // LINESTART -> 1 + assertEquals(1, result[0][2]); // ALIGNEDLEFT -> 1 + } + + @Test + public void testTokensToFeatureIndices_nullValueMapsToZero() { + String[][] features = { + { null, "LINESTART", null }, + }; + + long[][] result = preprocessor.tokensToFeatureIndices(features, 1); + + // Null value keeps default 0 + assertEquals(0, result[0][0]); // null -> 0 + assertEquals(1, result[0][1]); // LINESTART -> 1 + assertEquals(0, result[0][2]); // null -> 0 + } + + @Test + public void testTokensToFeatureIndices_padsToSeqLength() { + String[][] features = { + { "BLOCKSTART", "LINESTART", "ALIGNEDLEFT" }, + }; + + // Request seqLength=4, but only 1 token + long[][] result = preprocessor.tokensToFeatureIndices(features, 4); + + assertEquals(4, result.length); // Padded to seqLength + + // Token 0 should have correct values + assertEquals(1, result[0][0]); + + // Tokens 1-3 should be all zeros (padding) + for (int i = 1; i < 4; i++) { + for (int j = 0; j < 3; j++) { + assertEquals(0, result[i][j]); + } + } + } + + @Test + public void testTokensToFeatureIndices_truncatesToSeqLength() { + String[][] features = { + { "BLOCKSTART", "LINESTART", "ALIGNEDLEFT" }, + { "BLOCKIN", "LINEIN", "ALIGNEDRIGHT" }, + { "BLOCKEND", "LINEEND", "CENTERED" }, + }; + + // Request seqLength=2, but 3 tokens provided + long[][] result = preprocessor.tokensToFeatureIndices(features, 2); + + assertEquals(2, result.length); // Truncated to seqLength + + // Token 0 and 1 should have correct values + assertEquals(1, result[0][0]); // BLOCKSTART + assertEquals(2, result[1][0]); // BLOCKIN + } + + @Test + public void testTokensToFeatureIndices_handlesShortFeatureArray() { + // Features array shorter than expected (missing slot 2) + String[][] features = { + { "BLOCKSTART", "LINESTART" }, // Only 2 elements, should be 3 + }; + + long[][] result = preprocessor.tokensToFeatureIndices(features, 1); + + // Should not throw, and missing feature should be 
0 + assertEquals(1, result[0][0]); // BLOCKSTART + assertEquals(1, result[0][1]); // LINESTART + // Slot 2 is missing in input, should remain 0 + assertEquals(0, result[0][2]); + } + + @Test + public void testNoFeatures_returnsNull() { + // Create preprocessor without features + Preprocessor noFeaturesPreprocessor = new Preprocessor(charVocab, tagIndex, 30); + + assertFalse(noFeaturesPreprocessor.hasFeatures()); + assertNull(noFeaturesPreprocessor.tokensToFeatureIndices(new String[][] {}, 1)); + } + + @Test + public void testCreateMask() { + boolean[] mask = preprocessor.createMask(3, 5); + + assertEquals(5, mask.length); + assertTrue(mask[0]); + assertTrue(mask[1]); + assertTrue(mask[2]); + assertFalse(mask[3]); + assertFalse(mask[4]); + } + + @Test + public void testGetTagIndex() { + Map<Integer, String> tags = preprocessor.getTagIndex(); + + assertEquals("O", tags.get(1)); + assertEquals("B-TITLE", tags.get(2)); + assertEquals("I-TITLE", tags.get(3)); + } +} diff --git a/grobid-core/src/test/java/org/grobid/core/engines/tagging/delft/WordEmbeddingsIntegrationTest.java b/grobid-core/src/test/java/org/grobid/core/engines/tagging/delft/WordEmbeddingsIntegrationTest.java new file mode 100644 index 0000000000..c815dc7afb --- /dev/null +++ b/grobid-core/src/test/java/org/grobid/core/engines/tagging/delft/WordEmbeddingsIntegrationTest.java @@ -0,0 +1,201 @@ +package org.grobid.core.engines.tagging.delft; + +import org.grobid.core.utilities.GrobidProperties; +import org.junit.*; + +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Path; + +import static org.hamcrest.CoreMatchers.is; +import static org.hamcrest.CoreMatchers.notNullValue; +import static org.hamcrest.MatcherAssert.assertThat; +import static org.hamcrest.Matchers.greaterThan; +import static org.junit.Assert.assertTrue; +import static org.junit.Assume.assumeTrue; + +/** + * Integration test for WordEmbeddings LMDB lookup. 
+ * + * This test verifies that ONNX word embeddings can be loaded and queried + * correctly from the LMDB database. It helps diagnose issues with: + * - LMDB native library loading (especially on Linux) + * - LMDB database format compatibility + * - Embeddings path configuration + * + * Prerequisites: + * - Embeddings must be preloaded using: python3 + * grobid-home/scripts/preload_embeddings.py --embedding glove-840B + * - The LMDB database should be at {delft}/data/db/glove-840B + */ +public class WordEmbeddingsIntegrationTest { + + private static final String EMBEDDINGS_NAME = "glove-840B"; + private static final int EMBEDDING_SIZE = 300; // GloVe-840B dimension + + private static Path embeddingsPath; + private WordEmbeddings embeddings; + + @BeforeClass + public static void setUpClass() { + // Initialize GROBID properties + GrobidProperties.getInstance(); + + // Get embeddings path + String delftPath = GrobidProperties.getDeLFTFilePath(); + embeddingsPath = Path.of(delftPath, "data", "db", EMBEDDINGS_NAME); + } + + @Before + public void setUp() throws IOException { + // Skip test if embeddings are not available + assumeTrue("Embeddings not found at " + embeddingsPath + + ". 
Please run: python3 grobid-home/scripts/preload_embeddings.py --embedding glove-840B", + Files.exists(embeddingsPath) && Files.isDirectory(embeddingsPath)); + + // Open embeddings database + embeddings = WordEmbeddings.getInstance(embeddingsPath, EMBEDDING_SIZE); + } + + @After + public void tearDown() { + if (embeddings != null) { + embeddings.close(); + } + } + + @Test + public void testEmbeddingsCanBeOpened() { + // If we get here without exception, the database opened successfully + assertThat(embeddings, is(notNullValue())); + assertThat(embeddings.getEmbeddingSize(), is(EMBEDDING_SIZE)); + } + + @Test + public void testLookupKnownWord() { + // "the" is one of the most common words and should be in any embedding + float[] embedding = embeddings.getEmbedding("the"); + + assertThat("Embedding should not be null", embedding, is(notNullValue())); + assertThat("Embedding should have correct dimension", embedding.length, is(EMBEDDING_SIZE)); + + // Check that the embedding is not all zeros (word was found) + double sumSquares = 0.0; + for (float f : embedding) { + sumSquares += f * f; + } + assertThat("Embedding for 'the' should not be all zeros", sumSquares, greaterThan(0.0)); + + // Validate embedding values are in expected range for GloVe + // If LMDB contains pickled numpy format (not raw float32), values will be + // garbage + validateEmbeddingValuesInRange(embedding, "the"); + } + + @Test + public void testLookupAnotherKnownWord() { + // Test another common word + float[] embedding = embeddings.getEmbedding("science"); + + assertThat(embedding, is(notNullValue())); + assertThat(embedding.length, is(EMBEDDING_SIZE)); + + // Check that the embedding is not all zeros + double sumSquares = 0.0; + for (float f : embedding) { + sumSquares += f * f; + } + assertThat("Embedding for 'science' should not be all zeros", sumSquares, greaterThan(0.0)); + } + + @Test + public void testContainsKnownWord() { + // Test the contains method for a word that should exist + 
assertTrue("Database should contain 'the'", embeddings.contains("the")); + assertTrue("Database should contain 'and'", embeddings.contains("and")); + } + + @Test + public void testGetMultipleEmbeddings() { + String[] words = { "the", "quick", "brown", "fox" }; + float[][] embeddingsResult = embeddings.getEmbeddings(words); + + assertThat("Batch result should have correct length", + embeddingsResult.length, is(words.length)); + + for (int i = 0; i < words.length; i++) { + assertThat("Each embedding should have correct dimension", + embeddingsResult[i].length, is(EMBEDDING_SIZE)); + } + } + + @Test + public void testDigitNormalization() { + // WordEmbeddings should normalize digits to "0" + // So "2024" should look up "0000" + float[] embedding = embeddings.getEmbedding("2024"); + + assertThat(embedding, is(notNullValue())); + assertThat(embedding.length, is(EMBEDDING_SIZE)); + // Note: The normalized form "0000" may or may not be in the vocabulary, + // but the lookup should succeed (returning zero vector if not found) + } + + @Test + public void testUnknownWordReturnsZeroVector() { + // A very unlikely word that should not be in the vocabulary + float[] embedding = embeddings.getEmbedding("xyzzy12345qwerty"); + + assertThat(embedding, is(notNullValue())); + assertThat(embedding.length, is(EMBEDDING_SIZE)); + + // Should be all zeros + double sumSquares = 0.0; + for (float f : embedding) { + sumSquares += f * f; + } + assertThat("Unknown word should return zero vector", sumSquares, is(0.0)); + } + + /** + * Validates that embedding values are in expected range for GloVe embeddings. + * + * GloVe embeddings typically have values in the range of approximately -5 to 5. + * If the LMDB database contains pickled numpy format instead of raw float32, + * the bytes will be interpreted as garbage floats with extreme values (often + * very large or NaN/Infinity). 
+ * + * @param embedding The embedding vector to validate + * @param word The word being looked up (for error messages) + */ + private void validateEmbeddingValuesInRange(float[] embedding, String word) { + final float MAX_VALID_VALUE = 10.0f; // GloVe values are typically < 5 + + for (int i = 0; i < embedding.length; i++) { + float value = embedding[i]; + + // Check for NaN or Infinity (common when interpreting pickle bytes as float) + assertTrue( + String.format("Embedding for '%s' contains NaN at index %d. " + + "This suggests the LMDB database contains pickled numpy format " + + "instead of raw float32. Please regenerate embeddings using: " + + "python3 grobid-home/scripts/preload_embeddings.py --embedding glove-840B", + word, i), + !Float.isNaN(value)); + + assertTrue( + String.format("Embedding for '%s' contains Infinity at index %d. " + + "This suggests the LMDB database contains pickled numpy format " + + "instead of raw float32.", word, i), + !Float.isInfinite(value)); + + // Check for extreme values (pickled data often produces very large floats) + assertTrue( + String.format("Embedding for '%s' has extreme value %.2f at index %d " + + "(expected range: -%.0f to %.0f). 
This suggests the LMDB database " + + "contains pickled numpy format instead of raw float32.", + word, value, i, MAX_VALID_VALUE, MAX_VALID_VALUE), + Math.abs(value) <= MAX_VALID_VALUE); + } + } +} diff --git a/grobid-core/src/test/java/org/grobid/core/test/EngineTest.java b/grobid-core/src/test/java/org/grobid/core/test/EngineTest.java index ae882e3756..f8449e45d1 100755 --- a/grobid-core/src/test/java/org/grobid/core/test/EngineTest.java +++ b/grobid-core/src/test/java/org/grobid/core/test/EngineTest.java @@ -2,6 +2,7 @@ import org.grobid.core.engines.Engine; import org.grobid.core.factory.GrobidFactory; +import org.grobid.core.utilities.TestEngineUtils; import org.junit.AfterClass; import org.junit.BeforeClass; @@ -10,6 +11,7 @@ public abstract class EngineTest { @BeforeClass public static void setUpClass() throws Exception { + TestEngineUtils.initGrobidForceWapiti(); engine = GrobidFactory.getInstance().getEngine(); } diff --git a/grobid-core/src/test/java/org/grobid/core/test/TestFullTextParser.java b/grobid-core/src/test/java/org/grobid/core/test/TestFullTextParser.java index ee1bafa44e..1ed0bbf195 100755 --- a/grobid-core/src/test/java/org/grobid/core/test/TestFullTextParser.java +++ b/grobid-core/src/test/java/org/grobid/core/test/TestFullTextParser.java @@ -11,7 +11,6 @@ import org.grobid.core.engines.label.TaggingLabel; import org.grobid.core.factory.GrobidFactory; import org.grobid.core.layout.Block; -import org.grobid.core.utilities.GrobidProperties; import org.junit.*; import java.io.File; @@ -29,10 +28,6 @@ public class TestFullTextParser extends EngineTest { - @BeforeClass - public static void init() { - GrobidProperties.getInstance(); - } @AfterClass public static void tearDown(){ diff --git a/grobid-core/src/test/java/org/grobid/core/utilities/LayoutTokensUtilIntegrationTest.java b/grobid-core/src/test/java/org/grobid/core/utilities/LayoutTokensUtilIntegrationTest.java index 94331b0e0b..81b68da092 100644 --- 
a/grobid-core/src/test/java/org/grobid/core/utilities/LayoutTokensUtilIntegrationTest.java +++ b/grobid-core/src/test/java/org/grobid/core/utilities/LayoutTokensUtilIntegrationTest.java @@ -4,7 +4,6 @@ import org.grobid.core.document.DocumentSource; import org.grobid.core.engines.Engine; import org.grobid.core.engines.config.GrobidAnalysisConfig; -import org.grobid.core.main.LibraryLoader; import org.junit.BeforeClass; import org.junit.Test; @@ -17,8 +16,7 @@ public class LayoutTokensUtilIntegrationTest { @BeforeClass public static void setUp() throws Exception { - LibraryLoader.load(); - GrobidProperties.getInstance(); + TestEngineUtils.initGrobidForceWapiti(); } @Test diff --git a/grobid-core/src/test/java/org/grobid/core/visualization/TestCitationsVisualizer.java b/grobid-core/src/test/java/org/grobid/core/visualization/TestCitationsVisualizer.java index 8ec44d853e..556d764b29 100644 --- a/grobid-core/src/test/java/org/grobid/core/visualization/TestCitationsVisualizer.java +++ b/grobid-core/src/test/java/org/grobid/core/visualization/TestCitationsVisualizer.java @@ -7,7 +7,9 @@ import org.grobid.core.engines.Engine; import org.grobid.core.engines.config.GrobidAnalysisConfig; import org.grobid.core.factory.GrobidFactory; +import org.grobid.core.utilities.TestEngineUtils; import org.junit.AfterClass; +import org.junit.BeforeClass; import org.junit.Test; import java.io.File; @@ -16,8 +18,6 @@ import java.util.Arrays; import java.util.List; -import com.fasterxml.jackson.core.Versioned; - import static org.junit.Assert.assertTrue; import static org.junit.Assert.assertEquals; @@ -25,6 +25,11 @@ public class TestCitationsVisualizer { static final ObjectMapper mapper = new ObjectMapper(); + @BeforeClass + public static void setUp() { + TestEngineUtils.initGrobidForceWapiti(); + } + @AfterClass public static void tearDown(){ GrobidFactory.reset(); diff --git a/grobid-core/src/test/kotlin/org/grobid/core/engines/FundingAcknowledgementParserIntegrationTest.kt 
b/grobid-core/src/test/kotlin/org/grobid/core/engines/FundingAcknowledgementParserIntegrationTest.kt index 4ae2422567..658314ad80 100644 --- a/grobid-core/src/test/kotlin/org/grobid/core/engines/FundingAcknowledgementParserIntegrationTest.kt +++ b/grobid-core/src/test/kotlin/org/grobid/core/engines/FundingAcknowledgementParserIntegrationTest.kt @@ -1,7 +1,7 @@ package org.grobid.core.engines import org.grobid.core.engines.config.GrobidAnalysisConfig -import org.grobid.core.factory.AbstractEngineFactory +import org.grobid.core.utilities.TestEngineUtils import org.grobid.core.utilities.GrobidConfig import org.grobid.core.utilities.GrobidProperties import org.hamcrest.MatcherAssert.assertThat @@ -10,7 +10,6 @@ import org.junit.Before import org.junit.BeforeClass import org.junit.Test import org.xmlunit.matchers.CompareMatcher -import java.util.* class FundingAcknowledgementParserIntegrationTest { @@ -267,7 +266,7 @@ class FundingAcknowledgementParserIntegrationTest { @BeforeClass @Throws(java.lang.Exception::class) fun setInitialContext(): Unit { - AbstractEngineFactory.init() + TestEngineUtils.initGrobidForceWapiti() } } } \ No newline at end of file diff --git a/grobid-home/config/grobid-onnx.yaml b/grobid-home/config/grobid-onnx.yaml new file mode 100644 index 0000000000..b170a1dce0 --- /dev/null +++ b/grobid-home/config/grobid-onnx.yaml @@ -0,0 +1,257 @@ +# this is the configuration file for the GROBID instance using ONNX models (no DeLFT/Python) +# This configuration uses ONNX for deep learning models and Wapiti for CRF models. + +grobid: + # where all the Grobid resources are stored (models, lexicon, native libraries, etc.), normally no need to change + grobidHome: "grobid-home" + + # path relative to the grobid-home path (e.g. tmp for grobid-home/tmp) or absolute path (/tmp) + temp: "tmp" + + # normally nothing to change here, path relative to the grobid-home path (e.g. 
grobid-home/lib) + nativelibrary: "lib" + + pdf: + pdfalto: + # path relative to the grobid-home path (e.g. grobid-home/pdfalto), you don't want to change this normally + path: "pdfalto" + # security for PDF parsing + memoryLimitMb: 6096 + timeoutSec: 120 + + # security relative to the PDF parsing result + blocksMax: 200000 + tokensMax: 1000000 + + consolidation: + # define the bibliographical data consolidation service to be used, either "crossref" for CrossRef REST API or + # "glutton" for https://github.com/kermitt2/biblio-glutton + service: "crossref" + #service: "glutton" + glutton: + #url: "https://cloud.science-miner.com/glutton" + url: "http://localhost:8080" + timeoutSec: 60 + crossref: + mailto: + # NOTE! To use the Crossref web API, you are normally expected to use it politely + # and to provide an email address here, e.g. mailto: "toto@titi.tutu" + token: + # to use Crossref metadata plus service (available by subscription) + #token: "yourmysteriouscrossrefmetadataplusauthorizationtokentobeputhere" + timeoutSec: 60 + # Timeout for CrossRef API calls (in seconds) - USE with CARE! 
+ proxy: + # proxy to be used when doing external call to the consolidation service + host: + port: + + # CORS configuration for the GROBID web API service + corsAllowedOrigins: "*" + corsAllowedMethods: "OPTIONS,GET,PUT,POST,DELETE,HEAD" + corsAllowedHeaders: "X-Requested-With,Content-Type,Accept,Origin" + + # the actual implementation for language recognition to be used + languageDetectorFactory: "org.grobid.core.lang.impl.CybozuLanguageDetectorFactory" + + # the actual implementation for optional sentence segmentation to be used (PragmaticSegmenter or OpenNLP) + #sentenceDetectorFactory: "org.grobid.core.lang.impl.PragmaticSentenceDetectorFactory" + sentenceDetectorFactory: "org.grobid.core.lang.impl.OpenNLPSentenceDetectorFactory" + + # maximum concurrency allowed to GROBID server for processing parallel requests - change it according to your CPU/GPU capacities + # for a production server running only GROBID, set the value slightly above the available number of threads of the server + # to get the best performance and security + concurrency: 10 + # when the pool is full, for queries waiting for the availability of a Grobid engine, this is the maximum time wait to try + # to get an engine (in seconds) - normally never change it + poolMaxWait: 1 + + # DeLFT is not used in this ONNX-only configuration + delft: + install: "../delft" + pythonVirtualEnv: + + wapiti: + # Wapiti global parameters + # number of threads for training the wapiti models (0 to use all available processors) + nbThreads: 0 + + models: + # ONNX-only configuration: models use either "onnx" or "wapiti" engine + # No "delft" engine is used in this configuration + + - name: "segmentation" + # at this time, must always be CRF wapiti, the input sequence size is too large for a Deep Learning implementation + engine: "wapiti" + wapiti: + epsilon: 0.0000001 + window: 50 + nbMaxIterations: 2000 + + - name: "segmentation-article-light" + engine: "wapiti" + wapiti: + epsilon: 0.0000001 + window: 50 + 
nbMaxIterations: 2000 + + - name: "segmentation-article-light-ref" + engine: "wapiti" + wapiti: + epsilon: 0.0000001 + window: 50 + nbMaxIterations: 2000 + + - name: "segmentation-sdo-ietf" + engine: "wapiti" + wapiti: + epsilon: 0.0000001 + window: 50 + nbMaxIterations: 2000 + + - name: "fulltext" + # at this time, must always be CRF wapiti, the input sequence size is too large for a Deep Learning implementation + engine: "wapiti" + wapiti: + epsilon: 0.0001 + window: 20 + nbMaxIterations: 1500 + + - name: "header" + engine: "onnx" + wapiti: + epsilon: 0.000001 + window: 30 + nbMaxIterations: 1500 + onnx: + architecture: "BidLSTM_CRF_FEATURES" + + - name: "header-article-light" + engine: "wapiti" + wapiti: + epsilon: 0.000001 + window: 30 + nbMaxIterations: 1500 + + - name: "header-article-light-ref" + engine: "wapiti" + wapiti: + epsilon: 0.000001 + window: 30 + nbMaxIterations: 1500 + + - name: "header-sdo-ietf" + engine: "wapiti" + wapiti: + epsilon: 0.000001 + window: 30 + nbMaxIterations: 1500 + + - name: "reference-segmenter" + engine: "onnx" + wapiti: + epsilon: 0.00001 + window: 20 + onnx: + architecture: "BidLSTM_CRF_FEATURES" + + - name: "name-header" + engine: "wapiti" + + - name: "name-citation" + engine: "wapiti" + + - name: "date" + engine: "wapiti" + onnx: + architecture: "BidLSTM_CRF_FEATURES" + + - name: "figure" + engine: "wapiti" + wapiti: + epsilon: 0.00001 + window: 20 + + - name: "table" + engine: "wapiti" + wapiti: + epsilon: 0.00001 + window: 20 + + - name: "affiliation-address" + engine: "onnx" + onnx: + architecture: "BidLSTM_CRF_FEATURES" + + - name: "citation" + engine: "onnx" + wapiti: + epsilon: 0.00001 + window: 50 + nbMaxIterations: 3000 + onnx: + architecture: "BidLSTM_CRF_FEATURES" + + - name: "patent-citation" + engine: "wapiti" + wapiti: + epsilon: 0.0001 + window: 20 + + - name: "funding-acknowledgement" + engine: "onnx" + wapiti: + epsilon: 0.00001 + window: 50 + nbMaxIterations: 2000 + onnx: + architecture: 
"BidLSTM_CRF_FEATURES" + + - name: "copyright" + engine: "onnx" + delft: + architecture: "gru" + onnx: + architecture: "gru" + + - name: "license" + engine: "onnx" + delft: + architecture: "gru" + onnx: + architecture: "gru" + + # for **service only**: how to load the models, + # false -> models are loaded when needed, avoiding putting in memory useless models (only in case of CRF) but slow down + # significantly the service at first call + # true -> all the models are loaded into memory at the server startup (default), slow the start of the services + # and models not used will take some more memory (only in case of CRF), but server is immediately warm and ready + modelPreload: true + +server: + type: custom + applicationConnectors: + - type: http + port: 8070 + adminConnectors: + - type: http + port: 8071 + registerDefaultExceptionMappers: false + # change the following for having all http requests logged + requestLog: + appenders: [] + +# these logging settings apply to the Grobid service usage mode +logging: + level: INFO + loggers: + org.apache.pdfbox.pdmodel.font.PDSimpleFont: "OFF" + org.glassfish.jersey.internal: "OFF" + com.squarespace.jersey2.guice.JerseyGuiceUtils: "OFF" + appenders: + - type: console + threshold: INFO + timeZone: UTC + # uncomment to have the logs in json format + #layout: + # type: json diff --git a/grobid-home/config/grobid.yaml b/grobid-home/config/grobid.yaml index 7228d9ae95..7182379c9a 100644 --- a/grobid-home/config/grobid.yaml +++ b/grobid-home/config/grobid.yaml @@ -139,8 +139,8 @@ grobid: nbMaxIterations: 1500 - name: "header" - engine: "wapiti" - #engine: "delft" + engine: "onnx" +# engine: "delft" wapiti: # wapiti training parameters, they will be used at training time only epsilon: 0.000001 @@ -148,7 +148,7 @@ grobid: nbMaxIterations: 1500 delft: # deep learning parameters - architecture: "BidLSTM_ChainCRF_FEATURES" + architecture: "BidLSTM_CRF_FEATURES" #transformer: "allenai/scibert_scivocab_cased" useELMo: false runtime: 
@@ -162,6 +162,8 @@ grobid: #batch_size: 6 max_sequence_length: 3000 batch_size: 9 + onnx: + architecture: "BidLSTM_CRF_FEATURES" - name: "header-article-light" engine: "wapiti" @@ -196,8 +198,9 @@ grobid: nbMaxIterations: 1500 - name: "reference-segmenter" - engine: "wapiti" - #engine: "delft" + #engine: "wapiti" +# engine: "delft" + engine: "onnx" wapiti: # wapiti training parameters, they will be used at training time only epsilon: 0.00001 @@ -214,6 +217,8 @@ grobid: # parameters used for training max_sequence_length: 3000 batch_size: 10 + onnx: + architecture: "BidLSTM_CRF_FEATURES" - name: "name-header" engine: "wapiti" @@ -232,9 +237,12 @@ grobid: - name: "date" engine: "wapiti" #engine: "delft" +# engine: "onnx" delft: # deep learning parameters architecture: "BidLSTM_CRF_FEATURES" + onnx: + architecture: "BidLSTM_CRF_FEATURES" - name: "figure" engine: "wapiti" @@ -259,15 +267,19 @@ grobid: architecture: "BidLSTM_CRF" - name: "affiliation-address" - engine: "wapiti" +# engine: "wapiti" #engine: "delft" + engine: "onnx" delft: # deep learning parameters architecture: "BidLSTM_CRF_FEATURES" + onnx: + architecture: "BidLSTM_CRF_FEATURES" - name: "citation" - engine: "wapiti" - #engine: "delft" + #engine: "wapiti" +# engine: "delft" + engine: "onnx" wapiti: # wapiti training parameters, they will be used at training time only epsilon: 0.00001 @@ -287,6 +299,8 @@ grobid: # parameters used for training max_sequence_length: 500 batch_size: 50 + onnx: + architecture: "BidLSTM_CRF_FEATURES" - name: "patent-citation" engine: "wapiti" @@ -309,8 +323,9 @@ grobid: batch_size: 40 - name: "funding-acknowledgement" - engine: "wapiti" +# engine: "wapiti" #engine: "delft" + engine: "onnx" wapiti: # wapiti training parameters, they will be used at training time only epsilon: 0.00001 @@ -330,28 +345,36 @@ grobid: # parameters used for training max_sequence_length: 500 batch_size: 40 + onnx: + architecture: "BidLSTM_CRF_FEATURES" - name: "copyright" # at this time, we only have 
a DeLFT implementation, # use "wapiti" if the deep learning library JNI is not available and model will then be ignored #engine: "delft" - engine: "wapiti" +# engine: "wapiti" + engine: "onnx" delft: # deep learning parameters architecture: "gru" #architecture: "bert" #transformer: "allenai/scibert_scivocab_cased" + onnx: + architecture: "gru" - name: "license" # at this time, for being active, it must be DeLFT, no other implementation is available # use "wapiti" if the deep learning library JNI is not available and model will then be ignored #engine: "delft" - engine: "wapiti" +# engine: "wapiti" + engine: "onnx" delft: # deep learning parameters architecture: "gru" #architecture: "bert" #transformer: "allenai/scibert_scivocab_cased" + onnx: + architecture: "gru" # for **service only**: how to load the models, # false -> models are loaded when needed, avoiding putting in memory useless models (only in case of CRF) but slow down @@ -380,6 +403,9 @@ logging: org.apache.pdfbox.pdmodel.font.PDSimpleFont: "OFF" org.glassfish.jersey.internal: "OFF" com.squarespace.jersey2.guice.JerseyGuiceUtils: "OFF" + # org.eclipse.jetty: "OFF" + # org.grobid: "INFO" + # org.grobid.core.engines.tagging.delft.WordEmbeddings: "DEBUG" appenders: - type: console threshold: INFO diff --git a/grobid-home/models/affiliation-address-BidLSTM_CRF/config.json b/grobid-home/models/affiliation-address-BidLSTM_CRF/config.json deleted file mode 100644 index ea73141bf9..0000000000 --- a/grobid-home/models/affiliation-address-BidLSTM_CRF/config.json +++ /dev/null @@ -1,26 +0,0 @@ -{ - "model_name": "affiliation-address-BidLSTM_CRF", - "architecture": "BidLSTM_CRF", - "embeddings_name": "glove-840B", - "char_vocab_size": 152, - "case_vocab_size": 8, - "char_embedding_size": 25, - "num_char_lstm_units": 25, - "max_char_length": 30, - "features_vocabulary_size": 12, - "features_indices": null, - "features_embedding_size": 4, - "features_lstm_units": 4, - "max_sequence_length": 600, - 
"word_embedding_size": 300, - "num_word_lstm_units": 100, - "case_embedding_size": 5, - "dropout": 0.5, - "recurrent_dropout": 0.5, - "use_crf": true, - "use_chain_crf": false, - "fold_number": 1, - "batch_size": 20, - "transformer_name": null, - "use_ELMo": false -} \ No newline at end of file diff --git a/grobid-home/models/affiliation-address-BidLSTM_CRF/model_weights.hdf5 b/grobid-home/models/affiliation-address-BidLSTM_CRF/model_weights.hdf5 deleted file mode 100644 index 8775d3492c..0000000000 Binary files a/grobid-home/models/affiliation-address-BidLSTM_CRF/model_weights.hdf5 and /dev/null differ diff --git a/grobid-home/models/affiliation-address-BidLSTM_CRF/preprocessor.json b/grobid-home/models/affiliation-address-BidLSTM_CRF/preprocessor.json deleted file mode 100644 index ac8c8629be..0000000000 --- a/grobid-home/models/affiliation-address-BidLSTM_CRF/preprocessor.json +++ /dev/null @@ -1,223 +0,0 @@ -{ - "padding": true, - "return_lengths": true, - "return_word_embeddings": true, - "return_casing": false, - "return_features": false, - "return_chars": true, - "return_bert_embeddings": false, - "vocab_char": { - "<PAD>": 0, - "<UNK>": 1, - "!": 2, - "\"": 3, - "#": 4, - "$": 5, - "&": 6, - "'": 7, - "(": 8, - ")": 9, - "*": 10, - "+": 11, - ",": 12, - "-": 13, - ".": 14, - "/": 15, - "0": 16, - "1": 17, - "2": 18, - "3": 19, - "4": 20, - "5": 21, - "6": 22, - "7": 23, - "8": 24, - "9": 25, - ":": 26, - ";": 27, - ">": 28, - "@": 29, - "A": 30, - "B": 31, - "C": 32, - "D": 33, - "E": 34, - "F": 35, - "G": 36, - "H": 37, - "I": 38, - "J": 39, - "K": 40, - "L": 41, - "M": 42, - "N": 43, - "O": 44, - "P": 45, - "Q": 46, - "R": 47, - "S": 48, - "T": 49, - "U": 50, - "V": 51, - "W": 52, - "X": 53, - "Y": 54, - "Z": 55, - "[": 56, - "\\": 57, - "]": 58, - "^": 59, - "_": 60, - "a": 61, - "b": 62, - "c": 63, - "d": 64, - "e": 65, - "f": 66, - "g": 67, - "h": 68, - "i": 69, - "j": 70, - "k": 71, - "l": 72, - "m": 73, - "n": 74, - "o": 75, - "p": 76, - "q": 77, - 
"r": 78, - "s": 79, - "t": 80, - "u": 81, - "v": 82, - "w": 83, - "x": 84, - "y": 85, - "z": 86, - "~": 87, - "\u007f": 88, - "\u0080": 89, - "\u009f": 90, - "\u00a3": 91, - "\u00a7": 92, - "\u00a8": 93, - "\u00aa": 94, - "\u00b3": 95, - "\u00b4": 96, - "\u00b6": 97, - "\u00bb": 98, - "\u00c2": 99, - "\u00c3": 100, - "\u00c5": 101, - "\u00c8": 102, - "\u00c9": 103, - "\u00cc": 104, - "\u00d5": 105, - "\u00d6": 106, - "\u00dc": 107, - "\u00df": 108, - "\u00e0": 109, - "\u00e1": 110, - "\u00e2": 111, - "\u00e3": 112, - "\u00e4": 113, - "\u00e5": 114, - "\u00e7": 115, - "\u00e8": 116, - "\u00e9": 117, - "\u00ea": 118, - "\u00ec": 119, - "\u00ed": 120, - "\u00ee": 121, - "\u00ef": 122, - "\u00f1": 123, - "\u00f2": 124, - "\u00f3": 125, - "\u00f4": 126, - "\u00f6": 127, - "\u00f8": 128, - "\u00fb": 129, - "\u00fc": 130, - "\u010d": 131, - "\u0131": 132, - "\u0141": 133, - "\u0142": 134, - "\u0144": 135, - "\u0158": 136, - "\u0160": 137, - "\u0161": 138, - "\u017a": 139, - "\u017c": 140, - "\u017e": 141, - "\u0288": 142, - "\u02d8": 143, - "\u02db": 144, - "\u2013": 145, - "\u2019": 146, - "\u2020": 147, - "\u2021": 148, - "\u22a5": 149, - "\u2424": 150, - "\uff0d": 151 - }, - "vocab_tag": { - "<PAD>": 0, - "B-<addrLine>": 1, - "B-<country>": 2, - "B-<department>": 3, - "B-<institution>": 4, - "B-<laboratory>": 5, - "B-<marker>": 6, - "B-<postBox>": 7, - "B-<postCode>": 8, - "B-<region>": 9, - "B-<settlement>": 10, - "I-<addrLine>": 11, - "I-<country>": 12, - "I-<department>": 13, - "I-<institution>": 14, - "I-<laboratory>": 15, - "I-<marker>": 16, - "I-<postBox>": 17, - "I-<postCode>": 18, - "I-<region>": 19, - "I-<settlement>": 20, - "O": 21 - }, - "vocab_case": [ - "<PAD>", - "numeric", - "allLower", - "allUpper", - "initialUpper", - "other", - "mainly_numeric", - "contains_digit" - ], - "max_char_length": 30, - "feature_preprocessor": null, - "indice_tag": { - "0": "<PAD>", - "1": "B-<addrLine>", - "2": "B-<country>", - "3": "B-<department>", - "4": 
"B-<institution>", - "5": "B-<laboratory>", - "6": "B-<marker>", - "7": "B-<postBox>", - "8": "B-<postCode>", - "9": "B-<region>", - "10": "B-<settlement>", - "11": "I-<addrLine>", - "12": "I-<country>", - "13": "I-<department>", - "14": "I-<institution>", - "15": "I-<laboratory>", - "16": "I-<marker>", - "17": "I-<postBox>", - "18": "I-<postCode>", - "19": "I-<region>", - "20": "I-<settlement>", - "21": "O" - } -} \ No newline at end of file diff --git a/grobid-home/models/affiliation-address-BidLSTM_CRF_FEATURES.onnx/config.json b/grobid-home/models/affiliation-address-BidLSTM_CRF_FEATURES.onnx/config.json new file mode 100644 index 0000000000..32d1c6258d --- /dev/null +++ b/grobid-home/models/affiliation-address-BidLSTM_CRF_FEATURES.onnx/config.json @@ -0,0 +1,25 @@ +{ + "modelName": "grobid-affiliation-address-BidLSTM_CRF_FEATURES", + "architecture": "BidLSTM_CRF_FEATURES", + "wordEmbeddingSize": 300, + "charEmbeddingSize": 25, + "numCharLstmUnits": 25, + "numWordLstmUnits": 100, + "maxSequenceLength": 600, + "embeddingsName": "glove-840B", + "featuresIndices": [ + 9, + 10, + 11, + 12, + 13, + 14, + 15, + 16, + 17, + 18 + ], + "featuresEmbeddingSize": 4, + "featuresLstmUnits": 4, + "featuresVocabularySize": 12 +} \ No newline at end of file diff --git a/grobid-home/models/affiliation-address-BidLSTM_CRF_FEATURES.onnx/crf_params.json b/grobid-home/models/affiliation-address-BidLSTM_CRF_FEATURES.onnx/crf_params.json new file mode 100644 index 0000000000..4fee8641ee --- /dev/null +++ b/grobid-home/models/affiliation-address-BidLSTM_CRF_FEATURES.onnx/crf_params.json @@ -0,0 +1,580 @@ +{ + "transitions": [ + [ + 0.0338442362844944, + -0.011294804513454437, + -0.07096321880817413, + -0.005429031793028116, + -0.184834823012352, + -0.14195792376995087, + -0.1298777163028717, + -0.13626863062381744, + -0.15994907915592194, + -0.1576385498046875, + -0.008872970007359982, + -0.04860258847475052, + -0.31969836354255676, + -0.3448556363582611, + -0.2721947431564331, + 
-0.26295602321624756, + -0.04120734706521034, + -0.12296182662248611, + -0.10490875691175461, + -0.23811109364032745, + -0.33208850026130676, + -0.21338006854057312 + ], + [ + -0.11512733995914459, + -0.6923450827598572, + -0.22968405485153198, + -0.40920260548591614, + -0.33445900678634644, + -0.39246270060539246, + -0.28574520349502563, + -0.15398506820201874, + -0.44393545389175415, + -0.32854267954826355, + -0.36147114634513855, + 0.6412872076034546, + -0.2502049505710602, + -0.47876307368278503, + -0.6729992628097534, + -0.46953049302101135, + -0.1952379196882248, + -0.5731874108314514, + -0.6122575998306274, + -0.5198708772659302, + -0.8026620745658875, + -0.6011205315589905 + ], + [ + 0.08398947864770889, + -0.10214826464653015, + -0.36945047974586487, + 0.14073754847049713, + 0.1809493899345398, + -0.022770758718252182, + 0.13679656386375427, + -0.19684462249279022, + -0.04579457268118858, + -0.32672908902168274, + -0.19960716366767883, + -0.33317822217941284, + 0.37945374846458435, + -0.3869170546531677, + -0.48166945576667786, + -0.45693469047546387, + -0.273945689201355, + -0.2872774004936218, + -0.27384841442108154, + -0.8044688701629639, + -0.5482637286186218, + -0.2849787771701813 + ], + [ + -0.20331723988056183, + -0.09747038781642914, + -0.12835180759429932, + -0.6417351961135864, + -0.5194398164749146, + -0.5331801772117615, + -0.28529787063598633, + -0.18212856352329254, + -0.17385028302669525, + -0.18486689031124115, + -0.22244811058044434, + -0.4923481345176697, + -0.4409734904766083, + 0.6115338206291199, + -0.7987716197967529, + -0.7595298290252686, + -0.09024617820978165, + -0.28387996554374695, + -0.3265945613384247, + -0.19966387748718262, + -0.5194218158721924, + -0.5118324756622314 + ], + [ + -0.2678092420101166, + 0.11975151300430298, + -0.15472297370433807, + -0.5431517362594604, + -0.5955774188041687, + -0.0704287514090538, + -0.35343289375305176, + -0.20265500247478485, + -0.1913072019815445, + -0.15860849618911743, + 
-0.2786189913749695, + -0.6792582869529724, + -0.4869024455547333, + -0.7131770849227905, + 0.5878925323486328, + -0.7785271406173706, + -0.06592006236314774, + -0.2652747631072998, + -0.3612750768661499, + -0.2952355146408081, + -0.6714939475059509, + -0.3171798586845398 + ], + [ + -0.06822061538696289, + -0.24414196610450745, + -0.18829117715358734, + -0.6102490425109863, + -0.4225368797779083, + -0.7064500451087952, + -0.3463769853115082, + -0.09594660252332687, + -0.20008857548236847, + -0.10293439030647278, + -0.2797398865222931, + -0.524177074432373, + -0.13302549719810486, + -0.8053358793258667, + -0.7043206095695496, + 0.7490493655204773, + -0.08999517560005188, + -0.34945327043533325, + -0.22292934358119965, + -0.2093796730041504, + -0.3577820658683777, + -0.0785355493426323 + ], + [ + -0.04269752651453018, + -0.3343313932418823, + -0.34465542435646057, + 0.32823076844215393, + 0.1438201665878296, + 0.1613306701183319, + -0.551685631275177, + -0.1690392941236496, + -0.21765810251235962, + -0.10817968845367432, + 0.20608828961849213, + -0.552809476852417, + -0.5935716032981873, + -0.47793206572532654, + -0.4809647798538208, + -0.45095494389533997, + 0.03234673663973808, + -0.4008941054344177, + -0.4173295497894287, + -0.3014482855796814, + -0.26181361079216003, + 0.14198879897594452 + ], + [ + -0.25616398453712463, + -0.3231348991394043, + -0.07378073781728745, + -0.26755547523498535, + -0.19526784121990204, + -0.13931867480278015, + -0.17527905106544495, + -0.5151861310005188, + -0.5941535234451294, + -0.21180596947669983, + -0.2713538408279419, + -0.6681471467018127, + -0.20507459342479706, + -0.2378290593624115, + -0.20684246718883514, + -0.31826263666152954, + -0.144175723195076, + 0.46975910663604736, + -0.4903338849544525, + -0.4082929491996765, + -0.40065455436706543, + -0.43297725915908813 + ], + [ + -0.04576817527413368, + -0.1872754991054535, + -0.07280124723911285, + 0.12524357438087463, + -0.2941194474697113, + -0.18050578236579895, + 
-0.08785556256771088, + -0.1494702398777008, + -0.5170264840126038, + -0.23730261623859406, + 0.19418339431285858, + -0.5354102849960327, + -0.4356920123100281, + -0.26658371090888977, + -0.22724920511245728, + -0.14311015605926514, + -0.2986925542354584, + -0.5090011954307556, + 0.4931524693965912, + -0.6585748195648193, + -0.7539120316505432, + 0.005383940879255533 + ], + [ + -0.14020489156246185, + -0.3386692404747009, + -0.228628009557724, + -0.10717488825321198, + 0.11151435971260071, + 0.01725652813911438, + -0.1155162900686264, + -0.3151521682739258, + 0.18926182389259338, + -0.6863798499107361, + -0.4057333469390869, + -0.3683810234069824, + -0.7868426442146301, + -0.17027698457241058, + -0.3190540671348572, + -0.34380242228507996, + -0.18444286286830902, + -0.5289602875709534, + -0.6327500939369202, + 0.421809583902359, + -0.8420408368110657, + 0.058010924607515335 + ], + [ + -0.15547366440296173, + -0.39495617151260376, + -0.04383600130677223, + 0.09642967581748962, + 0.030938755720853806, + 0.02147647738456726, + 0.17716777324676514, + -0.18954280018806458, + 0.4234936535358429, + -0.14591455459594727, + -0.569330096244812, + -0.7413203716278076, + -0.6057407855987549, + -0.3945390582084656, + -0.4948973059654236, + -0.42955654859542847, + -0.2370767593383789, + -0.5565851926803589, + -0.5121620297431946, + -0.9312217235565186, + 0.49886590242385864, + 0.18487174808979034 + ], + [ + -0.0667833760380745, + -0.6493995785713196, + -0.2009384036064148, + -0.09009983390569687, + -0.2755386233329773, + -0.183600515127182, + -0.29298117756843567, + -0.12015526741743088, + -0.10547426342964172, + -0.12298203259706497, + -0.2635583281517029, + 0.48656952381134033, + -0.3664420545101166, + -0.45485126972198486, + -0.6180541515350342, + -0.4231409728527069, + -0.22541125118732452, + -0.6635199189186096, + -0.6686914563179016, + -0.4504595994949341, + -0.8300284743309021, + 0.11286038160324097 + ], + [ + -0.10806786268949509, + -0.24087655544281006, + 
-0.6059082746505737, + -0.14044030010700226, + -0.09196656942367554, + -0.062293823808431625, + 0.25167328119277954, + -0.1219690591096878, + -0.2583712041378021, + -0.46903055906295776, + -0.38693398237228394, + -0.32251372933387756, + 0.3907884657382965, + -0.3778049349784851, + -0.5885772705078125, + -0.32243847846984863, + -0.14032557606697083, + -0.2956041693687439, + -0.2357301115989685, + -0.7257483601570129, + -0.5755476355552673, + -0.3575330674648285 + ], + [ + -0.19442693889141083, + -0.18871816992759705, + -0.2263738512992859, + -0.5697980523109436, + -0.29202374815940857, + -0.6187683939933777, + -0.07258469611406326, + -0.08734467625617981, + -0.24328407645225525, + -0.2316780537366867, + -0.2264264076948166, + -0.5679788589477539, + -0.33590978384017944, + 0.6841370463371277, + -0.6048682928085327, + -0.8423677682876587, + -0.10886385291814804, + -0.4247741997241974, + -0.2528461813926697, + -0.11571279913187027, + -0.385856032371521, + 0.07769302278757095 + ], + [ + -0.23717054724693298, + 0.203762486577034, + -0.2634022831916809, + -0.1127885952591896, + -0.5720428228378296, + -0.3277590274810791, + -0.020379478111863136, + 0.1492726355791092, + 0.02882552146911621, + -0.32453295588493347, + -0.20689554512500763, + -0.6169582605361938, + -0.5740383863449097, + -0.780655026435852, + 0.671401858329773, + -0.8302177786827087, + -0.1312478631734848, + -0.5203949213027954, + -0.30213627219200134, + -0.36967524886131287, + -0.666350781917572, + 0.07260458171367645 + ], + [ + -0.14403021335601807, + 0.02112434431910515, + -0.19679316878318787, + -0.369891494512558, + -0.31183508038520813, + -0.4039520025253296, + -0.2664774954319, + -0.139314204454422, + -0.2451072484254837, + -0.17762108147144318, + -0.27799054980278015, + -0.48988527059555054, + -0.22392243146896362, + -0.7307792901992798, + -0.7755672931671143, + 0.5162690281867981, + -0.22692079842090607, + -0.33374521136283875, + -0.11407633125782013, + -0.20966091752052307, + -0.32243072986602783, + 
-0.09145090728998184 + ], + [ + -0.10863027721643448, + 0.03115534409880638, + -0.024307439103722572, + -0.035311102867126465, + 0.10844236612319946, + -0.13088978826999664, + -0.20166195929050446, + -0.03031442128121853, + -0.2196142077445984, + 0.022887680679559708, + -0.15583018958568573, + -0.14159806072711945, + -0.20685729384422302, + -0.30751413106918335, + -0.07589766383171082, + -0.2828496992588043, + 0.1508883535861969, + -0.13108602166175842, + -0.12289957702159882, + -0.07026885449886322, + -0.025666600093245506, + -0.13361650705337524 + ], + [ + -0.09335536509752274, + -0.4188857078552246, + -0.1766972690820694, + -0.26449576020240784, + -0.03065682388842106, + -0.04593323543667793, + -0.12231624126434326, + -0.6053104400634766, + -0.2446293979883194, + -0.22564879059791565, + -0.1020064428448677, + -0.5942507982254028, + -0.2161254733800888, + -0.33632442355155945, + -0.3123648464679718, + -0.445837140083313, + -0.20968955755233765, + 0.26134783029556274, + -0.510423481464386, + -0.5132777094841003, + -0.536557137966156, + 0.17878380417823792 + ], + [ + -0.0562482550740242, + -0.20296913385391235, + -0.08046694844961166, + -0.045146431773900986, + -0.09141835570335388, + -0.37627968192100525, + 0.006649693939834833, + -0.22877253592014313, + -0.5542576909065247, + -0.3778811991214752, + 0.3955265283584595, + -0.44574010372161865, + -0.31969261169433594, + -0.24039211869239807, + -0.3440001606941223, + -0.3411547541618347, + -0.11136302351951599, + -0.34950101375579834, + 0.3165329098701477, + -0.33847397565841675, + -0.7911173105239868, + 0.05991634726524353 + ], + [ + -0.07102060317993164, + -0.1680671125650406, + -0.4340049922466278, + -0.03762519359588623, + -0.11443666368722916, + -0.06624738872051239, + -0.09883303195238113, + -0.14277416467666626, + 0.17922760546207428, + -0.42263534665107727, + -0.12380480766296387, + -0.4549333453178406, + -0.5676165223121643, + -0.20469880104064941, + -0.34369096159935, + -0.3239659368991852, + 
-0.08331000059843063, + -0.43214151263237, + -0.6567862629890442, + 0.40521949529647827, + -0.6256375312805176, + 0.0491066612303257 + ], + [ + 0.015544936992228031, + -0.2867782413959503, + 0.032724540680646896, + 0.06428550183773041, + -0.09262365847826004, + -0.09051138907670975, + -0.17753688991069794, + -0.21908259391784668, + -0.22245869040489197, + -0.10136832296848297, + -0.6046714186668396, + -0.6749756932258606, + -0.6294628381729126, + -0.27326154708862305, + -0.5605869889259338, + -0.3681408166885376, + -0.21228696405887604, + -0.43663328886032104, + -0.7254413962364197, + -0.7701458930969238, + 0.5403742790222168, + 0.1619524210691452 + ], + [ + -0.20585192739963531, + 0.2876221239566803, + 0.2454468011856079, + 0.4119555652141571, + 0.3196447491645813, + 0.2220960259437561, + -0.21576721966266632, + 0.14623288810253143, + 0.11046325415372849, + 0.25871190428733826, + 0.16008634865283966, + -0.4960484206676483, + -0.4669160842895508, + -0.6087306141853333, + -0.49598971009254456, + -0.4543299973011017, + -0.25453388690948486, + -0.36528438329696655, + -0.19133445620536804, + -0.34873974323272705, + -0.27958282828330994, + 0.07238321751356125 + ] + ], + "startTransitions": [ + -0.2484998255968094, + -0.21696658432483673, + -0.368173748254776, + 0.31760427355766296, + 0.4221130907535553, + 0.22435960173606873, + 0.16187447309494019, + -0.0031762802973389626, + -0.27872005105018616, + -0.3730117678642273, + 0.05377888306975365, + -0.3157128393650055, + -0.4493007957935333, + -0.5314681529998779, + -0.4341479539871216, + -0.678692102432251, + -0.3652220070362091, + -0.26524922251701355, + -0.311659038066864, + -0.23148185014724731, + -0.4218624234199524, + 0.1805553138256073 + ], + "endTransitions": [ + 0.13307654857635498, + -0.03326607868075371, + 0.20924371480941772, + -0.13297221064567566, + -0.13144485652446747, + -0.10179749131202698, + -0.11251126229763031, + -0.1639021635055542, + -0.024194087833166122, + -0.29395321011543274, + 
-0.2667964696884155, + -0.0795818567276001, + 0.006229702848941088, + -0.2883282005786896, + -0.3186459541320801, + -0.23140844702720642, + -0.2335258275270462, + -0.10743527859449387, + -0.08839895576238632, + -0.19506867229938507, + -0.17804646492004395, + -0.25375890731811523 + ] +} \ No newline at end of file diff --git a/grobid-home/models/affiliation-address-BidLSTM_CRF_FEATURES.onnx/encoder.onnx b/grobid-home/models/affiliation-address-BidLSTM_CRF_FEATURES.onnx/encoder.onnx new file mode 100644 index 0000000000..a9f31303e7 Binary files /dev/null and b/grobid-home/models/affiliation-address-BidLSTM_CRF_FEATURES.onnx/encoder.onnx differ diff --git a/grobid-home/models/affiliation-address-BidLSTM_CRF_FEATURES.onnx/vocab.json b/grobid-home/models/affiliation-address-BidLSTM_CRF_FEATURES.onnx/vocab.json new file mode 100644 index 0000000000..78dcd84b3b --- /dev/null +++ b/grobid-home/models/affiliation-address-BidLSTM_CRF_FEATURES.onnx/vocab.json @@ -0,0 +1,272 @@ +{ + "charVocab": { + "<PAD>": 0, + "<UNK>": 1, + "!": 2, + "\"": 3, + "#": 4, + "$": 5, + "&": 6, + "'": 7, + "(": 8, + ")": 9, + "*": 10, + "+": 11, + ",": 12, + "-": 13, + ".": 14, + "/": 15, + "0": 16, + "1": 17, + "2": 18, + "3": 19, + "4": 20, + "5": 21, + "6": 22, + "7": 23, + "8": 24, + "9": 25, + ":": 26, + ";": 27, + ">": 28, + "@": 29, + "A": 30, + "B": 31, + "C": 32, + "D": 33, + "E": 34, + "F": 35, + "G": 36, + "H": 37, + "I": 38, + "J": 39, + "K": 40, + "L": 41, + "M": 42, + "N": 43, + "O": 44, + "P": 45, + "Q": 46, + "R": 47, + "S": 48, + "T": 49, + "U": 50, + "V": 51, + "W": 52, + "X": 53, + "Y": 54, + "Z": 55, + "[": 56, + "\\": 57, + "]": 58, + "^": 59, + "a": 60, + "b": 61, + "c": 62, + "d": 63, + "e": 64, + "f": 65, + "g": 66, + "h": 67, + "i": 68, + "j": 69, + "k": 70, + "l": 71, + "m": 72, + "n": 73, + "o": 74, + "p": 75, + "q": 76, + "r": 77, + "s": 78, + "t": 79, + "u": 80, + "v": 81, + "w": 82, + "x": 83, + "y": 84, + "z": 85, + "~": 86, + "": 87, + "€": 88, + "Ÿ": 89, + "£": 
90, + "¤": 91, + "§": 92, + "¨": 93, + "ª": 94, + "³": 95, + "´": 96, + "¶": 97, + "¸": 98, + "»": 99, + "Â": 100, + "Ã": 101, + "Å": 102, + "È": 103, + "É": 104, + "Ì": 105, + "Õ": 106, + "Ö": 107, + "Ü": 108, + "ß": 109, + "à": 110, + "á": 111, + "â": 112, + "ã": 113, + "ä": 114, + "å": 115, + "ç": 116, + "è": 117, + "é": 118, + "ê": 119, + "ì": 120, + "í": 121, + "î": 122, + "ï": 123, + "ñ": 124, + "ò": 125, + "ó": 126, + "ô": 127, + "ö": 128, + "ø": 129, + "û": 130, + "ü": 131, + "č": 132, + "ę": 133, + "ı": 134, + "Ł": 135, + "ł": 136, + "ń": 137, + "Ř": 138, + "Š": 139, + "š": 140, + "ź": 141, + "ż": 142, + "ž": 143, + "ʈ": 144, + "ˆ": 145, + "˘": 146, + "˛": 147, + "–": 148, + "’": 149, + "†": 150, + "‡": 151, + "⊥": 152, + "␤": 153, + "-": 154 + }, + "tagVocab": { + "<PAD>": 0, + "B-<addrLine>": 1, + "B-<country>": 2, + "B-<department>": 3, + "B-<institution>": 4, + "B-<laboratory>": 5, + "B-<marker>": 6, + "B-<postBox>": 7, + "B-<postCode>": 8, + "B-<region>": 9, + "B-<settlement>": 10, + "I-<addrLine>": 11, + "I-<country>": 12, + "I-<department>": 13, + "I-<institution>": 14, + "I-<laboratory>": 15, + "I-<marker>": 16, + "I-<postBox>": 17, + "I-<postCode>": 18, + "I-<region>": 19, + "I-<settlement>": 20, + "O": 21 + }, + "tagIndex": { + "0": "<PAD>", + "1": "B-<addrLine>", + "2": "B-<country>", + "3": "B-<department>", + "4": "B-<institution>", + "5": "B-<laboratory>", + "6": "B-<marker>", + "7": "B-<postBox>", + "8": "B-<postCode>", + "9": "B-<region>", + "10": "B-<settlement>", + "11": "I-<addrLine>", + "12": "I-<country>", + "13": "I-<department>", + "14": "I-<institution>", + "15": "I-<laboratory>", + "16": "I-<marker>", + "17": "I-<postBox>", + "18": "I-<postCode>", + "19": "I-<region>", + "20": "I-<settlement>", + "21": "O" + }, + "maxCharLength": 30, + "returnChars": false, + "featuresIndices": [ + 9, + 10, + 11, + 12, + 13, + 14, + 15, + 16, + 17, + 18 + ], + "featuresVocabularySize": 12, + "featuresMapToIndex": { + "9": { + "LINEEND": 1, + 
"LINEIN": 2, + "LINESTART": 3 + }, + "10": { + "ALLCAPS": 13, + "INITCAP": 14, + "NOCAPS": 15 + }, + "11": { + "ALLDIGIT": 25, + "CONTAINDIGIT": 26, + "NODIGIT": 27 + }, + "12": { + "0": 37, + "1": 38 + }, + "13": { + "0": 49, + "1": 50 + }, + "14": { + "0": 61, + "1": 62 + }, + "15": { + "0": 73 + }, + "16": { + "0": 85, + "1": 86 + }, + "17": { + "0": 97, + "1": 98 + }, + "18": { + "COMMA": 109, + "DOT": 110, + "ENDBRACKET": 111, + "HYPHEN": 112, + "NOPUNCT": 113, + "OPENBRACKET": 114, + "PUNCT": 115, + "QUOTE": 116 + } + } +} \ No newline at end of file diff --git a/grobid-home/models/affiliation-address-BidLSTM_CRF_FEATURES/config.json b/grobid-home/models/affiliation-address-BidLSTM_CRF_FEATURES/config.json deleted file mode 100644 index 408c9c0795..0000000000 --- a/grobid-home/models/affiliation-address-BidLSTM_CRF_FEATURES/config.json +++ /dev/null @@ -1,87 +0,0 @@ -{ - "model_name": "affiliation-address-BidLSTM_CRF_FEATURES", - "architecture": "BidLSTM_CRF_FEATURES", - "embeddings_name": "glove-840B", - "char_vocab_size": 152, - "case_vocab_size": 8, - "char_embedding_size": 25, - "num_char_lstm_units": 25, - "max_char_length": 30, - "features_vocabulary_size": 12, - "features_indices": [ - 9, - 10, - 11, - 12, - 13, - 14, - 15, - 16, - 17, - 18 - ], - "features_embedding_size": 4, - "features_lstm_units": 4, - "max_sequence_length": 600, - "word_embedding_size": 300, - "num_word_lstm_units": 100, - "case_embedding_size": 5, - "dropout": 0.5, - "recurrent_dropout": 0.5, - "use_crf": true, - "use_chain_crf": false, - "fold_number": 1, - "batch_size": 20, - "transformer_name": null, - "use_ELMo": false, - "features_map_to_index": { - "9": { - "LINEEND": 1, - "LINEIN": 2, - "LINESTART": 3 - }, - "10": { - "ALLCAPS": 13, - "INITCAP": 14, - "NOCAPS": 15 - }, - "11": { - "ALLDIGIT": 25, - "CONTAINDIGIT": 26, - "NODIGIT": 27 - }, - "12": { - "0": 37, - "1": 38 - }, - "13": { - "0": 49, - "1": 50 - }, - "14": { - "0": 61, - "1": 62 - }, - "15": { - "0": 73 - }, - 
"16": { - "0": 85, - "1": 86 - }, - "17": { - "0": 97, - "1": 98 - }, - "18": { - "COMMA": 109, - "DOT": 110, - "ENDBRACKET": 111, - "HYPHEN": 112, - "NOPUNCT": 113, - "OPENBRACKET": 114, - "PUNCT": 115, - "QUOTE": 116 - } - } -} \ No newline at end of file diff --git a/grobid-home/models/affiliation-address-BidLSTM_CRF_FEATURES/model_weights.hdf5 b/grobid-home/models/affiliation-address-BidLSTM_CRF_FEATURES/model_weights.hdf5 deleted file mode 100644 index ff95969d84..0000000000 Binary files a/grobid-home/models/affiliation-address-BidLSTM_CRF_FEATURES/model_weights.hdf5 and /dev/null differ diff --git a/grobid-home/models/affiliation-address-BidLSTM_CRF_FEATURES/preprocessor.json b/grobid-home/models/affiliation-address-BidLSTM_CRF_FEATURES/preprocessor.json deleted file mode 100644 index 12130d2145..0000000000 --- a/grobid-home/models/affiliation-address-BidLSTM_CRF_FEATURES/preprocessor.json +++ /dev/null @@ -1,287 +0,0 @@ -{ - "padding": true, - "return_lengths": true, - "return_word_embeddings": true, - "return_casing": false, - "return_features": true, - "return_chars": true, - "return_bert_embeddings": false, - "vocab_char": { - "<PAD>": 0, - "<UNK>": 1, - "!": 2, - "\"": 3, - "#": 4, - "$": 5, - "&": 6, - "'": 7, - "(": 8, - ")": 9, - "*": 10, - "+": 11, - ",": 12, - "-": 13, - ".": 14, - "/": 15, - "0": 16, - "1": 17, - "2": 18, - "3": 19, - "4": 20, - "5": 21, - "6": 22, - "7": 23, - "8": 24, - "9": 25, - ":": 26, - ";": 27, - ">": 28, - "@": 29, - "A": 30, - "B": 31, - "C": 32, - "D": 33, - "E": 34, - "F": 35, - "G": 36, - "H": 37, - "I": 38, - "J": 39, - "K": 40, - "L": 41, - "M": 42, - "N": 43, - "O": 44, - "P": 45, - "Q": 46, - "R": 47, - "S": 48, - "T": 49, - "U": 50, - "V": 51, - "W": 52, - "X": 53, - "Y": 54, - "Z": 55, - "[": 56, - "\\": 57, - "]": 58, - "^": 59, - "a": 60, - "b": 61, - "c": 62, - "d": 63, - "e": 64, - "f": 65, - "g": 66, - "h": 67, - "i": 68, - "j": 69, - "k": 70, - "l": 71, - "m": 72, - "n": 73, - "o": 74, - "p": 75, - "q": 76, 
- "r": 77, - "s": 78, - "t": 79, - "u": 80, - "v": 81, - "w": 82, - "x": 83, - "y": 84, - "z": 85, - "~": 86, - "\u007f": 87, - "\u0080": 88, - "\u009f": 89, - "\u00a3": 90, - "\u00a7": 91, - "\u00a8": 92, - "\u00aa": 93, - "\u00b3": 94, - "\u00b4": 95, - "\u00b6": 96, - "\u00bb": 97, - "\u00c2": 98, - "\u00c3": 99, - "\u00c5": 100, - "\u00c8": 101, - "\u00c9": 102, - "\u00cc": 103, - "\u00d5": 104, - "\u00d6": 105, - "\u00dc": 106, - "\u00df": 107, - "\u00e0": 108, - "\u00e1": 109, - "\u00e2": 110, - "\u00e3": 111, - "\u00e4": 112, - "\u00e5": 113, - "\u00e7": 114, - "\u00e8": 115, - "\u00e9": 116, - "\u00ea": 117, - "\u00ec": 118, - "\u00ed": 119, - "\u00ee": 120, - "\u00ef": 121, - "\u00f1": 122, - "\u00f2": 123, - "\u00f3": 124, - "\u00f4": 125, - "\u00f6": 126, - "\u00f8": 127, - "\u00fb": 128, - "\u00fc": 129, - "\u010d": 130, - "\u0119": 131, - "\u0131": 132, - "\u0141": 133, - "\u0142": 134, - "\u0144": 135, - "\u0158": 136, - "\u0160": 137, - "\u0161": 138, - "\u017a": 139, - "\u017c": 140, - "\u017e": 141, - "\u0288": 142, - "\u02d8": 143, - "\u02db": 144, - "\u2013": 145, - "\u2019": 146, - "\u2020": 147, - "\u2021": 148, - "\u22a5": 149, - "\u2424": 150, - "\uff0d": 151 - }, - "vocab_tag": { - "<PAD>": 0, - "B-<addrLine>": 1, - "B-<country>": 2, - "B-<department>": 3, - "B-<institution>": 4, - "B-<laboratory>": 5, - "B-<marker>": 6, - "B-<postBox>": 7, - "B-<postCode>": 8, - "B-<region>": 9, - "B-<settlement>": 10, - "I-<addrLine>": 11, - "I-<country>": 12, - "I-<department>": 13, - "I-<institution>": 14, - "I-<laboratory>": 15, - "I-<marker>": 16, - "I-<postBox>": 17, - "I-<postCode>": 18, - "I-<region>": 19, - "I-<settlement>": 20, - "O": 21 - }, - "vocab_case": [ - "<PAD>", - "numeric", - "allLower", - "allUpper", - "initialUpper", - "other", - "mainly_numeric", - "contains_digit" - ], - "max_char_length": 30, - "feature_preprocessor": { - "features_vocabulary_size": 12, - "features_indices": [ - 9, - 10, - 11, - 12, - 13, - 14, - 15, - 16, - 17, - 
18 - ], - "features_map_to_index": { - "9": { - "LINEEND": 1, - "LINEIN": 2, - "LINESTART": 3 - }, - "10": { - "ALLCAPS": 13, - "INITCAP": 14, - "NOCAPS": 15 - }, - "11": { - "ALLDIGIT": 25, - "CONTAINDIGIT": 26, - "NODIGIT": 27 - }, - "12": { - "0": 37, - "1": 38 - }, - "13": { - "0": 49, - "1": 50 - }, - "14": { - "0": 61, - "1": 62 - }, - "15": { - "0": 73 - }, - "16": { - "0": 85, - "1": 86 - }, - "17": { - "0": 97, - "1": 98 - }, - "18": { - "COMMA": 109, - "DOT": 110, - "ENDBRACKET": 111, - "HYPHEN": 112, - "NOPUNCT": 113, - "OPENBRACKET": 114, - "PUNCT": 115, - "QUOTE": 116 - } - } - }, - "indice_tag": { - "0": "<PAD>", - "1": "B-<addrLine>", - "2": "B-<country>", - "3": "B-<department>", - "4": "B-<institution>", - "5": "B-<laboratory>", - "6": "B-<marker>", - "7": "B-<postBox>", - "8": "B-<postCode>", - "9": "B-<region>", - "10": "B-<settlement>", - "11": "I-<addrLine>", - "12": "I-<country>", - "13": "I-<department>", - "14": "I-<institution>", - "15": "I-<laboratory>", - "16": "I-<marker>", - "17": "I-<postBox>", - "18": "I-<postCode>", - "19": "I-<region>", - "20": "I-<settlement>", - "21": "O" - } -} \ No newline at end of file diff --git a/grobid-home/models/affiliation-address-BidLSTM_ChainCRF-with_ELMo/config.json b/grobid-home/models/affiliation-address-BidLSTM_ChainCRF-with_ELMo/config.json deleted file mode 100644 index e3dd10b18e..0000000000 --- a/grobid-home/models/affiliation-address-BidLSTM_ChainCRF-with_ELMo/config.json +++ /dev/null @@ -1,26 +0,0 @@ -{ - "model_name": "affiliation-address-BidLSTM_ChainCRF-with_ELMo", - "architecture": "BidLSTM_ChainCRF", - "embeddings_name": "glove-840B", - "char_vocab_size": 149, - "case_vocab_size": 8, - "char_embedding_size": 25, - "num_char_lstm_units": 25, - "max_char_length": 30, - "features_vocabulary_size": 12, - "features_indices": null, - "features_embedding_size": 4, - "features_lstm_units": 4, - "max_sequence_length": 600, - "word_embedding_size": 1324, - "num_word_lstm_units": 100, - 
"case_embedding_size": 5, - "dropout": 0.5, - "recurrent_dropout": 0.5, - "use_crf": true, - "use_chain_crf": true, - "fold_number": 1, - "batch_size": 20, - "transformer_name": null, - "use_ELMo": true -} \ No newline at end of file diff --git a/grobid-home/models/affiliation-address-BidLSTM_ChainCRF-with_ELMo/model_weights.hdf5 b/grobid-home/models/affiliation-address-BidLSTM_ChainCRF-with_ELMo/model_weights.hdf5 deleted file mode 100644 index dfb3812156..0000000000 Binary files a/grobid-home/models/affiliation-address-BidLSTM_ChainCRF-with_ELMo/model_weights.hdf5 and /dev/null differ diff --git a/grobid-home/models/affiliation-address-BidLSTM_ChainCRF-with_ELMo/preprocessor.json b/grobid-home/models/affiliation-address-BidLSTM_ChainCRF-with_ELMo/preprocessor.json deleted file mode 100644 index 23b55610df..0000000000 --- a/grobid-home/models/affiliation-address-BidLSTM_ChainCRF-with_ELMo/preprocessor.json +++ /dev/null @@ -1,220 +0,0 @@ -{ - "padding": true, - "return_lengths": true, - "return_word_embeddings": true, - "return_casing": false, - "return_features": false, - "return_chars": true, - "return_bert_embeddings": false, - "vocab_char": { - "<PAD>": 0, - "<UNK>": 1, - "!": 2, - "\"": 3, - "#": 4, - "$": 5, - "&": 6, - "'": 7, - "(": 8, - ")": 9, - "*": 10, - "+": 11, - ",": 12, - "-": 13, - ".": 14, - "/": 15, - "0": 16, - "1": 17, - "2": 18, - "3": 19, - "4": 20, - "5": 21, - "6": 22, - "7": 23, - "8": 24, - "9": 25, - ":": 26, - ";": 27, - ">": 28, - "@": 29, - "A": 30, - "B": 31, - "C": 32, - "D": 33, - "E": 34, - "F": 35, - "G": 36, - "H": 37, - "I": 38, - "J": 39, - "K": 40, - "L": 41, - "M": 42, - "N": 43, - "O": 44, - "P": 45, - "Q": 46, - "R": 47, - "S": 48, - "T": 49, - "U": 50, - "V": 51, - "W": 52, - "X": 53, - "Y": 54, - "Z": 55, - "[": 56, - "\\": 57, - "]": 58, - "^": 59, - "_": 60, - "a": 61, - "b": 62, - "c": 63, - "d": 64, - "e": 65, - "f": 66, - "g": 67, - "h": 68, - "i": 69, - "j": 70, - "k": 71, - "l": 72, - "m": 73, - "n": 74, - "o": 
75, - "p": 76, - "q": 77, - "r": 78, - "s": 79, - "t": 80, - "u": 81, - "v": 82, - "w": 83, - "x": 84, - "y": 85, - "z": 86, - "~": 87, - "\u007f": 88, - "\u0080": 89, - "\u009f": 90, - "\u00a3": 91, - "\u00a7": 92, - "\u00a8": 93, - "\u00aa": 94, - "\u00b3": 95, - "\u00b4": 96, - "\u00bb": 97, - "\u00c2": 98, - "\u00c3": 99, - "\u00c5": 100, - "\u00c8": 101, - "\u00c9": 102, - "\u00cc": 103, - "\u00d6": 104, - "\u00dc": 105, - "\u00df": 106, - "\u00e0": 107, - "\u00e1": 108, - "\u00e2": 109, - "\u00e3": 110, - "\u00e4": 111, - "\u00e5": 112, - "\u00e7": 113, - "\u00e8": 114, - "\u00e9": 115, - "\u00ea": 116, - "\u00ec": 117, - "\u00ed": 118, - "\u00ee": 119, - "\u00ef": 120, - "\u00f1": 121, - "\u00f2": 122, - "\u00f3": 123, - "\u00f4": 124, - "\u00f6": 125, - "\u00f8": 126, - "\u00fb": 127, - "\u00fc": 128, - "\u010d": 129, - "\u0131": 130, - "\u0141": 131, - "\u0142": 132, - "\u0144": 133, - "\u0158": 134, - "\u0160": 135, - "\u0161": 136, - "\u017a": 137, - "\u017c": 138, - "\u017e": 139, - "\u02d8": 140, - "\u02db": 141, - "\u2013": 142, - "\u2019": 143, - "\u2020": 144, - "\u2021": 145, - "\u22a5": 146, - "\u2424": 147, - "\uff0d": 148 - }, - "vocab_tag": { - "<PAD>": 0, - "B-<addrLine>": 1, - "B-<country>": 2, - "B-<department>": 3, - "B-<institution>": 4, - "B-<laboratory>": 5, - "B-<marker>": 6, - "B-<postBox>": 7, - "B-<postCode>": 8, - "B-<region>": 9, - "B-<settlement>": 10, - "I-<addrLine>": 11, - "I-<country>": 12, - "I-<department>": 13, - "I-<institution>": 14, - "I-<laboratory>": 15, - "I-<marker>": 16, - "I-<postBox>": 17, - "I-<postCode>": 18, - "I-<region>": 19, - "I-<settlement>": 20, - "O": 21 - }, - "vocab_case": [ - "<PAD>", - "numeric", - "allLower", - "allUpper", - "initialUpper", - "other", - "mainly_numeric", - "contains_digit" - ], - "max_char_length": 30, - "feature_preprocessor": null, - "indice_tag": { - "0": "<PAD>", - "1": "B-<addrLine>", - "2": "B-<country>", - "3": "B-<department>", - "4": "B-<institution>", - "5": 
"B-<laboratory>", - "6": "B-<marker>", - "7": "B-<postBox>", - "8": "B-<postCode>", - "9": "B-<region>", - "10": "B-<settlement>", - "11": "I-<addrLine>", - "12": "I-<country>", - "13": "I-<department>", - "14": "I-<institution>", - "15": "I-<laboratory>", - "16": "I-<marker>", - "17": "I-<postBox>", - "18": "I-<postCode>", - "19": "I-<region>", - "20": "I-<settlement>", - "21": "O" - } -} \ No newline at end of file diff --git a/grobid-home/models/citation-BidLSTM_CRF/config.json b/grobid-home/models/citation-BidLSTM_CRF/config.json deleted file mode 100644 index 44f5cd2eac..0000000000 --- a/grobid-home/models/citation-BidLSTM_CRF/config.json +++ /dev/null @@ -1,26 +0,0 @@ -{ - "model_name": "citation-BidLSTM_CRF", - "architecture": "BidLSTM_CRF", - "embeddings_name": "glove-840B", - "char_vocab_size": 213, - "case_vocab_size": 8, - "char_embedding_size": 25, - "num_char_lstm_units": 25, - "max_char_length": 30, - "features_vocabulary_size": 12, - "features_indices": null, - "features_embedding_size": 4, - "features_lstm_units": 4, - "max_sequence_length": 600, - "word_embedding_size": 300, - "num_word_lstm_units": 100, - "case_embedding_size": 5, - "dropout": 0.5, - "recurrent_dropout": 0.5, - "use_crf": true, - "use_chain_crf": false, - "fold_number": 1, - "batch_size": 20, - "transformer_name": null, - "use_ELMo": false -} \ No newline at end of file diff --git a/grobid-home/models/citation-BidLSTM_CRF/model_weights.hdf5 b/grobid-home/models/citation-BidLSTM_CRF/model_weights.hdf5 deleted file mode 100644 index 489a65d8cc..0000000000 Binary files a/grobid-home/models/citation-BidLSTM_CRF/model_weights.hdf5 and /dev/null differ diff --git a/grobid-home/models/citation-BidLSTM_CRF/preprocessor.json b/grobid-home/models/citation-BidLSTM_CRF/preprocessor.json deleted file mode 100644 index 2f30245a81..0000000000 --- a/grobid-home/models/citation-BidLSTM_CRF/preprocessor.json +++ /dev/null @@ -1,316 +0,0 @@ -{ - "padding": true, - "return_lengths": true, - 
"return_word_embeddings": true, - "return_casing": false, - "return_features": false, - "return_chars": true, - "return_bert_embeddings": false, - "vocab_char": { - "<PAD>": 0, - "<UNK>": 1, - "!": 2, - "\"": 3, - "#": 4, - "$": 5, - "%": 6, - "&": 7, - "'": 8, - "(": 9, - ")": 10, - "*": 11, - "+": 12, - ",": 13, - "-": 14, - ".": 15, - "/": 16, - "0": 17, - "1": 18, - "2": 19, - "3": 20, - "4": 21, - "5": 22, - "6": 23, - "7": 24, - "8": 25, - "9": 26, - ":": 27, - ";": 28, - "=": 29, - ">": 30, - "?": 31, - "@": 32, - "A": 33, - "B": 34, - "C": 35, - "D": 36, - "E": 37, - "F": 38, - "G": 39, - "H": 40, - "I": 41, - "J": 42, - "K": 43, - "L": 44, - "M": 45, - "N": 46, - "O": 47, - "P": 48, - "Q": 49, - "R": 50, - "S": 51, - "T": 52, - "U": 53, - "V": 54, - "W": 55, - "X": 56, - "Y": 57, - "Z": 58, - "[": 59, - "]": 60, - "^": 61, - "_": 62, - "`": 63, - "a": 64, - "b": 65, - "c": 66, - "d": 67, - "e": 68, - "f": 69, - "g": 70, - "h": 71, - "i": 72, - "j": 73, - "k": 74, - "l": 75, - "m": 76, - "n": 77, - "o": 78, - "p": 79, - "q": 80, - "r": 81, - "s": 82, - "t": 83, - "u": 84, - "v": 85, - "w": 86, - "x": 87, - "y": 88, - "z": 89, - "|": 90, - "~": 91, - "\u0084": 92, - "\u0086": 93, - "\u0092": 94, - "\u0096": 95, - "\u00a1": 96, - "\u00a2": 97, - "\u00a4": 98, - "\u00a7": 99, - "\u00a8": 100, - "\u00a9": 101, - "\u00ab": 102, - "\u00ad": 103, - "\u00af": 104, - "\u00b0": 105, - "\u00b1": 106, - "\u00b2": 107, - "\u00b3": 108, - "\u00b4": 109, - "\u00b5": 110, - "\u00b8": 111, - "\u00ba": 112, - "\u00bb": 113, - "\u00bc": 114, - "\u00bf": 115, - "\u00c0": 116, - "\u00c3": 117, - "\u00c5": 118, - "\u00c8": 119, - "\u00c9": 120, - "\u00ca": 121, - "\u00ce": 122, - "\u00cf": 123, - "\u00d4": 124, - "\u00d6": 125, - "\u00d7": 126, - "\u00d8": 127, - "\u00dc": 128, - "\u00df": 129, - "\u00e0": 130, - "\u00e1": 131, - "\u00e2": 132, - "\u00e3": 133, - "\u00e4": 134, - "\u00e7": 135, - "\u00e8": 136, - "\u00e9": 137, - "\u00ea": 138, - "\u00ec": 139, - "\u00ed": 140, 
- "\u00ee": 141, - "\u00ef": 142, - "\u00f1": 143, - "\u00f2": 144, - "\u00f3": 145, - "\u00f4": 146, - "\u00f6": 147, - "\u00f8": 148, - "\u00fa": 149, - "\u00fc": 150, - "\u00fd": 151, - "\u0102": 152, - "\u0107": 153, - "\u010c": 154, - "\u010d": 155, - "\u011f": 156, - "\u0130": 157, - "\u0131": 158, - "\u0132": 159, - "\u013e": 160, - "\u0142": 161, - "\u0144": 162, - "\u0161": 163, - "\u0179": 164, - "\u017d": 165, - "\u02c6": 166, - "\u02c7": 167, - "\u039a": 168, - "\u039b": 169, - "\u039e": 170, - "\u03a0": 171, - "\u03b1": 172, - "\u03b2": 173, - "\u03b3": 174, - "\u03b5": 175, - "\u03b7": 176, - "\u03b8": 177, - "\u03b9": 178, - "\u03bb": 179, - "\u03bd": 180, - "\u03be": 181, - "\u03bf": 182, - "\u03c0": 183, - "\u03c1": 184, - "\u03c2": 185, - "\u03c3": 186, - "\u03c4": 187, - "\u03c6": 188, - "\u03e9": 189, - "\u1390": 190, - "\u1f04": 191, - "\u1f14": 192, - "\u1f73": 193, - "\u1f79": 194, - "\u2018": 195, - "\u2019": 196, - "\u201c": 197, - "\u201d": 198, - "\u2022": 199, - "\u2192": 200, - "\u2202": 201, - "\u2206": 202, - "\u2212": 203, - "\u221a": 204, - "\u221d": 205, - "\u221e": 206, - "\u223c": 207, - "\u2245": 208, - "\uf061": 209, - "\ufb00": 210, - "\ufb01": 211, - "\ufb02": 212 - }, - "vocab_tag": { - "<PAD>": 0, - "B-<author>": 1, - "B-<booktitle>": 2, - "B-<collaboration>": 3, - "B-<date>": 4, - "B-<editor>": 5, - "B-<institution>": 6, - "B-<issue>": 7, - "B-<journal>": 8, - "B-<location>": 9, - "B-<note>": 10, - "B-<pages>": 11, - "B-<publisher>": 12, - "B-<pubnum>": 13, - "B-<series>": 14, - "B-<tech>": 15, - "B-<title>": 16, - "B-<volume>": 17, - "B-<web>": 18, - "I-<author>": 19, - "I-<booktitle>": 20, - "I-<collaboration>": 21, - "I-<date>": 22, - "I-<editor>": 23, - "I-<institution>": 24, - "I-<issue>": 25, - "I-<journal>": 26, - "I-<location>": 27, - "I-<note>": 28, - "I-<pages>": 29, - "I-<publisher>": 30, - "I-<pubnum>": 31, - "I-<series>": 32, - "I-<tech>": 33, - "I-<title>": 34, - "I-<volume>": 35, - "I-<web>": 36, - "O": 37 - 
}, - "vocab_case": [ - "<PAD>", - "numeric", - "allLower", - "allUpper", - "initialUpper", - "other", - "mainly_numeric", - "contains_digit" - ], - "max_char_length": 30, - "feature_preprocessor": null, - "indice_tag": { - "0": "<PAD>", - "1": "B-<author>", - "2": "B-<booktitle>", - "3": "B-<collaboration>", - "4": "B-<date>", - "5": "B-<editor>", - "6": "B-<institution>", - "7": "B-<issue>", - "8": "B-<journal>", - "9": "B-<location>", - "10": "B-<note>", - "11": "B-<pages>", - "12": "B-<publisher>", - "13": "B-<pubnum>", - "14": "B-<series>", - "15": "B-<tech>", - "16": "B-<title>", - "17": "B-<volume>", - "18": "B-<web>", - "19": "I-<author>", - "20": "I-<booktitle>", - "21": "I-<collaboration>", - "22": "I-<date>", - "23": "I-<editor>", - "24": "I-<institution>", - "25": "I-<issue>", - "26": "I-<journal>", - "27": "I-<location>", - "28": "I-<note>", - "29": "I-<pages>", - "30": "I-<publisher>", - "31": "I-<pubnum>", - "32": "I-<series>", - "33": "I-<tech>", - "34": "I-<title>", - "35": "I-<volume>", - "36": "I-<web>", - "37": "O" - } -} \ No newline at end of file diff --git a/grobid-home/models/citation-BidLSTM_CRF_FEATURES-with_ELMo/config.json b/grobid-home/models/citation-BidLSTM_CRF_FEATURES-with_ELMo/config.json deleted file mode 100644 index 0bd3fbf417..0000000000 --- a/grobid-home/models/citation-BidLSTM_CRF_FEATURES-with_ELMo/config.json +++ /dev/null @@ -1,143 +0,0 @@ -{ - "model_name": "citation-BidLSTM_CRF_FEATURES-with_ELMo", - "architecture": "BidLSTM_CRF_FEATURES", - "embeddings_name": "glove-840B", - "char_vocab_size": 261, - "case_vocab_size": 8, - "char_embedding_size": 25, - "num_char_lstm_units": 25, - "max_char_length": 30, - "features_vocabulary_size": 12, - "features_indices": [ - 9, - 10, - 11, - 12, - 13, - 14, - 15, - 16, - 17, - 18, - 19, - 20, - 21, - 22, - 23, - 24, - 25, - 26, - 27 - ], - "features_embedding_size": 4, - "features_lstm_units": 4, - "max_sequence_length": 500, - "word_embedding_size": 1324, - "num_word_lstm_units": 
100, - "case_embedding_size": 5, - "dropout": 0.5, - "recurrent_dropout": 0.5, - "use_crf": true, - "use_chain_crf": false, - "fold_number": 1, - "batch_size": 30, - "transformer_name": null, - "use_ELMo": true, - "features_map_to_index": { - "9": { - "LINEEND": 1, - "LINEIN": 2, - "LINESTART": 3 - }, - "10": { - "ALLCAP": 13, - "INITCAP": 14, - "NOCAPS": 15 - }, - "11": { - "ALLDIGIT": 25, - "CONTAINSDIGITS": 26, - "NODIGIT": 27 - }, - "12": { - "0": 37, - "1": 38 - }, - "13": { - "0": 49, - "1": 50 - }, - "14": { - "0": 61, - "1": 62 - }, - "15": { - "0": 73, - "1": 74 - }, - "16": { - "0": 85, - "1": 86 - }, - "17": { - "0": 97, - "1": 98 - }, - "18": { - "0": 109, - "1": 110 - }, - "19": { - "0": 121, - "1": 122 - }, - "20": { - "0": 133, - "1": 134 - }, - "21": { - "0": 145, - "1": 146 - }, - "22": { - "0": 157, - "1": 158 - }, - "23": { - "0": 169, - "1": 170 - }, - "24": { - "0": 181, - "1": 182 - }, - "25": { - "0": 193, - "1": 194 - }, - "26": { - "COMMA": 205, - "DOT": 206, - "ENDBRACKET": 207, - "HYPHEN": 208, - "NOPUNCT": 209, - "OPENBRACKET": 210, - "PUNCT": 211, - "QUOTE": 212 - }, - "27": { - "0": 217, - "1": 218, - "10": 219, - "11": 220, - "2": 221, - "3": 222, - "4": 223, - "5": 224, - "6": 225, - "7": 226, - "8": 227, - "9": 228 - } - } -} \ No newline at end of file diff --git a/grobid-home/models/citation-BidLSTM_CRF_FEATURES-with_ELMo/model_weights.hdf5 b/grobid-home/models/citation-BidLSTM_CRF_FEATURES-with_ELMo/model_weights.hdf5 deleted file mode 100644 index 6a5c015a36..0000000000 Binary files a/grobid-home/models/citation-BidLSTM_CRF_FEATURES-with_ELMo/model_weights.hdf5 and /dev/null differ diff --git a/grobid-home/models/citation-BidLSTM_CRF_FEATURES-with_ELMo/preprocessor.json b/grobid-home/models/citation-BidLSTM_CRF_FEATURES-with_ELMo/preprocessor.json deleted file mode 100644 index fdd872ddda..0000000000 --- a/grobid-home/models/citation-BidLSTM_CRF_FEATURES-with_ELMo/preprocessor.json +++ /dev/null @@ -1,484 +0,0 @@ -{ - "padding": 
true, - "return_lengths": true, - "return_word_embeddings": true, - "return_casing": false, - "return_features": true, - "return_chars": true, - "return_bert_embeddings": false, - "vocab_char": { - "<PAD>": 0, - "<UNK>": 1, - "!": 2, - "\"": 3, - "#": 4, - "$": 5, - "%": 6, - "&": 7, - "'": 8, - "(": 9, - ")": 10, - "*": 11, - "+": 12, - ",": 13, - "-": 14, - ".": 15, - "/": 16, - "0": 17, - "1": 18, - "2": 19, - "3": 20, - "4": 21, - "5": 22, - "6": 23, - "7": 24, - "8": 25, - "9": 26, - ":": 27, - ";": 28, - "<": 29, - "=": 30, - ">": 31, - "?": 32, - "@": 33, - "A": 34, - "B": 35, - "C": 36, - "D": 37, - "E": 38, - "F": 39, - "G": 40, - "H": 41, - "I": 42, - "J": 43, - "K": 44, - "L": 45, - "M": 46, - "N": 47, - "O": 48, - "P": 49, - "Q": 50, - "R": 51, - "S": 52, - "T": 53, - "U": 54, - "V": 55, - "W": 56, - "X": 57, - "Y": 58, - "Z": 59, - "[": 60, - "\\": 61, - "]": 62, - "^": 63, - "_": 64, - "`": 65, - "a": 66, - "b": 67, - "c": 68, - "d": 69, - "e": 70, - "f": 71, - "g": 72, - "h": 73, - "i": 74, - "j": 75, - "k": 76, - "l": 77, - "m": 78, - "n": 79, - "o": 80, - "p": 81, - "q": 82, - "r": 83, - "s": 84, - "t": 85, - "u": 86, - "v": 87, - "w": 88, - "x": 89, - "y": 90, - "z": 91, - "|": 92, - "~": 93, - "\u0084": 94, - "\u0086": 95, - "\u0092": 96, - "\u0096": 97, - "\u00a1": 98, - "\u00a2": 99, - "\u00a4": 100, - "\u00a7": 101, - "\u00a8": 102, - "\u00a9": 103, - "\u00ab": 104, - "\u00ad": 105, - "\u00af": 106, - "\u00b0": 107, - "\u00b1": 108, - "\u00b2": 109, - "\u00b3": 110, - "\u00b4": 111, - "\u00b5": 112, - "\u00b8": 113, - "\u00ba": 114, - "\u00bb": 115, - "\u00bc": 116, - "\u00bf": 117, - "\u00c0": 118, - "\u00c1": 119, - "\u00c3": 120, - "\u00c5": 121, - "\u00c8": 122, - "\u00c9": 123, - "\u00ca": 124, - "\u00ce": 125, - "\u00cf": 126, - "\u00d4": 127, - "\u00d6": 128, - "\u00d7": 129, - "\u00d8": 130, - "\u00dc": 131, - "\u00df": 132, - "\u00e0": 133, - "\u00e1": 134, - "\u00e2": 135, - "\u00e3": 136, - "\u00e4": 137, - "\u00e7": 138, - 
"\u00e8": 139, - "\u00e9": 140, - "\u00ea": 141, - "\u00ec": 142, - "\u00ed": 143, - "\u00ee": 144, - "\u00ef": 145, - "\u00f1": 146, - "\u00f2": 147, - "\u00f3": 148, - "\u00f4": 149, - "\u00f5": 150, - "\u00f6": 151, - "\u00f8": 152, - "\u00fa": 153, - "\u00fc": 154, - "\u00fd": 155, - "\u0102": 156, - "\u0107": 157, - "\u010c": 158, - "\u010d": 159, - "\u0119": 160, - "\u011f": 161, - "\u0130": 162, - "\u0131": 163, - "\u0132": 164, - "\u013e": 165, - "\u0142": 166, - "\u0144": 167, - "\u015b": 168, - "\u0161": 169, - "\u0179": 170, - "\u017d": 171, - "\u017e": 172, - "\u02c6": 173, - "\u02c7": 174, - "\u039a": 175, - "\u039b": 176, - "\u039e": 177, - "\u03a0": 178, - "\u03b1": 179, - "\u03b2": 180, - "\u03b3": 181, - "\u03b5": 182, - "\u03b7": 183, - "\u03b8": 184, - "\u03b9": 185, - "\u03bb": 186, - "\u03bd": 187, - "\u03be": 188, - "\u03bf": 189, - "\u03c0": 190, - "\u03c1": 191, - "\u03c2": 192, - "\u03c3": 193, - "\u03c4": 194, - "\u03c6": 195, - "\u03e9": 196, - "\u0410": 197, - "\u0411": 198, - "\u0412": 199, - "\u0418": 200, - "\u041a": 201, - "\u041b": 202, - "\u041c": 203, - "\u041d": 204, - "\u041f": 205, - "\u0420": 206, - "\u0421": 207, - "\u0427": 208, - "\u042f": 209, - "\u0430": 210, - "\u0431": 211, - "\u0432": 212, - "\u0433": 213, - "\u0434": 214, - "\u0435": 215, - "\u0438": 216, - "\u0439": 217, - "\u043a": 218, - "\u043b": 219, - "\u043c": 220, - "\u043d": 221, - "\u043e": 222, - "\u043f": 223, - "\u0440": 224, - "\u0441": 225, - "\u0442": 226, - "\u0443": 227, - "\u0445": 228, - "\u0446": 229, - "\u0447": 230, - "\u044a": 231, - "\u044b": 232, - "\u044e": 233, - "\u1390": 234, - "\u1f04": 235, - "\u1f14": 236, - "\u1f73": 237, - "\u1f79": 238, - "\u2018": 239, - "\u2019": 240, - "\u201a": 241, - "\u201c": 242, - "\u201d": 243, - "\u201e": 244, - "\u2022": 245, - "\u2026": 246, - "\u2192": 247, - "\u2202": 248, - "\u2206": 249, - "\u2212": 250, - "\u221a": 251, - "\u221d": 252, - "\u221e": 253, - "\u223c": 254, - "\u2245": 255, - "\uf061": 
256, - "\ufb00": 257, - "\ufb01": 258, - "\ufb02": 259, - "\ufffd": 260 - }, - "vocab_tag": { - "<PAD>": 0, - "B-<author>": 1, - "B-<booktitle>": 2, - "B-<collaboration>": 3, - "B-<date>": 4, - "B-<editor>": 5, - "B-<institution>": 6, - "B-<issue>": 7, - "B-<journal>": 8, - "B-<location>": 9, - "B-<note>": 10, - "B-<pages>": 11, - "B-<publisher>": 12, - "B-<pubnum>": 13, - "B-<series>": 14, - "B-<tech>": 15, - "B-<title>": 16, - "B-<volume>": 17, - "B-<web>": 18, - "I-<author>": 19, - "I-<booktitle>": 20, - "I-<collaboration>": 21, - "I-<date>": 22, - "I-<editor>": 23, - "I-<institution>": 24, - "I-<issue>": 25, - "I-<journal>": 26, - "I-<location>": 27, - "I-<note>": 28, - "I-<pages>": 29, - "I-<publisher>": 30, - "I-<pubnum>": 31, - "I-<series>": 32, - "I-<tech>": 33, - "I-<title>": 34, - "I-<volume>": 35, - "I-<web>": 36, - "O": 37 - }, - "vocab_case": [ - "<PAD>", - "numeric", - "allLower", - "allUpper", - "initialUpper", - "other", - "mainly_numeric", - "contains_digit" - ], - "max_char_length": 30, - "feature_preprocessor": { - "features_vocabulary_size": 12, - "features_indices": [ - 9, - 10, - 11, - 12, - 13, - 14, - 15, - 16, - 17, - 18, - 19, - 20, - 21, - 22, - 23, - 24, - 25, - 26, - 27 - ], - "features_map_to_index": { - "9": { - "LINEEND": 1, - "LINEIN": 2, - "LINESTART": 3 - }, - "10": { - "ALLCAP": 13, - "INITCAP": 14, - "NOCAPS": 15 - }, - "11": { - "ALLDIGIT": 25, - "CONTAINSDIGITS": 26, - "NODIGIT": 27 - }, - "12": { - "0": 37, - "1": 38 - }, - "13": { - "0": 49, - "1": 50 - }, - "14": { - "0": 61, - "1": 62 - }, - "15": { - "0": 73, - "1": 74 - }, - "16": { - "0": 85, - "1": 86 - }, - "17": { - "0": 97, - "1": 98 - }, - "18": { - "0": 109, - "1": 110 - }, - "19": { - "0": 121, - "1": 122 - }, - "20": { - "0": 133, - "1": 134 - }, - "21": { - "0": 145, - "1": 146 - }, - "22": { - "0": 157, - "1": 158 - }, - "23": { - "0": 169, - "1": 170 - }, - "24": { - "0": 181, - "1": 182 - }, - "25": { - "0": 193, - "1": 194 - }, - "26": { - "COMMA": 205, - 
"DOT": 206, - "ENDBRACKET": 207, - "HYPHEN": 208, - "NOPUNCT": 209, - "OPENBRACKET": 210, - "PUNCT": 211, - "QUOTE": 212 - }, - "27": { - "0": 217, - "1": 218, - "10": 219, - "11": 220, - "2": 221, - "3": 222, - "4": 223, - "5": 224, - "6": 225, - "7": 226, - "8": 227, - "9": 228 - } - } - }, - "indice_tag": { - "0": "<PAD>", - "1": "B-<author>", - "2": "B-<booktitle>", - "3": "B-<collaboration>", - "4": "B-<date>", - "5": "B-<editor>", - "6": "B-<institution>", - "7": "B-<issue>", - "8": "B-<journal>", - "9": "B-<location>", - "10": "B-<note>", - "11": "B-<pages>", - "12": "B-<publisher>", - "13": "B-<pubnum>", - "14": "B-<series>", - "15": "B-<tech>", - "16": "B-<title>", - "17": "B-<volume>", - "18": "B-<web>", - "19": "I-<author>", - "20": "I-<booktitle>", - "21": "I-<collaboration>", - "22": "I-<date>", - "23": "I-<editor>", - "24": "I-<institution>", - "25": "I-<issue>", - "26": "I-<journal>", - "27": "I-<location>", - "28": "I-<note>", - "29": "I-<pages>", - "30": "I-<publisher>", - "31": "I-<pubnum>", - "32": "I-<series>", - "33": "I-<tech>", - "34": "I-<title>", - "35": "I-<volume>", - "36": "I-<web>", - "37": "O" - } -} \ No newline at end of file diff --git a/grobid-home/models/citation-BidLSTM_CRF_FEATURES.onnx/config.json b/grobid-home/models/citation-BidLSTM_CRF_FEATURES.onnx/config.json new file mode 100644 index 0000000000..3fc13bc52e --- /dev/null +++ b/grobid-home/models/citation-BidLSTM_CRF_FEATURES.onnx/config.json @@ -0,0 +1,34 @@ +{ + "modelName": "grobid-citation-BidLSTM_CRF_FEATURES", + "architecture": "BidLSTM_CRF_FEATURES", + "wordEmbeddingSize": 300, + "charEmbeddingSize": 25, + "numCharLstmUnits": 25, + "numWordLstmUnits": 100, + "maxSequenceLength": 500, + "embeddingsName": "glove-840B", + "featuresIndices": [ + 9, + 10, + 11, + 12, + 13, + 14, + 15, + 16, + 17, + 18, + 19, + 20, + 21, + 22, + 23, + 24, + 25, + 26, + 27 + ], + "featuresEmbeddingSize": 4, + "featuresLstmUnits": 4, + "featuresVocabularySize": 12 +} \ No newline at end of 
file diff --git a/grobid-home/models/citation-BidLSTM_CRF_FEATURES.onnx/crf_params.json b/grobid-home/models/citation-BidLSTM_CRF_FEATURES.onnx/crf_params.json new file mode 100644 index 0000000000..f6096c7485 --- /dev/null +++ b/grobid-home/models/citation-BidLSTM_CRF_FEATURES.onnx/crf_params.json @@ -0,0 +1,1604 @@ +{ + "transitions": [ + [ + 0.45657917857170105, + -1.7254638671875, + -1.2988920211791992, + -1.2670811414718628, + -1.3224672079086304, + -1.6413387060165405, + -0.6429982781410217, + -1.8774728775024414, + -2.0762295722961426, + -0.3108195662498474, + -2.718554973602295, + -2.6384592056274414, + -1.6607553958892822, + -2.5534017086029053, + -1.5009056329727173, + -1.6518653631210327, + -1.4670066833496094, + -1.992478370666504, + -1.0054503679275513, + -1.2628711462020874, + -2.236285924911499, + -2.4157798290252686, + -0.9515445232391357, + -2.3828511238098145, + -1.3146189451217651, + -0.9944998025894165, + -1.9127737283706665, + -0.6848518252372742, + -2.635063648223877, + -2.0959761142730713, + -2.1261208057403564, + -2.4760560989379883, + -2.2024943828582764, + -2.108215570449829, + -1.431313157081604, + -1.1686617136001587, + -0.7888093590736389, + -1.7697389125823975 + ], + [ + -1.2580524682998657, + -2.087117910385132, + -0.8514404296875, + -1.410083532333374, + -0.2971295416355133, + -1.4350194931030273, + -0.7131173610687256, + -0.3099457621574402, + -1.0468608140945435, + -1.0200761556625366, + -0.7354137897491455, + -0.358371376991272, + -0.9802907705307007, + -0.43225976824760437, + -0.48423823714256287, + -0.545546293258667, + -1.092613935470581, + -0.6921493411064148, + -0.37757691740989685, + 1.8750361204147339, + -2.0419390201568604, + -2.6600446701049805, + -1.4615267515182495, + -2.5791447162628174, + -2.088911533355713, + -0.505940318107605, + -1.974441647529602, + -1.7557897567749023, + -2.119426727294922, + -0.9901989698410034, + -2.1883292198181152, + -1.0693156719207764, + -1.8506922721862793, + -1.6066612005233765, + 
-2.0616815090179443, + -0.9530932307243347, + -0.9334427118301392, + -0.40199875831604004 + ], + [ + -1.934366226196289, + -0.8478443026542664, + -2.7167227268218994, + -1.2605518102645874, + -1.7743626832962036, + -1.1084529161453247, + -1.196256399154663, + -1.407170057296753, + -2.188110828399658, + -1.0903719663619995, + -1.9298160076141357, + -1.2780430316925049, + -1.5868059396743774, + -1.1573936939239502, + -1.8261778354644775, + -1.1890157461166382, + -2.5811338424682617, + -2.1948578357696533, + -0.8313423991203308, + -1.9925838708877563, + 1.9896939992904663, + -2.3598439693450928, + -1.693703293800354, + -2.214932680130005, + -2.5660834312438965, + -1.0463342666625977, + -3.4318675994873047, + -2.0260446071624756, + -3.229814052581787, + -1.5890235900878906, + -2.758009672164917, + -2.0226199626922607, + -3.6284215450286865, + -2.6248040199279785, + -3.5073587894439697, + -2.0123159885406494, + -1.1441214084625244, + -1.2662862539291382 + ], + [ + -1.3624783754348755, + -1.5604777336120605, + -1.0397937297821045, + -1.7255244255065918, + -0.7705797553062439, + -1.0564731359481812, + -0.7697165608406067, + -0.2315201759338379, + -0.964150607585907, + -0.6294353008270264, + -0.9524825811386108, + -0.2442367523908615, + -1.060999870300293, + -0.37707486748695374, + -0.629391610622406, + -0.5961847305297852, + -1.2903642654418945, + -0.671897828578949, + -0.2251533418893814, + -2.8058719635009766, + -2.8990633487701416, + 1.6627180576324463, + -0.9308065176010132, + -2.187796115875244, + -2.362675666809082, + -0.46138688921928406, + -2.2307472229003906, + -1.3009387254714966, + -1.954878330230713, + -0.5942599773406982, + -2.2300524711608887, + -0.9051530361175537, + -2.104140520095825, + -1.5806533098220825, + -2.840794563293457, + -0.7039247751235962, + -0.7560397386550903, + -0.31526991724967957 + ], + [ + -0.29777634143829346, + -0.8011839389801025, + 0.2422013282775879, + -0.8331459164619446, + -2.314849376678467, + 0.039530519396066666, + 
-0.19599857926368713, + -1.3207204341888428, + 0.9471725225448608, + -0.07450350373983383, + -0.2920750379562378, + 0.015611831098794937, + 0.4880717396736145, + -1.3752483129501343, + -0.2697068452835083, + 0.08725076913833618, + 0.4637313485145569, + -0.5683063864707947, + 0.07496543228626251, + -2.0609521865844727, + -2.8967859745025635, + -1.679197907447815, + 2.2740163803100586, + -1.919128656387329, + -1.9254100322723389, + -1.6324620246887207, + -2.5837514400482178, + -1.977355718612671, + -3.192966938018799, + -2.0481226444244385, + -1.9848105907440186, + -2.092989444732666, + -2.4332101345062256, + -1.976639986038208, + -2.500971555709839, + -2.502370595932007, + -1.2993578910827637, + 0.4266514778137207 + ], + [ + -1.4886813163757324, + -1.5220202207565308, + -1.0209217071533203, + -0.8243781924247742, + 0.31437811255455017, + -2.0150249004364014, + -0.8721848726272583, + -0.6546860337257385, + -1.0386312007904053, + -1.1580212116241455, + -1.0221425294876099, + -0.5624645352363586, + -1.5251262187957764, + -0.7264357209205627, + -0.5620583295822144, + -0.6922935247421265, + -0.5368449687957764, + -1.0326920747756958, + -0.36919665336608887, + -2.8151021003723145, + -2.5072197914123535, + -1.9451240301132202, + -1.4121330976486206, + 1.880102276802063, + -2.327136993408203, + -0.3277084529399872, + -2.253533124923706, + -2.071666717529297, + -2.4076173305511475, + -0.8503119349479675, + -2.5678014755249023, + -1.614977240562439, + -2.3767342567443848, + -1.7013286352157593, + -1.9819419384002686, + -1.020521879196167, + -1.0848251581192017, + -1.6287306547164917 + ], + [ + -0.1266685128211975, + -0.8523597717285156, + 0.04570235684514046, + -0.8985579013824463, + -1.7235077619552612, + -1.2501002550125122, + -1.7518079280853271, + -1.0804152488708496, + -1.5208818912506104, + -1.6535019874572754, + 0.17437982559204102, + -0.9732513427734375, + -2.063283920288086, + -1.1068010330200195, + -0.9947440028190613, + 0.0928560346364975, + -0.8656128644943237, + 
-1.704245686531067, + -0.5779868364334106, + -2.086165189743042, + -2.9328227043151855, + -1.9573719501495361, + -1.671120524406433, + -2.296234130859375, + 1.9352140426635742, + -0.8035863041877747, + -3.118515729904175, + -2.7691149711608887, + -2.950378179550171, + -1.1666544675827026, + -3.2748734951019287, + -2.422886371612549, + -2.814647912979126, + -2.5091865062713623, + -2.4366674423217773, + -1.3675785064697266, + -1.4538382291793823, + -0.4924522340297699 + ], + [ + 0.3834821581840515, + -0.5794278979301453, + -0.8212290406227112, + -0.3197081983089447, + -0.08477681875228882, + -0.7182992696762085, + -0.9416420459747314, + -2.134594678878784, + 0.3181159794330597, + -1.0505812168121338, + 0.37489694356918335, + -0.5514721870422363, + -0.9213749766349792, + -0.7438174486160278, + -0.5874804854393005, + -0.4158117473125458, + -0.617087721824646, + -0.30048155784606934, + -0.37743663787841797, + -0.7768145799636841, + -2.6749463081359863, + -0.6425855159759521, + -2.3020079135894775, + -1.4225643873214722, + -1.4827053546905518, + 0.9923464059829712, + -2.0384156703948975, + -1.89102303981781, + -2.925045967102051, + -2.522275924682617, + -1.6713424921035767, + -1.826281189918518, + -2.3252789974212646, + -1.6544256210327148, + -2.1071269512176514, + -2.3770384788513184, + -0.9843597412109375, + 0.42553243041038513 + ], + [ + -0.20993390679359436, + -0.7312290668487549, + -2.6046526432037354, + -0.9240898489952087, + 0.1556079089641571, + -1.207505464553833, + -1.3853824138641357, + 0.27898338437080383, + -2.9350197315216064, + -1.6664303541183472, + -0.36097586154937744, + -0.9860211610794067, + -1.8503150939941406, + -1.5058664083480835, + -1.9688549041748047, + -1.3600997924804688, + -1.7131946086883545, + 0.8651428818702698, + -0.5902389287948608, + -1.8621309995651245, + -3.45145583152771, + -2.3091328144073486, + -2.5048930644989014, + -2.1140403747558594, + -2.6407294273376465, + -1.840279459953308, + 2.022233724594116, + -2.2950594425201416, + 
-3.3345351219177246, + -1.5986891984939575, + -3.1374447345733643, + -2.4036550521850586, + -4.017137050628662, + -2.924137830734253, + -3.0284481048583984, + -3.263864517211914, + -1.4219368696212769, + -0.6684865951538086 + ], + [ + 0.00039828603621572256, + -1.0352922677993774, + -1.1455134153366089, + -0.45608392357826233, + 0.4431281089782715, + -1.447389006614685, + -1.332970380783081, + -1.2200554609298706, + -1.1733119487762451, + -2.014468193054199, + 0.0026044307742267847, + -1.21184241771698, + -1.7702462673187256, + -0.05428833141922951, + -0.9612429738044739, + -0.8342995643615723, + -0.5366727709770203, + -0.0448150709271431, + -0.537116527557373, + -1.7274069786071777, + -2.718392848968506, + -1.4851131439208984, + -2.5937321186065674, + -2.2406365871429443, + -2.6435937881469727, + -1.1567254066467285, + -2.5189480781555176, + 1.824367880821228, + -2.838228225708008, + -1.5552273988723755, + -2.9280893802642822, + -1.6117668151855469, + -2.8205227851867676, + -2.048525094985962, + -2.098257541656494, + -1.9512392282485962, + -0.9487360715866089, + -0.27660006284713745 + ], + [ + -0.9612246155738831, + -1.189780592918396, + -2.739419460296631, + -0.9474943280220032, + -0.12338961660861969, + -1.8294402360916138, + -0.08750586211681366, + -0.843731164932251, + -0.4965338706970215, + -1.6757526397705078, + -2.5999457836151123, + -2.147446393966675, + -0.6875380873680115, + 0.029754510149359703, + -1.8498284816741943, + -1.3414206504821777, + -2.3142449855804443, + -0.40234553813934326, + -1.1631485223770142, + -1.6504188776016235, + -3.599835157394409, + -1.8514035940170288, + -2.3298227787017822, + -2.2547378540039062, + -2.608895778656006, + -1.7989393472671509, + -3.451709508895874, + -2.0696816444396973, + 2.037738561630249, + -2.1154465675354004, + -3.083965301513672, + -2.664830446243286, + -3.4809367656707764, + -2.909520149230957, + -3.4963579177856445, + -1.946781039237976, + -1.65054452419281, + -0.5424761176109314 + ], + [ + 
0.0064069791696965694, + -0.39066773653030396, + -1.13959538936615, + -0.23888444900512695, + -0.9107760190963745, + -0.484514981508255, + -0.4197399616241455, + -1.5158262252807617, + 0.31095248460769653, + -0.9357269406318665, + -0.40653273463249207, + -2.4060146808624268, + -1.0130728483200073, + -0.9514408111572266, + -0.779716968536377, + -0.6135109066963196, + -0.7157073616981506, + -1.5807456970214844, + -0.8927251100540161, + -0.6392699480056763, + -1.9896479845046997, + -0.656912088394165, + -2.4127190113067627, + -1.4517303705215454, + -1.037469506263733, + -2.385737180709839, + -1.2581615447998047, + -1.7228858470916748, + -2.681508779525757, + 1.578829288482666, + -1.3791276216506958, + -2.477452278137207, + -1.875659465789795, + -1.9332562685012817, + -1.5160892009735107, + -2.6046996116638184, + -0.9494172930717468, + -0.2912082374095917 + ], + [ + -0.4036887288093567, + -1.1069729328155518, + -1.4758477210998535, + -0.8170952796936035, + -0.7675719857215881, + -1.509324550628662, + -1.7614972591400146, + -1.1157827377319336, + -0.17790433764457703, + 0.36428338289260864, + -1.298715353012085, + -1.1591808795928955, + -2.728986978530884, + -1.272373080253601, + 0.020382652059197426, + -1.2039556503295898, + -1.026099443435669, + -0.4954985976219177, + -0.5485758185386658, + -2.0026066303253174, + -3.002145767211914, + -2.191098928451538, + -1.9475706815719604, + -2.3599839210510254, + -3.032061815261841, + -1.0077389478683472, + -3.3883728981018066, + -3.1915323734283447, + -2.7871289253234863, + -1.4592617750167847, + 1.8814096450805664, + -1.9974850416183472, + -3.2706801891326904, + -2.5743536949157715, + -2.674457311630249, + -1.7522162199020386, + -1.1917866468429565, + -0.12237155437469482 + ], + [ + -0.16880424320697784, + -0.4412621259689331, + -0.9956077933311462, + -0.15029042959213257, + -1.7125012874603271, + -0.5735768675804138, + 0.04840938374400139, + -1.4274109601974487, + -1.2313910722732544, + -0.7786775827407837, + 
-1.362955927848816, + -1.890482783317566, + -0.8857340812683105, + -2.5489144325256348, + -0.6974778175354004, + -0.9390345215797424, + -0.6738181114196777, + -1.8921622037887573, + -1.4504643678665161, + -0.663796603679657, + -2.165235996246338, + -0.7089773416519165, + -2.0927646160125732, + -1.4436219930648804, + -1.7013415098190308, + -1.2632105350494385, + -1.9993101358413696, + -1.3753101825714111, + -2.6478588581085205, + -2.815675735473633, + -2.012145757675171, + 1.9975744485855103, + -2.167182445526123, + -2.410507917404175, + -1.312150239944458, + -1.6218907833099365, + -1.68290376663208, + -1.7667391300201416 + ], + [ + -1.8135671615600586, + -0.5115551948547363, + -2.517786741256714, + -0.6692095994949341, + -1.5940420627593994, + -1.0307589769363403, + -1.1546859741210938, + -1.3786948919296265, + -2.002894163131714, + -1.3573673963546753, + -1.6252118349075317, + -1.1512258052825928, + -1.7653621435165405, + -1.1760528087615967, + -1.3931665420532227, + -1.202052116394043, + -1.390074610710144, + 0.27849531173706055, + -0.7010268568992615, + -1.0531015396118164, + -3.918627977371216, + -1.6655609607696533, + -1.5847454071044922, + -1.8793107271194458, + -2.706291675567627, + -0.9223138093948364, + -3.5606322288513184, + -2.159458875656128, + -3.2153332233428955, + -1.4644443988800049, + -3.163489818572998, + -2.1332461833953857, + 1.5843355655670166, + -2.570528984069824, + -3.315469980239868, + -1.3792541027069092, + -1.1864086389541626, + -1.256666660308838 + ], + [ + -2.009692668914795, + -0.38048624992370605, + -1.5468169450759888, + -0.5829048156738281, + 0.0624677874147892, + -0.9306790232658386, + -1.191450834274292, + -0.8251820802688599, + -1.2889586687088013, + -1.024240255355835, + -1.431588053703308, + -1.1777013540267944, + -1.247164011001587, + 0.07392482459545135, + -0.9725857973098755, + -1.5272939205169678, + -0.955696702003479, + -1.6955116987228394, + -0.924564003944397, + -0.8188430666923523, + -3.1669812202453613, + 
-1.3514525890350342, + -1.4980523586273193, + -1.6198078393936157, + -2.6854333877563477, + -0.602117657661438, + -2.7061519622802734, + -1.818263053894043, + -2.7340877056121826, + -1.241163969039917, + -2.6283555030822754, + -2.8367552757263184, + -2.7880501747131348, + 1.5429354906082153, + -2.161687135696411, + -1.156194806098938, + -1.712408185005188, + -0.43802574276924133 + ], + [ + -1.7364332675933838, + -0.743925929069519, + -2.2983102798461914, + -1.4421827793121338, + -1.3423070907592773, + -0.8259854316711426, + -0.772675633430481, + -0.7931619882583618, + -0.4036080837249756, + -0.5296952724456787, + -1.547731876373291, + -0.8809980154037476, + -1.0116056203842163, + -0.94605553150177, + -1.1485577821731567, + -0.8267898559570312, + -2.503455877304077, + -1.5595670938491821, + -0.575520396232605, + -2.294769763946533, + -3.4514801502227783, + -2.4656405448913574, + -1.3487157821655273, + -2.010204553604126, + -2.106443405151367, + -0.9008358120918274, + -3.0970935821533203, + -1.2165286540985107, + -2.8458592891693115, + -1.3619258403778076, + -2.36616849899292, + -1.6172901391983032, + -3.3000566959381104, + -2.0768377780914307, + 1.8279497623443604, + -1.600235939025879, + -0.9771915674209595, + -0.974338710308075 + ], + [ + -0.23871886730194092, + -0.45546406507492065, + -1.1618503332138062, + -0.254958838224411, + -0.13491235673427582, + -0.6298776865005493, + 0.03745156154036522, + -0.3062214255332947, + -0.1250578761100769, + -1.2358951568603516, + 0.22827331721782684, + 0.8617718815803528, + -0.2375303953886032, + -1.2837997674942017, + -1.0155808925628662, + -0.7816857695579529, + -0.8437100052833557, + -2.4284558296203613, + -0.5047628283500671, + -0.8757196068763733, + -2.6386709213256836, + -0.7454298734664917, + -2.355177879333496, + -1.6572908163070679, + -1.5248626470565796, + -2.0207931995391846, + -2.12870717048645, + -1.7966010570526123, + -3.0426321029663086, + -2.1295557022094727, + -1.693716049194336, + -1.8252371549606323, + 
-1.9991053342819214, + -2.0430383682250977, + -2.190791130065918, + 1.2143940925598145, + -1.0707664489746094, + 0.6423196196556091 + ], + [ + -1.9091709852218628, + -0.31763240694999695, + -0.7042407989501953, + -0.3518291413784027, + -0.7948834300041199, + -0.675581157207489, + -0.46863844990730286, + -0.7826033234596252, + -0.5058429837226868, + -0.301849365234375, + -0.6287923455238342, + -0.8587498068809509, + -0.5340302586555481, + -1.1321203708648682, + -0.296547532081604, + -0.5896424651145935, + -0.4402746558189392, + -0.5692362785339355, + -1.4783483743667603, + -0.5728195905685425, + -1.475324273109436, + -0.7053667902946472, + -1.190481424331665, + -1.0854310989379883, + -1.2175312042236328, + -0.5957954525947571, + -0.9935160875320435, + -0.599934458732605, + -1.7239317893981934, + -1.0580377578735352, + -1.0721821784973145, + -2.1976261138916016, + -1.1993407011032104, + -1.6456832885742188, + -0.9379597306251526, + -0.7327592372894287, + 1.3118685483932495, + -1.8223817348480225 + ], + [ + -0.8714396953582764, + -1.853116750717163, + -0.7357402443885803, + -0.5863816142082214, + 0.02127656526863575, + -0.567829430103302, + -1.2838083505630493, + -0.5320233106613159, + -0.5566316843032837, + -1.6091386079788208, + -1.690220832824707, + -1.0507662296295166, + -1.3925522565841675, + 0.37576332688331604, + -1.0224628448486328, + -1.2677232027053833, + -0.421408474445343, + -0.7522180676460266, + -0.5770581364631653, + 1.469793677330017, + -2.619396209716797, + -3.8339288234710693, + -2.1878268718719482, + -2.577200412750244, + -2.260521411895752, + -0.6093447208404541, + -2.429159164428711, + -2.0542571544647217, + -2.315372943878174, + -1.3182443380355835, + -2.5871670246124268, + -1.3911893367767334, + -2.268223524093628, + -1.6037938594818115, + -2.71895694732666, + -1.951128363609314, + -0.6585078835487366, + 0.1846662014722824 + ], + [ + -1.0354511737823486, + -0.05742604285478592, + -3.1329498291015625, + -1.6681122779846191, + -0.6904481053352356, 
+ -2.1388893127441406, + -2.098621129989624, + -0.7833964824676514, + -2.3108198642730713, + -0.9780736565589905, + -1.0450879335403442, + -0.8568062782287598, + -1.6496107578277588, + -0.44611871242523193, + -2.8013432025909424, + -2.174286127090454, + -2.5588650703430176, + -0.8847936987876892, + -0.15595276653766632, + -1.9898730516433716, + 1.6862306594848633, + -2.7567031383514404, + -2.663858413696289, + -2.5370066165924072, + -2.727262258529663, + -2.035982131958008, + -3.290111780166626, + -3.02889347076416, + -3.6716537475585938, + -1.9858717918395996, + -3.1281673908233643, + -2.197793483734131, + -4.1008758544921875, + -3.2462685108184814, + -3.3084144592285156, + -3.273815631866455, + -1.48838210105896, + -0.6130866408348083 + ], + [ + -2.4268743991851807, + -1.0844701528549194, + -1.200859546661377, + -1.3532661199569702, + -0.13728827238082886, + -1.039311170578003, + -0.8637980818748474, + -0.16449452936649323, + -0.9602260589599609, + -0.8828447461128235, + -0.7639266848564148, + -0.305705726146698, + -0.9545315504074097, + -0.3603043556213379, + -0.3713889420032501, + -0.5492355823516846, + -1.6915355920791626, + -0.6300157904624939, + -0.2002902626991272, + -2.9801766872406006, + -2.3514974117279053, + 0.7924177050590515, + -1.1784902811050415, + -2.3716251850128174, + -1.9347165822982788, + -0.3662571609020233, + -2.2782700061798096, + -1.6204148530960083, + -1.9014047384262085, + -0.7225125432014465, + -2.0962584018707275, + -1.0571011304855347, + -1.8776935338974, + -1.2275242805480957, + -2.468075752258301, + -0.5779125094413757, + -0.8635720610618591, + 0.3403991460800171 + ], + [ + -0.6031792759895325, + -0.9964290857315063, + -1.2463401556015015, + -0.40242671966552734, + -2.9970970153808594, + -0.8123520016670227, + -0.8776895403862, + -1.6606690883636475, + 0.10314860194921494, + -0.5459357500076294, + -0.09653744846582413, + -0.9057682752609253, + -0.7666310667991638, + -1.3840076923370361, + -0.7385956048965454, + -0.5747491121292114, + 
-1.8913615942001343, + -2.1777853965759277, + -0.0359971784055233, + -1.304287075996399, + -2.75036358833313, + -1.0997114181518555, + 1.5520225763320923, + -1.603230595588684, + -1.5884270668029785, + -1.895064353942871, + -1.5219680070877075, + -1.8727933168411255, + -3.175762176513672, + -2.3344202041625977, + -1.8282241821289062, + -2.124410629272461, + -2.1841773986816406, + -2.0011556148529053, + -1.994920253753662, + -2.0824995040893555, + -1.7081944942474365, + -0.04741279408335686 + ], + [ + 0.001464596949517727, + -1.3153842687606812, + -1.7750449180603027, + -1.3446664810180664, + -0.8063778281211853, + -2.496769428253174, + -1.6510584354400635, + -0.9740445613861084, + -1.4789623022079468, + -1.6836498975753784, + -1.4654854536056519, + -0.11381307244300842, + -0.9160778522491455, + -0.7411896586418152, + -1.2106752395629883, + -1.316740870475769, + -2.1778197288513184, + -1.6897826194763184, + -0.5128070712089539, + -2.5155630111694336, + -2.383720636367798, + -2.430300712585449, + -1.9292216300964355, + 1.2904382944107056, + -2.335050582885742, + -0.7711840867996216, + -2.114753246307373, + -2.162869691848755, + -2.705646514892578, + -0.9566371440887451, + -2.6486754417419434, + -1.2961572408676147, + -2.4963555335998535, + -1.8424516916275024, + -2.157315254211426, + -1.5924248695373535, + -1.1594699621200562, + -0.5940916538238525 + ], + [ + 0.16222313046455383, + -1.2532039880752563, + -1.425062656402588, + -1.1237469911575317, + -0.22716695070266724, + -1.3648428916931152, + -2.1730425357818604, + -0.6878266930580139, + -1.344835638999939, + -1.27156662940979, + -1.4034432172775269, + -0.8701751828193665, + -2.246166944503784, + -0.023427056148648262, + -1.3291192054748535, + 0.5866320133209229, + -1.3489457368850708, + -1.6879552602767944, + -0.6133474707603455, + -1.9614986181259155, + -2.517993688583374, + -2.0293514728546143, + -1.8159234523773193, + -2.1168694496154785, + 1.2119126319885254, + -0.8502691984176636, + -2.5015597343444824, + 
-2.6057004928588867, + -2.6248700618743896, + -1.193113923072815, + -3.0618789196014404, + -1.8253097534179688, + -2.6438307762145996, + -2.179112672805786, + -2.312567949295044, + -1.1174665689468384, + -1.4984151124954224, + -0.2834663689136505 + ], + [ + -1.1820076704025269, + -0.441546231508255, + -0.46076205372810364, + -0.2232024222612381, + -1.1733464002609253, + -0.18620240688323975, + -0.4195297360420227, + -1.7261085510253906, + -0.6401600241661072, + -0.6321255564689636, + -0.7112460732460022, + -1.5683192014694214, + -0.4373256266117096, + -0.5970399379730225, + -0.16830870509147644, + -0.19033874571323395, + -0.5145110487937927, + -1.4434478282928467, + -0.31944456696510315, + -0.5084838271141052, + -1.6517597436904907, + -0.2862340211868286, + -1.9813846349716187, + -0.6506680250167847, + -0.8837465643882751, + 1.1305674314498901, + -1.0241273641586304, + -1.1385997533798218, + -2.0236713886260986, + -2.4412014484405518, + -0.90739506483078, + -1.8907356262207031, + -1.1561230421066284, + -1.0170981884002686, + -1.192569375038147, + -1.7534596920013428, + -1.0200852155685425, + -0.3421257734298706 + ], + [ + -0.7923245429992676, + -0.6188677549362183, + -2.390526294708252, + -1.0810531377792358, + -0.0694633424282074, + -1.4124823808670044, + -1.748159408569336, + -0.8339914083480835, + -2.6497058868408203, + -0.16460685431957245, + -0.16616998612880707, + -0.19532062113285065, + -2.581245183944702, + -1.42731511592865, + -2.3517637252807617, + -1.5202038288116455, + -2.3366241455078125, + 0.05929391458630562, + -0.8786223530769348, + -1.8983091115951538, + -3.442636251449585, + -2.365623712539673, + -2.6008200645446777, + -2.5690321922302246, + -2.781308174133301, + -1.7110270261764526, + 1.7855647802352905, + -2.826700210571289, + -3.7369935512542725, + -1.497348666191101, + -3.2914505004882812, + -2.0016191005706787, + -4.264852046966553, + -3.2680134773254395, + -3.015462636947632, + -3.998128890991211, + -1.4962152242660522, + -0.8279354572296143 
+ ], + [ + -0.015430246479809284, + -1.1533108949661255, + -1.430083155632019, + -0.7773129343986511, + -0.4400736391544342, + -1.3833562135696411, + -1.5993731021881104, + -1.2142270803451538, + -1.2917886972427368, + -2.9902422428131104, + -1.6516424417495728, + -0.7256853580474854, + -2.4189352989196777, + -1.04271399974823, + -0.9224168658256531, + -1.1056015491485596, + -1.563831090927124, + -1.862464189529419, + 0.2665143311023712, + -1.8530006408691406, + -2.6535682678222656, + -1.6611402034759521, + -2.4291274547576904, + -2.2071166038513184, + -2.6465113162994385, + -1.26434326171875, + -2.375486135482788, + 1.6297520399093628, + -3.0225729942321777, + -1.6860389709472656, + -2.9665181636810303, + -1.6550822257995605, + -2.699439764022827, + -2.2616052627563477, + -2.092986822128296, + -1.8027167320251465, + -0.9200475215911865, + -0.029429011046886444 + ], + [ + -0.4675758481025696, + -1.1190723180770874, + -0.9756779074668884, + -1.05160653591156, + -1.217578649520874, + -0.5330201983451843, + -0.07428105175495148, + -2.4455883502960205, + 0.003700054483488202, + -0.7784658670425415, + -2.403334379196167, + -2.600884437561035, + -0.6679537892341614, + -0.3978484272956848, + -1.7384477853775024, + -1.6977934837341309, + -2.283695697784424, + -0.742687463760376, + -0.3593957722187042, + -1.6518263816833496, + -3.2444169521331787, + -2.0510928630828857, + -2.691774606704712, + -2.2633919715881348, + -2.4678726196289062, + -1.6998931169509888, + -3.128377914428711, + -2.472714424133301, + 1.407179832458496, + -2.581315279006958, + -2.8508388996124268, + -2.830418825149536, + -3.196280002593994, + -2.8512790203094482, + -2.961390972137451, + -2.4400174617767334, + -1.8779717683792114, + -0.35683947801589966 + ], + [ + -0.660494327545166, + -0.6414039731025696, + -0.8501704335212708, + -0.43919989466667175, + -1.0771427154541016, + 0.35650894045829773, + -0.8832647204399109, + -2.1268045902252197, + 0.131255641579628, + 0.20143045485019684, + 
0.08713097870349884, + -2.125683546066284, + -0.3688608705997467, + -0.7352280020713806, + -0.6466352343559265, + -0.6711243987083435, + 0.024290909990668297, + -1.269054651260376, + -0.8655333518981934, + -0.8407468795776367, + -2.1583688259124756, + -0.8561399579048157, + -2.545478343963623, + -1.0145747661590576, + -1.1419459581375122, + -3.2171216011047363, + -1.0580638647079468, + -1.6442292928695679, + -2.9044525623321533, + 1.6470199823379517, + -1.5654337406158447, + -2.386275291442871, + -1.723999261856079, + -1.8791625499725342, + -1.5555813312530518, + -3.1796560287475586, + -1.1547609567642212, + -0.019089747220277786 + ], + [ + -0.6463392972946167, + -1.3866616487503052, + -1.5057345628738403, + -1.016966462135315, + -0.47088000178337097, + -1.674490213394165, + -1.9977883100509644, + -0.8923759460449219, + -0.12570549547672272, + -1.2440476417541504, + -1.8356050252914429, + -0.18762943148612976, + -2.8892905712127686, + -0.16854774951934814, + -1.349707007408142, + -1.400325059890747, + -1.3165473937988281, + -2.054481029510498, + -0.7052426934242249, + -2.4374239444732666, + -2.860539674758911, + -2.2308225631713867, + -2.0803892612457275, + -2.6065056324005127, + -3.156863212585449, + -1.0148533582687378, + -2.897756338119507, + -3.215593099594116, + -3.085181951522827, + -1.6279449462890625, + 1.551371693611145, + -1.884119987487793, + -3.1540820598602295, + -2.6556808948516846, + -2.569963216781616, + -1.9133144617080688, + -1.4564285278320312, + -0.34245768189430237 + ], + [ + -0.35366421937942505, + -0.7312232851982117, + -1.2182986736297607, + -0.6163435578346252, + -1.3381294012069702, + -0.7880350351333618, + -0.43879982829093933, + -1.7418206930160522, + -0.3985506594181061, + -1.2090766429901123, + -2.4474005699157715, + -2.3924057483673096, + -1.2241272926330566, + -2.334491491317749, + -0.9998557567596436, + -1.7069777250289917, + -1.4658290147781372, + -1.8725411891937256, + -0.701711118221283, + -0.8752269148826599, + 
-2.2997143268585205, + -1.1896758079528809, + -2.2099812030792236, + -1.6886945962905884, + -1.7866628170013428, + -2.2151429653167725, + -1.518157958984375, + -1.5511894226074219, + -3.190805435180664, + -2.2823729515075684, + -1.8590222597122192, + 1.4594566822052002, + -2.2622592449188232, + -2.9263203144073486, + -1.4819097518920898, + -2.34877610206604, + -1.8937572240829468, + -0.8151752352714539 + ], + [ + -2.069762945175171, + -0.8146624565124512, + -2.4462058544158936, + -0.9368357062339783, + -2.0533690452575684, + -1.2836114168167114, + -1.6275206804275513, + -1.8510100841522217, + -2.733599901199341, + -2.1579349040985107, + -2.18290114402771, + -1.831044316291809, + -2.2708218097686768, + -1.3794316053390503, + -1.9960098266601562, + -1.4110610485076904, + -1.9270753860473633, + -0.08569145947694778, + -0.7651064395904541, + -1.5622566938400269, + -3.8268558979034424, + -2.1120851039886475, + -2.0574564933776855, + -2.4248368740081787, + -2.6079320907592773, + -1.2084507942199707, + -3.5626628398895264, + -2.393941879272461, + -3.3574769496917725, + -1.5337347984313965, + -3.039767026901245, + -2.1799123287200928, + 1.3774102926254272, + -2.761600971221924, + -3.2596418857574463, + -2.187648057937622, + -1.4907891750335693, + -0.7454717755317688 + ], + [ + -1.9551693201065063, + -0.6927981376647949, + -1.4689112901687622, + -0.5725055932998657, + -1.6370700597763062, + -1.163214921951294, + -0.015855779871344566, + -1.0277124643325806, + -1.734549880027771, + -1.5833313465118408, + -1.7694636583328247, + -1.4607677459716797, + -1.5804563760757446, + 1.0022990703582764, + -1.1800225973129272, + -1.7673892974853516, + -1.1754943132400513, + -2.2843337059020996, + 0.046492353081703186, + -1.154741883277893, + -2.696756601333618, + -1.4598208665847778, + -1.9600811004638672, + -1.8266266584396362, + -2.507661819458008, + -1.0929009914398193, + -2.5178334712982178, + -2.095463514328003, + -2.829322576522827, + -1.7013896703720093, + -2.4265172481536865, + 
-2.7242445945739746, + -2.553251028060913, + 0.9564800262451172, + -2.24807071685791, + -1.6756738424301147, + -1.9068894386291504, + -0.2768643796443939 + ], + [ + -0.5277445316314697, + -0.7780254483222961, + -1.9484217166900635, + -1.8201377391815186, + -0.3611772656440735, + -0.8792129158973694, + -1.6080174446105957, + -1.897482991218567, + -0.9934938549995422, + -2.3075568675994873, + -1.499800443649292, + -0.7066132426261902, + -1.086706519126892, + -0.45405861735343933, + -2.368536949157715, + -1.7616318464279175, + -2.6811821460723877, + -0.9389429688453674, + -1.044274091720581, + -1.8349555730819702, + -3.326568365097046, + -3.064472198486328, + -1.906092643737793, + -2.1701033115386963, + -2.640275001525879, + -1.369072437286377, + -3.46012544631958, + -2.283961296081543, + -3.6266801357269287, + -1.3576760292053223, + -2.904729127883911, + -1.7739413976669312, + -3.69710111618042, + -2.5422017574310303, + 1.65988290309906, + -2.5419163703918457, + -1.0699994564056396, + -0.5555716753005981 + ], + [ + -1.589754343032837, + -0.33446016907691956, + -0.8003204464912415, + -0.3152627646923065, + -2.6636409759521484, + -0.41454172134399414, + -0.6648046374320984, + -2.847352981567383, + -1.4190844297409058, + -1.1146562099456787, + -1.34132981300354, + -3.4223856925964355, + -1.0888288021087646, + -0.8044797778129578, + -0.41240137815475464, + -0.46918797492980957, + -1.1907289028167725, + -2.311145067214966, + -0.28614404797554016, + -0.9569870233535767, + -2.3355534076690674, + -0.5979520082473755, + -2.2807700634002686, + -1.2550808191299438, + -1.005100965499878, + -1.9091483354568481, + -1.6978236436843872, + -1.2271112203598022, + -2.2678635120391846, + -2.4864163398742676, + -1.4327486753463745, + -2.030940294265747, + -1.4052306413650513, + -1.4676859378814697, + -1.9480984210968018, + 1.4021075963974, + -1.0454349517822266, + 0.06828484684228897 + ], + [ + -0.23079082369804382, + -1.021427869796753, + -1.1366149187088013, + -0.8545809984207153, + 
-1.464737892150879, + -0.862376868724823, + -1.183154821395874, + -0.8415493369102478, + -1.0104671716690063, + -0.6394678354263306, + -0.5208562612533569, + -1.1245139837265015, + -0.9386278986930847, + -0.7674246430397034, + -0.7726422548294067, + -1.0004769563674927, + -1.389273762702942, + -1.3358479738235474, + -2.1130897998809814, + -0.5527687072753906, + -1.533891201019287, + -0.9919546246528625, + -1.815958023071289, + -1.183046817779541, + -1.5838251113891602, + -1.081207275390625, + -1.3008248805999756, + -0.9169443845748901, + -1.9132722616195679, + -0.9715282320976257, + -1.4475971460342407, + -1.735262155532837, + -1.5720882415771484, + -2.0099592208862305, + -0.9564676284790039, + -1.2514877319335938, + 1.0011119842529297, + -0.6127866506576538 + ], + [ + 0.6448063254356384, + 0.7572498321533203, + 1.0911988019943237, + 0.524128794670105, + 1.4135451316833496, + 1.1817667484283447, + 0.8331936001777649, + 0.9025977849960327, + 1.2999435663223267, + 1.454221248626709, + 0.8888773918151855, + 1.0594176054000854, + 1.1529332399368286, + 1.0303577184677124, + 0.4484076499938965, + 0.5056223273277283, + 1.090172290802002, + 0.9097325801849365, + 0.8688086867332458, + -3.1089305877685547, + -3.712941884994507, + -2.756964683532715, + -3.5867767333984375, + -3.0733652114868164, + -3.1176345348358154, + -3.528468370437622, + -2.9516360759735107, + -3.464951753616333, + -3.9666693210601807, + -2.828489065170288, + -3.4350264072418213, + -3.0497825145721436, + -3.6919758319854736, + -2.993417501449585, + -3.0930402278900146, + -3.997267246246338, + -2.1553473472595215, + 1.3781147003173828 + ] + ], + "startTransitions": [ + -0.5686123371124268, + 1.3780397176742554, + 0.6937686204910278, + 0.4019813537597656, + -0.052585214376449585, + -0.06788468360900879, + 0.5093993544578552, + -1.1224946975708008, + 1.1872624158859253, + -1.5751601457595825, + 0.3040814995765686, + 0.30986014008522034, + 0.7399799823760986, + 0.43532711267471313, + -0.20763517916202545, + 
-1.393404483795166, + 0.7376992702484131, + -0.6647242903709412, + -0.07946129888296127, + -2.382018566131592, + -2.2307560443878174, + -3.0358145236968994, + -1.7324212789535522, + -1.9357080459594727, + -2.2607152462005615, + -1.4234501123428345, + -2.3690545558929443, + -1.8972913026809692, + -1.9582685232162476, + -1.3180702924728394, + -2.491741418838501, + -1.554578185081482, + -2.074909210205078, + -1.5994789600372314, + -1.9437367916107178, + -1.2468892335891724, + -1.116112470626831, + 0.6255027651786804 + ], + "endTransitions": [ + 1.3384504318237305, + -0.7345888614654541, + -0.9976558685302734, + -0.8110249638557434, + -0.3210834860801697, + -1.0078150033950806, + -0.3305588960647583, + -0.6704232692718506, + -1.1016132831573486, + -0.0033517074771225452, + -1.054339051246643, + -0.21098436415195465, + -0.6152421236038208, + -0.9732515215873718, + -0.8970611691474915, + -1.041251301765442, + -1.030497670173645, + -0.13948768377304077, + -1.2132830619812012, + -1.2121561765670776, + -1.035833477973938, + -1.8381625413894653, + -0.32516998052597046, + -0.8982994556427002, + -0.38082367181777954, + -0.6117727756500244, + -1.29838228225708, + -0.5704505443572998, + -0.4178621172904968, + -1.2673799991607666, + -0.8355352878570557, + -0.994159996509552, + -1.2757554054260254, + -1.3586913347244263, + -1.0985126495361328, + -1.0413298606872559, + -0.8479649424552917, + 1.447618007659912 + ] +} \ No newline at end of file diff --git a/grobid-home/models/citation-BidLSTM_CRF_FEATURES.onnx/encoder.onnx b/grobid-home/models/citation-BidLSTM_CRF_FEATURES.onnx/encoder.onnx new file mode 100644 index 0000000000..efcccc7320 Binary files /dev/null and b/grobid-home/models/citation-BidLSTM_CRF_FEATURES.onnx/encoder.onnx differ diff --git a/grobid-home/models/citation-BidLSTM_CRF_FEATURES.onnx/vocab.json b/grobid-home/models/citation-BidLSTM_CRF_FEATURES.onnx/vocab.json new file mode 100644 index 0000000000..52e9502f90 --- /dev/null +++ 
b/grobid-home/models/citation-BidLSTM_CRF_FEATURES.onnx/vocab.json @@ -0,0 +1,2143 @@ +{ + "charVocab": { + "<PAD>": 0, + "<UNK>": 1, + "!": 2, + "\"": 3, + "#": 4, + "$": 5, + "%": 6, + "&": 7, + "'": 8, + "(": 9, + ")": 10, + "*": 11, + "+": 12, + ",": 13, + "-": 14, + ".": 15, + "/": 16, + "0": 17, + "1": 18, + "2": 19, + "3": 20, + "4": 21, + "5": 22, + "6": 23, + "7": 24, + "8": 25, + "9": 26, + ":": 27, + ";": 28, + "<": 29, + "=": 30, + ">": 31, + "?": 32, + "@": 33, + "A": 34, + "B": 35, + "C": 36, + "D": 37, + "E": 38, + "F": 39, + "G": 40, + "H": 41, + "I": 42, + "J": 43, + "K": 44, + "L": 45, + "M": 46, + "N": 47, + "O": 48, + "P": 49, + "Q": 50, + "R": 51, + "S": 52, + "T": 53, + "U": 54, + "V": 55, + "W": 56, + "X": 57, + "Y": 58, + "Z": 59, + "[": 60, + "\\": 61, + "]": 62, + "^": 63, + "_": 64, + "`": 65, + "a": 66, + "b": 67, + "c": 68, + "d": 69, + "e": 70, + "f": 71, + "g": 72, + "h": 73, + "i": 74, + "j": 75, + "k": 76, + "l": 77, + "m": 78, + "n": 79, + "o": 80, + "p": 81, + "q": 82, + "r": 83, + "s": 84, + "t": 85, + "u": 86, + "v": 87, + "w": 88, + "x": 89, + "y": 90, + "z": 91, + "|": 92, + "~": 93, + "„": 94, + "†": 95, + "’": 96, + "–": 97, + "¡": 98, + "¢": 99, + "¤": 100, + "§": 101, + "¨": 102, + "©": 103, + "«": 104, + "­": 105, + "¯": 106, + "°": 107, + "±": 108, + "²": 109, + "³": 110, + "´": 111, + "µ": 112, + "¸": 113, + "º": 114, + "»": 115, + "¼": 116, + "¿": 117, + "À": 118, + "Á": 119, + "Â": 120, + "Ã": 121, + "Å": 122, + "È": 123, + "É": 124, + "Ê": 125, + "Î": 126, + "Ï": 127, + "Ô": 128, + "Ö": 129, + "×": 130, + "Ø": 131, + "Ü": 132, + "ß": 133, + "à": 134, + "á": 135, + "â": 136, + "ã": 137, + "ä": 138, + "ç": 139, + "è": 140, + "é": 141, + "ê": 142, + "ì": 143, + "í": 144, + "î": 145, + "ï": 146, + "ñ": 147, + "ò": 148, + "ó": 149, + "ô": 150, + "õ": 151, + "ö": 152, + "ø": 153, + "ú": 154, + "û": 155, + "ü": 156, + "ý": 157, + "Ă": 158, + "ą": 159, + "ć": 160, + "Č": 161, + "č": 162, + "ė": 163, + "ę": 164, + "ğ": 165, + 
"İ": 166, + "ı": 167, + "IJ": 168, + "ľ": 169, + "ł": 170, + "ń": 171, + "ř": 172, + "ś": 173, + "š": 174, + "ū": 175, + "Ź": 176, + "Ž": 177, + "ž": 178, + "ˆ": 179, + "ˇ": 180, + "˜": 181, + "́": 182, + "Κ": 183, + "Λ": 184, + "Ξ": 185, + "Π": 186, + "α": 187, + "β": 188, + "γ": 189, + "ε": 190, + "η": 191, + "θ": 192, + "ι": 193, + "λ": 194, + "ν": 195, + "ξ": 196, + "ο": 197, + "π": 198, + "ρ": 199, + "ς": 200, + "σ": 201, + "τ": 202, + "φ": 203, + "ϩ": 204, + "А": 205, + "Б": 206, + "В": 207, + "Д": 208, + "Е": 209, + "И": 210, + "К": 211, + "Л": 212, + "М": 213, + "Н": 214, + "О": 215, + "П": 216, + "Р": 217, + "С": 218, + "Т": 219, + "Х": 220, + "Ч": 221, + "Я": 222, + "а": 223, + "б": 224, + "в": 225, + "г": 226, + "д": 227, + "е": 228, + "ж": 229, + "з": 230, + "и": 231, + "й": 232, + "к": 233, + "л": 234, + "м": 235, + "н": 236, + "о": 237, + "п": 238, + "р": 239, + "с": 240, + "т": 241, + "у": 242, + "ф": 243, + "х": 244, + "ц": 245, + "ч": 246, + "щ": 247, + "ъ": 248, + "ы": 249, + "ь": 250, + "э": 251, + "ю": 252, + "я": 253, + "᎐": 254, + "ἄ": 255, + "ἔ": 256, + "έ": 257, + "ό": 258, + "‘": 259, + "’": 260, + "‚": 261, + "“": 262, + "”": 263, + "„": 264, + "•": 265, + "․": 266, + "…": 267, + "ℓ": 268, + "→": 269, + "↵": 270, + "∂": 271, + "∆": 272, + "√": 273, + "∝": 274, + "∞": 275, + "∼": 276, + "≅": 277, + "、": 278, + "あ": 279, + "い": 280, + "え": 281, + "お": 282, + "か": 283, + "ぐ": 284, + "け": 285, + "げ": 286, + "こ": 287, + "さ": 288, + "し": 289, + "す": 290, + "た": 291, + "ち": 292, + "っ": 293, + "つ": 294, + "づ": 295, + "て": 296, + "で": 297, + "と": 298, + "な": 299, + "に": 300, + "の": 301, + "び": 302, + "ま": 303, + "み": 304, + "む": 305, + "め": 306, + "ゆ": 307, + "よ": 308, + "ら": 309, + "り": 310, + "る": 311, + "れ": 312, + "わ": 313, + "を": 314, + "ァ": 315, + "ア": 316, + "ィ": 317, + "イ": 318, + "ウ": 319, + "ェ": 320, + "エ": 321, + "カ": 322, + "ガ": 323, + "キ": 324, + "ク": 325, + "グ": 326, + "コ": 327, + "ゴ": 328, + "シ": 329, + "ジ": 330, + "ス": 331, + "ズ": 
332, + "セ": 333, + "ソ": 334, + "タ": 335, + "チ": 336, + "ッ": 337, + "テ": 338, + "デ": 339, + "ト": 340, + "ド": 341, + "ナ": 342, + "ニ": 343, + "ネ": 344, + "ノ": 345, + "ハ": 346, + "バ": 347, + "パ": 348, + "ヒ": 349, + "フ": 350, + "ブ": 351, + "プ": 352, + "ベ": 353, + "ミ": 354, + "ム": 355, + "メ": 356, + "モ": 357, + "ュ": 358, + "ョ": 359, + "ラ": 360, + "リ": 361, + "ル": 362, + "レ": 363, + "ロ": 364, + "ワ": 365, + "ン": 366, + "・": 367, + "ー": 368, + "一": 369, + "丁": 370, + "三": 371, + "上": 372, + "下": 373, + "与": 374, + "世": 375, + "丘": 376, + "业": 377, + "东": 378, + "个": 379, + "中": 380, + "临": 381, + "为": 382, + "主": 383, + "丽": 384, + "久": 385, + "之": 386, + "乐": 387, + "也": 388, + "习": 389, + "书": 390, + "乾": 391, + "予": 392, + "事": 393, + "二": 394, + "于": 395, + "云": 396, + "互": 397, + "五": 398, + "井": 399, + "亞": 400, + "交": 401, + "亥": 402, + "产": 403, + "京": 404, + "亮": 405, + "人": 406, + "仁": 407, + "今": 408, + "介": 409, + "从": 410, + "仙": 411, + "代": 412, + "以": 413, + "仲": 414, + "价": 415, + "任": 416, + "仿": 417, + "企": 418, + "伊": 419, + "伍": 420, + "众": 421, + "会": 422, + "伟": 423, + "传": 424, + "伯": 425, + "伸": 426, + "伽": 427, + "但": 428, + "位": 429, + "住": 430, + "体": 431, + "何": 432, + "作": 433, + "佳": 434, + "併": 435, + "使": 436, + "來": 437, + "例": 438, + "供": 439, + "価": 440, + "便": 441, + "係": 442, + "俄": 443, + "俊": 444, + "保": 445, + "信": 446, + "修": 447, + "倉": 448, + "倩": 449, + "倪": 450, + "倫": 451, + "倻": 452, + "值": 453, + "偉": 454, + "偿": 455, + "傳": 456, + "傷": 457, + "傾": 458, + "元": 459, + "兆": 460, + "光": 461, + "克": 462, + "入": 463, + "內": 464, + "全": 465, + "八": 466, + "公": 467, + "六": 468, + "共": 469, + "关": 470, + "兵": 471, + "其": 472, + "典": 473, + "兼": 474, + "内": 475, + "再": 476, + "冠": 477, + "冬": 478, + "几": 479, + "処": 480, + "出": 481, + "分": 482, + "刊": 483, + "划": 484, + "刘": 485, + "刚": 486, + "创": 487, + "初": 488, + "判": 489, + "利": 490, + "到": 491, + "制": 492, + "券": 493, + "刻": 494, + "前": 495, + "剛": 496, + "剧": 497, + "創": 498, + 
"劉": 499, + "功": 500, + "加": 501, + "动": 502, + "勉": 503, + "動": 504, + "務": 505, + "勝": 506, + "勵": 507, + "化": 508, + "北": 509, + "匱": 510, + "区": 511, + "医": 512, + "區": 513, + "十": 514, + "千": 515, + "半": 516, + "华": 517, + "卓": 518, + "協": 519, + "南": 520, + "占": 521, + "卫": 522, + "危": 523, + "卷": 524, + "厅": 525, + "原": 526, + "去": 527, + "参": 528, + "參": 529, + "叉": 530, + "及": 531, + "发": 532, + "取": 533, + "受": 534, + "变": 535, + "叢": 536, + "口": 537, + "古": 538, + "句": 539, + "可": 540, + "史": 541, + "号": 542, + "司": 543, + "合": 544, + "吉": 545, + "同": 546, + "名": 547, + "后": 548, + "向": 549, + "听": 550, + "启": 551, + "吳": 552, + "吴": 553, + "吾": 554, + "呈": 555, + "告": 556, + "周": 557, + "和": 558, + "品": 559, + "哉": 560, + "响": 561, + "員": 562, + "哲": 563, + "商": 564, + "問": 565, + "啸": 566, + "善": 567, + "喜": 568, + "喪": 569, + "喬": 570, + "営": 571, + "嘉": 572, + "器": 573, + "四": 574, + "回": 575, + "因": 576, + "図": 577, + "围": 578, + "国": 579, + "图": 580, + "國": 581, + "園": 582, + "圖": 583, + "土": 584, + "在": 585, + "地": 586, + "场": 587, + "址": 588, + "坂": 589, + "坚": 590, + "坛": 591, + "坤": 592, + "型": 593, + "城": 594, + "域": 595, + "培": 596, + "基": 597, + "堂": 598, + "堯": 599, + "報": 600, + "塙": 601, + "塚": 602, + "塩": 603, + "境": 604, + "增": 605, + "壁": 606, + "壇": 607, + "士": 608, + "変": 609, + "复": 610, + "夏": 611, + "外": 612, + "多": 613, + "大": 614, + "天": 615, + "太": 616, + "夫": 617, + "夭": 618, + "奇": 619, + "女": 620, + "妍": 621, + "始": 622, + "委": 623, + "姚": 624, + "姜": 625, + "娇": 626, + "娟": 627, + "婧": 628, + "婷": 629, + "媒": 630, + "嬀": 631, + "子": 632, + "存": 633, + "孙": 634, + "孟": 635, + "季": 636, + "学": 637, + "孫": 638, + "學": 639, + "宁": 640, + "宇": 641, + "安": 642, + "宋": 643, + "完": 644, + "宏": 645, + "宗": 646, + "宙": 647, + "定": 648, + "宜": 649, + "宝": 650, + "实": 651, + "実": 652, + "害": 653, + "家": 654, + "容": 655, + "宿": 656, + "寅": 657, + "寒": 658, + "察": 659, + "實": 660, + "寧": 661, + "寸": 662, + "对": 663, + "寺": 664, + "导": 
665, + "寿": 666, + "専": 667, + "尊": 668, + "對": 669, + "小": 670, + "少": 671, + "尙": 672, + "尚": 673, + "就": 674, + "尻": 675, + "尽": 676, + "尾": 677, + "居": 678, + "屋": 679, + "展": 680, + "山": 681, + "岡": 682, + "岭": 683, + "岸": 684, + "峰": 685, + "島": 686, + "崎": 687, + "崗": 688, + "嶺": 689, + "川": 690, + "州": 691, + "工": 692, + "巫": 693, + "差": 694, + "巻": 695, + "市": 696, + "师": 697, + "師": 698, + "常": 699, + "帽": 700, + "平": 701, + "年": 702, + "幸": 703, + "广": 704, + "広": 705, + "庆": 706, + "床": 707, + "应": 708, + "店": 709, + "府": 710, + "度": 711, + "康": 712, + "庸": 713, + "庾": 714, + "廢": 715, + "廣": 716, + "建": 717, + "开": 718, + "式": 719, + "引": 720, + "弘": 721, + "张": 722, + "弥": 723, + "張": 724, + "归": 725, + "当": 726, + "形": 727, + "彤": 728, + "彦": 729, + "彬": 730, + "彭": 731, + "影": 732, + "征": 733, + "律": 734, + "徐": 735, + "從": 736, + "復": 737, + "微": 738, + "徳": 739, + "德": 740, + "徽": 741, + "心": 742, + "忍": 743, + "志": 744, + "忠": 745, + "快": 746, + "念": 747, + "态": 748, + "思": 749, + "性": 750, + "恭": 751, + "息": 752, + "悖": 753, + "悠": 754, + "悼": 755, + "情": 756, + "惠": 757, + "想": 758, + "意": 759, + "感": 760, + "態": 761, + "慧": 762, + "慶": 763, + "應": 764, + "成": 765, + "我": 766, + "戦": 767, + "戰": 768, + "戶": 769, + "戸": 770, + "房": 771, + "所": 772, + "扈": 773, + "打": 774, + "払": 775, + "承": 776, + "技": 777, + "报": 778, + "拡": 779, + "择": 780, + "持": 781, + "指": 782, + "挑": 783, + "捷": 784, + "排": 785, + "掛": 786, + "探": 787, + "接": 788, + "控": 789, + "推": 790, + "措": 791, + "揣": 792, + "摇": 793, + "撥": 794, + "播": 795, + "支": 796, + "改": 797, + "放": 798, + "政": 799, + "效": 800, + "敎": 801, + "教": 802, + "敦": 803, + "敬": 804, + "数": 805, + "文": 806, + "斌": 807, + "斐": 808, + "料": 809, + "斜": 810, + "新": 811, + "斷": 812, + "方": 813, + "於": 814, + "施": 815, + "族": 816, + "日": 817, + "时": 818, + "昌": 819, + "明": 820, + "易": 821, + "星": 822, + "春": 823, + "時": 824, + "晋": 825, + "晓": 826, + "晨": 827, + "景": 828, + "晴": 829, + "暢": 830, + "暴": 831, + 
"曉": 832, + "更": 833, + "書": 834, + "曹": 835, + "替": 836, + "最": 837, + "會": 838, + "月": 839, + "有": 840, + "服": 841, + "望": 842, + "朝": 843, + "期": 844, + "木": 845, + "未": 846, + "本": 847, + "朮": 848, + "术": 849, + "朱": 850, + "机": 851, + "杂": 852, + "杉": 853, + "李": 854, + "材": 855, + "村": 856, + "杜": 857, + "来": 858, + "杨": 859, + "杭": 860, + "杰": 861, + "東": 862, + "松": 863, + "板": 864, + "构": 865, + "析": 866, + "林": 867, + "果": 868, + "某": 869, + "染": 870, + "查": 871, + "柳": 872, + "査": 873, + "标": 874, + "树": 875, + "校": 876, + "株": 877, + "根": 878, + "格": 879, + "桂": 880, + "案": 881, + "梓": 882, + "梦": 883, + "棄": 884, + "森": 885, + "植": 886, + "検": 887, + "楊": 888, + "楚": 889, + "楠": 890, + "業": 891, + "楮": 892, + "楼": 893, + "概": 894, + "榮": 895, + "構": 896, + "槪": 897, + "槻": 898, + "標": 899, + "樟": 900, + "模": 901, + "樣": 902, + "横": 903, + "樹": 904, + "橋": 905, + "機": 906, + "檢": 907, + "欒": 908, + "次": 909, + "歉": 910, + "歐": 911, + "正": 912, + "武": 913, + "歴": 914, + "歷": 915, + "殊": 916, + "毅": 917, + "毒": 918, + "比": 919, + "氏": 920, + "民": 921, + "气": 922, + "気": 923, + "氣": 924, + "水": 925, + "氷": 926, + "永": 927, + "汚": 928, + "江": 929, + "池": 930, + "污": 931, + "汤": 932, + "汪": 933, + "汶": 934, + "決": 935, + "沈": 936, + "沢": 937, + "河": 938, + "油": 939, + "治": 940, + "沼": 941, + "沿": 942, + "況": 943, + "法": 944, + "波": 945, + "泰": 946, + "洋": 947, + "津": 948, + "洪": 949, + "洱": 950, + "活": 951, + "流": 952, + "浅": 953, + "测": 954, + "济": 955, + "浙": 956, + "海": 957, + "涉": 958, + "涛": 959, + "涵": 960, + "淑": 961, + "淘": 962, + "淡": 963, + "淵": 964, + "混": 965, + "淸": 966, + "淺": 967, + "清": 968, + "済": 969, + "渡": 970, + "測": 971, + "港": 972, + "湖": 973, + "湯": 974, + "源": 975, + "準": 976, + "溫": 977, + "滋": 978, + "滙": 979, + "滿": 980, + "演": 981, + "漢": 982, + "漫": 983, + "潘": 984, + "澤": 985, + "濟": 986, + "濤": 987, + "瀉": 988, + "灣": 989, + "火": 990, + "灼": 991, + "災": 992, + "灾": 993, + "炎": 994, + "炭": 995, + "炮": 996, + "点": 997, + "煌": 
998, + "照": 999, + "熱": 1000, + "燕": 1001, + "爲": 1002, + "片": 1003, + "版": 1004, + "牟": 1005, + "物": 1006, + "特": 1007, + "状": 1008, + "獎": 1009, + "獨": 1010, + "獻": 1011, + "玄": 1012, + "玉": 1013, + "王": 1014, + "环": 1015, + "现": 1016, + "玲": 1017, + "珉": 1018, + "珍": 1019, + "現": 1020, + "球": 1021, + "理": 1022, + "琛": 1023, + "瑛": 1024, + "瑶": 1025, + "璐": 1026, + "環": 1027, + "瓷": 1028, + "甘": 1029, + "生": 1030, + "産": 1031, + "用": 1032, + "田": 1033, + "由": 1034, + "甲": 1035, + "电": 1036, + "町": 1037, + "画": 1038, + "界": 1039, + "略": 1040, + "畫": 1041, + "異": 1042, + "疗": 1043, + "疾": 1044, + "病": 1045, + "症": 1046, + "瘡": 1047, + "療": 1048, + "発": 1049, + "登": 1050, + "白": 1051, + "百": 1052, + "的": 1053, + "益": 1054, + "监": 1055, + "盘": 1056, + "盟": 1057, + "監": 1058, + "盤": 1059, + "目": 1060, + "直": 1061, + "相": 1062, + "省": 1063, + "看": 1064, + "県": 1065, + "眞": 1066, + "真": 1067, + "督": 1068, + "瞻": 1069, + "知": 1070, + "石": 1071, + "砂": 1072, + "研": 1073, + "硏": 1074, + "碩": 1075, + "礎": 1076, + "示": 1077, + "社": 1078, + "祐": 1079, + "祝": 1080, + "神": 1081, + "禎": 1082, + "福": 1083, + "秀": 1084, + "私": 1085, + "科": 1086, + "秘": 1087, + "秦": 1088, + "程": 1089, + "稿": 1090, + "穎": 1091, + "穫": 1092, + "究": 1093, + "空": 1094, + "窟": 1095, + "立": 1096, + "章": 1097, + "童": 1098, + "笛": 1099, + "第": 1100, + "等": 1101, + "策": 1102, + "筧": 1103, + "管": 1104, + "節": 1105, + "築": 1106, + "篤": 1107, + "簡": 1108, + "籍": 1109, + "米": 1110, + "粘": 1111, + "系": 1112, + "紀": 1113, + "紅": 1114, + "素": 1115, + "細": 1116, + "組": 1117, + "経": 1118, + "結": 1119, + "經": 1120, + "綜": 1121, + "綠": 1122, + "総": 1123, + "緑": 1124, + "編": 1125, + "縣": 1126, + "總": 1127, + "績": 1128, + "繁": 1129, + "續": 1130, + "红": 1131, + "纪": 1132, + "练": 1133, + "组": 1134, + "经": 1135, + "结": 1136, + "络": 1137, + "绩": 1138, + "综": 1139, + "绿": 1140, + "网": 1141, + "置": 1142, + "美": 1143, + "群": 1144, + "義": 1145, + "習": 1146, + "翔": 1147, + "翟": 1148, + "翰": 1149, + "考": 1150, + "者": 1151, + "耳": 
1152, + "联": 1153, + "聚": 1154, + "聪": 1155, + "肅": 1156, + "肖": 1157, + "育": 1158, + "肺": 1159, + "背": 1160, + "胡": 1161, + "能": 1162, + "脈": 1163, + "脉": 1164, + "腑": 1165, + "腔": 1166, + "臟": 1167, + "臣": 1168, + "臨": 1169, + "致": 1170, + "臺": 1171, + "與": 1172, + "興": 1173, + "舒": 1174, + "舞": 1175, + "般": 1176, + "船": 1177, + "良": 1178, + "艺": 1179, + "节": 1180, + "芍": 1181, + "芝": 1182, + "芬": 1183, + "花": 1184, + "芳": 1185, + "苏": 1186, + "苦": 1187, + "英": 1188, + "茂": 1189, + "范": 1190, + "草": 1191, + "药": 1192, + "莞": 1193, + "莫": 1194, + "莹": 1195, + "菁": 1196, + "華": 1197, + "菱": 1198, + "萍": 1199, + "营": 1200, + "萩": 1201, + "萱": 1202, + "落": 1203, + "葉": 1204, + "董": 1205, + "蒙": 1206, + "蒼": 1207, + "蔡": 1208, + "蔭": 1209, + "薇": 1210, + "薛": 1211, + "薬": 1212, + "藍": 1213, + "藤": 1214, + "藥": 1215, + "虚": 1216, + "虛": 1217, + "號": 1218, + "蚁": 1219, + "蚂": 1220, + "融": 1221, + "行": 1222, + "術": 1223, + "街": 1224, + "衛": 1225, + "补": 1226, + "袍": 1227, + "被": 1228, + "裁": 1229, + "補": 1230, + "製": 1231, + "西": 1232, + "要": 1233, + "見": 1234, + "規": 1235, + "視": 1236, + "覺": 1237, + "觀": 1238, + "观": 1239, + "规": 1240, + "视": 1241, + "角": 1242, + "解": 1243, + "觴": 1244, + "訂": 1245, + "計": 1246, + "討": 1247, + "記": 1248, + "訣": 1249, + "設": 1250, + "証": 1251, + "評": 1252, + "詹": 1253, + "誌": 1254, + "語": 1255, + "調": 1256, + "談": 1257, + "論": 1258, + "講": 1259, + "謨": 1260, + "證": 1261, + "議": 1262, + "變": 1263, + "讨": 1264, + "议": 1265, + "许": 1266, + "论": 1267, + "设": 1268, + "证": 1269, + "评": 1270, + "试": 1271, + "课": 1272, + "调": 1273, + "谢": 1274, + "谷": 1275, + "象": 1276, + "財": 1277, + "賃": 1278, + "資": 1279, + "質": 1280, + "财": 1281, + "责": 1282, + "质": 1283, + "贺": 1284, + "资": 1285, + "赤": 1286, + "赵": 1287, + "越": 1288, + "趙": 1289, + "跡": 1290, + "路": 1291, + "践": 1292, + "蹈": 1293, + "軸": 1294, + "較": 1295, + "輝": 1296, + "輯": 1297, + "轉": 1298, + "轶": 1299, + "较": 1300, + "辛": 1301, + "辨": 1302, + "農": 1303, + "辺": 1304, + "辽": 1305, + 
"达": 1306, + "过": 1307, + "迎": 1308, + "近": 1309, + "进": 1310, + "远": 1311, + "述": 1312, + "迹": 1313, + "选": 1314, + "通": 1315, + "逝": 1316, + "速": 1317, + "造": 1318, + "連": 1319, + "進": 1320, + "運": 1321, + "道": 1322, + "達": 1323, + "遵": 1324, + "選": 1325, + "遺": 1326, + "遼": 1327, + "郎": 1328, + "郑": 1329, + "郞": 1330, + "部": 1331, + "郭": 1332, + "都": 1333, + "鄕": 1334, + "鄧": 1335, + "配": 1336, + "酸": 1337, + "醫": 1338, + "重": 1339, + "野": 1340, + "金": 1341, + "鈴": 1342, + "鉛": 1343, + "鉱": 1344, + "銘": 1345, + "錫": 1346, + "録": 1347, + "鍍": 1348, + "鎭": 1349, + "鏡": 1350, + "鐵": 1351, + "铁": 1352, + "铜": 1353, + "销": 1354, + "锋": 1355, + "長": 1356, + "門": 1357, + "開": 1358, + "関": 1359, + "閣": 1360, + "閻": 1361, + "閾": 1362, + "關": 1363, + "门": 1364, + "问": 1365, + "间": 1366, + "闻": 1367, + "阪": 1368, + "防": 1369, + "阳": 1370, + "阴": 1371, + "阿": 1372, + "附": 1373, + "际": 1374, + "陈": 1375, + "降": 1376, + "院": 1377, + "陰": 1378, + "陳": 1379, + "陶": 1380, + "陽": 1381, + "隆": 1382, + "隐": 1383, + "際": 1384, + "障": 1385, + "隨": 1386, + "險": 1387, + "雄": 1388, + "雅": 1389, + "集": 1390, + "雑": 1391, + "雙": 1392, + "雜": 1393, + "難": 1394, + "雪": 1395, + "雯": 1396, + "雲": 1397, + "雷": 1398, + "震": 1399, + "霖": 1400, + "霞": 1401, + "靑": 1402, + "青": 1403, + "非": 1404, + "面": 1405, + "革": 1406, + "靳": 1407, + "韓": 1408, + "音": 1409, + "韵": 1410, + "頁": 1411, + "須": 1412, + "領": 1413, + "題": 1414, + "類": 1415, + "页": 1416, + "顾": 1417, + "颖": 1418, + "题": 1419, + "颜": 1420, + "風": 1421, + "风": 1422, + "飛": 1423, + "餘": 1424, + "館": 1425, + "饰": 1426, + "馆": 1427, + "香": 1428, + "馬": 1429, + "馮": 1430, + "驗": 1431, + "马": 1432, + "體": 1433, + "高": 1434, + "魁": 1435, + "鮮": 1436, + "鲜": 1437, + "鳥": 1438, + "鷲": 1439, + "鸣": 1440, + "鹽": 1441, + "麗": 1442, + "默": 1443, + "點": 1444, + "齊": 1445, + "齋": 1446, + "龍": 1447, + "龙": 1448, + "가": 1449, + "각": 1450, + "간": 1451, + "갈": 1452, + "감": 1453, + "강": 1454, + "개": 1455, + "객": 1456, + "갱": 1457, + "거": 1458, + "건": 1459, 
+ "검": 1460, + "게": 1461, + "격": 1462, + "견": 1463, + "결": 1464, + "경": 1465, + "계": 1466, + "고": 1467, + "곤": 1468, + "공": 1469, + "과": 1470, + "곽": 1471, + "관": 1472, + "광": 1473, + "괴": 1474, + "교": 1475, + "구": 1476, + "국": 1477, + "군": 1478, + "궁": 1479, + "권": 1480, + "귀": 1481, + "규": 1482, + "균": 1483, + "그": 1484, + "극": 1485, + "근": 1486, + "글": 1487, + "금": 1488, + "급": 1489, + "기": 1490, + "길": 1491, + "김": 1492, + "까": 1493, + "끼": 1494, + "나": 1495, + "낙": 1496, + "난": 1497, + "남": 1498, + "내": 1499, + "낸": 1500, + "너": 1501, + "넌": 1502, + "널": 1503, + "네": 1504, + "넷": 1505, + "녀": 1506, + "년": 1507, + "념": 1508, + "녕": 1509, + "노": 1510, + "녹": 1511, + "논": 1512, + "놀": 1513, + "농": 1514, + "뉴": 1515, + "는": 1516, + "능": 1517, + "니": 1518, + "닝": 1519, + "다": 1520, + "단": 1521, + "달": 1522, + "담": 1523, + "당": 1524, + "대": 1525, + "댄": 1526, + "댓": 1527, + "더": 1528, + "덕": 1529, + "던": 1530, + "덤": 1531, + "데": 1532, + "델": 1533, + "도": 1534, + "독": 1535, + "돈": 1536, + "돌": 1537, + "동": 1538, + "되": 1539, + "된": 1540, + "두": 1541, + "둔": 1542, + "둘": 1543, + "듀": 1544, + "드": 1545, + "득": 1546, + "듣": 1547, + "들": 1548, + "등": 1549, + "디": 1550, + "딥": 1551, + "따": 1552, + "라": 1553, + "란": 1554, + "람": 1555, + "래": 1556, + "랜": 1557, + "램": 1558, + "략": 1559, + "량": 1560, + "러": 1561, + "럼": 1562, + "레": 1563, + "렌": 1564, + "려": 1565, + "력": 1566, + "련": 1567, + "렬": 1568, + "령": 1569, + "례": 1570, + "로": 1571, + "록": 1572, + "론": 1573, + "료": 1574, + "룡": 1575, + "루": 1576, + "룬": 1577, + "류": 1578, + "률": 1579, + "른": 1580, + "를": 1581, + "리": 1582, + "린": 1583, + "림": 1584, + "립": 1585, + "링": 1586, + "마": 1587, + "만": 1588, + "말": 1589, + "망": 1590, + "맞": 1591, + "매": 1592, + "머": 1593, + "먹": 1594, + "먼": 1595, + "메": 1596, + "멘": 1597, + "면": 1598, + "멸": 1599, + "명": 1600, + "몇": 1601, + "모": 1602, + "목": 1603, + "몰": 1604, + "묘": 1605, + "무": 1606, + "묵": 1607, + "문": 1608, + "물": 1609, + "뮤": 1610, + "미": 1611, + "민": 1612, + "밀": 
1613, + "및": 1614, + "바": 1615, + "박": 1616, + "반": 1617, + "발": 1618, + "방": 1619, + "배": 1620, + "백": 1621, + "버": 1622, + "벌": 1623, + "범": 1624, + "법": 1625, + "베": 1626, + "벤": 1627, + "벽": 1628, + "변": 1629, + "별": 1630, + "병": 1631, + "보": 1632, + "복": 1633, + "본": 1634, + "봄": 1635, + "봉": 1636, + "부": 1637, + "북": 1638, + "분": 1639, + "붕": 1640, + "뷰": 1641, + "브": 1642, + "블": 1643, + "비": 1644, + "빅": 1645, + "빈": 1646, + "사": 1647, + "산": 1648, + "살": 1649, + "삶": 1650, + "삼": 1651, + "상": 1652, + "새": 1653, + "색": 1654, + "생": 1655, + "서": 1656, + "석": 1657, + "선": 1658, + "설": 1659, + "섬": 1660, + "섭": 1661, + "성": 1662, + "세": 1663, + "셀": 1664, + "션": 1665, + "셜": 1666, + "소": 1667, + "속": 1668, + "손": 1669, + "송": 1670, + "쇼": 1671, + "수": 1672, + "숙": 1673, + "순": 1674, + "술": 1675, + "숭": 1676, + "슈": 1677, + "스": 1678, + "슬": 1679, + "습": 1680, + "승": 1681, + "시": 1682, + "식": 1683, + "신": 1684, + "실": 1685, + "심": 1686, + "십": 1687, + "싼": 1688, + "쓰": 1689, + "아": 1690, + "악": 1691, + "안": 1692, + "않": 1693, + "암": 1694, + "압": 1695, + "앙": 1696, + "애": 1697, + "액": 1698, + "앱": 1699, + "야": 1700, + "양": 1701, + "어": 1702, + "언": 1703, + "얻": 1704, + "얼": 1705, + "엄": 1706, + "업": 1707, + "에": 1708, + "엔": 1709, + "여": 1710, + "역": 1711, + "연": 1712, + "열": 1713, + "염": 1714, + "엽": 1715, + "영": 1716, + "예": 1717, + "오": 1718, + "옥": 1719, + "온": 1720, + "올": 1721, + "옵": 1722, + "와": 1723, + "완": 1724, + "왜": 1725, + "외": 1726, + "요": 1727, + "욕": 1728, + "용": 1729, + "우": 1730, + "욱": 1731, + "운": 1732, + "울": 1733, + "움": 1734, + "웅": 1735, + "워": 1736, + "원": 1737, + "월": 1738, + "웨": 1739, + "웹": 1740, + "위": 1741, + "유": 1742, + "육": 1743, + "윤": 1744, + "율": 1745, + "융": 1746, + "으": 1747, + "은": 1748, + "을": 1749, + "음": 1750, + "응": 1751, + "의": 1752, + "이": 1753, + "익": 1754, + "인": 1755, + "일": 1756, + "임": 1757, + "입": 1758, + "있": 1759, + "잉": 1760, + "자": 1761, + "작": 1762, + "장": 1763, + "재": 1764, + "쟁": 1765, + "저": 1766, + 
"적": 1767, + "전": 1768, + "절": 1769, + "점": 1770, + "접": 1771, + "젓": 1772, + "정": 1773, + "제": 1774, + "젠": 1775, + "조": 1776, + "족": 1777, + "존": 1778, + "졸": 1779, + "종": 1780, + "좋": 1781, + "죄": 1782, + "주": 1783, + "죽": 1784, + "준": 1785, + "중": 1786, + "즈": 1787, + "증": 1788, + "지": 1789, + "직": 1790, + "진": 1791, + "질": 1792, + "집": 1793, + "징": 1794, + "짜": 1795, + "쪽": 1796, + "차": 1797, + "착": 1798, + "찬": 1799, + "찰": 1800, + "참": 1801, + "창": 1802, + "채": 1803, + "책": 1804, + "처": 1805, + "척": 1806, + "천": 1807, + "철": 1808, + "첩": 1809, + "청": 1810, + "체": 1811, + "쳐": 1812, + "초": 1813, + "촉": 1814, + "촌": 1815, + "총": 1816, + "최": 1817, + "추": 1818, + "축": 1819, + "춘": 1820, + "출": 1821, + "춤": 1822, + "충": 1823, + "취": 1824, + "츄": 1825, + "츠": 1826, + "측": 1827, + "층": 1828, + "치": 1829, + "친": 1830, + "침": 1831, + "카": 1832, + "캠": 1833, + "커": 1834, + "컴": 1835, + "케": 1836, + "켓": 1837, + "코": 1838, + "콘": 1839, + "크": 1840, + "클": 1841, + "키": 1842, + "타": 1843, + "탄": 1844, + "탈": 1845, + "탐": 1846, + "탑": 1847, + "탕": 1848, + "태": 1849, + "택": 1850, + "터": 1851, + "털": 1852, + "테": 1853, + "텍": 1854, + "텐": 1855, + "텔": 1856, + "템": 1857, + "토": 1858, + "통": 1859, + "퇴": 1860, + "투": 1861, + "튜": 1862, + "트": 1863, + "특": 1864, + "티": 1865, + "팅": 1866, + "파": 1867, + "판": 1868, + "패": 1869, + "퍼": 1870, + "페": 1871, + "펴": 1872, + "편": 1873, + "평": 1874, + "포": 1875, + "폰": 1876, + "표": 1877, + "품": 1878, + "풍": 1879, + "퓨": 1880, + "프": 1881, + "플": 1882, + "피": 1883, + "픽": 1884, + "하": 1885, + "학": 1886, + "한": 1887, + "할": 1888, + "함": 1889, + "합": 1890, + "항": 1891, + "해": 1892, + "행": 1893, + "향": 1894, + "허": 1895, + "헌": 1896, + "험": 1897, + "혁": 1898, + "현": 1899, + "협": 1900, + "형": 1901, + "혜": 1902, + "호": 1903, + "혼": 1904, + "홍": 1905, + "화": 1906, + "확": 1907, + "환": 1908, + "활": 1909, + "황": 1910, + "회": 1911, + "획": 1912, + "효": 1913, + "후": 1914, + "훈": 1915, + "흠": 1916, + "흡": 1917, + "흥": 1918, + "희": 1919, + "": 1920, 
+ "": 1921, + "金": 1922, + "濫": 1923, + "綠": 1924, + "錄": 1925, + "論": 1926, + "菱": 1927, + "讀": 1928, + "了": 1929, + "李": 1930, + "林": 1931, + "臨": 1932, + "拓": 1933, + "ff": 1934, + "fi": 1935, + "fl": 1936, + "�": 1937 + }, + "tagVocab": { + "<PAD>": 0, + "B-<author>": 1, + "B-<booktitle>": 2, + "B-<collaboration>": 3, + "B-<date>": 4, + "B-<editor>": 5, + "B-<institution>": 6, + "B-<issue>": 7, + "B-<journal>": 8, + "B-<location>": 9, + "B-<note>": 10, + "B-<pages>": 11, + "B-<publisher>": 12, + "B-<pubnum>": 13, + "B-<series>": 14, + "B-<tech>": 15, + "B-<title>": 16, + "B-<volume>": 17, + "B-<web>": 18, + "I-<author>": 19, + "I-<booktitle>": 20, + "I-<collaboration>": 21, + "I-<date>": 22, + "I-<editor>": 23, + "I-<institution>": 24, + "I-<issue>": 25, + "I-<journal>": 26, + "I-<location>": 27, + "I-<note>": 28, + "I-<pages>": 29, + "I-<publisher>": 30, + "I-<pubnum>": 31, + "I-<series>": 32, + "I-<tech>": 33, + "I-<title>": 34, + "I-<volume>": 35, + "I-<web>": 36, + "O": 37 + }, + "tagIndex": { + "0": "<PAD>", + "1": "B-<author>", + "2": "B-<booktitle>", + "3": "B-<collaboration>", + "4": "B-<date>", + "5": "B-<editor>", + "6": "B-<institution>", + "7": "B-<issue>", + "8": "B-<journal>", + "9": "B-<location>", + "10": "B-<note>", + "11": "B-<pages>", + "12": "B-<publisher>", + "13": "B-<pubnum>", + "14": "B-<series>", + "15": "B-<tech>", + "16": "B-<title>", + "17": "B-<volume>", + "18": "B-<web>", + "19": "I-<author>", + "20": "I-<booktitle>", + "21": "I-<collaboration>", + "22": "I-<date>", + "23": "I-<editor>", + "24": "I-<institution>", + "25": "I-<issue>", + "26": "I-<journal>", + "27": "I-<location>", + "28": "I-<note>", + "29": "I-<pages>", + "30": "I-<publisher>", + "31": "I-<pubnum>", + "32": "I-<series>", + "33": "I-<tech>", + "34": "I-<title>", + "35": "I-<volume>", + "36": "I-<web>", + "37": "O" + }, + "maxCharLength": 30, + "returnChars": false, + "featuresIndices": [ + 9, + 10, + 11, + 12, + 13, + 14, + 15, + 16, + 17, + 18, + 19, + 20, + 21, 
+ 22, + 23, + 24, + 25, + 26, + 27 + ], + "featuresVocabularySize": 12, + "featuresMapToIndex": { + "9": { + "LINEEND": 1, + "LINEIN": 2, + "LINESTART": 3 + }, + "10": { + "ALLCAP": 13, + "INITCAP": 14, + "NOCAPS": 15 + }, + "11": { + "ALLDIGIT": 25, + "CONTAINSDIGITS": 26, + "NODIGIT": 27 + }, + "12": { + "0": 37, + "1": 38 + }, + "13": { + "0": 49, + "1": 50 + }, + "14": { + "0": 61, + "1": 62 + }, + "15": { + "0": 73, + "1": 74 + }, + "16": { + "0": 85, + "1": 86 + }, + "17": { + "0": 97, + "1": 98 + }, + "18": { + "0": 109, + "1": 110 + }, + "19": { + "0": 121, + "1": 122 + }, + "20": { + "0": 133, + "1": 134 + }, + "21": { + "0": 145, + "1": 146 + }, + "22": { + "0": 157, + "1": 158 + }, + "23": { + "0": 169, + "1": 170 + }, + "24": { + "0": 181, + "1": 182 + }, + "25": { + "0": 193, + "1": 194 + }, + "26": { + "COMMA": 205, + "DOT": 206, + "ENDBRACKET": 207, + "HYPHEN": 208, + "NOPUNCT": 209, + "OPENBRACKET": 210, + "PUNCT": 211, + "QUOTE": 212 + }, + "27": { + "0": 217, + "1": 218, + "10": 219, + "11": 220, + "2": 221, + "3": 222, + "4": 223, + "5": 224, + "6": 225, + "7": 226, + "8": 227, + "9": 228 + } + } +} \ No newline at end of file diff --git a/grobid-home/models/citation-BidLSTM_CRF_FEATURES/config.json b/grobid-home/models/citation-BidLSTM_CRF_FEATURES/config.json deleted file mode 100644 index cd04b8c064..0000000000 --- a/grobid-home/models/citation-BidLSTM_CRF_FEATURES/config.json +++ /dev/null @@ -1,143 +0,0 @@ -{ - "model_name": "citation-BidLSTM_CRF_FEATURES", - "architecture": "BidLSTM_CRF_FEATURES", - "embeddings_name": "glove-840B", - "char_vocab_size": 1939, - "case_vocab_size": 8, - "char_embedding_size": 25, - "num_char_lstm_units": 25, - "max_char_length": 30, - "features_vocabulary_size": 12, - "features_indices": [ - 9, - 10, - 11, - 12, - 13, - 14, - 15, - 16, - 17, - 18, - 19, - 20, - 21, - 22, - 23, - 24, - 25, - 26, - 27 - ], - "features_embedding_size": 4, - "features_lstm_units": 4, - "max_sequence_length": 500, - 
"word_embedding_size": 300, - "num_word_lstm_units": 100, - "case_embedding_size": 5, - "dropout": 0.5, - "recurrent_dropout": 0.5, - "use_crf": true, - "use_chain_crf": false, - "fold_number": 1, - "batch_size": 50, - "transformer_name": null, - "use_ELMo": false, - "features_map_to_index": { - "9": { - "LINEEND": 1, - "LINEIN": 2, - "LINESTART": 3 - }, - "10": { - "ALLCAP": 13, - "INITCAP": 14, - "NOCAPS": 15 - }, - "11": { - "ALLDIGIT": 25, - "CONTAINSDIGITS": 26, - "NODIGIT": 27 - }, - "12": { - "0": 37, - "1": 38 - }, - "13": { - "0": 49, - "1": 50 - }, - "14": { - "0": 61, - "1": 62 - }, - "15": { - "0": 73, - "1": 74 - }, - "16": { - "0": 85, - "1": 86 - }, - "17": { - "0": 97, - "1": 98 - }, - "18": { - "0": 109, - "1": 110 - }, - "19": { - "0": 121, - "1": 122 - }, - "20": { - "0": 133, - "1": 134 - }, - "21": { - "0": 145, - "1": 146 - }, - "22": { - "0": 157, - "1": 158 - }, - "23": { - "0": 169, - "1": 170 - }, - "24": { - "0": 181, - "1": 182 - }, - "25": { - "0": 193, - "1": 194 - }, - "26": { - "COMMA": 205, - "DOT": 206, - "ENDBRACKET": 207, - "HYPHEN": 208, - "NOPUNCT": 209, - "OPENBRACKET": 210, - "PUNCT": 211, - "QUOTE": 212 - }, - "27": { - "0": 217, - "1": 218, - "10": 219, - "11": 220, - "2": 221, - "3": 222, - "4": 223, - "5": 224, - "6": 225, - "7": 226, - "8": 227, - "9": 228 - } - } -} \ No newline at end of file diff --git a/grobid-home/models/citation-BidLSTM_CRF_FEATURES/model_weights.hdf5 b/grobid-home/models/citation-BidLSTM_CRF_FEATURES/model_weights.hdf5 deleted file mode 100644 index 3ce5e1e951..0000000000 Binary files a/grobid-home/models/citation-BidLSTM_CRF_FEATURES/model_weights.hdf5 and /dev/null differ diff --git a/grobid-home/models/citation-BidLSTM_CRF_FEATURES/preprocessor.json b/grobid-home/models/citation-BidLSTM_CRF_FEATURES/preprocessor.json deleted file mode 100644 index dac07cb7b2..0000000000 --- a/grobid-home/models/citation-BidLSTM_CRF_FEATURES/preprocessor.json +++ /dev/null @@ -1,2162 +0,0 @@ -{ - "padding": 
true, - "return_lengths": true, - "return_word_embeddings": true, - "return_casing": false, - "return_features": true, - "return_chars": true, - "return_bert_embeddings": false, - "vocab_char": { - "<PAD>": 0, - "<UNK>": 1, - "!": 2, - "\"": 3, - "#": 4, - "$": 5, - "%": 6, - "&": 7, - "'": 8, - "(": 9, - ")": 10, - "*": 11, - "+": 12, - ",": 13, - "-": 14, - ".": 15, - "/": 16, - "0": 17, - "1": 18, - "2": 19, - "3": 20, - "4": 21, - "5": 22, - "6": 23, - "7": 24, - "8": 25, - "9": 26, - ":": 27, - ";": 28, - "<": 29, - "=": 30, - ">": 31, - "?": 32, - "@": 33, - "A": 34, - "B": 35, - "C": 36, - "D": 37, - "E": 38, - "F": 39, - "G": 40, - "H": 41, - "I": 42, - "J": 43, - "K": 44, - "L": 45, - "M": 46, - "N": 47, - "O": 48, - "P": 49, - "Q": 50, - "R": 51, - "S": 52, - "T": 53, - "U": 54, - "V": 55, - "W": 56, - "X": 57, - "Y": 58, - "Z": 59, - "[": 60, - "\\": 61, - "]": 62, - "^": 63, - "_": 64, - "`": 65, - "a": 66, - "b": 67, - "c": 68, - "d": 69, - "e": 70, - "f": 71, - "g": 72, - "h": 73, - "i": 74, - "j": 75, - "k": 76, - "l": 77, - "m": 78, - "n": 79, - "o": 80, - "p": 81, - "q": 82, - "r": 83, - "s": 84, - "t": 85, - "u": 86, - "v": 87, - "w": 88, - "x": 89, - "y": 90, - "z": 91, - "|": 92, - "~": 93, - "\u0084": 94, - "\u0086": 95, - "\u0092": 96, - "\u0096": 97, - "\u00a1": 98, - "\u00a2": 99, - "\u00a4": 100, - "\u00a7": 101, - "\u00a8": 102, - "\u00a9": 103, - "\u00ab": 104, - "\u00ad": 105, - "\u00af": 106, - "\u00b0": 107, - "\u00b1": 108, - "\u00b2": 109, - "\u00b3": 110, - "\u00b4": 111, - "\u00b5": 112, - "\u00b8": 113, - "\u00ba": 114, - "\u00bb": 115, - "\u00bc": 116, - "\u00bf": 117, - "\u00c0": 118, - "\u00c1": 119, - "\u00c2": 120, - "\u00c3": 121, - "\u00c5": 122, - "\u00c8": 123, - "\u00c9": 124, - "\u00ca": 125, - "\u00ce": 126, - "\u00cf": 127, - "\u00d4": 128, - "\u00d6": 129, - "\u00d7": 130, - "\u00d8": 131, - "\u00dc": 132, - "\u00df": 133, - "\u00e0": 134, - "\u00e1": 135, - "\u00e2": 136, - "\u00e3": 137, - "\u00e4": 138, - 
"\u00e7": 139, - "\u00e8": 140, - "\u00e9": 141, - "\u00ea": 142, - "\u00ec": 143, - "\u00ed": 144, - "\u00ee": 145, - "\u00ef": 146, - "\u00f1": 147, - "\u00f2": 148, - "\u00f3": 149, - "\u00f4": 150, - "\u00f5": 151, - "\u00f6": 152, - "\u00f8": 153, - "\u00fa": 154, - "\u00fb": 155, - "\u00fc": 156, - "\u00fd": 157, - "\u0102": 158, - "\u0105": 159, - "\u0107": 160, - "\u010c": 161, - "\u010d": 162, - "\u0117": 163, - "\u0119": 164, - "\u011f": 165, - "\u0130": 166, - "\u0131": 167, - "\u0132": 168, - "\u013e": 169, - "\u0142": 170, - "\u0144": 171, - "\u0159": 172, - "\u015b": 173, - "\u0161": 174, - "\u016b": 175, - "\u0179": 176, - "\u017d": 177, - "\u017e": 178, - "\u02c6": 179, - "\u02c7": 180, - "\u02dc": 181, - "\u0301": 182, - "\u039a": 183, - "\u039b": 184, - "\u039e": 185, - "\u03a0": 186, - "\u03b1": 187, - "\u03b2": 188, - "\u03b3": 189, - "\u03b5": 190, - "\u03b7": 191, - "\u03b8": 192, - "\u03b9": 193, - "\u03bb": 194, - "\u03bd": 195, - "\u03be": 196, - "\u03bf": 197, - "\u03c0": 198, - "\u03c1": 199, - "\u03c2": 200, - "\u03c3": 201, - "\u03c4": 202, - "\u03c6": 203, - "\u03e9": 204, - "\u0410": 205, - "\u0411": 206, - "\u0412": 207, - "\u0414": 208, - "\u0415": 209, - "\u0418": 210, - "\u041a": 211, - "\u041b": 212, - "\u041c": 213, - "\u041d": 214, - "\u041e": 215, - "\u041f": 216, - "\u0420": 217, - "\u0421": 218, - "\u0422": 219, - "\u0425": 220, - "\u0427": 221, - "\u042f": 222, - "\u0430": 223, - "\u0431": 224, - "\u0432": 225, - "\u0433": 226, - "\u0434": 227, - "\u0435": 228, - "\u0436": 229, - "\u0437": 230, - "\u0438": 231, - "\u0439": 232, - "\u043a": 233, - "\u043b": 234, - "\u043c": 235, - "\u043d": 236, - "\u043e": 237, - "\u043f": 238, - "\u0440": 239, - "\u0441": 240, - "\u0442": 241, - "\u0443": 242, - "\u0444": 243, - "\u0445": 244, - "\u0446": 245, - "\u0447": 246, - "\u0449": 247, - "\u044a": 248, - "\u044b": 249, - "\u044c": 250, - "\u044d": 251, - "\u044e": 252, - "\u044f": 253, - "\u1390": 254, - "\u1f04": 255, - "\u1f14": 
256, - "\u1f73": 257, - "\u1f79": 258, - "\u2018": 259, - "\u2019": 260, - "\u201a": 261, - "\u201c": 262, - "\u201d": 263, - "\u201e": 264, - "\u2022": 265, - "\u2024": 266, - "\u2026": 267, - "\u2113": 268, - "\u2192": 269, - "\u21b5": 270, - "\u2202": 271, - "\u2206": 272, - "\u2212": 273, - "\u221a": 274, - "\u221d": 275, - "\u221e": 276, - "\u223c": 277, - "\u2245": 278, - "\u3001": 279, - "\u3042": 280, - "\u3044": 281, - "\u3048": 282, - "\u304a": 283, - "\u304b": 284, - "\u3050": 285, - "\u3051": 286, - "\u3052": 287, - "\u3053": 288, - "\u3055": 289, - "\u3057": 290, - "\u3059": 291, - "\u305f": 292, - "\u3061": 293, - "\u3063": 294, - "\u3064": 295, - "\u3065": 296, - "\u3066": 297, - "\u3067": 298, - "\u3068": 299, - "\u306a": 300, - "\u306b": 301, - "\u306e": 302, - "\u3073": 303, - "\u307e": 304, - "\u307f": 305, - "\u3080": 306, - "\u3081": 307, - "\u3086": 308, - "\u3088": 309, - "\u3089": 310, - "\u308a": 311, - "\u308b": 312, - "\u308c": 313, - "\u308f": 314, - "\u3092": 315, - "\u30a1": 316, - "\u30a2": 317, - "\u30a3": 318, - "\u30a4": 319, - "\u30a6": 320, - "\u30a7": 321, - "\u30a8": 322, - "\u30ab": 323, - "\u30ac": 324, - "\u30ad": 325, - "\u30af": 326, - "\u30b0": 327, - "\u30b3": 328, - "\u30b4": 329, - "\u30b7": 330, - "\u30b8": 331, - "\u30b9": 332, - "\u30ba": 333, - "\u30bb": 334, - "\u30bd": 335, - "\u30bf": 336, - "\u30c1": 337, - "\u30c3": 338, - "\u30c6": 339, - "\u30c7": 340, - "\u30c8": 341, - "\u30c9": 342, - "\u30ca": 343, - "\u30cb": 344, - "\u30cd": 345, - "\u30ce": 346, - "\u30cf": 347, - "\u30d0": 348, - "\u30d1": 349, - "\u30d2": 350, - "\u30d5": 351, - "\u30d6": 352, - "\u30d7": 353, - "\u30d9": 354, - "\u30df": 355, - "\u30e0": 356, - "\u30e1": 357, - "\u30e2": 358, - "\u30e5": 359, - "\u30e7": 360, - "\u30e9": 361, - "\u30ea": 362, - "\u30eb": 363, - "\u30ec": 364, - "\u30ed": 365, - "\u30ef": 366, - "\u30f3": 367, - "\u30fb": 368, - "\u30fc": 369, - "\u4e00": 370, - "\u4e01": 371, - "\u4e09": 372, - "\u4e0a": 373, - 
"\u4e0b": 374, - "\u4e0e": 375, - "\u4e16": 376, - "\u4e18": 377, - "\u4e1a": 378, - "\u4e1c": 379, - "\u4e2a": 380, - "\u4e2d": 381, - "\u4e34": 382, - "\u4e3a": 383, - "\u4e3b": 384, - "\u4e3d": 385, - "\u4e45": 386, - "\u4e4b": 387, - "\u4e50": 388, - "\u4e5f": 389, - "\u4e60": 390, - "\u4e66": 391, - "\u4e7e": 392, - "\u4e88": 393, - "\u4e8b": 394, - "\u4e8c": 395, - "\u4e8e": 396, - "\u4e91": 397, - "\u4e92": 398, - "\u4e94": 399, - "\u4e95": 400, - "\u4e9e": 401, - "\u4ea4": 402, - "\u4ea5": 403, - "\u4ea7": 404, - "\u4eac": 405, - "\u4eae": 406, - "\u4eba": 407, - "\u4ec1": 408, - "\u4eca": 409, - "\u4ecb": 410, - "\u4ece": 411, - "\u4ed9": 412, - "\u4ee3": 413, - "\u4ee5": 414, - "\u4ef2": 415, - "\u4ef7": 416, - "\u4efb": 417, - "\u4eff": 418, - "\u4f01": 419, - "\u4f0a": 420, - "\u4f0d": 421, - "\u4f17": 422, - "\u4f1a": 423, - "\u4f1f": 424, - "\u4f20": 425, - "\u4f2f": 426, - "\u4f38": 427, - "\u4f3d": 428, - "\u4f46": 429, - "\u4f4d": 430, - "\u4f4f": 431, - "\u4f53": 432, - "\u4f55": 433, - "\u4f5c": 434, - "\u4f73": 435, - "\u4f75": 436, - "\u4f7f": 437, - "\u4f86": 438, - "\u4f8b": 439, - "\u4f9b": 440, - "\u4fa1": 441, - "\u4fbf": 442, - "\u4fc2": 443, - "\u4fc4": 444, - "\u4fca": 445, - "\u4fdd": 446, - "\u4fe1": 447, - "\u4fee": 448, - "\u5009": 449, - "\u5029": 450, - "\u502a": 451, - "\u502b": 452, - "\u503b": 453, - "\u503c": 454, - "\u5049": 455, - "\u507f": 456, - "\u50b3": 457, - "\u50b7": 458, - "\u50be": 459, - "\u5143": 460, - "\u5146": 461, - "\u5149": 462, - "\u514b": 463, - "\u5165": 464, - "\u5167": 465, - "\u5168": 466, - "\u516b": 467, - "\u516c": 468, - "\u516d": 469, - "\u5171": 470, - "\u5173": 471, - "\u5175": 472, - "\u5176": 473, - "\u5178": 474, - "\u517c": 475, - "\u5185": 476, - "\u518d": 477, - "\u51a0": 478, - "\u51ac": 479, - "\u51e0": 480, - "\u51e6": 481, - "\u51fa": 482, - "\u5206": 483, - "\u520a": 484, - "\u5212": 485, - "\u5218": 486, - "\u521a": 487, - "\u521b": 488, - "\u521d": 489, - "\u5224": 490, - "\u5229": 
491, - "\u5230": 492, - "\u5236": 493, - "\u5238": 494, - "\u523b": 495, - "\u524d": 496, - "\u525b": 497, - "\u5267": 498, - "\u5275": 499, - "\u5289": 500, - "\u529f": 501, - "\u52a0": 502, - "\u52a8": 503, - "\u52c9": 504, - "\u52d5": 505, - "\u52d9": 506, - "\u52dd": 507, - "\u52f5": 508, - "\u5316": 509, - "\u5317": 510, - "\u5331": 511, - "\u533a": 512, - "\u533b": 513, - "\u5340": 514, - "\u5341": 515, - "\u5343": 516, - "\u534a": 517, - "\u534e": 518, - "\u5353": 519, - "\u5354": 520, - "\u5357": 521, - "\u5360": 522, - "\u536b": 523, - "\u5371": 524, - "\u5377": 525, - "\u5385": 526, - "\u539f": 527, - "\u53bb": 528, - "\u53c2": 529, - "\u53c3": 530, - "\u53c9": 531, - "\u53ca": 532, - "\u53d1": 533, - "\u53d6": 534, - "\u53d7": 535, - "\u53d8": 536, - "\u53e2": 537, - "\u53e3": 538, - "\u53e4": 539, - "\u53e5": 540, - "\u53ef": 541, - "\u53f2": 542, - "\u53f7": 543, - "\u53f8": 544, - "\u5408": 545, - "\u5409": 546, - "\u540c": 547, - "\u540d": 548, - "\u540e": 549, - "\u5411": 550, - "\u542c": 551, - "\u542f": 552, - "\u5433": 553, - "\u5434": 554, - "\u543e": 555, - "\u5448": 556, - "\u544a": 557, - "\u5468": 558, - "\u548c": 559, - "\u54c1": 560, - "\u54c9": 561, - "\u54cd": 562, - "\u54e1": 563, - "\u54f2": 564, - "\u5546": 565, - "\u554f": 566, - "\u5578": 567, - "\u5584": 568, - "\u559c": 569, - "\u55aa": 570, - "\u55ac": 571, - "\u55b6": 572, - "\u5609": 573, - "\u5668": 574, - "\u56db": 575, - "\u56de": 576, - "\u56e0": 577, - "\u56f3": 578, - "\u56f4": 579, - "\u56fd": 580, - "\u56fe": 581, - "\u570b": 582, - "\u5712": 583, - "\u5716": 584, - "\u571f": 585, - "\u5728": 586, - "\u5730": 587, - "\u573a": 588, - "\u5740": 589, - "\u5742": 590, - "\u575a": 591, - "\u575b": 592, - "\u5764": 593, - "\u578b": 594, - "\u57ce": 595, - "\u57df": 596, - "\u57f9": 597, - "\u57fa": 598, - "\u5802": 599, - "\u582f": 600, - "\u5831": 601, - "\u5859": 602, - "\u585a": 603, - "\u5869": 604, - "\u5883": 605, - "\u589e": 606, - "\u58c1": 607, - "\u58c7": 608, - 
"\u58eb": 609, - "\u5909": 610, - "\u590d": 611, - "\u590f": 612, - "\u5916": 613, - "\u591a": 614, - "\u5927": 615, - "\u5929": 616, - "\u592a": 617, - "\u592b": 618, - "\u592d": 619, - "\u5947": 620, - "\u5973": 621, - "\u598d": 622, - "\u59cb": 623, - "\u59d4": 624, - "\u59da": 625, - "\u59dc": 626, - "\u5a07": 627, - "\u5a1f": 628, - "\u5a67": 629, - "\u5a77": 630, - "\u5a92": 631, - "\u5b00": 632, - "\u5b50": 633, - "\u5b58": 634, - "\u5b59": 635, - "\u5b5f": 636, - "\u5b63": 637, - "\u5b66": 638, - "\u5b6b": 639, - "\u5b78": 640, - "\u5b81": 641, - "\u5b87": 642, - "\u5b89": 643, - "\u5b8b": 644, - "\u5b8c": 645, - "\u5b8f": 646, - "\u5b97": 647, - "\u5b99": 648, - "\u5b9a": 649, - "\u5b9c": 650, - "\u5b9d": 651, - "\u5b9e": 652, - "\u5b9f": 653, - "\u5bb3": 654, - "\u5bb6": 655, - "\u5bb9": 656, - "\u5bbf": 657, - "\u5bc5": 658, - "\u5bd2": 659, - "\u5bdf": 660, - "\u5be6": 661, - "\u5be7": 662, - "\u5bf8": 663, - "\u5bf9": 664, - "\u5bfa": 665, - "\u5bfc": 666, - "\u5bff": 667, - "\u5c02": 668, - "\u5c0a": 669, - "\u5c0d": 670, - "\u5c0f": 671, - "\u5c11": 672, - "\u5c19": 673, - "\u5c1a": 674, - "\u5c31": 675, - "\u5c3b": 676, - "\u5c3d": 677, - "\u5c3e": 678, - "\u5c45": 679, - "\u5c4b": 680, - "\u5c55": 681, - "\u5c71": 682, - "\u5ca1": 683, - "\u5cad": 684, - "\u5cb8": 685, - "\u5cf0": 686, - "\u5cf6": 687, - "\u5d0e": 688, - "\u5d17": 689, - "\u5dba": 690, - "\u5ddd": 691, - "\u5dde": 692, - "\u5de5": 693, - "\u5deb": 694, - "\u5dee": 695, - "\u5dfb": 696, - "\u5e02": 697, - "\u5e08": 698, - "\u5e2b": 699, - "\u5e38": 700, - "\u5e3d": 701, - "\u5e73": 702, - "\u5e74": 703, - "\u5e78": 704, - "\u5e7f": 705, - "\u5e83": 706, - "\u5e86": 707, - "\u5e8a": 708, - "\u5e94": 709, - "\u5e97": 710, - "\u5e9c": 711, - "\u5ea6": 712, - "\u5eb7": 713, - "\u5eb8": 714, - "\u5ebe": 715, - "\u5ee2": 716, - "\u5ee3": 717, - "\u5efa": 718, - "\u5f00": 719, - "\u5f0f": 720, - "\u5f15": 721, - "\u5f18": 722, - "\u5f20": 723, - "\u5f25": 724, - "\u5f35": 725, - "\u5f52": 
726, - "\u5f53": 727, - "\u5f62": 728, - "\u5f64": 729, - "\u5f66": 730, - "\u5f6c": 731, - "\u5f6d": 732, - "\u5f71": 733, - "\u5f81": 734, - "\u5f8b": 735, - "\u5f90": 736, - "\u5f9e": 737, - "\u5fa9": 738, - "\u5fae": 739, - "\u5fb3": 740, - "\u5fb7": 741, - "\u5fbd": 742, - "\u5fc3": 743, - "\u5fcd": 744, - "\u5fd7": 745, - "\u5fe0": 746, - "\u5feb": 747, - "\u5ff5": 748, - "\u6001": 749, - "\u601d": 750, - "\u6027": 751, - "\u606d": 752, - "\u606f": 753, - "\u6096": 754, - "\u60a0": 755, - "\u60bc": 756, - "\u60c5": 757, - "\u60e0": 758, - "\u60f3": 759, - "\u610f": 760, - "\u611f": 761, - "\u614b": 762, - "\u6167": 763, - "\u6176": 764, - "\u61c9": 765, - "\u6210": 766, - "\u6211": 767, - "\u6226": 768, - "\u6230": 769, - "\u6236": 770, - "\u6238": 771, - "\u623f": 772, - "\u6240": 773, - "\u6248": 774, - "\u6253": 775, - "\u6255": 776, - "\u627f": 777, - "\u6280": 778, - "\u62a5": 779, - "\u62e1": 780, - "\u62e9": 781, - "\u6301": 782, - "\u6307": 783, - "\u6311": 784, - "\u6377": 785, - "\u6392": 786, - "\u639b": 787, - "\u63a2": 788, - "\u63a5": 789, - "\u63a7": 790, - "\u63a8": 791, - "\u63aa": 792, - "\u63e3": 793, - "\u6447": 794, - "\u64a5": 795, - "\u64ad": 796, - "\u652f": 797, - "\u6539": 798, - "\u653e": 799, - "\u653f": 800, - "\u6548": 801, - "\u654e": 802, - "\u6559": 803, - "\u6566": 804, - "\u656c": 805, - "\u6570": 806, - "\u6587": 807, - "\u658c": 808, - "\u6590": 809, - "\u6599": 810, - "\u659c": 811, - "\u65b0": 812, - "\u65b7": 813, - "\u65b9": 814, - "\u65bc": 815, - "\u65bd": 816, - "\u65cf": 817, - "\u65e5": 818, - "\u65f6": 819, - "\u660c": 820, - "\u660e": 821, - "\u6613": 822, - "\u661f": 823, - "\u6625": 824, - "\u6642": 825, - "\u664b": 826, - "\u6653": 827, - "\u6668": 828, - "\u666f": 829, - "\u6674": 830, - "\u66a2": 831, - "\u66b4": 832, - "\u66c9": 833, - "\u66f4": 834, - "\u66f8": 835, - "\u66f9": 836, - "\u66ff": 837, - "\u6700": 838, - "\u6703": 839, - "\u6708": 840, - "\u6709": 841, - "\u670d": 842, - "\u671b": 843, - 
"\u671d": 844, - "\u671f": 845, - "\u6728": 846, - "\u672a": 847, - "\u672c": 848, - "\u672e": 849, - "\u672f": 850, - "\u6731": 851, - "\u673a": 852, - "\u6742": 853, - "\u6749": 854, - "\u674e": 855, - "\u6750": 856, - "\u6751": 857, - "\u675c": 858, - "\u6765": 859, - "\u6768": 860, - "\u676d": 861, - "\u6770": 862, - "\u6771": 863, - "\u677e": 864, - "\u677f": 865, - "\u6784": 866, - "\u6790": 867, - "\u6797": 868, - "\u679c": 869, - "\u67d0": 870, - "\u67d3": 871, - "\u67e5": 872, - "\u67f3": 873, - "\u67fb": 874, - "\u6807": 875, - "\u6811": 876, - "\u6821": 877, - "\u682a": 878, - "\u6839": 879, - "\u683c": 880, - "\u6842": 881, - "\u6848": 882, - "\u6893": 883, - "\u68a6": 884, - "\u68c4": 885, - "\u68ee": 886, - "\u690d": 887, - "\u691c": 888, - "\u694a": 889, - "\u695a": 890, - "\u6960": 891, - "\u696d": 892, - "\u696e": 893, - "\u697c": 894, - "\u6982": 895, - "\u69ae": 896, - "\u69cb": 897, - "\u69ea": 898, - "\u69fb": 899, - "\u6a19": 900, - "\u6a1f": 901, - "\u6a21": 902, - "\u6a23": 903, - "\u6a2a": 904, - "\u6a39": 905, - "\u6a4b": 906, - "\u6a5f": 907, - "\u6aa2": 908, - "\u6b12": 909, - "\u6b21": 910, - "\u6b49": 911, - "\u6b50": 912, - "\u6b63": 913, - "\u6b66": 914, - "\u6b74": 915, - "\u6b77": 916, - "\u6b8a": 917, - "\u6bc5": 918, - "\u6bd2": 919, - "\u6bd4": 920, - "\u6c0f": 921, - "\u6c11": 922, - "\u6c14": 923, - "\u6c17": 924, - "\u6c23": 925, - "\u6c34": 926, - "\u6c37": 927, - "\u6c38": 928, - "\u6c5a": 929, - "\u6c5f": 930, - "\u6c60": 931, - "\u6c61": 932, - "\u6c64": 933, - "\u6c6a": 934, - "\u6c76": 935, - "\u6c7a": 936, - "\u6c88": 937, - "\u6ca2": 938, - "\u6cb3": 939, - "\u6cb9": 940, - "\u6cbb": 941, - "\u6cbc": 942, - "\u6cbf": 943, - "\u6cc1": 944, - "\u6cd5": 945, - "\u6ce2": 946, - "\u6cf0": 947, - "\u6d0b": 948, - "\u6d25": 949, - "\u6d2a": 950, - "\u6d31": 951, - "\u6d3b": 952, - "\u6d41": 953, - "\u6d45": 954, - "\u6d4b": 955, - "\u6d4e": 956, - "\u6d59": 957, - "\u6d77": 958, - "\u6d89": 959, - "\u6d9b": 960, - "\u6db5": 
961, - "\u6dd1": 962, - "\u6dd8": 963, - "\u6de1": 964, - "\u6df5": 965, - "\u6df7": 966, - "\u6df8": 967, - "\u6dfa": 968, - "\u6e05": 969, - "\u6e08": 970, - "\u6e21": 971, - "\u6e2c": 972, - "\u6e2f": 973, - "\u6e56": 974, - "\u6e6f": 975, - "\u6e90": 976, - "\u6e96": 977, - "\u6eab": 978, - "\u6ecb": 979, - "\u6ed9": 980, - "\u6eff": 981, - "\u6f14": 982, - "\u6f22": 983, - "\u6f2b": 984, - "\u6f58": 985, - "\u6fa4": 986, - "\u6fdf": 987, - "\u6fe4": 988, - "\u7009": 989, - "\u7063": 990, - "\u706b": 991, - "\u707c": 992, - "\u707d": 993, - "\u707e": 994, - "\u708e": 995, - "\u70ad": 996, - "\u70ae": 997, - "\u70b9": 998, - "\u714c": 999, - "\u7167": 1000, - "\u71b1": 1001, - "\u71d5": 1002, - "\u7232": 1003, - "\u7247": 1004, - "\u7248": 1005, - "\u725f": 1006, - "\u7269": 1007, - "\u7279": 1008, - "\u72b6": 1009, - "\u734e": 1010, - "\u7368": 1011, - "\u737b": 1012, - "\u7384": 1013, - "\u7389": 1014, - "\u738b": 1015, - "\u73af": 1016, - "\u73b0": 1017, - "\u73b2": 1018, - "\u73c9": 1019, - "\u73cd": 1020, - "\u73fe": 1021, - "\u7403": 1022, - "\u7406": 1023, - "\u741b": 1024, - "\u745b": 1025, - "\u7476": 1026, - "\u7490": 1027, - "\u74b0": 1028, - "\u74f7": 1029, - "\u7518": 1030, - "\u751f": 1031, - "\u7523": 1032, - "\u7528": 1033, - "\u7530": 1034, - "\u7531": 1035, - "\u7532": 1036, - "\u7535": 1037, - "\u753a": 1038, - "\u753b": 1039, - "\u754c": 1040, - "\u7565": 1041, - "\u756b": 1042, - "\u7570": 1043, - "\u7597": 1044, - "\u75be": 1045, - "\u75c5": 1046, - "\u75c7": 1047, - "\u7621": 1048, - "\u7642": 1049, - "\u767a": 1050, - "\u767b": 1051, - "\u767d": 1052, - "\u767e": 1053, - "\u7684": 1054, - "\u76ca": 1055, - "\u76d1": 1056, - "\u76d8": 1057, - "\u76df": 1058, - "\u76e3": 1059, - "\u76e4": 1060, - "\u76ee": 1061, - "\u76f4": 1062, - "\u76f8": 1063, - "\u7701": 1064, - "\u770b": 1065, - "\u770c": 1066, - "\u771e": 1067, - "\u771f": 1068, - "\u7763": 1069, - "\u77bb": 1070, - "\u77e5": 1071, - "\u77f3": 1072, - "\u7802": 1073, - "\u7814": 
1074, - "\u784f": 1075, - "\u78a9": 1076, - "\u790e": 1077, - "\u793a": 1078, - "\u793e": 1079, - "\u7950": 1080, - "\u795d": 1081, - "\u795e": 1082, - "\u798e": 1083, - "\u798f": 1084, - "\u79c0": 1085, - "\u79c1": 1086, - "\u79d1": 1087, - "\u79d8": 1088, - "\u79e6": 1089, - "\u7a0b": 1090, - "\u7a3f": 1091, - "\u7a4e": 1092, - "\u7a6b": 1093, - "\u7a76": 1094, - "\u7a7a": 1095, - "\u7a9f": 1096, - "\u7acb": 1097, - "\u7ae0": 1098, - "\u7ae5": 1099, - "\u7b1b": 1100, - "\u7b2c": 1101, - "\u7b49": 1102, - "\u7b56": 1103, - "\u7b67": 1104, - "\u7ba1": 1105, - "\u7bc0": 1106, - "\u7bc9": 1107, - "\u7be4": 1108, - "\u7c21": 1109, - "\u7c4d": 1110, - "\u7c73": 1111, - "\u7c98": 1112, - "\u7cfb": 1113, - "\u7d00": 1114, - "\u7d05": 1115, - "\u7d20": 1116, - "\u7d30": 1117, - "\u7d44": 1118, - "\u7d4c": 1119, - "\u7d50": 1120, - "\u7d93": 1121, - "\u7d9c": 1122, - "\u7da0": 1123, - "\u7dcf": 1124, - "\u7dd1": 1125, - "\u7de8": 1126, - "\u7e23": 1127, - "\u7e3d": 1128, - "\u7e3e": 1129, - "\u7e41": 1130, - "\u7e8c": 1131, - "\u7ea2": 1132, - "\u7eaa": 1133, - "\u7ec3": 1134, - "\u7ec4": 1135, - "\u7ecf": 1136, - "\u7ed3": 1137, - "\u7edc": 1138, - "\u7ee9": 1139, - "\u7efc": 1140, - "\u7eff": 1141, - "\u7f51": 1142, - "\u7f6e": 1143, - "\u7f8e": 1144, - "\u7fa4": 1145, - "\u7fa9": 1146, - "\u7fd2": 1147, - "\u7fd4": 1148, - "\u7fdf": 1149, - "\u7ff0": 1150, - "\u8003": 1151, - "\u8005": 1152, - "\u8033": 1153, - "\u8054": 1154, - "\u805a": 1155, - "\u806a": 1156, - "\u8085": 1157, - "\u8096": 1158, - "\u80b2": 1159, - "\u80ba": 1160, - "\u80cc": 1161, - "\u80e1": 1162, - "\u80fd": 1163, - "\u8108": 1164, - "\u8109": 1165, - "\u8151": 1166, - "\u8154": 1167, - "\u81df": 1168, - "\u81e3": 1169, - "\u81e8": 1170, - "\u81f4": 1171, - "\u81fa": 1172, - "\u8207": 1173, - "\u8208": 1174, - "\u8212": 1175, - "\u821e": 1176, - "\u822c": 1177, - "\u8239": 1178, - "\u826f": 1179, - "\u827a": 1180, - "\u8282": 1181, - "\u828d": 1182, - "\u829d": 1183, - "\u82ac": 1184, - "\u82b1": 
1185, - "\u82b3": 1186, - "\u82cf": 1187, - "\u82e6": 1188, - "\u82f1": 1189, - "\u8302": 1190, - "\u8303": 1191, - "\u8349": 1192, - "\u836f": 1193, - "\u839e": 1194, - "\u83ab": 1195, - "\u83b9": 1196, - "\u83c1": 1197, - "\u83ef": 1198, - "\u83f1": 1199, - "\u840d": 1200, - "\u8425": 1201, - "\u8429": 1202, - "\u8431": 1203, - "\u843d": 1204, - "\u8449": 1205, - "\u8463": 1206, - "\u8499": 1207, - "\u84bc": 1208, - "\u8521": 1209, - "\u852d": 1210, - "\u8587": 1211, - "\u859b": 1212, - "\u85ac": 1213, - "\u85cd": 1214, - "\u85e4": 1215, - "\u85e5": 1216, - "\u865a": 1217, - "\u865b": 1218, - "\u865f": 1219, - "\u8681": 1220, - "\u8682": 1221, - "\u878d": 1222, - "\u884c": 1223, - "\u8853": 1224, - "\u8857": 1225, - "\u885b": 1226, - "\u8865": 1227, - "\u888d": 1228, - "\u88ab": 1229, - "\u88c1": 1230, - "\u88dc": 1231, - "\u88fd": 1232, - "\u897f": 1233, - "\u8981": 1234, - "\u898b": 1235, - "\u898f": 1236, - "\u8996": 1237, - "\u89ba": 1238, - "\u89c0": 1239, - "\u89c2": 1240, - "\u89c4": 1241, - "\u89c6": 1242, - "\u89d2": 1243, - "\u89e3": 1244, - "\u89f4": 1245, - "\u8a02": 1246, - "\u8a08": 1247, - "\u8a0e": 1248, - "\u8a18": 1249, - "\u8a23": 1250, - "\u8a2d": 1251, - "\u8a3c": 1252, - "\u8a55": 1253, - "\u8a79": 1254, - "\u8a8c": 1255, - "\u8a9e": 1256, - "\u8abf": 1257, - "\u8ac7": 1258, - "\u8ad6": 1259, - "\u8b1b": 1260, - "\u8b28": 1261, - "\u8b49": 1262, - "\u8b70": 1263, - "\u8b8a": 1264, - "\u8ba8": 1265, - "\u8bae": 1266, - "\u8bb8": 1267, - "\u8bba": 1268, - "\u8bbe": 1269, - "\u8bc1": 1270, - "\u8bc4": 1271, - "\u8bd5": 1272, - "\u8bfe": 1273, - "\u8c03": 1274, - "\u8c22": 1275, - "\u8c37": 1276, - "\u8c61": 1277, - "\u8ca1": 1278, - "\u8cc3": 1279, - "\u8cc7": 1280, - "\u8cea": 1281, - "\u8d22": 1282, - "\u8d23": 1283, - "\u8d28": 1284, - "\u8d3a": 1285, - "\u8d44": 1286, - "\u8d64": 1287, - "\u8d75": 1288, - "\u8d8a": 1289, - "\u8d99": 1290, - "\u8de1": 1291, - "\u8def": 1292, - "\u8df5": 1293, - "\u8e48": 1294, - "\u8ef8": 1295, - "\u8f03": 
1296, - "\u8f1d": 1297, - "\u8f2f": 1298, - "\u8f49": 1299, - "\u8f76": 1300, - "\u8f83": 1301, - "\u8f9b": 1302, - "\u8fa8": 1303, - "\u8fb2": 1304, - "\u8fba": 1305, - "\u8fbd": 1306, - "\u8fbe": 1307, - "\u8fc7": 1308, - "\u8fce": 1309, - "\u8fd1": 1310, - "\u8fdb": 1311, - "\u8fdc": 1312, - "\u8ff0": 1313, - "\u8ff9": 1314, - "\u9009": 1315, - "\u901a": 1316, - "\u901d": 1317, - "\u901f": 1318, - "\u9020": 1319, - "\u9023": 1320, - "\u9032": 1321, - "\u904b": 1322, - "\u9053": 1323, - "\u9054": 1324, - "\u9075": 1325, - "\u9078": 1326, - "\u907a": 1327, - "\u907c": 1328, - "\u90ce": 1329, - "\u90d1": 1330, - "\u90de": 1331, - "\u90e8": 1332, - "\u90ed": 1333, - "\u90fd": 1334, - "\u9115": 1335, - "\u9127": 1336, - "\u914d": 1337, - "\u9178": 1338, - "\u91ab": 1339, - "\u91cd": 1340, - "\u91ce": 1341, - "\u91d1": 1342, - "\u9234": 1343, - "\u925b": 1344, - "\u9271": 1345, - "\u9298": 1346, - "\u932b": 1347, - "\u9332": 1348, - "\u934d": 1349, - "\u93ad": 1350, - "\u93e1": 1351, - "\u9435": 1352, - "\u94c1": 1353, - "\u94dc": 1354, - "\u9500": 1355, - "\u950b": 1356, - "\u9577": 1357, - "\u9580": 1358, - "\u958b": 1359, - "\u95a2": 1360, - "\u95a3": 1361, - "\u95bb": 1362, - "\u95be": 1363, - "\u95dc": 1364, - "\u95e8": 1365, - "\u95ee": 1366, - "\u95f4": 1367, - "\u95fb": 1368, - "\u962a": 1369, - "\u9632": 1370, - "\u9633": 1371, - "\u9634": 1372, - "\u963f": 1373, - "\u9644": 1374, - "\u9645": 1375, - "\u9648": 1376, - "\u964d": 1377, - "\u9662": 1378, - "\u9670": 1379, - "\u9673": 1380, - "\u9676": 1381, - "\u967d": 1382, - "\u9686": 1383, - "\u9690": 1384, - "\u969b": 1385, - "\u969c": 1386, - "\u96a8": 1387, - "\u96aa": 1388, - "\u96c4": 1389, - "\u96c5": 1390, - "\u96c6": 1391, - "\u96d1": 1392, - "\u96d9": 1393, - "\u96dc": 1394, - "\u96e3": 1395, - "\u96ea": 1396, - "\u96ef": 1397, - "\u96f2": 1398, - "\u96f7": 1399, - "\u9707": 1400, - "\u9716": 1401, - "\u971e": 1402, - "\u9751": 1403, - "\u9752": 1404, - "\u975e": 1405, - "\u9762": 1406, - "\u9769": 
1407, - "\u9773": 1408, - "\u97d3": 1409, - "\u97f3": 1410, - "\u97f5": 1411, - "\u9801": 1412, - "\u9808": 1413, - "\u9818": 1414, - "\u984c": 1415, - "\u985e": 1416, - "\u9875": 1417, - "\u987e": 1418, - "\u9896": 1419, - "\u9898": 1420, - "\u989c": 1421, - "\u98a8": 1422, - "\u98ce": 1423, - "\u98db": 1424, - "\u9918": 1425, - "\u9928": 1426, - "\u9970": 1427, - "\u9986": 1428, - "\u9999": 1429, - "\u99ac": 1430, - "\u99ae": 1431, - "\u9a57": 1432, - "\u9a6c": 1433, - "\u9ad4": 1434, - "\u9ad8": 1435, - "\u9b41": 1436, - "\u9bae": 1437, - "\u9c9c": 1438, - "\u9ce5": 1439, - "\u9df2": 1440, - "\u9e23": 1441, - "\u9e7d": 1442, - "\u9e97": 1443, - "\u9ed8": 1444, - "\u9ede": 1445, - "\u9f4a": 1446, - "\u9f4b": 1447, - "\u9f8d": 1448, - "\u9f99": 1449, - "\uac00": 1450, - "\uac01": 1451, - "\uac04": 1452, - "\uac08": 1453, - "\uac10": 1454, - "\uac15": 1455, - "\uac1c": 1456, - "\uac1d": 1457, - "\uac31": 1458, - "\uac70": 1459, - "\uac74": 1460, - "\uac80": 1461, - "\uac8c": 1462, - "\uaca9": 1463, - "\uacac": 1464, - "\uacb0": 1465, - "\uacbd": 1466, - "\uacc4": 1467, - "\uace0": 1468, - "\uace4": 1469, - "\uacf5": 1470, - "\uacfc": 1471, - "\uacfd": 1472, - "\uad00": 1473, - "\uad11": 1474, - "\uad34": 1475, - "\uad50": 1476, - "\uad6c": 1477, - "\uad6d": 1478, - "\uad70": 1479, - "\uad81": 1480, - "\uad8c": 1481, - "\uadc0": 1482, - "\uaddc": 1483, - "\uade0": 1484, - "\uadf8": 1485, - "\uadf9": 1486, - "\uadfc": 1487, - "\uae00": 1488, - "\uae08": 1489, - "\uae09": 1490, - "\uae30": 1491, - "\uae38": 1492, - "\uae40": 1493, - "\uae4c": 1494, - "\ub07c": 1495, - "\ub098": 1496, - "\ub099": 1497, - "\ub09c": 1498, - "\ub0a8": 1499, - "\ub0b4": 1500, - "\ub0b8": 1501, - "\ub108": 1502, - "\ub10c": 1503, - "\ub110": 1504, - "\ub124": 1505, - "\ub137": 1506, - "\ub140": 1507, - "\ub144": 1508, - "\ub150": 1509, - "\ub155": 1510, - "\ub178": 1511, - "\ub179": 1512, - "\ub17c": 1513, - "\ub180": 1514, - "\ub18d": 1515, - "\ub274": 1516, - "\ub294": 1517, - "\ub2a5": 
1518, - "\ub2c8": 1519, - "\ub2dd": 1520, - "\ub2e4": 1521, - "\ub2e8": 1522, - "\ub2ec": 1523, - "\ub2f4": 1524, - "\ub2f9": 1525, - "\ub300": 1526, - "\ub304": 1527, - "\ub313": 1528, - "\ub354": 1529, - "\ub355": 1530, - "\ub358": 1531, - "\ub364": 1532, - "\ub370": 1533, - "\ub378": 1534, - "\ub3c4": 1535, - "\ub3c5": 1536, - "\ub3c8": 1537, - "\ub3cc": 1538, - "\ub3d9": 1539, - "\ub418": 1540, - "\ub41c": 1541, - "\ub450": 1542, - "\ub454": 1543, - "\ub458": 1544, - "\ub4c0": 1545, - "\ub4dc": 1546, - "\ub4dd": 1547, - "\ub4e3": 1548, - "\ub4e4": 1549, - "\ub4f1": 1550, - "\ub514": 1551, - "\ub525": 1552, - "\ub530": 1553, - "\ub77c": 1554, - "\ub780": 1555, - "\ub78c": 1556, - "\ub798": 1557, - "\ub79c": 1558, - "\ub7a8": 1559, - "\ub7b5": 1560, - "\ub7c9": 1561, - "\ub7ec": 1562, - "\ub7fc": 1563, - "\ub808": 1564, - "\ub80c": 1565, - "\ub824": 1566, - "\ub825": 1567, - "\ub828": 1568, - "\ub82c": 1569, - "\ub839": 1570, - "\ub840": 1571, - "\ub85c": 1572, - "\ub85d": 1573, - "\ub860": 1574, - "\ub8cc": 1575, - "\ub8e1": 1576, - "\ub8e8": 1577, - "\ub8ec": 1578, - "\ub958": 1579, - "\ub960": 1580, - "\ub978": 1581, - "\ub97c": 1582, - "\ub9ac": 1583, - "\ub9b0": 1584, - "\ub9bc": 1585, - "\ub9bd": 1586, - "\ub9c1": 1587, - "\ub9c8": 1588, - "\ub9cc": 1589, - "\ub9d0": 1590, - "\ub9dd": 1591, - "\ub9de": 1592, - "\ub9e4": 1593, - "\uba38": 1594, - "\uba39": 1595, - "\uba3c": 1596, - "\uba54": 1597, - "\uba58": 1598, - "\uba74": 1599, - "\uba78": 1600, - "\uba85": 1601, - "\uba87": 1602, - "\ubaa8": 1603, - "\ubaa9": 1604, - "\ubab0": 1605, - "\ubb18": 1606, - "\ubb34": 1607, - "\ubb35": 1608, - "\ubb38": 1609, - "\ubb3c": 1610, - "\ubba4": 1611, - "\ubbf8": 1612, - "\ubbfc": 1613, - "\ubc00": 1614, - "\ubc0f": 1615, - "\ubc14": 1616, - "\ubc15": 1617, - "\ubc18": 1618, - "\ubc1c": 1619, - "\ubc29": 1620, - "\ubc30": 1621, - "\ubc31": 1622, - "\ubc84": 1623, - "\ubc8c": 1624, - "\ubc94": 1625, - "\ubc95": 1626, - "\ubca0": 1627, - "\ubca4": 1628, - "\ubcbd": 
1629, - "\ubcc0": 1630, - "\ubcc4": 1631, - "\ubcd1": 1632, - "\ubcf4": 1633, - "\ubcf5": 1634, - "\ubcf8": 1635, - "\ubd04": 1636, - "\ubd09": 1637, - "\ubd80": 1638, - "\ubd81": 1639, - "\ubd84": 1640, - "\ubd95": 1641, - "\ubdf0": 1642, - "\ube0c": 1643, - "\ube14": 1644, - "\ube44": 1645, - "\ube45": 1646, - "\ube48": 1647, - "\uc0ac": 1648, - "\uc0b0": 1649, - "\uc0b4": 1650, - "\uc0b6": 1651, - "\uc0bc": 1652, - "\uc0c1": 1653, - "\uc0c8": 1654, - "\uc0c9": 1655, - "\uc0dd": 1656, - "\uc11c": 1657, - "\uc11d": 1658, - "\uc120": 1659, - "\uc124": 1660, - "\uc12c": 1661, - "\uc12d": 1662, - "\uc131": 1663, - "\uc138": 1664, - "\uc140": 1665, - "\uc158": 1666, - "\uc15c": 1667, - "\uc18c": 1668, - "\uc18d": 1669, - "\uc190": 1670, - "\uc1a1": 1671, - "\uc1fc": 1672, - "\uc218": 1673, - "\uc219": 1674, - "\uc21c": 1675, - "\uc220": 1676, - "\uc22d": 1677, - "\uc288": 1678, - "\uc2a4": 1679, - "\uc2ac": 1680, - "\uc2b5": 1681, - "\uc2b9": 1682, - "\uc2dc": 1683, - "\uc2dd": 1684, - "\uc2e0": 1685, - "\uc2e4": 1686, - "\uc2ec": 1687, - "\uc2ed": 1688, - "\uc2fc": 1689, - "\uc4f0": 1690, - "\uc544": 1691, - "\uc545": 1692, - "\uc548": 1693, - "\uc54a": 1694, - "\uc554": 1695, - "\uc555": 1696, - "\uc559": 1697, - "\uc560": 1698, - "\uc561": 1699, - "\uc571": 1700, - "\uc57c": 1701, - "\uc591": 1702, - "\uc5b4": 1703, - "\uc5b8": 1704, - "\uc5bb": 1705, - "\uc5bc": 1706, - "\uc5c4": 1707, - "\uc5c5": 1708, - "\uc5d0": 1709, - "\uc5d4": 1710, - "\uc5ec": 1711, - "\uc5ed": 1712, - "\uc5f0": 1713, - "\uc5f4": 1714, - "\uc5fc": 1715, - "\uc5fd": 1716, - "\uc601": 1717, - "\uc608": 1718, - "\uc624": 1719, - "\uc625": 1720, - "\uc628": 1721, - "\uc62c": 1722, - "\uc635": 1723, - "\uc640": 1724, - "\uc644": 1725, - "\uc65c": 1726, - "\uc678": 1727, - "\uc694": 1728, - "\uc695": 1729, - "\uc6a9": 1730, - "\uc6b0": 1731, - "\uc6b1": 1732, - "\uc6b4": 1733, - "\uc6b8": 1734, - "\uc6c0": 1735, - "\uc6c5": 1736, - "\uc6cc": 1737, - "\uc6d0": 1738, - "\uc6d4": 1739, - "\uc6e8": 
1740, - "\uc6f9": 1741, - "\uc704": 1742, - "\uc720": 1743, - "\uc721": 1744, - "\uc724": 1745, - "\uc728": 1746, - "\uc735": 1747, - "\uc73c": 1748, - "\uc740": 1749, - "\uc744": 1750, - "\uc74c": 1751, - "\uc751": 1752, - "\uc758": 1753, - "\uc774": 1754, - "\uc775": 1755, - "\uc778": 1756, - "\uc77c": 1757, - "\uc784": 1758, - "\uc785": 1759, - "\uc788": 1760, - "\uc789": 1761, - "\uc790": 1762, - "\uc791": 1763, - "\uc7a5": 1764, - "\uc7ac": 1765, - "\uc7c1": 1766, - "\uc800": 1767, - "\uc801": 1768, - "\uc804": 1769, - "\uc808": 1770, - "\uc810": 1771, - "\uc811": 1772, - "\uc813": 1773, - "\uc815": 1774, - "\uc81c": 1775, - "\uc820": 1776, - "\uc870": 1777, - "\uc871": 1778, - "\uc874": 1779, - "\uc878": 1780, - "\uc885": 1781, - "\uc88b": 1782, - "\uc8c4": 1783, - "\uc8fc": 1784, - "\uc8fd": 1785, - "\uc900": 1786, - "\uc911": 1787, - "\uc988": 1788, - "\uc99d": 1789, - "\uc9c0": 1790, - "\uc9c1": 1791, - "\uc9c4": 1792, - "\uc9c8": 1793, - "\uc9d1": 1794, - "\uc9d5": 1795, - "\uc9dc": 1796, - "\ucabd": 1797, - "\ucc28": 1798, - "\ucc29": 1799, - "\ucc2c": 1800, - "\ucc30": 1801, - "\ucc38": 1802, - "\ucc3d": 1803, - "\ucc44": 1804, - "\ucc45": 1805, - "\ucc98": 1806, - "\ucc99": 1807, - "\ucc9c": 1808, - "\ucca0": 1809, - "\ucca9": 1810, - "\uccad": 1811, - "\uccb4": 1812, - "\uccd0": 1813, - "\ucd08": 1814, - "\ucd09": 1815, - "\ucd0c": 1816, - "\ucd1d": 1817, - "\ucd5c": 1818, - "\ucd94": 1819, - "\ucd95": 1820, - "\ucd98": 1821, - "\ucd9c": 1822, - "\ucda4": 1823, - "\ucda9": 1824, - "\ucde8": 1825, - "\uce04": 1826, - "\uce20": 1827, - "\uce21": 1828, - "\uce35": 1829, - "\uce58": 1830, - "\uce5c": 1831, - "\uce68": 1832, - "\uce74": 1833, - "\ucea0": 1834, - "\ucee4": 1835, - "\ucef4": 1836, - "\ucf00": 1837, - "\ucf13": 1838, - "\ucf54": 1839, - "\ucf58": 1840, - "\ud06c": 1841, - "\ud074": 1842, - "\ud0a4": 1843, - "\ud0c0": 1844, - "\ud0c4": 1845, - "\ud0c8": 1846, - "\ud0d0": 1847, - "\ud0d1": 1848, - "\ud0d5": 1849, - "\ud0dc": 1850, - "\ud0dd": 
1851, - "\ud130": 1852, - "\ud138": 1853, - "\ud14c": 1854, - "\ud14d": 1855, - "\ud150": 1856, - "\ud154": 1857, - "\ud15c": 1858, - "\ud1a0": 1859, - "\ud1b5": 1860, - "\ud1f4": 1861, - "\ud22c": 1862, - "\ud29c": 1863, - "\ud2b8": 1864, - "\ud2b9": 1865, - "\ud2f0": 1866, - "\ud305": 1867, - "\ud30c": 1868, - "\ud310": 1869, - "\ud328": 1870, - "\ud37c": 1871, - "\ud398": 1872, - "\ud3b4": 1873, - "\ud3b8": 1874, - "\ud3c9": 1875, - "\ud3ec": 1876, - "\ud3f0": 1877, - "\ud45c": 1878, - "\ud488": 1879, - "\ud48d": 1880, - "\ud4e8": 1881, - "\ud504": 1882, - "\ud50c": 1883, - "\ud53c": 1884, - "\ud53d": 1885, - "\ud558": 1886, - "\ud559": 1887, - "\ud55c": 1888, - "\ud560": 1889, - "\ud568": 1890, - "\ud569": 1891, - "\ud56d": 1892, - "\ud574": 1893, - "\ud589": 1894, - "\ud5a5": 1895, - "\ud5c8": 1896, - "\ud5cc": 1897, - "\ud5d8": 1898, - "\ud601": 1899, - "\ud604": 1900, - "\ud611": 1901, - "\ud615": 1902, - "\ud61c": 1903, - "\ud638": 1904, - "\ud63c": 1905, - "\ud64d": 1906, - "\ud654": 1907, - "\ud655": 1908, - "\ud658": 1909, - "\ud65c": 1910, - "\ud669": 1911, - "\ud68c": 1912, - "\ud68d": 1913, - "\ud6a8": 1914, - "\ud6c4": 1915, - "\ud6c8": 1916, - "\ud760": 1917, - "\ud761": 1918, - "\ud765": 1919, - "\ud76c": 1920, - "\uf061": 1921, - "\uf0a0": 1922, - "\uf90a": 1923, - "\uf922": 1924, - "\uf93d": 1925, - "\uf93f": 1926, - "\uf941": 1927, - "\uf958": 1928, - "\uf95a": 1929, - "\uf9ba": 1930, - "\uf9e1": 1931, - "\uf9f4": 1932, - "\uf9f6": 1933, - "\ufa02": 1934, - "\ufb00": 1935, - "\ufb01": 1936, - "\ufb02": 1937, - "\ufffd": 1938 - }, - "vocab_tag": { - "<PAD>": 0, - "B-<author>": 1, - "B-<booktitle>": 2, - "B-<collaboration>": 3, - "B-<date>": 4, - "B-<editor>": 5, - "B-<institution>": 6, - "B-<issue>": 7, - "B-<journal>": 8, - "B-<location>": 9, - "B-<note>": 10, - "B-<pages>": 11, - "B-<publisher>": 12, - "B-<pubnum>": 13, - "B-<series>": 14, - "B-<tech>": 15, - "B-<title>": 16, - "B-<volume>": 17, - "B-<web>": 18, - "I-<author>": 19, - 
"I-<booktitle>": 20, - "I-<collaboration>": 21, - "I-<date>": 22, - "I-<editor>": 23, - "I-<institution>": 24, - "I-<issue>": 25, - "I-<journal>": 26, - "I-<location>": 27, - "I-<note>": 28, - "I-<pages>": 29, - "I-<publisher>": 30, - "I-<pubnum>": 31, - "I-<series>": 32, - "I-<tech>": 33, - "I-<title>": 34, - "I-<volume>": 35, - "I-<web>": 36, - "O": 37 - }, - "vocab_case": [ - "<PAD>", - "numeric", - "allLower", - "allUpper", - "initialUpper", - "other", - "mainly_numeric", - "contains_digit" - ], - "max_char_length": 30, - "feature_preprocessor": { - "features_vocabulary_size": 12, - "features_indices": [ - 9, - 10, - 11, - 12, - 13, - 14, - 15, - 16, - 17, - 18, - 19, - 20, - 21, - 22, - 23, - 24, - 25, - 26, - 27 - ], - "features_map_to_index": { - "9": { - "LINEEND": 1, - "LINEIN": 2, - "LINESTART": 3 - }, - "10": { - "ALLCAP": 13, - "INITCAP": 14, - "NOCAPS": 15 - }, - "11": { - "ALLDIGIT": 25, - "CONTAINSDIGITS": 26, - "NODIGIT": 27 - }, - "12": { - "0": 37, - "1": 38 - }, - "13": { - "0": 49, - "1": 50 - }, - "14": { - "0": 61, - "1": 62 - }, - "15": { - "0": 73, - "1": 74 - }, - "16": { - "0": 85, - "1": 86 - }, - "17": { - "0": 97, - "1": 98 - }, - "18": { - "0": 109, - "1": 110 - }, - "19": { - "0": 121, - "1": 122 - }, - "20": { - "0": 133, - "1": 134 - }, - "21": { - "0": 145, - "1": 146 - }, - "22": { - "0": 157, - "1": 158 - }, - "23": { - "0": 169, - "1": 170 - }, - "24": { - "0": 181, - "1": 182 - }, - "25": { - "0": 193, - "1": 194 - }, - "26": { - "COMMA": 205, - "DOT": 206, - "ENDBRACKET": 207, - "HYPHEN": 208, - "NOPUNCT": 209, - "OPENBRACKET": 210, - "PUNCT": 211, - "QUOTE": 212 - }, - "27": { - "0": 217, - "1": 218, - "10": 219, - "11": 220, - "2": 221, - "3": 222, - "4": 223, - "5": 224, - "6": 225, - "7": 226, - "8": 227, - "9": 228 - } - } - }, - "indice_tag": { - "0": "<PAD>", - "1": "B-<author>", - "2": "B-<booktitle>", - "3": "B-<collaboration>", - "4": "B-<date>", - "5": "B-<editor>", - "6": "B-<institution>", - "7": "B-<issue>", - 
"8": "B-<journal>", - "9": "B-<location>", - "10": "B-<note>", - "11": "B-<pages>", - "12": "B-<publisher>", - "13": "B-<pubnum>", - "14": "B-<series>", - "15": "B-<tech>", - "16": "B-<title>", - "17": "B-<volume>", - "18": "B-<web>", - "19": "I-<author>", - "20": "I-<booktitle>", - "21": "I-<collaboration>", - "22": "I-<date>", - "23": "I-<editor>", - "24": "I-<institution>", - "25": "I-<issue>", - "26": "I-<journal>", - "27": "I-<location>", - "28": "I-<note>", - "29": "I-<pages>", - "30": "I-<publisher>", - "31": "I-<pubnum>", - "32": "I-<series>", - "33": "I-<tech>", - "34": "I-<title>", - "35": "I-<volume>", - "36": "I-<web>", - "37": "O" - } -} \ No newline at end of file diff --git a/grobid-home/models/citation-BidLSTM_ChainCRF_FEATURES-with_ELMo/config.json b/grobid-home/models/citation-BidLSTM_ChainCRF_FEATURES-with_ELMo/config.json deleted file mode 100644 index 5e74b37507..0000000000 --- a/grobid-home/models/citation-BidLSTM_ChainCRF_FEATURES-with_ELMo/config.json +++ /dev/null @@ -1,143 +0,0 @@ -{ - "model_name": "citation-BidLSTM_ChainCRF_FEATURES-with_ELMo", - "architecture": "BidLSTM_ChainCRF_FEATURES", - "embeddings_name": "glove-840B", - "char_vocab_size": 211, - "case_vocab_size": 8, - "char_embedding_size": 25, - "num_char_lstm_units": 25, - "max_char_length": 30, - "features_vocabulary_size": 12, - "features_indices": [ - 9, - 10, - 11, - 12, - 13, - 14, - 15, - 16, - 17, - 18, - 19, - 20, - 21, - 22, - 23, - 24, - 25, - 26, - 27 - ], - "features_embedding_size": 4, - "features_lstm_units": 4, - "max_sequence_length": 600, - "word_embedding_size": 1324, - "num_word_lstm_units": 100, - "case_embedding_size": 5, - "dropout": 0.5, - "recurrent_dropout": 0.5, - "use_crf": true, - "use_chain_crf": true, - "fold_number": 1, - "batch_size": 20, - "transformer_name": null, - "use_ELMo": true, - "features_map_to_index": { - "9": { - "LINEEND": 1, - "LINEIN": 2, - "LINESTART": 3 - }, - "10": { - "ALLCAP": 13, - "INITCAP": 14, - "NOCAPS": 15 - }, - "11": { 
- "ALLDIGIT": 25, - "CONTAINSDIGITS": 26, - "NODIGIT": 27 - }, - "12": { - "0": 37, - "1": 38 - }, - "13": { - "0": 49, - "1": 50 - }, - "14": { - "0": 61, - "1": 62 - }, - "15": { - "0": 73, - "1": 74 - }, - "16": { - "0": 85, - "1": 86 - }, - "17": { - "0": 97, - "1": 98 - }, - "18": { - "0": 109, - "1": 110 - }, - "19": { - "0": 121, - "1": 122 - }, - "20": { - "0": 133, - "1": 134 - }, - "21": { - "0": 145, - "1": 146 - }, - "22": { - "0": 157, - "1": 158 - }, - "23": { - "0": 169, - "1": 170 - }, - "24": { - "0": 181, - "1": 182 - }, - "25": { - "0": 193, - "1": 194 - }, - "26": { - "COMMA": 205, - "DOT": 206, - "ENDBRACKET": 207, - "HYPHEN": 208, - "NOPUNCT": 209, - "OPENBRACKET": 210, - "PUNCT": 211, - "QUOTE": 212 - }, - "27": { - "0": 217, - "1": 218, - "10": 219, - "11": 220, - "2": 221, - "3": 222, - "4": 223, - "5": 224, - "6": 225, - "7": 226, - "8": 227, - "9": 228 - } - } -} \ No newline at end of file diff --git a/grobid-home/models/citation-BidLSTM_ChainCRF_FEATURES-with_ELMo/model_weights.hdf5 b/grobid-home/models/citation-BidLSTM_ChainCRF_FEATURES-with_ELMo/model_weights.hdf5 deleted file mode 100644 index 91d59e8d6e..0000000000 Binary files a/grobid-home/models/citation-BidLSTM_ChainCRF_FEATURES-with_ELMo/model_weights.hdf5 and /dev/null differ diff --git a/grobid-home/models/citation-BidLSTM_ChainCRF_FEATURES-with_ELMo/preprocessor.json b/grobid-home/models/citation-BidLSTM_ChainCRF_FEATURES-with_ELMo/preprocessor.json deleted file mode 100644 index ba61948597..0000000000 --- a/grobid-home/models/citation-BidLSTM_ChainCRF_FEATURES-with_ELMo/preprocessor.json +++ /dev/null @@ -1,434 +0,0 @@ -{ - "padding": true, - "return_lengths": true, - "return_word_embeddings": true, - "return_casing": false, - "return_features": true, - "return_chars": true, - "return_bert_embeddings": false, - "vocab_char": { - "<PAD>": 0, - "<UNK>": 1, - "!": 2, - "\"": 3, - "#": 4, - "$": 5, - "%": 6, - "&": 7, - "'": 8, - "(": 9, - ")": 10, - "*": 11, - "+": 12, - ",": 
13, - "-": 14, - ".": 15, - "/": 16, - "0": 17, - "1": 18, - "2": 19, - "3": 20, - "4": 21, - "5": 22, - "6": 23, - "7": 24, - "8": 25, - "9": 26, - ":": 27, - ";": 28, - "=": 29, - ">": 30, - "?": 31, - "@": 32, - "A": 33, - "B": 34, - "C": 35, - "D": 36, - "E": 37, - "F": 38, - "G": 39, - "H": 40, - "I": 41, - "J": 42, - "K": 43, - "L": 44, - "M": 45, - "N": 46, - "O": 47, - "P": 48, - "Q": 49, - "R": 50, - "S": 51, - "T": 52, - "U": 53, - "V": 54, - "W": 55, - "X": 56, - "Y": 57, - "Z": 58, - "[": 59, - "]": 60, - "_": 61, - "`": 62, - "a": 63, - "b": 64, - "c": 65, - "d": 66, - "e": 67, - "f": 68, - "g": 69, - "h": 70, - "i": 71, - "j": 72, - "k": 73, - "l": 74, - "m": 75, - "n": 76, - "o": 77, - "p": 78, - "q": 79, - "r": 80, - "s": 81, - "t": 82, - "u": 83, - "v": 84, - "w": 85, - "x": 86, - "y": 87, - "z": 88, - "|": 89, - "~": 90, - "\u0084": 91, - "\u0086": 92, - "\u0092": 93, - "\u0096": 94, - "\u00a1": 95, - "\u00a2": 96, - "\u00a4": 97, - "\u00a7": 98, - "\u00a8": 99, - "\u00a9": 100, - "\u00ab": 101, - "\u00ad": 102, - "\u00af": 103, - "\u00b0": 104, - "\u00b1": 105, - "\u00b2": 106, - "\u00b3": 107, - "\u00b4": 108, - "\u00b5": 109, - "\u00b8": 110, - "\u00ba": 111, - "\u00bb": 112, - "\u00bc": 113, - "\u00bf": 114, - "\u00c0": 115, - "\u00c3": 116, - "\u00c5": 117, - "\u00c8": 118, - "\u00c9": 119, - "\u00ca": 120, - "\u00ce": 121, - "\u00cf": 122, - "\u00d4": 123, - "\u00d6": 124, - "\u00d7": 125, - "\u00d8": 126, - "\u00dc": 127, - "\u00df": 128, - "\u00e0": 129, - "\u00e1": 130, - "\u00e2": 131, - "\u00e3": 132, - "\u00e4": 133, - "\u00e7": 134, - "\u00e8": 135, - "\u00e9": 136, - "\u00ea": 137, - "\u00ec": 138, - "\u00ed": 139, - "\u00ee": 140, - "\u00ef": 141, - "\u00f1": 142, - "\u00f2": 143, - "\u00f3": 144, - "\u00f4": 145, - "\u00f6": 146, - "\u00f8": 147, - "\u00fa": 148, - "\u00fc": 149, - "\u00fd": 150, - "\u0102": 151, - "\u0107": 152, - "\u010c": 153, - "\u010d": 154, - "\u011f": 155, - "\u0130": 156, - "\u0131": 157, - "\u0132": 158, - 
"\u013e": 159, - "\u0142": 160, - "\u0144": 161, - "\u0179": 162, - "\u017d": 163, - "\u02c6": 164, - "\u02c7": 165, - "\u039a": 166, - "\u039b": 167, - "\u039e": 168, - "\u03a0": 169, - "\u03b1": 170, - "\u03b2": 171, - "\u03b3": 172, - "\u03b5": 173, - "\u03b7": 174, - "\u03b8": 175, - "\u03b9": 176, - "\u03bb": 177, - "\u03bd": 178, - "\u03be": 179, - "\u03bf": 180, - "\u03c0": 181, - "\u03c1": 182, - "\u03c2": 183, - "\u03c3": 184, - "\u03c4": 185, - "\u03c6": 186, - "\u03e9": 187, - "\u1390": 188, - "\u1f04": 189, - "\u1f14": 190, - "\u1f73": 191, - "\u1f79": 192, - "\u2018": 193, - "\u2019": 194, - "\u201c": 195, - "\u201d": 196, - "\u2022": 197, - "\u2192": 198, - "\u2202": 199, - "\u2206": 200, - "\u2212": 201, - "\u221a": 202, - "\u221d": 203, - "\u221e": 204, - "\u223c": 205, - "\u2245": 206, - "\uf061": 207, - "\ufb00": 208, - "\ufb01": 209, - "\ufb02": 210 - }, - "vocab_tag": { - "<PAD>": 0, - "B-<author>": 1, - "B-<booktitle>": 2, - "B-<collaboration>": 3, - "B-<date>": 4, - "B-<editor>": 5, - "B-<institution>": 6, - "B-<issue>": 7, - "B-<journal>": 8, - "B-<location>": 9, - "B-<note>": 10, - "B-<pages>": 11, - "B-<publisher>": 12, - "B-<pubnum>": 13, - "B-<series>": 14, - "B-<tech>": 15, - "B-<title>": 16, - "B-<volume>": 17, - "B-<web>": 18, - "I-<author>": 19, - "I-<booktitle>": 20, - "I-<collaboration>": 21, - "I-<date>": 22, - "I-<editor>": 23, - "I-<institution>": 24, - "I-<issue>": 25, - "I-<journal>": 26, - "I-<location>": 27, - "I-<note>": 28, - "I-<pages>": 29, - "I-<publisher>": 30, - "I-<pubnum>": 31, - "I-<series>": 32, - "I-<tech>": 33, - "I-<title>": 34, - "I-<volume>": 35, - "I-<web>": 36, - "O": 37 - }, - "vocab_case": [ - "<PAD>", - "numeric", - "allLower", - "allUpper", - "initialUpper", - "other", - "mainly_numeric", - "contains_digit" - ], - "max_char_length": 30, - "feature_preprocessor": { - "features_vocabulary_size": 12, - "features_indices": [ - 9, - 10, - 11, - 12, - 13, - 14, - 15, - 16, - 17, - 18, - 19, - 20, - 21, - 22, - 
23, - 24, - 25, - 26, - 27 - ], - "features_map_to_index": { - "9": { - "LINEEND": 1, - "LINEIN": 2, - "LINESTART": 3 - }, - "10": { - "ALLCAP": 13, - "INITCAP": 14, - "NOCAPS": 15 - }, - "11": { - "ALLDIGIT": 25, - "CONTAINSDIGITS": 26, - "NODIGIT": 27 - }, - "12": { - "0": 37, - "1": 38 - }, - "13": { - "0": 49, - "1": 50 - }, - "14": { - "0": 61, - "1": 62 - }, - "15": { - "0": 73, - "1": 74 - }, - "16": { - "0": 85, - "1": 86 - }, - "17": { - "0": 97, - "1": 98 - }, - "18": { - "0": 109, - "1": 110 - }, - "19": { - "0": 121, - "1": 122 - }, - "20": { - "0": 133, - "1": 134 - }, - "21": { - "0": 145, - "1": 146 - }, - "22": { - "0": 157, - "1": 158 - }, - "23": { - "0": 169, - "1": 170 - }, - "24": { - "0": 181, - "1": 182 - }, - "25": { - "0": 193, - "1": 194 - }, - "26": { - "COMMA": 205, - "DOT": 206, - "ENDBRACKET": 207, - "HYPHEN": 208, - "NOPUNCT": 209, - "OPENBRACKET": 210, - "PUNCT": 211, - "QUOTE": 212 - }, - "27": { - "0": 217, - "1": 218, - "10": 219, - "11": 220, - "2": 221, - "3": 222, - "4": 223, - "5": 224, - "6": 225, - "7": 226, - "8": 227, - "9": 228 - } - } - }, - "indice_tag": { - "0": "<PAD>", - "1": "B-<author>", - "2": "B-<booktitle>", - "3": "B-<collaboration>", - "4": "B-<date>", - "5": "B-<editor>", - "6": "B-<institution>", - "7": "B-<issue>", - "8": "B-<journal>", - "9": "B-<location>", - "10": "B-<note>", - "11": "B-<pages>", - "12": "B-<publisher>", - "13": "B-<pubnum>", - "14": "B-<series>", - "15": "B-<tech>", - "16": "B-<title>", - "17": "B-<volume>", - "18": "B-<web>", - "19": "I-<author>", - "20": "I-<booktitle>", - "21": "I-<collaboration>", - "22": "I-<date>", - "23": "I-<editor>", - "24": "I-<institution>", - "25": "I-<issue>", - "26": "I-<journal>", - "27": "I-<location>", - "28": "I-<note>", - "29": "I-<pages>", - "30": "I-<publisher>", - "31": "I-<pubnum>", - "32": "I-<series>", - "33": "I-<tech>", - "34": "I-<title>", - "35": "I-<volume>", - "36": "I-<web>", - "37": "O" - } -} \ No newline at end of file diff --git 
a/grobid-home/models/citation-BidLSTM_ChainCRF_FEATURES/config.json b/grobid-home/models/citation-BidLSTM_ChainCRF_FEATURES/config.json deleted file mode 100644 index 15fd79b64d..0000000000 --- a/grobid-home/models/citation-BidLSTM_ChainCRF_FEATURES/config.json +++ /dev/null @@ -1,143 +0,0 @@ -{ - "model_name": "citation-BidLSTM_ChainCRF_FEATURES", - "architecture": "BidLSTM_ChainCRF_FEATURES", - "embeddings_name": "glove-840B", - "char_vocab_size": 1939, - "case_vocab_size": 8, - "char_embedding_size": 25, - "num_char_lstm_units": 25, - "max_char_length": 30, - "features_vocabulary_size": 12, - "features_indices": [ - 9, - 10, - 11, - 12, - 13, - 14, - 15, - 16, - 17, - 18, - 19, - 20, - 21, - 22, - 23, - 24, - 25, - 26, - 27 - ], - "features_embedding_size": 4, - "features_lstm_units": 4, - "max_sequence_length": 500, - "word_embedding_size": 300, - "num_word_lstm_units": 100, - "case_embedding_size": 5, - "dropout": 0.5, - "recurrent_dropout": 0.5, - "use_crf": true, - "use_chain_crf": true, - "fold_number": 1, - "batch_size": 50, - "transformer_name": null, - "use_ELMo": false, - "features_map_to_index": { - "9": { - "LINEEND": 1, - "LINEIN": 2, - "LINESTART": 3 - }, - "10": { - "ALLCAP": 13, - "INITCAP": 14, - "NOCAPS": 15 - }, - "11": { - "ALLDIGIT": 25, - "CONTAINSDIGITS": 26, - "NODIGIT": 27 - }, - "12": { - "0": 37, - "1": 38 - }, - "13": { - "0": 49, - "1": 50 - }, - "14": { - "0": 61, - "1": 62 - }, - "15": { - "0": 73, - "1": 74 - }, - "16": { - "0": 85, - "1": 86 - }, - "17": { - "0": 97, - "1": 98 - }, - "18": { - "0": 109, - "1": 110 - }, - "19": { - "0": 121, - "1": 122 - }, - "20": { - "0": 133, - "1": 134 - }, - "21": { - "0": 145, - "1": 146 - }, - "22": { - "0": 157, - "1": 158 - }, - "23": { - "0": 169, - "1": 170 - }, - "24": { - "0": 181, - "1": 182 - }, - "25": { - "0": 193, - "1": 194 - }, - "26": { - "COMMA": 205, - "DOT": 206, - "ENDBRACKET": 207, - "HYPHEN": 208, - "NOPUNCT": 209, - "OPENBRACKET": 210, - "PUNCT": 211, - "QUOTE": 212 - }, 
- "27": { - "0": 217, - "1": 218, - "10": 219, - "11": 220, - "2": 221, - "3": 222, - "4": 223, - "5": 224, - "6": 225, - "7": 226, - "8": 227, - "9": 228 - } - } -} \ No newline at end of file diff --git a/grobid-home/models/citation-BidLSTM_ChainCRF_FEATURES/model_weights.hdf5 b/grobid-home/models/citation-BidLSTM_ChainCRF_FEATURES/model_weights.hdf5 deleted file mode 100644 index 9f98976382..0000000000 Binary files a/grobid-home/models/citation-BidLSTM_ChainCRF_FEATURES/model_weights.hdf5 and /dev/null differ diff --git a/grobid-home/models/citation-BidLSTM_ChainCRF_FEATURES/preprocessor.json b/grobid-home/models/citation-BidLSTM_ChainCRF_FEATURES/preprocessor.json deleted file mode 100644 index dac07cb7b2..0000000000 --- a/grobid-home/models/citation-BidLSTM_ChainCRF_FEATURES/preprocessor.json +++ /dev/null @@ -1,2162 +0,0 @@ -{ - "padding": true, - "return_lengths": true, - "return_word_embeddings": true, - "return_casing": false, - "return_features": true, - "return_chars": true, - "return_bert_embeddings": false, - "vocab_char": { - "<PAD>": 0, - "<UNK>": 1, - "!": 2, - "\"": 3, - "#": 4, - "$": 5, - "%": 6, - "&": 7, - "'": 8, - "(": 9, - ")": 10, - "*": 11, - "+": 12, - ",": 13, - "-": 14, - ".": 15, - "/": 16, - "0": 17, - "1": 18, - "2": 19, - "3": 20, - "4": 21, - "5": 22, - "6": 23, - "7": 24, - "8": 25, - "9": 26, - ":": 27, - ";": 28, - "<": 29, - "=": 30, - ">": 31, - "?": 32, - "@": 33, - "A": 34, - "B": 35, - "C": 36, - "D": 37, - "E": 38, - "F": 39, - "G": 40, - "H": 41, - "I": 42, - "J": 43, - "K": 44, - "L": 45, - "M": 46, - "N": 47, - "O": 48, - "P": 49, - "Q": 50, - "R": 51, - "S": 52, - "T": 53, - "U": 54, - "V": 55, - "W": 56, - "X": 57, - "Y": 58, - "Z": 59, - "[": 60, - "\\": 61, - "]": 62, - "^": 63, - "_": 64, - "`": 65, - "a": 66, - "b": 67, - "c": 68, - "d": 69, - "e": 70, - "f": 71, - "g": 72, - "h": 73, - "i": 74, - "j": 75, - "k": 76, - "l": 77, - "m": 78, - "n": 79, - "o": 80, - "p": 81, - "q": 82, - "r": 83, - "s": 84, - "t": 85, 
- "u": 86, - "v": 87, - "w": 88, - "x": 89, - "y": 90, - "z": 91, - "|": 92, - "~": 93, - "\u0084": 94, - "\u0086": 95, - "\u0092": 96, - "\u0096": 97, - "\u00a1": 98, - "\u00a2": 99, - "\u00a4": 100, - "\u00a7": 101, - "\u00a8": 102, - "\u00a9": 103, - "\u00ab": 104, - "\u00ad": 105, - "\u00af": 106, - "\u00b0": 107, - "\u00b1": 108, - "\u00b2": 109, - "\u00b3": 110, - "\u00b4": 111, - "\u00b5": 112, - "\u00b8": 113, - "\u00ba": 114, - "\u00bb": 115, - "\u00bc": 116, - "\u00bf": 117, - "\u00c0": 118, - "\u00c1": 119, - "\u00c2": 120, - "\u00c3": 121, - "\u00c5": 122, - "\u00c8": 123, - "\u00c9": 124, - "\u00ca": 125, - "\u00ce": 126, - "\u00cf": 127, - "\u00d4": 128, - "\u00d6": 129, - "\u00d7": 130, - "\u00d8": 131, - "\u00dc": 132, - "\u00df": 133, - "\u00e0": 134, - "\u00e1": 135, - "\u00e2": 136, - "\u00e3": 137, - "\u00e4": 138, - "\u00e7": 139, - "\u00e8": 140, - "\u00e9": 141, - "\u00ea": 142, - "\u00ec": 143, - "\u00ed": 144, - "\u00ee": 145, - "\u00ef": 146, - "\u00f1": 147, - "\u00f2": 148, - "\u00f3": 149, - "\u00f4": 150, - "\u00f5": 151, - "\u00f6": 152, - "\u00f8": 153, - "\u00fa": 154, - "\u00fb": 155, - "\u00fc": 156, - "\u00fd": 157, - "\u0102": 158, - "\u0105": 159, - "\u0107": 160, - "\u010c": 161, - "\u010d": 162, - "\u0117": 163, - "\u0119": 164, - "\u011f": 165, - "\u0130": 166, - "\u0131": 167, - "\u0132": 168, - "\u013e": 169, - "\u0142": 170, - "\u0144": 171, - "\u0159": 172, - "\u015b": 173, - "\u0161": 174, - "\u016b": 175, - "\u0179": 176, - "\u017d": 177, - "\u017e": 178, - "\u02c6": 179, - "\u02c7": 180, - "\u02dc": 181, - "\u0301": 182, - "\u039a": 183, - "\u039b": 184, - "\u039e": 185, - "\u03a0": 186, - "\u03b1": 187, - "\u03b2": 188, - "\u03b3": 189, - "\u03b5": 190, - "\u03b7": 191, - "\u03b8": 192, - "\u03b9": 193, - "\u03bb": 194, - "\u03bd": 195, - "\u03be": 196, - "\u03bf": 197, - "\u03c0": 198, - "\u03c1": 199, - "\u03c2": 200, - "\u03c3": 201, - "\u03c4": 202, - "\u03c6": 203, - "\u03e9": 204, - "\u0410": 205, - "\u0411": 
206, - "\u0412": 207, - "\u0414": 208, - "\u0415": 209, - "\u0418": 210, - "\u041a": 211, - "\u041b": 212, - "\u041c": 213, - "\u041d": 214, - "\u041e": 215, - "\u041f": 216, - "\u0420": 217, - "\u0421": 218, - "\u0422": 219, - "\u0425": 220, - "\u0427": 221, - "\u042f": 222, - "\u0430": 223, - "\u0431": 224, - "\u0432": 225, - "\u0433": 226, - "\u0434": 227, - "\u0435": 228, - "\u0436": 229, - "\u0437": 230, - "\u0438": 231, - "\u0439": 232, - "\u043a": 233, - "\u043b": 234, - "\u043c": 235, - "\u043d": 236, - "\u043e": 237, - "\u043f": 238, - "\u0440": 239, - "\u0441": 240, - "\u0442": 241, - "\u0443": 242, - "\u0444": 243, - "\u0445": 244, - "\u0446": 245, - "\u0447": 246, - "\u0449": 247, - "\u044a": 248, - "\u044b": 249, - "\u044c": 250, - "\u044d": 251, - "\u044e": 252, - "\u044f": 253, - "\u1390": 254, - "\u1f04": 255, - "\u1f14": 256, - "\u1f73": 257, - "\u1f79": 258, - "\u2018": 259, - "\u2019": 260, - "\u201a": 261, - "\u201c": 262, - "\u201d": 263, - "\u201e": 264, - "\u2022": 265, - "\u2024": 266, - "\u2026": 267, - "\u2113": 268, - "\u2192": 269, - "\u21b5": 270, - "\u2202": 271, - "\u2206": 272, - "\u2212": 273, - "\u221a": 274, - "\u221d": 275, - "\u221e": 276, - "\u223c": 277, - "\u2245": 278, - "\u3001": 279, - "\u3042": 280, - "\u3044": 281, - "\u3048": 282, - "\u304a": 283, - "\u304b": 284, - "\u3050": 285, - "\u3051": 286, - "\u3052": 287, - "\u3053": 288, - "\u3055": 289, - "\u3057": 290, - "\u3059": 291, - "\u305f": 292, - "\u3061": 293, - "\u3063": 294, - "\u3064": 295, - "\u3065": 296, - "\u3066": 297, - "\u3067": 298, - "\u3068": 299, - "\u306a": 300, - "\u306b": 301, - "\u306e": 302, - "\u3073": 303, - "\u307e": 304, - "\u307f": 305, - "\u3080": 306, - "\u3081": 307, - "\u3086": 308, - "\u3088": 309, - "\u3089": 310, - "\u308a": 311, - "\u308b": 312, - "\u308c": 313, - "\u308f": 314, - "\u3092": 315, - "\u30a1": 316, - "\u30a2": 317, - "\u30a3": 318, - "\u30a4": 319, - "\u30a6": 320, - "\u30a7": 321, - "\u30a8": 322, - "\u30ab": 323, - 
"\u30ac": 324, - "\u30ad": 325, - "\u30af": 326, - "\u30b0": 327, - "\u30b3": 328, - "\u30b4": 329, - "\u30b7": 330, - "\u30b8": 331, - "\u30b9": 332, - "\u30ba": 333, - "\u30bb": 334, - "\u30bd": 335, - "\u30bf": 336, - "\u30c1": 337, - "\u30c3": 338, - "\u30c6": 339, - "\u30c7": 340, - "\u30c8": 341, - "\u30c9": 342, - "\u30ca": 343, - "\u30cb": 344, - "\u30cd": 345, - "\u30ce": 346, - "\u30cf": 347, - "\u30d0": 348, - "\u30d1": 349, - "\u30d2": 350, - "\u30d5": 351, - "\u30d6": 352, - "\u30d7": 353, - "\u30d9": 354, - "\u30df": 355, - "\u30e0": 356, - "\u30e1": 357, - "\u30e2": 358, - "\u30e5": 359, - "\u30e7": 360, - "\u30e9": 361, - "\u30ea": 362, - "\u30eb": 363, - "\u30ec": 364, - "\u30ed": 365, - "\u30ef": 366, - "\u30f3": 367, - "\u30fb": 368, - "\u30fc": 369, - "\u4e00": 370, - "\u4e01": 371, - "\u4e09": 372, - "\u4e0a": 373, - "\u4e0b": 374, - "\u4e0e": 375, - "\u4e16": 376, - "\u4e18": 377, - "\u4e1a": 378, - "\u4e1c": 379, - "\u4e2a": 380, - "\u4e2d": 381, - "\u4e34": 382, - "\u4e3a": 383, - "\u4e3b": 384, - "\u4e3d": 385, - "\u4e45": 386, - "\u4e4b": 387, - "\u4e50": 388, - "\u4e5f": 389, - "\u4e60": 390, - "\u4e66": 391, - "\u4e7e": 392, - "\u4e88": 393, - "\u4e8b": 394, - "\u4e8c": 395, - "\u4e8e": 396, - "\u4e91": 397, - "\u4e92": 398, - "\u4e94": 399, - "\u4e95": 400, - "\u4e9e": 401, - "\u4ea4": 402, - "\u4ea5": 403, - "\u4ea7": 404, - "\u4eac": 405, - "\u4eae": 406, - "\u4eba": 407, - "\u4ec1": 408, - "\u4eca": 409, - "\u4ecb": 410, - "\u4ece": 411, - "\u4ed9": 412, - "\u4ee3": 413, - "\u4ee5": 414, - "\u4ef2": 415, - "\u4ef7": 416, - "\u4efb": 417, - "\u4eff": 418, - "\u4f01": 419, - "\u4f0a": 420, - "\u4f0d": 421, - "\u4f17": 422, - "\u4f1a": 423, - "\u4f1f": 424, - "\u4f20": 425, - "\u4f2f": 426, - "\u4f38": 427, - "\u4f3d": 428, - "\u4f46": 429, - "\u4f4d": 430, - "\u4f4f": 431, - "\u4f53": 432, - "\u4f55": 433, - "\u4f5c": 434, - "\u4f73": 435, - "\u4f75": 436, - "\u4f7f": 437, - "\u4f86": 438, - "\u4f8b": 439, - "\u4f9b": 440, - "\u4fa1": 
441, - "\u4fbf": 442, - "\u4fc2": 443, - "\u4fc4": 444, - "\u4fca": 445, - "\u4fdd": 446, - "\u4fe1": 447, - "\u4fee": 448, - "\u5009": 449, - "\u5029": 450, - "\u502a": 451, - "\u502b": 452, - "\u503b": 453, - "\u503c": 454, - "\u5049": 455, - "\u507f": 456, - "\u50b3": 457, - "\u50b7": 458, - "\u50be": 459, - "\u5143": 460, - "\u5146": 461, - "\u5149": 462, - "\u514b": 463, - "\u5165": 464, - "\u5167": 465, - "\u5168": 466, - "\u516b": 467, - "\u516c": 468, - "\u516d": 469, - "\u5171": 470, - "\u5173": 471, - "\u5175": 472, - "\u5176": 473, - "\u5178": 474, - "\u517c": 475, - "\u5185": 476, - "\u518d": 477, - "\u51a0": 478, - "\u51ac": 479, - "\u51e0": 480, - "\u51e6": 481, - "\u51fa": 482, - "\u5206": 483, - "\u520a": 484, - "\u5212": 485, - "\u5218": 486, - "\u521a": 487, - "\u521b": 488, - "\u521d": 489, - "\u5224": 490, - "\u5229": 491, - "\u5230": 492, - "\u5236": 493, - "\u5238": 494, - "\u523b": 495, - "\u524d": 496, - "\u525b": 497, - "\u5267": 498, - "\u5275": 499, - "\u5289": 500, - "\u529f": 501, - "\u52a0": 502, - "\u52a8": 503, - "\u52c9": 504, - "\u52d5": 505, - "\u52d9": 506, - "\u52dd": 507, - "\u52f5": 508, - "\u5316": 509, - "\u5317": 510, - "\u5331": 511, - "\u533a": 512, - "\u533b": 513, - "\u5340": 514, - "\u5341": 515, - "\u5343": 516, - "\u534a": 517, - "\u534e": 518, - "\u5353": 519, - "\u5354": 520, - "\u5357": 521, - "\u5360": 522, - "\u536b": 523, - "\u5371": 524, - "\u5377": 525, - "\u5385": 526, - "\u539f": 527, - "\u53bb": 528, - "\u53c2": 529, - "\u53c3": 530, - "\u53c9": 531, - "\u53ca": 532, - "\u53d1": 533, - "\u53d6": 534, - "\u53d7": 535, - "\u53d8": 536, - "\u53e2": 537, - "\u53e3": 538, - "\u53e4": 539, - "\u53e5": 540, - "\u53ef": 541, - "\u53f2": 542, - "\u53f7": 543, - "\u53f8": 544, - "\u5408": 545, - "\u5409": 546, - "\u540c": 547, - "\u540d": 548, - "\u540e": 549, - "\u5411": 550, - "\u542c": 551, - "\u542f": 552, - "\u5433": 553, - "\u5434": 554, - "\u543e": 555, - "\u5448": 556, - "\u544a": 557, - "\u5468": 558, - 
"\u548c": 559, - "\u54c1": 560, - "\u54c9": 561, - "\u54cd": 562, - "\u54e1": 563, - "\u54f2": 564, - "\u5546": 565, - "\u554f": 566, - "\u5578": 567, - "\u5584": 568, - "\u559c": 569, - "\u55aa": 570, - "\u55ac": 571, - "\u55b6": 572, - "\u5609": 573, - "\u5668": 574, - "\u56db": 575, - "\u56de": 576, - "\u56e0": 577, - "\u56f3": 578, - "\u56f4": 579, - "\u56fd": 580, - "\u56fe": 581, - "\u570b": 582, - "\u5712": 583, - "\u5716": 584, - "\u571f": 585, - "\u5728": 586, - "\u5730": 587, - "\u573a": 588, - "\u5740": 589, - "\u5742": 590, - "\u575a": 591, - "\u575b": 592, - "\u5764": 593, - "\u578b": 594, - "\u57ce": 595, - "\u57df": 596, - "\u57f9": 597, - "\u57fa": 598, - "\u5802": 599, - "\u582f": 600, - "\u5831": 601, - "\u5859": 602, - "\u585a": 603, - "\u5869": 604, - "\u5883": 605, - "\u589e": 606, - "\u58c1": 607, - "\u58c7": 608, - "\u58eb": 609, - "\u5909": 610, - "\u590d": 611, - "\u590f": 612, - "\u5916": 613, - "\u591a": 614, - "\u5927": 615, - "\u5929": 616, - "\u592a": 617, - "\u592b": 618, - "\u592d": 619, - "\u5947": 620, - "\u5973": 621, - "\u598d": 622, - "\u59cb": 623, - "\u59d4": 624, - "\u59da": 625, - "\u59dc": 626, - "\u5a07": 627, - "\u5a1f": 628, - "\u5a67": 629, - "\u5a77": 630, - "\u5a92": 631, - "\u5b00": 632, - "\u5b50": 633, - "\u5b58": 634, - "\u5b59": 635, - "\u5b5f": 636, - "\u5b63": 637, - "\u5b66": 638, - "\u5b6b": 639, - "\u5b78": 640, - "\u5b81": 641, - "\u5b87": 642, - "\u5b89": 643, - "\u5b8b": 644, - "\u5b8c": 645, - "\u5b8f": 646, - "\u5b97": 647, - "\u5b99": 648, - "\u5b9a": 649, - "\u5b9c": 650, - "\u5b9d": 651, - "\u5b9e": 652, - "\u5b9f": 653, - "\u5bb3": 654, - "\u5bb6": 655, - "\u5bb9": 656, - "\u5bbf": 657, - "\u5bc5": 658, - "\u5bd2": 659, - "\u5bdf": 660, - "\u5be6": 661, - "\u5be7": 662, - "\u5bf8": 663, - "\u5bf9": 664, - "\u5bfa": 665, - "\u5bfc": 666, - "\u5bff": 667, - "\u5c02": 668, - "\u5c0a": 669, - "\u5c0d": 670, - "\u5c0f": 671, - "\u5c11": 672, - "\u5c19": 673, - "\u5c1a": 674, - "\u5c31": 675, - "\u5c3b": 
676, - "\u5c3d": 677, - "\u5c3e": 678, - "\u5c45": 679, - "\u5c4b": 680, - "\u5c55": 681, - "\u5c71": 682, - "\u5ca1": 683, - "\u5cad": 684, - "\u5cb8": 685, - "\u5cf0": 686, - "\u5cf6": 687, - "\u5d0e": 688, - "\u5d17": 689, - "\u5dba": 690, - "\u5ddd": 691, - "\u5dde": 692, - "\u5de5": 693, - "\u5deb": 694, - "\u5dee": 695, - "\u5dfb": 696, - "\u5e02": 697, - "\u5e08": 698, - "\u5e2b": 699, - "\u5e38": 700, - "\u5e3d": 701, - "\u5e73": 702, - "\u5e74": 703, - "\u5e78": 704, - "\u5e7f": 705, - "\u5e83": 706, - "\u5e86": 707, - "\u5e8a": 708, - "\u5e94": 709, - "\u5e97": 710, - "\u5e9c": 711, - "\u5ea6": 712, - "\u5eb7": 713, - "\u5eb8": 714, - "\u5ebe": 715, - "\u5ee2": 716, - "\u5ee3": 717, - "\u5efa": 718, - "\u5f00": 719, - "\u5f0f": 720, - "\u5f15": 721, - "\u5f18": 722, - "\u5f20": 723, - "\u5f25": 724, - "\u5f35": 725, - "\u5f52": 726, - "\u5f53": 727, - "\u5f62": 728, - "\u5f64": 729, - "\u5f66": 730, - "\u5f6c": 731, - "\u5f6d": 732, - "\u5f71": 733, - "\u5f81": 734, - "\u5f8b": 735, - "\u5f90": 736, - "\u5f9e": 737, - "\u5fa9": 738, - "\u5fae": 739, - "\u5fb3": 740, - "\u5fb7": 741, - "\u5fbd": 742, - "\u5fc3": 743, - "\u5fcd": 744, - "\u5fd7": 745, - "\u5fe0": 746, - "\u5feb": 747, - "\u5ff5": 748, - "\u6001": 749, - "\u601d": 750, - "\u6027": 751, - "\u606d": 752, - "\u606f": 753, - "\u6096": 754, - "\u60a0": 755, - "\u60bc": 756, - "\u60c5": 757, - "\u60e0": 758, - "\u60f3": 759, - "\u610f": 760, - "\u611f": 761, - "\u614b": 762, - "\u6167": 763, - "\u6176": 764, - "\u61c9": 765, - "\u6210": 766, - "\u6211": 767, - "\u6226": 768, - "\u6230": 769, - "\u6236": 770, - "\u6238": 771, - "\u623f": 772, - "\u6240": 773, - "\u6248": 774, - "\u6253": 775, - "\u6255": 776, - "\u627f": 777, - "\u6280": 778, - "\u62a5": 779, - "\u62e1": 780, - "\u62e9": 781, - "\u6301": 782, - "\u6307": 783, - "\u6311": 784, - "\u6377": 785, - "\u6392": 786, - "\u639b": 787, - "\u63a2": 788, - "\u63a5": 789, - "\u63a7": 790, - "\u63a8": 791, - "\u63aa": 792, - "\u63e3": 793, - 
"\u6447": 794, - "\u64a5": 795, - "\u64ad": 796, - "\u652f": 797, - "\u6539": 798, - "\u653e": 799, - "\u653f": 800, - "\u6548": 801, - "\u654e": 802, - "\u6559": 803, - "\u6566": 804, - "\u656c": 805, - "\u6570": 806, - "\u6587": 807, - "\u658c": 808, - "\u6590": 809, - "\u6599": 810, - "\u659c": 811, - "\u65b0": 812, - "\u65b7": 813, - "\u65b9": 814, - "\u65bc": 815, - "\u65bd": 816, - "\u65cf": 817, - "\u65e5": 818, - "\u65f6": 819, - "\u660c": 820, - "\u660e": 821, - "\u6613": 822, - "\u661f": 823, - "\u6625": 824, - "\u6642": 825, - "\u664b": 826, - "\u6653": 827, - "\u6668": 828, - "\u666f": 829, - "\u6674": 830, - "\u66a2": 831, - "\u66b4": 832, - "\u66c9": 833, - "\u66f4": 834, - "\u66f8": 835, - "\u66f9": 836, - "\u66ff": 837, - "\u6700": 838, - "\u6703": 839, - "\u6708": 840, - "\u6709": 841, - "\u670d": 842, - "\u671b": 843, - "\u671d": 844, - "\u671f": 845, - "\u6728": 846, - "\u672a": 847, - "\u672c": 848, - "\u672e": 849, - "\u672f": 850, - "\u6731": 851, - "\u673a": 852, - "\u6742": 853, - "\u6749": 854, - "\u674e": 855, - "\u6750": 856, - "\u6751": 857, - "\u675c": 858, - "\u6765": 859, - "\u6768": 860, - "\u676d": 861, - "\u6770": 862, - "\u6771": 863, - "\u677e": 864, - "\u677f": 865, - "\u6784": 866, - "\u6790": 867, - "\u6797": 868, - "\u679c": 869, - "\u67d0": 870, - "\u67d3": 871, - "\u67e5": 872, - "\u67f3": 873, - "\u67fb": 874, - "\u6807": 875, - "\u6811": 876, - "\u6821": 877, - "\u682a": 878, - "\u6839": 879, - "\u683c": 880, - "\u6842": 881, - "\u6848": 882, - "\u6893": 883, - "\u68a6": 884, - "\u68c4": 885, - "\u68ee": 886, - "\u690d": 887, - "\u691c": 888, - "\u694a": 889, - "\u695a": 890, - "\u6960": 891, - "\u696d": 892, - "\u696e": 893, - "\u697c": 894, - "\u6982": 895, - "\u69ae": 896, - "\u69cb": 897, - "\u69ea": 898, - "\u69fb": 899, - "\u6a19": 900, - "\u6a1f": 901, - "\u6a21": 902, - "\u6a23": 903, - "\u6a2a": 904, - "\u6a39": 905, - "\u6a4b": 906, - "\u6a5f": 907, - "\u6aa2": 908, - "\u6b12": 909, - "\u6b21": 910, - "\u6b49": 
911, - "\u6b50": 912, - "\u6b63": 913, - "\u6b66": 914, - "\u6b74": 915, - "\u6b77": 916, - "\u6b8a": 917, - "\u6bc5": 918, - "\u6bd2": 919, - "\u6bd4": 920, - "\u6c0f": 921, - "\u6c11": 922, - "\u6c14": 923, - "\u6c17": 924, - "\u6c23": 925, - "\u6c34": 926, - "\u6c37": 927, - "\u6c38": 928, - "\u6c5a": 929, - "\u6c5f": 930, - "\u6c60": 931, - "\u6c61": 932, - "\u6c64": 933, - "\u6c6a": 934, - "\u6c76": 935, - "\u6c7a": 936, - "\u6c88": 937, - "\u6ca2": 938, - "\u6cb3": 939, - "\u6cb9": 940, - "\u6cbb": 941, - "\u6cbc": 942, - "\u6cbf": 943, - "\u6cc1": 944, - "\u6cd5": 945, - "\u6ce2": 946, - "\u6cf0": 947, - "\u6d0b": 948, - "\u6d25": 949, - "\u6d2a": 950, - "\u6d31": 951, - "\u6d3b": 952, - "\u6d41": 953, - "\u6d45": 954, - "\u6d4b": 955, - "\u6d4e": 956, - "\u6d59": 957, - "\u6d77": 958, - "\u6d89": 959, - "\u6d9b": 960, - "\u6db5": 961, - "\u6dd1": 962, - "\u6dd8": 963, - "\u6de1": 964, - "\u6df5": 965, - "\u6df7": 966, - "\u6df8": 967, - "\u6dfa": 968, - "\u6e05": 969, - "\u6e08": 970, - "\u6e21": 971, - "\u6e2c": 972, - "\u6e2f": 973, - "\u6e56": 974, - "\u6e6f": 975, - "\u6e90": 976, - "\u6e96": 977, - "\u6eab": 978, - "\u6ecb": 979, - "\u6ed9": 980, - "\u6eff": 981, - "\u6f14": 982, - "\u6f22": 983, - "\u6f2b": 984, - "\u6f58": 985, - "\u6fa4": 986, - "\u6fdf": 987, - "\u6fe4": 988, - "\u7009": 989, - "\u7063": 990, - "\u706b": 991, - "\u707c": 992, - "\u707d": 993, - "\u707e": 994, - "\u708e": 995, - "\u70ad": 996, - "\u70ae": 997, - "\u70b9": 998, - "\u714c": 999, - "\u7167": 1000, - "\u71b1": 1001, - "\u71d5": 1002, - "\u7232": 1003, - "\u7247": 1004, - "\u7248": 1005, - "\u725f": 1006, - "\u7269": 1007, - "\u7279": 1008, - "\u72b6": 1009, - "\u734e": 1010, - "\u7368": 1011, - "\u737b": 1012, - "\u7384": 1013, - "\u7389": 1014, - "\u738b": 1015, - "\u73af": 1016, - "\u73b0": 1017, - "\u73b2": 1018, - "\u73c9": 1019, - "\u73cd": 1020, - "\u73fe": 1021, - "\u7403": 1022, - "\u7406": 1023, - "\u741b": 1024, - "\u745b": 1025, - "\u7476": 1026, - "\u7490": 
1027, - "\u74b0": 1028, - "\u74f7": 1029, - "\u7518": 1030, - "\u751f": 1031, - "\u7523": 1032, - "\u7528": 1033, - "\u7530": 1034, - "\u7531": 1035, - "\u7532": 1036, - "\u7535": 1037, - "\u753a": 1038, - "\u753b": 1039, - "\u754c": 1040, - "\u7565": 1041, - "\u756b": 1042, - "\u7570": 1043, - "\u7597": 1044, - "\u75be": 1045, - "\u75c5": 1046, - "\u75c7": 1047, - "\u7621": 1048, - "\u7642": 1049, - "\u767a": 1050, - "\u767b": 1051, - "\u767d": 1052, - "\u767e": 1053, - "\u7684": 1054, - "\u76ca": 1055, - "\u76d1": 1056, - "\u76d8": 1057, - "\u76df": 1058, - "\u76e3": 1059, - "\u76e4": 1060, - "\u76ee": 1061, - "\u76f4": 1062, - "\u76f8": 1063, - "\u7701": 1064, - "\u770b": 1065, - "\u770c": 1066, - "\u771e": 1067, - "\u771f": 1068, - "\u7763": 1069, - "\u77bb": 1070, - "\u77e5": 1071, - "\u77f3": 1072, - "\u7802": 1073, - "\u7814": 1074, - "\u784f": 1075, - "\u78a9": 1076, - "\u790e": 1077, - "\u793a": 1078, - "\u793e": 1079, - "\u7950": 1080, - "\u795d": 1081, - "\u795e": 1082, - "\u798e": 1083, - "\u798f": 1084, - "\u79c0": 1085, - "\u79c1": 1086, - "\u79d1": 1087, - "\u79d8": 1088, - "\u79e6": 1089, - "\u7a0b": 1090, - "\u7a3f": 1091, - "\u7a4e": 1092, - "\u7a6b": 1093, - "\u7a76": 1094, - "\u7a7a": 1095, - "\u7a9f": 1096, - "\u7acb": 1097, - "\u7ae0": 1098, - "\u7ae5": 1099, - "\u7b1b": 1100, - "\u7b2c": 1101, - "\u7b49": 1102, - "\u7b56": 1103, - "\u7b67": 1104, - "\u7ba1": 1105, - "\u7bc0": 1106, - "\u7bc9": 1107, - "\u7be4": 1108, - "\u7c21": 1109, - "\u7c4d": 1110, - "\u7c73": 1111, - "\u7c98": 1112, - "\u7cfb": 1113, - "\u7d00": 1114, - "\u7d05": 1115, - "\u7d20": 1116, - "\u7d30": 1117, - "\u7d44": 1118, - "\u7d4c": 1119, - "\u7d50": 1120, - "\u7d93": 1121, - "\u7d9c": 1122, - "\u7da0": 1123, - "\u7dcf": 1124, - "\u7dd1": 1125, - "\u7de8": 1126, - "\u7e23": 1127, - "\u7e3d": 1128, - "\u7e3e": 1129, - "\u7e41": 1130, - "\u7e8c": 1131, - "\u7ea2": 1132, - "\u7eaa": 1133, - "\u7ec3": 1134, - "\u7ec4": 1135, - "\u7ecf": 1136, - "\u7ed3": 1137, - "\u7edc": 
1138, - "\u7ee9": 1139, - "\u7efc": 1140, - "\u7eff": 1141, - "\u7f51": 1142, - "\u7f6e": 1143, - "\u7f8e": 1144, - "\u7fa4": 1145, - "\u7fa9": 1146, - "\u7fd2": 1147, - "\u7fd4": 1148, - "\u7fdf": 1149, - "\u7ff0": 1150, - "\u8003": 1151, - "\u8005": 1152, - "\u8033": 1153, - "\u8054": 1154, - "\u805a": 1155, - "\u806a": 1156, - "\u8085": 1157, - "\u8096": 1158, - "\u80b2": 1159, - "\u80ba": 1160, - "\u80cc": 1161, - "\u80e1": 1162, - "\u80fd": 1163, - "\u8108": 1164, - "\u8109": 1165, - "\u8151": 1166, - "\u8154": 1167, - "\u81df": 1168, - "\u81e3": 1169, - "\u81e8": 1170, - "\u81f4": 1171, - "\u81fa": 1172, - "\u8207": 1173, - "\u8208": 1174, - "\u8212": 1175, - "\u821e": 1176, - "\u822c": 1177, - "\u8239": 1178, - "\u826f": 1179, - "\u827a": 1180, - "\u8282": 1181, - "\u828d": 1182, - "\u829d": 1183, - "\u82ac": 1184, - "\u82b1": 1185, - "\u82b3": 1186, - "\u82cf": 1187, - "\u82e6": 1188, - "\u82f1": 1189, - "\u8302": 1190, - "\u8303": 1191, - "\u8349": 1192, - "\u836f": 1193, - "\u839e": 1194, - "\u83ab": 1195, - "\u83b9": 1196, - "\u83c1": 1197, - "\u83ef": 1198, - "\u83f1": 1199, - "\u840d": 1200, - "\u8425": 1201, - "\u8429": 1202, - "\u8431": 1203, - "\u843d": 1204, - "\u8449": 1205, - "\u8463": 1206, - "\u8499": 1207, - "\u84bc": 1208, - "\u8521": 1209, - "\u852d": 1210, - "\u8587": 1211, - "\u859b": 1212, - "\u85ac": 1213, - "\u85cd": 1214, - "\u85e4": 1215, - "\u85e5": 1216, - "\u865a": 1217, - "\u865b": 1218, - "\u865f": 1219, - "\u8681": 1220, - "\u8682": 1221, - "\u878d": 1222, - "\u884c": 1223, - "\u8853": 1224, - "\u8857": 1225, - "\u885b": 1226, - "\u8865": 1227, - "\u888d": 1228, - "\u88ab": 1229, - "\u88c1": 1230, - "\u88dc": 1231, - "\u88fd": 1232, - "\u897f": 1233, - "\u8981": 1234, - "\u898b": 1235, - "\u898f": 1236, - "\u8996": 1237, - "\u89ba": 1238, - "\u89c0": 1239, - "\u89c2": 1240, - "\u89c4": 1241, - "\u89c6": 1242, - "\u89d2": 1243, - "\u89e3": 1244, - "\u89f4": 1245, - "\u8a02": 1246, - "\u8a08": 1247, - "\u8a0e": 1248, - "\u8a18": 
1249, - "\u8a23": 1250, - "\u8a2d": 1251, - "\u8a3c": 1252, - "\u8a55": 1253, - "\u8a79": 1254, - "\u8a8c": 1255, - "\u8a9e": 1256, - "\u8abf": 1257, - "\u8ac7": 1258, - "\u8ad6": 1259, - "\u8b1b": 1260, - "\u8b28": 1261, - "\u8b49": 1262, - "\u8b70": 1263, - "\u8b8a": 1264, - "\u8ba8": 1265, - "\u8bae": 1266, - "\u8bb8": 1267, - "\u8bba": 1268, - "\u8bbe": 1269, - "\u8bc1": 1270, - "\u8bc4": 1271, - "\u8bd5": 1272, - "\u8bfe": 1273, - "\u8c03": 1274, - "\u8c22": 1275, - "\u8c37": 1276, - "\u8c61": 1277, - "\u8ca1": 1278, - "\u8cc3": 1279, - "\u8cc7": 1280, - "\u8cea": 1281, - "\u8d22": 1282, - "\u8d23": 1283, - "\u8d28": 1284, - "\u8d3a": 1285, - "\u8d44": 1286, - "\u8d64": 1287, - "\u8d75": 1288, - "\u8d8a": 1289, - "\u8d99": 1290, - "\u8de1": 1291, - "\u8def": 1292, - "\u8df5": 1293, - "\u8e48": 1294, - "\u8ef8": 1295, - "\u8f03": 1296, - "\u8f1d": 1297, - "\u8f2f": 1298, - "\u8f49": 1299, - "\u8f76": 1300, - "\u8f83": 1301, - "\u8f9b": 1302, - "\u8fa8": 1303, - "\u8fb2": 1304, - "\u8fba": 1305, - "\u8fbd": 1306, - "\u8fbe": 1307, - "\u8fc7": 1308, - "\u8fce": 1309, - "\u8fd1": 1310, - "\u8fdb": 1311, - "\u8fdc": 1312, - "\u8ff0": 1313, - "\u8ff9": 1314, - "\u9009": 1315, - "\u901a": 1316, - "\u901d": 1317, - "\u901f": 1318, - "\u9020": 1319, - "\u9023": 1320, - "\u9032": 1321, - "\u904b": 1322, - "\u9053": 1323, - "\u9054": 1324, - "\u9075": 1325, - "\u9078": 1326, - "\u907a": 1327, - "\u907c": 1328, - "\u90ce": 1329, - "\u90d1": 1330, - "\u90de": 1331, - "\u90e8": 1332, - "\u90ed": 1333, - "\u90fd": 1334, - "\u9115": 1335, - "\u9127": 1336, - "\u914d": 1337, - "\u9178": 1338, - "\u91ab": 1339, - "\u91cd": 1340, - "\u91ce": 1341, - "\u91d1": 1342, - "\u9234": 1343, - "\u925b": 1344, - "\u9271": 1345, - "\u9298": 1346, - "\u932b": 1347, - "\u9332": 1348, - "\u934d": 1349, - "\u93ad": 1350, - "\u93e1": 1351, - "\u9435": 1352, - "\u94c1": 1353, - "\u94dc": 1354, - "\u9500": 1355, - "\u950b": 1356, - "\u9577": 1357, - "\u9580": 1358, - "\u958b": 1359, - "\u95a2": 
1360, - "\u95a3": 1361, - "\u95bb": 1362, - "\u95be": 1363, - "\u95dc": 1364, - "\u95e8": 1365, - "\u95ee": 1366, - "\u95f4": 1367, - "\u95fb": 1368, - "\u962a": 1369, - "\u9632": 1370, - "\u9633": 1371, - "\u9634": 1372, - "\u963f": 1373, - "\u9644": 1374, - "\u9645": 1375, - "\u9648": 1376, - "\u964d": 1377, - "\u9662": 1378, - "\u9670": 1379, - "\u9673": 1380, - "\u9676": 1381, - "\u967d": 1382, - "\u9686": 1383, - "\u9690": 1384, - "\u969b": 1385, - "\u969c": 1386, - "\u96a8": 1387, - "\u96aa": 1388, - "\u96c4": 1389, - "\u96c5": 1390, - "\u96c6": 1391, - "\u96d1": 1392, - "\u96d9": 1393, - "\u96dc": 1394, - "\u96e3": 1395, - "\u96ea": 1396, - "\u96ef": 1397, - "\u96f2": 1398, - "\u96f7": 1399, - "\u9707": 1400, - "\u9716": 1401, - "\u971e": 1402, - "\u9751": 1403, - "\u9752": 1404, - "\u975e": 1405, - "\u9762": 1406, - "\u9769": 1407, - "\u9773": 1408, - "\u97d3": 1409, - "\u97f3": 1410, - "\u97f5": 1411, - "\u9801": 1412, - "\u9808": 1413, - "\u9818": 1414, - "\u984c": 1415, - "\u985e": 1416, - "\u9875": 1417, - "\u987e": 1418, - "\u9896": 1419, - "\u9898": 1420, - "\u989c": 1421, - "\u98a8": 1422, - "\u98ce": 1423, - "\u98db": 1424, - "\u9918": 1425, - "\u9928": 1426, - "\u9970": 1427, - "\u9986": 1428, - "\u9999": 1429, - "\u99ac": 1430, - "\u99ae": 1431, - "\u9a57": 1432, - "\u9a6c": 1433, - "\u9ad4": 1434, - "\u9ad8": 1435, - "\u9b41": 1436, - "\u9bae": 1437, - "\u9c9c": 1438, - "\u9ce5": 1439, - "\u9df2": 1440, - "\u9e23": 1441, - "\u9e7d": 1442, - "\u9e97": 1443, - "\u9ed8": 1444, - "\u9ede": 1445, - "\u9f4a": 1446, - "\u9f4b": 1447, - "\u9f8d": 1448, - "\u9f99": 1449, - "\uac00": 1450, - "\uac01": 1451, - "\uac04": 1452, - "\uac08": 1453, - "\uac10": 1454, - "\uac15": 1455, - "\uac1c": 1456, - "\uac1d": 1457, - "\uac31": 1458, - "\uac70": 1459, - "\uac74": 1460, - "\uac80": 1461, - "\uac8c": 1462, - "\uaca9": 1463, - "\uacac": 1464, - "\uacb0": 1465, - "\uacbd": 1466, - "\uacc4": 1467, - "\uace0": 1468, - "\uace4": 1469, - "\uacf5": 1470, - "\uacfc": 
1471, - "\uacfd": 1472, - "\uad00": 1473, - "\uad11": 1474, - "\uad34": 1475, - "\uad50": 1476, - "\uad6c": 1477, - "\uad6d": 1478, - "\uad70": 1479, - "\uad81": 1480, - "\uad8c": 1481, - "\uadc0": 1482, - "\uaddc": 1483, - "\uade0": 1484, - "\uadf8": 1485, - "\uadf9": 1486, - "\uadfc": 1487, - "\uae00": 1488, - "\uae08": 1489, - "\uae09": 1490, - "\uae30": 1491, - "\uae38": 1492, - "\uae40": 1493, - "\uae4c": 1494, - "\ub07c": 1495, - "\ub098": 1496, - "\ub099": 1497, - "\ub09c": 1498, - "\ub0a8": 1499, - "\ub0b4": 1500, - "\ub0b8": 1501, - "\ub108": 1502, - "\ub10c": 1503, - "\ub110": 1504, - "\ub124": 1505, - "\ub137": 1506, - "\ub140": 1507, - "\ub144": 1508, - "\ub150": 1509, - "\ub155": 1510, - "\ub178": 1511, - "\ub179": 1512, - "\ub17c": 1513, - "\ub180": 1514, - "\ub18d": 1515, - "\ub274": 1516, - "\ub294": 1517, - "\ub2a5": 1518, - "\ub2c8": 1519, - "\ub2dd": 1520, - "\ub2e4": 1521, - "\ub2e8": 1522, - "\ub2ec": 1523, - "\ub2f4": 1524, - "\ub2f9": 1525, - "\ub300": 1526, - "\ub304": 1527, - "\ub313": 1528, - "\ub354": 1529, - "\ub355": 1530, - "\ub358": 1531, - "\ub364": 1532, - "\ub370": 1533, - "\ub378": 1534, - "\ub3c4": 1535, - "\ub3c5": 1536, - "\ub3c8": 1537, - "\ub3cc": 1538, - "\ub3d9": 1539, - "\ub418": 1540, - "\ub41c": 1541, - "\ub450": 1542, - "\ub454": 1543, - "\ub458": 1544, - "\ub4c0": 1545, - "\ub4dc": 1546, - "\ub4dd": 1547, - "\ub4e3": 1548, - "\ub4e4": 1549, - "\ub4f1": 1550, - "\ub514": 1551, - "\ub525": 1552, - "\ub530": 1553, - "\ub77c": 1554, - "\ub780": 1555, - "\ub78c": 1556, - "\ub798": 1557, - "\ub79c": 1558, - "\ub7a8": 1559, - "\ub7b5": 1560, - "\ub7c9": 1561, - "\ub7ec": 1562, - "\ub7fc": 1563, - "\ub808": 1564, - "\ub80c": 1565, - "\ub824": 1566, - "\ub825": 1567, - "\ub828": 1568, - "\ub82c": 1569, - "\ub839": 1570, - "\ub840": 1571, - "\ub85c": 1572, - "\ub85d": 1573, - "\ub860": 1574, - "\ub8cc": 1575, - "\ub8e1": 1576, - "\ub8e8": 1577, - "\ub8ec": 1578, - "\ub958": 1579, - "\ub960": 1580, - "\ub978": 1581, - "\ub97c": 
1582, - "\ub9ac": 1583, - "\ub9b0": 1584, - "\ub9bc": 1585, - "\ub9bd": 1586, - "\ub9c1": 1587, - "\ub9c8": 1588, - "\ub9cc": 1589, - "\ub9d0": 1590, - "\ub9dd": 1591, - "\ub9de": 1592, - "\ub9e4": 1593, - "\uba38": 1594, - "\uba39": 1595, - "\uba3c": 1596, - "\uba54": 1597, - "\uba58": 1598, - "\uba74": 1599, - "\uba78": 1600, - "\uba85": 1601, - "\uba87": 1602, - "\ubaa8": 1603, - "\ubaa9": 1604, - "\ubab0": 1605, - "\ubb18": 1606, - "\ubb34": 1607, - "\ubb35": 1608, - "\ubb38": 1609, - "\ubb3c": 1610, - "\ubba4": 1611, - "\ubbf8": 1612, - "\ubbfc": 1613, - "\ubc00": 1614, - "\ubc0f": 1615, - "\ubc14": 1616, - "\ubc15": 1617, - "\ubc18": 1618, - "\ubc1c": 1619, - "\ubc29": 1620, - "\ubc30": 1621, - "\ubc31": 1622, - "\ubc84": 1623, - "\ubc8c": 1624, - "\ubc94": 1625, - "\ubc95": 1626, - "\ubca0": 1627, - "\ubca4": 1628, - "\ubcbd": 1629, - "\ubcc0": 1630, - "\ubcc4": 1631, - "\ubcd1": 1632, - "\ubcf4": 1633, - "\ubcf5": 1634, - "\ubcf8": 1635, - "\ubd04": 1636, - "\ubd09": 1637, - "\ubd80": 1638, - "\ubd81": 1639, - "\ubd84": 1640, - "\ubd95": 1641, - "\ubdf0": 1642, - "\ube0c": 1643, - "\ube14": 1644, - "\ube44": 1645, - "\ube45": 1646, - "\ube48": 1647, - "\uc0ac": 1648, - "\uc0b0": 1649, - "\uc0b4": 1650, - "\uc0b6": 1651, - "\uc0bc": 1652, - "\uc0c1": 1653, - "\uc0c8": 1654, - "\uc0c9": 1655, - "\uc0dd": 1656, - "\uc11c": 1657, - "\uc11d": 1658, - "\uc120": 1659, - "\uc124": 1660, - "\uc12c": 1661, - "\uc12d": 1662, - "\uc131": 1663, - "\uc138": 1664, - "\uc140": 1665, - "\uc158": 1666, - "\uc15c": 1667, - "\uc18c": 1668, - "\uc18d": 1669, - "\uc190": 1670, - "\uc1a1": 1671, - "\uc1fc": 1672, - "\uc218": 1673, - "\uc219": 1674, - "\uc21c": 1675, - "\uc220": 1676, - "\uc22d": 1677, - "\uc288": 1678, - "\uc2a4": 1679, - "\uc2ac": 1680, - "\uc2b5": 1681, - "\uc2b9": 1682, - "\uc2dc": 1683, - "\uc2dd": 1684, - "\uc2e0": 1685, - "\uc2e4": 1686, - "\uc2ec": 1687, - "\uc2ed": 1688, - "\uc2fc": 1689, - "\uc4f0": 1690, - "\uc544": 1691, - "\uc545": 1692, - "\uc548": 
1693, - "\uc54a": 1694, - "\uc554": 1695, - "\uc555": 1696, - "\uc559": 1697, - "\uc560": 1698, - "\uc561": 1699, - "\uc571": 1700, - "\uc57c": 1701, - "\uc591": 1702, - "\uc5b4": 1703, - "\uc5b8": 1704, - "\uc5bb": 1705, - "\uc5bc": 1706, - "\uc5c4": 1707, - "\uc5c5": 1708, - "\uc5d0": 1709, - "\uc5d4": 1710, - "\uc5ec": 1711, - "\uc5ed": 1712, - "\uc5f0": 1713, - "\uc5f4": 1714, - "\uc5fc": 1715, - "\uc5fd": 1716, - "\uc601": 1717, - "\uc608": 1718, - "\uc624": 1719, - "\uc625": 1720, - "\uc628": 1721, - "\uc62c": 1722, - "\uc635": 1723, - "\uc640": 1724, - "\uc644": 1725, - "\uc65c": 1726, - "\uc678": 1727, - "\uc694": 1728, - "\uc695": 1729, - "\uc6a9": 1730, - "\uc6b0": 1731, - "\uc6b1": 1732, - "\uc6b4": 1733, - "\uc6b8": 1734, - "\uc6c0": 1735, - "\uc6c5": 1736, - "\uc6cc": 1737, - "\uc6d0": 1738, - "\uc6d4": 1739, - "\uc6e8": 1740, - "\uc6f9": 1741, - "\uc704": 1742, - "\uc720": 1743, - "\uc721": 1744, - "\uc724": 1745, - "\uc728": 1746, - "\uc735": 1747, - "\uc73c": 1748, - "\uc740": 1749, - "\uc744": 1750, - "\uc74c": 1751, - "\uc751": 1752, - "\uc758": 1753, - "\uc774": 1754, - "\uc775": 1755, - "\uc778": 1756, - "\uc77c": 1757, - "\uc784": 1758, - "\uc785": 1759, - "\uc788": 1760, - "\uc789": 1761, - "\uc790": 1762, - "\uc791": 1763, - "\uc7a5": 1764, - "\uc7ac": 1765, - "\uc7c1": 1766, - "\uc800": 1767, - "\uc801": 1768, - "\uc804": 1769, - "\uc808": 1770, - "\uc810": 1771, - "\uc811": 1772, - "\uc813": 1773, - "\uc815": 1774, - "\uc81c": 1775, - "\uc820": 1776, - "\uc870": 1777, - "\uc871": 1778, - "\uc874": 1779, - "\uc878": 1780, - "\uc885": 1781, - "\uc88b": 1782, - "\uc8c4": 1783, - "\uc8fc": 1784, - "\uc8fd": 1785, - "\uc900": 1786, - "\uc911": 1787, - "\uc988": 1788, - "\uc99d": 1789, - "\uc9c0": 1790, - "\uc9c1": 1791, - "\uc9c4": 1792, - "\uc9c8": 1793, - "\uc9d1": 1794, - "\uc9d5": 1795, - "\uc9dc": 1796, - "\ucabd": 1797, - "\ucc28": 1798, - "\ucc29": 1799, - "\ucc2c": 1800, - "\ucc30": 1801, - "\ucc38": 1802, - "\ucc3d": 1803, - "\ucc44": 
1804, - "\ucc45": 1805, - "\ucc98": 1806, - "\ucc99": 1807, - "\ucc9c": 1808, - "\ucca0": 1809, - "\ucca9": 1810, - "\uccad": 1811, - "\uccb4": 1812, - "\uccd0": 1813, - "\ucd08": 1814, - "\ucd09": 1815, - "\ucd0c": 1816, - "\ucd1d": 1817, - "\ucd5c": 1818, - "\ucd94": 1819, - "\ucd95": 1820, - "\ucd98": 1821, - "\ucd9c": 1822, - "\ucda4": 1823, - "\ucda9": 1824, - "\ucde8": 1825, - "\uce04": 1826, - "\uce20": 1827, - "\uce21": 1828, - "\uce35": 1829, - "\uce58": 1830, - "\uce5c": 1831, - "\uce68": 1832, - "\uce74": 1833, - "\ucea0": 1834, - "\ucee4": 1835, - "\ucef4": 1836, - "\ucf00": 1837, - "\ucf13": 1838, - "\ucf54": 1839, - "\ucf58": 1840, - "\ud06c": 1841, - "\ud074": 1842, - "\ud0a4": 1843, - "\ud0c0": 1844, - "\ud0c4": 1845, - "\ud0c8": 1846, - "\ud0d0": 1847, - "\ud0d1": 1848, - "\ud0d5": 1849, - "\ud0dc": 1850, - "\ud0dd": 1851, - "\ud130": 1852, - "\ud138": 1853, - "\ud14c": 1854, - "\ud14d": 1855, - "\ud150": 1856, - "\ud154": 1857, - "\ud15c": 1858, - "\ud1a0": 1859, - "\ud1b5": 1860, - "\ud1f4": 1861, - "\ud22c": 1862, - "\ud29c": 1863, - "\ud2b8": 1864, - "\ud2b9": 1865, - "\ud2f0": 1866, - "\ud305": 1867, - "\ud30c": 1868, - "\ud310": 1869, - "\ud328": 1870, - "\ud37c": 1871, - "\ud398": 1872, - "\ud3b4": 1873, - "\ud3b8": 1874, - "\ud3c9": 1875, - "\ud3ec": 1876, - "\ud3f0": 1877, - "\ud45c": 1878, - "\ud488": 1879, - "\ud48d": 1880, - "\ud4e8": 1881, - "\ud504": 1882, - "\ud50c": 1883, - "\ud53c": 1884, - "\ud53d": 1885, - "\ud558": 1886, - "\ud559": 1887, - "\ud55c": 1888, - "\ud560": 1889, - "\ud568": 1890, - "\ud569": 1891, - "\ud56d": 1892, - "\ud574": 1893, - "\ud589": 1894, - "\ud5a5": 1895, - "\ud5c8": 1896, - "\ud5cc": 1897, - "\ud5d8": 1898, - "\ud601": 1899, - "\ud604": 1900, - "\ud611": 1901, - "\ud615": 1902, - "\ud61c": 1903, - "\ud638": 1904, - "\ud63c": 1905, - "\ud64d": 1906, - "\ud654": 1907, - "\ud655": 1908, - "\ud658": 1909, - "\ud65c": 1910, - "\ud669": 1911, - "\ud68c": 1912, - "\ud68d": 1913, - "\ud6a8": 1914, - "\ud6c4": 
1915, - "\ud6c8": 1916, - "\ud760": 1917, - "\ud761": 1918, - "\ud765": 1919, - "\ud76c": 1920, - "\uf061": 1921, - "\uf0a0": 1922, - "\uf90a": 1923, - "\uf922": 1924, - "\uf93d": 1925, - "\uf93f": 1926, - "\uf941": 1927, - "\uf958": 1928, - "\uf95a": 1929, - "\uf9ba": 1930, - "\uf9e1": 1931, - "\uf9f4": 1932, - "\uf9f6": 1933, - "\ufa02": 1934, - "\ufb00": 1935, - "\ufb01": 1936, - "\ufb02": 1937, - "\ufffd": 1938 - }, - "vocab_tag": { - "<PAD>": 0, - "B-<author>": 1, - "B-<booktitle>": 2, - "B-<collaboration>": 3, - "B-<date>": 4, - "B-<editor>": 5, - "B-<institution>": 6, - "B-<issue>": 7, - "B-<journal>": 8, - "B-<location>": 9, - "B-<note>": 10, - "B-<pages>": 11, - "B-<publisher>": 12, - "B-<pubnum>": 13, - "B-<series>": 14, - "B-<tech>": 15, - "B-<title>": 16, - "B-<volume>": 17, - "B-<web>": 18, - "I-<author>": 19, - "I-<booktitle>": 20, - "I-<collaboration>": 21, - "I-<date>": 22, - "I-<editor>": 23, - "I-<institution>": 24, - "I-<issue>": 25, - "I-<journal>": 26, - "I-<location>": 27, - "I-<note>": 28, - "I-<pages>": 29, - "I-<publisher>": 30, - "I-<pubnum>": 31, - "I-<series>": 32, - "I-<tech>": 33, - "I-<title>": 34, - "I-<volume>": 35, - "I-<web>": 36, - "O": 37 - }, - "vocab_case": [ - "<PAD>", - "numeric", - "allLower", - "allUpper", - "initialUpper", - "other", - "mainly_numeric", - "contains_digit" - ], - "max_char_length": 30, - "feature_preprocessor": { - "features_vocabulary_size": 12, - "features_indices": [ - 9, - 10, - 11, - 12, - 13, - 14, - 15, - 16, - 17, - 18, - 19, - 20, - 21, - 22, - 23, - 24, - 25, - 26, - 27 - ], - "features_map_to_index": { - "9": { - "LINEEND": 1, - "LINEIN": 2, - "LINESTART": 3 - }, - "10": { - "ALLCAP": 13, - "INITCAP": 14, - "NOCAPS": 15 - }, - "11": { - "ALLDIGIT": 25, - "CONTAINSDIGITS": 26, - "NODIGIT": 27 - }, - "12": { - "0": 37, - "1": 38 - }, - "13": { - "0": 49, - "1": 50 - }, - "14": { - "0": 61, - "1": 62 - }, - "15": { - "0": 73, - "1": 74 - }, - "16": { - "0": 85, - "1": 86 - }, - "17": { - "0": 97, - 
"1": 98 - }, - "18": { - "0": 109, - "1": 110 - }, - "19": { - "0": 121, - "1": 122 - }, - "20": { - "0": 133, - "1": 134 - }, - "21": { - "0": 145, - "1": 146 - }, - "22": { - "0": 157, - "1": 158 - }, - "23": { - "0": 169, - "1": 170 - }, - "24": { - "0": 181, - "1": 182 - }, - "25": { - "0": 193, - "1": 194 - }, - "26": { - "COMMA": 205, - "DOT": 206, - "ENDBRACKET": 207, - "HYPHEN": 208, - "NOPUNCT": 209, - "OPENBRACKET": 210, - "PUNCT": 211, - "QUOTE": 212 - }, - "27": { - "0": 217, - "1": 218, - "10": 219, - "11": 220, - "2": 221, - "3": 222, - "4": 223, - "5": 224, - "6": 225, - "7": 226, - "8": 227, - "9": 228 - } - } - }, - "indice_tag": { - "0": "<PAD>", - "1": "B-<author>", - "2": "B-<booktitle>", - "3": "B-<collaboration>", - "4": "B-<date>", - "5": "B-<editor>", - "6": "B-<institution>", - "7": "B-<issue>", - "8": "B-<journal>", - "9": "B-<location>", - "10": "B-<note>", - "11": "B-<pages>", - "12": "B-<publisher>", - "13": "B-<pubnum>", - "14": "B-<series>", - "15": "B-<tech>", - "16": "B-<title>", - "17": "B-<volume>", - "18": "B-<web>", - "19": "I-<author>", - "20": "I-<booktitle>", - "21": "I-<collaboration>", - "22": "I-<date>", - "23": "I-<editor>", - "24": "I-<institution>", - "25": "I-<issue>", - "26": "I-<journal>", - "27": "I-<location>", - "28": "I-<note>", - "29": "I-<pages>", - "30": "I-<publisher>", - "31": "I-<pubnum>", - "32": "I-<series>", - "33": "I-<tech>", - "34": "I-<title>", - "35": "I-<volume>", - "36": "I-<web>", - "37": "O" - } -} \ No newline at end of file diff --git a/grobid-home/models/copyright-gru.onnx/classifier.onnx b/grobid-home/models/copyright-gru.onnx/classifier.onnx new file mode 100644 index 0000000000..43644170a4 Binary files /dev/null and b/grobid-home/models/copyright-gru.onnx/classifier.onnx differ diff --git a/grobid-home/models/copyright-gru.onnx/config.json b/grobid-home/models/copyright-gru.onnx/config.json new file mode 100644 index 0000000000..599d1adad8 --- /dev/null +++ 
b/grobid-home/models/copyright-gru.onnx/config.json @@ -0,0 +1,8 @@ +{ + "modelName": "copyright_gru", + "architecture": "gru", + "wordEmbeddingSize": 300, + "maxlen": 300, + "numClasses": 3, + "embeddingsName": "glove-840B" +} \ No newline at end of file diff --git a/grobid-home/models/copyright-gru.onnx/labels.json b/grobid-home/models/copyright-gru.onnx/labels.json new file mode 100644 index 0000000000..7ec4da39e3 --- /dev/null +++ b/grobid-home/models/copyright-gru.onnx/labels.json @@ -0,0 +1,17 @@ +{ + "labels": [ + "publisher", + "authors", + "undecided" + ], + "labelToIndex": { + "publisher": 0, + "authors": 1, + "undecided": 2 + }, + "indexToLabel": { + "0": "publisher", + "1": "authors", + "2": "undecided" + } +} \ No newline at end of file diff --git a/grobid-home/models/date-BidLSTM_CRF/config.json b/grobid-home/models/date-BidLSTM_CRF/config.json deleted file mode 100644 index 62eee50aec..0000000000 --- a/grobid-home/models/date-BidLSTM_CRF/config.json +++ /dev/null @@ -1,26 +0,0 @@ -{ - "model_name": "date-BidLSTM_CRF", - "architecture": "BidLSTM_CRF", - "embeddings_name": "glove-840B", - "char_vocab_size": 70, - "case_vocab_size": 8, - "char_embedding_size": 25, - "num_char_lstm_units": 25, - "max_char_length": 30, - "features_vocabulary_size": 12, - "features_indices": null, - "features_embedding_size": 4, - "features_lstm_units": 4, - "max_sequence_length": 50, - "word_embedding_size": 300, - "num_word_lstm_units": 100, - "case_embedding_size": 5, - "dropout": 0.5, - "recurrent_dropout": 0.5, - "use_crf": true, - "use_chain_crf": false, - "fold_number": 1, - "batch_size": 60, - "transformer_name": null, - "use_ELMo": false -} \ No newline at end of file diff --git a/grobid-home/models/date-BidLSTM_CRF/model_weights.hdf5 b/grobid-home/models/date-BidLSTM_CRF/model_weights.hdf5 deleted file mode 100644 index 080842c457..0000000000 Binary files a/grobid-home/models/date-BidLSTM_CRF/model_weights.hdf5 and /dev/null differ diff --git 
a/grobid-home/models/date-BidLSTM_CRF/preprocessor.json b/grobid-home/models/date-BidLSTM_CRF/preprocessor.json deleted file mode 100644 index 6ee3190ff0..0000000000 --- a/grobid-home/models/date-BidLSTM_CRF/preprocessor.json +++ /dev/null @@ -1,111 +0,0 @@ -{ - "padding": true, - "return_lengths": true, - "return_word_embeddings": true, - "return_casing": false, - "return_features": false, - "return_chars": true, - "return_bert_embeddings": false, - "vocab_char": { - "<PAD>": 0, - "<UNK>": 1, - "(": 2, - ")": 3, - ",": 4, - "-": 5, - ".": 6, - "/": 7, - "0": 8, - "1": 9, - "2": 10, - "3": 11, - "4": 12, - "5": 13, - "6": 14, - "7": 15, - "8": 16, - "9": 17, - ":": 18, - ";": 19, - "@": 20, - "A": 21, - "B": 22, - "C": 23, - "D": 24, - "E": 25, - "F": 26, - "G": 27, - "I": 28, - "J": 29, - "L": 30, - "M": 31, - "N": 32, - "O": 33, - "P": 34, - "R": 35, - "S": 36, - "T": 37, - "U": 38, - "V": 39, - "W": 40, - "a": 41, - "b": 42, - "c": 43, - "d": 44, - "e": 45, - "f": 46, - "g": 47, - "h": 48, - "i": 49, - "j": 50, - "k": 51, - "l": 52, - "m": 53, - "n": 54, - "o": 55, - "p": 56, - "r": 57, - "s": 58, - "t": 59, - "u": 60, - "v": 61, - "w": 62, - "y": 63, - "z": 64, - "\u00b1": 65, - "\u00c9": 66, - "\u00f6": 67, - "\u2013": 68, - "\u2014": 69 - }, - "vocab_tag": { - "<PAD>": 0, - "B-<day>": 1, - "B-<month>": 2, - "B-<year>": 3, - "I-<day>": 4, - "I-<year>": 5, - "O": 6 - }, - "vocab_case": [ - "<PAD>", - "numeric", - "allLower", - "allUpper", - "initialUpper", - "other", - "mainly_numeric", - "contains_digit" - ], - "max_char_length": 30, - "feature_preprocessor": null, - "indice_tag": { - "0": "<PAD>", - "1": "B-<day>", - "2": "B-<month>", - "3": "B-<year>", - "4": "I-<day>", - "5": "I-<year>", - "6": "O" - } -} \ No newline at end of file diff --git a/grobid-home/models/date-BidLSTM_CRF_FEATURES.onnx/config.json b/grobid-home/models/date-BidLSTM_CRF_FEATURES.onnx/config.json new file mode 100644 index 0000000000..777b8ae654 --- /dev/null +++ 
b/grobid-home/models/date-BidLSTM_CRF_FEATURES.onnx/config.json @@ -0,0 +1,22 @@ +{ + "modelName": "grobid-date-BidLSTM_CRF_FEATURES", + "architecture": "BidLSTM_CRF_FEATURES", + "wordEmbeddingSize": 300, + "charEmbeddingSize": 25, + "numCharLstmUnits": 25, + "numWordLstmUnits": 100, + "maxSequenceLength": 50, + "embeddingsName": "glove-840B", + "featuresIndices": [ + 9, + 10, + 11, + 12, + 13, + 14, + 15 + ], + "featuresEmbeddingSize": 4, + "featuresLstmUnits": 4, + "featuresVocabularySize": 12 +} \ No newline at end of file diff --git a/grobid-home/models/date-BidLSTM_CRF_FEATURES.onnx/crf_params.json b/grobid-home/models/date-BidLSTM_CRF_FEATURES.onnx/crf_params.json new file mode 100644 index 0000000000..b92c39d772 --- /dev/null +++ b/grobid-home/models/date-BidLSTM_CRF_FEATURES.onnx/crf_params.json @@ -0,0 +1,85 @@ +{ + "transitions": [ + [ + 0.04369962215423584, + -0.12221870571374893, + 0.011854949407279491, + -0.04942571744322777, + -0.015781288966536522, + -0.12680797278881073, + -0.04864100366830826 + ], + [ + -0.061657655984163284, + -0.12712466716766357, + 0.0064918664284050465, + -0.02659914828836918, + -0.008010180667042732, + -0.1636497676372528, + 0.0051962085999548435 + ], + [ + 0.024414921179413795, + 0.016913048923015594, + -0.15344400703907013, + 0.0360393188893795, + -0.12968048453330994, + -0.0359795019030571, + 0.06415849924087524 + ], + [ + 0.0010195160284638405, + -0.013520807959139347, + -0.044127631932497025, + -0.07484129816293716, + 0.0008468001033179462, + 0.053396403789520264, + 0.028115879744291306 + ], + [ + 0.03976506367325783, + -0.044583760201931, + -0.12182940542697906, + -0.00027059740386903286, + -0.006749073974788189, + 0.01672307960689068, + 0.0010918000480160117 + ], + [ + 0.03349142521619797, + -0.023086048662662506, + -0.15992958843708038, + -0.0781441405415535, + -0.09504935145378113, + 0.03562815487384796, + -0.0998743325471878 + ], + [ + 0.04097858443856239, + -0.08796539902687073, + 0.1574825495481491, + 
0.06794484704732895, + -0.16521617770195007, + -0.09549761563539505, + -0.05177278816699982 + ] + ], + "startTransitions": [ + 0.026969874277710915, + 0.1344051957130432, + 0.05689214542508125, + -0.10429777204990387, + -0.12426311522722244, + -0.07568344473838806, + 0.04306447505950928 + ], + "endTransitions": [ + -0.0009051802917383611, + -0.08473620563745499, + 0.024034366011619568, + 0.04712983965873718, + 0.03425560146570206, + -0.0033253529109060764, + -0.09761136025190353 + ] +} \ No newline at end of file diff --git a/grobid-home/models/date-BidLSTM_CRF_FEATURES.onnx/encoder.onnx b/grobid-home/models/date-BidLSTM_CRF_FEATURES.onnx/encoder.onnx new file mode 100644 index 0000000000..aa22bda9bd Binary files /dev/null and b/grobid-home/models/date-BidLSTM_CRF_FEATURES.onnx/encoder.onnx differ diff --git a/grobid-home/models/date-BidLSTM_CRF_FEATURES.onnx/vocab.json b/grobid-home/models/date-BidLSTM_CRF_FEATURES.onnx/vocab.json new file mode 100644 index 0000000000..d75145500f --- /dev/null +++ b/grobid-home/models/date-BidLSTM_CRF_FEATURES.onnx/vocab.json @@ -0,0 +1,142 @@ +{ + "charVocab": { + "<PAD>": 0, + "<UNK>": 1, + "(": 2, + ")": 3, + ",": 4, + "-": 5, + ".": 6, + "/": 7, + "0": 8, + "1": 9, + "2": 10, + "3": 11, + "4": 12, + "5": 13, + "6": 14, + "7": 15, + "8": 16, + "9": 17, + ":": 18, + ";": 19, + "@": 20, + "A": 21, + "B": 22, + "C": 23, + "D": 24, + "E": 25, + "F": 26, + "G": 27, + "I": 28, + "J": 29, + "L": 30, + "M": 31, + "N": 32, + "O": 33, + "P": 34, + "R": 35, + "S": 36, + "T": 37, + "U": 38, + "V": 39, + "W": 40, + "a": 41, + "b": 42, + "c": 43, + "d": 44, + "e": 45, + "f": 46, + "g": 47, + "h": 48, + "i": 49, + "j": 50, + "k": 51, + "l": 52, + "m": 53, + "n": 54, + "o": 55, + "p": 56, + "r": 57, + "s": 58, + "t": 59, + "u": 60, + "v": 61, + "w": 62, + "y": 63, + "z": 64, + "±": 65, + "É": 66, + "ö": 67, + "–": 68, + "—": 69 + }, + "tagVocab": { + "<PAD>": 0, + "B-<day>": 1, + "B-<month>": 2, + "B-<year>": 3, + "I-<day>": 4, + "I-<year>": 
5, + "O": 6 + }, + "tagIndex": { + "0": "<PAD>", + "1": "B-<day>", + "2": "B-<month>", + "3": "B-<year>", + "4": "I-<day>", + "5": "I-<year>", + "6": "O" + }, + "maxCharLength": 30, + "returnChars": false, + "featuresIndices": [ + 9, + 10, + 11, + 12, + 13, + 14, + 15 + ], + "featuresVocabularySize": 12, + "featuresMapToIndex": { + "9": { + "LINEEND": 1, + "LINEIN": 2, + "LINESTART": 3 + }, + "10": { + "ALLCAP": 13, + "INITCAP": 14, + "NOCAPS": 15 + }, + "11": { + "ALLDIGIT": 25, + "CONTAINSDIGITS": 26, + "NODIGIT": 27 + }, + "12": { + "0": 37, + "1": 38 + }, + "13": { + "0": 49, + "1": 50 + }, + "14": { + "0": 61, + "1": 62 + }, + "15": { + "COMMA": 73, + "DOT": 74, + "ENDBRACKET": 75, + "HYPHEN": 76, + "NOPUNCT": 77, + "OPENBRACKET": 78, + "PUNCT": 79 + } + } +} \ No newline at end of file diff --git a/grobid-home/models/date-BidLSTM_CRF_FEATURES/config.json b/grobid-home/models/date-BidLSTM_CRF_FEATURES/config.json deleted file mode 100644 index 618610dd3a..0000000000 --- a/grobid-home/models/date-BidLSTM_CRF_FEATURES/config.json +++ /dev/null @@ -1,72 +0,0 @@ -{ - "model_name": "date-BidLSTM_CRF_FEATURES", - "architecture": "BidLSTM_CRF_FEATURES", - "embeddings_name": "glove-840B", - "char_vocab_size": 73, - "case_vocab_size": 8, - "char_embedding_size": 25, - "num_char_lstm_units": 25, - "max_char_length": 30, - "features_vocabulary_size": 12, - "features_indices": [ - 9, - 10, - 11, - 12, - 13, - 14, - 15 - ], - "features_embedding_size": 4, - "features_lstm_units": 4, - "max_sequence_length": 50, - "word_embedding_size": 300, - "num_word_lstm_units": 100, - "case_embedding_size": 5, - "dropout": 0.5, - "recurrent_dropout": 0.5, - "use_crf": true, - "use_chain_crf": false, - "fold_number": 1, - "batch_size": 60, - "transformer_name": null, - "use_ELMo": false, - "features_map_to_index": { - "9": { - "LINEEND": 1, - "LINEIN": 2, - "LINESTART": 3 - }, - "10": { - "ALLCAP": 13, - "INITCAP": 14, - "NOCAPS": 15 - }, - "11": { - "ALLDIGIT": 25, - "CONTAINSDIGITS": 
26, - "NODIGIT": 27 - }, - "12": { - "0": 37, - "1": 38 - }, - "13": { - "0": 49, - "1": 50 - }, - "14": { - "0": 61, - "1": 62 - }, - "15": { - "COMMA": 73, - "DOT": 74, - "ENDBRACKET": 75, - "HYPHEN": 76, - "NOPUNCT": 77, - "OPENBRACKET": 78, - "PUNCT": 79 - } - } -} \ No newline at end of file diff --git a/grobid-home/models/date-BidLSTM_CRF_FEATURES/model_weights.hdf5 b/grobid-home/models/date-BidLSTM_CRF_FEATURES/model_weights.hdf5 deleted file mode 100644 index 84f3cfb0a4..0000000000 Binary files a/grobid-home/models/date-BidLSTM_CRF_FEATURES/model_weights.hdf5 and /dev/null differ diff --git a/grobid-home/models/date-BidLSTM_CRF_FEATURES/preprocessor.json b/grobid-home/models/date-BidLSTM_CRF_FEATURES/preprocessor.json deleted file mode 100644 index 2f24a47881..0000000000 --- a/grobid-home/models/date-BidLSTM_CRF_FEATURES/preprocessor.json +++ /dev/null @@ -1,163 +0,0 @@ -{ - "padding": true, - "return_lengths": true, - "return_word_embeddings": true, - "return_casing": false, - "return_features": true, - "return_chars": true, - "return_bert_embeddings": false, - "vocab_char": { - "<PAD>": 0, - "<UNK>": 1, - "(": 2, - ")": 3, - ",": 4, - "-": 5, - ".": 6, - "/": 7, - "0": 8, - "1": 9, - "2": 10, - "3": 11, - "4": 12, - "5": 13, - "6": 14, - "7": 15, - "8": 16, - "9": 17, - ":": 18, - ";": 19, - "@": 20, - "A": 21, - "B": 22, - "C": 23, - "D": 24, - "E": 25, - "F": 26, - "G": 27, - "I": 28, - "J": 29, - "L": 30, - "M": 31, - "N": 32, - "O": 33, - "P": 34, - "R": 35, - "S": 36, - "T": 37, - "U": 38, - "V": 39, - "W": 40, - "a": 41, - "b": 42, - "c": 43, - "d": 44, - "e": 45, - "f": 46, - "g": 47, - "h": 48, - "i": 49, - "j": 50, - "k": 51, - "l": 52, - "m": 53, - "n": 54, - "o": 55, - "p": 56, - "r": 57, - "s": 58, - "t": 59, - "u": 60, - "v": 61, - "w": 62, - "y": 63, - "z": 64, - "\u00b1": 65, - "\u00c9": 66, - "\u00f6": 67, - "\u2013": 68, - "\u2014": 69, - "\u5e74": 70, - "\u65e5": 71, - "\u6708": 72 - }, - "vocab_tag": { - "<PAD>": 0, - "B-<day>": 1, - 
"B-<month>": 2, - "B-<year>": 3, - "I-<day>": 4, - "I-<year>": 5, - "O": 6 - }, - "vocab_case": [ - "<PAD>", - "numeric", - "allLower", - "allUpper", - "initialUpper", - "other", - "mainly_numeric", - "contains_digit" - ], - "max_char_length": 30, - "feature_preprocessor": { - "features_vocabulary_size": 12, - "features_indices": [ - 9, - 10, - 11, - 12, - 13, - 14, - 15 - ], - "features_map_to_index": { - "9": { - "LINEEND": 1, - "LINEIN": 2, - "LINESTART": 3 - }, - "10": { - "ALLCAP": 13, - "INITCAP": 14, - "NOCAPS": 15 - }, - "11": { - "ALLDIGIT": 25, - "CONTAINSDIGITS": 26, - "NODIGIT": 27 - }, - "12": { - "0": 37, - "1": 38 - }, - "13": { - "0": 49, - "1": 50 - }, - "14": { - "0": 61, - "1": 62 - }, - "15": { - "COMMA": 73, - "DOT": 74, - "ENDBRACKET": 75, - "HYPHEN": 76, - "NOPUNCT": 77, - "OPENBRACKET": 78, - "PUNCT": 79 - } - } - }, - "indice_tag": { - "0": "<PAD>", - "1": "B-<day>", - "2": "B-<month>", - "3": "B-<year>", - "4": "I-<day>", - "5": "I-<year>", - "6": "O" - } -} \ No newline at end of file diff --git a/grobid-home/models/figure-BidLSTM_CRF/config.json b/grobid-home/models/figure-BidLSTM_CRF/config.json deleted file mode 100644 index fff374e6e2..0000000000 --- a/grobid-home/models/figure-BidLSTM_CRF/config.json +++ /dev/null @@ -1,26 +0,0 @@ -{ - "model_name": "figure-BidLSTM_CRF", - "architecture": "BidLSTM_CRF", - "embeddings_name": "glove-840B", - "char_vocab_size": 94, - "case_vocab_size": 8, - "char_embedding_size": 25, - "num_char_lstm_units": 25, - "max_char_length": 30, - "features_vocabulary_size": 12, - "features_indices": null, - "features_embedding_size": 4, - "features_lstm_units": 4, - "max_sequence_length": 3000, - "word_embedding_size": 300, - "num_word_lstm_units": 100, - "case_embedding_size": 5, - "dropout": 0.5, - "recurrent_dropout": 0.5, - "use_crf": true, - "use_chain_crf": false, - "fold_number": 1, - "batch_size": 20, - "transformer_name": null, - "use_ELMo": false -} \ No newline at end of file diff --git 
a/grobid-home/models/figure-BidLSTM_CRF/model_weights.hdf5 b/grobid-home/models/figure-BidLSTM_CRF/model_weights.hdf5 deleted file mode 100644 index 6b3473c26a..0000000000 Binary files a/grobid-home/models/figure-BidLSTM_CRF/model_weights.hdf5 and /dev/null differ diff --git a/grobid-home/models/figure-BidLSTM_CRF/preprocessor.json b/grobid-home/models/figure-BidLSTM_CRF/preprocessor.json deleted file mode 100644 index 6eb44fcec6..0000000000 --- a/grobid-home/models/figure-BidLSTM_CRF/preprocessor.json +++ /dev/null @@ -1,137 +0,0 @@ -{ - "padding": true, - "return_lengths": true, - "return_word_embeddings": true, - "return_casing": false, - "return_features": false, - "return_chars": true, - "return_bert_embeddings": false, - "vocab_char": { - "<PAD>": 0, - "<UNK>": 1, - "\"": 2, - "%": 3, - "&": 4, - "'": 5, - "(": 6, - ")": 7, - "*": 8, - "+": 9, - ",": 10, - "-": 11, - ".": 12, - "/": 13, - "0": 14, - "1": 15, - "2": 16, - "3": 17, - "4": 18, - "5": 19, - "6": 20, - "7": 21, - "8": 22, - "9": 23, - ":": 24, - ";": 25, - "<": 26, - "=": 27, - "A": 28, - "B": 29, - "C": 30, - "D": 31, - "E": 32, - "F": 33, - "G": 34, - "H": 35, - "I": 36, - "J": 37, - "K": 38, - "L": 39, - "M": 40, - "N": 41, - "O": 42, - "P": 43, - "Q": 44, - "R": 45, - "S": 46, - "T": 47, - "U": 48, - "V": 49, - "W": 50, - "X": 51, - "Y": 52, - "Z": 53, - "[": 54, - "]": 55, - "a": 56, - "b": 57, - "c": 58, - "d": 59, - "e": 60, - "f": 61, - "g": 62, - "h": 63, - "i": 64, - "j": 65, - "k": 66, - "l": 67, - "m": 68, - "n": 69, - "o": 70, - "p": 71, - "q": 72, - "r": 73, - "s": 74, - "t": 75, - "u": 76, - "v": 77, - "w": 78, - "x": 79, - "y": 80, - "z": 81, - "\u00ba": 82, - "\u00d7": 83, - "\u0394": 84, - "\u039b": 85, - "\u03b1": 86, - "\u03b5": 87, - "\u03bb": 88, - "\u03c4": 89, - "\u2126": 90, - "\u2192": 91, - "\u2212": 92, - "\u221d": 93 - }, - "vocab_tag": { - "<PAD>": 0, - "B-<content>": 1, - "B-<figDesc>": 2, - "B-<figure_head>": 3, - "B-<label>": 4, - "I-<content>": 5, - "I-<figDesc>": 
6, - "I-<figure_head>": 7 - }, - "vocab_case": [ - "<PAD>", - "numeric", - "allLower", - "allUpper", - "initialUpper", - "other", - "mainly_numeric", - "contains_digit" - ], - "max_char_length": 30, - "feature_preprocessor": null, - "indice_tag": { - "0": "<PAD>", - "1": "B-<content>", - "2": "B-<figDesc>", - "3": "B-<figure_head>", - "4": "B-<label>", - "5": "I-<content>", - "6": "I-<figDesc>", - "7": "I-<figure_head>" - } -} \ No newline at end of file diff --git a/grobid-home/models/figure-BidLSTM_CRF_FEATURES/config.json b/grobid-home/models/figure-BidLSTM_CRF_FEATURES/config.json deleted file mode 100644 index 4abfcd42b4..0000000000 --- a/grobid-home/models/figure-BidLSTM_CRF_FEATURES/config.json +++ /dev/null @@ -1,138 +0,0 @@ -{ - "model_name": "figure-BidLSTM_CRF_FEATURES", - "architecture": "BidLSTM_CRF_FEATURES", - "embeddings_name": "glove-840B", - "char_vocab_size": 94, - "case_vocab_size": 8, - "char_embedding_size": 25, - "num_char_lstm_units": 25, - "max_char_length": 30, - "features_vocabulary_size": 12, - "features_indices": [ - 9, - 10, - 11, - 12, - 13, - 14, - 15, - 16, - 17, - 18, - 19, - 20, - 21, - 22, - 23, - 24 - ], - "features_embedding_size": 4, - "features_lstm_units": 4, - "max_sequence_length": 3000, - "word_embedding_size": 300, - "num_word_lstm_units": 100, - "case_embedding_size": 5, - "dropout": 0.5, - "recurrent_dropout": 0.5, - "use_crf": true, - "use_chain_crf": false, - "fold_number": 1, - "batch_size": 20, - "transformer_name": null, - "use_ELMo": false, - "features_map_to_index": { - "9": { - "BLOCKEND": 1, - "BLOCKIN": 2, - "BLOCKSTART": 3 - }, - "10": { - "LINEEND": 13, - "LINEIN": 14, - "LINESTART": 15 - }, - "11": { - "ALIGNEDLEFT": 25, - "LINEINDENT": 26 - }, - "12": { - "NEWFONT": 37, - "SAMEFONT": 38 - }, - "13": { - "HIGHERFONT": 49, - "LOWERFONT": 50, - "SAMEFONTSIZE": 51 - }, - "14": { - "0": 61, - "1": 62 - }, - "15": { - "0": 73, - "1": 74 - }, - "16": { - "ALLCAP": 85, - "INITCAP": 86, - "NOCAPS": 87 - }, - "17": 
{ - "ALLDIGIT": 97, - "CONTAINSDIGITS": 98, - "NODIGIT": 99 - }, - "18": { - "0": 109, - "1": 110 - }, - "19": { - "COMMA": 121, - "DOT": 122, - "ENDBRACKET": 123, - "HYPHEN": 124, - "NOPUNCT": 125, - "OPENBRACKET": 126, - "PUNCT": 127, - "QUOTE": 128 - }, - "20": { - "0": 133, - "10": 134, - "11": 135, - "2": 136, - "3": 137, - "4": 138, - "5": 139, - "6": 140, - "7": 141, - "8": 142, - "9": 143 - }, - "21": { - "0": 145, - "1": 146, - "10": 147, - "11": 148, - "2": 149, - "3": 150, - "4": 151, - "5": 152, - "6": 153, - "7": 154, - "8": 155, - "9": 156 - }, - "22": { - "0": 157 - }, - "23": { - "NUMBER": 169, - "UNKNOWN": 170 - }, - "24": { - "0": 181, - "1": 182 - } - } -} \ No newline at end of file diff --git a/grobid-home/models/figure-BidLSTM_CRF_FEATURES/model_weights.hdf5 b/grobid-home/models/figure-BidLSTM_CRF_FEATURES/model_weights.hdf5 deleted file mode 100644 index 3a68874682..0000000000 Binary files a/grobid-home/models/figure-BidLSTM_CRF_FEATURES/model_weights.hdf5 and /dev/null differ diff --git a/grobid-home/models/figure-BidLSTM_CRF_FEATURES/preprocessor.json b/grobid-home/models/figure-BidLSTM_CRF_FEATURES/preprocessor.json deleted file mode 100644 index 2f3192b9a3..0000000000 --- a/grobid-home/models/figure-BidLSTM_CRF_FEATURES/preprocessor.json +++ /dev/null @@ -1,252 +0,0 @@ -{ - "padding": true, - "return_lengths": true, - "return_word_embeddings": true, - "return_casing": false, - "return_features": true, - "return_chars": true, - "return_bert_embeddings": false, - "vocab_char": { - "<PAD>": 0, - "<UNK>": 1, - "\"": 2, - "%": 3, - "&": 4, - "'": 5, - "(": 6, - ")": 7, - "*": 8, - "+": 9, - ",": 10, - "-": 11, - ".": 12, - "/": 13, - "0": 14, - "1": 15, - "2": 16, - "3": 17, - "4": 18, - "5": 19, - "6": 20, - "7": 21, - "8": 22, - "9": 23, - ":": 24, - ";": 25, - "<": 26, - "=": 27, - "A": 28, - "B": 29, - "C": 30, - "D": 31, - "E": 32, - "F": 33, - "G": 34, - "H": 35, - "I": 36, - "J": 37, - "K": 38, - "L": 39, - "M": 40, - "N": 41, - "O": 
42, - "P": 43, - "Q": 44, - "R": 45, - "S": 46, - "T": 47, - "U": 48, - "V": 49, - "W": 50, - "X": 51, - "Y": 52, - "Z": 53, - "[": 54, - "]": 55, - "a": 56, - "b": 57, - "c": 58, - "d": 59, - "e": 60, - "f": 61, - "g": 62, - "h": 63, - "i": 64, - "j": 65, - "k": 66, - "l": 67, - "m": 68, - "n": 69, - "o": 70, - "p": 71, - "q": 72, - "r": 73, - "s": 74, - "t": 75, - "u": 76, - "v": 77, - "w": 78, - "x": 79, - "y": 80, - "z": 81, - "\u00ba": 82, - "\u00d7": 83, - "\u0394": 84, - "\u039b": 85, - "\u03b1": 86, - "\u03b5": 87, - "\u03bb": 88, - "\u03c4": 89, - "\u2126": 90, - "\u2192": 91, - "\u2212": 92, - "\u221d": 93 - }, - "vocab_tag": { - "<PAD>": 0, - "B-<content>": 1, - "B-<figDesc>": 2, - "B-<figure_head>": 3, - "B-<label>": 4, - "I-<content>": 5, - "I-<figDesc>": 6, - "I-<figure_head>": 7 - }, - "vocab_case": [ - "<PAD>", - "numeric", - "allLower", - "allUpper", - "initialUpper", - "other", - "mainly_numeric", - "contains_digit" - ], - "max_char_length": 30, - "feature_preprocessor": { - "features_vocabulary_size": 12, - "features_indices": [ - 9, - 10, - 11, - 12, - 13, - 14, - 15, - 16, - 17, - 18, - 19, - 20, - 21, - 22, - 23, - 24 - ], - "features_map_to_index": { - "9": { - "BLOCKEND": 1, - "BLOCKIN": 2, - "BLOCKSTART": 3 - }, - "10": { - "LINEEND": 13, - "LINEIN": 14, - "LINESTART": 15 - }, - "11": { - "ALIGNEDLEFT": 25, - "LINEINDENT": 26 - }, - "12": { - "NEWFONT": 37, - "SAMEFONT": 38 - }, - "13": { - "HIGHERFONT": 49, - "LOWERFONT": 50, - "SAMEFONTSIZE": 51 - }, - "14": { - "0": 61, - "1": 62 - }, - "15": { - "0": 73, - "1": 74 - }, - "16": { - "ALLCAP": 85, - "INITCAP": 86, - "NOCAPS": 87 - }, - "17": { - "ALLDIGIT": 97, - "CONTAINSDIGITS": 98, - "NODIGIT": 99 - }, - "18": { - "0": 109, - "1": 110 - }, - "19": { - "COMMA": 121, - "DOT": 122, - "ENDBRACKET": 123, - "HYPHEN": 124, - "NOPUNCT": 125, - "OPENBRACKET": 126, - "PUNCT": 127, - "QUOTE": 128 - }, - "20": { - "0": 133, - "10": 134, - "11": 135, - "2": 136, - "3": 137, - "4": 138, - "5": 139, - 
"6": 140, - "7": 141, - "8": 142, - "9": 143 - }, - "21": { - "0": 145, - "1": 146, - "10": 147, - "11": 148, - "2": 149, - "3": 150, - "4": 151, - "5": 152, - "6": 153, - "7": 154, - "8": 155, - "9": 156 - }, - "22": { - "0": 157 - }, - "23": { - "NUMBER": 169, - "UNKNOWN": 170 - }, - "24": { - "0": 181, - "1": 182 - } - } - }, - "indice_tag": { - "0": "<PAD>", - "1": "B-<content>", - "2": "B-<figDesc>", - "3": "B-<figure_head>", - "4": "B-<label>", - "5": "I-<content>", - "6": "I-<figDesc>", - "7": "I-<figure_head>" - } -} \ No newline at end of file diff --git a/grobid-home/models/funding-acknowledgement-BidLSTM_CRF_FEATURES-with_ELMo/config.json b/grobid-home/models/funding-acknowledgement-BidLSTM_CRF_FEATURES-with_ELMo/config.json deleted file mode 100644 index badf919076..0000000000 --- a/grobid-home/models/funding-acknowledgement-BidLSTM_CRF_FEATURES-with_ELMo/config.json +++ /dev/null @@ -1,73 +0,0 @@ -{ - "model_name": "funding-acknowledgement-BidLSTM_CRF_FEATURES-with_ELMo", - "architecture": "BidLSTM_CRF_FEATURES", - "embeddings_name": "glove-840B", - "char_vocab_size": 150, - "case_vocab_size": 8, - "char_embedding_size": 25, - "num_char_lstm_units": 25, - "max_char_length": 30, - "features_vocabulary_size": 12, - "features_indices": [ - 9, - 10, - 11, - 12, - 13, - 14, - 15 - ], - "features_embedding_size": 4, - "features_lstm_units": 4, - "max_sequence_length": 500, - "word_embedding_size": 1324, - "num_word_lstm_units": 100, - "case_embedding_size": 5, - "dropout": 0.5, - "recurrent_dropout": 0.5, - "use_crf": true, - "use_chain_crf": false, - "fold_number": 1, - "batch_size": 30, - "transformer_name": null, - "use_ELMo": true, - "features_map_to_index": { - "9": { - "LINEEND": 1, - "LINEIN": 2, - "LINESTART": 3 - }, - "10": { - "ALLCAP": 13, - "INITCAP": 14, - "NOCAPS": 15 - }, - "11": { - "ALLDIGIT": 25, - "CONTAINSDIGITS": 26, - "NODIGIT": 27 - }, - "12": { - "0": 37, - "1": 38 - }, - "13": { - "0": 49, - "1": 50 - }, - "14": { - "0": 61, - "1": 
62 - }, - "15": { - "COMMA": 73, - "DOT": 74, - "ENDBRACKET": 75, - "HYPHEN": 76, - "NOPUNCT": 77, - "OPENBRACKET": 78, - "PUNCT": 79, - "QUOTE": 80 - } - } -} \ No newline at end of file diff --git a/grobid-home/models/funding-acknowledgement-BidLSTM_CRF_FEATURES-with_ELMo/model_weights.hdf5 b/grobid-home/models/funding-acknowledgement-BidLSTM_CRF_FEATURES-with_ELMo/model_weights.hdf5 deleted file mode 100644 index f0a6188b65..0000000000 Binary files a/grobid-home/models/funding-acknowledgement-BidLSTM_CRF_FEATURES-with_ELMo/model_weights.hdf5 and /dev/null differ diff --git a/grobid-home/models/funding-acknowledgement-BidLSTM_CRF_FEATURES-with_ELMo/preprocessor.json b/grobid-home/models/funding-acknowledgement-BidLSTM_CRF_FEATURES-with_ELMo/preprocessor.json deleted file mode 100644 index cff5ff1c07..0000000000 --- a/grobid-home/models/funding-acknowledgement-BidLSTM_CRF_FEATURES-with_ELMo/preprocessor.json +++ /dev/null @@ -1,267 +0,0 @@ -{ - "padding": true, - "return_lengths": true, - "return_word_embeddings": true, - "return_casing": false, - "return_features": true, - "return_chars": true, - "return_bert_embeddings": false, - "vocab_char": { - "<PAD>": 0, - "<UNK>": 1, - "\"": 2, - "#": 3, - "%": 4, - "&": 5, - "'": 6, - "(": 7, - ")": 8, - "*": 9, - "+": 10, - ",": 11, - "-": 12, - ".": 13, - "/": 14, - "0": 15, - "1": 16, - "2": 17, - "3": 18, - "4": 19, - "5": 20, - "6": 21, - "7": 22, - "8": 23, - "9": 24, - ":": 25, - ";": 26, - "<": 27, - "=": 28, - ">": 29, - "@": 30, - "A": 31, - "B": 32, - "C": 33, - "D": 34, - "E": 35, - "F": 36, - "G": 37, - "H": 38, - "I": 39, - "J": 40, - "K": 41, - "L": 42, - "M": 43, - "N": 44, - "O": 45, - "P": 46, - "Q": 47, - "R": 48, - "S": 49, - "T": 50, - "U": 51, - "V": 52, - "W": 53, - "X": 54, - "Y": 55, - "Z": 56, - "[": 57, - "\\": 58, - "]": 59, - "_": 60, - "`": 61, - "a": 62, - "b": 63, - "c": 64, - "d": 65, - "e": 66, - "f": 67, - "g": 68, - "h": 69, - "i": 70, - "j": 71, - "k": 72, - "l": 73, - "m": 74, - "n": 
75, - "o": 76, - "p": 77, - "q": 78, - "r": 79, - "s": 80, - "t": 81, - "u": 82, - "v": 83, - "w": 84, - "x": 85, - "y": 86, - "z": 87, - "{": 88, - "\u00a3": 89, - "\u00a8": 90, - "\u00ab": 91, - "\u00ae": 92, - "\u00b0": 93, - "\u00b4": 94, - "\u00b5": 95, - "\u00b8": 96, - "\u00bb": 97, - "\u00c7": 98, - "\u00c9": 99, - "\u00ce": 100, - "\u00d6": 101, - "\u00d8": 102, - "\u00df": 103, - "\u00e0": 104, - "\u00e1": 105, - "\u00e2": 106, - "\u00e3": 107, - "\u00e4": 108, - "\u00e5": 109, - "\u00e7": 110, - "\u00e8": 111, - "\u00e9": 112, - "\u00ea": 113, - "\u00eb": 114, - "\u00ed": 115, - "\u00ee": 116, - "\u00f1": 117, - "\u00f2": 118, - "\u00f3": 119, - "\u00f4": 120, - "\u00f5": 121, - "\u00f6": 122, - "\u00f8": 123, - "\u00fa": 124, - "\u00fc": 125, - "\u0101": 126, - "\u0105": 127, - "\u0107": 128, - "\u011b": 129, - "\u011f": 130, - "\u0131": 131, - "\u0142": 132, - "\u0159": 133, - "\u015f": 134, - "\u016f": 135, - "\u0229": 136, - "\u02da": 137, - "\u03b2": 138, - "\u03b3": 139, - "\u03bc": 140, - "\u0430": 141, - "\u1e4d": 142, - "\u2019": 143, - "\u201c": 144, - "\u201d": 145, - "\u2020": 146, - "\u20ac": 147, - "\u2116": 148, - "\u25a1": 149 - }, - "vocab_tag": { - "<PAD>": 0, - "B-<affiliation>": 1, - "B-<funderName>": 2, - "B-<grantName>": 3, - "B-<grantNumber>": 4, - "B-<infrastructure>": 5, - "B-<institution>": 6, - "B-<person>": 7, - "B-<programName>": 8, - "B-<projectName>": 9, - "I-<affiliation>": 10, - "I-<funderName>": 11, - "I-<grantName>": 12, - "I-<grantNumber>": 13, - "I-<infrastructure>": 14, - "I-<institution>": 15, - "I-<person>": 16, - "I-<programName>": 17, - "I-<projectName>": 18, - "O": 19 - }, - "vocab_case": [ - "<PAD>", - "numeric", - "allLower", - "allUpper", - "initialUpper", - "other", - "mainly_numeric", - "contains_digit" - ], - "max_char_length": 30, - "feature_preprocessor": { - "features_vocabulary_size": 12, - "features_indices": [ - 9, - 10, - 11, - 12, - 13, - 14, - 15 - ], - "features_map_to_index": { - "9": { - 
"LINEEND": 1, - "LINEIN": 2, - "LINESTART": 3 - }, - "10": { - "ALLCAP": 13, - "INITCAP": 14, - "NOCAPS": 15 - }, - "11": { - "ALLDIGIT": 25, - "CONTAINSDIGITS": 26, - "NODIGIT": 27 - }, - "12": { - "0": 37, - "1": 38 - }, - "13": { - "0": 49, - "1": 50 - }, - "14": { - "0": 61, - "1": 62 - }, - "15": { - "COMMA": 73, - "DOT": 74, - "ENDBRACKET": 75, - "HYPHEN": 76, - "NOPUNCT": 77, - "OPENBRACKET": 78, - "PUNCT": 79, - "QUOTE": 80 - } - } - }, - "indice_tag": { - "0": "<PAD>", - "1": "B-<affiliation>", - "2": "B-<funderName>", - "3": "B-<grantName>", - "4": "B-<grantNumber>", - "5": "B-<infrastructure>", - "6": "B-<institution>", - "7": "B-<person>", - "8": "B-<programName>", - "9": "B-<projectName>", - "10": "I-<affiliation>", - "11": "I-<funderName>", - "12": "I-<grantName>", - "13": "I-<grantNumber>", - "14": "I-<infrastructure>", - "15": "I-<institution>", - "16": "I-<person>", - "17": "I-<programName>", - "18": "I-<projectName>", - "19": "O" - } -} \ No newline at end of file diff --git a/grobid-home/models/funding-acknowledgement-BidLSTM_CRF_FEATURES.onnx/config.json b/grobid-home/models/funding-acknowledgement-BidLSTM_CRF_FEATURES.onnx/config.json new file mode 100644 index 0000000000..42f8414c3e --- /dev/null +++ b/grobid-home/models/funding-acknowledgement-BidLSTM_CRF_FEATURES.onnx/config.json @@ -0,0 +1,21 @@ +{ + "modelName": "grobid-funding-acknowledgement-BidLSTM_CRF_FEATURES", + "architecture": "BidLSTM_CRF_FEATURES", + "wordEmbeddingSize": 300, + "charEmbeddingSize": 25, + "numCharLstmUnits": 25, + "numWordLstmUnits": 100, + "maxSequenceLength": 800, + "embeddingsName": "glove-840B", + "featuresIndices": [ + 9, + 10, + 11, + 12, + 13, + 14 + ], + "featuresEmbeddingSize": 4, + "featuresLstmUnits": 4, + "featuresVocabularySize": 12 +} \ No newline at end of file diff --git a/grobid-home/models/funding-acknowledgement-BidLSTM_CRF_FEATURES.onnx/crf_params.json b/grobid-home/models/funding-acknowledgement-BidLSTM_CRF_FEATURES.onnx/crf_params.json new 
file mode 100644 index 0000000000..ddc5600c0f --- /dev/null +++ b/grobid-home/models/funding-acknowledgement-BidLSTM_CRF_FEATURES.onnx/crf_params.json @@ -0,0 +1,404 @@ +{ + "transitions": [ + [ + 0.05653524771332741, + -0.24288514256477356, + -0.15196844935417175, + -0.45539411902427673, + -0.27754223346710205, + -0.1887950897216797, + -0.23659807443618774, + -0.35053715109825134, + -0.14702659845352173, + -0.03484121337532997, + -0.2675206959247589, + -0.3982822000980377, + -0.22974683344364166, + -0.05026468262076378, + -0.3696364760398865, + -0.3499599099159241, + -0.44049182534217834, + -0.3422165811061859 + ], + [ + -0.09962109476327896, + -0.5586426258087158, + -0.2151985764503479, + -0.10790594667196274, + -0.0790245309472084, + -0.35025036334991455, + -0.10324051976203918, + -0.15235663950443268, + -0.00985940732061863, + 0.703696608543396, + -0.4877941906452179, + -0.23389452695846558, + -0.22507135570049286, + -0.9885483384132385, + -0.8274292945861816, + -0.22610998153686523, + -0.1456933170557022, + -0.3425562083721161 + ], + [ + -0.2748405635356903, + -0.2421104609966278, + -0.5666518807411194, + 0.08169285207986832, + 0.007790330331772566, + -0.49785876274108887, + -0.10162047296762466, + 0.05211301147937775, + -0.1772768497467041, + -0.4384084641933441, + 0.7252213358879089, + -1.001523494720459, + -0.5294941663742065, + -0.8137633800506592, + -0.5713112354278564, + -1.016876220703125, + -0.7835744619369507, + -0.16529481112957 + ], + [ + -0.44897133111953735, + -0.10642313957214355, + -0.40602433681488037, + -0.638512372970581, + -0.36809098720550537, + -0.09636710584163666, + -0.1826362907886505, + -0.5091497302055359, + -0.349395215511322, + -0.28299638628959656, + -0.9007267951965332, + 0.8441708087921143, + -0.4828413426876068, + -0.536125659942627, + -0.161577045917511, + -0.815366268157959, + -0.5810884833335876, + -0.6482445597648621 + ], + [ + -0.2829087972640991, + -0.15350373089313507, + -0.11379502713680267, + -0.12590354681015015, + 
-0.9214949607849121, + -0.0780823677778244, + -0.22648146748542786, + -0.18860545754432678, + 0.046752385795116425, + -0.21155749261379242, + -0.7522338032722473, + -0.6754022240638733, + 0.6698613166809082, + -0.3700776696205139, + -0.41181376576423645, + -0.7261378765106201, + -0.7792487144470215, + -0.07098010182380676 + ], + [ + -0.13739053905010223, + -0.5134162902832031, + -0.44445088505744934, + -0.3060857653617859, + -0.3716025948524475, + -0.760222852230072, + -0.21622157096862793, + -0.2555842101573944, + -0.3502296507358551, + -0.988154411315918, + -0.8763798475265503, + -0.5544582009315491, + -0.27966195344924927, + 0.8110397458076477, + -0.7942994832992554, + -0.7267717123031616, + -0.7377699613571167, + -0.42242002487182617 + ], + [ + -0.41354110836982727, + -0.3887498080730438, + -0.23522529006004333, + -0.12481104582548141, + -0.213913694024086, + -0.2768189609050751, + -0.36394914984703064, + -0.09196390211582184, + -0.17237180471420288, + -0.5764569640159607, + -0.25966235995292664, + -0.18999458849430084, + -0.35656070709228516, + -0.5974794626235962, + 0.5019974112510681, + -0.13209109008312225, + -0.2703125774860382, + -0.3440954089164734 + ], + [ + -0.3605848550796509, + 0.0013702864525839686, + -0.4253484010696411, + -0.13838908076286316, + -0.3083483874797821, + -0.17249265313148499, + -0.0910978838801384, + -0.5463467240333557, + -0.13224893808364868, + -0.2512015402317047, + -1.043896198272705, + -0.88191157579422, + -0.7457115054130554, + -0.602675199508667, + -0.22251692414283752, + 0.9117735624313354, + -0.8229843974113464, + -0.6792265176773071 + ], + [ + -0.2822444438934326, + -0.06822029501199722, + -0.19915048778057098, + -0.2034611850976944, + -0.286653071641922, + -0.08500991016626358, + -0.04939419776201248, + -0.314241498708725, + -0.5276393890380859, + -0.31855684518814087, + -0.7244556546211243, + -0.6404930353164673, + -0.876244306564331, + -0.6452759504318237, + -0.333271861076355, + -0.9300932288169861, + 0.717154324054718, 
+ -0.043474242091178894 + ], + [ + -0.14135009050369263, + -0.9201858639717102, + -0.19474343955516815, + -0.02426719106733799, + -0.2119818776845932, + -0.61188805103302, + -0.42610952258110046, + -0.125978484749794, + -0.14932580292224884, + 0.47962436079978943, + -0.40551602840423584, + -0.24006791412830353, + -0.12199819833040237, + -0.8503081798553467, + -0.6466490626335144, + -0.308322548866272, + -0.3648732602596283, + -0.3907102346420288 + ], + [ + -0.004341791849583387, + -0.19648638367652893, + -0.6363983154296875, + -0.02778562717139721, + -0.40046972036361694, + -0.4655126631259918, + -0.10831563919782639, + -0.5490319728851318, + -0.42529022693634033, + -0.5064828395843506, + 0.6889162063598633, + -0.998571515083313, + -0.4101109504699707, + -0.858644962310791, + -0.2287295013666153, + -1.0609585046768188, + -0.9149004817008972, + -0.562552809715271 + ], + [ + -0.42696326971054077, + -0.11054018139839172, + -0.5634124875068665, + -0.7639787793159485, + -0.027798840776085854, + -0.3022802472114563, + -0.0826323851943016, + -0.6280609965324402, + -0.6260813474655151, + -0.21312689781188965, + -0.8807752728462219, + 0.5955087542533875, + -0.6155940890312195, + -0.42442187666893005, + -0.28568705916404724, + -0.9378145337104797, + -0.8752440214157104, + -0.5888253450393677 + ], + [ + -0.39210399985313416, + -0.21312251687049866, + -0.08425953984260559, + -0.33170178532600403, + -0.6494635939598083, + -0.18792493641376495, + -0.08676949143409729, + -0.08569207042455673, + -0.18273021280765533, + -0.20552878081798553, + -0.33004459738731384, + -0.623752236366272, + 0.550236165523529, + -0.2414838820695877, + -0.38457831740379333, + -0.6346907615661621, + -0.7360970377922058, + -0.35468852519989014 + ], + [ + -0.10819970816373825, + -0.6498776078224182, + -0.3727312982082367, + -0.1892727017402649, + -0.42728880047798157, + -0.9224845170974731, + -0.3849949240684509, + -0.41456806659698486, + -0.39978423714637756, + -1.0176318883895874, + -0.7417430877685547, 
+ -0.5455319881439209, + -0.2996966242790222, + 0.7415692210197449, + -0.4724261462688446, + -0.8500942587852478, + -0.758048951625824, + -0.6639863848686218 + ], + [ + -0.3140590786933899, + -0.5966055393218994, + -0.1952442079782486, + -0.09754670411348343, + -0.22222411632537842, + -0.2702353000640869, + -0.36362430453300476, + -0.1828572303056717, + -0.1976410448551178, + -0.6270851492881775, + -0.3265601694583893, + -0.3786303699016571, + -0.34490031003952026, + -0.5984370112419128, + 0.3766128718852997, + -0.10510849207639694, + -0.27632635831832886, + -0.07290343940258026 + ], + [ + -0.3801642954349518, + -0.05747685208916664, + -0.5276033282279968, + -0.4757595956325531, + -0.2658785879611969, + -0.21543309092521667, + -0.10567840933799744, + -0.7407453060150146, + -0.6813671588897705, + -0.31447634100914, + -0.8926599025726318, + -0.8921177983283997, + -0.5969862341880798, + -0.7332285642623901, + -0.24994219839572906, + 0.6904953718185425, + -1.1386303901672363, + -0.5399476885795593 + ], + [ + -0.4454038739204407, + -0.013940366916358471, + -0.39776039123535156, + -0.32307159900665283, + -0.5413869619369507, + -0.3439432382583618, + -0.1824028491973877, + -0.7387387752532959, + -0.8050464987754822, + -0.33723676204681396, + -0.7209362983703613, + -0.792214035987854, + -0.7858200669288635, + -0.7991609573364258, + -0.262999564409256, + -1.0366326570510864, + 0.5519434213638306, + -0.6131811141967773 + ], + [ + -0.11188361793756485, + 0.37677642703056335, + 0.4332883954048157, + 0.10742682218551636, + 0.4803352355957031, + 0.3536052703857422, + 0.2755962610244751, + 0.43702203035354614, + 0.3069528639316559, + -0.6668485403060913, + -0.5525391697883606, + -0.697446882724762, + -0.6754106283187866, + -0.8478207588195801, + -0.588163435459137, + -0.7740446925163269, + -0.7412075400352478, + 0.6042668223381042 + ] + ], + "startTransitions": [ + -0.20571115612983704, + -0.17791028320789337, + -0.0923755019903183, + -0.3220617473125458, + -0.3557651937007904, + 
-0.03196358680725098, + 0.1573391854763031, + -0.30256420373916626, + -0.26535797119140625, + -0.2592357397079468, + -0.43514975905418396, + -0.6240488886833191, + -0.46043044328689575, + -0.4572790265083313, + -0.5044698119163513, + -0.5700479745864868, + -0.6632629632949829, + 0.37008023262023926 + ], + "endTransitions": [ + 0.34100842475891113, + 0.14775437116622925, + -0.0993899405002594, + -0.3061085641384125, + -0.09683345258235931, + -0.15180693566799164, + -0.21961873769760132, + -0.1995694786310196, + -0.15983252227306366, + -0.14371265470981598, + -0.32148054242134094, + -0.37855055928230286, + -0.230210542678833, + -0.3209848701953888, + -0.2995285093784332, + -0.3072023093700409, + -0.3252164423465729, + 0.19557806849479675 + ] +} \ No newline at end of file diff --git a/grobid-home/models/funding-acknowledgement-BidLSTM_CRF_FEATURES.onnx/encoder.onnx b/grobid-home/models/funding-acknowledgement-BidLSTM_CRF_FEATURES.onnx/encoder.onnx new file mode 100644 index 0000000000..d47a72e712 Binary files /dev/null and b/grobid-home/models/funding-acknowledgement-BidLSTM_CRF_FEATURES.onnx/encoder.onnx differ diff --git a/grobid-home/models/funding-acknowledgement-BidLSTM_CRF_FEATURES.onnx/vocab.json b/grobid-home/models/funding-acknowledgement-BidLSTM_CRF_FEATURES.onnx/vocab.json new file mode 100644 index 0000000000..4c1ca14b96 --- /dev/null +++ b/grobid-home/models/funding-acknowledgement-BidLSTM_CRF_FEATURES.onnx/vocab.json @@ -0,0 +1,238 @@ +{ + "charVocab": { + "<PAD>": 0, + "<UNK>": 1, + "\"": 2, + "#": 3, + "%": 4, + "&": 5, + "'": 6, + "(": 7, + ")": 8, + "*": 9, + "+": 10, + ",": 11, + "-": 12, + ".": 13, + "/": 14, + "0": 15, + "1": 16, + "2": 17, + "3": 18, + "4": 19, + "5": 20, + "6": 21, + "7": 22, + "8": 23, + "9": 24, + ":": 25, + ";": 26, + "<": 27, + "=": 28, + ">": 29, + "@": 30, + "A": 31, + "B": 32, + "C": 33, + "D": 34, + "E": 35, + "F": 36, + "G": 37, + "H": 38, + "I": 39, + "J": 40, + "K": 41, + "L": 42, + "M": 43, + "N": 44, + "O": 45, + 
"P": 46, + "Q": 47, + "R": 48, + "S": 49, + "T": 50, + "U": 51, + "V": 52, + "W": 53, + "X": 54, + "Y": 55, + "Z": 56, + "[": 57, + "\\": 58, + "]": 59, + "_": 60, + "`": 61, + "a": 62, + "b": 63, + "c": 64, + "d": 65, + "e": 66, + "f": 67, + "g": 68, + "h": 69, + "i": 70, + "j": 71, + "k": 72, + "l": 73, + "m": 74, + "n": 75, + "o": 76, + "p": 77, + "q": 78, + "r": 79, + "s": 80, + "t": 81, + "u": 82, + "v": 83, + "w": 84, + "x": 85, + "y": 86, + "z": 87, + "{": 88, + "£": 89, + "¨": 90, + "«": 91, + "®": 92, + "°": 93, + "´": 94, + "µ": 95, + "¸": 96, + "»": 97, + "Ç": 98, + "É": 99, + "Î": 100, + "Ö": 101, + "Ø": 102, + "ß": 103, + "à": 104, + "á": 105, + "â": 106, + "ã": 107, + "ä": 108, + "å": 109, + "ç": 110, + "è": 111, + "é": 112, + "ê": 113, + "ë": 114, + "í": 115, + "î": 116, + "ñ": 117, + "ò": 118, + "ó": 119, + "ô": 120, + "õ": 121, + "ö": 122, + "ø": 123, + "ú": 124, + "ü": 125, + "ā": 126, + "ą": 127, + "ć": 128, + "ě": 129, + "ğ": 130, + "ı": 131, + "ł": 132, + "ř": 133, + "ş": 134, + "ů": 135, + "ȩ": 136, + "β": 137, + "γ": 138, + "а": 139, + "ṍ": 140, + "’": 141, + "“": 142, + "”": 143, + "†": 144, + "€": 145, + "№": 146, + "□": 147 + }, + "tagVocab": { + "<PAD>": 0, + "B-<affiliation>": 1, + "B-<funderName>": 2, + "B-<grantName>": 3, + "B-<grantNumber>": 4, + "B-<institution>": 5, + "B-<person>": 6, + "B-<programName>": 7, + "B-<projectName>": 8, + "I-<affiliation>": 9, + "I-<funderName>": 10, + "I-<grantName>": 11, + "I-<grantNumber>": 12, + "I-<institution>": 13, + "I-<person>": 14, + "I-<programName>": 15, + "I-<projectName>": 16, + "O": 17 + }, + "tagIndex": { + "0": "<PAD>", + "1": "B-<affiliation>", + "2": "B-<funderName>", + "3": "B-<grantName>", + "4": "B-<grantNumber>", + "5": "B-<institution>", + "6": "B-<person>", + "7": "B-<programName>", + "8": "B-<projectName>", + "9": "I-<affiliation>", + "10": "I-<funderName>", + "11": "I-<grantName>", + "12": "I-<grantNumber>", + "13": "I-<institution>", + "14": "I-<person>", + "15": 
"I-<programName>", + "16": "I-<projectName>", + "17": "O" + }, + "maxCharLength": 30, + "returnChars": false, + "featuresIndices": [ + 9, + 10, + 11, + 12, + 13, + 14 + ], + "featuresVocabularySize": 12, + "featuresMapToIndex": { + "9": { + "LINEEND": 1, + "LINEIN": 2, + "LINESTART": 3 + }, + "10": { + "ALLCAP": 13, + "INITCAP": 14, + "NOCAPS": 15 + }, + "11": { + "ALLDIGIT": 25, + "CONTAINSDIGITS": 26, + "NODIGIT": 27 + }, + "12": { + "0": 37, + "1": 38 + }, + "13": { + "0": 49, + "1": 50 + }, + "14": { + "COMMA": 61, + "DOT": 62, + "ENDBRACKET": 63, + "HYPHEN": 64, + "NOPUNCT": 65, + "OPENBRACKET": 66, + "PUNCT": 67, + "QUOTE": 68 + } + } +} \ No newline at end of file diff --git a/grobid-home/models/funding-acknowledgement-BidLSTM_CRF_FEATURES/config.json b/grobid-home/models/funding-acknowledgement-BidLSTM_CRF_FEATURES/config.json deleted file mode 100644 index 5b63a1b3c9..0000000000 --- a/grobid-home/models/funding-acknowledgement-BidLSTM_CRF_FEATURES/config.json +++ /dev/null @@ -1,73 +0,0 @@ -{ - "model_name": "funding-acknowledgement-BidLSTM_CRF_FEATURES", - "architecture": "BidLSTM_CRF_FEATURES", - "embeddings_name": "glove-840B", - "char_vocab_size": 150, - "case_vocab_size": 8, - "char_embedding_size": 25, - "num_char_lstm_units": 25, - "max_char_length": 30, - "features_vocabulary_size": 12, - "features_indices": [ - 9, - 10, - 11, - 12, - 13, - 14, - 15 - ], - "features_embedding_size": 4, - "features_lstm_units": 4, - "max_sequence_length": 800, - "word_embedding_size": 300, - "num_word_lstm_units": 100, - "case_embedding_size": 5, - "dropout": 0.5, - "recurrent_dropout": 0.5, - "use_crf": true, - "use_chain_crf": false, - "fold_number": 1, - "batch_size": 30, - "transformer_name": null, - "use_ELMo": false, - "features_map_to_index": { - "9": { - "LINEEND": 1, - "LINEIN": 2, - "LINESTART": 3 - }, - "10": { - "ALLCAP": 13, - "INITCAP": 14, - "NOCAPS": 15 - }, - "11": { - "ALLDIGIT": 25, - "CONTAINSDIGITS": 26, - "NODIGIT": 27 - }, - "12": { - "0": 37, 
- "1": 38 - }, - "13": { - "0": 49, - "1": 50 - }, - "14": { - "0": 61, - "1": 62 - }, - "15": { - "COMMA": 73, - "DOT": 74, - "ENDBRACKET": 75, - "HYPHEN": 76, - "NOPUNCT": 77, - "OPENBRACKET": 78, - "PUNCT": 79, - "QUOTE": 80 - } - } -} \ No newline at end of file diff --git a/grobid-home/models/funding-acknowledgement-BidLSTM_CRF_FEATURES/model_weights.hdf5 b/grobid-home/models/funding-acknowledgement-BidLSTM_CRF_FEATURES/model_weights.hdf5 deleted file mode 100644 index acbce9458b..0000000000 Binary files a/grobid-home/models/funding-acknowledgement-BidLSTM_CRF_FEATURES/model_weights.hdf5 and /dev/null differ diff --git a/grobid-home/models/funding-acknowledgement-BidLSTM_CRF_FEATURES/preprocessor.json b/grobid-home/models/funding-acknowledgement-BidLSTM_CRF_FEATURES/preprocessor.json deleted file mode 100644 index cff5ff1c07..0000000000 --- a/grobid-home/models/funding-acknowledgement-BidLSTM_CRF_FEATURES/preprocessor.json +++ /dev/null @@ -1,267 +0,0 @@ -{ - "padding": true, - "return_lengths": true, - "return_word_embeddings": true, - "return_casing": false, - "return_features": true, - "return_chars": true, - "return_bert_embeddings": false, - "vocab_char": { - "<PAD>": 0, - "<UNK>": 1, - "\"": 2, - "#": 3, - "%": 4, - "&": 5, - "'": 6, - "(": 7, - ")": 8, - "*": 9, - "+": 10, - ",": 11, - "-": 12, - ".": 13, - "/": 14, - "0": 15, - "1": 16, - "2": 17, - "3": 18, - "4": 19, - "5": 20, - "6": 21, - "7": 22, - "8": 23, - "9": 24, - ":": 25, - ";": 26, - "<": 27, - "=": 28, - ">": 29, - "@": 30, - "A": 31, - "B": 32, - "C": 33, - "D": 34, - "E": 35, - "F": 36, - "G": 37, - "H": 38, - "I": 39, - "J": 40, - "K": 41, - "L": 42, - "M": 43, - "N": 44, - "O": 45, - "P": 46, - "Q": 47, - "R": 48, - "S": 49, - "T": 50, - "U": 51, - "V": 52, - "W": 53, - "X": 54, - "Y": 55, - "Z": 56, - "[": 57, - "\\": 58, - "]": 59, - "_": 60, - "`": 61, - "a": 62, - "b": 63, - "c": 64, - "d": 65, - "e": 66, - "f": 67, - "g": 68, - "h": 69, - "i": 70, - "j": 71, - "k": 72, - "l": 73, 
- "m": 74, - "n": 75, - "o": 76, - "p": 77, - "q": 78, - "r": 79, - "s": 80, - "t": 81, - "u": 82, - "v": 83, - "w": 84, - "x": 85, - "y": 86, - "z": 87, - "{": 88, - "\u00a3": 89, - "\u00a8": 90, - "\u00ab": 91, - "\u00ae": 92, - "\u00b0": 93, - "\u00b4": 94, - "\u00b5": 95, - "\u00b8": 96, - "\u00bb": 97, - "\u00c7": 98, - "\u00c9": 99, - "\u00ce": 100, - "\u00d6": 101, - "\u00d8": 102, - "\u00df": 103, - "\u00e0": 104, - "\u00e1": 105, - "\u00e2": 106, - "\u00e3": 107, - "\u00e4": 108, - "\u00e5": 109, - "\u00e7": 110, - "\u00e8": 111, - "\u00e9": 112, - "\u00ea": 113, - "\u00eb": 114, - "\u00ed": 115, - "\u00ee": 116, - "\u00f1": 117, - "\u00f2": 118, - "\u00f3": 119, - "\u00f4": 120, - "\u00f5": 121, - "\u00f6": 122, - "\u00f8": 123, - "\u00fa": 124, - "\u00fc": 125, - "\u0101": 126, - "\u0105": 127, - "\u0107": 128, - "\u011b": 129, - "\u011f": 130, - "\u0131": 131, - "\u0142": 132, - "\u0159": 133, - "\u015f": 134, - "\u016f": 135, - "\u0229": 136, - "\u02da": 137, - "\u03b2": 138, - "\u03b3": 139, - "\u03bc": 140, - "\u0430": 141, - "\u1e4d": 142, - "\u2019": 143, - "\u201c": 144, - "\u201d": 145, - "\u2020": 146, - "\u20ac": 147, - "\u2116": 148, - "\u25a1": 149 - }, - "vocab_tag": { - "<PAD>": 0, - "B-<affiliation>": 1, - "B-<funderName>": 2, - "B-<grantName>": 3, - "B-<grantNumber>": 4, - "B-<infrastructure>": 5, - "B-<institution>": 6, - "B-<person>": 7, - "B-<programName>": 8, - "B-<projectName>": 9, - "I-<affiliation>": 10, - "I-<funderName>": 11, - "I-<grantName>": 12, - "I-<grantNumber>": 13, - "I-<infrastructure>": 14, - "I-<institution>": 15, - "I-<person>": 16, - "I-<programName>": 17, - "I-<projectName>": 18, - "O": 19 - }, - "vocab_case": [ - "<PAD>", - "numeric", - "allLower", - "allUpper", - "initialUpper", - "other", - "mainly_numeric", - "contains_digit" - ], - "max_char_length": 30, - "feature_preprocessor": { - "features_vocabulary_size": 12, - "features_indices": [ - 9, - 10, - 11, - 12, - 13, - 14, - 15 - ], - "features_map_to_index": { 
- "9": { - "LINEEND": 1, - "LINEIN": 2, - "LINESTART": 3 - }, - "10": { - "ALLCAP": 13, - "INITCAP": 14, - "NOCAPS": 15 - }, - "11": { - "ALLDIGIT": 25, - "CONTAINSDIGITS": 26, - "NODIGIT": 27 - }, - "12": { - "0": 37, - "1": 38 - }, - "13": { - "0": 49, - "1": 50 - }, - "14": { - "0": 61, - "1": 62 - }, - "15": { - "COMMA": 73, - "DOT": 74, - "ENDBRACKET": 75, - "HYPHEN": 76, - "NOPUNCT": 77, - "OPENBRACKET": 78, - "PUNCT": 79, - "QUOTE": 80 - } - } - }, - "indice_tag": { - "0": "<PAD>", - "1": "B-<affiliation>", - "2": "B-<funderName>", - "3": "B-<grantName>", - "4": "B-<grantNumber>", - "5": "B-<infrastructure>", - "6": "B-<institution>", - "7": "B-<person>", - "8": "B-<programName>", - "9": "B-<projectName>", - "10": "I-<affiliation>", - "11": "I-<funderName>", - "12": "I-<grantName>", - "13": "I-<grantNumber>", - "14": "I-<infrastructure>", - "15": "I-<institution>", - "16": "I-<person>", - "17": "I-<programName>", - "18": "I-<projectName>", - "19": "O" - } -} \ No newline at end of file diff --git a/grobid-home/models/header-BidLSTM_CRF/config.json b/grobid-home/models/header-BidLSTM_CRF/config.json deleted file mode 100644 index 43aaaa3652..0000000000 --- a/grobid-home/models/header-BidLSTM_CRF/config.json +++ /dev/null @@ -1,26 +0,0 @@ -{ - "model_name": "header-BidLSTM_CRF", - "architecture": "BidLSTM_CRF", - "embeddings_name": "glove-840B", - "char_vocab_size": 331, - "case_vocab_size": 8, - "char_embedding_size": 25, - "num_char_lstm_units": 25, - "max_char_length": 30, - "features_vocabulary_size": 12, - "features_indices": null, - "features_embedding_size": 4, - "features_lstm_units": 4, - "max_sequence_length": 3000, - "word_embedding_size": 300, - "num_word_lstm_units": 100, - "case_embedding_size": 5, - "dropout": 0.5, - "recurrent_dropout": 0.5, - "use_crf": true, - "use_chain_crf": false, - "fold_number": 1, - "batch_size": 9, - "transformer_name": null, - "use_ELMo": false -} \ No newline at end of file diff --git 
a/grobid-home/models/header-BidLSTM_CRF/model_weights.hdf5 b/grobid-home/models/header-BidLSTM_CRF/model_weights.hdf5 deleted file mode 100644 index 8adb000b2d..0000000000 Binary files a/grobid-home/models/header-BidLSTM_CRF/model_weights.hdf5 and /dev/null differ diff --git a/grobid-home/models/header-BidLSTM_CRF/preprocessor.json b/grobid-home/models/header-BidLSTM_CRF/preprocessor.json deleted file mode 100644 index 095b9de412..0000000000 --- a/grobid-home/models/header-BidLSTM_CRF/preprocessor.json +++ /dev/null @@ -1,438 +0,0 @@ -{ - "padding": true, - "return_lengths": true, - "return_word_embeddings": true, - "return_casing": false, - "return_features": false, - "return_chars": true, - "return_bert_embeddings": false, - "vocab_char": { - "<PAD>": 0, - "<UNK>": 1, - "!": 2, - "\"": 3, - "#": 4, - "$": 5, - "%": 6, - "&": 7, - "'": 8, - "(": 9, - ")": 10, - "*": 11, - "+": 12, - ",": 13, - "-": 14, - ".": 15, - "/": 16, - "0": 17, - "1": 18, - "2": 19, - "3": 20, - "4": 21, - "5": 22, - "6": 23, - "7": 24, - "8": 25, - "9": 26, - ":": 27, - ";": 28, - "<": 29, - "=": 30, - ">": 31, - "?": 32, - "@": 33, - "A": 34, - "B": 35, - "C": 36, - "D": 37, - "E": 38, - "F": 39, - "G": 40, - "H": 41, - "I": 42, - "J": 43, - "K": 44, - "L": 45, - "M": 46, - "N": 47, - "O": 48, - "P": 49, - "Q": 50, - "R": 51, - "S": 52, - "T": 53, - "U": 54, - "V": 55, - "W": 56, - "X": 57, - "Y": 58, - "Z": 59, - "[": 60, - "\\": 61, - "]": 62, - "^": 63, - "_": 64, - "`": 65, - "a": 66, - "b": 67, - "c": 68, - "d": 69, - "e": 70, - "f": 71, - "g": 72, - "h": 73, - "i": 74, - "j": 75, - "k": 76, - "l": 77, - "m": 78, - "n": 79, - "o": 80, - "p": 81, - "q": 82, - "r": 83, - "s": 84, - "t": 85, - "u": 86, - "v": 87, - "w": 88, - "x": 89, - "y": 90, - "z": 91, - "{": 92, - "|": 93, - "}": 94, - "~": 95, - "\u00a1": 96, - "\u00a2": 97, - "\u00a3": 98, - "\u00a4": 99, - "\u00a5": 100, - "\u00a7": 101, - "\u00a8": 102, - "\u00a9": 103, - "\u00aa": 104, - "\u00ab": 105, - "\u00ae": 106, - 
"\u00af": 107, - "\u00b0": 108, - "\u00b1": 109, - "\u00b2": 110, - "\u00b4": 111, - "\u00b5": 112, - "\u00b6": 113, - "\u00b8": 114, - "\u00bb": 115, - "\u00bc": 116, - "\u00c0": 117, - "\u00c1": 118, - "\u00c2": 119, - "\u00c3": 120, - "\u00c5": 121, - "\u00c9": 122, - "\u00ca": 123, - "\u00cc": 124, - "\u00ce": 125, - "\u00d3": 126, - "\u00d5": 127, - "\u00d6": 128, - "\u00d7": 129, - "\u00d8": 130, - "\u00df": 131, - "\u00e0": 132, - "\u00e1": 133, - "\u00e2": 134, - "\u00e3": 135, - "\u00e4": 136, - "\u00e5": 137, - "\u00e7": 138, - "\u00e8": 139, - "\u00e9": 140, - "\u00ea": 141, - "\u00eb": 142, - "\u00ed": 143, - "\u00ee": 144, - "\u00ef": 145, - "\u00f1": 146, - "\u00f3": 147, - "\u00f4": 148, - "\u00f5": 149, - "\u00f6": 150, - "\u00f8": 151, - "\u00fa": 152, - "\u00fb": 153, - "\u00fc": 154, - "\u00fe": 155, - "\u00ff": 156, - "\u0100": 157, - "\u0103": 158, - "\u0104": 159, - "\u0107": 160, - "\u010c": 161, - "\u010d": 162, - "\u0117": 163, - "\u0119": 164, - "\u0130": 165, - "\u0131": 166, - "\u0141": 167, - "\u0142": 168, - "\u0144": 169, - "\u014c": 170, - "\u0159": 171, - "\u015f": 172, - "\u0160": 173, - "\u0161": 174, - "\u0179": 175, - "\u017a": 176, - "\u017c": 177, - "\u017d": 178, - "\u017e": 179, - "\u0202": 180, - "\u0288": 181, - "\u02d9": 182, - "\u0351": 183, - "\u0352": 184, - "\u0353": 185, - "\u0354": 186, - "\u0357": 187, - "\u0358": 188, - "\u0393": 189, - "\u0394": 190, - "\u0398": 191, - "\u039b": 192, - "\u03a0": 193, - "\u03a5": 194, - "\u03a8": 195, - "\u03b1": 196, - "\u03b2": 197, - "\u03b3": 198, - "\u03b4": 199, - "\u03b5": 200, - "\u03b6": 201, - "\u03b7": 202, - "\u03b8": 203, - "\u03ba": 204, - "\u03bb": 205, - "\u03bc": 206, - "\u03bd": 207, - "\u03be": 208, - "\u03c0": 209, - "\u03c1": 210, - "\u03c3": 211, - "\u03c4": 212, - "\u03c6": 213, - "\u03c7": 214, - "\u03c8": 215, - "\u03c9": 216, - "\u03d5": 217, - "\u03e9": 218, - "\u03ea": 219, - "\u03eb": 220, - "\u03ed": 221, - "\u03f3": 222, - "\u03fd": 223, - "\u03fe": 
224, - "\u0408": 225, - "\u0409": 226, - "\u0411": 227, - "\u0418": 228, - "\u041b": 229, - "\u041f": 230, - "\u0424": 231, - "\u0426": 232, - "\u0431": 233, - "\u0432": 234, - "\u0433": 235, - "\u0434": 236, - "\u0437": 237, - "\u0438": 238, - "\u0439": 239, - "\u043a": 240, - "\u043b": 241, - "\u043c": 242, - "\u043d": 243, - "\u043f": 244, - "\u0440": 245, - "\u0441": 246, - "\u0442": 247, - "\u0443": 248, - "\u0444": 249, - "\u0445": 250, - "\u0446": 251, - "\u0447": 252, - "\u0448": 253, - "\u0449": 254, - "\u044b": 255, - "\u044c": 256, - "\u044d": 257, - "\u044e": 258, - "\u044f": 259, - "\u0451": 260, - "\u0546": 261, - "\u060a": 262, - "\u060d": 263, - "\u065e": 264, - "\u0728": 265, - "\u0846": 266, - "\u0be6": 267, - "\u0be7": 268, - "\u1c14": 269, - "\u1e46": 270, - "\u2020": 271, - "\u2021": 272, - "\u2022": 273, - "\u202b": 274, - "\u202c": 275, - "\u2032": 276, - "\u2039": 277, - "\u204e": 278, - "\u2113": 279, - "\u2122": 280, - "\u2162": 281, - "\u2192": 282, - "\u2193": 283, - "\u21b5": 284, - "\u21d1": 285, - "\u21e4": 286, - "\u2206": 287, - "\u2208": 288, - "\u2212": 289, - "\u221a": 290, - "\u221e": 291, - "\u223c": 292, - "\u2243": 293, - "\u2248": 294, - "\u2264": 295, - "\u2265": 296, - "\u2299": 297, - "\u22c5": 298, - "\u22c6": 299, - "\u232c": 300, - "\u2423": 301, - "\u2424": 302, - "\u24d2": 303, - "\u25a1": 304, - "\u262f": 305, - "\u2663": 306, - "\u2666": 307, - "\u2709": 308, - "\u271d": 309, - "\u2e38": 310, - "\u318d": 311, - "\uf761": 312, - "\uf764": 313, - "\uf765": 314, - "\uf767": 315, - "\uf769": 316, - "\uf76b": 317, - "\uf76c": 318, - "\uf76e": 319, - "\uf76f": 320, - "\uf770": 321, - "\uf772": 322, - "\uf773": 323, - "\uf774": 324, - "\uf777": 325, - "\uf779": 326, - "\uf8e9": 327, - "\uff0c": 328, - "\uff1b": 329, - "\ufffd": 330 - }, - "vocab_tag": { - "<PAD>": 0, - "B-<abstract>": 1, - "B-<address>": 2, - "B-<affiliation>": 3, - "B-<author>": 4, - "B-<availability>": 5, - "B-<copyright>": 6, - "B-<date>": 7, - 
"B-<doctype>": 8, - "B-<editor>": 9, - "B-<email>": 10, - "B-<funding>": 11, - "B-<group>": 12, - "B-<keyword>": 13, - "B-<meeting>": 14, - "B-<pubnum>": 15, - "B-<reference>": 16, - "B-<submission>": 17, - "B-<title>": 18, - "B-<web>": 19, - "I-<abstract>": 20, - "I-<address>": 21, - "I-<affiliation>": 22, - "I-<author>": 23, - "I-<availability>": 24, - "I-<copyright>": 25, - "I-<date>": 26, - "I-<doctype>": 27, - "I-<editor>": 28, - "I-<email>": 29, - "I-<funding>": 30, - "I-<group>": 31, - "I-<keyword>": 32, - "I-<meeting>": 33, - "I-<pubnum>": 34, - "I-<reference>": 35, - "I-<submission>": 36, - "I-<title>": 37, - "I-<web>": 38, - "O": 39 - }, - "vocab_case": [ - "<PAD>", - "numeric", - "allLower", - "allUpper", - "initialUpper", - "other", - "mainly_numeric", - "contains_digit" - ], - "max_char_length": 30, - "feature_preprocessor": null, - "indice_tag": { - "0": "<PAD>", - "1": "B-<abstract>", - "2": "B-<address>", - "3": "B-<affiliation>", - "4": "B-<author>", - "5": "B-<availability>", - "6": "B-<copyright>", - "7": "B-<date>", - "8": "B-<doctype>", - "9": "B-<editor>", - "10": "B-<email>", - "11": "B-<funding>", - "12": "B-<group>", - "13": "B-<keyword>", - "14": "B-<meeting>", - "15": "B-<pubnum>", - "16": "B-<reference>", - "17": "B-<submission>", - "18": "B-<title>", - "19": "B-<web>", - "20": "I-<abstract>", - "21": "I-<address>", - "22": "I-<affiliation>", - "23": "I-<author>", - "24": "I-<availability>", - "25": "I-<copyright>", - "26": "I-<date>", - "27": "I-<doctype>", - "28": "I-<editor>", - "29": "I-<email>", - "30": "I-<funding>", - "31": "I-<group>", - "32": "I-<keyword>", - "33": "I-<meeting>", - "34": "I-<pubnum>", - "35": "I-<reference>", - "36": "I-<submission>", - "37": "I-<title>", - "38": "I-<web>", - "39": "O" - } -} \ No newline at end of file diff --git a/grobid-home/models/header-BidLSTM_CRF_FEATURES-with_ELMo/config.json b/grobid-home/models/header-BidLSTM_CRF_FEATURES-with_ELMo/config.json deleted file mode 100644 index 
1101a1ae26..0000000000 --- a/grobid-home/models/header-BidLSTM_CRF_FEATURES-with_ELMo/config.json +++ /dev/null @@ -1,148 +0,0 @@ -{ - "model_name": "header-BidLSTM_CRF_FEATURES-with_ELMo", - "architecture": "BidLSTM_CRF_FEATURES", - "embeddings_name": "glove-840B", - "char_vocab_size": 325, - "case_vocab_size": 8, - "char_embedding_size": 25, - "num_char_lstm_units": 25, - "max_char_length": 30, - "features_vocabulary_size": 12, - "features_indices": [ - 9, - 10, - 11, - 12, - 13, - 14, - 15, - 16, - 17, - 18, - 19, - 20, - 21, - 22, - 23, - 24, - 25, - 26, - 27, - 28, - 29, - 30 - ], - "features_embedding_size": 4, - "features_lstm_units": 4, - "max_sequence_length": 1500, - "word_embedding_size": 1324, - "num_word_lstm_units": 100, - "case_embedding_size": 5, - "dropout": 0.5, - "recurrent_dropout": 0.5, - "use_crf": true, - "use_chain_crf": false, - "fold_number": 1, - "batch_size": 10, - "transformer_name": null, - "use_ELMo": true, - "features_map_to_index": { - "9": { - "BLOCKEND": 1, - "BLOCKIN": 2, - "BLOCKSTART": 3 - }, - "10": { - "LINEEND": 13, - "LINEIN": 14, - "LINESTART": 15 - }, - "11": { - "ALIGNEDLEFT": 25, - "LINEINDENT": 26 - }, - "12": { - "NEWFONT": 37, - "SAMEFONT": 38 - }, - "13": { - "HIGHERFONT": 49, - "LOWERFONT": 50, - "SAMEFONTSIZE": 51 - }, - "14": { - "0": 61, - "1": 62 - }, - "15": { - "0": 73, - "1": 74 - }, - "16": { - "ALLCAP": 85, - "INITCAP": 86, - "NOCAPS": 87 - }, - "17": { - "ALLDIGIT": 97, - "CONTAINSDIGITS": 98, - "NODIGIT": 99 - }, - "18": { - "0": 109, - "1": 110 - }, - "19": { - "0": 121, - "1": 122 - }, - "20": { - "0": 133, - "1": 134 - }, - "21": { - "0": 145, - "1": 146 - }, - "22": { - "0": 157, - "1": 158 - }, - "23": { - "0": 169, - "1": 170 - }, - "24": { - "0": 181, - "1": 182 - }, - "25": { - "0": 193, - "1": 194 - }, - "26": { - "COMMA": 205, - "DOT": 206, - "ENDBRACKET": 207, - "HYPHEN": 208, - "NOPUNCT": 209, - "OPENBRACKET": 210, - "PUNCT": 211, - "QUOTE": 212 - }, - "27": { - "0": 217, - "1": 218 - }, - 
"28": { - "0": 229 - }, - "29": { - "0": 241, - "1": 242 - }, - "30": { - "0": 253 - } - } -} \ No newline at end of file diff --git a/grobid-home/models/header-BidLSTM_CRF_FEATURES-with_ELMo/model_weights.hdf5 b/grobid-home/models/header-BidLSTM_CRF_FEATURES-with_ELMo/model_weights.hdf5 deleted file mode 100644 index 1aee51d1fe..0000000000 Binary files a/grobid-home/models/header-BidLSTM_CRF_FEATURES-with_ELMo/model_weights.hdf5 and /dev/null differ diff --git a/grobid-home/models/header-BidLSTM_CRF_FEATURES-with_ELMo/preprocessor.json b/grobid-home/models/header-BidLSTM_CRF_FEATURES-with_ELMo/preprocessor.json deleted file mode 100644 index e38f22aefc..0000000000 --- a/grobid-home/models/header-BidLSTM_CRF_FEATURES-with_ELMo/preprocessor.json +++ /dev/null @@ -1,553 +0,0 @@ -{ - "padding": true, - "return_lengths": true, - "return_word_embeddings": true, - "return_casing": false, - "return_features": true, - "return_chars": true, - "return_bert_embeddings": false, - "vocab_char": { - "<PAD>": 0, - "<UNK>": 1, - "!": 2, - "\"": 3, - "#": 4, - "$": 5, - "%": 6, - "&": 7, - "'": 8, - "(": 9, - ")": 10, - "*": 11, - "+": 12, - ",": 13, - "-": 14, - ".": 15, - "/": 16, - "0": 17, - "1": 18, - "2": 19, - "3": 20, - "4": 21, - "5": 22, - "6": 23, - "7": 24, - "8": 25, - "9": 26, - ":": 27, - ";": 28, - "<": 29, - "=": 30, - ">": 31, - "?": 32, - "@": 33, - "A": 34, - "B": 35, - "C": 36, - "D": 37, - "E": 38, - "F": 39, - "G": 40, - "H": 41, - "I": 42, - "J": 43, - "K": 44, - "L": 45, - "M": 46, - "N": 47, - "O": 48, - "P": 49, - "Q": 50, - "R": 51, - "S": 52, - "T": 53, - "U": 54, - "V": 55, - "W": 56, - "X": 57, - "Y": 58, - "Z": 59, - "[": 60, - "\\": 61, - "]": 62, - "^": 63, - "_": 64, - "`": 65, - "a": 66, - "b": 67, - "c": 68, - "d": 69, - "e": 70, - "f": 71, - "g": 72, - "h": 73, - "i": 74, - "j": 75, - "k": 76, - "l": 77, - "m": 78, - "n": 79, - "o": 80, - "p": 81, - "q": 82, - "r": 83, - "s": 84, - "t": 85, - "u": 86, - "v": 87, - "w": 88, - "x": 89, - "y": 90, 
- "z": 91, - "{": 92, - "|": 93, - "}": 94, - "~": 95, - "\u00a1": 96, - "\u00a2": 97, - "\u00a3": 98, - "\u00a4": 99, - "\u00a5": 100, - "\u00a7": 101, - "\u00a9": 102, - "\u00aa": 103, - "\u00ab": 104, - "\u00ae": 105, - "\u00af": 106, - "\u00b0": 107, - "\u00b1": 108, - "\u00b2": 109, - "\u00b5": 110, - "\u00b6": 111, - "\u00b8": 112, - "\u00bb": 113, - "\u00bc": 114, - "\u00c0": 115, - "\u00c1": 116, - "\u00c2": 117, - "\u00c3": 118, - "\u00c5": 119, - "\u00c9": 120, - "\u00ca": 121, - "\u00cc": 122, - "\u00ce": 123, - "\u00d3": 124, - "\u00d5": 125, - "\u00d6": 126, - "\u00d7": 127, - "\u00d8": 128, - "\u00df": 129, - "\u00e0": 130, - "\u00e1": 131, - "\u00e2": 132, - "\u00e3": 133, - "\u00e4": 134, - "\u00e5": 135, - "\u00e7": 136, - "\u00e8": 137, - "\u00e9": 138, - "\u00ea": 139, - "\u00eb": 140, - "\u00ed": 141, - "\u00ee": 142, - "\u00ef": 143, - "\u00f1": 144, - "\u00f3": 145, - "\u00f4": 146, - "\u00f5": 147, - "\u00f6": 148, - "\u00f8": 149, - "\u00fa": 150, - "\u00fb": 151, - "\u00fc": 152, - "\u00fe": 153, - "\u00ff": 154, - "\u0100": 155, - "\u0103": 156, - "\u0104": 157, - "\u0107": 158, - "\u010c": 159, - "\u010d": 160, - "\u0117": 161, - "\u0119": 162, - "\u0130": 163, - "\u0141": 164, - "\u0142": 165, - "\u0144": 166, - "\u014c": 167, - "\u0159": 168, - "\u015f": 169, - "\u0160": 170, - "\u0161": 171, - "\u0179": 172, - "\u017a": 173, - "\u017c": 174, - "\u017d": 175, - "\u017e": 176, - "\u0202": 177, - "\u0288": 178, - "\u02d9": 179, - "\u0351": 180, - "\u0352": 181, - "\u0353": 182, - "\u0354": 183, - "\u0357": 184, - "\u0358": 185, - "\u0393": 186, - "\u0394": 187, - "\u0398": 188, - "\u039b": 189, - "\u03a0": 190, - "\u03a5": 191, - "\u03a8": 192, - "\u03b1": 193, - "\u03b2": 194, - "\u03b4": 195, - "\u03b5": 196, - "\u03b6": 197, - "\u03b7": 198, - "\u03b8": 199, - "\u03ba": 200, - "\u03bb": 201, - "\u03bc": 202, - "\u03bd": 203, - "\u03be": 204, - "\u03c0": 205, - "\u03c1": 206, - "\u03c3": 207, - "\u03c4": 208, - "\u03c6": 209, - 
"\u03c7": 210, - "\u03c8": 211, - "\u03c9": 212, - "\u03d5": 213, - "\u03e9": 214, - "\u03ea": 215, - "\u03eb": 216, - "\u03ed": 217, - "\u03f3": 218, - "\u03fd": 219, - "\u03fe": 220, - "\u0408": 221, - "\u0409": 222, - "\u0411": 223, - "\u0418": 224, - "\u041b": 225, - "\u041f": 226, - "\u0424": 227, - "\u0426": 228, - "\u0431": 229, - "\u0432": 230, - "\u0433": 231, - "\u0434": 232, - "\u0437": 233, - "\u0438": 234, - "\u0439": 235, - "\u043a": 236, - "\u043b": 237, - "\u043c": 238, - "\u043d": 239, - "\u043f": 240, - "\u0440": 241, - "\u0441": 242, - "\u0442": 243, - "\u0443": 244, - "\u0444": 245, - "\u0445": 246, - "\u0446": 247, - "\u0447": 248, - "\u0448": 249, - "\u0449": 250, - "\u044b": 251, - "\u044c": 252, - "\u044d": 253, - "\u044e": 254, - "\u044f": 255, - "\u0451": 256, - "\u0546": 257, - "\u060a": 258, - "\u060d": 259, - "\u065e": 260, - "\u0728": 261, - "\u0846": 262, - "\u0be6": 263, - "\u0be7": 264, - "\u1c14": 265, - "\u1e46": 266, - "\u2020": 267, - "\u2021": 268, - "\u2022": 269, - "\u202b": 270, - "\u202c": 271, - "\u2032": 272, - "\u2039": 273, - "\u204e": 274, - "\u2113": 275, - "\u2122": 276, - "\u2162": 277, - "\u2192": 278, - "\u2193": 279, - "\u21b5": 280, - "\u21d1": 281, - "\u21e4": 282, - "\u2206": 283, - "\u2208": 284, - "\u2212": 285, - "\u221a": 286, - "\u221e": 287, - "\u223c": 288, - "\u2243": 289, - "\u2248": 290, - "\u2264": 291, - "\u2265": 292, - "\u2299": 293, - "\u22c5": 294, - "\u22c6": 295, - "\u232c": 296, - "\u2423": 297, - "\u2424": 298, - "\u24d2": 299, - "\u25a1": 300, - "\u262f": 301, - "\u2663": 302, - "\u2666": 303, - "\u271d": 304, - "\u2e38": 305, - "\u318d": 306, - "\uf761": 307, - "\uf764": 308, - "\uf765": 309, - "\uf767": 310, - "\uf769": 311, - "\uf76b": 312, - "\uf76c": 313, - "\uf76e": 314, - "\uf76f": 315, - "\uf770": 316, - "\uf772": 317, - "\uf773": 318, - "\uf774": 319, - "\uf777": 320, - "\uf779": 321, - "\uf8e9": 322, - "\uff0c": 323, - "\ufffd": 324 - }, - "vocab_tag": { - "<PAD>": 0, - 
"B-<abstract>": 1, - "B-<address>": 2, - "B-<affiliation>": 3, - "B-<author>": 4, - "B-<copyright>": 5, - "B-<date>": 6, - "B-<doctype>": 7, - "B-<editor>": 8, - "B-<email>": 9, - "B-<funding>": 10, - "B-<group>": 11, - "B-<keyword>": 12, - "B-<meeting>": 13, - "B-<pubnum>": 14, - "B-<reference>": 15, - "B-<submission>": 16, - "B-<title>": 17, - "B-<web>": 18, - "I-<abstract>": 19, - "I-<address>": 20, - "I-<affiliation>": 21, - "I-<author>": 22, - "I-<copyright>": 23, - "I-<date>": 24, - "I-<doctype>": 25, - "I-<editor>": 26, - "I-<email>": 27, - "I-<funding>": 28, - "I-<group>": 29, - "I-<keyword>": 30, - "I-<meeting>": 31, - "I-<pubnum>": 32, - "I-<reference>": 33, - "I-<submission>": 34, - "I-<title>": 35, - "I-<web>": 36, - "O": 37 - }, - "vocab_case": [ - "<PAD>", - "numeric", - "allLower", - "allUpper", - "initialUpper", - "other", - "mainly_numeric", - "contains_digit" - ], - "max_char_length": 30, - "feature_preprocessor": { - "features_vocabulary_size": 12, - "features_indices": [ - 9, - 10, - 11, - 12, - 13, - 14, - 15, - 16, - 17, - 18, - 19, - 20, - 21, - 22, - 23, - 24, - 25, - 26, - 27, - 28, - 29, - 30 - ], - "features_map_to_index": { - "9": { - "BLOCKEND": 1, - "BLOCKIN": 2, - "BLOCKSTART": 3 - }, - "10": { - "LINEEND": 13, - "LINEIN": 14, - "LINESTART": 15 - }, - "11": { - "ALIGNEDLEFT": 25, - "LINEINDENT": 26 - }, - "12": { - "NEWFONT": 37, - "SAMEFONT": 38 - }, - "13": { - "HIGHERFONT": 49, - "LOWERFONT": 50, - "SAMEFONTSIZE": 51 - }, - "14": { - "0": 61, - "1": 62 - }, - "15": { - "0": 73, - "1": 74 - }, - "16": { - "ALLCAP": 85, - "INITCAP": 86, - "NOCAPS": 87 - }, - "17": { - "ALLDIGIT": 97, - "CONTAINSDIGITS": 98, - "NODIGIT": 99 - }, - "18": { - "0": 109, - "1": 110 - }, - "19": { - "0": 121, - "1": 122 - }, - "20": { - "0": 133, - "1": 134 - }, - "21": { - "0": 145, - "1": 146 - }, - "22": { - "0": 157, - "1": 158 - }, - "23": { - "0": 169, - "1": 170 - }, - "24": { - "0": 181, - "1": 182 - }, - "25": { - "0": 193, - "1": 194 - }, - "26": 
{ - "COMMA": 205, - "DOT": 206, - "ENDBRACKET": 207, - "HYPHEN": 208, - "NOPUNCT": 209, - "OPENBRACKET": 210, - "PUNCT": 211, - "QUOTE": 212 - }, - "27": { - "0": 217, - "1": 218 - }, - "28": { - "0": 229 - }, - "29": { - "0": 241, - "1": 242 - }, - "30": { - "0": 253 - } - } - }, - "indice_tag": { - "0": "<PAD>", - "1": "B-<abstract>", - "2": "B-<address>", - "3": "B-<affiliation>", - "4": "B-<author>", - "5": "B-<copyright>", - "6": "B-<date>", - "7": "B-<doctype>", - "8": "B-<editor>", - "9": "B-<email>", - "10": "B-<funding>", - "11": "B-<group>", - "12": "B-<keyword>", - "13": "B-<meeting>", - "14": "B-<pubnum>", - "15": "B-<reference>", - "16": "B-<submission>", - "17": "B-<title>", - "18": "B-<web>", - "19": "I-<abstract>", - "20": "I-<address>", - "21": "I-<affiliation>", - "22": "I-<author>", - "23": "I-<copyright>", - "24": "I-<date>", - "25": "I-<doctype>", - "26": "I-<editor>", - "27": "I-<email>", - "28": "I-<funding>", - "29": "I-<group>", - "30": "I-<keyword>", - "31": "I-<meeting>", - "32": "I-<pubnum>", - "33": "I-<reference>", - "34": "I-<submission>", - "35": "I-<title>", - "36": "I-<web>", - "37": "O" - } -} \ No newline at end of file diff --git a/grobid-home/models/header-BidLSTM_CRF_FEATURES.onnx/config.json b/grobid-home/models/header-BidLSTM_CRF_FEATURES.onnx/config.json new file mode 100644 index 0000000000..990606376c --- /dev/null +++ b/grobid-home/models/header-BidLSTM_CRF_FEATURES.onnx/config.json @@ -0,0 +1,37 @@ +{ + "modelName": "grobid-header-BidLSTM_CRF_FEATURES", + "architecture": "BidLSTM_CRF_FEATURES", + "wordEmbeddingSize": 300, + "charEmbeddingSize": 25, + "numCharLstmUnits": 25, + "numWordLstmUnits": 100, + "maxSequenceLength": 3500, + "embeddingsName": "glove-840B", + "featuresIndices": [ + 9, + 10, + 11, + 12, + 13, + 14, + 15, + 16, + 17, + 18, + 19, + 20, + 21, + 22, + 23, + 24, + 25, + 26, + 27, + 28, + 29, + 30 + ], + "featuresEmbeddingSize": 4, + "featuresLstmUnits": 4, + "featuresVocabularySize": 12 +} \ No newline 
at end of file diff --git a/grobid-home/models/header-BidLSTM_CRF_FEATURES.onnx/crf_params.json b/grobid-home/models/header-BidLSTM_CRF_FEATURES.onnx/crf_params.json new file mode 100644 index 0000000000..98a3452c8a --- /dev/null +++ b/grobid-home/models/header-BidLSTM_CRF_FEATURES.onnx/crf_params.json @@ -0,0 +1,1768 @@ +{ + "transitions": [ + [ + 0.04156292974948883, + -0.42034652829170227, + -0.582455575466156, + -0.5034688115119934, + -0.4858841598033905, + -0.23182523250579834, + -0.13605178892612457, + -0.05670740082859993, + -0.430170476436615, + -0.3749964237213135, + -0.23557248711585999, + -0.2803138792514801, + -0.37073543667793274, + -0.5220673680305481, + -0.10174797475337982, + -0.4649459719657898, + -0.3134481608867645, + -0.16348674893379211, + -0.6262795329093933, + -0.20370081067085266, + -0.13714617490768433, + -0.46868982911109924, + -0.3243594169616699, + -0.46919679641723633, + -0.446424275636673, + -0.10341541469097137, + -0.11308738589286804, + -0.567082941532135, + -0.76316237449646, + -0.2719029486179352, + -0.3495528995990753, + -0.6166005730628967, + -0.6246852874755859, + -0.24770304560661316, + -0.6685033440589905, + -0.2818247377872467, + -0.026504646986722946, + -0.7123740315437317, + -0.3999952971935272, + -0.4051845371723175 + ], + [ + -0.6813356876373291, + -1.474766492843628, + -0.16950246691703796, + -0.0860653817653656, + -0.15086710453033447, + -0.36285310983657837, + -0.1413889229297638, + -0.11594749242067337, + -0.11833596974611282, + -0.018607037141919136, + -0.278219610452652, + -0.20365630090236664, + -0.1835183948278427, + -0.34602490067481995, + -0.11432009190320969, + -0.11247134208679199, + -0.0704735592007637, + -0.266855925321579, + -0.5239958763122559, + -0.17411847412586212, + 1.1674197912216187, + -0.514645516872406, + -0.32536453008651733, + -0.4876648783683777, + -0.8310027122497559, + -0.4660119116306305, + -0.37788864970207214, + -0.3253327012062073, + -0.20476830005645752, + -0.5306098461151123, + 
-0.5099935531616211, + -0.13105814158916473, + -0.8620332479476929, + -0.2939162850379944, + -0.3817184567451477, + -0.6944235563278198, + -0.6972801685333252, + -0.6911481618881226, + -0.4677702784538269, + -1.3427419662475586 + ], + [ + -0.5409645438194275, + -0.07047738879919052, + -1.3952556848526, + -0.15806199610233307, + 0.15669220685958862, + -0.1406523436307907, + -0.3703059256076813, + -0.01761159673333168, + -0.3873545825481415, + -0.12275929749011993, + 0.11652562767267227, + -0.27875685691833496, + -0.23607727885246277, + -0.06015251949429512, + -0.17262694239616394, + -0.09679384529590607, + -0.548249363899231, + 0.11960920691490173, + -0.17760060727596283, + -0.1720617413520813, + -0.1861981749534607, + 1.1027923822402954, + -1.4277780055999756, + -0.762417733669281, + -0.1602180451154709, + -0.2566070258617401, + -0.5916413068771362, + -0.4375160038471222, + -0.398407906293869, + -0.8198686242103577, + -0.792327880859375, + -0.5078124403953552, + -0.32416778802871704, + -0.9172552824020386, + -0.2622852325439453, + -0.6416853070259094, + -0.6593009233474731, + -0.30857622623443604, + -0.4165657162666321, + -0.527925968170166 + ], + [ + -0.484244167804718, + -0.3666740655899048, + -0.37780722975730896, + -1.6875845193862915, + -1.183866024017334, + -0.10146588832139969, + -0.7491909265518188, + -0.5704653859138489, + -0.655354917049408, + -0.4948749542236328, + -0.8505204319953918, + -0.5942954421043396, + -0.3796069622039795, + -0.12976579368114471, + -0.4414024353027344, + -0.228094682097435, + -0.6754174828529358, + -0.7848886251449585, + -0.505978524684906, + -0.2449217140674591, + -0.16848520934581757, + -0.9090512990951538, + 1.1570771932601929, + -1.187882900238037, + -0.4338572919368744, + -0.8199523687362671, + -0.5749700665473938, + -0.8078997135162354, + -0.9899665117263794, + -0.876798152923584, + -1.0010523796081543, + -0.9244416356086731, + -0.6079093217849731, + -0.8723459243774414, + -0.22195518016815186, + -0.745803952217102, + 
-0.6759161353111267, + -0.9199683666229248, + -0.455925315618515, + -1.4009610414505005 + ], + [ + -0.6266952157020569, + -0.5407925248146057, + -0.6304386258125305, + -1.3547453880310059, + -1.6646023988723755, + -0.1721358597278595, + -0.5518987774848938, + -0.26372185349464417, + -0.6072196960449219, + -0.870395839214325, + -0.9405350089073181, + -0.3867407739162445, + -0.32487136125564575, + -0.3842311203479767, + -0.2526833117008209, + -0.10469390451908112, + -0.5453751683235168, + -0.26929566264152527, + -0.5917561054229736, + -0.047401320189237595, + -0.45880424976348877, + -1.0397727489471436, + -1.2495393753051758, + 1.1563442945480347, + -0.378095805644989, + -0.8152077794075012, + -0.31757715344429016, + -0.8718245625495911, + -1.3014018535614014, + -1.0014152526855469, + -0.8599753379821777, + -0.6970136761665344, + -0.6627819538116455, + -0.5795347094535828, + -0.45376861095428467, + -0.7846733927726746, + -0.4451672434806824, + -1.21035635471344, + -0.5712770223617554, + -1.4532051086425781 + ], + [ + -0.4365459084510803, + -0.28249359130859375, + -0.02376020886003971, + -0.18138352036476135, + -0.20920251309871674, + -0.4420705735683441, + -0.14728674292564392, + -0.1730966567993164, + -0.09027798473834991, + -0.06269630789756775, + -0.07426708936691284, + -0.25130367279052734, + 0.018002521246671677, + -0.10293055325746536, + 0.015967577695846558, + -0.10057936608791351, + -0.05844185873866081, + -0.07831716537475586, + -0.2745784819126129, + -0.04195404797792435, + -0.49093765020370483, + -0.13960810005664825, + -0.24329142272472382, + -0.14429505169391632, + 0.8041207790374756, + -0.7003335952758789, + -0.16637469828128815, + -0.17984290421009064, + -0.1777666211128235, + -0.30532413721084595, + -0.6573166251182556, + -0.06742829829454422, + -0.4460724890232086, + -0.10141368955373764, + -0.40190449357032776, + -0.3302376866340637, + -0.5683164000511169, + -0.5364250540733337, + -0.5681791305541992, + -1.0590612888336182 + ], + [ + 
-0.1096491664648056, + -0.1868945062160492, + -0.14614057540893555, + -0.8443647623062134, + -0.4174745976924896, + -0.0730038434267044, + -1.4503209590911865, + -0.700299084186554, + -0.5353071689605713, + -0.20168323814868927, + -0.233582004904747, + -0.59527188539505, + -0.09144330769777298, + -0.2737840414047241, + -0.29488494992256165, + -0.5366660952568054, + -0.6995000839233398, + -0.4897797107696533, + -0.46223580837249756, + -0.22862350940704346, + -0.36141449213027954, + -0.3465692400932312, + -0.6881803870201111, + -0.41511377692222595, + -0.5503169894218445, + 1.1610031127929688, + -1.0007247924804688, + -0.7587050199508667, + -0.4132329523563385, + -0.4924057722091675, + -1.0153120756149292, + -0.15574948489665985, + -0.5283001065254211, + -0.7923330664634705, + -0.7999781370162964, + -1.1418731212615967, + -0.9220249652862549, + -0.46959856152534485, + -0.7797813415527344, + -1.3314197063446045 + ], + [ + -0.350909024477005, + -0.4722571074962616, + -0.34038659930229187, + -0.7337698936462402, + -0.30900928378105164, + -0.19109587371349335, + -0.8953171372413635, + -1.5924586057662964, + -0.5003796219825745, + -0.09381618350744247, + -0.2568371593952179, + -0.23446565866470337, + -0.02136874385178089, + -0.36792007088661194, + -0.34503456950187683, + -0.7644948959350586, + -1.0700143575668335, + -0.9186438918113708, + -0.5524345636367798, + -0.17378772795200348, + -0.187188059091568, + -0.5372270941734314, + -0.42948928475379944, + -0.42569872736930847, + -0.3845140039920807, + -0.7883515954017639, + 1.2048274278640747, + -0.8243076205253601, + -0.22183218598365784, + -0.3597870171070099, + -0.3538007140159607, + -0.17713113129138947, + -0.6163832545280457, + -1.0482850074768066, + -1.1051616668701172, + -1.2585625648498535, + -1.5411053895950317, + -0.615164577960968, + -0.736624538898468, + -1.1172575950622559 + ], + [ + -0.6330390572547913, + -0.40603768825531006, + -0.24949012696743011, + -0.02229117602109909, + -0.7385624051094055, + 
-0.26631587743759155, + -0.11707957834005356, + -0.6899716854095459, + -0.9221588969230652, + -0.21934917569160461, + -0.15408752858638763, + -0.38660213351249695, + -0.16131241619586945, + -0.45811140537261963, + -0.45025834441185, + -0.5852434039115906, + -0.4074675440788269, + 0.206324502825737, + -0.4051540791988373, + -0.1650168001651764, + -0.28711357712745667, + -0.3572807013988495, + -0.7843915224075317, + -0.8513976335525513, + -0.6112757325172424, + -0.8955145478248596, + -0.8060703277587891, + 1.4280718564987183, + -0.45094889402389526, + -0.3962550759315491, + -0.873701810836792, + -0.35037174820899963, + -0.7433686256408691, + -1.0379375219345093, + -1.0574424266815186, + -1.267561912536621, + -1.2847706079483032, + -1.4558340311050415, + -0.5670545101165771, + -1.0146708488464355 + ], + [ + -0.3708043098449707, + -0.08016509562730789, + -0.24943198263645172, + -0.8755296468734741, + -0.8036324381828308, + 0.022000256925821304, + -0.19891218841075897, + -0.23631413280963898, + -0.18867620825767517, + -0.6653138995170593, + -0.2017122358083725, + -0.03606778383255005, + -0.12663663923740387, + -0.17319482564926147, + 0.02351020835340023, + -0.025198351591825485, + -0.2425854206085205, + -0.1882440149784088, + -0.27000805735588074, + -0.10075734555721283, + -0.03670665994286537, + -0.3124080300331116, + -0.6170286536216736, + -1.215265154838562, + -0.19469447433948517, + -0.4169005751609802, + -0.3058970272541046, + -0.31583020091056824, + 0.9546530246734619, + -0.465960294008255, + -0.26553264260292053, + -0.09785130620002747, + -0.3066287636756897, + -0.1403069645166397, + -0.4410221576690674, + -0.39608755707740784, + -0.4093926250934601, + -0.40213125944137573, + -0.39916273951530457, + -0.9870394468307495 + ], + [ + -0.3803991675376892, + -0.49690863490104675, + -0.38411182165145874, + -0.7968136072158813, + -0.5928879380226135, + -0.12430550158023834, + -0.3443760871887207, + -0.19975829124450684, + -0.14863744378089905, + -0.093198761343956, + 
-1.4613806009292603, + -0.22524777054786682, + -0.031624432653188705, + -0.31373903155326843, + -0.11153695732355118, + -0.13967281579971313, + -0.257637619972229, + -0.4296002686023712, + -0.2023470103740692, + -0.37638479471206665, + -0.5170212984085083, + -0.972842276096344, + -0.9385592937469482, + -0.8883246183395386, + -0.4725227355957031, + -0.5023865103721619, + -0.2609289586544037, + -0.2186785638332367, + -0.37796077132225037, + 1.0229277610778809, + -0.6477054953575134, + -0.1174684464931488, + -0.935849666595459, + -0.256399542093277, + -0.488235205411911, + -0.48960357904434204, + -0.6635177731513977, + -0.4809073209762573, + -0.7808321118354797, + -1.3176100254058838 + ], + [ + -0.14119569957256317, + -0.2399723082780838, + -0.16910454630851746, + -0.3988133370876312, + -0.16579025983810425, + -0.07674484699964523, + -0.47447678446769714, + -0.14829570055007935, + -0.22340483963489532, + -0.12209080159664154, + -0.21864329278469086, + -0.83942711353302, + 0.010623214766383171, + -0.179604172706604, + -0.03827885165810585, + -0.018567802384495735, + -0.35419103503227234, + -0.20164431631565094, + -0.21746708452701569, + -0.0737922340631485, + -0.49502918124198914, + -0.3656984269618988, + -0.5985224843025208, + -0.2556264102458954, + -0.9066402316093445, + -1.0101884603500366, + -0.20245403051376343, + -0.4099595844745636, + -0.2225094437599182, + -0.560097873210907, + 1.060692548751831, + -0.1559591293334961, + -0.6336045265197754, + -0.34309056401252747, + -0.3156619071960449, + -0.5499412417411804, + -0.7333216667175293, + -0.40754327178001404, + -0.5051531791687012, + -1.367671012878418 + ], + [ + -0.4348279535770416, + -0.12248143553733826, + -0.11734054237604141, + -0.5120225548744202, + -0.44896578788757324, + -0.016921043395996094, + -0.18972915410995483, + -0.01113987062126398, + -0.14425107836723328, + -0.09558076411485672, + -0.1005316823720932, + -0.08669819682836533, + -0.11811905354261398, + -0.0995558574795723, + -0.16204391419887543, + 
-0.07633376121520996, + -0.1415678709745407, + -0.20812752842903137, + -0.26216182112693787, + 0.018229510635137558, + -0.29564911127090454, + -0.28513407707214355, + -0.8498364686965942, + -0.7939232587814331, + -0.1863965541124344, + -0.2369379699230194, + -0.05635624751448631, + -0.2300068587064743, + -0.0611715130507946, + -0.2546541094779968, + -0.4451213479042053, + 0.7374852299690247, + -0.27028486132621765, + -0.40969598293304443, + -0.11609885841608047, + -0.39875558018684387, + -0.25339314341545105, + -0.7509329915046692, + -0.0841495469212532, + -0.9139083027839661 + ], + [ + -0.5572792291641235, + -0.519359827041626, + -0.12938286364078522, + -0.27291175723075867, + -0.30714747309684753, + -0.21406503021717072, + -0.40643930435180664, + -0.24368543922901154, + -0.45103684067726135, + -0.13535144925117493, + -0.26518040895462036, + -0.2173173725605011, + -0.13737432658672333, + -1.3029110431671143, + -0.22711248695850372, + -0.37777599692344666, + -0.6159701347351074, + -0.16774292290210724, + -0.5871739387512207, + -0.0014065696159377694, + -0.7687084674835205, + -0.19936904311180115, + -0.2658281624317169, + -0.37194305658340454, + -0.5364986658096313, + -0.6791477799415588, + -0.4025857448577881, + -0.4510268568992615, + -0.3367443084716797, + -0.75540691614151, + -0.5600202679634094, + -0.15535014867782593, + 1.1982311010360718, + -0.7161053419113159, + -0.6629363894462585, + -1.0095518827438354, + -0.6781514883041382, + -0.8733344078063965, + -0.4296688735485077, + -1.429097294807434 + ], + [ + -0.29142531752586365, + -0.14716967940330505, + -0.15987709164619446, + -0.4696163535118103, + -0.24072185158729553, + 0.028466232120990753, + -0.3674014210700989, + -0.3585822880268097, + -0.3868233859539032, + 0.007031602784991264, + -0.07912008464336395, + -0.04158562049269676, + -0.09214133024215698, + -0.19614823162555695, + -0.30998560786247253, + -0.15514224767684937, + -0.7876662015914917, + -0.15473127365112305, + -0.4241771101951599, + 
-0.046541012823581696, + -0.15466803312301636, + -0.23355647921562195, + -0.5655817985534668, + -0.3462924361228943, + -0.25054487586021423, + -0.6488871574401855, + -0.5959131121635437, + -0.5412275791168213, + -0.14463770389556885, + -0.10996915400028229, + -0.5374557971954346, + -0.07842688262462616, + -0.585525393486023, + 1.06948721408844, + -0.2947211265563965, + -1.1695761680603027, + -0.8965415954589844, + -0.8885880708694458, + -0.24935682117938995, + -1.0525985956192017 + ], + [ + -0.6464415192604065, + -0.305727481842041, + -0.17866100370883942, + -0.25261542201042175, + -0.24515055119991302, + -0.006827491335570812, + -0.5698317885398865, + -0.7725009918212891, + -0.34288904070854187, + -0.10316130518913269, + -0.13547927141189575, + -0.17856958508491516, + -0.03644802048802376, + -0.3273963928222656, + -0.05469410493969917, + -1.2809638977050781, + -0.8187608122825623, + -0.5023124814033508, + -0.46329542994499207, + -0.43066495656967163, + -0.31300652027130127, + -0.2017337530851364, + -0.280733585357666, + -0.32892411947250366, + -0.43571972846984863, + -0.8307340741157532, + -0.8676496148109436, + -0.42877131700515747, + -0.08281746506690979, + -0.5332894921302795, + -0.1950041651725769, + -0.022707698866724968, + -0.7474026679992676, + -0.6845366358757019, + 0.9475741386413574, + -1.343342900276184, + -0.9077244997024536, + -0.48911574482917786, + -1.027992606163025, + -1.3189400434494019 + ], + [ + -0.36316707730293274, + -0.3786342144012451, + -0.250501811504364, + -0.5821082592010498, + -0.41846275329589844, + -0.10929112136363983, + -0.7451236248016357, + -0.9985359311103821, + -0.7053328156471252, + -0.12864002585411072, + -0.143621027469635, + -0.2634906470775604, + -0.15485702455043793, + -0.553053617477417, + -0.4649953544139862, + -0.7414917945861816, + -1.6150813102722168, + -0.6079680323600769, + -0.5825133919715881, + -0.15274299681186676, + -0.597998857498169, + -0.6733279824256897, + -0.5707519054412842, + -0.5867926478385925, + 
-0.46064597368240356, + -1.2828141450881958, + -1.129135012626648, + -0.9546836018562317, + -0.3797231614589691, + -0.5237982869148254, + -0.7738146185874939, + -0.24501749873161316, + -1.12493896484375, + -1.165708303451538, + -1.1019026041030884, + 1.365217924118042, + -1.151609182357788, + -0.8651992082595825, + -0.5809709429740906, + -1.498673915863037 + ], + [ + -0.23034290969371796, + -0.40542107820510864, + -0.24694037437438965, + -0.5687817335128784, + -0.1727573573589325, + -0.12565280497074127, + -0.588555097579956, + -1.2063745260238647, + -0.5229619741439819, + -0.13348151743412018, + -0.45546671748161316, + -0.3462865948677063, + 0.022690359503030777, + -0.2398720532655716, + -0.26423487067222595, + -0.3952869474887848, + -0.5276382565498352, + -1.1613562107086182, + -0.37541380524635315, + -0.0429205484688282, + -0.2812117338180542, + -0.5051730871200562, + -0.5934233665466309, + -0.22144430875778198, + -0.46291202306747437, + -0.7152884006500244, + -0.8923318982124329, + -0.6108689308166504, + -0.09117639064788818, + -0.5358982086181641, + -0.9405224919319153, + -0.05702901631593704, + -0.27311381697654724, + -0.5293586850166321, + -0.6918503046035767, + -0.7813796401023865, + 1.0490317344665527, + -0.36871179938316345, + -0.5944532752037048, + -1.4385783672332764 + ], + [ + -0.7026892304420471, + -0.6112515330314636, + -0.11681456863880157, + -0.5532870292663574, + -0.3529999554157257, + -0.15321123600006104, + -0.3245249092578888, + -0.22842290997505188, + -1.1224476099014282, + -0.08679701387882233, + -0.22794944047927856, + -0.20033913850784302, + -0.12310652434825897, + -0.4931032061576843, + -0.31309786438941956, + -0.19886906445026398, + -0.48423391580581665, + -0.29342660307884216, + -1.59029221534729, + -0.13868442177772522, + -0.41863155364990234, + -0.2890331447124481, + -0.566576361656189, + -0.7371069192886353, + -0.6192184686660767, + -0.6282697916030884, + -0.3898712396621704, + -1.1341197490692139, + -0.32864952087402344, + 
-0.29066595435142517, + -0.48690810799598694, + -0.37654078006744385, + -0.7649629712104797, + -1.0250204801559448, + -0.6596238017082214, + -0.8896840214729309, + -0.6647070050239563, + 1.2343186140060425, + -0.3453792631626129, + -1.297972559928894 + ], + [ + -0.31789809465408325, + -0.257191926240921, + -0.012328460812568665, + -0.22293342649936676, + -0.17311148345470428, + -0.08924578130245209, + -0.3589031994342804, + -0.2580183446407318, + -0.052689384669065475, + -0.0646144449710846, + -0.380517840385437, + -0.19052205979824066, + -0.024287883192300797, + -0.19355154037475586, + -0.04858299717307091, + -0.5049391984939575, + -0.28680315613746643, + -0.2170730084180832, + -0.19818326830863953, + -0.6970682144165039, + -0.24192027747631073, + -0.2290681004524231, + -0.20936629176139832, + -0.19708527624607086, + -0.6968801021575928, + -0.8602487444877625, + -0.30266615748405457, + -0.17058084905147552, + -0.07894787937402725, + -0.9679717421531677, + -0.499275803565979, + 0.006713770795613527, + -0.29572242498397827, + -0.23887892067432404, + -1.1538605690002441, + -0.7857212424278259, + -0.7514793276786804, + -0.3496302664279938, + 0.9217904806137085, + -1.264298915863037 + ], + [ + -0.23606973886489868, + -1.4463157653808594, + -0.23220869898796082, + 0.13629260659217834, + -0.1786210983991623, + -0.4012305736541748, + 0.14486804604530334, + -0.29543042182922363, + 0.12321124970912933, + -0.22028908133506775, + -0.3052022159099579, + 0.2007196843624115, + -0.3641248941421509, + -0.2620239853858948, + -0.21142695844173431, + -0.26977068185806274, + 0.32724833488464355, + -0.022709127515554428, + -0.3768470585346222, + -0.2676788568496704, + 0.46919921040534973, + -0.2502219080924988, + -0.14228688180446625, + -0.19589297473430634, + -1.0157383680343628, + -0.3602328598499298, + -0.2956793010234833, + -0.49971845746040344, + -0.223921999335289, + -0.3289044201374054, + -0.7163994312286377, + -0.4843128025531769, + -0.7367880940437317, + -0.9378494024276733, + 
-0.33491554856300354, + -0.7461263537406921, + -0.6342778205871582, + -0.4916072487831116, + -0.44268321990966797, + -0.9400246739387512 + ], + [ + -0.4336954951286316, + 0.21285495162010193, + -1.3548504114151, + 0.44445592164993286, + 0.27642619609832764, + -0.30957698822021484, + -0.1239735409617424, + -0.020248541608452797, + -0.4148254692554474, + -0.49172282218933105, + 0.4216710329055786, + 0.035787731409072876, + -0.3574660122394562, + -0.15405511856079102, + -0.38970819115638733, + -0.4826144278049469, + 0.3571203052997589, + 0.5227870941162109, + -0.18514394760131836, + 0.1293911635875702, + -0.30987119674682617, + 0.8971624374389648, + -1.221816062927246, + -0.734688937664032, + -0.45632219314575195, + -0.5278704166412354, + -0.8619887828826904, + -0.8728556632995605, + -0.7488753199577332, + -1.1139971017837524, + -0.8810344338417053, + -0.5964037775993347, + -0.559690535068512, + -1.0759050846099854, + -0.3320060968399048, + -0.8225044012069702, + -0.798534631729126, + -0.39116230607032776, + -0.8211519122123718, + -0.2718215584754944 + ], + [ + -0.27022865414619446, + -0.025489069521427155, + 1.061098337173462, + -1.187835693359375, + -0.08859965205192566, + -0.17365822196006775, + -0.1482945680618286, + 0.002331435214728117, + -0.2552967071533203, + -0.4594759941101074, + 0.1060689315199852, + -0.024365130811929703, + -0.6649194955825806, + -0.27583613991737366, + -0.4237890839576721, + -0.23720431327819824, + 0.037234075367450714, + -0.04297833517193794, + -0.5174627304077148, + -0.03396277502179146, + -0.1732037365436554, + -1.3797804117202759, + 0.8003237247467041, + -0.6973430514335632, + -0.3991013169288635, + -0.5851643085479736, + -0.5346295237541199, + -0.697088897228241, + -0.8451032042503357, + -0.8463428616523743, + -0.9544570446014404, + -1.4208178520202637, + -0.35457390546798706, + -1.1202505826950073, + -0.1993352770805359, + -0.5961745977401733, + -0.5651789903640747, + -0.6554792523384094, + -0.6110849380493164, + -0.6645213961601257 
+ ], + [ + -0.6263826489448547, + 0.448794960975647, + -0.21956975758075714, + 0.9563042521476746, + -1.3115061521530151, + -0.14423759281635284, + -0.2814503014087677, + 0.2620517313480377, + -0.3538280725479126, + -0.7011265754699707, + -0.0537618026137352, + -0.32151874899864197, + -0.08661741763353348, + -0.3996248245239258, + -0.19852717220783234, + -0.11282169818878174, + -0.1736166626214981, + 0.12522171437740326, + -0.5370998978614807, + -0.29357272386550903, + -0.32698947191238403, + -0.662463366985321, + -1.0588432550430298, + 0.6756607890129089, + -0.3203624486923218, + -0.5049635767936707, + -0.5340639352798462, + -1.0516678094863892, + -1.5358338356018066, + -0.7544243931770325, + -0.4641764163970947, + -1.2537416219711304, + -0.4743019938468933, + -0.49880701303482056, + -0.5088231563568115, + -0.7872334718704224, + -0.34279975295066833, + -0.9644922018051147, + -0.31442761421203613, + -0.3138767182826996 + ], + [ + -0.5070892572402954, + -0.5308007597923279, + -0.22313173115253448, + -0.3716026246547699, + -0.2520104944705963, + -0.9058025479316711, + -0.27356281876564026, + -0.41171324253082275, + -0.6004811525344849, + -0.2200334072113037, + -0.4952961504459381, + -0.7090423107147217, + -0.11303672939538956, + -0.39241334795951843, + -0.30231383442878723, + -0.35453617572784424, + -0.5606191754341125, + -0.37688302993774414, + -0.603290319442749, + -0.5254389643669128, + -1.0286805629730225, + -0.32538920640945435, + -0.5946657061576843, + -0.19192741811275482, + 0.4871574342250824, + -1.022027611732483, + -0.5660563707351685, + -0.7591540217399597, + -0.2255805879831314, + -0.6454559564590454, + -1.1442869901657104, + -0.15754275023937225, + -0.6715388894081116, + -0.7608570456504822, + -0.6398380398750305, + -0.821348249912262, + -1.029642939567566, + -0.8389519453048706, + -1.1561096906661987, + -1.1223244667053223 + ], + [ + 0.10197855532169342, + -0.16327281296253204, + -0.2579091787338257, + -0.5245249271392822, + -0.05406729131937027, + 
-0.6982443332672119, + -1.4404972791671753, + -0.6188504099845886, + -0.6523463129997253, + -0.393809050321579, + -0.5271112322807312, + -0.6743000149726868, + -0.2006983906030655, + -0.3996371626853943, + -0.14575497806072235, + 0.16130957007408142, + 0.4960499405860901, + 0.05017544701695442, + 0.08038009703159332, + -0.46632927656173706, + -0.2510089576244354, + -0.37739527225494385, + -0.5296316742897034, + -0.6195923089981079, + -0.9437140226364136, + 0.6330336332321167, + -0.7182974815368652, + -1.265605092048645, + -0.9404584765434265, + -0.3079855442047119, + -0.6627950668334961, + -0.3792281150817871, + -0.5756688117980957, + -1.120900273323059, + -0.7244983911514282, + -1.1535292863845825, + -0.6660135984420776, + -0.5793812274932861, + -1.1335818767547607, + -1.1479418277740479 + ], + [ + 0.38318324089050293, + -0.2949684262275696, + -0.5106570720672607, + 0.22697028517723083, + -0.06791551411151886, + -0.21506793797016144, + 0.14140965044498444, + -1.0935304164886475, + 0.27524086833000183, + -0.33363181352615356, + -0.45296284556388855, + 0.12732155621051788, + -0.2221115231513977, + -0.011475641280412674, + 0.021220413967967033, + -0.05930551886558533, + 0.3832757771015167, + -0.22517706453800201, + 0.5619155168533325, + -0.4148225784301758, + -0.5350783467292786, + -0.7181210517883301, + -0.4170970320701599, + -0.5437574982643127, + -0.6320541501045227, + -0.9587052464485168, + 1.0398529767990112, + -0.9753633737564087, + -0.5263147354125977, + -0.44543933868408203, + -0.5084238648414612, + -0.24288222193717957, + -0.6249004006385803, + -1.2945687770843506, + -0.954213559627533, + -1.2774262428283691, + -1.3194218873977661, + -0.9062341451644897, + -0.7147584557533264, + 0.06210535392165184 + ], + [ + -0.5051313042640686, + -0.5246748924255371, + -0.2822871208190918, + -0.7797554135322571, + -0.811289370059967, + -0.0855674147605896, + -0.5346070528030396, + -0.8744418621063232, + 0.29167866706848145, + -0.33962762355804443, + -0.22174429893493652, + 
-0.22944435477256775, + -0.30147725343704224, + -0.4168172776699066, + -0.5083267688751221, + 0.04293220490217209, + 0.23917457461357117, + 0.05598636344075203, + 0.8834315538406372, + -0.17927446961402893, + -0.242061048746109, + -0.5209828019142151, + -0.785039484500885, + -0.8804749846458435, + -0.6486948728561401, + -1.015662431716919, + -0.9519917368888855, + 0.5595604181289673, + -0.5415106415748596, + -0.402771532535553, + -0.6580290794372559, + -0.34239545464515686, + -0.7175209522247314, + -1.195926308631897, + -0.7918554544448853, + -1.1720532178878784, + -0.9991738796234131, + -1.2991547584533691, + -0.4898427128791809, + -0.11035766452550888 + ], + [ + -0.11819321662187576, + -0.2270825058221817, + -0.4102851152420044, + 0.5404738187789917, + -0.8372912406921387, + 0.02756374515593052, + 0.10799206793308258, + -0.1922263652086258, + -0.27897730469703674, + -0.6563141345977783, + -0.4382755160331726, + -0.2026623636484146, + -0.08526606857776642, + -0.10464176535606384, + -0.14120721817016602, + -0.106257863342762, + -0.4128992557525635, + -0.16693070530891418, + -0.29915890097618103, + -0.08575282990932465, + -0.27699726819992065, + -0.7068314552307129, + -1.1883654594421387, + -1.4734282493591309, + -0.30308404564857483, + -1.074573040008545, + -0.42632225155830383, + -0.6243751645088196, + 0.778475821018219, + -0.7221218347549438, + -0.36355239152908325, + -0.22360914945602417, + -0.9331650137901306, + -0.45516207814216614, + -0.6594123244285583, + -0.8509856462478638, + -0.5235475301742554, + -0.525314211845398, + -0.5332517027854919, + -0.48189350962638855 + ], + [ + -0.13280554115772247, + -0.4661535322666168, + -0.20665624737739563, + 0.05569493770599365, + 0.5110679864883423, + -0.25412365794181824, + 0.1816188395023346, + 0.24341408908367157, + 0.1728239804506302, + -0.5351969003677368, + -1.2690777778625488, + 0.26813873648643494, + -0.33636653423309326, + -0.6350238919258118, + -0.2724403738975525, + -0.11094849556684494, + 0.3842201232910156, 
+ -0.29999351501464844, + -0.19094416499137878, + -0.062415074557065964, + -0.4385630786418915, + -1.0005778074264526, + -0.7862153053283691, + -0.8003290295600891, + -0.5067071914672852, + -0.46321651339530945, + -0.7780666947364807, + -0.8882781267166138, + -0.9520387649536133, + 0.6393846273422241, + -0.7552918791770935, + -0.46548253297805786, + -0.784381628036499, + -0.5951924324035645, + -0.5723059773445129, + -0.6876113414764404, + -0.7346171736717224, + -0.4135490655899048, + -1.1644666194915771, + -0.2836045026779175 + ], + [ + -0.1274297535419464, + 0.008997329510748386, + -0.5222504734992981, + -0.36204278469085693, + -0.6607531309127808, + -0.5474413633346558, + -0.3231222331523895, + -0.32273128628730774, + -0.6724898815155029, + -0.18730103969573975, + -0.3565734028816223, + -1.4610234498977661, + -0.35356900095939636, + -0.5140745043754578, + -0.4845224618911743, + -0.27587926387786865, + -0.6584151387214661, + -0.5026947855949402, + -0.3978222906589508, + -0.39153364300727844, + -0.6897847652435303, + -0.9011215567588806, + -0.9116640686988831, + -0.4401787221431732, + -1.1851575374603271, + -0.5727109909057617, + -0.366688996553421, + -0.8015463948249817, + -0.3768409490585327, + -0.8048186302185059, + 0.6427338719367981, + -0.6137231588363647, + -0.6721800565719604, + -0.9919865727424622, + -0.28422579169273376, + -0.5548990368843079, + -1.1084535121917725, + -0.7177608609199524, + -0.8815315961837769, + -1.150091290473938 + ], + [ + -0.6270958781242371, + -0.31494495272636414, + -0.3429671823978424, + 0.4348585903644562, + -0.8601574301719666, + -0.09037350118160248, + -0.18524733185768127, + -0.17186665534973145, + -0.2354670763015747, + -0.13700519502162933, + -0.16439518332481384, + 0.20578794181346893, + -0.3018346428871155, + -0.059829019010066986, + -0.06508678197860718, + -0.12817634642124176, + -0.1845833957195282, + -0.2442999929189682, + -0.500748336315155, + 0.05316472798585892, + -0.45354384183883667, + -0.5063074231147766, + 
-1.2598968744277954, + -0.9861728549003601, + -0.129134863615036, + -0.2026510089635849, + -0.25762102007865906, + -0.46471264958381653, + -0.24046321213245392, + -0.4158719778060913, + -0.5865145325660706, + 0.47584444284439087, + -0.3503740131855011, + -0.5837350487709045, + -0.15452851355075836, + -0.6362277269363403, + -0.4688337445259094, + -1.2961217164993286, + -0.24461756646633148, + -0.41765713691711426 + ], + [ + -0.4329317510128021, + -0.23693867027759552, + -0.4559042453765869, + -0.11878562718629837, + -0.36096733808517456, + -0.27717387676239014, + -0.1884436160326004, + -0.6868228316307068, + -0.7924507856369019, + -0.24318307638168335, + -0.8223795294761658, + 0.2096933126449585, + -0.25050583481788635, + -1.1807514429092407, + 0.11585768312215805, + -0.23341068625450134, + 0.004753140266984701, + 0.27175387740135193, + -0.766198992729187, + -0.47283053398132324, + -0.7449429631233215, + -0.46376675367355347, + -0.4070962369441986, + -0.7076084613800049, + -0.5476935505867004, + -0.7695042490959167, + -0.5998086929321289, + -0.8001632690429688, + -1.0328688621520996, + -0.7878431081771851, + -0.800920307636261, + -0.3687223196029663, + 0.6117673516273499, + -1.2697592973709106, + -0.8840562701225281, + -1.0815774202346802, + -0.8366291522979736, + -0.932192862033844, + -0.742341935634613, + -0.9671081900596619 + ], + [ + -0.22664734721183777, + -0.3784821629524231, + -0.9590899348258972, + -0.2505393624305725, + -0.6114493608474731, + -0.1330420821905136, + -0.4305511713027954, + -1.0179345607757568, + -0.8169320225715637, + -0.10289542376995087, + -0.202921062707901, + 0.21325542032718658, + -0.24924060702323914, + -0.476957768201828, + -0.8324711322784424, + -0.7506987452507019, + -1.1040763854980469, + -0.6716598868370056, + 0.2165674865245819, + -0.2538530230522156, + -0.7705047130584717, + -1.2258223295211792, + -1.1192984580993652, + -0.9047117233276367, + -0.7112280130386353, + -1.0551313161849976, + -1.2667772769927979, + 
-1.1795698404312134, + -0.47473034262657166, + -0.6294308304786682, + -0.8662847280502319, + -0.6126305460929871, + -1.2969838380813599, + 0.7683587074279785, + -1.0477334260940552, + -1.6561365127563477, + -1.3956817388534546, + -1.5456737279891968, + -0.6598970890045166, + -1.0187383890151978 + ], + [ + -0.3714697062969208, + -0.7023609280586243, + -0.27911949157714844, + -0.04535667225718498, + 0.15580002963542938, + -0.33758416771888733, + 0.18779830634593964, + 0.32232561707496643, + -0.15236234664916992, + -0.658189594745636, + -0.5211215615272522, + -0.2893148958683014, + -0.09006551653146744, + -0.3823089003562927, + -0.43952515721321106, + -1.0295833349227905, + -0.13881614804267883, + 0.12338744848966599, + 0.05876949429512024, + -0.5440958738327026, + -0.41416996717453003, + -0.5256949067115784, + -0.2472485899925232, + -0.3820854723453522, + -0.7918862700462341, + -0.8735074996948242, + -1.264232873916626, + -0.9945945739746094, + -0.8599066138267517, + -0.4563657343387604, + -0.4086567461490631, + -0.1191699281334877, + -0.9032454490661621, + -0.9968965649604797, + 0.8297999501228333, + -1.2104164361953735, + -0.7442485690116882, + -0.7819410562515259, + -1.2283908128738403, + -0.7861208915710449 + ], + [ + -0.11765145510435104, + 0.4552221894264221, + -0.5095613598823547, + -0.45911768078804016, + -0.5852908492088318, + -0.3833903968334198, + 0.39020830392837524, + -1.1180121898651123, + -0.2874312996864319, + -0.40151381492614746, + -0.3155965209007263, + -0.45782941579818726, + -0.3872508704662323, + -0.8338177800178528, + -1.0188603401184082, + 0.2401266247034073, + -1.6530715227127075, + -0.05106320232152939, + 0.572688102722168, + 0.43971768021583557, + -0.6952989101409912, + -0.7196653485298157, + -0.517774760723114, + -0.794509768486023, + -0.7213160991668701, + -0.9622771739959717, + -1.306052803993225, + -1.1636146306991577, + -0.9856113791465759, + -0.645675003528595, + -0.6629850268363953, + -0.768916666507721, + -1.0084186792373657, + 
-1.6613470315933228, + -1.4335733652114868, + 0.8143667578697205, + -0.9774119257926941, + -1.1026009321212769, + -1.071450114250183, + -1.256163477897644 + ], + [ + 0.17288579046726227, + -0.22893399000167847, + -0.496307909488678, + -0.4579932689666748, + -0.5722309947013855, + -0.3914315402507782, + 0.06241348013281822, + -0.8456422090530396, + -1.1123853921890259, + -0.2737908363342285, + -0.6109524965286255, + -0.7070383429527283, + -0.1774359941482544, + -0.2795770764350891, + -0.7700290679931641, + -0.4517732262611389, + -0.30187514424324036, + -1.272359013557434, + -0.16791802644729614, + -0.6884859800338745, + -0.5162859559059143, + -0.4469009339809418, + -0.4602634608745575, + -0.27824485301971436, + -0.9371410608291626, + -0.8376596570014954, + -1.6007215976715088, + -1.243834376335144, + -0.5253052711486816, + -0.526663601398468, + -1.0813370943069458, + -0.3387793302536011, + -0.8314875364303589, + -1.4982787370681763, + -0.6979645490646362, + -1.0610499382019043, + 0.8076802492141724, + -0.6167906522750854, + -0.8743483424186707, + -0.651902437210083 + ], + [ + -0.5685088634490967, + -0.23347128927707672, + -0.29525426030158997, + -0.3052290380001068, + 1.0145035982131958, + -0.18385833501815796, + -0.39535456895828247, + -0.3827759325504303, + -0.527492105960846, + -0.6158386468887329, + -0.45736223459243774, + -0.44764307141304016, + -0.7756786942481995, + -0.6740925312042236, + -0.7005599141120911, + -0.11771242320537567, + -0.30154865980148315, + -0.03409808874130249, + -1.4305565357208252, + -0.204928457736969, + -0.5014259219169617, + -0.4739360213279724, + -0.7382394075393677, + -1.1949821710586548, + -0.671454668045044, + -0.5525047779083252, + -0.3644644320011139, + -1.163359522819519, + -0.7601977586746216, + -0.47897303104400635, + -0.6577714681625366, + -1.262861967086792, + -0.6968811750411987, + -1.350586175918579, + -0.6986533403396606, + -1.0405231714248657, + -0.5288347005844116, + 0.7863135933876038, + -0.5588980913162231, + 
-1.1899672746658325 + ], + [ + -0.09736085683107376, + -0.6251624822616577, + 0.031343284994363785, + -0.2904556095600128, + 0.07459717243909836, + -0.5493168234825134, + 0.21497398614883423, + -0.2757851183414459, + -0.3496554493904114, + -0.37604203820228577, + -0.6251357793807983, + -0.4212270677089691, + -0.04651430994272232, + -0.36558079719543457, + -0.17215359210968018, + -0.5602476596832275, + -0.27946144342422485, + -0.6749061942100525, + -0.319109171628952, + -1.0248873233795166, + -0.39127472043037415, + -0.6079170107841492, + -0.6078996062278748, + -0.5447589755058289, + -1.0102092027664185, + -1.0337401628494263, + -0.8539299368858337, + -0.6640287041664124, + -0.7284238934516907, + -0.9365366697311401, + -1.0108264684677124, + -0.09477058053016663, + -0.5871573686599731, + -0.6075239181518555, + -1.1083470582962036, + -0.9873295426368713, + -0.8282650113105774, + -0.7653031349182129, + 0.6622989773750305, + -1.2304456233978271 + ], + [ + -0.13130564987659454, + 1.1630288362503052, + 0.07262177765369415, + 0.7802839875221252, + 0.6930831074714661, + 0.6624056696891785, + 0.8480949997901917, + 1.1006073951721191, + 0.3133780360221863, + 0.8186824321746826, + 0.86527019739151, + 0.4291747212409973, + 0.25939708948135376, + 1.083701252937317, + 0.6104759573936462, + 0.5139119625091553, + 0.6247552037239075, + 0.4408226013183594, + 0.3566909432411194, + 0.5008954405784607, + -1.0770149230957031, + -1.128820538520813, + -1.1541414260864258, + -1.1989822387695312, + -1.2094358205795288, + -1.2801498174667358, + -1.3953444957733154, + -1.4601926803588867, + -1.256149411201477, + -1.2954893112182617, + -1.2066659927368164, + -1.2240227460861206, + -1.2374651432037354, + -1.6764224767684937, + -1.2014753818511963, + -1.3278864622116089, + -1.2122613191604614, + -1.2466320991516113, + -1.3345719575881958, + 1.103376030921936 + ] + ], + "startTransitions": [ + -0.5383257269859314, + -0.7169902324676514, + -0.5406637191772461, + -0.04139348119497299, + 
-0.4239015579223633, + -0.7584452033042908, + -0.011937097646296024, + -0.9248396158218384, + 0.8743446469306946, + -0.801080584526062, + -0.5285987854003906, + -0.913509726524353, + -0.8525186777114868, + -1.1192541122436523, + 0.3343737721443176, + 0.4641059339046478, + 0.7782814502716064, + -0.2167549431324005, + 1.004253625869751, + -0.6936427354812622, + -0.22565436363220215, + -0.42796000838279724, + -0.5926423072814941, + -0.6464582681655884, + -0.8255985975265503, + -0.9062461256980896, + -0.9446082711219788, + -1.7861886024475098, + -0.7176482081413269, + -0.46548235416412354, + -0.7510925531387329, + -0.9913697838783264, + -0.7502139806747437, + -1.394468903541565, + -0.7849307060241699, + -1.3436453342437744, + -1.0151522159576416, + -1.386102318763733, + -0.7071612477302551, + 0.29477259516716003 + ], + "endTransitions": [ + 0.5332940220832825, + -0.6539165377616882, + -0.42947837710380554, + -0.4048272669315338, + -0.6972808241844177, + -0.4650331437587738, + -0.695010244846344, + -0.6986792087554932, + -0.5632227063179016, + -0.5043367147445679, + -0.3305620849132538, + -0.47241678833961487, + -0.5526825785636902, + -0.5190877318382263, + -0.35417014360427856, + -0.6192663311958313, + -0.5646839141845703, + -0.39550334215164185, + -0.5576760172843933, + -0.5534887313842773, + -0.14909960329532623, + -0.17214544117450714, + -0.4196673333644867, + -0.458352655172348, + -0.598503053188324, + -0.04246647283434868, + -0.006736681796610355, + -0.06980064511299133, + 0.1432356983423233, + -0.07996407151222229, + -0.30611342191696167, + -0.5918934941291809, + -0.07721111923456192, + -0.21090175211429596, + -0.06443853676319122, + 0.0028925384394824505, + 0.06278187036514282, + -0.551756739616394, + 0.12060985714197159, + 0.3459245264530182 + ] +} \ No newline at end of file diff --git a/grobid-home/models/header-BidLSTM_CRF_FEATURES.onnx/encoder.onnx b/grobid-home/models/header-BidLSTM_CRF_FEATURES.onnx/encoder.onnx new file mode 100644 index 
0000000000..8f805bb97b Binary files /dev/null and b/grobid-home/models/header-BidLSTM_CRF_FEATURES.onnx/encoder.onnx differ diff --git a/grobid-home/models/header-BidLSTM_CRF_FEATURES.onnx/vocab.json b/grobid-home/models/header-BidLSTM_CRF_FEATURES.onnx/vocab.json new file mode 100644 index 0000000000..27617ea621 --- /dev/null +++ b/grobid-home/models/header-BidLSTM_CRF_FEATURES.onnx/vocab.json @@ -0,0 +1,545 @@ +{ + "charVocab": { + "<PAD>": 0, + "<UNK>": 1, + "!": 2, + "\"": 3, + "#": 4, + "$": 5, + "%": 6, + "&": 7, + "'": 8, + "(": 9, + ")": 10, + "*": 11, + "+": 12, + ",": 13, + "-": 14, + ".": 15, + "/": 16, + "0": 17, + "1": 18, + "2": 19, + "3": 20, + "4": 21, + "5": 22, + "6": 23, + "7": 24, + "8": 25, + "9": 26, + ":": 27, + ";": 28, + "<": 29, + "=": 30, + ">": 31, + "?": 32, + "@": 33, + "A": 34, + "B": 35, + "C": 36, + "D": 37, + "E": 38, + "F": 39, + "G": 40, + "H": 41, + "I": 42, + "J": 43, + "K": 44, + "L": 45, + "M": 46, + "N": 47, + "O": 48, + "P": 49, + "Q": 50, + "R": 51, + "S": 52, + "T": 53, + "U": 54, + "V": 55, + "W": 56, + "X": 57, + "Y": 58, + "Z": 59, + "[": 60, + "\\": 61, + "]": 62, + "^": 63, + "_": 64, + "`": 65, + "a": 66, + "b": 67, + "c": 68, + "d": 69, + "e": 70, + "f": 71, + "g": 72, + "h": 73, + "i": 74, + "j": 75, + "k": 76, + "l": 77, + "m": 78, + "n": 79, + "o": 80, + "p": 81, + "q": 82, + "r": 83, + "s": 84, + "t": 85, + "u": 86, + "v": 87, + "w": 88, + "x": 89, + "y": 90, + "z": 91, + "{": 92, + "|": 93, + "}": 94, + "~": 95, + "¡": 96, + "¢": 97, + "£": 98, + "¤": 99, + "¥": 100, + "§": 101, + "¨": 102, + "©": 103, + "ª": 104, + "«": 105, + "®": 106, + "¯": 107, + "°": 108, + "±": 109, + "²": 110, + "´": 111, + "µ": 112, + "¶": 113, + "¸": 114, + "»": 115, + "¼": 116, + "À": 117, + "Á": 118, + "Â": 119, + "Ã": 120, + "Å": 121, + "É": 122, + "Ê": 123, + "Ì": 124, + "Î": 125, + "Ó": 126, + "Õ": 127, + "Ö": 128, + "×": 129, + "Ø": 130, + "ß": 131, + "à": 132, + "á": 133, + "â": 134, + "ã": 135, + "ä": 136, + "å": 137, + "ç": 
138, + "è": 139, + "é": 140, + "ê": 141, + "ë": 142, + "í": 143, + "î": 144, + "ï": 145, + "ñ": 146, + "ó": 147, + "ô": 148, + "õ": 149, + "ö": 150, + "ø": 151, + "ú": 152, + "û": 153, + "ü": 154, + "þ": 155, + "ÿ": 156, + "Ā": 157, + "ă": 158, + "Ą": 159, + "ć": 160, + "Č": 161, + "č": 162, + "ė": 163, + "ę": 164, + "İ": 165, + "ı": 166, + "Ł": 167, + "ł": 168, + "ń": 169, + "Ō": 170, + "ř": 171, + "ş": 172, + "Š": 173, + "š": 174, + "Ź": 175, + "ź": 176, + "ż": 177, + "Ž": 178, + "ž": 179, + "Ȃ": 180, + "ʈ": 181, + "˙": 182, + "͑": 183, + "͒": 184, + "͓": 185, + "͔": 186, + "͗": 187, + "͘": 188, + "Γ": 189, + "Δ": 190, + "Θ": 191, + "Λ": 192, + "Π": 193, + "Υ": 194, + "Ψ": 195, + "α": 196, + "β": 197, + "γ": 198, + "δ": 199, + "ε": 200, + "ζ": 201, + "η": 202, + "θ": 203, + "κ": 204, + "λ": 205, + "μ": 206, + "ν": 207, + "ξ": 208, + "π": 209, + "ρ": 210, + "σ": 211, + "τ": 212, + "φ": 213, + "χ": 214, + "ψ": 215, + "ω": 216, + "ϕ": 217, + "ϩ": 218, + "Ϫ": 219, + "ϫ": 220, + "ϭ": 221, + "ϳ": 222, + "Ͻ": 223, + "Ͼ": 224, + "Ј": 225, + "Љ": 226, + "Б": 227, + "И": 228, + "Л": 229, + "П": 230, + "Ф": 231, + "Ц": 232, + "б": 233, + "в": 234, + "г": 235, + "д": 236, + "з": 237, + "и": 238, + "й": 239, + "к": 240, + "л": 241, + "м": 242, + "н": 243, + "п": 244, + "р": 245, + "с": 246, + "т": 247, + "у": 248, + "ф": 249, + "х": 250, + "ц": 251, + "ч": 252, + "ш": 253, + "щ": 254, + "ы": 255, + "ь": 256, + "э": 257, + "ю": 258, + "я": 259, + "ё": 260, + "Ն": 261, + "؊": 262, + "؍": 263, + "ٞ": 264, + "ܨ": 265, + "ࡆ": 266, + "௦": 267, + "௧": 268, + "ᰔ": 269, + "Ṇ": 270, + "†": 271, + "‡": 272, + "•": 273, + "‫": 274, + "‬": 275, + "′": 276, + "‹": 277, + "⁎": 278, + "ℓ": 279, + "™": 280, + "Ⅲ": 281, + "→": 282, + "↓": 283, + "↵": 284, + "⇑": 285, + "⇤": 286, + "∆": 287, + "∈": 288, + "−": 289, + "√": 290, + "∞": 291, + "∼": 292, + "≃": 293, + "≈": 294, + "≤": 295, + "≥": 296, + "⊙": 297, + "⋅": 298, + "⋆": 299, + "⌬": 300, + "␣": 301, + "␤": 302, + "ⓒ": 303, + "□": 304, + 
"☯": 305, + "♣": 306, + "♦": 307, + "✉": 308, + "✝": 309, + "⸸": 310, + "ㆍ": 311, + "": 312, + "": 313, + "": 314, + "": 315, + "": 316, + "": 317, + "": 318, + "": 319, + "": 320, + "": 321, + "": 322, + "": 323, + "": 324, + "": 325, + "": 326, + "": 327, + ",": 328, + ";": 329, + "�": 330 + }, + "tagVocab": { + "<PAD>": 0, + "B-<abstract>": 1, + "B-<address>": 2, + "B-<affiliation>": 3, + "B-<author>": 4, + "B-<availability>": 5, + "B-<copyright>": 6, + "B-<date>": 7, + "B-<doctype>": 8, + "B-<editor>": 9, + "B-<email>": 10, + "B-<funding>": 11, + "B-<group>": 12, + "B-<keyword>": 13, + "B-<meeting>": 14, + "B-<pubnum>": 15, + "B-<reference>": 16, + "B-<submission>": 17, + "B-<title>": 18, + "B-<web>": 19, + "I-<abstract>": 20, + "I-<address>": 21, + "I-<affiliation>": 22, + "I-<author>": 23, + "I-<availability>": 24, + "I-<copyright>": 25, + "I-<date>": 26, + "I-<doctype>": 27, + "I-<editor>": 28, + "I-<email>": 29, + "I-<funding>": 30, + "I-<group>": 31, + "I-<keyword>": 32, + "I-<meeting>": 33, + "I-<pubnum>": 34, + "I-<reference>": 35, + "I-<submission>": 36, + "I-<title>": 37, + "I-<web>": 38, + "O": 39 + }, + "tagIndex": { + "0": "<PAD>", + "1": "B-<abstract>", + "2": "B-<address>", + "3": "B-<affiliation>", + "4": "B-<author>", + "5": "B-<availability>", + "6": "B-<copyright>", + "7": "B-<date>", + "8": "B-<doctype>", + "9": "B-<editor>", + "10": "B-<email>", + "11": "B-<funding>", + "12": "B-<group>", + "13": "B-<keyword>", + "14": "B-<meeting>", + "15": "B-<pubnum>", + "16": "B-<reference>", + "17": "B-<submission>", + "18": "B-<title>", + "19": "B-<web>", + "20": "I-<abstract>", + "21": "I-<address>", + "22": "I-<affiliation>", + "23": "I-<author>", + "24": "I-<availability>", + "25": "I-<copyright>", + "26": "I-<date>", + "27": "I-<doctype>", + "28": "I-<editor>", + "29": "I-<email>", + "30": "I-<funding>", + "31": "I-<group>", + "32": "I-<keyword>", + "33": "I-<meeting>", + "34": "I-<pubnum>", + "35": "I-<reference>", + "36": 
"I-<submission>", + "37": "I-<title>", + "38": "I-<web>", + "39": "O" + }, + "maxCharLength": 30, + "returnChars": false, + "featuresIndices": [ + 9, + 10, + 11, + 12, + 13, + 14, + 15, + 16, + 17, + 18, + 19, + 20, + 21, + 22, + 23, + 24, + 25, + 26, + 27, + 28, + 29, + 30 + ], + "featuresVocabularySize": 12, + "featuresMapToIndex": { + "9": { + "BLOCKEND": 1, + "BLOCKIN": 2, + "BLOCKSTART": 3 + }, + "10": { + "LINEEND": 13, + "LINEIN": 14, + "LINESTART": 15 + }, + "11": { + "ALIGNEDLEFT": 25, + "LINEINDENT": 26 + }, + "12": { + "NEWFONT": 37, + "SAMEFONT": 38 + }, + "13": { + "HIGHERFONT": 49, + "LOWERFONT": 50, + "SAMEFONTSIZE": 51 + }, + "14": { + "0": 61, + "1": 62 + }, + "15": { + "0": 73, + "1": 74 + }, + "16": { + "ALLCAP": 85, + "INITCAP": 86, + "NOCAPS": 87 + }, + "17": { + "ALLDIGIT": 97, + "CONTAINSDIGITS": 98, + "NODIGIT": 99 + }, + "18": { + "0": 109, + "1": 110 + }, + "19": { + "0": 121, + "1": 122 + }, + "20": { + "0": 133, + "1": 134 + }, + "21": { + "0": 145, + "1": 146 + }, + "22": { + "0": 157, + "1": 158 + }, + "23": { + "0": 169, + "1": 170 + }, + "24": { + "0": 181, + "1": 182 + }, + "25": { + "0": 193, + "1": 194 + }, + "26": { + "COMMA": 205, + "DOT": 206, + "ENDBRACKET": 207, + "HYPHEN": 208, + "NOPUNCT": 209, + "OPENBRACKET": 210, + "PUNCT": 211, + "QUOTE": 212 + }, + "27": { + "0": 217, + "1": 218 + }, + "28": { + "0": 229 + }, + "29": { + "0": 241, + "1": 242 + }, + "30": { + "0": 253 + } + } +} \ No newline at end of file diff --git a/grobid-home/models/header-BidLSTM_CRF_FEATURES/config.json b/grobid-home/models/header-BidLSTM_CRF_FEATURES/config.json deleted file mode 100644 index 689da764ea..0000000000 --- a/grobid-home/models/header-BidLSTM_CRF_FEATURES/config.json +++ /dev/null @@ -1,148 +0,0 @@ -{ - "model_name": "header-BidLSTM_CRF_FEATURES", - "architecture": "BidLSTM_CRF_FEATURES", - "embeddings_name": "glove-840B", - "char_vocab_size": 336, - "case_vocab_size": 8, - "char_embedding_size": 25, - "num_char_lstm_units": 25, - 
"max_char_length": 30, - "features_vocabulary_size": 12, - "features_indices": [ - 9, - 10, - 11, - 12, - 13, - 14, - 15, - 16, - 17, - 18, - 19, - 20, - 21, - 22, - 23, - 24, - 25, - 26, - 27, - 28, - 29, - 30 - ], - "features_embedding_size": 4, - "features_lstm_units": 4, - "max_sequence_length": 3000, - "word_embedding_size": 300, - "num_word_lstm_units": 100, - "case_embedding_size": 5, - "dropout": 0.5, - "recurrent_dropout": 0.5, - "use_crf": true, - "use_chain_crf": false, - "fold_number": 1, - "batch_size": 9, - "transformer_name": null, - "use_ELMo": false, - "features_map_to_index": { - "9": { - "BLOCKEND": 1, - "BLOCKIN": 2, - "BLOCKSTART": 3 - }, - "10": { - "LINEEND": 13, - "LINEIN": 14, - "LINESTART": 15 - }, - "11": { - "ALIGNEDLEFT": 25, - "LINEINDENT": 26 - }, - "12": { - "NEWFONT": 37, - "SAMEFONT": 38 - }, - "13": { - "HIGHERFONT": 49, - "LOWERFONT": 50, - "SAMEFONTSIZE": 51 - }, - "14": { - "0": 61, - "1": 62 - }, - "15": { - "0": 73, - "1": 74 - }, - "16": { - "ALLCAP": 85, - "INITCAP": 86, - "NOCAPS": 87 - }, - "17": { - "ALLDIGIT": 97, - "CONTAINSDIGITS": 98, - "NODIGIT": 99 - }, - "18": { - "0": 109, - "1": 110 - }, - "19": { - "0": 121, - "1": 122 - }, - "20": { - "0": 133, - "1": 134 - }, - "21": { - "0": 145, - "1": 146 - }, - "22": { - "0": 157, - "1": 158 - }, - "23": { - "0": 169, - "1": 170 - }, - "24": { - "0": 181, - "1": 182 - }, - "25": { - "0": 193, - "1": 194 - }, - "26": { - "COMMA": 205, - "DOT": 206, - "ENDBRACKET": 207, - "HYPHEN": 208, - "NOPUNCT": 209, - "OPENBRACKET": 210, - "PUNCT": 211, - "QUOTE": 212 - }, - "27": { - "0": 217, - "1": 218 - }, - "28": { - "0": 229 - }, - "29": { - "0": 241, - "1": 242 - }, - "30": { - "0": 253 - } - } -} \ No newline at end of file diff --git a/grobid-home/models/header-BidLSTM_CRF_FEATURES/model_weights.hdf5 b/grobid-home/models/header-BidLSTM_CRF_FEATURES/model_weights.hdf5 deleted file mode 100644 index 28ca13aefc..0000000000 Binary files 
a/grobid-home/models/header-BidLSTM_CRF_FEATURES/model_weights.hdf5 and /dev/null differ diff --git a/grobid-home/models/header-BidLSTM_CRF_FEATURES/preprocessor.json b/grobid-home/models/header-BidLSTM_CRF_FEATURES/preprocessor.json deleted file mode 100644 index ceacc64819..0000000000 --- a/grobid-home/models/header-BidLSTM_CRF_FEATURES/preprocessor.json +++ /dev/null @@ -1,568 +0,0 @@ -{ - "padding": true, - "return_lengths": true, - "return_word_embeddings": true, - "return_casing": false, - "return_features": true, - "return_chars": true, - "return_bert_embeddings": false, - "vocab_char": { - "<PAD>": 0, - "<UNK>": 1, - "!": 2, - "\"": 3, - "#": 4, - "$": 5, - "%": 6, - "&": 7, - "'": 8, - "(": 9, - ")": 10, - "*": 11, - "+": 12, - ",": 13, - "-": 14, - ".": 15, - "/": 16, - "0": 17, - "1": 18, - "2": 19, - "3": 20, - "4": 21, - "5": 22, - "6": 23, - "7": 24, - "8": 25, - "9": 26, - ":": 27, - ";": 28, - "<": 29, - "=": 30, - ">": 31, - "?": 32, - "@": 33, - "A": 34, - "B": 35, - "C": 36, - "D": 37, - "E": 38, - "F": 39, - "G": 40, - "H": 41, - "I": 42, - "J": 43, - "K": 44, - "L": 45, - "M": 46, - "N": 47, - "O": 48, - "P": 49, - "Q": 50, - "R": 51, - "S": 52, - "T": 53, - "U": 54, - "V": 55, - "W": 56, - "X": 57, - "Y": 58, - "Z": 59, - "[": 60, - "\\": 61, - "]": 62, - "^": 63, - "_": 64, - "`": 65, - "a": 66, - "b": 67, - "c": 68, - "d": 69, - "e": 70, - "f": 71, - "g": 72, - "h": 73, - "i": 74, - "j": 75, - "k": 76, - "l": 77, - "m": 78, - "n": 79, - "o": 80, - "p": 81, - "q": 82, - "r": 83, - "s": 84, - "t": 85, - "u": 86, - "v": 87, - "w": 88, - "x": 89, - "y": 90, - "z": 91, - "{": 92, - "|": 93, - "}": 94, - "~": 95, - "\u00a1": 96, - "\u00a2": 97, - "\u00a3": 98, - "\u00a4": 99, - "\u00a5": 100, - "\u00a7": 101, - "\u00a8": 102, - "\u00a9": 103, - "\u00aa": 104, - "\u00ab": 105, - "\u00ae": 106, - "\u00af": 107, - "\u00b0": 108, - "\u00b1": 109, - "\u00b2": 110, - "\u00b4": 111, - "\u00b5": 112, - "\u00b6": 113, - "\u00b8": 114, - "\u00bb": 115, - 
"\u00bc": 116, - "\u00c0": 117, - "\u00c1": 118, - "\u00c2": 119, - "\u00c3": 120, - "\u00c5": 121, - "\u00c9": 122, - "\u00ca": 123, - "\u00cc": 124, - "\u00ce": 125, - "\u00d3": 126, - "\u00d5": 127, - "\u00d6": 128, - "\u00d7": 129, - "\u00d8": 130, - "\u00df": 131, - "\u00e0": 132, - "\u00e1": 133, - "\u00e2": 134, - "\u00e3": 135, - "\u00e4": 136, - "\u00e5": 137, - "\u00e7": 138, - "\u00e8": 139, - "\u00e9": 140, - "\u00ea": 141, - "\u00eb": 142, - "\u00ed": 143, - "\u00ee": 144, - "\u00ef": 145, - "\u00f1": 146, - "\u00f2": 147, - "\u00f3": 148, - "\u00f4": 149, - "\u00f5": 150, - "\u00f6": 151, - "\u00f8": 152, - "\u00fa": 153, - "\u00fb": 154, - "\u00fc": 155, - "\u00fd": 156, - "\u00fe": 157, - "\u00ff": 158, - "\u0100": 159, - "\u0103": 160, - "\u0104": 161, - "\u0107": 162, - "\u010c": 163, - "\u010d": 164, - "\u0117": 165, - "\u0119": 166, - "\u0130": 167, - "\u0131": 168, - "\u0141": 169, - "\u0142": 170, - "\u0144": 171, - "\u014c": 172, - "\u0159": 173, - "\u015f": 174, - "\u0160": 175, - "\u0161": 176, - "\u0179": 177, - "\u017a": 178, - "\u017c": 179, - "\u017d": 180, - "\u017e": 181, - "\u0202": 182, - "\u0288": 183, - "\u02d9": 184, - "\u0351": 185, - "\u0352": 186, - "\u0353": 187, - "\u0354": 188, - "\u0357": 189, - "\u0358": 190, - "\u0393": 191, - "\u0394": 192, - "\u0398": 193, - "\u039b": 194, - "\u039e": 195, - "\u03a0": 196, - "\u03a3": 197, - "\u03a5": 198, - "\u03a8": 199, - "\u03b1": 200, - "\u03b2": 201, - "\u03b3": 202, - "\u03b4": 203, - "\u03b5": 204, - "\u03b6": 205, - "\u03b7": 206, - "\u03b8": 207, - "\u03ba": 208, - "\u03bb": 209, - "\u03bc": 210, - "\u03bd": 211, - "\u03be": 212, - "\u03c0": 213, - "\u03c1": 214, - "\u03c3": 215, - "\u03c4": 216, - "\u03c6": 217, - "\u03c7": 218, - "\u03c8": 219, - "\u03c9": 220, - "\u03d5": 221, - "\u03e9": 222, - "\u03ea": 223, - "\u03eb": 224, - "\u03ed": 225, - "\u03f3": 226, - "\u03fd": 227, - "\u03fe": 228, - "\u0408": 229, - "\u0409": 230, - "\u0411": 231, - "\u0418": 232, - "\u041b": 
233, - "\u041f": 234, - "\u0424": 235, - "\u0426": 236, - "\u0431": 237, - "\u0432": 238, - "\u0433": 239, - "\u0434": 240, - "\u0437": 241, - "\u0438": 242, - "\u0439": 243, - "\u043a": 244, - "\u043b": 245, - "\u043c": 246, - "\u043d": 247, - "\u043f": 248, - "\u0440": 249, - "\u0441": 250, - "\u0442": 251, - "\u0443": 252, - "\u0444": 253, - "\u0445": 254, - "\u0446": 255, - "\u0447": 256, - "\u0448": 257, - "\u0449": 258, - "\u044b": 259, - "\u044c": 260, - "\u044d": 261, - "\u044e": 262, - "\u044f": 263, - "\u0451": 264, - "\u0546": 265, - "\u060a": 266, - "\u060d": 267, - "\u065e": 268, - "\u0728": 269, - "\u0846": 270, - "\u0be6": 271, - "\u0be7": 272, - "\u1c14": 273, - "\u1e46": 274, - "\u2020": 275, - "\u2021": 276, - "\u2022": 277, - "\u202b": 278, - "\u202c": 279, - "\u2032": 280, - "\u2039": 281, - "\u204e": 282, - "\u2113": 283, - "\u2122": 284, - "\u2162": 285, - "\u2192": 286, - "\u2193": 287, - "\u21b5": 288, - "\u21d1": 289, - "\u21e4": 290, - "\u2206": 291, - "\u2208": 292, - "\u2212": 293, - "\u2213": 294, - "\u221a": 295, - "\u221e": 296, - "\u223c": 297, - "\u2243": 298, - "\u2248": 299, - "\u2264": 300, - "\u2265": 301, - "\u2299": 302, - "\u22c5": 303, - "\u22c6": 304, - "\u232c": 305, - "\u2423": 306, - "\u2424": 307, - "\u24d2": 308, - "\u25a1": 309, - "\u262f": 310, - "\u2663": 311, - "\u2666": 312, - "\u2709": 313, - "\u271d": 314, - "\u2e38": 315, - "\u318d": 316, - "\uf761": 317, - "\uf764": 318, - "\uf765": 319, - "\uf767": 320, - "\uf769": 321, - "\uf76b": 322, - "\uf76c": 323, - "\uf76e": 324, - "\uf76f": 325, - "\uf770": 326, - "\uf772": 327, - "\uf773": 328, - "\uf774": 329, - "\uf777": 330, - "\uf779": 331, - "\uf8e9": 332, - "\uff0c": 333, - "\uff1b": 334, - "\ufffd": 335 - }, - "vocab_tag": { - "<PAD>": 0, - "B-<abstract>": 1, - "B-<address>": 2, - "B-<affiliation>": 3, - "B-<author>": 4, - "B-<availability>": 5, - "B-<copyright>": 6, - "B-<date>": 7, - "B-<doctype>": 8, - "B-<editor>": 9, - "B-<email>": 10, - "B-<funding>": 
11, - "B-<group>": 12, - "B-<keyword>": 13, - "B-<meeting>": 14, - "B-<pubnum>": 15, - "B-<reference>": 16, - "B-<submission>": 17, - "B-<title>": 18, - "B-<web>": 19, - "I-<abstract>": 20, - "I-<address>": 21, - "I-<affiliation>": 22, - "I-<author>": 23, - "I-<availability>": 24, - "I-<copyright>": 25, - "I-<date>": 26, - "I-<doctype>": 27, - "I-<editor>": 28, - "I-<email>": 29, - "I-<funding>": 30, - "I-<group>": 31, - "I-<keyword>": 32, - "I-<meeting>": 33, - "I-<pubnum>": 34, - "I-<reference>": 35, - "I-<submission>": 36, - "I-<title>": 37, - "I-<web>": 38, - "O": 39 - }, - "vocab_case": [ - "<PAD>", - "numeric", - "allLower", - "allUpper", - "initialUpper", - "other", - "mainly_numeric", - "contains_digit" - ], - "max_char_length": 30, - "feature_preprocessor": { - "features_vocabulary_size": 12, - "features_indices": [ - 9, - 10, - 11, - 12, - 13, - 14, - 15, - 16, - 17, - 18, - 19, - 20, - 21, - 22, - 23, - 24, - 25, - 26, - 27, - 28, - 29, - 30 - ], - "features_map_to_index": { - "9": { - "BLOCKEND": 1, - "BLOCKIN": 2, - "BLOCKSTART": 3 - }, - "10": { - "LINEEND": 13, - "LINEIN": 14, - "LINESTART": 15 - }, - "11": { - "ALIGNEDLEFT": 25, - "LINEINDENT": 26 - }, - "12": { - "NEWFONT": 37, - "SAMEFONT": 38 - }, - "13": { - "HIGHERFONT": 49, - "LOWERFONT": 50, - "SAMEFONTSIZE": 51 - }, - "14": { - "0": 61, - "1": 62 - }, - "15": { - "0": 73, - "1": 74 - }, - "16": { - "ALLCAP": 85, - "INITCAP": 86, - "NOCAPS": 87 - }, - "17": { - "ALLDIGIT": 97, - "CONTAINSDIGITS": 98, - "NODIGIT": 99 - }, - "18": { - "0": 109, - "1": 110 - }, - "19": { - "0": 121, - "1": 122 - }, - "20": { - "0": 133, - "1": 134 - }, - "21": { - "0": 145, - "1": 146 - }, - "22": { - "0": 157, - "1": 158 - }, - "23": { - "0": 169, - "1": 170 - }, - "24": { - "0": 181, - "1": 182 - }, - "25": { - "0": 193, - "1": 194 - }, - "26": { - "COMMA": 205, - "DOT": 206, - "ENDBRACKET": 207, - "HYPHEN": 208, - "NOPUNCT": 209, - "OPENBRACKET": 210, - "PUNCT": 211, - "QUOTE": 212 - }, - "27": { - "0": 217, 
- "1": 218 - }, - "28": { - "0": 229 - }, - "29": { - "0": 241, - "1": 242 - }, - "30": { - "0": 253 - } - } - }, - "indice_tag": { - "0": "<PAD>", - "1": "B-<abstract>", - "2": "B-<address>", - "3": "B-<affiliation>", - "4": "B-<author>", - "5": "B-<availability>", - "6": "B-<copyright>", - "7": "B-<date>", - "8": "B-<doctype>", - "9": "B-<editor>", - "10": "B-<email>", - "11": "B-<funding>", - "12": "B-<group>", - "13": "B-<keyword>", - "14": "B-<meeting>", - "15": "B-<pubnum>", - "16": "B-<reference>", - "17": "B-<submission>", - "18": "B-<title>", - "19": "B-<web>", - "20": "I-<abstract>", - "21": "I-<address>", - "22": "I-<affiliation>", - "23": "I-<author>", - "24": "I-<availability>", - "25": "I-<copyright>", - "26": "I-<date>", - "27": "I-<doctype>", - "28": "I-<editor>", - "29": "I-<email>", - "30": "I-<funding>", - "31": "I-<group>", - "32": "I-<keyword>", - "33": "I-<meeting>", - "34": "I-<pubnum>", - "35": "I-<reference>", - "36": "I-<submission>", - "37": "I-<title>", - "38": "I-<web>", - "39": "O" - } -} \ No newline at end of file diff --git a/grobid-home/models/header-BidLSTM_ChainCRF-with_ELMo/config.json b/grobid-home/models/header-BidLSTM_ChainCRF-with_ELMo/config.json deleted file mode 100644 index f1b6f1819a..0000000000 --- a/grobid-home/models/header-BidLSTM_ChainCRF-with_ELMo/config.json +++ /dev/null @@ -1,26 +0,0 @@ -{ - "model_name": "header-BidLSTM_ChainCRF-with_ELMo", - "architecture": "BidLSTM_ChainCRF", - "embeddings_name": "glove-840B", - "char_vocab_size": 318, - "case_vocab_size": 8, - "char_embedding_size": 25, - "num_char_lstm_units": 25, - "max_char_length": 30, - "features_vocabulary_size": 12, - "features_indices": null, - "features_embedding_size": 4, - "features_lstm_units": 4, - "max_sequence_length": 1500, - "word_embedding_size": 1324, - "num_word_lstm_units": 100, - "case_embedding_size": 5, - "dropout": 0.5, - "recurrent_dropout": 0.5, - "use_crf": true, - "use_chain_crf": true, - "fold_number": 1, - "batch_size": 10, - 
"transformer_name": null, - "use_ELMo": true -} \ No newline at end of file diff --git a/grobid-home/models/header-BidLSTM_ChainCRF-with_ELMo/model_weights.hdf5 b/grobid-home/models/header-BidLSTM_ChainCRF-with_ELMo/model_weights.hdf5 deleted file mode 100644 index 9ad6c4556a..0000000000 Binary files a/grobid-home/models/header-BidLSTM_ChainCRF-with_ELMo/model_weights.hdf5 and /dev/null differ diff --git a/grobid-home/models/header-BidLSTM_ChainCRF-with_ELMo/preprocessor.json b/grobid-home/models/header-BidLSTM_ChainCRF-with_ELMo/preprocessor.json deleted file mode 100644 index 9e274ca2f6..0000000000 --- a/grobid-home/models/header-BidLSTM_ChainCRF-with_ELMo/preprocessor.json +++ /dev/null @@ -1,421 +0,0 @@ -{ - "padding": true, - "return_lengths": true, - "return_word_embeddings": true, - "return_casing": false, - "return_features": false, - "return_chars": true, - "return_bert_embeddings": false, - "vocab_char": { - "<PAD>": 0, - "<UNK>": 1, - "!": 2, - "\"": 3, - "#": 4, - "$": 5, - "%": 6, - "&": 7, - "'": 8, - "(": 9, - ")": 10, - "*": 11, - "+": 12, - ",": 13, - "-": 14, - ".": 15, - "/": 16, - "0": 17, - "1": 18, - "2": 19, - "3": 20, - "4": 21, - "5": 22, - "6": 23, - "7": 24, - "8": 25, - "9": 26, - ":": 27, - ";": 28, - "<": 29, - "=": 30, - ">": 31, - "?": 32, - "@": 33, - "A": 34, - "B": 35, - "C": 36, - "D": 37, - "E": 38, - "F": 39, - "G": 40, - "H": 41, - "I": 42, - "J": 43, - "K": 44, - "L": 45, - "M": 46, - "N": 47, - "O": 48, - "P": 49, - "Q": 50, - "R": 51, - "S": 52, - "T": 53, - "U": 54, - "V": 55, - "W": 56, - "X": 57, - "Y": 58, - "Z": 59, - "[": 60, - "\\": 61, - "]": 62, - "^": 63, - "_": 64, - "`": 65, - "a": 66, - "b": 67, - "c": 68, - "d": 69, - "e": 70, - "f": 71, - "g": 72, - "h": 73, - "i": 74, - "j": 75, - "k": 76, - "l": 77, - "m": 78, - "n": 79, - "o": 80, - "p": 81, - "q": 82, - "r": 83, - "s": 84, - "t": 85, - "u": 86, - "v": 87, - "w": 88, - "x": 89, - "y": 90, - "z": 91, - "{": 92, - "|": 93, - "}": 94, - "~": 95, - "\u00a1": 
96, - "\u00a2": 97, - "\u00a3": 98, - "\u00a5": 99, - "\u00a7": 100, - "\u00a9": 101, - "\u00aa": 102, - "\u00ab": 103, - "\u00ae": 104, - "\u00af": 105, - "\u00b0": 106, - "\u00b1": 107, - "\u00b2": 108, - "\u00b5": 109, - "\u00b6": 110, - "\u00b8": 111, - "\u00bb": 112, - "\u00bc": 113, - "\u00c0": 114, - "\u00c1": 115, - "\u00c2": 116, - "\u00c3": 117, - "\u00c5": 118, - "\u00c9": 119, - "\u00ca": 120, - "\u00cc": 121, - "\u00ce": 122, - "\u00d3": 123, - "\u00d5": 124, - "\u00d6": 125, - "\u00d7": 126, - "\u00d8": 127, - "\u00df": 128, - "\u00e0": 129, - "\u00e1": 130, - "\u00e2": 131, - "\u00e3": 132, - "\u00e4": 133, - "\u00e5": 134, - "\u00e7": 135, - "\u00e8": 136, - "\u00e9": 137, - "\u00ea": 138, - "\u00eb": 139, - "\u00ed": 140, - "\u00ee": 141, - "\u00ef": 142, - "\u00f1": 143, - "\u00f3": 144, - "\u00f4": 145, - "\u00f6": 146, - "\u00f8": 147, - "\u00fa": 148, - "\u00fb": 149, - "\u00fc": 150, - "\u00fe": 151, - "\u00ff": 152, - "\u0100": 153, - "\u0103": 154, - "\u0104": 155, - "\u0107": 156, - "\u010c": 157, - "\u010d": 158, - "\u0117": 159, - "\u0119": 160, - "\u0130": 161, - "\u0141": 162, - "\u0142": 163, - "\u0144": 164, - "\u014c": 165, - "\u0159": 166, - "\u015f": 167, - "\u0160": 168, - "\u0161": 169, - "\u0179": 170, - "\u017a": 171, - "\u017c": 172, - "\u017d": 173, - "\u017e": 174, - "\u0202": 175, - "\u0288": 176, - "\u02d9": 177, - "\u0351": 178, - "\u0352": 179, - "\u0353": 180, - "\u0354": 181, - "\u0357": 182, - "\u0358": 183, - "\u0393": 184, - "\u0394": 185, - "\u0398": 186, - "\u039b": 187, - "\u03a0": 188, - "\u03a5": 189, - "\u03a8": 190, - "\u03b1": 191, - "\u03b2": 192, - "\u03b4": 193, - "\u03b6": 194, - "\u03b7": 195, - "\u03b8": 196, - "\u03ba": 197, - "\u03bb": 198, - "\u03bc": 199, - "\u03bd": 200, - "\u03be": 201, - "\u03c0": 202, - "\u03c1": 203, - "\u03c3": 204, - "\u03c4": 205, - "\u03c6": 206, - "\u03c8": 207, - "\u03c9": 208, - "\u03d5": 209, - "\u03e9": 210, - "\u03ea": 211, - "\u03eb": 212, - "\u03ed": 213, - 
"\u03f3": 214, - "\u03fd": 215, - "\u03fe": 216, - "\u0408": 217, - "\u0409": 218, - "\u0411": 219, - "\u0418": 220, - "\u041b": 221, - "\u041f": 222, - "\u0424": 223, - "\u0426": 224, - "\u0431": 225, - "\u0432": 226, - "\u0433": 227, - "\u0434": 228, - "\u0437": 229, - "\u0438": 230, - "\u0439": 231, - "\u043a": 232, - "\u043b": 233, - "\u043c": 234, - "\u043d": 235, - "\u043f": 236, - "\u0440": 237, - "\u0441": 238, - "\u0442": 239, - "\u0443": 240, - "\u0444": 241, - "\u0445": 242, - "\u0446": 243, - "\u0447": 244, - "\u0448": 245, - "\u0449": 246, - "\u044b": 247, - "\u044c": 248, - "\u044d": 249, - "\u044e": 250, - "\u044f": 251, - "\u0451": 252, - "\u0546": 253, - "\u060d": 254, - "\u065e": 255, - "\u0728": 256, - "\u0846": 257, - "\u0be6": 258, - "\u0be7": 259, - "\u1c14": 260, - "\u1e46": 261, - "\u2020": 262, - "\u2021": 263, - "\u2022": 264, - "\u202b": 265, - "\u202c": 266, - "\u2032": 267, - "\u2039": 268, - "\u204e": 269, - "\u2113": 270, - "\u2122": 271, - "\u2162": 272, - "\u2192": 273, - "\u2193": 274, - "\u21b5": 275, - "\u21d1": 276, - "\u21e4": 277, - "\u2206": 278, - "\u2208": 279, - "\u2212": 280, - "\u221a": 281, - "\u221e": 282, - "\u223c": 283, - "\u2248": 284, - "\u2264": 285, - "\u2265": 286, - "\u2299": 287, - "\u22c5": 288, - "\u22c6": 289, - "\u232c": 290, - "\u2423": 291, - "\u2424": 292, - "\u24d2": 293, - "\u25a1": 294, - "\u262f": 295, - "\u2663": 296, - "\u2666": 297, - "\u271d": 298, - "\u2e38": 299, - "\u318d": 300, - "\uf761": 301, - "\uf764": 302, - "\uf765": 303, - "\uf767": 304, - "\uf769": 305, - "\uf76b": 306, - "\uf76c": 307, - "\uf76e": 308, - "\uf76f": 309, - "\uf770": 310, - "\uf772": 311, - "\uf773": 312, - "\uf774": 313, - "\uf777": 314, - "\uf779": 315, - "\uf8e9": 316, - "\ufffd": 317 - }, - "vocab_tag": { - "<PAD>": 0, - "B-<abstract>": 1, - "B-<address>": 2, - "B-<affiliation>": 3, - "B-<author>": 4, - "B-<copyright>": 5, - "B-<date>": 6, - "B-<doctype>": 7, - "B-<editor>": 8, - "B-<email>": 9, - "B-<funding>": 
10, - "B-<group>": 11, - "B-<keyword>": 12, - "B-<meeting>": 13, - "B-<pubnum>": 14, - "B-<reference>": 15, - "B-<submission>": 16, - "B-<title>": 17, - "B-<web>": 18, - "I-<abstract>": 19, - "I-<address>": 20, - "I-<affiliation>": 21, - "I-<author>": 22, - "I-<copyright>": 23, - "I-<date>": 24, - "I-<doctype>": 25, - "I-<editor>": 26, - "I-<email>": 27, - "I-<funding>": 28, - "I-<group>": 29, - "I-<keyword>": 30, - "I-<meeting>": 31, - "I-<pubnum>": 32, - "I-<reference>": 33, - "I-<submission>": 34, - "I-<title>": 35, - "I-<web>": 36, - "O": 37 - }, - "vocab_case": [ - "<PAD>", - "numeric", - "allLower", - "allUpper", - "initialUpper", - "other", - "mainly_numeric", - "contains_digit" - ], - "max_char_length": 30, - "feature_preprocessor": null, - "indice_tag": { - "0": "<PAD>", - "1": "B-<abstract>", - "2": "B-<address>", - "3": "B-<affiliation>", - "4": "B-<author>", - "5": "B-<copyright>", - "6": "B-<date>", - "7": "B-<doctype>", - "8": "B-<editor>", - "9": "B-<email>", - "10": "B-<funding>", - "11": "B-<group>", - "12": "B-<keyword>", - "13": "B-<meeting>", - "14": "B-<pubnum>", - "15": "B-<reference>", - "16": "B-<submission>", - "17": "B-<title>", - "18": "B-<web>", - "19": "I-<abstract>", - "20": "I-<address>", - "21": "I-<affiliation>", - "22": "I-<author>", - "23": "I-<copyright>", - "24": "I-<date>", - "25": "I-<doctype>", - "26": "I-<editor>", - "27": "I-<email>", - "28": "I-<funding>", - "29": "I-<group>", - "30": "I-<keyword>", - "31": "I-<meeting>", - "32": "I-<pubnum>", - "33": "I-<reference>", - "34": "I-<submission>", - "35": "I-<title>", - "36": "I-<web>", - "37": "O" - } -} \ No newline at end of file diff --git a/grobid-home/models/header-BidLSTM_ChainCRF/config.json b/grobid-home/models/header-BidLSTM_ChainCRF/config.json deleted file mode 100644 index 6b76b16277..0000000000 --- a/grobid-home/models/header-BidLSTM_ChainCRF/config.json +++ /dev/null @@ -1,26 +0,0 @@ -{ - "model_name": "header-BidLSTM_ChainCRF", - "architecture": 
"BidLSTM_ChainCRF", - "embeddings_name": "glove-840B", - "char_vocab_size": 331, - "case_vocab_size": 8, - "char_embedding_size": 25, - "num_char_lstm_units": 25, - "max_char_length": 30, - "features_vocabulary_size": 12, - "features_indices": null, - "features_embedding_size": 4, - "features_lstm_units": 4, - "max_sequence_length": 3000, - "word_embedding_size": 300, - "num_word_lstm_units": 100, - "case_embedding_size": 5, - "dropout": 0.5, - "recurrent_dropout": 0.5, - "use_crf": true, - "use_chain_crf": true, - "fold_number": 1, - "batch_size": 9, - "transformer_name": null, - "use_ELMo": false -} \ No newline at end of file diff --git a/grobid-home/models/header-BidLSTM_ChainCRF/model_weights.hdf5 b/grobid-home/models/header-BidLSTM_ChainCRF/model_weights.hdf5 deleted file mode 100644 index f48f45c70d..0000000000 Binary files a/grobid-home/models/header-BidLSTM_ChainCRF/model_weights.hdf5 and /dev/null differ diff --git a/grobid-home/models/header-BidLSTM_ChainCRF/preprocessor.json b/grobid-home/models/header-BidLSTM_ChainCRF/preprocessor.json deleted file mode 100644 index 095b9de412..0000000000 --- a/grobid-home/models/header-BidLSTM_ChainCRF/preprocessor.json +++ /dev/null @@ -1,438 +0,0 @@ -{ - "padding": true, - "return_lengths": true, - "return_word_embeddings": true, - "return_casing": false, - "return_features": false, - "return_chars": true, - "return_bert_embeddings": false, - "vocab_char": { - "<PAD>": 0, - "<UNK>": 1, - "!": 2, - "\"": 3, - "#": 4, - "$": 5, - "%": 6, - "&": 7, - "'": 8, - "(": 9, - ")": 10, - "*": 11, - "+": 12, - ",": 13, - "-": 14, - ".": 15, - "/": 16, - "0": 17, - "1": 18, - "2": 19, - "3": 20, - "4": 21, - "5": 22, - "6": 23, - "7": 24, - "8": 25, - "9": 26, - ":": 27, - ";": 28, - "<": 29, - "=": 30, - ">": 31, - "?": 32, - "@": 33, - "A": 34, - "B": 35, - "C": 36, - "D": 37, - "E": 38, - "F": 39, - "G": 40, - "H": 41, - "I": 42, - "J": 43, - "K": 44, - "L": 45, - "M": 46, - "N": 47, - "O": 48, - "P": 49, - "Q": 50, - "R": 
51, - "S": 52, - "T": 53, - "U": 54, - "V": 55, - "W": 56, - "X": 57, - "Y": 58, - "Z": 59, - "[": 60, - "\\": 61, - "]": 62, - "^": 63, - "_": 64, - "`": 65, - "a": 66, - "b": 67, - "c": 68, - "d": 69, - "e": 70, - "f": 71, - "g": 72, - "h": 73, - "i": 74, - "j": 75, - "k": 76, - "l": 77, - "m": 78, - "n": 79, - "o": 80, - "p": 81, - "q": 82, - "r": 83, - "s": 84, - "t": 85, - "u": 86, - "v": 87, - "w": 88, - "x": 89, - "y": 90, - "z": 91, - "{": 92, - "|": 93, - "}": 94, - "~": 95, - "\u00a1": 96, - "\u00a2": 97, - "\u00a3": 98, - "\u00a4": 99, - "\u00a5": 100, - "\u00a7": 101, - "\u00a8": 102, - "\u00a9": 103, - "\u00aa": 104, - "\u00ab": 105, - "\u00ae": 106, - "\u00af": 107, - "\u00b0": 108, - "\u00b1": 109, - "\u00b2": 110, - "\u00b4": 111, - "\u00b5": 112, - "\u00b6": 113, - "\u00b8": 114, - "\u00bb": 115, - "\u00bc": 116, - "\u00c0": 117, - "\u00c1": 118, - "\u00c2": 119, - "\u00c3": 120, - "\u00c5": 121, - "\u00c9": 122, - "\u00ca": 123, - "\u00cc": 124, - "\u00ce": 125, - "\u00d3": 126, - "\u00d5": 127, - "\u00d6": 128, - "\u00d7": 129, - "\u00d8": 130, - "\u00df": 131, - "\u00e0": 132, - "\u00e1": 133, - "\u00e2": 134, - "\u00e3": 135, - "\u00e4": 136, - "\u00e5": 137, - "\u00e7": 138, - "\u00e8": 139, - "\u00e9": 140, - "\u00ea": 141, - "\u00eb": 142, - "\u00ed": 143, - "\u00ee": 144, - "\u00ef": 145, - "\u00f1": 146, - "\u00f3": 147, - "\u00f4": 148, - "\u00f5": 149, - "\u00f6": 150, - "\u00f8": 151, - "\u00fa": 152, - "\u00fb": 153, - "\u00fc": 154, - "\u00fe": 155, - "\u00ff": 156, - "\u0100": 157, - "\u0103": 158, - "\u0104": 159, - "\u0107": 160, - "\u010c": 161, - "\u010d": 162, - "\u0117": 163, - "\u0119": 164, - "\u0130": 165, - "\u0131": 166, - "\u0141": 167, - "\u0142": 168, - "\u0144": 169, - "\u014c": 170, - "\u0159": 171, - "\u015f": 172, - "\u0160": 173, - "\u0161": 174, - "\u0179": 175, - "\u017a": 176, - "\u017c": 177, - "\u017d": 178, - "\u017e": 179, - "\u0202": 180, - "\u0288": 181, - "\u02d9": 182, - "\u0351": 183, - "\u0352": 184, - 
"\u0353": 185, - "\u0354": 186, - "\u0357": 187, - "\u0358": 188, - "\u0393": 189, - "\u0394": 190, - "\u0398": 191, - "\u039b": 192, - "\u03a0": 193, - "\u03a5": 194, - "\u03a8": 195, - "\u03b1": 196, - "\u03b2": 197, - "\u03b3": 198, - "\u03b4": 199, - "\u03b5": 200, - "\u03b6": 201, - "\u03b7": 202, - "\u03b8": 203, - "\u03ba": 204, - "\u03bb": 205, - "\u03bc": 206, - "\u03bd": 207, - "\u03be": 208, - "\u03c0": 209, - "\u03c1": 210, - "\u03c3": 211, - "\u03c4": 212, - "\u03c6": 213, - "\u03c7": 214, - "\u03c8": 215, - "\u03c9": 216, - "\u03d5": 217, - "\u03e9": 218, - "\u03ea": 219, - "\u03eb": 220, - "\u03ed": 221, - "\u03f3": 222, - "\u03fd": 223, - "\u03fe": 224, - "\u0408": 225, - "\u0409": 226, - "\u0411": 227, - "\u0418": 228, - "\u041b": 229, - "\u041f": 230, - "\u0424": 231, - "\u0426": 232, - "\u0431": 233, - "\u0432": 234, - "\u0433": 235, - "\u0434": 236, - "\u0437": 237, - "\u0438": 238, - "\u0439": 239, - "\u043a": 240, - "\u043b": 241, - "\u043c": 242, - "\u043d": 243, - "\u043f": 244, - "\u0440": 245, - "\u0441": 246, - "\u0442": 247, - "\u0443": 248, - "\u0444": 249, - "\u0445": 250, - "\u0446": 251, - "\u0447": 252, - "\u0448": 253, - "\u0449": 254, - "\u044b": 255, - "\u044c": 256, - "\u044d": 257, - "\u044e": 258, - "\u044f": 259, - "\u0451": 260, - "\u0546": 261, - "\u060a": 262, - "\u060d": 263, - "\u065e": 264, - "\u0728": 265, - "\u0846": 266, - "\u0be6": 267, - "\u0be7": 268, - "\u1c14": 269, - "\u1e46": 270, - "\u2020": 271, - "\u2021": 272, - "\u2022": 273, - "\u202b": 274, - "\u202c": 275, - "\u2032": 276, - "\u2039": 277, - "\u204e": 278, - "\u2113": 279, - "\u2122": 280, - "\u2162": 281, - "\u2192": 282, - "\u2193": 283, - "\u21b5": 284, - "\u21d1": 285, - "\u21e4": 286, - "\u2206": 287, - "\u2208": 288, - "\u2212": 289, - "\u221a": 290, - "\u221e": 291, - "\u223c": 292, - "\u2243": 293, - "\u2248": 294, - "\u2264": 295, - "\u2265": 296, - "\u2299": 297, - "\u22c5": 298, - "\u22c6": 299, - "\u232c": 300, - "\u2423": 301, - "\u2424": 
302, - "\u24d2": 303, - "\u25a1": 304, - "\u262f": 305, - "\u2663": 306, - "\u2666": 307, - "\u2709": 308, - "\u271d": 309, - "\u2e38": 310, - "\u318d": 311, - "\uf761": 312, - "\uf764": 313, - "\uf765": 314, - "\uf767": 315, - "\uf769": 316, - "\uf76b": 317, - "\uf76c": 318, - "\uf76e": 319, - "\uf76f": 320, - "\uf770": 321, - "\uf772": 322, - "\uf773": 323, - "\uf774": 324, - "\uf777": 325, - "\uf779": 326, - "\uf8e9": 327, - "\uff0c": 328, - "\uff1b": 329, - "\ufffd": 330 - }, - "vocab_tag": { - "<PAD>": 0, - "B-<abstract>": 1, - "B-<address>": 2, - "B-<affiliation>": 3, - "B-<author>": 4, - "B-<availability>": 5, - "B-<copyright>": 6, - "B-<date>": 7, - "B-<doctype>": 8, - "B-<editor>": 9, - "B-<email>": 10, - "B-<funding>": 11, - "B-<group>": 12, - "B-<keyword>": 13, - "B-<meeting>": 14, - "B-<pubnum>": 15, - "B-<reference>": 16, - "B-<submission>": 17, - "B-<title>": 18, - "B-<web>": 19, - "I-<abstract>": 20, - "I-<address>": 21, - "I-<affiliation>": 22, - "I-<author>": 23, - "I-<availability>": 24, - "I-<copyright>": 25, - "I-<date>": 26, - "I-<doctype>": 27, - "I-<editor>": 28, - "I-<email>": 29, - "I-<funding>": 30, - "I-<group>": 31, - "I-<keyword>": 32, - "I-<meeting>": 33, - "I-<pubnum>": 34, - "I-<reference>": 35, - "I-<submission>": 36, - "I-<title>": 37, - "I-<web>": 38, - "O": 39 - }, - "vocab_case": [ - "<PAD>", - "numeric", - "allLower", - "allUpper", - "initialUpper", - "other", - "mainly_numeric", - "contains_digit" - ], - "max_char_length": 30, - "feature_preprocessor": null, - "indice_tag": { - "0": "<PAD>", - "1": "B-<abstract>", - "2": "B-<address>", - "3": "B-<affiliation>", - "4": "B-<author>", - "5": "B-<availability>", - "6": "B-<copyright>", - "7": "B-<date>", - "8": "B-<doctype>", - "9": "B-<editor>", - "10": "B-<email>", - "11": "B-<funding>", - "12": "B-<group>", - "13": "B-<keyword>", - "14": "B-<meeting>", - "15": "B-<pubnum>", - "16": "B-<reference>", - "17": "B-<submission>", - "18": "B-<title>", - "19": "B-<web>", - "20": 
"I-<abstract>", - "21": "I-<address>", - "22": "I-<affiliation>", - "23": "I-<author>", - "24": "I-<availability>", - "25": "I-<copyright>", - "26": "I-<date>", - "27": "I-<doctype>", - "28": "I-<editor>", - "29": "I-<email>", - "30": "I-<funding>", - "31": "I-<group>", - "32": "I-<keyword>", - "33": "I-<meeting>", - "34": "I-<pubnum>", - "35": "I-<reference>", - "36": "I-<submission>", - "37": "I-<title>", - "38": "I-<web>", - "39": "O" - } -} \ No newline at end of file diff --git a/grobid-home/models/header-BidLSTM_ChainCRF_FEATURES-with_ELMo/config.json b/grobid-home/models/header-BidLSTM_ChainCRF_FEATURES-with_ELMo/config.json deleted file mode 100644 index c55e0e06fe..0000000000 --- a/grobid-home/models/header-BidLSTM_ChainCRF_FEATURES-with_ELMo/config.json +++ /dev/null @@ -1,148 +0,0 @@ -{ - "model_name": "header-BidLSTM_ChainCRF_FEATURES-with_ELMo", - "architecture": "BidLSTM_ChainCRF_FEATURES", - "embeddings_name": "glove-840B", - "char_vocab_size": 325, - "case_vocab_size": 8, - "char_embedding_size": 25, - "num_char_lstm_units": 25, - "max_char_length": 30, - "features_vocabulary_size": 12, - "features_indices": [ - 9, - 10, - 11, - 12, - 13, - 14, - 15, - 16, - 17, - 18, - 19, - 20, - 21, - 22, - 23, - 24, - 25, - 26, - 27, - 28, - 29, - 30 - ], - "features_embedding_size": 4, - "features_lstm_units": 4, - "max_sequence_length": 1500, - "word_embedding_size": 1324, - "num_word_lstm_units": 100, - "case_embedding_size": 5, - "dropout": 0.5, - "recurrent_dropout": 0.5, - "use_crf": true, - "use_chain_crf": true, - "fold_number": 1, - "batch_size": 10, - "transformer_name": null, - "use_ELMo": true, - "features_map_to_index": { - "9": { - "BLOCKEND": 1, - "BLOCKIN": 2, - "BLOCKSTART": 3 - }, - "10": { - "LINEEND": 13, - "LINEIN": 14, - "LINESTART": 15 - }, - "11": { - "ALIGNEDLEFT": 25, - "LINEINDENT": 26 - }, - "12": { - "NEWFONT": 37, - "SAMEFONT": 38 - }, - "13": { - "HIGHERFONT": 49, - "LOWERFONT": 50, - "SAMEFONTSIZE": 51 - }, - "14": { - "0": 61, - 
"1": 62 - }, - "15": { - "0": 73, - "1": 74 - }, - "16": { - "ALLCAP": 85, - "INITCAP": 86, - "NOCAPS": 87 - }, - "17": { - "ALLDIGIT": 97, - "CONTAINSDIGITS": 98, - "NODIGIT": 99 - }, - "18": { - "0": 109, - "1": 110 - }, - "19": { - "0": 121, - "1": 122 - }, - "20": { - "0": 133, - "1": 134 - }, - "21": { - "0": 145, - "1": 146 - }, - "22": { - "0": 157, - "1": 158 - }, - "23": { - "0": 169, - "1": 170 - }, - "24": { - "0": 181, - "1": 182 - }, - "25": { - "0": 193, - "1": 194 - }, - "26": { - "COMMA": 205, - "DOT": 206, - "ENDBRACKET": 207, - "HYPHEN": 208, - "NOPUNCT": 209, - "OPENBRACKET": 210, - "PUNCT": 211, - "QUOTE": 212 - }, - "27": { - "0": 217, - "1": 218 - }, - "28": { - "0": 229 - }, - "29": { - "0": 241, - "1": 242 - }, - "30": { - "0": 253 - } - } -} \ No newline at end of file diff --git a/grobid-home/models/header-BidLSTM_ChainCRF_FEATURES-with_ELMo/model_weights.hdf5 b/grobid-home/models/header-BidLSTM_ChainCRF_FEATURES-with_ELMo/model_weights.hdf5 deleted file mode 100644 index 49cccada7f..0000000000 Binary files a/grobid-home/models/header-BidLSTM_ChainCRF_FEATURES-with_ELMo/model_weights.hdf5 and /dev/null differ diff --git a/grobid-home/models/header-BidLSTM_ChainCRF_FEATURES-with_ELMo/preprocessor.json b/grobid-home/models/header-BidLSTM_ChainCRF_FEATURES-with_ELMo/preprocessor.json deleted file mode 100644 index e38f22aefc..0000000000 --- a/grobid-home/models/header-BidLSTM_ChainCRF_FEATURES-with_ELMo/preprocessor.json +++ /dev/null @@ -1,553 +0,0 @@ -{ - "padding": true, - "return_lengths": true, - "return_word_embeddings": true, - "return_casing": false, - "return_features": true, - "return_chars": true, - "return_bert_embeddings": false, - "vocab_char": { - "<PAD>": 0, - "<UNK>": 1, - "!": 2, - "\"": 3, - "#": 4, - "$": 5, - "%": 6, - "&": 7, - "'": 8, - "(": 9, - ")": 10, - "*": 11, - "+": 12, - ",": 13, - "-": 14, - ".": 15, - "/": 16, - "0": 17, - "1": 18, - "2": 19, - "3": 20, - "4": 21, - "5": 22, - "6": 23, - "7": 24, - "8": 25, - 
"9": 26, - ":": 27, - ";": 28, - "<": 29, - "=": 30, - ">": 31, - "?": 32, - "@": 33, - "A": 34, - "B": 35, - "C": 36, - "D": 37, - "E": 38, - "F": 39, - "G": 40, - "H": 41, - "I": 42, - "J": 43, - "K": 44, - "L": 45, - "M": 46, - "N": 47, - "O": 48, - "P": 49, - "Q": 50, - "R": 51, - "S": 52, - "T": 53, - "U": 54, - "V": 55, - "W": 56, - "X": 57, - "Y": 58, - "Z": 59, - "[": 60, - "\\": 61, - "]": 62, - "^": 63, - "_": 64, - "`": 65, - "a": 66, - "b": 67, - "c": 68, - "d": 69, - "e": 70, - "f": 71, - "g": 72, - "h": 73, - "i": 74, - "j": 75, - "k": 76, - "l": 77, - "m": 78, - "n": 79, - "o": 80, - "p": 81, - "q": 82, - "r": 83, - "s": 84, - "t": 85, - "u": 86, - "v": 87, - "w": 88, - "x": 89, - "y": 90, - "z": 91, - "{": 92, - "|": 93, - "}": 94, - "~": 95, - "\u00a1": 96, - "\u00a2": 97, - "\u00a3": 98, - "\u00a4": 99, - "\u00a5": 100, - "\u00a7": 101, - "\u00a9": 102, - "\u00aa": 103, - "\u00ab": 104, - "\u00ae": 105, - "\u00af": 106, - "\u00b0": 107, - "\u00b1": 108, - "\u00b2": 109, - "\u00b5": 110, - "\u00b6": 111, - "\u00b8": 112, - "\u00bb": 113, - "\u00bc": 114, - "\u00c0": 115, - "\u00c1": 116, - "\u00c2": 117, - "\u00c3": 118, - "\u00c5": 119, - "\u00c9": 120, - "\u00ca": 121, - "\u00cc": 122, - "\u00ce": 123, - "\u00d3": 124, - "\u00d5": 125, - "\u00d6": 126, - "\u00d7": 127, - "\u00d8": 128, - "\u00df": 129, - "\u00e0": 130, - "\u00e1": 131, - "\u00e2": 132, - "\u00e3": 133, - "\u00e4": 134, - "\u00e5": 135, - "\u00e7": 136, - "\u00e8": 137, - "\u00e9": 138, - "\u00ea": 139, - "\u00eb": 140, - "\u00ed": 141, - "\u00ee": 142, - "\u00ef": 143, - "\u00f1": 144, - "\u00f3": 145, - "\u00f4": 146, - "\u00f5": 147, - "\u00f6": 148, - "\u00f8": 149, - "\u00fa": 150, - "\u00fb": 151, - "\u00fc": 152, - "\u00fe": 153, - "\u00ff": 154, - "\u0100": 155, - "\u0103": 156, - "\u0104": 157, - "\u0107": 158, - "\u010c": 159, - "\u010d": 160, - "\u0117": 161, - "\u0119": 162, - "\u0130": 163, - "\u0141": 164, - "\u0142": 165, - "\u0144": 166, - "\u014c": 167, - 
"\u0159": 168, - "\u015f": 169, - "\u0160": 170, - "\u0161": 171, - "\u0179": 172, - "\u017a": 173, - "\u017c": 174, - "\u017d": 175, - "\u017e": 176, - "\u0202": 177, - "\u0288": 178, - "\u02d9": 179, - "\u0351": 180, - "\u0352": 181, - "\u0353": 182, - "\u0354": 183, - "\u0357": 184, - "\u0358": 185, - "\u0393": 186, - "\u0394": 187, - "\u0398": 188, - "\u039b": 189, - "\u03a0": 190, - "\u03a5": 191, - "\u03a8": 192, - "\u03b1": 193, - "\u03b2": 194, - "\u03b4": 195, - "\u03b5": 196, - "\u03b6": 197, - "\u03b7": 198, - "\u03b8": 199, - "\u03ba": 200, - "\u03bb": 201, - "\u03bc": 202, - "\u03bd": 203, - "\u03be": 204, - "\u03c0": 205, - "\u03c1": 206, - "\u03c3": 207, - "\u03c4": 208, - "\u03c6": 209, - "\u03c7": 210, - "\u03c8": 211, - "\u03c9": 212, - "\u03d5": 213, - "\u03e9": 214, - "\u03ea": 215, - "\u03eb": 216, - "\u03ed": 217, - "\u03f3": 218, - "\u03fd": 219, - "\u03fe": 220, - "\u0408": 221, - "\u0409": 222, - "\u0411": 223, - "\u0418": 224, - "\u041b": 225, - "\u041f": 226, - "\u0424": 227, - "\u0426": 228, - "\u0431": 229, - "\u0432": 230, - "\u0433": 231, - "\u0434": 232, - "\u0437": 233, - "\u0438": 234, - "\u0439": 235, - "\u043a": 236, - "\u043b": 237, - "\u043c": 238, - "\u043d": 239, - "\u043f": 240, - "\u0440": 241, - "\u0441": 242, - "\u0442": 243, - "\u0443": 244, - "\u0444": 245, - "\u0445": 246, - "\u0446": 247, - "\u0447": 248, - "\u0448": 249, - "\u0449": 250, - "\u044b": 251, - "\u044c": 252, - "\u044d": 253, - "\u044e": 254, - "\u044f": 255, - "\u0451": 256, - "\u0546": 257, - "\u060a": 258, - "\u060d": 259, - "\u065e": 260, - "\u0728": 261, - "\u0846": 262, - "\u0be6": 263, - "\u0be7": 264, - "\u1c14": 265, - "\u1e46": 266, - "\u2020": 267, - "\u2021": 268, - "\u2022": 269, - "\u202b": 270, - "\u202c": 271, - "\u2032": 272, - "\u2039": 273, - "\u204e": 274, - "\u2113": 275, - "\u2122": 276, - "\u2162": 277, - "\u2192": 278, - "\u2193": 279, - "\u21b5": 280, - "\u21d1": 281, - "\u21e4": 282, - "\u2206": 283, - "\u2208": 284, - "\u2212": 
285, - "\u221a": 286, - "\u221e": 287, - "\u223c": 288, - "\u2243": 289, - "\u2248": 290, - "\u2264": 291, - "\u2265": 292, - "\u2299": 293, - "\u22c5": 294, - "\u22c6": 295, - "\u232c": 296, - "\u2423": 297, - "\u2424": 298, - "\u24d2": 299, - "\u25a1": 300, - "\u262f": 301, - "\u2663": 302, - "\u2666": 303, - "\u271d": 304, - "\u2e38": 305, - "\u318d": 306, - "\uf761": 307, - "\uf764": 308, - "\uf765": 309, - "\uf767": 310, - "\uf769": 311, - "\uf76b": 312, - "\uf76c": 313, - "\uf76e": 314, - "\uf76f": 315, - "\uf770": 316, - "\uf772": 317, - "\uf773": 318, - "\uf774": 319, - "\uf777": 320, - "\uf779": 321, - "\uf8e9": 322, - "\uff0c": 323, - "\ufffd": 324 - }, - "vocab_tag": { - "<PAD>": 0, - "B-<abstract>": 1, - "B-<address>": 2, - "B-<affiliation>": 3, - "B-<author>": 4, - "B-<copyright>": 5, - "B-<date>": 6, - "B-<doctype>": 7, - "B-<editor>": 8, - "B-<email>": 9, - "B-<funding>": 10, - "B-<group>": 11, - "B-<keyword>": 12, - "B-<meeting>": 13, - "B-<pubnum>": 14, - "B-<reference>": 15, - "B-<submission>": 16, - "B-<title>": 17, - "B-<web>": 18, - "I-<abstract>": 19, - "I-<address>": 20, - "I-<affiliation>": 21, - "I-<author>": 22, - "I-<copyright>": 23, - "I-<date>": 24, - "I-<doctype>": 25, - "I-<editor>": 26, - "I-<email>": 27, - "I-<funding>": 28, - "I-<group>": 29, - "I-<keyword>": 30, - "I-<meeting>": 31, - "I-<pubnum>": 32, - "I-<reference>": 33, - "I-<submission>": 34, - "I-<title>": 35, - "I-<web>": 36, - "O": 37 - }, - "vocab_case": [ - "<PAD>", - "numeric", - "allLower", - "allUpper", - "initialUpper", - "other", - "mainly_numeric", - "contains_digit" - ], - "max_char_length": 30, - "feature_preprocessor": { - "features_vocabulary_size": 12, - "features_indices": [ - 9, - 10, - 11, - 12, - 13, - 14, - 15, - 16, - 17, - 18, - 19, - 20, - 21, - 22, - 23, - 24, - 25, - 26, - 27, - 28, - 29, - 30 - ], - "features_map_to_index": { - "9": { - "BLOCKEND": 1, - "BLOCKIN": 2, - "BLOCKSTART": 3 - }, - "10": { - "LINEEND": 13, - "LINEIN": 14, - "LINESTART": 
15 - }, - "11": { - "ALIGNEDLEFT": 25, - "LINEINDENT": 26 - }, - "12": { - "NEWFONT": 37, - "SAMEFONT": 38 - }, - "13": { - "HIGHERFONT": 49, - "LOWERFONT": 50, - "SAMEFONTSIZE": 51 - }, - "14": { - "0": 61, - "1": 62 - }, - "15": { - "0": 73, - "1": 74 - }, - "16": { - "ALLCAP": 85, - "INITCAP": 86, - "NOCAPS": 87 - }, - "17": { - "ALLDIGIT": 97, - "CONTAINSDIGITS": 98, - "NODIGIT": 99 - }, - "18": { - "0": 109, - "1": 110 - }, - "19": { - "0": 121, - "1": 122 - }, - "20": { - "0": 133, - "1": 134 - }, - "21": { - "0": 145, - "1": 146 - }, - "22": { - "0": 157, - "1": 158 - }, - "23": { - "0": 169, - "1": 170 - }, - "24": { - "0": 181, - "1": 182 - }, - "25": { - "0": 193, - "1": 194 - }, - "26": { - "COMMA": 205, - "DOT": 206, - "ENDBRACKET": 207, - "HYPHEN": 208, - "NOPUNCT": 209, - "OPENBRACKET": 210, - "PUNCT": 211, - "QUOTE": 212 - }, - "27": { - "0": 217, - "1": 218 - }, - "28": { - "0": 229 - }, - "29": { - "0": 241, - "1": 242 - }, - "30": { - "0": 253 - } - } - }, - "indice_tag": { - "0": "<PAD>", - "1": "B-<abstract>", - "2": "B-<address>", - "3": "B-<affiliation>", - "4": "B-<author>", - "5": "B-<copyright>", - "6": "B-<date>", - "7": "B-<doctype>", - "8": "B-<editor>", - "9": "B-<email>", - "10": "B-<funding>", - "11": "B-<group>", - "12": "B-<keyword>", - "13": "B-<meeting>", - "14": "B-<pubnum>", - "15": "B-<reference>", - "16": "B-<submission>", - "17": "B-<title>", - "18": "B-<web>", - "19": "I-<abstract>", - "20": "I-<address>", - "21": "I-<affiliation>", - "22": "I-<author>", - "23": "I-<copyright>", - "24": "I-<date>", - "25": "I-<doctype>", - "26": "I-<editor>", - "27": "I-<email>", - "28": "I-<funding>", - "29": "I-<group>", - "30": "I-<keyword>", - "31": "I-<meeting>", - "32": "I-<pubnum>", - "33": "I-<reference>", - "34": "I-<submission>", - "35": "I-<title>", - "36": "I-<web>", - "37": "O" - } -} \ No newline at end of file diff --git a/grobid-home/models/header-BidLSTM_ChainCRF_FEATURES.onnx/config.json 
b/grobid-home/models/header-BidLSTM_ChainCRF_FEATURES.onnx/config.json new file mode 100644 index 0000000000..b0b9b28f91 --- /dev/null +++ b/grobid-home/models/header-BidLSTM_ChainCRF_FEATURES.onnx/config.json @@ -0,0 +1,37 @@ +{ + "modelName": "grobid-header-BidLSTM_ChainCRF_FEATURES", + "architecture": "BidLSTM_ChainCRF_FEATURES", + "wordEmbeddingSize": 300, + "charEmbeddingSize": 25, + "numCharLstmUnits": 25, + "numWordLstmUnits": 100, + "maxSequenceLength": 3500, + "embeddingsName": "glove-840B", + "featuresIndices": [ + 9, + 10, + 11, + 12, + 13, + 14, + 15, + 16, + 17, + 18, + 19, + 20, + 21, + 22, + 23, + 24, + 25, + 26, + 27, + 28, + 29, + 30 + ], + "featuresEmbeddingSize": 4, + "featuresLstmUnits": 4, + "featuresVocabularySize": 12 +} \ No newline at end of file diff --git a/grobid-home/models/header-BidLSTM_ChainCRF_FEATURES.onnx/crf_params.json b/grobid-home/models/header-BidLSTM_ChainCRF_FEATURES.onnx/crf_params.json new file mode 100644 index 0000000000..210f6ccde9 --- /dev/null +++ b/grobid-home/models/header-BidLSTM_ChainCRF_FEATURES.onnx/crf_params.json @@ -0,0 +1,1768 @@ +{ + "transitions": [ + [ + -0.1606302112340927, + 0.043633703142404556, + -0.16714037954807281, + -0.14135804772377014, + -0.09958339482545853, + 0.1718382090330124, + -0.17842881381511688, + 0.02059328183531761, + -0.2709449529647827, + -0.20375846326351166, + -0.06543589383363724, + -0.02024862915277481, + 0.09793464839458466, + 0.058344386518001556, + -0.09301751852035522, + 0.12585629522800446, + -0.19213077425956726, + -0.13705673813819885, + 0.12874269485473633, + -0.11799249053001404, + -0.08664525300264359, + 0.24983571469783783, + -0.010730385780334473, + 0.22418630123138428, + 0.17918217182159424, + -0.1231611967086792, + 0.18307723104953766, + 0.11699503660202026, + 0.029868990182876587, + -0.2603664994239807, + -0.05764438956975937, + 0.1297904998064041, + -0.11681883782148361, + -0.11091574281454086, + 0.053098730742931366, + -0.1312444508075714, + 
0.058684173971414566, + 0.0580403134226799, + 0.17952944338321686, + -0.19752013683319092 + ], + [ + 0.1746719926595688, + 0.01876123994588852, + 0.01073032058775425, + 0.21861132979393005, + 0.01789756864309311, + -0.07456880807876587, + 0.18233047425746918, + 0.05164806917309761, + -0.23951278626918793, + 0.23199956119060516, + -0.1658005714416504, + -0.23983943462371826, + 0.26420125365257263, + -0.12653745710849762, + -0.14462143182754517, + 0.14843574166297913, + 0.23453252017498016, + 0.14820867776870728, + 0.18625448644161224, + -0.15545357763767242, + -0.16230744123458862, + 0.17424774169921875, + 0.23913471400737762, + -0.2314494401216507, + 0.1973552256822586, + 0.24976465106010437, + -0.017706291750073433, + 0.12407338619232178, + -0.20182155072689056, + 0.21544311940670013, + 0.23737677931785583, + -0.05314001068472862, + -0.27028965950012207, + -0.0971355065703392, + 0.183010533452034, + 0.07772891968488693, + -0.24008654057979584, + 0.15131054818630219, + -0.13375145196914673, + -0.08681870251893997 + ], + [ + 0.0013647346058860421, + -0.19414949417114258, + -0.2539403736591339, + -0.20838919281959534, + -0.09874372184276581, + 0.0400146059691906, + 0.10620291531085968, + -0.25207415223121643, + 0.02893245220184326, + 0.25696539878845215, + 0.196473628282547, + 0.26994165778160095, + -0.13977040350437164, + -0.06150316074490547, + -0.18084076046943665, + -0.14938555657863617, + 0.20593012869358063, + -0.2681409418582916, + -0.007980121299624443, + 0.24515900015830994, + 0.23318397998809814, + 0.15928570926189423, + -0.18366149067878723, + 0.23123060166835785, + -0.1904379427433014, + -0.08521997183561325, + -0.24444985389709473, + -0.04672623425722122, + -0.15239323675632477, + 0.20753416419029236, + -0.16465574502944946, + 0.21417586505413055, + 0.25540855526924133, + 0.08315364271402359, + -0.18333812057971954, + 0.061338361352682114, + -0.20972716808319092, + -0.08739107102155685, + 0.07264786958694458, + -0.04969389736652374 + ], + [ + 
-0.011130832135677338, + 0.02735179103910923, + -0.15376847982406616, + -0.151920348405838, + 0.11878669261932373, + 0.22354328632354736, + -0.26149919629096985, + -0.2652323842048645, + 0.027938388288021088, + 0.008065786212682724, + 0.02892187424004078, + 0.13280487060546875, + -0.05418702960014343, + -0.13656766712665558, + 0.009091255255043507, + -0.036386795341968536, + -0.06909167021512985, + 0.15065236389636993, + -0.0315946526825428, + 0.09752444177865982, + 0.23814472556114197, + 0.12030398100614548, + 0.12351913750171661, + -0.14271695911884308, + 0.06624798476696014, + 0.01972223073244095, + -0.12667107582092285, + -0.08497864753007889, + 0.03601879999041557, + -0.07845555245876312, + 0.2510727047920227, + 0.2074412852525711, + -0.14907747507095337, + -0.08064770698547363, + 0.19083255529403687, + 0.24405266344547272, + 0.2445250004529953, + 0.20522712171077728, + 0.1575574278831482, + 0.24647442996501923 + ], + [ + 0.18203920125961304, + 0.0762581154704094, + 0.13015548884868622, + 0.07960950583219528, + -0.10310246050357819, + -0.20743003487586975, + 0.13196998834609985, + 0.01354992762207985, + 0.19376462697982788, + -0.008541417308151722, + -0.19380801916122437, + -0.23224574327468872, + -0.12328411638736725, + -0.022433793172240257, + 0.1441819965839386, + 0.06718683987855911, + -0.16297774016857147, + -0.18969927728176117, + 0.1542854607105255, + -0.2295478731393814, + 0.02080538682639599, + -0.23514536023139954, + -0.08465371280908585, + 0.1998954713344574, + 0.26954951882362366, + 0.1836417019367218, + -0.22812657058238983, + 0.06020456925034523, + 0.09356405586004257, + -0.011718392372131348, + -0.10152503103017807, + 0.035358455032110214, + 0.11788247525691986, + -0.08393903076648712, + -0.08436637371778488, + 0.0861099436879158, + -0.24669072031974792, + 0.2149425745010376, + -0.27110397815704346, + -0.24359972774982452 + ], + [ + 0.24454092979431152, + -0.14845772087574005, + -0.1223529800772667, + 0.048032891005277634, + 
-0.06750105321407318, + -0.2320694774389267, + 0.04178181290626526, + 0.022256048396229744, + -0.16639675199985504, + 0.021565601229667664, + -0.21335795521736145, + 0.015501096844673157, + -0.04271402582526207, + -0.08120804280042648, + -0.25656870007514954, + 0.008020798675715923, + -0.1924804002046585, + -0.13749849796295166, + -0.26099416613578796, + 0.12944741547107697, + 0.2476164549589157, + 0.012016539461910725, + 0.23167920112609863, + 0.008982802741229534, + -0.2436799257993698, + 0.11587315797805786, + 0.2299536168575287, + -0.01087914127856493, + -0.03945402801036835, + -0.06446411460638046, + 0.04425738751888275, + 0.22394222021102905, + -0.02637847512960434, + 0.004901363514363766, + 0.24088412523269653, + -0.27377551794052124, + -0.20405152440071106, + 0.1631748527288437, + 0.1723630130290985, + -0.22614003717899323 + ], + [ + 0.2512151896953583, + 0.15875095129013062, + -0.23989005386829376, + -0.10412473231554031, + 0.02469339407980442, + 0.07242169231176376, + 0.09896866977214813, + -0.06264837831258774, + -0.24596203863620758, + -0.2193225622177124, + -0.2548896074295044, + 0.14248690009117126, + -0.13167524337768555, + 0.0722116082906723, + -0.10474007576704025, + -0.16372594237327576, + -0.08356058597564697, + -0.08134948462247849, + -0.19958360493183136, + 0.03589878976345062, + -0.16340017318725586, + 0.14147403836250305, + -0.22215887904167175, + -0.10575629025697708, + 0.07128120958805084, + -0.015574944205582142, + -0.16519562900066376, + 0.04529516398906708, + -0.06588379293680191, + -0.08035607635974884, + -0.12558743357658386, + -0.20451149344444275, + 0.18335966765880585, + -0.07609362155199051, + 0.16758449375629425, + -0.23972870409488678, + -0.22235000133514404, + 0.016375575214624405, + -0.1404997706413269, + 0.27138179540634155 + ], + [ + 0.06811945885419846, + -0.0803876742720604, + 0.1953868418931961, + 0.0835551992058754, + -0.11281067878007889, + 0.004687820561230183, + -0.09764311462640762, + -0.23143325746059418, + 
-0.2556000351905823, + -0.17608343064785004, + 0.08504343777894974, + -0.054455939680337906, + 0.06945846974849701, + 0.1410948783159256, + 0.23471005260944366, + 0.1490805745124817, + -0.2511075437068939, + -0.052315518260002136, + -0.14584344625473022, + -0.0782441645860672, + -0.2638644874095917, + 0.09778042882680893, + 0.0022264798171818256, + -0.18095311522483826, + 0.1514904946088791, + -0.2359384298324585, + -0.05688200518488884, + -0.07930666208267212, + -0.16991455852985382, + 0.19135701656341553, + 0.1292327493429184, + 0.22523579001426697, + 0.051484115421772, + -0.008523265831172466, + -0.02904372848570347, + 0.040976349264383316, + 0.19386734068393707, + 0.05248904973268509, + -0.1982509195804596, + -0.15686102211475372 + ], + [ + -0.23160545527935028, + 0.03946584835648537, + 0.10740412026643753, + -0.16381248831748962, + 0.19221456348896027, + -0.13195432722568512, + -0.26691943407058716, + 0.02127191051840782, + -0.014071264304220676, + 0.02943655103445053, + 0.03227173164486885, + -0.17601266503334045, + 0.2056572437286377, + -0.13046768307685852, + -0.12459356337785721, + 0.10997645556926727, + 0.02676156908273697, + 0.1608143001794815, + -0.1014527678489685, + 0.048356976360082626, + 0.22250616550445557, + -0.18061476945877075, + -0.037482570856809616, + -0.001239011762663722, + 0.21393324434757233, + -0.20118798315525055, + 0.01670471951365471, + -0.010696155950427055, + -0.07001273334026337, + 0.23290874063968658, + 0.2022254317998886, + 0.003503622952848673, + 0.25353768467903137, + -0.13798290491104126, + -0.1171594113111496, + 0.09992695599794388, + -0.25850045680999756, + 0.23568573594093323, + -0.23854975402355194, + -0.0693306252360344 + ], + [ + 0.047528043389320374, + 0.13320264220237732, + 0.06865604221820831, + 0.023816728964447975, + -0.22629337012767792, + 0.1773655116558075, + 0.17313235998153687, + -0.1683274805545807, + 0.08501415699720383, + -0.17335757613182068, + 0.0006854197708889842, + 0.2199753075838089, + 
-0.17771603167057037, + 0.09571992605924606, + -0.176936075091362, + 0.12262389808893204, + -0.26872846484184265, + 0.011559125036001205, + 0.10384610295295715, + -0.2602391242980957, + -0.19591312110424042, + 0.07357686758041382, + 0.14122925698757172, + 0.15958769619464874, + -0.012517325580120087, + -0.26638737320899963, + 0.1926216036081314, + -0.08019030839204788, + -0.020313970744609833, + -0.20012687146663666, + 0.03428306803107262, + -0.1632600575685501, + -0.2409207671880722, + 0.24693873524665833, + 0.22702179849147797, + -0.24353723227977753, + -0.08596104383468628, + -0.044263262301683426, + 0.24695616960525513, + 0.005608526058495045 + ], + [ + -0.1352442353963852, + -0.1000145822763443, + 0.0069076078943908215, + -0.25212135910987854, + 0.2645370066165924, + -0.036693789064884186, + 0.05845065042376518, + -0.021344060078263283, + -0.010532546788454056, + 0.25649768114089966, + 0.15649059414863586, + -0.2273271232843399, + -0.08941200375556946, + 0.032601069658994675, + 0.008318210951983929, + 0.20293749868869781, + 0.1774439960718155, + -0.05136578902602196, + -0.03092433139681816, + -0.2579447031021118, + 0.2595227062702179, + -0.1090468093752861, + -0.17337356507778168, + -0.091027170419693, + 0.0643380805850029, + 0.18156647682189941, + 0.08905713260173798, + 0.055515117943286896, + -0.027281388640403748, + -0.2672785222530365, + 0.14795580506324768, + -0.18255756795406342, + 0.11968689411878586, + -0.044479694217443466, + 0.03797408193349838, + 0.22432279586791992, + 0.1875845193862915, + 0.07309937477111816, + 0.11393201351165771, + -0.11698388308286667 + ], + [ + 0.24198846518993378, + 0.15651674568653107, + 0.019638819620013237, + -0.016601359471678734, + -0.2642781436443329, + -0.243647500872612, + 0.11977265775203705, + 9.784248686628416e-05, + 0.12649205327033997, + -0.18958885967731476, + -0.2161664068698883, + 0.12133190035820007, + -0.16792066395282745, + 0.05579310283064842, + -0.23082831501960754, + 0.08990725874900818, + 
0.24842560291290283, + -0.2404637485742569, + 0.2510489821434021, + 0.18000581860542297, + -0.18748103082180023, + -0.102199025452137, + -0.2715989947319031, + 0.11861708760261536, + 0.1432359516620636, + -0.10912160575389862, + 0.21774442493915558, + 0.010846870020031929, + -0.21671640872955322, + 0.1828717291355133, + 0.11566640436649323, + 0.09473996609449387, + 0.04748429358005524, + -0.05284813418984413, + -0.04476174712181091, + -0.23577629029750824, + -0.025251751765608788, + 0.12438885122537613, + 0.2320699393749237, + -0.06245662644505501 + ], + [ + 0.10372789204120636, + 0.11347299814224243, + 0.17003846168518066, + 0.1724015325307846, + -0.16984863579273224, + 0.15372268855571747, + -0.2624516785144806, + -0.22006207704544067, + 0.20036180317401886, + 0.050332795828580856, + 0.15912580490112305, + 0.25774818658828735, + -0.07564213871955872, + 0.2510140836238861, + -0.016544880345463753, + -0.23880736529827118, + 0.21483014523983002, + 0.16438347101211548, + -0.0015141916228458285, + 0.10322751104831696, + -0.20075847208499908, + -0.13435529172420502, + 0.2627103328704834, + -0.1586332768201828, + 0.1428026556968689, + 0.010795777663588524, + -0.09880895167589188, + -0.006051608361303806, + 0.2677515745162964, + -0.05405629426240921, + 0.2060886025428772, + -0.019438106566667557, + -0.0016821431927382946, + -0.23441261053085327, + -0.11100826412439346, + 0.22099977731704712, + -0.13993556797504425, + -0.018024515360593796, + 0.1279558688402176, + -0.03586740046739578 + ], + [ + -0.09311340004205704, + -0.15737220644950867, + -0.26190856099128723, + 0.25903788208961487, + -0.15530884265899658, + -0.1500508189201355, + -0.10818249732255936, + 0.012784294784069061, + 0.13713383674621582, + -0.11901791393756866, + -0.1707514524459839, + 0.21998955309391022, + -0.06285211443901062, + 0.05195292457938194, + -0.15885834395885468, + 0.127012699842453, + -0.02062266319990158, + 0.06542720645666122, + -0.2413201928138733, + -0.14061422646045685, + 
-0.15633317828178406, + -0.07426976412534714, + 0.18301105499267578, + -0.180566668510437, + 0.10280914604663849, + 0.17439517378807068, + -0.11117270588874817, + 0.05470648780465126, + 0.029024679213762283, + -0.16665829718112946, + 0.2154286652803421, + 0.19887995719909668, + 0.030877841636538506, + 0.15617410838603973, + 0.003121524816378951, + 0.19620588421821594, + 0.21036840975284576, + 0.24774853885173798, + 0.20166920125484467, + 0.06786925345659256 + ], + [ + 0.04059859365224838, + -0.04021678864955902, + 0.0475047305226326, + 0.1417064517736435, + 0.2182660549879074, + 0.0578811950981617, + -0.05492319539189339, + 0.13871005177497864, + -0.13194455206394196, + 0.14395369589328766, + 0.0175698921084404, + 0.1848442107439041, + 0.11788638681173325, + 0.17071840167045593, + 0.10133135318756104, + -0.26275211572647095, + 0.03829049691557884, + 0.14473329484462738, + -0.2502201199531555, + 0.16081035137176514, + 0.03851892426609993, + -0.10171995311975479, + -0.034088801592588425, + 0.09418760985136032, + 0.2591724097728729, + -0.12542439997196198, + -0.22579389810562134, + -0.1758710741996765, + -0.2483779639005661, + -0.27026674151420593, + -0.07127141207456589, + -0.239274799823761, + -0.07897289097309113, + 0.12658396363258362, + -0.21855367720127106, + -0.25659695267677307, + 0.27358418703079224, + 0.12278413027524948, + -0.18373264372348785, + 0.04834218695759773 + ], + [ + -0.19875016808509827, + 0.1683618575334549, + -0.13283465802669525, + -0.04612971469759941, + 0.24538324773311615, + -0.030594419687986374, + -0.18376505374908447, + 0.002627447945997119, + 0.11721742153167725, + -0.20188702642917633, + -0.2537902593612671, + 0.07109443843364716, + 0.22086279094219208, + -0.11620524525642395, + 0.23660004138946533, + 0.058651626110076904, + -0.17289023101329803, + 0.1922258585691452, + -0.07534690946340561, + 0.10293447971343994, + 0.19500978291034698, + -0.16382181644439697, + 0.12702788412570953, + -0.053176674991846085, + -0.0927366092801094, + 
-0.06353618949651718, + 0.12914767861366272, + 0.043157026171684265, + 0.05628587305545807, + -0.10986289381980896, + -0.22919519245624542, + 0.040118783712387085, + -0.008589701727032661, + -0.009877308271825314, + 0.2169731855392456, + -0.26328974962234497, + -0.07126442342996597, + 0.2706502676010132, + -0.07520382106304169, + 0.2694412171840668 + ], + [ + -0.09603092074394226, + 0.08876550197601318, + -0.03468991070985794, + 0.15437723696231842, + -0.0014384511159732938, + 0.10219626873731613, + -0.23560823500156403, + -0.031296294182538986, + -0.16816382110118866, + 0.1392878144979477, + -0.008461139164865017, + 0.19537869095802307, + 0.13422441482543945, + 0.025898421183228493, + 0.15127071738243103, + 0.07300019264221191, + -0.03327788785099983, + 0.18550902605056763, + 0.13726216554641724, + 0.2698049545288086, + 0.16135580837726593, + 0.10155684500932693, + -0.06869884580373764, + -0.18645340204238892, + -0.17449833452701569, + -0.19945786893367767, + -0.06933335214853287, + -0.1536562740802765, + -0.24070240557193756, + 0.09112155437469482, + -0.1401819884777069, + -0.19321681559085846, + -0.18088193237781525, + -0.026873955503106117, + -0.030727569013834, + -0.131565660238266, + 0.2195643186569214, + 0.14847807586193085, + 0.14730267226696014, + 0.272862046957016 + ], + [ + 0.004857486113905907, + -0.20800134539604187, + -0.07936950027942657, + 0.09042908251285553, + 0.19066327810287476, + -0.06077980622649193, + -0.04458876699209213, + 0.052627213299274445, + -0.2712633013725281, + 0.0991043820977211, + 0.1498817354440689, + 0.22104910016059875, + -0.20481126010417938, + 0.043536316603422165, + -0.21920715272426605, + -0.22803500294685364, + 0.2721710205078125, + -0.12677502632141113, + 0.17755655944347382, + -0.16105374693870544, + 0.24315790832042694, + -0.1733940988779068, + 0.003428633324801922, + -0.05678972974419594, + -0.0975780189037323, + -0.11324218660593033, + -0.18430934846401215, + 0.25911855697631836, + 0.23752544820308685, + 
-0.10030854493379593, + 0.17460203170776367, + 0.09828922897577286, + -0.08701019734144211, + 0.09503055363893509, + 0.05284085124731064, + -0.12934446334838867, + -0.08159565925598145, + 0.2587669789791107, + 0.13474294543266296, + -0.1264643371105194 + ], + [ + 0.11300262063741684, + -0.09897738695144653, + 0.09717956185340881, + -0.12028354406356812, + -0.27185964584350586, + 0.09522800147533417, + -0.23772022128105164, + 0.06830587238073349, + -0.009580320678651333, + -0.007516634184867144, + 0.034832123667001724, + 0.2043648511171341, + -0.12448695302009583, + -0.20921389758586884, + -0.18977591395378113, + -0.007970033213496208, + 0.09770272672176361, + -0.06751281768083572, + 0.09300129115581512, + -0.07759101688861847, + 0.020815735682845116, + -0.2081674188375473, + 0.18229466676712036, + 0.1073157787322998, + 0.2608170211315155, + 0.14400191605091095, + -0.04091444984078407, + -0.25493788719177246, + 0.05120084062218666, + 0.22773624956607819, + 0.21920396387577057, + -0.14248338341712952, + 0.10995654761791229, + -0.2621748447418213, + -0.027212144806981087, + 0.16584928333759308, + 0.22699213027954102, + -0.06717953085899353, + -0.19763876497745514, + -0.02751205489039421 + ], + [ + -0.16757789254188538, + 0.2641242444515228, + 0.265273779630661, + 0.24456308782100677, + -0.09214796870946884, + 0.10124180465936661, + 0.09159751236438751, + 0.17743968963623047, + -0.22361963987350464, + 0.0938483476638794, + -0.06280183792114258, + -0.15385626256465912, + 0.043064504861831665, + 0.1303781419992447, + -0.2514287531375885, + 0.1727704405784607, + 0.21202056109905243, + -0.11915411055088043, + -0.09212654083967209, + 0.1342979073524475, + 0.07859791070222855, + 0.03848014026880264, + 0.03141530603170395, + -0.23245227336883545, + -0.2027319073677063, + 0.11544303596019745, + -0.20256687700748444, + -0.22586055099964142, + -0.18427297472953796, + -0.2483004629611969, + -0.0037156802136451006, + 0.13610197603702545, + -0.17151996493339539, + 
-0.15036410093307495, + 0.03600861504673958, + 0.051672063767910004, + 0.17670902609825134, + 0.04085781052708626, + 0.02129407785832882, + 0.19850896298885345 + ], + [ + -0.17723260819911957, + -0.18321259319782257, + 0.2227443903684616, + -0.26413869857788086, + -0.07267025113105774, + -0.06615497171878815, + -0.21808312833309174, + -0.21188156306743622, + -0.11231689155101776, + 0.22213131189346313, + -0.09579313546419144, + 0.11005070060491562, + -0.03563332185149193, + 0.16179262101650238, + -0.16195806860923767, + -0.16056165099143982, + 0.030638018622994423, + -0.20994052290916443, + 0.08520657569169998, + 0.0967448353767395, + 0.12325763702392578, + -0.26751741766929626, + -0.18517521023750305, + -0.21731716394424438, + -0.21552185714244843, + -0.01576385460793972, + -0.09861837327480316, + 0.218730628490448, + -0.2511923909187317, + 0.045801155269145966, + 0.17479075491428375, + 0.16503363847732544, + -0.06545303016901016, + -0.06169794872403145, + 0.15660808980464935, + 0.17777787148952484, + 0.17173348367214203, + -0.14393268525600433, + -0.020913464948534966, + 0.041779886931180954 + ], + [ + 0.1868051439523697, + -0.2323080450296402, + 0.21384163200855255, + -0.11686421930789948, + 0.009962957352399826, + -0.19880832731723785, + -0.22032096982002258, + 0.18629059195518494, + 0.032988980412483215, + -0.005254177376627922, + 0.2021755576133728, + 0.23053903877735138, + -0.028986351564526558, + 0.22442546486854553, + 0.1077556237578392, + -0.034469302743673325, + 0.022066663950681686, + 0.2098335176706314, + -0.0066949790343642235, + -0.15535874664783478, + 0.13521739840507507, + 0.12824822962284088, + 0.12232119590044022, + 0.14970944821834564, + 0.142818883061409, + 0.2450467199087143, + 0.22954370081424713, + 0.055695001035928726, + 0.15990802645683289, + 0.10907889902591705, + -0.24488070607185364, + 0.15724116563796997, + -0.11395496129989624, + -0.10790683329105377, + -0.17355115711688995, + -0.20424088835716248, + 0.06735950708389282, + 
-0.09130499511957169, + -0.14112138748168945, + -0.22460615634918213 + ], + [ + -0.19471171498298645, + 0.05237266421318054, + -0.12131801247596741, + -0.2278168648481369, + 0.061968281865119934, + 0.19585193693637848, + -0.15974614024162292, + -0.13237808644771576, + 0.26284828782081604, + -0.0032515733037143946, + 0.019368242472410202, + 0.229537233710289, + -0.06298217922449112, + 0.20957332849502563, + 0.022719241678714752, + 0.26372289657592773, + 0.04041106998920441, + 0.04505925625562668, + -0.2388584166765213, + -0.07777637243270874, + 0.012447118759155273, + -0.15943986177444458, + -0.191522017121315, + 0.08419207483530045, + 0.16840583086013794, + -0.15479271113872528, + -0.08816678822040558, + 0.07929482311010361, + -0.14186735451221466, + -0.008290543220937252, + 0.115993432700634, + -0.0481087788939476, + 0.2587204575538635, + -0.27268147468566895, + 0.02253328450024128, + -0.06033959612250328, + -0.12554121017456055, + -0.15235355496406555, + -0.02964531071484089, + 0.2557893991470337 + ], + [ + -0.053759779781103134, + 0.042750801891088486, + -0.07440552115440369, + -0.03244077414274216, + 0.2270193248987198, + -0.10478406399488449, + -0.12616273760795593, + 0.08605702221393585, + -0.08681663125753403, + 0.009793455712497234, + 0.086821548640728, + 0.054874781519174576, + -0.0997568666934967, + 0.19625204801559448, + -0.21616044640541077, + -0.261860191822052, + -0.010369834490120411, + -0.11366848647594452, + -0.12206073850393295, + -0.034010011702775955, + -0.09009241312742233, + 0.22580081224441528, + -0.26809030771255493, + -0.26689720153808594, + 0.21384593844413757, + -0.03923049569129944, + 0.09966156631708145, + -0.06466954201459885, + 0.2559144198894501, + -0.20743338763713837, + 0.26174771785736084, + -0.23841150104999542, + -0.15092052519321442, + 0.1879679560661316, + 0.0022389835212379694, + -0.21221059560775757, + 0.12631142139434814, + 0.0627048909664154, + 0.2545890808105469, + 0.06066541001200676 + ], + [ + -0.1390155553817749, + 
-0.13818761706352234, + -0.10949690639972687, + -0.23039856553077698, + -0.22046932578086853, + -0.019411155954003334, + 0.10379739850759506, + -0.15400195121765137, + 0.18629921972751617, + -0.0514962300658226, + -0.18933731317520142, + 0.15909139811992645, + -0.26704487204551697, + 0.03854190930724144, + -0.10245738923549652, + -0.10832463204860687, + 0.26518547534942627, + -0.18446452915668488, + 0.03322269767522812, + 0.1320880502462387, + 0.19934840500354767, + 0.11950325220823288, + 0.18068638443946838, + -0.11248807609081268, + -0.17573684453964233, + 0.08963867276906967, + 0.20899879932403564, + 0.25504767894744873, + 0.23887458443641663, + -0.1973639875650406, + -0.13637399673461914, + 0.05588216334581375, + 0.24674922227859497, + -0.06664653867483139, + 0.26164695620536804, + -0.13109208643436432, + 0.26178672909736633, + -0.00810946710407734, + 0.023364048451185226, + -0.21397776901721954 + ], + [ + 0.09875167161226273, + 0.23817577958106995, + 0.13078024983406067, + 0.2052568942308426, + 0.2669854760169983, + 0.10569274425506592, + -0.12447187304496765, + 0.026136253029108047, + -0.12911197543144226, + 0.13691005110740662, + -0.12488333880901337, + -0.004112045280635357, + 0.08167173713445663, + -0.11749531328678131, + 0.08943198621273041, + 0.2708214819431305, + -0.10875435918569565, + -0.15426476299762726, + 0.17152039706707, + 0.2670862376689911, + -0.11339204758405685, + -0.23832516372203827, + 0.23736904561519623, + 0.22550630569458008, + 0.09248828142881393, + -0.11291395127773285, + -0.23334486782550812, + 0.0675383135676384, + 0.22201544046401978, + 0.20257070660591125, + -0.026385903358459473, + -0.14197663962841034, + -0.05326645448803902, + 0.07517051696777344, + 0.26955947279930115, + -0.17346277832984924, + -0.1700165569782257, + -0.12313831597566605, + 0.15810883045196533, + -0.09707271307706833 + ], + [ + -0.09238069504499435, + -0.19081760942935944, + 0.07735922932624817, + -0.23871903121471405, + 0.028931831941008568, + 
0.2205648571252823, + 0.21962709724903107, + 0.12407086789608002, + 0.2689138650894165, + -0.14207641780376434, + 0.19214390218257904, + 0.10813462734222412, + -0.25954145193099976, + -0.01904827170073986, + 0.06341707706451416, + 0.05811615288257599, + -0.24252036213874817, + 0.2530101239681244, + 0.005113567691296339, + -0.036503866314888, + -0.26424503326416016, + 0.27275344729423523, + 0.02506706863641739, + -0.1123596578836441, + -0.23808181285858154, + 0.0007773858378641307, + 0.03660043701529503, + 0.023996058851480484, + 0.18391063809394836, + 0.21472325921058655, + -0.009393548592925072, + 0.23182672262191772, + 0.1411275863647461, + 0.06655532121658325, + 0.04591365531086922, + 0.10803256928920746, + -0.17246074974536896, + 0.14872616529464722, + 0.038559213280677795, + 0.1925031840801239 + ], + [ + -0.18148991465568542, + 0.12081193178892136, + 0.05374174192547798, + 0.1729581654071808, + -0.12829409539699554, + -0.17785370349884033, + 0.2527864873409271, + -0.18434102833271027, + 0.23760423064231873, + -0.13261763751506805, + 0.2260705679655075, + 0.1343986541032791, + 0.054253414273262024, + 0.1508047878742218, + 0.12182330340147018, + 0.19643062353134155, + 0.04367643594741821, + 0.20951808989048004, + 0.036187030375003815, + -0.07745514065027237, + -0.03103647381067276, + 0.20612454414367676, + 0.05166605859994888, + -0.14629098773002625, + 0.0026404738891869783, + 0.08200033009052277, + 0.15613584220409393, + -0.14395815134048462, + 0.012864639051258564, + -0.21568770706653595, + -0.1556437909603119, + -0.00756984855979681, + 0.08578752726316452, + -0.10721532255411148, + 0.15736669301986694, + 0.13342614471912384, + 0.17331619560718536, + 0.09752535820007324, + -0.17556129395961761, + 0.19285045564174652 + ], + [ + -0.04971521347761154, + -0.11029169708490372, + -0.2253650575876236, + 0.09686079621315002, + 0.15731169283390045, + 0.22863687574863434, + -0.16918225586414337, + 0.13544155657291412, + -0.04989839717745781, + 0.08274735510349274, + 
-0.08143268525600433, + 0.10567498207092285, + -0.13660098612308502, + -0.1001979261636734, + -0.09495139867067337, + 0.1909729391336441, + 0.060694172978401184, + -0.04913721978664398, + 0.15239059925079346, + -0.16915559768676758, + -0.24273541569709778, + 0.23788930475711823, + 0.06185509264469147, + 0.2737584710121155, + 0.21743467450141907, + -0.2347860485315323, + -0.26267433166503906, + -0.09612317383289337, + 0.11000750958919525, + -0.06390021741390228, + 0.2350226193666458, + -0.07299385964870453, + -0.21880702674388885, + 0.058207105845212936, + 0.20681169629096985, + -0.2113189995288849, + -0.09973326325416565, + 0.026551226153969765, + 0.22284623980522156, + 0.2000834196805954 + ], + [ + -0.23862946033477783, + -0.023741789162158966, + -0.22110047936439514, + -0.08623424917459488, + -0.027297532185912132, + -0.13475839793682098, + 0.05706401169300079, + 0.21629971265792847, + 0.22765861451625824, + -0.2497250735759735, + 0.08204600214958191, + 0.13923192024230957, + -0.17721343040466309, + -0.05852118507027626, + -0.2297343909740448, + -0.027636617422103882, + 0.13776767253875732, + 0.04144718125462532, + 0.17488475143909454, + -0.019581817090511322, + -0.04400254413485527, + -0.18735410273075104, + -0.02322288416326046, + -0.11068009585142136, + 0.16326646506786346, + -0.03633668273687363, + -0.07512378692626953, + 0.12681496143341064, + 0.16040679812431335, + 0.21220828592777252, + 0.117737777531147, + 0.10560417175292969, + -0.20192775130271912, + 0.09589308500289917, + -0.08858303725719452, + -0.027778778225183487, + 0.25021523237228394, + -0.08380607515573502, + 0.1892021745443344, + -0.07599682360887527 + ], + [ + 0.058119744062423706, + 0.10062768310308456, + -0.10317539423704147, + 0.06296149641275406, + -0.02837797813117504, + -0.1638452261686325, + -0.020259957760572433, + -0.17452381551265717, + -0.08191604912281036, + -0.025728395208716393, + -0.03964204341173172, + 0.033110033720731735, + -0.07200480997562408, + -0.23292763531208038, + 
0.16697469353675842, + -0.2197219729423523, + -0.2220652550458908, + 0.2182207703590393, + 0.1100478544831276, + -0.10515906661748886, + 0.12696024775505066, + -0.07123803347349167, + -0.08257617056369781, + 0.12285520136356354, + 0.1081000491976738, + 0.1251242458820343, + -0.1655389964580536, + 0.18251264095306396, + 0.26864132285118103, + -0.20161254703998566, + -0.1516120433807373, + -0.16453732550144196, + -0.12497730553150177, + 0.021686753258109093, + 0.16365371644496918, + 0.039451416581869125, + 0.23743991553783417, + 0.017798714339733124, + 0.025912001729011536, + 0.11739097535610199 + ], + [ + -0.24582573771476746, + -0.20446963608264923, + 0.1405278593301773, + -0.027178337797522545, + 0.23325897753238678, + 0.23788999021053314, + 0.04004532843828201, + -0.2123832255601883, + -0.149732768535614, + -0.12848733365535736, + -0.09457971900701523, + 0.247767835855484, + 0.028545554727315903, + -0.18160344660282135, + -0.026770221069455147, + -0.15597015619277954, + -0.0763777643442154, + 0.19130824506282806, + 0.02589825727045536, + 0.2636871337890625, + 0.0015124613419175148, + 0.07153744995594025, + -0.037734389305114746, + -0.06028290465474129, + 0.07820069789886475, + 0.2343473583459854, + -0.20650362968444824, + 0.2620554566383362, + -0.16528086364269257, + 0.16189906001091003, + 0.2444600611925125, + 0.24288442730903625, + -0.11182790994644165, + 0.23294314742088318, + -0.03564649820327759, + 0.160216823220253, + -0.16498062014579773, + 0.20354652404785156, + 0.012435202486813068, + -0.18550655245780945 + ], + [ + 0.025386681780219078, + 0.13508808612823486, + 0.052342694252729416, + 0.24645155668258667, + 0.07275452464818954, + -0.20073819160461426, + 0.02382325939834118, + -0.02854669652879238, + 0.07693488150835037, + -0.18815074861049652, + -0.10367193818092346, + 0.25518959760665894, + -0.05642591416835785, + -0.012326031923294067, + -0.12703120708465576, + -0.12662477791309357, + 0.008236627094447613, + 0.19642458856105804, + -0.2600991129875183, 
+ 0.2616615891456604, + 0.08811978250741959, + -0.1392967849969864, + 0.20760828256607056, + 0.00010665712761692703, + 0.05426882579922676, + -0.23861315846443176, + -0.2161395400762558, + 0.1946164220571518, + -0.02397245354950428, + 0.12592151761054993, + -0.0686875656247139, + 0.15708276629447937, + 0.05778086930513382, + 0.19923290610313416, + -0.11771008372306824, + 0.028966665267944336, + -0.08780711889266968, + -0.24831296503543854, + -0.10959508270025253, + -0.2721446752548218 + ], + [ + -0.010387904942035675, + 0.02742603048682213, + 0.2350398600101471, + -0.15609246492385864, + -0.01343065220862627, + 0.22623445093631744, + 0.11369215697050095, + -0.25949162244796753, + 0.15253490209579468, + 0.24170346558094025, + -0.11508730053901672, + 0.08148359507322311, + -0.0066216872073709965, + -0.22764943540096283, + 0.11036727577447891, + 0.19305115938186646, + -0.15688420832157135, + -0.2629188001155853, + 0.1311204582452774, + 0.20822207629680634, + 0.19303856790065765, + 0.0979660227894783, + 0.15971481800079346, + 0.10553421080112457, + 0.2653781771659851, + 0.08983344584703445, + 0.08634304255247116, + -0.11689493805170059, + 0.004267427604645491, + -0.1963551938533783, + -0.09376221150159836, + -0.11637041717767715, + -0.17245036363601685, + -0.0670960322022438, + -0.10167516022920609, + 0.19380709528923035, + 0.2091997116804123, + -0.07919654250144958, + -0.029643548652529716, + -0.019242502748966217 + ], + [ + 0.15020950138568878, + -0.01747462898492813, + -0.05745108798146248, + 0.21478436887264252, + -0.1315833479166031, + 0.1608791947364807, + 0.22707736492156982, + -0.17587247490882874, + 0.14957119524478912, + -0.23426739871501923, + 0.04669492691755295, + -0.26979151368141174, + 0.18176715075969696, + 0.10483070462942123, + 0.049282416701316833, + 0.03191640228033066, + 0.13434135913848877, + -0.1102510318160057, + -0.25327134132385254, + -0.1590372771024704, + 0.22416242957115173, + -0.026615556329488754, + -0.11280082911252975, + 
0.031357619911432266, + 0.26629334688186646, + 0.06967165321111679, + 0.21169902384281158, + 0.27211156487464905, + 0.26642051339149475, + -0.2698020040988922, + 0.1882237195968628, + -0.12385931611061096, + 0.08877523243427277, + 0.17132866382598877, + -0.1989602893590927, + 0.23641997575759888, + -0.23440030217170715, + 0.1292775422334671, + -0.02672572247684002, + 0.21344630420207977 + ], + [ + 0.04243742302060127, + -0.13038799166679382, + 0.034666407853364944, + 0.14873285591602325, + -0.09848371893167496, + -0.05746868625283241, + -0.19976118206977844, + -0.019729316234588623, + 0.21925143897533417, + 0.08832966536283493, + 0.02994084544479847, + -0.031811654567718506, + 0.12111205607652664, + -0.17920590937137604, + 0.005474347621202469, + -0.19852498173713684, + -0.2113359570503235, + 0.23827411234378815, + -0.037451766431331635, + -0.23058205842971802, + 0.09065418690443039, + 0.09679380804300308, + -0.23932738602161407, + 0.0727965384721756, + -0.2323746383190155, + -0.1577540934085846, + 0.12599630653858185, + -0.13320676982402802, + 0.20796836912631989, + 0.1597203016281128, + -0.05352410301566124, + -0.2651612162590027, + -0.26934197545051575, + -0.26890552043914795, + -0.07791241258382797, + 0.050364527851343155, + -0.14589910209178925, + 0.2726397216320038, + 0.1382700800895691, + 0.042895425111055374 + ], + [ + 0.007023797836154699, + 0.17919129133224487, + -0.13639122247695923, + -0.034999437630176544, + 0.13327951729297638, + 0.12714293599128723, + -0.06270165741443634, + 0.240922749042511, + 0.05699711665511131, + 0.10538416355848312, + 0.02858453430235386, + -0.18960869312286377, + -0.04383883625268936, + 0.05575255677103996, + -0.07013453543186188, + 0.18866339325904846, + 0.2681806683540344, + -0.13986662030220032, + -0.019262630492448807, + -0.06207684800028801, + -0.049403976649045944, + -0.26883062720298767, + -0.271192729473114, + 0.2641875445842743, + 0.0866464301943779, + -0.10045119374990463, + -0.21010500192642212, + 
0.12392699718475342, + -0.13814695179462433, + 0.17781509459018707, + 0.11388140916824341, + -0.1355186253786087, + -0.1309320032596588, + 0.21489602327346802, + -0.1488940715789795, + 0.20246173441410065, + -0.06867259740829468, + -0.15204834938049316, + 0.21241620182991028, + -0.022369137033820152 + ], + [ + -0.18901090323925018, + 0.19012705981731415, + -0.01545583177357912, + -0.23918268084526062, + -0.03995513916015625, + -0.19676272571086884, + -0.1607106477022171, + 0.18876692652702332, + 0.0777154341340065, + 0.08024635165929794, + 0.20943620800971985, + -0.09681065380573273, + 0.24073852598667145, + 0.1750727891921997, + -0.16939552128314972, + 0.10423862189054489, + -0.12317293882369995, + 0.06559158861637115, + 0.1454998403787613, + -0.08931630104780197, + -0.2681041657924652, + 0.19528554379940033, + -0.05244241654872894, + 0.14140290021896362, + 0.1353157013654709, + 0.06142137944698334, + 0.04169604927301407, + 0.2240157127380371, + 0.2620091438293457, + 0.27219051122665405, + 0.008261210285127163, + -0.22597789764404297, + 0.25293031334877014, + -0.049066245555877686, + 0.02405240572988987, + 0.08633703738451004, + 0.17000561952590942, + -0.09647024422883987, + 0.21940703690052032, + 0.27082982659339905 + ], + [ + 0.09191131591796875, + 0.0649779886007309, + -0.13163058459758759, + -0.2683109641075134, + 0.002378450706601143, + -0.2506503462791443, + 0.20164625346660614, + 0.2533280849456787, + 0.24033614993095398, + 0.12502053380012512, + -0.08044714480638504, + -0.05339764431118965, + 0.16621939837932587, + -0.2373645007610321, + 0.1810118705034256, + 0.19977271556854248, + -0.07758168131113052, + -0.17326538264751434, + 0.15771128237247467, + 0.1434255987405777, + -0.14888417720794678, + -0.023914866149425507, + 0.11335347592830658, + -0.20271848142147064, + 0.25898098945617676, + 0.27146661281585693, + 0.09789877384901047, + 0.05483534559607506, + 0.1347857117652893, + 0.10791099071502686, + 0.16686855256557465, + 0.2288292646408081, + 
-0.024811705574393272, + 0.12698064744472504, + -0.043233469128608704, + -0.24381203949451447, + -0.17980119585990906, + -0.2726667523384094, + 0.043056830763816833, + 0.217466801404953 + ], + [ + -0.24407778680324554, + -0.25952038168907166, + 0.23125097155570984, + -0.07816098630428314, + 0.2617059648036957, + 0.07804816961288452, + -0.05119408294558525, + -0.259684294462204, + -0.21636469662189484, + -0.1951000690460205, + -0.09476960450410843, + -0.22422395646572113, + -0.2239178717136383, + -0.0979776605963707, + 0.0931602194905281, + 0.10940872877836227, + 0.04789825528860092, + 0.03611700236797333, + 0.0871991366147995, + -0.13172350823879242, + -0.11928369104862213, + -0.08836005628108978, + -0.2537994980812073, + -0.1935189664363861, + -0.1608610600233078, + 0.01920931786298752, + 0.22404778003692627, + 0.17784346640110016, + 0.08533262461423874, + -0.1559295952320099, + -0.017139216884970665, + 0.20249328017234802, + 0.12310840934515, + -0.019955459982156754, + -0.19374558329582214, + -0.26813045144081116, + -0.06594691425561905, + -0.12911051511764526, + -0.150323286652565, + 0.06551672518253326 + ] + ], + "startTransitions": [ + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0 + ], + "endTransitions": [ + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0 + ] +} \ No newline at end of file diff --git a/grobid-home/models/header-BidLSTM_ChainCRF_FEATURES.onnx/encoder.onnx b/grobid-home/models/header-BidLSTM_ChainCRF_FEATURES.onnx/encoder.onnx new file mode 100644 index 0000000000..2b6d4e538f Binary files 
/dev/null and b/grobid-home/models/header-BidLSTM_ChainCRF_FEATURES.onnx/encoder.onnx differ diff --git a/grobid-home/models/header-BidLSTM_ChainCRF_FEATURES.onnx/vocab.json b/grobid-home/models/header-BidLSTM_ChainCRF_FEATURES.onnx/vocab.json new file mode 100644 index 0000000000..27617ea621 --- /dev/null +++ b/grobid-home/models/header-BidLSTM_ChainCRF_FEATURES.onnx/vocab.json @@ -0,0 +1,545 @@ +{ + "charVocab": { + "<PAD>": 0, + "<UNK>": 1, + "!": 2, + "\"": 3, + "#": 4, + "$": 5, + "%": 6, + "&": 7, + "'": 8, + "(": 9, + ")": 10, + "*": 11, + "+": 12, + ",": 13, + "-": 14, + ".": 15, + "/": 16, + "0": 17, + "1": 18, + "2": 19, + "3": 20, + "4": 21, + "5": 22, + "6": 23, + "7": 24, + "8": 25, + "9": 26, + ":": 27, + ";": 28, + "<": 29, + "=": 30, + ">": 31, + "?": 32, + "@": 33, + "A": 34, + "B": 35, + "C": 36, + "D": 37, + "E": 38, + "F": 39, + "G": 40, + "H": 41, + "I": 42, + "J": 43, + "K": 44, + "L": 45, + "M": 46, + "N": 47, + "O": 48, + "P": 49, + "Q": 50, + "R": 51, + "S": 52, + "T": 53, + "U": 54, + "V": 55, + "W": 56, + "X": 57, + "Y": 58, + "Z": 59, + "[": 60, + "\\": 61, + "]": 62, + "^": 63, + "_": 64, + "`": 65, + "a": 66, + "b": 67, + "c": 68, + "d": 69, + "e": 70, + "f": 71, + "g": 72, + "h": 73, + "i": 74, + "j": 75, + "k": 76, + "l": 77, + "m": 78, + "n": 79, + "o": 80, + "p": 81, + "q": 82, + "r": 83, + "s": 84, + "t": 85, + "u": 86, + "v": 87, + "w": 88, + "x": 89, + "y": 90, + "z": 91, + "{": 92, + "|": 93, + "}": 94, + "~": 95, + "¡": 96, + "¢": 97, + "£": 98, + "¤": 99, + "¥": 100, + "§": 101, + "¨": 102, + "©": 103, + "ª": 104, + "«": 105, + "®": 106, + "¯": 107, + "°": 108, + "±": 109, + "²": 110, + "´": 111, + "µ": 112, + "¶": 113, + "¸": 114, + "»": 115, + "¼": 116, + "À": 117, + "Á": 118, + "Â": 119, + "Ã": 120, + "Å": 121, + "É": 122, + "Ê": 123, + "Ì": 124, + "Î": 125, + "Ó": 126, + "Õ": 127, + "Ö": 128, + "×": 129, + "Ø": 130, + "ß": 131, + "à": 132, + "á": 133, + "â": 134, + "ã": 135, + "ä": 136, + "å": 137, + "ç": 138, + "è": 139, 
+ "é": 140, + "ê": 141, + "ë": 142, + "í": 143, + "î": 144, + "ï": 145, + "ñ": 146, + "ó": 147, + "ô": 148, + "õ": 149, + "ö": 150, + "ø": 151, + "ú": 152, + "û": 153, + "ü": 154, + "þ": 155, + "ÿ": 156, + "Ā": 157, + "ă": 158, + "Ą": 159, + "ć": 160, + "Č": 161, + "č": 162, + "ė": 163, + "ę": 164, + "İ": 165, + "ı": 166, + "Ł": 167, + "ł": 168, + "ń": 169, + "Ō": 170, + "ř": 171, + "ş": 172, + "Š": 173, + "š": 174, + "Ź": 175, + "ź": 176, + "ż": 177, + "Ž": 178, + "ž": 179, + "Ȃ": 180, + "ʈ": 181, + "˙": 182, + "͑": 183, + "͒": 184, + "͓": 185, + "͔": 186, + "͗": 187, + "͘": 188, + "Γ": 189, + "Δ": 190, + "Θ": 191, + "Λ": 192, + "Π": 193, + "Υ": 194, + "Ψ": 195, + "α": 196, + "β": 197, + "γ": 198, + "δ": 199, + "ε": 200, + "ζ": 201, + "η": 202, + "θ": 203, + "κ": 204, + "λ": 205, + "μ": 206, + "ν": 207, + "ξ": 208, + "π": 209, + "ρ": 210, + "σ": 211, + "τ": 212, + "φ": 213, + "χ": 214, + "ψ": 215, + "ω": 216, + "ϕ": 217, + "ϩ": 218, + "Ϫ": 219, + "ϫ": 220, + "ϭ": 221, + "ϳ": 222, + "Ͻ": 223, + "Ͼ": 224, + "Ј": 225, + "Љ": 226, + "Б": 227, + "И": 228, + "Л": 229, + "П": 230, + "Ф": 231, + "Ц": 232, + "б": 233, + "в": 234, + "г": 235, + "д": 236, + "з": 237, + "и": 238, + "й": 239, + "к": 240, + "л": 241, + "м": 242, + "н": 243, + "п": 244, + "р": 245, + "с": 246, + "т": 247, + "у": 248, + "ф": 249, + "х": 250, + "ц": 251, + "ч": 252, + "ш": 253, + "щ": 254, + "ы": 255, + "ь": 256, + "э": 257, + "ю": 258, + "я": 259, + "ё": 260, + "Ն": 261, + "؊": 262, + "؍": 263, + "ٞ": 264, + "ܨ": 265, + "ࡆ": 266, + "௦": 267, + "௧": 268, + "ᰔ": 269, + "Ṇ": 270, + "†": 271, + "‡": 272, + "•": 273, + "‫": 274, + "‬": 275, + "′": 276, + "‹": 277, + "⁎": 278, + "ℓ": 279, + "™": 280, + "Ⅲ": 281, + "→": 282, + "↓": 283, + "↵": 284, + "⇑": 285, + "⇤": 286, + "∆": 287, + "∈": 288, + "−": 289, + "√": 290, + "∞": 291, + "∼": 292, + "≃": 293, + "≈": 294, + "≤": 295, + "≥": 296, + "⊙": 297, + "⋅": 298, + "⋆": 299, + "⌬": 300, + "␣": 301, + "␤": 302, + "ⓒ": 303, + "□": 304, + "☯": 305, + "♣": 
306, + "♦": 307, + "✉": 308, + "✝": 309, + "⸸": 310, + "ㆍ": 311, + "": 312, + "": 313, + "": 314, + "": 315, + "": 316, + "": 317, + "": 318, + "": 319, + "": 320, + "": 321, + "": 322, + "": 323, + "": 324, + "": 325, + "": 326, + "": 327, + ",": 328, + ";": 329, + "�": 330 + }, + "tagVocab": { + "<PAD>": 0, + "B-<abstract>": 1, + "B-<address>": 2, + "B-<affiliation>": 3, + "B-<author>": 4, + "B-<availability>": 5, + "B-<copyright>": 6, + "B-<date>": 7, + "B-<doctype>": 8, + "B-<editor>": 9, + "B-<email>": 10, + "B-<funding>": 11, + "B-<group>": 12, + "B-<keyword>": 13, + "B-<meeting>": 14, + "B-<pubnum>": 15, + "B-<reference>": 16, + "B-<submission>": 17, + "B-<title>": 18, + "B-<web>": 19, + "I-<abstract>": 20, + "I-<address>": 21, + "I-<affiliation>": 22, + "I-<author>": 23, + "I-<availability>": 24, + "I-<copyright>": 25, + "I-<date>": 26, + "I-<doctype>": 27, + "I-<editor>": 28, + "I-<email>": 29, + "I-<funding>": 30, + "I-<group>": 31, + "I-<keyword>": 32, + "I-<meeting>": 33, + "I-<pubnum>": 34, + "I-<reference>": 35, + "I-<submission>": 36, + "I-<title>": 37, + "I-<web>": 38, + "O": 39 + }, + "tagIndex": { + "0": "<PAD>", + "1": "B-<abstract>", + "2": "B-<address>", + "3": "B-<affiliation>", + "4": "B-<author>", + "5": "B-<availability>", + "6": "B-<copyright>", + "7": "B-<date>", + "8": "B-<doctype>", + "9": "B-<editor>", + "10": "B-<email>", + "11": "B-<funding>", + "12": "B-<group>", + "13": "B-<keyword>", + "14": "B-<meeting>", + "15": "B-<pubnum>", + "16": "B-<reference>", + "17": "B-<submission>", + "18": "B-<title>", + "19": "B-<web>", + "20": "I-<abstract>", + "21": "I-<address>", + "22": "I-<affiliation>", + "23": "I-<author>", + "24": "I-<availability>", + "25": "I-<copyright>", + "26": "I-<date>", + "27": "I-<doctype>", + "28": "I-<editor>", + "29": "I-<email>", + "30": "I-<funding>", + "31": "I-<group>", + "32": "I-<keyword>", + "33": "I-<meeting>", + "34": "I-<pubnum>", + "35": "I-<reference>", + "36": "I-<submission>", + "37": 
"I-<title>", + "38": "I-<web>", + "39": "O" + }, + "maxCharLength": 30, + "returnChars": false, + "featuresIndices": [ + 9, + 10, + 11, + 12, + 13, + 14, + 15, + 16, + 17, + 18, + 19, + 20, + 21, + 22, + 23, + 24, + 25, + 26, + 27, + 28, + 29, + 30 + ], + "featuresVocabularySize": 12, + "featuresMapToIndex": { + "9": { + "BLOCKEND": 1, + "BLOCKIN": 2, + "BLOCKSTART": 3 + }, + "10": { + "LINEEND": 13, + "LINEIN": 14, + "LINESTART": 15 + }, + "11": { + "ALIGNEDLEFT": 25, + "LINEINDENT": 26 + }, + "12": { + "NEWFONT": 37, + "SAMEFONT": 38 + }, + "13": { + "HIGHERFONT": 49, + "LOWERFONT": 50, + "SAMEFONTSIZE": 51 + }, + "14": { + "0": 61, + "1": 62 + }, + "15": { + "0": 73, + "1": 74 + }, + "16": { + "ALLCAP": 85, + "INITCAP": 86, + "NOCAPS": 87 + }, + "17": { + "ALLDIGIT": 97, + "CONTAINSDIGITS": 98, + "NODIGIT": 99 + }, + "18": { + "0": 109, + "1": 110 + }, + "19": { + "0": 121, + "1": 122 + }, + "20": { + "0": 133, + "1": 134 + }, + "21": { + "0": 145, + "1": 146 + }, + "22": { + "0": 157, + "1": 158 + }, + "23": { + "0": 169, + "1": 170 + }, + "24": { + "0": 181, + "1": 182 + }, + "25": { + "0": 193, + "1": 194 + }, + "26": { + "COMMA": 205, + "DOT": 206, + "ENDBRACKET": 207, + "HYPHEN": 208, + "NOPUNCT": 209, + "OPENBRACKET": 210, + "PUNCT": 211, + "QUOTE": 212 + }, + "27": { + "0": 217, + "1": 218 + }, + "28": { + "0": 229 + }, + "29": { + "0": 241, + "1": 242 + }, + "30": { + "0": 253 + } + } +} \ No newline at end of file diff --git a/grobid-home/models/header-BidLSTM_ChainCRF_FEATURES/config.json b/grobid-home/models/header-BidLSTM_ChainCRF_FEATURES/config.json deleted file mode 100644 index 7215772277..0000000000 --- a/grobid-home/models/header-BidLSTM_ChainCRF_FEATURES/config.json +++ /dev/null @@ -1,148 +0,0 @@ -{ - "model_name": "header-BidLSTM_ChainCRF_FEATURES", - "architecture": "BidLSTM_ChainCRF_FEATURES", - "embeddings_name": "glove-840B", - "char_vocab_size": 339, - "case_vocab_size": 8, - "char_embedding_size": 25, - "num_char_lstm_units": 25, - 
"max_char_length": 30, - "features_vocabulary_size": 12, - "features_indices": [ - 9, - 10, - 11, - 12, - 13, - 14, - 15, - 16, - 17, - 18, - 19, - 20, - 21, - 22, - 23, - 24, - 25, - 26, - 27, - 28, - 29, - 30 - ], - "features_embedding_size": 4, - "features_lstm_units": 4, - "max_sequence_length": 3000, - "word_embedding_size": 300, - "num_word_lstm_units": 100, - "case_embedding_size": 5, - "dropout": 0.5, - "recurrent_dropout": 0.5, - "use_crf": true, - "use_chain_crf": true, - "fold_number": 1, - "batch_size": 9, - "transformer_name": null, - "use_ELMo": false, - "features_map_to_index": { - "9": { - "BLOCKEND": 1, - "BLOCKIN": 2, - "BLOCKSTART": 3 - }, - "10": { - "LINEEND": 13, - "LINEIN": 14, - "LINESTART": 15 - }, - "11": { - "ALIGNEDLEFT": 25, - "LINEINDENT": 26 - }, - "12": { - "NEWFONT": 37, - "SAMEFONT": 38 - }, - "13": { - "HIGHERFONT": 49, - "LOWERFONT": 50, - "SAMEFONTSIZE": 51 - }, - "14": { - "0": 61, - "1": 62 - }, - "15": { - "0": 73, - "1": 74 - }, - "16": { - "ALLCAP": 85, - "INITCAP": 86, - "NOCAPS": 87 - }, - "17": { - "ALLDIGIT": 97, - "CONTAINSDIGITS": 98, - "NODIGIT": 99 - }, - "18": { - "0": 109, - "1": 110 - }, - "19": { - "0": 121, - "1": 122 - }, - "20": { - "0": 133, - "1": 134 - }, - "21": { - "0": 145, - "1": 146 - }, - "22": { - "0": 157, - "1": 158 - }, - "23": { - "0": 169, - "1": 170 - }, - "24": { - "0": 181, - "1": 182 - }, - "25": { - "0": 193, - "1": 194 - }, - "26": { - "COMMA": 205, - "DOT": 206, - "ENDBRACKET": 207, - "HYPHEN": 208, - "NOPUNCT": 209, - "OPENBRACKET": 210, - "PUNCT": 211, - "QUOTE": 212 - }, - "27": { - "0": 217, - "1": 218 - }, - "28": { - "0": 229 - }, - "29": { - "0": 241, - "1": 242 - }, - "30": { - "0": 253 - } - } -} \ No newline at end of file diff --git a/grobid-home/models/header-BidLSTM_ChainCRF_FEATURES/model_weights.hdf5 b/grobid-home/models/header-BidLSTM_ChainCRF_FEATURES/model_weights.hdf5 deleted file mode 100644 index a5a4af4fbc..0000000000 Binary files 
a/grobid-home/models/header-BidLSTM_ChainCRF_FEATURES/model_weights.hdf5 and /dev/null differ diff --git a/grobid-home/models/header-BidLSTM_ChainCRF_FEATURES/preprocessor.json b/grobid-home/models/header-BidLSTM_ChainCRF_FEATURES/preprocessor.json deleted file mode 100644 index 4090e97773..0000000000 --- a/grobid-home/models/header-BidLSTM_ChainCRF_FEATURES/preprocessor.json +++ /dev/null @@ -1,571 +0,0 @@ -{ - "padding": true, - "return_lengths": true, - "return_word_embeddings": true, - "return_casing": false, - "return_features": true, - "return_chars": true, - "return_bert_embeddings": false, - "vocab_char": { - "<PAD>": 0, - "<UNK>": 1, - "!": 2, - "\"": 3, - "#": 4, - "$": 5, - "%": 6, - "&": 7, - "'": 8, - "(": 9, - ")": 10, - "*": 11, - "+": 12, - ",": 13, - "-": 14, - ".": 15, - "/": 16, - "0": 17, - "1": 18, - "2": 19, - "3": 20, - "4": 21, - "5": 22, - "6": 23, - "7": 24, - "8": 25, - "9": 26, - ":": 27, - ";": 28, - "<": 29, - "=": 30, - ">": 31, - "?": 32, - "@": 33, - "A": 34, - "B": 35, - "C": 36, - "D": 37, - "E": 38, - "F": 39, - "G": 40, - "H": 41, - "I": 42, - "J": 43, - "K": 44, - "L": 45, - "M": 46, - "N": 47, - "O": 48, - "P": 49, - "Q": 50, - "R": 51, - "S": 52, - "T": 53, - "U": 54, - "V": 55, - "W": 56, - "X": 57, - "Y": 58, - "Z": 59, - "[": 60, - "\\": 61, - "]": 62, - "^": 63, - "_": 64, - "`": 65, - "a": 66, - "b": 67, - "c": 68, - "d": 69, - "e": 70, - "f": 71, - "g": 72, - "h": 73, - "i": 74, - "j": 75, - "k": 76, - "l": 77, - "m": 78, - "n": 79, - "o": 80, - "p": 81, - "q": 82, - "r": 83, - "s": 84, - "t": 85, - "u": 86, - "v": 87, - "w": 88, - "x": 89, - "y": 90, - "z": 91, - "{": 92, - "|": 93, - "}": 94, - "~": 95, - "\u00a1": 96, - "\u00a2": 97, - "\u00a3": 98, - "\u00a4": 99, - "\u00a5": 100, - "\u00a7": 101, - "\u00a8": 102, - "\u00a9": 103, - "\u00aa": 104, - "\u00ab": 105, - "\u00ae": 106, - "\u00af": 107, - "\u00b0": 108, - "\u00b1": 109, - "\u00b2": 110, - "\u00b4": 111, - "\u00b5": 112, - "\u00b6": 113, - "\u00b8": 114, - 
"\u00bb": 115, - "\u00bc": 116, - "\u00c0": 117, - "\u00c1": 118, - "\u00c2": 119, - "\u00c3": 120, - "\u00c5": 121, - "\u00c7": 122, - "\u00c9": 123, - "\u00ca": 124, - "\u00cc": 125, - "\u00ce": 126, - "\u00d3": 127, - "\u00d5": 128, - "\u00d6": 129, - "\u00d7": 130, - "\u00d8": 131, - "\u00df": 132, - "\u00e0": 133, - "\u00e1": 134, - "\u00e2": 135, - "\u00e3": 136, - "\u00e4": 137, - "\u00e5": 138, - "\u00e7": 139, - "\u00e8": 140, - "\u00e9": 141, - "\u00ea": 142, - "\u00eb": 143, - "\u00ed": 144, - "\u00ee": 145, - "\u00ef": 146, - "\u00f1": 147, - "\u00f2": 148, - "\u00f3": 149, - "\u00f4": 150, - "\u00f5": 151, - "\u00f6": 152, - "\u00f8": 153, - "\u00fa": 154, - "\u00fb": 155, - "\u00fc": 156, - "\u00fd": 157, - "\u00fe": 158, - "\u00ff": 159, - "\u0100": 160, - "\u0103": 161, - "\u0104": 162, - "\u0107": 163, - "\u010c": 164, - "\u010d": 165, - "\u0117": 166, - "\u0119": 167, - "\u0130": 168, - "\u0131": 169, - "\u0141": 170, - "\u0142": 171, - "\u0144": 172, - "\u014c": 173, - "\u0159": 174, - "\u015f": 175, - "\u0160": 176, - "\u0161": 177, - "\u0179": 178, - "\u017a": 179, - "\u017c": 180, - "\u017d": 181, - "\u017e": 182, - "\u0202": 183, - "\u0288": 184, - "\u02d9": 185, - "\u0351": 186, - "\u0352": 187, - "\u0353": 188, - "\u0354": 189, - "\u0357": 190, - "\u0358": 191, - "\u0393": 192, - "\u0394": 193, - "\u0398": 194, - "\u039b": 195, - "\u039e": 196, - "\u03a0": 197, - "\u03a3": 198, - "\u03a5": 199, - "\u03a8": 200, - "\u03b1": 201, - "\u03b2": 202, - "\u03b3": 203, - "\u03b4": 204, - "\u03b5": 205, - "\u03b6": 206, - "\u03b7": 207, - "\u03b8": 208, - "\u03ba": 209, - "\u03bb": 210, - "\u03bc": 211, - "\u03bd": 212, - "\u03be": 213, - "\u03c0": 214, - "\u03c1": 215, - "\u03c3": 216, - "\u03c4": 217, - "\u03c6": 218, - "\u03c7": 219, - "\u03c8": 220, - "\u03c9": 221, - "\u03d5": 222, - "\u03e9": 223, - "\u03ea": 224, - "\u03eb": 225, - "\u03ed": 226, - "\u03f3": 227, - "\u03fd": 228, - "\u03fe": 229, - "\u0408": 230, - "\u0409": 231, - "\u0411": 
232, - "\u0418": 233, - "\u041b": 234, - "\u041f": 235, - "\u0424": 236, - "\u0426": 237, - "\u0431": 238, - "\u0432": 239, - "\u0433": 240, - "\u0434": 241, - "\u0437": 242, - "\u0438": 243, - "\u0439": 244, - "\u043a": 245, - "\u043b": 246, - "\u043c": 247, - "\u043d": 248, - "\u043f": 249, - "\u0440": 250, - "\u0441": 251, - "\u0442": 252, - "\u0443": 253, - "\u0444": 254, - "\u0445": 255, - "\u0446": 256, - "\u0447": 257, - "\u0448": 258, - "\u0449": 259, - "\u044b": 260, - "\u044c": 261, - "\u044d": 262, - "\u044e": 263, - "\u044f": 264, - "\u0451": 265, - "\u0546": 266, - "\u060a": 267, - "\u060d": 268, - "\u065e": 269, - "\u0728": 270, - "\u0846": 271, - "\u0be6": 272, - "\u0be7": 273, - "\u1c14": 274, - "\u1e46": 275, - "\u2020": 276, - "\u2021": 277, - "\u2022": 278, - "\u202b": 279, - "\u202c": 280, - "\u2032": 281, - "\u2039": 282, - "\u204e": 283, - "\u2113": 284, - "\u2122": 285, - "\u2162": 286, - "\u2192": 287, - "\u2193": 288, - "\u21b5": 289, - "\u21d1": 290, - "\u21e4": 291, - "\u2206": 292, - "\u2208": 293, - "\u2212": 294, - "\u2213": 295, - "\u221a": 296, - "\u221e": 297, - "\u223c": 298, - "\u2243": 299, - "\u2248": 300, - "\u2264": 301, - "\u2265": 302, - "\u2299": 303, - "\u22c5": 304, - "\u22c6": 305, - "\u232c": 306, - "\u2423": 307, - "\u2424": 308, - "\u24d2": 309, - "\u25a1": 310, - "\u262f": 311, - "\u2663": 312, - "\u2666": 313, - "\u2709": 314, - "\u271d": 315, - "\u2e38": 316, - "\u318d": 317, - "\uf061": 318, - "\uf067": 319, - "\uf761": 320, - "\uf764": 321, - "\uf765": 322, - "\uf767": 323, - "\uf769": 324, - "\uf76b": 325, - "\uf76c": 326, - "\uf76e": 327, - "\uf76f": 328, - "\uf770": 329, - "\uf772": 330, - "\uf773": 331, - "\uf774": 332, - "\uf777": 333, - "\uf779": 334, - "\uf8e9": 335, - "\uff0c": 336, - "\uff1b": 337, - "\ufffd": 338 - }, - "vocab_tag": { - "<PAD>": 0, - "B-<abstract>": 1, - "B-<address>": 2, - "B-<affiliation>": 3, - "B-<author>": 4, - "B-<availability>": 5, - "B-<copyright>": 6, - "B-<date>": 7, - 
"B-<doctype>": 8, - "B-<editor>": 9, - "B-<email>": 10, - "B-<funding>": 11, - "B-<group>": 12, - "B-<keyword>": 13, - "B-<meeting>": 14, - "B-<pubnum>": 15, - "B-<reference>": 16, - "B-<submission>": 17, - "B-<title>": 18, - "B-<web>": 19, - "I-<abstract>": 20, - "I-<address>": 21, - "I-<affiliation>": 22, - "I-<author>": 23, - "I-<availability>": 24, - "I-<copyright>": 25, - "I-<date>": 26, - "I-<doctype>": 27, - "I-<editor>": 28, - "I-<email>": 29, - "I-<funding>": 30, - "I-<group>": 31, - "I-<keyword>": 32, - "I-<meeting>": 33, - "I-<pubnum>": 34, - "I-<reference>": 35, - "I-<submission>": 36, - "I-<title>": 37, - "I-<web>": 38, - "O": 39 - }, - "vocab_case": [ - "<PAD>", - "numeric", - "allLower", - "allUpper", - "initialUpper", - "other", - "mainly_numeric", - "contains_digit" - ], - "max_char_length": 30, - "feature_preprocessor": { - "features_vocabulary_size": 12, - "features_indices": [ - 9, - 10, - 11, - 12, - 13, - 14, - 15, - 16, - 17, - 18, - 19, - 20, - 21, - 22, - 23, - 24, - 25, - 26, - 27, - 28, - 29, - 30 - ], - "features_map_to_index": { - "9": { - "BLOCKEND": 1, - "BLOCKIN": 2, - "BLOCKSTART": 3 - }, - "10": { - "LINEEND": 13, - "LINEIN": 14, - "LINESTART": 15 - }, - "11": { - "ALIGNEDLEFT": 25, - "LINEINDENT": 26 - }, - "12": { - "NEWFONT": 37, - "SAMEFONT": 38 - }, - "13": { - "HIGHERFONT": 49, - "LOWERFONT": 50, - "SAMEFONTSIZE": 51 - }, - "14": { - "0": 61, - "1": 62 - }, - "15": { - "0": 73, - "1": 74 - }, - "16": { - "ALLCAP": 85, - "INITCAP": 86, - "NOCAPS": 87 - }, - "17": { - "ALLDIGIT": 97, - "CONTAINSDIGITS": 98, - "NODIGIT": 99 - }, - "18": { - "0": 109, - "1": 110 - }, - "19": { - "0": 121, - "1": 122 - }, - "20": { - "0": 133, - "1": 134 - }, - "21": { - "0": 145, - "1": 146 - }, - "22": { - "0": 157, - "1": 158 - }, - "23": { - "0": 169, - "1": 170 - }, - "24": { - "0": 181, - "1": 182 - }, - "25": { - "0": 193, - "1": 194 - }, - "26": { - "COMMA": 205, - "DOT": 206, - "ENDBRACKET": 207, - "HYPHEN": 208, - "NOPUNCT": 209, - 
"OPENBRACKET": 210, - "PUNCT": 211, - "QUOTE": 212 - }, - "27": { - "0": 217, - "1": 218 - }, - "28": { - "0": 229 - }, - "29": { - "0": 241, - "1": 242 - }, - "30": { - "0": 253 - } - } - }, - "indice_tag": { - "0": "<PAD>", - "1": "B-<abstract>", - "2": "B-<address>", - "3": "B-<affiliation>", - "4": "B-<author>", - "5": "B-<availability>", - "6": "B-<copyright>", - "7": "B-<date>", - "8": "B-<doctype>", - "9": "B-<editor>", - "10": "B-<email>", - "11": "B-<funding>", - "12": "B-<group>", - "13": "B-<keyword>", - "14": "B-<meeting>", - "15": "B-<pubnum>", - "16": "B-<reference>", - "17": "B-<submission>", - "18": "B-<title>", - "19": "B-<web>", - "20": "I-<abstract>", - "21": "I-<address>", - "22": "I-<affiliation>", - "23": "I-<author>", - "24": "I-<availability>", - "25": "I-<copyright>", - "26": "I-<date>", - "27": "I-<doctype>", - "28": "I-<editor>", - "29": "I-<email>", - "30": "I-<funding>", - "31": "I-<group>", - "32": "I-<keyword>", - "33": "I-<meeting>", - "34": "I-<pubnum>", - "35": "I-<reference>", - "36": "I-<submission>", - "37": "I-<title>", - "38": "I-<web>", - "39": "O" - } -} \ No newline at end of file diff --git a/grobid-home/models/header-article-light-BidLSTM_CRF_FEATURES/config.json b/grobid-home/models/header-article-light-BidLSTM_CRF_FEATURES/config.json deleted file mode 100644 index ef23fe6f73..0000000000 --- a/grobid-home/models/header-article-light-BidLSTM_CRF_FEATURES/config.json +++ /dev/null @@ -1,148 +0,0 @@ -{ - "model_name": "header-article-light-BidLSTM_CRF_FEATURES", - "architecture": "BidLSTM_CRF_FEATURES", - "embeddings_name": "glove-840B", - "char_vocab_size": 334, - "case_vocab_size": 8, - "char_embedding_size": 25, - "num_char_lstm_units": 25, - "max_char_length": 30, - "features_vocabulary_size": 12, - "features_indices": [ - 9, - 10, - 11, - 12, - 13, - 14, - 15, - 16, - 17, - 18, - 19, - 20, - 21, - 22, - 23, - 24, - 25, - 26, - 27, - 28, - 29, - 30 - ], - "features_embedding_size": 4, - "features_lstm_units": 4, - 
"max_sequence_length": 2500, - "word_embedding_size": 300, - "num_word_lstm_units": 100, - "case_embedding_size": 5, - "dropout": 0.5, - "recurrent_dropout": 0.5, - "use_crf": true, - "use_chain_crf": false, - "fold_number": 1, - "batch_size": 9, - "transformer_name": null, - "use_ELMo": false, - "features_map_to_index": { - "9": { - "BLOCKEND": 1, - "BLOCKIN": 2, - "BLOCKSTART": 3 - }, - "10": { - "LINEEND": 13, - "LINEIN": 14, - "LINESTART": 15 - }, - "11": { - "ALIGNEDLEFT": 25, - "LINEINDENT": 26 - }, - "12": { - "NEWFONT": 37, - "SAMEFONT": 38 - }, - "13": { - "HIGHERFONT": 49, - "LOWERFONT": 50, - "SAMEFONTSIZE": 51 - }, - "14": { - "0": 61, - "1": 62 - }, - "15": { - "0": 73, - "1": 74 - }, - "16": { - "ALLCAP": 85, - "INITCAP": 86, - "NOCAPS": 87 - }, - "17": { - "ALLDIGIT": 97, - "CONTAINSDIGITS": 98, - "NODIGIT": 99 - }, - "18": { - "0": 109, - "1": 110 - }, - "19": { - "0": 121, - "1": 122 - }, - "20": { - "0": 133, - "1": 134 - }, - "21": { - "0": 145, - "1": 146 - }, - "22": { - "0": 157, - "1": 158 - }, - "23": { - "0": 169, - "1": 170 - }, - "24": { - "0": 181, - "1": 182 - }, - "25": { - "0": 193, - "1": 194 - }, - "26": { - "COMMA": 205, - "DOT": 206, - "ENDBRACKET": 207, - "HYPHEN": 208, - "NOPUNCT": 209, - "OPENBRACKET": 210, - "PUNCT": 211, - "QUOTE": 212 - }, - "27": { - "0": 217, - "1": 218 - }, - "28": { - "0": 229 - }, - "29": { - "0": 241, - "1": 242 - }, - "30": { - "0": 253 - } - } -} \ No newline at end of file diff --git a/grobid-home/models/header-article-light-BidLSTM_CRF_FEATURES/model_weights.hdf5 b/grobid-home/models/header-article-light-BidLSTM_CRF_FEATURES/model_weights.hdf5 deleted file mode 100644 index c438d7f272..0000000000 Binary files a/grobid-home/models/header-article-light-BidLSTM_CRF_FEATURES/model_weights.hdf5 and /dev/null differ diff --git a/grobid-home/models/header-article-light-BidLSTM_CRF_FEATURES/preprocessor.json b/grobid-home/models/header-article-light-BidLSTM_CRF_FEATURES/preprocessor.json deleted file mode 
100644 index 8b28e70aff..0000000000 --- a/grobid-home/models/header-article-light-BidLSTM_CRF_FEATURES/preprocessor.json +++ /dev/null @@ -1,498 +0,0 @@ -{ - "padding": true, - "return_lengths": true, - "return_word_embeddings": true, - "return_casing": false, - "return_features": true, - "return_chars": true, - "return_bert_embeddings": false, - "vocab_char": { - "<PAD>": 0, - "<UNK>": 1, - "!": 2, - "\"": 3, - "#": 4, - "$": 5, - "%": 6, - "&": 7, - "'": 8, - "(": 9, - ")": 10, - "*": 11, - "+": 12, - ",": 13, - "-": 14, - ".": 15, - "/": 16, - "0": 17, - "1": 18, - "2": 19, - "3": 20, - "4": 21, - "5": 22, - "6": 23, - "7": 24, - "8": 25, - "9": 26, - ":": 27, - ";": 28, - "<": 29, - "=": 30, - ">": 31, - "?": 32, - "@": 33, - "A": 34, - "B": 35, - "C": 36, - "D": 37, - "E": 38, - "F": 39, - "G": 40, - "H": 41, - "I": 42, - "J": 43, - "K": 44, - "L": 45, - "M": 46, - "N": 47, - "O": 48, - "P": 49, - "Q": 50, - "R": 51, - "S": 52, - "T": 53, - "U": 54, - "V": 55, - "W": 56, - "X": 57, - "Y": 58, - "Z": 59, - "[": 60, - "\\": 61, - "]": 62, - "^": 63, - "_": 64, - "`": 65, - "a": 66, - "b": 67, - "c": 68, - "d": 69, - "e": 70, - "f": 71, - "g": 72, - "h": 73, - "i": 74, - "j": 75, - "k": 76, - "l": 77, - "m": 78, - "n": 79, - "o": 80, - "p": 81, - "q": 82, - "r": 83, - "s": 84, - "t": 85, - "u": 86, - "v": 87, - "w": 88, - "x": 89, - "y": 90, - "z": 91, - "{": 92, - "|": 93, - "}": 94, - "~": 95, - "\u00a1": 96, - "\u00a2": 97, - "\u00a3": 98, - "\u00a4": 99, - "\u00a5": 100, - "\u00a7": 101, - "\u00a8": 102, - "\u00a9": 103, - "\u00aa": 104, - "\u00ab": 105, - "\u00ae": 106, - "\u00af": 107, - "\u00b0": 108, - "\u00b1": 109, - "\u00b2": 110, - "\u00b4": 111, - "\u00b5": 112, - "\u00b6": 113, - "\u00b8": 114, - "\u00bb": 115, - "\u00bc": 116, - "\u00c0": 117, - "\u00c1": 118, - "\u00c2": 119, - "\u00c3": 120, - "\u00c5": 121, - "\u00c7": 122, - "\u00c9": 123, - "\u00ca": 124, - "\u00cc": 125, - "\u00ce": 126, - "\u00d3": 127, - "\u00d5": 128, - "\u00d6": 129, - 
"\u00d7": 130, - "\u00d8": 131, - "\u00df": 132, - "\u00e0": 133, - "\u00e1": 134, - "\u00e2": 135, - "\u00e3": 136, - "\u00e4": 137, - "\u00e5": 138, - "\u00e7": 139, - "\u00e8": 140, - "\u00e9": 141, - "\u00ea": 142, - "\u00eb": 143, - "\u00ed": 144, - "\u00ee": 145, - "\u00ef": 146, - "\u00f1": 147, - "\u00f2": 148, - "\u00f3": 149, - "\u00f4": 150, - "\u00f5": 151, - "\u00f6": 152, - "\u00f8": 153, - "\u00fa": 154, - "\u00fb": 155, - "\u00fc": 156, - "\u00fd": 157, - "\u00fe": 158, - "\u00ff": 159, - "\u0100": 160, - "\u0103": 161, - "\u0104": 162, - "\u0107": 163, - "\u010d": 164, - "\u0117": 165, - "\u0119": 166, - "\u0130": 167, - "\u0131": 168, - "\u0141": 169, - "\u0142": 170, - "\u0144": 171, - "\u014c": 172, - "\u0159": 173, - "\u015f": 174, - "\u0160": 175, - "\u0161": 176, - "\u0179": 177, - "\u017a": 178, - "\u017c": 179, - "\u017d": 180, - "\u017e": 181, - "\u0202": 182, - "\u0288": 183, - "\u02d9": 184, - "\u0351": 185, - "\u0352": 186, - "\u0353": 187, - "\u0354": 188, - "\u0393": 189, - "\u0394": 190, - "\u0398": 191, - "\u039b": 192, - "\u039e": 193, - "\u03a0": 194, - "\u03a3": 195, - "\u03a5": 196, - "\u03a8": 197, - "\u03b1": 198, - "\u03b2": 199, - "\u03b3": 200, - "\u03b4": 201, - "\u03b5": 202, - "\u03b6": 203, - "\u03b7": 204, - "\u03b8": 205, - "\u03ba": 206, - "\u03bb": 207, - "\u03bc": 208, - "\u03bd": 209, - "\u03be": 210, - "\u03c0": 211, - "\u03c1": 212, - "\u03c3": 213, - "\u03c4": 214, - "\u03c6": 215, - "\u03c7": 216, - "\u03c8": 217, - "\u03c9": 218, - "\u03d5": 219, - "\u03e9": 220, - "\u03ea": 221, - "\u03eb": 222, - "\u03ed": 223, - "\u03f3": 224, - "\u03fd": 225, - "\u03fe": 226, - "\u0408": 227, - "\u0409": 228, - "\u0411": 229, - "\u0418": 230, - "\u041b": 231, - "\u041f": 232, - "\u0424": 233, - "\u0426": 234, - "\u0431": 235, - "\u0432": 236, - "\u0433": 237, - "\u0434": 238, - "\u0437": 239, - "\u0438": 240, - "\u0439": 241, - "\u043a": 242, - "\u043b": 243, - "\u043c": 244, - "\u043d": 245, - "\u043f": 246, - "\u0440": 
247, - "\u0441": 248, - "\u0442": 249, - "\u0443": 250, - "\u0444": 251, - "\u0445": 252, - "\u0446": 253, - "\u0447": 254, - "\u0448": 255, - "\u0449": 256, - "\u044b": 257, - "\u044c": 258, - "\u044d": 259, - "\u044e": 260, - "\u044f": 261, - "\u0451": 262, - "\u0546": 263, - "\u060a": 264, - "\u060d": 265, - "\u065e": 266, - "\u0728": 267, - "\u0846": 268, - "\u0be6": 269, - "\u0be7": 270, - "\u1c14": 271, - "\u1e46": 272, - "\u2020": 273, - "\u2021": 274, - "\u2022": 275, - "\u202b": 276, - "\u202c": 277, - "\u2032": 278, - "\u2039": 279, - "\u204e": 280, - "\u2113": 281, - "\u2122": 282, - "\u2192": 283, - "\u2193": 284, - "\u21b5": 285, - "\u21d1": 286, - "\u21e4": 287, - "\u2206": 288, - "\u2208": 289, - "\u2212": 290, - "\u2213": 291, - "\u221a": 292, - "\u221e": 293, - "\u223c": 294, - "\u2243": 295, - "\u2248": 296, - "\u2264": 297, - "\u2265": 298, - "\u2299": 299, - "\u22c5": 300, - "\u22c6": 301, - "\u232c": 302, - "\u2423": 303, - "\u2424": 304, - "\u24d2": 305, - "\u25a1": 306, - "\u262f": 307, - "\u2663": 308, - "\u2666": 309, - "\u2709": 310, - "\u271d": 311, - "\u2e38": 312, - "\u318d": 313, - "\uf061": 314, - "\uf067": 315, - "\uf761": 316, - "\uf764": 317, - "\uf765": 318, - "\uf767": 319, - "\uf769": 320, - "\uf76b": 321, - "\uf76c": 322, - "\uf76e": 323, - "\uf76f": 324, - "\uf770": 325, - "\uf772": 326, - "\uf773": 327, - "\uf774": 328, - "\uf777": 329, - "\uf779": 330, - "\uf8e9": 331, - "\uff1b": 332, - "\ufffd": 333 - }, - "vocab_tag": { - "<PAD>": 0, - "B-<author>": 1, - "B-<title>": 2, - "I-<author>": 3, - "I-<title>": 4, - "O": 5 - }, - "vocab_case": [ - "<PAD>", - "numeric", - "allLower", - "allUpper", - "initialUpper", - "other", - "mainly_numeric", - "contains_digit" - ], - "max_char_length": 30, - "feature_preprocessor": { - "features_vocabulary_size": 12, - "features_indices": [ - 9, - 10, - 11, - 12, - 13, - 14, - 15, - 16, - 17, - 18, - 19, - 20, - 21, - 22, - 23, - 24, - 25, - 26, - 27, - 28, - 29, - 30 - ], - 
"features_map_to_index": { - "9": { - "BLOCKEND": 1, - "BLOCKIN": 2, - "BLOCKSTART": 3 - }, - "10": { - "LINEEND": 13, - "LINEIN": 14, - "LINESTART": 15 - }, - "11": { - "ALIGNEDLEFT": 25, - "LINEINDENT": 26 - }, - "12": { - "NEWFONT": 37, - "SAMEFONT": 38 - }, - "13": { - "HIGHERFONT": 49, - "LOWERFONT": 50, - "SAMEFONTSIZE": 51 - }, - "14": { - "0": 61, - "1": 62 - }, - "15": { - "0": 73, - "1": 74 - }, - "16": { - "ALLCAP": 85, - "INITCAP": 86, - "NOCAPS": 87 - }, - "17": { - "ALLDIGIT": 97, - "CONTAINSDIGITS": 98, - "NODIGIT": 99 - }, - "18": { - "0": 109, - "1": 110 - }, - "19": { - "0": 121, - "1": 122 - }, - "20": { - "0": 133, - "1": 134 - }, - "21": { - "0": 145, - "1": 146 - }, - "22": { - "0": 157, - "1": 158 - }, - "23": { - "0": 169, - "1": 170 - }, - "24": { - "0": 181, - "1": 182 - }, - "25": { - "0": 193, - "1": 194 - }, - "26": { - "COMMA": 205, - "DOT": 206, - "ENDBRACKET": 207, - "HYPHEN": 208, - "NOPUNCT": 209, - "OPENBRACKET": 210, - "PUNCT": 211, - "QUOTE": 212 - }, - "27": { - "0": 217, - "1": 218 - }, - "28": { - "0": 229 - }, - "29": { - "0": 241, - "1": 242 - }, - "30": { - "0": 253 - } - } - }, - "indice_tag": { - "0": "<PAD>", - "1": "B-<author>", - "2": "B-<title>", - "3": "I-<author>", - "4": "I-<title>", - "5": "O" - } -} \ No newline at end of file diff --git a/grobid-home/models/header-article-light-BidLSTM_ChainCRF_FEATURES/config.json b/grobid-home/models/header-article-light-BidLSTM_ChainCRF_FEATURES/config.json deleted file mode 100644 index 8370affa96..0000000000 --- a/grobid-home/models/header-article-light-BidLSTM_ChainCRF_FEATURES/config.json +++ /dev/null @@ -1,148 +0,0 @@ -{ - "model_name": "header-article-light-BidLSTM_ChainCRF_FEATURES", - "architecture": "BidLSTM_ChainCRF_FEATURES", - "embeddings_name": "glove-840B", - "char_vocab_size": 334, - "case_vocab_size": 8, - "char_embedding_size": 25, - "num_char_lstm_units": 25, - "max_char_length": 30, - "features_vocabulary_size": 12, - "features_indices": [ - 9, - 10, - 
11, - 12, - 13, - 14, - 15, - 16, - 17, - 18, - 19, - 20, - 21, - 22, - 23, - 24, - 25, - 26, - 27, - 28, - 29, - 30 - ], - "features_embedding_size": 4, - "features_lstm_units": 4, - "max_sequence_length": 2500, - "word_embedding_size": 300, - "num_word_lstm_units": 100, - "case_embedding_size": 5, - "dropout": 0.5, - "recurrent_dropout": 0.5, - "use_crf": true, - "use_chain_crf": true, - "fold_number": 1, - "batch_size": 9, - "transformer_name": null, - "use_ELMo": false, - "features_map_to_index": { - "9": { - "BLOCKEND": 1, - "BLOCKIN": 2, - "BLOCKSTART": 3 - }, - "10": { - "LINEEND": 13, - "LINEIN": 14, - "LINESTART": 15 - }, - "11": { - "ALIGNEDLEFT": 25, - "LINEINDENT": 26 - }, - "12": { - "NEWFONT": 37, - "SAMEFONT": 38 - }, - "13": { - "HIGHERFONT": 49, - "LOWERFONT": 50, - "SAMEFONTSIZE": 51 - }, - "14": { - "0": 61, - "1": 62 - }, - "15": { - "0": 73, - "1": 74 - }, - "16": { - "ALLCAP": 85, - "INITCAP": 86, - "NOCAPS": 87 - }, - "17": { - "ALLDIGIT": 97, - "CONTAINSDIGITS": 98, - "NODIGIT": 99 - }, - "18": { - "0": 109, - "1": 110 - }, - "19": { - "0": 121, - "1": 122 - }, - "20": { - "0": 133, - "1": 134 - }, - "21": { - "0": 145, - "1": 146 - }, - "22": { - "0": 157, - "1": 158 - }, - "23": { - "0": 169, - "1": 170 - }, - "24": { - "0": 181, - "1": 182 - }, - "25": { - "0": 193, - "1": 194 - }, - "26": { - "COMMA": 205, - "DOT": 206, - "ENDBRACKET": 207, - "HYPHEN": 208, - "NOPUNCT": 209, - "OPENBRACKET": 210, - "PUNCT": 211, - "QUOTE": 212 - }, - "27": { - "0": 217, - "1": 218 - }, - "28": { - "0": 229 - }, - "29": { - "0": 241, - "1": 242 - }, - "30": { - "0": 253 - } - } -} \ No newline at end of file diff --git a/grobid-home/models/header-article-light-BidLSTM_ChainCRF_FEATURES/model_weights.hdf5 b/grobid-home/models/header-article-light-BidLSTM_ChainCRF_FEATURES/model_weights.hdf5 deleted file mode 100644 index d9e2a5b251..0000000000 Binary files a/grobid-home/models/header-article-light-BidLSTM_ChainCRF_FEATURES/model_weights.hdf5 and /dev/null 
differ diff --git a/grobid-home/models/header-article-light-BidLSTM_ChainCRF_FEATURES/preprocessor.json b/grobid-home/models/header-article-light-BidLSTM_ChainCRF_FEATURES/preprocessor.json deleted file mode 100644 index 8b28e70aff..0000000000 --- a/grobid-home/models/header-article-light-BidLSTM_ChainCRF_FEATURES/preprocessor.json +++ /dev/null @@ -1,498 +0,0 @@ -{ - "padding": true, - "return_lengths": true, - "return_word_embeddings": true, - "return_casing": false, - "return_features": true, - "return_chars": true, - "return_bert_embeddings": false, - "vocab_char": { - "<PAD>": 0, - "<UNK>": 1, - "!": 2, - "\"": 3, - "#": 4, - "$": 5, - "%": 6, - "&": 7, - "'": 8, - "(": 9, - ")": 10, - "*": 11, - "+": 12, - ",": 13, - "-": 14, - ".": 15, - "/": 16, - "0": 17, - "1": 18, - "2": 19, - "3": 20, - "4": 21, - "5": 22, - "6": 23, - "7": 24, - "8": 25, - "9": 26, - ":": 27, - ";": 28, - "<": 29, - "=": 30, - ">": 31, - "?": 32, - "@": 33, - "A": 34, - "B": 35, - "C": 36, - "D": 37, - "E": 38, - "F": 39, - "G": 40, - "H": 41, - "I": 42, - "J": 43, - "K": 44, - "L": 45, - "M": 46, - "N": 47, - "O": 48, - "P": 49, - "Q": 50, - "R": 51, - "S": 52, - "T": 53, - "U": 54, - "V": 55, - "W": 56, - "X": 57, - "Y": 58, - "Z": 59, - "[": 60, - "\\": 61, - "]": 62, - "^": 63, - "_": 64, - "`": 65, - "a": 66, - "b": 67, - "c": 68, - "d": 69, - "e": 70, - "f": 71, - "g": 72, - "h": 73, - "i": 74, - "j": 75, - "k": 76, - "l": 77, - "m": 78, - "n": 79, - "o": 80, - "p": 81, - "q": 82, - "r": 83, - "s": 84, - "t": 85, - "u": 86, - "v": 87, - "w": 88, - "x": 89, - "y": 90, - "z": 91, - "{": 92, - "|": 93, - "}": 94, - "~": 95, - "\u00a1": 96, - "\u00a2": 97, - "\u00a3": 98, - "\u00a4": 99, - "\u00a5": 100, - "\u00a7": 101, - "\u00a8": 102, - "\u00a9": 103, - "\u00aa": 104, - "\u00ab": 105, - "\u00ae": 106, - "\u00af": 107, - "\u00b0": 108, - "\u00b1": 109, - "\u00b2": 110, - "\u00b4": 111, - "\u00b5": 112, - "\u00b6": 113, - "\u00b8": 114, - "\u00bb": 115, - "\u00bc": 116, - "\u00c0": 
117, - "\u00c1": 118, - "\u00c2": 119, - "\u00c3": 120, - "\u00c5": 121, - "\u00c7": 122, - "\u00c9": 123, - "\u00ca": 124, - "\u00cc": 125, - "\u00ce": 126, - "\u00d3": 127, - "\u00d5": 128, - "\u00d6": 129, - "\u00d7": 130, - "\u00d8": 131, - "\u00df": 132, - "\u00e0": 133, - "\u00e1": 134, - "\u00e2": 135, - "\u00e3": 136, - "\u00e4": 137, - "\u00e5": 138, - "\u00e7": 139, - "\u00e8": 140, - "\u00e9": 141, - "\u00ea": 142, - "\u00eb": 143, - "\u00ed": 144, - "\u00ee": 145, - "\u00ef": 146, - "\u00f1": 147, - "\u00f2": 148, - "\u00f3": 149, - "\u00f4": 150, - "\u00f5": 151, - "\u00f6": 152, - "\u00f8": 153, - "\u00fa": 154, - "\u00fb": 155, - "\u00fc": 156, - "\u00fd": 157, - "\u00fe": 158, - "\u00ff": 159, - "\u0100": 160, - "\u0103": 161, - "\u0104": 162, - "\u0107": 163, - "\u010d": 164, - "\u0117": 165, - "\u0119": 166, - "\u0130": 167, - "\u0131": 168, - "\u0141": 169, - "\u0142": 170, - "\u0144": 171, - "\u014c": 172, - "\u0159": 173, - "\u015f": 174, - "\u0160": 175, - "\u0161": 176, - "\u0179": 177, - "\u017a": 178, - "\u017c": 179, - "\u017d": 180, - "\u017e": 181, - "\u0202": 182, - "\u0288": 183, - "\u02d9": 184, - "\u0351": 185, - "\u0352": 186, - "\u0353": 187, - "\u0354": 188, - "\u0393": 189, - "\u0394": 190, - "\u0398": 191, - "\u039b": 192, - "\u039e": 193, - "\u03a0": 194, - "\u03a3": 195, - "\u03a5": 196, - "\u03a8": 197, - "\u03b1": 198, - "\u03b2": 199, - "\u03b3": 200, - "\u03b4": 201, - "\u03b5": 202, - "\u03b6": 203, - "\u03b7": 204, - "\u03b8": 205, - "\u03ba": 206, - "\u03bb": 207, - "\u03bc": 208, - "\u03bd": 209, - "\u03be": 210, - "\u03c0": 211, - "\u03c1": 212, - "\u03c3": 213, - "\u03c4": 214, - "\u03c6": 215, - "\u03c7": 216, - "\u03c8": 217, - "\u03c9": 218, - "\u03d5": 219, - "\u03e9": 220, - "\u03ea": 221, - "\u03eb": 222, - "\u03ed": 223, - "\u03f3": 224, - "\u03fd": 225, - "\u03fe": 226, - "\u0408": 227, - "\u0409": 228, - "\u0411": 229, - "\u0418": 230, - "\u041b": 231, - "\u041f": 232, - "\u0424": 233, - "\u0426": 234, - 
"\u0431": 235, - "\u0432": 236, - "\u0433": 237, - "\u0434": 238, - "\u0437": 239, - "\u0438": 240, - "\u0439": 241, - "\u043a": 242, - "\u043b": 243, - "\u043c": 244, - "\u043d": 245, - "\u043f": 246, - "\u0440": 247, - "\u0441": 248, - "\u0442": 249, - "\u0443": 250, - "\u0444": 251, - "\u0445": 252, - "\u0446": 253, - "\u0447": 254, - "\u0448": 255, - "\u0449": 256, - "\u044b": 257, - "\u044c": 258, - "\u044d": 259, - "\u044e": 260, - "\u044f": 261, - "\u0451": 262, - "\u0546": 263, - "\u060a": 264, - "\u060d": 265, - "\u065e": 266, - "\u0728": 267, - "\u0846": 268, - "\u0be6": 269, - "\u0be7": 270, - "\u1c14": 271, - "\u1e46": 272, - "\u2020": 273, - "\u2021": 274, - "\u2022": 275, - "\u202b": 276, - "\u202c": 277, - "\u2032": 278, - "\u2039": 279, - "\u204e": 280, - "\u2113": 281, - "\u2122": 282, - "\u2192": 283, - "\u2193": 284, - "\u21b5": 285, - "\u21d1": 286, - "\u21e4": 287, - "\u2206": 288, - "\u2208": 289, - "\u2212": 290, - "\u2213": 291, - "\u221a": 292, - "\u221e": 293, - "\u223c": 294, - "\u2243": 295, - "\u2248": 296, - "\u2264": 297, - "\u2265": 298, - "\u2299": 299, - "\u22c5": 300, - "\u22c6": 301, - "\u232c": 302, - "\u2423": 303, - "\u2424": 304, - "\u24d2": 305, - "\u25a1": 306, - "\u262f": 307, - "\u2663": 308, - "\u2666": 309, - "\u2709": 310, - "\u271d": 311, - "\u2e38": 312, - "\u318d": 313, - "\uf061": 314, - "\uf067": 315, - "\uf761": 316, - "\uf764": 317, - "\uf765": 318, - "\uf767": 319, - "\uf769": 320, - "\uf76b": 321, - "\uf76c": 322, - "\uf76e": 323, - "\uf76f": 324, - "\uf770": 325, - "\uf772": 326, - "\uf773": 327, - "\uf774": 328, - "\uf777": 329, - "\uf779": 330, - "\uf8e9": 331, - "\uff1b": 332, - "\ufffd": 333 - }, - "vocab_tag": { - "<PAD>": 0, - "B-<author>": 1, - "B-<title>": 2, - "I-<author>": 3, - "I-<title>": 4, - "O": 5 - }, - "vocab_case": [ - "<PAD>", - "numeric", - "allLower", - "allUpper", - "initialUpper", - "other", - "mainly_numeric", - "contains_digit" - ], - "max_char_length": 30, - "feature_preprocessor": { 
- "features_vocabulary_size": 12, - "features_indices": [ - 9, - 10, - 11, - 12, - 13, - 14, - 15, - 16, - 17, - 18, - 19, - 20, - 21, - 22, - 23, - 24, - 25, - 26, - 27, - 28, - 29, - 30 - ], - "features_map_to_index": { - "9": { - "BLOCKEND": 1, - "BLOCKIN": 2, - "BLOCKSTART": 3 - }, - "10": { - "LINEEND": 13, - "LINEIN": 14, - "LINESTART": 15 - }, - "11": { - "ALIGNEDLEFT": 25, - "LINEINDENT": 26 - }, - "12": { - "NEWFONT": 37, - "SAMEFONT": 38 - }, - "13": { - "HIGHERFONT": 49, - "LOWERFONT": 50, - "SAMEFONTSIZE": 51 - }, - "14": { - "0": 61, - "1": 62 - }, - "15": { - "0": 73, - "1": 74 - }, - "16": { - "ALLCAP": 85, - "INITCAP": 86, - "NOCAPS": 87 - }, - "17": { - "ALLDIGIT": 97, - "CONTAINSDIGITS": 98, - "NODIGIT": 99 - }, - "18": { - "0": 109, - "1": 110 - }, - "19": { - "0": 121, - "1": 122 - }, - "20": { - "0": 133, - "1": 134 - }, - "21": { - "0": 145, - "1": 146 - }, - "22": { - "0": 157, - "1": 158 - }, - "23": { - "0": 169, - "1": 170 - }, - "24": { - "0": 181, - "1": 182 - }, - "25": { - "0": 193, - "1": 194 - }, - "26": { - "COMMA": 205, - "DOT": 206, - "ENDBRACKET": 207, - "HYPHEN": 208, - "NOPUNCT": 209, - "OPENBRACKET": 210, - "PUNCT": 211, - "QUOTE": 212 - }, - "27": { - "0": 217, - "1": 218 - }, - "28": { - "0": 229 - }, - "29": { - "0": 241, - "1": 242 - }, - "30": { - "0": 253 - } - } - }, - "indice_tag": { - "0": "<PAD>", - "1": "B-<author>", - "2": "B-<title>", - "3": "I-<author>", - "4": "I-<title>", - "5": "O" - } -} \ No newline at end of file diff --git a/grobid-home/models/header-article-light-ref-BidLSTM_CRF_FEATURES/config.json b/grobid-home/models/header-article-light-ref-BidLSTM_CRF_FEATURES/config.json deleted file mode 100644 index e98d4f1c72..0000000000 --- a/grobid-home/models/header-article-light-ref-BidLSTM_CRF_FEATURES/config.json +++ /dev/null @@ -1,148 +0,0 @@ -{ - "model_name": "header-article-light-ref-BidLSTM_CRF_FEATURES", - "architecture": "BidLSTM_CRF_FEATURES", - "embeddings_name": "glove-840B", - "char_vocab_size": 
334, - "case_vocab_size": 8, - "char_embedding_size": 25, - "num_char_lstm_units": 25, - "max_char_length": 30, - "features_vocabulary_size": 12, - "features_indices": [ - 9, - 10, - 11, - 12, - 13, - 14, - 15, - 16, - 17, - 18, - 19, - 20, - 21, - 22, - 23, - 24, - 25, - 26, - 27, - 28, - 29, - 30 - ], - "features_embedding_size": 4, - "features_lstm_units": 4, - "max_sequence_length": 3000, - "word_embedding_size": 300, - "num_word_lstm_units": 100, - "case_embedding_size": 5, - "dropout": 0.5, - "recurrent_dropout": 0.5, - "use_crf": true, - "use_chain_crf": false, - "fold_number": 1, - "batch_size": 20, - "transformer_name": null, - "use_ELMo": false, - "features_map_to_index": { - "9": { - "BLOCKEND": 1, - "BLOCKIN": 2, - "BLOCKSTART": 3 - }, - "10": { - "LINEEND": 13, - "LINEIN": 14, - "LINESTART": 15 - }, - "11": { - "ALIGNEDLEFT": 25, - "LINEINDENT": 26 - }, - "12": { - "NEWFONT": 37, - "SAMEFONT": 38 - }, - "13": { - "HIGHERFONT": 49, - "LOWERFONT": 50, - "SAMEFONTSIZE": 51 - }, - "14": { - "0": 61, - "1": 62 - }, - "15": { - "0": 73, - "1": 74 - }, - "16": { - "ALLCAP": 85, - "INITCAP": 86, - "NOCAPS": 87 - }, - "17": { - "ALLDIGIT": 97, - "CONTAINSDIGITS": 98, - "NODIGIT": 99 - }, - "18": { - "0": 109, - "1": 110 - }, - "19": { - "0": 121, - "1": 122 - }, - "20": { - "0": 133, - "1": 134 - }, - "21": { - "0": 145, - "1": 146 - }, - "22": { - "0": 157, - "1": 158 - }, - "23": { - "0": 169, - "1": 170 - }, - "24": { - "0": 181, - "1": 182 - }, - "25": { - "0": 193, - "1": 194 - }, - "26": { - "COMMA": 205, - "DOT": 206, - "ENDBRACKET": 207, - "HYPHEN": 208, - "NOPUNCT": 209, - "OPENBRACKET": 210, - "PUNCT": 211, - "QUOTE": 212 - }, - "27": { - "0": 217, - "1": 218 - }, - "28": { - "0": 229 - }, - "29": { - "0": 241, - "1": 242 - }, - "30": { - "0": 253 - } - } -} \ No newline at end of file diff --git a/grobid-home/models/header-article-light-ref-BidLSTM_CRF_FEATURES/model_weights.hdf5 
b/grobid-home/models/header-article-light-ref-BidLSTM_CRF_FEATURES/model_weights.hdf5 deleted file mode 100644 index 919d6512b9..0000000000 Binary files a/grobid-home/models/header-article-light-ref-BidLSTM_CRF_FEATURES/model_weights.hdf5 and /dev/null differ diff --git a/grobid-home/models/header-article-light-ref-BidLSTM_CRF_FEATURES/preprocessor.json b/grobid-home/models/header-article-light-ref-BidLSTM_CRF_FEATURES/preprocessor.json deleted file mode 100644 index 3787bd16b7..0000000000 --- a/grobid-home/models/header-article-light-ref-BidLSTM_CRF_FEATURES/preprocessor.json +++ /dev/null @@ -1,506 +0,0 @@ -{ - "padding": true, - "return_lengths": true, - "return_word_embeddings": true, - "return_casing": false, - "return_features": true, - "return_chars": true, - "return_bert_embeddings": false, - "vocab_char": { - "<PAD>": 0, - "<UNK>": 1, - "!": 2, - "\"": 3, - "#": 4, - "$": 5, - "%": 6, - "&": 7, - "'": 8, - "(": 9, - ")": 10, - "*": 11, - "+": 12, - ",": 13, - "-": 14, - ".": 15, - "/": 16, - "0": 17, - "1": 18, - "2": 19, - "3": 20, - "4": 21, - "5": 22, - "6": 23, - "7": 24, - "8": 25, - "9": 26, - ":": 27, - ";": 28, - "<": 29, - "=": 30, - ">": 31, - "?": 32, - "@": 33, - "A": 34, - "B": 35, - "C": 36, - "D": 37, - "E": 38, - "F": 39, - "G": 40, - "H": 41, - "I": 42, - "J": 43, - "K": 44, - "L": 45, - "M": 46, - "N": 47, - "O": 48, - "P": 49, - "Q": 50, - "R": 51, - "S": 52, - "T": 53, - "U": 54, - "V": 55, - "W": 56, - "X": 57, - "Y": 58, - "Z": 59, - "[": 60, - "\\": 61, - "]": 62, - "^": 63, - "_": 64, - "`": 65, - "a": 66, - "b": 67, - "c": 68, - "d": 69, - "e": 70, - "f": 71, - "g": 72, - "h": 73, - "i": 74, - "j": 75, - "k": 76, - "l": 77, - "m": 78, - "n": 79, - "o": 80, - "p": 81, - "q": 82, - "r": 83, - "s": 84, - "t": 85, - "u": 86, - "v": 87, - "w": 88, - "x": 89, - "y": 90, - "z": 91, - "{": 92, - "|": 93, - "}": 94, - "~": 95, - "\u00a1": 96, - "\u00a2": 97, - "\u00a3": 98, - "\u00a4": 99, - "\u00a5": 100, - "\u00a7": 101, - "\u00a8": 102, 
- "\u00a9": 103, - "\u00aa": 104, - "\u00ab": 105, - "\u00ae": 106, - "\u00af": 107, - "\u00b0": 108, - "\u00b1": 109, - "\u00b2": 110, - "\u00b4": 111, - "\u00b5": 112, - "\u00b6": 113, - "\u00b8": 114, - "\u00bb": 115, - "\u00bc": 116, - "\u00c0": 117, - "\u00c1": 118, - "\u00c2": 119, - "\u00c3": 120, - "\u00c5": 121, - "\u00c7": 122, - "\u00c9": 123, - "\u00ca": 124, - "\u00cc": 125, - "\u00ce": 126, - "\u00d3": 127, - "\u00d5": 128, - "\u00d6": 129, - "\u00d7": 130, - "\u00d8": 131, - "\u00df": 132, - "\u00e0": 133, - "\u00e1": 134, - "\u00e2": 135, - "\u00e3": 136, - "\u00e4": 137, - "\u00e5": 138, - "\u00e7": 139, - "\u00e8": 140, - "\u00e9": 141, - "\u00ea": 142, - "\u00eb": 143, - "\u00ed": 144, - "\u00ee": 145, - "\u00ef": 146, - "\u00f1": 147, - "\u00f2": 148, - "\u00f3": 149, - "\u00f4": 150, - "\u00f5": 151, - "\u00f6": 152, - "\u00f8": 153, - "\u00fa": 154, - "\u00fb": 155, - "\u00fc": 156, - "\u00fd": 157, - "\u00fe": 158, - "\u00ff": 159, - "\u0100": 160, - "\u0103": 161, - "\u0104": 162, - "\u0107": 163, - "\u010d": 164, - "\u0117": 165, - "\u0119": 166, - "\u0130": 167, - "\u0131": 168, - "\u0141": 169, - "\u0142": 170, - "\u0144": 171, - "\u014c": 172, - "\u0159": 173, - "\u015f": 174, - "\u0160": 175, - "\u0161": 176, - "\u0179": 177, - "\u017a": 178, - "\u017c": 179, - "\u017d": 180, - "\u017e": 181, - "\u0202": 182, - "\u0288": 183, - "\u02d9": 184, - "\u0351": 185, - "\u0352": 186, - "\u0353": 187, - "\u0354": 188, - "\u0393": 189, - "\u0394": 190, - "\u0398": 191, - "\u039b": 192, - "\u039e": 193, - "\u03a0": 194, - "\u03a3": 195, - "\u03a5": 196, - "\u03a8": 197, - "\u03b1": 198, - "\u03b2": 199, - "\u03b3": 200, - "\u03b4": 201, - "\u03b5": 202, - "\u03b6": 203, - "\u03b7": 204, - "\u03b8": 205, - "\u03ba": 206, - "\u03bb": 207, - "\u03bc": 208, - "\u03bd": 209, - "\u03be": 210, - "\u03c0": 211, - "\u03c1": 212, - "\u03c3": 213, - "\u03c4": 214, - "\u03c6": 215, - "\u03c7": 216, - "\u03c8": 217, - "\u03c9": 218, - "\u03d5": 219, - 
"\u03e9": 220, - "\u03ea": 221, - "\u03eb": 222, - "\u03ed": 223, - "\u03f3": 224, - "\u03fd": 225, - "\u03fe": 226, - "\u0408": 227, - "\u0409": 228, - "\u0411": 229, - "\u0418": 230, - "\u041b": 231, - "\u041f": 232, - "\u0424": 233, - "\u0426": 234, - "\u0431": 235, - "\u0432": 236, - "\u0433": 237, - "\u0434": 238, - "\u0437": 239, - "\u0438": 240, - "\u0439": 241, - "\u043a": 242, - "\u043b": 243, - "\u043c": 244, - "\u043d": 245, - "\u043f": 246, - "\u0440": 247, - "\u0441": 248, - "\u0442": 249, - "\u0443": 250, - "\u0444": 251, - "\u0445": 252, - "\u0446": 253, - "\u0447": 254, - "\u0448": 255, - "\u0449": 256, - "\u044b": 257, - "\u044c": 258, - "\u044d": 259, - "\u044e": 260, - "\u044f": 261, - "\u0451": 262, - "\u0546": 263, - "\u060a": 264, - "\u060d": 265, - "\u065e": 266, - "\u0728": 267, - "\u0846": 268, - "\u0be6": 269, - "\u0be7": 270, - "\u1c14": 271, - "\u1e46": 272, - "\u2020": 273, - "\u2021": 274, - "\u2022": 275, - "\u202b": 276, - "\u202c": 277, - "\u2032": 278, - "\u2039": 279, - "\u204e": 280, - "\u2113": 281, - "\u2122": 282, - "\u2192": 283, - "\u2193": 284, - "\u21b5": 285, - "\u21d1": 286, - "\u21e4": 287, - "\u2206": 288, - "\u2208": 289, - "\u2212": 290, - "\u2213": 291, - "\u221a": 292, - "\u221e": 293, - "\u223c": 294, - "\u2243": 295, - "\u2248": 296, - "\u2264": 297, - "\u2265": 298, - "\u2299": 299, - "\u22c5": 300, - "\u22c6": 301, - "\u232c": 302, - "\u2423": 303, - "\u2424": 304, - "\u24d2": 305, - "\u25a1": 306, - "\u262f": 307, - "\u2663": 308, - "\u2666": 309, - "\u2709": 310, - "\u271d": 311, - "\u2e38": 312, - "\u318d": 313, - "\uf061": 314, - "\uf067": 315, - "\uf761": 316, - "\uf764": 317, - "\uf765": 318, - "\uf767": 319, - "\uf769": 320, - "\uf76b": 321, - "\uf76c": 322, - "\uf76e": 323, - "\uf76f": 324, - "\uf770": 325, - "\uf772": 326, - "\uf773": 327, - "\uf774": 328, - "\uf777": 329, - "\uf779": 330, - "\uf8e9": 331, - "\uff1b": 332, - "\ufffd": 333 - }, - "vocab_tag": { - "<PAD>": 0, - "B-<author>": 1, - 
"B-<date>": 2, - "B-<pubnum>": 3, - "B-<title>": 4, - "I-<author>": 5, - "I-<date>": 6, - "I-<pubnum>": 7, - "I-<title>": 8, - "O": 9 - }, - "vocab_case": [ - "<PAD>", - "numeric", - "allLower", - "allUpper", - "initialUpper", - "other", - "mainly_numeric", - "contains_digit" - ], - "max_char_length": 30, - "feature_preprocessor": { - "features_vocabulary_size": 12, - "features_indices": [ - 9, - 10, - 11, - 12, - 13, - 14, - 15, - 16, - 17, - 18, - 19, - 20, - 21, - 22, - 23, - 24, - 25, - 26, - 27, - 28, - 29, - 30 - ], - "features_map_to_index": { - "9": { - "BLOCKEND": 1, - "BLOCKIN": 2, - "BLOCKSTART": 3 - }, - "10": { - "LINEEND": 13, - "LINEIN": 14, - "LINESTART": 15 - }, - "11": { - "ALIGNEDLEFT": 25, - "LINEINDENT": 26 - }, - "12": { - "NEWFONT": 37, - "SAMEFONT": 38 - }, - "13": { - "HIGHERFONT": 49, - "LOWERFONT": 50, - "SAMEFONTSIZE": 51 - }, - "14": { - "0": 61, - "1": 62 - }, - "15": { - "0": 73, - "1": 74 - }, - "16": { - "ALLCAP": 85, - "INITCAP": 86, - "NOCAPS": 87 - }, - "17": { - "ALLDIGIT": 97, - "CONTAINSDIGITS": 98, - "NODIGIT": 99 - }, - "18": { - "0": 109, - "1": 110 - }, - "19": { - "0": 121, - "1": 122 - }, - "20": { - "0": 133, - "1": 134 - }, - "21": { - "0": 145, - "1": 146 - }, - "22": { - "0": 157, - "1": 158 - }, - "23": { - "0": 169, - "1": 170 - }, - "24": { - "0": 181, - "1": 182 - }, - "25": { - "0": 193, - "1": 194 - }, - "26": { - "COMMA": 205, - "DOT": 206, - "ENDBRACKET": 207, - "HYPHEN": 208, - "NOPUNCT": 209, - "OPENBRACKET": 210, - "PUNCT": 211, - "QUOTE": 212 - }, - "27": { - "0": 217, - "1": 218 - }, - "28": { - "0": 229 - }, - "29": { - "0": 241, - "1": 242 - }, - "30": { - "0": 253 - } - } - }, - "indice_tag": { - "0": "<PAD>", - "1": "B-<author>", - "2": "B-<date>", - "3": "B-<pubnum>", - "4": "B-<title>", - "5": "I-<author>", - "6": "I-<date>", - "7": "I-<pubnum>", - "8": "I-<title>", - "9": "O" - } -} \ No newline at end of file diff --git 
a/grobid-home/models/header-article-light-ref-BidLSTM_ChainCRF_FEATURES/config.json b/grobid-home/models/header-article-light-ref-BidLSTM_ChainCRF_FEATURES/config.json deleted file mode 100644 index 04ae3a1e5b..0000000000 --- a/grobid-home/models/header-article-light-ref-BidLSTM_ChainCRF_FEATURES/config.json +++ /dev/null @@ -1,148 +0,0 @@ -{ - "model_name": "header-article-light-ref-BidLSTM_ChainCRF_FEATURES", - "architecture": "BidLSTM_ChainCRF_FEATURES", - "embeddings_name": "glove-840B", - "char_vocab_size": 334, - "case_vocab_size": 8, - "char_embedding_size": 25, - "num_char_lstm_units": 25, - "max_char_length": 30, - "features_vocabulary_size": 12, - "features_indices": [ - 9, - 10, - 11, - 12, - 13, - 14, - 15, - 16, - 17, - 18, - 19, - 20, - 21, - 22, - 23, - 24, - 25, - 26, - 27, - 28, - 29, - 30 - ], - "features_embedding_size": 4, - "features_lstm_units": 4, - "max_sequence_length": 3000, - "word_embedding_size": 300, - "num_word_lstm_units": 100, - "case_embedding_size": 5, - "dropout": 0.5, - "recurrent_dropout": 0.5, - "use_crf": true, - "use_chain_crf": true, - "fold_number": 1, - "batch_size": 20, - "transformer_name": null, - "use_ELMo": false, - "features_map_to_index": { - "9": { - "BLOCKEND": 1, - "BLOCKIN": 2, - "BLOCKSTART": 3 - }, - "10": { - "LINEEND": 13, - "LINEIN": 14, - "LINESTART": 15 - }, - "11": { - "ALIGNEDLEFT": 25, - "LINEINDENT": 26 - }, - "12": { - "NEWFONT": 37, - "SAMEFONT": 38 - }, - "13": { - "HIGHERFONT": 49, - "LOWERFONT": 50, - "SAMEFONTSIZE": 51 - }, - "14": { - "0": 61, - "1": 62 - }, - "15": { - "0": 73, - "1": 74 - }, - "16": { - "ALLCAP": 85, - "INITCAP": 86, - "NOCAPS": 87 - }, - "17": { - "ALLDIGIT": 97, - "CONTAINSDIGITS": 98, - "NODIGIT": 99 - }, - "18": { - "0": 109, - "1": 110 - }, - "19": { - "0": 121, - "1": 122 - }, - "20": { - "0": 133, - "1": 134 - }, - "21": { - "0": 145, - "1": 146 - }, - "22": { - "0": 157, - "1": 158 - }, - "23": { - "0": 169, - "1": 170 - }, - "24": { - "0": 181, - "1": 182 - }, - 
"25": { - "0": 193, - "1": 194 - }, - "26": { - "COMMA": 205, - "DOT": 206, - "ENDBRACKET": 207, - "HYPHEN": 208, - "NOPUNCT": 209, - "OPENBRACKET": 210, - "PUNCT": 211, - "QUOTE": 212 - }, - "27": { - "0": 217, - "1": 218 - }, - "28": { - "0": 229 - }, - "29": { - "0": 241, - "1": 242 - }, - "30": { - "0": 253 - } - } -} \ No newline at end of file diff --git a/grobid-home/models/header-article-light-ref-BidLSTM_ChainCRF_FEATURES/model_weights.hdf5 b/grobid-home/models/header-article-light-ref-BidLSTM_ChainCRF_FEATURES/model_weights.hdf5 deleted file mode 100644 index 9cfcff84b5..0000000000 Binary files a/grobid-home/models/header-article-light-ref-BidLSTM_ChainCRF_FEATURES/model_weights.hdf5 and /dev/null differ diff --git a/grobid-home/models/header-article-light-ref-BidLSTM_ChainCRF_FEATURES/preprocessor.json b/grobid-home/models/header-article-light-ref-BidLSTM_ChainCRF_FEATURES/preprocessor.json deleted file mode 100644 index 3787bd16b7..0000000000 --- a/grobid-home/models/header-article-light-ref-BidLSTM_ChainCRF_FEATURES/preprocessor.json +++ /dev/null @@ -1,506 +0,0 @@ -{ - "padding": true, - "return_lengths": true, - "return_word_embeddings": true, - "return_casing": false, - "return_features": true, - "return_chars": true, - "return_bert_embeddings": false, - "vocab_char": { - "<PAD>": 0, - "<UNK>": 1, - "!": 2, - "\"": 3, - "#": 4, - "$": 5, - "%": 6, - "&": 7, - "'": 8, - "(": 9, - ")": 10, - "*": 11, - "+": 12, - ",": 13, - "-": 14, - ".": 15, - "/": 16, - "0": 17, - "1": 18, - "2": 19, - "3": 20, - "4": 21, - "5": 22, - "6": 23, - "7": 24, - "8": 25, - "9": 26, - ":": 27, - ";": 28, - "<": 29, - "=": 30, - ">": 31, - "?": 32, - "@": 33, - "A": 34, - "B": 35, - "C": 36, - "D": 37, - "E": 38, - "F": 39, - "G": 40, - "H": 41, - "I": 42, - "J": 43, - "K": 44, - "L": 45, - "M": 46, - "N": 47, - "O": 48, - "P": 49, - "Q": 50, - "R": 51, - "S": 52, - "T": 53, - "U": 54, - "V": 55, - "W": 56, - "X": 57, - "Y": 58, - "Z": 59, - "[": 60, - "\\": 61, - "]": 62, 
- "^": 63, - "_": 64, - "`": 65, - "a": 66, - "b": 67, - "c": 68, - "d": 69, - "e": 70, - "f": 71, - "g": 72, - "h": 73, - "i": 74, - "j": 75, - "k": 76, - "l": 77, - "m": 78, - "n": 79, - "o": 80, - "p": 81, - "q": 82, - "r": 83, - "s": 84, - "t": 85, - "u": 86, - "v": 87, - "w": 88, - "x": 89, - "y": 90, - "z": 91, - "{": 92, - "|": 93, - "}": 94, - "~": 95, - "\u00a1": 96, - "\u00a2": 97, - "\u00a3": 98, - "\u00a4": 99, - "\u00a5": 100, - "\u00a7": 101, - "\u00a8": 102, - "\u00a9": 103, - "\u00aa": 104, - "\u00ab": 105, - "\u00ae": 106, - "\u00af": 107, - "\u00b0": 108, - "\u00b1": 109, - "\u00b2": 110, - "\u00b4": 111, - "\u00b5": 112, - "\u00b6": 113, - "\u00b8": 114, - "\u00bb": 115, - "\u00bc": 116, - "\u00c0": 117, - "\u00c1": 118, - "\u00c2": 119, - "\u00c3": 120, - "\u00c5": 121, - "\u00c7": 122, - "\u00c9": 123, - "\u00ca": 124, - "\u00cc": 125, - "\u00ce": 126, - "\u00d3": 127, - "\u00d5": 128, - "\u00d6": 129, - "\u00d7": 130, - "\u00d8": 131, - "\u00df": 132, - "\u00e0": 133, - "\u00e1": 134, - "\u00e2": 135, - "\u00e3": 136, - "\u00e4": 137, - "\u00e5": 138, - "\u00e7": 139, - "\u00e8": 140, - "\u00e9": 141, - "\u00ea": 142, - "\u00eb": 143, - "\u00ed": 144, - "\u00ee": 145, - "\u00ef": 146, - "\u00f1": 147, - "\u00f2": 148, - "\u00f3": 149, - "\u00f4": 150, - "\u00f5": 151, - "\u00f6": 152, - "\u00f8": 153, - "\u00fa": 154, - "\u00fb": 155, - "\u00fc": 156, - "\u00fd": 157, - "\u00fe": 158, - "\u00ff": 159, - "\u0100": 160, - "\u0103": 161, - "\u0104": 162, - "\u0107": 163, - "\u010d": 164, - "\u0117": 165, - "\u0119": 166, - "\u0130": 167, - "\u0131": 168, - "\u0141": 169, - "\u0142": 170, - "\u0144": 171, - "\u014c": 172, - "\u0159": 173, - "\u015f": 174, - "\u0160": 175, - "\u0161": 176, - "\u0179": 177, - "\u017a": 178, - "\u017c": 179, - "\u017d": 180, - "\u017e": 181, - "\u0202": 182, - "\u0288": 183, - "\u02d9": 184, - "\u0351": 185, - "\u0352": 186, - "\u0353": 187, - "\u0354": 188, - "\u0393": 189, - "\u0394": 190, - "\u0398": 191, - 
"\u039b": 192, - "\u039e": 193, - "\u03a0": 194, - "\u03a3": 195, - "\u03a5": 196, - "\u03a8": 197, - "\u03b1": 198, - "\u03b2": 199, - "\u03b3": 200, - "\u03b4": 201, - "\u03b5": 202, - "\u03b6": 203, - "\u03b7": 204, - "\u03b8": 205, - "\u03ba": 206, - "\u03bb": 207, - "\u03bc": 208, - "\u03bd": 209, - "\u03be": 210, - "\u03c0": 211, - "\u03c1": 212, - "\u03c3": 213, - "\u03c4": 214, - "\u03c6": 215, - "\u03c7": 216, - "\u03c8": 217, - "\u03c9": 218, - "\u03d5": 219, - "\u03e9": 220, - "\u03ea": 221, - "\u03eb": 222, - "\u03ed": 223, - "\u03f3": 224, - "\u03fd": 225, - "\u03fe": 226, - "\u0408": 227, - "\u0409": 228, - "\u0411": 229, - "\u0418": 230, - "\u041b": 231, - "\u041f": 232, - "\u0424": 233, - "\u0426": 234, - "\u0431": 235, - "\u0432": 236, - "\u0433": 237, - "\u0434": 238, - "\u0437": 239, - "\u0438": 240, - "\u0439": 241, - "\u043a": 242, - "\u043b": 243, - "\u043c": 244, - "\u043d": 245, - "\u043f": 246, - "\u0440": 247, - "\u0441": 248, - "\u0442": 249, - "\u0443": 250, - "\u0444": 251, - "\u0445": 252, - "\u0446": 253, - "\u0447": 254, - "\u0448": 255, - "\u0449": 256, - "\u044b": 257, - "\u044c": 258, - "\u044d": 259, - "\u044e": 260, - "\u044f": 261, - "\u0451": 262, - "\u0546": 263, - "\u060a": 264, - "\u060d": 265, - "\u065e": 266, - "\u0728": 267, - "\u0846": 268, - "\u0be6": 269, - "\u0be7": 270, - "\u1c14": 271, - "\u1e46": 272, - "\u2020": 273, - "\u2021": 274, - "\u2022": 275, - "\u202b": 276, - "\u202c": 277, - "\u2032": 278, - "\u2039": 279, - "\u204e": 280, - "\u2113": 281, - "\u2122": 282, - "\u2192": 283, - "\u2193": 284, - "\u21b5": 285, - "\u21d1": 286, - "\u21e4": 287, - "\u2206": 288, - "\u2208": 289, - "\u2212": 290, - "\u2213": 291, - "\u221a": 292, - "\u221e": 293, - "\u223c": 294, - "\u2243": 295, - "\u2248": 296, - "\u2264": 297, - "\u2265": 298, - "\u2299": 299, - "\u22c5": 300, - "\u22c6": 301, - "\u232c": 302, - "\u2423": 303, - "\u2424": 304, - "\u24d2": 305, - "\u25a1": 306, - "\u262f": 307, - "\u2663": 308, - "\u2666": 
309, - "\u2709": 310, - "\u271d": 311, - "\u2e38": 312, - "\u318d": 313, - "\uf061": 314, - "\uf067": 315, - "\uf761": 316, - "\uf764": 317, - "\uf765": 318, - "\uf767": 319, - "\uf769": 320, - "\uf76b": 321, - "\uf76c": 322, - "\uf76e": 323, - "\uf76f": 324, - "\uf770": 325, - "\uf772": 326, - "\uf773": 327, - "\uf774": 328, - "\uf777": 329, - "\uf779": 330, - "\uf8e9": 331, - "\uff1b": 332, - "\ufffd": 333 - }, - "vocab_tag": { - "<PAD>": 0, - "B-<author>": 1, - "B-<date>": 2, - "B-<pubnum>": 3, - "B-<title>": 4, - "I-<author>": 5, - "I-<date>": 6, - "I-<pubnum>": 7, - "I-<title>": 8, - "O": 9 - }, - "vocab_case": [ - "<PAD>", - "numeric", - "allLower", - "allUpper", - "initialUpper", - "other", - "mainly_numeric", - "contains_digit" - ], - "max_char_length": 30, - "feature_preprocessor": { - "features_vocabulary_size": 12, - "features_indices": [ - 9, - 10, - 11, - 12, - 13, - 14, - 15, - 16, - 17, - 18, - 19, - 20, - 21, - 22, - 23, - 24, - 25, - 26, - 27, - 28, - 29, - 30 - ], - "features_map_to_index": { - "9": { - "BLOCKEND": 1, - "BLOCKIN": 2, - "BLOCKSTART": 3 - }, - "10": { - "LINEEND": 13, - "LINEIN": 14, - "LINESTART": 15 - }, - "11": { - "ALIGNEDLEFT": 25, - "LINEINDENT": 26 - }, - "12": { - "NEWFONT": 37, - "SAMEFONT": 38 - }, - "13": { - "HIGHERFONT": 49, - "LOWERFONT": 50, - "SAMEFONTSIZE": 51 - }, - "14": { - "0": 61, - "1": 62 - }, - "15": { - "0": 73, - "1": 74 - }, - "16": { - "ALLCAP": 85, - "INITCAP": 86, - "NOCAPS": 87 - }, - "17": { - "ALLDIGIT": 97, - "CONTAINSDIGITS": 98, - "NODIGIT": 99 - }, - "18": { - "0": 109, - "1": 110 - }, - "19": { - "0": 121, - "1": 122 - }, - "20": { - "0": 133, - "1": 134 - }, - "21": { - "0": 145, - "1": 146 - }, - "22": { - "0": 157, - "1": 158 - }, - "23": { - "0": 169, - "1": 170 - }, - "24": { - "0": 181, - "1": 182 - }, - "25": { - "0": 193, - "1": 194 - }, - "26": { - "COMMA": 205, - "DOT": 206, - "ENDBRACKET": 207, - "HYPHEN": 208, - "NOPUNCT": 209, - "OPENBRACKET": 210, - "PUNCT": 211, - "QUOTE": 212 - 
}, - "27": { - "0": 217, - "1": 218 - }, - "28": { - "0": 229 - }, - "29": { - "0": 241, - "1": 242 - }, - "30": { - "0": 253 - } - } - }, - "indice_tag": { - "0": "<PAD>", - "1": "B-<author>", - "2": "B-<date>", - "3": "B-<pubnum>", - "4": "B-<title>", - "5": "I-<author>", - "6": "I-<date>", - "7": "I-<pubnum>", - "8": "I-<title>", - "9": "O" - } -} \ No newline at end of file diff --git a/grobid-home/models/header-coi-ac-BidLSTM_CRF_FEATURES.onnx/config.json b/grobid-home/models/header-coi-ac-BidLSTM_CRF_FEATURES.onnx/config.json new file mode 100644 index 0000000000..990606376c --- /dev/null +++ b/grobid-home/models/header-coi-ac-BidLSTM_CRF_FEATURES.onnx/config.json @@ -0,0 +1,37 @@ +{ + "modelName": "grobid-header-BidLSTM_CRF_FEATURES", + "architecture": "BidLSTM_CRF_FEATURES", + "wordEmbeddingSize": 300, + "charEmbeddingSize": 25, + "numCharLstmUnits": 25, + "numWordLstmUnits": 100, + "maxSequenceLength": 3500, + "embeddingsName": "glove-840B", + "featuresIndices": [ + 9, + 10, + 11, + 12, + 13, + 14, + 15, + 16, + 17, + 18, + 19, + 20, + 21, + 22, + 23, + 24, + 25, + 26, + 27, + 28, + 29, + 30 + ], + "featuresEmbeddingSize": 4, + "featuresLstmUnits": 4, + "featuresVocabularySize": 12 +} \ No newline at end of file diff --git a/grobid-home/models/header-coi-ac-BidLSTM_CRF_FEATURES.onnx/crf_params.json b/grobid-home/models/header-coi-ac-BidLSTM_CRF_FEATURES.onnx/crf_params.json new file mode 100644 index 0000000000..a8d1800cd7 --- /dev/null +++ b/grobid-home/models/header-coi-ac-BidLSTM_CRF_FEATURES.onnx/crf_params.json @@ -0,0 +1,2120 @@ +{ + "transitions": [ + [ + 0.10606145858764648, + -1.313651442527771, + -1.4376778602600098, + -1.1701772212982178, + -1.0297372341156006, + -0.7124889492988586, + -0.7189705967903137, + -0.40152859687805176, + -0.3677240312099457, + -0.6122543215751648, + -1.5036227703094482, + -0.6850084662437439, + -0.7905215620994568, + -0.5083444118499756, + -1.1801183223724365, + -0.5940806865692139, + -0.4847988486289978, + 
-0.9939469695091248, + -0.7119877338409424, + -0.8098775148391724, + -1.0869159698486328, + -0.9758521318435669, + -0.8569740056991577, + -1.3656045198440552, + -1.0145111083984375, + -1.504555344581604, + -1.9268507957458496, + -1.6290020942687988, + -0.8406298756599426, + -1.5827875137329102, + -0.8539354205131531, + -1.7412880659103394, + -1.3313027620315552, + -1.299437165260315, + -1.003744125366211, + -1.4886008501052856, + -1.7328691482543945, + -1.297523856163025, + -1.4334995746612549, + -1.2930277585983276, + -0.47802987694740295, + -1.7083308696746826, + -1.3386878967285156, + -1.6334774494171143 + ], + [ + -1.4747968912124634, + -2.1974685192108154, + -0.17661699652671814, + -0.6699958443641663, + -0.5839681029319763, + -0.5922682881355286, + -0.6361759901046753, + -0.10504089295864105, + -0.3174946904182434, + -0.5399513840675354, + -0.4608932435512543, + -0.11605458706617355, + -0.439577579498291, + -0.49277374148368835, + -0.2795621156692505, + -0.7170296311378479, + -0.1409100890159607, + -0.23377086222171783, + -0.47857666015625, + -0.5570945739746094, + -1.1918773651123047, + -0.14627566933631897, + 1.6517819166183472, + -0.7540834546089172, + -0.6564549803733826, + -0.8760032653808594, + -1.3506734371185303, + -1.3782755136489868, + -0.6317763924598694, + -0.7418060898780823, + -1.023388385772705, + -0.7423036694526672, + -0.35955503582954407, + -0.6867935657501221, + -0.9868190884590149, + -0.5554009675979614, + -1.4234570264816284, + -0.7009080052375793, + -0.7322750091552734, + -1.2438883781433105, + -1.1539502143859863, + -1.4422177076339722, + -0.7737656831741333, + -2.1980810165405273 + ], + [ + -0.8828140497207642, + -0.08790223300457001, + -1.8419908285140991, + -0.018999896943569183, + 0.2072463184595108, + -0.026490623131394386, + -0.14549143612384796, + -0.12294924259185791, + -0.37572041153907776, + 0.12674805521965027, + -0.8118178844451904, + -0.4198340177536011, + 0.2658769488334656, + -0.22733590006828308, + -0.46021154522895813, 
+ -0.48816338181495667, + -0.534018337726593, + -0.5665790438652039, + -0.9023154973983765, + 0.565956175327301, + -0.6082590222358704, + -0.2959911823272705, + -0.22044062614440918, + 1.6459912061691284, + -1.9950631856918335, + -1.21187162399292, + -0.3802725672721863, + -0.6842143535614014, + -0.6948117017745972, + -0.7661070227622986, + -0.7345529794692993, + -0.9399616718292236, + -0.8288425207138062, + -1.2953749895095825, + -0.9703502058982849, + -0.8878897428512573, + -1.2864760160446167, + -1.6539126634597778, + -0.5149934887886047, + -1.1296287775039673, + -1.3222302198410034, + -0.9602451324462891, + -0.7842482328414917, + -0.7869082093238831 + ], + [ + -1.473185658454895, + -0.4837585687637329, + -0.6119325757026672, + -2.473233461380005, + -0.650382399559021, + -0.1858602911233902, + -0.31455859541893005, + -0.17401573061943054, + -0.7210996150970459, + -0.6188898682594299, + -1.0029001235961914, + -0.9111948013305664, + -0.5898763537406921, + -0.9544499516487122, + -1.1141200065612793, + -0.6321624517440796, + -0.604821503162384, + -0.6605418920516968, + -0.8049523234367371, + -1.2440069913864136, + -0.9771444797515869, + -0.5636851191520691, + -0.555396318435669, + -1.6373831033706665, + 1.7467765808105469, + -1.9105567932128906, + -0.8212067484855652, + -1.1460782289505005, + -1.0586837530136108, + -1.3923511505126953, + -0.7167290449142456, + -1.2442115545272827, + -1.864804744720459, + -1.3921315670013428, + -1.5953369140625, + -1.6449387073516846, + -1.2928282022476196, + -1.462823510169983, + -0.6500322818756104, + -0.986577033996582, + -1.4107720851898193, + -1.1852086782455444, + -1.245222568511963, + -2.0384700298309326 + ], + [ + -1.0056567192077637, + -0.7494616508483887, + -0.9623234868049622, + -2.0155460834503174, + -2.551652431488037, + -0.2565722167491913, + -0.36885881423950195, + -0.44464248418807983, + -0.7311455607414246, + -0.6171789765357971, + -1.0439258813858032, + -1.516077995300293, + -1.2336293458938599, + 
-0.8647197484970093, + -0.7797775268554688, + -0.4738527834415436, + -0.43765702843666077, + -0.500245988368988, + -0.8216310143470764, + -0.6379428505897522, + -1.1383415460586548, + -0.18492618203163147, + -0.816653847694397, + -1.610772967338562, + -2.0346293449401855, + 1.8099653720855713, + -0.8892136812210083, + -1.0557951927185059, + -1.4573643207550049, + -1.1268746852874756, + -0.7904425859451294, + -1.4053696393966675, + -2.118560314178467, + -1.7401057481765747, + -1.3500547409057617, + -1.2587804794311523, + -1.284476637840271, + -1.0986149311065674, + -0.9027242660522461, + -1.0566880702972412, + -1.0930743217468262, + -1.641221046447754, + -0.86891770362854, + -2.4265167713165283 + ], + [ + -0.9155982732772827, + -0.6481753587722778, + -0.12278278172016144, + -0.2200712263584137, + -0.38941314816474915, + -1.0897504091262817, + -0.4827134907245636, + -0.16438713669776917, + -0.4628644585609436, + -0.153703510761261, + -0.48488473892211914, + -0.14258509874343872, + -0.30679377913475037, + -0.8558671474456787, + -0.017545288428664207, + -0.3769710063934326, + -0.04414454847574234, + -0.18619008362293243, + -0.46108776330947876, + -0.2375042587518692, + -0.33581164479255676, + -0.2723478376865387, + -0.9998431205749512, + -0.21488764882087708, + -0.49104708433151245, + -0.5305434465408325, + 1.3660926818847656, + -1.7398338317871094, + -0.9661266207695007, + -1.436803936958313, + -0.498421311378479, + -0.7154809236526489, + -0.3756656348705292, + -0.5950932502746582, + -1.7830023765563965, + -0.18258006870746613, + -1.241003155708313, + -0.4841371178627014, + -0.5596067905426025, + -0.7980955839157104, + -1.0397064685821533, + -0.7515741586685181, + -0.9218160510063171, + -2.0122199058532715 + ], + [ + -0.878972589969635, + -0.8952834606170654, + -0.027583003044128418, + -0.2143869698047638, + -0.30539819598197937, + -0.6129248738288879, + -1.0734484195709229, + -0.21366557478904724, + -0.43543076515197754, + -0.2253861278295517, + -0.5241081714630127, 
+ -0.028774604201316833, + -0.3194110691547394, + -0.6889936923980713, + -0.011561594903469086, + -0.3438284993171692, + -0.021590765565633774, + -0.2260226011276245, + -0.2551673948764801, + -0.5428061485290527, + -0.6078290343284607, + -0.044799886643886566, + -1.1269168853759766, + -0.1339332014322281, + -0.4828794002532959, + -0.48029589653015137, + -1.5482795238494873, + 1.4011200666427612, + -0.9308713674545288, + -1.0308983325958252, + -0.29572027921676636, + -0.8071727156639099, + -0.3305283784866333, + -0.7660056948661804, + -1.556260108947754, + -0.049435194581747055, + -0.9586555361747742, + -0.19802206754684448, + -0.3855893611907959, + -0.5718777775764465, + -1.4157878160476685, + -0.7582136988639832, + -0.4185509979724884, + -1.9440035820007324 + ], + [ + -0.4031279683113098, + -0.37594425678253174, + -0.185463085770607, + -0.33981454372406006, + -0.37627285718917847, + -0.1089535504579544, + -0.16766127943992615, + -0.07994423061609268, + -0.19351042807102203, + -0.27863070368766785, + -0.38509809970855713, + -0.21039941906929016, + -0.27849578857421875, + -0.22860202193260193, + -0.004800476133823395, + -0.11365190893411636, + -0.015742788091301918, + -0.1956218034029007, + -0.25274690985679626, + -0.29801470041275024, + -0.29466712474823, + -0.038269124925136566, + -0.4379207193851471, + -0.3541938364505768, + -0.32859858870506287, + -0.6824396252632141, + -0.819296658039093, + -0.8372656106948853, + 1.0297776460647583, + -0.5784624814987183, + -0.275290310382843, + -0.4537108838558197, + -0.5403257012367249, + -0.5719990134239197, + -1.1687074899673462, + -0.11970873922109604, + -0.8344970345497131, + -0.276283860206604, + -0.5002096891403198, + -0.7415198683738708, + -0.7738289833068848, + -0.6389927268028259, + -0.17142431437969208, + -1.6600284576416016 + ], + [ + -0.9511176347732544, + -0.8078587651252747, + -0.28544095158576965, + -0.9944533705711365, + -0.8281559944152832, + -0.2819159924983978, + -0.20542119443416595, + -0.1404869705438614, 
+ -2.049488067626953, + -1.1573774814605713, + -1.281499981880188, + -0.19805380702018738, + -0.6302734017372131, + -0.499246209859848, + -0.20566816627979279, + -0.5912179350852966, + -0.7832170128822327, + -1.1423399448394775, + -1.552214503288269, + -1.2145733833312988, + -1.2743825912475586, + -0.5250895619392395, + -0.8862490057945251, + -0.6571983695030212, + -0.8801300525665283, + -0.9206610918045044, + -1.075413465499878, + -0.9874422550201416, + -0.6813443303108215, + 1.8297075033187866, + -1.2785831689834595, + -1.5212961435317993, + -0.5698965787887573, + -0.9264691472053528, + -1.205248236656189, + -0.34355393052101135, + -1.011214017868042, + -1.5024492740631104, + -1.4017847776412964, + -1.8053293228149414, + -1.5205121040344238, + -1.2743583917617798, + -1.5182026624679565, + -2.33072829246521 + ], + [ + -1.0972734689712524, + -0.9302043914794922, + -0.31777313351631165, + -0.7518823742866516, + -0.6987303495407104, + -0.18190228939056396, + -0.06595836579799652, + -0.11837780475616455, + -1.18993079662323, + -2.3393425941467285, + -0.8458686470985413, + -0.11454863101243973, + -0.4039982259273529, + -0.16759634017944336, + -0.14628972113132477, + -0.3685365915298462, + -0.7490832805633545, + -1.1729732751846313, + -1.2148879766464233, + -1.161948800086975, + -1.0510079860687256, + -0.4230615794658661, + -0.7464518547058105, + -1.0824289321899414, + -0.6296868920326233, + -0.9491758346557617, + -0.7701402306556702, + -0.5336593389511108, + -0.6982924342155457, + -1.3282579183578491, + 1.8583261966705322, + -1.0799601078033447, + -0.3915098309516907, + -0.6041625738143921, + -0.613841712474823, + -0.1960369497537613, + -1.214112639427185, + -1.506902813911438, + -1.7305951118469238, + -1.8291559219360352, + -2.012927770614624, + -1.1129333972930908, + -1.1627092361450195, + -1.6322335004806519 + ], + [ + -1.2167303562164307, + -0.9953426122665405, + -0.4546596109867096, + 0.20923401415348053, + -1.3474379777908325, + -0.35563716292381287, + 
-0.5607011914253235, + -0.35561761260032654, + -0.19471341371536255, + -1.0480018854141235, + -1.5109490156173706, + -0.4598984718322754, + -0.4981340765953064, + -0.5742297172546387, + -0.49075546860694885, + -0.9529384970664978, + -0.9074456691741943, + -1.187824010848999, + -0.9505136013031006, + 0.08323923498392105, + -0.5787250399589539, + 0.34049805998802185, + -0.5758973956108093, + -0.7259535193443298, + -1.1698503494262695, + -1.2735397815704346, + -1.04014253616333, + -1.0555999279022217, + -1.1746915578842163, + -1.7644137144088745, + -1.299688458442688, + 1.919707179069519, + -0.8965156674385071, + -0.6453364491462708, + -1.1871289014816284, + -0.871825635433197, + -1.5017701387405396, + -1.7228628396987915, + -1.4604007005691528, + -2.0488173961639404, + -1.8185665607452393, + -2.1370859146118164, + -0.9240784645080566, + -1.4404319524765015 + ], + [ + -0.9062386751174927, + -0.4110085964202881, + -0.2830755412578583, + -1.4459301233291626, + -1.4648263454437256, + -0.10950940102338791, + -0.18182849884033203, + -0.21526934206485748, + -0.4772871136665344, + -0.24396537244319916, + -0.4328925609588623, + -1.5363494157791138, + -0.5296850204467773, + -0.22818708419799805, + -0.14119136333465576, + -0.20344506204128265, + -0.05105459317564964, + -0.27326565980911255, + -0.3689967691898346, + -0.3299713730812073, + -0.46652644872665405, + -0.09659687429666519, + -0.23075063526630402, + -0.8304752111434937, + -1.4570825099945068, + -1.9723891019821167, + -0.5752971768379211, + -0.5227700471878052, + -0.8549981713294983, + -0.6964556574821472, + -0.31069985032081604, + -0.644609808921814, + 1.5872873067855835, + -1.1929142475128174, + -0.8239450454711914, + -0.3048316538333893, + -0.8174525499343872, + -0.39413711428642273, + -0.7198638916015625, + -0.7190297245979309, + -0.5418018698692322, + -0.9234427213668823, + -0.6052152514457703, + -1.7431421279907227 + ], + [ + -0.7885118722915649, + -0.7277624607086182, + -0.6886875629425049, + -0.8690451383590698, 
+ -1.094600796699524, + -0.09708169102668762, + -0.09417004138231277, + -0.0722949206829071, + -0.6714276075363159, + -0.5353403091430664, + -0.44954222440719604, + -0.34081190824508667, + -1.8184305429458618, + -0.3858911395072937, + -0.08020418882369995, + -0.1786392480134964, + -0.13814426958560944, + -0.46515989303588867, + -0.642915666103363, + -0.6645185947418213, + -0.29712721705436707, + -0.5643153190612793, + -0.2957344651222229, + -1.6206108331680298, + -1.2089927196502686, + -1.6677268743515015, + -0.8463014364242554, + -0.877714216709137, + -0.9702369570732117, + -0.8501294851303101, + -0.6642234325408936, + -0.5683411955833435, + -1.1306232213974, + 1.325343370437622, + -0.9831547737121582, + -0.35316503047943115, + -0.9595889449119568, + -0.5520840287208557, + -1.0779789686203003, + -0.9827486872673035, + -1.022352695465088, + -0.8019840121269226, + -1.4191548824310303, + -2.0304207801818848 + ], + [ + -0.6403186321258545, + -0.7366170883178711, + -0.263467401266098, + -1.0883339643478394, + -0.7534691691398621, + -0.5431240797042847, + -0.522432804107666, + -0.25743719935417175, + -0.5347521305084229, + -0.28479865193367004, + -0.46386590600013733, + -0.17683210968971252, + -0.30216923356056213, + -1.6038148403167725, + -0.1752796769142151, + -0.4120135009288788, + -0.3019999563694, + -0.2810678780078888, + -0.5356054902076721, + -0.5669334530830383, + -0.38385555148124695, + -0.21953119337558746, + -1.2993587255477905, + -0.5504370927810669, + -1.2659019231796265, + -0.9146221280097961, + -1.5098814964294434, + -1.5351985692977905, + -0.9820360541343689, + -1.0197806358337402, + -0.31330761313438416, + -0.7317755222320557, + -0.7603033781051636, + -0.919221043586731, + 1.6611406803131104, + -0.2532734274864197, + -1.199256420135498, + -0.6449409127235413, + -0.5516800880432129, + -0.6710701584815979, + -1.1080944538116455, + -0.6708918213844299, + -0.8574648499488831, + -2.04489803314209 + ], + [ + -1.2213752269744873, + -0.5916210412979126, + 
-0.35742461681365967, + -1.390786051750183, + -1.1515781879425049, + -0.035148587077856064, + -0.03266404569149017, + -0.13224729895591736, + -0.25647956132888794, + -0.097181037068367, + -0.46254274249076843, + -0.1939854621887207, + -0.19961145520210266, + -0.15666118264198303, + -0.6371447443962097, + -0.1756746917963028, + -0.23422378301620483, + -0.11639859527349472, + -0.28005051612854004, + -0.23224838078022003, + -0.9844086170196533, + -0.1342131793498993, + -0.6858506798744202, + -0.8221314549446106, + -1.6681888103485107, + -1.6146254539489746, + -0.4921133518218994, + -0.603042721748352, + -0.5743899345397949, + -0.7056053876876831, + -0.2982729971408844, + -0.8848795890808105, + -0.6648846864700317, + -0.4642595052719116, + -1.111807942390442, + 1.1949584484100342, + -0.7504810690879822, + -0.9545611143112183, + -0.39094093441963196, + -0.9623547196388245, + -0.6372947096824646, + -1.7565152645111084, + -0.24716687202453613, + -1.8945462703704834 + ], + [ + -0.8199842572212219, + -1.0581713914871216, + -0.3895179033279419, + -0.6554551124572754, + -0.7402202486991882, + -0.23816083371639252, + -0.301558256149292, + -0.25387269258499146, + -0.5882362127304077, + -0.6546512842178345, + -0.8351142406463623, + -0.23503021895885468, + -0.3703468143939972, + -0.5921592116355896, + -0.1336117684841156, + -1.879852056503296, + -0.4173140525817871, + -0.5341428518295288, + -1.4381307363510132, + -0.7192791104316711, + -1.1055238246917725, + -0.049252890050411224, + -1.3107514381408691, + -0.798275351524353, + -1.0494543313980103, + -0.8608297109603882, + -1.0742087364196777, + -1.3123905658721924, + -0.9105372428894043, + -0.8553599715232849, + -0.8842824697494507, + -1.1454758644104004, + -0.752220630645752, + -0.6608718037605286, + -1.1810517311096191, + -0.1978917121887207, + 1.594441533088684, + -1.3925206661224365, + -1.121725082397461, + -1.6944462060928345, + -1.3131816387176514, + -1.3645774126052856, + -0.7815371155738831, + -2.000072717666626 + ], + [ 
+ -0.5955431461334229, + -0.3870457708835602, + -0.33502960205078125, + -0.8240070939064026, + -0.44934597611427307, + -0.18618376553058624, + -0.09696011990308762, + -0.0011824870016425848, + -0.7701959609985352, + -0.73843914270401, + -0.9871739149093628, + -0.05437180772423744, + -0.19008493423461914, + -0.2912467420101166, + -0.21283957362174988, + -0.563954770565033, + -1.0350180864334106, + -0.5521043539047241, + -1.465614914894104, + -0.7037262916564941, + -1.12129807472229, + -0.10146971791982651, + -0.5693973898887634, + -0.9393613934516907, + -1.2012048959732056, + -0.5726311206817627, + -0.5450927019119263, + -0.4757236838340759, + -0.4643552601337433, + -1.4310671091079712, + -1.0821939706802368, + -1.2049908638000488, + -0.20270903408527374, + -0.2905389070510864, + -1.1066077947616577, + -0.389126718044281, + -1.4959689378738403, + 1.4735982418060303, + -0.98104327917099, + -1.9268826246261597, + -1.5158042907714844, + -1.5427162647247314, + -0.504143238067627, + -2.1602721214294434 + ], + [ + -1.3887711763381958, + -0.5197256207466125, + -0.42461180686950684, + -0.6427264213562012, + -0.5473313927650452, + -0.16330918669700623, + -0.04349257051944733, + -0.01867394894361496, + -1.0594695806503296, + -1.3125146627426147, + -0.8849044442176819, + -0.23232080042362213, + -0.399066299200058, + -0.263920396566391, + -0.15519572794437408, + -0.4353133738040924, + -0.4134717285633087, + -1.8509230613708496, + -1.725076675415039, + -0.9473397135734558, + -0.8082354068756104, + -0.8733066320419312, + -0.37680619955062866, + -0.7895876169204712, + -0.6754703521728516, + -0.7886179685592651, + -0.8330291509628296, + -0.3947210907936096, + -0.5771331787109375, + -1.5819299221038818, + -1.499448299407959, + -1.1570508480072021, + -0.3950798511505127, + -0.8640972375869751, + -0.5236647725105286, + -0.20778506994247437, + -0.9460934996604919, + -1.2275608777999878, + 1.5637173652648926, + -2.120708465576172, + -1.4826818704605103, + -0.8767129778862, + 
-1.6054742336273193, + -2.159467935562134 + ], + [ + -0.9558020830154419, + -0.7967756986618042, + -0.6849426031112671, + -0.8254446983337402, + -0.818534255027771, + -0.16325300931930542, + -0.12725353240966797, + -0.26729878783226013, + -1.273923397064209, + -1.4024536609649658, + -1.4382987022399902, + -0.2188236266374588, + -0.5272561311721802, + -0.41542261838912964, + -0.25941145420074463, + -1.1514527797698975, + -1.074644923210144, + -1.2326256036758423, + -2.430025815963745, + -1.1254827976226807, + -1.2785718441009521, + -0.41700634360313416, + -0.9622179865837097, + -1.2208197116851807, + -1.064914584159851, + -1.192073106765747, + -0.9289313554763794, + -0.8210610151290894, + -1.0453439950942993, + -1.8738595247268677, + -1.5459870100021362, + -1.83896005153656, + -0.8284062743186951, + -0.8182161450386047, + -0.9136028289794922, + -0.4103112816810608, + -1.6954915523529053, + -1.7601470947265625, + -1.6837375164031982, + 1.9428070783615112, + -1.7793283462524414, + -1.3961591720581055, + -1.0999901294708252, + -2.4274744987487793 + ], + [ + -0.9748932123184204, + -0.7267990112304688, + -0.43581423163414, + -0.939288318157196, + -0.4213060140609741, + -0.06970441341400146, + -0.27182555198669434, + -0.12847860157489777, + -0.7467304468154907, + -1.3996806144714355, + -1.0414838790893555, + -0.1613486260175705, + -0.5888731479644775, + -0.6932592988014221, + -0.14955639839172363, + -0.46234673261642456, + -0.5896210670471191, + -0.621442437171936, + -1.3771250247955322, + -1.7573366165161133, + -0.45277106761932373, + -0.2917012870311737, + -0.4837053716182709, + -0.9292042851448059, + -0.9749695062637329, + -0.5843163132667542, + -0.7213122844696045, + -1.1033921241760254, + -0.6752656102180481, + -1.0825871229171753, + -1.2403614521026611, + -1.152074933052063, + -0.2560882568359375, + -0.877312958240509, + -1.3907618522644043, + -0.11431421339511871, + -0.9682782888412476, + -1.021195650100708, + -1.1248037815093994, + -1.4217591285705566, + 
1.5522842407226562, + -0.45146429538726807, + -0.8905574679374695, + -2.154161214828491 + ], + [ + -1.3341175317764282, + -1.2127447128295898, + -0.2510504722595215, + -0.9798818826675415, + -0.38345301151275635, + -0.22977784276008606, + -0.30243316292762756, + -0.15780122578144073, + -0.5574586391448975, + -0.6439893841743469, + -1.6199467182159424, + -0.3215227723121643, + -0.3619880676269531, + -0.2935754656791687, + -0.6278892159461975, + -0.8737902641296387, + -0.7115558385848999, + -0.5050841569900513, + -1.0476233959197998, + -0.641818642616272, + -2.397000551223755, + -0.18396084010601044, + -0.9675662517547607, + -0.7984750270843506, + -1.1510848999023438, + -1.2975869178771973, + -0.800314724445343, + -1.050410509109497, + -0.9445002675056458, + -1.2546149492263794, + -0.756198525428772, + -1.880790114402771, + -0.6687176823616028, + -0.7270624041557312, + -0.6538228392601013, + -1.1021677255630493, + -1.507163405418396, + -1.6656510829925537, + -1.097037434577942, + -1.495436668395996, + -1.1796156167984009, + 1.7328078746795654, + -0.5829249620437622, + -2.2849690914154053 + ], + [ + -1.2701189517974854, + -0.4476449489593506, + -0.21415522694587708, + -0.6814586520195007, + -0.2858044505119324, + -0.09339509904384613, + -0.06140643358230591, + -0.02359689585864544, + -0.8372898697853088, + -0.5136232376098633, + -0.2746569514274597, + -0.07340657711029053, + -0.8121970891952515, + -0.26877927780151367, + -0.025315076112747192, + -0.20198561251163483, + -0.16245821118354797, + -0.8104990720748901, + -0.7082597613334656, + -0.6865860819816589, + -0.34367313981056213, + -1.0192939043045044, + -0.3818875551223755, + -0.985541582107544, + -0.787164568901062, + -0.42013785243034363, + -1.1402697563171387, + -0.5348063111305237, + -0.302166223526001, + -1.4675787687301636, + -0.6088804006576538, + -0.30395618081092834, + -0.3057531714439392, + -1.558325171470642, + -0.9454678297042847, + -0.11173126846551895, + -0.6570347547531128, + -0.5936419367790222, + 
-1.544317603111267, + -1.4160106182098389, + -1.0775524377822876, + -0.45718279480934143, + 1.1333165168762207, + -1.985992193222046 + ], + [ + -0.857444703578949, + -2.1819465160369873, + -0.3381064534187317, + 0.27635660767555237, + -0.1974707543849945, + -0.1758267879486084, + -0.37945157289505005, + -0.3947829008102417, + 0.21475814282894135, + -0.7107601761817932, + 0.21609708666801453, + -0.27318695187568665, + -0.40528857707977295, + 0.26568418741226196, + -0.726378858089447, + -0.3543165326118469, + -0.40960827469825745, + -0.2572087347507477, + 0.21266694366931915, + -0.04012273997068405, + -0.7679876685142517, + -0.4320548176765442, + 0.7517787218093872, + -0.5242950916290283, + -0.4607303738594055, + -0.6963070631027222, + -1.8807580471038818, + -1.5299102067947388, + -0.8597427010536194, + -0.7534052729606628, + -1.0582765340805054, + -0.7399492859840393, + -0.8179884552955627, + -0.6245130300521851, + -1.4749475717544556, + -0.9217278957366943, + -1.3865718841552734, + -1.4318594932556152, + -0.5265694260597229, + -1.1233302354812622, + -1.2186834812164307, + -0.9237138032913208, + -1.2061635255813599, + -1.3392715454101562 + ], + [ + -0.7851254343986511, + 0.3929997682571411, + -2.0016396045684814, + 0.7592524290084839, + 0.19009341299533844, + -0.43317118287086487, + 0.039919205009937286, + -0.41573458909988403, + -0.3262815773487091, + 0.22155345976352692, + -0.4425256848335266, + -0.7930318713188171, + 0.42165809869766235, + 0.4006428122520447, + -0.776274561882019, + -0.4384792447090149, + -0.9663617014884949, + -0.5824832320213318, + 0.0491776205599308, + 0.5460504293441772, + -0.3973346948623657, + -0.06312479823827744, + -0.5312902927398682, + 1.1767621040344238, + -1.8124598264694214, + -1.5470328330993652, + -0.72476726770401, + -1.3208693265914917, + -0.8853896260261536, + -1.1251099109649658, + -1.345171570777893, + -1.3865721225738525, + -1.2673087120056152, + -1.9137910604476929, + -1.2186853885650635, + -1.2201741933822632, + 
-1.4115391969680786, + -1.9391875267028809, + -0.8660712242126465, + -1.3645683526992798, + -1.4715049266815186, + -1.1469299793243408, + -1.259924292564392, + -0.5465595722198486 + ], + [ + -0.9629695415496826, + -0.18060632050037384, + 1.291077971458435, + -1.6840962171554565, + 0.14362141489982605, + 0.22156460583209991, + -0.3724135160446167, + -0.3748529255390167, + -0.10190075635910034, + 0.2604474425315857, + -0.07562024146318436, + -0.7882198691368103, + -0.09455970674753189, + 0.06648153066635132, + -1.406691551208496, + -0.5715747475624084, + -0.9123480319976807, + -0.07008083164691925, + -0.015383838675916195, + -0.16155311465263367, + -1.003702998161316, + -0.18660330772399902, + -0.3844347894191742, + -2.1740078926086426, + 1.3440037965774536, + -1.4745874404907227, + -0.6410636305809021, + -1.3630266189575195, + -1.209732174873352, + -1.243194580078125, + -0.6646550297737122, + -1.3222856521606445, + -1.7630382776260376, + -1.5445400476455688, + -1.412925362586975, + -1.8498917818069458, + -1.3070671558380127, + -1.9106886386871338, + -0.5271961688995361, + -1.0537760257720947, + -1.227264165878296, + -1.1275352239608765, + -1.2299515008926392, + -0.7907952070236206 + ], + [ + -1.0647355318069458, + 0.6028676629066467, + -0.35099801421165466, + 1.292887568473816, + -2.085275173187256, + -0.42167702317237854, + -0.4510023593902588, + -0.5625584721565247, + -0.32740408182144165, + 0.25386473536491394, + -0.2455645203590393, + -1.3035334348678589, + 0.08828213065862656, + -0.8104369640350342, + -0.5349513292312622, + -0.7233052253723145, + -0.5532506108283997, + -0.6194726228713989, + -0.40261030197143555, + 0.13334700465202332, + -1.3111248016357422, + -0.2874852120876312, + -0.728262186050415, + -1.4404404163360596, + -1.8723113536834717, + 0.9524585008621216, + -0.728548526763916, + -1.2822258472442627, + -1.709338903427124, + -0.9082033634185791, + -0.9813474416732788, + -1.6544911861419678, + -2.370166063308716, + -1.4383416175842285, + 
-1.0028314590454102, + -1.8948595523834229, + -1.251629114151001, + -0.7283076047897339, + -0.60310298204422, + -1.4178414344787598, + -0.833082377910614, + -1.4172619581222534, + -0.9894936680793762, + -0.4769415855407715 + ], + [ + -1.9110723733901978, + 0.09498032927513123, + -0.3759983777999878, + -0.7753094434738159, + -0.842014729976654, + -1.68686044216156, + -1.103469967842102, + -0.7081699967384338, + -0.5870118141174316, + -0.7173025012016296, + -1.0202851295471191, + -0.5057967305183411, + -0.6216097474098206, + -0.7837823033332825, + -0.41809800267219543, + -0.9985976815223694, + -0.5829616189002991, + -0.8234896659851074, + -0.2855328917503357, + -0.7383800745010376, + -0.5899205207824707, + -1.0354347229003906, + -1.9367636442184448, + -0.6741010546684265, + -0.8952195644378662, + -0.8376147150993347, + 0.8124628067016602, + -1.6828268766403198, + -1.6140824556350708, + -1.3437687158584595, + -0.953965961933136, + -0.9013178944587708, + -1.0265780687332153, + -0.7800254225730896, + -1.7207812070846558, + -0.5627949237823486, + -1.3675082921981812, + -1.267412543296814, + -0.976050615310669, + -1.183606505393982, + -1.2779630422592163, + -1.0846513509750366, + -1.5334951877593994, + -1.4582569599151611 + ], + [ + -0.4911023676395416, + -0.08200903981924057, + -0.8256944417953491, + -1.1337511539459229, + -1.0287926197052002, + -1.0894160270690918, + -1.7988673448562622, + -0.872911274433136, + -0.004599399399012327, + -0.5183807015419006, + -0.8390076756477356, + -0.23333536088466644, + -0.90162593126297, + -0.5175248980522156, + -0.6022132635116577, + -0.8133704662322998, + -0.3930714428424835, + -0.42469850182533264, + -0.6761521697044373, + -0.5113751888275146, + -0.9541450142860413, + -0.5129753351211548, + -1.621427297592163, + -1.2636758089065552, + -1.5125044584274292, + -1.0403426885604858, + -1.7235219478607178, + 0.7906205058097839, + -1.54011869430542, + -1.4613016843795776, + -0.7613629698753357, + -1.2205642461776733, + 
-1.1099122762680054, + -1.3558679819107056, + -1.9205306768417358, + -0.7765863537788391, + -1.5205918550491333, + -1.1072564125061035, + -0.496324360370636, + -0.9855103492736816, + -1.9051713943481445, + -1.1788653135299683, + -1.1737948656082153, + -1.8811662197113037 + ], + [ + -0.992041289806366, + -0.7309350967407227, + -0.5812665224075317, + -1.0775575637817383, + -1.1284276247024536, + -0.7982011437416077, + -0.6239766478538513, + -0.8641077280044556, + -0.053367167711257935, + -0.7909495234489441, + -1.066342830657959, + -0.5627466440200806, + -1.0331839323043823, + -0.932465672492981, + -0.5039976835250854, + -0.5174804329872131, + -0.1918928027153015, + 0.023185577243566513, + -1.0831462144851685, + -0.8098952770233154, + -1.1635212898254395, + -0.23548124730587006, + -0.8429036736488342, + -0.9273772835731506, + -1.2647472620010376, + -1.64719557762146, + -1.581739068031311, + -1.526267170906067, + 0.493853360414505, + -1.2025138139724731, + -0.8888171315193176, + -1.297102689743042, + -1.2441372871398926, + -1.3798654079437256, + -1.727911114692688, + -0.6168105602264404, + -1.42630934715271, + -1.0671906471252441, + -1.0012857913970947, + -1.5993391275405884, + -1.4811166524887085, + -1.522581696510315, + -0.8988516926765442, + -1.8898310661315918 + ], + [ + -0.4485774040222168, + -0.09611231088638306, + -1.0518251657485962, + -0.5434262156486511, + 0.016958454623818398, + -1.2948644161224365, + -0.7141709923744202, + -0.3695029318332672, + -2.2086446285247803, + -0.5280615091323853, + -0.6141325235366821, + -0.524783730506897, + -0.8678925037384033, + -0.998257040977478, + -0.579944908618927, + -0.29846420884132385, + -0.285239040851593, + 0.1003589779138565, + 0.5528989434242249, + -0.1623832732439041, + 0.18357457220554352, + -0.6135508418083191, + -0.7129210829734802, + -1.0949194431304932, + -1.2341246604919434, + -0.901550829410553, + -1.3113443851470947, + -1.3967437744140625, + -1.4238439798355103, + 1.053584098815918, + -1.4696062803268433, + 
-1.9302698373794556, + -1.400292158126831, + -0.7989141941070557, + -1.0317507982254028, + -0.9900637865066528, + -0.8581745624542236, + -2.044525384902954, + -1.3141367435455322, + -1.7948830127716064, + -1.5259017944335938, + -1.2293436527252197, + -1.6742714643478394, + -1.4376575946807861 + ], + [ + 0.2801467180252075, + -0.20296600461006165, + -0.7185594439506531, + 0.43285223841667175, + -0.20045027136802673, + -0.5705807209014893, + -0.33906805515289307, + -0.2198764830827713, + -0.05681410804390907, + -1.2832354307174683, + 0.46533507108688354, + -0.38395944237709045, + -0.5119662284851074, + 0.30478349328041077, + -0.2787074148654938, + -0.02027345448732376, + -0.9890509247779846, + 0.030379364266991615, + 0.6076752543449402, + -0.3718917667865753, + 0.8156512975692749, + -0.7254595756530762, + -1.1221067905426025, + -1.3213139772415161, + -0.7352039217948914, + -0.8902181386947632, + -1.0909825563430786, + -0.8634517788887024, + -0.7583605051040649, + -1.5926748514175415, + 1.478055477142334, + -1.5352531671524048, + -0.656775951385498, + -0.6595602631568909, + -0.67375648021698, + -0.3228457272052765, + -1.2538946866989136, + -1.8111881017684937, + -1.187678575515747, + -1.8858790397644043, + -1.8138715028762817, + -1.4695559740066528, + -1.359169363975525, + -0.10125806927680969 + ], + [ + -1.3506709337234497, + -0.7109214067459106, + -0.48739445209503174, + -1.0995734930038452, + -0.9003349542617798, + -0.32530832290649414, + -0.4364820420742035, + -0.3899161219596863, + -0.9902524948120117, + -1.226841688156128, + 0.1259801983833313, + -0.5045827031135559, + -0.4398016631603241, + -0.30949321389198303, + -0.6269985437393188, + -0.7807498574256897, + -0.9656681418418884, + 0.16975300014019012, + 0.10808371752500534, + -0.2531088590621948, + 1.1206806898117065, + -0.26039183139801025, + -0.3574061393737793, + -0.8663679957389832, + -1.1320855617523193, + -1.294532060623169, + -0.7717506885528564, + -1.2719627618789673, + -1.2136894464492798, + 
-1.636584997177124, + -1.2144132852554321, + 0.7266753315925598, + -1.1856621503829956, + -0.6294547915458679, + -1.115993618965149, + -0.9077757596969604, + -1.27091646194458, + -1.5899101495742798, + -1.1343806982040405, + -2.0126242637634277, + -1.6978973150253296, + -2.1946704387664795, + -0.7236542701721191, + -0.13827845454216003 + ], + [ + -0.07831442356109619, + -0.3702993392944336, + -0.8524748086929321, + 0.5505713224411011, + -1.3390130996704102, + -0.05870664119720459, + -0.23327529430389404, + -0.25566595792770386, + 0.12553493678569794, + -0.2356530725955963, + -0.48939672112464905, + -1.1564178466796875, + -0.8726139068603516, + -0.30930331349372864, + -0.38549095392227173, + -0.20244361460208893, + -0.26763010025024414, + -0.3244018256664276, + -0.7131379246711731, + 0.14325742423534393, + -0.4905678331851959, + -0.2759847044944763, + -0.5339694619178772, + -1.3418728113174438, + -2.0488779544830322, + -2.3421502113342285, + -0.8740392923355103, + -1.0362733602523804, + -1.2273609638214111, + -1.33444082736969, + -0.526572585105896, + -1.172040343284607, + 1.0332307815551758, + -1.3561482429504395, + -1.1110975742340088, + -0.7565243244171143, + -1.4508605003356934, + -0.7846783995628357, + -0.8338473439216614, + -1.6385693550109863, + -0.7432569861412048, + -1.1133803129196167, + -0.8984667658805847, + -0.04723875969648361 + ], + [ + -0.4634150266647339, + -0.5219835638999939, + -0.06973763555288315, + 0.3318372368812561, + 0.5084220170974731, + -0.7975906729698181, + -0.9383347034454346, + -0.7348359823226929, + 0.1759268194437027, + 0.4408266544342041, + 0.49036866426467896, + -1.0727598667144775, + -1.6718648672103882, + 0.355263352394104, + -0.5312793850898743, + -0.850870668888092, + -0.628629744052887, + -0.19279681146144867, + 0.6321269869804382, + -0.06899591535329819, + -0.292658269405365, + 0.0533822625875473, + -0.6534861326217651, + -1.5944626331329346, + -1.549121379852295, + -1.459794521331787, + -0.9207741618156433, + 
-1.3205265998840332, + -1.4306031465530396, + -1.0713894367218018, + -1.0086956024169922, + -1.2102283239364624, + -1.5633631944656372, + 1.0862001180648804, + -1.0930492877960205, + -0.9329509735107422, + -0.9554045796394348, + -1.01990807056427, + -0.922025740146637, + -1.068987488746643, + -1.1590133905410767, + -0.8367498517036438, + -1.643620491027832, + -0.27717968821525574 + ], + [ + -0.1037895679473877, + 0.41183194518089294, + -0.41687867045402527, + -0.5059555768966675, + -1.0623546838760376, + -1.1852121353149414, + -0.7140600681304932, + -0.992517352104187, + -0.39650875329971313, + -0.5241384506225586, + -1.0087041854858398, + -0.5364452004432678, + -0.42470207810401917, + -2.161935567855835, + -0.8856833577156067, + -0.925571620464325, + -0.8439810276031494, + -0.555046558380127, + -0.9316202998161316, + -0.939799427986145, + -0.6770917773246765, + -0.9017058610916138, + -1.30314302444458, + -1.1380990743637085, + -1.5716235637664795, + -0.8848603963851929, + -1.6336008310317993, + -1.876107931137085, + -1.6632128953933716, + -0.9911980032920837, + -0.733587920665741, + -1.1895055770874023, + -1.2897628545761108, + -1.1011593341827393, + 0.8816722631454468, + -1.303109884262085, + -1.1102001667022705, + -1.518673062324524, + -0.7363095879554749, + -0.8188085556030273, + -1.9027550220489502, + -0.8945104479789734, + -1.4520494937896729, + -1.5091127157211304 + ], + [ + -1.6358838081359863, + -0.5891309380531311, + -0.912893533706665, + 0.4160476326942444, + -1.3123986721038818, + -0.11752227693796158, + -0.07042272388935089, + -0.02373996004462242, + -0.170106902718544, + -0.2871367931365967, + -0.46724972128868103, + -0.08388169854879379, + -0.4053846597671509, + 0.23949576914310455, + -0.6945983171463013, + -0.23114465177059174, + -0.2772136628627777, + -0.1450517624616623, + -0.3774748146533966, + -0.40894511342048645, + -1.1909607648849487, + -0.024407345801591873, + -0.8204823136329651, + -1.076987862586975, + -1.868801474571228, + 
-1.6753565073013306, + -0.3987763226032257, + -0.712627112865448, + -0.5629633069038391, + -0.6791931986808777, + -0.36298713088035583, + -0.9459781050682068, + -0.6016009449958801, + -0.6691443920135498, + -1.1775881052017212, + 0.6407793760299683, + -0.7026576995849609, + -0.8767650127410889, + -0.47622188925743103, + -0.9879043102264404, + -0.641616940498352, + -1.9672846794128418, + -0.3111562132835388, + -0.43414604663848877 + ], + [ + -0.4741019010543823, + -0.1592492014169693, + -1.1913484334945679, + -0.29359200596809387, + -0.09476865082979202, + -1.0873568058013916, + -0.8707907795906067, + -0.7147337794303894, + 0.12292155623435974, + -1.1492255926132202, + -0.7791502475738525, + -0.7934942245483398, + -1.0994762182235718, + 0.3673947751522064, + -0.7489203214645386, + -1.461464285850525, + -0.11387024819850922, + 0.10427235811948776, + -0.05753609538078308, + 0.3254644572734833, + -1.3302167654037476, + -0.1304035484790802, + -1.250616192817688, + -1.358672857284546, + -1.3370531797409058, + -1.2670531272888184, + -1.5214053392410278, + -1.4980601072311401, + -1.4767054319381714, + -1.0198616981506348, + -1.428122639656067, + -1.5458747148513794, + -1.7927950620651245, + -1.2026150226593018, + -1.2426018714904785, + -0.9483041167259216, + 1.0217193365097046, + -1.924140214920044, + -1.3013725280761719, + -1.5111193656921387, + -1.5739595890045166, + -1.4754908084869385, + -1.421231746673584, + -1.4421849250793457 + ], + [ + -0.8727916479110718, + -0.857430636882782, + -1.5276998281478882, + -0.5390227437019348, + -1.0516153573989868, + -0.25578048825263977, + -0.12022681534290314, + -0.13148680329322815, + -0.5173234343528748, + -1.3751788139343262, + -1.400125503540039, + -0.2505536377429962, + -0.48713359236717224, + 0.3449162542819977, + -0.6132157444953918, + -1.009467363357544, + -1.4971150159835815, + -1.167686939239502, + -1.6253985166549683, + -1.0021528005599976, + 0.37638330459594727, + -0.7206936478614807, + -1.5410996675491333, + 
-1.8945778608322144, + -1.8527778387069702, + -1.630933165550232, + -1.312156081199646, + -1.0971477031707764, + -0.8832635283470154, + -1.9929125308990479, + -1.579314947128296, + -1.590375304222107, + -0.77951580286026, + -0.7532129883766174, + -1.349238634109497, + -1.0872472524642944, + -1.8342186212539673, + 1.0311139822006226, + -1.6920208930969238, + -2.212067127227783, + -1.8064463138580322, + -2.0170059204101562, + -1.3872560262680054, + -1.500409483909607 + ], + [ + -0.1380932331085205, + -0.2383122444152832, + -0.5031805038452148, + -0.3580821454524994, + -0.10382884740829468, + -0.6943897008895874, + -0.607791543006897, + -0.6534503102302551, + -0.08422088623046875, + 0.5181382298469543, + -0.07513613253831863, + -1.0540416240692139, + -0.8061460852622986, + -0.6407434344291687, + -0.46750152111053467, + -0.32075589895248413, + -0.8156580328941345, + -1.6073788404464722, + 0.001842763856984675, + 0.5004723072052002, + 0.17412152886390686, + -0.7270460724830627, + -0.6661803126335144, + -1.0007507801055908, + -0.6574458479881287, + -0.7749836444854736, + -1.1459460258483887, + -0.7119711637496948, + -1.183426022529602, + -1.264506459236145, + -1.7309558391571045, + -1.6221017837524414, + -1.3652111291885376, + -0.8301494121551514, + -0.5257412791252136, + -0.6868889331817627, + -1.3384435176849365, + -1.743384838104248, + 1.145517110824585, + -1.972631573677063, + -1.1718074083328247, + -1.579086184501648, + -1.8740234375, + -0.9259582161903381 + ], + [ + -0.4218597114086151, + 0.6243670582771301, + -1.2201364040374756, + -0.42054951190948486, + -0.9648842215538025, + -0.6355407238006592, + -0.42803680896759033, + -0.5946596264839172, + 0.5100739598274231, + -1.668476939201355, + -0.23615221679210663, + -0.4520643651485443, + -0.24785679578781128, + -0.6715077757835388, + -0.8370340466499329, + -1.1565834283828735, + -1.5529907941818237, + 0.03253365308046341, + -2.410151481628418, + -0.0196477510035038, + 0.7922629117965698, + 0.25237229466438293, + 
-1.1658520698547363, + -1.3117995262145996, + -1.059692621231079, + -1.305335521697998, + -1.2665759325027466, + -0.8814589977264404, + -1.57208251953125, + -1.7780479192733765, + -2.0586652755737305, + -1.8728214502334595, + -1.6280543804168701, + -0.7476612329483032, + -0.7620548009872437, + -1.2629042863845825, + -1.4794116020202637, + -2.3951833248138428, + -2.1601064205169678, + 1.2344528436660767, + -1.4307661056518555, + -1.705609917640686, + -1.9521998167037964, + -1.653950810432434 + ], + [ + -0.1581835299730301, + -0.41388240456581116, + -1.040244221687317, + -0.4006899297237396, + -0.995573878288269, + -0.6401690244674683, + -0.861973226070404, + -0.5498507022857666, + 0.16262388229370117, + -0.9968903064727783, + -1.6532351970672607, + -0.5180301070213318, + -0.8989958763122559, + -0.739486038684845, + -0.489407479763031, + -0.7313289642333984, + -1.5020804405212402, + -0.38913318514823914, + -0.09014114737510681, + -2.0040807723999023, + 0.030766207724809647, + -0.8575266599655151, + -1.367136836051941, + -1.3242930173873901, + -1.2093360424041748, + -0.6744038462638855, + -1.2773722410202026, + -1.6951074600219727, + -1.2510273456573486, + -1.4774456024169922, + -2.1714208126068115, + -1.8072155714035034, + -0.8311782479286194, + -0.7931506037712097, + -1.6125178337097168, + -0.6126685738563538, + -1.5302754640579224, + -1.8956257104873657, + -1.1174112558364868, + -1.5973939895629883, + 1.3189467191696167, + -1.2491767406463623, + -1.2491631507873535, + -1.1635794639587402 + ], + [ + -1.6301325559616089, + -0.21710838377475739, + -1.0764491558074951, + -0.29275089502334595, + 1.294045090675354, + -0.6542890071868896, + -0.5155290365219116, + -0.5389382839202881, + -0.8567457795143127, + -0.8600627183914185, + -0.7329082489013672, + -1.2863078117370605, + -0.7768701314926147, + -0.5860442519187927, + -1.7954251766204834, + -0.9866839647293091, + -1.1437376737594604, + -0.0067693074233829975, + -0.2919849455356598, + 0.1796121597290039, + 
-2.269725799560547, + -0.3620854914188385, + -0.9737839102745056, + -1.1345038414001465, + -1.2973395586013794, + -1.7797824144363403, + -1.2839449644088745, + -1.248028039932251, + -1.5135103464126587, + -0.8698253035545349, + -1.2308835983276367, + -1.8383028507232666, + -1.5095500946044922, + -0.7486065626144409, + -0.8448894023895264, + -2.209364652633667, + -1.3507304191589355, + -2.064326524734497, + -1.2966722249984741, + -1.6203917264938354, + -1.110198736190796, + 1.0995824337005615, + -0.9949035048484802, + -1.906154990196228 + ], + [ + -0.3857121467590332, + -1.0385457277297974, + 0.07184969633817673, + -0.7274305820465088, + 0.2214350700378418, + -0.6810690760612488, + -0.37308505177497864, + -0.38499343395233154, + -0.07556971162557602, + -0.4546475112438202, + -0.23352579772472382, + -0.7280697226524353, + -0.6412975788116455, + -0.851034939289093, + -0.33060574531555176, + -0.6066662073135376, + -0.5821095705032349, + -0.9577417373657227, + -0.3557337522506714, + -1.1291959285736084, + -0.15420185029506683, + -1.7562874555587769, + -1.116240382194519, + -1.0349167585372925, + -1.2941757440567017, + -0.9298011064529419, + -1.5578397512435913, + -1.2477195262908936, + -1.0260570049285889, + -1.8155425786972046, + -1.5396621227264404, + -0.8416985869407654, + -1.0417908430099487, + -1.4197145700454712, + -1.4956721067428589, + -0.5721123814582825, + -1.2055737972259521, + -1.3808032274246216, + -1.757938265800476, + -1.7883199453353882, + -1.3312883377075195, + -1.2078183889389038, + 0.8964169025421143, + -1.5287233591079712 + ], + [ + -0.540481448173523, + 1.620806097984314, + 0.5379748344421387, + 1.1045295000076294, + 1.008168339729309, + 1.1148606538772583, + 0.8590996861457825, + 0.42925238609313965, + 1.0378775596618652, + 1.345681071281433, + 0.4986938238143921, + 1.0448731184005737, + 1.30496084690094, + 0.8440292477607727, + 0.6583732962608337, + 1.427945613861084, + 1.0127397775650024, + 0.9426275491714478, + 0.8845685124397278, + 
0.615810215473175, + 0.6648202538490295, + 0.5675833225250244, + -1.5480952262878418, + -1.9959079027175903, + -2.150942087173462, + -1.9917072057724, + -1.8536220788955688, + -2.030714273452759, + -1.9748508930206299, + -1.9697057008743286, + -2.027642250061035, + -2.2515573501586914, + -2.018990993499756, + -1.9842983484268188, + -1.8218733072280884, + -1.886641263961792, + -2.1928389072418213, + -2.483461380004883, + -1.7410305738449097, + -2.0795488357543945, + -1.9926034212112427, + -2.0786914825439453, + -2.087390661239624, + 1.4239481687545776 + ] + ], + "startTransitions": [ + -0.6278643012046814, + -0.8496521711349487, + -0.8266299366950989, + 0.28754591941833496, + -0.46926406025886536, + -0.9389898777008057, + -0.9730744957923889, + -1.1531033515930176, + 0.11331871896982193, + -1.1602532863616943, + 1.0827338695526123, + -1.0836642980575562, + -0.7251284122467041, + -1.0941987037658691, + -1.4626950025558472, + -1.4925812482833862, + 0.491633802652359, + 0.6841503977775574, + 1.0214602947235107, + -0.1990128457546234, + 1.3070342540740967, + -1.0140821933746338, + -0.4823978543281555, + -0.8779351115226746, + -0.9003897309303284, + -0.9263347387313843, + -0.7159107327461243, + -1.0999647378921509, + -1.4477791786193848, + -1.333817481994629, + -1.1298960447311401, + -2.4479169845581055, + -1.072827935218811, + -0.5421392917633057, + -0.63331538438797, + -1.5358810424804688, + -1.0700675249099731, + -1.8937907218933105, + -1.020530343055725, + -1.8996700048446655, + -1.4971352815628052, + -1.9857802391052246, + -1.0464967489242554, + 0.5005144476890564 + ], + "endTransitions": [ + 0.7837432622909546, + -1.0096880197525024, + -0.9491877555847168, + -0.8354703783988953, + -1.1565037965774536, + -0.7632436156272888, + -0.6353173851966858, + -0.7303943037986755, + -0.9719206094741821, + -0.8249671459197998, + -0.6155180335044861, + -0.6932002305984497, + -0.8537092804908752, + -0.7837308645248413, + -1.1888949871063232, + -0.804864227771759, + 
-0.7379578351974487, + -0.903874933719635, + -0.8609393239021301, + -0.6742830872535706, + -0.6277506351470947, + -0.9232121109962463, + -0.5970984101295471, + -0.2940647602081299, + -0.956906259059906, + -0.8675761222839355, + -0.8842039108276367, + 0.6615644693374634, + -0.9183928370475769, + -0.5086935758590698, + 0.037190359085798264, + -0.2767193913459778, + 0.2122756689786911, + -0.40360909700393677, + -0.9173226356506348, + -1.4344888925552368, + -0.05531555041670799, + -0.6402292847633362, + -0.14646022021770477, + -0.2573417127132416, + -0.15405860543251038, + -1.1314908266067505, + 0.12888982892036438, + 0.3973238170146942 + ] +} \ No newline at end of file diff --git a/grobid-home/models/header-coi-ac-BidLSTM_CRF_FEATURES.onnx/encoder.onnx b/grobid-home/models/header-coi-ac-BidLSTM_CRF_FEATURES.onnx/encoder.onnx new file mode 100644 index 0000000000..cb0f3fe46a Binary files /dev/null and b/grobid-home/models/header-coi-ac-BidLSTM_CRF_FEATURES.onnx/encoder.onnx differ diff --git a/grobid-home/models/header-coi-ac-BidLSTM_CRF_FEATURES.onnx/vocab.json b/grobid-home/models/header-coi-ac-BidLSTM_CRF_FEATURES.onnx/vocab.json new file mode 100644 index 0000000000..a21c6b46ea --- /dev/null +++ b/grobid-home/models/header-coi-ac-BidLSTM_CRF_FEATURES.onnx/vocab.json @@ -0,0 +1,567 @@ +{ + "charVocab": { + "<PAD>": 0, + "<UNK>": 1, + "!": 2, + "\"": 3, + "#": 4, + "$": 5, + "%": 6, + "&": 7, + "'": 8, + "(": 9, + ")": 10, + "*": 11, + "+": 12, + ",": 13, + "-": 14, + ".": 15, + "/": 16, + "0": 17, + "1": 18, + "2": 19, + "3": 20, + "4": 21, + "5": 22, + "6": 23, + "7": 24, + "8": 25, + "9": 26, + ":": 27, + ";": 28, + "<": 29, + "=": 30, + ">": 31, + "?": 32, + "@": 33, + "A": 34, + "B": 35, + "C": 36, + "D": 37, + "E": 38, + "F": 39, + "G": 40, + "H": 41, + "I": 42, + "J": 43, + "K": 44, + "L": 45, + "M": 46, + "N": 47, + "O": 48, + "P": 49, + "Q": 50, + "R": 51, + "S": 52, + "T": 53, + "U": 54, + "V": 55, + "W": 56, + "X": 57, + "Y": 58, + "Z": 59, + "[": 60, + 
"\\": 61, + "]": 62, + "^": 63, + "_": 64, + "`": 65, + "a": 66, + "b": 67, + "c": 68, + "d": 69, + "e": 70, + "f": 71, + "g": 72, + "h": 73, + "i": 74, + "j": 75, + "k": 76, + "l": 77, + "m": 78, + "n": 79, + "o": 80, + "p": 81, + "q": 82, + "r": 83, + "s": 84, + "t": 85, + "u": 86, + "v": 87, + "w": 88, + "x": 89, + "y": 90, + "z": 91, + "{": 92, + "|": 93, + "}": 94, + "~": 95, + "¡": 96, + "¢": 97, + "£": 98, + "¤": 99, + "¥": 100, + "§": 101, + "¨": 102, + "©": 103, + "ª": 104, + "«": 105, + "®": 106, + "¯": 107, + "°": 108, + "±": 109, + "²": 110, + "´": 111, + "µ": 112, + "¶": 113, + "¸": 114, + "»": 115, + "¼": 116, + "À": 117, + "Á": 118, + "Â": 119, + "Ã": 120, + "Å": 121, + "Ç": 122, + "É": 123, + "Ê": 124, + "Ì": 125, + "Î": 126, + "Ó": 127, + "Õ": 128, + "Ö": 129, + "×": 130, + "Ø": 131, + "ß": 132, + "à": 133, + "á": 134, + "â": 135, + "ã": 136, + "ä": 137, + "å": 138, + "ç": 139, + "è": 140, + "é": 141, + "ê": 142, + "ë": 143, + "í": 144, + "î": 145, + "ï": 146, + "ñ": 147, + "ò": 148, + "ó": 149, + "ô": 150, + "õ": 151, + "ö": 152, + "ø": 153, + "ú": 154, + "û": 155, + "ü": 156, + "ý": 157, + "þ": 158, + "ÿ": 159, + "Ā": 160, + "ă": 161, + "Ą": 162, + "Ć": 163, + "ć": 164, + "Č": 165, + "č": 166, + "ė": 167, + "ę": 168, + "ě": 169, + "İ": 170, + "ı": 171, + "Ľ": 172, + "ľ": 173, + "Ł": 174, + "ł": 175, + "ń": 176, + "ň": 177, + "Ō": 178, + "ř": 179, + "ş": 180, + "Š": 181, + "š": 182, + "Ź": 183, + "ź": 184, + "ż": 185, + "Ž": 186, + "ž": 187, + "Ȃ": 188, + "ʈ": 189, + "ˆ": 190, + "˙": 191, + "͑": 192, + "͒": 193, + "͓": 194, + "͔": 195, + "͗": 196, + "͘": 197, + "Γ": 198, + "Δ": 199, + "Θ": 200, + "Λ": 201, + "Ξ": 202, + "Π": 203, + "Σ": 204, + "Υ": 205, + "Φ": 206, + "Ψ": 207, + "α": 208, + "β": 209, + "γ": 210, + "δ": 211, + "ε": 212, + "ζ": 213, + "η": 214, + "θ": 215, + "κ": 216, + "λ": 217, + "μ": 218, + "ν": 219, + "ξ": 220, + "π": 221, + "ρ": 222, + "σ": 223, + "τ": 224, + "φ": 225, + "χ": 226, + "ψ": 227, + "ω": 228, + "ϕ": 229, + "ϩ": 230, 
+ "Ϫ": 231, + "ϫ": 232, + "ϭ": 233, + "ϳ": 234, + "Ͻ": 235, + "Ͼ": 236, + "Ј": 237, + "Љ": 238, + "Б": 239, + "И": 240, + "Л": 241, + "П": 242, + "Ф": 243, + "Ц": 244, + "б": 245, + "в": 246, + "г": 247, + "д": 248, + "з": 249, + "и": 250, + "й": 251, + "к": 252, + "л": 253, + "м": 254, + "н": 255, + "п": 256, + "р": 257, + "с": 258, + "т": 259, + "у": 260, + "ф": 261, + "х": 262, + "ц": 263, + "ч": 264, + "ш": 265, + "щ": 266, + "ы": 267, + "ь": 268, + "э": 269, + "ю": 270, + "я": 271, + "ё": 272, + "Ն": 273, + "؊": 274, + "؍": 275, + "ٞ": 276, + "ܨ": 277, + "ࡆ": 278, + "௦": 279, + "௧": 280, + "ᰔ": 281, + "Ṇ": 282, + "†": 283, + "‡": 284, + "•": 285, + "‫": 286, + "‬": 287, + "′": 288, + "‹": 289, + "⁎": 290, + "ℓ": 291, + "™": 292, + "Ⅲ": 293, + "→": 294, + "↓": 295, + "↵": 296, + "⇑": 297, + "⇤": 298, + "∆": 299, + "∈": 300, + "−": 301, + "∓": 302, + "√": 303, + "∞": 304, + "∼": 305, + "≃": 306, + "≈": 307, + "≤": 308, + "≥": 309, + "⊙": 310, + "⋅": 311, + "⋆": 312, + "⌬": 313, + "␣": 314, + "␤": 315, + "ⓒ": 316, + "□": 317, + "☯": 318, + "♣": 319, + "♦": 320, + "✉": 321, + "✝": 322, + "⸸": 323, + "ㆍ": 324, + "": 325, + "": 326, + "": 327, + "": 328, + "": 329, + "": 330, + "": 331, + "": 332, + "": 333, + "": 334, + "": 335, + "": 336, + "": 337, + "": 338, + "": 339, + "": 340, + "": 341, + "": 342, + ",": 343, + ";": 344, + "�": 345 + }, + "tagVocab": { + "<PAD>": 0, + "B-<abstract>": 1, + "B-<address>": 2, + "B-<affiliation>": 3, + "B-<author>": 4, + "B-<availability>": 5, + "B-<conflict>": 6, + "B-<contribution>": 7, + "B-<copyright>": 8, + "B-<date>": 9, + "B-<doctype>": 10, + "B-<editor>": 11, + "B-<email>": 12, + "B-<funding>": 13, + "B-<group>": 14, + "B-<keyword>": 15, + "B-<meeting>": 16, + "B-<pubnum>": 17, + "B-<reference>": 18, + "B-<submission>": 19, + "B-<title>": 20, + "B-<web>": 21, + "I-<abstract>": 22, + "I-<address>": 23, + "I-<affiliation>": 24, + "I-<author>": 25, + "I-<availability>": 26, + "I-<conflict>": 27, + 
"I-<contribution>": 28, + "I-<copyright>": 29, + "I-<date>": 30, + "I-<doctype>": 31, + "I-<editor>": 32, + "I-<email>": 33, + "I-<funding>": 34, + "I-<group>": 35, + "I-<keyword>": 36, + "I-<meeting>": 37, + "I-<pubnum>": 38, + "I-<reference>": 39, + "I-<submission>": 40, + "I-<title>": 41, + "I-<web>": 42, + "O": 43 + }, + "tagIndex": { + "0": "<PAD>", + "1": "B-<abstract>", + "2": "B-<address>", + "3": "B-<affiliation>", + "4": "B-<author>", + "5": "B-<availability>", + "6": "B-<conflict>", + "7": "B-<contribution>", + "8": "B-<copyright>", + "9": "B-<date>", + "10": "B-<doctype>", + "11": "B-<editor>", + "12": "B-<email>", + "13": "B-<funding>", + "14": "B-<group>", + "15": "B-<keyword>", + "16": "B-<meeting>", + "17": "B-<pubnum>", + "18": "B-<reference>", + "19": "B-<submission>", + "20": "B-<title>", + "21": "B-<web>", + "22": "I-<abstract>", + "23": "I-<address>", + "24": "I-<affiliation>", + "25": "I-<author>", + "26": "I-<availability>", + "27": "I-<conflict>", + "28": "I-<contribution>", + "29": "I-<copyright>", + "30": "I-<date>", + "31": "I-<doctype>", + "32": "I-<editor>", + "33": "I-<email>", + "34": "I-<funding>", + "35": "I-<group>", + "36": "I-<keyword>", + "37": "I-<meeting>", + "38": "I-<pubnum>", + "39": "I-<reference>", + "40": "I-<submission>", + "41": "I-<title>", + "42": "I-<web>", + "43": "O" + }, + "maxCharLength": 30, + "featuresIndices": [ + 9, + 10, + 11, + 12, + 13, + 14, + 15, + 16, + 17, + 18, + 19, + 20, + 21, + 22, + 23, + 24, + 25, + 26, + 27, + 28, + 29, + 30 + ], + "featuresVocabularySize": 12, + "featuresMapToIndex": { + "9": { + "BLOCKEND": 1, + "BLOCKIN": 2, + "BLOCKSTART": 3 + }, + "10": { + "LINEEND": 13, + "LINEIN": 14, + "LINESTART": 15 + }, + "11": { + "ALIGNEDLEFT": 25, + "LINEINDENT": 26 + }, + "12": { + "NEWFONT": 37, + "SAMEFONT": 38 + }, + "13": { + "HIGHERFONT": 49, + "LOWERFONT": 50, + "SAMEFONTSIZE": 51 + }, + "14": { + "0": 61, + "1": 62 + }, + "15": { + "0": 73, + "1": 74 + }, + "16": { + "ALLCAP": 85, + 
"INITCAP": 86, + "NOCAPS": 87 + }, + "17": { + "ALLDIGIT": 97, + "CONTAINSDIGITS": 98, + "NODIGIT": 99 + }, + "18": { + "0": 109, + "1": 110 + }, + "19": { + "0": 121, + "1": 122 + }, + "20": { + "0": 133, + "1": 134 + }, + "21": { + "0": 145, + "1": 146 + }, + "22": { + "0": 157, + "1": 158 + }, + "23": { + "0": 169, + "1": 170 + }, + "24": { + "0": 181, + "1": 182 + }, + "25": { + "0": 193, + "1": 194 + }, + "26": { + "COMMA": 205, + "DOT": 206, + "ENDBRACKET": 207, + "HYPHEN": 208, + "NOPUNCT": 209, + "OPENBRACKET": 210, + "PUNCT": 211, + "QUOTE": 212 + }, + "27": { + "0": 217, + "1": 218 + }, + "28": { + "0": 229 + }, + "29": { + "0": 241, + "1": 242 + }, + "30": { + "0": 253 + } + } +} \ No newline at end of file diff --git a/grobid-home/models/header-coi-ac-BidLSTM_ChainCRF_FEATURES.onnx/config.json b/grobid-home/models/header-coi-ac-BidLSTM_ChainCRF_FEATURES.onnx/config.json new file mode 100644 index 0000000000..b0b9b28f91 --- /dev/null +++ b/grobid-home/models/header-coi-ac-BidLSTM_ChainCRF_FEATURES.onnx/config.json @@ -0,0 +1,37 @@ +{ + "modelName": "grobid-header-BidLSTM_ChainCRF_FEATURES", + "architecture": "BidLSTM_ChainCRF_FEATURES", + "wordEmbeddingSize": 300, + "charEmbeddingSize": 25, + "numCharLstmUnits": 25, + "numWordLstmUnits": 100, + "maxSequenceLength": 3500, + "embeddingsName": "glove-840B", + "featuresIndices": [ + 9, + 10, + 11, + 12, + 13, + 14, + 15, + 16, + 17, + 18, + 19, + 20, + 21, + 22, + 23, + 24, + 25, + 26, + 27, + 28, + 29, + 30 + ], + "featuresEmbeddingSize": 4, + "featuresLstmUnits": 4, + "featuresVocabularySize": 12 +} \ No newline at end of file diff --git a/grobid-home/models/header-coi-ac-BidLSTM_ChainCRF_FEATURES.onnx/crf_params.json b/grobid-home/models/header-coi-ac-BidLSTM_ChainCRF_FEATURES.onnx/crf_params.json new file mode 100644 index 0000000000..9a5e89f957 --- /dev/null +++ b/grobid-home/models/header-coi-ac-BidLSTM_ChainCRF_FEATURES.onnx/crf_params.json @@ -0,0 +1,2120 @@ +{ + "transitions": [ + [ + 
0.17608831822872162, + -0.09344400465488434, + -0.11279518157243729, + 0.08164521306753159, + 0.2524421811103821, + 0.019747447222471237, + 0.24828490614891052, + -0.06411658227443695, + -0.1491447538137436, + -0.1684187948703766, + 0.020138222724199295, + 0.20290765166282654, + -0.07633064687252045, + 0.1139090359210968, + 0.0541418194770813, + 0.2323269546031952, + 0.042331453412771225, + -0.11665656417608261, + -0.06411020457744598, + 0.08080732822418213, + 0.07106472551822662, + 0.1463261842727661, + 0.017091087996959686, + 0.18297556042671204, + 0.08839266747236252, + 0.11858659982681274, + 0.0952586755156517, + -0.21322757005691528, + -0.15811048448085785, + 0.21000556647777557, + -0.0783323273062706, + 0.18619738519191742, + 0.25660741329193115, + -0.035708699375391006, + -0.009457222186028957, + 0.16335436701774597, + -0.1350918859243393, + 0.09985524415969849, + -0.2116004079580307, + 0.09054064750671387, + 0.15509842336177826, + 0.14944946765899658, + 0.168524369597435, + -0.18152254819869995 + ], + [ + 0.20479321479797363, + -0.23013362288475037, + 0.10807287693023682, + 0.20396704971790314, + 0.14699341356754303, + -0.23974648118019104, + 0.036243222653865814, + -0.1682346761226654, + 0.1335408091545105, + 0.12054246664047241, + 0.1850842982530594, + -0.049250662326812744, + 0.10225281119346619, + 0.218078151345253, + -0.0004700720892287791, + 0.13437961041927338, + 0.15097928047180176, + -0.00729456078261137, + -0.12674030661582947, + -0.10833417624235153, + -0.20261169970035553, + -0.08155622333288193, + 0.01618652231991291, + 0.0857839286327362, + -0.1885899007320404, + -0.044700831174850464, + -0.025100430473685265, + 0.19465236365795135, + -0.1324956864118576, + -0.20205610990524292, + 0.1423414945602417, + 0.0630531907081604, + -0.18513770401477814, + 0.010973038151860237, + 0.24787993729114532, + -0.19717277586460114, + -0.07775189727544785, + -0.20328021049499512, + 0.15571412444114685, + -0.06866826862096786, + 0.029656298458576202, + 
-0.046863242983818054, + 0.08763337135314941, + 0.13731947541236877 + ], + [ + 0.03483022004365921, + -0.18058693408966064, + -0.0315898135304451, + 0.024960091337561607, + -0.2200380563735962, + 0.09387701749801636, + 0.17861200869083405, + -0.17190204560756683, + 0.25931334495544434, + 0.06906288862228394, + -0.06325556337833405, + -0.03318234533071518, + 0.18502072989940643, + 0.06103565916419029, + -0.0363306887447834, + -0.096612349152565, + -0.23345902562141418, + 0.23546162247657776, + 0.0021703967358917, + -0.01729467697441578, + 0.014008686877787113, + 0.23847360908985138, + -0.00507350405678153, + 0.022482123225927353, + -0.17985770106315613, + -0.13733460009098053, + 0.16458968818187714, + -0.11735963076353073, + -0.23730216920375824, + 0.21187718212604523, + -0.027834327891469002, + 0.011223863810300827, + -0.05543797090649605, + -0.24022267758846283, + 0.08127258718013763, + 0.10463409870862961, + -0.02593601681292057, + 0.14551986753940582, + 0.0868021696805954, + -0.2464718073606491, + -0.1047435849905014, + -0.01606990396976471, + 0.07216636091470718, + -0.1947905570268631 + ], + [ + -0.022271763533353806, + -0.09018302708864212, + -0.05861001834273338, + -0.2500336468219757, + -0.22573135793209076, + 0.16268524527549744, + 0.07211939245462418, + -0.11527135968208313, + -0.07009616494178772, + 0.1939704567193985, + 0.19994677603244781, + 0.169842928647995, + -0.19377218186855316, + 0.05421845614910126, + 0.044129468500614166, + 0.2170141190290451, + 0.15958380699157715, + -0.037845369428396225, + 0.03848359361290932, + -0.06989135593175888, + 0.09200893342494965, + -0.16608810424804688, + -0.018439702689647675, + -0.07070168852806091, + -0.19255609810352325, + 0.04738391190767288, + -0.07356280088424683, + 0.00013403505727183074, + 0.18240976333618164, + 0.18141604959964752, + -0.15999655425548553, + 0.03174420818686485, + 0.15980403125286102, + 0.09906289726495743, + 0.1578342467546463, + 0.14924706518650055, + 0.19479231536388397, + 
-0.0318082831799984, + -0.07796841859817505, + -0.19124412536621094, + -0.057935502380132675, + -0.18203243613243103, + -0.05702631175518036, + 0.19576601684093475 + ], + [ + 0.1433619111776352, + 0.0698120966553688, + -0.1523277908563614, + 0.12928573787212372, + -0.08928843587636948, + 0.11140264570713043, + -0.0856877937912941, + 0.18436814844608307, + -0.1084543764591217, + -0.05755138769745827, + 0.07595626264810562, + 0.21837569773197174, + 0.10476168990135193, + -0.21987874805927277, + 0.1716732233762741, + 0.14502128958702087, + -0.08518984913825989, + -0.05032845214009285, + -0.2557395100593567, + -0.08108624070882797, + 0.21062485873699188, + 0.15743444859981537, + 0.11222083121538162, + 0.1405111700296402, + -0.06356285512447357, + 0.2205681949853897, + -0.1413753628730774, + 0.1800808310508728, + 0.25920528173446655, + 0.15701407194137573, + -0.09979496896266937, + -0.030576303601264954, + 0.1852893978357315, + -0.06530984491109848, + -0.24955607950687408, + -0.10924945026636124, + -0.04467407613992691, + -0.06006300449371338, + 0.18519918620586395, + -0.08368217200040817, + 0.0736055076122284, + 0.12661677598953247, + -0.1029278114438057, + -0.25652867555618286 + ], + [ + 0.2122664898633957, + -0.12993018329143524, + -0.01366304699331522, + 0.0919293686747551, + 0.0899886041879654, + -0.13732457160949707, + -0.07644999027252197, + -0.12750761210918427, + 0.24881292879581451, + 0.15890191495418549, + 0.008352258242666721, + -0.19182227551937103, + 0.22380265593528748, + -0.19805459678173065, + -0.169789656996727, + 0.1435500532388687, + -0.12910319864749908, + 0.22116711735725403, + -0.1721218377351761, + -0.25623729825019836, + 0.20375503599643707, + -0.17768463492393494, + -0.15149390697479248, + -0.04566769674420357, + 0.21815381944179535, + 0.1861327588558197, + -0.2043372094631195, + 0.25619569420814514, + -0.1284329742193222, + -0.11916321516036987, + -0.08028190582990646, + -0.2530951201915741, + -0.07262513041496277, + 0.07802405953407288, + 
-0.15833166241645813, + -0.17106321454048157, + 0.0345936194062233, + -0.14409931004047394, + 0.020227652043104172, + 0.015030944719910622, + 0.15207326412200928, + 0.13792745769023895, + 0.08631356805562973, + -0.09714183211326599 + ], + [ + 0.08008059114217758, + 0.12952184677124023, + -0.2085115611553192, + -0.21531908214092255, + -0.019156787544488907, + -0.25738295912742615, + 0.12157204002141953, + 0.014478867873549461, + 0.11762737482786179, + -0.23587724566459656, + 0.2516946494579315, + 0.03545180708169937, + 0.1417679637670517, + -0.21940883994102478, + -0.19858236610889435, + 0.13454760611057281, + -0.015398918651044369, + 0.03427913784980774, + -0.09007105976343155, + 0.14026111364364624, + -0.06247160583734512, + 0.1938670426607132, + -0.18140894174575806, + -0.22780950367450714, + -0.12389097362756729, + -0.18532945215702057, + -0.02844330668449402, + -0.0029098151717334986, + -0.21038740873336792, + 0.19451181590557098, + 0.23135511577129364, + -0.16074877977371216, + -0.052138857543468475, + -0.2331937700510025, + 0.25910457968711853, + -0.24136672914028168, + 0.10667481273412704, + -0.049983371049165726, + 0.015444567427039146, + 0.020764725282788277, + 0.05715854465961456, + -0.009339777752757072, + -0.2031509429216385, + 0.07257873564958572 + ], + [ + 0.18895858526229858, + -0.2029486447572708, + -0.1477566659450531, + -0.2521504759788513, + 0.17049959301948547, + -0.24179016053676605, + -0.16710354387760162, + -0.1288226693868637, + -0.03685372695326805, + -0.138637512922287, + 0.045131247490644455, + 0.09579039365053177, + -0.17663657665252686, + -0.04524168744683266, + -0.007228570524603128, + 0.15511131286621094, + -0.0675443783402443, + 0.1817418485879898, + -0.11434489488601685, + 0.042167190462350845, + -0.1073877289891243, + -0.1601920872926712, + 0.15229691565036774, + 0.1432410180568695, + 0.1655871719121933, + 0.028566431254148483, + 0.19398483633995056, + 0.14212565124034882, + 0.03171566501259804, + 0.25595593452453613, + 
0.13766545057296753, + 0.11411403864622116, + 0.18390598893165588, + -0.20249928534030914, + 0.17717024683952332, + 0.13392050564289093, + -0.24013811349868774, + -0.07079939544200897, + 0.22697106003761292, + 0.25172483921051025, + -0.22229403257369995, + 0.04800098389387131, + -0.24339914321899414, + -0.14734840393066406 + ], + [ + 0.16166234016418457, + -0.17032699286937714, + 0.04335585981607437, + -0.22720466554164886, + -0.1736232489347458, + 0.18997551500797272, + 0.08028192818164825, + -0.004056910518556833, + -0.06595863401889801, + -0.24060587584972382, + 0.07252320647239685, + 0.10771827399730682, + -0.19978471100330353, + -0.22429680824279785, + 0.1920725405216217, + 0.14993785321712494, + 0.08888233453035355, + -0.026043111458420753, + 0.09147702902555466, + -0.22779175639152527, + 0.0911039337515831, + -0.00961698405444622, + 0.01779842935502529, + -0.071563221514225, + 0.08422242105007172, + -0.2246960997581482, + 0.08383344858884811, + -0.12260214239358902, + -0.25669509172439575, + 0.11162492632865906, + -0.012505368329584599, + 0.024402691051363945, + 0.1494443565607071, + -0.03831228241324425, + 0.03709110617637634, + 0.15600161254405975, + -0.18138277530670166, + 0.2094220221042633, + 0.23861654102802277, + -0.2558484673500061, + 0.20222067832946777, + 0.20915274322032928, + 0.0564621277153492, + 0.18603573739528656 + ], + [ + 0.20042262971401215, + -0.14594364166259766, + 0.06221747770905495, + 0.20793083310127258, + -0.18085633218288422, + -0.12036857008934021, + 0.08955813944339752, + 0.18877467513084412, + -0.18903785943984985, + -0.05116318166255951, + -0.07459753006696701, + 0.0031554577872157097, + -0.2272607535123825, + 0.18867473304271698, + -0.06812319159507751, + -0.05740927532315254, + 0.015465298667550087, + 0.19657687842845917, + 0.23393842577934265, + 0.21072834730148315, + 0.0231767650693655, + 0.1272657811641693, + 0.18957145512104034, + -0.25117766857147217, + -0.05401583015918732, + 0.10109660774469376, + 0.17890197038650513, 
+ -0.16143585741519928, + 0.07087472081184387, + 0.1464037001132965, + 0.19709272682666779, + -0.15123999118804932, + 0.1025981679558754, + -0.21975427865982056, + -0.058663807809352875, + 0.19665245711803436, + -0.25114136934280396, + 0.03990350663661957, + -0.20499029755592346, + 0.18955105543136597, + 0.08829492330551147, + 0.1589541882276535, + -0.017950845882296562, + -0.16043895483016968 + ], + [ + 0.021742161363363266, + 0.21237331628799438, + -0.11641068756580353, + 4.3578515033004805e-05, + 0.009086182340979576, + 0.1600947231054306, + 0.010562621988356113, + -0.1439310759305954, + 0.03016103059053421, + -0.14742647111415863, + -0.2545258104801178, + 0.10019297897815704, + -0.23427605628967285, + -0.06304176896810532, + 0.0026618377305567265, + -0.09673290699720383, + 0.07251511514186859, + -0.02426236867904663, + -0.20277605950832367, + -0.23737533390522003, + -0.012475267983973026, + -0.1473412960767746, + -0.2130659967660904, + -0.19198457896709442, + -0.24599966406822205, + 0.013650782406330109, + -0.22959432005882263, + 0.03775987774133682, + -0.10278285294771194, + 0.06405562162399292, + -0.1198977679014206, + 0.21220074594020844, + -0.052213627845048904, + 0.25127631425857544, + -0.02574802190065384, + -0.0188775435090065, + 0.007578708231449127, + -0.25543609261512756, + -0.1055583506822586, + -0.17411746084690094, + -0.14625638723373413, + -0.16779080033302307, + -0.06801331043243408, + 0.20719242095947266 + ], + [ + 0.022590603679418564, + -0.23805567622184753, + 0.09707511961460114, + 0.08693916350603104, + -0.1718473583459854, + -0.23571038246154785, + 0.11133059114217758, + -0.11260750144720078, + 0.1489877700805664, + 0.1211848109960556, + -0.18085576593875885, + 0.23716311156749725, + -0.10591162741184235, + -0.010075476951897144, + -0.14127080142498016, + 0.22839044034481049, + 0.0829378142952919, + 0.19817622005939484, + 0.20631565153598785, + -0.025981634855270386, + -0.22943346202373505, + -0.003565220395103097, + 0.14012403786182404, + 
-0.180677592754364, + -0.21187692880630493, + -0.07776612043380737, + 0.07205311954021454, + 0.17604054510593414, + -0.008787124417722225, + -0.03293671831488609, + -0.09618742763996124, + 0.1687256097793579, + 0.04985736683011055, + 0.24820135533809662, + -0.06343197822570801, + 0.18195857107639313, + -0.25686022639274597, + -0.17716094851493835, + -0.1623009890317917, + 0.25091078877449036, + -0.020329391583800316, + -0.17091907560825348, + -0.1071627214550972, + -0.172651007771492 + ], + [ + 0.22754894196987152, + -0.1366780698299408, + -0.10902845859527588, + -0.2331756055355072, + 0.24051091074943542, + 0.0012156537268310785, + 0.25259929895401, + -0.2346457839012146, + 0.21247659623622894, + 0.14213387668132782, + -0.1149187833070755, + 0.25560030341148376, + 0.23696361482143402, + 0.23762136697769165, + -0.11610616743564606, + 0.042282238602638245, + 0.08552174270153046, + -0.14677096903324127, + -0.049143631011247635, + 0.137804314494133, + 0.2114216536283493, + -0.12923584878444672, + -0.2002267837524414, + 0.24957433342933655, + 0.18262770771980286, + -0.046280473470687866, + 0.08028724789619446, + -0.22078095376491547, + -0.1799858808517456, + -2.00461163331056e-05, + 0.08891087770462036, + -0.10288906842470169, + -0.2248207926750183, + 0.09684711694717407, + -0.16206900775432587, + 0.2548343241214752, + -0.22539322078227997, + -0.155459463596344, + -0.2163069248199463, + 0.17052045464515686, + -0.1222405955195427, + 0.03218715265393257, + -0.13611909747123718, + -0.23594903945922852 + ], + [ + -0.23905062675476074, + -0.07092778384685516, + -0.15637385845184326, + 0.07388761639595032, + -0.0010819610906764865, + -0.0033595142886042595, + 0.24817873537540436, + 0.24256911873817444, + -0.12015995383262634, + -0.13928855955600739, + -0.003766662208363414, + 0.15298256278038025, + 0.06776162981987, + 0.10634856671094894, + 0.07082775235176086, + 0.1863267570734024, + -0.1996307373046875, + 0.15057002007961273, + -0.005250899586826563, + 
-0.02886989340186119, + 0.19083046913146973, + -0.1323070228099823, + 0.014523877762258053, + -0.0977248027920723, + -0.04742283746600151, + 0.04178018495440483, + -0.09201889485120773, + 0.051181718707084656, + 0.23846395313739777, + -0.14387866854667664, + 0.10179919004440308, + 0.036952804774045944, + -0.1746874451637268, + 0.15747815370559692, + -0.25421345233917236, + 0.16825103759765625, + 0.22197479009628296, + 0.20245550572872162, + -0.1665739268064499, + -0.010249992832541466, + 0.20592385530471802, + -0.1438225358724594, + 0.13423460721969604, + -0.20063185691833496 + ], + [ + -0.12021176517009735, + -0.09926673024892807, + 0.17920957505702972, + 0.059772226959466934, + -0.24338370561599731, + -0.23846685886383057, + 0.1687130331993103, + 0.1949266642332077, + -0.23171423375606537, + 0.12432212382555008, + -0.08689258247613907, + -0.26098477840423584, + 0.01786566525697708, + 0.06324991583824158, + -0.22498719394207, + 0.06928765773773193, + -0.06540856510400772, + -0.16365982592105865, + -0.10233625024557114, + -0.1874213069677353, + 0.04114481061697006, + -0.2594088315963745, + 0.11754438281059265, + -0.16563042998313904, + -0.2541983425617218, + -0.11178410053253174, + 0.11522563546895981, + 0.11003836244344711, + 0.19493117928504944, + 0.12662182748317719, + -0.16844189167022705, + -0.01695541851222515, + 0.2592948377132416, + -0.16325709223747253, + 0.01481878012418747, + 0.04505722597241402, + 0.004048537462949753, + 0.22008824348449707, + 0.1301647573709488, + 0.09173762798309326, + 0.2268114984035492, + 0.16565720736980438, + -0.0744222030043602, + -0.029143115505576134 + ], + [ + 0.19636720418930054, + -0.11015000194311142, + 0.2328423261642456, + -0.2584368884563446, + -0.030370177701115608, + -0.12783043086528778, + 0.06758501380681992, + 0.11688684672117233, + -0.1292201578617096, + -0.04306158050894737, + -0.177101269364357, + -0.2090233564376831, + -0.11837244033813477, + -0.18558458983898163, + -0.14798280596733093, + 0.014431989751756191, 
+ -0.1206749901175499, + 0.07073573768138885, + -0.17101305723190308, + -0.14681313931941986, + -0.23682646453380585, + 0.2551625669002533, + 0.06502126157283783, + -0.08521316200494766, + 0.03293897584080696, + -0.06489221751689911, + 0.09044533967971802, + -0.23027393221855164, + 0.08856461197137833, + 0.07722775638103485, + -0.1726072132587433, + 0.03807426616549492, + 0.10519899427890778, + 0.13527794182300568, + -0.18197490274906158, + 0.08957111835479736, + -0.006422523409128189, + 0.24223078787326813, + -0.15691480040550232, + -0.09770956635475159, + -0.16610820591449738, + 0.027300599962472916, + 0.09200970828533173, + 0.013478398323059082 + ], + [ + 0.18412137031555176, + 0.2030554860830307, + 0.1112975925207138, + -0.017041470855474472, + -0.12107771635055542, + -0.08454886823892593, + -0.11229021847248077, + -0.2147866040468216, + -0.06764116883277893, + -0.022788947448134422, + -0.02017289772629738, + 0.0670335590839386, + 0.12060811370611191, + -0.1936901956796646, + -0.07178213447332382, + 0.039714373648166656, + 0.09764886647462845, + 0.23928603529930115, + -0.015429454855620861, + -0.008741491474211216, + 0.20744885504245758, + 0.007038614712655544, + -0.09588563442230225, + -0.1424168199300766, + -0.16682006418704987, + 0.04722917824983597, + -0.13935759663581848, + 0.14460913836956024, + 0.17050713300704956, + 0.11925975978374481, + 0.1166687086224556, + 0.004277604632079601, + -0.09062574803829193, + -0.05251612514257431, + 0.16753438115119934, + -0.20014847815036774, + -0.18074847757816315, + 0.06268909573554993, + -0.09045141935348511, + 0.24216598272323608, + -0.0435660295188427, + 0.1231214702129364, + 0.10106747597455978, + -0.119942307472229 + ], + [ + -0.2281997948884964, + -0.18061652779579163, + -0.18152135610580444, + 0.20407114923000336, + -0.11960824579000473, + -0.18492236733436584, + 0.2554111182689667, + -0.019991394132375717, + 0.2215806394815445, + -0.22950579226016998, + 0.16150794923305511, + -0.23659752309322357, + 
0.23418663442134857, + 0.023772358894348145, + 0.048625122755765915, + 0.009161199443042278, + 0.22606438398361206, + -0.24793991446495056, + 0.09289313852787018, + -0.12058784812688828, + 0.061217911541461945, + -0.06342127174139023, + -0.04756249114871025, + 0.22346068918704987, + -0.11355472356081009, + -0.04072801023721695, + -0.034254953265190125, + -0.11879318952560425, + 0.013256956823170185, + -0.06337730586528778, + -0.19407270848751068, + -0.10251999646425247, + -0.030916230753064156, + -0.16659580171108246, + -0.016722476109862328, + 0.21098864078521729, + 0.1732875257730484, + -0.19588324427604675, + -0.11471755802631378, + 0.01161796972155571, + -0.24225008487701416, + 0.050511542707681656, + -0.00351782888174057, + -0.06089070066809654 + ], + [ + 0.056371111422777176, + -0.05654318258166313, + -0.0037315813824534416, + 0.09352584183216095, + -0.12178513407707214, + 0.15091930329799652, + -0.06461160629987717, + -0.05057936906814575, + -0.08188478648662567, + -0.047934792935848236, + -0.15919247269630432, + -0.07043727487325668, + -0.1692715734243393, + -0.05945653095841408, + 0.0007296910625882447, + 0.2194390445947647, + -0.21545402705669403, + -0.1674940437078476, + -0.02600782737135887, + -0.010822365991771221, + -0.08818811178207397, + -0.1168171688914299, + 0.13689172267913818, + -0.17819471657276154, + 0.02562662400305271, + -0.17681190371513367, + -0.052262358367443085, + -0.10921040177345276, + -0.13016530871391296, + -0.20143768191337585, + -0.04020741954445839, + 0.1623109132051468, + -0.013060573488473892, + -0.2201118767261505, + 0.08279478549957275, + -0.17255470156669617, + 0.17701470851898193, + -0.19562068581581116, + -0.124993696808815, + -0.044759802520275116, + -0.24222403764724731, + 0.02674979902803898, + -0.01598324440419674, + -0.2354331910610199 + ], + [ + -0.028829878196120262, + -0.02354498766362667, + 0.16536498069763184, + 0.06242195516824722, + -0.11670825630426407, + -0.2179786115884781, + 0.1280946433544159, + 
-0.14648868143558502, + 0.11805204302072525, + -0.232424795627594, + 0.1798878312110901, + -0.1948549747467041, + 0.16280633211135864, + 0.11264622211456299, + 0.19981731474399567, + -0.0631944015622139, + -0.25161463022232056, + 0.24532844126224518, + -0.17229747772216797, + 0.19722899794578552, + 0.24002011120319366, + -0.154518261551857, + -0.19677720963954926, + 0.11740275472402573, + 0.17045122385025024, + 0.14858414232730865, + -0.12962108850479126, + 0.03586745262145996, + -0.04168030992150307, + 0.07267473638057709, + 0.20363116264343262, + 0.14445844292640686, + 0.209702730178833, + 0.09534505754709244, + 0.2130970060825348, + -0.016783220693469048, + -0.11470115184783936, + -0.15149444341659546, + 0.12304691970348358, + 0.04285691678524017, + -0.18624423444271088, + -0.15135377645492554, + -0.08607196807861328, + 0.20791096985340118 + ], + [ + -0.02135835774242878, + 0.1259908378124237, + -0.0671389251947403, + 0.17878222465515137, + 0.04953709617257118, + 0.11896498501300812, + 0.26076218485832214, + 0.12442876398563385, + 0.13137786090373993, + 0.18850985169410706, + -0.13872280716896057, + -0.14260956645011902, + 0.14161302149295807, + -0.09569063037633896, + 0.1808786243200302, + 0.18595223128795624, + -0.16579750180244446, + 0.052825313061475754, + 0.010402409359812737, + 0.19490931928157806, + 0.21322546899318695, + -0.14745306968688965, + -0.19128970801830292, + 0.12556548416614532, + -0.11987202614545822, + 0.030174074694514275, + 0.0742829367518425, + -0.19377675652503967, + -0.10239459574222565, + 0.2332124412059784, + 0.10399018973112106, + -0.003907451871782541, + 0.20903635025024414, + -0.18542911112308502, + 0.08584185689687729, + 0.22475266456604004, + -0.20073102414608002, + -0.061867184937000275, + -0.2166346162557602, + -0.0696101039648056, + -0.19890175759792328, + 0.01856989413499832, + -0.08446888625621796, + -0.013984205201268196 + ], + [ + 0.0721021443605423, + 0.13361726701259613, + 0.19870488345623016, + -0.13789819180965424, + 
-0.04780903831124306, + -0.1112748235464096, + 0.12046940624713898, + 0.011692924425005913, + 0.18431933224201202, + 0.20353730022907257, + -0.07383564859628677, + -0.15324096381664276, + -0.24819216132164001, + -0.09674017131328583, + 0.06304425746202469, + 0.09332699328660965, + 0.08233963698148727, + -0.2558674216270447, + -0.19620993733406067, + -0.10805602371692657, + 0.22444234788417816, + 0.2312205284833908, + 0.010728531517088413, + 0.059420112520456314, + 0.09136826545000076, + 0.00976504199206829, + -0.05254754796624184, + -0.23562924563884735, + 0.2553556263446808, + -0.08198419213294983, + -0.12549883127212524, + -0.0008410497684963048, + 0.15978746116161346, + 0.043460413813591, + -0.05065975710749626, + -0.23185813426971436, + 0.01483885757625103, + -0.15568962693214417, + -0.16824442148208618, + -0.1524868756532669, + 0.1323120892047882, + -0.13201427459716797, + -0.12967565655708313, + -0.2306380420923233 + ], + [ + 0.0856105163693428, + -0.039163075387477875, + -0.007660449016839266, + -0.23639492690563202, + -0.14876137673854828, + 0.23596590757369995, + -0.2319946438074112, + 0.14375892281532288, + 0.24334661662578583, + -0.016004225239157677, + 0.21533367037773132, + 0.2292565405368805, + -0.0934634879231453, + 0.09324220567941666, + -0.2060185968875885, + -0.009391807951033115, + 0.08910215646028519, + 0.24460454285144806, + 0.22665318846702576, + 0.14251241087913513, + 0.006249283440411091, + -0.19659513235092163, + 0.2060229331254959, + 0.06269640475511551, + -0.08452891558408737, + 0.04204345867037773, + -0.22881095111370087, + -0.24355155229568481, + -0.153992161154747, + 0.10096388310194016, + -0.14057868719100952, + -0.13317978382110596, + -0.1629979908466339, + -0.2610569894313812, + -0.004141499754041433, + 0.23834551870822906, + 0.13898159563541412, + 0.0653998926281929, + -0.12012863904237747, + 0.2396547794342041, + 0.09394730627536774, + -0.00331875286065042, + 0.2154504507780075, + -0.20959773659706116 + ], + [ + 
-0.0013496421743184328, + 0.11362107843160629, + 0.2347259819507599, + -0.1599472463130951, + -0.2428695410490036, + -0.23714780807495117, + 0.08935108035802841, + 0.07781631499528885, + 0.14863747358322144, + 0.03857022151350975, + -0.17479480803012848, + -0.05819109082221985, + -0.20325499773025513, + -0.0670558288693428, + -0.08611521869897842, + -0.055958233773708344, + 0.11789286136627197, + 0.11770681291818619, + 0.03661506995558739, + 0.20708024501800537, + -0.055593546479940414, + 0.22660519182682037, + 0.019988996908068657, + 0.1592756062746048, + -0.09208742529153824, + 0.0609409399330616, + 0.11322784423828125, + 0.1357812136411667, + 0.00348488031886518, + 0.13403689861297607, + -0.07296796888113022, + -0.006760568358004093, + -0.01220971904695034, + -0.14174780249595642, + -0.03851800411939621, + 0.06900499016046524, + 0.22059878706932068, + 0.020700914785265923, + 0.20984268188476562, + -0.25772079825401306, + 0.013449418358504772, + 0.19452384114265442, + -0.08921404182910919, + -0.08832069486379623 + ], + [ + 0.1785605251789093, + -0.2315802425146103, + -0.18298424780368805, + 0.021065479144454002, + -0.04086179658770561, + 0.13302963972091675, + -0.18682070076465607, + -0.1799120157957077, + 0.02062072977423668, + 0.14716167747974396, + -0.160993292927742, + -0.17657485604286194, + -0.01715763844549656, + 0.08906825631856918, + -0.04854646325111389, + 0.15574805438518524, + 0.030685218051075935, + -0.13363923132419586, + -0.11157509684562683, + 0.03764766454696655, + -0.19468821585178375, + 0.11676760017871857, + 0.11740721017122269, + -0.009662305936217308, + 0.07340744882822037, + -0.04824094846844673, + 0.21767041087150574, + 0.11161571741104126, + 0.1407696157693863, + 0.03062383458018303, + -0.14746955037117004, + -0.051837094128131866, + -0.15277142822742462, + 0.139139786362648, + -0.1213664710521698, + 0.21807941794395447, + -0.23123376071453094, + -0.1857152134180069, + -0.021335573866963387, + 0.025178823620080948, + 
-0.006558301858603954, + -0.032769471406936646, + 0.07736369222402573, + -0.0802716538310051 + ], + [ + 0.04183661937713623, + -0.2118944674730301, + -0.17278632521629333, + -0.10232599079608917, + -0.019761517643928528, + 0.2046845406293869, + 0.19030138850212097, + 0.025678234174847603, + -0.15994517505168915, + -0.2509307265281677, + 0.17728744447231293, + -0.0049660829827189445, + 0.02867933176457882, + 0.17630138993263245, + -0.13522106409072876, + 0.037690870463848114, + 0.024629516527056694, + 0.23881109058856964, + -0.2026282250881195, + -0.03152930364012718, + 0.07313890755176544, + 0.08646350353956223, + -0.13686388731002808, + 0.07214046269655228, + -0.19369325041770935, + 0.2263975739479065, + 0.21804599463939667, + -0.02870410867035389, + -0.008677197620272636, + 0.23371903598308563, + 0.18480107188224792, + 0.22566276788711548, + 0.22906821966171265, + 0.2602115571498871, + -0.029445409774780273, + 0.11786440759897232, + 0.0745813250541687, + 0.26013806462287903, + -0.12193123251199722, + -0.021648233756422997, + 0.22116973996162415, + -0.13412794470787048, + 0.09106604754924774, + 0.17197827994823456 + ], + [ + -0.031467143446207047, + -0.22625911235809326, + -0.031283847987651825, + 0.06975515931844711, + 0.15159685909748077, + 0.22567671537399292, + -0.12158534675836563, + 0.17386934161186218, + -0.06193088740110397, + -0.04254912585020065, + 0.04739328473806381, + 0.2527969181537628, + -0.24182157218456268, + 0.1330549418926239, + 0.03340890631079674, + -0.18478471040725708, + -0.11644480377435684, + 0.08554552495479584, + -0.14870955049991608, + 0.13656671345233917, + 0.09613535553216934, + 0.16053947806358337, + -0.18454919755458832, + 0.17615509033203125, + 0.07906359434127808, + -0.14955659210681915, + 0.18399614095687866, + 0.06683147698640823, + 0.22206063568592072, + 0.24475650489330292, + -0.07050320506095886, + 0.15924762189388275, + 0.17203854024410248, + 0.1803644299507141, + -0.12211348116397858, + 0.10522184520959854, + 
0.15564890205860138, + -0.20760756731033325, + -0.11653585731983185, + -0.2157912701368332, + 0.2536090314388275, + 0.002183408010751009, + -0.2271886169910431, + -0.054987382143735886 + ], + [ + 0.04415586590766907, + -0.22434557974338531, + -0.03700718656182289, + -0.11924712359905243, + -0.10875044763088226, + -0.022878143936395645, + -0.21422874927520752, + 0.1691303253173828, + 0.0039276378229260445, + 0.2581000030040741, + -0.13169732689857483, + 0.2082824409008026, + 0.20725315809249878, + -0.14576669037342072, + -0.1776389181613922, + 0.0062802862375974655, + -0.06527195870876312, + -0.06335712224245071, + -0.11966921389102936, + -0.23639211058616638, + -0.06964977830648422, + 0.23173049092292786, + -0.012474598363041878, + 0.09170970320701599, + -0.19506143033504486, + -0.11639198660850525, + -0.15599657595157623, + 0.12104196846485138, + 0.021261489018797874, + -0.12865804135799408, + 0.02907838486135006, + 0.054657790809869766, + -0.10954256355762482, + 0.14241299033164978, + 0.046723853796720505, + 0.15816348791122437, + -0.1968781054019928, + -0.1514749675989151, + -0.11683116108179092, + -0.2525866627693176, + 0.12262081354856491, + -0.24588064849376678, + -0.13663837313652039, + -0.24870353937149048 + ], + [ + -0.25742068886756897, + -0.0729801133275032, + -0.2536620795726776, + -0.022325318306684494, + 0.20645657181739807, + -0.14745929837226868, + -0.14956870675086975, + -0.1814701110124588, + 0.2203589826822281, + -0.19113537669181824, + -0.002645324682816863, + -0.12147999554872513, + -0.012990085408091545, + -0.1193656399846077, + -0.14592067897319794, + -0.20844610035419464, + 0.18286827206611633, + 0.20180290937423706, + -0.12110649049282074, + 0.2002381682395935, + -0.14147204160690308, + 0.11574767529964447, + -0.19407568871974945, + 0.12822239100933075, + 0.19796834886074066, + -0.014225349761545658, + 0.12734846770763397, + 0.25924792885780334, + 0.19098320603370667, + -0.005524416919797659, + 0.02799822948873043, + 0.2216598093509674, + 
0.008114723488688469, + 0.019749905914068222, + 0.13698288798332214, + -0.02114211581647396, + -0.19195953011512756, + -0.13687926530838013, + -0.10411310195922852, + -0.03149507939815521, + 0.024532150477170944, + -0.19148796796798706, + -0.04437106475234032, + -0.10013893991708755 + ], + [ + -0.09159188717603683, + -0.14973925054073334, + 0.24883045256137848, + -0.058021675795316696, + -0.08491963148117065, + -0.24563047289848328, + -0.055955979973077774, + -0.15922461450099945, + -0.24170583486557007, + -0.0040222033858299255, + 0.11146026849746704, + 0.06376326829195023, + -0.1850855052471161, + -0.14124171435832977, + -0.18406522274017334, + 0.18816234171390533, + -0.004414830356836319, + -0.041076965630054474, + 0.013340877369046211, + -0.08979346603155136, + 0.21819183230400085, + 0.18074776232242584, + 0.09257822483778, + -0.13537168502807617, + 0.016279906034469604, + 0.141400545835495, + 0.07301224768161774, + 0.24689331650733948, + 0.11356759816408157, + 0.2271111011505127, + -0.11115144938230515, + 0.06314539164304733, + -0.0250127911567688, + -0.05897710472345352, + 0.07076795399188995, + 0.16759903728961945, + 0.14542759954929352, + -0.1513948142528534, + -0.2535487711429596, + -0.24660196900367737, + 0.028924522921442986, + 0.1370343714952469, + 0.05405706167221069, + -0.1836196333169937 + ], + [ + -0.11587836593389511, + -0.24752923846244812, + -0.16659270226955414, + -0.051295723766088486, + 0.01697797141969204, + 0.18938234448432922, + -0.1579776406288147, + 0.24812354147434235, + -0.05959174782037735, + 0.22455765306949615, + -0.2599749267101288, + -0.039775650948286057, + -0.05417900159955025, + 0.2239886373281479, + 0.05391200631856918, + -0.1648648977279663, + 0.09074786305427551, + -0.1592332422733307, + -0.25472337007522583, + -0.07938233762979507, + -0.06677084416151047, + 0.23579595983028412, + -0.16717322170734406, + 0.25717517733573914, + -0.18593399226665497, + -0.0800771713256836, + 0.24991953372955322, + 0.07264173775911331, + 
-0.2525496482849121, + -0.04727945104241371, + 0.07500151544809341, + 0.14086595177650452, + 0.17503605782985687, + -0.03432822600007057, + -0.24502620100975037, + -0.23667176067829132, + -0.0683472603559494, + 0.06657283753156662, + -0.04490784555673599, + 0.16621103882789612, + -0.23649683594703674, + 0.004124208353459835, + 0.17379307746887207, + 0.11671461910009384 + ], + [ + 0.0049126059748232365, + -0.23750007152557373, + 0.145565003156662, + 0.08261720091104507, + -0.2523353695869446, + 0.1452942192554474, + -0.24946661293506622, + 0.11491918563842773, + -0.0023335826117545366, + -0.07372357696294785, + -0.11932330578565598, + -0.20588792860507965, + 0.20730827748775482, + -0.013458197005093098, + -0.07771029323339462, + -0.24452437460422516, + -0.07810355722904205, + -0.13398872315883636, + -0.0802755281329155, + 0.11435269564390182, + 0.22698864340782166, + -0.05094681680202484, + -0.2200314998626709, + 0.14702169597148895, + -0.1751888394355774, + 0.1417492926120758, + 0.12678731977939606, + 0.07209345698356628, + 0.06643889844417572, + 0.07845772802829742, + 0.1976015418767929, + -0.06695769727230072, + -0.10912350565195084, + 0.04966845363378525, + -0.2484932392835617, + -0.02764132246375084, + 0.05910233035683632, + -0.03608651086688042, + 0.09725140035152435, + 0.19968281686306, + -0.012229749001562595, + 0.10874077677726746, + -0.10288236290216446, + -0.02819226309657097 + ], + [ + 0.23395943641662598, + -0.22170233726501465, + 0.1509355753660202, + 0.1036052405834198, + 0.15339060127735138, + -0.06051880493760109, + 0.21968501806259155, + -0.09717017412185669, + 0.03077181614935398, + -0.039918385446071625, + -0.2543540596961975, + -0.21587862074375153, + 0.004511091858148575, + 0.05437997728586197, + -0.23118701577186584, + 0.22033731639385223, + -0.23018395900726318, + -0.16324910521507263, + -0.23284858465194702, + -0.18264354765415192, + -0.05185055732727051, + 0.15531207621097565, + -0.004727428313344717, + 0.01488203089684248, + 
0.13419029116630554, + -0.07551480829715729, + -0.2549087107181549, + 0.05748547613620758, + 0.23749449849128723, + 0.20443570613861084, + -0.21892093122005463, + -0.22308330237865448, + -0.059244271367788315, + 0.15302138030529022, + 0.1483544558286667, + 0.15101543068885803, + 0.2502576410770416, + -0.048049669712781906, + -0.20598997175693512, + -0.25133538246154785, + -0.1531386524438858, + 0.19227425754070282, + 0.028265709057450294, + -0.20504599809646606 + ], + [ + 0.09156953543424606, + 0.008438232354819775, + 0.08288583159446716, + -0.00635044788941741, + 0.034097012132406235, + 0.06470618396997452, + 0.15537796914577484, + 0.01781523786485195, + 0.06972372531890869, + -0.22142952680587769, + 0.05330944061279297, + 0.22468537092208862, + 0.2136218100786209, + 0.07765862345695496, + 0.16746056079864502, + -0.12209009379148483, + 0.026869360357522964, + 0.18006651103496552, + 0.1353592574596405, + -0.04273916035890579, + -0.178911030292511, + 0.0668080672621727, + 0.14683228731155396, + 0.1715245246887207, + -0.230466827750206, + 0.0034315278753638268, + 0.15175603330135345, + 0.06633446365594864, + -0.15856362879276276, + -0.05964183434844017, + 0.16535437107086182, + -0.11738698929548264, + 0.10899185389280319, + -0.21219110488891602, + -0.15860657393932343, + -0.019679199904203415, + 0.17344354093074799, + 0.09734188765287399, + 0.17562294006347656, + -0.16138909757137299, + -0.18187406659126282, + -0.08831391483545303, + 0.0523899644613266, + 0.024609409272670746 + ], + [ + -0.15730895102024078, + -0.03681395947933197, + -0.16943901777267456, + 0.2552204430103302, + -0.04551498591899872, + -0.09611127525568008, + 0.10044212639331818, + 0.015592423267662525, + -0.1555062234401703, + 0.12912559509277344, + -0.09457419812679291, + -0.1833663433790207, + 0.05429353564977646, + -0.040105197578668594, + 0.18561962246894836, + 0.2500194013118744, + -0.20119145512580872, + -0.17714561522006989, + 0.030866162851452827, + 0.018056662753224373, + 
0.039305638521909714, + 0.003521486185491085, + -0.24058197438716888, + 0.2528053820133209, + 0.18086972832679749, + 0.04900304228067398, + 0.23746508359909058, + 0.2526344954967499, + -0.15291135013103485, + 0.1068943589925766, + -0.1740184724330902, + 0.19945095479488373, + 0.25226497650146484, + 0.08851957321166992, + -0.05801260471343994, + 0.23043175041675568, + 0.16895930469036102, + -0.12329186499118805, + 0.25689831376075745, + 0.1704922765493393, + -0.18289321660995483, + 0.007651079446077347, + 0.11093579977750778, + 0.11301464587450027 + ], + [ + -0.13642077147960663, + 0.07983101159334183, + 0.1744544357061386, + -0.24553684890270233, + -0.17549456655979156, + 0.19703787565231323, + 0.19706997275352478, + 0.2454596757888794, + 0.1653539091348648, + 0.15103808045387268, + 0.15305520594120026, + 0.015083488076925278, + 0.09802043437957764, + 0.2082226425409317, + 0.16979072988033295, + 0.22483739256858826, + -0.25775933265686035, + 0.13786816596984863, + -0.027962541207671165, + 0.1604480892419815, + -0.23751184344291687, + -0.06003718450665474, + -0.033253345638513565, + 0.1890634447336197, + -0.08525195717811584, + -0.07307948917150497, + 0.1201339140534401, + 0.0802876204252243, + 0.06707409024238586, + 0.25334975123405457, + 0.09942076355218887, + 0.045603856444358826, + 0.19870330393314362, + -0.24521982669830322, + -0.09133923798799515, + -0.04649413377046585, + 0.14283707737922668, + 0.2209198772907257, + -0.06308673322200775, + -0.17969562113285065, + -0.0878414735198021, + -0.14730624854564667, + -0.21577170491218567, + 0.22471673786640167 + ], + [ + 0.11253425478935242, + -0.011456775479018688, + 0.17293734848499298, + 0.23182427883148193, + 0.02208680473268032, + 0.010555618442595005, + 0.03217999264597893, + 0.15006165206432343, + 0.030289338901638985, + 0.08835948258638382, + 0.09884291887283325, + 0.15453612804412842, + 0.21435025334358215, + 0.14587798714637756, + 0.11196447163820267, + 0.21180950105190277, + -0.023749807849526405, + 
0.20578816533088684, + -0.12507586181163788, + -0.049820296466350555, + 0.2548519968986511, + 0.15751463174819946, + 0.15206432342529297, + -0.026834886521100998, + 0.1329364776611328, + -0.18858914077281952, + 0.10737624019384384, + 0.04255065321922302, + -0.19436852633953094, + 0.23927894234657288, + 0.16362950205802917, + 0.12174597382545471, + -0.030283689498901367, + -0.14180411398410797, + 0.2535238564014435, + -0.13213381171226501, + -0.09856351464986801, + -0.16278758645057678, + -0.2397853285074234, + 0.2272649109363556, + -0.03048640675842762, + -0.1252664178609848, + 0.05278313532471657, + 0.1920422613620758 + ], + [ + 0.07116913050413132, + -0.07864007353782654, + -0.21683908998966217, + 0.22332727909088135, + -0.006808784790337086, + -0.2394266128540039, + 0.07121255248785019, + 0.03606031835079193, + -0.1244770735502243, + -0.18728746473789215, + -0.035441845655441284, + -0.2475198656320572, + -0.18342049419879913, + 0.027762407436966896, + 0.09220055490732193, + -0.21047775447368622, + 0.15015879273414612, + -0.19722570478916168, + 0.15325047075748444, + -0.05767303332686424, + -0.022553203627467155, + -0.09181196242570877, + 0.19038374722003937, + 0.1785672903060913, + -0.024112069979310036, + 0.03977697342634201, + -0.2286224365234375, + -0.026909995824098587, + 0.020900970324873924, + -0.22622337937355042, + 0.07937408983707428, + 0.22596894204616547, + -0.18907229602336884, + -0.07193369418382645, + -0.19813692569732666, + 0.16196274757385254, + -0.2081318348646164, + -0.2411973625421524, + 0.22234046459197998, + 0.1303422451019287, + -0.25080567598342896, + -0.23592963814735413, + 0.15191376209259033, + 0.07288002222776413 + ], + [ + 0.16529124975204468, + 0.18687480688095093, + -0.24700793623924255, + -0.08879557996988297, + -0.044565316289663315, + -0.10635138303041458, + -0.13489797711372375, + 0.1486460268497467, + -0.12961305677890778, + -0.25960734486579895, + -0.0410744771361351, + -0.09086266160011292, + -0.07228979468345642, + 
0.02424316294491291, + -0.0753764808177948, + 0.18313312530517578, + 0.22685523331165314, + -0.01637590304017067, + -0.10807599127292633, + -0.033400457352399826, + 0.16493305563926697, + -0.09818705916404724, + -0.14066332578659058, + 0.02895340882241726, + -0.08408276736736298, + -0.1820533722639084, + 0.21578866243362427, + -0.2519669532775879, + 0.18912552297115326, + 0.0166032575070858, + 0.09667476266622543, + 0.2028828263282776, + -0.05869936943054199, + -0.0029846145771443844, + -0.20318995416164398, + 0.12422609329223633, + -0.18822523951530457, + 0.09426047652959824, + 0.21314425766468048, + 0.15613916516304016, + 0.23674613237380981, + -0.08597900718450546, + -0.17684632539749146, + 0.0099410992115736 + ], + [ + 0.21964405477046967, + -0.13569045066833496, + 0.22310814261436462, + 0.07874155044555664, + 0.204693004488945, + 0.0007564918487332761, + 0.11054256558418274, + -0.025405464693903923, + -0.13658267259597778, + 0.23482954502105713, + -0.17053313553333282, + -0.21429872512817383, + 0.1253930926322937, + 0.045067716389894485, + 0.171335369348526, + 0.09783049672842026, + 0.11413371562957764, + -0.11648416519165039, + -0.0035603956785053015, + 0.1920628398656845, + -0.1403433084487915, + -0.24709293246269226, + -0.01254321914166212, + 0.016940617933869362, + -0.1538248211145401, + 0.16473212838172913, + -0.11168999969959259, + -0.24529682099819183, + -0.021955352276563644, + -0.16724859178066254, + -0.07103053480386734, + 0.2559516131877899, + 0.2212129682302475, + -0.05182451754808426, + -0.19284504652023315, + -0.03630916774272919, + -0.17890238761901855, + 0.2556970417499542, + -0.10854237526655197, + 0.08383820950984955, + 0.06848163902759552, + 0.1883818507194519, + -0.14531460404396057, + -0.25003573298454285 + ], + [ + 0.07084982097148895, + -0.18229717016220093, + 0.17459441721439362, + 0.23132595419883728, + 0.05805211886763573, + 0.24833978712558746, + -0.05206986516714096, + 0.174324169754982, + -0.22106362879276276, + 
0.18692831695079803, + -0.25091418623924255, + 0.24036167562007904, + -0.19917500019073486, + 0.006931256037205458, + 0.1793861836194992, + -0.17627547681331635, + -0.11784037947654724, + -0.10734885185956955, + -0.21086649596691132, + -0.18754878640174866, + 0.042323607951402664, + 0.15513648092746735, + -0.02566842921078205, + -0.014010818675160408, + -0.22423048317432404, + -0.04277436435222626, + -0.19187481701374054, + 0.04044552892446518, + -0.01951335370540619, + -0.25928735733032227, + -0.023383405059576035, + 0.18759824335575104, + -0.08989730477333069, + 0.051524028182029724, + -0.20520316064357758, + -0.25369992852211, + 0.2367756962776184, + -0.22194823622703552, + -0.15747667849063873, + -0.23987850546836853, + 0.08299732953310013, + -0.17885777354240417, + 0.09608113020658493, + 0.10098153352737427 + ], + [ + 0.028944848105311394, + 0.23309293389320374, + -0.20472440123558044, + -0.010671848431229591, + 0.07546469569206238, + 0.024432137608528137, + 0.24040700495243073, + 0.06980966776609421, + 0.08226947486400604, + 0.11640629917383194, + 0.0016123427776619792, + 0.02291065640747547, + 0.10578428953886032, + 0.013786871917545795, + -0.07322797924280167, + 0.07155177742242813, + -0.23590292036533356, + -0.04582822322845459, + 0.23349763453006744, + 0.05547628924250603, + -2.023288061536732e-07, + -0.13335752487182617, + -0.007104262709617615, + -0.1867811381816864, + 0.02790861390531063, + 0.11813593655824661, + 0.05904580280184746, + 0.012783756479620934, + 0.09566865861415863, + -0.01839073933660984, + -0.07374323159456253, + 0.05545004829764366, + 0.217270165681839, + -0.2096945196390152, + -0.08982153981924057, + -0.22805702686309814, + 0.2551310360431671, + -0.04364316537976265, + -0.007283308077603579, + 0.15015879273414612, + -0.028622927144169807, + 0.16844399273395538, + -0.21798717975616455, + -0.25630298256874084 + ], + [ + 0.15786674618721008, + 0.18996717035770416, + 0.11197742074728012, + -0.042392928153276443, + 0.19751030206680298, + 
0.018470659852027893, + 0.0010380401508882642, + -0.0005703337956219912, + 0.1959228366613388, + 0.002501468872651458, + -0.055930592119693756, + -0.15108688175678253, + 0.1256583034992218, + -0.19432084262371063, + 0.15099836885929108, + 0.16084825992584229, + 0.08309678733348846, + -0.010916978120803833, + 0.09931237995624542, + -0.029546497389674187, + -0.08155357837677002, + -0.0911812037229538, + 0.023836854845285416, + -0.1424150913953781, + -0.13245996832847595, + 0.09342828392982483, + 0.10870597511529922, + 0.09720417857170105, + 0.17620928585529327, + -0.17150522768497467, + 0.05795092508196831, + 0.2378191202878952, + 0.047969672828912735, + 0.12692515552043915, + 0.1797170788049698, + 0.021013746038079262, + -0.19885312020778656, + -0.19434663653373718, + -0.024565644562244415, + 0.2590258717536926, + -0.18922017514705658, + -0.14682354032993317, + -0.2208714783191681, + 0.14774298667907715 + ], + [ + -0.1755276918411255, + 0.2291610985994339, + 0.03682266175746918, + 0.047353439033031464, + 0.053795527666807175, + -0.09823668003082275, + -0.2421059012413025, + -0.2371693104505539, + 0.07203568518161774, + -0.1951310783624649, + -0.16012020409107208, + 0.1523687243461609, + 0.1153315007686615, + -0.19153693318367004, + -0.1497534066438675, + 0.09064386785030365, + 0.17116591334342957, + 0.10310159623622894, + 0.18139241635799408, + -0.051042452454566956, + -0.12866714596748352, + 0.19175852835178375, + 0.21687693893909454, + 0.10450249165296555, + 0.023130042478442192, + -0.09172355383634567, + -0.1441272646188736, + -0.11595510691404343, + 0.12468578666448593, + -0.11853697896003723, + -0.19164398312568665, + -0.24186576902866364, + 0.23234353959560394, + -0.06486228853464127, + 0.03240986913442612, + -0.022264495491981506, + 0.1914547234773636, + -0.17822317779064178, + -0.06780707836151123, + -0.18503063917160034, + -0.0858154445886612, + 0.1968127340078354, + -0.1163979098200798, + -0.05726046860218048 + ] + ], + "startTransitions": [ + 0.0, + 0.0, 
+ 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0 + ], + "endTransitions": [ + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0 + ] +} \ No newline at end of file diff --git a/grobid-home/models/header-coi-ac-BidLSTM_ChainCRF_FEATURES.onnx/encoder.onnx b/grobid-home/models/header-coi-ac-BidLSTM_ChainCRF_FEATURES.onnx/encoder.onnx new file mode 100644 index 0000000000..7c816a1306 Binary files /dev/null and b/grobid-home/models/header-coi-ac-BidLSTM_ChainCRF_FEATURES.onnx/encoder.onnx differ diff --git a/grobid-home/models/header-coi-ac-BidLSTM_ChainCRF_FEATURES.onnx/vocab.json b/grobid-home/models/header-coi-ac-BidLSTM_ChainCRF_FEATURES.onnx/vocab.json new file mode 100644 index 0000000000..a21c6b46ea --- /dev/null +++ b/grobid-home/models/header-coi-ac-BidLSTM_ChainCRF_FEATURES.onnx/vocab.json @@ -0,0 +1,567 @@ +{ + "charVocab": { + "<PAD>": 0, + "<UNK>": 1, + "!": 2, + "\"": 3, + "#": 4, + "$": 5, + "%": 6, + "&": 7, + "'": 8, + "(": 9, + ")": 10, + "*": 11, + "+": 12, + ",": 13, + "-": 14, + ".": 15, + "/": 16, + "0": 17, + "1": 18, + "2": 19, + "3": 20, + "4": 21, + "5": 22, + "6": 23, + "7": 24, + "8": 25, + "9": 26, + ":": 27, + ";": 28, + "<": 29, + "=": 30, + ">": 31, + "?": 32, + "@": 33, + "A": 34, + "B": 35, + "C": 36, + "D": 37, + "E": 38, + "F": 39, + "G": 40, + "H": 41, + "I": 42, + "J": 43, + "K": 44, + "L": 45, + "M": 46, + "N": 47, + "O": 48, + "P": 49, + "Q": 50, + "R": 51, + "S": 52, + "T": 53, + "U": 54, + "V": 55, + "W": 56, + "X": 57, + "Y": 58, + "Z": 
59, + "[": 60, + "\\": 61, + "]": 62, + "^": 63, + "_": 64, + "`": 65, + "a": 66, + "b": 67, + "c": 68, + "d": 69, + "e": 70, + "f": 71, + "g": 72, + "h": 73, + "i": 74, + "j": 75, + "k": 76, + "l": 77, + "m": 78, + "n": 79, + "o": 80, + "p": 81, + "q": 82, + "r": 83, + "s": 84, + "t": 85, + "u": 86, + "v": 87, + "w": 88, + "x": 89, + "y": 90, + "z": 91, + "{": 92, + "|": 93, + "}": 94, + "~": 95, + "¡": 96, + "¢": 97, + "£": 98, + "¤": 99, + "¥": 100, + "§": 101, + "¨": 102, + "©": 103, + "ª": 104, + "«": 105, + "®": 106, + "¯": 107, + "°": 108, + "±": 109, + "²": 110, + "´": 111, + "µ": 112, + "¶": 113, + "¸": 114, + "»": 115, + "¼": 116, + "À": 117, + "Á": 118, + "Â": 119, + "Ã": 120, + "Å": 121, + "Ç": 122, + "É": 123, + "Ê": 124, + "Ì": 125, + "Î": 126, + "Ó": 127, + "Õ": 128, + "Ö": 129, + "×": 130, + "Ø": 131, + "ß": 132, + "à": 133, + "á": 134, + "â": 135, + "ã": 136, + "ä": 137, + "å": 138, + "ç": 139, + "è": 140, + "é": 141, + "ê": 142, + "ë": 143, + "í": 144, + "î": 145, + "ï": 146, + "ñ": 147, + "ò": 148, + "ó": 149, + "ô": 150, + "õ": 151, + "ö": 152, + "ø": 153, + "ú": 154, + "û": 155, + "ü": 156, + "ý": 157, + "þ": 158, + "ÿ": 159, + "Ā": 160, + "ă": 161, + "Ą": 162, + "Ć": 163, + "ć": 164, + "Č": 165, + "č": 166, + "ė": 167, + "ę": 168, + "ě": 169, + "İ": 170, + "ı": 171, + "Ľ": 172, + "ľ": 173, + "Ł": 174, + "ł": 175, + "ń": 176, + "ň": 177, + "Ō": 178, + "ř": 179, + "ş": 180, + "Š": 181, + "š": 182, + "Ź": 183, + "ź": 184, + "ż": 185, + "Ž": 186, + "ž": 187, + "Ȃ": 188, + "ʈ": 189, + "ˆ": 190, + "˙": 191, + "͑": 192, + "͒": 193, + "͓": 194, + "͔": 195, + "͗": 196, + "͘": 197, + "Γ": 198, + "Δ": 199, + "Θ": 200, + "Λ": 201, + "Ξ": 202, + "Π": 203, + "Σ": 204, + "Υ": 205, + "Φ": 206, + "Ψ": 207, + "α": 208, + "β": 209, + "γ": 210, + "δ": 211, + "ε": 212, + "ζ": 213, + "η": 214, + "θ": 215, + "κ": 216, + "λ": 217, + "μ": 218, + "ν": 219, + "ξ": 220, + "π": 221, + "ρ": 222, + "σ": 223, + "τ": 224, + "φ": 225, + "χ": 226, + "ψ": 227, + "ω": 228, + "ϕ": 
229, + "ϩ": 230, + "Ϫ": 231, + "ϫ": 232, + "ϭ": 233, + "ϳ": 234, + "Ͻ": 235, + "Ͼ": 236, + "Ј": 237, + "Љ": 238, + "Б": 239, + "И": 240, + "Л": 241, + "П": 242, + "Ф": 243, + "Ц": 244, + "б": 245, + "в": 246, + "г": 247, + "д": 248, + "з": 249, + "и": 250, + "й": 251, + "к": 252, + "л": 253, + "м": 254, + "н": 255, + "п": 256, + "р": 257, + "с": 258, + "т": 259, + "у": 260, + "ф": 261, + "х": 262, + "ц": 263, + "ч": 264, + "ш": 265, + "щ": 266, + "ы": 267, + "ь": 268, + "э": 269, + "ю": 270, + "я": 271, + "ё": 272, + "Ն": 273, + "؊": 274, + "؍": 275, + "ٞ": 276, + "ܨ": 277, + "ࡆ": 278, + "௦": 279, + "௧": 280, + "ᰔ": 281, + "Ṇ": 282, + "†": 283, + "‡": 284, + "•": 285, + "‫": 286, + "‬": 287, + "′": 288, + "‹": 289, + "⁎": 290, + "ℓ": 291, + "™": 292, + "Ⅲ": 293, + "→": 294, + "↓": 295, + "↵": 296, + "⇑": 297, + "⇤": 298, + "∆": 299, + "∈": 300, + "−": 301, + "∓": 302, + "√": 303, + "∞": 304, + "∼": 305, + "≃": 306, + "≈": 307, + "≤": 308, + "≥": 309, + "⊙": 310, + "⋅": 311, + "⋆": 312, + "⌬": 313, + "␣": 314, + "␤": 315, + "ⓒ": 316, + "□": 317, + "☯": 318, + "♣": 319, + "♦": 320, + "✉": 321, + "✝": 322, + "⸸": 323, + "ㆍ": 324, + "": 325, + "": 326, + "": 327, + "": 328, + "": 329, + "": 330, + "": 331, + "": 332, + "": 333, + "": 334, + "": 335, + "": 336, + "": 337, + "": 338, + "": 339, + "": 340, + "": 341, + "": 342, + ",": 343, + ";": 344, + "�": 345 + }, + "tagVocab": { + "<PAD>": 0, + "B-<abstract>": 1, + "B-<address>": 2, + "B-<affiliation>": 3, + "B-<author>": 4, + "B-<availability>": 5, + "B-<conflict>": 6, + "B-<contribution>": 7, + "B-<copyright>": 8, + "B-<date>": 9, + "B-<doctype>": 10, + "B-<editor>": 11, + "B-<email>": 12, + "B-<funding>": 13, + "B-<group>": 14, + "B-<keyword>": 15, + "B-<meeting>": 16, + "B-<pubnum>": 17, + "B-<reference>": 18, + "B-<submission>": 19, + "B-<title>": 20, + "B-<web>": 21, + "I-<abstract>": 22, + "I-<address>": 23, + "I-<affiliation>": 24, + "I-<author>": 25, + "I-<availability>": 26, + 
"I-<conflict>": 27, + "I-<contribution>": 28, + "I-<copyright>": 29, + "I-<date>": 30, + "I-<doctype>": 31, + "I-<editor>": 32, + "I-<email>": 33, + "I-<funding>": 34, + "I-<group>": 35, + "I-<keyword>": 36, + "I-<meeting>": 37, + "I-<pubnum>": 38, + "I-<reference>": 39, + "I-<submission>": 40, + "I-<title>": 41, + "I-<web>": 42, + "O": 43 + }, + "tagIndex": { + "0": "<PAD>", + "1": "B-<abstract>", + "2": "B-<address>", + "3": "B-<affiliation>", + "4": "B-<author>", + "5": "B-<availability>", + "6": "B-<conflict>", + "7": "B-<contribution>", + "8": "B-<copyright>", + "9": "B-<date>", + "10": "B-<doctype>", + "11": "B-<editor>", + "12": "B-<email>", + "13": "B-<funding>", + "14": "B-<group>", + "15": "B-<keyword>", + "16": "B-<meeting>", + "17": "B-<pubnum>", + "18": "B-<reference>", + "19": "B-<submission>", + "20": "B-<title>", + "21": "B-<web>", + "22": "I-<abstract>", + "23": "I-<address>", + "24": "I-<affiliation>", + "25": "I-<author>", + "26": "I-<availability>", + "27": "I-<conflict>", + "28": "I-<contribution>", + "29": "I-<copyright>", + "30": "I-<date>", + "31": "I-<doctype>", + "32": "I-<editor>", + "33": "I-<email>", + "34": "I-<funding>", + "35": "I-<group>", + "36": "I-<keyword>", + "37": "I-<meeting>", + "38": "I-<pubnum>", + "39": "I-<reference>", + "40": "I-<submission>", + "41": "I-<title>", + "42": "I-<web>", + "43": "O" + }, + "maxCharLength": 30, + "featuresIndices": [ + 9, + 10, + 11, + 12, + 13, + 14, + 15, + 16, + 17, + 18, + 19, + 20, + 21, + 22, + 23, + 24, + 25, + 26, + 27, + 28, + 29, + 30 + ], + "featuresVocabularySize": 12, + "featuresMapToIndex": { + "9": { + "BLOCKEND": 1, + "BLOCKIN": 2, + "BLOCKSTART": 3 + }, + "10": { + "LINEEND": 13, + "LINEIN": 14, + "LINESTART": 15 + }, + "11": { + "ALIGNEDLEFT": 25, + "LINEINDENT": 26 + }, + "12": { + "NEWFONT": 37, + "SAMEFONT": 38 + }, + "13": { + "HIGHERFONT": 49, + "LOWERFONT": 50, + "SAMEFONTSIZE": 51 + }, + "14": { + "0": 61, + "1": 62 + }, + "15": { + "0": 73, + "1": 74 + }, + "16": { + 
"ALLCAP": 85, + "INITCAP": 86, + "NOCAPS": 87 + }, + "17": { + "ALLDIGIT": 97, + "CONTAINSDIGITS": 98, + "NODIGIT": 99 + }, + "18": { + "0": 109, + "1": 110 + }, + "19": { + "0": 121, + "1": 122 + }, + "20": { + "0": 133, + "1": 134 + }, + "21": { + "0": 145, + "1": 146 + }, + "22": { + "0": 157, + "1": 158 + }, + "23": { + "0": 169, + "1": 170 + }, + "24": { + "0": 181, + "1": 182 + }, + "25": { + "0": 193, + "1": 194 + }, + "26": { + "COMMA": 205, + "DOT": 206, + "ENDBRACKET": 207, + "HYPHEN": 208, + "NOPUNCT": 209, + "OPENBRACKET": 210, + "PUNCT": 211, + "QUOTE": 212 + }, + "27": { + "0": 217, + "1": 218 + }, + "28": { + "0": 229 + }, + "29": { + "0": 241, + "1": 242 + }, + "30": { + "0": 253 + } + } +} \ No newline at end of file diff --git a/grobid-home/models/license-gru.onnx/classifier.onnx b/grobid-home/models/license-gru.onnx/classifier.onnx new file mode 100644 index 0000000000..a1ccd03e85 Binary files /dev/null and b/grobid-home/models/license-gru.onnx/classifier.onnx differ diff --git a/grobid-home/models/license-gru.onnx/config.json b/grobid-home/models/license-gru.onnx/config.json new file mode 100644 index 0000000000..586b7048b3 --- /dev/null +++ b/grobid-home/models/license-gru.onnx/config.json @@ -0,0 +1,8 @@ +{ + "modelName": "license_gru", + "architecture": "gru", + "wordEmbeddingSize": 300, + "maxlen": 300, + "numClasses": 10, + "embeddingsName": "glove-840B" +} \ No newline at end of file diff --git a/grobid-home/models/license-gru.onnx/labels.json b/grobid-home/models/license-gru.onnx/labels.json new file mode 100644 index 0000000000..d3b1b640e5 --- /dev/null +++ b/grobid-home/models/license-gru.onnx/labels.json @@ -0,0 +1,38 @@ +{ + "labels": [ + "CC-0", + "CC-BY", + "CC-BY-NC", + "CC-BY-NC-ND", + "CC-BY-SA", + "CC-BY-NC-SA", + "CC-BY-ND", + "copyright", + "other", + "undecided" + ], + "labelToIndex": { + "CC-0": 0, + "CC-BY": 1, + "CC-BY-NC": 2, + "CC-BY-NC-ND": 3, + "CC-BY-SA": 4, + "CC-BY-NC-SA": 5, + "CC-BY-ND": 6, + "copyright": 7, + 
"other": 8, + "undecided": 9 + }, + "indexToLabel": { + "0": "CC-0", + "1": "CC-BY", + "2": "CC-BY-NC", + "3": "CC-BY-NC-ND", + "4": "CC-BY-SA", + "5": "CC-BY-NC-SA", + "6": "CC-BY-ND", + "7": "copyright", + "8": "other", + "9": "undecided" + } +} \ No newline at end of file diff --git a/grobid-home/models/name-citation-BidLSTM_CRF/config.json b/grobid-home/models/name-citation-BidLSTM_CRF/config.json deleted file mode 100644 index 30a24845d7..0000000000 --- a/grobid-home/models/name-citation-BidLSTM_CRF/config.json +++ /dev/null @@ -1,26 +0,0 @@ -{ - "model_name": "name-citation-BidLSTM_CRF", - "architecture": "BidLSTM_CRF", - "embeddings_name": "glove-840B", - "char_vocab_size": 89, - "case_vocab_size": 8, - "char_embedding_size": 25, - "num_char_lstm_units": 25, - "max_char_length": 30, - "features_vocabulary_size": 12, - "features_indices": null, - "features_embedding_size": 4, - "features_lstm_units": 4, - "max_sequence_length": 3000, - "word_embedding_size": 300, - "num_word_lstm_units": 100, - "case_embedding_size": 5, - "dropout": 0.5, - "recurrent_dropout": 0.5, - "use_crf": true, - "use_chain_crf": false, - "fold_number": 1, - "batch_size": 20, - "transformer_name": null, - "use_ELMo": false -} \ No newline at end of file diff --git a/grobid-home/models/name-citation-BidLSTM_CRF/model_weights.hdf5 b/grobid-home/models/name-citation-BidLSTM_CRF/model_weights.hdf5 deleted file mode 100644 index a10292e42e..0000000000 Binary files a/grobid-home/models/name-citation-BidLSTM_CRF/model_weights.hdf5 and /dev/null differ diff --git a/grobid-home/models/name-citation-BidLSTM_CRF/preprocessor.json b/grobid-home/models/name-citation-BidLSTM_CRF/preprocessor.json deleted file mode 100644 index 664ba0ea87..0000000000 --- a/grobid-home/models/name-citation-BidLSTM_CRF/preprocessor.json +++ /dev/null @@ -1,134 +0,0 @@ -{ - "padding": true, - "return_lengths": true, - "return_word_embeddings": true, - "return_casing": false, - "return_features": false, - "return_chars": 
true, - "return_bert_embeddings": false, - "vocab_char": { - "<PAD>": 0, - "<UNK>": 1, - "\"": 2, - "&": 3, - "'": 4, - "(": 5, - ",": 6, - "-": 7, - ".": 8, - "0": 9, - "1": 10, - "2": 11, - "3": 12, - "4": 13, - "5": 14, - "6": 15, - "7": 16, - "8": 17, - "9": 18, - ":": 19, - ";": 20, - "A": 21, - "B": 22, - "C": 23, - "D": 24, - "E": 25, - "F": 26, - "G": 27, - "H": 28, - "I": 29, - "J": 30, - "K": 31, - "L": 32, - "M": 33, - "N": 34, - "O": 35, - "P": 36, - "Q": 37, - "R": 38, - "S": 39, - "T": 40, - "U": 41, - "V": 42, - "W": 43, - "X": 44, - "Y": 45, - "Z": 46, - "a": 47, - "b": 48, - "c": 49, - "d": 50, - "e": 51, - "f": 52, - "g": 53, - "h": 54, - "i": 55, - "j": 56, - "k": 57, - "l": 58, - "m": 59, - "n": 60, - "o": 61, - "p": 62, - "q": 63, - "r": 64, - "s": 65, - "t": 66, - "u": 67, - "v": 68, - "w": 69, - "x": 70, - "y": 71, - "z": 72, - "~": 73, - "\u00a8": 74, - "\u00b4": 75, - "\u00c9": 76, - "\u00cf": 77, - "\u00d8": 78, - "\u00e1": 79, - "\u00e4": 80, - "\u00e7": 81, - "\u00e9": 82, - "\u00ed": 83, - "\u00f6": 84, - "\u00fc": 85, - "\u010c": 86, - "\u0148": 87, - "\u02c7": 88 - }, - "vocab_tag": { - "<PAD>": 0, - "B-<forename>": 1, - "B-<middlename>": 2, - "B-<suffix>": 3, - "B-<surname>": 4, - "I-<forename>": 5, - "I-<middlename>": 6, - "I-<surname>": 7, - "O": 8 - }, - "vocab_case": [ - "<PAD>", - "numeric", - "allLower", - "allUpper", - "initialUpper", - "other", - "mainly_numeric", - "contains_digit" - ], - "max_char_length": 30, - "feature_preprocessor": null, - "indice_tag": { - "0": "<PAD>", - "1": "B-<forename>", - "2": "B-<middlename>", - "3": "B-<suffix>", - "4": "B-<surname>", - "5": "I-<forename>", - "6": "I-<middlename>", - "7": "I-<surname>", - "8": "O" - } -} \ No newline at end of file diff --git a/grobid-home/models/name-citation-BidLSTM_CRF_FEATURES/config.json b/grobid-home/models/name-citation-BidLSTM_CRF_FEATURES/config.json deleted file mode 100644 index 9f844be8a3..0000000000 --- 
a/grobid-home/models/name-citation-BidLSTM_CRF_FEATURES/config.json +++ /dev/null @@ -1,87 +0,0 @@ -{ - "model_name": "name-citation-BidLSTM_CRF_FEATURES", - "architecture": "BidLSTM_CRF_FEATURES", - "embeddings_name": "glove-840B", - "char_vocab_size": 89, - "case_vocab_size": 8, - "char_embedding_size": 25, - "num_char_lstm_units": 25, - "max_char_length": 30, - "features_vocabulary_size": 12, - "features_indices": [ - 9, - 10, - 11, - 12, - 13, - 14, - 15, - 16, - 17, - 18 - ], - "features_embedding_size": 4, - "features_lstm_units": 4, - "max_sequence_length": 3000, - "word_embedding_size": 300, - "num_word_lstm_units": 100, - "case_embedding_size": 5, - "dropout": 0.5, - "recurrent_dropout": 0.5, - "use_crf": true, - "use_chain_crf": false, - "fold_number": 1, - "batch_size": 20, - "transformer_name": null, - "use_ELMo": false, - "features_map_to_index": { - "9": { - "LINEEND": 1, - "LINEIN": 2, - "LINESTART": 3 - }, - "10": { - "ALLCAP": 13, - "INITCAP": 14, - "NOCAPS": 15 - }, - "11": { - "ALLDIGIT": 25, - "CONTAINSDIGITS": 26, - "NODIGIT": 27 - }, - "12": { - "0": 37, - "1": 38 - }, - "13": { - "0": 49, - "1": 50 - }, - "14": { - "0": 61, - "1": 62 - }, - "15": { - "0": 73, - "1": 74 - }, - "16": { - "0": 85, - "1": 86 - }, - "17": { - "0": 97, - "1": 98 - }, - "18": { - "COMMA": 109, - "DOT": 110, - "HYPHEN": 111, - "NOPUNCT": 112, - "OPENBRACKET": 113, - "PUNCT": 114, - "QUOTE": 115 - } - } -} \ No newline at end of file diff --git a/grobid-home/models/name-citation-BidLSTM_CRF_FEATURES/model_weights.hdf5 b/grobid-home/models/name-citation-BidLSTM_CRF_FEATURES/model_weights.hdf5 deleted file mode 100644 index 6f617fd985..0000000000 Binary files a/grobid-home/models/name-citation-BidLSTM_CRF_FEATURES/model_weights.hdf5 and /dev/null differ diff --git a/grobid-home/models/name-citation-BidLSTM_CRF_FEATURES/preprocessor.json b/grobid-home/models/name-citation-BidLSTM_CRF_FEATURES/preprocessor.json deleted file mode 100644 index 3574ca3ee1..0000000000 --- 
a/grobid-home/models/name-citation-BidLSTM_CRF_FEATURES/preprocessor.json +++ /dev/null @@ -1,198 +0,0 @@ -{ - "padding": true, - "return_lengths": true, - "return_word_embeddings": true, - "return_casing": false, - "return_features": true, - "return_chars": true, - "return_bert_embeddings": false, - "vocab_char": { - "<PAD>": 0, - "<UNK>": 1, - "\"": 2, - "&": 3, - "'": 4, - "(": 5, - ",": 6, - "-": 7, - ".": 8, - "0": 9, - "1": 10, - "2": 11, - "3": 12, - "4": 13, - "5": 14, - "6": 15, - "7": 16, - "8": 17, - "9": 18, - ":": 19, - ";": 20, - "A": 21, - "B": 22, - "C": 23, - "D": 24, - "E": 25, - "F": 26, - "G": 27, - "H": 28, - "I": 29, - "J": 30, - "K": 31, - "L": 32, - "M": 33, - "N": 34, - "O": 35, - "P": 36, - "Q": 37, - "R": 38, - "S": 39, - "T": 40, - "U": 41, - "V": 42, - "W": 43, - "X": 44, - "Y": 45, - "Z": 46, - "a": 47, - "b": 48, - "c": 49, - "d": 50, - "e": 51, - "f": 52, - "g": 53, - "h": 54, - "i": 55, - "j": 56, - "k": 57, - "l": 58, - "m": 59, - "n": 60, - "o": 61, - "p": 62, - "q": 63, - "r": 64, - "s": 65, - "t": 66, - "u": 67, - "v": 68, - "w": 69, - "x": 70, - "y": 71, - "z": 72, - "~": 73, - "\u00a8": 74, - "\u00b4": 75, - "\u00c9": 76, - "\u00cf": 77, - "\u00d8": 78, - "\u00e1": 79, - "\u00e4": 80, - "\u00e7": 81, - "\u00e9": 82, - "\u00ed": 83, - "\u00f6": 84, - "\u00fc": 85, - "\u010c": 86, - "\u0148": 87, - "\u02c7": 88 - }, - "vocab_tag": { - "<PAD>": 0, - "B-<forename>": 1, - "B-<middlename>": 2, - "B-<suffix>": 3, - "B-<surname>": 4, - "I-<forename>": 5, - "I-<middlename>": 6, - "I-<surname>": 7, - "O": 8 - }, - "vocab_case": [ - "<PAD>", - "numeric", - "allLower", - "allUpper", - "initialUpper", - "other", - "mainly_numeric", - "contains_digit" - ], - "max_char_length": 30, - "feature_preprocessor": { - "features_vocabulary_size": 12, - "features_indices": [ - 9, - 10, - 11, - 12, - 13, - 14, - 15, - 16, - 17, - 18 - ], - "features_map_to_index": { - "9": { - "LINEEND": 1, - "LINEIN": 2, - "LINESTART": 3 - }, - "10": { - "ALLCAP": 
13, - "INITCAP": 14, - "NOCAPS": 15 - }, - "11": { - "ALLDIGIT": 25, - "CONTAINSDIGITS": 26, - "NODIGIT": 27 - }, - "12": { - "0": 37, - "1": 38 - }, - "13": { - "0": 49, - "1": 50 - }, - "14": { - "0": 61, - "1": 62 - }, - "15": { - "0": 73, - "1": 74 - }, - "16": { - "0": 85, - "1": 86 - }, - "17": { - "0": 97, - "1": 98 - }, - "18": { - "COMMA": 109, - "DOT": 110, - "HYPHEN": 111, - "NOPUNCT": 112, - "OPENBRACKET": 113, - "PUNCT": 114, - "QUOTE": 115 - } - } - }, - "indice_tag": { - "0": "<PAD>", - "1": "B-<forename>", - "2": "B-<middlename>", - "3": "B-<suffix>", - "4": "B-<surname>", - "5": "I-<forename>", - "6": "I-<middlename>", - "7": "I-<surname>", - "8": "O" - } -} \ No newline at end of file diff --git a/grobid-home/models/name-header-BidLSTM_CRF/config.json b/grobid-home/models/name-header-BidLSTM_CRF/config.json deleted file mode 100644 index a4fdcba78b..0000000000 --- a/grobid-home/models/name-header-BidLSTM_CRF/config.json +++ /dev/null @@ -1,26 +0,0 @@ -{ - "model_name": "name-header-BidLSTM_CRF", - "architecture": "BidLSTM_CRF", - "embeddings_name": "glove-840B", - "char_vocab_size": 172, - "case_vocab_size": 8, - "char_embedding_size": 25, - "num_char_lstm_units": 25, - "max_char_length": 30, - "features_vocabulary_size": 12, - "features_indices": null, - "features_embedding_size": 4, - "features_lstm_units": 4, - "max_sequence_length": 3000, - "word_embedding_size": 300, - "num_word_lstm_units": 100, - "case_embedding_size": 5, - "dropout": 0.5, - "recurrent_dropout": 0.5, - "use_crf": true, - "use_chain_crf": false, - "fold_number": 1, - "batch_size": 20, - "transformer_name": null, - "use_ELMo": false -} \ No newline at end of file diff --git a/grobid-home/models/name-header-BidLSTM_CRF/model_weights.hdf5 b/grobid-home/models/name-header-BidLSTM_CRF/model_weights.hdf5 deleted file mode 100644 index e7709303a9..0000000000 Binary files a/grobid-home/models/name-header-BidLSTM_CRF/model_weights.hdf5 and /dev/null differ diff --git 
a/grobid-home/models/name-header-BidLSTM_CRF/preprocessor.json b/grobid-home/models/name-header-BidLSTM_CRF/preprocessor.json deleted file mode 100644 index 8cdcc77220..0000000000 --- a/grobid-home/models/name-header-BidLSTM_CRF/preprocessor.json +++ /dev/null @@ -1,225 +0,0 @@ -{ - "padding": true, - "return_lengths": true, - "return_word_embeddings": true, - "return_casing": false, - "return_features": false, - "return_chars": true, - "return_bert_embeddings": false, - "vocab_char": { - "<PAD>": 0, - "<UNK>": 1, - "!": 2, - "\"": 3, - "#": 4, - "$": 5, - "&": 6, - "'": 7, - "(": 8, - ")": 9, - "*": 10, - "+": 11, - ",": 12, - "-": 13, - ".": 14, - "0": 15, - "1": 16, - "2": 17, - "3": 18, - "4": 19, - "5": 20, - "6": 21, - "7": 22, - "8": 23, - "9": 24, - ":": 25, - ";": 26, - "@": 27, - "A": 28, - "B": 29, - "C": 30, - "D": 31, - "E": 32, - "F": 33, - "G": 34, - "H": 35, - "I": 36, - "J": 37, - "K": 38, - "L": 39, - "M": 40, - "N": 41, - "O": 42, - "P": 43, - "Q": 44, - "R": 45, - "S": 46, - "T": 47, - "U": 48, - "V": 49, - "W": 50, - "X": 51, - "Y": 52, - "Z": 53, - "[": 54, - "\\": 55, - "]": 56, - "^": 57, - "a": 58, - "b": 59, - "c": 60, - "d": 61, - "e": 62, - "f": 63, - "g": 64, - "h": 65, - "i": 66, - "j": 67, - "k": 68, - "l": 69, - "m": 70, - "n": 71, - "o": 72, - "p": 73, - "q": 74, - "r": 75, - "s": 76, - "t": 77, - "u": 78, - "v": 79, - "w": 80, - "x": 81, - "y": 82, - "z": 83, - "{": 84, - "|": 85, - "~": 86, - "\u00a3": 87, - "\u00a4": 88, - "\u00a7": 89, - "\u00af": 90, - "\u00b6": 91, - "\u00c2": 92, - "\u00c3": 93, - "\u00c5": 94, - "\u00c7": 95, - "\u00c8": 96, - "\u00c9": 97, - "\u00cc": 98, - "\u00d6": 99, - "\u00dc": 100, - "\u00df": 101, - "\u00e0": 102, - "\u00e1": 103, - "\u00e2": 104, - "\u00e3": 105, - "\u00e4": 106, - "\u00e5": 107, - "\u00e7": 108, - "\u00e8": 109, - "\u00e9": 110, - "\u00ea": 111, - "\u00eb": 112, - "\u00ec": 113, - "\u00ed": 114, - "\u00ee": 115, - "\u00f1": 116, - "\u00f2": 117, - "\u00f3": 118, - "\u00f4": 119, - 
"\u00f5": 120, - "\u00f6": 121, - "\u00f8": 122, - "\u00fa": 123, - "\u00fc": 124, - "\u0102": 125, - "\u0107": 126, - "\u010c": 127, - "\u010d": 128, - "\u0141": 129, - "\u0142": 130, - "\u0144": 131, - "\u0151": 132, - "\u0158": 133, - "\u015b": 134, - "\u015e": 135, - "\u0160": 136, - "\u0161": 137, - "\u017a": 138, - "\u017b": 139, - "\u017c": 140, - "\u0288": 141, - "\u02c7": 142, - "\u02dd": 143, - "\u0352": 144, - "\u03f0": 145, - "\u03f1": 146, - "\u03f2": 147, - "\u0846": 148, - "\u2020": 149, - "\u2021": 150, - "\u2022": 151, - "\u204e": 152, - "\u20ac": 153, - "\u21d1": 154, - "\u2217": 155, - "\u22a5": 156, - "\u262f": 157, - "\u2663": 158, - "\u2666": 159, - "\u4e9a": 160, - "\u52c7": 161, - "\u5468": 162, - "\u56db": 163, - "\u5e73": 164, - "\u671b": 165, - "\u6797": 166, - "\u6b63": 167, - "\u864e": 168, - "\u9676": 169, - "\u9f9a": 170, - "\uf078": 171 - }, - "vocab_tag": { - "<PAD>": 0, - "B-<forename>": 1, - "B-<marker>": 2, - "B-<middlename>": 3, - "B-<suffix>": 4, - "B-<surname>": 5, - "B-<title>": 6, - "I-<forename>": 7, - "I-<marker>": 8, - "I-<middlename>": 9, - "I-<surname>": 10, - "I-<title>": 11, - "O": 12 - }, - "vocab_case": [ - "<PAD>", - "numeric", - "allLower", - "allUpper", - "initialUpper", - "other", - "mainly_numeric", - "contains_digit" - ], - "max_char_length": 30, - "feature_preprocessor": null, - "indice_tag": { - "0": "<PAD>", - "1": "B-<forename>", - "2": "B-<marker>", - "3": "B-<middlename>", - "4": "B-<suffix>", - "5": "B-<surname>", - "6": "B-<title>", - "7": "I-<forename>", - "8": "I-<marker>", - "9": "I-<middlename>", - "10": "I-<surname>", - "11": "I-<title>", - "12": "O" - } -} \ No newline at end of file diff --git a/grobid-home/models/name-header-BidLSTM_CRF_FEATURES/config.json b/grobid-home/models/name-header-BidLSTM_CRF_FEATURES/config.json deleted file mode 100644 index 171c2ea342..0000000000 --- a/grobid-home/models/name-header-BidLSTM_CRF_FEATURES/config.json +++ /dev/null @@ -1,88 +0,0 @@ -{ - "model_name": 
"name-header-BidLSTM_CRF_FEATURES", - "architecture": "BidLSTM_CRF_FEATURES", - "embeddings_name": "glove-840B", - "char_vocab_size": 172, - "case_vocab_size": 8, - "char_embedding_size": 25, - "num_char_lstm_units": 25, - "max_char_length": 30, - "features_vocabulary_size": 12, - "features_indices": [ - 9, - 10, - 11, - 12, - 13, - 14, - 15, - 16, - 17, - 18 - ], - "features_embedding_size": 4, - "features_lstm_units": 4, - "max_sequence_length": 3000, - "word_embedding_size": 300, - "num_word_lstm_units": 100, - "case_embedding_size": 5, - "dropout": 0.5, - "recurrent_dropout": 0.5, - "use_crf": true, - "use_chain_crf": false, - "fold_number": 1, - "batch_size": 20, - "transformer_name": null, - "use_ELMo": false, - "features_map_to_index": { - "9": { - "LINEEND": 1, - "LINEIN": 2, - "LINESTART": 3 - }, - "10": { - "ALLCAP": 13, - "INITCAP": 14, - "NOCAPS": 15 - }, - "11": { - "ALLDIGIT": 25, - "CONTAINSDIGITS": 26, - "NODIGIT": 27 - }, - "12": { - "0": 37, - "1": 38 - }, - "13": { - "0": 49, - "1": 50 - }, - "14": { - "0": 61, - "1": 62 - }, - "15": { - "0": 73, - "1": 74 - }, - "16": { - "0": 85, - "1": 86 - }, - "17": { - "0": 97, - "1": 98 - }, - "18": { - "COMMA": 109, - "DOT": 110, - "ENDBRACKET": 111, - "HYPHEN": 112, - "NOPUNCT": 113, - "OPENBRACKET": 114, - "PUNCT": 115, - "QUOTE": 116 - } - } -} \ No newline at end of file diff --git a/grobid-home/models/name-header-BidLSTM_CRF_FEATURES/model_weights.hdf5 b/grobid-home/models/name-header-BidLSTM_CRF_FEATURES/model_weights.hdf5 deleted file mode 100644 index 36a299a641..0000000000 Binary files a/grobid-home/models/name-header-BidLSTM_CRF_FEATURES/model_weights.hdf5 and /dev/null differ diff --git a/grobid-home/models/name-header-BidLSTM_CRF_FEATURES/preprocessor.json b/grobid-home/models/name-header-BidLSTM_CRF_FEATURES/preprocessor.json deleted file mode 100644 index 48af3fe63b..0000000000 --- a/grobid-home/models/name-header-BidLSTM_CRF_FEATURES/preprocessor.json +++ /dev/null @@ -1,290 +0,0 @@ -{ - 
"padding": true, - "return_lengths": true, - "return_word_embeddings": true, - "return_casing": false, - "return_features": true, - "return_chars": true, - "return_bert_embeddings": false, - "vocab_char": { - "<PAD>": 0, - "<UNK>": 1, - "!": 2, - "\"": 3, - "#": 4, - "$": 5, - "&": 6, - "'": 7, - "(": 8, - ")": 9, - "*": 10, - "+": 11, - ",": 12, - "-": 13, - ".": 14, - "0": 15, - "1": 16, - "2": 17, - "3": 18, - "4": 19, - "5": 20, - "6": 21, - "7": 22, - "8": 23, - "9": 24, - ":": 25, - ";": 26, - "@": 27, - "A": 28, - "B": 29, - "C": 30, - "D": 31, - "E": 32, - "F": 33, - "G": 34, - "H": 35, - "I": 36, - "J": 37, - "K": 38, - "L": 39, - "M": 40, - "N": 41, - "O": 42, - "P": 43, - "Q": 44, - "R": 45, - "S": 46, - "T": 47, - "U": 48, - "V": 49, - "W": 50, - "X": 51, - "Y": 52, - "Z": 53, - "[": 54, - "\\": 55, - "]": 56, - "^": 57, - "a": 58, - "b": 59, - "c": 60, - "d": 61, - "e": 62, - "f": 63, - "g": 64, - "h": 65, - "i": 66, - "j": 67, - "k": 68, - "l": 69, - "m": 70, - "n": 71, - "o": 72, - "p": 73, - "q": 74, - "r": 75, - "s": 76, - "t": 77, - "u": 78, - "v": 79, - "w": 80, - "x": 81, - "y": 82, - "z": 83, - "{": 84, - "|": 85, - "~": 86, - "\u00a3": 87, - "\u00a4": 88, - "\u00a7": 89, - "\u00af": 90, - "\u00b6": 91, - "\u00c2": 92, - "\u00c3": 93, - "\u00c5": 94, - "\u00c7": 95, - "\u00c8": 96, - "\u00c9": 97, - "\u00cc": 98, - "\u00d6": 99, - "\u00dc": 100, - "\u00df": 101, - "\u00e0": 102, - "\u00e1": 103, - "\u00e2": 104, - "\u00e3": 105, - "\u00e4": 106, - "\u00e5": 107, - "\u00e7": 108, - "\u00e8": 109, - "\u00e9": 110, - "\u00ea": 111, - "\u00eb": 112, - "\u00ec": 113, - "\u00ed": 114, - "\u00ee": 115, - "\u00f1": 116, - "\u00f2": 117, - "\u00f3": 118, - "\u00f4": 119, - "\u00f5": 120, - "\u00f6": 121, - "\u00f8": 122, - "\u00fa": 123, - "\u00fc": 124, - "\u0102": 125, - "\u0107": 126, - "\u010c": 127, - "\u010d": 128, - "\u0141": 129, - "\u0142": 130, - "\u0144": 131, - "\u0151": 132, - "\u0158": 133, - "\u015b": 134, - "\u015e": 135, - "\u0160": 
136, - "\u0161": 137, - "\u017a": 138, - "\u017b": 139, - "\u017c": 140, - "\u0288": 141, - "\u02c7": 142, - "\u02dd": 143, - "\u0352": 144, - "\u03f0": 145, - "\u03f1": 146, - "\u03f2": 147, - "\u0846": 148, - "\u2020": 149, - "\u2021": 150, - "\u2022": 151, - "\u204e": 152, - "\u20ac": 153, - "\u21d1": 154, - "\u2217": 155, - "\u22a5": 156, - "\u262f": 157, - "\u2663": 158, - "\u2666": 159, - "\u4e9a": 160, - "\u52c7": 161, - "\u5468": 162, - "\u56db": 163, - "\u5e73": 164, - "\u671b": 165, - "\u6797": 166, - "\u6b63": 167, - "\u864e": 168, - "\u9676": 169, - "\u9f9a": 170, - "\uf078": 171 - }, - "vocab_tag": { - "<PAD>": 0, - "B-<forename>": 1, - "B-<marker>": 2, - "B-<middlename>": 3, - "B-<suffix>": 4, - "B-<surname>": 5, - "B-<title>": 6, - "I-<forename>": 7, - "I-<marker>": 8, - "I-<middlename>": 9, - "I-<surname>": 10, - "I-<title>": 11, - "O": 12 - }, - "vocab_case": [ - "<PAD>", - "numeric", - "allLower", - "allUpper", - "initialUpper", - "other", - "mainly_numeric", - "contains_digit" - ], - "max_char_length": 30, - "feature_preprocessor": { - "features_vocabulary_size": 12, - "features_indices": [ - 9, - 10, - 11, - 12, - 13, - 14, - 15, - 16, - 17, - 18 - ], - "features_map_to_index": { - "9": { - "LINEEND": 1, - "LINEIN": 2, - "LINESTART": 3 - }, - "10": { - "ALLCAP": 13, - "INITCAP": 14, - "NOCAPS": 15 - }, - "11": { - "ALLDIGIT": 25, - "CONTAINSDIGITS": 26, - "NODIGIT": 27 - }, - "12": { - "0": 37, - "1": 38 - }, - "13": { - "0": 49, - "1": 50 - }, - "14": { - "0": 61, - "1": 62 - }, - "15": { - "0": 73, - "1": 74 - }, - "16": { - "0": 85, - "1": 86 - }, - "17": { - "0": 97, - "1": 98 - }, - "18": { - "COMMA": 109, - "DOT": 110, - "ENDBRACKET": 111, - "HYPHEN": 112, - "NOPUNCT": 113, - "OPENBRACKET": 114, - "PUNCT": 115, - "QUOTE": 116 - } - } - }, - "indice_tag": { - "0": "<PAD>", - "1": "B-<forename>", - "2": "B-<marker>", - "3": "B-<middlename>", - "4": "B-<suffix>", - "5": "B-<surname>", - "6": "B-<title>", - "7": "I-<forename>", - "8": 
"I-<marker>", - "9": "I-<middlename>", - "10": "I-<surname>", - "11": "I-<title>", - "12": "O" - } -} \ No newline at end of file diff --git a/grobid-home/models/reference-segmenter-BidLSTM_CRF_FEATURES.onnx/config.json b/grobid-home/models/reference-segmenter-BidLSTM_CRF_FEATURES.onnx/config.json new file mode 100644 index 0000000000..f334b7e215 --- /dev/null +++ b/grobid-home/models/reference-segmenter-BidLSTM_CRF_FEATURES.onnx/config.json @@ -0,0 +1,33 @@ +{ + "modelName": "grobid-reference-segmenter-BidLSTM_CRF_FEATURES", + "architecture": "BidLSTM_CRF_FEATURES", + "wordEmbeddingSize": 300, + "charEmbeddingSize": 25, + "numCharLstmUnits": 25, + "numWordLstmUnits": 100, + "maxSequenceLength": 3000, + "embeddingsName": "glove-840B", + "featuresIndices": [ + 9, + 10, + 11, + 12, + 13, + 14, + 15, + 16, + 17, + 18, + 19, + 20, + 21, + 22, + 23, + 24, + 25, + 26 + ], + "featuresEmbeddingSize": 4, + "featuresLstmUnits": 4, + "featuresVocabularySize": 12 +} \ No newline at end of file diff --git a/grobid-home/models/reference-segmenter-BidLSTM_CRF_FEATURES.onnx/crf_params.json b/grobid-home/models/reference-segmenter-BidLSTM_CRF_FEATURES.onnx/crf_params.json new file mode 100644 index 0000000000..2a83cfbf05 --- /dev/null +++ b/grobid-home/models/reference-segmenter-BidLSTM_CRF_FEATURES.onnx/crf_params.json @@ -0,0 +1,68 @@ +{ + "transitions": [ + [ + -0.035008031874895096, + -0.04882281273603439, + -0.24163351953029633, + -0.047716833651065826, + -0.2241421639919281, + -0.1709415316581726 + ], + [ + -0.07010223716497421, + -0.20693330466747284, + 0.05974139645695686, + 0.14560627937316895, + -0.16560286283493042, + -0.10017284005880356 + ], + [ + -0.09943439066410065, + -0.08676478266716003, + -0.33931127190589905, + -0.06375936418771744, + 0.11801717430353165, + -0.014736460521817207 + ], + [ + -0.07629616558551788, + -0.14463835954666138, + 0.04880824685096741, + 0.05476561561226845, + -0.2306673675775528, + -0.007203778717666864 + ], + [ + -0.21132022142410278, + 
0.10044291615486145, + -0.0018908551428467035, + -0.15064720809459686, + 0.16896380484104156, + -0.17745210230350494 + ], + [ + -0.13350343704223633, + 0.13824297487735748, + 0.023562461137771606, + 0.00014155093231238425, + -0.22973428666591644, + 0.09059157967567444 + ] + ], + "startTransitions": [ + -0.36013272404670715, + -0.1196531355381012, + -0.2237708568572998, + -0.2663116753101349, + -0.34630417823791504, + 0.32599008083343506 + ], + "endTransitions": [ + 0.07183124870061874, + 0.019855862483382225, + -0.2772512435913086, + -0.14116591215133667, + 0.05183485895395279, + -0.15485863387584686 + ] +} \ No newline at end of file diff --git a/grobid-home/models/reference-segmenter-BidLSTM_CRF_FEATURES.onnx/encoder.onnx b/grobid-home/models/reference-segmenter-BidLSTM_CRF_FEATURES.onnx/encoder.onnx new file mode 100644 index 0000000000..22dbef9883 Binary files /dev/null and b/grobid-home/models/reference-segmenter-BidLSTM_CRF_FEATURES.onnx/encoder.onnx differ diff --git a/grobid-home/models/reference-segmenter-BidLSTM_CRF_FEATURES.onnx/vocab.json b/grobid-home/models/reference-segmenter-BidLSTM_CRF_FEATURES.onnx/vocab.json new file mode 100644 index 0000000000..e9106c20ac --- /dev/null +++ b/grobid-home/models/reference-segmenter-BidLSTM_CRF_FEATURES.onnx/vocab.json @@ -0,0 +1,323 @@ +{ + "charVocab": { + "<PAD>": 0, + "<UNK>": 1, + "!": 2, + "\"": 3, + "#": 4, + "%": 5, + "&": 6, + "'": 7, + "(": 8, + ")": 9, + "*": 10, + "+": 11, + ",": 12, + "-": 13, + ".": 14, + "/": 15, + "0": 16, + "1": 17, + "2": 18, + "3": 19, + "4": 20, + "5": 21, + "6": 22, + "7": 23, + "8": 24, + "9": 25, + ":": 26, + ";": 27, + "<": 28, + "=": 29, + ">": 30, + "?": 31, + "@": 32, + "A": 33, + "B": 34, + "C": 35, + "D": 36, + "E": 37, + "F": 38, + "G": 39, + "H": 40, + "I": 41, + "J": 42, + "K": 43, + "L": 44, + "M": 45, + "N": 46, + "O": 47, + "P": 48, + "Q": 49, + "R": 50, + "S": 51, + "T": 52, + "U": 53, + "V": 54, + "W": 55, + "X": 56, + "Y": 57, + "Z": 58, + "[": 59, + "]": 60, 
+ "_": 61, + "`": 62, + "a": 63, + "b": 64, + "c": 65, + "d": 66, + "e": 67, + "f": 68, + "g": 69, + "h": 70, + "i": 71, + "j": 72, + "k": 73, + "l": 74, + "m": 75, + "n": 76, + "o": 77, + "p": 78, + "q": 79, + "r": 80, + "s": 81, + "t": 82, + "u": 83, + "v": 84, + "w": 85, + "x": 86, + "y": 87, + "z": 88, + "{": 89, + "|": 90, + "}": 91, + "~": 92, + "¨": 93, + "±": 94, + "´": 95, + "¸": 96, + "Á": 97, + "Ã": 98, + "É": 99, + "Ö": 100, + "×": 101, + "Ø": 102, + "ß": 103, + "à": 104, + "á": 105, + "â": 106, + "ã": 107, + "ä": 108, + "å": 109, + "ç": 110, + "è": 111, + "é": 112, + "ê": 113, + "ì": 114, + "í": 115, + "î": 116, + "ñ": 117, + "ò": 118, + "ó": 119, + "ô": 120, + "ö": 121, + "ø": 122, + "ú": 123, + "ü": 124, + "ý": 125, + "ÿ": 126, + "Ă": 127, + "ć": 128, + "č": 129, + "ğ": 130, + "İ": 131, + "ı": 132, + "IJ": 133, + "ľ": 134, + "ł": 135, + "ń": 136, + "ň": 137, + "ř": 138, + "ş": 139, + "š": 140, + "ů": 141, + "Ź": 142, + "ž": 143, + "ˆ": 144, + "˜": 145, + "Λ": 146, + "β": 147, + "γ": 148, + "π": 149, + "σ": 150, + "φ": 151, + "ϩ": 152, + "Ϫ": 153, + "ϫ": 154, + "ϳ": 155, + "Ј": 156, + "–": 157, + "—": 158, + "†": 159, + "•": 160, + "…": 161, + "→": 162, + "∆": 163, + "−": 164, + "∞": 165, + "∼": 166, + "≅": 167, + "␣": 168, + "␤": 169, + "": 170, + "": 171 + }, + "tagVocab": { + "<PAD>": 0, + "B-<label>": 1, + "B-<reference>": 2, + "I-<label>": 3, + "I-<reference>": 4, + "O": 5 + }, + "tagIndex": { + "0": "<PAD>", + "1": "B-<label>", + "2": "B-<reference>", + "3": "I-<label>", + "4": "I-<reference>", + "5": "O" + }, + "maxCharLength": 30, + "returnChars": false, + "featuresIndices": [ + 9, + 10, + 11, + 12, + 13, + 14, + 15, + 16, + 17, + 18, + 19, + 20, + 21, + 22, + 23, + 24, + 25, + 26 + ], + "featuresVocabularySize": 12, + "featuresMapToIndex": { + "9": { + "LINEEND": 1, + "LINEIN": 2, + "LINESTART": 3 + }, + "10": { + "ALIGNEDLEFT": 13, + "LINEINDENT": 14 + }, + "11": { + "ALLCAP": 25, + "INITCAP": 26, + "NOCAPS": 27 + }, + "12": { + 
"ALLDIGIT": 37, + "CONTAINSDIGITS": 38, + "NODIGIT": 39 + }, + "13": { + "0": 49, + "1": 50 + }, + "14": { + "0": 61, + "1": 62 + }, + "15": { + "0": 73, + "1": 74 + }, + "16": { + "0": 85 + }, + "17": { + "0": 97 + }, + "18": { + "0": 109, + "1": 110 + }, + "19": { + "0": 121, + "1": 122 + }, + "20": { + "0": 133, + "1": 134 + }, + "21": { + "COMMA": 145, + "DOT": 146, + "ENDBRACKET": 147, + "HYPHEN": 148, + "NOPUNCT": 149, + "OPENBRACKET": 150, + "PUNCT": 151, + "QUOTE": 152 + }, + "22": { + "0": 157, + "1": 158, + "10": 159, + "2": 160, + "3": 161, + "4": 162, + "5": 163, + "6": 164, + "7": 165, + "8": 166, + "9": 167 + }, + "23": { + "0": 169, + "1": 170, + "10": 171, + "2": 172, + "3": 173, + "4": 174, + "5": 175, + "6": 176, + "7": 177, + "8": 178, + "9": 179 + }, + "24": { + "BLOCKEND": 181, + "BLOCKIN": 182, + "BLOCKSTART": 183 + }, + "25": { + "1": 193, + "10": 194, + "2": 195, + "3": 196, + "4": 197, + "5": 198, + "6": 199, + "7": 200, + "8": 201, + "9": 202, + "no": 203 + }, + "26": { + "0": 205 + } + } +} \ No newline at end of file diff --git a/grobid-home/models/reference-segmenter-BidLSTM_CRF_FEATURES/config.json b/grobid-home/models/reference-segmenter-BidLSTM_CRF_FEATURES/config.json deleted file mode 100644 index b37c8fdd3b..0000000000 --- a/grobid-home/models/reference-segmenter-BidLSTM_CRF_FEATURES/config.json +++ /dev/null @@ -1,153 +0,0 @@ -{ - "model_name": "reference-segmenter-BidLSTM_CRF_FEATURES", - "architecture": "BidLSTM_CRF_FEATURES", - "embeddings_name": "glove-840B", - "char_vocab_size": 172, - "case_vocab_size": 8, - "char_embedding_size": 25, - "num_char_lstm_units": 25, - "max_char_length": 30, - "features_vocabulary_size": 12, - "features_indices": [ - 9, - 10, - 11, - 12, - 13, - 14, - 15, - 16, - 17, - 18, - 19, - 20, - 21, - 22, - 23, - 24, - 25, - 26 - ], - "features_embedding_size": 4, - "features_lstm_units": 4, - "max_sequence_length": 3000, - "word_embedding_size": 300, - "num_word_lstm_units": 100, - 
"case_embedding_size": 5, - "dropout": 0.5, - "recurrent_dropout": 0.5, - "use_crf": true, - "use_chain_crf": false, - "fold_number": 1, - "batch_size": 10, - "transformer_name": null, - "use_ELMo": false, - "features_map_to_index": { - "9": { - "LINEEND": 1, - "LINEIN": 2, - "LINESTART": 3 - }, - "10": { - "ALIGNEDLEFT": 13, - "LINEINDENT": 14 - }, - "11": { - "ALLCAP": 25, - "INITCAP": 26, - "NOCAPS": 27 - }, - "12": { - "ALLDIGIT": 37, - "CONTAINSDIGITS": 38, - "NODIGIT": 39 - }, - "13": { - "0": 49, - "1": 50 - }, - "14": { - "0": 61, - "1": 62 - }, - "15": { - "0": 73, - "1": 74 - }, - "16": { - "0": 85 - }, - "17": { - "0": 97 - }, - "18": { - "0": 109, - "1": 110 - }, - "19": { - "0": 121, - "1": 122 - }, - "20": { - "0": 133, - "1": 134 - }, - "21": { - "COMMA": 145, - "DOT": 146, - "ENDBRACKET": 147, - "HYPHEN": 148, - "NOPUNCT": 149, - "OPENBRACKET": 150, - "PUNCT": 151, - "QUOTE": 152 - }, - "22": { - "0": 157, - "1": 158, - "10": 159, - "2": 160, - "3": 161, - "4": 162, - "5": 163, - "6": 164, - "7": 165, - "8": 166, - "9": 167 - }, - "23": { - "0": 169, - "1": 170, - "10": 171, - "2": 172, - "3": 173, - "4": 174, - "5": 175, - "6": 176, - "7": 177, - "8": 178, - "9": 179 - }, - "24": { - "BLOCKEND": 181, - "BLOCKIN": 182, - "BLOCKSTART": 183 - }, - "25": { - "1": 193, - "10": 194, - "2": 195, - "3": 196, - "4": 197, - "5": 198, - "6": 199, - "7": 200, - "8": 201, - "9": 202, - "no": 203 - }, - "26": { - "0": 205 - } - } -} \ No newline at end of file diff --git a/grobid-home/models/reference-segmenter-BidLSTM_CRF_FEATURES/model_weights.hdf5 b/grobid-home/models/reference-segmenter-BidLSTM_CRF_FEATURES/model_weights.hdf5 deleted file mode 100644 index b597e56a11..0000000000 Binary files a/grobid-home/models/reference-segmenter-BidLSTM_CRF_FEATURES/model_weights.hdf5 and /dev/null differ diff --git a/grobid-home/models/reference-segmenter-BidLSTM_CRF_FEATURES/preprocessor.json 
b/grobid-home/models/reference-segmenter-BidLSTM_CRF_FEATURES/preprocessor.json deleted file mode 100644 index 64d9d555af..0000000000 --- a/grobid-home/models/reference-segmenter-BidLSTM_CRF_FEATURES/preprocessor.json +++ /dev/null @@ -1,341 +0,0 @@ -{ - "padding": true, - "return_lengths": true, - "return_word_embeddings": true, - "return_casing": false, - "return_features": true, - "return_chars": true, - "return_bert_embeddings": false, - "vocab_char": { - "<PAD>": 0, - "<UNK>": 1, - "!": 2, - "\"": 3, - "#": 4, - "%": 5, - "&": 6, - "'": 7, - "(": 8, - ")": 9, - "*": 10, - "+": 11, - ",": 12, - "-": 13, - ".": 14, - "/": 15, - "0": 16, - "1": 17, - "2": 18, - "3": 19, - "4": 20, - "5": 21, - "6": 22, - "7": 23, - "8": 24, - "9": 25, - ":": 26, - ";": 27, - "<": 28, - "=": 29, - ">": 30, - "?": 31, - "@": 32, - "A": 33, - "B": 34, - "C": 35, - "D": 36, - "E": 37, - "F": 38, - "G": 39, - "H": 40, - "I": 41, - "J": 42, - "K": 43, - "L": 44, - "M": 45, - "N": 46, - "O": 47, - "P": 48, - "Q": 49, - "R": 50, - "S": 51, - "T": 52, - "U": 53, - "V": 54, - "W": 55, - "X": 56, - "Y": 57, - "Z": 58, - "[": 59, - "]": 60, - "_": 61, - "`": 62, - "a": 63, - "b": 64, - "c": 65, - "d": 66, - "e": 67, - "f": 68, - "g": 69, - "h": 70, - "i": 71, - "j": 72, - "k": 73, - "l": 74, - "m": 75, - "n": 76, - "o": 77, - "p": 78, - "q": 79, - "r": 80, - "s": 81, - "t": 82, - "u": 83, - "v": 84, - "w": 85, - "x": 86, - "y": 87, - "z": 88, - "{": 89, - "|": 90, - "}": 91, - "~": 92, - "\u00a8": 93, - "\u00b1": 94, - "\u00b4": 95, - "\u00b8": 96, - "\u00c1": 97, - "\u00c3": 98, - "\u00c9": 99, - "\u00d6": 100, - "\u00d7": 101, - "\u00d8": 102, - "\u00df": 103, - "\u00e0": 104, - "\u00e1": 105, - "\u00e2": 106, - "\u00e3": 107, - "\u00e4": 108, - "\u00e5": 109, - "\u00e7": 110, - "\u00e8": 111, - "\u00e9": 112, - "\u00ea": 113, - "\u00ec": 114, - "\u00ed": 115, - "\u00ee": 116, - "\u00f1": 117, - "\u00f2": 118, - "\u00f3": 119, - "\u00f4": 120, - "\u00f6": 121, - "\u00f8": 122, - "\u00fa": 
123, - "\u00fc": 124, - "\u00fd": 125, - "\u00ff": 126, - "\u0102": 127, - "\u0107": 128, - "\u010d": 129, - "\u011f": 130, - "\u0130": 131, - "\u0131": 132, - "\u0132": 133, - "\u013e": 134, - "\u0142": 135, - "\u0144": 136, - "\u0148": 137, - "\u0159": 138, - "\u015f": 139, - "\u0161": 140, - "\u016f": 141, - "\u0179": 142, - "\u017e": 143, - "\u02c6": 144, - "\u02dc": 145, - "\u039b": 146, - "\u03b2": 147, - "\u03b3": 148, - "\u03c0": 149, - "\u03c3": 150, - "\u03c6": 151, - "\u03e9": 152, - "\u03ea": 153, - "\u03eb": 154, - "\u03f3": 155, - "\u0408": 156, - "\u2013": 157, - "\u2014": 158, - "\u2020": 159, - "\u2022": 160, - "\u2026": 161, - "\u2192": 162, - "\u2206": 163, - "\u2212": 164, - "\u221e": 165, - "\u223c": 166, - "\u2245": 167, - "\u2423": 168, - "\u2424": 169, - "\uf044": 170, - "\uf062": 171 - }, - "vocab_tag": { - "<PAD>": 0, - "B-<label>": 1, - "B-<reference>": 2, - "I-<label>": 3, - "I-<reference>": 4, - "O": 5 - }, - "vocab_case": [ - "<PAD>", - "numeric", - "allLower", - "allUpper", - "initialUpper", - "other", - "mainly_numeric", - "contains_digit" - ], - "max_char_length": 30, - "feature_preprocessor": { - "features_vocabulary_size": 12, - "features_indices": [ - 9, - 10, - 11, - 12, - 13, - 14, - 15, - 16, - 17, - 18, - 19, - 20, - 21, - 22, - 23, - 24, - 25, - 26 - ], - "features_map_to_index": { - "9": { - "LINEEND": 1, - "LINEIN": 2, - "LINESTART": 3 - }, - "10": { - "ALIGNEDLEFT": 13, - "LINEINDENT": 14 - }, - "11": { - "ALLCAP": 25, - "INITCAP": 26, - "NOCAPS": 27 - }, - "12": { - "ALLDIGIT": 37, - "CONTAINSDIGITS": 38, - "NODIGIT": 39 - }, - "13": { - "0": 49, - "1": 50 - }, - "14": { - "0": 61, - "1": 62 - }, - "15": { - "0": 73, - "1": 74 - }, - "16": { - "0": 85 - }, - "17": { - "0": 97 - }, - "18": { - "0": 109, - "1": 110 - }, - "19": { - "0": 121, - "1": 122 - }, - "20": { - "0": 133, - "1": 134 - }, - "21": { - "COMMA": 145, - "DOT": 146, - "ENDBRACKET": 147, - "HYPHEN": 148, - "NOPUNCT": 149, - "OPENBRACKET": 150, - "PUNCT": 
151, - "QUOTE": 152 - }, - "22": { - "0": 157, - "1": 158, - "10": 159, - "2": 160, - "3": 161, - "4": 162, - "5": 163, - "6": 164, - "7": 165, - "8": 166, - "9": 167 - }, - "23": { - "0": 169, - "1": 170, - "10": 171, - "2": 172, - "3": 173, - "4": 174, - "5": 175, - "6": 176, - "7": 177, - "8": 178, - "9": 179 - }, - "24": { - "BLOCKEND": 181, - "BLOCKIN": 182, - "BLOCKSTART": 183 - }, - "25": { - "1": 193, - "10": 194, - "2": 195, - "3": 196, - "4": 197, - "5": 198, - "6": 199, - "7": 200, - "8": 201, - "9": 202, - "no": 203 - }, - "26": { - "0": 205 - } - } - }, - "indice_tag": { - "0": "<PAD>", - "1": "B-<label>", - "2": "B-<reference>", - "3": "I-<label>", - "4": "I-<reference>", - "5": "O" - } -} \ No newline at end of file diff --git a/grobid-home/models/reference-segmenter-BidLSTM_ChainCRF_FEATURES/config.json b/grobid-home/models/reference-segmenter-BidLSTM_ChainCRF_FEATURES/config.json deleted file mode 100644 index 55c17eb347..0000000000 --- a/grobid-home/models/reference-segmenter-BidLSTM_ChainCRF_FEATURES/config.json +++ /dev/null @@ -1,153 +0,0 @@ -{ - "model_name": "reference-segmenter-BidLSTM_ChainCRF_FEATURES", - "architecture": "BidLSTM_ChainCRF_FEATURES", - "embeddings_name": "glove-840B", - "char_vocab_size": 172, - "case_vocab_size": 8, - "char_embedding_size": 25, - "num_char_lstm_units": 25, - "max_char_length": 30, - "features_vocabulary_size": 12, - "features_indices": [ - 9, - 10, - 11, - 12, - 13, - 14, - 15, - 16, - 17, - 18, - 19, - 20, - 21, - 22, - 23, - 24, - 25, - 26 - ], - "features_embedding_size": 4, - "features_lstm_units": 4, - "max_sequence_length": 3000, - "word_embedding_size": 300, - "num_word_lstm_units": 100, - "case_embedding_size": 5, - "dropout": 0.5, - "recurrent_dropout": 0.5, - "use_crf": true, - "use_chain_crf": true, - "fold_number": 1, - "batch_size": 10, - "transformer_name": null, - "use_ELMo": false, - "features_map_to_index": { - "9": { - "LINEEND": 1, - "LINEIN": 2, - "LINESTART": 3 - }, - "10": { - 
"ALIGNEDLEFT": 13, - "LINEINDENT": 14 - }, - "11": { - "ALLCAP": 25, - "INITCAP": 26, - "NOCAPS": 27 - }, - "12": { - "ALLDIGIT": 37, - "CONTAINSDIGITS": 38, - "NODIGIT": 39 - }, - "13": { - "0": 49, - "1": 50 - }, - "14": { - "0": 61, - "1": 62 - }, - "15": { - "0": 73, - "1": 74 - }, - "16": { - "0": 85 - }, - "17": { - "0": 97 - }, - "18": { - "0": 109, - "1": 110 - }, - "19": { - "0": 121, - "1": 122 - }, - "20": { - "0": 133, - "1": 134 - }, - "21": { - "COMMA": 145, - "DOT": 146, - "ENDBRACKET": 147, - "HYPHEN": 148, - "NOPUNCT": 149, - "OPENBRACKET": 150, - "PUNCT": 151, - "QUOTE": 152 - }, - "22": { - "0": 157, - "1": 158, - "10": 159, - "2": 160, - "3": 161, - "4": 162, - "5": 163, - "6": 164, - "7": 165, - "8": 166, - "9": 167 - }, - "23": { - "0": 169, - "1": 170, - "10": 171, - "2": 172, - "3": 173, - "4": 174, - "5": 175, - "6": 176, - "7": 177, - "8": 178, - "9": 179 - }, - "24": { - "BLOCKEND": 181, - "BLOCKIN": 182, - "BLOCKSTART": 183 - }, - "25": { - "1": 193, - "10": 194, - "2": 195, - "3": 196, - "4": 197, - "5": 198, - "6": 199, - "7": 200, - "8": 201, - "9": 202, - "no": 203 - }, - "26": { - "0": 205 - } - } -} \ No newline at end of file diff --git a/grobid-home/models/reference-segmenter-BidLSTM_ChainCRF_FEATURES/model_weights.hdf5 b/grobid-home/models/reference-segmenter-BidLSTM_ChainCRF_FEATURES/model_weights.hdf5 deleted file mode 100644 index b8ad7b9c5f..0000000000 Binary files a/grobid-home/models/reference-segmenter-BidLSTM_ChainCRF_FEATURES/model_weights.hdf5 and /dev/null differ diff --git a/grobid-home/models/reference-segmenter-BidLSTM_ChainCRF_FEATURES/preprocessor.json b/grobid-home/models/reference-segmenter-BidLSTM_ChainCRF_FEATURES/preprocessor.json deleted file mode 100644 index 64d9d555af..0000000000 --- a/grobid-home/models/reference-segmenter-BidLSTM_ChainCRF_FEATURES/preprocessor.json +++ /dev/null @@ -1,341 +0,0 @@ -{ - "padding": true, - "return_lengths": true, - "return_word_embeddings": true, - "return_casing": false, 
- "return_features": true, - "return_chars": true, - "return_bert_embeddings": false, - "vocab_char": { - "<PAD>": 0, - "<UNK>": 1, - "!": 2, - "\"": 3, - "#": 4, - "%": 5, - "&": 6, - "'": 7, - "(": 8, - ")": 9, - "*": 10, - "+": 11, - ",": 12, - "-": 13, - ".": 14, - "/": 15, - "0": 16, - "1": 17, - "2": 18, - "3": 19, - "4": 20, - "5": 21, - "6": 22, - "7": 23, - "8": 24, - "9": 25, - ":": 26, - ";": 27, - "<": 28, - "=": 29, - ">": 30, - "?": 31, - "@": 32, - "A": 33, - "B": 34, - "C": 35, - "D": 36, - "E": 37, - "F": 38, - "G": 39, - "H": 40, - "I": 41, - "J": 42, - "K": 43, - "L": 44, - "M": 45, - "N": 46, - "O": 47, - "P": 48, - "Q": 49, - "R": 50, - "S": 51, - "T": 52, - "U": 53, - "V": 54, - "W": 55, - "X": 56, - "Y": 57, - "Z": 58, - "[": 59, - "]": 60, - "_": 61, - "`": 62, - "a": 63, - "b": 64, - "c": 65, - "d": 66, - "e": 67, - "f": 68, - "g": 69, - "h": 70, - "i": 71, - "j": 72, - "k": 73, - "l": 74, - "m": 75, - "n": 76, - "o": 77, - "p": 78, - "q": 79, - "r": 80, - "s": 81, - "t": 82, - "u": 83, - "v": 84, - "w": 85, - "x": 86, - "y": 87, - "z": 88, - "{": 89, - "|": 90, - "}": 91, - "~": 92, - "\u00a8": 93, - "\u00b1": 94, - "\u00b4": 95, - "\u00b8": 96, - "\u00c1": 97, - "\u00c3": 98, - "\u00c9": 99, - "\u00d6": 100, - "\u00d7": 101, - "\u00d8": 102, - "\u00df": 103, - "\u00e0": 104, - "\u00e1": 105, - "\u00e2": 106, - "\u00e3": 107, - "\u00e4": 108, - "\u00e5": 109, - "\u00e7": 110, - "\u00e8": 111, - "\u00e9": 112, - "\u00ea": 113, - "\u00ec": 114, - "\u00ed": 115, - "\u00ee": 116, - "\u00f1": 117, - "\u00f2": 118, - "\u00f3": 119, - "\u00f4": 120, - "\u00f6": 121, - "\u00f8": 122, - "\u00fa": 123, - "\u00fc": 124, - "\u00fd": 125, - "\u00ff": 126, - "\u0102": 127, - "\u0107": 128, - "\u010d": 129, - "\u011f": 130, - "\u0130": 131, - "\u0131": 132, - "\u0132": 133, - "\u013e": 134, - "\u0142": 135, - "\u0144": 136, - "\u0148": 137, - "\u0159": 138, - "\u015f": 139, - "\u0161": 140, - "\u016f": 141, - "\u0179": 142, - "\u017e": 143, - "\u02c6": 
144, - "\u02dc": 145, - "\u039b": 146, - "\u03b2": 147, - "\u03b3": 148, - "\u03c0": 149, - "\u03c3": 150, - "\u03c6": 151, - "\u03e9": 152, - "\u03ea": 153, - "\u03eb": 154, - "\u03f3": 155, - "\u0408": 156, - "\u2013": 157, - "\u2014": 158, - "\u2020": 159, - "\u2022": 160, - "\u2026": 161, - "\u2192": 162, - "\u2206": 163, - "\u2212": 164, - "\u221e": 165, - "\u223c": 166, - "\u2245": 167, - "\u2423": 168, - "\u2424": 169, - "\uf044": 170, - "\uf062": 171 - }, - "vocab_tag": { - "<PAD>": 0, - "B-<label>": 1, - "B-<reference>": 2, - "I-<label>": 3, - "I-<reference>": 4, - "O": 5 - }, - "vocab_case": [ - "<PAD>", - "numeric", - "allLower", - "allUpper", - "initialUpper", - "other", - "mainly_numeric", - "contains_digit" - ], - "max_char_length": 30, - "feature_preprocessor": { - "features_vocabulary_size": 12, - "features_indices": [ - 9, - 10, - 11, - 12, - 13, - 14, - 15, - 16, - 17, - 18, - 19, - 20, - 21, - 22, - 23, - 24, - 25, - 26 - ], - "features_map_to_index": { - "9": { - "LINEEND": 1, - "LINEIN": 2, - "LINESTART": 3 - }, - "10": { - "ALIGNEDLEFT": 13, - "LINEINDENT": 14 - }, - "11": { - "ALLCAP": 25, - "INITCAP": 26, - "NOCAPS": 27 - }, - "12": { - "ALLDIGIT": 37, - "CONTAINSDIGITS": 38, - "NODIGIT": 39 - }, - "13": { - "0": 49, - "1": 50 - }, - "14": { - "0": 61, - "1": 62 - }, - "15": { - "0": 73, - "1": 74 - }, - "16": { - "0": 85 - }, - "17": { - "0": 97 - }, - "18": { - "0": 109, - "1": 110 - }, - "19": { - "0": 121, - "1": 122 - }, - "20": { - "0": 133, - "1": 134 - }, - "21": { - "COMMA": 145, - "DOT": 146, - "ENDBRACKET": 147, - "HYPHEN": 148, - "NOPUNCT": 149, - "OPENBRACKET": 150, - "PUNCT": 151, - "QUOTE": 152 - }, - "22": { - "0": 157, - "1": 158, - "10": 159, - "2": 160, - "3": 161, - "4": 162, - "5": 163, - "6": 164, - "7": 165, - "8": 166, - "9": 167 - }, - "23": { - "0": 169, - "1": 170, - "10": 171, - "2": 172, - "3": 173, - "4": 174, - "5": 175, - "6": 176, - "7": 177, - "8": 178, - "9": 179 - }, - "24": { - "BLOCKEND": 181, - 
"BLOCKIN": 182, - "BLOCKSTART": 183 - }, - "25": { - "1": 193, - "10": 194, - "2": 195, - "3": 196, - "4": 197, - "5": 198, - "6": 199, - "7": 200, - "8": 201, - "9": 202, - "no": 203 - }, - "26": { - "0": 205 - } - } - }, - "indice_tag": { - "0": "<PAD>", - "1": "B-<label>", - "2": "B-<reference>", - "3": "I-<label>", - "4": "I-<reference>", - "5": "O" - } -} \ No newline at end of file diff --git a/grobid-home/models/segmentation-BidLSTM_CRF_FEATURES/config.json b/grobid-home/models/segmentation-BidLSTM_CRF_FEATURES/config.json deleted file mode 100644 index 1b6f0d5f2c..0000000000 --- a/grobid-home/models/segmentation-BidLSTM_CRF_FEATURES/config.json +++ /dev/null @@ -1,182 +0,0 @@ -{ - "model_name": "segmentation-BidLSTM_CRF_FEATURES", - "architecture": "BidLSTM_CRF_FEATURES", - "embeddings_name": "glove-840B", - "char_vocab_size": 685, - "case_vocab_size": 8, - "char_embedding_size": 25, - "num_char_lstm_units": 25, - "max_char_length": 30, - "features_vocabulary_size": 12, - "features_indices": [ - 6, - 7, - 8, - 9, - 10, - 11, - 12, - 13, - 14, - 15, - 16, - 17, - 18, - 19, - 20, - 21, - 22, - 23, - 26, - 27, - 28, - 29, - 30, - 31 - ], - "features_embedding_size": 4, - "features_lstm_units": 4, - "max_sequence_length": 3000, - "word_embedding_size": 300, - "num_word_lstm_units": 100, - "case_embedding_size": 5, - "dropout": 0.5, - "recurrent_dropout": 0.5, - "use_crf": true, - "use_chain_crf": false, - "fold_number": 1, - "batch_size": 10, - "transformer_name": null, - "use_ELMo": false, - "features_map_to_index": { - "6": { - "BLOCKEND": 1, - "BLOCKIN": 2, - "BLOCKSTART": 3 - }, - "7": { - "PAGEEND": 13, - "PAGEIN": 14, - "PAGESTART": 15 - }, - "8": { - "NEWFONT": 25, - "SAMEFONT": 26 - }, - "9": { - "HIGHERFONT": 37, - "LOWERFONT": 38, - "SAMEFONTSIZE": 39 - }, - "10": { - "0": 49, - "1": 50 - }, - "11": { - "0": 61, - "1": 62 - }, - "12": { - "ALLCAP": 73, - "INITCAP": 74, - "NOCAPS": 75 - }, - "13": { - "ALLDIGIT": 85, - "CONTAINSDIGITS": 86, - 
"NODIGIT": 87 - }, - "14": { - "0": 97, - "1": 98 - }, - "15": { - "0": 109, - "1": 110 - }, - "16": { - "0": 121, - "1": 122 - }, - "17": { - "0": 133 - }, - "18": { - "0": 145, - "1": 146 - }, - "19": { - "0": 157, - "1": 158 - }, - "20": { - "0": 169, - "1": 170 - }, - "21": { - "0": 181, - "1": 182 - }, - "22": { - "0": 193, - "1": 194, - "10": 195, - "11": 196, - "2": 197, - "3": 198, - "4": 199, - "5": 200, - "6": 201, - "7": 202, - "8": 203, - "9": 204 - }, - "23": { - "0": 205, - "1": 206, - "10": 207, - "11": 208, - "2": 209, - "3": 210, - "4": 211, - "5": 212, - "6": 213, - "7": 214, - "8": 215, - "9": 216 - }, - "26": { - "0": 217, - "1": 218, - "10": 219, - "2": 220, - "3": 221, - "4": 222, - "5": 223, - "6": 224, - "7": 225, - "8": 226, - "9": 227 - }, - "27": { - "0": 229, - "1": 230 - }, - "28": { - "0": 241, - "1": 242 - }, - "29": { - "0": 253, - "1": 254 - }, - "30": { - "0": 265, - "1": 266 - }, - "31": { - "0": 277, - "1": 278 - } - } -} \ No newline at end of file diff --git a/grobid-home/models/segmentation-BidLSTM_CRF_FEATURES/model_weights.hdf5 b/grobid-home/models/segmentation-BidLSTM_CRF_FEATURES/model_weights.hdf5 deleted file mode 100644 index 3534a31dc9..0000000000 Binary files a/grobid-home/models/segmentation-BidLSTM_CRF_FEATURES/model_weights.hdf5 and /dev/null differ diff --git a/grobid-home/models/segmentation-BidLSTM_CRF_FEATURES/preprocessor.json b/grobid-home/models/segmentation-BidLSTM_CRF_FEATURES/preprocessor.json deleted file mode 100644 index 9a2707c90a..0000000000 --- a/grobid-home/models/segmentation-BidLSTM_CRF_FEATURES/preprocessor.json +++ /dev/null @@ -1,923 +0,0 @@ -{ - "padding": true, - "return_lengths": true, - "return_word_embeddings": true, - "return_casing": false, - "return_features": true, - "return_chars": true, - "return_bert_embeddings": false, - "vocab_char": { - "<PAD>": 0, - "<UNK>": 1, - "!": 2, - "\"": 3, - "#": 4, - "$": 5, - "%": 6, - "&": 7, - "'": 8, - "(": 9, - ")": 10, - "*": 11, - "+": 12, - 
",": 13, - "-": 14, - ".": 15, - "/": 16, - "0": 17, - "1": 18, - "2": 19, - "3": 20, - "4": 21, - "5": 22, - "6": 23, - "7": 24, - "8": 25, - "9": 26, - ":": 27, - ";": 28, - "<": 29, - "=": 30, - ">": 31, - "?": 32, - "@": 33, - "A": 34, - "B": 35, - "C": 36, - "D": 37, - "E": 38, - "F": 39, - "G": 40, - "H": 41, - "I": 42, - "J": 43, - "K": 44, - "L": 45, - "M": 46, - "N": 47, - "O": 48, - "P": 49, - "Q": 50, - "R": 51, - "S": 52, - "T": 53, - "U": 54, - "V": 55, - "W": 56, - "X": 57, - "Y": 58, - "Z": 59, - "[": 60, - "\\": 61, - "]": 62, - "^": 63, - "_": 64, - "`": 65, - "a": 66, - "b": 67, - "c": 68, - "d": 69, - "e": 70, - "f": 71, - "g": 72, - "h": 73, - "i": 74, - "j": 75, - "k": 76, - "l": 77, - "m": 78, - "n": 79, - "o": 80, - "p": 81, - "q": 82, - "r": 83, - "s": 84, - "t": 85, - "u": 86, - "v": 87, - "w": 88, - "x": 89, - "y": 90, - "z": 91, - "{": 92, - "|": 93, - "}": 94, - "~": 95, - "\u00a1": 96, - "\u00a2": 97, - "\u00a3": 98, - "\u00a4": 99, - "\u00a5": 100, - "\u00a6": 101, - "\u00a7": 102, - "\u00a8": 103, - "\u00a9": 104, - "\u00aa": 105, - "\u00ab": 106, - "\u00ac": 107, - "\u00ad": 108, - "\u00ae": 109, - "\u00af": 110, - "\u00b0": 111, - "\u00b1": 112, - "\u00b2": 113, - "\u00b3": 114, - "\u00b4": 115, - "\u00b5": 116, - "\u00b6": 117, - "\u00b7": 118, - "\u00b9": 119, - "\u00ba": 120, - "\u00bb": 121, - "\u00bc": 122, - "\u00bd": 123, - "\u00be": 124, - "\u00bf": 125, - "\u00c0": 126, - "\u00c1": 127, - "\u00c2": 128, - "\u00c3": 129, - "\u00c4": 130, - "\u00c5": 131, - "\u00c7": 132, - "\u00c8": 133, - "\u00c9": 134, - "\u00ca": 135, - "\u00cb": 136, - "\u00cc": 137, - "\u00cd": 138, - "\u00ce": 139, - "\u00cf": 140, - "\u00d0": 141, - "\u00d1": 142, - "\u00d2": 143, - "\u00d3": 144, - "\u00d4": 145, - "\u00d5": 146, - "\u00d6": 147, - "\u00d7": 148, - "\u00d8": 149, - "\u00db": 150, - "\u00dc": 151, - "\u00de": 152, - "\u00df": 153, - "\u00e0": 154, - "\u00e1": 155, - "\u00e2": 156, - "\u00e3": 157, - "\u00e4": 158, - "\u00e5": 159, - 
"\u00e7": 160, - "\u00e8": 161, - "\u00e9": 162, - "\u00ea": 163, - "\u00eb": 164, - "\u00ec": 165, - "\u00ed": 166, - "\u00ee": 167, - "\u00ef": 168, - "\u00f0": 169, - "\u00f1": 170, - "\u00f2": 171, - "\u00f3": 172, - "\u00f4": 173, - "\u00f6": 174, - "\u00f7": 175, - "\u00f8": 176, - "\u00f9": 177, - "\u00fa": 178, - "\u00fb": 179, - "\u00fc": 180, - "\u00fd": 181, - "\u00fe": 182, - "\u00ff": 183, - "\u0100": 184, - "\u0101": 185, - "\u0102": 186, - "\u0103": 187, - "\u0107": 188, - "\u010d": 189, - "\u0117": 190, - "\u0119": 191, - "\u011b": 192, - "\u0125": 193, - "\u012b": 194, - "\u0131": 195, - "\u0141": 196, - "\u0142": 197, - "\u0144": 198, - "\u0148": 199, - "\u0159": 200, - "\u015b": 201, - "\u015c": 202, - "\u0161": 203, - "\u016b": 204, - "\u0173": 205, - "\u0174": 206, - "\u0179": 207, - "\u017e": 208, - "\u0192": 209, - "\u019f": 210, - "\u01eb": 211, - "\u01fa": 212, - "\u0232": 213, - "\u0288": 214, - "\u02b9": 215, - "\u02bc": 216, - "\u02c6": 217, - "\u02d9": 218, - "\u02da": 219, - "\u02dc": 220, - "\u0301": 221, - "\u031f": 222, - "\u033a": 223, - "\u0350": 224, - "\u0351": 225, - "\u0352": 226, - "\u0353": 227, - "\u0354": 228, - "\u0357": 229, - "\u0370": 230, - "\u0384": 231, - "\u0387": 232, - "\u038a": 233, - "\u0391": 234, - "\u0393": 235, - "\u0394": 236, - "\u0397": 237, - "\u0398": 238, - "\u039a": 239, - "\u039b": 240, - "\u039e": 241, - "\u03a0": 242, - "\u03a3": 243, - "\u03a4": 244, - "\u03a5": 245, - "\u03a6": 246, - "\u03a8": 247, - "\u03a9": 248, - "\u03b1": 249, - "\u03b2": 250, - "\u03b3": 251, - "\u03b4": 252, - "\u03b5": 253, - "\u03b6": 254, - "\u03b7": 255, - "\u03b8": 256, - "\u03b9": 257, - "\u03ba": 258, - "\u03bb": 259, - "\u03bc": 260, - "\u03bd": 261, - "\u03be": 262, - "\u03c0": 263, - "\u03c1": 264, - "\u03c3": 265, - "\u03c4": 266, - "\u03c5": 267, - "\u03c6": 268, - "\u03c7": 269, - "\u03c8": 270, - "\u03c9": 271, - "\u03d2": 272, - "\u03d5": 273, - "\u03e9": 274, - "\u03ea": 275, - "\u03eb": 276, - "\u03ed": 
277, - "\u03f1": 278, - "\u03f3": 279, - "\u03f5": 280, - "\u03fd": 281, - "\u03fe": 282, - "\u0408": 283, - "\u0409": 284, - "\u0411": 285, - "\u041b": 286, - "\u041f": 287, - "\u0431": 288, - "\u0432": 289, - "\u0433": 290, - "\u0434": 291, - "\u0438": 292, - "\u043b": 293, - "\u043c": 294, - "\u043d": 295, - "\u043f": 296, - "\u0440": 297, - "\u0442": 298, - "\u0443": 299, - "\u0444": 300, - "\u0445": 301, - "\u0447": 302, - "\u0448": 303, - "\u044b": 304, - "\u044f": 305, - "\u0468": 306, - "\u0472": 307, - "\u0545": 308, - "\u0546": 309, - "\u05e5": 310, - "\u060c": 311, - "\u0621": 312, - "\u0622": 313, - "\u0627": 314, - "\u0628": 315, - "\u062a": 316, - "\u062f": 317, - "\u0631": 318, - "\u0632": 319, - "\u0634": 320, - "\u0641": 321, - "\u0643": 322, - "\u0644": 323, - "\u0645": 324, - "\u0646": 325, - "\u0647": 326, - "\u0648": 327, - "\u064a": 328, - "\u065e": 329, - "\u073b": 330, - "\u07f6": 331, - "\u0d20": 332, - "\u123a": 333, - "\u123b": 334, - "\u1b80": 335, - "\u1e45": 336, - "\u1e47": 337, - "\u1e57": 338, - "\u1e59": 339, - "\u1e5b": 340, - "\u1e63": 341, - "\u1e6d": 342, - "\u1e91": 343, - "\u1ebc": 344, - "\u1ebd": 345, - "\u1ef9": 346, - "\u2016": 347, - "\u2017": 348, - "\u201a": 349, - "\u2020": 350, - "\u2021": 351, - "\u2022": 352, - "\u2026": 353, - "\u202b": 354, - "\u202c": 355, - "\u2032": 356, - "\u2033": 357, - "\u2044": 358, - "\u204e": 359, - "\u2071": 360, - "\u20ac": 361, - "\u20dd": 362, - "\u210e": 363, - "\u2113": 364, - "\u2116": 365, - "\u2122": 366, - "\u2126": 367, - "\u2190": 368, - "\u2191": 369, - "\u2192": 370, - "\u2193": 371, - "\u21b5": 372, - "\u21d0": 373, - "\u21d1": 374, - "\u21d2": 375, - "\u21d4": 376, - "\u21e1": 377, - "\u21e4": 378, - "\u21e5": 379, - "\u2200": 380, - "\u2202": 381, - "\u2203": 382, - "\u2205": 383, - "\u2206": 384, - "\u2207": 385, - "\u2208": 386, - "\u220f": 387, - "\u2211": 388, - "\u2212": 389, - "\u2215": 390, - "\u221a": 391, - "\u221d": 392, - "\u221e": 393, - "\u2227": 394, - 
"\u2228": 395, - "\u2229": 396, - "\u222b": 397, - "\u223c": 398, - "\u2243": 399, - "\u2248": 400, - "\u2260": 401, - "\u2261": 402, - "\u2264": 403, - "\u2265": 404, - "\u226b": 405, - "\u2295": 406, - "\u2296": 407, - "\u2297": 408, - "\u22a5": 409, - "\u22c5": 410, - "\u22c6": 411, - "\u230b": 412, - "\u2327": 413, - "\u2329": 414, - "\u232c": 415, - "\u239b": 416, - "\u239c": 417, - "\u239d": 418, - "\u239e": 419, - "\u239f": 420, - "\u23a0": 421, - "\u23a7": 422, - "\u23a8": 423, - "\u23a9": 424, - "\u2424": 425, - "\u24d2": 426, - "\u2500": 427, - "\u2514": 428, - "\u25a0": 429, - "\u25a1": 430, - "\u25b2": 431, - "\u25b3": 432, - "\u25cb": 433, - "\u262f": 434, - "\u2640": 435, - "\u2642": 436, - "\u2663": 437, - "\u2666": 438, - "\u270f": 439, - "\u2713": 440, - "\u2715": 441, - "\u271d": 442, - "\u27e8": 443, - "\u27e9": 444, - "\u27f6": 445, - "\u2e38": 446, - "\u3002": 447, - "\ue02c": 448, - "\ue03f": 449, - "\ue103": 450, - "\uf001": 451, - "\uf002": 452, - "\uf003": 453, - "\uf004": 454, - "\uf005": 455, - "\uf006": 456, - "\uf007": 457, - "\uf008": 458, - "\uf009": 459, - "\uf00a": 460, - "\uf00b": 461, - "\uf00c": 462, - "\uf00d": 463, - "\uf00e": 464, - "\uf00f": 465, - "\uf010": 466, - "\uf011": 467, - "\uf012": 468, - "\uf013": 469, - "\uf014": 470, - "\uf015": 471, - "\uf016": 472, - "\uf017": 473, - "\uf018": 474, - "\uf019": 475, - "\uf01a": 476, - "\uf01b": 477, - "\uf01c": 478, - "\uf01d": 479, - "\uf01e": 480, - "\uf022": 481, - "\uf028": 482, - "\uf029": 483, - "\uf02b": 484, - "\uf02d": 485, - "\uf03c": 486, - "\uf03d": 487, - "\uf03e": 488, - "\uf04b": 489, - "\uf04f": 490, - "\uf050": 491, - "\uf05b": 492, - "\uf05d": 493, - "\uf060": 494, - "\uf061": 495, - "\uf062": 496, - "\uf064": 497, - "\uf065": 498, - "\uf067": 499, - "\uf06c": 500, - "\uf070": 501, - "\uf071": 502, - "\uf073": 503, - "\uf077": 504, - "\uf07b": 505, - "\uf07d": 506, - "\uf0a0": 507, - "\uf0a2": 508, - "\uf0a3": 509, - "\uf0a5": 510, - "\uf0ae": 511, - "\uf0b0": 
512, - "\uf0b3": 513, - "\uf0b4": 514, - "\uf0b5": 515, - "\uf0b6": 516, - "\uf0bb": 517, - "\uf0ce": 518, - "\uf0d7": 519, - "\uf0de": 520, - "\uf0e5": 521, - "\uf0e6": 522, - "\uf0e7": 523, - "\uf0e8": 524, - "\uf0e9": 525, - "\uf0ea": 526, - "\uf0eb": 527, - "\uf0ec": 528, - "\uf0ed": 529, - "\uf0ee": 530, - "\uf0ef": 531, - "\uf0f2": 532, - "\uf0f6": 533, - "\uf0f7": 534, - "\uf0f8": 535, - "\uf0f9": 536, - "\uf0fa": 537, - "\uf0fb": 538, - "\uf0ff": 539, - "\uf3a4": 540, - "\uf8eb": 541, - "\uf8ec": 542, - "\uf8ed": 543, - "\uf8ee": 544, - "\uf8ef": 545, - "\uf8f0": 546, - "\uf8f1": 547, - "\uf8f2": 548, - "\uf8f3": 549, - "\uf8f4": 550, - "\uf8f6": 551, - "\uf8f7": 552, - "\uf8f8": 553, - "\uf8f9": 554, - "\uf8fa": 555, - "\uf8fb": 556, - "\ufb58": 557, - "\ufb7c": 558, - "\ufb8b": 559, - "\ufb94": 560, - "\ufe86": 561, - "\ufe8e": 562, - "\ufe91": 563, - "\ufe92": 564, - "\ufe96": 565, - "\ufe97": 566, - "\ufe98": 567, - "\ufe9f": 568, - "\ufea0": 569, - "\ufea3": 570, - "\ufea4": 571, - "\ufea7": 572, - "\ufeaa": 573, - "\ufeac": 574, - "\ufeae": 575, - "\ufeb0": 576, - "\ufeb3": 577, - "\ufeb4": 578, - "\ufeb6": 579, - "\ufeb7": 580, - "\ufeb8": 581, - "\ufebb": 582, - "\ufebc": 583, - "\ufebf": 584, - "\ufec4": 585, - "\ufeca": 586, - "\ufecb": 587, - "\ufecc": 588, - "\ufed3": 589, - "\ufed4": 590, - "\ufed7": 591, - "\ufed8": 592, - "\ufedb": 593, - "\ufedc": 594, - "\ufedf": 595, - "\ufee0": 596, - "\ufee2": 597, - "\ufee3": 598, - "\ufee4": 599, - "\ufee6": 600, - "\ufee7": 601, - "\ufee8": 602, - "\ufeea": 603, - "\ufeeb": 604, - "\ufeee": 605, - "\ufef2": 606, - "\ufef3": 607, - "\ufef4": 608, - "\ufefb": 609, - "\ufefc": 610, - "\uff1b": 611, - "\ufffd": 612, - "\ud835\udc2e": 613, - "\ud835\udc34": 614, - "\ud835\udc35": 615, - "\ud835\udc36": 616, - "\ud835\udc37": 617, - "\ud835\udc38": 618, - "\ud835\udc3a": 619, - "\ud835\udc3b": 620, - "\ud835\udc3c": 621, - "\ud835\udc40": 622, - "\ud835\udc41": 623, - "\ud835\udc43": 624, - "\ud835\udc46": 
625, - "\ud835\udc47": 626, - "\ud835\udc48": 627, - "\ud835\udc49": 628, - "\ud835\udc4e": 629, - "\ud835\udc4f": 630, - "\ud835\udc50": 631, - "\ud835\udc52": 632, - "\ud835\udc53": 633, - "\ud835\udc54": 634, - "\ud835\udc56": 635, - "\ud835\udc57": 636, - "\ud835\udc58": 637, - "\ud835\udc59": 638, - "\ud835\udc5a": 639, - "\ud835\udc5b": 640, - "\ud835\udc5c": 641, - "\ud835\udc5f": 642, - "\ud835\udc60": 643, - "\ud835\udc61": 644, - "\ud835\udc62": 645, - "\ud835\udc64": 646, - "\ud835\udc65": 647, - "\ud835\udc66": 648, - "\ud835\udc67": 649, - "\ud835\udc69": 650, - "\ud835\udc6b": 651, - "\ud835\udc6d": 652, - "\ud835\udc78": 653, - "\ud835\udc83": 654, - "\ud835\udc84": 655, - "\ud835\udc86": 656, - "\ud835\udc89": 657, - "\ud835\udc8a": 658, - "\ud835\udc8e": 659, - "\ud835\udc8f": 660, - "\ud835\udc90": 661, - "\ud835\udc93": 662, - "\ud835\udc94": 663, - "\ud835\udc95": 664, - "\ud835\udc98": 665, - "\ud835\udd38": 666, - "\ud835\udeb7": 667, - "\ud835\udeec": 668, - "\ud835\udef4": 669, - "\ud835\udefc": 670, - "\ud835\udefd": 671, - "\ud835\udeff": 672, - "\ud835\udf03": 673, - "\ud835\udf08": 674, - "\ud835\udf0f": 675, - "\ud835\udf11": 676, - "\ud835\udf14": 677, - "\ud835\udf19": 678, - "\ud835\udf40": 679, - "\ud835\udf4e": 680, - "\ud835\udfce": 681, - "\ud835\udfcf": 682, - "\ud835\udfd0": 683, - "\ud835\udfd5": 684 - }, - "vocab_tag": { - "<PAD>": 0, - "B-<acknowledgement>": 1, - "B-<annex>": 2, - "B-<availability>": 3, - "B-<body>": 4, - "B-<cover>": 5, - "B-<footnote>": 6, - "B-<funding>": 7, - "B-<header>": 8, - "B-<headnote>": 9, - "B-<page>": 10, - "B-<references>": 11, - "B-<toc>": 12, - "I-<acknowledgement>": 13, - "I-<annex>": 14, - "I-<availability>": 15, - "I-<body>": 16, - "I-<cover>": 17, - "I-<footnote>": 18, - "I-<funding>": 19, - "I-<header>": 20, - "I-<headnote>": 21, - "I-<page>": 22, - "I-<references>": 23, - "I-<toc>": 24, - "O": 25 - }, - "vocab_case": [ - "<PAD>", - "numeric", - "allLower", - "allUpper", - 
"initialUpper", - "other", - "mainly_numeric", - "contains_digit" - ], - "max_char_length": 30, - "feature_preprocessor": { - "features_vocabulary_size": 12, - "features_indices": [ - 6, - 7, - 8, - 9, - 10, - 11, - 12, - 13, - 14, - 15, - 16, - 17, - 18, - 19, - 20, - 21, - 22, - 23, - 26, - 27, - 28, - 29, - 30, - 31 - ], - "features_map_to_index": { - "6": { - "BLOCKEND": 1, - "BLOCKIN": 2, - "BLOCKSTART": 3 - }, - "7": { - "PAGEEND": 13, - "PAGEIN": 14, - "PAGESTART": 15 - }, - "8": { - "NEWFONT": 25, - "SAMEFONT": 26 - }, - "9": { - "HIGHERFONT": 37, - "LOWERFONT": 38, - "SAMEFONTSIZE": 39 - }, - "10": { - "0": 49, - "1": 50 - }, - "11": { - "0": 61, - "1": 62 - }, - "12": { - "ALLCAP": 73, - "INITCAP": 74, - "NOCAPS": 75 - }, - "13": { - "ALLDIGIT": 85, - "CONTAINSDIGITS": 86, - "NODIGIT": 87 - }, - "14": { - "0": 97, - "1": 98 - }, - "15": { - "0": 109, - "1": 110 - }, - "16": { - "0": 121, - "1": 122 - }, - "17": { - "0": 133 - }, - "18": { - "0": 145, - "1": 146 - }, - "19": { - "0": 157, - "1": 158 - }, - "20": { - "0": 169, - "1": 170 - }, - "21": { - "0": 181, - "1": 182 - }, - "22": { - "0": 193, - "1": 194, - "10": 195, - "11": 196, - "2": 197, - "3": 198, - "4": 199, - "5": 200, - "6": 201, - "7": 202, - "8": 203, - "9": 204 - }, - "23": { - "0": 205, - "1": 206, - "10": 207, - "11": 208, - "2": 209, - "3": 210, - "4": 211, - "5": 212, - "6": 213, - "7": 214, - "8": 215, - "9": 216 - }, - "26": { - "0": 217, - "1": 218, - "10": 219, - "2": 220, - "3": 221, - "4": 222, - "5": 223, - "6": 224, - "7": 225, - "8": 226, - "9": 227 - }, - "27": { - "0": 229, - "1": 230 - }, - "28": { - "0": 241, - "1": 242 - }, - "29": { - "0": 253, - "1": 254 - }, - "30": { - "0": 265, - "1": 266 - }, - "31": { - "0": 277, - "1": 278 - } - } - }, - "indice_tag": { - "0": "<PAD>", - "1": "B-<acknowledgement>", - "2": "B-<annex>", - "3": "B-<availability>", - "4": "B-<body>", - "5": "B-<cover>", - "6": "B-<footnote>", - "7": "B-<funding>", - "8": "B-<header>", - "9": 
"B-<headnote>", - "10": "B-<page>", - "11": "B-<references>", - "12": "B-<toc>", - "13": "I-<acknowledgement>", - "14": "I-<annex>", - "15": "I-<availability>", - "16": "I-<body>", - "17": "I-<cover>", - "18": "I-<footnote>", - "19": "I-<funding>", - "20": "I-<header>", - "21": "I-<headnote>", - "22": "I-<page>", - "23": "I-<references>", - "24": "I-<toc>", - "25": "O" - } -} \ No newline at end of file diff --git a/grobid-home/models/table-BidLSTM_CRF/config.json b/grobid-home/models/table-BidLSTM_CRF/config.json deleted file mode 100644 index 5f3db31ac3..0000000000 --- a/grobid-home/models/table-BidLSTM_CRF/config.json +++ /dev/null @@ -1,26 +0,0 @@ -{ - "model_name": "table-BidLSTM_CRF", - "architecture": "BidLSTM_CRF", - "embeddings_name": "glove-840B", - "char_vocab_size": 104, - "case_vocab_size": 8, - "char_embedding_size": 25, - "num_char_lstm_units": 25, - "max_char_length": 30, - "features_vocabulary_size": 12, - "features_indices": null, - "features_embedding_size": 4, - "features_lstm_units": 4, - "max_sequence_length": 3000, - "word_embedding_size": 300, - "num_word_lstm_units": 100, - "case_embedding_size": 5, - "dropout": 0.5, - "recurrent_dropout": 0.5, - "use_crf": true, - "use_chain_crf": false, - "fold_number": 1, - "batch_size": 20, - "transformer_name": null, - "use_ELMo": false -} \ No newline at end of file diff --git a/grobid-home/models/table-BidLSTM_CRF/model_weights.hdf5 b/grobid-home/models/table-BidLSTM_CRF/model_weights.hdf5 deleted file mode 100644 index 4b926694b0..0000000000 Binary files a/grobid-home/models/table-BidLSTM_CRF/model_weights.hdf5 and /dev/null differ diff --git a/grobid-home/models/table-BidLSTM_CRF/preprocessor.json b/grobid-home/models/table-BidLSTM_CRF/preprocessor.json deleted file mode 100644 index cd140757bf..0000000000 --- a/grobid-home/models/table-BidLSTM_CRF/preprocessor.json +++ /dev/null @@ -1,153 +0,0 @@ -{ - "padding": true, - "return_lengths": true, - "return_word_embeddings": true, - "return_casing": 
false, - "return_features": false, - "return_chars": true, - "return_bert_embeddings": false, - "vocab_char": { - "<PAD>": 0, - "<UNK>": 1, - "\"": 2, - "#": 3, - "%": 4, - "&": 5, - "'": 6, - "(": 7, - ")": 8, - "*": 9, - "+": 10, - ",": 11, - "-": 12, - ".": 13, - "/": 14, - "0": 15, - "1": 16, - "2": 17, - "3": 18, - "4": 19, - "5": 20, - "6": 21, - "7": 22, - "8": 23, - "9": 24, - ":": 25, - ";": 26, - "<": 27, - "=": 28, - "?": 29, - "A": 30, - "B": 31, - "C": 32, - "D": 33, - "E": 34, - "F": 35, - "G": 36, - "H": 37, - "I": 38, - "J": 39, - "K": 40, - "L": 41, - "M": 42, - "N": 43, - "O": 44, - "P": 45, - "Q": 46, - "R": 47, - "S": 48, - "T": 49, - "U": 50, - "V": 51, - "W": 52, - "X": 53, - "Y": 54, - "Z": 55, - "[": 56, - "]": 57, - "a": 58, - "b": 59, - "c": 60, - "d": 61, - "e": 62, - "f": 63, - "g": 64, - "h": 65, - "i": 66, - "j": 67, - "k": 68, - "l": 69, - "m": 70, - "n": 71, - "o": 72, - "p": 73, - "q": 74, - "r": 75, - "s": 76, - "t": 77, - "u": 78, - "v": 79, - "w": 80, - "x": 81, - "y": 82, - "z": 83, - "|": 84, - "\u00a7": 85, - "\u00b1": 86, - "\u00b7": 87, - "\u00bc": 88, - "\u00c0": 89, - "\u00d7": 90, - "\u00e1": 91, - "\u00ea": 92, - "\u00ee": 93, - "\u00f4": 94, - "\u03b5": 95, - "\u03bb": 96, - "\u03c7": 97, - "\u2020": 98, - "\u2030": 99, - "\u20ac": 100, - "\u21b5": 101, - "\u2212": 102, - "\uf0ae": 103 - }, - "vocab_tag": { - "<PAD>": 0, - "B-<content>": 1, - "B-<figDesc>": 2, - "B-<figure_head>": 3, - "B-<label>": 4, - "B-<note>": 5, - "I-<content>": 6, - "I-<figDesc>": 7, - "I-<figure_head>": 8, - "I-<note>": 9, - "O": 10 - }, - "vocab_case": [ - "<PAD>", - "numeric", - "allLower", - "allUpper", - "initialUpper", - "other", - "mainly_numeric", - "contains_digit" - ], - "max_char_length": 30, - "feature_preprocessor": null, - "indice_tag": { - "0": "<PAD>", - "1": "B-<content>", - "2": "B-<figDesc>", - "3": "B-<figure_head>", - "4": "B-<label>", - "5": "B-<note>", - "6": "I-<content>", - "7": "I-<figDesc>", - "8": "I-<figure_head>", - 
"9": "I-<note>", - "10": "O" - } -} \ No newline at end of file diff --git a/grobid-home/scripts/preload_embeddings.py b/grobid-home/scripts/preload_embeddings.py index 3d90ad31da..87c07a40ae 100644 --- a/grobid-home/scripts/preload_embeddings.py +++ b/grobid-home/scripts/preload_embeddings.py @@ -11,7 +11,9 @@ remove the embedding file. Obviously it will add a few GB more to the docker image. Without pre-loading, the embedding file will be -downloaded and loaded in lmdb at each run of the docker container. +downloaded and loaded in lmdb at each run of the docker container. + +Embeddings are stored as raw float32 bytes (little-endian) for direct use by Java ONNX inference. ''' import os @@ -20,10 +22,19 @@ from delft.utilities.Utilities import download_file import lmdb import json +import struct -map_size = 100 * 1024 * 1024 * 1024 +map_size = 100 * 1024 * 1024 * 1024 def preload(embeddings_name, input_path=None, registry_path=None): + """ + Preload embeddings into LMDB database as raw float32 bytes. 
def _flush_batch(env, batch):
    """Write every (key, value) pair of ``batch`` in one LMDB write transaction."""
    with env.begin(write=True) as txn:
        for k, v in batch:
            txn.put(k, v)


def load_embeddings_raw_format(embeddings, embeddings_name, embeddings_path):
    """
    Load embeddings from a text file and store them as raw float32 bytes in LMDB.

    Each usable line has the form ``<word> <v1> <v2> ...`` separated by single
    spaces. The word (UTF-8 encoded) becomes the LMDB key and the vector is
    packed as little-endian float32 (``struct`` format ``<Nf``), without pickle
    serialization. Per the original author's note this layout is meant to be
    read directly by the Java ``WordEmbeddings`` class.

    Lines are skipped when they are blank, have fewer than 10 space-separated
    fields (header or malformed line), contain a non-numeric value, or have a
    dimension different from the first accepted vector. Entries whose key is
    empty or longer than ``env.max_key_size()`` are counted as skipped, since
    LMDB cannot store them.

    Args:
        embeddings: object exposing ``registry["embedding-lmdb-path"]``
        embeddings_name: name of the embeddings, used as LMDB sub-directory
        embeddings_path: path to the embeddings text file

    Raises:
        Any error raised while opening or writing the LMDB environment is
        propagated; the input file and the environment are closed first.
    """
    print(f"Loading embeddings from {embeddings_path} in raw float32 format...")

    embedding_file = open_embedding_file(embeddings_path)
    if embedding_file is None:
        print("Error: could not open embeddings file", embeddings_path)
        return

    count = 0
    skipped = 0
    batch_size = 10000
    batch = []
    embedding_dim = None
    env = None

    try:
        # Create LMDB environment
        embedding_lmdb_path = embeddings.registry["embedding-lmdb-path"]
        os.makedirs(embedding_lmdb_path, exist_ok=True)

        env_path = os.path.join(embedding_lmdb_path, embeddings_name)
        env = lmdb.open(env_path, map_size=map_size)

        # Largest key length (in bytes) this LMDB build accepts, inclusive
        max_key_size = env.max_key_size()

        for line in embedding_file:
            # Only the parsing of a single line is best-effort; database
            # writes happen outside this try so a write failure is not
            # silently retried on every following line.
            try:
                if isinstance(line, bytes):
                    line = line.decode('utf-8', errors='ignore')

                line = line.rstrip()
                if not line:
                    continue

                parts = line.split(' ')
                if len(parts) < 10:  # Skip header or malformed lines
                    continue

                word = parts[0]

                # Parse vector values
                try:
                    values = [float(x) for x in parts[1:] if x]
                except ValueError:
                    continue

                if embedding_dim is None:
                    embedding_dim = len(values)
                    print(f"Detected embedding dimension: {embedding_dim}")

                if len(values) != embedding_dim:
                    continue

                # LMDB rejects empty keys and keys longer than max_key_size;
                # a key of exactly max_key_size bytes is still valid.
                key = word.encode('utf-8')
                if not key or len(key) > max_key_size:
                    skipped += 1
                    continue

                # Convert to raw float32 bytes (little-endian)
                raw_bytes = struct.pack(f'<{len(values)}f', *values)
            except Exception as e:
                print(f"Error processing line: {e}")
                continue

            batch.append((key, raw_bytes))
            count += 1

            if len(batch) >= batch_size:
                _flush_batch(env, batch)
                batch = []
                if count % 100000 == 0:
                    print(f"Processed {count} embeddings...")

        # Write remaining batch
        if batch:
            _flush_batch(env, batch)
    finally:
        embedding_file.close()
        if env is not None:
            env.close()

    print(f"Loaded {count} embeddings with dimension {embedding_dim}")
    if skipped > 0:
        print(f"Skipped {skipped} entries with empty keys or keys exceeding max key size ({max_key_size} bytes)")
    print(f"Stored in raw float32 format at: {env_path}")
def download_file(url, download_path):
    """Download ``url`` into the directory ``download_path``.

    The local file name is the last ``/``-separated component of the URL.
    The body is streamed in 8 KiB chunks and a progress line is printed
    when the server reports a ``content-length``.

    Args:
        url: address of the resource to fetch
        download_path: existing directory receiving the file

    Returns:
        Path of the downloaded file.

    Raises:
        Whatever ``requests`` raises for connection problems, and the error
        raised by ``raise_for_status()`` for an unsuccessful HTTP status.
    """
    local_filename = os.path.join(download_path, url.split('/')[-1])

    print(f"Downloading {url}...")
    with requests.get(url, stream=True) as r:
        r.raise_for_status()
        total_size = int(r.headers.get('content-length', 0))
        downloaded = 0
        with open(local_filename, 'wb') as f:
            for chunk in r.iter_content(chunk_size=8192):
                f.write(chunk)
                downloaded += len(chunk)
                if total_size > 0:
                    pct = (downloaded / total_size) * 100
                    print(f"\rDownloaded {downloaded / (1024*1024):.1f}MB / {total_size / (1024*1024):.1f}MB ({pct:.1f}%)", end='', flush=True)
        print()
    return local_filename


def extract_file(filepath):
    """Extract a compressed embeddings file.

    ``.zip`` archives are fully extracted and the first ``.txt``/``.vec``
    member found is returned; ``.gz`` files (but not ``.tar.gz``) are
    decompressed to a file named like the input without its ``.gz`` suffix.
    Anything else is returned untouched.

    Args:
        filepath: path of the (possibly compressed) file

    Returns:
        ``(path, extract_dir)`` where ``path`` is the usable embeddings file
        and ``extract_dir`` is the temporary directory the caller must remove,
        or ``(filepath, None)`` when nothing was extracted (unsupported
        extension, or a zip without any ``.txt``/``.vec`` member).
    """
    is_zip = filepath.endswith('.zip')
    is_gzip = filepath.endswith('.gz') and not filepath.endswith('.tar.gz')

    # Only allocate a temporary directory when something will be extracted,
    # otherwise it would be leaked since the caller receives None for it.
    if not (is_zip or is_gzip):
        return filepath, None

    extract_dir = tempfile.mkdtemp()
    try:
        print(f"Extracting {filepath}...")
        if is_zip:
            with zipfile.ZipFile(filepath, 'r') as zf:
                zf.extractall(extract_dir)
            # Find the .txt or .vec file in the extracted contents
            for root, _dirs, files in os.walk(extract_dir):
                for name in files:
                    if name.endswith(('.txt', '.vec')):
                        return os.path.join(root, name), extract_dir
            # No embeddings file inside: nothing usable was produced, so the
            # extraction directory is discarded before reporting that.
            shutil.rmtree(extract_dir, ignore_errors=True)
            return filepath, None

        output_path = os.path.join(extract_dir, os.path.basename(filepath)[:-3])
        with gzip.open(filepath, 'rb') as f_in:
            with open(output_path, 'wb') as f_out:
                shutil.copyfileobj(f_in, f_out)
        return output_path, extract_dir
    except Exception:
        # Do not leave a half-extracted directory behind on failure
        shutil.rmtree(extract_dir, ignore_errors=True)
        raise
def open_embedding_file(filepath):
    """Open an embeddings file in text mode, transparently handling gzip.

    Undecodable UTF-8 bytes are ignored.

    Args:
        filepath: path of a plain text or ``.gz`` embeddings file

    Returns:
        A text-mode file object; the caller is responsible for closing it.
    """
    if filepath.endswith('.gz'):
        return gzip.open(filepath, 'rt', encoding='utf-8', errors='ignore')
    else:
        return open(filepath, 'r', encoding='utf-8', errors='ignore')


def preload(embeddings_name, input_path=None, registry_path=None):
    """
    Preload embeddings into LMDB database as raw float32 bytes.

    The embeddings are looked up by name in the registry. When ``input_path``
    is not given, the file is downloaded from the registered ``url`` and
    extracted if compressed. The downloaded file and the extraction directory
    are always removed afterwards, even when loading fails.

    Args:
        embeddings_name: Name of the embeddings (e.g., 'glove-840B')
        input_path: Optional path to embeddings file
        registry_path: Optional path to embedding registry JSON
    """
    # Load registry
    registry = None
    if registry_path and os.path.exists(registry_path):
        with open(registry_path, 'r') as f:
            registry = json.load(f)

    if registry is None:
        print("Error: registry file is required")
        return

    # Find embedding description
    description = next(
        (emb for emb in registry.get('embeddings', []) if emb.get('name') == embeddings_name),
        None,
    )
    if description is None:
        print(f"Error: embedding name '{embeddings_name}' is not registered")
        return

    embeddings_path = input_path
    temp_dir = None
    downloaded_file = None

    try:
        if embeddings_path is None:
            # Download if url is available
            url = description.get('url', '')
            if not url:
                print(f"Embeddings resource URL is not specified for: {embeddings_name}")
                return

            download_path = registry.get('embedding-download-path', 'data/download')
            os.makedirs(download_path, exist_ok=True)

            print(f"Downloading resource file for {embeddings_name}...")
            downloaded_file = download_file(url, download_path)

            if not (downloaded_file and os.path.isfile(downloaded_file)):
                print(f"Failed to download embedding file")
                return

            print(f"Download successful: {downloaded_file}")
            # Extract if compressed
            embeddings_path, temp_dir = extract_file(downloaded_file)

        if embeddings_path is None or not os.path.exists(embeddings_path):
            print(f"Fail to retrieve embedding file for {embeddings_name}")
            return

        # Load and store as raw float32 bytes for Java compatibility
        load_embeddings_raw_format(registry, embeddings_name, embeddings_path)
    finally:
        # Cleanup runs on every exit path so a failed load does not leave
        # the download or the extraction directory behind.
        if temp_dir and os.path.exists(temp_dir):
            shutil.rmtree(temp_dir)
        if downloaded_file and os.path.isfile(downloaded_file):
            os.remove(downloaded_file)


def _flush_batch(env, batch):
    """Write every (key, value) pair of ``batch`` in one LMDB write transaction."""
    with env.begin(write=True) as txn:
        for k, v in batch:
            txn.put(k, v)


def load_embeddings_raw_format(registry, embeddings_name, embeddings_path):
    """
    Load embeddings from a text file and store them as raw float32 bytes in LMDB.

    Each usable line has the form ``<word> <v1> <v2> ...`` separated by single
    spaces. The word (UTF-8 encoded) becomes the LMDB key and the vector is
    packed as little-endian float32 (``struct`` format ``<Nf``), without pickle
    serialization. Per the original author's note this layout is meant to be
    read directly by the Java ``WordEmbeddings`` class.

    Lines are skipped when they are blank, have fewer than 10 space-separated
    fields (header or malformed line), contain a non-numeric value, or have a
    dimension different from the first accepted vector. Entries whose key is
    empty or longer than ``env.max_key_size()`` are counted as skipped, since
    LMDB cannot store them.

    Args:
        registry: registry dict; ``"embedding-lmdb-path"`` gives the LMDB root
            directory (default ``"data/db"``)
        embeddings_name: name of the embeddings, used as LMDB sub-directory
        embeddings_path: path to the embeddings text file

    Raises:
        Any error raised while opening or writing the LMDB environment is
        propagated; the input file and the environment are closed first.
    """
    print(f"Loading embeddings from {embeddings_path} in raw float32 format...")

    embedding_file = open_embedding_file(embeddings_path)
    if embedding_file is None:
        print("Error: could not open embeddings file", embeddings_path)
        return

    count = 0
    skipped = 0
    batch_size = 10000
    batch = []
    embedding_dim = None
    env = None

    try:
        # Create LMDB environment
        embedding_lmdb_path = registry.get("embedding-lmdb-path", "data/db")
        os.makedirs(embedding_lmdb_path, exist_ok=True)

        env_path = os.path.join(embedding_lmdb_path, embeddings_name)
        env = lmdb.open(env_path, map_size=map_size)

        # Largest key length (in bytes) this LMDB build accepts, inclusive
        max_key_size = env.max_key_size()

        for line in embedding_file:
            # Only the parsing of a single line is best-effort; database
            # writes happen outside this try so a write failure is not
            # silently retried on every following line.
            try:
                if isinstance(line, bytes):
                    line = line.decode('utf-8', errors='ignore')

                line = line.rstrip()
                if not line:
                    continue

                parts = line.split(' ')
                if len(parts) < 10:  # Skip header or malformed lines
                    continue

                word = parts[0]

                # Parse vector values
                try:
                    values = [float(x) for x in parts[1:] if x]
                except ValueError:
                    continue

                if embedding_dim is None:
                    embedding_dim = len(values)
                    print(f"Detected embedding dimension: {embedding_dim}")

                if len(values) != embedding_dim:
                    continue

                # LMDB rejects empty keys and keys longer than max_key_size;
                # a key of exactly max_key_size bytes is still valid.
                key = word.encode('utf-8')
                if not key or len(key) > max_key_size:
                    skipped += 1
                    continue

                # Convert to raw float32 bytes (little-endian)
                raw_bytes = struct.pack(f'<{len(values)}f', *values)
            except Exception as e:
                print(f"Error processing line: {e}")
                continue

            batch.append((key, raw_bytes))
            count += 1

            if len(batch) >= batch_size:
                _flush_batch(env, batch)
                batch = []
                if count % 100000 == 0:
                    print(f"Processed {count} embeddings...")

        # Write remaining batch
        if batch:
            _flush_batch(env, batch)
    finally:
        embedding_file.close()
        if env is not None:
            env.close()

    print(f"Loaded {count} embeddings with dimension {embedding_dim}")
    if skipped > 0:
        print(f"Skipped {skipped} entries with empty keys or keys exceeding max key size ({max_key_size} bytes)")
    print(f"Stored in raw float32 format at: {env_path}")
'glove-840B', 'fasttext-crawl', 'word2vec')" + ) + ) + parser.add_argument("--input", + help="path to the embeddings file to be loaded located on the host machine (where the docker image is built)," + " this is optional, without this parameter the embeddings file will be downloaded from the url indicated" + " in the embeddings registry") + parser.add_argument("--registry", required=True, + help="path to the embedding registry JSON file (resources-registry.json)") + + args = parser.parse_args() + + embeddings_name = args.embedding + input_path = args.input + registry_path = args.registry + + preload(embeddings_name, input_path, registry_path) diff --git a/grobid-service/src/test/java/org/grobid/service/tests/GrobidRestServiceTest.java b/grobid-service/src/test/java/org/grobid/service/tests/GrobidRestServiceTest.java index 4ea1eef310..ac4b2270c5 100755 --- a/grobid-service/src/test/java/org/grobid/service/tests/GrobidRestServiceTest.java +++ b/grobid-service/src/test/java/org/grobid/service/tests/GrobidRestServiceTest.java @@ -25,6 +25,7 @@ import org.glassfish.jersey.media.multipart.MultiPartFeature; import org.glassfish.jersey.media.multipart.file.FileDataBodyPart; import org.grobid.core.utilities.GrobidProperties; +import org.grobid.core.utilities.TestEngineUtils; import org.grobid.service.GrobidPaths; import org.grobid.service.GrobidRestService; import org.grobid.service.GrobidServiceConfiguration; @@ -56,6 +57,7 @@ public class GrobidRestServiceTest { @BeforeClass public static void setInitialContext() throws Exception { + TestEngineUtils.initGrobidForceWapiti(); } @AfterClass