From 33f695c0374f429d4e64a2f0c94ec56e0b0c85c2 Mon Sep 17 00:00:00 2001 From: Daniel Ecer Date: Fri, 26 Jul 2019 16:03:03 +0100 Subject: [PATCH 01/14] added grobid.delft.train.args --- .../org/grobid/core/utilities/GrobidProperties.java | 4 ++++ .../grobid/core/utilities/GrobidPropertyKeys.java | 1 + .../grobid/core/utilities/GrobidPropertiesTest.java | 12 ++++++++++++ 3 files changed, 17 insertions(+) diff --git a/grobid-core/src/main/java/org/grobid/core/utilities/GrobidProperties.java b/grobid-core/src/main/java/org/grobid/core/utilities/GrobidProperties.java index 071c1e1391..bd4965ff95 100755 --- a/grobid-core/src/main/java/org/grobid/core/utilities/GrobidProperties.java +++ b/grobid-core/src/main/java/org/grobid/core/utilities/GrobidProperties.java @@ -446,6 +446,10 @@ public static boolean isDeLFTRedirectOutput() { ); } + public static String getDeLFTTrainArgs() { + return getPropertyValue(GrobidPropertyKeys.PROP_GROBID_DELFT_TRAIN_ARGS, ""); + } + public static String getGluttonHost() { return getPropertyValue(GrobidPropertyKeys.PROP_GLUTTON_HOST); } diff --git a/grobid-core/src/main/java/org/grobid/core/utilities/GrobidPropertyKeys.java b/grobid-core/src/main/java/org/grobid/core/utilities/GrobidPropertyKeys.java index bc145fbd1e..7321aa3e6a 100755 --- a/grobid-core/src/main/java/org/grobid/core/utilities/GrobidPropertyKeys.java +++ b/grobid-core/src/main/java/org/grobid/core/utilities/GrobidPropertyKeys.java @@ -20,6 +20,7 @@ public interface GrobidPropertyKeys { String PROP_GROBID_DELFT_PATH = "grobid.delft.install"; String PROP_GROBID_DELFT_REDIRECT_OUTPUT = "grobid.delft.redirect_output"; String PROP_GROBID_DELFT_ELMO = "grobid.delft.useELMo"; + String PROP_GROBID_DELFT_TRAIN_ARGS = "grobid.delft.train.args"; String PROP_USE_LANG_ID = "grobid.use_language_id"; String PROP_LANG_DETECTOR_FACTORY = "grobid.language_detector_factory"; diff --git a/grobid-core/src/test/java/org/grobid/core/utilities/GrobidPropertiesTest.java b/grobid-core/src/test/java/org/grobid/core/utilities/GrobidPropertiesTest.java index 0f1e5c1f0f..8466f5dc43 100755 --- a/grobid-core/src/test/java/org/grobid/core/utilities/GrobidPropertiesTest.java +++ b/grobid-core/src/test/java/org/grobid/core/utilities/GrobidPropertiesTest.java @@ -76,6 +76,18 @@ public void testIsDeLFTRedirectOutputTrueIfSet() throws IOException { assertTrue(GrobidProperties.isDeLFTRedirectOutput()); } + @Test + public void testShouldReturnEmptyTrainArgsByDefault() { + GrobidProperties.getProps().remove(GrobidPropertyKeys.PROP_GROBID_DELFT_TRAIN_ARGS); + assertEquals(GrobidProperties.getDeLFTTrainArgs(), ""); + } + + @Test + public void testShouldReturnConfiguredTrainArgs() { + GrobidProperties.getProps().put(GrobidPropertyKeys.PROP_GROBID_DELFT_TRAIN_ARGS, "args"); + assertEquals(GrobidProperties.getDeLFTTrainArgs(), "args"); + } + /*@Test(expected = GrobidPropertyException.class) public void testCheckPropertiesException_shouldThrowException() { GrobidProperties.getProps().put( From 139d5648e6e423e7911acdafc7503893de2472c7 Mon Sep 17 00:00:00 2001 From: Daniel Ecer Date: Fri, 26 Jul 2019 16:31:52 +0100 Subject: [PATCH 02/14] extracted getTrainCommand --- .../java/org/grobid/core/jni/DeLFTModel.java | 27 ++++++++++++------- .../org/grobid/core/jni/DeLFTModelTest.java | 27 +++++++++++++++++++ 2 files changed, 44 insertions(+), 10 deletions(-) create mode 100644 grobid-core/src/test/java/org/grobid/core/jni/DeLFTModelTest.java diff --git a/grobid-core/src/main/java/org/grobid/core/jni/DeLFTModel.java b/grobid-core/src/main/java/org/grobid/core/jni/DeLFTModel.java index 78e0e313d1..fb130a82b9 100644 --- a/grobid-core/src/main/java/org/grobid/core/jni/DeLFTModel.java +++ b/grobid-core/src/main/java/org/grobid/core/jni/DeLFTModel.java @@ -234,7 +234,22 @@ public void run() { LOGGER.error("DeLFT model training via JEP failed", e); } } - } + } + + protected static List getTrainCommand( + String modelName, File trainingData, File outputModel + ) { + List command = Arrays.asList("python3", + "grobidTagger.py", + modelName, + "train", + "--input", trainingData.getAbsolutePath(), + "--output", GrobidProperties.getInstance().getModelPath().getAbsolutePath()); + if (GrobidProperties.getInstance().useELMo()) { + command.add("--use-ELMo"); + } + return command; + } /** * Train with an external process rather than with JNI, this approach appears to be more stable for the @@ -243,15 +258,7 @@ public void run() { public static void train(String modelName, File trainingData, File outputModel) { try { LOGGER.info("Train DeLFT model " + modelName + "..."); - List command = Arrays.asList("python3", - "grobidTagger.py", - modelName, - "train", - "--input", trainingData.getAbsolutePath(), - "--output", GrobidProperties.getInstance().getModelPath().getAbsolutePath()); - if (GrobidProperties.getInstance().useELMo()) { - command.add("--use-ELMo"); - } + List command = getTrainCommand(modelName, trainingData, outputModel); ProcessBuilder pb = new ProcessBuilder(command); File delftPath = new File(GrobidProperties.getInstance().getDeLFTFilePath()); diff --git a/grobid-core/src/test/java/org/grobid/core/jni/DeLFTModelTest.java b/grobid-core/src/test/java/org/grobid/core/jni/DeLFTModelTest.java new file mode 100644 index 0000000000..d68aa90679 --- /dev/null +++ b/grobid-core/src/test/java/org/grobid/core/jni/DeLFTModelTest.java @@ -0,0 +1,27 @@ +package org.grobid.core.jni; + +import java.io.File; + +import org.junit.Test; + +import static org.hamcrest.Matchers.contains; +import static org.junit.Assert.assertThat; + +import org.grobid.core.utilities.GrobidProperties; + + +public class DeLFTModelTest { + @Test + public void testShouldBuildTrainCommand() { + File trainingData = new File("test/train.data"); + File outputModel = new File("test/output"); + assertThat( + DeLFTModel.getTrainCommand("model1", trainingData, outputModel), + contains( + "python3", "grobidTagger.py", "model1", "train", + "--input", trainingData.getAbsolutePath(), + "--output", GrobidProperties.getModelPath().getAbsolutePath() + ) + ); + } +} From 129a66ea72a4a1f06ddc7740efda2b91de81251f Mon Sep 17 00:00:00 2001 From: Daniel Ecer Date: Fri, 26 Jul 2019 16:34:08 +0100 Subject: [PATCH 03/14] log command --- grobid-core/src/main/java/org/grobid/core/jni/DeLFTModel.java | 1 + 1 file changed, 1 insertion(+) diff --git a/grobid-core/src/main/java/org/grobid/core/jni/DeLFTModel.java b/grobid-core/src/main/java/org/grobid/core/jni/DeLFTModel.java index fb130a82b9..7866ff3c35 100644 --- a/grobid-core/src/main/java/org/grobid/core/jni/DeLFTModel.java +++ b/grobid-core/src/main/java/org/grobid/core/jni/DeLFTModel.java @@ -259,6 +259,7 @@ public static void train(String modelName, File trainingData, File outputModel) try { LOGGER.info("Train DeLFT model " + modelName + "..."); List command = getTrainCommand(modelName, trainingData, outputModel); + LOGGER.info("Running: {}", command); ProcessBuilder pb = new ProcessBuilder(command); File delftPath = new File(GrobidProperties.getInstance().getDeLFTFilePath()); From 62fbc54d5874186c09a80dfd014a85307c5c0085 Mon Sep 17 00:00:00 2001 From: Daniel Ecer Date: Fri, 26 Jul 2019 16:37:55 +0100 Subject: [PATCH 04/14] removed outputModel argument --- .../src/main/java/org/grobid/core/jni/DeLFTModel.java | 6 ++---- .../src/test/java/org/grobid/core/jni/DeLFTModelTest.java | 3 +-- 2 files changed, 3 insertions(+), 6 deletions(-) diff --git a/grobid-core/src/main/java/org/grobid/core/jni/DeLFTModel.java b/grobid-core/src/main/java/org/grobid/core/jni/DeLFTModel.java index 7866ff3c35..3773ecf899 100644 --- a/grobid-core/src/main/java/org/grobid/core/jni/DeLFTModel.java +++ b/grobid-core/src/main/java/org/grobid/core/jni/DeLFTModel.java @@ -236,9 +236,7 @@ public void run() { } } - protected static List getTrainCommand( - String modelName, File trainingData, File outputModel - ) { + protected static List getTrainCommand(String modelName, File trainingData) { List command = Arrays.asList("python3", "grobidTagger.py", modelName, @@ -258,7 +256,7 @@ protected static List getTrainCommand( public static void train(String modelName, File trainingData, File outputModel) { try { LOGGER.info("Train DeLFT model " + modelName + "..."); - List command = getTrainCommand(modelName, trainingData, outputModel); + List command = getTrainCommand(modelName, trainingData); LOGGER.info("Running: {}", command); ProcessBuilder pb = new ProcessBuilder(command); diff --git a/grobid-core/src/test/java/org/grobid/core/jni/DeLFTModelTest.java b/grobid-core/src/test/java/org/grobid/core/jni/DeLFTModelTest.java index d68aa90679..15eb169534 100644 --- a/grobid-core/src/test/java/org/grobid/core/jni/DeLFTModelTest.java +++ b/grobid-core/src/test/java/org/grobid/core/jni/DeLFTModelTest.java @@ -14,9 +14,8 @@ public class DeLFTModelTest { @Test public void testShouldBuildTrainCommand() { File trainingData = new File("test/train.data"); - File outputModel = new File("test/output"); assertThat( - DeLFTModel.getTrainCommand("model1", trainingData, outputModel), + DeLFTModel.getTrainCommand("model1", trainingData), contains( "python3", "grobidTagger.py", "model1", "train", "--input", trainingData.getAbsolutePath(), From 729381d45c0355a3ab13e9f6ee896903ef116aec Mon Sep 17 00:00:00 2001 From: Daniel Ecer Date: Fri, 26 Jul 2019 16:39:03 +0100 Subject: [PATCH 05/14] fixed GrobidProperties getInstance vs static --- grobid-core/src/main/java/org/grobid/core/jni/DeLFTModel.java | 4 ++-- .../src/test/java/org/grobid/core/jni/DeLFTModelTest.java | 1 + 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/grobid-core/src/main/java/org/grobid/core/jni/DeLFTModel.java b/grobid-core/src/main/java/org/grobid/core/jni/DeLFTModel.java index 3773ecf899..e6f901029d 100644 --- a/grobid-core/src/main/java/org/grobid/core/jni/DeLFTModel.java +++ b/grobid-core/src/main/java/org/grobid/core/jni/DeLFTModel.java @@ -242,8 +242,8 @@ protected static List getTrainCommand(String modelName, File trainingDat modelName, "train", "--input", trainingData.getAbsolutePath(), - "--output", GrobidProperties.getInstance().getModelPath().getAbsolutePath()); - if (GrobidProperties.getInstance().useELMo()) { + "--output", GrobidProperties.getModelPath().getAbsolutePath()); + if (GrobidProperties.useELMo()) { command.add("--use-ELMo"); } return command; diff --git a/grobid-core/src/test/java/org/grobid/core/jni/DeLFTModelTest.java b/grobid-core/src/test/java/org/grobid/core/jni/DeLFTModelTest.java index 15eb169534..2b52d72c51 100644 --- a/grobid-core/src/test/java/org/grobid/core/jni/DeLFTModelTest.java +++ b/grobid-core/src/test/java/org/grobid/core/jni/DeLFTModelTest.java @@ -13,6 +13,7 @@ public class DeLFTModelTest { @Test public void testShouldBuildTrainCommand() { + GrobidProperties.getInstance(); File trainingData = new File("test/train.data"); assertThat( DeLFTModel.getTrainCommand("model1", trainingData), From 1d082ff9361dd736ed90db17721e5247b6f851eb Mon Sep 17 00:00:00 2001 From: Daniel Ecer Date: Fri, 26 Jul 2019 16:47:31 +0100 Subject: [PATCH 06/14] added test for useELMO flag and fixed implementation --- .../java/org/grobid/core/jni/DeLFTModel.java | 5 +++-- .../org/grobid/core/jni/DeLFTModelTest.java | 17 +++++++++++++++++ 2 files changed, 20 insertions(+), 2 deletions(-) diff --git a/grobid-core/src/main/java/org/grobid/core/jni/DeLFTModel.java b/grobid-core/src/main/java/org/grobid/core/jni/DeLFTModel.java index e6f901029d..b2ad3dc208 100644 --- a/grobid-core/src/main/java/org/grobid/core/jni/DeLFTModel.java +++ b/grobid-core/src/main/java/org/grobid/core/jni/DeLFTModel.java @@ -237,12 +237,13 @@ public void run() { } protected static List getTrainCommand(String modelName, File trainingData) { - List command = Arrays.asList("python3", + List command = new ArrayList<>(Arrays.asList("python3", "grobidTagger.py", modelName, "train", "--input", trainingData.getAbsolutePath(), - "--output", GrobidProperties.getModelPath().getAbsolutePath()); + "--output", GrobidProperties.getModelPath().getAbsolutePath() + )); if (GrobidProperties.useELMo()) { command.add("--use-ELMo"); } diff --git a/grobid-core/src/test/java/org/grobid/core/jni/DeLFTModelTest.java b/grobid-core/src/test/java/org/grobid/core/jni/DeLFTModelTest.java index 2b52d72c51..35ca49c80d 100644 --- a/grobid-core/src/test/java/org/grobid/core/jni/DeLFTModelTest.java +++ b/grobid-core/src/test/java/org/grobid/core/jni/DeLFTModelTest.java @@ -8,6 +8,7 @@ import static org.junit.Assert.assertThat; import org.grobid.core.utilities.GrobidProperties; +import org.grobid.core.utilities.GrobidPropertyKeys; public class DeLFTModelTest { @@ -24,4 +25,20 @@ public void testShouldBuildTrainCommand() { ) ); } + + @Test + public void testShouldAddUseELMO() { + GrobidProperties.getInstance(); + GrobidProperties.getProps().put(GrobidPropertyKeys.PROP_GROBID_DELFT_ELMO, "true"); + File trainingData = new File("test/train.data"); + assertThat( + DeLFTModel.getTrainCommand("model1", trainingData), + contains( + "python3", "grobidTagger.py", "model1", "train", + "--input", trainingData.getAbsolutePath(), + "--output", GrobidProperties.getModelPath().getAbsolutePath(), + "--use-ELMo" + ) + ); + } } From a2342b6b0a425d1a0a83d4d5616111a1defd8fac Mon Sep 17 00:00:00 2001 From: Daniel Ecer Date: Fri, 26 Jul 2019 16:56:25 +0100 Subject: [PATCH 07/14] implemented simple arg --- .../java/org/grobid/core/jni/DeLFTModel.java | 5 ++++ .../org/grobid/core/jni/DeLFTModelTest.java | 25 +++++++++++++++++-- 2 files changed, 28 insertions(+), 2 deletions(-) diff --git a/grobid-core/src/main/java/org/grobid/core/jni/DeLFTModel.java b/grobid-core/src/main/java/org/grobid/core/jni/DeLFTModel.java index b2ad3dc208..4ca3a7b8bb 100644 --- a/grobid-core/src/main/java/org/grobid/core/jni/DeLFTModel.java +++ b/grobid-core/src/main/java/org/grobid/core/jni/DeLFTModel.java @@ -1,5 +1,7 @@ package org.grobid.core.jni; +import org.apache.commons.lang3.StringUtils; + import org.grobid.core.GrobidModel; import org.grobid.core.engines.label.TaggingLabels; import org.grobid.core.exceptions.GrobidException; @@ -247,6 +249,9 @@ protected static List getTrainCommand(String modelName, File trainingDat if (GrobidProperties.useELMo()) { command.add("--use-ELMo"); } + if (StringUtils.isNotEmpty(GrobidProperties.getDeLFTTrainArgs())) { + command.add(GrobidProperties.getDeLFTTrainArgs()); + } return command; } diff --git a/grobid-core/src/test/java/org/grobid/core/jni/DeLFTModelTest.java b/grobid-core/src/test/java/org/grobid/core/jni/DeLFTModelTest.java index 35ca49c80d..de64730683 100644 --- a/grobid-core/src/test/java/org/grobid/core/jni/DeLFTModelTest.java +++ b/grobid-core/src/test/java/org/grobid/core/jni/DeLFTModelTest.java @@ -2,6 +2,7 @@ import java.io.File; +import org.junit.Before; import org.junit.Test; import static org.hamcrest.Matchers.contains; @@ -12,9 +13,15 @@ public class DeLFTModelTest { + @Before + public void setUp() { + GrobidProperties.getInstance(); + GrobidProperties.getProps().put(GrobidPropertyKeys.PROP_GROBID_DELFT_ELMO, "false"); + GrobidProperties.getProps().remove(GrobidPropertyKeys.PROP_GROBID_DELFT_TRAIN_ARGS); + } + @Test public void testShouldBuildTrainCommand() { - GrobidProperties.getInstance(); File trainingData = new File("test/train.data"); assertThat( DeLFTModel.getTrainCommand("model1", trainingData), @@ -28,7 +35,6 @@ public void testShouldBuildTrainCommand() { @Test public void testShouldAddUseELMO() { - GrobidProperties.getInstance(); GrobidProperties.getProps().put(GrobidPropertyKeys.PROP_GROBID_DELFT_ELMO, "true"); File trainingData = new File("test/train.data"); assertThat( @@ -41,4 +47,19 @@ public void testShouldAddUseELMO() { ) ); } + + @Test + public void testShouldAddSingleCustomArg() { + GrobidProperties.getProps().put(GrobidPropertyKeys.PROP_GROBID_DELFT_TRAIN_ARGS, "arg1"); + File trainingData = new File("test/train.data"); + assertThat( + DeLFTModel.getTrainCommand("model1", trainingData), + contains( + "python3", "grobidTagger.py", "model1", "train", + "--input", trainingData.getAbsolutePath(), + "--output", GrobidProperties.getModelPath().getAbsolutePath(), + "arg1" + ) + ); + } } From 46c87ae8e47da2014f8ea68d4e224a0c30b9c8b4 Mon Sep 17 00:00:00 2001 From: Daniel Ecer Date: Fri, 26 Jul 2019 16:59:56 +0100 Subject: [PATCH 08/14] support for multiple args (space separated) --- .../java/org/grobid/core/jni/DeLFTModel.java | 4 +++- .../org/grobid/core/jni/DeLFTModelTest.java | 18 ++++++++++++++++++ 2 files changed, 21 insertions(+), 1 deletion(-) diff --git a/grobid-core/src/main/java/org/grobid/core/jni/DeLFTModel.java b/grobid-core/src/main/java/org/grobid/core/jni/DeLFTModel.java index 4ca3a7b8bb..19845358c7 100644 --- a/grobid-core/src/main/java/org/grobid/core/jni/DeLFTModel.java +++ b/grobid-core/src/main/java/org/grobid/core/jni/DeLFTModel.java @@ -250,7 +250,9 @@ protected static List getTrainCommand(String modelName, File trainingDat command.add("--use-ELMo"); } if (StringUtils.isNotEmpty(GrobidProperties.getDeLFTTrainArgs())) { - command.add(GrobidProperties.getDeLFTTrainArgs()); + command.addAll(Arrays.asList( + GrobidProperties.getDeLFTTrainArgs().split(" ") + )); } return command; } diff --git a/grobid-core/src/test/java/org/grobid/core/jni/DeLFTModelTest.java b/grobid-core/src/test/java/org/grobid/core/jni/DeLFTModelTest.java index de64730683..3ed8af637e 100644 --- a/grobid-core/src/test/java/org/grobid/core/jni/DeLFTModelTest.java +++ b/grobid-core/src/test/java/org/grobid/core/jni/DeLFTModelTest.java @@ -62,4 +62,22 @@ public void testShouldAddSingleCustomArg() { ) ); } + + @Test + public void testShouldAddMultipleCustomArg() { + GrobidProperties.getProps().put( + GrobidPropertyKeys.PROP_GROBID_DELFT_TRAIN_ARGS, "arg1 arg2" + ); + File trainingData = new File("test/train.data"); + assertThat( + DeLFTModel.getTrainCommand("model1", trainingData), + contains( + "python3", "grobidTagger.py", "model1", "train", + "--input", trainingData.getAbsolutePath(), + "--output", GrobidProperties.getModelPath().getAbsolutePath(), + "arg1", + "arg2" + ) + ); + } } From fa7381fc372f28d7081fd99418cae2ad3b59e261 Mon Sep 17 00:00:00 2001 From: Daniel Ecer Date: Fri, 26 Jul 2019 17:18:44 +0100 Subject: [PATCH 09/14] added support for custom train module --- .../java/org/grobid/core/jni/DeLFTModel.java | 6 +++++- .../grobid/core/utilities/GrobidProperties.java | 6 ++++++ .../core/utilities/GrobidPropertyKeys.java | 1 + .../org/grobid/core/jni/DeLFTModelTest.java | 17 +++++++++++++++++ .../core/utilities/GrobidPropertiesTest.java | 14 ++++++++++++++ 5 files changed, 43 insertions(+), 1 deletion(-) diff --git a/grobid-core/src/main/java/org/grobid/core/jni/DeLFTModel.java b/grobid-core/src/main/java/org/grobid/core/jni/DeLFTModel.java index 19845358c7..8309b0a1ad 100644 --- a/grobid-core/src/main/java/org/grobid/core/jni/DeLFTModel.java +++ b/grobid-core/src/main/java/org/grobid/core/jni/DeLFTModel.java @@ -239,8 +239,12 @@ public void run() { } protected static List getTrainCommand(String modelName, File trainingData) { + String trainModule = GrobidProperties.getDeLFTTrainModule(); + if (StringUtils.isEmpty(trainModule)) { + trainModule = "grobidTagger.py"; + } List command = new ArrayList<>(Arrays.asList("python3", - "grobidTagger.py", + trainModule, modelName, "train", "--input", trainingData.getAbsolutePath(), diff --git a/grobid-core/src/main/java/org/grobid/core/utilities/GrobidProperties.java b/grobid-core/src/main/java/org/grobid/core/utilities/GrobidProperties.java index bd4965ff95..e463a0d627 100755 --- a/grobid-core/src/main/java/org/grobid/core/utilities/GrobidProperties.java +++ b/grobid-core/src/main/java/org/grobid/core/utilities/GrobidProperties.java @@ -446,6 +446,12 @@ public static boolean isDeLFTRedirectOutput() { ); } + public static String getDeLFTTrainModule() { + return getPropertyValue( + GrobidPropertyKeys.PROP_GROBID_DELFT_TRAIN_MODULE, "" + ); + } + public static String getDeLFTTrainArgs() { return getPropertyValue(GrobidPropertyKeys.PROP_GROBID_DELFT_TRAIN_ARGS, ""); } diff --git a/grobid-core/src/main/java/org/grobid/core/utilities/GrobidPropertyKeys.java b/grobid-core/src/main/java/org/grobid/core/utilities/GrobidPropertyKeys.java index 7321aa3e6a..f5b8b8ec87 100755 --- a/grobid-core/src/main/java/org/grobid/core/utilities/GrobidPropertyKeys.java +++ b/grobid-core/src/main/java/org/grobid/core/utilities/GrobidPropertyKeys.java @@ -20,6 +20,7 @@ public interface GrobidPropertyKeys { String PROP_GROBID_DELFT_PATH = "grobid.delft.install"; String PROP_GROBID_DELFT_REDIRECT_OUTPUT = "grobid.delft.redirect_output"; String PROP_GROBID_DELFT_ELMO = "grobid.delft.useELMo"; + String PROP_GROBID_DELFT_TRAIN_MODULE = "grobid.delft.train.module"; String PROP_GROBID_DELFT_TRAIN_ARGS = "grobid.delft.train.args"; String PROP_USE_LANG_ID = "grobid.use_language_id"; String PROP_LANG_DETECTOR_FACTORY = "grobid.language_detector_factory"; diff --git a/grobid-core/src/test/java/org/grobid/core/jni/DeLFTModelTest.java b/grobid-core/src/test/java/org/grobid/core/jni/DeLFTModelTest.java index 3ed8af637e..c11f51189e 100644 --- a/grobid-core/src/test/java/org/grobid/core/jni/DeLFTModelTest.java +++ b/grobid-core/src/test/java/org/grobid/core/jni/DeLFTModelTest.java @@ -17,6 +17,7 @@ public class DeLFTModelTest { public void setUp() { GrobidProperties.getInstance(); GrobidProperties.getProps().put(GrobidPropertyKeys.PROP_GROBID_DELFT_ELMO, "false"); + GrobidProperties.getProps().remove(GrobidPropertyKeys.PROP_GROBID_DELFT_TRAIN_MODULE); GrobidProperties.getProps().remove(GrobidPropertyKeys.PROP_GROBID_DELFT_TRAIN_ARGS); } @@ -48,6 +49,22 @@ public void testShouldAddUseELMO() { ); } + @Test + public void testShouldUseCustomTrainModule() { + GrobidProperties.getProps().put( + GrobidPropertyKeys.PROP_GROBID_DELFT_TRAIN_MODULE, "module1.py" + ); + File trainingData = new File("test/train.data"); + assertThat( + DeLFTModel.getTrainCommand("model1", trainingData), + contains( + "python3", "module1.py", "model1", "train", + "--input", trainingData.getAbsolutePath(), + "--output", GrobidProperties.getModelPath().getAbsolutePath() + ) + ); + } + @Test public void testShouldAddSingleCustomArg() { GrobidProperties.getProps().put(GrobidPropertyKeys.PROP_GROBID_DELFT_TRAIN_ARGS, "arg1"); diff --git a/grobid-core/src/test/java/org/grobid/core/utilities/GrobidPropertiesTest.java b/grobid-core/src/test/java/org/grobid/core/utilities/GrobidPropertiesTest.java index 8466f5dc43..df00da42db 100755 --- a/grobid-core/src/test/java/org/grobid/core/utilities/GrobidPropertiesTest.java +++ b/grobid-core/src/test/java/org/grobid/core/utilities/GrobidPropertiesTest.java @@ -88,6 +88,20 @@ public void testShouldReturnConfiguredTrainArgs() { assertEquals(GrobidProperties.getDeLFTTrainArgs(), "args"); } + @Test + public void testShouldReturnEmptyTrainModuleByDefault() { + GrobidProperties.getProps().remove(GrobidPropertyKeys.PROP_GROBID_DELFT_TRAIN_ARGS); + assertEquals(GrobidProperties.getDeLFTTrainModule(), ""); + } + + @Test + public void testShouldReturnConfiguredModule() { + GrobidProperties.getProps().put( + GrobidPropertyKeys.PROP_GROBID_DELFT_TRAIN_MODULE, "module1" + ); + assertEquals(GrobidProperties.getDeLFTTrainModule(), "module1"); + } + /*@Test(expected = GrobidPropertyException.class) public void testCheckPropertiesException_shouldThrowException() { GrobidProperties.getProps().put( From e3ed0e674aa06dbd8fc00ca51caa3e8d6cdbdb3b Mon Sep 17 00:00:00 2001 From: Daniel Ecer Date: Fri, 26 Jul 2019 17:19:23 +0100 Subject: [PATCH 10/14] minor test method rename --- .../src/test/java/org/grobid/core/jni/DeLFTModelTest.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/grobid-core/src/test/java/org/grobid/core/jni/DeLFTModelTest.java b/grobid-core/src/test/java/org/grobid/core/jni/DeLFTModelTest.java index c11f51189e..5679f4614e 100644 --- a/grobid-core/src/test/java/org/grobid/core/jni/DeLFTModelTest.java +++ b/grobid-core/src/test/java/org/grobid/core/jni/DeLFTModelTest.java @@ -66,7 +66,7 @@ public void testShouldUseCustomTrainModule() { } @Test - public void testShouldAddSingleCustomArg() { + public void testShouldAddSingleCustomTrainArg() { GrobidProperties.getProps().put(GrobidPropertyKeys.PROP_GROBID_DELFT_TRAIN_ARGS, "arg1"); File trainingData = new File("test/train.data"); assertThat( @@ -81,7 +81,7 @@ public void testShouldAddSingleCustomArg() { } @Test - public void testShouldAddMultipleCustomArg() { + public void testShouldAddMultipleCustomTrainArg() { GrobidProperties.getProps().put( GrobidPropertyKeys.PROP_GROBID_DELFT_TRAIN_ARGS, "arg1 arg2" ); From 794bba84a7a960e0618244974abad5f62ff69954 Mon Sep 17 00:00:00 2001 From: Daniel Ecer Date: Fri, 26 Jul 2019 17:21:54 +0100 Subject: [PATCH 11/14] extracted trainingData --- .../org/grobid/core/jni/DeLFTModelTest.java | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) diff --git a/grobid-core/src/test/java/org/grobid/core/jni/DeLFTModelTest.java b/grobid-core/src/test/java/org/grobid/core/jni/DeLFTModelTest.java index 5679f4614e..9f3e6c44b7 100644 --- a/grobid-core/src/test/java/org/grobid/core/jni/DeLFTModelTest.java +++ b/grobid-core/src/test/java/org/grobid/core/jni/DeLFTModelTest.java @@ -13,6 +13,8 @@ public class DeLFTModelTest { + private File trainingData = new File("test/train.data"); + @Before public void setUp() { GrobidProperties.getInstance(); @@ -23,12 +25,11 @@ public void setUp() { @Test public void testShouldBuildTrainCommand() { - File trainingData = new File("test/train.data"); assertThat( DeLFTModel.getTrainCommand("model1", trainingData), contains( "python3", "grobidTagger.py", "model1", "train", - "--input", trainingData.getAbsolutePath(), + "--input", this.trainingData.getAbsolutePath(), "--output", GrobidProperties.getModelPath().getAbsolutePath() ) ); @@ -37,12 +38,11 @@ public void testShouldBuildTrainCommand() { @Test public void testShouldAddUseELMO() { GrobidProperties.getProps().put(GrobidPropertyKeys.PROP_GROBID_DELFT_ELMO, "true"); - File trainingData = new File("test/train.data"); assertThat( DeLFTModel.getTrainCommand("model1", trainingData), contains( "python3", "grobidTagger.py", "model1", "train", - "--input", trainingData.getAbsolutePath(), + "--input", this.trainingData.getAbsolutePath(), "--output", GrobidProperties.getModelPath().getAbsolutePath(), "--use-ELMo" ) @@ -54,12 +54,11 @@ public void testShouldUseCustomTrainModule() { GrobidProperties.getProps().put( GrobidPropertyKeys.PROP_GROBID_DELFT_TRAIN_MODULE, "module1.py" ); - File trainingData = new File("test/train.data"); assertThat( DeLFTModel.getTrainCommand("model1", trainingData), contains( "python3", "module1.py", "model1", "train", - "--input", trainingData.getAbsolutePath(), + "--input", this.trainingData.getAbsolutePath(), "--output", GrobidProperties.getModelPath().getAbsolutePath() ) ); @@ -68,12 +67,11 @@ public void testShouldUseCustomTrainModule() { @Test public void testShouldAddSingleCustomTrainArg() { GrobidProperties.getProps().put(GrobidPropertyKeys.PROP_GROBID_DELFT_TRAIN_ARGS, "arg1"); - File trainingData = new File("test/train.data"); assertThat( DeLFTModel.getTrainCommand("model1", trainingData), contains( "python3", "grobidTagger.py", "model1", "train", - "--input", trainingData.getAbsolutePath(), + "--input", this.trainingData.getAbsolutePath(), "--output", GrobidProperties.getModelPath().getAbsolutePath(), "arg1" ) @@ -85,12 +83,11 @@ public void testShouldAddMultipleCustomTrainArg() { GrobidProperties.getProps().put( GrobidPropertyKeys.PROP_GROBID_DELFT_TRAIN_ARGS, "arg1 arg2" ); - File trainingData = new File("test/train.data"); assertThat( DeLFTModel.getTrainCommand("model1", trainingData), contains( "python3", "grobidTagger.py", "model1", "train", - "--input", trainingData.getAbsolutePath(), + "--input", this.trainingData.getAbsolutePath(), "--output", GrobidProperties.getModelPath().getAbsolutePath(), "arg1", "arg2" From fcac500c4081481a7d0f8cf2a72f18669f19e5f0 Mon Sep 17 00:00:00 2001 From: Daniel Ecer Date: Fri, 26 Jul 2019 17:38:18 +0100 Subject: [PATCH 12/14] fixed testShouldReturnEmptyTrainModuleByDefault reset wrong prop --- .../java/org/grobid/core/utilities/GrobidPropertiesTest.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/grobid-core/src/test/java/org/grobid/core/utilities/GrobidPropertiesTest.java b/grobid-core/src/test/java/org/grobid/core/utilities/GrobidPropertiesTest.java index df00da42db..ac21b06836 100755 --- a/grobid-core/src/test/java/org/grobid/core/utilities/GrobidPropertiesTest.java +++ b/grobid-core/src/test/java/org/grobid/core/utilities/GrobidPropertiesTest.java @@ -90,7 +90,7 @@ public void testShouldReturnConfiguredTrainArgs() { @Test public void testShouldReturnEmptyTrainModuleByDefault() { - GrobidProperties.getProps().remove(GrobidPropertyKeys.PROP_GROBID_DELFT_TRAIN_ARGS); + GrobidProperties.getProps().remove(GrobidPropertyKeys.PROP_GROBID_DELFT_TRAIN_MODULE); assertEquals(GrobidProperties.getDeLFTTrainModule(), ""); } From 9469c6448aae52660ffa063ed4db915f9df80178 Mon Sep 17 00:00:00 2001 From: Daniel Ecer Date: Fri, 26 Jul 2019 17:39:20 +0100 Subject: [PATCH 13/14] moved train module up to match implementation order --- .../core/utilities/GrobidPropertiesTest.java | 31 +++++++++++++------ 1 file changed, 22 insertions(+), 9 deletions(-) diff --git a/grobid-core/src/test/java/org/grobid/core/utilities/GrobidPropertiesTest.java b/grobid-core/src/test/java/org/grobid/core/utilities/GrobidPropertiesTest.java index ac21b06836..65ec764d1e 100755 --- a/grobid-core/src/test/java/org/grobid/core/utilities/GrobidPropertiesTest.java +++ b/grobid-core/src/test/java/org/grobid/core/utilities/GrobidPropertiesTest.java @@ -76,6 +76,20 @@ public void testIsDeLFTRedirectOutputTrueIfSet() throws IOException { assertTrue(GrobidProperties.isDeLFTRedirectOutput()); } + @Test + public void testShouldReturnEmptyTrainModuleByDefault() { + GrobidProperties.getProps().remove(GrobidPropertyKeys.PROP_GROBID_DELFT_TRAIN_MODULE); + assertEquals(GrobidProperties.getDeLFTTrainModule(), ""); + } + + @Test + public void testShouldReturnConfiguredModule() { + GrobidProperties.getProps().put( + GrobidPropertyKeys.PROP_GROBID_DELFT_TRAIN_MODULE, "module1" + ); + assertEquals(GrobidProperties.getDeLFTTrainModule(), "module1"); + } + @Test public void testShouldReturnEmptyTrainArgsByDefault() { GrobidProperties.getProps().remove(GrobidPropertyKeys.PROP_GROBID_DELFT_TRAIN_ARGS); @@ -88,21 +102,20 @@ public void testShouldReturnConfiguredTrainArgs() { assertEquals(GrobidProperties.getDeLFTTrainArgs(), "args"); } + /*@Test(expected = GrobidPropertyException.class) @Test - public void testShouldReturnEmptyTrainModuleByDefault() { - GrobidProperties.getProps().remove(GrobidPropertyKeys.PROP_GROBID_DELFT_TRAIN_MODULE); - assertEquals(GrobidProperties.getDeLFTTrainModule(), ""); + public void testShouldReturnEmptyTrainArgsByDefault() { + GrobidProperties.getProps().remove(GrobidPropertyKeys.PROP_GROBID_DELFT_TRAIN_ARGS); + assertEquals(GrobidProperties.getDeLFTTrainArgs(), ""); } @Test - public void testShouldReturnConfiguredModule() { - GrobidProperties.getProps().put( - GrobidPropertyKeys.PROP_GROBID_DELFT_TRAIN_MODULE, "module1" - ); - assertEquals(GrobidProperties.getDeLFTTrainModule(), "module1"); + public void testShouldReturnConfiguredTrainArgs() { + GrobidProperties.getProps().put(GrobidPropertyKeys.PROP_GROBID_DELFT_TRAIN_ARGS, "args"); + assertEquals(GrobidProperties.getDeLFTTrainArgs(), "args"); } - /*@Test(expected = GrobidPropertyException.class) + @Test(expected = GrobidPropertyException.class) public void testCheckPropertiesException_shouldThrowException() { GrobidProperties.getProps().put( GrobidPropertyKeys.PROP_3RD_PARTY_PDFTOXML, ""); From 3b83725c7d8a6494c8d241d0e82bb7a44699bde8 Mon Sep 17 00:00:00 2001 From: Daniel Ecer Date: Fri, 29 Nov 2019 09:02:05 +0000 Subject: [PATCH 14/14] removed commented out test method (rebase issue) --- .../grobid/core/utilities/GrobidPropertiesTest.java | 13 ------------- 1 file changed, 13 deletions(-) diff --git a/grobid-core/src/test/java/org/grobid/core/utilities/GrobidPropertiesTest.java b/grobid-core/src/test/java/org/grobid/core/utilities/GrobidPropertiesTest.java index 65ec764d1e..baf8bf7697 100755 --- a/grobid-core/src/test/java/org/grobid/core/utilities/GrobidPropertiesTest.java +++ b/grobid-core/src/test/java/org/grobid/core/utilities/GrobidPropertiesTest.java @@ -103,19 +103,6 @@ public void testShouldReturnConfiguredTrainArgs() { } /*@Test(expected = GrobidPropertyException.class) - @Test - public void testShouldReturnEmptyTrainArgsByDefault() { - GrobidProperties.getProps().remove(GrobidPropertyKeys.PROP_GROBID_DELFT_TRAIN_ARGS); - assertEquals(GrobidProperties.getDeLFTTrainArgs(), ""); - } - - @Test - public void testShouldReturnConfiguredTrainArgs() { - GrobidProperties.getProps().put(GrobidPropertyKeys.PROP_GROBID_DELFT_TRAIN_ARGS, "args"); - assertEquals(GrobidProperties.getDeLFTTrainArgs(), "args"); - } - - @Test(expected = GrobidPropertyException.class) public void testCheckPropertiesException_shouldThrowException() { GrobidProperties.getProps().put( GrobidPropertyKeys.PROP_3RD_PARTY_PDFTOXML, "");