Skip to content

Commit 481ceff

Browse files
authored
add flavor (#82)
add flavor parameter
1 parent dcf1b50 commit 481ceff

1 file changed

Lines changed: 21 additions & 2 deletions

File tree

grobid_client/grobid_client.py

Lines changed: 21 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -114,6 +114,7 @@ def process(
114114
segment_sentences=False,
115115
force=True,
116116
verbose=False,
117+
flavor=None
117118
):
118119
batch_size_pdf = self.config["batch_size"]
119120
input_files = []
@@ -147,6 +148,7 @@ def process(
147148
segment_sentences,
148149
force,
149150
verbose,
151+
flavor
150152
)
151153
input_files = []
152154

@@ -185,6 +187,7 @@ def process_batch(
185187
segment_sentences,
186188
force,
187189
verbose=False,
190+
flavor=None
188191
):
189192
if verbose:
190193
print(len(input_files), "files to process in current batch")
@@ -203,6 +206,9 @@ def process_batch(
203206
selected_process = self.process_pdf
204207
if service == 'processCitationList':
205208
selected_process = self.process_txt
209+
210+
if verbose:
211+
print(f"Adding {input_file} to the queue.")
206212

207213
r = executor.submit(
208214
selected_process,
@@ -214,7 +220,8 @@ def process_batch(
214220
include_raw_citations,
215221
include_raw_affiliations,
216222
tei_coordinates,
217-
segment_sentences)
223+
segment_sentences,
224+
flavor)
218225

219226
results.append(r)
220227

@@ -255,7 +262,8 @@ def process_pdf(
255262
tei_coordinates,
256263
segment_sentences,
257264
start=-1,
258-
end=-1
265+
end=-1,
266+
flavor=None
259267
):
260268
pdf_handle = open(pdf_file, "rb")
261269
files = {
@@ -285,6 +293,8 @@ def process_pdf(
285293
the_data["teiCoordinates"] = self.config["coordinates"]
286294
if segment_sentences:
287295
the_data["segmentSentences"] = "1"
296+
if flavor:
297+
the_data["flavor"] = flavor
288298
if start > 0:
289299
the_data["start"] = str(start)
290300
if end > 0:
@@ -368,6 +378,7 @@ def process_txt(
368378

369379
def main():
370380
valid_services = [
381+
"processFulltextDocumentBlank",
371382
"processFulltextDocument",
372383
"processHeaderDocument",
373384
"processReferences",
@@ -441,11 +452,18 @@ def main():
441452
help="print information about processed files in the console",
442453
)
443454

455+
parser.add_argument(
456+
"--flavor",
457+
default=None,
458+
help="Define the flavor to be used for the fulltext extraction",
459+
)
460+
444461
args = parser.parse_args()
445462

446463
input_path = args.input
447464
config_path = args.config
448465
output_path = args.output
466+
flavor = args.flavor
449467

450468
if args.n is not None:
451469
try:
@@ -500,6 +518,7 @@ def main():
500518
segment_sentences=segment_sentences,
501519
force=force,
502520
verbose=verbose,
521+
flavor=flavor
503522
)
504523

505524
runtime = round(time.time() - start_time, 3)

0 commit comments

Comments
 (0)