Conversation
Line-by-line difference for the diff:

diff --git a/calamari_ocr/ocr/dataset/codec.py b/calamari_ocr/ocr/dataset/codec.py
index 2109e42..ce6f596 100644
--- a/calamari_ocr/ocr/dataset/codec.py
+++ b/calamari_ocr/ocr/dataset/codec.py
@@ -135,7 +135,7 @@ class Codec:
self.char2code[char] = code
def __len__(self):
- """Get the number of characeters in the charset
+ """Get the number of characters in the charset
this is equal to the maximum possible label.
@@ -146,7 +146,7 @@ class Codec:
return len(self.charset)
def size(self):
- """Get the number of characeters in the charset
+ """Get the number of characters in the charset
this is equal to the maximum possible label.
@@ -162,7 +162,7 @@ class Codec:
Parameters
----------
s : str
- sequence of characeters
+ sequence of characters
Returns
-------
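For context, a minimal sketch of the charset-to-label mapping these docstrings describe. This is an illustration only, not calamari's actual Codec class, and the charset contents are made up:

# Toy codec: map each character in the charset to an integer label.
charset = ["a", "b", "c"]
char2code = {char: code for code, char in enumerate(charset)}

def encode(s):
    """Map a sequence of characters to their integer labels."""
    return [char2code[c] for c in s]

print(len(charset))   # number of characters in the charset
print(encode("cab"))  # [2, 0, 1]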
diff --git a/docs/source/doc.command-line-usage.rst b/docs/source/doc.command-line-usage.rst
index 7db0f46..b96de2d 100644
--- a/docs/source/doc.command-line-usage.rst
+++ b/docs/source/doc.command-line-usage.rst
@@ -350,7 +350,7 @@ All Parameters
--trainer.current_epoch TRAINER.CURRENT_EPOCH
The epoch to start with. Usually 0, but can be overwritten for resume training. (default: 0)
--trainer.samples_per_epoch TRAINER.SAMPLES_PER_EPOCH
- The number of samples (not batches!) to process per epoch. By default (-1) the size fo the training dataset. (default: -1)
+ The number of samples (not batches!) to process per epoch. By default (-1) the size of the training dataset. (default: -1)
--trainer.scale_epoch_size TRAINER.SCALE_EPOCH_SIZE
Multiply the number of samples per epoch by this factor. This is useful when using the dataset size as samples per epoch (--samples_per_epoch=-1, the default), but if you desire to set it e.g. to the half dataset size
(--scale_epoch_size=0.5) (default: 1)
@@ -373,7 +373,7 @@ All Parameters
--trainer.output_dir TRAINER.OUTPUT_DIR
Dictionary to use to write checkpoints, logging files, and export of best and last model. (default: None)
--trainer.write_checkpoints TRAINER.WRITE_CHECKPOINTS
- Write checkpoints to output_dir during training. Checkpoints are obligatory if you want support toresume the training (see tfaip-resume-training script) (default: True)
+ Write checkpoints to output_dir during training. Checkpoints are obligatory if you want support to resume the training (see tfaip-resume-training script) (default: True)
--trainer.export_best TRAINER.EXPORT_BEST
Continuously export the best model during testing to output_dir/best. (default: None)
--trainer.export_final TRAINER.EXPORT_FINAL
@@ -447,7 +447,7 @@ All Parameters
--scenario.print_eval_limit SCENARIO.PRINT_EVAL_LIMIT
Number of evaluation examples to print per evaluation, use -1 to print all (default: 10)
--scenario.tensorboard_logger_history_size SCENARIO.TENSORBOARD_LOGGER_HISTORY_SIZE
- Number of instances to store for outputing into tensorboard. Default (last n=5) (default: 5)
+ Number of instances to store for outputting into tensorboard. Default (last n=5) (default: 5)
--scenario.export_serve SCENARIO.EXPORT_SERVE
Export the serving model (saved model format) (default: True)
--model MODEL
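To make the interaction of samples_per_epoch and scale_epoch_size concrete, here is a sketch of the arithmetic described in the parameter help above. The helper function is hypothetical, written only from those descriptions:

# Hypothetical helper reproducing the documented epoch-size logic.
def effective_samples_per_epoch(dataset_size, samples_per_epoch=-1, scale_epoch_size=1.0):
    # -1 (the default) means "use the size of the training dataset"
    base = dataset_size if samples_per_epoch == -1 else samples_per_epoch
    return int(base * scale_epoch_size)

print(effective_samples_per_epoch(10000))                        # 10000
print(effective_samples_per_epoch(10000, scale_epoch_size=0.5))  # 5000, half the dataset size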
diff --git a/docs/source/doc.predicting.rst b/docs/source/doc.predicting.rst
index 4a75304..2c1080a 100644
--- a/docs/source/doc.predicting.rst
+++ b/docs/source/doc.predicting.rst
@@ -44,7 +44,7 @@ If instead the `samples (lines) are dynamically created` during the execution an
checkpoint='PATH_TO_THE_MODEL_WITHOUT_EXT')
raw_predictor = predictor.raw().__enter__() # you can also wrap the following lines in a `with`-block
- # somehwere else in your code, just call the raw_predictor with a single image
+ # somewhere else in your code, just call the raw_predictor with a single image
sample = raw_predictor(raw_image) # raw_image is e.g. np.zeros(200, 50)
inputs, prediction, meta = sample.inputs, sample.outputs, sample.meta
# prediction is usually what you are looking for
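Building on the snippet in this diff, a sketch of the same raw-predictor usage with an explicit `with` block instead of the manual __enter__() call. The import path is an assumption; check it against your calamari version:

import numpy as np
from calamari_ocr.ocr.predict.predictor import Predictor, PredictorParams  # assumed import path

predictor = Predictor.from_checkpoint(
    params=PredictorParams(),
    checkpoint='PATH_TO_THE_MODEL_WITHOUT_EXT')

with predictor.raw() as raw_predictor:
    sample = raw_predictor(np.zeros((200, 50)))  # dummy grayscale line image
    inputs, prediction, meta = sample.inputs, sample.outputs, sample.meta
    # prediction is usually what you are looking for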
diff --git a/requirements.txt b/requirements.txt
index 0d14f73..f305a5a 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -3,5 +3,5 @@ h5py==3.1.0
lxml==4.9.2
python-bidi==0.4.2
tensorflow==2.5.1
-git+https://github.com/p42ul/tfaip@3738106#egg=tfaip
+https://github.com/DDMAL/tfaip/archive/refs/tags/v1.0.0.zip#egg=tfaip
xlsxwriter==3.1.2
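For reference, pip can also install the new tfaip pin directly from the archive URL (the same URL as in the requirements file above):

pip install "https://github.com/DDMAL/tfaip/archive/refs/tags/v1.0.0.zip#egg=tfaip"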
diff --git a/setup.py b/setup.py
index b77dca7..476bb55 100644
--- a/setup.py
+++ b/setup.py
@@ -41,7 +41,8 @@ setup(
},
python_requires=">=3.7",
install_requires=[r for r in open("requirements.txt").read().split("\n")
- if not r.startswith("git+")],
+ if not r.startswith("https")],
+ # We ignore lines starting with "https" because we need a DDMAL customized version of tfaip
keywords=["OCR", "optical character recognition", "ocropy", "ocropus", "kraken"],
data_files=[("", ["requirements.txt"] + resources)],
)
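A quick sketch of what the updated filter yields, using the requirement lines visible in the hunk above (the full requirements.txt may contain additional lines):

# The setup.py list comprehension applied to this PR's requirements.
# The DDMAL tfaip archive URL is excluded from install_requires and is
# installed through requirements.txt instead.
requirements = """h5py==3.1.0
lxml==4.9.2
python-bidi==0.4.2
tensorflow==2.5.1
https://github.com/DDMAL/tfaip/archive/refs/tags/v1.0.0.zip#egg=tfaip
xlsxwriter==3.1.2""".split("\n")

install_requires = [r for r in requirements if not r.startswith("https")]
print(install_requires)
# ['h5py==3.1.0', 'lxml==4.9.2', 'python-bidi==0.4.2', 'tensorflow==2.5.1', 'xlsxwriter==3.1.2']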
notkaramel left a comment:
Looks good to me, and also on the calamari repo. We can test this and then merge it to develop.
The Docker Hub build failure comes from building uWSGI. ChatGPT suggests the following:

RUN apt-get update && apt-get install -y build-essential python3-dev
RUN pip install --only-binary=uWSGI -r requirements.txt
RUN pip install --timeout 300 uWSGI==2.0.18
RUN pip install -r requirements.txt

We are not going to worry too much about this for now; we will just retry the build... In the new build it looks alright.
I'm test running Rodan with
Testing successfully for Text Alignment on
Resolves #1305 and #1260.
Update requirements.txt in text_alignment with calamari-ocr and tfaip from DDMAL forks.

More details here: #1305 (comment)