Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/python-arabic.yml
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
name: Python CI
name: Python Arabic CI

on:
push:
Expand Down
122 changes: 122 additions & 0 deletions .github/workflows/python-hebrew.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,122 @@
# CI for the Hebrew diacritization package that lives under python/hebrew.
name: Python Hebrew CI

# Trigger only when the Hebrew package or this workflow file itself changes.
# Pushes are limited to main; pull requests are not branch-filtered.
on:
  push:
    branches: [main]
    paths:
      - 'python/hebrew/**'
      - '.github/workflows/python-hebrew.yml'
  pull_request:
    paths:
      - 'python/hebrew/**'
      - '.github/workflows/python-hebrew.yml'

# Cancel in-progress runs for the same workflow and branch
concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}
  cancel-in-progress: true

# Default GITHUB_TOKEN scopes for every job in this workflow.
# security-events: write is what allows the CodeQL job to publish its results.
permissions:
  contents: read
  security-events: write

jobs:
  # Reviews dependency changes; the `if` guard restricts it to pull_request events.
  dependency-review:
    runs-on: ubuntu-latest
    if: github.event_name == 'pull_request'
    # Job-level permissions override the workflow default so this job stays read-only.
    permissions:
      contents: read
    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Dependency Review
        uses: actions/dependency-review-action@v4

  # Static analysis of the Python sources with CodeQL.
  codeql:
    runs-on: ubuntu-latest
    timeout-minutes: 15
    # Only this job needs to write security events (to upload CodeQL results).
    permissions:
      actions: read
      contents: read
      security-events: write
    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Initialize CodeQL
        uses: github/codeql-action/init@v3
        with:
          languages: python

      - name: Perform CodeQL Analysis
        uses: github/codeql-action/analyze@v3

  # Smoke test: runs diacritize.py on a single Hebrew word for each Python version.
  infer:
    runs-on: ubuntu-latest
    timeout-minutes: 10
    permissions:
      contents: read
    strategy:
      # Let every Python version report its own result instead of aborting on the first failure.
      fail-fast: false
      matrix:
        python-version: ['3.8', '3.9', '3.10', '3.11', '3.12']

    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}
          cache: 'pip'
          # Invalidate the pip cache whenever either dependency manifest changes.
          cache-dependency-path: |
            python/hebrew/requirements.txt
            python/hebrew/setup.py

      - name: Install requirements
        working-directory: ./python/hebrew
        run: |
          python -m pip install --upgrade pip
          pip install --upgrade --upgrade-strategy eager -r requirements.txt -e .

      # NOTE(review): presumably the directory diacritize.py expects for the
      # "base" session's cbhg model files - confirm against the config loader.
      - name: Create model directory
        working-directory: ./python/hebrew
        run: |
          mkdir -p log_dir/base.cbhg/

      - name: Run diacriticization
        working-directory: ./python/hebrew
        run: |
          python diacritize.py --model_kind "cbhg" --config config/cbhg.yml --text 'שלום'

  # Runs train.py against a tiny generated data set using config/test_cbhg.yml.
  train:
    runs-on: ubuntu-latest
    timeout-minutes: 20
    permissions:
      contents: read
    strategy:
      fail-fast: false
      matrix:
        python-version: ['3.8', '3.9', '3.10', '3.11', '3.12']

    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}
          cache: 'pip'
          cache-dependency-path: |
            python/hebrew/requirements.txt
            python/hebrew/setup.py

      - name: Install requirements
        working-directory: ./python/hebrew
        run: |
          python -m pip install --upgrade pip
          pip install --upgrade --upgrade-strategy eager -r requirements.txt -e .

      # Writes a one-line Hebrew sample to data/test/test.txt for the training step.
      - name: Prepare test data
        working-directory: ./python/hebrew
        run: |
          mkdir -p data/test
          echo "שלום עולם" > data/test/test.txt

      - name: Try training (WIP)
        working-directory: ./python/hebrew
        run: |
          python train.py --model "cbhg" --config config/test_cbhg.yml
25 changes: 21 additions & 4 deletions .github/workflows/ruby.yml
Original file line number Diff line number Diff line change
@@ -1,23 +1,40 @@
name: ruby
name: Ruby CI

on:
push:
branches: [ main ]
paths:
- 'lib/**'
- 'spec/**'
- 'Gemfile'
- 'rababa.gemspec'
- '.github/workflows/ruby.yml'
pull_request:
paths:
- 'lib/**'
- 'spec/**'
- 'Gemfile'
- 'rababa.gemspec'
- '.github/workflows/ruby.yml'

# Cancel in-progress runs for the same workflow and branch
concurrency:
group: ${{ github.workflow }}-${{ github.ref }}
cancel-in-progress: true

jobs:
build:
runs-on: ubuntu-latest
strategy:
fail-fast: false
matrix:
ruby-version: ['2.6', '2.7', '3.0', '3.1', '3.2']
ruby-version: ['2.7', '3.0', '3.1', '3.2', '3.3']

steps:
- uses: actions/checkout@v2
- uses: actions/checkout@v4

- name: Set up Ruby
uses: ruby/setup-ruby@v1
uses: ruby/setup-ruby@v1.171.0
with:
ruby-version: ${{ matrix.ruby-version }}
bundler-cache: true
Expand Down
4 changes: 2 additions & 2 deletions python/arabic/requirements.txt
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
torch>=1.9.0,<2.0.0
torch>=1.9.0,<3.0.0
numpy>=1.20.0,<2.0.0
matplotlib>=3.3.3
pandas>=1.3.0
ruamel.yaml>=0.16.12
tensorboard>=2.4.0
diacritization-evaluation==0.5
diacritization-evaluation>=0.5
tqdm>=4.56.0
onnx>=1.9.0
onnxruntime>=1.8.1
Expand Down
38 changes: 19 additions & 19 deletions python/arabic/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
from os import environ

import setuptools
from setuptools import find_packages

with open("README.adoc", "r", encoding="utf-8") as fh:
LONG_DESCRIPTION = fh.read()
Expand All @@ -15,13 +16,18 @@
PKG_VERSION = TAG_VERSION.group(1)

setuptools.setup(
name='rababa',
name='rababa-arabic',
version=PKG_VERSION,
author="Ribose",
author_email="open.source@ribose.com",
license='MIT',
description='Rababa for Arabic diacriticization',
# packages=['rababa'],
packages=find_packages(include=[
"*",
"models.*",
"modules.*",
"util.*",
]),
url='https://www.interscript.org',
python_requires='>=3.8, <4',
project_urls={
Expand All @@ -30,23 +36,17 @@
'Tracker': 'https://github.com/interscript/rababa/issues',
},
install_requires=[
'torch>=1.9.0',
'numpy',
'matplotlib',
'pandas',
'ruamel.yaml',
'tensorboard',
'diacritization-evaluation',
'tqdm',
'onnx',
'onnxruntime',
'pyyaml',
'torch>=1.9.0,<3.0.0',
'numpy>=1.20.0,<2.0.0',
'matplotlib>=3.3.3',
'pandas>=1.3.0',
'ruamel.yaml>=0.16.12',
'tensorboard>=2.4.0',
'diacritization-evaluation>=0.5',
'tqdm>=4.56.0',
'onnx>=1.9.0',
'onnxruntime>=1.8.1',
'pyyaml>=5.4.1',
],
# extras_require={'plotting': ['matplotlib>=2.2.0', 'jupyter']},
setup_requires=['pytest-runner'],
tests_require=['pytest'],
# entry_points={
# 'console_scripts': ['my-command=exampleproject.example:main']
# },
# package_data={'exampleproject': ['data/schema.json']}
)
38 changes: 23 additions & 15 deletions python/hebrew/config/test_cbhg.yml
Original file line number Diff line number Diff line change
@@ -1,20 +1,21 @@
session_name: base

data_directory: "data"
data_type: "CA_MSA"
data_type: "test"
log_directory: "log_dir"
load_training_data: true
load_test_data: false
load_validation_data: true
n_training_examples: null # null load all training examples, good for fast loading
n_training_examples: 5 # Using a small number for testing
n_test_examples: null # null load all test examples
n_validation_examples: null # null load all validation examples
test_file_name: "test.csv"
is_data_preprocessed: false # The data file is organized as (original text | text | diacritics)
data_separator: '|' # Required if the data already processed
diacritics_separator: '*' # Required if the data already processed
text_encoder: ArabicEncoderWithStartSymbol
text_cleaner: valid_arabic_cleaners # a white list that uses only Arabic letters, punctuations, and a space
train_file_name: "test.txt"
test_file_name: "test.txt"
is_data_preprocessed: false
data_separator: '|'
diacritics_separator: '*'
text_encoder: HebrewEncoder # Use Hebrew encoder
text_cleaner: basic_cleaners # Adjusted for Hebrew
max_len: 600 # sentences larger than this size will not be used
reconcile: true

Expand All @@ -36,16 +37,23 @@ post_cbhg_use_batch_norm: true

use_mixed_precision: false
optimizer_type: Adam
device: cuda
device: cpu # Using CPU for testing

# GEOMETRY
len_input_symbols: 90
len_niqqud_symbols: 16
len_dagesh_symbols: 3
len_sin_symbols: 4

# LOGGING
evaluate_frequency: 5000
evaluate_with_error_rates_frequency: 5000
n_predicted_text_tensorboard: 10 # To be written to the tensorboard
model_save_frequency: 5000
evaluate_frequency: 10
evaluate_with_error_rates_frequency: 10
n_predicted_text_tensorboard: 5 # To be written to the tensorboard
model_save_frequency: 10
train_plotting_frequency: 50000000 # No plotting for this model
n_steps_avg_losses: [100, 500, 1_000, 5_000] # command line display of average loss values for the last n steps
error_rates_n_batches: 10000 # if calculating error rate is slow, then you can specify the number of batches to be calculated
n_steps_avg_losses: [10, 20, 30, 40] # Reduced for testing
error_rates_n_batches: 5 # Reduced for testing

test_model_path: null # load the last saved model
train_resume_model_path: null # load last saved model
model_path: null
6 changes: 6 additions & 0 deletions python/hebrew/data/eval/test.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
שלום עולם
זה מבחן
בדיקה ניקוד
מערכת ניקוד עברית
ירושלים
תל אביב
4 changes: 2 additions & 2 deletions python/hebrew/requirements.txt
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
torch>=1.9.0,<2.0.0
torch>=1.9.0,<3.0.0
numpy>=1.20.0,<2.0.0
matplotlib>=3.3.3
pandas>=1.3.0
ruamel.yaml>=0.16.12
tensorboard>=2.4.0
diacritization-evaluation==0.5
diacritization-evaluation>=0.5
tqdm>=4.56.0
onnx>=1.9.0
onnxruntime>=1.8.1
Expand Down
41 changes: 21 additions & 20 deletions python/hebrew/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
from os import environ

import setuptools
from setuptools import find_packages

with open("README.adoc", "r", encoding="utf-8") as fh:
LONG_DESCRIPTION = fh.read()
Expand All @@ -15,13 +16,18 @@
PKG_VERSION = TAG_VERSION.group(1)

setuptools.setup(
name='rababa',
name='rababa-hebrew',
version=PKG_VERSION,
author="Ribose",
author_email="open.source@ribose.com",
license='MIT',
description='Rababa for Arabic diacriticization',
# packages=['rababa'],
description='Rababa for Hebrew diacriticization',
packages=find_packages(include=[
"*",
"models.*",
"modules.*",
"util.*",
]),
url='https://www.interscript.org',
python_requires='>=3.8, <4',
project_urls={
Expand All @@ -30,23 +36,18 @@
'Tracker': 'https://github.com/interscript/rababa/issues',
},
install_requires=[
'torch>=1.9.0',
'numpy',
'matplotlib',
'pandas',
'ruamel.yaml',
'tensorboard',
'diacritization-evaluation',
'tqdm',
'onnx',
'onnxruntime',
'pyyaml',
'torch>=1.9.0,<3.0.0',
'numpy>=1.20.0,<2.0.0',
'matplotlib>=3.3.3',
'pandas>=1.3.0',
'ruamel.yaml>=0.16.12',
'tensorboard>=2.4.0',
'diacritization-evaluation>=0.5',
'tqdm>=4.56.0',
'onnx>=1.9.0',
'onnxruntime>=1.8.1',
'pyyaml>=5.4.1',
'wandb>=0.12.4',
],
# extras_require={'plotting': ['matplotlib>=2.2.0', 'jupyter']},
setup_requires=['pytest-runner'],
tests_require=['pytest'],
# entry_points={
# 'console_scripts': ['my-command=exampleproject.example:main']
# },
# package_data={'exampleproject': ['data/schema.json']}
)
Loading
Loading