@@ -82,18 +82,20 @@
"src_length": 1
}
],
"encode_base64": [
"encode_text": [
{
"dst_length": 1,
"input_type": "list",
"intensity": 100.0,
"name": "encode_base64",
"name": "encode_text",
"src_length": 1,
"granularity": "all",
"aug_min": 1,
"aug_max": 10,
"aug_p": 0.3,
"n": 1
"aug_max": 1,
"aug_p": 1.0,
"n": 1,
"p": 1.0,
"encoder": "base64",
"method": "sentence"
}
],
"get_baseline": [
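For reference, the updated expected-metadata entry appears to be the following (reconstructed from the hunk above; the split between removed and added keys is inferred from the -82,18 +82,20 line counts, so treat it as a best guess rather than the file's verbatim contents):

"encode_text": [
    {
        "dst_length": 1,
        "input_type": "list",
        "intensity": 100.0,
        "name": "encode_text",
        "src_length": 1,
        "aug_min": 1,
        "aug_max": 1,
        "aug_p": 1.0,
        "n": 1,
        "p": 1.0,
        "encoder": "base64",
        "method": "sentence"
    }
],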
61 changes: 32 additions & 29 deletions augly/tests/text_tests/functional_unit_test.py
@@ -10,7 +10,9 @@
import unittest

from augly import text as txtaugs
from augly.text.augmenters.utils import Encoding
from augly.utils import FUN_FONTS_GREEK_PATH
from nlpaug.util import Method


class FunctionalTextUnitTest(unittest.TestCase):
@@ -36,6 +38,24 @@ def test_apply_lambda(self) -> None:
augmented_apply_lambda = txtaugs.apply_lambda(self.texts)
self.assertTrue(augmented_apply_lambda[0] == self.texts[0])

def test_base64_sentence(self) -> None:
augmented_words = txtaugs.encode_text(
"Hello, world!", 1, 1, 1.0, Method.SENTENCE, Encoding.BASE64
)
self.assertEqual(augmented_words[0], "SGVsbG8sIHdvcmxkIQ==")

def test_base64_word(self) -> None:
augmented_words_word = txtaugs.encode_text(
"Hello, world!", 1, 1, 1.0, Method.WORD, Encoding.BASE64
)
self.assertEqual(augmented_words_word[0], "SGVsbG8=, world!")

def test_base64_char(self) -> None:
augmented_words_char = txtaugs.encode_text(
"Hello, world!", 1, 1, 1.0, Method.CHAR, Encoding.BASE64
)
self.assertEqual(augmented_words_char[0], "SA==ello LA== dw==orld IQ==")

def test_change_case(self) -> None:
augmented_words = txtaugs.change_case(self.texts[0], cadence=3.0, case="upper")
self.assertTrue(
@@ -51,35 +71,6 @@ def test_contractions(self) -> None:
augmented_words[0] == "I would call him but I don't know where he's gone"
)

def test_encode_base64_all(self) -> None:
augmented_words = txtaugs.encode_base64("Hello, world!")
self.assertTrue(augmented_words[0] == "SGVsbG8sIHdvcmxkIQ==")

def test_encode_base64_word(self) -> None:
random.seed(42) # Set seed for reproducibility
augmented_words_word = txtaugs.encode_base64(
"Hello, world!", granularity="word", aug_min=1, aug_max=1, aug_p=1.0
)
self.assertEqual(augmented_words_word[0], "SGVsbG8=, world!")

def test_encode_base64_char(self) -> None:
random.seed(42)
augmented_words_char = txtaugs.encode_base64(
"Hello, world!", granularity="char", aug_min=1, aug_max=2, aug_p=1.0
)
self.assertEqual(augmented_words_char[0], "SA==ellbw== LA== wbw==rlZA== IQ==")

def test_encode_base64_general(self) -> None:
random.seed(42)
augmented_words_low_p = txtaugs.encode_base64(
"Hello, world!", granularity="word", aug_min=1, aug_max=2, aug_p=0.1
)
random.seed(42)
augmented_words_high_p = txtaugs.encode_base64(
"Hello, world!", granularity="word", aug_min=1, aug_max=2, aug_p=0.9
)
self.assertTrue(len(augmented_words_high_p[0]) > len(augmented_words_low_p[0]))

def test_get_baseline(self) -> None:
augmented_baseline = txtaugs.get_baseline(self.texts)
self.assertTrue(
@@ -281,6 +272,18 @@ def test_insert_zero_width_chars(self) -> None:
],
)

def test_leetspeak_sentence(self) -> None:
augmented_words = txtaugs.encode_text(
"Hello, world!", 1, 1, 1.0, Method.SENTENCE, Encoding.LEETSPEAK
)
self.assertEqual(augmented_words[0], "h3110, w0r1d!")

def test_leetspeak_word(self) -> None:
augmented_words = txtaugs.encode_text(
"Hello, world!", 1, 1, 1.0, Method.WORD, Encoding.LEETSPEAK
)
self.assertEqual(augmented_words[0], "h3110, world!")

def test_merge_words(self) -> None:
augmented_split_words = txtaugs.merge_words(self.texts, aug_word_p=0.3, n=1)
self.assertTrue(
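Taken together, these functional tests sketch the new encode_text API: positional (aug_min, aug_max, aug_p) arguments followed by an nlpaug Method and an AugLy Encoding. A minimal usage sketch based only on the calls and expected outputs asserted above (it assumes this branch of AugLy is installed):

from augly import text as txtaugs
from augly.text.augmenters.utils import Encoding
from nlpaug.util import Method

# Encode the whole sentence as one base64 blob.
out = txtaugs.encode_text("Hello, world!", 1, 1, 1.0, Method.SENTENCE, Encoding.BASE64)
assert out[0] == "SGVsbG8sIHdvcmxkIQ=="

# Leetspeak substitution applied at word granularity.
out = txtaugs.encode_text("Hello, world!", 1, 1, 1.0, Method.WORD, Encoding.LEETSPEAK)
assert out[0] == "h3110, world!"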
107 changes: 81 additions & 26 deletions augly/tests/text_tests/transforms_unit_test.py
@@ -14,7 +14,9 @@
from typing import Any, Dict, List

from augly import text as txtaugs
from augly.text.augmenters.utils import Encoding
from augly.utils import TEXT_METADATA_PATH
from nlpaug.util import Method


def are_equal_metadata(
@@ -136,57 +138,68 @@ def test_Compose(self) -> None:
are_equal_metadata(self.metadata, self.expected_metadata["compose"]),
)

def test_EncodeBase64(self) -> None:
augmented_text = txtaugs.EncodeBase64(
granularity="all", aug_min=1, aug_max=10, aug_p=0.3, n=1, p=1.0
def test_Base64_Sentence(self) -> None:
augmented_text = txtaugs.EncodeTextTransform(
aug_min=1,
aug_max=1,
aug_p=1.0,
method=Method.SENTENCE,
encoder=Encoding.BASE64,
n=1,
p=1.0,
)(
["Hello, world!"],
metadata=self.metadata,
)

self.assertTrue(augmented_text[0] == "SGVsbG8sIHdvcmxkIQ==")
self.expected_metadata["encode_text"][0]["encoder"] = Encoding.BASE64
self.assertTrue(
are_equal_metadata(self.metadata, self.expected_metadata["encode_base64"])
are_equal_metadata(self.metadata, self.expected_metadata["encode_text"])
)

def test_EncodeBase64_Word(self) -> None:
def test_Base64_Word(self) -> None:
self.metadata = []

random.seed(42)
augmented_text = txtaugs.EncodeBase64(
granularity="word", aug_min=1, aug_max=1, aug_p=1.0, n=1, p=1.0
augmented_text = txtaugs.EncodeTextTransform(
aug_min=1,
aug_max=1,
aug_p=1.0,
method=Method.WORD,
encoder=Encoding.BASE64,
n=1,
p=1.0,
)(
["Hello, world!"],
metadata=self.metadata,
)
self.assertEqual(augmented_text[0], "SGVsbG8=, world!")

expected_metadata = deepcopy(self.expected_metadata["encode_base64"])
expected_metadata[0]["granularity"] = "word"
expected_metadata[0]["aug_p"] = 1.0
expected_metadata[0]["aug_max"] = 1
expected_metadata[0]["intensity"] = 100.0
metadata_expected = deepcopy(self.expected_metadata["encode_text"])
metadata_expected[0]["method"] = "word"
metadata_expected[0]["encoder"] = Encoding.BASE64
self.assertTrue(are_equal_metadata(self.metadata, metadata_expected))

self.assertTrue(are_equal_metadata(self.metadata, expected_metadata))

def test_EncodeBase64_Char(self) -> None:
def test_Base64_Char(self) -> None:
self.metadata = []

random.seed(42)
augmented_text = txtaugs.EncodeBase64(
granularity="char", aug_min=1, aug_max=2, aug_p=1.0, n=1, p=1.0
augmented_text = txtaugs.EncodeTextTransform(
aug_min=1,
aug_max=1,
aug_p=1.0,
method=Method.CHAR,
encoder=Encoding.BASE64,
n=1,
p=1.0,
)(
["Hello, world!"],
metadata=self.metadata,
)
self.assertEqual(augmented_text[0], "SA==ebA==lo LA== wbw==rlZA== IQ==")

expected_metadata = deepcopy(self.expected_metadata["encode_base64"])
expected_metadata[0]["granularity"] = "char"
expected_metadata[0]["aug_p"] = 1.0
expected_metadata[0]["aug_max"] = 2
expected_metadata[0]["intensity"] = 100.0
self.assertEqual(augmented_text[0], "SA==ello LA== wocg==ld IQ==")

expected_metadata = deepcopy(self.expected_metadata["encode_text"])
expected_metadata[0]["method"] = "char"
expected_metadata[0]["encoder"] = Encoding.BASE64
self.assertTrue(are_equal_metadata(self.metadata, expected_metadata))

def test_GetBaseline(self) -> None:
@@ -278,6 +291,48 @@ def test_InsertZeroWidthChars(self) -> None:
),
)

def test_LeetSpeak_Sentence(self) -> None:
augmented_text = txtaugs.EncodeTextTransform(
aug_min=1,
aug_max=1,
aug_p=1.0,
method=Method.SENTENCE,
encoder=Encoding.LEETSPEAK,
n=1,
p=1.0,
)(
["Hello, world!"],
metadata=self.metadata,
)

self.assertTrue(augmented_text[0] == "h3110, w0r1d!")
self.expected_metadata["encode_text"][0]["encoder"] = Encoding.LEETSPEAK
self.assertTrue(
are_equal_metadata(self.metadata, self.expected_metadata["encode_text"])
)

def test_Leetspeak_Word(self) -> None:
self.metadata = []

augmented_text = txtaugs.EncodeTextTransform(
aug_min=1,
aug_max=1,
aug_p=1.0,
method=Method.WORD,
encoder=Encoding.LEETSPEAK,
n=1,
p=1.0,
)(
["Hello, world!"],
metadata=self.metadata,
)
self.assertEqual(augmented_text[0], "h3110, world!")

metadata_expected = deepcopy(self.expected_metadata["encode_text"])
metadata_expected[0]["method"] = "word"
metadata_expected[0]["encoder"] = Encoding.LEETSPEAK
self.assertTrue(are_equal_metadata(self.metadata, metadata_expected))

def test_MergeWords(self) -> None:
aug_merge_words = txtaugs.MergeWords(aug_word_p=0.3)(
self.texts, metadata=self.metadata
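The transform tests exercise the same behavior through the class-based EncodeTextTransform, which takes keyword arguments and an optional metadata list. A sketch mirroring the keyword usage above (again assuming this branch is installed; the expected output is the one asserted in test_Base64_Sentence):

from augly import text as txtaugs
from augly.text.augmenters.utils import Encoding
from nlpaug.util import Method

transform = txtaugs.EncodeTextTransform(
    aug_min=1,
    aug_max=1,
    aug_p=1.0,
    method=Method.SENTENCE,
    encoder=Encoding.BASE64,
    n=1,
    p=1.0,
)
metadata = []  # each applied transform appends an entry describing its parameters
augmented = transform(["Hello, world!"], metadata=metadata)
assert augmented[0] == "SGVsbG8sIHdvcmxkIQ=="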
14 changes: 8 additions & 6 deletions augly/text/__init__.py
@@ -12,7 +12,7 @@
apply_lambda,
change_case,
contractions,
encode_base64,
encode_text,
get_baseline,
insert_punctuation_chars,
insert_text,
@@ -32,9 +32,10 @@
)
from augly.text.intensity import (
apply_lambda_intensity,
base64_intensity,
change_case_intensity,
contractions_intensity,
encode_base64_intensity,
encode_text_intensity,
get_baseline_intensity,
insert_punctuation_chars_intensity,
insert_text_intensity,
@@ -56,7 +57,7 @@
ApplyLambda,
ChangeCase,
Contractions,
EncodeBase64,
EncodeTextTransform,
GetBaseline,
InsertPunctuationChars,
InsertText,
@@ -81,7 +82,7 @@
"ApplyLambda",
"ChangeCase",
"Contractions",
"EncodeBase64",
"EncodeTextTransform",
"GetBaseline",
"InsertPunctuationChars",
"InsertText",
@@ -101,7 +102,7 @@
"apply_lambda",
"change_case",
"contractions",
"encode_base64",
"encode_text",
"get_baseline",
"insert_punctuation_chars",
"insert_text",
@@ -119,9 +120,10 @@
"split_words",
"swap_gendered_words",
"apply_lambda_intensity",
"base64_intensity",
"change_case_intensity",
"contractions_intensity",
"encode_base64_intensity",
"encode_text_intensity",
"get_baseline_intensity",
"insert_punctuation_chars_intensity",
"insert_text_intensity",
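Since the public exports appear to be renamed outright rather than aliased (encode_base64 to encode_text, EncodeBase64 to EncodeTextTransform, encode_base64_intensity to encode_text_intensity, plus a new base64_intensity), downstream callers would need an update along these lines (hypothetical caller code; the old call shown is the one from the removed functional test):

# Before this PR:
# from augly.text import encode_base64
# encoded = encode_base64("Hello, world!")

# After this PR:
from augly.text import encode_text
from augly.text.augmenters.utils import Encoding
from nlpaug.util import Method

encoded = encode_text("Hello, world!", 1, 1, 1.0, Method.SENTENCE, Encoding.BASE64)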
11 changes: 8 additions & 3 deletions augly/text/augmenters/__init__.py
@@ -7,31 +7,36 @@

# pyre-unsafe

from augly.text.augmenters.base64 import Base64
from augly.text.augmenters.baseline import BaselineAugmenter
from augly.text.augmenters.bidirectional import BidirectionalAugmenter
from augly.text.augmenters.case import CaseAugmenter
from augly.text.augmenters.contraction import ContractionAugmenter
from augly.text.augmenters.encode_base64 import EncodeBase64
from augly.text.augmenters.encode_text_context import EncodeText
from augly.text.augmenters.encode_text_strategy import EncodeTextAugmentation
from augly.text.augmenters.fun_fonts import FunFontsAugmenter
from augly.text.augmenters.insert_text import InsertTextAugmenter
from augly.text.augmenters.insertion import InsertionAugmenter
from augly.text.augmenters.leetspeak import LeetSpeak
from augly.text.augmenters.letter_replacement import LetterReplacementAugmenter
from augly.text.augmenters.text_replacement import TextReplacementAugmenter
from augly.text.augmenters.typo import TypoAugmenter
from augly.text.augmenters.upside_down import UpsideDownAugmenter
from augly.text.augmenters.word_replacement import WordReplacementAugmenter
from augly.text.augmenters.words_augmenter import WordsAugmenter


__all__ = [
"Base64",
"BaselineAugmenter",
"BidirectionalAugmenter",
"CaseAugmenter",
"ContractionAugmenter",
"EncodeBase64",
"EncodeText",
"EncodeTextAugmentation",
"FunFontsAugmenter",
"InsertTextAugmenter",
"InsertionAugmenter",
"LeetSpeak",
"LetterReplacementAugmenter",
"WordsAugmenter",
"TextReplacementAugmenter",
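The augmenter package now splits the encoding logic across encode_text_context, encode_text_strategy, and per-encoder modules (base64, leetspeak), which reads like a context/strategy arrangement: EncodeText as the dispatching context, EncodeTextAugmentation as the shared strategy base, and Base64 / LeetSpeak as concrete encoders. The sketch below is illustrative only; the class and method shapes are assumptions based on the module names, not AugLy's actual implementation, though the two outputs match values asserted in the tests above:

import base64 as b64

class Base64Encoder:
    # Assumed concrete strategy: base64-encode the selected span.
    def encode(self, text: str) -> str:
        return b64.b64encode(text.encode("utf-8")).decode("utf-8")

class LeetSpeakEncoder:
    # Assumed concrete strategy: simple character substitutions.
    _TABLE = str.maketrans({"a": "4", "e": "3", "l": "1", "o": "0", "t": "7"})

    def encode(self, text: str) -> str:
        return text.lower().translate(self._TABLE)

class EncodeTextContext:
    # Assumed context object: holds one encoder strategy and applies it.
    def __init__(self, encoder):
        self.encoder = encoder

    def augment(self, text: str) -> str:
        return self.encoder.encode(text)

print(EncodeTextContext(Base64Encoder()).augment("Hello, world!"))    # SGVsbG8sIHdvcmxkIQ==
print(EncodeTextContext(LeetSpeakEncoder()).augment("Hello, world!"))  # h3110, w0r1d!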