NOTE(review): this patch arrived with its line breaks collapsed and with the opening of
both PHP hunks missing -- each now starts mid-statement ("separator = ..." and
"dataset = ...") and carries fewer added lines than its "@@" header declares; presumably
a span beginning at the PHP open tag was stripped during extraction. The patch is
re-flowed below, one change per line. The missing openings are NOT reconstructed, so
regenerate the patch from the branch before applying it. Patch tools skip this preamble.

diff --git a/docs/transformers/word-randomizer.md b/docs/transformers/word-randomizer.md
new file mode 100644
index 0000000..c64127f
--- /dev/null
+++ b/docs/transformers/word-randomizer.md
@@ -0,0 +1,22 @@
+[source]
+
+# Word Order Randomizer
+Splits each text value into words on a separator, shuffles the word order, and joins the words back together with the same separator. Non-text values are left unchanged.
+
+**Interfaces:** [Transformer](api.md#transformer)
+
+**Data Type Compatibility:** Categorical
+
+## Parameters
+| # | Name | Default | Type | Description |
+|---|---|---|---|---|
+| 1 | separator | ' ' | string | The string that delimits words. Text is split on it and the shuffled words are rejoined with it. An empty separator leaves the text unchanged. |
+
+## Example
+```php
+use Rubix\ML\Transformers\WordOrderRandomizer;
+$transformer = new WordOrderRandomizer();
+```
+
+## Additional Methods
+This transformer does not have any additional methods.
diff --git a/src/Transformers/WordOrderRandomizer.php b/src/Transformers/WordOrderRandomizer.php
new file mode 100644
index 0000000..e58b710
--- /dev/null
+++ b/src/Transformers/WordOrderRandomizer.php
@@ -0,0 +1,85 @@
+separator = $separator;
+    }
+
+    /**
+     * Return the data types that this transformer is compatible with.
+     *
+     * @internal
+     *
+     * @return list<\Rubix\ML\DataType>
+     */
+    public function compatibility() : array
+    {
+        return DataType::all();
+    }
+
+    /**
+     * Transform the dataset in place by randomizing the word order of every sample.
+     *
+     * @param array<mixed[]> $samples
+     */
+    public function transform(array &$samples) : void
+    {
+        array_walk($samples, [$this, 'randomize']);
+    }
+
+    /**
+     * Shuffle the separator-delimited words of each string value in a sample; other values are untouched.
+     *
+     * @param list<mixed> $sample
+     */
+    private function randomize(array &$sample) : void
+    {
+        foreach ($sample as &$value) {
+            if (is_string($value) && $this->separator !== '') { // '0' is a valid separator; only '' is skipped
+                $words = explode($this->separator, $value);
+                shuffle($words);
+                $value = implode($this->separator, $words);
+            }
+        }
+    }
+
+    /**
+     * Return the string representation of the object.
+     *
+     * @internal
+     *
+     * @return string
+     */
+    public function __toString() : string
+    {
+        return 'Word Order Randomizer';
+    }
+}
diff --git a/tests/Transformers/WordOrderRandomizerTest.php b/tests/Transformers/WordOrderRandomizerTest.php
new file mode 100644
index 0000000..454ae9d
--- /dev/null
+++ b/tests/Transformers/WordOrderRandomizerTest.php
@@ -0,0 +1,61 @@
+dataset = Unlabeled::quick([
+            ['Red dining chair.'],
+            ['Blue,cotton,pillow'],
+        ]);
+
+        $this->transformer = new WordOrderRandomizer();
+    }
+
+    /**
+     * @test
+     */
+    public function build() : void
+    {
+        $this->assertInstanceOf(WordOrderRandomizer::class, $this->transformer);
+        $this->assertInstanceOf(Transformer::class, $this->transformer);
+    }
+
+    /**
+     * @test
+     */
+    public function transform() : void
+    {
+        $this->dataset->apply($this->transformer);
+
+        foreach (explode(' ', 'Red dining chair.') as $word) {
+            $this->assertStringContainsString($word, $this->dataset->samples()[0][0]);
+        }
+
+        $this->assertEquals(['Blue,cotton,pillow'], $this->dataset->samples()[1]);
+    }
+}