@@ -19,45 +19,74 @@ composer require webd/language
1919## Usage
2020
2121``` php
22+ // ------------ n-gram string similarity
23+
24+ use webd\language\StringSimilarity\DiceCoefficient;
25+ use webd\language\StringSimilarity\JaccardSimilarity;
26+
27+
28+ $dice = new DiceCoefficient(2);
29+ $jaccard = new JaccardSimilarity(2);
30+
31+ $a = "context";
32+ $b = "contact";
33+
34+ // 0.5
35+ echo $dice->similarity($a, $b) . PHP_EOL;
36+
37+ // context : ["co", "on", "nt", "te", "ex", "xt"]
38+ // contact : ["co", "on", "nt", "ta", "ac", "ct"]
39+ // jaccard similarity : 3 / 9
40+ // 0.33333
41+ echo $jaccard->similarity($a, $b) . PHP_EOL;
42+
43+ // ------------ string distance
44+
2245use webd\language\StringDistance;
2346
2447$string1 = "You won 10000$";
2548$string2 = "You won 15500$";
2649
2750// 2
28- echo "Edit distance : " . StringDistance::editDistance($string1, $string2) . "\n" ;
51+ echo "Edit distance : " . StringDistance::editDistance($string1, $string2) . PHP_EOL ;
2952
3053// 2
31- echo "Levenshtein : " . StringDistance::levenshtein($string1, $string2) . "\n";
54+ echo "Levenshtein : " . StringDistance::levenshtein($string1, $string2) . PHP_EOL;
55+
56+
57+ $lcs = new \webd\language\LCS($string1, $string2);
58+ // You won 100$
59+ echo $lcs->value() . PHP_EOL;
60+
61+ // 12
62+ echo $lcs->length() . PHP_EOL;
63+
64+ // 4
65+ echo $lcs->distance() . PHP_EOL;
66+
67+ // -------------- jaro-winkler string similarity
3268
3369// 0.96428571428571
34- echo "Jaro-Winkler : " . StringDistance::jaroWinkler($string1, $string2) . "\n" ;
70+ echo "Jaro-Winkler : " . StringDistance::jaroWinkler($string1, $string2) . PHP_EOL ;
3571
3672// 0.98809523809524
37- echo "Jaro-Winkler (prefix scale = 0.2) : " . StringDistance::jaroWinkler($string1, $string2, 0.2) . "\n";
73+ echo "Jaro-Winkler (prefix scale = 0.2) : " . StringDistance::jaroWinkler($string1, $string2, 0.2) . PHP_EOL;
74+
75+ // -------------- stemming
3876
3977use webd\language\PorterStemmer;
78+
4079// analyz
41- echo "analyzing => " . PorterStemmer::stem("analyzing") . "\n" ;
80+ echo "analyzing => " . PorterStemmer::stem("analyzing") . PHP_EOL ;
4281
4382// abandon
44- echo "abandoned => " . PorterStemmer::stem("abandoned") . "\n" ;
83+ echo "abandoned => " . PorterStemmer::stem("abandoned") . PHP_EOL ;
4584
4685// inclin
47- echo "inclination => " . PorterStemmer::stem("inclination") . "\n";
48-
49- $lcs = new \webd\language\LCS($string1, $string2);
50- // You won 100$
51- echo $lcs->value() . "\n";
52-
53- // 12
54- echo $lcs->length() . "\n";
55-
56- // 4
57- echo $lcs->distance() . "\n";
86+ echo "inclination => " . PorterStemmer::stem("inclination") . PHP_EOL;
5887
59- // SpamSum, aka ssdeep, aka Context-Triggered Piecewize Hashing (CTPH):
88+ // ------------- SpamSum, aka ssdeep, aka Context-Triggered Piecewise Hashing (CTPH)
6089$s = new \webd\language\SpamSum;
6190// 192:x+cMdRiWqk2YODjCoG4OU88/ffcQ+lsCYDIlp6+TF244htoJFUjw:krovCLA9byp6+52jhtnjw
62- echo $s->HashString(file_get_contents(__DIR__ . "/SpamSum.php")) . "\n" ;
91+ echo $s->HashString(file_get_contents(__DIR__ . "/SpamSum.php")) . PHP_EOL ;
6392```
0 commit comments