From f15fca7acb650fbf2d98cf1b177125b26509b091 Mon Sep 17 00:00:00 2001 From: David Thalmann Date: Thu, 29 Jan 2015 14:20:28 +0100 Subject: [PATCH 1/2] #21 Added support for embedded directions --- String.php | 127 ++++++++++++++++++++++++++++++++++++------- Test/Unit/String.php | 66 ++++++++++++++++++++++ 2 files changed, 174 insertions(+), 19 deletions(-) diff --git a/String.php b/String.php index be80ad0..c7cf5a2 100644 --- a/String.php +++ b/String.php @@ -68,6 +68,13 @@ class String implements \ArrayAccess, \Countable, \IteratorAggregate { */ const RTL = 1; + /** + * Bi-Directional. + * + * @const int + */ + const BIDI = 2; + /** * ZERO WIDTH NON-BREAKING SPACE (ZWNPBSP, aka byte-order mark, BOM). * @@ -89,6 +96,13 @@ class String implements \ArrayAccess, \Countable, \IteratorAggregate { */ const RLM = 0x200f; + /** + * ARABIC LETTER MARK. + * + * @const int + */ + const ARM = 0x061c; + /** * LEFT-TO-RIGHT EMBEDDING. * @@ -110,6 +124,27 @@ class String implements \ArrayAccess, \Countable, \IteratorAggregate { */ const PDF = 0x202c; + /** + * LEFT-TO-RIGHT ISOLATE. + * + * @const int + */ + const LRI = 0x2066; + + /** + * RIGHT-TO-LEFT ISOLATE. + * + * @const int + */ + const RLI = 0x2067; + + /** + * POP DIRECTIONAL ISOLATE. + * + * @const int + */ + const PDI = 0x2069; + /** * LEFT-TO-RIGHT OVERRIDE. * @@ -746,7 +781,7 @@ public function getWidth ( ) { /** * Get direction of the current string. - * Please, see the self::LTR and self::RTL constants. + * Please, see the self::LTR, self::RTL and self::BIDI constants. * It does not yet support embedding directions. * * @access public @@ -755,32 +790,59 @@ public function getWidth ( ) { public function getDirection ( ) { if(null === $this->_direction) { - - if(null === $this->_string) - $this->_direction = static::LTR; - else - $this->_direction = static::getCharDirection( - mb_substr($this->_string, 0, 1) - ); + // Default + $this->_direction = static::LTR; + + // Check for LRM or RLM/ARM + $hasLRM = 0 !== preg_match('#\x{200e}#u', $this->_string); + $hasRLMOrARM = 0 !== preg_match('#\x{200f}|\x{061c}#u', $this->_string); + + if($hasLRM || $hasRLMOrARM) { + if($hasLRM && $hasRLMOrARM) + $this->_direction = static::BIDI; + elseif($hasLRM) + $this->_direction = static::LTR; + else + $this->_direction = static::RTL; + } + else { + $this->_direction = static::getCharDirection(mb_substr($this->_string, 0, 1)); + + // Check for RLE, RLO or RLI in LTR context -> BIDI + if( static::LTR === $this->_direction + && 0 !== preg_match('#\x{202b}|\x{202e}|\x{2067}#u', $this->_string)) + $this->_direction = static::BIDI; + // Check for LRE, LRO or LRI in RTL context -> BIDI + elseif(static::RTL === $this->_direction + && 0 !== preg_match('#\x{202a}|\x{202d}|\x{2066}#u', $this->_string)) + $this->_direction = static::BIDI; + // Check every other character + else { + foreach ($this as $char) { + if ($this->_direction !== $this->getCharDirection($char)) { + $this->_direction = static::BIDI; + break; + } + } + } + } } return $this->_direction; } /** - * Get character of a specific character. - * Please, see the self::LTR and self::RTL constants. + * Returns true if a specific character is RTL. * * @access public * @param string $char Character. - * @return int + * @return bool */ - public static function getCharDirection ( $char ) { - + public static function isRTL ( $char ) { $c = static::toCode($char); if(!(0x5be <= $c && 0x10b7f >= $c)) - return static::LTR; + return false; if(0x85e >= $c) { @@ -814,10 +876,14 @@ public static function getCharDirection ( $char ) { || (0x830 <= $c && 0x83e >= $c) || (0x840 <= $c && 0x858 >= $c) || 0x85e === $c) - return static::RTL; + return true; } - elseif(0x200f === $c) - return static::RTL; + elseif( static::RLM === $c + || static::ARM === $c + || static::RLO === $c + || static::RLE === $c + || static::RLI === $c) + return true; elseif(0xfb1d <= $c) { if( 0xfb1d === $c @@ -855,10 +921,33 @@ public static function getCharDirection ( $char ) { || (0x10b40 <= $c && 0x10b55 >= $c) || (0x10b58 <= $c && 0x10b72 >= $c) || (0x10b78 <= $c && 0x10b7f >= $c)) - return static::RTL; + return true; } - return static::LTR; + return false; + } + + /** + * Returns true if a specific character is LTR. + * + * @access public + * @param string $char Character. + * @return bool + */ + public static function isLTR ( $char ) { + return !self::isRTL($char); + } + + /** + * Get direction of a specific character. + * Please, see the self::LTR and self::RTL constants. + * + * @access public + * @param string $char Character. + * @return int + */ + public static function getCharDirection ( $char ) { + return self::isRTL($char) ? static::RTL : static::LTR; } /** diff --git a/Test/Unit/String.php b/Test/Unit/String.php index 32d9001..accfd5e 100644 --- a/Test/Unit/String.php +++ b/Test/Unit/String.php @@ -744,6 +744,72 @@ public function case_get_char_direction ( ) { ->isEqualTo(LUT::RTL); } + public function case_get_direction ( ) { + + $LRM = LUT::fromCode(LUT::LRM); + $RLM = LUT::fromCode(LUT::RLM); + $ARM = LUT::fromCode(LUT::ARM); + $RLE = LUT::fromCode(LUT::RLE); + $LRE = LUT::fromCode(LUT::LRE); + $PDF = LUT::fromCode(LUT::PDF); + + $this + ->given($string = new LUT('Left')) + ->when($result = $string->getDirection()) + ->integer($result) + ->isEqualto(LUT::LTR) + + ->given($string = new LUT('اليمين')) + ->when($result = $string->getDirection()) + ->integer($result) + ->isEqualto(LUT::RTL) + + ->given($string = new LUT('Left & اليمين')) + ->when($result = $string->getDirection()) + ->integer($result) + ->isEqualto(LUT::BIDI) + + ->given($string = new LUT($LRM . 'اليمين')) + ->when($result = $string->getDirection()) + ->integer($result) + ->isEqualto(LUT::LTR) + + ->given($string = new LUT($RLM .'Left')) + ->when($result = $string->getDirection()) + ->integer($result) + ->isEqualto(LUT::RTL) + + ->given($string = new LUT($ARM .'Left')) + ->when($result = $string->getDirection()) + ->integer($result) + ->isEqualto(LUT::RTL) + + ->given($string = new LUT($LRM .'Both?' . $RLM)) + ->when($result = $string->getDirection()) + ->integer($result) + ->isEqualto(LUT::BIDI) + + ->given($string = new LUT('Left' . $RLE .'اليمين')) + ->when($result = $string->getDirection()) + ->integer($result) + ->isEqualto(LUT::BIDI) + + ->given($string = new LUT('اليمين' . $LRE .'Left')) + ->when($result = $string->getDirection()) + ->integer($result) + ->isEqualto(LUT::BIDI) + + ->given($string = new LUT('اليمين' . $RLE .'اليمين')) + ->when($result = $string->getDirection()) + ->integer($result) + ->isEqualto(LUT::RTL) + + ->given($string = new LUT('Left' . $LRE .'Left')) + ->when($result = $string->getDirection()) + ->integer($result) + ->isEqualto(LUT::LTR); + } + public function case_get_char_width ( ) { $this From 8ed3cf019a31cff3f8e1e82983a3016a81f791e0 Mon Sep 17 00:00:00 2001 From: David Thalmann Date: Thu, 29 Jan 2015 14:58:17 +0100 Subject: [PATCH 2/2] #21 Changed codepoints in regex to constants. --- String.php | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/String.php b/String.php index c7cf5a2..f25d339 100644 --- a/String.php +++ b/String.php @@ -794,8 +794,9 @@ public function getDirection ( ) { $this->_direction = static::LTR; // Check for LRM or RLM/ARM - $hasLRM = 0 !== preg_match('#\x{200e}#u', $this->_string); - $hasRLMOrARM = 0 !== preg_match('#\x{200f}|\x{061c}#u', $this->_string); + $hasLRM = 0 !== preg_match('#\x{' . dechex(self::LRM) . '}#u', $this->_string); + $hasRLMOrARM = 0 !== preg_match('#\x{' . dechex(self::RLM) . '}|\x{' + . dechex(self::ARM) . '}#u', $this->_string); if($hasLRM || $hasRLMOrARM) { if($hasLRM && $hasRLMOrARM) @@ -810,11 +811,12 @@ public function getDirection ( ) { // Check for RLE, RLO or RLI in LTR context -> BIDI if( static::LTR === $this->_direction - && 0 !== preg_match('#\x{202b}|\x{202e}|\x{2067}#u', $this->_string)) + && 0 !== preg_match('#\x{' . dechex(self::RLE) . '}|\x{' + . dechex(self::RLO) . '}|\x{' . dechex(self::RLI) . '}#u', $this->_string)) $this->_direction = static::BIDI; // Check for LRE, LRO or LRI in RTL context -> BIDI elseif(static::RTL === $this->_direction - && 0 !== preg_match('#\x{202a}|\x{202d}|\x{2066}#u', $this->_string)) + && 0 !== preg_match('#\x{' . dechex(self::LRE) . '}|\x{' . dechex(self::LRO) . '}|\x{' . dechex(self::LRI) . '}#u', $this->_string)) $this->_direction = static::BIDI; // Check every other character else {