Skip to content

Commit

Permalink
[PHP 8.4][Intl] Add grapheme_str_split
Browse files Browse the repository at this point in the history
Add a polyfill for the `grapheme_str_split` function added in PHP 8.4.

Requires PHP 7.3, because the polyfill is based on `\X` Regex, and it
only works properly on PCRE2, which
[only comes with PHP 7.3+](https://php.watch/versions/7.3/pcre2).

Further, there are some cases in which the polyfill cannot correctly split
complex characters (such as two consecutive country flag Emojis). This is now
fixed in [PCRE2Project/pcre2#410](https://github.com/PCRE2Project/pcre2/issues/410).
However, this fix will likely only make it to PHP 8.4.

References:
 - [RFC: Grapheme cluster for `str_split` function: `grapheme_str_split`](https://wiki.php.net/rfc/grapheme_str_split)
 - [PHP.Watch: PHP 8.4: New `grapheme_str_split` function](https://php.watch/versions/8.4/grapheme_str_split)
  • Loading branch information
Ayesh committed Jun 8, 2024
1 parent e85ab80 commit 3e8ced0
Show file tree
Hide file tree
Showing 12 changed files with 160 additions and 0 deletions.
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,7 @@ Polyfills are provided for:
- the `Date*Exception/Error` classes introduced in PHP 8.3;
- the `SQLite3Exception` class introduced in PHP 8.3;
- the `mb_ucfirst` and `mb_lcfirst` functions introduced in PHP 8.4;
- the `grapheme_str_split` function introduced in PHP 8.4 (requires PHP >= 7.3);

It is strongly recommended to upgrade your PHP version and/or install the missing
extensions whenever possible. This polyfill should be used only when there is no
Expand Down
33 changes: 33 additions & 0 deletions src/Intl/Grapheme/Grapheme.php
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
* - grapheme_strrpos - Find position (in grapheme units) of last occurrence of a string
* - grapheme_strstr - Returns part of haystack string from the first occurrence of needle to the end of haystack
* - grapheme_substr - Return part of a string
* - grapheme_str_split - Splits a string into an array of individual graphemes or chunks of graphemes
*
* @author Nicolas Grekas <[email protected]>
*
Expand Down Expand Up @@ -191,6 +192,38 @@ public static function grapheme_strstr($s, $needle, $beforeNeedle = false)
return mb_strstr($s, $needle, $beforeNeedle, 'UTF-8');
}

/**
 * Splits a string into an array of graphemes, optionally grouped into chunks.
 *
 * Graphemes are extracted with the PCRE `\X` escape in UTF-8 mode.
 *
 * @param string $s   The string to split
 * @param int    $len The number of graphemes per chunk, from 1 to 1073741823
 *
 * @return string[]|false The graphemes (or chunks of $len graphemes), an empty array
 *                        for an empty string, or false when nothing could be matched
 *                        or, before PHP 8.0, when $len is out of range
 *
 * @throws \ValueError On PHP 8.0+ when $len is out of range
 */
public static function grapheme_str_split($s, $len = 1)
{
    // 0 must be rejected here too: the message promises "greater than 0" and
    // array_chunk() below does not accept a size lower than 1.
    if ($len < 1 || $len > 1073741823) {
        if (80000 > \PHP_VERSION_ID) {
            return false;
        }

        throw new \ValueError('grapheme_str_split(): Argument #2 ($length) must be greater than 0 and less than or equal to 1073741823.');
    }

    if ('' === $s) {
        return [];
    }

    preg_match_all('/\X/u', $s, $matches);

    if (empty($matches[0])) {
        return false;
    }

    if (1 === $len) {
        return $matches[0];
    }

    // Group the graphemes by $len and glue each group back into a single string.
    return array_map(static function (array $chunk) {
        return implode('', $chunk);
    }, array_chunk($matches[0], $len));
}

private static function grapheme_position($s, $needle, $offset, $mode)
{
$needle = (string) $needle;
Expand Down
1 change: 1 addition & 0 deletions src/Intl/Grapheme/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ This component provides a partial, native PHP implementation of the
- [`grapheme_strstr`](https://php.net/grapheme_strstr): Returns part of haystack string from
the first occurrence of needle to the end of haystack
- [`grapheme_substr`](https://php.net/grapheme_substr): Return part of a string
- [`grapheme_str_split`](https://php.net/grapheme_str_split): Splits a string into an array of individual or chunks of graphemes.

More information can be found in the
[main Polyfill README](https://github.com/symfony/polyfill/blob/main/README.md).
Expand Down
4 changes: 4 additions & 0 deletions src/Intl/Grapheme/bootstrap.php
Original file line number Diff line number Diff line change
Expand Up @@ -56,3 +56,7 @@ function grapheme_strstr($haystack, $needle, $beforeNeedle = false) { return p\G
if (!function_exists('grapheme_substr')) {
    // Registered only when no grapheme_substr() is defined yet; forwards to the polyfill class.
    function grapheme_substr($string, $offset, $length = null)
    {
        return p\Grapheme::grapheme_substr($string, $offset, $length);
    }
}

// The PHP 7.3+ declarations live in their own file and are loaded only on those versions.
if (70300 <= \PHP_VERSION_ID) {
    require __DIR__.'/bootstrap73.php';
}
17 changes: 17 additions & 0 deletions src/Intl/Grapheme/bootstrap73.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
<?php

/*
* This file is part of the Symfony package.
*
* (c) Fabien Potencier <[email protected]>
*
* For the full copyright and license information, please view the LICENSE
* file that was distributed with this source code.
*/

use Symfony\Polyfill\Intl\Grapheme as p;

// Delegate to this package's own Grapheme class rather than to the separate
// Php84 polyfill, which may not be installed alongside this component.
if (!function_exists('grapheme_str_split') && function_exists('grapheme_substr')) {
    function grapheme_str_split(string $string, int $length = 1) { return p\Grapheme::grapheme_str_split($string, $length); }
}

3 changes: 3 additions & 0 deletions src/Intl/Grapheme/bootstrap80.php
Original file line number Diff line number Diff line change
Expand Up @@ -48,3 +48,6 @@ function grapheme_strstr(?string $haystack, ?string $needle, ?bool $beforeNeedle
if (!function_exists('grapheme_substr')) {
    // Nullable arguments are cast to string/int before being handed to the polyfill class.
    function grapheme_substr(?string $string, ?int $offset, ?int $length = null): string|false
    {
        return p\Grapheme::grapheme_substr((string) $string, (int) $offset, $length);
    }
}
if (!function_exists('grapheme_str_split')) {
    // Registered only when no grapheme_str_split() is defined yet; forwards to the polyfill class.
    function grapheme_str_split(string $string, int $length = 1): array|false
    {
        return p\Grapheme::grapheme_str_split($string, $length);
    }
}
29 changes: 29 additions & 0 deletions src/Php84/Php84.php
Original file line number Diff line number Diff line change
Expand Up @@ -63,4 +63,33 @@ public static function mb_lcfirst(string $string, ?string $encoding = null): str

return $firstChar . mb_substr($string, 1, null, $encoding);
}

/**
 * Splits a string into an array of graphemes, optionally grouped into chunks.
 *
 * Graphemes are extracted with the PCRE `\X` escape in UTF-8 mode.
 *
 * @param string $string The string to split
 * @param int    $length The number of graphemes per chunk, from 1 to 1073741823
 *
 * @return string[]|false The graphemes (or chunks of $length graphemes), an empty
 *                        array for an empty string, or false when nothing could be matched
 *
 * @throws \ValueError When $length is out of range
 */
public static function grapheme_str_split(string $string, int $length = 1)
{
    // 0 must be rejected here too: the message promises "greater than 0" and
    // array_chunk() below does not accept a size lower than 1.
    if ($length < 1 || $length > 1073741823) {
        throw new \ValueError('grapheme_str_split(): Argument #2 ($length) must be greater than 0 and less than or equal to 1073741823.');
    }

    if ('' === $string) {
        return [];
    }

    preg_match_all('/\X/u', $string, $matches);

    if (empty($matches[0])) {
        return false;
    }

    if (1 === $length) {
        return $matches[0];
    }

    // Group the graphemes by $length and glue each group back into a single string.
    return array_map(static function (array $chunk) {
        return implode('', $chunk);
    }, array_chunk($matches[0], $length));
}
}
1 change: 1 addition & 0 deletions src/Php84/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ Symfony Polyfill / Php84
This component provides features added to PHP 8.4 core:

- [`mb_ucfirst` and `mb_lcfirst`](https://wiki.php.net/rfc/mb_ucfirst)
- [`grapheme_str_split`](https://wiki.php.net/rfc/grapheme_str_split)

More information can be found in the
[main Polyfill README](https://github.com/symfony/polyfill/blob/main/README.md).
Expand Down
4 changes: 4 additions & 0 deletions src/Php84/bootstrap.php
Original file line number Diff line number Diff line change
Expand Up @@ -23,3 +23,7 @@ function mb_ucfirst($string, ?string $encoding = null): string { return p\Php84:
if (!function_exists('mb_lcfirst')) {
    // Registered only when no mb_lcfirst() is defined yet; forwards to the polyfill class.
    function mb_lcfirst($string, ?string $encoding = null): string
    {
        return p\Php84::mb_lcfirst($string, $encoding);
    }
}

// The PHP 7.3+ declarations live in their own file and are loaded only on those versions.
if (70300 <= \PHP_VERSION_ID) {
    require __DIR__.'/bootstrap73.php';
}
21 changes: 21 additions & 0 deletions src/Php84/bootstrap73.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
<?php

/*
* This file is part of the Symfony package.
*
* (c) Fabien Potencier <[email protected]>
*
* For the full copyright and license information, please view the LICENSE
* file that was distributed with this source code.
*/

use Symfony\Polyfill\Php84 as p;

// Stop here on PHP 8.4 and later: nothing below is declared on those versions.
if (80400 <= \PHP_VERSION_ID) {
    return;
}

// Declared only when the other grapheme functions exist and grapheme_str_split() does not.
if (function_exists('grapheme_substr') && !function_exists('grapheme_str_split')) {
    function grapheme_str_split(string $string, int $length = 1)
    {
        return p\Php84::grapheme_str_split($string, $length);
    }
}

26 changes: 26 additions & 0 deletions tests/Intl/Grapheme/GraphemeTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -207,4 +207,30 @@ public function testGraphemeStrstr()
$this->assertSame('국어', grapheme_strstr('한국어', ''));
$this->assertSame('ÉJÀ', grapheme_stristr('DÉJÀ', 'é'));
}

/**
 * Checks that grapheme_str_split() returns the expected graphemes or chunks.
 *
 * @dataProvider graphemeStrSplitDataProvider
 * @requires PHP 7.3
 */
public function testGraphemeStrSplit(string $string, int $length, array $expectedValues)
{
    $actualValues = grapheme_str_split($string, $length);

    $this->assertSame($expectedValues, $actualValues);
}

/**
 * Provides [input string, chunk length, expected result] rows for testGraphemeStrSplit().
 *
 * @return array<int, array{0: string, 1: int, 2: string[]}>
 */
public static function graphemeStrSplitDataProvider(): array
{
    $return = [
        ['', 1, []],
        ['PHP', 1, ['P', 'H', 'P']],
        ['你好', 1, ['你', '好']],
        ['අයේෂ්', 1, ['අ', 'යේ', 'ෂ්']],
        ['สวัสดี', 2, ['สวั', 'สดี']],
        ['土下座🙇‍♀を', 1, ['土', '下', '座', '🙇‍♀', 'を']],
    ];

    // https://github.com/PCRE2Project/pcre2/issues/410
    // PCRE_VERSION looks like "10.42 2022-12-11"; compare only the leading version number.
    // NOTE(review): the gated row duplicates the last unconditional row - presumably it was
    // meant to be a case that only passes with the PCRE2 fix; confirm against the issue.
    if (version_compare(explode(' ', \PCRE_VERSION)[0], '10.44', '>=')) {
        $return[] = ['土下座🙇‍♀を', 1, ['土', '下', '座', '🙇‍♀', 'を']];
    }

    return $return;
}
}
20 changes: 20 additions & 0 deletions tests/Php84/Php84Test.php
Original file line number Diff line number Diff line change
Expand Up @@ -68,4 +68,24 @@ public static function lcFirstDataProvider(): array {
["ß", "ß"],
];
}

/**
 * Checks that grapheme_str_split() returns the expected graphemes or chunks.
 *
 * @dataProvider graphemeStrSplitDataProvider
 * @requires PHP 7.3
 */
public function testGraphemeStrSplit(string $string, int $length, array $expectedValues)
{
    $actualValues = grapheme_str_split($string, $length);

    $this->assertSame($expectedValues, $actualValues);
}

/**
 * Provides [input string, chunk length, expected result] rows for testGraphemeStrSplit().
 *
 * @return array<int, array{0: string, 1: int, 2: string[]}>
 */
public static function graphemeStrSplitDataProvider(): array
{
    return [
        ['', 1, []],
        ['PHP', 1, ['P', 'H', 'P']],
        ['你好', 1, ['你', '好']],
        ['අයේෂ්', 1, ['අ', 'යේ', 'ෂ්']],
        ['สวัสดี', 2, ['สวั', 'สดี']],
        ['土下座🙇‍♀を', 1, ['土', '下', '座', '🙇‍♀', 'を']],
        // ['👭🏻👰🏿‍♂️', 2, ['👭🏻', '👰🏿‍♂️']], // https://github.com/PCRE2Project/pcre2/issues/410
    ];
}
}

0 comments on commit 3e8ced0

Please sign in to comment.