Text.php 40 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164116511661167116811691170117111721173117411751176117711781179118011811182118311841185118611871188
  1. <?php
  2. /**
  3. * CakePHP(tm) : Rapid Development Framework (https://cakephp.org)
  4. * Copyright (c) Cake Software Foundation, Inc. (https://cakefoundation.org)
  5. *
  6. * Licensed under The MIT License
  7. * For full copyright and license information, please see the LICENSE.txt
  8. * Redistributions of files must retain the above copyright notice.
  9. *
  10. * @copyright Copyright (c) Cake Software Foundation, Inc. (https://cakefoundation.org)
  11. * @link https://cakephp.org CakePHP(tm) Project
  12. * @since 1.2.0
  13. * @license https://opensource.org/licenses/mit-license.php MIT License
  14. */
  15. namespace Cake\Utility;
  16. use InvalidArgumentException;
  17. /**
  18. * Text handling methods.
  19. */
  20. class Text
  21. {
  22. /**
  23. * Default transliterator.
  24. *
  25. * @var \Transliterator Transliterator instance.
  26. */
  27. protected static $_defaultTransliterator;
  28. /**
  29. * Default transliterator id string.
  30. *
  31. * @var string $_defaultTransliteratorId Transliterator identifier string.
  32. */
  33. protected static $_defaultTransliteratorId = 'Any-Latin; Latin-ASCII; [\u0080-\u7fff] remove';
  34. /**
  35. * Default HTML tags whose content must not be counted when truncating text.
  36. *
  37. * @var array
  38. */
  39. protected static $_defaultHtmlNoCount = [
  40. 'style',
  41. 'script'
  42. ];
  43. /**
  44. * Generate a random UUID version 4
  45. *
  46. * Warning: This method should not be used as a random seed for any cryptographic operations.
  47. * Instead you should use the openssl or mcrypt extensions.
  48. *
  49. * It should also not be used to create identifiers that have security implications, such as
  50. * 'unguessable' URL identifiers. Instead you should use `Security::randomBytes()` for that.
  51. *
  52. * @see https://www.ietf.org/rfc/rfc4122.txt
  53. * @return string RFC 4122 UUID
  54. * @copyright Matt Farina MIT License https://github.com/lootils/uuid/blob/master/LICENSE
  55. */
  56. public static function uuid()
  57. {
  58. $random = function_exists('random_int') ? 'random_int' : 'mt_rand';
  59. return sprintf(
  60. '%04x%04x-%04x-%04x-%04x-%04x%04x%04x',
  61. // 32 bits for "time_low"
  62. $random(0, 65535),
  63. $random(0, 65535),
  64. // 16 bits for "time_mid"
  65. $random(0, 65535),
  66. // 12 bits before the 0100 of (version) 4 for "time_hi_and_version"
  67. $random(0, 4095) | 0x4000,
  68. // 16 bits, 8 bits for "clk_seq_hi_res",
  69. // 8 bits for "clk_seq_low",
  70. // two most significant bits holds zero and one for variant DCE1.1
  71. $random(0, 0x3fff) | 0x8000,
  72. // 48 bits for "node"
  73. $random(0, 65535),
  74. $random(0, 65535),
  75. $random(0, 65535)
  76. );
  77. }
  78. /**
  79. * Tokenizes a string using $separator, ignoring any instance of $separator that appears between
  80. * $leftBound and $rightBound.
  81. *
  82. * @param string $data The data to tokenize.
  83. * @param string $separator The token to split the data on.
  84. * @param string $leftBound The left boundary to ignore separators in.
  85. * @param string $rightBound The right boundary to ignore separators in.
  86. * @return array|string Array of tokens in $data or original input if empty.
  87. */
  88. public static function tokenize($data, $separator = ',', $leftBound = '(', $rightBound = ')')
  89. {
  90. if (empty($data)) {
  91. return [];
  92. }
  93. $depth = 0;
  94. $offset = 0;
  95. $buffer = '';
  96. $results = [];
  97. $length = mb_strlen($data);
  98. $open = false;
  99. while ($offset <= $length) {
  100. $tmpOffset = -1;
  101. $offsets = [
  102. mb_strpos($data, $separator, $offset),
  103. mb_strpos($data, $leftBound, $offset),
  104. mb_strpos($data, $rightBound, $offset)
  105. ];
  106. for ($i = 0; $i < 3; $i++) {
  107. if ($offsets[$i] !== false && ($offsets[$i] < $tmpOffset || $tmpOffset == -1)) {
  108. $tmpOffset = $offsets[$i];
  109. }
  110. }
  111. if ($tmpOffset !== -1) {
  112. $buffer .= mb_substr($data, $offset, $tmpOffset - $offset);
  113. $char = mb_substr($data, $tmpOffset, 1);
  114. if (!$depth && $char === $separator) {
  115. $results[] = $buffer;
  116. $buffer = '';
  117. } else {
  118. $buffer .= $char;
  119. }
  120. if ($leftBound !== $rightBound) {
  121. if ($char === $leftBound) {
  122. $depth++;
  123. }
  124. if ($char === $rightBound) {
  125. $depth--;
  126. }
  127. } else {
  128. if ($char === $leftBound) {
  129. if (!$open) {
  130. $depth++;
  131. $open = true;
  132. } else {
  133. $depth--;
  134. $open = false;
  135. }
  136. }
  137. }
  138. $tmpOffset += 1;
  139. $offset = $tmpOffset;
  140. } else {
  141. $results[] = $buffer . mb_substr($data, $offset);
  142. $offset = $length + 1;
  143. }
  144. }
  145. if (empty($results) && !empty($buffer)) {
  146. $results[] = $buffer;
  147. }
  148. if (!empty($results)) {
  149. return array_map('trim', $results);
  150. }
  151. return [];
  152. }
  153. /**
  154. * Replaces variable placeholders inside a $str with any given $data. Each key in the $data array
  155. * corresponds to a variable placeholder name in $str.
  156. * Example:
  157. * ```
  158. * Text::insert(':name is :age years old.', ['name' => 'Bob', 'age' => '65']);
  159. * ```
  160. * Returns: Bob is 65 years old.
  161. *
  162. * Available $options are:
  163. *
  164. * - before: The character or string in front of the name of the variable placeholder (Defaults to `:`)
  165. * - after: The character or string after the name of the variable placeholder (Defaults to null)
  166. * - escape: The character or string used to escape the before character / string (Defaults to `\`)
  167. * - format: A regex to use for matching variable placeholders. Default is: `/(?<!\\)\:%s/`
  168. * (Overwrites before, after, breaks escape / clean)
  169. * - clean: A boolean or array with instructions for Text::cleanInsert
  170. *
  171. * @param string $str A string containing variable placeholders
  172. * @param array $data A key => val array where each key stands for a placeholder variable name
  173. * to be replaced with val
  174. * @param array $options An array of options, see description above
  175. * @return string
  176. */
  177. public static function insert($str, $data, array $options = [])
  178. {
  179. $defaults = [
  180. 'before' => ':', 'after' => null, 'escape' => '\\', 'format' => null, 'clean' => false
  181. ];
  182. $options += $defaults;
  183. $format = $options['format'];
  184. $data = (array)$data;
  185. if (empty($data)) {
  186. return $options['clean'] ? static::cleanInsert($str, $options) : $str;
  187. }
  188. if (!isset($format)) {
  189. $format = sprintf(
  190. '/(?<!%s)%s%%s%s/',
  191. preg_quote($options['escape'], '/'),
  192. str_replace('%', '%%', preg_quote($options['before'], '/')),
  193. str_replace('%', '%%', preg_quote($options['after'], '/'))
  194. );
  195. }
  196. if (strpos($str, '?') !== false && is_numeric(key($data))) {
  197. $offset = 0;
  198. while (($pos = strpos($str, '?', $offset)) !== false) {
  199. $val = array_shift($data);
  200. $offset = $pos + strlen($val);
  201. $str = substr_replace($str, $val, $pos, 1);
  202. }
  203. return $options['clean'] ? static::cleanInsert($str, $options) : $str;
  204. }
  205. $dataKeys = array_keys($data);
  206. $hashKeys = array_map('crc32', $dataKeys);
  207. $tempData = array_combine($dataKeys, $hashKeys);
  208. krsort($tempData);
  209. foreach ($tempData as $key => $hashVal) {
  210. $key = sprintf($format, preg_quote($key, '/'));
  211. $str = preg_replace($key, $hashVal, $str);
  212. }
  213. $dataReplacements = array_combine($hashKeys, array_values($data));
  214. foreach ($dataReplacements as $tmpHash => $tmpValue) {
  215. $tmpValue = is_array($tmpValue) ? '' : $tmpValue;
  216. $str = str_replace($tmpHash, $tmpValue, $str);
  217. }
  218. if (!isset($options['format']) && isset($options['before'])) {
  219. $str = str_replace($options['escape'] . $options['before'], $options['before'], $str);
  220. }
  221. return $options['clean'] ? static::cleanInsert($str, $options) : $str;
  222. }
  223. /**
  224. * Cleans up a Text::insert() formatted string with given $options depending on the 'clean' key in
  225. * $options. The default method used is text but html is also available. The goal of this function
  226. * is to replace all whitespace and unneeded markup around placeholders that did not get replaced
  227. * by Text::insert().
  228. *
  229. * @param string $str String to clean.
  230. * @param array $options Options list.
  231. * @return string
  232. * @see \Cake\Utility\Text::insert()
  233. */
  234. public static function cleanInsert($str, array $options)
  235. {
  236. $clean = $options['clean'];
  237. if (!$clean) {
  238. return $str;
  239. }
  240. if ($clean === true) {
  241. $clean = ['method' => 'text'];
  242. }
  243. if (!is_array($clean)) {
  244. $clean = ['method' => $options['clean']];
  245. }
  246. switch ($clean['method']) {
  247. case 'html':
  248. $clean += [
  249. 'word' => '[\w,.]+',
  250. 'andText' => true,
  251. 'replacement' => '',
  252. ];
  253. $kleenex = sprintf(
  254. '/[\s]*[a-z]+=(")(%s%s%s[\s]*)+\\1/i',
  255. preg_quote($options['before'], '/'),
  256. $clean['word'],
  257. preg_quote($options['after'], '/')
  258. );
  259. $str = preg_replace($kleenex, $clean['replacement'], $str);
  260. if ($clean['andText']) {
  261. $options['clean'] = ['method' => 'text'];
  262. $str = static::cleanInsert($str, $options);
  263. }
  264. break;
  265. case 'text':
  266. $clean += [
  267. 'word' => '[\w,.]+',
  268. 'gap' => '[\s]*(?:(?:and|or)[\s]*)?',
  269. 'replacement' => '',
  270. ];
  271. $kleenex = sprintf(
  272. '/(%s%s%s%s|%s%s%s%s)/',
  273. preg_quote($options['before'], '/'),
  274. $clean['word'],
  275. preg_quote($options['after'], '/'),
  276. $clean['gap'],
  277. $clean['gap'],
  278. preg_quote($options['before'], '/'),
  279. $clean['word'],
  280. preg_quote($options['after'], '/')
  281. );
  282. $str = preg_replace($kleenex, $clean['replacement'], $str);
  283. break;
  284. }
  285. return $str;
  286. }
  287. /**
  288. * Wraps text to a specific width, can optionally wrap at word breaks.
  289. *
  290. * ### Options
  291. *
  292. * - `width` The width to wrap to. Defaults to 72.
  293. * - `wordWrap` Only wrap on words breaks (spaces) Defaults to true.
  294. * - `indent` String to indent with. Defaults to null.
  295. * - `indentAt` 0 based index to start indenting at. Defaults to 0.
  296. *
  297. * @param string $text The text to format.
  298. * @param array|int $options Array of options to use, or an integer to wrap the text to.
  299. * @return string Formatted text.
  300. */
  301. public static function wrap($text, $options = [])
  302. {
  303. if (is_numeric($options)) {
  304. $options = ['width' => $options];
  305. }
  306. $options += ['width' => 72, 'wordWrap' => true, 'indent' => null, 'indentAt' => 0];
  307. if ($options['wordWrap']) {
  308. $wrapped = self::wordWrap($text, $options['width'], "\n");
  309. } else {
  310. $wrapped = trim(chunk_split($text, $options['width'] - 1, "\n"));
  311. }
  312. if (!empty($options['indent'])) {
  313. $chunks = explode("\n", $wrapped);
  314. for ($i = $options['indentAt'], $len = count($chunks); $i < $len; $i++) {
  315. $chunks[$i] = $options['indent'] . $chunks[$i];
  316. }
  317. $wrapped = implode("\n", $chunks);
  318. }
  319. return $wrapped;
  320. }
  321. /**
  322. * Wraps a complete block of text to a specific width, can optionally wrap
  323. * at word breaks.
  324. *
  325. * ### Options
  326. *
  327. * - `width` The width to wrap to. Defaults to 72.
  328. * - `wordWrap` Only wrap on words breaks (spaces) Defaults to true.
  329. * - `indent` String to indent with. Defaults to null.
  330. * - `indentAt` 0 based index to start indenting at. Defaults to 0.
  331. *
  332. * @param string $text The text to format.
  333. * @param array|int $options Array of options to use, or an integer to wrap the text to.
  334. * @return string Formatted text.
  335. */
  336. public static function wrapBlock($text, $options = [])
  337. {
  338. if (is_numeric($options)) {
  339. $options = ['width' => $options];
  340. }
  341. $options += ['width' => 72, 'wordWrap' => true, 'indent' => null, 'indentAt' => 0];
  342. if (!empty($options['indentAt']) && $options['indentAt'] === 0) {
  343. $indentLength = !empty($options['indent']) ? strlen($options['indent']) : 0;
  344. $options['width'] -= $indentLength;
  345. return self::wrap($text, $options);
  346. }
  347. $wrapped = self::wrap($text, $options);
  348. if (!empty($options['indent'])) {
  349. $indentationLength = mb_strlen($options['indent']);
  350. $chunks = explode("\n", $wrapped);
  351. $count = count($chunks);
  352. if ($count < 2) {
  353. return $wrapped;
  354. }
  355. $toRewrap = '';
  356. for ($i = $options['indentAt']; $i < $count; $i++) {
  357. $toRewrap .= mb_substr($chunks[$i], $indentationLength) . ' ';
  358. unset($chunks[$i]);
  359. }
  360. $options['width'] -= $indentationLength;
  361. $options['indentAt'] = 0;
  362. $rewrapped = self::wrap($toRewrap, $options);
  363. $newChunks = explode("\n", $rewrapped);
  364. $chunks = array_merge($chunks, $newChunks);
  365. $wrapped = implode("\n", $chunks);
  366. }
  367. return $wrapped;
  368. }
  369. /**
  370. * Unicode and newline aware version of wordwrap.
  371. *
  372. * @param string $text The text to format.
  373. * @param int $width The width to wrap to. Defaults to 72.
  374. * @param string $break The line is broken using the optional break parameter. Defaults to '\n'.
  375. * @param bool $cut If the cut is set to true, the string is always wrapped at the specified width.
  376. * @return string Formatted text.
  377. */
  378. public static function wordWrap($text, $width = 72, $break = "\n", $cut = false)
  379. {
  380. $paragraphs = explode($break, $text);
  381. foreach ($paragraphs as &$paragraph) {
  382. $paragraph = static::_wordWrap($paragraph, $width, $break, $cut);
  383. }
  384. return implode($break, $paragraphs);
  385. }
  386. /**
  387. * Unicode aware version of wordwrap as helper method.
  388. *
  389. * @param string $text The text to format.
  390. * @param int $width The width to wrap to. Defaults to 72.
  391. * @param string $break The line is broken using the optional break parameter. Defaults to '\n'.
  392. * @param bool $cut If the cut is set to true, the string is always wrapped at the specified width.
  393. * @return string Formatted text.
  394. */
  395. protected static function _wordWrap($text, $width = 72, $break = "\n", $cut = false)
  396. {
  397. if ($cut) {
  398. $parts = [];
  399. while (mb_strlen($text) > 0) {
  400. $part = mb_substr($text, 0, $width);
  401. $parts[] = trim($part);
  402. $text = trim(mb_substr($text, mb_strlen($part)));
  403. }
  404. return implode($break, $parts);
  405. }
  406. $parts = [];
  407. while (mb_strlen($text) > 0) {
  408. if ($width >= mb_strlen($text)) {
  409. $parts[] = trim($text);
  410. break;
  411. }
  412. $part = mb_substr($text, 0, $width);
  413. $nextChar = mb_substr($text, $width, 1);
  414. if ($nextChar !== ' ') {
  415. $breakAt = mb_strrpos($part, ' ');
  416. if ($breakAt === false) {
  417. $breakAt = mb_strpos($text, ' ', $width);
  418. }
  419. if ($breakAt === false) {
  420. $parts[] = trim($text);
  421. break;
  422. }
  423. $part = mb_substr($text, 0, $breakAt);
  424. }
  425. $part = trim($part);
  426. $parts[] = $part;
  427. $text = trim(mb_substr($text, mb_strlen($part)));
  428. }
  429. return implode($break, $parts);
  430. }
  431. /**
  432. * Highlights a given phrase in a text. You can specify any expression in highlighter that
  433. * may include the \1 expression to include the $phrase found.
  434. *
  435. * ### Options:
  436. *
  437. * - `format` The piece of HTML with that the phrase will be highlighted
  438. * - `html` If true, will ignore any HTML tags, ensuring that only the correct text is highlighted
  439. * - `regex` A custom regex rule that is used to match words, default is '|$tag|iu'
  440. * - `limit` A limit, optional, defaults to -1 (none)
  441. *
  442. * @param string $text Text to search the phrase in.
  443. * @param string|array $phrase The phrase or phrases that will be searched.
  444. * @param array $options An array of HTML attributes and options.
  445. * @return string The highlighted text
  446. * @link https://book.cakephp.org/3.0/en/core-libraries/text.html#highlighting-substrings
  447. */
  448. public static function highlight($text, $phrase, array $options = [])
  449. {
  450. if (empty($phrase)) {
  451. return $text;
  452. }
  453. $defaults = [
  454. 'format' => '<span class="highlight">\1</span>',
  455. 'html' => false,
  456. 'regex' => '|%s|iu',
  457. 'limit' => -1,
  458. ];
  459. $options += $defaults;
  460. $html = $format = $limit = null;
  461. /**
  462. * @var bool $html
  463. * @var string|array $format
  464. * @var int $limit
  465. */
  466. extract($options);
  467. if (is_array($phrase)) {
  468. $replace = [];
  469. $with = [];
  470. foreach ($phrase as $key => $segment) {
  471. $segment = '(' . preg_quote($segment, '|') . ')';
  472. if ($html) {
  473. $segment = "(?![^<]+>)$segment(?![^<]+>)";
  474. }
  475. $with[] = is_array($format) ? $format[$key] : $format;
  476. $replace[] = sprintf($options['regex'], $segment);
  477. }
  478. return preg_replace($replace, $with, $text, $limit);
  479. }
  480. $phrase = '(' . preg_quote($phrase, '|') . ')';
  481. if ($html) {
  482. $phrase = "(?![^<]+>)$phrase(?![^<]+>)";
  483. }
  484. return preg_replace(sprintf($options['regex'], $phrase), $format, $text, $limit);
  485. }
  486. /**
  487. * Strips given text of all links (<a href=....).
  488. *
  489. * *Warning* This method is not an robust solution in preventing XSS
  490. * or malicious HTML.
  491. *
  492. * @param string $text Text
  493. * @return string The text without links
  494. * @deprecated 3.2.12 This method will be removed in 4.0.0
  495. */
  496. public static function stripLinks($text)
  497. {
  498. deprecationWarning('This method will be removed in 4.0.0.');
  499. do {
  500. $text = preg_replace('#</?a([/\s][^>]*)?(>|$)#i', '', $text, -1, $count);
  501. } while ($count);
  502. return $text;
  503. }
  504. /**
  505. * Truncates text starting from the end.
  506. *
  507. * Cuts a string to the length of $length and replaces the first characters
  508. * with the ellipsis if the text is longer than length.
  509. *
  510. * ### Options:
  511. *
  512. * - `ellipsis` Will be used as beginning and prepended to the trimmed string
  513. * - `exact` If false, $text will not be cut mid-word
  514. *
  515. * @param string $text String to truncate.
  516. * @param int $length Length of returned string, including ellipsis.
  517. * @param array $options An array of options.
  518. * @return string Trimmed string.
  519. */
  520. public static function tail($text, $length = 100, array $options = [])
  521. {
  522. $default = [
  523. 'ellipsis' => '...', 'exact' => true
  524. ];
  525. $options += $default;
  526. $exact = $ellipsis = null;
  527. /**
  528. * @var string $ellipsis
  529. * @var bool $exact
  530. */
  531. extract($options);
  532. if (mb_strlen($text) <= $length) {
  533. return $text;
  534. }
  535. $truncate = mb_substr($text, mb_strlen($text) - $length + mb_strlen($ellipsis));
  536. if (!$exact) {
  537. $spacepos = mb_strpos($truncate, ' ');
  538. $truncate = $spacepos === false ? '' : trim(mb_substr($truncate, $spacepos));
  539. }
  540. return $ellipsis . $truncate;
  541. }
  542. /**
  543. * Truncates text.
  544. *
  545. * Cuts a string to the length of $length and replaces the last characters
  546. * with the ellipsis if the text is longer than length.
  547. *
  548. * ### Options:
  549. *
  550. * - `ellipsis` Will be used as ending and appended to the trimmed string
  551. * - `exact` If false, $text will not be cut mid-word
  552. * - `html` If true, HTML tags would be handled correctly
  553. * - `trimWidth` If true, $text will be truncated with the width
  554. *
  555. * @param string $text String to truncate.
  556. * @param int $length Length of returned string, including ellipsis.
  557. * @param array $options An array of HTML attributes and options.
  558. * @return string Trimmed string.
  559. * @link https://book.cakephp.org/3.0/en/core-libraries/text.html#truncating-text
  560. */
  561. public static function truncate($text, $length = 100, array $options = [])
  562. {
  563. $default = [
  564. 'ellipsis' => '...', 'exact' => true, 'html' => false, 'trimWidth' => false,
  565. ];
  566. if (!empty($options['html']) && strtolower(mb_internal_encoding()) === 'utf-8') {
  567. $default['ellipsis'] = "\xe2\x80\xa6";
  568. }
  569. $options += $default;
  570. $prefix = '';
  571. $suffix = $options['ellipsis'];
  572. if ($options['html']) {
  573. $ellipsisLength = self::_strlen(strip_tags($options['ellipsis']), $options);
  574. $truncateLength = 0;
  575. $totalLength = 0;
  576. $openTags = [];
  577. $truncate = '';
  578. preg_match_all('/(<\/?([\w+]+)[^>]*>)?([^<>]*)/', $text, $tags, PREG_SET_ORDER);
  579. foreach ($tags as $tag) {
  580. $contentLength = 0;
  581. if (!in_array($tag[2], static::$_defaultHtmlNoCount, true)) {
  582. $contentLength = self::_strlen($tag[3], $options);
  583. }
  584. if ($truncate === '') {
  585. if (!preg_match('/img|br|input|hr|area|base|basefont|col|frame|isindex|link|meta|param/i', $tag[2])) {
  586. if (preg_match('/<[\w]+[^>]*>/', $tag[0])) {
  587. array_unshift($openTags, $tag[2]);
  588. } elseif (preg_match('/<\/([\w]+)[^>]*>/', $tag[0], $closeTag)) {
  589. $pos = array_search($closeTag[1], $openTags);
  590. if ($pos !== false) {
  591. array_splice($openTags, $pos, 1);
  592. }
  593. }
  594. }
  595. $prefix .= $tag[1];
  596. if ($totalLength + $contentLength + $ellipsisLength > $length) {
  597. $truncate = $tag[3];
  598. $truncateLength = $length - $totalLength;
  599. } else {
  600. $prefix .= $tag[3];
  601. }
  602. }
  603. $totalLength += $contentLength;
  604. if ($totalLength > $length) {
  605. break;
  606. }
  607. }
  608. if ($totalLength <= $length) {
  609. return $text;
  610. }
  611. $text = $truncate;
  612. $length = $truncateLength;
  613. foreach ($openTags as $tag) {
  614. $suffix .= '</' . $tag . '>';
  615. }
  616. } else {
  617. if (self::_strlen($text, $options) <= $length) {
  618. return $text;
  619. }
  620. $ellipsisLength = self::_strlen($options['ellipsis'], $options);
  621. }
  622. $result = self::_substr($text, 0, $length - $ellipsisLength, $options);
  623. if (!$options['exact']) {
  624. if (self::_substr($text, $length - $ellipsisLength, 1, $options) !== ' ') {
  625. $result = self::_removeLastWord($result);
  626. }
  627. // If result is empty, then we don't need to count ellipsis in the cut.
  628. if (!strlen($result)) {
  629. $result = self::_substr($text, 0, $length, $options);
  630. }
  631. }
  632. return $prefix . $result . $suffix;
  633. }
  634. /**
  635. * Truncate text with specified width.
  636. *
  637. * @param string $text String to truncate.
  638. * @param int $length Length of returned string, including ellipsis.
  639. * @param array $options An array of HTML attributes and options.
  640. * @return string Trimmed string.
  641. * @see \Cake\Utility\Text::truncate()
  642. */
  643. public static function truncateByWidth($text, $length = 100, array $options = [])
  644. {
  645. return static::truncate($text, $length, ['trimWidth' => true] + $options);
  646. }
  647. /**
  648. * Get string length.
  649. *
  650. * ### Options:
  651. *
  652. * - `html` If true, HTML entities will be handled as decoded characters.
  653. * - `trimWidth` If true, the width will return.
  654. *
  655. * @param string $text The string being checked for length
  656. * @param array $options An array of options.
  657. * @return int
  658. */
  659. protected static function _strlen($text, array $options)
  660. {
  661. if (empty($options['trimWidth'])) {
  662. $strlen = 'mb_strlen';
  663. } else {
  664. $strlen = 'mb_strwidth';
  665. }
  666. if (empty($options['html'])) {
  667. return $strlen($text);
  668. }
  669. $pattern = '/&[0-9a-z]{2,8};|&#[0-9]{1,7};|&#x[0-9a-f]{1,6};/i';
  670. $replace = preg_replace_callback(
  671. $pattern,
  672. function ($match) use ($strlen) {
  673. $utf8 = html_entity_decode($match[0], ENT_HTML5 | ENT_QUOTES, 'UTF-8');
  674. return str_repeat(' ', $strlen($utf8, 'UTF-8'));
  675. },
  676. $text
  677. );
  678. return $strlen($replace);
  679. }
  680. /**
  681. * Return part of a string.
  682. *
  683. * ### Options:
  684. *
  685. * - `html` If true, HTML entities will be handled as decoded characters.
  686. * - `trimWidth` If true, will be truncated with specified width.
  687. *
  688. * @param string $text The input string.
  689. * @param int $start The position to begin extracting.
  690. * @param int $length The desired length.
  691. * @param array $options An array of options.
  692. * @return string
  693. */
  694. protected static function _substr($text, $start, $length, array $options)
  695. {
  696. if (empty($options['trimWidth'])) {
  697. $substr = 'mb_substr';
  698. } else {
  699. $substr = 'mb_strimwidth';
  700. }
  701. $maxPosition = self::_strlen($text, ['trimWidth' => false] + $options);
  702. if ($start < 0) {
  703. $start += $maxPosition;
  704. if ($start < 0) {
  705. $start = 0;
  706. }
  707. }
  708. if ($start >= $maxPosition) {
  709. return '';
  710. }
  711. if ($length === null) {
  712. $length = self::_strlen($text, $options);
  713. }
  714. if ($length < 0) {
  715. $text = self::_substr($text, $start, null, $options);
  716. $start = 0;
  717. $length += self::_strlen($text, $options);
  718. }
  719. if ($length <= 0) {
  720. return '';
  721. }
  722. if (empty($options['html'])) {
  723. return (string)$substr($text, $start, $length);
  724. }
  725. $totalOffset = 0;
  726. $totalLength = 0;
  727. $result = '';
  728. $pattern = '/(&[0-9a-z]{2,8};|&#[0-9]{1,7};|&#x[0-9a-f]{1,6};)/i';
  729. $parts = preg_split($pattern, $text, -1, PREG_SPLIT_DELIM_CAPTURE | PREG_SPLIT_NO_EMPTY);
  730. foreach ($parts as $part) {
  731. $offset = 0;
  732. if ($totalOffset < $start) {
  733. $len = self::_strlen($part, ['trimWidth' => false] + $options);
  734. if ($totalOffset + $len <= $start) {
  735. $totalOffset += $len;
  736. continue;
  737. }
  738. $offset = $start - $totalOffset;
  739. $totalOffset = $start;
  740. }
  741. $len = self::_strlen($part, $options);
  742. if ($offset !== 0 || $totalLength + $len > $length) {
  743. if (strpos($part, '&') === 0 && preg_match($pattern, $part)
  744. && $part !== html_entity_decode($part, ENT_HTML5 | ENT_QUOTES, 'UTF-8')
  745. ) {
  746. // Entities cannot be passed substr.
  747. continue;
  748. }
  749. $part = $substr($part, $offset, $length - $totalLength);
  750. $len = self::_strlen($part, $options);
  751. }
  752. $result .= $part;
  753. $totalLength += $len;
  754. if ($totalLength >= $length) {
  755. break;
  756. }
  757. }
  758. return $result;
  759. }
  760. /**
  761. * Removes the last word from the input text.
  762. *
  763. * @param string $text The input text
  764. * @return string
  765. */
  766. protected static function _removeLastWord($text)
  767. {
  768. $spacepos = mb_strrpos($text, ' ');
  769. if ($spacepos !== false) {
  770. $lastWord = mb_strrpos($text, $spacepos);
  771. // Some languages are written without word separation.
  772. // We recognize a string as a word if it doesn't contain any full-width characters.
  773. if (mb_strwidth($lastWord) === mb_strlen($lastWord)) {
  774. $text = mb_substr($text, 0, $spacepos);
  775. }
  776. return $text;
  777. }
  778. return '';
  779. }
  /**
   * Extracts an excerpt from the text surrounding the phrase, keeping up to
   * `$radius` characters on each side of the first (case-insensitive) match.
   *
   * When either the text or the phrase is empty, the text is truncated to
   * twice the radius instead. When the phrase does not occur in the text,
   * the first `$radius` characters followed by the ellipsis are returned.
   *
   * @param string $text String to search the phrase in
   * @param string $phrase Phrase that will be searched for
   * @param int $radius The amount of characters that will be returned on each side of the found phrase
   * @param string $ellipsis Marker added on every side where text was cut off
   * @return string Modified string
   * @link https://book.cakephp.org/3.0/en/core-libraries/text.html#extracting-an-excerpt
   */
  public static function excerpt($text, $phrase, $radius = 100, $ellipsis = '...')
  {
      if (empty($text) || empty($phrase)) {
          return static::truncate($text, $radius * 2, ['ellipsis' => $ellipsis]);
      }

      $needleLength = mb_strlen($phrase);
      $haystackLength = mb_strlen($text);
      $matchAt = mb_stripos($text, $phrase);
      if ($matchAt === false) {
          return mb_substr($text, 0, $radius) . $ellipsis;
      }

      // Window start: clamp to the beginning and drop the leading ellipsis
      // when nothing is cut off on the left.
      $from = $matchAt - $radius;
      $leading = $ellipsis;
      if ($from <= 0) {
          list($from, $leading) = [0, ''];
      }

      // Window end: clamp to the text length and drop the trailing ellipsis
      // when nothing is cut off on the right.
      $until = $matchAt + $needleLength + $radius;
      $trailing = $ellipsis;
      if ($until >= $haystackLength) {
          list($until, $trailing) = [$haystackLength, ''];
      }

      return $leading . mb_substr($text, $from, $until - $from) . $trailing;
  }
  /**
   * Creates a comma separated list where the last two items are joined with 'and', forming natural language.
   *
   * A single-item list yields that item as a string; an empty list yields an
   * empty string.
   *
   * @param array $list The list to be joined.
   * @param string|null $and The word used to join the last and second last items together with. Defaults to 'and'.
   * @param string $separator The separator used to join all the other items together. Defaults to ', '.
   * @return string The glued together string.
   * @link https://book.cakephp.org/3.0/en/core-libraries/text.html#converting-an-array-to-sentence-form
   */
  public static function toList(array $list, $and = null, $separator = ', ')
  {
      if ($and === null) {
          $and = __d('cake', 'and');
      }

      if (count($list) > 1) {
          // Detach the final item first, then join the remainder. This avoids
          // passing null as array_slice()'s integer offset.
          $last = array_pop($list);

          return implode($separator, $list) . ' ' . $and . ' ' . $last;
      }

      // array_pop() returns null for an empty list; cast so the documented
      // string return type always holds.
      return (string)array_pop($list);
  }
  /**
   * Check if the string contains multibyte characters.
   *
   * The string is inspected byte by byte; it is reported as multibyte as soon
   * as one byte has a value greater than 128.
   *
   * @param string $string value to test
   * @return bool True when at least one byte value is above 128, false otherwise.
   */
  public static function isMultibyte($string)
  {
      // Byte-wise match (no `u` modifier): any single byte in 0x81-0xFF,
      // i.e. a byte whose ordinal value is greater than 128.
      return preg_match('/[\x81-\xFF]/', $string) === 1;
  }
  /**
   * Converts a multibyte character string
   * to the decimal value of the character
   *
   * The string is read byte by byte as UTF-8. Bytes below 128 map to
   * themselves; a lead byte below 224 starts a 2-byte sequence, a lead byte
   * below 240 a 3-byte sequence, and anything higher a 4-byte sequence
   * (code points above U+FFFF, e.g. emoji). No validation of the input is
   * performed; an incomplete trailing sequence is dropped.
   *
   * @param string $string String to convert.
   * @return array List of integer code points, one per decoded character.
   */
  public static function utf8($string)
  {
      $map = [];

      $values = [];
      $find = 1;
      $length = strlen($string);

      for ($i = 0; $i < $length; $i++) {
          $value = ord($string[$i]);

          if ($value < 128) {
              $map[] = $value;
              continue;
          }

          // The first byte of a sequence determines how many bytes to collect.
          if (empty($values)) {
              if ($value < 224) {
                  $find = 2;
              } elseif ($value < 240) {
                  $find = 3;
              } else {
                  $find = 4;
              }
          }
          $values[] = $value;

          if (count($values) !== $find) {
              continue;
          }

          // Strip the marker bits of each byte and combine the 6-bit payloads.
          if ($find === 4) {
              $map[] = (($values[0] % 8) * 262144)
                  + (($values[1] % 64) * 4096)
                  + (($values[2] % 64) * 64)
                  + ($values[3] % 64);
          } elseif ($find === 3) {
              $map[] = (($values[0] % 16) * 4096) + (($values[1] % 64) * 64) + ($values[2] % 64);
          } else {
              $map[] = (($values[0] % 32) * 64) + ($values[1] % 64);
          }
          $values = [];
          $find = 1;
      }

      return $map;
  }
  /**
   * Converts the decimal value of a multibyte character string
   * to a string
   *
   * Each value is encoded as UTF-8: values below 128 as one byte, below 2048
   * as two bytes, below 65536 as three bytes, and everything above as four
   * bytes (code points beyond U+FFFF, e.g. emoji). Values are not validated.
   *
   * @param array $array List of integer code points.
   * @return string The UTF-8 encoded string.
   */
  public static function ascii(array $array)
  {
      $ascii = '';

      foreach ($array as $utf8) {
          if ($utf8 < 128) {
              $ascii .= chr($utf8);
          } elseif ($utf8 < 2048) {
              $ascii .= chr(192 + ($utf8 >> 6));
              $ascii .= chr(128 + ($utf8 & 63));
          } elseif ($utf8 < 65536) {
              $ascii .= chr(224 + ($utf8 >> 12));
              $ascii .= chr(128 + (($utf8 >> 6) & 63));
              $ascii .= chr(128 + ($utf8 & 63));
          } else {
              // Four-byte form; the three-byte formula would overflow the
              // lead byte for these values.
              $ascii .= chr(240 + ($utf8 >> 18));
              $ascii .= chr(128 + (($utf8 >> 12) & 63));
              $ascii .= chr(128 + (($utf8 >> 6) & 63));
              $ascii .= chr(128 + ($utf8 & 63));
          }
      }

      return $ascii;
  }
  /**
   * Converts filesize from human readable string to bytes
   *
   * Accepted forms (case-insensitive): a plain digit string ('512'), a number
   * followed by a two-letter unit ('5MB', '50kb'), a number followed by a
   * one-letter unit ('5M'), or digits followed by 'B' ('500B'). Units are
   * powers of 1024. Integer input is accepted and handled like its string form.
   *
   * @param string|int $size Size in human readable string like '5MB', '5M', '500B', '50kb' etc.
   * @param mixed $default Value to be returned when invalid size was used, for example 'Unknown type'
   * @return mixed Number of bytes on success (integer for plain byte counts, float
   *   for sizes with a K/M/G/T/P unit), `$default` on failure if not false
   * @throws \InvalidArgumentException On invalid Unit type.
   * @link https://book.cakephp.org/3.0/en/core-libraries/text.html#Cake\Utility\Text::parseFileSize
   */
  public static function parseFileSize($size, $default = false)
  {
      // ctype_digit() interprets small integers as ASCII codes, so always
      // hand it (and the string functions below) a real string.
      $size = (string)$size;
      if (ctype_digit($size)) {
          return (int)$size;
      }
      $size = strtoupper($size);

      // Try the two-letter units first, then fall back to one-letter units.
      // $l is the (negative) length of the unit suffix to strip.
      $l = -2;
      $i = array_search(substr($size, -2), ['KB', 'MB', 'GB', 'TB', 'PB'], true);
      if ($i === false) {
          $l = -1;
          $i = array_search(substr($size, -1), ['K', 'M', 'G', 'T', 'P'], true);
      }
      if ($i !== false) {
          $size = (float)substr($size, 0, $l);

          // The unit's index is its power of 1024 minus one (K => 1024^1).
          return $size * pow(1024, $i + 1);
      }

      if (substr($size, -1) === 'B' && ctype_digit(substr($size, 0, -1))) {
          $size = substr($size, 0, -1);

          return (int)$size;
      }

      if ($default !== false) {
          return $default;
      }
      throw new InvalidArgumentException('No unit type.');
  }
  /**
   * Returns the transliterator currently stored as the class-wide default.
   *
   * @return \Transliterator|null Either a Transliterator instance, or `null`
   *   in case no transliterator has been set yet.
   * @since 3.7.0
   */
  public static function getTransliterator()
  {
      $current = static::$_defaultTransliterator;

      return $current;
  }
  /**
   * Stores the given transliterator as the class-wide default.
   *
   * Only the transliterator instance is replaced; the default transliterator
   * identifier string is not touched by this method.
   *
   * @param \Transliterator $transliterator A `Transliterator` instance.
   * @return void
   * @since 3.7.0
   */
  public static function setTransliterator(\Transliterator $transliterator)
  {
      static::$_defaultTransliterator = $transliterator;
  }
  /**
   * Returns the identifier string stored as the default transliterator id.
   *
   * @return string Transliterator identifier.
   */
  public static function getTransliteratorId()
  {
      $identifier = static::$_defaultTransliteratorId;

      return $identifier;
  }
  /**
   * Set default transliterator identifier string.
   *
   * A transliterator is created from the identifier and installed as the
   * default transliterator, then the identifier itself is stored. Nothing is
   * changed when the identifier cannot be turned into a transliterator.
   *
   * @param string $transliteratorId Transliterator identifier.
   * @return void
   * @throws \InvalidArgumentException When no transliterator can be created for the identifier.
   */
  public static function setTransliteratorId($transliteratorId)
  {
      $transliterator = transliterator_create($transliteratorId);
      if (!$transliterator instanceof \Transliterator) {
          // transliterator_create() yields null on failure; report the bad
          // identifier instead of letting the type-hinted setter fail.
          throw new \InvalidArgumentException(
              'Unable to create transliterator for id: ' . $transliteratorId
          );
      }

      static::setTransliterator($transliterator);
      static::$_defaultTransliteratorId = $transliteratorId;
  }
  /**
   * Transliterate string.
   *
   * When no transliterator is passed, the default transliterator instance is
   * used if one is set, otherwise the default transliterator identifier.
   *
   * @param string $string String to transliterate.
   * @param \Transliterator|string|null $transliterator Either a Transliterator
   *   instance, or a transliterator identifier string. If `null`, the default
   *   transliterator (identifier) set via `setTransliteratorId()` or
   *   `setTransliterator()` will be used.
   * @return string
   * @see https://secure.php.net/manual/en/transliterator.transliterate.php
   */
  public static function transliterate($string, $transliterator = null)
  {
      if ($transliterator) {
          return transliterator_transliterate($transliterator, $string);
      }

      // Prefer the configured instance, fall back to the configured identifier.
      $fallback = static::$_defaultTransliterator ?: static::$_defaultTransliteratorId;

      return transliterator_transliterate($fallback, $string);
  }
  /**
   * Returns a string with all spaces converted to dashes (by default),
   * characters transliterated to ASCII characters, and non word characters removed.
   *
   * ### Options:
   *
   * - `replacement`: Replacement string. Default '-'.
   * - `transliteratorId`: A valid transliterator id string.
   *   If `null` (default) the transliterator (identifier) set via
   *   `setTransliteratorId()` or `setTransliterator()` will be used.
   *   If `false` no transliteration will be done, only non words will be removed.
   * - `preserve`: Specific non-word character to preserve. Default `null`.
   *   For e.g. this option can be set to '.' to generate clean file names.
   *
   * @param string $string the string you want to slug
   * @param array|string $options If string it will be use as replacement character
   *   or an array of options.
   * @return string
   * @see setTransliterator()
   * @see setTransliteratorId()
   */
  public static function slug($string, $options = [])
  {
      if (is_string($options)) {
          $options = ['replacement' => $options];
      }
      $options += [
          'replacement' => '-',
          'transliteratorId' => null,
          'preserve' => null
      ];

      if ($options['transliteratorId'] !== false) {
          $string = static::transliterate($string, $options['transliteratorId']);
      }

      // Negated class: everything that is not whitespace, a letter or a
      // decimal digit (plus the optional preserved characters) is a non-word.
      $regex = '^\s\p{Ll}\p{Lm}\p{Lo}\p{Lt}\p{Lu}\p{Nd}';
      if ($options['preserve']) {
          $regex .= preg_quote($options['preserve'], '/');
      }
      $quotedReplacement = preg_quote($options['replacement'], '/');

      // Applied in order: non-word characters become spaces, whitespace runs
      // become the replacement, then replacement characters are trimmed from
      // both ends. The trim pattern needs the `u` modifier as well: without
      // it a multibyte replacement is matched byte by byte and can cut a
      // neighbouring multibyte character in half.
      $map = [
          '/[' . $regex . ']/mu' => ' ',
          '/[\s]+/mu' => $options['replacement'],
          sprintf('/^[%s]+|[%s]+$/u', $quotedReplacement, $quotedReplacement) => '',
      ];

      return preg_replace(array_keys($map), array_values($map), $string);
  }
  1053. }