1: <?php
2:
3: 4: 5: 6:
7:
8: namespace Nette\Utils;
9:
10: use Nette;
11:
12:
13: 14: 15:
16: class Strings
17: {
18: use Nette\StaticClass;
19:
20: const TRIM_CHARACTERS = " \t\n\r\0\x0B\xC2\xA0";
21:
22:
23: 24: 25: 26: 27:
/**
 * Tells whether the given byte string is valid UTF-8.
 * A string counts as valid exactly when fixEncoding() returns it unchanged.
 * @param  string  $s  byte stream to check
 * @return bool
 */
public static function checkEncoding($s)
{
	$repaired = self::fixEncoding($s);
	return $repaired === $s;
}
32:
33:
34: 35: 36: 37: 38:
/**
 * Strips byte sequences that are not valid UTF-8 from the string.
 * @param  string  $s  byte stream to fix
 * @return string
 */
public static function fixEncoding($s)
{
	// ENT_IGNORE asks htmlspecialchars() to drop invalid code unit sequences;
	// the decode step afterwards undoes the escaping of & < > done on the way
	$escaped = htmlspecialchars($s, ENT_NOQUOTES | ENT_IGNORE, 'UTF-8');
	return htmlspecialchars_decode($escaped, ENT_NOQUOTES);
}
44:
45:
46: 47: 48: 49: 50: 51:
/**
 * Returns the UTF-8 encoded character for a Unicode code point.
 * @param  int  $code  code point (0x0 to 0xD7FF or 0xE000 to 0x10FFFF)
 * @return string
 * @throws Nette\InvalidArgumentException if the code point is negative, a surrogate or above 0x10FFFF
 */
public static function chr($code)
{
	$isNegative = $code < 0;
	$isSurrogate = $code >= 0xD800 && $code <= 0xDFFF;
	$isTooLarge = $code > 0x10FFFF;
	if ($isNegative || $isSurrogate || $isTooLarge) {
		throw new Nette\InvalidArgumentException('Code point must be in range 0x0 to 0xD7FF or 0xE000 to 0x10FFFF.');
	}

	// pack('N') yields the code point as a 32-bit big-endian integer, i.e. one UTF-32BE unit
	$utf32 = pack('N', $code);
	return iconv('UTF-32BE', 'UTF-8//IGNORE', $utf32);
}
59:
60:
61: 62: 63: 64: 65: 66:
/**
 * Tells whether $haystack begins with $needle (byte-wise comparison).
 * @param  string  $haystack
 * @param  string  $needle
 * @return bool
 */
public static function startsWith($haystack, $needle)
{
	$prefixLength = strlen($needle);
	$difference = strncmp($haystack, $needle, $prefixLength);
	return $difference === 0;
}
71:
72:
73: 74: 75: 76: 77: 78:
/**
 * Tells whether $haystack ends with $needle (byte-wise comparison).
 * An empty $needle always matches.
 * @param  string  $haystack
 * @param  string  $needle
 * @return bool
 */
public static function endsWith($haystack, $needle)
{
	$suffixLength = strlen($needle);
	if ($suffixLength === 0) {
		return true;
	}
	return substr($haystack, -$suffixLength) === $needle;
}
83:
84:
85: 86: 87: 88: 89: 90:
/**
 * Tells whether $haystack contains $needle (byte-wise search).
 * @param  string  $haystack
 * @param  string  $needle
 * @return bool
 */
public static function contains($haystack, $needle)
{
	$position = strpos($haystack, $needle);
	return $position !== false;
}
95:
96:
97: 98: 99: 100: 101: 102: 103:
/**
 * Returns a part of a UTF-8 string, with positions counted in characters.
 * @param  string    $s
 * @param  int       $start   may be negative to count from the end
 * @param  int|null  $length  null means "up to the end of the string"
 * @return string
 */
public static function substring($s, $start, $length = null)
{
	// preferred path: mbstring, when the extension is loaded
	if (function_exists('mb_substr')) {
		return mb_substr($s, $start, $length, 'UTF-8');
	}

	// iconv fallback: arguments are adjusted before the call
	if ($length === null) {
		$length = self::length($s);
	} elseif ($start < 0 && $length < 0) {
		$start += self::length($s);
	}
	return iconv_substr($s, $start, $length, 'UTF-8');
}
115:
116:
117: 118: 119: 120: 121:
/**
 * Cleans up a UTF-8 text: unifies line endings, drops control characters,
 * removes trailing whitespace on each line and strips blank lines at both ends.
 * @param  string  $s  UTF-8 encoding
 * @return string
 */
public static function normalize($s)
{
	$text = self::normalizeNewLines($s);

	// drop control code points U+0000-U+0008, U+000B-U+001F and U+007F-U+009F (tab and LF are kept)
	$text = preg_replace('#[\x00-\x08\x0B-\x1F\x7F-\x9F]+#u', '', $text);

	// remove tabs and spaces at the end of every line
	$text = preg_replace('#[\t ]+$#m', '', $text);

	// strip line feeds from the beginning and the end
	return trim($text, "\n");
}
137:
138:
139: 140: 141: 142: 143:
/**
 * Converts CRLF and lone CR line endings to LF.
 * @param  string  $s
 * @return string
 */
public static function normalizeNewLines($s)
{
	// "\r\n" is listed first so that a CRLF pair becomes a single "\n"
	$lineBreaks = ["\r\n", "\r"];
	return str_replace($lineBreaks, "\n", $s);
}
148:
149:
150: 151: 152: 153: 154:
/**
 * Converts a UTF-8 string to ASCII by transliteration.
 * The characters ` ' " ^ ~ ? from the input are carried through the conversion
 * as placeholder bytes \x01-\x06, so that the same characters produced by the
 * conversion steps can be removed at the end without losing the original ones.
 * @param  string  $s  UTF-8 encoding
 * @return string  ASCII
 */
public static function toAscii($s)
{
	// created once and cached; class_exists(..., false) does not trigger autoloading,
	// so the transliterator is used only when the Transliterator class is already available
	static $transliterator = null;
	if ($transliterator === null && class_exists('Transliterator', false)) {
		$transliterator = \Transliterator::create('Any-Latin; Latin-ASCII');
	}

	// drop every code point outside the listed ranges; this also removes \x01-\x06,
	// which keeps the placeholder bytes used below unambiguous
	$s = preg_replace('#[^\x09\x0A\x0D\x20-\x7E\xA0-\x{2FF}\x{370}-\x{10FFFF}]#u', '', $s);
	// original ` ' " ^ ~ ? become placeholders \x01..\x06
	$s = strtr($s, '`\'"^~?', "\x01\x02\x03\x04\x05\x06");
	// typographic double quotes (U+201E, U+201C, U+201D) map to the placeholder of ",
	// single ones (U+201A, U+2018, U+2019) to the placeholder of ', the degree sign (U+00B0) to that of ^
	$s = str_replace(
		["\xE2\x80\x9E", "\xE2\x80\x9C", "\xE2\x80\x9D", "\xE2\x80\x9A", "\xE2\x80\x98", "\xE2\x80\x99", "\xC2\xB0"],
		["\x03", "\x03", "\x03", "\x02", "\x02", "\x02", "\x04"], $s
	);
	if ($transliterator !== null) {
		$s = $transliterator->transliterate($s);
	}
	if (ICONV_IMPL === 'glibc') {
		// glibc path: spell out U+00BB, U+00AB, U+2026, U+2122, U+00A9, U+00AE in ASCII first
		$s = str_replace(
			["\xC2\xBB", "\xC2\xAB", "\xE2\x80\xA6", "\xE2\x84\xA2", "\xC2\xA9", "\xC2\xAE"],
			['>>', '<<', '...', 'TM', '(c)', '(R)'], $s
		);
		// convert to the single-byte WINDOWS-1250 charset, then map its high bytes to ASCII one by one
		$s = iconv('UTF-8', 'WINDOWS-1250//TRANSLIT//IGNORE', $s);
		$s = strtr($s, "\xa5\xa3\xbc\x8c\xa7\x8a\xaa\x8d\x8f\x8e\xaf\xb9\xb3\xbe\x9c\x9a\xba\x9d\x9f\x9e"
			. "\xbf\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf\xd0\xd1\xd2\xd3"
			. "\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8"
			. "\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf8\xf9\xfa\xfb\xfc\xfd\xfe"
			. "\x96\xa0\x8b\x97\x9b\xa6\xad\xb7",
			'ALLSSSSTZZZallssstzzzRAAAALCCCEEEEIIDDNNOOOOxRUUUUYTsraaaalccceeeeiiddnnooooruuuuyt- <->|-.');
		// whatever is still outside the 7-bit range is discarded
		$s = preg_replace('#[^\x00-\x7F]++#', '', $s);
	} else {
		$s = iconv('UTF-8', 'ASCII//TRANSLIT//IGNORE', $s);
	}
	// any ` ' " ^ ~ ? present now did not come from the input (those are still placeholders), so remove them
	$s = str_replace(['`', "'", '"', '^', '~', '?'], '', $s);
	// finally turn the placeholders back into the original characters
	return strtr($s, "\x01\x02\x03\x04\x05\x06", '`\'"^~?');
}
190:
191:
192: 193: 194: 195: 196: 197: 198:
/**
 * Converts a UTF-8 string to a web-safe form: ASCII letters and digits,
 * with every run of other characters replaced by a single dash.
 * @param  string       $s         UTF-8 encoding
 * @param  string|null  $charlist  additional characters to keep
 * @param  bool         $lower     convert the result to lower case?
 * @return string
 */
public static function webalize($s, $charlist = null, $lower = true)
{
	$slug = self::toAscii($s);
	if ($lower) {
		$slug = strtolower($slug);
	}

	// extra allowed characters are escaped before being placed inside the character class
	$extraAllowed = $charlist !== null ? preg_quote($charlist, '#') : '';
	$slug = preg_replace('#[^a-z0-9' . $extraAllowed . ']+#i', '-', $slug);

	return trim($slug, '-');
}
209:
210:
211: 212: 213: 214: 215: 216: 217:
/**
 * Shortens a UTF-8 string to at most $maxLen characters, including $append.
 * The cut is placed in front of a whitespace or non-alphanumeric ASCII character
 * when one can be found within the limit; otherwise the string is cut hard.
 * @param  string  $s       UTF-8 encoding
 * @param  int     $maxLen
 * @param  string  $append  UTF-8 encoding; defaults to an ellipsis (U+2026)
 * @return string
 */
public static function truncate($s, $maxLen, $append = "\xE2\x80\xA6")
{
	if (!(self::length($s) > $maxLen)) {
		return $s; // short enough already
	}

	// reserve room for the appended text
	$maxLen = $maxLen - self::length($append);
	if ($maxLen < 1) {
		return $append;
	}

	// greedy match of up to $maxLen characters followed by a break character
	$matches = self::match($s, '#^.{1,' . $maxLen . '}(?=[\s\x00-/:-@\[-`{-~])#us');
	if ($matches) {
		return $matches[0] . $append;
	}

	return self::substring($s, 0, $maxLen) . $append;
}
234:
235:
236: 237: 238: 239: 240: 241: 242:
/**
 * Indents every non-empty line of a text.
 * @param  string  $s      UTF-8 encoding
 * @param  int     $level  how many times $chars is repeated; nothing happens unless it is positive
 * @param  string  $chars  indentation unit
 * @return string
 */
public static function indent($s, $level = 1, $chars = "\t")
{
	if (!($level > 0)) {
		return $s;
	}

	$prefix = str_repeat($chars, $level);
	// the pattern matches the start of the text or a run of line breaks, but only
	// when a non-line-break character follows, so empty lines stay untouched
	return self::replace($s, '#(?:^|[\r\n]+)(?=[^\r\n])#', '$0' . $prefix);
}
250:
251:
252: 253: 254: 255: 256:
/**
 * Converts a UTF-8 string to lower case.
 * @param  string  $s  UTF-8 encoding
 * @return string
 */
public static function lower($s)
{
	$lowered = mb_strtolower($s, 'UTF-8');
	return $lowered;
}
261:
262:
263: 264: 265: 266: 267:
/**
 * Converts the first character of a UTF-8 string to lower case.
 * @param  string  $s  UTF-8 encoding
 * @return string
 */
public static function firstLower($s)
{
	$head = self::lower(self::substring($s, 0, 1));
	$tail = self::substring($s, 1);
	return $head . $tail;
}
272:
273:
274: 275: 276: 277: 278:
/**
 * Converts a UTF-8 string to upper case.
 * @param  string  $s  UTF-8 encoding
 * @return string
 */
public static function upper($s)
{
	$uppered = mb_strtoupper($s, 'UTF-8');
	return $uppered;
}
283:
284:
285: 286: 287: 288: 289:
/**
 * Converts the first character of a UTF-8 string to upper case.
 * @param  string  $s  UTF-8 encoding
 * @return string
 */
public static function firstUpper($s)
{
	$head = self::upper(self::substring($s, 0, 1));
	$tail = self::substring($s, 1);
	return $head . $tail;
}
294:
295:
296: 297: 298: 299: 300:
/**
 * Converts a UTF-8 string to title case via mb_convert_case().
 * @param  string  $s  UTF-8 encoding
 * @return string
 */
public static function capitalize($s)
{
	$titled = mb_convert_case($s, MB_CASE_TITLE, 'UTF-8');
	return $titled;
}
305:
306:
307: 308: 309: 310: 311: 312: 313:
/**
 * Case-insensitive comparison of two UTF-8 strings or of their parts.
 * @param  string    $left
 * @param  string    $right
 * @param  int|null  $len  positive: compare the first $len characters;
 *                         negative: compare the last |$len| characters;
 *                         null: compare the whole strings
 * @return bool
 */
public static function compare($left, $right, $len = null)
{
	if ($len < 0) {
		$start = $len;
		$count = -$len;
	} elseif ($len !== null) {
		$start = 0;
		$count = $len;
	} else {
		// no length given: compare the strings as they are
		return self::lower($left) === self::lower($right);
	}

	$leftPart = self::substring($left, $start, $count);
	$rightPart = self::substring($right, $start, $count);
	return self::lower($leftPart) === self::lower($rightPart);
}
325:
326:
327: 328: 329: 330: 331:
/**
 * Finds the longest prefix shared by all the given UTF-8 strings.
 * The strings may be passed as separate arguments or as a single array.
 * With no strings at all an empty string is returned.
 * @param  string|string[]  ...$strings
 * @return string
 */
public static function findPrefix(...$strings)
{
	if (isset($strings[0]) && is_array($strings[0])) {
		$strings = $strings[0];
	}
	if (!$strings) {
		return ''; // nothing to compare, so there is no common prefix
	}

	$first = array_shift($strings);
	$firstLen = strlen($first);
	for ($i = 0; $i < $firstLen; $i++) {
		foreach ($strings as $s) {
			if (!isset($s[$i]) || $first[$i] !== $s[$i]) {
				// the mismatch may sit on a continuation byte (0x80-0xBF) of a multibyte
				// character; step back so the prefix never ends inside a character
				while ($i && $first[$i - 1] >= "\x80" && $first[$i] >= "\x80" && $first[$i] < "\xC0") {
					$i--;
				}
				return substr($first, 0, $i);
			}
		}
	}
	return $first;
}
350:
351:
352: 353: 354: 355: 356: 357:
/**
 * Returns the number of characters in a UTF-8 string.
 * @param  string  $s
 * @return int
 */
public static function length($s)
{
	if (function_exists('mb_strlen')) {
		return mb_strlen($s, 'UTF-8');
	}
	// fallback without mbstring: count the bytes of the utf8_decode() result
	return strlen(utf8_decode($s));
}
362:
363:
364: 365: 366: 367: 368: 369:
/**
 * Strips the given characters from both ends of a UTF-8 string.
 * @param  string  $s         UTF-8 encoding
 * @param  string  $charlist  characters to strip; defaults to self::TRIM_CHARACTERS
 * @return string
 */
public static function trim($s, $charlist = self::TRIM_CHARACTERS)
{
	// one or more of the listed characters, escaped for use inside a character class
	$run = '[' . preg_quote($charlist, '#') . ']+';
	return self::replace($s, '#^' . $run . '|' . $run . '\z#u', '');
}
375:
376:
377: 378: 379: 380: 381: 382: 383:
/**
 * Pads a UTF-8 string on the left to the given length, counted in characters.
 * @param  string  $s       UTF-8 encoding
 * @param  int     $length  desired total length
 * @param  string  $pad     padding string, repeated and cut as needed
 * @return string
 */
public static function padLeft($s, $length, $pad = ' ')
{
	$missing = max(0, $length - self::length($s));
	$padLen = self::length($pad);

	// whole copies of $pad first, then the leading part of one more copy
	$wholeCopies = str_repeat($pad, (int) ($missing / $padLen));
	$partialCopy = self::substring($pad, 0, $missing % $padLen);

	return $wholeCopies . $partialCopy . $s;
}
390:
391:
392: 393: 394: 395: 396: 397: 398:
/**
 * Pads a UTF-8 string on the right to the given length, counted in characters.
 * @param  string  $s       UTF-8 encoding
 * @param  int     $length  desired total length
 * @param  string  $pad     padding string, repeated and cut as needed
 * @return string
 */
public static function padRight($s, $length, $pad = ' ')
{
	$missing = max(0, $length - self::length($s));
	$padLen = self::length($pad);

	// whole copies of $pad first, then the leading part of one more copy
	$wholeCopies = str_repeat($pad, (int) ($missing / $padLen));
	$partialCopy = self::substring($pad, 0, $missing % $padLen);

	return $s . $wholeCopies . $partialCopy;
}
405:
406:
407: 408: 409: 410: 411:
/**
 * Reverses a UTF-8 string character by character.
 * @param  string  $s  UTF-8 encoding
 * @return string
 */
public static function reverse($s)
{
	// every character is 4 bytes in UTF-32; reversing all bytes of the big-endian
	// form gives the characters in reverse order in little-endian form
	$utf32be = iconv('UTF-8', 'UTF-32BE', $s);
	$flipped = strrev($utf32be);
	return iconv('UTF-32LE', 'UTF-8', $flipped);
}
416:
417:
418: 419: 420: 421:
/**
 * Deprecated alias that raises E_USER_DEPRECATED and delegates to Random::generate().
 * @deprecated use Nette\Utils\Random::generate()
 * @param  int     $length
 * @param  string  $charlist
 * @return string
 */
public static function random($length = 10, $charlist = '0-9a-z')
{
	$notice = __METHOD__ . '() is deprecated, use Nette\Utils\Random::generate()';
	trigger_error($notice, E_USER_DEPRECATED);

	return Random::generate($length, $charlist);
}
427:
428:
429: 430: 431: 432: 433: 434: 435:
/**
 * Returns the part of $haystack before the $nth occurrence of $needle.
 * @param  string  $haystack
 * @param  string  $needle
 * @param  int     $nth  negative value means searching from the end
 * @return string|false  false if the needle was not found
 */
public static function before($haystack, $needle, $nth = 1)
{
	$pos = self::pos($haystack, $needle, $nth);
	if ($pos === false) {
		return false;
	}

	// the cast mirrors after(): on PHP 5.x substr('', 0, 0) yields false, which
	// would be indistinguishable from the "not found" result
	return (string) substr($haystack, 0, $pos);
}
443:
444:
445: 446: 447: 448: 449: 450: 451:
/**
 * Returns the part of $haystack after the $nth occurrence of $needle.
 * @param  string  $haystack
 * @param  string  $needle
 * @param  int     $nth  negative value means searching from the end
 * @return string|false  false if the needle was not found
 */
public static function after($haystack, $needle, $nth = 1)
{
	$pos = self::pos($haystack, $needle, $nth);
	if ($pos === false) {
		return false;
	}

	$tailStart = $pos + strlen($needle);
	return (string) substr($haystack, $tailStart);
}
459:
460:
461: 462: 463: 464: 465: 466: 467:
/**
 * Returns the position, counted in characters, of the $nth occurrence of $needle in $haystack.
 * @param  string  $haystack
 * @param  string  $needle
 * @param  int     $nth  negative value means searching from the end
 * @return int|false  false if the needle was not found
 */
public static function indexOf($haystack, $needle, $nth = 1)
{
	$pos = self::pos($haystack, $needle, $nth);
	if ($pos === false) {
		return false;
	}

	// pos() works with bytes; the character offset is the length of the preceding part
	$leading = substr($haystack, 0, $pos);
	return self::length($leading);
}
475:
476:
477: 478: 479: 480:
/**
 * Returns the byte position of the $nth occurrence of $needle in $haystack.
 * A positive $nth counts occurrences from the start, a negative one from the end.
 * An empty needle is reported at offset 0 (forward) or at the end of the haystack (backward).
 * @param  string  $haystack
 * @param  string  $needle
 * @param  int     $nth
 * @return int|false  false if $nth is zero or there are fewer occurrences than requested
 */
private static function pos($haystack, $needle, $nth = 1)
{
	if (!$nth) {
		return false;
	} elseif ($nth > 0) {
		if (strlen($needle) === 0) {
			return 0;
		}
		$pos = 0;
		// after each hit that is not the requested one, resume one byte further
		while (($pos = strpos($haystack, $needle, $pos)) !== false && --$nth) {
			$pos++;
		}
	} else {
		$len = strlen($haystack);
		if (strlen($needle) === 0) {
			return $len;
		} elseif ($len === 0) {
			// strrpos() rejects any negative offset on an empty haystack
			return false;
		}
		$pos = $len - 1;
		// the negative offset ($pos - $len) limits matches to those starting at or before $pos
		while (($pos = strrpos($haystack, $needle, $pos - $len)) !== false && ++$nth) {
			if ($pos === 0) {
				// nothing can precede a match at offset 0, and continuing would
				// hand strrpos() an offset beyond the start of the haystack
				return false;
			}
			$pos--;
		}
	}
	return $pos;
}
505:
506:
507: 508: 509: 510: 511: 512: 513:
/**
 * Splits a string by a regular expression.
 * PREG_SPLIT_DELIM_CAPTURE is always added to the given flags.
 * @param  string  $subject
 * @param  string  $pattern
 * @param  int     $flags
 * @return array
 */
public static function split($subject, $pattern, $flags = 0)
{
	$effectiveFlags = $flags | PREG_SPLIT_DELIM_CAPTURE;
	$noLimit = -1;
	return self::pcre('preg_split', [$pattern, $subject, $noLimit, $effectiveFlags]);
}
518:
519:
520: 521: 522: 523: 524: 525: 526: 527:
/**
 * Performs a regular expression match.
 * @param  string  $subject
 * @param  string  $pattern
 * @param  int     $flags
 * @param  int     $offset  offset in bytes
 * @return array|null  captured groups, or null when nothing matched
 *                     or $offset lies beyond the end of $subject
 */
public static function match($subject, $pattern, $flags = 0, $offset = 0)
{
	if ($offset > strlen($subject)) {
		return null;
	}

	// preg_match() fills $captures through the reference stored in the argument list
	$matched = self::pcre('preg_match', [$pattern, $subject, &$captures, $flags, $offset]);
	if ($matched) {
		return $captures;
	}
	return null;
}
537:
538:
539: 540: 541: 542: 543: 544: 545: 546:
/**
 * Performs a global regular expression match.
 * Results are ordered by PREG_SET_ORDER unless PREG_PATTERN_ORDER is requested in $flags.
 * @param  string  $subject
 * @param  string  $pattern
 * @param  int     $flags
 * @param  int     $offset  offset in bytes
 * @return array  empty when $offset lies beyond the end of $subject
 */
public static function matchAll($subject, $pattern, $flags = 0, $offset = 0)
{
	if ($offset > strlen($subject)) {
		return [];
	}

	$ordering = ($flags & PREG_PATTERN_ORDER) ? $flags : ($flags | PREG_SET_ORDER);
	// preg_match_all() fills $found through the reference stored in the argument list
	self::pcre('preg_match_all', [$pattern, $subject, &$found, $ordering, $offset]);
	return $found;
}
559:
560:
561: 562: 563: 564: 565: 566: 567: 568:
/**
 * Performs a regular expression search and replace.
 * @param  string                $subject
 * @param  string|array          $pattern      a pattern, or a map pattern => replacement when $replacement is null
 * @param  string|callable|null  $replacement  objects and arrays are treated as callbacks
 * @param  int                   $limit
 * @return string
 * @throws Nette\InvalidStateException if an object or array replacement is not callable
 */
public static function replace($subject, $pattern, $replacement = null, $limit = -1)
{
	$isCallbackLike = is_object($replacement) || is_array($replacement);

	if (!$isCallbackLike) {
		if ($replacement === null && is_array($pattern)) {
			// map form: keys are the patterns, values their replacements
			$replacement = array_values($pattern);
			$pattern = array_keys($pattern);
		}
		return self::pcre('preg_replace', [$pattern, $replacement, $subject, $limit]);
	}

	if ($replacement instanceof Nette\Callback) {
		trigger_error('Nette\Callback is deprecated, use PHP callback.', E_USER_DEPRECATED);
		$replacement = $replacement->getNative();
	}
	if (!is_callable($replacement, false, $textual)) {
		throw new Nette\InvalidStateException("Callback '$textual' is not callable.");
	}

	return self::pcre('preg_replace_callback', [$pattern, $replacement, $subject, $limit]);
}
589:
590:
591:
/**
 * Calls a preg_* function and converts its failures into RegexpException.
 * @param  string  $func  name of the PCRE function
 * @param  array   $args  its arguments; $args[0] is the pattern or an array of patterns
 * @return mixed  result of the called function
 * @throws RegexpException
 * @internal
 */
public static function pcre($func, $args)
{
	static $messages = [
		PREG_INTERNAL_ERROR => 'Internal error',
		PREG_BACKTRACK_LIMIT_ERROR => 'Backtrack limit was exhausted',
		PREG_RECURSION_LIMIT_ERROR => 'Recursion limit was exhausted',
		PREG_BAD_UTF8_ERROR => 'Malformed UTF-8 data',
		PREG_BAD_UTF8_OFFSET_ERROR => 'Offset didn\'t correspond to the begin of a valid UTF-8 code point',
		6 => 'Failed due to limited JIT stack space',
	];

	// NOTE(review): presumably invokeSafe() passes messages of PHP errors raised
	// during the call to this handler; confirm against Callback::invokeSafe()
	$onError = function ($message) use ($args) {
		throw new RegexpException($message . ' in pattern: ' . implode(' or ', (array) $args[0]));
	};
	$res = Callback::invokeSafe($func, $args, $onError);

	$code = preg_last_error();
	if (!$code) {
		return $res;
	}

	// for the replacing functions an error code counts as a failure only when the result is null
	$isReplacing = in_array($func, ['preg_filter', 'preg_replace_callback', 'preg_replace'], true);
	if ($isReplacing && $res !== null) {
		return $res;
	}

	$text = isset($messages[$code]) ? $messages[$code] : 'Unknown error';
	throw new RegexpException($text . ' (pattern: ' . implode(' or ', (array) $args[0]) . ')', $code);
}
615: }
616: