<?php

declare(strict_types=1);

/*
 * This file is part of the league/commonmark package.
 *
 * (c) Colin O'Dell <colinodell@gmail.com>
 *
 * Original code based on the CommonMark JS reference parser (https://bitly.com/commonmark-js)
 *  - (c) John MacFarlane
 *
 * For the full copyright and license information, please view the LICENSE
 * file that was distributed with this source code.
 */

namespace League\CommonMark\Util;

use League\CommonMark\Parser\Cursor;

/**
 * Static helpers that read the individual pieces of a link (destination,
 * label, title) from the current position of a Cursor.
 *
 * @psalm-immutable
 */
final class LinkParserHelper
{
    /**
     * Try to read a link destination at the cursor.
     *
     * The angle-bracket form is attempted first; otherwise the destination
     * is scanned character by character.
     *
     * @return string|null The unescaped, URL-encoded destination, or null if no match
     */
    public static function parseLinkDestination(Cursor $cursor): ?string
    {
        $bracketed = $cursor->match(RegexHelper::REGEX_LINK_DESTINATION_BRACES);
        if ($bracketed) {
            // Drop the first and last character (the surrounding <..>)
            $inner = \substr($bracketed, 1, -1);

            return UrlEncoder::unescapeAndEncode(RegexHelper::unescape($inner));
        }

        // The bracketed pattern did not match, yet we are sitting on "<":
        // do not fall back to the manual scan in that case.
        if ($cursor->getCurrentCharacter() === '<') {
            return null;
        }

        $scanned = self::manuallyParseLinkDestination($cursor);

        return $scanned === null
            ? null
            : UrlEncoder::unescapeAndEncode(RegexHelper::unescape($scanned));
    }

    /**
     * Try to read a bracketed link label at the cursor.
     *
     * @return int Length (in UTF-8 characters, brackets included) of the matched label,
     *             or 0 if nothing matched or the match is longer than 1001 characters
     */
    public static function parseLinkLabel(Cursor $cursor): int
    {
        $label = $cursor->match('/^\[(?:[^\\\\\[\]]|\\\\.){0,1000}\]/');
        if ($label === null) {
            return 0;
        }

        $characterCount = \mb_strlen($label, 'UTF-8');

        // The pattern bounds the number of items, but an escaped pair counts
        // as one item while being two characters, so the length is re-checked.
        return $characterCount > 1001 ? 0 : $characterCount;
    }

    /**
     * Read a run of label content (no unescaped brackets) at the cursor.
     *
     * @return string|null Whatever the cursor matched for the pattern, or null if no match
     */
    public static function parsePartialLinkLabel(Cursor $cursor): ?string
    {
        $partialLabel = $cursor->match('/^(?:[^\\\\\[\]]+|\\\\.?)*/');

        return $partialLabel;
    }

    /**
     * Try to read a complete link title at the cursor.
     *
     * @return string|null The unescaped title without its delimiters, or null if no match
     */
    public static function parseLinkTitle(Cursor $cursor): ?string
    {
        $delimited = $cursor->match('/' . RegexHelper::PARTIAL_LINK_TITLE . '/');
        if (! $delimited) {
            return null;
        }

        // Strip the first and last character (the title delimiters), then unescape
        return RegexHelper::unescape(\substr($delimited, 1, -1));
    }

    /**
     * Read title content up to, and optionally including, the given end delimiter.
     *
     * @param string $endDelimiter Closing delimiter of the title; it is regex-quoted before use
     *
     * @return string|null The unescaped matched text, or null if no match
     */
    public static function parsePartialLinkTitle(Cursor $cursor, string $endDelimiter): ?string
    {
        $quotedDelimiter = \preg_quote($endDelimiter, '/');
        $pattern         = \sprintf(
            '/(%s|[^%s\x00])*(?:%s)?/',
            RegexHelper::PARTIAL_ESCAPED_CHAR,
            $quotedDelimiter,
            $quotedDelimiter
        );

        $matched = $cursor->match($pattern);

        return $matched === null ? null : RegexHelper::unescape($matched);
    }

    /**
     * Scan a destination that is not wrapped in angle brackets.
     *
     * Scanning stops at the end of input, at whitespace, or at a ")" that
     * has no matching "(" inside the destination. Escapable backslash pairs
     * are skipped as a unit.
     *
     * @return string|null The raw destination text, or null if the parentheses are
     *                     unbalanced or nothing was consumed and the scan did not stop on ")"
     */
    private static function manuallyParseLinkDestination(Cursor $cursor): ?string
    {
        $startPosition = $cursor->getPosition();
        $savedState    = $cursor->saveState();

        $depth = 0;
        for ($char = $cursor->getCurrentCharacter(); $char !== null; $char = $cursor->getCurrentCharacter()) {
            if ($char === '\\') {
                $next = $cursor->peek();
                if ($next !== null && RegexHelper::isEscapable($next)) {
                    // Backslash escape: consume both characters at once
                    $cursor->advanceBy(2);
                    continue;
                }
                // Otherwise the backslash is handled like any other character below
            }

            if ($char === '(') {
                $cursor->advanceBy(1);
                ++$depth;
                continue;
            }

            if ($char === ')') {
                if ($depth < 1) {
                    // Unmatched ")" terminates the destination without being consumed
                    break;
                }

                $cursor->advanceBy(1);
                --$depth;
                continue;
            }

            if (\preg_match(RegexHelper::REGEX_WHITESPACE_CHAR, $char)) {
                break;
            }

            $cursor->advanceBy(1);
        }

        // Input ended (or whitespace was hit) with a "(" still open
        if ($depth !== 0) {
            return null;
        }

        $endPosition = $cursor->getPosition();

        // Nothing consumed is only acceptable when the scan stopped on ")"
        if ($endPosition === $startPosition && $char !== ')') {
            return null;
        }

        // Rewind, then cover the whole span with a single advance; presumably this is
        // so getPreviousText() yields the full destination - confirm against Cursor.
        $cursor->restoreState($savedState);
        $cursor->advanceBy($endPosition - $cursor->getPosition());

        return $cursor->getPreviousText();
    }
}