<?php

declare(strict_types=1);

/*
 * This file is part of the league/commonmark package.
 *
 * (c) Colin O'Dell <colinodell@gmail.com>
 *
 * Original code based on the CommonMark JS reference parser (https://bitly.com/commonmark-js)
 *  - (c) John MacFarlane
 *
 * For the full copyright and license information, please view the LICENSE
 * file that was distributed with this source code.
 */

namespace League\CommonMark\Reference;

use League\CommonMark\Parser\Cursor;
use League\CommonMark\Util\LinkParserHelper;

/**
 * Incremental, line-by-line parser for link reference definitions
 * (e.g. `[foo]: /url "title"`) found at the start of a paragraph.
 *
 * Lines are fed one at a time through parse(). Every line is also appended to
 * an internal paragraph buffer; that buffer is cleared each time a definition
 * is recognised as valid, so getParagraphContent() returns only the text that
 * was not consumed by definitions.
 */
final class ReferenceParser
{
    // Looking for the start of a definition, i.e. `[`
    private const START_DEFINITION = 0;
    // Looking for and parsing the label, i.e. `foo` within `[foo]`
    private const LABEL = 1;
    // Parsing the destination, i.e. `/url` in `[foo]: /url`
    private const DESTINATION = 2;
    // Looking for the start of a title, i.e. the first `"` in `[foo]: /url "title"`
    private const START_TITLE = 3;
    // Parsing the content of the title, i.e. `title` in `[foo]: /url "title"`
    private const TITLE = 4;
    // End state, no matter what kind of lines we add, they won't be references
    private const PARAGRAPH = 5;

    /**
     * Text fed to parse() that has not (yet) been consumed by a valid definition.
     *
     * @psalm-readonly-allow-private-mutation
     */
    private string $paragraph = '';

    /**
     * Definitions that have been completely parsed and committed.
     *
     * @var array<int, ReferenceInterface>
     *
     * @psalm-readonly-allow-private-mutation
     */
    private array $references = [];

    /**
     * Current state of the state machine (one of the constants above).
     *
     * @psalm-readonly-allow-private-mutation
     */
    private int $state = self::START_DEFINITION;

    /**
     * Label of the definition currently being parsed; null when none is in progress.
     *
     * @psalm-readonly-allow-private-mutation
     */
    private ?string $label = null;

    /**
     * Destination of the definition currently being parsed; null until one was read.
     *
     * @psalm-readonly-allow-private-mutation
     */
    private ?string $destination = null;

    /**
     * Title text collected so far for the definition currently being parsed.
     * This is a scratch buffer: it is only meaningful once the closing
     * delimiter has been seen and nothing else follows it on the line.
     *
     * @psalm-readonly-allow-private-mutation
     */
    private string $title = '';

    /**
     * Character that closes the title currently being parsed (`"`, `'` or `)`).
     *
     * @psalm-readonly-allow-private-mutation
     */
    private ?string $titleDelimiter = null;

    /**
     * Whether the definition currently being parsed is already known to be
     * valid (label and destination complete, destination ended its line) but
     * has not been committed to $references yet.
     *
     * @psalm-readonly-allow-private-mutation
     */
    private bool $referenceValid = false;

    /**
     * Returns the lines fed to parse() (joined with "\n") that were not
     * consumed by a valid reference definition.
     */
    public function getParagraphContent(): string
    {
        return $this->paragraph;
    }

    /**
     * Commits any pending valid definition and returns all parsed references.
     *
     * Note that this mutates the parser: the in-progress definition is
     * finalized, so it is meant to be called once no more lines will be fed.
     *
     * @return ReferenceInterface[]
     */
    public function getReferences(): iterable
    {
        if ($this->state === self::TITLE) {
            // A title was opened but its closing delimiter never arrived. That
            // text is part of the paragraph, so it must not become the title of
            // a definition that was already valid without it.
            $this->title = '';
        }

        $this->finishReference();

        return $this->references;
    }

    /**
     * Whether at least one reference definition has been recognised.
     *
     * A definition that is already known to be valid but has not been
     * committed yet counts as well, so the answer agrees with what
     * getReferences() would return.
     */
    public function hasReferences(): bool
    {
        return $this->references !== [] || $this->referenceValid;
    }

    /**
     * Feeds the next line of the paragraph into the parser.
     *
     * The line is always appended to the paragraph buffer (separated from
     * previous content by "\n"). The state machine then consumes the line
     * until it reaches the end of it; as soon as a step fails, the parser
     * switches permanently to the PARAGRAPH state.
     *
     * @param string $line A single line of input
     */
    public function parse(string $line): void
    {
        if ($this->paragraph !== '') {
            $this->paragraph .= "\n";
        }

        $this->paragraph .= $line;

        $cursor = new Cursor($line);
        while (! $cursor->isAtEnd()) {
            $result = false;
            switch ($this->state) {
                case self::PARAGRAPH:
                    // We're in a paragraph now. Link reference definitions can only appear at the beginning, so once
                    // we're in a paragraph, there's no going back.
                    return;
                case self::START_DEFINITION:
                    $result = $this->parseStartDefinition($cursor);
                    break;
                case self::LABEL:
                    $result = $this->parseLabel($cursor);
                    break;
                case self::DESTINATION:
                    $result = $this->parseDestination($cursor);
                    break;
                case self::START_TITLE:
                    $result = $this->parseStartTitle($cursor);
                    break;
                case self::TITLE:
                    $result = $this->parseTitle($cursor);
                    break;
                default:
                    // this should never happen
                    break;
            }

            if (! $result) {
                $this->state = self::PARAGRAPH;

                return;
            }
        }
    }

    /**
     * Expects the opening `[` of a definition (after optional spaces/tabs).
     *
     * @return bool false if the next non-blank character is not `[`
     */
    private function parseStartDefinition(Cursor $cursor): bool
    {
        $cursor->advanceToNextNonSpaceOrTab();
        if ($cursor->isAtEnd() || $cursor->getCurrentCharacter() !== '[') {
            return false;
        }

        $this->state = self::LABEL;
        $this->label = '';

        $cursor->advance();
        if ($cursor->isAtEnd()) {
            // `[` was the last character of the line; the label starts on the next one
            $this->label .= "\n";
        }

        return true;
    }

    /**
     * Reads (part of) the label and, once it is closed, the `]:` that follows it.
     *
     * @return bool false if the text cannot be (the continuation of) a valid label
     */
    private function parseLabel(Cursor $cursor): bool
    {
        $cursor->advanceToNextNonSpaceOrTab();

        $partialLabel = LinkParserHelper::parsePartialLinkLabel($cursor);
        if ($partialLabel === null) {
            return false;
        }

        \assert($this->label !== null);
        $this->label .= $partialLabel;

        if ($cursor->isAtEnd()) {
            // label might continue on next line
            $this->label .= "\n";

            return true;
        }

        if ($cursor->getCurrentCharacter() !== ']') {
            return false;
        }

        $cursor->advance();

        // end of label
        if ($cursor->getCurrentCharacter() !== ':') {
            return false;
        }

        $cursor->advance();

        // spec: A link label can have at most 999 characters inside the square brackets
        if (\mb_strlen($this->label, 'UTF-8') > 999) {
            return false;
        }

        // spec: A link label must contain at least one non-whitespace character
        if (\trim($this->label) === '') {
            return false;
        }

        $cursor->advanceToNextNonSpaceOrTab();

        $this->state = self::DESTINATION;

        return true;
    }

    /**
     * Reads the link destination.
     *
     * If the destination ends the line, the definition is already valid (a
     * title on the following line is optional), so the paragraph buffer is
     * cleared right away.
     *
     * @return bool false if no destination could be read, or if it is directly
     *              followed by other characters without separating whitespace
     */
    private function parseDestination(Cursor $cursor): bool
    {
        $cursor->advanceToNextNonSpaceOrTab();

        $destination = LinkParserHelper::parseLinkDestination($cursor);
        if ($destination === null) {
            return false;
        }

        $this->destination = $destination;

        $advanced = $cursor->advanceToNextNonSpaceOrTab();
        if ($cursor->isAtEnd()) {
            // Destination was at end of line, so this is a valid reference for sure (and maybe a title).
            // If not at end of line, wait for title to be valid first.
            $this->referenceValid = true;
            $this->paragraph = '';
        } elseif ($advanced === 0) {
            // spec: The title must be separated from the link destination by whitespace
            return false;
        }

        $this->state = self::START_TITLE;

        return true;
    }

    /**
     * Looks for the opening delimiter of a title (`"`, `'` or `(`).
     *
     * When none is found, the pending definition (if valid) is committed and
     * the same character is re-examined as the possible start of another
     * definition.
     *
     * @return bool always true; failures are detected by the state that follows
     */
    private function parseStartTitle(Cursor $cursor): bool
    {
        $cursor->advanceToNextNonSpaceOrTab();
        if ($cursor->isAtEnd()) {
            $this->state = self::START_DEFINITION;

            return true;
        }

        $this->titleDelimiter = null;
        switch ($c = $cursor->getCurrentCharacter()) {
            case '"':
            case "'":
                $this->titleDelimiter = $c;
                break;
            case '(':
                $this->titleDelimiter = ')';
                break;
            default:
                // no title delimiter found
                break;
        }

        if ($this->titleDelimiter !== null) {
            $this->state = self::TITLE;
            $cursor->advance();
            if ($cursor->isAtEnd()) {
                // Opening delimiter ended the line; the title text starts on the next one
                $this->title .= "\n";
            }
        } else {
            $this->finishReference();
            // There might be another reference instead, try that for the same character.
            $this->state = self::START_DEFINITION;
        }

        return true;
    }

    /**
     * Reads (part of) the title up to and including its closing delimiter.
     *
     * On success the definition is committed and the paragraph buffer is
     * cleared. On failure the title text collected so far is discarded: it
     * belongs to the paragraph, and a definition that was already valid
     * without a title must not pick it up.
     *
     * @return bool false if the title is invalid or is followed by other
     *              non-whitespace characters on the same line
     */
    private function parseTitle(Cursor $cursor): bool
    {
        \assert($this->titleDelimiter !== null);
        $title = LinkParserHelper::parsePartialLinkTitle($cursor, $this->titleDelimiter);

        if ($title === null) {
            // Invalid title, stop. Title text collected so far must not be used.
            $this->title = '';

            return false;
        }

        // Did we find the end delimiter?
        $endDelimiterFound = false;
        if (\substr($title, -1) === $this->titleDelimiter) {
            $endDelimiterFound = true;
            // Chop it off
            $title = \substr($title, 0, -1);
        }

        $this->title .= $title;

        if (! $endDelimiterFound && $cursor->isAtEnd()) {
            // Title still going, continue on next line
            $this->title .= "\n";

            return true;
        }

        // We either hit the end delimiter or some extra whitespace
        $cursor->advanceToNextNonSpaceOrTab();
        if (! $cursor->isAtEnd()) {
            // spec: No further non-whitespace characters may occur on the line.
            // Title text collected so far must not be used.
            $this->title = '';

            return false;
        }

        $this->referenceValid = true;
        $this->finishReference();
        $this->paragraph = '';

        // See if there's another definition
        $this->state = self::START_DEFINITION;

        return true;
    }

    /**
     * Commits the definition currently being parsed, if it is valid, and
     * resets all per-definition fields. Does nothing otherwise.
     */
    private function finishReference(): void
    {
        if (! $this->referenceValid) {
            return;
        }

        /** @psalm-suppress PossiblyNullArgument -- these can't possibly be null if we're in this state */
        $this->references[] = new Reference($this->label, $this->destination, $this->title);

        $this->label = null;
        $this->referenceValid = false;
        $this->destination = null;
        $this->title = '';
        $this->titleDelimiter = null;
    }
}