regex.h
Engine/source/persistence/rapidjson/internal/regex.h
Classes:
class
Regular expression engine with subset of ECMAscript grammar.
Namespaces:
namespace
Public Defines
define
Detailed Description
Public Defines
RAPIDJSON_REGEX_VERBOSE() 0
1 2// Tencent is pleased to support the open source community by making RapidJSON available. 3// 4// Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip. All rights reserved. 5// 6// Licensed under the MIT License (the "License"); you may not use this file except 7// in compliance with the License. You may obtain a copy of the License at 8// 9// http://opensource.org/licenses/MIT 10// 11// Unless required by applicable law or agreed to in writing, software distributed 12// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR 13// CONDITIONS OF ANY KIND, either express or implied. See the License for the 14// specific language governing permissions and limitations under the License. 15 16#ifndef RAPIDJSON_INTERNAL_REGEX_H_ 17#define RAPIDJSON_INTERNAL_REGEX_H_ 18 19#include "../allocators.h" 20#include "../stream.h" 21#include "stack.h" 22 23#ifdef __clang__ 24RAPIDJSON_DIAG_PUSH 25RAPIDJSON_DIAG_OFF(padded) 26RAPIDJSON_DIAG_OFF(switch-enum) 27RAPIDJSON_DIAG_OFF(implicit-fallthrough) 28#elif defined(_MSC_VER) 29RAPIDJSON_DIAG_PUSH 30RAPIDJSON_DIAG_OFF(4512) // assignment operator could not be generated 31#endif 32 33#ifdef __GNUC__ 34RAPIDJSON_DIAG_PUSH 35RAPIDJSON_DIAG_OFF(effc++) 36#if __GNUC__ >= 7 37RAPIDJSON_DIAG_OFF(implicit-fallthrough) 38#endif 39#endif 40 41#ifndef RAPIDJSON_REGEX_VERBOSE 42#define RAPIDJSON_REGEX_VERBOSE 0 43#endif 44 45RAPIDJSON_NAMESPACE_BEGIN 46namespace internal { 47 48/////////////////////////////////////////////////////////////////////////////// 49// DecodedStream 50 51template <typename SourceStream, typename Encoding> 52class DecodedStream { 53public: 54 DecodedStream(SourceStream& ss) : ss_(ss), codepoint_() { Decode(); } 55 unsigned Peek() { return codepoint_; } 56 unsigned Take() { 57 unsigned c = codepoint_; 58 if (c) // No further decoding when '\0' 59 Decode(); 60 return c; 61 } 62 63private: 64 void Decode() { 65 if (!Encoding::Decode(ss_, &codepoint_)) 66 codepoint_ = 0; 67 } 68 69 SourceStream& ss_; 70 unsigned codepoint_; 71}; 72 73/////////////////////////////////////////////////////////////////////////////// 74// GenericRegex 75 76static const SizeType kRegexInvalidState = ~<a href="/coding/file/rapidjson_8h/#rapidjson_8h_1a5ed6e6e67250fadbd041127e6386dcb5">SizeType</a>(0); //!< Represents an invalid index in GenericRegex::State::out, out1 77static const SizeType kRegexInvalidRange = ~<a href="/coding/file/rapidjson_8h/#rapidjson_8h_1a5ed6e6e67250fadbd041127e6386dcb5">SizeType</a>(0); 78 79template <typename Encoding, typename Allocator> 80class GenericRegexSearch; 81 82//! Regular expression engine with subset of ECMAscript grammar. 83/*! 84 Supported regular expression syntax: 85 - \c ab Concatenation 86 - \c a|b Alternation 87 - \c a? Zero or one 88 - \c a* Zero or more 89 - \c a+ One or more 90 - \c a{3} Exactly 3 times 91 - \c a{3,} At least 3 times 92 - \c a{3,5} 3 to 5 times 93 - \c (ab) Grouping 94 - \c ^a At the beginning 95 - \c a$ At the end 96 - \c . Any character 97 - \c [abc] Character classes 98 - \c [a-c] Character class range 99 - \c [a-z0-9_] Character class combination 100 - \c [^abc] Negated character classes 101 - \c [^a-c] Negated character class range 102 - \c [\b] Backspace (U+0008) 103 - \c \\| \\\\ ... Escape characters 104 - \c \\f Form feed (U+000C) 105 - \c \\n Line feed (U+000A) 106 - \c \\r Carriage return (U+000D) 107 - \c \\t Tab (U+0009) 108 - \c \\v Vertical tab (U+000B) 109 110 \note This is a Thompson NFA engine, implemented with reference to 111 Cox, Russ. "Regular Expression Matching Can Be Simple And Fast (but is slow in Java, Perl, PHP, Python, Ruby,...).", 112 https://swtch.com/~rsc/regexp/regexp1.html 113*/ 114template <typename Encoding, typename Allocator = CrtAllocator> 115class GenericRegex { 116public: 117 typedef Encoding EncodingType; 118 typedef typename Encoding::Ch Ch; 119 template <typename, typename> friend class GenericRegexSearch; 120 121 GenericRegex(const Ch* source, Allocator* allocator = 0) : 122 ownAllocator_(allocator ? 0 : RAPIDJSON_NEW(Allocator)()), allocator_(allocator ? allocator : ownAllocator_), 123 states_(allocator_, 256), ranges_(allocator_, 256), root_(kRegexInvalidState), stateCount_(), rangeCount_(), 124 anchorBegin_(), anchorEnd_() 125 { 126 GenericStringStream<Encoding> ss(source); 127 DecodedStream<GenericStringStream<Encoding>, Encoding> ds(ss); 128 Parse(ds); 129 } 130 131 ~GenericRegex() 132 { 133 RAPIDJSON_DELETE(ownAllocator_); 134 } 135 136 bool IsValid() const { 137 return root_ != kRegexInvalidState; 138 } 139 140private: 141 enum Operator { 142 kZeroOrOne, 143 kZeroOrMore, 144 kOneOrMore, 145 kConcatenation, 146 kAlternation, 147 kLeftParenthesis 148 }; 149 150 static const unsigned kAnyCharacterClass = 0xFFFFFFFF; //!< For '.' 151 static const unsigned kRangeCharacterClass = 0xFFFFFFFE; 152 static const unsigned kRangeNegationFlag = 0x80000000; 153 154 struct Range { 155 unsigned start; // 156 unsigned end; 157 SizeType next; 158 }; 159 160 struct State { 161 SizeType out; //!< Equals to kInvalid for matching state 162 SizeType out1; //!< Equals to non-kInvalid for split 163 SizeType rangeStart; 164 unsigned codepoint; 165 }; 166 167 struct Frag { 168 Frag(SizeType s, SizeType o, SizeType m) : start(s), out(o), minIndex(m) {} 169 SizeType start; 170 SizeType out; //!< link-list of all output states 171 SizeType minIndex; 172 }; 173 174 State& GetState(SizeType index) { 175 RAPIDJSON_ASSERT(index < stateCount_); 176 return states_.template Bottom<State>()[index]; 177 } 178 179 const State& GetState(SizeType index) const { 180 RAPIDJSON_ASSERT(index < stateCount_); 181 return states_.template Bottom<State>()[index]; 182 } 183 184 Range& GetRange(SizeType index) { 185 RAPIDJSON_ASSERT(index < rangeCount_); 186 return ranges_.template Bottom<Range>()[index]; 187 } 188 189 const Range& GetRange(SizeType index) const { 190 RAPIDJSON_ASSERT(index < rangeCount_); 191 return ranges_.template Bottom<Range>()[index]; 192 } 193 194 template <typename InputStream> 195 void Parse(DecodedStream<InputStream, Encoding>& ds) { 196 Stack<Allocator> operandStack(allocator_, 256); // Frag 197 Stack<Allocator> operatorStack(allocator_, 256); // Operator 198 Stack<Allocator> atomCountStack(allocator_, 256); // unsigned (Atom per parenthesis) 199 200 *atomCountStack.template Push<unsigned>() = 0; 201 202 unsigned codepoint; 203 while (ds.Peek() != 0) { 204 switch (codepoint = ds.Take()) { 205 case '^': 206 anchorBegin_ = true; 207 break; 208 209 case '$': 210 anchorEnd_ = true; 211 break; 212 213 case '|': 214 while (!operatorStack.Empty() && *operatorStack.template Top<Operator>() < kAlternation) 215 if (!Eval(operandStack, *operatorStack.template Pop<Operator>(1))) 216 return; 217 *operatorStack.template Push<Operator>() = kAlternation; 218 *atomCountStack.template Top<unsigned>() = 0; 219 break; 220 221 case '(': 222 *operatorStack.template Push<Operator>() = kLeftParenthesis; 223 *atomCountStack.template Push<unsigned>() = 0; 224 break; 225 226 case ')': 227 while (!operatorStack.Empty() && *operatorStack.template Top<Operator>() != kLeftParenthesis) 228 if (!Eval(operandStack, *operatorStack.template Pop<Operator>(1))) 229 return; 230 if (operatorStack.Empty()) 231 return; 232 operatorStack.template Pop<Operator>(1); 233 atomCountStack.template Pop<unsigned>(1); 234 ImplicitConcatenation(atomCountStack, operatorStack); 235 break; 236 237 case '?': 238 if (!Eval(operandStack, kZeroOrOne)) 239 return; 240 break; 241 242 case '*': 243 if (!Eval(operandStack, kZeroOrMore)) 244 return; 245 break; 246 247 case '+': 248 if (!Eval(operandStack, kOneOrMore)) 249 return; 250 break; 251 252 case '{': 253 { 254 unsigned n, m; 255 if (!ParseUnsigned(ds, &n)) 256 return; 257 258 if (ds.Peek() == ',') { 259 ds.Take(); 260 if (ds.Peek() == '}') 261 m = kInfinityQuantifier; 262 else if (!ParseUnsigned(ds, &m) || m < n) 263 return; 264 } 265 else 266 m = n; 267 268 if (!EvalQuantifier(operandStack, n, m) || ds.Peek() != '}') 269 return; 270 ds.Take(); 271 } 272 break; 273 274 case '.': 275 PushOperand(operandStack, kAnyCharacterClass); 276 ImplicitConcatenation(atomCountStack, operatorStack); 277 break; 278 279 case '[': 280 { 281 SizeType range; 282 if (!ParseRange(ds, &range)) 283 return; 284 SizeType s = NewState(kRegexInvalidState, kRegexInvalidState, kRangeCharacterClass); 285 GetState(s).rangeStart = range; 286 *operandStack.template Push<Frag>() = Frag(s, s, s); 287 } 288 ImplicitConcatenation(atomCountStack, operatorStack); 289 break; 290 291 case '\\': // Escape character 292 if (!CharacterEscape(ds, &codepoint)) 293 return; // Unsupported escape character 294 // fall through to default 295 296 default: // Pattern character 297 PushOperand(operandStack, codepoint); 298 ImplicitConcatenation(atomCountStack, operatorStack); 299 } 300 } 301 302 while (!operatorStack.Empty()) 303 if (!Eval(operandStack, *operatorStack.template Pop<Operator>(1))) 304 return; 305 306 // Link the operand to matching state. 307 if (operandStack.GetSize() == sizeof(Frag)) { 308 Frag* e = operandStack.template Pop<Frag>(1); 309 Patch(e->out, NewState(kRegexInvalidState, kRegexInvalidState, 0)); 310 root_ = e->start; 311 312#if RAPIDJSON_REGEX_VERBOSE 313 printf("root: %d\n", root_); 314 for (SizeType i = 0; i < stateCount_ ; i++) { 315 State& s = GetState(i); 316 printf("[%2d] out: %2d out1: %2d c: '%c'\n", i, s.out, s.out1, (char)s.codepoint); 317 } 318 printf("\n"); 319#endif 320 } 321 } 322 323 SizeType NewState(SizeType out, SizeType out1, unsigned codepoint) { 324 State* s = states_.template Push<State>(); 325 s->out = out; 326 s->out1 = out1; 327 s->codepoint = codepoint; 328 s->rangeStart = kRegexInvalidRange; 329 return stateCount_++; 330 } 331 332 void PushOperand(Stack<Allocator>& operandStack, unsigned codepoint) { 333 SizeType s = NewState(kRegexInvalidState, kRegexInvalidState, codepoint); 334 *operandStack.template Push<Frag>() = Frag(s, s, s); 335 } 336 337 void ImplicitConcatenation(Stack<Allocator>& atomCountStack, Stack<Allocator>& operatorStack) { 338 if (*atomCountStack.template Top<unsigned>()) 339 *operatorStack.template Push<Operator>() = kConcatenation; 340 (*atomCountStack.template Top<unsigned>())++; 341 } 342 343 SizeType Append(SizeType l1, SizeType l2) { 344 SizeType old = l1; 345 while (GetState(l1).out != kRegexInvalidState) 346 l1 = GetState(l1).out; 347 GetState(l1).out = l2; 348 return old; 349 } 350 351 void Patch(SizeType l, SizeType s) { 352 for (SizeType next; l != kRegexInvalidState; l = next) { 353 next = GetState(l).out; 354 GetState(l).out = s; 355 } 356 } 357 358 bool Eval(Stack<Allocator>& operandStack, Operator op) { 359 switch (op) { 360 case kConcatenation: 361 RAPIDJSON_ASSERT(operandStack.GetSize() >= sizeof(Frag) * 2); 362 { 363 Frag e2 = *operandStack.template Pop<Frag>(1); 364 Frag e1 = *operandStack.template Pop<Frag>(1); 365 Patch(e1.out, e2.start); 366 *operandStack.template Push<Frag>() = Frag(e1.start, e2.out, Min(e1.minIndex, e2.minIndex)); 367 } 368 return true; 369 370 case kAlternation: 371 if (operandStack.GetSize() >= sizeof(Frag) * 2) { 372 Frag e2 = *operandStack.template Pop<Frag>(1); 373 Frag e1 = *operandStack.template Pop<Frag>(1); 374 SizeType s = NewState(e1.start, e2.start, 0); 375 *operandStack.template Push<Frag>() = Frag(s, Append(e1.out, e2.out), Min(e1.minIndex, e2.minIndex)); 376 return true; 377 } 378 return false; 379 380 case kZeroOrOne: 381 if (operandStack.GetSize() >= sizeof(Frag)) { 382 Frag e = *operandStack.template Pop<Frag>(1); 383 SizeType s = NewState(kRegexInvalidState, e.start, 0); 384 *operandStack.template Push<Frag>() = Frag(s, Append(e.out, s), e.minIndex); 385 return true; 386 } 387 return false; 388 389 case kZeroOrMore: 390 if (operandStack.GetSize() >= sizeof(Frag)) { 391 Frag e = *operandStack.template Pop<Frag>(1); 392 SizeType s = NewState(kRegexInvalidState, e.start, 0); 393 Patch(e.out, s); 394 *operandStack.template Push<Frag>() = Frag(s, s, e.minIndex); 395 return true; 396 } 397 return false; 398 399 case kOneOrMore: 400 if (operandStack.GetSize() >= sizeof(Frag)) { 401 Frag e = *operandStack.template Pop<Frag>(1); 402 SizeType s = NewState(kRegexInvalidState, e.start, 0); 403 Patch(e.out, s); 404 *operandStack.template Push<Frag>() = Frag(e.start, s, e.minIndex); 405 return true; 406 } 407 return false; 408 409 default: 410 // syntax error (e.g. unclosed kLeftParenthesis) 411 return false; 412 } 413 } 414 415 bool EvalQuantifier(Stack<Allocator>& operandStack, unsigned n, unsigned m) { 416 RAPIDJSON_ASSERT(n <= m); 417 RAPIDJSON_ASSERT(operandStack.GetSize() >= sizeof(Frag)); 418 419 if (n == 0) { 420 if (m == 0) // a{0} not support 421 return false; 422 else if (m == kInfinityQuantifier) 423 Eval(operandStack, kZeroOrMore); // a{0,} -> a* 424 else { 425 Eval(operandStack, kZeroOrOne); // a{0,5} -> a? 426 for (unsigned i = 0; i < m - 1; i++) 427 CloneTopOperand(operandStack); // a{0,5} -> a? a? a? a? a? 428 for (unsigned i = 0; i < m - 1; i++) 429 Eval(operandStack, kConcatenation); // a{0,5} -> a?a?a?a?a? 430 } 431 return true; 432 } 433 434 for (unsigned i = 0; i < n - 1; i++) // a{3} -> a a a 435 CloneTopOperand(operandStack); 436 437 if (m == kInfinityQuantifier) 438 Eval(operandStack, kOneOrMore); // a{3,} -> a a a+ 439 else if (m > n) { 440 CloneTopOperand(operandStack); // a{3,5} -> a a a a 441 Eval(operandStack, kZeroOrOne); // a{3,5} -> a a a a? 442 for (unsigned i = n; i < m - 1; i++) 443 CloneTopOperand(operandStack); // a{3,5} -> a a a a? a? 444 for (unsigned i = n; i < m; i++) 445 Eval(operandStack, kConcatenation); // a{3,5} -> a a aa?a? 446 } 447 448 for (unsigned i = 0; i < n - 1; i++) 449 Eval(operandStack, kConcatenation); // a{3} -> aaa, a{3,} -> aaa+, a{3.5} -> aaaa?a? 450 451 return true; 452 } 453 454 static SizeType Min(SizeType a, SizeType b) { return a < b ? a : b; } 455 456 void CloneTopOperand(Stack<Allocator>& operandStack) { 457 const Frag src = *operandStack.template Top<Frag>(); // Copy constructor to prevent invalidation 458 SizeType count = stateCount_ - src.minIndex; // Assumes top operand contains states in [src->minIndex, stateCount_) 459 State* s = states_.template Push<State>(count); 460 memcpy(s, &GetState(src.minIndex), count * sizeof(State)); 461 for (SizeType j = 0; j < count; j++) { 462 if (s[j].out != kRegexInvalidState) 463 s[j].out += count; 464 if (s[j].out1 != kRegexInvalidState) 465 s[j].out1 += count; 466 } 467 *operandStack.template Push<Frag>() = Frag(src.start + count, src.out + count, src.minIndex + count); 468 stateCount_ += count; 469 } 470 471 template <typename InputStream> 472 bool ParseUnsigned(DecodedStream<InputStream, Encoding>& ds, unsigned* u) { 473 unsigned r = 0; 474 if (ds.Peek() < '0' || ds.Peek() > '9') 475 return false; 476 while (ds.Peek() >= '0' && ds.Peek() <= '9') { 477 if (r >= 429496729 && ds.Peek() > '5') // 2^32 - 1 = 4294967295 478 return false; // overflow 479 r = r * 10 + (ds.Take() - '0'); 480 } 481 *u = r; 482 return true; 483 } 484 485 template <typename InputStream> 486 bool ParseRange(DecodedStream<InputStream, Encoding>& ds, SizeType* range) { 487 bool isBegin = true; 488 bool negate = false; 489 int step = 0; 490 SizeType start = kRegexInvalidRange; 491 SizeType current = kRegexInvalidRange; 492 unsigned codepoint; 493 while ((codepoint = ds.Take()) != 0) { 494 if (isBegin) { 495 isBegin = false; 496 if (codepoint == '^') { 497 negate = true; 498 continue; 499 } 500 } 501 502 switch (codepoint) { 503 case ']': 504 if (start == kRegexInvalidRange) 505 return false; // Error: nothing inside [] 506 if (step == 2) { // Add trailing '-' 507 SizeType r = NewRange('-'); 508 RAPIDJSON_ASSERT(current != kRegexInvalidRange); 509 GetRange(current).next = r; 510 } 511 if (negate) 512 GetRange(start).start |= kRangeNegationFlag; 513 *range = start; 514 return true; 515 516 case '\\': 517 if (ds.Peek() == 'b') { 518 ds.Take(); 519 codepoint = 0x0008; // Escape backspace character 520 } 521 else if (!CharacterEscape(ds, &codepoint)) 522 return false; 523 // fall through to default 524 525 default: 526 switch (step) { 527 case 1: 528 if (codepoint == '-') { 529 step++; 530 break; 531 } 532 // fall through to step 0 for other characters 533 534 case 0: 535 { 536 SizeType r = NewRange(codepoint); 537 if (current != kRegexInvalidRange) 538 GetRange(current).next = r; 539 if (start == kRegexInvalidRange) 540 start = r; 541 current = r; 542 } 543 step = 1; 544 break; 545 546 default: 547 RAPIDJSON_ASSERT(step == 2); 548 GetRange(current).end = codepoint; 549 step = 0; 550 } 551 } 552 } 553 return false; 554 } 555 556 SizeType NewRange(unsigned codepoint) { 557 Range* r = ranges_.template Push<Range>(); 558 r->start = r->end = codepoint; 559 r->next = kRegexInvalidRange; 560 return rangeCount_++; 561 } 562 563 template <typename InputStream> 564 bool CharacterEscape(DecodedStream<InputStream, Encoding>& ds, unsigned* escapedCodepoint) { 565 unsigned codepoint; 566 switch (codepoint = ds.Take()) { 567 case '^': 568 case '$': 569 case '|': 570 case '(': 571 case ')': 572 case '?': 573 case '*': 574 case '+': 575 case '.': 576 case '[': 577 case ']': 578 case '{': 579 case '}': 580 case '\\': 581 *escapedCodepoint = codepoint; return true; 582 case 'f': *escapedCodepoint = 0x000C; return true; 583 case 'n': *escapedCodepoint = 0x000A; return true; 584 case 'r': *escapedCodepoint = 0x000D; return true; 585 case 't': *escapedCodepoint = 0x0009; return true; 586 case 'v': *escapedCodepoint = 0x000B; return true; 587 default: 588 return false; // Unsupported escape character 589 } 590 } 591 592 Allocator* ownAllocator_; 593 Allocator* allocator_; 594 Stack<Allocator> states_; 595 Stack<Allocator> ranges_; 596 SizeType root_; 597 SizeType stateCount_; 598 SizeType rangeCount_; 599 600 static const unsigned kInfinityQuantifier = ~0u; 601 602 // For SearchWithAnchoring() 603 bool anchorBegin_; 604 bool anchorEnd_; 605}; 606 607template <typename RegexType, typename Allocator = CrtAllocator> 608class GenericRegexSearch { 609public: 610 typedef typename RegexType::EncodingType Encoding; 611 typedef typename Encoding::Ch Ch; 612 613 GenericRegexSearch(const RegexType& regex, Allocator* allocator = 0) : 614 regex_(regex), allocator_(allocator), ownAllocator_(0), 615 state0_(allocator, 0), state1_(allocator, 0), stateSet_() 616 { 617 RAPIDJSON_ASSERT(regex_.IsValid()); 618 if (!allocator_) 619 ownAllocator_ = allocator_ = RAPIDJSON_NEW(Allocator)(); 620 stateSet_ = static_cast<unsigned*>(allocator_->Malloc(GetStateSetSize())); 621 state0_.template Reserve<SizeType>(regex_.stateCount_); 622 state1_.template Reserve<SizeType>(regex_.stateCount_); 623 } 624 625 ~GenericRegexSearch() { 626 Allocator::Free(stateSet_); 627 RAPIDJSON_DELETE(ownAllocator_); 628 } 629 630 template <typename InputStream> 631 bool Match(InputStream& is) { 632 return SearchWithAnchoring(is, true, true); 633 } 634 635 bool Match(const Ch* s) { 636 GenericStringStream<Encoding> is(s); 637 return Match(is); 638 } 639 640 template <typename InputStream> 641 bool Search(InputStream& is) { 642 return SearchWithAnchoring(is, regex_.anchorBegin_, regex_.anchorEnd_); 643 } 644 645 bool Search(const Ch* s) { 646 GenericStringStream<Encoding> is(s); 647 return Search(is); 648 } 649 650private: 651 typedef typename RegexType::State State; 652 typedef typename RegexType::Range Range; 653 654 template <typename InputStream> 655 bool SearchWithAnchoring(InputStream& is, bool anchorBegin, bool anchorEnd) { 656 DecodedStream<InputStream, Encoding> ds(is); 657 658 state0_.Clear(); 659 Stack<Allocator> *current = &state0_, *next = &state1_; 660 const size_t stateSetSize = GetStateSetSize(); 661 std::memset(stateSet_, 0, stateSetSize); 662 663 bool matched = AddState(*current, regex_.root_); 664 unsigned codepoint; 665 while (!current->Empty() && (codepoint = ds.Take()) != 0) { 666 std::memset(stateSet_, 0, stateSetSize); 667 next->Clear(); 668 matched = false; 669 for (const SizeType* s = current->template Bottom<SizeType>(); s != current->template End<SizeType>(); ++s) { 670 const State& sr = regex_.GetState(*s); 671 if (sr.codepoint == codepoint || 672 sr.codepoint == RegexType::kAnyCharacterClass || 673 (sr.codepoint == RegexType::kRangeCharacterClass && MatchRange(sr.rangeStart, codepoint))) 674 { 675 matched = AddState(*next, sr.out) || matched; 676 if (!anchorEnd && matched) 677 return true; 678 } 679 if (!anchorBegin) 680 AddState(*next, regex_.root_); 681 } 682 internal::Swap(current, next); 683 } 684 685 return matched; 686 } 687 688 size_t GetStateSetSize() const { 689 return (regex_.stateCount_ + 31) / 32 * 4; 690 } 691 692 // Return whether the added states is a match state 693 bool AddState(Stack<Allocator>& l, SizeType index) { 694 RAPIDJSON_ASSERT(index != kRegexInvalidState); 695 696 const State& s = regex_.GetState(index); 697 if (s.out1 != kRegexInvalidState) { // Split 698 bool matched = AddState(l, s.out); 699 return AddState(l, s.out1) || matched; 700 } 701 else if (!(stateSet_[index >> 5] & (1u << (index & 31)))) { 702 stateSet_[index >> 5] |= (1u << (index & 31)); 703 *l.template PushUnsafe<SizeType>() = index; 704 } 705 return s.out == kRegexInvalidState; // by using PushUnsafe() above, we can ensure s is not validated due to reallocation. 706 } 707 708 bool MatchRange(SizeType rangeIndex, unsigned codepoint) const { 709 bool yes = (regex_.GetRange(rangeIndex).start & RegexType::kRangeNegationFlag) == 0; 710 while (rangeIndex != kRegexInvalidRange) { 711 const Range& r = regex_.GetRange(rangeIndex); 712 if (codepoint >= (r.start & ~RegexType::kRangeNegationFlag) && codepoint <= r.end) 713 return yes; 714 rangeIndex = r.next; 715 } 716 return !yes; 717 } 718 719 const RegexType& regex_; 720 Allocator* allocator_; 721 Allocator* ownAllocator_; 722 Stack<Allocator> state0_; 723 Stack<Allocator> state1_; 724 uint32_t* stateSet_; 725}; 726 727typedef GenericRegex<UTF8<> > Regex; 728typedef GenericRegexSearch<Regex> RegexSearch; 729 730} // namespace internal 731RAPIDJSON_NAMESPACE_END 732 733#ifdef __GNUC__ 734RAPIDJSON_DIAG_POP 735#endif 736 737#if defined(__clang__) || defined(_MSC_VER) 738RAPIDJSON_DIAG_POP 739#endif 740 741#endif // RAPIDJSON_INTERNAL_REGEX_H_ 742