The data contained in this repository can be downloaded to your computer using one of several clients.
Please see the documentation of your version control software client for more information.

Please select the desired protocol below to get the URL.

This URL has Read-Only access.

Statistics
| Branch: | Revision:

main_repo / deps / v8 / src / jsregexp.h @ 40c0f755

History | View | Annotate | Download (48.1 KB)

1
// Copyright 2006-2008 the V8 project authors. All rights reserved.
2
// Redistribution and use in source and binary forms, with or without
3
// modification, are permitted provided that the following conditions are
4
// met:
5
//
6
//     * Redistributions of source code must retain the above copyright
7
//       notice, this list of conditions and the following disclaimer.
8
//     * Redistributions in binary form must reproduce the above
9
//       copyright notice, this list of conditions and the following
10
//       disclaimer in the documentation and/or other materials provided
11
//       with the distribution.
12
//     * Neither the name of Google Inc. nor the names of its
13
//       contributors may be used to endorse or promote products derived
14
//       from this software without specific prior written permission.
15
//
16
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
18
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
19
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
20
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
26
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27

    
28
#ifndef V8_JSREGEXP_H_
29
#define V8_JSREGEXP_H_
30

    
31
namespace v8 { namespace internal {
32

    
33

    
34
class RegExpMacroAssembler;
35

    
36

    
37
class RegExpImpl {
38
 public:
39
  static inline bool UseNativeRegexp() {
40
#ifdef ARM
41
    return false;
42
#else
43
    return FLAG_regexp_native;
44
#endif
45
  }
46
  // Creates a regular expression literal in the old space.
47
  // This function calls the garbage collector if necessary.
48
  static Handle<Object> CreateRegExpLiteral(Handle<JSFunction> constructor,
49
                                            Handle<String> pattern,
50
                                            Handle<String> flags,
51
                                            bool* has_pending_exception);
52

    
53
  // Returns a string representation of a regular expression.
54
  // Implements RegExp.prototype.toString, see ECMA-262 section 15.10.6.4.
55
  // This function calls the garbage collector if necessary.
56
  static Handle<String> ToString(Handle<Object> value);
57

    
58
  // Parses the RegExp pattern and prepares the JSRegExp object with
59
  // generic data and choice of implementation - as well as what
60
  // the implementation wants to store in the data field.
61
  // Returns false if compilation fails.
62
  static Handle<Object> Compile(Handle<JSRegExp> re,
63
                                Handle<String> pattern,
64
                                Handle<String> flags);
65

    
66
  // See ECMA-262 section 15.10.6.2.
67
  // This function calls the garbage collector if necessary.
68
  static Handle<Object> Exec(Handle<JSRegExp> regexp,
69
                             Handle<String> subject,
70
                             int index,
71
                             Handle<JSArray> lastMatchInfo);
72

    
73
  // Call RegExp.prototyp.exec(string) in a loop.
74
  // Used by String.prototype.match and String.prototype.replace.
75
  // This function calls the garbage collector if necessary.
76
  static Handle<Object> ExecGlobal(Handle<JSRegExp> regexp,
77
                                   Handle<String> subject,
78
                                   Handle<JSArray> lastMatchInfo);
79

    
80
  // Prepares a JSRegExp object with Irregexp-specific data.
81
  static void IrregexpPrepare(Handle<JSRegExp> re,
82
                              Handle<String> pattern,
83
                              JSRegExp::Flags flags,
84
                              int capture_register_count);
85

    
86

    
87
  static void AtomCompile(Handle<JSRegExp> re,
88
                          Handle<String> pattern,
89
                          JSRegExp::Flags flags,
90
                          Handle<String> match_pattern);
91

    
92
  static Handle<Object> AtomExec(Handle<JSRegExp> regexp,
93
                                 Handle<String> subject,
94
                                 int index,
95
                                 Handle<JSArray> lastMatchInfo);
96

    
97
  // Execute an Irregexp bytecode pattern.
98
  // On a successful match, the result is a JSArray containing
99
  // captured positions. On a failure, the result is the null value.
100
  // Returns an empty handle in case of an exception.
101
  static Handle<Object> IrregexpExec(Handle<JSRegExp> regexp,
102
                                     Handle<String> subject,
103
                                     int index,
104
                                     Handle<JSArray> lastMatchInfo);
105

    
106
  // Offsets in the lastMatchInfo array.
107
  static const int kLastCaptureCount = 0;
108
  static const int kLastSubject = 1;
109
  static const int kLastInput = 2;
110
  static const int kFirstCapture = 3;
111
  static const int kLastMatchOverhead = 3;
112

    
113
  // Used to access the lastMatchInfo array.
114
  static int GetCapture(FixedArray* array, int index) {
115
    return Smi::cast(array->get(index + kFirstCapture))->value();
116
  }
117

    
118
  static void SetLastCaptureCount(FixedArray* array, int to) {
119
    array->set(kLastCaptureCount, Smi::FromInt(to));
120
  }
121

    
122
  static void SetLastSubject(FixedArray* array, String* to) {
123
    array->set(kLastSubject, to);
124
  }
125

    
126
  static void SetLastInput(FixedArray* array, String* to) {
127
    array->set(kLastInput, to);
128
  }
129

    
130
  static void SetCapture(FixedArray* array, int index, int to) {
131
    array->set(index + kFirstCapture, Smi::FromInt(to));
132
  }
133

    
134
  static int GetLastCaptureCount(FixedArray* array) {
135
    return Smi::cast(array->get(kLastCaptureCount))->value();
136
  }
137

    
138
  // For acting on the JSRegExp data FixedArray.
139
  static int IrregexpMaxRegisterCount(FixedArray* re);
140
  static void SetIrregexpMaxRegisterCount(FixedArray* re, int value);
141
  static int IrregexpNumberOfCaptures(FixedArray* re);
142
  static int IrregexpNumberOfRegisters(FixedArray* re);
143
  static ByteArray* IrregexpByteCode(FixedArray* re, bool is_ascii);
144
  static Code* IrregexpNativeCode(FixedArray* re, bool is_ascii);
145

    
146
 private:
147
  static String* last_ascii_string_;
148
  static String* two_byte_cached_string_;
149

    
150
  static bool EnsureCompiledIrregexp(Handle<JSRegExp> re, bool is_ascii);
151

    
152

    
153
  // Set the subject cache.  The previous string buffer is not deleted, so the
154
  // caller should ensure that it doesn't leak.
155
  static void SetSubjectCache(String* subject,
156
                              char* utf8_subject,
157
                              int uft8_length,
158
                              int character_position,
159
                              int utf8_position);
160

    
161
  // A one element cache of the last utf8_subject string and its length.  The
162
  // subject JS String object is cached in the heap.  We also cache a
163
  // translation between position and utf8 position.
164
  static char* utf8_subject_cache_;
165
  static int utf8_length_cache_;
166
  static int utf8_position_;
167
  static int character_position_;
168
};
169

    
170

    
171
class CharacterRange {
172
 public:
173
  CharacterRange() : from_(0), to_(0) { }
174
  // For compatibility with the CHECK_OK macro
175
  CharacterRange(void* null) { ASSERT_EQ(NULL, null); }  //NOLINT
176
  CharacterRange(uc16 from, uc16 to) : from_(from), to_(to) { }
177
  static void AddClassEscape(uc16 type, ZoneList<CharacterRange>* ranges);
178
  static Vector<const uc16> GetWordBounds();
179
  static inline CharacterRange Singleton(uc16 value) {
180
    return CharacterRange(value, value);
181
  }
182
  static inline CharacterRange Range(uc16 from, uc16 to) {
183
    ASSERT(from <= to);
184
    return CharacterRange(from, to);
185
  }
186
  static inline CharacterRange Everything() {
187
    return CharacterRange(0, 0xFFFF);
188
  }
189
  bool Contains(uc16 i) { return from_ <= i && i <= to_; }
190
  uc16 from() const { return from_; }
191
  void set_from(uc16 value) { from_ = value; }
192
  uc16 to() const { return to_; }
193
  void set_to(uc16 value) { to_ = value; }
194
  bool is_valid() { return from_ <= to_; }
195
  bool IsEverything(uc16 max) { return from_ == 0 && to_ >= max; }
196
  bool IsSingleton() { return (from_ == to_); }
197
  void AddCaseEquivalents(ZoneList<CharacterRange>* ranges);
198
  static void Split(ZoneList<CharacterRange>* base,
199
                    Vector<const uc16> overlay,
200
                    ZoneList<CharacterRange>** included,
201
                    ZoneList<CharacterRange>** excluded);
202

    
203
  static const int kRangeCanonicalizeMax = 0x346;
204
  static const int kStartMarker = (1 << 24);
205
  static const int kPayloadMask = (1 << 24) - 1;
206

    
207
 private:
208
  uc16 from_;
209
  uc16 to_;
210
};
211

    
212

    
213
template <typename Node, class Callback>
214
static void DoForEach(Node* node, Callback* callback);
215

    
216

    
217
// A zone splay tree.  The config type parameter encapsulates the
218
// different configurations of a concrete splay tree:
219
//
220
//   typedef Key: the key type
221
//   typedef Value: the value type
222
//   static const kNoKey: the dummy key used when no key is set
223
//   static const kNoValue: the dummy value used to initialize nodes
224
//   int (Compare)(Key& a, Key& b) -> {-1, 0, 1}: comparison function
225
//
226
template <typename Config>
227
class ZoneSplayTree : public ZoneObject {
228
 public:
229
  typedef typename Config::Key Key;
230
  typedef typename Config::Value Value;
231

    
232
  class Locator;
233

    
234
  ZoneSplayTree() : root_(NULL) { }
235

    
236
  // Inserts the given key in this tree with the given value.  Returns
237
  // true if a node was inserted, otherwise false.  If found the locator
238
  // is enabled and provides access to the mapping for the key.
239
  bool Insert(const Key& key, Locator* locator);
240

    
241
  // Looks up the key in this tree and returns true if it was found,
242
  // otherwise false.  If the node is found the locator is enabled and
243
  // provides access to the mapping for the key.
244
  bool Find(const Key& key, Locator* locator);
245

    
246
  // Finds the mapping with the greatest key less than or equal to the
247
  // given key.
248
  bool FindGreatestLessThan(const Key& key, Locator* locator);
249

    
250
  // Find the mapping with the greatest key in this tree.
251
  bool FindGreatest(Locator* locator);
252

    
253
  // Finds the mapping with the least key greater than or equal to the
254
  // given key.
255
  bool FindLeastGreaterThan(const Key& key, Locator* locator);
256

    
257
  // Find the mapping with the least key in this tree.
258
  bool FindLeast(Locator* locator);
259

    
260
  // Remove the node with the given key from the tree.
261
  bool Remove(const Key& key);
262

    
263
  bool is_empty() { return root_ == NULL; }
264

    
265
  // Perform the splay operation for the given key. Moves the node with
266
  // the given key to the top of the tree.  If no node has the given
267
  // key, the last node on the search path is moved to the top of the
268
  // tree.
269
  void Splay(const Key& key);
270

    
271
  class Node : public ZoneObject {
272
   public:
273
    Node(const Key& key, const Value& value)
274
        : key_(key),
275
          value_(value),
276
          left_(NULL),
277
          right_(NULL) { }
278
    Key key() { return key_; }
279
    Value value() { return value_; }
280
    Node* left() { return left_; }
281
    Node* right() { return right_; }
282
   private:
283
    friend class ZoneSplayTree;
284
    friend class Locator;
285
    Key key_;
286
    Value value_;
287
    Node* left_;
288
    Node* right_;
289
  };
290

    
291
  // A locator provides access to a node in the tree without actually
292
  // exposing the node.
293
  class Locator {
294
   public:
295
    explicit Locator(Node* node) : node_(node) { }
296
    Locator() : node_(NULL) { }
297
    const Key& key() { return node_->key_; }
298
    Value& value() { return node_->value_; }
299
    void set_value(const Value& value) { node_->value_ = value; }
300
    inline void bind(Node* node) { node_ = node; }
301
   private:
302
    Node* node_;
303
  };
304

    
305
  template <class Callback>
306
  void ForEach(Callback* c) {
307
    DoForEach<typename ZoneSplayTree<Config>::Node, Callback>(root_, c);
308
  }
309

    
310
 private:
311
  Node* root_;
312
};
313

    
314

    
315
// A set of unsigned integers that behaves especially well on small
316
// integers (< 32).  May do zone-allocation.
317
class OutSet: public ZoneObject {
318
 public:
319
  OutSet() : first_(0), remaining_(NULL), successors_(NULL) { }
320
  OutSet* Extend(unsigned value);
321
  bool Get(unsigned value);
322
  static const unsigned kFirstLimit = 32;
323

    
324
 private:
325
  // Destructively set a value in this set.  In most cases you want
326
  // to use Extend instead to ensure that only one instance exists
327
  // that contains the same values.
328
  void Set(unsigned value);
329

    
330
  // The successors are a list of sets that contain the same values
331
  // as this set and the one more value that is not present in this
332
  // set.
333
  ZoneList<OutSet*>* successors() { return successors_; }
334

    
335
  OutSet(uint32_t first, ZoneList<unsigned>* remaining)
336
      : first_(first), remaining_(remaining), successors_(NULL) { }
337
  uint32_t first_;
338
  ZoneList<unsigned>* remaining_;
339
  ZoneList<OutSet*>* successors_;
340
  friend class Trace;
341
};
342

    
343

    
344
// A mapping from integers, specified as ranges, to a set of integers.
345
// Used for mapping character ranges to choices.
346
class DispatchTable : public ZoneObject {
347
 public:
348
  class Entry {
349
   public:
350
    Entry() : from_(0), to_(0), out_set_(NULL) { }
351
    Entry(uc16 from, uc16 to, OutSet* out_set)
352
        : from_(from), to_(to), out_set_(out_set) { }
353
    uc16 from() { return from_; }
354
    uc16 to() { return to_; }
355
    void set_to(uc16 value) { to_ = value; }
356
    void AddValue(int value) { out_set_ = out_set_->Extend(value); }
357
    OutSet* out_set() { return out_set_; }
358
   private:
359
    uc16 from_;
360
    uc16 to_;
361
    OutSet* out_set_;
362
  };
363

    
364
  class Config {
365
   public:
366
    typedef uc16 Key;
367
    typedef Entry Value;
368
    static const uc16 kNoKey;
369
    static const Entry kNoValue;
370
    static inline int Compare(uc16 a, uc16 b) {
371
      if (a == b)
372
        return 0;
373
      else if (a < b)
374
        return -1;
375
      else
376
        return 1;
377
    }
378
  };
379

    
380
  void AddRange(CharacterRange range, int value);
381
  OutSet* Get(uc16 value);
382
  void Dump();
383

    
384
  template <typename Callback>
385
  void ForEach(Callback* callback) { return tree()->ForEach(callback); }
386
 private:
387
  // There can't be a static empty set since it allocates its
388
  // successors in a zone and caches them.
389
  OutSet* empty() { return &empty_; }
390
  OutSet empty_;
391
  ZoneSplayTree<Config>* tree() { return &tree_; }
392
  ZoneSplayTree<Config> tree_;
393
};
394

    
395

    
396
#define FOR_EACH_NODE_TYPE(VISIT)                                    \
397
  VISIT(End)                                                         \
398
  VISIT(Action)                                                      \
399
  VISIT(Choice)                                                      \
400
  VISIT(BackReference)                                               \
401
  VISIT(Assertion)                                                   \
402
  VISIT(Text)
403

    
404

    
405
#define FOR_EACH_REG_EXP_TREE_TYPE(VISIT)                            \
406
  VISIT(Disjunction)                                                 \
407
  VISIT(Alternative)                                                 \
408
  VISIT(Assertion)                                                   \
409
  VISIT(CharacterClass)                                              \
410
  VISIT(Atom)                                                        \
411
  VISIT(Quantifier)                                                  \
412
  VISIT(Capture)                                                     \
413
  VISIT(Lookahead)                                                   \
414
  VISIT(BackReference)                                               \
415
  VISIT(Empty)                                                       \
416
  VISIT(Text)
417

    
418

    
419
#define FORWARD_DECLARE(Name) class RegExp##Name;
420
FOR_EACH_REG_EXP_TREE_TYPE(FORWARD_DECLARE)
421
#undef FORWARD_DECLARE
422

    
423

    
424
class TextElement {
425
 public:
426
  enum Type {UNINITIALIZED, ATOM, CHAR_CLASS};
427
  TextElement() : type(UNINITIALIZED) { }
428
  explicit TextElement(Type t) : type(t), cp_offset(-1) { }
429
  static TextElement Atom(RegExpAtom* atom);
430
  static TextElement CharClass(RegExpCharacterClass* char_class);
431
  int length();
432
  Type type;
433
  union {
434
    RegExpAtom* u_atom;
435
    RegExpCharacterClass* u_char_class;
436
  } data;
437
  int cp_offset;
438
};
439

    
440

    
441
class Trace;
442

    
443

    
444
struct NodeInfo {
445
  NodeInfo()
446
      : being_analyzed(false),
447
        been_analyzed(false),
448
        follows_word_interest(false),
449
        follows_newline_interest(false),
450
        follows_start_interest(false),
451
        at_end(false),
452
        visited(false) { }
453

    
454
  // Returns true if the interests and assumptions of this node
455
  // matches the given one.
456
  bool Matches(NodeInfo* that) {
457
    return (at_end == that->at_end) &&
458
           (follows_word_interest == that->follows_word_interest) &&
459
           (follows_newline_interest == that->follows_newline_interest) &&
460
           (follows_start_interest == that->follows_start_interest);
461
  }
462

    
463
  // Updates the interests of this node given the interests of the
464
  // node preceding it.
465
  void AddFromPreceding(NodeInfo* that) {
466
    at_end |= that->at_end;
467
    follows_word_interest |= that->follows_word_interest;
468
    follows_newline_interest |= that->follows_newline_interest;
469
    follows_start_interest |= that->follows_start_interest;
470
  }
471

    
472
  bool HasLookbehind() {
473
    return follows_word_interest ||
474
           follows_newline_interest ||
475
           follows_start_interest;
476
  }
477

    
478
  // Sets the interests of this node to include the interests of the
479
  // following node.
480
  void AddFromFollowing(NodeInfo* that) {
481
    follows_word_interest |= that->follows_word_interest;
482
    follows_newline_interest |= that->follows_newline_interest;
483
    follows_start_interest |= that->follows_start_interest;
484
  }
485

    
486
  void ResetCompilationState() {
487
    being_analyzed = false;
488
    been_analyzed = false;
489
  }
490

    
491
  bool being_analyzed: 1;
492
  bool been_analyzed: 1;
493

    
494
  // These bits are set of this node has to know what the preceding
495
  // character was.
496
  bool follows_word_interest: 1;
497
  bool follows_newline_interest: 1;
498
  bool follows_start_interest: 1;
499

    
500
  bool at_end: 1;
501
  bool visited: 1;
502
};
503

    
504

    
505
class SiblingList {
506
 public:
507
  SiblingList() : list_(NULL) { }
508
  int length() {
509
    return list_ == NULL ? 0 : list_->length();
510
  }
511
  void Ensure(RegExpNode* parent) {
512
    if (list_ == NULL) {
513
      list_ = new ZoneList<RegExpNode*>(2);
514
      list_->Add(parent);
515
    }
516
  }
517
  void Add(RegExpNode* node) { list_->Add(node); }
518
  RegExpNode* Get(int index) { return list_->at(index); }
519
 private:
520
  ZoneList<RegExpNode*>* list_;
521
};
522

    
523

    
524
// Details of a quick mask-compare check that can look ahead in the
525
// input stream.
526
class QuickCheckDetails {
527
 public:
528
  QuickCheckDetails()
529
      : characters_(0),
530
        mask_(0),
531
        value_(0),
532
        cannot_match_(false) { }
533
  explicit QuickCheckDetails(int characters)
534
      : characters_(characters),
535
        mask_(0),
536
        value_(0),
537
        cannot_match_(false) { }
538
  bool Rationalize(bool ascii);
539
  // Merge in the information from another branch of an alternation.
540
  void Merge(QuickCheckDetails* other, int from_index);
541
  // Advance the current position by some amount.
542
  void Advance(int by, bool ascii);
543
  void Clear();
544
  bool cannot_match() { return cannot_match_; }
545
  void set_cannot_match() { cannot_match_ = true; }
546
  struct Position {
547
    Position() : mask(0), value(0), determines_perfectly(false) { }
548
    uc16 mask;
549
    uc16 value;
550
    bool determines_perfectly;
551
  };
552
  int characters() { return characters_; }
553
  void set_characters(int characters) { characters_ = characters; }
554
  Position* positions(int index) {
555
    ASSERT(index >= 0);
556
    ASSERT(index < characters_);
557
    return positions_ + index;
558
  }
559
  uint32_t mask() { return mask_; }
560
  uint32_t value() { return value_; }
561

    
562
 private:
563
  // How many characters do we have quick check information from.  This is
564
  // the same for all branches of a choice node.
565
  int characters_;
566
  Position positions_[4];
567
  // These values are the condensate of the above array after Rationalize().
568
  uint32_t mask_;
569
  uint32_t value_;
570
  // If set to true, there is no way this quick check can match at all.
571
  // E.g., if it requires to be at the start of the input, and isn't.
572
  bool cannot_match_;
573
};
574

    
575

    
576
class RegExpNode: public ZoneObject {
577
 public:
578
  RegExpNode() : trace_count_(0) { }
579
  virtual ~RegExpNode();
580
  virtual void Accept(NodeVisitor* visitor) = 0;
581
  // Generates a goto to this node or actually generates the code at this point.
582
  virtual void Emit(RegExpCompiler* compiler, Trace* trace) = 0;
583
  // How many characters must this node consume at a minimum in order to
584
  // succeed.  If we have found at least 'still_to_find' characters that
585
  // must be consumed there is no need to ask any following nodes whether
586
  // they are sure to eat any more characters.
587
  virtual int EatsAtLeast(int still_to_find, int recursion_depth) = 0;
588
  // Emits some quick code that checks whether the preloaded characters match.
589
  // Falls through on certain failure, jumps to the label on possible success.
590
  // If the node cannot make a quick check it does nothing and returns false.
591
  bool EmitQuickCheck(RegExpCompiler* compiler,
592
                      Trace* trace,
593
                      bool preload_has_checked_bounds,
594
                      Label* on_possible_success,
595
                      QuickCheckDetails* details_return,
596
                      bool fall_through_on_failure);
597
  // For a given number of characters this returns a mask and a value.  The
598
  // next n characters are anded with the mask and compared with the value.
599
  // A comparison failure indicates the node cannot match the next n characters.
600
  // A comparison success indicates the node may match.
601
  virtual void GetQuickCheckDetails(QuickCheckDetails* details,
602
                                    RegExpCompiler* compiler,
603
                                    int characters_filled_in,
604
                                    bool not_at_start) = 0;
605
  static const int kNodeIsTooComplexForGreedyLoops = -1;
606
  virtual int GreedyLoopTextLength() { return kNodeIsTooComplexForGreedyLoops; }
607
  Label* label() { return &label_; }
608
  // If non-generic code is generated for a node (ie the node is not at the
609
  // start of the trace) then it cannot be reused.  This variable sets a limit
610
  // on how often we allow that to happen before we insist on starting a new
611
  // trace and generating generic code for a node that can be reused by flushing
612
  // the deferred actions in the current trace and generating a goto.
613
  static const int kMaxCopiesCodeGenerated = 10;
614

    
615
  NodeInfo* info() { return &info_; }
616

    
617
  void AddSibling(RegExpNode* node) { siblings_.Add(node); }
618

    
619
  // Static version of EnsureSibling that expresses the fact that the
620
  // result has the same type as the input.
621
  template <class C>
622
  static C* EnsureSibling(C* node, NodeInfo* info, bool* cloned) {
623
    return static_cast<C*>(node->EnsureSibling(info, cloned));
624
  }
625

    
626
  SiblingList* siblings() { return &siblings_; }
627
  void set_siblings(SiblingList* other) { siblings_ = *other; }
628

    
629
 protected:
630
  enum LimitResult { DONE, CONTINUE };
631
  LimitResult LimitVersions(RegExpCompiler* compiler, Trace* trace);
632

    
633
  // Returns a sibling of this node whose interests and assumptions
634
  // match the ones in the given node info.  If no sibling exists NULL
635
  // is returned.
636
  RegExpNode* TryGetSibling(NodeInfo* info);
637

    
638
  // Returns a sibling of this node whose interests match the ones in
639
  // the given node info.  The info must not contain any assertions.
640
  // If no node exists a new one will be created by cloning the current
641
  // node.  The result will always be an instance of the same concrete
642
  // class as this node.
643
  RegExpNode* EnsureSibling(NodeInfo* info, bool* cloned);
644

    
645
  // Returns a clone of this node initialized using the copy constructor
646
  // of its concrete class.  Note that the node may have to be pre-
647
  // processed before it is on a usable state.
648
  virtual RegExpNode* Clone() = 0;
649

    
650
 private:
651
  Label label_;
652
  NodeInfo info_;
653
  SiblingList siblings_;
654
  // This variable keeps track of how many times code has been generated for
655
  // this node (in different traces).  We don't keep track of where the
656
  // generated code is located unless the code is generated at the start of
657
  // a trace, in which case it is generic and can be reused by flushing the
658
  // deferred operations in the current trace and generating a goto.
659
  int trace_count_;
660
};
661

    
662

    
663
// A simple closed interval.
664
class Interval {
665
 public:
666
  Interval() : from_(kNone), to_(kNone) { }
667
  Interval(int from, int to) : from_(from), to_(to) { }
668
  Interval Union(Interval that) {
669
    if (that.from_ == kNone)
670
      return *this;
671
    else if (from_ == kNone)
672
      return that;
673
    else
674
      return Interval(Min(from_, that.from_), Max(to_, that.to_));
675
  }
676
  bool Contains(int value) {
677
    return (from_ <= value) && (value <= to_);
678
  }
679
  bool is_empty() { return from_ == kNone; }
680
  int from() { return from_; }
681
  int to() { return to_; }
682
  static Interval Empty() { return Interval(); }
683
  static const int kNone = -1;
684
 private:
685
  int from_;
686
  int to_;
687
};
688

    
689

    
690
class SeqRegExpNode: public RegExpNode {
691
 public:
692
  explicit SeqRegExpNode(RegExpNode* on_success)
693
      : on_success_(on_success) { }
694
  RegExpNode* on_success() { return on_success_; }
695
  void set_on_success(RegExpNode* node) { on_success_ = node; }
696
 private:
697
  RegExpNode* on_success_;
698
};
699

    
700

    
701
class ActionNode: public SeqRegExpNode {
702
 public:
703
  enum Type {
704
    SET_REGISTER,
705
    INCREMENT_REGISTER,
706
    STORE_POSITION,
707
    BEGIN_SUBMATCH,
708
    POSITIVE_SUBMATCH_SUCCESS,
709
    EMPTY_MATCH_CHECK,
710
    CLEAR_CAPTURES
711
  };
712
  static ActionNode* SetRegister(int reg, int val, RegExpNode* on_success);
713
  static ActionNode* IncrementRegister(int reg, RegExpNode* on_success);
714
  static ActionNode* StorePosition(int reg,
715
                                   bool is_capture,
716
                                   RegExpNode* on_success);
717
  static ActionNode* ClearCaptures(Interval range, RegExpNode* on_success);
718
  static ActionNode* BeginSubmatch(int stack_pointer_reg,
719
                                   int position_reg,
720
                                   RegExpNode* on_success);
721
  static ActionNode* PositiveSubmatchSuccess(int stack_pointer_reg,
722
                                             int restore_reg,
723
                                             int clear_capture_count,
724
                                             int clear_capture_from,
725
                                             RegExpNode* on_success);
726
  static ActionNode* EmptyMatchCheck(int start_register,
727
                                     int repetition_register,
728
                                     int repetition_limit,
729
                                     RegExpNode* on_success);
730
  virtual void Accept(NodeVisitor* visitor);
731
  virtual void Emit(RegExpCompiler* compiler, Trace* trace);
732
  virtual int EatsAtLeast(int still_to_find, int recursion_depth);
733
  virtual void GetQuickCheckDetails(QuickCheckDetails* details,
734
                                    RegExpCompiler* compiler,
735
                                    int filled_in,
736
                                    bool not_at_start) {
737
    return on_success()->GetQuickCheckDetails(
738
        details, compiler, filled_in, not_at_start);
739
  }
740
  Type type() { return type_; }
741
  // TODO(erikcorry): We should allow some action nodes in greedy loops.
742
  virtual int GreedyLoopTextLength() { return kNodeIsTooComplexForGreedyLoops; }
743
  virtual ActionNode* Clone() { return new ActionNode(*this); }
744

    
745
 private:
746
  union {
747
    struct {
748
      int reg;
749
      int value;
750
    } u_store_register;
751
    struct {
752
      int reg;
753
    } u_increment_register;
754
    struct {
755
      int reg;
756
      bool is_capture;
757
    } u_position_register;
758
    struct {
759
      int stack_pointer_register;
760
      int current_position_register;
761
      int clear_register_count;
762
      int clear_register_from;
763
    } u_submatch;
764
    struct {
765
      int start_register;
766
      int repetition_register;
767
      int repetition_limit;
768
    } u_empty_match_check;
769
    struct {
770
      int range_from;
771
      int range_to;
772
    } u_clear_captures;
773
  } data_;
774
  ActionNode(Type type, RegExpNode* on_success)
775
      : SeqRegExpNode(on_success),
776
        type_(type) { }
777
  Type type_;
778
  friend class DotPrinter;
779
};
780

    
781

    
782
class TextNode: public SeqRegExpNode {
783
 public:
784
  TextNode(ZoneList<TextElement>* elms,
785
           RegExpNode* on_success)
786
      : SeqRegExpNode(on_success),
787
        elms_(elms) { }
788
  TextNode(RegExpCharacterClass* that,
789
           RegExpNode* on_success)
790
      : SeqRegExpNode(on_success),
791
        elms_(new ZoneList<TextElement>(1)) {
792
    elms_->Add(TextElement::CharClass(that));
793
  }
794
  virtual void Accept(NodeVisitor* visitor);
795
  virtual void Emit(RegExpCompiler* compiler, Trace* trace);
796
  virtual int EatsAtLeast(int still_to_find, int recursion_depth);
797
  virtual void GetQuickCheckDetails(QuickCheckDetails* details,
798
                                    RegExpCompiler* compiler,
799
                                    int characters_filled_in,
800
                                    bool not_at_start);
801
  ZoneList<TextElement>* elements() { return elms_; }
802
  void MakeCaseIndependent();
803
  virtual int GreedyLoopTextLength();
804
  virtual TextNode* Clone() {
805
    TextNode* result = new TextNode(*this);
806
    result->CalculateOffsets();
807
    return result;
808
  }
809
  void CalculateOffsets();
810

    
811
 private:
812
  enum TextEmitPassType {
813
    NON_ASCII_MATCH,             // Check for characters that can't match.
814
    SIMPLE_CHARACTER_MATCH,      // Case-dependent single character check.
815
    NON_LETTER_CHARACTER_MATCH,  // Check characters that have no case equivs.
816
    CASE_CHARACTER_MATCH,        // Case-independent single character check.
817
    CHARACTER_CLASS_MATCH        // Character class.
818
  };
819
  static bool SkipPass(int pass, bool ignore_case);
820
  static const int kFirstRealPass = SIMPLE_CHARACTER_MATCH;
821
  static const int kLastPass = CHARACTER_CLASS_MATCH;
822
  void TextEmitPass(RegExpCompiler* compiler,
823
                    TextEmitPassType pass,
824
                    bool preloaded,
825
                    Trace* trace,
826
                    bool first_element_checked,
827
                    int* checked_up_to);
828
  int Length();
829
  ZoneList<TextElement>* elms_;
830
};
831

    
832

    
833
class AssertionNode: public SeqRegExpNode {
834
 public:
835
  enum AssertionNodeType {
836
    AT_END,
837
    AT_START,
838
    AT_BOUNDARY,
839
    AT_NON_BOUNDARY,
840
    AFTER_NEWLINE
841
  };
842
  static AssertionNode* AtEnd(RegExpNode* on_success) {
843
    return new AssertionNode(AT_END, on_success);
844
  }
845
  static AssertionNode* AtStart(RegExpNode* on_success) {
846
    return new AssertionNode(AT_START, on_success);
847
  }
848
  static AssertionNode* AtBoundary(RegExpNode* on_success) {
849
    return new AssertionNode(AT_BOUNDARY, on_success);
850
  }
851
  static AssertionNode* AtNonBoundary(RegExpNode* on_success) {
852
    return new AssertionNode(AT_NON_BOUNDARY, on_success);
853
  }
854
  static AssertionNode* AfterNewline(RegExpNode* on_success) {
855
    return new AssertionNode(AFTER_NEWLINE, on_success);
856
  }
857
  virtual void Accept(NodeVisitor* visitor);
858
  virtual void Emit(RegExpCompiler* compiler, Trace* trace);
859
  virtual int EatsAtLeast(int still_to_find, int recursion_depth);
860
  virtual void GetQuickCheckDetails(QuickCheckDetails* details,
861
                                    RegExpCompiler* compiler,
862
                                    int filled_in,
863
                                    bool not_at_start);
864
  virtual AssertionNode* Clone() { return new AssertionNode(*this); }
865
  AssertionNodeType type() { return type_; }
866
 private:
867
  AssertionNode(AssertionNodeType t, RegExpNode* on_success)
868
      : SeqRegExpNode(on_success), type_(t) { }
869
  AssertionNodeType type_;
870
};
871

    
872

    
873
class BackReferenceNode: public SeqRegExpNode {
874
 public:
875
  BackReferenceNode(int start_reg,
876
                    int end_reg,
877
                    RegExpNode* on_success)
878
      : SeqRegExpNode(on_success),
879
        start_reg_(start_reg),
880
        end_reg_(end_reg) { }
881
  virtual void Accept(NodeVisitor* visitor);
882
  int start_register() { return start_reg_; }
883
  int end_register() { return end_reg_; }
884
  virtual void Emit(RegExpCompiler* compiler, Trace* trace);
885
  virtual int EatsAtLeast(int still_to_find, int recursion_depth);
886
  virtual void GetQuickCheckDetails(QuickCheckDetails* details,
887
                                    RegExpCompiler* compiler,
888
                                    int characters_filled_in,
889
                                    bool not_at_start) {
890
    return;
891
  }
892
  virtual BackReferenceNode* Clone() { return new BackReferenceNode(*this); }
893

    
894
 private:
895
  int start_reg_;
896
  int end_reg_;
897
};
898

    
899

    
900
class EndNode: public RegExpNode {
901
 public:
902
  enum Action { ACCEPT, BACKTRACK, NEGATIVE_SUBMATCH_SUCCESS };
903
  explicit EndNode(Action action) : action_(action) { }
904
  virtual void Accept(NodeVisitor* visitor);
905
  virtual void Emit(RegExpCompiler* compiler, Trace* trace);
906
  virtual int EatsAtLeast(int still_to_find, int recursion_depth) { return 0; }
907
  virtual void GetQuickCheckDetails(QuickCheckDetails* details,
908
                                    RegExpCompiler* compiler,
909
                                    int characters_filled_in,
910
                                    bool not_at_start) {
911
    // Returning 0 from EatsAtLeast should ensure we never get here.
912
    UNREACHABLE();
913
  }
914
  virtual EndNode* Clone() { return new EndNode(*this); }
915

    
916
 private:
917
  Action action_;
918
};
919

    
920

    
921
class NegativeSubmatchSuccess: public EndNode {
922
 public:
923
  NegativeSubmatchSuccess(int stack_pointer_reg,
924
                          int position_reg,
925
                          int clear_capture_count,
926
                          int clear_capture_start)
927
      : EndNode(NEGATIVE_SUBMATCH_SUCCESS),
928
        stack_pointer_register_(stack_pointer_reg),
929
        current_position_register_(position_reg),
930
        clear_capture_count_(clear_capture_count),
931
        clear_capture_start_(clear_capture_start) { }
932
  virtual void Emit(RegExpCompiler* compiler, Trace* trace);
933

    
934
 private:
935
  int stack_pointer_register_;
936
  int current_position_register_;
937
  int clear_capture_count_;
938
  int clear_capture_start_;
939
};
940

    
941

    
942
class Guard: public ZoneObject {
943
 public:
944
  enum Relation { LT, GEQ };
945
  Guard(int reg, Relation op, int value)
946
      : reg_(reg),
947
        op_(op),
948
        value_(value) { }
949
  int reg() { return reg_; }
950
  Relation op() { return op_; }
951
  int value() { return value_; }
952

    
953
 private:
954
  int reg_;
955
  Relation op_;
956
  int value_;
957
};
958

    
959

    
960
class GuardedAlternative {
961
 public:
962
  explicit GuardedAlternative(RegExpNode* node) : node_(node), guards_(NULL) { }
963
  void AddGuard(Guard* guard);
964
  RegExpNode* node() { return node_; }
965
  void set_node(RegExpNode* node) { node_ = node; }
966
  ZoneList<Guard*>* guards() { return guards_; }
967

    
968
 private:
969
  RegExpNode* node_;
970
  ZoneList<Guard*>* guards_;
971
};
972

    
973

    
974
class AlternativeGeneration;
975

    
976

    
977
class ChoiceNode: public RegExpNode {
978
 public:
979
  explicit ChoiceNode(int expected_size)
980
      : alternatives_(new ZoneList<GuardedAlternative>(expected_size)),
981
        table_(NULL),
982
        not_at_start_(false),
983
        being_calculated_(false) { }
984
  virtual void Accept(NodeVisitor* visitor);
985
  void AddAlternative(GuardedAlternative node) { alternatives()->Add(node); }
986
  ZoneList<GuardedAlternative>* alternatives() { return alternatives_; }
987
  DispatchTable* GetTable(bool ignore_case);
988
  virtual void Emit(RegExpCompiler* compiler, Trace* trace);
989
  virtual int EatsAtLeast(int still_to_find, int recursion_depth);
990
  int EatsAtLeastHelper(int still_to_find,
991
                        int recursion_depth,
992
                        RegExpNode* ignore_this_node);
993
  virtual void GetQuickCheckDetails(QuickCheckDetails* details,
994
                                    RegExpCompiler* compiler,
995
                                    int characters_filled_in,
996
                                    bool not_at_start);
997
  virtual ChoiceNode* Clone() { return new ChoiceNode(*this); }
998

    
999
  bool being_calculated() { return being_calculated_; }
1000
  bool not_at_start() { return not_at_start_; }
1001
  void set_not_at_start() { not_at_start_ = true; }
1002
  void set_being_calculated(bool b) { being_calculated_ = b; }
1003
  virtual bool try_to_emit_quick_check_for_alternative(int i) { return true; }
1004

    
1005
 protected:
1006
  int GreedyLoopTextLength(GuardedAlternative *alternative);
1007
  ZoneList<GuardedAlternative>* alternatives_;
1008

    
1009
 private:
1010
  friend class DispatchTableConstructor;
1011
  friend class Analysis;
1012
  void GenerateGuard(RegExpMacroAssembler* macro_assembler,
1013
                     Guard *guard,
1014
                     Trace* trace);
1015
  int CalculatePreloadCharacters(RegExpCompiler* compiler);
1016
  void EmitOutOfLineContinuation(RegExpCompiler* compiler,
1017
                                 Trace* trace,
1018
                                 GuardedAlternative alternative,
1019
                                 AlternativeGeneration* alt_gen,
1020
                                 int preload_characters,
1021
                                 bool next_expects_preload);
1022
  DispatchTable* table_;
1023
  // If true, this node is never checked at the start of the input.
1024
  // Allows a new trace to start with at_start() set to false.
1025
  bool not_at_start_;
1026
  bool being_calculated_;
1027
};
1028

    
1029

    
1030
class NegativeLookaheadChoiceNode: public ChoiceNode {
1031
 public:
1032
  explicit NegativeLookaheadChoiceNode(GuardedAlternative this_must_fail,
1033
                                       GuardedAlternative then_do_this)
1034
      : ChoiceNode(2) {
1035
    AddAlternative(this_must_fail);
1036
    AddAlternative(then_do_this);
1037
  }
1038
  virtual int EatsAtLeast(int still_to_find, int recursion_depth);
1039
  virtual void GetQuickCheckDetails(QuickCheckDetails* details,
1040
                                    RegExpCompiler* compiler,
1041
                                    int characters_filled_in,
1042
                                    bool not_at_start);
1043
  // For a negative lookahead we don't emit the quick check for the
1044
  // alternative that is expected to fail.  This is because quick check code
1045
  // starts by loading enough characters for the alternative that takes fewest
1046
  // characters, but on a negative lookahead the negative branch did not take
1047
  // part in that calculation (EatsAtLeast) so the assumptions don't hold.
1048
  virtual bool try_to_emit_quick_check_for_alternative(int i) { return i != 0; }
1049
};
1050

    
1051

    
1052
class LoopChoiceNode: public ChoiceNode {
1053
 public:
1054
  explicit LoopChoiceNode(bool body_can_be_zero_length)
1055
      : ChoiceNode(2),
1056
        loop_node_(NULL),
1057
        continue_node_(NULL),
1058
        body_can_be_zero_length_(body_can_be_zero_length) { }
1059
  void AddLoopAlternative(GuardedAlternative alt);
1060
  void AddContinueAlternative(GuardedAlternative alt);
1061
  virtual void Emit(RegExpCompiler* compiler, Trace* trace);
1062
  virtual int EatsAtLeast(int still_to_find, int recursion_depth);
1063
  virtual void GetQuickCheckDetails(QuickCheckDetails* details,
1064
                                    RegExpCompiler* compiler,
1065
                                    int characters_filled_in,
1066
                                    bool not_at_start);
1067
  virtual LoopChoiceNode* Clone() { return new LoopChoiceNode(*this); }
1068
  RegExpNode* loop_node() { return loop_node_; }
1069
  RegExpNode* continue_node() { return continue_node_; }
1070
  bool body_can_be_zero_length() { return body_can_be_zero_length_; }
1071
  virtual void Accept(NodeVisitor* visitor);
1072

    
1073
 private:
1074
  // AddAlternative is made private for loop nodes because alternatives
1075
  // should not be added freely, we need to keep track of which node
1076
  // goes back to the node itself.
1077
  void AddAlternative(GuardedAlternative node) {
1078
    ChoiceNode::AddAlternative(node);
1079
  }
1080

    
1081
  RegExpNode* loop_node_;
1082
  RegExpNode* continue_node_;
1083
  bool body_can_be_zero_length_;
1084
};
1085

    
1086

    
1087
// There are many ways to generate code for a node.  This class encapsulates
1088
// the current way we should be generating.  In other words it encapsulates
1089
// the current state of the code generator.  The effect of this is that we
1090
// generate code for paths that the matcher can take through the regular
1091
// expression.  A given node in the regexp can be code-generated several times
1092
// as it can be part of several traces.  For example for the regexp:
1093
// /foo(bar|ip)baz/ the code to match baz will be generated twice, once as part
1094
// of the foo-bar-baz trace and once as part of the foo-ip-baz trace.  The code
1095
// to match foo is generated only once (the traces have a common prefix).  The
1096
// code to store the capture is deferred and generated (twice) after the places
1097
// where baz has been matched.
1098
class Trace {
1099
 public:
1100
  // A value for a property that is either known to be true, know to be false,
1101
  // or not known.
1102
  enum TriBool {
1103
    UNKNOWN = -1, FALSE = 0, TRUE = 1
1104
  };
1105

    
1106
  class DeferredAction {
1107
   public:
1108
    DeferredAction(ActionNode::Type type, int reg)
1109
        : type_(type), reg_(reg), next_(NULL) { }
1110
    DeferredAction* next() { return next_; }
1111
    bool Mentions(int reg);
1112
    int reg() { return reg_; }
1113
    ActionNode::Type type() { return type_; }
1114
   private:
1115
    ActionNode::Type type_;
1116
    int reg_;
1117
    DeferredAction* next_;
1118
    friend class Trace;
1119
  };
1120

    
1121
  class DeferredCapture : public DeferredAction {
1122
   public:
1123
    DeferredCapture(int reg, bool is_capture, Trace* trace)
1124
        : DeferredAction(ActionNode::STORE_POSITION, reg),
1125
          cp_offset_(trace->cp_offset()),
1126
          is_capture_(is_capture) { }
1127
    int cp_offset() { return cp_offset_; }
1128
    bool is_capture() { return is_capture_; }
1129
   private:
1130
    int cp_offset_;
1131
    bool is_capture_;
1132
    void set_cp_offset(int cp_offset) { cp_offset_ = cp_offset; }
1133
  };
1134

    
1135
  class DeferredSetRegister : public DeferredAction {
1136
   public:
1137
    DeferredSetRegister(int reg, int value)
1138
        : DeferredAction(ActionNode::SET_REGISTER, reg),
1139
          value_(value) { }
1140
    int value() { return value_; }
1141
   private:
1142
    int value_;
1143
  };
1144

    
1145
  class DeferredClearCaptures : public DeferredAction {
1146
   public:
1147
    explicit DeferredClearCaptures(Interval range)
1148
        : DeferredAction(ActionNode::CLEAR_CAPTURES, -1),
1149
          range_(range) { }
1150
    Interval range() { return range_; }
1151
   private:
1152
    Interval range_;
1153
  };
1154

    
1155
  class DeferredIncrementRegister : public DeferredAction {
1156
   public:
1157
    explicit DeferredIncrementRegister(int reg)
1158
        : DeferredAction(ActionNode::INCREMENT_REGISTER, reg) { }
1159
  };
1160

    
1161
  Trace()
1162
      : cp_offset_(0),
1163
        actions_(NULL),
1164
        backtrack_(NULL),
1165
        stop_node_(NULL),
1166
        loop_label_(NULL),
1167
        characters_preloaded_(0),
1168
        bound_checked_up_to_(0),
1169
        flush_budget_(100),
1170
        at_start_(UNKNOWN) { }
1171

    
1172
  // End the trace.  This involves flushing the deferred actions in the trace
1173
  // and pushing a backtrack location onto the backtrack stack.  Once this is
1174
  // done we can start a new trace or go to one that has already been
1175
  // generated.
1176
  void Flush(RegExpCompiler* compiler, RegExpNode* successor);
1177
  int cp_offset() { return cp_offset_; }
1178
  DeferredAction* actions() { return actions_; }
1179
  // A trivial trace is one that has no deferred actions or other state that
1180
  // affects the assumptions used when generating code.  There is no recorded
1181
  // backtrack location in a trivial trace, so with a trivial trace we will
1182
  // generate code that, on a failure to match, gets the backtrack location
1183
  // from the backtrack stack rather than using a direct jump instruction.  We
1184
  // always start code generation with a trivial trace and non-trivial traces
1185
  // are created as we emit code for nodes or add to the list of deferred
1186
  // actions in the trace.  The location of the code generated for a node using
1187
  // a trivial trace is recorded in a label in the node so that gotos can be
1188
  // generated to that code.
1189
  bool is_trivial() {
1190
    return backtrack_ == NULL &&
1191
           actions_ == NULL &&
1192
           cp_offset_ == 0 &&
1193
           characters_preloaded_ == 0 &&
1194
           bound_checked_up_to_ == 0 &&
1195
           quick_check_performed_.characters() == 0 &&
1196
           at_start_ == UNKNOWN;
1197
  }
1198
  TriBool at_start() { return at_start_; }
1199
  void set_at_start(bool at_start) { at_start_ = at_start ? TRUE : FALSE; }
1200
  Label* backtrack() { return backtrack_; }
1201
  Label* loop_label() { return loop_label_; }
1202
  RegExpNode* stop_node() { return stop_node_; }
1203
  int characters_preloaded() { return characters_preloaded_; }
1204
  int bound_checked_up_to() { return bound_checked_up_to_; }
1205
  int flush_budget() { return flush_budget_; }
1206
  QuickCheckDetails* quick_check_performed() { return &quick_check_performed_; }
1207
  bool mentions_reg(int reg);
1208
  // Returns true if a deferred position store exists to the specified
1209
  // register and stores the offset in the out-parameter.  Otherwise
1210
  // returns false.
1211
  bool GetStoredPosition(int reg, int* cp_offset);
1212
  // These set methods and AdvanceCurrentPositionInTrace should be used only on
1213
  // new traces - the intention is that traces are immutable after creation.
1214
  void add_action(DeferredAction* new_action) {
1215
    ASSERT(new_action->next_ == NULL);
1216
    new_action->next_ = actions_;
1217
    actions_ = new_action;
1218
  }
1219
  void set_backtrack(Label* backtrack) { backtrack_ = backtrack; }
1220
  void set_stop_node(RegExpNode* node) { stop_node_ = node; }
1221
  void set_loop_label(Label* label) { loop_label_ = label; }
1222
  void set_characters_preloaded(int cpre) { characters_preloaded_ = cpre; }
1223
  void set_bound_checked_up_to(int to) { bound_checked_up_to_ = to; }
1224
  void set_flush_budget(int to) { flush_budget_ = to; }
1225
  void set_quick_check_performed(QuickCheckDetails* d) {
1226
    quick_check_performed_ = *d;
1227
  }
1228
  void InvalidateCurrentCharacter();
1229
  void AdvanceCurrentPositionInTrace(int by, RegExpCompiler* compiler);
1230
 private:
1231
  int FindAffectedRegisters(OutSet* affected_registers);
1232
  void PerformDeferredActions(RegExpMacroAssembler* macro,
1233
                               int max_register,
1234
                               OutSet& affected_registers,
1235
                               OutSet* registers_to_pop,
1236
                               OutSet* registers_to_clear);
1237
  void RestoreAffectedRegisters(RegExpMacroAssembler* macro,
1238
                                int max_register,
1239
                                OutSet& registers_to_pop,
1240
                                OutSet& registers_to_clear);
1241
  int cp_offset_;
1242
  DeferredAction* actions_;
1243
  Label* backtrack_;
1244
  RegExpNode* stop_node_;
1245
  Label* loop_label_;
1246
  int characters_preloaded_;
1247
  int bound_checked_up_to_;
1248
  QuickCheckDetails quick_check_performed_;
1249
  int flush_budget_;
1250
  TriBool at_start_;
1251
};
1252

    
1253

    
1254
class NodeVisitor {
1255
 public:
1256
  virtual ~NodeVisitor() { }
1257
#define DECLARE_VISIT(Type)                                          \
1258
  virtual void Visit##Type(Type##Node* that) = 0;
1259
FOR_EACH_NODE_TYPE(DECLARE_VISIT)
1260
#undef DECLARE_VISIT
1261
  virtual void VisitLoopChoice(LoopChoiceNode* that) { VisitChoice(that); }
1262
};
1263

    
1264

    
1265
// Node visitor used to add the start set of the alternatives to the
1266
// dispatch table of a choice node.
1267
class DispatchTableConstructor: public NodeVisitor {
1268
 public:
1269
  DispatchTableConstructor(DispatchTable* table, bool ignore_case)
1270
      : table_(table),
1271
        choice_index_(-1),
1272
        ignore_case_(ignore_case) { }
1273

    
1274
  void BuildTable(ChoiceNode* node);
1275

    
1276
  void AddRange(CharacterRange range) {
1277
    table()->AddRange(range, choice_index_);
1278
  }
1279

    
1280
  void AddInverse(ZoneList<CharacterRange>* ranges);
1281

    
1282
#define DECLARE_VISIT(Type)                                          \
1283
  virtual void Visit##Type(Type##Node* that);
1284
FOR_EACH_NODE_TYPE(DECLARE_VISIT)
1285
#undef DECLARE_VISIT
1286

    
1287
  DispatchTable* table() { return table_; }
1288
  void set_choice_index(int value) { choice_index_ = value; }
1289

    
1290
 protected:
1291
  DispatchTable *table_;
1292
  int choice_index_;
1293
  bool ignore_case_;
1294
};
1295

    
1296

    
1297
// Assertion propagation moves information about assertions such as
1298
// \b to the affected nodes.  For instance, in /.\b./ information must
1299
// be propagated to the first '.' that whatever follows needs to know
1300
// if it matched a word or a non-word, and to the second '.' that it
1301
// has to check if it succeeds a word or non-word.  In this case the
1302
// result will be something like:
1303
//
1304
//   +-------+        +------------+
1305
//   |   .   |        |      .     |
1306
//   +-------+  --->  +------------+
1307
//   | word? |        | check word |
1308
//   +-------+        +------------+
1309
class Analysis: public NodeVisitor {
1310
 public:
1311
  explicit Analysis(bool ignore_case)
1312
      : ignore_case_(ignore_case) { }
1313
  void EnsureAnalyzed(RegExpNode* node);
1314

    
1315
#define DECLARE_VISIT(Type)                                          \
1316
  virtual void Visit##Type(Type##Node* that);
1317
FOR_EACH_NODE_TYPE(DECLARE_VISIT)
1318
#undef DECLARE_VISIT
1319
  virtual void VisitLoopChoice(LoopChoiceNode* that);
1320

    
1321
 private:
1322
  bool ignore_case_;
1323

    
1324
  DISALLOW_IMPLICIT_CONSTRUCTORS(Analysis);
1325
};
1326

    
1327

    
1328
struct RegExpCompileData {
1329
  RegExpCompileData()
1330
    : tree(NULL),
1331
      node(NULL),
1332
      simple(true),
1333
      contains_anchor(false),
1334
      capture_count(0) { }
1335
  RegExpTree* tree;
1336
  RegExpNode* node;
1337
  bool simple;
1338
  bool contains_anchor;
1339
  Handle<String> error;
1340
  int capture_count;
1341
};
1342

    
1343

    
1344
class RegExpEngine: public AllStatic {
1345
 public:
1346
  struct CompilationResult {
1347
    explicit CompilationResult(const char* error_message)
1348
        : error_message(error_message),
1349
          code(Heap::the_hole_value()),
1350
          num_registers(0) {}
1351
    CompilationResult(Object* code, int registers)
1352
      : error_message(NULL),
1353
        code(code),
1354
        num_registers(registers) {}
1355
    const char* error_message;
1356
    Object* code;
1357
    int num_registers;
1358
  };
1359

    
1360
  static CompilationResult Compile(RegExpCompileData* input,
1361
                                   bool ignore_case,
1362
                                   bool multiline,
1363
                                   Handle<String> pattern,
1364
                                   bool is_ascii);
1365

    
1366
  static void DotPrint(const char* label, RegExpNode* node, bool ignore_case);
1367
};
1368

    
1369

    
1370
} }  // namespace v8::internal
1371

    
1372
#endif  // V8_JSREGEXP_H_