The data contained in this repository can be downloaded to your computer using one of several clients.
Please see the documentation of your version control software client for more information.

Please select the desired protocol below to get the URL.

This URL has Read-Only access.

Statistics
| Branch: | Revision:

main_repo / deps / v8 / test / cctest / test-regexp.cc @ 40c0f755

History | View | Annotate | Download (49 KB)

1
// Copyright 2008 the V8 project authors. All rights reserved.
2
// Redistribution and use in source and binary forms, with or without
3
// modification, are permitted provided that the following conditions are
4
// met:
5
//
6
//     * Redistributions of source code must retain the above copyright
7
//       notice, this list of conditions and the following disclaimer.
8
//     * Redistributions in binary form must reproduce the above
9
//       copyright notice, this list of conditions and the following
10
//       disclaimer in the documentation and/or other materials provided
11
//       with the distribution.
12
//     * Neither the name of Google Inc. nor the names of its
13
//       contributors may be used to endorse or promote products derived
14
//       from this software without specific prior written permission.
15
//
16
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
18
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
19
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
20
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
26
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27

    
28

    
29
#include <stdlib.h>
30
#include <set>
31

    
32
#include "v8.h"
33

    
34
#include "string-stream.h"
35
#include "cctest.h"
36
#include "zone-inl.h"
37
#include "parser.h"
38
#include "ast.h"
39
#include "jsregexp-inl.h"
40
#include "regexp-macro-assembler.h"
41
#include "regexp-macro-assembler-irregexp.h"
42
#ifdef ARM
43
#include "regexp-macro-assembler-arm.h"
44
#else  // IA32
45
#include "macro-assembler-ia32.h"
46
#include "regexp-macro-assembler-ia32.h"
47
#endif
48
#include "interpreter-irregexp.h"
49

    
50

    
51
using namespace v8::internal;
52

    
53

    
54
// Parses |input| as a regular expression (multiline flag off) and returns
// the textual rendering of the resulting syntax tree.  The parse is CHECKed
// to succeed, so only well-formed patterns may be passed in.
static SmartPointer<const char> Parse(const char* input) {
  V8::Initialize(NULL);
  v8::HandleScope scope;
  ZoneScope zone_scope(DELETE_ON_EXIT);

  RegExpCompileData result;
  FlatStringReader reader(CStrVector(input));
  CHECK(v8::internal::ParseRegExp(&reader, false, &result));
  CHECK(result.tree != NULL);
  CHECK(result.error.is_null());

  // The string is produced while the zone scope is still alive.
  return result.tree->ToString();
}
66

    
67
static bool CheckSimple(const char* input) {
68
  V8::Initialize(NULL);
69
  v8::HandleScope scope;
70
  unibrow::Utf8InputBuffer<> buffer(input, strlen(input));
71
  ZoneScope zone_scope(DELETE_ON_EXIT);
72
  FlatStringReader reader(CStrVector(input));
73
  RegExpCompileData result;
74
  CHECK(v8::internal::ParseRegExp(&reader, false, &result));
75
  CHECK(result.tree != NULL);
76
  CHECK(result.error.is_null());
77
  return result.simple;
78
}
79

    
80
// Pair of match-length bounds returned by CheckMinMaxMatch().
struct MinMaxPair {
  int min_match;  // Value of RegExpTree::min_match() for the parsed pattern.
  int max_match;  // Value of RegExpTree::max_match() for the parsed pattern.
};
84

    
85
// Parses |input| (multiline flag off), CHECKs that parsing succeeded without
// an error, and returns the minimum and maximum match lengths reported by
// the root of the resulting syntax tree.
static MinMaxPair CheckMinMaxMatch(const char* input) {
  V8::Initialize(NULL);
  v8::HandleScope scope;
  ZoneScope zone_scope(DELETE_ON_EXIT);
  FlatStringReader reader(CStrVector(input));
  RegExpCompileData result;
  CHECK(v8::internal::ParseRegExp(&reader, false, &result));
  CHECK(result.tree != NULL);
  CHECK(result.error.is_null());
  // Read both bounds while the zone that owns the tree is still alive.
  MinMaxPair pair = { result.tree->min_match(), result.tree->max_match() };
  return pair;
}
100

    
101

    
102

    
103
// Asserts that |input| parses to a tree whose textual form equals |expected|.
#define CHECK_PARSE_EQ(input, expected) CHECK_EQ(expected, *Parse(input))
// Asserts that the parser's "simple" flag for |input| equals |simple|.
// No trailing semicolon here: the caller supplies it, like for the other
// CHECK macros.
#define CHECK_SIMPLE(input, simple) CHECK_EQ(simple, CheckSimple(input))
// Asserts the minimum and maximum match lengths computed for |input|.
// Wrapped in do/while so the expansion is a single statement that is safe
// inside unbraced if/else bodies.
#define CHECK_MIN_MAX(input, min, max)                                         \
  do {                                                                         \
    MinMaxPair min_max = CheckMinMaxMatch(input);                              \
    CHECK_EQ(min, min_max.min_match);                                          \
    CHECK_EQ(max, min_max.max_match);                                          \
  } while (false)
110

    
111
// Exercises the regexp parser: the printed form of the syntax tree, the
// "simple" flag, and the computed minimum/maximum match lengths.
TEST(Parser) {
  V8::Initialize(NULL);

  // Literals, alternation, assertions and built-in classes.
  CHECK_PARSE_EQ("abc", "'abc'");
  CHECK_PARSE_EQ("", "%");
  CHECK_PARSE_EQ("abc|def", "(| 'abc' 'def')");
  CHECK_PARSE_EQ("abc|def|ghi", "(| 'abc' 'def' 'ghi')");
  CHECK_PARSE_EQ("^xxx$", "(: @^i 'xxx' @$i)");
  CHECK_PARSE_EQ("ab\\b\\d\\bcd", "(: 'ab' @b [0-9] @b 'cd')");
  CHECK_PARSE_EQ("\\w|\\d", "(| [0-9 A-Z _ a-z] [0-9])");

  // Quantifiers, greedy ('g') and non-greedy ('n').
  CHECK_PARSE_EQ("a*", "(# 0 - g 'a')");
  CHECK_PARSE_EQ("a*?", "(# 0 - n 'a')");
  CHECK_PARSE_EQ("abc+", "(: 'ab' (# 1 - g 'c'))");
  CHECK_PARSE_EQ("abc+?", "(: 'ab' (# 1 - n 'c'))");
  CHECK_PARSE_EQ("xyz?", "(: 'xy' (# 0 1 g 'z'))");
  CHECK_PARSE_EQ("xyz??", "(: 'xy' (# 0 1 n 'z'))");
  CHECK_PARSE_EQ("xyz{0,1}", "(: 'xy' (# 0 1 g 'z'))");
  CHECK_PARSE_EQ("xyz{0,1}?", "(: 'xy' (# 0 1 n 'z'))");
  CHECK_PARSE_EQ("xyz{93}", "(: 'xy' (# 93 93 g 'z'))");
  CHECK_PARSE_EQ("xyz{93}?", "(: 'xy' (# 93 93 n 'z'))");
  CHECK_PARSE_EQ("xyz{1,32}", "(: 'xy' (# 1 32 g 'z'))");
  CHECK_PARSE_EQ("xyz{1,32}?", "(: 'xy' (# 1 32 n 'z'))");
  CHECK_PARSE_EQ("xyz{1,}", "(: 'xy' (# 1 - g 'z'))");
  CHECK_PARSE_EQ("xyz{1,}?", "(: 'xy' (# 1 - n 'z'))");

  // Control-character escapes.
  CHECK_PARSE_EQ("a\\fb\\nc\\rd\\te\\vf", "'a\\x0cb\\x0ac\\x0dd\\x09e\\x0bf'");
  CHECK_PARSE_EQ("a\\nb\\bc", "(: 'a\\x0ab' @b 'c')");

  // Groups, captures and lookaheads.
  CHECK_PARSE_EQ("(?:foo)", "'foo'");
  CHECK_PARSE_EQ("(?: foo )", "' foo '");
  CHECK_PARSE_EQ("(foo|bar|baz)", "(^ (| 'foo' 'bar' 'baz'))");
  CHECK_PARSE_EQ("foo|(bar|baz)|quux", "(| 'foo' (^ (| 'bar' 'baz')) 'quux')");
  CHECK_PARSE_EQ("foo(?=bar)baz", "(: 'foo' (-> + 'bar') 'baz')");
  CHECK_PARSE_EQ("foo(?!bar)baz", "(: 'foo' (-> - 'bar') 'baz')");
  CHECK_PARSE_EQ("()", "(^ %)");
  CHECK_PARSE_EQ("(?=)", "(-> + %)");

  // Character classes.
  CHECK_PARSE_EQ("[]", "^[\\x00-\\uffff]");   // Doesn't compile on windows
  CHECK_PARSE_EQ("[^]", "[\\x00-\\uffff]");   // \uffff isn't in codepage 1252
  CHECK_PARSE_EQ("[x]", "[x]");
  CHECK_PARSE_EQ("[xyz]", "[x y z]");
  CHECK_PARSE_EQ("[a-zA-Z0-9]", "[a-z A-Z 0-9]");
  CHECK_PARSE_EQ("[-123]", "[- 1 2 3]");
  CHECK_PARSE_EQ("[^123]", "^[1 2 3]");
  CHECK_PARSE_EQ("]", "']'");
  CHECK_PARSE_EQ("}", "'}'");
  CHECK_PARSE_EQ("[a-b-c]", "[a-b - c]");
  CHECK_PARSE_EQ("[\\d]", "[0-9]");
  CHECK_PARSE_EQ("[x\\dz]", "[x 0-9 z]");
  CHECK_PARSE_EQ("[\\d-z]", "[0-9 - z]");
  CHECK_PARSE_EQ("[\\d-\\d]", "[0-9 - 0-9]");
  CHECK_PARSE_EQ("[z-\\d]", "[z - 0-9]");

  // \c escapes: letters give control characters, other characters are
  // kept as a literal 'c' followed by the character.
  CHECK_PARSE_EQ("\\cj\\cJ\\ci\\cI\\ck\\cK",
                 "'\\x0a\\x0a\\x09\\x09\\x0b\\x0b'");
  CHECK_PARSE_EQ("\\c!", "'c!'");
  CHECK_PARSE_EQ("\\c_", "'c_'");
  CHECK_PARSE_EQ("\\c~", "'c~'");

  // Escaped syntax characters.
  CHECK_PARSE_EQ("[a\\]c]", "[a ] c]");
  CHECK_PARSE_EQ("\\[\\]\\{\\}\\(\\)\\%\\^\\#\\ ", "'[]{}()%^# '");
  CHECK_PARSE_EQ("[\\[\\]\\{\\}\\(\\)\\%\\^\\#\\ ]", "[[ ] { } ( ) % ^ #  ]");

  // Numeric escapes outside of character classes.
  CHECK_PARSE_EQ("\\0", "'\\x00'");
  CHECK_PARSE_EQ("\\8", "'8'");
  CHECK_PARSE_EQ("\\9", "'9'");
  CHECK_PARSE_EQ("\\11", "'\\x09'");
  CHECK_PARSE_EQ("\\11a", "'\\x09a'");
  CHECK_PARSE_EQ("\\011", "'\\x09'");
  CHECK_PARSE_EQ("\\00011", "'\\x0011'");
  CHECK_PARSE_EQ("\\118", "'\\x098'");
  CHECK_PARSE_EQ("\\111", "'I'");
  CHECK_PARSE_EQ("\\1111", "'I1'");

  // Back references versus numeric escapes, depending on how many
  // captures the pattern contains.
  CHECK_PARSE_EQ("(x)(x)(x)\\1", "(: (^ 'x') (^ 'x') (^ 'x') (<- 1))");
  CHECK_PARSE_EQ("(x)(x)(x)\\2", "(: (^ 'x') (^ 'x') (^ 'x') (<- 2))");
  CHECK_PARSE_EQ("(x)(x)(x)\\3", "(: (^ 'x') (^ 'x') (^ 'x') (<- 3))");
  CHECK_PARSE_EQ("(x)(x)(x)\\4", "(: (^ 'x') (^ 'x') (^ 'x') '\\x04')");
  CHECK_PARSE_EQ("(x)(x)(x)\\1*", "(: (^ 'x') (^ 'x') (^ 'x')"
                               " (# 0 - g (<- 1)))");
  CHECK_PARSE_EQ("(x)(x)(x)\\2*", "(: (^ 'x') (^ 'x') (^ 'x')"
                               " (# 0 - g (<- 2)))");
  CHECK_PARSE_EQ("(x)(x)(x)\\3*", "(: (^ 'x') (^ 'x') (^ 'x')"
                               " (# 0 - g (<- 3)))");
  CHECK_PARSE_EQ("(x)(x)(x)\\4*", "(: (^ 'x') (^ 'x') (^ 'x')"
                               " (# 0 - g '\\x04'))");
  CHECK_PARSE_EQ("(x)(x)(x)(x)(x)(x)(x)(x)(x)(x)\\10",
              "(: (^ 'x') (^ 'x') (^ 'x') (^ 'x') (^ 'x') (^ 'x')"
              " (^ 'x') (^ 'x') (^ 'x') (^ 'x') (<- 10))");
  CHECK_PARSE_EQ("(x)(x)(x)(x)(x)(x)(x)(x)(x)(x)\\11",
              "(: (^ 'x') (^ 'x') (^ 'x') (^ 'x') (^ 'x') (^ 'x')"
              " (^ 'x') (^ 'x') (^ 'x') (^ 'x') '\\x09')");
  CHECK_PARSE_EQ("(a)\\1", "(: (^ 'a') (<- 1))");
  CHECK_PARSE_EQ("(a\\1)", "(^ 'a')");
  CHECK_PARSE_EQ("(\\1a)", "(^ 'a')");

  // Quantified lookaheads and references placed before their capture.
  CHECK_PARSE_EQ("(?=a)?a", "'a'");
  CHECK_PARSE_EQ("(?=a){0,10}a", "'a'");
  CHECK_PARSE_EQ("(?=a){1,10}a", "(: (-> + 'a') 'a')");
  CHECK_PARSE_EQ("(?=a){9,10}a", "(: (-> + 'a') 'a')");
  CHECK_PARSE_EQ("(?!a)?a", "'a'");
  CHECK_PARSE_EQ("\\1(a)", "(^ 'a')");
  CHECK_PARSE_EQ("(?!(a))\\1", "(-> - (^ 'a'))");
  CHECK_PARSE_EQ("(?!\\1(a\\1)\\1)\\1", "(-> - (: (^ 'a') (<- 1)))");

  // Numeric escapes inside character classes.
  CHECK_PARSE_EQ("[\\0]", "[\\x00]");
  CHECK_PARSE_EQ("[\\11]", "[\\x09]");
  CHECK_PARSE_EQ("[\\11a]", "[\\x09 a]");
  CHECK_PARSE_EQ("[\\011]", "[\\x09]");
  CHECK_PARSE_EQ("[\\00011]", "[\\x00 1 1]");
  CHECK_PARSE_EQ("[\\118]", "[\\x09 8]");
  CHECK_PARSE_EQ("[\\111]", "[I]");
  CHECK_PARSE_EQ("[\\1111]", "[I 1]");

  // Hex and unicode escapes, including incomplete ones.
  CHECK_PARSE_EQ("\\x34", "'\x34'");
  CHECK_PARSE_EQ("\\x60", "'\x60'");
  CHECK_PARSE_EQ("\\x3z", "'x3z'");
  CHECK_PARSE_EQ("\\c", "'c'");
  CHECK_PARSE_EQ("\\u0034", "'\x34'");
  CHECK_PARSE_EQ("\\u003z", "'u003z'");
  CHECK_PARSE_EQ("foo[z]*", "(: 'foo' (# 0 - g [z]))");

  // The "simple" flag: of the patterns below only the plain literal "a"
  // is reported as simple.
  CHECK_SIMPLE("a", true);
  CHECK_SIMPLE("a|b", false);
  CHECK_SIMPLE("a\\n", false);
  CHECK_SIMPLE("^a", false);
  CHECK_SIMPLE("a$", false);
  CHECK_SIMPLE("a\\b!", false);
  CHECK_SIMPLE("a\\Bb", false);
  CHECK_SIMPLE("a*", false);
  CHECK_SIMPLE("a*?", false);
  CHECK_SIMPLE("a?", false);
  CHECK_SIMPLE("a??", false);
  CHECK_SIMPLE("a{0,1}?", false);
  CHECK_SIMPLE("a{1,1}?", false);
  CHECK_SIMPLE("a{1,2}?", false);
  CHECK_SIMPLE("a+?", false);
  CHECK_SIMPLE("(a)", false);
  CHECK_SIMPLE("(a)\\1", false);
  CHECK_SIMPLE("(\\1a)", false);
  CHECK_SIMPLE("\\1(a)", false);
  CHECK_SIMPLE("a\\s", false);
  CHECK_SIMPLE("a\\S", false);
  CHECK_SIMPLE("a\\d", false);
  CHECK_SIMPLE("a\\D", false);
  CHECK_SIMPLE("a\\w", false);
  CHECK_SIMPLE("a\\W", false);
  CHECK_SIMPLE("a.", false);
  CHECK_SIMPLE("a\\q", false);
  CHECK_SIMPLE("a[a]", false);
  CHECK_SIMPLE("a[^a]", false);
  CHECK_SIMPLE("a[a-z]", false);
  CHECK_SIMPLE("a[\\q]", false);
  CHECK_SIMPLE("a(?:b)", false);
  CHECK_SIMPLE("a(?=b)", false);
  CHECK_SIMPLE("a(?!b)", false);
  CHECK_SIMPLE("\\x60", false);
  CHECK_SIMPLE("\\u0060", false);
  CHECK_SIMPLE("\\cA", false);
  CHECK_SIMPLE("\\q", false);
  CHECK_SIMPLE("\\1112", false);
  CHECK_SIMPLE("\\0", false);
  CHECK_SIMPLE("(a)\\1", false);
  CHECK_SIMPLE("(?=a)?a", false);
  CHECK_SIMPLE("(?!a)?a\\1", false);
  CHECK_SIMPLE("(?:(?=a))a\\1", false);

  // Malformed or incomplete brace quantifiers are kept as literal text.
  CHECK_PARSE_EQ("a{}", "'a{}'");
  CHECK_PARSE_EQ("a{,}", "'a{,}'");
  CHECK_PARSE_EQ("a{", "'a{'");
  CHECK_PARSE_EQ("a{z}", "'a{z}'");
  CHECK_PARSE_EQ("a{1z}", "'a{1z}'");
  CHECK_PARSE_EQ("a{12z}", "'a{12z}'");
  CHECK_PARSE_EQ("a{12,", "'a{12,'");
  CHECK_PARSE_EQ("a{12,3b", "'a{12,3b'");
  CHECK_PARSE_EQ("{}", "'{}'");
  CHECK_PARSE_EQ("{,}", "'{,}'");
  CHECK_PARSE_EQ("{", "'{'");
  CHECK_PARSE_EQ("{z}", "'{z}'");
  CHECK_PARSE_EQ("{1z}", "'{1z}'");
  CHECK_PARSE_EQ("{12z}", "'{12z}'");
  CHECK_PARSE_EQ("{12,", "'{12,'");
  CHECK_PARSE_EQ("{12,3b", "'{12,3b'");

  // Minimum and maximum match lengths.
  CHECK_MIN_MAX("a", 1, 1);
  CHECK_MIN_MAX("abc", 3, 3);
  CHECK_MIN_MAX("a[bc]d", 3, 3);
  CHECK_MIN_MAX("a|bc", 1, 2);
  CHECK_MIN_MAX("ab|c", 1, 2);
  CHECK_MIN_MAX("a||bc", 0, 2);
  CHECK_MIN_MAX("|", 0, 0);
  CHECK_MIN_MAX("(?:ab)", 2, 2);
  CHECK_MIN_MAX("(?:ab|cde)", 2, 3);
  CHECK_MIN_MAX("(?:ab)|cde", 2, 3);
  CHECK_MIN_MAX("(ab)", 2, 2);
  CHECK_MIN_MAX("(ab|cde)", 2, 3);
  CHECK_MIN_MAX("(ab)\\1", 2, 4);
  CHECK_MIN_MAX("(ab|cde)\\1", 2, 6);
  CHECK_MIN_MAX("(?:ab)?", 0, 2);
  CHECK_MIN_MAX("(?:ab)*", 0, RegExpTree::kInfinity);
  CHECK_MIN_MAX("(?:ab)+", 2, RegExpTree::kInfinity);
  CHECK_MIN_MAX("a?", 0, 1);
  CHECK_MIN_MAX("a*", 0, RegExpTree::kInfinity);
  CHECK_MIN_MAX("a+", 1, RegExpTree::kInfinity);
  CHECK_MIN_MAX("a??", 0, 1);
  CHECK_MIN_MAX("a*?", 0, RegExpTree::kInfinity);
  CHECK_MIN_MAX("a+?", 1, RegExpTree::kInfinity);
  CHECK_MIN_MAX("(?:a?)?", 0, 1);
  CHECK_MIN_MAX("(?:a*)?", 0, RegExpTree::kInfinity);
  CHECK_MIN_MAX("(?:a+)?", 0, RegExpTree::kInfinity);
  CHECK_MIN_MAX("(?:a?)+", 0, RegExpTree::kInfinity);
  CHECK_MIN_MAX("(?:a*)+", 0, RegExpTree::kInfinity);
  CHECK_MIN_MAX("(?:a+)+", 1, RegExpTree::kInfinity);
  CHECK_MIN_MAX("(?:a?)*", 0, RegExpTree::kInfinity);
  CHECK_MIN_MAX("(?:a*)*", 0, RegExpTree::kInfinity);
  CHECK_MIN_MAX("(?:a+)*", 0, RegExpTree::kInfinity);
  CHECK_MIN_MAX("a{0}", 0, 0);
  CHECK_MIN_MAX("(?:a+){0}", 0, 0);
  CHECK_MIN_MAX("(?:a+){0,0}", 0, 0);
  CHECK_MIN_MAX("a*b", 1, RegExpTree::kInfinity);
  CHECK_MIN_MAX("a+b", 2, RegExpTree::kInfinity);
  CHECK_MIN_MAX("a*b|c", 1, RegExpTree::kInfinity);
  CHECK_MIN_MAX("a+b|c", 1, RegExpTree::kInfinity);
  CHECK_MIN_MAX("(?:a{5,1000000}){3,1000000}", 15, RegExpTree::kInfinity);
  CHECK_MIN_MAX("(?:ab){4,7}", 8, 14);
  CHECK_MIN_MAX("a\\bc", 2, 2);
  CHECK_MIN_MAX("a\\Bc", 2, 2);
  CHECK_MIN_MAX("a\\sc", 3, 3);
  CHECK_MIN_MAX("a\\Sc", 3, 3);
  CHECK_MIN_MAX("a(?=b)c", 2, 2);
  CHECK_MIN_MAX("a(?=bbb|bb)c", 2, 2);
  CHECK_MIN_MAX("a(?!bbb|bb)c", 2, 2);
}
333

    
334
// Pins the parse trees expected for a handful of specific patterns.
TEST(ParserRegression) {
  // Two adjacent character classes.
  CHECK_PARSE_EQ("[A-Z$-][x]", "(! [A-Z $ -] [x])");
  // A '*' inside an incomplete brace quantifier applies to the '4' only.
  CHECK_PARSE_EQ("a{3,4*}", "(: 'a{3,' (# 0 - g '4') '}')");
  // A lone opening brace is a literal.
  CHECK_PARSE_EQ("{", "'{'");
  // A trailing '|' yields an empty second alternative.
  CHECK_PARSE_EQ("a|", "(| 'a' %)");
}
340

    
341
static void ExpectError(const char* input,
342
                        const char* expected) {
343
  V8::Initialize(NULL);
344
  v8::HandleScope scope;
345
  ZoneScope zone_scope(DELETE_ON_EXIT);
346
  FlatStringReader reader(CStrVector(input));
347
  RegExpCompileData result;
348
  CHECK_EQ(false, v8::internal::ParseRegExp(&reader, false, &result));
349
  CHECK(result.tree == NULL);
350
  CHECK(!result.error.is_null());
351
  SmartPointer<char> str = result.error->ToCString(ALLOW_NULLS);
352
  CHECK_EQ(expected, *str);
353
}
354

    
355

    
356
// Checks the error message reported for a range of malformed patterns.
TEST(Errors) {
  V8::Initialize(NULL);

  // Expected error messages.
  const char* kEndBackslash = "\\ at end of pattern";
  const char* kUnterminatedGroup = "Unterminated group";
  const char* kInvalidGroup = "Invalid group";
  const char* kUnterminatedCharacterClass = "Unterminated character class";
  const char* kNothingToRepeat = "Nothing to repeat";
  const char* kTooManyCaptures = "Too many captures";

  ExpectError("\\", kEndBackslash);
  ExpectError("(foo", kUnterminatedGroup);
  ExpectError("(?", kInvalidGroup);
  ExpectError("[", kUnterminatedCharacterClass);
  ExpectError("[a-", kUnterminatedCharacterClass);

  // Quantifiers with nothing in front of them.
  ExpectError("*", kNothingToRepeat);
  ExpectError("?", kNothingToRepeat);
  ExpectError("+", kNothingToRepeat);
  ExpectError("{1}", kNothingToRepeat);
  ExpectError("{1,2}", kNothingToRepeat);
  ExpectError("{1,}", kNothingToRepeat);

  // Check that we don't allow more than kMaxCapture captures: build a
  // pattern made of kMaxCaptures + 1 empty capture groups.
  const int kMaxCaptures = 1 << 16;  // Must match RegExpParser::kMaxCaptures.
  HeapStringAllocator allocator;
  StringStream accumulator(&allocator);
  int groups_left = kMaxCaptures + 1;
  while (groups_left > 0) {
    accumulator.Add("()");
    groups_left--;
  }
  SmartPointer<const char> many_captures(accumulator.ToCString());
  ExpectError(*many_captures, kTooManyCaptures);
}
386

    
387

    
388
// Returns true if |c| is one of the ASCII digits '0' through '9'.
static bool IsDigit(uc16 c) {
  return c >= '0' && c <= '9';
}
391

    
392

    
393
// Complement of IsDigit().
static bool NotDigit(uc16 c) {
  const bool is_digit = IsDigit(c);
  return !is_digit;
}
396

    
397

    
398
// Returns true if |c| is one of the explicitly listed code units
// (0x09-0x0D, 0x20, 0xA0, 0x2028, 0x2029) or is classified as a space by
// unibrow::Space.
static bool IsWhiteSpace(uc16 c) {
  // 0x09 through 0x0D are the five consecutive values listed individually.
  if (0x09 <= c && c <= 0x0D) return true;
  if (c == 0x20 || c == 0xA0) return true;
  if (c == 0x2028 || c == 0x2029) return true;
  return unibrow::Space::Is(c);
}
414

    
415

    
416
// Complement of IsWhiteSpace().
static bool NotWhiteSpace(uc16 c) {
  const bool is_space = IsWhiteSpace(c);
  return !is_space;
}
419

    
420

    
421
// Complement of IsRegExpWord().
static bool NotWord(uc16 c) {
  const bool is_word = IsRegExpWord(c);
  return !is_word;
}
424

    
425

    
426
// Expands the class escape |c| into character ranges and CHECKs, for every
// value in [0, 1 << 16), that membership in those ranges agrees with the
// reference predicate |pred|.
static void TestCharacterClassEscapes(uc16 c, bool (pred)(uc16 c)) {
  ZoneScope scope(DELETE_ON_EXIT);
  ZoneList<CharacterRange>* ranges = new ZoneList<CharacterRange>(2);
  CharacterRange::AddClassEscape(c, ranges);
  for (unsigned code_unit = 0; code_unit < (1 << 16); code_unit++) {
    // Linear scan over the ranges; stop at the first one that contains
    // the current value.
    bool in_class = false;
    for (int index = 0; index < ranges->length(); index++) {
      CharacterRange& range = ranges->at(index);
      if (range.from() <= code_unit && code_unit <= range.to()) {
        in_class = true;
        break;
      }
    }
    CHECK_EQ(pred(code_unit), in_class);
  }
}
439

    
440

    
441
// Compares the ranges generated for each class escape against a reference
// predicate.
TEST(CharacterClassEscapes) {
  // The '.' escape is checked against the newline predicate.
  TestCharacterClassEscapes('.', IsRegExpNewline);

  // Digit, whitespace and word classes, each followed by its negation.
  TestCharacterClassEscapes('d', IsDigit);
  TestCharacterClassEscapes('D', NotDigit);
  TestCharacterClassEscapes('s', IsWhiteSpace);
  TestCharacterClassEscapes('S', NotWhiteSpace);
  TestCharacterClassEscapes('w', IsRegExpWord);
  TestCharacterClassEscapes('W', NotWord);
}
450

    
451

    
452
// Parses |input| and runs it through RegExpEngine::Compile.  Returns the
// node stored in the compile data, or NULL if the pattern fails to parse.
// The return value of RegExpEngine::Compile itself is not inspected.
static RegExpNode* Compile(const char* input, bool multiline, bool is_ascii) {
  V8::Initialize(NULL);
  RegExpCompileData compile_data;
  FlatStringReader reader(CStrVector(input));
  const bool parsed =
      v8::internal::ParseRegExp(&reader, multiline, &compile_data);
  if (!parsed) {
    return NULL;
  }
  Handle<String> pattern = Factory::NewStringFromUtf8(CStrVector(input));
  RegExpEngine::Compile(&compile_data, false, multiline, pattern, is_ascii);
  return compile_data.node;
}
462

    
463

    
464
static void Execute(const char* input,
465
                    bool multiline,
466
                    bool is_ascii,
467
                    bool dot_output = false) {
468
  v8::HandleScope scope;
469
  ZoneScope zone_scope(DELETE_ON_EXIT);
470
  RegExpNode* node = Compile(input, multiline, is_ascii);
471
  USE(node);
472
#ifdef DEBUG
473
  if (dot_output) {
474
    RegExpEngine::DotPrint(input, node, false);
475
    exit(0);
476
  }
477
#endif  // DEBUG
478
}
479

    
480

    
481
// Configuration class handed to ZoneSplayTree in the tests below: keys and
// values are plain ints.  The two static constants are defined out of class.
class TestConfig {
 public:
  typedef int Key;
  typedef int Value;
  static const Key kNoKey;
  static const Value kNoValue;
  // Three-way comparison of two keys: returns -1 if a < b, 1 if a > b and
  // 0 if they are equal.
  static inline int Compare(Key a, Key b) {
    if (a < b) return -1;
    if (a > b) return 1;
    return 0;
  }
};
496

    
497

    
498
const int TestConfig::kNoKey = 0;
499
const int TestConfig::kNoValue = 0;
500

    
501

    
502
// Deterministic mixing function used to generate reproducible test data:
// multiplies each argument by a fixed constant and XORs the products.
// The result is negative whenever exactly one product is negative.
static int PseudoRandom(int i, int j) {
  // The original wrapped this in ~(~(...)), which is a no-op.
  return (i * 781) ^ (j * 329);
}
505

    
506

    
507
// Drives a ZoneSplayTree with a pseudo-random sequence of insertions and
// removals, mirroring every operation in a std::set, and checks that both
// containers always agree.  After each step it also checks the tree's
// nearest-key lookups against the reference set.
TEST(SplayTreeSimple) {
  static const int kLimit = 1000;
  ZoneScope zone_scope(DELETE_ON_EXIT);
  ZoneSplayTree<TestConfig> tree;
  std::set<int> seen;
// Checks membership of every key in [0, kLimit) in both containers.  Uses
// the Locator named |loc| that is in scope at the point of expansion.
#define CHECK_MAPS_EQUAL() do {                                      \
    for (int k = 0; k < kLimit; k++)                                 \
      CHECK_EQ(seen.find(k) != seen.end(), tree.Find(k, &loc));      \
  } while (false)
  for (int i = 0; i < 50; i++) {
    for (int j = 0; j < 50; j++) {
      int next = PseudoRandom(i, j) % kLimit;
      if (seen.find(next) != seen.end()) {
        // We've already seen this one.  Check the value and remove
        // it.
        ZoneSplayTree<TestConfig>::Locator loc;
        CHECK(tree.Find(next, &loc));
        CHECK_EQ(next, loc.key());
        CHECK_EQ(3 * next, loc.value());
        tree.Remove(next);
        seen.erase(next);
        CHECK_MAPS_EQUAL();
      } else {
        // Check that it wasn't there already and then add it.
        ZoneSplayTree<TestConfig>::Locator loc;
        CHECK(!tree.Find(next, &loc));
        CHECK(tree.Insert(next, &loc));
        CHECK_EQ(next, loc.key());
        loc.set_value(3 * next);
        seen.insert(next);
        CHECK_MAPS_EQUAL();
      }
      // Scan downwards from val for the nearest key in the reference set
      // and check that the tree reports the same key.  (The original
      // tested |val| on every iteration, leaving |k| unused.)
      int val = PseudoRandom(j, i) % kLimit;
      for (int k = val; k >= 0; k--) {
        if (seen.find(k) != seen.end()) {
          ZoneSplayTree<TestConfig>::Locator loc;
          CHECK(tree.FindGreatestLessThan(val, &loc));
          CHECK_EQ(loc.key(), k);
          break;
        }
      }
      // Same check in the upward direction.  NOTE(review): val can be
      // negative here because i - j may be negative; the scan then simply
      // starts below zero.
      val = PseudoRandom(i + j, i - j) % kLimit;
      for (int k = val; k < kLimit; k++) {
        if (seen.find(k) != seen.end()) {
          ZoneSplayTree<TestConfig>::Locator loc;
          CHECK(tree.FindLeastGreaterThan(val, &loc));
          CHECK_EQ(loc.key(), k);
          break;
        }
      }
    }
  }
}
560

    
561

    
562
// Fills a DispatchTable with several pseudo-random sets of character
// ranges and checks, for every value below kLimit, that the out-set
// returned by the table contains exactly the sets covering that value.
TEST(DispatchTableConstruction) {
  // Initialize test data.
  static const int kLimit = 1000;
  static const int kRangeCount = 8;
  static const int kRangeSize = 16;
  // Each row stores kRangeSize (from, to) pairs back to back.
  uc16 ranges[kRangeCount][2 * kRangeSize];
  for (int set = 0; set < kRangeCount; set++) {
    Vector<uc16> endpoints(ranges[set], 2 * kRangeSize);
    for (int slot = 0; slot < 2 * kRangeSize; slot++) {
      endpoints[slot] = PseudoRandom(set + 25, slot + 87) % kLimit;
    }
    // Sorting makes every consecutive pair a well-formed range.
    endpoints.Sort();
    for (int slot = 1; slot < 2 * kRangeSize; slot++) {
      CHECK(endpoints[slot - 1] <= endpoints[slot]);
    }
  }
  // Enter test data into dispatch table.
  ZoneScope zone_scope(DELETE_ON_EXIT);
  DispatchTable table;
  for (int set = 0; set < kRangeCount; set++) {
    uc16* endpoints = ranges[set];
    for (int slot = 0; slot < 2 * kRangeSize; slot += 2) {
      table.AddRange(CharacterRange(endpoints[slot], endpoints[slot + 1]),
                     set);
    }
  }
  // Check that the table looks as we would expect
  for (int p = 0; p < kLimit; p++) {
    OutSet* outs = table.Get(p);
    for (int set = 0; set < kRangeCount; set++) {
      uc16* endpoints = ranges[set];
      bool is_on = false;
      for (int slot = 0; slot < 2 * kRangeSize; slot += 2) {
        if (endpoints[slot] <= p && p <= endpoints[slot + 1]) {
          is_on = true;
          break;
        }
      }
      CHECK_EQ(is_on, outs->Get(set));
    }
  }
}
598

    
599

    
600
// Hand-assembles a small bytecode program with the Irregexp macro
// assembler and runs it through the interpreter on a matching and a
// non-matching two-byte subject string.
TEST(MacroAssembler) {
  V8::Initialize(NULL);
  byte codes[1024];
  RegExpMacroAssemblerIrregexp m(Vector<byte>(codes, 1024));
  // ^f(o)o.
  Label fail, fail2, start;
  uc16 foo_chars[3] = {'f', 'o', 'o'};
  Vector<const uc16> foo(foo_chars, 3);

  // Register 4 is set to 42, pushed, then advanced by another 42.
  m.SetRegister(4, 42);
  m.PushRegister(4, RegExpMacroAssembler::kNoStackLimitCheck);
  m.AdvanceRegister(4, 42);
  m.GoTo(&start);
  m.Fail();

  // Main path: compare against "foo" and record positions in registers
  // 0 through 3.
  m.Bind(&start);
  m.PushBacktrack(&fail2);
  m.CheckCharacters(foo, 0, &fail, true);
  m.WriteCurrentPositionToRegister(0, 0);
  m.PushCurrentPosition();
  m.AdvanceCurrentPosition(3);
  m.WriteCurrentPositionToRegister(1, 0);
  m.PopCurrentPosition();
  m.AdvanceCurrentPosition(1);
  m.WriteCurrentPositionToRegister(2, 0);
  m.AdvanceCurrentPosition(1);
  m.WriteCurrentPositionToRegister(3, 0);
  m.Succeed();

  // Taken when the character check fails.
  m.Bind(&fail);
  m.Backtrack();
  m.Succeed();

  // Backtrack target pushed at |start|: pops into register 0 and fails.
  m.Bind(&fail2);
  m.PopRegister(0);
  m.Fail();

  v8::HandleScope scope;

  Handle<String> source = Factory::NewStringFromAscii(CStrVector("^f(o)o"));
  Handle<ByteArray> array = Handle<ByteArray>::cast(m.GetCode(source));
  int captures[5];

  // Subject that starts with "foo": the program succeeds.
  const uc16 str1[] = {'f', 'o', 'o', 'b', 'a', 'r'};
  Handle<String> f1_16 =
      Factory::NewStringFromTwoByte(Vector<const uc16>(str1, 6));

  CHECK(IrregexpInterpreter::Match(array, f1_16, captures, 0));
  CHECK_EQ(0, captures[0]);
  CHECK_EQ(3, captures[1]);
  CHECK_EQ(1, captures[2]);
  CHECK_EQ(2, captures[3]);
  CHECK_EQ(84, captures[4]);  // 42 set + 42 advanced.

  // Subject that does not start with "foo": the program fails and slot 0
  // holds the 42 that was pushed from register 4.
  const uc16 str2[] = {'b', 'a', 'r', 'f', 'o', 'o'};
  Handle<String> f2_16 =
      Factory::NewStringFromTwoByte(Vector<const uc16>(str2, 6));

  CHECK(!IrregexpInterpreter::Match(array, f2_16, captures, 0));
  CHECK_EQ(42, captures[0]);
}
662

    
663

    
664
#ifndef ARM  // IA32 only tests.
665

    
666
class ContextInitializer {
667
 public:
668
  ContextInitializer() : env_(), scope_(), stack_guard_() {
669
    env_ = v8::Context::New();
670
    env_->Enter();
671
  }
672
  ~ContextInitializer() {
673
    env_->Exit();
674
    env_.Dispose();
675
  }
676
 private:
677
  v8::Persistent<v8::Context> env_;
678
  v8::HandleScope scope_;
679
  v8::internal::StackGuard stack_guard_;
680
};
681

    
682

    
683
// Thin forwarding helper: hands every argument, unchanged and in the same
// order, to RegExpMacroAssemblerIA32::Execute and returns its result.
static RegExpMacroAssemblerIA32::Result ExecuteIA32(Code* code,
                                                    String* input,
                                                    int start_offset,
                                                    const byte* input_start,
                                                    const byte* input_end,
                                                    int* captures,
                                                    bool at_start) {
  RegExpMacroAssemblerIA32::Result outcome = RegExpMacroAssemblerIA32::Execute(
      code, input, start_offset, input_start, input_end, captures, at_start);
  return outcome;
}
699

    
700

    
701
// Generates code consisting of a single Succeed() and runs it on "foofoo".
// Expects SUCCESS and expects all four capture slots, pre-filled with
// sentinel values, to read -1 afterwards.
TEST(MacroAssemblerIA32Success) {
  v8::V8::Initialize();
  ContextInitializer initializer;

  RegExpMacroAssemblerIA32 m(RegExpMacroAssemblerIA32::ASCII, 4);
  m.Succeed();

  Handle<String> source = Factory::NewStringFromAscii(CStrVector(""));
  Handle<Code> code = Handle<Code>::cast(m.GetCode(source));

  // Sentinels that the generated code is expected to overwrite.
  int captures[4] = {42, 37, 87, 117};

  Handle<String> input = Factory::NewStringFromAscii(CStrVector("foofoo"));
  Handle<SeqAsciiString> seq_input = Handle<SeqAsciiString>::cast(input);
  const byte* input_begin =
      reinterpret_cast<const byte*>(seq_input->GetCharsAddress());
  const byte* input_limit = input_begin + seq_input->length();

  RegExpMacroAssemblerIA32::Result result = ExecuteIA32(
      *code, *input, 0, input_begin, input_limit, captures, true);

  CHECK_EQ(RegExpMacroAssemblerIA32::SUCCESS, result);
  CHECK_EQ(-1, captures[0]);
  CHECK_EQ(-1, captures[1]);
  CHECK_EQ(-1, captures[2]);
  CHECK_EQ(-1, captures[3]);
}
734

    
735

    
736
// Hand-assembles an ASCII matcher (labelled "^foo") that checks for the
// characters 'f','o','o' at the current position and records positions 0
// and 3 in registers 0 and 1.  Runs it on "foofoo" (expects SUCCESS with
// captures 0, 3, -1, -1) and on "barbarbar" (expects FAILURE).
TEST(MacroAssemblerIA32Simple) {
  v8::V8::Initialize();
  ContextInitializer initializer;

  RegExpMacroAssemblerIA32 m(RegExpMacroAssemblerIA32::ASCII, 4);

  uc16 foo_chars[3] = {'f', 'o', 'o'};
  Vector<const uc16> foo(foo_chars, 3);

  Label mismatch;
  m.CheckCharacters(foo, 0, &mismatch, true);
  m.WriteCurrentPositionToRegister(0, 0);
  m.AdvanceCurrentPosition(3);
  m.WriteCurrentPositionToRegister(1, 0);
  m.Succeed();
  m.Bind(&mismatch);
  m.Fail();

  Handle<String> source = Factory::NewStringFromAscii(CStrVector("^foo"));
  Handle<Code> code = Handle<Code>::cast(m.GetCode(source));

  int captures[4] = {42, 37, 87, 117};

  // First subject: begins with "foo".
  Handle<String> hit = Factory::NewStringFromAscii(CStrVector("foofoo"));
  Handle<SeqAsciiString> seq_hit = Handle<SeqAsciiString>::cast(hit);
  Address hit_begin = seq_hit->GetCharsAddress();

  RegExpMacroAssemblerIA32::Result result = ExecuteIA32(
      *code, *hit, 0, hit_begin, hit_begin + hit->length(), captures, true);

  CHECK_EQ(RegExpMacroAssemblerIA32::SUCCESS, result);
  CHECK_EQ(0, captures[0]);
  CHECK_EQ(3, captures[1]);
  CHECK_EQ(-1, captures[2]);
  CHECK_EQ(-1, captures[3]);

  // Second subject: does not begin with "foo".
  Handle<String> miss = Factory::NewStringFromAscii(CStrVector("barbarbar"));
  Handle<SeqAsciiString> seq_miss = Handle<SeqAsciiString>::cast(miss);
  Address miss_begin = seq_miss->GetCharsAddress();

  result = ExecuteIA32(
      *code, *miss, 0, miss_begin, miss_begin + miss->length(), captures, true);

  CHECK_EQ(RegExpMacroAssemblerIA32::FAILURE, result);
}
792

    
793

    
794
// UC16 variant of the simple "^foo" matcher: the generated code checks for
// 'f','o','o' at the current position and records positions 0 and 3 in
// registers 0 and 1.  It is run on a two-byte string starting with "foo"
// (expects SUCCESS with captures 0, 3, -1, -1) and on one starting with
// "bar" (expects FAILURE).
TEST(MacroAssemblerIA32SimpleUC16) {
  v8::V8::Initialize();
  ContextInitializer initializer;

  RegExpMacroAssemblerIA32 m(RegExpMacroAssemblerIA32::UC16, 4);

  uc16 foo_chars[3] = {'f', 'o', 'o'};
  Vector<const uc16> foo(foo_chars, 3);

  Label fail;
  m.CheckCharacters(foo, 0, &fail, true);
  m.WriteCurrentPositionToRegister(0, 0);
  m.AdvanceCurrentPosition(3);
  m.WriteCurrentPositionToRegister(1, 0);
  m.Succeed();
  m.Bind(&fail);
  m.Fail();

  Handle<String> source = Factory::NewStringFromAscii(CStrVector("^foo"));
  Handle<Object> code_object = m.GetCode(source);
  Handle<Code> code = Handle<Code>::cast(code_object);

  // ExecuteIA32 takes byte addresses, while String::length() counts
  // characters; each character of a two-byte string occupies this many bytes.
  static const int kUC16Size = sizeof(uc16);

  int captures[4] = {42, 37, 87, 117};
  const uc16 input_data[6] = {'f', 'o', 'o', 'f', 'o', '\xa0'};
  Handle<String> input =
      Factory::NewStringFromTwoByte(Vector<const uc16>(input_data, 6));
  Handle<SeqTwoByteString> seq_input = Handle<SeqTwoByteString>::cast(input);
  Address start_adr = seq_input->GetCharsAddress();

  // The end address must be scaled by the character size; passing
  // start_adr + length() would expose only the first half of the input.
  RegExpMacroAssemblerIA32::Result result =
      ExecuteIA32(*code,
                  *input,
                  0,
                  start_adr,
                  start_adr + input->length() * kUC16Size,
                  captures,
                  true);

  CHECK_EQ(RegExpMacroAssemblerIA32::SUCCESS, result);
  CHECK_EQ(0, captures[0]);
  CHECK_EQ(3, captures[1]);
  CHECK_EQ(-1, captures[2]);
  CHECK_EQ(-1, captures[3]);

  const uc16 input_data2[9] = {'b', 'a', 'r', 'b', 'a', 'r', 'b', 'a', '\xa0'};
  input = Factory::NewStringFromTwoByte(Vector<const uc16>(input_data2, 9));
  seq_input = Handle<SeqTwoByteString>::cast(input);
  start_adr = seq_input->GetCharsAddress();

  result = ExecuteIA32(*code,
                       *input,
                       0,
                       start_adr,
                       start_adr + input->length() * kUC16Size,
                       captures,
                       true);

  CHECK_EQ(RegExpMacroAssemblerIA32::FAILURE, result);
}
853

    
854

    
855
// Emits code that tries to load the character at offset 10 of the
// six-character input "foofoo", pushes a backtrack target, tries the same
// load again with a NULL failure label, and fails at the backtrack target.
// The run is expected to end in FAILURE.
// NOTE(review): presumably a NULL label means "backtrack on failure" --
// confirm against RegExpMacroAssembler::LoadCurrentCharacter.
TEST(MacroAssemblerIA32Backtrack) {
  v8::V8::Initialize();
  ContextInitializer initializer;

  RegExpMacroAssemblerIA32 m(RegExpMacroAssemblerIA32::ASCII, 0);

  Label out_of_range;
  Label on_backtrack;
  m.LoadCurrentCharacter(10, &out_of_range);
  m.Succeed();
  m.Bind(&out_of_range);
  m.PushBacktrack(&on_backtrack);
  m.LoadCurrentCharacter(10, NULL);
  m.Succeed();
  m.Bind(&on_backtrack);
  m.Fail();

  Handle<String> source = Factory::NewStringFromAscii(CStrVector(".........."));
  Handle<Code> code = Handle<Code>::cast(m.GetCode(source));

  Handle<String> input = Factory::NewStringFromAscii(CStrVector("foofoo"));
  Handle<SeqAsciiString> seq_input = Handle<SeqAsciiString>::cast(input);
  Address input_begin = seq_input->GetCharsAddress();
  Address input_limit = input_begin + input->length();

  // No capture array is supplied; the assembler was created with zero
  // registers to save.
  RegExpMacroAssemblerIA32::Result result =
      ExecuteIA32(*code, *input, 0, input_begin, input_limit, NULL, true);

  CHECK_EQ(RegExpMacroAssemblerIA32::FAILURE, result);
}
891

    
892

    
893
// Exercises CheckNotBackReference on an ASCII subject.  Registers 0 and 1
// delimit the first two characters of "fooofo" ("fo").  The back reference
// is expected not to match at position 2 ("oo"), and to match after
// skipping two more characters (position 4, "fo"), leaving the final
// position 6 in register 2.
TEST(MacroAssemblerIA32BackReferenceASCII) {
  v8::V8::Initialize();
  ContextInitializer initializer;

  RegExpMacroAssemblerIA32 m(RegExpMacroAssemblerIA32::ASCII, 3);

  m.WriteCurrentPositionToRegister(0, 0);
  m.AdvanceCurrentPosition(2);
  m.WriteCurrentPositionToRegister(1, 0);
  Label nomatch;
  m.CheckNotBackReference(0, &nomatch);
  m.Fail();
  m.Bind(&nomatch);
  m.AdvanceCurrentPosition(2);
  Label missing_match;
  m.CheckNotBackReference(0, &missing_match);
  m.WriteCurrentPositionToRegister(2, 0);
  m.Succeed();
  m.Bind(&missing_match);
  m.Fail();

  // The backslash must be escaped: "\1" in a C++ literal is the octal
  // escape for character 0x01, not the regexp back reference \1.
  Handle<String> source = Factory::NewStringFromAscii(CStrVector("^(..)..\\1"));
  Handle<Object> code_object = m.GetCode(source);
  Handle<Code> code = Handle<Code>::cast(code_object);

  Handle<String> input = Factory::NewStringFromAscii(CStrVector("fooofo"));
  Handle<SeqAsciiString> seq_input = Handle<SeqAsciiString>::cast(input);
  Address start_adr = seq_input->GetCharsAddress();

  int output[3];
  RegExpMacroAssemblerIA32::Result result =
      ExecuteIA32(*code,
                  *input,
                  0,
                  start_adr,
                  start_adr + input->length(),
                  output,
                  true);

  CHECK_EQ(RegExpMacroAssemblerIA32::SUCCESS, result);
  CHECK_EQ(0, output[0]);
  CHECK_EQ(2, output[1]);
  CHECK_EQ(6, output[2]);
}
937

    
938

    
939
// UC16 counterpart of the ASCII back-reference test.  Registers 0 and 1
// delimit the first two characters ('f', 0x2028) of the two-byte input.
// The back reference is expected not to match at position 2 ("oo"), and
// to match at position 4 ('f', 0x2028), leaving position 6 in register 2.
TEST(MacroAssemblerIA32BackReferenceUC16) {
  v8::V8::Initialize();
  ContextInitializer initializer;

  RegExpMacroAssemblerIA32 m(RegExpMacroAssemblerIA32::UC16, 3);

  m.WriteCurrentPositionToRegister(0, 0);
  m.AdvanceCurrentPosition(2);
  m.WriteCurrentPositionToRegister(1, 0);
  Label nomatch;
  m.CheckNotBackReference(0, &nomatch);
  m.Fail();
  m.Bind(&nomatch);
  m.AdvanceCurrentPosition(2);
  Label missing_match;
  m.CheckNotBackReference(0, &missing_match);
  m.WriteCurrentPositionToRegister(2, 0);
  m.Succeed();
  m.Bind(&missing_match);
  m.Fail();

  // The backslash must be escaped: "\1" in a C++ literal is the octal
  // escape for character 0x01, not the regexp back reference \1.
  Handle<String> source = Factory::NewStringFromAscii(CStrVector("^(..)..\\1"));
  Handle<Object> code_object = m.GetCode(source);
  Handle<Code> code = Handle<Code>::cast(code_object);

  const uc16 input_data[6] = {'f', 0x2028, 'o', 'o', 'f', 0x2028};
  Handle<String> input =
      Factory::NewStringFromTwoByte(Vector<const uc16>(input_data, 6));
  Handle<SeqTwoByteString> seq_input = Handle<SeqTwoByteString>::cast(input);
  Address start_adr = seq_input->GetCharsAddress();

  int output[3];
  // The end address is in bytes: two bytes per character.
  RegExpMacroAssemblerIA32::Result result =
      ExecuteIA32(*code,
                  *input,
                  0,
                  start_adr,
                  start_adr + input->length() * 2,
                  output,
                  true);

  CHECK_EQ(RegExpMacroAssemblerIA32::SUCCESS, result);
  CHECK_EQ(0, output[0]);
  CHECK_EQ(2, output[1]);
  CHECK_EQ(6, output[2]);
}
985

    
986

    
987

    
988
// Exercises CheckNotAtStart.  The generated code (labelled "(^f|ob)")
// takes one path when execution starts at the beginning of the input and
// another when it does not.  It is run twice on "foobar": once from
// offset 0 with at_start = true and once from offset 3 with
// at_start = false; both runs are expected to succeed.
TEST(MacroAssemblerIA32AtStart) {
  v8::V8::Initialize();
  ContextInitializer initializer;

  RegExpMacroAssemblerIA32 m(RegExpMacroAssemblerIA32::ASCII, 0);

  Label mid_string, prev_is_newline, reject;
  m.CheckNotAtStart(&mid_string);
  // Check that prevchar = '\n' and current = 'f'.
  m.CheckCharacter('\n', &prev_is_newline);
  m.Bind(&reject);
  m.Fail();
  m.Bind(&prev_is_newline);
  m.LoadCurrentCharacter(0, &reject);
  m.CheckNotCharacter('f', &reject);
  m.Succeed();

  m.Bind(&mid_string);
  // Check that prevchar = 'o' and current = 'b'.
  Label prev_is_o;
  m.CheckCharacter('o', &prev_is_o);
  m.Fail();
  m.Bind(&prev_is_o);
  m.LoadCurrentCharacter(0, &reject);
  m.CheckNotCharacter('b', &reject);
  m.Succeed();

  Handle<String> source = Factory::NewStringFromAscii(CStrVector("(^f|ob)"));
  Handle<Code> code = Handle<Code>::cast(m.GetCode(source));

  Handle<String> input = Factory::NewStringFromAscii(CStrVector("foobar"));
  Handle<SeqAsciiString> seq_input = Handle<SeqAsciiString>::cast(input);
  Address input_begin = seq_input->GetCharsAddress();

  // Run from the very start of the string.
  RegExpMacroAssemblerIA32::Result result = ExecuteIA32(
      *code, *input, 0, input_begin, input_begin + input->length(), NULL, true);

  CHECK_EQ(RegExpMacroAssemblerIA32::SUCCESS, result);

  // Run again from offset 3 (between "foo" and "bar"), not at start.
  result = ExecuteIA32(*code,
                       *input,
                       3,
                       input_begin + 3,
                       input_begin + input->length(),
                       NULL,
                       false);

  CHECK_EQ(RegExpMacroAssemblerIA32::SUCCESS, result);
}
1044

    
1045

    
1046
// Exercises CheckNotBackReferenceIgnoreCase on "aBcAbCABCxYzab".
// Registers 2 and 3 delimit the first three characters ("aBc").  The
// case-insensitive back reference is expected to match "AbC" and "ABC",
// to not match "xYz", and, after skipping "xYz", to not match the
// two-character tail "ab".  Expected output registers: 0, 12, 0, 3.
TEST(MacroAssemblerIA32BackRefNoCase) {
  v8::V8::Initialize();
  ContextInitializer initializer;

  RegExpMacroAssemblerIA32 m(RegExpMacroAssemblerIA32::ASCII, 4);

  Label fail, succ;

  m.WriteCurrentPositionToRegister(0, 0);
  m.WriteCurrentPositionToRegister(2, 0);
  m.AdvanceCurrentPosition(3);
  m.WriteCurrentPositionToRegister(3, 0);
  m.CheckNotBackReferenceIgnoreCase(2, &fail);  // Match "AbC".
  m.CheckNotBackReferenceIgnoreCase(2, &fail);  // Match "ABC".
  Label expected_fail;
  m.CheckNotBackReferenceIgnoreCase(2, &expected_fail);
  m.Bind(&fail);
  m.Fail();

  m.Bind(&expected_fail);
  m.AdvanceCurrentPosition(3);  // Skip "xYz"
  m.CheckNotBackReferenceIgnoreCase(2, &succ);
  m.Fail();

  m.Bind(&succ);
  m.WriteCurrentPositionToRegister(1, 0);
  m.Succeed();

  // The backslashes must be escaped: "\1" in a C++ literal is the octal
  // escape for character 0x01, not the regexp back reference \1.
  Handle<String> source =
      Factory::NewStringFromAscii(
          CStrVector("^(abc)\\1\\1(?!\\1)...(?!\\1)"));
  Handle<Object> code_object = m.GetCode(source);
  Handle<Code> code = Handle<Code>::cast(code_object);

  Handle<String> input =
      Factory::NewStringFromAscii(CStrVector("aBcAbCABCxYzab"));
  Handle<SeqAsciiString> seq_input = Handle<SeqAsciiString>::cast(input);
  Address start_adr = seq_input->GetCharsAddress();

  int output[4];
  RegExpMacroAssemblerIA32::Result result =
      ExecuteIA32(*code,
                  *input,
                  0,
                  start_adr,
                  start_adr + input->length(),
                  output,
                  true);

  CHECK_EQ(RegExpMacroAssemblerIA32::SUCCESS, result);
  CHECK_EQ(0, output[0]);
  CHECK_EQ(12, output[1]);
  CHECK_EQ(0, output[2]);
  CHECK_EQ(3, output[3]);
}
1100

    
1101

    
1102

    
1103
// Exercises register and backtrack-stack operations: pushing and popping
// registers, saving and restoring the stack pointer, counting loops via
// IfRegisterLT / IfRegisterGE, and CheckGreedyLoop.  The five output
// registers are expected to end up as [0, 3, 6, 9, 9].
TEST(MacroAssemblerIA32Registers) {
  v8::V8::Initialize();
  ContextInitializer initializer;

  RegExpMacroAssemblerIA32 m(RegExpMacroAssemblerIA32::ASCII, 5);

  uc16 foo_chars[3] = {'f', 'o', 'o'};
  Vector<const uc16> foo(foo_chars, 3);

  // out1..out5 are the output registers; sp and loop_cnt are scratch.
  enum registers { out1, out2, out3, out4, out5, sp, loop_cnt };
  Label fail;
  Label backtrack;

  // --- Stack-pointer save/restore -------------------------------------
  m.WriteCurrentPositionToRegister(out1, 0);  // Output: [0]
  m.PushRegister(out1, RegExpMacroAssembler::kNoStackLimitCheck);
  m.PushBacktrack(&backtrack);
  m.WriteStackPointerToRegister(sp);
  // Fill stack and registers
  m.AdvanceCurrentPosition(2);
  m.WriteCurrentPositionToRegister(out1, 0);
  m.PushRegister(out1, RegExpMacroAssembler::kNoStackLimitCheck);
  m.PushBacktrack(&fail);
  // Drop backtrack stack frames.
  m.ReadStackPointerFromRegister(sp);
  // And take the first backtrack (to &backtrack)
  m.Backtrack();

  // Emitted directly after the Backtrack() above and before the next
  // bound label.
  m.PushCurrentPosition();
  m.AdvanceCurrentPosition(2);
  m.PopCurrentPosition();

  m.Bind(&backtrack);
  m.PopRegister(out1);
  m.ReadCurrentPositionFromRegister(out1);
  m.AdvanceCurrentPosition(3);
  m.WriteCurrentPositionToRegister(out2, 0);  // [0,3]

  // --- Counting up with IfRegisterLT ----------------------------------
  Label count_up;
  m.SetRegister(loop_cnt, 0);  // loop counter
  m.Bind(&count_up);
  m.AdvanceRegister(loop_cnt, 1);
  m.AdvanceCurrentPosition(1);
  m.IfRegisterLT(loop_cnt, 3, &count_up);
  m.WriteCurrentPositionToRegister(out3, 0);  // [0,3,6]

  // --- Counting down with IfRegisterGE --------------------------------
  Label count_down;
  m.SetRegister(loop_cnt, 2);  // loop counter
  m.Bind(&count_down);
  m.AdvanceRegister(loop_cnt, -1);
  m.AdvanceCurrentPosition(1);
  m.IfRegisterGE(loop_cnt, 0, &count_down);
  m.WriteCurrentPositionToRegister(out4, 0);  // [0,3,6,9]

  // --- CheckGreedyLoop -------------------------------------------------
  Label greedy;
  Label greedy_done;
  m.PushRegister(out4, RegExpMacroAssembler::kNoStackLimitCheck);
  m.PushRegister(out4, RegExpMacroAssembler::kNoStackLimitCheck);
  m.ReadCurrentPositionFromRegister(out3);
  m.Bind(&greedy);
  m.AdvanceCurrentPosition(1);
  m.CheckGreedyLoop(&greedy_done);
  m.GoTo(&greedy);
  m.Bind(&greedy_done);
  m.PopCurrentPosition();
  m.WriteCurrentPositionToRegister(out5, 0);  // [0,3,6,9,9]

  m.Succeed();

  m.Bind(&fail);
  m.Fail();

  Handle<String> source =
      Factory::NewStringFromAscii(CStrVector("<loop test>"));
  Handle<Code> code = Handle<Code>::cast(m.GetCode(source));

  // String long enough for test (content doesn't matter).
  Handle<String> input =
      Factory::NewStringFromAscii(CStrVector("foofoofoofoofoo"));
  Handle<SeqAsciiString> seq_input = Handle<SeqAsciiString>::cast(input);
  Address input_begin = seq_input->GetCharsAddress();
  Address input_limit = input_begin + input->length();

  int output[5];
  RegExpMacroAssemblerIA32::Result result =
      ExecuteIA32(*code, *input, 0, input_begin, input_limit, output, true);

  CHECK_EQ(RegExpMacroAssemblerIA32::SUCCESS, result);
  CHECK_EQ(0, output[0]);
  CHECK_EQ(3, output[1]);
  CHECK_EQ(6, output[2]);
  CHECK_EQ(9, output[3]);
  CHECK_EQ(9, output[4]);
}
1201

    
1202

    
1203
// Emits an endless loop that pushes a backtrack entry on every iteration.
// Execution is expected to return EXCEPTION and leave a pending exception
// on Top, which the test then clears.
TEST(MacroAssemblerIA32StackOverflow) {
  v8::V8::Initialize();
  ContextInitializer initializer;

  RegExpMacroAssemblerIA32 m(RegExpMacroAssemblerIA32::ASCII, 0);

  Label forever;
  m.Bind(&forever);
  m.PushBacktrack(&forever);
  m.GoTo(&forever);

  Handle<String> source =
      Factory::NewStringFromAscii(CStrVector("<stack overflow test>"));
  Handle<Code> code = Handle<Code>::cast(m.GetCode(source));

  // String long enough for test (content doesn't matter).
  Handle<String> input =
      Factory::NewStringFromAscii(CStrVector("dummy"));
  Handle<SeqAsciiString> seq_input = Handle<SeqAsciiString>::cast(input);
  Address input_begin = seq_input->GetCharsAddress();
  Address input_limit = input_begin + input->length();

  RegExpMacroAssemblerIA32::Result result =
      ExecuteIA32(*code, *input, 0, input_begin, input_limit, NULL, true);

  CHECK_EQ(RegExpMacroAssemblerIA32::EXCEPTION, result);
  CHECK(Top::has_pending_exception());
  Top::clear_pending_exception();
}
1238

    
1239

    
1240
// Uses a very high register index (8000) alongside registers 0 and 1.
// The value written to the high register with offset 42 is pushed and
// popped into register 1; the test expects captures 0 and 42.
TEST(MacroAssemblerIA32LotsOfRegisters) {
  v8::V8::Initialize();
  ContextInitializer initializer;

  RegExpMacroAssemblerIA32 m(RegExpMacroAssemblerIA32::ASCII, 2);

  // At least 2048, to ensure the space allocated for registers
  // spans one full page.
  const int kHighRegister = 8000;
  m.WriteCurrentPositionToRegister(kHighRegister, 42);
  m.WriteCurrentPositionToRegister(0, 0);
  m.WriteCurrentPositionToRegister(1, 1);
  Label after_backref;
  // Performs a system-stack push.
  m.CheckNotBackReference(0, &after_backref);
  m.Bind(&after_backref);
  m.PushRegister(kHighRegister, RegExpMacroAssembler::kNoStackLimitCheck);
  m.PopRegister(1);
  m.Succeed();

  Handle<String> source =
      Factory::NewStringFromAscii(CStrVector("<huge register space test>"));
  Handle<Code> code = Handle<Code>::cast(m.GetCode(source));

  // String long enough for test (content doesn't matter).
  Handle<String> input =
      Factory::NewStringFromAscii(CStrVector("sample text"));
  Handle<SeqAsciiString> seq_input = Handle<SeqAsciiString>::cast(input);
  Address input_begin = seq_input->GetCharsAddress();
  Address input_limit = input_begin + input->length();

  int captures[2];
  RegExpMacroAssemblerIA32::Result result =
      ExecuteIA32(*code, *input, 0, input_begin, input_limit, captures, true);

  CHECK_EQ(RegExpMacroAssemblerIA32::SUCCESS, result);
  CHECK_EQ(0, captures[0]);
  CHECK_EQ(42, captures[1]);

  Top::clear_pending_exception();
}
1286

    
1287

    
1288

    
1289
#endif  // !defined ARM
1290

    
1291
// Checks DispatchTableConstructor::AddInverse.  For ten pseudo-random
// sets of ranges, every code point below kLimit must have choice 0 set in
// the table exactly when it is NOT covered by one of the ranges.  A final
// case checks the range [0xFFF0, 0xFFFE]: 0xFFFE must be off, 0xFFFF on.
TEST(AddInverseToTable) {
  static const int kLimit = 1000;
  static const int kRangeCount = 16;
  for (int round = 0; round < 10; round++) {
    ZoneScope zone_scope(DELETE_ON_EXIT);
    ZoneList<CharacterRange>* ranges =
        new ZoneList<CharacterRange>(kRangeCount);
    for (int r = 0; r < kRangeCount; r++) {
      int from = PseudoRandom(round + 87, r + 25) % kLimit;
      int to = from + (PseudoRandom(r + 87, round + 25) % (kLimit / 20));
      if (to > kLimit) {
        to = kLimit;
      }
      ranges->Add(CharacterRange(from, to));
    }

    DispatchTable table;
    DispatchTableConstructor cons(&table, false);
    cons.set_choice_index(0);
    cons.AddInverse(ranges);

    for (int c = 0; c < kLimit; c++) {
      // Is c covered by any of the generated ranges?
      bool covered = false;
      for (int r = 0; r < kRangeCount; r++) {
        if (ranges->at(r).Contains(c)) {
          covered = true;
          break;
        }
      }
      OutSet* set = table.Get(c);
      CHECK_EQ(covered, set->Get(0) == false);
    }
  }

  // Boundary case close to the top of the 16-bit range.
  ZoneScope zone_scope(DELETE_ON_EXIT);
  ZoneList<CharacterRange>* ranges =
      new ZoneList<CharacterRange>(1);
  ranges->Add(CharacterRange(0xFFF0, 0xFFFE));
  DispatchTable table;
  DispatchTableConstructor cons(&table, false);
  cons.set_choice_index(0);
  cons.AddInverse(ranges);
  CHECK(!table.Get(0xFFFE)->Get(0));
  CHECK(table.Get(0xFFFF)->Get(0));
}
1327

    
1328

    
1329
// Returns the result of unibrow::Ecma262Canonicalize::Convert for c, or c
// itself when the conversion produces no output.  Any non-empty result
// must consist of exactly one character.
static uc32 canonicalize(uc32 c) {
  unibrow::uchar converted[unibrow::Ecma262Canonicalize::kMaxWidth];
  const int produced =
      unibrow::Ecma262Canonicalize::Convert(c, '\0', converted, NULL);
  if (produced == 0) return c;
  CHECK_EQ(1, produced);
  return converted[0];
}
1339

    
1340

    
1341
// Checks canonicalization against three expectations:
//  * each ASCII lower/upper letter pair canonicalizes to the same value
//    and un-canonicalizes to exactly {upper, lower};
//  * no character >= 128 canonicalizes to a value below 128;
//  * canonicalize(c) equals the ToUppercase mapping of c, except that c is
//    kept when the mapping has more than one character or would take a
//    character >= 128 to one below 128.
TEST(LatinCanonicalize) {
  unibrow::Mapping<unibrow::Ecma262UnCanonicalize> un_canonicalize;
  for (char lo = 'a'; lo <= 'z'; lo++) {
    char up = lo + ('A' - 'a');
    CHECK_EQ(canonicalize(lo), canonicalize(up));
    unibrow::uchar variants[unibrow::Ecma262UnCanonicalize::kMaxWidth];
    int variant_count = un_canonicalize.get(lo, '\0', variants);
    CHECK_EQ(2, variant_count);
    CHECK_EQ(up, variants[0]);
    CHECK_EQ(lo, variants[1]);
  }

  for (uc32 c = 128; c < (1 << 21); c++) {
    CHECK_GE(canonicalize(c), 128);
  }

  unibrow::Mapping<unibrow::ToUppercase> to_upper;
  for (uc32 c = 0; c < (1 << 21); c++) {
    unibrow::uchar mapped[unibrow::ToUppercase::kMaxWidth];
    int mapped_count = to_upper.get(c, '\0', mapped);
    if (mapped_count == 0) {
      // No mapping: treat the character as mapping to itself.
      mapped_count = 1;
      mapped[0] = c;
    }
    uc32 expected = mapped[0];
    const bool multi_char = mapped_count > 1;
    const bool drops_into_ascii = (c >= 128 && expected < 128);
    if (multi_char || drops_into_ascii) {
      expected = c;
    }
    CHECK_EQ(expected, canonicalize(c));
  }
}
1368

    
1369

    
1370
// Returns the result of unibrow::CanonicalizationRange::Convert for c, or
// c itself when the conversion produces no output.  Any non-empty result
// must consist of exactly one value.
static uc32 CanonRange(uc32 c) {
  unibrow::uchar converted[unibrow::CanonicalizationRange::kMaxWidth];
  const int produced =
      unibrow::CanonicalizationRange::Convert(c, '\0', converted, NULL);
  if (produced == 0) return c;
  CHECK_EQ(1, produced);
  return converted[0];
}
1380

    
1381

    
1382
// Cross-checks the CanonicalizationRange table against
// Ecma262UnCanonicalize in two passes over
// [0, CharacterRange::kRangeCanonicalizeMax).
TEST(RangeCanonicalization) {
  CHECK_NE(CanonRange(0) & CharacterRange::kStartMarker, 0);

  // Pass 1: check that we arrive at the same result when using the basic
  // range canonicalization primitives as when using immediate
  // canonicalization.  Only the result lengths are compared.
  unibrow::Mapping<unibrow::Ecma262UnCanonicalize> un_canonicalize;
  for (int c = 0; c < CharacterRange::kRangeCanonicalizeMax; c++) {
    int range = CanonRange(c);
    unibrow::uchar indirect[unibrow::Ecma262UnCanonicalize::kMaxWidth];
    int indirect_length = 0;
    if ((range & CharacterRange::kStartMarker) != 0) {
      // c carries the start marker: look it up directly.
      indirect_length = un_canonicalize.get(c, '\0', indirect);
    } else {
      // Otherwise look up c - range and shift each result by range.
      indirect_length = un_canonicalize.get(c - range, '\0', indirect);
      for (int k = 0; k < indirect_length; k++) {
        indirect[k] += range;
      }
    }
    unibrow::uchar direct[unibrow::Ecma262UnCanonicalize::kMaxWidth];
    int direct_length = un_canonicalize.get(c, '\0', direct);
    CHECK_EQ(direct_length, indirect_length);
  }

  // Pass 2: check that we arrive at the same results when skipping over
  // canonicalization ranges.
  int block_start = 0;
  while (block_start < CharacterRange::kRangeCanonicalizeMax) {
    uc32 start = CanonRange(block_start);
    CHECK_NE((start & CharacterRange::kStartMarker), 0);
    unsigned dist = start & CharacterRange::kPayloadMask;
    unibrow::uchar first[unibrow::Ecma262UnCanonicalize::kMaxWidth];
    int first_length = un_canonicalize.get(block_start, '\0', first);
    for (unsigned offset = 1; offset < dist; offset++) {
      CHECK_EQ(offset, CanonRange(block_start + offset));
      unibrow::uchar succ[unibrow::Ecma262UnCanonicalize::kMaxWidth];
      int succ_length = un_canonicalize.get(block_start + offset, '\0', succ);
      CHECK_EQ(first_length, succ_length);
      for (int j = 0; j < succ_length; j++) {
        // Each entry must equal the block's first entry plus the offset.
        int calc = first[j] + offset;
        int found = succ[j];
        CHECK_EQ(calc, found);
      }
    }
    block_start += dist;
  }
}
1426

    
1427

    
1428
// For every 16-bit code point, un-canonicalizing any member of its
// un-canonicalized set must give the same set again (same length and the
// same elements in the same order).
TEST(UncanonicalizeEquivalence) {
  unibrow::Mapping<unibrow::Ecma262UnCanonicalize> un_canonicalize;
  unibrow::uchar members[unibrow::Ecma262UnCanonicalize::kMaxWidth];
  for (int c = 0; c < (1 << 16); c++) {
    const int member_count = un_canonicalize.get(c, '\0', members);
    for (int m = 0; m < member_count; m++) {
      unibrow::uchar again[unibrow::Ecma262UnCanonicalize::kMaxWidth];
      int again_count = un_canonicalize.get(members[m], '\0', again);
      CHECK_EQ(member_count, again_count);
      for (int k = 0; k < member_count; k++) {
        CHECK_EQ(static_cast<int>(members[k]), static_cast<int>(again[k]));
      }
    }
  }
}
1442

    
1443

    
1444
static void TestRangeCaseIndependence(CharacterRange input,
1445
                                      Vector<CharacterRange> expected) {
1446
  ZoneScope zone_scope(DELETE_ON_EXIT);
1447
  int count = expected.length();
1448
  ZoneList<CharacterRange>* list = new ZoneList<CharacterRange>(count);
1449
  input.AddCaseEquivalents(list);
1450
  CHECK_EQ(count, list->length());
1451
  for (int i = 0; i < list->length(); i++) {
1452
    CHECK_EQ(expected[i].from(), list->at(i).from());
1453
    CHECK_EQ(expected[i].to(), list->at(i).to());
1454
  }
1455
}
1456

    
1457

    
1458
// Convenience wrapper around TestRangeCaseIndependence for the case where
// exactly one range is expected.
static void TestSimpleRangeCaseIndependence(CharacterRange input,
                                            CharacterRange expected) {
  EmbeddedVector<CharacterRange, 1> single;
  single[0] = expected;
  TestRangeCaseIndependence(input, single);
}
1464

    
1465

    
1466
// Table of (input range, expected case-equivalent range) pairs checked
// through TestSimpleRangeCaseIndependence.
TEST(CharacterRangeCaseIndependence) {
  // Single lower-case letters.
  TestSimpleRangeCaseIndependence(
      CharacterRange::Singleton('a'), CharacterRange::Singleton('A'));
  TestSimpleRangeCaseIndependence(
      CharacterRange::Singleton('z'), CharacterRange::Singleton('Z'));

  // Lower-case ranges.
  TestSimpleRangeCaseIndependence(
      CharacterRange('a', 'z'), CharacterRange('A', 'Z'));
  TestSimpleRangeCaseIndependence(
      CharacterRange('c', 'f'), CharacterRange('C', 'F'));
  TestSimpleRangeCaseIndependence(
      CharacterRange('a', 'b'), CharacterRange('A', 'B'));
  TestSimpleRangeCaseIndependence(
      CharacterRange('y', 'z'), CharacterRange('Y', 'Z'));
  // One character beyond the lower-case letters on either side.
  TestSimpleRangeCaseIndependence(
      CharacterRange('a' - 1, 'z' + 1), CharacterRange('A', 'Z'));

  // Upper-case ranges.
  TestSimpleRangeCaseIndependence(
      CharacterRange('A', 'Z'), CharacterRange('a', 'z'));
  TestSimpleRangeCaseIndependence(
      CharacterRange('C', 'F'), CharacterRange('c', 'f'));
  // One character beyond the upper-case letters on either side.
  TestSimpleRangeCaseIndependence(
      CharacterRange('A' - 1, 'Z' + 1), CharacterRange('a', 'z'));

  // Here we need to add [l-z] to complete the case independence of
  // [A-Za-z] but we expect [a-z] to be added since we always add a
  // whole block at a time.
  TestSimpleRangeCaseIndependence(
      CharacterRange('A', 'k'), CharacterRange('a', 'z'));
}
1493

    
1494

    
1495
// Returns true when c lies within [from(), to()] of at least one range in
// |ranges|.  A NULL list contains nothing.
static bool InClass(uc16 c, ZoneList<CharacterRange>* ranges) {
  if (ranges == NULL) return false;
  const int range_count = ranges->length();
  for (int idx = 0; idx < range_count; idx++) {
    CharacterRange candidate = ranges->at(idx);
    const bool below = c < candidate.from();
    const bool above = c > candidate.to();
    if (!below && !above) return true;
  }
  return false;
}
1505

    
1506

    
1507
// Splits the "everything" class against the word-character bounds and
// checks, for every 16-bit code point in the base class, that it lands in
// |included| exactly when it is inside one of the overlay intervals and
// in |excluded| otherwise.  Code points outside the base class must
// appear in neither list.
TEST(CharClassDifference) {
  ZoneScope zone_scope(DELETE_ON_EXIT);
  ZoneList<CharacterRange>* base = new ZoneList<CharacterRange>(1);
  base->Add(CharacterRange::Everything());
  Vector<const uc16> overlay = CharacterRange::GetWordBounds();
  ZoneList<CharacterRange>* included = NULL;
  ZoneList<CharacterRange>* excluded = NULL;
  CharacterRange::Split(base, overlay, &included, &excluded);

  for (int c = 0; c < (1 << 16); c++) {
    if (!InClass(c, base)) {
      CHECK(!InClass(c, included));
      CHECK(!InClass(c, excluded));
      continue;
    }

    // The overlay is read as consecutive (low, high) pairs.
    bool in_overlay = false;
    for (int j = 0; j < overlay.length(); j += 2) {
      if (overlay[j] <= c && c <= overlay[j+1]) {
        in_overlay = true;
        break;
      }
    }
    CHECK_EQ(in_overlay, InClass(c, included));
    CHECK_EQ(!in_overlay, InClass(c, excluded));
  }
}
1531

    
1532

    
1533
// Runs Execute on a pattern containing a back reference to an optional
// capture.
TEST(Graph) {
  V8::Initialize(NULL);
  // The backslash must be escaped: "\1" in a C++ literal is the octal
  // escape for character 0x01, so the unescaped form would hand Execute a
  // pattern with a control character instead of the back reference \1.
  Execute("(?:(?:x(.))?\\1)+$", false, true, true);
}