The data contained in this repository can be downloaded to your computer using one of several clients.
Please see the documentation of your version control software client for more information.

Please select the desired protocol below to get the URL.

This URL has Read-Only access.

Statistics
| Branch: | Revision:

main_repo / deps / v8 / test / cctest / test-regexp.cc @ f230a1cf

History | View | Annotate | Download (58 KB)

1
// Copyright 2012 the V8 project authors. All rights reserved.
2
// Redistribution and use in source and binary forms, with or without
3
// modification, are permitted provided that the following conditions are
4
// met:
5
//
6
//     * Redistributions of source code must retain the above copyright
7
//       notice, this list of conditions and the following disclaimer.
8
//     * Redistributions in binary form must reproduce the above
9
//       copyright notice, this list of conditions and the following
10
//       disclaimer in the documentation and/or other materials provided
11
//       with the distribution.
12
//     * Neither the name of Google Inc. nor the names of its
13
//       contributors may be used to endorse or promote products derived
14
//       from this software without specific prior written permission.
15
//
16
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
18
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
19
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
20
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
26
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27

    
28

    
29
#include <stdlib.h>
30

    
31
#include "v8.h"
32

    
33
#include "ast.h"
34
#include "char-predicates-inl.h"
35
#include "cctest.h"
36
#include "jsregexp.h"
37
#include "parser.h"
38
#include "regexp-macro-assembler.h"
39
#include "regexp-macro-assembler-irregexp.h"
40
#include "string-stream.h"
41
#include "zone-inl.h"
42
#ifdef V8_INTERPRETED_REGEXP
43
#include "interpreter-irregexp.h"
44
#else  // V8_INTERPRETED_REGEXP
45
#include "macro-assembler.h"
46
#include "code.h"
47
#if V8_TARGET_ARCH_ARM
48
#include "arm/assembler-arm.h"
49
#include "arm/macro-assembler-arm.h"
50
#include "arm/regexp-macro-assembler-arm.h"
51
#endif
52
#if V8_TARGET_ARCH_MIPS
53
#include "mips/assembler-mips.h"
54
#include "mips/macro-assembler-mips.h"
55
#include "mips/regexp-macro-assembler-mips.h"
56
#endif
57
#if V8_TARGET_ARCH_X64
58
#include "x64/assembler-x64.h"
59
#include "x64/macro-assembler-x64.h"
60
#include "x64/regexp-macro-assembler-x64.h"
61
#endif
62
#if V8_TARGET_ARCH_IA32
63
#include "ia32/assembler-ia32.h"
64
#include "ia32/macro-assembler-ia32.h"
65
#include "ia32/regexp-macro-assembler-ia32.h"
66
#endif
67
#endif  // V8_INTERPRETED_REGEXP
68

    
69
using namespace v8::internal;
70

    
71

    
72
static bool CheckParse(const char* input) {
73
  V8::Initialize(NULL);
74
  v8::HandleScope scope(CcTest::isolate());
75
  Zone zone(CcTest::i_isolate());
76
  FlatStringReader reader(CcTest::i_isolate(), CStrVector(input));
77
  RegExpCompileData result;
78
  return v8::internal::RegExpParser::ParseRegExp(
79
      &reader, false, &result, &zone);
80
}
81

    
82

    
83
// Parses |input| and returns the textual form of the resulting parse tree.
// Aborts the test (via CHECK) if parsing fails, yields no tree, or reports
// an error.
static SmartArrayPointer<const char> Parse(const char* input) {
  V8::Initialize(NULL);
  v8::HandleScope scope(CcTest::isolate());
  Isolate* isolate = CcTest::i_isolate();
  Zone zone(isolate);
  FlatStringReader reader(isolate, CStrVector(input));
  RegExpCompileData result;
  CHECK(v8::internal::RegExpParser::ParseRegExp(
      &reader, false, &result, &zone));
  CHECK(result.tree != NULL);
  CHECK(result.error.is_null());
  // The printed tree is handed back to the caller through the smart pointer.
  return result.tree->ToString(&zone);
}
96

    
97

    
98
static bool CheckSimple(const char* input) {
99
  V8::Initialize(NULL);
100
  v8::HandleScope scope(CcTest::isolate());
101
  Zone zone(CcTest::i_isolate());
102
  FlatStringReader reader(CcTest::i_isolate(), CStrVector(input));
103
  RegExpCompileData result;
104
  CHECK(v8::internal::RegExpParser::ParseRegExp(
105
      &reader, false, &result, &zone));
106
  CHECK(result.tree != NULL);
107
  CHECK(result.error.is_null());
108
  return result.simple;
109
}
110

    
111
struct MinMaxPair {
112
  int min_match;
113
  int max_match;
114
};
115

    
116

    
117
static MinMaxPair CheckMinMaxMatch(const char* input) {
118
  V8::Initialize(NULL);
119
  v8::HandleScope scope(CcTest::isolate());
120
  Zone zone(CcTest::i_isolate());
121
  FlatStringReader reader(CcTest::i_isolate(), CStrVector(input));
122
  RegExpCompileData result;
123
  CHECK(v8::internal::RegExpParser::ParseRegExp(
124
      &reader, false, &result, &zone));
125
  CHECK(result.tree != NULL);
126
  CHECK(result.error.is_null());
127
  int min_match = result.tree->min_match();
128
  int max_match = result.tree->max_match();
129
  MinMaxPair pair = { min_match, max_match };
130
  return pair;
131
}
132

    
133

    
134
// Assertion helpers for the parser tests.
//
// Each macro expands to a single statement that needs a terminating ';' at
// the call site, so it can be used safely inside unbraced if/else bodies.
//
//   CHECK_PARSE_ERROR(input)       - |input| must be rejected by the parser.
//   CHECK_PARSE_EQ(input, expected)- printed parse tree must equal |expected|.
//   CHECK_SIMPLE(input, simple)    - parser's "simple" flag must be |simple|.
//   CHECK_MIN_MAX(input, min, max) - tree's min/max match lengths must match.
#define CHECK_PARSE_ERROR(input) CHECK(!CheckParse(input))
#define CHECK_PARSE_EQ(input, expected) CHECK_EQ(expected, *Parse(input))
// No trailing semicolon here: the caller supplies it.
#define CHECK_SIMPLE(input, simple) CHECK_EQ(simple, CheckSimple(input))
// Wrapped in do/while(false) so the expansion is exactly one statement.
#define CHECK_MIN_MAX(input, min, max)                                         \
  do {                                                                         \
    MinMaxPair min_max = CheckMinMaxMatch(input);                              \
    CHECK_EQ(min, min_max.min_match);                                          \
    CHECK_EQ(max, min_max.max_match);                                          \
  } while (false)
142

    
143
// Exercises the regexp parser: printed parse trees for a wide range of
// patterns, the parser's "simple" flag, malformed-quantifier fallbacks, and
// the min/max match-length bounds computed for each tree.
TEST(Parser) {
  V8::Initialize(NULL);

  // A lone quantifier has nothing to repeat and must be rejected.
  CHECK_PARSE_ERROR("?");

  // Atoms, alternation, assertions and quantifiers.
  CHECK_PARSE_EQ("abc", "'abc'");
  CHECK_PARSE_EQ("", "%");
  CHECK_PARSE_EQ("abc|def", "(| 'abc' 'def')");
  CHECK_PARSE_EQ("abc|def|ghi", "(| 'abc' 'def' 'ghi')");
  CHECK_PARSE_EQ("^xxx$", "(: @^i 'xxx' @$i)");
  CHECK_PARSE_EQ("ab\\b\\d\\bcd", "(: 'ab' @b [0-9] @b 'cd')");
  CHECK_PARSE_EQ("\\w|\\d", "(| [0-9 A-Z _ a-z] [0-9])");
  CHECK_PARSE_EQ("a*", "(# 0 - g 'a')");
  CHECK_PARSE_EQ("a*?", "(# 0 - n 'a')");
  CHECK_PARSE_EQ("abc+", "(: 'ab' (# 1 - g 'c'))");
  CHECK_PARSE_EQ("abc+?", "(: 'ab' (# 1 - n 'c'))");
  CHECK_PARSE_EQ("xyz?", "(: 'xy' (# 0 1 g 'z'))");
  CHECK_PARSE_EQ("xyz??", "(: 'xy' (# 0 1 n 'z'))");
  CHECK_PARSE_EQ("xyz{0,1}", "(: 'xy' (# 0 1 g 'z'))");
  CHECK_PARSE_EQ("xyz{0,1}?", "(: 'xy' (# 0 1 n 'z'))");
  CHECK_PARSE_EQ("xyz{93}", "(: 'xy' (# 93 93 g 'z'))");
  CHECK_PARSE_EQ("xyz{93}?", "(: 'xy' (# 93 93 n 'z'))");
  CHECK_PARSE_EQ("xyz{1,32}", "(: 'xy' (# 1 32 g 'z'))");
  CHECK_PARSE_EQ("xyz{1,32}?", "(: 'xy' (# 1 32 n 'z'))");
  CHECK_PARSE_EQ("xyz{1,}", "(: 'xy' (# 1 - g 'z'))");
  CHECK_PARSE_EQ("xyz{1,}?", "(: 'xy' (# 1 - n 'z'))");
  CHECK_PARSE_EQ("a\\fb\\nc\\rd\\te\\vf", "'a\\x0cb\\x0ac\\x0dd\\x09e\\x0bf'");
  CHECK_PARSE_EQ("a\\nb\\bc", "(: 'a\\x0ab' @b 'c')");

  // Groups, captures and lookaheads.
  CHECK_PARSE_EQ("(?:foo)", "'foo'");
  CHECK_PARSE_EQ("(?: foo )", "' foo '");
  CHECK_PARSE_EQ("(foo|bar|baz)", "(^ (| 'foo' 'bar' 'baz'))");
  CHECK_PARSE_EQ("foo|(bar|baz)|quux", "(| 'foo' (^ (| 'bar' 'baz')) 'quux')");
  CHECK_PARSE_EQ("foo(?=bar)baz", "(: 'foo' (-> + 'bar') 'baz')");
  CHECK_PARSE_EQ("foo(?!bar)baz", "(: 'foo' (-> - 'bar') 'baz')");
  CHECK_PARSE_EQ("()", "(^ %)");
  CHECK_PARSE_EQ("(?=)", "(-> + %)");

  // Character classes.
  CHECK_PARSE_EQ("[]", "^[\\x00-\\uffff]");   // Doesn't compile on windows
  CHECK_PARSE_EQ("[^]", "[\\x00-\\uffff]");   // \uffff isn't in codepage 1252
  CHECK_PARSE_EQ("[x]", "[x]");
  CHECK_PARSE_EQ("[xyz]", "[x y z]");
  CHECK_PARSE_EQ("[a-zA-Z0-9]", "[a-z A-Z 0-9]");
  CHECK_PARSE_EQ("[-123]", "[- 1 2 3]");
  CHECK_PARSE_EQ("[^123]", "^[1 2 3]");
  CHECK_PARSE_EQ("]", "']'");
  CHECK_PARSE_EQ("}", "'}'");
  CHECK_PARSE_EQ("[a-b-c]", "[a-b - c]");
  CHECK_PARSE_EQ("[\\d]", "[0-9]");
  CHECK_PARSE_EQ("[x\\dz]", "[x 0-9 z]");
  CHECK_PARSE_EQ("[\\d-z]", "[0-9 - z]");
  CHECK_PARSE_EQ("[\\d-\\d]", "[0-9 - 0-9]");
  CHECK_PARSE_EQ("[z-\\d]", "[z - 0-9]");
  // Control character outside character class.
  CHECK_PARSE_EQ("\\cj\\cJ\\ci\\cI\\ck\\cK",
                 "'\\x0a\\x0a\\x09\\x09\\x0b\\x0b'");
  CHECK_PARSE_EQ("\\c!", "'\\c!'");
  CHECK_PARSE_EQ("\\c_", "'\\c_'");
  CHECK_PARSE_EQ("\\c~", "'\\c~'");
  CHECK_PARSE_EQ("\\c1", "'\\c1'");
  // Control character inside character class.
  CHECK_PARSE_EQ("[\\c!]", "[\\ c !]");
  CHECK_PARSE_EQ("[\\c_]", "[\\x1f]");
  CHECK_PARSE_EQ("[\\c~]", "[\\ c ~]");
  CHECK_PARSE_EQ("[\\ca]", "[\\x01]");
  CHECK_PARSE_EQ("[\\cz]", "[\\x1a]");
  CHECK_PARSE_EQ("[\\cA]", "[\\x01]");
  CHECK_PARSE_EQ("[\\cZ]", "[\\x1a]");
  CHECK_PARSE_EQ("[\\c1]", "[\\x11]");

  // Escaped syntax characters, octal escapes and back references.
  CHECK_PARSE_EQ("[a\\]c]", "[a ] c]");
  CHECK_PARSE_EQ("\\[\\]\\{\\}\\(\\)\\%\\^\\#\\ ", "'[]{}()%^# '");
  CHECK_PARSE_EQ("[\\[\\]\\{\\}\\(\\)\\%\\^\\#\\ ]", "[[ ] { } ( ) % ^ #  ]");
  CHECK_PARSE_EQ("\\0", "'\\x00'");
  CHECK_PARSE_EQ("\\8", "'8'");
  CHECK_PARSE_EQ("\\9", "'9'");
  CHECK_PARSE_EQ("\\11", "'\\x09'");
  CHECK_PARSE_EQ("\\11a", "'\\x09a'");
  CHECK_PARSE_EQ("\\011", "'\\x09'");
  CHECK_PARSE_EQ("\\00011", "'\\x0011'");
  CHECK_PARSE_EQ("\\118", "'\\x098'");
  CHECK_PARSE_EQ("\\111", "'I'");
  CHECK_PARSE_EQ("\\1111", "'I1'");
  CHECK_PARSE_EQ("(x)(x)(x)\\1", "(: (^ 'x') (^ 'x') (^ 'x') (<- 1))");
  CHECK_PARSE_EQ("(x)(x)(x)\\2", "(: (^ 'x') (^ 'x') (^ 'x') (<- 2))");
  CHECK_PARSE_EQ("(x)(x)(x)\\3", "(: (^ 'x') (^ 'x') (^ 'x') (<- 3))");
  CHECK_PARSE_EQ("(x)(x)(x)\\4", "(: (^ 'x') (^ 'x') (^ 'x') '\\x04')");
  CHECK_PARSE_EQ("(x)(x)(x)\\1*", "(: (^ 'x') (^ 'x') (^ 'x')"
                               " (# 0 - g (<- 1)))");
  CHECK_PARSE_EQ("(x)(x)(x)\\2*", "(: (^ 'x') (^ 'x') (^ 'x')"
                               " (# 0 - g (<- 2)))");
  CHECK_PARSE_EQ("(x)(x)(x)\\3*", "(: (^ 'x') (^ 'x') (^ 'x')"
                               " (# 0 - g (<- 3)))");
  CHECK_PARSE_EQ("(x)(x)(x)\\4*", "(: (^ 'x') (^ 'x') (^ 'x')"
                               " (# 0 - g '\\x04'))");
  CHECK_PARSE_EQ("(x)(x)(x)(x)(x)(x)(x)(x)(x)(x)\\10",
              "(: (^ 'x') (^ 'x') (^ 'x') (^ 'x') (^ 'x') (^ 'x')"
              " (^ 'x') (^ 'x') (^ 'x') (^ 'x') (<- 10))");
  CHECK_PARSE_EQ("(x)(x)(x)(x)(x)(x)(x)(x)(x)(x)\\11",
              "(: (^ 'x') (^ 'x') (^ 'x') (^ 'x') (^ 'x') (^ 'x')"
              " (^ 'x') (^ 'x') (^ 'x') (^ 'x') '\\x09')");
  CHECK_PARSE_EQ("(a)\\1", "(: (^ 'a') (<- 1))");
  CHECK_PARSE_EQ("(a\\1)", "(^ 'a')");
  CHECK_PARSE_EQ("(\\1a)", "(^ 'a')");
  CHECK_PARSE_EQ("(?=a)?a", "'a'");
  CHECK_PARSE_EQ("(?=a){0,10}a", "'a'");
  CHECK_PARSE_EQ("(?=a){1,10}a", "(: (-> + 'a') 'a')");
  CHECK_PARSE_EQ("(?=a){9,10}a", "(: (-> + 'a') 'a')");
  CHECK_PARSE_EQ("(?!a)?a", "'a'");
  CHECK_PARSE_EQ("\\1(a)", "(^ 'a')");
  CHECK_PARSE_EQ("(?!(a))\\1", "(: (-> - (^ 'a')) (<- 1))");
  CHECK_PARSE_EQ("(?!\\1(a\\1)\\1)\\1", "(: (-> - (: (^ 'a') (<- 1))) (<- 1))");
  CHECK_PARSE_EQ("[\\0]", "[\\x00]");
  CHECK_PARSE_EQ("[\\11]", "[\\x09]");
  CHECK_PARSE_EQ("[\\11a]", "[\\x09 a]");
  CHECK_PARSE_EQ("[\\011]", "[\\x09]");
  CHECK_PARSE_EQ("[\\00011]", "[\\x00 1 1]");
  CHECK_PARSE_EQ("[\\118]", "[\\x09 8]");
  CHECK_PARSE_EQ("[\\111]", "[I]");
  CHECK_PARSE_EQ("[\\1111]", "[I 1]");

  // Hex, unicode and incomplete escapes.
  CHECK_PARSE_EQ("\\x34", "'\x34'");
  CHECK_PARSE_EQ("\\x60", "'\x60'");
  CHECK_PARSE_EQ("\\x3z", "'x3z'");
  CHECK_PARSE_EQ("\\c", "'\\c'");
  CHECK_PARSE_EQ("\\u0034", "'\x34'");
  CHECK_PARSE_EQ("\\u003z", "'u003z'");
  CHECK_PARSE_EQ("foo[z]*", "(: 'foo' (# 0 - g [z]))");

  // The parser's "simple" flag: set only for the plain atom "a" below.
  CHECK_SIMPLE("", false);
  CHECK_SIMPLE("a", true);
  CHECK_SIMPLE("a|b", false);
  CHECK_SIMPLE("a\\n", false);
  CHECK_SIMPLE("^a", false);
  CHECK_SIMPLE("a$", false);
  CHECK_SIMPLE("a\\b!", false);
  CHECK_SIMPLE("a\\Bb", false);
  CHECK_SIMPLE("a*", false);
  CHECK_SIMPLE("a*?", false);
  CHECK_SIMPLE("a?", false);
  CHECK_SIMPLE("a??", false);
  CHECK_SIMPLE("a{0,1}?", false);
  CHECK_SIMPLE("a{1,1}?", false);
  CHECK_SIMPLE("a{1,2}?", false);
  CHECK_SIMPLE("a+?", false);
  CHECK_SIMPLE("(a)", false);
  CHECK_SIMPLE("(a)\\1", false);
  CHECK_SIMPLE("(\\1a)", false);
  CHECK_SIMPLE("\\1(a)", false);
  CHECK_SIMPLE("a\\s", false);
  CHECK_SIMPLE("a\\S", false);
  CHECK_SIMPLE("a\\d", false);
  CHECK_SIMPLE("a\\D", false);
  CHECK_SIMPLE("a\\w", false);
  CHECK_SIMPLE("a\\W", false);
  CHECK_SIMPLE("a.", false);
  CHECK_SIMPLE("a\\q", false);
  CHECK_SIMPLE("a[a]", false);
  CHECK_SIMPLE("a[^a]", false);
  CHECK_SIMPLE("a[a-z]", false);
  CHECK_SIMPLE("a[\\q]", false);
  CHECK_SIMPLE("a(?:b)", false);
  CHECK_SIMPLE("a(?=b)", false);
  CHECK_SIMPLE("a(?!b)", false);
  CHECK_SIMPLE("\\x60", false);
  CHECK_SIMPLE("\\u0060", false);
  CHECK_SIMPLE("\\cA", false);
  CHECK_SIMPLE("\\q", false);
  CHECK_SIMPLE("\\1112", false);
  CHECK_SIMPLE("\\0", false);
  CHECK_SIMPLE("(a)\\1", false);
  CHECK_SIMPLE("(?=a)?a", false);
  CHECK_SIMPLE("(?!a)?a\\1", false);
  CHECK_SIMPLE("(?:(?=a))a\\1", false);

  // Malformed brace quantifiers are parsed as literal text.
  CHECK_PARSE_EQ("a{}", "'a{}'");
  CHECK_PARSE_EQ("a{,}", "'a{,}'");
  CHECK_PARSE_EQ("a{", "'a{'");
  CHECK_PARSE_EQ("a{z}", "'a{z}'");
  CHECK_PARSE_EQ("a{1z}", "'a{1z}'");
  CHECK_PARSE_EQ("a{12z}", "'a{12z}'");
  CHECK_PARSE_EQ("a{12,", "'a{12,'");
  CHECK_PARSE_EQ("a{12,3b", "'a{12,3b'");
  CHECK_PARSE_EQ("{}", "'{}'");
  CHECK_PARSE_EQ("{,}", "'{,}'");
  CHECK_PARSE_EQ("{", "'{'");
  CHECK_PARSE_EQ("{z}", "'{z}'");
  CHECK_PARSE_EQ("{1z}", "'{1z}'");
  CHECK_PARSE_EQ("{12z}", "'{12z}'");
  CHECK_PARSE_EQ("{12,", "'{12,'");
  CHECK_PARSE_EQ("{12,3b", "'{12,3b'");

  // Minimum and maximum match lengths of the parsed tree.
  CHECK_MIN_MAX("a", 1, 1);
  CHECK_MIN_MAX("abc", 3, 3);
  CHECK_MIN_MAX("a[bc]d", 3, 3);
  CHECK_MIN_MAX("a|bc", 1, 2);
  CHECK_MIN_MAX("ab|c", 1, 2);
  CHECK_MIN_MAX("a||bc", 0, 2);
  CHECK_MIN_MAX("|", 0, 0);
  CHECK_MIN_MAX("(?:ab)", 2, 2);
  CHECK_MIN_MAX("(?:ab|cde)", 2, 3);
  CHECK_MIN_MAX("(?:ab)|cde", 2, 3);
  CHECK_MIN_MAX("(ab)", 2, 2);
  CHECK_MIN_MAX("(ab|cde)", 2, 3);
  CHECK_MIN_MAX("(ab)\\1", 2, 4);
  CHECK_MIN_MAX("(ab|cde)\\1", 2, 6);
  CHECK_MIN_MAX("(?:ab)?", 0, 2);
  CHECK_MIN_MAX("(?:ab)*", 0, RegExpTree::kInfinity);
  CHECK_MIN_MAX("(?:ab)+", 2, RegExpTree::kInfinity);
  CHECK_MIN_MAX("a?", 0, 1);
  CHECK_MIN_MAX("a*", 0, RegExpTree::kInfinity);
  CHECK_MIN_MAX("a+", 1, RegExpTree::kInfinity);
  CHECK_MIN_MAX("a??", 0, 1);
  CHECK_MIN_MAX("a*?", 0, RegExpTree::kInfinity);
  CHECK_MIN_MAX("a+?", 1, RegExpTree::kInfinity);
  CHECK_MIN_MAX("(?:a?)?", 0, 1);
  CHECK_MIN_MAX("(?:a*)?", 0, RegExpTree::kInfinity);
  CHECK_MIN_MAX("(?:a+)?", 0, RegExpTree::kInfinity);
  CHECK_MIN_MAX("(?:a?)+", 0, RegExpTree::kInfinity);
  CHECK_MIN_MAX("(?:a*)+", 0, RegExpTree::kInfinity);
  CHECK_MIN_MAX("(?:a+)+", 1, RegExpTree::kInfinity);
  CHECK_MIN_MAX("(?:a?)*", 0, RegExpTree::kInfinity);
  CHECK_MIN_MAX("(?:a*)*", 0, RegExpTree::kInfinity);
  CHECK_MIN_MAX("(?:a+)*", 0, RegExpTree::kInfinity);
  CHECK_MIN_MAX("a{0}", 0, 0);
  CHECK_MIN_MAX("(?:a+){0}", 0, 0);
  CHECK_MIN_MAX("(?:a+){0,0}", 0, 0);
  CHECK_MIN_MAX("a*b", 1, RegExpTree::kInfinity);
  CHECK_MIN_MAX("a+b", 2, RegExpTree::kInfinity);
  CHECK_MIN_MAX("a*b|c", 1, RegExpTree::kInfinity);
  CHECK_MIN_MAX("a+b|c", 1, RegExpTree::kInfinity);
  CHECK_MIN_MAX("(?:a{5,1000000}){3,1000000}", 15, RegExpTree::kInfinity);
  CHECK_MIN_MAX("(?:ab){4,7}", 8, 14);
  CHECK_MIN_MAX("a\\bc", 2, 2);
  CHECK_MIN_MAX("a\\Bc", 2, 2);
  CHECK_MIN_MAX("a\\sc", 3, 3);
  CHECK_MIN_MAX("a\\Sc", 3, 3);
  CHECK_MIN_MAX("a(?=b)c", 2, 2);
  CHECK_MIN_MAX("a(?=bbb|bb)c", 2, 2);
  CHECK_MIN_MAX("a(?!bbb|bb)c", 2, 2);
}
381

    
382

    
383
// Regression checks: pins the printed parse tree for four patterns.
TEST(ParserRegression) {
  // Character class ending in '-' followed by a second class.
  CHECK_PARSE_EQ("[A-Z$-][x]", "(! [A-Z $ -] [x])");
  // A brace quantifier interrupted by '*' falls back to literal text.
  CHECK_PARSE_EQ("a{3,4*}", "(: 'a{3,' (# 0 - g '4') '}')");
  // A lone opening brace is a literal.
  CHECK_PARSE_EQ("{", "'{'");
  // A trailing '|' yields an empty alternative.
  CHECK_PARSE_EQ("a|", "(| 'a' %)");
}
389

    
390
static void ExpectError(const char* input,
391
                        const char* expected) {
392
  V8::Initialize(NULL);
393
  v8::HandleScope scope(CcTest::isolate());
394
  Zone zone(CcTest::i_isolate());
395
  FlatStringReader reader(CcTest::i_isolate(), CStrVector(input));
396
  RegExpCompileData result;
397
  CHECK(!v8::internal::RegExpParser::ParseRegExp(
398
      &reader, false, &result, &zone));
399
  CHECK(result.tree == NULL);
400
  CHECK(!result.error.is_null());
401
  SmartArrayPointer<char> str = result.error->ToCString(ALLOW_NULLS);
402
  CHECK_EQ(expected, *str);
403
}
404

    
405

    
406
// Verifies the error message reported for each category of malformed pattern.
TEST(Errors) {
  // Expected parser diagnostics.
  const char* kEndBackslash = "\\ at end of pattern";
  const char* kUnterminatedGroup = "Unterminated group";
  const char* kInvalidGroup = "Invalid group";
  const char* kUnterminatedCharacterClass = "Unterminated character class";
  const char* kNothingToRepeat = "Nothing to repeat";
  const char* kTooManyCaptures = "Too many captures";

  ExpectError("\\", kEndBackslash);
  ExpectError("(foo", kUnterminatedGroup);
  ExpectError("(?", kInvalidGroup);
  ExpectError("[", kUnterminatedCharacterClass);
  ExpectError("[a-", kUnterminatedCharacterClass);
  ExpectError("*", kNothingToRepeat);
  ExpectError("?", kNothingToRepeat);
  ExpectError("+", kNothingToRepeat);
  ExpectError("{1}", kNothingToRepeat);
  ExpectError("{1,2}", kNothingToRepeat);
  ExpectError("{1,}", kNothingToRepeat);

  // Check that we don't allow more than kMaxCapture captures
  const int kMaxCaptures = 1 << 16;  // Must match RegExpParser::kMaxCaptures.
  HeapStringAllocator allocator;
  StringStream accumulator(&allocator);
  // Build a pattern with kMaxCaptures + 1 empty capture groups.
  int groups_left = kMaxCaptures + 1;
  while (groups_left > 0) {
    accumulator.Add("()");
    groups_left--;
  }
  SmartArrayPointer<const char> many_captures(accumulator.ToCString());
  ExpectError(*many_captures, kTooManyCaptures);
}
435

    
436

    
437
// True iff |c| is one of the ASCII decimal digits '0'..'9'.
static bool IsDigit(uc16 c) {
  return c >= '0' && c <= '9';
}
440

    
441

    
442
// Complement of IsDigit().
static bool NotDigit(uc16 c) {
  const bool is_digit = IsDigit(c);
  return !is_digit;
}
445

    
446

    
447
// True iff |c| is one of the explicitly listed whitespace/line-terminator
// code units, or is classified as a space by unibrow::Space.
static bool IsWhiteSpace(uc16 c) {
  // 0x09..0x0D is the contiguous run TAB, LF, VT, FF, CR.
  const bool explicitly_listed =
      (0x09 <= c && c <= 0x0D) ||
      c == 0x20 ||
      c == 0xA0 ||
      c == 0x2028 ||
      c == 0x2029 ||
      c == 0xFEFF;
  if (explicitly_listed) return true;
  return unibrow::Space::Is(c);
}
464

    
465

    
466
// Complement of IsWhiteSpace().
static bool NotWhiteSpace(uc16 c) {
  const bool is_space = IsWhiteSpace(c);
  return !is_space;
}
469

    
470

    
471
// Complement of IsRegExpWord().
static bool NotWord(uc16 c) {
  const bool is_word = IsRegExpWord(c);
  return !is_word;
}
474

    
475

    
476
// Checks that the character ranges generated for class escape |c| contain
// exactly the code units for which |pred| returns true, over all values
// 0 .. 0xFFFF.
static void TestCharacterClassEscapes(uc16 c, bool (pred)(uc16 c)) {
  Zone zone(CcTest::i_isolate());
  ZoneList<CharacterRange>* class_ranges =
      new(&zone) ZoneList<CharacterRange>(2, &zone);
  CharacterRange::AddClassEscape(c, class_ranges, &zone);
  for (unsigned i = 0; i < (1 << 16); i++) {
    // Scan the ranges until one covers |i|.
    bool in_class = false;
    for (int range_index = 0; range_index < class_ranges->length();
         range_index++) {
      CharacterRange& candidate = class_ranges->at(range_index);
      if (candidate.from() <= i && i <= candidate.to()) {
        in_class = true;
        break;
      }
    }
    CHECK_EQ(pred(i), in_class);
  }
}
490

    
491

    
492
// Runs TestCharacterClassEscapes for each escape character paired with the
// predicate that defines its expected membership.
TEST(CharacterClassEscapes) {
  v8::internal::V8::Initialize(NULL);
  struct EscapeCase {
    uc16 escape;
    bool (*predicate)(uc16);
  };
  const EscapeCase kCases[] = {
    { '.', IsRegExpNewline },
    { 'd', IsDigit },
    { 'D', NotDigit },
    { 's', IsWhiteSpace },
    { 'S', NotWhiteSpace },
    { 'w', IsRegExpWord },
    { 'W', NotWord }
  };
  const int case_count = static_cast<int>(sizeof(kCases) / sizeof(kCases[0]));
  for (int n = 0; n < case_count; n++) {
    TestCharacterClassEscapes(kCases[n].escape, kCases[n].predicate);
  }
}
502

    
503

    
504
// Parses |input| and runs it through RegExpEngine::Compile, returning the
// node stored in the compile data. Returns NULL if the pattern fails to
// parse. The result of RegExpEngine::Compile itself is not inspected.
static RegExpNode* Compile(const char* input,
                           bool multiline,
                           bool is_ascii,
                           Zone* zone) {
  V8::Initialize(NULL);
  Isolate* isolate = CcTest::i_isolate();
  FlatStringReader reader(isolate, CStrVector(input));
  RegExpCompileData compile_data;
  const bool parsed = v8::internal::RegExpParser::ParseRegExp(
      &reader, multiline, &compile_data, zone);
  if (!parsed) {
    return NULL;
  }
  Factory* factory = isolate->factory();
  Handle<String> pattern = factory->NewStringFromUtf8(CStrVector(input));
  // The engine is given an empty sample subject string.
  Handle<String> sample_subject = factory->NewStringFromUtf8(CStrVector(""));
  // NOTE(review): the two leading `false` arguments are presumably the
  // ignore-case and global flags -- confirm against RegExpEngine::Compile.
  RegExpEngine::Compile(&compile_data, false, false, multiline,
                        pattern, sample_subject, is_ascii, zone);
  return compile_data.node;
}
529

    
530

    
531
static void Execute(const char* input,
532
                    bool multiline,
533
                    bool is_ascii,
534
                    bool dot_output = false) {
535
  v8::HandleScope scope(CcTest::isolate());
536
  Zone zone(CcTest::i_isolate());
537
  RegExpNode* node = Compile(input, multiline, is_ascii, &zone);
538
  USE(node);
539
#ifdef DEBUG
540
  if (dot_output) {
541
    RegExpEngine::DotPrint(input, node, false);
542
    exit(0);
543
  }
544
#endif  // DEBUG
545
}
546

    
547

    
548
// Splay-tree configuration used by the tests: int keys and int values,
// ordered by the usual integer ordering.
class TestConfig {
 public:
  typedef int Key;
  typedef int Value;
  // Defined out of line below, with value 0.
  static const int kNoKey;
  // Value used by the tree for "no value".
  static int NoValue() { return 0; }
  // Three-way comparison: -1 if a < b, 1 if a > b, 0 if equal.
  static inline int Compare(int a, int b) {
    if (a < b) return -1;
    if (a > b) return 1;
    return 0;
  }
};


const int TestConfig::kNoKey = 0;
566

    
567

    
568
// Deterministic hash of (i, j) used to generate repeatable test data:
// (i * 781) XOR (j * 329), computed modulo 2^N in unsigned arithmetic so the
// multiplications are well defined for every int input (including negative
// and very large values).
static unsigned PseudoRandom(int i, int j) {
  const unsigned scaled_i = static_cast<unsigned>(i) * 781u;
  const unsigned scaled_j = static_cast<unsigned>(j) * 329u;
  return scaled_i ^ scaled_j;
}
571

    
572

    
573
// Inserts and removes pseudo-random keys in a ZoneSplayTree while mirroring
// the expected contents in the |seen| array, checking after every mutation
// that the tree and the array agree for every key in [0, kLimit).
TEST(SplayTreeSimple) {
  v8::internal::V8::Initialize(NULL);
  static const unsigned kLimit = 1000;
  Zone zone(CcTest::i_isolate());
  ZoneSplayTree<TestConfig> tree(&zone);
  bool seen[kLimit];
  for (unsigned i = 0; i < kLimit; i++) seen[i] = false;
// Verifies that tree membership matches |seen| for every key. The macro uses
// its own locator so it does not depend on a variable at the expansion site.
#define CHECK_MAPS_EQUAL() do {                                      \
    ZoneSplayTree<TestConfig>::Locator loc;                          \
    for (unsigned k = 0; k < kLimit; k++)                            \
      CHECK_EQ(seen[k], tree.Find(k, &loc));                         \
  } while (false)
  for (int i = 0; i < 50; i++) {
    for (int j = 0; j < 50; j++) {
      unsigned next = PseudoRandom(i, j) % kLimit;
      if (seen[next]) {
        // We've already seen this one.  Check the value and remove
        // it.
        ZoneSplayTree<TestConfig>::Locator loc;
        CHECK(tree.Find(next, &loc));
        CHECK_EQ(next, loc.key());
        CHECK_EQ(3 * next, loc.value());
        tree.Remove(next);
        seen[next] = false;
        CHECK_MAPS_EQUAL();
      } else {
        // Check that it wasn't there already and then add it.
        ZoneSplayTree<TestConfig>::Locator loc;
        CHECK(!tree.Find(next, &loc));
        CHECK(tree.Insert(next, &loc));
        CHECK_EQ(next, loc.key());
        loc.set_value(3 * next);
        seen[next] = true;
        CHECK_MAPS_EQUAL();
      }
      // For a key known to be present, the bounded lookups must return the
      // key itself.
      // NOTE(review): each `break` below ends the inner j-loop early after
      // the first successful bounded lookup; kept as in the original, but
      // confirm this is intended rather than a `continue`.
      int val = PseudoRandom(j, i) % kLimit;
      if (seen[val]) {
        ZoneSplayTree<TestConfig>::Locator loc;
        CHECK(tree.FindGreatestLessThan(val, &loc));
        CHECK_EQ(loc.key(), val);
        break;
      }
      val = PseudoRandom(i + j, i - j) % kLimit;
      if (seen[val]) {
        ZoneSplayTree<TestConfig>::Locator loc;
        CHECK(tree.FindLeastGreaterThan(val, &loc));
        CHECK_EQ(loc.key(), val);
        break;
      }
    }
  }
// Keep the helper macro local to this test.
#undef CHECK_MAPS_EQUAL
}
624

    
625

    
626
// Builds kRangeCount sets of pseudo-random character ranges, enters them
// into a DispatchTable, and verifies that for every code point below kLimit
// the table's out-set has bit j set exactly when range set j covers it.
TEST(DispatchTableConstruction) {
  v8::internal::V8::Initialize(NULL);
  // Initialize test data.
  static const int kLimit = 1000;
  static const int kRangeCount = 8;
  static const int kRangeSize = 16;
  // Each range contributes two endpoints (from, to).
  static const int kEndpointCount = 2 * kRangeSize;
  uc16 ranges[kRangeCount][kEndpointCount];
  for (int i = 0; i < kRangeCount; i++) {
    Vector<uc16> range(ranges[i], kEndpointCount);
    for (int j = 0; j < kEndpointCount; j++) {
      range[j] = PseudoRandom(i + 25, j + 87) % kLimit;
    }
    // Sorting makes consecutive endpoint pairs well-formed (from <= to).
    range.Sort();
    for (int j = 1; j < kEndpointCount; j++) {
      CHECK(range[j-1] <= range[j]);
    }
  }
  // Enter test data into dispatch table.
  Zone zone(CcTest::i_isolate());
  DispatchTable table(&zone);
  for (int i = 0; i < kRangeCount; i++) {
    uc16* range = ranges[i];
    for (int j = 0; j < kEndpointCount; j += 2) {
      CharacterRange interval(range[j], range[j + 1]);
      table.AddRange(interval, i, &zone);
    }
  }
  // Check that the table looks as we would expect
  for (int p = 0; p < kLimit; p++) {
    OutSet* outs = table.Get(p);
    for (int j = 0; j < kRangeCount; j++) {
      uc16* range = ranges[j];
      bool is_on = false;
      for (int k = 0; k < kEndpointCount; k += 2) {
        if (range[k] <= p && p <= range[k + 1]) {
          is_on = true;
          break;
        }
      }
      CHECK_EQ(is_on, outs->Get(j));
    }
  }
}
663

    
664

    
665
// Test of debug-only syntax.
666
#ifdef DEBUG
667

    
668
// Possessive quantifiers ("*+", "++", "?+", "{m,n}+") parse only while
// FLAG_regexp_possessive_quantifier is set, and are parse errors otherwise.
// The flag's previous value is restored at the end of the test.
TEST(ParsePossessiveRepetition) {
  const bool saved_flag = FLAG_regexp_possessive_quantifier;

  // Enable possessive quantifier syntax.
  FLAG_regexp_possessive_quantifier = true;

  CHECK_PARSE_EQ("a*+", "(# 0 - p 'a')");
  CHECK_PARSE_EQ("a++", "(# 1 - p 'a')");
  CHECK_PARSE_EQ("a?+", "(# 0 1 p 'a')");
  CHECK_PARSE_EQ("a{10,20}+", "(# 10 20 p 'a')");
  CHECK_PARSE_EQ("za{10,20}+b", "(: 'z' (# 10 20 p 'a') 'b')");

  // Disable possessive quantifier syntax.
  FLAG_regexp_possessive_quantifier = false;

  CHECK_PARSE_ERROR("a*+");
  CHECK_PARSE_ERROR("a++");
  CHECK_PARSE_ERROR("a?+");
  CHECK_PARSE_ERROR("a{10,20}+");
  CHECK_PARSE_ERROR("a{10,20}+b");

  FLAG_regexp_possessive_quantifier = saved_flag;
}
691

    
692
#endif
693

    
694
// Tests of interpreter.
695

    
696

    
697
#ifndef V8_INTERPRETED_REGEXP
698

    
699
#if V8_TARGET_ARCH_IA32
700
typedef RegExpMacroAssemblerIA32 ArchRegExpMacroAssembler;
701
#elif V8_TARGET_ARCH_X64
702
typedef RegExpMacroAssemblerX64 ArchRegExpMacroAssembler;
703
#elif V8_TARGET_ARCH_ARM
704
typedef RegExpMacroAssemblerARM ArchRegExpMacroAssembler;
705
#elif V8_TARGET_ARCH_MIPS
706
typedef RegExpMacroAssemblerMIPS ArchRegExpMacroAssembler;
707
#endif
708

    
709
class ContextInitializer {
710
 public:
711
  ContextInitializer()
712
      : scope_(CcTest::isolate()),
713
        env_(v8::Context::New(CcTest::isolate())) {
714
    env_->Enter();
715
  }
716
  ~ContextInitializer() {
717
    env_->Exit();
718
  }
719
 private:
720
  v8::HandleScope scope_;
721
  v8::Handle<v8::Context> env_;
722
};
723

    
724

    
725
// Thin wrapper used by the native tests below: forwards every argument to
// NativeRegExpMacroAssembler::Execute, passing 0 for the seventh argument and
// the cctest isolate for the last one.
// NOTE(review): the seventh argument is presumably the output size -- confirm
// against the declaration of NativeRegExpMacroAssembler::Execute.
static ArchRegExpMacroAssembler::Result Execute(Code* code,
                                                String* input,
                                                int start_offset,
                                                const byte* input_start,
                                                const byte* input_end,
                                                int* captures) {
  Isolate* isolate = CcTest::i_isolate();
  return NativeRegExpMacroAssembler::Execute(
      code, input, start_offset, input_start, input_end, captures, 0, isolate);
}
741

    
742

    
743
// A program consisting of a single Succeed() must report SUCCESS and leave
// all four capture registers at -1.
TEST(MacroAssemblerNativeSuccess) {
  v8::V8::Initialize();
  ContextInitializer initializer;
  Isolate* isolate = CcTest::i_isolate();
  Factory* factory = isolate->factory();
  Zone zone(isolate);

  ArchRegExpMacroAssembler masm(NativeRegExpMacroAssembler::ASCII, 4, &zone);
  masm.Succeed();

  Handle<String> source = factory->NewStringFromAscii(CStrVector(""));
  Handle<Code> code = Handle<Code>::cast(masm.GetCode(source));

  // Pre-fill with arbitrary values so the checks below prove the registers
  // were actually written.
  int captures[4] = {42, 37, 87, 117};
  Handle<String> input = factory->NewStringFromAscii(CStrVector("foofoo"));
  Handle<SeqOneByteString> seq_input = Handle<SeqOneByteString>::cast(input);
  const byte* start_adr =
      reinterpret_cast<const byte*>(seq_input->GetCharsAddress());
  const byte* end_adr = start_adr + seq_input->length();

  NativeRegExpMacroAssembler::Result result =
      Execute(*code, *input, 0, start_adr, end_adr, captures);

  CHECK_EQ(NativeRegExpMacroAssembler::SUCCESS, result);
  for (int i = 0; i < 4; i++) {
    CHECK_EQ(-1, captures[i]);
  }
}
778

    
779

    
780
// Hand-assembles a program for the pattern "^foo" in ASCII mode and runs it
// against one input that matches and one that does not.
TEST(MacroAssemblerNativeSimple) {
  v8::V8::Initialize();
  ContextInitializer initializer;
  Isolate* isolate = CcTest::i_isolate();
  Factory* factory = isolate->factory();
  Zone zone(isolate);

  ArchRegExpMacroAssembler masm(NativeRegExpMacroAssembler::ASCII, 4, &zone);

  Label on_fail;
  Label on_backtrack;
  masm.PushBacktrack(&on_fail);
  masm.CheckNotAtStart(NULL);
  // Compare the three characters from the last one to the first one.
  masm.LoadCurrentCharacter(2, NULL);
  masm.CheckNotCharacter('o', NULL);
  masm.LoadCurrentCharacter(1, NULL, false);
  masm.CheckNotCharacter('o', NULL);
  masm.LoadCurrentCharacter(0, NULL, false);
  masm.CheckNotCharacter('f', NULL);
  // Record the match as [current, current + 3) in registers 0 and 1.
  masm.WriteCurrentPositionToRegister(0, 0);
  masm.WriteCurrentPositionToRegister(1, 3);
  masm.AdvanceCurrentPosition(3);
  masm.PushBacktrack(&on_backtrack);
  masm.Succeed();
  masm.Bind(&on_backtrack);
  masm.Backtrack();
  masm.Bind(&on_fail);
  masm.Fail();

  Handle<String> source = factory->NewStringFromAscii(CStrVector("^foo"));
  Handle<Code> code = Handle<Code>::cast(masm.GetCode(source));

  int captures[4] = {42, 37, 87, 117};

  // Input that starts with "foo": expect a match covering [0, 3).
  Handle<String> matching =
      factory->NewStringFromAscii(CStrVector("foofoo"));
  Address matching_start =
      Handle<SeqOneByteString>::cast(matching)->GetCharsAddress();
  NativeRegExpMacroAssembler::Result result =
      Execute(*code, *matching, 0,
              matching_start, matching_start + matching->length(),
              captures);

  CHECK_EQ(NativeRegExpMacroAssembler::SUCCESS, result);
  CHECK_EQ(0, captures[0]);
  CHECK_EQ(3, captures[1]);
  CHECK_EQ(-1, captures[2]);
  CHECK_EQ(-1, captures[3]);

  // Input that does not start with "foo": expect failure.
  Handle<String> failing =
      factory->NewStringFromAscii(CStrVector("barbarbar"));
  Address failing_start =
      Handle<SeqOneByteString>::cast(failing)->GetCharsAddress();
  result = Execute(*code, *failing, 0,
                   failing_start, failing_start + failing->length(),
                   captures);

  CHECK_EQ(NativeRegExpMacroAssembler::FAILURE, result);
}
844

    
845

    
846
// Two-byte (UC16) variant of MacroAssemblerNativeSimple: hand-assembles a
// program for the pattern "^foo" and runs it against one two-byte input that
// matches and one that does not.
TEST(MacroAssemblerNativeSimpleUC16) {
  v8::V8::Initialize();
  ContextInitializer initializer;
  Isolate* isolate = CcTest::i_isolate();
  Factory* factory = isolate->factory();
  Zone zone(isolate);

  ArchRegExpMacroAssembler m(NativeRegExpMacroAssembler::UC16, 4, &zone);

  Label fail, backtrack;
  m.PushBacktrack(&fail);
  m.CheckNotAtStart(NULL);
  // Compare the three characters from the last one to the first one.
  m.LoadCurrentCharacter(2, NULL);
  m.CheckNotCharacter('o', NULL);
  m.LoadCurrentCharacter(1, NULL, false);
  m.CheckNotCharacter('o', NULL);
  m.LoadCurrentCharacter(0, NULL, false);
  m.CheckNotCharacter('f', NULL);
  // Record the match as [current, current + 3) in registers 0 and 1.
  m.WriteCurrentPositionToRegister(0, 0);
  m.WriteCurrentPositionToRegister(1, 3);
  m.AdvanceCurrentPosition(3);
  m.PushBacktrack(&backtrack);
  m.Succeed();
  m.Bind(&backtrack);
  m.Backtrack();
  m.Bind(&fail);
  m.Fail();

  Handle<String> source = factory->NewStringFromAscii(CStrVector("^foo"));
  Handle<Object> code_object = m.GetCode(source);
  Handle<Code> code = Handle<Code>::cast(code_object);

  int captures[4] = {42, 37, 87, 117};
  // The trailing 0x2603 is outside the one-byte range.
  const uc16 input_data[6] = {'f', 'o', 'o', 'f', 'o',
                              static_cast<uc16>(0x2603)};
  Handle<String> input =
      factory->NewStringFromTwoByte(Vector<const uc16>(input_data, 6));
  Handle<SeqTwoByteString> seq_input = Handle<SeqTwoByteString>::cast(input);
  Address start_adr = seq_input->GetCharsAddress();

  // start_adr is a byte address and each character of a two-byte string
  // occupies two bytes, so the end of the input is length() * 2 bytes away
  // (same computation as the second Execute call below).
  NativeRegExpMacroAssembler::Result result =
      Execute(*code,
              *input,
              0,
              start_adr,
              start_adr + input->length() * 2,
              captures);

  CHECK_EQ(NativeRegExpMacroAssembler::SUCCESS, result);
  CHECK_EQ(0, captures[0]);
  CHECK_EQ(3, captures[1]);
  CHECK_EQ(-1, captures[2]);
  CHECK_EQ(-1, captures[3]);

  const uc16 input_data2[9] = {'b', 'a', 'r', 'b', 'a', 'r', 'b', 'a',
                               static_cast<uc16>(0x2603)};
  input = factory->NewStringFromTwoByte(Vector<const uc16>(input_data2, 9));
  seq_input = Handle<SeqTwoByteString>::cast(input);
  start_adr = seq_input->GetCharsAddress();

  result = Execute(*code,
                   *input,
                   0,
                   start_adr,
                   start_adr + input->length() * 2,
                   captures);

  CHECK_EQ(NativeRegExpMacroAssembler::FAILURE, result);
}
915

    
916

    
917
// Loads a character at offset 10 of a six-character input, first with an
// explicit failure label and then with a NULL label after pushing a backtrack
// target; the program is expected to end in FAILURE.
TEST(MacroAssemblerNativeBacktrack) {
  v8::V8::Initialize();
  ContextInitializer initializer;
  Isolate* isolate = CcTest::i_isolate();
  Factory* factory = isolate->factory();
  Zone zone(isolate);

  ArchRegExpMacroAssembler masm(NativeRegExpMacroAssembler::ASCII, 0, &zone);

  Label out_of_range;
  Label on_backtrack;
  masm.LoadCurrentCharacter(10, &out_of_range);
  masm.Succeed();
  masm.Bind(&out_of_range);
  masm.PushBacktrack(&on_backtrack);
  masm.LoadCurrentCharacter(10, NULL);
  masm.Succeed();
  masm.Bind(&on_backtrack);
  masm.Fail();

  Handle<String> source = factory->NewStringFromAscii(CStrVector(".........."));
  Handle<Code> code = Handle<Code>::cast(masm.GetCode(source));

  Handle<String> input = factory->NewStringFromAscii(CStrVector("foofoo"));
  Address first_char =
      Handle<SeqOneByteString>::cast(input)->GetCharsAddress();
  Address past_last_char = first_char + input->length();

  // The assembler was created with zero capture registers, so no output
  // array is supplied.
  NativeRegExpMacroAssembler::Result result =
      Execute(*code, *input, 0, first_char, past_last_char, NULL);

  CHECK_EQ(NativeRegExpMacroAssembler::FAILURE, result);
}
955

    
956

    
957
// Exercises CheckNotBackReference in ASCII mode. Registers 0 and 1 capture
// the first two characters of "fooofo"; the back reference is then tried at
// offset 2 (expected to mismatch) and at offset 4 (expected to match), after
// which the current position is stored in register 2.
TEST(MacroAssemblerNativeBackReferenceASCII) {
  v8::V8::Initialize();
  ContextInitializer initializer;
  Isolate* isolate = CcTest::i_isolate();
  Factory* factory = isolate->factory();
  Zone zone(isolate);

  ArchRegExpMacroAssembler m(NativeRegExpMacroAssembler::ASCII, 4, &zone);

  // Capture [0, 2).
  m.WriteCurrentPositionToRegister(0, 0);
  m.AdvanceCurrentPosition(2);
  m.WriteCurrentPositionToRegister(1, 0);
  Label nomatch;
  // A match at offset 2 would be a bug, hence the Fail() on fall-through.
  m.CheckNotBackReference(0, &nomatch);
  m.Fail();
  m.Bind(&nomatch);
  m.AdvanceCurrentPosition(2);
  Label missing_match;
  m.CheckNotBackReference(0, &missing_match);
  m.WriteCurrentPositionToRegister(2, 0);
  m.Succeed();
  m.Bind(&missing_match);
  m.Fail();

  // The backslash is doubled so the source text contains the back reference
  // "\1" rather than the octal escape for the byte 0x01.
  Handle<String> source =
      factory->NewStringFromAscii(CStrVector("^(..)..\\1"));
  Handle<Object> code_object = m.GetCode(source);
  Handle<Code> code = Handle<Code>::cast(code_object);

  Handle<String> input = factory->NewStringFromAscii(CStrVector("fooofo"));
  Handle<SeqOneByteString> seq_input = Handle<SeqOneByteString>::cast(input);
  Address start_adr = seq_input->GetCharsAddress();

  int output[4];
  NativeRegExpMacroAssembler::Result result =
      Execute(*code,
              *input,
              0,
              start_adr,
              start_adr + input->length(),
              output);

  CHECK_EQ(NativeRegExpMacroAssembler::SUCCESS, result);
  CHECK_EQ(0, output[0]);
  CHECK_EQ(2, output[1]);
  CHECK_EQ(6, output[2]);
  CHECK_EQ(-1, output[3]);
}
1004

    
1005

    
1006
// Two-byte (UC16) variant of the back reference test. Registers 0 and 1
// capture the first two characters of the input; the back reference is then
// tried at offset 2 (expected to mismatch) and at offset 4 (expected to
// match), after which the current position is stored in register 2.
TEST(MacroAssemblerNativeBackReferenceUC16) {
  v8::V8::Initialize();
  ContextInitializer initializer;
  Isolate* isolate = CcTest::i_isolate();
  Factory* factory = isolate->factory();
  Zone zone(isolate);

  ArchRegExpMacroAssembler m(NativeRegExpMacroAssembler::UC16, 4, &zone);

  // Capture [0, 2).
  m.WriteCurrentPositionToRegister(0, 0);
  m.AdvanceCurrentPosition(2);
  m.WriteCurrentPositionToRegister(1, 0);
  Label nomatch;
  // A match at offset 2 would be a bug, hence the Fail() on fall-through.
  m.CheckNotBackReference(0, &nomatch);
  m.Fail();
  m.Bind(&nomatch);
  m.AdvanceCurrentPosition(2);
  Label missing_match;
  m.CheckNotBackReference(0, &missing_match);
  m.WriteCurrentPositionToRegister(2, 0);
  m.Succeed();
  m.Bind(&missing_match);
  m.Fail();

  // The backslash is doubled so the source text contains the back reference
  // "\1" rather than the octal escape for the byte 0x01.
  Handle<String> source =
      factory->NewStringFromAscii(CStrVector("^(..)..\\1"));
  Handle<Object> code_object = m.GetCode(source);
  Handle<Code> code = Handle<Code>::cast(code_object);

  const uc16 input_data[6] = {'f', 0x2028, 'o', 'o', 'f', 0x2028};
  Handle<String> input =
      factory->NewStringFromTwoByte(Vector<const uc16>(input_data, 6));
  Handle<SeqTwoByteString> seq_input = Handle<SeqTwoByteString>::cast(input);
  Address start_adr = seq_input->GetCharsAddress();

  int output[4];
  // The end address is in bytes: two per character of a two-byte string.
  NativeRegExpMacroAssembler::Result result =
      Execute(*code,
              *input,
              0,
              start_adr,
              start_adr + input->length() * 2,
              output);

  CHECK_EQ(NativeRegExpMacroAssembler::SUCCESS, result);
  CHECK_EQ(0, output[0]);
  CHECK_EQ(2, output[1]);
  CHECK_EQ(6, output[2]);
  CHECK_EQ(-1, output[3]);
}
1055

    
1056

    
1057

    
1058
// Runs one program twice over "foobar": once from offset 0 and once from
// offset 3. Both runs are expected to succeed, each through a different
// branch of CheckNotAtStart.
TEST(MacroAssemblernativeAtStart) {
  v8::V8::Initialize();
  ContextInitializer initializer;
  Isolate* isolate = CcTest::i_isolate();
  Factory* factory = isolate->factory();
  Zone zone(isolate);

  ArchRegExpMacroAssembler masm(NativeRegExpMacroAssembler::ASCII, 0, &zone);

  Label not_at_start;
  Label prev_is_newline;
  Label fail;
  Label prev_is_o;

  masm.CheckNotAtStart(&not_at_start);
  // At-start branch: previous char must be '\n' and the current one 'f'.
  masm.CheckCharacter('\n', &prev_is_newline);
  masm.Bind(&fail);
  masm.Fail();
  masm.Bind(&prev_is_newline);
  masm.LoadCurrentCharacter(0, &fail);
  masm.CheckNotCharacter('f', &fail);
  masm.Succeed();

  // Not-at-start branch: previous char must be 'o' and the current one 'b'.
  masm.Bind(&not_at_start);
  masm.CheckCharacter('o', &prev_is_o);
  masm.Fail();
  masm.Bind(&prev_is_o);
  masm.LoadCurrentCharacter(0, &fail);
  masm.CheckNotCharacter('b', &fail);
  masm.Succeed();

  Handle<String> source = factory->NewStringFromAscii(CStrVector("(^f|ob)"));
  Handle<Code> code = Handle<Code>::cast(masm.GetCode(source));

  Handle<String> input = factory->NewStringFromAscii(CStrVector("foobar"));
  Address first_char =
      Handle<SeqOneByteString>::cast(input)->GetCharsAddress();
  Address past_last_char = first_char + input->length();

  // Start of the string.
  NativeRegExpMacroAssembler::Result result =
      Execute(*code, *input, 0, first_char, past_last_char, NULL);
  CHECK_EQ(NativeRegExpMacroAssembler::SUCCESS, result);

  // Offset 3, i.e. between "foo" and "bar".
  result = Execute(*code, *input, 3, first_char + 3, past_last_char, NULL);
  CHECK_EQ(NativeRegExpMacroAssembler::SUCCESS, result);
}
1115

    
1116

    
1117
// Exercises CheckNotBackReferenceIgnoreCase in ASCII mode on the input
// "aBcAbCABCxYzab". Registers 2 and 3 capture the first three characters;
// the case-insensitive back reference is expected to match twice, then to
// mismatch at "xYz", and to mismatch again on the remaining input.
TEST(MacroAssemblerNativeBackRefNoCase) {
  v8::V8::Initialize();
  ContextInitializer initializer;
  Isolate* isolate = CcTest::i_isolate();
  Factory* factory = isolate->factory();
  Zone zone(isolate);

  ArchRegExpMacroAssembler m(NativeRegExpMacroAssembler::ASCII, 4, &zone);

  Label fail, succ;

  m.WriteCurrentPositionToRegister(0, 0);
  // Capture [0, 3) in registers 2 and 3.
  m.WriteCurrentPositionToRegister(2, 0);
  m.AdvanceCurrentPosition(3);
  m.WriteCurrentPositionToRegister(3, 0);
  m.CheckNotBackReferenceIgnoreCase(2, &fail);  // Match "AbC".
  m.CheckNotBackReferenceIgnoreCase(2, &fail);  // Match "ABC".
  Label expected_fail;
  // A third match would be a bug: falling through reaches Fail() below.
  m.CheckNotBackReferenceIgnoreCase(2, &expected_fail);
  m.Bind(&fail);
  m.Fail();

  m.Bind(&expected_fail);
  m.AdvanceCurrentPosition(3);  // Skip "xYz"
  m.CheckNotBackReferenceIgnoreCase(2, &succ);
  m.Fail();

  m.Bind(&succ);
  m.WriteCurrentPositionToRegister(1, 0);
  m.Succeed();

  // The backslashes are doubled so the source text contains the back
  // reference "\1" rather than the octal escape for the byte 0x01.
  Handle<String> source =
      factory->NewStringFromAscii(
          CStrVector("^(abc)\\1\\1(?!\\1)...(?!\\1)"));
  Handle<Object> code_object = m.GetCode(source);
  Handle<Code> code = Handle<Code>::cast(code_object);

  Handle<String> input =
      factory->NewStringFromAscii(CStrVector("aBcAbCABCxYzab"));
  Handle<SeqOneByteString> seq_input = Handle<SeqOneByteString>::cast(input);
  Address start_adr = seq_input->GetCharsAddress();

  int output[4];
  NativeRegExpMacroAssembler::Result result =
      Execute(*code,
              *input,
              0,
              start_adr,
              start_adr + input->length(),
              output);

  CHECK_EQ(NativeRegExpMacroAssembler::SUCCESS, result);
  CHECK_EQ(0, output[0]);
  CHECK_EQ(12, output[1]);
  CHECK_EQ(0, output[2]);
  CHECK_EQ(3, output[3]);
}
1173

    
1174

    
1175

    
1176
// Exercises register and backtrack-stack operations of the native macro
// assembler: push/pop of registers, saving and restoring the stack pointer,
// counting loops with IfRegisterLT / IfRegisterGE, and CheckGreedyLoop.
// The trailing comments track the expected contents of the output registers.
TEST(MacroAssemblerNativeRegisters) {
  v8::V8::Initialize();
  ContextInitializer initializer;
  Isolate* isolate = CcTest::i_isolate();
  Factory* factory = isolate->factory();
  Zone zone(isolate);

  ArchRegExpMacroAssembler m(NativeRegExpMacroAssembler::ASCII, 6, &zone);

  // out1..out6 are the six output registers; sp and loop_cnt are scratch
  // registers numbered after them.
  enum registers { out1, out2, out3, out4, out5, out6, sp, loop_cnt };
  Label fail;
  Label backtrack;
  m.WriteCurrentPositionToRegister(out1, 0);  // Output: [0]
  m.PushRegister(out1, RegExpMacroAssembler::kNoStackLimitCheck);
  m.PushBacktrack(&backtrack);
  m.WriteStackPointerToRegister(sp);
  // Fill stack and registers
  m.AdvanceCurrentPosition(2);
  m.WriteCurrentPositionToRegister(out1, 0);
  m.PushRegister(out1, RegExpMacroAssembler::kNoStackLimitCheck);
  m.PushBacktrack(&fail);
  // Drop backtrack stack frames.
  m.ReadStackPointerFromRegister(sp);
  // And take the first backtrack (to &backtrack)
  m.Backtrack();

  // Emitted directly after an unconditional Backtrack() with no label bound
  // in between, so nothing jumps here.
  m.PushCurrentPosition();
  m.AdvanceCurrentPosition(2);
  m.PopCurrentPosition();

  m.Bind(&backtrack);
  m.PopRegister(out1);
  m.ReadCurrentPositionFromRegister(out1);
  m.AdvanceCurrentPosition(3);
  m.WriteCurrentPositionToRegister(out2, 0);  // [0,3]

  // Count up from 0 to 3, advancing one character per iteration.
  Label loop;
  m.SetRegister(loop_cnt, 0);  // loop counter
  m.Bind(&loop);
  m.AdvanceRegister(loop_cnt, 1);
  m.AdvanceCurrentPosition(1);
  m.IfRegisterLT(loop_cnt, 3, &loop);
  m.WriteCurrentPositionToRegister(out3, 0);  // [0,3,6]

  // Count down from 2 to -1, advancing one character per iteration.
  Label loop2;
  m.SetRegister(loop_cnt, 2);  // loop counter
  m.Bind(&loop2);
  m.AdvanceRegister(loop_cnt, -1);
  m.AdvanceCurrentPosition(1);
  m.IfRegisterGE(loop_cnt, 0, &loop2);
  m.WriteCurrentPositionToRegister(out4, 0);  // [0,3,6,9]

  // Advance from the position in out3 until CheckGreedyLoop branches out,
  // then pop the second pushed copy of out4 as the current position.
  Label loop3;
  Label exit_loop3;
  m.PushRegister(out4, RegExpMacroAssembler::kNoStackLimitCheck);
  m.PushRegister(out4, RegExpMacroAssembler::kNoStackLimitCheck);
  m.ReadCurrentPositionFromRegister(out3);
  m.Bind(&loop3);
  m.AdvanceCurrentPosition(1);
  m.CheckGreedyLoop(&exit_loop3);
  m.GoTo(&loop3);
  m.Bind(&exit_loop3);
  m.PopCurrentPosition();
  m.WriteCurrentPositionToRegister(out5, 0);  // [0,3,6,9,9,-1]

  m.Succeed();

  m.Bind(&fail);
  m.Fail();

  Handle<String> source =
      factory->NewStringFromAscii(CStrVector("<loop test>"));
  Handle<Object> code_object = m.GetCode(source);
  Handle<Code> code = Handle<Code>::cast(code_object);

  // String long enough for test (content doesn't matter).
  Handle<String> input =
      factory->NewStringFromAscii(CStrVector("foofoofoofoofoo"));
  Handle<SeqOneByteString> seq_input = Handle<SeqOneByteString>::cast(input);
  Address start_adr = seq_input->GetCharsAddress();

  int output[6];
  NativeRegExpMacroAssembler::Result result =
      Execute(*code,
              *input,
              0,
              start_adr,
              start_adr + input->length(),
              output);

  CHECK_EQ(NativeRegExpMacroAssembler::SUCCESS, result);
  CHECK_EQ(0, output[0]);
  CHECK_EQ(3, output[1]);
  CHECK_EQ(6, output[2]);
  CHECK_EQ(9, output[3]);
  CHECK_EQ(9, output[4]);
  CHECK_EQ(-1, output[5]);
}
1277

    
1278

    
1279
// Assembles an endless loop that pushes a backtrack entry on every iteration
// and checks that running it yields EXCEPTION with a pending exception on the
// isolate.
TEST(MacroAssemblerStackOverflow) {
  v8::V8::Initialize();
  ContextInitializer initializer;
  Isolate* isolate = CcTest::i_isolate();
  Factory* factory = isolate->factory();
  Zone zone(isolate);

  ArchRegExpMacroAssembler masm(NativeRegExpMacroAssembler::ASCII, 0, &zone);

  Label forever;
  masm.Bind(&forever);
  masm.PushBacktrack(&forever);
  masm.GoTo(&forever);

  Handle<String> source =
      factory->NewStringFromAscii(CStrVector("<stack overflow test>"));
  Handle<Code> code = Handle<Code>::cast(masm.GetCode(source));

  // The program never reads the input, so its content is irrelevant.
  Handle<String> input =
      factory->NewStringFromAscii(CStrVector("dummy"));
  Address first_char =
      Handle<SeqOneByteString>::cast(input)->GetCharsAddress();
  Address past_last_char = first_char + input->length();

  NativeRegExpMacroAssembler::Result result =
      Execute(*code, *input, 0, first_char, past_last_char, NULL);

  CHECK_EQ(NativeRegExpMacroAssembler::EXCEPTION, result);
  CHECK(isolate->has_pending_exception());
  // Clear the pending exception again before the test returns.
  isolate->clear_pending_exception();
}
1316

    
1317

    
1318
// Uses a very high register index to check that a large register area works:
// the value written to that register is pushed, popped into capture
// register 1 and verified after the run.
TEST(MacroAssemblerNativeLotsOfRegisters) {
  v8::V8::Initialize();
  ContextInitializer initializer;
  Isolate* isolate = CcTest::i_isolate();
  Factory* factory = isolate->factory();
  Zone zone(isolate);

  ArchRegExpMacroAssembler masm(NativeRegExpMacroAssembler::ASCII, 2, &zone);

  // At least 2048, to ensure the space allocated for registers spans one
  // full page.
  const int kHighRegister = 8000;
  masm.WriteCurrentPositionToRegister(kHighRegister, 42);
  masm.WriteCurrentPositionToRegister(0, 0);
  masm.WriteCurrentPositionToRegister(1, 1);
  Label after_backref;
  // Performs a system-stack push.
  masm.CheckNotBackReference(0, &after_backref);
  masm.Bind(&after_backref);
  // Move the high register's value into capture register 1 via the stack.
  masm.PushRegister(kHighRegister, RegExpMacroAssembler::kNoStackLimitCheck);
  masm.PopRegister(1);
  masm.Succeed();

  Handle<String> source =
      factory->NewStringFromAscii(CStrVector("<huge register space test>"));
  Handle<Code> code = Handle<Code>::cast(masm.GetCode(source));

  // String long enough for test (content doesn't matter).
  Handle<String> input =
      factory->NewStringFromAscii(CStrVector("sample text"));
  Address first_char =
      Handle<SeqOneByteString>::cast(input)->GetCharsAddress();
  Address past_last_char = first_char + input->length();

  int captures[2];
  NativeRegExpMacroAssembler::Result result =
      Execute(*code, *input, 0, first_char, past_last_char, captures);

  CHECK_EQ(NativeRegExpMacroAssembler::SUCCESS, result);
  CHECK_EQ(0, captures[0]);
  CHECK_EQ(42, captures[1]);

  isolate->clear_pending_exception();
}
1366

    
1367
#else  // V8_INTERPRETED_REGEXP
1368

    
1369
// Interpreted-regexp build only: assembles bytecode for the pattern "^f(o)o"
// with RegExpMacroAssemblerIrregexp and runs it through IrregexpInterpreter
// on one matching and one non-matching two-byte string.
TEST(MacroAssembler) {
  V8::Initialize(NULL);
  byte codes[1024];
  Zone zone(CcTest::i_isolate());
  RegExpMacroAssemblerIrregexp masm(Vector<byte>(codes, 1024), &zone);

  Label start;
  Label fail;
  Label backtrack;

  // Prologue: register 4 becomes 42, is pushed, then bumped to 84.
  masm.SetRegister(4, 42);
  masm.PushRegister(4, RegExpMacroAssembler::kNoStackLimitCheck);
  masm.AdvanceRegister(4, 42);
  masm.GoTo(&start);
  masm.Fail();

  // Body: ^f(o)o.
  masm.Bind(&start);
  masm.PushBacktrack(&fail);
  masm.CheckNotAtStart(NULL);
  masm.LoadCurrentCharacter(0, NULL);
  masm.CheckNotCharacter('f', NULL);
  masm.LoadCurrentCharacter(1, NULL);
  masm.CheckNotCharacter('o', NULL);
  masm.LoadCurrentCharacter(2, NULL);
  masm.CheckNotCharacter('o', NULL);
  // Whole match in registers 0/1, the group "(o)" in registers 2/3.
  masm.WriteCurrentPositionToRegister(0, 0);
  masm.WriteCurrentPositionToRegister(1, 3);
  masm.WriteCurrentPositionToRegister(2, 1);
  masm.WriteCurrentPositionToRegister(3, 2);
  masm.AdvanceCurrentPosition(3);
  masm.PushBacktrack(&backtrack);
  masm.Succeed();
  masm.Bind(&backtrack);
  masm.ClearRegisters(2, 3);
  masm.Backtrack();
  // Failure path: pops the 42 pushed in the prologue into register 0.
  masm.Bind(&fail);
  masm.PopRegister(0);
  masm.Fail();

  Isolate* isolate = CcTest::i_isolate();
  Factory* factory = isolate->factory();
  HandleScope scope(isolate);

  Handle<String> source = factory->NewStringFromAscii(CStrVector("^f(o)o"));
  Handle<ByteArray> bytecode = Handle<ByteArray>::cast(masm.GetCode(source));
  int captures[5];

  const uc16 matching_chars[] = {'f', 'o', 'o', 'b', 'a', 'r'};
  Handle<String> matching =
      factory->NewStringFromTwoByte(Vector<const uc16>(matching_chars, 6));

  CHECK(IrregexpInterpreter::Match(isolate, bytecode, matching, captures, 0));
  CHECK_EQ(0, captures[0]);
  CHECK_EQ(3, captures[1]);
  CHECK_EQ(1, captures[2]);
  CHECK_EQ(2, captures[3]);
  CHECK_EQ(84, captures[4]);

  const uc16 failing_chars[] = {'b', 'a', 'r', 'f', 'o', 'o'};
  Handle<String> failing =
      factory->NewStringFromTwoByte(Vector<const uc16>(failing_chars, 6));

  CHECK(!IrregexpInterpreter::Match(isolate, bytecode, failing, captures, 0));
  CHECK_EQ(42, captures[0]);
}
1431

    
1432
#endif  // V8_INTERPRETED_REGEXP
1433

    
1434

    
1435
// Checks DispatchTableConstructor::AddInverse: after adding the inverse of a
// set of ranges under choice index 0, that index must be set for a character
// exactly when no range contains the character.
TEST(AddInverseToTable) {
  v8::internal::V8::Initialize(NULL);
  static const int kLimit = 1000;
  static const int kRangeCount = 16;

  // Ten rounds over pseudo-random ranges clamped to [0, kLimit].
  for (int round = 0; round < 10; round++) {
    Zone zone(CcTest::i_isolate());
    ZoneList<CharacterRange>* ranges =
        new(&zone) ZoneList<CharacterRange>(kRangeCount, &zone);
    for (int r = 0; r < kRangeCount; r++) {
      int from = PseudoRandom(round + 87, r + 25) % kLimit;
      int to = from + (PseudoRandom(r + 87, round + 25) % (kLimit / 20));
      if (to > kLimit) {
        to = kLimit;
      }
      ranges->Add(CharacterRange(from, to), &zone);
    }

    DispatchTable table(&zone);
    DispatchTableConstructor cons(&table, false, &zone);
    cons.set_choice_index(0);
    cons.AddInverse(ranges);

    for (int c = 0; c < kLimit; c++) {
      bool covered = false;
      for (int r = 0; r < kRangeCount; r++) {
        if (ranges->at(r).Contains(c)) {
          covered = true;
          break;
        }
      }
      OutSet* set = table.Get(c);
      CHECK_EQ(covered, !set->Get(0));
    }
  }

  // Fixed case: one range ending at 0xFFFE, checked at its last character
  // and at the character right after it.
  Zone zone(CcTest::i_isolate());
  ZoneList<CharacterRange>* ranges =
      new(&zone) ZoneList<CharacterRange>(1, &zone);
  ranges->Add(CharacterRange(0xFFF0, 0xFFFE), &zone);
  DispatchTable table(&zone);
  DispatchTableConstructor cons(&table, false, &zone);
  cons.set_choice_index(0);
  cons.AddInverse(ranges);
  CHECK(!table.Get(0xFFFE)->Get(0));
  CHECK(table.Get(0xFFFF)->Get(0));
}
1472

    
1473

    
1474
// Returns the result of unibrow::Ecma262Canonicalize::Convert for |c|, or |c|
// itself when Convert reports zero output characters. Any other count than
// one is a test failure.
static uc32 canonicalize(uc32 c) {
  unibrow::uchar converted[unibrow::Ecma262Canonicalize::kMaxWidth];
  const int produced =
      unibrow::Ecma262Canonicalize::Convert(c, '\0', converted, NULL);
  if (produced == 0) return c;
  CHECK_EQ(1, produced);
  return converted[0];
}
1484

    
1485

    
1486
// Consistency checks for canonicalize() against the unibrow un-canonicalize
// and to-uppercase mappings.
TEST(LatinCanonicalize) {
  unibrow::Mapping<unibrow::Ecma262UnCanonicalize> un_canonicalize;

  // Each ASCII letter canonicalizes like its uppercase form and
  // un-canonicalizes to exactly {upper, lower}.
  for (char small = 'a'; small <= 'z'; small++) {
    char capital = small + ('A' - 'a');
    CHECK_EQ(canonicalize(small), canonicalize(capital));
    unibrow::uchar variants[unibrow::Ecma262UnCanonicalize::kMaxWidth];
    int variant_count = un_canonicalize.get(small, '\0', variants);
    CHECK_EQ(2, variant_count);
    CHECK_EQ(capital, variants[0]);
    CHECK_EQ(small, variants[1]);
  }

  // No character at or above 128 may canonicalize to a value below 128.
  for (uc32 code = 128; code < (1 << 21); code++) {
    CHECK_GE(canonicalize(code), 128);
  }

  unibrow::Mapping<unibrow::ToUppercase> to_upper;
  // Canonicalization is only defined for the Basic Multilingual Plane.
  for (uc32 code = 0; code < (1 << 16); code++) {
    unibrow::uchar mapped[unibrow::ToUppercase::kMaxWidth];
    int mapped_length = to_upper.get(code, '\0', mapped);
    if (mapped_length == 0) {
      // No mapping: the character stands for itself.
      mapped_length = 1;
      mapped[0] = code;
    }
    uc32 expected = mapped[0];
    // Expect the character itself when the uppercase mapping has more than
    // one character, or maps a character >= 128 to one below 128.
    const bool multi_char = mapped_length > 1;
    const bool drops_below_128 = (code >= 128 && expected < 128);
    if (multi_char || drops_below_128) {
      expected = code;
    }
    CHECK_EQ(expected, canonicalize(code));
  }
}
1514

    
1515

    
1516
// Returns the result of unibrow::CanonicalizationRange::Convert for |c|, or
// |c| itself when Convert reports zero output characters. Any other count
// than one is a test failure.
static uc32 CanonRangeEnd(uc32 c) {
  unibrow::uchar converted[unibrow::CanonicalizationRange::kMaxWidth];
  const int produced =
      unibrow::CanonicalizationRange::Convert(c, '\0', converted, NULL);
  if (produced == 0) return c;
  CHECK_EQ(1, produced);
  return converted[0];
}
1526

    
1527

    
1528
TEST(RangeCanonicalization) {
  // Walk the range [0, 0xFFFF] block by block, with block ends taken from
  // CanonRangeEnd, and check that within a block the un-canonicalization of
  // every character equals that of the block's first character shifted by
  // the character's distance from the block start.
  unibrow::Mapping<unibrow::Ecma262UnCanonicalize> un_canonicalize;
  int range_first = 0;
  while (range_first <= 0xFFFF) {
    uc32 range_last = CanonRangeEnd(range_first);
    unsigned range_size = range_last - range_first + 1;
    if (range_size > 1) {
      unibrow::uchar base[unibrow::Ecma262UnCanonicalize::kMaxWidth];
      int base_count = un_canonicalize.get(range_first, '\0', base);
      for (unsigned delta = 1; delta < range_size; delta++) {
        unibrow::uchar shifted[unibrow::Ecma262UnCanonicalize::kMaxWidth];
        int shifted_count =
            un_canonicalize.get(range_first + delta, '\0', shifted);
        CHECK_EQ(base_count, shifted_count);
        for (int k = 0; k < shifted_count; k++) {
          int calc = base[k] + delta;
          int found = shifted[k];
          CHECK_EQ(calc, found);
        }
      }
    }
    // Continue with the first character after this block.
    range_first += range_size;
  }
}
1554

    
1555

    
1556
// For every character below 1 << 16, each member of its un-canonicalization
// must itself un-canonicalize to the very same list of characters.
TEST(UncanonicalizeEquivalence) {
  unibrow::Mapping<unibrow::Ecma262UnCanonicalize> un_canonicalize;
  unibrow::uchar members[unibrow::Ecma262UnCanonicalize::kMaxWidth];
  for (int code = 0; code < (1 << 16); code++) {
    int member_count = un_canonicalize.get(code, '\0', members);
    for (int m = 0; m < member_count; m++) {
      unibrow::uchar again[unibrow::Ecma262UnCanonicalize::kMaxWidth];
      int again_count = un_canonicalize.get(members[m], '\0', again);
      CHECK_EQ(member_count, again_count);
      for (int k = 0; k < member_count; k++) {
        CHECK_EQ(static_cast<int>(members[k]), static_cast<int>(again[k]));
      }
    }
  }
}
1570

    
1571

    
1572
static void TestRangeCaseIndependence(CharacterRange input,
1573
                                      Vector<CharacterRange> expected) {
1574
  Zone zone(CcTest::i_isolate());
1575
  int count = expected.length();
1576
  ZoneList<CharacterRange>* list =
1577
      new(&zone) ZoneList<CharacterRange>(count, &zone);
1578
  input.AddCaseEquivalents(list, false, &zone);
1579
  CHECK_EQ(count, list->length());
1580
  for (int i = 0; i < list->length(); i++) {
1581
    CHECK_EQ(expected[i].from(), list->at(i).from());
1582
    CHECK_EQ(expected[i].to(), list->at(i).to());
1583
  }
1584
}
1585

    
1586

    
1587
// Convenience wrapper around TestRangeCaseIndependence() for the case where
// exactly one range is expected: wraps |expected| in a one-element vector.
static void TestSimpleRangeCaseIndependence(CharacterRange input,
                                            CharacterRange expected) {
  EmbeddedVector<CharacterRange, 1> single_expected;
  single_expected[0] = expected;
  TestRangeCaseIndependence(input, single_expected);
}
1593

    
1594

    
1595
// Table-driven check of the case equivalents added for ASCII letter ranges.
// Each entry pairs an input range with the single range that
// TestSimpleRangeCaseIndependence() expects to be added for it.
TEST(CharacterRangeCaseIndependence) {
  v8::internal::V8::Initialize(NULL);

  struct Case {
    CharacterRange input;
    CharacterRange expected;
  };
  const Case cases[] = {
    { CharacterRange::Singleton('a'), CharacterRange::Singleton('A') },
    { CharacterRange::Singleton('z'), CharacterRange::Singleton('Z') },
    { CharacterRange('a', 'z'), CharacterRange('A', 'Z') },
    { CharacterRange('c', 'f'), CharacterRange('C', 'F') },
    { CharacterRange('a', 'b'), CharacterRange('A', 'B') },
    { CharacterRange('y', 'z'), CharacterRange('Y', 'Z') },
    // Inputs that extend one past the letters on both sides still yield
    // only the letter range.
    { CharacterRange('a' - 1, 'z' + 1), CharacterRange('A', 'Z') },
    { CharacterRange('A', 'Z'), CharacterRange('a', 'z') },
    { CharacterRange('C', 'F'), CharacterRange('c', 'f') },
    { CharacterRange('A' - 1, 'Z' + 1), CharacterRange('a', 'z') },
    // Here we need to add [l-z] to complete the case independence of
    // [A-Za-z] but we expect [a-z] to be added since we always add a
    // whole block at a time.
    { CharacterRange('A', 'k'), CharacterRange('a', 'z') },
  };

  // Plain sizeof arithmetic: Case is a local type, so it is kept away from
  // any template-based array-size helper.
  const int case_count = static_cast<int>(sizeof(cases) / sizeof(cases[0]));
  for (int index = 0; index < case_count; ++index) {
    TestSimpleRangeCaseIndependence(cases[index].input, cases[index].expected);
  }
}
1623

    
1624

    
1625
// Returns true if |c| lies within any range of |ranges|, with both range
// bounds treated as inclusive. A NULL list contains nothing.
static bool InClass(uc16 c, ZoneList<CharacterRange>* ranges) {
  const int range_count = (ranges == NULL) ? 0 : ranges->length();
  for (int index = 0; index < range_count; ++index) {
    CharacterRange candidate = ranges->at(index);
    if (candidate.from() > c) continue;
    if (c <= candidate.to()) return true;
  }
  return false;
}
1635

    
1636

    
1637
// Splits the "everything" class against the word-bounds overlay and checks,
// for every value in [0, 1 << 16), that membership in the resulting
// |included| / |excluded| lists agrees with a direct scan of the overlay.
TEST(CharClassDifference) {
  v8::internal::V8::Initialize(NULL);
  Zone zone(CcTest::i_isolate());

  ZoneList<CharacterRange>* base =
      new(&zone) ZoneList<CharacterRange>(1, &zone);
  base->Add(CharacterRange::Everything(), &zone);

  Vector<const int> overlay = CharacterRange::GetWordBounds();
  ZoneList<CharacterRange>* included = NULL;
  ZoneList<CharacterRange>* excluded = NULL;
  CharacterRange::Split(base, overlay, &included, &excluded, &zone);

  static const int kCodeUnitLimit = 1 << 16;
  for (int code = 0; code < kCodeUnitLimit; ++code) {
    if (!InClass(code, base)) {
      // Outside the base class: must appear in neither output list.
      CHECK(!InClass(code, included));
      CHECK(!InClass(code, excluded));
      continue;
    }

    // The overlay is read as consecutive (start, end) pairs, where start is
    // inclusive and end is exclusive.
    bool overlay_hit = false;
    for (int bound = 0; bound < overlay.length(); bound += 2) {
      if (overlay[bound] <= code && code < overlay[bound + 1]) {
        overlay_hit = true;
        break;
      }
    }
    CHECK_EQ(overlay_hit, InClass(code, included));
    CHECK_EQ(!overlay_hit, InClass(code, excluded));
  }
}
1663

    
1664

    
1665
// Exercises CharacterSet::Canonicalize() on a set backed by |list|. The list
// is refilled four times and, after each canonicalization, its contents are
// compared against the expected sorted / merged ranges.
//
// The expectations use CHECK_EQ, like the other tests in this file, so that
// they are enforced in every build configuration.
// NOTE(review): ASSERT_EQ is understood to be compiled out in non-debug
// builds of this code base, which would have made this test vacuous there;
// confirm against the checks header.
TEST(CanonicalizeCharacterSets) {
  v8::internal::V8::Initialize(NULL);
  Zone zone(CcTest::i_isolate());
  ZoneList<CharacterRange>* list =
      new(&zone) ZoneList<CharacterRange>(4, &zone);
  CharacterSet set(list);

  // Already sorted and disjoint: expected to come back unchanged.
  list->Add(CharacterRange(10, 20), &zone);
  list->Add(CharacterRange(30, 40), &zone);
  list->Add(CharacterRange(50, 60), &zone);
  set.Canonicalize();
  CHECK_EQ(3, list->length());
  CHECK_EQ(10, list->at(0).from());
  CHECK_EQ(20, list->at(0).to());
  CHECK_EQ(30, list->at(1).from());
  CHECK_EQ(40, list->at(1).to());
  CHECK_EQ(50, list->at(2).from());
  CHECK_EQ(60, list->at(2).to());

  // Disjoint but out of order: expected to be sorted by start.
  list->Rewind(0);
  list->Add(CharacterRange(10, 20), &zone);
  list->Add(CharacterRange(50, 60), &zone);
  list->Add(CharacterRange(30, 40), &zone);
  set.Canonicalize();
  CHECK_EQ(3, list->length());
  CHECK_EQ(10, list->at(0).from());
  CHECK_EQ(20, list->at(0).to());
  CHECK_EQ(30, list->at(1).from());
  CHECK_EQ(40, list->at(1).to());
  CHECK_EQ(50, list->at(2).from());
  CHECK_EQ(60, list->at(2).to());

  // Unordered mix of ranges and singletons, none touching: expected to be
  // sorted without any merging.
  list->Rewind(0);
  list->Add(CharacterRange(30, 40), &zone);
  list->Add(CharacterRange(10, 20), &zone);
  list->Add(CharacterRange(25, 25), &zone);
  list->Add(CharacterRange(100, 100), &zone);
  list->Add(CharacterRange(1, 1), &zone);
  set.Canonicalize();
  CHECK_EQ(5, list->length());
  CHECK_EQ(1, list->at(0).from());
  CHECK_EQ(1, list->at(0).to());
  CHECK_EQ(10, list->at(1).from());
  CHECK_EQ(20, list->at(1).to());
  CHECK_EQ(25, list->at(2).from());
  CHECK_EQ(25, list->at(2).to());
  CHECK_EQ(30, list->at(3).from());
  CHECK_EQ(40, list->at(3).to());
  CHECK_EQ(100, list->at(4).from());
  CHECK_EQ(100, list->at(4).to());

  // The singleton 20 bridges [10, 19] and [21, 30]: the three adjacent ranges
  // are expected to collapse into the single range [10, 30].
  list->Rewind(0);
  list->Add(CharacterRange(10, 19), &zone);
  list->Add(CharacterRange(21, 30), &zone);
  list->Add(CharacterRange(20, 20), &zone);
  set.Canonicalize();
  CHECK_EQ(1, list->length());
  CHECK_EQ(10, list->at(0).from());
  CHECK_EQ(30, list->at(0).to());
}
1725

    
1726

    
1727
// Builds two range lists, l1 (the "X" ranges) and l2 (the "Y" ranges), that
// together cover the ways two ranges can be positioned relative to each
// other, and checks that both lists are in canonical form.
//
// Only the canonical-form property of the inputs is verified here; no merge
// of the two lists is performed in this test.
//
// CHECK is used, like the other tests in this file, so that the verification
// runs in every build configuration.
// NOTE(review): ASSERT is understood to be compiled out in non-debug builds
// of this code base; confirm against the checks header.
TEST(CharacterRangeMerge) {
  v8::internal::V8::Initialize(NULL);
  Zone zone(CcTest::i_isolate());
  ZoneList<CharacterRange> l1(4, &zone);
  ZoneList<CharacterRange> l2(4, &zone);
  // Create all combinations of intersections of ranges, both singletons and
  // longer.

  // Start of the current scenario; advanced after each one so that
  // consecutive scenarios never touch.
  int offset = 0;

  // The five kinds of singleton intersections:
  //     X
  //   Y      - outside before
  //    Y     - outside touching start
  //     Y    - overlap
  //      Y   - outside touching end
  //       Y  - outside after

  for (int i = 0; i < 5; i++) {
    l1.Add(CharacterRange::Singleton(offset + 2), &zone);
    l2.Add(CharacterRange::Singleton(offset + i), &zone);
    offset += 6;
  }

  // The seven kinds of singleton/non-singleton intersections:
  //    XXX
  //  Y        - outside before
  //   Y       - outside touching start
  //    Y      - inside touching start
  //     Y     - entirely inside
  //      Y    - inside touching end
  //       Y   - outside touching end
  //        Y  - disjoint after

  for (int i = 0; i < 7; i++) {
    l1.Add(CharacterRange::Range(offset + 2, offset + 4), &zone);
    l2.Add(CharacterRange::Singleton(offset + i), &zone);
    offset += 8;
  }

  // The eleven kinds of non-singleton intersections:
  //
  //       XXXXXXXX
  // YYYY                  - outside before.
  //   YYYY                - outside touching start.
  //     YYYY              - overlapping start
  //       YYYY            - inside touching start
  //         YYYY          - entirely inside
  //           YYYY        - inside touching end
  //             YYYY      - overlapping end
  //               YYYY    - outside touching end
  //                 YYYY  - outside after
  //       YYYYYYYY        - identical
  //     YYYYYYYYYYYY      - containing entirely.

  // The first nine kinds: Y is 4 wide and slides right by 2 per iteration.
  // NOTE(review): X below spans [offset + 6, offset + 15], i.e. 10 values
  // with inclusive bounds, whereas the diagram above draws X as 8 wide, so
  // the labels for the later positions do not line up exactly with the data.
  for (int i = 0; i < 9; i++) {
    l1.Add(CharacterRange::Range(offset + 6, offset + 15), &zone);
    l2.Add(CharacterRange::Range(offset + 2 * i, offset + 2 * i + 3), &zone);
    offset += 22;
  }
  // Identical ranges.
  l1.Add(CharacterRange::Range(offset + 6, offset + 15), &zone);
  l2.Add(CharacterRange::Range(offset + 6, offset + 15), &zone);
  offset += 22;
  // Y entirely contains X.
  l1.Add(CharacterRange::Range(offset + 6, offset + 15), &zone);
  l2.Add(CharacterRange::Range(offset + 4, offset + 17), &zone);
  offset += 22;

  // Different kinds of multi-range overlap:
  // XXXXXXXXXXXXXXXXXXXXXX         XXXXXXXXXXXXXXXXXXXXXX
  //   YYYY  Y  YYYY  Y  YYYY  Y  YYYY  Y  YYYY  Y  YYYY  Y

  l1.Add(CharacterRange::Range(offset, offset + 21), &zone);
  l1.Add(CharacterRange::Range(offset + 31, offset + 52), &zone);
  for (int i = 0; i < 6; i++) {
    l2.Add(CharacterRange::Range(offset + 2, offset + 5), &zone);
    l2.Add(CharacterRange::Singleton(offset + 8), &zone);
    offset += 9;
  }

  CHECK(CharacterRange::IsCanonical(&l1));
  CHECK(CharacterRange::IsCanonical(&l2));
}
1813

    
1814

    
1815
// Runs Execute() on a word-boundary-delimited word pattern. The test contains
// no explicit checks of its own.
TEST(Graph) {
  static const char kWordPattern[] = "\\b\\w+\\b";
  V8::Initialize(NULL);
  Execute(kWordPattern, false, true, true);
}