The data contained in this repository can be downloaded to your computer using one of several clients.
Please see the documentation of your version control software client for more information.

Please select the desired protocol below to get the URL.

This URL has Read-Only access.

Statistics
| Branch: | Revision:

main_repo / deps / v8 / src / regexp.js @ f230a1cf

History | View | Annotate | Download (17.3 KB)

1
// Copyright 2012 the V8 project authors. All rights reserved.
2
// Redistribution and use in source and binary forms, with or without
3
// modification, are permitted provided that the following conditions are
4
// met:
5
//
6
//     * Redistributions of source code must retain the above copyright
7
//       notice, this list of conditions and the following disclaimer.
8
//     * Redistributions in binary form must reproduce the above
9
//       copyright notice, this list of conditions and the following
10
//       disclaimer in the documentation and/or other materials provided
11
//       with the distribution.
12
//     * Neither the name of Google Inc. nor the names of its
13
//       contributors may be used to endorse or promote products derived
14
//       from this software without specific prior written permission.
15
//
16
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
18
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
19
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
20
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
26
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27

    
28
// This file relies on the fact that the following declaration has been made
29
// in runtime.js:
30
// var $Object = global.Object;
31
// var $Array = global.Array;
32

    
33
// Local alias for the global RegExp constructor; the rest of this file refers
// to the constructor and its prototype through this name.
var $RegExp = global.RegExp;
34

    
35
// -------------------------------------------------------------------
36

    
37
// Shared initialization for the RegExp constructor and RegExp.prototype.compile
38
// (ECMA-262 15.10.4): validates the flags, then initializes and compiles the object.
39
function DoConstructRegExp(object, pattern, flags) {
40
  // RegExp : Called as constructor; see ECMA-262, section 15.10.4.
41
  if (IS_REGEXP(pattern)) {
42
    if (!IS_UNDEFINED(flags)) {
43
      throw MakeTypeError('regexp_flags', []);
44
    }
45
    flags = (pattern.global ? 'g' : '')
46
        + (pattern.ignoreCase ? 'i' : '')
47
        + (pattern.multiline ? 'm' : '');
48
    pattern = pattern.source;
49
  }
50

    
51
  pattern = IS_UNDEFINED(pattern) ? '' : ToString(pattern);
52
  flags = IS_UNDEFINED(flags) ? '' : ToString(flags);
53

    
54
  var global = false;
55
  var ignoreCase = false;
56
  var multiline = false;
57
  for (var i = 0; i < flags.length; i++) {
58
    var c = %_CallFunction(flags, i, StringCharAt);
59
    switch (c) {
60
      case 'g':
61
        if (global) {
62
          throw MakeSyntaxError("invalid_regexp_flags", [flags]);
63
        }
64
        global = true;
65
        break;
66
      case 'i':
67
        if (ignoreCase) {
68
          throw MakeSyntaxError("invalid_regexp_flags", [flags]);
69
        }
70
        ignoreCase = true;
71
        break;
72
      case 'm':
73
        if (multiline) {
74
          throw MakeSyntaxError("invalid_regexp_flags", [flags]);
75
        }
76
        multiline = true;
77
        break;
78
      default:
79
        throw MakeSyntaxError("invalid_regexp_flags", [flags]);
80
    }
81
  }
82

    
83
  %RegExpInitializeObject(object, pattern, global, ignoreCase, multiline);
84

    
85
  // Call internal function to compile the pattern.
86
  %RegExpCompile(object, pattern, flags);
87
}
88

    
89

    
90
function RegExpConstructor(pattern, flags) {
91
  if (%_IsConstructCall()) {
92
    DoConstructRegExp(this, pattern, flags);
93
  } else {
94
    // RegExp : Called as function; see ECMA-262, section 15.10.3.1.
95
    if (IS_REGEXP(pattern) && IS_UNDEFINED(flags)) {
96
      return pattern;
97
    }
98
    return new $RegExp(pattern, flags);
99
  }
100
}
101

    
102
// Deprecated RegExp.prototype.compile method.  We behave as if the constructor
103
// were called again.  In SpiderMonkey, this method returns the regexp object.
104
// In JSC, it returns undefined.  For compatibility with JSC, we match their
105
// behavior.
106
function RegExpCompile(pattern, flags) {
107
  // Both JSC and SpiderMonkey treat a missing pattern argument as the
108
  // empty subject string, and an actual undefined value passed as the
109
  // pattern as the string 'undefined'.  Note that JSC is inconsistent
110
  // here, treating undefined values differently in
111
  // RegExp.prototype.compile and in the constructor, where they are
112
  // the empty string.  For compatibility with JSC, we match their
113
  // behavior.
114
  if (this == $RegExp.prototype) {
115
    // We don't allow recompiling RegExp.prototype.
116
    throw MakeTypeError('incompatible_method_receiver',
117
                        ['RegExp.prototype.compile', this]);
118
  }
119
  if (IS_UNDEFINED(pattern) && %_ArgumentsLength() != 0) {
120
    DoConstructRegExp(this, 'undefined', flags);
121
  } else {
122
    DoConstructRegExp(this, pattern, flags);
123
  }
124
}
125

    
126

    
127
function DoRegExpExec(regexp, string, index) {
128
  var result = %_RegExpExec(regexp, string, index, lastMatchInfo);
129
  if (result !== null) lastMatchInfoOverride = null;
130
  return result;
131
}
132

    
133

    
134
function BuildResultFromMatchInfo(lastMatchInfo, s) {
135
  var numResults = NUMBER_OF_CAPTURES(lastMatchInfo) >> 1;
136
  var start = lastMatchInfo[CAPTURE0];
137
  var end = lastMatchInfo[CAPTURE1];
138
  var result = %_RegExpConstructResult(numResults, start, s);
139
  result[0] = %_SubString(s, start, end);
140
  var j = REGEXP_FIRST_CAPTURE + 2;
141
  for (var i = 1; i < numResults; i++) {
142
    start = lastMatchInfo[j++];
143
    if (start != -1) {
144
      end = lastMatchInfo[j];
145
      result[i] = %_SubString(s, start, end);
146
    }
147
    j++;
148
  }
149
  return result;
150
}
151

    
152

    
153
function RegExpExecNoTests(regexp, string, start) {
154
  // Must be called with RegExp, string and positive integer as arguments.
155
  var matchInfo = %_RegExpExec(regexp, string, start, lastMatchInfo);
156
  if (matchInfo !== null) {
157
    lastMatchInfoOverride = null;
158
    return BuildResultFromMatchInfo(matchInfo, string);
159
  }
160
  regexp.lastIndex = 0;
161
  return null;
162
}
163

    
164

    
165
function RegExpExec(string) {
166
  if (!IS_REGEXP(this)) {
167
    throw MakeTypeError('incompatible_method_receiver',
168
                        ['RegExp.prototype.exec', this]);
169
  }
170

    
171
  string = TO_STRING_INLINE(string);
172
  var lastIndex = this.lastIndex;
173

    
174
  // Conversion is required by the ES5 specification (RegExp.prototype.exec
175
  // algorithm, step 5) even if the value is discarded for non-global RegExps.
176
  var i = TO_INTEGER(lastIndex);
177

    
178
  var global = this.global;
179
  if (global) {
180
    if (i < 0 || i > string.length) {
181
      this.lastIndex = 0;
182
      return null;
183
    }
184
  } else {
185
    i = 0;
186
  }
187

    
188
  %_Log('regexp', 'regexp-exec,%0r,%1S,%2i', [this, string, lastIndex]);
189
  // matchIndices is either null or the lastMatchInfo array.
190
  var matchIndices = %_RegExpExec(this, string, i, lastMatchInfo);
191

    
192
  if (IS_NULL(matchIndices)) {
193
    this.lastIndex = 0;
194
    return null;
195
  }
196

    
197
  // Successful match.
198
  lastMatchInfoOverride = null;
199
  if (global) {
200
    this.lastIndex = lastMatchInfo[CAPTURE1];
201
  }
202
  return BuildResultFromMatchInfo(matchIndices, string);
203
}
204

    
205

    
206
// One-element cache for the simplified test regexp.
207
// Cache key: the RegExp object most recently handled by TrimRegExp.
var regexp_key;
208
// Cache value: the trimmed RegExp that TrimRegExp built for regexp_key.
var regexp_val;
209

    
210
// Section 15.10.6.3 doesn't actually make sense, but the intention seems to be
211
// that test is defined in terms of RegExp.prototype.exec. However, it probably
212
// means the original value of RegExp.prototype.exec, which is what everybody
213
// else implements.
214
function RegExpTest(string) {
215
  if (!IS_REGEXP(this)) {
216
    throw MakeTypeError('incompatible_method_receiver',
217
                        ['RegExp.prototype.test', this]);
218
  }
219
  string = TO_STRING_INLINE(string);
220

    
221
  var lastIndex = this.lastIndex;
222

    
223
  // Conversion is required by the ES5 specification (RegExp.prototype.exec
224
  // algorithm, step 5) even if the value is discarded for non-global RegExps.
225
  var i = TO_INTEGER(lastIndex);
226

    
227
  if (this.global) {
228
    if (i < 0 || i > string.length) {
229
      this.lastIndex = 0;
230
      return false;
231
    }
232
    %_Log('regexp', 'regexp-exec,%0r,%1S,%2i', [this, string, lastIndex]);
233
    // matchIndices is either null or the lastMatchInfo array.
234
    var matchIndices = %_RegExpExec(this, string, i, lastMatchInfo);
235
    if (IS_NULL(matchIndices)) {
236
      this.lastIndex = 0;
237
      return false;
238
    }
239
    lastMatchInfoOverride = null;
240
    this.lastIndex = lastMatchInfo[CAPTURE1];
241
    return true;
242
  } else {
243
    // Non-global regexp.
244
    // Remove irrelevant preceeding '.*' in a non-global test regexp.
245
    // The expression checks whether this.source starts with '.*' and
246
    // that the third char is not a '?'.
247
    var regexp = this;
248
    if (%_StringCharCodeAt(regexp.source, 0) == 46 &&  // '.'
249
        %_StringCharCodeAt(regexp.source, 1) == 42 &&  // '*'
250
        %_StringCharCodeAt(regexp.source, 2) != 63) {  // '?'
251
      regexp = TrimRegExp(regexp);
252
    }
253
    %_Log('regexp', 'regexp-exec,%0r,%1S,%2i', [regexp, string, lastIndex]);
254
    // matchIndices is either null or the lastMatchInfo array.
255
    var matchIndices = %_RegExpExec(regexp, string, 0, lastMatchInfo);
256
    if (IS_NULL(matchIndices)) {
257
      this.lastIndex = 0;
258
      return false;
259
    }
260
    lastMatchInfoOverride = null;
261
    return true;
262
  }
263
}
264

    
265
function TrimRegExp(regexp) {
266
  if (!%_ObjectEquals(regexp_key, regexp)) {
267
    regexp_key = regexp;
268
    regexp_val =
269
      new $RegExp(%_SubString(regexp.source, 2, regexp.source.length),
270
                  (regexp.ignoreCase ? regexp.multiline ? "im" : "i"
271
                                     : regexp.multiline ? "m" : ""));
272
  }
273
  return regexp_val;
274
}
275

    
276

    
277
function RegExpToString() {
  // RegExp.prototype.toString: renders the receiver as '/source/' followed
  // by the letters of the flags that are set, in the order g, i, m.
  // Throws the 'incompatible_method_receiver' TypeError for other receivers.
  if (!IS_REGEXP(this)) {
    throw MakeTypeError('incompatible_method_receiver',
                        ['RegExp.prototype.toString', this]);
  }
  var source = this.source;
  var flags = (this.global ? 'g' : '') +
              (this.ignoreCase ? 'i' : '') +
              (this.multiline ? 'm' : '');
  return '/' + source + '/' + flags;
}
288

    
289

    
290
// Getters for the static properties lastMatch, lastParen, leftContext, and
291
// rightContext of the RegExp constructor.  The properties are computed based
292
// on the captures array of the last successful match and the subject string
293
// of the last successful match.
294
function RegExpGetLastMatch() {
295
  if (lastMatchInfoOverride !== null) {
296
    return OVERRIDE_MATCH(lastMatchInfoOverride);
297
  }
298
  var regExpSubject = LAST_SUBJECT(lastMatchInfo);
299
  return %_SubString(regExpSubject,
300
                     lastMatchInfo[CAPTURE0],
301
                     lastMatchInfo[CAPTURE1]);
302
}
303

    
304

    
305
function RegExpGetLastParen() {
306
  if (lastMatchInfoOverride) {
307
    var override = lastMatchInfoOverride;
308
    if (override.length <= 3) return '';
309
    return override[override.length - 3];
310
  }
311
  var length = NUMBER_OF_CAPTURES(lastMatchInfo);
312
  if (length <= 2) return '';  // There were no captures.
313
  // We match the SpiderMonkey behavior: return the substring defined by the
314
  // last pair (after the first pair) of elements of the capture array even if
315
  // it is empty.
316
  var regExpSubject = LAST_SUBJECT(lastMatchInfo);
317
  var start = lastMatchInfo[CAPTURE(length - 2)];
318
  var end = lastMatchInfo[CAPTURE(length - 1)];
319
  if (start != -1 && end != -1) {
320
    return %_SubString(regExpSubject, start, end);
321
  }
322
  return "";
323
}
324

    
325

    
326
function RegExpGetLeftContext() {
327
  var start_index;
328
  var subject;
329
  if (!lastMatchInfoOverride) {
330
    start_index = lastMatchInfo[CAPTURE0];
331
    subject = LAST_SUBJECT(lastMatchInfo);
332
  } else {
333
    var override = lastMatchInfoOverride;
334
    start_index = OVERRIDE_POS(override);
335
    subject = OVERRIDE_SUBJECT(override);
336
  }
337
  return %_SubString(subject, 0, start_index);
338
}
339

    
340

    
341
function RegExpGetRightContext() {
342
  var start_index;
343
  var subject;
344
  if (!lastMatchInfoOverride) {
345
    start_index = lastMatchInfo[CAPTURE1];
346
    subject = LAST_SUBJECT(lastMatchInfo);
347
  } else {
348
    var override = lastMatchInfoOverride;
349
    subject = OVERRIDE_SUBJECT(override);
350
    var match = OVERRIDE_MATCH(override);
351
    start_index = OVERRIDE_POS(override) + match.length;
352
  }
353
  return %_SubString(subject, start_index, subject.length);
354
}
355

    
356

    
357
// The properties $1..$9 are the first nine capturing substrings of the last
358
// successful match, or ''.  The function RegExpMakeCaptureGetter will be
359
// called with indices from 1 to 9.
360
function RegExpMakeCaptureGetter(n) {
361
  return function() {
362
    if (lastMatchInfoOverride) {
363
      if (n < lastMatchInfoOverride.length - 2) {
364
        return OVERRIDE_CAPTURE(lastMatchInfoOverride, n);
365
      }
366
      return '';
367
    }
368
    var index = n * 2;
369
    if (index >= NUMBER_OF_CAPTURES(lastMatchInfo)) return '';
370
    var matchStart = lastMatchInfo[CAPTURE(index)];
371
    var matchEnd = lastMatchInfo[CAPTURE(index + 1)];
372
    if (matchStart == -1 || matchEnd == -1) return '';
373
    return %_SubString(LAST_SUBJECT(lastMatchInfo), matchStart, matchEnd);
374
  };
375
}
376

    
377

    
378
// Property of the builtins object for recording the result of the last
379
// regexp match.  The property lastMatchInfo includes the matchIndices
380
// array of the last successful regexp match (an array of start/end index
381
// pairs for the match and all the captured substrings), the invariant is
382
// that there are at least two capture indices.  The array also contains
383
// the subject string for the last successful match.
384
// Initial contents, in argument order:
//   2          REGEXP_NUMBER_OF_CAPTURES
//   ""         Last subject.
//   UNDEFINED  Last input - settable with RegExpSetInput.
//   0          REGEXP_FIRST_CAPTURE + 0
//   0          REGEXP_FIRST_CAPTURE + 1
var lastMatchInfo = new InternalPackedArray(2, "", UNDEFINED, 0, 0);
391

    
392
// Override last match info with an array of actual substrings.
393
// Used internally by replace regexp with function.
394
// The array has the format of an "apply" argument for a replacement
395
// function.
396
// While this is null no override is active and the static RegExp getters
// read from lastMatchInfo; successful matches in this file reset it to null.
var lastMatchInfoOverride = null;
397

    
398
// -------------------------------------------------------------------
399

    
400
function SetUpRegExp() {
401
  %CheckIsBootstrapping();
402
  %FunctionSetInstanceClassName($RegExp, 'RegExp');
403
  %SetProperty($RegExp.prototype, 'constructor', $RegExp, DONT_ENUM);
404
  %SetCode($RegExp, RegExpConstructor);
405

    
406
  InstallFunctions($RegExp.prototype, DONT_ENUM, $Array(
407
    "exec", RegExpExec,
408
    "test", RegExpTest,
409
    "toString", RegExpToString,
410
    "compile", RegExpCompile
411
  ));
412

    
413
  // The length of compile is 1 in SpiderMonkey.
414
  %FunctionSetLength($RegExp.prototype.compile, 1);
415

    
416
  // The properties input, $input, and $_ are aliases for each other.  When this
417
  // value is set the value it is set to is coerced to a string.
418
  // Getter and setter for the input.
419
  var RegExpGetInput = function() {
420
    var regExpInput = LAST_INPUT(lastMatchInfo);
421
    return IS_UNDEFINED(regExpInput) ? "" : regExpInput;
422
  };
423
  var RegExpSetInput = function(string) {
424
    LAST_INPUT(lastMatchInfo) = ToString(string);
425
  };
426

    
427
  %OptimizeObjectForAddingMultipleProperties($RegExp, 22);
428
  %DefineOrRedefineAccessorProperty($RegExp, 'input', RegExpGetInput,
429
                                    RegExpSetInput, DONT_DELETE);
430
  %DefineOrRedefineAccessorProperty($RegExp, '$_', RegExpGetInput,
431
                                    RegExpSetInput, DONT_ENUM | DONT_DELETE);
432
  %DefineOrRedefineAccessorProperty($RegExp, '$input', RegExpGetInput,
433
                                    RegExpSetInput, DONT_ENUM | DONT_DELETE);
434

    
435
  // The properties multiline and $* are aliases for each other.  When this
436
  // value is set in SpiderMonkey, the value it is set to is coerced to a
437
  // boolean.  We mimic that behavior with a slight difference: in SpiderMonkey
438
  // the value of the expression 'RegExp.multiline = null' (for instance) is the
439
  // boolean false (i.e., the value after coercion), while in V8 it is the value
440
  // null (i.e., the value before coercion).
441

    
442
  // Getter and setter for multiline.
443
  var multiline = false;
444
  var RegExpGetMultiline = function() { return multiline; };
445
  var RegExpSetMultiline = function(flag) { multiline = flag ? true : false; };
446

    
447
  %DefineOrRedefineAccessorProperty($RegExp, 'multiline', RegExpGetMultiline,
448
                                    RegExpSetMultiline, DONT_DELETE);
449
  %DefineOrRedefineAccessorProperty($RegExp, '$*', RegExpGetMultiline,
450
                                    RegExpSetMultiline,
451
                                    DONT_ENUM | DONT_DELETE);
452

    
453

    
454
  var NoOpSetter = function(ignored) {};
455

    
456

    
457
  // Static properties set by a successful match.
458
  %DefineOrRedefineAccessorProperty($RegExp, 'lastMatch', RegExpGetLastMatch,
459
                                    NoOpSetter, DONT_DELETE);
460
  %DefineOrRedefineAccessorProperty($RegExp, '$&', RegExpGetLastMatch,
461
                                    NoOpSetter, DONT_ENUM | DONT_DELETE);
462
  %DefineOrRedefineAccessorProperty($RegExp, 'lastParen', RegExpGetLastParen,
463
                                    NoOpSetter, DONT_DELETE);
464
  %DefineOrRedefineAccessorProperty($RegExp, '$+', RegExpGetLastParen,
465
                                    NoOpSetter, DONT_ENUM | DONT_DELETE);
466
  %DefineOrRedefineAccessorProperty($RegExp, 'leftContext',
467
                                    RegExpGetLeftContext, NoOpSetter,
468
                                    DONT_DELETE);
469
  %DefineOrRedefineAccessorProperty($RegExp, '$`', RegExpGetLeftContext,
470
                                    NoOpSetter, DONT_ENUM | DONT_DELETE);
471
  %DefineOrRedefineAccessorProperty($RegExp, 'rightContext',
472
                                    RegExpGetRightContext, NoOpSetter,
473
                                    DONT_DELETE);
474
  %DefineOrRedefineAccessorProperty($RegExp, "$'", RegExpGetRightContext,
475
                                    NoOpSetter, DONT_ENUM | DONT_DELETE);
476

    
477
  for (var i = 1; i < 10; ++i) {
478
    %DefineOrRedefineAccessorProperty($RegExp, '$' + i,
479
                                      RegExpMakeCaptureGetter(i), NoOpSetter,
480
                                      DONT_DELETE);
481
  }
482
  %ToFastProperties($RegExp);
483
}
484

    
485
// Install the RegExp builtins as soon as this file is evaluated.
SetUpRegExp();