The data contained in this repository can be downloaded to your computer using one of several clients.
Please see the documentation of your version control software client for more information.

Please select the desired protocol below to get the URL.

This URL has Read-Only access.

Statistics
| Branch: | Revision:

main_repo / deps / libebb / ebb_request_parser.rl @ 40c0f755

History | View | Annotate | Download (12.7 KB)

1 40c0f755 Ryan
/* This file is part of the libebb web server library
 *
 * Copyright (c) 2008 Ryan Dahl (ry@ndahl.us)
 * All rights reserved.
 *
 * This parser is based on code from Zed Shaw's Mongrel.
 * Copyright (c) 2005 Zed A. Shaw
 *
 * Permission is hereby granted, free of charge, to any person obtaining
 * a copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sublicense, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice shall be
 * included in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 */
28
#include "ebb_request_parser.h"
29
30
#include <stdio.h>
31
#include <assert.h>
32
33
/*
 * Hex-digit decoding table, indexed by character code 0..127.
 *
 * Entries for '0'-'9', 'A'-'F' and 'a'-'f' hold the digit value (0..15);
 * every other entry is -1. The table has exactly 128 entries, so callers
 * must only index it with values in that range. It is never written, hence
 * const.
 */
static const int unhex[] =
  { -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1  /* 0x00 - 0x0f */
  , -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1  /* 0x10 - 0x1f */
  , -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1  /* 0x20 - 0x2f */
  ,  0, 1, 2, 3, 4, 5, 6, 7, 8, 9,-1,-1,-1,-1,-1,-1  /* '0' - '9'   */
  , -1,10,11,12,13,14,15,-1,-1,-1,-1,-1,-1,-1,-1,-1  /* 'A' - 'F'   */
  , -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1  /* 0x50 - 0x5f */
  , -1,10,11,12,13,14,15,-1,-1,-1,-1,-1,-1,-1,-1,-1  /* 'a' - 'f'   */
  , -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1  /* 0x70 - 0x7f */
  };
42
#define TRUE 1
#define FALSE 0

/* Smaller of two values. Arguments are parenthesized so that expressions
 * with low-precedence operators expand correctly; each argument may still
 * be evaluated twice, so do not pass expressions with side effects. */
#define MIN(a,b) ((a) < (b) ? (a) : (b))

/* Shorthands that rely on variables named `p`, `pe` and `parser` being in
 * scope at the point of use. */
#define REMAINING (pe - p)
#define CURRENT (parser->current_request)
#define CONTENT_LENGTH (parser->current_request->content_length)

/* If the FOR mark is set and the current request has an on_FOR callback,
 * invoke it with the bytes between the mark and `p`.
 * Wrapped in do/while(0) so it acts as one statement after an unbraced
 * if/else. The mark is left unchanged; callers clear it themselves. */
#define CALLBACK(FOR)                                   \
  do {                                                  \
    if(parser->FOR##_mark && CURRENT->on_##FOR) {       \
      CURRENT->on_##FOR( CURRENT                        \
                  , parser->FOR##_mark                  \
                  , p - parser->FOR##_mark              \
                  );                                    \
    }                                                   \
  } while(0)

/* Same as CALLBACK, but additionally passes the request's current
 * number_of_headers as a fourth argument. */
#define HEADER_CALLBACK(FOR)                            \
  do {                                                  \
    if(parser->FOR##_mark && CURRENT->on_##FOR) {       \
      CURRENT->on_##FOR( CURRENT                        \
                  , parser->FOR##_mark                  \
                  , p - parser->FOR##_mark              \
                  , CURRENT->number_of_headers          \
                  );                                    \
    }                                                   \
  } while(0)

/* Invoke on_complete (if set) for the current request, then detach it from
 * the parser by setting CURRENT to NULL. Both steps live inside one
 * do/while(0) so the NULL assignment cannot escape an enclosing unbraced
 * `if`. */
#define END_REQUEST                                     \
  do {                                                  \
    if(CURRENT->on_complete)                            \
      CURRENT->on_complete(CURRENT);                    \
    CURRENT = NULL;                                     \
  } while(0)
68
69
70
%%{
71
  machine ebb_request_parser;
72
73
  # Mark actions: each one stores the current read position `p` in the
  # parser so that the matching emit action can later hand the span from
  # the mark up to `p` to a callback.
  action mark_header_field { parser->header_field_mark = p; }
  action mark_header_value { parser->header_value_mark = p; }
  action mark_fragment     { parser->fragment_mark     = p; }
  action mark_query_string { parser->query_string_mark = p; }
  action mark_request_path { parser->path_mark         = p; }
  action mark_request_uri  { parser->uri_mark          = p; }
79
80
  # Emit actions: pass the span that starts at the corresponding mark and
  # ends at `p` to the request callback (when both the mark and the callback
  # are set), then clear the mark.

  # Header name; the callback also receives number_of_headers.
  action write_field {
    HEADER_CALLBACK(header_field);
    parser->header_field_mark = NULL;
  }

  # Header value; the callback also receives number_of_headers.
  action write_value {
    HEADER_CALLBACK(header_value);
    parser->header_value_mark = NULL;
  }

  # Request URI.
  action request_uri {
    CALLBACK(uri);
    parser->uri_mark = NULL;
  }

  # Fragment.
  action fragment {
    CALLBACK(fragment);
    parser->fragment_mark = NULL;
  }

  # Query string.
  action query_string {
    CALLBACK(query_string);
    parser->query_string_mark = NULL;
  }

  # Request path.
  action request_path {
    CALLBACK(path);
    parser->path_mark = NULL;
  }
109
110
  # Accumulate one decimal digit of the Content-Length value.
  # NOTE(review): no overflow check on the accumulated value.
  action content_length {
    CURRENT->content_length = CURRENT->content_length * 10 + (*p - '0');
  }

  # Transfer-encoding selection.
  action use_identity_encoding {
    CURRENT->transfer_encoding = EBB_IDENTITY;
  }
  action use_chunked_encoding {
    CURRENT->transfer_encoding = EBB_CHUNKED;
  }

  # Explicit Connection header values.
  action set_keep_alive {
    CURRENT->keep_alive = TRUE;
  }
  action set_not_keep_alive {
    CURRENT->keep_alive = FALSE;
  }

  # Record that the client sent an Expect: 100-continue header.
  action expect_continue { CURRENT->expect_continue = TRUE; }

  action trailer {
    /* not implemented yet. (do requests even have trailing headers?) */
  }

  # Accumulate one decimal digit of the HTTP major / minor version.
  action version_major {
    CURRENT->version_major = CURRENT->version_major * 10 + (*p - '0');
  }
  action version_minor {
    CURRENT->version_minor = CURRENT->version_minor * 10 + (*p - '0');
  }

  # Count one header line.
  action end_header_line { CURRENT->number_of_headers += 1; }

  # Notify the request, if it asked to be told, that the headers are done.
  action end_headers {
    if(CURRENT->on_headers_complete) {
      CURRENT->on_headers_complete(CURRENT);
    }
  }
147
148
  # Shift the accumulated chunk size left by one hex digit and add the
  # value of the digit at `p`, decoded through the unhex table.
  action add_to_chunk_size {
    parser->chunk_size = parser->chunk_size * 16 + unhex[(int)*p];
  }
152
153
  # Entered on the first byte of a chunk body. Hands as much of the chunk
  # as the current buffer holds to skip_body(), which advances `p` and
  # decrements parser->chunk_size.
  action skip_chunk_data {
    skip_body(&p, parser, MIN(parser->chunk_size, REMAINING));
    /* step back one byte; the state machine advances p again itself */
    fhold;
    if(parser->chunk_size > REMAINING) {
      /* chunk continues past this buffer: stop and wait for more input */
      fbreak;
    } else {
      /* whole chunk consumed: expect the CRLF that terminates it */
      fgoto chunk_end;
    }
  }
162
163
  # End of a chunked message: complete the current request and return the
  # machine to `main` so another request can follow.
  action end_chunked_body {
    END_REQUEST;
    fnext main;
  }
167
168
  # Start of a request: obtain a fresh request object from the user-supplied
  # new_request callback. No request may be in progress at this point.
  # NOTE(review): a NULL return from new_request is not handled here and
  # would be dereferenced by later actions - confirm the callback contract.
  action start_req {
    assert(CURRENT == NULL);
    CURRENT = parser->new_request(parser->data);
  }
172
173
  # Runs on the last character of the request header and decides how the
  # body is read.
  action body_logic {
    if(CURRENT->transfer_encoding == EBB_CHUNKED) {
      /* chunked: continue in the ChunkedBody machine */
      fnext ChunkedBody;
    } else {
      /*
       * Identity body: treat it as one chunk of content_length bytes.
       * This duplicates skip_chunk_data; ideally the two would be merged.
       */
      parser->chunk_size = CURRENT->content_length;
      p += 1;  /* move past the current header byte to the body start */
      skip_body(&p, parser, MIN(REMAINING, CURRENT->content_length));
      /* step back one byte; the state machine advances p again itself */
      fhold;
      if(parser->chunk_size > REMAINING) {
        /* body continues past this buffer: stop and wait for more input */
        fbreak;
      }
    }
  }
187
188
#
##
###
#### HTTP/1.1 STATE MACHINE
###
##   RequestHeaders and character types are from
#    Zed Shaw's beautiful Mongrel parser.

  CRLF = "\r\n";

# character types
  CTL = (cntrl | 127);
  safe = ("$" | "-" | "_" | ".");
  extra = ("!" | "*" | "'" | "(" | ")" | ",");
  reserved = (";" | "/" | "?" | ":" | "@" | "&" | "=" | "+");
  unsafe = (CTL | " " | "\"" | "#" | "%" | "<" | ">");
  national = any -- (alpha | digit | reserved | extra | safe | unsafe);
  unreserved = (alpha | digit | safe | extra | national);
  escape = ("%" xdigit xdigit);
  uchar = (unreserved | escape);
  pchar = (uchar | ":" | "@" | "&" | "=" | "+");
  tspecials = ("(" | ")" | "<" | ">" | "@" | "," | ";" | ":" | "\\" | "\"" | "/" | "[" | "]" | "?" | "=" | "{" | "}" | " " | "\t");

# elements
  token = (ascii -- (CTL | tspecials));
  quote = "\"";
#  qdtext = token -- "\"";
#  quoted_pair = "\" ascii;
#  quoted_string = "\"" (qdtext | quoted_pair )* "\"";

#  headers

# Only the methods listed here are recognised; each one records its
# EBB_* constant on the current request when the method token ends.
  Method = ( "COPY"      %{ CURRENT->method = EBB_COPY;      }
           | "DELETE"    %{ CURRENT->method = EBB_DELETE;    }
           | "GET"       %{ CURRENT->method = EBB_GET;       }
           | "HEAD"      %{ CURRENT->method = EBB_HEAD;      }
           | "LOCK"      %{ CURRENT->method = EBB_LOCK;      }
           | "MKCOL"     %{ CURRENT->method = EBB_MKCOL;     }
           | "MOVE"      %{ CURRENT->method = EBB_MOVE;      }
           | "OPTIONS"   %{ CURRENT->method = EBB_OPTIONS;   }
           | "POST"      %{ CURRENT->method = EBB_POST;      }
           | "PROPFIND"  %{ CURRENT->method = EBB_PROPFIND;  }
           | "PROPPATCH" %{ CURRENT->method = EBB_PROPPATCH; }
           | "PUT"       %{ CURRENT->method = EBB_PUT;       }
           | "TRACE"     %{ CURRENT->method = EBB_TRACE;     }
           | "UNLOCK"    %{ CURRENT->method = EBB_UNLOCK;    }
           ); # Not allowing extension methods

  HTTP_Version = "HTTP/" digit+ $version_major "." digit+ $version_minor;

  scheme = ( alpha | digit | "+" | "-" | "." )* ;
  absolute_uri = (scheme ":" (uchar | reserved )*);
  path = ( pchar+ ( "/" pchar* )* ) ;
  query = ( uchar | reserved )* >mark_query_string %query_string ;
  param = ( pchar | "/" )* ;
  params = ( param ( ";" param )* ) ;
  rel_path = ( path? (";" params)? ) ;
  absolute_path = ( "/"+ rel_path ) >mark_request_path %request_path ("?" query)?;
  Request_URI = ( "*" | absolute_uri | absolute_path ) >mark_request_uri %request_uri;
  Fragment = ( uchar | reserved )* >mark_fragment %fragment;

  field_name = ( token -- ":" )+;
  Field_Name = field_name >mark_header_field %write_field;

  field_value = ((any - " ") any*)?;
  Field_Value = field_value >mark_header_value %write_value;

  hsep = ":" " "*;
  header = (field_name hsep field_value) :> CRLF;

# Headers the parser interprets itself, alongside the generic
# Field_Name/Field_Value alternative that reports every header to the
# callbacks.
# Transfer-Encoding: seeing the header name selects chunked encoding, and
# the value "identity" then switches back to identity. Every header token
# here is matched case-insensitively, including that value: transfer-coding
# values are case-insensitive.
  Header = ( ("Content-Length"i hsep digit+ $content_length)
           | ("Connection"i hsep
               ( "Keep-Alive"i %set_keep_alive
               | "close"i %set_not_keep_alive
               )
             )
           | ("Transfer-Encoding"i %use_chunked_encoding hsep "identity"i %use_identity_encoding)
         # | ("Expect"i hsep "100-continue"i %expect_continue)
         # | ("Trailer"i hsep field_value %trailer)
           | (Field_Name hsep Field_Value)
           ) :> CRLF;

  Request_Line = ( Method " " Request_URI ("#" Fragment)? " " HTTP_Version CRLF ) ;
  RequestHeader = Request_Line (Header %end_header_line)* :> CRLF @end_headers;

# chunked message
  trailing_headers = header*;
  #chunk_ext_val   = token | quoted_string;
  chunk_ext_val = token*;
  chunk_ext_name = token*;
  chunk_extension = ( ";" " "* chunk_ext_name ("=" chunk_ext_val)? )*;
  last_chunk = "0"+ chunk_extension CRLF;
  chunk_size = (xdigit* [1-9a-fA-F] xdigit*) $add_to_chunk_size;
  chunk_end  = CRLF;
  chunk_body = any >skip_chunk_data;
  chunk_begin = chunk_size chunk_extension CRLF;
  chunk = chunk_begin chunk_body chunk_end;
  ChunkedBody := chunk* last_chunk trailing_headers CRLF @end_chunked_body;

  Request = RequestHeader >start_req @body_logic;

  main := Request*; # sequence of requests (for keep-alive)
289
}%%
290
291
%% write data;
292
293
static void
294
skip_body(const char **p, ebb_request_parser *parser, size_t nskip) {
295
  if(CURRENT->on_body && nskip > 0) {
296
    CURRENT->on_body(CURRENT, *p, nskip);
297
  }
298
  CURRENT->body_read += nskip;
299
  parser->chunk_size -= nskip;
300
  *p += nskip;
301
  if(0 == parser->chunk_size) {
302
    parser->eating = FALSE;
303
    if(CURRENT->transfer_encoding == EBB_IDENTITY) {
304
      END_REQUEST;
305
    }
306
  } else {
307
    parser->eating = TRUE;
308
  }
309
}
310
311
/*
 * Reset a parser to its initial state: no request in progress, no body
 * pending, all marks cleared, and the state machine at its start state.
 *
 * new_request is reset to NULL; ebb_request_parser_execute() asserts that
 * it is set, so the caller must assign it after calling this function.
 */
void ebb_request_parser_init(ebb_request_parser *parser)
{
  int cs = 0;

  parser->current_request = NULL;
  parser->new_request = NULL;

  parser->chunk_size = 0;
  parser->eating = 0;

  parser->header_field_mark = NULL;
  parser->header_value_mark = NULL;
  parser->query_string_mark = NULL;
  parser->path_mark = NULL;
  parser->uri_mark = NULL;
  parser->fragment_mark = NULL;

  /* Ragel-generated initialisation of `cs` */
  %% write init;
  parser->cs = cs;
}
328
329
330
/** exec **/
331
/*
 * Feed `len` bytes from `buffer` to the parser.
 *
 * If a previous call stopped in the middle of a body, the pending body
 * bytes are consumed first. Marks left set by the previous call are moved
 * to the start of this buffer, since the old buffer is gone. After the
 * state machine has run, every mark that is still set gets its callback
 * invoked with the bytes seen so far, so callers receive tokens that span
 * buffers in pieces.
 *
 * parser->new_request must be set before calling.
 * Returns the number of bytes of `buffer` that were consumed.
 */
size_t ebb_request_parser_execute(ebb_request_parser *parser, const char *buffer, size_t len)
{
  /* p, pe and cs are the variable names the generated machine expects */
  const char *p = buffer;
  const char *pe = buffer + len;
  int cs = parser->cs;

  assert(parser->new_request && "undefined callback");

  if(parser->eating && parser->chunk_size > 0) {
    /* resume a body that the previous buffer did not finish */
    size_t eat = (len < parser->chunk_size) ? len : parser->chunk_size;
    skip_body(&p, parser, eat);
  }

  /* re-anchor marks that are still open at the start of this buffer */
  if(parser->header_field_mark) { parser->header_field_mark = buffer; }
  if(parser->header_value_mark) { parser->header_value_mark = buffer; }
  if(parser->fragment_mark)     { parser->fragment_mark     = buffer; }
  if(parser->query_string_mark) { parser->query_string_mark = buffer; }
  if(parser->path_mark)         { parser->path_mark         = buffer; }
  if(parser->uri_mark)          { parser->uri_mark          = buffer; }

  %% write exec;

  parser->cs = cs;

  /* report the partial contents of tokens that are still open */
  HEADER_CALLBACK(header_field);
  HEADER_CALLBACK(header_value);
  CALLBACK(fragment);
  CALLBACK(query_string);
  CALLBACK(path);
  CALLBACK(uri);

  assert(p <= pe && "buffer overflow after parsing execute");

  return (size_t)(p - buffer);
}
369
370
/*
 * Returns non-zero when the parser's state machine is in its error state,
 * zero otherwise.
 */
int ebb_request_parser_has_error(ebb_request_parser *parser)
{
  const int in_error_state = (parser->cs == ebb_request_parser_error);

  return in_error_state;
}
374
375
/*
 * Returns non-zero when the parser's current state equals the machine's
 * first final state, zero otherwise.
 *
 * NOTE(review): this compares with `==`; if the generated machine has more
 * than one final state, a `>=` comparison may be what is wanted - confirm
 * against the generated state table before changing.
 */
int ebb_request_parser_is_finished(ebb_request_parser *parser)
{
  const int at_first_final = (parser->cs == ebb_request_parser_first_final);

  return at_first_final;
}
379
380
/*
 * Put a request object into its default state: counters and version at
 * zero, identity transfer encoding, keep_alive undecided (-1), and every
 * callback unset.
 */
void ebb_request_init(ebb_request *request)
{
  /* callbacks: none registered */
  request->on_complete = NULL;
  request->on_headers_complete = NULL;
  request->on_body = NULL;
  request->on_header_field = NULL;
  request->on_header_value = NULL;
  request->on_uri = NULL;
  request->on_fragment = NULL;
  request->on_path = NULL;
  request->on_query_string = NULL;

  /* parsed values */
  request->version_major = 0;
  request->version_minor = 0;
  request->number_of_headers = 0;
  request->content_length = 0;
  request->body_read = 0;
  request->transfer_encoding = EBB_IDENTITY;
  request->expect_continue = FALSE;

  /* -1: no explicit Connection header seen; see
   * ebb_request_should_keep_alive() */
  request->keep_alive = -1;
}
401
402
/*
 * Decide whether the connection should stay open after this request.
 *
 * If keep_alive was set explicitly (anything other than -1), that value is
 * returned. Otherwise the answer follows the HTTP version: 1.x keeps the
 * connection open unless the minor version is 0, 0.x never does, and any
 * other major version does.
 */
int ebb_request_should_keep_alive(ebb_request *request)
{
  if(request->keep_alive != -1) {
    return request->keep_alive;
  }

  if(request->version_major == 1) {
    return (request->version_minor != 0);
  }
  if(request->version_major == 0) {
    return FALSE;
  }
  return TRUE;
}