xref: /PHP-8.4/sapi/cli/php_http_parser.c (revision d1ccb5bd)
1 /* Copyright 2009,2010 Ryan Dahl <ry@tinyclouds.org>
2  *
3  * Permission is hereby granted, free of charge, to any person obtaining a copy
4  * of this software and associated documentation files (the "Software"), to
5  * deal in the Software without restriction, including without limitation the
6  * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
7  * sell copies of the Software, and to permit persons to whom the Software is
8  * furnished to do so, subject to the following conditions:
9  *
10  * The above copyright notice and this permission notice shall be included in
11  * all copies or substantial portions of the Software.
12  *
13  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
18  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
19  * IN THE SOFTWARE.
20  */
21 #include <assert.h>
22 #include <stddef.h>
23 #include "php_http_parser.h"
24 
25 
/* Smaller of two values.
 * This is a function-like macro: either argument may be evaluated twice, so
 * do not pass expressions with side effects. It is only defined here when no
 * earlier header has already provided a MIN. */
#if !defined(MIN)
# define MIN(a,b) (((b) > (a)) ? (a) : (b))
#endif
29 
30 
/* Fire the notification callback settings->on_<FOR>, which receives only the
 * parser. Nothing happens when the callback pointer is null. If the callback
 * returns non-zero, the enclosing function returns immediately with the
 * offset of the current byte (p - data).
 * Expects `settings`, `parser`, `p` and `data` to be in scope at the call
 * site. */
#define CALLBACK2(FOR)                                               \
do {                                                                 \
  if (settings->on_##FOR && 0 != settings->on_##FOR(parser)) {       \
    return (p - data);                                               \
  }                                                                  \
} while (0)
37 
38 
/* Record the current read position `p` in the local pointer <FOR>_mark.
 * The CALLBACK macros later report the bytes from that mark up to `p`. */
#define MARK(FOR)                                                    \
do {                                                                 \
  (FOR##_mark) = p;                                                  \
} while (0)
43 
/* Report the bytes between <FOR>_mark and the current position `p` to the
 * data callback settings->on_<FOR>(parser, start, length).
 * Nothing happens when the mark is null or no callback is installed. The
 * mark itself is left untouched (see CALLBACK for the clearing variant).
 * If the callback returns non-zero, the enclosing function returns
 * immediately with the offset of the current byte (p - data).
 * Expects `settings`, `parser`, `p` and `data` to be in scope. */
#define CALLBACK_NOCLEAR(FOR)                                        \
do {                                                                 \
  if (FOR##_mark                                                     \
      && settings->on_##FOR                                          \
      && 0 != settings->on_##FOR(parser, FOR##_mark,                 \
                                 p - FOR##_mark)) {                  \
    return (p - data);                                               \
  }                                                                  \
} while (0)
57 
/* On PHP_WIN32 builds a CALLBACK macro may already exist (presumably from
 * the Windows SDK headers - confirm); drop it so the definition below does
 * not collide. */
#if defined(PHP_WIN32)
# undef CALLBACK
#endif
/* Same as CALLBACK_NOCLEAR(FOR), then reset <FOR>_mark to NULL so the same
 * span is not reported a second time. The reset is skipped when the
 * callback aborts parsing, because CALLBACK_NOCLEAR returns in that case. */
#define CALLBACK(FOR)                                                \
do {                                                                 \
  CALLBACK_NOCLEAR(FOR);                                             \
  (FOR##_mark) = NULL;                                               \
} while (0)
66 
67 
/* Lower-case spellings of the header names the parser singles out.
 * The matching code lies outside this excerpt; presumably these are compared
 * against input that has been lower-cased through the tokens table. */
#define CONNECTION         "connection"
#define PROXY_CONNECTION   "proxy-connection"
#define CONTENT_LENGTH     "content-length"
#define TRANSFER_ENCODING  "transfer-encoding"
#define UPGRADE            "upgrade"

/* Lower-case spellings of the header values the parser singles out. */
#define CHUNKED            "chunked"
#define KEEP_ALIVE         "keep-alive"
#define CLOSE              "close"
76 
77 
/* Printable names of the HTTP request methods.
 *
 * parser->method is used directly as an index into this table while the
 * method is being matched, so the order presumably has to mirror
 * enum php_http_method in php_http_parser.h - confirm before adding or
 * reordering entries.
 *
 * Both the strings and the pointer slots are const: the table is never meant
 * to be modified at run time. */
static const char * const method_strings[] = {
  "DELETE",
  "GET",
  "HEAD",
  "POST",
  "PUT",
  "PATCH",
  "CONNECT",
  "OPTIONS",
  "TRACE",
  "COPY",
  "LOCK",
  "MKCOL",
  "MOVE",
  "MKCALENDAR",
  "PROPFIND",
  "PROPPATCH",
  "SEARCH",
  "UNLOCK",
  "REPORT",
  "MKACTIVITY",
  "CHECKOUT",
  "MERGE",
  "M-SEARCH",
  "NOTIFY",
  "SUBSCRIBE",
  "UNSUBSCRIBE",
  "NOTIMPLEMENTED"
};
107 
108 
/* Tokens as defined by rfc 2616. Also lowercases them.
 *        token       = 1*<any CHAR except CTLs or separators>
 *     separators     = "(" | ")" | "<" | ">" | "@"
 *                    | "," | ";" | ":" | "\" | <">
 *                    | "/" | "[" | "]" | "?" | "="
 *                    | "{" | "}" | SP | HT
 *
 * Indexed by the unsigned byte value. A zero entry means "not a token
 * character"; any other entry is the character itself, with 'A'-'Z' folded
 * to 'a'-'z'. Entries 128-255 are not listed and are therefore zero.
 *
 * The separators <">, "/" and "}" map to 0, as the grammar above requires
 * (they were previously accepted as token characters).
 *
 * NOTE(review): SP (32) still maps to itself although the grammar lists it
 * as a separator. It is left unchanged here because it may be deliberate
 * leniency - confirm before tightening.
 */
static const char tokens[256] = {
/*   0 nul    1 soh    2 stx    3 etx    4 eot    5 enq    6 ack    7 bel  */
        0,       0,       0,       0,       0,       0,       0,       0,
/*   8 bs     9 ht    10 nl    11 vt    12 np    13 cr    14 so    15 si   */
        0,       0,       0,       0,       0,       0,       0,       0,
/*  16 dle   17 dc1   18 dc2   19 dc3   20 dc4   21 nak   22 syn   23 etb */
        0,       0,       0,       0,       0,       0,       0,       0,
/*  24 can   25 em    26 sub   27 esc   28 fs    29 gs    30 rs    31 us  */
        0,       0,       0,       0,       0,       0,       0,       0,
/*  32 sp    33  !    34  "    35  #    36  $    37  %    38  &    39  '  */
       ' ',      '!',      0,      '#',     '$',     '%',     '&',    '\'',
/*  40  (    41  )    42  *    43  +    44  ,    45  -    46  .    47  /  */
        0,       0,      '*',     '+',      0,      '-',     '.',      0,
/*  48  0    49  1    50  2    51  3    52  4    53  5    54  6    55  7  */
       '0',     '1',     '2',     '3',     '4',     '5',     '6',     '7',
/*  56  8    57  9    58  :    59  ;    60  <    61  =    62  >    63  ?  */
       '8',     '9',      0,       0,       0,       0,       0,       0,
/*  64  @    65  A    66  B    67  C    68  D    69  E    70  F    71  G  */
        0,      'a',     'b',     'c',     'd',     'e',     'f',     'g',
/*  72  H    73  I    74  J    75  K    76  L    77  M    78  N    79  O  */
       'h',     'i',     'j',     'k',     'l',     'm',     'n',     'o',
/*  80  P    81  Q    82  R    83  S    84  T    85  U    86  V    87  W  */
       'p',     'q',     'r',     's',     't',     'u',     'v',     'w',
/*  88  X    89  Y    90  Z    91  [    92  \    93  ]    94  ^    95  _  */
       'x',     'y',     'z',      0,       0,       0,      '^',     '_',
/*  96  `    97  a    98  b    99  c   100  d   101  e   102  f   103  g  */
       '`',     'a',     'b',     'c',     'd',     'e',     'f',     'g',
/* 104  h   105  i   106  j   107  k   108  l   109  m   110  n   111  o  */
       'h',     'i',     'j',     'k',     'l',     'm',     'n',     'o',
/* 112  p   113  q   114  r   115  s   116  t   117  u   118  v   119  w  */
       'p',     'q',     'r',     's',     't',     'u',     'v',     'w',
/* 120  x   121  y   122  z   123  {   124  |   125  }   126  ~   127 del */
       'x',     'y',     'z',      0,      '|',      0,      '~',       0 };
149 
150 
/* Value of a hexadecimal digit, indexed by the unsigned byte value.
 * '0'-'9' give 0-9, 'A'-'F' and 'a'-'f' give 10-15, every other byte gives -1.
 *
 * All 256 entries are written out. If the upper half were omitted it would
 * be zero-initialised, and bytes 0x80-0xFF would then read as the valid
 * digit 0 instead of -1. */
static const int8_t unhex[256] =
  {-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
  , 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,-1,-1,-1,-1,-1,-1
  ,-1,10,11,12,13,14,15,-1,-1,-1,-1,-1,-1,-1,-1,-1
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
  ,-1,10,11,12,13,14,15,-1,-1,-1,-1,-1,-1,-1,-1,-1
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
  /* 0x80 - 0xFF: never hexadecimal digits */
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
  };
161 
162 
/* Lookup table, indexed by the unsigned byte value, of characters that the
 * URL states (path, query string, fragment) accept without any special
 * handling: 1 = ordinary URL character, 0 = everything else.
 *
 * Zero entries are the control characters 0-31, SP, '#', '?' and DEL. Bytes
 * 128-255 are not listed and are therefore zero as well.
 * Each row below covers 16 consecutive byte values. */
static const uint8_t normal_url_char[256] = {
  /*   0 -  15: nul .. si  */
  0, 0, 0, 0, 0, 0, 0, 0,   0, 0, 0, 0, 0, 0, 0, 0,
  /*  16 -  31: dle .. us  */
  0, 0, 0, 0, 0, 0, 0, 0,   0, 0, 0, 0, 0, 0, 0, 0,
  /*  32 -  47: sp ! " # $ % & '   ( ) * + , - . /   (sp and # excluded) */
  0, 1, 1, 0, 1, 1, 1, 1,   1, 1, 1, 1, 1, 1, 1, 1,
  /*  48 -  63: 0 .. 9 : ; < = > ?                   (? excluded) */
  1, 1, 1, 1, 1, 1, 1, 1,   1, 1, 1, 1, 1, 1, 1, 0,
  /*  64 -  79: @ A .. O */
  1, 1, 1, 1, 1, 1, 1, 1,   1, 1, 1, 1, 1, 1, 1, 1,
  /*  80 -  95: P .. Z [ \ ] ^ _ */
  1, 1, 1, 1, 1, 1, 1, 1,   1, 1, 1, 1, 1, 1, 1, 1,
  /*  96 - 111: ` a .. o */
  1, 1, 1, 1, 1, 1, 1, 1,   1, 1, 1, 1, 1, 1, 1, 1,
  /* 112 - 127: p .. z { | } ~ del                   (del excluded) */
  1, 1, 1, 1, 1, 1, 1, 1,   1, 1, 1, 1, 1, 1, 1, 0
};
196 
197 
/* True while the parser is still inside the start line or header section:
 * `state` is at or before s_headers_almost_done and the F_TRAILING flag is
 * not set on the parser. The main loop uses this to decide whether a byte
 * counts towards the PHP_HTTP_MAX_HEADER_SIZE limit.
 * Expects `parser` to be in scope. The argument is parenthesised so that
 * compound expressions (e.g. a conditional) are compared as a whole. */
#define PARSING_HEADER(state) \
  ((state) <= s_headers_almost_done && 0 == (parser->flags & F_TRAILING))
199 
200 
/* Sub-state describing which header is being recognised. It is loaded from
 * parser->header_state when php_http_parser_execute() starts.
 * Values are consecutive from h_general = 0; the relative order of the
 * groups below is part of the original layout and is kept as is. */
enum header_states {
  /* no special header recognised */
  h_general = 0,

  /* header names starting with 'c': h_C is entered on the first character;
   * h_CO and h_CON presumably follow the next letters - the code that uses
   * them is outside this excerpt */
  h_C,
  h_CO,
  h_CON,

  /* in the middle of a header name */
  h_matching_connection,
  h_matching_proxy_connection,
  h_matching_content_length,
  h_matching_transfer_encoding,
  h_matching_upgrade,

  /* header name recognised */
  h_connection,
  h_content_length,
  h_transfer_encoding,
  h_upgrade,

  /* in the middle of a header value */
  h_matching_transfer_encoding_chunked,
  h_matching_connection_keep_alive,
  h_matching_connection_close,

  /* header value recognised */
  h_transfer_encoding_chunked,
  h_connection_keep_alive,
  h_connection_close
};
226 
227 
/* Bit flags kept in parser->flags; each constant occupies its own bit.
 * The field is reset to 0 at the start of every message. */
enum flags {
  F_CHUNKED               = 0x01,  /* bit 0 */
  F_CONNECTION_KEEP_ALIVE = 0x02,  /* bit 1 */
  F_CONNECTION_CLOSE      = 0x04,  /* bit 2 */
  F_TRAILING              = 0x08,  /* bit 3; tested by PARSING_HEADER */
  F_UPGRADE               = 0x10,  /* bit 4 */
  F_SKIPBODY              = 0x20   /* bit 5 */
};
236 
237 
/* Line terminator bytes. */
#define CR '\r'
#define LF '\n'
/* Set bit 0x20 of c, which maps 'A'-'Z' onto 'a'-'z'. Other bytes are
 * changed too, so callers range-check the result against 'a'..'z'.
 * The argument and the whole expansion are parenthesised so that compound
 * arguments are converted as a whole. */
#define LOWER(c) ((unsigned char)((c) | 0x20))
/* Look up c in the tokens table: 0 if c is not a token character, otherwise
 * its lower-cased form. The cast to unsigned char keeps the index in 0..255
 * even when plain char is signed. */
#define TOKEN(c) (tokens[(unsigned char)(c)])
242 
243 
/* State in which a new message begins: s_start_req when the parser type is
 * PHP_HTTP_REQUEST, s_start_res for any other type.
 * Expects `parser` to be in scope. */
#define start_state \
  ((PHP_HTTP_REQUEST == parser->type) ? s_start_req : s_start_res)
245 
246 
/* Strictness switches.
 *
 * STRICT_CHECK(cond): when HTTP_PARSER_STRICT is defined, jump to the
 * `error` label of the enclosing function if cond is true. The strict form
 * is wrapped in do { } while (0) so that it behaves as a single statement
 * and cannot capture a following `else`. When HTTP_PARSER_STRICT is not
 * defined, the macro expands to nothing and cond is not evaluated.
 *
 * NEW_MESSAGE(): state to continue in once a message is complete. In strict
 * mode this is start_state only if the connection is to be kept alive, and
 * s_dead otherwise; in lenient mode it is always start_state.
 *
 * NOTE(review): the strict branch calls http_should_keep_alive(), while the
 * other public symbols visible in this file carry a php_ prefix. Confirm
 * that this function exists under this name before enabling
 * HTTP_PARSER_STRICT. */
#ifdef HTTP_PARSER_STRICT
# define STRICT_CHECK(cond) do { if (cond) goto error; } while (0)
# define NEW_MESSAGE() (http_should_keep_alive(parser) ? start_state : s_dead)
#else
# define STRICT_CHECK(cond)
# define NEW_MESSAGE() start_state
#endif
254 
255 
php_http_parser_execute(php_http_parser * parser,const php_http_parser_settings * settings,const char * data,size_t len)256 size_t php_http_parser_execute (php_http_parser *parser,
257                             const php_http_parser_settings *settings,
258                             const char *data,
259                             size_t len)
260 {
261   char ch;
262   signed char c;
263   const char *p = data, *pe;
264   size_t to_read;
265 
266   enum state state = (enum state) parser->state;
267   enum header_states header_state = (enum header_states) parser->header_state;
268   uint32_t index = parser->index;
269   uint32_t nread = parser->nread;
270 
271   /* technically we could combine all of these (except for url_mark) into one
272      variable, saving stack space, but it seems more clear to have them
273      separated. */
274   const char *header_field_mark = 0;
275   const char *header_value_mark = 0;
276   const char *fragment_mark = 0;
277   const char *query_string_mark = 0;
278   const char *path_mark = 0;
279   const char *url_mark = 0;
280 
281   if (len == 0) {
282     if (state == s_body_identity_eof) {
283       CALLBACK2(message_complete);
284     }
285     return 0;
286   }
287 
288   if (state == s_header_field)
289     header_field_mark = data;
290   if (state == s_header_value)
291     header_value_mark = data;
292   if (state == s_req_fragment)
293     fragment_mark = data;
294   if (state == s_req_query_string)
295     query_string_mark = data;
296   if (state == s_req_path)
297     path_mark = data;
298   if (state == s_req_path || state == s_req_schema || state == s_req_schema_slash
299       || state == s_req_schema_slash_slash || state == s_req_port
300       || state == s_req_query_string_start || state == s_req_query_string
301       || state == s_req_host
302       || state == s_req_fragment_start || state == s_req_fragment)
303     url_mark = data;
304 
305   for (p=data, pe=data+len; p != pe; p++) {
306     ch = *p;
307 
308     if (PARSING_HEADER(state)) {
309       ++nread;
310       /* Buffer overflow attack */
311       if (nread > PHP_HTTP_MAX_HEADER_SIZE) goto error;
312     }
313 
314     switch (state) {
315 
316       case s_dead:
317         /* this state is used after a 'Connection: close' message
318          * the parser will error out if it reads another message
319          */
320         goto error;
321 
322       case s_start_req_or_res:
323       {
324         if (ch == CR || ch == LF)
325           break;
326         parser->flags = 0;
327         parser->content_length = -1;
328 
329         CALLBACK2(message_begin);
330 
331         if (ch == 'H')
332           state = s_res_or_resp_H;
333         else {
334           parser->type = PHP_HTTP_REQUEST;
335           goto start_req_method_assign;
336         }
337         break;
338       }
339 
340       case s_res_or_resp_H:
341         if (ch == 'T') {
342           parser->type = PHP_HTTP_RESPONSE;
343           state = s_res_HT;
344         } else {
345           if (ch != 'E') goto error;
346           parser->type = PHP_HTTP_REQUEST;
347           parser->method = PHP_HTTP_HEAD;
348           index = 2;
349           state = s_req_method;
350         }
351         break;
352 
353       case s_start_res:
354       {
355         parser->flags = 0;
356         parser->content_length = -1;
357 
358         CALLBACK2(message_begin);
359 
360         switch (ch) {
361           case 'H':
362             state = s_res_H;
363             break;
364 
365           case CR:
366           case LF:
367             break;
368 
369           default:
370             goto error;
371         }
372         break;
373       }
374 
375       case s_res_H:
376         STRICT_CHECK(ch != 'T');
377         state = s_res_HT;
378         break;
379 
380       case s_res_HT:
381         STRICT_CHECK(ch != 'T');
382         state = s_res_HTT;
383         break;
384 
385       case s_res_HTT:
386         STRICT_CHECK(ch != 'P');
387         state = s_res_HTTP;
388         break;
389 
390       case s_res_HTTP:
391         STRICT_CHECK(ch != '/');
392         state = s_res_first_http_major;
393         break;
394 
395       case s_res_first_http_major:
396         if (ch < '1' || ch > '9') goto error;
397         parser->http_major = ch - '0';
398         state = s_res_http_major;
399         break;
400 
401       /* major HTTP version or dot */
402       case s_res_http_major:
403       {
404         if (ch == '.') {
405           state = s_res_first_http_minor;
406           break;
407         }
408 
409         if (ch < '0' || ch > '9') goto error;
410 
411         parser->http_major *= 10;
412         parser->http_major += ch - '0';
413 
414         if (parser->http_major > 999) goto error;
415         break;
416       }
417 
418       /* first digit of minor HTTP version */
419       case s_res_first_http_minor:
420         if (ch < '0' || ch > '9') goto error;
421         parser->http_minor = ch - '0';
422         state = s_res_http_minor;
423         break;
424 
425       /* minor HTTP version or end of request line */
426       case s_res_http_minor:
427       {
428         if (ch == ' ') {
429           state = s_res_first_status_code;
430           break;
431         }
432 
433         if (ch < '0' || ch > '9') goto error;
434 
435         parser->http_minor *= 10;
436         parser->http_minor += ch - '0';
437 
438         if (parser->http_minor > 999) goto error;
439         break;
440       }
441 
442       case s_res_first_status_code:
443       {
444         if (ch < '0' || ch > '9') {
445           if (ch == ' ') {
446             break;
447           }
448           goto error;
449         }
450         parser->status_code = ch - '0';
451         state = s_res_status_code;
452         break;
453       }
454 
455       case s_res_status_code:
456       {
457         if (ch < '0' || ch > '9') {
458           switch (ch) {
459             case ' ':
460               state = s_res_status;
461               break;
462             case CR:
463               state = s_res_line_almost_done;
464               break;
465             case LF:
466               state = s_header_field_start;
467               break;
468             default:
469               goto error;
470           }
471           break;
472         }
473 
474         parser->status_code *= 10;
475         parser->status_code += ch - '0';
476 
477         if (parser->status_code > 999) goto error;
478         break;
479       }
480 
481       case s_res_status:
482         /* the human readable status. e.g. "NOT FOUND"
483          * we are not humans so just ignore this */
484         if (ch == CR) {
485           state = s_res_line_almost_done;
486           break;
487         }
488 
489         if (ch == LF) {
490           state = s_header_field_start;
491           break;
492         }
493         break;
494 
495       case s_res_line_almost_done:
496         STRICT_CHECK(ch != LF);
497         state = s_header_field_start;
498         break;
499 
500       case s_start_req:
501       {
502         if (ch == CR || ch == LF)
503           break;
504         parser->flags = 0;
505         parser->content_length = -1;
506 
507         CALLBACK2(message_begin);
508 
509         if (ch < 'A' || 'Z' < ch) goto error;
510 
511       start_req_method_assign:
512         parser->method = (enum php_http_method) 0;
513         index = 1;
514         switch (ch) {
515           case 'C': parser->method = PHP_HTTP_CONNECT; /* or COPY, CHECKOUT */ break;
516           case 'D': parser->method = PHP_HTTP_DELETE; break;
517           case 'G': parser->method = PHP_HTTP_GET; break;
518           case 'H': parser->method = PHP_HTTP_HEAD; break;
519           case 'L': parser->method = PHP_HTTP_LOCK; break;
520           case 'M': parser->method = PHP_HTTP_MKCOL; /* or MOVE, MKCALENDAR, MKACTIVITY, MERGE, M-SEARCH */ break;
521           case 'N': parser->method = PHP_HTTP_NOTIFY; break;
522           case 'O': parser->method = PHP_HTTP_OPTIONS; break;
523           case 'P': parser->method = PHP_HTTP_POST; /* or PROPFIND or PROPPATCH or PUT */ break;
524           case 'R': parser->method = PHP_HTTP_REPORT; break;
525           case 'S': parser->method = PHP_HTTP_SUBSCRIBE; /* or SEARCH */ break;
526           case 'T': parser->method = PHP_HTTP_TRACE; break;
527           case 'U': parser->method = PHP_HTTP_UNLOCK; /* or UNSUBSCRIBE */ break;
528           default: parser->method = PHP_HTTP_NOT_IMPLEMENTED; break;
529         }
530         state = s_req_method;
531         break;
532       }
533       case s_req_method:
534       {
535         const char *matcher;
536         if (ch == '\0')
537           goto error;
538 
539         matcher = method_strings[parser->method];
540         if (ch == ' ') {
541           if (parser->method != PHP_HTTP_NOT_IMPLEMENTED && matcher[index] != '\0') {
542             parser->method = PHP_HTTP_NOT_IMPLEMENTED;
543           }
544           state = s_req_spaces_before_url;
545         } else if (parser->method == PHP_HTTP_NOT_IMPLEMENTED || ch == matcher[index]) {
546           ; /* nada */
547         } else if (parser->method == PHP_HTTP_CONNECT) {
548           if (index == 1 && ch == 'H') {
549             parser->method = PHP_HTTP_CHECKOUT;
550           } else if (index == 2  && ch == 'P') {
551             parser->method = PHP_HTTP_COPY;
552           } else {
553             parser->method = PHP_HTTP_NOT_IMPLEMENTED;
554           }
555         } else if (parser->method == PHP_HTTP_MKCOL) {
556           if (index == 1 && ch == 'O') {
557             parser->method = PHP_HTTP_MOVE;
558           } else if (index == 3 && ch == 'A') {
559             parser->method = PHP_HTTP_MKCALENDAR;
560           } else if (index == 1 && ch == 'E') {
561             parser->method = PHP_HTTP_MERGE;
562           } else if (index == 1 && ch == '-') {
563             parser->method = PHP_HTTP_MSEARCH;
564           } else if (index == 2 && ch == 'A') {
565             parser->method = PHP_HTTP_MKACTIVITY;
566           } else {
567             parser->method = PHP_HTTP_NOT_IMPLEMENTED;
568           }
569         } else if (index == 1 && parser->method == PHP_HTTP_POST && ch == 'R') {
570           parser->method = PHP_HTTP_PROPFIND; /* or HTTP_PROPPATCH */
571         } else if (index == 1 && parser->method == PHP_HTTP_POST && ch == 'U') {
572           parser->method = PHP_HTTP_PUT;
573         } else if (index == 1 && parser->method == PHP_HTTP_POST && ch == 'A') {
574           parser->method = PHP_HTTP_PATCH;
575         } else if (index == 1 && parser->method == PHP_HTTP_SUBSCRIBE && ch == 'E') {
576           parser->method = PHP_HTTP_SEARCH;
577         } else if (index == 2 && parser->method == PHP_HTTP_UNLOCK && ch == 'S') {
578           parser->method = PHP_HTTP_UNSUBSCRIBE;
579         } else if (index == 4 && parser->method == PHP_HTTP_PROPFIND && ch == 'P') {
580           parser->method = PHP_HTTP_PROPPATCH;
581         } else {
582           parser->method = PHP_HTTP_NOT_IMPLEMENTED;
583         }
584 
585         ++index;
586         break;
587       }
588       case s_req_spaces_before_url:
589       {
590         if (ch == ' ') break;
591 
592         if (ch == '/' || ch == '*') {
593           MARK(url);
594           MARK(path);
595           state = s_req_path;
596           break;
597         }
598 
599         c = LOWER(ch);
600 
601         if (c >= 'a' && c <= 'z') {
602           MARK(url);
603           state = s_req_schema;
604           break;
605         }
606 
607         goto error;
608       }
609 
610       case s_req_schema:
611       {
612         c = LOWER(ch);
613 
614         if (c >= 'a' && c <= 'z') break;
615 
616         if (ch == ':') {
617           state = s_req_schema_slash;
618           break;
619         } else if (ch == '.') {
620           state = s_req_host;
621           break;
622         } else if ('0' <= ch && ch <= '9') {
623           state = s_req_host;
624           break;
625         }
626 
627         goto error;
628       }
629 
630       case s_req_schema_slash:
631         STRICT_CHECK(ch != '/');
632         state = s_req_schema_slash_slash;
633         break;
634 
635       case s_req_schema_slash_slash:
636         STRICT_CHECK(ch != '/');
637         state = s_req_host;
638         break;
639 
640       case s_req_host:
641       {
642         c = LOWER(ch);
643         if (c >= 'a' && c <= 'z') break;
644         if ((ch >= '0' && ch <= '9') || ch == '.' || ch == '-') break;
645         switch (ch) {
646           case ':':
647             state = s_req_port;
648             break;
649           case '/':
650             MARK(path);
651             state = s_req_path;
652             break;
653           case ' ':
654             /* The request line looks like:
655              *   "GET http://foo.bar.com HTTP/1.1"
656              * That is, there is no path.
657              */
658             CALLBACK(url);
659             state = s_req_http_start;
660             break;
661           default:
662             goto error;
663         }
664         break;
665       }
666 
667       case s_req_port:
668       {
669         if (ch >= '0' && ch <= '9') break;
670         switch (ch) {
671           case '/':
672             MARK(path);
673             state = s_req_path;
674             break;
675           case ' ':
676             /* The request line looks like:
677              *   "GET http://foo.bar.com:1234 HTTP/1.1"
678              * That is, there is no path.
679              */
680             CALLBACK(url);
681             state = s_req_http_start;
682             break;
683           default:
684             goto error;
685         }
686         break;
687       }
688 
689       case s_req_path:
690       {
691         if (normal_url_char[(unsigned char)ch]) break;
692 
693         switch (ch) {
694           case ' ':
695             CALLBACK(url);
696             CALLBACK(path);
697             state = s_req_http_start;
698             break;
699           case CR:
700             CALLBACK(url);
701             CALLBACK(path);
702             parser->http_major = 0;
703             parser->http_minor = 9;
704             state = s_req_line_almost_done;
705             break;
706           case LF:
707             CALLBACK(url);
708             CALLBACK(path);
709             parser->http_major = 0;
710             parser->http_minor = 9;
711             state = s_header_field_start;
712             break;
713           case '?':
714             CALLBACK(path);
715             state = s_req_query_string_start;
716             break;
717           case '#':
718             CALLBACK(path);
719             state = s_req_fragment_start;
720             break;
721           default:
722             goto error;
723         }
724         break;
725       }
726 
727       case s_req_query_string_start:
728       {
729         if (normal_url_char[(unsigned char)ch]) {
730           MARK(query_string);
731           state = s_req_query_string;
732           break;
733         }
734 
735         switch (ch) {
736           case '?':
737             break; /* XXX ignore extra '?' ... is this right? */
738           case ' ':
739             CALLBACK(url);
740             state = s_req_http_start;
741             break;
742           case CR:
743             CALLBACK(url);
744             parser->http_major = 0;
745             parser->http_minor = 9;
746             state = s_req_line_almost_done;
747             break;
748           case LF:
749             CALLBACK(url);
750             parser->http_major = 0;
751             parser->http_minor = 9;
752             state = s_header_field_start;
753             break;
754           case '#':
755             state = s_req_fragment_start;
756             break;
757           default:
758             goto error;
759         }
760         break;
761       }
762 
763       case s_req_query_string:
764       {
765         if (normal_url_char[(unsigned char)ch]) break;
766 
767         switch (ch) {
768           case '?':
769             /* allow extra '?' in query string */
770             break;
771           case ' ':
772             CALLBACK(url);
773             CALLBACK(query_string);
774             state = s_req_http_start;
775             break;
776           case CR:
777             CALLBACK(url);
778             CALLBACK(query_string);
779             parser->http_major = 0;
780             parser->http_minor = 9;
781             state = s_req_line_almost_done;
782             break;
783           case LF:
784             CALLBACK(url);
785             CALLBACK(query_string);
786             parser->http_major = 0;
787             parser->http_minor = 9;
788             state = s_header_field_start;
789             break;
790           case '#':
791             CALLBACK(query_string);
792             state = s_req_fragment_start;
793             break;
794           default:
795             goto error;
796         }
797         break;
798       }
799 
800       case s_req_fragment_start:
801       {
802         if (normal_url_char[(unsigned char)ch]) {
803           MARK(fragment);
804           state = s_req_fragment;
805           break;
806         }
807 
808         switch (ch) {
809           case ' ':
810             CALLBACK(url);
811             state = s_req_http_start;
812             break;
813           case CR:
814             CALLBACK(url);
815             parser->http_major = 0;
816             parser->http_minor = 9;
817             state = s_req_line_almost_done;
818             break;
819           case LF:
820             CALLBACK(url);
821             parser->http_major = 0;
822             parser->http_minor = 9;
823             state = s_header_field_start;
824             break;
825           case '?':
826             MARK(fragment);
827             state = s_req_fragment;
828             break;
829           case '#':
830             break;
831           default:
832             goto error;
833         }
834         break;
835       }
836 
837       case s_req_fragment:
838       {
839         if (normal_url_char[(unsigned char)ch]) break;
840 
841         switch (ch) {
842           case ' ':
843             CALLBACK(url);
844             CALLBACK(fragment);
845             state = s_req_http_start;
846             break;
847           case CR:
848             CALLBACK(url);
849             CALLBACK(fragment);
850             parser->http_major = 0;
851             parser->http_minor = 9;
852             state = s_req_line_almost_done;
853             break;
854           case LF:
855             CALLBACK(url);
856             CALLBACK(fragment);
857             parser->http_major = 0;
858             parser->http_minor = 9;
859             state = s_header_field_start;
860             break;
861           case '?':
862           case '#':
863             break;
864           default:
865             goto error;
866         }
867         break;
868       }
869 
870       case s_req_http_start:
871         switch (ch) {
872           case 'H':
873             state = s_req_http_H;
874             break;
875           case ' ':
876             break;
877           default:
878             goto error;
879         }
880         break;
881 
882       case s_req_http_H:
883         STRICT_CHECK(ch != 'T');
884         state = s_req_http_HT;
885         break;
886 
887       case s_req_http_HT:
888         STRICT_CHECK(ch != 'T');
889         state = s_req_http_HTT;
890         break;
891 
892       case s_req_http_HTT:
893         STRICT_CHECK(ch != 'P');
894         state = s_req_http_HTTP;
895         break;
896 
897       case s_req_http_HTTP:
898         STRICT_CHECK(ch != '/');
899         state = s_req_first_http_major;
900         break;
901 
902       /* first digit of major HTTP version */
903       case s_req_first_http_major:
904         if (ch < '1' || ch > '9') goto error;
905         parser->http_major = ch - '0';
906         state = s_req_http_major;
907         break;
908 
909       /* major HTTP version or dot */
910       case s_req_http_major:
911       {
912         if (ch == '.') {
913           state = s_req_first_http_minor;
914           break;
915         }
916 
917         if (ch < '0' || ch > '9') goto error;
918 
919         parser->http_major *= 10;
920         parser->http_major += ch - '0';
921 
922         if (parser->http_major > 999) goto error;
923         break;
924       }
925 
926       /* first digit of minor HTTP version */
927       case s_req_first_http_minor:
928         if (ch < '0' || ch > '9') goto error;
929         parser->http_minor = ch - '0';
930         state = s_req_http_minor;
931         break;
932 
933       /* minor HTTP version or end of request line */
934       case s_req_http_minor:
935       {
936         if (ch == CR) {
937           state = s_req_line_almost_done;
938           break;
939         }
940 
941         if (ch == LF) {
942           state = s_header_field_start;
943           break;
944         }
945 
946         /* XXX allow spaces after digit? */
947 
948         if (ch < '0' || ch > '9') goto error;
949 
950         parser->http_minor *= 10;
951         parser->http_minor += ch - '0';
952 
953         if (parser->http_minor > 999) goto error;
954         break;
955       }
956 
957       /* end of request line */
958       case s_req_line_almost_done:
959       {
960         if (ch != LF) goto error;
961         state = s_header_field_start;
962         break;
963       }
964 
965       case s_header_field_start:
966       {
967         if (ch == CR) {
968           state = s_headers_almost_done;
969           break;
970         }
971 
972         if (ch == LF) {
973           /* they might be just sending \n instead of \r\n so this would be
974            * the second \n to denote the end of headers*/
975           state = s_headers_almost_done;
976           goto headers_almost_done;
977         }
978 
979         c = TOKEN(ch);
980 
981         if (!c) goto error;
982 
983         MARK(header_field);
984 
985         index = 0;
986         state = s_header_field;
987 
988         switch (c) {
989           case 'c':
990             header_state = h_C;
991             break;
992 
993           case 'p':
994             header_state = h_matching_proxy_connection;
995             break;
996 
997           case 't':
998             header_state = h_matching_transfer_encoding;
999             break;
1000 
1001           case 'u':
1002             header_state = h_matching_upgrade;
1003             break;
1004 
1005           default:
1006             header_state = h_general;
1007             break;
1008         }
1009         break;
1010       }
1011 
1012       case s_header_field:
1013       {
1014         c = TOKEN(ch);
1015 
1016         if (c) {
1017           switch (header_state) {
1018             case h_general:
1019               break;
1020 
1021             case h_C:
1022               index++;
1023               header_state = (c == 'o' ? h_CO : h_general);
1024               break;
1025 
1026             case h_CO:
1027               index++;
1028               header_state = (c == 'n' ? h_CON : h_general);
1029               break;
1030 
1031             case h_CON:
1032               index++;
1033               switch (c) {
1034                 case 'n':
1035                   header_state = h_matching_connection;
1036                   break;
1037                 case 't':
1038                   header_state = h_matching_content_length;
1039                   break;
1040                 default:
1041                   header_state = h_general;
1042                   break;
1043               }
1044               break;
1045 
1046             /* connection */
1047 
1048             case h_matching_connection:
1049               index++;
1050               if (index > sizeof(CONNECTION)-1
1051                   || c != CONNECTION[index]) {
1052                 header_state = h_general;
1053               } else if (index == sizeof(CONNECTION)-2) {
1054                 header_state = h_connection;
1055               }
1056               break;
1057 
1058             /* proxy-connection */
1059 
1060             case h_matching_proxy_connection:
1061               index++;
1062               if (index > sizeof(PROXY_CONNECTION)-1
1063                   || c != PROXY_CONNECTION[index]) {
1064                 header_state = h_general;
1065               } else if (index == sizeof(PROXY_CONNECTION)-2) {
1066                 header_state = h_connection;
1067               }
1068               break;
1069 
1070             /* content-length */
1071 
1072             case h_matching_content_length:
1073               index++;
1074               if (index > sizeof(CONTENT_LENGTH)-1
1075                   || c != CONTENT_LENGTH[index]) {
1076                 header_state = h_general;
1077               } else if (index == sizeof(CONTENT_LENGTH)-2) {
1078                 header_state = h_content_length;
1079               }
1080               break;
1081 
1082             /* transfer-encoding */
1083 
1084             case h_matching_transfer_encoding:
1085               index++;
1086               if (index > sizeof(TRANSFER_ENCODING)-1
1087                   || c != TRANSFER_ENCODING[index]) {
1088                 header_state = h_general;
1089               } else if (index == sizeof(TRANSFER_ENCODING)-2) {
1090                 header_state = h_transfer_encoding;
1091               }
1092               break;
1093 
1094             /* upgrade */
1095 
1096             case h_matching_upgrade:
1097               index++;
1098               if (index > sizeof(UPGRADE)-1
1099                   || c != UPGRADE[index]) {
1100                 header_state = h_general;
1101               } else if (index == sizeof(UPGRADE)-2) {
1102                 header_state = h_upgrade;
1103               }
1104               break;
1105 
1106             case h_connection:
1107             case h_content_length:
1108             case h_transfer_encoding:
1109             case h_upgrade:
1110               if (ch != ' ') header_state = h_general;
1111               break;
1112 
1113             default:
1114               assert(0 && "Unknown header_state");
1115               break;
1116           }
1117           break;
1118         }
1119 
1120         if (ch == ':') {
1121           CALLBACK(header_field);
1122           state = s_header_value_start;
1123           break;
1124         }
1125 
1126         if (ch == CR) {
1127           state = s_header_almost_done;
1128           CALLBACK(header_field);
1129           break;
1130         }
1131 
1132         if (ch == LF) {
1133           CALLBACK(header_field);
1134           state = s_header_field_start;
1135           break;
1136         }
1137 
1138         goto error;
1139       }
1140 
1141       case s_header_value_start:
1142       {
1143         if (ch == ' ') break;
1144 
1145         MARK(header_value);
1146 
1147         state = s_header_value;
1148         index = 0;
1149 
1150         c = LOWER(ch);
1151 
1152         if (ch == CR) {
1153           CALLBACK(header_value);
1154           header_state = h_general;
1155           state = s_header_almost_done;
1156           break;
1157         }
1158 
1159         if (ch == LF) {
1160           CALLBACK(header_value);
1161           state = s_header_field_start;
1162           break;
1163         }
1164 
1165         switch (header_state) {
1166           case h_upgrade:
1167             parser->flags |= F_UPGRADE;
1168             header_state = h_general;
1169             break;
1170 
1171           case h_transfer_encoding:
1172             /* looking for 'Transfer-Encoding: chunked' */
1173             if ('c' == c) {
1174               header_state = h_matching_transfer_encoding_chunked;
1175             } else {
1176               header_state = h_general;
1177             }
1178             break;
1179 
1180           case h_content_length:
1181             if (ch < '0' || ch > '9') goto error;
1182             parser->content_length = ch - '0';
1183             break;
1184 
1185           case h_connection:
1186             /* looking for 'Connection: keep-alive' */
1187             if (c == 'k') {
1188               header_state = h_matching_connection_keep_alive;
1189             /* looking for 'Connection: close' */
1190             } else if (c == 'c') {
1191               header_state = h_matching_connection_close;
1192             } else {
1193               header_state = h_general;
1194             }
1195             break;
1196 
1197           default:
1198             header_state = h_general;
1199             break;
1200         }
1201         break;
1202       }
1203 
1204       case s_header_value:
1205       {
1206         c = LOWER(ch);
1207 
1208         if (ch == CR) {
1209           CALLBACK(header_value);
1210           state = s_header_almost_done;
1211           break;
1212         }
1213 
1214         if (ch == LF) {
1215           CALLBACK(header_value);
1216           goto header_almost_done;
1217         }
1218 
1219         switch (header_state) {
1220           case h_general:
1221             break;
1222 
1223           case h_connection:
1224           case h_transfer_encoding:
1225             assert(0 && "Shouldn't get here.");
1226             break;
1227 
1228           case h_content_length:
1229             if (ch == ' ') break;
1230             if (ch < '0' || ch > '9') goto error;
1231             parser->content_length *= 10;
1232             parser->content_length += ch - '0';
1233             break;
1234 
1235           /* Transfer-Encoding: chunked */
1236           case h_matching_transfer_encoding_chunked:
1237             index++;
1238             if (index > sizeof(CHUNKED)-1
1239                 || c != CHUNKED[index]) {
1240               header_state = h_general;
1241             } else if (index == sizeof(CHUNKED)-2) {
1242               header_state = h_transfer_encoding_chunked;
1243             }
1244             break;
1245 
1246           /* looking for 'Connection: keep-alive' */
1247           case h_matching_connection_keep_alive:
1248             index++;
1249             if (index > sizeof(KEEP_ALIVE)-1
1250                 || c != KEEP_ALIVE[index]) {
1251               header_state = h_general;
1252             } else if (index == sizeof(KEEP_ALIVE)-2) {
1253               header_state = h_connection_keep_alive;
1254             }
1255             break;
1256 
1257           /* looking for 'Connection: close' */
1258           case h_matching_connection_close:
1259             index++;
1260             if (index > sizeof(CLOSE)-1 || c != CLOSE[index]) {
1261               header_state = h_general;
1262             } else if (index == sizeof(CLOSE)-2) {
1263               header_state = h_connection_close;
1264             }
1265             break;
1266 
1267           case h_transfer_encoding_chunked:
1268           case h_connection_keep_alive:
1269           case h_connection_close:
1270             if (ch != ' ') header_state = h_general;
1271             break;
1272 
1273           default:
1274             state = s_header_value;
1275             header_state = h_general;
1276             break;
1277         }
1278         break;
1279       }
1280 
1281       case s_header_almost_done:
1282       header_almost_done:
1283       {
1284         STRICT_CHECK(ch != LF);
1285 
1286         state = s_header_field_start;
1287 
1288         switch (header_state) {
1289           case h_connection_keep_alive:
1290             parser->flags |= F_CONNECTION_KEEP_ALIVE;
1291             break;
1292           case h_connection_close:
1293             parser->flags |= F_CONNECTION_CLOSE;
1294             break;
1295           case h_transfer_encoding_chunked:
1296             parser->flags |= F_CHUNKED;
1297             break;
1298           default:
1299             break;
1300         }
1301         break;
1302       }
1303 
1304       case s_headers_almost_done:
1305       headers_almost_done:
1306       {
1307         STRICT_CHECK(ch != LF);
1308 
1309         if (parser->flags & F_TRAILING) {
1310           /* End of a chunked request */
1311           CALLBACK2(message_complete);
1312           state = NEW_MESSAGE();
1313           break;
1314         }
1315 
1316         nread = 0;
1317 
1318         if ((parser->flags & F_UPGRADE) || parser->method == PHP_HTTP_CONNECT) {
1319           parser->upgrade = 1;
1320         }
1321 
1322         /* Here we call the headers_complete callback. This is somewhat
1323          * different than other callbacks because if the user returns 1, we
1324          * will interpret that as saying that this message has no body. This
1325          * is needed for the annoying case of receiving a response to a HEAD
1326          * request.
1327          */
1328         if (settings->on_headers_complete) {
1329           switch (settings->on_headers_complete(parser)) {
1330             case 0:
1331               break;
1332 
1333             case 1:
1334               parser->flags |= F_SKIPBODY;
1335               break;
1336 
1337             default:
1338               return p - data; /* Error */
1339           }
1340         }
1341 
1342         /* We cannot meaningfully support upgrade requests, since we only
1343          * support HTTP/1 for now.
1344          */
1345 #if 0
1346         /* Exit, the rest of the connect is in a different protocol. */
1347         if (parser->upgrade) {
1348           CALLBACK2(message_complete);
1349           return (p - data);
1350         }
1351 #endif
1352 
1353         if (parser->flags & F_SKIPBODY) {
1354           CALLBACK2(message_complete);
1355           state = NEW_MESSAGE();
1356         } else if (parser->flags & F_CHUNKED) {
1357           /* chunked encoding - ignore Content-Length header */
1358           state = s_chunk_size_start;
1359         } else {
1360           if (parser->content_length == 0) {
1361             /* Content-Length header given but zero: Content-Length: 0\r\n */
1362             CALLBACK2(message_complete);
1363             state = NEW_MESSAGE();
1364           } else if (parser->content_length > 0) {
1365             /* Content-Length header given and non-zero */
1366             state = s_body_identity;
1367           } else {
1368             if (parser->type == PHP_HTTP_REQUEST || php_http_should_keep_alive(parser)) {
1369               /* Assume content-length 0 - read the next */
1370               CALLBACK2(message_complete);
1371               state = NEW_MESSAGE();
1372             } else {
1373               /* Read body until EOF */
1374               state = s_body_identity_eof;
1375             }
1376           }
1377         }
1378 
1379         break;
1380       }
1381 
1382       case s_body_identity:
1383         assert(pe >= p);
1384 
1385         to_read = MIN((size_t)(pe - p), (size_t)parser->content_length);
1386         if (to_read > 0) {
1387           if (settings->on_body) settings->on_body(parser, p, to_read);
1388           p += to_read - 1;
1389           parser->content_length -= to_read;
1390           if (parser->content_length == 0) {
1391             CALLBACK2(message_complete);
1392             state = NEW_MESSAGE();
1393           }
1394         }
1395         break;
1396 
1397       /* read until EOF */
1398       case s_body_identity_eof:
1399         to_read = pe - p;
1400         if (to_read > 0) {
1401           if (settings->on_body) settings->on_body(parser, p, to_read);
1402           p += to_read - 1;
1403         }
1404         break;
1405 
1406       case s_chunk_size_start:
1407       {
1408         assert(parser->flags & F_CHUNKED);
1409 
1410         c = unhex[(unsigned char)ch];
1411         if (c == -1) goto error;
1412         parser->content_length = c;
1413         state = s_chunk_size;
1414         break;
1415       }
1416 
1417       case s_chunk_size:
1418       {
1419         assert(parser->flags & F_CHUNKED);
1420 
1421         if (ch == CR) {
1422           state = s_chunk_size_almost_done;
1423           break;
1424         }
1425 
1426         c = unhex[(unsigned char)ch];
1427 
1428         if (c == -1) {
1429           if (ch == ';' || ch == ' ') {
1430             state = s_chunk_parameters;
1431             break;
1432           }
1433           goto error;
1434         }
1435 
1436         parser->content_length *= 16;
1437         parser->content_length += c;
1438         break;
1439       }
1440 
1441       case s_chunk_parameters:
1442       {
1443         assert(parser->flags & F_CHUNKED);
1444         /* just ignore this shit. TODO check for overflow */
1445         if (ch == CR) {
1446           state = s_chunk_size_almost_done;
1447           break;
1448         }
1449         break;
1450       }
1451 
1452       case s_chunk_size_almost_done:
1453       {
1454         assert(parser->flags & F_CHUNKED);
1455         STRICT_CHECK(ch != LF);
1456 
1457         if (parser->content_length == 0) {
1458           parser->flags |= F_TRAILING;
1459           state = s_header_field_start;
1460         } else {
1461           state = s_chunk_data;
1462         }
1463         break;
1464       }
1465 
1466       case s_chunk_data:
1467       {
1468         assert(parser->flags & F_CHUNKED);
1469         assert(pe >= p);
1470 
1471         to_read = MIN((size_t)(pe - p), (size_t)(parser->content_length));
1472 
1473         if (to_read > 0) {
1474           if (settings->on_body) settings->on_body(parser, p, to_read);
1475           p += to_read - 1;
1476         }
1477 
1478         if (to_read == (size_t)parser->content_length) {
1479           state = s_chunk_data_almost_done;
1480         }
1481 
1482         parser->content_length -= to_read;
1483         break;
1484       }
1485 
1486       case s_chunk_data_almost_done:
1487         assert(parser->flags & F_CHUNKED);
1488         STRICT_CHECK(ch != CR);
1489         state = s_chunk_data_done;
1490         break;
1491 
1492       case s_chunk_data_done:
1493         assert(parser->flags & F_CHUNKED);
1494         STRICT_CHECK(ch != LF);
1495         state = s_chunk_size_start;
1496         break;
1497 
1498       default:
1499         assert(0 && "unhandled state");
1500         goto error;
1501     }
1502   }
1503 
1504   CALLBACK_NOCLEAR(header_field);
1505   CALLBACK_NOCLEAR(header_value);
1506   CALLBACK_NOCLEAR(fragment);
1507   CALLBACK_NOCLEAR(query_string);
1508   CALLBACK_NOCLEAR(path);
1509   CALLBACK_NOCLEAR(url);
1510 
1511   parser->state = state;
1512   parser->header_state = header_state;
1513   parser->index = index;
1514   parser->nread = nread;
1515 
1516   return len;
1517 
1518 error:
1519   parser->state = s_dead;
1520   return (p - data);
1521 }
1522 
1523 
1524 int
php_http_should_keep_alive(php_http_parser * parser)1525 php_http_should_keep_alive (php_http_parser *parser)
1526 {
1527   if (parser->http_major > 0 && parser->http_minor > 0) {
1528     /* HTTP/1.1 */
1529     if (parser->flags & F_CONNECTION_CLOSE) {
1530       return 0;
1531     } else {
1532       return 1;
1533     }
1534   } else {
1535     /* HTTP/1.0 or earlier */
1536     if (parser->flags & F_CONNECTION_KEEP_ALIVE) {
1537       return 1;
1538     } else {
1539       return 0;
1540     }
1541   }
1542 }
1543 
1544 
php_http_method_str(enum php_http_method m)1545 const char * php_http_method_str (enum php_http_method m)
1546 {
1547   return method_strings[m];
1548 }
1549 
1550 
1551 void
php_http_parser_init(php_http_parser * parser,enum php_http_parser_type t)1552 php_http_parser_init (php_http_parser *parser, enum php_http_parser_type t)
1553 {
1554   parser->type = t;
1555   parser->state = (t == PHP_HTTP_REQUEST ? s_start_req : (t == PHP_HTTP_RESPONSE ? s_start_res : s_start_req_or_res));
1556   parser->nread = 0;
1557   parser->upgrade = 0;
1558   parser->flags = 0;
1559   parser->method = 0;
1560 }
1561