1 /* Copyright 2009,2010 Ryan Dahl <ry@tinyclouds.org>
2 *
3 * Permission is hereby granted, free of charge, to any person obtaining a copy
4 * of this software and associated documentation files (the "Software"), to
5 * deal in the Software without restriction, including without limitation the
6 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
7 * sell copies of the Software, and to permit persons to whom the Software is
8 * furnished to do so, subject to the following conditions:
9 *
10 * The above copyright notice and this permission notice shall be included in
11 * all copies or substantial portions of the Software.
12 *
13 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
18 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
19 * IN THE SOFTWARE.
20 */
21 #include <assert.h>
22 #include <stddef.h>
23 #include "php_http_parser.h"
24
25
/* Smaller of two values. Each argument may be evaluated twice, so do not
 * pass expressions with side effects. */
#ifndef MIN
# define MIN(x, y) (((x) < (y)) ? (x) : (y))
#endif
29
30
/* Fire the notification callback settings->on_<FOR>, if one is installed,
 * passing it only the parser. When the callback returns non-zero the
 * enclosing function returns the number of bytes consumed so far.
 * Relies on `settings`, `parser`, `p` and `data` being in scope. */
#define CALLBACK2(FOR)                                                  \
do {                                                                    \
  if (settings->on_##FOR && settings->on_##FOR(parser) != 0) {          \
    return (p - data);                                                  \
  }                                                                     \
} while (0)
37
38
/* Remember the current read position `p` as the start of element <FOR>
 * by storing it in the local variable <FOR>_mark. */
#define MARK(FOR) do { FOR##_mark = p; } while (0)
43
/* Deliver the data element <FOR> to settings->on_<FOR>, if both a mark for it
 * and the callback exist. The callback receives the parser, the marked start
 * and the length up to the current position `p`. A non-zero result makes the
 * enclosing function return the number of bytes consumed so far. The mark is
 * left untouched. Relies on `settings`, `parser`, `p` and `data` being in
 * scope. */
#define CALLBACK_NOCLEAR(FOR)                                           \
do {                                                                    \
  if (FOR##_mark                                                        \
      && settings->on_##FOR                                             \
      && settings->on_##FOR(parser, FOR##_mark, p - FOR##_mark) != 0) { \
    return (p - data);                                                  \
  }                                                                     \
} while (0)

/* On PHP_WIN32 builds any existing CALLBACK macro is removed first
 * (presumably the one supplied by the Windows SDK headers). */
#ifdef PHP_WIN32
# undef CALLBACK
#endif
/* Same as CALLBACK_NOCLEAR, then reset the mark so the element is not
 * reported a second time. */
#define CALLBACK(FOR)                                                   \
do {                                                                    \
  CALLBACK_NOCLEAR(FOR);                                                \
  FOR##_mark = NULL;                                                    \
} while (0)
66
67
/* Lower-case spellings of the header names and header values the parser
 * recognises. They stay string-literal macros because the matching code
 * takes sizeof() of them and indexes them character by character. */

/* header names */
#define PROXY_CONNECTION  "proxy-connection"
#define CONNECTION        "connection"
#define CONTENT_LENGTH    "content-length"
#define TRANSFER_ENCODING "transfer-encoding"
#define UPGRADE           "upgrade"

/* header values */
#define CHUNKED           "chunked"
#define KEEP_ALIVE        "keep-alive"
#define CLOSE             "close"
76
77
/* Names of the request methods. The parser indexes this table with
 * parser->method while matching the method token character by character,
 * so the entries must stay in the order of the enum php_http_method values.
 * Both the strings and the pointers are const: nothing may modify the table. */
static const char *const method_strings[] = {
  "DELETE",
  "GET",
  "HEAD",
  "POST",
  "PUT",
  "PATCH",
  "CONNECT",
  "OPTIONS",
  "TRACE",
  "COPY",
  "LOCK",
  "MKCOL",
  "MOVE",
  "MKCALENDAR",
  "PROPFIND",
  "PROPPATCH",
  "SEARCH",
  "UNLOCK",
  "REPORT",
  "MKACTIVITY",
  "CHECKOUT",
  "MERGE",
  "M-SEARCH",
  "NOTIFY",
  "SUBSCRIBE",
  "UNSUBSCRIBE",
  "NOTIMPLEMENTED"
};
107
108
/* Header-name character table, based on the rfc 2616 token rule:
 *        token       = 1*<any CHAR except CTLs or separators>
 *     separators     = "(" | ")" | "<" | ">" | "@"
 *                    | "," | ";" | ":" | "\" | <">
 *                    | "/" | "[" | "]" | "?" | "="
 *                    | "{" | "}" | SP | HT
 *
 * An entry of 0 means the byte is rejected; any other entry is the byte
 * itself, with 'A'-'Z' folded to lower case. Note that this table accepts
 * a few characters the grammar above lists as separators: SP, '"', '/'
 * and '}'. Only the first 128 entries are listed; bytes 0x80-0xFF are
 * zero-initialised and therefore rejected.
 */
static const char tokens[256] = {
  /* 0x00 - 0x07: control characters */
  0,    0,    0,    0,    0,    0,    0,    0,
  /* 0x08 - 0x0f: control characters (incl. HT, LF, CR) */
  0,    0,    0,    0,    0,    0,    0,    0,
  /* 0x10 - 0x17: control characters */
  0,    0,    0,    0,    0,    0,    0,    0,
  /* 0x18 - 0x1f: control characters */
  0,    0,    0,    0,    0,    0,    0,    0,
  /* 0x20 - 0x27:  SP !  "  #  $  %  &  ' */
  ' ',  '!',  '"',  '#',  '$',  '%',  '&',  '\'',
  /* 0x28 - 0x2f:  (  )  *  +  ,  -  .  / */
  0,    0,    '*',  '+',  0,    '-',  '.',  '/',
  /* 0x30 - 0x37:  0  1  2  3  4  5  6  7 */
  '0',  '1',  '2',  '3',  '4',  '5',  '6',  '7',
  /* 0x38 - 0x3f:  8  9  :  ;  <  =  >  ? */
  '8',  '9',  0,    0,    0,    0,    0,    0,
  /* 0x40 - 0x47:  @  A  B  C  D  E  F  G */
  0,    'a',  'b',  'c',  'd',  'e',  'f',  'g',
  /* 0x48 - 0x4f:  H  I  J  K  L  M  N  O */
  'h',  'i',  'j',  'k',  'l',  'm',  'n',  'o',
  /* 0x50 - 0x57:  P  Q  R  S  T  U  V  W */
  'p',  'q',  'r',  's',  't',  'u',  'v',  'w',
  /* 0x58 - 0x5f:  X  Y  Z  [  \  ]  ^  _ */
  'x',  'y',  'z',  0,    0,    0,    '^',  '_',
  /* 0x60 - 0x67:  `  a  b  c  d  e  f  g */
  '`',  'a',  'b',  'c',  'd',  'e',  'f',  'g',
  /* 0x68 - 0x6f:  h  i  j  k  l  m  n  o */
  'h',  'i',  'j',  'k',  'l',  'm',  'n',  'o',
  /* 0x70 - 0x77:  p  q  r  s  t  u  v  w */
  'p',  'q',  'r',  's',  't',  'u',  'v',  'w',
  /* 0x78 - 0x7f:  x  y  z  {  |  }  ~  DEL */
  'x',  'y',  'z',  0,    '|',  '}',  '~',  0
};
149
150
/* Maps a byte to the value of the hexadecimal digit it represents (0-15),
 * or to -1 when the byte is not one of 0-9, A-F, a-f. The chunk-size parser
 * rejects input by testing the looked-up value against -1.
 *
 * All 256 entries are written out on purpose: omitted trailing initialisers
 * would be zero, which would make every byte in 0x80-0xFF look like the
 * valid digit '0' instead of being rejected. */
static const int8_t unhex[256] =
  {-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1   /* 0x00 */
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1   /* 0x10 */
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1   /* 0x20 */
  , 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,-1,-1,-1,-1,-1,-1   /* 0x30: '0'-'9' */
  ,-1,10,11,12,13,14,15,-1,-1,-1,-1,-1,-1,-1,-1,-1   /* 0x40: 'A'-'F' */
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1   /* 0x50 */
  ,-1,10,11,12,13,14,15,-1,-1,-1,-1,-1,-1,-1,-1,-1   /* 0x60: 'a'-'f' */
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1   /* 0x70 */
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1   /* 0x80 */
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1   /* 0x90 */
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1   /* 0xa0 */
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1   /* 0xb0 */
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1   /* 0xc0 */
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1   /* 0xd0 */
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1   /* 0xe0 */
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1   /* 0xf0 */
  };
161
162
/* Per-byte flag used by the request-URL states: 1 means the byte is simply
 * consumed as part of the current URL component, 0 means the state has to
 * look at it explicitly (delimiter, line end or error). The zero entries
 * are the control characters, SP, '#', '?' and DEL. Only the first 128
 * entries are listed; bytes 0x80-0xFF are zero-initialised. */
static const uint8_t normal_url_char[256] = {
  /* 0x00 - 0x0f: control characters */
  0, 0, 0, 0, 0, 0, 0, 0,   0, 0, 0, 0, 0, 0, 0, 0,
  /* 0x10 - 0x1f: control characters */
  0, 0, 0, 0, 0, 0, 0, 0,   0, 0, 0, 0, 0, 0, 0, 0,
  /* 0x20 - 0x2f: SP ! " # $ % & '   ( ) * + , - . / */
  0, 1, 1, 0, 1, 1, 1, 1,   1, 1, 1, 1, 1, 1, 1, 1,
  /* 0x30 - 0x3f: 0 1 2 3 4 5 6 7   8 9 : ; < = > ? */
  1, 1, 1, 1, 1, 1, 1, 1,   1, 1, 1, 1, 1, 1, 1, 0,
  /* 0x40 - 0x4f: @ A B C D E F G   H I J K L M N O */
  1, 1, 1, 1, 1, 1, 1, 1,   1, 1, 1, 1, 1, 1, 1, 1,
  /* 0x50 - 0x5f: P Q R S T U V W   X Y Z [ \ ] ^ _ */
  1, 1, 1, 1, 1, 1, 1, 1,   1, 1, 1, 1, 1, 1, 1, 1,
  /* 0x60 - 0x6f: ` a b c d e f g   h i j k l m n o */
  1, 1, 1, 1, 1, 1, 1, 1,   1, 1, 1, 1, 1, 1, 1, 1,
  /* 0x70 - 0x7f: p q r s t u v w   x y z { | } ~ DEL */
  1, 1, 1, 1, 1, 1, 1, 1,   1, 1, 1, 1, 1, 1, 1, 0
};
196
197
/* True while `state` is one of the states up to and including
 * s_headers_almost_done and the parser is not reading trailers (F_TRAILING
 * clear). The main loop uses it to decide whether a byte counts towards
 * PHP_HTTP_MAX_HEADER_SIZE. Relies on `parser` being in scope. The argument
 * is parenthesised so that any expression can be passed safely. */
#define PARSING_HEADER(state) \
  ((state) <= s_headers_almost_done && 0 == (parser->flags & F_TRAILING))
199
200
/* States of the matcher that spots the few header names and header values
 * the parser reacts to. Anything else ends up in h_general. */
enum header_states {
  h_general = 0,                        /* header of no special interest */

  /* shared prefix of "connection" and "content-length" */
  h_C,
  h_CO,
  h_CON,

  /* in the middle of comparing a header name */
  h_matching_connection,
  h_matching_proxy_connection,
  h_matching_content_length,
  h_matching_transfer_encoding,
  h_matching_upgrade,

  /* header name fully matched */
  h_connection,
  h_content_length,
  h_transfer_encoding,
  h_upgrade,

  /* in the middle of comparing a header value */
  h_matching_transfer_encoding_chunked,
  h_matching_connection_keep_alive,
  h_matching_connection_close,

  /* header value fully matched */
  h_transfer_encoding_chunked,
  h_connection_keep_alive,
  h_connection_close
};
226
227
/* Bit flags kept in parser->flags for the message being parsed. */
enum flags {
  F_CHUNKED               = 0x01,  /* "Transfer-Encoding: chunked" was seen */
  F_CONNECTION_KEEP_ALIVE = 0x02,  /* "Connection: keep-alive" was seen */
  F_CONNECTION_CLOSE      = 0x04,  /* "Connection: close" was seen */
  F_TRAILING              = 0x08,  /* headers being read end a chunked message */
  F_UPGRADE               = 0x10,  /* an Upgrade header with a value was seen */
  F_SKIPBODY              = 0x20   /* on_headers_complete returned 1: no body */
};
236
237
/* Line terminator characters. */
#define CR '\r'
#define LF '\n'
/* Set bit 0x20 of a byte, which turns an ASCII upper-case letter into its
 * lower-case form. Other bytes are altered as well, so callers only compare
 * the result against lower-case letters. The argument is parenthesised so
 * that any expression can be passed safely. */
#define LOWER(c) ((unsigned char)((c) | 0x20))
/* Look a byte up in tokens[]: 0 when the byte is not accepted in a header
 * name, otherwise the (lower-cased) character. */
#define TOKEN(c) (tokens[(unsigned char)(c)])
242
243
/* State in which parsing of a message begins, chosen from the parser type.
 * Relies on `parser` being in scope. */
#define start_state (parser->type == PHP_HTTP_REQUEST ? s_start_req : s_start_res)


/* STRICT_CHECK(cond): in strict builds, jump to the `error` label of the
 *   enclosing function when `cond` holds; otherwise expands to nothing and
 *   `cond` is not evaluated.
 * NEW_MESSAGE(): state to enter once a message is complete. Strict builds
 *   move to s_dead unless the connection is to be kept alive. */
#ifdef HTTP_PARSER_STRICT
/* wrapped in do/while so the macro behaves as one statement next to else */
# define STRICT_CHECK(cond) do { if (cond) goto error; } while (0)
/* uses the php_-prefixed helper, the name this file calls elsewhere */
# define NEW_MESSAGE() (php_http_should_keep_alive(parser) ? start_state : s_dead)
#else
# define STRICT_CHECK(cond)
# define NEW_MESSAGE() start_state
#endif
254
255
php_http_parser_execute(php_http_parser * parser,const php_http_parser_settings * settings,const char * data,size_t len)256 size_t php_http_parser_execute (php_http_parser *parser,
257 const php_http_parser_settings *settings,
258 const char *data,
259 size_t len)
260 {
261 char ch;
262 signed char c;
263 const char *p = data, *pe;
264 size_t to_read;
265
266 enum state state = (enum state) parser->state;
267 enum header_states header_state = (enum header_states) parser->header_state;
268 uint32_t index = parser->index;
269 uint32_t nread = parser->nread;
270
271 /* technically we could combine all of these (except for url_mark) into one
272 variable, saving stack space, but it seems more clear to have them
273 separated. */
274 const char *header_field_mark = 0;
275 const char *header_value_mark = 0;
276 const char *fragment_mark = 0;
277 const char *query_string_mark = 0;
278 const char *path_mark = 0;
279 const char *url_mark = 0;
280
281 if (len == 0) {
282 if (state == s_body_identity_eof) {
283 CALLBACK2(message_complete);
284 }
285 return 0;
286 }
287
288 if (state == s_header_field)
289 header_field_mark = data;
290 if (state == s_header_value)
291 header_value_mark = data;
292 if (state == s_req_fragment)
293 fragment_mark = data;
294 if (state == s_req_query_string)
295 query_string_mark = data;
296 if (state == s_req_path)
297 path_mark = data;
298 if (state == s_req_path || state == s_req_schema || state == s_req_schema_slash
299 || state == s_req_schema_slash_slash || state == s_req_port
300 || state == s_req_query_string_start || state == s_req_query_string
301 || state == s_req_host
302 || state == s_req_fragment_start || state == s_req_fragment)
303 url_mark = data;
304
305 for (p=data, pe=data+len; p != pe; p++) {
306 ch = *p;
307
308 if (PARSING_HEADER(state)) {
309 ++nread;
310 /* Buffer overflow attack */
311 if (nread > PHP_HTTP_MAX_HEADER_SIZE) goto error;
312 }
313
314 switch (state) {
315
316 case s_dead:
317 /* this state is used after a 'Connection: close' message
318 * the parser will error out if it reads another message
319 */
320 goto error;
321
322 case s_start_req_or_res:
323 {
324 if (ch == CR || ch == LF)
325 break;
326 parser->flags = 0;
327 parser->content_length = -1;
328
329 CALLBACK2(message_begin);
330
331 if (ch == 'H')
332 state = s_res_or_resp_H;
333 else {
334 parser->type = PHP_HTTP_REQUEST;
335 goto start_req_method_assign;
336 }
337 break;
338 }
339
340 case s_res_or_resp_H:
341 if (ch == 'T') {
342 parser->type = PHP_HTTP_RESPONSE;
343 state = s_res_HT;
344 } else {
345 if (ch != 'E') goto error;
346 parser->type = PHP_HTTP_REQUEST;
347 parser->method = PHP_HTTP_HEAD;
348 index = 2;
349 state = s_req_method;
350 }
351 break;
352
353 case s_start_res:
354 {
355 parser->flags = 0;
356 parser->content_length = -1;
357
358 CALLBACK2(message_begin);
359
360 switch (ch) {
361 case 'H':
362 state = s_res_H;
363 break;
364
365 case CR:
366 case LF:
367 break;
368
369 default:
370 goto error;
371 }
372 break;
373 }
374
375 case s_res_H:
376 STRICT_CHECK(ch != 'T');
377 state = s_res_HT;
378 break;
379
380 case s_res_HT:
381 STRICT_CHECK(ch != 'T');
382 state = s_res_HTT;
383 break;
384
385 case s_res_HTT:
386 STRICT_CHECK(ch != 'P');
387 state = s_res_HTTP;
388 break;
389
390 case s_res_HTTP:
391 STRICT_CHECK(ch != '/');
392 state = s_res_first_http_major;
393 break;
394
395 case s_res_first_http_major:
396 if (ch < '1' || ch > '9') goto error;
397 parser->http_major = ch - '0';
398 state = s_res_http_major;
399 break;
400
401 /* major HTTP version or dot */
402 case s_res_http_major:
403 {
404 if (ch == '.') {
405 state = s_res_first_http_minor;
406 break;
407 }
408
409 if (ch < '0' || ch > '9') goto error;
410
411 parser->http_major *= 10;
412 parser->http_major += ch - '0';
413
414 if (parser->http_major > 999) goto error;
415 break;
416 }
417
418 /* first digit of minor HTTP version */
419 case s_res_first_http_minor:
420 if (ch < '0' || ch > '9') goto error;
421 parser->http_minor = ch - '0';
422 state = s_res_http_minor;
423 break;
424
425 /* minor HTTP version or end of request line */
426 case s_res_http_minor:
427 {
428 if (ch == ' ') {
429 state = s_res_first_status_code;
430 break;
431 }
432
433 if (ch < '0' || ch > '9') goto error;
434
435 parser->http_minor *= 10;
436 parser->http_minor += ch - '0';
437
438 if (parser->http_minor > 999) goto error;
439 break;
440 }
441
442 case s_res_first_status_code:
443 {
444 if (ch < '0' || ch > '9') {
445 if (ch == ' ') {
446 break;
447 }
448 goto error;
449 }
450 parser->status_code = ch - '0';
451 state = s_res_status_code;
452 break;
453 }
454
455 case s_res_status_code:
456 {
457 if (ch < '0' || ch > '9') {
458 switch (ch) {
459 case ' ':
460 state = s_res_status;
461 break;
462 case CR:
463 state = s_res_line_almost_done;
464 break;
465 case LF:
466 state = s_header_field_start;
467 break;
468 default:
469 goto error;
470 }
471 break;
472 }
473
474 parser->status_code *= 10;
475 parser->status_code += ch - '0';
476
477 if (parser->status_code > 999) goto error;
478 break;
479 }
480
481 case s_res_status:
482 /* the human readable status. e.g. "NOT FOUND"
483 * we are not humans so just ignore this */
484 if (ch == CR) {
485 state = s_res_line_almost_done;
486 break;
487 }
488
489 if (ch == LF) {
490 state = s_header_field_start;
491 break;
492 }
493 break;
494
495 case s_res_line_almost_done:
496 STRICT_CHECK(ch != LF);
497 state = s_header_field_start;
498 break;
499
500 case s_start_req:
501 {
502 if (ch == CR || ch == LF)
503 break;
504 parser->flags = 0;
505 parser->content_length = -1;
506
507 CALLBACK2(message_begin);
508
509 if (ch < 'A' || 'Z' < ch) goto error;
510
511 start_req_method_assign:
512 parser->method = (enum php_http_method) 0;
513 index = 1;
514 switch (ch) {
515 case 'C': parser->method = PHP_HTTP_CONNECT; /* or COPY, CHECKOUT */ break;
516 case 'D': parser->method = PHP_HTTP_DELETE; break;
517 case 'G': parser->method = PHP_HTTP_GET; break;
518 case 'H': parser->method = PHP_HTTP_HEAD; break;
519 case 'L': parser->method = PHP_HTTP_LOCK; break;
520 case 'M': parser->method = PHP_HTTP_MKCOL; /* or MOVE, MKCALENDAR, MKACTIVITY, MERGE, M-SEARCH */ break;
521 case 'N': parser->method = PHP_HTTP_NOTIFY; break;
522 case 'O': parser->method = PHP_HTTP_OPTIONS; break;
523 case 'P': parser->method = PHP_HTTP_POST; /* or PROPFIND or PROPPATCH or PUT */ break;
524 case 'R': parser->method = PHP_HTTP_REPORT; break;
525 case 'S': parser->method = PHP_HTTP_SUBSCRIBE; /* or SEARCH */ break;
526 case 'T': parser->method = PHP_HTTP_TRACE; break;
527 case 'U': parser->method = PHP_HTTP_UNLOCK; /* or UNSUBSCRIBE */ break;
528 default: parser->method = PHP_HTTP_NOT_IMPLEMENTED; break;
529 }
530 state = s_req_method;
531 break;
532 }
533 case s_req_method:
534 {
535 const char *matcher;
536 if (ch == '\0')
537 goto error;
538
539 matcher = method_strings[parser->method];
540 if (ch == ' ') {
541 if (parser->method != PHP_HTTP_NOT_IMPLEMENTED && matcher[index] != '\0') {
542 parser->method = PHP_HTTP_NOT_IMPLEMENTED;
543 }
544 state = s_req_spaces_before_url;
545 } else if (parser->method == PHP_HTTP_NOT_IMPLEMENTED || ch == matcher[index]) {
546 ; /* nada */
547 } else if (parser->method == PHP_HTTP_CONNECT) {
548 if (index == 1 && ch == 'H') {
549 parser->method = PHP_HTTP_CHECKOUT;
550 } else if (index == 2 && ch == 'P') {
551 parser->method = PHP_HTTP_COPY;
552 } else {
553 parser->method = PHP_HTTP_NOT_IMPLEMENTED;
554 }
555 } else if (parser->method == PHP_HTTP_MKCOL) {
556 if (index == 1 && ch == 'O') {
557 parser->method = PHP_HTTP_MOVE;
558 } else if (index == 3 && ch == 'A') {
559 parser->method = PHP_HTTP_MKCALENDAR;
560 } else if (index == 1 && ch == 'E') {
561 parser->method = PHP_HTTP_MERGE;
562 } else if (index == 1 && ch == '-') {
563 parser->method = PHP_HTTP_MSEARCH;
564 } else if (index == 2 && ch == 'A') {
565 parser->method = PHP_HTTP_MKACTIVITY;
566 } else {
567 parser->method = PHP_HTTP_NOT_IMPLEMENTED;
568 }
569 } else if (index == 1 && parser->method == PHP_HTTP_POST && ch == 'R') {
570 parser->method = PHP_HTTP_PROPFIND; /* or HTTP_PROPPATCH */
571 } else if (index == 1 && parser->method == PHP_HTTP_POST && ch == 'U') {
572 parser->method = PHP_HTTP_PUT;
573 } else if (index == 1 && parser->method == PHP_HTTP_POST && ch == 'A') {
574 parser->method = PHP_HTTP_PATCH;
575 } else if (index == 1 && parser->method == PHP_HTTP_SUBSCRIBE && ch == 'E') {
576 parser->method = PHP_HTTP_SEARCH;
577 } else if (index == 2 && parser->method == PHP_HTTP_UNLOCK && ch == 'S') {
578 parser->method = PHP_HTTP_UNSUBSCRIBE;
579 } else if (index == 4 && parser->method == PHP_HTTP_PROPFIND && ch == 'P') {
580 parser->method = PHP_HTTP_PROPPATCH;
581 } else {
582 parser->method = PHP_HTTP_NOT_IMPLEMENTED;
583 }
584
585 ++index;
586 break;
587 }
588 case s_req_spaces_before_url:
589 {
590 if (ch == ' ') break;
591
592 if (ch == '/' || ch == '*') {
593 MARK(url);
594 MARK(path);
595 state = s_req_path;
596 break;
597 }
598
599 c = LOWER(ch);
600
601 if (c >= 'a' && c <= 'z') {
602 MARK(url);
603 state = s_req_schema;
604 break;
605 }
606
607 goto error;
608 }
609
610 case s_req_schema:
611 {
612 c = LOWER(ch);
613
614 if (c >= 'a' && c <= 'z') break;
615
616 if (ch == ':') {
617 state = s_req_schema_slash;
618 break;
619 } else if (ch == '.') {
620 state = s_req_host;
621 break;
622 } else if ('0' <= ch && ch <= '9') {
623 state = s_req_host;
624 break;
625 }
626
627 goto error;
628 }
629
630 case s_req_schema_slash:
631 STRICT_CHECK(ch != '/');
632 state = s_req_schema_slash_slash;
633 break;
634
635 case s_req_schema_slash_slash:
636 STRICT_CHECK(ch != '/');
637 state = s_req_host;
638 break;
639
640 case s_req_host:
641 {
642 c = LOWER(ch);
643 if (c >= 'a' && c <= 'z') break;
644 if ((ch >= '0' && ch <= '9') || ch == '.' || ch == '-') break;
645 switch (ch) {
646 case ':':
647 state = s_req_port;
648 break;
649 case '/':
650 MARK(path);
651 state = s_req_path;
652 break;
653 case ' ':
654 /* The request line looks like:
655 * "GET http://foo.bar.com HTTP/1.1"
656 * That is, there is no path.
657 */
658 CALLBACK(url);
659 state = s_req_http_start;
660 break;
661 default:
662 goto error;
663 }
664 break;
665 }
666
667 case s_req_port:
668 {
669 if (ch >= '0' && ch <= '9') break;
670 switch (ch) {
671 case '/':
672 MARK(path);
673 state = s_req_path;
674 break;
675 case ' ':
676 /* The request line looks like:
677 * "GET http://foo.bar.com:1234 HTTP/1.1"
678 * That is, there is no path.
679 */
680 CALLBACK(url);
681 state = s_req_http_start;
682 break;
683 default:
684 goto error;
685 }
686 break;
687 }
688
689 case s_req_path:
690 {
691 if (normal_url_char[(unsigned char)ch]) break;
692
693 switch (ch) {
694 case ' ':
695 CALLBACK(url);
696 CALLBACK(path);
697 state = s_req_http_start;
698 break;
699 case CR:
700 CALLBACK(url);
701 CALLBACK(path);
702 parser->http_major = 0;
703 parser->http_minor = 9;
704 state = s_req_line_almost_done;
705 break;
706 case LF:
707 CALLBACK(url);
708 CALLBACK(path);
709 parser->http_major = 0;
710 parser->http_minor = 9;
711 state = s_header_field_start;
712 break;
713 case '?':
714 CALLBACK(path);
715 state = s_req_query_string_start;
716 break;
717 case '#':
718 CALLBACK(path);
719 state = s_req_fragment_start;
720 break;
721 default:
722 goto error;
723 }
724 break;
725 }
726
727 case s_req_query_string_start:
728 {
729 if (normal_url_char[(unsigned char)ch]) {
730 MARK(query_string);
731 state = s_req_query_string;
732 break;
733 }
734
735 switch (ch) {
736 case '?':
737 break; /* XXX ignore extra '?' ... is this right? */
738 case ' ':
739 CALLBACK(url);
740 state = s_req_http_start;
741 break;
742 case CR:
743 CALLBACK(url);
744 parser->http_major = 0;
745 parser->http_minor = 9;
746 state = s_req_line_almost_done;
747 break;
748 case LF:
749 CALLBACK(url);
750 parser->http_major = 0;
751 parser->http_minor = 9;
752 state = s_header_field_start;
753 break;
754 case '#':
755 state = s_req_fragment_start;
756 break;
757 default:
758 goto error;
759 }
760 break;
761 }
762
763 case s_req_query_string:
764 {
765 if (normal_url_char[(unsigned char)ch]) break;
766
767 switch (ch) {
768 case '?':
769 /* allow extra '?' in query string */
770 break;
771 case ' ':
772 CALLBACK(url);
773 CALLBACK(query_string);
774 state = s_req_http_start;
775 break;
776 case CR:
777 CALLBACK(url);
778 CALLBACK(query_string);
779 parser->http_major = 0;
780 parser->http_minor = 9;
781 state = s_req_line_almost_done;
782 break;
783 case LF:
784 CALLBACK(url);
785 CALLBACK(query_string);
786 parser->http_major = 0;
787 parser->http_minor = 9;
788 state = s_header_field_start;
789 break;
790 case '#':
791 CALLBACK(query_string);
792 state = s_req_fragment_start;
793 break;
794 default:
795 goto error;
796 }
797 break;
798 }
799
800 case s_req_fragment_start:
801 {
802 if (normal_url_char[(unsigned char)ch]) {
803 MARK(fragment);
804 state = s_req_fragment;
805 break;
806 }
807
808 switch (ch) {
809 case ' ':
810 CALLBACK(url);
811 state = s_req_http_start;
812 break;
813 case CR:
814 CALLBACK(url);
815 parser->http_major = 0;
816 parser->http_minor = 9;
817 state = s_req_line_almost_done;
818 break;
819 case LF:
820 CALLBACK(url);
821 parser->http_major = 0;
822 parser->http_minor = 9;
823 state = s_header_field_start;
824 break;
825 case '?':
826 MARK(fragment);
827 state = s_req_fragment;
828 break;
829 case '#':
830 break;
831 default:
832 goto error;
833 }
834 break;
835 }
836
837 case s_req_fragment:
838 {
839 if (normal_url_char[(unsigned char)ch]) break;
840
841 switch (ch) {
842 case ' ':
843 CALLBACK(url);
844 CALLBACK(fragment);
845 state = s_req_http_start;
846 break;
847 case CR:
848 CALLBACK(url);
849 CALLBACK(fragment);
850 parser->http_major = 0;
851 parser->http_minor = 9;
852 state = s_req_line_almost_done;
853 break;
854 case LF:
855 CALLBACK(url);
856 CALLBACK(fragment);
857 parser->http_major = 0;
858 parser->http_minor = 9;
859 state = s_header_field_start;
860 break;
861 case '?':
862 case '#':
863 break;
864 default:
865 goto error;
866 }
867 break;
868 }
869
870 case s_req_http_start:
871 switch (ch) {
872 case 'H':
873 state = s_req_http_H;
874 break;
875 case ' ':
876 break;
877 default:
878 goto error;
879 }
880 break;
881
882 case s_req_http_H:
883 STRICT_CHECK(ch != 'T');
884 state = s_req_http_HT;
885 break;
886
887 case s_req_http_HT:
888 STRICT_CHECK(ch != 'T');
889 state = s_req_http_HTT;
890 break;
891
892 case s_req_http_HTT:
893 STRICT_CHECK(ch != 'P');
894 state = s_req_http_HTTP;
895 break;
896
897 case s_req_http_HTTP:
898 STRICT_CHECK(ch != '/');
899 state = s_req_first_http_major;
900 break;
901
902 /* first digit of major HTTP version */
903 case s_req_first_http_major:
904 if (ch < '1' || ch > '9') goto error;
905 parser->http_major = ch - '0';
906 state = s_req_http_major;
907 break;
908
909 /* major HTTP version or dot */
910 case s_req_http_major:
911 {
912 if (ch == '.') {
913 state = s_req_first_http_minor;
914 break;
915 }
916
917 if (ch < '0' || ch > '9') goto error;
918
919 parser->http_major *= 10;
920 parser->http_major += ch - '0';
921
922 if (parser->http_major > 999) goto error;
923 break;
924 }
925
926 /* first digit of minor HTTP version */
927 case s_req_first_http_minor:
928 if (ch < '0' || ch > '9') goto error;
929 parser->http_minor = ch - '0';
930 state = s_req_http_minor;
931 break;
932
933 /* minor HTTP version or end of request line */
934 case s_req_http_minor:
935 {
936 if (ch == CR) {
937 state = s_req_line_almost_done;
938 break;
939 }
940
941 if (ch == LF) {
942 state = s_header_field_start;
943 break;
944 }
945
946 /* XXX allow spaces after digit? */
947
948 if (ch < '0' || ch > '9') goto error;
949
950 parser->http_minor *= 10;
951 parser->http_minor += ch - '0';
952
953 if (parser->http_minor > 999) goto error;
954 break;
955 }
956
957 /* end of request line */
958 case s_req_line_almost_done:
959 {
960 if (ch != LF) goto error;
961 state = s_header_field_start;
962 break;
963 }
964
965 case s_header_field_start:
966 {
967 if (ch == CR) {
968 state = s_headers_almost_done;
969 break;
970 }
971
972 if (ch == LF) {
973 /* they might be just sending \n instead of \r\n so this would be
974 * the second \n to denote the end of headers*/
975 state = s_headers_almost_done;
976 goto headers_almost_done;
977 }
978
979 c = TOKEN(ch);
980
981 if (!c) goto error;
982
983 MARK(header_field);
984
985 index = 0;
986 state = s_header_field;
987
988 switch (c) {
989 case 'c':
990 header_state = h_C;
991 break;
992
993 case 'p':
994 header_state = h_matching_proxy_connection;
995 break;
996
997 case 't':
998 header_state = h_matching_transfer_encoding;
999 break;
1000
1001 case 'u':
1002 header_state = h_matching_upgrade;
1003 break;
1004
1005 default:
1006 header_state = h_general;
1007 break;
1008 }
1009 break;
1010 }
1011
1012 case s_header_field:
1013 {
1014 c = TOKEN(ch);
1015
1016 if (c) {
1017 switch (header_state) {
1018 case h_general:
1019 break;
1020
1021 case h_C:
1022 index++;
1023 header_state = (c == 'o' ? h_CO : h_general);
1024 break;
1025
1026 case h_CO:
1027 index++;
1028 header_state = (c == 'n' ? h_CON : h_general);
1029 break;
1030
1031 case h_CON:
1032 index++;
1033 switch (c) {
1034 case 'n':
1035 header_state = h_matching_connection;
1036 break;
1037 case 't':
1038 header_state = h_matching_content_length;
1039 break;
1040 default:
1041 header_state = h_general;
1042 break;
1043 }
1044 break;
1045
1046 /* connection */
1047
1048 case h_matching_connection:
1049 index++;
1050 if (index > sizeof(CONNECTION)-1
1051 || c != CONNECTION[index]) {
1052 header_state = h_general;
1053 } else if (index == sizeof(CONNECTION)-2) {
1054 header_state = h_connection;
1055 }
1056 break;
1057
1058 /* proxy-connection */
1059
1060 case h_matching_proxy_connection:
1061 index++;
1062 if (index > sizeof(PROXY_CONNECTION)-1
1063 || c != PROXY_CONNECTION[index]) {
1064 header_state = h_general;
1065 } else if (index == sizeof(PROXY_CONNECTION)-2) {
1066 header_state = h_connection;
1067 }
1068 break;
1069
1070 /* content-length */
1071
1072 case h_matching_content_length:
1073 index++;
1074 if (index > sizeof(CONTENT_LENGTH)-1
1075 || c != CONTENT_LENGTH[index]) {
1076 header_state = h_general;
1077 } else if (index == sizeof(CONTENT_LENGTH)-2) {
1078 header_state = h_content_length;
1079 }
1080 break;
1081
1082 /* transfer-encoding */
1083
1084 case h_matching_transfer_encoding:
1085 index++;
1086 if (index > sizeof(TRANSFER_ENCODING)-1
1087 || c != TRANSFER_ENCODING[index]) {
1088 header_state = h_general;
1089 } else if (index == sizeof(TRANSFER_ENCODING)-2) {
1090 header_state = h_transfer_encoding;
1091 }
1092 break;
1093
1094 /* upgrade */
1095
1096 case h_matching_upgrade:
1097 index++;
1098 if (index > sizeof(UPGRADE)-1
1099 || c != UPGRADE[index]) {
1100 header_state = h_general;
1101 } else if (index == sizeof(UPGRADE)-2) {
1102 header_state = h_upgrade;
1103 }
1104 break;
1105
1106 case h_connection:
1107 case h_content_length:
1108 case h_transfer_encoding:
1109 case h_upgrade:
1110 if (ch != ' ') header_state = h_general;
1111 break;
1112
1113 default:
1114 assert(0 && "Unknown header_state");
1115 break;
1116 }
1117 break;
1118 }
1119
1120 if (ch == ':') {
1121 CALLBACK(header_field);
1122 state = s_header_value_start;
1123 break;
1124 }
1125
1126 if (ch == CR) {
1127 state = s_header_almost_done;
1128 CALLBACK(header_field);
1129 break;
1130 }
1131
1132 if (ch == LF) {
1133 CALLBACK(header_field);
1134 state = s_header_field_start;
1135 break;
1136 }
1137
1138 goto error;
1139 }
1140
1141 case s_header_value_start:
1142 {
1143 if (ch == ' ') break;
1144
1145 MARK(header_value);
1146
1147 state = s_header_value;
1148 index = 0;
1149
1150 c = LOWER(ch);
1151
1152 if (ch == CR) {
1153 CALLBACK(header_value);
1154 header_state = h_general;
1155 state = s_header_almost_done;
1156 break;
1157 }
1158
1159 if (ch == LF) {
1160 CALLBACK(header_value);
1161 state = s_header_field_start;
1162 break;
1163 }
1164
1165 switch (header_state) {
1166 case h_upgrade:
1167 parser->flags |= F_UPGRADE;
1168 header_state = h_general;
1169 break;
1170
1171 case h_transfer_encoding:
1172 /* looking for 'Transfer-Encoding: chunked' */
1173 if ('c' == c) {
1174 header_state = h_matching_transfer_encoding_chunked;
1175 } else {
1176 header_state = h_general;
1177 }
1178 break;
1179
1180 case h_content_length:
1181 if (ch < '0' || ch > '9') goto error;
1182 parser->content_length = ch - '0';
1183 break;
1184
1185 case h_connection:
1186 /* looking for 'Connection: keep-alive' */
1187 if (c == 'k') {
1188 header_state = h_matching_connection_keep_alive;
1189 /* looking for 'Connection: close' */
1190 } else if (c == 'c') {
1191 header_state = h_matching_connection_close;
1192 } else {
1193 header_state = h_general;
1194 }
1195 break;
1196
1197 default:
1198 header_state = h_general;
1199 break;
1200 }
1201 break;
1202 }
1203
1204 case s_header_value:
1205 {
1206 c = LOWER(ch);
1207
1208 if (ch == CR) {
1209 CALLBACK(header_value);
1210 state = s_header_almost_done;
1211 break;
1212 }
1213
1214 if (ch == LF) {
1215 CALLBACK(header_value);
1216 goto header_almost_done;
1217 }
1218
1219 switch (header_state) {
1220 case h_general:
1221 break;
1222
1223 case h_connection:
1224 case h_transfer_encoding:
1225 assert(0 && "Shouldn't get here.");
1226 break;
1227
1228 case h_content_length:
1229 if (ch == ' ') break;
1230 if (ch < '0' || ch > '9') goto error;
1231 parser->content_length *= 10;
1232 parser->content_length += ch - '0';
1233 break;
1234
1235 /* Transfer-Encoding: chunked */
1236 case h_matching_transfer_encoding_chunked:
1237 index++;
1238 if (index > sizeof(CHUNKED)-1
1239 || c != CHUNKED[index]) {
1240 header_state = h_general;
1241 } else if (index == sizeof(CHUNKED)-2) {
1242 header_state = h_transfer_encoding_chunked;
1243 }
1244 break;
1245
1246 /* looking for 'Connection: keep-alive' */
1247 case h_matching_connection_keep_alive:
1248 index++;
1249 if (index > sizeof(KEEP_ALIVE)-1
1250 || c != KEEP_ALIVE[index]) {
1251 header_state = h_general;
1252 } else if (index == sizeof(KEEP_ALIVE)-2) {
1253 header_state = h_connection_keep_alive;
1254 }
1255 break;
1256
1257 /* looking for 'Connection: close' */
1258 case h_matching_connection_close:
1259 index++;
1260 if (index > sizeof(CLOSE)-1 || c != CLOSE[index]) {
1261 header_state = h_general;
1262 } else if (index == sizeof(CLOSE)-2) {
1263 header_state = h_connection_close;
1264 }
1265 break;
1266
1267 case h_transfer_encoding_chunked:
1268 case h_connection_keep_alive:
1269 case h_connection_close:
1270 if (ch != ' ') header_state = h_general;
1271 break;
1272
1273 default:
1274 state = s_header_value;
1275 header_state = h_general;
1276 break;
1277 }
1278 break;
1279 }
1280
1281 case s_header_almost_done:
1282 header_almost_done:
1283 {
1284 STRICT_CHECK(ch != LF);
1285
1286 state = s_header_field_start;
1287
1288 switch (header_state) {
1289 case h_connection_keep_alive:
1290 parser->flags |= F_CONNECTION_KEEP_ALIVE;
1291 break;
1292 case h_connection_close:
1293 parser->flags |= F_CONNECTION_CLOSE;
1294 break;
1295 case h_transfer_encoding_chunked:
1296 parser->flags |= F_CHUNKED;
1297 break;
1298 default:
1299 break;
1300 }
1301 break;
1302 }
1303
1304 case s_headers_almost_done:
1305 headers_almost_done:
1306 {
1307 STRICT_CHECK(ch != LF);
1308
1309 if (parser->flags & F_TRAILING) {
1310 /* End of a chunked request */
1311 CALLBACK2(message_complete);
1312 state = NEW_MESSAGE();
1313 break;
1314 }
1315
1316 nread = 0;
1317
1318 if ((parser->flags & F_UPGRADE) || parser->method == PHP_HTTP_CONNECT) {
1319 parser->upgrade = 1;
1320 }
1321
1322 /* Here we call the headers_complete callback. This is somewhat
1323 * different than other callbacks because if the user returns 1, we
1324 * will interpret that as saying that this message has no body. This
1325 * is needed for the annoying case of receiving a response to a HEAD
1326 * request.
1327 */
1328 if (settings->on_headers_complete) {
1329 switch (settings->on_headers_complete(parser)) {
1330 case 0:
1331 break;
1332
1333 case 1:
1334 parser->flags |= F_SKIPBODY;
1335 break;
1336
1337 default:
1338 return p - data; /* Error */
1339 }
1340 }
1341
1342 /* We cannot meaningfully support upgrade requests, since we only
1343 * support HTTP/1 for now.
1344 */
1345 #if 0
1346 /* Exit, the rest of the connect is in a different protocol. */
1347 if (parser->upgrade) {
1348 CALLBACK2(message_complete);
1349 return (p - data);
1350 }
1351 #endif
1352
1353 if (parser->flags & F_SKIPBODY) {
1354 CALLBACK2(message_complete);
1355 state = NEW_MESSAGE();
1356 } else if (parser->flags & F_CHUNKED) {
1357 /* chunked encoding - ignore Content-Length header */
1358 state = s_chunk_size_start;
1359 } else {
1360 if (parser->content_length == 0) {
1361 /* Content-Length header given but zero: Content-Length: 0\r\n */
1362 CALLBACK2(message_complete);
1363 state = NEW_MESSAGE();
1364 } else if (parser->content_length > 0) {
1365 /* Content-Length header given and non-zero */
1366 state = s_body_identity;
1367 } else {
1368 if (parser->type == PHP_HTTP_REQUEST || php_http_should_keep_alive(parser)) {
1369 /* Assume content-length 0 - read the next */
1370 CALLBACK2(message_complete);
1371 state = NEW_MESSAGE();
1372 } else {
1373 /* Read body until EOF */
1374 state = s_body_identity_eof;
1375 }
1376 }
1377 }
1378
1379 break;
1380 }
1381
1382 case s_body_identity:
1383 assert(pe >= p);
1384
1385 to_read = MIN((size_t)(pe - p), (size_t)parser->content_length);
1386 if (to_read > 0) {
1387 if (settings->on_body) settings->on_body(parser, p, to_read);
1388 p += to_read - 1;
1389 parser->content_length -= to_read;
1390 if (parser->content_length == 0) {
1391 CALLBACK2(message_complete);
1392 state = NEW_MESSAGE();
1393 }
1394 }
1395 break;
1396
1397 /* read until EOF */
1398 case s_body_identity_eof:
1399 to_read = pe - p;
1400 if (to_read > 0) {
1401 if (settings->on_body) settings->on_body(parser, p, to_read);
1402 p += to_read - 1;
1403 }
1404 break;
1405
1406 case s_chunk_size_start:
1407 {
1408 assert(parser->flags & F_CHUNKED);
1409
1410 c = unhex[(unsigned char)ch];
1411 if (c == -1) goto error;
1412 parser->content_length = c;
1413 state = s_chunk_size;
1414 break;
1415 }
1416
1417 case s_chunk_size:
1418 {
1419 assert(parser->flags & F_CHUNKED);
1420
1421 if (ch == CR) {
1422 state = s_chunk_size_almost_done;
1423 break;
1424 }
1425
1426 c = unhex[(unsigned char)ch];
1427
1428 if (c == -1) {
1429 if (ch == ';' || ch == ' ') {
1430 state = s_chunk_parameters;
1431 break;
1432 }
1433 goto error;
1434 }
1435
1436 parser->content_length *= 16;
1437 parser->content_length += c;
1438 break;
1439 }
1440
1441 case s_chunk_parameters:
1442 {
1443 assert(parser->flags & F_CHUNKED);
1444 /* just ignore this shit. TODO check for overflow */
1445 if (ch == CR) {
1446 state = s_chunk_size_almost_done;
1447 break;
1448 }
1449 break;
1450 }
1451
1452 case s_chunk_size_almost_done:
1453 {
1454 assert(parser->flags & F_CHUNKED);
1455 STRICT_CHECK(ch != LF);
1456
1457 if (parser->content_length == 0) {
1458 parser->flags |= F_TRAILING;
1459 state = s_header_field_start;
1460 } else {
1461 state = s_chunk_data;
1462 }
1463 break;
1464 }
1465
1466 case s_chunk_data:
1467 {
1468 assert(parser->flags & F_CHUNKED);
1469 assert(pe >= p);
1470
1471 to_read = MIN((size_t)(pe - p), (size_t)(parser->content_length));
1472
1473 if (to_read > 0) {
1474 if (settings->on_body) settings->on_body(parser, p, to_read);
1475 p += to_read - 1;
1476 }
1477
1478 if (to_read == (size_t)parser->content_length) {
1479 state = s_chunk_data_almost_done;
1480 }
1481
1482 parser->content_length -= to_read;
1483 break;
1484 }
1485
1486 case s_chunk_data_almost_done:
1487 assert(parser->flags & F_CHUNKED);
1488 STRICT_CHECK(ch != CR);
1489 state = s_chunk_data_done;
1490 break;
1491
1492 case s_chunk_data_done:
1493 assert(parser->flags & F_CHUNKED);
1494 STRICT_CHECK(ch != LF);
1495 state = s_chunk_size_start;
1496 break;
1497
1498 default:
1499 assert(0 && "unhandled state");
1500 goto error;
1501 }
1502 }
1503
1504 CALLBACK_NOCLEAR(header_field);
1505 CALLBACK_NOCLEAR(header_value);
1506 CALLBACK_NOCLEAR(fragment);
1507 CALLBACK_NOCLEAR(query_string);
1508 CALLBACK_NOCLEAR(path);
1509 CALLBACK_NOCLEAR(url);
1510
1511 parser->state = state;
1512 parser->header_state = header_state;
1513 parser->index = index;
1514 parser->nread = nread;
1515
1516 return len;
1517
1518 error:
1519 parser->state = s_dead;
1520 return (p - data);
1521 }
1522
1523
1524 int
php_http_should_keep_alive(php_http_parser * parser)1525 php_http_should_keep_alive (php_http_parser *parser)
1526 {
1527 if (parser->http_major > 0 && parser->http_minor > 0) {
1528 /* HTTP/1.1 */
1529 if (parser->flags & F_CONNECTION_CLOSE) {
1530 return 0;
1531 } else {
1532 return 1;
1533 }
1534 } else {
1535 /* HTTP/1.0 or earlier */
1536 if (parser->flags & F_CONNECTION_KEEP_ALIVE) {
1537 return 1;
1538 } else {
1539 return 0;
1540 }
1541 }
1542 }
1543
1544
php_http_method_str(enum php_http_method m)1545 const char * php_http_method_str (enum php_http_method m)
1546 {
1547 return method_strings[m];
1548 }
1549
1550
1551 void
php_http_parser_init(php_http_parser * parser,enum php_http_parser_type t)1552 php_http_parser_init (php_http_parser *parser, enum php_http_parser_type t)
1553 {
1554 parser->type = t;
1555 parser->state = (t == PHP_HTTP_REQUEST ? s_start_req : (t == PHP_HTTP_RESPONSE ? s_start_res : s_start_req_or_res));
1556 parser->nread = 0;
1557 parser->upgrade = 0;
1558 parser->flags = 0;
1559 parser->method = 0;
1560 }
1561