1 /* Copyright 2009,2010 Ryan Dahl <ry@tinyclouds.org>
2 *
3 * Permission is hereby granted, free of charge, to any person obtaining a copy
4 * of this software and associated documentation files (the "Software"), to
5 * deal in the Software without restriction, including without limitation the
6 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
7 * sell copies of the Software, and to permit persons to whom the Software is
8 * furnished to do so, subject to the following conditions:
9 *
10 * The above copyright notice and this permission notice shall be included in
11 * all copies or substantial portions of the Software.
12 *
13 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
18 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
19 * IN THE SOFTWARE.
20 */
21 #include <assert.h>
22 #include <stddef.h>
23 #include "php_http_parser.h"
24
25
/* Smaller of two values. Each argument may be evaluated more than once, so
 * do not pass expressions with side effects. Only defined here when no MIN
 * macro exists yet. */
#if !defined(MIN)
# define MIN(a,b) ((b) > (a) ? (a) : (b))
#endif
29
30
/* Invoke the notification callback settings->on_<FOR>, which receives only
 * the parser. Nothing happens when the callback pointer is unset. If the
 * callback returns non-zero, the enclosing function returns immediately with
 * the offset of `p` inside `data`.
 * Requires `settings`, `parser`, `p` and `data` in scope at the call site. */
#define CALLBACK2(FOR)                                               \
do {                                                                 \
  if (settings->on_##FOR && settings->on_##FOR(parser) != 0) {       \
    return (p - data);                                               \
  }                                                                  \
} while (0)
37
38
/* Record the current read position `p` as the start of element FOR by
 * storing it in the local pointer <FOR>_mark. */
#define MARK(FOR)  do { (FOR##_mark) = p; } while (0)
43
/* Deliver the bytes between <FOR>_mark and the current position `p` to the
 * data callback settings->on_<FOR>(parser, start, length). The callback is
 * only invoked when a mark is set and the callback pointer is non-NULL. A
 * non-zero result makes the enclosing function return the offset of `p`
 * inside `data`. The mark itself is left untouched.
 * Requires `settings`, `parser`, `p`, `data` and <FOR>_mark in scope. */
#define CALLBACK_NOCLEAR(FOR)                                            \
do {                                                                     \
  if (FOR##_mark                                                         \
      && settings->on_##FOR                                              \
      && settings->on_##FOR(parser, FOR##_mark, p - FOR##_mark) != 0) {  \
    return (p - data);                                                   \
  }                                                                      \
} while (0)
57
/* Same as CALLBACK_NOCLEAR(FOR), then resets <FOR>_mark to NULL so the
 * element is not reported a second time.
 * Any CALLBACK macro already defined on PHP_WIN32 builds is dropped first
 * (presumably the one from the Windows headers -- confirm). */
#if defined(PHP_WIN32)
# undef CALLBACK
#endif
#define CALLBACK(FOR)          \
do {                           \
  CALLBACK_NOCLEAR(FOR);       \
  FOR##_mark = NULL;           \
} while (0)
66
67
/* Header names and header values the parser treats specially. Input bytes
 * are lower-cased before being compared against these strings one character
 * at a time, so the spellings here must stay lower-case. */
#define PROXY_CONNECTION   "proxy-connection"
#define CONNECTION         "connection"
#define CONTENT_LENGTH     "content-length"
#define TRANSFER_ENCODING  "transfer-encoding"
#define UPGRADE            "upgrade"
#define CHUNKED            "chunked"
#define KEEP_ALIVE         "keep-alive"
#define CLOSE              "close"
76
77
/* Request method names, indexed by the numeric value held in parser->method.
 * While a request line is being read, incoming bytes are compared against
 * the entry for the currently guessed method.
 * NOTE(review): the order presumably mirrors enum php_http_method in
 * php_http_parser.h -- confirm before adding or reordering entries.
 * The table is read-only, so both the strings and the pointer slots are
 * const. */
static const char * const method_strings[] =
  { "DELETE"
  , "GET"
  , "HEAD"
  , "POST"
  , "PUT"
  , "PATCH"
  , "CONNECT"
  , "OPTIONS"
  , "TRACE"
  , "COPY"
  , "LOCK"
  , "MKCOL"
  , "MOVE"
  , "PROPFIND"
  , "PROPPATCH"
  , "UNLOCK"
  , "REPORT"
  , "MKACTIVITY"
  , "CHECKOUT"
  , "MERGE"
  , "M-SEARCH"
  , "NOTIFY"
  , "SUBSCRIBE"
  , "UNSUBSCRIBE"
  , "NOTIMPLEMENTED"
  };
105
106
/* Header-name character table, based on the rfc 2616 token grammar:
 *        token       = 1*<any CHAR except CTLs or separators>
 *        separators  = "(" | ")" | "<" | ">" | "@"
 *                    | "," | ";" | ":" | "\" | <">
 *                    | "/" | "[" | "]" | "?" | "="
 *                    | "{" | "}" | SP | HT
 * tokens[c] is 0 when byte c is rejected; otherwise it is the byte itself,
 * with 'A'-'Z' folded to 'a'-'z'.
 * The table is more lenient than the grammar above: SP, <">, "/" and "}"
 * are separators in the RFC but have non-zero entries here.
 * Only bytes 0-127 are listed; entries 128-255 are implicitly 0. */
static const char tokens[256] = {
    0,   0,   0,   0,   0,   0,   0,   0,    /*   0-7    nul soh stx etx eot enq ack bel */
    0,   0,   0,   0,   0,   0,   0,   0,    /*   8-15   bs  ht  nl  vt  np  cr  so  si  */
    0,   0,   0,   0,   0,   0,   0,   0,    /*  16-23   dle dc1 dc2 dc3 dc4 nak syn etb */
    0,   0,   0,   0,   0,   0,   0,   0,    /*  24-31   can em  sub esc fs  gs  rs  us  */
  ' ', '!', '"', '#', '$', '%', '&', '\'',   /*  32-39   sp  !   "   #   $   %   &   '   */
    0,   0, '*', '+',   0, '-', '.', '/',    /*  40-47   (   )   *   +   ,   -   .   /   */
  '0', '1', '2', '3', '4', '5', '6', '7',    /*  48-55   0   1   2   3   4   5   6   7   */
  '8', '9',   0,   0,   0,   0,   0,   0,    /*  56-63   8   9   :   ;   <   =   >   ?   */
    0, 'a', 'b', 'c', 'd', 'e', 'f', 'g',    /*  64-71   @   A   B   C   D   E   F   G   */
  'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o',    /*  72-79   H   I   J   K   L   M   N   O   */
  'p', 'q', 'r', 's', 't', 'u', 'v', 'w',    /*  80-87   P   Q   R   S   T   U   V   W   */
  'x', 'y', 'z',   0,   0,   0, '^', '_',    /*  88-95   X   Y   Z   [   \   ]   ^   _   */
  '`', 'a', 'b', 'c', 'd', 'e', 'f', 'g',    /*  96-103  `   a   b   c   d   e   f   g   */
  'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o',    /* 104-111  h   i   j   k   l   m   n   o   */
  'p', 'q', 'r', 's', 't', 'u', 'v', 'w',    /* 112-119  p   q   r   s   t   u   v   w   */
  'x', 'y', 'z',   0, '|', '}', '~',   0     /* 120-127  x   y   z   {   |   }   ~   del */
};
147
148
/* Maps a byte to the value of the hexadecimal digit it represents
 * ('0'-'9' -> 0-9, 'A'-'F' and 'a'-'f' -> 10-15), or -1 when the byte is
 * not a hex digit.
 * All 256 entries are spelled out: if the upper half were left to implicit
 * zero-initialisation, bytes 0x80-0xFF would read as the digit 0 instead of
 * "not a hex digit". */
static const int8_t unhex[256] =
  {-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1   /* 0x00 */
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1   /* 0x10 */
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1   /* 0x20 */
  , 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,-1,-1,-1,-1,-1,-1   /* 0x30: '0'-'9' */
  ,-1,10,11,12,13,14,15,-1,-1,-1,-1,-1,-1,-1,-1,-1   /* 0x40: 'A'-'F' */
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1   /* 0x50 */
  ,-1,10,11,12,13,14,15,-1,-1,-1,-1,-1,-1,-1,-1,-1   /* 0x60: 'a'-'f' */
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1   /* 0x70 */
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1   /* 0x80 */
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1   /* 0x90 */
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1   /* 0xA0 */
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1   /* 0xB0 */
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1   /* 0xC0 */
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1   /* 0xD0 */
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1   /* 0xE0 */
  ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1   /* 0xF0 */
  };
159
160
/* normal_url_char[c] is 1 when byte c can appear inside a URL component
 * without ending it or needing special handling, and 0 otherwise. Control
 * characters, SP, '#', '?' and DEL are 0; every other byte below 128 is 1.
 * Only bytes 0-127 are listed; entries 128-255 are implicitly 0. */
static const uint8_t normal_url_char[256] = {
  0, 0, 0, 0, 0, 0, 0, 0,    /*   0-7    nul soh stx etx eot enq ack bel */
  0, 0, 0, 0, 0, 0, 0, 0,    /*   8-15   bs  ht  nl  vt  np  cr  so  si  */
  0, 0, 0, 0, 0, 0, 0, 0,    /*  16-23   dle dc1 dc2 dc3 dc4 nak syn etb */
  0, 0, 0, 0, 0, 0, 0, 0,    /*  24-31   can em  sub esc fs  gs  rs  us  */
  0, 1, 1, 0, 1, 1, 1, 1,    /*  32-39   sp  !   "   #   $   %   &   '   */
  1, 1, 1, 1, 1, 1, 1, 1,    /*  40-47   (   )   *   +   ,   -   .   /   */
  1, 1, 1, 1, 1, 1, 1, 1,    /*  48-55   0   1   2   3   4   5   6   7   */
  1, 1, 1, 1, 1, 1, 1, 0,    /*  56-63   8   9   :   ;   <   =   >   ?   */
  1, 1, 1, 1, 1, 1, 1, 1,    /*  64-71   @   A   B   C   D   E   F   G   */
  1, 1, 1, 1, 1, 1, 1, 1,    /*  72-79   H   I   J   K   L   M   N   O   */
  1, 1, 1, 1, 1, 1, 1, 1,    /*  80-87   P   Q   R   S   T   U   V   W   */
  1, 1, 1, 1, 1, 1, 1, 1,    /*  88-95   X   Y   Z   [   \   ]   ^   _   */
  1, 1, 1, 1, 1, 1, 1, 1,    /*  96-103  `   a   b   c   d   e   f   g   */
  1, 1, 1, 1, 1, 1, 1, 1,    /* 104-111  h   i   j   k   l   m   n   o   */
  1, 1, 1, 1, 1, 1, 1, 1,    /* 112-119  p   q   r   s   t   u   v   w   */
  1, 1, 1, 1, 1, 1, 1, 0     /* 120-127  x   y   z   {   |   }   ~   del */
};
194
195
/* States of the main parsing state machine. The current state is loaded from
 * parser->state when php_http_parser_execute() is entered. The numeric order
 * matters: PARSING_HEADER() compares against s_headers_almost_done. */
enum state {
  s_dead = 1, /* important that this is > 0 */

  /* message type not yet known / response status line */
  s_start_req_or_res,
  s_res_or_resp_H,
  s_start_res,
  s_res_H,
  s_res_HT,
  s_res_HTT,
  s_res_HTTP,
  s_res_first_http_major,
  s_res_http_major,
  s_res_first_http_minor,
  s_res_http_minor,
  s_res_first_status_code,
  s_res_status_code,
  s_res_status,
  s_res_line_almost_done,

  /* request line */
  s_start_req,

  s_req_method,
  s_req_spaces_before_url,
  s_req_schema,
  s_req_schema_slash,
  s_req_schema_slash_slash,
  s_req_host,
  s_req_port,
  s_req_path,
  s_req_query_string_start,
  s_req_query_string,
  s_req_fragment_start,
  s_req_fragment,
  s_req_http_start,
  s_req_http_H,
  s_req_http_HT,
  s_req_http_HTT,
  s_req_http_HTTP,
  s_req_first_http_major,
  s_req_http_major,
  s_req_first_http_minor,
  s_req_http_minor,
  s_req_line_almost_done,

  /* header lines */
  s_header_field_start,
  s_header_field,
  s_header_value_start,
  s_header_value,

  s_header_almost_done,

  s_headers_almost_done,
  /* Important: 's_headers_almost_done' must be the last 'header' state. All
   * states beyond this must be 'body' states. It is used for overflow
   * checking. See the PARSING_HEADER() macro.
   */

  /* chunked body */
  s_chunk_size_start,
  s_chunk_size,
  s_chunk_size_almost_done,
  s_chunk_parameters,
  s_chunk_data,
  s_chunk_data_almost_done,
  s_chunk_data_done,

  /* identity body */
  s_body_identity,
  s_body_identity_eof
};
263
264
/* True while `state` is at or before s_headers_almost_done (the last header
 * state) and the F_TRAILING flag is clear. The parser uses it to decide
 * whether a byte counts toward the header size limit.
 * Reads parser->flags, so a `parser` pointer must be in scope at the call
 * site. The argument is parenthesised so any expression can be passed. */
#define PARSING_HEADER(state) \
  ((state) <= s_headers_almost_done && 0 == (parser->flags & F_TRAILING))
266
267
/* Sub-state used while scanning a header line to recognise the few headers
 * and values the parser cares about. Loaded from parser->header_state when
 * php_http_parser_execute() is entered. */
enum header_states {
  h_general = 0,                  /* nothing special recognised */

  /* shared prefix "c", "co", "con" of "connection" / "content-length" */
  h_C,
  h_CO,
  h_CON,

  /* part-way through matching a known header name */
  h_matching_connection,
  h_matching_proxy_connection,
  h_matching_content_length,
  h_matching_transfer_encoding,
  h_matching_upgrade,

  /* a known header name has been matched completely */
  h_connection,
  h_content_length,
  h_transfer_encoding,
  h_upgrade,

  /* part-way through matching a known header value */
  h_matching_transfer_encoding_chunked,
  h_matching_connection_keep_alive,
  h_matching_connection_close,

  /* a known header value has been matched completely */
  h_transfer_encoding_chunked,
  h_connection_keep_alive,
  h_connection_close
};
293
294
/* Bit flags kept in parser->flags for the message being parsed. */
enum flags {
  F_CHUNKED               = 1 << 0,  /* "Transfer-Encoding: chunked" was seen */
  F_CONNECTION_KEEP_ALIVE = 1 << 1,  /* "Connection: keep-alive" was seen */
  F_CONNECTION_CLOSE      = 1 << 2,  /* "Connection: close" was seen */
  F_TRAILING              = 1 << 3,  /* tested by PARSING_HEADER() and at end of headers */
  F_UPGRADE               = 1 << 4,  /* an "Upgrade" header with a value was seen */
  F_SKIPBODY              = 1 << 5   /* on_headers_complete returned 1: no body follows */
};
303
304
/* Line terminator bytes. */
#define CR '\r'
#define LF '\n'
/* Sets bit 0x20 of c and truncates the result to unsigned char. For ASCII
 * letters this yields the lower-case letter; other bytes may change too, so
 * callers only compare the result against lower-case letters.
 * The argument is parenthesised so expressions are handled correctly. */
#define LOWER(c) ((unsigned char)((c) | 0x20))
/* Looks up byte c in the tokens[] table: 0 when c is not accepted in a
 * header name, otherwise the lower-cased character. The cast is applied to
 * the whole argument so the index always stays within 0-255. */
#define TOKEN(c) (tokens[(unsigned char)(c)])
309
310
/* First state for a new message: s_start_req when the parser is configured
 * for requests (parser->type == PHP_HTTP_REQUEST), otherwise s_start_res.
 * Requires a `parser` pointer in scope at the point of use. */
#define start_state (PHP_HTTP_REQUEST != parser->type ? s_start_res : s_start_req)
312
313
/* Strictness switch.
 *  - STRICT_CHECK(cond): in strict builds, jumps to the enclosing function's
 *    `error` label when cond is true. In non-strict builds it is an empty
 *    statement and cond is not evaluated. Both forms are a single statement,
 *    so the macro is safe inside if/else.
 *  - NEW_MESSAGE(): the state to enter once a message is complete. Strict
 *    builds go to s_dead unless php_http_should_keep_alive(parser) says the
 *    connection stays open; non-strict builds always start a new message.
 * NOTE(review): HTTP_PARSER_STRICT has no PHP_ prefix, unlike the other
 * identifiers in this file -- confirm it matches the macro that
 * php_http_parser.h defines. */
#if HTTP_PARSER_STRICT
# define STRICT_CHECK(cond) do { if (cond) goto error; } while (0)
# define NEW_MESSAGE() (php_http_should_keep_alive(parser) ? start_state : s_dead)
#else
# define STRICT_CHECK(cond) do { } while (0)
# define NEW_MESSAGE() start_state
#endif
321
322
php_http_parser_execute(php_http_parser * parser,const php_http_parser_settings * settings,const char * data,size_t len)323 size_t php_http_parser_execute (php_http_parser *parser,
324 const php_http_parser_settings *settings,
325 const char *data,
326 size_t len)
327 {
328 char c, ch;
329 const char *p = data, *pe;
330 size_t to_read;
331
332 enum state state = (enum state) parser->state;
333 enum header_states header_state = (enum header_states) parser->header_state;
334 uint32_t index = parser->index;
335 uint32_t nread = parser->nread;
336
337 /* technically we could combine all of these (except for url_mark) into one
338 variable, saving stack space, but it seems more clear to have them
339 separated. */
340 const char *header_field_mark = 0;
341 const char *header_value_mark = 0;
342 const char *fragment_mark = 0;
343 const char *query_string_mark = 0;
344 const char *path_mark = 0;
345 const char *url_mark = 0;
346
347 if (len == 0) {
348 if (state == s_body_identity_eof) {
349 CALLBACK2(message_complete);
350 }
351 return 0;
352 }
353
354 if (state == s_header_field)
355 header_field_mark = data;
356 if (state == s_header_value)
357 header_value_mark = data;
358 if (state == s_req_fragment)
359 fragment_mark = data;
360 if (state == s_req_query_string)
361 query_string_mark = data;
362 if (state == s_req_path)
363 path_mark = data;
364 if (state == s_req_path || state == s_req_schema || state == s_req_schema_slash
365 || state == s_req_schema_slash_slash || state == s_req_port
366 || state == s_req_query_string_start || state == s_req_query_string
367 || state == s_req_host
368 || state == s_req_fragment_start || state == s_req_fragment)
369 url_mark = data;
370
371 for (p=data, pe=data+len; p != pe; p++) {
372 ch = *p;
373
374 if (PARSING_HEADER(state)) {
375 ++nread;
376 /* Buffer overflow attack */
377 if (nread > PHP_HTTP_MAX_HEADER_SIZE) goto error;
378 }
379
380 switch (state) {
381
382 case s_dead:
383 /* this state is used after a 'Connection: close' message
384 * the parser will error out if it reads another message
385 */
386 goto error;
387
388 case s_start_req_or_res:
389 {
390 if (ch == CR || ch == LF)
391 break;
392 parser->flags = 0;
393 parser->content_length = -1;
394
395 CALLBACK2(message_begin);
396
397 if (ch == 'H')
398 state = s_res_or_resp_H;
399 else {
400 parser->type = PHP_HTTP_REQUEST;
401 goto start_req_method_assign;
402 }
403 break;
404 }
405
406 case s_res_or_resp_H:
407 if (ch == 'T') {
408 parser->type = PHP_HTTP_RESPONSE;
409 state = s_res_HT;
410 } else {
411 if (ch != 'E') goto error;
412 parser->type = PHP_HTTP_REQUEST;
413 parser->method = PHP_HTTP_HEAD;
414 index = 2;
415 state = s_req_method;
416 }
417 break;
418
419 case s_start_res:
420 {
421 parser->flags = 0;
422 parser->content_length = -1;
423
424 CALLBACK2(message_begin);
425
426 switch (ch) {
427 case 'H':
428 state = s_res_H;
429 break;
430
431 case CR:
432 case LF:
433 break;
434
435 default:
436 goto error;
437 }
438 break;
439 }
440
441 case s_res_H:
442 STRICT_CHECK(ch != 'T');
443 state = s_res_HT;
444 break;
445
446 case s_res_HT:
447 STRICT_CHECK(ch != 'T');
448 state = s_res_HTT;
449 break;
450
451 case s_res_HTT:
452 STRICT_CHECK(ch != 'P');
453 state = s_res_HTTP;
454 break;
455
456 case s_res_HTTP:
457 STRICT_CHECK(ch != '/');
458 state = s_res_first_http_major;
459 break;
460
461 case s_res_first_http_major:
462 if (ch < '1' || ch > '9') goto error;
463 parser->http_major = ch - '0';
464 state = s_res_http_major;
465 break;
466
467 /* major HTTP version or dot */
468 case s_res_http_major:
469 {
470 if (ch == '.') {
471 state = s_res_first_http_minor;
472 break;
473 }
474
475 if (ch < '0' || ch > '9') goto error;
476
477 parser->http_major *= 10;
478 parser->http_major += ch - '0';
479
480 if (parser->http_major > 999) goto error;
481 break;
482 }
483
484 /* first digit of minor HTTP version */
485 case s_res_first_http_minor:
486 if (ch < '0' || ch > '9') goto error;
487 parser->http_minor = ch - '0';
488 state = s_res_http_minor;
489 break;
490
491 /* minor HTTP version or end of request line */
492 case s_res_http_minor:
493 {
494 if (ch == ' ') {
495 state = s_res_first_status_code;
496 break;
497 }
498
499 if (ch < '0' || ch > '9') goto error;
500
501 parser->http_minor *= 10;
502 parser->http_minor += ch - '0';
503
504 if (parser->http_minor > 999) goto error;
505 break;
506 }
507
508 case s_res_first_status_code:
509 {
510 if (ch < '0' || ch > '9') {
511 if (ch == ' ') {
512 break;
513 }
514 goto error;
515 }
516 parser->status_code = ch - '0';
517 state = s_res_status_code;
518 break;
519 }
520
521 case s_res_status_code:
522 {
523 if (ch < '0' || ch > '9') {
524 switch (ch) {
525 case ' ':
526 state = s_res_status;
527 break;
528 case CR:
529 state = s_res_line_almost_done;
530 break;
531 case LF:
532 state = s_header_field_start;
533 break;
534 default:
535 goto error;
536 }
537 break;
538 }
539
540 parser->status_code *= 10;
541 parser->status_code += ch - '0';
542
543 if (parser->status_code > 999) goto error;
544 break;
545 }
546
547 case s_res_status:
548 /* the human readable status. e.g. "NOT FOUND"
549 * we are not humans so just ignore this */
550 if (ch == CR) {
551 state = s_res_line_almost_done;
552 break;
553 }
554
555 if (ch == LF) {
556 state = s_header_field_start;
557 break;
558 }
559 break;
560
561 case s_res_line_almost_done:
562 STRICT_CHECK(ch != LF);
563 state = s_header_field_start;
564 break;
565
566 case s_start_req:
567 {
568 if (ch == CR || ch == LF)
569 break;
570 parser->flags = 0;
571 parser->content_length = -1;
572
573 CALLBACK2(message_begin);
574
575 if (ch < 'A' || 'Z' < ch) goto error;
576
577 start_req_method_assign:
578 parser->method = (enum php_http_method) 0;
579 index = 1;
580 switch (ch) {
581 case 'C': parser->method = PHP_HTTP_CONNECT; /* or COPY, CHECKOUT */ break;
582 case 'D': parser->method = PHP_HTTP_DELETE; break;
583 case 'G': parser->method = PHP_HTTP_GET; break;
584 case 'H': parser->method = PHP_HTTP_HEAD; break;
585 case 'L': parser->method = PHP_HTTP_LOCK; break;
586 case 'M': parser->method = PHP_HTTP_MKCOL; /* or MOVE, MKACTIVITY, MERGE, M-SEARCH */ break;
587 case 'N': parser->method = PHP_HTTP_NOTIFY; break;
588 case 'O': parser->method = PHP_HTTP_OPTIONS; break;
589 case 'P': parser->method = PHP_HTTP_POST; /* or PROPFIND or PROPPATCH or PUT */ break;
590 case 'R': parser->method = PHP_HTTP_REPORT; break;
591 case 'S': parser->method = PHP_HTTP_SUBSCRIBE; break;
592 case 'T': parser->method = PHP_HTTP_TRACE; break;
593 case 'U': parser->method = PHP_HTTP_UNLOCK; /* or UNSUBSCRIBE */ break;
594 default: parser->method = PHP_HTTP_NOT_IMPLEMENTED; break;
595 }
596 state = s_req_method;
597 break;
598 }
599
600 case s_req_method:
601 {
602 const char *matcher;
603 if (ch == '\0')
604 goto error;
605
606 matcher = method_strings[parser->method];
607 if (ch == ' ' && (matcher[index] == '\0' || parser->method == PHP_HTTP_NOT_IMPLEMENTED)) {
608 state = s_req_spaces_before_url;
609 } else if (ch == matcher[index]) {
610 ; /* nada */
611 } else if (parser->method == PHP_HTTP_CONNECT) {
612 if (index == 1 && ch == 'H') {
613 parser->method = PHP_HTTP_CHECKOUT;
614 } else if (index == 2 && ch == 'P') {
615 parser->method = PHP_HTTP_COPY;
616 }
617 } else if (parser->method == PHP_HTTP_MKCOL) {
618 if (index == 1 && ch == 'O') {
619 parser->method = PHP_HTTP_MOVE;
620 } else if (index == 1 && ch == 'E') {
621 parser->method = PHP_HTTP_MERGE;
622 } else if (index == 1 && ch == '-') {
623 parser->method = PHP_HTTP_MSEARCH;
624 } else if (index == 2 && ch == 'A') {
625 parser->method = PHP_HTTP_MKACTIVITY;
626 }
627 } else if (index == 1 && parser->method == PHP_HTTP_POST && ch == 'R') {
628 parser->method = PHP_HTTP_PROPFIND; /* or HTTP_PROPPATCH */
629 } else if (index == 1 && parser->method == PHP_HTTP_POST && ch == 'U') {
630 parser->method = PHP_HTTP_PUT;
631 } else if (index == 1 && parser->method == PHP_HTTP_POST && ch == 'A') {
632 parser->method = PHP_HTTP_PATCH;
633 } else if (index == 2 && parser->method == PHP_HTTP_UNLOCK && ch == 'S') {
634 parser->method = PHP_HTTP_UNSUBSCRIBE;
635 } else if (index == 4 && parser->method == PHP_HTTP_PROPFIND && ch == 'P') {
636 parser->method = PHP_HTTP_PROPPATCH;
637 } else {
638 parser->method = PHP_HTTP_NOT_IMPLEMENTED;
639 }
640
641 ++index;
642 break;
643 }
644 case s_req_spaces_before_url:
645 {
646 if (ch == ' ') break;
647
648 if (ch == '/' || ch == '*') {
649 MARK(url);
650 MARK(path);
651 state = s_req_path;
652 break;
653 }
654
655 c = LOWER(ch);
656
657 if (c >= 'a' && c <= 'z') {
658 MARK(url);
659 state = s_req_schema;
660 break;
661 }
662
663 goto error;
664 }
665
666 case s_req_schema:
667 {
668 c = LOWER(ch);
669
670 if (c >= 'a' && c <= 'z') break;
671
672 if (ch == ':') {
673 state = s_req_schema_slash;
674 break;
675 } else if (ch == '.') {
676 state = s_req_host;
677 break;
678 } else if ('0' <= ch && ch <= '9') {
679 state = s_req_host;
680 break;
681 }
682
683 goto error;
684 }
685
686 case s_req_schema_slash:
687 STRICT_CHECK(ch != '/');
688 state = s_req_schema_slash_slash;
689 break;
690
691 case s_req_schema_slash_slash:
692 STRICT_CHECK(ch != '/');
693 state = s_req_host;
694 break;
695
696 case s_req_host:
697 {
698 c = LOWER(ch);
699 if (c >= 'a' && c <= 'z') break;
700 if ((ch >= '0' && ch <= '9') || ch == '.' || ch == '-') break;
701 switch (ch) {
702 case ':':
703 state = s_req_port;
704 break;
705 case '/':
706 MARK(path);
707 state = s_req_path;
708 break;
709 case ' ':
710 /* The request line looks like:
711 * "GET http://foo.bar.com HTTP/1.1"
712 * That is, there is no path.
713 */
714 CALLBACK(url);
715 state = s_req_http_start;
716 break;
717 default:
718 goto error;
719 }
720 break;
721 }
722
723 case s_req_port:
724 {
725 if (ch >= '0' && ch <= '9') break;
726 switch (ch) {
727 case '/':
728 MARK(path);
729 state = s_req_path;
730 break;
731 case ' ':
732 /* The request line looks like:
733 * "GET http://foo.bar.com:1234 HTTP/1.1"
734 * That is, there is no path.
735 */
736 CALLBACK(url);
737 state = s_req_http_start;
738 break;
739 default:
740 goto error;
741 }
742 break;
743 }
744
745 case s_req_path:
746 {
747 if (normal_url_char[(unsigned char)ch]) break;
748
749 switch (ch) {
750 case ' ':
751 CALLBACK(url);
752 CALLBACK(path);
753 state = s_req_http_start;
754 break;
755 case CR:
756 CALLBACK(url);
757 CALLBACK(path);
758 parser->http_major = 0;
759 parser->http_minor = 9;
760 state = s_req_line_almost_done;
761 break;
762 case LF:
763 CALLBACK(url);
764 CALLBACK(path);
765 parser->http_major = 0;
766 parser->http_minor = 9;
767 state = s_header_field_start;
768 break;
769 case '?':
770 CALLBACK(path);
771 state = s_req_query_string_start;
772 break;
773 case '#':
774 CALLBACK(path);
775 state = s_req_fragment_start;
776 break;
777 default:
778 goto error;
779 }
780 break;
781 }
782
783 case s_req_query_string_start:
784 {
785 if (normal_url_char[(unsigned char)ch]) {
786 MARK(query_string);
787 state = s_req_query_string;
788 break;
789 }
790
791 switch (ch) {
792 case '?':
793 break; /* XXX ignore extra '?' ... is this right? */
794 case ' ':
795 CALLBACK(url);
796 state = s_req_http_start;
797 break;
798 case CR:
799 CALLBACK(url);
800 parser->http_major = 0;
801 parser->http_minor = 9;
802 state = s_req_line_almost_done;
803 break;
804 case LF:
805 CALLBACK(url);
806 parser->http_major = 0;
807 parser->http_minor = 9;
808 state = s_header_field_start;
809 break;
810 case '#':
811 state = s_req_fragment_start;
812 break;
813 default:
814 goto error;
815 }
816 break;
817 }
818
819 case s_req_query_string:
820 {
821 if (normal_url_char[(unsigned char)ch]) break;
822
823 switch (ch) {
824 case '?':
825 /* allow extra '?' in query string */
826 break;
827 case ' ':
828 CALLBACK(url);
829 CALLBACK(query_string);
830 state = s_req_http_start;
831 break;
832 case CR:
833 CALLBACK(url);
834 CALLBACK(query_string);
835 parser->http_major = 0;
836 parser->http_minor = 9;
837 state = s_req_line_almost_done;
838 break;
839 case LF:
840 CALLBACK(url);
841 CALLBACK(query_string);
842 parser->http_major = 0;
843 parser->http_minor = 9;
844 state = s_header_field_start;
845 break;
846 case '#':
847 CALLBACK(query_string);
848 state = s_req_fragment_start;
849 break;
850 default:
851 goto error;
852 }
853 break;
854 }
855
856 case s_req_fragment_start:
857 {
858 if (normal_url_char[(unsigned char)ch]) {
859 MARK(fragment);
860 state = s_req_fragment;
861 break;
862 }
863
864 switch (ch) {
865 case ' ':
866 CALLBACK(url);
867 state = s_req_http_start;
868 break;
869 case CR:
870 CALLBACK(url);
871 parser->http_major = 0;
872 parser->http_minor = 9;
873 state = s_req_line_almost_done;
874 break;
875 case LF:
876 CALLBACK(url);
877 parser->http_major = 0;
878 parser->http_minor = 9;
879 state = s_header_field_start;
880 break;
881 case '?':
882 MARK(fragment);
883 state = s_req_fragment;
884 break;
885 case '#':
886 break;
887 default:
888 goto error;
889 }
890 break;
891 }
892
893 case s_req_fragment:
894 {
895 if (normal_url_char[(unsigned char)ch]) break;
896
897 switch (ch) {
898 case ' ':
899 CALLBACK(url);
900 CALLBACK(fragment);
901 state = s_req_http_start;
902 break;
903 case CR:
904 CALLBACK(url);
905 CALLBACK(fragment);
906 parser->http_major = 0;
907 parser->http_minor = 9;
908 state = s_req_line_almost_done;
909 break;
910 case LF:
911 CALLBACK(url);
912 CALLBACK(fragment);
913 parser->http_major = 0;
914 parser->http_minor = 9;
915 state = s_header_field_start;
916 break;
917 case '?':
918 case '#':
919 break;
920 default:
921 goto error;
922 }
923 break;
924 }
925
926 case s_req_http_start:
927 switch (ch) {
928 case 'H':
929 state = s_req_http_H;
930 break;
931 case ' ':
932 break;
933 default:
934 goto error;
935 }
936 break;
937
938 case s_req_http_H:
939 STRICT_CHECK(ch != 'T');
940 state = s_req_http_HT;
941 break;
942
943 case s_req_http_HT:
944 STRICT_CHECK(ch != 'T');
945 state = s_req_http_HTT;
946 break;
947
948 case s_req_http_HTT:
949 STRICT_CHECK(ch != 'P');
950 state = s_req_http_HTTP;
951 break;
952
953 case s_req_http_HTTP:
954 STRICT_CHECK(ch != '/');
955 state = s_req_first_http_major;
956 break;
957
958 /* first digit of major HTTP version */
959 case s_req_first_http_major:
960 if (ch < '1' || ch > '9') goto error;
961 parser->http_major = ch - '0';
962 state = s_req_http_major;
963 break;
964
965 /* major HTTP version or dot */
966 case s_req_http_major:
967 {
968 if (ch == '.') {
969 state = s_req_first_http_minor;
970 break;
971 }
972
973 if (ch < '0' || ch > '9') goto error;
974
975 parser->http_major *= 10;
976 parser->http_major += ch - '0';
977
978 if (parser->http_major > 999) goto error;
979 break;
980 }
981
982 /* first digit of minor HTTP version */
983 case s_req_first_http_minor:
984 if (ch < '0' || ch > '9') goto error;
985 parser->http_minor = ch - '0';
986 state = s_req_http_minor;
987 break;
988
989 /* minor HTTP version or end of request line */
990 case s_req_http_minor:
991 {
992 if (ch == CR) {
993 state = s_req_line_almost_done;
994 break;
995 }
996
997 if (ch == LF) {
998 state = s_header_field_start;
999 break;
1000 }
1001
1002 /* XXX allow spaces after digit? */
1003
1004 if (ch < '0' || ch > '9') goto error;
1005
1006 parser->http_minor *= 10;
1007 parser->http_minor += ch - '0';
1008
1009 if (parser->http_minor > 999) goto error;
1010 break;
1011 }
1012
1013 /* end of request line */
1014 case s_req_line_almost_done:
1015 {
1016 if (ch != LF) goto error;
1017 state = s_header_field_start;
1018 break;
1019 }
1020
1021 case s_header_field_start:
1022 {
1023 if (ch == CR) {
1024 state = s_headers_almost_done;
1025 break;
1026 }
1027
1028 if (ch == LF) {
1029 /* they might be just sending \n instead of \r\n so this would be
1030 * the second \n to denote the end of headers*/
1031 state = s_headers_almost_done;
1032 goto headers_almost_done;
1033 }
1034
1035 c = TOKEN(ch);
1036
1037 if (!c) goto error;
1038
1039 MARK(header_field);
1040
1041 index = 0;
1042 state = s_header_field;
1043
1044 switch (c) {
1045 case 'c':
1046 header_state = h_C;
1047 break;
1048
1049 case 'p':
1050 header_state = h_matching_proxy_connection;
1051 break;
1052
1053 case 't':
1054 header_state = h_matching_transfer_encoding;
1055 break;
1056
1057 case 'u':
1058 header_state = h_matching_upgrade;
1059 break;
1060
1061 default:
1062 header_state = h_general;
1063 break;
1064 }
1065 break;
1066 }
1067
1068 case s_header_field:
1069 {
1070 c = TOKEN(ch);
1071
1072 if (c) {
1073 switch (header_state) {
1074 case h_general:
1075 break;
1076
1077 case h_C:
1078 index++;
1079 header_state = (c == 'o' ? h_CO : h_general);
1080 break;
1081
1082 case h_CO:
1083 index++;
1084 header_state = (c == 'n' ? h_CON : h_general);
1085 break;
1086
1087 case h_CON:
1088 index++;
1089 switch (c) {
1090 case 'n':
1091 header_state = h_matching_connection;
1092 break;
1093 case 't':
1094 header_state = h_matching_content_length;
1095 break;
1096 default:
1097 header_state = h_general;
1098 break;
1099 }
1100 break;
1101
1102 /* connection */
1103
1104 case h_matching_connection:
1105 index++;
1106 if (index > sizeof(CONNECTION)-1
1107 || c != CONNECTION[index]) {
1108 header_state = h_general;
1109 } else if (index == sizeof(CONNECTION)-2) {
1110 header_state = h_connection;
1111 }
1112 break;
1113
1114 /* proxy-connection */
1115
1116 case h_matching_proxy_connection:
1117 index++;
1118 if (index > sizeof(PROXY_CONNECTION)-1
1119 || c != PROXY_CONNECTION[index]) {
1120 header_state = h_general;
1121 } else if (index == sizeof(PROXY_CONNECTION)-2) {
1122 header_state = h_connection;
1123 }
1124 break;
1125
1126 /* content-length */
1127
1128 case h_matching_content_length:
1129 index++;
1130 if (index > sizeof(CONTENT_LENGTH)-1
1131 || c != CONTENT_LENGTH[index]) {
1132 header_state = h_general;
1133 } else if (index == sizeof(CONTENT_LENGTH)-2) {
1134 header_state = h_content_length;
1135 }
1136 break;
1137
1138 /* transfer-encoding */
1139
1140 case h_matching_transfer_encoding:
1141 index++;
1142 if (index > sizeof(TRANSFER_ENCODING)-1
1143 || c != TRANSFER_ENCODING[index]) {
1144 header_state = h_general;
1145 } else if (index == sizeof(TRANSFER_ENCODING)-2) {
1146 header_state = h_transfer_encoding;
1147 }
1148 break;
1149
1150 /* upgrade */
1151
1152 case h_matching_upgrade:
1153 index++;
1154 if (index > sizeof(UPGRADE)-1
1155 || c != UPGRADE[index]) {
1156 header_state = h_general;
1157 } else if (index == sizeof(UPGRADE)-2) {
1158 header_state = h_upgrade;
1159 }
1160 break;
1161
1162 case h_connection:
1163 case h_content_length:
1164 case h_transfer_encoding:
1165 case h_upgrade:
1166 if (ch != ' ') header_state = h_general;
1167 break;
1168
1169 default:
1170 assert(0 && "Unknown header_state");
1171 break;
1172 }
1173 break;
1174 }
1175
1176 if (ch == ':') {
1177 CALLBACK(header_field);
1178 state = s_header_value_start;
1179 break;
1180 }
1181
1182 if (ch == CR) {
1183 state = s_header_almost_done;
1184 CALLBACK(header_field);
1185 break;
1186 }
1187
1188 if (ch == LF) {
1189 CALLBACK(header_field);
1190 state = s_header_field_start;
1191 break;
1192 }
1193
1194 goto error;
1195 }
1196
1197 case s_header_value_start:
1198 {
1199 if (ch == ' ') break;
1200
1201 MARK(header_value);
1202
1203 state = s_header_value;
1204 index = 0;
1205
1206 c = LOWER(ch);
1207
1208 if (ch == CR) {
1209 CALLBACK(header_value);
1210 header_state = h_general;
1211 state = s_header_almost_done;
1212 break;
1213 }
1214
1215 if (ch == LF) {
1216 CALLBACK(header_value);
1217 state = s_header_field_start;
1218 break;
1219 }
1220
1221 switch (header_state) {
1222 case h_upgrade:
1223 parser->flags |= F_UPGRADE;
1224 header_state = h_general;
1225 break;
1226
1227 case h_transfer_encoding:
1228 /* looking for 'Transfer-Encoding: chunked' */
1229 if ('c' == c) {
1230 header_state = h_matching_transfer_encoding_chunked;
1231 } else {
1232 header_state = h_general;
1233 }
1234 break;
1235
1236 case h_content_length:
1237 if (ch < '0' || ch > '9') goto error;
1238 parser->content_length = ch - '0';
1239 break;
1240
1241 case h_connection:
1242 /* looking for 'Connection: keep-alive' */
1243 if (c == 'k') {
1244 header_state = h_matching_connection_keep_alive;
1245 /* looking for 'Connection: close' */
1246 } else if (c == 'c') {
1247 header_state = h_matching_connection_close;
1248 } else {
1249 header_state = h_general;
1250 }
1251 break;
1252
1253 default:
1254 header_state = h_general;
1255 break;
1256 }
1257 break;
1258 }
1259
1260 case s_header_value:
1261 {
1262 c = LOWER(ch);
1263
1264 if (ch == CR) {
1265 CALLBACK(header_value);
1266 state = s_header_almost_done;
1267 break;
1268 }
1269
1270 if (ch == LF) {
1271 CALLBACK(header_value);
1272 goto header_almost_done;
1273 }
1274
1275 switch (header_state) {
1276 case h_general:
1277 break;
1278
1279 case h_connection:
1280 case h_transfer_encoding:
1281 assert(0 && "Shouldn't get here.");
1282 break;
1283
1284 case h_content_length:
1285 if (ch == ' ') break;
1286 if (ch < '0' || ch > '9') goto error;
1287 parser->content_length *= 10;
1288 parser->content_length += ch - '0';
1289 break;
1290
1291 /* Transfer-Encoding: chunked */
1292 case h_matching_transfer_encoding_chunked:
1293 index++;
1294 if (index > sizeof(CHUNKED)-1
1295 || c != CHUNKED[index]) {
1296 header_state = h_general;
1297 } else if (index == sizeof(CHUNKED)-2) {
1298 header_state = h_transfer_encoding_chunked;
1299 }
1300 break;
1301
1302 /* looking for 'Connection: keep-alive' */
1303 case h_matching_connection_keep_alive:
1304 index++;
1305 if (index > sizeof(KEEP_ALIVE)-1
1306 || c != KEEP_ALIVE[index]) {
1307 header_state = h_general;
1308 } else if (index == sizeof(KEEP_ALIVE)-2) {
1309 header_state = h_connection_keep_alive;
1310 }
1311 break;
1312
1313 /* looking for 'Connection: close' */
1314 case h_matching_connection_close:
1315 index++;
1316 if (index > sizeof(CLOSE)-1 || c != CLOSE[index]) {
1317 header_state = h_general;
1318 } else if (index == sizeof(CLOSE)-2) {
1319 header_state = h_connection_close;
1320 }
1321 break;
1322
1323 case h_transfer_encoding_chunked:
1324 case h_connection_keep_alive:
1325 case h_connection_close:
1326 if (ch != ' ') header_state = h_general;
1327 break;
1328
1329 default:
1330 state = s_header_value;
1331 header_state = h_general;
1332 break;
1333 }
1334 break;
1335 }
1336
1337 case s_header_almost_done:
1338 header_almost_done:
1339 {
1340 STRICT_CHECK(ch != LF);
1341
1342 state = s_header_field_start;
1343
1344 switch (header_state) {
1345 case h_connection_keep_alive:
1346 parser->flags |= F_CONNECTION_KEEP_ALIVE;
1347 break;
1348 case h_connection_close:
1349 parser->flags |= F_CONNECTION_CLOSE;
1350 break;
1351 case h_transfer_encoding_chunked:
1352 parser->flags |= F_CHUNKED;
1353 break;
1354 default:
1355 break;
1356 }
1357 break;
1358 }
1359
1360 case s_headers_almost_done:
1361 headers_almost_done:
1362 {
1363 STRICT_CHECK(ch != LF);
1364
1365 if (parser->flags & F_TRAILING) {
1366 /* End of a chunked request */
1367 CALLBACK2(message_complete);
1368 state = NEW_MESSAGE();
1369 break;
1370 }
1371
1372 nread = 0;
1373
1374 if (parser->flags & F_UPGRADE || parser->method == PHP_HTTP_CONNECT) {
1375 parser->upgrade = 1;
1376 }
1377
      /* Here we call the headers_complete callback. This is somewhat
       * different from other callbacks: if the user returns 1, we
       * interpret that as saying that this message has no body. This
       * is needed for the annoying case of receiving a response to a HEAD
       * request.
       */
1384 if (settings->on_headers_complete) {
1385 switch (settings->on_headers_complete(parser)) {
1386 case 0:
1387 break;
1388
1389 case 1:
1390 parser->flags |= F_SKIPBODY;
1391 break;
1392
1393 default:
1394 return p - data; /* Error */
1395 }
1396 }
1397
1398 /* Exit, the rest of the connect is in a different protocol. */
1399 if (parser->upgrade) {
1400 CALLBACK2(message_complete);
1401 return (p - data);
1402 }
1403
1404 if (parser->flags & F_SKIPBODY) {
1405 CALLBACK2(message_complete);
1406 state = NEW_MESSAGE();
1407 } else if (parser->flags & F_CHUNKED) {
1408 /* chunked encoding - ignore Content-Length header */
1409 state = s_chunk_size_start;
1410 } else {
1411 if (parser->content_length == 0) {
1412 /* Content-Length header given but zero: Content-Length: 0\r\n */
1413 CALLBACK2(message_complete);
1414 state = NEW_MESSAGE();
1415 } else if (parser->content_length > 0) {
1416 /* Content-Length header given and non-zero */
1417 state = s_body_identity;
1418 } else {
1419 if (parser->type == PHP_HTTP_REQUEST || php_http_should_keep_alive(parser)) {
1420 /* Assume content-length 0 - read the next */
1421 CALLBACK2(message_complete);
1422 state = NEW_MESSAGE();
1423 } else {
1424 /* Read body until EOF */
1425 state = s_body_identity_eof;
1426 }
1427 }
1428 }
1429
1430 break;
1431 }
1432
1433 case s_body_identity:
1434 to_read = MIN(pe - p, (size_t)parser->content_length);
1435 if (to_read > 0) {
1436 if (settings->on_body) settings->on_body(parser, p, to_read);
1437 p += to_read - 1;
1438 parser->content_length -= to_read;
1439 if (parser->content_length == 0) {
1440 CALLBACK2(message_complete);
1441 state = NEW_MESSAGE();
1442 }
1443 }
1444 break;
1445
1446 /* read until EOF */
1447 case s_body_identity_eof:
1448 to_read = pe - p;
1449 if (to_read > 0) {
1450 if (settings->on_body) settings->on_body(parser, p, to_read);
1451 p += to_read - 1;
1452 }
1453 break;
1454
1455 case s_chunk_size_start:
1456 {
1457 assert(parser->flags & F_CHUNKED);
1458
1459 c = unhex[(unsigned char)ch];
1460 if (c == -1) goto error;
1461 parser->content_length = c;
1462 state = s_chunk_size;
1463 break;
1464 }
1465
1466 case s_chunk_size:
1467 {
1468 assert(parser->flags & F_CHUNKED);
1469
1470 if (ch == CR) {
1471 state = s_chunk_size_almost_done;
1472 break;
1473 }
1474
1475 c = unhex[(unsigned char)ch];
1476
1477 if (c == -1) {
1478 if (ch == ';' || ch == ' ') {
1479 state = s_chunk_parameters;
1480 break;
1481 }
1482 goto error;
1483 }
1484
1485 parser->content_length *= 16;
1486 parser->content_length += c;
1487 break;
1488 }
1489
1490 case s_chunk_parameters:
1491 {
1492 assert(parser->flags & F_CHUNKED);
1493 /* just ignore this shit. TODO check for overflow */
1494 if (ch == CR) {
1495 state = s_chunk_size_almost_done;
1496 break;
1497 }
1498 break;
1499 }
1500
1501 case s_chunk_size_almost_done:
1502 {
1503 assert(parser->flags & F_CHUNKED);
1504 STRICT_CHECK(ch != LF);
1505
1506 if (parser->content_length == 0) {
1507 parser->flags |= F_TRAILING;
1508 state = s_header_field_start;
1509 } else {
1510 state = s_chunk_data;
1511 }
1512 break;
1513 }
1514
1515 case s_chunk_data:
1516 {
1517 assert(parser->flags & F_CHUNKED);
1518
1519 to_read = MIN(pe - p, (size_t)(parser->content_length));
1520
1521 if (to_read > 0) {
1522 if (settings->on_body) settings->on_body(parser, p, to_read);
1523 p += to_read - 1;
1524 }
1525
1526 if (to_read == parser->content_length) {
1527 state = s_chunk_data_almost_done;
1528 }
1529
1530 parser->content_length -= to_read;
1531 break;
1532 }
1533
1534 case s_chunk_data_almost_done:
1535 assert(parser->flags & F_CHUNKED);
1536 STRICT_CHECK(ch != CR);
1537 state = s_chunk_data_done;
1538 break;
1539
1540 case s_chunk_data_done:
1541 assert(parser->flags & F_CHUNKED);
1542 STRICT_CHECK(ch != LF);
1543 state = s_chunk_size_start;
1544 break;
1545
1546 default:
1547 assert(0 && "unhandled state");
1548 goto error;
1549 }
1550 }
1551
1552 CALLBACK_NOCLEAR(header_field);
1553 CALLBACK_NOCLEAR(header_value);
1554 CALLBACK_NOCLEAR(fragment);
1555 CALLBACK_NOCLEAR(query_string);
1556 CALLBACK_NOCLEAR(path);
1557 CALLBACK_NOCLEAR(url);
1558
1559 parser->state = state;
1560 parser->header_state = header_state;
1561 parser->index = index;
1562 parser->nread = nread;
1563
1564 return len;
1565
1566 error:
1567 parser->state = s_dead;
1568 return (p - data);
1569 }
1570
1571
1572 int
php_http_should_keep_alive(php_http_parser * parser)1573 php_http_should_keep_alive (php_http_parser *parser)
1574 {
1575 if (parser->http_major > 0 && parser->http_minor > 0) {
1576 /* HTTP/1.1 */
1577 if (parser->flags & F_CONNECTION_CLOSE) {
1578 return 0;
1579 } else {
1580 return 1;
1581 }
1582 } else {
1583 /* HTTP/1.0 or earlier */
1584 if (parser->flags & F_CONNECTION_KEEP_ALIVE) {
1585 return 1;
1586 } else {
1587 return 0;
1588 }
1589 }
1590 }
1591
1592
php_http_method_str(enum php_http_method m)1593 const char * php_http_method_str (enum php_http_method m)
1594 {
1595 return method_strings[m];
1596 }
1597
1598
1599 void
php_http_parser_init(php_http_parser * parser,enum php_http_parser_type t)1600 php_http_parser_init (php_http_parser *parser, enum php_http_parser_type t)
1601 {
1602 parser->type = t;
1603 parser->state = (t == PHP_HTTP_REQUEST ? s_start_req : (t == PHP_HTTP_RESPONSE ? s_start_res : s_start_req_or_res));
1604 parser->nread = 0;
1605 parser->upgrade = 0;
1606 parser->flags = 0;
1607 parser->method = 0;
1608 }
1609