xref: /PHP-8.0/ext/standard/pack.c (revision c2a2d243)
1 /*
2    +----------------------------------------------------------------------+
3    | Copyright (c) The PHP Group                                          |
4    +----------------------------------------------------------------------+
5    | This source file is subject to version 3.01 of the PHP license,      |
6    | that is bundled with this package in the file LICENSE, and is        |
7    | available through the world-wide-web at the following url:           |
8    | http://www.php.net/license/3_01.txt                                  |
9    | If you did not receive a copy of the PHP license and are unable to   |
10    | obtain it through the world-wide-web, please send a note to          |
11    | license@php.net so we can mail you a copy immediately.               |
12    +----------------------------------------------------------------------+
13    | Author: Chris Schneider <cschneid@relog.ch>                          |
14    +----------------------------------------------------------------------+
15  */
16 
17 #include "php.h"
18 
19 #include <stdio.h>
20 #include <stdlib.h>
21 #include <errno.h>
22 #include <sys/types.h>
23 #include <sys/stat.h>
24 #include <fcntl.h>
25 #ifdef PHP_WIN32
26 #define O_RDONLY _O_RDONLY
27 #include "win32/param.h"
28 #else
29 #include <sys/param.h>
30 #endif
31 #include "ext/standard/head.h"
32 #include "php_string.h"
33 #include "pack.h"
34 #if HAVE_PWD_H
35 #ifdef PHP_WIN32
36 #include "win32/pwd.h"
37 #else
38 #include <pwd.h>
39 #endif
40 #endif
41 #include "fsock.h"
42 #if HAVE_NETINET_IN_H
43 #include <netinet/in.h>
44 #endif
45 
/* Advance `outputpos` by (a) items of (b) bytes each, refusing to overflow.
 *
 * Helper for PHP_FUNCTION(pack) only: it reads and writes the caller's local
 * `outputpos` and, when (a) is negative or the new position would not fit in
 * an int, frees the caller's `formatcodes` / `formatargs`, raises a
 * ValueError naming the caller's `code`, and invokes RETURN_THROWS().
 *
 * (b) must be a positive size. Both arguments are evaluated more than once,
 * so pass side-effect-free expressions. The expansion is a single braced
 * statement; existing call sites use it without a trailing semicolon. */
#define INC_OUTPUTPOS(a,b) \
	{ \
		if ((a) < 0 || ((INT_MAX - outputpos)/((int)(b))) < (a)) { \
			efree(formatcodes);	\
			efree(formatargs);	\
			zend_value_error("Type %c: integer overflow in format string", code); \
			RETURN_THROWS(); \
		} \
		outputpos += (a)*(int)(b); \
	}
54 
/* Non-zero when the host stores the least significant byte first */
char machine_little_endian;

/*
 * Byte-order maps used by php_pack() / php_unpack(): entry [i] is the index,
 * inside the native integer, of the byte that sits at position i of the
 * packed representation.
 */

/* Single byte (8bit) and native int (machine dependent size), machine endian */
static int byte_map[1], int_map[sizeof(int)];

/* Shorts (16bit): machine, big and little endian */
static int machine_endian_short_map[2], big_endian_short_map[2], little_endian_short_map[2];

/* Longs (32bit): machine, big and little endian */
static int machine_endian_long_map[4], big_endian_long_map[4], little_endian_long_map[4];

#if SIZEOF_ZEND_LONG > 4
/* Quads (64bit): machine, big and little endian */
static int machine_endian_longlong_map[8], big_endian_longlong_map[8], little_endian_longlong_map[8];
#endif
80 
81 /* {{{ php_pack */
php_pack(zval * val,size_t size,int * map,char * output)82 static void php_pack(zval *val, size_t size, int *map, char *output)
83 {
84 	size_t i;
85 	char *v;
86 
87 	convert_to_long_ex(val);
88 	v = (char *) &Z_LVAL_P(val);
89 
90 	for (i = 0; i < size; i++) {
91 		*output++ = v[map[i]];
92 	}
93 }
94 /* }}} */
95 
/* {{{ php_pack_reverse_int32 */
/* Return `arg` with the order of its four bytes reversed. */
static inline uint32_t php_pack_reverse_int32(uint32_t arg)
{
	return (arg << 24)
		| ((arg << 8) & 0x00FF0000u)
		| ((arg >> 8) & 0x0000FF00u)
		| (arg >> 24);
}
/* }}} */
105 
/* {{{ php_pack_reverse_int64 */
/* Return `arg` with the order of its eight bytes reversed. */
static inline uint64_t php_pack_reverse_int64(uint64_t arg)
{
	uint64_t reversed = 0;
	int byte;

	/* Peel bytes off the low end of arg and push them onto reversed */
	for (byte = 0; byte < 8; byte++) {
		reversed = (reversed << 8) | (arg & 0xFF);
		arg >>= 8;
	}

	return reversed;
}
/* }}} */
120 
/* {{{ php_pack_copy_float */
/* Write the sizeof(float) bytes of `f` to `dst`, little endian when
 * is_little_endian is non-zero and big endian otherwise. */
static void php_pack_copy_float(int is_little_endian, void * dst, float f)
{
	union Copy32 {
		float f;
		uint32_t i;
	} bits;
	/* A swap is needed only when the requested order is not the host's */
#ifdef WORDS_BIGENDIAN
	const int must_swap = is_little_endian;
#else /* WORDS_BIGENDIAN */
	const int must_swap = !is_little_endian;
#endif /* WORDS_BIGENDIAN */

	bits.f = f;
	if (must_swap) {
		bits.i = php_pack_reverse_int32(bits.i);
	}

	memcpy(dst, &bits.f, sizeof(float));
}
/* }}} */
143 
/* {{{ php_pack_copy_double */
/* Write the sizeof(double) bytes of `d` to `dst`, little endian when
 * is_little_endian is non-zero and big endian otherwise. */
static void php_pack_copy_double(int is_little_endian, void * dst, double d)
{
	union Copy64 {
		double d;
		uint64_t i;
	} bits;
	/* A swap is needed only when the requested order is not the host's */
#ifdef WORDS_BIGENDIAN
	const int must_swap = is_little_endian;
#else /* WORDS_BIGENDIAN */
	const int must_swap = !is_little_endian;
#endif /* WORDS_BIGENDIAN */

	bits.d = d;
	if (must_swap) {
		bits.i = php_pack_reverse_int64(bits.i);
	}

	memcpy(dst, &bits.d, sizeof(double));
}
/* }}} */
166 
/* {{{ php_pack_parse_float */
/* Read sizeof(float) bytes from `src` and return them as a float; the bytes
 * are taken as little endian when is_little_endian is non-zero, big endian
 * otherwise. */
static float php_pack_parse_float(int is_little_endian, void * src)
{
	union Copy32 {
		float f;
		uint32_t i;
	} bits;
	/* A swap is needed only when the stored order is not the host's */
#ifdef WORDS_BIGENDIAN
	const int must_swap = is_little_endian;
#else /* WORDS_BIGENDIAN */
	const int must_swap = !is_little_endian;
#endif /* WORDS_BIGENDIAN */

	memcpy(&bits.i, src, sizeof(float));
	if (must_swap) {
		bits.i = php_pack_reverse_int32(bits.i);
	}

	return bits.f;
}
/* }}} */
189 
/* {{{ php_pack_parse_double */
/* Read sizeof(double) bytes from `src` and return them as a double; the
 * bytes are taken as little endian when is_little_endian is non-zero, big
 * endian otherwise. */
static double php_pack_parse_double(int is_little_endian, void * src)
{
	union Copy64 {
		double d;
		uint64_t i;
	} bits;
	/* A swap is needed only when the stored order is not the host's */
#ifdef WORDS_BIGENDIAN
	const int must_swap = is_little_endian;
#else /* WORDS_BIGENDIAN */
	const int must_swap = !is_little_endian;
#endif /* WORDS_BIGENDIAN */

	memcpy(&bits.i, src, sizeof(double));
	if (must_swap) {
		bits.i = php_pack_reverse_int64(bits.i);
	}

	return bits.d;
}
/* }}} */
212 
213 /* pack() idea stolen from Perl (implemented formats behave the same as there except J and P)
214  * Implemented formats are Z, A, a, h, H, c, C, s, S, i, I, l, L, n, N, q, Q, J, P, f, d, x, X, @.
215  * Added g, G for little endian float and big endian float, added e, E for little endian double and big endian double.
216  */
217 /* {{{ Takes one or more arguments and packs them into a binary string according to the format argument */
PHP_FUNCTION(pack)218 PHP_FUNCTION(pack)
219 {
220 	zval *argv = NULL;
221 	int num_args = 0;
222 	size_t i;
223 	int currentarg;
224 	char *format;
225 	size_t formatlen;
226 	char *formatcodes;
227 	int *formatargs;
228 	size_t formatcount = 0;
229 	int outputpos = 0, outputsize = 0;
230 	zend_string *output;
231 
232 	ZEND_PARSE_PARAMETERS_START(1, -1)
233 		Z_PARAM_STRING(format, formatlen)
234 		Z_PARAM_VARIADIC('*', argv, num_args)
235 	ZEND_PARSE_PARAMETERS_END();
236 
237 	/* We have a maximum of <formatlen> format codes to deal with */
238 	formatcodes = safe_emalloc(formatlen, sizeof(*formatcodes), 0);
239 	formatargs = safe_emalloc(formatlen, sizeof(*formatargs), 0);
240 	currentarg = 0;
241 
242 	/* Preprocess format into formatcodes and formatargs */
243 	for (i = 0; i < formatlen; formatcount++) {
244 		char code = format[i++];
245 		int arg = 1;
246 
247 		/* Handle format arguments if any */
248 		if (i < formatlen) {
249 			char c = format[i];
250 
251 			if (c == '*') {
252 				arg = -1;
253 				i++;
254 			}
255 			else if (c >= '0' && c <= '9') {
256 				arg = atoi(&format[i]);
257 
258 				while (format[i] >= '0' && format[i] <= '9' && i < formatlen) {
259 					i++;
260 				}
261 			}
262 		}
263 
264 		/* Handle special arg '*' for all codes and check argv overflows */
265 		switch ((int) code) {
266 			/* Never uses any args */
267 			case 'x':
268 			case 'X':
269 			case '@':
270 				if (arg < 0) {
271 					php_error_docref(NULL, E_WARNING, "Type %c: '*' ignored", code);
272 					arg = 1;
273 				}
274 				break;
275 
276 			/* Always uses one arg */
277 			case 'a':
278 			case 'A':
279 			case 'Z':
280 			case 'h':
281 			case 'H':
282 				if (currentarg >= num_args) {
283 					efree(formatcodes);
284 					efree(formatargs);
285 					zend_value_error("Type %c: not enough arguments", code);
286 					RETURN_THROWS();
287 				}
288 
289 				if (arg < 0) {
290 					if (!try_convert_to_string(&argv[currentarg])) {
291 						efree(formatcodes);
292 						efree(formatargs);
293 						RETURN_THROWS();
294 					}
295 
296 					arg = Z_STRLEN(argv[currentarg]);
297 					if (code == 'Z') {
298 						/* add one because Z is always NUL-terminated:
299 						 * pack("Z*", "aa") === "aa\0"
300 						 * pack("Z2", "aa") === "a\0" */
301 						arg++;
302 					}
303 				}
304 
305 				currentarg++;
306 				break;
307 
308 			/* Use as many args as specified */
309 			case 'q':
310 			case 'Q':
311 			case 'J':
312 			case 'P':
313 #if SIZEOF_ZEND_LONG < 8
314 					efree(formatcodes);
315 					efree(formatargs);
316 					zend_value_error("64-bit format codes are not available for 32-bit versions of PHP");
317 					RETURN_THROWS();
318 #endif
319 			case 'c':
320 			case 'C':
321 			case 's':
322 			case 'S':
323 			case 'i':
324 			case 'I':
325 			case 'l':
326 			case 'L':
327 			case 'n':
328 			case 'N':
329 			case 'v':
330 			case 'V':
331 			case 'f': /* float */
332 			case 'g': /* little endian float */
333 			case 'G': /* big endian float */
334 			case 'd': /* double */
335 			case 'e': /* little endian double */
336 			case 'E': /* big endian double */
337 				if (arg < 0) {
338 					arg = num_args - currentarg;
339 				}
340 				if (currentarg > INT_MAX - arg) {
341 					goto too_few_args;
342 				}
343 				currentarg += arg;
344 
345 				if (currentarg > num_args) {
346 too_few_args:
347 					efree(formatcodes);
348 					efree(formatargs);
349 					zend_value_error("Type %c: too few arguments", code);
350 					RETURN_THROWS();
351 				}
352 				break;
353 
354 			default:
355 				efree(formatcodes);
356 				efree(formatargs);
357 				zend_value_error("Type %c: unknown format code", code);
358 				RETURN_THROWS();
359 		}
360 
361 		formatcodes[formatcount] = code;
362 		formatargs[formatcount] = arg;
363 	}
364 
365 	if (currentarg < num_args) {
366 		php_error_docref(NULL, E_WARNING, "%d arguments unused", (num_args - currentarg));
367 	}
368 
369 	/* Calculate output length and upper bound while processing*/
370 	for (i = 0; i < formatcount; i++) {
371 	    int code = (int) formatcodes[i];
372 		int arg = formatargs[i];
373 
374 		switch ((int) code) {
375 			case 'h':
376 			case 'H':
377 				INC_OUTPUTPOS((arg + (arg % 2)) / 2,1)	/* 4 bit per arg */
378 				break;
379 
380 			case 'a':
381 			case 'A':
382 			case 'Z':
383 			case 'c':
384 			case 'C':
385 			case 'x':
386 				INC_OUTPUTPOS(arg,1)		/* 8 bit per arg */
387 				break;
388 
389 			case 's':
390 			case 'S':
391 			case 'n':
392 			case 'v':
393 				INC_OUTPUTPOS(arg,2)		/* 16 bit per arg */
394 				break;
395 
396 			case 'i':
397 			case 'I':
398 				INC_OUTPUTPOS(arg,sizeof(int))
399 				break;
400 
401 			case 'l':
402 			case 'L':
403 			case 'N':
404 			case 'V':
405 				INC_OUTPUTPOS(arg,4)		/* 32 bit per arg */
406 				break;
407 
408 #if SIZEOF_ZEND_LONG > 4
409 			case 'q':
410 			case 'Q':
411 			case 'J':
412 			case 'P':
413 				INC_OUTPUTPOS(arg,8)		/* 32 bit per arg */
414 				break;
415 #endif
416 
417 			case 'f': /* float */
418 			case 'g': /* little endian float */
419 			case 'G': /* big endian float */
420 				INC_OUTPUTPOS(arg,sizeof(float))
421 				break;
422 
423 			case 'd': /* double */
424 			case 'e': /* little endian double */
425 			case 'E': /* big endian double */
426 				INC_OUTPUTPOS(arg,sizeof(double))
427 				break;
428 
429 			case 'X':
430 				outputpos -= arg;
431 
432 				if (outputpos < 0) {
433 					php_error_docref(NULL, E_WARNING, "Type %c: outside of string", code);
434 					outputpos = 0;
435 				}
436 				break;
437 
438 			case '@':
439 				outputpos = arg;
440 				break;
441 		}
442 
443 		if (outputsize < outputpos) {
444 			outputsize = outputpos;
445 		}
446 	}
447 
448 	output = zend_string_alloc(outputsize, 0);
449 	outputpos = 0;
450 	currentarg = 0;
451 
452 	/* Do actual packing */
453 	for (i = 0; i < formatcount; i++) {
454 	    int code = (int) formatcodes[i];
455 		int arg = formatargs[i];
456 
457 		switch ((int) code) {
458 			case 'a':
459 			case 'A':
460 			case 'Z': {
461 				size_t arg_cp = (code != 'Z') ? arg : MAX(0, arg - 1);
462 				zend_string *tmp_str;
463 				zend_string *str = zval_get_tmp_string(&argv[currentarg++], &tmp_str);
464 
465 				memset(&ZSTR_VAL(output)[outputpos], (code == 'a' || code == 'Z') ? '\0' : ' ', arg);
466 				memcpy(&ZSTR_VAL(output)[outputpos], ZSTR_VAL(str),
467 					   (ZSTR_LEN(str) < arg_cp) ? ZSTR_LEN(str) : arg_cp);
468 
469 				outputpos += arg;
470 				zend_tmp_string_release(tmp_str);
471 				break;
472 			}
473 
474 			case 'h':
475 			case 'H': {
476 				int nibbleshift = (code == 'h') ? 0 : 4;
477 				int first = 1;
478 				zend_string *tmp_str;
479 				zend_string *str = zval_get_tmp_string(&argv[currentarg++], &tmp_str);
480 				char *v = ZSTR_VAL(str);
481 
482 				outputpos--;
483 				if ((size_t)arg > ZSTR_LEN(str)) {
484 					php_error_docref(NULL, E_WARNING, "Type %c: not enough characters in string", code);
485 					arg = ZSTR_LEN(str);
486 				}
487 
488 				while (arg-- > 0) {
489 					char n = *v++;
490 
491 					if (n >= '0' && n <= '9') {
492 						n -= '0';
493 					} else if (n >= 'A' && n <= 'F') {
494 						n -= ('A' - 10);
495 					} else if (n >= 'a' && n <= 'f') {
496 						n -= ('a' - 10);
497 					} else {
498 						php_error_docref(NULL, E_WARNING, "Type %c: illegal hex digit %c", code, n);
499 						n = 0;
500 					}
501 
502 					if (first--) {
503 						ZSTR_VAL(output)[++outputpos] = 0;
504 					} else {
505 					  first = 1;
506 					}
507 
508 					ZSTR_VAL(output)[outputpos] |= (n << nibbleshift);
509 					nibbleshift = (nibbleshift + 4) & 7;
510 				}
511 
512 				outputpos++;
513 				zend_tmp_string_release(tmp_str);
514 				break;
515 			}
516 
517 			case 'c':
518 			case 'C':
519 				while (arg-- > 0) {
520 					php_pack(&argv[currentarg++], 1, byte_map, &ZSTR_VAL(output)[outputpos]);
521 					outputpos++;
522 				}
523 				break;
524 
525 			case 's':
526 			case 'S':
527 			case 'n':
528 			case 'v': {
529 				int *map = machine_endian_short_map;
530 
531 				if (code == 'n') {
532 					map = big_endian_short_map;
533 				} else if (code == 'v') {
534 					map = little_endian_short_map;
535 				}
536 
537 				while (arg-- > 0) {
538 					php_pack(&argv[currentarg++], 2, map, &ZSTR_VAL(output)[outputpos]);
539 					outputpos += 2;
540 				}
541 				break;
542 			}
543 
544 			case 'i':
545 			case 'I':
546 				while (arg-- > 0) {
547 					php_pack(&argv[currentarg++], sizeof(int), int_map, &ZSTR_VAL(output)[outputpos]);
548 					outputpos += sizeof(int);
549 				}
550 				break;
551 
552 			case 'l':
553 			case 'L':
554 			case 'N':
555 			case 'V': {
556 				int *map = machine_endian_long_map;
557 
558 				if (code == 'N') {
559 					map = big_endian_long_map;
560 				} else if (code == 'V') {
561 					map = little_endian_long_map;
562 				}
563 
564 				while (arg-- > 0) {
565 					php_pack(&argv[currentarg++], 4, map, &ZSTR_VAL(output)[outputpos]);
566 					outputpos += 4;
567 				}
568 				break;
569 			}
570 
571 #if SIZEOF_ZEND_LONG > 4
572 			case 'q':
573 			case 'Q':
574 			case 'J':
575 			case 'P': {
576 				int *map = machine_endian_longlong_map;
577 
578 				if (code == 'J') {
579 					map = big_endian_longlong_map;
580 				} else if (code == 'P') {
581 					map = little_endian_longlong_map;
582 				}
583 
584 				while (arg-- > 0) {
585 					php_pack(&argv[currentarg++], 8, map, &ZSTR_VAL(output)[outputpos]);
586 					outputpos += 8;
587 				}
588 				break;
589 			}
590 #endif
591 
592 			case 'f': {
593 				while (arg-- > 0) {
594 					float v = (float) zval_get_double(&argv[currentarg++]);
595 					memcpy(&ZSTR_VAL(output)[outputpos], &v, sizeof(v));
596 					outputpos += sizeof(v);
597 				}
598 				break;
599 			}
600 
601 			case 'g': {
602 				/* pack little endian float */
603 				while (arg-- > 0) {
604 					float v = (float) zval_get_double(&argv[currentarg++]);
605 					php_pack_copy_float(1, &ZSTR_VAL(output)[outputpos], v);
606 					outputpos += sizeof(v);
607 				}
608 
609 				break;
610 			}
611 			case 'G': {
612 				/* pack big endian float */
613 				while (arg-- > 0) {
614 					float v = (float) zval_get_double(&argv[currentarg++]);
615 					php_pack_copy_float(0, &ZSTR_VAL(output)[outputpos], v);
616 					outputpos += sizeof(v);
617 				}
618 				break;
619 			}
620 
621 			case 'd': {
622 				while (arg-- > 0) {
623 					double v = (double) zval_get_double(&argv[currentarg++]);
624 					memcpy(&ZSTR_VAL(output)[outputpos], &v, sizeof(v));
625 					outputpos += sizeof(v);
626 				}
627 				break;
628 			}
629 
630 			case 'e': {
631 				/* pack little endian double */
632 				while (arg-- > 0) {
633 					double v = (double) zval_get_double(&argv[currentarg++]);
634 					php_pack_copy_double(1, &ZSTR_VAL(output)[outputpos], v);
635 					outputpos += sizeof(v);
636 				}
637 				break;
638 			}
639 
640 			case 'E': {
641 				/* pack big endian double */
642 				while (arg-- > 0) {
643 					double v = (double) zval_get_double(&argv[currentarg++]);
644 					php_pack_copy_double(0, &ZSTR_VAL(output)[outputpos], v);
645 					outputpos += sizeof(v);
646 				}
647 				break;
648 			}
649 
650 			case 'x':
651 				memset(&ZSTR_VAL(output)[outputpos], '\0', arg);
652 				outputpos += arg;
653 				break;
654 
655 			case 'X':
656 				outputpos -= arg;
657 
658 				if (outputpos < 0) {
659 					outputpos = 0;
660 				}
661 				break;
662 
663 			case '@':
664 				if (arg > outputpos) {
665 					memset(&ZSTR_VAL(output)[outputpos], '\0', arg - outputpos);
666 				}
667 				outputpos = arg;
668 				break;
669 		}
670 	}
671 
672 	efree(formatcodes);
673 	efree(formatargs);
674 	ZSTR_VAL(output)[outputpos] = '\0';
675 	ZSTR_LEN(output) = outputpos;
676 	RETURN_NEW_STR(output);
677 }
678 /* }}} */
679 
680 /* {{{ php_unpack */
php_unpack(char * data,size_t size,int issigned,int * map)681 static zend_long php_unpack(char *data, size_t size, int issigned, int *map)
682 {
683 	zend_long result;
684 	char *cresult = (char *) &result;
685 	size_t i;
686 
687 	result = issigned ? -1 : 0;
688 
689 	for (i = 0; i < size; i++) {
690 		cresult[map[i]] = *data++;
691 	}
692 
693 	return result;
694 }
695 /* }}} */
696 
697 /* unpack() is based on Perl's unpack(), but is modified a bit from there.
698  * Rather than depending on error-prone ordered lists or syntactically
699  * unpleasant pass-by-reference, we return an object with named parameters
700  * (like *_fetch_object()). Syntax is "f[repeat]name/...", where "f" is the
701  * formatter char (like pack()), "[repeat]" is the optional repeater argument,
702  * and "name" is the name of the variable to use.
703  * Example: "c2chars/nints" will return an object with fields
704  * chars1, chars2, and ints.
705  * Numeric pack types will return numbers, a and A will return strings,
706  * f and d will return doubles.
707  * Implemented formats are Z, A, a, h, H, c, C, s, S, i, I, l, L, n, N, q, Q, J, P, f, d, x, X, @.
708  * Added g, G for little endian float and big endian float, added e, E for little endian double and big endian double.
709  */
710 /* {{{ Unpack binary string into named array elements according to format argument */
PHP_FUNCTION(unpack)711 PHP_FUNCTION(unpack)
712 {
713 	char *format, *input;
714 	zend_string *formatarg, *inputarg;
715 	zend_long formatlen, inputpos, inputlen;
716 	int i;
717 	zend_long offset = 0;
718 
719 	ZEND_PARSE_PARAMETERS_START(2, 3)
720 		Z_PARAM_STR(formatarg)
721 		Z_PARAM_STR(inputarg)
722 		Z_PARAM_OPTIONAL
723 		Z_PARAM_LONG(offset)
724 	ZEND_PARSE_PARAMETERS_END();
725 
726 	format = ZSTR_VAL(formatarg);
727 	formatlen = ZSTR_LEN(formatarg);
728 	input = ZSTR_VAL(inputarg);
729 	inputlen = ZSTR_LEN(inputarg);
730 	inputpos = 0;
731 
732 
733 	if (offset < 0 || offset > inputlen) {
734 		zend_argument_value_error(3, "must be contained in argument #2 ($data)");
735 		RETURN_THROWS();
736 	}
737 
738 	input += offset;
739 	inputlen -= offset;
740 
741 	array_init(return_value);
742 
743 	while (formatlen-- > 0) {
744 		char type = *(format++);
745 		char c;
746 		int arg = 1, argb;
747 		char *name;
748 		int namelen;
749 		int size=0;
750 
751 		/* Handle format arguments if any */
752 		if (formatlen > 0) {
753 			c = *format;
754 
755 			if (c >= '0' && c <= '9') {
756 				arg = atoi(format);
757 
758 				while (formatlen > 0 && *format >= '0' && *format <= '9') {
759 					format++;
760 					formatlen--;
761 				}
762 			} else if (c == '*') {
763 				arg = -1;
764 				format++;
765 				formatlen--;
766 			}
767 		}
768 
769 		/* Get of new value in array */
770 		name = format;
771 		argb = arg;
772 
773 		while (formatlen > 0 && *format != '/') {
774 			formatlen--;
775 			format++;
776 		}
777 
778 		namelen = format - name;
779 
780 		if (namelen > 200)
781 			namelen = 200;
782 
783 		switch ((int) type) {
784 			/* Never use any input */
785 			case 'X':
786 				size = -1;
787 				if (arg < 0) {
788 					php_error_docref(NULL, E_WARNING, "Type %c: '*' ignored", type);
789 					arg = 1;
790 				}
791 				break;
792 
793 			case '@':
794 				size = 0;
795 				break;
796 
797 			case 'a':
798 			case 'A':
799 			case 'Z':
800 				size = arg;
801 				arg = 1;
802 				break;
803 
804 			case 'h':
805 			case 'H':
806 				size = (arg > 0) ? (arg + (arg % 2)) / 2 : arg;
807 				arg = 1;
808 				break;
809 
810 			/* Use 1 byte of input */
811 			case 'c':
812 			case 'C':
813 			case 'x':
814 				size = 1;
815 				break;
816 
817 			/* Use 2 bytes of input */
818 			case 's':
819 			case 'S':
820 			case 'n':
821 			case 'v':
822 				size = 2;
823 				break;
824 
825 			/* Use sizeof(int) bytes of input */
826 			case 'i':
827 			case 'I':
828 				size = sizeof(int);
829 				break;
830 
831 			/* Use 4 bytes of input */
832 			case 'l':
833 			case 'L':
834 			case 'N':
835 			case 'V':
836 				size = 4;
837 				break;
838 
839 			/* Use 8 bytes of input */
840 			case 'q':
841 			case 'Q':
842 			case 'J':
843 			case 'P':
844 #if SIZEOF_ZEND_LONG > 4
845 				size = 8;
846 				break;
847 #else
848 				zend_value_error("64-bit format codes are not available for 32-bit versions of PHP");
849 				RETURN_THROWS();
850 #endif
851 
852 			/* Use sizeof(float) bytes of input */
853 			case 'f':
854 			case 'g':
855 			case 'G':
856 				size = sizeof(float);
857 				break;
858 
859 			/* Use sizeof(double) bytes of input */
860 			case 'd':
861 			case 'e':
862 			case 'E':
863 				size = sizeof(double);
864 				break;
865 
866 			default:
867 				zend_value_error("Invalid format type %c", type);
868 				RETURN_THROWS();
869 		}
870 
871 		if (size != 0 && size != -1 && size < 0) {
872 			php_error_docref(NULL, E_WARNING, "Type %c: integer overflow", type);
873 			zend_array_destroy(Z_ARR_P(return_value));
874 			RETURN_FALSE;
875 		}
876 
877 		/* Do actual unpacking */
878 		for (i = 0; i != arg; i++ ) {
879 			/* Space for name + number, safe as namelen is ensured <= 200 */
880 			char n[256];
881 
882 			if (arg != 1 || namelen == 0) {
883 				/* Need to add element number to name */
884 				snprintf(n, sizeof(n), "%.*s%d", namelen, name, i + 1);
885 			} else {
886 				/* Truncate name to next format code or end of string */
887 				snprintf(n, sizeof(n), "%.*s", namelen, name);
888 			}
889 
890 			if (size != 0 && size != -1 && INT_MAX - size + 1 < inputpos) {
891 				php_error_docref(NULL, E_WARNING, "Type %c: integer overflow", type);
892 				zend_array_destroy(Z_ARR_P(return_value));
893 				RETURN_FALSE;
894 			}
895 
896 			if ((inputpos + size) <= inputlen) {
897 				switch ((int) type) {
898 					case 'a': {
899 						/* a will not strip any trailing whitespace or null padding */
900 						zend_long len = inputlen - inputpos;	/* Remaining string */
901 
902 						/* If size was given take minimum of len and size */
903 						if ((size >= 0) && (len > size)) {
904 							len = size;
905 						}
906 
907 						size = len;
908 
909 						add_assoc_stringl(return_value, n, &input[inputpos], len);
910 						break;
911 					}
912 					case 'A': {
913 						/* A will strip any trailing whitespace */
914 						char padn = '\0'; char pads = ' '; char padt = '\t'; char padc = '\r'; char padl = '\n';
915 						zend_long len = inputlen - inputpos;	/* Remaining string */
916 
917 						/* If size was given take minimum of len and size */
918 						if ((size >= 0) && (len > size)) {
919 							len = size;
920 						}
921 
922 						size = len;
923 
924 						/* Remove trailing white space and nulls chars from unpacked data */
925 						while (--len >= 0) {
926 							if (input[inputpos + len] != padn
927 								&& input[inputpos + len] != pads
928 								&& input[inputpos + len] != padt
929 								&& input[inputpos + len] != padc
930 								&& input[inputpos + len] != padl
931 							)
932 								break;
933 						}
934 
935 						add_assoc_stringl(return_value, n, &input[inputpos], len + 1);
936 						break;
937 					}
938 					/* New option added for Z to remain in-line with the Perl implementation */
939 					case 'Z': {
940 						/* Z will strip everything after the first null character */
941 						char pad = '\0';
942 						zend_long s,
943 							 len = inputlen - inputpos;	/* Remaining string */
944 
945 						/* If size was given take minimum of len and size */
946 						if ((size >= 0) && (len > size)) {
947 							len = size;
948 						}
949 
950 						size = len;
951 
952 						/* Remove everything after the first null */
953 						for (s=0 ; s < len ; s++) {
954 							if (input[inputpos + s] == pad)
955 								break;
956 						}
957 						len = s;
958 
959 						add_assoc_stringl(return_value, n, &input[inputpos], len);
960 						break;
961 					}
962 
963 
964 					case 'h':
965 					case 'H': {
966 						zend_long len = (inputlen - inputpos) * 2;	/* Remaining */
967 						int nibbleshift = (type == 'h') ? 0 : 4;
968 						int first = 1;
969 						zend_string *buf;
970 						zend_long ipos, opos;
971 
972 						/* If size was given take minimum of len and size */
973 						if (size >= 0 && len > (size * 2)) {
974 							len = size * 2;
975 						}
976 
977 						if (len > 0 && argb > 0) {
978 							len -= argb % 2;
979 						}
980 
981 						buf = zend_string_alloc(len, 0);
982 
983 						for (ipos = opos = 0; opos < len; opos++) {
984 							char cc = (input[inputpos + ipos] >> nibbleshift) & 0xf;
985 
986 							if (cc < 10) {
987 								cc += '0';
988 							} else {
989 								cc += 'a' - 10;
990 							}
991 
992 							ZSTR_VAL(buf)[opos] = cc;
993 							nibbleshift = (nibbleshift + 4) & 7;
994 
995 							if (first-- == 0) {
996 								ipos++;
997 								first = 1;
998 							}
999 						}
1000 
1001 						ZSTR_VAL(buf)[len] = '\0';
1002 						add_assoc_str(return_value, n, buf);
1003 						break;
1004 					}
1005 
1006 					case 'c':
1007 					case 'C': {
1008 						int issigned = (type == 'c') ? (input[inputpos] & 0x80) : 0;
1009 						zend_long v = php_unpack(&input[inputpos], 1, issigned, byte_map);
1010 						add_assoc_long(return_value, n, v);
1011 						break;
1012 					}
1013 
1014 					case 's':
1015 					case 'S':
1016 					case 'n':
1017 					case 'v': {
1018 						zend_long v;
1019 						int issigned = 0;
1020 						int *map = machine_endian_short_map;
1021 
1022 						if (type == 's') {
1023 							issigned = input[inputpos + (machine_little_endian ? 1 : 0)] & 0x80;
1024 						} else if (type == 'n') {
1025 							map = big_endian_short_map;
1026 						} else if (type == 'v') {
1027 							map = little_endian_short_map;
1028 						}
1029 
1030 						v = php_unpack(&input[inputpos], 2, issigned, map);
1031 						add_assoc_long(return_value, n, v);
1032 						break;
1033 					}
1034 
1035 					case 'i':
1036 					case 'I': {
1037 						zend_long v;
1038 						int issigned = 0;
1039 
1040 						if (type == 'i') {
1041 							issigned = input[inputpos + (machine_little_endian ? (sizeof(int) - 1) : 0)] & 0x80;
1042 						}
1043 
1044 						v = php_unpack(&input[inputpos], sizeof(int), issigned, int_map);
1045 						add_assoc_long(return_value, n, v);
1046 						break;
1047 					}
1048 
1049 					case 'l':
1050 					case 'L':
1051 					case 'N':
1052 					case 'V': {
1053 						int issigned = 0;
1054 						int *map = machine_endian_long_map;
1055 						zend_long v = 0;
1056 
1057 						if (type == 'l' || type == 'L') {
1058 							issigned = input[inputpos + (machine_little_endian ? 3 : 0)] & 0x80;
1059 						} else if (type == 'N') {
1060 							issigned = input[inputpos] & 0x80;
1061 							map = big_endian_long_map;
1062 						} else if (type == 'V') {
1063 							issigned = input[inputpos + 3] & 0x80;
1064 							map = little_endian_long_map;
1065 						}
1066 
1067 						if (SIZEOF_ZEND_LONG > 4 && issigned) {
1068 							v = ~INT_MAX;
1069 						}
1070 
1071 						v |= php_unpack(&input[inputpos], 4, issigned, map);
1072 						if (SIZEOF_ZEND_LONG > 4) {
1073  							if (type == 'l') {
1074 								v = (signed int) v;
1075 							} else {
1076 								v = (unsigned int) v;
1077 							}
1078 						}
1079 						add_assoc_long(return_value, n, v);
1080 						break;
1081 					}
1082 
1083 #if SIZEOF_ZEND_LONG > 4
1084 					case 'q':
1085 					case 'Q':
1086 					case 'J':
1087 					case 'P': {
1088 						int issigned = 0;
1089 						int *map = machine_endian_longlong_map;
1090 						zend_long v = 0;
1091 
1092 						if (type == 'q' || type == 'Q') {
1093 							issigned = input[inputpos + (machine_little_endian ? 7 : 0)] & 0x80;
1094 						} else if (type == 'J') {
1095 							issigned = input[inputpos] & 0x80;
1096 							map = big_endian_longlong_map;
1097 						} else if (type == 'P') {
1098 							issigned = input[inputpos + 7] & 0x80;
1099 							map = little_endian_longlong_map;
1100 						}
1101 
1102 						v = php_unpack(&input[inputpos], 8, issigned, map);
1103 
1104 						if (type == 'q') {
1105 							v = (zend_long) v;
1106 						} else {
1107 							v = (zend_ulong) v;
1108 						}
1109 
1110 						add_assoc_long(return_value, n, v);
1111 						break;
1112 					}
1113 #endif
1114 
1115 					case 'f': /* float */
1116 					case 'g': /* little endian float*/
1117 					case 'G': /* big endian float*/
1118 					{
1119 						float v;
1120 
1121 						if (type == 'g') {
1122 							v = php_pack_parse_float(1, &input[inputpos]);
1123 						} else if (type == 'G') {
1124 							v = php_pack_parse_float(0, &input[inputpos]);
1125 						} else {
1126 							memcpy(&v, &input[inputpos], sizeof(float));
1127 						}
1128 
1129 						add_assoc_double(return_value, n, (double)v);
1130 						break;
1131 					}
1132 
1133 
1134 					case 'd': /* double */
1135 					case 'e': /* little endian float */
1136 					case 'E': /* big endian float */
1137 					{
1138 						double v;
1139 						if (type == 'e') {
1140 							v = php_pack_parse_double(1, &input[inputpos]);
1141 						} else if (type == 'E') {
1142 							v = php_pack_parse_double(0, &input[inputpos]);
1143 						} else {
1144 							memcpy(&v, &input[inputpos], sizeof(double));
1145 						}
1146 						add_assoc_double(return_value, n, v);
1147 						break;
1148 					}
1149 
1150 					case 'x':
1151 						/* Do nothing with input, just skip it */
1152 						break;
1153 
1154 					case 'X':
1155 						if (inputpos < size) {
1156 							inputpos = -size;
1157 							i = arg - 1;		/* Break out of for loop */
1158 
1159 							if (arg >= 0) {
1160 								php_error_docref(NULL, E_WARNING, "Type %c: outside of string", type);
1161 							}
1162 						}
1163 						break;
1164 
1165 					case '@':
1166 						if (arg <= inputlen) {
1167 							inputpos = arg;
1168 						} else {
1169 							php_error_docref(NULL, E_WARNING, "Type %c: outside of string", type);
1170 						}
1171 
1172 						i = arg - 1;	/* Done, break out of for loop */
1173 						break;
1174 				}
1175 
1176 				inputpos += size;
1177 				if (inputpos < 0) {
1178 					if (size != -1) { /* only print warning if not working with * */
1179 						php_error_docref(NULL, E_WARNING, "Type %c: outside of string", type);
1180 					}
1181 					inputpos = 0;
1182 				}
1183 			} else if (arg < 0) {
1184 				/* Reached end of input for '*' repeater */
1185 				break;
1186 			} else {
1187 				php_error_docref(NULL, E_WARNING, "Type %c: not enough input, need %d, have " ZEND_LONG_FMT, type, size, inputlen - inputpos);
1188 				zend_array_destroy(Z_ARR_P(return_value));
1189 				RETURN_FALSE;
1190 			}
1191 		}
1192 
1193 		if (formatlen > 0) {
1194 			formatlen--;	/* Skip '/' separator, does no harm if inputlen == 0 */
1195 			format++;
1196 		}
1197 	}
1198 }
1199 /* }}} */
1200 
1201 /* {{{ PHP_MINIT_FUNCTION */
PHP_MINIT_FUNCTION(pack)1202 PHP_MINIT_FUNCTION(pack)
1203 {
1204 	int machine_endian_check = 1;
1205 	int i;
1206 
1207 	machine_little_endian = ((char *)&machine_endian_check)[0];
1208 
1209 	if (machine_little_endian) {
1210 		/* Where to get lo to hi bytes from */
1211 		byte_map[0] = 0;
1212 
1213 		for (i = 0; i < (int)sizeof(int); i++) {
1214 			int_map[i] = i;
1215 		}
1216 
1217 		machine_endian_short_map[0] = 0;
1218 		machine_endian_short_map[1] = 1;
1219 		big_endian_short_map[0] = 1;
1220 		big_endian_short_map[1] = 0;
1221 		little_endian_short_map[0] = 0;
1222 		little_endian_short_map[1] = 1;
1223 
1224 		machine_endian_long_map[0] = 0;
1225 		machine_endian_long_map[1] = 1;
1226 		machine_endian_long_map[2] = 2;
1227 		machine_endian_long_map[3] = 3;
1228 		big_endian_long_map[0] = 3;
1229 		big_endian_long_map[1] = 2;
1230 		big_endian_long_map[2] = 1;
1231 		big_endian_long_map[3] = 0;
1232 		little_endian_long_map[0] = 0;
1233 		little_endian_long_map[1] = 1;
1234 		little_endian_long_map[2] = 2;
1235 		little_endian_long_map[3] = 3;
1236 
1237 #if SIZEOF_ZEND_LONG > 4
1238 		machine_endian_longlong_map[0] = 0;
1239 		machine_endian_longlong_map[1] = 1;
1240 		machine_endian_longlong_map[2] = 2;
1241 		machine_endian_longlong_map[3] = 3;
1242 		machine_endian_longlong_map[4] = 4;
1243 		machine_endian_longlong_map[5] = 5;
1244 		machine_endian_longlong_map[6] = 6;
1245 		machine_endian_longlong_map[7] = 7;
1246 		big_endian_longlong_map[0] = 7;
1247 		big_endian_longlong_map[1] = 6;
1248 		big_endian_longlong_map[2] = 5;
1249 		big_endian_longlong_map[3] = 4;
1250 		big_endian_longlong_map[4] = 3;
1251 		big_endian_longlong_map[5] = 2;
1252 		big_endian_longlong_map[6] = 1;
1253 		big_endian_longlong_map[7] = 0;
1254 		little_endian_longlong_map[0] = 0;
1255 		little_endian_longlong_map[1] = 1;
1256 		little_endian_longlong_map[2] = 2;
1257 		little_endian_longlong_map[3] = 3;
1258 		little_endian_longlong_map[4] = 4;
1259 		little_endian_longlong_map[5] = 5;
1260 		little_endian_longlong_map[6] = 6;
1261 		little_endian_longlong_map[7] = 7;
1262 #endif
1263 	}
1264 	else {
1265 		zval val;
1266 		int size = sizeof(Z_LVAL(val));
1267 		Z_LVAL(val)=0; /*silence a warning*/
1268 
1269 		/* Where to get hi to lo bytes from */
1270 		byte_map[0] = size - 1;
1271 
1272 		for (i = 0; i < (int)sizeof(int); i++) {
1273 			int_map[i] = size - (sizeof(int) - i);
1274 		}
1275 
1276 		machine_endian_short_map[0] = size - 2;
1277 		machine_endian_short_map[1] = size - 1;
1278 		big_endian_short_map[0] = size - 2;
1279 		big_endian_short_map[1] = size - 1;
1280 		little_endian_short_map[0] = size - 1;
1281 		little_endian_short_map[1] = size - 2;
1282 
1283 		machine_endian_long_map[0] = size - 4;
1284 		machine_endian_long_map[1] = size - 3;
1285 		machine_endian_long_map[2] = size - 2;
1286 		machine_endian_long_map[3] = size - 1;
1287 		big_endian_long_map[0] = size - 4;
1288 		big_endian_long_map[1] = size - 3;
1289 		big_endian_long_map[2] = size - 2;
1290 		big_endian_long_map[3] = size - 1;
1291 		little_endian_long_map[0] = size - 1;
1292 		little_endian_long_map[1] = size - 2;
1293 		little_endian_long_map[2] = size - 3;
1294 		little_endian_long_map[3] = size - 4;
1295 
1296 #if SIZEOF_ZEND_LONG > 4
1297 		machine_endian_longlong_map[0] = size - 8;
1298 		machine_endian_longlong_map[1] = size - 7;
1299 		machine_endian_longlong_map[2] = size - 6;
1300 		machine_endian_longlong_map[3] = size - 5;
1301 		machine_endian_longlong_map[4] = size - 4;
1302 		machine_endian_longlong_map[5] = size - 3;
1303 		machine_endian_longlong_map[6] = size - 2;
1304 		machine_endian_longlong_map[7] = size - 1;
1305 		big_endian_longlong_map[0] = size - 8;
1306 		big_endian_longlong_map[1] = size - 7;
1307 		big_endian_longlong_map[2] = size - 6;
1308 		big_endian_longlong_map[3] = size - 5;
1309 		big_endian_longlong_map[4] = size - 4;
1310 		big_endian_longlong_map[5] = size - 3;
1311 		big_endian_longlong_map[6] = size - 2;
1312 		big_endian_longlong_map[7] = size - 1;
1313 		little_endian_longlong_map[0] = size - 1;
1314 		little_endian_longlong_map[1] = size - 2;
1315 		little_endian_longlong_map[2] = size - 3;
1316 		little_endian_longlong_map[3] = size - 4;
1317 		little_endian_longlong_map[4] = size - 5;
1318 		little_endian_longlong_map[5] = size - 6;
1319 		little_endian_longlong_map[6] = size - 7;
1320 		little_endian_longlong_map[7] = size - 8;
1321 #endif
1322 	}
1323 
1324 	return SUCCESS;
1325 }
1326 /* }}} */
1327