xref: /php-src/ext/standard/pack.c (revision 87862835)
1 /*
2    +----------------------------------------------------------------------+
3    | Copyright (c) The PHP Group                                          |
4    +----------------------------------------------------------------------+
5    | This source file is subject to version 3.01 of the PHP license,      |
6    | that is bundled with this package in the file LICENSE, and is        |
7    | available through the world-wide-web at the following url:           |
8    | https://www.php.net/license/3_01.txt                                 |
9    | If you did not receive a copy of the PHP license and are unable to   |
10    | obtain it through the world-wide-web, please send a note to          |
11    | license@php.net so we can mail you a copy immediately.               |
12    +----------------------------------------------------------------------+
13    | Author: Chris Schneider <cschneid@relog.ch>                          |
14    +----------------------------------------------------------------------+
15  */
16 
17 #include "php.h"
18 
19 #include <stdio.h>
20 #include <stdlib.h>
21 #include <errno.h>
22 #include <sys/types.h>
23 #include <sys/stat.h>
24 #include <fcntl.h>
25 #ifdef PHP_WIN32
26 #define O_RDONLY _O_RDONLY
27 #include "win32/param.h"
28 #else
29 #include <sys/param.h>
30 #endif
31 #include "ext/standard/head.h"
32 #include "php_string.h"
33 #include "pack.h"
34 #ifdef HAVE_PWD_H
35 #ifdef PHP_WIN32
36 #include "win32/pwd.h"
37 #else
38 #include <pwd.h>
39 #endif
40 #endif
41 #include "fsock.h"
42 #ifdef HAVE_NETINET_IN_H
43 #include <netinet/in.h>
44 #endif
45 
/*
 * Advance `outputpos` by (a) * (b) bytes, where (a) is a repeat count and (b)
 * the size of one item. When (a) is negative or the product would push
 * `outputpos` past INT_MAX, both format buffers are freed, zend_value_error()
 * is raised and RETURN_THROWS() leaves the enclosing function.
 *
 * The expansion is deliberately a bare statement sequence rather than a
 * do { } while (0) wrapper: the call sites do not put a semicolon after the
 * macro. It relies on `outputpos`, `formatcodes`, `formatargs` and `code`
 * being in scope where it is used.
 */
#define INC_OUTPUTPOS(a,b) \
	if ((a) < 0 || ((INT_MAX - outputpos)/((int)(b))) < (a)) { \
		efree(formatcodes);	\
		efree(formatargs);	\
		zend_value_error("Type %c: integer overflow in format string", code); \
		RETURN_THROWS(); \
	} \
	outputpos += (a)*(b);
54 
/* Compile-time flag: 0 when WORDS_BIGENDIAN is defined, 1 otherwise. */
#if defined(WORDS_BIGENDIAN)
# define MACHINE_LITTLE_ENDIAN 0
#else
# define MACHINE_LITTLE_ENDIAN 1
#endif
60 
61 typedef ZEND_SET_ALIGNED(1, uint16_t unaligned_uint16_t);
62 typedef ZEND_SET_ALIGNED(1, uint32_t unaligned_uint32_t);
63 typedef ZEND_SET_ALIGNED(1, uint64_t unaligned_uint64_t);
64 typedef ZEND_SET_ALIGNED(1, unsigned int unaligned_uint);
65 typedef ZEND_SET_ALIGNED(1, int unaligned_int);
66 
67 /* Mapping of byte from char (8bit) to long for machine endian */
68 static int byte_map[1];
69 
70 /* Mappings of bytes from int (machine dependent) to int for machine endian */
71 static int int_map[sizeof(int)];
72 
73 /* Mappings of bytes from shorts (16bit) for all endian environments */
74 static int machine_endian_short_map[2];
75 static int big_endian_short_map[2];
76 static int little_endian_short_map[2];
77 
78 /* Mappings of bytes from longs (32bit) for all endian environments */
79 static int machine_endian_long_map[4];
80 static int big_endian_long_map[4];
81 static int little_endian_long_map[4];
82 
83 #if SIZEOF_ZEND_LONG > 4
84 /* Mappings of bytes from quads (64bit) for all endian environments */
85 static int machine_endian_longlong_map[8];
86 static int big_endian_longlong_map[8];
87 static int little_endian_longlong_map[8];
88 #endif
89 
90 /* {{{ php_pack */
/*
 * Write `size` bytes of an integer value to `output`.
 *
 * `val` is converted to an integer in place. Output byte i is then taken from
 * byte offset map[i] of the resulting zend_long, so the map decides the byte
 * order. `map` must provide at least `size` entries and `output` must have
 * room for `size` bytes.
 */
static void php_pack(zval *val, size_t size, int *map, char *output)
{
	const char *bytes;
	size_t pos;

	convert_to_long(val);
	bytes = (const char *) &Z_LVAL_P(val);

	for (pos = 0; pos != size; pos++) {
		output[pos] = bytes[map[pos]];
	}
}
103 /* }}} */
104 
/* Return `arg` with its two bytes exchanged. */
ZEND_ATTRIBUTE_CONST static inline uint16_t php_pack_reverse_int16(uint16_t arg)
{
	const uint16_t low_byte = arg & 0xFF;
	const uint16_t high_byte = arg >> 8;

	return (uint16_t) ((low_byte << 8) | high_byte);
}
109 
110 /* {{{ php_pack_reverse_int32 */
/* Return `arg` with the order of its four bytes reversed. */
ZEND_ATTRIBUTE_CONST static inline uint32_t php_pack_reverse_int32(uint32_t arg)
{
	return (arg << 24)
		| ((arg << 8) & 0x00FF0000)
		| ((arg >> 8) & 0x0000FF00)
		| (arg >> 24);
}
118 /* }}} */
119 
/* {{{ php_pack_reverse_int64 */
/* Return `arg` with the order of its eight bytes reversed. */
static inline uint64_t php_pack_reverse_int64(uint64_t arg)
{
	uint64_t swapped = 0;
	int n;

	/* Peel bytes off the low end of arg and push them onto the low end of
	 * swapped, so the first byte taken ends up most significant. */
	for (n = 0; n < 8; n++) {
		swapped = (swapped << 8) | (arg & 0xFF);
		arg >>= 8;
	}

	return swapped;
}
133 /* }}} */
134 
135 /* {{{ php_pack_copy_float */
php_pack_copy_float(int is_little_endian,void * dst,float f)136 static void php_pack_copy_float(int is_little_endian, void * dst, float f)
137 {
138 	union Copy32 {
139 		float f;
140 		uint32_t i;
141 	} m;
142 	m.f = f;
143 
144 #ifdef WORDS_BIGENDIAN
145 	if (is_little_endian) {
146 		m.i = php_pack_reverse_int32(m.i);
147 	}
148 #else /* WORDS_BIGENDIAN */
149 	if (!is_little_endian) {
150 		m.i = php_pack_reverse_int32(m.i);
151 	}
152 #endif /* WORDS_BIGENDIAN */
153 
154 	memcpy(dst, &m.f, sizeof(float));
155 }
156 /* }}} */
157 
158 /* {{{ php_pack_copy_double */
php_pack_copy_double(int is_little_endian,void * dst,double d)159 static void php_pack_copy_double(int is_little_endian, void * dst, double d)
160 {
161 	union Copy64 {
162 		double d;
163 		uint64_t i;
164 	} m;
165 	m.d = d;
166 
167 #ifdef WORDS_BIGENDIAN
168 	if (is_little_endian) {
169 		m.i = php_pack_reverse_int64(m.i);
170 	}
171 #else /* WORDS_BIGENDIAN */
172 	if (!is_little_endian) {
173 		m.i = php_pack_reverse_int64(m.i);
174 	}
175 #endif /* WORDS_BIGENDIAN */
176 
177 	memcpy(dst, &m.d, sizeof(double));
178 }
179 /* }}} */
180 
181 /* {{{ php_pack_parse_float */
php_pack_parse_float(int is_little_endian,void * src)182 static float php_pack_parse_float(int is_little_endian, void * src)
183 {
184 	union Copy32 {
185 		float f;
186 		uint32_t i;
187 	} m;
188 	memcpy(&m.i, src, sizeof(float));
189 
190 #ifdef WORDS_BIGENDIAN
191 	if (is_little_endian) {
192 		m.i = php_pack_reverse_int32(m.i);
193 	}
194 #else /* WORDS_BIGENDIAN */
195 	if (!is_little_endian) {
196 		m.i = php_pack_reverse_int32(m.i);
197 	}
198 #endif /* WORDS_BIGENDIAN */
199 
200 	return m.f;
201 }
202 /* }}} */
203 
204 /* {{{ php_pack_parse_double */
php_pack_parse_double(int is_little_endian,void * src)205 static double php_pack_parse_double(int is_little_endian, void * src)
206 {
207 	union Copy64 {
208 		double d;
209 		uint64_t i;
210 	} m;
211 	memcpy(&m.i, src, sizeof(double));
212 
213 #ifdef WORDS_BIGENDIAN
214 	if (is_little_endian) {
215 		m.i = php_pack_reverse_int64(m.i);
216 	}
217 #else /* WORDS_BIGENDIAN */
218 	if (!is_little_endian) {
219 		m.i = php_pack_reverse_int64(m.i);
220 	}
221 #endif /* WORDS_BIGENDIAN */
222 
223 	return m.d;
224 }
225 /* }}} */
226 
/* pack() idea stolen from Perl (implemented formats behave the same as there except J and P)
 * Implemented formats are Z, A, a, h, H, c, C, s, S, i, I, l, L, n, N, v, V, q, Q, J, P, f, d, x, X, @.
 * Added g, G for little endian float and big endian float, added e, E for little endian double and big endian double.
 */
231 /* {{{ Takes one or more arguments and packs them into a binary string according to the format argument */
PHP_FUNCTION(pack)232 PHP_FUNCTION(pack)
233 {
234 	zval *argv = NULL;
235 	int num_args = 0;
236 	size_t i;
237 	int currentarg;
238 	char *format;
239 	size_t formatlen;
240 	char *formatcodes;
241 	int *formatargs;
242 	size_t formatcount = 0;
243 	int outputpos = 0, outputsize = 0;
244 	zend_string *output;
245 
246 	ZEND_PARSE_PARAMETERS_START(1, -1)
247 		Z_PARAM_STRING(format, formatlen)
248 		Z_PARAM_VARIADIC('*', argv, num_args)
249 	ZEND_PARSE_PARAMETERS_END();
250 
251 	/* We have a maximum of <formatlen> format codes to deal with */
252 	formatcodes = safe_emalloc(formatlen, sizeof(*formatcodes), 0);
253 	formatargs = safe_emalloc(formatlen, sizeof(*formatargs), 0);
254 	currentarg = 0;
255 
256 	/* Preprocess format into formatcodes and formatargs */
257 	for (i = 0; i < formatlen; formatcount++) {
258 		char code = format[i++];
259 		int arg = 1;
260 
261 		/* Handle format arguments if any */
262 		if (i < formatlen) {
263 			char c = format[i];
264 
265 			if (c == '*') {
266 				arg = -1;
267 				i++;
268 			}
269 			else if (c >= '0' && c <= '9') {
270 				arg = atoi(&format[i]);
271 
272 				while (format[i] >= '0' && format[i] <= '9' && i < formatlen) {
273 					i++;
274 				}
275 			}
276 		}
277 
278 		/* Handle special arg '*' for all codes and check argv overflows */
279 		switch ((int) code) {
280 			/* Never uses any args */
281 			case 'x':
282 			case 'X':
283 			case '@':
284 				if (arg < 0) {
285 					php_error_docref(NULL, E_WARNING, "Type %c: '*' ignored", code);
286 					arg = 1;
287 				}
288 				break;
289 
290 			/* Always uses one arg */
291 			case 'a':
292 			case 'A':
293 			case 'Z':
294 			case 'h':
295 			case 'H':
296 				if (currentarg >= num_args) {
297 					efree(formatcodes);
298 					efree(formatargs);
299 					zend_value_error("Type %c: not enough arguments", code);
300 					RETURN_THROWS();
301 				}
302 
303 				if (arg < 0) {
304 					if (!try_convert_to_string(&argv[currentarg])) {
305 						efree(formatcodes);
306 						efree(formatargs);
307 						RETURN_THROWS();
308 					}
309 
310 					arg = Z_STRLEN(argv[currentarg]);
311 					if (code == 'Z') {
312 						/* add one because Z is always NUL-terminated:
313 						 * pack("Z*", "aa") === "aa\0"
314 						 * pack("Z2", "aa") === "a\0" */
315 						arg++;
316 					}
317 				}
318 
319 				currentarg++;
320 				break;
321 
322 			/* Use as many args as specified */
323 			case 'q':
324 			case 'Q':
325 			case 'J':
326 			case 'P':
327 #if SIZEOF_ZEND_LONG < 8
328 					efree(formatcodes);
329 					efree(formatargs);
330 					zend_value_error("64-bit format codes are not available for 32-bit versions of PHP");
331 					RETURN_THROWS();
332 #endif
333 			case 'c':
334 			case 'C':
335 			case 's':
336 			case 'S':
337 			case 'i':
338 			case 'I':
339 			case 'l':
340 			case 'L':
341 			case 'n':
342 			case 'N':
343 			case 'v':
344 			case 'V':
345 			case 'f': /* float */
346 			case 'g': /* little endian float */
347 			case 'G': /* big endian float */
348 			case 'd': /* double */
349 			case 'e': /* little endian double */
350 			case 'E': /* big endian double */
351 				if (arg < 0) {
352 					arg = num_args - currentarg;
353 				}
354 				if (currentarg > INT_MAX - arg) {
355 					goto too_few_args;
356 				}
357 				currentarg += arg;
358 
359 				if (currentarg > num_args) {
360 too_few_args:
361 					efree(formatcodes);
362 					efree(formatargs);
363 					zend_value_error("Type %c: too few arguments", code);
364 					RETURN_THROWS();
365 				}
366 				break;
367 
368 			default:
369 				efree(formatcodes);
370 				efree(formatargs);
371 				zend_value_error("Type %c: unknown format code", code);
372 				RETURN_THROWS();
373 		}
374 
375 		formatcodes[formatcount] = code;
376 		formatargs[formatcount] = arg;
377 	}
378 
379 	if (currentarg < num_args) {
380 		php_error_docref(NULL, E_WARNING, "%d arguments unused", (num_args - currentarg));
381 	}
382 
383 	/* Calculate output length and upper bound while processing*/
384 	for (i = 0; i < formatcount; i++) {
385 	    int code = (int) formatcodes[i];
386 		int arg = formatargs[i];
387 
388 		switch ((int) code) {
389 			case 'h':
390 			case 'H':
391 				INC_OUTPUTPOS((arg + (arg % 2)) / 2,1)	/* 4 bit per arg */
392 				break;
393 
394 			case 'a':
395 			case 'A':
396 			case 'Z':
397 			case 'c':
398 			case 'C':
399 			case 'x':
400 				INC_OUTPUTPOS(arg,1)		/* 8 bit per arg */
401 				break;
402 
403 			case 's':
404 			case 'S':
405 			case 'n':
406 			case 'v':
407 				INC_OUTPUTPOS(arg,2)		/* 16 bit per arg */
408 				break;
409 
410 			case 'i':
411 			case 'I':
412 				INC_OUTPUTPOS(arg,sizeof(int))
413 				break;
414 
415 			case 'l':
416 			case 'L':
417 			case 'N':
418 			case 'V':
419 				INC_OUTPUTPOS(arg,4)		/* 32 bit per arg */
420 				break;
421 
422 #if SIZEOF_ZEND_LONG > 4
423 			case 'q':
424 			case 'Q':
425 			case 'J':
426 			case 'P':
427 				INC_OUTPUTPOS(arg,8)		/* 32 bit per arg */
428 				break;
429 #endif
430 
431 			case 'f': /* float */
432 			case 'g': /* little endian float */
433 			case 'G': /* big endian float */
434 				INC_OUTPUTPOS(arg,sizeof(float))
435 				break;
436 
437 			case 'd': /* double */
438 			case 'e': /* little endian double */
439 			case 'E': /* big endian double */
440 				INC_OUTPUTPOS(arg,sizeof(double))
441 				break;
442 
443 			case 'X':
444 				outputpos -= arg;
445 
446 				if (outputpos < 0) {
447 					php_error_docref(NULL, E_WARNING, "Type %c: outside of string", code);
448 					outputpos = 0;
449 				}
450 				break;
451 
452 			case '@':
453 				outputpos = arg;
454 				break;
455 		}
456 
457 		if (outputsize < outputpos) {
458 			outputsize = outputpos;
459 		}
460 	}
461 
462 	output = zend_string_alloc(outputsize, 0);
463 	outputpos = 0;
464 	currentarg = 0;
465 
466 	/* Do actual packing */
467 	for (i = 0; i < formatcount; i++) {
468 	    int code = (int) formatcodes[i];
469 		int arg = formatargs[i];
470 
471 		switch ((int) code) {
472 			case 'a':
473 			case 'A':
474 			case 'Z': {
475 				size_t arg_cp = (code != 'Z') ? arg : MAX(0, arg - 1);
476 				zend_string *tmp_str;
477 				zend_string *str = zval_get_tmp_string(&argv[currentarg++], &tmp_str);
478 
479 				memset(&ZSTR_VAL(output)[outputpos], (code == 'a' || code == 'Z') ? '\0' : ' ', arg);
480 				memcpy(&ZSTR_VAL(output)[outputpos], ZSTR_VAL(str),
481 					   (ZSTR_LEN(str) < arg_cp) ? ZSTR_LEN(str) : arg_cp);
482 
483 				outputpos += arg;
484 				zend_tmp_string_release(tmp_str);
485 				break;
486 			}
487 
488 			case 'h':
489 			case 'H': {
490 				int nibbleshift = (code == 'h') ? 0 : 4;
491 				int first = 1;
492 				zend_string *tmp_str;
493 				zend_string *str = zval_get_tmp_string(&argv[currentarg++], &tmp_str);
494 				char *v = ZSTR_VAL(str);
495 
496 				outputpos--;
497 				if ((size_t)arg > ZSTR_LEN(str)) {
498 					php_error_docref(NULL, E_WARNING, "Type %c: not enough characters in string", code);
499 					arg = ZSTR_LEN(str);
500 				}
501 
502 				while (arg-- > 0) {
503 					char n = *v++;
504 
505 					if (n >= '0' && n <= '9') {
506 						n -= '0';
507 					} else if (n >= 'A' && n <= 'F') {
508 						n -= ('A' - 10);
509 					} else if (n >= 'a' && n <= 'f') {
510 						n -= ('a' - 10);
511 					} else {
512 						php_error_docref(NULL, E_WARNING, "Type %c: illegal hex digit %c", code, n);
513 						n = 0;
514 					}
515 
516 					if (first--) {
517 						ZSTR_VAL(output)[++outputpos] = 0;
518 					} else {
519 					  first = 1;
520 					}
521 
522 					ZSTR_VAL(output)[outputpos] |= (n << nibbleshift);
523 					nibbleshift = (nibbleshift + 4) & 7;
524 				}
525 
526 				outputpos++;
527 				zend_tmp_string_release(tmp_str);
528 				break;
529 			}
530 
531 			case 'c':
532 			case 'C':
533 				while (arg-- > 0) {
534 					php_pack(&argv[currentarg++], 1, byte_map, &ZSTR_VAL(output)[outputpos]);
535 					outputpos++;
536 				}
537 				break;
538 
539 			case 's':
540 			case 'S':
541 			case 'n':
542 			case 'v': {
543 				int *map = machine_endian_short_map;
544 
545 				if (code == 'n') {
546 					map = big_endian_short_map;
547 				} else if (code == 'v') {
548 					map = little_endian_short_map;
549 				}
550 
551 				while (arg-- > 0) {
552 					php_pack(&argv[currentarg++], 2, map, &ZSTR_VAL(output)[outputpos]);
553 					outputpos += 2;
554 				}
555 				break;
556 			}
557 
558 			case 'i':
559 			case 'I':
560 				while (arg-- > 0) {
561 					php_pack(&argv[currentarg++], sizeof(int), int_map, &ZSTR_VAL(output)[outputpos]);
562 					outputpos += sizeof(int);
563 				}
564 				break;
565 
566 			case 'l':
567 			case 'L':
568 			case 'N':
569 			case 'V': {
570 				int *map = machine_endian_long_map;
571 
572 				if (code == 'N') {
573 					map = big_endian_long_map;
574 				} else if (code == 'V') {
575 					map = little_endian_long_map;
576 				}
577 
578 				while (arg-- > 0) {
579 					php_pack(&argv[currentarg++], 4, map, &ZSTR_VAL(output)[outputpos]);
580 					outputpos += 4;
581 				}
582 				break;
583 			}
584 
585 #if SIZEOF_ZEND_LONG > 4
586 			case 'q':
587 			case 'Q':
588 			case 'J':
589 			case 'P': {
590 				int *map = machine_endian_longlong_map;
591 
592 				if (code == 'J') {
593 					map = big_endian_longlong_map;
594 				} else if (code == 'P') {
595 					map = little_endian_longlong_map;
596 				}
597 
598 				while (arg-- > 0) {
599 					php_pack(&argv[currentarg++], 8, map, &ZSTR_VAL(output)[outputpos]);
600 					outputpos += 8;
601 				}
602 				break;
603 			}
604 #endif
605 
606 			case 'f': {
607 				while (arg-- > 0) {
608 					float v = (float) zval_get_double(&argv[currentarg++]);
609 					memcpy(&ZSTR_VAL(output)[outputpos], &v, sizeof(v));
610 					outputpos += sizeof(v);
611 				}
612 				break;
613 			}
614 
615 			case 'g': {
616 				/* pack little endian float */
617 				while (arg-- > 0) {
618 					float v = (float) zval_get_double(&argv[currentarg++]);
619 					php_pack_copy_float(1, &ZSTR_VAL(output)[outputpos], v);
620 					outputpos += sizeof(v);
621 				}
622 
623 				break;
624 			}
625 			case 'G': {
626 				/* pack big endian float */
627 				while (arg-- > 0) {
628 					float v = (float) zval_get_double(&argv[currentarg++]);
629 					php_pack_copy_float(0, &ZSTR_VAL(output)[outputpos], v);
630 					outputpos += sizeof(v);
631 				}
632 				break;
633 			}
634 
635 			case 'd': {
636 				while (arg-- > 0) {
637 					double v = (double) zval_get_double(&argv[currentarg++]);
638 					memcpy(&ZSTR_VAL(output)[outputpos], &v, sizeof(v));
639 					outputpos += sizeof(v);
640 				}
641 				break;
642 			}
643 
644 			case 'e': {
645 				/* pack little endian double */
646 				while (arg-- > 0) {
647 					double v = (double) zval_get_double(&argv[currentarg++]);
648 					php_pack_copy_double(1, &ZSTR_VAL(output)[outputpos], v);
649 					outputpos += sizeof(v);
650 				}
651 				break;
652 			}
653 
654 			case 'E': {
655 				/* pack big endian double */
656 				while (arg-- > 0) {
657 					double v = (double) zval_get_double(&argv[currentarg++]);
658 					php_pack_copy_double(0, &ZSTR_VAL(output)[outputpos], v);
659 					outputpos += sizeof(v);
660 				}
661 				break;
662 			}
663 
664 			case 'x':
665 				memset(&ZSTR_VAL(output)[outputpos], '\0', arg);
666 				outputpos += arg;
667 				break;
668 
669 			case 'X':
670 				outputpos -= arg;
671 
672 				if (outputpos < 0) {
673 					outputpos = 0;
674 				}
675 				break;
676 
677 			case '@':
678 				if (arg > outputpos) {
679 					memset(&ZSTR_VAL(output)[outputpos], '\0', arg - outputpos);
680 				}
681 				outputpos = arg;
682 				break;
683 		}
684 	}
685 
686 	efree(formatcodes);
687 	efree(formatargs);
688 	ZSTR_VAL(output)[outputpos] = '\0';
689 	ZSTR_LEN(output) = outputpos;
690 	RETURN_NEW_STR(output);
691 }
692 /* }}} */
693 
/* unpack() is based on Perl's unpack(), but is modified a bit from there.
 * Rather than depending on error-prone ordered lists or syntactically
 * unpleasant pass-by-reference, we return an array with named elements.
 * Syntax is "f[repeat]name/...", where "f" is the
 * formatter char (like pack()), "[repeat]" is the optional repeater argument,
 * and "name" is the name of the array key to use.
 * Example: "c2chars/nints" will return an array with keys
 * chars1, chars2, and ints.
 * Numeric pack types will return numbers, a and A will return strings,
 * f and d will return doubles.
 * Implemented formats are Z, A, a, h, H, c, C, s, S, i, I, l, L, n, N, v, V, q, Q, J, P, f, d, x, X, @.
 * Added g, G for little endian float and big endian float, added e, E for little endian double and big endian double.
 */
707 /* {{{ Unpack binary string into named array elements according to format argument */
PHP_FUNCTION(unpack)708 PHP_FUNCTION(unpack)
709 {
710 	char *format, *input;
711 	zend_string *formatarg, *inputarg;
712 	zend_long formatlen, inputpos, inputlen;
713 	int i;
714 	zend_long offset = 0;
715 
716 	ZEND_PARSE_PARAMETERS_START(2, 3)
717 		Z_PARAM_STR(formatarg)
718 		Z_PARAM_STR(inputarg)
719 		Z_PARAM_OPTIONAL
720 		Z_PARAM_LONG(offset)
721 	ZEND_PARSE_PARAMETERS_END();
722 
723 	format = ZSTR_VAL(formatarg);
724 	formatlen = ZSTR_LEN(formatarg);
725 	input = ZSTR_VAL(inputarg);
726 	inputlen = ZSTR_LEN(inputarg);
727 	inputpos = 0;
728 
729 
730 	if (offset < 0 || offset > inputlen) {
731 		zend_argument_value_error(3, "must be contained in argument #2 ($data)");
732 		RETURN_THROWS();
733 	}
734 
735 	input += offset;
736 	inputlen -= offset;
737 
738 	array_init(return_value);
739 
740 	while (formatlen-- > 0) {
741 		char type = *(format++);
742 		char c;
743 		int repetitions = 1, argb;
744 		char *name;
745 		int namelen;
746 		int size = 0;
747 
748 		/* Handle format arguments if any */
749 		if (formatlen > 0) {
750 			c = *format;
751 
752 			if (c >= '0' && c <= '9') {
753 				errno = 0;
754 				long tmp = strtol(format, NULL, 10);
755 				/* There is not strtoi. We have to check the range ourselves.
756 				 * With 32-bit long the INT_{MIN,MAX} are useless because long == int, but with 64-bit they do limit us to 32-bit. */
757 				if (errno || tmp < INT_MIN || tmp > INT_MAX) {
758 					php_error_docref(NULL, E_WARNING, "Type %c: integer overflow", type);
759 					zend_array_destroy(Z_ARR_P(return_value));
760 					RETURN_FALSE;
761 				}
762 				repetitions = tmp;
763 
764 				while (formatlen > 0 && *format >= '0' && *format <= '9') {
765 					format++;
766 					formatlen--;
767 				}
768 			} else if (c == '*') {
769 				repetitions = -1;
770 				format++;
771 				formatlen--;
772 			}
773 		}
774 
775 		/* Get of new value in array */
776 		name = format;
777 		argb = repetitions;
778 
779 		while (formatlen > 0 && *format != '/') {
780 			formatlen--;
781 			format++;
782 		}
783 
784 		namelen = format - name;
785 
786 		if (namelen > 200)
787 			namelen = 200;
788 
789 		switch ((int) type) {
790 			/* Never use any input */
791 			case 'X':
792 				size = -1;
793 				if (repetitions < 0) {
794 					php_error_docref(NULL, E_WARNING, "Type %c: '*' ignored", type);
795 					repetitions = 1;
796 				}
797 				break;
798 
799 			case '@':
800 				size = 0;
801 				break;
802 
803 			case 'a':
804 			case 'A':
805 			case 'Z':
806 				size = repetitions;
807 				repetitions = 1;
808 				break;
809 
810 			case 'h':
811 			case 'H':
812 				size = (repetitions > 0) ? ((unsigned int) repetitions + 1) / 2 : repetitions;
813 				repetitions = 1;
814 				break;
815 
816 			/* Use 1 byte of input */
817 			case 'c':
818 			case 'C':
819 			case 'x':
820 				size = 1;
821 				break;
822 
823 			/* Use 2 bytes of input */
824 			case 's':
825 			case 'S':
826 			case 'n':
827 			case 'v':
828 				size = 2;
829 				break;
830 
831 			/* Use sizeof(int) bytes of input */
832 			case 'i':
833 			case 'I':
834 				size = sizeof(int);
835 				break;
836 
837 			/* Use 4 bytes of input */
838 			case 'l':
839 			case 'L':
840 			case 'N':
841 			case 'V':
842 				size = 4;
843 				break;
844 
845 			/* Use 8 bytes of input */
846 			case 'q':
847 			case 'Q':
848 			case 'J':
849 			case 'P':
850 #if SIZEOF_ZEND_LONG > 4
851 				size = 8;
852 				break;
853 #else
854 				zend_value_error("64-bit format codes are not available for 32-bit versions of PHP");
855 				RETURN_THROWS();
856 #endif
857 
858 			/* Use sizeof(float) bytes of input */
859 			case 'f':
860 			case 'g':
861 			case 'G':
862 				size = sizeof(float);
863 				break;
864 
865 			/* Use sizeof(double) bytes of input */
866 			case 'd':
867 			case 'e':
868 			case 'E':
869 				size = sizeof(double);
870 				break;
871 
872 			default:
873 				zend_value_error("Invalid format type %c", type);
874 				RETURN_THROWS();
875 		}
876 
877 
878 		/* Do actual unpacking */
879 		for (i = 0; i != repetitions; i++ ) {
880 
881 			if (size != 0 && size != -1 && INT_MAX - size + 1 < inputpos) {
882 				php_error_docref(NULL, E_WARNING, "Type %c: integer overflow", type);
883 				zend_array_destroy(Z_ARR_P(return_value));
884 				RETURN_FALSE;
885 			}
886 
887 			if ((inputpos + size) <= inputlen) {
888 
889 				zend_string* real_name;
890 				zval val;
891 
892 				if (repetitions == 1 && namelen > 0) {
893 					/* Use a part of the formatarg argument directly as the name. */
894 					real_name = zend_string_init_fast(name, namelen);
895 
896 				} else {
897 					/* Need to add the 1-based element number to the name */
898 					char buf[MAX_LENGTH_OF_LONG + 1];
899 					char *res = zend_print_ulong_to_buf(buf + sizeof(buf) - 1, i+1);
900 					size_t digits = buf + sizeof(buf) - 1 - res;
901 					real_name = zend_string_concat2(name, namelen, res, digits);
902 				}
903 
904 				switch ((int) type) {
905 					case 'a': {
906 						/* a will not strip any trailing whitespace or null padding */
907 						zend_long len = inputlen - inputpos;	/* Remaining string */
908 
909 						/* If size was given take minimum of len and size */
910 						if ((size >= 0) && (len > size)) {
911 							len = size;
912 						}
913 
914 						size = len;
915 
916 						ZVAL_STRINGL(&val, &input[inputpos], len);
917 						zend_symtable_update(Z_ARRVAL_P(return_value), real_name, &val);
918 						break;
919 					}
920 					case 'A': {
921 						/* A will strip any trailing whitespace */
922 						char padn = '\0'; char pads = ' '; char padt = '\t'; char padc = '\r'; char padl = '\n';
923 						zend_long len = inputlen - inputpos;	/* Remaining string */
924 
925 						/* If size was given take minimum of len and size */
926 						if ((size >= 0) && (len > size)) {
927 							len = size;
928 						}
929 
930 						size = len;
931 
932 						/* Remove trailing white space and nulls chars from unpacked data */
933 						while (--len >= 0) {
934 							if (input[inputpos + len] != padn
935 								&& input[inputpos + len] != pads
936 								&& input[inputpos + len] != padt
937 								&& input[inputpos + len] != padc
938 								&& input[inputpos + len] != padl
939 							)
940 								break;
941 						}
942 
943 						ZVAL_STRINGL(&val, &input[inputpos], len + 1);
944 						zend_symtable_update(Z_ARRVAL_P(return_value), real_name, &val);
945 						break;
946 					}
947 					/* New option added for Z to remain in-line with the Perl implementation */
948 					case 'Z': {
949 						/* Z will strip everything after the first null character */
950 						char pad = '\0';
951 						zend_long s,
952 							 len = inputlen - inputpos;	/* Remaining string */
953 
954 						/* If size was given take minimum of len and size */
955 						if ((size >= 0) && (len > size)) {
956 							len = size;
957 						}
958 
959 						size = len;
960 
961 						/* Remove everything after the first null */
962 						for (s=0 ; s < len ; s++) {
963 							if (input[inputpos + s] == pad)
964 								break;
965 						}
966 						len = s;
967 
968 						ZVAL_STRINGL(&val, &input[inputpos], len);
969 						zend_symtable_update(Z_ARRVAL_P(return_value), real_name, &val);
970 						break;
971 					}
972 
973 
974 					case 'h':
975 					case 'H': {
976 						zend_long len = (inputlen - inputpos) * 2;	/* Remaining */
977 						int nibbleshift = (type == 'h') ? 0 : 4;
978 						int first = 1;
979 						zend_string *buf;
980 						zend_long ipos, opos;
981 
982 						/* If size was given take minimum of len and size */
983 						if (size >= 0 && len > (size * 2)) {
984 							len = size * 2;
985 						}
986 
987 						if (len > 0 && argb > 0) {
988 							len -= argb % 2;
989 						}
990 
991 						buf = zend_string_alloc(len, 0);
992 
993 						for (ipos = opos = 0; opos < len; opos++) {
994 							char cc = (input[inputpos + ipos] >> nibbleshift) & 0xf;
995 
996 							if (cc < 10) {
997 								cc += '0';
998 							} else {
999 								cc += 'a' - 10;
1000 							}
1001 
1002 							ZSTR_VAL(buf)[opos] = cc;
1003 							nibbleshift = (nibbleshift + 4) & 7;
1004 
1005 							if (first-- == 0) {
1006 								ipos++;
1007 								first = 1;
1008 							}
1009 						}
1010 
1011 						ZSTR_VAL(buf)[len] = '\0';
1012 
1013 						ZVAL_STR(&val, buf);
1014 						zend_symtable_update(Z_ARRVAL_P(return_value), real_name, &val);
1015 						break;
1016 					}
1017 
1018 					case 'c':   /* signed */
1019 					case 'C': { /* unsigned */
1020 						uint8_t x = input[inputpos];
1021 						zend_long v = (type == 'c') ? (int8_t) x : x;
1022 
1023 						ZVAL_LONG(&val, v);
1024 						zend_symtable_update(Z_ARRVAL_P(return_value), real_name, &val);
1025 						break;
1026 					}
1027 
1028 					case 's':   /* signed machine endian   */
1029 					case 'S':   /* unsigned machine endian */
1030 					case 'n':   /* unsigned big endian     */
1031 					case 'v': { /* unsigned little endian  */
1032 						zend_long v = 0;
1033 						uint16_t x = *((unaligned_uint16_t*) &input[inputpos]);
1034 
1035 						if (type == 's') {
1036 							v = (int16_t) x;
1037 						} else if ((type == 'n' && MACHINE_LITTLE_ENDIAN) || (type == 'v' && !MACHINE_LITTLE_ENDIAN)) {
1038 							v = php_pack_reverse_int16(x);
1039 						} else {
1040 							v = x;
1041 						}
1042 
1043 						ZVAL_LONG(&val, v);
1044 						zend_symtable_update(Z_ARRVAL_P(return_value), real_name, &val);
1045 						break;
1046 					}
1047 
1048 					case 'i':   /* signed integer, machine size, machine endian */
1049 					case 'I': { /* unsigned integer, machine size, machine endian */
1050 						zend_long v;
1051 						if (type == 'i') {
1052 							int x = *((unaligned_int*) &input[inputpos]);
1053 							v = x;
1054 						} else {
1055 							unsigned int x = *((unaligned_uint*) &input[inputpos]);
1056 							v = x;
1057 						}
1058 
1059 						ZVAL_LONG(&val, v);
1060 						zend_symtable_update(Z_ARRVAL_P(return_value), real_name, &val);
1061 						break;
1062 					}
1063 
1064 					case 'l':   /* signed machine endian   */
1065 					case 'L':   /* unsigned machine endian */
1066 					case 'N':   /* unsigned big endian     */
1067 					case 'V': { /* unsigned little endian  */
1068 						zend_long v = 0;
1069 						uint32_t x = *((unaligned_uint32_t*) &input[inputpos]);
1070 
1071 						if (type == 'l') {
1072 							v = (int32_t) x;
1073 						} else if ((type == 'N' && MACHINE_LITTLE_ENDIAN) || (type == 'V' && !MACHINE_LITTLE_ENDIAN)) {
1074 							v = php_pack_reverse_int32(x);
1075 						} else {
1076 							v = x;
1077 						}
1078 
1079 						ZVAL_LONG(&val, v);
1080 						zend_symtable_update(Z_ARRVAL_P(return_value), real_name, &val);
1081 
1082 						break;
1083 					}
1084 
1085 #if SIZEOF_ZEND_LONG > 4
1086 					case 'q':   /* signed machine endian   */
1087 					case 'Q':   /* unsigned machine endian */
1088 					case 'J':   /* unsigned big endian     */
1089 					case 'P': { /* unsigned little endian  */
1090 						zend_long v = 0;
1091 						uint64_t x = *((unaligned_uint64_t*) &input[inputpos]);
1092 
1093 						if (type == 'q') {
1094 							v = (int64_t) x;
1095 						} else if ((type == 'J' && MACHINE_LITTLE_ENDIAN) || (type == 'P' && !MACHINE_LITTLE_ENDIAN)) {
1096 							v = php_pack_reverse_int64(x);
1097 						} else {
1098 							v = x;
1099 						}
1100 
1101 						ZVAL_LONG(&val, v);
1102 						zend_symtable_update(Z_ARRVAL_P(return_value), real_name, &val);
1103 						break;
1104 					}
1105 #endif
1106 
1107 					case 'f': /* float */
1108 					case 'g': /* little endian float*/
1109 					case 'G': /* big endian float*/
1110 					{
1111 						float v;
1112 
1113 						if (type == 'g') {
1114 							v = php_pack_parse_float(1, &input[inputpos]);
1115 						} else if (type == 'G') {
1116 							v = php_pack_parse_float(0, &input[inputpos]);
1117 						} else {
1118 							memcpy(&v, &input[inputpos], sizeof(float));
1119 						}
1120 
1121 						ZVAL_DOUBLE(&val, v);
1122 						zend_symtable_update(Z_ARRVAL_P(return_value), real_name, &val);
1123 						break;
1124 					}
1125 
1126 
1127 					case 'd': /* double */
1128 					case 'e': /* little endian float */
1129 					case 'E': /* big endian float */
1130 					{
1131 						double v;
1132 						if (type == 'e') {
1133 							v = php_pack_parse_double(1, &input[inputpos]);
1134 						} else if (type == 'E') {
1135 							v = php_pack_parse_double(0, &input[inputpos]);
1136 						} else {
1137 							memcpy(&v, &input[inputpos], sizeof(double));
1138 						}
1139 
1140 						ZVAL_DOUBLE(&val, v);
1141 						zend_symtable_update(Z_ARRVAL_P(return_value), real_name, &val);
1142 						break;
1143 					}
1144 
1145 					case 'x':
1146 						/* Do nothing with input, just skip it */
1147 						break;
1148 
1149 					case 'X':
1150 						if (inputpos < size) {
1151 							inputpos = -size;
1152 							i = repetitions - 1;		/* Break out of for loop */
1153 
1154 							if (repetitions >= 0) {
1155 								php_error_docref(NULL, E_WARNING, "Type %c: outside of string", type);
1156 							}
1157 						}
1158 						break;
1159 
1160 					case '@':
1161 						if (repetitions <= inputlen) {
1162 							inputpos = repetitions;
1163 						} else {
1164 							php_error_docref(NULL, E_WARNING, "Type %c: outside of string", type);
1165 						}
1166 
1167 						i = repetitions - 1;	/* Done, break out of for loop */
1168 						break;
1169 				}
1170 
1171 				zend_string_release(real_name);
1172 
1173 				inputpos += size;
1174 				if (inputpos < 0) {
1175 					if (size != -1) { /* only print warning if not working with * */
1176 						php_error_docref(NULL, E_WARNING, "Type %c: outside of string", type);
1177 					}
1178 					inputpos = 0;
1179 				}
1180 			} else if (repetitions < 0) {
1181 				/* Reached end of input for '*' repeater */
1182 				break;
1183 			} else {
1184 				php_error_docref(NULL, E_WARNING, "Type %c: not enough input values, need %d values but only " ZEND_LONG_FMT " %s provided", type, size, inputlen - inputpos, inputlen - inputpos == 1 ? "was" : "were");
1185 				zend_array_destroy(Z_ARR_P(return_value));
1186 				RETURN_FALSE;
1187 			}
1188 		}
1189 
1190 		if (formatlen > 0) {
1191 			formatlen--;	/* Skip '/' separator, does no harm if inputlen == 0 */
1192 			format++;
1193 		}
1194 	}
1195 }
1196 /* }}} */
1197 
1198 /* {{{ PHP_MINIT_FUNCTION */
PHP_MINIT_FUNCTION(pack)1199 PHP_MINIT_FUNCTION(pack)
1200 {
1201 	int i;
1202 
1203 	if (MACHINE_LITTLE_ENDIAN) {
1204 		/* Where to get lo to hi bytes from */
1205 		byte_map[0] = 0;
1206 
1207 		for (i = 0; i < (int)sizeof(int); i++) {
1208 			int_map[i] = i;
1209 		}
1210 
1211 		machine_endian_short_map[0] = 0;
1212 		machine_endian_short_map[1] = 1;
1213 		big_endian_short_map[0] = 1;
1214 		big_endian_short_map[1] = 0;
1215 		little_endian_short_map[0] = 0;
1216 		little_endian_short_map[1] = 1;
1217 
1218 		machine_endian_long_map[0] = 0;
1219 		machine_endian_long_map[1] = 1;
1220 		machine_endian_long_map[2] = 2;
1221 		machine_endian_long_map[3] = 3;
1222 		big_endian_long_map[0] = 3;
1223 		big_endian_long_map[1] = 2;
1224 		big_endian_long_map[2] = 1;
1225 		big_endian_long_map[3] = 0;
1226 		little_endian_long_map[0] = 0;
1227 		little_endian_long_map[1] = 1;
1228 		little_endian_long_map[2] = 2;
1229 		little_endian_long_map[3] = 3;
1230 
1231 #if SIZEOF_ZEND_LONG > 4
1232 		machine_endian_longlong_map[0] = 0;
1233 		machine_endian_longlong_map[1] = 1;
1234 		machine_endian_longlong_map[2] = 2;
1235 		machine_endian_longlong_map[3] = 3;
1236 		machine_endian_longlong_map[4] = 4;
1237 		machine_endian_longlong_map[5] = 5;
1238 		machine_endian_longlong_map[6] = 6;
1239 		machine_endian_longlong_map[7] = 7;
1240 		big_endian_longlong_map[0] = 7;
1241 		big_endian_longlong_map[1] = 6;
1242 		big_endian_longlong_map[2] = 5;
1243 		big_endian_longlong_map[3] = 4;
1244 		big_endian_longlong_map[4] = 3;
1245 		big_endian_longlong_map[5] = 2;
1246 		big_endian_longlong_map[6] = 1;
1247 		big_endian_longlong_map[7] = 0;
1248 		little_endian_longlong_map[0] = 0;
1249 		little_endian_longlong_map[1] = 1;
1250 		little_endian_longlong_map[2] = 2;
1251 		little_endian_longlong_map[3] = 3;
1252 		little_endian_longlong_map[4] = 4;
1253 		little_endian_longlong_map[5] = 5;
1254 		little_endian_longlong_map[6] = 6;
1255 		little_endian_longlong_map[7] = 7;
1256 #endif
1257 	}
1258 	else {
1259 		zval val;
1260 		int size = sizeof(Z_LVAL(val));
1261 		Z_LVAL(val)=0; /*silence a warning*/
1262 
1263 		/* Where to get hi to lo bytes from */
1264 		byte_map[0] = size - 1;
1265 
1266 		for (i = 0; i < (int)sizeof(int); i++) {
1267 			int_map[i] = size - (sizeof(int) - i);
1268 		}
1269 
1270 		machine_endian_short_map[0] = size - 2;
1271 		machine_endian_short_map[1] = size - 1;
1272 		big_endian_short_map[0] = size - 2;
1273 		big_endian_short_map[1] = size - 1;
1274 		little_endian_short_map[0] = size - 1;
1275 		little_endian_short_map[1] = size - 2;
1276 
1277 		machine_endian_long_map[0] = size - 4;
1278 		machine_endian_long_map[1] = size - 3;
1279 		machine_endian_long_map[2] = size - 2;
1280 		machine_endian_long_map[3] = size - 1;
1281 		big_endian_long_map[0] = size - 4;
1282 		big_endian_long_map[1] = size - 3;
1283 		big_endian_long_map[2] = size - 2;
1284 		big_endian_long_map[3] = size - 1;
1285 		little_endian_long_map[0] = size - 1;
1286 		little_endian_long_map[1] = size - 2;
1287 		little_endian_long_map[2] = size - 3;
1288 		little_endian_long_map[3] = size - 4;
1289 
1290 #if SIZEOF_ZEND_LONG > 4
1291 		machine_endian_longlong_map[0] = size - 8;
1292 		machine_endian_longlong_map[1] = size - 7;
1293 		machine_endian_longlong_map[2] = size - 6;
1294 		machine_endian_longlong_map[3] = size - 5;
1295 		machine_endian_longlong_map[4] = size - 4;
1296 		machine_endian_longlong_map[5] = size - 3;
1297 		machine_endian_longlong_map[6] = size - 2;
1298 		machine_endian_longlong_map[7] = size - 1;
1299 		big_endian_longlong_map[0] = size - 8;
1300 		big_endian_longlong_map[1] = size - 7;
1301 		big_endian_longlong_map[2] = size - 6;
1302 		big_endian_longlong_map[3] = size - 5;
1303 		big_endian_longlong_map[4] = size - 4;
1304 		big_endian_longlong_map[5] = size - 3;
1305 		big_endian_longlong_map[6] = size - 2;
1306 		big_endian_longlong_map[7] = size - 1;
1307 		little_endian_longlong_map[0] = size - 1;
1308 		little_endian_longlong_map[1] = size - 2;
1309 		little_endian_longlong_map[2] = size - 3;
1310 		little_endian_longlong_map[3] = size - 4;
1311 		little_endian_longlong_map[4] = size - 5;
1312 		little_endian_longlong_map[5] = size - 6;
1313 		little_endian_longlong_map[6] = size - 7;
1314 		little_endian_longlong_map[7] = size - 8;
1315 #endif
1316 	}
1317 
1318 	return SUCCESS;
1319 }
1320 /* }}} */
1321