xref: /php-src/ext/standard/pack.c (revision 25a51461)
1 /*
2    +----------------------------------------------------------------------+
3    | Copyright (c) The PHP Group                                          |
4    +----------------------------------------------------------------------+
5    | This source file is subject to version 3.01 of the PHP license,      |
6    | that is bundled with this package in the file LICENSE, and is        |
7    | available through the world-wide-web at the following url:           |
8    | https://www.php.net/license/3_01.txt                                 |
9    | If you did not receive a copy of the PHP license and are unable to   |
10    | obtain it through the world-wide-web, please send a note to          |
11    | license@php.net so we can mail you a copy immediately.               |
12    +----------------------------------------------------------------------+
13    | Author: Chris Schneider <cschneid@relog.ch>                          |
14    +----------------------------------------------------------------------+
15  */
16 
17 #include "php.h"
18 
19 #include <stdio.h>
20 #include <stdlib.h>
21 #include <errno.h>
22 #include <sys/types.h>
23 #include <sys/stat.h>
24 #include <fcntl.h>
25 #ifdef PHP_WIN32
26 #define O_RDONLY _O_RDONLY
27 #include "win32/param.h"
28 #else
29 #include <sys/param.h>
30 #endif
31 #include "pack.h"
32 #ifdef HAVE_PWD_H
33 #ifdef PHP_WIN32
34 #include "win32/pwd.h"
35 #else
36 #include <pwd.h>
37 #endif
38 #endif
39 #include "fsock.h"
40 #ifdef HAVE_NETINET_IN_H
41 #include <netinet/in.h>
42 #endif
43 
/* Advance the local `outputpos` by (a) elements of (b) bytes each.
 *
 * Before adding, the macro checks that (a) is not negative and that
 * outputpos + (a)*(b) still fits in an int. If either check fails it frees
 * `formatcodes` and `formatargs`, raises a ValueError naming the local `code`,
 * and leaves the calling function through RETURN_THROWS().
 *
 * It therefore only works inside a function that declares outputpos, code,
 * formatcodes and formatargs. The expansion deliberately ends with its own
 * semicolon and is NOT wrapped in do { } while (0): the call sites in this
 * file invoke it without a trailing semicolon.
 *
 * (b) is converted to int once, fully parenthesised, so that compound
 * expressions and size_t values such as sizeof(int) behave the same in the
 * range check and in the multiplication. */
#define INC_OUTPUTPOS(a,b) \
	if ((a) < 0 || ((INT_MAX - outputpos)/((int)(b))) < (a)) { \
		efree(formatcodes);	\
		efree(formatargs);	\
		zend_value_error("Type %c: integer overflow in format string", code); \
		RETURN_THROWS(); \
	} \
	outputpos += (a)*((int)(b));
52 
/* 1 when the build target is little endian, 0 when WORDS_BIGENDIAN is defined.
 * Lets endian decisions be written as ordinary C conditions. */
#if defined(WORDS_BIGENDIAN)
# define MACHINE_LITTLE_ENDIAN 0
#else
# define MACHINE_LITTLE_ENDIAN 1
#endif
58 
/* Integer types declared through ZEND_SET_ALIGNED with an alignment argument
 * of 1. The unpack code in this file reads multi-byte integers straight out of
 * the input string at arbitrary byte offsets by casting to pointers to these
 * types, e.g. *((unaligned_uint16_t*) &input[inputpos]).
 * NOTE(review): ZEND_SET_ALIGNED is defined outside this file; presumably it
 * lowers the required alignment to one byte so such loads are permitted on
 * strict-alignment targets - confirm against its definition. */
typedef ZEND_SET_ALIGNED(1, uint16_t unaligned_uint16_t);
typedef ZEND_SET_ALIGNED(1, uint32_t unaligned_uint32_t);
typedef ZEND_SET_ALIGNED(1, uint64_t unaligned_uint64_t);
typedef ZEND_SET_ALIGNED(1, unsigned int unaligned_uint);
typedef ZEND_SET_ALIGNED(1, int unaligned_int);
64 
/* Byte-selection tables consumed by php_pack(): output byte n is taken from
 * byte map[n] of the in-memory zend_long being packed. The tables are only
 * declared here; they are filled in elsewhere in the file. */

/* One byte (8 bit char), machine endian */
static int byte_map[1];

/* Native int (machine dependent width), machine endian */
static int int_map[sizeof(int)];

/* 16 bit shorts: machine, big and little endian */
static int machine_endian_short_map[2],
           big_endian_short_map[2],
           little_endian_short_map[2];

/* 32 bit longs: machine, big and little endian */
static int machine_endian_long_map[4],
           big_endian_long_map[4],
           little_endian_long_map[4];

#if SIZEOF_ZEND_LONG > 4
/* 64 bit quads: machine, big and little endian (64-bit zend_long builds only) */
static int machine_endian_longlong_map[8],
           big_endian_longlong_map[8],
           little_endian_longlong_map[8];
#endif
87 
88 /* {{{ php_pack */
php_pack(zval * val,size_t size,int * map,char * output)89 static void php_pack(zval *val, size_t size, int *map, char *output)
90 {
91 	size_t i;
92 	char *v;
93 
94 	convert_to_long(val);
95 	v = (char *) &Z_LVAL_P(val);
96 
97 	for (i = 0; i < size; i++) {
98 		*output++ = v[map[i]];
99 	}
100 }
101 /* }}} */
102 
php_pack_reverse_int16(uint16_t arg)103 ZEND_ATTRIBUTE_CONST static inline uint16_t php_pack_reverse_int16(uint16_t arg)
104 {
105 	return ((arg & 0xFF) << 8) | ((arg >> 8) & 0xFF);
106 }
107 
108 /* {{{ php_pack_reverse_int32 */
php_pack_reverse_int32(uint32_t arg)109 ZEND_ATTRIBUTE_CONST static inline uint32_t php_pack_reverse_int32(uint32_t arg)
110 {
111 	uint32_t result;
112 	result = ((arg & 0xFF) << 24) | ((arg & 0xFF00) << 8) | ((arg >> 8) & 0xFF00) | ((arg >> 24) & 0xFF);
113 
114 	return result;
115 }
116 /* }}} */
117 
/* {{{ php_pack_reverse_int64
 * Reverse the order of the eight bytes of a 64-bit value.
 *
 * Done with plain shifts and masks on the value itself: first the two 32-bit
 * halves are exchanged, then the 16-bit quarters inside each half, then the
 * bytes inside each quarter. The result does not depend on host byte order. */
static inline uint64_t php_pack_reverse_int64(uint64_t arg)
{
	uint64_t x = arg;

	x = (x << 32) | (x >> 32);
	x = ((x & 0x0000FFFF0000FFFFULL) << 16) | ((x >> 16) & 0x0000FFFF0000FFFFULL);
	x = ((x & 0x00FF00FF00FF00FFULL) << 8) | ((x >> 8) & 0x00FF00FF00FF00FFULL);

	return x;
}
/* }}} */
132 
133 /* {{{ php_pack_copy_float */
php_pack_copy_float(int is_little_endian,void * dst,float f)134 static void php_pack_copy_float(int is_little_endian, void * dst, float f)
135 {
136 	union Copy32 {
137 		float f;
138 		uint32_t i;
139 	} m;
140 	m.f = f;
141 
142 #ifdef WORDS_BIGENDIAN
143 	if (is_little_endian) {
144 		m.i = php_pack_reverse_int32(m.i);
145 	}
146 #else /* WORDS_BIGENDIAN */
147 	if (!is_little_endian) {
148 		m.i = php_pack_reverse_int32(m.i);
149 	}
150 #endif /* WORDS_BIGENDIAN */
151 
152 	memcpy(dst, &m.f, sizeof(float));
153 }
154 /* }}} */
155 
156 /* {{{ php_pack_copy_double */
php_pack_copy_double(int is_little_endian,void * dst,double d)157 static void php_pack_copy_double(int is_little_endian, void * dst, double d)
158 {
159 	union Copy64 {
160 		double d;
161 		uint64_t i;
162 	} m;
163 	m.d = d;
164 
165 #ifdef WORDS_BIGENDIAN
166 	if (is_little_endian) {
167 		m.i = php_pack_reverse_int64(m.i);
168 	}
169 #else /* WORDS_BIGENDIAN */
170 	if (!is_little_endian) {
171 		m.i = php_pack_reverse_int64(m.i);
172 	}
173 #endif /* WORDS_BIGENDIAN */
174 
175 	memcpy(dst, &m.d, sizeof(double));
176 }
177 /* }}} */
178 
179 /* {{{ php_pack_parse_float */
php_pack_parse_float(int is_little_endian,void * src)180 static float php_pack_parse_float(int is_little_endian, void * src)
181 {
182 	union Copy32 {
183 		float f;
184 		uint32_t i;
185 	} m;
186 	memcpy(&m.i, src, sizeof(float));
187 
188 #ifdef WORDS_BIGENDIAN
189 	if (is_little_endian) {
190 		m.i = php_pack_reverse_int32(m.i);
191 	}
192 #else /* WORDS_BIGENDIAN */
193 	if (!is_little_endian) {
194 		m.i = php_pack_reverse_int32(m.i);
195 	}
196 #endif /* WORDS_BIGENDIAN */
197 
198 	return m.f;
199 }
200 /* }}} */
201 
202 /* {{{ php_pack_parse_double */
php_pack_parse_double(int is_little_endian,void * src)203 static double php_pack_parse_double(int is_little_endian, void * src)
204 {
205 	union Copy64 {
206 		double d;
207 		uint64_t i;
208 	} m;
209 	memcpy(&m.i, src, sizeof(double));
210 
211 #ifdef WORDS_BIGENDIAN
212 	if (is_little_endian) {
213 		m.i = php_pack_reverse_int64(m.i);
214 	}
215 #else /* WORDS_BIGENDIAN */
216 	if (!is_little_endian) {
217 		m.i = php_pack_reverse_int64(m.i);
218 	}
219 #endif /* WORDS_BIGENDIAN */
220 
221 	return m.d;
222 }
223 /* }}} */
224 
225 /* pack() idea stolen from Perl (implemented formats behave the same as there except J and P)
226  * Implemented formats are Z, A, a, h, H, c, C, s, S, i, I, l, L, n, N, q, Q, J, P, f, d, x, X, @.
227  * Added g, G for little endian float and big endian float, added e, E for little endian double and big endian double.
228  */
229 /* {{{ Takes one or more arguments and packs them into a binary string according to the format argument */
PHP_FUNCTION(pack)230 PHP_FUNCTION(pack)
231 {
232 	zval *argv = NULL;
233 	int num_args = 0;
234 	size_t i;
235 	int currentarg;
236 	char *format;
237 	size_t formatlen;
238 	char *formatcodes;
239 	int *formatargs;
240 	size_t formatcount = 0;
241 	int outputpos = 0, outputsize = 0;
242 	zend_string *output;
243 
244 	ZEND_PARSE_PARAMETERS_START(1, -1)
245 		Z_PARAM_STRING(format, formatlen)
246 		Z_PARAM_VARIADIC('*', argv, num_args)
247 	ZEND_PARSE_PARAMETERS_END();
248 
249 	/* We have a maximum of <formatlen> format codes to deal with */
250 	formatcodes = safe_emalloc(formatlen, sizeof(*formatcodes), 0);
251 	formatargs = safe_emalloc(formatlen, sizeof(*formatargs), 0);
252 	currentarg = 0;
253 
254 	/* Preprocess format into formatcodes and formatargs */
255 	for (i = 0; i < formatlen; formatcount++) {
256 		char code = format[i++];
257 		int arg = 1;
258 
259 		/* Handle format arguments if any */
260 		if (i < formatlen) {
261 			char c = format[i];
262 
263 			if (c == '*') {
264 				arg = -1;
265 				i++;
266 			}
267 			else if (c >= '0' && c <= '9') {
268 				arg = atoi(&format[i]);
269 
270 				while (format[i] >= '0' && format[i] <= '9' && i < formatlen) {
271 					i++;
272 				}
273 			}
274 		}
275 
276 		/* Handle special arg '*' for all codes and check argv overflows */
277 		switch ((int) code) {
278 			/* Never uses any args */
279 			case 'x':
280 			case 'X':
281 			case '@':
282 				if (arg < 0) {
283 					php_error_docref(NULL, E_WARNING, "Type %c: '*' ignored", code);
284 					arg = 1;
285 				}
286 				break;
287 
288 			/* Always uses one arg */
289 			case 'a':
290 			case 'A':
291 			case 'Z':
292 			case 'h':
293 			case 'H':
294 				if (currentarg >= num_args) {
295 					efree(formatcodes);
296 					efree(formatargs);
297 					zend_value_error("Type %c: not enough arguments", code);
298 					RETURN_THROWS();
299 				}
300 
301 				if (arg < 0) {
302 					if (!try_convert_to_string(&argv[currentarg])) {
303 						efree(formatcodes);
304 						efree(formatargs);
305 						RETURN_THROWS();
306 					}
307 
308 					arg = Z_STRLEN(argv[currentarg]);
309 					if (code == 'Z') {
310 						/* add one because Z is always NUL-terminated:
311 						 * pack("Z*", "aa") === "aa\0"
312 						 * pack("Z2", "aa") === "a\0" */
313 						arg++;
314 					}
315 				}
316 
317 				currentarg++;
318 				break;
319 
320 			/* Use as many args as specified */
321 			case 'q':
322 			case 'Q':
323 			case 'J':
324 			case 'P':
325 #if SIZEOF_ZEND_LONG < 8
326 					efree(formatcodes);
327 					efree(formatargs);
328 					zend_value_error("64-bit format codes are not available for 32-bit versions of PHP");
329 					RETURN_THROWS();
330 #endif
331 			case 'c':
332 			case 'C':
333 			case 's':
334 			case 'S':
335 			case 'i':
336 			case 'I':
337 			case 'l':
338 			case 'L':
339 			case 'n':
340 			case 'N':
341 			case 'v':
342 			case 'V':
343 			case 'f': /* float */
344 			case 'g': /* little endian float */
345 			case 'G': /* big endian float */
346 			case 'd': /* double */
347 			case 'e': /* little endian double */
348 			case 'E': /* big endian double */
349 				if (arg < 0) {
350 					arg = num_args - currentarg;
351 				}
352 				if (currentarg > INT_MAX - arg) {
353 					goto too_few_args;
354 				}
355 				currentarg += arg;
356 
357 				if (currentarg > num_args) {
358 too_few_args:
359 					efree(formatcodes);
360 					efree(formatargs);
361 					zend_value_error("Type %c: too few arguments", code);
362 					RETURN_THROWS();
363 				}
364 				break;
365 
366 			default:
367 				efree(formatcodes);
368 				efree(formatargs);
369 				zend_value_error("Type %c: unknown format code", code);
370 				RETURN_THROWS();
371 		}
372 
373 		formatcodes[formatcount] = code;
374 		formatargs[formatcount] = arg;
375 	}
376 
377 	if (currentarg < num_args) {
378 		php_error_docref(NULL, E_WARNING, "%d arguments unused", (num_args - currentarg));
379 	}
380 
381 	/* Calculate output length and upper bound while processing*/
382 	for (i = 0; i < formatcount; i++) {
383 	    int code = (int) formatcodes[i];
384 		int arg = formatargs[i];
385 
386 		switch ((int) code) {
387 			case 'h':
388 			case 'H':
389 				INC_OUTPUTPOS((arg + (arg % 2)) / 2,1)	/* 4 bit per arg */
390 				break;
391 
392 			case 'a':
393 			case 'A':
394 			case 'Z':
395 			case 'c':
396 			case 'C':
397 			case 'x':
398 				INC_OUTPUTPOS(arg,1)		/* 8 bit per arg */
399 				break;
400 
401 			case 's':
402 			case 'S':
403 			case 'n':
404 			case 'v':
405 				INC_OUTPUTPOS(arg,2)		/* 16 bit per arg */
406 				break;
407 
408 			case 'i':
409 			case 'I':
410 				INC_OUTPUTPOS(arg,sizeof(int))
411 				break;
412 
413 			case 'l':
414 			case 'L':
415 			case 'N':
416 			case 'V':
417 				INC_OUTPUTPOS(arg,4)		/* 32 bit per arg */
418 				break;
419 
420 #if SIZEOF_ZEND_LONG > 4
421 			case 'q':
422 			case 'Q':
423 			case 'J':
424 			case 'P':
425 				INC_OUTPUTPOS(arg,8)		/* 32 bit per arg */
426 				break;
427 #endif
428 
429 			case 'f': /* float */
430 			case 'g': /* little endian float */
431 			case 'G': /* big endian float */
432 				INC_OUTPUTPOS(arg,sizeof(float))
433 				break;
434 
435 			case 'd': /* double */
436 			case 'e': /* little endian double */
437 			case 'E': /* big endian double */
438 				INC_OUTPUTPOS(arg,sizeof(double))
439 				break;
440 
441 			case 'X':
442 				outputpos -= arg;
443 
444 				if (outputpos < 0) {
445 					php_error_docref(NULL, E_WARNING, "Type %c: outside of string", code);
446 					outputpos = 0;
447 				}
448 				break;
449 
450 			case '@':
451 				outputpos = arg;
452 				break;
453 		}
454 
455 		if (outputsize < outputpos) {
456 			outputsize = outputpos;
457 		}
458 	}
459 
460 	output = zend_string_alloc(outputsize, 0);
461 	outputpos = 0;
462 	currentarg = 0;
463 
464 	/* Do actual packing */
465 	for (i = 0; i < formatcount; i++) {
466 	    int code = (int) formatcodes[i];
467 		int arg = formatargs[i];
468 
469 		switch ((int) code) {
470 			case 'a':
471 			case 'A':
472 			case 'Z': {
473 				size_t arg_cp = (code != 'Z') ? arg : MAX(0, arg - 1);
474 				zend_string *tmp_str;
475 				zend_string *str = zval_get_tmp_string(&argv[currentarg++], &tmp_str);
476 
477 				memset(&ZSTR_VAL(output)[outputpos], (code == 'a' || code == 'Z') ? '\0' : ' ', arg);
478 				memcpy(&ZSTR_VAL(output)[outputpos], ZSTR_VAL(str),
479 					   (ZSTR_LEN(str) < arg_cp) ? ZSTR_LEN(str) : arg_cp);
480 
481 				outputpos += arg;
482 				zend_tmp_string_release(tmp_str);
483 				break;
484 			}
485 
486 			case 'h':
487 			case 'H': {
488 				int nibbleshift = (code == 'h') ? 0 : 4;
489 				int first = 1;
490 				zend_string *tmp_str;
491 				zend_string *str = zval_get_tmp_string(&argv[currentarg++], &tmp_str);
492 				char *v = ZSTR_VAL(str);
493 
494 				outputpos--;
495 				if ((size_t)arg > ZSTR_LEN(str)) {
496 					php_error_docref(NULL, E_WARNING, "Type %c: not enough characters in string", code);
497 					arg = ZSTR_LEN(str);
498 				}
499 
500 				while (arg-- > 0) {
501 					char n = *v++;
502 
503 					if (n >= '0' && n <= '9') {
504 						n -= '0';
505 					} else if (n >= 'A' && n <= 'F') {
506 						n -= ('A' - 10);
507 					} else if (n >= 'a' && n <= 'f') {
508 						n -= ('a' - 10);
509 					} else {
510 						php_error_docref(NULL, E_WARNING, "Type %c: illegal hex digit %c", code, n);
511 						n = 0;
512 					}
513 
514 					if (first--) {
515 						ZSTR_VAL(output)[++outputpos] = 0;
516 					} else {
517 					  first = 1;
518 					}
519 
520 					ZSTR_VAL(output)[outputpos] |= (n << nibbleshift);
521 					nibbleshift = (nibbleshift + 4) & 7;
522 				}
523 
524 				outputpos++;
525 				zend_tmp_string_release(tmp_str);
526 				break;
527 			}
528 
529 			case 'c':
530 			case 'C':
531 				while (arg-- > 0) {
532 					php_pack(&argv[currentarg++], 1, byte_map, &ZSTR_VAL(output)[outputpos]);
533 					outputpos++;
534 				}
535 				break;
536 
537 			case 's':
538 			case 'S':
539 			case 'n':
540 			case 'v': {
541 				int *map = machine_endian_short_map;
542 
543 				if (code == 'n') {
544 					map = big_endian_short_map;
545 				} else if (code == 'v') {
546 					map = little_endian_short_map;
547 				}
548 
549 				while (arg-- > 0) {
550 					php_pack(&argv[currentarg++], 2, map, &ZSTR_VAL(output)[outputpos]);
551 					outputpos += 2;
552 				}
553 				break;
554 			}
555 
556 			case 'i':
557 			case 'I':
558 				while (arg-- > 0) {
559 					php_pack(&argv[currentarg++], sizeof(int), int_map, &ZSTR_VAL(output)[outputpos]);
560 					outputpos += sizeof(int);
561 				}
562 				break;
563 
564 			case 'l':
565 			case 'L':
566 			case 'N':
567 			case 'V': {
568 				int *map = machine_endian_long_map;
569 
570 				if (code == 'N') {
571 					map = big_endian_long_map;
572 				} else if (code == 'V') {
573 					map = little_endian_long_map;
574 				}
575 
576 				while (arg-- > 0) {
577 					php_pack(&argv[currentarg++], 4, map, &ZSTR_VAL(output)[outputpos]);
578 					outputpos += 4;
579 				}
580 				break;
581 			}
582 
583 #if SIZEOF_ZEND_LONG > 4
584 			case 'q':
585 			case 'Q':
586 			case 'J':
587 			case 'P': {
588 				int *map = machine_endian_longlong_map;
589 
590 				if (code == 'J') {
591 					map = big_endian_longlong_map;
592 				} else if (code == 'P') {
593 					map = little_endian_longlong_map;
594 				}
595 
596 				while (arg-- > 0) {
597 					php_pack(&argv[currentarg++], 8, map, &ZSTR_VAL(output)[outputpos]);
598 					outputpos += 8;
599 				}
600 				break;
601 			}
602 #endif
603 
604 			case 'f': {
605 				while (arg-- > 0) {
606 					float v = (float) zval_get_double(&argv[currentarg++]);
607 					memcpy(&ZSTR_VAL(output)[outputpos], &v, sizeof(v));
608 					outputpos += sizeof(v);
609 				}
610 				break;
611 			}
612 
613 			case 'g': {
614 				/* pack little endian float */
615 				while (arg-- > 0) {
616 					float v = (float) zval_get_double(&argv[currentarg++]);
617 					php_pack_copy_float(1, &ZSTR_VAL(output)[outputpos], v);
618 					outputpos += sizeof(v);
619 				}
620 
621 				break;
622 			}
623 			case 'G': {
624 				/* pack big endian float */
625 				while (arg-- > 0) {
626 					float v = (float) zval_get_double(&argv[currentarg++]);
627 					php_pack_copy_float(0, &ZSTR_VAL(output)[outputpos], v);
628 					outputpos += sizeof(v);
629 				}
630 				break;
631 			}
632 
633 			case 'd': {
634 				while (arg-- > 0) {
635 					double v = (double) zval_get_double(&argv[currentarg++]);
636 					memcpy(&ZSTR_VAL(output)[outputpos], &v, sizeof(v));
637 					outputpos += sizeof(v);
638 				}
639 				break;
640 			}
641 
642 			case 'e': {
643 				/* pack little endian double */
644 				while (arg-- > 0) {
645 					double v = (double) zval_get_double(&argv[currentarg++]);
646 					php_pack_copy_double(1, &ZSTR_VAL(output)[outputpos], v);
647 					outputpos += sizeof(v);
648 				}
649 				break;
650 			}
651 
652 			case 'E': {
653 				/* pack big endian double */
654 				while (arg-- > 0) {
655 					double v = (double) zval_get_double(&argv[currentarg++]);
656 					php_pack_copy_double(0, &ZSTR_VAL(output)[outputpos], v);
657 					outputpos += sizeof(v);
658 				}
659 				break;
660 			}
661 
662 			case 'x':
663 				memset(&ZSTR_VAL(output)[outputpos], '\0', arg);
664 				outputpos += arg;
665 				break;
666 
667 			case 'X':
668 				outputpos -= arg;
669 
670 				if (outputpos < 0) {
671 					outputpos = 0;
672 				}
673 				break;
674 
675 			case '@':
676 				if (arg > outputpos) {
677 					memset(&ZSTR_VAL(output)[outputpos], '\0', arg - outputpos);
678 				}
679 				outputpos = arg;
680 				break;
681 		}
682 	}
683 
684 	efree(formatcodes);
685 	efree(formatargs);
686 	ZSTR_VAL(output)[outputpos] = '\0';
687 	ZSTR_LEN(output) = outputpos;
688 	RETURN_NEW_STR(output);
689 }
690 /* }}} */
691 
692 /* unpack() is based on Perl's unpack(), but is modified a bit from there.
693  * Rather than depending on error-prone ordered lists or syntactically
694  * unpleasant pass-by-reference, we return an object with named parameters
695  * (like *_fetch_object()). Syntax is "f[repeat]name/...", where "f" is the
696  * formatter char (like pack()), "[repeat]" is the optional repeater argument,
697  * and "name" is the name of the variable to use.
698  * Example: "c2chars/nints" will return an object with fields
699  * chars1, chars2, and ints.
700  * Numeric pack types will return numbers, a and A will return strings,
701  * f and d will return doubles.
702  * Implemented formats are Z, A, a, h, H, c, C, s, S, i, I, l, L, n, N, q, Q, J, P, f, d, x, X, @.
703  * Added g, G for little endian float and big endian float, added e, E for little endian double and big endian double.
704  */
705 /* {{{ Unpack binary string into named array elements according to format argument */
PHP_FUNCTION(unpack)706 PHP_FUNCTION(unpack)
707 {
708 	char *format, *input;
709 	zend_string *formatarg, *inputarg;
710 	zend_long formatlen, inputpos, inputlen;
711 	int i;
712 	zend_long offset = 0;
713 
714 	ZEND_PARSE_PARAMETERS_START(2, 3)
715 		Z_PARAM_STR(formatarg)
716 		Z_PARAM_STR(inputarg)
717 		Z_PARAM_OPTIONAL
718 		Z_PARAM_LONG(offset)
719 	ZEND_PARSE_PARAMETERS_END();
720 
721 	format = ZSTR_VAL(formatarg);
722 	formatlen = ZSTR_LEN(formatarg);
723 	input = ZSTR_VAL(inputarg);
724 	inputlen = ZSTR_LEN(inputarg);
725 	inputpos = 0;
726 
727 
728 	if (offset < 0 || offset > inputlen) {
729 		zend_argument_value_error(3, "must be contained in argument #2 ($data)");
730 		RETURN_THROWS();
731 	}
732 
733 	input += offset;
734 	inputlen -= offset;
735 
736 	array_init(return_value);
737 
738 	while (formatlen-- > 0) {
739 		char type = *(format++);
740 		char c;
741 		int repetitions = 1, argb;
742 		char *name;
743 		int namelen;
744 		int size = 0;
745 
746 		/* Handle format arguments if any */
747 		if (formatlen > 0) {
748 			c = *format;
749 
750 			if (c >= '0' && c <= '9') {
751 				errno = 0;
752 				long tmp = strtol(format, NULL, 10);
753 				/* There is not strtoi. We have to check the range ourselves.
754 				 * With 32-bit long the INT_{MIN,MAX} are useless because long == int, but with 64-bit they do limit us to 32-bit. */
755 				if (errno || tmp < INT_MIN || tmp > INT_MAX) {
756 					php_error_docref(NULL, E_WARNING, "Type %c: integer overflow", type);
757 					zend_array_destroy(Z_ARR_P(return_value));
758 					RETURN_FALSE;
759 				}
760 				repetitions = tmp;
761 
762 				while (formatlen > 0 && *format >= '0' && *format <= '9') {
763 					format++;
764 					formatlen--;
765 				}
766 			} else if (c == '*') {
767 				repetitions = -1;
768 				format++;
769 				formatlen--;
770 			}
771 		}
772 
773 		/* Get of new value in array */
774 		name = format;
775 		argb = repetitions;
776 
777 		while (formatlen > 0 && *format != '/') {
778 			formatlen--;
779 			format++;
780 		}
781 
782 		namelen = format - name;
783 
784 		if (namelen > 200)
785 			namelen = 200;
786 
787 		switch ((int) type) {
788 			/* Never use any input */
789 			case 'X':
790 				size = -1;
791 				if (repetitions < 0) {
792 					php_error_docref(NULL, E_WARNING, "Type %c: '*' ignored", type);
793 					repetitions = 1;
794 				}
795 				break;
796 
797 			case '@':
798 				size = 0;
799 				break;
800 
801 			case 'a':
802 			case 'A':
803 			case 'Z':
804 				size = repetitions;
805 				repetitions = 1;
806 				break;
807 
808 			case 'h':
809 			case 'H':
810 				size = (repetitions > 0) ? ((unsigned int) repetitions + 1) / 2 : repetitions;
811 				repetitions = 1;
812 				break;
813 
814 			/* Use 1 byte of input */
815 			case 'c':
816 			case 'C':
817 			case 'x':
818 				size = 1;
819 				break;
820 
821 			/* Use 2 bytes of input */
822 			case 's':
823 			case 'S':
824 			case 'n':
825 			case 'v':
826 				size = 2;
827 				break;
828 
829 			/* Use sizeof(int) bytes of input */
830 			case 'i':
831 			case 'I':
832 				size = sizeof(int);
833 				break;
834 
835 			/* Use 4 bytes of input */
836 			case 'l':
837 			case 'L':
838 			case 'N':
839 			case 'V':
840 				size = 4;
841 				break;
842 
843 			/* Use 8 bytes of input */
844 			case 'q':
845 			case 'Q':
846 			case 'J':
847 			case 'P':
848 #if SIZEOF_ZEND_LONG > 4
849 				size = 8;
850 				break;
851 #else
852 				zend_value_error("64-bit format codes are not available for 32-bit versions of PHP");
853 				RETURN_THROWS();
854 #endif
855 
856 			/* Use sizeof(float) bytes of input */
857 			case 'f':
858 			case 'g':
859 			case 'G':
860 				size = sizeof(float);
861 				break;
862 
863 			/* Use sizeof(double) bytes of input */
864 			case 'd':
865 			case 'e':
866 			case 'E':
867 				size = sizeof(double);
868 				break;
869 
870 			default:
871 				zend_value_error("Invalid format type %c", type);
872 				RETURN_THROWS();
873 		}
874 
875 
876 		/* Do actual unpacking */
877 		for (i = 0; i != repetitions; i++ ) {
878 
879 			if (size != 0 && size != -1 && INT_MAX - size + 1 < inputpos) {
880 				php_error_docref(NULL, E_WARNING, "Type %c: integer overflow", type);
881 				zend_array_destroy(Z_ARR_P(return_value));
882 				RETURN_FALSE;
883 			}
884 
885 			if ((inputpos + size) <= inputlen) {
886 
887 				zend_string* real_name;
888 				zval val;
889 
890 				if (repetitions == 1 && namelen > 0) {
891 					/* Use a part of the formatarg argument directly as the name. */
892 					real_name = zend_string_init_fast(name, namelen);
893 
894 				} else {
895 					/* Need to add the 1-based element number to the name */
896 					char buf[MAX_LENGTH_OF_LONG + 1];
897 					char *res = zend_print_ulong_to_buf(buf + sizeof(buf) - 1, i+1);
898 					size_t digits = buf + sizeof(buf) - 1 - res;
899 					real_name = zend_string_concat2(name, namelen, res, digits);
900 				}
901 
902 				switch ((int) type) {
903 					case 'a': {
904 						/* a will not strip any trailing whitespace or null padding */
905 						zend_long len = inputlen - inputpos;	/* Remaining string */
906 
907 						/* If size was given take minimum of len and size */
908 						if ((size >= 0) && (len > size)) {
909 							len = size;
910 						}
911 
912 						size = len;
913 
914 						ZVAL_STRINGL(&val, &input[inputpos], len);
915 						zend_symtable_update(Z_ARRVAL_P(return_value), real_name, &val);
916 						break;
917 					}
918 					case 'A': {
919 						/* A will strip any trailing whitespace */
920 						char padn = '\0'; char pads = ' '; char padt = '\t'; char padc = '\r'; char padl = '\n';
921 						zend_long len = inputlen - inputpos;	/* Remaining string */
922 
923 						/* If size was given take minimum of len and size */
924 						if ((size >= 0) && (len > size)) {
925 							len = size;
926 						}
927 
928 						size = len;
929 
930 						/* Remove trailing white space and nulls chars from unpacked data */
931 						while (--len >= 0) {
932 							if (input[inputpos + len] != padn
933 								&& input[inputpos + len] != pads
934 								&& input[inputpos + len] != padt
935 								&& input[inputpos + len] != padc
936 								&& input[inputpos + len] != padl
937 							)
938 								break;
939 						}
940 
941 						ZVAL_STRINGL(&val, &input[inputpos], len + 1);
942 						zend_symtable_update(Z_ARRVAL_P(return_value), real_name, &val);
943 						break;
944 					}
945 					/* New option added for Z to remain in-line with the Perl implementation */
946 					case 'Z': {
947 						/* Z will strip everything after the first null character */
948 						char pad = '\0';
949 						zend_long s,
950 							 len = inputlen - inputpos;	/* Remaining string */
951 
952 						/* If size was given take minimum of len and size */
953 						if ((size >= 0) && (len > size)) {
954 							len = size;
955 						}
956 
957 						size = len;
958 
959 						/* Remove everything after the first null */
960 						for (s=0 ; s < len ; s++) {
961 							if (input[inputpos + s] == pad)
962 								break;
963 						}
964 						len = s;
965 
966 						ZVAL_STRINGL(&val, &input[inputpos], len);
967 						zend_symtable_update(Z_ARRVAL_P(return_value), real_name, &val);
968 						break;
969 					}
970 
971 
972 					case 'h':
973 					case 'H': {
974 						zend_long len = (inputlen - inputpos) * 2;	/* Remaining */
975 						int nibbleshift = (type == 'h') ? 0 : 4;
976 						int first = 1;
977 						zend_string *buf;
978 						zend_long ipos, opos;
979 
980 						/* If size was given take minimum of len and size */
981 						if (size >= 0 && len > (size * 2)) {
982 							len = size * 2;
983 						}
984 
985 						if (len > 0 && argb > 0) {
986 							len -= argb % 2;
987 						}
988 
989 						buf = zend_string_alloc(len, 0);
990 
991 						for (ipos = opos = 0; opos < len; opos++) {
992 							char cc = (input[inputpos + ipos] >> nibbleshift) & 0xf;
993 
994 							if (cc < 10) {
995 								cc += '0';
996 							} else {
997 								cc += 'a' - 10;
998 							}
999 
1000 							ZSTR_VAL(buf)[opos] = cc;
1001 							nibbleshift = (nibbleshift + 4) & 7;
1002 
1003 							if (first-- == 0) {
1004 								ipos++;
1005 								first = 1;
1006 							}
1007 						}
1008 
1009 						ZSTR_VAL(buf)[len] = '\0';
1010 
1011 						ZVAL_STR(&val, buf);
1012 						zend_symtable_update(Z_ARRVAL_P(return_value), real_name, &val);
1013 						break;
1014 					}
1015 
1016 					case 'c':   /* signed */
1017 					case 'C': { /* unsigned */
1018 						uint8_t x = input[inputpos];
1019 						zend_long v = (type == 'c') ? (int8_t) x : x;
1020 
1021 						ZVAL_LONG(&val, v);
1022 						zend_symtable_update(Z_ARRVAL_P(return_value), real_name, &val);
1023 						break;
1024 					}
1025 
1026 					case 's':   /* signed machine endian   */
1027 					case 'S':   /* unsigned machine endian */
1028 					case 'n':   /* unsigned big endian     */
1029 					case 'v': { /* unsigned little endian  */
1030 						zend_long v = 0;
1031 						uint16_t x = *((unaligned_uint16_t*) &input[inputpos]);
1032 
1033 						if (type == 's') {
1034 							v = (int16_t) x;
1035 						} else if ((type == 'n' && MACHINE_LITTLE_ENDIAN) || (type == 'v' && !MACHINE_LITTLE_ENDIAN)) {
1036 							v = php_pack_reverse_int16(x);
1037 						} else {
1038 							v = x;
1039 						}
1040 
1041 						ZVAL_LONG(&val, v);
1042 						zend_symtable_update(Z_ARRVAL_P(return_value), real_name, &val);
1043 						break;
1044 					}
1045 
1046 					case 'i':   /* signed integer, machine size, machine endian */
1047 					case 'I': { /* unsigned integer, machine size, machine endian */
1048 						zend_long v;
1049 						if (type == 'i') {
1050 							int x = *((unaligned_int*) &input[inputpos]);
1051 							v = x;
1052 						} else {
1053 							unsigned int x = *((unaligned_uint*) &input[inputpos]);
1054 							v = x;
1055 						}
1056 
1057 						ZVAL_LONG(&val, v);
1058 						zend_symtable_update(Z_ARRVAL_P(return_value), real_name, &val);
1059 						break;
1060 					}
1061 
1062 					case 'l':   /* signed machine endian   */
1063 					case 'L':   /* unsigned machine endian */
1064 					case 'N':   /* unsigned big endian     */
1065 					case 'V': { /* unsigned little endian  */
1066 						zend_long v = 0;
1067 						uint32_t x = *((unaligned_uint32_t*) &input[inputpos]);
1068 
1069 						if (type == 'l') {
1070 							v = (int32_t) x;
1071 						} else if ((type == 'N' && MACHINE_LITTLE_ENDIAN) || (type == 'V' && !MACHINE_LITTLE_ENDIAN)) {
1072 							v = php_pack_reverse_int32(x);
1073 						} else {
1074 							v = x;
1075 						}
1076 
1077 						ZVAL_LONG(&val, v);
1078 						zend_symtable_update(Z_ARRVAL_P(return_value), real_name, &val);
1079 
1080 						break;
1081 					}
1082 
1083 #if SIZEOF_ZEND_LONG > 4
1084 					case 'q':   /* signed machine endian   */
1085 					case 'Q':   /* unsigned machine endian */
1086 					case 'J':   /* unsigned big endian     */
1087 					case 'P': { /* unsigned little endian  */
1088 						zend_long v = 0;
1089 						uint64_t x = *((unaligned_uint64_t*) &input[inputpos]);
1090 
1091 						if (type == 'q') {
1092 							v = (int64_t) x;
1093 						} else if ((type == 'J' && MACHINE_LITTLE_ENDIAN) || (type == 'P' && !MACHINE_LITTLE_ENDIAN)) {
1094 							v = php_pack_reverse_int64(x);
1095 						} else {
1096 							v = x;
1097 						}
1098 
1099 						ZVAL_LONG(&val, v);
1100 						zend_symtable_update(Z_ARRVAL_P(return_value), real_name, &val);
1101 						break;
1102 					}
1103 #endif
1104 
1105 					case 'f': /* float */
1106 					case 'g': /* little endian float*/
1107 					case 'G': /* big endian float*/
1108 					{
1109 						float v;
1110 
1111 						if (type == 'g') {
1112 							v = php_pack_parse_float(1, &input[inputpos]);
1113 						} else if (type == 'G') {
1114 							v = php_pack_parse_float(0, &input[inputpos]);
1115 						} else {
1116 							memcpy(&v, &input[inputpos], sizeof(float));
1117 						}
1118 
1119 						ZVAL_DOUBLE(&val, v);
1120 						zend_symtable_update(Z_ARRVAL_P(return_value), real_name, &val);
1121 						break;
1122 					}
1123 
1124 
1125 					case 'd': /* double */
1126 					case 'e': /* little endian float */
1127 					case 'E': /* big endian float */
1128 					{
1129 						double v;
1130 						if (type == 'e') {
1131 							v = php_pack_parse_double(1, &input[inputpos]);
1132 						} else if (type == 'E') {
1133 							v = php_pack_parse_double(0, &input[inputpos]);
1134 						} else {
1135 							memcpy(&v, &input[inputpos], sizeof(double));
1136 						}
1137 
1138 						ZVAL_DOUBLE(&val, v);
1139 						zend_symtable_update(Z_ARRVAL_P(return_value), real_name, &val);
1140 						break;
1141 					}
1142 
1143 					case 'x':
1144 						/* Do nothing with input, just skip it */
1145 						break;
1146 
1147 					case 'X':
1148 						if (inputpos < size) {
1149 							inputpos = -size;
1150 							i = repetitions - 1;		/* Break out of for loop */
1151 
1152 							if (repetitions >= 0) {
1153 								php_error_docref(NULL, E_WARNING, "Type %c: outside of string", type);
1154 							}
1155 						}
1156 						break;
1157 
1158 					case '@':
1159 						if (repetitions <= inputlen) {
1160 							inputpos = repetitions;
1161 						} else {
1162 							php_error_docref(NULL, E_WARNING, "Type %c: outside of string", type);
1163 						}
1164 
1165 						i = repetitions - 1;	/* Done, break out of for loop */
1166 						break;
1167 				}
1168 
1169 				zend_string_release(real_name);
1170 
1171 				inputpos += size;
1172 				if (inputpos < 0) {
1173 					if (size != -1) { /* only print warning if not working with * */
1174 						php_error_docref(NULL, E_WARNING, "Type %c: outside of string", type);
1175 					}
1176 					inputpos = 0;
1177 				}
1178 			} else if (repetitions < 0) {
1179 				/* Reached end of input for '*' repeater */
1180 				break;
1181 			} else {
1182 				php_error_docref(NULL, E_WARNING, "Type %c: not enough input values, need %d values but only " ZEND_LONG_FMT " %s provided", type, size, inputlen - inputpos, inputlen - inputpos == 1 ? "was" : "were");
1183 				zend_array_destroy(Z_ARR_P(return_value));
1184 				RETURN_FALSE;
1185 			}
1186 		}
1187 
1188 		if (formatlen > 0) {
1189 			formatlen--;	/* Skip '/' separator, does no harm if inputlen == 0 */
1190 			format++;
1191 		}
1192 	}
1193 }
1194 /* }}} */
1195 
1196 /* {{{ PHP_MINIT_FUNCTION */
PHP_MINIT_FUNCTION(pack)1197 PHP_MINIT_FUNCTION(pack)
1198 {
1199 	int i;
1200 
1201 	if (MACHINE_LITTLE_ENDIAN) {
1202 		/* Where to get lo to hi bytes from */
1203 		byte_map[0] = 0;
1204 
1205 		for (i = 0; i < (int)sizeof(int); i++) {
1206 			int_map[i] = i;
1207 		}
1208 
1209 		machine_endian_short_map[0] = 0;
1210 		machine_endian_short_map[1] = 1;
1211 		big_endian_short_map[0] = 1;
1212 		big_endian_short_map[1] = 0;
1213 		little_endian_short_map[0] = 0;
1214 		little_endian_short_map[1] = 1;
1215 
1216 		machine_endian_long_map[0] = 0;
1217 		machine_endian_long_map[1] = 1;
1218 		machine_endian_long_map[2] = 2;
1219 		machine_endian_long_map[3] = 3;
1220 		big_endian_long_map[0] = 3;
1221 		big_endian_long_map[1] = 2;
1222 		big_endian_long_map[2] = 1;
1223 		big_endian_long_map[3] = 0;
1224 		little_endian_long_map[0] = 0;
1225 		little_endian_long_map[1] = 1;
1226 		little_endian_long_map[2] = 2;
1227 		little_endian_long_map[3] = 3;
1228 
1229 #if SIZEOF_ZEND_LONG > 4
1230 		machine_endian_longlong_map[0] = 0;
1231 		machine_endian_longlong_map[1] = 1;
1232 		machine_endian_longlong_map[2] = 2;
1233 		machine_endian_longlong_map[3] = 3;
1234 		machine_endian_longlong_map[4] = 4;
1235 		machine_endian_longlong_map[5] = 5;
1236 		machine_endian_longlong_map[6] = 6;
1237 		machine_endian_longlong_map[7] = 7;
1238 		big_endian_longlong_map[0] = 7;
1239 		big_endian_longlong_map[1] = 6;
1240 		big_endian_longlong_map[2] = 5;
1241 		big_endian_longlong_map[3] = 4;
1242 		big_endian_longlong_map[4] = 3;
1243 		big_endian_longlong_map[5] = 2;
1244 		big_endian_longlong_map[6] = 1;
1245 		big_endian_longlong_map[7] = 0;
1246 		little_endian_longlong_map[0] = 0;
1247 		little_endian_longlong_map[1] = 1;
1248 		little_endian_longlong_map[2] = 2;
1249 		little_endian_longlong_map[3] = 3;
1250 		little_endian_longlong_map[4] = 4;
1251 		little_endian_longlong_map[5] = 5;
1252 		little_endian_longlong_map[6] = 6;
1253 		little_endian_longlong_map[7] = 7;
1254 #endif
1255 	}
1256 	else {
1257 		zval val;
1258 		int size = sizeof(Z_LVAL(val));
1259 		Z_LVAL(val)=0; /*silence a warning*/
1260 
1261 		/* Where to get hi to lo bytes from */
1262 		byte_map[0] = size - 1;
1263 
1264 		for (i = 0; i < (int)sizeof(int); i++) {
1265 			int_map[i] = size - (sizeof(int) - i);
1266 		}
1267 
1268 		machine_endian_short_map[0] = size - 2;
1269 		machine_endian_short_map[1] = size - 1;
1270 		big_endian_short_map[0] = size - 2;
1271 		big_endian_short_map[1] = size - 1;
1272 		little_endian_short_map[0] = size - 1;
1273 		little_endian_short_map[1] = size - 2;
1274 
1275 		machine_endian_long_map[0] = size - 4;
1276 		machine_endian_long_map[1] = size - 3;
1277 		machine_endian_long_map[2] = size - 2;
1278 		machine_endian_long_map[3] = size - 1;
1279 		big_endian_long_map[0] = size - 4;
1280 		big_endian_long_map[1] = size - 3;
1281 		big_endian_long_map[2] = size - 2;
1282 		big_endian_long_map[3] = size - 1;
1283 		little_endian_long_map[0] = size - 1;
1284 		little_endian_long_map[1] = size - 2;
1285 		little_endian_long_map[2] = size - 3;
1286 		little_endian_long_map[3] = size - 4;
1287 
1288 #if SIZEOF_ZEND_LONG > 4
1289 		machine_endian_longlong_map[0] = size - 8;
1290 		machine_endian_longlong_map[1] = size - 7;
1291 		machine_endian_longlong_map[2] = size - 6;
1292 		machine_endian_longlong_map[3] = size - 5;
1293 		machine_endian_longlong_map[4] = size - 4;
1294 		machine_endian_longlong_map[5] = size - 3;
1295 		machine_endian_longlong_map[6] = size - 2;
1296 		machine_endian_longlong_map[7] = size - 1;
1297 		big_endian_longlong_map[0] = size - 8;
1298 		big_endian_longlong_map[1] = size - 7;
1299 		big_endian_longlong_map[2] = size - 6;
1300 		big_endian_longlong_map[3] = size - 5;
1301 		big_endian_longlong_map[4] = size - 4;
1302 		big_endian_longlong_map[5] = size - 3;
1303 		big_endian_longlong_map[6] = size - 2;
1304 		big_endian_longlong_map[7] = size - 1;
1305 		little_endian_longlong_map[0] = size - 1;
1306 		little_endian_longlong_map[1] = size - 2;
1307 		little_endian_longlong_map[2] = size - 3;
1308 		little_endian_longlong_map[3] = size - 4;
1309 		little_endian_longlong_map[4] = size - 5;
1310 		little_endian_longlong_map[5] = size - 6;
1311 		little_endian_longlong_map[6] = size - 7;
1312 		little_endian_longlong_map[7] = size - 8;
1313 #endif
1314 	}
1315 
1316 	return SUCCESS;
1317 }
1318 /* }}} */
1319