xref: /PHP-8.2/ext/standard/pack.c (revision 791a6ef1)
1 /*
2    +----------------------------------------------------------------------+
3    | Copyright (c) The PHP Group                                          |
4    +----------------------------------------------------------------------+
5    | This source file is subject to version 3.01 of the PHP license,      |
6    | that is bundled with this package in the file LICENSE, and is        |
7    | available through the world-wide-web at the following url:           |
8    | https://www.php.net/license/3_01.txt                                 |
9    | If you did not receive a copy of the PHP license and are unable to   |
10    | obtain it through the world-wide-web, please send a note to          |
11    | license@php.net so we can mail you a copy immediately.               |
12    +----------------------------------------------------------------------+
13    | Author: Chris Schneider <cschneid@relog.ch>                          |
14    +----------------------------------------------------------------------+
15  */
16 
17 #include "php.h"
18 
19 #include <stdio.h>
20 #include <stdlib.h>
21 #include <errno.h>
22 #include <sys/types.h>
23 #include <sys/stat.h>
24 #include <fcntl.h>
25 #ifdef PHP_WIN32
26 #define O_RDONLY _O_RDONLY
27 #include "win32/param.h"
28 #else
29 #include <sys/param.h>
30 #endif
31 #include "ext/standard/head.h"
32 #include "php_string.h"
33 #include "pack.h"
34 #ifdef HAVE_PWD_H
35 #ifdef PHP_WIN32
36 #include "win32/pwd.h"
37 #else
38 #include <pwd.h>
39 #endif
40 #endif
41 #include "fsock.h"
42 #ifdef HAVE_NETINET_IN_H
43 #include <netinet/in.h>
44 #endif
45 
/*
 * INC_OUTPUTPOS(a, b): advance outputpos by a * b, where a is a repeat count
 * and b the size in bytes of one element.
 *
 * When a is negative, or a * b would take outputpos past INT_MAX, the macro
 * frees formatcodes and formatargs, raises a ValueError naming the current
 * format code and leaves via RETURN_THROWS().
 *
 * It relies on outputpos, formatcodes, formatargs and code being in scope.
 * Call sites invoke it without a trailing semicolon, so it expands to a plain
 * if statement followed by an assignment instead of a do { } while (0) block;
 * do not use it as the sole body of an unbraced if/else.
 */
#define INC_OUTPUTPOS(a,b) \
	if ((a) < 0 || ((INT_MAX - outputpos)/((int)(b))) < (a)) { \
		efree(formatcodes);	\
		efree(formatargs);	\
		zend_value_error("Type %c: integer overflow in format string", code); \
		RETURN_THROWS(); \
	} \
	outputpos += (a)*(b);
54 
/*
 * MACHINE_LITTLE_ENDIAN is 0 when WORDS_BIGENDIAN is defined and 1 otherwise,
 * so the host byte order can be tested inside ordinary C expressions.
 */
#ifdef WORDS_BIGENDIAN
#define MACHINE_LITTLE_ENDIAN 0
#else
#define MACHINE_LITTLE_ENDIAN 1
#endif
60 
/*
 * Integer typedefs declared through ZEND_SET_ALIGNED(1, ...), presumably
 * giving them one-byte alignment (confirm against the macro's definition).
 * unpack() dereferences pointers to these types at arbitrary offsets into
 * the input string.
 */
typedef ZEND_SET_ALIGNED(1, uint16_t unaligned_uint16_t);
typedef ZEND_SET_ALIGNED(1, uint32_t unaligned_uint32_t);
typedef ZEND_SET_ALIGNED(1, uint64_t unaligned_uint64_t);
typedef ZEND_SET_ALIGNED(1, unsigned int unaligned_uint);
typedef ZEND_SET_ALIGNED(1, int unaligned_int);
66 
/*
 * Byte-order lookup tables handed to php_pack(): entry i is the index, within
 * the in-memory zend_long being packed, of the byte that is written to output
 * position i. The tables are not filled in within this part of the file;
 * presumably they are initialised at module startup (confirm).
 */

/* Mapping of byte from char (8bit) to long for machine endian */
static int byte_map[1];

/* Mappings of bytes from int (machine dependent) to int for machine endian */
static int int_map[sizeof(int)];

/* Mappings of bytes from shorts (16bit) for all endian environments */
static int machine_endian_short_map[2];
static int big_endian_short_map[2];
static int little_endian_short_map[2];

/* Mappings of bytes from longs (32bit) for all endian environments */
static int machine_endian_long_map[4];
static int big_endian_long_map[4];
static int little_endian_long_map[4];

#if SIZEOF_ZEND_LONG > 4
/* Mappings of bytes from quads (64bit) for all endian environments */
static int machine_endian_longlong_map[8];
static int big_endian_longlong_map[8];
static int little_endian_longlong_map[8];
#endif
89 
90 /* {{{ php_pack */
/*
 * Write `size` bytes of an integer value to `output`.
 *
 * val    - value to pack; it is coerced with convert_to_long() first
 * size   - number of bytes to emit
 * map    - for each output position, the index of the byte to take from the
 *          in-memory zend_long
 * output - destination buffer with room for at least `size` bytes
 */
static void php_pack(zval *val, size_t size, int *map, char *output)
{
	const char *src_bytes;
	size_t pos = 0;

	convert_to_long(val);
	src_bytes = (const char *) &Z_LVAL_P(val);

	while (pos < size) {
		output[pos] = src_bytes[map[pos]];
		pos++;
	}
}
103 /* }}} */
104 
php_pack_reverse_int16(uint16_t arg)105 ZEND_ATTRIBUTE_CONST static inline uint16_t php_pack_reverse_int16(uint16_t arg)
106 {
107 	return ((arg & 0xFF) << 8) | ((arg >> 8) & 0xFF);
108 }
109 
110 /* {{{ php_pack_reverse_int32 */
php_pack_reverse_int32(uint32_t arg)111 ZEND_ATTRIBUTE_CONST static inline uint32_t php_pack_reverse_int32(uint32_t arg)
112 {
113 	uint32_t result;
114 	result = ((arg & 0xFF) << 24) | ((arg & 0xFF00) << 8) | ((arg >> 8) & 0xFF00) | ((arg >> 24) & 0xFF);
115 
116 	return result;
117 }
118 /* }}} */
119 
/* {{{ php_pack_reverse_int64 */
/* Return arg with the order of its eight bytes reversed. */
static inline uint64_t php_pack_reverse_int64(uint64_t arg)
{
	uint64_t swapped = 0;
	int byte_idx;

	/* Peel bytes off the low end of arg and push them onto the low end of
	 * the result, so the first byte taken ends up most significant. */
	for (byte_idx = 0; byte_idx < 8; byte_idx++) {
		swapped = (swapped << 8) | (arg & 0xFF);
		arg >>= 8;
	}

	return swapped;
}
133 /* }}} */
134 
135 /* {{{ php_pack_copy_float */
/*
 * Store the representation of f at dst in the requested byte order.
 *
 * is_little_endian - non-zero: least significant byte first;
 *                    zero: most significant byte first
 * dst              - destination, sizeof(float) bytes, any alignment
 * f                - value to store
 *
 * Like the rest of this file, this treats float as a 32-bit quantity.
 */
static void php_pack_copy_float(int is_little_endian, void * dst, float f)
{
	unsigned char *out = dst;
	uint32_t bits;
	size_t k;

	memcpy(&bits, &f, sizeof(bits));

	for (k = 0; k < sizeof(bits); k++) {
		/* Pick which byte of the value lands at output position k. */
		size_t shift = 8 * (is_little_endian ? k : sizeof(bits) - 1 - k);

		out[k] = (unsigned char) (bits >> shift);
	}
}
156 /* }}} */
157 
158 /* {{{ php_pack_copy_double */
/*
 * Store the representation of d at dst in the requested byte order.
 *
 * is_little_endian - non-zero: least significant byte first;
 *                    zero: most significant byte first
 * dst              - destination, sizeof(double) bytes, any alignment
 * d                - value to store
 *
 * Like the rest of this file, this treats double as a 64-bit quantity.
 */
static void php_pack_copy_double(int is_little_endian, void * dst, double d)
{
	unsigned char *out = dst;
	uint64_t bits;
	size_t k;

	memcpy(&bits, &d, sizeof(bits));

	for (k = 0; k < sizeof(bits); k++) {
		/* Pick which byte of the value lands at output position k. */
		size_t shift = 8 * (is_little_endian ? k : sizeof(bits) - 1 - k);

		out[k] = (unsigned char) (bits >> shift);
	}
}
179 /* }}} */
180 
181 /* {{{ php_pack_parse_float */
/*
 * Read a float stored at src in the given byte order.
 *
 * is_little_endian - non-zero: src holds the least significant byte first;
 *                    zero: src holds the most significant byte first
 * src              - source, sizeof(float) bytes, any alignment
 *
 * Returns the decoded value. Like the rest of this file, this treats float
 * as a 32-bit quantity.
 */
static float php_pack_parse_float(int is_little_endian, void * src)
{
	const unsigned char *in = src;
	uint32_t bits = 0;
	float value;
	size_t k;

	for (k = 0; k < sizeof(bits); k++) {
		/* Position within the value of the byte found at input offset k. */
		size_t shift = 8 * (is_little_endian ? k : sizeof(bits) - 1 - k);

		bits |= (uint32_t) in[k] << shift;
	}

	memcpy(&value, &bits, sizeof(value));
	return value;
}
202 /* }}} */
203 
204 /* {{{ php_pack_parse_double */
/*
 * Read a double stored at src in the given byte order.
 *
 * is_little_endian - non-zero: src holds the least significant byte first;
 *                    zero: src holds the most significant byte first
 * src              - source, sizeof(double) bytes, any alignment
 *
 * Returns the decoded value. Like the rest of this file, this treats double
 * as a 64-bit quantity.
 */
static double php_pack_parse_double(int is_little_endian, void * src)
{
	const unsigned char *in = src;
	uint64_t bits = 0;
	double value;
	size_t k;

	for (k = 0; k < sizeof(bits); k++) {
		/* Position within the value of the byte found at input offset k. */
		size_t shift = 8 * (is_little_endian ? k : sizeof(bits) - 1 - k);

		bits |= (uint64_t) in[k] << shift;
	}

	memcpy(&value, &bits, sizeof(value));
	return value;
}
225 /* }}} */
226 
227 /* pack() idea stolen from Perl (implemented formats behave the same as there except J and P)
 * Implemented formats are Z, A, a, h, H, c, C, s, S, i, I, l, L, n, N, v, V, q, Q, J, P, f, d, x, X, @.
229  * Added g, G for little endian float and big endian float, added e, E for little endian double and big endian double.
230  */
231 /* {{{ Takes one or more arguments and packs them into a binary string according to the format argument */
PHP_FUNCTION(pack)232 PHP_FUNCTION(pack)
233 {
234 	zval *argv = NULL;
235 	int num_args = 0;
236 	size_t i;
237 	int currentarg;
238 	char *format;
239 	size_t formatlen;
240 	char *formatcodes;
241 	int *formatargs;
242 	size_t formatcount = 0;
243 	int outputpos = 0, outputsize = 0;
244 	zend_string *output;
245 
246 	ZEND_PARSE_PARAMETERS_START(1, -1)
247 		Z_PARAM_STRING(format, formatlen)
248 		Z_PARAM_VARIADIC('*', argv, num_args)
249 	ZEND_PARSE_PARAMETERS_END();
250 
251 	/* We have a maximum of <formatlen> format codes to deal with */
252 	formatcodes = safe_emalloc(formatlen, sizeof(*formatcodes), 0);
253 	formatargs = safe_emalloc(formatlen, sizeof(*formatargs), 0);
254 	currentarg = 0;
255 
256 	/* Preprocess format into formatcodes and formatargs */
257 	for (i = 0; i < formatlen; formatcount++) {
258 		char code = format[i++];
259 		int arg = 1;
260 
261 		/* Handle format arguments if any */
262 		if (i < formatlen) {
263 			char c = format[i];
264 
265 			if (c == '*') {
266 				arg = -1;
267 				i++;
268 			}
269 			else if (c >= '0' && c <= '9') {
270 				arg = atoi(&format[i]);
271 
272 				while (format[i] >= '0' && format[i] <= '9' && i < formatlen) {
273 					i++;
274 				}
275 			}
276 		}
277 
278 		/* Handle special arg '*' for all codes and check argv overflows */
279 		switch ((int) code) {
280 			/* Never uses any args */
281 			case 'x':
282 			case 'X':
283 			case '@':
284 				if (arg < 0) {
285 					php_error_docref(NULL, E_WARNING, "Type %c: '*' ignored", code);
286 					arg = 1;
287 				}
288 				break;
289 
290 			/* Always uses one arg */
291 			case 'a':
292 			case 'A':
293 			case 'Z':
294 			case 'h':
295 			case 'H':
296 				if (currentarg >= num_args) {
297 					efree(formatcodes);
298 					efree(formatargs);
299 					zend_value_error("Type %c: not enough arguments", code);
300 					RETURN_THROWS();
301 				}
302 
303 				if (arg < 0) {
304 					if (!try_convert_to_string(&argv[currentarg])) {
305 						efree(formatcodes);
306 						efree(formatargs);
307 						RETURN_THROWS();
308 					}
309 
310 					arg = Z_STRLEN(argv[currentarg]);
311 					if (code == 'Z') {
312 						/* add one because Z is always NUL-terminated:
313 						 * pack("Z*", "aa") === "aa\0"
314 						 * pack("Z2", "aa") === "a\0" */
315 						arg++;
316 					}
317 				}
318 
319 				currentarg++;
320 				break;
321 
322 			/* Use as many args as specified */
323 			case 'q':
324 			case 'Q':
325 			case 'J':
326 			case 'P':
327 #if SIZEOF_ZEND_LONG < 8
328 					efree(formatcodes);
329 					efree(formatargs);
330 					zend_value_error("64-bit format codes are not available for 32-bit versions of PHP");
331 					RETURN_THROWS();
332 #endif
333 			case 'c':
334 			case 'C':
335 			case 's':
336 			case 'S':
337 			case 'i':
338 			case 'I':
339 			case 'l':
340 			case 'L':
341 			case 'n':
342 			case 'N':
343 			case 'v':
344 			case 'V':
345 			case 'f': /* float */
346 			case 'g': /* little endian float */
347 			case 'G': /* big endian float */
348 			case 'd': /* double */
349 			case 'e': /* little endian double */
350 			case 'E': /* big endian double */
351 				if (arg < 0) {
352 					arg = num_args - currentarg;
353 				}
354 				if (currentarg > INT_MAX - arg) {
355 					goto too_few_args;
356 				}
357 				currentarg += arg;
358 
359 				if (currentarg > num_args) {
360 too_few_args:
361 					efree(formatcodes);
362 					efree(formatargs);
363 					zend_value_error("Type %c: too few arguments", code);
364 					RETURN_THROWS();
365 				}
366 				break;
367 
368 			default:
369 				efree(formatcodes);
370 				efree(formatargs);
371 				zend_value_error("Type %c: unknown format code", code);
372 				RETURN_THROWS();
373 		}
374 
375 		formatcodes[formatcount] = code;
376 		formatargs[formatcount] = arg;
377 	}
378 
379 	if (currentarg < num_args) {
380 		php_error_docref(NULL, E_WARNING, "%d arguments unused", (num_args - currentarg));
381 	}
382 
383 	/* Calculate output length and upper bound while processing*/
384 	for (i = 0; i < formatcount; i++) {
385 	    int code = (int) formatcodes[i];
386 		int arg = formatargs[i];
387 
388 		switch ((int) code) {
389 			case 'h':
390 			case 'H':
391 				INC_OUTPUTPOS((arg + (arg % 2)) / 2,1)	/* 4 bit per arg */
392 				break;
393 
394 			case 'a':
395 			case 'A':
396 			case 'Z':
397 			case 'c':
398 			case 'C':
399 			case 'x':
400 				INC_OUTPUTPOS(arg,1)		/* 8 bit per arg */
401 				break;
402 
403 			case 's':
404 			case 'S':
405 			case 'n':
406 			case 'v':
407 				INC_OUTPUTPOS(arg,2)		/* 16 bit per arg */
408 				break;
409 
410 			case 'i':
411 			case 'I':
412 				INC_OUTPUTPOS(arg,sizeof(int))
413 				break;
414 
415 			case 'l':
416 			case 'L':
417 			case 'N':
418 			case 'V':
419 				INC_OUTPUTPOS(arg,4)		/* 32 bit per arg */
420 				break;
421 
422 #if SIZEOF_ZEND_LONG > 4
423 			case 'q':
424 			case 'Q':
425 			case 'J':
426 			case 'P':
427 				INC_OUTPUTPOS(arg,8)		/* 32 bit per arg */
428 				break;
429 #endif
430 
431 			case 'f': /* float */
432 			case 'g': /* little endian float */
433 			case 'G': /* big endian float */
434 				INC_OUTPUTPOS(arg,sizeof(float))
435 				break;
436 
437 			case 'd': /* double */
438 			case 'e': /* little endian double */
439 			case 'E': /* big endian double */
440 				INC_OUTPUTPOS(arg,sizeof(double))
441 				break;
442 
443 			case 'X':
444 				outputpos -= arg;
445 
446 				if (outputpos < 0) {
447 					php_error_docref(NULL, E_WARNING, "Type %c: outside of string", code);
448 					outputpos = 0;
449 				}
450 				break;
451 
452 			case '@':
453 				outputpos = arg;
454 				break;
455 		}
456 
457 		if (outputsize < outputpos) {
458 			outputsize = outputpos;
459 		}
460 	}
461 
462 	output = zend_string_alloc(outputsize, 0);
463 	outputpos = 0;
464 	currentarg = 0;
465 
466 	/* Do actual packing */
467 	for (i = 0; i < formatcount; i++) {
468 	    int code = (int) formatcodes[i];
469 		int arg = formatargs[i];
470 
471 		switch ((int) code) {
472 			case 'a':
473 			case 'A':
474 			case 'Z': {
475 				size_t arg_cp = (code != 'Z') ? arg : MAX(0, arg - 1);
476 				zend_string *tmp_str;
477 				zend_string *str = zval_get_tmp_string(&argv[currentarg++], &tmp_str);
478 
479 				memset(&ZSTR_VAL(output)[outputpos], (code == 'a' || code == 'Z') ? '\0' : ' ', arg);
480 				memcpy(&ZSTR_VAL(output)[outputpos], ZSTR_VAL(str),
481 					   (ZSTR_LEN(str) < arg_cp) ? ZSTR_LEN(str) : arg_cp);
482 
483 				outputpos += arg;
484 				zend_tmp_string_release(tmp_str);
485 				break;
486 			}
487 
488 			case 'h':
489 			case 'H': {
490 				int nibbleshift = (code == 'h') ? 0 : 4;
491 				int first = 1;
492 				zend_string *tmp_str;
493 				zend_string *str = zval_get_tmp_string(&argv[currentarg++], &tmp_str);
494 				char *v = ZSTR_VAL(str);
495 
496 				outputpos--;
497 				if ((size_t)arg > ZSTR_LEN(str)) {
498 					php_error_docref(NULL, E_WARNING, "Type %c: not enough characters in string", code);
499 					arg = ZSTR_LEN(str);
500 				}
501 
502 				while (arg-- > 0) {
503 					char n = *v++;
504 
505 					if (n >= '0' && n <= '9') {
506 						n -= '0';
507 					} else if (n >= 'A' && n <= 'F') {
508 						n -= ('A' - 10);
509 					} else if (n >= 'a' && n <= 'f') {
510 						n -= ('a' - 10);
511 					} else {
512 						php_error_docref(NULL, E_WARNING, "Type %c: illegal hex digit %c", code, n);
513 						n = 0;
514 					}
515 
516 					if (first--) {
517 						ZSTR_VAL(output)[++outputpos] = 0;
518 					} else {
519 					  first = 1;
520 					}
521 
522 					ZSTR_VAL(output)[outputpos] |= (n << nibbleshift);
523 					nibbleshift = (nibbleshift + 4) & 7;
524 				}
525 
526 				outputpos++;
527 				zend_tmp_string_release(tmp_str);
528 				break;
529 			}
530 
531 			case 'c':
532 			case 'C':
533 				while (arg-- > 0) {
534 					php_pack(&argv[currentarg++], 1, byte_map, &ZSTR_VAL(output)[outputpos]);
535 					outputpos++;
536 				}
537 				break;
538 
539 			case 's':
540 			case 'S':
541 			case 'n':
542 			case 'v': {
543 				int *map = machine_endian_short_map;
544 
545 				if (code == 'n') {
546 					map = big_endian_short_map;
547 				} else if (code == 'v') {
548 					map = little_endian_short_map;
549 				}
550 
551 				while (arg-- > 0) {
552 					php_pack(&argv[currentarg++], 2, map, &ZSTR_VAL(output)[outputpos]);
553 					outputpos += 2;
554 				}
555 				break;
556 			}
557 
558 			case 'i':
559 			case 'I':
560 				while (arg-- > 0) {
561 					php_pack(&argv[currentarg++], sizeof(int), int_map, &ZSTR_VAL(output)[outputpos]);
562 					outputpos += sizeof(int);
563 				}
564 				break;
565 
566 			case 'l':
567 			case 'L':
568 			case 'N':
569 			case 'V': {
570 				int *map = machine_endian_long_map;
571 
572 				if (code == 'N') {
573 					map = big_endian_long_map;
574 				} else if (code == 'V') {
575 					map = little_endian_long_map;
576 				}
577 
578 				while (arg-- > 0) {
579 					php_pack(&argv[currentarg++], 4, map, &ZSTR_VAL(output)[outputpos]);
580 					outputpos += 4;
581 				}
582 				break;
583 			}
584 
585 #if SIZEOF_ZEND_LONG > 4
586 			case 'q':
587 			case 'Q':
588 			case 'J':
589 			case 'P': {
590 				int *map = machine_endian_longlong_map;
591 
592 				if (code == 'J') {
593 					map = big_endian_longlong_map;
594 				} else if (code == 'P') {
595 					map = little_endian_longlong_map;
596 				}
597 
598 				while (arg-- > 0) {
599 					php_pack(&argv[currentarg++], 8, map, &ZSTR_VAL(output)[outputpos]);
600 					outputpos += 8;
601 				}
602 				break;
603 			}
604 #endif
605 
606 			case 'f': {
607 				while (arg-- > 0) {
608 					float v = (float) zval_get_double(&argv[currentarg++]);
609 					memcpy(&ZSTR_VAL(output)[outputpos], &v, sizeof(v));
610 					outputpos += sizeof(v);
611 				}
612 				break;
613 			}
614 
615 			case 'g': {
616 				/* pack little endian float */
617 				while (arg-- > 0) {
618 					float v = (float) zval_get_double(&argv[currentarg++]);
619 					php_pack_copy_float(1, &ZSTR_VAL(output)[outputpos], v);
620 					outputpos += sizeof(v);
621 				}
622 
623 				break;
624 			}
625 			case 'G': {
626 				/* pack big endian float */
627 				while (arg-- > 0) {
628 					float v = (float) zval_get_double(&argv[currentarg++]);
629 					php_pack_copy_float(0, &ZSTR_VAL(output)[outputpos], v);
630 					outputpos += sizeof(v);
631 				}
632 				break;
633 			}
634 
635 			case 'd': {
636 				while (arg-- > 0) {
637 					double v = (double) zval_get_double(&argv[currentarg++]);
638 					memcpy(&ZSTR_VAL(output)[outputpos], &v, sizeof(v));
639 					outputpos += sizeof(v);
640 				}
641 				break;
642 			}
643 
644 			case 'e': {
645 				/* pack little endian double */
646 				while (arg-- > 0) {
647 					double v = (double) zval_get_double(&argv[currentarg++]);
648 					php_pack_copy_double(1, &ZSTR_VAL(output)[outputpos], v);
649 					outputpos += sizeof(v);
650 				}
651 				break;
652 			}
653 
654 			case 'E': {
655 				/* pack big endian double */
656 				while (arg-- > 0) {
657 					double v = (double) zval_get_double(&argv[currentarg++]);
658 					php_pack_copy_double(0, &ZSTR_VAL(output)[outputpos], v);
659 					outputpos += sizeof(v);
660 				}
661 				break;
662 			}
663 
664 			case 'x':
665 				memset(&ZSTR_VAL(output)[outputpos], '\0', arg);
666 				outputpos += arg;
667 				break;
668 
669 			case 'X':
670 				outputpos -= arg;
671 
672 				if (outputpos < 0) {
673 					outputpos = 0;
674 				}
675 				break;
676 
677 			case '@':
678 				if (arg > outputpos) {
679 					memset(&ZSTR_VAL(output)[outputpos], '\0', arg - outputpos);
680 				}
681 				outputpos = arg;
682 				break;
683 		}
684 	}
685 
686 	efree(formatcodes);
687 	efree(formatargs);
688 	ZSTR_VAL(output)[outputpos] = '\0';
689 	ZSTR_LEN(output) = outputpos;
690 	RETURN_NEW_STR(output);
691 }
692 /* }}} */
693 
694 /* unpack() is based on Perl's unpack(), but is modified a bit from there.
695  * Rather than depending on error-prone ordered lists or syntactically
696  * unpleasant pass-by-reference, we return an object with named parameters
697  * (like *_fetch_object()). Syntax is "f[repeat]name/...", where "f" is the
698  * formatter char (like pack()), "[repeat]" is the optional repeater argument,
699  * and "name" is the name of the variable to use.
700  * Example: "c2chars/nints" will return an object with fields
701  * chars1, chars2, and ints.
702  * Numeric pack types will return numbers, a and A will return strings,
703  * f and d will return doubles.
 * Implemented formats are Z, A, a, h, H, c, C, s, S, i, I, l, L, n, N, v, V, q, Q, J, P, f, d, x, X, @.
705  * Added g, G for little endian float and big endian float, added e, E for little endian double and big endian double.
706  */
707 /* {{{ Unpack binary string into named array elements according to format argument */
PHP_FUNCTION(unpack)708 PHP_FUNCTION(unpack)
709 {
710 	char *format, *input;
711 	zend_string *formatarg, *inputarg;
712 	zend_long formatlen, inputpos, inputlen;
713 	int i;
714 	zend_long offset = 0;
715 
716 	ZEND_PARSE_PARAMETERS_START(2, 3)
717 		Z_PARAM_STR(formatarg)
718 		Z_PARAM_STR(inputarg)
719 		Z_PARAM_OPTIONAL
720 		Z_PARAM_LONG(offset)
721 	ZEND_PARSE_PARAMETERS_END();
722 
723 	format = ZSTR_VAL(formatarg);
724 	formatlen = ZSTR_LEN(formatarg);
725 	input = ZSTR_VAL(inputarg);
726 	inputlen = ZSTR_LEN(inputarg);
727 	inputpos = 0;
728 
729 
730 	if (offset < 0 || offset > inputlen) {
731 		zend_argument_value_error(3, "must be contained in argument #2 ($data)");
732 		RETURN_THROWS();
733 	}
734 
735 	input += offset;
736 	inputlen -= offset;
737 
738 	array_init(return_value);
739 
740 	while (formatlen-- > 0) {
741 		char type = *(format++);
742 		char c;
743 		int repetitions = 1, argb;
744 		char *name;
745 		int namelen;
746 		int size = 0;
747 
748 		/* Handle format arguments if any */
749 		if (formatlen > 0) {
750 			c = *format;
751 
752 			if (c >= '0' && c <= '9') {
753 				errno = 0;
754 				long tmp = strtol(format, NULL, 10);
755 				/* There is not strtoi. We have to check the range ourselves.
756 				 * With 32-bit long the INT_{MIN,MAX} are useless because long == int, but with 64-bit they do limit us to 32-bit. */
757 				if (errno || tmp < INT_MIN || tmp > INT_MAX) {
758 					php_error_docref(NULL, E_WARNING, "Type %c: integer overflow", type);
759 					zend_array_destroy(Z_ARR_P(return_value));
760 					RETURN_FALSE;
761 				}
762 				repetitions = tmp;
763 
764 				while (formatlen > 0 && *format >= '0' && *format <= '9') {
765 					format++;
766 					formatlen--;
767 				}
768 			} else if (c == '*') {
769 				repetitions = -1;
770 				format++;
771 				formatlen--;
772 			}
773 		}
774 
775 		/* Get of new value in array */
776 		name = format;
777 		argb = repetitions;
778 
779 		while (formatlen > 0 && *format != '/') {
780 			formatlen--;
781 			format++;
782 		}
783 
784 		namelen = format - name;
785 
786 		if (namelen > 200)
787 			namelen = 200;
788 
789 		switch ((int) type) {
790 			/* Never use any input */
791 			case 'X':
792 				size = -1;
793 				if (repetitions < 0) {
794 					php_error_docref(NULL, E_WARNING, "Type %c: '*' ignored", type);
795 					repetitions = 1;
796 				}
797 				break;
798 
799 			case '@':
800 				size = 0;
801 				break;
802 
803 			case 'a':
804 			case 'A':
805 			case 'Z':
806 				size = repetitions;
807 				repetitions = 1;
808 				break;
809 
810 			case 'h':
811 			case 'H':
812 				size = (repetitions > 0) ? ((unsigned int) repetitions + 1) / 2 : repetitions;
813 				repetitions = 1;
814 				break;
815 
816 			/* Use 1 byte of input */
817 			case 'c':
818 			case 'C':
819 			case 'x':
820 				size = 1;
821 				break;
822 
823 			/* Use 2 bytes of input */
824 			case 's':
825 			case 'S':
826 			case 'n':
827 			case 'v':
828 				size = 2;
829 				break;
830 
831 			/* Use sizeof(int) bytes of input */
832 			case 'i':
833 			case 'I':
834 				size = sizeof(int);
835 				break;
836 
837 			/* Use 4 bytes of input */
838 			case 'l':
839 			case 'L':
840 			case 'N':
841 			case 'V':
842 				size = 4;
843 				break;
844 
845 			/* Use 8 bytes of input */
846 			case 'q':
847 			case 'Q':
848 			case 'J':
849 			case 'P':
850 #if SIZEOF_ZEND_LONG > 4
851 				size = 8;
852 				break;
853 #else
854 				zend_value_error("64-bit format codes are not available for 32-bit versions of PHP");
855 				RETURN_THROWS();
856 #endif
857 
858 			/* Use sizeof(float) bytes of input */
859 			case 'f':
860 			case 'g':
861 			case 'G':
862 				size = sizeof(float);
863 				break;
864 
865 			/* Use sizeof(double) bytes of input */
866 			case 'd':
867 			case 'e':
868 			case 'E':
869 				size = sizeof(double);
870 				break;
871 
872 			default:
873 				zend_value_error("Invalid format type %c", type);
874 				RETURN_THROWS();
875 		}
876 
877 
878 		/* Do actual unpacking */
879 		for (i = 0; i != repetitions; i++ ) {
880 
881 			if (size != 0 && size != -1 && INT_MAX - size + 1 < inputpos) {
882 				php_error_docref(NULL, E_WARNING, "Type %c: integer overflow", type);
883 				zend_array_destroy(Z_ARR_P(return_value));
884 				RETURN_FALSE;
885 			}
886 
887 			if ((inputpos + size) <= inputlen) {
888 
889 				zend_string* real_name;
890 				zval val;
891 
892 				if (repetitions == 1 && namelen > 0) {
893 					/* Use a part of the formatarg argument directly as the name. */
894 					real_name = zend_string_init_fast(name, namelen);
895 
896 				} else {
897 					/* Need to add the 1-based element number to the name */
898 					char buf[MAX_LENGTH_OF_LONG + 1];
899 					char *res = zend_print_ulong_to_buf(buf + sizeof(buf) - 1, i+1);
900 					size_t digits = buf + sizeof(buf) - 1 - res;
901 					real_name = zend_string_concat2(name, namelen, res, digits);
902 				}
903 
904 				switch ((int) type) {
905 					case 'a': {
906 						/* a will not strip any trailing whitespace or null padding */
907 						zend_long len = inputlen - inputpos;	/* Remaining string */
908 
909 						/* If size was given take minimum of len and size */
910 						if ((size >= 0) && (len > size)) {
911 							len = size;
912 						}
913 
914 						size = len;
915 
916 						ZVAL_STRINGL(&val, &input[inputpos], len);
917 						zend_symtable_update(Z_ARRVAL_P(return_value), real_name, &val);
918 						break;
919 					}
920 					case 'A': {
921 						/* A will strip any trailing whitespace */
922 						char padn = '\0'; char pads = ' '; char padt = '\t'; char padc = '\r'; char padl = '\n';
923 						zend_long len = inputlen - inputpos;	/* Remaining string */
924 
925 						/* If size was given take minimum of len and size */
926 						if ((size >= 0) && (len > size)) {
927 							len = size;
928 						}
929 
930 						size = len;
931 
932 						/* Remove trailing white space and nulls chars from unpacked data */
933 						while (--len >= 0) {
934 							if (input[inputpos + len] != padn
935 								&& input[inputpos + len] != pads
936 								&& input[inputpos + len] != padt
937 								&& input[inputpos + len] != padc
938 								&& input[inputpos + len] != padl
939 							)
940 								break;
941 						}
942 
943 						ZVAL_STRINGL(&val, &input[inputpos], len + 1);
944 						zend_symtable_update(Z_ARRVAL_P(return_value), real_name, &val);
945 						break;
946 					}
947 					/* New option added for Z to remain in-line with the Perl implementation */
948 					case 'Z': {
949 						/* Z will strip everything after the first null character */
950 						char pad = '\0';
951 						zend_long s,
952 							 len = inputlen - inputpos;	/* Remaining string */
953 
954 						/* If size was given take minimum of len and size */
955 						if ((size >= 0) && (len > size)) {
956 							len = size;
957 						}
958 
959 						size = len;
960 
961 						/* Remove everything after the first null */
962 						for (s=0 ; s < len ; s++) {
963 							if (input[inputpos + s] == pad)
964 								break;
965 						}
966 						len = s;
967 
968 						ZVAL_STRINGL(&val, &input[inputpos], len);
969 						zend_symtable_update(Z_ARRVAL_P(return_value), real_name, &val);
970 						break;
971 					}
972 
973 
974 					case 'h':
975 					case 'H': {
976 						zend_long len = (inputlen - inputpos) * 2;	/* Remaining */
977 						int nibbleshift = (type == 'h') ? 0 : 4;
978 						int first = 1;
979 						zend_string *buf;
980 						zend_long ipos, opos;
981 
982 
983 						if (size > INT_MAX / 2) {
984 							zend_string_release(real_name);
985 							zend_argument_value_error(1, "repeater must be less than or equal to %d", INT_MAX / 2);
986 							RETURN_THROWS();
987 						}
988 
989 						/* If size was given take minimum of len and size */
990 						if (size >= 0 && len > (size * 2)) {
991 							len = size * 2;
992 						}
993 
994 						if (len > 0 && argb > 0) {
995 							len -= argb % 2;
996 						}
997 
998 						buf = zend_string_alloc(len, 0);
999 
1000 						for (ipos = opos = 0; opos < len; opos++) {
1001 							char cc = (input[inputpos + ipos] >> nibbleshift) & 0xf;
1002 
1003 							if (cc < 10) {
1004 								cc += '0';
1005 							} else {
1006 								cc += 'a' - 10;
1007 							}
1008 
1009 							ZSTR_VAL(buf)[opos] = cc;
1010 							nibbleshift = (nibbleshift + 4) & 7;
1011 
1012 							if (first-- == 0) {
1013 								ipos++;
1014 								first = 1;
1015 							}
1016 						}
1017 
1018 						ZSTR_VAL(buf)[len] = '\0';
1019 
1020 						ZVAL_STR(&val, buf);
1021 						zend_symtable_update(Z_ARRVAL_P(return_value), real_name, &val);
1022 						break;
1023 					}
1024 
1025 					case 'c':   /* signed */
1026 					case 'C': { /* unsigned */
1027 						uint8_t x = input[inputpos];
1028 						zend_long v = (type == 'c') ? (int8_t) x : x;
1029 
1030 						ZVAL_LONG(&val, v);
1031 						zend_symtable_update(Z_ARRVAL_P(return_value), real_name, &val);
1032 						break;
1033 					}
1034 
1035 					case 's':   /* signed machine endian   */
1036 					case 'S':   /* unsigned machine endian */
1037 					case 'n':   /* unsigned big endian     */
1038 					case 'v': { /* unsigned little endian  */
1039 						zend_long v = 0;
1040 						uint16_t x = *((unaligned_uint16_t*) &input[inputpos]);
1041 
1042 						if (type == 's') {
1043 							v = (int16_t) x;
1044 						} else if ((type == 'n' && MACHINE_LITTLE_ENDIAN) || (type == 'v' && !MACHINE_LITTLE_ENDIAN)) {
1045 							v = php_pack_reverse_int16(x);
1046 						} else {
1047 							v = x;
1048 						}
1049 
1050 						ZVAL_LONG(&val, v);
1051 						zend_symtable_update(Z_ARRVAL_P(return_value), real_name, &val);
1052 						break;
1053 					}
1054 
1055 					case 'i':   /* signed integer, machine size, machine endian */
1056 					case 'I': { /* unsigned integer, machine size, machine endian */
1057 						zend_long v;
1058 						if (type == 'i') {
1059 							int x = *((unaligned_int*) &input[inputpos]);
1060 							v = x;
1061 						} else {
1062 							unsigned int x = *((unaligned_uint*) &input[inputpos]);
1063 							v = x;
1064 						}
1065 
1066 						ZVAL_LONG(&val, v);
1067 						zend_symtable_update(Z_ARRVAL_P(return_value), real_name, &val);
1068 						break;
1069 					}
1070 
1071 					case 'l':   /* signed machine endian   */
1072 					case 'L':   /* unsigned machine endian */
1073 					case 'N':   /* unsigned big endian     */
1074 					case 'V': { /* unsigned little endian  */
1075 						zend_long v = 0;
1076 						uint32_t x = *((unaligned_uint32_t*) &input[inputpos]);
1077 
1078 						if (type == 'l') {
1079 							v = (int32_t) x;
1080 						} else if ((type == 'N' && MACHINE_LITTLE_ENDIAN) || (type == 'V' && !MACHINE_LITTLE_ENDIAN)) {
1081 							v = php_pack_reverse_int32(x);
1082 						} else {
1083 							v = x;
1084 						}
1085 
1086 						ZVAL_LONG(&val, v);
1087 						zend_symtable_update(Z_ARRVAL_P(return_value), real_name, &val);
1088 
1089 						break;
1090 					}
1091 
1092 #if SIZEOF_ZEND_LONG > 4
1093 					case 'q':   /* signed machine endian   */
1094 					case 'Q':   /* unsigned machine endian */
1095 					case 'J':   /* unsigned big endian     */
1096 					case 'P': { /* unsigned little endian  */
1097 						zend_long v = 0;
1098 						uint64_t x = *((unaligned_uint64_t*) &input[inputpos]);
1099 
1100 						if (type == 'q') {
1101 							v = (int64_t) x;
1102 						} else if ((type == 'J' && MACHINE_LITTLE_ENDIAN) || (type == 'P' && !MACHINE_LITTLE_ENDIAN)) {
1103 							v = php_pack_reverse_int64(x);
1104 						} else {
1105 							v = x;
1106 						}
1107 
1108 						ZVAL_LONG(&val, v);
1109 						zend_symtable_update(Z_ARRVAL_P(return_value), real_name, &val);
1110 						break;
1111 					}
1112 #endif
1113 
1114 					case 'f': /* float */
1115 					case 'g': /* little endian float*/
1116 					case 'G': /* big endian float*/
1117 					{
1118 						float v;
1119 
1120 						if (type == 'g') {
1121 							v = php_pack_parse_float(1, &input[inputpos]);
1122 						} else if (type == 'G') {
1123 							v = php_pack_parse_float(0, &input[inputpos]);
1124 						} else {
1125 							memcpy(&v, &input[inputpos], sizeof(float));
1126 						}
1127 
1128 						ZVAL_DOUBLE(&val, v);
1129 						zend_symtable_update(Z_ARRVAL_P(return_value), real_name, &val);
1130 						break;
1131 					}
1132 
1133 
1134 					case 'd': /* double */
1135 					case 'e': /* little endian float */
1136 					case 'E': /* big endian float */
1137 					{
1138 						double v;
1139 						if (type == 'e') {
1140 							v = php_pack_parse_double(1, &input[inputpos]);
1141 						} else if (type == 'E') {
1142 							v = php_pack_parse_double(0, &input[inputpos]);
1143 						} else {
1144 							memcpy(&v, &input[inputpos], sizeof(double));
1145 						}
1146 
1147 						ZVAL_DOUBLE(&val, v);
1148 						zend_symtable_update(Z_ARRVAL_P(return_value), real_name, &val);
1149 						break;
1150 					}
1151 
1152 					case 'x':
1153 						/* Do nothing with input, just skip it */
1154 						break;
1155 
1156 					case 'X':
1157 						if (inputpos < size) {
1158 							inputpos = -size;
1159 							i = repetitions - 1;		/* Break out of for loop */
1160 
1161 							if (repetitions >= 0) {
1162 								php_error_docref(NULL, E_WARNING, "Type %c: outside of string", type);
1163 							}
1164 						}
1165 						break;
1166 
1167 					case '@':
1168 						if (repetitions <= inputlen) {
1169 							inputpos = repetitions;
1170 						} else {
1171 							php_error_docref(NULL, E_WARNING, "Type %c: outside of string", type);
1172 						}
1173 
1174 						i = repetitions - 1;	/* Done, break out of for loop */
1175 						break;
1176 				}
1177 
1178 				zend_string_release(real_name);
1179 
1180 				inputpos += size;
1181 				if (inputpos < 0) {
1182 					if (size != -1) { /* only print warning if not working with * */
1183 						php_error_docref(NULL, E_WARNING, "Type %c: outside of string", type);
1184 					}
1185 					inputpos = 0;
1186 				}
1187 			} else if (repetitions < 0) {
1188 				/* Reached end of input for '*' repeater */
1189 				break;
1190 			} else {
1191 				php_error_docref(NULL, E_WARNING, "Type %c: not enough input, need %d, have " ZEND_LONG_FMT, type, size, inputlen - inputpos);
1192 				zend_array_destroy(Z_ARR_P(return_value));
1193 				RETURN_FALSE;
1194 			}
1195 		}
1196 
1197 		if (formatlen > 0) {
1198 			formatlen--;	/* Skip '/' separator, does no harm if inputlen == 0 */
1199 			format++;
1200 		}
1201 	}
1202 }
1203 /* }}} */
1204 
1205 /* {{{ PHP_MINIT_FUNCTION */
PHP_MINIT_FUNCTION(pack)1206 PHP_MINIT_FUNCTION(pack)
1207 {
1208 	int i;
1209 
1210 	if (MACHINE_LITTLE_ENDIAN) {
1211 		/* Where to get lo to hi bytes from */
1212 		byte_map[0] = 0;
1213 
1214 		for (i = 0; i < (int)sizeof(int); i++) {
1215 			int_map[i] = i;
1216 		}
1217 
1218 		machine_endian_short_map[0] = 0;
1219 		machine_endian_short_map[1] = 1;
1220 		big_endian_short_map[0] = 1;
1221 		big_endian_short_map[1] = 0;
1222 		little_endian_short_map[0] = 0;
1223 		little_endian_short_map[1] = 1;
1224 
1225 		machine_endian_long_map[0] = 0;
1226 		machine_endian_long_map[1] = 1;
1227 		machine_endian_long_map[2] = 2;
1228 		machine_endian_long_map[3] = 3;
1229 		big_endian_long_map[0] = 3;
1230 		big_endian_long_map[1] = 2;
1231 		big_endian_long_map[2] = 1;
1232 		big_endian_long_map[3] = 0;
1233 		little_endian_long_map[0] = 0;
1234 		little_endian_long_map[1] = 1;
1235 		little_endian_long_map[2] = 2;
1236 		little_endian_long_map[3] = 3;
1237 
1238 #if SIZEOF_ZEND_LONG > 4
1239 		machine_endian_longlong_map[0] = 0;
1240 		machine_endian_longlong_map[1] = 1;
1241 		machine_endian_longlong_map[2] = 2;
1242 		machine_endian_longlong_map[3] = 3;
1243 		machine_endian_longlong_map[4] = 4;
1244 		machine_endian_longlong_map[5] = 5;
1245 		machine_endian_longlong_map[6] = 6;
1246 		machine_endian_longlong_map[7] = 7;
1247 		big_endian_longlong_map[0] = 7;
1248 		big_endian_longlong_map[1] = 6;
1249 		big_endian_longlong_map[2] = 5;
1250 		big_endian_longlong_map[3] = 4;
1251 		big_endian_longlong_map[4] = 3;
1252 		big_endian_longlong_map[5] = 2;
1253 		big_endian_longlong_map[6] = 1;
1254 		big_endian_longlong_map[7] = 0;
1255 		little_endian_longlong_map[0] = 0;
1256 		little_endian_longlong_map[1] = 1;
1257 		little_endian_longlong_map[2] = 2;
1258 		little_endian_longlong_map[3] = 3;
1259 		little_endian_longlong_map[4] = 4;
1260 		little_endian_longlong_map[5] = 5;
1261 		little_endian_longlong_map[6] = 6;
1262 		little_endian_longlong_map[7] = 7;
1263 #endif
1264 	}
1265 	else {
1266 		zval val;
1267 		int size = sizeof(Z_LVAL(val));
1268 		Z_LVAL(val)=0; /*silence a warning*/
1269 
1270 		/* Where to get hi to lo bytes from */
1271 		byte_map[0] = size - 1;
1272 
1273 		for (i = 0; i < (int)sizeof(int); i++) {
1274 			int_map[i] = size - (sizeof(int) - i);
1275 		}
1276 
1277 		machine_endian_short_map[0] = size - 2;
1278 		machine_endian_short_map[1] = size - 1;
1279 		big_endian_short_map[0] = size - 2;
1280 		big_endian_short_map[1] = size - 1;
1281 		little_endian_short_map[0] = size - 1;
1282 		little_endian_short_map[1] = size - 2;
1283 
1284 		machine_endian_long_map[0] = size - 4;
1285 		machine_endian_long_map[1] = size - 3;
1286 		machine_endian_long_map[2] = size - 2;
1287 		machine_endian_long_map[3] = size - 1;
1288 		big_endian_long_map[0] = size - 4;
1289 		big_endian_long_map[1] = size - 3;
1290 		big_endian_long_map[2] = size - 2;
1291 		big_endian_long_map[3] = size - 1;
1292 		little_endian_long_map[0] = size - 1;
1293 		little_endian_long_map[1] = size - 2;
1294 		little_endian_long_map[2] = size - 3;
1295 		little_endian_long_map[3] = size - 4;
1296 
1297 #if SIZEOF_ZEND_LONG > 4
1298 		machine_endian_longlong_map[0] = size - 8;
1299 		machine_endian_longlong_map[1] = size - 7;
1300 		machine_endian_longlong_map[2] = size - 6;
1301 		machine_endian_longlong_map[3] = size - 5;
1302 		machine_endian_longlong_map[4] = size - 4;
1303 		machine_endian_longlong_map[5] = size - 3;
1304 		machine_endian_longlong_map[6] = size - 2;
1305 		machine_endian_longlong_map[7] = size - 1;
1306 		big_endian_longlong_map[0] = size - 8;
1307 		big_endian_longlong_map[1] = size - 7;
1308 		big_endian_longlong_map[2] = size - 6;
1309 		big_endian_longlong_map[3] = size - 5;
1310 		big_endian_longlong_map[4] = size - 4;
1311 		big_endian_longlong_map[5] = size - 3;
1312 		big_endian_longlong_map[6] = size - 2;
1313 		big_endian_longlong_map[7] = size - 1;
1314 		little_endian_longlong_map[0] = size - 1;
1315 		little_endian_longlong_map[1] = size - 2;
1316 		little_endian_longlong_map[2] = size - 3;
1317 		little_endian_longlong_map[3] = size - 4;
1318 		little_endian_longlong_map[4] = size - 5;
1319 		little_endian_longlong_map[5] = size - 6;
1320 		little_endian_longlong_map[6] = size - 7;
1321 		little_endian_longlong_map[7] = size - 8;
1322 #endif
1323 	}
1324 
1325 	return SUCCESS;
1326 }
1327 /* }}} */
1328