xref: /php-src/ext/standard/scanf.c (revision b7356692)
1 /*
2    +----------------------------------------------------------------------+
3    | Copyright (c) The PHP Group                                          |
4    +----------------------------------------------------------------------+
5    | This source file is subject to version 3.01 of the PHP license,      |
6    | that is bundled with this package in the file LICENSE, and is        |
7    | available through the world-wide-web at the following url:           |
8    | https://www.php.net/license/3_01.txt                                 |
9    | If you did not receive a copy of the PHP license and are unable to   |
10    | obtain it through the world-wide-web, please send a note to          |
11    | license@php.net so we can mail you a copy immediately.               |
12    +----------------------------------------------------------------------+
13    | Author: Clayton Collie <clcollie@mindspring.com>                     |
14    +----------------------------------------------------------------------+
15 */
16 
17 /*
18 	scanf.c --
19 
20 	This file contains the base code which implements sscanf and by extension
21 	fscanf. Original code is from TCL8.3.0 and bears the following copyright:
22 
23 	This software is copyrighted by the Regents of the University of
24 	California, Sun Microsystems, Inc., Scriptics Corporation,
25 	and other parties.  The following terms apply to all files associated
26 	with the software unless explicitly disclaimed in individual files.
27 
28 	The authors hereby grant permission to use, copy, modify, distribute,
29 	and license this software and its documentation for any purpose, provided
30 	that existing copyright notices are retained in all copies and that this
31 	notice is included verbatim in any distributions. No written agreement,
32 	license, or royalty fee is required for any of the authorized uses.
33 	Modifications to this software may be copyrighted by their authors
34 	and need not follow the licensing terms described here, provided that
35 	the new terms are clearly indicated on the first page of each file where
36 	they apply.
37 
38 	IN NO EVENT SHALL THE AUTHORS OR DISTRIBUTORS BE LIABLE TO ANY PARTY
39 	FOR DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
40 	ARISING OUT OF THE USE OF THIS SOFTWARE, ITS DOCUMENTATION, OR ANY
41 	DERIVATIVES THEREOF, EVEN IF THE AUTHORS HAVE BEEN ADVISED OF THE
42 	POSSIBILITY OF SUCH DAMAGE.
43 
44 	THE AUTHORS AND DISTRIBUTORS SPECIFICALLY DISCLAIM ANY WARRANTIES,
45 	INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY,
46 	FITNESS FOR A PARTICULAR PURPOSE, AND NON-INFRINGEMENT.  THIS SOFTWARE
47 	IS PROVIDED ON AN "AS IS" BASIS, AND THE AUTHORS AND DISTRIBUTORS HAVE
48 	NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR
49 	MODIFICATIONS.
50 
51 	GOVERNMENT USE: If you are acquiring this software on behalf of the
52 	U.S. government, the Government shall have only "Restricted Rights"
53 	in the software and related documentation as defined in the Federal
54 	Acquisition Regulations (FARs) in Clause 52.227.19 (c) (2).  If you
55 	are acquiring the software on behalf of the Department of Defense, the
56 	software shall be classified as "Commercial Computer Software" and the
57 	Government shall have only "Restricted Rights" as defined in Clause
58 	252.227-7013 (c) (1) of DFARs.  Notwithstanding the foregoing, the
59 	authors grant the U.S. Government and others acting in its behalf
60 	permission to use and distribute the software in accordance with the
61 	terms specified in this license.
62 */
63 
64 #include <stdio.h>
65 #include <limits.h>
66 #include <ctype.h>
67 #include "php.h"
68 #include "php_variables.h"
69 #include <locale.h>
70 #include "zend_execute.h"
71 #include "zend_operators.h"
72 #include "zend_strtod.h"
73 #include "php_globals.h"
74 #include "basic_functions.h"
75 #include "scanf.h"
76 
/*
 * Flag values used internally by [f|s]canf.
 *
 * The first group describes the conversion specifier as a whole; the second
 * group is transient state used while accumulating the characters of a
 * numeric field.
 */
#define SCAN_NOSKIP     0x1       /* Don't skip blanks. */
#define SCAN_SUPPRESS	0x2	  /* Suppress assignment. */
#define SCAN_UNSIGNED	0x4	  /* Read an unsigned value. */
#define SCAN_WIDTH      0x8       /* A width value was supplied. */

#define SCAN_SIGNOK     0x10      /* A +/- character is allowed. */
#define SCAN_NODIGITS   0x20      /* No digits have been scanned. */
#define SCAN_NOZERO     0x40      /* No zero digits have been scanned. */
#define SCAN_XOK        0x80      /* An 'x' is allowed. */
#define SCAN_PTOK       0x100     /* Decimal point is allowed. */
#define SCAN_EXPOK      0x200     /* An exponent is allowed. */

/*
 * Convert a (possibly signed) char to an unsigned byte value so that it can
 * be handed safely to the <ctype.h> classifiers.  The expansion is fully
 * parenthesized so the cast cannot bind to neighbouring operators.
 */
#define UCHAR(x)		((zend_uchar)(x))
93 
/*
 * One inclusive character range of a "%[...]" set.  BuildCharSet() always
 * stores the smaller bound in 'start'.
 */
struct Range {
	char start;
	char end;
};

/*
 * The following structure contains the information associated with
 * a character set: a list of individual characters plus a list of
 * character ranges.
 */
typedef struct CharSet {
	int exclude;		/* 1 if this is an exclusion set. */
	int nchars;		/* Number of entries used in 'chars'. */
	char *chars;		/* Individual member characters. */
	int nranges;		/* Number of entries used in 'ranges'. */
	struct Range *ranges;	/* Member ranges, or NULL if there are none. */
} CharSet;
108 
109 typedef zend_long (*int_string_formater)(const char*, char**, int);
110 
111 /*
112  * Declarations for functions used only in this file.
113  */
114 static char *BuildCharSet(CharSet *cset, char *format);
115 static int	CharInSet(CharSet *cset, int ch);
116 static void	ReleaseCharSet(CharSet *cset);
117 static inline void scan_set_error_return(int numVars, zval *return_value);
118 
119 
120 /* {{{ BuildCharSet
121  *----------------------------------------------------------------------
122  *
123  * BuildCharSet --
124  *
125  *	This function examines a character set format specification
126  *	and builds a CharSet containing the individual characters and
127  *	character ranges specified.
128  *
129  * Results:
130  *	Returns the next format position.
131  *
132  * Side effects:
133  *	Initializes the charset.
134  *
135  *----------------------------------------------------------------------
136  */
BuildCharSet(CharSet * cset,char * format)137 static char * BuildCharSet(CharSet *cset, char *format)
138 {
139 	char *ch, start;
140 	int  nranges;
141 	char *end;
142 
143 	memset(cset, 0, sizeof(CharSet));
144 
145 	ch = format;
146 	if (*ch == '^') {
147 		cset->exclude = 1;
148 		ch = ++format;
149 	}
150 	end = format + 1;	/* verify this - cc */
151 
152 	/*
153 	 * Find the close bracket so we can overallocate the set.
154 	 */
155 	if (*ch == ']') {
156 		ch = end++;
157 	}
158 	nranges = 0;
159 	while (*ch != ']') {
160 		if (*ch == '-') {
161 			nranges++;
162 		}
163 		ch = end++;
164 	}
165 
166 	cset->chars = (char *) safe_emalloc(sizeof(char), (end - format - 1), 0);
167 	if (nranges > 0) {
168 		cset->ranges = (struct Range *) safe_emalloc(sizeof(struct Range), nranges, 0);
169 	} else {
170 		cset->ranges = NULL;
171 	}
172 
173 	/*
174 	 * Now build the character set.
175 	 */
176 	cset->nchars = cset->nranges = 0;
177 	ch    = format++;
178 	start = *ch;
179 	if (*ch == ']' || *ch == '-') {
180 		cset->chars[cset->nchars++] = *ch;
181 		ch = format++;
182 	}
183 	while (*ch != ']') {
184 		if (*format == '-') {
185 			/*
186 			 * This may be the first character of a range, so don't add
187 			 * it yet.
188 			 */
189 			start = *ch;
190 		} else if (*ch == '-') {
191 			/*
192 			 * Check to see if this is the last character in the set, in which
193 			 * case it is not a range and we should add the previous character
194 			 * as well as the dash.
195 			 */
196 			if (*format == ']') {
197 				cset->chars[cset->nchars++] = start;
198 				cset->chars[cset->nchars++] = *ch;
199 			} else {
200 				ch = format++;
201 
202 				/*
203 				 * Check to see if the range is in reverse order.
204 				 */
205 				if (start < *ch) {
206 					cset->ranges[cset->nranges].start = start;
207 					cset->ranges[cset->nranges].end = *ch;
208 				} else {
209 					cset->ranges[cset->nranges].start = *ch;
210 					cset->ranges[cset->nranges].end = start;
211 				}
212 				cset->nranges++;
213 			}
214 		} else {
215 			cset->chars[cset->nchars++] = *ch;
216 		}
217 		ch = format++;
218 	}
219 	return format;
220 }
221 /* }}} */
222 
223 /* {{{ CharInSet
224  *----------------------------------------------------------------------
225  *
226  * CharInSet --
227  *
228  *	Check to see if a character matches the given set.
229  *
230  * Results:
231  *	Returns non-zero if the character matches the given set.
232  *
233  * Side effects:
234  *	None.
235  *
236  *----------------------------------------------------------------------
237  */
CharInSet(CharSet * cset,int c)238 static int CharInSet(CharSet *cset, int c)
239 {
240 	char ch = (char) c;
241 	int i, match = 0;
242 
243 	for (i = 0; i < cset->nchars; i++) {
244 		if (cset->chars[i] == ch) {
245 			match = 1;
246 			break;
247 		}
248 	}
249 	if (!match) {
250 		for (i = 0; i < cset->nranges; i++) {
251 			if ((cset->ranges[i].start <= ch)
252 				&& (ch <= cset->ranges[i].end)) {
253 				match = 1;
254 				break;
255 			}
256 		}
257 	}
258 	return (cset->exclude ? !match : match);
259 }
260 /* }}} */
261 
262 /* {{{ ReleaseCharSet
263  *----------------------------------------------------------------------
264  *
265  * ReleaseCharSet --
266  *
267  *	Free the storage associated with a character set.
268  *
269  * Results:
270  *	None.
271  *
272  * Side effects:
273  *	None.
274  *
275  *----------------------------------------------------------------------
276  */
ReleaseCharSet(CharSet * cset)277 static void ReleaseCharSet(CharSet *cset)
278 {
279 	efree((char *)cset->chars);
280 	if (cset->ranges) {
281 		efree((char *)cset->ranges);
282 	}
283 }
284 /* }}} */
285 
286 /* {{{ ValidateFormat
287  *----------------------------------------------------------------------
288  *
289  * ValidateFormat --
290  *
291  *	Parse the format string and verify that it is properly formed
292  *	and that there are exactly enough variables on the command line.
293  *
294  * Results:
295  *    FAILURE or SUCCESS.
296  *
297  * Side effects:
298  *     May set php_error based on abnormal conditions.
299  *
300  * Parameters :
301  *     format     The format string.
302  *     numVars    The number of variables passed to the scan command.
303  *     totalSubs  The number of variables that will be required.
304  *
305  *----------------------------------------------------------------------
306 */
ValidateFormat(char * format,int numVars,int * totalSubs)307 PHPAPI int ValidateFormat(char *format, int numVars, int *totalSubs)
308 {
309 #define STATIC_LIST_SIZE 16
310 	int gotXpg, gotSequential, value, i, flags;
311 	char *end, *ch = NULL;
312 	int staticAssign[STATIC_LIST_SIZE];
313 	int *nassign = staticAssign;
314 	int objIndex, xpgSize, nspace = STATIC_LIST_SIZE;
315 
316 	/*
317 	 * Initialize an array that records the number of times a variable
318 	 * is assigned to by the format string.  We use this to detect if
319 	 * a variable is multiply assigned or left unassigned.
320 	 */
321 	if (numVars > nspace) {
322 		nassign = (int*)safe_emalloc(sizeof(int), numVars, 0);
323 		nspace = numVars;
324 	}
325 	for (i = 0; i < nspace; i++) {
326 		nassign[i] = 0;
327 	}
328 
329 	xpgSize = objIndex = gotXpg = gotSequential = 0;
330 
331 	while (*format != '\0') {
332 		ch = format++;
333 		flags = 0;
334 
335 		if (*ch != '%') {
336 			continue;
337 		}
338 		ch = format++;
339 		if (*ch == '%') {
340 			continue;
341 		}
342 		if (*ch == '*') {
343 			flags |= SCAN_SUPPRESS;
344 			ch = format++;
345 			goto xpgCheckDone;
346 		}
347 
348 		if ( isdigit( (int)*ch ) ) {
349 			/*
350 			 * Check for an XPG3-style %n$ specification.  Note: there
351 			 * must not be a mixture of XPG3 specs and non-XPG3 specs
352 			 * in the same format string.
353 			 */
354 			value = ZEND_STRTOUL(format-1, &end, 10);
355 			if (*end != '$') {
356 				goto notXpg;
357 			}
358 			format = end+1;
359 			ch     = format++;
360 			gotXpg = 1;
361 			if (gotSequential) {
362 				goto mixedXPG;
363 			}
364 			objIndex = value - 1;
365 			if ((objIndex < 0) || (numVars && (objIndex >= numVars))) {
366 				goto badIndex;
367 			} else if (numVars == 0) {
368 				/*
369 				 * In the case where no vars are specified, the user can
370 				 * specify %9999$ legally, so we have to consider special
371 				 * rules for growing the assign array.  'value' is
372 				 * guaranteed to be > 0.
373 				 */
374 
375 				/* set a lower artificial limit on this
376 				 * in the interest of security and resource friendliness
377 				 * 255 arguments should be more than enough. - cc
378 				 */
379 				if (value > SCAN_MAX_ARGS) {
380 					goto badIndex;
381 				}
382 
383 				xpgSize = (xpgSize > value) ? xpgSize : value;
384 			}
385 			goto xpgCheckDone;
386 		}
387 
388 notXpg:
389 		gotSequential = 1;
390 		if (gotXpg) {
391 mixedXPG:
392 			zend_value_error("%s", "cannot mix \"%\" and \"%n$\" conversion specifiers");
393 			goto error;
394 		}
395 
396 xpgCheckDone:
397 		/*
398 		 * Parse any width specifier.
399 		 */
400 		if (isdigit(UCHAR(*ch))) {
401 			value = ZEND_STRTOUL(format-1, &format, 10);
402 			flags |= SCAN_WIDTH;
403 			ch = format++;
404 		}
405 
406 		/*
407 		 * Ignore size specifier.
408 		 */
409 		if ((*ch == 'l') || (*ch == 'L') || (*ch == 'h')) {
410 			ch = format++;
411 		}
412 
413 		if (!(flags & SCAN_SUPPRESS) && numVars && (objIndex >= numVars)) {
414 			goto badIndex;
415 		}
416 
417 		/*
418 		 * Handle the various field types.
419 		 */
420 		switch (*ch) {
421 			case 'n':
422 			case 'd':
423 			case 'D':
424 			case 'i':
425 			case 'o':
426 			case 'x':
427 			case 'X':
428 			case 'u':
429 			case 'f':
430 			case 'e':
431 			case 'E':
432 			case 'g':
433 			case 's':
434 				break;
435 
436 			case 'c':
437 				/* we differ here with the TCL implementation in allowing for */
438 				/* a character width specification, to be more consistent with */
439 				/* ANSI. since Zend auto allocates space for vars, this is no */
440 				/* problem - cc                                               */
441 				/*
442 				if (flags & SCAN_WIDTH) {
443 					php_error_docref(NULL, E_WARNING, "Field width may not be specified in %c conversion");
444 					goto error;
445 				}
446 				*/
447 				break;
448 
449 			case '[':
450 				if (*format == '\0') {
451 					goto badSet;
452 				}
453 				ch = format++;
454 				if (*ch == '^') {
455 					if (*format == '\0') {
456 						goto badSet;
457 					}
458 					ch = format++;
459 				}
460 				if (*ch == ']') {
461 					if (*format == '\0') {
462 						goto badSet;
463 					}
464 					ch = format++;
465 				}
466 				while (*ch != ']') {
467 					if (*format == '\0') {
468 						goto badSet;
469 					}
470 					ch = format++;
471 				}
472 				break;
473 badSet:
474 				zend_value_error("Unmatched [ in format string");
475 				goto error;
476 
477 			default: {
478 				zend_value_error("Bad scan conversion character \"%c\"", *ch);
479 				goto error;
480 			}
481 		}
482 
483 		if (!(flags & SCAN_SUPPRESS)) {
484 			if (objIndex >= nspace) {
485 				/*
486 				 * Expand the nassign buffer.  If we are using XPG specifiers,
487 				 * make sure that we grow to a large enough size.  xpgSize is
488 				 * guaranteed to be at least one larger than objIndex.
489 				 */
490 				value = nspace;
491 				if (xpgSize) {
492 					nspace = xpgSize;
493 				} else {
494 					nspace += STATIC_LIST_SIZE;
495 				}
496 				if (nassign == staticAssign) {
497 					nassign = (void *)safe_emalloc(nspace, sizeof(int), 0);
498 					for (i = 0; i < STATIC_LIST_SIZE; ++i) {
499 						nassign[i] = staticAssign[i];
500 					}
501 				} else {
502 					nassign = (void *)erealloc((void *)nassign, nspace * sizeof(int));
503 				}
504 				for (i = value; i < nspace; i++) {
505 					nassign[i] = 0;
506 				}
507 			}
508 			nassign[objIndex]++;
509 			objIndex++;
510 		}
511 	} /* while (*format != '\0') */
512 
513 	/*
514 	 * Verify that all of the variable were assigned exactly once.
515 	 */
516 	if (numVars == 0) {
517 		if (xpgSize) {
518 			numVars = xpgSize;
519 		} else {
520 			numVars = objIndex;
521 		}
522 	}
523 	if (totalSubs) {
524 		*totalSubs = numVars;
525 	}
526 	for (i = 0; i < numVars; i++) {
527 		if (nassign[i] > 1) {
528 			zend_value_error("%s", "Variable is assigned by multiple \"%n$\" conversion specifiers");
529 			goto error;
530 		} else if (!xpgSize && (nassign[i] == 0)) {
531 			/*
532 			 * If the space is empty, and xpgSize is 0 (means XPG wasn't
533 			 * used, and/or numVars != 0), then too many vars were given
534 			 */
535 			zend_value_error("Variable is not assigned by any conversion specifiers");
536 			goto error;
537 		}
538 	}
539 
540 	if (nassign != staticAssign) {
541 		efree((char *)nassign);
542 	}
543 	return SCAN_SUCCESS;
544 
545 badIndex:
546 	if (gotXpg) {
547 		zend_value_error("%s", "\"%n$\" argument index out of range");
548 	} else {
549 		zend_value_error("Different numbers of variable names and field specifiers");
550 	}
551 
552 error:
553 	if (nassign != staticAssign) {
554 		efree((char *)nassign);
555 	}
556 	return SCAN_ERROR_INVALID_FORMAT;
557 #undef STATIC_LIST_SIZE
558 }
559 /* }}} */
560 
561 /* {{{ php_sscanf_internal
562  * This is the internal function which does processing on behalf of
563  * both sscanf() and fscanf()
564  *
565  * parameters :
566  * 		string		literal string to be processed
567  * 		format		format string
568  *		argCount	total number of elements in the args array
569  *		args		arguments passed in from user function (f|s)scanf
570  * 		varStart	offset (in args) of 1st variable passed in to (f|s)scanf
571  *		return_value set with the results of the scan
572  */
573 
php_sscanf_internal(char * string,char * format,int argCount,zval * args,int varStart,zval * return_value)574 PHPAPI int php_sscanf_internal( char *string, char *format,
575 				int argCount, zval *args,
576 				int varStart, zval *return_value)
577 {
578 	int  numVars, nconversions, totalVars = -1;
579 	int  i, result;
580 	zend_long value;
581 	int  objIndex;
582 	char *end, *baseString;
583 	zval *current;
584 	char op   = 0;
585 	int  base = 0;
586 	int  underflow = 0;
587 	size_t width;
588 	int_string_formater fn = NULL;
589 	char *ch, sch;
590 	int  flags;
591 	char buf[64];	/* Temporary buffer to hold scanned number
592 					 * strings before they are passed to strtoul() */
593 
594 	/* do some sanity checking */
595 	if ((varStart > argCount) || (varStart < 0)){
596 		varStart = SCAN_MAX_ARGS + 1;
597 	}
598 	numVars = argCount - varStart;
599 	if (numVars < 0) {
600 		numVars = 0;
601 	}
602 
603 	/*
604 	 * Check for errors in the format string.
605 	 */
606 	if (ValidateFormat(format, numVars, &totalVars) != SCAN_SUCCESS) {
607 		scan_set_error_return( numVars, return_value );
608 		return SCAN_ERROR_INVALID_FORMAT;
609 	}
610 
611 	objIndex = numVars ? varStart : 0;
612 
613 	/*
614 	 * If any variables are passed, make sure they are all passed by reference
615 	 */
616 	if (numVars) {
617 		for (i = varStart;i < argCount;i++){
618 			ZEND_ASSERT(Z_ISREF(args[i]) && "Parameter must be passed by reference");
619 		}
620 	}
621 
622 	/*
623 	 * Allocate space for the result objects. Only happens when no variables
624 	 * are specified
625 	 */
626 	if (!numVars) {
627 		zval tmp;
628 
629 		/* allocate an array for return */
630 		array_init(return_value);
631 
632 		for (i = 0; i < totalVars; i++) {
633 			ZVAL_NULL(&tmp);
634 			if (add_next_index_zval(return_value, &tmp) == FAILURE) {
635 				scan_set_error_return(0, return_value);
636 				return FAILURE;
637 			}
638 		}
639 		varStart = 0; /* Array index starts from 0 */
640 	}
641 
642 	baseString = string;
643 
644 	/*
645 	 * Iterate over the format string filling in the result objects until
646 	 * we reach the end of input, the end of the format string, or there
647 	 * is a mismatch.
648 	 */
649 	nconversions = 0;
650 	/* note ! - we need to limit the loop for objIndex to keep it in bounds */
651 
652 	while (*format != '\0') {
653 		ch    = format++;
654 		flags = 0;
655 
656 		/*
657 		 * If we see whitespace in the format, skip whitespace in the string.
658 		 */
659 		if ( isspace( (int)*ch ) ) {
660 			sch = *string;
661 			while ( isspace( (int)sch ) ) {
662 				if (*string == '\0') {
663 					goto done;
664 				}
665 				string++;
666 				sch = *string;
667 			}
668 			continue;
669 		}
670 
671 		if (*ch != '%') {
672 literal:
673 			if (*string == '\0') {
674 				underflow = 1;
675 				goto done;
676 			}
677 			sch = *string;
678 			string++;
679 			if (*ch != sch) {
680 				goto done;
681 			}
682 			continue;
683 		}
684 
685 		ch = format++;
686 		if (*ch == '%') {
687 			goto literal;
688 		}
689 
690 		/*
691 		 * Check for assignment suppression ('*') or an XPG3-style
692 		 * assignment ('%n$').
693 		 */
694 		if (*ch == '*') {
695 			flags |= SCAN_SUPPRESS;
696 			ch = format++;
697 		} else if ( isdigit(UCHAR(*ch))) {
698 			value = ZEND_STRTOUL(format-1, &end, 10);
699 			if (*end == '$') {
700 				format = end+1;
701 				ch = format++;
702 				objIndex = varStart + value - 1;
703 			}
704 		}
705 
706 		/*
707 		 * Parse any width specifier.
708 		 */
709 		if ( isdigit(UCHAR(*ch))) {
710 			width = ZEND_STRTOUL(format-1, &format, 10);
711 			ch = format++;
712 		} else {
713 			width = 0;
714 		}
715 
716 		/*
717 		 * Ignore size specifier.
718 		 */
719 		if ((*ch == 'l') || (*ch == 'L') || (*ch == 'h')) {
720 			ch = format++;
721 		}
722 
723 		/*
724 		 * Handle the various field types.
725 		 */
726 		switch (*ch) {
727 			case 'n':
728 				if (!(flags & SCAN_SUPPRESS)) {
729 					if (numVars && objIndex >= argCount) {
730 						break;
731 					} else if (numVars) {
732 						current = args + objIndex++;
733 						ZEND_TRY_ASSIGN_REF_LONG(current, (zend_long) (string - baseString));
734 					} else {
735 						add_index_long(return_value, objIndex++, string - baseString);
736 					}
737 				}
738 				nconversions++;
739 				continue;
740 
741 			case 'd':
742 			case 'D':
743 				op = 'i';
744 				base = 10;
745 				fn = (int_string_formater)ZEND_STRTOL_PTR;
746 				break;
747 			case 'i':
748 				op = 'i';
749 				base = 0;
750 				fn = (int_string_formater)ZEND_STRTOL_PTR;
751 				break;
752 			case 'o':
753 				op = 'i';
754 				base = 8;
755 				fn = (int_string_formater)ZEND_STRTOL_PTR;
756 				break;
757 			case 'x':
758 			case 'X':
759 				op = 'i';
760 				base = 16;
761 				fn = (int_string_formater)ZEND_STRTOL_PTR;
762 				break;
763 			case 'u':
764 				op = 'i';
765 				base = 10;
766 				flags |= SCAN_UNSIGNED;
767 				fn = (int_string_formater)ZEND_STRTOUL_PTR;
768 				break;
769 
770 			case 'f':
771 			case 'e':
772 			case 'E':
773 			case 'g':
774 				op = 'f';
775 				break;
776 
777 			case 's':
778 				op = 's';
779 				break;
780 
781 			case 'c':
782 				op = 's';
783 				flags |= SCAN_NOSKIP;
784 				/*-cc-*/
785 				if (0 == width) {
786 					width = 1;
787 				}
788 				/*-cc-*/
789 				break;
790 			case '[':
791 				op = '[';
792 				flags |= SCAN_NOSKIP;
793 				break;
794 		}   /* switch */
795 
796 		/*
797 		 * At this point, we will need additional characters from the
798 		 * string to proceed.
799 		 */
800 		if (*string == '\0') {
801 			underflow = 1;
802 			goto done;
803 		}
804 
805 		/*
806 		 * Skip any leading whitespace at the beginning of a field unless
807 		 * the format suppresses this behavior.
808 		 */
809 		if (!(flags & SCAN_NOSKIP)) {
810 			while (*string != '\0') {
811 				sch = *string;
812 				if (! isspace((int)sch) ) {
813 					break;
814 				}
815 				string++;
816 			}
817 			if (*string == '\0') {
818 				underflow = 1;
819 				goto done;
820 			}
821 		}
822 
823 		/*
824 		 * Perform the requested scanning operation.
825 		 */
826 		switch (op) {
827 			case 'c':
828 			case 's':
829 				/*
830 				 * Scan a string up to width characters or whitespace.
831 				 */
832 				if (width == 0) {
833 					width = (size_t) ~0;
834 				}
835 				end = string;
836 				while (*end != '\0') {
837 					sch = *end;
838 					if ( isspace( (int)sch ) ) {
839 						break;
840 					}
841 					end++;
842 					if (--width == 0) {
843 					   break;
844 					}
845 				}
846 				if (!(flags & SCAN_SUPPRESS)) {
847 					if (numVars && objIndex >= argCount) {
848 						break;
849 					} else if (numVars) {
850 						current = args + objIndex++;
851 						ZEND_TRY_ASSIGN_REF_STRINGL(current, string, end - string);
852 					} else {
853 						add_index_stringl(return_value, objIndex++, string, end-string);
854 					}
855 				}
856 				string = end;
857 				break;
858 
859 			case '[': {
860 				CharSet cset;
861 
862 				if (width == 0) {
863 					width = (size_t) ~0;
864 				}
865 				end = string;
866 
867 				format = BuildCharSet(&cset, format);
868 				while (*end != '\0') {
869 					sch = *end;
870 					if (!CharInSet(&cset, (int)sch)) {
871 						break;
872 					}
873 					end++;
874 					if (--width == 0) {
875 						break;
876 					}
877 				}
878 				ReleaseCharSet(&cset);
879 
880 				if (string == end) {
881 					/*
882 					 * Nothing matched the range, stop processing
883 					 */
884 					goto done;
885 				}
886 				if (!(flags & SCAN_SUPPRESS)) {
887 					if (numVars && objIndex >= argCount) {
888 						break;
889 					} else if (numVars) {
890 						current = args + objIndex++;
891 						ZEND_TRY_ASSIGN_REF_STRINGL(current, string, end - string);
892 					} else {
893 						add_index_stringl(return_value, objIndex++, string, end-string);
894 					}
895 				}
896 				string = end;
897 				break;
898 			}
899 /*
900 			case 'c':
901 			   / Scan a single character./
902 
903 				sch = *string;
904 				string++;
905 				if (!(flags & SCAN_SUPPRESS)) {
906 					if (numVars) {
907 						char __buf[2];
908 						__buf[0] = sch;
909 						__buf[1] = '\0';
910 						current = args[objIndex++];
911 						zval_ptr_dtor_nogc(*current);
912 						ZVAL_STRINGL( *current, __buf, 1);
913 					} else {
914 						add_index_stringl(return_value, objIndex++, &sch, 1);
915 					}
916 				}
917 				break;
918 */
919 			case 'i':
920 				/*
921 				 * Scan an unsigned or signed integer.
922 				 */
923 				/*-cc-*/
924 				buf[0] = '\0';
925 				/*-cc-*/
926 				if ((width == 0) || (width > sizeof(buf) - 1)) {
927 					width = sizeof(buf) - 1;
928 				}
929 
930 				flags |= SCAN_SIGNOK | SCAN_NODIGITS | SCAN_NOZERO;
931 				for (end = buf; width > 0; width--) {
932 					switch (*string) {
933 						/*
934 						 * The 0 digit has special meaning at the beginning of
935 						 * a number.  If we are unsure of the base, it
936 						 * indicates that we are in base 8 or base 16 (if it is
937 						 * followed by an 'x').
938 						 */
939 						case '0':
940 							/*-cc-*/
941 							if (base == 16) {
942 								flags |= SCAN_XOK;
943 							}
944 							/*-cc-*/
945 							if (base == 0) {
946 								base = 8;
947 								flags |= SCAN_XOK;
948 							}
949 							if (flags & SCAN_NOZERO) {
950 								flags &= ~(SCAN_SIGNOK | SCAN_NODIGITS | SCAN_NOZERO);
951 							} else {
952 								flags &= ~(SCAN_SIGNOK | SCAN_XOK | SCAN_NODIGITS);
953 							}
954 							goto addToInt;
955 
956 						case '1': case '2': case '3': case '4':
957 						case '5': case '6': case '7':
958 							if (base == 0) {
959 								base = 10;
960 							}
961 							flags &= ~(SCAN_SIGNOK | SCAN_XOK | SCAN_NODIGITS);
962 							goto addToInt;
963 
964 						case '8': case '9':
965 							if (base == 0) {
966 								base = 10;
967 							}
968 							if (base <= 8) {
969 							   break;
970 							}
971 							flags &= ~(SCAN_SIGNOK | SCAN_XOK | SCAN_NODIGITS);
972 							goto addToInt;
973 
974 						case 'A': case 'B': case 'C':
975 						case 'D': case 'E': case 'F':
976 						case 'a': case 'b': case 'c':
977 						case 'd': case 'e': case 'f':
978 							if (base <= 10) {
979 								break;
980 							}
981 							flags &= ~(SCAN_SIGNOK | SCAN_XOK | SCAN_NODIGITS);
982 							goto addToInt;
983 
984 						case '+': case '-':
985 							if (flags & SCAN_SIGNOK) {
986 								flags &= ~SCAN_SIGNOK;
987 								goto addToInt;
988 							}
989 							break;
990 
991 						case 'x': case 'X':
992 							if ((flags & SCAN_XOK) && (end == buf+1)) {
993 								base = 16;
994 								flags &= ~SCAN_XOK;
995 								goto addToInt;
996 							}
997 							break;
998 					}
999 
1000 					/*
1001 					 * We got an illegal character so we are done accumulating.
1002 					 */
1003 					break;
1004 
1005 addToInt:
1006 					/*
1007 					 * Add the character to the temporary buffer.
1008 					 */
1009 					*end++ = *string++;
1010 					if (*string == '\0') {
1011 						break;
1012 					}
1013 				}
1014 
1015 				/*
1016 				 * Check to see if we need to back up because we only got a
1017 				 * sign or a trailing x after a 0.
1018 				 */
1019 				if (flags & SCAN_NODIGITS) {
1020 					if (*string == '\0') {
1021 						underflow = 1;
1022 					}
1023 					goto done;
1024 				} else if (end[-1] == 'x' || end[-1] == 'X') {
1025 					end--;
1026 					string--;
1027 				}
1028 
1029 				/*
1030 				 * Scan the value from the temporary buffer.  If we are
1031 				 * returning a large unsigned value, we have to convert it back
1032 				 * to a string since PHP only supports signed values.
1033 				 */
1034 				if (!(flags & SCAN_SUPPRESS)) {
1035 					*end = '\0';
1036 					value = (zend_long) (*fn)(buf, NULL, base);
1037 					if ((flags & SCAN_UNSIGNED) && (value < 0)) {
1038 						snprintf(buf, sizeof(buf), ZEND_ULONG_FMT, value); /* INTL: ISO digit */
1039 						if (numVars && objIndex >= argCount) {
1040 							break;
1041 						} else if (numVars) {
1042 							 /* change passed value type to string */
1043 							current = args + objIndex++;
1044 							ZEND_TRY_ASSIGN_REF_STRING(current, buf);
1045 						} else {
1046 							add_index_string(return_value, objIndex++, buf);
1047 						}
1048 					} else {
1049 						if (numVars && objIndex >= argCount) {
1050 							break;
1051 						} else if (numVars) {
1052 							current = args + objIndex++;
1053 							ZEND_TRY_ASSIGN_REF_LONG(current, value);
1054 						} else {
1055 							add_index_long(return_value, objIndex++, value);
1056 						}
1057 					}
1058 				}
1059 				break;
1060 
1061 			case 'f':
1062 				/*
1063 				 * Scan a floating point number
1064 				 */
1065 				buf[0] = '\0';     /* call me pedantic */
1066 				if ((width == 0) || (width > sizeof(buf) - 1)) {
1067 					width = sizeof(buf) - 1;
1068 				}
1069 				flags |= SCAN_SIGNOK | SCAN_NODIGITS | SCAN_PTOK | SCAN_EXPOK;
1070 				for (end = buf; width > 0; width--) {
1071 					switch (*string) {
1072 						case '0': case '1': case '2': case '3':
1073 						case '4': case '5': case '6': case '7':
1074 						case '8': case '9':
1075 							flags &= ~(SCAN_SIGNOK | SCAN_NODIGITS);
1076 							goto addToFloat;
1077 						case '+':
1078 						case '-':
1079 							if (flags & SCAN_SIGNOK) {
1080 								flags &= ~SCAN_SIGNOK;
1081 								goto addToFloat;
1082 							}
1083 							break;
1084 						case '.':
1085 							if (flags & SCAN_PTOK) {
1086 								flags &= ~(SCAN_SIGNOK | SCAN_PTOK);
1087 								goto addToFloat;
1088 							}
1089 							break;
1090 						case 'e':
1091 						case 'E':
1092 							/*
1093 							 * An exponent is not allowed until there has
1094 							 * been at least one digit.
1095 							 */
1096 							if ((flags & (SCAN_NODIGITS | SCAN_EXPOK)) == SCAN_EXPOK) {
1097 								flags = (flags & ~(SCAN_EXPOK|SCAN_PTOK))
1098 									| SCAN_SIGNOK | SCAN_NODIGITS;
1099 								goto addToFloat;
1100 							}
1101 							break;
1102 					}
1103 
1104 					/*
1105 					 * We got an illegal character so we are done accumulating.
1106 					 */
1107 					break;
1108 
1109 addToFloat:
1110 					/*
1111 					 * Add the character to the temporary buffer.
1112 					 */
1113 					*end++ = *string++;
1114 					if (*string == '\0') {
1115 						break;
1116 					}
1117 				}
1118 
1119 				/*
1120 				 * Check to see if we need to back up because we saw a
1121 				 * trailing 'e' or sign.
1122 				 */
1123 				if (flags & SCAN_NODIGITS) {
1124 					if (flags & SCAN_EXPOK) {
1125 						/*
1126 						 * There were no digits at all so scanning has
1127 						 * failed and we are done.
1128 						 */
1129 						if (*string == '\0') {
1130 							underflow = 1;
1131 						}
1132 						goto done;
1133 					}
1134 
1135 					/*
1136 					 * We got a bad exponent ('e' and maybe a sign).
1137 					 */
1138 					end--;
1139 					string--;
1140 					if (*end != 'e' && *end != 'E') {
1141 						end--;
1142 						string--;
1143 					}
1144 				}
1145 
1146 				/*
1147 				 * Scan the value from the temporary buffer.
1148 				 */
1149 				if (!(flags & SCAN_SUPPRESS)) {
1150 					double dvalue;
1151 					*end = '\0';
1152 					dvalue = zend_strtod(buf, NULL);
1153 					if (numVars && objIndex >= argCount) {
1154 						break;
1155 					} else if (numVars) {
1156 						current = args + objIndex++;
1157 						ZEND_TRY_ASSIGN_REF_DOUBLE(current, dvalue);
1158 					} else {
1159 						add_index_double(return_value, objIndex++, dvalue );
1160 					}
1161 				}
1162 				break;
1163 		} /* switch (op) */
1164 		nconversions++;
1165 	} /*  while (*format != '\0') */
1166 
1167 done:
1168 	result = SCAN_SUCCESS;
1169 
1170 	if (underflow && (0==nconversions)) {
1171 		scan_set_error_return( numVars, return_value );
1172 		result = SCAN_ERROR_EOF;
1173 	} else if (numVars) {
1174 		zval_ptr_dtor(return_value );
1175 		ZVAL_LONG(return_value, nconversions);
1176 	} else if (nconversions < totalVars) {
1177 		/* TODO: not all elements converted. we need to prune the list - cc */
1178 	}
1179 	return result;
1180 }
1181 /* }}} */
1182 
1183 /* the compiler choked when i tried to make this a macro    */
scan_set_error_return(int numVars,zval * return_value)1184 static inline void scan_set_error_return(int numVars, zval *return_value) /* {{{ */
1185 {
1186 	if (numVars) {
1187 		ZVAL_LONG(return_value, SCAN_ERROR_EOF);  /* EOF marker */
1188 	} else {
1189 		/* convert_to_null calls destructor */
1190 		convert_to_null(return_value);
1191 	}
1192 }
1193 /* }}} */
1194