xref: /PHP-8.0/ext/standard/scanf.c (revision 1b2ec73c)
1 /*
2    +----------------------------------------------------------------------+
3    | Copyright (c) The PHP Group                                          |
4    +----------------------------------------------------------------------+
5    | This source file is subject to version 3.01 of the PHP license,      |
6    | that is bundled with this package in the file LICENSE, and is        |
7    | available through the world-wide-web at the following url:           |
8    | http://www.php.net/license/3_01.txt                                  |
9    | If you did not receive a copy of the PHP license and are unable to   |
10    | obtain it through the world-wide-web, please send a note to          |
11    | license@php.net so we can mail you a copy immediately.               |
12    +----------------------------------------------------------------------+
13    | Author: Clayton Collie <clcollie@mindspring.com>                     |
14    +----------------------------------------------------------------------+
15 */
16 
17 /*
18 	scanf.c --
19 
20 	This file contains the base code which implements sscanf and by extension
21 	fscanf. Original code is from TCL8.3.0 and bears the following copyright:
22 
23 	This software is copyrighted by the Regents of the University of
24 	California, Sun Microsystems, Inc., Scriptics Corporation,
25 	and other parties.  The following terms apply to all files associated
26 	with the software unless explicitly disclaimed in individual files.
27 
28 	The authors hereby grant permission to use, copy, modify, distribute,
29 	and license this software and its documentation for any purpose, provided
30 	that existing copyright notices are retained in all copies and that this
31 	notice is included verbatim in any distributions. No written agreement,
32 	license, or royalty fee is required for any of the authorized uses.
33 	Modifications to this software may be copyrighted by their authors
34 	and need not follow the licensing terms described here, provided that
35 	the new terms are clearly indicated on the first page of each file where
36 	they apply.
37 
38 	IN NO EVENT SHALL THE AUTHORS OR DISTRIBUTORS BE LIABLE TO ANY PARTY
39 	FOR DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
40 	ARISING OUT OF THE USE OF THIS SOFTWARE, ITS DOCUMENTATION, OR ANY
41 	DERIVATIVES THEREOF, EVEN IF THE AUTHORS HAVE BEEN ADVISED OF THE
42 	POSSIBILITY OF SUCH DAMAGE.
43 
44 	THE AUTHORS AND DISTRIBUTORS SPECIFICALLY DISCLAIM ANY WARRANTIES,
45 	INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY,
46 	FITNESS FOR A PARTICULAR PURPOSE, AND NON-INFRINGEMENT.  THIS SOFTWARE
47 	IS PROVIDED ON AN "AS IS" BASIS, AND THE AUTHORS AND DISTRIBUTORS HAVE
48 	NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR
49 	MODIFICATIONS.
50 
51 	GOVERNMENT USE: If you are acquiring this software on behalf of the
52 	U.S. government, the Government shall have only "Restricted Rights"
53 	in the software and related documentation as defined in the Federal
54 	Acquisition Regulations (FARs) in Clause 52.227.19 (c) (2).  If you
55 	are acquiring the software on behalf of the Department of Defense, the
56 	software shall be classified as "Commercial Computer Software" and the
57 	Government shall have only "Restricted Rights" as defined in Clause
58 	252.227-7013 (c) (1) of DFARs.  Notwithstanding the foregoing, the
59 	authors grant the U.S. Government and others acting in its behalf
60 	permission to use and distribute the software in accordance with the
61 	terms specified in this license.
62 */
63 
64 #include <stdio.h>
65 #include <limits.h>
66 #include <ctype.h>
67 #include "php.h"
68 #include "php_variables.h"
69 #include <locale.h>
70 #include "zend_execute.h"
71 #include "zend_operators.h"
72 #include "zend_strtod.h"
73 #include "php_globals.h"
74 #include "basic_functions.h"
75 #include "scanf.h"
76 
/*
 * Flag values used internally by [f|s]canf.
 *
 * The first group describes the conversion currently being processed.
 * The second group tracks parser state while the characters of a number
 * are being accumulated.
 */
#define SCAN_NOSKIP     0x1       /* Don't skip blanks. */
#define SCAN_SUPPRESS   0x2       /* Suppress assignment. */
#define SCAN_UNSIGNED   0x4       /* Read an unsigned value. */
#define SCAN_WIDTH      0x8       /* A width value was supplied. */

#define SCAN_SIGNOK     0x10      /* A +/- character is allowed. */
#define SCAN_NODIGITS   0x20      /* No digits have been scanned. */
#define SCAN_NOZERO     0x40      /* No zero digits have been scanned. */
#define SCAN_XOK        0x80      /* An 'x' is allowed. */
#define SCAN_PTOK       0x100     /* Decimal point is allowed. */
#define SCAN_EXPOK      0x200     /* An exponent is allowed. */

/*
 * Convert a (possibly signed) char to zend_uchar, e.g. before handing it
 * to a <ctype.h> classifier.  The expansion is fully parenthesized so it
 * stays a single operand next to sizeof or postfix operators.
 */
#define UCHAR(x)		((zend_uchar)(x))
93 
/*
 * One inclusive character range ("a-z") belonging to a character set.
 */
struct Range {
	char start;		/* Lower bound of the range. */
	char end;		/* Upper bound of the range. */
};

/*
 * Parsed form of a "%[...]" conversion: a list of individual characters
 * plus a list of ranges, optionally negated.
 */
typedef struct CharSet {
	int exclude;		/* 1 if this is an exclusion set. */
	int nchars;		/* Number of entries used in chars. */
	char *chars;		/* Individual member characters. */
	int nranges;		/* Number of entries used in ranges. */
	struct Range *ranges;	/* Member ranges. */
} CharSet;
108 
109 /*
110  * Declarations for functions used only in this file.
111  */
112 static char *BuildCharSet(CharSet *cset, char *format);
113 static int	CharInSet(CharSet *cset, int ch);
114 static void	ReleaseCharSet(CharSet *cset);
115 static inline void scan_set_error_return(int numVars, zval *return_value);
116 
117 
118 /* {{{ BuildCharSet
119  *----------------------------------------------------------------------
120  *
121  * BuildCharSet --
122  *
123  *	This function examines a character set format specification
124  *	and builds a CharSet containing the individual characters and
125  *	character ranges specified.
126  *
127  * Results:
128  *	Returns the next format position.
129  *
130  * Side effects:
131  *	Initializes the charset.
132  *
133  *----------------------------------------------------------------------
134  */
BuildCharSet(CharSet * cset,char * format)135 static char * BuildCharSet(CharSet *cset, char *format)
136 {
137 	char *ch, start;
138 	int  nranges;
139 	char *end;
140 
141 	memset(cset, 0, sizeof(CharSet));
142 
143 	ch = format;
144 	if (*ch == '^') {
145 		cset->exclude = 1;
146 		ch = ++format;
147 	}
148 	end = format + 1;	/* verify this - cc */
149 
150 	/*
151 	 * Find the close bracket so we can overallocate the set.
152 	 */
153 	if (*ch == ']') {
154 		ch = end++;
155 	}
156 	nranges = 0;
157 	while (*ch != ']') {
158 		if (*ch == '-') {
159 			nranges++;
160 		}
161 		ch = end++;
162 	}
163 
164 	cset->chars = (char *) safe_emalloc(sizeof(char), (end - format - 1), 0);
165 	if (nranges > 0) {
166 		cset->ranges = (struct Range *) safe_emalloc(sizeof(struct Range), nranges, 0);
167 	} else {
168 		cset->ranges = NULL;
169 	}
170 
171 	/*
172 	 * Now build the character set.
173 	 */
174 	cset->nchars = cset->nranges = 0;
175 	ch    = format++;
176 	start = *ch;
177 	if (*ch == ']' || *ch == '-') {
178 		cset->chars[cset->nchars++] = *ch;
179 		ch = format++;
180 	}
181 	while (*ch != ']') {
182 		if (*format == '-') {
183 			/*
184 			 * This may be the first character of a range, so don't add
185 			 * it yet.
186 			 */
187 			start = *ch;
188 		} else if (*ch == '-') {
189 			/*
190 			 * Check to see if this is the last character in the set, in which
191 			 * case it is not a range and we should add the previous character
192 			 * as well as the dash.
193 			 */
194 			if (*format == ']') {
195 				cset->chars[cset->nchars++] = start;
196 				cset->chars[cset->nchars++] = *ch;
197 			} else {
198 				ch = format++;
199 
200 				/*
201 				 * Check to see if the range is in reverse order.
202 				 */
203 				if (start < *ch) {
204 					cset->ranges[cset->nranges].start = start;
205 					cset->ranges[cset->nranges].end = *ch;
206 				} else {
207 					cset->ranges[cset->nranges].start = *ch;
208 					cset->ranges[cset->nranges].end = start;
209 				}
210 				cset->nranges++;
211 			}
212 		} else {
213 			cset->chars[cset->nchars++] = *ch;
214 		}
215 		ch = format++;
216 	}
217 	return format;
218 }
219 /* }}} */
220 
221 /* {{{ CharInSet
222  *----------------------------------------------------------------------
223  *
224  * CharInSet --
225  *
226  *	Check to see if a character matches the given set.
227  *
228  * Results:
229  *	Returns non-zero if the character matches the given set.
230  *
231  * Side effects:
232  *	None.
233  *
234  *----------------------------------------------------------------------
235  */
CharInSet(CharSet * cset,int c)236 static int CharInSet(CharSet *cset, int c)
237 {
238 	char ch = (char) c;
239 	int i, match = 0;
240 
241 	for (i = 0; i < cset->nchars; i++) {
242 		if (cset->chars[i] == ch) {
243 			match = 1;
244 			break;
245 		}
246 	}
247 	if (!match) {
248 		for (i = 0; i < cset->nranges; i++) {
249 			if ((cset->ranges[i].start <= ch)
250 				&& (ch <= cset->ranges[i].end)) {
251 				match = 1;
252 				break;
253 			}
254 		}
255 	}
256 	return (cset->exclude ? !match : match);
257 }
258 /* }}} */
259 
260 /* {{{ ReleaseCharSet
261  *----------------------------------------------------------------------
262  *
263  * ReleaseCharSet --
264  *
265  *	Free the storage associated with a character set.
266  *
267  * Results:
268  *	None.
269  *
270  * Side effects:
271  *	None.
272  *
273  *----------------------------------------------------------------------
274  */
ReleaseCharSet(CharSet * cset)275 static void ReleaseCharSet(CharSet *cset)
276 {
277 	efree((char *)cset->chars);
278 	if (cset->ranges) {
279 		efree((char *)cset->ranges);
280 	}
281 }
282 /* }}} */
283 
284 /* {{{ ValidateFormat
285  *----------------------------------------------------------------------
286  *
287  * ValidateFormat --
288  *
289  *	Parse the format string and verify that it is properly formed
290  *	and that there are exactly enough variables on the command line.
291  *
292  * Results:
293  *    FAILURE or SUCCESS.
294  *
295  * Side effects:
296  *     May set php_error based on abnormal conditions.
297  *
298  * Parameters :
299  *     format     The format string.
300  *     numVars    The number of variables passed to the scan command.
301  *     totalSubs  The number of variables that will be required.
302  *
303  *----------------------------------------------------------------------
304 */
ValidateFormat(char * format,int numVars,int * totalSubs)305 PHPAPI int ValidateFormat(char *format, int numVars, int *totalSubs)
306 {
307 #define STATIC_LIST_SIZE 16
308 	int gotXpg, gotSequential, value, i, flags;
309 	char *end, *ch = NULL;
310 	int staticAssign[STATIC_LIST_SIZE];
311 	int *nassign = staticAssign;
312 	int objIndex, xpgSize, nspace = STATIC_LIST_SIZE;
313 
314 	/*
315 	 * Initialize an array that records the number of times a variable
316 	 * is assigned to by the format string.  We use this to detect if
317 	 * a variable is multiply assigned or left unassigned.
318 	 */
319 	if (numVars > nspace) {
320 		nassign = (int*)safe_emalloc(sizeof(int), numVars, 0);
321 		nspace = numVars;
322 	}
323 	for (i = 0; i < nspace; i++) {
324 		nassign[i] = 0;
325 	}
326 
327 	xpgSize = objIndex = gotXpg = gotSequential = 0;
328 
329 	while (*format != '\0') {
330 		ch = format++;
331 		flags = 0;
332 
333 		if (*ch != '%') {
334 			continue;
335 		}
336 		ch = format++;
337 		if (*ch == '%') {
338 			continue;
339 		}
340 		if (*ch == '*') {
341 			flags |= SCAN_SUPPRESS;
342 			ch = format++;
343 			goto xpgCheckDone;
344 		}
345 
346 		if ( isdigit( (int)*ch ) ) {
347 			/*
348 			 * Check for an XPG3-style %n$ specification.  Note: there
349 			 * must not be a mixture of XPG3 specs and non-XPG3 specs
350 			 * in the same format string.
351 			 */
352 			value = ZEND_STRTOUL(format-1, &end, 10);
353 			if (*end != '$') {
354 				goto notXpg;
355 			}
356 			format = end+1;
357 			ch     = format++;
358 			gotXpg = 1;
359 			if (gotSequential) {
360 				goto mixedXPG;
361 			}
362 			objIndex = value - 1;
363 			if ((objIndex < 0) || (numVars && (objIndex >= numVars))) {
364 				goto badIndex;
365 			} else if (numVars == 0) {
366 				/*
367 				 * In the case where no vars are specified, the user can
368 				 * specify %9999$ legally, so we have to consider special
369 				 * rules for growing the assign array.  'value' is
370 				 * guaranteed to be > 0.
371 				 */
372 
373 				/* set a lower artificial limit on this
374 				 * in the interest of security and resource friendliness
375 				 * 255 arguments should be more than enough. - cc
376 				 */
377 				if (value > SCAN_MAX_ARGS) {
378 					goto badIndex;
379 				}
380 
381 				xpgSize = (xpgSize > value) ? xpgSize : value;
382 			}
383 			goto xpgCheckDone;
384 		}
385 
386 notXpg:
387 		gotSequential = 1;
388 		if (gotXpg) {
389 mixedXPG:
390 			zend_value_error("%s", "cannot mix \"%\" and \"%n$\" conversion specifiers");
391 			goto error;
392 		}
393 
394 xpgCheckDone:
395 		/*
396 		 * Parse any width specifier.
397 		 */
398 		if (isdigit(UCHAR(*ch))) {
399 			value = ZEND_STRTOUL(format-1, &format, 10);
400 			flags |= SCAN_WIDTH;
401 			ch = format++;
402 		}
403 
404 		/*
405 		 * Ignore size specifier.
406 		 */
407 		if ((*ch == 'l') || (*ch == 'L') || (*ch == 'h')) {
408 			ch = format++;
409 		}
410 
411 		if (!(flags & SCAN_SUPPRESS) && numVars && (objIndex >= numVars)) {
412 			goto badIndex;
413 		}
414 
415 		/*
416 		 * Handle the various field types.
417 		 */
418 		switch (*ch) {
419 			case 'n':
420 			case 'd':
421 			case 'D':
422 			case 'i':
423 			case 'o':
424 			case 'x':
425 			case 'X':
426 			case 'u':
427 			case 'f':
428 			case 'e':
429 			case 'E':
430 			case 'g':
431 			case 's':
432 				break;
433 
434 			case 'c':
435 				/* we differ here with the TCL implementation in allowing for */
436 				/* a character width specification, to be more consistent with */
437 				/* ANSI. since Zend auto allocates space for vars, this is no */
438 				/* problem - cc                                               */
439 				/*
440 				if (flags & SCAN_WIDTH) {
441 					php_error_docref(NULL, E_WARNING, "Field width may not be specified in %c conversion");
442 					goto error;
443 				}
444 				*/
445 				break;
446 
447 			case '[':
448 				if (*format == '\0') {
449 					goto badSet;
450 				}
451 				ch = format++;
452 				if (*ch == '^') {
453 					if (*format == '\0') {
454 						goto badSet;
455 					}
456 					ch = format++;
457 				}
458 				if (*ch == ']') {
459 					if (*format == '\0') {
460 						goto badSet;
461 					}
462 					ch = format++;
463 				}
464 				while (*ch != ']') {
465 					if (*format == '\0') {
466 						goto badSet;
467 					}
468 					ch = format++;
469 				}
470 				break;
471 badSet:
472 				zend_value_error("Unmatched [ in format string");
473 				goto error;
474 
475 			default: {
476 				zend_value_error("Bad scan conversion character \"%c\"", *ch);
477 				goto error;
478 			}
479 		}
480 
481 		if (!(flags & SCAN_SUPPRESS)) {
482 			if (objIndex >= nspace) {
483 				/*
484 				 * Expand the nassign buffer.  If we are using XPG specifiers,
485 				 * make sure that we grow to a large enough size.  xpgSize is
486 				 * guaranteed to be at least one larger than objIndex.
487 				 */
488 				value = nspace;
489 				if (xpgSize) {
490 					nspace = xpgSize;
491 				} else {
492 					nspace += STATIC_LIST_SIZE;
493 				}
494 				if (nassign == staticAssign) {
495 					nassign = (void *)safe_emalloc(nspace, sizeof(int), 0);
496 					for (i = 0; i < STATIC_LIST_SIZE; ++i) {
497 						nassign[i] = staticAssign[i];
498 					}
499 				} else {
500 					nassign = (void *)erealloc((void *)nassign, nspace * sizeof(int));
501 				}
502 				for (i = value; i < nspace; i++) {
503 					nassign[i] = 0;
504 				}
505 			}
506 			nassign[objIndex]++;
507 			objIndex++;
508 		}
509 	} /* while (*format != '\0') */
510 
511 	/*
512 	 * Verify that all of the variable were assigned exactly once.
513 	 */
514 	if (numVars == 0) {
515 		if (xpgSize) {
516 			numVars = xpgSize;
517 		} else {
518 			numVars = objIndex;
519 		}
520 	}
521 	if (totalSubs) {
522 		*totalSubs = numVars;
523 	}
524 	for (i = 0; i < numVars; i++) {
525 		if (nassign[i] > 1) {
526 			zend_value_error("%s", "Variable is assigned by multiple \"%n$\" conversion specifiers");
527 			goto error;
528 		} else if (!xpgSize && (nassign[i] == 0)) {
529 			/*
530 			 * If the space is empty, and xpgSize is 0 (means XPG wasn't
531 			 * used, and/or numVars != 0), then too many vars were given
532 			 */
533 			zend_value_error("Variable is not assigned by any conversion specifiers");
534 			goto error;
535 		}
536 	}
537 
538 	if (nassign != staticAssign) {
539 		efree((char *)nassign);
540 	}
541 	return SCAN_SUCCESS;
542 
543 badIndex:
544 	if (gotXpg) {
545 		zend_value_error("%s", "\"%n$\" argument index out of range");
546 	} else {
547 		zend_value_error("Different numbers of variable names and field specifiers");
548 	}
549 
550 error:
551 	if (nassign != staticAssign) {
552 		efree((char *)nassign);
553 	}
554 	return SCAN_ERROR_INVALID_FORMAT;
555 #undef STATIC_LIST_SIZE
556 }
557 /* }}} */
558 
559 /* {{{ php_sscanf_internal
560  * This is the internal function which does processing on behalf of
561  * both sscanf() and fscanf()
562  *
563  * parameters :
564  * 		string		literal string to be processed
565  * 		format		format string
566  *		argCount	total number of elements in the args array
567  *		args		arguments passed in from user function (f|s)scanf
568  * 		varStart	offset (in args) of 1st variable passed in to (f|s)scanf
569  *		return_value set with the results of the scan
570  */
571 
php_sscanf_internal(char * string,char * format,int argCount,zval * args,int varStart,zval * return_value)572 PHPAPI int php_sscanf_internal( char *string, char *format,
573 				int argCount, zval *args,
574 				int varStart, zval *return_value)
575 {
576 	int  numVars, nconversions, totalVars = -1;
577 	int  i, result;
578 	zend_long value;
579 	int  objIndex;
580 	char *end, *baseString;
581 	zval *current;
582 	char op   = 0;
583 	int  base = 0;
584 	int  underflow = 0;
585 	size_t width;
586 	zend_long (*fn)() = NULL;
587 	char *ch, sch;
588 	int  flags;
589 	char buf[64];	/* Temporary buffer to hold scanned number
590 					 * strings before they are passed to strtoul() */
591 
592 	/* do some sanity checking */
593 	if ((varStart > argCount) || (varStart < 0)){
594 		varStart = SCAN_MAX_ARGS + 1;
595 	}
596 	numVars = argCount - varStart;
597 	if (numVars < 0) {
598 		numVars = 0;
599 	}
600 
601 	/*
602 	 * Check for errors in the format string.
603 	 */
604 	if (ValidateFormat(format, numVars, &totalVars) != SCAN_SUCCESS) {
605 		scan_set_error_return( numVars, return_value );
606 		return SCAN_ERROR_INVALID_FORMAT;
607 	}
608 
609 	objIndex = numVars ? varStart : 0;
610 
611 	/*
612 	 * If any variables are passed, make sure they are all passed by reference
613 	 */
614 	if (numVars) {
615 		for (i = varStart;i < argCount;i++){
616 			ZEND_ASSERT(Z_ISREF(args[i]) && "Parameter must be passed by reference");
617 		}
618 	}
619 
620 	/*
621 	 * Allocate space for the result objects. Only happens when no variables
622 	 * are specified
623 	 */
624 	if (!numVars) {
625 		zval tmp;
626 
627 		/* allocate an array for return */
628 		array_init(return_value);
629 
630 		for (i = 0; i < totalVars; i++) {
631 			ZVAL_NULL(&tmp);
632 			if (add_next_index_zval(return_value, &tmp) == FAILURE) {
633 				scan_set_error_return(0, return_value);
634 				return FAILURE;
635 			}
636 		}
637 		varStart = 0; /* Array index starts from 0 */
638 	}
639 
640 	baseString = string;
641 
642 	/*
643 	 * Iterate over the format string filling in the result objects until
644 	 * we reach the end of input, the end of the format string, or there
645 	 * is a mismatch.
646 	 */
647 	nconversions = 0;
648 	/* note ! - we need to limit the loop for objIndex to keep it in bounds */
649 
650 	while (*format != '\0') {
651 		ch    = format++;
652 		flags = 0;
653 
654 		/*
655 		 * If we see whitespace in the format, skip whitespace in the string.
656 		 */
657 		if ( isspace( (int)*ch ) ) {
658 			sch = *string;
659 			while ( isspace( (int)sch ) ) {
660 				if (*string == '\0') {
661 					goto done;
662 				}
663 				string++;
664 				sch = *string;
665 			}
666 			continue;
667 		}
668 
669 		if (*ch != '%') {
670 literal:
671 			if (*string == '\0') {
672 				underflow = 1;
673 				goto done;
674 			}
675 			sch = *string;
676 			string++;
677 			if (*ch != sch) {
678 				goto done;
679 			}
680 			continue;
681 		}
682 
683 		ch = format++;
684 		if (*ch == '%') {
685 			goto literal;
686 		}
687 
688 		/*
689 		 * Check for assignment suppression ('*') or an XPG3-style
690 		 * assignment ('%n$').
691 		 */
692 		if (*ch == '*') {
693 			flags |= SCAN_SUPPRESS;
694 			ch = format++;
695 		} else if ( isdigit(UCHAR(*ch))) {
696 			value = ZEND_STRTOUL(format-1, &end, 10);
697 			if (*end == '$') {
698 				format = end+1;
699 				ch = format++;
700 				objIndex = varStart + value - 1;
701 			}
702 		}
703 
704 		/*
705 		 * Parse any width specifier.
706 		 */
707 		if ( isdigit(UCHAR(*ch))) {
708 			width = ZEND_STRTOUL(format-1, &format, 10);
709 			ch = format++;
710 		} else {
711 			width = 0;
712 		}
713 
714 		/*
715 		 * Ignore size specifier.
716 		 */
717 		if ((*ch == 'l') || (*ch == 'L') || (*ch == 'h')) {
718 			ch = format++;
719 		}
720 
721 		/*
722 		 * Handle the various field types.
723 		 */
724 		switch (*ch) {
725 			case 'n':
726 				if (!(flags & SCAN_SUPPRESS)) {
727 					if (numVars && objIndex >= argCount) {
728 						break;
729 					} else if (numVars) {
730 						current = args + objIndex++;
731 						ZEND_TRY_ASSIGN_REF_LONG(current, (zend_long) (string - baseString));
732 					} else {
733 						add_index_long(return_value, objIndex++, string - baseString);
734 					}
735 				}
736 				nconversions++;
737 				continue;
738 
739 			case 'd':
740 			case 'D':
741 				op = 'i';
742 				base = 10;
743 				fn = (zend_long (*)())ZEND_STRTOL_PTR;
744 				break;
745 			case 'i':
746 				op = 'i';
747 				base = 0;
748 				fn = (zend_long (*)())ZEND_STRTOL_PTR;
749 				break;
750 			case 'o':
751 				op = 'i';
752 				base = 8;
753 				fn = (zend_long (*)())ZEND_STRTOL_PTR;
754 				break;
755 			case 'x':
756 			case 'X':
757 				op = 'i';
758 				base = 16;
759 				fn = (zend_long (*)())ZEND_STRTOL_PTR;
760 				break;
761 			case 'u':
762 				op = 'i';
763 				base = 10;
764 				flags |= SCAN_UNSIGNED;
765 				fn = (zend_long (*)())ZEND_STRTOUL_PTR;
766 				break;
767 
768 			case 'f':
769 			case 'e':
770 			case 'E':
771 			case 'g':
772 				op = 'f';
773 				break;
774 
775 			case 's':
776 				op = 's';
777 				break;
778 
779 			case 'c':
780 				op = 's';
781 				flags |= SCAN_NOSKIP;
782 				/*-cc-*/
783 				if (0 == width) {
784 					width = 1;
785 				}
786 				/*-cc-*/
787 				break;
788 			case '[':
789 				op = '[';
790 				flags |= SCAN_NOSKIP;
791 				break;
792 		}   /* switch */
793 
794 		/*
795 		 * At this point, we will need additional characters from the
796 		 * string to proceed.
797 		 */
798 		if (*string == '\0') {
799 			underflow = 1;
800 			goto done;
801 		}
802 
803 		/*
804 		 * Skip any leading whitespace at the beginning of a field unless
805 		 * the format suppresses this behavior.
806 		 */
807 		if (!(flags & SCAN_NOSKIP)) {
808 			while (*string != '\0') {
809 				sch = *string;
810 				if (! isspace((int)sch) ) {
811 					break;
812 				}
813 				string++;
814 			}
815 			if (*string == '\0') {
816 				underflow = 1;
817 				goto done;
818 			}
819 		}
820 
821 		/*
822 		 * Perform the requested scanning operation.
823 		 */
824 		switch (op) {
825 			case 'c':
826 			case 's':
827 				/*
828 				 * Scan a string up to width characters or whitespace.
829 				 */
830 				if (width == 0) {
831 					width = (size_t) ~0;
832 				}
833 				end = string;
834 				while (*end != '\0') {
835 					sch = *end;
836 					if ( isspace( (int)sch ) ) {
837 						break;
838 					}
839 					end++;
840 					if (--width == 0) {
841 					   break;
842 					}
843 				}
844 				if (!(flags & SCAN_SUPPRESS)) {
845 					if (numVars && objIndex >= argCount) {
846 						break;
847 					} else if (numVars) {
848 						current = args + objIndex++;
849 						ZEND_TRY_ASSIGN_REF_STRINGL(current, string, end - string);
850 					} else {
851 						add_index_stringl(return_value, objIndex++, string, end-string);
852 					}
853 				}
854 				string = end;
855 				break;
856 
857 			case '[': {
858 				CharSet cset;
859 
860 				if (width == 0) {
861 					width = (size_t) ~0;
862 				}
863 				end = string;
864 
865 				format = BuildCharSet(&cset, format);
866 				while (*end != '\0') {
867 					sch = *end;
868 					if (!CharInSet(&cset, (int)sch)) {
869 						break;
870 					}
871 					end++;
872 					if (--width == 0) {
873 						break;
874 					}
875 				}
876 				ReleaseCharSet(&cset);
877 
878 				if (string == end) {
879 					/*
880 					 * Nothing matched the range, stop processing
881 					 */
882 					goto done;
883 				}
884 				if (!(flags & SCAN_SUPPRESS)) {
885 					if (numVars && objIndex >= argCount) {
886 						break;
887 					} else if (numVars) {
888 						current = args + objIndex++;
889 						ZEND_TRY_ASSIGN_REF_STRINGL(current, string, end - string);
890 					} else {
891 						add_index_stringl(return_value, objIndex++, string, end-string);
892 					}
893 				}
894 				string = end;
895 				break;
896 			}
897 /*
898 			case 'c':
899 			   / Scan a single character./
900 
901 				sch = *string;
902 				string++;
903 				if (!(flags & SCAN_SUPPRESS)) {
904 					if (numVars) {
905 						char __buf[2];
906 						__buf[0] = sch;
907 						__buf[1] = '\0';
908 						current = args[objIndex++];
909 						zval_ptr_dtor_nogc(*current);
910 						ZVAL_STRINGL( *current, __buf, 1);
911 					} else {
912 						add_index_stringl(return_value, objIndex++, &sch, 1);
913 					}
914 				}
915 				break;
916 */
917 			case 'i':
918 				/*
919 				 * Scan an unsigned or signed integer.
920 				 */
921 				/*-cc-*/
922 				buf[0] = '\0';
923 				/*-cc-*/
924 				if ((width == 0) || (width > sizeof(buf) - 1)) {
925 					width = sizeof(buf) - 1;
926 				}
927 
928 				flags |= SCAN_SIGNOK | SCAN_NODIGITS | SCAN_NOZERO;
929 				for (end = buf; width > 0; width--) {
930 					switch (*string) {
931 						/*
932 						 * The 0 digit has special meaning at the beginning of
933 						 * a number.  If we are unsure of the base, it
934 						 * indicates that we are in base 8 or base 16 (if it is
935 						 * followed by an 'x').
936 						 */
937 						case '0':
938 							/*-cc-*/
939 							if (base == 16) {
940 								flags |= SCAN_XOK;
941 							}
942 							/*-cc-*/
943 							if (base == 0) {
944 								base = 8;
945 								flags |= SCAN_XOK;
946 							}
947 							if (flags & SCAN_NOZERO) {
948 								flags &= ~(SCAN_SIGNOK | SCAN_NODIGITS | SCAN_NOZERO);
949 							} else {
950 								flags &= ~(SCAN_SIGNOK | SCAN_XOK | SCAN_NODIGITS);
951 							}
952 							goto addToInt;
953 
954 						case '1': case '2': case '3': case '4':
955 						case '5': case '6': case '7':
956 							if (base == 0) {
957 								base = 10;
958 							}
959 							flags &= ~(SCAN_SIGNOK | SCAN_XOK | SCAN_NODIGITS);
960 							goto addToInt;
961 
962 						case '8': case '9':
963 							if (base == 0) {
964 								base = 10;
965 							}
966 							if (base <= 8) {
967 							   break;
968 							}
969 							flags &= ~(SCAN_SIGNOK | SCAN_XOK | SCAN_NODIGITS);
970 							goto addToInt;
971 
972 						case 'A': case 'B': case 'C':
973 						case 'D': case 'E': case 'F':
974 						case 'a': case 'b': case 'c':
975 						case 'd': case 'e': case 'f':
976 							if (base <= 10) {
977 								break;
978 							}
979 							flags &= ~(SCAN_SIGNOK | SCAN_XOK | SCAN_NODIGITS);
980 							goto addToInt;
981 
982 						case '+': case '-':
983 							if (flags & SCAN_SIGNOK) {
984 								flags &= ~SCAN_SIGNOK;
985 								goto addToInt;
986 							}
987 							break;
988 
989 						case 'x': case 'X':
990 							if ((flags & SCAN_XOK) && (end == buf+1)) {
991 								base = 16;
992 								flags &= ~SCAN_XOK;
993 								goto addToInt;
994 							}
995 							break;
996 					}
997 
998 					/*
999 					 * We got an illegal character so we are done accumulating.
1000 					 */
1001 					break;
1002 
1003 addToInt:
1004 					/*
1005 					 * Add the character to the temporary buffer.
1006 					 */
1007 					*end++ = *string++;
1008 					if (*string == '\0') {
1009 						break;
1010 					}
1011 				}
1012 
1013 				/*
1014 				 * Check to see if we need to back up because we only got a
1015 				 * sign or a trailing x after a 0.
1016 				 */
1017 				if (flags & SCAN_NODIGITS) {
1018 					if (*string == '\0') {
1019 						underflow = 1;
1020 					}
1021 					goto done;
1022 				} else if (end[-1] == 'x' || end[-1] == 'X') {
1023 					end--;
1024 					string--;
1025 				}
1026 
1027 				/*
1028 				 * Scan the value from the temporary buffer.  If we are
1029 				 * returning a large unsigned value, we have to convert it back
1030 				 * to a string since PHP only supports signed values.
1031 				 */
1032 				if (!(flags & SCAN_SUPPRESS)) {
1033 					*end = '\0';
1034 					value = (zend_long) (*fn)(buf, NULL, base);
1035 					if ((flags & SCAN_UNSIGNED) && (value < 0)) {
1036 						snprintf(buf, sizeof(buf), ZEND_ULONG_FMT, value); /* INTL: ISO digit */
1037 						if (numVars && objIndex >= argCount) {
1038 							break;
1039 						} else if (numVars) {
1040 							 /* change passed value type to string */
1041 							current = args + objIndex++;
1042 							ZEND_TRY_ASSIGN_REF_STRING(current, buf);
1043 						} else {
1044 							add_index_string(return_value, objIndex++, buf);
1045 						}
1046 					} else {
1047 						if (numVars && objIndex >= argCount) {
1048 							break;
1049 						} else if (numVars) {
1050 							current = args + objIndex++;
1051 							ZEND_TRY_ASSIGN_REF_LONG(current, value);
1052 						} else {
1053 							add_index_long(return_value, objIndex++, value);
1054 						}
1055 					}
1056 				}
1057 				break;
1058 
1059 			case 'f':
1060 				/*
1061 				 * Scan a floating point number
1062 				 */
1063 				buf[0] = '\0';     /* call me pedantic */
1064 				if ((width == 0) || (width > sizeof(buf) - 1)) {
1065 					width = sizeof(buf) - 1;
1066 				}
1067 				flags |= SCAN_SIGNOK | SCAN_NODIGITS | SCAN_PTOK | SCAN_EXPOK;
1068 				for (end = buf; width > 0; width--) {
1069 					switch (*string) {
1070 						case '0': case '1': case '2': case '3':
1071 						case '4': case '5': case '6': case '7':
1072 						case '8': case '9':
1073 							flags &= ~(SCAN_SIGNOK | SCAN_NODIGITS);
1074 							goto addToFloat;
1075 						case '+':
1076 						case '-':
1077 							if (flags & SCAN_SIGNOK) {
1078 								flags &= ~SCAN_SIGNOK;
1079 								goto addToFloat;
1080 							}
1081 							break;
1082 						case '.':
1083 							if (flags & SCAN_PTOK) {
1084 								flags &= ~(SCAN_SIGNOK | SCAN_PTOK);
1085 								goto addToFloat;
1086 							}
1087 							break;
1088 						case 'e':
1089 						case 'E':
1090 							/*
1091 							 * An exponent is not allowed until there has
1092 							 * been at least one digit.
1093 							 */
1094 							if ((flags & (SCAN_NODIGITS | SCAN_EXPOK)) == SCAN_EXPOK) {
1095 								flags = (flags & ~(SCAN_EXPOK|SCAN_PTOK))
1096 									| SCAN_SIGNOK | SCAN_NODIGITS;
1097 								goto addToFloat;
1098 							}
1099 							break;
1100 					}
1101 
1102 					/*
1103 					 * We got an illegal character so we are done accumulating.
1104 					 */
1105 					break;
1106 
1107 addToFloat:
1108 					/*
1109 					 * Add the character to the temporary buffer.
1110 					 */
1111 					*end++ = *string++;
1112 					if (*string == '\0') {
1113 						break;
1114 					}
1115 				}
1116 
1117 				/*
1118 				 * Check to see if we need to back up because we saw a
1119 				 * trailing 'e' or sign.
1120 				 */
1121 				if (flags & SCAN_NODIGITS) {
1122 					if (flags & SCAN_EXPOK) {
1123 						/*
1124 						 * There were no digits at all so scanning has
1125 						 * failed and we are done.
1126 						 */
1127 						if (*string == '\0') {
1128 							underflow = 1;
1129 						}
1130 						goto done;
1131 					}
1132 
1133 					/*
1134 					 * We got a bad exponent ('e' and maybe a sign).
1135 					 */
1136 					end--;
1137 					string--;
1138 					if (*end != 'e' && *end != 'E') {
1139 						end--;
1140 						string--;
1141 					}
1142 				}
1143 
1144 				/*
1145 				 * Scan the value from the temporary buffer.
1146 				 */
1147 				if (!(flags & SCAN_SUPPRESS)) {
1148 					double dvalue;
1149 					*end = '\0';
1150 					dvalue = zend_strtod(buf, NULL);
1151 					if (numVars && objIndex >= argCount) {
1152 						break;
1153 					} else if (numVars) {
1154 						current = args + objIndex++;
1155 						ZEND_TRY_ASSIGN_REF_DOUBLE(current, dvalue);
1156 					} else {
1157 						add_index_double(return_value, objIndex++, dvalue );
1158 					}
1159 				}
1160 				break;
1161 		} /* switch (op) */
1162 		nconversions++;
1163 	} /*  while (*format != '\0') */
1164 
1165 done:
1166 	result = SCAN_SUCCESS;
1167 
1168 	if (underflow && (0==nconversions)) {
1169 		scan_set_error_return( numVars, return_value );
1170 		result = SCAN_ERROR_EOF;
1171 	} else if (numVars) {
1172 		zval_ptr_dtor(return_value );
1173 		ZVAL_LONG(return_value, nconversions);
1174 	} else if (nconversions < totalVars) {
1175 		/* TODO: not all elements converted. we need to prune the list - cc */
1176 	}
1177 	return result;
1178 }
1179 /* }}} */
1180 
1181 /* the compiler choked when i tried to make this a macro    */
scan_set_error_return(int numVars,zval * return_value)1182 static inline void scan_set_error_return(int numVars, zval *return_value) /* {{{ */
1183 {
1184 	if (numVars) {
1185 		ZVAL_LONG(return_value, SCAN_ERROR_EOF);  /* EOF marker */
1186 	} else {
1187 		/* convert_to_null calls destructor */
1188 		convert_to_null(return_value);
1189 	}
1190 }
1191 /* }}} */
1192