xref: /PHP-7.4/ext/standard/scanf.c (revision e188e417)
1 /*
2    +----------------------------------------------------------------------+
3    | PHP Version 7                                                        |
4    +----------------------------------------------------------------------+
5    | Copyright (c) The PHP Group                                          |
6    +----------------------------------------------------------------------+
7    | This source file is subject to version 3.01 of the PHP license,      |
8    | that is bundled with this package in the file LICENSE, and is        |
9    | available through the world-wide-web at the following url:           |
10    | http://www.php.net/license/3_01.txt                                  |
11    | If you did not receive a copy of the PHP license and are unable to   |
12    | obtain it through the world-wide-web, please send a note to          |
13    | license@php.net so we can mail you a copy immediately.               |
14    +----------------------------------------------------------------------+
15    | Author: Clayton Collie <clcollie@mindspring.com>                     |
16    +----------------------------------------------------------------------+
17 */
18 
19 /*
20 	scanf.c --
21 
22 	This file contains the base code which implements sscanf and by extension
23 	fscanf. Original code is from TCL8.3.0 and bears the following copyright:
24 
25 	This software is copyrighted by the Regents of the University of
26 	California, Sun Microsystems, Inc., Scriptics Corporation,
27 	and other parties.  The following terms apply to all files associated
28 	with the software unless explicitly disclaimed in individual files.
29 
30 	The authors hereby grant permission to use, copy, modify, distribute,
31 	and license this software and its documentation for any purpose, provided
32 	that existing copyright notices are retained in all copies and that this
33 	notice is included verbatim in any distributions. No written agreement,
34 	license, or royalty fee is required for any of the authorized uses.
35 	Modifications to this software may be copyrighted by their authors
36 	and need not follow the licensing terms described here, provided that
37 	the new terms are clearly indicated on the first page of each file where
38 	they apply.
39 
40 	IN NO EVENT SHALL THE AUTHORS OR DISTRIBUTORS BE LIABLE TO ANY PARTY
41 	FOR DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
42 	ARISING OUT OF THE USE OF THIS SOFTWARE, ITS DOCUMENTATION, OR ANY
43 	DERIVATIVES THEREOF, EVEN IF THE AUTHORS HAVE BEEN ADVISED OF THE
44 	POSSIBILITY OF SUCH DAMAGE.
45 
46 	THE AUTHORS AND DISTRIBUTORS SPECIFICALLY DISCLAIM ANY WARRANTIES,
47 	INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY,
48 	FITNESS FOR A PARTICULAR PURPOSE, AND NON-INFRINGEMENT.  THIS SOFTWARE
49 	IS PROVIDED ON AN "AS IS" BASIS, AND THE AUTHORS AND DISTRIBUTORS HAVE
50 	NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR
51 	MODIFICATIONS.
52 
53 	GOVERNMENT USE: If you are acquiring this software on behalf of the
54 	U.S. government, the Government shall have only "Restricted Rights"
55 	in the software and related documentation as defined in the Federal
56 	Acquisition Regulations (FARs) in Clause 52.227.19 (c) (2).  If you
57 	are acquiring the software on behalf of the Department of Defense, the
58 	software shall be classified as "Commercial Computer Software" and the
59 	Government shall have only "Restricted Rights" as defined in Clause
60 	252.227-7013 (c) (1) of DFARs.  Notwithstanding the foregoing, the
61 	authors grant the U.S. Government and others acting in its behalf
62 	permission to use and distribute the software in accordance with the
63 	terms specified in this license.
64 */
65 
66 #include <stdio.h>
67 #include <limits.h>
68 #include <ctype.h>
69 #include "php.h"
70 #include "php_variables.h"
71 #include <locale.h>
72 #include "zend_execute.h"
73 #include "zend_operators.h"
74 #include "zend_strtod.h"
75 #include "php_globals.h"
76 #include "basic_functions.h"
77 #include "scanf.h"
78 
79 /*
80  * Flag values used internally by [f|s]canf.
81  */
/* Per-conversion flags, reset to 0 at the start of every format directive. */
82 #define SCAN_NOSKIP     0x1       /* Don't skip blanks (set for %c and %[). */
83 #define SCAN_SUPPRESS	0x2	  /* Suppress assignment ("%*..."). */
84 #define SCAN_UNSIGNED	0x4	  /* Read an unsigned value (set for %u). */
85 #define SCAN_WIDTH      0x8       /* A width value was supplied. */
86 
/* State bits used while accumulating the characters of an integer or float. */
87 #define SCAN_SIGNOK     0x10      /* A +/- character is allowed. */
88 #define SCAN_NODIGITS   0x20      /* No digits have been scanned. */
89 #define SCAN_NOZERO     0x40      /* No zero digits have been scanned. */
90 #define SCAN_XOK        0x80      /* An 'x' is allowed. */
91 #define SCAN_PTOK       0x100     /* Decimal point is allowed. */
92 #define SCAN_EXPOK      0x200     /* An exponent is allowed. */
93 
/* Cast to zend_uchar; applied to char values before they are handed to isdigit(). */
94 #define UCHAR(x)		(zend_uchar)(x)
95 
96 /*
97  * The following structure contains the information associated with
98  * a character set.
99  */
/* Parsed form of a "%[...]" set: a list of literal characters plus a list of ranges. */
100 typedef struct CharSet {
101 	int exclude;		/* 1 if this is an exclusion set (the set started with '^'). */
102 	int nchars;		/* Number of entries used in chars. */
103 	char *chars;		/* Literal member characters; allocated by BuildCharSet. */
104 	int nranges;		/* Number of entries used in ranges. */
105 	struct Range {
106 		char start;	/* Lower bound of the range (inclusive). */
107 		char end;	/* Upper bound of the range (inclusive). */
108 	} *ranges;		/* Range list; NULL when the set contains no '-'. */
109 } CharSet;
110 
111 /*
112  * Declarations for functions used only in this file.
113  */
114 static char *BuildCharSet(CharSet *cset, char *format);	/* parse a "%[...]" set, return position after ']' */
115 static int	CharInSet(CharSet *cset, int ch);		/* non-zero if ch matches the set */
116 static void	ReleaseCharSet(CharSet *cset);			/* free the storage owned by the set */
117 static inline void scan_set_error_return(int numVars, zval *return_value);	/* store the error result in return_value */
118 
119 
120 /* {{{ BuildCharSet
121  *----------------------------------------------------------------------
122  *
123  * BuildCharSet --
124  *
125  *	This function examines a character set format specification
126  *	and builds a CharSet containing the individual characters and
127  *	character ranges specified.
128  *
129  * Results:
130  *	Returns the next format position.
131  *
132  * Side effects:
133  *	Initializes the charset.
134  *
135  *----------------------------------------------------------------------
136  */
BuildCharSet(CharSet * cset,char * format)137 static char * BuildCharSet(CharSet *cset, char *format)
138 {
139 	char *ch, start;
140 	int  nranges;
141 	char *end;
142 
143 	memset(cset, 0, sizeof(CharSet));
144 
145 	ch = format;
146 	if (*ch == '^') {
147 		cset->exclude = 1;
148 		ch = ++format;
149 	}
150 	end = format + 1;	/* verify this - cc */
151 
152 	/*
153 	 * Find the close bracket so we can overallocate the set.
154 	 */
155 	if (*ch == ']') {
156 		ch = end++;
157 	}
158 	nranges = 0;
159 	while (*ch != ']') {
160 		if (*ch == '-') {
161 			nranges++;
162 		}
163 		ch = end++;
164 	}
165 
166 	cset->chars = (char *) safe_emalloc(sizeof(char), (end - format - 1), 0);
167 	if (nranges > 0) {
168 		cset->ranges = (struct Range *) safe_emalloc(sizeof(struct Range), nranges, 0);
169 	} else {
170 		cset->ranges = NULL;
171 	}
172 
173 	/*
174 	 * Now build the character set.
175 	 */
176 	cset->nchars = cset->nranges = 0;
177 	ch    = format++;
178 	start = *ch;
179 	if (*ch == ']' || *ch == '-') {
180 		cset->chars[cset->nchars++] = *ch;
181 		ch = format++;
182 	}
183 	while (*ch != ']') {
184 		if (*format == '-') {
185 			/*
186 			 * This may be the first character of a range, so don't add
187 			 * it yet.
188 			 */
189 			start = *ch;
190 		} else if (*ch == '-') {
191 			/*
192 			 * Check to see if this is the last character in the set, in which
193 			 * case it is not a range and we should add the previous character
194 			 * as well as the dash.
195 			 */
196 			if (*format == ']') {
197 				cset->chars[cset->nchars++] = start;
198 				cset->chars[cset->nchars++] = *ch;
199 			} else {
200 				ch = format++;
201 
202 				/*
203 				 * Check to see if the range is in reverse order.
204 				 */
205 				if (start < *ch) {
206 					cset->ranges[cset->nranges].start = start;
207 					cset->ranges[cset->nranges].end = *ch;
208 				} else {
209 					cset->ranges[cset->nranges].start = *ch;
210 					cset->ranges[cset->nranges].end = start;
211 				}
212 				cset->nranges++;
213 			}
214 		} else {
215 			cset->chars[cset->nchars++] = *ch;
216 		}
217 		ch = format++;
218 	}
219 	return format;
220 }
221 /* }}} */
222 
223 /* {{{ CharInSet
224  *----------------------------------------------------------------------
225  *
226  * CharInSet --
227  *
228  *	Check to see if a character matches the given set.
229  *
230  * Results:
231  *	Returns non-zero if the character matches the given set.
232  *
233  * Side effects:
234  *	None.
235  *
236  *----------------------------------------------------------------------
237  */
CharInSet(CharSet * cset,int c)238 static int CharInSet(CharSet *cset, int c)
239 {
240 	char ch = (char) c;
241 	int i, match = 0;
242 
243 	for (i = 0; i < cset->nchars; i++) {
244 		if (cset->chars[i] == ch) {
245 			match = 1;
246 			break;
247 		}
248 	}
249 	if (!match) {
250 		for (i = 0; i < cset->nranges; i++) {
251 			if ((cset->ranges[i].start <= ch)
252 				&& (ch <= cset->ranges[i].end)) {
253 				match = 1;
254 				break;
255 			}
256 		}
257 	}
258 	return (cset->exclude ? !match : match);
259 }
260 /* }}} */
261 
262 /* {{{ ReleaseCharSet
263  *----------------------------------------------------------------------
264  *
265  * ReleaseCharSet --
266  *
267  *	Free the storage associated with a character set.
268  *
269  * Results:
270  *	None.
271  *
272  * Side effects:
273  *	None.
274  *
275  *----------------------------------------------------------------------
276  */
ReleaseCharSet(CharSet * cset)277 static void ReleaseCharSet(CharSet *cset)
278 {
279 	efree((char *)cset->chars);
280 	if (cset->ranges) {
281 		efree((char *)cset->ranges);
282 	}
283 }
284 /* }}} */
285 
286 /* {{{ ValidateFormat
287  *----------------------------------------------------------------------
288  *
289  * ValidateFormat --
290  *
291  *	Parse the format string and verify that it is properly formed
292  *	and that there are exactly enough variables on the command line.
293  *
294  * Results:
295  *    FAILURE or SUCCESS.
296  *
297  * Side effects:
298  *     May set php_error based on abnormal conditions.
299  *
300  * Parameters :
301  *     format     The format string.
302  *     numVars    The number of variables passed to the scan command.
303  *     totalSubs  The number of variables that will be required.
304  *
305  *----------------------------------------------------------------------
306 */
ValidateFormat(char * format,int numVars,int * totalSubs)307 PHPAPI int ValidateFormat(char *format, int numVars, int *totalSubs)
308 {
309 #define STATIC_LIST_SIZE 16
310 	int gotXpg, gotSequential, value, i, flags;
311 	char *end, *ch = NULL;
312 	int staticAssign[STATIC_LIST_SIZE];
313 	int *nassign = staticAssign;
314 	int objIndex, xpgSize, nspace = STATIC_LIST_SIZE;
315 
316 	/*
317 	 * Initialize an array that records the number of times a variable
318 	 * is assigned to by the format string.  We use this to detect if
319 	 * a variable is multiply assigned or left unassigned.
320 	 */
321 	if (numVars > nspace) {
322 		nassign = (int*)safe_emalloc(sizeof(int), numVars, 0);
323 		nspace = numVars;
324 	}
325 	for (i = 0; i < nspace; i++) {
326 		nassign[i] = 0;
327 	}
328 
329 	xpgSize = objIndex = gotXpg = gotSequential = 0;
330 
331 	while (*format != '\0') {
332 		ch = format++;
333 		flags = 0;
334 
335 		if (*ch != '%') {
336 			continue;
337 		}
338 		ch = format++;
339 		if (*ch == '%') {
340 			continue;
341 		}
342 		if (*ch == '*') {
343 			flags |= SCAN_SUPPRESS;
344 			ch = format++;
345 			goto xpgCheckDone;
346 		}
347 
348 		if ( isdigit( (int)*ch ) ) {
349 			/*
350 			 * Check for an XPG3-style %n$ specification.  Note: there
351 			 * must not be a mixture of XPG3 specs and non-XPG3 specs
352 			 * in the same format string.
353 			 */
354 			value = ZEND_STRTOUL(format-1, &end, 10);
355 			if (*end != '$') {
356 				goto notXpg;
357 			}
358 			format = end+1;
359 			ch     = format++;
360 			gotXpg = 1;
361 			if (gotSequential) {
362 				goto mixedXPG;
363 			}
364 			objIndex = value - 1;
365 			if ((objIndex < 0) || (numVars && (objIndex >= numVars))) {
366 				goto badIndex;
367 			} else if (numVars == 0) {
368 				/*
369 				 * In the case where no vars are specified, the user can
370 				 * specify %9999$ legally, so we have to consider special
371 				 * rules for growing the assign array.  'value' is
372 				 * guaranteed to be > 0.
373 				 */
374 
375 				/* set a lower artificial limit on this
376 				 * in the interest of security and resource friendliness
377 				 * 255 arguments should be more than enough. - cc
378 				 */
379 				if (value > SCAN_MAX_ARGS) {
380 					goto badIndex;
381 				}
382 
383 				xpgSize = (xpgSize > value) ? xpgSize : value;
384 			}
385 			goto xpgCheckDone;
386 		}
387 
388 notXpg:
389 		gotSequential = 1;
390 		if (gotXpg) {
391 mixedXPG:
392 			php_error_docref(NULL, E_WARNING, "%s", "cannot mix \"%\" and \"%n$\" conversion specifiers");
393 			goto error;
394 		}
395 
396 xpgCheckDone:
397 		/*
398 		 * Parse any width specifier.
399 		 */
400 		if (isdigit(UCHAR(*ch))) {
401 			value = ZEND_STRTOUL(format-1, &format, 10);
402 			flags |= SCAN_WIDTH;
403 			ch = format++;
404 		}
405 
406 		/*
407 		 * Ignore size specifier.
408 		 */
409 		if ((*ch == 'l') || (*ch == 'L') || (*ch == 'h')) {
410 			ch = format++;
411 		}
412 
413 		if (!(flags & SCAN_SUPPRESS) && numVars && (objIndex >= numVars)) {
414 			goto badIndex;
415 		}
416 
417 		/*
418 		 * Handle the various field types.
419 		 */
420 		switch (*ch) {
421 			case 'n':
422 			case 'd':
423 			case 'D':
424 			case 'i':
425 			case 'o':
426 			case 'x':
427 			case 'X':
428 			case 'u':
429 			case 'f':
430 			case 'e':
431 			case 'E':
432 			case 'g':
433 			case 's':
434 				break;
435 
436 			case 'c':
437 				/* we differ here with the TCL implementation in allowing for */
438 				/* a character width specification, to be more consistent with */
439 				/* ANSI. since Zend auto allocates space for vars, this is no */
440 				/* problem - cc                                               */
441 				/*
442 				if (flags & SCAN_WIDTH) {
443 					php_error_docref(NULL, E_WARNING, "Field width may not be specified in %c conversion");
444 					goto error;
445 				}
446 				*/
447 				break;
448 
449 			case '[':
450 				if (*format == '\0') {
451 					goto badSet;
452 				}
453 				ch = format++;
454 				if (*ch == '^') {
455 					if (*format == '\0') {
456 						goto badSet;
457 					}
458 					ch = format++;
459 				}
460 				if (*ch == ']') {
461 					if (*format == '\0') {
462 						goto badSet;
463 					}
464 					ch = format++;
465 				}
466 				while (*ch != ']') {
467 					if (*format == '\0') {
468 						goto badSet;
469 					}
470 					ch = format++;
471 				}
472 				break;
473 badSet:
474 				php_error_docref(NULL, E_WARNING, "Unmatched [ in format string");
475 				goto error;
476 
477 			default: {
478 				php_error_docref(NULL, E_WARNING, "Bad scan conversion character \"%c\"", *ch);
479 				goto error;
480 			}
481 		}
482 
483 		if (!(flags & SCAN_SUPPRESS)) {
484 			if (objIndex >= nspace) {
485 				/*
486 				 * Expand the nassign buffer.  If we are using XPG specifiers,
487 				 * make sure that we grow to a large enough size.  xpgSize is
488 				 * guaranteed to be at least one larger than objIndex.
489 				 */
490 				value = nspace;
491 				if (xpgSize) {
492 					nspace = xpgSize;
493 				} else {
494 					nspace += STATIC_LIST_SIZE;
495 				}
496 				if (nassign == staticAssign) {
497 					nassign = (void *)safe_emalloc(nspace, sizeof(int), 0);
498 					for (i = 0; i < STATIC_LIST_SIZE; ++i) {
499 						nassign[i] = staticAssign[i];
500 					}
501 				} else {
502 					nassign = (void *)erealloc((void *)nassign, nspace * sizeof(int));
503 				}
504 				for (i = value; i < nspace; i++) {
505 					nassign[i] = 0;
506 				}
507 			}
508 			nassign[objIndex]++;
509 			objIndex++;
510 		}
511 	} /* while (*format != '\0') */
512 
513 	/*
514 	 * Verify that all of the variable were assigned exactly once.
515 	 */
516 	if (numVars == 0) {
517 		if (xpgSize) {
518 			numVars = xpgSize;
519 		} else {
520 			numVars = objIndex;
521 		}
522 	}
523 	if (totalSubs) {
524 		*totalSubs = numVars;
525 	}
526 	for (i = 0; i < numVars; i++) {
527 		if (nassign[i] > 1) {
528 			php_error_docref(NULL, E_WARNING, "%s", "Variable is assigned by multiple \"%n$\" conversion specifiers");
529 			goto error;
530 		} else if (!xpgSize && (nassign[i] == 0)) {
531 			/*
532 			 * If the space is empty, and xpgSize is 0 (means XPG wasn't
533 			 * used, and/or numVars != 0), then too many vars were given
534 			 */
535 			php_error_docref(NULL, E_WARNING, "Variable is not assigned by any conversion specifiers");
536 			goto error;
537 		}
538 	}
539 
540 	if (nassign != staticAssign) {
541 		efree((char *)nassign);
542 	}
543 	return SCAN_SUCCESS;
544 
545 badIndex:
546 	if (gotXpg) {
547 		php_error_docref(NULL, E_WARNING, "%s", "\"%n$\" argument index out of range");
548 	} else {
549 		php_error_docref(NULL, E_WARNING, "Different numbers of variable names and field specifiers");
550 	}
551 
552 error:
553 	if (nassign != staticAssign) {
554 		efree((char *)nassign);
555 	}
556 	return SCAN_ERROR_INVALID_FORMAT;
557 #undef STATIC_LIST_SIZE
558 }
559 /* }}} */
560 
561 /* {{{ php_sscanf_internal
562  * This is the internal function which does processing on behalf of
563  * both sscanf() and fscanf()
564  *
565  * parameters :
566  * 		string		literal string to be processed
567  * 		format		format string
568  *		argCount	total number of elements in the args array
569  *		args		arguments passed in from user function (f|s)scanf
570  * 		varStart	offset (in args) of 1st variable passed in to (f|s)scanf
571  *		return_value set with the results of the scan
572  */
573 
php_sscanf_internal(char * string,char * format,int argCount,zval * args,int varStart,zval * return_value)574 PHPAPI int php_sscanf_internal( char *string, char *format,
575 				int argCount, zval *args,
576 				int varStart, zval *return_value)
577 {
578 	int  numVars, nconversions, totalVars = -1;
579 	int  i, result;
580 	zend_long value;
581 	int  objIndex;
582 	char *end, *baseString;
583 	zval *current;
584 	char op   = 0;
585 	int  base = 0;
586 	int  underflow = 0;
587 	size_t width;
588 	zend_long (*fn)() = NULL;
589 	char *ch, sch;
590 	int  flags;
591 	char buf[64];	/* Temporary buffer to hold scanned number
592 					 * strings before they are passed to strtoul() */
593 
594 	/* do some sanity checking */
595 	if ((varStart > argCount) || (varStart < 0)){
596 		varStart = SCAN_MAX_ARGS + 1;
597 	}
598 	numVars = argCount - varStart;
599 	if (numVars < 0) {
600 		numVars = 0;
601 	}
602 
603 #if 0
604 	zend_printf("<br>in sscanf_internal : <br> string is \"%s\", format = \"%s\"<br> NumVars = %d. VarStart = %d<br>-------------------------<br>",
605 					string, format, numVars, varStart);
606 #endif
607 	/*
608 	 * Check for errors in the format string.
609 	 */
610 	if (ValidateFormat(format, numVars, &totalVars) != SCAN_SUCCESS) {
611 		scan_set_error_return( numVars, return_value );
612 		return SCAN_ERROR_INVALID_FORMAT;
613 	}
614 
615 	objIndex = numVars ? varStart : 0;
616 
617 	/*
618 	 * If any variables are passed, make sure they are all passed by reference
619 	 */
620 	if (numVars) {
621 		for (i = varStart;i < argCount;i++){
622 			if ( ! Z_ISREF(args[ i ] ) ) {
623 				php_error_docref(NULL, E_WARNING, "Parameter %d must be passed by reference", i);
624 				scan_set_error_return(numVars, return_value);
625 				return SCAN_ERROR_VAR_PASSED_BYVAL;
626 			}
627 		}
628 	}
629 
630 	/*
631 	 * Allocate space for the result objects. Only happens when no variables
632 	 * are specified
633 	 */
634 	if (!numVars) {
635 		zval tmp;
636 
637 		/* allocate an array for return */
638 		array_init(return_value);
639 
640 		for (i = 0; i < totalVars; i++) {
641 			ZVAL_NULL(&tmp);
642 			if (add_next_index_zval(return_value, &tmp) == FAILURE) {
643 				scan_set_error_return(0, return_value);
644 				return FAILURE;
645 			}
646 		}
647 		varStart = 0; /* Array index starts from 0 */
648 	}
649 
650 	baseString = string;
651 
652 	/*
653 	 * Iterate over the format string filling in the result objects until
654 	 * we reach the end of input, the end of the format string, or there
655 	 * is a mismatch.
656 	 */
657 	nconversions = 0;
658 	/* note ! - we need to limit the loop for objIndex to keep it in bounds */
659 
660 	while (*format != '\0') {
661 		ch    = format++;
662 		flags = 0;
663 
664 		/*
665 		 * If we see whitespace in the format, skip whitespace in the string.
666 		 */
667 		if ( isspace( (int)*ch ) ) {
668 			sch = *string;
669 			while ( isspace( (int)sch ) ) {
670 				if (*string == '\0') {
671 					goto done;
672 				}
673 				string++;
674 				sch = *string;
675 			}
676 			continue;
677 		}
678 
679 		if (*ch != '%') {
680 literal:
681 			if (*string == '\0') {
682 				underflow = 1;
683 				goto done;
684 			}
685 			sch = *string;
686 			string++;
687 			if (*ch != sch) {
688 				goto done;
689 			}
690 			continue;
691 		}
692 
693 		ch = format++;
694 		if (*ch == '%') {
695 			goto literal;
696 		}
697 
698 		/*
699 		 * Check for assignment suppression ('*') or an XPG3-style
700 		 * assignment ('%n$').
701 		 */
702 		if (*ch == '*') {
703 			flags |= SCAN_SUPPRESS;
704 			ch = format++;
705 		} else if ( isdigit(UCHAR(*ch))) {
706 			value = ZEND_STRTOUL(format-1, &end, 10);
707 			if (*end == '$') {
708 				format = end+1;
709 				ch = format++;
710 				objIndex = varStart + value - 1;
711 			}
712 		}
713 
714 		/*
715 		 * Parse any width specifier.
716 		 */
717 		if ( isdigit(UCHAR(*ch))) {
718 			width = ZEND_STRTOUL(format-1, &format, 10);
719 			ch = format++;
720 		} else {
721 			width = 0;
722 		}
723 
724 		/*
725 		 * Ignore size specifier.
726 		 */
727 		if ((*ch == 'l') || (*ch == 'L') || (*ch == 'h')) {
728 			ch = format++;
729 		}
730 
731 		/*
732 		 * Handle the various field types.
733 		 */
734 		switch (*ch) {
735 			case 'n':
736 				if (!(flags & SCAN_SUPPRESS)) {
737 					if (numVars && objIndex >= argCount) {
738 						break;
739 					} else if (numVars) {
740 						current = args + objIndex++;
741 						ZEND_TRY_ASSIGN_REF_LONG(current, (zend_long) (string - baseString));
742 					} else {
743 						add_index_long(return_value, objIndex++, string - baseString);
744 					}
745 				}
746 				nconversions++;
747 				continue;
748 
749 			case 'd':
750 			case 'D':
751 				op = 'i';
752 				base = 10;
753 				fn = (zend_long (*)())ZEND_STRTOL_PTR;
754 				break;
755 			case 'i':
756 				op = 'i';
757 				base = 0;
758 				fn = (zend_long (*)())ZEND_STRTOL_PTR;
759 				break;
760 			case 'o':
761 				op = 'i';
762 				base = 8;
763 				fn = (zend_long (*)())ZEND_STRTOL_PTR;
764 				break;
765 			case 'x':
766 			case 'X':
767 				op = 'i';
768 				base = 16;
769 				fn = (zend_long (*)())ZEND_STRTOL_PTR;
770 				break;
771 			case 'u':
772 				op = 'i';
773 				base = 10;
774 				flags |= SCAN_UNSIGNED;
775 				fn = (zend_long (*)())ZEND_STRTOUL_PTR;
776 				break;
777 
778 			case 'f':
779 			case 'e':
780 			case 'E':
781 			case 'g':
782 				op = 'f';
783 				break;
784 
785 			case 's':
786 				op = 's';
787 				break;
788 
789 			case 'c':
790 				op = 's';
791 				flags |= SCAN_NOSKIP;
792 				/*-cc-*/
793 				if (0 == width) {
794 					width = 1;
795 				}
796 				/*-cc-*/
797 				break;
798 			case '[':
799 				op = '[';
800 				flags |= SCAN_NOSKIP;
801 				break;
802 		}   /* switch */
803 
804 		/*
805 		 * At this point, we will need additional characters from the
806 		 * string to proceed.
807 		 */
808 		if (*string == '\0') {
809 			underflow = 1;
810 			goto done;
811 		}
812 
813 		/*
814 		 * Skip any leading whitespace at the beginning of a field unless
815 		 * the format suppresses this behavior.
816 		 */
817 		if (!(flags & SCAN_NOSKIP)) {
818 			while (*string != '\0') {
819 				sch = *string;
820 				if (! isspace((int)sch) ) {
821 					break;
822 				}
823 				string++;
824 			}
825 			if (*string == '\0') {
826 				underflow = 1;
827 				goto done;
828 			}
829 		}
830 
831 		/*
832 		 * Perform the requested scanning operation.
833 		 */
834 		switch (op) {
835 			case 'c':
836 			case 's':
837 				/*
838 				 * Scan a string up to width characters or whitespace.
839 				 */
840 				if (width == 0) {
841 					width = (size_t) ~0;
842 				}
843 				end = string;
844 				while (*end != '\0') {
845 					sch = *end;
846 					if ( isspace( (int)sch ) ) {
847 						break;
848 					}
849 					end++;
850 					if (--width == 0) {
851 					   break;
852 					}
853 				}
854 				if (!(flags & SCAN_SUPPRESS)) {
855 					if (numVars && objIndex >= argCount) {
856 						break;
857 					} else if (numVars) {
858 						current = args + objIndex++;
859 						ZEND_TRY_ASSIGN_REF_STRINGL(current, string, end - string);
860 					} else {
861 						add_index_stringl(return_value, objIndex++, string, end-string);
862 					}
863 				}
864 				string = end;
865 				break;
866 
867 			case '[': {
868 				CharSet cset;
869 
870 				if (width == 0) {
871 					width = (size_t) ~0;
872 				}
873 				end = string;
874 
875 				format = BuildCharSet(&cset, format);
876 				while (*end != '\0') {
877 					sch = *end;
878 					if (!CharInSet(&cset, (int)sch)) {
879 						break;
880 					}
881 					end++;
882 					if (--width == 0) {
883 						break;
884 					}
885 				}
886 				ReleaseCharSet(&cset);
887 
888 				if (string == end) {
889 					/*
890 					 * Nothing matched the range, stop processing
891 					 */
892 					goto done;
893 				}
894 				if (!(flags & SCAN_SUPPRESS)) {
895 					if (numVars && objIndex >= argCount) {
896 						break;
897 					} else if (numVars) {
898 						current = args + objIndex++;
899 						ZEND_TRY_ASSIGN_REF_STRINGL(current, string, end - string);
900 					} else {
901 						add_index_stringl(return_value, objIndex++, string, end-string);
902 					}
903 				}
904 				string = end;
905 				break;
906 			}
907 /*
908 			case 'c':
909 			   / Scan a single character./
910 
911 				sch = *string;
912 				string++;
913 				if (!(flags & SCAN_SUPPRESS)) {
914 					if (numVars) {
915 						char __buf[2];
916 						__buf[0] = sch;
917 						__buf[1] = '\0';
918 						current = args[objIndex++];
919 						zval_dtor(*current);
920 						ZVAL_STRINGL( *current, __buf, 1);
921 					} else {
922 						add_index_stringl(return_value, objIndex++, &sch, 1);
923 					}
924 				}
925 				break;
926 */
927 			case 'i':
928 				/*
929 				 * Scan an unsigned or signed integer.
930 				 */
931 				/*-cc-*/
932 				buf[0] = '\0';
933 				/*-cc-*/
934 				if ((width == 0) || (width > sizeof(buf) - 1)) {
935 					width = sizeof(buf) - 1;
936 				}
937 
938 				flags |= SCAN_SIGNOK | SCAN_NODIGITS | SCAN_NOZERO;
939 				for (end = buf; width > 0; width--) {
940 					switch (*string) {
941 						/*
942 						 * The 0 digit has special meaning at the beginning of
943 						 * a number.  If we are unsure of the base, it
944 						 * indicates that we are in base 8 or base 16 (if it is
945 						 * followed by an 'x').
946 						 */
947 						case '0':
948 							/*-cc-*/
949 							if (base == 16) {
950 								flags |= SCAN_XOK;
951 							}
952 							/*-cc-*/
953 							if (base == 0) {
954 								base = 8;
955 								flags |= SCAN_XOK;
956 							}
957 							if (flags & SCAN_NOZERO) {
958 								flags &= ~(SCAN_SIGNOK | SCAN_NODIGITS | SCAN_NOZERO);
959 							} else {
960 								flags &= ~(SCAN_SIGNOK | SCAN_XOK | SCAN_NODIGITS);
961 							}
962 							goto addToInt;
963 
964 						case '1': case '2': case '3': case '4':
965 						case '5': case '6': case '7':
966 							if (base == 0) {
967 								base = 10;
968 							}
969 							flags &= ~(SCAN_SIGNOK | SCAN_XOK | SCAN_NODIGITS);
970 							goto addToInt;
971 
972 						case '8': case '9':
973 							if (base == 0) {
974 								base = 10;
975 							}
976 							if (base <= 8) {
977 							   break;
978 							}
979 							flags &= ~(SCAN_SIGNOK | SCAN_XOK | SCAN_NODIGITS);
980 							goto addToInt;
981 
982 						case 'A': case 'B': case 'C':
983 						case 'D': case 'E': case 'F':
984 						case 'a': case 'b': case 'c':
985 						case 'd': case 'e': case 'f':
986 							if (base <= 10) {
987 								break;
988 							}
989 							flags &= ~(SCAN_SIGNOK | SCAN_XOK | SCAN_NODIGITS);
990 							goto addToInt;
991 
992 						case '+': case '-':
993 							if (flags & SCAN_SIGNOK) {
994 								flags &= ~SCAN_SIGNOK;
995 								goto addToInt;
996 							}
997 							break;
998 
999 						case 'x': case 'X':
1000 							if ((flags & SCAN_XOK) && (end == buf+1)) {
1001 								base = 16;
1002 								flags &= ~SCAN_XOK;
1003 								goto addToInt;
1004 							}
1005 							break;
1006 					}
1007 
1008 					/*
1009 					 * We got an illegal character so we are done accumulating.
1010 					 */
1011 					break;
1012 
1013 addToInt:
1014 					/*
1015 					 * Add the character to the temporary buffer.
1016 					 */
1017 					*end++ = *string++;
1018 					if (*string == '\0') {
1019 						break;
1020 					}
1021 				}
1022 
1023 				/*
1024 				 * Check to see if we need to back up because we only got a
1025 				 * sign or a trailing x after a 0.
1026 				 */
1027 				if (flags & SCAN_NODIGITS) {
1028 					if (*string == '\0') {
1029 						underflow = 1;
1030 					}
1031 					goto done;
1032 				} else if (end[-1] == 'x' || end[-1] == 'X') {
1033 					end--;
1034 					string--;
1035 				}
1036 
1037 				/*
1038 				 * Scan the value from the temporary buffer.  If we are
1039 				 * returning a large unsigned value, we have to convert it back
1040 				 * to a string since PHP only supports signed values.
1041 				 */
1042 				if (!(flags & SCAN_SUPPRESS)) {
1043 					*end = '\0';
1044 					value = (zend_long) (*fn)(buf, NULL, base);
1045 					if ((flags & SCAN_UNSIGNED) && (value < 0)) {
1046 						snprintf(buf, sizeof(buf), ZEND_ULONG_FMT, value); /* INTL: ISO digit */
1047 						if (numVars && objIndex >= argCount) {
1048 							break;
1049 						} else if (numVars) {
1050 							 /* change passed value type to string */
1051 							current = args + objIndex++;
1052 							ZEND_TRY_ASSIGN_REF_STRING(current, buf);
1053 						} else {
1054 							add_index_string(return_value, objIndex++, buf);
1055 						}
1056 					} else {
1057 						if (numVars && objIndex >= argCount) {
1058 							break;
1059 						} else if (numVars) {
1060 							current = args + objIndex++;
1061 							ZEND_TRY_ASSIGN_REF_LONG(current, value);
1062 						} else {
1063 							add_index_long(return_value, objIndex++, value);
1064 						}
1065 					}
1066 				}
1067 				break;
1068 
1069 			case 'f':
1070 				/*
1071 				 * Scan a floating point number
1072 				 */
1073 				buf[0] = '\0';     /* call me pedantic */
1074 				if ((width == 0) || (width > sizeof(buf) - 1)) {
1075 					width = sizeof(buf) - 1;
1076 				}
1077 				flags |= SCAN_SIGNOK | SCAN_NODIGITS | SCAN_PTOK | SCAN_EXPOK;
1078 				for (end = buf; width > 0; width--) {
1079 					switch (*string) {
1080 						case '0': case '1': case '2': case '3':
1081 						case '4': case '5': case '6': case '7':
1082 						case '8': case '9':
1083 							flags &= ~(SCAN_SIGNOK | SCAN_NODIGITS);
1084 							goto addToFloat;
1085 						case '+':
1086 						case '-':
1087 							if (flags & SCAN_SIGNOK) {
1088 								flags &= ~SCAN_SIGNOK;
1089 								goto addToFloat;
1090 							}
1091 							break;
1092 						case '.':
1093 							if (flags & SCAN_PTOK) {
1094 								flags &= ~(SCAN_SIGNOK | SCAN_PTOK);
1095 								goto addToFloat;
1096 							}
1097 							break;
1098 						case 'e':
1099 						case 'E':
1100 							/*
1101 							 * An exponent is not allowed until there has
1102 							 * been at least one digit.
1103 							 */
1104 							if ((flags & (SCAN_NODIGITS | SCAN_EXPOK)) == SCAN_EXPOK) {
1105 								flags = (flags & ~(SCAN_EXPOK|SCAN_PTOK))
1106 									| SCAN_SIGNOK | SCAN_NODIGITS;
1107 								goto addToFloat;
1108 							}
1109 							break;
1110 					}
1111 
1112 					/*
1113 					 * We got an illegal character so we are done accumulating.
1114 					 */
1115 					break;
1116 
1117 addToFloat:
1118 					/*
1119 					 * Add the character to the temporary buffer.
1120 					 */
1121 					*end++ = *string++;
1122 					if (*string == '\0') {
1123 						break;
1124 					}
1125 				}
1126 
1127 				/*
1128 				 * Check to see if we need to back up because we saw a
1129 				 * trailing 'e' or sign.
1130 				 */
1131 				if (flags & SCAN_NODIGITS) {
1132 					if (flags & SCAN_EXPOK) {
1133 						/*
1134 						 * There were no digits at all so scanning has
1135 						 * failed and we are done.
1136 						 */
1137 						if (*string == '\0') {
1138 							underflow = 1;
1139 						}
1140 						goto done;
1141 					}
1142 
1143 					/*
1144 					 * We got a bad exponent ('e' and maybe a sign).
1145 					 */
1146 					end--;
1147 					string--;
1148 					if (*end != 'e' && *end != 'E') {
1149 						end--;
1150 						string--;
1151 					}
1152 				}
1153 
1154 				/*
1155 				 * Scan the value from the temporary buffer.
1156 				 */
1157 				if (!(flags & SCAN_SUPPRESS)) {
1158 					double dvalue;
1159 					*end = '\0';
1160 					dvalue = zend_strtod(buf, NULL);
1161 					if (numVars && objIndex >= argCount) {
1162 						break;
1163 					} else if (numVars) {
1164 						current = args + objIndex++;
1165 						ZEND_TRY_ASSIGN_REF_DOUBLE(current, dvalue);
1166 					} else {
1167 						add_index_double(return_value, objIndex++, dvalue );
1168 					}
1169 				}
1170 				break;
1171 		} /* switch (op) */
1172 		nconversions++;
1173 	} /*  while (*format != '\0') */
1174 
1175 done:
1176 	result = SCAN_SUCCESS;
1177 
1178 	if (underflow && (0==nconversions)) {
1179 		scan_set_error_return( numVars, return_value );
1180 		result = SCAN_ERROR_EOF;
1181 	} else if (numVars) {
1182 		zval_ptr_dtor(return_value );
1183 		ZVAL_LONG(return_value, nconversions);
1184 	} else if (nconversions < totalVars) {
1185 		/* TODO: not all elements converted. we need to prune the list - cc */
1186 	}
1187 	return result;
1188 }
1189 /* }}} */
1190 
1191 /* the compiler choked when i tried to make this a macro    */
scan_set_error_return(int numVars,zval * return_value)1192 static inline void scan_set_error_return(int numVars, zval *return_value) /* {{{ */
1193 {
1194 	if (numVars) {
1195 		ZVAL_LONG(return_value, SCAN_ERROR_EOF);  /* EOF marker */
1196 	} else {
1197 		/* convert_to_null calls destructor */
1198 		convert_to_null(return_value);
1199 	}
1200 }
1201 /* }}} */
1202