xref: /PHP-7.3/ext/standard/scanf.c (revision 8d3f8ca1)
1 /*
2    +----------------------------------------------------------------------+
3    | PHP Version 7                                                        |
4    +----------------------------------------------------------------------+
5    | Copyright (c) 1997-2018 The PHP Group                                |
6    +----------------------------------------------------------------------+
7    | This source file is subject to version 3.01 of the PHP license,      |
8    | that is bundled with this package in the file LICENSE, and is        |
9    | available through the world-wide-web at the following url:           |
10    | http://www.php.net/license/3_01.txt                                  |
11    | If you did not receive a copy of the PHP license and are unable to   |
12    | obtain it through the world-wide-web, please send a note to          |
13    | license@php.net so we can mail you a copy immediately.               |
14    +----------------------------------------------------------------------+
15    | Author: Clayton Collie <clcollie@mindspring.com>                     |
16    +----------------------------------------------------------------------+
17 */
18 
19 /*
20 	scanf.c --
21 
22 	This file contains the base code which implements sscanf and by extension
23 	fscanf. Original code is from TCL8.3.0 and bears the following copyright:
24 
25 	This software is copyrighted by the Regents of the University of
26 	California, Sun Microsystems, Inc., Scriptics Corporation,
27 	and other parties.  The following terms apply to all files associated
28 	with the software unless explicitly disclaimed in individual files.
29 
30 	The authors hereby grant permission to use, copy, modify, distribute,
31 	and license this software and its documentation for any purpose, provided
32 	that existing copyright notices are retained in all copies and that this
33 	notice is included verbatim in any distributions. No written agreement,
34 	license, or royalty fee is required for any of the authorized uses.
35 	Modifications to this software may be copyrighted by their authors
36 	and need not follow the licensing terms described here, provided that
37 	the new terms are clearly indicated on the first page of each file where
38 	they apply.
39 
40 	IN NO EVENT SHALL THE AUTHORS OR DISTRIBUTORS BE LIABLE TO ANY PARTY
41 	FOR DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
42 	ARISING OUT OF THE USE OF THIS SOFTWARE, ITS DOCUMENTATION, OR ANY
43 	DERIVATIVES THEREOF, EVEN IF THE AUTHORS HAVE BEEN ADVISED OF THE
44 	POSSIBILITY OF SUCH DAMAGE.
45 
46 	THE AUTHORS AND DISTRIBUTORS SPECIFICALLY DISCLAIM ANY WARRANTIES,
47 	INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY,
48 	FITNESS FOR A PARTICULAR PURPOSE, AND NON-INFRINGEMENT.  THIS SOFTWARE
49 	IS PROVIDED ON AN "AS IS" BASIS, AND THE AUTHORS AND DISTRIBUTORS HAVE
50 	NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR
51 	MODIFICATIONS.
52 
53 	GOVERNMENT USE: If you are acquiring this software on behalf of the
54 	U.S. government, the Government shall have only "Restricted Rights"
55 	in the software and related documentation as defined in the Federal
56 	Acquisition Regulations (FARs) in Clause 52.227.19 (c) (2).  If you
57 	are acquiring the software on behalf of the Department of Defense, the
58 	software shall be classified as "Commercial Computer Software" and the
59 	Government shall have only "Restricted Rights" as defined in Clause
60 	252.227-7013 (c) (1) of DFARs.  Notwithstanding the foregoing, the
61 	authors grant the U.S. Government and others acting in its behalf
62 	permission to use and distribute the software in accordance with the
63 	terms specified in this license.
64 */
65 
66 #include <stdio.h>
67 #include <limits.h>
68 #include <ctype.h>
69 #include "php.h"
70 #include "php_variables.h"
71 #ifdef HAVE_LOCALE_H
72 #include <locale.h>
73 #endif
74 #include "zend_execute.h"
75 #include "zend_operators.h"
76 #include "zend_strtod.h"
77 #include "php_globals.h"
78 #include "basic_functions.h"
79 #include "scanf.h"
80 
/*
 * Flag values used internally by [f|s]canf.
 *
 * The first group describes the conversion being processed; the second
 * group is scratch state toggled while a numeric field is accumulated.
 */
#define SCAN_NOSKIP     0x1       /* Don't skip blanks. */
#define SCAN_SUPPRESS	0x2	  /* Suppress assignment. */
#define SCAN_UNSIGNED	0x4	  /* Read an unsigned value. */
#define SCAN_WIDTH      0x8       /* A width value was supplied. */

#define SCAN_SIGNOK     0x10      /* A +/- character is allowed. */
#define SCAN_NODIGITS   0x20      /* No digits have been scanned. */
#define SCAN_NOZERO     0x40      /* No zero digits have been scanned. */
#define SCAN_XOK        0x80      /* An 'x' is allowed. */
#define SCAN_PTOK       0x100     /* Decimal point is allowed. */
#define SCAN_EXPOK      0x200     /* An exponent is allowed. */

/*
 * Converts a (possibly signed) char to zend_uchar so that it can be handed
 * to the <ctype.h> classifiers.  The expansion is fully parenthesized so
 * that it behaves as a single operand wherever it is used.
 */
#define UCHAR(x)		((zend_uchar)(x))
97 
/*
 * Description of one "%[...]" character class: a list of individual
 * member characters plus a list of inclusive character ranges.
 */
struct Range {
	char start;		/* Lowest character of the range. */
	char end;		/* Highest character of the range. */
};

typedef struct CharSet {
	int exclude;		/* 1 if this is an exclusion set ("%[^...]"). */
	int nchars;		/* Number of entries used in chars. */
	char *chars;		/* Individual member characters. */
	int nranges;		/* Number of entries used in ranges. */
	struct Range *ranges;	/* Member ranges; NULL when there are none. */
} CharSet;
112 
113 /*
114  * Declarations for functions used only in this file.
115  */
116 static char *BuildCharSet(CharSet *cset, char *format);
117 static int	CharInSet(CharSet *cset, int ch);
118 static void	ReleaseCharSet(CharSet *cset);
119 static inline void scan_set_error_return(int numVars, zval *return_value);
120 
121 
122 /* {{{ BuildCharSet
123  *----------------------------------------------------------------------
124  *
125  * BuildCharSet --
126  *
127  *	This function examines a character set format specification
128  *	and builds a CharSet containing the individual characters and
129  *	character ranges specified.
130  *
131  * Results:
132  *	Returns the next format position.
133  *
134  * Side effects:
135  *	Initializes the charset.
136  *
137  *----------------------------------------------------------------------
138  */
BuildCharSet(CharSet * cset,char * format)139 static char * BuildCharSet(CharSet *cset, char *format)
140 {
141 	char *ch, start;
142 	int  nranges;
143 	char *end;
144 
145 	memset(cset, 0, sizeof(CharSet));
146 
147 	ch = format;
148 	if (*ch == '^') {
149 		cset->exclude = 1;
150 		ch = ++format;
151 	}
152 	end = format + 1;	/* verify this - cc */
153 
154 	/*
155 	 * Find the close bracket so we can overallocate the set.
156 	 */
157 	if (*ch == ']') {
158 		ch = end++;
159 	}
160 	nranges = 0;
161 	while (*ch != ']') {
162 		if (*ch == '-') {
163 			nranges++;
164 		}
165 		ch = end++;
166 	}
167 
168 	cset->chars = (char *) safe_emalloc(sizeof(char), (end - format - 1), 0);
169 	if (nranges > 0) {
170 		cset->ranges = (struct Range *) safe_emalloc(sizeof(struct Range), nranges, 0);
171 	} else {
172 		cset->ranges = NULL;
173 	}
174 
175 	/*
176 	 * Now build the character set.
177 	 */
178 	cset->nchars = cset->nranges = 0;
179 	ch    = format++;
180 	start = *ch;
181 	if (*ch == ']' || *ch == '-') {
182 		cset->chars[cset->nchars++] = *ch;
183 		ch = format++;
184 	}
185 	while (*ch != ']') {
186 		if (*format == '-') {
187 			/*
188 			 * This may be the first character of a range, so don't add
189 			 * it yet.
190 			 */
191 			start = *ch;
192 		} else if (*ch == '-') {
193 			/*
194 			 * Check to see if this is the last character in the set, in which
195 			 * case it is not a range and we should add the previous character
196 			 * as well as the dash.
197 			 */
198 			if (*format == ']') {
199 				cset->chars[cset->nchars++] = start;
200 				cset->chars[cset->nchars++] = *ch;
201 			} else {
202 				ch = format++;
203 
204 				/*
205 				 * Check to see if the range is in reverse order.
206 				 */
207 				if (start < *ch) {
208 					cset->ranges[cset->nranges].start = start;
209 					cset->ranges[cset->nranges].end = *ch;
210 				} else {
211 					cset->ranges[cset->nranges].start = *ch;
212 					cset->ranges[cset->nranges].end = start;
213 				}
214 				cset->nranges++;
215 			}
216 		} else {
217 			cset->chars[cset->nchars++] = *ch;
218 		}
219 		ch = format++;
220 	}
221 	return format;
222 }
223 /* }}} */
224 
225 /* {{{ CharInSet
226  *----------------------------------------------------------------------
227  *
228  * CharInSet --
229  *
230  *	Check to see if a character matches the given set.
231  *
232  * Results:
233  *	Returns non-zero if the character matches the given set.
234  *
235  * Side effects:
236  *	None.
237  *
238  *----------------------------------------------------------------------
239  */
CharInSet(CharSet * cset,int c)240 static int CharInSet(CharSet *cset, int c)
241 {
242 	char ch = (char) c;
243 	int i, match = 0;
244 
245 	for (i = 0; i < cset->nchars; i++) {
246 		if (cset->chars[i] == ch) {
247 			match = 1;
248 			break;
249 		}
250 	}
251 	if (!match) {
252 		for (i = 0; i < cset->nranges; i++) {
253 			if ((cset->ranges[i].start <= ch)
254 				&& (ch <= cset->ranges[i].end)) {
255 				match = 1;
256 				break;
257 			}
258 		}
259 	}
260 	return (cset->exclude ? !match : match);
261 }
262 /* }}} */
263 
264 /* {{{ ReleaseCharSet
265  *----------------------------------------------------------------------
266  *
267  * ReleaseCharSet --
268  *
269  *	Free the storage associated with a character set.
270  *
271  * Results:
272  *	None.
273  *
274  * Side effects:
275  *	None.
276  *
277  *----------------------------------------------------------------------
278  */
ReleaseCharSet(CharSet * cset)279 static void ReleaseCharSet(CharSet *cset)
280 {
281 	efree((char *)cset->chars);
282 	if (cset->ranges) {
283 		efree((char *)cset->ranges);
284 	}
285 }
286 /* }}} */
287 
288 /* {{{ ValidateFormat
289  *----------------------------------------------------------------------
290  *
291  * ValidateFormat --
292  *
293  *	Parse the format string and verify that it is properly formed
294  *	and that there are exactly enough variables on the command line.
295  *
296  * Results:
297  *    FAILURE or SUCCESS.
298  *
299  * Side effects:
300  *     May set php_error based on abnormal conditions.
301  *
302  * Parameters :
303  *     format     The format string.
304  *     numVars    The number of variables passed to the scan command.
305  *     totalSubs  The number of variables that will be required.
306  *
307  *----------------------------------------------------------------------
308 */
ValidateFormat(char * format,int numVars,int * totalSubs)309 PHPAPI int ValidateFormat(char *format, int numVars, int *totalSubs)
310 {
311 #define STATIC_LIST_SIZE 16
312 	int gotXpg, gotSequential, value, i, flags;
313 	char *end, *ch = NULL;
314 	int staticAssign[STATIC_LIST_SIZE];
315 	int *nassign = staticAssign;
316 	int objIndex, xpgSize, nspace = STATIC_LIST_SIZE;
317 
318 	/*
319 	 * Initialize an array that records the number of times a variable
320 	 * is assigned to by the format string.  We use this to detect if
321 	 * a variable is multiply assigned or left unassigned.
322 	 */
323 	if (numVars > nspace) {
324 		nassign = (int*)safe_emalloc(sizeof(int), numVars, 0);
325 		nspace = numVars;
326 	}
327 	for (i = 0; i < nspace; i++) {
328 		nassign[i] = 0;
329 	}
330 
331 	xpgSize = objIndex = gotXpg = gotSequential = 0;
332 
333 	while (*format != '\0') {
334 		ch = format++;
335 		flags = 0;
336 
337 		if (*ch != '%') {
338 			continue;
339 		}
340 		ch = format++;
341 		if (*ch == '%') {
342 			continue;
343 		}
344 		if (*ch == '*') {
345 			flags |= SCAN_SUPPRESS;
346 			ch = format++;
347 			goto xpgCheckDone;
348 		}
349 
350 		if ( isdigit( (int)*ch ) ) {
351 			/*
352 			 * Check for an XPG3-style %n$ specification.  Note: there
353 			 * must not be a mixture of XPG3 specs and non-XPG3 specs
354 			 * in the same format string.
355 			 */
356 			value = ZEND_STRTOUL(format-1, &end, 10);
357 			if (*end != '$') {
358 				goto notXpg;
359 			}
360 			format = end+1;
361 			ch     = format++;
362 			gotXpg = 1;
363 			if (gotSequential) {
364 				goto mixedXPG;
365 			}
366 			objIndex = value - 1;
367 			if ((objIndex < 0) || (numVars && (objIndex >= numVars))) {
368 				goto badIndex;
369 			} else if (numVars == 0) {
370 				/*
371 				 * In the case where no vars are specified, the user can
372 				 * specify %9999$ legally, so we have to consider special
373 				 * rules for growing the assign array.  'value' is
374 				 * guaranteed to be > 0.
375 				 */
376 
377 				/* set a lower artificial limit on this
378 				 * in the interest of security and resource friendliness
379 				 * 255 arguments should be more than enough. - cc
380 				 */
381 				if (value > SCAN_MAX_ARGS) {
382 					goto badIndex;
383 				}
384 
385 				xpgSize = (xpgSize > value) ? xpgSize : value;
386 			}
387 			goto xpgCheckDone;
388 		}
389 
390 notXpg:
391 		gotSequential = 1;
392 		if (gotXpg) {
393 mixedXPG:
394 			php_error_docref(NULL, E_WARNING, "%s", "cannot mix \"%\" and \"%n$\" conversion specifiers");
395 			goto error;
396 		}
397 
398 xpgCheckDone:
399 		/*
400 		 * Parse any width specifier.
401 		 */
402 		if (isdigit(UCHAR(*ch))) {
403 			value = ZEND_STRTOUL(format-1, &format, 10);
404 			flags |= SCAN_WIDTH;
405 			ch = format++;
406 		}
407 
408 		/*
409 		 * Ignore size specifier.
410 		 */
411 		if ((*ch == 'l') || (*ch == 'L') || (*ch == 'h')) {
412 			ch = format++;
413 		}
414 
415 		if (!(flags & SCAN_SUPPRESS) && numVars && (objIndex >= numVars)) {
416 			goto badIndex;
417 		}
418 
419 		/*
420 		 * Handle the various field types.
421 		 */
422 		switch (*ch) {
423 			case 'n':
424 			case 'd':
425 			case 'D':
426 			case 'i':
427 			case 'o':
428 			case 'x':
429 			case 'X':
430 			case 'u':
431 			case 'f':
432 			case 'e':
433 			case 'E':
434 			case 'g':
435 			case 's':
436 				break;
437 
438 			case 'c':
439 				/* we differ here with the TCL implementation in allowing for */
440 				/* a character width specification, to be more consistent with */
441 				/* ANSI. since Zend auto allocates space for vars, this is no */
442 				/* problem - cc                                               */
443 				/*
444 				if (flags & SCAN_WIDTH) {
445 					php_error_docref(NULL, E_WARNING, "Field width may not be specified in %c conversion");
446 					goto error;
447 				}
448 				*/
449 				break;
450 
451 			case '[':
452 				if (*format == '\0') {
453 					goto badSet;
454 				}
455 				ch = format++;
456 				if (*ch == '^') {
457 					if (*format == '\0') {
458 						goto badSet;
459 					}
460 					ch = format++;
461 				}
462 				if (*ch == ']') {
463 					if (*format == '\0') {
464 						goto badSet;
465 					}
466 					ch = format++;
467 				}
468 				while (*ch != ']') {
469 					if (*format == '\0') {
470 						goto badSet;
471 					}
472 					ch = format++;
473 				}
474 				break;
475 badSet:
476 				php_error_docref(NULL, E_WARNING, "Unmatched [ in format string");
477 				goto error;
478 
479 			default: {
480 				php_error_docref(NULL, E_WARNING, "Bad scan conversion character \"%c\"", *ch);
481 				goto error;
482 			}
483 		}
484 
485 		if (!(flags & SCAN_SUPPRESS)) {
486 			if (objIndex >= nspace) {
487 				/*
488 				 * Expand the nassign buffer.  If we are using XPG specifiers,
489 				 * make sure that we grow to a large enough size.  xpgSize is
490 				 * guaranteed to be at least one larger than objIndex.
491 				 */
492 				value = nspace;
493 				if (xpgSize) {
494 					nspace = xpgSize;
495 				} else {
496 					nspace += STATIC_LIST_SIZE;
497 				}
498 				if (nassign == staticAssign) {
499 					nassign = (void *)safe_emalloc(nspace, sizeof(int), 0);
500 					for (i = 0; i < STATIC_LIST_SIZE; ++i) {
501 						nassign[i] = staticAssign[i];
502 					}
503 				} else {
504 					nassign = (void *)erealloc((void *)nassign, nspace * sizeof(int));
505 				}
506 				for (i = value; i < nspace; i++) {
507 					nassign[i] = 0;
508 				}
509 			}
510 			nassign[objIndex]++;
511 			objIndex++;
512 		}
513 	} /* while (*format != '\0') */
514 
515 	/*
516 	 * Verify that all of the variable were assigned exactly once.
517 	 */
518 	if (numVars == 0) {
519 		if (xpgSize) {
520 			numVars = xpgSize;
521 		} else {
522 			numVars = objIndex;
523 		}
524 	}
525 	if (totalSubs) {
526 		*totalSubs = numVars;
527 	}
528 	for (i = 0; i < numVars; i++) {
529 		if (nassign[i] > 1) {
530 			php_error_docref(NULL, E_WARNING, "%s", "Variable is assigned by multiple \"%n$\" conversion specifiers");
531 			goto error;
532 		} else if (!xpgSize && (nassign[i] == 0)) {
533 			/*
534 			 * If the space is empty, and xpgSize is 0 (means XPG wasn't
535 			 * used, and/or numVars != 0), then too many vars were given
536 			 */
537 			php_error_docref(NULL, E_WARNING, "Variable is not assigned by any conversion specifiers");
538 			goto error;
539 		}
540 	}
541 
542 	if (nassign != staticAssign) {
543 		efree((char *)nassign);
544 	}
545 	return SCAN_SUCCESS;
546 
547 badIndex:
548 	if (gotXpg) {
549 		php_error_docref(NULL, E_WARNING, "%s", "\"%n$\" argument index out of range");
550 	} else {
551 		php_error_docref(NULL, E_WARNING, "Different numbers of variable names and field specifiers");
552 	}
553 
554 error:
555 	if (nassign != staticAssign) {
556 		efree((char *)nassign);
557 	}
558 	return SCAN_ERROR_INVALID_FORMAT;
559 #undef STATIC_LIST_SIZE
560 }
561 /* }}} */
562 
563 /* {{{ php_sscanf_internal
564  * This is the internal function which does processing on behalf of
565  * both sscanf() and fscanf()
566  *
567  * parameters :
568  * 		string		literal string to be processed
569  * 		format		format string
570  *		argCount	total number of elements in the args array
571  *		args		arguments passed in from user function (f|s)scanf
572  * 		varStart	offset (in args) of 1st variable passed in to (f|s)scanf
573  *		return_value set with the results of the scan
574  */
575 
php_sscanf_internal(char * string,char * format,int argCount,zval * args,int varStart,zval * return_value)576 PHPAPI int php_sscanf_internal( char *string, char *format,
577 				int argCount, zval *args,
578 				int varStart, zval *return_value)
579 {
580 	int  numVars, nconversions, totalVars = -1;
581 	int  i, result;
582 	zend_long value;
583 	int  objIndex;
584 	char *end, *baseString;
585 	zval *current;
586 	char op   = 0;
587 	int  base = 0;
588 	int  underflow = 0;
589 	size_t width;
590 	zend_long (*fn)() = NULL;
591 	char *ch, sch;
592 	int  flags;
593 	char buf[64];	/* Temporary buffer to hold scanned number
594 					 * strings before they are passed to strtoul() */
595 
596 	/* do some sanity checking */
597 	if ((varStart > argCount) || (varStart < 0)){
598 		varStart = SCAN_MAX_ARGS + 1;
599 	}
600 	numVars = argCount - varStart;
601 	if (numVars < 0) {
602 		numVars = 0;
603 	}
604 
605 #if 0
606 	zend_printf("<br>in sscanf_internal : <br> string is \"%s\", format = \"%s\"<br> NumVars = %d. VarStart = %d<br>-------------------------<br>",
607 					string, format, numVars, varStart);
608 #endif
609 	/*
610 	 * Check for errors in the format string.
611 	 */
612 	if (ValidateFormat(format, numVars, &totalVars) != SCAN_SUCCESS) {
613 		scan_set_error_return( numVars, return_value );
614 		return SCAN_ERROR_INVALID_FORMAT;
615 	}
616 
617 	objIndex = numVars ? varStart : 0;
618 
619 	/*
620 	 * If any variables are passed, make sure they are all passed by reference
621 	 */
622 	if (numVars) {
623 		for (i = varStart;i < argCount;i++){
624 			if ( ! Z_ISREF(args[ i ] ) ) {
625 				php_error_docref(NULL, E_WARNING, "Parameter %d must be passed by reference", i);
626 				scan_set_error_return(numVars, return_value);
627 				return SCAN_ERROR_VAR_PASSED_BYVAL;
628 			}
629 		}
630 	}
631 
632 	/*
633 	 * Allocate space for the result objects. Only happens when no variables
634 	 * are specified
635 	 */
636 	if (!numVars) {
637 		zval tmp;
638 
639 		/* allocate an array for return */
640 		array_init(return_value);
641 
642 		for (i = 0; i < totalVars; i++) {
643 			ZVAL_NULL(&tmp);
644 			if (add_next_index_zval(return_value, &tmp) == FAILURE) {
645 				scan_set_error_return(0, return_value);
646 				return FAILURE;
647 			}
648 		}
649 		varStart = 0; /* Array index starts from 0 */
650 	}
651 
652 	baseString = string;
653 
654 	/*
655 	 * Iterate over the format string filling in the result objects until
656 	 * we reach the end of input, the end of the format string, or there
657 	 * is a mismatch.
658 	 */
659 	nconversions = 0;
660 	/* note ! - we need to limit the loop for objIndex to keep it in bounds */
661 
662 	while (*format != '\0') {
663 		ch    = format++;
664 		flags = 0;
665 
666 		/*
667 		 * If we see whitespace in the format, skip whitespace in the string.
668 		 */
669 		if ( isspace( (int)*ch ) ) {
670 			sch = *string;
671 			while ( isspace( (int)sch ) ) {
672 				if (*string == '\0') {
673 					goto done;
674 				}
675 				string++;
676 				sch = *string;
677 			}
678 			continue;
679 		}
680 
681 		if (*ch != '%') {
682 literal:
683 			if (*string == '\0') {
684 				underflow = 1;
685 				goto done;
686 			}
687 			sch = *string;
688 			string++;
689 			if (*ch != sch) {
690 				goto done;
691 			}
692 			continue;
693 		}
694 
695 		ch = format++;
696 		if (*ch == '%') {
697 			goto literal;
698 		}
699 
700 		/*
701 		 * Check for assignment suppression ('*') or an XPG3-style
702 		 * assignment ('%n$').
703 		 */
704 		if (*ch == '*') {
705 			flags |= SCAN_SUPPRESS;
706 			ch = format++;
707 		} else if ( isdigit(UCHAR(*ch))) {
708 			value = ZEND_STRTOUL(format-1, &end, 10);
709 			if (*end == '$') {
710 				format = end+1;
711 				ch = format++;
712 				objIndex = varStart + value - 1;
713 			}
714 		}
715 
716 		/*
717 		 * Parse any width specifier.
718 		 */
719 		if ( isdigit(UCHAR(*ch))) {
720 			width = ZEND_STRTOUL(format-1, &format, 10);
721 			ch = format++;
722 		} else {
723 			width = 0;
724 		}
725 
726 		/*
727 		 * Ignore size specifier.
728 		 */
729 		if ((*ch == 'l') || (*ch == 'L') || (*ch == 'h')) {
730 			ch = format++;
731 		}
732 
733 		/*
734 		 * Handle the various field types.
735 		 */
736 		switch (*ch) {
737 			case 'n':
738 				if (!(flags & SCAN_SUPPRESS)) {
739 					if (numVars && objIndex >= argCount) {
740 						break;
741 					} else if (numVars) {
742 						current = Z_REFVAL(args[objIndex++]);
743 						zval_ptr_dtor(current);
744 						ZVAL_LONG(current, (zend_long)(string - baseString) );
745 					} else {
746 						add_index_long(return_value, objIndex++, string - baseString);
747 					}
748 				}
749 				nconversions++;
750 				continue;
751 
752 			case 'd':
753 			case 'D':
754 				op = 'i';
755 				base = 10;
756 				fn = (zend_long (*)())ZEND_STRTOL_PTR;
757 				break;
758 			case 'i':
759 				op = 'i';
760 				base = 0;
761 				fn = (zend_long (*)())ZEND_STRTOL_PTR;
762 				break;
763 			case 'o':
764 				op = 'i';
765 				base = 8;
766 				fn = (zend_long (*)())ZEND_STRTOL_PTR;
767 				break;
768 			case 'x':
769 			case 'X':
770 				op = 'i';
771 				base = 16;
772 				fn = (zend_long (*)())ZEND_STRTOL_PTR;
773 				break;
774 			case 'u':
775 				op = 'i';
776 				base = 10;
777 				flags |= SCAN_UNSIGNED;
778 				fn = (zend_long (*)())ZEND_STRTOUL_PTR;
779 				break;
780 
781 			case 'f':
782 			case 'e':
783 			case 'E':
784 			case 'g':
785 				op = 'f';
786 				break;
787 
788 			case 's':
789 				op = 's';
790 				break;
791 
792 			case 'c':
793 				op = 's';
794 				flags |= SCAN_NOSKIP;
795 				/*-cc-*/
796 				if (0 == width) {
797 					width = 1;
798 				}
799 				/*-cc-*/
800 				break;
801 			case '[':
802 				op = '[';
803 				flags |= SCAN_NOSKIP;
804 				break;
805 		}   /* switch */
806 
807 		/*
808 		 * At this point, we will need additional characters from the
809 		 * string to proceed.
810 		 */
811 		if (*string == '\0') {
812 			underflow = 1;
813 			goto done;
814 		}
815 
816 		/*
817 		 * Skip any leading whitespace at the beginning of a field unless
818 		 * the format suppresses this behavior.
819 		 */
820 		if (!(flags & SCAN_NOSKIP)) {
821 			while (*string != '\0') {
822 				sch = *string;
823 				if (! isspace((int)sch) ) {
824 					break;
825 				}
826 				string++;
827 			}
828 			if (*string == '\0') {
829 				underflow = 1;
830 				goto done;
831 			}
832 		}
833 
834 		/*
835 		 * Perform the requested scanning operation.
836 		 */
837 		switch (op) {
838 			case 'c':
839 			case 's':
840 				/*
841 				 * Scan a string up to width characters or whitespace.
842 				 */
843 				if (width == 0) {
844 					width = (size_t) ~0;
845 				}
846 				end = string;
847 				while (*end != '\0') {
848 					sch = *end;
849 					if ( isspace( (int)sch ) ) {
850 						break;
851 					}
852 					end++;
853 					if (--width == 0) {
854 					   break;
855 					}
856 				}
857 				if (!(flags & SCAN_SUPPRESS)) {
858 					if (numVars && objIndex >= argCount) {
859 						break;
860 					} else if (numVars) {
861 						current = Z_REFVAL(args[objIndex++]);
862 						zval_ptr_dtor(current);
863 						ZVAL_STRINGL(current, string, end-string);
864 					} else {
865 						add_index_stringl(return_value, objIndex++, string, end-string);
866 					}
867 				}
868 				string = end;
869 				break;
870 
871 			case '[': {
872 				CharSet cset;
873 
874 				if (width == 0) {
875 					width = (size_t) ~0;
876 				}
877 				end = string;
878 
879 				format = BuildCharSet(&cset, format);
880 				while (*end != '\0') {
881 					sch = *end;
882 					if (!CharInSet(&cset, (int)sch)) {
883 						break;
884 					}
885 					end++;
886 					if (--width == 0) {
887 						break;
888 					}
889 				}
890 				ReleaseCharSet(&cset);
891 
892 				if (string == end) {
893 					/*
894 					 * Nothing matched the range, stop processing
895 					 */
896 					goto done;
897 				}
898 				if (!(flags & SCAN_SUPPRESS)) {
899 					if (numVars && objIndex >= argCount) {
900 						break;
901 					} else if (numVars) {
902 						current = Z_REFVAL(args[objIndex++]);
903 						zval_ptr_dtor(current);
904 						ZVAL_STRINGL(current, string, end-string);
905 					} else {
906 						add_index_stringl(return_value, objIndex++, string, end-string);
907 					}
908 				}
909 				string = end;
910 				break;
911 			}
912 /*
913 			case 'c':
914 			   / Scan a single character./
915 
916 				sch = *string;
917 				string++;
918 				if (!(flags & SCAN_SUPPRESS)) {
919 					if (numVars) {
920 						char __buf[2];
921 						__buf[0] = sch;
922 						__buf[1] = '\0';
923 						current = args[objIndex++];
924 						zval_dtor(*current);
925 						ZVAL_STRINGL( *current, __buf, 1);
926 					} else {
927 						add_index_stringl(return_value, objIndex++, &sch, 1);
928 					}
929 				}
930 				break;
931 */
932 			case 'i':
933 				/*
934 				 * Scan an unsigned or signed integer.
935 				 */
936 				/*-cc-*/
937 				buf[0] = '\0';
938 				/*-cc-*/
939 				if ((width == 0) || (width > sizeof(buf) - 1)) {
940 					width = sizeof(buf) - 1;
941 				}
942 
943 				flags |= SCAN_SIGNOK | SCAN_NODIGITS | SCAN_NOZERO;
944 				for (end = buf; width > 0; width--) {
945 					switch (*string) {
946 						/*
947 						 * The 0 digit has special meaning at the beginning of
948 						 * a number.  If we are unsure of the base, it
949 						 * indicates that we are in base 8 or base 16 (if it is
950 						 * followed by an 'x').
951 						 */
952 						case '0':
953 							/*-cc-*/
954 							if (base == 16) {
955 								flags |= SCAN_XOK;
956 							}
957 							/*-cc-*/
958 							if (base == 0) {
959 								base = 8;
960 								flags |= SCAN_XOK;
961 							}
962 							if (flags & SCAN_NOZERO) {
963 								flags &= ~(SCAN_SIGNOK | SCAN_NODIGITS | SCAN_NOZERO);
964 							} else {
965 								flags &= ~(SCAN_SIGNOK | SCAN_XOK | SCAN_NODIGITS);
966 							}
967 							goto addToInt;
968 
969 						case '1': case '2': case '3': case '4':
970 						case '5': case '6': case '7':
971 							if (base == 0) {
972 								base = 10;
973 							}
974 							flags &= ~(SCAN_SIGNOK | SCAN_XOK | SCAN_NODIGITS);
975 							goto addToInt;
976 
977 						case '8': case '9':
978 							if (base == 0) {
979 								base = 10;
980 							}
981 							if (base <= 8) {
982 							   break;
983 							}
984 							flags &= ~(SCAN_SIGNOK | SCAN_XOK | SCAN_NODIGITS);
985 							goto addToInt;
986 
987 						case 'A': case 'B': case 'C':
988 						case 'D': case 'E': case 'F':
989 						case 'a': case 'b': case 'c':
990 						case 'd': case 'e': case 'f':
991 							if (base <= 10) {
992 								break;
993 							}
994 							flags &= ~(SCAN_SIGNOK | SCAN_XOK | SCAN_NODIGITS);
995 							goto addToInt;
996 
997 						case '+': case '-':
998 							if (flags & SCAN_SIGNOK) {
999 								flags &= ~SCAN_SIGNOK;
1000 								goto addToInt;
1001 							}
1002 							break;
1003 
1004 						case 'x': case 'X':
1005 							if ((flags & SCAN_XOK) && (end == buf+1)) {
1006 								base = 16;
1007 								flags &= ~SCAN_XOK;
1008 								goto addToInt;
1009 							}
1010 							break;
1011 					}
1012 
1013 					/*
1014 					 * We got an illegal character so we are done accumulating.
1015 					 */
1016 					break;
1017 
1018 addToInt:
1019 					/*
1020 					 * Add the character to the temporary buffer.
1021 					 */
1022 					*end++ = *string++;
1023 					if (*string == '\0') {
1024 						break;
1025 					}
1026 				}
1027 
1028 				/*
1029 				 * Check to see if we need to back up because we only got a
1030 				 * sign or a trailing x after a 0.
1031 				 */
1032 				if (flags & SCAN_NODIGITS) {
1033 					if (*string == '\0') {
1034 						underflow = 1;
1035 					}
1036 					goto done;
1037 				} else if (end[-1] == 'x' || end[-1] == 'X') {
1038 					end--;
1039 					string--;
1040 				}
1041 
1042 				/*
1043 				 * Scan the value from the temporary buffer.  If we are
1044 				 * returning a large unsigned value, we have to convert it back
1045 				 * to a string since PHP only supports signed values.
1046 				 */
1047 				if (!(flags & SCAN_SUPPRESS)) {
1048 					*end = '\0';
1049 					value = (zend_long) (*fn)(buf, NULL, base);
1050 					if ((flags & SCAN_UNSIGNED) && (value < 0)) {
1051 						snprintf(buf, sizeof(buf), ZEND_ULONG_FMT, value); /* INTL: ISO digit */
1052 						if (numVars && objIndex >= argCount) {
1053 							break;
1054 						} else if (numVars) {
1055 						  /* change passed value type to string */
1056 							current = Z_REFVAL(args[objIndex++]);
1057 							zval_ptr_dtor(current);
1058 							ZVAL_STRING(current, buf);
1059 						} else {
1060 							add_index_string(return_value, objIndex++, buf);
1061 						}
1062 					} else {
1063 						if (numVars && objIndex >= argCount) {
1064 							break;
1065 						} else if (numVars) {
1066 							current = Z_REFVAL(args[objIndex++]);
1067 							zval_ptr_dtor(current);
1068 							ZVAL_LONG(current, value);
1069 						} else {
1070 							add_index_long(return_value, objIndex++, value);
1071 						}
1072 					}
1073 				}
1074 				break;
1075 
1076 			case 'f':
1077 				/*
1078 				 * Scan a floating point number
1079 				 */
1080 				buf[0] = '\0';     /* call me pedantic */
1081 				if ((width == 0) || (width > sizeof(buf) - 1)) {
1082 					width = sizeof(buf) - 1;
1083 				}
1084 				flags |= SCAN_SIGNOK | SCAN_NODIGITS | SCAN_PTOK | SCAN_EXPOK;
1085 				for (end = buf; width > 0; width--) {
1086 					switch (*string) {
1087 						case '0': case '1': case '2': case '3':
1088 						case '4': case '5': case '6': case '7':
1089 						case '8': case '9':
1090 							flags &= ~(SCAN_SIGNOK | SCAN_NODIGITS);
1091 							goto addToFloat;
1092 						case '+':
1093 						case '-':
1094 							if (flags & SCAN_SIGNOK) {
1095 								flags &= ~SCAN_SIGNOK;
1096 								goto addToFloat;
1097 							}
1098 							break;
1099 						case '.':
1100 							if (flags & SCAN_PTOK) {
1101 								flags &= ~(SCAN_SIGNOK | SCAN_PTOK);
1102 								goto addToFloat;
1103 							}
1104 							break;
1105 						case 'e':
1106 						case 'E':
1107 							/*
1108 							 * An exponent is not allowed until there has
1109 							 * been at least one digit.
1110 							 */
1111 							if ((flags & (SCAN_NODIGITS | SCAN_EXPOK)) == SCAN_EXPOK) {
1112 								flags = (flags & ~(SCAN_EXPOK|SCAN_PTOK))
1113 									| SCAN_SIGNOK | SCAN_NODIGITS;
1114 								goto addToFloat;
1115 							}
1116 							break;
1117 					}
1118 
1119 					/*
1120 					 * We got an illegal character so we are done accumulating.
1121 					 */
1122 					break;
1123 
1124 addToFloat:
1125 					/*
1126 					 * Add the character to the temporary buffer.
1127 					 */
1128 					*end++ = *string++;
1129 					if (*string == '\0') {
1130 						break;
1131 					}
1132 				}
1133 
1134 				/*
1135 				 * Check to see if we need to back up because we saw a
1136 				 * trailing 'e' or sign.
1137 				 */
1138 				if (flags & SCAN_NODIGITS) {
1139 					if (flags & SCAN_EXPOK) {
1140 						/*
1141 						 * There were no digits at all so scanning has
1142 						 * failed and we are done.
1143 						 */
1144 						if (*string == '\0') {
1145 							underflow = 1;
1146 						}
1147 						goto done;
1148 					}
1149 
1150 					/*
1151 					 * We got a bad exponent ('e' and maybe a sign).
1152 					 */
1153 					end--;
1154 					string--;
1155 					if (*end != 'e' && *end != 'E') {
1156 						end--;
1157 						string--;
1158 					}
1159 				}
1160 
1161 				/*
1162 				 * Scan the value from the temporary buffer.
1163 				 */
1164 				if (!(flags & SCAN_SUPPRESS)) {
1165 					double dvalue;
1166 					*end = '\0';
1167 					dvalue = zend_strtod(buf, NULL);
1168 					if (numVars && objIndex >= argCount) {
1169 						break;
1170 					} else if (numVars) {
1171 						current = Z_REFVAL(args[objIndex++]);
1172 						zval_ptr_dtor(current);
1173 						ZVAL_DOUBLE(current, dvalue);
1174 					} else {
1175 						add_index_double(return_value, objIndex++, dvalue );
1176 					}
1177 				}
1178 				break;
1179 		} /* switch (op) */
1180 		nconversions++;
1181 	} /*  while (*format != '\0') */
1182 
1183 done:
1184 	result = SCAN_SUCCESS;
1185 
1186 	if (underflow && (0==nconversions)) {
1187 		scan_set_error_return( numVars, return_value );
1188 		result = SCAN_ERROR_EOF;
1189 	} else if (numVars) {
1190 		zval_ptr_dtor(return_value );
1191 		ZVAL_LONG(return_value, nconversions);
1192 	} else if (nconversions < totalVars) {
1193 		/* TODO: not all elements converted. we need to prune the list - cc */
1194 	}
1195 	return result;
1196 }
1197 /* }}} */
1198 
1199 /* the compiler choked when i tried to make this a macro    */
scan_set_error_return(int numVars,zval * return_value)1200 static inline void scan_set_error_return(int numVars, zval *return_value) /* {{{ */
1201 {
1202 	if (numVars) {
1203 		ZVAL_LONG(return_value, SCAN_ERROR_EOF);  /* EOF marker */
1204 	} else {
1205 		/* convert_to_null calls destructor */
1206 		convert_to_null(return_value);
1207 	}
1208 }
1209 /* }}} */
1210 
1211 /*
1212  * Local variables:
1213  * tab-width: 4
1214  * c-basic-offset: 4
1215  * End:
1216  * vim600: sw=4 ts=4 fdm=marker
1217  * vim<600: sw=4 ts=4
1218  */
1219