xref: /PHP-7.1/ext/standard/scanf.c (revision ccd4716e)
1 /*
2    +----------------------------------------------------------------------+
3    | PHP Version 7                                                        |
4    +----------------------------------------------------------------------+
5    | Copyright (c) 1997-2018 The PHP Group                                |
6    +----------------------------------------------------------------------+
7    | This source file is subject to version 3.01 of the PHP license,      |
8    | that is bundled with this package in the file LICENSE, and is        |
9    | available through the world-wide-web at the following url:           |
10    | http://www.php.net/license/3_01.txt                                  |
11    | If you did not receive a copy of the PHP license and are unable to   |
12    | obtain it through the world-wide-web, please send a note to          |
13    | license@php.net so we can mail you a copy immediately.               |
14    +----------------------------------------------------------------------+
15    | Author: Clayton Collie <clcollie@mindspring.com>                     |
16    +----------------------------------------------------------------------+
17 */
18 
19 /* $Id$ */
20 
21 /*
22 	scanf.c --
23 
24 	This file contains the base code which implements sscanf and by extension
25 	fscanf. Original code is from TCL8.3.0 and bears the following copyright:
26 
27 	This software is copyrighted by the Regents of the University of
28 	California, Sun Microsystems, Inc., Scriptics Corporation,
29 	and other parties.  The following terms apply to all files associated
30 	with the software unless explicitly disclaimed in individual files.
31 
32 	The authors hereby grant permission to use, copy, modify, distribute,
33 	and license this software and its documentation for any purpose, provided
34 	that existing copyright notices are retained in all copies and that this
35 	notice is included verbatim in any distributions. No written agreement,
36 	license, or royalty fee is required for any of the authorized uses.
37 	Modifications to this software may be copyrighted by their authors
38 	and need not follow the licensing terms described here, provided that
39 	the new terms are clearly indicated on the first page of each file where
40 	they apply.
41 
42 	IN NO EVENT SHALL THE AUTHORS OR DISTRIBUTORS BE LIABLE TO ANY PARTY
43 	FOR DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
44 	ARISING OUT OF THE USE OF THIS SOFTWARE, ITS DOCUMENTATION, OR ANY
45 	DERIVATIVES THEREOF, EVEN IF THE AUTHORS HAVE BEEN ADVISED OF THE
46 	POSSIBILITY OF SUCH DAMAGE.
47 
48 	THE AUTHORS AND DISTRIBUTORS SPECIFICALLY DISCLAIM ANY WARRANTIES,
49 	INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY,
50 	FITNESS FOR A PARTICULAR PURPOSE, AND NON-INFRINGEMENT.  THIS SOFTWARE
51 	IS PROVIDED ON AN "AS IS" BASIS, AND THE AUTHORS AND DISTRIBUTORS HAVE
52 	NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR
53 	MODIFICATIONS.
54 
55 	GOVERNMENT USE: If you are acquiring this software on behalf of the
56 	U.S. government, the Government shall have only "Restricted Rights"
57 	in the software and related documentation as defined in the Federal
58 	Acquisition Regulations (FARs) in Clause 52.227.19 (c) (2).  If you
59 	are acquiring the software on behalf of the Department of Defense, the
60 	software shall be classified as "Commercial Computer Software" and the
61 	Government shall have only "Restricted Rights" as defined in Clause
62 	252.227-7013 (c) (1) of DFARs.  Notwithstanding the foregoing, the
63 	authors grant the U.S. Government and others acting in its behalf
64 	permission to use and distribute the software in accordance with the
65 	terms specified in this license.
66 */
67 
68 #include <stdio.h>
69 #include <limits.h>
70 #include <ctype.h>
71 #include "php.h"
72 #include "php_variables.h"
73 #ifdef HAVE_LOCALE_H
74 #include <locale.h>
75 #endif
76 #include "zend_execute.h"
77 #include "zend_operators.h"
78 #include "zend_strtod.h"
79 #include "php_globals.h"
80 #include "basic_functions.h"
81 #include "scanf.h"
82 
/*
 * Flag values used internally by [f|s]canf.
 *
 * The first group describes the conversion specifier as a whole; the
 * second group tracks what is still acceptable while a number is being
 * accumulated character by character.
 */
#define SCAN_NOSKIP     0x1       /* Don't skip blanks. */
#define SCAN_SUPPRESS	0x2	  /* Suppress assignment. */
#define SCAN_UNSIGNED	0x4	  /* Read an unsigned value. */
#define SCAN_WIDTH      0x8       /* A width value was supplied. */

#define SCAN_SIGNOK     0x10      /* A +/- character is allowed. */
#define SCAN_NODIGITS   0x20      /* No digits have been scanned. */
#define SCAN_NOZERO     0x40      /* No zero digits have been scanned. */
#define SCAN_XOK        0x80      /* An 'x' is allowed. */
#define SCAN_PTOK       0x100     /* Decimal point is allowed. */
#define SCAN_EXPOK      0x200     /* An exponent is allowed. */

/*
 * Convert a (possibly signed) char to an unsigned byte value, e.g. before
 * handing it to a <ctype.h> classifier.  The whole expansion is
 * parenthesized so the cast cannot bind to surrounding operators.
 */
#define UCHAR(x)		((zend_uchar)(x))
99 
/*
 * Representation of one "%[...]" character set: a flat list of single
 * characters plus a list of inclusive character ranges.
 */
struct Range {
	char start;		/* Lower bound of the range (inclusive). */
	char end;		/* Upper bound of the range (inclusive). */
};

typedef struct CharSet {
	int exclude;		/* 1 if this is an exclusion set. */
	int nchars;		/* Number of entries used in chars. */
	char *chars;		/* Individual member characters. */
	int nranges;		/* Number of entries used in ranges. */
	struct Range *ranges;	/* Member ranges; NULL when there are none. */
} CharSet;
114 
115 /*
116  * Declarations for functions used only in this file.
117  */
118 static char *BuildCharSet(CharSet *cset, char *format);
119 static int	CharInSet(CharSet *cset, int ch);
120 static void	ReleaseCharSet(CharSet *cset);
121 static inline void scan_set_error_return(int numVars, zval *return_value);
122 
123 
124 /* {{{ BuildCharSet
125  *----------------------------------------------------------------------
126  *
127  * BuildCharSet --
128  *
129  *	This function examines a character set format specification
130  *	and builds a CharSet containing the individual characters and
131  *	character ranges specified.
132  *
133  * Results:
134  *	Returns the next format position.
135  *
136  * Side effects:
137  *	Initializes the charset.
138  *
139  *----------------------------------------------------------------------
140  */
BuildCharSet(CharSet * cset,char * format)141 static char * BuildCharSet(CharSet *cset, char *format)
142 {
143 	char *ch, start;
144 	int  nranges;
145 	char *end;
146 
147 	memset(cset, 0, sizeof(CharSet));
148 
149 	ch = format;
150 	if (*ch == '^') {
151 		cset->exclude = 1;
152 		ch = ++format;
153 	}
154 	end = format + 1;	/* verify this - cc */
155 
156 	/*
157 	 * Find the close bracket so we can overallocate the set.
158 	 */
159 	if (*ch == ']') {
160 		ch = end++;
161 	}
162 	nranges = 0;
163 	while (*ch != ']') {
164 		if (*ch == '-') {
165 			nranges++;
166 		}
167 		ch = end++;
168 	}
169 
170 	cset->chars = (char *) safe_emalloc(sizeof(char), (end - format - 1), 0);
171 	if (nranges > 0) {
172 		cset->ranges = (struct Range *) safe_emalloc(sizeof(struct Range), nranges, 0);
173 	} else {
174 		cset->ranges = NULL;
175 	}
176 
177 	/*
178 	 * Now build the character set.
179 	 */
180 	cset->nchars = cset->nranges = 0;
181 	ch    = format++;
182 	start = *ch;
183 	if (*ch == ']' || *ch == '-') {
184 		cset->chars[cset->nchars++] = *ch;
185 		ch = format++;
186 	}
187 	while (*ch != ']') {
188 		if (*format == '-') {
189 			/*
190 			 * This may be the first character of a range, so don't add
191 			 * it yet.
192 			 */
193 			start = *ch;
194 		} else if (*ch == '-') {
195 			/*
196 			 * Check to see if this is the last character in the set, in which
197 			 * case it is not a range and we should add the previous character
198 			 * as well as the dash.
199 			 */
200 			if (*format == ']') {
201 				cset->chars[cset->nchars++] = start;
202 				cset->chars[cset->nchars++] = *ch;
203 			} else {
204 				ch = format++;
205 
206 				/*
207 				 * Check to see if the range is in reverse order.
208 				 */
209 				if (start < *ch) {
210 					cset->ranges[cset->nranges].start = start;
211 					cset->ranges[cset->nranges].end = *ch;
212 				} else {
213 					cset->ranges[cset->nranges].start = *ch;
214 					cset->ranges[cset->nranges].end = start;
215 				}
216 				cset->nranges++;
217 			}
218 		} else {
219 			cset->chars[cset->nchars++] = *ch;
220 		}
221 		ch = format++;
222 	}
223 	return format;
224 }
225 /* }}} */
226 
227 /* {{{ CharInSet
228  *----------------------------------------------------------------------
229  *
230  * CharInSet --
231  *
232  *	Check to see if a character matches the given set.
233  *
234  * Results:
235  *	Returns non-zero if the character matches the given set.
236  *
237  * Side effects:
238  *	None.
239  *
240  *----------------------------------------------------------------------
241  */
CharInSet(CharSet * cset,int c)242 static int CharInSet(CharSet *cset, int c)
243 {
244 	char ch = (char) c;
245 	int i, match = 0;
246 
247 	for (i = 0; i < cset->nchars; i++) {
248 		if (cset->chars[i] == ch) {
249 			match = 1;
250 			break;
251 		}
252 	}
253 	if (!match) {
254 		for (i = 0; i < cset->nranges; i++) {
255 			if ((cset->ranges[i].start <= ch)
256 				&& (ch <= cset->ranges[i].end)) {
257 				match = 1;
258 				break;
259 			}
260 		}
261 	}
262 	return (cset->exclude ? !match : match);
263 }
264 /* }}} */
265 
266 /* {{{ ReleaseCharSet
267  *----------------------------------------------------------------------
268  *
269  * ReleaseCharSet --
270  *
271  *	Free the storage associated with a character set.
272  *
273  * Results:
274  *	None.
275  *
276  * Side effects:
277  *	None.
278  *
279  *----------------------------------------------------------------------
280  */
ReleaseCharSet(CharSet * cset)281 static void ReleaseCharSet(CharSet *cset)
282 {
283 	efree((char *)cset->chars);
284 	if (cset->ranges) {
285 		efree((char *)cset->ranges);
286 	}
287 }
288 /* }}} */
289 
290 /* {{{ ValidateFormat
291  *----------------------------------------------------------------------
292  *
293  * ValidateFormat --
294  *
295  *	Parse the format string and verify that it is properly formed
296  *	and that there are exactly enough variables on the command line.
297  *
298  * Results:
299  *    FAILURE or SUCCESS.
300  *
301  * Side effects:
302  *     May set php_error based on abnormal conditions.
303  *
304  * Parameters :
305  *     format     The format string.
306  *     numVars    The number of variables passed to the scan command.
307  *     totalSubs  The number of variables that will be required.
308  *
309  *----------------------------------------------------------------------
310 */
ValidateFormat(char * format,int numVars,int * totalSubs)311 PHPAPI int ValidateFormat(char *format, int numVars, int *totalSubs)
312 {
313 #define STATIC_LIST_SIZE 16
314 	int gotXpg, gotSequential, value, i, flags;
315 	char *end, *ch = NULL;
316 	int staticAssign[STATIC_LIST_SIZE];
317 	int *nassign = staticAssign;
318 	int objIndex, xpgSize, nspace = STATIC_LIST_SIZE;
319 
320 	/*
321 	 * Initialize an array that records the number of times a variable
322 	 * is assigned to by the format string.  We use this to detect if
323 	 * a variable is multiply assigned or left unassigned.
324 	 */
325 	if (numVars > nspace) {
326 		nassign = (int*)safe_emalloc(sizeof(int), numVars, 0);
327 		nspace = numVars;
328 	}
329 	for (i = 0; i < nspace; i++) {
330 		nassign[i] = 0;
331 	}
332 
333 	xpgSize = objIndex = gotXpg = gotSequential = 0;
334 
335 	while (*format != '\0') {
336 		ch = format++;
337 		flags = 0;
338 
339 		if (*ch != '%') {
340 			continue;
341 		}
342 		ch = format++;
343 		if (*ch == '%') {
344 			continue;
345 		}
346 		if (*ch == '*') {
347 			flags |= SCAN_SUPPRESS;
348 			ch = format++;
349 			goto xpgCheckDone;
350 		}
351 
352 		if ( isdigit( (int)*ch ) ) {
353 			/*
354 			 * Check for an XPG3-style %n$ specification.  Note: there
355 			 * must not be a mixture of XPG3 specs and non-XPG3 specs
356 			 * in the same format string.
357 			 */
358 			value = ZEND_STRTOUL(format-1, &end, 10);
359 			if (*end != '$') {
360 				goto notXpg;
361 			}
362 			format = end+1;
363 			ch     = format++;
364 			gotXpg = 1;
365 			if (gotSequential) {
366 				goto mixedXPG;
367 			}
368 			objIndex = value - 1;
369 			if ((objIndex < 0) || (numVars && (objIndex >= numVars))) {
370 				goto badIndex;
371 			} else if (numVars == 0) {
372 				/*
373 				 * In the case where no vars are specified, the user can
374 				 * specify %9999$ legally, so we have to consider special
375 				 * rules for growing the assign array.  'value' is
376 				 * guaranteed to be > 0.
377 				 */
378 
379 				/* set a lower artificial limit on this
380 				 * in the interest of security and resource friendliness
381 				 * 255 arguments should be more than enough. - cc
382 				 */
383 				if (value > SCAN_MAX_ARGS) {
384 					goto badIndex;
385 				}
386 
387 				xpgSize = (xpgSize > value) ? xpgSize : value;
388 			}
389 			goto xpgCheckDone;
390 		}
391 
392 notXpg:
393 		gotSequential = 1;
394 		if (gotXpg) {
395 mixedXPG:
396 			php_error_docref(NULL, E_WARNING, "%s", "cannot mix \"%\" and \"%n$\" conversion specifiers");
397 			goto error;
398 		}
399 
400 xpgCheckDone:
401 		/*
402 		 * Parse any width specifier.
403 		 */
404 		if (isdigit(UCHAR(*ch))) {
405 			value = ZEND_STRTOUL(format-1, &format, 10);
406 			flags |= SCAN_WIDTH;
407 			ch = format++;
408 		}
409 
410 		/*
411 		 * Ignore size specifier.
412 		 */
413 		if ((*ch == 'l') || (*ch == 'L') || (*ch == 'h')) {
414 			ch = format++;
415 		}
416 
417 		if (!(flags & SCAN_SUPPRESS) && numVars && (objIndex >= numVars)) {
418 			goto badIndex;
419 		}
420 
421 		/*
422 		 * Handle the various field types.
423 		 */
424 		switch (*ch) {
425 			case 'n':
426 			case 'd':
427 			case 'D':
428 			case 'i':
429 			case 'o':
430 			case 'x':
431 			case 'X':
432 			case 'u':
433 			case 'f':
434 			case 'e':
435 			case 'E':
436 			case 'g':
437 			case 's':
438 				break;
439 
440 			case 'c':
441 				/* we differ here with the TCL implementation in allowing for */
442 				/* a character width specification, to be more consistent with */
443 				/* ANSI. since Zend auto allocates space for vars, this is no */
444 				/* problem - cc                                               */
445 				/*
446 				if (flags & SCAN_WIDTH) {
447 					php_error_docref(NULL, E_WARNING, "Field width may not be specified in %c conversion");
448 					goto error;
449 				}
450 				*/
451 				break;
452 
453 			case '[':
454 				if (*format == '\0') {
455 					goto badSet;
456 				}
457 				ch = format++;
458 				if (*ch == '^') {
459 					if (*format == '\0') {
460 						goto badSet;
461 					}
462 					ch = format++;
463 				}
464 				if (*ch == ']') {
465 					if (*format == '\0') {
466 						goto badSet;
467 					}
468 					ch = format++;
469 				}
470 				while (*ch != ']') {
471 					if (*format == '\0') {
472 						goto badSet;
473 					}
474 					ch = format++;
475 				}
476 				break;
477 badSet:
478 				php_error_docref(NULL, E_WARNING, "Unmatched [ in format string");
479 				goto error;
480 
481 			default: {
482 				php_error_docref(NULL, E_WARNING, "Bad scan conversion character \"%c\"", *ch);
483 				goto error;
484 			}
485 		}
486 
487 		if (!(flags & SCAN_SUPPRESS)) {
488 			if (objIndex >= nspace) {
489 				/*
490 				 * Expand the nassign buffer.  If we are using XPG specifiers,
491 				 * make sure that we grow to a large enough size.  xpgSize is
492 				 * guaranteed to be at least one larger than objIndex.
493 				 */
494 				value = nspace;
495 				if (xpgSize) {
496 					nspace = xpgSize;
497 				} else {
498 					nspace += STATIC_LIST_SIZE;
499 				}
500 				if (nassign == staticAssign) {
501 					nassign = (void *)safe_emalloc(nspace, sizeof(int), 0);
502 					for (i = 0; i < STATIC_LIST_SIZE; ++i) {
503 						nassign[i] = staticAssign[i];
504 					}
505 				} else {
506 					nassign = (void *)erealloc((void *)nassign, nspace * sizeof(int));
507 				}
508 				for (i = value; i < nspace; i++) {
509 					nassign[i] = 0;
510 				}
511 			}
512 			nassign[objIndex]++;
513 			objIndex++;
514 		}
515 	} /* while (*format != '\0') */
516 
517 	/*
518 	 * Verify that all of the variable were assigned exactly once.
519 	 */
520 	if (numVars == 0) {
521 		if (xpgSize) {
522 			numVars = xpgSize;
523 		} else {
524 			numVars = objIndex;
525 		}
526 	}
527 	if (totalSubs) {
528 		*totalSubs = numVars;
529 	}
530 	for (i = 0; i < numVars; i++) {
531 		if (nassign[i] > 1) {
532 			php_error_docref(NULL, E_WARNING, "%s", "Variable is assigned by multiple \"%n$\" conversion specifiers");
533 			goto error;
534 		} else if (!xpgSize && (nassign[i] == 0)) {
535 			/*
536 			 * If the space is empty, and xpgSize is 0 (means XPG wasn't
537 			 * used, and/or numVars != 0), then too many vars were given
538 			 */
539 			php_error_docref(NULL, E_WARNING, "Variable is not assigned by any conversion specifiers");
540 			goto error;
541 		}
542 	}
543 
544 	if (nassign != staticAssign) {
545 		efree((char *)nassign);
546 	}
547 	return SCAN_SUCCESS;
548 
549 badIndex:
550 	if (gotXpg) {
551 		php_error_docref(NULL, E_WARNING, "%s", "\"%n$\" argument index out of range");
552 	} else {
553 		php_error_docref(NULL, E_WARNING, "Different numbers of variable names and field specifiers");
554 	}
555 
556 error:
557 	if (nassign != staticAssign) {
558 		efree((char *)nassign);
559 	}
560 	return SCAN_ERROR_INVALID_FORMAT;
561 #undef STATIC_LIST_SIZE
562 }
563 /* }}} */
564 
565 /* {{{ php_sscanf_internal
566  * This is the internal function which does processing on behalf of
567  * both sscanf() and fscanf()
568  *
569  * parameters :
570  * 		string		literal string to be processed
571  * 		format		format string
572  *		argCount	total number of elements in the args array
573  *		args		arguments passed in from user function (f|s)scanf
574  * 		varStart	offset (in args) of 1st variable passed in to (f|s)scanf
575  *		return_value set with the results of the scan
576  */
577 
php_sscanf_internal(char * string,char * format,int argCount,zval * args,int varStart,zval * return_value)578 PHPAPI int php_sscanf_internal( char *string, char *format,
579 				int argCount, zval *args,
580 				int varStart, zval *return_value)
581 {
582 	int  numVars, nconversions, totalVars = -1;
583 	int  i, result;
584 	zend_long value;
585 	int  objIndex;
586 	char *end, *baseString;
587 	zval *current;
588 	char op   = 0;
589 	int  base = 0;
590 	int  underflow = 0;
591 	size_t width;
592 	zend_long (*fn)() = NULL;
593 	char *ch, sch;
594 	int  flags;
595 	char buf[64];	/* Temporary buffer to hold scanned number
596 					 * strings before they are passed to strtoul() */
597 
598 	/* do some sanity checking */
599 	if ((varStart > argCount) || (varStart < 0)){
600 		varStart = SCAN_MAX_ARGS + 1;
601 	}
602 	numVars = argCount - varStart;
603 	if (numVars < 0) {
604 		numVars = 0;
605 	}
606 
607 #if 0
608 	zend_printf("<br>in sscanf_internal : <br> string is \"%s\", format = \"%s\"<br> NumVars = %d. VarStart = %d<br>-------------------------<br>",
609 					string, format, numVars, varStart);
610 #endif
611 	/*
612 	 * Check for errors in the format string.
613 	 */
614 	if (ValidateFormat(format, numVars, &totalVars) != SCAN_SUCCESS) {
615 		scan_set_error_return( numVars, return_value );
616 		return SCAN_ERROR_INVALID_FORMAT;
617 	}
618 
619 	objIndex = numVars ? varStart : 0;
620 
621 	/*
622 	 * If any variables are passed, make sure they are all passed by reference
623 	 */
624 	if (numVars) {
625 		for (i = varStart;i < argCount;i++){
626 			if ( ! Z_ISREF(args[ i ] ) ) {
627 				php_error_docref(NULL, E_WARNING, "Parameter %d must be passed by reference", i);
628 				scan_set_error_return(numVars, return_value);
629 				return SCAN_ERROR_VAR_PASSED_BYVAL;
630 			}
631 		}
632 	}
633 
634 	/*
635 	 * Allocate space for the result objects. Only happens when no variables
636 	 * are specified
637 	 */
638 	if (!numVars) {
639 		zval tmp;
640 
641 		/* allocate an array for return */
642 		array_init(return_value);
643 
644 		for (i = 0; i < totalVars; i++) {
645 			ZVAL_NULL(&tmp);
646 			if (add_next_index_zval(return_value, &tmp) == FAILURE) {
647 				scan_set_error_return(0, return_value);
648 				return FAILURE;
649 			}
650 		}
651 		varStart = 0; /* Array index starts from 0 */
652 	}
653 
654 	baseString = string;
655 
656 	/*
657 	 * Iterate over the format string filling in the result objects until
658 	 * we reach the end of input, the end of the format string, or there
659 	 * is a mismatch.
660 	 */
661 	nconversions = 0;
662 	/* note ! - we need to limit the loop for objIndex to keep it in bounds */
663 
664 	while (*format != '\0') {
665 		ch    = format++;
666 		flags = 0;
667 
668 		/*
669 		 * If we see whitespace in the format, skip whitespace in the string.
670 		 */
671 		if ( isspace( (int)*ch ) ) {
672 			sch = *string;
673 			while ( isspace( (int)sch ) ) {
674 				if (*string == '\0') {
675 					goto done;
676 				}
677 				string++;
678 				sch = *string;
679 			}
680 			continue;
681 		}
682 
683 		if (*ch != '%') {
684 literal:
685 			if (*string == '\0') {
686 				underflow = 1;
687 				goto done;
688 			}
689 			sch = *string;
690 			string++;
691 			if (*ch != sch) {
692 				goto done;
693 			}
694 			continue;
695 		}
696 
697 		ch = format++;
698 		if (*ch == '%') {
699 			goto literal;
700 		}
701 
702 		/*
703 		 * Check for assignment suppression ('*') or an XPG3-style
704 		 * assignment ('%n$').
705 		 */
706 		if (*ch == '*') {
707 			flags |= SCAN_SUPPRESS;
708 			ch = format++;
709 		} else if ( isdigit(UCHAR(*ch))) {
710 			value = ZEND_STRTOUL(format-1, &end, 10);
711 			if (*end == '$') {
712 				format = end+1;
713 				ch = format++;
714 				objIndex = varStart + value - 1;
715 			}
716 		}
717 
718 		/*
719 		 * Parse any width specifier.
720 		 */
721 		if ( isdigit(UCHAR(*ch))) {
722 			width = ZEND_STRTOUL(format-1, &format, 10);
723 			ch = format++;
724 		} else {
725 			width = 0;
726 		}
727 
728 		/*
729 		 * Ignore size specifier.
730 		 */
731 		if ((*ch == 'l') || (*ch == 'L') || (*ch == 'h')) {
732 			ch = format++;
733 		}
734 
735 		/*
736 		 * Handle the various field types.
737 		 */
738 		switch (*ch) {
739 			case 'n':
740 				if (!(flags & SCAN_SUPPRESS)) {
741 					if (numVars && objIndex >= argCount) {
742 						break;
743 					} else if (numVars) {
744 						current = Z_REFVAL(args[objIndex++]);
745 						zval_ptr_dtor(current);
746 						ZVAL_LONG(current, (zend_long)(string - baseString) );
747 					} else {
748 						add_index_long(return_value, objIndex++, string - baseString);
749 					}
750 				}
751 				nconversions++;
752 				continue;
753 
754 			case 'd':
755 			case 'D':
756 				op = 'i';
757 				base = 10;
758 				fn = (zend_long (*)())ZEND_STRTOL_PTR;
759 				break;
760 			case 'i':
761 				op = 'i';
762 				base = 0;
763 				fn = (zend_long (*)())ZEND_STRTOL_PTR;
764 				break;
765 			case 'o':
766 				op = 'i';
767 				base = 8;
768 				fn = (zend_long (*)())ZEND_STRTOL_PTR;
769 				break;
770 			case 'x':
771 			case 'X':
772 				op = 'i';
773 				base = 16;
774 				fn = (zend_long (*)())ZEND_STRTOL_PTR;
775 				break;
776 			case 'u':
777 				op = 'i';
778 				base = 10;
779 				flags |= SCAN_UNSIGNED;
780 				fn = (zend_long (*)())ZEND_STRTOUL_PTR;
781 				break;
782 
783 			case 'f':
784 			case 'e':
785 			case 'E':
786 			case 'g':
787 				op = 'f';
788 				break;
789 
790 			case 's':
791 				op = 's';
792 				break;
793 
794 			case 'c':
795 				op = 's';
796 				flags |= SCAN_NOSKIP;
797 				/*-cc-*/
798 				if (0 == width) {
799 					width = 1;
800 				}
801 				/*-cc-*/
802 				break;
803 			case '[':
804 				op = '[';
805 				flags |= SCAN_NOSKIP;
806 				break;
807 		}   /* switch */
808 
809 		/*
810 		 * At this point, we will need additional characters from the
811 		 * string to proceed.
812 		 */
813 		if (*string == '\0') {
814 			underflow = 1;
815 			goto done;
816 		}
817 
818 		/*
819 		 * Skip any leading whitespace at the beginning of a field unless
820 		 * the format suppresses this behavior.
821 		 */
822 		if (!(flags & SCAN_NOSKIP)) {
823 			while (*string != '\0') {
824 				sch = *string;
825 				if (! isspace((int)sch) ) {
826 					break;
827 				}
828 				string++;
829 			}
830 			if (*string == '\0') {
831 				underflow = 1;
832 				goto done;
833 			}
834 		}
835 
836 		/*
837 		 * Perform the requested scanning operation.
838 		 */
839 		switch (op) {
840 			case 'c':
841 			case 's':
842 				/*
843 				 * Scan a string up to width characters or whitespace.
844 				 */
845 				if (width == 0) {
846 					width = (size_t) ~0;
847 				}
848 				end = string;
849 				while (*end != '\0') {
850 					sch = *end;
851 					if ( isspace( (int)sch ) ) {
852 						break;
853 					}
854 					end++;
855 					if (--width == 0) {
856 					   break;
857 					}
858 				}
859 				if (!(flags & SCAN_SUPPRESS)) {
860 					if (numVars && objIndex >= argCount) {
861 						break;
862 					} else if (numVars) {
863 						current = Z_REFVAL(args[objIndex++]);
864 						zval_ptr_dtor(current);
865 						ZVAL_STRINGL(current, string, end-string);
866 					} else {
867 						add_index_stringl(return_value, objIndex++, string, end-string);
868 					}
869 				}
870 				string = end;
871 				break;
872 
873 			case '[': {
874 				CharSet cset;
875 
876 				if (width == 0) {
877 					width = (size_t) ~0;
878 				}
879 				end = string;
880 
881 				format = BuildCharSet(&cset, format);
882 				while (*end != '\0') {
883 					sch = *end;
884 					if (!CharInSet(&cset, (int)sch)) {
885 						break;
886 					}
887 					end++;
888 					if (--width == 0) {
889 						break;
890 					}
891 				}
892 				ReleaseCharSet(&cset);
893 
894 				if (string == end) {
895 					/*
896 					 * Nothing matched the range, stop processing
897 					 */
898 					goto done;
899 				}
900 				if (!(flags & SCAN_SUPPRESS)) {
901 					if (numVars && objIndex >= argCount) {
902 						break;
903 					} else if (numVars) {
904 						current = Z_REFVAL(args[objIndex++]);
905 						zval_ptr_dtor(current);
906 						ZVAL_STRINGL(current, string, end-string);
907 					} else {
908 						add_index_stringl(return_value, objIndex++, string, end-string);
909 					}
910 				}
911 				string = end;
912 				break;
913 			}
914 /*
915 			case 'c':
916 			   / Scan a single character./
917 
918 				sch = *string;
919 				string++;
920 				if (!(flags & SCAN_SUPPRESS)) {
921 					if (numVars) {
922 						char __buf[2];
923 						__buf[0] = sch;
924 						__buf[1] = '\0';;
925 						current = args[objIndex++];
926 						zval_dtor(*current);
927 						ZVAL_STRINGL( *current, __buf, 1);
928 					} else {
929 						add_index_stringl(return_value, objIndex++, &sch, 1);
930 					}
931 				}
932 				break;
933 */
934 			case 'i':
935 				/*
936 				 * Scan an unsigned or signed integer.
937 				 */
938 				/*-cc-*/
939 				buf[0] = '\0';
940 				/*-cc-*/
941 				if ((width == 0) || (width > sizeof(buf) - 1)) {
942 					width = sizeof(buf) - 1;
943 				}
944 
945 				flags |= SCAN_SIGNOK | SCAN_NODIGITS | SCAN_NOZERO;
946 				for (end = buf; width > 0; width--) {
947 					switch (*string) {
948 						/*
949 						 * The 0 digit has special meaning at the beginning of
950 						 * a number.  If we are unsure of the base, it
951 						 * indicates that we are in base 8 or base 16 (if it is
952 						 * followed by an 'x').
953 						 */
954 						case '0':
955 							/*-cc-*/
956 							if (base == 16) {
957 								flags |= SCAN_XOK;
958 							}
959 							/*-cc-*/
960 							if (base == 0) {
961 								base = 8;
962 								flags |= SCAN_XOK;
963 							}
964 							if (flags & SCAN_NOZERO) {
965 								flags &= ~(SCAN_SIGNOK | SCAN_NODIGITS | SCAN_NOZERO);
966 							} else {
967 								flags &= ~(SCAN_SIGNOK | SCAN_XOK | SCAN_NODIGITS);
968 							}
969 							goto addToInt;
970 
971 						case '1': case '2': case '3': case '4':
972 						case '5': case '6': case '7':
973 							if (base == 0) {
974 								base = 10;
975 							}
976 							flags &= ~(SCAN_SIGNOK | SCAN_XOK | SCAN_NODIGITS);
977 							goto addToInt;
978 
979 						case '8': case '9':
980 							if (base == 0) {
981 								base = 10;
982 							}
983 							if (base <= 8) {
984 							   break;
985 							}
986 							flags &= ~(SCAN_SIGNOK | SCAN_XOK | SCAN_NODIGITS);
987 							goto addToInt;
988 
989 						case 'A': case 'B': case 'C':
990 						case 'D': case 'E': case 'F':
991 						case 'a': case 'b': case 'c':
992 						case 'd': case 'e': case 'f':
993 							if (base <= 10) {
994 								break;
995 							}
996 							flags &= ~(SCAN_SIGNOK | SCAN_XOK | SCAN_NODIGITS);
997 							goto addToInt;
998 
999 						case '+': case '-':
1000 							if (flags & SCAN_SIGNOK) {
1001 								flags &= ~SCAN_SIGNOK;
1002 								goto addToInt;
1003 							}
1004 							break;
1005 
1006 						case 'x': case 'X':
1007 							if ((flags & SCAN_XOK) && (end == buf+1)) {
1008 								base = 16;
1009 								flags &= ~SCAN_XOK;
1010 								goto addToInt;
1011 							}
1012 							break;
1013 					}
1014 
1015 					/*
1016 					 * We got an illegal character so we are done accumulating.
1017 					 */
1018 					break;
1019 
1020 addToInt:
1021 					/*
1022 					 * Add the character to the temporary buffer.
1023 					 */
1024 					*end++ = *string++;
1025 					if (*string == '\0') {
1026 						break;
1027 					}
1028 				}
1029 
1030 				/*
1031 				 * Check to see if we need to back up because we only got a
1032 				 * sign or a trailing x after a 0.
1033 				 */
1034 				if (flags & SCAN_NODIGITS) {
1035 					if (*string == '\0') {
1036 						underflow = 1;
1037 					}
1038 					goto done;
1039 				} else if (end[-1] == 'x' || end[-1] == 'X') {
1040 					end--;
1041 					string--;
1042 				}
1043 
1044 				/*
1045 				 * Scan the value from the temporary buffer.  If we are
1046 				 * returning a large unsigned value, we have to convert it back
1047 				 * to a string since PHP only supports signed values.
1048 				 */
1049 				if (!(flags & SCAN_SUPPRESS)) {
1050 					*end = '\0';
1051 					value = (zend_long) (*fn)(buf, NULL, base);
1052 					if ((flags & SCAN_UNSIGNED) && (value < 0)) {
1053 						snprintf(buf, sizeof(buf), ZEND_ULONG_FMT, value); /* INTL: ISO digit */
1054 						if (numVars && objIndex >= argCount) {
1055 							break;
1056 						} else if (numVars) {
1057 						  /* change passed value type to string */
1058 							current = Z_REFVAL(args[objIndex++]);
1059 							zval_ptr_dtor(current);
1060 							ZVAL_STRING(current, buf);
1061 						} else {
1062 							add_index_string(return_value, objIndex++, buf);
1063 						}
1064 					} else {
1065 						if (numVars && objIndex >= argCount) {
1066 							break;
1067 						} else if (numVars) {
1068 							current = Z_REFVAL(args[objIndex++]);
1069 							zval_ptr_dtor(current);
1070 							ZVAL_LONG(current, value);
1071 						} else {
1072 							add_index_long(return_value, objIndex++, value);
1073 						}
1074 					}
1075 				}
1076 				break;
1077 
1078 			case 'f':
1079 				/*
1080 				 * Scan a floating point number
1081 				 */
1082 				buf[0] = '\0';     /* call me pedantic */
1083 				if ((width == 0) || (width > sizeof(buf) - 1)) {
1084 					width = sizeof(buf) - 1;
1085 				}
1086 				flags |= SCAN_SIGNOK | SCAN_NODIGITS | SCAN_PTOK | SCAN_EXPOK;
1087 				for (end = buf; width > 0; width--) {
1088 					switch (*string) {
1089 						case '0': case '1': case '2': case '3':
1090 						case '4': case '5': case '6': case '7':
1091 						case '8': case '9':
1092 							flags &= ~(SCAN_SIGNOK | SCAN_NODIGITS);
1093 							goto addToFloat;
1094 						case '+':
1095 						case '-':
1096 							if (flags & SCAN_SIGNOK) {
1097 								flags &= ~SCAN_SIGNOK;
1098 								goto addToFloat;
1099 							}
1100 							break;
1101 						case '.':
1102 							if (flags & SCAN_PTOK) {
1103 								flags &= ~(SCAN_SIGNOK | SCAN_PTOK);
1104 								goto addToFloat;
1105 							}
1106 							break;
1107 						case 'e':
1108 						case 'E':
1109 							/*
1110 							 * An exponent is not allowed until there has
1111 							 * been at least one digit.
1112 							 */
1113 							if ((flags & (SCAN_NODIGITS | SCAN_EXPOK)) == SCAN_EXPOK) {
1114 								flags = (flags & ~(SCAN_EXPOK|SCAN_PTOK))
1115 									| SCAN_SIGNOK | SCAN_NODIGITS;
1116 								goto addToFloat;
1117 							}
1118 							break;
1119 					}
1120 
1121 					/*
1122 					 * We got an illegal character so we are done accumulating.
1123 					 */
1124 					break;
1125 
1126 addToFloat:
1127 					/*
1128 					 * Add the character to the temporary buffer.
1129 					 */
1130 					*end++ = *string++;
1131 					if (*string == '\0') {
1132 						break;
1133 					}
1134 				}
1135 
1136 				/*
1137 				 * Check to see if we need to back up because we saw a
1138 				 * trailing 'e' or sign.
1139 				 */
1140 				if (flags & SCAN_NODIGITS) {
1141 					if (flags & SCAN_EXPOK) {
1142 						/*
1143 						 * There were no digits at all so scanning has
1144 						 * failed and we are done.
1145 						 */
1146 						if (*string == '\0') {
1147 							underflow = 1;
1148 						}
1149 						goto done;
1150 					}
1151 
1152 					/*
1153 					 * We got a bad exponent ('e' and maybe a sign).
1154 					 */
1155 					end--;
1156 					string--;
1157 					if (*end != 'e' && *end != 'E') {
1158 						end--;
1159 						string--;
1160 					}
1161 				}
1162 
1163 				/*
1164 				 * Scan the value from the temporary buffer.
1165 				 */
1166 				if (!(flags & SCAN_SUPPRESS)) {
1167 					double dvalue;
1168 					*end = '\0';
1169 					dvalue = zend_strtod(buf, NULL);
1170 					if (numVars && objIndex >= argCount) {
1171 						break;
1172 					} else if (numVars) {
1173 						current = Z_REFVAL(args[objIndex++]);
1174 						zval_ptr_dtor(current);
1175 						ZVAL_DOUBLE(current, dvalue);
1176 					} else {
1177 						add_index_double(return_value, objIndex++, dvalue );
1178 					}
1179 				}
1180 				break;
1181 		} /* switch (op) */
1182 		nconversions++;
1183 	} /*  while (*format != '\0') */
1184 
1185 done:
1186 	result = SCAN_SUCCESS;
1187 
1188 	if (underflow && (0==nconversions)) {
1189 		scan_set_error_return( numVars, return_value );
1190 		result = SCAN_ERROR_EOF;
1191 	} else if (numVars) {
1192 		convert_to_long(return_value );
1193 		Z_LVAL_P(return_value) = nconversions;
1194 	} else if (nconversions < totalVars) {
1195 		/* TODO: not all elements converted. we need to prune the list - cc */
1196 	}
1197 	return result;
1198 }
1199 /* }}} */
1200 
1201 /* the compiler choked when i tried to make this a macro    */
scan_set_error_return(int numVars,zval * return_value)1202 static inline void scan_set_error_return(int numVars, zval *return_value) /* {{{ */
1203 {
1204 	if (numVars) {
1205 		ZVAL_LONG(return_value, SCAN_ERROR_EOF);  /* EOF marker */
1206 	} else {
1207 		/* convert_to_null calls destructor */
1208 		convert_to_null(return_value);
1209 	}
1210 }
1211 /* }}} */
1212 
1213 /*
1214  * Local variables:
1215  * tab-width: 4
1216  * c-basic-offset: 4
1217  * End:
1218  * vim600: sw=4 ts=4 fdm=marker
1219  * vim<600: sw=4 ts=4
1220  */
1221