xref: /PHP-5.3/ext/standard/scanf.c (revision a2045ff3)
1 /*
2    +----------------------------------------------------------------------+
3    | PHP Version 5                                                        |
4    +----------------------------------------------------------------------+
5    | Copyright (c) 1997-2013 The PHP Group                                |
6    +----------------------------------------------------------------------+
7    | This source file is subject to version 3.01 of the PHP license,      |
8    | that is bundled with this package in the file LICENSE, and is        |
9    | available through the world-wide-web at the following url:           |
10    | http://www.php.net/license/3_01.txt                                  |
11    | If you did not receive a copy of the PHP license and are unable to   |
12    | obtain it through the world-wide-web, please send a note to          |
13    | license@php.net so we can mail you a copy immediately.               |
14    +----------------------------------------------------------------------+
15    | Author: Clayton Collie <clcollie@mindspring.com>                     |
16    +----------------------------------------------------------------------+
17 */
18 
19 /* $Id$ */
20 
21 /*
22 	scanf.c --
23 
24 	This file contains the base code which implements sscanf and by extension
25 	fscanf. Original code is from TCL8.3.0 and bears the following copyright:
26 
27 	This software is copyrighted by the Regents of the University of
28 	California, Sun Microsystems, Inc., Scriptics Corporation,
29 	and other parties.  The following terms apply to all files associated
30 	with the software unless explicitly disclaimed in individual files.
31 
32 	The authors hereby grant permission to use, copy, modify, distribute,
33 	and license this software and its documentation for any purpose, provided
34 	that existing copyright notices are retained in all copies and that this
35 	notice is included verbatim in any distributions. No written agreement,
36 	license, or royalty fee is required for any of the authorized uses.
37 	Modifications to this software may be copyrighted by their authors
38 	and need not follow the licensing terms described here, provided that
39 	the new terms are clearly indicated on the first page of each file where
40 	they apply.
41 
42 	IN NO EVENT SHALL THE AUTHORS OR DISTRIBUTORS BE LIABLE TO ANY PARTY
43 	FOR DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
44 	ARISING OUT OF THE USE OF THIS SOFTWARE, ITS DOCUMENTATION, OR ANY
45 	DERIVATIVES THEREOF, EVEN IF THE AUTHORS HAVE BEEN ADVISED OF THE
46 	POSSIBILITY OF SUCH DAMAGE.
47 
48 	THE AUTHORS AND DISTRIBUTORS SPECIFICALLY DISCLAIM ANY WARRANTIES,
49 	INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY,
50 	FITNESS FOR A PARTICULAR PURPOSE, AND NON-INFRINGEMENT.  THIS SOFTWARE
51 	IS PROVIDED ON AN "AS IS" BASIS, AND THE AUTHORS AND DISTRIBUTORS HAVE
52 	NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR
53 	MODIFICATIONS.
54 
55 	GOVERNMENT USE: If you are acquiring this software on behalf of the
56 	U.S. government, the Government shall have only "Restricted Rights"
57 	in the software and related documentation as defined in the Federal
58 	Acquisition Regulations (FARs) in Clause 52.227.19 (c) (2).  If you
59 	are acquiring the software on behalf of the Department of Defense, the
60 	software shall be classified as "Commercial Computer Software" and the
61 	Government shall have only "Restricted Rights" as defined in Clause
62 	252.227-7013 (c) (1) of DFARs.  Notwithstanding the foregoing, the
63 	authors grant the U.S. Government and others acting in its behalf
64 	permission to use and distribute the software in accordance with the
65 	terms specified in this license.
66 */
67 
68 #include <stdio.h>
69 #include <limits.h>
70 #include <ctype.h>
71 #include "php.h"
72 #include "php_variables.h"
73 #ifdef HAVE_LOCALE_H
74 #include <locale.h>
75 #endif
76 #include "zend_execute.h"
77 #include "zend_operators.h"
78 #include "zend_strtod.h"
79 #include "php_globals.h"
80 #include "basic_functions.h"
81 #include "scanf.h"
82 
/*
 * Flag values used internally by [f|s]canf.
 *
 * The first group describes the conversion as written in the format
 * string; the second group is scratch state used while a number is being
 * accumulated character by character.
 */
#define SCAN_NOSKIP     0x1       /* Don't skip blanks. */
#define SCAN_SUPPRESS	0x2	  /* Suppress assignment. */
#define SCAN_UNSIGNED	0x4	  /* Read an unsigned value. */
#define SCAN_WIDTH      0x8       /* A width value was supplied. */

#define SCAN_SIGNOK     0x10      /* A +/- character is allowed. */
#define SCAN_NODIGITS   0x20      /* No digits have been scanned. */
#define SCAN_NOZERO     0x40      /* No zero digits have been scanned. */
#define SCAN_XOK        0x80      /* An 'x' is allowed. */
#define SCAN_PTOK       0x100     /* Decimal point is allowed. */
#define SCAN_EXPOK      0x200     /* An exponent is allowed. */

/*
 * Convert a (possibly signed) char to zend_uchar, e.g. before handing it
 * to a <ctype.h> classifier.  The whole expansion is parenthesized so the
 * cast cannot be split apart by operators surrounding the macro call.
 */
#define UCHAR(x)		((zend_uchar)(x))
99 
/*
 * One inclusive character range of a "%[...]" set.  BuildCharSet stores
 * the two end points so that start is never greater than end.
 */
struct Range {
	char start;		/* first character of the range */
	char end;		/* last character of the range */
};

/*
 * The following structure contains the information associated with
 * a character set.
 */
typedef struct CharSet {
	int exclude;		/* 1 if this is an exclusion set. */
	int nchars;		/* number of entries in use in chars */
	char *chars;		/* characters listed individually */
	int nranges;		/* number of entries in use in ranges */
	struct Range *ranges;	/* ranges, or NULL when none were allocated */
} CharSet;
114 
115 /*
116  * Declarations for functions used only in this file.
117  */
118 static char *BuildCharSet(CharSet *cset, char *format);
119 static int	CharInSet(CharSet *cset, int ch);
120 static void	ReleaseCharSet(CharSet *cset);
121 static inline void scan_set_error_return(int numVars, zval **return_value);
122 
123 
124 /* {{{ BuildCharSet
125  *----------------------------------------------------------------------
126  *
127  * BuildCharSet --
128  *
129  *	This function examines a character set format specification
130  *	and builds a CharSet containing the individual characters and
131  *	character ranges specified.
132  *
133  * Results:
134  *	Returns the next format position.
135  *
136  * Side effects:
137  *	Initializes the charset.
138  *
139  *----------------------------------------------------------------------
140  */
BuildCharSet(CharSet * cset,char * format)141 static char * BuildCharSet(CharSet *cset, char *format)
142 {
143 	char *ch, start;
144 	int  nranges;
145 	char *end;
146 
147 	memset(cset, 0, sizeof(CharSet));
148 
149 	ch = format;
150 	if (*ch == '^') {
151 		cset->exclude = 1;
152 		ch = ++format;
153 	}
154 	end = format + 1;	/* verify this - cc */
155 
156 	/*
157 	 * Find the close bracket so we can overallocate the set.
158 	 */
159 	if (*ch == ']') {
160 		ch = end++;
161 	}
162 	nranges = 0;
163 	while (*ch != ']') {
164 		if (*ch == '-') {
165 			nranges++;
166 		}
167 		ch = end++;
168 	}
169 
170 	cset->chars = (char *) safe_emalloc(sizeof(char), (end - format - 1), 0);
171 	if (nranges > 0) {
172 		cset->ranges = (struct Range *) safe_emalloc(sizeof(struct Range), nranges, 0);
173 	} else {
174 		cset->ranges = NULL;
175 	}
176 
177 	/*
178 	 * Now build the character set.
179 	 */
180 	cset->nchars = cset->nranges = 0;
181 	ch    = format++;
182 	start = *ch;
183 	if (*ch == ']' || *ch == '-') {
184 		cset->chars[cset->nchars++] = *ch;
185 		ch = format++;
186 	}
187 	while (*ch != ']') {
188 		if (*format == '-') {
189 			/*
190 			 * This may be the first character of a range, so don't add
191 			 * it yet.
192 			 */
193 			start = *ch;
194 		} else if (*ch == '-') {
195 			/*
196 			 * Check to see if this is the last character in the set, in which
197 			 * case it is not a range and we should add the previous character
198 			 * as well as the dash.
199 			 */
200 			if (*format == ']') {
201 				cset->chars[cset->nchars++] = start;
202 				cset->chars[cset->nchars++] = *ch;
203 			} else {
204 				ch = format++;
205 
206 				/*
207 				 * Check to see if the range is in reverse order.
208 				 */
209 				if (start < *ch) {
210 					cset->ranges[cset->nranges].start = start;
211 					cset->ranges[cset->nranges].end = *ch;
212 				} else {
213 					cset->ranges[cset->nranges].start = *ch;
214 					cset->ranges[cset->nranges].end = start;
215 				}
216 				cset->nranges++;
217 			}
218 		} else {
219 			cset->chars[cset->nchars++] = *ch;
220 		}
221 		ch = format++;
222 	}
223 	return format;
224 }
225 /* }}} */
226 
227 /* {{{ CharInSet
228  *----------------------------------------------------------------------
229  *
230  * CharInSet --
231  *
232  *	Check to see if a character matches the given set.
233  *
234  * Results:
235  *	Returns non-zero if the character matches the given set.
236  *
237  * Side effects:
238  *	None.
239  *
240  *----------------------------------------------------------------------
241  */
CharInSet(CharSet * cset,int c)242 static int CharInSet(CharSet *cset, int c)
243 {
244 	char ch = (char) c;
245 	int i, match = 0;
246 
247 	for (i = 0; i < cset->nchars; i++) {
248 		if (cset->chars[i] == ch) {
249 			match = 1;
250 			break;
251 		}
252 	}
253 	if (!match) {
254 		for (i = 0; i < cset->nranges; i++) {
255 			if ((cset->ranges[i].start <= ch)
256 				&& (ch <= cset->ranges[i].end)) {
257 				match = 1;
258 				break;
259 			}
260 		}
261 	}
262 	return (cset->exclude ? !match : match);
263 }
264 /* }}} */
265 
266 /* {{{ ReleaseCharSet
267  *----------------------------------------------------------------------
268  *
269  * ReleaseCharSet --
270  *
271  *	Free the storage associated with a character set.
272  *
273  * Results:
274  *	None.
275  *
276  * Side effects:
277  *	None.
278  *
279  *----------------------------------------------------------------------
280  */
ReleaseCharSet(CharSet * cset)281 static void ReleaseCharSet(CharSet *cset)
282 {
283 	efree((char *)cset->chars);
284 	if (cset->ranges) {
285 		efree((char *)cset->ranges);
286 	}
287 }
288 /* }}} */
289 
290 /* {{{ ValidateFormat
291  *----------------------------------------------------------------------
292  *
293  * ValidateFormat --
294  *
295  *	Parse the format string and verify that it is properly formed
296  *	and that there are exactly enough variables on the command line.
297  *
298  * Results:
299  *    FAILURE or SUCCESS.
300  *
301  * Side effects:
302  *     May set php_error based on abnormal conditions.
303  *
304  * Parameters :
305  *     format     The format string.
306  *     numVars    The number of variables passed to the scan command.
307  *     totalSubs  The number of variables that will be required.
308  *
309  *----------------------------------------------------------------------
310 */
ValidateFormat(char * format,int numVars,int * totalSubs)311 PHPAPI int ValidateFormat(char *format, int numVars, int *totalSubs)
312 {
313 #define STATIC_LIST_SIZE 16
314 	int gotXpg, gotSequential, value, i, flags;
315 	char *end, *ch = NULL;
316 	int staticAssign[STATIC_LIST_SIZE];
317 	int *nassign = staticAssign;
318 	int objIndex, xpgSize, nspace = STATIC_LIST_SIZE;
319 	TSRMLS_FETCH();
320 
321 	/*
322 	 * Initialize an array that records the number of times a variable
323 	 * is assigned to by the format string.  We use this to detect if
324 	 * a variable is multiply assigned or left unassigned.
325 	 */
326 	if (numVars > nspace) {
327 		nassign = (int*)safe_emalloc(sizeof(int), numVars, 0);
328 		nspace = numVars;
329 	}
330 	for (i = 0; i < nspace; i++) {
331 		nassign[i] = 0;
332 	}
333 
334 	xpgSize = objIndex = gotXpg = gotSequential = 0;
335 
336 	while (*format != '\0') {
337 		ch = format++;
338 		flags = 0;
339 
340 		if (*ch != '%') {
341 			continue;
342 		}
343 		ch = format++;
344 		if (*ch == '%') {
345 			continue;
346 		}
347 		if (*ch == '*') {
348 			flags |= SCAN_SUPPRESS;
349 			ch = format++;
350 			goto xpgCheckDone;
351 		}
352 
353 		if ( isdigit( (int)*ch ) ) {
354 			/*
355 			 * Check for an XPG3-style %n$ specification.  Note: there
356 			 * must not be a mixture of XPG3 specs and non-XPG3 specs
357 			 * in the same format string.
358 			 */
359 			value = strtoul(format-1, &end, 10);
360 			if (*end != '$') {
361 				goto notXpg;
362 			}
363 			format = end+1;
364 			ch     = format++;
365 			gotXpg = 1;
366 			if (gotSequential) {
367 				goto mixedXPG;
368 			}
369 			objIndex = value - 1;
370 			if ((objIndex < 0) || (numVars && (objIndex >= numVars))) {
371 				goto badIndex;
372 			} else if (numVars == 0) {
373 				/*
374 				 * In the case where no vars are specified, the user can
375 				 * specify %9999$ legally, so we have to consider special
376 				 * rules for growing the assign array.  'value' is
377 				 * guaranteed to be > 0.
378 				 */
379 
380 				/* set a lower artificial limit on this
381 				 * in the interest of security and resource friendliness
382 				 * 255 arguments should be more than enough. - cc
383 				 */
384 				if (value > SCAN_MAX_ARGS) {
385 					goto badIndex;
386 				}
387 
388 				xpgSize = (xpgSize > value) ? xpgSize : value;
389 			}
390 			goto xpgCheckDone;
391 		}
392 
393 notXpg:
394 		gotSequential = 1;
395 		if (gotXpg) {
396 mixedXPG:
397 			php_error_docref(NULL TSRMLS_CC, E_WARNING, "%s", "cannot mix \"%\" and \"%n$\" conversion specifiers");
398 			goto error;
399 		}
400 
401 xpgCheckDone:
402 		/*
403 		 * Parse any width specifier.
404 		 */
405 		if (isdigit(UCHAR(*ch))) {
406 			value = strtoul(format-1, &format, 10);
407 			flags |= SCAN_WIDTH;
408 			ch = format++;
409 		}
410 
411 		/*
412 		 * Ignore size specifier.
413 		 */
414 		if ((*ch == 'l') || (*ch == 'L') || (*ch == 'h')) {
415 			ch = format++;
416 		}
417 
418 		if (!(flags & SCAN_SUPPRESS) && numVars && (objIndex >= numVars)) {
419 			goto badIndex;
420 		}
421 
422 		/*
423 		 * Handle the various field types.
424 		 */
425 		switch (*ch) {
426 			case 'n':
427 			case 'd':
428 			case 'D':
429 			case 'i':
430 			case 'o':
431 			case 'x':
432 			case 'X':
433 			case 'u':
434 			case 'f':
435 			case 'e':
436 			case 'E':
437 			case 'g':
438 			case 's':
439 				break;
440 
441 			case 'c':
442 				/* we differ here with the TCL implementation in allowing for */
443 				/* a character width specification, to be more consistent with */
444 				/* ANSI. since Zend auto allocates space for vars, this is no */
445 				/* problem - cc                                               */
446 				/*
447 				if (flags & SCAN_WIDTH) {
448 					php_error_docref(NULL TSRMLS_CC, E_WARNING, "Field width may not be specified in %c conversion");
449 					goto error;
450 				}
451 				*/
452 				break;
453 
454 			case '[':
455 				if (*format == '\0') {
456 					goto badSet;
457 				}
458 				ch = format++;
459 				if (*ch == '^') {
460 					if (*format == '\0') {
461 						goto badSet;
462 					}
463 					ch = format++;
464 				}
465 				if (*ch == ']') {
466 					if (*format == '\0') {
467 						goto badSet;
468 					}
469 					ch = format++;
470 				}
471 				while (*ch != ']') {
472 					if (*format == '\0') {
473 						goto badSet;
474 					}
475 					ch = format++;
476 				}
477 				break;
478 badSet:
479 				php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unmatched [ in format string");
480 				goto error;
481 
482 			default: {
483 				php_error_docref(NULL TSRMLS_CC, E_WARNING, "Bad scan conversion character \"%c\"", *ch);
484 				goto error;
485 			}
486 		}
487 
488 		if (!(flags & SCAN_SUPPRESS)) {
489 			if (objIndex >= nspace) {
490 				/*
491 				 * Expand the nassign buffer.  If we are using XPG specifiers,
492 				 * make sure that we grow to a large enough size.  xpgSize is
493 				 * guaranteed to be at least one larger than objIndex.
494 				 */
495 				value = nspace;
496 				if (xpgSize) {
497 					nspace = xpgSize;
498 				} else {
499 					nspace += STATIC_LIST_SIZE;
500 				}
501 				if (nassign == staticAssign) {
502 					nassign = (void *)safe_emalloc(nspace, sizeof(int), 0);
503 					for (i = 0; i < STATIC_LIST_SIZE; ++i) {
504 						nassign[i] = staticAssign[i];
505 					}
506 				} else {
507 					nassign = (void *)erealloc((void *)nassign, nspace * sizeof(int));
508 				}
509 				for (i = value; i < nspace; i++) {
510 					nassign[i] = 0;
511 				}
512 			}
513 			nassign[objIndex]++;
514 			objIndex++;
515 		}
516 	} /* while (*format != '\0') */
517 
518 	/*
519 	 * Verify that all of the variable were assigned exactly once.
520 	 */
521 	if (numVars == 0) {
522 		if (xpgSize) {
523 			numVars = xpgSize;
524 		} else {
525 			numVars = objIndex;
526 		}
527 	}
528 	if (totalSubs) {
529 		*totalSubs = numVars;
530 	}
531 	for (i = 0; i < numVars; i++) {
532 		if (nassign[i] > 1) {
533 			php_error_docref(NULL TSRMLS_CC, E_WARNING, "%s", "Variable is assigned by multiple \"%n$\" conversion specifiers");
534 			goto error;
535 		} else if (!xpgSize && (nassign[i] == 0)) {
536 			/*
537 			 * If the space is empty, and xpgSize is 0 (means XPG wasn't
538 			 * used, and/or numVars != 0), then too many vars were given
539 			 */
540 			php_error_docref(NULL TSRMLS_CC, E_WARNING, "Variable is not assigned by any conversion specifiers");
541 			goto error;
542 		}
543 	}
544 
545 	if (nassign != staticAssign) {
546 		efree((char *)nassign);
547 	}
548 	return SCAN_SUCCESS;
549 
550 badIndex:
551 	if (gotXpg) {
552 		php_error_docref(NULL TSRMLS_CC, E_WARNING, "%s", "\"%n$\" argument index out of range");
553 	} else {
554 		php_error_docref(NULL TSRMLS_CC, E_WARNING, "Different numbers of variable names and field specifiers");
555 	}
556 
557 error:
558 	if (nassign != staticAssign) {
559 		efree((char *)nassign);
560 	}
561 	return SCAN_ERROR_INVALID_FORMAT;
562 #undef STATIC_LIST_SIZE
563 }
564 /* }}} */
565 
566 /* {{{ php_sscanf_internal
567  * This is the internal function which does processing on behalf of
568  * both sscanf() and fscanf()
569  *
570  * parameters :
571  * 		string		literal string to be processed
572  * 		format		format string
573  *		argCount	total number of elements in the args array
574  *		args		arguments passed in from user function (f|s)scanf
575  * 		varStart	offset (in args) of 1st variable passed in to (f|s)scanf
576  *		return_value set with the results of the scan
577  */
578 
php_sscanf_internal(char * string,char * format,int argCount,zval *** args,int varStart,zval ** return_value TSRMLS_DC)579 PHPAPI int php_sscanf_internal( char *string, char *format,
580 				int argCount, zval ***args,
581 				int varStart, zval **return_value TSRMLS_DC)
582 {
583 	int  numVars, nconversions, totalVars = -1;
584 	int  i, result;
585 	long value;
586 	int  objIndex;
587 	char *end, *baseString;
588 	zval **current;
589 	char op   = 0;
590 	int  base = 0;
591 	int  underflow = 0;
592 	size_t width;
593 	long (*fn)() = NULL;
594 	char *ch, sch;
595 	int  flags;
596 	char buf[64];	/* Temporary buffer to hold scanned number
597 					 * strings before they are passed to strtoul() */
598 
599 	/* do some sanity checking */
600 	if ((varStart > argCount) || (varStart < 0)){
601 		varStart = SCAN_MAX_ARGS + 1;
602 	}
603 	numVars = argCount - varStart;
604 	if (numVars < 0) {
605 		numVars = 0;
606 	}
607 
608 #if 0
609 	zend_printf("<br>in sscanf_internal : <br> string is \"%s\", format = \"%s\"<br> NumVars = %d. VarStart = %d<br>-------------------------<br>",
610 					string, format, numVars, varStart);
611 #endif
612 	/*
613 	 * Check for errors in the format string.
614 	 */
615 	if (ValidateFormat(format, numVars, &totalVars) != SCAN_SUCCESS) {
616 		scan_set_error_return( numVars, return_value );
617 		return SCAN_ERROR_INVALID_FORMAT;
618 	}
619 
620 	objIndex = numVars ? varStart : 0;
621 
622 	/*
623 	 * If any variables are passed, make sure they are all passed by reference
624 	 */
625 	if (numVars) {
626 		for (i = varStart;i < argCount;i++){
627 			if ( ! PZVAL_IS_REF( *args[ i ] ) ) {
628 				php_error_docref(NULL TSRMLS_CC, E_WARNING, "Parameter %d must be passed by reference", i);
629 				scan_set_error_return(numVars, return_value);
630 				return SCAN_ERROR_VAR_PASSED_BYVAL;
631 			}
632 		}
633 	}
634 
635 	/*
636 	 * Allocate space for the result objects. Only happens when no variables
637 	 * are specified
638 	 */
639 	if (!numVars) {
640 		zval *tmp;
641 
642 		/* allocate an array for return */
643 		array_init(*return_value);
644 
645 		for (i = 0; i < totalVars; i++) {
646 			MAKE_STD_ZVAL(tmp);
647 			ZVAL_NULL(tmp);
648 			if (add_next_index_zval(*return_value, tmp) == FAILURE) {
649 				scan_set_error_return(0, return_value);
650 				return FAILURE;
651 			}
652 		}
653 		varStart = 0; /* Array index starts from 0 */
654 	}
655 
656 	baseString = string;
657 
658 	/*
659 	 * Iterate over the format string filling in the result objects until
660 	 * we reach the end of input, the end of the format string, or there
661 	 * is a mismatch.
662 	 */
663 	nconversions = 0;
664 	/* note ! - we need to limit the loop for objIndex to keep it in bounds */
665 
666 	while (*format != '\0') {
667 		ch    = format++;
668 		flags = 0;
669 
670 		/*
671 		 * If we see whitespace in the format, skip whitespace in the string.
672 		 */
673 		if ( isspace( (int)*ch ) ) {
674 			sch = *string;
675 			while ( isspace( (int)sch ) ) {
676 				if (*string == '\0') {
677 					goto done;
678 				}
679 				string++;
680 				sch = *string;
681 			}
682 			continue;
683 		}
684 
685 		if (*ch != '%') {
686 literal:
687 			if (*string == '\0') {
688 				underflow = 1;
689 				goto done;
690 			}
691 			sch = *string;
692 			string++;
693 			if (*ch != sch) {
694 				goto done;
695 			}
696 			continue;
697 		}
698 
699 		ch = format++;
700 		if (*ch == '%') {
701 			goto literal;
702 		}
703 
704 		/*
705 		 * Check for assignment suppression ('*') or an XPG3-style
706 		 * assignment ('%n$').
707 		 */
708 		if (*ch == '*') {
709 			flags |= SCAN_SUPPRESS;
710 			ch = format++;
711 		} else if ( isdigit(UCHAR(*ch))) {
712 			value = strtoul(format-1, &end, 10);
713 			if (*end == '$') {
714 				format = end+1;
715 				ch = format++;
716 				objIndex = varStart + value - 1;
717 			}
718 		}
719 
720 		/*
721 		 * Parse any width specifier.
722 		 */
723 		if ( isdigit(UCHAR(*ch))) {
724 			width = strtoul(format-1, &format, 10);
725 			ch = format++;
726 		} else {
727 			width = 0;
728 		}
729 
730 		/*
731 		 * Ignore size specifier.
732 		 */
733 		if ((*ch == 'l') || (*ch == 'L') || (*ch == 'h')) {
734 			ch = format++;
735 		}
736 
737 		/*
738 		 * Handle the various field types.
739 		 */
740 		switch (*ch) {
741 			case 'n':
742 				if (!(flags & SCAN_SUPPRESS)) {
743 					if (numVars && objIndex >= argCount) {
744 						break;
745 					} else if (numVars) {
746 						zend_uint refcount;
747 
748 						current = args[objIndex++];
749 						refcount = Z_REFCOUNT_PP(current);
750 						zval_dtor( *current );
751 						ZVAL_LONG( *current, (long)(string - baseString) );
752 						Z_SET_REFCOUNT_PP(current, refcount);
753 						Z_SET_ISREF_PP(current);
754 					} else {
755 						add_index_long(*return_value, objIndex++, string - baseString);
756 					}
757 				}
758 				nconversions++;
759 				continue;
760 
761 			case 'd':
762 			case 'D':
763 				op = 'i';
764 				base = 10;
765 				fn = (long (*)())strtol;
766 				break;
767 			case 'i':
768 				op = 'i';
769 				base = 0;
770 				fn = (long (*)())strtol;
771 				break;
772 			case 'o':
773 				op = 'i';
774 				base = 8;
775 				fn = (long (*)())strtol;
776 				break;
777 			case 'x':
778 			case 'X':
779 				op = 'i';
780 				base = 16;
781 				fn = (long (*)())strtol;
782 				break;
783 			case 'u':
784 				op = 'i';
785 				base = 10;
786 				flags |= SCAN_UNSIGNED;
787 				fn = (long (*)())strtoul;
788 				break;
789 
790 			case 'f':
791 			case 'e':
792 			case 'E':
793 			case 'g':
794 				op = 'f';
795 				break;
796 
797 			case 's':
798 				op = 's';
799 				break;
800 
801 			case 'c':
802 				op = 's';
803 				flags |= SCAN_NOSKIP;
804 				/*-cc-*/
805 				if (0 == width) {
806 					width = 1;
807 				}
808 				/*-cc-*/
809 				break;
810 			case '[':
811 				op = '[';
812 				flags |= SCAN_NOSKIP;
813 				break;
814 		}   /* switch */
815 
816 		/*
817 		 * At this point, we will need additional characters from the
818 		 * string to proceed.
819 		 */
820 		if (*string == '\0') {
821 			underflow = 1;
822 			goto done;
823 		}
824 
825 		/*
826 		 * Skip any leading whitespace at the beginning of a field unless
827 		 * the format suppresses this behavior.
828 		 */
829 		if (!(flags & SCAN_NOSKIP)) {
830 			while (*string != '\0') {
831 				sch = *string;
832 				if (! isspace((int)sch) ) {
833 					break;
834 				}
835 				string++;
836 			}
837 			if (*string == '\0') {
838 				underflow = 1;
839 				goto done;
840 			}
841 		}
842 
843 		/*
844 		 * Perform the requested scanning operation.
845 		 */
846 		switch (op) {
847 			case 'c':
848 			case 's':
849 				/*
850 				 * Scan a string up to width characters or whitespace.
851 				 */
852 				if (width == 0) {
853 					width = (size_t) ~0;
854 				}
855 				end = string;
856 				while (*end != '\0') {
857 					sch = *end;
858 					if ( isspace( (int)sch ) ) {
859 						break;
860 					}
861 					end++;
862 					if (--width == 0) {
863 					   break;
864 					}
865 				}
866 				if (!(flags & SCAN_SUPPRESS)) {
867 					if (numVars && objIndex >= argCount) {
868 						break;
869 					} else if (numVars) {
870 						zend_uint refcount;
871 
872 						current = args[objIndex++];
873 						refcount = Z_REFCOUNT_PP(current);
874 						zval_dtor( *current );
875 						ZVAL_STRINGL( *current, string, end-string, 1);
876 						Z_SET_REFCOUNT_PP(current, refcount);
877 						Z_SET_ISREF_PP(current);
878 					} else {
879 						add_index_stringl( *return_value, objIndex++, string, end-string, 1);
880 					}
881 				}
882 				string = end;
883 				break;
884 
885 			case '[': {
886 				CharSet cset;
887 
888 				if (width == 0) {
889 					width = (size_t) ~0;
890 				}
891 				end = string;
892 
893 				format = BuildCharSet(&cset, format);
894 				while (*end != '\0') {
895 					sch = *end;
896 					if (!CharInSet(&cset, (int)sch)) {
897 						break;
898 					}
899 					end++;
900 					if (--width == 0) {
901 						break;
902 					}
903 				}
904 				ReleaseCharSet(&cset);
905 
906 				if (string == end) {
907 					/*
908 					 * Nothing matched the range, stop processing
909 					 */
910 					goto done;
911 				}
912 				if (!(flags & SCAN_SUPPRESS)) {
913 					if (numVars && objIndex >= argCount) {
914 						break;
915 					} else if (numVars) {
916 						current = args[objIndex++];
917 						zval_dtor( *current );
918 						ZVAL_STRINGL( *current, string, end-string, 1);
919 					} else {
920 						add_index_stringl(*return_value, objIndex++, string, end-string, 1);
921 					}
922 				}
923 				string = end;
924 				break;
925 			}
926 /*
927 			case 'c':
928 			   / Scan a single character./
929 
930 				sch = *string;
931 				string++;
932 				if (!(flags & SCAN_SUPPRESS)) {
933 					if (numVars) {
934 						char __buf[2];
935 						__buf[0] = sch;
936 						__buf[1] = '\0';;
937 						current = args[objIndex++];
938 						zval_dtor(*current);
939 						ZVAL_STRINGL( *current, __buf, 1, 1);
940 					} else {
941 						add_index_stringl(*return_value, objIndex++, &sch, 1, 1);
942 					}
943 				}
944 				break;
945 */
946 			case 'i':
947 				/*
948 				 * Scan an unsigned or signed integer.
949 				 */
950 				/*-cc-*/
951 				buf[0] = '\0';
952 				/*-cc-*/
953 				if ((width == 0) || (width > sizeof(buf) - 1)) {
954 					width = sizeof(buf) - 1;
955 				}
956 
957 				flags |= SCAN_SIGNOK | SCAN_NODIGITS | SCAN_NOZERO;
958 				for (end = buf; width > 0; width--) {
959 					switch (*string) {
960 						/*
961 						 * The 0 digit has special meaning at the beginning of
962 						 * a number.  If we are unsure of the base, it
963 						 * indicates that we are in base 8 or base 16 (if it is
964 						 * followed by an 'x').
965 						 */
966 						case '0':
967 							/*-cc-*/
968 							if (base == 16) {
969 								flags |= SCAN_XOK;
970 							}
971 							/*-cc-*/
972 							if (base == 0) {
973 								base = 8;
974 								flags |= SCAN_XOK;
975 							}
976 							if (flags & SCAN_NOZERO) {
977 								flags &= ~(SCAN_SIGNOK | SCAN_NODIGITS | SCAN_NOZERO);
978 							} else {
979 								flags &= ~(SCAN_SIGNOK | SCAN_XOK | SCAN_NODIGITS);
980 							}
981 							goto addToInt;
982 
983 						case '1': case '2': case '3': case '4':
984 						case '5': case '6': case '7':
985 							if (base == 0) {
986 								base = 10;
987 							}
988 							flags &= ~(SCAN_SIGNOK | SCAN_XOK | SCAN_NODIGITS);
989 							goto addToInt;
990 
991 						case '8': case '9':
992 							if (base == 0) {
993 								base = 10;
994 							}
995 							if (base <= 8) {
996 							   break;
997 							}
998 							flags &= ~(SCAN_SIGNOK | SCAN_XOK | SCAN_NODIGITS);
999 							goto addToInt;
1000 
1001 						case 'A': case 'B': case 'C':
1002 						case 'D': case 'E': case 'F':
1003 						case 'a': case 'b': case 'c':
1004 						case 'd': case 'e': case 'f':
1005 							if (base <= 10) {
1006 								break;
1007 							}
1008 							flags &= ~(SCAN_SIGNOK | SCAN_XOK | SCAN_NODIGITS);
1009 							goto addToInt;
1010 
1011 						case '+': case '-':
1012 							if (flags & SCAN_SIGNOK) {
1013 								flags &= ~SCAN_SIGNOK;
1014 								goto addToInt;
1015 							}
1016 							break;
1017 
1018 						case 'x': case 'X':
1019 							if ((flags & SCAN_XOK) && (end == buf+1)) {
1020 								base = 16;
1021 								flags &= ~SCAN_XOK;
1022 								goto addToInt;
1023 							}
1024 							break;
1025 					}
1026 
1027 					/*
1028 					 * We got an illegal character so we are done accumulating.
1029 					 */
1030 					break;
1031 
1032 addToInt:
1033 					/*
1034 					 * Add the character to the temporary buffer.
1035 					 */
1036 					*end++ = *string++;
1037 					if (*string == '\0') {
1038 						break;
1039 					}
1040 				}
1041 
1042 				/*
1043 				 * Check to see if we need to back up because we only got a
1044 				 * sign or a trailing x after a 0.
1045 				 */
1046 				if (flags & SCAN_NODIGITS) {
1047 					if (*string == '\0') {
1048 						underflow = 1;
1049 					}
1050 					goto done;
1051 				} else if (end[-1] == 'x' || end[-1] == 'X') {
1052 					end--;
1053 					string--;
1054 				}
1055 
1056 				/*
1057 				 * Scan the value from the temporary buffer.  If we are
1058 				 * returning a large unsigned value, we have to convert it back
1059 				 * to a string since PHP only supports signed values.
1060 				 */
1061 				if (!(flags & SCAN_SUPPRESS)) {
1062 					*end = '\0';
1063 					value = (long) (*fn)(buf, NULL, base);
1064 					if ((flags & SCAN_UNSIGNED) && (value < 0)) {
1065 						snprintf(buf, sizeof(buf), "%lu", value); /* INTL: ISO digit */
1066 						if (numVars && objIndex >= argCount) {
1067 							break;
1068 						} else if (numVars) {
1069 						  /* change passed value type to string */
1070 							current = args[objIndex++];
1071 							zval_dtor(*current);
1072 							ZVAL_STRING( *current, buf, 1 );
1073 						} else {
1074 							add_index_string(*return_value, objIndex++, buf, 1);
1075 						}
1076 					} else {
1077 						if (numVars && objIndex >= argCount) {
1078 							break;
1079 						} else if (numVars) {
1080 							current = args[objIndex++];
1081 							zval_dtor(*current);
1082 							ZVAL_LONG(*current, value);
1083 						} else {
1084 							add_index_long(*return_value, objIndex++, value);
1085 						}
1086 					}
1087 				}
1088 				break;
1089 
1090 			case 'f':
1091 				/*
1092 				 * Scan a floating point number
1093 				 */
1094 				buf[0] = '\0';     /* call me pedantic */
1095 				if ((width == 0) || (width > sizeof(buf) - 1)) {
1096 					width = sizeof(buf) - 1;
1097 				}
1098 				flags |= SCAN_SIGNOK | SCAN_NODIGITS | SCAN_PTOK | SCAN_EXPOK;
1099 				for (end = buf; width > 0; width--) {
1100 					switch (*string) {
1101 						case '0': case '1': case '2': case '3':
1102 						case '4': case '5': case '6': case '7':
1103 						case '8': case '9':
1104 							flags &= ~(SCAN_SIGNOK | SCAN_NODIGITS);
1105 							goto addToFloat;
1106 						case '+':
1107 						case '-':
1108 							if (flags & SCAN_SIGNOK) {
1109 								flags &= ~SCAN_SIGNOK;
1110 								goto addToFloat;
1111 							}
1112 							break;
1113 						case '.':
1114 							if (flags & SCAN_PTOK) {
1115 								flags &= ~(SCAN_SIGNOK | SCAN_PTOK);
1116 								goto addToFloat;
1117 							}
1118 							break;
1119 						case 'e':
1120 						case 'E':
1121 							/*
1122 							 * An exponent is not allowed until there has
1123 							 * been at least one digit.
1124 							 */
1125 							if ((flags & (SCAN_NODIGITS | SCAN_EXPOK)) == SCAN_EXPOK) {
1126 								flags = (flags & ~(SCAN_EXPOK|SCAN_PTOK))
1127 									| SCAN_SIGNOK | SCAN_NODIGITS;
1128 								goto addToFloat;
1129 							}
1130 							break;
1131 					}
1132 
1133 					/*
1134 					 * We got an illegal character so we are done accumulating.
1135 					 */
1136 					break;
1137 
1138 addToFloat:
1139 					/*
1140 					 * Add the character to the temporary buffer.
1141 					 */
1142 					*end++ = *string++;
1143 					if (*string == '\0') {
1144 						break;
1145 					}
1146 				}
1147 
1148 				/*
1149 				 * Check to see if we need to back up because we saw a
1150 				 * trailing 'e' or sign.
1151 				 */
1152 				if (flags & SCAN_NODIGITS) {
1153 					if (flags & SCAN_EXPOK) {
1154 						/*
1155 						 * There were no digits at all so scanning has
1156 						 * failed and we are done.
1157 						 */
1158 						if (*string == '\0') {
1159 							underflow = 1;
1160 						}
1161 						goto done;
1162 					}
1163 
1164 					/*
1165 					 * We got a bad exponent ('e' and maybe a sign).
1166 					 */
1167 					end--;
1168 					string--;
1169 					if (*end != 'e' && *end != 'E') {
1170 						end--;
1171 						string--;
1172 					}
1173 				}
1174 
1175 				/*
1176 				 * Scan the value from the temporary buffer.
1177 				 */
1178 				if (!(flags & SCAN_SUPPRESS)) {
1179 					double dvalue;
1180 					*end = '\0';
1181 					dvalue = zend_strtod(buf, NULL);
1182 					if (numVars && objIndex >= argCount) {
1183 						break;
1184 					} else if (numVars) {
1185 						current = args[objIndex++];
1186 						zval_dtor(*current);
1187 						ZVAL_DOUBLE(*current, dvalue);
1188 					} else {
1189 						add_index_double( *return_value, objIndex++, dvalue );
1190 					}
1191 				}
1192 				break;
1193 		} /* switch (op) */
1194 		nconversions++;
1195 	} /*  while (*format != '\0') */
1196 
1197 done:
1198 	result = SCAN_SUCCESS;
1199 
1200 	if (underflow && (0==nconversions)) {
1201 		scan_set_error_return( numVars, return_value );
1202 		result = SCAN_ERROR_EOF;
1203 	} else if (numVars) {
1204 		convert_to_long( *return_value );
1205 		Z_LVAL_PP(return_value) = nconversions;
1206 	} else if (nconversions < totalVars) {
1207 		/* TODO: not all elements converted. we need to prune the list - cc */
1208 	}
1209 	return result;
1210 }
1211 /* }}} */
1212 
1213 /* the compiler choked when i tried to make this a macro    */
scan_set_error_return(int numVars,zval ** return_value)1214 static inline void scan_set_error_return(int numVars, zval **return_value) /* {{{ */
1215 {
1216 	if (numVars) {
1217 		Z_TYPE_PP(return_value) = IS_LONG;
1218 		Z_LVAL_PP(return_value) = SCAN_ERROR_EOF;  /* EOF marker */
1219 	} else {
1220 		/* convert_to_null calls destructor */
1221 		convert_to_null( *return_value );
1222 	}
1223 }
1224 /* }}} */
1225 
1226 /*
1227  * Local variables:
1228  * tab-width: 4
1229  * c-basic-offset: 4
1230  * End:
1231  * vim600: sw=4 ts=4 fdm=marker
1232  * vim<600: sw=4 ts=4
1233  */
1234