1 /*
2 +----------------------------------------------------------------------+
3 | PHP Version 5 |
4 +----------------------------------------------------------------------+
5 | Copyright (c) 1997-2013 The PHP Group |
6 +----------------------------------------------------------------------+
7 | This source file is subject to version 3.01 of the PHP license, |
8 | that is bundled with this package in the file LICENSE, and is |
9 | available through the world-wide-web at the following url: |
10 | http://www.php.net/license/3_01.txt |
11 | If you did not receive a copy of the PHP license and are unable to |
12 | obtain it through the world-wide-web, please send a note to |
13 | license@php.net so we can mail you a copy immediately. |
14 +----------------------------------------------------------------------+
15 | Author: Clayton Collie <clcollie@mindspring.com> |
16 +----------------------------------------------------------------------+
17 */
18
19 /* $Id$ */
20
21 /*
22 scanf.c --
23
24 This file contains the base code which implements sscanf and by extension
25 fscanf. Original code is from TCL8.3.0 and bears the following copyright:
26
27 This software is copyrighted by the Regents of the University of
28 California, Sun Microsystems, Inc., Scriptics Corporation,
29 and other parties. The following terms apply to all files associated
30 with the software unless explicitly disclaimed in individual files.
31
32 The authors hereby grant permission to use, copy, modify, distribute,
33 and license this software and its documentation for any purpose, provided
34 that existing copyright notices are retained in all copies and that this
35 notice is included verbatim in any distributions. No written agreement,
36 license, or royalty fee is required for any of the authorized uses.
37 Modifications to this software may be copyrighted by their authors
38 and need not follow the licensing terms described here, provided that
39 the new terms are clearly indicated on the first page of each file where
40 they apply.
41
42 IN NO EVENT SHALL THE AUTHORS OR DISTRIBUTORS BE LIABLE TO ANY PARTY
43 FOR DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
44 ARISING OUT OF THE USE OF THIS SOFTWARE, ITS DOCUMENTATION, OR ANY
45 DERIVATIVES THEREOF, EVEN IF THE AUTHORS HAVE BEEN ADVISED OF THE
46 POSSIBILITY OF SUCH DAMAGE.
47
48 THE AUTHORS AND DISTRIBUTORS SPECIFICALLY DISCLAIM ANY WARRANTIES,
49 INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY,
50 FITNESS FOR A PARTICULAR PURPOSE, AND NON-INFRINGEMENT. THIS SOFTWARE
51 IS PROVIDED ON AN "AS IS" BASIS, AND THE AUTHORS AND DISTRIBUTORS HAVE
52 NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR
53 MODIFICATIONS.
54
55 GOVERNMENT USE: If you are acquiring this software on behalf of the
56 U.S. government, the Government shall have only "Restricted Rights"
57 in the software and related documentation as defined in the Federal
58 Acquisition Regulations (FARs) in Clause 52.227.19 (c) (2). If you
59 are acquiring the software on behalf of the Department of Defense, the
60 software shall be classified as "Commercial Computer Software" and the
61 Government shall have only "Restricted Rights" as defined in Clause
62 252.227-7013 (c) (1) of DFARs. Notwithstanding the foregoing, the
63 authors grant the U.S. Government and others acting in its behalf
64 permission to use and distribute the software in accordance with the
65 terms specified in this license.
66 */
67
68 #include <stdio.h>
69 #include <limits.h>
70 #include <ctype.h>
71 #include "php.h"
72 #include "php_variables.h"
73 #ifdef HAVE_LOCALE_H
74 #include <locale.h>
75 #endif
76 #include "zend_execute.h"
77 #include "zend_operators.h"
78 #include "zend_strtod.h"
79 #include "php_globals.h"
80 #include "basic_functions.h"
81 #include "scanf.h"
82
83 /*
84 * Flag values used internally by [f|s]canf.
85 */
#define SCAN_NOSKIP	0x1	/* Don't skip blanks. */
#define SCAN_SUPPRESS	0x2	/* Suppress assignment. */
#define SCAN_UNSIGNED	0x4	/* Read an unsigned value. */
#define SCAN_WIDTH	0x8	/* A width value was supplied. */

#define SCAN_SIGNOK	0x10	/* A +/- character is allowed. */
#define SCAN_NODIGITS	0x20	/* No digits have been scanned. */
#define SCAN_NOZERO	0x40	/* No zero digits have been scanned. */
#define SCAN_XOK	0x80	/* An 'x' is allowed. */
#define SCAN_PTOK	0x100	/* Decimal point is allowed. */
#define SCAN_EXPOK	0x200	/* An exponent is allowed. */

/*
 * Cast a char to zend_uchar, e.g. before handing it to a <ctype.h>
 * classifier. The whole expansion is parenthesized so that the cast
 * cannot be split apart by an adjacent operator at the use site
 * (sizeof, postfix operators, ...).
 */
#define UCHAR(x)	((zend_uchar)(x))
99
100 /*
101 * The following structure contains the information associated with
102 * a character set.
103 */
/* One inclusive character range; CharInSet() tests start <= ch <= end. */
struct Range {
	char start;
	char end;
};

typedef struct CharSet {
	int exclude;		/* 1 if this is an exclusion set. */
	int nchars;		/* Number of entries used in chars. */
	char *chars;		/* Individual characters belonging to the set. */
	int nranges;		/* Number of entries used in ranges. */
	struct Range *ranges;	/* Character ranges; NULL when none were allocated. */
} CharSet;
114
115 /*
116 * Declarations for functions used only in this file.
117 */
118 static char *BuildCharSet(CharSet *cset, char *format);
119 static int CharInSet(CharSet *cset, int ch);
120 static void ReleaseCharSet(CharSet *cset);
121 static inline void scan_set_error_return(int numVars, zval **return_value);
122
123
124 /* {{{ BuildCharSet
125 *----------------------------------------------------------------------
126 *
127 * BuildCharSet --
128 *
129 * This function examines a character set format specification
130 * and builds a CharSet containing the individual characters and
131 * character ranges specified.
132 *
133 * Results:
134 * Returns the next format position.
135 *
136 * Side effects:
137 * Initializes the charset.
138 *
139 *----------------------------------------------------------------------
140 */
BuildCharSet(CharSet * cset,char * format)141 static char * BuildCharSet(CharSet *cset, char *format)
142 {
143 char *ch, start;
144 int nranges;
145 char *end;
146
147 memset(cset, 0, sizeof(CharSet));
148
149 ch = format;
150 if (*ch == '^') {
151 cset->exclude = 1;
152 ch = ++format;
153 }
154 end = format + 1; /* verify this - cc */
155
156 /*
157 * Find the close bracket so we can overallocate the set.
158 */
159 if (*ch == ']') {
160 ch = end++;
161 }
162 nranges = 0;
163 while (*ch != ']') {
164 if (*ch == '-') {
165 nranges++;
166 }
167 ch = end++;
168 }
169
170 cset->chars = (char *) safe_emalloc(sizeof(char), (end - format - 1), 0);
171 if (nranges > 0) {
172 cset->ranges = (struct Range *) safe_emalloc(sizeof(struct Range), nranges, 0);
173 } else {
174 cset->ranges = NULL;
175 }
176
177 /*
178 * Now build the character set.
179 */
180 cset->nchars = cset->nranges = 0;
181 ch = format++;
182 start = *ch;
183 if (*ch == ']' || *ch == '-') {
184 cset->chars[cset->nchars++] = *ch;
185 ch = format++;
186 }
187 while (*ch != ']') {
188 if (*format == '-') {
189 /*
190 * This may be the first character of a range, so don't add
191 * it yet.
192 */
193 start = *ch;
194 } else if (*ch == '-') {
195 /*
196 * Check to see if this is the last character in the set, in which
197 * case it is not a range and we should add the previous character
198 * as well as the dash.
199 */
200 if (*format == ']') {
201 cset->chars[cset->nchars++] = start;
202 cset->chars[cset->nchars++] = *ch;
203 } else {
204 ch = format++;
205
206 /*
207 * Check to see if the range is in reverse order.
208 */
209 if (start < *ch) {
210 cset->ranges[cset->nranges].start = start;
211 cset->ranges[cset->nranges].end = *ch;
212 } else {
213 cset->ranges[cset->nranges].start = *ch;
214 cset->ranges[cset->nranges].end = start;
215 }
216 cset->nranges++;
217 }
218 } else {
219 cset->chars[cset->nchars++] = *ch;
220 }
221 ch = format++;
222 }
223 return format;
224 }
225 /* }}} */
226
227 /* {{{ CharInSet
228 *----------------------------------------------------------------------
229 *
230 * CharInSet --
231 *
232 * Check to see if a character matches the given set.
233 *
234 * Results:
235 * Returns non-zero if the character matches the given set.
236 *
237 * Side effects:
238 * None.
239 *
240 *----------------------------------------------------------------------
241 */
CharInSet(CharSet * cset,int c)242 static int CharInSet(CharSet *cset, int c)
243 {
244 char ch = (char) c;
245 int i, match = 0;
246
247 for (i = 0; i < cset->nchars; i++) {
248 if (cset->chars[i] == ch) {
249 match = 1;
250 break;
251 }
252 }
253 if (!match) {
254 for (i = 0; i < cset->nranges; i++) {
255 if ((cset->ranges[i].start <= ch)
256 && (ch <= cset->ranges[i].end)) {
257 match = 1;
258 break;
259 }
260 }
261 }
262 return (cset->exclude ? !match : match);
263 }
264 /* }}} */
265
266 /* {{{ ReleaseCharSet
267 *----------------------------------------------------------------------
268 *
269 * ReleaseCharSet --
270 *
271 * Free the storage associated with a character set.
272 *
273 * Results:
274 * None.
275 *
276 * Side effects:
277 * None.
278 *
279 *----------------------------------------------------------------------
280 */
ReleaseCharSet(CharSet * cset)281 static void ReleaseCharSet(CharSet *cset)
282 {
283 efree((char *)cset->chars);
284 if (cset->ranges) {
285 efree((char *)cset->ranges);
286 }
287 }
288 /* }}} */
289
290 /* {{{ ValidateFormat
291 *----------------------------------------------------------------------
292 *
293 * ValidateFormat --
294 *
295 * Parse the format string and verify that it is properly formed
296 * and that there are exactly enough variables on the command line.
297 *
298 * Results:
299 * FAILURE or SUCCESS.
300 *
301 * Side effects:
302 * May set php_error based on abnormal conditions.
303 *
304 * Parameters :
305 * format The format string.
306 * numVars The number of variables passed to the scan command.
307 * totalSubs The number of variables that will be required.
308 *
309 *----------------------------------------------------------------------
310 */
ValidateFormat(char * format,int numVars,int * totalSubs)311 PHPAPI int ValidateFormat(char *format, int numVars, int *totalSubs)
312 {
313 #define STATIC_LIST_SIZE 16
314 int gotXpg, gotSequential, value, i, flags;
315 char *end, *ch = NULL;
316 int staticAssign[STATIC_LIST_SIZE];
317 int *nassign = staticAssign;
318 int objIndex, xpgSize, nspace = STATIC_LIST_SIZE;
319 TSRMLS_FETCH();
320
321 /*
322 * Initialize an array that records the number of times a variable
323 * is assigned to by the format string. We use this to detect if
324 * a variable is multiply assigned or left unassigned.
325 */
326 if (numVars > nspace) {
327 nassign = (int*)safe_emalloc(sizeof(int), numVars, 0);
328 nspace = numVars;
329 }
330 for (i = 0; i < nspace; i++) {
331 nassign[i] = 0;
332 }
333
334 xpgSize = objIndex = gotXpg = gotSequential = 0;
335
336 while (*format != '\0') {
337 ch = format++;
338 flags = 0;
339
340 if (*ch != '%') {
341 continue;
342 }
343 ch = format++;
344 if (*ch == '%') {
345 continue;
346 }
347 if (*ch == '*') {
348 flags |= SCAN_SUPPRESS;
349 ch = format++;
350 goto xpgCheckDone;
351 }
352
353 if ( isdigit( (int)*ch ) ) {
354 /*
355 * Check for an XPG3-style %n$ specification. Note: there
356 * must not be a mixture of XPG3 specs and non-XPG3 specs
357 * in the same format string.
358 */
359 value = strtoul(format-1, &end, 10);
360 if (*end != '$') {
361 goto notXpg;
362 }
363 format = end+1;
364 ch = format++;
365 gotXpg = 1;
366 if (gotSequential) {
367 goto mixedXPG;
368 }
369 objIndex = value - 1;
370 if ((objIndex < 0) || (numVars && (objIndex >= numVars))) {
371 goto badIndex;
372 } else if (numVars == 0) {
373 /*
374 * In the case where no vars are specified, the user can
375 * specify %9999$ legally, so we have to consider special
376 * rules for growing the assign array. 'value' is
377 * guaranteed to be > 0.
378 */
379
380 /* set a lower artificial limit on this
381 * in the interest of security and resource friendliness
382 * 255 arguments should be more than enough. - cc
383 */
384 if (value > SCAN_MAX_ARGS) {
385 goto badIndex;
386 }
387
388 xpgSize = (xpgSize > value) ? xpgSize : value;
389 }
390 goto xpgCheckDone;
391 }
392
393 notXpg:
394 gotSequential = 1;
395 if (gotXpg) {
396 mixedXPG:
397 php_error_docref(NULL TSRMLS_CC, E_WARNING, "%s", "cannot mix \"%\" and \"%n$\" conversion specifiers");
398 goto error;
399 }
400
401 xpgCheckDone:
402 /*
403 * Parse any width specifier.
404 */
405 if (isdigit(UCHAR(*ch))) {
406 value = strtoul(format-1, &format, 10);
407 flags |= SCAN_WIDTH;
408 ch = format++;
409 }
410
411 /*
412 * Ignore size specifier.
413 */
414 if ((*ch == 'l') || (*ch == 'L') || (*ch == 'h')) {
415 ch = format++;
416 }
417
418 if (!(flags & SCAN_SUPPRESS) && numVars && (objIndex >= numVars)) {
419 goto badIndex;
420 }
421
422 /*
423 * Handle the various field types.
424 */
425 switch (*ch) {
426 case 'n':
427 case 'd':
428 case 'D':
429 case 'i':
430 case 'o':
431 case 'x':
432 case 'X':
433 case 'u':
434 case 'f':
435 case 'e':
436 case 'E':
437 case 'g':
438 case 's':
439 break;
440
441 case 'c':
442 /* we differ here with the TCL implementation in allowing for */
443 /* a character width specification, to be more consistent with */
444 /* ANSI. since Zend auto allocates space for vars, this is no */
445 /* problem - cc */
446 /*
447 if (flags & SCAN_WIDTH) {
448 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Field width may not be specified in %c conversion");
449 goto error;
450 }
451 */
452 break;
453
454 case '[':
455 if (*format == '\0') {
456 goto badSet;
457 }
458 ch = format++;
459 if (*ch == '^') {
460 if (*format == '\0') {
461 goto badSet;
462 }
463 ch = format++;
464 }
465 if (*ch == ']') {
466 if (*format == '\0') {
467 goto badSet;
468 }
469 ch = format++;
470 }
471 while (*ch != ']') {
472 if (*format == '\0') {
473 goto badSet;
474 }
475 ch = format++;
476 }
477 break;
478 badSet:
479 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unmatched [ in format string");
480 goto error;
481
482 default: {
483 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Bad scan conversion character \"%c\"", *ch);
484 goto error;
485 }
486 }
487
488 if (!(flags & SCAN_SUPPRESS)) {
489 if (objIndex >= nspace) {
490 /*
491 * Expand the nassign buffer. If we are using XPG specifiers,
492 * make sure that we grow to a large enough size. xpgSize is
493 * guaranteed to be at least one larger than objIndex.
494 */
495 value = nspace;
496 if (xpgSize) {
497 nspace = xpgSize;
498 } else {
499 nspace += STATIC_LIST_SIZE;
500 }
501 if (nassign == staticAssign) {
502 nassign = (void *)safe_emalloc(nspace, sizeof(int), 0);
503 for (i = 0; i < STATIC_LIST_SIZE; ++i) {
504 nassign[i] = staticAssign[i];
505 }
506 } else {
507 nassign = (void *)erealloc((void *)nassign, nspace * sizeof(int));
508 }
509 for (i = value; i < nspace; i++) {
510 nassign[i] = 0;
511 }
512 }
513 nassign[objIndex]++;
514 objIndex++;
515 }
516 } /* while (*format != '\0') */
517
518 /*
519 * Verify that all of the variable were assigned exactly once.
520 */
521 if (numVars == 0) {
522 if (xpgSize) {
523 numVars = xpgSize;
524 } else {
525 numVars = objIndex;
526 }
527 }
528 if (totalSubs) {
529 *totalSubs = numVars;
530 }
531 for (i = 0; i < numVars; i++) {
532 if (nassign[i] > 1) {
533 php_error_docref(NULL TSRMLS_CC, E_WARNING, "%s", "Variable is assigned by multiple \"%n$\" conversion specifiers");
534 goto error;
535 } else if (!xpgSize && (nassign[i] == 0)) {
536 /*
537 * If the space is empty, and xpgSize is 0 (means XPG wasn't
538 * used, and/or numVars != 0), then too many vars were given
539 */
540 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Variable is not assigned by any conversion specifiers");
541 goto error;
542 }
543 }
544
545 if (nassign != staticAssign) {
546 efree((char *)nassign);
547 }
548 return SCAN_SUCCESS;
549
550 badIndex:
551 if (gotXpg) {
552 php_error_docref(NULL TSRMLS_CC, E_WARNING, "%s", "\"%n$\" argument index out of range");
553 } else {
554 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Different numbers of variable names and field specifiers");
555 }
556
557 error:
558 if (nassign != staticAssign) {
559 efree((char *)nassign);
560 }
561 return SCAN_ERROR_INVALID_FORMAT;
562 #undef STATIC_LIST_SIZE
563 }
564 /* }}} */
565
566 /* {{{ php_sscanf_internal
567 * This is the internal function which does processing on behalf of
568 * both sscanf() and fscanf()
569 *
570 * parameters :
571 * string literal string to be processed
572 * format format string
573 * argCount total number of elements in the args array
574 * args arguments passed in from user function (f|s)scanf
575 * varStart offset (in args) of 1st variable passed in to (f|s)scanf
576 * return_value set with the results of the scan
577 */
578
php_sscanf_internal(char * string,char * format,int argCount,zval *** args,int varStart,zval ** return_value TSRMLS_DC)579 PHPAPI int php_sscanf_internal( char *string, char *format,
580 int argCount, zval ***args,
581 int varStart, zval **return_value TSRMLS_DC)
582 {
583 int numVars, nconversions, totalVars = -1;
584 int i, result;
585 long value;
586 int objIndex;
587 char *end, *baseString;
588 zval **current;
589 char op = 0;
590 int base = 0;
591 int underflow = 0;
592 size_t width;
593 long (*fn)() = NULL;
594 char *ch, sch;
595 int flags;
596 char buf[64]; /* Temporary buffer to hold scanned number
597 * strings before they are passed to strtoul() */
598
599 /* do some sanity checking */
600 if ((varStart > argCount) || (varStart < 0)){
601 varStart = SCAN_MAX_ARGS + 1;
602 }
603 numVars = argCount - varStart;
604 if (numVars < 0) {
605 numVars = 0;
606 }
607
608 #if 0
609 zend_printf("<br>in sscanf_internal : <br> string is \"%s\", format = \"%s\"<br> NumVars = %d. VarStart = %d<br>-------------------------<br>",
610 string, format, numVars, varStart);
611 #endif
612 /*
613 * Check for errors in the format string.
614 */
615 if (ValidateFormat(format, numVars, &totalVars) != SCAN_SUCCESS) {
616 scan_set_error_return( numVars, return_value );
617 return SCAN_ERROR_INVALID_FORMAT;
618 }
619
620 objIndex = numVars ? varStart : 0;
621
622 /*
623 * If any variables are passed, make sure they are all passed by reference
624 */
625 if (numVars) {
626 for (i = varStart;i < argCount;i++){
627 if ( ! PZVAL_IS_REF( *args[ i ] ) ) {
628 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Parameter %d must be passed by reference", i);
629 scan_set_error_return(numVars, return_value);
630 return SCAN_ERROR_VAR_PASSED_BYVAL;
631 }
632 }
633 }
634
635 /*
636 * Allocate space for the result objects. Only happens when no variables
637 * are specified
638 */
639 if (!numVars) {
640 zval *tmp;
641
642 /* allocate an array for return */
643 array_init(*return_value);
644
645 for (i = 0; i < totalVars; i++) {
646 MAKE_STD_ZVAL(tmp);
647 ZVAL_NULL(tmp);
648 if (add_next_index_zval(*return_value, tmp) == FAILURE) {
649 scan_set_error_return(0, return_value);
650 return FAILURE;
651 }
652 }
653 varStart = 0; /* Array index starts from 0 */
654 }
655
656 baseString = string;
657
658 /*
659 * Iterate over the format string filling in the result objects until
660 * we reach the end of input, the end of the format string, or there
661 * is a mismatch.
662 */
663 nconversions = 0;
664 /* note ! - we need to limit the loop for objIndex to keep it in bounds */
665
666 while (*format != '\0') {
667 ch = format++;
668 flags = 0;
669
670 /*
671 * If we see whitespace in the format, skip whitespace in the string.
672 */
673 if ( isspace( (int)*ch ) ) {
674 sch = *string;
675 while ( isspace( (int)sch ) ) {
676 if (*string == '\0') {
677 goto done;
678 }
679 string++;
680 sch = *string;
681 }
682 continue;
683 }
684
685 if (*ch != '%') {
686 literal:
687 if (*string == '\0') {
688 underflow = 1;
689 goto done;
690 }
691 sch = *string;
692 string++;
693 if (*ch != sch) {
694 goto done;
695 }
696 continue;
697 }
698
699 ch = format++;
700 if (*ch == '%') {
701 goto literal;
702 }
703
704 /*
705 * Check for assignment suppression ('*') or an XPG3-style
706 * assignment ('%n$').
707 */
708 if (*ch == '*') {
709 flags |= SCAN_SUPPRESS;
710 ch = format++;
711 } else if ( isdigit(UCHAR(*ch))) {
712 value = strtoul(format-1, &end, 10);
713 if (*end == '$') {
714 format = end+1;
715 ch = format++;
716 objIndex = varStart + value - 1;
717 }
718 }
719
720 /*
721 * Parse any width specifier.
722 */
723 if ( isdigit(UCHAR(*ch))) {
724 width = strtoul(format-1, &format, 10);
725 ch = format++;
726 } else {
727 width = 0;
728 }
729
730 /*
731 * Ignore size specifier.
732 */
733 if ((*ch == 'l') || (*ch == 'L') || (*ch == 'h')) {
734 ch = format++;
735 }
736
737 /*
738 * Handle the various field types.
739 */
740 switch (*ch) {
741 case 'n':
742 if (!(flags & SCAN_SUPPRESS)) {
743 if (numVars && objIndex >= argCount) {
744 break;
745 } else if (numVars) {
746 zend_uint refcount;
747
748 current = args[objIndex++];
749 refcount = Z_REFCOUNT_PP(current);
750 zval_dtor( *current );
751 ZVAL_LONG( *current, (long)(string - baseString) );
752 Z_SET_REFCOUNT_PP(current, refcount);
753 Z_SET_ISREF_PP(current);
754 } else {
755 add_index_long(*return_value, objIndex++, string - baseString);
756 }
757 }
758 nconversions++;
759 continue;
760
761 case 'd':
762 case 'D':
763 op = 'i';
764 base = 10;
765 fn = (long (*)())strtol;
766 break;
767 case 'i':
768 op = 'i';
769 base = 0;
770 fn = (long (*)())strtol;
771 break;
772 case 'o':
773 op = 'i';
774 base = 8;
775 fn = (long (*)())strtol;
776 break;
777 case 'x':
778 case 'X':
779 op = 'i';
780 base = 16;
781 fn = (long (*)())strtol;
782 break;
783 case 'u':
784 op = 'i';
785 base = 10;
786 flags |= SCAN_UNSIGNED;
787 fn = (long (*)())strtoul;
788 break;
789
790 case 'f':
791 case 'e':
792 case 'E':
793 case 'g':
794 op = 'f';
795 break;
796
797 case 's':
798 op = 's';
799 break;
800
801 case 'c':
802 op = 's';
803 flags |= SCAN_NOSKIP;
804 /*-cc-*/
805 if (0 == width) {
806 width = 1;
807 }
808 /*-cc-*/
809 break;
810 case '[':
811 op = '[';
812 flags |= SCAN_NOSKIP;
813 break;
814 } /* switch */
815
816 /*
817 * At this point, we will need additional characters from the
818 * string to proceed.
819 */
820 if (*string == '\0') {
821 underflow = 1;
822 goto done;
823 }
824
825 /*
826 * Skip any leading whitespace at the beginning of a field unless
827 * the format suppresses this behavior.
828 */
829 if (!(flags & SCAN_NOSKIP)) {
830 while (*string != '\0') {
831 sch = *string;
832 if (! isspace((int)sch) ) {
833 break;
834 }
835 string++;
836 }
837 if (*string == '\0') {
838 underflow = 1;
839 goto done;
840 }
841 }
842
843 /*
844 * Perform the requested scanning operation.
845 */
846 switch (op) {
847 case 'c':
848 case 's':
849 /*
850 * Scan a string up to width characters or whitespace.
851 */
852 if (width == 0) {
853 width = (size_t) ~0;
854 }
855 end = string;
856 while (*end != '\0') {
857 sch = *end;
858 if ( isspace( (int)sch ) ) {
859 break;
860 }
861 end++;
862 if (--width == 0) {
863 break;
864 }
865 }
866 if (!(flags & SCAN_SUPPRESS)) {
867 if (numVars && objIndex >= argCount) {
868 break;
869 } else if (numVars) {
870 zend_uint refcount;
871
872 current = args[objIndex++];
873 refcount = Z_REFCOUNT_PP(current);
874 zval_dtor( *current );
875 ZVAL_STRINGL( *current, string, end-string, 1);
876 Z_SET_REFCOUNT_PP(current, refcount);
877 Z_SET_ISREF_PP(current);
878 } else {
879 add_index_stringl( *return_value, objIndex++, string, end-string, 1);
880 }
881 }
882 string = end;
883 break;
884
885 case '[': {
886 CharSet cset;
887
888 if (width == 0) {
889 width = (size_t) ~0;
890 }
891 end = string;
892
893 format = BuildCharSet(&cset, format);
894 while (*end != '\0') {
895 sch = *end;
896 if (!CharInSet(&cset, (int)sch)) {
897 break;
898 }
899 end++;
900 if (--width == 0) {
901 break;
902 }
903 }
904 ReleaseCharSet(&cset);
905
906 if (string == end) {
907 /*
908 * Nothing matched the range, stop processing
909 */
910 goto done;
911 }
912 if (!(flags & SCAN_SUPPRESS)) {
913 if (numVars && objIndex >= argCount) {
914 break;
915 } else if (numVars) {
916 current = args[objIndex++];
917 zval_dtor( *current );
918 ZVAL_STRINGL( *current, string, end-string, 1);
919 } else {
920 add_index_stringl(*return_value, objIndex++, string, end-string, 1);
921 }
922 }
923 string = end;
924 break;
925 }
926 /*
927 case 'c':
928 / Scan a single character./
929
930 sch = *string;
931 string++;
932 if (!(flags & SCAN_SUPPRESS)) {
933 if (numVars) {
934 char __buf[2];
935 __buf[0] = sch;
936 __buf[1] = '\0';;
937 current = args[objIndex++];
938 zval_dtor(*current);
939 ZVAL_STRINGL( *current, __buf, 1, 1);
940 } else {
941 add_index_stringl(*return_value, objIndex++, &sch, 1, 1);
942 }
943 }
944 break;
945 */
946 case 'i':
947 /*
948 * Scan an unsigned or signed integer.
949 */
950 /*-cc-*/
951 buf[0] = '\0';
952 /*-cc-*/
953 if ((width == 0) || (width > sizeof(buf) - 1)) {
954 width = sizeof(buf) - 1;
955 }
956
957 flags |= SCAN_SIGNOK | SCAN_NODIGITS | SCAN_NOZERO;
958 for (end = buf; width > 0; width--) {
959 switch (*string) {
960 /*
961 * The 0 digit has special meaning at the beginning of
962 * a number. If we are unsure of the base, it
963 * indicates that we are in base 8 or base 16 (if it is
964 * followed by an 'x').
965 */
966 case '0':
967 /*-cc-*/
968 if (base == 16) {
969 flags |= SCAN_XOK;
970 }
971 /*-cc-*/
972 if (base == 0) {
973 base = 8;
974 flags |= SCAN_XOK;
975 }
976 if (flags & SCAN_NOZERO) {
977 flags &= ~(SCAN_SIGNOK | SCAN_NODIGITS | SCAN_NOZERO);
978 } else {
979 flags &= ~(SCAN_SIGNOK | SCAN_XOK | SCAN_NODIGITS);
980 }
981 goto addToInt;
982
983 case '1': case '2': case '3': case '4':
984 case '5': case '6': case '7':
985 if (base == 0) {
986 base = 10;
987 }
988 flags &= ~(SCAN_SIGNOK | SCAN_XOK | SCAN_NODIGITS);
989 goto addToInt;
990
991 case '8': case '9':
992 if (base == 0) {
993 base = 10;
994 }
995 if (base <= 8) {
996 break;
997 }
998 flags &= ~(SCAN_SIGNOK | SCAN_XOK | SCAN_NODIGITS);
999 goto addToInt;
1000
1001 case 'A': case 'B': case 'C':
1002 case 'D': case 'E': case 'F':
1003 case 'a': case 'b': case 'c':
1004 case 'd': case 'e': case 'f':
1005 if (base <= 10) {
1006 break;
1007 }
1008 flags &= ~(SCAN_SIGNOK | SCAN_XOK | SCAN_NODIGITS);
1009 goto addToInt;
1010
1011 case '+': case '-':
1012 if (flags & SCAN_SIGNOK) {
1013 flags &= ~SCAN_SIGNOK;
1014 goto addToInt;
1015 }
1016 break;
1017
1018 case 'x': case 'X':
1019 if ((flags & SCAN_XOK) && (end == buf+1)) {
1020 base = 16;
1021 flags &= ~SCAN_XOK;
1022 goto addToInt;
1023 }
1024 break;
1025 }
1026
1027 /*
1028 * We got an illegal character so we are done accumulating.
1029 */
1030 break;
1031
1032 addToInt:
1033 /*
1034 * Add the character to the temporary buffer.
1035 */
1036 *end++ = *string++;
1037 if (*string == '\0') {
1038 break;
1039 }
1040 }
1041
1042 /*
1043 * Check to see if we need to back up because we only got a
1044 * sign or a trailing x after a 0.
1045 */
1046 if (flags & SCAN_NODIGITS) {
1047 if (*string == '\0') {
1048 underflow = 1;
1049 }
1050 goto done;
1051 } else if (end[-1] == 'x' || end[-1] == 'X') {
1052 end--;
1053 string--;
1054 }
1055
1056 /*
1057 * Scan the value from the temporary buffer. If we are
1058 * returning a large unsigned value, we have to convert it back
1059 * to a string since PHP only supports signed values.
1060 */
1061 if (!(flags & SCAN_SUPPRESS)) {
1062 *end = '\0';
1063 value = (long) (*fn)(buf, NULL, base);
1064 if ((flags & SCAN_UNSIGNED) && (value < 0)) {
1065 snprintf(buf, sizeof(buf), "%lu", value); /* INTL: ISO digit */
1066 if (numVars && objIndex >= argCount) {
1067 break;
1068 } else if (numVars) {
1069 /* change passed value type to string */
1070 current = args[objIndex++];
1071 zval_dtor(*current);
1072 ZVAL_STRING( *current, buf, 1 );
1073 } else {
1074 add_index_string(*return_value, objIndex++, buf, 1);
1075 }
1076 } else {
1077 if (numVars && objIndex >= argCount) {
1078 break;
1079 } else if (numVars) {
1080 current = args[objIndex++];
1081 zval_dtor(*current);
1082 ZVAL_LONG(*current, value);
1083 } else {
1084 add_index_long(*return_value, objIndex++, value);
1085 }
1086 }
1087 }
1088 break;
1089
1090 case 'f':
1091 /*
1092 * Scan a floating point number
1093 */
1094 buf[0] = '\0'; /* call me pedantic */
1095 if ((width == 0) || (width > sizeof(buf) - 1)) {
1096 width = sizeof(buf) - 1;
1097 }
1098 flags |= SCAN_SIGNOK | SCAN_NODIGITS | SCAN_PTOK | SCAN_EXPOK;
1099 for (end = buf; width > 0; width--) {
1100 switch (*string) {
1101 case '0': case '1': case '2': case '3':
1102 case '4': case '5': case '6': case '7':
1103 case '8': case '9':
1104 flags &= ~(SCAN_SIGNOK | SCAN_NODIGITS);
1105 goto addToFloat;
1106 case '+':
1107 case '-':
1108 if (flags & SCAN_SIGNOK) {
1109 flags &= ~SCAN_SIGNOK;
1110 goto addToFloat;
1111 }
1112 break;
1113 case '.':
1114 if (flags & SCAN_PTOK) {
1115 flags &= ~(SCAN_SIGNOK | SCAN_PTOK);
1116 goto addToFloat;
1117 }
1118 break;
1119 case 'e':
1120 case 'E':
1121 /*
1122 * An exponent is not allowed until there has
1123 * been at least one digit.
1124 */
1125 if ((flags & (SCAN_NODIGITS | SCAN_EXPOK)) == SCAN_EXPOK) {
1126 flags = (flags & ~(SCAN_EXPOK|SCAN_PTOK))
1127 | SCAN_SIGNOK | SCAN_NODIGITS;
1128 goto addToFloat;
1129 }
1130 break;
1131 }
1132
1133 /*
1134 * We got an illegal character so we are done accumulating.
1135 */
1136 break;
1137
1138 addToFloat:
1139 /*
1140 * Add the character to the temporary buffer.
1141 */
1142 *end++ = *string++;
1143 if (*string == '\0') {
1144 break;
1145 }
1146 }
1147
1148 /*
1149 * Check to see if we need to back up because we saw a
1150 * trailing 'e' or sign.
1151 */
1152 if (flags & SCAN_NODIGITS) {
1153 if (flags & SCAN_EXPOK) {
1154 /*
1155 * There were no digits at all so scanning has
1156 * failed and we are done.
1157 */
1158 if (*string == '\0') {
1159 underflow = 1;
1160 }
1161 goto done;
1162 }
1163
1164 /*
1165 * We got a bad exponent ('e' and maybe a sign).
1166 */
1167 end--;
1168 string--;
1169 if (*end != 'e' && *end != 'E') {
1170 end--;
1171 string--;
1172 }
1173 }
1174
1175 /*
1176 * Scan the value from the temporary buffer.
1177 */
1178 if (!(flags & SCAN_SUPPRESS)) {
1179 double dvalue;
1180 *end = '\0';
1181 dvalue = zend_strtod(buf, NULL);
1182 if (numVars && objIndex >= argCount) {
1183 break;
1184 } else if (numVars) {
1185 current = args[objIndex++];
1186 zval_dtor(*current);
1187 ZVAL_DOUBLE(*current, dvalue);
1188 } else {
1189 add_index_double( *return_value, objIndex++, dvalue );
1190 }
1191 }
1192 break;
1193 } /* switch (op) */
1194 nconversions++;
1195 } /* while (*format != '\0') */
1196
1197 done:
1198 result = SCAN_SUCCESS;
1199
1200 if (underflow && (0==nconversions)) {
1201 scan_set_error_return( numVars, return_value );
1202 result = SCAN_ERROR_EOF;
1203 } else if (numVars) {
1204 convert_to_long( *return_value );
1205 Z_LVAL_PP(return_value) = nconversions;
1206 } else if (nconversions < totalVars) {
1207 /* TODO: not all elements converted. we need to prune the list - cc */
1208 }
1209 return result;
1210 }
1211 /* }}} */
1212
1213 /* the compiler choked when i tried to make this a macro */
scan_set_error_return(int numVars,zval ** return_value)1214 static inline void scan_set_error_return(int numVars, zval **return_value) /* {{{ */
1215 {
1216 if (numVars) {
1217 Z_TYPE_PP(return_value) = IS_LONG;
1218 Z_LVAL_PP(return_value) = SCAN_ERROR_EOF; /* EOF marker */
1219 } else {
1220 /* convert_to_null calls destructor */
1221 convert_to_null( *return_value );
1222 }
1223 }
1224 /* }}} */
1225
1226 /*
1227 * Local variables:
1228 * tab-width: 4
1229 * c-basic-offset: 4
1230 * End:
1231 * vim600: sw=4 ts=4 fdm=marker
1232 * vim<600: sw=4 ts=4
1233 */
1234