1 /*
2 +----------------------------------------------------------------------+
3 | Copyright (c) The PHP Group |
4 +----------------------------------------------------------------------+
5 | This source file is subject to version 3.01 of the PHP license, |
6 | that is bundled with this package in the file LICENSE, and is |
7 | available through the world-wide-web at the following url: |
8 | http://www.php.net/license/3_01.txt |
9 | If you did not receive a copy of the PHP license and are unable to |
10 | obtain it through the world-wide-web, please send a note to |
11 | license@php.net so we can mail you a copy immediately. |
12 +----------------------------------------------------------------------+
13 | Author: Clayton Collie <clcollie@mindspring.com> |
14 +----------------------------------------------------------------------+
15 */
16
17 /*
18 scanf.c --
19
20 This file contains the base code which implements sscanf and by extension
21 fscanf. Original code is from TCL8.3.0 and bears the following copyright:
22
23 This software is copyrighted by the Regents of the University of
24 California, Sun Microsystems, Inc., Scriptics Corporation,
25 and other parties. The following terms apply to all files associated
26 with the software unless explicitly disclaimed in individual files.
27
28 The authors hereby grant permission to use, copy, modify, distribute,
29 and license this software and its documentation for any purpose, provided
30 that existing copyright notices are retained in all copies and that this
31 notice is included verbatim in any distributions. No written agreement,
32 license, or royalty fee is required for any of the authorized uses.
33 Modifications to this software may be copyrighted by their authors
34 and need not follow the licensing terms described here, provided that
35 the new terms are clearly indicated on the first page of each file where
36 they apply.
37
38 IN NO EVENT SHALL THE AUTHORS OR DISTRIBUTORS BE LIABLE TO ANY PARTY
39 FOR DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
40 ARISING OUT OF THE USE OF THIS SOFTWARE, ITS DOCUMENTATION, OR ANY
41 DERIVATIVES THEREOF, EVEN IF THE AUTHORS HAVE BEEN ADVISED OF THE
42 POSSIBILITY OF SUCH DAMAGE.
43
44 THE AUTHORS AND DISTRIBUTORS SPECIFICALLY DISCLAIM ANY WARRANTIES,
45 INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY,
46 FITNESS FOR A PARTICULAR PURPOSE, AND NON-INFRINGEMENT. THIS SOFTWARE
47 IS PROVIDED ON AN "AS IS" BASIS, AND THE AUTHORS AND DISTRIBUTORS HAVE
48 NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR
49 MODIFICATIONS.
50
51 GOVERNMENT USE: If you are acquiring this software on behalf of the
52 U.S. government, the Government shall have only "Restricted Rights"
53 in the software and related documentation as defined in the Federal
54 Acquisition Regulations (FARs) in Clause 52.227.19 (c) (2). If you
55 are acquiring the software on behalf of the Department of Defense, the
56 software shall be classified as "Commercial Computer Software" and the
57 Government shall have only "Restricted Rights" as defined in Clause
58 252.227-7013 (c) (1) of DFARs. Notwithstanding the foregoing, the
59 authors grant the U.S. Government and others acting in its behalf
60 permission to use and distribute the software in accordance with the
61 terms specified in this license.
62 */
63
64 #include <stdio.h>
65 #include <limits.h>
66 #include <ctype.h>
67 #include "php.h"
68 #include "php_variables.h"
69 #include <locale.h>
70 #include "zend_execute.h"
71 #include "zend_operators.h"
72 #include "zend_strtod.h"
73 #include "php_globals.h"
74 #include "basic_functions.h"
75 #include "scanf.h"
76
/*
 * Flag values used internally by [f|s]canf.
 *
 * The first group describes the conversion currently being processed; the
 * second group tracks what the numeric scanners may still accept while they
 * accumulate characters.
 */
#define SCAN_NOSKIP     0x1       /* Don't skip blanks. */
#define SCAN_SUPPRESS   0x2	  /* Suppress assignment. */
#define SCAN_UNSIGNED   0x4	  /* Read an unsigned value. */
#define SCAN_WIDTH      0x8       /* A width value was supplied. */

#define SCAN_SIGNOK     0x10      /* A +/- character is allowed. */
#define SCAN_NODIGITS   0x20      /* No digits have been scanned. */
#define SCAN_NOZERO     0x40      /* No zero digits have been scanned. */
#define SCAN_XOK        0x80      /* An 'x' is allowed. */
#define SCAN_PTOK       0x100     /* Decimal point is allowed. */
#define SCAN_EXPOK      0x200     /* An exponent is allowed. */

/*
 * Convert a (possibly signed) char to an unsigned value, e.g. before handing
 * it to a <ctype.h> classification function. The whole expansion is
 * parenthesized so the macro behaves as a single primary expression in any
 * context (a bare cast would misparse after sizeof, for instance).
 */
#define UCHAR(x)		((zend_uchar)(x))
93
/*
 * One inclusive span of characters inside a scan set, such as the "a-z"
 * part of "%[a-z]".
 */
struct Range {
	char start;		/* first character of the span */
	char end;		/* last character of the span */
};

/*
 * The following structure contains the information associated with
 * a character set: the characters listed one by one, the ranges, and
 * whether the set is to be inverted.
 */
typedef struct CharSet {
	int exclude;		/* 1 if this is an exclusion set. */
	int nchars;		/* number of entries used in chars */
	char *chars;		/* characters listed individually */
	int nranges;		/* number of entries used in ranges */
	struct Range *ranges;	/* character ranges belonging to the set */
} CharSet;
108
/*
 * Declarations for functions used only in this file.
 */
/* Parses the body of a "%[...]" set into cset; returns the next format position. */
static char *BuildCharSet(CharSet *cset, char *format);
/* Returns non-zero when ch matches cset, honouring the set's exclusion flag. */
static int CharInSet(CharSet *cset, int ch);
/* Frees the chars and ranges buffers owned by cset. */
static void ReleaseCharSet(CharSet *cset);
/* Stores the failure result (SCAN_ERROR_EOF or NULL) in return_value. */
static inline void scan_set_error_return(int numVars, zval *return_value);
116
117
118 /* {{{ BuildCharSet
119 *----------------------------------------------------------------------
120 *
121 * BuildCharSet --
122 *
123 * This function examines a character set format specification
124 * and builds a CharSet containing the individual characters and
125 * character ranges specified.
126 *
127 * Results:
128 * Returns the next format position.
129 *
130 * Side effects:
131 * Initializes the charset.
132 *
133 *----------------------------------------------------------------------
134 */
BuildCharSet(CharSet * cset,char * format)135 static char * BuildCharSet(CharSet *cset, char *format)
136 {
137 char *ch, start;
138 int nranges;
139 char *end;
140
141 memset(cset, 0, sizeof(CharSet));
142
143 ch = format;
144 if (*ch == '^') {
145 cset->exclude = 1;
146 ch = ++format;
147 }
148 end = format + 1; /* verify this - cc */
149
150 /*
151 * Find the close bracket so we can overallocate the set.
152 */
153 if (*ch == ']') {
154 ch = end++;
155 }
156 nranges = 0;
157 while (*ch != ']') {
158 if (*ch == '-') {
159 nranges++;
160 }
161 ch = end++;
162 }
163
164 cset->chars = (char *) safe_emalloc(sizeof(char), (end - format - 1), 0);
165 if (nranges > 0) {
166 cset->ranges = (struct Range *) safe_emalloc(sizeof(struct Range), nranges, 0);
167 } else {
168 cset->ranges = NULL;
169 }
170
171 /*
172 * Now build the character set.
173 */
174 cset->nchars = cset->nranges = 0;
175 ch = format++;
176 start = *ch;
177 if (*ch == ']' || *ch == '-') {
178 cset->chars[cset->nchars++] = *ch;
179 ch = format++;
180 }
181 while (*ch != ']') {
182 if (*format == '-') {
183 /*
184 * This may be the first character of a range, so don't add
185 * it yet.
186 */
187 start = *ch;
188 } else if (*ch == '-') {
189 /*
190 * Check to see if this is the last character in the set, in which
191 * case it is not a range and we should add the previous character
192 * as well as the dash.
193 */
194 if (*format == ']') {
195 cset->chars[cset->nchars++] = start;
196 cset->chars[cset->nchars++] = *ch;
197 } else {
198 ch = format++;
199
200 /*
201 * Check to see if the range is in reverse order.
202 */
203 if (start < *ch) {
204 cset->ranges[cset->nranges].start = start;
205 cset->ranges[cset->nranges].end = *ch;
206 } else {
207 cset->ranges[cset->nranges].start = *ch;
208 cset->ranges[cset->nranges].end = start;
209 }
210 cset->nranges++;
211 }
212 } else {
213 cset->chars[cset->nchars++] = *ch;
214 }
215 ch = format++;
216 }
217 return format;
218 }
219 /* }}} */
220
221 /* {{{ CharInSet
222 *----------------------------------------------------------------------
223 *
224 * CharInSet --
225 *
226 * Check to see if a character matches the given set.
227 *
228 * Results:
229 * Returns non-zero if the character matches the given set.
230 *
231 * Side effects:
232 * None.
233 *
234 *----------------------------------------------------------------------
235 */
CharInSet(CharSet * cset,int c)236 static int CharInSet(CharSet *cset, int c)
237 {
238 char ch = (char) c;
239 int i, match = 0;
240
241 for (i = 0; i < cset->nchars; i++) {
242 if (cset->chars[i] == ch) {
243 match = 1;
244 break;
245 }
246 }
247 if (!match) {
248 for (i = 0; i < cset->nranges; i++) {
249 if ((cset->ranges[i].start <= ch)
250 && (ch <= cset->ranges[i].end)) {
251 match = 1;
252 break;
253 }
254 }
255 }
256 return (cset->exclude ? !match : match);
257 }
258 /* }}} */
259
260 /* {{{ ReleaseCharSet
261 *----------------------------------------------------------------------
262 *
263 * ReleaseCharSet --
264 *
265 * Free the storage associated with a character set.
266 *
267 * Results:
268 * None.
269 *
270 * Side effects:
271 * None.
272 *
273 *----------------------------------------------------------------------
274 */
ReleaseCharSet(CharSet * cset)275 static void ReleaseCharSet(CharSet *cset)
276 {
277 efree((char *)cset->chars);
278 if (cset->ranges) {
279 efree((char *)cset->ranges);
280 }
281 }
282 /* }}} */
283
284 /* {{{ ValidateFormat
285 *----------------------------------------------------------------------
286 *
287 * ValidateFormat --
288 *
289 * Parse the format string and verify that it is properly formed
290 * and that there are exactly enough variables on the command line.
291 *
292 * Results:
293 * FAILURE or SUCCESS.
294 *
295 * Side effects:
296 * May set php_error based on abnormal conditions.
297 *
298 * Parameters :
299 * format The format string.
300 * numVars The number of variables passed to the scan command.
301 * totalSubs The number of variables that will be required.
302 *
303 *----------------------------------------------------------------------
304 */
ValidateFormat(char * format,int numVars,int * totalSubs)305 PHPAPI int ValidateFormat(char *format, int numVars, int *totalSubs)
306 {
307 #define STATIC_LIST_SIZE 16
308 int gotXpg, gotSequential, value, i, flags;
309 char *end, *ch = NULL;
310 int staticAssign[STATIC_LIST_SIZE];
311 int *nassign = staticAssign;
312 int objIndex, xpgSize, nspace = STATIC_LIST_SIZE;
313
314 /*
315 * Initialize an array that records the number of times a variable
316 * is assigned to by the format string. We use this to detect if
317 * a variable is multiply assigned or left unassigned.
318 */
319 if (numVars > nspace) {
320 nassign = (int*)safe_emalloc(sizeof(int), numVars, 0);
321 nspace = numVars;
322 }
323 for (i = 0; i < nspace; i++) {
324 nassign[i] = 0;
325 }
326
327 xpgSize = objIndex = gotXpg = gotSequential = 0;
328
329 while (*format != '\0') {
330 ch = format++;
331 flags = 0;
332
333 if (*ch != '%') {
334 continue;
335 }
336 ch = format++;
337 if (*ch == '%') {
338 continue;
339 }
340 if (*ch == '*') {
341 flags |= SCAN_SUPPRESS;
342 ch = format++;
343 goto xpgCheckDone;
344 }
345
346 if ( isdigit( (int)*ch ) ) {
347 /*
348 * Check for an XPG3-style %n$ specification. Note: there
349 * must not be a mixture of XPG3 specs and non-XPG3 specs
350 * in the same format string.
351 */
352 value = ZEND_STRTOUL(format-1, &end, 10);
353 if (*end != '$') {
354 goto notXpg;
355 }
356 format = end+1;
357 ch = format++;
358 gotXpg = 1;
359 if (gotSequential) {
360 goto mixedXPG;
361 }
362 objIndex = value - 1;
363 if ((objIndex < 0) || (numVars && (objIndex >= numVars))) {
364 goto badIndex;
365 } else if (numVars == 0) {
366 /*
367 * In the case where no vars are specified, the user can
368 * specify %9999$ legally, so we have to consider special
369 * rules for growing the assign array. 'value' is
370 * guaranteed to be > 0.
371 */
372
373 /* set a lower artificial limit on this
374 * in the interest of security and resource friendliness
375 * 255 arguments should be more than enough. - cc
376 */
377 if (value > SCAN_MAX_ARGS) {
378 goto badIndex;
379 }
380
381 xpgSize = (xpgSize > value) ? xpgSize : value;
382 }
383 goto xpgCheckDone;
384 }
385
386 notXpg:
387 gotSequential = 1;
388 if (gotXpg) {
389 mixedXPG:
390 zend_value_error("%s", "cannot mix \"%\" and \"%n$\" conversion specifiers");
391 goto error;
392 }
393
394 xpgCheckDone:
395 /*
396 * Parse any width specifier.
397 */
398 if (isdigit(UCHAR(*ch))) {
399 value = ZEND_STRTOUL(format-1, &format, 10);
400 flags |= SCAN_WIDTH;
401 ch = format++;
402 }
403
404 /*
405 * Ignore size specifier.
406 */
407 if ((*ch == 'l') || (*ch == 'L') || (*ch == 'h')) {
408 ch = format++;
409 }
410
411 if (!(flags & SCAN_SUPPRESS) && numVars && (objIndex >= numVars)) {
412 goto badIndex;
413 }
414
415 /*
416 * Handle the various field types.
417 */
418 switch (*ch) {
419 case 'n':
420 case 'd':
421 case 'D':
422 case 'i':
423 case 'o':
424 case 'x':
425 case 'X':
426 case 'u':
427 case 'f':
428 case 'e':
429 case 'E':
430 case 'g':
431 case 's':
432 break;
433
434 case 'c':
435 /* we differ here with the TCL implementation in allowing for */
436 /* a character width specification, to be more consistent with */
437 /* ANSI. since Zend auto allocates space for vars, this is no */
438 /* problem - cc */
439 /*
440 if (flags & SCAN_WIDTH) {
441 php_error_docref(NULL, E_WARNING, "Field width may not be specified in %c conversion");
442 goto error;
443 }
444 */
445 break;
446
447 case '[':
448 if (*format == '\0') {
449 goto badSet;
450 }
451 ch = format++;
452 if (*ch == '^') {
453 if (*format == '\0') {
454 goto badSet;
455 }
456 ch = format++;
457 }
458 if (*ch == ']') {
459 if (*format == '\0') {
460 goto badSet;
461 }
462 ch = format++;
463 }
464 while (*ch != ']') {
465 if (*format == '\0') {
466 goto badSet;
467 }
468 ch = format++;
469 }
470 break;
471 badSet:
472 zend_value_error("Unmatched [ in format string");
473 goto error;
474
475 default: {
476 zend_value_error("Bad scan conversion character \"%c\"", *ch);
477 goto error;
478 }
479 }
480
481 if (!(flags & SCAN_SUPPRESS)) {
482 if (objIndex >= nspace) {
483 /*
484 * Expand the nassign buffer. If we are using XPG specifiers,
485 * make sure that we grow to a large enough size. xpgSize is
486 * guaranteed to be at least one larger than objIndex.
487 */
488 value = nspace;
489 if (xpgSize) {
490 nspace = xpgSize;
491 } else {
492 nspace += STATIC_LIST_SIZE;
493 }
494 if (nassign == staticAssign) {
495 nassign = (void *)safe_emalloc(nspace, sizeof(int), 0);
496 for (i = 0; i < STATIC_LIST_SIZE; ++i) {
497 nassign[i] = staticAssign[i];
498 }
499 } else {
500 nassign = (void *)erealloc((void *)nassign, nspace * sizeof(int));
501 }
502 for (i = value; i < nspace; i++) {
503 nassign[i] = 0;
504 }
505 }
506 nassign[objIndex]++;
507 objIndex++;
508 }
509 } /* while (*format != '\0') */
510
511 /*
512 * Verify that all of the variable were assigned exactly once.
513 */
514 if (numVars == 0) {
515 if (xpgSize) {
516 numVars = xpgSize;
517 } else {
518 numVars = objIndex;
519 }
520 }
521 if (totalSubs) {
522 *totalSubs = numVars;
523 }
524 for (i = 0; i < numVars; i++) {
525 if (nassign[i] > 1) {
526 zend_value_error("%s", "Variable is assigned by multiple \"%n$\" conversion specifiers");
527 goto error;
528 } else if (!xpgSize && (nassign[i] == 0)) {
529 /*
530 * If the space is empty, and xpgSize is 0 (means XPG wasn't
531 * used, and/or numVars != 0), then too many vars were given
532 */
533 zend_value_error("Variable is not assigned by any conversion specifiers");
534 goto error;
535 }
536 }
537
538 if (nassign != staticAssign) {
539 efree((char *)nassign);
540 }
541 return SCAN_SUCCESS;
542
543 badIndex:
544 if (gotXpg) {
545 zend_value_error("%s", "\"%n$\" argument index out of range");
546 } else {
547 zend_value_error("Different numbers of variable names and field specifiers");
548 }
549
550 error:
551 if (nassign != staticAssign) {
552 efree((char *)nassign);
553 }
554 return SCAN_ERROR_INVALID_FORMAT;
555 #undef STATIC_LIST_SIZE
556 }
557 /* }}} */
558
559 /* {{{ php_sscanf_internal
560 * This is the internal function which does processing on behalf of
561 * both sscanf() and fscanf()
562 *
563 * parameters :
564 * string literal string to be processed
565 * format format string
566 * argCount total number of elements in the args array
567 * args arguments passed in from user function (f|s)scanf
568 * varStart offset (in args) of 1st variable passed in to (f|s)scanf
569 * return_value set with the results of the scan
570 */
571
php_sscanf_internal(char * string,char * format,int argCount,zval * args,int varStart,zval * return_value)572 PHPAPI int php_sscanf_internal( char *string, char *format,
573 int argCount, zval *args,
574 int varStart, zval *return_value)
575 {
576 int numVars, nconversions, totalVars = -1;
577 int i, result;
578 zend_long value;
579 int objIndex;
580 char *end, *baseString;
581 zval *current;
582 char op = 0;
583 int base = 0;
584 int underflow = 0;
585 size_t width;
586 zend_long (*fn)() = NULL;
587 char *ch, sch;
588 int flags;
589 char buf[64]; /* Temporary buffer to hold scanned number
590 * strings before they are passed to strtoul() */
591
592 /* do some sanity checking */
593 if ((varStart > argCount) || (varStart < 0)){
594 varStart = SCAN_MAX_ARGS + 1;
595 }
596 numVars = argCount - varStart;
597 if (numVars < 0) {
598 numVars = 0;
599 }
600
601 /*
602 * Check for errors in the format string.
603 */
604 if (ValidateFormat(format, numVars, &totalVars) != SCAN_SUCCESS) {
605 scan_set_error_return( numVars, return_value );
606 return SCAN_ERROR_INVALID_FORMAT;
607 }
608
609 objIndex = numVars ? varStart : 0;
610
611 /*
612 * If any variables are passed, make sure they are all passed by reference
613 */
614 if (numVars) {
615 for (i = varStart;i < argCount;i++){
616 ZEND_ASSERT(Z_ISREF(args[i]) && "Parameter must be passed by reference");
617 }
618 }
619
620 /*
621 * Allocate space for the result objects. Only happens when no variables
622 * are specified
623 */
624 if (!numVars) {
625 zval tmp;
626
627 /* allocate an array for return */
628 array_init(return_value);
629
630 for (i = 0; i < totalVars; i++) {
631 ZVAL_NULL(&tmp);
632 if (add_next_index_zval(return_value, &tmp) == FAILURE) {
633 scan_set_error_return(0, return_value);
634 return FAILURE;
635 }
636 }
637 varStart = 0; /* Array index starts from 0 */
638 }
639
640 baseString = string;
641
642 /*
643 * Iterate over the format string filling in the result objects until
644 * we reach the end of input, the end of the format string, or there
645 * is a mismatch.
646 */
647 nconversions = 0;
648 /* note ! - we need to limit the loop for objIndex to keep it in bounds */
649
650 while (*format != '\0') {
651 ch = format++;
652 flags = 0;
653
654 /*
655 * If we see whitespace in the format, skip whitespace in the string.
656 */
657 if ( isspace( (int)*ch ) ) {
658 sch = *string;
659 while ( isspace( (int)sch ) ) {
660 if (*string == '\0') {
661 goto done;
662 }
663 string++;
664 sch = *string;
665 }
666 continue;
667 }
668
669 if (*ch != '%') {
670 literal:
671 if (*string == '\0') {
672 underflow = 1;
673 goto done;
674 }
675 sch = *string;
676 string++;
677 if (*ch != sch) {
678 goto done;
679 }
680 continue;
681 }
682
683 ch = format++;
684 if (*ch == '%') {
685 goto literal;
686 }
687
688 /*
689 * Check for assignment suppression ('*') or an XPG3-style
690 * assignment ('%n$').
691 */
692 if (*ch == '*') {
693 flags |= SCAN_SUPPRESS;
694 ch = format++;
695 } else if ( isdigit(UCHAR(*ch))) {
696 value = ZEND_STRTOUL(format-1, &end, 10);
697 if (*end == '$') {
698 format = end+1;
699 ch = format++;
700 objIndex = varStart + value - 1;
701 }
702 }
703
704 /*
705 * Parse any width specifier.
706 */
707 if ( isdigit(UCHAR(*ch))) {
708 width = ZEND_STRTOUL(format-1, &format, 10);
709 ch = format++;
710 } else {
711 width = 0;
712 }
713
714 /*
715 * Ignore size specifier.
716 */
717 if ((*ch == 'l') || (*ch == 'L') || (*ch == 'h')) {
718 ch = format++;
719 }
720
721 /*
722 * Handle the various field types.
723 */
724 switch (*ch) {
725 case 'n':
726 if (!(flags & SCAN_SUPPRESS)) {
727 if (numVars && objIndex >= argCount) {
728 break;
729 } else if (numVars) {
730 current = args + objIndex++;
731 ZEND_TRY_ASSIGN_REF_LONG(current, (zend_long) (string - baseString));
732 } else {
733 add_index_long(return_value, objIndex++, string - baseString);
734 }
735 }
736 nconversions++;
737 continue;
738
739 case 'd':
740 case 'D':
741 op = 'i';
742 base = 10;
743 fn = (zend_long (*)())ZEND_STRTOL_PTR;
744 break;
745 case 'i':
746 op = 'i';
747 base = 0;
748 fn = (zend_long (*)())ZEND_STRTOL_PTR;
749 break;
750 case 'o':
751 op = 'i';
752 base = 8;
753 fn = (zend_long (*)())ZEND_STRTOL_PTR;
754 break;
755 case 'x':
756 case 'X':
757 op = 'i';
758 base = 16;
759 fn = (zend_long (*)())ZEND_STRTOL_PTR;
760 break;
761 case 'u':
762 op = 'i';
763 base = 10;
764 flags |= SCAN_UNSIGNED;
765 fn = (zend_long (*)())ZEND_STRTOUL_PTR;
766 break;
767
768 case 'f':
769 case 'e':
770 case 'E':
771 case 'g':
772 op = 'f';
773 break;
774
775 case 's':
776 op = 's';
777 break;
778
779 case 'c':
780 op = 's';
781 flags |= SCAN_NOSKIP;
782 /*-cc-*/
783 if (0 == width) {
784 width = 1;
785 }
786 /*-cc-*/
787 break;
788 case '[':
789 op = '[';
790 flags |= SCAN_NOSKIP;
791 break;
792 } /* switch */
793
794 /*
795 * At this point, we will need additional characters from the
796 * string to proceed.
797 */
798 if (*string == '\0') {
799 underflow = 1;
800 goto done;
801 }
802
803 /*
804 * Skip any leading whitespace at the beginning of a field unless
805 * the format suppresses this behavior.
806 */
807 if (!(flags & SCAN_NOSKIP)) {
808 while (*string != '\0') {
809 sch = *string;
810 if (! isspace((int)sch) ) {
811 break;
812 }
813 string++;
814 }
815 if (*string == '\0') {
816 underflow = 1;
817 goto done;
818 }
819 }
820
821 /*
822 * Perform the requested scanning operation.
823 */
824 switch (op) {
825 case 'c':
826 case 's':
827 /*
828 * Scan a string up to width characters or whitespace.
829 */
830 if (width == 0) {
831 width = (size_t) ~0;
832 }
833 end = string;
834 while (*end != '\0') {
835 sch = *end;
836 if ( isspace( (int)sch ) ) {
837 break;
838 }
839 end++;
840 if (--width == 0) {
841 break;
842 }
843 }
844 if (!(flags & SCAN_SUPPRESS)) {
845 if (numVars && objIndex >= argCount) {
846 break;
847 } else if (numVars) {
848 current = args + objIndex++;
849 ZEND_TRY_ASSIGN_REF_STRINGL(current, string, end - string);
850 } else {
851 add_index_stringl(return_value, objIndex++, string, end-string);
852 }
853 }
854 string = end;
855 break;
856
857 case '[': {
858 CharSet cset;
859
860 if (width == 0) {
861 width = (size_t) ~0;
862 }
863 end = string;
864
865 format = BuildCharSet(&cset, format);
866 while (*end != '\0') {
867 sch = *end;
868 if (!CharInSet(&cset, (int)sch)) {
869 break;
870 }
871 end++;
872 if (--width == 0) {
873 break;
874 }
875 }
876 ReleaseCharSet(&cset);
877
878 if (string == end) {
879 /*
880 * Nothing matched the range, stop processing
881 */
882 goto done;
883 }
884 if (!(flags & SCAN_SUPPRESS)) {
885 if (numVars && objIndex >= argCount) {
886 break;
887 } else if (numVars) {
888 current = args + objIndex++;
889 ZEND_TRY_ASSIGN_REF_STRINGL(current, string, end - string);
890 } else {
891 add_index_stringl(return_value, objIndex++, string, end-string);
892 }
893 }
894 string = end;
895 break;
896 }
897 /*
898 case 'c':
899 / Scan a single character./
900
901 sch = *string;
902 string++;
903 if (!(flags & SCAN_SUPPRESS)) {
904 if (numVars) {
905 char __buf[2];
906 __buf[0] = sch;
907 __buf[1] = '\0';
908 current = args[objIndex++];
909 zval_ptr_dtor_nogc(*current);
910 ZVAL_STRINGL( *current, __buf, 1);
911 } else {
912 add_index_stringl(return_value, objIndex++, &sch, 1);
913 }
914 }
915 break;
916 */
917 case 'i':
918 /*
919 * Scan an unsigned or signed integer.
920 */
921 /*-cc-*/
922 buf[0] = '\0';
923 /*-cc-*/
924 if ((width == 0) || (width > sizeof(buf) - 1)) {
925 width = sizeof(buf) - 1;
926 }
927
928 flags |= SCAN_SIGNOK | SCAN_NODIGITS | SCAN_NOZERO;
929 for (end = buf; width > 0; width--) {
930 switch (*string) {
931 /*
932 * The 0 digit has special meaning at the beginning of
933 * a number. If we are unsure of the base, it
934 * indicates that we are in base 8 or base 16 (if it is
935 * followed by an 'x').
936 */
937 case '0':
938 /*-cc-*/
939 if (base == 16) {
940 flags |= SCAN_XOK;
941 }
942 /*-cc-*/
943 if (base == 0) {
944 base = 8;
945 flags |= SCAN_XOK;
946 }
947 if (flags & SCAN_NOZERO) {
948 flags &= ~(SCAN_SIGNOK | SCAN_NODIGITS | SCAN_NOZERO);
949 } else {
950 flags &= ~(SCAN_SIGNOK | SCAN_XOK | SCAN_NODIGITS);
951 }
952 goto addToInt;
953
954 case '1': case '2': case '3': case '4':
955 case '5': case '6': case '7':
956 if (base == 0) {
957 base = 10;
958 }
959 flags &= ~(SCAN_SIGNOK | SCAN_XOK | SCAN_NODIGITS);
960 goto addToInt;
961
962 case '8': case '9':
963 if (base == 0) {
964 base = 10;
965 }
966 if (base <= 8) {
967 break;
968 }
969 flags &= ~(SCAN_SIGNOK | SCAN_XOK | SCAN_NODIGITS);
970 goto addToInt;
971
972 case 'A': case 'B': case 'C':
973 case 'D': case 'E': case 'F':
974 case 'a': case 'b': case 'c':
975 case 'd': case 'e': case 'f':
976 if (base <= 10) {
977 break;
978 }
979 flags &= ~(SCAN_SIGNOK | SCAN_XOK | SCAN_NODIGITS);
980 goto addToInt;
981
982 case '+': case '-':
983 if (flags & SCAN_SIGNOK) {
984 flags &= ~SCAN_SIGNOK;
985 goto addToInt;
986 }
987 break;
988
989 case 'x': case 'X':
990 if ((flags & SCAN_XOK) && (end == buf+1)) {
991 base = 16;
992 flags &= ~SCAN_XOK;
993 goto addToInt;
994 }
995 break;
996 }
997
998 /*
999 * We got an illegal character so we are done accumulating.
1000 */
1001 break;
1002
1003 addToInt:
1004 /*
1005 * Add the character to the temporary buffer.
1006 */
1007 *end++ = *string++;
1008 if (*string == '\0') {
1009 break;
1010 }
1011 }
1012
1013 /*
1014 * Check to see if we need to back up because we only got a
1015 * sign or a trailing x after a 0.
1016 */
1017 if (flags & SCAN_NODIGITS) {
1018 if (*string == '\0') {
1019 underflow = 1;
1020 }
1021 goto done;
1022 } else if (end[-1] == 'x' || end[-1] == 'X') {
1023 end--;
1024 string--;
1025 }
1026
1027 /*
1028 * Scan the value from the temporary buffer. If we are
1029 * returning a large unsigned value, we have to convert it back
1030 * to a string since PHP only supports signed values.
1031 */
1032 if (!(flags & SCAN_SUPPRESS)) {
1033 *end = '\0';
1034 value = (zend_long) (*fn)(buf, NULL, base);
1035 if ((flags & SCAN_UNSIGNED) && (value < 0)) {
1036 snprintf(buf, sizeof(buf), ZEND_ULONG_FMT, value); /* INTL: ISO digit */
1037 if (numVars && objIndex >= argCount) {
1038 break;
1039 } else if (numVars) {
1040 /* change passed value type to string */
1041 current = args + objIndex++;
1042 ZEND_TRY_ASSIGN_REF_STRING(current, buf);
1043 } else {
1044 add_index_string(return_value, objIndex++, buf);
1045 }
1046 } else {
1047 if (numVars && objIndex >= argCount) {
1048 break;
1049 } else if (numVars) {
1050 current = args + objIndex++;
1051 ZEND_TRY_ASSIGN_REF_LONG(current, value);
1052 } else {
1053 add_index_long(return_value, objIndex++, value);
1054 }
1055 }
1056 }
1057 break;
1058
1059 case 'f':
1060 /*
1061 * Scan a floating point number
1062 */
1063 buf[0] = '\0'; /* call me pedantic */
1064 if ((width == 0) || (width > sizeof(buf) - 1)) {
1065 width = sizeof(buf) - 1;
1066 }
1067 flags |= SCAN_SIGNOK | SCAN_NODIGITS | SCAN_PTOK | SCAN_EXPOK;
1068 for (end = buf; width > 0; width--) {
1069 switch (*string) {
1070 case '0': case '1': case '2': case '3':
1071 case '4': case '5': case '6': case '7':
1072 case '8': case '9':
1073 flags &= ~(SCAN_SIGNOK | SCAN_NODIGITS);
1074 goto addToFloat;
1075 case '+':
1076 case '-':
1077 if (flags & SCAN_SIGNOK) {
1078 flags &= ~SCAN_SIGNOK;
1079 goto addToFloat;
1080 }
1081 break;
1082 case '.':
1083 if (flags & SCAN_PTOK) {
1084 flags &= ~(SCAN_SIGNOK | SCAN_PTOK);
1085 goto addToFloat;
1086 }
1087 break;
1088 case 'e':
1089 case 'E':
1090 /*
1091 * An exponent is not allowed until there has
1092 * been at least one digit.
1093 */
1094 if ((flags & (SCAN_NODIGITS | SCAN_EXPOK)) == SCAN_EXPOK) {
1095 flags = (flags & ~(SCAN_EXPOK|SCAN_PTOK))
1096 | SCAN_SIGNOK | SCAN_NODIGITS;
1097 goto addToFloat;
1098 }
1099 break;
1100 }
1101
1102 /*
1103 * We got an illegal character so we are done accumulating.
1104 */
1105 break;
1106
1107 addToFloat:
1108 /*
1109 * Add the character to the temporary buffer.
1110 */
1111 *end++ = *string++;
1112 if (*string == '\0') {
1113 break;
1114 }
1115 }
1116
1117 /*
1118 * Check to see if we need to back up because we saw a
1119 * trailing 'e' or sign.
1120 */
1121 if (flags & SCAN_NODIGITS) {
1122 if (flags & SCAN_EXPOK) {
1123 /*
1124 * There were no digits at all so scanning has
1125 * failed and we are done.
1126 */
1127 if (*string == '\0') {
1128 underflow = 1;
1129 }
1130 goto done;
1131 }
1132
1133 /*
1134 * We got a bad exponent ('e' and maybe a sign).
1135 */
1136 end--;
1137 string--;
1138 if (*end != 'e' && *end != 'E') {
1139 end--;
1140 string--;
1141 }
1142 }
1143
1144 /*
1145 * Scan the value from the temporary buffer.
1146 */
1147 if (!(flags & SCAN_SUPPRESS)) {
1148 double dvalue;
1149 *end = '\0';
1150 dvalue = zend_strtod(buf, NULL);
1151 if (numVars && objIndex >= argCount) {
1152 break;
1153 } else if (numVars) {
1154 current = args + objIndex++;
1155 ZEND_TRY_ASSIGN_REF_DOUBLE(current, dvalue);
1156 } else {
1157 add_index_double(return_value, objIndex++, dvalue );
1158 }
1159 }
1160 break;
1161 } /* switch (op) */
1162 nconversions++;
1163 } /* while (*format != '\0') */
1164
1165 done:
1166 result = SCAN_SUCCESS;
1167
1168 if (underflow && (0==nconversions)) {
1169 scan_set_error_return( numVars, return_value );
1170 result = SCAN_ERROR_EOF;
1171 } else if (numVars) {
1172 zval_ptr_dtor(return_value );
1173 ZVAL_LONG(return_value, nconversions);
1174 } else if (nconversions < totalVars) {
1175 /* TODO: not all elements converted. we need to prune the list - cc */
1176 }
1177 return result;
1178 }
1179 /* }}} */
1180
1181 /* the compiler choked when i tried to make this a macro */
scan_set_error_return(int numVars,zval * return_value)1182 static inline void scan_set_error_return(int numVars, zval *return_value) /* {{{ */
1183 {
1184 if (numVars) {
1185 ZVAL_LONG(return_value, SCAN_ERROR_EOF); /* EOF marker */
1186 } else {
1187 /* convert_to_null calls destructor */
1188 convert_to_null(return_value);
1189 }
1190 }
1191 /* }}} */
1192