1 /*
2 +----------------------------------------------------------------------+
3 | PHP Version 7 |
4 +----------------------------------------------------------------------+
5 | Copyright (c) 1997-2018 The PHP Group |
6 +----------------------------------------------------------------------+
7 | This source file is subject to version 3.01 of the PHP license, |
8 | that is bundled with this package in the file LICENSE, and is |
9 | available through the world-wide-web at the following url: |
10 | http://www.php.net/license/3_01.txt |
11 | If you did not receive a copy of the PHP license and are unable to |
12 | obtain it through the world-wide-web, please send a note to |
13 | license@php.net so we can mail you a copy immediately. |
14 +----------------------------------------------------------------------+
15 | Author: Clayton Collie <clcollie@mindspring.com> |
16 +----------------------------------------------------------------------+
17 */
18
19 /*
20 scanf.c --
21
22 This file contains the base code which implements sscanf and by extension
23 fscanf. Original code is from TCL8.3.0 and bears the following copyright:
24
25 This software is copyrighted by the Regents of the University of
26 California, Sun Microsystems, Inc., Scriptics Corporation,
27 and other parties. The following terms apply to all files associated
28 with the software unless explicitly disclaimed in individual files.
29
30 The authors hereby grant permission to use, copy, modify, distribute,
31 and license this software and its documentation for any purpose, provided
32 that existing copyright notices are retained in all copies and that this
33 notice is included verbatim in any distributions. No written agreement,
34 license, or royalty fee is required for any of the authorized uses.
35 Modifications to this software may be copyrighted by their authors
36 and need not follow the licensing terms described here, provided that
37 the new terms are clearly indicated on the first page of each file where
38 they apply.
39
40 IN NO EVENT SHALL THE AUTHORS OR DISTRIBUTORS BE LIABLE TO ANY PARTY
41 FOR DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
42 ARISING OUT OF THE USE OF THIS SOFTWARE, ITS DOCUMENTATION, OR ANY
43 DERIVATIVES THEREOF, EVEN IF THE AUTHORS HAVE BEEN ADVISED OF THE
44 POSSIBILITY OF SUCH DAMAGE.
45
46 THE AUTHORS AND DISTRIBUTORS SPECIFICALLY DISCLAIM ANY WARRANTIES,
47 INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY,
48 FITNESS FOR A PARTICULAR PURPOSE, AND NON-INFRINGEMENT. THIS SOFTWARE
49 IS PROVIDED ON AN "AS IS" BASIS, AND THE AUTHORS AND DISTRIBUTORS HAVE
50 NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR
51 MODIFICATIONS.
52
53 GOVERNMENT USE: If you are acquiring this software on behalf of the
54 U.S. government, the Government shall have only "Restricted Rights"
55 in the software and related documentation as defined in the Federal
56 Acquisition Regulations (FARs) in Clause 52.227.19 (c) (2). If you
57 are acquiring the software on behalf of the Department of Defense, the
58 software shall be classified as "Commercial Computer Software" and the
59 Government shall have only "Restricted Rights" as defined in Clause
60 252.227-7013 (c) (1) of DFARs. Notwithstanding the foregoing, the
61 authors grant the U.S. Government and others acting in its behalf
62 permission to use and distribute the software in accordance with the
63 terms specified in this license.
64 */
65
66 #include <stdio.h>
67 #include <limits.h>
68 #include <ctype.h>
69 #include "php.h"
70 #include "php_variables.h"
71 #ifdef HAVE_LOCALE_H
72 #include <locale.h>
73 #endif
74 #include "zend_execute.h"
75 #include "zend_operators.h"
76 #include "zend_strtod.h"
77 #include "php_globals.h"
78 #include "basic_functions.h"
79 #include "scanf.h"
80
/*
 * Flag values used internally by [f|s]canf.
 *
 * The first group describes the conversion specifier being processed.
 */
#define SCAN_NOSKIP		0x1		/* Don't skip blanks. */
#define SCAN_SUPPRESS	0x2		/* Suppress assignment. */
#define SCAN_UNSIGNED	0x4		/* Read an unsigned value. */
#define SCAN_WIDTH		0x8		/* A width value was supplied. */

/*
 * The second group tracks parser state while the characters of a number
 * are being accumulated.
 */
#define SCAN_SIGNOK		0x10	/* A +/- character is allowed. */
#define SCAN_NODIGITS	0x20	/* No digits have been scanned. */
#define SCAN_NOZERO		0x40	/* No zero digits have been scanned. */
#define SCAN_XOK		0x80	/* An 'x' is allowed. */
#define SCAN_PTOK		0x100	/* Decimal point is allowed. */
#define SCAN_EXPOK		0x200	/* An exponent is allowed. */

/*
 * Convert a char to zend_uchar, e.g. before handing it to the <ctype.h>
 * classification functions. The expansion is fully parenthesized so that
 * it always behaves as a single expression wherever it is used.
 */
#define UCHAR(x)		((zend_uchar)(x))
97
/*
 * Description of a "%[...]" scan set: a list of individual member
 * characters plus a list of inclusive character ranges. When exclude is
 * set, the set matches every character that is NOT listed.
 */
struct Range {
	char start;				/* First character of the range (inclusive). */
	char end;				/* Last character of the range (inclusive). */
};

typedef struct CharSet {
	int exclude;			/* 1 if this is an exclusion set. */
	int nchars;				/* Number of entries in use in chars. */
	char *chars;			/* Individual member characters. */
	int nranges;			/* Number of entries in use in ranges. */
	struct Range *ranges;	/* Member ranges; NULL if none were allocated. */
} CharSet;
112
113 /*
114 * Declarations for functions used only in this file.
115 */
116 static char *BuildCharSet(CharSet *cset, char *format);
117 static int CharInSet(CharSet *cset, int ch);
118 static void ReleaseCharSet(CharSet *cset);
119 static inline void scan_set_error_return(int numVars, zval *return_value);
120
121
122 /* {{{ BuildCharSet
123 *----------------------------------------------------------------------
124 *
125 * BuildCharSet --
126 *
127 * This function examines a character set format specification
128 * and builds a CharSet containing the individual characters and
129 * character ranges specified.
130 *
131 * Results:
132 * Returns the next format position.
133 *
134 * Side effects:
135 * Initializes the charset.
136 *
137 *----------------------------------------------------------------------
138 */
BuildCharSet(CharSet * cset,char * format)139 static char * BuildCharSet(CharSet *cset, char *format)
140 {
141 char *ch, start;
142 int nranges;
143 char *end;
144
145 memset(cset, 0, sizeof(CharSet));
146
147 ch = format;
148 if (*ch == '^') {
149 cset->exclude = 1;
150 ch = ++format;
151 }
152 end = format + 1; /* verify this - cc */
153
154 /*
155 * Find the close bracket so we can overallocate the set.
156 */
157 if (*ch == ']') {
158 ch = end++;
159 }
160 nranges = 0;
161 while (*ch != ']') {
162 if (*ch == '-') {
163 nranges++;
164 }
165 ch = end++;
166 }
167
168 cset->chars = (char *) safe_emalloc(sizeof(char), (end - format - 1), 0);
169 if (nranges > 0) {
170 cset->ranges = (struct Range *) safe_emalloc(sizeof(struct Range), nranges, 0);
171 } else {
172 cset->ranges = NULL;
173 }
174
175 /*
176 * Now build the character set.
177 */
178 cset->nchars = cset->nranges = 0;
179 ch = format++;
180 start = *ch;
181 if (*ch == ']' || *ch == '-') {
182 cset->chars[cset->nchars++] = *ch;
183 ch = format++;
184 }
185 while (*ch != ']') {
186 if (*format == '-') {
187 /*
188 * This may be the first character of a range, so don't add
189 * it yet.
190 */
191 start = *ch;
192 } else if (*ch == '-') {
193 /*
194 * Check to see if this is the last character in the set, in which
195 * case it is not a range and we should add the previous character
196 * as well as the dash.
197 */
198 if (*format == ']') {
199 cset->chars[cset->nchars++] = start;
200 cset->chars[cset->nchars++] = *ch;
201 } else {
202 ch = format++;
203
204 /*
205 * Check to see if the range is in reverse order.
206 */
207 if (start < *ch) {
208 cset->ranges[cset->nranges].start = start;
209 cset->ranges[cset->nranges].end = *ch;
210 } else {
211 cset->ranges[cset->nranges].start = *ch;
212 cset->ranges[cset->nranges].end = start;
213 }
214 cset->nranges++;
215 }
216 } else {
217 cset->chars[cset->nchars++] = *ch;
218 }
219 ch = format++;
220 }
221 return format;
222 }
223 /* }}} */
224
225 /* {{{ CharInSet
226 *----------------------------------------------------------------------
227 *
228 * CharInSet --
229 *
230 * Check to see if a character matches the given set.
231 *
232 * Results:
233 * Returns non-zero if the character matches the given set.
234 *
235 * Side effects:
236 * None.
237 *
238 *----------------------------------------------------------------------
239 */
CharInSet(CharSet * cset,int c)240 static int CharInSet(CharSet *cset, int c)
241 {
242 char ch = (char) c;
243 int i, match = 0;
244
245 for (i = 0; i < cset->nchars; i++) {
246 if (cset->chars[i] == ch) {
247 match = 1;
248 break;
249 }
250 }
251 if (!match) {
252 for (i = 0; i < cset->nranges; i++) {
253 if ((cset->ranges[i].start <= ch)
254 && (ch <= cset->ranges[i].end)) {
255 match = 1;
256 break;
257 }
258 }
259 }
260 return (cset->exclude ? !match : match);
261 }
262 /* }}} */
263
264 /* {{{ ReleaseCharSet
265 *----------------------------------------------------------------------
266 *
267 * ReleaseCharSet --
268 *
269 * Free the storage associated with a character set.
270 *
271 * Results:
272 * None.
273 *
274 * Side effects:
275 * None.
276 *
277 *----------------------------------------------------------------------
278 */
ReleaseCharSet(CharSet * cset)279 static void ReleaseCharSet(CharSet *cset)
280 {
281 efree((char *)cset->chars);
282 if (cset->ranges) {
283 efree((char *)cset->ranges);
284 }
285 }
286 /* }}} */
287
288 /* {{{ ValidateFormat
289 *----------------------------------------------------------------------
290 *
291 * ValidateFormat --
292 *
293 * Parse the format string and verify that it is properly formed
294 * and that there are exactly enough variables on the command line.
295 *
296 * Results:
297 * FAILURE or SUCCESS.
298 *
299 * Side effects:
300 * May set php_error based on abnormal conditions.
301 *
302 * Parameters :
303 * format The format string.
304 * numVars The number of variables passed to the scan command.
305 * totalSubs The number of variables that will be required.
306 *
307 *----------------------------------------------------------------------
308 */
ValidateFormat(char * format,int numVars,int * totalSubs)309 PHPAPI int ValidateFormat(char *format, int numVars, int *totalSubs)
310 {
311 #define STATIC_LIST_SIZE 16
312 int gotXpg, gotSequential, value, i, flags;
313 char *end, *ch = NULL;
314 int staticAssign[STATIC_LIST_SIZE];
315 int *nassign = staticAssign;
316 int objIndex, xpgSize, nspace = STATIC_LIST_SIZE;
317
318 /*
319 * Initialize an array that records the number of times a variable
320 * is assigned to by the format string. We use this to detect if
321 * a variable is multiply assigned or left unassigned.
322 */
323 if (numVars > nspace) {
324 nassign = (int*)safe_emalloc(sizeof(int), numVars, 0);
325 nspace = numVars;
326 }
327 for (i = 0; i < nspace; i++) {
328 nassign[i] = 0;
329 }
330
331 xpgSize = objIndex = gotXpg = gotSequential = 0;
332
333 while (*format != '\0') {
334 ch = format++;
335 flags = 0;
336
337 if (*ch != '%') {
338 continue;
339 }
340 ch = format++;
341 if (*ch == '%') {
342 continue;
343 }
344 if (*ch == '*') {
345 flags |= SCAN_SUPPRESS;
346 ch = format++;
347 goto xpgCheckDone;
348 }
349
350 if ( isdigit( (int)*ch ) ) {
351 /*
352 * Check for an XPG3-style %n$ specification. Note: there
353 * must not be a mixture of XPG3 specs and non-XPG3 specs
354 * in the same format string.
355 */
356 value = ZEND_STRTOUL(format-1, &end, 10);
357 if (*end != '$') {
358 goto notXpg;
359 }
360 format = end+1;
361 ch = format++;
362 gotXpg = 1;
363 if (gotSequential) {
364 goto mixedXPG;
365 }
366 objIndex = value - 1;
367 if ((objIndex < 0) || (numVars && (objIndex >= numVars))) {
368 goto badIndex;
369 } else if (numVars == 0) {
370 /*
371 * In the case where no vars are specified, the user can
372 * specify %9999$ legally, so we have to consider special
373 * rules for growing the assign array. 'value' is
374 * guaranteed to be > 0.
375 */
376
377 /* set a lower artificial limit on this
378 * in the interest of security and resource friendliness
379 * 255 arguments should be more than enough. - cc
380 */
381 if (value > SCAN_MAX_ARGS) {
382 goto badIndex;
383 }
384
385 xpgSize = (xpgSize > value) ? xpgSize : value;
386 }
387 goto xpgCheckDone;
388 }
389
390 notXpg:
391 gotSequential = 1;
392 if (gotXpg) {
393 mixedXPG:
394 php_error_docref(NULL, E_WARNING, "%s", "cannot mix \"%\" and \"%n$\" conversion specifiers");
395 goto error;
396 }
397
398 xpgCheckDone:
399 /*
400 * Parse any width specifier.
401 */
402 if (isdigit(UCHAR(*ch))) {
403 value = ZEND_STRTOUL(format-1, &format, 10);
404 flags |= SCAN_WIDTH;
405 ch = format++;
406 }
407
408 /*
409 * Ignore size specifier.
410 */
411 if ((*ch == 'l') || (*ch == 'L') || (*ch == 'h')) {
412 ch = format++;
413 }
414
415 if (!(flags & SCAN_SUPPRESS) && numVars && (objIndex >= numVars)) {
416 goto badIndex;
417 }
418
419 /*
420 * Handle the various field types.
421 */
422 switch (*ch) {
423 case 'n':
424 case 'd':
425 case 'D':
426 case 'i':
427 case 'o':
428 case 'x':
429 case 'X':
430 case 'u':
431 case 'f':
432 case 'e':
433 case 'E':
434 case 'g':
435 case 's':
436 break;
437
438 case 'c':
439 /* we differ here with the TCL implementation in allowing for */
440 /* a character width specification, to be more consistent with */
441 /* ANSI. since Zend auto allocates space for vars, this is no */
442 /* problem - cc */
443 /*
444 if (flags & SCAN_WIDTH) {
445 php_error_docref(NULL, E_WARNING, "Field width may not be specified in %c conversion");
446 goto error;
447 }
448 */
449 break;
450
451 case '[':
452 if (*format == '\0') {
453 goto badSet;
454 }
455 ch = format++;
456 if (*ch == '^') {
457 if (*format == '\0') {
458 goto badSet;
459 }
460 ch = format++;
461 }
462 if (*ch == ']') {
463 if (*format == '\0') {
464 goto badSet;
465 }
466 ch = format++;
467 }
468 while (*ch != ']') {
469 if (*format == '\0') {
470 goto badSet;
471 }
472 ch = format++;
473 }
474 break;
475 badSet:
476 php_error_docref(NULL, E_WARNING, "Unmatched [ in format string");
477 goto error;
478
479 default: {
480 php_error_docref(NULL, E_WARNING, "Bad scan conversion character \"%c\"", *ch);
481 goto error;
482 }
483 }
484
485 if (!(flags & SCAN_SUPPRESS)) {
486 if (objIndex >= nspace) {
487 /*
488 * Expand the nassign buffer. If we are using XPG specifiers,
489 * make sure that we grow to a large enough size. xpgSize is
490 * guaranteed to be at least one larger than objIndex.
491 */
492 value = nspace;
493 if (xpgSize) {
494 nspace = xpgSize;
495 } else {
496 nspace += STATIC_LIST_SIZE;
497 }
498 if (nassign == staticAssign) {
499 nassign = (void *)safe_emalloc(nspace, sizeof(int), 0);
500 for (i = 0; i < STATIC_LIST_SIZE; ++i) {
501 nassign[i] = staticAssign[i];
502 }
503 } else {
504 nassign = (void *)erealloc((void *)nassign, nspace * sizeof(int));
505 }
506 for (i = value; i < nspace; i++) {
507 nassign[i] = 0;
508 }
509 }
510 nassign[objIndex]++;
511 objIndex++;
512 }
513 } /* while (*format != '\0') */
514
515 /*
516 * Verify that all of the variable were assigned exactly once.
517 */
518 if (numVars == 0) {
519 if (xpgSize) {
520 numVars = xpgSize;
521 } else {
522 numVars = objIndex;
523 }
524 }
525 if (totalSubs) {
526 *totalSubs = numVars;
527 }
528 for (i = 0; i < numVars; i++) {
529 if (nassign[i] > 1) {
530 php_error_docref(NULL, E_WARNING, "%s", "Variable is assigned by multiple \"%n$\" conversion specifiers");
531 goto error;
532 } else if (!xpgSize && (nassign[i] == 0)) {
533 /*
534 * If the space is empty, and xpgSize is 0 (means XPG wasn't
535 * used, and/or numVars != 0), then too many vars were given
536 */
537 php_error_docref(NULL, E_WARNING, "Variable is not assigned by any conversion specifiers");
538 goto error;
539 }
540 }
541
542 if (nassign != staticAssign) {
543 efree((char *)nassign);
544 }
545 return SCAN_SUCCESS;
546
547 badIndex:
548 if (gotXpg) {
549 php_error_docref(NULL, E_WARNING, "%s", "\"%n$\" argument index out of range");
550 } else {
551 php_error_docref(NULL, E_WARNING, "Different numbers of variable names and field specifiers");
552 }
553
554 error:
555 if (nassign != staticAssign) {
556 efree((char *)nassign);
557 }
558 return SCAN_ERROR_INVALID_FORMAT;
559 #undef STATIC_LIST_SIZE
560 }
561 /* }}} */
562
563 /* {{{ php_sscanf_internal
564 * This is the internal function which does processing on behalf of
565 * both sscanf() and fscanf()
566 *
567 * parameters :
568 * string literal string to be processed
569 * format format string
570 * argCount total number of elements in the args array
571 * args arguments passed in from user function (f|s)scanf
572 * varStart offset (in args) of 1st variable passed in to (f|s)scanf
573 * return_value set with the results of the scan
574 */
575
php_sscanf_internal(char * string,char * format,int argCount,zval * args,int varStart,zval * return_value)576 PHPAPI int php_sscanf_internal( char *string, char *format,
577 int argCount, zval *args,
578 int varStart, zval *return_value)
579 {
580 int numVars, nconversions, totalVars = -1;
581 int i, result;
582 zend_long value;
583 int objIndex;
584 char *end, *baseString;
585 zval *current;
586 char op = 0;
587 int base = 0;
588 int underflow = 0;
589 size_t width;
590 zend_long (*fn)() = NULL;
591 char *ch, sch;
592 int flags;
593 char buf[64]; /* Temporary buffer to hold scanned number
594 * strings before they are passed to strtoul() */
595
596 /* do some sanity checking */
597 if ((varStart > argCount) || (varStart < 0)){
598 varStart = SCAN_MAX_ARGS + 1;
599 }
600 numVars = argCount - varStart;
601 if (numVars < 0) {
602 numVars = 0;
603 }
604
605 #if 0
606 zend_printf("<br>in sscanf_internal : <br> string is \"%s\", format = \"%s\"<br> NumVars = %d. VarStart = %d<br>-------------------------<br>",
607 string, format, numVars, varStart);
608 #endif
609 /*
610 * Check for errors in the format string.
611 */
612 if (ValidateFormat(format, numVars, &totalVars) != SCAN_SUCCESS) {
613 scan_set_error_return( numVars, return_value );
614 return SCAN_ERROR_INVALID_FORMAT;
615 }
616
617 objIndex = numVars ? varStart : 0;
618
619 /*
620 * If any variables are passed, make sure they are all passed by reference
621 */
622 if (numVars) {
623 for (i = varStart;i < argCount;i++){
624 if ( ! Z_ISREF(args[ i ] ) ) {
625 php_error_docref(NULL, E_WARNING, "Parameter %d must be passed by reference", i);
626 scan_set_error_return(numVars, return_value);
627 return SCAN_ERROR_VAR_PASSED_BYVAL;
628 }
629 }
630 }
631
632 /*
633 * Allocate space for the result objects. Only happens when no variables
634 * are specified
635 */
636 if (!numVars) {
637 zval tmp;
638
639 /* allocate an array for return */
640 array_init(return_value);
641
642 for (i = 0; i < totalVars; i++) {
643 ZVAL_NULL(&tmp);
644 if (add_next_index_zval(return_value, &tmp) == FAILURE) {
645 scan_set_error_return(0, return_value);
646 return FAILURE;
647 }
648 }
649 varStart = 0; /* Array index starts from 0 */
650 }
651
652 baseString = string;
653
654 /*
655 * Iterate over the format string filling in the result objects until
656 * we reach the end of input, the end of the format string, or there
657 * is a mismatch.
658 */
659 nconversions = 0;
660 /* note ! - we need to limit the loop for objIndex to keep it in bounds */
661
662 while (*format != '\0') {
663 ch = format++;
664 flags = 0;
665
666 /*
667 * If we see whitespace in the format, skip whitespace in the string.
668 */
669 if ( isspace( (int)*ch ) ) {
670 sch = *string;
671 while ( isspace( (int)sch ) ) {
672 if (*string == '\0') {
673 goto done;
674 }
675 string++;
676 sch = *string;
677 }
678 continue;
679 }
680
681 if (*ch != '%') {
682 literal:
683 if (*string == '\0') {
684 underflow = 1;
685 goto done;
686 }
687 sch = *string;
688 string++;
689 if (*ch != sch) {
690 goto done;
691 }
692 continue;
693 }
694
695 ch = format++;
696 if (*ch == '%') {
697 goto literal;
698 }
699
700 /*
701 * Check for assignment suppression ('*') or an XPG3-style
702 * assignment ('%n$').
703 */
704 if (*ch == '*') {
705 flags |= SCAN_SUPPRESS;
706 ch = format++;
707 } else if ( isdigit(UCHAR(*ch))) {
708 value = ZEND_STRTOUL(format-1, &end, 10);
709 if (*end == '$') {
710 format = end+1;
711 ch = format++;
712 objIndex = varStart + value - 1;
713 }
714 }
715
716 /*
717 * Parse any width specifier.
718 */
719 if ( isdigit(UCHAR(*ch))) {
720 width = ZEND_STRTOUL(format-1, &format, 10);
721 ch = format++;
722 } else {
723 width = 0;
724 }
725
726 /*
727 * Ignore size specifier.
728 */
729 if ((*ch == 'l') || (*ch == 'L') || (*ch == 'h')) {
730 ch = format++;
731 }
732
733 /*
734 * Handle the various field types.
735 */
736 switch (*ch) {
737 case 'n':
738 if (!(flags & SCAN_SUPPRESS)) {
739 if (numVars && objIndex >= argCount) {
740 break;
741 } else if (numVars) {
742 current = Z_REFVAL(args[objIndex++]);
743 zval_ptr_dtor(current);
744 ZVAL_LONG(current, (zend_long)(string - baseString) );
745 } else {
746 add_index_long(return_value, objIndex++, string - baseString);
747 }
748 }
749 nconversions++;
750 continue;
751
752 case 'd':
753 case 'D':
754 op = 'i';
755 base = 10;
756 fn = (zend_long (*)())ZEND_STRTOL_PTR;
757 break;
758 case 'i':
759 op = 'i';
760 base = 0;
761 fn = (zend_long (*)())ZEND_STRTOL_PTR;
762 break;
763 case 'o':
764 op = 'i';
765 base = 8;
766 fn = (zend_long (*)())ZEND_STRTOL_PTR;
767 break;
768 case 'x':
769 case 'X':
770 op = 'i';
771 base = 16;
772 fn = (zend_long (*)())ZEND_STRTOL_PTR;
773 break;
774 case 'u':
775 op = 'i';
776 base = 10;
777 flags |= SCAN_UNSIGNED;
778 fn = (zend_long (*)())ZEND_STRTOUL_PTR;
779 break;
780
781 case 'f':
782 case 'e':
783 case 'E':
784 case 'g':
785 op = 'f';
786 break;
787
788 case 's':
789 op = 's';
790 break;
791
792 case 'c':
793 op = 's';
794 flags |= SCAN_NOSKIP;
795 /*-cc-*/
796 if (0 == width) {
797 width = 1;
798 }
799 /*-cc-*/
800 break;
801 case '[':
802 op = '[';
803 flags |= SCAN_NOSKIP;
804 break;
805 } /* switch */
806
807 /*
808 * At this point, we will need additional characters from the
809 * string to proceed.
810 */
811 if (*string == '\0') {
812 underflow = 1;
813 goto done;
814 }
815
816 /*
817 * Skip any leading whitespace at the beginning of a field unless
818 * the format suppresses this behavior.
819 */
820 if (!(flags & SCAN_NOSKIP)) {
821 while (*string != '\0') {
822 sch = *string;
823 if (! isspace((int)sch) ) {
824 break;
825 }
826 string++;
827 }
828 if (*string == '\0') {
829 underflow = 1;
830 goto done;
831 }
832 }
833
834 /*
835 * Perform the requested scanning operation.
836 */
837 switch (op) {
838 case 'c':
839 case 's':
840 /*
841 * Scan a string up to width characters or whitespace.
842 */
843 if (width == 0) {
844 width = (size_t) ~0;
845 }
846 end = string;
847 while (*end != '\0') {
848 sch = *end;
849 if ( isspace( (int)sch ) ) {
850 break;
851 }
852 end++;
853 if (--width == 0) {
854 break;
855 }
856 }
857 if (!(flags & SCAN_SUPPRESS)) {
858 if (numVars && objIndex >= argCount) {
859 break;
860 } else if (numVars) {
861 current = Z_REFVAL(args[objIndex++]);
862 zval_ptr_dtor(current);
863 ZVAL_STRINGL(current, string, end-string);
864 } else {
865 add_index_stringl(return_value, objIndex++, string, end-string);
866 }
867 }
868 string = end;
869 break;
870
871 case '[': {
872 CharSet cset;
873
874 if (width == 0) {
875 width = (size_t) ~0;
876 }
877 end = string;
878
879 format = BuildCharSet(&cset, format);
880 while (*end != '\0') {
881 sch = *end;
882 if (!CharInSet(&cset, (int)sch)) {
883 break;
884 }
885 end++;
886 if (--width == 0) {
887 break;
888 }
889 }
890 ReleaseCharSet(&cset);
891
892 if (string == end) {
893 /*
894 * Nothing matched the range, stop processing
895 */
896 goto done;
897 }
898 if (!(flags & SCAN_SUPPRESS)) {
899 if (numVars && objIndex >= argCount) {
900 break;
901 } else if (numVars) {
902 current = Z_REFVAL(args[objIndex++]);
903 zval_ptr_dtor(current);
904 ZVAL_STRINGL(current, string, end-string);
905 } else {
906 add_index_stringl(return_value, objIndex++, string, end-string);
907 }
908 }
909 string = end;
910 break;
911 }
912 /*
913 case 'c':
914 / Scan a single character./
915
916 sch = *string;
917 string++;
918 if (!(flags & SCAN_SUPPRESS)) {
919 if (numVars) {
920 char __buf[2];
921 __buf[0] = sch;
922 __buf[1] = '\0';
923 current = args[objIndex++];
924 zval_dtor(*current);
925 ZVAL_STRINGL( *current, __buf, 1);
926 } else {
927 add_index_stringl(return_value, objIndex++, &sch, 1);
928 }
929 }
930 break;
931 */
932 case 'i':
933 /*
934 * Scan an unsigned or signed integer.
935 */
936 /*-cc-*/
937 buf[0] = '\0';
938 /*-cc-*/
939 if ((width == 0) || (width > sizeof(buf) - 1)) {
940 width = sizeof(buf) - 1;
941 }
942
943 flags |= SCAN_SIGNOK | SCAN_NODIGITS | SCAN_NOZERO;
944 for (end = buf; width > 0; width--) {
945 switch (*string) {
946 /*
947 * The 0 digit has special meaning at the beginning of
948 * a number. If we are unsure of the base, it
949 * indicates that we are in base 8 or base 16 (if it is
950 * followed by an 'x').
951 */
952 case '0':
953 /*-cc-*/
954 if (base == 16) {
955 flags |= SCAN_XOK;
956 }
957 /*-cc-*/
958 if (base == 0) {
959 base = 8;
960 flags |= SCAN_XOK;
961 }
962 if (flags & SCAN_NOZERO) {
963 flags &= ~(SCAN_SIGNOK | SCAN_NODIGITS | SCAN_NOZERO);
964 } else {
965 flags &= ~(SCAN_SIGNOK | SCAN_XOK | SCAN_NODIGITS);
966 }
967 goto addToInt;
968
969 case '1': case '2': case '3': case '4':
970 case '5': case '6': case '7':
971 if (base == 0) {
972 base = 10;
973 }
974 flags &= ~(SCAN_SIGNOK | SCAN_XOK | SCAN_NODIGITS);
975 goto addToInt;
976
977 case '8': case '9':
978 if (base == 0) {
979 base = 10;
980 }
981 if (base <= 8) {
982 break;
983 }
984 flags &= ~(SCAN_SIGNOK | SCAN_XOK | SCAN_NODIGITS);
985 goto addToInt;
986
987 case 'A': case 'B': case 'C':
988 case 'D': case 'E': case 'F':
989 case 'a': case 'b': case 'c':
990 case 'd': case 'e': case 'f':
991 if (base <= 10) {
992 break;
993 }
994 flags &= ~(SCAN_SIGNOK | SCAN_XOK | SCAN_NODIGITS);
995 goto addToInt;
996
997 case '+': case '-':
998 if (flags & SCAN_SIGNOK) {
999 flags &= ~SCAN_SIGNOK;
1000 goto addToInt;
1001 }
1002 break;
1003
1004 case 'x': case 'X':
1005 if ((flags & SCAN_XOK) && (end == buf+1)) {
1006 base = 16;
1007 flags &= ~SCAN_XOK;
1008 goto addToInt;
1009 }
1010 break;
1011 }
1012
1013 /*
1014 * We got an illegal character so we are done accumulating.
1015 */
1016 break;
1017
1018 addToInt:
1019 /*
1020 * Add the character to the temporary buffer.
1021 */
1022 *end++ = *string++;
1023 if (*string == '\0') {
1024 break;
1025 }
1026 }
1027
1028 /*
1029 * Check to see if we need to back up because we only got a
1030 * sign or a trailing x after a 0.
1031 */
1032 if (flags & SCAN_NODIGITS) {
1033 if (*string == '\0') {
1034 underflow = 1;
1035 }
1036 goto done;
1037 } else if (end[-1] == 'x' || end[-1] == 'X') {
1038 end--;
1039 string--;
1040 }
1041
1042 /*
1043 * Scan the value from the temporary buffer. If we are
1044 * returning a large unsigned value, we have to convert it back
1045 * to a string since PHP only supports signed values.
1046 */
1047 if (!(flags & SCAN_SUPPRESS)) {
1048 *end = '\0';
1049 value = (zend_long) (*fn)(buf, NULL, base);
1050 if ((flags & SCAN_UNSIGNED) && (value < 0)) {
1051 snprintf(buf, sizeof(buf), ZEND_ULONG_FMT, value); /* INTL: ISO digit */
1052 if (numVars && objIndex >= argCount) {
1053 break;
1054 } else if (numVars) {
1055 /* change passed value type to string */
1056 current = Z_REFVAL(args[objIndex++]);
1057 zval_ptr_dtor(current);
1058 ZVAL_STRING(current, buf);
1059 } else {
1060 add_index_string(return_value, objIndex++, buf);
1061 }
1062 } else {
1063 if (numVars && objIndex >= argCount) {
1064 break;
1065 } else if (numVars) {
1066 current = Z_REFVAL(args[objIndex++]);
1067 zval_ptr_dtor(current);
1068 ZVAL_LONG(current, value);
1069 } else {
1070 add_index_long(return_value, objIndex++, value);
1071 }
1072 }
1073 }
1074 break;
1075
1076 case 'f':
1077 /*
1078 * Scan a floating point number
1079 */
1080 buf[0] = '\0'; /* call me pedantic */
1081 if ((width == 0) || (width > sizeof(buf) - 1)) {
1082 width = sizeof(buf) - 1;
1083 }
1084 flags |= SCAN_SIGNOK | SCAN_NODIGITS | SCAN_PTOK | SCAN_EXPOK;
1085 for (end = buf; width > 0; width--) {
1086 switch (*string) {
1087 case '0': case '1': case '2': case '3':
1088 case '4': case '5': case '6': case '7':
1089 case '8': case '9':
1090 flags &= ~(SCAN_SIGNOK | SCAN_NODIGITS);
1091 goto addToFloat;
1092 case '+':
1093 case '-':
1094 if (flags & SCAN_SIGNOK) {
1095 flags &= ~SCAN_SIGNOK;
1096 goto addToFloat;
1097 }
1098 break;
1099 case '.':
1100 if (flags & SCAN_PTOK) {
1101 flags &= ~(SCAN_SIGNOK | SCAN_PTOK);
1102 goto addToFloat;
1103 }
1104 break;
1105 case 'e':
1106 case 'E':
1107 /*
1108 * An exponent is not allowed until there has
1109 * been at least one digit.
1110 */
1111 if ((flags & (SCAN_NODIGITS | SCAN_EXPOK)) == SCAN_EXPOK) {
1112 flags = (flags & ~(SCAN_EXPOK|SCAN_PTOK))
1113 | SCAN_SIGNOK | SCAN_NODIGITS;
1114 goto addToFloat;
1115 }
1116 break;
1117 }
1118
1119 /*
1120 * We got an illegal character so we are done accumulating.
1121 */
1122 break;
1123
1124 addToFloat:
1125 /*
1126 * Add the character to the temporary buffer.
1127 */
1128 *end++ = *string++;
1129 if (*string == '\0') {
1130 break;
1131 }
1132 }
1133
1134 /*
1135 * Check to see if we need to back up because we saw a
1136 * trailing 'e' or sign.
1137 */
1138 if (flags & SCAN_NODIGITS) {
1139 if (flags & SCAN_EXPOK) {
1140 /*
1141 * There were no digits at all so scanning has
1142 * failed and we are done.
1143 */
1144 if (*string == '\0') {
1145 underflow = 1;
1146 }
1147 goto done;
1148 }
1149
1150 /*
1151 * We got a bad exponent ('e' and maybe a sign).
1152 */
1153 end--;
1154 string--;
1155 if (*end != 'e' && *end != 'E') {
1156 end--;
1157 string--;
1158 }
1159 }
1160
1161 /*
1162 * Scan the value from the temporary buffer.
1163 */
1164 if (!(flags & SCAN_SUPPRESS)) {
1165 double dvalue;
1166 *end = '\0';
1167 dvalue = zend_strtod(buf, NULL);
1168 if (numVars && objIndex >= argCount) {
1169 break;
1170 } else if (numVars) {
1171 current = Z_REFVAL(args[objIndex++]);
1172 zval_ptr_dtor(current);
1173 ZVAL_DOUBLE(current, dvalue);
1174 } else {
1175 add_index_double(return_value, objIndex++, dvalue );
1176 }
1177 }
1178 break;
1179 } /* switch (op) */
1180 nconversions++;
1181 } /* while (*format != '\0') */
1182
1183 done:
1184 result = SCAN_SUCCESS;
1185
1186 if (underflow && (0==nconversions)) {
1187 scan_set_error_return( numVars, return_value );
1188 result = SCAN_ERROR_EOF;
1189 } else if (numVars) {
1190 zval_ptr_dtor(return_value );
1191 ZVAL_LONG(return_value, nconversions);
1192 } else if (nconversions < totalVars) {
1193 /* TODO: not all elements converted. we need to prune the list - cc */
1194 }
1195 return result;
1196 }
1197 /* }}} */
1198
1199 /* the compiler choked when i tried to make this a macro */
scan_set_error_return(int numVars,zval * return_value)1200 static inline void scan_set_error_return(int numVars, zval *return_value) /* {{{ */
1201 {
1202 if (numVars) {
1203 ZVAL_LONG(return_value, SCAN_ERROR_EOF); /* EOF marker */
1204 } else {
1205 /* convert_to_null calls destructor */
1206 convert_to_null(return_value);
1207 }
1208 }
1209 /* }}} */
1210
1211 /*
1212 * Local variables:
1213 * tab-width: 4
1214 * c-basic-offset: 4
1215 * End:
1216 * vim600: sw=4 ts=4 fdm=marker
1217 * vim<600: sw=4 ts=4
1218 */
1219