1 /*
2 +----------------------------------------------------------------------+
3 | PHP Version 7 |
4 +----------------------------------------------------------------------+
5 | Copyright (c) 1997-2018 The PHP Group |
6 +----------------------------------------------------------------------+
7 | This source file is subject to version 3.01 of the PHP license, |
8 | that is bundled with this package in the file LICENSE, and is |
9 | available through the world-wide-web at the following url: |
10 | http://www.php.net/license/3_01.txt |
11 | If you did not receive a copy of the PHP license and are unable to |
12 | obtain it through the world-wide-web, please send a note to |
13 | license@php.net so we can mail you a copy immediately. |
14 +----------------------------------------------------------------------+
15 | Author: Clayton Collie <clcollie@mindspring.com> |
16 +----------------------------------------------------------------------+
17 */
18
19 /* $Id$ */
20
21 /*
22 scanf.c --
23
24 This file contains the base code which implements sscanf and by extension
25 fscanf. Original code is from TCL8.3.0 and bears the following copyright:
26
27 This software is copyrighted by the Regents of the University of
28 California, Sun Microsystems, Inc., Scriptics Corporation,
29 and other parties. The following terms apply to all files associated
30 with the software unless explicitly disclaimed in individual files.
31
32 The authors hereby grant permission to use, copy, modify, distribute,
33 and license this software and its documentation for any purpose, provided
34 that existing copyright notices are retained in all copies and that this
35 notice is included verbatim in any distributions. No written agreement,
36 license, or royalty fee is required for any of the authorized uses.
37 Modifications to this software may be copyrighted by their authors
38 and need not follow the licensing terms described here, provided that
39 the new terms are clearly indicated on the first page of each file where
40 they apply.
41
42 IN NO EVENT SHALL THE AUTHORS OR DISTRIBUTORS BE LIABLE TO ANY PARTY
43 FOR DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
44 ARISING OUT OF THE USE OF THIS SOFTWARE, ITS DOCUMENTATION, OR ANY
45 DERIVATIVES THEREOF, EVEN IF THE AUTHORS HAVE BEEN ADVISED OF THE
46 POSSIBILITY OF SUCH DAMAGE.
47
48 THE AUTHORS AND DISTRIBUTORS SPECIFICALLY DISCLAIM ANY WARRANTIES,
49 INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY,
50 FITNESS FOR A PARTICULAR PURPOSE, AND NON-INFRINGEMENT. THIS SOFTWARE
51 IS PROVIDED ON AN "AS IS" BASIS, AND THE AUTHORS AND DISTRIBUTORS HAVE
52 NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR
53 MODIFICATIONS.
54
55 GOVERNMENT USE: If you are acquiring this software on behalf of the
56 U.S. government, the Government shall have only "Restricted Rights"
57 in the software and related documentation as defined in the Federal
58 Acquisition Regulations (FARs) in Clause 52.227.19 (c) (2). If you
59 are acquiring the software on behalf of the Department of Defense, the
60 software shall be classified as "Commercial Computer Software" and the
61 Government shall have only "Restricted Rights" as defined in Clause
62 252.227-7013 (c) (1) of DFARs. Notwithstanding the foregoing, the
63 authors grant the U.S. Government and others acting in its behalf
64 permission to use and distribute the software in accordance with the
65 terms specified in this license.
66 */
67
68 #include <stdio.h>
69 #include <limits.h>
70 #include <ctype.h>
71 #include "php.h"
72 #include "php_variables.h"
73 #ifdef HAVE_LOCALE_H
74 #include <locale.h>
75 #endif
76 #include "zend_execute.h"
77 #include "zend_operators.h"
78 #include "zend_strtod.h"
79 #include "php_globals.h"
80 #include "basic_functions.h"
81 #include "scanf.h"
82
/*
 * Flag values used internally by [f|s]canf.
 *
 * The first group describes the conversion specifier as a whole; the
 * second group is transient state used while a number is being
 * accumulated character by character.
 */
#define SCAN_NOSKIP		0x1		/* Don't skip blanks. */
#define SCAN_SUPPRESS	0x2		/* Suppress assignment. */
#define SCAN_UNSIGNED	0x4		/* Read an unsigned value. */
#define SCAN_WIDTH		0x8		/* A width value was supplied. */

#define SCAN_SIGNOK		0x10	/* A +/- character is allowed. */
#define SCAN_NODIGITS	0x20	/* No digits have been scanned. */
#define SCAN_NOZERO		0x40	/* No zero digits have been scanned. */
#define SCAN_XOK		0x80	/* An 'x' is allowed. */
#define SCAN_PTOK		0x100	/* Decimal point is allowed. */
#define SCAN_EXPOK		0x200	/* An exponent is allowed. */

/*
 * Convert a (possibly signed) char to zend_uchar, e.g. before handing it
 * to a <ctype.h> classifier. The whole expansion is parenthesised so the
 * cast binds as a single operand wherever the macro is used.
 */
#define UCHAR(x)		((zend_uchar)(x))
99
/*
 * One inclusive range of characters belonging to a character set.
 */
struct Range {
	char start;		/* Lower bound of the range. */
	char end;		/* Upper bound of the range. */
};

/*
 * Parsed form of a "%[...]" character set: a list of individual
 * characters plus a list of character ranges.
 */
typedef struct CharSet {
	int exclude;			/* 1 if this is an exclusion set. */
	int nchars;				/* Number of entries used in chars. */
	char *chars;			/* Individual member characters. */
	int nranges;			/* Number of entries used in ranges. */
	struct Range *ranges;	/* Member ranges. */
} CharSet;
114
115 /*
116 * Declarations for functions used only in this file.
117 */
118 static char *BuildCharSet(CharSet *cset, char *format);
119 static int CharInSet(CharSet *cset, int ch);
120 static void ReleaseCharSet(CharSet *cset);
121 static inline void scan_set_error_return(int numVars, zval *return_value);
122
123
124 /* {{{ BuildCharSet
125 *----------------------------------------------------------------------
126 *
127 * BuildCharSet --
128 *
129 * This function examines a character set format specification
130 * and builds a CharSet containing the individual characters and
131 * character ranges specified.
132 *
133 * Results:
134 * Returns the next format position.
135 *
136 * Side effects:
137 * Initializes the charset.
138 *
139 *----------------------------------------------------------------------
140 */
BuildCharSet(CharSet * cset,char * format)141 static char * BuildCharSet(CharSet *cset, char *format)
142 {
143 char *ch, start;
144 int nranges;
145 char *end;
146
147 memset(cset, 0, sizeof(CharSet));
148
149 ch = format;
150 if (*ch == '^') {
151 cset->exclude = 1;
152 ch = ++format;
153 }
154 end = format + 1; /* verify this - cc */
155
156 /*
157 * Find the close bracket so we can overallocate the set.
158 */
159 if (*ch == ']') {
160 ch = end++;
161 }
162 nranges = 0;
163 while (*ch != ']') {
164 if (*ch == '-') {
165 nranges++;
166 }
167 ch = end++;
168 }
169
170 cset->chars = (char *) safe_emalloc(sizeof(char), (end - format - 1), 0);
171 if (nranges > 0) {
172 cset->ranges = (struct Range *) safe_emalloc(sizeof(struct Range), nranges, 0);
173 } else {
174 cset->ranges = NULL;
175 }
176
177 /*
178 * Now build the character set.
179 */
180 cset->nchars = cset->nranges = 0;
181 ch = format++;
182 start = *ch;
183 if (*ch == ']' || *ch == '-') {
184 cset->chars[cset->nchars++] = *ch;
185 ch = format++;
186 }
187 while (*ch != ']') {
188 if (*format == '-') {
189 /*
190 * This may be the first character of a range, so don't add
191 * it yet.
192 */
193 start = *ch;
194 } else if (*ch == '-') {
195 /*
196 * Check to see if this is the last character in the set, in which
197 * case it is not a range and we should add the previous character
198 * as well as the dash.
199 */
200 if (*format == ']') {
201 cset->chars[cset->nchars++] = start;
202 cset->chars[cset->nchars++] = *ch;
203 } else {
204 ch = format++;
205
206 /*
207 * Check to see if the range is in reverse order.
208 */
209 if (start < *ch) {
210 cset->ranges[cset->nranges].start = start;
211 cset->ranges[cset->nranges].end = *ch;
212 } else {
213 cset->ranges[cset->nranges].start = *ch;
214 cset->ranges[cset->nranges].end = start;
215 }
216 cset->nranges++;
217 }
218 } else {
219 cset->chars[cset->nchars++] = *ch;
220 }
221 ch = format++;
222 }
223 return format;
224 }
225 /* }}} */
226
227 /* {{{ CharInSet
228 *----------------------------------------------------------------------
229 *
230 * CharInSet --
231 *
232 * Check to see if a character matches the given set.
233 *
234 * Results:
235 * Returns non-zero if the character matches the given set.
236 *
237 * Side effects:
238 * None.
239 *
240 *----------------------------------------------------------------------
241 */
CharInSet(CharSet * cset,int c)242 static int CharInSet(CharSet *cset, int c)
243 {
244 char ch = (char) c;
245 int i, match = 0;
246
247 for (i = 0; i < cset->nchars; i++) {
248 if (cset->chars[i] == ch) {
249 match = 1;
250 break;
251 }
252 }
253 if (!match) {
254 for (i = 0; i < cset->nranges; i++) {
255 if ((cset->ranges[i].start <= ch)
256 && (ch <= cset->ranges[i].end)) {
257 match = 1;
258 break;
259 }
260 }
261 }
262 return (cset->exclude ? !match : match);
263 }
264 /* }}} */
265
266 /* {{{ ReleaseCharSet
267 *----------------------------------------------------------------------
268 *
269 * ReleaseCharSet --
270 *
271 * Free the storage associated with a character set.
272 *
273 * Results:
274 * None.
275 *
276 * Side effects:
277 * None.
278 *
279 *----------------------------------------------------------------------
280 */
ReleaseCharSet(CharSet * cset)281 static void ReleaseCharSet(CharSet *cset)
282 {
283 efree((char *)cset->chars);
284 if (cset->ranges) {
285 efree((char *)cset->ranges);
286 }
287 }
288 /* }}} */
289
290 /* {{{ ValidateFormat
291 *----------------------------------------------------------------------
292 *
293 * ValidateFormat --
294 *
295 * Parse the format string and verify that it is properly formed
296 * and that there are exactly enough variables on the command line.
297 *
298 * Results:
299 * FAILURE or SUCCESS.
300 *
301 * Side effects:
302 * May set php_error based on abnormal conditions.
303 *
304 * Parameters :
305 * format The format string.
306 * numVars The number of variables passed to the scan command.
307 * totalSubs The number of variables that will be required.
308 *
309 *----------------------------------------------------------------------
310 */
ValidateFormat(char * format,int numVars,int * totalSubs)311 PHPAPI int ValidateFormat(char *format, int numVars, int *totalSubs)
312 {
313 #define STATIC_LIST_SIZE 16
314 int gotXpg, gotSequential, value, i, flags;
315 char *end, *ch = NULL;
316 int staticAssign[STATIC_LIST_SIZE];
317 int *nassign = staticAssign;
318 int objIndex, xpgSize, nspace = STATIC_LIST_SIZE;
319
320 /*
321 * Initialize an array that records the number of times a variable
322 * is assigned to by the format string. We use this to detect if
323 * a variable is multiply assigned or left unassigned.
324 */
325 if (numVars > nspace) {
326 nassign = (int*)safe_emalloc(sizeof(int), numVars, 0);
327 nspace = numVars;
328 }
329 for (i = 0; i < nspace; i++) {
330 nassign[i] = 0;
331 }
332
333 xpgSize = objIndex = gotXpg = gotSequential = 0;
334
335 while (*format != '\0') {
336 ch = format++;
337 flags = 0;
338
339 if (*ch != '%') {
340 continue;
341 }
342 ch = format++;
343 if (*ch == '%') {
344 continue;
345 }
346 if (*ch == '*') {
347 flags |= SCAN_SUPPRESS;
348 ch = format++;
349 goto xpgCheckDone;
350 }
351
352 if ( isdigit( (int)*ch ) ) {
353 /*
354 * Check for an XPG3-style %n$ specification. Note: there
355 * must not be a mixture of XPG3 specs and non-XPG3 specs
356 * in the same format string.
357 */
358 value = ZEND_STRTOUL(format-1, &end, 10);
359 if (*end != '$') {
360 goto notXpg;
361 }
362 format = end+1;
363 ch = format++;
364 gotXpg = 1;
365 if (gotSequential) {
366 goto mixedXPG;
367 }
368 objIndex = value - 1;
369 if ((objIndex < 0) || (numVars && (objIndex >= numVars))) {
370 goto badIndex;
371 } else if (numVars == 0) {
372 /*
373 * In the case where no vars are specified, the user can
374 * specify %9999$ legally, so we have to consider special
375 * rules for growing the assign array. 'value' is
376 * guaranteed to be > 0.
377 */
378
379 /* set a lower artificial limit on this
380 * in the interest of security and resource friendliness
381 * 255 arguments should be more than enough. - cc
382 */
383 if (value > SCAN_MAX_ARGS) {
384 goto badIndex;
385 }
386
387 xpgSize = (xpgSize > value) ? xpgSize : value;
388 }
389 goto xpgCheckDone;
390 }
391
392 notXpg:
393 gotSequential = 1;
394 if (gotXpg) {
395 mixedXPG:
396 php_error_docref(NULL, E_WARNING, "%s", "cannot mix \"%\" and \"%n$\" conversion specifiers");
397 goto error;
398 }
399
400 xpgCheckDone:
401 /*
402 * Parse any width specifier.
403 */
404 if (isdigit(UCHAR(*ch))) {
405 value = ZEND_STRTOUL(format-1, &format, 10);
406 flags |= SCAN_WIDTH;
407 ch = format++;
408 }
409
410 /*
411 * Ignore size specifier.
412 */
413 if ((*ch == 'l') || (*ch == 'L') || (*ch == 'h')) {
414 ch = format++;
415 }
416
417 if (!(flags & SCAN_SUPPRESS) && numVars && (objIndex >= numVars)) {
418 goto badIndex;
419 }
420
421 /*
422 * Handle the various field types.
423 */
424 switch (*ch) {
425 case 'n':
426 case 'd':
427 case 'D':
428 case 'i':
429 case 'o':
430 case 'x':
431 case 'X':
432 case 'u':
433 case 'f':
434 case 'e':
435 case 'E':
436 case 'g':
437 case 's':
438 break;
439
440 case 'c':
441 /* we differ here with the TCL implementation in allowing for */
442 /* a character width specification, to be more consistent with */
443 /* ANSI. since Zend auto allocates space for vars, this is no */
444 /* problem - cc */
445 /*
446 if (flags & SCAN_WIDTH) {
447 php_error_docref(NULL, E_WARNING, "Field width may not be specified in %c conversion");
448 goto error;
449 }
450 */
451 break;
452
453 case '[':
454 if (*format == '\0') {
455 goto badSet;
456 }
457 ch = format++;
458 if (*ch == '^') {
459 if (*format == '\0') {
460 goto badSet;
461 }
462 ch = format++;
463 }
464 if (*ch == ']') {
465 if (*format == '\0') {
466 goto badSet;
467 }
468 ch = format++;
469 }
470 while (*ch != ']') {
471 if (*format == '\0') {
472 goto badSet;
473 }
474 ch = format++;
475 }
476 break;
477 badSet:
478 php_error_docref(NULL, E_WARNING, "Unmatched [ in format string");
479 goto error;
480
481 default: {
482 php_error_docref(NULL, E_WARNING, "Bad scan conversion character \"%c\"", *ch);
483 goto error;
484 }
485 }
486
487 if (!(flags & SCAN_SUPPRESS)) {
488 if (objIndex >= nspace) {
489 /*
490 * Expand the nassign buffer. If we are using XPG specifiers,
491 * make sure that we grow to a large enough size. xpgSize is
492 * guaranteed to be at least one larger than objIndex.
493 */
494 value = nspace;
495 if (xpgSize) {
496 nspace = xpgSize;
497 } else {
498 nspace += STATIC_LIST_SIZE;
499 }
500 if (nassign == staticAssign) {
501 nassign = (void *)safe_emalloc(nspace, sizeof(int), 0);
502 for (i = 0; i < STATIC_LIST_SIZE; ++i) {
503 nassign[i] = staticAssign[i];
504 }
505 } else {
506 nassign = (void *)erealloc((void *)nassign, nspace * sizeof(int));
507 }
508 for (i = value; i < nspace; i++) {
509 nassign[i] = 0;
510 }
511 }
512 nassign[objIndex]++;
513 objIndex++;
514 }
515 } /* while (*format != '\0') */
516
517 /*
518 * Verify that all of the variable were assigned exactly once.
519 */
520 if (numVars == 0) {
521 if (xpgSize) {
522 numVars = xpgSize;
523 } else {
524 numVars = objIndex;
525 }
526 }
527 if (totalSubs) {
528 *totalSubs = numVars;
529 }
530 for (i = 0; i < numVars; i++) {
531 if (nassign[i] > 1) {
532 php_error_docref(NULL, E_WARNING, "%s", "Variable is assigned by multiple \"%n$\" conversion specifiers");
533 goto error;
534 } else if (!xpgSize && (nassign[i] == 0)) {
535 /*
536 * If the space is empty, and xpgSize is 0 (means XPG wasn't
537 * used, and/or numVars != 0), then too many vars were given
538 */
539 php_error_docref(NULL, E_WARNING, "Variable is not assigned by any conversion specifiers");
540 goto error;
541 }
542 }
543
544 if (nassign != staticAssign) {
545 efree((char *)nassign);
546 }
547 return SCAN_SUCCESS;
548
549 badIndex:
550 if (gotXpg) {
551 php_error_docref(NULL, E_WARNING, "%s", "\"%n$\" argument index out of range");
552 } else {
553 php_error_docref(NULL, E_WARNING, "Different numbers of variable names and field specifiers");
554 }
555
556 error:
557 if (nassign != staticAssign) {
558 efree((char *)nassign);
559 }
560 return SCAN_ERROR_INVALID_FORMAT;
561 #undef STATIC_LIST_SIZE
562 }
563 /* }}} */
564
565 /* {{{ php_sscanf_internal
566 * This is the internal function which does processing on behalf of
567 * both sscanf() and fscanf()
568 *
569 * parameters :
570 * string literal string to be processed
571 * format format string
572 * argCount total number of elements in the args array
573 * args arguments passed in from user function (f|s)scanf
574 * varStart offset (in args) of 1st variable passed in to (f|s)scanf
575 * return_value set with the results of the scan
576 */
577
php_sscanf_internal(char * string,char * format,int argCount,zval * args,int varStart,zval * return_value)578 PHPAPI int php_sscanf_internal( char *string, char *format,
579 int argCount, zval *args,
580 int varStart, zval *return_value)
581 {
582 int numVars, nconversions, totalVars = -1;
583 int i, result;
584 zend_long value;
585 int objIndex;
586 char *end, *baseString;
587 zval *current;
588 char op = 0;
589 int base = 0;
590 int underflow = 0;
591 size_t width;
592 zend_long (*fn)() = NULL;
593 char *ch, sch;
594 int flags;
595 char buf[64]; /* Temporary buffer to hold scanned number
596 * strings before they are passed to strtoul() */
597
598 /* do some sanity checking */
599 if ((varStart > argCount) || (varStart < 0)){
600 varStart = SCAN_MAX_ARGS + 1;
601 }
602 numVars = argCount - varStart;
603 if (numVars < 0) {
604 numVars = 0;
605 }
606
607 #if 0
608 zend_printf("<br>in sscanf_internal : <br> string is \"%s\", format = \"%s\"<br> NumVars = %d. VarStart = %d<br>-------------------------<br>",
609 string, format, numVars, varStart);
610 #endif
611 /*
612 * Check for errors in the format string.
613 */
614 if (ValidateFormat(format, numVars, &totalVars) != SCAN_SUCCESS) {
615 scan_set_error_return( numVars, return_value );
616 return SCAN_ERROR_INVALID_FORMAT;
617 }
618
619 objIndex = numVars ? varStart : 0;
620
621 /*
622 * If any variables are passed, make sure they are all passed by reference
623 */
624 if (numVars) {
625 for (i = varStart;i < argCount;i++){
626 if ( ! Z_ISREF(args[ i ] ) ) {
627 php_error_docref(NULL, E_WARNING, "Parameter %d must be passed by reference", i);
628 scan_set_error_return(numVars, return_value);
629 return SCAN_ERROR_VAR_PASSED_BYVAL;
630 }
631 }
632 }
633
634 /*
635 * Allocate space for the result objects. Only happens when no variables
636 * are specified
637 */
638 if (!numVars) {
639 zval tmp;
640
641 /* allocate an array for return */
642 array_init(return_value);
643
644 for (i = 0; i < totalVars; i++) {
645 ZVAL_NULL(&tmp);
646 if (add_next_index_zval(return_value, &tmp) == FAILURE) {
647 scan_set_error_return(0, return_value);
648 return FAILURE;
649 }
650 }
651 varStart = 0; /* Array index starts from 0 */
652 }
653
654 baseString = string;
655
656 /*
657 * Iterate over the format string filling in the result objects until
658 * we reach the end of input, the end of the format string, or there
659 * is a mismatch.
660 */
661 nconversions = 0;
662 /* note ! - we need to limit the loop for objIndex to keep it in bounds */
663
664 while (*format != '\0') {
665 ch = format++;
666 flags = 0;
667
668 /*
669 * If we see whitespace in the format, skip whitespace in the string.
670 */
671 if ( isspace( (int)*ch ) ) {
672 sch = *string;
673 while ( isspace( (int)sch ) ) {
674 if (*string == '\0') {
675 goto done;
676 }
677 string++;
678 sch = *string;
679 }
680 continue;
681 }
682
683 if (*ch != '%') {
684 literal:
685 if (*string == '\0') {
686 underflow = 1;
687 goto done;
688 }
689 sch = *string;
690 string++;
691 if (*ch != sch) {
692 goto done;
693 }
694 continue;
695 }
696
697 ch = format++;
698 if (*ch == '%') {
699 goto literal;
700 }
701
702 /*
703 * Check for assignment suppression ('*') or an XPG3-style
704 * assignment ('%n$').
705 */
706 if (*ch == '*') {
707 flags |= SCAN_SUPPRESS;
708 ch = format++;
709 } else if ( isdigit(UCHAR(*ch))) {
710 value = ZEND_STRTOUL(format-1, &end, 10);
711 if (*end == '$') {
712 format = end+1;
713 ch = format++;
714 objIndex = varStart + value - 1;
715 }
716 }
717
718 /*
719 * Parse any width specifier.
720 */
721 if ( isdigit(UCHAR(*ch))) {
722 width = ZEND_STRTOUL(format-1, &format, 10);
723 ch = format++;
724 } else {
725 width = 0;
726 }
727
728 /*
729 * Ignore size specifier.
730 */
731 if ((*ch == 'l') || (*ch == 'L') || (*ch == 'h')) {
732 ch = format++;
733 }
734
735 /*
736 * Handle the various field types.
737 */
738 switch (*ch) {
739 case 'n':
740 if (!(flags & SCAN_SUPPRESS)) {
741 if (numVars && objIndex >= argCount) {
742 break;
743 } else if (numVars) {
744 current = Z_REFVAL(args[objIndex++]);
745 zval_ptr_dtor(current);
746 ZVAL_LONG(current, (zend_long)(string - baseString) );
747 } else {
748 add_index_long(return_value, objIndex++, string - baseString);
749 }
750 }
751 nconversions++;
752 continue;
753
754 case 'd':
755 case 'D':
756 op = 'i';
757 base = 10;
758 fn = (zend_long (*)())ZEND_STRTOL_PTR;
759 break;
760 case 'i':
761 op = 'i';
762 base = 0;
763 fn = (zend_long (*)())ZEND_STRTOL_PTR;
764 break;
765 case 'o':
766 op = 'i';
767 base = 8;
768 fn = (zend_long (*)())ZEND_STRTOL_PTR;
769 break;
770 case 'x':
771 case 'X':
772 op = 'i';
773 base = 16;
774 fn = (zend_long (*)())ZEND_STRTOL_PTR;
775 break;
776 case 'u':
777 op = 'i';
778 base = 10;
779 flags |= SCAN_UNSIGNED;
780 fn = (zend_long (*)())ZEND_STRTOUL_PTR;
781 break;
782
783 case 'f':
784 case 'e':
785 case 'E':
786 case 'g':
787 op = 'f';
788 break;
789
790 case 's':
791 op = 's';
792 break;
793
794 case 'c':
795 op = 's';
796 flags |= SCAN_NOSKIP;
797 /*-cc-*/
798 if (0 == width) {
799 width = 1;
800 }
801 /*-cc-*/
802 break;
803 case '[':
804 op = '[';
805 flags |= SCAN_NOSKIP;
806 break;
807 } /* switch */
808
809 /*
810 * At this point, we will need additional characters from the
811 * string to proceed.
812 */
813 if (*string == '\0') {
814 underflow = 1;
815 goto done;
816 }
817
818 /*
819 * Skip any leading whitespace at the beginning of a field unless
820 * the format suppresses this behavior.
821 */
822 if (!(flags & SCAN_NOSKIP)) {
823 while (*string != '\0') {
824 sch = *string;
825 if (! isspace((int)sch) ) {
826 break;
827 }
828 string++;
829 }
830 if (*string == '\0') {
831 underflow = 1;
832 goto done;
833 }
834 }
835
836 /*
837 * Perform the requested scanning operation.
838 */
839 switch (op) {
840 case 'c':
841 case 's':
842 /*
843 * Scan a string up to width characters or whitespace.
844 */
845 if (width == 0) {
846 width = (size_t) ~0;
847 }
848 end = string;
849 while (*end != '\0') {
850 sch = *end;
851 if ( isspace( (int)sch ) ) {
852 break;
853 }
854 end++;
855 if (--width == 0) {
856 break;
857 }
858 }
859 if (!(flags & SCAN_SUPPRESS)) {
860 if (numVars && objIndex >= argCount) {
861 break;
862 } else if (numVars) {
863 current = Z_REFVAL(args[objIndex++]);
864 zval_ptr_dtor(current);
865 ZVAL_STRINGL(current, string, end-string);
866 } else {
867 add_index_stringl(return_value, objIndex++, string, end-string);
868 }
869 }
870 string = end;
871 break;
872
873 case '[': {
874 CharSet cset;
875
876 if (width == 0) {
877 width = (size_t) ~0;
878 }
879 end = string;
880
881 format = BuildCharSet(&cset, format);
882 while (*end != '\0') {
883 sch = *end;
884 if (!CharInSet(&cset, (int)sch)) {
885 break;
886 }
887 end++;
888 if (--width == 0) {
889 break;
890 }
891 }
892 ReleaseCharSet(&cset);
893
894 if (string == end) {
895 /*
896 * Nothing matched the range, stop processing
897 */
898 goto done;
899 }
900 if (!(flags & SCAN_SUPPRESS)) {
901 if (numVars && objIndex >= argCount) {
902 break;
903 } else if (numVars) {
904 current = Z_REFVAL(args[objIndex++]);
905 zval_ptr_dtor(current);
906 ZVAL_STRINGL(current, string, end-string);
907 } else {
908 add_index_stringl(return_value, objIndex++, string, end-string);
909 }
910 }
911 string = end;
912 break;
913 }
914 /*
915 case 'c':
916 / Scan a single character./
917
918 sch = *string;
919 string++;
920 if (!(flags & SCAN_SUPPRESS)) {
921 if (numVars) {
922 char __buf[2];
923 __buf[0] = sch;
924 __buf[1] = '\0';
925 current = args[objIndex++];
926 zval_dtor(*current);
927 ZVAL_STRINGL( *current, __buf, 1);
928 } else {
929 add_index_stringl(return_value, objIndex++, &sch, 1);
930 }
931 }
932 break;
933 */
934 case 'i':
935 /*
936 * Scan an unsigned or signed integer.
937 */
938 /*-cc-*/
939 buf[0] = '\0';
940 /*-cc-*/
941 if ((width == 0) || (width > sizeof(buf) - 1)) {
942 width = sizeof(buf) - 1;
943 }
944
945 flags |= SCAN_SIGNOK | SCAN_NODIGITS | SCAN_NOZERO;
946 for (end = buf; width > 0; width--) {
947 switch (*string) {
948 /*
949 * The 0 digit has special meaning at the beginning of
950 * a number. If we are unsure of the base, it
951 * indicates that we are in base 8 or base 16 (if it is
952 * followed by an 'x').
953 */
954 case '0':
955 /*-cc-*/
956 if (base == 16) {
957 flags |= SCAN_XOK;
958 }
959 /*-cc-*/
960 if (base == 0) {
961 base = 8;
962 flags |= SCAN_XOK;
963 }
964 if (flags & SCAN_NOZERO) {
965 flags &= ~(SCAN_SIGNOK | SCAN_NODIGITS | SCAN_NOZERO);
966 } else {
967 flags &= ~(SCAN_SIGNOK | SCAN_XOK | SCAN_NODIGITS);
968 }
969 goto addToInt;
970
971 case '1': case '2': case '3': case '4':
972 case '5': case '6': case '7':
973 if (base == 0) {
974 base = 10;
975 }
976 flags &= ~(SCAN_SIGNOK | SCAN_XOK | SCAN_NODIGITS);
977 goto addToInt;
978
979 case '8': case '9':
980 if (base == 0) {
981 base = 10;
982 }
983 if (base <= 8) {
984 break;
985 }
986 flags &= ~(SCAN_SIGNOK | SCAN_XOK | SCAN_NODIGITS);
987 goto addToInt;
988
989 case 'A': case 'B': case 'C':
990 case 'D': case 'E': case 'F':
991 case 'a': case 'b': case 'c':
992 case 'd': case 'e': case 'f':
993 if (base <= 10) {
994 break;
995 }
996 flags &= ~(SCAN_SIGNOK | SCAN_XOK | SCAN_NODIGITS);
997 goto addToInt;
998
999 case '+': case '-':
1000 if (flags & SCAN_SIGNOK) {
1001 flags &= ~SCAN_SIGNOK;
1002 goto addToInt;
1003 }
1004 break;
1005
1006 case 'x': case 'X':
1007 if ((flags & SCAN_XOK) && (end == buf+1)) {
1008 base = 16;
1009 flags &= ~SCAN_XOK;
1010 goto addToInt;
1011 }
1012 break;
1013 }
1014
1015 /*
1016 * We got an illegal character so we are done accumulating.
1017 */
1018 break;
1019
1020 addToInt:
1021 /*
1022 * Add the character to the temporary buffer.
1023 */
1024 *end++ = *string++;
1025 if (*string == '\0') {
1026 break;
1027 }
1028 }
1029
1030 /*
1031 * Check to see if we need to back up because we only got a
1032 * sign or a trailing x after a 0.
1033 */
1034 if (flags & SCAN_NODIGITS) {
1035 if (*string == '\0') {
1036 underflow = 1;
1037 }
1038 goto done;
1039 } else if (end[-1] == 'x' || end[-1] == 'X') {
1040 end--;
1041 string--;
1042 }
1043
1044 /*
1045 * Scan the value from the temporary buffer. If we are
1046 * returning a large unsigned value, we have to convert it back
1047 * to a string since PHP only supports signed values.
1048 */
1049 if (!(flags & SCAN_SUPPRESS)) {
1050 *end = '\0';
1051 value = (zend_long) (*fn)(buf, NULL, base);
1052 if ((flags & SCAN_UNSIGNED) && (value < 0)) {
1053 snprintf(buf, sizeof(buf), ZEND_ULONG_FMT, value); /* INTL: ISO digit */
1054 if (numVars && objIndex >= argCount) {
1055 break;
1056 } else if (numVars) {
1057 /* change passed value type to string */
1058 current = Z_REFVAL(args[objIndex++]);
1059 zval_ptr_dtor(current);
1060 ZVAL_STRING(current, buf);
1061 } else {
1062 add_index_string(return_value, objIndex++, buf);
1063 }
1064 } else {
1065 if (numVars && objIndex >= argCount) {
1066 break;
1067 } else if (numVars) {
1068 current = Z_REFVAL(args[objIndex++]);
1069 zval_ptr_dtor(current);
1070 ZVAL_LONG(current, value);
1071 } else {
1072 add_index_long(return_value, objIndex++, value);
1073 }
1074 }
1075 }
1076 break;
1077
1078 case 'f':
1079 /*
1080 * Scan a floating point number
1081 */
1082 buf[0] = '\0'; /* call me pedantic */
1083 if ((width == 0) || (width > sizeof(buf) - 1)) {
1084 width = sizeof(buf) - 1;
1085 }
1086 flags |= SCAN_SIGNOK | SCAN_NODIGITS | SCAN_PTOK | SCAN_EXPOK;
1087 for (end = buf; width > 0; width--) {
1088 switch (*string) {
1089 case '0': case '1': case '2': case '3':
1090 case '4': case '5': case '6': case '7':
1091 case '8': case '9':
1092 flags &= ~(SCAN_SIGNOK | SCAN_NODIGITS);
1093 goto addToFloat;
1094 case '+':
1095 case '-':
1096 if (flags & SCAN_SIGNOK) {
1097 flags &= ~SCAN_SIGNOK;
1098 goto addToFloat;
1099 }
1100 break;
1101 case '.':
1102 if (flags & SCAN_PTOK) {
1103 flags &= ~(SCAN_SIGNOK | SCAN_PTOK);
1104 goto addToFloat;
1105 }
1106 break;
1107 case 'e':
1108 case 'E':
1109 /*
1110 * An exponent is not allowed until there has
1111 * been at least one digit.
1112 */
1113 if ((flags & (SCAN_NODIGITS | SCAN_EXPOK)) == SCAN_EXPOK) {
1114 flags = (flags & ~(SCAN_EXPOK|SCAN_PTOK))
1115 | SCAN_SIGNOK | SCAN_NODIGITS;
1116 goto addToFloat;
1117 }
1118 break;
1119 }
1120
1121 /*
1122 * We got an illegal character so we are done accumulating.
1123 */
1124 break;
1125
1126 addToFloat:
1127 /*
1128 * Add the character to the temporary buffer.
1129 */
1130 *end++ = *string++;
1131 if (*string == '\0') {
1132 break;
1133 }
1134 }
1135
1136 /*
1137 * Check to see if we need to back up because we saw a
1138 * trailing 'e' or sign.
1139 */
1140 if (flags & SCAN_NODIGITS) {
1141 if (flags & SCAN_EXPOK) {
1142 /*
1143 * There were no digits at all so scanning has
1144 * failed and we are done.
1145 */
1146 if (*string == '\0') {
1147 underflow = 1;
1148 }
1149 goto done;
1150 }
1151
1152 /*
1153 * We got a bad exponent ('e' and maybe a sign).
1154 */
1155 end--;
1156 string--;
1157 if (*end != 'e' && *end != 'E') {
1158 end--;
1159 string--;
1160 }
1161 }
1162
1163 /*
1164 * Scan the value from the temporary buffer.
1165 */
1166 if (!(flags & SCAN_SUPPRESS)) {
1167 double dvalue;
1168 *end = '\0';
1169 dvalue = zend_strtod(buf, NULL);
1170 if (numVars && objIndex >= argCount) {
1171 break;
1172 } else if (numVars) {
1173 current = Z_REFVAL(args[objIndex++]);
1174 zval_ptr_dtor(current);
1175 ZVAL_DOUBLE(current, dvalue);
1176 } else {
1177 add_index_double(return_value, objIndex++, dvalue );
1178 }
1179 }
1180 break;
1181 } /* switch (op) */
1182 nconversions++;
1183 } /* while (*format != '\0') */
1184
1185 done:
1186 result = SCAN_SUCCESS;
1187
1188 if (underflow && (0==nconversions)) {
1189 scan_set_error_return( numVars, return_value );
1190 result = SCAN_ERROR_EOF;
1191 } else if (numVars) {
1192 convert_to_long(return_value );
1193 Z_LVAL_P(return_value) = nconversions;
1194 } else if (nconversions < totalVars) {
1195 /* TODO: not all elements converted. we need to prune the list - cc */
1196 }
1197 return result;
1198 }
1199 /* }}} */
1200
1201 /* the compiler choked when i tried to make this a macro */
scan_set_error_return(int numVars,zval * return_value)1202 static inline void scan_set_error_return(int numVars, zval *return_value) /* {{{ */
1203 {
1204 if (numVars) {
1205 ZVAL_LONG(return_value, SCAN_ERROR_EOF); /* EOF marker */
1206 } else {
1207 /* convert_to_null calls destructor */
1208 convert_to_null(return_value);
1209 }
1210 }
1211 /* }}} */
1212
1213 /*
1214 * Local variables:
1215 * tab-width: 4
1216 * c-basic-offset: 4
1217 * End:
1218 * vim600: sw=4 ts=4 fdm=marker
1219 * vim<600: sw=4 ts=4
1220 */
1221