xref: /php-src/ext/date/lib/parse_iso_intervals.re (revision 06d4c70e)
1/*
2 * The MIT License (MIT)
3 *
4 * Copyright (c) 2015-2019 Derick Rethans
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to deal
8 * in the Software without restriction, including without limitation the rights
9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 * copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22 * THE SOFTWARE.
23 */
24
25#include "timelib.h"
26#include "timelib_private.h"
27
28#include <ctype.h>
29
/*
 * Fallbacks for strtoll() on toolchains that do not provide it.  The
 * replacement is chosen in this order: _atoi64() under MSVC, then atoll()
 * when HAVE_ATOLL is defined, then strtol().  Nothing is defined when the
 * compiler is not MSVC and HAVE_STRTOLL is set.  Note that the _atoi64()
 * and atoll() variants ignore the end-pointer and base arguments.
 */
#if defined(_MSC_VER)
# define strtoll(s, f, b) _atoi64(s)
#elif !defined(HAVE_STRTOLL) && defined(HAVE_ATOLL)
# define strtoll(s, f, b) atoll(s)
#elif !defined(HAVE_STRTOLL)
# define strtoll(s, f, b) strtol(s, f, b)
#endif
39
/* Token codes returned by scan() */
#define EOI              257  /* end of input; produced by YYFILL */
#define TIMELIB_PERIOD   260  /* a period or a recurrence count was parsed */
#define TIMELIB_ISO_DATE 261  /* a begin or end date-time was parsed */
#define TIMELIB_ERROR    999  /* not referenced by the code in this file */

typedef unsigned char uchar;

/* Not referenced by the code in this file */
#define BSIZE 8192

/* Glue between the re2c generated matcher and the Scanner state.  These rely
 * on the local names `s` and `cursor` being in scope, as they are in scan(). */
#define YYCTYPE   uchar
#define YYCURSOR  cursor
#define YYLIMIT   s->lim
#define YYMARKER  s->ptr
/* The whole input is buffered up front, so a refill request means the
 * scanner has run out of input. */
#define YYFILL(n) return EOI;

/* Stores the cursor back into the scanner and returns token code `i` */
#define RET(i) {s->cur = cursor; return i;}

#define timelib_string_free timelib_free

/* TIMELIB_INIT copies the matched token into `str` and points `ptr` at it;
 * TIMELIB_DEINIT releases that copy again.  Both expect the locals `s`,
 * `cursor`, `str` and `ptr` of scan(). */
#define TIMELIB_INIT   s->cur = cursor; str = timelib_string(s); ptr = str
#define TIMELIB_DEINIT timelib_string_free(str)

/* Tracing hooks: no-ops unless DEBUG_PARSER is defined */
#ifndef DEBUG_PARSER
#define DEBUG_OUTPUT(s)
#define YYDEBUG(s,c)
#else
#define DEBUG_OUTPUT(s) printf("%s\n", s);
#define YYDEBUG(s,c) { if (s != -1) { printf("state: %d ", s); printf("[%c]\n", c); } }
#endif
70
71typedef struct _Scanner {
72	int           fd;
73	uchar        *lim, *str, *ptr, *cur, *tok, *pos;
74	unsigned int  line, len;
75	timelib_error_container *errors;
76
77	timelib_time     *begin;
78	timelib_time     *end;
79	timelib_rel_time *period;
80	int               recurrences;
81
82	int have_period;
83	int have_recurrences;
84	int have_date;
85	int have_begin_date;
86	int have_end_date;
87} Scanner;
88
89static void add_error(Scanner *s, const char *error)
90{
91	s->errors->error_count++;
92	s->errors->error_messages = timelib_realloc(s->errors->error_messages, s->errors->error_count * sizeof(timelib_error_message));
93	s->errors->error_messages[s->errors->error_count - 1].position = s->tok ? s->tok - s->str : 0;
94	s->errors->error_messages[s->errors->error_count - 1].character = s->tok ? *s->tok : 0;
95	s->errors->error_messages[s->errors->error_count - 1].message = timelib_strdup(error);
96}
97
98static char *timelib_string(Scanner *s)
99{
100	char *tmp = timelib_calloc(1, s->cur - s->tok + 1);
101	memcpy(tmp, s->tok, s->cur - s->tok);
102
103	return tmp;
104}
105
106static timelib_sll timelib_get_nr(const char **ptr, int max_length)
107{
108	const char *begin, *end;
109	char *str;
110	timelib_sll tmp_nr = TIMELIB_UNSET;
111	int len = 0;
112
113	while ((**ptr < '0') || (**ptr > '9')) {
114		if (**ptr == '\0') {
115			return TIMELIB_UNSET;
116		}
117		++*ptr;
118	}
119	begin = *ptr;
120	while ((**ptr >= '0') && (**ptr <= '9') && len < max_length) {
121		++*ptr;
122		++len;
123	}
124	end = *ptr;
125	str = timelib_calloc(1, end - begin + 1);
126	memcpy(str, begin, end - begin);
127	tmp_nr = strtoll(str, NULL, 10);
128	timelib_free(str);
129	return tmp_nr;
130}
131
132static timelib_ull timelib_get_unsigned_nr(const char **ptr, int max_length)
133{
134	timelib_ull dir = 1;
135
136	while (((**ptr < '0') || (**ptr > '9')) && (**ptr != '+') && (**ptr != '-')) {
137		if (**ptr == '\0') {
138			return TIMELIB_UNSET;
139		}
140		++*ptr;
141	}
142
143	while (**ptr == '+' || **ptr == '-')
144	{
145		if (**ptr == '-') {
146			dir *= -1;
147		}
148		++*ptr;
149	}
150	return dir * timelib_get_nr(ptr, max_length);
151}
152
/*
 * Frees every element of arg.v (arg.c of them) and then arg.v itself.
 *
 * `arg` is an lvalue expression of a struct type with an element count `c`
 * and a pointer array `v`.  The argument is parenthesised and the loop index
 * has a deliberately unusual name, so that an argument such as `parts[i]`
 * refers to the caller's `i` and not to the macro's own counter.  `arg` is
 * evaluated more than once and must therefore be free of side effects.
 */
#define timelib_split_free(arg) {                                          \
	int timelib_split_free_idx_;                                           \
	for (timelib_split_free_idx_ = 0;                                      \
	     timelib_split_free_idx_ < (arg).c;                                \
	     timelib_split_free_idx_++) {                                      \
		timelib_free((arg).v[timelib_split_free_idx_]);                    \
	}                                                                      \
	if ((arg).v) {                                                         \
		timelib_free((arg).v);                                             \
	}                                                                      \
}
162
/* The date parser's scan function is too large for VC6 - VC7.x;
   dropping the optimization solves the problem */
165#ifdef PHP_WIN32
166#pragma optimize( "", off )
167#endif
168static int scan(Scanner *s)
169{
170	uchar *cursor = s->cur;
171	char *str;
172	const char *ptr = NULL;
173
174std:
175	s->tok = cursor;
176	s->len = 0;
177/*!re2c
178
179/* */
180any = [\000-\377];
181number = [0-9]+;
182
183hour24lz = [01][0-9] | "2"[0-4];
184minutelz = [0-5][0-9];
185monthlz = "0" [1-9] | "1" [0-2];
186monthlzz = "0" [0-9] | "1" [0-2];
187daylz   = "0" [1-9] | [1-2][0-9] | "3" [01];
188daylzz  = "0" [0-9] | [1-2][0-9] | "3" [01];
189secondlz = minutelz;
190year4 = [0-9]{4};
191weekofyear = "0"[1-9] | [1-4][0-9] | "5"[0-3];
192
193space = [ \t]+;
194datetimebasic  = year4 monthlz daylz "T" hour24lz minutelz secondlz "Z";
195datetimeextended  = year4 "-" monthlz "-" daylz "T" hour24lz ':' minutelz ':' secondlz "Z";
196period   = "P" (number "Y")? (number "M")? (number "W")? (number "D")? ("T" (number "H")? (number "M")? (number "S")?)?;
197combinedrep = "P" year4 "-" monthlzz "-" daylzz "T" hour24lz ':' minutelz ':' secondlz;
198
199recurrences = "R" number;
200
201isoweekday       = year4 "-"? "W" weekofyear "-"? [0-7];
202isoweek          = year4 "-"? "W" weekofyear;
203
204*/
205
206/*!re2c
207	/* so that vim highlights correctly */
208	recurrences
209	{
210		DEBUG_OUTPUT("recurrences");
211		TIMELIB_INIT;
212		ptr++;
213		s->recurrences = timelib_get_unsigned_nr(&ptr, 9);
214		TIMELIB_DEINIT;
215		s->have_recurrences = 1;
216		return TIMELIB_PERIOD;
217	}
218
219	datetimebasic| datetimeextended
220	{
221		timelib_time *current;
222
223		if (s->have_date || s->have_period) {
224			current = s->end;
225			s->have_end_date = 1;
226		} else {
227			current = s->begin;
228			s->have_begin_date = 1;
229		}
230		DEBUG_OUTPUT("datetimebasic | datetimeextended");
231		TIMELIB_INIT;
232		current->y = timelib_get_nr(&ptr, 4);
233		current->m = timelib_get_nr(&ptr, 2);
234		current->d = timelib_get_nr(&ptr, 2);
235		current->h = timelib_get_nr(&ptr, 2);
236		current->i = timelib_get_nr(&ptr, 2);
237		current->s = timelib_get_nr(&ptr, 2);
238		s->have_date = 1;
239		TIMELIB_DEINIT;
240		return TIMELIB_ISO_DATE;
241	}
242
243	period
244	{
245		timelib_sll nr;
246		int         in_time = 0;
247		DEBUG_OUTPUT("period");
248		TIMELIB_INIT;
249		ptr++;
250		do {
251			if ( *ptr == 'T' ) {
252				in_time = 1;
253				ptr++;
254			}
255			if ( *ptr == '\0' ) {
256				add_error(s, "Missing expected time part");
257				break;
258			}
259
260			nr = timelib_get_unsigned_nr(&ptr, 12);
261			switch (*ptr) {
262				case 'Y': s->period->y = nr; break;
263				case 'W': s->period->d += nr * 7; break;
264				case 'D': s->period->d += nr; break;
265				case 'H': s->period->h = nr; break;
266				case 'S': s->period->s = nr; break;
267				case 'M':
268					if (in_time) {
269						s->period->i = nr;
270					} else {
271						s->period->m = nr;
272					}
273					break;
274				default:
275					add_error(s, "Undefined period specifier");
276					break;
277			}
278			ptr++;
279		} while (!s->errors->error_count && *ptr);
280		s->have_period = 1;
281		TIMELIB_DEINIT;
282		return TIMELIB_PERIOD;
283	}
284
285	combinedrep
286	{
287		DEBUG_OUTPUT("combinedrep");
288		TIMELIB_INIT;
289		s->period->y = timelib_get_unsigned_nr(&ptr, 4);
290		ptr++;
291		s->period->m = timelib_get_unsigned_nr(&ptr, 2);
292		ptr++;
293		s->period->d = timelib_get_unsigned_nr(&ptr, 2);
294		ptr++;
295		s->period->h = timelib_get_unsigned_nr(&ptr, 2);
296		ptr++;
297		s->period->i = timelib_get_unsigned_nr(&ptr, 2);
298		ptr++;
299		s->period->s = timelib_get_unsigned_nr(&ptr, 2);
300		s->have_period = 1;
301		TIMELIB_DEINIT;
302		return TIMELIB_PERIOD;
303	}
304
305	[ .,\t/]
306	{
307		goto std;
308	}
309
310	"\000"|"\n"
311	{
312		s->pos = cursor; s->line++;
313		goto std;
314	}
315
316	any
317	{
318		add_error(s, "Unexpected character");
319		goto std;
320	}
321*/
322}
323#ifdef PHP_WIN32
324#pragma optimize( "", on )
325#endif
326
327/*!max:re2c */
328
329void timelib_strtointerval(const char *s, size_t len,
330                           timelib_time **begin, timelib_time **end,
331						   timelib_rel_time **period, int *recurrences,
332						   timelib_error_container **errors)
333{
334	Scanner in;
335	int t;
336	const char *e = s + len - 1;
337
338	memset(&in, 0, sizeof(in));
339	in.errors = timelib_malloc(sizeof(timelib_error_container));
340	in.errors->warning_count = 0;
341	in.errors->warning_messages = NULL;
342	in.errors->error_count = 0;
343	in.errors->error_messages = NULL;
344
345	if (len > 0) {
346		while (isspace(*s) && s < e) {
347			s++;
348		}
349		while (isspace(*e) && e > s) {
350			e--;
351		}
352	}
353	if (e - s < 0) {
354		add_error(&in, "Empty string");
355		if (errors) {
356			*errors = in.errors;
357		} else {
358			timelib_error_container_dtor(in.errors);
359		}
360		return;
361	}
362	e++;
363
364	/* init cursor */
365	in.str = timelib_malloc((e - s) + YYMAXFILL);
366	memset(in.str, 0, (e - s) + YYMAXFILL);
367	memcpy(in.str, s, (e - s));
368	in.lim = in.str + (e - s) + YYMAXFILL;
369	in.cur = in.str;
370
371	/* init value containers */
372	in.begin = timelib_time_ctor();
373	in.begin->y = TIMELIB_UNSET;
374	in.begin->d = TIMELIB_UNSET;
375	in.begin->m = TIMELIB_UNSET;
376	in.begin->h = TIMELIB_UNSET;
377	in.begin->i = TIMELIB_UNSET;
378	in.begin->s = TIMELIB_UNSET;
379	in.begin->us = 0;
380	in.begin->z = 0;
381	in.begin->dst = 0;
382	in.begin->is_localtime = 0;
383	in.begin->zone_type = TIMELIB_ZONETYPE_OFFSET;
384
385	in.end = timelib_time_ctor();
386	in.end->y = TIMELIB_UNSET;
387	in.end->d = TIMELIB_UNSET;
388	in.end->m = TIMELIB_UNSET;
389	in.end->h = TIMELIB_UNSET;
390	in.end->i = TIMELIB_UNSET;
391	in.end->s = TIMELIB_UNSET;
392	in.end->us = 0;
393	in.end->z = 0;
394	in.end->dst = 0;
395	in.end->is_localtime = 0;
396	in.end->zone_type = TIMELIB_ZONETYPE_OFFSET;
397
398	in.period = timelib_rel_time_ctor();
399	in.period->y = 0;
400	in.period->d = 0;
401	in.period->m = 0;
402	in.period->h = 0;
403	in.period->i = 0;
404	in.period->s = 0;
405	in.period->weekday = 0;
406	in.period->weekday_behavior = 0;
407	in.period->first_last_day_of = 0;
408	in.period->days = TIMELIB_UNSET;
409
410	in.recurrences = 1;
411
412	do {
413		t = scan(&in);
414#ifdef DEBUG_PARSER
415		printf("%d\n", t);
416#endif
417	} while(t != EOI);
418
419	timelib_free(in.str);
420	if (errors) {
421		*errors = in.errors;
422	} else {
423		timelib_error_container_dtor(in.errors);
424	}
425	if (in.have_begin_date) {
426		*begin = in.begin;
427	} else {
428		timelib_time_dtor(in.begin);
429	}
430	if (in.have_end_date) {
431		*end   = in.end;
432	} else {
433		timelib_time_dtor(in.end);
434	}
435	if (in.have_period) {
436		*period = in.period;
437	} else {
438		timelib_rel_time_dtor(in.period);
439	}
440	if (in.have_recurrences) {
441		*recurrences = in.recurrences;
442	}
443}
444
445
446/*
447 * vim: syntax=c
448 */
449