xref: /php-src/ext/date/lib/parse_posix.c (revision 66ea59e3)
1 /*
2  * The MIT License (MIT)
3  *
4  * Copyright (c) 2021 MongoDB, Inc.
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to deal
8  * in the Software without restriction, including without limitation the rights
9  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10  * copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in
14  * all copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22  * THE SOFTWARE.
23  */
24 
#include "timelib.h"
#include "timelib_private.h"

#include <limits.h>
27 
28 // This section adds the missing 'strndup' implementation on Windows.
29 #if TIMELIB_USE_BUILTIN_STRNDUP == 1
30 # include <stdlib.h>
31 # include <string.h>
32 
/**
 * char* timelib_strndup(const char* s, size_t n)
 *
 * Returns a pointer to a copy of 's' with at most 'n' characters
 * in memory obtained from 'malloc', or 'NULL' if insufficient
 * memory was available.  The result is always NUL terminated.
 *
 * At most 'n' bytes of 's' are inspected, so 's' does not have to be
 * NUL terminated as long as it holds at least 'n' readable bytes.
 * The caller owns the returned buffer and releases it with 'free'.
 */
static char* timelib_strndup(const char* s, size_t n)
{
	char* result;
	size_t len = 0;

	// Bounded length scan: never look beyond the first 'n' bytes
	while (len < n && s[len] != '\0') {
		len++;
	}

	result = (char*)malloc(len + 1);
	if (!result) {
		return NULL;
	}

	memcpy(result, s, len);
	result[len] = '\0';

	return result;
}
57 #endif
58 
/* Forward declarations */
60 static timelib_posix_trans_info *timelib_posix_trans_info_ctor(void);
61 static void timelib_posix_trans_info_dtor(timelib_posix_trans_info* ts);
62 
63 /* "<" [+-]? .+? ">" */
read_description_numeric_abbr(char ** ptr)64 static char *read_description_numeric_abbr(char **ptr)
65 {
66 	const char *begin = *ptr + 1;
67 
68 	// skip '<'
69 	(*ptr)++;
70 
71 	while (**ptr != '\0' && **ptr != '>') {
72 		(*ptr)++;
73 	}
74 
75 	if (**ptr == '\0') {
76 		return NULL;
77 	}
78 
79 	if (**ptr == '>') {
80 		(*ptr)++;
81 	}
82 
83 	// Abbreviation may not be empty
84 	if (*ptr - begin - 1 < 1) {
85 		return NULL;
86 	}
87 
88 	return timelib_strndup(begin, *ptr - begin - 1);
89 }
90 
91 /* [A-Z]+ */
read_description_abbr(char ** ptr)92 static char *read_description_abbr(char **ptr)
93 {
94 	const char *begin = *ptr;
95 
96 	// Find the end
97 	while ((**ptr >= 'A' && **ptr <= 'Z') || (**ptr >= 'a' && **ptr <= 'z')) {
98 		(*ptr)++;
99 	}
100 
101 	// Abbreviation may not be empty
102 	if (*ptr - begin < 1) {
103 		return NULL;
104 	}
105 
106 	return timelib_strndup(begin, *ptr - begin);
107 }
108 
/* "<" .+ ">" | [A-Za-z]+
 *
 * Reads a time zone abbreviation in either the quoted ("<-03>") or the
 * plain alphabetic ("EST") form, chosen by looking at the first character.
 * Returns a newly allocated string, or NULL if no valid abbreviation is
 * present. */
static char *read_description(char **ptr)
{
	return (**ptr == '<')
		? read_description_numeric_abbr(ptr)
		: read_description_abbr(ptr);
}
118 
/* [+-]?
 *
 * Consumes an optional sign character. Returns -1 when a '-' was consumed
 * and 1 otherwise; '*ptr' only moves when a sign was actually present. */
static int read_sign(char **ptr)
{
	const char sign = **ptr;

	if (sign != '+' && sign != '-') {
		// no sign at all: positive, and nothing is consumed
		return 1;
	}

	(*ptr)++;

	return (sign == '-') ? -1 : 1;
}
133 
134 /* [0-9]+ */
read_number(char ** ptr)135 static timelib_sll read_number(char **ptr)
136 {
137 	const char *begin = *ptr;
138 	int acc = 0;
139 
140 	// skip leading 0's
141 	while (**ptr == '0') {
142 		(*ptr)++;
143 	}
144 
145 	while (**ptr >= '0' && **ptr <= '9') {
146 		acc = acc * 10;
147 		acc += (**ptr) - '0';
148 		(*ptr)++;
149 	}
150 
151 	if (begin == *ptr) {
152 		return TIMELIB_UNSET;
153 	}
154 
155 	return acc;
156 }
157 
158 /* [+-]? [0-9]+ ( ":" [0-9]+ ( ":" [0-9]+ )? )? */
read_offset(char ** ptr)159 static timelib_sll read_offset(char **ptr)
160 {
161 	const char *begin;
162 	int bias = read_sign(ptr);
163 	int hours = 0;
164 	int minutes = 0;
165 	int seconds = 0;
166 
167 	begin = *ptr;
168 
169 	// read through to : or non-digit for hours
170 	hours = read_number(ptr);
171 	if (hours == TIMELIB_UNSET) {
172 		return hours;
173 	}
174 
175 	// check for optional minutes
176 	if (**ptr == ':') {
177 		(*ptr)++; // skip ':'
178 		minutes = read_number(ptr);
179 		if (minutes == TIMELIB_UNSET) {
180 			return minutes;
181 		}
182 	}
183 
184 	// check for optional seconds
185 	if (**ptr == ':') {
186 		(*ptr)++; // skip ':'
187 		seconds = read_number(ptr);
188 		if (seconds == TIMELIB_UNSET) {
189 			return seconds;
190 		}
191 	}
192 
193 	if (begin == *ptr) {
194 		return TIMELIB_UNSET;
195 	}
196 
197 	// multiplication with -1, because the offset in the identifier is the
198 	// 'wrong' way around as for example EST5 is UTC-5 (and not +5)
199 	return -1 * bias * (hours * 3600 + minutes * 60 + seconds);
200 }
201 
202 
203 // Mw.m.d
read_trans_spec_mwd(char ** ptr)204 static timelib_posix_trans_info* read_trans_spec_mwd(char **ptr)
205 {
206 	timelib_posix_trans_info *tmp = timelib_posix_trans_info_ctor();
207 
208 	tmp->type = TIMELIB_POSIX_TRANS_TYPE_MWD;
209 
210 	// Skip 'M'
211 	(*ptr)++;
212 
213 	tmp->mwd.month = read_number(ptr);
214 	if (tmp->mwd.month == TIMELIB_UNSET) {
215 		goto fail;
216 	}
217 
218 	// check for '.' and skip it
219 	if (**ptr != '.') {
220 		goto fail;
221 	}
222 	(*ptr)++;
223 
224 	tmp->mwd.week = read_number(ptr);
225 	if (tmp->mwd.week == TIMELIB_UNSET) {
226 		goto fail;
227 	}
228 
229 	// check for '.' and skip it
230 	if (**ptr != '.') {
231 		goto fail;
232 	}
233 	(*ptr)++;
234 
235 	tmp->mwd.dow = read_number(ptr);
236 	if (tmp->mwd.dow == TIMELIB_UNSET) {
237 		goto fail;
238 	}
239 
240 	return tmp;
241 
242 fail:
243 	timelib_posix_trans_info_dtor(tmp);
244 	return NULL;
245 }
246 
247 // (Jn | n | Mw.m.d) ( /time )?
read_transition_spec(char ** ptr)248 static timelib_posix_trans_info* read_transition_spec(char **ptr)
249 {
250 	timelib_posix_trans_info *tmp;
251 
252 	if (**ptr == 'M') {
253 		tmp = read_trans_spec_mwd(ptr);
254 		if (!tmp) {
255 			return NULL;
256 		}
257 	} else {
258 		tmp = timelib_posix_trans_info_ctor();
259 
260 		if (**ptr == 'J') {
261 			tmp->type = TIMELIB_POSIX_TRANS_TYPE_JULIAN_NO_FEB29;
262 			(*ptr)++;
263 		}
264 
265 		tmp->days = read_number(ptr);
266 		if (tmp->days == TIMELIB_UNSET) {
267 			goto fail;
268 		}
269 	}
270 
271 	// Check for the optional hour
272 	if (**ptr == '/') {
273 		(*ptr)++;
274 		tmp->hour = read_offset(ptr);
275 		if (tmp->hour == TIMELIB_UNSET) {
276 			goto fail;
277 		}
278 		// as the bias for normal offsets = -1, we need to reverse it here
279 		tmp->hour = -tmp->hour;
280 	}
281 
282 	return tmp;
283 
284 fail:
285 	timelib_posix_trans_info_dtor(tmp);
286 	return NULL;
287 }
288 
timelib_posix_trans_info_ctor(void)289 static timelib_posix_trans_info* timelib_posix_trans_info_ctor(void)
290 {
291 	timelib_posix_trans_info *tmp;
292 
293 	tmp = timelib_calloc(1, sizeof(timelib_posix_trans_info));
294 	tmp->type = TIMELIB_POSIX_TRANS_TYPE_JULIAN_FEB29;
295 	tmp->hour = 2 * 3600;
296 
297 	return tmp;
298 }
299 
/* Releases a transition rule created by timelib_posix_trans_info_ctor().
 * The structure owns no further allocations, so a single free is enough. */
static void timelib_posix_trans_info_dtor(timelib_posix_trans_info* ts)
{
	timelib_free(ts);
}
304 
/* Releases a parsed POSIX string structure together with everything it owns:
 * both abbreviations and both transition rules. Members that were never set
 * (still NULL) are skipped, so a partially filled structure can be passed
 * in. 'ps' itself must not be NULL. */
void timelib_posix_str_dtor(timelib_posix_str *ps)
{
	/* abbreviations */
	if (ps->std != NULL) {
		timelib_free(ps->std);
	}
	if (ps->dst != NULL) {
		timelib_free(ps->dst);
	}

	/* transition rules */
	if (ps->dst_begin != NULL) {
		timelib_posix_trans_info_dtor(ps->dst_begin);
	}
	if (ps->dst_end != NULL) {
		timelib_posix_trans_info_dtor(ps->dst_end);
	}

	/* and finally the container itself */
	timelib_free(ps);
}
322 
timelib_parse_posix_str(const char * posix)323 timelib_posix_str* timelib_parse_posix_str(const char *posix)
324 {
325 	timelib_posix_str *tmp = timelib_calloc(1, sizeof(timelib_posix_str));
326 	char *ptr = (char*) posix;
327 
328 	// read standard description (ie. EST or <-03>)
329 	tmp->std = read_description(&ptr);
330 	if (!tmp->std) {
331 		timelib_posix_str_dtor(tmp);
332 		return NULL;
333 	}
334 
335 	// read required offset
336 	tmp->std_offset = read_offset(&ptr);
337 	if (tmp->std_offset == TIMELIB_UNSET) {
338 		timelib_posix_str_dtor(tmp);
339 		return NULL;
340 	}
341 
342 	// if we're at the end return, otherwise we'll continue to try to parse
343 	// the dst abbreviation and spec
344 	if (*ptr == '\0') {
345 		return tmp;
346 	}
347 
348 	// assume dst is there, and initialise offset
349 	tmp->dst_offset = tmp->std_offset + 3600;
350 
351 	tmp->dst = read_description(&ptr);
352 	if (!tmp->dst) {
353 		timelib_posix_str_dtor(tmp);
354 		return NULL;
355 	}
356 
357 	// if we have a "," here, then the dst offset is the standard offset +
358 	// 3600 seconds, otherwise, try to parse the dst offset
359 	if (*ptr != ',' && *ptr != '\0') {
360 		tmp->dst_offset = read_offset(&ptr);
361 		if (tmp->dst_offset == TIMELIB_UNSET) {
362 			timelib_posix_str_dtor(tmp);
363 			return NULL;
364 		}
365 	}
366 
367 	// if we *don't* have a "," here, we're missing the dst transitions
368 	// ,start[/time],end[/time]
369 	if (*ptr != ',') {
370 		timelib_posix_str_dtor(tmp);
371 		return NULL;
372 	}
373 
374 	ptr++; // skip ','
375 
376 	// start[/time]
377 	tmp->dst_begin = read_transition_spec(&ptr);
378 	if (!tmp->dst_begin) {
379 		timelib_posix_str_dtor(tmp);
380 		return NULL;
381 	}
382 
383 	// if we *don't* have a "," here, we're missing the dst end transition
384 	// ,end[/time]
385 	if (*ptr != ',') {
386 		timelib_posix_str_dtor(tmp);
387 		return NULL;
388 	}
389 
390 	ptr++; // skip ','
391 
392 	// end[/time]
393 	tmp->dst_end = read_transition_spec(&ptr);
394 	if (!tmp->dst_end) {
395 		timelib_posix_str_dtor(tmp);
396 		return NULL;
397 	}
398 
399 	// make sure there is no trailing data
400 	if (*ptr != '\0') {
401 		timelib_posix_str_dtor(tmp);
402 		return NULL;
403 	}
404 
405 	return tmp;
406 }
407 
408 static const int month_lengths[2][MONTHS_PER_YEAR] = {
409 	{ 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31 }, // normal year
410 	{ 31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31 }  // leap year
411 };
412 
413 /* This function is adapted from the 'localtime.c' function 'transtime' as bundled with the 'tzcode' project
414  * from IANA, and is public domain licensed. */
calc_transition(timelib_posix_trans_info * psi,timelib_sll year)415 static timelib_sll calc_transition(timelib_posix_trans_info *psi, timelib_sll year)
416 {
417 	int leap_year = timelib_is_leap(year);
418 
419 	switch (psi->type) {
420 		case TIMELIB_POSIX_TRANS_TYPE_JULIAN_NO_FEB29: {
421 			timelib_sll value = (psi->days - 1);
422 
423 			if (leap_year && psi->days >= 60) {
424 				value++;
425 			}
426 
427 			return value * SECS_PER_DAY;
428 		}
429 
430 		case TIMELIB_POSIX_TRANS_TYPE_JULIAN_FEB29: {
431 			return psi->days * SECS_PER_DAY;
432 		}
433 
434 		case TIMELIB_POSIX_TRANS_TYPE_MWD: {
435 			/*
436 			 * Mm.n.d - nth "dth day" of month m.
437 			 */
438 
439 			int i, d, m1, yy0, yy1, yy2, dow;
440 			timelib_sll value = 0;
441 
442 			/* Use Zeller's Congruence to get day-of-week of first day of
443 			 * month. */
444 			m1 = (psi->mwd.month + 9) % 12 + 1;
445 			yy0 = (psi->mwd.month <= 2) ? (year - 1) : year;
446 			yy1 = yy0 / 100;
447 			yy2 = yy0 % 100;
448 			dow = ((26 * m1 - 2) / 10 + 1 + yy2 + yy2 / 4 + yy1 / 4 - 2 * yy1) % 7;
449 			if (dow < 0) {
450 				dow += DAYS_PER_WEEK;
451 			}
452 
453 			/* "dow" is the day-of-week of the first day of the month. Get the
454 			 * day-of-month (zero-origin) of the first "dow" day of the month. */
455 			d = psi->mwd.dow - dow;
456 			if (d < 0) {
457 				d += DAYS_PER_WEEK;
458 			}
459 			for (i = 1; i < psi->mwd.week; ++i) {
460 				if (d + DAYS_PER_WEEK >= month_lengths[leap_year][psi->mwd.month - 1]) {
461 					break;
462 				}
463 				d += DAYS_PER_WEEK;
464 			}
465 
466 			/* "d" is the day-of-month (zero-origin) of the day we want. */
467 			value = d * SECS_PER_DAY;
468 			for (i = 0; i < psi->mwd.month - 1; ++i) {
469 				value += month_lengths[leap_year][i] * SECS_PER_DAY;
470 			}
471 
472 			return value;
473 		} break;
474 	}
475 
476 	return 0;
477 }
478 
/* Division that rounds towards negative infinity ('b' must be positive).
 * C's '/' truncates towards zero, which gives the wrong answer for the
 * negative year numbers handled below. */
static timelib_sll posix_floor_div(timelib_sll a, timelib_sll b)
{
	timelib_sll q = a / b;

	if (a % b < 0) {
		q--;
	}

	return q;
}

/* Returns the number of leap days between the start of year 1 and January
 * 1st of year 'y' in the proleptic Gregorian calendar. The result is
 * negative for years before year 1. Only differences between two results
 * are meaningful to the callers. */
static timelib_sll count_leap_years(timelib_sll y)
{
	/* Because we want this for Jan 1, the leap day hasn't happened yet, so
	 * subtract one of year before we calculate */
	y--;

	return posix_floor_div(y, 4) - posix_floor_div(y, 100) + posix_floor_div(y, 400);
}
487 
/* Returns the number of seconds between January 1st 1970 and January 1st of
 * 'year': the whole years in between counted as DAYS_PER_YEAR days each,
 * plus one extra day for every leap day that falls inside that span. */
timelib_sll timelib_ts_at_start_of_year(timelib_sll year)
{
	const timelib_sll plain_year_days = (year - 1970) * DAYS_PER_YEAR;
	const timelib_sll leap_days_to_year = count_leap_years(year);
	const timelib_sll leap_days_to_epoch = count_leap_years(1970);

	return SECS_PER_DAY * (plain_year_days + leap_days_to_year - leap_days_to_epoch);
}
499 
/* Appends the two DST transitions of 'year' to 'transitions', ordered by
 * time, and raises 'transitions->count' by two.
 *
 * Each transition time is the start of the year, plus the day offset of the
 * rule, plus the rule's time of day, minus the UTC offset that is in effect
 * just before the transition (standard time before DST starts, DST before it
 * ends).
 *
 * The caller must make sure 'transitions' has room for two more entries;
 * no bounds check is done here. */
void timelib_get_transitions_for_year(timelib_tzinfo *tz, timelib_sll year, timelib_posix_transitions *transitions)
{
	const timelib_sll year_start = timelib_ts_at_start_of_year(year);

	/* Moment DST starts: the rule is expressed in standard time */
	const timelib_sll dst_starts = year_start
		+ calc_transition(tz->posix_info->dst_begin, year)
		+ tz->posix_info->dst_begin->hour
		- tz->posix_info->std_offset;

	/* Moment DST ends: the rule is expressed in DST */
	const timelib_sll dst_stops = year_start
		+ calc_transition(tz->posix_info->dst_end, year)
		+ tz->posix_info->dst_end->hour
		- tz->posix_info->dst_offset;

	/* Whichever happens first in the year takes the first free slot. If DST
	 * does not start before it stops, the "stop" entry goes first. */
	const int start_comes_first = (dst_starts < dst_stops);
	const int start_slot = start_comes_first ? 0 : 1;
	const int stop_slot  = start_comes_first ? 1 : 0;

	transitions->times[transitions->count + start_slot] = dst_starts;
	transitions->types[transitions->count + start_slot] = tz->posix_info->type_index_dst_type;

	transitions->times[transitions->count + stop_slot] = dst_stops;
	transitions->types[transitions->count + stop_slot] = tz->posix_info->type_index_std_type;

	transitions->count += 2;
}
530 
timelib_fetch_posix_timezone_offset(timelib_tzinfo * tz,timelib_sll ts,timelib_sll * transition_time)531 ttinfo* timelib_fetch_posix_timezone_offset(timelib_tzinfo *tz, timelib_sll ts, timelib_sll *transition_time)
532 {
533 	timelib_sll               year;
534 	timelib_time              dummy;
535 	timelib_posix_transitions transitions = { 0 };
536 	size_t            i;
537 
538 	/* If there is no second (dst_end) information, the UTC offset is valid for the whole year, so no need to
539 	 * do clever logic */
540 	if (!tz->posix_info->dst_end) {
541 		if (transition_time) {
542 			*transition_time = tz->trans[tz->bit64.timecnt - 1];
543 		}
544 		return &(tz->type[tz->posix_info->type_index_std_type]);
545 	}
546 
547 	/* Find 'year' (UTC) for 'ts' */
548 	timelib_unixtime2gmt(&dummy, ts);
549 	year = dummy.y;
550 
551 	/* Calculate transition times for 'year-1', 'year', and 'year+1' */
552 	timelib_get_transitions_for_year(tz, year - 1, &transitions);
553 	timelib_get_transitions_for_year(tz, year,     &transitions);
554 	timelib_get_transitions_for_year(tz, year + 1, &transitions);
555 
556 	/* Check where the 'ts' falls in the 4 transitions */
557 	for (i = 1; i < transitions.count; i++) {
558 		if (ts < transitions.times[i]) {
559 			if (transition_time) {
560 				*transition_time = transitions.times[i - 1];
561 			}
562 			return &(tz->type[transitions.types[i - 1]]);
563 		}
564 	}
565 
566 	return NULL;
567 }
568