/*
 * Copyright (C) 2018 Alexander Borisov
 *
 * Author: Alexander Borisov <borisov@lexbor.com>
 */
6
7 #include "lexbor/core/str.h"
8
9 #define LEXBOR_STR_RES_ANSI_REPLACEMENT_CHARACTER
10 #define LEXBOR_STR_RES_MAP_LOWERCASE
11 #define LEXBOR_STR_RES_MAP_UPPERCASE
12 #include "lexbor/core/str_res.h"
13
14
15 lexbor_str_t *
lexbor_str_create(void)16 lexbor_str_create(void)
17 {
18 return lexbor_calloc(1, sizeof(lexbor_str_t));
19 }
20
21 lxb_char_t *
lexbor_str_init(lexbor_str_t * str,lexbor_mraw_t * mraw,size_t size)22 lexbor_str_init(lexbor_str_t *str, lexbor_mraw_t *mraw, size_t size)
23 {
24 if (str == NULL) {
25 return NULL;
26 }
27
28 str->data = lexbor_mraw_alloc(mraw, (size + 1));
29 str->length = 0;
30
31 if (str->data != NULL) {
32 *str->data = '\0';
33 }
34
35 return str->data;
36 }
37
38 lxb_char_t *
lexbor_str_init_append(lexbor_str_t * str,lexbor_mraw_t * mraw,const lxb_char_t * data,size_t length)39 lexbor_str_init_append(lexbor_str_t *str, lexbor_mraw_t *mraw,
40 const lxb_char_t *data, size_t length)
41 {
42 lxb_char_t *p;
43
44 if (str == NULL) {
45 return NULL;
46 }
47
48 p = lexbor_mraw_alloc(mraw, (length + 1));
49 if (p == NULL) {
50 return NULL;
51 }
52
53 memcpy(p, data, length);
54
55 p[length] = '\0';
56
57 str->data = p;
58 str->length = length;
59
60 return p;
61 }
62
63 void
lexbor_str_clean(lexbor_str_t * str)64 lexbor_str_clean(lexbor_str_t *str)
65 {
66 str->length = 0;
67 }
68
69 void
lexbor_str_clean_all(lexbor_str_t * str)70 lexbor_str_clean_all(lexbor_str_t *str)
71 {
72 memset(str, 0, sizeof(lexbor_str_t));
73 }
74
75 lexbor_str_t *
lexbor_str_destroy(lexbor_str_t * str,lexbor_mraw_t * mraw,bool destroy_obj)76 lexbor_str_destroy(lexbor_str_t *str, lexbor_mraw_t *mraw, bool destroy_obj)
77 {
78 if (str == NULL) {
79 return NULL;
80 }
81
82 if (str->data != NULL) {
83 str->data = lexbor_mraw_free(mraw, str->data);
84 }
85
86 if (destroy_obj) {
87 return lexbor_free(str);
88 }
89
90 return str;
91 }
92
93 lxb_char_t *
lexbor_str_realloc(lexbor_str_t * str,lexbor_mraw_t * mraw,size_t new_size)94 lexbor_str_realloc(lexbor_str_t *str, lexbor_mraw_t *mraw, size_t new_size)
95 {
96 lxb_char_t *tmp = lexbor_mraw_realloc(mraw, str->data, new_size);
97 if (tmp == NULL) {
98 return NULL;
99 }
100
101 str->data = tmp;
102
103 return tmp;
104 }
105
106 lxb_char_t *
lexbor_str_check_size(lexbor_str_t * str,lexbor_mraw_t * mraw,size_t plus_len)107 lexbor_str_check_size(lexbor_str_t *str, lexbor_mraw_t *mraw, size_t plus_len)
108 {
109 lxb_char_t *tmp;
110
111 if (str->length > (SIZE_MAX - plus_len)) {
112 return NULL;
113 }
114
115 if ((str->length + plus_len) <= lexbor_str_size(str)) {
116 return str->data;
117 }
118
119 tmp = lexbor_mraw_realloc(mraw, str->data, (str->length + plus_len));
120 if (tmp == NULL) {
121 return NULL;
122 }
123
124 str->data = tmp;
125
126 return tmp;
127 }
128
/* Append API */
130 lxb_char_t *
lexbor_str_append(lexbor_str_t * str,lexbor_mraw_t * mraw,const lxb_char_t * buff,size_t length)131 lexbor_str_append(lexbor_str_t *str, lexbor_mraw_t *mraw,
132 const lxb_char_t *buff, size_t length)
133 {
134 lxb_char_t *data_begin;
135
136 lexbor_str_check_size_arg_m(str, lexbor_str_size(str),
137 mraw, (length + 1), NULL);
138
139 data_begin = &str->data[str->length];
140 memcpy(data_begin, buff, sizeof(lxb_char_t) * length);
141
142 str->length += length;
143 str->data[str->length] = '\0';
144
145 return data_begin;
146 }
147
148 lxb_char_t *
lexbor_str_append_before(lexbor_str_t * str,lexbor_mraw_t * mraw,const lxb_char_t * buff,size_t length)149 lexbor_str_append_before(lexbor_str_t *str, lexbor_mraw_t *mraw,
150 const lxb_char_t *buff, size_t length)
151 {
152 lxb_char_t *data_begin;
153
154 lexbor_str_check_size_arg_m(str, lexbor_str_size(str),
155 mraw, (length + 1), NULL);
156
157 data_begin = &str->data[str->length];
158
159 memmove(&str->data[length], str->data, sizeof(lxb_char_t) * str->length);
160 memcpy(str->data, buff, sizeof(lxb_char_t) * length);
161
162 str->length += length;
163 str->data[str->length] = '\0';
164
165 return data_begin;
166 }
167
168 lxb_char_t *
lexbor_str_append_one(lexbor_str_t * str,lexbor_mraw_t * mraw,const lxb_char_t data)169 lexbor_str_append_one(lexbor_str_t *str, lexbor_mraw_t *mraw,
170 const lxb_char_t data)
171 {
172 lexbor_str_check_size_arg_m(str, lexbor_str_size(str), mraw, 2, NULL);
173
174 str->data[str->length] = data;
175
176 str->length += 1;
177 str->data[str->length] = '\0';
178
179 return &str->data[(str->length - 1)];
180 }
181
182 lxb_char_t *
lexbor_str_append_lowercase(lexbor_str_t * str,lexbor_mraw_t * mraw,const lxb_char_t * data,size_t length)183 lexbor_str_append_lowercase(lexbor_str_t *str, lexbor_mraw_t *mraw,
184 const lxb_char_t *data, size_t length)
185 {
186 size_t i;
187 lxb_char_t *data_begin;
188
189 lexbor_str_check_size_arg_m(str, lexbor_str_size(str),
190 mraw, (length + 1), NULL);
191
192 data_begin = &str->data[str->length];
193
194 for (i = 0; i < length; i++) {
195 data_begin[i] = lexbor_str_res_map_lowercase[ data[i] ];
196 }
197
198 data_begin[i] = '\0';
199 str->length += length;
200
201 return data_begin;
202 }
203
204 lxb_char_t *
lexbor_str_append_with_rep_null_chars(lexbor_str_t * str,lexbor_mraw_t * mraw,const lxb_char_t * buff,size_t length)205 lexbor_str_append_with_rep_null_chars(lexbor_str_t *str, lexbor_mraw_t *mraw,
206 const lxb_char_t *buff, size_t length)
207 {
208 const lxb_char_t *pos, *res, *end;
209 size_t current_len = str->length;
210
211 lexbor_str_check_size_arg_m(str, lexbor_str_size(str),
212 mraw, (length + 1), NULL);
213 end = buff + length;
214
215 while (buff != end) {
216 pos = memchr(buff, '\0', sizeof(lxb_char_t) * (end - buff));
217 if (pos == NULL) {
218 break;
219 }
220
221 res = lexbor_str_append(str, mraw, buff, (pos - buff));
222 if (res == NULL) {
223 return NULL;
224 }
225
226 res = lexbor_str_append(str, mraw,
227 lexbor_str_res_ansi_replacement_character,
228 sizeof(lexbor_str_res_ansi_replacement_character) - 1);
229 if (res == NULL) {
230 return NULL;
231 }
232
233 buff = pos + 1;
234 }
235
236 if (buff != end) {
237 res = lexbor_str_append(str, mraw, buff, (end - buff));
238 if (res == NULL) {
239 return NULL;
240 }
241 }
242
243 return &str->data[current_len];
244 }
245
246 lxb_char_t *
lexbor_str_copy(lexbor_str_t * dest,const lexbor_str_t * target,lexbor_mraw_t * mraw)247 lexbor_str_copy(lexbor_str_t *dest, const lexbor_str_t *target,
248 lexbor_mraw_t *mraw)
249 {
250 if (target->data == NULL) {
251 return NULL;
252 }
253
254 if (dest->data == NULL) {
255 lexbor_str_init(dest, mraw, target->length);
256
257 if (dest->data == NULL) {
258 return NULL;
259 }
260 }
261
262 return lexbor_str_append(dest, mraw, target->data, target->length);
263 }
264
265 void
lexbor_str_stay_only_whitespace(lexbor_str_t * target)266 lexbor_str_stay_only_whitespace(lexbor_str_t *target)
267 {
268 size_t i, pos = 0;
269 lxb_char_t *data = target->data;
270
271 for (i = 0; i < target->length; i++) {
272 if (lexbor_utils_whitespace(data[i], ==, ||)) {
273 data[pos] = data[i];
274 pos++;
275 }
276 }
277
278 target->length = pos;
279 }
280
281 void
lexbor_str_strip_collapse_whitespace(lexbor_str_t * target)282 lexbor_str_strip_collapse_whitespace(lexbor_str_t *target)
283 {
284 size_t i, offset, ws_i;
285 lxb_char_t *data = target->data;
286
287 if (target->length == 0) {
288 return;
289 }
290
291 if (lexbor_utils_whitespace(*data, ==, ||)) {
292 *data = 0x20;
293 }
294
295 for (i = 0, offset = 0, ws_i = 0; i < target->length; i++)
296 {
297 if (lexbor_utils_whitespace(data[i], ==, ||)) {
298 if (data[ws_i] != 0x20) {
299 data[offset] = 0x20;
300
301 ws_i = offset;
302 offset++;
303 }
304 }
305 else {
306 if (data[ws_i] == 0x20) {
307 ws_i = offset;
308 }
309
310 data[offset] = data[i];
311 offset++;
312 }
313 }
314
315 if (offset != i) {
316 if (offset != 0) {
317 if (data[offset - 1] == 0x20) {
318 offset--;
319 }
320 }
321
322 data[offset] = 0x00;
323 target->length = offset;
324 }
325 }
326
327 size_t
lexbor_str_crop_whitespace_from_begin(lexbor_str_t * target)328 lexbor_str_crop_whitespace_from_begin(lexbor_str_t *target)
329 {
330 size_t i;
331 lxb_char_t *data = target->data;
332
333 for (i = 0; i < target->length; i++) {
334 if (lexbor_utils_whitespace(data[i], !=, &&)) {
335 break;
336 }
337 }
338
339 if (i != 0 && i != target->length) {
340 memmove(target->data, &target->data[i], (target->length - i));
341 }
342
343 target->length -= i;
344 return i;
345 }
346
347 size_t
lexbor_str_whitespace_from_begin(lexbor_str_t * target)348 lexbor_str_whitespace_from_begin(lexbor_str_t *target)
349 {
350 size_t i;
351 lxb_char_t *data = target->data;
352
353 for (i = 0; i < target->length; i++) {
354 if (lexbor_utils_whitespace(data[i], !=, &&)) {
355 break;
356 }
357 }
358
359 return i;
360 }
361
362 size_t
lexbor_str_whitespace_from_end(lexbor_str_t * target)363 lexbor_str_whitespace_from_end(lexbor_str_t *target)
364 {
365 size_t i = target->length;
366 lxb_char_t *data = target->data;
367
368 while (i) {
369 i--;
370
371 if (lexbor_utils_whitespace(data[i], !=, &&)) {
372 return target->length - (i + 1);
373 }
374 }
375
376 return 0;
377 }
378
/*
 * Data utils
 * TODO: All functions need optimization.
 */
383 const lxb_char_t *
lexbor_str_data_ncasecmp_first(const lxb_char_t * first,const lxb_char_t * sec,size_t sec_size)384 lexbor_str_data_ncasecmp_first(const lxb_char_t *first, const lxb_char_t *sec,
385 size_t sec_size)
386 {
387 size_t i;
388
389 for (i = 0; i < sec_size; i++) {
390 if (first[i] == '\0') {
391 return &first[i];
392 }
393
394 if (lexbor_str_res_map_lowercase[ first[i] ]
395 != lexbor_str_res_map_lowercase[ sec[i] ])
396 {
397 return NULL;
398 }
399 }
400
401 return &first[i];
402 }
403
404 bool
lexbor_str_data_ncasecmp_end(const lxb_char_t * first,const lxb_char_t * sec,size_t size)405 lexbor_str_data_ncasecmp_end(const lxb_char_t *first, const lxb_char_t *sec,
406 size_t size)
407 {
408 while (size != 0) {
409 size--;
410
411 if (lexbor_str_res_map_lowercase[ first[size] ]
412 != lexbor_str_res_map_lowercase[ sec[size] ])
413 {
414 return false;
415 }
416 }
417
418 return true;
419 }
420
421 bool
lexbor_str_data_ncasecmp_contain(const lxb_char_t * where,size_t where_size,const lxb_char_t * what,size_t what_size)422 lexbor_str_data_ncasecmp_contain(const lxb_char_t *where, size_t where_size,
423 const lxb_char_t *what, size_t what_size)
424 {
425 for (size_t i = 0; what_size <= (where_size - i); i++) {
426 if(lexbor_str_data_ncasecmp(&where[i], what, what_size)) {
427 return true;
428 }
429 }
430
431 return false;
432 }
433
434 bool
lexbor_str_data_ncasecmp(const lxb_char_t * first,const lxb_char_t * sec,size_t size)435 lexbor_str_data_ncasecmp(const lxb_char_t *first, const lxb_char_t *sec,
436 size_t size)
437 {
438 for (size_t i = 0; i < size; i++) {
439 if (lexbor_str_res_map_lowercase[ first[i] ]
440 != lexbor_str_res_map_lowercase[ sec[i] ])
441 {
442 return false;
443 }
444 }
445
446 return true;
447 }
448
449 bool
lexbor_str_data_nlocmp_right(const lxb_char_t * first,const lxb_char_t * sec,size_t size)450 lexbor_str_data_nlocmp_right(const lxb_char_t *first, const lxb_char_t *sec,
451 size_t size)
452 {
453 for (size_t i = 0; i < size; i++) {
454 if (first[i] != lexbor_str_res_map_lowercase[ sec[i] ]) {
455 return false;
456 }
457 }
458
459 return true;
460 }
461
462 bool
lexbor_str_data_nupcmp_right(const lxb_char_t * first,const lxb_char_t * sec,size_t size)463 lexbor_str_data_nupcmp_right(const lxb_char_t *first, const lxb_char_t *sec,
464 size_t size)
465 {
466 for (size_t i = 0; i < size; i++) {
467 if (first[i] != lexbor_str_res_map_uppercase[ sec[i] ]) {
468 return false;
469 }
470 }
471
472 return true;
473 }
474
475 bool
lexbor_str_data_casecmp(const lxb_char_t * first,const lxb_char_t * sec)476 lexbor_str_data_casecmp(const lxb_char_t *first, const lxb_char_t *sec)
477 {
478 for (;;) {
479 if (lexbor_str_res_map_lowercase[*first]
480 != lexbor_str_res_map_lowercase[*sec])
481 {
482 return false;
483 }
484
485 if (*first == '\0') {
486 return true;
487 }
488
489 first++;
490 sec++;
491 }
492 }
493
494 bool
lexbor_str_data_ncmp_end(const lxb_char_t * first,const lxb_char_t * sec,size_t size)495 lexbor_str_data_ncmp_end(const lxb_char_t *first, const lxb_char_t *sec,
496 size_t size)
497 {
498 while (size != 0) {
499 size--;
500
501 if (first[size] != sec[size]) {
502 return false;
503 }
504 }
505
506 return true;
507 }
508
509 bool
lexbor_str_data_ncmp_contain(const lxb_char_t * where,size_t where_size,const lxb_char_t * what,size_t what_size)510 lexbor_str_data_ncmp_contain(const lxb_char_t *where, size_t where_size,
511 const lxb_char_t *what, size_t what_size)
512 {
513 for (size_t i = 0; what_size <= (where_size - i); i++) {
514 if(memcmp(&where[i], what, sizeof(lxb_char_t) * what_size) == 0) {
515 return true;
516 }
517 }
518
519 return false;
520 }
521
522 bool
lexbor_str_data_ncmp(const lxb_char_t * first,const lxb_char_t * sec,size_t size)523 lexbor_str_data_ncmp(const lxb_char_t *first, const lxb_char_t *sec,
524 size_t size)
525 {
526 return memcmp(first, sec, sizeof(lxb_char_t) * size) == 0;
527 }
528
529 bool
lexbor_str_data_cmp(const lxb_char_t * first,const lxb_char_t * sec)530 lexbor_str_data_cmp(const lxb_char_t *first, const lxb_char_t *sec)
531 {
532 for (;;) {
533 if (*first != *sec) {
534 return false;
535 }
536
537 if (*first == '\0') {
538 return true;
539 }
540
541 first++;
542 sec++;
543 }
544 }
545
546 bool
lexbor_str_data_cmp_ws(const lxb_char_t * first,const lxb_char_t * sec)547 lexbor_str_data_cmp_ws(const lxb_char_t *first, const lxb_char_t *sec)
548 {
549 for (;;) {
550 if (*first != *sec) {
551 return false;
552 }
553
554 if (lexbor_utils_whitespace(*first, ==, ||) || *first == '\0') {
555 return true;
556 }
557
558 first++;
559 sec++;
560 }
561 }
562
563 void
lexbor_str_data_to_lowercase(lxb_char_t * to,const lxb_char_t * from,size_t len)564 lexbor_str_data_to_lowercase(lxb_char_t *to, const lxb_char_t *from, size_t len)
565 {
566 while (len) {
567 len--;
568
569 to[len] = lexbor_str_res_map_lowercase[ from[len] ];
570 }
571 }
572
573 void
lexbor_str_data_to_uppercase(lxb_char_t * to,const lxb_char_t * from,size_t len)574 lexbor_str_data_to_uppercase(lxb_char_t *to, const lxb_char_t *from, size_t len)
575 {
576 while (len) {
577 len--;
578
579 to[len] = lexbor_str_res_map_uppercase[ from[len] ];
580 }
581 }
582
583 const lxb_char_t *
lexbor_str_data_find_lowercase(const lxb_char_t * data,size_t len)584 lexbor_str_data_find_lowercase(const lxb_char_t *data, size_t len)
585 {
586 while (len) {
587 len--;
588
589 if (data[len] == lexbor_str_res_map_lowercase[ data[len] ]) {
590 return &data[len];
591 }
592 }
593
594 return NULL;
595 }
596
597 const lxb_char_t *
lexbor_str_data_find_uppercase(const lxb_char_t * data,size_t len)598 lexbor_str_data_find_uppercase(const lxb_char_t *data, size_t len)
599 {
600 while (len) {
601 len--;
602
603 if (data[len] == lexbor_str_res_map_uppercase[ data[len] ]) {
604 return &data[len];
605 }
606 }
607
608 return NULL;
609 }
610
/*
 * No inline functions for ABI.
 */
614 lxb_char_t *
lexbor_str_data_noi(lexbor_str_t * str)615 lexbor_str_data_noi(lexbor_str_t *str)
616 {
617 return lexbor_str_data(str);
618 }
619
620 size_t
lexbor_str_length_noi(lexbor_str_t * str)621 lexbor_str_length_noi(lexbor_str_t *str)
622 {
623 return lexbor_str_length(str);
624 }
625
626 size_t
lexbor_str_size_noi(lexbor_str_t * str)627 lexbor_str_size_noi(lexbor_str_t *str)
628 {
629 return lexbor_str_size(str);
630 }
631
632 void
lexbor_str_data_set_noi(lexbor_str_t * str,lxb_char_t * data)633 lexbor_str_data_set_noi(lexbor_str_t *str, lxb_char_t *data)
634 {
635 lexbor_str_data_set(str, data);
636 }
637
638 lxb_char_t *
lexbor_str_length_set_noi(lexbor_str_t * str,lexbor_mraw_t * mraw,size_t length)639 lexbor_str_length_set_noi(lexbor_str_t *str, lexbor_mraw_t *mraw, size_t length)
640 {
641 return lexbor_str_length_set(str, mraw, length);
642 }
643