1 /*
2 * Copyright (C) 2018-2022 Alexander Borisov
3 *
4 * Author: Alexander Borisov <borisov@lexbor.com>
5 */
6
7 #include "lexbor/dom/interfaces/node.h"
8 #include "lexbor/dom/interfaces/attr.h"
9 #include "lexbor/dom/interfaces/document.h"
10 #include "lexbor/dom/interfaces/document_type.h"
11 #include "lexbor/dom/interfaces/element.h"
12 #include "lexbor/dom/interfaces/processing_instruction.h"
13
14
15 typedef struct lxb_dom_node_cb_ctx lxb_dom_node_cb_ctx_t;
16
17 typedef bool
18 (*lxb_dom_node_attr_cmp_f)(lxb_dom_node_cb_ctx_t *ctx, lxb_dom_attr_t *attr);
19
20 struct lxb_dom_node_cb_ctx {
21 lxb_dom_collection_t *col;
22 lxb_status_t status;
23 lxb_dom_node_attr_cmp_f cmp_func;
24
25 lxb_dom_attr_id_t name_id;
26 lxb_ns_prefix_id_t prefix_id;
27
28 const lxb_char_t *value;
29 size_t value_length;
30 };
31
32
33 LXB_API lxb_dom_attr_data_t *
34 lxb_dom_attr_local_name_append(lexbor_hash_t *hash,
35 const lxb_char_t *name, size_t length);
36
37 LXB_API const lxb_tag_data_t *
38 lxb_tag_append(lexbor_hash_t *hash, lxb_tag_id_t tag_id,
39 const lxb_char_t *name, size_t length);
40
41 LXB_API const lxb_ns_data_t *
42 lxb_ns_append(lexbor_hash_t *hash, const lxb_char_t *link, size_t length);
43
44 static lexbor_action_t
45 lxb_dom_node_by_tag_name_cb(lxb_dom_node_t *node, void *ctx);
46
47 static lexbor_action_t
48 lxb_dom_node_by_tag_name_cb_all(lxb_dom_node_t *node, void *ctx);
49
50 static lexbor_action_t
51 lxb_dom_node_by_class_name_cb(lxb_dom_node_t *node, void *ctx);
52
53 static lexbor_action_t
54 lxb_dom_node_by_attr_cb(lxb_dom_node_t *node, void *ctx);
55
56 static bool
57 lxb_dom_node_by_attr_cmp_full(lxb_dom_node_cb_ctx_t *ctx,
58 lxb_dom_attr_t *attr);
59 static bool
60 lxb_dom_node_by_attr_cmp_full_case(lxb_dom_node_cb_ctx_t *ctx,
61 lxb_dom_attr_t *attr);
62 static bool
63 lxb_dom_node_by_attr_cmp_begin(lxb_dom_node_cb_ctx_t *ctx,
64 lxb_dom_attr_t *attr);
65 static bool
66 lxb_dom_node_by_attr_cmp_begin_case(lxb_dom_node_cb_ctx_t *ctx,
67 lxb_dom_attr_t *attr);
68 static bool
69 lxb_dom_node_by_attr_cmp_end(lxb_dom_node_cb_ctx_t *ctx,
70 lxb_dom_attr_t *attr);
71 static bool
72 lxb_dom_node_by_attr_cmp_end_case(lxb_dom_node_cb_ctx_t *ctx,
73 lxb_dom_attr_t *attr);
74 static bool
75 lxb_dom_node_by_attr_cmp_contain(lxb_dom_node_cb_ctx_t *ctx,
76 lxb_dom_attr_t *attr);
77 static bool
78 lxb_dom_node_by_attr_cmp_contain_case(lxb_dom_node_cb_ctx_t *ctx,
79 lxb_dom_attr_t *attr);
80
81 static lexbor_action_t
82 lxb_dom_node_text_content_size(lxb_dom_node_t *node, void *ctx);
83
84 static lexbor_action_t
85 lxb_dom_node_text_content_concatenate(lxb_dom_node_t *node, void *ctx);
86
87
88 lxb_dom_node_t *
lxb_dom_node_interface_create(lxb_dom_document_t * document)89 lxb_dom_node_interface_create(lxb_dom_document_t *document)
90 {
91 lxb_dom_node_t *element;
92
93 element = lexbor_mraw_calloc(document->mraw,
94 sizeof(lxb_dom_node_t));
95 if (element == NULL) {
96 return NULL;
97 }
98
99 element->owner_document = lxb_dom_document_owner(document);
100 element->type = LXB_DOM_NODE_TYPE_UNDEF;
101
102 return element;
103 }
104
105 lxb_dom_node_t *
lxb_dom_node_interface_clone(lxb_dom_document_t * document,const lxb_dom_node_t * node,bool is_attr)106 lxb_dom_node_interface_clone(lxb_dom_document_t *document,
107 const lxb_dom_node_t *node, bool is_attr)
108 {
109 lxb_dom_node_t *new;
110
111 new = lxb_dom_node_interface_create(document);
112 if (new == NULL) {
113 return NULL;
114 }
115
116 if (lxb_dom_node_interface_copy(new, node, is_attr) != LXB_STATUS_OK) {
117 return lxb_dom_document_destroy_interface(new);
118 }
119
120 return new;
121 }
122
123 lxb_dom_node_t *
lxb_dom_node_interface_destroy(lxb_dom_node_t * node)124 lxb_dom_node_interface_destroy(lxb_dom_node_t *node)
125 {
126 lxb_dom_document_t *doc = node->owner_document;
127
128 if (doc->ev_destroy != NULL) {
129 doc->ev_destroy(node);
130 }
131
132 return lexbor_mraw_free(node->owner_document->mraw, node);
133 }
134
135 lxb_status_t
lxb_dom_node_interface_copy(lxb_dom_node_t * dst,const lxb_dom_node_t * src,bool is_attr)136 lxb_dom_node_interface_copy(lxb_dom_node_t *dst,
137 const lxb_dom_node_t *src, bool is_attr)
138 {
139 lxb_dom_document_t *from, *to;
140 const lxb_ns_data_t *ns;
141 const lxb_tag_data_t *tag;
142 const lxb_ns_prefix_data_t *prefix;
143 const lexbor_hash_entry_t *entry;
144 const lxb_dom_attr_data_t *data;
145
146 dst->type = src->type;
147 dst->user = src->user;
148
149 if (dst->owner_document == src->owner_document) {
150 dst->local_name = src->local_name;
151 dst->ns = src->ns;
152 dst->prefix = src->prefix;
153
154 return LXB_STATUS_OK;
155 }
156
157 from = src->owner_document;
158 to = dst->owner_document;
159
160 if (is_attr) {
161 if (src->local_name < LXB_DOM_ATTR__LAST_ENTRY) {
162 dst->local_name = src->local_name;
163 }
164 else {
165 data = lxb_dom_attr_data_by_id(from->attrs, src->local_name);
166 if (data == NULL) {
167 return LXB_STATUS_ERROR_NOT_EXISTS;
168 }
169
170 entry = &data->entry;
171
172 data = lxb_dom_attr_local_name_append(to->attrs,
173 lexbor_hash_entry_str(entry),
174 entry->length);
175 if (data == NULL) {
176 return LXB_STATUS_ERROR_MEMORY_ALLOCATION;
177 }
178
179 dst->local_name = (lxb_dom_attr_id_t) data;
180 }
181 }
182 else {
183 if (src->local_name < LXB_TAG__LAST_ENTRY) {
184 dst->local_name = src->local_name;
185 }
186 else {
187 tag = lxb_tag_data_by_id(src->local_name);
188 if (tag == NULL) {
189 return LXB_STATUS_ERROR_NOT_EXISTS;
190 }
191
192 entry = &tag->entry;
193
194 tag = lxb_tag_append(to->tags, LXB_TAG__UNDEF,
195 lexbor_hash_entry_str(entry), entry->length);
196 if (tag == NULL) {
197 return LXB_STATUS_ERROR_MEMORY_ALLOCATION;
198 }
199
200 dst->local_name = (lxb_dom_attr_id_t) tag;
201 }
202 }
203
204 if (src->ns < LXB_NS__LAST_ENTRY) {
205 dst->ns = src->ns;
206 }
207 else {
208 ns = lxb_ns_data_by_id(from->ns, src->ns);
209 if (ns == NULL) {
210 return LXB_STATUS_ERROR_NOT_EXISTS;
211 }
212
213 entry = &ns->entry;
214
215 ns = lxb_ns_append(to->ns, lexbor_hash_entry_str(entry),
216 entry->length);
217 if (ns == NULL) {
218 return LXB_STATUS_ERROR_MEMORY_ALLOCATION;
219 }
220
221 dst->ns = (lxb_ns_id_t) ns;
222 }
223
224 if (src->prefix < LXB_NS__LAST_ENTRY) {
225 dst->prefix = src->prefix;
226 }
227 else {
228 prefix = lxb_ns_prefix_data_by_id(from->prefix, src->prefix);
229 if (prefix == NULL) {
230 return LXB_STATUS_ERROR_NOT_EXISTS;
231 }
232
233 entry = &prefix->entry;
234
235 prefix = lxb_ns_prefix_append(to->prefix, lexbor_hash_entry_str(entry),
236 entry->length);
237 if (prefix == NULL) {
238 return LXB_STATUS_ERROR;
239 }
240
241 dst->prefix = (lxb_ns_prefix_id_t) prefix;
242 }
243
244 return LXB_STATUS_OK;
245 }
246
247 lxb_dom_node_t *
lxb_dom_node_destroy(lxb_dom_node_t * node)248 lxb_dom_node_destroy(lxb_dom_node_t *node)
249 {
250 lxb_dom_node_remove(node);
251
252 if (node->owner_document->ev_destroy != NULL) {
253 node->owner_document->ev_destroy(node);
254 }
255
256 return lxb_dom_document_destroy_interface(node);
257 }
258
259 lxb_dom_node_t *
lxb_dom_node_destroy_deep(lxb_dom_node_t * root)260 lxb_dom_node_destroy_deep(lxb_dom_node_t *root)
261 {
262 lxb_dom_node_t *tmp;
263 lxb_dom_node_t *node = root;
264
265 while (node != NULL) {
266 if (node->first_child != NULL) {
267 node = node->first_child;
268 }
269 else {
270 while(node != root && node->next == NULL) {
271 tmp = node->parent;
272
273 lxb_dom_node_destroy(node);
274
275 node = tmp;
276 }
277
278 if (node == root) {
279 lxb_dom_node_destroy(node);
280
281 break;
282 }
283
284 tmp = node->next;
285
286 lxb_dom_node_destroy(node);
287
288 node = tmp;
289 }
290 }
291
292 return NULL;
293 }
294
295 lxb_dom_node_t *
lxb_dom_node_clone(lxb_dom_node_t * node,bool deep)296 lxb_dom_node_clone(lxb_dom_node_t *node, bool deep)
297 {
298 return lxb_dom_document_import_node(node->owner_document, node, deep);
299 }
300
301 const lxb_char_t *
lxb_dom_node_name(lxb_dom_node_t * node,size_t * len)302 lxb_dom_node_name(lxb_dom_node_t *node, size_t *len)
303 {
304 switch (node->type) {
305 case LXB_DOM_NODE_TYPE_ELEMENT:
306 return lxb_dom_element_tag_name(lxb_dom_interface_element(node),
307 len);
308
309 case LXB_DOM_NODE_TYPE_ATTRIBUTE:
310 return lxb_dom_attr_qualified_name(lxb_dom_interface_attr(node),
311 len);
312
313 case LXB_DOM_NODE_TYPE_TEXT:
314 if (len != NULL) {
315 *len = sizeof("#text") - 1;
316 }
317
318 return (const lxb_char_t *) "#text";
319
320 case LXB_DOM_NODE_TYPE_CDATA_SECTION:
321 if (len != NULL) {
322 *len = sizeof("#cdata-section") - 1;
323 }
324
325 return (const lxb_char_t *) "#cdata-section";
326
327 case LXB_DOM_NODE_TYPE_PROCESSING_INSTRUCTION:
328 return lxb_dom_processing_instruction_target(lxb_dom_interface_processing_instruction(node),
329 len);
330
331 case LXB_DOM_NODE_TYPE_COMMENT:
332 if (len != NULL) {
333 *len = sizeof("#comment") - 1;
334 }
335
336 return (const lxb_char_t *) "#comment";
337
338 case LXB_DOM_NODE_TYPE_DOCUMENT:
339 if (len != NULL) {
340 *len = sizeof("#document") - 1;
341 }
342
343 return (const lxb_char_t *) "#document";
344
345 case LXB_DOM_NODE_TYPE_DOCUMENT_TYPE:
346 return lxb_dom_document_type_name(lxb_dom_interface_document_type(node),
347 len);
348
349 case LXB_DOM_NODE_TYPE_DOCUMENT_FRAGMENT:
350 if (len != NULL) {
351 *len = sizeof("#document-fragment") - 1;
352 }
353
354 return (const lxb_char_t *) "#document-fragment";
355
356 default:
357 break;
358 }
359
360 if (len != NULL) {
361 *len = 0;
362 }
363
364 return NULL;
365 }
366
367 void
lxb_dom_node_insert_child_wo_events(lxb_dom_node_t * to,lxb_dom_node_t * node)368 lxb_dom_node_insert_child_wo_events(lxb_dom_node_t *to, lxb_dom_node_t *node)
369 {
370 if (to->last_child != NULL) {
371 to->last_child->next = node;
372 }
373 else {
374 to->first_child = node;
375 }
376
377 node->parent = to;
378 node->next = NULL;
379 node->prev = to->last_child;
380
381 to->last_child = node;
382 }
383
384 void
lxb_dom_node_insert_child(lxb_dom_node_t * to,lxb_dom_node_t * node)385 lxb_dom_node_insert_child(lxb_dom_node_t *to, lxb_dom_node_t *node)
386 {
387 lxb_dom_node_insert_child_wo_events(to, node);
388
389 if (node->owner_document->ev_insert != NULL) {
390 node->owner_document->ev_insert(node);
391 }
392 }
393
394 void
lxb_dom_node_insert_before_wo_events(lxb_dom_node_t * to,lxb_dom_node_t * node)395 lxb_dom_node_insert_before_wo_events(lxb_dom_node_t *to, lxb_dom_node_t *node)
396 {
397 if (to->prev != NULL) {
398 to->prev->next = node;
399 }
400 else {
401 if (to->parent != NULL) {
402 to->parent->first_child = node;
403 }
404 }
405
406 node->parent = to->parent;
407 node->next = to;
408 node->prev = to->prev;
409
410 to->prev = node;
411 }
412
413 void
lxb_dom_node_insert_before(lxb_dom_node_t * to,lxb_dom_node_t * node)414 lxb_dom_node_insert_before(lxb_dom_node_t *to, lxb_dom_node_t *node)
415 {
416 lxb_dom_node_insert_before_wo_events(to, node);
417
418 if (node->owner_document->ev_insert != NULL) {
419 node->owner_document->ev_insert(node);
420 }
421 }
422
423 void
lxb_dom_node_insert_after_wo_events(lxb_dom_node_t * to,lxb_dom_node_t * node)424 lxb_dom_node_insert_after_wo_events(lxb_dom_node_t *to, lxb_dom_node_t *node)
425 {
426 if (to->next != NULL) {
427 to->next->prev = node;
428 }
429 else {
430 if (to->parent != NULL) {
431 to->parent->last_child = node;
432 }
433 }
434
435 node->parent = to->parent;
436 node->next = to->next;
437 node->prev = to;
438 to->next = node;
439 }
440
441 void
lxb_dom_node_insert_after(lxb_dom_node_t * to,lxb_dom_node_t * node)442 lxb_dom_node_insert_after(lxb_dom_node_t *to, lxb_dom_node_t *node)
443 {
444 lxb_dom_node_insert_after_wo_events(to, node);
445
446 if (node->owner_document->ev_insert != NULL) {
447 node->owner_document->ev_insert(node);
448 }
449 }
450
451 void
lxb_dom_node_remove_wo_events(lxb_dom_node_t * node)452 lxb_dom_node_remove_wo_events(lxb_dom_node_t *node)
453 {
454 if (node->parent != NULL) {
455 if (node->parent->first_child == node) {
456 node->parent->first_child = node->next;
457 }
458
459 if (node->parent->last_child == node) {
460 node->parent->last_child = node->prev;
461 }
462 }
463
464 if (node->next != NULL) {
465 node->next->prev = node->prev;
466 }
467
468 if (node->prev != NULL) {
469 node->prev->next = node->next;
470 }
471
472 node->parent = NULL;
473 node->next = NULL;
474 node->prev = NULL;
475 }
476
477 void
lxb_dom_node_remove(lxb_dom_node_t * node)478 lxb_dom_node_remove(lxb_dom_node_t *node)
479 {
480 if (node->owner_document->ev_remove != NULL) {
481 node->owner_document->ev_remove(node);
482 }
483
484 lxb_dom_node_remove_wo_events(node);
485 }
486
487 lxb_status_t
lxb_dom_node_replace_all(lxb_dom_node_t * parent,lxb_dom_node_t * node)488 lxb_dom_node_replace_all(lxb_dom_node_t *parent, lxb_dom_node_t *node)
489 {
490 while (parent->first_child != NULL) {
491 lxb_dom_node_destroy_deep(parent->first_child);
492 }
493
494 lxb_dom_node_insert_child(parent, node);
495
496 return LXB_STATUS_OK;
497 }
498
499 void
lxb_dom_node_simple_walk(lxb_dom_node_t * root,lxb_dom_node_simple_walker_f walker_cb,void * ctx)500 lxb_dom_node_simple_walk(lxb_dom_node_t *root,
501 lxb_dom_node_simple_walker_f walker_cb, void *ctx)
502 {
503 lexbor_action_t action;
504 lxb_dom_node_t *node = root->first_child;
505
506 while (node != NULL) {
507 action = walker_cb(node, ctx);
508 if (action == LEXBOR_ACTION_STOP) {
509 return;
510 }
511
512 if (node->first_child != NULL && action != LEXBOR_ACTION_NEXT) {
513 node = node->first_child;
514 }
515 else {
516 while(node != root && node->next == NULL) {
517 node = node->parent;
518 }
519
520 if (node == root) {
521 break;
522 }
523
524 node = node->next;
525 }
526 }
527 }
528
529 lxb_inline lxb_status_t
lxb_dom_node_prepare_by_attr(lxb_dom_document_t * document,lxb_dom_node_cb_ctx_t * cb_ctx,const lxb_char_t * qname,size_t qlen)530 lxb_dom_node_prepare_by_attr(lxb_dom_document_t *document,
531 lxb_dom_node_cb_ctx_t *cb_ctx,
532 const lxb_char_t *qname, size_t qlen)
533 {
534 size_t length;
535 const lxb_char_t *prefix_end;
536 const lxb_dom_attr_data_t *attr_data;
537 const lxb_ns_prefix_data_t *prefix_data;
538
539 cb_ctx->prefix_id = LXB_NS__UNDEF;
540
541 prefix_end = memchr(qname, ':', qlen);
542
543 if (prefix_end != NULL) {
544 length = prefix_end - qname;
545
546 if (length == 0) {
547 return LXB_STATUS_ERROR_WRONG_ARGS;
548 }
549
550 prefix_data = lxb_ns_prefix_data_by_name(document->prefix, qname, qlen);
551 if (prefix_data == NULL) {
552 return LXB_STATUS_STOP;
553 }
554
555 cb_ctx->prefix_id = prefix_data->prefix_id;
556
557 length += 1;
558
559 if (length >= qlen) {
560 return LXB_STATUS_ERROR_WRONG_ARGS;
561 }
562
563 qname += length;
564 qlen -= length;
565 }
566
567 attr_data = lxb_dom_attr_data_by_local_name(document->attrs, qname, qlen);
568 if (attr_data == NULL) {
569 return LXB_STATUS_STOP;
570 }
571
572 cb_ctx->name_id = attr_data->attr_id;
573
574 return LXB_STATUS_OK;
575 }
576
577 lxb_inline lxb_status_t
lxb_dom_node_prepare_by(lxb_dom_document_t * document,lxb_dom_node_cb_ctx_t * cb_ctx,const lxb_char_t * qname,size_t qlen)578 lxb_dom_node_prepare_by(lxb_dom_document_t *document,
579 lxb_dom_node_cb_ctx_t *cb_ctx,
580 const lxb_char_t *qname, size_t qlen)
581 {
582 size_t length;
583 const lxb_char_t *prefix_end;
584 const lxb_tag_data_t *tag_data;
585 const lxb_ns_prefix_data_t *prefix_data;
586
587 cb_ctx->prefix_id = LXB_NS__UNDEF;
588
589 prefix_end = memchr(qname, ':', qlen);
590
591 if (prefix_end != NULL) {
592 length = prefix_end - qname;
593
594 if (length == 0) {
595 return LXB_STATUS_ERROR_WRONG_ARGS;
596 }
597
598 prefix_data = lxb_ns_prefix_data_by_name(document->prefix, qname, qlen);
599 if (prefix_data == NULL) {
600 return LXB_STATUS_STOP;
601 }
602
603 cb_ctx->prefix_id = prefix_data->prefix_id;
604
605 length += 1;
606
607 if (length >= qlen) {
608 return LXB_STATUS_ERROR_WRONG_ARGS;
609 }
610
611 qname += length;
612 qlen -= length;
613 }
614
615 tag_data = lxb_tag_data_by_name(document->tags, qname, qlen);
616 if (tag_data == NULL) {
617 return LXB_STATUS_STOP;
618 }
619
620 cb_ctx->name_id = tag_data->tag_id;
621
622 return LXB_STATUS_OK;
623 }
624
625 lxb_status_t
lxb_dom_node_by_tag_name(lxb_dom_node_t * root,lxb_dom_collection_t * collection,const lxb_char_t * qualified_name,size_t len)626 lxb_dom_node_by_tag_name(lxb_dom_node_t *root,
627 lxb_dom_collection_t *collection,
628 const lxb_char_t *qualified_name, size_t len)
629 {
630 lxb_status_t status;
631 lxb_dom_node_cb_ctx_t cb_ctx = {0};
632
633 cb_ctx.col = collection;
634
635 /* "*" (U+002A) */
636 if (len == 1 && *qualified_name == 0x2A) {
637 lxb_dom_node_simple_walk(root, lxb_dom_node_by_tag_name_cb_all,
638 &cb_ctx);
639 return cb_ctx.status;
640 }
641
642 status = lxb_dom_node_prepare_by(root->owner_document, &cb_ctx,
643 qualified_name, len);
644 if (status != LXB_STATUS_OK) {
645 if (status == LXB_STATUS_STOP) {
646 return LXB_STATUS_OK;
647 }
648
649 return status;
650 }
651
652 lxb_dom_node_simple_walk(lxb_dom_interface_node(root),
653 lxb_dom_node_by_tag_name_cb, &cb_ctx);
654
655 return cb_ctx.status;
656 }
657
658 static lexbor_action_t
lxb_dom_node_by_tag_name_cb_all(lxb_dom_node_t * node,void * ctx)659 lxb_dom_node_by_tag_name_cb_all(lxb_dom_node_t *node, void *ctx)
660 {
661 if (node->type != LXB_DOM_NODE_TYPE_ELEMENT) {
662 return LEXBOR_ACTION_OK;
663 }
664
665 lxb_dom_node_cb_ctx_t *cb_ctx = ctx;
666
667 cb_ctx->status = lxb_dom_collection_append(cb_ctx->col, node);
668 if (cb_ctx->status != LXB_STATUS_OK) {
669 return LEXBOR_ACTION_STOP;
670 }
671
672 return LEXBOR_ACTION_OK;
673 }
674
675 static lexbor_action_t
lxb_dom_node_by_tag_name_cb(lxb_dom_node_t * node,void * ctx)676 lxb_dom_node_by_tag_name_cb(lxb_dom_node_t *node, void *ctx)
677 {
678 if (node->type != LXB_DOM_NODE_TYPE_ELEMENT) {
679 return LEXBOR_ACTION_OK;
680 }
681
682 lxb_dom_node_cb_ctx_t *cb_ctx = ctx;
683
684 if (node->local_name == cb_ctx->name_id
685 && node->prefix == cb_ctx->prefix_id)
686 {
687 cb_ctx->status = lxb_dom_collection_append(cb_ctx->col, node);
688 if (cb_ctx->status != LXB_STATUS_OK) {
689 return LEXBOR_ACTION_STOP;
690 }
691 }
692
693 return LEXBOR_ACTION_OK;
694 }
695
696 lxb_status_t
lxb_dom_node_by_class_name(lxb_dom_node_t * root,lxb_dom_collection_t * collection,const lxb_char_t * class_name,size_t len)697 lxb_dom_node_by_class_name(lxb_dom_node_t *root,
698 lxb_dom_collection_t *collection,
699 const lxb_char_t *class_name, size_t len)
700 {
701 if (class_name == NULL || len == 0) {
702 return LXB_STATUS_OK;
703 }
704
705 lxb_dom_node_cb_ctx_t cb_ctx = {0};
706
707 cb_ctx.col = collection;
708 cb_ctx.value = class_name;
709 cb_ctx.value_length = len;
710
711 lxb_dom_node_simple_walk(lxb_dom_interface_node(root),
712 lxb_dom_node_by_class_name_cb, &cb_ctx);
713
714 return cb_ctx.status;
715 }
716
717 static lexbor_action_t
lxb_dom_node_by_class_name_cb(lxb_dom_node_t * node,void * ctx)718 lxb_dom_node_by_class_name_cb(lxb_dom_node_t *node, void *ctx)
719 {
720 if (node->type != LXB_DOM_NODE_TYPE_ELEMENT) {
721 return LEXBOR_ACTION_OK;
722 }
723
724 lxb_dom_node_cb_ctx_t *cb_ctx = ctx;
725 lxb_dom_element_t *el = lxb_dom_interface_element(node);
726
727 if (el->attr_class == NULL
728 || el->attr_class->value == NULL
729 || el->attr_class->value->length < cb_ctx->value_length)
730 {
731 return LEXBOR_ACTION_OK;
732 }
733
734 const lxb_char_t *data = el->attr_class->value->data;
735 size_t length = el->attr_class->value->length;
736
737 bool is_it = false;
738 const lxb_char_t *pos = data;
739 const lxb_char_t *end = data + length;
740
741 lxb_dom_document_t *doc = el->node.owner_document;
742
743 for (; data < end; data++) {
744 if (lexbor_utils_whitespace(*data, ==, ||)) {
745
746 if (pos != data && (size_t) (data - pos) == cb_ctx->value_length) {
747 if (doc->compat_mode == LXB_DOM_DOCUMENT_CMODE_QUIRKS) {
748 is_it = lexbor_str_data_ncasecmp(pos, cb_ctx->value,
749 cb_ctx->value_length);
750 }
751 else {
752 is_it = lexbor_str_data_ncmp(pos, cb_ctx->value,
753 cb_ctx->value_length);
754 }
755
756 if (is_it) {
757 cb_ctx->status = lxb_dom_collection_append(cb_ctx->col,
758 node);
759 if (cb_ctx->status != LXB_STATUS_OK) {
760 return LEXBOR_ACTION_STOP;
761 }
762
763 return LEXBOR_ACTION_OK;
764 }
765 }
766
767 if ((size_t) (end - data) < cb_ctx->value_length) {
768 return LEXBOR_ACTION_OK;
769 }
770
771 pos = data + 1;
772 }
773 }
774
775 if ((size_t) (end - pos) == cb_ctx->value_length) {
776 if (doc->compat_mode == LXB_DOM_DOCUMENT_CMODE_QUIRKS) {
777 is_it = lexbor_str_data_ncasecmp(pos, cb_ctx->value,
778 cb_ctx->value_length);
779 }
780 else {
781 is_it = lexbor_str_data_ncmp(pos, cb_ctx->value,
782 cb_ctx->value_length);
783 }
784
785 if (is_it) {
786 cb_ctx->status = lxb_dom_collection_append(cb_ctx->col, node);
787 if (cb_ctx->status != LXB_STATUS_OK) {
788 return LEXBOR_ACTION_STOP;
789 }
790 }
791 }
792
793 return LEXBOR_ACTION_OK;
794 }
795
796 lxb_status_t
lxb_dom_node_by_attr(lxb_dom_node_t * root,lxb_dom_collection_t * collection,const lxb_char_t * qualified_name,size_t qname_len,const lxb_char_t * value,size_t value_len,bool case_insensitive)797 lxb_dom_node_by_attr(lxb_dom_node_t *root,
798 lxb_dom_collection_t *collection,
799 const lxb_char_t *qualified_name, size_t qname_len,
800 const lxb_char_t *value, size_t value_len,
801 bool case_insensitive)
802 {
803 lxb_status_t status;
804 lxb_dom_node_cb_ctx_t cb_ctx = {0};
805
806 cb_ctx.col = collection;
807 cb_ctx.value = value;
808 cb_ctx.value_length = value_len;
809
810 status = lxb_dom_node_prepare_by_attr(root->owner_document, &cb_ctx,
811 qualified_name, qname_len);
812 if (status != LXB_STATUS_OK) {
813 if (status == LXB_STATUS_STOP) {
814 return LXB_STATUS_OK;
815 }
816
817 return status;
818 }
819
820 if (case_insensitive) {
821 cb_ctx.cmp_func = lxb_dom_node_by_attr_cmp_full_case;
822 }
823 else {
824 cb_ctx.cmp_func = lxb_dom_node_by_attr_cmp_full;
825 }
826
827 lxb_dom_node_simple_walk(root, lxb_dom_node_by_attr_cb, &cb_ctx);
828
829 return cb_ctx.status;
830 }
831
832 lxb_status_t
lxb_dom_node_by_attr_begin(lxb_dom_node_t * root,lxb_dom_collection_t * collection,const lxb_char_t * qualified_name,size_t qname_len,const lxb_char_t * value,size_t value_len,bool case_insensitive)833 lxb_dom_node_by_attr_begin(lxb_dom_node_t *root,
834 lxb_dom_collection_t *collection,
835 const lxb_char_t *qualified_name, size_t qname_len,
836 const lxb_char_t *value, size_t value_len,
837 bool case_insensitive)
838 {
839 lxb_status_t status;
840 lxb_dom_node_cb_ctx_t cb_ctx = {0};
841
842 cb_ctx.col = collection;
843 cb_ctx.value = value;
844 cb_ctx.value_length = value_len;
845
846 status = lxb_dom_node_prepare_by_attr(root->owner_document, &cb_ctx,
847 qualified_name, qname_len);
848 if (status != LXB_STATUS_OK) {
849 if (status == LXB_STATUS_STOP) {
850 return LXB_STATUS_OK;
851 }
852
853 return status;
854 }
855
856 if (case_insensitive) {
857 cb_ctx.cmp_func = lxb_dom_node_by_attr_cmp_begin_case;
858 }
859 else {
860 cb_ctx.cmp_func = lxb_dom_node_by_attr_cmp_begin;
861 }
862
863 lxb_dom_node_simple_walk(lxb_dom_interface_node(root),
864 lxb_dom_node_by_attr_cb, &cb_ctx);
865
866 return cb_ctx.status;
867 }
868
869 lxb_status_t
lxb_dom_node_by_attr_end(lxb_dom_node_t * root,lxb_dom_collection_t * collection,const lxb_char_t * qualified_name,size_t qname_len,const lxb_char_t * value,size_t value_len,bool case_insensitive)870 lxb_dom_node_by_attr_end(lxb_dom_node_t *root, lxb_dom_collection_t *collection,
871 const lxb_char_t *qualified_name, size_t qname_len,
872 const lxb_char_t *value, size_t value_len,
873 bool case_insensitive)
874 {
875 lxb_status_t status;
876 lxb_dom_node_cb_ctx_t cb_ctx = {0};
877
878 cb_ctx.col = collection;
879 cb_ctx.value = value;
880 cb_ctx.value_length = value_len;
881
882 status = lxb_dom_node_prepare_by_attr(root->owner_document, &cb_ctx,
883 qualified_name, qname_len);
884 if (status != LXB_STATUS_OK) {
885 if (status == LXB_STATUS_STOP) {
886 return LXB_STATUS_OK;
887 }
888
889 return status;
890 }
891
892 if (case_insensitive) {
893 cb_ctx.cmp_func = lxb_dom_node_by_attr_cmp_end_case;
894 }
895 else {
896 cb_ctx.cmp_func = lxb_dom_node_by_attr_cmp_end;
897 }
898
899 lxb_dom_node_simple_walk(root, lxb_dom_node_by_attr_cb, &cb_ctx);
900
901 return cb_ctx.status;
902 }
903
904 lxb_status_t
lxb_dom_node_by_attr_contain(lxb_dom_node_t * root,lxb_dom_collection_t * collection,const lxb_char_t * qualified_name,size_t qname_len,const lxb_char_t * value,size_t value_len,bool case_insensitive)905 lxb_dom_node_by_attr_contain(lxb_dom_node_t *root,
906 lxb_dom_collection_t *collection,
907 const lxb_char_t *qualified_name, size_t qname_len,
908 const lxb_char_t *value, size_t value_len,
909 bool case_insensitive)
910 {
911 lxb_status_t status;
912 lxb_dom_node_cb_ctx_t cb_ctx = {0};
913
914 cb_ctx.col = collection;
915 cb_ctx.value = value;
916 cb_ctx.value_length = value_len;
917
918 status = lxb_dom_node_prepare_by_attr(root->owner_document, &cb_ctx,
919 qualified_name, qname_len);
920 if (status != LXB_STATUS_OK) {
921 if (status == LXB_STATUS_STOP) {
922 return LXB_STATUS_OK;
923 }
924
925 return status;
926 }
927
928 if (case_insensitive) {
929 cb_ctx.cmp_func = lxb_dom_node_by_attr_cmp_contain_case;
930 }
931 else {
932 cb_ctx.cmp_func = lxb_dom_node_by_attr_cmp_contain;
933 }
934
935 lxb_dom_node_simple_walk(root, lxb_dom_node_by_attr_cb, &cb_ctx);
936
937 return cb_ctx.status;
938 }
939
940 static lexbor_action_t
lxb_dom_node_by_attr_cb(lxb_dom_node_t * node,void * ctx)941 lxb_dom_node_by_attr_cb(lxb_dom_node_t *node, void *ctx)
942 {
943 if (node->type != LXB_DOM_NODE_TYPE_ELEMENT) {
944 return LEXBOR_ACTION_OK;
945 }
946
947 lxb_dom_attr_t *attr;
948 lxb_dom_node_cb_ctx_t *cb_ctx = ctx;
949 lxb_dom_element_t *el = lxb_dom_interface_element(node);
950
951 attr = lxb_dom_element_attr_by_id(el, cb_ctx->name_id);
952 if (attr == NULL) {
953 return LEXBOR_ACTION_OK;
954 }
955
956 if ((cb_ctx->value_length == 0 && (attr->value == NULL || attr->value->length == 0))
957 || cb_ctx->cmp_func(cb_ctx, attr))
958 {
959 cb_ctx->status = lxb_dom_collection_append(cb_ctx->col, node);
960
961 if (cb_ctx->status != LXB_STATUS_OK) {
962 return LEXBOR_ACTION_STOP;
963 }
964 }
965
966 return LEXBOR_ACTION_OK;
967 }
968
969 static bool
lxb_dom_node_by_attr_cmp_full(lxb_dom_node_cb_ctx_t * ctx,lxb_dom_attr_t * attr)970 lxb_dom_node_by_attr_cmp_full(lxb_dom_node_cb_ctx_t *ctx, lxb_dom_attr_t *attr)
971 {
972 if (attr->value != NULL && ctx->value_length == attr->value->length
973 && lexbor_str_data_ncmp(attr->value->data, ctx->value,
974 ctx->value_length))
975 {
976 return true;
977 }
978
979 return attr->value == NULL && ctx->value_length == 0;
980 }
981
982 static bool
lxb_dom_node_by_attr_cmp_full_case(lxb_dom_node_cb_ctx_t * ctx,lxb_dom_attr_t * attr)983 lxb_dom_node_by_attr_cmp_full_case(lxb_dom_node_cb_ctx_t *ctx,
984 lxb_dom_attr_t *attr)
985 {
986 if (attr->value != NULL && ctx->value_length == attr->value->length
987 && lexbor_str_data_ncasecmp(attr->value->data, ctx->value,
988 ctx->value_length))
989 {
990 return true;
991 }
992
993 return attr->value == NULL && ctx->value_length == 0;
994 }
995
996 static bool
lxb_dom_node_by_attr_cmp_begin(lxb_dom_node_cb_ctx_t * ctx,lxb_dom_attr_t * attr)997 lxb_dom_node_by_attr_cmp_begin(lxb_dom_node_cb_ctx_t *ctx, lxb_dom_attr_t *attr)
998 {
999 if (attr->value != NULL && ctx->value_length <= attr->value->length
1000 && lexbor_str_data_ncmp(attr->value->data, ctx->value,
1001 ctx->value_length))
1002 {
1003 return true;
1004 }
1005
1006 return attr->value == NULL && ctx->value_length == 0;
1007 }
1008
1009 static bool
lxb_dom_node_by_attr_cmp_begin_case(lxb_dom_node_cb_ctx_t * ctx,lxb_dom_attr_t * attr)1010 lxb_dom_node_by_attr_cmp_begin_case(lxb_dom_node_cb_ctx_t *ctx,
1011 lxb_dom_attr_t *attr)
1012 {
1013 if (attr->value != NULL && ctx->value_length <= attr->value->length
1014 && lexbor_str_data_ncasecmp(attr->value->data,
1015 ctx->value, ctx->value_length))
1016 {
1017 return true;
1018 }
1019
1020 return attr->value == NULL && ctx->value_length == 0;
1021 }
1022
1023 static bool
lxb_dom_node_by_attr_cmp_end(lxb_dom_node_cb_ctx_t * ctx,lxb_dom_attr_t * attr)1024 lxb_dom_node_by_attr_cmp_end(lxb_dom_node_cb_ctx_t *ctx, lxb_dom_attr_t *attr)
1025 {
1026 if (attr->value != NULL && ctx->value_length <= attr->value->length) {
1027 size_t dif = attr->value->length - ctx->value_length;
1028
1029 if (lexbor_str_data_ncmp_end(&attr->value->data[dif],
1030 ctx->value, ctx->value_length))
1031 {
1032 return true;
1033 }
1034 }
1035
1036 return attr->value == NULL && ctx->value_length == 0;
1037 }
1038
1039 static bool
lxb_dom_node_by_attr_cmp_end_case(lxb_dom_node_cb_ctx_t * ctx,lxb_dom_attr_t * attr)1040 lxb_dom_node_by_attr_cmp_end_case(lxb_dom_node_cb_ctx_t *ctx,
1041 lxb_dom_attr_t *attr)
1042 {
1043 if (attr->value != NULL && ctx->value_length <= attr->value->length) {
1044 size_t dif = attr->value->length - ctx->value_length;
1045
1046 if (lexbor_str_data_ncasecmp_end(&attr->value->data[dif],
1047 ctx->value, ctx->value_length))
1048 {
1049 return true;
1050 }
1051 }
1052
1053 return attr->value == NULL && ctx->value_length == 0;
1054 }
1055
1056 static bool
lxb_dom_node_by_attr_cmp_contain(lxb_dom_node_cb_ctx_t * ctx,lxb_dom_attr_t * attr)1057 lxb_dom_node_by_attr_cmp_contain(lxb_dom_node_cb_ctx_t *ctx,
1058 lxb_dom_attr_t *attr)
1059 {
1060 if (attr->value != NULL && ctx->value_length <= attr->value->length
1061 && lexbor_str_data_ncmp_contain(attr->value->data, attr->value->length,
1062 ctx->value, ctx->value_length))
1063 {
1064 return true;
1065 }
1066
1067 return attr->value == NULL && ctx->value_length == 0;
1068 }
1069
1070 static bool
lxb_dom_node_by_attr_cmp_contain_case(lxb_dom_node_cb_ctx_t * ctx,lxb_dom_attr_t * attr)1071 lxb_dom_node_by_attr_cmp_contain_case(lxb_dom_node_cb_ctx_t *ctx,
1072 lxb_dom_attr_t *attr)
1073 {
1074 if (attr->value != NULL && ctx->value_length <= attr->value->length
1075 && lexbor_str_data_ncasecmp_contain(attr->value->data, attr->value->length,
1076 ctx->value, ctx->value_length))
1077 {
1078 return true;
1079 }
1080
1081 return attr->value == NULL && ctx->value_length == 0;
1082 }
1083
1084 lxb_char_t *
lxb_dom_node_text_content(lxb_dom_node_t * node,size_t * len)1085 lxb_dom_node_text_content(lxb_dom_node_t *node, size_t *len)
1086 {
1087 lxb_char_t *text;
1088 size_t length = 0;
1089
1090 switch (node->type) {
1091 case LXB_DOM_NODE_TYPE_DOCUMENT_FRAGMENT:
1092 case LXB_DOM_NODE_TYPE_ELEMENT:
1093 lxb_dom_node_simple_walk(node, lxb_dom_node_text_content_size,
1094 &length);
1095
1096 text = lxb_dom_document_create_text(node->owner_document,
1097 (length + 1));
1098 if (text == NULL) {
1099 goto failed;
1100 }
1101
1102 lxb_dom_node_simple_walk(node, lxb_dom_node_text_content_concatenate,
1103 &text);
1104
1105 text -= length;
1106
1107 break;
1108
1109 case LXB_DOM_NODE_TYPE_ATTRIBUTE: {
1110 const lxb_char_t *attr_text;
1111
1112 attr_text = lxb_dom_attr_value(lxb_dom_interface_attr(node), &length);
1113 if (attr_text == NULL) {
1114 goto failed;
1115 }
1116
1117 text = lxb_dom_document_create_text(node->owner_document,
1118 (length + 1));
1119 if (text == NULL) {
1120 goto failed;
1121 }
1122
1123 /* +1 == with null '\0' */
1124 memcpy(text, attr_text, sizeof(lxb_char_t) * (length + 1));
1125
1126 break;
1127 }
1128
1129 case LXB_DOM_NODE_TYPE_TEXT:
1130 case LXB_DOM_NODE_TYPE_PROCESSING_INSTRUCTION:
1131 case LXB_DOM_NODE_TYPE_COMMENT: {
1132 lxb_dom_character_data_t *ch_data;
1133
1134 ch_data = lxb_dom_interface_character_data(node);
1135 length = ch_data->data.length;
1136
1137 text = lxb_dom_document_create_text(node->owner_document,
1138 (length + 1));
1139 if (text == NULL) {
1140 goto failed;
1141 }
1142
1143 /* +1 == with null '\0' */
1144 memcpy(text, ch_data->data.data, sizeof(lxb_char_t) * (length + 1));
1145
1146 break;
1147 }
1148
1149 default:
1150 goto failed;
1151 }
1152
1153 if (len != NULL) {
1154 *len = length;
1155 }
1156
1157 text[length] = 0x00;
1158
1159 return text;
1160
1161 failed:
1162
1163 if (len != NULL) {
1164 *len = 0;
1165 }
1166
1167 return NULL;
1168 }
1169
1170 static lexbor_action_t
lxb_dom_node_text_content_size(lxb_dom_node_t * node,void * ctx)1171 lxb_dom_node_text_content_size(lxb_dom_node_t *node, void *ctx)
1172 {
1173 if (node->type == LXB_DOM_NODE_TYPE_TEXT) {
1174 *((size_t *) ctx) += lxb_dom_interface_text(node)->char_data.data.length;
1175 }
1176
1177 return LEXBOR_ACTION_OK;
1178 }
1179
1180 static lexbor_action_t
lxb_dom_node_text_content_concatenate(lxb_dom_node_t * node,void * ctx)1181 lxb_dom_node_text_content_concatenate(lxb_dom_node_t *node, void *ctx)
1182 {
1183 if (node->type != LXB_DOM_NODE_TYPE_TEXT) {
1184 return LEXBOR_ACTION_OK;
1185 }
1186
1187 lxb_char_t **text = (lxb_char_t **) ctx;
1188 lxb_dom_character_data_t *ch_data = &lxb_dom_interface_text(node)->char_data;
1189
1190 memcpy(*text, ch_data->data.data, sizeof(lxb_char_t) * ch_data->data.length);
1191
1192 *text = *text + ch_data->data.length;
1193
1194 return LEXBOR_ACTION_OK;
1195 }
1196
1197 lxb_status_t
lxb_dom_node_text_content_set(lxb_dom_node_t * node,const lxb_char_t * content,size_t len)1198 lxb_dom_node_text_content_set(lxb_dom_node_t *node,
1199 const lxb_char_t *content, size_t len)
1200 {
1201 lxb_status_t status;
1202
1203 switch (node->type) {
1204 case LXB_DOM_NODE_TYPE_DOCUMENT_FRAGMENT:
1205 case LXB_DOM_NODE_TYPE_ELEMENT: {
1206 lxb_dom_text_t *text;
1207
1208 text = lxb_dom_document_create_text_node(node->owner_document,
1209 content, len);
1210 if (text == NULL) {
1211 return LXB_STATUS_ERROR_MEMORY_ALLOCATION;
1212 }
1213
1214 status = lxb_dom_node_replace_all(node, lxb_dom_interface_node(text));
1215 if (status != LXB_STATUS_OK) {
1216 lxb_dom_document_destroy_interface(text);
1217
1218 return status;
1219 }
1220
1221 break;
1222 }
1223
1224 case LXB_DOM_NODE_TYPE_ATTRIBUTE:
1225 return lxb_dom_attr_set_existing_value(lxb_dom_interface_attr(node),
1226 content, len);
1227
1228 case LXB_DOM_NODE_TYPE_TEXT:
1229 case LXB_DOM_NODE_TYPE_PROCESSING_INSTRUCTION:
1230 case LXB_DOM_NODE_TYPE_COMMENT:
1231 return lxb_dom_character_data_replace(lxb_dom_interface_character_data(node),
1232 content, len, 0, 0);
1233
1234 default:
1235 return LXB_STATUS_OK;
1236 }
1237
1238 return LXB_STATUS_OK;
1239 }
1240
1241 bool
lxb_dom_node_is_empty(const lxb_dom_node_t * root)1242 lxb_dom_node_is_empty(const lxb_dom_node_t *root)
1243 {
1244 lxb_char_t chr;
1245 lexbor_str_t *str;
1246 const lxb_char_t *data, *end;
1247 lxb_dom_node_t *node = root->first_child;
1248
1249 while (node != NULL) {
1250 if(node->local_name != LXB_TAG__EM_COMMENT) {
1251 if(node->local_name != LXB_TAG__TEXT)
1252 return false;
1253
1254 str = &lxb_dom_interface_text(node)->char_data.data;
1255 data = str->data;
1256 end = data + str->length;
1257
1258 while (data < end) {
1259 chr = *data++;
1260
1261 if (lexbor_utils_whitespace(chr, !=, &&)) {
1262 return false;
1263 }
1264 }
1265 }
1266
1267 if(node->first_child != NULL) {
1268 node = node->first_child;
1269 }
1270 else {
1271 while(node != root && node->next == NULL) {
1272 node = node->parent;
1273 }
1274
1275 if(node == root) {
1276 break;
1277 }
1278
1279 node = node->next;
1280 }
1281 }
1282
1283 return true;
1284 }
1285
1286 lxb_tag_id_t
lxb_dom_node_tag_id_noi(lxb_dom_node_t * node)1287 lxb_dom_node_tag_id_noi(lxb_dom_node_t *node)
1288 {
1289 return lxb_dom_node_tag_id(node);
1290 }
1291
1292 lxb_dom_node_t *
lxb_dom_node_next_noi(lxb_dom_node_t * node)1293 lxb_dom_node_next_noi(lxb_dom_node_t *node)
1294 {
1295 return lxb_dom_node_next(node);
1296 }
1297
1298 lxb_dom_node_t *
lxb_dom_node_prev_noi(lxb_dom_node_t * node)1299 lxb_dom_node_prev_noi(lxb_dom_node_t *node)
1300 {
1301 return lxb_dom_node_prev(node);
1302 }
1303
1304 lxb_dom_node_t *
lxb_dom_node_parent_noi(lxb_dom_node_t * node)1305 lxb_dom_node_parent_noi(lxb_dom_node_t *node)
1306 {
1307 return lxb_dom_node_parent(node);
1308 }
1309
1310 lxb_dom_node_t *
lxb_dom_node_first_child_noi(lxb_dom_node_t * node)1311 lxb_dom_node_first_child_noi(lxb_dom_node_t *node)
1312 {
1313 return lxb_dom_node_first_child(node);
1314 }
1315
1316 lxb_dom_node_t *
lxb_dom_node_last_child_noi(lxb_dom_node_t * node)1317 lxb_dom_node_last_child_noi(lxb_dom_node_t *node)
1318 {
1319 return lxb_dom_node_last_child(node);
1320 }
1321