1 /*
2 * Copyright (C) 2021-2024 Alexander Borisov
3 *
4 * Author: Alexander Borisov <borisov@lexbor.com>
5 * Adapted for PHP + libxml2 by: Niels Dossche <nielsdos@php.net>
6 * Based on Lexbor 2.4.0 (upstream commit e9d35f6384de7bd8c1b79e7111bc3a44f8822967)
7 */
8
9 #include <libxml/xmlstring.h>
10 #include <libxml/dict.h>
11 #include <Zend/zend.h>
12 #include <Zend/zend_operators.h>
13 #include <Zend/zend_API.h>
14 #include <php.h>
15
16 #include "lexbor/selectors-adapted/selectors.h"
17 #include "../../../namespace_compat.h"
18 #include "../../../domexception.h"
19 #include "../../../php_dom.h"
20
21 #include <math.h>
22
/* Note: casting and then comparing is a bit faster on my i7-4790 */
/* Evaluates to non-zero when (node)->type, narrowed to unsigned char, equals ty.
 * Both macro arguments are parenthesized so that compound expressions
 * (e.g. `A | B`) expand with the intended precedence. */
#define CMP_NODE_TYPE(node, ty) ((unsigned char) (node)->type == (ty))
25
/* A string value paired with a flag telling whether its data must be freed. */
typedef struct dom_lxb_str_wrapper {
    /* The string itself (data pointer and length). */
    lexbor_str_t str;
    /* When true, str.data is released with xmlFree() by dom_lxb_str_wrapper_release(). */
    bool should_free;
} dom_lxb_str_wrapper;
30
/* Releases the wrapped string data with xmlFree(), but only if the wrapper is flagged as owning it. */
static void dom_lxb_str_wrapper_release(dom_lxb_str_wrapper *wrapper)
{
    if (!wrapper->should_free) {
        /* Data is not flagged for freeing: nothing to do. */
        return;
    }

    xmlFree(wrapper->str.data);
}
37
lxb_selectors_adapted_is_matchable_child(const xmlNode * node)38 static zend_always_inline bool lxb_selectors_adapted_is_matchable_child(const xmlNode *node)
39 {
40 return CMP_NODE_TYPE(node, XML_ELEMENT_NODE);
41 }
42
lxb_selectors_adapted_cmp_local_name_literal(const xmlNode * node,const char * name)43 static zend_always_inline bool lxb_selectors_adapted_cmp_local_name_literal(const xmlNode *node, const char *name)
44 {
45 return strcmp((const char *) node->name, name) == 0;
46 }
47
lxb_selectors_adapted_cmp_ns(const xmlNode * a,const xmlNode * b)48 static zend_always_inline bool lxb_selectors_adapted_cmp_ns(const xmlNode *a, const xmlNode *b)
49 {
50 /* Namespace URIs are not interned, hence a->href != b->href. */
51 return a->ns == b->ns || (a->ns != NULL && b->ns != NULL && xmlStrEqual(a->ns->href, b->ns->href));
52 }
53
lxb_selectors_adapted_cmp_local_name_id(const xmlNode * node,const lxb_selectors_adapted_id * id)54 static zend_always_inline bool lxb_selectors_adapted_cmp_local_name_id(const xmlNode *node, const lxb_selectors_adapted_id *id)
55 {
56 uintptr_t ptr = (uintptr_t) node->name;
57 if (id->interned && (ptr & (ZEND_MM_ALIGNMENT - 1)) != 0) {
58 /* It cannot be a heap-allocated string because the pointer is not properly aligned for a heap allocation.
59 * Therefore, it must be interned into the dictionary pool. */
60 return node->name == id->name;
61 }
62
63 return strcmp((const char *) node->name, (const char *) id->name) == 0;
64 }
65
lxb_selectors_adapted_attr(const xmlNode * node,const lxb_char_t * name)66 static zend_always_inline const xmlAttr *lxb_selectors_adapted_attr(const xmlNode *node, const lxb_char_t *name)
67 {
68 const xmlAttr *attr = xmlHasProp(node, (const xmlChar *) name);
69 if (attr != NULL && attr->ns != NULL) {
70 return NULL;
71 }
72 return attr;
73 }
74
lxb_selectors_adapted_has_attr(const xmlNode * node,const char * name)75 static zend_always_inline bool lxb_selectors_adapted_has_attr(const xmlNode *node, const char *name)
76 {
77 return lxb_selectors_adapted_attr(node, (const lxb_char_t *) name) != NULL;
78 }
79
lxb_selectors_adapted_attr_value(const xmlAttr * attr)80 static zend_always_inline dom_lxb_str_wrapper lxb_selectors_adapted_attr_value(const xmlAttr *attr)
81 {
82 dom_lxb_str_wrapper ret;
83 ret.str.data = (lxb_char_t *) php_libxml_attr_value(attr, &ret.should_free);
84 ret.str.length = strlen((const char *) ret.str.data);
85 return ret;
86 }
87
/* Fills entry->id from the selector's name.
 * If the node's document has a dictionary that already contains the name, the
 * dictionary's copy is stored and the id is marked as interned; otherwise the
 * selector's own name buffer is stored and the id is marked as not interned. */
static void lxb_selectors_adapted_set_entry_id_ex(lxb_selectors_entry_t *entry, const lxb_css_selector_t *selector, const xmlNode *node)
{
    const xmlChar *interned = NULL;

    if (node->doc != NULL && node->doc->dict != NULL) {
        interned = xmlDictExists(node->doc->dict, selector->name.data, selector->name.length);
    }

    if (interned == NULL) {
        entry->id.name = selector->name.data;
        entry->id.interned = false;
    } else {
        entry->id.name = interned;
        entry->id.interned = true;
    }
}
102
lxb_selectors_adapted_set_entry_id(lxb_selectors_entry_t * entry,const lxb_css_selector_t * selector,const xmlNode * node)103 static zend_always_inline void lxb_selectors_adapted_set_entry_id(lxb_selectors_entry_t *entry, const lxb_css_selector_t *selector, const xmlNode *node)
104 {
105 if (entry->id.name == NULL) {
106 lxb_selectors_adapted_set_entry_id_ex(entry, selector, node);
107 }
108 }
109
110 static lxb_status_t
111 lxb_selectors_state_tree(lxb_selectors_t *selectors, const xmlNode *root,
112 const lxb_css_selector_list_t *list);
113
114 static lxb_status_t
115 lxb_selectors_state_run(lxb_selectors_t *selectors, const xmlNode *node,
116 const lxb_css_selector_list_t *list);
117
118 static lxb_selectors_entry_t *
119 lxb_selectors_state_find(lxb_selectors_t *selectors,
120 lxb_selectors_entry_t *entry);
121
122 static lxb_selectors_entry_t *
123 lxb_selectors_state_find_check(lxb_selectors_t *selectors, const xmlNode *node,
124 const lxb_css_selector_t *selector,
125 lxb_selectors_entry_t *entry);
126
127 static lxb_selectors_entry_t *
128 lxb_selectors_state_pseudo_class_function(lxb_selectors_t *selectors,
129 lxb_selectors_entry_t *entry);
130
131 static const xmlNode *
132 lxb_selectors_next_node(lxb_selectors_nested_t *main);
133
134 static const xmlNode *
135 lxb_selectors_state_has_relative(const xmlNode *node,
136 const lxb_css_selector_t *selector);
137
138 static lxb_selectors_entry_t *
139 lxb_selectors_state_after_find_has(lxb_selectors_t *selectors,
140 lxb_selectors_entry_t *entry);
141
142 static lxb_selectors_entry_t *
143 lxb_selectors_state_after_find(lxb_selectors_t *selectors,
144 lxb_selectors_entry_t *entry);
145
146 static lxb_selectors_entry_t *
147 lxb_selectors_state_after_nth_child(lxb_selectors_t *selectors,
148 lxb_selectors_entry_t *entry);
149
150 static bool
151 lxb_selectors_match(lxb_selectors_t *selectors, lxb_selectors_entry_t *entry,
152 const lxb_css_selector_t *selector, const xmlNode *node);
153
154 static bool
155 lxb_selectors_match_element(const lxb_css_selector_t *selector,
156 const xmlNode *node, lxb_selectors_entry_t *entry);
157
158 static bool
159 lxb_selectors_match_id(const lxb_css_selector_t *selector, const xmlNode *node, bool quirks);
160
161 static bool
162 lxb_selectors_match_class(const lexbor_str_t *target, const lexbor_str_t *src,
163 bool quirks);
164
165 static bool
166 lxb_selectors_match_attribute(const lxb_css_selector_t *selector,
167 const xmlNode *node, lxb_selectors_entry_t *entry);
168
169 static bool
170 lxb_selectors_pseudo_class(const lxb_css_selector_t *selector,
171 const xmlNode *node);
172
173 static bool
174 lxb_selectors_pseudo_class_function(const lxb_css_selector_t *selector,
175 const xmlNode *node);
176
177 static bool
178 lxb_selectors_pseudo_element(const lxb_css_selector_t *selector,
179 const xmlNode *node);
180
181 static bool
182 lxb_selectors_pseudo_class_disabled(const xmlNode *node);
183
184 static bool
185 lxb_selectors_pseudo_class_first_child(const xmlNode *node);
186
187 static bool
188 lxb_selectors_pseudo_class_first_of_type(const xmlNode *node);
189
190 static bool
191 lxb_selectors_pseudo_class_last_child(const xmlNode *node);
192
193 static bool
194 lxb_selectors_pseudo_class_last_of_type(const xmlNode *node);
195
196 static bool
197 lxb_selectors_pseudo_class_read_write(const xmlNode *node);
198
199 static bool
200 lxb_selectors_anb_calc(const lxb_css_selector_anb_of_t *anb, size_t index);
201
202 static lxb_status_t
203 lxb_selectors_cb_ok(const xmlNode *node,
204 lxb_css_selector_specificity_t spec, void *ctx);
205
206 static lxb_status_t
207 lxb_selectors_cb_not(const xmlNode *node,
208 lxb_css_selector_specificity_t spec, void *ctx);
209
210
211 lxb_status_t
lxb_selectors_init(lxb_selectors_t * selectors)212 lxb_selectors_init(lxb_selectors_t *selectors)
213 {
214 lxb_status_t status;
215
216 selectors->objs = lexbor_dobject_create();
217 status = lexbor_dobject_init(selectors->objs,
218 128, sizeof(lxb_selectors_entry_t));
219 if (status != LXB_STATUS_OK) {
220 return status;
221 }
222
223 selectors->nested = lexbor_dobject_create();
224 status = lexbor_dobject_init(selectors->nested,
225 64, sizeof(lxb_selectors_nested_t));
226 if (status != LXB_STATUS_OK) {
227 return status;
228 }
229
230 selectors->options = LXB_SELECTORS_OPT_DEFAULT;
231
232 return LXB_STATUS_OK;
233 }
234
235 void
lxb_selectors_clean(lxb_selectors_t * selectors)236 lxb_selectors_clean(lxb_selectors_t *selectors)
237 {
238 lexbor_dobject_clean(selectors->objs);
239 lexbor_dobject_clean(selectors->nested);
240 }
241
242 void
lxb_selectors_destroy(lxb_selectors_t * selectors)243 lxb_selectors_destroy(lxb_selectors_t *selectors)
244 {
245 selectors->objs = lexbor_dobject_destroy(selectors->objs, true);
246 selectors->nested = lexbor_dobject_destroy(selectors->nested, true);
247 }
248
249 lxb_inline const xmlNode *
lxb_selectors_descendant(lxb_selectors_t * selectors,lxb_selectors_entry_t * entry,const lxb_css_selector_t * selector,const xmlNode * node)250 lxb_selectors_descendant(lxb_selectors_t *selectors,
251 lxb_selectors_entry_t *entry,
252 const lxb_css_selector_t *selector,
253 const xmlNode *node)
254 {
255 node = node->parent;
256
257 while (node != NULL) {
258 if (CMP_NODE_TYPE(node, XML_ELEMENT_NODE)
259 && lxb_selectors_match(selectors, entry, selector, node))
260 {
261 return node;
262 }
263
264 node = node->parent;
265 }
266
267 return NULL;
268 }
269
270 lxb_inline const xmlNode *
lxb_selectors_close(lxb_selectors_t * selectors,lxb_selectors_entry_t * entry,const lxb_css_selector_t * selector,const xmlNode * node)271 lxb_selectors_close(lxb_selectors_t *selectors, lxb_selectors_entry_t *entry,
272 const lxb_css_selector_t *selector, const xmlNode *node)
273 {
274 if (lxb_selectors_match(selectors, entry, selector, node)) {
275 return node;
276 }
277
278 return NULL;
279 }
280
281 lxb_inline const xmlNode *
lxb_selectors_child(lxb_selectors_t * selectors,lxb_selectors_entry_t * entry,const lxb_css_selector_t * selector,const xmlNode * root)282 lxb_selectors_child(lxb_selectors_t *selectors, lxb_selectors_entry_t *entry,
283 const lxb_css_selector_t *selector, const xmlNode *root)
284 {
285 root = root->parent;
286
287 if (root != NULL && CMP_NODE_TYPE(root, XML_ELEMENT_NODE)
288 && lxb_selectors_match(selectors, entry, selector, root))
289 {
290 return root;
291 }
292
293 return NULL;
294 }
295
296 lxb_inline const xmlNode *
lxb_selectors_sibling(lxb_selectors_t * selectors,lxb_selectors_entry_t * entry,const lxb_css_selector_t * selector,const xmlNode * node)297 lxb_selectors_sibling(lxb_selectors_t *selectors, lxb_selectors_entry_t *entry,
298 const lxb_css_selector_t *selector, const xmlNode *node)
299 {
300 node = node->prev;
301
302 while (node != NULL) {
303 if (CMP_NODE_TYPE(node, XML_ELEMENT_NODE)) {
304 if (lxb_selectors_match(selectors, entry, selector, node)) {
305 return node;
306 }
307
308 return NULL;
309 }
310
311 node = node->prev;
312 }
313
314 return NULL;
315 }
316
317 lxb_inline const xmlNode *
lxb_selectors_following(lxb_selectors_t * selectors,lxb_selectors_entry_t * entry,const lxb_css_selector_t * selector,const xmlNode * node)318 lxb_selectors_following(lxb_selectors_t *selectors, lxb_selectors_entry_t *entry,
319 const lxb_css_selector_t *selector, const xmlNode *node)
320 {
321 node = node->prev;
322
323 while (node != NULL) {
324 if (CMP_NODE_TYPE(node, XML_ELEMENT_NODE) &&
325 lxb_selectors_match(selectors, entry, selector, node))
326 {
327 return node;
328 }
329
330 node = node->prev;
331 }
332
333 return NULL;
334 }
335
336 lxb_status_t
lxb_selectors_find(lxb_selectors_t * selectors,const xmlNode * root,const lxb_css_selector_list_t * list,lxb_selectors_cb_f cb,void * ctx)337 lxb_selectors_find(lxb_selectors_t *selectors, const xmlNode *root,
338 const lxb_css_selector_list_t *list,
339 lxb_selectors_cb_f cb, void *ctx)
340 {
341 lxb_selectors_entry_t *entry;
342 lxb_selectors_nested_t nested;
343
344 entry = lexbor_dobject_calloc(selectors->objs);
345
346 entry->combinator = LXB_CSS_SELECTOR_COMBINATOR_CLOSE;
347 entry->selector = list->last;
348
349 nested.parent = NULL;
350 nested.entry = entry;
351 nested.cb = cb;
352 nested.ctx = ctx;
353
354 selectors->current = &nested;
355 selectors->status = LXB_STATUS_OK;
356
357 return lxb_selectors_state_tree(selectors, root, list);
358 }
359
360 lxb_status_t
lxb_selectors_match_node(lxb_selectors_t * selectors,const xmlNode * node,const lxb_css_selector_list_t * list,lxb_selectors_cb_f cb,void * ctx)361 lxb_selectors_match_node(lxb_selectors_t *selectors, const xmlNode *node,
362 const lxb_css_selector_list_t *list,
363 lxb_selectors_cb_f cb, void *ctx)
364 {
365 lxb_status_t status;
366 lxb_selectors_entry_t *entry;
367 lxb_selectors_nested_t nested;
368
369 if (!CMP_NODE_TYPE(node, XML_ELEMENT_NODE)) {
370 return LXB_STATUS_OK;
371 }
372
373 entry = lexbor_dobject_calloc(selectors->objs);
374
375 entry->combinator = LXB_CSS_SELECTOR_COMBINATOR_CLOSE;
376 entry->selector = list->last;
377
378 nested.parent = NULL;
379 nested.entry = entry;
380 nested.cb = cb;
381 nested.ctx = ctx;
382
383 selectors->current = &nested;
384 selectors->status = LXB_STATUS_OK;
385
386 status = lxb_selectors_state_run(selectors, node, list);
387
388 lxb_selectors_clean(selectors);
389
390 return status;
391 }
392
/*
 * Walks the subtree below `root` in document order (pre-order, depth first)
 * and runs the selector list against every element node it visits. `root`
 * itself is not tested: the walk starts at root->children.
 *
 * The object pools are cleaned on every exit path.
 *
 * Returns LXB_STATUS_OK when the walk completes or when a run reports
 * LXB_STATUS_STOP; any other non-OK status from lxb_selectors_state_run()
 * aborts the walk and is returned unchanged.
 */
static lxb_status_t
lxb_selectors_state_tree(lxb_selectors_t *selectors, const xmlNode *root,
                         const lxb_css_selector_list_t *list)
{
    lxb_status_t status;
    const xmlNode *node;

    /* Disabled: optional matching of the root node itself. */
#if 0
    if (selectors->options & LXB_SELECTORS_OPT_MATCH_ROOT) {
        node = root;

        if (CMP_NODE_TYPE(node, XML_DOCUMENT_NODE) || CMP_NODE_TYPE(node, XML_HTML_DOCUMENT_NODE)
            || CMP_NODE_TYPE(node, XML_DOCUMENT_FRAG_NODE)) {
            node = root->children;
        }
    }
    else
#endif
    {
        node = root->children;
    }

    /* Nothing below the root: nothing to match. */
    if (node == NULL) {
        goto out;
    }

    do {
        /* Non-element nodes are never matched and are not descended into. */
        if (!CMP_NODE_TYPE(node, XML_ELEMENT_NODE)) {
            goto next;
        }

        status = lxb_selectors_state_run(selectors, node, list);
        if (status != LXB_STATUS_OK) {
            /* STOP ends the walk early but is not reported as an error. */
            if (status == LXB_STATUS_STOP) {
                break;
            }

            lxb_selectors_clean(selectors);

            return status;
        }

        if (node->children != NULL) {
            /* Descend first. */
            node = node->children;
        }
        else {

        next:

            /* No children (or skipped node): climb until a next sibling
             * exists or the root is reached. */
            while (node != root && node->next == NULL) {
                node = node->parent;
            }

            if (node == root) {
                break;
            }

            node = node->next;
        }
    }
    while (true);

out:
    lxb_selectors_clean(selectors);

    return LXB_STATUS_OK;
}
460
461 static lxb_status_t
lxb_selectors_state_run(lxb_selectors_t * selectors,const xmlNode * node,const lxb_css_selector_list_t * list)462 lxb_selectors_state_run(lxb_selectors_t *selectors, const xmlNode *node,
463 const lxb_css_selector_list_t *list)
464 {
465 lxb_selectors_entry_t *entry;
466
467 entry = selectors->current->entry;
468
469 entry->node = node;
470 selectors->state = lxb_selectors_state_find;
471 selectors->first = entry;
472
473 again:
474
475 do {
476 entry = selectors->state(selectors, entry);
477 }
478 while (entry != NULL);
479
480 if (selectors->current->parent != NULL
481 && selectors->status == LXB_STATUS_OK)
482 {
483 entry = selectors->current->entry;
484 selectors->state = selectors->current->return_state;
485
486 goto again;
487 }
488
489 return selectors->status;
490 }
491
/*
 * Main state: tries to satisfy entry->selector starting from entry->node.
 *
 * Functional pseudo-classes that need nested matching are handed over to
 * lxb_selectors_state_pseudo_class_function(): a nested context (and its
 * first entry) is created on first use, made current, and its entry is
 * returned as the next step. The nth-child family without an `of` clause and
 * the nth-of-type family skip this and are matched directly.
 *
 * Everything else is dispatched on the entry's combinator to locate a
 * matching node, and the outcome (node or NULL) is forwarded to
 * lxb_selectors_state_find_check().
 *
 * Returns the next entry to process, or NULL; an unsupported combinator sets
 * selectors->status to LXB_STATUS_ERROR and returns NULL.
 */
static lxb_selectors_entry_t *
lxb_selectors_state_find(lxb_selectors_t *selectors,
                         lxb_selectors_entry_t *entry)
{
    const xmlNode *node;
    lxb_selectors_entry_t *next;
    const lxb_css_selector_t *selector;
    const lxb_css_selector_anb_of_t *anb;
    const lxb_css_selector_pseudo_t *pseudo;

    selector = entry->selector;

    if (selector->type == LXB_CSS_SELECTOR_TYPE_PSEUDO_CLASS_FUNCTION) {
        pseudo = &selector->u.pseudo;

        /* Optimizing. */

        switch (pseudo->type) {
            case LXB_CSS_SELECTOR_PSEUDO_CLASS_FUNCTION_NTH_CHILD:
            case LXB_CSS_SELECTOR_PSEUDO_CLASS_FUNCTION_NTH_LAST_CHILD:
                anb = pseudo->data;

                /* With an `of <selector>` clause nested matching is required. */
                if (anb->of != NULL) {
                    break;
                }

                goto godoit;

            case LXB_CSS_SELECTOR_PSEUDO_CLASS_FUNCTION_NTH_OF_TYPE:
            case LXB_CSS_SELECTOR_PSEUDO_CLASS_FUNCTION_NTH_LAST_OF_TYPE:
                goto godoit;

            default:
                break;
        }

        /* Create the nested context and its first entry once per entry. */
        if (entry->nested == NULL) {
            next = lexbor_dobject_calloc(selectors->objs);

            next->combinator = LXB_CSS_SELECTOR_COMBINATOR_CLOSE;

            entry->nested = lexbor_dobject_calloc(selectors->nested);

            entry->nested->entry = next;
            entry->nested->parent = selectors->current;
        }

        /* Remember where to come back to, then enter the nested context. */
        selectors->state = lxb_selectors_state_pseudo_class_function;
        selectors->current->last = entry;
        selectors->current = entry->nested;

        next = entry->nested->entry;
        next->node = entry->node;

        return next;
    }

godoit:

    switch (entry->combinator) {
        case LXB_CSS_SELECTOR_COMBINATOR_DESCENDANT:
            node = lxb_selectors_descendant(selectors, entry,
                                            selector, entry->node);
            break;

        case LXB_CSS_SELECTOR_COMBINATOR_CLOSE:
            node = lxb_selectors_close(selectors, entry,
                                       selector, entry->node);
            break;

        case LXB_CSS_SELECTOR_COMBINATOR_CHILD:
            node = lxb_selectors_child(selectors, entry,
                                       selector, entry->node);
            break;

        case LXB_CSS_SELECTOR_COMBINATOR_SIBLING:
            node = lxb_selectors_sibling(selectors, entry,
                                         selector, entry->node);
            break;

        case LXB_CSS_SELECTOR_COMBINATOR_FOLLOWING:
            node = lxb_selectors_following(selectors, entry,
                                           selector, entry->node);
            break;

        case LXB_CSS_SELECTOR_COMBINATOR_CELL:
        default:
            selectors->status = LXB_STATUS_ERROR;
            return NULL;
    }

    return lxb_selectors_state_find_check(selectors, node, selector, entry);
}
585
/*
 * Decides what to do after an attempt to match `selector` for `entry`.
 *
 * `node` is the node that matched, or NULL when nothing matched.
 *
 * - node == NULL: backtrack. Entries further along the entry->next chain are
 *   revisited; DESCENDANT and FOLLOWING entries can be retried from another
 *   node (parent element / previous sibling), the other combinators cannot.
 *   When the chain is exhausted, the next selector list
 *   (selector->list->next) is tried, if there is one.
 * - node != NULL and `selector` is the first of its chain (no prev): the
 *   chain matched completely, so the current context's callback is invoked.
 *   At the top level, unless LXB_SELECTORS_OPT_MATCH_FIRST is set, the
 *   remaining lists are still tried while the callback returns OK.
 * - otherwise: continue with the previous selector of the chain, creating
 *   its entry on first use.
 *
 * Returns the next entry to process, or NULL when this run is finished
 * (selectors->status holds LXB_STATUS_ERROR for an unsupported combinator).
 */
static lxb_selectors_entry_t *
lxb_selectors_state_find_check(lxb_selectors_t *selectors, const xmlNode *node,
                               const lxb_css_selector_t *selector,
                               lxb_selectors_entry_t *entry)
{
    lxb_selectors_entry_t *next;
    lxb_selectors_nested_t *current;

    if (node == NULL) {

    try_next:

        if (entry->next == NULL) {

        try_next_list:

            /* No further selector list to fall back on. */
            if (selector->list->next == NULL) {
                return NULL;
            }

            /*
             * Try the following selectors from the selector list.
             */

            /* Reuse the entry created for the next list on an earlier pass. */
            if (entry->following != NULL) {
                entry->following->node = entry->node;

                if (selectors->current->parent == NULL) {
                    selectors->first = entry->following;
                }

                return entry->following;
            }

            next = lexbor_dobject_calloc(selectors->objs);

            next->combinator = LXB_CSS_SELECTOR_COMBINATOR_CLOSE;
            next->selector = selector->list->next->last;
            next->node = entry->node;

            entry->following = next;

            /* At the top level, track the first entry of the list in use. */
            if (selectors->current->parent == NULL) {
                selectors->first = next;
            }

            return next;
        }

        do {
            entry = entry->next;

            /* CLOSE entries offer no alternative node: skip past them. */
            while (entry->combinator == LXB_CSS_SELECTOR_COMBINATOR_CLOSE) {
                if (entry->next == NULL) {
                    selector = entry->selector;
                    goto try_next;
                }

                entry = entry->next;
            }

            switch (entry->combinator) {
                case LXB_CSS_SELECTOR_COMBINATOR_DESCENDANT:
                    /* Retry from the next ancestor, if it is an element. */
                    node = entry->node->parent;

                    if (node == NULL
                        || !CMP_NODE_TYPE(node, XML_ELEMENT_NODE))
                    {
                        node = NULL;
                    }

                    break;

                case LXB_CSS_SELECTOR_COMBINATOR_FOLLOWING:
                    /* Retry from the previous sibling. */
                    node = entry->node->prev;
                    break;

                case LXB_CSS_SELECTOR_COMBINATOR_SIBLING:
                case LXB_CSS_SELECTOR_COMBINATOR_CHILD:
                case LXB_CSS_SELECTOR_COMBINATOR_CLOSE:
                    /* Only one candidate exists for these: keep unwinding. */
                    node = NULL;
                    break;

                case LXB_CSS_SELECTOR_COMBINATOR_CELL:
                default:
                    selectors->status = LXB_STATUS_ERROR;
                    return NULL;
            }
        }
        while (node == NULL);

        entry->node = node;

        return entry;
    }

    /* The left-most selector of the chain matched: report it. */
    if (selector->prev == NULL) {
        current = selectors->current;

        selectors->status = current->cb(current->entry->node,
                                        selector->list->specificity,
                                        current->ctx);

        if ((selectors->options & LXB_SELECTORS_OPT_MATCH_FIRST) == 0
            && current->parent == NULL)
        {
            if (selectors->status == LXB_STATUS_OK) {
                entry = selectors->first;
                goto try_next_list;
            }
        }

        return NULL;
    }

    /* Move one selector to the left, creating its entry on first use. */
    if (entry->prev == NULL) {
        next = lexbor_dobject_calloc(selectors->objs);

        next->combinator = selector->combinator;
        next->selector = selector->prev;
        next->node = node;

        next->next = entry;
        entry->prev = next;

        return next;
    }

    entry->prev->node = node;

    return entry->prev;
}
718
719 static lxb_selectors_entry_t *
lxb_selectors_state_pseudo_class_function(lxb_selectors_t * selectors,lxb_selectors_entry_t * entry)720 lxb_selectors_state_pseudo_class_function(lxb_selectors_t *selectors,
721 lxb_selectors_entry_t *entry)
722 {
723 const xmlNode *node, *base;
724 lxb_selectors_nested_t *current;
725 const lxb_css_selector_list_t *list;
726 lxb_css_selector_anb_of_t *anb;
727 const lxb_css_selector_pseudo_t *pseudo;
728
729 current = selectors->current;
730
731 base = lxb_selectors_next_node(current);
732 if (base == NULL) {
733 goto not_found;
734 }
735
736 pseudo = ¤t->parent->last->selector->u.pseudo;
737
738 switch (pseudo->type) {
739 case LXB_CSS_SELECTOR_PSEUDO_CLASS_FUNCTION_HAS:
740 list = (lxb_css_selector_list_t *) pseudo->data;
741 node = lxb_selectors_state_has_relative(base, list->first);
742
743 if (node == NULL) {
744 selectors->current = selectors->current->parent;
745 entry = selectors->current->last;
746
747 selectors->state = lxb_selectors_state_find;
748
749 return lxb_selectors_state_find_check(selectors, NULL,
750 entry->selector, entry);
751 }
752
753 current->root = base;
754
755 current->entry->selector = list->last;
756 current->entry->node = node;
757 current->return_state = lxb_selectors_state_after_find_has;
758 current->cb = lxb_selectors_cb_ok;
759 current->ctx = ¤t->found;
760 current->found = false;
761
762 selectors->state = lxb_selectors_state_find;
763
764 return entry;
765
766 case LXB_CSS_SELECTOR_PSEUDO_CLASS_FUNCTION_CURRENT:
767 case LXB_CSS_SELECTOR_PSEUDO_CLASS_FUNCTION_IS:
768 case LXB_CSS_SELECTOR_PSEUDO_CLASS_FUNCTION_WHERE:
769 current->entry->selector = ((lxb_css_selector_list_t *) pseudo->data)->last;
770 current->entry->node = base;
771 current->return_state = lxb_selectors_state_after_find;
772 current->cb = lxb_selectors_cb_ok;
773 current->ctx = ¤t->found;
774 current->found = false;
775
776 selectors->state = lxb_selectors_state_find;
777
778 return entry;
779
780 case LXB_CSS_SELECTOR_PSEUDO_CLASS_FUNCTION_NOT:
781 current->entry->selector = ((lxb_css_selector_list_t *) pseudo->data)->last;
782 current->entry->node = base;
783 current->return_state = lxb_selectors_state_after_find;
784 current->cb = lxb_selectors_cb_not;
785 current->ctx = ¤t->found;
786 current->found = true;
787
788 selectors->state = lxb_selectors_state_find;
789
790 return entry;
791
792 case LXB_CSS_SELECTOR_PSEUDO_CLASS_FUNCTION_NTH_CHILD:
793 case LXB_CSS_SELECTOR_PSEUDO_CLASS_FUNCTION_NTH_LAST_CHILD:
794 anb = pseudo->data;
795
796 current->entry->selector = anb->of->last;
797 current->entry->node = base;
798 current->return_state = lxb_selectors_state_after_nth_child;
799 current->cb = lxb_selectors_cb_ok;
800 current->ctx = ¤t->found;
801 current->root = base;
802 current->index = 0;
803 current->found = false;
804
805 selectors->state = lxb_selectors_state_find;
806
807 return entry;
808
809 /*
810 * This one can only happen if the user has somehow messed up the
811 * selector.
812 */
813
814 case LXB_CSS_SELECTOR_PSEUDO_CLASS_FUNCTION_NTH_OF_TYPE:
815 case LXB_CSS_SELECTOR_PSEUDO_CLASS_FUNCTION_NTH_LAST_OF_TYPE:
816 case LXB_CSS_SELECTOR_PSEUDO_CLASS_FUNCTION_DIR:
817 case LXB_CSS_SELECTOR_PSEUDO_CLASS_FUNCTION_LANG:
818 case LXB_CSS_SELECTOR_PSEUDO_CLASS_FUNCTION_NTH_COL:
819 case LXB_CSS_SELECTOR_PSEUDO_CLASS_FUNCTION_NTH_LAST_COL:
820 default:
821 break;
822 }
823
824 not_found:
825
826 selectors->current = selectors->current->parent;
827 entry = selectors->current->last;
828
829 selectors->state = lxb_selectors_state_find;
830
831 return lxb_selectors_state_find_check(selectors, NULL,
832 entry->selector, entry);
833 }
834
835 static const xmlNode *
lxb_selectors_next_node(lxb_selectors_nested_t * main)836 lxb_selectors_next_node(lxb_selectors_nested_t *main)
837 {
838 const xmlNode *node = main->entry->node;
839
840 switch (main->parent->last->combinator) {
841 case LXB_CSS_SELECTOR_COMBINATOR_DESCENDANT:
842 case LXB_CSS_SELECTOR_COMBINATOR_CHILD:
843 if (node->parent == NULL
844 || !CMP_NODE_TYPE(node->parent, XML_ELEMENT_NODE))
845 {
846 return NULL;
847 }
848
849 return node->parent;
850
851 case LXB_CSS_SELECTOR_COMBINATOR_CLOSE:
852 return node;
853
854 case LXB_CSS_SELECTOR_COMBINATOR_SIBLING:
855 case LXB_CSS_SELECTOR_COMBINATOR_FOLLOWING:
856 node = node->prev;
857 break;
858
859 default:
860 return NULL;
861 }
862
863 while (node != NULL) {
864 if (CMP_NODE_TYPE(node, XML_ELEMENT_NODE)) {
865 break;
866 }
867
868 node = node->prev;
869 }
870
871 return node;
872 }
873
874 static const xmlNode *
lxb_selectors_state_has_relative(const xmlNode * node,const lxb_css_selector_t * selector)875 lxb_selectors_state_has_relative(const xmlNode *node,
876 const lxb_css_selector_t *selector)
877 {
878 const xmlNode *root = node;
879
880 switch (selector->combinator) {
881 case LXB_CSS_SELECTOR_COMBINATOR_DESCENDANT:
882 case LXB_CSS_SELECTOR_COMBINATOR_CHILD:
883 node = node->children;
884 break;
885
886 case LXB_CSS_SELECTOR_COMBINATOR_SIBLING:
887 case LXB_CSS_SELECTOR_COMBINATOR_FOLLOWING:
888 node = node->next;
889 break;
890
891 default:
892 return NULL;
893 }
894
895 while (node != NULL) {
896 if (CMP_NODE_TYPE(node, XML_ELEMENT_NODE)) {
897 break;
898 }
899
900 while (node !=root && node->next == NULL) {
901 node = node->parent;
902 }
903
904 if (node == root) {
905 return NULL;
906 }
907
908 node = node->next;
909 }
910
911 return node;
912 }
913
/*
 * Continuation state for :has(), run after a nested match attempt finished.
 *
 * If the nested run recorded a match (current->found), the nested context is
 * left and the :has() base node (current->root) is reported as matched for
 * the parent entry.
 *
 * Otherwise the next candidate is selected according to the combinator of the
 * first selector in the relative chain:
 *
 * - DESCENDANT: the next element in document order inside current->root;
 * - CHILD / FOLLOWING: the next element sibling of the current candidate;
 * - SIBLING: no further candidate.
 *
 * With a new candidate the nested match is retried; without one the nested
 * context is left and a failed match is reported for the parent entry.
 *
 * Returns the next entry to process, or NULL with selectors->status set to
 * LXB_STATUS_ERROR for an unsupported combinator.
 */
static lxb_selectors_entry_t *
lxb_selectors_state_after_find_has(lxb_selectors_t *selectors,
                                   lxb_selectors_entry_t *entry)
{
    const xmlNode *node;
    lxb_selectors_entry_t *parent;
    lxb_selectors_nested_t *current;

    if (selectors->current->found) {
        node = selectors->current->root;

        selectors->current = selectors->current->parent;
        parent = selectors->current->last;

        selectors->state = lxb_selectors_state_find;

        return lxb_selectors_state_find_check(selectors, node,
                                              parent->selector, parent);
    }

    current = selectors->current;
    node = entry->node;

    switch (entry->selector->list->first->combinator) {
        case LXB_CSS_SELECTOR_COMBINATOR_DESCENDANT:
            if (node->children != NULL) {
                /* Descend first. */
                node = node->children;
            }
            else {

            next:

                /* Climb until a next sibling exists or the base is reached. */
                while (node != current->root && node->next == NULL) {
                    node = node->parent;
                }

                if (node == current->root) {
                    goto failed;
                }

                node = node->next;
            }

            /* Non-element nodes are skipped without descending into them. */
            if (!CMP_NODE_TYPE(node, XML_ELEMENT_NODE)) {
                goto next;
            }

            break;

        case LXB_CSS_SELECTOR_COMBINATOR_CHILD:
        case LXB_CSS_SELECTOR_COMBINATOR_FOLLOWING:
            node = node->next;

            while (node != NULL && !CMP_NODE_TYPE(node, XML_ELEMENT_NODE)) {
                node = node->next;
            }

            if (node == NULL) {
                goto failed;
            }

            break;

        case LXB_CSS_SELECTOR_COMBINATOR_SIBLING:
            /* Only a single candidate exists for the adjacent sibling. */
            goto failed;

        case LXB_CSS_SELECTOR_COMBINATOR_CLOSE:
        case LXB_CSS_SELECTOR_COMBINATOR_CELL:
        default:
            selectors->status = LXB_STATUS_ERROR;
            return NULL;
    }

    /* Retry the nested match from the new candidate. */
    entry->node = node;
    selectors->state = lxb_selectors_state_find;

    return entry;

failed:

    /* Candidates exhausted: leave the nested context with "no match". */
    selectors->current = selectors->current->parent;
    parent = selectors->current->last;

    selectors->state = lxb_selectors_state_find;

    return lxb_selectors_state_find_check(selectors, NULL,
                                          parent->selector, parent);
}
1002
1003
1004 static lxb_selectors_entry_t *
lxb_selectors_state_after_find(lxb_selectors_t * selectors,lxb_selectors_entry_t * entry)1005 lxb_selectors_state_after_find(lxb_selectors_t *selectors,
1006 lxb_selectors_entry_t *entry)
1007 {
1008 const xmlNode *node;
1009 lxb_selectors_entry_t *parent;
1010 lxb_selectors_nested_t *current;
1011
1012 current = selectors->current;
1013
1014 if (current->found) {
1015 node = entry->node;
1016
1017 selectors->current = current->parent;
1018 parent = selectors->current->last;
1019
1020 selectors->state = lxb_selectors_state_find;
1021
1022 return lxb_selectors_state_find_check(selectors, node,
1023 parent->selector, parent);
1024 }
1025
1026 node = entry->node;
1027
1028 switch (current->parent->last->combinator) {
1029 case LXB_CSS_SELECTOR_COMBINATOR_DESCENDANT:
1030 if (node->parent != NULL
1031 && CMP_NODE_TYPE(node->parent, XML_ELEMENT_NODE))
1032 {
1033 node = node->parent;
1034 }
1035 else {
1036 node = NULL;
1037 }
1038
1039 break;
1040
1041 case LXB_CSS_SELECTOR_COMBINATOR_FOLLOWING:
1042 node = node->prev;
1043
1044 while (node != NULL && !CMP_NODE_TYPE(node, XML_ELEMENT_NODE)) {
1045 node = node->prev;
1046 }
1047
1048 break;
1049
1050 case LXB_CSS_SELECTOR_COMBINATOR_CHILD:
1051 case LXB_CSS_SELECTOR_COMBINATOR_SIBLING:
1052 case LXB_CSS_SELECTOR_COMBINATOR_CLOSE:
1053 node = NULL;
1054 break;
1055
1056 case LXB_CSS_SELECTOR_COMBINATOR_CELL:
1057 default:
1058 selectors->status = LXB_STATUS_ERROR;
1059 return NULL;
1060 }
1061
1062 if (node == NULL) {
1063 selectors->current = current->parent;
1064 parent = selectors->current->last;
1065
1066 selectors->state = lxb_selectors_state_find;
1067
1068 return lxb_selectors_state_find_check(selectors, node,
1069 parent->selector, parent);
1070 }
1071
1072 entry->node = node;
1073 selectors->state = lxb_selectors_state_find;
1074
1075 return entry;
1076 }
1077
1078 static lxb_selectors_entry_t *
lxb_selectors_state_after_nth_child(lxb_selectors_t * selectors,lxb_selectors_entry_t * entry)1079 lxb_selectors_state_after_nth_child(lxb_selectors_t *selectors,
1080 lxb_selectors_entry_t *entry)
1081 {
1082 bool found;
1083 const xmlNode *node;
1084 lxb_selectors_entry_t *parent;
1085 lxb_selectors_nested_t *current;
1086 const lxb_css_selector_t *selector;
1087 const lxb_css_selector_pseudo_t *pseudo;
1088
1089 current = selectors->current;
1090 selector = current->parent->last->selector;
1091 pseudo = &selector->u.pseudo;
1092
1093 node = entry->node;
1094
1095 if (current->found) {
1096 current->index += 1;
1097 }
1098 else if (current->root == node) {
1099 node = NULL;
1100 goto done;
1101 }
1102
1103 if (pseudo->type == LXB_CSS_SELECTOR_PSEUDO_CLASS_FUNCTION_NTH_CHILD) {
1104 node = node->prev;
1105
1106 while (node != NULL) {
1107 if (CMP_NODE_TYPE(node, XML_ELEMENT_NODE)) {
1108 break;
1109 }
1110
1111 node = node->prev;
1112 }
1113 }
1114 else {
1115 node = node->next;
1116
1117 while (node != NULL) {
1118 if (CMP_NODE_TYPE(node, XML_ELEMENT_NODE)) {
1119 break;
1120 }
1121
1122 node = node->next;
1123 }
1124 }
1125
1126 if (node == NULL) {
1127 goto done;
1128 }
1129
1130 entry->node = node;
1131 current->found = false;
1132 selectors->state = lxb_selectors_state_find;
1133
1134 return entry;
1135
1136 done:
1137
1138 if (current->index > 0) {
1139 found = lxb_selectors_anb_calc(pseudo->data, current->index);
1140
1141 node = (found) ? current->root : NULL;
1142 }
1143
1144 selectors->state = lxb_selectors_state_find;
1145 selectors->current = selectors->current->parent;
1146
1147 parent = selectors->current->last;
1148
1149 return lxb_selectors_state_find_check(selectors, node,
1150 parent->selector, parent);
1151 }
1152
1153 static bool
lxb_selectors_match(lxb_selectors_t * selectors,lxb_selectors_entry_t * entry,const lxb_css_selector_t * selector,const xmlNode * node)1154 lxb_selectors_match(lxb_selectors_t *selectors, lxb_selectors_entry_t *entry,
1155 const lxb_css_selector_t *selector, const xmlNode *node)
1156 {
1157 switch (selector->type) {
1158 case LXB_CSS_SELECTOR_TYPE_ANY:
1159 return true;
1160
1161 case LXB_CSS_SELECTOR_TYPE_ELEMENT:
1162 return lxb_selectors_match_element(selector, node, entry);
1163
1164 case LXB_CSS_SELECTOR_TYPE_ID:
1165 return lxb_selectors_match_id(selector, node, selectors->options & LXB_SELECTORS_OPT_QUIRKS_MODE);
1166
1167 case LXB_CSS_SELECTOR_TYPE_CLASS: {
1168 const xmlAttr *dom_attr = lxb_selectors_adapted_attr(node, (const lxb_char_t *) "class");
1169 if (dom_attr == NULL) {
1170 return false;
1171 }
1172
1173 dom_lxb_str_wrapper trg = lxb_selectors_adapted_attr_value(dom_attr);
1174
1175 if (trg.str.length == 0) {
1176 dom_lxb_str_wrapper_release(&trg);
1177 return false;
1178 }
1179
1180 bool ret = lxb_selectors_match_class(&trg.str,
1181 &selector->name, selectors->options & LXB_SELECTORS_OPT_QUIRKS_MODE);
1182 dom_lxb_str_wrapper_release(&trg);
1183 return ret;
1184 }
1185
1186 case LXB_CSS_SELECTOR_TYPE_ATTRIBUTE:
1187 return lxb_selectors_match_attribute(selector, node, entry);
1188
1189 case LXB_CSS_SELECTOR_TYPE_PSEUDO_CLASS:
1190 return lxb_selectors_pseudo_class(selector, node);
1191
1192 case LXB_CSS_SELECTOR_TYPE_PSEUDO_CLASS_FUNCTION:
1193 return lxb_selectors_pseudo_class_function(selector, node);
1194
1195 case LXB_CSS_SELECTOR_TYPE_PSEUDO_ELEMENT:
1196 return lxb_selectors_pseudo_element(selector, node);
1197
1198 case LXB_CSS_SELECTOR_TYPE_PSEUDO_ELEMENT_FUNCTION:
1199 return false;
1200
1201 EMPTY_SWITCH_DEFAULT_CASE();
1202 }
1203
1204 return false;
1205 }
1206
1207 static bool
lxb_selectors_match_element(const lxb_css_selector_t * selector,const xmlNode * node,lxb_selectors_entry_t * entry)1208 lxb_selectors_match_element(const lxb_css_selector_t *selector,
1209 const xmlNode *node, lxb_selectors_entry_t *entry)
1210 {
1211 lxb_selectors_adapted_set_entry_id(entry, selector, node);
1212 return lxb_selectors_adapted_cmp_local_name_id(node, &entry->id);
1213 }
1214
1215 static bool
lxb_selectors_match_id(const lxb_css_selector_t * selector,const xmlNode * node,bool quirks)1216 lxb_selectors_match_id(const lxb_css_selector_t *selector, const xmlNode *node, bool quirks)
1217 {
1218 const xmlAttr *dom_attr = lxb_selectors_adapted_attr(node, (const lxb_char_t *) "id");
1219 if (dom_attr == NULL) {
1220 return false;
1221 }
1222
1223 const lexbor_str_t *src = &selector->name;
1224 dom_lxb_str_wrapper trg = lxb_selectors_adapted_attr_value(dom_attr);
1225 bool ret = false;
1226 if (trg.str.length == src->length) {
1227 if (quirks) {
1228 ret = lexbor_str_data_ncasecmp(trg.str.data, src->data, src->length);
1229 } else {
1230 ret = lexbor_str_data_ncmp(trg.str.data, src->data, src->length);
1231 }
1232 }
1233 dom_lxb_str_wrapper_release(&trg);
1234
1235 return ret;
1236 }
1237
1238 static bool
lxb_selectors_match_class(const lexbor_str_t * target,const lexbor_str_t * src,bool quirks)1239 lxb_selectors_match_class(const lexbor_str_t *target, const lexbor_str_t *src,
1240 bool quirks)
1241 {
1242 lxb_char_t chr;
1243
1244 if (target->length < src->length) {
1245 return false;
1246 }
1247
1248 bool is_it = false;
1249
1250 const lxb_char_t *data = target->data;
1251 const lxb_char_t *pos = data;
1252 const lxb_char_t *end = data + target->length;
1253
1254 for (; data < end; data++) {
1255 chr = *data;
1256
1257 if (lexbor_utils_whitespace(chr, ==, ||)) {
1258
1259 if ((size_t) (data - pos) == src->length) {
1260 if (quirks) {
1261 is_it = lexbor_str_data_ncasecmp(pos, src->data, src->length);
1262 }
1263 else {
1264 is_it = lexbor_str_data_ncmp(pos, src->data, src->length);
1265 }
1266
1267 if (is_it) {
1268 return true;
1269 }
1270 }
1271
1272 if ((size_t) (end - data) < src->length) {
1273 return false;
1274 }
1275
1276 pos = data + 1;
1277 }
1278 }
1279
1280 if ((size_t) (end - pos) == src->length && src->length != 0) {
1281 if (quirks) {
1282 is_it = lexbor_str_data_ncasecmp(pos, src->data, src->length);
1283 }
1284 else {
1285 is_it = lexbor_str_data_ncmp(pos, src->data, src->length);
1286 }
1287 }
1288
1289 return is_it;
1290 }
1291
1292 static bool
lxb_selectors_match_attribute_value(const lxb_css_selector_attribute_t * attr,const lexbor_str_t * trg,const lexbor_str_t * src)1293 lxb_selectors_match_attribute_value(const lxb_css_selector_attribute_t *attr, const lexbor_str_t *trg, const lexbor_str_t *src)
1294 {
1295 bool res;
1296 bool ins = attr->modifier == LXB_CSS_SELECTOR_MODIFIER_I;
1297
1298 switch (attr->match) {
1299 case LXB_CSS_SELECTOR_MATCH_EQUAL: /* = */
1300 if (trg->length == src->length) {
1301 if (ins) {
1302 return lexbor_str_data_ncasecmp(trg->data, src->data,
1303 src->length);
1304 }
1305
1306 return lexbor_str_data_ncmp(trg->data, src->data,
1307 src->length);
1308 }
1309
1310 return false;
1311
1312 case LXB_CSS_SELECTOR_MATCH_INCLUDE: /* ~= */
1313 return lxb_selectors_match_class(trg, src, ins);
1314
1315 case LXB_CSS_SELECTOR_MATCH_DASH: /* |= */
1316 if (trg->length == src->length) {
1317 if (ins) {
1318 return lexbor_str_data_ncasecmp(trg->data, src->data,
1319 src->length);
1320 }
1321
1322 return lexbor_str_data_ncmp(trg->data, src->data,
1323 src->length);
1324 }
1325
1326 if (trg->length > src->length) {
1327 if (ins) {
1328 res = lexbor_str_data_ncasecmp(trg->data,
1329 src->data, src->length);
1330 }
1331 else {
1332 res = lexbor_str_data_ncmp(trg->data,
1333 src->data, src->length);
1334 }
1335
1336 if (res && trg->data[src->length] == '-') {
1337 return true;
1338 }
1339 }
1340
1341 return false;
1342
1343 case LXB_CSS_SELECTOR_MATCH_PREFIX: /* ^= */
1344 if (src->length != 0 && trg->length >= src->length) {
1345 if (ins) {
1346 return lexbor_str_data_ncasecmp(trg->data, src->data,
1347 src->length);
1348 }
1349
1350 return lexbor_str_data_ncmp(trg->data, src->data,
1351 src->length);
1352 }
1353
1354 return false;
1355
1356 case LXB_CSS_SELECTOR_MATCH_SUFFIX: /* $= */
1357 if (src->length != 0 && trg->length >= src->length) {
1358 size_t dif = trg->length - src->length;
1359
1360 if (ins) {
1361 return lexbor_str_data_ncasecmp(trg->data + dif,
1362 src->data, src->length);
1363 }
1364
1365 return lexbor_str_data_ncmp(trg->data + dif, src->data,
1366 src->length);
1367 }
1368
1369 return false;
1370
1371 case LXB_CSS_SELECTOR_MATCH_SUBSTRING: /* *= */
1372 if (src->length == 0) {
1373 return false;
1374 }
1375
1376 if (ins) {
1377 return lexbor_str_data_ncasecmp_contain(trg->data, trg->length,
1378 src->data, src->length);
1379 }
1380
1381 return lexbor_str_data_ncmp_contain(trg->data, trg->length,
1382 src->data, src->length);
1383 EMPTY_SWITCH_DEFAULT_CASE();
1384 }
1385
1386 return false;
1387 }
1388
1389 static bool
lxb_selectors_match_attribute(const lxb_css_selector_t * selector,const xmlNode * node,lxb_selectors_entry_t * entry)1390 lxb_selectors_match_attribute(const lxb_css_selector_t *selector,
1391 const xmlNode *node, lxb_selectors_entry_t *entry)
1392 {
1393 const lxb_css_selector_attribute_t *attr = &selector->u.attribute;
1394
1395 lxb_selectors_adapted_set_entry_id(entry, selector, node);
1396
1397 const xmlAttr *dom_attr = lxb_selectors_adapted_attr(node, entry->id.name);
1398 if (dom_attr == NULL) {
1399 return false;
1400 }
1401
1402 const lexbor_str_t *src = &attr->value;
1403 if (src->data == NULL) {
1404 return true;
1405 }
1406
1407 dom_lxb_str_wrapper trg = lxb_selectors_adapted_attr_value(dom_attr);
1408 bool res = lxb_selectors_match_attribute_value(attr, &trg.str, src);
1409 dom_lxb_str_wrapper_release(&trg);
1410 return res;
1411 }
1412
1413 static bool
lxb_selectors_pseudo_class(const lxb_css_selector_t * selector,const xmlNode * node)1414 lxb_selectors_pseudo_class(const lxb_css_selector_t *selector,
1415 const xmlNode *node)
1416 {
1417 const lxb_css_selector_pseudo_t *pseudo = &selector->u.pseudo;
1418
1419 static const lxb_char_t checkbox[] = "checkbox";
1420 static const size_t checkbox_length = sizeof(checkbox) / sizeof(lxb_char_t) - 1;
1421
1422 static const lxb_char_t radio[] = "radio";
1423 static const size_t radio_length = sizeof(radio) / sizeof(lxb_char_t) - 1;
1424
1425 switch (pseudo->type) {
1426 case LXB_CSS_SELECTOR_PSEUDO_CLASS_ACTIVE:
1427 return false;
1428
1429 case LXB_CSS_SELECTOR_PSEUDO_CLASS_ANY_LINK:
1430 /* https://drafts.csswg.org/selectors/#the-any-link-pseudo */
1431 if (php_dom_ns_is_fast(node, php_dom_ns_is_html_magic_token)
1432 && (lxb_selectors_adapted_cmp_local_name_literal(node, "a")
1433 || lxb_selectors_adapted_cmp_local_name_literal(node, "area")))
1434 {
1435 return lxb_selectors_adapted_has_attr(node, "href");
1436 }
1437
1438 return false;
1439
1440 case LXB_CSS_SELECTOR_PSEUDO_CLASS_BLANK:
1441 if (!EG(exception)) {
1442 php_dom_throw_error_with_message(NOT_SUPPORTED_ERR, ":blank selector is not implemented because CSSWG has not yet decided its semantics (https://github.com/w3c/csswg-drafts/issues/1967)", true);
1443 }
1444 return false;
1445
1446 case LXB_CSS_SELECTOR_PSEUDO_CLASS_CHECKED:
1447 /* https://drafts.csswg.org/selectors/#checked */
1448 if (!php_dom_ns_is_fast(node, php_dom_ns_is_html_magic_token)) {
1449 return false;
1450 }
1451 if (lxb_selectors_adapted_cmp_local_name_literal(node, "input")) {
1452 const xmlAttr *dom_attr = lxb_selectors_adapted_attr(node, (const lxb_char_t *) "type");
1453 if (dom_attr == NULL) {
1454 return false;
1455 }
1456
1457 dom_lxb_str_wrapper str = lxb_selectors_adapted_attr_value(dom_attr);
1458 bool res = false;
1459
1460 if (str.str.length == 8) {
1461 if (lexbor_str_data_ncasecmp(checkbox, str.str.data, checkbox_length)) {
1462 res = lxb_selectors_adapted_has_attr(node, "checked");
1463 }
1464 }
1465 else if (str.str.length == 5) {
1466 if (lexbor_str_data_ncasecmp(radio, str.str.data, radio_length)) {
1467 res = lxb_selectors_adapted_has_attr(node, "checked");
1468 }
1469 }
1470
1471 dom_lxb_str_wrapper_release(&str);
1472
1473 return res;
1474 }
1475 else if(lxb_selectors_adapted_cmp_local_name_literal(node, "option")) {
1476 return lxb_selectors_adapted_has_attr(node, "selected");
1477 }
1478
1479 return false;
1480
1481 case LXB_CSS_SELECTOR_PSEUDO_CLASS_CURRENT:
1482 case LXB_CSS_SELECTOR_PSEUDO_CLASS_DEFAULT:
1483 return false;
1484
1485 case LXB_CSS_SELECTOR_PSEUDO_CLASS_DISABLED:
1486 return lxb_selectors_pseudo_class_disabled(node);
1487
1488 case LXB_CSS_SELECTOR_PSEUDO_CLASS_EMPTY:
1489 node = node->children;
1490
1491 while (node != NULL) {
1492 /* Following https://developer.mozilla.org/en-US/docs/Web/CSS/:empty, i.e. what currently happens in browsers,
1493 * not the CSS Selectors Level 4 Draft that no one implements yet. */
1494 if (!CMP_NODE_TYPE(node, XML_COMMENT_NODE) && !CMP_NODE_TYPE(node, XML_PI_NODE)) {
1495 return false;
1496 }
1497
1498 node = node->next;
1499 }
1500
1501 return true;
1502
1503 case LXB_CSS_SELECTOR_PSEUDO_CLASS_ENABLED:
1504 return !lxb_selectors_pseudo_class_disabled(node);
1505
1506 case LXB_CSS_SELECTOR_PSEUDO_CLASS_FIRST_CHILD:
1507 return lxb_selectors_pseudo_class_first_child(node);
1508
1509 case LXB_CSS_SELECTOR_PSEUDO_CLASS_FIRST_OF_TYPE:
1510 return lxb_selectors_pseudo_class_first_of_type(node);
1511
1512 case LXB_CSS_SELECTOR_PSEUDO_CLASS_FOCUS:
1513 break;
1514
1515 case LXB_CSS_SELECTOR_PSEUDO_CLASS_FOCUS_VISIBLE:
1516 break;
1517
1518 case LXB_CSS_SELECTOR_PSEUDO_CLASS_FOCUS_WITHIN:
1519 break;
1520
1521 case LXB_CSS_SELECTOR_PSEUDO_CLASS_FULLSCREEN:
1522 break;
1523
1524 case LXB_CSS_SELECTOR_PSEUDO_CLASS_FUTURE:
1525 break;
1526
1527 case LXB_CSS_SELECTOR_PSEUDO_CLASS_HOVER:
1528 break;
1529
1530 case LXB_CSS_SELECTOR_PSEUDO_CLASS_IN_RANGE:
1531 break;
1532
1533 case LXB_CSS_SELECTOR_PSEUDO_CLASS_INDETERMINATE:
1534 break;
1535
1536 case LXB_CSS_SELECTOR_PSEUDO_CLASS_INVALID:
1537 break;
1538
1539 case LXB_CSS_SELECTOR_PSEUDO_CLASS_LAST_CHILD:
1540 return lxb_selectors_pseudo_class_last_child(node);
1541
1542 case LXB_CSS_SELECTOR_PSEUDO_CLASS_LAST_OF_TYPE:
1543 return lxb_selectors_pseudo_class_last_of_type(node);
1544
1545 case LXB_CSS_SELECTOR_PSEUDO_CLASS_LINK:
1546 /* https://html.spec.whatwg.org/multipage/semantics-other.html#selector-link */
1547 if (php_dom_ns_is_fast(node, php_dom_ns_is_html_magic_token)
1548 && (lxb_selectors_adapted_cmp_local_name_literal(node, "a")
1549 || lxb_selectors_adapted_cmp_local_name_literal(node, "area")))
1550 {
1551 return lxb_selectors_adapted_has_attr(node, "href");
1552 }
1553
1554 return false;
1555
1556 case LXB_CSS_SELECTOR_PSEUDO_CLASS_LOCAL_LINK:
1557 break;
1558
1559 case LXB_CSS_SELECTOR_PSEUDO_CLASS_ONLY_CHILD:
1560 return lxb_selectors_pseudo_class_first_child(node)
1561 && lxb_selectors_pseudo_class_last_child(node);
1562
1563 case LXB_CSS_SELECTOR_PSEUDO_CLASS_ONLY_OF_TYPE:
1564 return lxb_selectors_pseudo_class_first_of_type(node)
1565 && lxb_selectors_pseudo_class_last_of_type(node);
1566
1567 case LXB_CSS_SELECTOR_PSEUDO_CLASS_OPTIONAL:
1568 if (php_dom_ns_is_fast(node, php_dom_ns_is_html_magic_token)
1569 && (lxb_selectors_adapted_cmp_local_name_literal(node, "input")
1570 || lxb_selectors_adapted_cmp_local_name_literal(node, "select")
1571 || lxb_selectors_adapted_cmp_local_name_literal(node, "textarea")))
1572 {
1573 return !lxb_selectors_adapted_has_attr(node, "required");
1574 }
1575
1576 return false;
1577
1578 case LXB_CSS_SELECTOR_PSEUDO_CLASS_OUT_OF_RANGE:
1579 break;
1580
1581 case LXB_CSS_SELECTOR_PSEUDO_CLASS_PAST:
1582 break;
1583
1584 case LXB_CSS_SELECTOR_PSEUDO_CLASS_PLACEHOLDER_SHOWN:
1585 if (php_dom_ns_is_fast(node, php_dom_ns_is_html_magic_token)
1586 && (lxb_selectors_adapted_cmp_local_name_literal(node, "input")
1587 || lxb_selectors_adapted_cmp_local_name_literal(node, "textarea")))
1588 {
1589 return lxb_selectors_adapted_has_attr(node, "placeholder");
1590 }
1591
1592 return false;
1593
1594 case LXB_CSS_SELECTOR_PSEUDO_CLASS_READ_ONLY:
1595 return !lxb_selectors_pseudo_class_read_write(node);
1596
1597 case LXB_CSS_SELECTOR_PSEUDO_CLASS_READ_WRITE:
1598 return lxb_selectors_pseudo_class_read_write(node);
1599
1600 case LXB_CSS_SELECTOR_PSEUDO_CLASS_REQUIRED:
1601 if (php_dom_ns_is_fast(node, php_dom_ns_is_html_magic_token)
1602 && (lxb_selectors_adapted_cmp_local_name_literal(node, "input")
1603 || lxb_selectors_adapted_cmp_local_name_literal(node, "select")
1604 || lxb_selectors_adapted_cmp_local_name_literal(node, "textarea")))
1605 {
1606 return lxb_selectors_adapted_has_attr(node, "required");
1607 }
1608
1609 return false;
1610
1611 case LXB_CSS_SELECTOR_PSEUDO_CLASS_ROOT:
1612 return node->parent != NULL
1613 && (node->parent->type == XML_DOCUMENT_FRAG_NODE || node->parent->type == XML_DOCUMENT_NODE
1614 || node->parent->type == XML_HTML_DOCUMENT_NODE);
1615
1616 case LXB_CSS_SELECTOR_PSEUDO_CLASS_SCOPE:
1617 break;
1618
1619 case LXB_CSS_SELECTOR_PSEUDO_CLASS_TARGET:
1620 break;
1621
1622 case LXB_CSS_SELECTOR_PSEUDO_CLASS_TARGET_WITHIN:
1623 break;
1624
1625 case LXB_CSS_SELECTOR_PSEUDO_CLASS_USER_INVALID:
1626 break;
1627
1628 case LXB_CSS_SELECTOR_PSEUDO_CLASS_VALID:
1629 break;
1630
1631 case LXB_CSS_SELECTOR_PSEUDO_CLASS_VISITED:
1632 break;
1633
1634 case LXB_CSS_SELECTOR_PSEUDO_CLASS_WARNING:
1635 break;
1636 }
1637
1638 return false;
1639 }
1640
1641 static bool
lxb_selectors_pseudo_class_function(const lxb_css_selector_t * selector,const xmlNode * node)1642 lxb_selectors_pseudo_class_function(const lxb_css_selector_t *selector,
1643 const xmlNode *node)
1644 {
1645 size_t index;
1646 const xmlNode *base;
1647 const lxb_css_selector_pseudo_t *pseudo;
1648
1649 pseudo = &selector->u.pseudo;
1650
1651 switch (pseudo->type) {
1652 case LXB_CSS_SELECTOR_PSEUDO_CLASS_FUNCTION_NTH_CHILD:
1653 case LXB_CSS_SELECTOR_PSEUDO_CLASS_FUNCTION_NTH_LAST_CHILD:
1654 index = 0;
1655
1656 if (pseudo->type == LXB_CSS_SELECTOR_PSEUDO_CLASS_FUNCTION_NTH_CHILD) {
1657 while (node != NULL) {
1658 if (lxb_selectors_adapted_is_matchable_child(node))
1659 {
1660 index++;
1661 }
1662
1663 node = node->prev;
1664 }
1665 }
1666 else {
1667 while (node != NULL) {
1668 if (lxb_selectors_adapted_is_matchable_child(node))
1669 {
1670 index++;
1671 }
1672
1673 node = node->next;
1674 }
1675 }
1676
1677 return lxb_selectors_anb_calc(pseudo->data, index);
1678
1679 case LXB_CSS_SELECTOR_PSEUDO_CLASS_FUNCTION_NTH_OF_TYPE:
1680 case LXB_CSS_SELECTOR_PSEUDO_CLASS_FUNCTION_NTH_LAST_OF_TYPE:
1681 index = 0;
1682 base = node;
1683
1684 if (pseudo->type == LXB_CSS_SELECTOR_PSEUDO_CLASS_FUNCTION_NTH_OF_TYPE) {
1685 while (node != NULL) {
1686 if(lxb_selectors_adapted_is_matchable_child(node)
1687 && xmlStrEqual(node->name, base->name)
1688 && lxb_selectors_adapted_cmp_ns(node, base))
1689 {
1690 index++;
1691 }
1692
1693 node = node->prev;
1694 }
1695 }
1696 else {
1697 while (node != NULL) {
1698 if(lxb_selectors_adapted_is_matchable_child(node)
1699 && xmlStrEqual(node->name, base->name)
1700 && lxb_selectors_adapted_cmp_ns(node, base))
1701 {
1702 index++;
1703 }
1704
1705 node = node->next;
1706 }
1707 }
1708
1709 return lxb_selectors_anb_calc(pseudo->data, index);
1710
1711 case LXB_CSS_SELECTOR_PSEUDO_CLASS_FUNCTION_DIR:
1712 case LXB_CSS_SELECTOR_PSEUDO_CLASS_FUNCTION_LANG:
1713 case LXB_CSS_SELECTOR_PSEUDO_CLASS_FUNCTION_NTH_COL:
1714 case LXB_CSS_SELECTOR_PSEUDO_CLASS_FUNCTION_NTH_LAST_COL:
1715 default:
1716 break;
1717 }
1718
1719 return false;
1720 }
1721
1722 static bool
lxb_selectors_pseudo_element(const lxb_css_selector_t * selector,const xmlNode * node)1723 lxb_selectors_pseudo_element(const lxb_css_selector_t *selector,
1724 const xmlNode *node)
1725 {
1726 const lxb_css_selector_pseudo_t *pseudo = &selector->u.pseudo;
1727
1728 switch (pseudo->type) {
1729 case LXB_CSS_SELECTOR_PSEUDO_ELEMENT_AFTER:
1730 case LXB_CSS_SELECTOR_PSEUDO_ELEMENT_BACKDROP:
1731 case LXB_CSS_SELECTOR_PSEUDO_ELEMENT_BEFORE:
1732 case LXB_CSS_SELECTOR_PSEUDO_ELEMENT_FIRST_LETTER:
1733 case LXB_CSS_SELECTOR_PSEUDO_ELEMENT_FIRST_LINE:
1734 case LXB_CSS_SELECTOR_PSEUDO_ELEMENT_GRAMMAR_ERROR:
1735 case LXB_CSS_SELECTOR_PSEUDO_ELEMENT_INACTIVE_SELECTION:
1736 case LXB_CSS_SELECTOR_PSEUDO_ELEMENT_MARKER:
1737 case LXB_CSS_SELECTOR_PSEUDO_ELEMENT_PLACEHOLDER:
1738 case LXB_CSS_SELECTOR_PSEUDO_ELEMENT_SELECTION:
1739 case LXB_CSS_SELECTOR_PSEUDO_ELEMENT_SPELLING_ERROR:
1740 case LXB_CSS_SELECTOR_PSEUDO_ELEMENT_TARGET_TEXT:
1741 break;
1742 }
1743
1744 return false;
1745 }
1746
1747 /* https://html.spec.whatwg.org/multipage/semantics-other.html#concept-element-disabled */
1748 static bool
lxb_selectors_pseudo_class_disabled(const xmlNode * node)1749 lxb_selectors_pseudo_class_disabled(const xmlNode *node)
1750 {
1751 if (!php_dom_ns_is_fast(node, php_dom_ns_is_html_magic_token)) {
1752 return false;
1753 }
1754
1755 if (lxb_selectors_adapted_has_attr(node, "disabled")
1756 && (lxb_selectors_adapted_cmp_local_name_literal(node, "button")
1757 || lxb_selectors_adapted_cmp_local_name_literal(node, "input")
1758 || lxb_selectors_adapted_cmp_local_name_literal(node, "select")
1759 || lxb_selectors_adapted_cmp_local_name_literal(node, "textarea")
1760 || lxb_selectors_adapted_cmp_local_name_literal(node, "optgroup")
1761 || lxb_selectors_adapted_cmp_local_name_literal(node, "fieldset")))
1762 {
1763 return true;
1764 }
1765
1766 if (lxb_selectors_adapted_cmp_local_name_literal(node, "fieldset")) {
1767 const xmlNode *fieldset = node;
1768 node = node->parent;
1769
1770 while (node != NULL && lxb_selectors_adapted_is_matchable_child(node)) {
1771 /* node is a disabled fieldset that is an ancestor of fieldset */
1772 if (php_dom_ns_is_fast(node, php_dom_ns_is_html_magic_token)
1773 && lxb_selectors_adapted_cmp_local_name_literal(node, "fieldset")
1774 && lxb_selectors_adapted_has_attr(node, "disabled"))
1775 {
1776 /* Search first legend child and figure out if fieldset is a descendent from that. */
1777 const xmlNode *search_current = node->children;
1778 do {
1779 if (search_current->type == XML_ELEMENT_NODE
1780 && php_dom_ns_is_fast(search_current, php_dom_ns_is_html_magic_token)
1781 && lxb_selectors_adapted_cmp_local_name_literal(search_current, "legend")) {
1782 /* search_current is a legend element. */
1783 const xmlNode *inner_search_current = fieldset;
1784
1785 /* Disabled does not apply if fieldset is a descendant from search_current */
1786 do {
1787 if (inner_search_current == search_current) {
1788 return false;
1789 }
1790
1791 inner_search_current = inner_search_current->parent;
1792 } while (inner_search_current != NULL);
1793
1794 return true;
1795 }
1796
1797 search_current = search_current->next;
1798 } while (search_current != NULL);
1799 }
1800
1801 node = node->parent;
1802 }
1803 }
1804
1805 return false;
1806 }
1807
1808 static bool
lxb_selectors_pseudo_class_first_child(const xmlNode * node)1809 lxb_selectors_pseudo_class_first_child(const xmlNode *node)
1810 {
1811 node = node->prev;
1812
1813 while (node != NULL) {
1814 if (lxb_selectors_adapted_is_matchable_child(node))
1815 {
1816 return false;
1817 }
1818
1819 node = node->prev;
1820 }
1821
1822 return true;
1823 }
1824
1825 static bool
lxb_selectors_pseudo_class_first_of_type(const xmlNode * node)1826 lxb_selectors_pseudo_class_first_of_type(const xmlNode *node)
1827 {
1828 const xmlNode *root = node;
1829 node = node->prev;
1830
1831 while (node) {
1832 if (lxb_selectors_adapted_is_matchable_child(node)
1833 && xmlStrEqual(node->name, root->name)
1834 && lxb_selectors_adapted_cmp_ns(node, root))
1835 {
1836 return false;
1837 }
1838
1839 node = node->prev;
1840 }
1841
1842 return true;
1843 }
1844
1845 static bool
lxb_selectors_pseudo_class_last_child(const xmlNode * node)1846 lxb_selectors_pseudo_class_last_child(const xmlNode *node)
1847 {
1848 node = node->next;
1849
1850 while (node != NULL) {
1851 if (lxb_selectors_adapted_is_matchable_child(node))
1852 {
1853 return false;
1854 }
1855
1856 node = node->next;
1857 }
1858
1859 return true;
1860 }
1861
1862 static bool
lxb_selectors_pseudo_class_last_of_type(const xmlNode * node)1863 lxb_selectors_pseudo_class_last_of_type(const xmlNode *node)
1864 {
1865 const xmlNode *root = node;
1866 node = node->next;
1867
1868 while (node) {
1869 if (lxb_selectors_adapted_is_matchable_child(node)
1870 && xmlStrEqual(node->name, root->name)
1871 && lxb_selectors_adapted_cmp_ns(node, root))
1872 {
1873 return false;
1874 }
1875
1876 node = node->next;
1877 }
1878
1879 return true;
1880 }
1881
1882 /* https://drafts.csswg.org/selectors/#rw-pseudos */
1883 static bool
lxb_selectors_pseudo_class_read_write(const xmlNode * node)1884 lxb_selectors_pseudo_class_read_write(const xmlNode *node)
1885 {
1886 if (php_dom_ns_is_fast(node, php_dom_ns_is_html_magic_token)) {
1887 if (lxb_selectors_adapted_cmp_local_name_literal(node, "input")
1888 || lxb_selectors_adapted_cmp_local_name_literal(node, "textarea")) {
1889 return !lxb_selectors_adapted_has_attr(node, "readonly") && !lxb_selectors_adapted_has_attr(node, "disabled");
1890 } else {
1891 const xmlAttr *attr = lxb_selectors_adapted_attr(node, (const lxb_char_t *) "contenteditable");
1892 return attr && !dom_compare_value(attr, BAD_CAST "false");
1893 }
1894 }
1895
1896 return false;
1897 }
1898
1899 static bool
lxb_selectors_anb_calc(const lxb_css_selector_anb_of_t * anb,size_t index)1900 lxb_selectors_anb_calc(const lxb_css_selector_anb_of_t *anb, size_t index)
1901 {
1902 double num;
1903
1904 if (anb->anb.a == 0) {
1905 if (anb->anb.b >= 0 && (size_t) anb->anb.b == index) {
1906 return true;
1907 }
1908 }
1909 else {
1910 num = ((double) index - (double) anb->anb.b) / (double) anb->anb.a;
1911
1912 if (num >= 0.0f && (num - trunc(num)) == 0.0f) {
1913 return true;
1914 }
1915 }
1916
1917 return false;
1918 }
1919
1920 static lxb_status_t
lxb_selectors_cb_ok(const xmlNode * node,lxb_css_selector_specificity_t spec,void * ctx)1921 lxb_selectors_cb_ok(const xmlNode *node,
1922 lxb_css_selector_specificity_t spec, void *ctx)
1923 {
1924 *((bool *) ctx) = true;
1925 return LXB_STATUS_OK;
1926 }
1927
1928 static lxb_status_t
lxb_selectors_cb_not(const xmlNode * node,lxb_css_selector_specificity_t spec,void * ctx)1929 lxb_selectors_cb_not(const xmlNode *node,
1930 lxb_css_selector_specificity_t spec, void *ctx)
1931 {
1932 *((bool *) ctx) = false;
1933 return LXB_STATUS_OK;
1934 }
1935