xref: /PHP-5.5/ext/tidy/examples/urlgrab5.php (revision 2f4ca9a2)
1<?php
2    /*
3     * urlgrab5.php
4     *
5     * A simple command-line utility to extract all of the URLS contained
6     * within <A HREF> tags from a document.
7     *
8     * NOTE: Only works with tidy for PHP 5, please see urlgrab.php for tidy for PHP 4.3.x
9     *
10     * By: John Coggeshall <john@php.net>
11     *
12     * Usage: php urlgrab5.php <file>
13     *
14     */
15    function dump_nodes(tidyNode $node, &$urls = NULL) {
16
17	$urls = (is_array($urls)) ? $urls : array();
18
19	if(isset($node->id)) {
20	    if($node->id == TIDY_TAG_A) {
21		$urls[] = $node->attribute['href'];
22	    }
23	}
24
25	if($node->hasChildren()) {
26
27	    foreach($node->child as $c) {
28		dump_nodes($c, $urls);
29	    }
30
31	}
32
33	return $urls;
34    }
35
36    $a = tidy_parse_file($_SERVER['argv'][1]);
37    $a->cleanRepair();
38    print_r(dump_nodes($a->html()));
39?>
40