xref: /web-master/scripts/pregen_news (revision 0e511803)
1<?php /* vim: set noet ts=4 sw=4 ft=php: : */
2
// Namespace URIs the feed parser compares element namespaces against.
const XMLNS_ATOM  = "http://www.w3.org/2005/Atom";
const XMLNS_XHTML = "http://www.w3.org/1999/xhtml";
const XMLNS_PHP   = "http://php.net/ns/news";
6
/**
 * Comparison function that orders entries newest-first by their "updated" value.
 *
 * Both values are parsed with strtotime() before any comparison, so two
 * different spellings of the same instant (for example different UTC offsets)
 * compare as equal instead of each being reported as "older" than the other.
 *
 * @param array $a Entry holding an "updated" date string.
 * @param array $b Entry holding an "updated" date string.
 * @return int -1 when $a is newer, 1 when $b is newer (or $a is unparsable), 0 when both denote the same time.
 */
function date_sort($a, $b) {
	$left  = strtotime($a["updated"]);
	$right = strtotime($b["updated"]);

	if ($left === $right) {
		return 0;
	}

	return $left > $right ? -1 : 1;
}
10
// libxml flags handed to DOMDocument::load() and SimpleXMLElement below;
// the functions read this through $GLOBALS["XML_OPTIONS"].
$XML_OPTIONS = LIBXML_XINCLUDE
	| LIBXML_NSCLEAN
	| LIBXML_NOCDATA
	| LIBXML_NOBLANKS
	| LIBXML_COMPACT;
12
/**
 * Build the published Atom feed and the pre-parsed news entries file.
 *
 * Loads $feed, resolves its XIncludes, adds a feed-level <updated> element
 * carrying the newest timestamp found among the document's <updated>
 * elements, then writes the serialized feed to $feedDest and a PHP file that
 * assigns the parsed entries to $NEWS_ENTRIES to $newsDest.
 *
 * On any failure an E_USER_WARNING is raised and nothing is written.
 *
 * @param string $feed     Source feed location, passed to DOMDocument::load().
 * @param string $feedDest File that receives the serialized feed XML.
 * @param string $newsDest File that receives the generated PHP source.
 * @return void
 */
function pregen_atom($feed, $feedDest, $newsDest) {
	$dom = new DOMDocument;
	$dom->preserveWhiteSpace = false;
	$dom->formatOutput = true;

	if (!$dom->load($feed, $GLOBALS["XML_OPTIONS"])) {
		trigger_error("News feed b0rked!", E_USER_WARNING);
		return;
	}

	// xinclude() returns the number of substitutions, false when there were
	// none and -1 when processing failed. -1 is truthy, so a plain "!" test
	// would let a failed include slip through; require a positive count.
	$included = $dom->xinclude();
	if (!is_int($included) || $included < 1) {
		trigger_error("News items b0rked!", E_USER_WARNING);
		return;
	}

	$timestamps = [];
	foreach ($dom->getElementsByTagName("updated") as $node) {
		$stamp = strtotime($node->nodeValue, $_SERVER["REQUEST_TIME"]);
		// Skip dates strtotime() cannot parse rather than feeding false to max().
		if ($stamp !== false) {
			$timestamps[] = $stamp;
		}
	}

	// max() on an empty list is a ValueError on PHP 8 (false + warning before).
	if (!$timestamps) {
		trigger_error("News feed has no usable <updated> timestamps!", E_USER_WARNING);
		return;
	}

	$upnode = $dom->createElement("updated", date(DATE_ATOM, max($timestamps)));
	// The reference node is the first <link> in the document; when there is
	// none, item(0) is null and insertBefore() appends instead.
	$dom->documentElement->insertBefore($upnode, $dom->getElementsByTagName("link")->item(0));

	$feed = $dom->saveXML();
	$entries = format_atom_feed($feed);

	$write = '<?php $NEWS_ENTRIES = ' . var_export($entries, true) . ';';
	file_put_contents($newsDest, $write);
	file_put_contents($feedDest, $feed);
}
42
/**
 * Parse serialized Atom XML into a list of entry arrays.
 *
 * Despite its name, $filename carries the XML source itself: it is handed to
 * XMLReader::XML(), which parses a string, not a path.
 *
 * @param string $filename Atom feed XML source.
 * @return array One array per <entry> element, as built by format_atom_entry();
 *               empty when the source is empty or cannot be opened.
 */
function format_atom_feed($filename) {
	$entries = [];

	// XMLReader::XML() rejects an empty source (ValueError on PHP 8), so
	// report that case the same way as any other unreadable feed.
	if (!is_string($filename) || $filename === "") {
		trigger_error("News feed is empty!", E_USER_WARNING);
		return $entries;
	}

	$r = new XMLReader;
	if (!$r->XML($filename, "UTF-8")) {
		trigger_error("News feed could not be opened for reading!", E_USER_WARNING);
		return $entries;
	}

	while ($r->read()) {
		if ($r->nodeType === XMLReader::ELEMENT && $r->name === "entry") {
			// format_atom_entry() advances the reader through the entry's children.
			$entries[] = format_atom_entry($r);
		}
	}
	$r->close();

	return $entries;
}
56
// {{{ Parse the entry into array(element => value)
/**
 * Read the children of the <entry> the reader is positioned on.
 *
 * Reads forward until the closing </entry> (or end of input) and returns:
 *  - "title", "id", "published", "updated": text content
 *  - "link", "category": list of attribute arrays, one per element
 *  - "content": inner XML for type html/xhtml, text content otherwise
 *  - "intro": the first two element children of the content's outer <div>,
 *    wrapped in a <div>, plus a "read full article" link when there are more
 *  - "newsImage" (attributes + "content") and "finalTeaserDate" from the
 *    php.net news namespace
 *
 * @param XMLReader $r Reader positioned on an <entry> start tag.
 * @return array Parsed entry, keyed by element local name.
 */
function format_atom_entry($r) {
	$retval = [];

	while($r->read()) {
		if ($r->nodeType !== XMLReader::ELEMENT) {
			if ($r->nodeType === XMLReader::END_ELEMENT && $r->name === "entry") {
				return $retval;
			}
			continue;
		}

		$name = $r->localName;
		if ($r->namespaceURI === XMLNS_ATOM) {
			switch($name) {
			case "title":
			case "id":
			case "published":
			case "updated":
				$retval[$name] = $r->readString();
				break;

			case "link":
			case "category":
				$retval[$name][] = format_attributes($r);
				break;

			case "content":
				$type = $r->hasAttributes ? $r->getAttribute("type") : null;

				if ($type === "html" || $type === "xhtml") {
					$str = ltrim($r->readInnerXML());

					// Remove the xmlns attribute
					$search = ' xmlns="'.XMLNS_XHTML.'"';
					if (($pos = strpos($str, $search)) !== false && $pos < 10) {
						$str = substr_replace($str, "", $pos, strlen($search));
					}

					$retval[$name] = $str;
				} else {
					// "text", no type attribute, or a type not handled here:
					// always store something so the code below never reads
					// an unset "content" key.
					$retval[$name] = $r->readString();
				}

				$dom = new DOMDocument();
				// loadHTML() throws on an empty source on PHP 8 and "@" does
				// not silence exceptions; an empty document gives the same
				// (empty) intro.
				if ($retval[$name] !== "") {
					@$dom->loadHTML($retval[$name]);
				}

				$xpath = new DomXPath($dom);
				$nodes = $xpath->query('//body/div/*');

				// query() only returns false for a malformed expression, so
				// the else branch is purely defensive.
				if ($nodes !== false) {
					$content = '<div>';

					$count = 0;
					foreach ($nodes as $node) {
						if ($count++ < 2) {
							$content .= $dom->saveXML($node);
						}
					}

					if ($count > 2) {
						// Only <link> elements read before this <content> are
						// visible in $retval at this point.
						$permanentLink = htmlspecialchars(get_link($retval, "via"), ENT_QUOTES);
						$content .= "<p class='fullArticleLink'><a href='$permanentLink' class>&hellip; read full article</a></p>";
					}

					$content .= '</div>';
				} else {
					$content = $retval[$name];
				}
				$retval["intro"] = $content;
				break;
			}
		} elseif ($r->namespaceURI === XMLNS_PHP) {
			switch($name) {
			case "newsImage":
				$retval[$name] = format_attributes($r);
				$retval[$name]["content"] = $r->readString();
				break;

			case "finalTeaserDate":
				$retval[$name] = $r->readString();
				break;
			}
		}
	}

	return $retval;
} // }}}
153
// {{{ Return all attrs for current element as an array(attr-name => attr-value)
/**
 * Gather the attributes of the element the reader is currently on.
 *
 * The reader is moved across the attributes and then back onto the owning
 * element, so the caller can keep reading from where it was.
 *
 * @param XMLReader $r Reader positioned on an element.
 * @return array Attribute local name => attribute value; empty when the element has no attributes.
 */
function format_attributes($r) {
	$attrs = [];

	if ($r->hasAttributes) {
		$r->moveToFirstAttribute();

		$more = true;
		while ($more) {
			$attrs[$r->localName] = $r->value;
			$more = $r->moveToNextAttribute();
		}

		$r->moveToElement();
	}

	return $attrs;
} // }}}
170
/**
 * Find the href of the first link with the requested relation.
 *
 * A link without a "rel" attribute counts as rel="alternate", which is the
 * default Atom (RFC 4287) assigns to such links.
 *
 * @param array  $data Parsed entry; links are expected under the "link" key
 *                     as a list of attribute arrays.
 * @param string $rel  Link relation to look for.
 * @return string The matching href, or "" when the entry has no such link.
 */
function get_link($data, $rel = "alternate") {
	// An entry without any <link> has no "link" key at all.
	if (empty($data["link"]) || !is_array($data["link"])) {
		return "";
	}

	$wanted = (string)$rel;
	foreach ($data["link"] as $link) {
		$linkRel = isset($link["rel"]) ? (string)$link["rel"] : "alternate";
		if ($linkRel === $wanted && isset($link["href"])) {
			return $link["href"];
		}
	}

	return "";
}
179
/**
 * Generate the two legacy RSS documents (news and conferences) from the Atom feed.
 *
 * Entries whose first category term is "frontpage" go to the news document,
 * all others to the conference document. The RSS markup itself comes from
 * GenerateRSSItem(), GenerateRSSHeader(), GenerateRSSFooter() and
 * ProcessText(), which are defined outside this file.
 *
 * @param string $atom     Location of the Atom feed (SimpleXMLElement is told the data is a URL/path).
 * @param string $newsDest File that receives the news RSS.
 * @param string $confDest File that receives the conference RSS.
 * @return void
 */
function legacy_rss($atom, $newsDest, $confDest) {
	$sxe = new SimpleXMLElement($atom, $GLOBALS["XML_OPTIONS"], true);
	$CONF = $CONF_ITEMS = $NEWS = $NEWS_ITEMS= "";
	// Pre-seed both buckets so a feed lacking one kind of entry still passes
	// an (empty) array to GenerateRSSHeader() instead of an undefined index.
	$links = ["conf" => [], "news" => []];

	foreach($sxe->entry as $entry) {
		$item = "";
		$term  = (string)$entry->category["term"];
		$type = $term != "frontpage" ? "conf" : "news";
		// NOTE(review): $item is read right after this call, so the helper
		// presumably fills it by reference; confirm against its definition.
		GenerateRSSItem(
			$links[$type][]["link"] = $entry->link["href"],
			(string)$entry->title,
			ProcessText($entry->content->asXML()),
			date("Y-m-d", strtotime((string)$entry->updated)),
			$type == "conf" ? ($term == "conferences" ? "conference" : $term): null,
			$item
		);
		if($type == "conf") {
			$CONF_ITEMS .= $item;
		} else {
			$NEWS_ITEMS .= $item;
		}
	}
	GenerateRSSHeader($links["conf"], $CONF, "http://php.net/conferences/");
	GenerateRSSHeader($links["news"], $NEWS, "http://php.net/");

	$CONF .= $CONF_ITEMS;
	$NEWS .= $NEWS_ITEMS;

	GenerateRSSFooter($CONF);
	GenerateRSSFooter($NEWS);

	file_put_contents($newsDest, $NEWS);
	file_put_contents($confDest, $CONF);
}
215
216