/', $text, $matches)) {
        $link = "#" .$matches[1];
    } elseif (preg_match('/<\?php\s+print_link\s*\("([^"]+)",\s*"[^"]+"\);\s*\?>/', $text, $matches)) {
        $link = $matches[1];
    } elseif (preg_match('/<\?php\s+print_link\s*\(\'([^\']+)\',\s*\'[^\']+\'\);\s*\?>/', $text, $matches)) {
        $link = $matches[1];
    } elseif (preg_match('/<\?php\s+print_link\s*\("([^"]+)",\s*make_image\s*\([^\)]*\)\s*\);\s*\?>/', $text, $matches)) {
        $link = $matches[1];
    } elseif (preg_match('//', $text, $matches)) {
        $link = $matches[1];
    }

    // Make sure it is a full URL
    if (!preg_match('/^http:/', $link)) {
        $rootLink = rtrim($rootLink, "/");
        $link = ($link[0] != '/') ? "$rootLink/$link" : "$rootLink$link";
    }

    return $link;
}

/**
 * Preserve the parts of the text that are needed and drop everything unusable.
 *
 * Embedded make_image() calls are replaced by a space, print_link() calls are
 * replaced by their link text (or by nothing when the link body is a
 * make_image() call), HTML tags are stripped, the result is trimmed and every
 * run of whitespace is collapsed into a single space.
 *
 * @param string $text Source text of one news item
 * @return string The cleaned-up text
 */
function ProcessText($text)
{
    // Delete images, since this data will go through XML
    $text = preg_replace('/<\?php\s+echo\s+make_image\s*\("([^"]*)",\s*"([^"]*)",\s+"([^"]*)"\);\s*\?>/i', " ", $text);

    // Replace the links with links (use this or the one below)
    // NOTE(review): this disabled variant looks identical to the active one
    // below; its replacement strings may have lost markup - confirm.
    /*
    $text = preg_replace('/<\?php\s+print_link\s*\("([^"]+)",\s*"([^"]+)"\);\s*\?>/', "$2", $text);
    $text = preg_replace('/<\?php\s+print_link\s*\("([^"]+)",\s*make_image\s*\([^\)]*\)\s*\);\s*\?>/', "", $text);
    */

    // Remove the hyperlink references (use this or the one above)
    $text = preg_replace('/<\?php\s+print_link\s*\("([^"]+)",\s*"([^"]+)"\);\s*\?>/', "$2", $text);
    $text = preg_replace('/<\?php\s+print_link\s*\(\'([^\']+)\',\s*\'([^\']+)\'\);\s*\?>/', "$2", $text);
    $text = preg_replace('/<\?php\s+print_link\s*\("([^"]+)",\s*make_image\s*\([^\)]*\)\s*\);\s*\?>/', "", $text);

    // Drop HTML, trim string and drop multiple spaces
    $text = trim(strip_tags($text));
    return preg_replace("!\\s+!", " ", $text);
}

/**
 * Parse the index file searching for news item information.
 *
 * The page is split into lines and classified line by line. Nothing is
 * collected until a line containing "DO NOT REMOVE THIS COMMENT" is seen.
 * After that, separator lines start a new headline, and title, date and
 * subject lines fill in the current headline; every other line is appended
 * to the headline's text. Parsing stops at the "News Archive" line or at a
 * line starting with "// NO MORE NEWS TO PARSE", at which point the last
 * collected headline is discarded.
 *
 * @param string $index_page Source code of the index page
 * @param string $aboutLink  Root link handed to scanLinks() to resolve links
 * @return array List of headlines; each entry has 'link', 'text' and 'date'
 *               (formatted "Y-m-d") plus whichever of 'title' and 'subject'
 *               were found. Empty when no news was found.
 */
function ParseNews($index_page, $aboutLink)
{
    // The original signature gave $index_page a default ahead of the required
    // $aboutLink; that default could never be used and is deprecated in PHP 8.

    // Remove commented items
    //$index_page = preg_replace("//", "", $index_page);

    // Split the file by newlines
    $lines = preg_split("/\n/", $index_page);
    #DEBUG# print_r($lines);

    // Define month conversion hash
    $mos = [
        "Jan" => 1, "Feb" => 2,  "Mar" => 3,  "Apr" => 4,
        "May" => 5, "Jun" => 6,  "Jul" => 7,  "Aug" => 8,
        "Sep" => 9, "Oct" => 10, "Nov" => 11, "Dec" => 12
    ];

    // Start from an empty list so array_pop() and the foreach below are
    // well-defined even when the page contains no news at all.
    $headlines = [];

    // We have not started to parse the news
    // and we have no headlines right here
    $news_started = FALSE;
    $headlineid = 0;

    // Try to classify every line backed with state information
    // and patterns to recognize for news item elements
    foreach ($lines as $line) {

        // We are not in a news item yet
        if (!$news_started) {
            // If we found this comment, then we are at the right place.
            // strpos() returns 0 for a match at the very start of the line,
            // so compare against false instead of testing truthiness.
            if (strpos($line, "DO NOT REMOVE THIS COMMENT") !== false) {
                $news_started = TRUE;
            }
            // The marker line itself carries no news content
            continue;
        }

        // Headline separator reached
        // NOTE(review): this pattern only contains a newline, which can never
        // occur in a line produced by the split above. The separator markup
        // appears to have been lost from this literal - restore before use.
        if (preg_match("!\n!", $line)) {
            $headlineid++;
            #DEBUG# print "Info: New Headline: $headlineid";

        // End of headlines reached
        } elseif (preg_match('@News Archive@', $line) ||
                  strpos($line, "// NO MORE NEWS TO PARSE") === 0) {
            array_pop($headlines);
            break;

        // The headline title is in <h1> tags [it needs to be on one line!]
        // (the opening tag was missing from the pattern, so it never matched)
        } elseif (preg_match('/<h1>(.*)<\/h1>/i', $line, $matches)) {
            $headlines[$headlineid]['title'] = "$matches[1]";
            #DEBUG# print "Title: $matches[1]";

        // Dates are below the headline title
        } elseif (preg_match('/\[(\d+)-(\S*)-(\d+)\]<\/span>/', $line, $matches)) {
            $headlines[$headlineid]['date'] = mktime(1,1,1, $mos[$matches[2]], $matches[1], $matches[3]);
            #DEBUG# print "Date: $matches[1] $matches[2] $matches[3]";

        // Subjects (i.e RDF category)
        // NOTE(review): the pattern is empty, so it matches every remaining
        // line and has no capture group for $matches[1]; the text branch below
        // is unreachable until the original pattern is restored.
        } elseif (preg_match("//", $line, $matches)) {
            $headlines[$headlineid]['subject'] = $matches[1];

        // Everything else is part of the headline text
        } else {
            // Skip lines that consist only of a closing PHP tag
            if (!preg_match('/^\s*\?>\s*$/', $line)) {
                if (isset($headlines[$headlineid]['text'])) {
                    $headlines[$headlineid]['text'] .= " $line";
                } else {
                    $headlines[$headlineid]['text'] = " $line";
                }
            }
        }
    }

    // Cycle through the headlines
    foreach ($headlines as $num => $headline) {

        // A headline may have been created by a title/date/subject line
        // alone, in which case no text was ever collected for it.
        $text = isset($headline['text']) ? $headline['text'] : '';

        // The first link found is THE link for the news item
        $headlines[$num]['link'] = scanLinks($text, $aboutLink);

        // And the text needs to be cleaned up
        $headlines[$num]['text'] = ProcessText($text);

        // And date needs to be reformatted
        $headlines[$num]['date'] = date("Y-m-d", $headline['date']);
    }

    return $headlines;
}

/**
 * Generate RSS header text and inject it into $RSS.
 *
 * NOTE(review): apart from the XML declaration, the string literals below
 * contain almost no markup ("\n", bare titles); the element tags appear to
 * have been lost. They are reproduced unchanged - restore from the original.
 *
 * @param array  $headlines List of headlines; one line is emitted per entry
 * @param string $RSS       Feed text, appended to by reference
 * @param string $aboutLink Link written into the header
 * @return void
 */
function GenerateRSSHeader($headlines, &$RSS, $aboutLink)
{
    $RSS .= "<" . "?xml version=\"1.0\" encoding=\"utf-8\"?>\n" .
            "\n" .
            "\n" .
            "\tPHP: Hypertext Preprocessor\n" .
            "\t$aboutLink\n" .
            "\tThe PHP scripting language web site\n" .
            "\t\n" .
            "\t\t\n";

    // Cycle through all the Resources on the RSS
    foreach ((array)$headlines as $headline) {
        $RSS .= "\t\t\t\n";
    }

    $RSS .= "\t\t\n\t\n\n";
}

/**
 * Add RSS footer information to $RSS.
 *
 * NOTE(review): only a newline is appended; the closing tag appears to be
 * missing from the literal - confirm against the original.
 *
 * @param string $RSS Feed text, appended to by reference
 * @return void
 */
function GenerateRSSFooter(&$RSS)
{
    $RSS .= "\n";
}

/**
 * Add an RSS item's information to $RSS.
 *
 * The values are interpolated as-is; no escaping is applied here.
 * NOTE(review): the literals carry no element tags - see GenerateRSSHeader.
 *
 * @param string       $href    Link of the news item
 * @param string       $title   Title of the news item
 * @param string       $text    Text of the news item
 * @param string       $date    Date of the news item
 * @param string|false $subject Subject, or a false value to omit that line
 * @param string       $RSS     Feed text, appended to by reference
 * @return void
 */
function GenerateRSSItem($href, $title, $text, $date, $subject, &$RSS)
{
    // The subject line is only emitted when a subject is given
    if ($subject) {
        $s = "\t$subject\n";
    } else {
        $s = "";
    }

    $RSS .= "\n\n" .
            "\t$title\n" .
            "\t$href\n" .
            $s .
            "\t$text\n" .
            "\t" . $date . "\n" .
            "\n";
}

/**
 * Build the complete RSS text for the index page found under $root.
 *
 * @param string $root      Location prefix; "$root/index.php" is fetched
 *                          through getData()
 * @param string $aboutLink Link used in the header and to resolve item links
 * @return string The generated RSS text
 */
function GenerateRSSFile($root, $aboutLink)
{
    // Get the PHP.net index page's source code
    $homepage = getData("$root/index.php");

    // This returns a data structure containing all the news items found
    $hlines = ParseNews($homepage, $aboutLink);

    // Start with an empty RSS string
    $RSS = '';

    // Generate the RSS Header
    GenerateRSSHeader($hlines, $RSS, $aboutLink);

    // Add separator comment
    $RSS .= "\n";

    // Add every news item to the feed
    foreach ($hlines as $hline) {
        GenerateRSSItem(
            $hline['link'],
            $hline['title'],
            $hline['text'],
            $hline['date'],
            isset($hline['subject']) ? $hline['subject'] : false,
            $RSS
        );
    }

    // Add end separator
    $RSS .= "\n";

    // Dump the last XML tag
    GenerateRSSFooter($RSS);

    #DEBUG# echo $RSS;
    return $RSS;
}

#$RSSNews = GenerateRSSFile($root, "http://php.net/");
#$RSSConf = GenerateRSSFile("$root/conferences", "http://php.net/conferences/");
?>