PHP Classes

File: news_parser_4.php

Recommend this page to a friend!
  Classes of Carter Comunale   news_parser_4   news_parser_4.php   Download  
File: news_parser_4.php
Role: ???
Content type: text/plain
Description: This contains the parser and article classes. Please use morever_api.php instead; it is the updated version of this class, with channel and category support. Thanks, Mike
Class: news_parser_4
Author: By
Last change:
Date: 23 years ago
Size: 6,399 bytes
 

Contents

Class file image Download
<?php
/*
    File name:  news_parser_4.php - the 4 indicated that php 4 or better was
                required by the original version. This revision needs PHP 5
                or later (it uses __construct and visibility keywords) and
                also runs on PHP 7 and PHP 8.

    Classes:    article and news_xml_parser.

    Purpose:    These two classes are intended to be used with the
                moreover.com news feed site. article is a simple object that
                represents a single news feed entry. news_xml_parser is an xml
                parser that creates article objects from the moreover.com xml
                news feed.

    ToDo:       Well, there is a mess of stuff that I could add to this, but
                for now I am just leaving it. You can add to it. It would be
                nice to have more configuration stuff for the url that gets
                passed in. moreover offers a ton of options, it would be nice
                if I handled it better. Check out
                http://w.moreover.com/dev/custom/ for details on building feed
                urls to pass the parser. They have category support in
                addition to the keyword stuff I used in my example code
                (show_news.php) which should be with this file.

    Author:     Carter Comunale (carter@brasscity.com)
                comments and suggestions are welcome.

    Date:       07/04/2001 (the 4th of July!)

    Modified Last By:   <your name here>
    Modified Last Date: <the date you changed it>

    Note:       Feel free to do whatever you want with this code, however, if
                you do change it making it better send me a note. I would like
                to know what you did :)

    So you want to see it work? Copy and paste this url:
    http://agn3.dhs.org/~carter/show_news.php?search_str=linux&action=search&search=search
*/

/**
 * Simple value holder for one news feed article built by news_xml_parser.
 *
 * Every field stays null until the parser sees the matching element (or, for
 * article_id, the matching attribute) in the feed.
 */
class article
{
    /** Value of the "id" attribute of the <article> element, when present. */
    public $article_id;
    public $url;
    public $headline_text;
    public $source;
    public $media_type;
    public $cluster;
    public $tagline;
    public $document_url;
    public $harvest_time;
    public $access_registration;
    public $access_status;

    /**
     * Nothing to initialise; present so the class has a real constructor on
     * PHP 8, where a method named after the class is no longer a constructor.
     */
    public function __construct()
    {
    }

    /**
     * Old-style constructor name, kept as a no-op so code that calls
     * $obj->article() explicitly keeps working.
     */
    public function article()
    {
    }
}

/**
 * Event-driven XML parser that turns a moreover.com news feed into a list of
 * article objects.
 *
 * Usage: construct with a file name or URL, call parse(), read the resulting
 * objects from $news_objects, then call free_parser().
 */
class news_xml_parser
{
    /** Path or URL handed to fopen() by parse(). */
    public $xml_file;

    /** Encoding name passed to xml_parser_create(). */
    public $type;

    /** Declared by the original class but never assigned; kept for compatibility. */
    public $xml_parser;

    /** The XML parser handle created in the constructor (null once freed). */
    public $parser;

    /** List of article objects collected so far. */
    public $news_objects = array();

    /** Upper-cased name of the element currently open, or '' between elements. */
    public $current_tag = '';

    /** The article being filled in, or null when outside an <article> element. */
    public $current_article;

    /**
     * Create the XML parser and register this object's handler methods.
     *
     * @param string $xml_file Path or URL of the feed to read in parse().
     */
    public function __construct($xml_file)
    {
        $this->xml_file        = $xml_file;
        $this->type            = 'UTF-8';
        $this->news_objects    = array();
        $this->current_tag     = '';
        $this->current_article = null;

        $this->parser = xml_parser_create($this->type);
        xml_parser_set_option($this->parser, XML_OPTION_CASE_FOLDING, true);
        xml_parser_set_option($this->parser, XML_OPTION_TARGET_ENCODING, 'UTF-8');

        // Bind the handlers as (object, method) callables. This replaces the
        // original xml_set_object($parser, &$this) call, whose call-time
        // pass-by-reference no longer compiles on current PHP versions.
        xml_set_element_handler(
            $this->parser,
            array($this, 'tag_open'),
            array($this, 'tag_close')
        );
        xml_set_character_data_handler($this->parser, array($this, 'cdata'));
    }

    /**
     * Old-style constructor name; forwards to __construct() so explicit calls
     * such as $this->news_xml_parser($file) from legacy code still work.
     *
     * @param string $xml_file Path or URL of the feed to read in parse().
     */
    public function news_xml_parser($xml_file)
    {
        $this->__construct($xml_file);
    }

    /**
     * Read the feed in 4096-byte chunks and push it through the XML parser.
     *
     * Prints a message and returns false when the feed cannot be opened.
     * Terminates the script with die() on an XML syntax error, as the
     * original implementation did.
     *
     * @return bool true when the whole feed was read, false if it could not be opened.
     */
    public function parse()
    {
        $fp = fopen($this->xml_file, 'r');
        if (!$fp) {
            echo "Could not open {$this->xml_file} for parsing!\n";
            return false;
        }

        // Strict comparisons so that a chunk consisting of the text "0" does
        // not end the loop early.
        while (($data = fread($fp, 4096)) !== false && $data !== '') {
            $data = $this->latin1_to_utf8($data);
            if ($data === false || $data === '') {
                echo 'ERROR'."\n";
                $data = '';
            }

            if (!xml_parse($this->parser, $data, feof($fp))) {
                $message = sprintf(
                    "XML error: %s at line %d\n\n",
                    xml_error_string(xml_get_error_code($this->parser)),
                    xml_get_current_line_number($this->parser)
                );
                fclose($fp);
                die($message);
            }
        }

        fclose($fp);
        return true;
    }

    /**
     * Convert one chunk of bytes from ISO-8859-1 to UTF-8.
     *
     * NOTE(review): like the utf8_encode() call this replaces, it treats the
     * feed as ISO-8859-1; a feed that is already UTF-8 would end up
     * double-encoded - confirm the encoding the feed is served in.
     *
     * @param string $data Raw bytes read from the feed.
     * @return string|false The converted bytes.
     */
    private function latin1_to_utf8($data)
    {
        // utf8_encode() is deprecated as of PHP 8.2; mb_convert_encoding() is
        // the documented replacement and yields the same bytes.
        if (function_exists('mb_convert_encoding')) {
            return mb_convert_encoding($data, 'UTF-8', 'ISO-8859-1');
        }
        return utf8_encode($data);
    }

    /**
     * Start-element handler.
     *
     * @param mixed  $parser     The XML parser invoking the handler.
     * @param string $tag        Element name (upper-cased by case folding).
     * @param array  $attributes Attribute name => value (names upper-cased).
     */
    public function tag_open($parser, $tag, $attributes)
    {
        $this->current_tag = $tag;

        switch ($tag) {
            case 'MOREOVERNEWS':
                // Start of a new feed document: begin with an empty list.
                $this->news_objects = array();
                break;

            case 'ARTICLE':
                // Start of a new article: create the object to fill in.
                $this->current_article = new article();
                // presumably the feed carries the identifier as an "id"
                // attribute on <article>; nothing is set when it is absent.
                if (isset($attributes['ID'])) {
                    $this->current_article->article_id = $attributes['ID'];
                }
                break;
        }
    }

    /**
     * Character-data handler: appends text to the matching article field.
     *
     * The parser may deliver the text of one element in several pieces (for
     * example around an entity such as &amp;), so pieces are concatenated
     * rather than keeping only the first one.
     *
     * @param mixed  $parser The XML parser invoking the handler.
     * @param string $cdata  One piece of character data.
     */
    public function cdata($parser, $cdata)
    {
        // Element name => article property it populates.
        static $fields = array(
            'URL'                 => 'url',
            'HEADLINE_TEXT'       => 'headline_text',
            'SOURCE'              => 'source',
            'MEDIA_TYPE'          => 'media_type',
            'CLUSTER'             => 'cluster',
            'TAGLINE'             => 'tagline',
            'DOCUMENT_URL'        => 'document_url',
            'HARVEST_TIME'        => 'harvest_time',
            'ACCESS_REGISTRATION' => 'access_registration',
            'ACCESS_STATUS'       => 'access_status',
        );

        // Ignore text outside an article or inside an element we do not track
        // (this includes the whitespace between elements, because tag_close()
        // clears current_tag).
        if ($this->current_article === null || !isset($fields[$this->current_tag])) {
            return;
        }

        $property = $fields[$this->current_tag];
        $this->current_article->$property .= $cdata;
    }

    /**
     * End-element handler: stores the finished article when </article> closes.
     *
     * @param mixed  $parser The XML parser invoking the handler.
     * @param string $tag    Element name (upper-cased by case folding).
     */
    public function tag_close($parser, $tag)
    {
        if ($tag === 'ARTICLE' && $this->current_article !== null) {
            // Done with the current object; add it to the result list.
            $this->news_objects[]  = $this->current_article;
            $this->current_article = null;
        }

        // No element is open any more, so following text (typically layout
        // whitespace) must not be appended to the field that just closed.
        $this->current_tag = '';
    }

    /**
     * Release the XML parser. Safe to call more than once.
     */
    public function free_parser()
    {
        if ($this->parser === null) {
            return;
        }

        // From PHP 8.0 the parser is an object and xml_parser_free() has no
        // effect; dropping the reference below is what releases it.
        if (version_compare(PHP_VERSION, '8.0.0', '<')) {
            xml_parser_free($this->parser);
        }
        $this->parser = null;
    }
}