File: news_parser_4.php

Recommend this page to a friend!
news_parser_4.php
File:	`news_parser_4.php`
Role:	???
Content type:	`text/plain`
Description:	This contains the parser and article classes.. please use morever_api.php it is the updated version of this class with channel and category support! Thanks Mike
Class:	news_parser_4
Author:	By Carter Comunale
Last change:
Date:	23 years ago
Size:	`6,399 bytes`
Download
<?

/*
        File name: news_parser_4.php - the 4 indicates that php 4 or better is required. php3 port?
        Classes: article and news_xml_parser.
        Purpose: These two classes are intended to be used with the moreover.com news feed site.
                        article is a simple object that represents a single news feed.
                        news_xml_parser is an xml parser that creates article objects from the moreover.com xml news feed.

        ToDo: Well, there is a mess of stuff that I could add to this, but for now I am just leaving it as is. You can add to it.
                   It would be nice to have more configuration stuff for the url that gets passed in.
                   moreover offers a ton of options, it would be nice if I handled it better.
                   check out     http://w.moreover.com/dev/custom/
                   for details on building feed urls to pass the parser. They have category support in addition to the keyword
                   stuff I used in my example code (show_news.php) which should be with this file.

        Author: Carter Comunale   (carter@brasscity.com) comments and suggestions are welcome.
        Date: 07/04/2001 (the 4th of July!)
        Modified Last By:         <your name here>
        Modified Last Date:      <the date you changed it>
        Note: Feel free to do whatever you want with this code, however, if you do change it making it better send me a note.
                  I would like to know what you did :)

So you want to see it work? Copy and paste this URL:
http://agn3.dhs.org/~carter/show_news.php?search_str=linux&action=search&search=search

*/

// simple class to hold our news feed articles that we build
class article {
    var $article_id;
    var $url;
    var $headline_text;
    var $source;
    var $media_type;
    var $cluster;
    var $tagline;
    var $document_url;
    var $harvest_time;
    var $access_registration;
    var $access_status;

    function article() {
        // do nothing for now just be nice oo style.
    }
}

class news_xml_parser {
    var $xml_file;
    var $type;
    var $xml_parser;
    var $news_objects;
    var $current_tag;
    var $current_article;

    function news_xml_parser($xml_file) { // constructor
        $this->xml_file = $xml_file;
        $this->type = 'UTF-8';
        $this->parser = xml_parser_create($this->type);
        xml_parser_set_option($this->parser, XML_OPTION_CASE_FOLDING, true);
        xml_parser_set_option($this->parser, XML_OPTION_TARGET_ENCODING, 'UTF-8');
        xml_set_element_handler($this->parser,"tag_open","tag_close");
        xml_set_character_data_handler($this->parser,"cdata");
    }

    function parse() {
        xml_set_object($this->parser,&$this);
        if (!($fp = fopen($this->xml_file, 'r'))) {
            echo "Could not open $xml_file for parsing!\n";
        }
        while ($data = fread($fp, 4096)) {
            if (!($data = utf8_encode($data))) {
                echo 'ERROR'."\n";
            }
            if (!xml_parse($this->parser, $data, feof($fp))) {
                die(sprintf( "XML error: %s at line %d\n\n",
                xml_error_string(xml_get_error_code($this->parser)),
                xml_get_current_line_number($this->parser)));
            }
        }
    }

    function tag_open($parser,$tag,$attributes) {
        //var_dump($parser,$tag,$attributes);
	$this->current_tag = $tag;
        switch ($tag) {
		case "MOREOVERNEWS":  // this tag means we are at the start of a new xml file, create the array to hold the objects created
		$this->news_objects = array (" ");
		array_pop($this->news_objects);
		break;

                case "ARTICLE": // when we get this tag, create a new article object
		$this->current_article = new article();
		break;
	}

    }

    function cdata($parser,$cdata) {
        //var_dump($parser,$cdata);

        switch ($this->current_tag) {

                case "URL":
	        if (!$this->current_article->url) {
			$this->current_article->url = $cdata;
		}
		break;

                case "HEADLINE_TEXT":
                if (!$this->current_article->headline_text) {
			$this->current_article->headline_text = $cdata;
                }
                break;

                case "SOURCE":
                if (!$this->current_article->source) {
	                $this->current_article->source = $cdata;
                }
                break;

                case "MEDIA_TYPE":
                if (!$this->current_article->media_type) {
	                $this->current_article->media_type = $cdata;
                }
                break;

                case "CLUSTER":
                if (!$this->current_article->cluster) {
	                $this->current_article->cluster = $cdata;
                }
                break;

                case "TAGLINE":
                if (!$this->current_article->tagline) {
	                $this->current_article->tagline = $cdata;
                }
                break;

                case "DOCUMENT_URL":
                if (!$this->current_article->document_url) {
	                $this->current_article->document_url = $cdata;
                }
                break;

                case "HARVEST_TIME":
                if (!$this->current_article->harvest_time) {
	                $this->current_article->harvest_time = $cdata;
                }
                break;

                case "ACCESS_REGISTRATION":
                if (!$this->current_article->access_registration) {
	                $this->current_article->access_registration = $cdata;
                }
                break;

                case "ACCESS_STATUS":
                if (!$this->current_article->access_status) {
			$this->current_article->access_status = $cdata;
                }
                break;

	}
    }

    function tag_close($parser,$tag) {
        //var_dump($parser,$tag);

	switch ($tag) {

                case "ARTICLE": // when we get this tag, we are done with thee current object, insert it into the arrray.
		array_push($this->news_objects, $this->current_article);
                break;
	}

    }

    function free_parser() {
        xml_parser_free($this->parser);
    }
}
?>
About us
Advertise on this site
For more information send a message to info at phpclasses dot org.
File: news_parser_4.php

Contents