PHP Classes

File: example6.php

Recommend this page to a friend!
  Classes of Alexey G. Piyanin   HTML SAX Parser   example6.php   Download  
File: example6.php
Role: Example script
Content type: text/plain
Description: Example #6 (get wikipedia page content)
Class: HTML SAX Parser
Parse HTML documents using regular expressions
Author: By Alexey G. Piyanin
Last change: change description
Date: 18 years ago
Size: 1,308 bytes
 

Contents

Class file image Download
<?php
/*
Author: Alexey G. Piyanin (e-mail: drdrzlo at mail dot ru)
Date: Jun 7 2006
Title: Get wikipedia page content
*/
// The full "<?php" tag is used because the short "<?" form is only parsed
// when the short_open_tag ini setting is enabled.
// The parser class is required by everything below, so stop right here with a
// clear error if the file cannot be loaded.
require_once 'SAXParser.php';

/**
 * Text callback (registered by name through initFunc() in this script).
 *
 * Watches for the two marker texts while the parser is inside a comment:
 *  - 'start content' switches the global $startContent flag on;
 *  - 'end content', seen after the start marker, stores the current
 *    $commentPos in the global $endContent.
 * Text received while $isComment is false is ignored.
 *
 * @param string $str Text chunk handed over by the parser; surrounding
 *                    whitespace is trimmed before it is compared.
 */
function character($str){
  global $isComment,$startContent,$endContent,$commentPos;

  // Markers only count when they appear inside a comment.
  if(!$isComment) return;

  $text = trim($str);

  if(!$startContent){
    // Still looking for the opening marker.
    if($text==='start content') $startContent=true;
    return;
  }

  // Opening marker already seen: remember where the closing marker's comment is.
  if($text==='end content') $endContent=$commentPos;
}

/**
 * Comment callback (registered by name through initFunc() in this script).
 *
 * Records the latest comment state and position in the globals $isComment and
 * $commentPos. The first time it is called with a false $start after the
 * 'start content' marker has been seen, it also sets $beginContent to $pos+3.
 *
 * @param mixed $start Truthy/falsy flag copied into $isComment
 *                     (presumably true when a comment opens and false when it
 *                     closes - confirm against SAXParser.php).
 * @param int   $pos   Position reported by the parser for this event
 *                     (presumably an offset into the parsed string, with the
 *                     +3 skipping the closing "-->" - confirm).
 */
function comment($start,$pos){
  global $isComment,$startContent,$commentPos,$beginContent;

  // True only for the first non-start event after the start marker was seen.
  $closesStartMarker = $startContent && !$start && $beginContent==0;
  if($closesStartMarker){
    $beginContent = $pos+3;
  }

  $commentPos = $pos;
  $isComment = $start;
}

// Page whose content is extracted and shown next to the original.
$URL = 'http://en.wikipedia.org/wiki/Kalimpong';

// State shared with the character() / comment() callbacks through globals.
$isComment = false;    // last $start value passed to comment()
$commentPos = 0;       // last position passed to comment()
$startContent = false; // becomes true once the 'start content' marker is seen

// Boundaries of the extracted region inside $content; 0 means "not found yet".
$beginContent = 0;
$endContent = 0;

// Only the text and comment callbacks are registered; the first two handler
// slots are left empty (presumably the tag handlers - confirm in SAXParser.php).
$parser = new HTML_SAXParser();
$parser->initFunc('','','character','comment');

// ATTENTION: fetching a remote URL this way requires allow_url_fopen; replace
// this with your own loader if that is not available.
// file_get_contents() returns false on failure. Fall back to an empty document
// so the rest of the script receives a string instead of failing on the
// boolean.
$content = file_get_contents($URL);
if ($content === false) {
  $content = '';
}
?>
<html>
<body>
<center>Source page:<br><iframe src="<?=htmlspecialchars($URL, ENT_QUOTES)?>" width="600" height="400" ></iframe><br><br></center>
Content:<br>
<?php
// Parsing invokes the registered callbacks, which fill $beginContent and
// $endContent (both stay 0 when the markers are not found).
$parser->parseString($content);

// Print the region only when both markers were located in the right order.
// Without this check a missing end marker gives substr() a negative length
// and an arbitrary slice of the page would be printed.
// NOTE(review): the fragment is remote HTML echoed without sanitising - fine
// for a demo, not for untrusted sources.
if ($endContent > $beginContent) {
  echo substr($content, $beginContent, $endContent - $beginContent);
}
?>
</body></html>