PHP Classes

File: class.html_transform.php

Recommend this page to a friend!
  Classes of Keyvan Minoukadeh   HTTP Navigator   class.html_transform.php   Download  
File: class.html_transform.php
Role: ???
Content type: text/plain
Description: Simple example class for modifying HTML
Class: HTTP Navigator
Web fetching
Author: By Keyvan Minoukadeh
Last change:
Date: 22 years ago
Size: 7,147 bytes
 

Contents

Class file image Download
<?php
// tab/indent == 4 spaces
// $Date: 2002/04/21 03:08:15 $
// $Revision: 1.4 $

/**
 * HTML Transform class
 *
 * PHP class to modify HTML content.
 *
 * Note: I've quickly put this together for the
 * http_navigator example, it's not complete
 * and will change in future.
 *
 * @author   Keyvan Minoukadeh <keyvan@k1m.com>
 * @version  0.1.2 alpha
 */
class html_transform
{
    /** @var string HTML exactly as handed to the constructor (trimmed) */
    public $html_orig;

    /** @var string working copy of the HTML; every modifying method edits this */
    public $html;

    /** @var string directory URL (always ends in "/") prepended to relative URLs */
    public $base_url = "";

    /** @var string script URL prefix that rewritten navigation links are routed through */
    public $pass_through = "";

    /** @var bool if true, process() adopts a <base href> found in the document */
    public $use_base_href = true;

    /** @var bool if true, process() rewrites src/href attributes */
    public $rewrite_url = true;

    // these vars would hold form details and frame details
    // (to be added when I get some more time)
    public $form = array();
    public $frame = array();

    /**
     * CONSTRUCTOR
     *
     * @param string $html_orig html content
     * @param string $base_url  base url to rewrite relative urls with
     */
    public function __construct($html_orig = "", $base_url = "")
    {
        $this->html_orig = trim((string) $html_orig);
        $this->html = $this->html_orig;
        $this->set_base_url($base_url);
    }

    /**
     * Legacy (PHP 4 style) constructor name.
     *
     * PHP 8 no longer treats a method named after the class as the
     * constructor, so the real initialisation lives in __construct().
     * This method is kept as an ordinary method so that old code calling
     * it explicitly keeps working.
     *
     * @param string $html_orig html content
     * @param string $base_url  base url to rewrite relative urls with
     */
    public function html_transform($html_orig = "", $base_url = "")
    {
        $this->__construct($html_orig, $base_url);
    }

    /**
     * Use base href
     *
     * If <base href..> tag found, use it as base url? (default: yes)
     *
     * @param bool $use_base_href
     * @return bool always true
     */
    public function use_base_href($use_base_href = true)
    {
        $this->use_base_href = $use_base_href;
        return true;
    }

    /**
     * Set base URL
     *
     * This URL will be prepended to all relative URLs found. Only http://
     * and https:// URLs are accepted. The stored value is reduced to the
     * directory part (everything up to and including the last "/" of the
     * path), or "<url>/" when the URL has no path.
     *
     * @param string $base_url prepend this URL to all relative paths
     * @return bool true if the URL was accepted, false otherwise
     *              (the previous base URL is left untouched on failure)
     */
    public function set_base_url($base_url)
    {
        $base_url = trim((string) $base_url);
        if (!preg_match('!^https?://!i', $base_url)) {
            return false;
        }

        // A "/" inside the query string or fragment must not be mistaken
        // for a path separator, so drop both before locating the directory.
        $base_url = preg_replace('/[?#].*$/s', '', $base_url);

        if (substr_count($base_url, "/") > 2) {
            // has a path: keep up to and including the last slash
            $base_url = substr($base_url, 0, strrpos($base_url, "/") + 1);
        } else {
            // scheme://host only
            $base_url .= "/";
        }
        $this->base_url = $base_url;
        return true;
    }

    /**
     * Set pass through URL
     *
     * Should point to a script which can process the page, eg.
     * http://www.example.com/process.php?url=
     *
     * @param string $pass_through All URLs will pass through this script
     * @return bool always true
     */
    public function set_pass_through($pass_through)
    {
        $this->pass_through = $pass_through;
        return true;
    }

    /**
     * Count string
     *
     * Returns number of times string found in html. An empty search
     * string counts as 0 matches.
     *
     * @param mixed $string     string or array containing strings to find
     * @param bool  $strip_tags strip html tags before counting?
     * @return mixed int containing number of matches, or associative array containing
     *               subject as key and matches as value
     */
    public function count_string($string, $strip_tags = false)
    {
        $content = $strip_tags ? strip_tags((string) $this->html) : (string) $this->html;

        if (is_array($string)) {
            $found = array();
            foreach ($string as $val) {
                $found["$val"] = $this->count_substring($content, (string) $val);
            }
            return $found;
        }
        return $this->count_substring($content, (string) $string);
    }

    /**
     * Count word
     *
     * Returns number of times word (regex word boundary used) is found in html.
     * Matching is case-insensitive; surrounding whitespace of the word is
     * ignored and an empty word counts as 0 matches.
     *
     * @param mixed $word       string or array containing words to find
     * @param bool  $strip_tags strip html tags before counting?
     * @return mixed int containing number of matches, or associative array containing
     *               subject as key and matches as value
     */
    public function count_word($word, $strip_tags = false)
    {
        $content = $strip_tags ? strip_tags((string) $this->html) : (string) $this->html;

        if (is_array($word)) {
            $found = array();
            foreach ($word as $val) {
                $val = trim((string) $val);
                $found["$val"] = $this->count_whole_word($content, $val);
            }
            return $found;
        }
        return $this->count_whole_word($content, trim((string) $word));
    }

    /**
     * Search and replace
     *
     * Search html file for 1st argument, replace with 2nd argument
     *
     * @param mixed $search  string or array containing strings to find
     * @param mixed $replace string or array containing replacement string
     * @return bool always true
     */
    public function search_replace($search, $replace)
    {
        $this->html = str_replace($search, $replace, $this->html);
        return true;
    }

    /**
     * Process html
     *
     * Optionally adopts the document's <base href> as base URL, then
     * rewrites the src/href attribute of a, area, img, link, frame,
     * iframe and input tags via rewrite_url().
     */
    public function process()
    {
        // The capture is greedy and stops at a quote, ">" or whitespace so the
        // complete URL is taken; [^>] keeps the search inside the <base> tag.
        $base_pattern = '!<base\b[^>]*?\bhref\s*=\s*["\']?(https?://[^"\'>\s]+)!i';
        if ($this->use_base_href && preg_match($base_pattern, (string) $this->html, $matches)) {
            $this->set_base_url(trim($matches[1]));
        }

        if ($this->rewrite_url) {
            // \b after the tag name stops "<a" from matching "<abbr" etc.;
            // [^>] stops the match from running into a following tag.
            // Groups: 1 tag, 2 text before attribute, 3 attribute name,
            //         4 "=", 5 optional opening quote, 6 URL.
            $this->html = preg_replace_callback(
                '/<(a|area|img|link|frame|iframe|input)\b([^>]*?[\s"\'])(src|href)(\s?=\s?)("|\')?([^> \'"]+)/i',
                array($this, 'rewrite_url'),
                (string) $this->html
            );
        }
    }

    /**
     * Rewrite URL
     *
     * Callback for preg_replace_callback() in process(). Receives the match
     * groups described there and returns the replacement for the whole
     * match (the tag up to and including the URL, without closing quote).
     *
     * - mailto:, fragment-only (#..) and non-http(s) URLs are returned unchanged
     * - absolute http/https URLs are kept as they are
     * - protocol-relative (//host/..), root-relative (/..) and relative URLs
     *   are made absolute using the base URL (unchanged if no base URL is set)
     * - for a, area, frame and iframe tags the resulting URL is additionally
     *   routed through the pass through script
     *
     * @param array $modify match groups from process()
     * @return string replacement text
     */
    public function rewrite_url($modify)
    {
        $url = trim($modify[6]);
        $pre = "<".$modify[1].$modify[2].$modify[3].$modify[4].$modify[5];
        $navigational = in_array(strtolower($modify[1]), array("a", "frame", "area", "iframe"), true);

        $url_split = parse_url($url);
        if (empty($url_split)) {
            // unparsable: return unchanged
            return $modify[0];
        }

        // if mailto link, return unchanged
        if (strtolower(substr($url, 0, 7)) == "mailto:") {
            return $modify[0];
        }

        // if scheme included
        if (isset($url_split["scheme"])) {
            $scheme = strtolower($url_split["scheme"]);
            if ($scheme != "http" && $scheme != "https") {
                // return unchanged
                return $modify[0];
            }
            return $pre.($navigational ? $this->add_passthrough($url) : $url);
        }

        // if fragment only (#??)
        if (substr($url, 0, 1) == "#") {
            return $modify[0];
        }

        // scheme not included: nothing can be resolved without a base URL
        if (empty($this->base_url)) {
            return $modify[0];
        }

        // protocol-relative URL (//host/path): only the scheme is missing
        if (isset($url_split["host"])) {
            $origin = $this->base_origin();
            if ($origin === null) {
                return $modify[0];
            }
            $ret = $origin["scheme"].":".$url;
            return $pre.($navigational ? $this->add_passthrough($ret) : $ret);
        }

        if (!isset($url_split["path"])) {
            // e.g. query-only URL: return unchanged
            return $modify[0];
        }

        if (substr($url_split["path"], 0, 1) == "/") {
            // root-relative: scheme, host and port come from the base URL
            $origin = $this->base_origin();
            if ($origin === null) {
                return $modify[0];
            }
            $ret = $origin["scheme"]."://".$origin["authority"].$url_split["path"];
        } else {
            // relative to the base directory
            $ret = $this->base_url.$url_split["path"];
        }

        if (isset($url_split["query"]) && $url_split["query"] !== "") {
            $ret .= "?".$url_split["query"];
        }
        if ($navigational) {
            $ret = $this->add_passthrough($ret);
        }
        // the fragment stays outside the pass through URL so the browser handles it
        if (isset($url_split["fragment"])) {
            $ret .= "#".$url_split["fragment"];
        }
        return $pre.$ret;
    }

    /**
     * Add passthrough
     *
     * @param string $url absolute URL
     * @return string pass through URL followed by the url-encoded $url,
     *                or $url unchanged if no pass through URL is set
     */
    public function add_passthrough($url)
    {
        if (!empty($this->pass_through)) {
            return $this->pass_through.urlencode(trim($url));
        }
        return $url;
    }

    /**
     * Add base tag
     *
     * Removes every existing <base> tag and replaces the first <html ..>
     * opening tag with "<html><base href=".." />" (attributes of the
     * original <html> tag are not kept).
     *
     * @param string $url value for the href attribute
     * @return bool always true
     */
    public function add_base($url)
    {
        // \b so that <basefont> is left alone
        $this->html = preg_replace('/<base\b[^>]*>/i', '', (string) $this->html);

        // escape for the attribute context; do not double-encode existing entities
        $tag = '<html><base href="'.htmlspecialchars((string) $url, ENT_QUOTES, 'UTF-8', false).'" />';

        // a callback is used so "$1" or "\1" inside the URL is not treated
        // as a backreference by the regex engine
        $this->html = preg_replace_callback(
            '/<html\b[^>]*>/i',
            function () use ($tag) {
                return $tag;
            },
            $this->html,
            1
        );
        return true;
    }

    /**
     * Count occurrences of $needle in $content, treating an empty needle as 0 matches.
     */
    private function count_substring($content, $needle)
    {
        return ($needle === "") ? 0 : substr_count($content, $needle);
    }

    /**
     * Count case-insensitive whole-word occurrences of $word in $content.
     */
    private function count_whole_word($content, $word)
    {
        if ($word === "") {
            return 0;
        }
        // the delimiter must be passed to preg_quote() or a "/" in $word ends the pattern early
        $count = preg_match_all("/\\b".preg_quote($word, "/")."\\b/i", $content, $matches);
        return $count ? count($matches[0]) : 0;
    }

    /**
     * Scheme and host[:port] of the base URL, or null if the base URL has no host.
     */
    private function base_origin()
    {
        $base_split = parse_url((string) $this->base_url);
        if (empty($base_split) || empty($base_split["host"])) {
            return null;
        }
        return array(
            "scheme"    => isset($base_split["scheme"]) ? strtolower($base_split["scheme"]) : "http",
            "authority" => $base_split["host"].(isset($base_split["port"]) ? ":".$base_split["port"] : ""),
        );
    }
}
?>