php正则替换处理HTML页面的方法
author:一佰互联 2019-04-28   click:151

本文实例讲述了php正则替换处理HTML页面的方法。分享给大家供大家参考。具体如下:

<?phpif(!defined("BASEPATH")) exit("No direct script access allowed"); /** * HTML替换处理类,考虑如下几种替换 * 1. img src : "/<img(.+?)src=(["" ])?(.+?)([ >]+?)/i" * 2. a href : "/<a(.+?)href=(["" ])?(.+?)([ >]+?)/i" * 3. ifram.src : "/<iframe(.+?)src=(["" ])?(.+?)([ >]+?)/i" * 4. frame src : "/<frame(.+?)src=(["" ])?(.+?)([ >]+?)/i" * 5. js : "/window.open([( ]+?)(["" ]+?)(.+?)([ )+?])/i" * 6. css : "/background(.+?)url([( ])(["" ]+?)(.+?)([ )+?])/i" */ class Myreplace { private $moudle_array = array("udata","tdata","tresult","dresult"); private $content; private $relative_dirname; private $projectid; private $moudle; function __construct() {  $this->CI = &get_instance (); } /**  * 替换  * @param string $content HTML内容  * @param string $relative 相对路径  * @param int $projectid 项目id  * @moudle string $moudle 模板标识: udata,tdata,tresult,dresult  */ public function my_replace($content,$relative,$projectid,$moudle) {  $this->content = $content;  $this->relative_dirname = $relative;  $this->projectid = $projectid;  if(in_array(strtolower($moudle),$this->moudle_array))  $this->moudle = $moudle;  else exit;  switch($this->moudle) {  case "udata":   $this->CI->load->model("mupload_data","model");   break;  case "tdata":   $this->CI->load->model("taskdata","model");   break;  case "tresult":   $this->CI->load->model("taskresult","model");   break;  case "dresult":   $this->CI->load->model("dmsresult","model");   break;  default:   break;  }  $pattern = "/<img(.+?)src=(["" ])?(.+?)([ >]+?)/i";  $content = preg_replace_callback( $pattern, array($this, "image_replace") , $content );  $pattern = "/<a(.+?)href=(["" ])?(.+?)([ >]+?)/i";  $content = preg_replace_callback( $pattern, array($this, "html_replace") , $content );  $pattern = "/<iframe(.+?)src=(["" ])?(.+?)([ >]+?)/i";  $content = preg_replace_callback( $pattern, array($this, "iframe_replace") , $content );  $pattern = "/<frame(.+?)src=(["" ])?(.+?)([ >]+?)/i";   $content = preg_replace_callback( $pattern, array($this, "frame_replace"), $content );  $pattern = "/window.open([( ]+?)(["" ]+?)(.+?)([ )]+?)/i";  $content = preg_replace_callback( $pattern, array($this, "js_replace"), $content );  $pattern = "/background(.+?)url([( ])(["" ]+?)(.+?)([ )+?])/i";  $content = preg_replace_callback( $pattern, array($this, "css_replace"), $content);  return $content; } private function image_replace($matches) {  if(count($matches) < 4) return "";  if( empty($matches[3]) ) return "";  $matches[3] = rtrim($matches[3],"""/");  //获取图片的id  $parent_dir_num = substr_count( $matches[3], "../");  $relative_dirname = $this->relative_dirname;  for($i=0; $i<$parent_dir_num; $i++) {  $relative_dirname = substr( $relative_dirname, 0, strrpos($relative_dirname,"/") );  }  $relativepath = rtrim($relative_dirname,"/") . "/".ltrim($matches[3],"./");  $image_id = $this->CI->model->get_id_by_path_and_project($relativepath,$this->projectid);  //输出  if( !empty($image_id) ) {  if($this->moudle == "dresult") {   return "<img".$matches[1]."src=".$matches[2].$this->CI->config->item("base_url")."cdms/".$this->moudle."/readpic/$image_id?pid=".$this->projectid .$matches[2]. $matches[4];  } else {   return "<img".$matches[1]."src=".$matches[2].$this->CI->config->item("base_url")."cdms/".$this->moudle."/picfile/$image_id?pid=".$this->projectid .$matches[2]. $matches[4];  }  } else {  return "<img".$matches[1]."src=".$matches[2].$matches[3].$matches[2].$matches[4];  } } private function html_replace( $matches ) {  if(count($matches) < 4) return "";  if( empty($matches[3]) ) return "";  //如果href的链接($matches[3])以http或www或mailto开始,则不进行处理  //if(preg_match("/^[http|www|mailto](.+?)/i",$matches[3]))   // return "<a".$matches[1]."href=".$matches[2].$matches[3].$matches[4];  $matches[3] = rtrim($matches[3],"""/");  //处理锚点  if(substr_count($matches[3],"#")>0)   $matches[3] = substr($matches[3],0,strrpos($matches[3],"#"));  //获取html的id  $parent_dir_num = substr_count( $matches[3], "../");  $relative_dirname = $this->relative_dirname;  for($i=0; $i<$parent_dir_num; $i++) {  $relative_dirname = substr( $relative_dirname, 0, strrpos($relative_dirname,"/") );  }  $relativepath = rtrim($relative_dirname,"/") . "/".ltrim($matches[3],"./");  $txtfile_id = $this->CI->model->get_id_by_path_and_project($relativepath,$this->projectid);  //输出  if( !empty($txtfile_id ) ) {  if($this->moudle == "dresult") {   return "<a".$matches[1]."href=".$matches[2].$this->CI->config->item("base_url")."cdms/".$this->moudle."/readfile/$txtfile_id?pid=".$this->projectid .$matches[2].$matches[4];  } else {   return "<a".$matches[1]."href=".$matches[2].$this->CI->config->item("base_url")."cdms/".$this->moudle."/txtfile/$txtfile_id?pid=".$this->projectid .$matches[2].$matches[4];  }  } else {  return "<a".$matches[1]."href=".$matches[2].$matches[3].$matches[2].$matches[4];  } } private function iframe_replace( $matches ) {  if(count($matches) < 4) return "";  if( empty($matches[3]) ) return "";  $matches[3] = rtrim($matches[3],"""/");  //处理锚点  if(substr_count($matches[3],"#")>0)   $matches[3] = substr($matches[3],0,strrpos($matches[3],"#"));  //获取html的id  $parent_dir_num = substr_count( $matches[3], "../");  $relative_dirname = $this->relative_dirname;  for($i=0; $i<$parent_dir_num; $i++) {  $relative_dirname = substr( $relative_dirname, 0, strrpos($relative_dirname,"/") );  }  $relativepath = rtrim($relative_dirname,"/") . "/".ltrim($matches[3],"./");  $txtfile_id = $this->CI->model->get_id_by_path_and_project($relativepath,$this->projectid);  //输出  if( !empty($txtfile_id ) ) {  if($this->moudle == "dresult") {    return "<iframe".$matches[1]."src=".$matches[2].$this->CI->config->item("base_url")."cdms/".$this->moudle."/readfile/$txtfile_id?pid=".$this->projectid .$matches[2].$matches[4];  } else {   return "<iframe".$matches[1]."src=".$matches[2].$this->CI->config->item("base_url")."cdms/".$this->moudle."/txtfile/$txtfile_id?pid=".$this->projectid .$matches[2].$matches[4];  }  } else {  return "<iframe".$matches[1]."src=".$matches[2].$matches[3].$matches[2].$matches[4];  } } private function frame_replace( $matches ) {    if(count($matches) < 4) return "";  if( empty($matches[3]) ) return "";  $matches[3] = rtrim($matches[3],"""/");  //处理锚点  if(substr_count($matches[3],"#")>0)   $matches[3] = substr($matches[3],0,strrpos($matches[3],"#"));  //获取html的id  $parent_dir_num = substr_count( $matches[3], "../");  $relative_dirname = $this->relative_dirname;  for($i=0; $i<$parent_dir_num; $i++) {  $relative_dirname = substr( $relative_dirname, 0, strrpos($relative_dirname,"/") );  }  $relativepath = rtrim($relative_dirname,"/") . "/".ltrim($matches[3],"./");  $txtfile_id = $this->CI->model->get_id_by_path_and_project($relativepath,$this->projectid);  //输出  if( !empty($txtfile_id ) ) {  if($this->moudle == "dresult") {    return "<frame".$matches[1]."src=".$matches[2].$this->CI->config->item("base_url")."cdms/".$this->moudle."/readfile/$txtfile_id?pid=".$this->projectid.$matches[2].$matches[4];  } else {   return "<frame".$matches[1]."src=".$matches[2].$this->CI->config->item("base_url")."cdms/".$this->moudle."/txtfile/$txtfile_id?pid=".$this->projectid.$matches[2].$matches[4];  }  } else {  return "<frame".$matches[1]."src=".$matches[2].$matches[3].$matches[2].$matches[4];  } } private function js_replace( $matches ){  if(count($matches) < 4) return "";  if( empty($matches[3]) ) return "";  //处理链接  $arr_html = split(",",$matches[3]);  $href = $arr_html[0];  $other = "";  for($i=0; $i<count($arr_html); $i++)  $other = $arr_html[$i].", ";  $other = rtrim($other,", ");  $href =rtrim($href,"""");  //处理锚点  if(substr_count($href,"#")>0)   return "window.open".$matches[1].$matches[2].$matches[3].$matches[4];;  //获取html的id  $parent_dir_num = substr_count( $href, "../");  $relative_dirname = $this->relative_dirname;  for($i=0; $i<$parent_dir_num; $i++) {  $relative_dirname = substr( $relative_dirname, 0, strrpos($relative_dirname,"/") );  }  $relativepath = rtrim($relative_dirname,"/") . "/".ltrim($href,"./");  $txtfile_id = $this->CI->model->get_id_by_path_and_project($relativepath,$this->projectid);  //输出  if( !empty($txtfile_id ) ) {  if($this->moudle == "dresult") {    return "window.open".$matches[1].$matches[2].$this->CI->config->item("base_url")."cdms/".$this->moudle."/readfile/$txtfile_id?pid=".$this->projectid.$matches[2].",".$other.$matches[4];  } else {   return "window.open".$matches[1].$matches[2].$this->CI->config->item("base_url")."cdms/".$this->moudle."/txtfile/$txtfile_id?pid=".$this->projectid.$matches[2].",".$other.$matches[4];  }  } else {  return "window.open".$matches[1].$matches[2].$matches[3].$matches[4];  } } private function css_replace( $matches ) {  if(count($matches) < 5) return "";  if( empty($matches[4]) ) return "";    $matches[4] = rtrim($matches[4],"""/");  //获取图片的id  $parent_dir_num = substr_count( $matches[4], "../");  $relative_dirname = $this->relative_dirname;  for($i=0; $i<$parent_dir_num; $i++) {  $relative_dirname = substr( $relative_dirname, 0, strrpos($relative_dirname,"/") );  }  $relativepath = rtrim($relative_dirname,"/") . "/".ltrim($matches[4],"./");  $image_id = $this->CI->model->get_id_by_path_and_project($relativepath,$this->projectid);  //输出  if( !empty($image_id) ) {  if($this->moudle == "dresult") {   return "background".$matches[1]."url".$matches[2].$matches[3].$this->CI->config->item("base_url")."cdms/".$this->moudle."/readpic/$image_id?pid=".$this->projectid .$matches[3]. $matches[5];  } else {   return "background".$matches[1]."url".$matches[2].$matches[3].$this->CI->config->item("base_url")."cdms/".$this->moudle."/picfile/$image_id?pid=".$this->projectid .$matches[3]. $matches[5];  }  } else {  return "background".$matches[1]."url".$matches[2].$matches[3].$matches[4].$matches[3].$matches[5];  } } }/* End of Myreplace.php *//* Location: /application/libraries/Myreplace.php */

PS:这里再为大家提供2款非常方便的正则表达式工具供大家参考使用:

JavaScript正则表达式在线测试工具:http://tools.jb51.net/regex/javascript

正则表达式在线生成工具:http://tools.jb51.net/regex/create_reg

希望本文所述对大家的php程序设计有所帮助。