GhostManSec
Server: Apache
System: Linux webm003.cluster115.gra.hosting.ovh.net 5.15.206-ovh-vps-grsec-zfs-classid #1 SMP Fri May 15 02:41:25 UTC 2026 x86_64
User: eliteafr (153088)
PHP: 5.4.45
Disabled: _dyuweyrj4,_dyuweyrj4r,dl
Upload Files
File: /home/eliteafr/pmb/classes/docwatch/datasources/docwatch_datasource_monitoring_website.class.php
<?php
// +-------------------------------------------------+
// © 2002-2014 PMB Services / www.sigb.net pmb@sigb.net et contributeurs (voir www.sigb.net)
// +-------------------------------------------------+
// $Id: docwatch_datasource_monitoring_website.class.php,v 1.1.2.5 2016-12-05 13:25:38 dgoron Exp $

// Direct-access guard: abort when the requested URI itself contains ".class.php"
// (case-insensitive match), so this file only runs when included by another script.
if (stristr($_SERVER['REQUEST_URI'], ".class.php")) die("no access");

require_once($class_path."/docwatch/datasources/docwatch_datasource.class.php");

/**
 * class docwatch_datasource_monitoring_website
 * 
 */
class docwatch_datasource_monitoring_website extends docwatch_datasource{

	/** Aggregations: */

	/** Compositions: */

	 /*** Attributes: ***/
	
	// Never assigned in this class; the matching database column is set to NOW()
	// directly in save_content(). NOTE(review): presumably meant to mirror that column - confirm.
	protected $upload_date;
	
	// Entries extracted from the monitored page (arrays with 'content', 'title' and
	// 'link' keys); stored serialized by save_content().
	protected $content;
	
	// md5 of the serialized $content, used to detect that the page changed.
	protected $content_hash;
	
	// Response headers of the last successful fetch; the 'Date' entry is used to
	// date the generated items.
	protected $content_headers;
	/**
	 * Builds the datasource by delegating entirely to the parent constructor.
	 *
	 * @param int $id Identifier forwarded unchanged to the parent constructor (defaults to 0)
	 * @return void
	 * @access public
	 */
	public function __construct($id=0) {
		parent::__construct($id);
	} // end of member function __construct
		
	/**
	 * Reduces a piece of HTML to the text that is stored and compared between two runs.
	 *
	 * Steps:
	 *  - when no XPath expression is configured (whole-page monitoring), only the
	 *    inside of the <body> element is kept;
	 *  - <script> elements are removed;
	 *  - depending on the 'mode_creation_items' parameter, either every tag is
	 *    stripped ('all_change', 'by_change') or only the <a> elements are kept,
	 *    one per line ('all_links'). Any other mode leaves the markup as is.
	 *
	 * @param string $html Raw HTML (whole page or a fragment selected through XPath)
	 * @return string Cleaned content
	 */
	protected function clean_html($html){
		// empty() covers the unset parameter, the empty string AND the empty array that
		// set_from_form() stores; the former "== ''" test was never true for an array.
		if(empty($this->parameters['xpath_expressions'])) {
			// Keep the page untouched when no <body> can be isolated (or when the regex
			// engine gives up) instead of silently reducing the content to nothing.
			if(preg_match("/\<body.*\>(.*)\<\/body\>/isU", $html, $matches)) {
				$html = $matches[1];
			}
		}
		$html = preg_replace('#<script(.*?)>(.*?)</script>#is', '', $html);
		
		$mode = (isset($this->parameters['mode_creation_items']) ? $this->parameters['mode_creation_items'] : '');
		switch ($mode) {
			case 'all_change':
			case 'by_change':
				$html = strip_tags($html);
				break;
			case 'all_links':
				// Every other tag is dropped first so that only anchors can match
				preg_match_all("/\<a.*\>(.*)\<\/a\>/isU", strip_tags($html, '<a>'), $link_matches);
				$html = implode(PHP_EOL, $link_matches[0]);
				break;
		}
		return $html;
	}
	
	/**
	 * Stores the current content and its hash for this datasource.
	 *
	 * The upload date column is always set to NOW() by the query itself.
	 *
	 * @param bool $first true to insert the row (first fetch), false to update the existing one
	 * @return void
	 */
	protected function save_content($first=true) {
		// The identifier is cast so that nothing but a number can reach the query
		$id = (int) $this->id;
		// Stays empty for an insert; it used to be undefined in that case
		$where = '';
		if($first) {
			$query = "insert into docwatch_datasource_monitoring_website set 
					datasource_monitoring_website_num_datasource = '".$id."', ";
		} else {
			$query = "update docwatch_datasource_monitoring_website set ";
			// Leading space: the clause is appended right after the last quoted value
			$where = " where datasource_monitoring_website_num_datasource = '".$id."' ";
		}
		$query .= " 
				datasource_monitoring_website_upload_date = NOW(),
				datasource_monitoring_website_content = '".addslashes(serialize($this->content))."',
				datasource_monitoring_website_content_hash = '".addslashes($this->content_hash)."'";
		$query .= $where;
		pmb_mysql_query($query);
	}
	
	/**
	 * Splits a unified diff into hunks and turns each hunk that really changes
	 * something into a list of HTML paragraphs.
	 *
	 * A hunk starts after a "@@ ... @@" header and runs up to the next header (or the
	 * end of the diff). Inside a hunk:
	 *  - lines starting with '-' (removed) only flag the hunk as changed;
	 *  - lines starting with '+' (added) flag the hunk and are kept without the marker;
	 *  - other lines (context) are kept as they are.
	 * Lines that are empty once their tags are stripped are ignored, and hunks
	 * without any significant added/removed line are dropped.
	 *
	 * @param string $xdiff_string Unified diff text
	 * @return array One entry per retained hunk, each being an array of "<p>...</p>" strings
	 */
	protected function get_xdiff_change($xdiff_string) {
		$xdiff_change = array();
		// Offsets are captured with the headers: looking each header up again with
		// strpos() started from the beginning of the diff and could land on an
		// earlier occurrence of the same text.
		if(!preg_match_all("/@@(.*)@@/isU", (string) $xdiff_string, $matches, PREG_OFFSET_CAPTURE)) {
			return $xdiff_change;
		}
		$headers = $matches[0];
		$nb_headers = count($headers);
		for($i=0; $i<$nb_headers; $i++) {
			$start = $headers[$i][1] + strlen($headers[$i][0]);
			if($i < $nb_headers-1) {
				$html_extracted = substr($xdiff_string, $start, $headers[$i+1][1] - $start);
			} else {
				$html_extracted = substr($xdiff_string, $start);
			}
			$has_change = false;
			$change = array();
			// Split on LF / CRLF explicitly rather than on PHP_EOL, which depends on
			// the platform PHP runs on and not on the diff itself
			$lines = preg_split('/\r?\n/', (string) $html_extracted);
			foreach ($lines as $line) {
				$line = trim($line);
				if($line == '') {
					continue;
				}
				$marker = substr($line, 0, 1);
				if($marker == '-' || $marker == '+') {
					$text = trim((string) substr($line, 1));
					if(strip_tags($text) != '') {
						if($marker == '+') {
							$change[] = "<p>".$text."</p>";
						}
						$has_change = true;
					}
				} elseif(strip_tags($line) != '') {
					$change[] = "<p>".$line."</p>";
				}
			}
			if($has_change) {
				$xdiff_change[] = $change;
			}
		}
		return $xdiff_change;
	}
	
	/**
	 * Downloads a page and extracts the content to monitor.
	 *
	 * Nothing is returned unless the body is not empty and the status code is 200.
	 * On success the response headers are kept in $this->content_headers.
	 *  - Without XPath expressions, the result holds a single entry made of the
	 *    cleaned page.
	 *  - With XPath expressions, the result holds one entry per expression that
	 *    selects at least one node (only the first node is used). The title and link
	 *    expressions found at the same index are optional.
	 *
	 * @param string $link URL of the page to fetch
	 * @return array List of arrays with 'content', 'title' and 'link' keys (possibly empty)
	 */
	protected function get_content_from_link($link) {
		global $charset;
		
		$content_from_link = array();
		if(!$link) {
			return $content_from_link;
		}
		@ini_set("zend.ze1_compatibility_mode", "0");
		$aCurl = new Curl();
		$aCurl->timeout=2;
		$content = $aCurl->get($link);
		$html=$content->body;
		if(!$html || !isset($content->headers['Status-Code']) || $content->headers['Status-Code'] != 200) {
			return $content_from_link;
		}
		$this->content_headers = $content->headers;
		if($charset != 'utf-8') {
			// NOTE(review): assumes the remote page is UTF-8 encoded - confirm
			$html = utf8_decode($html);
		}
		
		$xpath_expressions = (isset($this->parameters['xpath_expressions']) ? $this->parameters['xpath_expressions'] : '');
		if(!is_array($xpath_expressions) || !count($xpath_expressions)) {
			// Whole-page monitoring
			$content_from_link[] = array(
					'content' => $this->clean_html($html),
					'title' => '',
					'link' => ''
			);
			return $content_from_link;
		}
		
		$dom = new DOMDocument();
		$dom->encoding = $charset;
		// Real-world markup is rarely valid: collect libxml errors instead of emitting warnings
		$old_errors_value = libxml_use_internal_errors(true);
		if($dom->loadHTML($html)) {
			$xpath = new DOMXPath($dom);
			foreach ($xpath_expressions as $i=>$xpath_expression) {
				$node_html = $this->get_first_xpath_node_html($dom, $xpath, $xpath_expression);
				if($node_html === false) {
					// Nothing selected: skip the entry. Passing a null node to saveHTML()
					// would have returned the whole document as content.
					continue;
				}
				$html_title = '';
				if(!empty($this->parameters['xpath_expressions_for_title'][$i])) {
					$title_html = $this->get_first_xpath_node_html($dom, $xpath, $this->parameters['xpath_expressions_for_title'][$i]);
					if($title_html !== false) {
						$html_title = $this->clean_html($title_html);
					}
				}
				$html_link = '';
				if(!empty($this->parameters['xpath_expressions_for_link'][$i])) {
					$link_html = $this->get_first_xpath_node_html($dom, $xpath, $this->parameters['xpath_expressions_for_link'][$i]);
					if($link_html !== false && preg_match("/\<a.*\>(.*)\<\/a\>/isU", strip_tags($link_html, '<a>'), $link_matches)) {
						$html_link = $link_matches[0];
					}
				}
				$content_from_link[] = array(
						'content' => $this->clean_html($node_html),
						'title' => $html_title,
						'link' => $html_link
				);
			}
		}
		// Release the collected parse errors before restoring the previous setting
		libxml_clear_errors();
		libxml_use_internal_errors($old_errors_value);
		return $content_from_link;
	}
	
	/**
	 * Serializes the first node selected by an XPath expression.
	 *
	 * @param DOMDocument $dom Parsed document
	 * @param DOMXPath $xpath XPath evaluator bound to $dom
	 * @param string $expression XPath expression to evaluate
	 * @return string|false HTML of the first selected node, false when the expression
	 *                      is invalid or selects nothing
	 */
	protected function get_first_xpath_node_html($dom, $xpath, $expression) {
		$entries = $xpath->query($expression);
		if(!$entries || !$entries->length) {
			return false;
		}
		return $dom->saveHTML($entries->item(0));
	}
	
	/**
	 * Builds the URL attached to a generated item.
	 *
	 *  - Without $link_for_construct, the monitored URL gets a "#<timestamp><random>"
	 *    fragment so that every detection gets its own URL. The timestamp comes from
	 *    the 'Date' response header, or from the current time when it is unusable.
	 *  - A $link_for_construct that already is an http(s) URL is returned as is.
	 *  - Otherwise $link_for_construct is expected to be an <a> element: its href is
	 *    extracted and, when relative, resolved against the monitored URL.
	 *    "." and ".." segments are not normalized.
	 *  - When no href can be found, $link_for_construct is appended to $link.
	 *
	 * @param string $link URL of the monitored page
	 * @param string $link_for_construct Optional URL or <a> markup found in the page
	 * @return string
	 */
	protected function get_constructed_remote_link($link, $link_for_construct = '') {
		if(!$link_for_construct) {
			$timestamp = false;
			if(isset($this->content_headers['Date'])) {
				$timestamp = strtotime($this->content_headers['Date']);
			}
			if($timestamp === false) {
				$timestamp = time();
			}
			return $link.'#'.$timestamp.rand(0,1000);
		}
		if(preg_match('#^https?://#i', $link_for_construct)) {
			return $link_for_construct;
		}
		if(!preg_match("/\s+(?:[^\"'>]+|\"[^\"]*\"|'[^']*')*href=(\"[^\"]+\"|'[^']+'|[^<>\s]+)/i", ' '.$link_for_construct.' ', $matches)) {
			return $link.$link_for_construct;
		}
		// Use the captured href value ($matches[1]); $matches[0] also contains the
		// leading " <a ... href=" text, which made absolute URLs unrecognizable.
		$href = trim($matches[1], "\"'");
		if(preg_match('#^[a-z][a-z0-9+.\-]*:#i', $href)) {
			// Already carries a scheme (http:, https:, mailto:, ...)
			return $href;
		}
		$first_char = substr($href, 0, 1);
		if($first_char == '#') {
			$without_fragment = explode('#', $link, 2);
			return $without_fragment[0].$href;
		}
		if($first_char == '?') {
			$without_query = preg_split('/[?#]/', $link, 2);
			return $without_query[0].$href;
		}
		$base = parse_url($link);
		if(!is_array($base) || !isset($base['scheme']) || !isset($base['host'])) {
			// The monitored URL cannot be decomposed: plain concatenation, as before
			return $link.ltrim($href, '/');
		}
		if(substr($href, 0, 2) == '//') {
			// Protocol-relative URL
			return $base['scheme'].':'.$href;
		}
		$root = $base['scheme'].'://'.$base['host'].(isset($base['port']) ? ':'.$base['port'] : '');
		if($first_char == '/') {
			return $root.$href;
		}
		// Relative to the directory of the monitored page
		$path = (isset($base['path']) ? $base['path'] : '/');
		$last_slash = strrpos($path, '/');
		$directory = ($last_slash === false ? '/' : substr($path, 0, $last_slash + 1));
		return $root.$directory.$href;
	}
	
	/**
	 * Fetches the monitored page, compares it with the stored version and returns
	 * the items describing what changed.
	 *
	 *  - First fetch (nothing stored yet): the content is stored, no item is produced.
	 *  - Same hash as the stored one: nothing to do.
	 *  - With XPath expressions: one item per extracted entry whose content is not
	 *    among the stored ones.
	 *  - Without XPath expressions: the stored and fetched texts are diffed with the
	 *    xdiff extension; 'by_change' produces one item per hunk, any other mode a
	 *    single item holding every hunk.
	 * The stored content is replaced by the fetched one once items are built.
	 *
	 * @param string $link URL of the page to monitor
	 * @return array|false List of item data (possibly empty); false when no content
	 *                     could be extracted or when xdiff is needed but not loaded
	 */
	protected function get_items_datas($link){
		$items = array();
		// Always an array: it used to be undefined (null) when nothing had changed
		$datas = array();
		$content_from_link = $this->get_content_from_link($link);
		if(!is_array($content_from_link) || !count($content_from_link)) {
			return false;
		}
		$content_hash_from_link = md5(serialize($content_from_link));
		
		$query = "select datasource_monitoring_website_upload_date, datasource_monitoring_website_content, datasource_monitoring_website_content_hash
					from docwatch_datasource_monitoring_website where datasource_monitoring_website_num_datasource = ".((int) $this->id);
		$result = pmb_mysql_query($query);
		if(!$result || !pmb_mysql_num_rows($result)) {
			// Nothing stored yet: keep this version as the reference
			$this->content = $content_from_link;
			$this->content_hash = $content_hash_from_link;
			$this->save_content(true);
			return $datas;
		}
		$row = pmb_mysql_fetch_object($result);
		if($content_hash_from_link == $row->datasource_monitoring_website_content_hash) {
			return $datas;
		}
		
		// NOTE(review): unserialize() is only safe as long as this column is written
		// by save_content() alone - confirm nothing else feeds it
		$content_from_base = unserialize($row->datasource_monitoring_website_content);
		if(!is_array($content_from_base)) {
			$content_from_base = array();
		}
		if(isset($this->parameters['xpath_expressions']) && is_array($this->parameters['xpath_expressions']) && count($this->parameters['xpath_expressions'])) {
			$hash_from_base = array();
			foreach ($content_from_base as $content) {
				$hash_from_base[] = md5($content['content']);
			}
			foreach ($content_from_link as $content) {
				if(!in_array(md5($content['content']), $hash_from_base, true)) {
					$items[] = array(
							'content' => $content['content'],
							'title' => ($content['title'] ? $content['title'] : $this->get_title()),
							'link' => $this->get_constructed_remote_link($link, $content['link'])
					);
				}
			}
		} else {
			if(!extension_loaded('xdiff')) {
				// Returned before storing anything, so the comparison can still be
				// done once the extension is available
				return false;
			}
			$text_from_base = (isset($content_from_base[0]['content']) ? $content_from_base[0]['content'] : '');
			$text_from_link = (isset($content_from_link[0]['content']) ? $content_from_link[0]['content'] : '');
			if($text_from_base !== $text_from_link) {
				$xdiff_string = xdiff_string_diff($text_from_base, $text_from_link);
				$xdiff_change = $this->get_xdiff_change($xdiff_string);
				if($this->parameters['mode_creation_items'] == 'by_change') {
					foreach ($xdiff_change as $change) {
						$items[] = array(
								'content' => implode('', $change),
								'title' => $this->get_title(),
								'link' => $this->get_constructed_remote_link($link)
						);
					}
				} else {
					$item_content = '';
					foreach ($xdiff_change as $change) {
						$item_content .= implode('', $change);
					}
					$items[] = array(
							'content' => $item_content,
							'title' => $this->get_title(),
							'link' => $this->get_constructed_remote_link($link)
					);
				}
			}
		}
		
		// Items are dated with the 'Date' response header; fall back to the current
		// time rather than to 1970-01-01 when the header is missing or unreadable
		$timestamp = false;
		if(isset($this->content_headers['Date'])) {
			$timestamp = strtotime($this->content_headers['Date']);
		}
		if($timestamp === false) {
			$timestamp = time();
		}
		$publication_date = date('Y-m-d H:i:s', $timestamp);
		foreach ($items as $item) {
			$data = array();
			$data["type"] = "monitoring_website";
			$data["title"] = $item['title'];
			$data["summary"] = $item['content'];
			$data["content"] = '';
			$data["url"] = $item['link'];
			$data["publication_date"] = $publication_date;
			$data["logo_url"] = '';
			$data["descriptors"] = "";
			$data["tags"] = '';
			$datas[] = $data;
		}
		$this->content = $content_from_link;
		$this->content_hash = $content_hash_from_link;
		$this->save_content(false);
		return $datas;
	}
	
	/**
	 * Lists the selectors offered for this datasource.
	 *
	 * @return array Single entry: the 'docwatch_selector_monitoring_website' key
	 *               mapped to its label taken from the global $msg table
	 */
	public function get_available_selectors(){
		global $msg;
		
		$selectors = array();
		$selectors["docwatch_selector_monitoring_website"] = $msg['dsi_docwatch_selector_monitoring_website'];
		return $selectors;
	}
	
	/**
	 * Builds the markup of one expression row: a text box, a reset button and, on
	 * the first row only (index 0), the "+" button that appends a new row.
	 *
	 * @param string $parameter_name Parameter the row belongs to; used to build ids and names
	 * @param int $i Index of the row
	 * @param string $expression Current value of the expression
	 * @return string HTML of the row
	 */
	protected function get_expression_xpath_content($parameter_name, $i, $expression) {
		global $msg,$charset;
		
		// XPath expressions routinely contain double quotes (//div[@class="x"]):
		// they must be escaped before being written into the value attribute
		$escaped_expression = htmlentities($expression, ENT_QUOTES, $charset);
		$form = "<input type='text' data-dojo-type='dijit/form/TextBox' id='docwatch_datasource_monitoring_website_".$parameter_name."_".$i."' name='docwatch_datasource_monitoring_website_".$parameter_name."[]' value=\"".$escaped_expression."\" style='width: 50em;'/>
	 			<button data-dojo-type='dijit/form/Button' type='button'>".$msg['raz']."
	 				<script type='dojo/on' data-dojo-event='click' data-dojo-args='evt'>
	 					require(['dojo/dom'], function(dom){
				            dom.byId('docwatch_datasource_monitoring_website_".$parameter_name."_".$i."').value = '';
				        });
	 				</script>
	 			</button>";
		if($i == 0) {
			// The counter is read from an input value, hence a string: it is parsed
			// so that "count+1" is an addition and not a concatenation ("1" + 1 = "11")
			$form .= "<button data-dojo-type='dijit/form/Button' type='button'>+
					<script type='dojo/on' data-dojo-event='click' data-dojo-args='evt'>
	 					require(['dojo/dom', 'dojo/dom-construct', 'dojo/dom-attr', 'dojo/on', 'dojo/parser'], function(dom, domConstruct, domAttr, on, parser){
				            var count = parseInt(dom.byId('".$parameter_name."_count').value, 10);
							var div = domConstruct.create('div', {id : 'monitoring_website_".$parameter_name."_'+count});
							var input = domConstruct.create('input', {type : 'text', 'data-dojo-type' : 'dijit/form/TextBox', id : 'docwatch_datasource_monitoring_website_".$parameter_name."_'+count, name : 'docwatch_datasource_monitoring_website_".$parameter_name."[]', style : 'width: 50em;'});
							domConstruct.place(input, div);
							var button = domConstruct.create('button', {id : 'monitoring_website_".$parameter_name."_'+count+'_button', 'data-dojo-type' : 'dijit/form/Button', type : 'button', innerHTML : ' X '});
							on(button, 'click', function(){
								dom.byId('docwatch_datasource_monitoring_website_".$parameter_name."_'+count).value = '';
							});
							domConstruct.place(button, div);
							domConstruct.place(div, 'add_".$parameter_name."');
							parser.parse('monitoring_website_".$parameter_name."_'+count);
							dom.byId('".$parameter_name."_count').value = count+1;
				        });
	 				</script>
				</button>";
		}
		return $form;
	}
	
	/**
	 * Builds the form section of one list of expressions: a label, one row per
	 * stored expression (a single empty row when there is none), a hidden counter
	 * of rows and the empty container that receives the rows added on the client side.
	 *
	 * @param string $parameter_name Key of the list in $this->parameters; also used
	 *                               to pick the label and to build ids and names
	 * @return string HTML of the section
	 */
	protected function get_xpath_expressions_form($parameter_name) {
		global $msg,$charset;
		
		$label = htmlentities($msg['dsi_docwatch_datasource_monitoring_website_'.$parameter_name],ENT_QUOTES,$charset);
		$html = "
	 		<div class='row'>
	 			<label>".$label."</label>
	 		</div>";
		$has_expressions = (is_array($this->parameters[$parameter_name]) && count($this->parameters[$parameter_name]));
		if(!$has_expressions) {
			// Nothing stored: a single empty row, counter set to 1
			$row = $this->get_expression_xpath_content($parameter_name, 0, '');
			$html .= "
	 			<div class='row'>
		 			".$row."	
				</div>
		 		<input type='hidden' id='".$parameter_name."_count' name='".$parameter_name."_count' value= '1' />";
		} else {
			foreach ($this->parameters[$parameter_name] as $position=>$stored_expression) {
				$row = $this->get_expression_xpath_content($parameter_name, $position, $stored_expression);
				$html .= "
			 		<div class='row'>
			 			".$row."	
					</div>";
			}
			$nb_expressions = count($this->parameters[$parameter_name]);
			$html .= "<input type='hidden' id='".$parameter_name."_count' name='".$parameter_name."_count' value= '".$nb_expressions."' />";
		}
		$html .= "<div id='add_".$parameter_name."'></div>";
		return $html;
	}
	
	/**
	 * Completes the parent form with the settings of this datasource: the item
	 * creation mode selector, then the three lists of XPath expressions (content,
	 * title, link).
	 *
	 * @return string HTML of the form content
	 */
	public function get_form_content(){
		global $msg,$charset;
		
		$form = parent::get_form_content();
		$mode_label = htmlentities($msg['dsi_docwatch_datasource_monitoring_website_mode_creation_items'],ENT_QUOTES,$charset);
		// Attribute and label of each option, indexed by mode
		$selected_attr = array();
		$option_labels = array();
		foreach (array('all_change', 'by_change', 'all_links') as $mode) {
			$selected_attr[$mode] = ($mode == $this->parameters['mode_creation_items'] ? "selected='selected'" : "");
			$option_labels[$mode] = htmlentities($msg['dsi_docwatch_datasource_monitoring_website_mode_creation_items_'.$mode], ENT_QUOTES, $charset);
		}
		$content_section = $this->get_xpath_expressions_form('xpath_expressions');
		$title_section = $this->get_xpath_expressions_form('xpath_expressions_for_title');
		$link_section = $this->get_xpath_expressions_form('xpath_expressions_for_link');
		
		$form .= "<div class='row'>&nbsp;</div>
 		<div class='row'>
 			<label>".$mode_label."</label>
 		</div>
 		<div class='row'>
 			<select id='docwatch_datasource_monitoring_website_mode_creation_items' name='docwatch_datasource_monitoring_website_mode_creation_items'>
 				<option value='all_change' ".$selected_attr['all_change'].">".$option_labels['all_change']."</option>
				<option value='by_change' ".$selected_attr['by_change'].">".$option_labels['by_change']."</option>
				<option value='all_links' ".$selected_attr['all_links'].">".$option_labels['all_links']."</option>
			</select>
 		</div>
		<div class='row'>&nbsp;</div>
		".$content_section."
		<div class='row'>&nbsp;</div>
		".$title_section."
		<div class='row'>&nbsp;</div>
 		".$link_section."
		";
		return $form;
	}
	
	/**
	 * Reads the posted form values into $this->parameters, then lets the parent
	 * handle its own fields.
	 *
	 * The title and link expressions are paired with the content expressions by
	 * index when the page is analysed. The three lists are therefore filtered
	 * together: a row is kept when its content expression is filled, and its title
	 * and link expressions are stored at the same index (as an empty string when
	 * not filled). Filtering each list on its own shifted the indexes as soon as
	 * one field was left empty. Title or link expressions whose content expression
	 * is empty are dropped, since they could not be used.
	 *
	 * @return void
	 */
	public function set_from_form() {
		global $docwatch_datasource_monitoring_website_mode_creation_items;
		global $docwatch_datasource_monitoring_website_xpath_expressions;
		global $docwatch_datasource_monitoring_website_xpath_expressions_for_title;
		global $docwatch_datasource_monitoring_website_xpath_expressions_for_link;
		
		$this->parameters['mode_creation_items'] = stripslashes($docwatch_datasource_monitoring_website_mode_creation_items);
		
		// A list missing from the request is not an array: treat it as empty
		// instead of iterating over it
		$expressions = (is_array($docwatch_datasource_monitoring_website_xpath_expressions) ? array_values($docwatch_datasource_monitoring_website_xpath_expressions) : array());
		$titles = (is_array($docwatch_datasource_monitoring_website_xpath_expressions_for_title) ? array_values($docwatch_datasource_monitoring_website_xpath_expressions_for_title) : array());
		$links = (is_array($docwatch_datasource_monitoring_website_xpath_expressions_for_link) ? array_values($docwatch_datasource_monitoring_website_xpath_expressions_for_link) : array());
		
		$this->parameters['xpath_expressions'] = array();
		$this->parameters['xpath_expressions_for_title'] = array();
		$this->parameters['xpath_expressions_for_link'] = array();
		foreach ($expressions as $i=>$xpath_expression) {
			if(!$xpath_expression) {
				continue;
			}
			$this->parameters['xpath_expressions'][] = stripslashes($xpath_expression);
			$this->parameters['xpath_expressions_for_title'][] = (!empty($titles[$i]) ? stripslashes($titles[$i]) : '');
			$this->parameters['xpath_expressions_for_link'][] = (!empty($links[$i]) ? stripslashes($links[$i]) : '');
		}
		parent::set_from_form();
	}

} // end of docwatch_datasource_monitoring_website