<?php

/**
 * SeoParser - parser for SEO features. It performs the updates and
 * synchronization needed when pages, links, deeplinks or redirects change.
 *
 * @author Eugene I. Nezhuta
 */
class RCMS_Object_Parser_SeoParser {

	const PFS_FILE_NAME = 'pfs.prs';

	/**
	 * SeoParser Model
	 *
	 * @var RCMS_Object_Parser_SeoParserModel
	 */
	private $_model = null;

	/**
	 * Content to parse
	 *
	 * @var string
	 */
	private $_seoContent = '';

	/**
	 * Current domain name, built as 'http://' . SERVER_NAME . '/'
	 *
	 * @var string
	 */
	private $_currentDomain = '';

	/**
	 * Generator object
	 *
	 * @var  RCMS_Object_Generator_Generator
	 */
	private $_generator = null;

	/**
	 * Opening <a ...> tag of the deeplink currently being inserted.
	 * Read by _preg_callback_url().
	 *
	 * @var string
	 */
	private $_urlForDeeplink;

	/**
	 * Website url read from the 'config' registry entry (same value as $_siteUrl)
	 *
	 * @var string
	 */
	public $_websiteUrl;

	/**
	 * Current website url
	 *
	 * @var string
	 */
	private $_siteUrl;

	/**
	 * Create the parser, its model and its generator.
	 *
	 * @param string $content content to parse (optional, can be set later
	 *                        with setContentForParsing())
	 */
	public function  __construct($content = '') {
		if(!empty($content)) {
			$this->_seoContent = $content;
		}
		$this->_model = new RCMS_Object_Parser_SeoParserModel();
		$this->_generator = new RCMS_Object_Generator_Generator();
		// SERVER_NAME is not set when running from the command line
		$serverName = isset($_SERVER['SERVER_NAME']) ? $_SERVER['SERVER_NAME'] : '';
		$this->_currentDomain = 'http://' . $serverName . '/';
		// The config is stored serialized in the registry; unserialize it once
		$config = unserialize(Zend_Registry::get('config'));
		$this->_websiteUrl = $config->website->website->url;
		$this->_siteUrl = $this->_websiteUrl;
	}

	/**
	 * Set content for parser
	 *
	 * @param string $content
	 */
	public function setContentForParsing($content) {
		$this->_seoContent = $content;
	}

	/**
	 * Update link references for a container.
	 *
	 * Every anchor href found in the current content that points to a known
	 * site page (relative or prefixed with the current domain) is stored as an
	 * absolute link reference for the container, unless it is already stored.
	 *
	 * @param integer $containerId
	 */
	public function updateLinkReference($containerId) {
		if(!preg_match_all('~<a.*href\s*="(.*)".*>.*</a>~U', $this->_seoContent, $matches)) {
			return;
		}
		// Normalize each href once (page urls are stored without ".html") and
		// drop duplicates so every distinct link is checked a single time.
		$hrefs = array_unique(str_replace('.html', '', $matches[1]));
		$sitePagesUrls = $this->_model->getSitePagesUrls();
		foreach ($sitePagesUrls as $sitePagesUrl) {
			foreach ($hrefs as $href) {
				if(($sitePagesUrl == $href) || ($this->_currentDomain . $sitePagesUrl == $href)) {
					$absoluteHref = (stripos($href, $this->_currentDomain) !== false) ? $href : $this->_currentDomain . $href;
					if(!$this->_checkReferenseLinkExists($containerId, $absoluteHref)) {
						$this->_model->insertLinkReference($containerId, $absoluteHref);
					}
				}
			}
		}
	}

	/**
	 * Check if link reference for container already exists
	 *
	 * @param integer $cId
	 * @param string $href
	 * @return boolean
	 */
	private function _checkReferenseLinkExists($cId, $href) {
		return (bool) $this->_model->checkExists($cId, $href);
	}

	/**
	 * Build the sitemap entries for all non-draft pages and pass them to the
	 * generator.
	 *
	 * The draft category page and its children are left out. The 'index' page
	 * gets priority 1.0, every other page 0.8.
	 */
	public function createSiteMapFile() {
		$draftCategoryId = $this->_model->selectDraftCategoryId();
		$pages = $this->_model->selectAllPages();
		if(!is_array($pages)) {
			return;
		}
		// All entries share the same "last modified" stamp (generation time)
		$lastModified = date('c', time());
		$siteMapFilePages = array();
		foreach ($pages as $page) {
			if($page['url'] == RCMS_Object_QuickConfig_QuickConfig::$draftCategoryUrl || $page['parentCategoryId'] == $draftCategoryId) {
				continue;
			}
			$siteMapFilePages[] = array(
				'location' => trim($this->_websiteUrl . $page['url']) . '.html',
				'lastmod' => $lastModified,
				'changefreq' => 'daily',
				'priority' => ($page['url'] == 'index') ? '1.0' : '0.8'
			);
		}
		$this->_generator->generateSiteMapXml($siteMapFilePages);
	}

	/**
	 * Chain synchronization on page update or add
	 *
	 * @param string $oldUrl old page url
	 * @param string $newUrl new page url
	 * @param string $pageH1Tag h1 tag of the page
	 * @param integer $pageId id of the page
	 */
	public function chainSyncOnUpdate($oldUrl, $newUrl, $pageH1Tag, $pageId) {
		// update all links in content
		$this->_synchronizeLinks($oldUrl, $newUrl, $pageH1Tag);
		$this->_synchronizeDeeplinks($this->_currentDomain . $oldUrl .'.html', $this->_currentDomain . $newUrl . '.html');
		if($oldUrl != $newUrl) {
			// put renamed page into db for 301 redirect
			$this->_updateRenamed($pageId, $oldUrl, $newUrl);
			// update redirects
			$this->_synchronizeRedirects();
		}
		// synchronize page rank sculpting file
		$this->_syncPRSFile();
	}

	/**
	 * Synchronize links on page updating.
	 *
	 * In every container returned by the model for the old url, anchors that
	 * point to the old url are pointed to the new one and the title attribute
	 * of anchors pointing to the new url is set to the page h1.
	 *
	 * @param string $old
	 * @param string $new
	 * @param string $pageH1
	 */
	private function _synchronizeLinks($old, $new, $pageH1) {
		$containersIdsToSynchronize = $this->_model->getContainersIdsForSync($old, $this->_currentDomain);
		if(empty($containersIdsToSynchronize)) {
			return;
		}
		// Urls and the domain are literal text: quote them for use in a pattern
		// ('.', '?', '+' ... must not act as regex operators) ...
		$oldPattern = preg_quote($old, '~');
		$newPattern = preg_quote($new, '~');
		$domainPattern = preg_quote($this->_currentDomain, '~');
		// ... and escape them for use in a replacement ('$1' or '\1' inside an
		// url or an h1 must not be read as a back-reference).
		$newReplacement = $this->_escapeReplacement($new);
		$domainReplacement = $this->_escapeReplacement($this->_currentDomain);
		$h1Replacement = $this->_escapeReplacement($pageH1);
		// Single capture group: href of the new page, absolute or relative
		$newHref = '(' . $domainPattern . $newPattern . '\.html|' . $newPattern . '\.html)';

		foreach ($containersIdsToSynchronize as $containerId) {
			$container = new RCMS_Object_Container_Container($containerId);
			$content = $container->getContent();
			//@todo should be optimized
			// relative and absolute links to the old url -> new url
			$content = $this->_replaceOrKeep('~(<a.*\s+href\s*=\s*")(' . $oldPattern . ')(\.html".*>.*</a>)~U', '${1}' . $newReplacement . '${3}', $content);
			$content = $this->_replaceOrKeep('~(<a.*\s+href\s*=\s*")(' . $domainPattern . $oldPattern . ')(\.html".*>.*</a>)~U', '${1}' . $domainReplacement . $newReplacement . '${3}', $content);
			// refresh the title attribute for the supported attribute orders
			$content = $this->_replaceOrKeep('~(<a\shref=")' . $newHref . '("\stitle=")(.*)(">.*</a>)~U', '${1}${2}${3}' . $h1Replacement . '${5}', $content);
			$content = $this->_replaceOrKeep('~(<a\stitle=")(.*)("\shref=")' . $newHref . '(">.*</a>)~U', '${1}' . $h1Replacement . '${3}${4}${5}', $content);
			$content = $this->_replaceOrKeep('~(<a\srel=")(.*)("\stitle=")(.*)("\shref=")' . $newHref . '(">.*</a>)~U', '${1}${2}${3}' . $h1Replacement . '${5}${6}${7}', $content);
			$container->setContent($content);
			$container->save();
			$this->_model->updateLinkReference($containerId, $old, $new, $this->_currentDomain);
			unset($container);
		}
	}

	/**
	 * Escape a literal string for use inside a preg_replace() replacement, so
	 * that '$' and '\' are inserted as they are.
	 *
	 * @param string $text
	 * @return string
	 */
	private function _escapeReplacement($text) {
		return strtr((string) $text, array('\\' => '\\\\', '$' => '\\$'));
	}

	/**
	 * preg_replace() that returns the subject unchanged when the regex engine
	 * fails (preg_replace() returns null in that case).
	 *
	 * @param string $pattern
	 * @param string $replacement
	 * @param string $subject
	 * @return string
	 */
	private function _replaceOrKeep($pattern, $replacement, $subject) {
		$result = preg_replace($pattern, $replacement, $subject);
		return ($result === null) ? $subject : $result;
	}

	/**
	 * Synchronize deeplinks
	 *
	 * @param string $oldUrl
	 * @param string $newUrl
	 */
	private function _synchronizeDeeplinks($oldUrl, $newUrl) {
		$this->_model->replaceDeeplinksUrl($oldUrl, $newUrl);
	}

	/**
	 * Update of renamed pages for 301 redirect
	 *
	 * @param integer $pageId
	 * @param string $url
	 * @param string $newUrl
	 */
	private function _updateRenamed($pageId, $url, $newUrl) {
		if($pageId && $url && $newUrl) {
			// a redirect pointing away from the new url would create a loop
			$this->_cleanBadRedirect($newUrl);
			$this->_model->insertRenamedPage($pageId, preg_quote(urlencode(str_replace('?', '', urldecode($url)))), $newUrl);
		}
	}

	/**
	 * Synchronize 301 redirects: pass the renamed pages table to the generator.
	 *
	 * The generator receives a list of array('currUrl' => ..., 'oldUrl' => ...)
	 * or an empty string when there is nothing to redirect.
	 */
	private function _synchronizeRedirects() {
		$preparedRedirectTable = array();
		$redirectTable = $this->_model->getRenamedPages();
		if(!empty($redirectTable)) {
			foreach ($redirectTable as $data) {
				// NOTE(review): old urls are also preg_quote()d when stored by
				// _updateRenamed() - confirm the model does not keep the slashes.
				$preparedRedirectTable[] = array(
					'currUrl' => $data['currUrl'],
					'oldUrl'  => preg_quote(urldecode($data['oldUrl']))
				);
			}
		}
		// the generator has always been given '' (not an empty array) here
		$this->_generator->generateRedirects(empty($preparedRedirectTable) ? '' : $preparedRedirectTable);
	}

	/**
	 * Refresh the .htaccess file
	 *
	 */
	public function refreshHtAccess() {
		$this->_synchronizeRedirects();
	}

	/**
	 * Replace deeplinks: wrap every occurrence of a deeplink name found in the
	 * contents with an anchor to the deeplink url, save the changed contents
	 * and refresh their link references.
	 *
	 * Longer names are processed first so that a short name can not break a
	 * longer one that contains it.
	 */
	public function replaceDeeplinksInContent() {
		$deeplinks = $this->_model->selectAllDeeplinks();
		if(empty($deeplinks) || !is_array($deeplinks)) {
			return;
		}
		$arrayContents = $this->_model->selectContents();
		if(empty($arrayContents) || !is_array($arrayContents)) {
			return;
		}
		$deeplinks = $this->_sortDeeplinksByNameLength($deeplinks);
		$webSiteURL = $this->_siteUrl;
		$arrayPagesH1 = array();
		$existDeeplinksInContent = array();

		foreach($arrayContents as $key => $content) {
			// protect existing title/alt attributes: their spaces are replaced with
			// a placeholder so that a deeplink is never inserted inside them
			$arrayContents[$key]['content_value'] = preg_replace_callback('~(title|alt)="(.*)"~Ui', array($this, '_preg_callback_dash'), $content['content_value']);
			$arrayContents[$key]['length'] = strlen($arrayContents[$key]['content_value']);
			$arrayPagesH1[$content['page_h1']] = $webSiteURL . $content['page_url'] . '.html';
		}

		// what may follow a deeplink name: whitespace, a final dot, one of a few
		// punctuation characters or a closing/line-break tag (optionally after a dot)
		$tags = '<\/span>|\.<\/span>|<\/p>|\.<\/p>|<br>|<br \/>|\.<br>|\.<br />|<\/div>|\.<\/div>|<\/li>|\.<\/li>|<\/b>|\.<\/b>|' .
			'<\/u>|\.<\/u>|<\/s>|\.<\/s>|<\/em>|\.<\/em>|<\/strong>|\.<\/strong>|<\/sub>|\.<\/sub>|<\/sup>|\.<\/sup>';
		$delimiters = '(\s+|\.$|[\$\&\^\@\!\(\)\+\#\,\%\|]|' . $tags . ')';

		foreach($deeplinks as $deeplink) {
			if(trim($deeplink['name']) === '') {
				// an empty name would match in front of every delimiter
				continue;
			}
			$openingTag = $this->_buildDeeplinkOpeningTag($deeplink, $arrayPagesH1);
			if($openingTag === null) {
				// unknown deeplink type: there is no anchor to insert
				continue;
			}
			$this->_urlForDeeplink = $openingTag;
			// the name is literal text, not a regular expression
			$pattern = '~(' . preg_quote($deeplink['name'], '~') . ')' . $delimiters . '~Ui';

			foreach($arrayContents as $key => $content) {
				$currentValue = $arrayContents[$key]['content_value'];
				if ((isset($content['page_name']) && $deeplink['name'] === $content['page_name']) || trim($currentValue) === '' || $webSiteURL . $content['page_url'] . '.html' === $deeplink['url']) {
					continue;
				}
				$replaced = preg_replace_callback($pattern, array($this, '_preg_callback_url'), $currentValue);
				if($replaced === null) {
					// regex engine failure: keep the content as it is
					continue;
				}
				$arrayContents[$key]['content_value'] = $replaced;
				// the content grew, so at least one anchor has been inserted
				if($arrayContents[$key]['length'] < strlen($replaced)) {
					$arrayContents[$key]['length'] = strlen($replaced);
					$existDeeplinksInContent[] = array(
						'deeplink_id'   => $deeplink['id'],
						'content_id'    => $arrayContents[$key]['content_id'],
						'content'       => $replaced,
						'container_id'  => $arrayContents[$key]['container_id']
					);
				}
			}
		}

		foreach($existDeeplinksInContent as $data) {
			// restore the spaces hidden behind the placeholders
			$data['content'] = str_replace(array('~%dash%~', '~%deeplink%~'), ' ', $data['content']);
			$this->_model->updateContent($data['content_id'], $data['content']);
			$this->_model->insertIdDeeplinkAndContent($data['deeplink_id'], $data['content_id']);
			$this->_seoContent = $data['content'];
			$this->updateLinkReference($data['container_id']);
		}
	}

	/**
	 * Order deeplinks by the length of their name, longest first. Deeplinks
	 * with names of equal length keep their relative order.
	 *
	 * @param array $deeplinks list of deeplink rows (each with a 'name' key)
	 * @return array re-indexed, sorted list
	 */
	private function _sortDeeplinksByNameLength(array $deeplinks) {
		$deeplinks = array_values($deeplinks);
		$lengths = array();
		foreach ($deeplinks as $deeplink) {
			$lengths[] = strlen($deeplink['name']);
		}
		// the original position is the tie-breaker, which keeps the sort stable
		$positions = array_keys($deeplinks);
		array_multisort($lengths, SORT_DESC, SORT_NUMERIC, $positions, SORT_ASC, SORT_NUMERIC, $deeplinks);
		return $deeplinks;
	}

	/**
	 * Build the opening anchor tag for a deeplink.
	 *
	 * Spaces of the title are replaced with the '~%dash%~' placeholder (restored
	 * before the content is saved) so that shorter deeplinks processed later do
	 * not match inside the title.
	 *
	 * @param array $deeplink deeplink row ('type', 'url' and, for 'int', 'nofollow')
	 * @param array $pagesH1  page url indexed by page h1
	 * @return string|null the tag, or null when the deeplink type is unknown
	 */
	private function _buildDeeplinkOpeningTag(array $deeplink, array $pagesH1) {
		switch($deeplink['type']) {
			case 'int':
				$h1 = array_search($deeplink['url'], $pagesH1);
				$title = ($h1 === false) ? '' : str_replace(' ', '~%dash%~', $h1);
				return '<a href="' . $deeplink['url'] . '" title="' . $title . '"' . ($deeplink['nofollow']==1?' rel="nofollow" ':'') . '>';
			case 'ext':
				return '<a href="' . $deeplink['url'] . '" target="_blank">';
			default:
				return null;
		}
	}

	/**
	 * Callback for deeplinks: wraps the matched name with the current anchor.
	 *
	 * Spaces inside the name are replaced with the '~%deeplink%~' placeholder.
	 * The delimiter that followed the name (punctuation or a tag) is kept;
	 * plain whitespace is collapsed to a single space.
	 *
	 * @param array $matches 1 => deeplink name, 2 => delimiter after the name
	 * @return string
	 */
	private function _preg_callback_url($matches) {
		$delimiter = isset($matches[2]) ? $matches[2] : '';
		$suffix = (trim($delimiter) === '') ? ' ' : $delimiter;
		return ' ' . $this->_urlForDeeplink . str_replace(' ', '~%deeplink%~', $matches[1]) . '</a>' . $suffix;
	}

	/**
	 * Callback for title/alt attributes: replaces the spaces of the attribute
	 * value with the '~%dash%~' placeholder.
	 *
	 * @param array $matches 1 => attribute name, 2 => attribute value
	 * @return string
	 */
	private function _preg_callback_dash($matches) {
		return $matches[1] . '="' . str_replace(' ', '~%dash%~', $matches[2]) . '"';
	}

	/**
	 * Delete deeplink from container content: the anchors of the deeplink are
	 * replaced with their plain text and the deeplink/content relations removed.
	 *
	 * @param integer $id
	 */
	public function deleteDeeplinkFromContent($id) {
		$data = $this->_model->selectDeeplinkAndContentById($id);
		if(!empty($data) && is_array($data)) {
			foreach ($data as $row) {
				// url and name are literal text ('.', '?' ... are not regex operators)
				$pattern = '~<a href="' . preg_quote($row['url_deeplink'], '~') . '"[^>]*>(' . preg_quote($row['name_deeplink'], '~') . ')</a>~Ui';
				$content = $this->_replaceOrKeep($pattern, '$1', $row['content']);
				$this->_model->updateContent($row['content_id'], $content);
			}
		}
		$this->_model->deleteDeeplinkAndContentByDeeplinkId($id);
	}

	/**
	 * Clean redirect table. Prevents the browser from getting into a redirect loop
	 *
	 * @param string $url
	 * @return void
	 */
	private function _cleanBadRedirect($url) {
		$badRedirectRecordId = $this->_model->selectBadRedirectId($url);
		if(empty($badRedirectRecordId)) {
			return;
		}
		$this->_model->cleanBadRedirect($badRedirectRecordId, $url);
	}

	/**
	 * Interface method for cleaning bad redirects
	 *
	 * @param string $url
	 */
	public function cleanBadRedirect($url) {
		$this->_cleanBadRedirect($url);
	}

	/**
	 * Synchronize news routes according to news folder
	 *
	 */
	public function syncNewsRoutes() {
		$this->_generator->generateNewsRoutes();
	}

	/**
	 * Run page rank sculpting.
	 *
	 * Anchors of the current content that point to a ".html" page are replaced
	 * with <span id="{page id}" class="nsp">, except links to pages related to
	 * the silo and links to the index page.
	 *
	 * @param integer $siloId
	 * @return string the processed content
	 */
	public function runPRSculpting($siloId) {
		$silo = new RCMS_Object_Silo_Silo($siloId);
		$siloPages = $silo->getRelatedPages();
		if(!is_array($siloPages)) {
			$siloPages = array();
		}
		if(!preg_match_all('/<a.*href="([\w0-9-\s\p{L}\p{M}\p{P}]+\.html)".*>/u', $this->_seoContent, $matches)) {
			return $this->_seoContent;
		}
		$homeLinks = array($this->_siteUrl . 'index.html', $this->_siteUrl . 'index', 'index.html');
		foreach (array_unique($matches[1]) as $href) {
			// page url as stored: without the site url and the extension
			$pageUrl = str_replace(array($this->_siteUrl, '.html'), '', $href);
			if(in_array($pageUrl, $siloPages) || in_array($href, $homeLinks, true)) {
				continue;
			}
			$pageId = $this->_model->selectPageIdByUrl(urlencode($pageUrl));
			// the href is literal text, not a regular expression
			$this->_seoContent = $this->_replaceOrKeep('~<a.*href="(' . preg_quote($href, '~') . ')".*>(.*)</a>~Uu', '<span id="' . $pageId .'" class="nsp">$2</span>', $this->_seoContent);
		}
		return $this->_seoContent;
	}

	/**
	 * Synchronize page rank sculpting file (interface method)
	 *
	 */
	public function syncPRSFile() {
		$this->_syncPRSFile();
	}

	/**
	 * Synchronize page rank sculpting file: pass the 'public/tmp/' folder of the
	 * website and the pages urls/ids to the generator.
	 *
	 */
	protected function _syncPRSFile() {
		$config = unserialize(Zend_Registry::get('config'));
		$path = $config->website->website->path . 'public/tmp/';
		$this->_generator->generatePRSFile($path, $this->getPagesUrlsAndIds());
	}

	/**
	 * Get pages urls and ids. Urls are returned url-decoded.
	 *
	 * @return array
	 */
	public function getPagesUrlsAndIds() {
		$pagesAndIds = array();
		$result = $this->_model->getPagesUrlsAndIds();
		foreach ($result as $item) {
			$item['url'] = urldecode($item['url']);
			$pagesAndIds[] = $item;
		}
		return $pagesAndIds;
	}

	/**
	 * Write the canonical host rules into the main .htaccess file.
	 *
	 * The '#{%canonic_rules%}' placeholder of the file is replaced with a rule
	 * that permanently redirects every request made to another host name to the
	 * host of the website url. Nothing is done when the website lives in a
	 * subfolder, when the website url has no scheme or host, or when the file
	 * is missing, has no placeholder or is not writable.
	 */
	public function makeCanonicRules() {
		$url = parse_url($this->_siteUrl);
		if(!is_array($url) || empty($url['host']) || empty($url['scheme'])) {
			return;
		}
		// site in a subfolder: no canonical rules
		$path = (isset($url['path']) && $url['path'] != '/') ? $url['path'] : '';
		if($path !== '') {
			return;
		}
		// The canonical host is the configured one, with or without "www."
		$host = $url['host'];
		$replacement = 'RewriteCond %{HTTP_HOST} !^' . preg_quote($host) . "$\n"
			. 'RewriteRule ^(.*)$ ' . $url['scheme'] . '://' . $host . '/$1 [R=permanent,L]';

		$config = unserialize(Zend_Registry::get('config'));
		// getting main .htaccess
		$mainHtaccessPath = $config->website->website->path . '.htaccess';
		$mainHtaccess = RCMS_Tools_FilesystemTools::getFile($mainHtaccessPath);
		$placeholder = '#{%canonic_rules%}';
		if(!is_string($mainHtaccess) || strpos($mainHtaccess, $placeholder) === false) {
			return;
		}
		if(is_writable($mainHtaccessPath)) {
			// plain string replacement: with preg_replace() the '$1' of the rule
			// would be read as a back-reference and removed from the rule
			RCMS_Tools_FilesystemTools::saveFile($mainHtaccessPath, str_replace($placeholder, $replacement, $mainHtaccess));
		}
	}
}
