R55536 - in /trunk: egroupware/api/src/ egroupware/api/src/Html/ egroupware/api/src/Mail/ phpgwapi/inc/

ralfbecker_sf 2016-03-28 19:38:21 UTC #1

Author: ralfbecker
New Revision: 55536

URL: http://svn.stylite.de/viewvc/egroupware?rev=55536&view=rev
Log:
move html mail specific part of translation class to Api\Mail\Html

Added:
trunk/egroupware/api/src/Mail/Html.php
- copied, changed from r55535, trunk/phpgwapi/inc/class.translation.inc.php
Modified:
trunk/egroupware/api/src/Html/HtmLawed.php
trunk/egroupware/api/src/Mail.php
trunk/phpgwapi/inc/class.translation.inc.php

Modified: trunk/egroupware/api/src/Html/HtmLawed.php
URL: http://svn.stylite.de/viewvc/egroupware/trunk/egroupware/api/src/Html/HtmLawed.php?rev=55536&r1=55535&r2=55536&view=diff

--- trunk/egroupware/api/src/Html/HtmLawed.php (original)
+++ trunk/egroupware/api/src/Html/HtmLawed.php Mon Mar 28 21:38:21 2016
@@ -13,9 +13,6 @@
namespace EGroupware\Api\Html;

use EGroupware\Api;

-// explicitly name old, not yet converted api classes
-use translation; // mail specific stuff not in Api\Translation

require_once(__DIR__.'/htmLawed/htmLawed.php');

@@ -141,7 +138,7 @@
{
//error_log(__METHOD__.__LINE__.array2string($newStyle[0]));
$style2buffer = implode('',$newStyle[0]);

  	// only replace what we have found, we use it here, as we use the same routine in translation::replaceTagsCompletley

  	// only replace what we have found, we use it here, as we use the same routine in Api\Mail\Html::replaceTagsCompletley
  	// no need to do the extra routine
  	$html = str_ireplace($newStyle[0],'',$html);
  }

@@ -170,15 +167,15 @@
// CSS Security
// http://code.google.com/p/browsersec/wiki/Part1#Cascading_stylesheets
$css = preg_replace('/(javascript|expession|-moz-binding)/i','',$style);

  if (stripos($css,'script')!==false) translation::replaceTagsCompletley($css,'script'); // Strip out script that may be included

  if (stripos($css,'script')!==false) Api\Mail\Html::replaceTagsCompletley($css,'script'); // Strip out script that may be included
  // we need this, as styledefinitions are enclosed with curly brackets; and template stuff tries to replace everything between curly brackets that is having no horizontal whitespace
  // as the comments as <!-- styledefinition --> in stylesheet are outdated, and ck-editor does not understand it, we remove it
  $css_no_comment = str_replace(array(':','<!--','-->'),array(': ','',''),$css);
  //error_log(__METHOD__.__LINE__.$css);

  // we already removed what we have found, above, as we used pretty much the same routine as in translation::replaceTagsCompletley

  // we already removed what we have found, above, as we used pretty much the same routine as in Api\Mail\Html::replaceTagsCompletley
  // no need to do the extra routine
  // TODO: we may have to strip urls and maybe comments and ifs

  //if (stripos($html,'style')!==false) translation::replaceTagsCompletley($html,'style'); // clean out empty or pagewide style definitions / left over tags

  //if (stripos($html,'style')!==false) Api\Mail\Html::replaceTagsCompletley($html,'style'); // clean out empty or pagewide style definitions / left over tags
  return $css_no_comment;

}

Modified: trunk/egroupware/api/src/Mail.php
URL: http://svn.stylite.de/viewvc/egroupware/trunk/egroupware/api/src/Mail.php?rev=55536&r1=55535&r2=55536&view=diff

--- trunk/egroupware/api/src/Mail.php (original)
+++ trunk/egroupware/api/src/Mail.php Mon Mar 28 21:38:21 2016
@@ -31,7 +31,6 @@
use tidy;

// old not yet converted api classes
-use translation; // mail stuff not in Api\Translation
use addressbook_merge; // should go to Contacts\Merge

/**
@@ -1529,7 +1528,7 @@
if (empty($headerObject['BODYPREVIEW'])&&$part->getPrimaryType()== 'text')
{
$charset = $part->getContentTypeParameter('charset');

  					$buffer = translation::convertHTMLToText($part->toString(array(

  					$buffer = Mail\Html::convertHTMLToText($part->toString(array(
  										'encode' => Horde_Mime_Part::ENCODE_BINARY,	// otherwise we cant recode charset
  									)), $charset, 'utf-8');
  					$headerObject['BODYPREVIEW']=trim(str_replace(array("\r\n","\r","\n"),' ',mb_substr(Translation::convert_jsonsafe($buffer),0,((int)$_fetchPreviews<300?300:$_fetchPreviews))));

@@ -1562,7 +1561,7 @@
($partdisposition !== 'attachment')) {
$_structure=$part;
$this->fetchPartContents($uid, $_structure, false,true);

  					$headerObject['BODYPREVIEW']=trim(str_replace(array("\r\n","\r","\n"),' ',mb_substr(translation::convertHTMLToText($_structure->getContents()),0,((int)$_fetchPreviews<300?300:$_fetchPreviews))));

  					$headerObject['BODYPREVIEW']=trim(str_replace(array("\r\n","\r","\n"),' ',mb_substr(Mail\Html::convertHTMLToText($_structure->getContents()),0,((int)$_fetchPreviews<300?300:$_fetchPreviews))));
  					$charSet=Translation::detect_encoding($headerObject['BODYPREVIEW']);
  					// add line breaks to $bodyParts
  					//error_log(__METHOD__.' ('.__LINE__.') '.' Charset:'.$bodyParts[$i]['charSet'].'->'.$bodyParts[$i]['body']);

@@ -2212,7 +2211,7 @@
}
else
{

  	$_string = translation::decodeMailHeader($_string,self::$displayCharset);

  	$_string = Mail\Html::decodeMailHeader($_string,self::$displayCharset);
  	$test = @json_encode($_string);
  	//error_log(__METHOD__.__LINE__.' ->'.strlen($singleBodyPart['body']).' Error:'.json_last_error().'<- BodyPart:#'.$test.'#');
  	if (($test=="null" || $test === false || is_null($test)) && strlen($_string)>0)

@@ -4175,18 +4174,18 @@
$_html = str_replace(array(’&’,’

’,"

",’

‘,’’,’
’,’’,’<o:p></o:p>’,’<o:p>’,’</o:p>’),
array(’&’, ‘
’, ‘
’, ‘
’, ‘’,’’, ‘’, ‘’, ‘’, ‘’),$_html);
//$_html = str_replace(array(’&’),array(’&’),$_html);

  if (stripos($_html,'style')!==false) translation::replaceTagsCompletley($_html,'style'); // clean out empty or pagewide style definitions / left over tags

  if (stripos($_html,'head')!==false) translation::replaceTagsCompletley($_html,'head'); // Strip out stuff in head

  //if (stripos($_html,'![if')!==false && stripos($_html,'<![endif]>')!==false) translation::replaceTagsCompletley($_html,'!\[if','<!\[endif\]>',false); // Strip out stuff in ifs

  //if (stripos($_html,'!--[if')!==false && stripos($_html,'<![endif]-->')!==false) translation::replaceTagsCompletley($_html,'!--\[if','<!\[endif\]-->',false); // Strip out stuff in ifs

  if (stripos($_html,'style')!==false) Mail\Html::replaceTagsCompletley($_html,'style'); // clean out empty or pagewide style definitions / left over tags

  if (stripos($_html,'head')!==false) Mail\Html::replaceTagsCompletley($_html,'head'); // Strip out stuff in head

  //if (stripos($_html,'![if')!==false && stripos($_html,'<![endif]>')!==false) Mail\Html::replaceTagsCompletley($_html,'!\[if','<!\[endif\]>',false); // Strip out stuff in ifs

  //if (stripos($_html,'!--[if')!==false && stripos($_html,'<![endif]-->')!==false) Mail\Html::replaceTagsCompletley($_html,'!--\[if','<!\[endif\]-->',false); // Strip out stuff in ifs
  //error_log(__METHOD__.' ('.__LINE__.') '.$_html);

  if (get_magic_quotes_gpc() === 1) $_html = stripslashes($_html);
  // Strip out doctype in head, as htmlLawed cannot handle it TODO: Consider extracting it and adding it afterwards

  if (stripos($_html,'!doctype')!==false) translation::replaceTagsCompletley($_html,'!doctype');

  if (stripos($_html,'?xml:namespace')!==false) translation::replaceTagsCompletley($_html,'\?xml:namespace','/>',false);

  if (stripos($_html,'?xml version')!==false) translation::replaceTagsCompletley($_html,'\?xml version','\?>',false);

  if (strpos($_html,'!CURSOR')!==false) translation::replaceTagsCompletley($_html,'!CURSOR');

  if (stripos($_html,'!doctype')!==false) Mail\Html::replaceTagsCompletley($_html,'!doctype');

  if (stripos($_html,'?xml:namespace')!==false) Mail\Html::replaceTagsCompletley($_html,'\?xml:namespace','/>',false);

  if (stripos($_html,'?xml version')!==false) Mail\Html::replaceTagsCompletley($_html,'\?xml version','\?>',false);

  if (strpos($_html,'!CURSOR')!==false) Mail\Html::replaceTagsCompletley($_html,'!CURSOR');
  // htmLawed filter only the 'body'
  //preg_match('`(<htm.+?<body[^>]*>)(.+?)(</body>.*?</html>)`ims', $_html, $matches);
  //if ($matches[2])

@@ -4857,7 +4856,7 @@
if (!$preserveHTML)
{
$alreadyHtmlLawed=false;

  			// as translation::convert reduces \r\n to \n and purifier eats \n -> peplace it with a single space

  			// as Translation::convert reduces \r\n to \n and purifier eats \n -> peplace it with a single space
  			$newBody = str_replace("\n"," ",$newBody);
  			// convert HTML to text, as we dont want HTML in infologs
  			if ($useTidy && extension_loaded('tidy'))

@@ -4902,7 +4901,7 @@
$alreadyHtmlLawed=true;
}
//error_log(__METHOD__.' ('.__LINE__.') '.' after purify:'.$newBody);

  			if ($preserveHTML==false) $newBody = translation::convertHTMLToText($newBody,self::$displayCharset,true,true);

  			if ($preserveHTML==false) $newBody = Mail\Html::convertHTMLToText($newBody,self::$displayCharset,true,true);
  			//error_log(__METHOD__.' ('.__LINE__.') '.' after convertHTMLToText:'.$newBody);
  			if ($preserveHTML==false) $newBody = nl2br($newBody); // we need this, as htmLawed removes \r\n
  			/*if (!$alreadyHtmlLawed) */ $mailClass->getCleanHTML($newBody); // remove stuff we regard as unwanted

@@ -4913,9 +4912,9 @@
continue;
}
//error_log(__METHOD__.' ('.__LINE__.') '.' Body(after specialchars):'.$newBody);

  	//use translation::convertHTMLToText instead of strip_tags, (even message is plain text) as strip_tags eats away too much

  	//use Mail\Html::convertHTMLToText instead of strip_tags, (even message is plain text) as strip_tags eats away too much
  	//$newBody = strip_tags($newBody); //we need to fix broken tags (or just stuff like "<800 USD/p" )

  	$newBody = translation::convertHTMLToText($newBody,self::$displayCharset,false,false);

  	$newBody = Mail\Html::convertHTMLToText($newBody,self::$displayCharset,false,false);
  	//error_log(__METHOD__.' ('.__LINE__.') '.' Body(after strip tags):'.$newBody);
  	$newBody = htmlspecialchars_decode($newBody,ENT_QUOTES);
  	//error_log(__METHOD__.' ('.__LINE__.') '.' Body (after hmlspc_decode):'.$newBody);

@@ -5300,7 +5299,7 @@
// CSS Security
// http://code.google.com/p/browsersec/wiki/Part1#Cascading_stylesheets
$css = preg_replace('/(javascript|expression|-moz-binding)/i','',$style);

  if (stripos($css,'script')!==false) translation::replaceTagsCompletley($css,'script'); // Strip out script that may be included

  if (stripos($css,'script')!==false) Mail\Html::replaceTagsCompletley($css,'script'); // Strip out script that may be included
  // we need this, as styledefinitions are enclosed with curly brackets; and template stuff tries to replace everything between curly brackets that is having no horizontal whitespace
  // as the comments as <!-- styledefinition --> in stylesheet are outdated, and ck-editor does not understand it, we remove it
  $css = str_replace(array(':','<!--','-->'),array(': ','',''),$css);

@@ -5985,7 +5984,7 @@
{
if ($bodyParts[$i]['mimeType']=='text/html')
{

  				$bodyParts[$i]['body'] = translation::convertHTMLToText($bodyParts[$i]['body'],$bodyParts[$i]['charSet'],true,$stripalltags=true);

  				$bodyParts[$i]['body'] = Mail\Html::convertHTMLToText($bodyParts[$i]['body'],$bodyParts[$i]['charSet'],true,$stripalltags=true);
  				$bodyParts[$i]['mimeType']='text/plain';
  			}
  		}

Copied: trunk/egroupware/api/src/Mail/Html.php (from r55535, trunk/phpgwapi/inc/class.translation.inc.php)
URL: http://svn.stylite.de/viewvc/egroupware/trunk/egroupware/api/src/Mail/Html.php?p2=trunk/egroupware/api/src/Mail/Html.php&p1=trunk/phpgwapi/inc/class.translation.inc.php&r1=55535&r2=55536&rev=55536&view=diff

--- trunk/phpgwapi/inc/class.translation.inc.php (original)
+++ trunk/egroupware/api/src/Mail/Html.php Mon Mar 28 21:38:21 2016
@@ -1,24 +1,24 @@

<?php /** - * EGroupware API - Translations + * EGroupware API - Html mail handling * * @link http://www.egroupware.org - * @author Joseph Engo - * @author Dan Kuykendall - * Copyright (C) 2000, 2001 Joseph Engo + * @author Klaus Leithoff * @license http://opensource.org/licenses/lgpl-license.php LGPL - GNU Lesser General Public License * @package api * @version $Id$ */ +namespace EGroupware\Api\Mail; + use EGroupware\Api; /** - * EGroupware API - Translations + * Html mail handling * - * @deprecated use Api\Translation for non-mail specific methods + * @todo clean up IDE warnings, mostly multiple assigments */ -class translation extends Api\Translation +class Html { /** * Return the decoded string meeting some additional requirements for mailheaders @@ -57,7 +57,7 @@ $convertAtEnd = false; foreach((array)$elements as $element) { - if ($element->charset == 'default') $element->charset = self::detect_encoding($element->text); + if ($element->charset == 'default') $element->charset = Api\Translation::detect_encoding($element->text); if ($element->charset != 'x-unknown') { if( strtoupper($element->charset) != 'UTF-8') $element->text = preg_replace($sar,$rar,$element->text); @@ -67,7 +67,7 @@ $element->text = self::decodeMailHeader($element->text, $element->charset); $element->charset = $displayCharset; } - $newString .= self::convert($element->text,$element->charset); + $newString .= Api\Translation::convert($element->text,$element->charset); } else { Modified: trunk/phpgwapi/inc/class.translation.inc.php URL: http://svn.stylite.de/viewvc/egroupware/trunk/phpgwapi/inc/class.translation.inc.php?rev=55536&r1=55535&r2=55536&view=diff ============================================================================== --- trunk/phpgwapi/inc/class.translation.inc.php (original) +++ trunk/phpgwapi/inc/class.translation.inc.php Mon Mar 28 21:38:21 2016 @@ -16,7 +16,7 @@ /** * EGroupware API - Translations * - * @deprecated use Api\Translation for non-mail specific methods + * 
@deprecated use Api\Translation for non-mail specific methods or Api\Mail\Html for others */ class translation extends Api\Translation { @@ -26,83 +26,11 @@ * @param string $_string -> part of an mailheader * @param string $displayCharset the charset parameter specifies the character set to represent the result by (if iconv_mime_decode is to be used) * @return string + * @deprecated use Api\Mail\Html::decodeMailHeader */ static function decodeMailHeader($_string, $displayCharset='utf-8') { - //error_log(__FILE__.','.__METHOD__.':'."called with $_string and CHARSET $displayCharset"); - if(function_exists('imap_mime_header_decode')) - { - // some characterreplacements, as they fail to translate - $sar = array( - '@(\x84|\x93|\x94)@', - '@(\x96|\x97|\x1a)@', - '@(\x91|\x92)@', - '@(\x85)@', - '@(\x86)@', - ); - $rar = array( - '"', - '-', - '\'', - '...', - '+', - ); - - $newString = ''; - - $string = preg_replace('/\?=\s+=\?/', '?= =?', $_string); - - $elements=imap_mime_header_decode($string); - - $convertAtEnd = false; - foreach((array)$elements as $element) - { - if ($element->charset == 'default') $element->charset = self::detect_encoding($element->text); - if ($element->charset != 'x-unknown') - { - if( strtoupper($element->charset) != 'UTF-8') $element->text = preg_replace($sar,$rar,$element->text); - // check if there is a possible nested encoding; make sure that the inputstring and the decoded result are different to avoid loops - if(preg_match('/\?=.+=\?/', $element->text) && $element->text != $_string) - { - $element->text = self::decodeMailHeader($element->text, $element->charset); - $element->charset = $displayCharset; - } - $newString .= self::convert($element->text,$element->charset); - } - else - { - $newString .= $element->text; - $convertAtEnd = true; - } - } - if ($convertAtEnd) $newString = self::decodeMailHeader($newString,$displayCharset); - return preg_replace('/([\000-\012\015\016\020-\037\075])/','',$newString); - } - 
elseif(function_exists(mb_decode_mimeheader)) - { - $matches = null; - if(preg_match_all('/=\?.*\?Q\?.*\?=/iU', $string=$_string, $matches)) - { - foreach($matches[0] as $match) - { - $fixedMatch = str_replace('_', ' ', $match); - $string = str_replace($match, $fixedMatch, $string); - } - $string = str_replace('=?ISO8859-','=?ISO-8859-', - str_replace('=?windows-1258','=?ISO-8859-1',$string)); - } - $string = mb_decode_mimeheader($string); - return preg_replace('/([\000-\012\015\016\020-\037\075])/','',$string); - } - elseif(function_exists(iconv_mime_decode)) - { - // continue decoding also if an error occurs - $string = @iconv_mime_decode($_string, 2, $displayCharset); - return preg_replace('/([\000-\012\015\016\020-\037\075])/','',$string); - } - - // no decoding function available - return preg_replace('/([\000-\012\015\016\020-\037\075])/','',$_string); + return Api\Mail\Html::decodeMailHeader($_string, $displayCharset); } /** @@ -110,21 +38,11 @@ * as well as those emailadresses in links, and within broken links * @param string the text to process * @return 1 + * @deprecated use Api\Mail\Html::replaceEmailAdresses */ static function replaceEmailAdresses(&$text) { - //error_log($text); - //replace CRLF with something other to be preserved via preg_replace as CRLF seems to vanish - $text2 = str_replace("\r\n",'<#cr-lf#>',$text); - // replace emailaddresses eclosed in <> (eg.: ) with the emailaddress only (e.g: me@you.de) - $text3 = preg_replace("/(<|<a href=\")*(mailto:([\w\.,-.,_.,0-9.]+)(@)([\w\.,-.,_.,0-9.]+))(>|>)*/i","$2 ", $text2); - //$text = preg_replace_callback("/(<|<a href=\")*(mailto:([\w\.,-.,_.,0-9.]+)(@)([\w\.,-.,_.,0-9.]+))(>|>)*/i",'self::transform_mailto2text',$text); - //$text = preg_replace('~]+href=\"(mailto:)+([^"]+)\"[^>]*>~si','$2 ',$text); - $text4 = preg_replace_callback('~]+href=\"(mailto:)+([^"]+)\"[^>]*>([ @\w\.,-.,_.,0-9.]+)<\/a>~si','self::transform_mailto2text',$text3); - $text5 = 
preg_replace("/(([\w\.,-.,_.,0-9.]+)(@)([\w\.,-.,_.,0-9.]+))( |\s)*(<\/a>)*( |\s)*(>|>)*/i","$1 ", $text4); - $text6 = preg_replace("/(<|<)*(([\w\.,-.,_.,0-9.]+)@([\w\.,-.,_.,0-9.]+))(>|>)*/i","$2 ", $text5); - $text = str_replace('<#cr-lf#>',"\r\n",$text6); - return 1; + return Api\Mail\Html::replaceEmailAdresses($text); } /** @@ -135,81 +53,21 @@ * @param string $endtag can be different from tag but should be used only, if begin and endtag are known to be different e.g.: * @param bool $addbracesforendtag if endtag is given, you may decide if the braces are to be added, * or if you want the string to be matched as is - * @return void the modified text is passed via reference + * @deprecated use Api\Mail\Html::replaceTagsCompletley */ static function replaceTagsCompletley(&$_body,$tag,$endtag='',$addbracesforendtag=true) { - if ($tag) $tag = strtolower($tag); - $singleton = false; - if ($endtag=='/>') $singleton =true; - if ($endtag == '' || empty($endtag) || !isset($endtag)) - { - $endtag = $tag; - } else { - $endtag = strtolower($endtag); - //error_log(__METHOD__.' Using EndTag:'.$endtag); - } - // strip tags out of the message completely with their content - if ($_body) { - if ($singleton) - { - //$_body = preg_replace('~<'.$tag.'[^>].*? '.$endtag.'~simU','',$_body); - $_body = preg_replace('~<?'.$tag.'[^>].* '.$endtag.'~simU','',$_body); // we are in Ungreedy mode, so we expect * to be ungreedy without specifying ? - } - else - { - $found=null; - if ($addbracesforendtag === true ) - { - if (stripos($_body,'<'.$tag)!==false) $ct = preg_match_all('#<'.$tag.'(?:\s.*)?>(.+)#isU', $_body, $found);

```
  			if ($ct>0)
```
```
  			{
```

  				//error_log(__METHOD__.__LINE__.array2string($found[0]));

  				// only replace what we have found

  				$_body = str_ireplace($found[0],'',$_body);

```
  			}
```

  			// remove left over tags, unfinished ones, and so on

  			$_body = preg_replace('~<'.$tag.'[^>]*?>~si','',$_body);

```
  		}
```

  		if ($addbracesforendtag === false )

```
  		{
```

  			if (stripos($_body,'<'.$tag)!==false)  $ct = preg_match_all('#<'.$tag.'(?:\s.*)?>(.+)'.$endtag.'#isU', $_body, $found);

```
  			if ($ct>0)
```
```
  			{
```

  				//error_log(__METHOD__.__LINE__.array2string($found[0]));

  				// only replace what we have found

  				$_body = str_ireplace($found[0],'',$_body);

```
  			}
```

-/*

  			$_body = preg_replace('~<'.$tag.'[^>]*?>(.*?)'.$endtag.'~simU','',$_body);

-*/

  			// remove left over tags, unfinished ones, and so on

  			$_body = preg_replace(array('~<'.$tag.'[^>]*?>~si', '~'.$endtag.'~'), '', $_body);

```
  		}
```
```
  	}
```
```
  }
```

  Api\Mail\Html::replaceTagsCompletley($_body, $tag, $endtag, $addbracesforendtag);

}

static function transform_mailto2text($matches)
{

  //error_log(__METHOD__.__LINE__.array2string($matches));

```
  // this is the actual url
```

  $matches[2] = trim(strip_tags($matches[2]));

  $matches[3] = trim(strip_tags($matches[3]));

  $matches[2] = str_replace(array('%40','%20'),array('@',' '),$matches[2]);

  $matches[3] = str_replace(array('%40','%20'),array('@',' '),$matches[3]);

  return $matches[1].$matches[2].($matches[2]==$matches[3]?' ':' -> '.$matches[3].' ');

  return Api\Mail\Html::transform_mailto2text($matches);

}

static function transform_url2text($matches)
{

  //error_log(__METHOD__.__LINE__.array2string($matches));

```
  $linkTextislink = false;
```
```
  // this is the actual url
```

  $matches[2] = trim(strip_tags($matches[2]));

  if ($matches[2]==$matches[1]) $linkTextislink = true;

  $matches[1] = str_replace(' ','%20',$matches[1]);

  return ($linkTextislink?' ':'[ ').$matches[1].($linkTextislink?'':' -> '.$matches[2]).($linkTextislink?' ':' ]');

```
  return Api\Mail\Html::transform_url2text($matches);
```
}

/**
@@ -219,244 +77,11 @@
- @param bool $stripcrl : flag to indicate for the removal of all crlf \r\n
- @param bool $stripalltags : flag to indicate wether or not to strip $_html from all remaining tags
- @return text $_html : the modified text.
- @deprecated use Api\Mail\Html::convertHTMLToText
  */
  static function convertHTMLToText($_html,$displayCharset=false,$stripcrl=false,$stripalltags=true)
  {

  // assume input isHTML, but test the input anyway, because,

  // if it is not, we may not want to strip whitespace

```
  $isHTML = true;
```

  if (strlen(strip_tags($_html)) == strlen($_html))

```
  {
```
```
  	$isHTML = false;
```

  	// return $_html; // maybe we should not proceed at all

```
  }
```

  if ($displayCharset === false) $displayCharset = self::$system_charset;

```
  //error_log(__METHOD__.$_html);
```
```
  #print '<hr>';
```

  #print "<pre>"; print htmlspecialchars($_html);

```
  #print "</pre>";
```
```
  #print "<hr>";
```

  if (stripos($_html,'style')!==false) self::replaceTagsCompletley($_html,'style'); // clean out empty or pagewide style definitions / left over tags

  if (stripos($_html,'head')!==false) self::replaceTagsCompletley($_html,'head'); // Strip out stuff in head

  if (stripos($_html,'![if')!==false && stripos($_html,'<![endif]>')!==false) self::replaceTagsCompletley($_html,'!\[if','<!\[endif\]>',false); // Strip out stuff in ifs

  if (stripos($_html,'!--[if')!==false && stripos($_html,'<![endif]-->')!==false) self::replaceTagsCompletley($_html,'!--\[if','<!\[endif\]-->',false); // Strip out stuff in ifs

  $Rules = array ('@<script[^>]*?>.*?</script>@siU', // Strip out javascript

  	'@&(quot|#34);@i',                // Replace HTML entities

  	'@&(amp|#38);@i',                 //   Ampersand &

  	'@&(lt|#60);@i',                  //   Less Than <

  	'@&(gt|#62);@i',                  //   Greater Than >

  	'@&(nbsp|#160);@i',               //   Non Breaking Space

  	'@&(iexcl|#161);@i',              //   Inverted Exclamation point

  	'@&(cent|#162);@i',               //   Cent

  	'@&(pound|#163);@i',              //   Pound

  	'@&(copy|#169);@i',               //   Copyright

  	'@&(reg|#174);@i',                //   Registered

  	'@&(trade|#8482);@i',             //   trade

  	'@&#39;@i',                       //   singleQuote

  	'@(\xc2\xa0)@',                   //   nbsp or tab (encoded windows-style)

  	'@(\xe2\x80\x8b)@',               //   ZERO WIDTH SPACE

```
  );
```
```
  $Replace = array ('',
```
```
  	'"',
```
```
  	'#amper#sand#',
```
```
  	'<',
```
```
  	'>',
```
```
  	' ',
```
```
  	chr(161),
```
```
  	chr(162),
```
```
  	chr(163),
```
```
  	'(C)',//chr(169),// copyrighgt
```
```
  	'(R)',//chr(174),// registered
```
```
  	'(TM)',// trade
```
```
  	"'",
```
```
  	' ',
```
```
  	'',
```
```
  );
```

  $_html = preg_replace($Rules, $Replace, $_html);

  //   removing carriage return linefeeds, preserve those enclosed in <pre> </pre> tags

```
  if ($stripcrl === true )
```
```
  {
```

  	if (stripos($_html,'<pre ')!==false || stripos($_html,'<pre>')!==false)

```
  	{
```

  		$contentArr = self::splithtmlByPRE($_html);

  		foreach ($contentArr as $k =>&$elem)

```
  		{
```

  			if (stripos($elem,'<pre ')===false && stripos($elem,'<pre>')===false)

```
  			{
```

  				//$elem = str_replace('@(\r\n)@i',' ',$elem);

  				$elem = str_replace(array("\r\n","\n"),($isHTML?'':' '),$elem);

```
  			}
```
```
  		}
```
```
  		$_html = implode('',$contentArr);
```
```
  	}
```
```
  	else
```
```
  	{
```

  		$_html = str_replace(array("\r\n","\n"),($isHTML?'':' '),$_html);

```
  	}
```
```
  }
```
```
  $tags = array (
```
```
  	0 => '~<h[123][^>]*>\r*\n*~si',
```
```
  	1 => '~<h[456][^>]*>\r*\n*~si',
```
```
  	2 => '~<table[^>]*>\r*\n*~si',
```
```
  	3 => '~<tr[^>]*>\r*\n*~si',
```
```
  	4 => '~<li[^>]*>\r*\n*~si',
```
```
  	5 => '~<br[^>]*>\r*\n*~si',
```
```
  	6 => '~<br[^>]*>~si',
```
```
  	7 => '~<p[^>]*>\r*\n*~si',
```
```
  	8 => '~<div[^>]*>\r*\n*~si',
```
```
  	9 => '~<hr[^>]*>\r*\n*~si',
```
```
  	10 => '/<blockquote type="cite">/',
```
```
  	11 => '/<blockquote>/',
```
```
  	12 => '~</blockquote>~si',
```
```
  	13 => '~<blockquote[^>]*>~si',
```
```
  	14 => '/<=([1234567890])/',
```
```
  	15 => '/>=([1234567890])/',
```
```
  	16 => '/<([1234567890])/',
```
```
  	17 => '/>([1234567890])/',
```
```
  );
```
```
  $Replace = array (
```
```
  	0 => "\r\n",
```
```
  	1 => "\r\n",
```
```
  	2 => "\r\n",
```
```
  	3 => "\r\n",
```
```
  	4 => "\r\n",
```
```
  	5 => "\r\n",
```
```
  	6 => "\r\n",
```
```
  	7 => "\r\n",
```
```
  	8 => "\r\n",
```

  	9 => "\r\n__________________________________________________\r\n",

```
  	10 => '#blockquote#type#cite#',
```
```
  	11 => '#blockquote#type#cite#',
```
```
  	12 => '#blockquote#end#cite#',
```
```
  	13 => '#blockquote#type#cite#',
```
```
  	14 => '#lowerorequal#than#$1',
```
```
  	15 => '#greaterorequal#than#$1',
```
```
  	16 => '#lower#than#$1',
```
```
  	17 => '#greater#than#$1',
```
```
  );
```

  $_html = preg_replace($tags,$Replace,$_html);

  $_html = preg_replace('~</t(d|h)>\s*<t(d|h)[^>]*>~si',' - ',$_html);

  $_html = preg_replace('~<img[^>]+>~s','',$_html);

  // replace emailaddresses eclosed in <> (eg.: <me@you.de>) with the emailaddress only (e.g: me@you.de)

```
  self::replaceEmailAdresses($_html);
```

  //convert hrefs to description -> URL

  //$_html = preg_replace('~<a[^>]+href=\"([^"]+)\"[^>]*>(.*)</a>~si','[$2 -> $1]',$_html);

  $_html = preg_replace_callback('~<a[^>]+href=\"([^"]+)\"[^>]*>(.*?)</a>~si','self::transform_url2text',$_html);

  // reducing double \r\n to single ones, dont mess with pre sections

```
  if ($stripcrl === true && $isHTML)
```
```
  {
```

  	if (stripos($_html,'<pre ')!==false || stripos($_html,'<pre>')!==false)

```
  	{
```

  		$contentArr = self::splithtmlByPRE($_html);

  		foreach ($contentArr as $k =>&$elem)

```
  		{
```

  			if (stripos($elem,'<pre ')===false && stripos($elem,'<pre>')===false)

```
  			{
```

  				//this is supposed to strip out all remaining stuff in tags, this is sometimes taking out whole sections off content

```
  				if ( $stripalltags ) {
```

  					$_html = preg_replace('~<[^>^@]+>~s','',$_html);

```
  				}
```

  				// strip out whitespace inbetween CR/LF

  				$elem = preg_replace('~\r\n\s+\r\n~si', "\r\n\r\n", $elem);

  				// strip out / reduce exess CR/LF

  				$elem = preg_replace('~\r\n{3,}~si',"\r\n\r\n",$elem);

```
  			}
```
```
  		}
```
```
  		$_html = implode('',$contentArr);
```
```
  	}
```
```
  	else
```
```
  	{
```

  		//this is supposed to strip out all remaining stuff in tags, this is sometimes taking out whole sections off content

```
  		if ( $stripalltags ) {
```

  			$_html = preg_replace('~<[^>^@]+>~s','',$_html);

```
  		}
```

  		// strip out whitespace inbetween CR/LF

  		$_html = preg_replace('~\r\n\s+\r\n~si', "\r\n\r\n", $_html);

```
  		// strip out / reduce exess CR/LF
```

  		$_html = preg_replace('~(\r\n){3,}~si',"\r\n\r\n",$_html);

```
  	}
```
```
  }
```

  //this is supposed to strip out all remaining stuff in tags, this is sometimes taking out whole sections off content

```
  if ( $stripalltags ) {
```

  	$_html = preg_replace('~<[^>^@]+>~s','',$_html);

  	//$_html = strip_tags($_html, '<a>');

```
  }
```

  // reducing spaces (not for input that was plain text from the beginning)

  if ($isHTML) $_html = preg_replace('~ +~s',' ',$_html);

```
  // restoring ampersands
```

  $_html = str_replace('#amper#sand#','&',$_html);

  // restoring lower|greater[or equal] than

  $_html = str_replace('#lowerorequal#than#','<=',$_html);

  $_html = str_replace('#greaterorequal#than#','>=',$_html);

  $_html = str_replace('#lower#than#','<',$_html);

  $_html = str_replace('#greater#than#','>',$_html);

  //error_log(__METHOD__.__LINE__.' Charset:'.$displayCharset.' -> '.$_html);

  $_html = html_entity_decode($_html, ENT_COMPAT, $displayCharset);

  //error_log(__METHOD__.__LINE__.' Charset:'.$displayCharset.' After html_entity_decode: -> '.$_html);

  //self::replaceEmailAdresses($_html);

```
  $pos = strpos($_html, 'blockquote');
```

  //error_log("convert HTML2Text: $_html");

```
  if($pos === false) {
```
```
  	return $_html;
```
```
  } else {
```
```
  	$indent = 0;
```
```
  	$indentString = '';
```

  	$quoteParts = preg_split('/#blockquote#type#cite#/', $_html, -1, PREG_SPLIT_OFFSET_CAPTURE);

  	foreach($quoteParts as $quotePart) {

```
  		if($quotePart[1] > 0) {
```
```
  			$indent++;
```
```
  			$indentString .= '>';
```
```
  		}
```

  		$quoteParts2 = preg_split('/#blockquote#end#cite#/', $quotePart[0], -1, PREG_SPLIT_OFFSET_CAPTURE);

  		foreach($quoteParts2 as $quotePart2) {

```
  			if($quotePart2[1] > 0) {
```
```
  				$indent--;
```

  				$indentString = substr($indentString, 0, $indent);

```
  			}
```

  			$quoteParts3 = explode("\r\n", $quotePart2[0]);

  			foreach($quoteParts3 as $quotePart3) {

  				//error_log(__METHOD__.__LINE__.'Line:'.$quotePart3);

  				$allowedLength = 76-strlen("\r\n$indentString");

  				// only break lines, if not already indented

  				if (substr($quotePart3,0,strlen($indentString)) != $indentString)

```
  				{
```

  					if (strlen($quotePart3) > $allowedLength) {

```
  						$s=explode(" ", $quotePart3);
```
```
  						$quotePart3 = "";
```
```
  						$linecnt = 0;
```
```
  						foreach ($s as $k=>$v) {
```
```
  							$cnt = strlen($v);
```

  							// only break long words within the wordboundaries,

  							// but it may destroy links, so we check for href and dont do it if we find it

  							if($cnt > $allowedLength && stripos($v,'href=')===false) {

  								//error_log(__METHOD__.__LINE__.'LongWordFound:'.$v);

  								$v=wordwrap($v, $allowedLength, "\r\n$indentString", true);

```
  							}
```

  							// the rest should be broken at the start of the new word that exceeds the limit

  							if ($linecnt+$cnt > $allowedLength) {

```
  								$v="\r\n$indentString$v";
```

  								//error_log(__METHOD__.__LINE__.'breaking here:'.$v);

```
  								$linecnt = 0;
```
```
  							} else {
```
```
  								$linecnt += $cnt;
```
```
  							}
```

  							if (strlen($v))  $quotePart3 .= (strlen($quotePart3) ? " " : "").$v;

```
  						}
```
```
  					}
```
```
  				}
```

  				//error_log(__METHOD__.__LINE__.'partString to return:'.$indentString . $quotePart3);

  				$asciiTextBuff[] = $indentString . $quotePart3 ;

```
  			}
```
```
  		}
```
```
  	}
```

  	return implode("\r\n",$asciiTextBuff);

```
  }
```

```
  return Api\Mail\Html::convertHTMLToText($_html, $displayCharset, $stripcrl, $stripalltags);
```
}

/**
@@ -464,37 +89,10 @@
- @author Leithoff, Klaus
- @param string html
- @return mixed array of parts or unaffected html
- @deprecated use Api\Mail\Html::splithtmlByPRE
  */
  static function splithtmlByPRE($html)
  {

```
  $searchFor = '<pre ';
```
```
  $pos = stripos($html,$searchFor);
```
```
  if ($pos===false)
```
```
  {
```
```
  	$searchFor = '<pre>';
```
```
  	$pos = stripos($html,$searchFor);
```
```
  }
```
```
  if ($pos === false)
```
```
  {
```
```
  	return $html;
```
```
  }
```
```
  $html2ret[] = substr($html,0,$pos);
```
```
  while ($pos!==false)
```
```
  {
```

  	$endofpre = stripos($html,'</pre>',$pos);

```
  	$length = $endofpre-$pos+6;
```

  	$html2ret[] = substr($html,$pos,$length);

```
  	$searchFor = '<pre ';
```

  	$pos = stripos($html,$searchFor, $endofpre+6);

```
  	if ($pos===false)
```
```
  	{
```
```
  		$searchFor = '<pre>';
```

  		$pos = stripos($html,$searchFor, $endofpre+6);

```
  	}
```

  	$html2ret[] = ($pos ? substr($html,$endofpre+6,$pos-($endofpre+6)): substr($html,$endofpre+6));

```
  	//$pos=false;
```
```
  }
```

  //error_log(__METHOD__.__LINE__.array2string($html2ret));

```
  return $html2ret;
```

  return Api\Mail\Html::splithtmlByPRE($html);

}
}

Transform Data into Opportunity.
Accelerate data analysis in your applications with
Intel Data Analytics Acceleration Library.
Click to learn more.
http://pubads.g.doubleclick.net/gampad/clk?id=278785471&iu=/4140

eGroupWare-cvs mailing list
eGroupWare-cvs@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/egroupware-cvs