Snippet Name: Strip HTML to Text
Description: Strips tags, tables, etc., and returns nice, clean text from HTML source.
Comment: (none)
Language: PHP
Highlight Mode: PHP
Last Modified: February 27th, 2009
|
<?PHP
/**
 * Convert an HTML fragment to plain text.
 *
 * Opening tags of block-level elements (headings, tables, rows, list items,
 * line breaks, paragraphs, preformatted blocks and divs) become line breaks,
 * adjacent table cells are joined with " - ", every remaining tag is removed,
 * and the resulting whitespace is collapsed.
 *
 * HTML entities (e.g. &amp;) are not decoded, and the text content of
 * elements such as <script> or <style> is kept; only the tags are removed.
 *
 * @param string $html HTML source to convert.
 *
 * @return string Text with tags removed, runs of spaces collapsed, no
 *                leading/trailing whitespace on any line and no blank lines.
 */
function html2text($html)
{
    // The lookahead demands whitespace, "/" or ">" right after the tag name,
    // so "<p>" and "<br/>" match while "<progress>" or "<link>" do not.
    // "[^>]*" (rather than "+") lets attribute-less tags such as "<p>" match.
    // "pre" is listed explicitly so preformatted blocks start on a new line.
    $blockTags = '~<(?:h[1-6]|table|tr|li|br|pre|p|div)(?=[\s/>])[^>]*>~i';
    $html = preg_replace($blockTags, "\n", $html);

    // A closing cell directly followed by an opening cell becomes a separator.
    $html = preg_replace('~</t[dh]>\s*<t[dh](?=[\s/>])[^>]*>~i', ' - ', $html);

    // Drop whatever markup is left.
    $html = preg_replace('~<[^>]+>~', '', $html);

    // Collapse runs of spaces, then trim whitespace at both ends of each line.
    // "\s" also matches newlines, so leading blank lines disappear here too.
    $html = preg_replace('~ +~', ' ', $html);
    $html = preg_replace('~^\s+~m', '', $html);
    $html = preg_replace('~\s+$~m', '', $html);

    // Collapse consecutive newlines into one.
    $html = preg_replace('~\n+~', "\n", $html);

    return $html;
}
?> |