php里的html内容切取
HTML 内容的截取:在网上找了很多方法,要么不能用,要么中文会出现乱码,于是我自己组合了一个。
/**
 * Truncates text, optionally keeping HTML markup well-formed.
 *
 * Cuts $text down to roughly $length display-width units and appends $ending
 * when (and only when) something was actually removed. Widths are measured
 * with the same weights cutString() uses: a multi-byte UTF-8 character or an
 * upper-case ASCII letter counts 1, every other byte counts 0.5. An HTML
 * entity (e.g. &amp;) is treated as one narrow character (0.5) and is never
 * split. Because cutString() stops only after the budget has been reached,
 * the result may exceed $length by at most 0.5.
 *
 * Both functions are declared as plain functions, which is how truncate()
 * calls cutString(). To use them inside a class, declare them static and
 * call the helpers through self::.
 *
 * @param string    $text         String to truncate.
 * @param int|float $length       Maximum display width of the result, including $ending.
 * @param string    $ending       Ending appended to a truncated string.
 * @param bool      $exact        If false, the cut is moved back to the last space so that
 *                                no word is split. With $considerHtml only the text after
 *                                the last emitted tag is searched, so tags are never cut.
 *                                If there is no space to fall back to, the cut is kept as is.
 * @param bool      $considerHtml If true, tags are copied uncounted and every tag still
 *                                open at the cut is closed after $ending.
 * @return string Trimmed string.
 */
function truncate($text, $length = 100, $ending = '...', $exact = true, $considerHtml = false)
{
    $text = (string) $text;
    $ending = (string) $ending;
    $ending_width = cutStringWidth($ending);
    $open_tags = array();

    if ($considerHtml) {
        // If the visible text already fits, return the markup untouched.
        // The "s" flag matches the splitter below, so tags spanning several
        // lines are not mistaken for text.
        if (truncateTextWidth(preg_replace('/<.*?>/s', '', $text)) <= $length) {
            return $text;
        }

        // Split into (optional tag, following text) pairs.
        preg_match_all('/(<.+?>)?([^<>]*)/s', $text, $lines, PREG_SET_ORDER);

        // The ending is part of the budget from the start.
        $total_length = $ending_width;
        $truncate = '';

        foreach ($lines as $line_matchings) {
            $tag = isset($line_matchings[1]) ? $line_matchings[1] : '';
            $content = isset($line_matchings[2]) ? $line_matchings[2] : '';

            // Tags are copied to the output without being counted.
            if (!empty($tag)) {
                if (preg_match('/^<(\s*.+?\/\s*|\s*(img|br|input|hr|area|base|basefont|col|frame|isindex|link|meta|param)(\s.+?)?)>$/is', $tag)) {
                    // "Empty element" (<br>, <img ... />): nothing to keep track of.
                } elseif (preg_match('/^<\s*\/([^\s]+?)\s*>$/s', $tag, $tag_matchings)) {
                    // Closing tag: forget the matching open tag. Names are stored
                    // lower-cased, so compare lower-cased as well (</B> closes <b>).
                    $pos = array_search(strtolower($tag_matchings[1]), $open_tags, true);
                    if ($pos !== false) {
                        unset($open_tags[$pos]);
                    }
                } elseif (preg_match('/^<\s*([^\s>!]+).*?>$/s', $tag, $tag_matchings)) {
                    // Opening tag: remember it, innermost first.
                    array_unshift($open_tags, strtolower($tag_matchings[1]));
                }
                $truncate .= $tag;
            }

            $content_length = truncateTextWidth($content);
            if ($total_length + $content_length > $length) {
                // This text run exceeds the budget: keep only what still fits.
                $piece = truncateCutText($content, $length - $total_length);
                if (!$exact) {
                    // Only the final text run is trimmed, so the cut can never
                    // land inside a tag and $open_tags stays accurate.
                    $piece = truncateAtLastSpace($piece);
                }
                $truncate .= $piece;
                break;
            }

            $truncate .= $content;
            $total_length += $content_length;

            // Budget used up exactly at the end of this text run.
            if ($total_length >= $length) {
                break;
            }
        }
    } else {
        if (cutStringWidth($text) <= $length) {
            return $text;
        }
        $truncate = cutString($text, $length - $ending_width);
        if (!$exact) {
            $truncate = truncateAtLastSpace($truncate);
        }
    }

    // Add the defined ending to the text.
    $truncate .= $ending;

    // Close all tags that are still open (the list is empty without $considerHtml).
    foreach ($open_tags as $tag) {
        $truncate .= '</' . $tag . '>';
    }

    return $truncate;
}

/**
 * Returns the leading part of a UTF-8 string that fills $cutlength width units.
 *
 * A multi-byte character (2, 3 or 4 bytes) or an upper-case ASCII letter
 * counts 1; any other byte (lower-case letters, digits, half-width
 * punctuation) counts 0.5. Characters are consumed while the running width is
 * still below $cutlength, so the result may overshoot by 0.5. Multi-byte
 * characters are never split.
 *
 * @param string    $sourcestr UTF-8 string to cut.
 * @param int|float $cutlength Width budget; a value <= 0 yields an empty string.
 * @return string Leading part of $sourcestr.
 */
function cutString($sourcestr, $cutlength)
{
    $sourcestr = (string) $sourcestr;
    list($bytes) = cutStringScan($sourcestr, $cutlength);

    return (string) substr($sourcestr, 0, $bytes);
}

/**
 * Measures the whole string in the width units used by cutString().
 *
 * @internal Helper for truncate().
 * @param string $str UTF-8 string to measure.
 * @return int|float Display width.
 */
function cutStringWidth($str)
{
    list(, $width) = cutStringScan((string) $str, INF);

    return $width;
}

/**
 * Walks a UTF-8 string character by character until $cutlength width units
 * have been consumed or the string ends.
 *
 * @internal Shared scanner behind cutString() and cutStringWidth().
 * @param string    $sourcestr UTF-8 string to scan.
 * @param int|float $cutlength Width budget (INF scans the whole string).
 * @return array array(bytes consumed, width consumed)
 */
function cutStringScan($sourcestr, $cutlength)
{
    $byte_count = strlen($sourcestr);
    $offset = 0;
    $width = 0;

    while ($width < $cutlength && $offset < $byte_count) {
        $lead = ord($sourcestr[$offset]);
        if ($lead >= 240) {
            // 4-byte sequence (e.g. emoji)
            $offset += 4;
            $width += 1;
        } elseif ($lead >= 224) {
            // 3-byte sequence (e.g. CJK)
            $offset += 3;
            $width += 1;
        } elseif ($lead >= 192) {
            // 2-byte sequence
            $offset += 2;
            $width += 1;
        } elseif ($lead >= 65 && $lead <= 90) {
            // upper-case ASCII letter: counted as a full-width character
            $offset += 1;
            $width += 1;
        } else {
            // lower-case letters, digits, half-width punctuation, stray bytes
            $offset += 1;
            $width += 0.5;
        }
    }

    // A sequence truncated at the very end of the input must not push the
    // offset past the string.
    return array(min($offset, $byte_count), $width);
}

/**
 * Measures a text run in width units, counting each HTML entity as one
 * narrow character.
 *
 * @internal Helper for truncate().
 * @param string $text Text without tags.
 * @return int|float Display width.
 */
function truncateTextWidth($text)
{
    $plain = preg_replace('/&[0-9a-z]{2,8};|&#[0-9]{1,7};|&#x[0-9a-f]{1,6};/i', ' ', (string) $text);

    return cutStringWidth((string) $plain);
}

/**
 * Cuts a text run to a width budget without splitting HTML entities.
 *
 * @internal Helper for truncate().
 * @param string    $text   Text without tags.
 * @param int|float $budget Width units still available.
 * @return string Leading part of $text.
 */
function truncateCutText($text, $budget)
{
    // With one capture group and empty pieces kept, odd indexes are entities.
    $parts = preg_split(
        '/(&[0-9a-z]{2,8};|&#[0-9]{1,7};|&#x[0-9a-f]{1,6};)/i',
        (string) $text,
        -1,
        PREG_SPLIT_DELIM_CAPTURE
    );

    $result = '';
    $used = 0;
    foreach ($parts as $index => $part) {
        if ($used >= $budget) {
            break;
        }
        if ($index % 2 === 1) {
            // An entity is atomic and weighs the same as the space that
            // stands in for it when widths are measured.
            $result .= $part;
            $used += 0.5;
        } else {
            $piece = cutString($part, $budget - $used);
            $result .= $piece;
            $used += cutStringWidth($piece);
        }
    }

    return $result;
}

/**
 * Drops everything from the last space onward; a string without a space is
 * returned unchanged.
 *
 * The space is a single ASCII byte, so cutting at its byte offset never
 * splits a multi-byte character.
 *
 * @internal Helper for truncate().
 * @param string $str Text to trim back to a word boundary.
 * @return string Trimmed text.
 */
function truncateAtLastSpace($str)
{
    $spacepos = strrpos($str, ' ');
    if ($spacepos === false) {
        return $str;
    }

    return (string) substr($str, 0, $spacepos);
}
这两个函数都是从网上找来的,然后组合在一起。呵呵
使用:
echo self::truncate($str,146,'...',true,true);
好像这个也不错:http://code.google.com/p/cut-html-string/
不过我测试时有问题,可能是我的测试方法不对。
浙公网安备 33010602011771号