PHP 里的 HTML 内容截取

关于 HTML 内容的截取，我在网上找了很多方法，要么不能用，要么截取后出现乱码，于是自己组合了一个。

  /**
   * Truncates text, optionally keeping HTML markup well-formed.
   *
   * If the text is longer than $length it is shortened and $ending is appended.
   * The text is returned untouched when its byte length (plain-text byte length
   * in HTML mode) does not exceed $length. The actual cut is delegated to
   * cutString() of this class so that multi-byte characters are never split.
   *
   * NOTE(review): the "is it short enough" checks count bytes via strlen(),
   * while cutString() applies its own length rule, so the two measures can
   * disagree for the same $length -- confirm which unit callers expect.
   *
   * @param string $text         String to truncate.
   * @param int    $length       Maximum length, including the ending.
   * @param string $ending       Ending to be appended to the trimmed string.
   * @param bool   $exact        If false, the result is cut back to the last space
   *                             so that no word is cut in the middle.
   * @param bool   $considerHtml If true, tags are copied without being counted and
   *                             tags still open at the cut are closed afterwards.
   * @return string Trimmed string.
   */
  function truncate($text, $length = 100, $ending = '...', $exact = true, $considerHtml = false) {
      // matches one HTML entity (named, decimal or hexadecimal)
      $entity_pattern = '/&[0-9a-z]{2,8};|&#[0-9]{1,7};|&#x[0-9a-f]{1,6};/i';

      if ($considerHtml) {
          // if the plain text is not longer than the maximum length, return the whole text
          // ("s" flag so that tags spanning several lines are stripped too, like in the splitter below)
          if (strlen(preg_replace('/<.*?>/s', '', $text)) <= $length) {
              return $text;
          }

          // split the markup into pairs of [optional tag, text following it]
          preg_match_all('/(<.+?>)?([^<>]*)/s', $text, $lines, PREG_SET_ORDER);

          // the ending is part of the budget from the start
          $total_length = strlen($ending);
          // lower-cased names of tags opened but not yet closed, most recent first
          $open_tags = array();
          $truncate = '';

          foreach ($lines as $line_matchings) {
              // if there is a tag in this pair, track it and add it (uncounted) to the output
              if (!empty($line_matchings[1])) {
                  if (preg_match('/^<(\s*.+?\/\s*|\s*(img|br|input|hr|area|base|basefont|col|frame|isindex|link|meta|param)(\s.+?)?)>$/is', $line_matchings[1])) {
                      // "empty element" with or without closing slash (e.g. <br/>): nothing to track
                  } else if (preg_match('/^<\s*\/([^\s]+?)\s*>$/s', $line_matchings[1], $tag_matchings)) {
                      // closing tag (e.g. </b>): remove the most recent matching open tag.
                      // Names are stored lower-cased, so the lookup has to be lower-cased as well.
                      $pos = array_search(strtolower($tag_matchings[1]), $open_tags, true);
                      if ($pos !== false) {
                          unset($open_tags[$pos]);
                      }
                  } else if (preg_match('/^<\s*([^\s>!]+).*?>$/s', $line_matchings[1], $tag_matchings)) {
                      // opening tag (e.g. <b>): remember it at the front of the list
                      array_unshift($open_tags, strtolower($tag_matchings[1]));
                  }
                  $truncate .= $line_matchings[1];
              }

              // length of the text part; every entity is counted as a single character
              $content_length = strlen(preg_replace($entity_pattern, ' ', $line_matchings[2]));
              if ($total_length + $content_length > $length) {
                  // budget that is left for this text part
                  $left = $length - $total_length;
                  $entities_length = 0;
                  if (preg_match_all($entity_pattern, $line_matchings[2], $entities, PREG_OFFSET_CAPTURE)) {
                      // every entity starting inside the remaining budget costs one unit of
                      // $left, while its full byte length is added to the size of the cut
                      foreach ($entities[0] as $entity) {
                          if ($entity[1] + 1 - $entities_length <= $left) {
                              $left--;
                              $entities_length += strlen($entity[0]);
                          } else {
                              // no more characters left
                              break;
                          }
                      }
                  }

                  // cutString() is a method of this class, so it must be called through
                  // self:: -- a bare cutString() call looks for a global function and fails
                  $truncate .= self::cutString($line_matchings[2], $left + $entities_length);
                  // maximum length is reached, so leave the loop
                  break;
              } else {
                  $truncate .= $line_matchings[2];
                  $total_length += $content_length;
              }

              // if the maximum length is reached, leave the loop
              if ($total_length >= $length) {
                  break;
              }
          }
      } else {
          if (strlen($text) <= $length) {
              return $text;
          }
          // reserve room for the ending, then cut without splitting multi-byte characters
          $truncate = self::cutString($text, $length - strlen($ending));
      }

      // if words shouldn't be cut in the middle, cut back to the last space
      if (!$exact) {
          // strrpos() returns false (not null) when there is no space, so isset() cannot
          // be used to detect "not found"
          $spacepos = strrpos($truncate, ' ');
          if ($spacepos !== false) {
              // $spacepos is a byte offset, therefore substr() is the matching cutter;
              // cutting at an ASCII space can never split a UTF-8 sequence
              // NOTE(review): in HTML mode the last space may sit inside a tag -- not handled here
              $truncate = substr($truncate, 0, $spacepos);
          }
      }

      // add the defined ending to the text
      $truncate .= $ending;

      if ($considerHtml) {
          // close all tags that are still open, innermost first
          foreach ($open_tags as $tag) {
              $truncate .= '</' . $tag . '>';
          }
      }

      return $truncate;
  }
118 
/**
 * Returns the leading part of a UTF-8 string, cut by weighted width.
 *
 * Characters are taken from the start of $sourcestr until the accumulated
 * width reaches $cutlength or the string ends. Weights:
 *   - multi-byte character (2, 3 or 4 bytes): 1
 *   - ASCII upper-case letter A-Z:            1
 *   - any other single byte:                  0.5
 * A character is always copied completely, so the result never ends in the
 * middle of a multi-byte sequence. No ellipsis is appended.
 *
 * @param string    $sourcestr UTF-8 string to cut.
 * @param int|float $cutlength Maximum weighted width of the result.
 * @return string The (possibly shortened) string; '' for empty input or a
 *                $cutlength of 0 or less.
 */
private function cutString($sourcestr,$cutlength)
{
    $sourcestr = (string) $sourcestr;
    $str_length = strlen($sourcestr); // length in bytes
    $offset = 0;                      // byte offset of the next unread character
    $width = 0;                       // weighted width consumed so far

    // strictly "<": there is no character at offset $str_length
    while ($width < $cutlength && $offset < $str_length) {
        $lead = ord($sourcestr[$offset]); // value of the character's first byte

        if ($lead >= 240) {
            // 4-byte sequence (e.g. emoji); treating it as 3 bytes would split the character
            $offset += 4;
            $width += 1;
        } elseif ($lead >= 224) {
            // 3-byte sequence (e.g. CJK), counted as one character
            $offset += 3;
            $width += 1;
        } elseif ($lead >= 192) {
            // 2-byte sequence, counted as one character
            $offset += 2;
            $width += 1;
        } elseif ($lead >= 65 && $lead <= 90) {
            // upper-case letters are counted as a full-width character for a balanced look
            $offset += 1;
            $width += 1;
        } else {
            // lower-case letters, digits, half-width punctuation etc. count as half a character
            $offset += 1;
            $width += 0.5;
        }
    }

    if ($offset === 0) {
        return '';
    }

    // $offset may exceed the byte length when the input ends in a truncated
    // sequence; substr() then simply returns everything that is there
    return substr($sourcestr, 0, $offset);
}

这两个函数都是从网上找来的，我把它们组合在了一起。呵呵

使用：

echo self::truncate($str,146,'...',true,true);

好像这个也不错：http://code.google.com/p/cut-html-string/

不过我测试时遇到了问题，也可能是我的测试方法不对。

posted on 2012-10-12 15:33 myx 阅读(1460) 评论(0) 收藏举报