用vim 看文本,比如小说,特别是从网页上剪下来的文本,是一件很痛苦的事情。
由于vim在滚屏时以段落为单位,所以阅读大段大段的文本时,按j之后常常找不到刚才看到哪里了。
$linewidth=75为字符宽度,一个汉字算两个字符。
$tri_bi=3(如果是utf-8的文件的话,设为3;如果是cp936的话,设为2)
文件路径设的是针对linux的,在windows下有bug.
仅学习交流用,实际使用起来,不熟悉原理的话,更麻烦。
----------------------------------------下面是程序
###############################################
#! /usr/bin/perl
## "try_breakline"
##joneykk
######
####Usage: try_breakline filename
####The file (named by filename) consists of Chinese or English characters. The encoding should be UTF-8.
use Encode;
use strict;
use warnings;

# ---------------------------------------------------------------------------
# try_breakline: re-wrap every (long) line of a text file so that no output
# line is wider than $linewidth columns, and write the result to
# "<filename>.klg".
#
# The input is processed as raw bytes. An ASCII byte counts as one column;
# every group of $tri_bi non-ASCII bytes is taken to be one wide (e.g.
# Chinese) character and counts as two columns.
# ---------------------------------------------------------------------------

# Maximum display width of an output line, in columns.
our $linewidth = 75;

# Number of bytes that encode one wide character in the input:
# 3 for UTF-8 (the documented input encoding), 2 for cp936.
# NOTE: the width counter assumes EVERY non-ASCII character occupies exactly
# this many bytes; text mixing other byte lengths will be counted wrongly.
our $tri_bi = 3;

# If a punctuation mark lies fewer than this many bytes before the chosen
# break position, the line is broken right after the punctuation instead.
my $sign_reach = 20;

my $input = $ARGV[0];
die "usage: try_breakline filename\n"
    unless defined $input && length $input;

# The output goes next to the input file. A plain relative path resolves
# against the current directory, so $ENV{PWD} (unset on some platforms, and
# wrong for absolute input paths) is not needed.
my $output = $input . '.klg';
open my $out, '>', $output or die "can't open file $output: $!";

while (my $line = <>) {
    # Strip the input newline so exactly one "\n" is written per piece and
    # no spurious blank line follows a wrapped paragraph.
    chomp $line;
    print {$out} join("\n", _wrap_line($line)), "\n";
}

# Buffered write errors only surface at close time.
close $out or die "can't close file $output: $!";

# Split one chomped input line into the list of output lines.
# Always returns at least one element (the empty string for an empty line).
sub _wrap_line {
    my ($line) = @_;
    my @pieces;
    while (length $line) {
        my $cut = _break_offset($line);
        last if !$cut;    # the rest fits on one output line
        push @pieces, substr($line, 0, $cut);
        $line = substr($line, $cut);
    }
    push @pieces, $line;
    return @pieces;
}

# Return the byte offset at which $line should be broken, or 0 when the
# whole line fits within $linewidth columns. A non-zero result is always
# strictly between 0 and length($line), so the caller makes progress.
sub _break_offset {
    my ($line) = @_;
    my $length = length $line;

    # Step 1: find the first position where the accumulated width reaches
    # $linewidth and we are not in the middle of a multi-byte character.
    my $width   = 0;    # columns used so far
    my $pending = 0;    # non-ASCII bytes seen of the current wide character
    my $hard    = 0;    # byte offset of the hard break (0 = none)
    for my $i (0 .. $length - 1) {
        if (ord(substr($line, $i, 1)) >= 128) {
            $pending++;
            if ($pending == $tri_bi) {
                $width += 2;
                $pending = 0;
            }
        }
        else {
            $width++;
        }
        if ($width >= $linewidth && $pending == 0) {
            $hard = $i + 1;
            last;
        }
    }
    # No break needed when the width is never reached, or is reached
    # exactly at the end of the line.
    return 0 if $hard == 0 || $hard >= $length;

    # Step 2: do not split an ASCII word. Only back up when the word really
    # straddles the break (alphanumerics on BOTH sides of it).
    my $cut = $hard;
    if (   substr($line, $hard - 1, 1) =~ /[a-zA-Z0-9]/
        && substr($line, $hard, 1) =~ /[a-zA-Z0-9]/)
    {
        $cut-- while $cut > 0 && substr($line, $cut - 1, 1) =~ /[a-zA-Z0-9]/;
        # The word fills the whole line: there is nowhere better to break,
        # so fall back to the hard break instead of looping forever.
        $cut = $hard if $cut == 0;
    }

    # Step 3: prefer breaking right after a nearby punctuation mark.
    # getsignlength returns 0 when there is no mark; never break at 0.
    my $sign = getsignlength(substr($line, 0, $cut));
    return $sign if $sign > 0 && $cut - $sign < $sign_reach;

    return $cut;
}
sub getsignlength {
    # Return the length in bytes from the beginning of the string up to and
    # including the LAST punctuation mark found in it, or 0 when the string
    # contains none (or is undefined).
    #
    # Recognised marks: ASCII "," and "?", plus the UTF-8 byte sequences of
    # the full-width comma (U+FF0C), full-width question mark (U+FF1F),
    # ideographic full stop (U+3002) and ideographic comma (U+3001).
    #
    # The argument is expected to be a raw byte string (UTF-8 encoded, not
    # decoded); the multi-byte marks are spelled as \x escapes so matching
    # does not depend on the encoding this script file is saved in.
    my $string = $_[0];
    return 0 if !defined $string;

    my $len = 0;
    # Under /g each successful match advances pos(); after the loop $len
    # holds the offset just past the final mark.
    while ($string =~ /,|\?|\xEF\xBC\x8C|\xEF\xBC\x9F|\xE3\x80\x82|\xE3\x80\x81/g) {
        $len = pos($string);
    }
    return $len;
}
                    
                
                
            
        
浙公网安备 33010602011771号