python strip_tags 支持保留指定标签

#coding:utf-8

import re

def strip_tags(string, allowed_tags=''):
    """Remove HTML/XML-style tags from *string*, optionally keeping some.

    Args:
        string: The text to clean.
        allowed_tags: Comma-separated tag names to preserve, e.g. ``'a,b'``
            or ``'a, b'``. Whitespace around names is ignored and names are
            compared case-insensitively. An empty value (the default)
            removes every tag.

    Returns:
        *string* with every tag removed except opening/closing tags whose
        name is exactly one of the allowed names. Text between tags is
        left untouched.
    """
    # Normalise the allow-list once: trim whitespace, lowercase, drop blanks.
    names = (name.strip().lower() for name in allowed_tags.split(','))
    allowed = set(name for name in names if name)

    if not allowed:
        # If no allowed tags, remove all.
        return re.sub(r'<[^>]*?>', '', string)

    def _keep_if_allowed(match):
        tag = match.group(0)
        # Extract the full tag name so that allowing "b" does not also
        # allow "br" or "body" (a prefix match would keep those).
        name_match = re.match(r'</?\s*([^\s/>]+)', tag)
        if name_match and name_match.group(1).lower() in allowed:
            return tag
        return ''

    # A single pass over the input: each tag is either kept or dropped.
    return re.sub(r'<[^>]+>', _keep_if_allowed, string)

  

posted @ 2015-05-06 15:13  捕蛇者说  阅读(1405)  评论(0编辑  收藏  举报