获取图片中的文字(图片文字识别)

# from aip import AipOcr
# import requests
# import re
# import os
# from decimal import Decimal#浮点数精度计算
# from lxml import html#此处直接引入etree报错是因为版本问题,换个方式引入
# etree = html.etree#引入etree方法
# from string import punctuation
# # """ 你的 APPID AK SK """
# APP_ID = '23597797'
# API_KEY = 'Va3onwymweV9htshK13GiNUs'
# SECRET_KEY = 'FiAd8gWb489uDD2yUI7Y1iKaxQUOwqwM'
# client = AipOcr(APP_ID, API_KEY, SECRET_KEY)
# 
# headers={'User-Agent':'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/89.0.4356.6 Safari/537.36'}
# source= requests.get('http://tj.ziroom.com/x/777120723.html',headers=headers).text
# # print(source)
# base=etree.HTML(source).xpath('/html/body/section/aside/div[1]/i/@style')
# px=[]
# urls1=''
# for i in base:
#     a='background-position:-(.*?)px;background-image: url\((.*?)\);'
#     demo = re.compile(a)  # 将正则表达式字符串编译为可复用的正则对象(Pattern)。
#     lists = demo.findall(i)
#     px.append(lists[0][0])
#     urls1='http:'+lists[0][1]#验证图片链接
# print(px,urls1)
# #对上面取得的图片链接进行文字识别(OCR)
# url = urls1
# """ 调用通用文字识别, 图片参数为远程url图片 """
# client.basicGeneralUrl(url);
# """ 如果有可选参数 """
# options = {}
# options["language_type"] = "CHN_ENG"
# options["detect_direction"] = "true"
# options["detect_language"] = "true"
# options["probability"] = "true"
# """ 带参数调用通用文字识别, 图片参数为远程url图片 """
# base=client.basicGeneralUrl(url, options)
# print(base)
# dicts={}
# n=0
# b=31.24
# for i in range(10):#生成以图片位置坐标与数字下标对应的键值对
#     dicts[str(n)]=i
#     n=round(n+b,2)#浮点型数据保留两位小数
# print(dicts)
# index=[dicts[x] for x in px]#找出与图片获取到的位置相对应的下标
# print(index)
# yzm=base['words_result'][0]['words']#打码出的内容
# jg=[yzm[x] for x in index ]#通过下标找出打码出的内容中对应的数
# jg=''.join(jg)#合并为字符串
# print(yzm)
# print(jg)

 

posted @ 2021-01-27 15:40  秋叶落日  阅读(99)  评论(0)  编辑  收藏  举报