爬虫23-验证码识别
1. Tesseract
import pytesseract
from PIL import Image
# Tell pytesseract where the Tesseract executable lives. This is a hard-coded
# Windows path; adjust it to match the local installation.
pytesseract.pytesseract.tesseract_cmd = r"H:\Python\Tesseract_dev20170510\Tesseract-OCR\tesseract.exe"
# Open the image in a context manager so its file handle is released once
# OCR has finished, instead of staying open until garbage collection.
with Image.open('a.png') as image:
    text = pytesseract.image_to_string(image)
print(text)
2.识别拉勾网图形验证码
#encoding: utf-8
import pytesseract
from urllib import request
from PIL import Image
import time
def main():
    """Repeatedly download the Lagou captcha image and print its OCR text.

    Loops until the process is interrupted. Each iteration saves the captcha
    to ``captcha.png`` in the working directory, runs Tesseract on it, prints
    the recognized text and then sleeps for two seconds.
    """
    # Hard-coded Windows path to the Tesseract executable; adjust it to match
    # the local installation.
    pytesseract.pytesseract.tesseract_cmd = r"H:\Python\Tesseract_dev20170510\Tesseract-OCR\tesseract.exe"
    url = "https://passport.lagou.com/vcode/create?from=register&refresh=1513082291955"
    while True:
        request.urlretrieve(url, 'captcha.png')
        # Close the image file before the next iteration overwrites it, so
        # file handles do not pile up across the endless loop.
        with Image.open('captcha.png') as image:
            text = pytesseract.image_to_string(image)
        print(text)
        # Pause between requests.
        time.sleep(2)


if __name__ == '__main__':
    main()

浙公网安备 33010602011771号