博客
关于我
强烈建议你试试无所不能的chatGPT,快点击我
Python3.4 12306 2015年3月验证码识别
阅读量:6257 次
发布时间:2019-06-22

本文共 3696 字,大约阅读时间需要 12 分钟。

"""Recognise the 12306 login captcha (March 2015 layout).

The script downloads one captcha image, OCRs the text prompt in its top
strip through an online conversion service, runs every picture tile through
Baidu image search, and prints the tiles whose keywords share a character
with the prompt.
"""
import ssl
import json
from PIL import Image
import requests
import re
import urllib.request as urllib2

# SECURITY NOTE: this replaces ssl.create_default_context with the
# non-verifying factory, so certificate checks are turned off for every
# HTTPS request made through that function in this process. It is kept
# because the script relies on it to reach kyfw.12306.cn; do not copy it
# into code that handles sensitive data.
if hasattr(ssl, '_create_unverified_context'):
    ssl.create_default_context = ssl._create_unverified_context

UA = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2272.89 Safari/537.36"
pic_url = "https://kyfw.12306.cn/otn/passcodeNew/getPassCodeNew?module=login&rand=sjrand&0.21191171556711197"


def imgCut():
    """Crop the text-prompt strip out of ./tmp.jpg and save it as ./text.jpg."""
    # The context manager releases the source file handle once the cropped
    # region has been written out.
    with Image.open('./tmp.jpg') as pic_obj:
        box = (120, 0, 290, 25)  # (left, top, right, bottom) in pixels
        region = pic_obj.crop(box)
        region.save('./text.jpg')


def ocrApi():
    """Upload ./text.jpg to the cn.docs88.com converter and return its text.

    Returns:
        The decoded body of the result document the service points to.
    """
    filename = './text.jpg'
    upload_pic_url = "http://cn.docs88.com/pdftowordupload2.php"
    filename_tmp = filename.split('/')[-1]
    headers_fake = {
        'Accept': '*/*',  # was misspelled 'ccept', so no Accept header was sent
        'Accept-Encoding': 'gzip, deflate',
        'Accept-Language': 'zh-CN,zh;q=0.8,en;q=0.6',
        'Connection': 'keep-alive',
        'Host': 'cn.docs88.com',
        'Origin': 'http://cn.docs88.com',
        'User-Agent': 'Mozilla/5.0 (KHTML, like Gecko) Chrome/41.0.2272.89',
        'X-Requested-With': 'ShockwaveFlash/17.0.0.134',
    }
    para = {
        'Filename': filename_tmp,
        'sourcename': filename_tmp,
        'sourcelanguage': 'cn',
        'desttype': 'txt',
        'Upload': 'Submit Query',
    }
    # Open the upload inside a with-block so the handle is closed even when
    # the request raises.
    with open(filename, 'rb') as upload_file:
        upload_pic = requests.post(
            upload_pic_url,
            data=para,
            files={"Filedata": upload_file},
            headers=headers_fake,
        )
    # str() of the bytes body looks like "b'...'"; the slice drops the "b'"
    # prefix plus the first three payload characters and the closing quote.
    # NOTE(review): presumably those three characters are a status prefix
    # in front of the relative result path -- confirm against the service.
    text_result_url = 'http://cn.docs88.com/' + str(upload_pic.content)[5:-1]
    with urllib2.urlopen(text_result_url) as resp:
        return resp.read().decode()


def get_img():
    """Download a captcha to ./tmp.jpg and return it as a PIL image."""
    with urllib2.urlopen(pic_url) as resp:
        raw = resp.read()
    with open('./tmp.jpg', 'wb') as fp:
        fp.write(raw)
    return Image.open('./tmp.jpg')


def get_sub_img(im, x, y):
    """Return the picture tile at column ``x`` (0-3) and row ``y`` (0-2).

    Tiles are cropped as 67x67 squares laid out on a 72-pixel pitch, with
    the grid starting 5 pixels from the left and 41 pixels from the top.

    Raises:
        AssertionError: if ``x`` or ``y`` is outside the accepted range.
    """
    assert 0 <= x <= 3
    assert 0 <= y <= 2
    left = 5 + (67 + 5) * x
    top = 41 + (67 + 5) * y
    right = left + 67
    bottom = top + 67
    return im.crop((left, top, right, bottom))


def baidu_stu_lookup(im):
    """Run ``im`` through Baidu image search and return its keywords.

    The image is saved to ./query_temp_img.png, uploaded, and the URL the
    upload endpoint answers with is passed to the search page. The result
    is whatever ``baidu_stu_html_extract`` finds in that page.
    """
    url = "http://stu.baidu.com/n/image?fr=html5&needRawImageUrl=true&id=WU_FILE_0&name=233.png&type=image%2Fpng&lastModifiedDate=Mon+Mar+16+2015+20%3A49%3A11+GMT%2B0800+(CST)&size="
    im.save("./query_temp_img.png")
    with open("./query_temp_img.png", 'rb') as img_file:
        raw = img_file.read()
    # The upload endpoint takes the payload size as the last query value.
    url = url + str(len(raw))
    req = urllib2.Request(url, raw, {'Content-Type': 'image/png', 'User-Agent': UA})
    with urllib2.urlopen(req) as resp:
        resp_url = resp.read()
    url = "http://stu.baidu.com/n/searchpc?queryImageUrl=" + urllib2.quote(resp_url)
    req = urllib2.Request(url, headers={'User-Agent': UA})
    with urllib2.urlopen(req) as resp:
        html = resp.read().decode()
    return baidu_stu_html_extract(html)


def baidu_stu_html_extract(html):
    """Extract the keyword list embedded in a Baidu image-search page.

    Args:
        html: Page source containing a ``keywords:'...'`` fragment whose
            payload is an escaped JSON array of objects with a ``keyword``
            entry.

    Returns:
        The keywords joined with ``|``, or ``'[UNKOWN]'`` when the fragment
        is missing or the array is empty.
    """
    pattern = re.compile(r"keywords:'(.*?)'")
    matches = pattern.findall(html)
    if not matches:
        return '[UNKOWN]'
    json_str = matches[0]
    # Undo the JavaScript string escaping so the payload parses as JSON.
    json_str = json_str.replace('\\x22', '"').replace('\\\\', '\\')
    result = [item['keyword'] for item in json.loads(json_str)]
    return '|'.join(result) if result else '[UNKOWN]'


def main():
    """Fetch one captcha, look up every tile, and print likely matches."""
    im = get_img()
    imgCut()
    captcha_text = ocrApi()
    print(captcha_text)

    # Tile number (1-8, row-major over 2 rows x 4 columns) -> keyword string.
    dic_list = {}
    count = 0
    for y in range(2):
        for x in range(4):
            count += 1
            result = baidu_stu_lookup(get_sub_img(im, x, y))
            dic_list[count] = result
            print((y, x), result)

    # Compare against the stripped prompt. The earlier loop indexed the
    # unstripped text with the stripped length, so leading whitespace
    # shifted the characters being tested and dropped trailing ones.
    prompt = captcha_text.strip()
    if prompt:
        print('\n可能的结果是:')
        for index, keywords in dic_list.items():
            # Whitespace inside the prompt is skipped so it cannot match
            # spaces in the keyword string; each tile is printed once.
            if any(ch in keywords for ch in prompt if not ch.isspace()):
                print('%s --- %s' % (index, keywords))
    else:
        print('False')


if __name__ == '__main__':
    main()

改自 https://gist.github.com/Evi1m0/fbbdb1ba7c66cc4e1bb2

转载请注明作者与出处:http://blog.csdn.net/u013511642   王小涛_同學

 
你可能感兴趣的文章
日志挖掘(logminer)
查看>>
LaTeX技巧005:定制自己炫酷的章节样式实例
查看>>
LeetCode解题思路:27. Remove Element
查看>>
CCF NOI1138 高精度加法
查看>>
构造函数私有方法和公有方法
查看>>
JS原型与原型链终极详解
查看>>
win7 下配置Openssl
查看>>
Android中Handler的使用方法——在子线程中更新界面
查看>>
1_NAT模式和桥接模式下的网络配置
查看>>
netcore webapi帮助文档设置
查看>>
springcloud~配置中心的使用
查看>>
EF架构~为EF DbContext生成的实体添加注释(T5模板应用)
查看>>
认识flask框架
查看>>
7. 类的继承
查看>>
npm
查看>>
【转】VLAN原理详解
查看>>
django和apache交互的wsgi分析
查看>>
python --- json模块和pickle模块详解
查看>>
说说一道实在很多陷阱的题
查看>>
EM算法
查看>>