#!/usr/bin/python
# -*- coding: utf-8 -*-
"""
模块介绍:先抓取一定数目的原始验证码,接着对其进行二值去噪,根据去噪后的验证码进行切分,形成一个个单独的字符图片,
作为字模,当然字模得覆盖完全所有的字符。字模创建完成后,根据需要识别的验证码,同样对其进行二值去噪、切分之后,和字模
中的字符图片一一进行对比,对比方法是对比两个图片的每个像素,差距最小的则即为识别后的字符。
"""
import os
import Image
import urllib2
# Download captchas used to build the template library; the raw captchas are stored in the pic directory
def fetch_pic_code(url='xxx', count=50):
    """Download raw captcha images used to build the template library.

    Requests ``url`` ``count`` times and writes each response body to
    ``pic/<index>.jpg``, with the index starting at 0. The ``pic`` directory
    is not created here and must already exist.

    Args:
        url: Address the captcha image is fetched from. The default
            ``'xxx'`` is a placeholder that has to be replaced with a real
            URL before this can succeed.
        count: Number of captchas to download.
    """
    for i in range(count):
        response = urllib2.urlopen(url)
        try:
            data = response.read()
        finally:
            # Release the connection even when reading the body fails.
            response.close()
        # Download first, then open the file, so a failed request does not
        # leave an empty image behind; ``with`` closes the handle promptly.
        with open("pic/%d.jpg" % i, "wb") as out:
            out.write(data)
# Binarize and denoise the raw captchas; the denoised captchas are stored in pic_binary
def binaryzation():
    """Binarize every image in ``pic/`` and save the result to ``pic_binary/``.

    Each image is converted to RGBA and every pixel is classified once:

    * red < 90 or green < 136  -> opaque black
    * otherwise, blue > 0      -> opaque white
    * otherwise                -> left unchanged

    This gives the same per-pixel result as running the three thresholds as
    three separate full-image passes, because a pixel that was turned black
    (green == 0, blue == 0) can never be re-classified by a later threshold.

    The output keeps the input file name. ``pic_binary/`` must already exist.

    NOTE(review): for ``.jpg`` names the result is re-encoded as JPEG, which
    is lossy, so the stored file may not be strictly two-level - confirm this
    is acceptable for the template step.
    """
    dir_pic = "pic/"
    black = (0, 0, 0, 255)
    white = (255, 255, 255, 255)
    for f in os.listdir(dir_pic):
        img = Image.open(dir_pic + f).convert("RGBA")
        pixdata = img.load()
        width, height = img.size
        columns = range(width)
        for y in range(height):
            for x in columns:
                pixel = pixdata[x, y]
                if pixel[0] < 90 or pixel[1] < 136:
                    pixdata[x, y] = black
                elif pixel[2] > 0:
                    pixdata[x, y] = white
        # JPEG cannot store an alpha channel. Drop it explicitly instead of
        # relying on the imaging library to do so implicitly (newer Pillow
        # releases refuse to write RGBA as JPEG).
        img.convert("RGB").save("pic_binary/" + f)
# Build the character templates from the binarized captchas and store them in the num directory
def make_matrix():
    """Slice every binarized captcha into four character templates.

    Each image in ``pic_binary/`` is cut into four 20x28 cells starting at
    the top-left corner (x = 0, 20, 40, 60; y = 0). The cells are saved as
    ``num/<j>.jpg`` with ``j`` counting up from 1 across all images, and the
    counter is printed after each save. ``num/`` must already exist.

    NOTE(review): ``recognize`` in this file loads ``./num/0.bmp`` ..
    ``./num/9.bmp`` and compares a 9x13 region, so the files written here
    presumably need to be selected, renamed and converted by hand before
    they can be used as templates - confirm the intended workflow.
    """
    pic_origin = "pic_binary/"
    j = 1
    # os.listdir() returns names in arbitrary order; sorting makes the
    # template numbering reproducible from run to run.
    for f in sorted(os.listdir(pic_origin)):
        img = Image.open(pic_origin + f)
        for i in range(4):
            x = i * 20
            y = 0
            img.crop((x, y, x + 20, y + 28)).save("num/%d.jpg" % j)
            # Single-argument form prints the same text ("j= <n>") whether
            # print is a statement or a function.
            print("j= %d" % j)
            j += 1
#####################
# Binarization
def binary(f):
    """Open the image at ``f``, binarize it in place and return it.

    Every pixel is classified once:

    * band 0 < 90 or band 1 < 136 -> (0, 0, 0, 255)
    * otherwise, band 2 > 0       -> (255, 255, 255, 255)
    * otherwise                   -> left unchanged

    This is equivalent to applying the three thresholds in three separate
    full-image passes: a pixel already set to black has bands 1 and 2 equal
    to 0, so the later thresholds cannot change it again.

    Args:
        f: Path (or file object) accepted by ``Image.open``.

    Returns:
        The opened image with its pixels modified in place. No mode
        conversion is performed here.

    NOTE(review): assumes the image decodes to a mode with at least three
    bands (e.g. RGB) - TODO confirm for all inputs.
    """
    img = Image.open(f)
    pixdata = img.load()
    width, height = img.size
    black = (0, 0, 0, 255)
    white = (255, 255, 255, 255)
    columns = range(width)
    for y in range(height):
        for x in columns:
            pixel = pixdata[x, y]
            if pixel[0] < 90 or pixel[1] < 136:
                pixdata[x, y] = black
            elif pixel[2] > 0:
                pixdata[x, y] = white
    return img
def division(img):
    """Cut the four character cells out of a captcha image.

    The cells are 9 pixels wide and 13 pixels tall, all start 3 pixels from
    the top, and their left edges sit at x = 7, 20, 33 and 46.

    Args:
        img: Object providing ``crop((left, upper, right, lower))``.

    Returns:
        List with the four ``img.crop`` results, ordered left to right.
    """
    top = 3
    bottom = top + 13
    left_edges = [7 + 13 * k for k in range(4)]
    return [img.crop((left, top, left + 9, bottom)) for left in left_edges]
def recognize(img):
    """Recognize the four characters of a binarized captcha.

    Loads the templates ``./num/0.bmp`` .. ``./num/9.bmp``, splits ``img``
    with ``division`` and, for every cell, counts the pixels in the 9x13
    area whose value differs from each template. The label of the template
    with the fewest differing pixels is chosen; on equal counts the smaller
    label wins.

    Args:
        img: Captcha image accepted by ``division``.

    Returns:
        String made of the chosen labels, one per cell, left to right.
    """
    templates = [(str(d), Image.open("./num/%d.bmp" % d)) for d in range(10)]
    digits = []
    for glyph in division(img):
        scores = []
        for label, template in templates:
            mismatches = sum(
                1
                for row in range(13)
                for col in range(9)
                if template.getpixel((col, row)) != glyph.getpixel((col, row))
            )
            scores.append((mismatches, label))
        # The smallest (mismatches, label) tuple is the best match; this is
        # the element that would come first after sorting.
        digits.append(min(scores)[1])
    return "".join(digits)
if __name__ == '__main__':
    # Stage 1: build the template material - download raw captchas,
    # binarize them and slice them into per-character images.
    fetch_pic_code()
    binaryzation()
    make_matrix()

    # Stage 2: recognize every .jpg captcha found in ./code/ and store the
    # binarized image as ./result/<recognized text>.png.
    codedir = "./code/"
    for imgfile in os.listdir(codedir):
        if imgfile.endswith(".jpg"):
            dir_result = "./result/"
            img = binary(codedir + imgfile)
            num = recognize(img)
            dir_result += (num + ".png")
            # Single-argument form prints the same text whether print is a
            # statement or a function.
            print("save to %s" % dir_result)
            img.save(dir_result)