Denua 博客

PIL 识别简单验证码

发布时间: 2017-12-01 22:53   分类 : Python    标签: Python 图像识别 浏览: 1191   

只使用了 PIL (Python Imaging Library)，这里直接放代码；具体细节请查看知乎文章《Python识别简单验证码》。

```

# coding=utf-8

# /usr/bin/Python

from __future__ import division, print_function

import os
import re

from PIL import Image

# Geometry of the captcha: every digit sits in a fixed-size cell on one row.
H = 12        # height of one digit slice, in pixels
W = 8         # width of one digit slice, in pixels
ST = (7, 7)   # (left, top) offset of the first slice
PIECE = 5     # number of slices cut from one captcha
PAD = 1       # horizontal gap between neighbouring slices

END_Y = ST[1] + H   # bottom edge shared by every slice
PAD_X = W + PAD     # horizontal distance between two slice origins

# Debug verbosity.  Any truthy value enables debug(); the numeric levels are:
#   False - no detail
#   3     - scan() also prints the bit grid of every slice
#   5     - all detail: get_cut() additionally saves every slice as a GIF
DEBUG_LEAVE = False

# Crop boxes, one per slice, as (left, upper, right, lower).
EACH_RECT = tuple(
    (ST[0] + PAD_X * i, ST[1], ST[0] + PAD_X * i + W, END_Y)
    for i in range(PIECE)
)

def get_cut(file_name):
    """Open a captcha image and slice it into black/white digit pieces.

    Args:
        file_name: path of the captcha image to open.

    Returns:
        A list with ``PIECE`` images, cropped from the binarised captcha
        using the boxes in ``EACH_RECT`` (same order).
    """
    # Binarise inside the ``with`` so the source file is closed as soon as
    # its pixels have been read; get_bin() hands back an independent image.
    with Image.open(file_name) as img:
        bin_img = get_bin(img.convert('L'))

    # Read the module globals at call time: cutPiece() may have rebound them.
    rects = EACH_RECT
    cut_img = []
    for part in range(PIECE):
        debug('\n', rects[part])
        piece = bin_img.crop(rects[part])
        if DEBUG_LEAVE >= 5:
            # Highest debug level: keep every slice on disk for inspection.
            piece.save(str(part) + '.gif', 'gif')
        cut_img.append(piece)
    return cut_img

# Remove noise from the picture and convert it to pure black and white.
def get_bin(img, threshold=110):
    """Return a two-level (0 / 255) greyscale copy of ``img``.

    Args:
        img: a PIL image; it is converted to mode ``'L'`` first.
        threshold: grey levels strictly below this value become black (0),
            every other level becomes white (255).

    Returns:
        A new mode ``'L'`` image with the same size as ``img``.
    """
    # ``point`` applies the mapping to every pixel of the greyscale image,
    # replacing the per-pixel getpixel/putpixel loop.
    return img.convert('L').point(
        lambda level: 0 if level < threshold else 255
    )

def pr(img):
    """Print ``img`` to stdout as ASCII art, one text line per pixel row.

    White pixels (255) are drawn as ``-`` and every other level as ``@``;
    a 50-character rule closes the dump.

    Args:
        img: a PIL image; converted to mode ``'L'`` unless it already is.
    """
    if img.mode != 'L':
        img = img.convert('L')

    width, height = img.size
    for row in range(height):
        # Emit a whole row per print call so its glyphs share one line.
        print(''.join(
            '-' if img.getpixel((col, row)) == 255 else '@'
            for col in range(width)
        ))
    print('━' * 50)

# Remove interference.
def denoise(img):
    """Return a cleaned black/white copy of ``img`` with a white border.

    The outermost one-pixel frame is always white (255).  Inside the frame
    a pixel stays white only if it is exactly 255 in the greyscale source;
    any other level becomes black (0).

    Args:
        img: a PIL image; it is converted to mode ``'L'`` first.

    Returns:
        A new mode ``'L'`` image with the same size as ``img``.
    """
    grey = img.convert('L')
    width, height = grey.size
    # Start from an all-white canvas, so the frame needs no extra work and
    # only the black interior pixels have to be written.
    cleaned = Image.new('L', grey.size, 255)
    for row in range(1, height - 1):
        for col in range(1, width - 1):
            if grey.getpixel((col, row)) != 255:
                cleaned.putpixel((col, row), 0)
    return cleaned

# Convert an image to a vector.
def get_vector(img):
    """Flatten the top-left ``H`` x ``W`` pixels of ``img`` into a 0/1 list.

    Args:
        img: a PIL image at least ``W`` wide and ``H`` high; it is
            converted to mode ``'L'`` first.

    Returns:
        A list of ``H * W`` ints in row-major order (index ``row * W +
        col``): 1 where the pixel is white (255), 0 otherwise.
    """
    grey = img.convert('L')
    return [
        1 if grey.getpixel((col, row)) == 255 else 0
        for row in range(H)
        for col in range(W)
    ]

# Get the position of the largest number in a list.
def getmax(list):
    """Return the index of the largest element of ``list``.

    Args:
        list: an indexable sequence of mutually comparable values.  The
            name shadows the builtin inside this function; it is kept so
            existing keyword callers continue to work.

    Returns:
        The index of the first occurrence of the maximum, or 0 when the
        sequence is empty.
    """
    values = list
    best = 0
    # Compare against the current best element rather than a fixed seed,
    # so sequences made only of small negative values are handled too.
    for position in range(1, len(values)):
        if values[position] > values[best]:
            best = position
    return best

def getmini(list):
    """Return the index of the smallest element of ``list``.

    Args:
        list: an indexable sequence of mutually comparable values.  The
            name shadows the builtin inside this function; it is kept so
            existing keyword callers continue to work.

    Returns:
        The index of the first occurrence of the minimum, or 0 when the
        sequence is empty.
    """
    values = list
    best = 0
    # Compare against the current best element rather than a fixed seed,
    # so sequences made only of positive values are handled too.
    for position in range(1, len(values)):
        if values[position] < values[best]:
            best = position
    return best

def prbin(i):
    """Print a flattened ``H`` x ``W`` bit vector as a grid on stdout.

    Entries equal to 1 are drawn as ``. `` and all others as ``# ``; an
    empty line follows the grid.

    Args:
        i: sequence with at least ``H * W`` entries in row-major order
            (index ``row * W + col``).
    """
    for row in range(H):
        start = row * W
        print(''.join(
            '. ' if i[start + col] == 1 else '# '
            for col in range(W)
        ))
    print()

def get_dict(path, deep):
    """Build the reference vectors for digits 0-9 from labelled samples.

    ``path`` must contain one sub-directory per digit, named ``0`` to
    ``9``; every file inside is opened as an image of that digit.

    Args:
        path: root directory of the labelled samples.
        deep: number of samples after which reading a digit's directory
            stops.  The limit is checked after each sample, so one sample
            is still read when ``deep`` is below 1.

    Returns:
        A dict mapping each digit (int) to a list of 0/1 vectors, one per
        sample that was read.
    """
    dict_ = {}
    for digit in range(10):
        digit_dir = os.path.join(path, str(digit))
        debug('\n', digit_dir)
        vectors = []
        for count, name in enumerate(os.listdir(digit_dir), 1):
            # Close each sample file as soon as its vector is extracted.
            with Image.open(os.path.join(digit_dir, name)) as img:
                vectors.append(get_vector(denoise(img)))
            if count >= deep:
                break
        dict_[digit] = vectors
    return dict_

def createDict(labeledPath, savePath, dictDeepness):
    """Write the reference vectors to ``savePath`` as an importable module.

    The generated file holds a single assignment, ``val = [...]``, whose
    elements are the per-digit vector lists in ascending digit order.

    Args:
        labeledPath: root directory of the labelled samples (see get_dict).
        savePath: path of the Python file to (over)write.
        dictDeepness: per-digit sample limit forwarded to get_dict.
    """
    # Collect everything first so a failure while reading samples does not
    # leave a truncated output file behind.
    templates = get_dict(labeledPath, dictDeepness)
    # A dict view's text form is not valid Python source on Python 3, so
    # write a real list ordered by digit.
    ordered = [templates[digit] for digit in sorted(templates)]

    with open(savePath, 'w') as fil:
        fil.write('val = \\\n')
        fil.write(repr(ordered))

def _pixel_similarity(template, sample):
    """Score two 0/1 vectors against each other, in the range [-1, 1].

    Each position adds 1 when both vectors hold 1, subtracts 1 when they
    differ, and adds nothing when both hold 0; the sum is divided by the
    length of ``sample``.
    """
    score = 0
    for expected, actual in zip(template, sample):
        if expected != actual:
            score -= 1
        elif expected == 1:
            score += 1
    # ``float`` keeps this a true division on Python 2 as well.
    return score / float(len(sample) or 1)


# Recognise the content of one image piece.
def get_result(src_img, labeled):
    """Guess which digit ``src_img`` shows by comparing it to templates.

    Args:
        src_img: PIL image of a single digit piece.
        labeled: reference vectors indexed by digit (``labeled[d]`` is the
            list of 0/1 vectors for digit ``d``), for ``d`` in
            ``range(len(labeled))``.

    Returns:
        A two-element list ``[digit, scores]``.  ``scores[d]`` is the mean
        similarity between the piece and the templates of digit ``d``;
        ``digit`` is the index of the highest score, or -1 when
        ``labeled`` is empty.
    """
    sample = get_vector(src_img)

    scores = []
    # Walk every digit present in the dictionary; tying this loop to the
    # slice width would silently skip the last digits.
    for digit in range(len(labeled)):
        templates = labeled[digit]
        if templates:
            total = sum(
                _pixel_similarity(template, sample) for template in templates
            )
            scores.append(total / len(templates))
        else:
            # No template to compare with: use the lowest possible score
            # instead of dividing by zero.
            scores.append(-1.0)
        debug('\n', scores[digit])

    if not scores:
        return [-1, scores]
    return [getmax(scores), scores]

def scan(img, dict_val):
    """Recognise every digit of one captcha image file.

    Args:
        img: path of the captcha image (handed to get_cut).
        dict_val: reference vectors indexed by digit (handed to
            get_result).

    Returns:
        The recognised digits concatenated into one string, in the order
        the pieces come back from get_cut.
    """
    digits = []
    # get_cut yields black/white pieces, one per digit position.
    for piece in get_cut(img):
        if DEBUG_LEAVE >= 3:
            prbin(get_vector(piece))
        digits.append(str(get_result(denoise(piece), dict_val)[0]))
    return ''.join(digits)

def detect(imFile, dict):
    """Recognise every captcha in a directory and rename it to its result.

    Each regular file in ``imFile`` is scanned, the elapsed time and the
    result are printed, and the file is renamed to ``<result>.gif`` inside
    the same directory.  When that name is already taken a numeric suffix
    (``<result>_1.gif``, ``<result>_2.gif``, ...) is used instead.

    Args:
        imFile: directory holding the captcha images.
        dict: reference vectors indexed by digit (handed to scan).  The
            name shadows the builtin inside this function; it is kept so
            existing keyword callers continue to work.
    """
    import time

    for entry in os.listdir(imFile):
        image = os.path.join(imFile, entry)
        if not os.path.isfile(image):
            continue

        started = time.time()
        capRes = scan(image, dict)
        elapsed = time.time() - started
        print(elapsed, capRes)

        newName = os.path.join(imFile, capRes + '.gif')
        if os.path.abspath(newName) == os.path.abspath(image):
            # The file already carries its recognised name.
            continue

        # os.rename may silently replace an existing file on POSIX and
        # raises on Windows, so pick a free name up front.
        suffix = 0
        while os.path.exists(newName):
            suffix += 1
            newName = os.path.join(imFile, '%s_%d.gif' % (capRes, suffix))
        os.rename(image, newName)

def make_piece(samplePath='img\\', pieceSavePath='classify\\'):
    """Cut every sample captcha into digit pieces ready for labelling.

    Creates ``samplePath`` when it is missing, creates the sub-directories
    ``0`` to ``9`` under ``pieceSavePath``, then saves every piece cut from
    every regular file in ``samplePath`` as a randomly named GIF directly
    inside ``pieceSavePath``.

    Args:
        samplePath: directory holding the sample captcha images.
        pieceSavePath: directory receiving the pieces and the per-digit
            sub-directories.
    """
    import random

    if not os.path.isdir(samplePath):
        os.makedirs(samplePath)

    for digit in range(10):
        labeled_dir = os.path.join(pieceSavePath, str(digit))
        if not os.path.isdir(labeled_dir):
            os.makedirs(labeled_dir)

    for name in os.listdir(samplePath):
        sample = os.path.join(samplePath, name)
        if not os.path.isfile(sample):
            continue
        for piece in get_cut(sample):
            upper = 9999
            while True:
                piece_name = os.path.join(
                    pieceSavePath, str(random.randint(0, upper)) + '.gif'
                )
                if not os.path.exists(piece_name):
                    break
                # Name already used: widen the range so an earlier piece
                # is never overwritten and the search always terminates.
                upper = upper * 10 + 9
            piece.save(piece_name, 'gif')
            piece.close()

def debug(end='\n', *args):
    """Print each of ``args`` when debugging is enabled.

    Nothing is printed while the module-level ``DEBUG_LEAVE`` is falsy.

    Args:
        end: terminator written after every printed value.  It is the
            FIRST positional parameter, so callers write
            ``debug('\\n', value)``.
        *args: values to print, one ``print`` call each.
    """
    if not DEBUG_LEAVE:
        return
    for value in args:
        print(value, end=end)

# NOTE(review): the default sample path '\\img' differs from make_piece's
# 'img\\'; the value is kept as written - confirm which one is intended.
def cutPiece(Height_, Width_, ST_, Piece_, Pad_,
             samplePath_='\\img', pieceSavePath_='classify\\'):
    """Reconfigure the slice geometry, then cut all samples into pieces.

    Rebinds the module-level geometry (``H``, ``W``, ``ST``, ``PIECE``,
    ``PAD`` and the derived ``END_Y``, ``PAD_X``, ``EACH_RECT``) and calls
    make_piece with the given directories.

    Args:
        Height_: height of one digit slice.
        Width_: width of one digit slice.
        ST_: ``(left, top)`` offset of the first slice.
        Piece_: number of slices per captcha.
        Pad_: horizontal gap between neighbouring slices.
        samplePath_: directory holding the sample captcha images.
        pieceSavePath_: directory receiving the pieces.
    """
    global H, W, ST, PIECE, PAD
    global END_Y, PAD_X, EACH_RECT

    H = Height_
    W = Width_
    ST = ST_
    PIECE = Piece_
    PAD = Pad_

    END_Y = ST[1] + H
    PAD_X = W + PAD

    # One (left, upper, right, lower) box per slice; derived from PIECE so
    # any slice count gets a matching number of boxes.
    EACH_RECT = tuple(
        (ST[0] + PAD_X * i, ST[1], ST[0] + PAD_X * i + W, END_Y)
        for i in range(PIECE)
    )

    make_piece(samplePath_, pieceSavePath_)

if __name__ == '__main__':
    # The digit dictionary is a Python module exposing ``val``; it can be
    # generated with createDict('classify', 'login_x.py', 10).
    import login_x as cap_dict

    # Other entry points that can be run from here:
    #   make_piece()                   - cut the samples into digit pieces
    #   detect('img\\', cap_dict.val)  - recognise and rename a whole folder
    print('', '\nResult: ', scan('chk.gif', cap_dict.val), '\n')

```

评论    

Copyright denua denua.cn