A Summary of CAPTCHA Recognition

Most of the target websites I currently work with use text-recognition CAPTCHAs, similar to these:




There are also codes obtained dynamically by scanning a WeChat QR code.

  • The first two kinds of CAPTCHAs can be handled by pixel comparison. Split the sample image into single-character images, then grayscale, binarize, and denoise them to get white-background, black-text samples. Using the scientific computing library numpy, each image is turned into a matrix of 0s and 1s with np.array(img), and every character sample is stored as a (label: matrix) pair in a Python file; "training" simply means accumulating a large number of such sample matrices. At recognition time the target image is split and converted to matrices in the same way, then compared against every sample, e.g. sum(imap(lambda x, y: bin(x ^ y).count('1'), l1, l2)); the label of the sample with the smallest difference is taken as the result. This approach is simple and brute-force, so I won't go through the details here; a rough sketch follows below.
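
    A minimal sketch of this pixel-comparison idea, under stated assumptions: the SAMPLES dictionary, the threshold 128, and the helper names are illustrative, and an equivalent numpy element-wise comparison stands in for the original Python 2 XOR-and-count expression.

# coding=utf-8
# Illustrative sketch of template matching by pixel comparison (not the exact production code).
import numpy as np
from PIL import Image

SAMPLES = {}  # label -> 0/1 matrix; assumed to be populated from the saved sample file

def to_matrix(img):
    """Grayscale and binarize a single-character image into a 0/1 matrix."""
    gray = img.convert("L")
    return (np.array(gray) < 128).astype(int)  # 1 = ink, 0 = background

def diff(m1, m2):
    """Number of differing pixels between two equally sized 0/1 matrices."""
    return int(np.sum(m1 != m2))

def recognize(char_img):
    """Return the label of the stored sample closest to char_img (SAMPLES must not be empty)."""
    m = to_matrix(char_img)
    return min(SAMPLES, key=lambda label: diff(m, SAMPLES[label]))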

  • For the third kind, Chinese character recognition, I started with the same approach. It worked reasonably well, but visually similar characters were easy to confuse, for example 陆 and 除, 叁 and 乘, 加 and 四. That still roughly met the production requirement, with an average accuracy of about 70%. To raise the accuracy I moved on to training a neural network, a convolutional neural network, and later also tried the k-nearest-neighbor algorithm.
    A convolutional network model can be built with Google's TensorFlow framework:

    cnn

#coding=utf-8

import os
# image reading library
from PIL import Image
from PIL import ImageEnhance
from PIL import ImageFilter
# matrix library
import numpy as np
import tensorflow as tf


# data folders
data_dir = "data"
test_data_dir = "test_data"
# train or test
train = True
# model file path
model_path = "model/image_model"

char_to_digit = ["零", "壹", "贰", "叁", "肆", "伍", "陆", "柒", "捌", "玖", "拾",
                 "一", "二", "三", "四", "五", "六", "七", "八", "九",
                 "加", "减", "乘", "除"]


def max_dif(x, y, z):
    """Largest difference among three values."""
    return max(x, y, z) - min(x, y, z)


def binar_by_rgb(img):
    """Binarize based on the RGB values."""
    w, h = img.size
    for y in range(h):
        for x in range(w):
            r, g, b = img.getpixel((x, y))
            a_list = sorted([r, g, b])
            diff = max_dif(r, g, b)
            # turn near-gray, mid-brightness pixels into black
            if diff < 16 and a_list[0] > 128 and a_list[2] < 255:
                img.putpixel((x, y), (0, 0, 0))
    return img


def graying(img):
    """Convert to grayscale."""
    return img.convert("L")


def remove_boader(img):
    """Clear noise on the two outermost rows and columns of the border."""
    w, h = img.size
    for x in range(w):
        img.putpixel((x, 0), 255)
        img.putpixel((x, 1), 255)
        img.putpixel((x, h - 1), 255)
        img.putpixel((x, h - 2), 255)
    for y in range(h):
        img.putpixel((0, y), 255)
        img.putpixel((1, y), 255)
        img.putpixel((w - 1, y), 255)
        img.putpixel((w - 2, y), 255)
    return img


def filter_line(img, v):
    """Break interference lines into isolated dots."""
    w, h = img.size
    black_point = 1
    white_point = 0
    for x in range(v, w - v):
        for y in range(v, h - v):
            mid_pixel = img.getpixel((x, y))  # value of the central pixel
            if mid_pixel == 0:  # inspect the neighbours of a black pixel
                top_pixel = img.getpixel((x, y - v))
                left_pixel = img.getpixel((x - v, y))
                down_pixel = img.getpixel((x, y + v))
                right_pixel = img.getpixel((x + v, y))
                right_down_pixel = img.getpixel((x + v, y + v))
                right_top_pixel = img.getpixel((x + v, y - v))
                left_down_pixel = img.getpixel((x - 1, y + 1))
                left_top_pixel = img.getpixel((x - 1, y - 1))

                # count the white pixels among the four direct neighbours
                if top_pixel == 255:    # up
                    white_point += 1
                if left_pixel == 255:   # left
                    white_point += 1
                if down_pixel == 255:   # down
                    white_point += 1
                if right_pixel == 255:  # right
                    white_point += 1
                # count at most one black diagonal neighbour
                if right_down_pixel == 0:    # lower right
                    black_point += 1
                elif right_top_pixel == 0:   # upper right
                    black_point += 1
                elif left_down_pixel == 0:   # lower left
                    black_point += 1
                elif left_top_pixel == 0:    # upper left
                    black_point += 1
                # a mostly white neighbourhood with a lone diagonal black pixel
                # is treated as part of an interference line
                if black_point >= 2 and white_point >= 3:
                    img.putpixel((x, y), 255)
                black_point = 1
                white_point = 0
    return img


def filter_line_pro(img, v):
    """Remove isolated dots."""
    w, h = img.size
    black_point = 1
    for x in range(v, w - v):
        for y in range(v, h - v):
            mid_pixel = img.getpixel((x, y))  # value of the central pixel
            if mid_pixel == 0:  # inspect the eight neighbours of a black pixel
                top_pixel = img.getpixel((x, y - v))
                left_pixel = img.getpixel((x - v, y))
                down_pixel = img.getpixel((x, y + v))
                right_pixel = img.getpixel((x + v, y))
                right_down_pixel = img.getpixel((x + v, y + v))
                right_top_pixel = img.getpixel((x + v, y - v))
                left_down_pixel = img.getpixel((x - 1, y + 1))
                left_top_pixel = img.getpixel((x - 1, y - 1))

                # count the black pixels among the neighbours
                if top_pixel == 0:
                    black_point += 1
                if left_pixel == 0:
                    black_point += 1
                if down_pixel == 0:
                    black_point += 1
                if right_pixel == 0:
                    black_point += 1
                if right_down_pixel == 0:
                    black_point += 1
                if right_top_pixel == 0:
                    black_point += 1
                if left_down_pixel == 0:
                    black_point += 1
                if left_top_pixel == 0:
                    black_point += 1
                # only 1 or 2 black pixels in the 3x3 block centred on (x, y):
                # treat the pixel as noise
                if black_point <= 2:
                    img.putpixel((x, y), 255)
                black_point = 1
    return img


def binarization(img):
    """Binarize to black text on a white background."""
    w, h = img.size
    for y in range(h):
        for x in range(w):
            color = img.getpixel((x, y))
            if color >= 224:
                img.putpixel((x, y), 255)
            else:
                img.putpixel((x, y), 0)
    img = remove_boader(img)
    img = filter_line(img, 1)
    img = filter_line_pro(img, 1)
    return img


# Read images and labels from a folder into numpy arrays.
# The label is in the file name: the characters before "_" are mapped to an index.
def read_data(data_dir):
    datas = []
    labels = []
    fpaths = []
    for fname in os.listdir(data_dir):
        if fname.split("_")[0] not in char_to_digit:
            continue  # skip files with an invalid label
        fpath = os.path.join(data_dir, fname)
        fpaths.append(fpath)
        image = Image.open(fpath)
        image = ImageEnhance.Contrast(image).enhance(2.0)
        image = image.filter(ImageFilter.EDGE_ENHANCE_MORE)
        image = binar_by_rgb(image)
        image = graying(image)
        image = binarization(image)
        image.save("data/preprocessed_data/{}".format(fname))

        data = np.array(image)
        label = int(char_to_digit.index(fname.split("_")[0]))
        datas.append(data.reshape(30, 26, 1))  # each sample is 26x30 pixels
        labels.append(label)

    datas = np.array(datas)
    labels = np.array(labels)

    print("shape of datas: {}\tshape of labels: {}".format(datas.shape, labels.shape))
    return fpaths, datas, labels


fpaths, datas, labels = read_data(data_dir)
test_fpath, test_datas, test_labels = read_data(test_data_dir)
data_len = datas.shape[0]

# number of distinct classes in the data
num_classes = len(set(labels))


# placeholders for inputs and labels; each sample is 30x26x1
datas_placeholder = tf.placeholder(tf.float32, [None, 30, 26, 1])
labels_placeholder = tf.placeholder(tf.int32, [None])

# dropout rate: 0.25 during training, 0 during testing
dropout_placeholdr = tf.placeholder(tf.float32)

# convolutional layer: 25 filters of size 1, ReLU activation
conv0 = tf.layers.conv2d(datas_placeholder, 25, 1, activation=tf.nn.relu)
# max-pooling layer: 2x2 window, 2x2 stride
pool0 = tf.layers.max_pooling2d(conv0, [2, 2], [2, 2])

# convolutional layer: 40 filters of size 1, ReLU activation
# (note: as written it takes conv0 directly, so pool0/pool1 are defined but unused)
conv1 = tf.layers.conv2d(conv0, 40, 1, activation=tf.nn.relu)
# max-pooling layer: 2x2 window, 2x2 stride
pool1 = tf.layers.max_pooling2d(conv1, [2, 2], [2, 2])

# flatten the 3-D features into a 1-D vector
flatten = tf.layers.flatten(conv1)

# fully connected layer producing a feature vector of length 100
fc = tf.layers.dense(flatten, 100, activation=tf.nn.relu)

# dropout to reduce overfitting
dropout_fc = tf.layers.dropout(fc, dropout_placeholdr)

# output layer without activation
logits = tf.layers.dense(dropout_fc, num_classes)

predicted_labels = tf.argmax(logits, 1)


# cross-entropy loss
losses = tf.nn.softmax_cross_entropy_with_logits(
    labels=tf.one_hot(labels_placeholder, num_classes),
    logits=logits
)
# mean loss
mean_loss = tf.reduce_mean(losses)

# optimizer for the loss
optimizer = tf.train.AdamOptimizer(learning_rate=1e-2).minimize(losses)


# saving and loading the model
saver = tf.train.Saver()

with tf.Session() as sess:

    if train:
        print("Training mode")
        # initialize the variables when training
        sess.run(tf.global_variables_initializer())
        # feed inputs and labels; dropout is 0.25 during training
        train_feed_dict = {
            datas_placeholder: datas,
            labels_placeholder: labels,
            dropout_placeholdr: 0.25
        }
        for step in range(150):
            _, mean_loss_val = sess.run([optimizer, mean_loss], feed_dict=train_feed_dict)

            if step % 10 == 0:
                print("step = {}\tmean loss = {}".format(step, mean_loss_val))
        saver.save(sess, model_path)
        print("Training finished, model saved to {}".format(model_path))
    else:
        print("Testing mode")
        # restore the parameters when testing
        saver.restore(sess, model_path)
        print("Model loaded from {}".format(model_path))
        label_name_dict = {k: v for k, v in enumerate(char_to_digit)}
        # feed inputs and labels; dropout is 0 during testing
        test_feed_dict = {
            datas_placeholder: test_datas,
            labels_placeholder: test_labels,
            dropout_placeholdr: 0
        }
        predicted_labels_val = sess.run(predicted_labels, feed_dict=test_feed_dict)
        # compare true labels with predicted labels
        err_count = 0
        err_pred = []
        for fpath, real_label, predicted_label in zip(test_fpath, test_labels, predicted_labels_val):
            # map label ids back to label names
            real_label_name = label_name_dict[real_label]
            predicted_label_name = label_name_dict[predicted_label]
            if real_label_name != predicted_label_name:
                err_count += 1
                err_pred.append((real_label_name, predicted_label_name))
            print("{}\t{} => {}".format(fpath, real_label_name, predicted_label_name))
        print(err_count)
        print(err_pred)
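
    For completeness, a minimal sketch of how the saved model could be reused to classify one character image. The graph-building code above is assumed to be in scope, and preprocess_single is a hypothetical helper wrapping the same preprocessing chain; this is not part of the original script.

# Illustrative sketch: restore the trained model and classify one character image.
# preprocess_single is a hypothetical helper that applies binar_by_rgb / graying /
# binarization and returns a 30x26 numpy array.
def predict_one(image_path):
    img = Image.open(image_path)
    data = preprocess_single(img).reshape(1, 30, 26, 1)
    with tf.Session() as sess:
        saver.restore(sess, model_path)
        label_id = sess.run(predicted_labels, feed_dict={
            datas_placeholder: data,
            dropout_placeholdr: 0
        })[0]
    return char_to_digit[label_id]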

The k-nearest-neighbor classifier is also implemented with TensorFlow. Its image preprocessing is identical to the CNN script above, so the shared helper functions are not repeated in the listing below.

knn

#coding=utf-8

import os
# image reading library
from PIL import Image
from PIL import ImageEnhance
from PIL import ImageFilter
# matrix library
import numpy as np
import tensorflow as tf


# data folders
data_dir = "data"
test_data_dir = "test_data"

char_to_digit = ["零", "壹", "贰", "叁", "肆", "伍", "陆", "柒", "捌", "玖", "拾",
                 "一", "二", "三", "四", "五", "六", "七", "八", "九",
                 "加", "减", "乘", "除"]

# The preprocessing helpers max_dif, binar_by_rgb, graying, remove_boader,
# filter_line, filter_line_pro and binarization are identical to the CNN
# script above and are omitted here.


# Read images and labels from a folder into numpy arrays.
# The label is in the file name; here each sample is flattened and the
# label is stored as a one-hot vector.
def read_data(data_dir):
    datas = []
    labels = []
    fpaths = []
    for fname in os.listdir(data_dir):
        if fname.split("_")[0] not in char_to_digit:
            continue  # skip files with an invalid label
        fpath = os.path.join(data_dir, fname)
        fpaths.append(fpath)
        image = Image.open(fpath)
        image = ImageEnhance.Contrast(image).enhance(2.0)
        image = image.filter(ImageFilter.EDGE_ENHANCE_MORE)
        image = binar_by_rgb(image)
        image = graying(image)
        image = binarization(image)
        image.save("data/preprocessed_data/{}".format(fname))

        data = np.array(image)
        label = int(char_to_digit.index(fname.split("_")[0]))
        datas.append(data.flatten())  # 26x30 image flattened to 780 values
        one_hot = [0] * len(char_to_digit)
        one_hot[label] = 1
        labels.append(one_hot)

    datas = np.array(datas)
    labels = np.array(labels)

    print("shape of datas: {}\tshape of labels: {}".format(datas.shape, labels.shape))
    return fpaths, datas, labels


fpaths, train_X, train_Y = read_data(data_dir)
test_fpath, test_X, test_Y = read_data(test_data_dir)

tra_X = tf.placeholder("float", [None, 780])
te_X = tf.placeholder("float", [780])

# L1 distance from the test sample to every training sample
distance = tf.reduce_sum(tf.abs(tf.add(tra_X, tf.negative(te_X))), reduction_indices=1)
# prediction: index of the nearest training sample
pred = tf.arg_min(distance, 0)

accuracy = 0.

# initialize variables
init = tf.initialize_all_variables()

# start the session
with tf.Session() as sess:
    sess.run(init)

    # loop over the test samples
    for i in range(len(test_X)):
        # find the nearest neighbour
        nn_index = sess.run(pred, feed_dict={tra_X: train_X, te_X: test_X[i, :]})
        # compare the nearest neighbour's label with the true label
        print("Test", i, "Prediction:", np.argmax(train_Y[nn_index]),
              "True Class:", np.argmax(test_Y[i]))
        # accumulate the accuracy
        if np.argmax(train_Y[nn_index]) == np.argmax(test_Y[i]):
            accuracy += 1. / len(test_X)
    print("Done!")
    print("Accuracy:", accuracy)

With both the CNN and the KNN approach, accuracy on the test set reaches over 90%.

  • The fourth kind, QR-code verification, looks complicated but is actually the simplest. By capturing with Fiddler the request sent after a phone scans the QR code, I found that every account has a fixed, unchanging value. At recognition time, sending one simulated request carrying that value to the site's dynamic verification-code interface is enough to obtain the code; a generic sketch of such a replayed request follows.
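
    As an illustration only, the replayed request might look roughly like the sketch below; the URL, parameter name, and response field are hypothetical placeholders, since the real endpoint and the per-account fixed value come from the captured traffic.

# Hypothetical sketch of replaying the captured request (URL, parameter name
# and response field are placeholders, not the real interface).
import requests

FIXED_ACCOUNT_VALUE = "..."  # the constant value observed in the Fiddler capture

def fetch_dynamic_code():
    resp = requests.get(
        "https://example.com/api/dynamic_code",  # placeholder endpoint
        params={"token": FIXED_ACCOUNT_VALUE},   # placeholder parameter name
        timeout=10,
    )
    resp.raise_for_status()
    return resp.json().get("code")               # placeholder response field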

Title: A Summary of CAPTCHA Recognition

Author: fengxi

Published: December 18, 2018, 16:12

Last updated: April 4, 2019, 21:04

Original link: https://super-storm.github.io/2018/12/18/验证码识别的总结/

License: Attribution-NonCommercial-NoDerivatives 4.0 International. Please keep the original link and author attribution when reposting.