我目前工作中对接的目标网站大多采用文本识别类型的验证码,类似于这种
还有微信二维码动态获取的
前两种验证码可以使用像素点比对的方式:将样本图片切割成单个字符的图片,然后灰度化、二值化、去噪点,得到白底黑字的样本;使用python科学计算库numpy将图片转换成一个由0和1组成的矩阵np.array(img),最后把每个字符样本以(识别值:矩阵值)的形式保存到一个python文件中。训练出大量的样本数据并保存为矩阵值后,识别的时候将需要识别的图片切割,按同样的方式转化成矩阵值,然后和样本库一一比对sum(imap(lambda x, y: bin(x ^ y).count('1'), l1, l2)),差异最少的那个矩阵对应的值作为识别值。这种识别方式比较简单粗暴,所以就不贴详细过程了。
第三种中文识别一开始我还是采用上面那种方式识别,效果也不错,不过存在一些问题:容易把两个形状相近的字搞错,比如把陆和除、叁和乘、加和四搞混,但也基本满足生产需求,平均正确率70%。为了提高正确率,我改用机器学习的方法来训练,先后尝试了卷积神经网络(cnn)和k近邻算法(knn)。
使用谷歌的tensorflow框架就可以搭建卷积网络模型cnn:
#coding=utf-8
import os
#图像读取库
from PIL import Image
from PIL import ImageEnhance
from PIL import ImageFilter
#矩阵运算库
import numpy as np
import tensorflow as tf
# Directories holding the training images and the test images
data_dir = "data"
test_data_dir = "test_data"
# Run mode: True trains and saves the model, False restores it and evaluates
train = True
# Checkpoint path used when saving / restoring the model
model_path = "model/image_model"
# Known characters; a sample's integer label is its character's index in this list
char_to_digit = ["零","壹","贰","叁","肆","伍","陆","柒","捌","玖","拾","一","二","三","四","五","六","七","八","九","加","减","乘","除"]
def max_dif(x, y, z):
    """Return the largest pairwise difference among three values.

    This is the spread ``max(x, y, z) - min(x, y, z)``. The previous
    hand-rolled comparison overwrote the running minimum in its ``else``
    branches, so e.g. ``(5, 3, 4)`` yielded 1 instead of 2; it also shadowed
    the ``max``/``min`` builtins.
    """
    values = (x, y, z)
    return max(values) - min(values)
def binar_by_rgb(img):
    """Blacken light, near-gray pixels of an RGB image.

    A pixel is set to ``(0, 0, 0)`` when its channel spread (max - min) is
    below 16, its darkest channel is above 128 and its brightest channel is
    below 255. All other pixels are left untouched.

    Args:
        img: image exposing ``mode``, ``size``, ``getpixel`` and ``putpixel``.
            RGB images are modified in place; any other mode is first
            converted to RGB, in which case a new image is returned.

    Returns:
        The processed RGB image.
    """
    # getpixel() only yields an (r, g, b) triple in RGB mode.
    if img.mode != "RGB":
        img = img.convert("RGB")
    width, height = img.size
    for y in range(height):
        for x in range(width):
            channels = img.getpixel((x, y))
            lowest = min(channels)
            highest = max(channels)
            # The spread is computed directly so the result does not depend
            # on the max_dif helper.
            if highest - lowest < 16 and lowest > 128 and highest < 255:
                img.putpixel((x, y), (0, 0, 0))
    return img
def graying(img):
    """Return the result of converting ``img`` to mode ``"L"`` (grayscale)."""
    return img.convert("L")
def remove_boader(img):
    """Paint the two outermost pixel rows and columns with value 255.

    The image is modified in place and also returned.
    """
    width, height = img.size
    edge_rows = (0, 1, height - 1, height - 2)
    edge_cols = (0, 1, width - 1, width - 2)
    # Top and bottom bands.
    for col in range(width):
        for row in edge_rows:
            img.putpixel((col, row), 255)
    # Left and right bands.
    for row in range(height):
        for col in edge_cols:
            img.putpixel((col, row), 255)
    return img
def filter_line(img, v):
    """Break thin diagonal strokes apart by whitening their pixels.

    Every black pixel (value 0) at least ``v`` pixels away from each edge is
    examined, scanning column by column. It is set to white (255) when at
    least three of its four orthogonal neighbours at distance ``v`` are white
    and at least one of its four diagonal neighbours at distance ``v`` is
    black. The image is modified in place, so pixels whitened earlier in the
    scan influence later decisions.

    Args:
        img: single-channel image whose pixels compare against 0 / 255.
        v: neighbour distance in pixels, also the margin skipped on each side.

    Returns:
        The same ``img`` object.
    """
    width, height = img.size
    for x in range(v, width - v):
        for y in range(v, height - v):
            if img.getpixel((x, y)) != 0:
                continue
            orthogonal = ((x, y - v), (x - v, y), (x, y + v), (x + v, y))
            # The left-hand diagonals previously used a hard-coded offset of
            # 1, which only matched the other neighbours when v == 1.
            diagonal = (
                (x + v, y + v),
                (x + v, y - v),
                (x - v, y + v),
                (x - v, y - v),
            )
            white_count = sum(1 for p in orthogonal if img.getpixel(p) == 255)
            has_black_diagonal = any(img.getpixel(p) == 0 for p in diagonal)
            if has_black_diagonal and white_count >= 3:
                img.putpixel((x, y), 255)
    return img
def filter_line_pro(img, v):
    """Remove isolated black pixels (noise dots).

    Every black pixel (value 0) at least ``v`` pixels away from each edge is
    examined, scanning column by column. The pixel itself plus its black
    neighbours among the eight positions at distance ``v`` are counted; when
    that count is 1 or 2 the pixel is treated as noise and set to white
    (255). The image is modified in place, so earlier removals influence
    later decisions.

    Args:
        img: single-channel image whose pixels compare against 0 / 255.
        v: neighbour distance in pixels, also the margin skipped on each side.

    Returns:
        The same ``img`` object.
    """
    width, height = img.size
    for x in range(v, width - v):
        for y in range(v, height - v):
            if img.getpixel((x, y)) != 0:
                continue
            # The left-hand diagonals previously used a hard-coded offset of
            # 1, which only matched the other neighbours when v == 1.
            neighbours = (
                (x, y - v),
                (x - v, y),
                (x, y + v),
                (x + v, y),
                (x + v, y + v),
                (x + v, y - v),
                (x - v, y + v),
                (x - v, y - v),
            )
            # Start at 1 to count the centre pixel itself.
            black_count = 1 + sum(1 for p in neighbours if img.getpixel(p) == 0)
            if black_count <= 2:
                img.putpixel((x, y), 255)
    return img
def binarization(img):
    """Binarize to black text on a white background, then denoise.

    Pixels with value >= 224 become 255 and all others become 0. The result
    is then passed through remove_boader, filter_line and filter_line_pro
    (both with distance 1), in that order.
    """
    width, height = img.size
    for row in range(height):
        for col in range(width):
            value = 255 if img.getpixel((col, row)) >= 224 else 0
            img.putpixel((col, row), value)
    cleaned = remove_boader(img)
    cleaned = filter_line(cleaned, 1)
    return filter_line_pro(cleaned, 1)
# Read images and labels from a folder into numpy arrays.
# The label is encoded in the file name: the text before the first "_" is
# looked up in char_to_digit and its index becomes the label.
def read_data(data_dir):
    """Load, preprocess and label every character image in ``data_dir``.

    Files whose name prefix (text before the first ``_``) is not in
    ``char_to_digit`` are skipped. Each remaining image is contrast-enhanced,
    edge-enhanced, binarized, saved under ``data/preprocessed_data/`` and
    reshaped to ``(30, 26, 1)``.

    Args:
        data_dir: directory containing the image files.

    Returns:
        ``(fpaths, datas, labels)``: the list of source paths, an array of
        preprocessed images and an array of integer labels, in matching order.
    """
    # Saving used to fail when this directory had not been created by hand.
    if not os.path.isdir("data/preprocessed_data"):
        os.makedirs("data/preprocessed_data")
    datas = []
    labels = []
    fpaths = []
    for fname in os.listdir(data_dir):
        char = fname.split("_")[0]
        if char not in char_to_digit:
            continue  # unknown label, skip the file
        fpath = os.path.join(data_dir, fname)
        fpaths.append(fpath)
        # Close the file handle promptly; convert so that palette / RGBA
        # inputs reach binar_by_rgb as (r, g, b) triples.
        with Image.open(fpath) as raw:
            image = raw.convert("RGB")
        image = ImageEnhance.Contrast(image).enhance(2.0)
        image = image.filter(ImageFilter.EDGE_ENHANCE_MORE)
        image = binar_by_rgb(image)
        image = graying(image)
        image = binarization(image)
        image.save("data/preprocessed_data/{}".format(fname))
        data = np.array(image)
        datas.append(data.reshape(30, 26, 1))
        labels.append(char_to_digit.index(char))
    datas = np.array(datas)
    labels = np.array(labels)
    print("shape of datas: {}\tshape of labels: {}".format(datas.shape, labels.shape))
    return fpaths, datas, labels
fpaths, datas, labels = read_data(data_dir)
test_fpath, test_datas, test_labels = read_data(test_data_dir)
data_len = datas.shape[0]
# Number of classes. Labels are indices into char_to_digit, so the output
# layer needs one unit per known character even if some character is absent
# from the training folder.
num_classes = len(char_to_digit)
# Placeholders for the inputs and labels; each sample is 30 x 26 x 1.
datas_placeholder = tf.placeholder(tf.float32, [None, 30, 26, 1])
labels_placeholder = tf.placeholder(tf.int32, [None])
# Dropout rate: fed 0.25 while training and 0 while testing.
dropout_placeholdr = tf.placeholder(tf.float32)
# Convolution layer: 25 filters, kernel size 1, ReLU activation.
# NOTE(review): a 1x1 kernel sees no spatial context - confirm it is intended.
conv0 = tf.layers.conv2d(datas_placeholder, 25, 1, activation=tf.nn.relu)
# Max pooling, 2x2 window, 2x2 stride.
pool0 = tf.layers.max_pooling2d(conv0, [2, 2], [2, 2])
# Convolution layer: 40 filters, kernel size 1, ReLU activation.
# It is fed from pool0; previously it read conv0, bypassing the pooling layer.
conv1 = tf.layers.conv2d(pool0, 40, 1, activation=tf.nn.relu)
# Max pooling, 2x2 window, 2x2 stride.
pool1 = tf.layers.max_pooling2d(conv1, [2, 2], [2, 2])
# Flatten the pooled feature maps (previously conv1 was flattened, leaving
# pool1 unused). Checkpoints saved with the old wiring are not compatible.
flatten = tf.layers.flatten(pool1)
# Fully connected layer producing a 100-element feature vector.
fc = tf.layers.dense(flatten, 100, activation=tf.nn.relu)
# Dropout against overfitting. tf.layers.dropout is a no-op unless
# training=True; the rate placeholder (0 at test time) switches it off.
dropout_fc = tf.layers.dropout(fc, dropout_placeholdr, training=True)
# Output layer without activation.
logits = tf.layers.dense(dropout_fc, num_classes)
predicted_labels = tf.argmax(logits, 1)
# Cross-entropy loss per sample.
losses = tf.nn.softmax_cross_entropy_with_logits(
    labels=tf.one_hot(labels_placeholder, num_classes),
    logits=logits
)
# Mean loss over the batch.
mean_loss = tf.reduce_mean(losses)
# Optimizer; minimize the scalar mean loss rather than the per-sample vector.
optimizer = tf.train.AdamOptimizer(learning_rate=1e-2).minimize(mean_loss)
# Saves and restores the model.
saver = tf.train.Saver()
with tf.Session() as sess:
    if train:
        print("训练模式")
        # Training: initialize the variables.
        sess.run(tf.global_variables_initializer())
        # Feed the whole training set with a dropout rate of 0.25.
        train_feed_dict = {
            datas_placeholder: datas,
            labels_placeholder: labels,
            dropout_placeholdr: 0.25
        }
        for step in range(150):
            _, mean_loss_val = sess.run([optimizer, mean_loss], feed_dict=train_feed_dict)
            if step % 10 == 0:
                print("step = {}\tmean loss = {}".format(step, mean_loss_val))
        saver.save(sess, model_path)
        print("训练结束,保存模型到{}".format(model_path))
    else:
        print("测试模式")
        # Testing: restore the trained variables.
        saver.restore(sess, model_path)
        print("从{}载入模型".format(model_path))
        label_name_dict = dict(enumerate(char_to_digit))
        # Feed the test set with dropout disabled.
        test_feed_dict = {
            datas_placeholder: test_datas,
            labels_placeholder: test_labels,
            dropout_placeholdr: 0
        }
        predicted_labels_val = sess.run(predicted_labels, feed_dict=test_feed_dict)
        # Compare the true label with the predicted label for every sample.
        err_count = 0
        err_pred = []
        for fpath, real_label, predicted_label in zip(test_fpath, test_labels, predicted_labels_val):
            # Map label ids back to label names.
            real_label_name = label_name_dict[real_label]
            predicted_label_name = label_name_dict[predicted_label]
            if real_label_name != predicted_label_name:
                err_count += 1
                err_pred.append((real_label_name, predicted_label_name))
            print("{}\t{} => {}".format(fpath, real_label_name, predicted_label_name))
        print(err_count)
        print(err_pred)
k近邻也是用tensorflow实现的:
import numpy as np
import tensorflow as tf
import tensorflow.examples.tutorials.mnist.input_data as input_data
import os
#图像读取库
from PIL import Image
from PIL import ImageEnhance
from PIL import ImageFilter
#矩阵运算库
import numpy as np
import tensorflow as tf
# Directories holding the training images and the test images
data_dir = "data"
test_data_dir = "test_data"
# Train-or-test switch
# NOTE(review): not referenced by the k-nearest-neighbour code visible below
train = True
# Model file path
# NOTE(review): not referenced by the k-nearest-neighbour code visible below
model_path = "model/image_model"
# Known characters; a sample's integer label is its character's index in this list
char_to_digit = ["零","壹","贰","叁","肆","伍","陆","柒","捌","玖","拾","一","二","三","四","五","六","七","八","九","加","减","乘","除"]
def max_dif(x, y, z):
    """Return the largest pairwise difference among three values.

    This is the spread ``max(x, y, z) - min(x, y, z)``. The previous
    hand-rolled comparison overwrote the running minimum in its ``else``
    branches, so e.g. ``(5, 3, 4)`` yielded 1 instead of 2; it also shadowed
    the ``max``/``min`` builtins.
    """
    values = (x, y, z)
    return max(values) - min(values)
def binar_by_rgb(img):
    """Blacken light, near-gray pixels of an RGB image.

    A pixel is set to ``(0, 0, 0)`` when its channel spread (max - min) is
    below 16, its darkest channel is above 128 and its brightest channel is
    below 255. All other pixels are left untouched.

    Args:
        img: image exposing ``mode``, ``size``, ``getpixel`` and ``putpixel``.
            RGB images are modified in place; any other mode is first
            converted to RGB, in which case a new image is returned.

    Returns:
        The processed RGB image.
    """
    # getpixel() only yields an (r, g, b) triple in RGB mode.
    if img.mode != "RGB":
        img = img.convert("RGB")
    width, height = img.size
    for y in range(height):
        for x in range(width):
            channels = img.getpixel((x, y))
            lowest = min(channels)
            highest = max(channels)
            # The spread is computed directly so the result does not depend
            # on the max_dif helper.
            if highest - lowest < 16 and lowest > 128 and highest < 255:
                img.putpixel((x, y), (0, 0, 0))
    return img
def graying(img):
    """Return the result of converting ``img`` to mode ``"L"`` (grayscale)."""
    return img.convert("L")
def remove_boader(img):
    """Paint the two outermost pixel rows and columns with value 255.

    The image is modified in place and also returned.
    """
    width, height = img.size
    edge_rows = (0, 1, height - 1, height - 2)
    edge_cols = (0, 1, width - 1, width - 2)
    # Top and bottom bands.
    for col in range(width):
        for row in edge_rows:
            img.putpixel((col, row), 255)
    # Left and right bands.
    for row in range(height):
        for col in edge_cols:
            img.putpixel((col, row), 255)
    return img
def filter_line(img, v):
    """Break thin diagonal strokes apart by whitening their pixels.

    Every black pixel (value 0) at least ``v`` pixels away from each edge is
    examined, scanning column by column. It is set to white (255) when at
    least three of its four orthogonal neighbours at distance ``v`` are white
    and at least one of its four diagonal neighbours at distance ``v`` is
    black. The image is modified in place, so pixels whitened earlier in the
    scan influence later decisions.

    Args:
        img: single-channel image whose pixels compare against 0 / 255.
        v: neighbour distance in pixels, also the margin skipped on each side.

    Returns:
        The same ``img`` object.
    """
    width, height = img.size
    for x in range(v, width - v):
        for y in range(v, height - v):
            if img.getpixel((x, y)) != 0:
                continue
            orthogonal = ((x, y - v), (x - v, y), (x, y + v), (x + v, y))
            # The left-hand diagonals previously used a hard-coded offset of
            # 1, which only matched the other neighbours when v == 1.
            diagonal = (
                (x + v, y + v),
                (x + v, y - v),
                (x - v, y + v),
                (x - v, y - v),
            )
            white_count = sum(1 for p in orthogonal if img.getpixel(p) == 255)
            has_black_diagonal = any(img.getpixel(p) == 0 for p in diagonal)
            if has_black_diagonal and white_count >= 3:
                img.putpixel((x, y), 255)
    return img
def filter_line_pro(img, v):
    """Remove isolated black pixels (noise dots).

    Every black pixel (value 0) at least ``v`` pixels away from each edge is
    examined, scanning column by column. The pixel itself plus its black
    neighbours among the eight positions at distance ``v`` are counted; when
    that count is 1 or 2 the pixel is treated as noise and set to white
    (255). The image is modified in place, so earlier removals influence
    later decisions.

    Args:
        img: single-channel image whose pixels compare against 0 / 255.
        v: neighbour distance in pixels, also the margin skipped on each side.

    Returns:
        The same ``img`` object.
    """
    width, height = img.size
    for x in range(v, width - v):
        for y in range(v, height - v):
            if img.getpixel((x, y)) != 0:
                continue
            # The left-hand diagonals previously used a hard-coded offset of
            # 1, which only matched the other neighbours when v == 1.
            neighbours = (
                (x, y - v),
                (x - v, y),
                (x, y + v),
                (x + v, y),
                (x + v, y + v),
                (x + v, y - v),
                (x - v, y + v),
                (x - v, y - v),
            )
            # Start at 1 to count the centre pixel itself.
            black_count = 1 + sum(1 for p in neighbours if img.getpixel(p) == 0)
            if black_count <= 2:
                img.putpixel((x, y), 255)
    return img
def binarization(img):
    """Binarize to black text on a white background, then denoise.

    Pixels with value >= 224 become 255 and all others become 0. The result
    is then passed through remove_boader, filter_line and filter_line_pro
    (both with distance 1), in that order.
    """
    width, height = img.size
    for row in range(height):
        for col in range(width):
            value = 255 if img.getpixel((col, row)) >= 224 else 0
            img.putpixel((col, row), value)
    cleaned = remove_boader(img)
    cleaned = filter_line(cleaned, 1)
    return filter_line_pro(cleaned, 1)
# Read images and labels from a folder into numpy arrays.
# The label is encoded in the file name: the text before the first "_" is
# looked up in char_to_digit and its index becomes the label.
def read_data(data_dir):
    """Load, preprocess and label every character image in ``data_dir``.

    Files whose name prefix (text before the first ``_``) is not in
    ``char_to_digit`` are skipped. Each remaining image is contrast-enhanced,
    edge-enhanced, binarized, saved under ``data/preprocessed_data/`` and
    flattened to a 1-D vector. Labels are one-hot vectors with one slot per
    entry of ``char_to_digit``.

    Args:
        data_dir: directory containing the image files.

    Returns:
        ``(fpaths, datas, labels)``: the list of source paths, an array of
        flattened images and an array of one-hot labels, in matching order.
    """
    # Saving used to fail when this directory had not been created by hand.
    if not os.path.isdir("data/preprocessed_data"):
        os.makedirs("data/preprocessed_data")
    datas = []
    labels = []
    fpaths = []
    for fname in os.listdir(data_dir):
        char = fname.split("_")[0]
        if char not in char_to_digit:
            continue  # unknown label, skip the file
        fpath = os.path.join(data_dir, fname)
        fpaths.append(fpath)
        # Close the file handle promptly; convert so that palette / RGBA
        # inputs reach binar_by_rgb as (r, g, b) triples.
        with Image.open(fpath) as raw:
            image = raw.convert("RGB")
        image = ImageEnhance.Contrast(image).enhance(2.0)
        image = image.filter(ImageFilter.EDGE_ENHANCE_MORE)
        image = binar_by_rgb(image)
        image = graying(image)
        image = binarization(image)
        image.save("data/preprocessed_data/{}".format(fname))
        data = np.array(image)
        datas.append(data.flatten())
        # Size the one-hot vector from the class list instead of a literal
        # list of 24 zeros, so it stays in sync with char_to_digit.
        one_hot = [0] * len(char_to_digit)
        one_hot[char_to_digit.index(char)] = 1
        labels.append(one_hot)
    datas = np.array(datas)
    labels = np.array(labels)
    print("shape of datas: {}\tshape of labels: {}".format(datas.shape, labels.shape))
    return fpaths, datas, labels
fpaths, train_X, train_Y = read_data(data_dir)
test_fpath, test_X, test_Y = read_data(test_data_dir)
# A leftover pdb.set_trace() breakpoint used to halt the script here; it and
# its import were removed.
# Each sample is a flattened 30 x 26 image, i.e. 780 values.
tra_X = tf.placeholder("float", [None, 780])
te_X = tf.placeholder("float", [780])
# L1 distance between the test sample and every training sample.
distance = tf.reduce_sum(tf.abs(tf.add(tra_X, tf.negative(te_X))), axis=1)
# Prediction: index of the nearest training sample.
pred = tf.argmin(distance, 0)
# Initialize variables (same initializer call as the CNN script).
init = tf.global_variables_initializer()
correct = 0
# Open the session.
with tf.Session() as sess:
    sess.run(init)
    # Loop over the test samples.
    for i, sample in enumerate(test_X):
        # Find the nearest neighbour.
        nn_index = sess.run(pred, feed_dict={tra_X: train_X, te_X: sample})
        # Compare the neighbour's label with the true label.
        predicted = np.argmax(train_Y[nn_index])
        actual = np.argmax(test_Y[i])
        print("Test", i, "Prediction:", predicted, "True Class:", actual)
        if predicted == actual:
            correct += 1
print("Done!")
# Count hits and divide once, which avoids accumulating float error and a
# ZeroDivisionError on an empty test set.
accuracy = float(correct) / len(test_X) if len(test_X) else 0.
print("Accuracy:", accuracy)
使用cnn和knn后,测试集正确率均能达90%以上
- 第四种二维码的验证方式看着复杂,实则是最简单的,通过fiddler抓包手机扫描二维码后的请求,发现每一个账号都有其固定不变的一个值,在识别的时候带着那个值模拟请求一次对方的动态验证码接口,就可以拿到验证码