最近遇到了一个验证码识别的问题,不过经过观察该验证码相当的简陋,且看下面几个验证码图片的例子:
事实上,上面我已经把所有可能的字符都列出来了,包含0~9A-Z共36个字符。这16张图片都有下面几个共同特征:
- 背景色都是白色或者是纯色
- 前景色都不是白色
- 文字都很规则,没有做过扭曲处理
- 图片大小都是40x10个像素
- 每张图片上都只有4个字符
- 结合特征4和特征5来看,每个字符都是占据了10x10个像素
- 没有噪音
就上面这些特征决定了,我们要识别这些验证码非常的简单,识别率绝对是100%。这种验证码就是形同虚设。 下面我们就用简单的代码来识别它。 首先采集36个字符的图像特征到一个数据库(广义的,我们这里就用一个简单的Map来存储): 首先我们把上面包含了所有36个字符的16个图片存放在同一个目录下,并将其文件名定为字符上的文字,比如第一张图片就叫“0JQT.bmp”。 接
着我们写一个小程序来将所有字符的编码采集下来并保存成一个映射文件,采集特征信息的大体过程是先把一张40x10的图片分割成4张10x10的图片(因
为一个10x10上就是一个字符),然后分别扫描每个10x10的图片,记录每个像素的特征,假如这个像素是白色(背景是白色,经过代码计算发现这个背景
并不是那么的白,而是255,250,250,所以你看到代码中判断是否是白色用的是 >= 250 而不是
255)那么记录为1,否则记录为0,然后把10x10=100个像素的0、1标识拼接成一个字符串,这个字符串就代表一个字符了。
/** * Created on 2007-11-18 下午03:33:28 */ import java.awt.Image; import java.awt.image.PixelGrabber; import java.io.File; import java.io.FileOutputStream; import java.io.FilenameFilter; import java.util.HashMap; import java.util.Map; import java.util.Properties;
import javax.imageio.ImageIO;
import org.apache.commons.io.FilenameUtils; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory;
/** * @author sutra * */ public class Gather { private static final Log log = LogFactory.getLog(Gather.class);
private static int handleSinglePixel(int x, int y, int pixel) { // int alpha = (pixel >> 24) & 0xff; int red = (pixel >> 16) & 0xff; int green = (pixel >> 8) & 0xff; int blue = (pixel) & 0xff; // Deal with the pixel as necessary... log.debug(x + "," + y + ":" + red + "," + green + "," + blue); int white = 0; if (red >= 250 && green >= 250 && blue >= 250) { white = 1; } // System.out.println(String.format("%1$s,%2$s:%3$s", x, y, w)); return white; }
public static String[] gather(Image src) throws InterruptedException { int width = src.getWidth(null); // 得到源图宽 int height = src.getHeight(null); // 得到源图长 log.debug("width: " + width); log.debug("height: " + height); int pixels[] = new int[width * height]; PixelGrabber pg = new PixelGrabber(src, 0, 0, width, height, pixels, 0, width); pg.grabPixels();
String[] ret = new String[4];
for (int x = 0; x < 40; x += 10) { int y = 0; StringBuilder sb = new StringBuilder(); for (int j = 0; j < height; j++) { for (int i = x; i < x + 10; i++) { int w = handleSinglePixel(x + i, y + j, pixels[j * width + i]); sb.append(w); } } log.debug(x + ":" + sb.toString()); ret[x / 10] = sb.toString(); }
return ret; }
/** * @param args * @throws Exception */ public static void main(String[] args) throws Exception { File bmpDir = new File("src/main/bmp/"); File[] bmps = bmpDir.listFiles(new FilenameFilter() {
public boolean accept(File dir, String name) { return "bmp".equalsIgnoreCase(FilenameUtils.getExtension(name)); }
}); Map codes = new HashMap(); for (File bmp : bmps) { log.debug("bmp: " + bmp); Image src = ImageIO.read(bmp); // 构造Image对象 String filename = bmp.getName(); String[] charCodes = gather(src); for (int i = 0; i < 4; i++) { char ch = filename.charAt(i); String code = charCodes[i]; String old; if ((old = codes.get(ch)) != null) { if (!old.equals(code)) { throw new Exception("如果发生这样的异常,说明我们的假设有问题。"); } else { log.debug("old equals new"); } } else { codes.put(ch, code); } } }
char[] allChars = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ".toCharArray(); Properties codesDb = new Properties(); for (char ch : allChars) { log.debug("ch: " + ch); String code = codes.get(ch); if (code == null) { // 做点检查,如果缺少的话,你需要去收集更多的图片 throw new Exception("缺少 " + ch); } codesDb.put(new String(new char[] { ch }), codes.get(ch)); } codesDb.list(System.out); codesDb.store(new FileOutputStream("codes.db"), "codes"); } }
这样我们就得到这样一个映射表:
0:1110000111110111101111011110111101001011110100101111010010111101001011110111101111011110111110000111 1:1111011111110001111111110111111111011111111101111111110111111111011111111101111111110111111100000111 2:1110000111110111101111011110111111111011111111011111111011111111011111111011111111011110111100000011 3:1110000111110111101111011110111111110111111100111111111101111111111011110111101111011110111110000111 4:1111101111111110111111110011111110101111110110111111011011111100000011111110111111111011111111000011 5:1100000011110111111111011111111101000111110011101111111110111111111011110111101111011110111110000111 6:1111000111111011101111011111111101111111110100011111001110111101111011110111101111011110111110000111 7:1100000011110111011111011101111111101111111110111111110111111111011111111101111111110111111111011111 8:1110000111110111101111011110111101111011111000011111101101111101111011110111101111011110111110000111 9:1110001111110111011111011110111101111011110111001111100010111111111011111111101111011101111110001111 A:1111011111111101111111101011111110101111111010111111101011111100000111110111011111011101111000100011 B:1000000111110111101111011110111101110111110000111111011101111101111011110111101111011110111000000111 C:1110000011110111101110111110111011111111101111111110111111111011111111101111101111011101111110001111 D:1000001111110111011111011110111101111011110111101111011110111101111011110111101111011101111000001111 E:1000000111110111101111011011111101101111110000111111011011111101101111110111111111011110111000000111 F:1000000111110111101111011011111101101111110000111111011011111101101111110111111111011111111000111111 G:1110000111110111011110111101111011111111101111111110111111111011100011101111011111011101111110001111 H:1000100011110111011111011101111101110111110000011111011101111101110111110111011111011101111000100011 I:1100000111111101111111110111111111011111111101111111110111111111011111111101111111110111111100000111 
J:1110000011111110111111111011111111101111111110111111111011111111101111111110111110111011111000011111 K:1000100011110111011111011011111101011111110001111111010111111101101111110110111111011101111000100011 L:1000111111110111111111011111111101111111110111111111011111111101111111110111111111011110111000000011 M:1000100011110010011111001001111100100111110101011111010101111101010111110101011111010101111001010011 N:1000100011110011011111001101111101010111110101011111010101111101100111110110011111011001111000110111 O:1110001111110111011110111110111011111011101111101110111110111011111011101111101111011101111110001111 P:1000000111110111101111011110111101111011110000011111011111111101111111110111111111011111111000111111 Q:1110001111110111011110111110111011111011101111101110111110111011111011101001101111011001111110001011 R:1000001111110111011111011101111101110111110000111111010111111101101111110110111111011101111000110011 S:1110000011110111101111011110111101111111111001111111111001111111111011110111101111011110111100000111 T:1000000011101101101111110111111111011111111101111111110111111111011111111101111111110111111110001111 U:1000100011110111011111011101111101110111110111011111011101111101110111110111011111011101111110001111 V:1000100011110111011111011101111101110111111010111111101011111110101111111010111111110111111111011111 W:1001010011110101011111010101111101010111110101011111001001111110101111111010111111101011111110101111 X:1000100011110111011111101011111110101111111101111111110111111110101111111010111111011101111000100011 Y:1000100011110111011111011101111110101111111010111111110111111111011111111101111111110111111110001111 Z:1100000011110111011111111101111111101111111110111111110111111111011111111011111111101110111100000011
然后写一个根据这个保存好的映射文件来识别图片的 ImageParser 吧,也很简单:
/** * Created on 2007-11-18 下午04:59:12 */ import java.awt.Image; import java.io.IOException; import java.util.Enumeration; import java.util.HashMap; import java.util.Map; import java.util.Properties;
import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory;
/**
 * Recognises the text of a captcha image by looking up the pixel signature
 * of each character cell (as produced by {@code Gather.gather}) in a
 * signature table loaded from the classpath.
 *
 * @author sutra
 */
public class ImageParser {
    @SuppressWarnings("unused")
    private static final Log log = LogFactory.getLog(ImageParser.class);

    /**
     * Classpath locations of the signature table, in lookup order.
     * {@code codes.db} is the file name written by {@code Gather.main};
     * {@code code.db} is kept as a fallback for existing setups.
     */
    private static final String[] CODE_RESOURCES = { "/codes.db", "/code.db" };

    /** Holds the singleton; the instance is created when this class loads. */
    private static class SingletonHolder {
        public static final ImageParser instance;
        static {
            try {
                instance = new ImageParser();
            } catch (IOException e) {
                throw new RuntimeException(e);
            }
        }
    }

    /** Maps a character cell's pixel signature to the character it shows. */
    private final Map<String, String> codes;

    /**
     * Loads the signature table and inverts it (signature to character).
     *
     * @throws IOException if the table is not on the classpath or cannot
     *         be read
     */
    private ImageParser() throws IOException {
        codes = new HashMap<String, String>(36);
        Properties p = new Properties();
        // Fully qualified so this block needs no additional import.
        java.io.InputStream in = openCodeTable();
        try {
            p.load(in);
        } finally {
            in.close();
        }
        Enumeration<Object> e = p.keys();
        while (e.hasMoreElements()) {
            String n = (String) e.nextElement();
            String v = p.getProperty(n);
            codes.put(v, n);
        }
    }

    /**
     * Opens the first signature table found among {@link #CODE_RESOURCES}.
     *
     * @return an open stream the caller must close
     * @throws IOException if none of the resources exists
     */
    private static java.io.InputStream openCodeTable() throws IOException {
        for (String resource : CODE_RESOURCES) {
            // getResourceAsStream returns null for a missing resource.
            java.io.InputStream in =
                    ImageParser.class.getResourceAsStream(resource);
            if (in != null) {
                return in;
            }
        }
        throw new IOException("signature table not found on the classpath"
                + " (tried /codes.db and /code.db)");
    }

    /**
     * Returns the shared parser instance.
     *
     * @return the singleton
     */
    public static ImageParser getInstance() {
        return SingletonHolder.instance;
    }

    /**
     * Recognises the characters shown in an image.
     *
     * @param src the captcha image
     * @return the recognised characters in left-to-right order; a cell whose
     *         signature is not in the table contributes the text
     *         {@code "null"}
     * @throws InterruptedException if interrupted while reading the pixels
     */
    public String parse(Image src) throws InterruptedException {
        String[] signatures = Gather.gather(src);
        StringBuilder sb = new StringBuilder();
        for (String signature : signatures) {
            sb.append(this.codes.get(signature));
        }
        return sb.toString();
    }
}
最后就是如何来调用这个 ImageParser ,看它的一个单元测试就明白了:
/** * Created on 2007-11-22 下午11:18:17 */ import static org.junit.Assert.assertEquals;
import java.io.File; import java.io.FilenameFilter; import java.io.IOException;
import javax.imageio.ImageIO;
import org.apache.commons.io.FilenameUtils; import org.junit.Test;
/**
 * Runs {@link ImageParser} over the sample bitmaps.
 *
 * @author sutra
 */
public class ImageParserTest {

    /**
     * Test for {@link ImageParser#parse(java.awt.Image)}: every {@code .bmp}
     * file in {@code src/main/bmp/} must be recognised as the base name of
     * the file it was read from.
     *
     * @throws IOException if a sample image cannot be read
     * @throws InterruptedException if pixel extraction is interrupted
     */
    @Test
    public void testParse() throws InterruptedException, IOException {
        FilenameFilter bmpOnly = new FilenameFilter() {
            public boolean accept(File dir, String name) {
                String extension = FilenameUtils.getExtension(name);
                return "bmp".equalsIgnoreCase(extension);
            }
        };
        File sampleDir = new File("src/main/bmp/");
        File[] samples = sampleDir.listFiles(bmpOnly);

        for (int k = 0; k < samples.length; k++) {
            File sample = samples[k];
            // The file's base name is the text the image is known to show.
            String expected = FilenameUtils.getBaseName(sample.getName());
            String actual = ImageParser.getInstance()
                    .parse(ImageIO.read(sample));
            assertEquals(expected, actual);
        }
    }
}
这样简单的验证码还存在吗?存在的,确实存在的,有些程序员根本就没有理解验证码的目的,也许他只不过是看人家弄一个他也弄一个。这样的程序员存在吗?存在的,确实存在的,而且必将永远存在下去。 或者他弄个简单的验证码的目的就是为了让某些人去识别。
|