數字驗證碼很多地方都會用到,我前段時間也寫過一篇有關於生成驗證碼的文章,那是隨機生成大小不一,顏色不一,形狀不一的數字圖片.本文主要是針對那些比較規範的驗證碼的識別.何謂規範?規範就是數字的大小幾乎一致,顏色對比度挺高,沒什麼干擾線.識別的依據就是最最最基礎的辦法——比對:先取樣,保存成字模,再用字模去和將要識別的圖片進行比較,取最接近的那個結果.不過在比較之前必須先把圖片裡面的數據提取出來,並適當地去除一些干擾.
下面就是識別部份的代碼:
/*
* ImageCode.java
*
* Created on 2007年1月18日, 下午10:00
*
* To change this template, choose Tools | Template Manager
* and open the template in the editor.
*/
package net.bccn.hadeslee.programfan;
import java.awt.image.BufferedImage;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.StreamTokenizer;
import java.net.URL;
import javax.imageio.ImageIO;
/**
 * Recognizes the digits of a regular, low-noise numeric captcha image by
 * template matching.
 *
 * <p>The image is treated as four side-by-side cells of 16x13 pixels. Each cell
 * is binarized (opaque white is background, anything else is foreground) and
 * compared pixel by pixel with stored glyph templates; the digit whose template
 * differs in the fewest pixels wins. Every digit has five templates, one per
 * placement of the glyph inside the cell: center, left, right, up and down.
 *
 * <p>Templates are read once, when the class is initialized, from the classpath
 * resources {@code /net/bccn/hadeslee/model/programfan_<digit>.mod}. A template
 * that cannot be loaded stays all-zero and a message is written to
 * {@code System.err}; recognition still runs but its results are then
 * unreliable.
 *
 * @author hadeslee
 */
public class ImageCode {
    /** Number of digits expected in one captcha image. */
    private static final int DIGIT_COUNT = 4;
    /** Width in pixels of one digit cell. */
    private static final int CELL_WIDTH = 16;
    /** Height in pixels of one digit cell. */
    private static final int CELL_HEIGHT = 13;
    /** Pixels per digit cell (16 * 13 = 208); also the length of one template. */
    private static final int CELL_PIXELS = CELL_WIDTH * CELL_HEIGHT;
    /** Digits 0-9. */
    private static final int NUMERAL_COUNT = 10;
    /** Template placements per digit: center, left, right, up, down. */
    private static final int VARIANT_COUNT = 5;
    /** Value returned by {@code getRGB} for an opaque white pixel (-1 as an int). */
    private static final int BACKGROUND_ARGB = 0xFFFFFFFF;

    /** Glyph templates indexed as {@code model[placement][digit][pixel]}, values 0 or 1. */
    private static final int[][][] model = new int[VARIANT_COUNT][NUMERAL_COUNT][CELL_PIXELS];

    // Load the templates exactly once, when the class is initialized.
    static {
        initNumModel();
    }

    /**
     * Creates a new instance of ImageCode.
     *
     * <p>The templates are shared static data that the static initializer has
     * already loaded, so nothing is reloaded here.
     */
    public ImageCode() {
    }

    /**
     * Decodes an image from the stream and recognizes its digits.
     *
     * @param is stream holding the encoded captcha image; not closed by this method
     * @return the recognized digits, or an empty string when the stream is
     *         {@code null}, cannot be decoded, or the image is too small
     */
    public String getNumber(InputStream is) {
        if (is == null) {
            return "";
        }
        try {
            // ImageIO.read returns null when no registered reader understands the
            // data; the overload below turns that into an empty result.
            return getNumber(ImageIO.read(is));
        } catch (Exception exe) {
            // Boundary for untrusted image data: besides IOException, decoders may
            // raise unchecked exceptions on malformed input. Keep the "empty string
            // on failure" contract instead of propagating.
            exe.printStackTrace();
            return "";
        }
    }

    /**
     * Recognizes the digits of an already decoded captcha image.
     *
     * @param bi the captcha image; its top-left 64x13 pixels are examined
     * @return the recognized digits (always four characters, each 0-9), or an
     *         empty string when the image is {@code null} or smaller than 64x13
     */
    public String getNumber(BufferedImage bi) {
        if (bi == null
                || bi.getWidth() < DIGIT_COUNT * CELL_WIDTH
                || bi.getHeight() < CELL_HEIGHT) {
            return "";
        }
        try {
            StringBuilder digits = new StringBuilder(DIGIT_COUNT);
            for (int i = 0; i < DIGIT_COUNT; i++) {
                digits.append(doCheck(getData(bi, i)));
            }
            return digits.toString();
        } catch (RuntimeException exe) {
            exe.printStackTrace();
            return "";
        }
    }

    /**
     * Loads the glyph templates of all ten digits from the classpath.
     *
     * <p>Each digit is loaded independently, so a missing or unreadable file
     * only affects that digit; its template stays all-zero.
     */
    private static void initNumModel() {
        for (int digit = 0; digit < NUMERAL_COUNT; digit++) {
            String resource = "/net/bccn/hadeslee/model/programfan_" + digit + ".mod";
            InputStream in = ImageCode.class.getResourceAsStream(resource);
            if (in == null) {
                System.err.println("ImageCode: model resource not found: " + resource);
                continue;
            }
            try {
                loadModel(digit, in);
            } catch (IOException exe) {
                exe.printStackTrace();
            } finally {
                try {
                    in.close();
                } catch (IOException ignored) {
                    // Nothing useful can be done if closing a classpath resource fails.
                }
            }
        }
    }

    /**
     * Parses one template file into {@link #model}.
     *
     * <p>The file consists of sections, each introduced by a placement name
     * ({@code center}, {@code left}, {@code right}, {@code up}, {@code down})
     * and followed by numbers. {@code '#'} and {@code ','} are treated as
     * whitespace. Numbers under an unknown section name, and numbers beyond
     * the 208th of a section, are ignored.
     *
     * @param digit the digit (0-9) this file describes
     * @param in    the open template stream; the caller closes it
     * @throws IOException if reading fails
     */
    private static void loadModel(int digit, InputStream in) throws IOException {
        StreamTokenizer st = new StreamTokenizer(new InputStreamReader(in, "UTF-8"));
        st.whitespaceChars('#', '#');
        st.whitespaceChars(',', ',');
        st.eolIsSignificant(false);
        int token;
        while ((token = st.nextToken()) != StreamTokenizer.TT_EOF) {
            if (token != StreamTokenizer.TT_WORD) {
                continue;
            }
            int who = variantIndex(st.sval);
            int index = 0;
            while (st.nextToken() == StreamTokenizer.TT_NUMBER) {
                if (who >= 0 && index < CELL_PIXELS) {
                    model[who][digit][index++] = (int) st.nval;
                }
            }
            // The token that ended the number run is the next section name (or EOF).
            st.pushBack();
        }
    }

    /**
     * Maps a section name of a template file to its placement index.
     *
     * @param name the section name read from the file
     * @return 0-4 for center/left/right/up/down, or -1 for any other name
     */
    private static int variantIndex(String name) {
        if ("center".equals(name)) {
            return 0;
        }
        if ("left".equals(name)) {
            return 1;
        }
        if ("right".equals(name)) {
            return 2;
        }
        if ("up".equals(name)) {
            return 3;
        }
        if ("down".equals(name)) {
            return 4;
        }
        return -1;
    }

    /**
     * Reads an image from a URL given as a string. Not called anywhere in this class.
     *
     * @param url the image location
     * @return the decoded image, or {@code null} when reading fails
     */
    private BufferedImage getBI(String url) {
        try {
            return ImageIO.read(new URL(url));
        } catch (IOException ex) {
            ex.printStackTrace();
            return null;
        }
    }

    /**
     * Binarizes the digit cell at the given position.
     *
     * @param image the captcha image, at least 64x13 pixels
     * @param index zero-based cell position, counted from the left
     * @return 208 values in row-major order (pixel at row r, column c is at
     *         {@code r * 16 + c}); 0 for opaque white, 1 for anything else
     */
    private int[] getData(BufferedImage image, int index) {
        int[] cell = new int[CELL_PIXELS];
        int left = index * CELL_WIDTH;
        for (int row = 0; row < CELL_HEIGHT; row++) {
            for (int col = 0; col < CELL_WIDTH; col++) {
                cell[row * CELL_WIDTH + col] =
                        (image.getRGB(left + col, row) == BACKGROUND_ARGB ? 0 : 1);
            }
        }
        return cell;
    }

    /**
     * Returns the smallest pixel difference between a cell and the five
     * placement templates of one digit.
     *
     * @param who  the digit (0-9) whose templates are compared
     * @param demo the binarized cell
     * @return the number of differing pixels for the closest placement
     */
    private int getMin(int who, int[] demo) {
        int best = Integer.MAX_VALUE;
        for (int variant = 0; variant < VARIANT_COUNT; variant++) {
            int diff = 0;
            for (int j = 0; j < demo.length; j++) {
                if (model[variant][who][j] != demo[j]) {
                    diff++;
                }
            }
            best = Math.min(best, diff);
        }
        return best;
    }

    /** Returns true when every listed pixel of the cell is foreground (1). */
    private static boolean allSet(int[] demo, int... positions) {
        for (int p : positions) {
            if (demo[p] != 1) {
                return false;
            }
        }
        return true;
    }

    /**
     * Disambiguates the look-alike digits 6, 8, 9 and 0 by probing fixed pixels.
     *
     * <p>Indices are {@code row * 16 + col}. The "right" probes lie in rows 1-5,
     * columns 10-12; the "left" probes lie in rows 8-9, columns 3-5; the
     * "middle" probes are columns 7-10 of rows 5 and 6.
     *
     * @param demo   the binarized cell
     * @param origin the digit chosen by template matching
     * @return 8 when both probes hit, 6 for left only, 9 for right only,
     *         otherwise {@code origin}; an 8 with fewer than four middle
     *         pixels set is reported as 0
     */
    private int get689(int[] demo, int origin) {
        boolean isRight = allSet(demo, 75, 90) || allSet(demo, 76, 91)
                || allSet(demo, 58, 74, 90) || allSet(demo, 59, 75, 91)
                || allSet(demo, 60, 76, 92) || allSet(demo, 28, 44, 60)
                || allSet(demo, 27, 43, 59);
        boolean isLeft = allSet(demo, 131, 147) || allSet(demo, 132, 148)
                || allSet(demo, 133, 149);

        int result;
        if (isLeft && isRight) {
            result = 8;
        } else if (isLeft) {
            result = 6;
        } else if (isRight) {
            result = 9;
        } else {
            result = origin;
        }

        if (result == 8) {
            // Cell values are only 0 or 1, so "a full row of four" implies
            // "at least four of the eight"; the single sum test covers all cases.
            int middle = demo[87] + demo[88] + demo[89] + demo[90]
                    + demo[103] + demo[104] + demo[105] + demo[106];
            if (middle <= 3) {
                return 0;
            }
        }
        return result;
    }

    /**
     * Finds the digit whose templates are closest to the given cell.
     *
     * <p>The search starts from {@code Integer.MAX_VALUE} so that some digit is
     * always selected, even when the cell differs from every template in all
     * 208 pixels; ties go to the smaller digit.
     *
     * @param demo the binarized cell
     * @return the recognized digit, 0-9
     */
    private int doCheck(int[] demo) {
        int number = 0;
        int best = Integer.MAX_VALUE;
        for (int i = 0; i < NUMERAL_COUNT; i++) {
            int diff = getMin(i, demo);
            if (diff < best) {
                best = diff;
                number = i;
            }
        }
        if (number == 6 || number == 8 || number == 9) {
            number = get689(demo, number);
        }
        return number;
    }
}
/*
* ImageCode.java
*
* Created on 2007年1月18日, 下午10:00
*
* To change this template, choose Tools | Template Manager
* and open the template in the editor.
*/
package net.bccn.hadeslee.programfan;
import java.awt.image.BufferedImage;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.StreamTokenizer;
import java.net.URL;
import javax.imageio.ImageIO;
/**
 * Recognizes the digits of a regular, low-noise numeric captcha image by
 * template matching.
 *
 * <p>The image is treated as four side-by-side cells of 16x13 pixels. Each cell
 * is binarized (opaque white is background, anything else is foreground) and
 * compared pixel by pixel with stored glyph templates; the digit whose template
 * differs in the fewest pixels wins. Every digit has five templates, one per
 * placement of the glyph inside the cell: center, left, right, up and down.
 *
 * <p>Templates are read once, when the class is initialized, from the classpath
 * resources {@code /net/bccn/hadeslee/model/programfan_<digit>.mod}. A template
 * that cannot be loaded stays all-zero and a message is written to
 * {@code System.err}; recognition still runs but its results are then
 * unreliable.
 *
 * @author hadeslee
 */
public class ImageCode {
    /** Number of digits expected in one captcha image. */
    private static final int DIGIT_COUNT = 4;
    /** Width in pixels of one digit cell. */
    private static final int CELL_WIDTH = 16;
    /** Height in pixels of one digit cell. */
    private static final int CELL_HEIGHT = 13;
    /** Pixels per digit cell (16 * 13 = 208); also the length of one template. */
    private static final int CELL_PIXELS = CELL_WIDTH * CELL_HEIGHT;
    /** Digits 0-9. */
    private static final int NUMERAL_COUNT = 10;
    /** Template placements per digit: center, left, right, up, down. */
    private static final int VARIANT_COUNT = 5;
    /** Value returned by {@code getRGB} for an opaque white pixel (-1 as an int). */
    private static final int BACKGROUND_ARGB = 0xFFFFFFFF;

    /** Glyph templates indexed as {@code model[placement][digit][pixel]}, values 0 or 1. */
    private static final int[][][] model = new int[VARIANT_COUNT][NUMERAL_COUNT][CELL_PIXELS];

    // Load the templates exactly once, when the class is initialized.
    static {
        initNumModel();
    }

    /**
     * Creates a new instance of ImageCode.
     *
     * <p>The templates are shared static data that the static initializer has
     * already loaded, so nothing is reloaded here.
     */
    public ImageCode() {
    }

    /**
     * Decodes an image from the stream and recognizes its digits.
     *
     * @param is stream holding the encoded captcha image; not closed by this method
     * @return the recognized digits, or an empty string when the stream is
     *         {@code null}, cannot be decoded, or the image is too small
     */
    public String getNumber(InputStream is) {
        if (is == null) {
            return "";
        }
        try {
            // ImageIO.read returns null when no registered reader understands the
            // data; the overload below turns that into an empty result.
            return getNumber(ImageIO.read(is));
        } catch (Exception exe) {
            // Boundary for untrusted image data: besides IOException, decoders may
            // raise unchecked exceptions on malformed input. Keep the "empty string
            // on failure" contract instead of propagating.
            exe.printStackTrace();
            return "";
        }
    }

    /**
     * Recognizes the digits of an already decoded captcha image.
     *
     * @param bi the captcha image; its top-left 64x13 pixels are examined
     * @return the recognized digits (always four characters, each 0-9), or an
     *         empty string when the image is {@code null} or smaller than 64x13
     */
    public String getNumber(BufferedImage bi) {
        if (bi == null
                || bi.getWidth() < DIGIT_COUNT * CELL_WIDTH
                || bi.getHeight() < CELL_HEIGHT) {
            return "";
        }
        try {
            StringBuilder digits = new StringBuilder(DIGIT_COUNT);
            for (int i = 0; i < DIGIT_COUNT; i++) {
                digits.append(doCheck(getData(bi, i)));
            }
            return digits.toString();
        } catch (RuntimeException exe) {
            exe.printStackTrace();
            return "";
        }
    }

    /**
     * Loads the glyph templates of all ten digits from the classpath.
     *
     * <p>Each digit is loaded independently, so a missing or unreadable file
     * only affects that digit; its template stays all-zero.
     */
    private static void initNumModel() {
        for (int digit = 0; digit < NUMERAL_COUNT; digit++) {
            String resource = "/net/bccn/hadeslee/model/programfan_" + digit + ".mod";
            InputStream in = ImageCode.class.getResourceAsStream(resource);
            if (in == null) {
                System.err.println("ImageCode: model resource not found: " + resource);
                continue;
            }
            try {
                loadModel(digit, in);
            } catch (IOException exe) {
                exe.printStackTrace();
            } finally {
                try {
                    in.close();
                } catch (IOException ignored) {
                    // Nothing useful can be done if closing a classpath resource fails.
                }
            }
        }
    }

    /**
     * Parses one template file into {@link #model}.
     *
     * <p>The file consists of sections, each introduced by a placement name
     * ({@code center}, {@code left}, {@code right}, {@code up}, {@code down})
     * and followed by numbers. {@code '#'} and {@code ','} are treated as
     * whitespace. Numbers under an unknown section name, and numbers beyond
     * the 208th of a section, are ignored.
     *
     * @param digit the digit (0-9) this file describes
     * @param in    the open template stream; the caller closes it
     * @throws IOException if reading fails
     */
    private static void loadModel(int digit, InputStream in) throws IOException {
        StreamTokenizer st = new StreamTokenizer(new InputStreamReader(in, "UTF-8"));
        st.whitespaceChars('#', '#');
        st.whitespaceChars(',', ',');
        st.eolIsSignificant(false);
        int token;
        while ((token = st.nextToken()) != StreamTokenizer.TT_EOF) {
            if (token != StreamTokenizer.TT_WORD) {
                continue;
            }
            int who = variantIndex(st.sval);
            int index = 0;
            while (st.nextToken() == StreamTokenizer.TT_NUMBER) {
                if (who >= 0 && index < CELL_PIXELS) {
                    model[who][digit][index++] = (int) st.nval;
                }
            }
            // The token that ended the number run is the next section name (or EOF).
            st.pushBack();
        }
    }

    /**
     * Maps a section name of a template file to its placement index.
     *
     * @param name the section name read from the file
     * @return 0-4 for center/left/right/up/down, or -1 for any other name
     */
    private static int variantIndex(String name) {
        if ("center".equals(name)) {
            return 0;
        }
        if ("left".equals(name)) {
            return 1;
        }
        if ("right".equals(name)) {
            return 2;
        }
        if ("up".equals(name)) {
            return 3;
        }
        if ("down".equals(name)) {
            return 4;
        }
        return -1;
    }

    /**
     * Reads an image from a URL given as a string. Not called anywhere in this class.
     *
     * @param url the image location
     * @return the decoded image, or {@code null} when reading fails
     */
    private BufferedImage getBI(String url) {
        try {
            return ImageIO.read(new URL(url));
        } catch (IOException ex) {
            ex.printStackTrace();
            return null;
        }
    }

    /**
     * Binarizes the digit cell at the given position.
     *
     * @param image the captcha image, at least 64x13 pixels
     * @param index zero-based cell position, counted from the left
     * @return 208 values in row-major order (pixel at row r, column c is at
     *         {@code r * 16 + c}); 0 for opaque white, 1 for anything else
     */
    private int[] getData(BufferedImage image, int index) {
        int[] cell = new int[CELL_PIXELS];
        int left = index * CELL_WIDTH;
        for (int row = 0; row < CELL_HEIGHT; row++) {
            for (int col = 0; col < CELL_WIDTH; col++) {
                cell[row * CELL_WIDTH + col] =
                        (image.getRGB(left + col, row) == BACKGROUND_ARGB ? 0 : 1);
            }
        }
        return cell;
    }

    /**
     * Returns the smallest pixel difference between a cell and the five
     * placement templates of one digit.
     *
     * @param who  the digit (0-9) whose templates are compared
     * @param demo the binarized cell
     * @return the number of differing pixels for the closest placement
     */
    private int getMin(int who, int[] demo) {
        int best = Integer.MAX_VALUE;
        for (int variant = 0; variant < VARIANT_COUNT; variant++) {
            int diff = 0;
            for (int j = 0; j < demo.length; j++) {
                if (model[variant][who][j] != demo[j]) {
                    diff++;
                }
            }
            best = Math.min(best, diff);
        }
        return best;
    }

    /** Returns true when every listed pixel of the cell is foreground (1). */
    private static boolean allSet(int[] demo, int... positions) {
        for (int p : positions) {
            if (demo[p] != 1) {
                return false;
            }
        }
        return true;
    }

    /**
     * Disambiguates the look-alike digits 6, 8, 9 and 0 by probing fixed pixels.
     *
     * <p>Indices are {@code row * 16 + col}. The "right" probes lie in rows 1-5,
     * columns 10-12; the "left" probes lie in rows 8-9, columns 3-5; the
     * "middle" probes are columns 7-10 of rows 5 and 6.
     *
     * @param demo   the binarized cell
     * @param origin the digit chosen by template matching
     * @return 8 when both probes hit, 6 for left only, 9 for right only,
     *         otherwise {@code origin}; an 8 with fewer than four middle
     *         pixels set is reported as 0
     */
    private int get689(int[] demo, int origin) {
        boolean isRight = allSet(demo, 75, 90) || allSet(demo, 76, 91)
                || allSet(demo, 58, 74, 90) || allSet(demo, 59, 75, 91)
                || allSet(demo, 60, 76, 92) || allSet(demo, 28, 44, 60)
                || allSet(demo, 27, 43, 59);
        boolean isLeft = allSet(demo, 131, 147) || allSet(demo, 132, 148)
                || allSet(demo, 133, 149);

        int result;
        if (isLeft && isRight) {
            result = 8;
        } else if (isLeft) {
            result = 6;
        } else if (isRight) {
            result = 9;
        } else {
            result = origin;
        }

        if (result == 8) {
            // Cell values are only 0 or 1, so "a full row of four" implies
            // "at least four of the eight"; the single sum test covers all cases.
            int middle = demo[87] + demo[88] + demo[89] + demo[90]
                    + demo[103] + demo[104] + demo[105] + demo[106];
            if (middle <= 3) {
                return 0;
            }
        }
        return result;
    }

    /**
     * Finds the digit whose templates are closest to the given cell.
     *
     * <p>The search starts from {@code Integer.MAX_VALUE} so that some digit is
     * always selected, even when the cell differs from every template in all
     * 208 pixels; ties go to the smaller digit.
     *
     * @param demo the binarized cell
     * @return the recognized digit, 0-9
     */
    private int doCheck(int[] demo) {
        int number = 0;
        int best = Integer.MAX_VALUE;
        for (int i = 0; i < NUMERAL_COUNT; i++) {
            int diff = getMin(i, demo);
            if (diff < best) {
                best = diff;
                number = i;
            }
        }
        if (number == 6 || number == 8 || number == 9) {
            number = get689(demo, number);
        }
        return number;
    }
}
下面是一些字模的內容,把它保存成相應的文件,並能讓程序找到就可以了.
比如這是0的字模,包含它在居中、偏左、偏右、偏上、偏下五個位置的字模,其他數字以此類推.這些字模都是先取到樣本,然後再分類的.
#center
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,1,1,1,1,1,1,1,1,0,0,0,0,
0,0,0,1,1,1,1,1,1,1,1,1,1,0,0,0,
0,0,0,1,1,0,0,0,0,0,0,1,1,0,0,0,
0,0,0,1,1,0,0,0,0,0,0,1,1,0,0,0,
0,0,0,1,1,0,0,0,0,0,0,1,1,0,0,0,
0,0,0,1,1,0,0,0,0,0,0,1,1,0,0,0,
0,0,0,1,1,0,0,0,0,0,0,1,1,0,0,0,
0,0,0,1,1,0,0,0,0,0,0,1,1,0,0,0,
0,0,0,1,1,0,0,0,0,0,0,1,1,0,0,0,
0,0,0,1,1,0,0,0,0,0,0,1,1,0,0,0,
0,0,0,1,1,1,1,1,1,1,1,1,1,0,0,0,
0,0,0,0,1,1,1,1,1,1,1,1,0,0,0,0,
#left
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,1,1,1,1,1,1,1,1,0,0,0,0,0,
0,0,1,1,1,1,1,1,1,1,1,1,0,0,0,0,
0,0,1,1,0,0,0,0,0,0,1,1,0,0,0,0,
0,0,1,1,0,0,0,0,0,0,1,1,0,0,0,0,
0,0,1,1,0,0,0,0,0,0,1,1,0,0,0,0,
0,0,1,1,0,0,0,0,0,0,1,1,0,0,0,0,
0,0,1,1,0,0,0,0,0,0,1,1,0,0,0,0,
0,0,1,1,0,0,0,0,0,0,1,1,0,0,0,0,
0,0,1,1,0,0,0,0,0,0,1,1,0,0,0,0,
0,0,1,1,0,0,0,0,0,0,1,1,0,0,0,0,
0,0,1,1,1,1,1,1,1,1,1,1,0,0,0,0,
0,0,0,1,1,1,1,1,1,1,1,0,0,0,0,0,
#right
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,1,1,1,1,1,1,1,1,0,0,0,
0,0,0,0,1,1,1,1,1,1,1,1,1,1,0,0,
0,0,0,0,1,1,0,0,0,0,0,0,1,1,0,0,
0,0,0,0,1,1,0,0,0,0,0,0,1,1,0,0,
0,0,0,0,1,1,0,0,0,0,0,0,1,1,0,0,
0,0,0,0,1,1,0,0,0,0,0,0,1,1,0,0,
0,0,0,0,1,1,0,0,0,0,0,0,1,1,0,0,
0,0,0,0,1,1,0,0,0,0,0,0,1,1,0,0,
0,0,0,0,1,1,0,0,0,0,0,0,1,1,0,0,
0,0,0,0,1,1,0,0,0,0,0,0,1,1,0,0,
0,0,0,0,1,1,1,1,1,1,1,1,1,1,0,0,
0,0,0,0,0,1,1,1,1,1,1,1,1,0,0,0,
#up
0,0,0,0,1,1,1,1,1,1,1,1,0,0,0,0,
0,0,0,1,1,1,1,1,1,1,1,1,1,0,0,0,
0,0,0,1,1,0,0,0,0,0,0,1,1,0,0,0,
0,0,0,1,1,0,0,0,0,0,0,1,1,0,0,0,
0,0,0,1,1,0,0,0,0,0,0,1,1,0,0,0,
0,0,0,1,1,0,0,0,0,0,0,1,1,0,0,0,
0,0,0,1,1,0,0,0,0,0,0,1,1,0,0,0,
0,0,0,1,1,0,0,0,0,0,0,1,1,0,0,0,
0,0,0,1,1,0,0,0,0,0,0,1,1,0,0,0,
0,0,0,1,1,0,0,0,0,0,0,1,1,0,0,0,
0,0,0,1,1,1,1,1,1,1,1,1,1,0,0,0,
0,0,0,0,1,1,1,1,1,1,1,1,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
#down
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,1,1,1,1,1,1,1,1,0,0,0,0,
0,0,0,1,1,1,1,1,1,1,1,1,1,0,0,0,
0,0,0,1,1,0,0,0,0,0,0,1,1,0,0,0,
0,0,0,1,1,0,0,0,0,0,0,1,1,0,0,0,
0,0,0,1,1,0,0,0,0,0,0,1,1,0,0,0,
0,0,0,1,1,0,0,0,0,0,0,1,1,0,0,0,
0,0,0,1,1,0,0,0,0,0,0,1,1,0,0,0,
0,0,0,1,1,0,0,0,0,0,0,1,1,0,0,0,
0,0,0,1,1,0,0,0,0,0,0,1,1,0,0,0,
0,0,0,1,1,0,0,0,0,0,0,1,1,0,0,0,
0,0,0,1,1,1,1,1,1,1,1,1,1,0,0,0,
0,0,0,0,1,1,1,1,1,1,1,1,0,0,0,0,
#center
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,1,1,1,1,1,1,1,1,0,0,0,0,
0,0,0,1,1,1,1,1,1,1,1,1,1,0,0,0,
0,0,0,1,1,0,0,0,0,0,0,1,1,0,0,0,
0,0,0,1,1,0,0,0,0,0,0,1,1,0,0,0,
0,0,0,1,1,0,0,0,0,0,0,1,1,0,0,0,
0,0,0,1,1,0,0,0,0,0,0,1,1,0,0,0,
0,0,0,1,1,0,0,0,0,0,0,1,1,0,0,0,
0,0,0,1,1,0,0,0,0,0,0,1,1,0,0,0,
0,0,0,1,1,0,0,0,0,0,0,1,1,0,0,0,
0,0,0,1,1,0,0,0,0,0,0,1,1,0,0,0,
0,0,0,1,1,1,1,1,1,1,1,1,1,0,0,0,
0,0,0,0,1,1,1,1,1,1,1,1,0,0,0,0,
#left
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,1,1,1,1,1,1,1,1,0,0,0,0,0,
0,0,1,1,1,1,1,1,1,1,1,1,0,0,0,0,
0,0,1,1,0,0,0,0,0,0,1,1,0,0,0,0,
0,0,1,1,0,0,0,0,0,0,1,1,0,0,0,0,
0,0,1,1,0,0,0,0,0,0,1,1,0,0,0,0,
0,0,1,1,0,0,0,0,0,0,1,1,0,0,0,0,
0,0,1,1,0,0,0,0,0,0,1,1,0,0,0,0,
0,0,1,1,0,0,0,0,0,0,1,1,0,0,0,0,
0,0,1,1,0,0,0,0,0,0,1,1,0,0,0,0,
0,0,1,1,0,0,0,0,0,0,1,1,0,0,0,0,
0,0,1,1,1,1,1,1,1,1,1,1,0,0,0,0,
0,0,0,1,1,1,1,1,1,1,1,0,0,0,0,0,
#right
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,1,1,1,1,1,1,1,1,0,0,0,
0,0,0,0,1,1,1,1,1,1,1,1,1,1,0,0,
0,0,0,0,1,1,0,0,0,0,0,0,1,1,0,0,
0,0,0,0,1,1,0,0,0,0,0,0,1,1,0,0,
0,0,0,0,1,1,0,0,0,0,0,0,1,1,0,0,
0,0,0,0,1,1,0,0,0,0,0,0,1,1,0,0,
0,0,0,0,1,1,0,0,0,0,0,0,1,1,0,0,
0,0,0,0,1,1,0,0,0,0,0,0,1,1,0,0,
0,0,0,0,1,1,0,0,0,0,0,0,1,1,0,0,
0,0,0,0,1,1,0,0,0,0,0,0,1,1,0,0,
0,0,0,0,1,1,1,1,1,1,1,1,1,1,0,0,
0,0,0,0,0,1,1,1,1,1,1,1,1,0,0,0,
#up
0,0,0,0,1,1,1,1,1,1,1,1,0,0,0,0,
0,0,0,1,1,1,1,1,1,1,1,1,1,0,0,0,
0,0,0,1,1,0,0,0,0,0,0,1,1,0,0,0,
0,0,0,1,1,0,0,0,0,0,0,1,1,0,0,0,
0,0,0,1,1,0,0,0,0,0,0,1,1,0,0,0,
0,0,0,1,1,0,0,0,0,0,0,1,1,0,0,0,
0,0,0,1,1,0,0,0,0,0,0,1,1,0,0,0,
0,0,0,1,1,0,0,0,0,0,0,1,1,0,0,0,
0,0,0,1,1,0,0,0,0,0,0,1,1,0,0,0,
0,0,0,1,1,0,0,0,0,0,0,1,1,0,0,0,
0,0,0,1,1,1,1,1,1,1,1,1,1,0,0,0,
0,0,0,0,1,1,1,1,1,1,1,1,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
#down
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,1,1,1,1,1,1,1,1,0,0,0,0,
0,0,0,1,1,1,1,1,1,1,1,1,1,0,0,0,
0,0,0,1,1,0,0,0,0,0,0,1,1,0,0,0,
0,0,0,1,1,0,0,0,0,0,0,1,1,0,0,0,
0,0,0,1,1,0,0,0,0,0,0,1,1,0,0,0,
0,0,0,1,1,0,0,0,0,0,0,1,1,0,0,0,
0,0,0,1,1,0,0,0,0,0,0,1,1,0,0,0,
0,0,0,1,1,0,0,0,0,0,0,1,1,0,0,0,
0,0,0,1,1,0,0,0,0,0,0,1,1,0,0,0,
0,0,0,1,1,0,0,0,0,0,0,1,1,0,0,0,
0,0,0,1,1,1,1,1,1,1,1,1,1,0,0,0,
0,0,0,0,1,1,1,1,1,1,1,1,0,0,0,0,
在此算法的實現中主要是針對比較規范的驗證碼,然后還要針對外形比較相似的6890進行分辨,實現識別的方式有很多種,大家仁者見仁,智者見智吧.不過,說句題外話,MOTO的識別就很牛,它對手寫字體的支持都能達到很高的識別率,更不要說是正體了,這就是另外一個領域了.不是一兩句代碼就能搞得定的:)
盡管千里冰封
依然擁有晴空
你我共同品味JAVA的濃香.