posts - 165, comments - 198, trackbacks - 0, articles - 1
            BlogJava :: 首頁 :: 新隨筆 :: 聯系 :: 聚合  :: 管理

          扒網頁數據(jdk+正則解析)

          Posted on 2008-03-09 10:46 G_G 閱讀(1511) 評論(2)  編輯  收藏 所屬分類: javaGeneral
          數據扒出效果
          雙色球(2008001)=02,04,07,09,14,29#03
          雙色球(2008002)=03,04,18,22,25,29#09
          ..

          junit代碼
          package test;

          import java.io.ByteArrayOutputStream;
          import java.io.IOException;
          import java.io.InputStream;
          import java.net.URL;
          import java.net.URLConnection;
          import java.util.regex.Matcher;
          import java.util.regex.Pattern;

          import junit.framework.TestCase;

          public?class?HttpConn?extends?TestCase?{
          ????
          public?void?testT()?throws?Exception?{
          ????????zq?:
          ????????
          for(int?i=2008001;true;i++){
          ????????????String?num?
          =??getQihao(i)?;
          ????????????System.out.println(
          "雙色球("+i+")="+?num);
          ????????????
          if(num==null||num.equals(""))?break?zq;
          ????????}
          ????}
          ????
          ????
          public?String?getQihao(int?qihao)?throws?Exception?{
          ????????URL?url?
          =?new?URL("http://www.cnlot.net/ssq/details.php?issue="+qihao);
          ????????URLConnection?uconn?
          =?url.openConnection();
          ????????
          ????????String?num?
          =?"";
          ????????
          ????????InputStream?in?
          =?uconn.getInputStream();
          ????????
          byte[]?bs?=?new?byte[in.available()];
          ????????in.read(bs);
          ????????String?date?
          =?new?String(bs)?;
          ????????
          ????????Pattern?pa?
          =?Pattern.compile("?.+color=red>([0-9][0-9])<.+"?);
          ????????Matcher?m?
          =?pa.matcher(date);
          ????????
          while(?m.find()?)
          ????????????num
          +=?m.group(1)+",";
          ????????
          ?????????pa?
          =?Pattern.compile("?.+color=blue>([0-9][0-9])<.+"?);
          ?????????m?
          =?pa.matcher(date);
          ????????
          while(?m.find()?)
          ????????????num?
          =?num.substring(?0,num.length()-1?)+"#"+m.group(1)?;
          ????????
          ????????pa?
          =?Pattern.compile("^(([0-9][0-9],){5,}[0-9][0-9]#([0-9][0-9],)*[0-9][0-9]\\|)*(([0-9][0-9],){5,}[0-9][0-9]#([0-9][0-9],)*[0-9][0-9])*$");
          ????????m?
          =??pa.matcher(num);
          ????????
          if(?m.find()?)
          ????????????
          return?num?;
          ????????
          else?
          ????????????
          return?null?;
          ????}
          }



          評論

          # re: 扒網頁數據(jdk+正則解析)  回復  更多評論   

          2008-03-10 10:08 by richardning
          呵呵,老大催我干活。回頭詳看。。我自己也寫了個抓網絡數據的代碼,呵,有空。比較比較。

          # re: 扒網頁數據(jdk+正則解析)  回復  更多評論   

          2008-03-11 10:18 by 蔣家狂潮
          不錯的文章,
          主站蜘蛛池模板: 宜昌市| 博罗县| 天津市| 略阳县| 曲沃县| 惠水县| 鄂伦春自治旗| 葫芦岛市| 雷波县| 柳江县| 淮安市| 响水县| 河西区| 宁南县| 怀化市| 定南县| 金溪县| 秦皇岛市| 繁峙县| 阜康市| 芜湖市| 化德县| 日土县| 宝丰县| 塔城市| 青田县| 镇沅| 清丰县| 沙田区| 保靖县| 神木县| 航空| 阜新市| 凤阳县| 蓝田县| 平武县| 攀枝花市| 平江县| 余干县| 鸡西市| 巢湖市|