posts - 165, comments - 198, trackbacks - 0, articles - 1
            BlogJava :: 首頁 :: 新隨筆 :: 聯系 :: 聚合  :: 管理

          扒網頁數據(jdk+正則解析)

          Posted on 2008-03-09 10:46 G_G 閱讀(1511) 評論(2)  編輯  收藏 所屬分類: javaGeneral
          數據扒出效果
          雙色球(2008001)=02,04,07,09,14,29#03
          雙色球(2008002)=03,04,18,22,25,29#09
          ..

          junit代碼
          package?test;

          import?java.io.InputStream;
          import?java.net.URL;
          import?java.net.URLConnection;
          import?java.util.regex.Matcher;
          import?java.util.regex.Pattern;

          import?junit.framework.TestCase;

          public?class?HttpConn?extends?TestCase?{
          ????
          public?void?testT()?throws?Exception?{
          ????????zq?:
          ????????
          for(int?i=2008001;true;i++){
          ????????????String?num?
          =??getQihao(i)?;
          ????????????System.out.println(
          "雙色球("+i+")="+?num);
          ????????????
          if(num==null||num.equals(""))?break?zq;
          ????????}
          ????}
          ????
          ????
          public?String?getQihao(int?qihao)?throws?Exception?{
          ????????URL?url?
          =?new?URL("http://www.cnlot.net/ssq/details.php?issue="+qihao);
          ????????URLConnection?uconn?
          =?url.openConnection();
          ????????
          ????????String?num?
          =?"";
          ????????
          ????????InputStream?in?
          =?uconn.getInputStream();
          ????????
          byte[]?bs?=?new?byte[in.available()];
          ????????in.read(bs);
          ????????String?date?
          =?new?String(bs)?;
          ????????
          ????????Pattern?pa?
          =?Pattern.compile("?.+color=red>([0-9][0-9])<.+"?);
          ????????Matcher?m?
          =?pa.matcher(date);
          ????????
          while(?m.find()?)
          ????????????num
          +=?m.group(1)+",";
          ????????
          ?????????pa?
          =?Pattern.compile("?.+color=blue>([0-9][0-9])<.+"?);
          ?????????m?
          =?pa.matcher(date);
          ????????
          while(?m.find()?)
          ????????????num?
          =?num.substring(?0,num.length()-1?)+"#"+m.group(1)?;
          ????????
          ????????pa?
          =?Pattern.compile("^(([0-9][0-9],){5,}[0-9][0-9]#([0-9][0-9],)*[0-9][0-9]\\|)*(([0-9][0-9],){5,}[0-9][0-9]#([0-9][0-9],)*[0-9][0-9])*$");
          ????????m?
          =??pa.matcher(num);
          ????????
          if(?m.find()?)
          ????????????
          return?num?;
          ????????
          else?
          ????????????
          return?null?;
          ????}
          }



          評論

          # re: 扒網頁數據(jdk+正則解析)  回復  更多評論   

          2008-03-10 10:08 by richardning
          呵呵,老大催我干活。回頭詳看。。我自己也寫了個抓網絡數據的代碼,呵,有空。比較比較。

          # re: 扒網頁數據(jdk+正則解析)  回復  更多評論   

          2008-03-11 10:18 by 蔣家狂潮
          不錯的文章,
          主站蜘蛛池模板: 梓潼县| 颍上县| 仲巴县| 金阳县| 保定市| 营山县| 玉山县| 米林县| 贵港市| 新余市| 贵定县| 辉县市| 汪清县| 湄潭县| 龙井市| 沙湾县| 信宜市| 凤城市| 乐至县| 雷山县| 三河市| 偃师市| 台东县| 萝北县| 京山县| 若羌县| 勃利县| 德惠市| 上杭县| 泗洪县| 乐平市| 缙云县| 清丰县| 广丰县| 磐安县| 泗洪县| 巴里| 青铜峡市| 藁城市| 习水县| 桃源县|