posts - 165, comments - 198, trackbacks - 0, articles - 1
            BlogJava :: 首頁 :: 新隨筆 :: 聯繫 :: 聚合  :: 管理

          扒網頁數據(jdk+正則解析)

          Posted on 2008-03-09 10:46 G_G 閱讀(1519) 評論(2)  編輯  收藏 所屬分類: javaGeneral
          數據扒出效果
          雙色球(2008001)=02,04,07,09,14,29#03
          雙色球(2008002)=03,04,18,22,25,29#09
          ..

          junit代碼
          package test;

          import java.io.ByteArrayOutputStream;
          import java.io.InputStream;
          import java.net.URL;
          import java.net.URLConnection;
          import java.util.regex.Matcher;
          import java.util.regex.Pattern;

          import junit.framework.TestCase;

          public?class?HttpConn?extends?TestCase?{
          ????
          public?void?testT()?throws?Exception?{
          ????????zq?:
          ????????
          for(int?i=2008001;true;i++){
          ????????????String?num?
          =??getQihao(i)?;
          ????????????System.out.println(
          "雙色球("+i+")="+?num);
          ????????????
          if(num==null||num.equals(""))?break?zq;
          ????????}
          ????}
          ????
          ????
          public?String?getQihao(int?qihao)?throws?Exception?{
          ????????URL?url?
          =?new?URL("http://www.cnlot.net/ssq/details.php?issue="+qihao);
          ????????URLConnection?uconn?
          =?url.openConnection();
          ????????
          ????????String?num?
          =?"";
          ????????
          ????????InputStream?in?
          =?uconn.getInputStream();
          ????????
          byte[]?bs?=?new?byte[in.available()];
          ????????in.read(bs);
          ????????String?date?
          =?new?String(bs)?;
          ????????
          ????????Pattern?pa?
          =?Pattern.compile("?.+color=red>([0-9][0-9])<.+"?);
          ????????Matcher?m?
          =?pa.matcher(date);
          ????????
          while(?m.find()?)
          ????????????num
          +=?m.group(1)+",";
          ????????
          ?????????pa?
          =?Pattern.compile("?.+color=blue>([0-9][0-9])<.+"?);
          ?????????m?
          =?pa.matcher(date);
          ????????
          while(?m.find()?)
          ????????????num?
          =?num.substring(?0,num.length()-1?)+"#"+m.group(1)?;
          ????????
          ????????pa?
          =?Pattern.compile("^(([0-9][0-9],){5,}[0-9][0-9]#([0-9][0-9],)*[0-9][0-9]\\|)*(([0-9][0-9],){5,}[0-9][0-9]#([0-9][0-9],)*[0-9][0-9])*$");
          ????????m?
          =??pa.matcher(num);
          ????????
          if(?m.find()?)
          ????????????
          return?num?;
          ????????
          else?
          ????????????
          return?null?;
          ????}
          }



          評論

          # re: 扒網頁數據(jdk+正則解析)  回復  更多評論   

          2008-03-10 10:08 by richardning
          呵呵,老大催我干活。回頭詳看。。我自己也寫了個抓網絡數據的代碼,呵,有空。比較比較。

          # re: 扒網頁數據(jdk+正則解析)  回復  更多評論   

          2008-03-11 10:18 by 蔣家狂潮
          不錯的文章,
          主站蜘蛛池模板: 台安县| 朔州市| 板桥市| 富宁县| 丰顺县| 铁力市| 咸阳市| 汕头市| 巴里| 准格尔旗| 福安市| 文昌市| 科尔| 兴业县| 津南区| 洪雅县| 沛县| 城步| 玛纳斯县| 沧源| 疏附县| 平江县| 昌图县| 巴楚县| 阳信县| 尤溪县| 措美县| 达孜县| 吉首市| 伊宁县| 孟津县| 城固县| 海丰县| 观塘区| 武邑县| 罗定市| 武定县| 会理县| 兴业县| 耒阳市| 仁布县|