qileilove

          blog已經轉移至github,大家請訪問 http://qaseven.github.io/

          網頁主動探測工具使用

           單位的項目是IBatis做的,每個查詢的SQL里面都有很多判斷
            上次優化SQL之後,其中的一個分支報錯,但是作為DBA,不可能排查每一個分支.
            所以,乾脆用爬蟲爬過所有的網頁,主動探測程序的異常.
            這樣有兩個好處
            1.可以主動查看網頁是否異常 (500錯誤,404錯誤)
            2.可以篩查速度較慢的網頁,從這個方向也可以定位慢SQL吧.(也有服務器資源不足,造成網絡超時的情況)
            前提,
            必須是互聯網公司,大多數網頁不用登錄也可以瀏覽
            首先,建表
            CREATE SEQUENCE seq_probe_id INCREMENT BY 1 START WITH 1 NOMAXvalue NOCYCLE CACHE 2000;
            create table probe(
            id int primary key,
            host varchar(40) not null,
            path varchar(500) not null,
            state int not null,
            taskTime int not null,
            type varchar(10) not null,
            createtime date default sysdate not null
            ) ;
            其中host是域名,path是網頁的相對路徑,state是HTTP狀態碼,taskTime是網頁獲取時間,單位是毫秒,type是類型(html,htm,jpg等)
            程序結構
            程序分三個主要步驟,再分別用三個隊列實現生產者消費者模式.
            1.連接.根據連接隊列的目標,使用Socket獲取網頁,然后放入解析隊列
            2.解析.根據解析隊列的內容,使用正則表達式獲取該網頁的合法連接,將其再放入連接隊列.然后將解析的網頁放入持久化隊列
            3.持久化.將持久化隊列的內容存入數據庫,以便查詢。
            程序的三個步驟並行執行,每個步驟內部也可以採用並發的方式.
          但是通常來說,解析和持久化可以分別用單線程的方式執行.
          import java.io.BufferedReader;
          import java.io.BufferedWriter;
          import java.io.InputStreamReader;
          import java.io.OutputStreamWriter;
          import java.net.InetAddress;
          import java.net.Socket;
          import java.sql.Connection;
          import java.sql.DriverManager;
          import java.sql.PreparedStatement;
          import java.sql.SQLException;
          import java.util.ArrayList;
          import java.util.Iterator;
          import java.util.List;
          import java.util.Set;
          import java.util.concurrent.BlockingQueue;
          import java.util.concurrent.ConcurrentSkipListSet;
          import java.util.concurrent.CopyOnWriteArrayList;
          import java.util.concurrent.ExecutorService;
          import java.util.concurrent.Executors;
          import java.util.concurrent.LinkedBlockingQueue;
          import java.util.concurrent.atomic.AtomicInteger;
          import java.util.regex.Matcher;
          import java.util.regex.Pattern;
          /**
           * Entry point of the page probe.
           *
           * <p>Seeds the connect queue with a start page, starts the connect, parse and
           * persistence workers, and then prints a one-line progress report once per second.
           * The program never terminates on its own.
           */
          public class Probe {
              /**
               * Size of the connect pool and number of connect workers submitted to it.
               * Every {@code ConnectHandler} loops for the life of its thread, so submitting
               * more workers than threads would only park the surplus in the executor queue.
               */
              private static final int CONNECT_WORKERS = 200;

              /** Pages waiting to be fetched. */
              private static final BlockingQueue<Task> CONNECTLIST = new LinkedBlockingQueue<Task>();
              /** Fetched pages waiting to be parsed. */
              private static final BlockingQueue<Task> PARSELIST = new LinkedBlockingQueue<Task>();
              /** Parsed pages waiting to be written to the database. */
              private static final BlockingQueue<Task> PERSISTENCELIST = new LinkedBlockingQueue<Task>();

              private static ExecutorService CONNECTTHREADPOOL;
              private static ExecutorService PARSETHREADPOOL;
              private static ExecutorService PERSISTENCETHREADPOOL;

              /** Domains handed to the parser as its whitelist for absolute links. */
              private static final List<String> DOMAINLIST = new CopyOnWriteArrayList<>();

              static {
                  CONNECTTHREADPOOL = Executors.newFixedThreadPool(CONNECT_WORKERS);
                  PARSETHREADPOOL = Executors.newSingleThreadExecutor();
                  PERSISTENCETHREADPOOL = Executors.newFixedThreadPool(1);
                  DOMAINLIST.add("域名");
              }

              /**
               * Starts the crawl and reports progress forever.
               *
               * @param args ignored
               * @throws Exception if the main thread is interrupted while queueing or sleeping
               */
              public static void main(String args[]) throws Exception {
                  long start = System.currentTimeMillis();
                  CONNECTLIST.put(new Task("域名", 80, "/static/index.html"));

                  // One worker per pool thread; see CONNECT_WORKERS.
                  for (int i = 0; i < CONNECT_WORKERS; i++) {
                      CONNECTTHREADPOOL.submit(new ConnectHandler(CONNECTLIST, PARSELIST));
                  }
                  PARSETHREADPOOL.submit(new ParseHandler(CONNECTLIST, PARSELIST, PERSISTENCELIST, DOMAINLIST));
                  PERSISTENCETHREADPOOL.submit(new PersistenceHandler(PERSISTENCELIST));

                  while (true) {
                      Thread.sleep(1000);
                      long end = System.currentTimeMillis();
                      // Divide in floating point: integer division would truncate the elapsed
                      // seconds and inflate every per-second rate below.
                      float interval = (end - start) / 1000f;
                      int connectTotal = ConnectHandler.GETCOUNT();
                      int parseTotal = ParseHandler.GETCOUNT();
                      int persistenceTotal = PersistenceHandler.GETCOUNT();
                      int connectps = Math.round(connectTotal / interval);
                      int parseps = Math.round(parseTotal / interval);
                      int persistenceps = Math.round(persistenceTotal / interval);
                      // "\r" rewrites the same console line on every tick.
                      System.out.print("\r連接總數:" + connectTotal + " \t每秒連接:" + connectps + "\t連接隊列剩余:" + CONNECTLIST.size()
                              + " \t解析總數:" + parseTotal + " \t每秒解析:" + parseps + "\t解析隊列剩余:" + PARSELIST.size() + " \t持久化總數:"
                              + persistenceTotal + " \t每秒持久化:" + persistenceps + "\t持久化隊列剩余:" + PERSISTENCELIST.size());
                  }
              }
          }
          class Task {
          public Task() {
          }
          public void init(String host, int port, String path) {
          this.setCurrentPath(path);
          this.host = host;
          this.port = port;
          }
          public Task(String host, int port, String path) {
          init(host, port, path);
          }
          private String host;
          private int port;
          private String currentPath;
          private long taskTime;
          private String type;
          private String content;
          private int state;
          public int getState() {
          return state;
          }
          public void setState(int state) {
          this.state = state;
          }
          public String getCurrentPath() {
          return currentPath;
          }
          public void setCurrentPath(String currentPath) {
          this.currentPath = currentPath;
          this.type = currentPath.substring(currentPath.indexOf(".") + 1,
          currentPath.indexOf("?") != -1 ? currentPath.indexOf("?") : currentPath.length());
          }
          public long getTaskTime() {
          return taskTime;
          }
          public void setTaskTime(long taskTime) {
          this.taskTime = taskTime;
          }
          public String getType() {
          return type;
          }
          public void setType(String type) {
          this.type = type;
          }
          public String getHost() {
          return host;
          }
          public int getPort() {
          return port;
          }
          public String getContent() {
          return content;
          }
          public void setContent(String content) {
          this.content = content;
          }
          }
          /**
           * Parse stage of the probe: takes fetched pages from the parse queue, extracts the
           * HTTP state, queues newly discovered links for fetching, and hands the page on to
           * the persistence queue.
           *
           * <p>Links are resolved against the page they were found on by a chain of filters
           * tried in this order: {@code ../../}, {@code ../}, {@code http://}, {@code /},
           * then plain relative paths.
           */
          class ParseHandler implements Runnable {
              /**
               * Link texts already seen, exactly as written in the page.
               * NOTE(review): keyed on the raw link text, so the same relative link found in
               * two different directories is followed only once - confirm this is intended.
               */
              private static final Set<String> SET = new ConcurrentSkipListSet<String>();
              /** Number of handler iterations run so far, reported by {@link #GETCOUNT()}. */
              private static final AtomicInteger COUNT = new AtomicInteger();
              /** A double-quoted string containing ".htm". */
              private static final Pattern LINK_PATTERN = Pattern.compile("\"[^\"]+\\.htm[^\"]*\"");

              private static final String TWO_UP = "../../";
              private static final String ONE_UP = "../";
              private static final String HTTP = "http://";

              private final BlockingQueue<Task> connectlist;
              private final BlockingQueue<Task> parselist;
              private final BlockingQueue<Task> persistencelist;
              List<String> domainlist;

              /**
               * @param connectlist     queue that receives newly discovered pages
               * @param parselist       queue of fetched pages to parse
               * @param persistencelist queue that receives parsed pages
               * @param domainlist      domains accepted for absolute {@code http://} links
               */
              public ParseHandler(BlockingQueue<Task> connectlist, BlockingQueue<Task> parselist,
                      BlockingQueue<Task> persistencelist, List<String> domainlist) {
                  this.connectlist = connectlist;
                  this.parselist = parselist;
                  this.persistencelist = persistencelist;
                  this.domainlist = domainlist;
              }

              /** @return number of handler iterations run so far */
              public static int GETCOUNT() {
                  return COUNT.get();
              }

              /**
               * One link-resolution step. An implementation either initialises {@code newTask}
               * or delegates to {@code chain}; leaving {@code newTask} uninitialised rejects
               * the link.
               */
              private interface Filter {
                  void doFilter(Task fatherTask, Task newTask, String path, Filter chain);
              }

              /** Runs the filters in order; each call advances to the next filter. */
              private class FilterChain implements Filter {
                  private final Iterator<Filter> it;

                  FilterChain() {
                      List<Filter> filters = new ArrayList<Filter>();
                      filters.add(new TwoLevel());
                      filters.add(new OneLevel());
                      filters.add(new FullPath());
                      filters.add(new Root());
                      filters.add(new Default());
                      it = filters.iterator();
                  }

                  @Override
                  public void doFilter(Task fatherTask, Task newTask, String path, Filter chain) {
                      if (it.hasNext()) {
                          it.next().doFilter(fatherTask, newTask, path, chain);
                      }
                  }
              }

              /** Resolves links that start with {@code ../../}. */
              private class TwoLevel implements Filter {
                  @Override
                  public void doFilter(Task fatherTask, Task newTask, String path, Filter chain) {
                      if (path.startsWith(TWO_UP)) {
                          // Drop the file name plus two directories from the parent path.
                          String prefix = getPrefix(fatherTask.getCurrentPath(), 3);
                          newTask.init(fatherTask.getHost(), fatherTask.getPort(),
                                  join(prefix, path.substring(TWO_UP.length())));
                      } else {
                          chain.doFilter(fatherTask, newTask, path, chain);
                      }
                  }
              }

              /** Resolves links that start with {@code ../}. */
              private class OneLevel implements Filter {
                  @Override
                  public void doFilter(Task fatherTask, Task newTask, String path, Filter chain) {
                      if (path.startsWith(ONE_UP)) {
                          // Drop the file name plus one directory from the parent path.
                          String prefix = getPrefix(fatherTask.getCurrentPath(), 2);
                          newTask.init(fatherTask.getHost(), fatherTask.getPort(),
                                  join(prefix, path.substring(ONE_UP.length())));
                      } else {
                          chain.doFilter(fatherTask, newTask, path, chain);
                      }
                  }
              }

              /**
               * Resolves absolute {@code http://} links, accepting only whitelisted domains.
               * A link to any other domain is rejected by leaving {@code newTask} untouched.
               */
              private class FullPath implements Filter {
                  @Override
                  public void doFilter(Task fatherTask, Task newTask, String path, Filter chain) {
                      if (path.startsWith(HTTP)) {
                          for (String domain : domainlist) {
                              String origin = HTTP + domain;
                              if (path.startsWith(origin + "/")) {
                                  newTask.init(domain, fatherTask.getPort(), path.substring(origin.length()));
                                  break;
                              }
                          }
                      } else {
                          chain.doFilter(fatherTask, newTask, path, chain);
                      }
                  }
              }

              /** Resolves links that start with {@code /}. */
              private class Root implements Filter {
                  @Override
                  public void doFilter(Task fatherTask, Task newTask, String path, Filter chain) {
                      if (path.startsWith("/")) {
                          newTask.init(fatherTask.getHost(), fatherTask.getPort(), path);
                      } else {
                          chain.doFilter(fatherTask, newTask, path, chain);
                      }
                  }
              }

              /** Resolves every remaining link relative to the parent page's directory. */
              private class Default implements Filter {
                  @Override
                  public void doFilter(Task fatherTask, Task newTask, String path, Filter chain) {
                      String prefix = getPrefix(fatherTask.getCurrentPath(), 1);
                      newTask.init(fatherTask.getHost(), fatherTask.getPort(), join(prefix, path));
                  }
              }

              /**
               * Processes one fetched page. The page is always forwarded to the persistence
               * queue; if its response cannot be parsed the failure is printed and the page is
               * forwarded with whatever state it already had, so it is not silently lost.
               */
              private void handler() {
                  try {
                      Task task = parselist.take();
                      try {
                          parseTaskState(task);
                          if (200 == task.getState()) {
                              extractLinks(task);
                          }
                      } catch (RuntimeException e) {
                          e.printStackTrace();
                      }
                      // The body is no longer needed once links are extracted.
                      task.setContent(null);
                      persistencelist.put(task);
                  } catch (InterruptedException e) {
                      // Restore the flag so run() can stop.
                      Thread.currentThread().interrupt();
                  }
              }

              /**
               * Finds candidate links in the page and queues each one not seen before.
               * Quoted strings containing a space, tab, parenthesis or ":" are skipped.
               * NOTE(review): the ":" check also skips every "http://" link, so the FullPath
               * filter is never reached - confirm whether absolute links should be followed.
               */
              private void extractLinks(Task task) throws InterruptedException {
                  Matcher matcher = LINK_PATTERN.matcher(task.getContent());
                  while (matcher.find()) {
                      String quoted = matcher.group();
                      if (quoted.contains(" ") || quoted.contains("\t") || quoted.contains("(")
                              || quoted.contains(")") || quoted.contains(":")) {
                          continue;
                      }
                      String path = quoted.substring(1, quoted.length() - 1);
                      if (SET.add(path)) {
                          try {
                              createNewTask(task, path);
                          } catch (RuntimeException e) {
                              // One malformed link must not stop the rest of the page.
                              e.printStackTrace();
                          }
                      }
                  }
              }

              /**
               * Reads the HTTP state from the raw response and stores it on the task.
               * For content starting with "HTTP/1." the three characters after the version
               * are used; otherwise the fixed offset 19-22 is used.
               * NOTE(review): the fallback offset looks site-specific - confirm against the
               * target server's responses.
               */
              private void parseTaskState(Task task) {
                  String content = task.getContent();
                  if (content.startsWith("HTTP/1.")) {
                      task.setState(Integer.parseInt(content.substring(9, 12)));
                  } else {
                      task.setState(Integer.parseInt(content.substring(19, 22)));
                  }
              }

              /**
               * Resolves a link found on {@code fatherTask} and queues it for fetching.
               * Links rejected by the filter chain are dropped.
               *
               * @param fatherTask page the link was found on
               * @param path       link text as written in the page
               * @throws InterruptedException if interrupted while queueing the new task
               */
              private void createNewTask(Task fatherTask, String path) throws InterruptedException {
                  Task newTask = new Task();
                  FilterChain filterchain = new FilterChain();
                  filterchain.doFilter(fatherTask, newTask, path, filterchain);
                  // A filter rejects a link by never calling init(), which leaves the host unset.
                  if (newTask.getHost() != null) {
                      connectlist.put(newTask);
                  }
              }

              /**
               * Removes up to {@code count} trailing "/segment" parts from {@code s}.
               * Stops at the root instead of failing when there are fewer segments.
               *
               * @return the remaining prefix without a trailing "/", or "/" for the root
               */
              private String getPrefix(String s, int count) {
                  String prefix = s;
                  while (count > 0) {
                      int slash = prefix.lastIndexOf("/");
                      if (slash < 0) {
                          break;
                      }
                      prefix = prefix.substring(0, slash);
                      count--;
                  }
                  return "".equals(prefix) ? "/" : prefix;
              }

              /** Joins a directory and a relative path with exactly one "/" between them. */
              private static String join(String dir, String rest) {
                  return dir.endsWith("/") ? dir + rest : dir + "/" + rest;
              }

              /** Handles pages until the thread is interrupted. */
              @Override
              public void run() {
                  while (!Thread.currentThread().isInterrupted()) {
                      this.handler();
                      COUNT.addAndGet(1);
                  }
              }
          }
          /**
           * Connect stage of the probe: takes pages from the connect queue, fetches each one
           * over a plain socket, records how long the fetch took, and hands the page to the
           * parse queue.
           */
          class ConnectHandler implements Runnable {
              /** Number of handler iterations run so far, reported by {@link #GETCOUNT()}. */
              private static final AtomicInteger COUNT = new AtomicInteger();

              private final BlockingQueue<Task> connectlist;
              private final BlockingQueue<Task> parselist;

              /**
               * @param connectlist queue of pages to fetch
               * @param parselist   queue that receives fetched pages
               */
              public ConnectHandler(BlockingQueue<Task> connectlist, BlockingQueue<Task> parselist) {
                  this.connectlist = connectlist;
                  this.parselist = parselist;
              }

              /** @return number of handler iterations run so far */
              public static int GETCOUNT() {
                  return COUNT.get();
              }

              /**
               * Fetches one page and forwards it. A page whose fetch fails is reported on
               * stderr and not forwarded.
               */
              private void handler() {
                  try {
                      Task task = connectlist.take();
                      long start = System.currentTimeMillis();
                      getHtml(task);
                      long end = System.currentTimeMillis();
                      task.setTaskTime(end - start);
                      parselist.put(task);
                  } catch (InterruptedException e) {
                      // Restore the flag so run() can stop.
                      Thread.currentThread().interrupt();
                  } catch (Exception e) {
                      e.printStackTrace();
                  }
              }

              /**
               * Sends an HTTP/1.0 GET for the task's path and stores the whole response
               * (status line, headers and body) as the task content. Response lines are
               * concatenated without line separators.
               *
               * @param task page to fetch; its content is set on success
               * @throws Exception if the host cannot be resolved or the socket I/O fails
               */
              private void getHtml(Task task) throws Exception {
                  StringBuilder sb = new StringBuilder(2048);
                  InetAddress addr = InetAddress.getByName(task.getHost());
                  // try-with-resources closes the writer, reader and socket even when the
                  // request or the read fails part-way.
                  // The reader uses the platform default charset.
                  try (Socket socket = new Socket(addr, task.getPort());
                          BufferedReader rd = new BufferedReader(new InputStreamReader(socket.getInputStream()));
                          BufferedWriter wr = new BufferedWriter(
                                  new OutputStreamWriter(socket.getOutputStream(), "UTF-8"))) {
                      // Send the request line and headers, terminated by an empty line.
                      wr.write("GET " + task.getCurrentPath() + " HTTP/1.0\r\n");
                      wr.write("HOST:" + task.getHost() + "\r\n");
                      wr.write("Accept:*/*\r\n");
                      wr.write("\r\n");
                      wr.flush();
                      // Read the response until the server closes the connection.
                      String line;
                      while ((line = rd.readLine()) != null) {
                          sb.append(line);
                      }
                  }
                  task.setContent(sb.toString());
              }

              /** Handles pages until the thread is interrupted. */
              @Override
              public void run() {
                  while (!Thread.currentThread().isInterrupted()) {
                      this.handler();
                      COUNT.addAndGet(1);
                  }
              }
          }
          /**
           * Persistence stage of the probe: takes parsed pages from the persistence queue and
           * inserts one row per page into the {@code probe} table.
           */
          class PersistenceHandler implements Runnable {
              static {
                  try {
                      Class.forName("oracle.jdbc.OracleDriver");
                  } catch (ClassNotFoundException e) {
                      e.printStackTrace();
                  }
              }

              /** Number of handler iterations run so far, reported by {@link #GETCOUNT()}. */
              private static final AtomicInteger COUNT = new AtomicInteger();

              private final BlockingQueue<Task> persistencelist;
              private Connection conn;
              private PreparedStatement ps;

              /**
               * Opens the database connection and prepares the insert statement. If that
               * fails the error is printed and {@link #run()} will refuse to start.
               *
               * @param persistencelist queue of parsed pages to store
               */
              public PersistenceHandler(BlockingQueue<Task> persistencelist) {
                  this.persistencelist = persistencelist;
                  try {
                      // The Oracle thin driver requires "@" before the host in the URL.
                      conn = DriverManager.getConnection("jdbc:oracle:thin:@127.0.0.1:1521:orcl", "edmond", "edmond");
                      // handler() commits explicitly; commit() is not allowed in auto-commit mode.
                      conn.setAutoCommit(false);
                      ps = conn
                              .prepareStatement("insert into probe(id,host,path,state,tasktime,type) values(seq_probe_id.nextval,?,?,?,?,?)");
                  } catch (SQLException e) {
                      e.printStackTrace();
                  }
              }

              /** @return number of handler iterations run so far */
              public static int GETCOUNT() {
                  return COUNT.get();
              }

              /** Stores pages until the thread is interrupted. */
              @Override
              public void run() {
                  if (ps == null) {
                      // Without a prepared statement every iteration would fail.
                      System.err.println("PersistenceHandler: no database connection, not persisting");
                      return;
                  }
                  while (!Thread.currentThread().isInterrupted()) {
                      this.handler();
                      COUNT.addAndGet(1);
                  }
              }

              /** Inserts one page and commits; a failed insert is printed and skipped. */
              private void handler() {
                  try {
                      Task task = persistencelist.take();
                      ps.setString(1, task.getHost());
                      ps.setString(2, task.getCurrentPath());
                      ps.setInt(3, task.getState());
                      ps.setLong(4, task.getTaskTime());
                      ps.setString(5, task.getType());
                      ps.executeUpdate();
                      conn.commit();
                  } catch (InterruptedException e) {
                      // Restore the flag so run() can stop.
                      Thread.currentThread().interrupt();
                  } catch (SQLException e) {
                      e.printStackTrace();
                  }
              }
          }
            ParseHandler 使用了一個職責鏈模式,
            TwoLevel 處理../../開頭的連接(../../sucai/sucai.htm)
            OneLevel 處理../開頭的連接(../sucai/sucai.htm)
            FullPath 處理絕對路徑的連接(http://域名/sucai/sucai.htm)
            Root 處理/開頭的連接(/sucai/sucai.htm)
            Default 處理常規的連接(sucai.htm)
            ParseHandler FullPath 過濾需要一個白名單.
            這樣可以使程序在固定的域名爬行
            ParseHandler parseTaskState 解析狀態碼 可能需要根據實際情況進行調整
            比如網頁404,服務器可能會返回一個錯誤頁,而不是通常的HTTP狀態碼。
            第一版僅僅實現了功能,錯誤處理不完整,
            所以僅僅在定制的域名下生效,其實並不通用,後續會逐步完善.

          posted on 2014-12-03 13:43 順其自然EVO 閱讀(204) 評論(0)  編輯  收藏 所屬分類: 測試學習專欄

          <2014年12月>
          30123456
          78910111213
          14151617181920
          21222324252627
          28293031123
          45678910

          導航

          統計

          常用鏈接

          留言簿(55)

          隨筆分類

          隨筆檔案

          文章分類

          文章檔案

          搜索

          最新評論

          閱讀排行榜

          評論排行榜

          主站蜘蛛池模板: 本溪| 蒲江县| 万荣县| 盐山县| 颍上县| 阳曲县| 衡南县| 滦平县| 石城县| 惠州市| 达孜县| 建水县| 囊谦县| 双流县| 兴业县| 甘孜县| 瑞丽市| 慈利县| 无为县| 清河县| 扬中市| 西吉县| 沙雅县| 东方市| 鸡西市| 积石山| 漳州市| 屏东市| 阳春市| 苗栗市| 闸北区| 信阳市| 河北区| 虞城县| 南宁市| 富平县| 漳浦县| 温州市| 扎兰屯市| 庆元县| 连江县|