在前面的廢話不想看的可以skip-----由于工作需要,經常需要打開和查詢250M左右的文本文件(總行數超過250萬行)分析日志,公司發的筆記本內存才512M,地球人都知道,這樣的電腦根本不能勝任。不知道有沒有人和我一樣倒霉,面對這么龐大的文件,曾經讓我很無奈,很彷徨。從尋找優秀的類似notepad這樣的文本工具,企圖通過工具來提高下速度,其中JIURL這個記事本工具,我個人覺得很好,它陪我走了好長時間,雖然它老是有這樣那樣的BUG,不過它的速度確實比其他的記事本快很多,對于巨型文件,不會像其他記事本那樣直接掛死。工具再好,也是死的,經常操作一樣的動作,讓我想撞墻。偶又去研究了Linux下的sh命令,哎,不是我說Microsoft的系統不好,Linux的命令確實好使,特別是查詢文檔,日志分析的時候,只要你使用的好,基本都能完成你的任務。像grep,gawk,wc,head,tail etc.這樣的命令總是能給我們帶來驚喜。Linux系統還是要好好學的,雖然和Windows差別很大,但是很強,很方便。 下面我就來說說用JAVASE怎么操作這些龐然大物的。 先介紹下我要操作的文本文件 [28-03-2008 00:00:00.101] |SH1|CIC015|->|SH1|AG015| >>IAM|CN15|CD51001500|CG15921351879|PA0|PB1|<< [28-03-2008 00:00:00.101] |SH1|CIC153|->|SH1|AG144| >>IAM|CN144|CD51001500|CG13761957943|PA0|PB1|<< [28-03-2008 00:00:00.117] |SH1|CIC015|<-|SH1|AG015| >>ACM|CN15|<< [28-03-2008 00:00:00.117] |SH1|CIC153|<-|SH1|AG144| >>ACM|CN144|<< [28-03-2008 00:00:00.179] |SH1|CIC111|->|SH1|AG105| >>REL|CN105|PA16|PB0|PC4|<< [28-03-2008 00:00:00.179] |SH1|CIC111|<-|SH1|AG105| >>RLC|CN105|<< [28-03-2008 00:00:00.273] |SH1|CIC1373|->|SH3|AG118| >>ANM|CN118|<< [28-03-2008 00:00:00.383] |SH1|CIC1365|<-|SH3|AG110| >>REL|CN110|PA16|<< [28-03-2008 00:00:00.461] |SH1|CIC1365|->|SH3|AG110| >>RLC|CN110|<< [28-03-2008 00:00:00.586] |SH1|CIC2577|->|S11|AG196| >>ANM|CN196|<< 大家不要頭暈,這個本來就是日志,搞過No.7的人都看的懂,不過這個不是重點了。我們要處理的就是這樣的文件,JAVASE里有一個新的包 java.nio,具體干啥的,我就不說了,自己去查看API吧。下面貼出具體代碼,大家重點看getResult() 方法。在里面使用了MappedByteBuffer把文件映射到內存里面去,然后使用緩存的技術把它再讀出來,如果不使用緩存,速度會很慢,想在這個里面查個東西,至少要7分鐘以上,使用了緩存整個文件遍歷最多13秒,差距很大吧,這個時間還是和硬件有關系的,我的電腦比較爛了,我老婆經常說我的是“低廉的雙核”,我那個汗啊!o(∩_∩)o...,大家好好看看里面的代碼吧。希望能對經常和大型文件打交道的人有個幫助!
1 package com.xhp.ss7 ;
2
3 import java.io.File ;
4 import java.io.RandomAccessFile ;
5 import java.io.BufferedReader ;
6 import java.io.StringReader ;
7 import java.util.List ;
8 import java.util.ArrayList ;
9 import java.util.Date ;
10 import java.util.regex.Pattern ;
11 import java.util.regex.Matcher ;
12 import java.nio.MappedByteBuffer ;
13 import java.nio.channels.FileChannel ;
14
15
/**
 * Searches one day's SS7 signalling trace for the messages that belong to a single call.
 *
 * <p>The trace is a text file whose lines look like
 * {@code [28-03-2008 00:00:00.101] |SH1|CIC015|->|SH1|AG015| >>IAM|CN15|CD...|CG...|<<}.
 * The path handed to this class must end in a 16-character file name shaped like
 * {@code yyyyMMdd_xxx.xxx}; the date used in the search pattern is taken from it.
 *
 * <p>The file is scanned in memory-mapped windows of {@code bufferSize} bytes so that very
 * large traces can be searched without loading them completely.
 */
public class SS7Handle {

    /** Number of trailing path characters occupied by the {@code yyyyMMdd_xxx.xxx} file name. */
    private static final int FILE_NAME_LENGTH = "xxxxxxxx_xxx.xxx".length();

    /** Default number of bytes mapped and decoded per window (6 MiB). */
    private static final int DEFAULT_BUFFER_SIZE = 0x600000;

    private String year;
    private String month;
    private String day;
    private long length;
    private File file;
    private int bufferSize = DEFAULT_BUFFER_SIZE;
    private long diff;

    /** Mutable bookkeeping shared by all windows of one {@link #getResult} scan. */
    private static final class ScanState {
        /** Lines that may still be collected; the scan ends once this drops below zero. */
        int remaining;
        /** Correlation key taken from the first matching line; {@code null} until found. */
        String key;
        /** Set once the scan has to stop early. */
        boolean done;

        ScanState(int remaining) {
            this.remaining = remaining;
        }
    }

    /**
     * Creates a handle for the given trace file using the default 6 MiB window.
     *
     * @param path path of the trace file, ending in a {@code yyyyMMdd_xxx.xxx} file name
     */
    public SS7Handle(String path) {
        this.loadFile(path);
    }

    /**
     * Creates a handle for the given trace file with a caller-chosen window size.
     *
     * @param path       path of the trace file, ending in a {@code yyyyMMdd_xxx.xxx} file name
     * @param bufferSize number of bytes to map per window; must be positive
     * @throws IllegalArgumentException if {@code bufferSize} is not positive
     */
    public SS7Handle(String path, int bufferSize) {
        if (bufferSize <= 0) {
            throw new IllegalArgumentException("bufferSize must be positive: " + bufferSize);
        }
        this.bufferSize = bufferSize;
        this.loadFile(path);
    }

    /**
     * Finds the first IAM line matching the given number and time, then collects every
     * later line that mentions the same pair of fields (fields 4 and 5 of the {@code |}-split
     * line, e.g. {@code SH1|AG015}).
     *
     * <p>Every collected line decrements the budget given by {@code count}; the scan stops as
     * soon as the budget drops below zero, so at most {@code count + 1} lines are returned
     * (behaviour kept from the original implementation). The elapsed time in milliseconds is
     * available from {@link #getDiff()} afterwards. I/O failures are reported on
     * {@code System.err} and whatever was collected up to that point is returned.
     *
     * @param type   1 to search calls placed by the system ({@code <-} IAM with {@code CD} +
     *               number), 2 to search calls dialled in by users ({@code ->} IAM with
     *               {@code CG} + number); any other value produces an empty pattern, which
     *               matches the first line of the file
     * @param number phone number to look for; inserted into the regular expression as is
     * @param time   hour part of the timestamp, inserted into the regular expression as is
     * @param minute minute part; blank or {@code null} to leave it out, one digit is zero-padded
     * @param second second part; blank or {@code null} to leave it out, one digit is zero-padded
     * @param count  collection budget, see above
     * @return the collected lines in file order; never {@code null}
     */
    public List<String> getResult(int type, String number, String time, String minute,
            String second, int count) {
        List<String> result = new ArrayList<String>();
        Pattern pattern = Pattern.compile(getStrRex(type, number, time, minute, second));
        ScanState state = new ScanState(count);
        long begin = System.currentTimeMillis();
        RandomAccessFile input = null;
        try {
            input = new RandomAccessFile(file, "r");
            FileChannel channel = input.getChannel();
            // Never map past the current end of the file, even if it shrank since loadFile().
            long total = Math.min(length, channel.size());
            // Bytes of a line whose end has not been seen yet; prepended to the next window.
            byte[] carry = new byte[0];
            long position = 0; // long, so traces larger than 2 GiB do not overflow
            while (position < total && !state.done) {
                int windowSize = (int) Math.min((long) bufferSize, total - position);
                MappedByteBuffer window =
                        channel.map(FileChannel.MapMode.READ_ONLY, position, windowSize);
                byte[] data = new byte[carry.length + windowSize];
                System.arraycopy(carry, 0, data, 0, carry.length);
                window.get(data, carry.length, windowSize);
                position += windowSize;

                // Only complete lines are decoded; the tail waits for the next window.
                // The final window is decoded in full because nothing more will arrive.
                int usable = position < total ? endOfLastCompleteLine(data) : data.length;
                carry = new byte[data.length - usable];
                System.arraycopy(data, usable, carry, 0, carry.length);
                if (usable > 0) {
                    // Decoded with the platform default charset, as the original code did.
                    scanLines(new String(data, 0, usable), pattern, state, result);
                }
            }
        } catch (java.io.IOException e) {
            System.err.println("SS7Handle: failed to scan " + file + ": " + e);
        } finally {
            if (input != null) {
                try {
                    input.close();
                } catch (java.io.IOException ignored) {
                    // Nothing useful can be done if closing a read-only file fails.
                }
            }
            this.setDiff(System.currentTimeMillis() - begin);
        }
        return result;
    }

    /** Returns the index just past the last {@code '\n'} in {@code data}, or 0 if there is none. */
    private static int endOfLastCompleteLine(byte[] data) {
        for (int i = data.length - 1; i >= 0; i--) {
            if (data[i] == '\n') {
                return i + 1;
            }
        }
        return 0;
    }

    /** Applies the match/collect rules of {@link #getResult} to every line of {@code text}. */
    private static void scanLines(String text, Pattern pattern, ScanState state,
            List<String> result) throws java.io.IOException {
        // Hand the decoded text to a BufferedReader so it can be read line by line.
        BufferedReader reader = new BufferedReader(new StringReader(text));
        String line;
        while ((line = reader.readLine()) != null) {
            if (state.key == null) {
                if (pattern.matcher(line).find()) {
                    System.out.println("line-->" + line); // diagnostic output kept from the original
                    result.add(line);
                    state.remaining--;
                    String[] fields = line.split("\\|");
                    if (fields.length < 6) {
                        // Without fields 4 and 5 there is no key to follow the call with.
                        System.err.println("SS7Handle: matched line has too few fields: " + line);
                        state.done = true;
                        return;
                    }
                    state.key = fields[4] + "|" + fields[5];
                }
            } else if (line.contains(state.key)) {
                System.out.println(state.key); // diagnostic output kept from the original
                result.add(line);
                state.remaining--;
            }
            if (state.remaining < 0) {
                state.done = true;
                return;
            }
        }
    }

    /**
     * Builds the regular expression that identifies the first line of the wanted call.
     *
     * @return the expression, or an empty string when {@code type} is neither 1 nor 2
     */
    private String getStrRex(int type, String number, String hour, String minute, String second) {
        // Minute and second are padded independently of each other.
        String mm = padToTwoDigits(minute);
        String ss = padToTwoDigits(second);

        String direction;
        String numberTag;
        if (type == 1) { // calls placed by the system
            direction = "<-";
            numberTag = "CD";
        } else if (type == 2) { // calls dialled in by users
            direction = "->";
            numberTag = "CG";
        } else {
            return "";
        }

        StringBuilder regex = new StringBuilder();
        regex.append("^\\[").append(day).append("-").append(month).append("-").append(year)
                .append(" ").append(hour);
        if (mm.length() > 0) {
            regex.append(":").append(mm);
        }
        if (ss.length() > 0) {
            regex.append(":").append(ss);
        }
        regex.append(".*").append(direction).append(".*IAM.*")
                .append(numberTag).append(number).append(".*");
        if (type == 1) {
            System.out.println(regex.toString()); // diagnostic output kept from the original
        }
        return regex.toString();
    }

    /** Trims {@code value} (treating {@code null} as empty) and zero-pads a single character. */
    private static String padToTwoDigits(String value) {
        String trimmed = value == null ? "" : value.trim();
        return trimmed.length() == 1 ? "0" + trimmed : trimmed;
    }

    /**
     * Points this handle at another trace file and re-reads its size and date.
     *
     * @param path path of the trace file, ending in a {@code yyyyMMdd_xxx.xxx} file name
     * @throws StringIndexOutOfBoundsException if {@code path} is shorter than that file name
     */
    public void loadFile(String path) {
        int start = path.length() - FILE_NAME_LENGTH;
        if (start < 0) {
            // Checked before any field is touched so a bad path leaves the handle unchanged.
            throw new StringIndexOutOfBoundsException(
                    "path does not end in a yyyyMMdd_xxx.xxx file name: " + path);
        }
        this.file = new File(path);
        this.setLength(file.length());
        this.setYear(path.substring(start, start + 4));
        this.setMonth(path.substring(start + 4, start + 6));
        this.setDay(path.substring(start + 6, start + 8));
    }

    public void setYear(String year) {
        this.year = year;
    }

    public void setMonth(String month) {
        this.month = month;
    }

    public void setDay(String day) {
        this.day = day;
    }

    private void setLength(long length) {
        this.length = length;
    }

    public void setDiff(long diff) {
        this.diff = diff;
    }

    /** Returns the duration of the most recent {@link #getResult} call in milliseconds. */
    public long getDiff() {
        return this.diff;
    }

    /** Returns the file size in bytes as recorded by {@link #loadFile}. */
    public long getLength() {
        return this.length;
    }

    public String getYear() {
        return this.year;
    }

    public String getMonth() {
        return this.month;
    }

    public String getDay() {
        return this.day;
    }

    /** Returns the trace date as {@code year-month-day}. */
    @Override
    public String toString() {
        return this.year + "-" + this.month + "-" + this.day;
    }

}
2
3 import java.io.File ;
4 import java.io.RandomAccessFile ;
5 import java.io.BufferedReader ;
6 import java.io.StringReader ;
7 import java.util.List ;
8 import java.util.ArrayList ;
9 import java.util.Date ;
10 import java.util.regex.Pattern ;
11 import java.util.regex.Matcher ;
12 import java.nio.MappedByteBuffer ;
13 import java.nio.channels.FileChannel ;
14
15
// NOTE(review): this appears to be a second paste of the SS7Handle listing in which the
// brace-only lines were dropped; the leading numbers are the article's own line numbers.
// Holds the path-derived date, the size of a trace file and the settings used to search it.
16 public class SS7Handle

// Date parts cut out of the file path by loadFile().
17 private String year ;
18 private String month ;
19 private String day ;
// File size in bytes as reported by File.length() in loadFile().
20 private long length ;
21 private File file ;
22 private int bufferSize = 0x600000;// default 6 MB buffer ;
// Elapsed milliseconds of the last getResult() run, stored through setDiff().
23 private long diff ;
24
25
// Creates the handle by delegating to loadFile(path).
26 public SS7Handle(String path)

27 this.loadFile(path) ;
28 }
29
// Scans the file for the first line matching the pattern built by getStrRex(), then collects
// later lines containing fields 4 and 5 of that first line. Returns the collected lines.
// Any exception is caught and reported only as "error"; the lines gathered so far are returned.
30 public List<String> getResult(int type,String number,String time,String minute,String second,int count)

31 List<String> result = new ArrayList<String>() ;
32 String strRex = getStrRex(type,number,time,minute,second) ;
33 Pattern pattern = Pattern.compile(strRex) ;
34 int counts = count ;
35 Date begin = null ;
36 Date end = null ;
37 try

// Maps the whole file read-only. NOTE(review): the RandomAccessFile is never closed.
38 MappedByteBuffer inputBuffer = new RandomAccessFile(file,"r").getChannel()
39 .map(FileChannel.MapMode.READ_ONLY, 0, length);
40 byte[] dst = new byte[bufferSize] ; //read out 6 MB of content each time
41 BufferedReader br = null ;
42 boolean findFirst = false ;
43 boolean over = false ;
44 begin = new Date() ;
// Walks the mapping in bufferSize steps. NOTE(review): offset is an int while length is a long.
45 for(int offset=0; offset < length; offset+= bufferSize)

46 //for(int offset=0; offset < 1; offset+= bufferSize){
47
// Full chunk: copy bufferSize bytes; otherwise copy only the bytes that remain.
48 if(length - offset >= bufferSize)

49 for(int i = 0;i < bufferSize;i++)
50 dst[i] = inputBuffer.get(offset + i);
51 }else

52 for(int i = 0;i < length - offset;i++)
53 dst[i] = inputBuffer.get(offset + i);
54 }
55 //hand the filled dst to a BufferedReader to be read line by line
// NOTE(review): the whole dst array is decoded, including bytes beyond those just copied.
56 br = new BufferedReader(new StringReader(new String(dst))) ;
57 String line ;
// NOTE(review): next is created anew for each chunk, so a key found in an earlier chunk is lost.
58 StringBuffer next = new StringBuffer() ;
59
60 while((line=br.readLine()) != null )

// Before the first hit: test each line against the compiled pattern.
61 if(!findFirst)

62 if(pattern.matcher(line).find())

63 System.out.println("line-->"+line) ;
64 counts-- ;
65 result.add(line) ;
66 findFirst = true ;
// Fields 4 and 5 of the matched line, joined by "|", become the key for later lines.
67 String[] temp = line.split("\|") ;
68 next.append(temp[4]).append("|").append(temp[5]) ;
69 }
// After the first hit: keep every line that contains the key.
70 }else if(findFirst)

71 if(line.contains(next.toString()))

72 System.out.println(next) ;
73 result.add(line) ;
74 counts-- ;
75 }
76 }
// Stops once counts has dropped below zero.
77 if(counts<0)

78 over = true ;
79 break ;
80 }
81 }
82 br.close() ;
83 if(over)

84 break ;
85 }
86 }
// Records the elapsed time; this point is not reached when an exception was thrown.
87 end = new Date();
88 this.setDiff(end.getTime()-begin.getTime()) ;
89 }catch(Exception e)

90 System.out.println("error") ;
91 }
92 return result ;
93 }
94
95
// Builds the regular expression for the first line of the wanted call from the stored
// day/month/year and the given hour, minute and second. Type 1 looks for "<-" ... IAM ... "CD"+number,
// type 2 for "->" ... IAM ... "CG"+number; any other type returns an empty string.
96 private String getStrRex(int type,String number,String hour,String minute,String second)

// Zero-pads a one-character minute. NOTE(review): because of the else-if, second is only
// padded when minute was not.
97 if(0<minute.length() && minute.length()<2)

98 minute = "0"+minute ;
99 }else if(0<second.length() && second.length()<2)

100 second = "0" +second ;
101 }
102 StringBuffer temp = new StringBuffer() ;
103 if(type == 1)

104 String outnumber = "CD"+number ;
// Minute and second are appended only when they are not blank.
105 temp.append("^\[").append(day).append("\-").append(month).append("\-").append(year)
106 .append(" ").append(hour)
107 .append(!(minute.trim()==null || "".equals(minute.trim()))?("\:"+minute):"")
108 .append(!(second.trim()==null || "".equals(second.trim()))?("\:"+second):"")
109 .append(".*\<\-.*IAM.*").append(outnumber).append(".*") ;
110 System.out.println(temp.toString()) ;
111 }else if(type ==2)

112 String innumber = "CG" + number ;
113 temp.append("^\[").append(day).append("\-").append(month).append("\-").append(year)
114 .append(" ").append(hour)
115 .append(!(minute.trim()==null || "".equals(minute.trim()))?("\:"+minute):"")
116 .append(!(second.trim()==null || "".equals(second.trim()))?("\:"+second):"")
117 .append(".*\-\>.*IAM.*").append(innumber).append(".*") ;
118 }
119 return temp.toString() ;
120 }
121
// Stores the file and its length, then takes year, month and day from the last 16 characters
// of the path (4, 2 and 2 characters starting at the beginning of that tail).
122 public void loadFile(String path)

123 this.file = new File(path) ;
124 this.setLength(file.length()) ;
125 int pathLength = path.length() ;
// The placeholder string only supplies the length (16) of the expected file name.
126 int start = pathLength - new String("xxxxxxxx_xxx.xxx").length() ;
127 this.setYear(path.substring(start,start+4)) ;
128 this.setMonth(path.substring(start+4,start+6)) ;
129 this.setDay(path.substring(start+6,start+8)) ;
130 }
131
// Plain setters and getters for the fields above.
132 public void setYear(String year)

133 this.year = year ;
134 }
135 public void setMonth(String month)

136 this.month = month ;
137 }
138 public void setDay(String day)

139 this.day = day ;
140 }
141 private void setLength(long length)

142 this.length = length ;
143 }
144 public void setDiff(long diff)

145 this.diff = diff ;
146 }
147 public long getDiff()

148 return this.diff ;
149 }
150 public long getLength()

151 return this.length ;
152 }
153 public String getYear()

154 return this.year ;
155 }
156 public String getMonth()

157 return this.month ;
158 }
159 public String getDay()

160 return this.day ;
161 }
// Returns the stored date as year-month-day.
162 public String toString()

163 return this.year+"-"+this.month+"-"+this.day ;
164 }
165
166 }