網上有很多關于java連接http server 的文章,也有不少在http server 上通過用戶認證后從后臺獲取網頁的帖子。但是,很少有關于在https server上通過用戶驗證的介紹。項目需要,我折騰了一整天,終于搞定了,現在來整理一下。概括的說, https 比之 http 只是多了一個加密解密過程,所以https的連接只是比http連接多了一個證書驗證的過程,一旦驗證通過,剩下的操作與http上的相同。也就是說,在https server上一旦certificate通過驗證,剩下的用戶驗證就與http server上的用戶認證一致,概括起來,整個過程如下:
1. 建立第一個HttpsURLConnection(URL為登錄頁面url),  通過https上server certificate 與client的驗證   
2. 用POST方式向登錄頁面傳出userID 和 password (具體的變量名要參考登錄頁面表單 
    (form) 中的名稱)。 post操作成功后,取得上面connection的Cookie,通過cookie split出SessionID。
3. 建立第二個HttpsURLConnection(URL為要抓取頁面的url), 通過https上的證書驗證
4. 用 URLConnection.setRequestProperty("Cookie", SessionID),設置第二個URL的cookie,
    確保兩個connection屬于同一個登入后的Session
5. connection.getInputStream獲得目標頁面的內容

下面是我用到的一段testing code,是幾個獨立的片段,已通過測試,有興趣的朋友改改后就可以用:

try
            
{
                                
if(protocol.equals("http")){
                                        
                    
final HttpURLConnection connection = (HttpURLConnection)iSourceURL.openConnection();
                    connection.connect();
                    stream 
= connection.getInputStream();
//                    
//                    try{
//                        printIoStream(stream);
//                    }catch(Exception e){
//                        e.printStackTrace();
//                    }
                    
                    modelSource 
= new StreamSource(stream);
                    
//                    connection.disconnect();
                }

                
else if(protocol.equals("https")){
                    
try {
                        
                        SSLContext sc 
= SSLContext.getInstance("SSL");
                        sc.init(
nullnew TrustManager[] new iTrustManager() },
                                
new java.security.SecureRandom());
                        
//                        url = new URL("https://9.186.10.56:8443/LogonServlet");
                        URL url = new URL(iSourceURL.getProtocol() + "://" + iSourceURL.getHost() + ":" + iSourceURL.getPort() + "/LogonServlet");
                        String strPost 
= "intranetID=*****&password=******";
                        HttpsURLConnection conn 
= (HttpsURLConnection) url.openConnection();
                        conn.setSSLSocketFactory(sc.getSocketFactory());
                        conn.setHostnameVerifier(
new TrustAnyHostnameVerifier());
                        
                        addProperty(conn);
                        
                        conn.setFollowRedirects(
true);
                        conn.setInstanceFollowRedirects(
true);
                        conn.setDoOutput(
true); // IO input to Server
                        conn.setDoInput(true); // 
                        conn.setUseCaches(false); // obtain the newest info of server
                        conn.setAllowUserInteraction(false);
                        conn.setRequestMethod(
"POST");
                        
                        conn.getOutputStream().write(strPost.getBytes());
                        conn.getOutputStream().flush();
                        conn.connect();

                        String cookie 
= conn.getHeaderField("Set-Cookie");
                        
                        String SessionID 
= getSessionIdFromCookie(cookie);
                        
                        stream 
= conn.getInputStream();

                        conn.disconnect();
                        
//                        printIoStream(stream);
                
                        
final HttpsURLConnection connection = (HttpsURLConnection)iSourceURL.openConnection();
                        connection.setSSLSocketFactory(sc.getSocketFactory());
                        connection.setHostnameVerifier(
new TrustAnyHostnameVerifier());
                        connection.setRequestProperty(
"Cookie", SessionID);
                        connection.connect();

                        stream 
= connection.getInputStream();
                        modelSource 
= new StreamSource(stream);
                        
//                        printIoStream(stream);
                        
                    }
 catch (Exception e) {
                        TMCodePlugin.getInstance().writeToLog(
                                Status.ERROR,
"Could not read data via URL(https):"+ iSourceURL, null);
                        e.printStackTrace();
                    }

                }
else{
                    TMCodePlugin.getInstance().writeToLog(Status.ERROR, 
"Protocol illegal: "+iSourceURL, null);
                }

            }

            
catch(IOException e)
            
{
                TMCodePlugin.getInstance().writeToLog(Status.ERROR, 
"Could not read data via URL:"+iSourceURL, null);
            }

            
catch(IllegalArgumentException e)
            
{
                TMCodePlugin.getInstance().writeToLog(Status.ERROR, 
"Could not read data via URL - illegal argument in URL:"+iSourceURL, null);
            }

        }







/**
 * Trust manager that accepts every client and server certificate chain
 * without performing any validation.
 *
 * <p>WARNING: installing this manager disables certificate checking, so the
 * TLS connection no longer authenticates the peer. Use it only against test
 * servers with self-signed certificates.
 *
 * @author chaixzh
 */
class iTrustManager implements X509TrustManager {

    iTrustManager() {
    }

    /**
     * Accepts any client certificate chain; only prints a trace line.
     *
     * @param chain    the peer certificate chain (not inspected)
     * @param authType the authentication type (not inspected)
     * @throws CertificateException never thrown by this implementation
     */
    public void checkClientTrusted(X509Certificate[] chain, String authType)
            throws CertificateException {
        System.out.println("check client trust status");
    }

    /**
     * Accepts any server certificate chain; only prints a trace line.
     *
     * @param chain    the peer certificate chain (not inspected)
     * @param authType the key exchange algorithm (not inspected)
     * @throws CertificateException never thrown by this implementation
     */
    public void checkServerTrusted(X509Certificate[] chain, String authType)
            throws CertificateException {
        System.out.println("check Server trust status");
    }

    /**
     * Returns the accepted issuers, which is always an empty array here.
     *
     * @return a non-null, empty array; the X509TrustManager contract requires
     *         a non-null result, and returning {@code null} can trigger a
     *         NullPointerException inside the TLS implementation
     */
    public X509Certificate[] getAcceptedIssuers() {
        return new X509Certificate[0];
    }
}

    
    
private static class TrustAnyHostnameVerifier implements HostnameVerifier {
        
public boolean verify(String hostname, SSLSession session) {
            
return true;
        }

    }

    
    
/**
     * to split out the SessionID from a Cookie String
     * 
@param cookie
     * 
@return
     
*/

    
private String getSessionIdFromCookie(String cookie){
        
int index_1 = cookie.indexOf("JSESSIONID=");
        
int index_2 = cookie.indexOf(";");
        
return cookie.substring(index_1, index_2);
    }

    
    
/**
     * just for the sake of debuging
     * 
@param stream
     * 
@throws Exception
     
*/

    
private void printIoStream(InputStream stream) throws Exception{
        BufferedInputStream buff 
= new BufferedInputStream(stream);
        Reader r 
= new InputStreamReader(buff, "gbk");
        BufferedReader br 
= new BufferedReader(r);
        StringBuffer strHtml 
= new StringBuffer("");
        String strLine 
= null;
        
while ((strLine = br.readLine()) != null{
            strHtml.append(strLine 
+ "\r\n");
        }

        System.out.print(strHtml.toString());
    }

    
    
private void addProperty(URLConnection connection){
        connection.addRequestProperty(
"Accept""image/gif, image/x-xbitmap, image/jpeg, image/pjpeg, application/x-shockwave-flash, application/msword, application/vnd.ms-excel, application/vnd.ms-powerpoint, application/x-silverlight, */*");
        connection.setRequestProperty(
"Referer""https://9.186.10.56:8443/index.jsp");
        connection.setRequestProperty(
"Accept-Language""zh-cn");
        connection.setRequestProperty(
"Content-Type""application/x-www-form-urlencoded");
        connection.setRequestProperty(
"Accept-Encoding""gzip, deflate");
        connection.setRequestProperty(
"User-Agent""Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; Foxy/1; .NET CLR 2.0.50727;MEGAUPLOAD 1.0)");
        connection.setRequestProperty(
"Connection""Keep-Alive");
        connection.setRequestProperty(
"Cache-Control""no-cache");
    }


此外,還有通過socket連接, 或者借助apache 的httpclient連接的,不一而足,大同小異,總體就是通過驗證后保持在同一個session中,進而抓取目標網頁內容。

cxzforever