< |
< |
小于号</p> |
> |
> |
大于号</p> |
& |
& |
和</p> |
' |
' |
单引号</p> |
" |
" |
双引号</p> |
// Calls document.createElement once for each HTML5 tag name in the list below,
// but only in engines that evaluate the `@cc_on` conditional-compilation comment.
// NOTE(review): presumably this is so that old IE recognises these tags; the
// conditional comment that follows this snippet targets `lt IE 9` -- confirm.
(function () {
    // Where `@cc_on` is honoured the expression reads `!0` (true); everywhere
    // else the comment is inert and the value is plain `0`.
    var conditionalCompilation = /*@cc_on!@*/0;
    if (!conditionalCompilation) {
        return;
    }

    var tagNames = "abbr,article,aside,audio,bb,canvas,datagrid,datalist,details,dialog,eventsource,figure,footer,header,hgroup,mark,menu,meter,nav,output,progress,section,time,video".split(',');

    // Walk the list from the last name to the first, creating one element each.
    for (var idx = tagNames.length - 1; idx >= 0; idx--) {
        document.createElement(tagNames[idx]);
    }
})()
// Then include this script in the page's <head>:
<!--[if lt IE 9]>
<script src="http://html5shim.googlecode.com/svn/trunk/html5.js">
</script>
<![endif]-->
package com.koyo.downloadphoto.service.impl;
import java.io.File;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.List;
import org.apache.commons.io.FileUtils;
import org.apache.commons.io.IOUtils;
import org.apache.http.HttpEntity;
import org.apache.http.HttpResponse;
import org.apache.http.NameValuePair;
import org.apache.http.client.entity.UrlEncodedFormEntity;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.client.methods.HttpPost;
import org.apache.http.client.protocol.ClientContext;
import org.apache.http.message.BasicNameValuePair;
import org.apache.http.util.EntityUtils;
import org.apache.log4j.Logger;
import org.htmlparser.Parser;
import org.htmlparser.filters.AndFilter;
import org.htmlparser.filters.HasAttributeFilter;
import org.htmlparser.filters.TagNameFilter;
import org.htmlparser.tags.ImageTag;
import org.htmlparser.tags.LinkTag;
import org.htmlparser.tags.Span;
import org.htmlparser.util.NodeList;
import com.koyo.downloadphoto.service.Spider;
import com.koyo.downloadphoto.utils.HttpUtils;
import com.koyo.downloadphoto.utils.ParseUtils;
/**
 * Spider that logs in to RenRen, reads an album-list page, and saves every photo it
 * finds to {@code D:/PhotosForRenRen/<friend name>/<album name>/<photo file>}.
 *
 * <p>{@code httpClient}, {@code httpContext}, {@code loginName} and {@code loginPassword}
 * are not declared in this class; they are presumably inherited from {@link Spider}.
 *
 * <p>NOTE(review): this listing was recovered from a damaged copy. Statements that had been
 * merged into {@code //} comments were restored and mis-encoded string literals were decoded
 * (confirm the decoded text). Two URL expressions could not be recovered; they are marked
 * {@code FIXME} below and the class will not compile until they are restored.
 */
public class SpiderForRenRen extends Spider {

    private static final Logger logger = Logger.getLogger(SpiderForRenRen.class);

    /** Root directory that downloaded photos are written beneath. */
    private static final String DOWNLOAD_ROOT = "D:/PhotosForRenRen/";

    /**
     * Logs in, then walks album list -> album page -> photo page and downloads each photo
     * that is not already present on disk. Any failure aborts the run and is logged.
     */
    @Override
    public void execute() {
        try {
            // FIXME(review): the expression in front of "/album/relatives" was lost in the
            // recovered listing; the line is kept exactly as found. Restore the original
            // URL prefix before compiling.
            String url = " + "/album/relatives";

            login();

            // =================== Fetch the album-list page ===================
            String html = fetchHtml(url);
            String friendName = parseFriendName(html);

            List<LinkTag> albumLinks = ParseUtils.parseTags(html, LinkTag.class,
                    "class", "album-cover");
            List<Span> albumNames = ParseUtils.parseTags(html, Span.class,
                    "class", "album-name");
            if (albumLinks == null) {
                return;
            }
            if (albumNames == null) {
                logger.error("No album-name elements found on " + url);
                return;
            }

            // Album covers and album names are paired by position; stop at the shorter
            // list so a mismatch cannot raise IndexOutOfBoundsException.
            int albumCount = Math.min(albumLinks.size(), albumNames.size());
            for (int i = 0; i < albumCount; i++) {
                String rawName = albumNames.get(i).getStringText();
                // The avatar album's name is preceded by a <span class="userhead"/>, so
                // trim() is not enough: keep only the text after the last newline.
                String albumName = rawName.substring(rawName.lastIndexOf("\n") + 1);
                downloadAlbum(friendName, albumName, albumLinks.get(i).getLink());
            }
        } catch (Exception e) {
            // Route the failure (with its stack trace) through the logger.
            logger.error("RenRen photo download failed", e);
        }
    }

    /**
     * Posts the login form, follows the link found in the login response, and stores the
     * client's cookie store in the HTTP context.
     */
    private void login() throws Exception {
        // =================== Log in ===================
        // FIXME(review): the login URL literal and the closing `");` were lost in the
        // recovered listing. Restore them before compiling.
        HttpPost post = new HttpPost("
        // Add the POST parameters.
        List<NameValuePair> nvps = new ArrayList<NameValuePair>();
        nvps.add(new BasicNameValuePair("email", loginName));
        nvps.add(new BasicNameValuePair("password", loginPassword));
        post.setEntity(new UrlEncodedFormEntity(nvps, "utf-8"));

        HttpResponse response = httpClient.execute(post, httpContext);
        HttpEntity entity = response.getEntity();
        if (entity == null) {
            return;
        }

        // Decode the body with the charset declared by the response (the stated intent of
        // the original comment); EntityUtils also closes the content stream.
        String loginHtml = EntityUtils.toString(entity);
        LinkTag redirectLink = ParseUtils.parseTag(loginHtml, LinkTag.class);
        HttpGet get = new HttpGet(redirectLink.getLink());
        response = httpClient.execute(get, httpContext);

        // Save the cookie.
        httpContext.setAttribute(ClientContext.COOKIE_STORE, httpClient
                .getParams().getParameter("CookieStore"));
        EntityUtils.consume(response.getEntity());

        // The password is intentionally no longer echoed to stdout.
        System.out.println("账号：" + loginName);
    }

    /**
     * Fetches the page at {@code url}.
     *
     * @throws RuntimeException if {@code HttpUtils.getHtml} returns {@code null}
     */
    private String fetchHtml(String url) throws Exception {
        String html = HttpUtils.getHtml(httpClient, httpContext, url);
        if (html == null) {
            String message = "无法获取【" + url + "】网址的内容";
            logger.error(message);
            throw new RuntimeException(message);
        }
        return html;
    }

    /**
     * Extracts the friend's name: the trimmed link text of the link parsed from the
     * {@code <ul class="nav-tabs">} element of the album-list page.
     */
    private String parseFriendName(String html) throws Exception {
        Parser parser = new Parser();
        parser.setInputHTML(html);
        AndFilter navTabsFilter = new AndFilter(new TagNameFilter("ul"),
                new HasAttributeFilter("class", "nav-tabs"));
        NodeList nodes = parser.parse(navTabsFilter);
        LinkTag nameLink = ParseUtils.parseTag(nodes.toHtml(), LinkTag.class);
        return nameLink.getLinkText().trim();
    }

    /** Downloads every photo linked with {@code class="picture"} from one album page. */
    private void downloadAlbum(String friendName, String albumName, String albumUrl)
            throws Exception {
        String albumHtml = fetchHtml(albumUrl);
        List<LinkTag> pictureLinks = ParseUtils.parseTags(albumHtml,
                LinkTag.class, "class", "picture");
        if (pictureLinks == null) {
            return;
        }
        for (LinkTag pictureLink : pictureLinks) {
            downloadPhoto(friendName, albumName, pictureLink.getLink());
        }
    }

    /**
     * Opens one photo page, locates the {@code <img id="photo">} tag and saves its image
     * under {@link #DOWNLOAD_ROOT} unless a file with that path already exists.
     */
    private void downloadPhoto(String friendName, String albumName, String photoPageUrl)
            throws Exception {
        String photoHtml = fetchHtml(photoPageUrl);
        ImageTag imageTag = ParseUtils.parseTag(photoHtml,
                ImageTag.class, "id", "photo");
        if (imageTag == null) {
            return;
        }

        // The value of the <img> tag's src attribute.
        String imageUrl = imageTag.getImageURL();
        // Keeps the leading "/" so it also serves as the path separator below.
        String photoName = imageUrl.substring(imageUrl.lastIndexOf("/"));
        // NOTE(review): friendName and albumName come from remote HTML and are used in a
        // file path unsanitised -- consider validating them.
        File target = new File(DOWNLOAD_ROOT + friendName + "/" + albumName + photoName);

        // If the photo has already been downloaded, do not download it again.
        if (target.exists()) {
            return;
        }

        byte[] image = HttpUtils.getImage(httpClient, httpContext, imageUrl);
        if (image == null) {
            // Do not create an empty file: it would make the exists() check above skip
            // this photo on every later run.
            logger.error("No image data received for " + imageUrl);
            return;
        }

        // writeByteArrayToFile creates missing parent directories and closes the stream
        // it opens, which the earlier openOutputStream + IOUtils.write pairing did not.
        FileUtils.writeByteArrayToFile(target, image);
        System.out.println("图片【" + imageUrl + "】已下载");
    }
}