杀手的童话片尾曲,杭州19楼论坛,演唱会网站
import java.util.HashSet;
import java.util.LinkedHashSet;
import java.util.List;

import us.codecraft.webmagic.Page;
import us.codecraft.webmagic.Site;
import us.codecraft.webmagic.Spider;
import us.codecraft.webmagic.processor.PageProcessor;
public class moviepaperpageprocessor implements pageprocessor {
private site page = site.me().setretrytimes(3).setsleeptime(1000);
public site getsite() {
return page;
}
public void process(page page) {
list<string> links = page.gethtml().links().regex(
"http://posters.aa.com/poster/\\d+").all();
links = removeduplicate(links);
page.addtargetrequests(links);
page.putfield("title", page.gethtml().xpath(
"//div[@id='imdbleftsecc']/center/h1/text()").tostring());
page.putfield("imgurl", page.gethtml().xpath(
"//div[@id='imdbleftsecc']/center/img/@src").tostring());
}
public static void main(string[] args) {
for (int i = 1; i <= 3; i++) {
spider.create(new moviepaperpageprocessor()).addurl(
"http://posters.aa.co/poster_page/" + i).thread(5).run();
}
}
public static list removeduplicate(list list) {
hashset hs = new hashset(list);
list.clear();
list.addall(hs);
return list;
}
}
如对本文有疑问,请在下方留言讨论,广大热心网友会与你互动!点击进行留言回复。
浅析我对 String、StringBuilder、StringBuffer 的理解
使用IDEA搭建SSM框架的详细教程(Spring + Spring MVC + MyBatis)
Springboot整合freemarker 404问题解决方案
引入mybatis-plus报 Invalid bound statement错误问题的解决方法
网友评论