String query = URLEncoder.encode( "xxx" , "UTF-8" ); |
String url = ".baidu./s?wd=" + query + "&pn=" + p * 10 + "&tn=baiduhome_pg&ie=utf-8" |
public void MakeQuery(String domain) { |
try { |
HttpClient httpClient = new HttpClient(); |
GetMethod getMethod = new GetMethod(domain); |
//System.out.println("*************************************************************"); |
//System.out.println(getMethod); |
try { |
httpClient.executeMethod(getMethod); |
|
} catch (Exception e){ |
System.out.println( "网络问题" ); |
|
} |
getMethod.getParams()。setParameter(HttpMethodParams.RETRY_HANDLER, |
new DefaultHttpMethodRetryHandler()); |
int statusCode = httpClient.executeMethod(getMethod); |
if (statusCode != HttpStatus.SC_OK) { |
System.err.println( "Method failed: " |
+ getMethod.getStatusLine()); |
|
} |
byte [] responseBody = getMethod.getResponseBody(); |
//System.out.println("*************************************************************"); |
//System.out.println(responseBody); |
String response = new String(responseBody, "UTF-8" ); |
//System.out.println("*************************************************************"); |
//System.out.println(response); |
//Jsoup解析html |
Document doc = Jsoup.parse(response); |
//System.out.println("*************************************************************"); |
//System.out.println(doc); |
Elements contents = doc.getElementsByClass( "f" ); |
for (Element content: contents){ |
Element links = content.getElementsByTag( "a" )。first(); |
String linkHref = links.attr( "href" ); //链接 |
String linkText = links.text(); //摘要 |
FoursearchZH.map.put(linkHref, linkText); |
System.out.println( "------------------" ); |
System.out.println(linkHref); |
System.out.println(linkText); |
|
} |
|
} catch (Exception e) { |
System.err.println( "Something went wrong…" ); |
e.printStackTrace(); |
|
} |
|
} |
高级设计师
by: 小蜜锋 发表于:2014-07-07 22:54:10 顶(1) | 踩(0) 回复
Jsoup
回复评论