HttpClient根據jsoup解析網頁
Java代碼- 1.package jsoup;
- 2.
- 3.import org.apache.http.HttpEntity;
- 4.import org.apache.http.HttpResponse;
- 5.import org.apache.http.HttpStatus;
- 6.import org.apache.http.client.HttpClient;
- 7.import org.apache.http.client.methods.HttpGet;
- 8.import org.apache.http.impl.client.DefaultHttpClient;
- 9.import org.apache.http.util.EntityUtils;
- 10.import org.jsoup.Jsoup;
- 11.import org.jsoup.nodes.Document;
- 12.import org.jsoup.nodes.Element;
- 13.import org.jsoup.select.Elements;
- 14.
- 15./**
- 16. * 利用HttpClient獲取html代碼,然后使用jsoup對html代碼進(jìn)行解析
- 17. * @author Administrator
- 18. *
- 19. */
- 20.public class JustTest {
- 21. public static void main(String[] args) {
- 22. String html = getHtmlByUrl("http://www.iteye.com/");
- 23. if (html != null && !"".equals(html)) {
- 24. Document doc = Jsoup.parse(html);
- 25. Elements linksElements = doc
- 26. .select("div#page>div#content>div#main>div.left>div#recommend>ul>li>a");
- 27. // 以上代碼的意思是 找id為“page”的div里面 id為“content”的div里面 id為“main”的div里面
- 28. // class為“l(fā)eft”的div里面 id為“recommend”的div里面ul里面li里面a標(biāo)簽
- 29. for (Element ele : linksElements) {
- 30. String href = ele.attr("href");
- 31. String title = ele.text();
- 32. System.out.println(href + "," + title);
- 33. }
- 34. }
- 35. }
- 36.
- 37. /**
- 38. * 根據(jù)URL獲得所有的html信息
- 39. *
- 40. * @param url
- 41. * @return
- 42. */
- 43. public static String getHtmlByUrl(String url) {
- 44. String html = null;
- 45. HttpClient httpClient = new DefaultHttpClient();// 創(chuàng)建httpClient對象
- 46. HttpGet httpget = new HttpGet(url);// 以get方式請求該URL
- 47. try {
- 48. HttpResponse responce = httpClient.execute(httpget);// 得到responce對象
- 49. int resStatu = responce.getStatusLine().getStatusCode();// 返回碼
- 50. if (resStatu == HttpStatus.SC_OK) {// 200正常 其他就不對
- 51. // 獲得相應(yīng)實(shí)體
- 52. HttpEntity entity = responce.getEntity();
- 53. if (entity != null) {
- 54. html = EntityUtils.toString(entity);// 獲得html源代碼
- 55. System.out.println(html);
- 56. }
- 57. }
- 58. } catch (Exception e) {
- 59. System.out.println("訪問【" + url + "】出現(xiàn)異常!");
- 60. e.printStackTrace();
- 61. } finally {
- 62. httpClient.getConnectionManager().shutdown();
- 63. }
- 64. return html;
- 65. }
- 66.}
復制代碼 |