最近忙于一個項目,了解下httpclient,在這里總結出來,和大家一起學習分享,希望各位朋友提出寶貴的意見。 首先介紹一下項目的背景: 目標:把國內一家保險公司的“WEB一賬通”改成“WAP一賬通”。 資源:客戶不提供任何的webservice接口。 本項目中用到的第三方組件是apache的httpclient,一個非常強大的網頁抓取工具(抓這個字用得可能不太好), 這里和大家一起討論下httpclient的一些常用用法和要注意的地方。 本文引用的資源列表: httpclient入門: http://www.ibm.com/developerworks/cn/opensource/os-httpclient/ httpclient證書導入:http://m.tkk7.com/happytian/archive/2006/12/22/89447.html httpclient高級認識:http://laohuang.iteye.com/blog/55613 httpclient官方文檔:http://hc.apache.org/httpcomponents-client/index.html httpclient資源關閉:http://www.iteye.com/topic/234759 上面的文章寫得很好,看完之后也就知道怎么用httpclient這個很好的工具了,但是在這里還是補充一些比較重要的東西,也是項目中經常碰到的問題。 首先要注意的有以下幾點: 1、httpclient連接后資源釋放問題很重要,就跟我們用database connection要釋放資源一樣。 2、https網站采用ssl加密傳輸,證書導入要注意。 3、做這樣的項目最好先了解下http協議,比如302,301,200,404返回代碼的含義(這是最基本的),cookie,session的機制。 4、httpclient的redirect狀態默認是自動的,這在很大程度上給開發者很大的方便(如一些授權獲得cookie),但是有時要手動管理下,比如有時會遇到CircularRedirectException異常,出現這樣的情況是因為返回的頭信息中location值指向之前已經請求過的地址(端口號可以不同),可能導致出現死循環遞歸重定向,這時可以手動關閉:method.setFollowRedirects(false)。 5、有的網站會先判別用戶的請求是否是來自瀏覽器,如不是,則返回不正確的文本,所以用httpclient抓取信息時在頭部加入如下信息: header.put("User-Agent", "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; QQDownload 1.7; .NET CLR 1.1.4322; CIBA; .NET CLR 2.0.50727)"); 6、當post請求提交數據時要改變默認編碼,不然的話提交上去的數據會出現亂碼。可以調用postMethod.getParams().setContentCharset("UTF-8"),或者重寫PostMethod的getRequestCharSet()方法。 下面寫一個通用類來處理request請求返回的文本: Java代碼

- /*
- * HttpRequestProxy.java
- *
- * Created on November 3, 2008, 9:53 AM
- */
-
- package cn.com.mozat.net;
-
- import java.io.BufferedReader;
- import java.io.IOException;
- import java.io.InputStream;
- import java.io.InputStreamReader;
- import java.util.HashMap;
- import java.util.Iterator;
- import java.util.Map;
- import java.util.Set;
-
- import org.apache.commons.httpclient.Header;
- import org.apache.commons.httpclient.HttpClient;
- import org.apache.commons.httpclient.HttpException;
- import org.apache.commons.httpclient.HttpMethod;
- import org.apache.commons.httpclient.NameValuePair;
- import org.apache.commons.httpclient.SimpleHttpConnectionManager;
- import org.apache.commons.httpclient.methods.GetMethod;
- import org.apache.commons.httpclient.methods.PostMethod;
-
- import cn.com.mozat.exception.CustomException;
-
- /**
- *
- * @author bird email:lihongfu-84@163.com
- *
- * 2008-11-4 09:49:48
- */
- public class HttpRequestProxy{
- //超時間隔
- private static int connectTimeOut = 60000;
- //讓connectionmanager管理httpclientconnection時是否關閉連接
- private static boolean alwaysClose = false;
- //返回數據編碼格式
- private String encoding = "UTF-8";
-
- private final HttpClient client = new HttpClient(new SimpleHttpConnectionManager(alwaysClose));
-
- public HttpClient getHttpClient(){
- return client;
- }
-
- /**
- * 用法:
- * HttpRequestProxy hrp = new HttpRequestProxy();
- * hrp.doRequest("http://www.163.com",null,null,"gbk");
- *
- * @param url 請求的資源URL
- * @param postData POST請求時form表單封裝的數據 沒有時傳null
- * @param header request請求時附帶的頭信息(header) 沒有時傳null
- * @param encoding response返回的信息編碼格式 沒有時傳null
- * @return response返回的文本數據
- * @throws CustomException
- */
- public String doRequest(String url,Map postData,Map header,String encoding) throws CustomException{
- String responseString = null;
- //頭部請求信息
- Header[] headers = null;
- if(header != null){
- Set entrySet = header.entrySet();
- int dataLength = entrySet.size();
- headers= new Header[dataLength];
- int i = 0;
- for(Iterator itor = entrySet.iterator();itor.hasNext();){
- Map.Entry entry = (Map.Entry)itor.next();
- headers[i++] = new Header(entry.getKey().toString(),entry.getValue().toString());
- }
- }
- //post方式
- if(postData!=null){
- PostMethod postRequest = new PostMethod(url.trim());
- if(headers != null){
- for(int i = 0;i < headers.length;i++){
- postRequest.setRequestHeader(headers[i]);
- }
- }
- Set entrySet = postData.entrySet();
- int dataLength = entrySet.size();
- NameValuePair[] params = new NameValuePair[dataLength];
- int i = 0;
- for(Iterator itor = entrySet.iterator();itor.hasNext();){
- Map.Entry entry = (Map.Entry)itor.next();
- params[i++] = new NameValuePair(entry.getKey().toString(),entry.getValue().toString());
- }
- postRequest.setRequestBody(params);
- try {
- responseString = this.executeMethod(postRequest,encoding);
- } catch (CustomException e) {
- throw e;
- } finally{
- postRequest.releaseConnection();
- }
- }
- //get方式
- if(postData == null){
- GetMethod getRequest = new GetMethod(url.trim());
- if(headers != null){
- for(int i = 0;i < headers.length;i++){
- getRequest.setRequestHeader(headers[i]);
- }
- }
- try {
- responseString = this.executeMethod(getRequest,encoding);
- } catch (CustomException e) {
- e.printStackTrace();
- throw e;
- }finally{
- getRequest.releaseConnection();
- }
- }
-
- return responseString;
- }
-
- private String executeMethod(HttpMethod request, String encoding) throws CustomException{
- String responseContent = null;
- InputStream responseStream = null;
- BufferedReader rd = null;
- try {
- this.getHttpClient().executeMethod(request);
- if(encoding != null){
- responseStream = request.getResponseBodyAsStream();
- rd = new BufferedReader(new InputStreamReader(responseStream,
- encoding));
- String tempLine = rd.readLine();
- StringBuffer tempStr = new StringBuffer();
- String crlf=System.getProperty("line.separator");
- while (tempLine != null)
- {
- tempStr.append(tempLine);
- tempStr.append(crlf);
- tempLine = rd.readLine();
- }
- responseContent = tempStr.toString();
- }else
- responseContent = request.getResponseBodyAsString();
-
- Header locationHeader = request.getResponseHeader("location");
- //返回代碼為302,301時,表示頁面己經重定向,則重新請求location的url,這在
- //一些登錄授權取cookie時很重要
- if (locationHeader != null) {
- String redirectUrl = locationHeader.getValue();
- this.doRequest(redirectUrl, null, null,null);
- }
- } catch (HttpException e) {
- throw new CustomException(e.getMessage());
- } catch (IOException e) {
- throw new CustomException(e.getMessage());
-
- } finally{
- if(rd != null)
- try {
- rd.close();
- } catch (IOException e) {
- throw new CustomException(e.getMessage());
- }
- if(responseStream != null)
- try {
- responseStream.close();
- } catch (IOException e) {
- throw new CustomException(e.getMessage());
-
- }
- }
- return responseContent;
- }
-
-
- /**
- * 特殊請求數據,這樣的請求往往會出現redirect本身而出現遞歸死循環重定向
- * 所以單獨寫成一個請求方法
- * 比如現在請求的url為:http://localhost:8080/demo/index.jsp
- * 返回代碼為302 頭部信息中location值為:http://localhost:8083/demo/index.jsp
- * 這時httpclient認為進入遞歸死循環重定向,拋出CircularRedirectException異常
- * @param url
- * @return
- * @throws CustomException
- */
- public String doSpecialRequest(String url,int count,String encoding) throws CustomException{
- String str = null;
- InputStream responseStream = null;
- BufferedReader rd = null;
- GetMethod getRequest = new GetMethod(url);
- //關閉httpclient自動重定向動能
- getRequest.setFollowRedirects(false);
- try {
-
- this.client.executeMethod(getRequest);
- Header header = getRequest.getResponseHeader("location");
- if(header!= null){
- //請求重定向后的URL,count同時加1
- this.doSpecialRequest(header.getValue(),count+1, encoding);
- }
- //這里用count作為標志位,當count為0時才返回請求的URL文本,
- //這樣就可以忽略所有的遞歸重定向時返回文本流操作,提高性能
- if(count == 0){
- getRequest = new GetMethod(url);
- getRequest.setFollowRedirects(false);
- this.client.executeMethod(getRequest);
- responseStream = getRequest.getResponseBodyAsStream();
- rd = new BufferedReader(new InputStreamReader(responseStream,
- encoding));
- String tempLine = rd.readLine();
- StringBuffer tempStr = new StringBuffer();
- String crlf=System.getProperty("line.separator");
- while (tempLine != null)
- {
- tempStr.append(tempLine);
- tempStr.append(crlf);
- tempLine = rd.readLine();
- }
- str = tempStr.toString();
- }
-
- } catch (HttpException e) {
- throw new CustomException(e.getMessage());
- } catch (IOException e) {
- throw new CustomException(e.getMessage());
- } finally{
- getRequest.releaseConnection();
- if(rd !=null)
- try {
- rd.close();
- } catch (IOException e) {
- throw new CustomException(e.getMessage());
- }
- if(responseStream !=null)
- try {
- responseStream.close();
- } catch (IOException e) {
- throw new CustomException(e.getMessage());
- }
- }
- return str;
- }
-
-
-
-
- public static void main(String[] args) throws Exception{
- HttpRequestProxy hrp = new HttpRequestProxy();
- Map header = new HashMap();
- header.put("User-Agent", "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; QQDownload 1.7; .NET CLR 1.1.4322; CIBA; .NET CLR 2.0.50727)");
- String str = hrp.doRequest(
- "http://www.cma-cgm.com/en/eBusiness/Tracking/Default.aspx?BolNumber=GZ2108827",
- null, header,null);
- System.out.println(str.contains("row_CRXU1587647"));
- // System.out.println(str);
- }
-
- }
posted on 2015-08-20 13:04
長春語林科技 閱讀(223)
評論(0) 編輯 收藏 所屬分類:
android