delphi idhttp 实战用法(TIdhttpEx)

以delphi XE8 自带indy(10.5.8.0)组件为例,分享实战中遇到的问题及解决方法。

TIdHttpEx 用法实例01[多线程获取网页](包含完整源码)

实例02(如何Post参数,如何保存与提取Cookie)待写

TIdHttpEx 已实现了对GZIP的解压,对UTF-8编码解码等

本文包含以下几个单元

uIdhttp.pas (TIdHttpEx)

uIdCookieMgr.pas (TIdCookieMgr)

uOperateIndy.pas 操作 TIdhttpEx 全靠它了

uIdhttp.Pas

 1 unit uIdHttpEx;
 2 
 3 interface
 4 
 5 uses
 6   Classes, Idhttp, uIdCookieMgr, IdSSLOpenSSL;
 7   {uIdCookieMgr 是我改进的}
 8 
 9 type
10 
11   TIdhttpEx = class(TIdhttp)
12   private
13     FIdCookieMgr: TIdCookieMgr;
14     FIdSSL: TIdSSLIOHandlerSocketOpenSSL;
15   public
16     constructor Create(AOwner: TComponent);
17     property CookieMgr: TIdCookieMgr read FIdCookieMgr;
18     procedure GenRandomUserAgent; //随便生成一个请求头,可以忽略或自己改进
19     property IdSSL: TIdSSLIOHandlerSocketOpenSSL read FIdSSL;
20 
21   end;
22 
23 implementation
24 
25 { TIdhttpEx }
26 
27 const
28 
29   sUserAgent =
30     'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; Trident/4.0; Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1) ; .NET CLR 2.0.50727)';
31   // sAccept = 'image/gif, image/jpeg, image/pjpeg, image/pjpeg, application/x-shockwave-flash, application/msword, application/vnd.ms-excel, application/vnd.ms-powerpoint, */*';
32   sUserAgent2 =
33     'Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 5.1; Trident/4.0; InfoPath.3; .NET CLR 2.0.50727; .NET CLR 3.0.4506.2152; .NET CLR 3.5.30729)';
34   sAccept = 'application/x-shockwave-flash, image/gif, image/jpeg, image/pjpeg, application/msword, application/vnd.ms-excel, application/vnd.ms-powerpoint, application/x-ms-application, application/x-ms-xbap, application/vnd.ms-xpsdocument, application/xaml+xml, */*';
35 
36   sUserAgent3 =
37     'Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.65 Safari/537.36';
38   sAccept2 = 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8';
39 
40   MaxUserAgentCount = 3;
41 
42 var
43   UserAgent: array [0 .. MaxUserAgentCount - 1] of string;
44 
45 constructor TIdhttpEx.Create(AOwner: TComponent);
46 begin
47   inherited;
48 
49   HTTPOptions := []; // 禁止POST参数编码,自己手动编 HttpEncodeX
50 
51   // HTTPOptions := [hoNoParseMetaHTTPEquiv]; // 禁止POST参数编码,自己手动编 HttpEncodeX
52   // hoNoParseMetaHTTPEquiv 禁止解析html 此可能造成假死!
53 
54   FIdCookieMgr := TIdCookieMgr.Create(self);
55   CookieManager := FIdCookieMgr;
56 
57   // ssl 需要 libeay32.dll ssleay32.dll 阿里旺旺目录下可以搜索到
58 
59   FIdSSL := TIdSSLIOHandlerSocketOpenSSL.Create(self);
60   IOHandler := FIdSSL;
61 
62   HandleRedirects := true;
63   AllowCookies := true;
64   ProtocolVersion := pv1_1;
65 
66   Request.RawHeaders.FoldLength := 25000; // 参数头长度,重要
67 
68   ReadTimeout := 15000;
69   ConnectTimeout := 15000;
70 
71   RedirectMaximum := 5;
72   Request.UserAgent := sUserAgent3;
73   Request.Accept := sAccept;
74   Request.AcceptEncoding := 'gzip';
75 
76 end;
77 
78 procedure TIdhttpEx.GenRandomUserAgent;
79 begin
80   Randomize;
81   self.Request.UserAgent := UserAgent[Random(MaxUserAgentCount)];
82 end;
83 
84 initialization
85 
86 UserAgent[0] :=
87   'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; Trident/4.0; Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1) ; .NET CLR 2.0.50727)';
88 UserAgent[1] :=
89   'Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 5.1; Trident/4.0; InfoPath.3; .NET CLR 2.0.50727; .NET CLR 3.0.4506.2152; .NET CLR 3.5.30729)';
90 UserAgent[2] :=
91   'Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.65 Safari/537.36';
92 
93 // 这三句请忽略,有些网站认求头,我随便写的。请大家根本实际情况改进
94 finalization
95 
96 end.
uIdhttpEx.pas

uIdCookieMgr.Pas

  1 unit uIdCookieMgr;
  2 
  3 interface
  4 
  5 uses
  6   IdCookieManager, Classes;
  7 
  8 type
  9   TIdCookieMgr = class(TIdCookieManager)
 10   private
 11 
 12     procedure SetCurCookies(const Value: string);
 13 
 14     function GetCurCookies: string;
 15     function GetCookieList: TStringList;
 16 
 17   public
 18 
 19     procedure SaveCookies(const AFileName: string);
 20     procedure LoadCookies(const AFileName: string);
 21 
 22     function GetCookieValue(const ACookieName: string): string;
 23     property CurCookies: string read GetCurCookies write SetCurCookies;
 24 
 25   end;
 26 
 27 implementation
 28 
 29 uses
 30   IdCookie, SysUtils, IdURI, uStrUtils, IdGlobalProtocols, DateUtils;
 31 { uStrUtils 一套操作字串的函数单元 }
 32 
 33 function TIdCookieMgr.GetCookieList: TStringList;
 34 var
 35   C: Tcollectionitem;
 36 begin
 37   result := TStringList.Create;
 38   for C in CookieCollection do
 39     result.add((C as TIdCookie).CookieText);
 40 end;
 41 
 42 function TIdCookieMgr.GetCookieValue(const ACookieName: string): string;
 43 var
 44   n: integer;
 45 begin
 46   result := '';
 47   if IsNotEmptyStr(ACookieName) then
 48   begin
 49     n := CookieCollection.GetCookieIndex(ACookieName);
 50     if n >= 0 then
 51       result := CookieCollection.Cookies[n].Value;
 52   end;
 53 end;
 54 
 55 function TIdCookieMgr.GetCurCookies: string;
 56 var
 57   strs: TStringList;
 58 begin
 59   strs := GetCookieList;
 60   try
 61     result := strs.Text;
 62   finally
 63     strs.Free;
 64   end;
 65 end;
 66 
 67 procedure TIdCookieMgr.LoadCookies(const AFileName: string);
 68 var
 69   StrLst: TStringList;
 70   C: TIdCookie;
 71   uri: TIdURI;
 72   s, t: string;
 73 begin
 74   StrLst := TStringList.Create;
 75   uri := TIdURI.Create;
 76   try
 77     if FileExists(AFileName) then
 78     begin
 79       StrLst.LoadFromFile(AFileName);
 80       for s in StrLst do
 81       begin
 82         C := CookieCollection.add;
 83         CookieCollection.AddCookie(C, uri);
 84         C.ParseServerCookie(s, uri);
 85         C.Domain := GetStrBetween(s, 'Domain=', ';');
 86         C.Path := GetStrBetween(s, 'Path=', ';');
 87         t := GetStrBetween(s, 'Expires=', 'GMT') + 'GMT'; // GetStrBetween 在 uStrUtils 单元中
 88         C.Expires := CookieStrToLocalDateTime(t);
 89       end;
 90     end;
 91   finally
 92     uri.Free;
 93     StrLst.Free;
 94   end;
 95 end;
 96 
 97 procedure TIdCookieMgr.SaveCookies(const AFileName: string);
 98 var
 99   StrLst: TStringList;
100 begin
101   StrLst := GetCookieList;
102   try
103     StrLst.SaveToFile(AFileName);
104   finally
105     StrLst.Free;
106   end;
107 end;
108 
109 procedure TIdCookieMgr.SetCurCookies(const Value: string);
110 var
111   StrLst: TStringList;
112   C: TIdCookie;
113   uri: TIdURI;
114   s, t: string;
115 begin
116   StrLst := TStringList.Create;
117   uri := TIdURI.Create;
118   try
119     StrLst.Text := Value;
120     CookieCollection.Clear;
121     for s in StrLst do
122     begin
123       C := CookieCollection.add;
124       CookieCollection.AddCookie(C, uri);
125       C.ParseServerCookie(s, uri);
126       C.Domain := GetStrBetween(s, 'Domain=', ';');
127       C.Path := GetStrBetween(s, 'Path=', ';');
128       t := GetStrBetween(s, 'Expires=', 'GMT') + 'GMT';
129       C.Expires := CookieStrToLocalDateTime(t);
130     end;
131   finally
132     uri.Free;
133     StrLst.Free;
134   end;
135 end;
136 
137 end.
uIdCookeMgr.pas

uOperateIndy.pas 非常有用操作 TIdhttpEx 全靠它了

  1 unit uOperateIndy;
  2 
  3 interface
  4 
  5 uses
  6   Classes, Idhttp, IdMultipartFormData;
  7 
  8 function IdhttpGet(AIdhttp: TIdHTTP; AUrl: string; var AHtml: string): Boolean;
  9 function IdhttpPost(AIdhttp: TIdHTTP; AStrList: TStringList; AUrl: string; var AHtml: String)
 10   : Boolean; overload;
 11 function IdhttpPost(AIdhttp: TIdHTTP; AIdMul: TIdMultiPartFormDataStream; AUrl: string;
 12   var AHtml: string): Boolean; overload;
 13 
 14 function GetHtmlFromUrl(AUrl: string; var AHtml: string): Boolean;
 15 
 16 implementation
 17 
 18 uses
 19   uIdhttpEx, SysUtils, ZLibEx, StrUtils, uStrUtils, uHtmlElement, uParseHtml;
 20 { 带u的单元,都是我写的,ZLibEx 是解压库 }
 21 
 22 //解压GZIP 那个参数31是试出来的
 23 procedure DecompressGZIP(inStream, outStream: TStream); inline;
 24 begin
 25   ZDecompressStream2(inStream, outStream, 31);
 26 end;
 27 
 28 function HtmlIsUTF8(AHtml: string): Boolean;
 29 var
 30   BMetaList: TSingleHtmlElementList;
 31   BMeta: TSingleHtmlElement;
 32   BKeyElement: PKeyElement;
 33   BCheckOver: Boolean;
 34   sKeyName: string;
 35   sKeyValue: string;
 36 begin
 37   Result := false;
 38   BMetaList := TSingleHtmlElementList.Create;
 39   try
 40 
 41     GetMetaList(AHtml, BMetaList);
 42 
 43     BCheckOver := false;
 44 
 45     for BMeta in BMetaList do
 46     begin
 47 
 48       for BKeyElement in BMeta.KeyElementList do
 49       begin
 50 
 51         sKeyName := UpperCase(BKeyElement.Name);
 52         sKeyValue := UpperCase(BKeyElement.Value);
 53 
 54         if PosEx('UTF-8', sKeyValue) > 0 then
 55         begin
 56           Result := true;
 57           BCheckOver := true;
 58           break;
 59         end;
 60 
 61       end;
 62 
 63       if BCheckOver then
 64         break;
 65     end;
 66 
 67   finally
 68     BMetaList.Free;
 69   end;
 70 end;
 71 
 72 function GetHtmlAfterOperateIdhttp(AIdhttp: TIdHTTP; AStream: TStream): string;
 73 var
 74   BSize: Int64;
 75   BOutStream: TMemoryStream;
 76   TempStream: TMemoryStream;
 77   rS: RawByteString;
 78   s: string;
 79   sUtf8: string;
 80   BIsUtf8: Boolean;
 81   sCharSet: string;
 82 
 83 begin
 84   BSize := AStream.Size;
 85 
 86   BOutStream := TMemoryStream.Create;
 87   try
 88     if BSize > 0 then
 89     begin
 90 
 91       if PosEx('GZIP', UpperCase(AIdhttp.Response.ContentEncoding)) > 0 then
 92       begin
 93         AStream.Position := 0;
 94         DecompressGZIP(AStream, BOutStream);
 95         TempStream := BOutStream;
 96       end
 97       else
 98         TempStream := TMemoryStream(AStream);
 99 
100       BSize := TempStream.Size;
101       SetLength(rS, BSize);
102       TempStream.Position := 0;
103       TempStream.ReadBuffer(rS[1], BSize);
104 
105       s := string(rS);
106       sUtf8 := UTF8ToString(rS);
107 
108       sCharSet := AIdhttp.Response.CharSet;
109       BIsUtf8 := PosEx('UTF-8', UpperCase(sCharSet)) > 0;
110       if not BIsUtf8 then
111         BIsUtf8 := HtmlIsUTF8(s);
112 
113       if BIsUtf8 then
114         Result := sUtf8
115       else
116       begin
117 
118         if (PosEx('', sUtf8) > 0) or (PosEx('', sUtf8) > 0) or (PosEx('', sUtf8) > 0) or
119           (PosEx('', sUtf8) > 0) or (PosEx('我们', sUtf8) > 0) or (PosEx('', sUtf8) > 0) or
120           (PosEx('', sUtf8) > 0) then
121 
122         begin
123           Result := sUtf8;
124         end
125         else
126           Result := s;
127 
128       end;
129 
130     end
131   finally
132     BOutStream.Free;
133   end;
134 
135 end;
136 
137 function IdhttpGet(AIdhttp: TIdHTTP; AUrl: string; var AHtml: string): Boolean;
138 var
139   BStrStream: TMemoryStream;
140 begin
141   AHtml := '';
142   BStrStream := TMemoryStream.Create;
143   try
144     try
145       AIdhttp.Get(AUrl, BStrStream);
146       AHtml := GetHtmlAfterOperateIdhttp(AIdhttp, BStrStream);
147       Result := true;
148     except
149       on e: Exception do
150       begin
151         Result := false;
152         AHtml := e.Message;
153       end;
154     end;
155   finally
156     BStrStream.Free;
157   end;
158 end;
159 
160 function IdhttpPost(AIdhttp: TIdHTTP; AStrList: TStringList; AUrl: string; var AHtml: String)
161   : Boolean; overload;
162 var
163   BStrStream: TMemoryStream;
164 begin
165   Result := true;
166   AHtml := '';
167   BStrStream := TMemoryStream.Create;
168   try
169     try
170       AIdhttp.Post(AUrl, AStrList, BStrStream);
171       AHtml := GetHtmlAfterOperateIdhttp(AIdhttp, BStrStream);
172     except
173       on e: Exception do
174       begin
175         AHtml := e.Message;
176         Result := false;
177       end;
178     end;
179   finally
180     BStrStream.Free;
181   end;
182 end;
183 
184 function IdhttpPost(AIdhttp: TIdHTTP; AIdMul: TIdMultiPartFormDataStream; AUrl: string;
185   var AHtml: string): Boolean; overload;
186 var
187   BStrStream: TMemoryStream;
188 begin
189   Result := true;
190   AHtml := '';
191   BStrStream := TMemoryStream.Create;
192   try
193     try
194       AIdhttp.Post(AUrl, AIdMul, BStrStream);
195       AHtml := GetHtmlAfterOperateIdhttp(AIdhttp, BStrStream);
196     except
197       on e: Exception do
198       begin
199         AHtml := e.Message;
200         Result := false;
201       end;
202     end;
203   finally
204     BStrStream.Free;
205   end;
206 end;
207 
208 function GetHtmlFromUrl(AUrl: string; var AHtml: string): Boolean;
209 var
210   Idhttp: TIdhttpEx;
211 begin
212   Idhttp := TIdhttpEx.Create(nil);
213   try
214     Result := IdhttpGet(Idhttp, AUrl, AHtml);
215   finally
216     Idhttp.Free;
217   end;
218 end;
219 
220 end.
uOperateIndy.pas

附:delphi 进阶基础技能说明

posted on 2014-11-09 15:10  晓不得2013  阅读(3746)  评论(0编辑  收藏  举报

导航