还是获取网页的问题，不过是多线程

09年我在这个问题上折腾了大半年，还因此写了篇博文[这下该不会阻塞了吧 ]，最初的是想实现多线程下载网页源码，但经过对HTTP协议的学习，发现只有支持断点继传的网站才用得上多线程下载，因此又把需求放在批量快速下载网页源码上，比如我同时需要访问100个网页，如果用IdHttp就头大了，Idhttp就连访问302页面也会出现异常，调试起来很麻烦。而用自己写的TWinHttp类，recv函数阻塞的问题是能解决了，但如果短时间频繁的获取网页源码，50%以上的访问都会出现connect失败和recv超时，以前很难解决的问题，这次却让我想到了一个较好的解决办法。

首先,对于connect，如果连接失败就进入死循环重复连接，直接连接成功才退出死循环

对于recv，由于recv失败一般都是因为该函数阻塞导致recv线程超时，而如果超时后不进行再处理的话，下载网页就会失败了。所以解决办法是当recv阻塞导致超时后，重新进行connect、send和recv，也就是当recv失败，就进行死循环重复connect、send和recv操作。

解决connect失败和recv失败的问题都用到了死循环，但如果在这两个操作上始终失败呢，怎么样才能退出死循环？我的解决办法是检查本地网络连接是否正常。当然，有必要也可以对目标网站能否访问进行判断，如果目标网站比较稳定，就不需要这个判断了，比如访问百度。

检查本地网络连接是否正常，我用的方法很简单，就是用gethostbyname函数来获取一个域名的IP地址。

使用以上的办法，目前还没发现有什么负作用，只是可能在短时间频繁访问网页时在死循环中耗费的时间比较多。

下面是新TWinHttp类的源码，这个类实现了GET/HEAD/POST这几个应用，如果本地网络连接正常，经测试，基本不会出现访问失败的现象。

unit WinHttp; interface uses WinSock, Sockets, Windows, SysUtils, Classes; const HTTP_OK = 1; HTTP_TIMEOUT = 2; HTTP_FAIL = 3; HTTP_STATECODE_ERR = 4; type THttpData = record WSAData:TWSAData; Host, Path:string; sockfd:Integer; //套接字 hostEnt:PHostEnt; addr:sockaddr_in; SocketHost:TSocketHost; IsCon:Boolean; end; PHttpData = ^THttpData; type TWinHttp = class( TObject ) private FHttpData:THttpData; FTimeOut:Integer; FCookie:string; FHttpHead:string; FStateCode:Integer; FLocation:string; FUrl:string; FReferer:string; private procedure ParseURL( const Url:string; var Host, Path:string); function Conn( var HttpData:THttpData ):Boolean; public property TimeOut: Integer read FTimeOut write FTimeOut; property Cookie: string read FCookie write FCookie; property Referer: string read FReferer write FReferer; function Get( Url:string ):string; procedure Head( Url:string ); function Post( Url:string; PostData:string ):string; function HttpHead:string; function StateCode:Integer; function Location:string; constructor Create;overload; end; implementation type TRecvThread = class( TThread ) private FHttpData:THttpData; protected procedure Execute; override; public HtmlSource:string; HttpHead:string; StateCode:Integer; ResultValue:Integer; public constructor Create( HttpData:THttpData );overload; end; constructor TRecvThread.Create( HttpData:THttpData ); begin FHttpData := HttpData; inherited Create( False ); end; procedure TRecvThread.Execute; var Buf:array[0..1024] of char; nPos,nRecv:Integer; HeadFine:Boolean; begin //FreeOnTerminate := True; HtmlSource := ''; HttpHead := ''; StateCode := 0; ResultValue := 1; HeadFine := False; while True do begin FillChar( Buf, 1024, 0 ); nRecv := recv( FHttpData.sockfd, Buf, 1024, 0 ); if nRecv > 0 then begin HtmlSource := HtmlSource + Buf; if not HeadFine then begin nPos := Pos( #13#10#13#10, HtmlSource ); if nPos <> 0 then begin HttpHead := Copy( HtmlSource, 1, nPos ); StateCode := StrToInt( Copy( HttpHead, 10, 3 ) ); HeadFine := True; end; end; end else if nRecv = -1 then begin ResultValue := -1; HtmlSource := ''; Break; end else begin ResultValue := 0; Break; end; end; end; constructor TWinHttp.Create; begin FTimeOut := 30; FHttpData.IsCon := False; end; procedure TWinHttp.Head( Url:string ); var SendBuf:array[0..10240] of char; RecvThread:TRecvThread; nPos:Integer; label start; begin start: FUrl := Url; Conn( FHttpData ); if FHttpData.IsCon then begin FillChar( SendBuf, 10240, 0 ); lstrcpy( SendBuf, PChar( 'HEAD ' + FHttpData.Path + ' HTTP/1.1' + #13#10 ) ); lstrcat( SendBuf, PChar( 'Host: ' + FHttpData.Host + #13#10 ) ); lstrcat( SendBuf, PChar( 'User-Agent: Mozilla/5.0' + ' (Windows; U; Windows NT 5.1; zh-CN; rv:1.9.2.15)' + ' Gecko/20110303 Firefox/3.6.15' + #13#10 ) ); lstrcat( SendBuf, PChar( 'Content-Type: ' + 'application/x-www-form-urlencoded;' + #13#10 ) ); lstrcat( SendBuf, PChar( 'Connection: Close' + #13#10 ) ); if FCookie <> '' then lstrcat( SendBuf, PChar( 'Cookie: ' + FCookie + #13#10#13#10 ) ) else lstrcat( SendBuf, PChar( #13#10 ) ); send( FHttpData.sockfd, SendBuf, StrLen( SendBuf ), 0 ); RecvThread := TRecvThread.Create( FHttpData ); if WaitForSingleObject( RecvThread.Handle, FTimeOut * 1000 ) = WAIT_TIMEOUT then begin //判断本地网络连接是否正常 if gethostbyname( PChar( FHttpData.Host ) ) <> nil then begin FHttpData.IsCon := False; RecvThread.Free; goto start; end; FHttpHead := ''; end; if RecvThread.ResultValue = 0 then begin FHttpHead := RecvThread.HttpHead; FStateCode := RecvThread.StateCode; if FStateCode = 302 then begin nPos := Pos( 'Location: ', FHttpHead ); FLocation := Copy( FHttpHead, nPos + Length( 'Location: ' ), Length( FHttpHead ) - nPos ); FLocation := Copy( FLocation, 1, Pos( #13#10, FLocation ) - 1 ); end; end else begin //判断本地网络连接是否正常 if gethostbyname( PChar( FHttpData.Host ) ) <> nil then begin FHttpData.IsCon := False; RecvThread.Free; goto start; end; FHttpHead := ''; end; RecvThread.Free; //Result := Recv end else begin //判断本地网络连接是否正常 if gethostbyname( PChar( FHttpData.Host ) ) <> nil then begin FHttpData.IsCon := False; goto start; end; FHttpHead := ''; end; end; //获取源码 function TWinHttp.Get( Url:string ):string; var SendBuf:array[0..10240] of char; RecvThread:TRecvThread; HtmlSource:string; i, nPos, nIndex:Integer; CookieList:TStringList; label start; begin CookieList := nil; start: if CookieList = nil then CookieList := TStringList.Create else CookieList.Clear; FUrl := Url; Conn( FHttpData ); if FHttpData.IsCon then begin FillChar( SendBuf, 10240, 0 ); lstrcpy( SendBuf, PChar( 'GET ' + FHttpData.Path + ' HTTP/1.1' + #13#10 ) ); lstrcat( SendBuf, PChar( 'Host: ' + FHttpData.Host + #13#10 ) ); lstrcat( SendBuf, PChar( 'User-Agent: Mozilla/5.0' + ' (Windows; U; Windows NT 5.1; zh-CN; rv:1.9.2.15)' + ' Gecko/20110303 Firefox/3.6.15' + #13#10 ) ); lstrcat( SendBuf, PChar( 'Content-Type: ' + 'application/x-www-form-urlencoded;' + #13#10 ) ); lstrcat( SendBuf, PChar( 'Connection: Close' + #13#10 ) ); if FReferer <> '' then begin lstrcat( SendBuf, PChar( 'Referer: ' + FReferer + #13#10 ) ); end; if FCookie <> '' then lstrcat( SendBuf, PChar( 'Cookie: ' + FCookie + #13#10#13#10 ) ) else lstrcat( SendBuf, PChar( #13#10 ) ); send( FHttpData.sockfd, SendBuf, StrLen( SendBuf ), 0 ); RecvThread := TRecvThread.Create( FHttpData ); if WaitForSingleObject( RecvThread.Handle, FTimeOut * 1000 ) = WAIT_TIMEOUT then begin //判断本地网络连接是否正常 if gethostbyname( PChar( FHttpData.Host ) ) <> nil then begin FHttpData.IsCon := False; RecvThread.Free; goto start; end; HtmlSource := ''; end; if RecvThread.ResultValue = 0 then begin HtmlSource := RecvThread.HtmlSource; FHttpHead := RecvThread.HttpHead; FStateCode := RecvThread.StateCode; if FStateCode = 302 then begin nPos := Pos( 'Location: ', FHttpHead ); FLocation := Copy( FHttpHead, nPos + Length( 'Location: ' ), Length( FHttpHead ) - nPos ); FLocation := Copy( FLocation, 1, Pos( #13#10, FLocation ) - 1 ); end; ExtractStrings( [#13], [], PChar( FHttpHead ), CookieList ); if CookieList.Count > 0 then begin FCookie := ''; for i := 0 to CookieList.Count - 1 do begin nPos := Pos( 'Set-Cookie: ', CookieList[i] ); if nPos = 1 then begin FCookie := FCookie + Copy( CookieList[i], Length( 'Set-Cookie: ' ) + 1, Length( CookieList[i] ) - Length( 'Set-Cookie: ' ) + 1 ); nIndex := Length( FCookie ); if FCookie[nIndex] <> ';' then FCookie := FCookie + '; '; end; end; end; end else begin //判断本地网络连接是否正常 if gethostbyname( PChar( FHttpData.Host ) ) <> nil then begin FHttpData.IsCon := False; RecvThread.Free; goto start; end; HtmlSource := ''; end; RecvThread.Free; end else begin //判断本地网络连接是否正常 if gethostbyname( PChar( FHttpData.Host ) ) <> nil then begin FHttpData.IsCon := False; goto start; end; HtmlSource := ''; end; CookieList.Free; Result := HtmlSource; end; function TWinHttp.Post( Url:string; PostData:string ):string; var SendBuf:array[0..10240] of char; RecvThread:TRecvThread; HtmlSource:string; i,nPos,nIndex:Integer; CookieList:TStringList; label start; begin CookieList := nil; start: if CookieList = nil then CookieList := TStringList.Create else CookieList.Clear; FUrl := Url; Conn( FHttpData ); if FHttpData.IsCon then begin FillChar( SendBuf, 10240, 0 ); lstrcpy( SendBuf, PChar( 'POST ' + FHttpData.Path + ' HTTP/1.1' + #13#10 ) ); lstrcat( SendBuf, PChar( 'Host: ' + FHttpData.Host + #13#10 ) ); lstrcat( SendBuf, PChar( 'User-Agent: Mozilla/5.0' + ' (Windows; U; Windows NT 5.1; zh-CN; rv:1.9.2.15)' + ' Gecko/20110303 Firefox/3.6.15' + #13#10 ) ); lstrcat( SendBuf, PChar( 'Content-Type: ' + 'application/x-www-form-urlencoded;' + #13#10 ) ); lstrcat( SendBuf, PChar( 'Content-Length: ' + IntToStr( Length( PostData ) ) + #13#10 ) ); lstrcat( SendBuf, PChar( 'Connection: Close' + #13#10 ) ); if FReferer <> '' then begin lstrcat( SendBuf, PChar( 'Referer: ' + FReferer + #13#10 ) ); end; if FCookie <> '' then lstrcat( SendBuf, PChar( 'Cookie: ' + FCookie + #13#10#13#10 ) ) else lstrcat( SendBuf, PChar( #13#10 ) ); lstrcat( SendBuf, PChar( PostData ) ); send( FHttpData.sockfd, SendBuf, StrLen( SendBuf ), 0 ); RecvThread := TRecvThread.Create( FHttpData ); if WaitForSingleObject( RecvThread.Handle, FTimeOut * 1000 ) = WAIT_TIMEOUT then begin //判断本地网络连接是否正常 if gethostbyname( PChar( FHttpData.Host ) ) <> nil then begin FHttpData.IsCon := False; RecvThread.Free; goto start; end; HtmlSource := ''; end; if RecvThread.ResultValue = 0 then begin HtmlSource := RecvThread.HtmlSource; FHttpHead := RecvThread.HttpHead; FStateCode := RecvThread.StateCode; if FStateCode = 302 then begin nPos := Pos( 'Location: ', FHttpHead ); FLocation := Copy( FHttpHead, nPos + Length( 'Location: ' ), Length( FHttpHead ) - nPos ); FLocation := Copy( FLocation, 1, Pos( #13#10, FLocation ) - 1 ); end; ExtractStrings( [#13], [], PChar( FHttpHead ), CookieList ); if CookieList.Count > 0 then begin FCookie := ''; for i := 0 to CookieList.Count - 1 do begin nPos := Pos( 'Set-Cookie: ', CookieList[i] ); if nPos = 1 then begin FCookie := FCookie + Copy( CookieList[i], Length( 'Set-Cookie: ' ) + 1, Length( CookieList[i] ) - Length( 'Set-Cookie: ' ) + 1 ); nIndex := Length( FCookie ); if FCookie[nIndex] <> ';' then FCookie := FCookie + '; '; end; end; end; end else begin //判断本地网络连接是否正常 if gethostbyname( PChar( FHttpData.Host ) ) <> nil then begin FHttpData.IsCon := False; RecvThread.Free; goto start; end; HtmlSource := ''; end; RecvThread.Free; end else begin //判断本地网络连接是否正常 if gethostbyname( PChar( FHttpData.Host ) ) <> nil then begin FHttpData.IsCon := False; goto start; end; HtmlSource := ''; end; CookieList.Free; Result := HtmlSource; end; //连接服务器 function TWinHttp.Conn( var HttpData:THttpData ):Boolean; var IsOk:Boolean; i, nCon:Integer; begin HttpData.IsCon := False; IsOk := False; HttpData.sockfd := 0; HttpData.hostEnt := nil; HttpData.SocketHost := ''; with HttpData do begin if WSAStartup(MakeWord(2,2), WSAData) = 0 then begin ParseURL(FUrl, Host, Path); //建立套接字 sockfd := socket(PF_INET, SOCK_STREAM, IPPROTO_TCP); if sockfd <> INVALID_SOCKET then begin if Host <> '' then begin if Host[1] in ['0'..'9'] then begin if inet_addr(PChar(Host)) <> INADDR_NONE then SocketHost := Host; end else begin hostEnt := gethostbyname(pchar(Host)); if hostEnt <> nil then with hostEnt^ do SocketHost := format('%d.%d.%d.%d', [ord(h_addr^[0]), ord(h_addr^[1]), ord(h_addr^[2]), ord(h_addr^[3])]); end; addr.sin_family := AF_INET; addr.sin_port := htons(80); addr.sin_addr.S_addr := inet_addr(PChar(SocketHost)); for i := 0 to 10 do begin //连接 nCon := connect(sockfd, addr, SizeOf(addr) ); if nCon <> 0 then begin Sleep(10); Continue; end else Break; end; if nCon = 0 then begin IsOk := True; HttpData.IsCon := True; end; end; end; end; end; if IsOk then FHttpData := HttpData; Result := IsOk; end; //分隔URL procedure TWinHttp.ParseURL( const Url:string; var Host, Path:string); var nIndex:Integer; S,tmpUrl:string; begin tmpUrl := Url; S := LowerCase(Url); if ( Pos('https://', S) <> 0 ) then begin //删除http:// Delete(tmpUrl, 1, Length('https://')); end else if( Pos( 'http://', S ) <> 0 ) then begin //删除http:// Delete(tmpUrl, 1, Length('http://')); end; nIndex := Pos('/', tmpUrl); if nIndex = 0 then begin Host := tmpUrl; Path := '/'; end else begin Host := Copy(tmpUrl, 1, nIndex - 1); Path := Copy(tmpUrl, nIndex, Length(Url)); end; end; function TWinHttp.HttpHead:string; begin Result := FHttpHead; end; function TWinHttp.StateCode:Integer; begin Result := FStateCode; end; function TWinHttp.Location:string; begin Result := FLocation; end; initialization finalization end.

posted @ 2011-04-08 05:13 冷寒生阅读(899) 评论(0) 编辑收藏举报

会员力量，点亮园子希望

刷新页面返回顶部