'///////////////////////////
Imports System
Imports System.Collections
Imports System.IO
Imports System.Net
Imports System.Text
Imports System.Text.RegularExpressions

''' <summary>
''' Downloads an HTML page and extracts the link (&lt;a href&gt;) and image
''' (&lt;img src&gt;) targets it contains, resolved against the page URL.
''' </summary>
Public Class HTMLContentParser

    ' Shared options for both patterns; compiled once per process instead of once per call.
    Private Const PatternOptions As RegexOptions = RegexOptions.IgnoreCase Or RegexOptions.Compiled

    ' Matches the href value of an <a ...> tag. "[^>]*?" keeps the search inside a
    ' single tag (and may span line breaks), so several links on one line are each
    ' found. Group 1 is the value: double-quoted, single-quoted, or bare.
    Private Shared ReadOnly LinkPattern As New Regex( _
        "<a\b[^>]*?\shref\s*=\s*(?:""(?<1>[^""]*)""|'(?<1>[^']*)'|(?<1>[^\s>]+))", _
        PatternOptions)

    ' Same shape as LinkPattern, for the src value of an <img ...> tag.
    Private Shared ReadOnly ImagePattern As New Regex( _
        "<img\b[^>]*?\ssrc\s*=\s*(?:""(?<1>[^""]*)""|'(?<1>[^']*)'|(?<1>[^\s>]+))", _
        PatternOptions)

    ''' <summary>
    ''' Fetches <paramref name="sURL"/> over HTTP(S) and returns the response body as text.
    ''' </summary>
    ''' <param name="sURL">Absolute URL of the page to download.</param>
    ''' <returns>
    ''' The response body. On any failure the exception message is returned instead;
    ''' this mirrors the original contract, so callers cannot distinguish an error
    ''' from page content by type alone.
    ''' </returns>
    Public Function Return_HTMLContent(ByVal sURL As String) As String
        Try
            ' Explicit cast: a non-HTTP scheme fails here (and is reported below),
            ' exactly as the original implicit narrowing did.
            Dim URLReq As HttpWebRequest = DirectCast(WebRequest.Create(sURL), HttpWebRequest)

            ' Dispose response, stream and reader so the underlying connection is released.
            Using URLRes As WebResponse = URLReq.GetResponse()
                Using sStream As Stream = URLRes.GetResponseStream()
                    Using reader As New StreamReader(sStream)
                        Return reader.ReadToEnd()
                    End Using
                End Using
            End Using
        Catch ex As Exception
            Return ex.Message
        End Try
    End Function

    ''' <summary>
    ''' Extracts the href targets of all &lt;a&gt; tags in <paramref name="sHTMLContent"/>.
    ''' </summary>
    ''' <param name="sHTMLContent">HTML text to scan; Nothing yields an empty list.</param>
    ''' <param name="sURL">URL of the page, used to resolve relative links.</param>
    ''' <returns>An ArrayList of String URLs in document order.</returns>
    Public Function ParseHTMLLinks(ByVal sHTMLContent As String, ByVal sURL As String) As ArrayList
        Return CollectUrls(LinkPattern, sHTMLContent, sURL)
    End Function

    ''' <summary>
    ''' Extracts the src targets of all &lt;img&gt; tags in <paramref name="sHTMLContent"/>.
    ''' </summary>
    ''' <param name="sHTMLContent">HTML text to scan; Nothing yields an empty list.</param>
    ''' <param name="sURL">URL of the page, used to resolve relative image paths.</param>
    ''' <returns>An ArrayList of String URLs in document order.</returns>
    Public Function ParseHTMLImages(ByVal sHTMLContent As String, ByVal sURL As String) As ArrayList
        Return CollectUrls(ImagePattern, sHTMLContent, sURL)
    End Function

    ' Runs rPattern over the content and returns group 1 of every match, passed through ProcessURL.
    Private Function CollectUrls(ByVal rPattern As Regex, ByVal sHTMLContent As String, ByVal sURL As String) As ArrayList
        Dim aMatch As New ArrayList()
        If sHTMLContent Is Nothing Then
            Return aMatch
        End If

        Dim mMatch As Match = rPattern.Match(sHTMLContent)
        While mMatch.Success
            aMatch.Add(ProcessURL(mMatch.Groups(1).Value, sURL))
            mMatch = mMatch.NextMatch()
        End While
        Return aMatch
    End Function

    ''' <summary>
    ''' Cleans a raw attribute value and turns it into an absolute URL relative to
    ''' the page it was found on.
    ''' </summary>
    ''' <param name="sInput">Raw href/src value captured from the HTML.</param>
    ''' <param name="sURL">URL of the containing page.</param>
    ''' <returns>
    ''' The absolute URL when <paramref name="sURL"/> is an absolute URI and the link
    ''' can be resolved against it; the cleaned link unchanged when it cannot be
    ''' resolved; otherwise a plain "directory of sURL + link" join.
    ''' </returns>
    Private Function ProcessURL(ByVal sInput As String, ByVal sURL As String) As String
        Dim sLink As String = CleanLinkText(sInput)

        Dim baseUri As Uri = Nothing
        If Uri.TryCreate(sURL, UriKind.Absolute, baseUri) Then
            ' System.Uri handles every case the hand-rolled joining got wrong:
            ' https:// bases, already-absolute links of any scheme, root-relative
            ' links ("/x" resolves against the host, not the current directory).
            Dim resolved As Uri = Nothing
            If Uri.TryCreate(baseUri, sLink, resolved) Then
                Return resolved.AbsoluteUri
            End If
            Return sLink ' malformed link: hand it back rather than invent a URL
        End If

        ' The base is not an absolute URI, so fall back to simple joining.
        If sLink.IndexOf("://", StringComparison.Ordinal) >= 0 Then
            Return sLink
        End If

        Dim sBase As String = sURL
        If sBase Is Nothing Then
            sBase = ""
        End If

        ' Keep only the directory part of the base and join with exactly one slash.
        Dim iSlash As Integer = sBase.LastIndexOf("/"c)
        If iSlash >= 0 Then
            sBase = sBase.Substring(0, iSlash + 1)
        Else
            sBase &= "/"
        End If
        Return sBase & sLink.TrimStart("/"c)
    End Function

    ' Truncates the captured value at the first tag terminator and strips stray
    ' markup/quote characters.
    Private Shared Function CleanLinkText(ByVal sInput As String) As String
        If sInput Is Nothing Then
            Return ""
        End If

        Dim sClean As String = sInput

        ' Cut at the first ">" (literal or entity-encoded). NOTE(review): the
        ' original compared a 4-character window against ">", which only fits the
        ' entity "&gt;"; both spellings are handled here. Confirm against the
        ' upstream source if available.
        Dim iCut As Integer = sClean.IndexOf(">"c)
        Dim iEntity As Integer = sClean.IndexOf("&gt;", StringComparison.Ordinal)
        If iEntity >= 0 AndAlso (iCut < 0 OrElse iEntity < iCut) Then
            iCut = iEntity
        End If
        If iCut >= 0 Then
            sClean = sClean.Substring(0, iCut)
        End If

        ' Remove characters that are never part of the URL itself.
        sClean = sClean.Replace("&lt;", "")
        sClean = sClean.Replace("&quot;", "")
        sClean = sClean.Replace("<", "")
        sClean = sClean.Replace("""", "")
        sClean = sClean.Replace("'", "")

        Return sClean.Trim()
    End Function

End Class