在编辑网站内容时，经常会把其他网站的内容复制（COPY）后粘贴（PASTE）到自己的后台编辑器中，但其中的图片只保存了原始 URL，并没有被复制到本地。下面这个方法可以把复制回来的 HTML 代码中的图片 URL 替换成指定的 URL，并把图片下载到指定的物理路径。
Imports System
Imports System.Collections.Generic
Imports System.Text.RegularExpressions
Imports System.Net
Imports System.IO

Namespace EFPlatform.Helper

    ''' <summary>
    ''' Helpers for fetching remote resources referenced from HTML content.
    ''' </summary>
    Public Class WebRequestHelper

#Region "RequestFileFromHtml"

        ' Matches absolute http/https/ftp URLs that end in .gif, .jpg or .png.
        ' The pattern text is kept exactly as in the original listing.
        Private Const ImageUrlPattern As String = "((http|https|ftp):(\/\/|\\\\){1}(([A-Za-z0-9_-])+[.]){1,}(net|com|cn|org|cc|tv|[0-9]{1,3})(\S*\/)((\S)+[.]{1}(gif|jpg|png)))"

        ' Built once: RegexOptions.Compiled is expensive to construct on every call.
        Private Shared ReadOnly ImageUrlRegex As Regex = New Regex(ImageUrlPattern, RegexOptions.Compiled Or RegexOptions.IgnoreCase)

        ''' <summary>
        ''' Finds remote image URLs (gif/jpg/png) in an HTML fragment, downloads each
        ''' distinct image to a local folder, and rewrites the HTML so that the
        ''' downloaded images are referenced through the given URL prefix.
        ''' </summary>
        ''' <param name="html">HTML fragment to scan. Nothing or an empty string is returned unchanged.</param>
        ''' <param name="fileUrl">
        ''' URL prefix that replaces the remote address. The generated file name is
        ''' appended directly, so the prefix should end with "/".
        ''' </param>
        ''' <param name="filePath">
        ''' Local directory the images are saved to. The generated file name is
        ''' appended directly, so the path should end with a directory separator.
        ''' </param>
        ''' <param name="reservedHost">
        ''' Host whose images are left untouched (compared case-insensitively).
        ''' May be Nothing, in which case no host is excluded.
        ''' </param>
        ''' <returns>
        ''' The HTML with every successfully downloaded image URL replaced.
        ''' URLs whose download failed are left as they were.
        ''' </returns>
        Public Shared Function RequestFileFromHtml(ByVal html As String, ByVal fileUrl As String, ByVal filePath As String, ByVal reservedHost As String) As String
            If String.IsNullOrEmpty(html) Then
                Return html
            End If

            Dim matches As MatchCollection = ImageUrlRegex.Matches(html)
            If matches.Count = 0 Then
                Return html
            End If

            ' De-duplicate on the exact matched text: that text (not the normalised
            ' Uri form) is what has to be replaced in the HTML later on.
            Dim seenTexts As New List(Of String)()
            Dim urlList As New List(Of Uri)()
            For Each m As Match In matches
                Dim matchedText As String = m.Value
                If seenTexts.Contains(matchedText) Then
                    Continue For
                End If
                seenTexts.Add(matchedText)

                ' The pattern is permissive; skip anything Uri cannot parse instead of throwing.
                Dim candidate As Uri = Nothing
                If Not Uri.TryCreate(matchedText, UriKind.Absolute, candidate) Then
                    Continue For
                End If

                ' Images already hosted on the reserved host stay where they are.
                If String.Equals(candidate.Host, reservedHost, StringComparison.OrdinalIgnoreCase) Then
                    Continue For
                End If

                urlList.Add(candidate)
            Next

            If urlList.Count = 0 Then
                Return html
            End If

            Using client As New WebClient()
                For i As Integer = 0 To urlList.Count - 1
                    Dim url As Uri = urlList(i)
                    Dim originalText As String = url.OriginalString
                    Dim fileExt As String = originalText.Substring(originalText.LastIndexOf("."c))
                    ' Timestamp plus list index keeps names unique within a single call.
                    Dim fileName As String = String.Format("{0:yyMMddHHmmssff}{1}{2}", DateTime.Now, i, fileExt)
                    Try
                        client.DownloadFile(url, filePath & fileName)
                        ' Rewrite the HTML only after the download has succeeded.
                        html = html.Replace(originalText, fileUrl & fileName)
                    Catch ex As WebException
                        ' Best effort: an image that cannot be fetched keeps its remote URL.
                    End Try
                Next
            End Using

            Return html
        End Function

#End Region

    End Class

End Namespace
来源：
http://www.cnblogs.com/ericfine/archive/2007/06/30/801561.html
说明：以上代码由 C# 转换为 VB.NET，因时间关系先保存下来，尚未测试是否正确。