用Clipboard获取HTML格式内容
2007-09-15 17:24 清炒白菜 阅读(1136) 评论(0) 收藏 举报

.NET 的 Clipboard.GetData(DataFormats.Html) 方法有 BUG:取回的 HTML 内容中 UTF-8 字符会被损坏。
在网上查了一下,找到一个解决方法,不过那个网站被封了(至少我这里无法访问)。
用某软件上去看了一下,照着做,问题解决了。代码如下:
using System;
using System.Text;
using System.Diagnostics;
using System.Windows.Forms;
using System.Runtime.InteropServices;
using System.Runtime.InteropServices.ComTypes;


namespace JX
{
    /// <summary>
    /// By JX
    /// Version 1.0.200511041900
    /// This static class contains a method to extract DataFormats.Html data from an IDataObject
    /// obtained from the Clipboard or a Drag and Drop operation.
    /// The method overcomes the UTF8 corruption problem encountered in .NET Framework version 2.0.50727.42
    /// by reading the raw bytes through the COM IDataObject interface instead of letting the
    /// framework decode them; the caller decodes the returned bytes as UTF-8.
    /// Version 1.0 of the DataFormats.Html data has a header like this:
    ///
    /// Version:1.0
    /// StartHTML:000000238
    /// EndHTML:000000546
    /// StartFragment:000000353
    /// EndFragment:000000510
    /// StartSelection:000000353
    /// EndSelection:000000510
    ///
    /// The values refer to the BYTES of the string not the actual chars
    /// </summary>
    static class HtmlFromIDataObject
    {
        /// <summary>
        /// Extracts data of type DataFormats.Html from an IDataObject data container.
        /// This method does not throw for extraction failures; it writes the relevant
        /// information to the debug output and returns null instead.
        /// </summary>
        /// <param name="data">IDataObject data container</param>
        /// <returns>
        /// A byte[] array holding the raw contents of the HTML clipboard block, or null if the method fails.
        /// The array length is the size reported by GlobalSize, which may be larger than the
        /// payload itself; use the EndHTML header value to find the real end of the data.
        /// </returns>
        public static byte[] GetHtml(System.Windows.Forms.IDataObject data)
        {
            System.Runtime.InteropServices.ComTypes.IDataObject interopData =
                data as System.Runtime.InteropServices.ComTypes.IDataObject;
            if (interopData == null)
            {
                // Covers both a null argument and a data object without the COM interface.
                Debug.WriteLine("HtmlFromIDataObject.GetHtml -> data is null or does not implement the COM IDataObject interface");
                return null;
            }

            FORMATETC format = new FORMATETC();
            // Registered clipboard format ids do not fit in a signed 16-bit value, so the
            // narrowing cast must stay unchecked even when the project is built with /checked.
            format.cfFormat = unchecked((short)DataFormats.GetFormat(DataFormats.Html).Id);
            format.dwAspect = DVASPECT.DVASPECT_CONTENT;
            format.lindex = -1;
            format.tymed = TYMED.TYMED_HGLOBAL;

            int queryResult = 0;
            try
            {
                queryResult = interopData.QueryGetData(ref format);
            }
            catch (Exception exp)
            {
                Debug.WriteLine("HtmlFromIDataObject.GetHtml -> QueryGetData(ref format) threw an exception: "
                    + Environment.NewLine + exp.ToString());
                return null;
            }
            if (queryResult != 0)
            {
                Debug.WriteLine("HtmlFromIDataObject.GetHtml -> QueryGetData(ref format) returned a code != 0 code: "
                    + queryResult.ToString());
                return null;
            }

            STGMEDIUM stgmedium = new STGMEDIUM();
            try
            {
                interopData.GetData(ref format, out stgmedium);
            }
            catch (Exception exp)
            {
                Debug.WriteLine("HtmlFromIDataObject.GetHtml -> GetData(ref format, out stgmedium) threw this exception: "
                    + Environment.NewLine + exp.ToString());
                return null;
            }

            // From here on the medium belongs to us and must be released on every path.
            try
            {
                if (stgmedium.tymed != TYMED.TYMED_HGLOBAL)
                {
                    Debug.WriteLine("HtmlFromIDataObject.GetHtml -> GetData returned an unexpected medium type: "
                        + stgmedium.tymed.ToString());
                    return null;
                }
                if (stgmedium.unionmember == IntPtr.Zero)
                {
                    Debug.WriteLine("HtmlFromIDataObject.GetHtml -> stgmedium.unionmember returned an IntPtr pointing to zero");
                    return null;
                }
                return CopyGlobalMemory(stgmedium.unionmember);
            }
            finally
            {
                ReleaseStgMedium(ref stgmedium);
            }
        }

        /// <summary>
        /// Copies the contents of a global memory block into a managed array.
        /// </summary>
        /// <param name="hGlobal">Handle of the global memory block to read</param>
        /// <returns>A copy of the block, or null if it cannot be locked, sized or copied</returns>
        private static byte[] CopyGlobalMemory(IntPtr hGlobal)
        {
            HandleRef handleRef = new HandleRef(null, hGlobal);
            IntPtr locked = IntPtr.Zero;
            try
            {
                locked = GlobalLock(handleRef);
                if (locked == IntPtr.Zero)
                {
                    int lockError = Marshal.GetLastWin32Error();
                    Debug.WriteLine("HtmlFromIDataObject.GetHtml -> GlobalLock failed with error code: "
                        + lockError.ToString());
                    return null;
                }

                // GlobalSize reports 0 on failure; anything above int.MaxValue cannot be held in a byte[].
                ulong size = GlobalSize(handleRef).ToUInt64();
                if (size == 0 || size > int.MaxValue)
                {
                    Debug.WriteLine("HtmlFromIDataObject.GetHtml -> GlobalSize returned an unusable size: "
                        + size.ToString());
                    return null;
                }

                byte[] rawArray = new byte[(int)size];
                Marshal.Copy(locked, rawArray, 0, rawArray.Length);
                return rawArray;
            }
            catch (Exception exp)
            {
                Debug.WriteLine("HtmlFromIDataObject.GetHtml -> Html Import threw an exception: " + Environment.NewLine + exp.ToString());
                // Never hand back a partially filled buffer.
                return null;
            }
            finally
            {
                // Only undo a lock that was actually taken.
                if (locked != IntPtr.Zero)
                {
                    GlobalUnlock(handleRef);
                }
            }
        }

        [DllImport("kernel32.dll", ExactSpelling = true, SetLastError = true)]
        private static extern IntPtr GlobalLock(HandleRef handle);

        [DllImport("kernel32.dll", ExactSpelling = true, SetLastError = true)]
        [return: MarshalAs(UnmanagedType.Bool)]
        private static extern bool GlobalUnlock(HandleRef handle);

        // The native return type is SIZE_T, which is pointer-sized, hence UIntPtr rather than int.
        [DllImport("kernel32.dll", ExactSpelling = true, SetLastError = true)]
        private static extern UIntPtr GlobalSize(HandleRef handle);

        // Frees the storage medium handed out by IDataObject.GetData.
        [DllImport("ole32.dll", ExactSpelling = true)]
        private static extern void ReleaseStgMedium([In] ref STGMEDIUM pmedium);
    }
}

// GetHtml returns null when the extraction fails, so check the result before decoding it.
byte[] rawHtmlBytes = JX.HtmlFromIDataObject.GetHtml(dataObject);
string rawHtml = Encoding.UTF8.GetString(rawHtmlBytes);

原文地址:http://bakamachine.blogspot.com/2006/05/workarond-for-dataobject-html.html


浙公网安备 33010602011771号