刚从D7转过来有点不习惯,到发贴时都没搞懂string,ansistring,widestring的区别与具体使用环境。

  这里主要记录一些在D2010中使用自带INDY10.5.5  post网站的一些问题(主要用于发贴)。

  首先,与大家一样,遇到的是使用TIdMultiPartFormDataStream,post中文变乱码(这个乱码不是UTF8那种乱码,是一串“口”),网上说把IdMultipartFormData.pas中的IdRead函数修改,如下:

// Produces the next chunk of the multipart body: copies up to ACount bytes into
// VBuffer and returns how many bytes were actually produced.
// AOffset is not referenced anywhere in this body.
// This is the Indy routine as modified by the article: three AppendString calls
// were given an explicit third argument, TEncoding.Default.
function TIdMultiPartFormDataStream.IdRead(var VBuffer: TIdBytes; AOffset, ACount: Longint): Longint;
var
LTotalRead: Integer;
LCount: Integer;
LBufferCount: Integer;
LRemaining : Integer;
LItem: TIdFormDataField;
begin
// First call: start at field 0 with an empty internal buffer.
if not FInitialized then begin
FInitialized := True;
FCurrentItem := 0;
SetLength(FInternalBuffer, 0);
end;

LTotalRead := 0;
// LBufferCount is the write offset into VBuffer for this call.
LBufferCount := 0;

// Keep going until ACount bytes were produced, or there are neither fields
// left to emit nor pending bytes in the internal buffer.
while (LTotalRead < ACount) and ((FCurrentItem < FFields.Count) or (Length(FInternalBuffer) > 0)) do begin
// Nothing pending and no stream being copied: begin the next field by
// queueing the text returned by its FormatField.
if (Length(FInternalBuffer) = 0) and not Assigned(FInputStream) then begin
LItem := FFields.Items[FCurrentItem];
// Modified call: encoding argument added by the article.
AppendString(FInternalBuffer, LItem.FormatField, TEncoding.Default);

if Assigned(LItem.FieldObject) then begin
if (LItem.FieldObject is TStream) then begin
// Stream-backed field: remember the stream and rewind it; its content is
// copied further below, and FCurrentItem advances once it is exhausted.
FInputStream := TStream(LItem.FieldObject);
FInputStream.Position := 0;
end else begin
if (LItem.FieldObject is TStrings) then begin
// TStrings-backed field: queue its Text and move on to the next field.
// Modified call: encoding argument added by the article.
AppendString(FInternalBuffer, TStrings(LItem.FieldObject).Text, TEncoding.Default);
Inc(FCurrentItem);
end;
end;
end else begin
// Field without an attached object: nothing more to queue for it.
Inc(FCurrentItem);
end;
end;

// Drain as much of the internal buffer as still fits into VBuffer.
if Length(FInternalBuffer) > 0 then begin
if Length(FInternalBuffer) > (ACount - LBufferCount) then begin
LCount := ACount - LBufferCount;
end else begin
LCount := Length(FInternalBuffer);
end;

if LCount > 0 then begin
LRemaining := Length(FInternalBuffer) - LCount;
CopyTIdBytes(FInternalBuffer, 0, VBuffer, LBufferCount, LCount);
// Move the unconsumed tail to the front, then shrink the buffer to it.
if LRemaining > 0 then begin
CopyTIdBytes(FInternalBuffer, LCount, FInternalBuffer, 0, LRemaining);
end;
SetLength(FInternalBuffer, LRemaining);
LBufferCount := LBufferCount + LCount;
FPosition := FPosition + LCount;
LTotalRead := LTotalRead + LCount;
end;
end;

// A stream field is active and VBuffer still has room: read from the stream.
// NOTE(review): presumably ReadBytes reads up to (ACount - LTotalRead) bytes
// into VBuffer starting at offset LBufferCount - confirm against TIdStreamHelper.
if Assigned(FInputStream) and (LTotalRead < ACount) then begin
LCount := TIdStreamHelper.ReadBytes(FInputStream,VBuffer, ACount - LTotalRead, LBufferCount);
// A short read is treated as end of stream: rewind and release the stream,
// advance to the next field and queue a CRLF after the stream data.
if LCount < (ACount - LTotalRead) then begin
FInputStream.Position := 0;
FInputStream := nil;
Inc(FCurrentItem);
SetLength(FInternalBuffer, 0);
// This call was left without an explicit encoding argument.
AppendString(FInternalBuffer, #13#10);
end;

LBufferCount := LBufferCount + LCount;
LTotalRead := LTotalRead + LCount;
FPosition := FPosition + LCount;
end;

// All fields emitted: queue the text returned by PrepareStreamForDispatch.
// The Inc pushes FCurrentItem past FFields.Count so this runs only once.
if FCurrentItem = FFields.Count then begin
// Modified call: encoding argument added by the article.
AppendString(FInternalBuffer, PrepareStreamForDispatch, TEncoding.Default);
Inc(FCurrentItem);
end;
end;
Result := LTotalRead;
end;

修改部分(原文以红色标出)是带有 TEncoding.Default 的三处AppendString调用:在调用AppendString时补齐第三个参数,值为 TEncoding.Default,但是未见成效。有成功的请告诉我应该怎么做,谢谢。

 

最后无法,我使用了一个精简的TMsMultiPartFormDataStream类,源码如下:

 

View Code
unit MsMultiPartFormData;

interface

uses
SysUtils, Classes;

const
// Leading part of the request Content-Type value; the constructor appends the boundary to it.
CONTENT_TYPE = 'multipart/form-data; boundary=';
// Line terminator used between header lines and parts.
CRLF = #13#10;
// Format template for a part's Content-Disposition header; %s receives the field name.
CONTENT_DISPOSITION = 'Content-Disposition: form-data; name="%s"';
// Format template appended to CONTENT_DISPOSITION for file parts; %s receives the file name.
FILE_NAME_PLACE_HOLDER = '; filename="%s"';
// Format template for a part's Content-Type header; the two CRLFs end the header line
// and add the blank line that precedes the part data.
CONTENT_TYPE_PLACE_HOLDER = 'Content-Type: %s' + crlf + crlf;
// Format template for a part's Content-Length header; %d receives the data size in bytes.
CONTENT_LENGTH = 'Content-Length: %d' + crlf;
type
// Memory stream into which a multipart/form-data body is written part by part.
// All text is handled as ansistring and written to the stream byte-for-byte.
TMsMultiPartFormDataStream = class(TMemoryStream)
private
// Boundary string generated once in the constructor.
FBoundary: ansistring;
// CONTENT_TYPE followed by the boundary; built in the constructor.
FRequestContentType: ansistring;
// Builds the boundary from a fixed run of dashes plus the current date/time.
function GenerateUniqueBoundary: ansistring;
public
// Appends a plain form field part (boundary line, Content-Disposition header, value).
procedure AddFormField(const FieldName, FieldValue: ansistring);
// Appends a file part whose data is taken from FileData.
procedure AddFile(const FieldName, FileName, ContentType: ansistring; FileData: TStream); overload;
// Appends a file part whose data is read from the file named FileName.
procedure AddFile(const FieldName, FileName, ContentType: ansistring); overload;
// Appends the closing boundary line and rewinds the stream to position 0.
procedure PrepareStreamForDispatch;
constructor Create;
property Boundary: ansistring read FBoundary;
property RequestContentType: ansistring read FRequestContentType;
end;

implementation
{ TMsMultiPartFormDataStream }

// Creates the stream, generates its boundary and derives the request
// Content-Type value (CONTENT_TYPE followed by that boundary).
constructor TMsMultiPartFormDataStream.Create;
begin
  inherited Create;
  FBoundary := GenerateUniqueBoundary;
  FRequestContentType := CONTENT_TYPE + FBoundary;
end;

// Appends a file part to the body.
//   FieldName   - value of the name="..." attribute of the part.
//   FileName    - value of the filename="..." attribute of the part.
//   ContentType - value of the part's Content-Type header.
//   FileData    - source of the part data; it is read from position 0 to its end.
// Raises EReadError (from TStream.CopyFrom) if FileData yields fewer bytes than
// its reported Size, instead of silently sending uninitialised buffer content.
procedure TMsMultiPartFormDataStream.AddFile(const FieldName, FileName,
  ContentType: ansistring; FileData: TStream);
var
  sFormFieldInfo: ansistring;
  iSize: Int64;
begin
  iSize := FileData.Size;
  // Part header: boundary line, Content-Disposition with file name,
  // Content-Length, Content-Type and the blank line before the data.
  sFormFieldInfo := Format(CRLF + '--' + Boundary + CRLF + CONTENT_DISPOSITION +
    FILE_NAME_PLACE_HOLDER + CRLF + CONTENT_LENGTH +
    CONTENT_TYPE_PLACE_HOLDER, [FieldName, FileName, iSize, ContentType]);
  Write(Pointer(sFormFieldInfo)^, Length(sFormFieldInfo));
  FileData.Position := 0;
  // CopyFrom streams the data without a caller-managed buffer and verifies
  // that exactly iSize bytes were transferred. It is skipped for an empty
  // source because a count of 0 has a special "copy everything" meaning.
  if iSize > 0 then
    CopyFrom(FileData, iSize);
end;

// Convenience overload: opens the file named FileName for shared reading and
// appends it as a file part through the stream-based AddFile. FileName is
// passed on unchanged as the part's file name. The file is closed again even
// if adding the part raises.
procedure TMsMultiPartFormDataStream.AddFile(const FieldName, FileName,
  ContentType: ansistring);
var
  Source: TFileStream;
begin
  Source := TFileStream.Create(FileName, fmOpenRead or fmShareDenyWrite);
  try
    AddFile(FieldName, FileName, ContentType, Source);
  finally
    Source.Free;
  end;
end;

// Appends a plain form field part: boundary line, Content-Disposition header
// carrying FieldName, a blank line, then FieldValue.
// The part is assembled by concatenation; FieldValue is never used as part of
// a Format template.
procedure TMsMultiPartFormDataStream.AddFormField(const FieldName,
  FieldValue: ansistring);
var
  Part: ansistring;
begin
  Part := CRLF + '--' + Boundary + CRLF;
  Part := Part + 'Content-Disposition: form-data; name="' + FieldName + '"';
  Part := Part + CRLF + CRLF + FieldValue;
  Write(PAnsiChar(Part)^, Length(Part));
end;

// Returns a boundary made of a fixed run of dashes followed by the current
// date and time formatted down to milliseconds.
function TMsMultiPartFormDataStream.GenerateUniqueBoundary: ansistring;
const
  DASH_PREFIX = '---------------------------';
  STAMP_FORMAT = 'mmddyyhhnnsszzz';
begin
  Result := DASH_PREFIX + FormatDateTime(STAMP_FORMAT, Now);
end;

// Finishes the body: writes the closing boundary line ('--' + boundary + '--')
// and rewinds the stream to its start.
procedure TMsMultiPartFormDataStream.PrepareStreamForDispatch;
var
  Closing: ansistring;
begin
  Closing := CRLF + '--' + Boundary + '--' + CRLF;
  Write(PAnsiChar(Closing)^, Length(Closing));
  Position := 0;
end;

end.

注意,其中的字符串类型全是AnsiString,这个类和indy的TIdMultiPartFormDataStream用法一样。

用这个类添加中文,post后不会是乱码,尚不明原因,求解释。

  接下来的问题是,post UTF8编码问题,在2009以前,str:=UTF8Encode('字符串'),会得到一个看着像乱码的字符串,这是正确的,但是在2010中,str的结果还是“字符串”,所以用抓包工具看到的中文实际上也是编码不成功的,在2010中想要得到正确的utf8字符串,应该:str:=UTF8EncodeToShortString('字符串'); 这个函数只能针对长度不超过255的中文有效,超过255的需要用下面的函数:

// Encodes a UnicodeString as UTF-8 and returns the raw bytes in an AnsiString.
//   US - text to encode; an empty string yields an empty result.
// Returns exactly the UTF-8 bytes of US (no padding, no terminator), or '' if
// the conversion fails.
// Note: the result holds UTF-8 bytes but is typed as a plain AnsiString, so
// assigning it to a UnicodeString will reinterpret the bytes using the system
// ANSI code page; write it to streams / byte buffers instead.
function Utf8Encode(const US: UnicodeString): AnsiString;
var
  L: Integer;
begin
  Result := '';
  if US = '' then Exit;
  // Worst case: each UTF-16 code unit expands to 3 UTF-8 bytes.
  SetLength(Result, Length(US) * 3);
  // The "+ 1" accounts for the terminating #0 that every AnsiString carries
  // after its last character.
  L := UnicodeToUtf8(PAnsiChar(Result), Length(Result) + 1, PWideChar(US), Length(US));
  if L > 0 then
    // The returned count includes the terminating #0; trim the result to the
    // bytes actually produced so no unused tail bytes are returned.
    SetLength(Result, L - 1)
  else
    Result := '';
end;

  上面的函数不是system的utf8Encode,是根据自己需要改编自UTF8EncodeToShortString 和 UTF8Encode,其中SetLength(Result, L *3);长度乘以3是因为UnicodeString中的每个字符(UTF-16码元)转换成UTF-8后最多占3个字节(常用汉字正好是3个字节),所以按最坏情况预先分配缓冲区。

 其中原理我也不清楚,反正这样用没错,至于原理,还请知道的人告知,谢谢。 

  最后一个是GET UTF8编码问题,在d7中,我经常这样写:

  str:=http.get(url);

  str:=utf8toansi(str);(utf8toansi不是系统自带的函数,是自己写的,系统自带的utf8toansi其实是调用utf8decode,大多时候会把字符串变成空)

  在d2010中,上面的方式行不通,需要用到TStringStream,如果确实是utf8编码则,st:=tStringStream.create('',TEncoding.UTF8); 如果不确定返回数据的编码,则先

 http.get(url,st);

 //然后判断编码

  isutf8:=checkencoding(st.DataString,http.Response.ContentType);

  if isutf8 then

    str:=utf8tostring(st.DataString);

  当然,这样也不是最完美的解决方法,经过utf8tostring后,一样会有少数字符是“?”,以下附上自己用的checkEncoding;

// Reports whether a response should be treated as UTF-8.
//   s           - response body text, searched for a <meta ... charset=...> tag.
//   ContentType - value of the response Content-Type header.
// Returns True if ContentType contains 'charset=utf-8' (case-insensitive), or
// if a <meta> tag carrying a charset attribute mentions 'UTF-8'; otherwise False.
function  CheckEncoding(s,ContentType:string):Boolean;
var
  per: TPerlRegEx;
begin
  // Explicit default: without it the result is undefined whenever neither
  // check succeeds.
  Result := False;
  if AnsiContainsText(ContentType, 'charset=utf-8') then
  begin
    Result := True;
    Exit;
  end;

  per := TPerlRegEx.Create(nil);
  try
    per.Options := [preCaseLess];
    per.Subject := s;
    // Restrict the match to a single <meta ...> tag so that text elsewhere on
    // the same line or in later tags cannot produce a false positive.
    per.RegEx := '<meta[^>]*charset=[^>]*>';
    if per.Match then
      Result := AnsiContainsText(per.MatchedExpression, 'UTF-8');
  finally
    // Release the regex object even if matching raises.
    per.Free;
  end;
end;

  以上只是个人拙见,欢迎斧正。

posted on 2011-11-30 13:00  Bach  阅读(3759)  评论(2)  编辑  收藏  举报