function GetHtmltxt(aHtml:string):string;
function DelStrbyTag(aHtml,aFtTag,aEndTag:string):string; // 除去制点 tag 之间的数据 var aFt,aBk:integer; TempStr,BackStr:string; begin TempStr:=aHtml; BackStr:=''; while Tempstr<>'' do begin aFt:=Pos(aFtTag,Tempstr); aBk:=Pos(aEndTag,Tempstr); if (aFt>0) and (aBk>0) then begin BackStr:=BackStr+copy(Tempstr,1,aFt-1); TempStr:=copy(TempStr,aBk+length(aEndTag),length(tempstr)); end else begin BackStr:=BackStr+tempstr; tempstr:=''; end; end; Result:=BackStr; end;
var i:integer; s:string; begin i:=1; s:=''; aHtml:=trim(aHtml); aHtml:=stringReplace(aHtml,'<p>',chr(13)+chr(10),[rfReplaceAll,rfIgnoreCase]); aHtml:=DelStrbyTag(aHtml,'<script','</script>'); aHtml:=StringReplace(aHtml,#$D#$A, '',[rfReplaceAll,rfIgnoreCase]); //回车换行符 ; aHtml:=StringReplace(aHtml,' ','',[rfReplaceAll,rfIgnoreCase]); //删除Html空格 while i<=length(aHtml) do begin if aHtml[i]='<' then repeat inc(i) until (aHtml[i]='>') else begin if aHtml[i]<>' ' then begin s:=s+aHtml[i]; end else begin if s[length(s)]<>' ' then begin s:=s+aHtml[i]; end; end; end; inc(i); end; s:=StringReplace(s,'“','“',[rfReplaceAll,rfIgnoreCase]); s:=StringReplace(s,'”','”',[rfReplaceAll,rfIgnoreCase]); // s:=StringReplace(s,' ','',[rfReplaceAll,rfIgnoreCase]); s:=StringReplace(s,' ','',[rfReplaceAll,rfIgnoreCase]); Result:=s; end;
说明:类似 (webbrowser1.Document as IHTMLDocument2 ).body.innertext;
这个功能,但自己写的可以控制。可以分段。
|