ASP抓取页面源代码并截取指定内容(自动判断编码)
作者:刚子 日期:2009-07-22
程序代码<%
'ASP抓取远程页面功能类(自动判断编码格式)
Function GetHttpPage(HttpUrl)
Set Http=server.createobject("MSX"&"ML2.XML"&"HTTP")
Http.open "GET",HttpUrl,False
Http.Send()
If Http.Readystate<>4 and Http.status<>200 then
Set Http=Nothing
Exit function
End if
Set ore = New RegExp
ore.Pattern = "<meta[^>]+charset=[""]?([\w\-]+)[^>]*>"
ore.Global = True
ore.IgnoreCase = True
Set Matches = ore.execute(Http.responseText)
If(Matches.count>0)Then
GetHTTPPage=bytesToBSTR(Http.responseBody,Matches(0).submatches(0))
Else
'GetHTTPPage=Http.responseText '没有找到编码则不转换编码
GetHTTPPage=bytesToBSTR(Http.responseBody,"gb2312") '没有找到编码则转换为GB2312
End if
Set Http=Nothing
End Function
Function BytesToBstr(body,Cset)
dim objstream
set objstream = Server.CreateObject("adodb.stream")
objstream.Type = 1
objstream.Mode =3
objstream.Open
objstream.Write body
objstream.Position = 0
objstream.Type = 2
objstream.Charset = Cset
BytesToBstr = objstream.ReadText
objstream.Close
set objstream = nothing
End Function
Function GetKey(HTML,Start,Last)
filearray=split(HTML,Start)
if ubound(filearray)>0 then
filearray2=split(filearray(1),Last)
GetKey=filearray2(0)
end if
End Function
HttpUrl="http://www.baidu.com"
StartGet = getHTTPPage(HttpUrl)
'下面是提取两段字符间代码 开始和结束部分的代码标记(双引号要写为两个双引号)
List=Getkey(StartGet,"<title>","</title>")
Response.write List
%>
'ASP抓取远程页面功能类(自动判断编码格式)
Function GetHttpPage(HttpUrl)
Set Http=server.createobject("MSX"&"ML2.XML"&"HTTP")
Http.open "GET",HttpUrl,False
Http.Send()
If Http.Readystate<>4 and Http.status<>200 then
Set Http=Nothing
Exit function
End if
Set ore = New RegExp
ore.Pattern = "<meta[^>]+charset=[""]?([\w\-]+)[^>]*>"
ore.Global = True
ore.IgnoreCase = True
Set Matches = ore.execute(Http.responseText)
If(Matches.count>0)Then
GetHTTPPage=bytesToBSTR(Http.responseBody,Matches(0).submatches(0))
Else
'GetHTTPPage=Http.responseText '没有找到编码则不转换编码
GetHTTPPage=bytesToBSTR(Http.responseBody,"gb2312") '没有找到编码则转换为GB2312
End if
Set Http=Nothing
End Function
Function BytesToBstr(body,Cset)
dim objstream
set objstream = Server.CreateObject("adodb.stream")
objstream.Type = 1
objstream.Mode =3
objstream.Open
objstream.Write body
objstream.Position = 0
objstream.Type = 2
objstream.Charset = Cset
BytesToBstr = objstream.ReadText
objstream.Close
set objstream = nothing
End Function
Function GetKey(HTML,Start,Last)
filearray=split(HTML,Start)
if ubound(filearray)>0 then
filearray2=split(filearray(1),Last)
GetKey=filearray2(0)
end if
End Function
HttpUrl="http://www.baidu.com"
StartGet = getHTTPPage(HttpUrl)
'下面是提取两段字符间代码 开始和结束部分的代码标记(双引号要写为两个双引号)
List=Getkey(StartGet,"<title>","</title>")
Response.write List
%>
评论: 0 | 引用: 0 | 查看次数: -
发表评论
上一篇
下一篇

文章来自:
Tags: