|
可以在此基础上结合正则表达式做成更好的效果,希望大家能分享一下Xmlhttp的session共享技术
<html> <head> <title>AUTOGET</title> <meta http-equiv="Content-Type" content="text/html; charset=gb2312"> </head> <body bgcolor="#FFFFFF" style="font-family:Arial;font-size:12px"> <% '================================================= 'FileName: Getit.Asp 'Intro : Auto Get Data From Remote WebSite 'Author: Babyt(阿泰) 'URL: http://blog.csdn.net/babyt 'createAt: 2002-02 Lastupdate:2004-09 'DB Table : data 'Table Field: ' UID -> Long -> Keep ID Of the pages ' UContent -> Text -> Keep Content Of the Pages(HTML) '=================================================
Server.ScriptTimeout=5000
'on error resume next Set conn = Server.createObject("ADODB.Connection") conn.open "Provider=Microsoft.Jet.OLEDB.4.0;Data Source=" & Server.MapPath("getit.mdb") Set rs = Server.createObject("ADODB.Recordset") sql="select * from data" rs.open sql,conn,1,3
Dim comeFrom,myErr,myCount
'======================================================== comeFrom="http://www.xxx.com/U.asp?ID=" myErr1="该资料不存在" myErr2="该资料已隐藏" '========================================================
'*************************************************************** ' 只需要更改这里 i 的始点intMin和终点intMax,设定步长intStep ' 每次区间设置成5万左右。估计要两个多小时。期间不需要人工干预 '**************************************************************** intMin=0 intMax=10000 '设定步长 intStep=100
'========================================================== '以下代码不要更改 '========================================================== Call GetPart (intMin) Response.write "已经转换完成" & intMin & "~~" & intMax & "之间的数据" rs.close Set rs=Nothing conn.Close set conn=nothing %> </body> </html> <% '使用XMLHTTP抓取地址并进次内容处理 Function GetBody(Url) Dim objXML On Error Resume Next Set objXML = createObject("Microsoft.XMLHTTP") With objXML .Open "Get", Url, False, "", "" .Send GetBody = .ResponseBody End With GetBody=BytesToBstr(GetBody,"GB2312") Set objXML = Nothing End Function '使用Adodb.Stream处理二进制数据 Function BytesToBstr(strBody,CodeBase) dim objStream set objStream = Server.createObject("Adodb.Stream") objStream.Type = 1 objStream.Mode =3 objStream.Open objStream.Write strBody objStream.Position = 0 objStream.Type = 2 objStream.Charset = CodeBase BytesToBstr = objStream.ReadText objStream.Close set objStream = nothing End Function '主函数 Function GetPart(iStart) Dim iGo time1=timer() myCount=0 For iGo=iStart To iStart+intStep If iGo<=intMax Then Response.Execute comeFrom & iGo '进行简单的数据处理 content = GetBody(comeFrom & iGo ) content = Replace(content,chr(34),""") If instr(content,myErr1) OR instr(content,myErr2) Then '跳过错误信息 Else '写入数据库 rs.AddNew rs("UID")=iGo '******************************** rs("UContent")=Replace(content,""",chr(34)) '********************************* rs.update myCount=myCount+1 Response.Write iGo & "<BR>" Response.Flush End If Else Response.write "<font color=red>成功抓取"&myCount&"条记录," time2=timer() Response.write "耗时:" & Int(FormatNumber((time2-time1)*1000000,3)) & " 秒</font><BR>" Response.Flush Exit Function End If Next Response.write "<font color=red>成功抓取"&myCount&"条记录," time2=timer() Response.write "耗时:" & CInt(FormatNumber((time2-time1),3)) & " 秒</font><BR>" Response.Flush '递归 GetPart(iGo+1) End Function%>
|
|