国产精品成人免费一区二区视频,可莉污黄h本子

主頁 > 知識庫 > 自己做采集程序

自己做采集程序

現在網上的采集程序很多，但是有時候你發現一個好的網站，想自己做個采集工具采集一些信息，就需要自己去寫程序了，其實這樣的采集程序并不難寫，主要是去分析源網站的網頁結構。
首先去下載個XMLHTTP的類文件：
<%
' xhttp: small helper class that downloads a URL with MSXML2.ServerXMLHTTP and
' exposes the response either as decoded text (Html) or as a file saved to disk
' (saveimage).
'
' Usage:
'   Set o = New xhttp
'   o.URL = "http://host/path/page.htm"
'   text = o.Html            ' response body decoded with the class charset
'   o.saveimage "a/b.gif", False
'
' Properties:
'   URL        (write) address used by Html / saveimage / BasePath / FileName
'   BasePath   (read)  URL up to, but not including, the last "/"
'   FileName   (read)  part of the URL after the last "/"
'   Html       (read)  downloaded body decoded as text ("" on failure)
'   xhttpError (read)  Err.Number of the last failed download, "" if none
Class xhttp
    Private cset, sUrl, sError

    Private Sub Class_Initialize()
        'cset = "UTF-8"
        cset = "GB2312"     ' charset used to decode downloaded bytes
        sUrl = ""
        sError = ""
    End Sub

    Private Sub Class_Terminate()
    End Sub

    Public Property Let URL(theurl)
        sUrl = theurl
    End Property

    Public Property Get BasePath()
        Dim slashPos
        slashPos = InStrRev(sUrl, "/")
        ' Without this guard Mid(sUrl, 1, -1) raises "Invalid procedure call"
        ' when the URL contains no "/".
        If slashPos > 0 Then
            BasePath = Mid(sUrl, 1, slashPos - 1)
        Else
            BasePath = ""
        End If
    End Property

    Public Property Get FileName()
        FileName = Mid(sUrl, InStrRev(sUrl, "/") + 1)
    End Property

    Public Property Get Html()
        Html = BytesToBstr(getBody(sUrl))
    End Property

    Public Property Get xhttpError()
        xhttpError = sError
    End Property

    ' Decodes a byte array into a string using the class charset (cset).
    ' Returns "" when body is not a byte array (e.g. the download failed).
    Private Function BytesToBstr(body)
        On Error Resume Next
        Dim objstream
        BytesToBstr = ""
        If VarType(body) <> (vbArray + vbByte) Then Exit Function

        Set objstream = Server.CreateObject("adodb.stream")
        With objstream
            .Type = 1           ' adTypeBinary: accept the raw bytes
            .Mode = 3           ' adModeReadWrite
            .Open
            .Write body
            .Position = 0       ' rewind before switching to text mode
            .Type = 2           ' adTypeText
            .Charset = cset     ' decode with the configured charset
            BytesToBstr = .ReadText
            .Close
        End With
        Set objstream = Nothing
    End Function

    ' Performs a synchronous GET and returns the response body as a byte array.
    ' On any failure returns "" and stores Err.Number in sError.
    ' The HTTP status code is intentionally not checked (the original check was
    ' commented out), so error pages are returned like any other body.
    Private Function getBody(surl)
        On Error Resume Next
        Dim xmlHttp
        getBody = ""
        sError = ""

        'Set xmlHttp = Server.CreateObject("Msxml2.XMLHTTP.4.0")
        'Set xmlHttp = Server.CreateObject("Microsoft.XMLHTTP")
        Set xmlHttp = Server.CreateObject("MSXML2.ServerXMLHTTP")
        ' resolve, connect, send, receive timeouts in milliseconds
        xmlHttp.setTimeouts 10000, 10000, 10000, 30000
        xmlHttp.open "GET", surl, False
        xmlHttp.send

        If Err.Number = 0 Then
            If xmlHttp.readyState = 4 Then
                'If xmlHttp.status = 200 Then
                getBody = xmlHttp.responseBody
                'End If
            End If
        End If

        ' COM/MSXML failures surface as negative HRESULTs, so test for
        ' "not zero" rather than "greater than zero".
        If Err.Number <> 0 Then
            sError = Err.Number
            getBody = ""
            Err.Clear
        End If
        Set xmlHttp = Nothing
    End Function

    ' Downloads the current URL and saves it to the virtual path tofile.
    '   tofile      - virtual path, resolved with Server.MapPath
    '   isoverwrite - when False an existing file is left untouched
    ' Returns True when a file was written, False otherwise. Callers that
    ' invoke this as a statement are unaffected by the return value.
    Public Function saveimage(tofile, isoverwrite)
        On Error Resume Next
        Dim objStream, objFSO, imgs, alreadyThere

        saveimage = False

        If Not isoverwrite Then
            Set objFSO = Server.CreateObject("Scripting.FileSystemObject")
            alreadyThere = objFSO.FileExists(Server.MapPath(tofile))
            Set objFSO = Nothing        ' release before any early exit
            If alreadyThere Then Exit Function
        End If

        imgs = getBody(sUrl)
        ' A failed download must not leave a 0-byte file behind: with
        ' isoverwrite = False that file would block every later retry.
        If VarType(imgs) <> (vbArray + vbByte) Then Exit Function

        Set objStream = Server.CreateObject("ADODB.Stream")
        With objStream
            .Type = 1                                   ' adTypeBinary
            .Open
            .Write imgs
            .SaveToFile Server.MapPath(tofile), 2       ' adSaveCreateOverWrite
            .Close
        End With
        Set objStream = Nothing

        If Err.Number = 0 Then
            saveimage = True
        Else
            sError = Err.Number
            Err.Clear
        End If
    End Function

End Class

%>
用了這個類文件，做起事情來就方便多了。
然后就可以分析采集網站的網頁結構，寫采集程序了。
下面給個例子：
<!--#include file="conn.asp"-->
<!--#include file="inc/xhttp_class.asp"-->
<!--#include file="inc/function.asp"-->
<%
server.ScriptTimeout = 1000
%>
<html>
<head>
<meta http-equiv="Content-Type" content="text/html; charset=gb2312" />
<title>BT采集器</title>
</head>
<body>
<form name="form1" method="post" action="get81bt.asp">
分類ID：
  <input type="text" name="cid" value="<%=request("cid")%>"><br>
開始ID：
  <input type="text" name="startid" value="<%=request("startid")%>">
  <br>
  結束ID：
  <input type="text" name="overid" value="<%=request("overid")%>">
  <br>
  分類名稱：<input type="text" name="classname" value="<%=request("classname")%>">為空自動獲取
  <br>
  <input name="action" type="hidden" id="action" value="getdata">
  <input type="submit" name="Submit" value="采集">
</form>
當前ID：<%=request("id")%> <br>
<%
' Collector entry point.
'
' When the form posts action=getdata this script:
'   1. downloads the list page  http://www.81dd.com/Class/<cid>_<id>.htm
'   2. extracts every "../BtHtml/..." link found between the content markers
'   3. downloads each detail page and pulls out title, intro, publish time,
'      file size and the .torrent download link
'   4. stores the item in bt_class / bt_movie (skipping titles already present)
'      and saves the .torrent file under torrent/<y>-<m>-<d>/
'   5. calls NextID, which redirects to the next list page id
'
' Depends on names defined elsewhere: conn (conn.asp), xhttp
' (inc/xhttp_class.asp), GetContent / stripHTML / CreateF (inc/function.asp),
' and Sub NextID below. cid, startid, overid and id are deliberately left as
' script-level variables because NextID reads them.
dim action

action = Request("action")
if action = "getdata" then
    cid = Request("cid")
    startid = Request("startid")
    overid = Request("overid")
    id = Request("id")
    if id = "" then id = startid

    set objxhttp = new xhttp

    objxhttp.URL = "http://www.81dd.com/Class/" & cid & "_" & id & ".htm"
    content = objxhttp.Html

    ' The source site shows a maintenance notice for some ids: skip them.
    if InStr(content,"網站維護中") then
        call NextID
        response.End()
    end if

    list = GetContent(content,"<!--內容開始-->","<!--內容結束-->",0)

    Dim regEx, Match, Matches, patrn
    Dim genres, genre, sRnd

    ' Keywords checked, in this order, against the title when no class name
    ' was supplied; the keyword itself becomes the class name.
    genres = Array("喜劇","動作","驚悚","犯罪","恐怖","愛情","冒險","科幻","懸念","奇幻","戰爭","連續劇","綜藝","災難","倫理")

    Set regEx = New RegExp
    patrn = "<a href=""../BtHtml/(.+?)"">"
    regEx.Pattern = patrn
    regEx.IgnoreCase = True
    regEx.Global = True
    Set Matches = regEx.Execute(list)

    ' Seed once; re-seeding inside the loop can repeat values within one
    ' timer tick.
    Randomize

    on error resume next
    For Each Match in Matches
        ' Start every item with a clean error state so that a failure while
        ' storing one item does not cause the next one to be skipped.
        Err.Clear

        weburl = "http://www.81dd.com/BtHtml/" & Match.SubMatches(0)
        response.write weburl & "<br>"
        response.Flush()

        objxhttp.URL = weburl
        cpage = objxhttp.Html
        cpage = GetContent(cpage,"<!--內容開始-->","<!--內容結束-->",0)

        title = GetContent(cpage,"BT資源名稱：<strong>","</strong>",0)
        title = stripHTML(title)

        If Request("classname") <> "" Then
            classname = Request("classname")
        Else
            classname = ""
            For Each genre In genres
                If InStr(title, genre) > 0 Then
                    classname = genre
                    Exit For
                End If
            Next
            If classname = "" Then
                If InStr(title,"動漫") > 0 Or InStr(title,"動畫") > 0 Then
                    classname = "動漫"
                ElseIf InStr(title,"國語") > 0 Or InStr(title,"集") > 0 Then
                    classname = "其他影視"
                Else
                    classname = "其他"
                End If
            End If
        End If

        intro = GetContent(cpage,"<tr><td width=770 bgcolor=#FFFFFF><div style=""margin:10px;line-height:150%"">","</div>",0)

        ' Protect the few tags we want to keep by turning them into bracket
        ' placeholders, strip all remaining HTML, then turn them back.
        intro = Replace(intro,"<br />","[br]")
        intro = Replace(intro,"<BR />","[br]")
        intro = Replace(intro,"<BR>","[br]")
        intro = Replace(intro,"<br>","[br]")
        intro = Replace(intro,"<p>","[p]")
        intro = Replace(intro,"<P>","[p]")
        intro = Replace(intro,"</p>","[/p]")
        intro = Replace(intro,"</P>","[/p]")    ' was "[p]": closing tag became an opening one
        intro = Replace(intro,"<img","[img")
        intro = Replace(intro,"<IMG","[img")
        intro = stripHTML(intro)
        intro = Replace(intro,"[br]","<br>")
        intro = Replace(intro,"[p]","<p>")
        intro = Replace(intro,"[/p]","</p>")
        ' Complete [img]...[/img] pairs must be handled before the bare "[img"
        ' prefix, otherwise "[img]" has already become "<img]" and never matches.
        intro = Replace(intro,"[img]","<img src=")
        intro = Replace(intro,"[/img]",">")
        intro = Replace(intro,"[IMG]","<img src=")
        intro = Replace(intro,"[/IMG]",">")
        intro = Replace(intro,"[img","<img")

        addtime = Trim(GetContent(cpage,"發布時間："," ",0))
        if Not IsDate(addtime) then addtime = now()

        username = "bt"

        ' NOTE(review): this reads from the list page (content) while every
        ' other field is read from the detail page (cpage) - confirm intended.
        filesize = GetContent(content,"BT文件大小："," ",0)

        title2 = title

        downurl = GetContent(cpage,"<a style=""color:red"" href=""","""",0)

        ' File name: publish date + current time + 3-digit random suffix.
        p = CDate(addtime)
        sRnd = Int(900 * Rnd) + 100
        sFileName = year(p) & month(p) & day(p) & hour(now) & minute(now) & second(now) & sRnd & ".torrent"

        url = "torrent/" & year(p) & "-" & month(p) & "-" & day(p) & "/" & sFileName
        ' presumably creates the target folder - defined in inc/function.asp
        Call CreateF(url)

        'Text
        Response.Write classname & "<br>"
        Response.write title & "<br>"
        response.Write downurl & "<br>"
        response.Write url & "<br>"
        response.Flush()

        'database
        if err.number = 0 then
            if (Not IsNull(title)) and title <> "" and downurl <> "" then
                ' Values come from a remote page: double single quotes so they
                ' cannot terminate the SQL string literal.
                set rs = server.CreateObject("adodb.recordset")
                sql = "select * from bt_class where classname = '" & Replace(classname,"'","''") & "'"
                rs.open sql,conn,1,3
                if rs.eof then
                    rs.addnew
                    rs("classname") = classname
                    rs.update
                end if
                classid = rs("classid")
                rs.close
                set rs = nothing

                set rs = server.CreateObject("adodb.recordset")
                sql = "select * from bt_movie where title in ('" & Replace(title,"'","''") & "')"
                rs.open sql,conn,1,3
                if rs.eof then
                    response.Write "<div><font color=blue>寫入數據庫...</font></div>"
                    response.Flush()
                    rs.addnew
                    rs("classid") = classid
                    rs("title") = title
                    rs("title2") = title2
                    rs("intro") = intro
                    rs("username") = username
                    rs("filesize") = filesize
                    rs("url") = url
                    rs("serverid") = 1
                    rs("addtime") = addtime
                    rs("ismake") = 0
                    rs.update

                    ' Only fetch the .torrent for newly inserted rows.
                    objxhttp.URL = downurl
                    objxhttp.saveimage url,False
                else
                    response.Write "<div><font color=red>已經存在！</font></div>"
                end if
                rs.close
                set rs = nothing
            End IF
        Else
            err.clear
        End IF
        response.Write "-------------------------------------------<br>"
    Next
    set regEx = nothing
    set objxhttp = nothing

    response.Write "下一頁<br>"
    response.Flush()

    Call NextID()

end if

' Closes the database connection and sends the browser to the next list page.
'
' Reads the script-level variables startid, overid, id and cid. Walks upward
' when startid < overid and downward when startid > overid; once id reaches
' overid it prints the completion message and ends the response.
Sub NextID
    Dim firstId, lastId, currentId, stepBy

    conn.close
    set conn = nothing

    ' CLng instead of CInt: page ids above 32767 would overflow CInt.
    firstId = CLng(startid)
    lastId = CLng(overid)
    currentId = CLng(id)

    If firstId < lastId And currentId < lastId Then
        stepBy = 1
    ElseIf firstId > lastId And currentId > lastId Then
        stepBy = -1
    Else
        Response.Write "采集完成！<br>"
        response.End()
        Exit Sub
    End If

    ' classname is user input placed inside a URL and a quoted JS string, so
    ' it is URL-encoded; the remaining values are echoed as received.
    response.Write "<script>location.href='get81bt.asp?action=getdata&classname=" & Server.URLEncode(Request("classname")) & "&cid=" & cid & "&startid=" & startid & "&overid=" & overid & "&id=" & (currentId + stepBy) & "'</script>"
End Sub

%>

</body>
</html>

標簽：重慶成都內江公主嶺臺灣麗江天津懷化

巨人網絡通訊聲明：本文標題《自己做采集程序》，本文關鍵詞：自己做、采集、程序；如發現本文內容存在版權問題，煩請提供相關信息告知我們，我們將及時溝通與處理。本站內容系統采集于網絡，涉及言論、版權與本站無關。

好湿?好紧?好多水好爽自慰,久久久噜久噜久久综合,成人做爰A片免费看黄冈,机机对机机30分钟无遮挡

自己做采集程序