«
用ASP编写下载网页中所有资源的程序

时间:2008-5-31    作者:Deri    分类: 分享


   <p>  看过一篇关于下载网页中图片的文章,它只能下载以http头的图片,我做了些改进,可以下载网页中的所有连接资源,并按照网页中的目录结构建立本地目录,存放资源。</p><p>  download.asp?url=你要下载的网页</p><p>  download.asp代码如下:</p><p>  <code><%<br />Server.ScriptTimeout=9999<br />function SaveToFile(from,tofile)<br />on error resume next<br />dim geturl,objStream,imgs<br />geturl=trim(from)<br />Mybyval=getHTTPstr(geturl)<br />Set objStream = Server.CreateObject("ADODB.Stream")<br />objStream.Type =1<br />objStream.Open<br />objstream.write Mybyval<br />objstream.SaveToFile tofile,2<br />objstream.Close()<br />set objstream=nothing<br />if err.number<>0 then err.Clear<br />end function<br />function geturlencodel(byval url)'中文文件名转换<br />Dim i,code<br />geturlencodel=""<br />if trim(Url)="" then exit function<br />for i=1 to len(Url)<br />code=Asc(mid(Url,i,1))<br />if code<0 Then code = code + 65536<br />If code>255 Then<br />geturlencodel=geturlencodel&"%"&Left(Hex(Code),2)&"%"&Right(Hex(Code),2)<br />else<br />geturlencodel=geturlencodel&mid(Url,i,1)<br />end if<br />next<br />end function<br />function getHTTPPage(url)<br />on error resume next<br />dim http<br />set http=Server.createobject("Msxml2.XMLHTTP")<br />Http.open "GET",url,false<br />Http.send()<br />if Http.readystate<>4 then exit function<br />getHTTPPage=bytes2BSTR(Http.responseBody)<br />set http=nothing<br />if err.number<>0 then err.Clear<br />end function<br />Function bytes2BSTR(vIn)<br />dim strReturn<br />dim i,ThisCharCode,NextCharCode<br />strReturn = ""<br />For i = 1 To LenB(vIn)<br />ThisCharCode = AscB(MidB(vIn,i,1))<br />If ThisCharCode < &H80 Then<br />strReturn = strReturn & Chr(ThisCharCode)<br />Else<br />NextCharCode = AscB(MidB(vIn,i+1,1))<br />strReturn = strReturn & Chr(CLng(ThisCharCode) * &H100 + CInt(NextCharCode))<br />i = i + 1<br />End If<br />Next<br />bytes2BSTR = strReturn<br />End Function<br />function getFileName(byval filename)<br />if instr(filename,"/")>0 then<br />fileExt_a=split(filename,"/")<br />getFileName=lcase(fileExt_a(ubound(fileExt_a)))<br />if instr(getFileName,"?")>0 then<br />getFileName=left(getFileName,instr(getFileName,"?")-1)<br />end if<br />else<br />getFileName=filename<br />end if<br />end function<br />function getHTTPstr(url)<br />on error resume next<br />dim http<br />set http=server.createobject("MSXML2.XMLHTTP")<br />Http.open "GET",url,false<br />Http.send()<br />if Http.readystate<>4 then exit function<br />getHTTPstr=Http.responseBody<br />set http=nothing<br />if err.number<>0 then err.Clear<br />end function<br />Function CreateDIR(ByVal LocalPath) '建立目录的程序,如果有多级目录,则一级一级的创建<br /> On Error Resume Next<br /> LocalPath = Replace(LocalPath, "\\", "/")<br /> Set FileObject = server.CreateObject("Scripting.FileSystemObject")<br /> patharr = Split(LocalPath, "/")<br /> path_level = UBound(patharr)<br /> For I = 0 To path_level<br />  If I = 0 Then pathtmp = patharr(0) & "/" Else pathtmp = pathtmp & patharr(I) & "/"<br />   cpath = Left(pathtmp, Len(pathtmp) - 1)<br />  If Not FileObject.FolderExists(cpath) Then FileObject.CreateFolder cpath<br /> Next<br /> Set FileObject = Nothing<br /> If Err.Number <> 0 Then<br />  CreateDIR = False<br />  Err.Clear<br /> Else<br />  CreateDIR = True<br /> End If<br />End Function<br />function GetfileExt(byval filename)<br /> fileExt_a=split(filename,".")<br /> GetfileExt=lcase(fileExt_a(ubound(fileExt_a)))<br />end function<br />function getvirtual(str,path,urlhead)<br /> if left(str,7)="http://" then<br />  url=str<br /> elseif left(str,1)="/" then<br />  start=instrRev(str,"/")<br />  if start=1 then<br />   url="/"<br />  else<br />   url=left(str,start)<br />  end if<br />  url=urlhead&url<br />  elseif left(str,3)="../" then<br />  str1=mid(str,inStrRev(str,"../")+2)<br />  ar=split(str,"../")<br />  lv=ubound(ar)+1<br />  ar=split(path,"/")<br />  url="/"<br />  for i=1 to (ubound(ar)-lv)<br />   url=url&ar(i)<br />  next<br />  url=url&str1<br />  url=urlhead&url<br /> else<br />  url=urlhead&str<br /> end if<br /> getvirtual=url<br />end function<br />'示例代码<br />dim dlpath<br />virtual="/downweb/"<br />truepath=server.MapPath(virtual)<br />if request("url")<> "" then<br /> url=request("url")<br /> fn=getFileName(url)<br /> urlhead=left(url,(instr(replace(url,"//",""),"/")+1))<br /> urlpath=replace(left(url,instrRev(url,"/")),urlhead,"")<br /> strContent = getHTTPPage(url)<br /> mystr=strContent<br /> Set objRegExp = New Regexp<br /> objRegExp.IgnoreCase = True<br /> objRegExp.Global = True<br /> objRegExp.Pattern = "(src|href)=.[^\\>]+? "<br /> Set Matches =objRegExp.Execute(strContent)<br /> For Each Match in Matches<br />  str=Match.Value<br />  str=replace(str,"src=","")<br />  str=replace(str,"href=","")<br />  str=replace(str,"""","")<br /> str=replace(str,"'","")<br />filename=GetfileName(str)<br />  getRet=getVirtual(str,urlpath,urlhead)<br />  temp=Replace(getRet,"//","**")<br />  start=instr(temp,"/")<br />  endt=instrRev(temp,"/")-start+1<br />  if start>0 then<br />   repl=virtual&mid(temp,start)&" "<br />   'response.Write repl&"<br>"<br />   mystr=Replace(mystr,str,repl)<br />  dir=mid(temp,start,endt)<br />  temp=truepath&Replace(dir,"/","\\")<br />  CreateDir(temp)<br />  'response.Write getRet&"||"&temp&filename&"<br><br>"<br />  SaveToFile getRet,temp&filename<br /> end if<br />Next<br />set Matches=nothing<br />end if<br />%></code></p>