VBA抓取带登录和cookie的网页

这里分享两个做爬虫的函数,一个用来拿到cookie,另一个用cookie拿到网页内容。

1.登录后需要记下网站的cookie,这是个获得cookie的小工具。使用它的前提是你已经完成登录过程,比如用WebBrowser控件完成了用户名、密码的登录(参考这里)。参数中url是可以登录的页面地址,errmsg是目标网站可能返回的出错信息,比如“错误”“未登录”之类。返回值是cookie字符串。

Private Declare Function InternetGetCookieEx Lib "wininet.dll" Alias "InternetGetCookieExA" (ByVal url As String, ByVal cookieName As String, ByVal cookieData As String, ByRef size As Long, ByVal flags As Long, ByVal pReserved As Long) As Boolean
Private Const INTERNET_COOKIE_HTTPONLY As Integer = 8192

Function getCookie(url As String, errmsg As String) As String
    Dim cookietmp As String * 8192
    Dim dummy As Variant
    Dim pcchCookiedata As Long
    pcchCookiedata = 8192
    Dim HTTP As Object
    
    On Error GoTo errhandler
    Set HTTP = CreateObject("msxml2.xmlhttp.6.0")
    HTTP.Open "get", url, False
    HTTP.send
    
    If InStr(1, HTTP.responsetext, errmsg) <> 0 Or HTTP.responsetext = "" Then
        getCookie = ""
    Else
        If Not InternetGetCookieEx(url, vbNullString, cookietmp, pcchCookiedata, INTERNET_COOKIE_HTTPONLY, 0) Then
            dummy = InternetGetCookieEx(url, vbNullString, vbNullString, pcchCookiedata, INTERNET_COOKIE_HTTPONLY, 0)
        End If
        getCookie = Mid(cookietmp, 1, pcchCookiedata - 1)
    End If
    Set HTTP = Nothing
    Exit Function
    
errhandler:
    getCookie = ""
    Exit Function
End Function

2.拿到cookie后,就可以用它访问需要登录的网页,用这个函数拿到网页的响应。其中errmsg是目标网页可能给出的出错信息,nRetry是出错后你能接受的最大重试次数。注意,xmlhttp是不支持cookie的,必须用serverxmlhttp。

Function httpRequest(url, cookie, errmsg, nRetry)
    Dim HTTP
    On Error Resume Next
    For j = 0 To nRetry - 1
        Set HTTP = Nothing
        Set HTTP = CreateObject("msxml2.serverxmlhttp.6.0")
        Err.Clear
        HTTP.setTimeouts 0, 5000, 5000, 15000
        HTTP.Open "get", url, True
        HTTP.setRequestHeader "Cookie", cookie
        HTTP.setRequestHeader "Connection", "keep-alive"
        HTTP.send
        If HTTP.waitForResponse(15) = False Then
            HTTP.abort
            temp = ""
        Else
            temp = HTTP.responsetext
        End If
        errFlag = Err.Description <> "" Or Len(temp) = 0 Or InStr(1, temp, errmsg) <> 0
        
        If errFlag = False Then
            Set HTTP = Nothing
            httpRequest = temp
            nRetry = j
            Exit Function
        End If
    Next 'j
    Set HTTP = Nothing
    httpRequest = temp
End Function

以上。

爬的老狼

《VBA抓取带登录和cookie的网页》上有3条评论

发表回复

您的电子邮箱地址不会被公开。 必填项已用*标注