爬取验证码后带 cookie 登录,只需要 sessionid 即可吗?
我使用 request-promise 请求验证码 url,获取 cookie 后带着该 cookie 进行 post 登录验证,结果失败。经过调试发现 cookie 保存正常,并且与验证码对应准确。问题出现在带 cookie 进行 post 时,服务器提示验证码错误。有没有火眼金睛的大神能帮忙看看?感激不尽!

get 验证码 url 并保存 cookie 的云函数 1:
const cloud = require('wx-server-sdk')
const rp = require('request-promise')
const tough = require('tough-cookie')
const fs = require('fs')
// Bind the SDK to the 'test' cloud environment.
cloud.init({ env: 'test' })

// Database handles: the 'cookies' collection and the query-command helper.
const db = cloud.database()
const { command: _ } = db
const dbcookies = db.collection('cookies')
// 云函数入口函数
exports.main = async (event, context) => {
var options = {
method: 'get',
uri: 'http://zhjw.scu.edu.cn/img/captcha.jpg',
qs: {},//参数
headers: {
'Connection': 'keep-alive',
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36(KHTML, like Gecko) Chrome/72.0.3626.109 Safari/537.36'
},//请求头
json: true, //是否json数据
resolveWithFullResponse: true, //是否获取完整响应
encoding:null
}
//获取响应
try {
var res = await rp(options)
.then((full) => {
console.log(full)
return full
})
.catch((err) => {
return { errmsg: "rp函数获取失败" }
})
//获取cookie
myCookie = res.headers['set-cookie'][0]
//保存到数据库,留作云函数jwc_login()登陆取用
//const cookieToStr = JSON.stringify(cookies[0])
dbcookies.where({
_id: "3f8c212f5ea3adf5002f2c3f7c1f8795"
}).update({
data: {
'cookie': myCookie
}
})
.then()
//解析验证码为base64
var buffer = res.body
var base64Img = buffer.toString('base64')
//console.log(base64Img)
var decodeImg = new Buffer(base64Img, 'base64') // new Buffer(string, encoding)
if (Buffer.compare(buffer, decodeImg))
return {
errmsg: "imgError"
}
else return {
base64Img: base64Img
}
} catch (err) {
console.log(err)
}
}
这里结束之后,数据库中成功保存了 JSESSIONID,格式为字符串:"JSESSIONID=cabk4vp147b4K5nl76Zhx; path=/"。返回的 base64Img 用 img 组件直接显示。

下面是登录函数:
// 云函数入口文件
const cloud = require('wx-server-sdk')
const rp = require('request-promise')
const tough = require('tough-cookie')
// Bind the SDK to the 'test' cloud environment.
cloud.init({ env: 'test' })

// Database handles: the 'cookies' collection and the query-command helper.
const db = cloud.database()
const { command: _ } = db
const dbcookies = db.collection('cookies')
// 云函数入口函数
exports.main = async (event, context) => {
const wxContext = cloud.getWXContext()
const account = event.account
const pwd = event.pwd
const cap = event.cap
//从数据库拿到cookie
var cookies = await dbcookies.where({
_id: "3f8c212f5ea3adf5002f2c3f7c1f8795"
}).get()
.then(res => {
// res.data 包含该记录的数据
return res.data[0].cookie
})
let cookie_0 = rp.cookie('' + cookies)
let cookiejar = rp.jar()
cookiejar.setCookie(cookie_0, 'http://zhjw.scu.edu.cn/j_spring_security_check')
console.log(cookiejar)
//设置请求部分
var options = {
method: 'post',
uri: 'http://zhjw.scu.edu.cn/j_spring_security_check',
headers:{
// 'Connection':'Keep-Alive',
// 'User-Agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.109 Safari/537.36'
},
form: {
j_username: account,
j_password: pwd,
j_captcha: cap,
},
resolveWithFullResponse: true,
jar: cookiejar // Tells rp to include cookies in jar that match uri
}
console.log(options)
try {
var r = await rp(options)
.then((full) => {
console.log(full)
})
.catch((err) => {
return { errmsg: "rp函数获取失败" }
})
} catch (err) {
console.log(err)
}
return {
event,
openid: wxContext.OPENID,
appid: wxContext.APPID,
unionid: wxContext.UNIONID,
}
}
结果如图所示。cookiejar:[图片] 响应头:[图片] 响应体显示验证码错误。本人也是头一次使用云函数爬虫,还希望多多指教~同时也希望其中的一部分代码可以帮助到一部分人~