# 爬虫实战篇(模拟登录)——以模拟登录去哪儿网为例
# author: "xian"
# date: 2018/5/30
import re

import requests


# Use the session-persistence feature of the requests library.
def start_get_session():
    """Create and return a new ``requests`` session.

    The same session object is passed to the other functions of this
    script so that they all share one cookie jar.
    """
    s = requests.session()
    return s


def get_base_cookies(s):
    """Issue the preparatory requests made before the login form is posted.

    In order, this requests the login page, downloads the captcha through
    ``get_image``, requests ``addICK.jsp``, requests ``df.js`` and extracts
    a ``sessionId`` value from its text, calls the device-challenge
    endpoint with that value, and finally stores the value in the
    session's ``QN271`` cookie.

    Args:
        s: The session returned by ``start_get_session``.

    Raises:
        ValueError: If the ``df.js`` response text contains no
            ``sessionId=...&`` fragment.
    """
    s.get('https://user.qunar.com/passport/login.jsp')
    get_image(s)
    s.get('https://user.qunar.com/passport/addICK.jsp?ssl')
    response = s.get('https://rmcsdf.qunar.com/js/df.js?org_id=ucenter.login&js_type=0')

    # Extract the session id; fail with a clear message instead of a bare
    # IndexError when the expected fragment is missing from the response.
    match = re.search(r'sessionId=(.*?)&', response.text)
    if match is None:
        raise ValueError('sessionId not found in the df.js response')
    session_id = match.group(1)

    # Call the device-challenge endpoint (the original note here read
    # "get fid"); the response itself is not used by this function.
    s.get('https://rmcsdf.qunar.com/api/device/challenge.json?callback=callback_1527735086394&sessionId={}&domain=qunar.com&orgId=ucenter.login'.format(session_id))
    s.cookies.update({'QN271': session_id})
25
26
27
# Download the captcha image.
def get_image(s, save_path='img/code.png'):
    """Fetch the login captcha image and write it to a local file.

    Args:
        s: Session-like object whose ``get(url)`` returns a response that
            exposes the body as bytes in ``content`` (the script passes
            the object created by ``requests.session()``).
        save_path: Local file the image bytes are written to. Missing
            parent directories are created first.

    Returns:
        None. The image is only written to disk.
    """
    import os  # local import: keeps this block self-contained

    response = s.get('https://user.qunar.com/captcha/api/image?k={en7mni(z&p=ucenter_login&c=ef7d278eca6d25aa6aec7272d57f0a9a&t=1527644979725')

    # The previous target was 'https://www.cnblogs.com/518894-lu/p/img/code.png',
    # a URL that open() cannot use as a local path; presumably a relative
    # 'img/code.png' that was rewritten when the listing was published.
    folder = os.path.dirname(save_path)
    if folder:
        os.makedirs(folder, exist_ok=True)

    with open(save_path, 'wb') as f:
        f.write(response.content)
35
36
# Login function.
def login(s, username, password, code):
    """Post the login form, then request a page with the same session.

    Both response bodies are printed to stdout.

    Args:
        s: Session-like object providing ``post(url, data=...)`` and
            ``get(url)`` (the script passes a ``requests`` session).
        username: Value sent in the ``username`` form field.
        password: Value sent in the ``password`` form field.
        code: Captcha text typed by the user, sent as ``vcode``.

    Returns:
        The response of the final ``get`` request, so that callers can
        inspect the result instead of relying on the printed text.
    """
    data = {
        'loginType': 0,
        'username': username,
        'password': password,
        'remember': 1,
        'vcode': code,
    }

    url = 'https://user.qunar.com/passport/loginx.jsp'
    response = s.post(url, data=data)
    print(response.text)

    # Fetch this page after the simulated login.
    response = s.get('http://user.qunar.com/index/basic')
    print(response.text)
    return response
52
53
54
55
# Script entry point.
if __name__ == '__main__':
    session = start_get_session()
    # get_base_cookies also calls get_image, which downloads the captcha
    # whose text is asked for below.
    get_base_cookies(session)
    username = input('请输入用户名:')
    password = input('请输入密码:')
    code = input('请输入验证码:')

    login(session, username, password, code)





