Python爬虫-Requests模块

2020年3月8日

安装Requests模块

pip install requests

Requests语法

import requests
# Query-string parameters for the GET request (these end up in the URL as
# ?cate=realtimehot; they are not HTTP headers).
argument = {
    'cate': 'realtimehot'
}
# Send a GET request carrying the parameters above. A timeout is passed
# explicitly because requests does not time out on its own by default, so an
# unresponsive server would otherwise block the script.
response = requests.get(
    'https://s.weibo.com/top/summary',
    params=argument,
    timeout=10,
)
# HTTP status code of the response
print(response.status_code)
# Character encoding requests uses to decode the response body
print(response.encoding)
# Response body decoded to text
print(response.text)

小案例-爬取微博热搜

# 导入包
import requests
import re

# Request settings: a browser-like User-Agent header, the query-string
# parameters, and the target URL.
ua = {
    'User-Agent' : 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.132 Safari/537.36'
}
argument = {
    'cate': 'realtimehot'
}
url = 'https://s.weibo.com/top/summary'

# Send the GET request with the parameters and headers above. The timeout
# keeps the script from blocking forever when the server does not answer.
response = requests.get(url, params=argument, headers=ua, timeout=10)

# Status code and decoded body of the response
code = response.status_code
res = response.text
# Only parse the page when the request succeeded
if code == 200:
    # Capture the text between `target="_blank">` and a following `</a>` that
    # is later followed by `<span>`. re.S lets `.` match newlines so a match
    # may span several lines. The lazy `.*?` in front of the group matches the
    # empty string first, so the group receives the whole link text.
    # NOTE(review): presumably this corresponds to the hot-search entry links
    # in the page markup - confirm against the current HTML.
    pat = r'target="_blank">.*?(.*?)</a>.*?<span>'
    result = re.compile(pat, re.S).findall(res)

    # Print the entries as a numbered list starting at 1
    for rank, title in enumerate(result, start=1):
        print("{}·{}".format(rank, title))
else:
    # Report the failure instead of exiting silently with no output
    print("Request failed with status code {}".format(code))