# Fetch a page with urllib in three steps: build a Request, open it, decode the body.
import urllib.request  # needed by the calls below; harmless if already imported earlier

# 1. Build the request object (Request()).
url = "http://..."
# 1.1 To send a different User-Agent on each visit, keep several candidates
#     and pick one per request, e.g.:
#     agents = [agent1, agent2, agent3, agent4, agent5]
#     agent = random.choice(agents)
headers = {
    "User-Agent": "",  # disguise the client to get past anti-crawler checks; for 1.1 use "User-Agent": agent
    "Cookie": "",  # cookie used to simulate a logged-in session
}
# 1.2 Create the customised request object that carries the headers.
req = urllib.request.Request(url, headers=headers)
# 2. Get the response object (urlopen()).
#    The `with` block closes the response (and its connection) even if
#    reading or decoding raises, instead of leaving it open.
with urllib.request.urlopen(req) as res:
    # 3. Get the content: read() returns bytes, decode("utf-8") turns them into str.
    html = res.read().decode("utf-8")
# decode(): bytes -> str
# encode(): str -> bytes
# Steps 2-3 can also be chained into one expression,
#     html = urllib.request.urlopen(req).read().decode("utf-8")
# but that form never closes the response explicitly.
print(html)
hmoban主题是根据ripro二开的主题,极致后台体验,无插件,集成会员系统
自学咖网 »
spider.2-爬虫的基础