spider.2 - Crawler basics
# 1. Create the request object (Request())
import urllib.request

url = "http://..."

# 1.1 Keep several User-Agent strings and use a different one for each request
# import random                                   # only needed for the 1.1 rotation
# agent_list = [agent1, agent2, agent3, agent4, agent5]
# agent = random.choice(agent_list)

headers = {
    "User-Agent": "",   # disguise the client to get around anti-crawler checks
    # 1.1 "User-Agent": agent,
    "Cookie": "",       # a Cookie can simulate a logged-in session
}

# 1.2 Build the custom request object
req = urllib.request.Request(url, headers=headers)

# 2. Get the response object (urlopen())
res = urllib.request.urlopen(req)

# 3. Read the content (read().decode("utf-8"))
html = res.read().decode("utf-8")
# decode(): bytes -> str
# encode(): str -> bytes

# Steps 2-3 can be combined:
# html = urllib.request.urlopen(req).read().decode("utf-8")

print(html)
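The 1.1 lines above only sketch the User-Agent rotation idea in comments. Below is a minimal runnable version of that pattern; the three agent strings, the fetch() helper, and the https://httpbin.org/user-agent test URL are my own placeholders for illustration, not part of the original note.

import random
import urllib.request

# Hypothetical pool of User-Agent strings; replace with whatever agents you actually use.
ua_pool = [
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0 Safari/537.36",
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.0 Safari/605.1.15",
    "Mozilla/5.0 (X11; Linux x86_64; rv:121.0) Gecko/20100101 Firefox/121.0",
]

def fetch(url):
    # Pick a different User-Agent on every call so the requests look less uniform.
    headers = {"User-Agent": random.choice(ua_pool)}
    req = urllib.request.Request(url, headers=headers)
    with urllib.request.urlopen(req) as res:
        return res.read().decode("utf-8")

# httpbin.org/user-agent simply echoes back the User-Agent it received,
# which makes it a convenient way to check that the rotation works.
print(fetch("https://httpbin.org/user-agent"))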
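As a quick illustration of the decode()/encode() note above, here is a tiny bytes/str round trip; the sample string is just an arbitrary example.

raw = "爬虫".encode("utf-8")   # str -> bytes: b'\xe7\x88\xac\xe8\x99\xab'
text = raw.decode("utf-8")     # bytes -> str: '爬虫'
print(type(raw), type(text))   # <class 'bytes'> <class 'str'>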