先声明一下画师通的网址,这里面存在大量的二次元的图片。总有一款适合你,但小孩子才做选择题,我们全要!!!
进入画师通
爬取效果
爬取代码
import os
import sys
from urllib.parse import urljoin

import requests
from lxml import etree


class Dmimg:
    """Crawl a fixed list of pages and save every matching image to ``img/``.

    Pages are ``/share`` plus ``/draw/1000`` .. ``/draw/9999``, each prefixed
    with ``base_url``.  On every page the ``src`` of each image matched by
    ``//*[@id="imgTooles"]/div/img`` is downloaded and written to
    ``img/<counter>.<ext>``.
    """

    # Directory the downloaded images are written to.
    IMG_DIR = "img"
    # Seconds to wait on connect/read so a stalled server cannot hang the crawl.
    TIMEOUT = 10

    def __init__(self, base_url=""):
        """Set up request headers and the running image counter.

        Args:
            base_url: Scheme and host prepended to every crawled path, e.g.
                ``"https://example.com"``.  The default of ``""`` keeps the
                bare paths; ``requests`` rejects those, so callers must pass
                the site root for the crawl to fetch anything.
        """
        self.base_url = base_url
        self.headers = {
            # Header names must not contain spaces ("User - Agent" is not a
            # header any server recognises).
            "User-Agent": (
                "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
                "AppleWebKit/537.36 (KHTML, like Gecko) "
                "Chrome/80.0.3987.149 Safari/537.36"
            ),
            # NOTE(review): value kept verbatim.  The spaces around "=" and
            # "-" look like formatter damage, and a hard-coded session token
            # should not live in source -- confirm and move to configuration.
            "Cookie": (
                "UM_distinctid = 1712e065ee94a5 - 0fe79487749dc4 - f313f6d - 144000 - 1712e065eea910;"
                "hstud = u2ltte469895f389;"
                "auth_tk = MGRiNjZhODQxODE3NGM4ZTllMmFmYzQyODhjZGNhZTZvbzd2bl8yODcwNQ ==;"
                "Hm_lvt_a3e2ff554f3229fd90bcfe77f75b9806 = 1585615106, 1585615135;"
                "Hm_lpvt_a3e2ff554f3229fd90bcfe77f75b9806 = 1585651165"
            ),
            # NOTE(review): conditional-request headers may make the server
            # answer 304 with an empty body (handled below by skipping the
            # page); consider dropping them.  Values kept verbatim.
            "If-Modified-Since": "Sun, 29 Mar 05:38: 04GMT",
            "If-None-Match": "AIF7wq3NzjqeN4RpTnJILDgjP8SQ",
        }
        # Number of images saved so far; also the next file name.  The
        # attribute name (sic, "conut") is kept so existing users of it still work.
        self.conut = 0

    def get_url_list(self):
        """Return the page URLs to crawl: ``/share`` then ``/draw/1000..9999``.

        Returns:
            A list of 9001 strings, each prefixed with ``self.base_url``.
        """
        paths = ["/share"]
        paths.extend("/draw/{}".format(i) for i in range(1000, 10000))
        return [self.base_url + path for path in paths]

    def get_img_url(self, url_list):
        """Fetch one page and save every image it references.

        Args:
            url_list: A single page URL (the name is historical; it is not a
                list).

        Raises:
            requests.exceptions.MissingSchema: If the URL has no scheme, which
                means ``base_url`` was never configured.
        """
        page_url = url_list
        print(page_url)
        try:
            response = requests.get(
                page_url, headers=self.headers, timeout=self.TIMEOUT
            )
        except requests.exceptions.MissingSchema:
            # A schemeless URL is a configuration error: every remaining page
            # would fail the same way, so fail loudly instead of continuing.
            raise
        except requests.RequestException as exc:
            # One unreachable page must not abort the whole crawl.
            print("写入失败", exc)
            return

        if not response.content:
            # Nothing to parse (e.g. a 304 reply).
            return
        try:
            tree = etree.HTML(response.content)
        except etree.LxmlError as exc:
            print("写入失败", exc)
            return
        if tree is None:
            return

        sources = tree.xpath('//*[@id="imgTooles"]/div/img/@src')
        if not sources:
            return

        os.makedirs(self.IMG_DIR, exist_ok=True)
        for src in sources:
            # Resolve relative and protocol-relative ``src`` values.
            self._save_image(urljoin(page_url, src))

    def _save_image(self, img_url):
        """Download ``img_url`` into ``IMG_DIR``; return True on success."""
        extension = "png" if "png" in img_url else "jpg"
        target = os.path.join(
            self.IMG_DIR, "{}.{}".format(self.conut, extension)
        )
        try:
            response = requests.get(
                img_url, headers=self.headers, timeout=self.TIMEOUT
            )
            # Do not store an HTML error page under an image file name.
            response.raise_for_status()
            with open(target, "wb") as f:
                f.write(response.content)
        except (requests.RequestException, OSError) as exc:
            print("写入失败", exc)
            return False
        # Advance the counter only after the bytes are on disk, so a failed
        # download does not leave a gap in the numbering.
        self.conut += 1
        print("写入成功")
        return True

    def run(self):
        """Crawl every page from ``get_url_list`` and save its images."""
        # 1. Build the list of pages to crawl.
        # 2. Visit each page and download the images it links to.
        for url in self.get_url_list():
            self.get_img_url(url)


if __name__ == "__main__":
    # Optional first CLI argument: the site root the crawled paths belong to,
    # e.g. ``python this_script.py https://example.com``.
    Dm = Dmimg(sys.argv[1] if len(sys.argv) > 1 else "")
    Dm.run()