python爬虫用什么软件写_【自己写了个软件】云顶之弈工具(python爬虫)

[Asm] 纯文本查看 复制代码from selenium import webdriver

from selenium.webdriver.support import expected_conditions as EC

from selenium.webdriver.common.by import By

import time

import json

def getBaseItem(driver):

driver.get(‘https://101.qq.com/tft/index.shtmlTAG=cooperation.glzx.tft&type=items’)

time.sleep(2)

print(“基础装备爬取——–“)

div =driver.find_elements(“css selector”, “#base_equip_list .item”)

div[0].click()#定位

time.sleep(1)

minitemlen = len(div)

baseItemList = []

for i in range(0,minitemlen):

div[i].click()

time.sleep(1)

baseItem = {

“name”:driver.find_elements(“css selector”, “#item_tab3 > div > div.g-items-right > div.items-right > div > div.item-active > p > span:nth-child(1)”)[0].text,

“attr”:driver.find_elements(“css selector”, “#item_tab3 > div > div.g-items-right > div.items-right > div > div.item-active > p > span:nth-child(2)”)[0].text,

“id”:driver.find_elements(“css selector”, “#item_tab3 > div > div.g-items-right > div.items-right > div > div.item-active > div”)[0].get_attribute(“data-id”),

“img”:driver.find_elements(“css selector”, “#item_tab3 > div > div.g-items-right > div.items-right > div > div.item-active > div > img”)[0].get_attribute(“src”),

}

baseItemList.append(baseItem)

jsonstr = json.dumps(baseItemList,ensure_ascii=False)

print(“爬取结果:——–“)

print(jsonstr)

try:

f =open(“./source/json/baseItem.json”,’r’)

f.close()

except IOError:

f = open(“./source/json/baseItem.json”,’w’)

print(“新建了文件baseItem.json”)

f = open(“./source/json/baseItem.json”,’w+’,encoding=’utf-8′)

f.write(jsonstr)

f.close()

print(“基础装备信息保存成功”)

def getItem(driver):

driver.get(‘https://101.qq.com/tft/index.shtmlTAG=cooperation.glzx.tft&type=items’)

time.sleep(2)

print(“合成装备爬取——–“)

div =driver.find_elements(“css selector”, “#base_form_list > .item”)#获取合成装备集合html

div[0].click()#定位

time.sleep(1)

minitemlen = len(div)#获取集合长度

ItemList = []

for i in range(0,minitemlen):

div[i].click()#模拟点击

time.sleep(1)

Item = {

“name”:driver.find_elements(“css selector”, “#item_tab3 > div > div.g-items-right > div.items-right > div > div.item-active > p > span:nth-child(1)”)[0].text,

“attr”:driver.find_elements(“css selector”, “#item_tab3 > div > div.g-items-right > div.items-right > div > div.item-active > p > span:nth-child(2)”)[0].text,

“id”:driver.find_elements(“css selector”, “#item_tab3 > div > div.g-items-right > div.items-right > div > div.item-active > div”)[0].get_attribute(“data-id”),

“img”:driver.find_elements(“css selector”, “#item_tab3 > div > div.g-items-right > div.items-right > div > div.item-active > div > img”)[0].get_attribute(“src”),

}

ItemList.append(Item)

jsonstr = json.dumps(ItemList,ensure_ascii=False)#转json

print(“爬取结果:——–“)

print(jsonstr)

#写入保存

try:

f =open(“./source/json/Item.json”,’r’)

f.close()

except IOError:

f = open(“./source/json/Item.json”,’w’)

print(“新建了文件Item.json”)

f = open(“./source/json/Item.json”,’w+’,encoding=’utf-8′)

f.write(jsonstr)

f.close()

print(“合成装备信息保存成功”)

def getItem_Link(driver):

driver.get(‘https://101.qq.com/tft/index.shtmlTAG=cooperation.glzx.tft&type=items’)

time.sleep(2)

print(“合成关系爬取——–“)

div =driver.find_elements(“css selector”, “#base_form_list > .item”)#获取合成装备集合html

div[0].click()#定位

time.sleep(1)

minitemlen = len(div)#获取集合长度

LinkList = []

for i in range(0,minitemlen):

div[i].click()#模拟点击

time.sleep(1)

link = [

driver.find_elements(“css selector”, “#item_tab3 > div > div.g-items-right > div.items-right > div > div.table-combination > div.combination-list.scroll-list > div > div.formula > div:nth-child(1)”)[0].get_attribute(“data-id”),

driver.find_elements(“css selector”, “#item_tab3 > div > div.g-items-right > div.items-right > div > div.table-combination > div.combination-list.scroll-list > div > div.formula > div:nth-child(2)”)[0].get_attribute(“data-id”),

driver.find_elements(“css selector”, “#item_tab3 > div > div.g-items-right > div.items-right > div > div.table-combination > div.combination-list.scroll-list > div > div.compound > div”)[0].get_attribute(“data-id”),

]

LinkList.append(link)

jsonstr = json.dumps(LinkList,ensure_ascii=False)#转json

print(“爬取结果:——–“)

print(jsonstr)

#写入保存

try:

f =open(“./source/json/ItemLink.json”,’r’)

f.close()

except IOError:

f = open(“./source/json/ItemLink.json”,’w’)

print(“新建了文件ItemLink.json”)

f = open(“./source/json/ItemLink.json”,’w+’,encoding=’utf-8′)

f.write(jsonstr)

f.close()

print(“装备合成关系信息保存成功”)

#爬取特质

def getRace(driver):

driver.get(‘https://101.qq.com/tft/index.shtmlTAG=cooperation.glzx.tft&type=database2’)

time.sleep(2)

print(“特质爬取——–“)

ul =driver.find_elements(“css selector”, “#raceDatabaseList > ul > li”)#获取特质集合

List = []

for i in range(0,len(ul)):

name = ul[i].find_elements(“css selector”,”div.trait-1 > div div.name”)[0].text

img = ul[i].find_elements(“css selector”,”div.trait-1 > div > div.pic > img”)[0].get_attribute(“src”)

desc = ul[i].find_elements(“css selector”,”div.trait-2 > div.desc”)[0].text

Raceid = str(img).split(“.”)[-2].split(“/”)[-1]

effectsList = []

for j in range(0,len(ul[i].find_elements(“css selector”,”div.trait-2 > div.effects span”))):

num = ul[i].find_elements(“css selector”,”div.trait-2 > div.effects span”)[j].find_elements(“css selector”,”b”)[0].text

effect = ul[i].find_elements(“css selector”,”div.trait-2 > div.effects span”)[j].text

effect = effect[len(num):]

effectitem = {“num”:num,”effect”:effect}

effectsList.append(effectitem)

heroList = []

for j in range(0,len(ul[i].find_elements(“css selector”,”div.trait-3 > div.champion-pic”))):

id = ul[i].find_elements(“css selector”,”div.trait-3 > div.champion-pic”)[j].find_elements(“css selector”,”img”)[0].get_attribute(“data-id”)

heroList.append(id)

item = {“id”:Raceid,”name”:name,”img”:img,”desc”:desc,”effectsList”:effectsList,”heroList”:heroList}

List.append(item)

jsonstr = json.dumps(List,ensure_ascii=False)#转json

print(“爬取结果:——–“)

print(jsonstr)

#写入保存

try:

f =open(“./source/json/Race.json”,’r’)

f.close()

except IOError:

f = open(“./source/json/Race.json”,’w’)

print(“新建了文件Race.json”)

f = open(“./source/json/Race.json”,’w+’,encoding=’utf-8′)

f.write(jsonstr)

f.close()

print(“特质信息保存成功”)

#爬取职业

def getJob(driver):

driver.get(‘https://101.qq.com/tft/index.shtmlTAG=cooperation.glzx.tft&type=database3’)

time.sleep(2)

print(“职业爬取——–“)

ul =driver.find_elements(“css selector”, “#jobDatabaseList > ul > li”)#获取特质集合

List = []

for i in range(0,len(ul)):

name = ul[i].find_elements(“css selector”,”div.trait-1 > div div.name”)[0].text

img = ul[i].find_elements(“css selector”,”div.trait-1 > div > div.pic > img”)[0].get_attribute(“src”)

Jobid = str(img).split(“.”)[-2].split(“/”)[-1]

desc = ul[i].find_elements(“css selector”,”div.trait-2 > div.desc”)[0].text

effectsList = []

for j in range(0,len(ul[i].find_elements(“css selector”,”div.trait-2 > div.effects span”))):

num = ul[i].find_elements(“css selector”,”div.trait-2 > div.effects span”)[j].find_elements(“css selector”,”b”)[0].text

effect = ul[i].find_elements(“css selector”,”div.trait-2 > div.effects span”)[j].text

effect = effect[len(num):]

effectitem = {“num”:num,”effect”:effect}

effectsList.append(effectitem)

heroList = []

for j in range(0,len(ul[i].find_elements(“css selector”,”div.trait-3 > div.champion-pic”))):

id = ul[i].find_elements(“css selector”,”div.trait-3 > div.champion-pic”)[j].find_elements(“css selector”,”img”)[0].get_attribute(“data-id”)

heroList.append(id)

item = {“id”:Jobid,”name”:name,”img”:img,”desc”:desc,”effectsList”:effectsList,”heroList”:heroList}

List.append(item)

jsonstr = json.dumps(List,ensure_ascii=False)#转json

print(“爬取结果:——–“)

print(jsonstr)

#写入保存

try:

f =open(“./source/json/Job.json”,’r’)

f.close()

except IOError:

f = open(“./source/json/Job.json”,’w’)

print(“新建了文件Job.json”)

f = open(“./source/json/Job.json”,’w+’,encoding=’utf-8′)

f.write(jsonstr)

f.close()

print(“职业信息保存成功”)

#爬取英雄

def getHero(driver):

driver.get(‘https://101.qq.com/tft/index.shtmlTAG=cooperation.glzx.tft&type=database0’)

time.sleep(2)

print(“英雄爬取——–“)

ul =driver.find_elements(“css selector”, “#heroOverallDatabaseList > li”)#获取特质集合

List = []

for i in range(0,len(ul)):

name = ul[i].find_elements(“css selector”,”div.hero-info > div.name”)[0].text

id = ul[i].find_elements(“css selector”,”div.hero-info > div.champion-pic > img”)[0].get_attribute(“data-id”)

img = ul[i].find_elements(“css selector”,”div.hero-info > div.champion-pic > img”)[0].get_attribute(“src”)

price = ul[i].find_elements(“css selector”,”div.hero-price > span”)[0].text

rece = ul[i].find_elements(“css selector”,”div:nth-child(3) > div > div.name”)[0].text

#爬取英雄详细资料

other = getHeroDetails(driver,ul[i].find_elements(“css selector”,”div.hero-info > div.champion-pic”)[0])

JobList = []

#heroOverallDatabaseList > li:nth-child(3) > div:nth-child(4) > div:nth-child(1)

#heroOverallDatabaseList > li:nth-child(3) > div:nth-child(4) > div:nth-child(1) > div.name

for j in range(0,len(ul[i].find_elements(“css selector”,”div:nth-child(4) > div”))):

job = ul[i].find_elements(“css selector”,”div:nth-child(4) > div”)[j].find_elements(“css selector”,”div.name”)[0].text

JobList.append(job)

item = {

“id”:id,”name”:name,”img”:img,”price”:price,”rece”:rece,”JobList”:JobList,

“skillname”:other[‘skillname’],

“skillimg”:other[‘skillimg’],

“skilldesc”:other[‘skilldesc’],

“itemList”:other[‘itemList’],

“attrList”:other[‘attrList’],

}

List.append(item)

jsonstr = json.dumps(List,ensure_ascii=False)#转json

print(“爬取结果:——–“)

print(jsonstr)

#写入保存

try:

f =open(“./source/json/Hero.json”,’r’)

f.close()

except IOError:

f = open(“./source/json/Hero.json”,’w’)

print(“新建了文件Hero.json”)

f = open(“./source/json/Hero.json”,’w+’,encoding=’utf-8′)

f.write(jsonstr)

f.close()

print(“英雄信息保存成功”)

#爬取英雄个人信息

def getHeroDetails(driver,div):

webdriver.ActionChains(driver).move_to_element(div)

div.click()

HeroDiv = driver.find_elements(“css selector”, “#pop1”)[0]

skillname = HeroDiv.find_elements(“css selector”, “div.center > div.skill > div > p.name > span:nth-child(1)”)[0].text

skillimg = HeroDiv.find_elements(“css selector”, “div.center > div.skill > div > img”)[0].get_attribute(“src”)

skilldesc = HeroDiv.find_elements(“css selector”, “div.center > div.skill > div > p.description”)[0].text

itemList = []

itemDiv = HeroDiv.find_elements(“css selector”, “div.left > div.recommend > div > img”)

for i in range(0,len(itemDiv)):

imgsrc = itemDiv[i].get_attribute(“src”)

itemList.append(str(imgsrc).split(“.”)[-2].split(“/”)[-1])

attrList = []

attrDiv = HeroDiv.find_elements(“css selector”, “div.left > div.attribute > ul li”)

for i in range(0,len(attrDiv)):

attrList.append(attrDiv[i].text)

closeBtn = driver.find_elements(“css selector”, “#pop1 > a”)[0]

driver.execute_script(“arguments[0].click();”, closeBtn)

#closeBtn.click()

item = {

“skillname”:skillname,

“skillimg”:skillimg,

“skilldesc”:skilldesc,

“itemList”:itemList,

“attrList”:attrList,

}

return item

#爬取阵容

def getSynergies(driver):

driver.get(‘https://101.qq.com/tft/index.shtmlTAG=cooperation.glzx.tft&type=synergies’)

time.sleep(2)

while True:#跳到最下面以求获取全部阵容

driver.execute_script(“window.scrollTo(0,document.body.scrollHeight)”)

height1 = driver.execute_script(“return document.body.scrollHeight;”)

time.sleep(2)

height2 = driver.execute_script(“return document.body.scrollHeight;”)

if height1 == height2:

break

print(“阵容爬取——–“)

div =driver.find_elements(“css selector”, “#recommendSynergies > div.group-box > div > div”)

resultList = []

for i in range(0,len(div)):

print(“进度”+str(i)+”/”+str(len(div)))

name = div[i].find_elements(“css selector”, “div.tier.tier1 > div.info > p”)[0].text

#获取完整阵容(英雄,装备,站位)

championsList = []

championsDiv = div[i].find_elements(“css selector”, “div.tier.tier1 > div.champions > div”)

for j in range(0,len(championsDiv)):

#获取完整阵容的英雄id

cid = championsDiv[j].find_elements(“css selector”, “div.champion-pic > img”)[0].get_attribute(“data-id”)

#获取英雄装备集合

ItemDiv = championsDiv[j].find_elements(“css selector”, “div.cost-skills > img”)

itemList = []

for k in range(0,len(ItemDiv)):

itemid = ItemDiv[k].get_attribute(“data-id”)

itemList.append(itemid)

#获取英雄站位

stanceList = []

stanceDiv = div[i].find_elements(“css selector”, “div.tier.tier3 > div.stance > img”)

for k in range(0,len(stanceDiv)):

if cid == stanceDiv[k].get_attribute(“data-id”):

classname = stanceDiv[k].get_attribute(“class”)

stanceList =str(classname).split(“position”)[1].split(“-“)

item = {“id”:cid,”itemList”:itemList,”stanceList”:stanceList}

championsList.append(item)

#获取前期阵容

earlierDiv = div[i].find_elements(“css selector”, “div.tier.tier2 > div.earlier > div”)

earlierList = []

for j in range(0,len(earlierDiv)):

#获取完整阵容的英雄id

eid = earlierDiv[j].find_elements(“css selector”, “div.champion-pic > img”)[0].get_attribute(“data-id”)

earlierList.append(eid)

#获取中期阵容

interimDiv = div[i].find_elements(“css selector”, “div.tier.tier2 > div.interim > div”)

interimList = []

for j in range(0,len(interimDiv)):

#获取中期阵容的英雄id

iid = interimDiv[j].find_elements(“css selector”, “div.champion-pic > img”)[0].get_attribute(“data-id”)

interimList.append(iid)

traitsDiv = div[i].find_elements(“css selector”, “div.tier.tier3 > div.traits > div”)

traitsList = []

for j in range(0,len(traitsDiv)):

#获取中期阵容的英雄id

trait = traitsDiv[j].find_elements(“css selector”, “span”)[0].text

traitsList.append(trait)

explainDiv = div[i].find_elements(“css selector”, “div.tier.tier2 > div.explain”)[0]

# webdriver.ActionChains(driver).move_to_element(explainDiv)

driver.execute_script(“arguments[0].click();”, explainDiv)

explainList = []

explainitemDiv = driver.find_elements(“css selector”, “body > div.explain-box.show > div”)

for j in range(0,len(explainitemDiv)):

title = explainitemDiv[j].find_element(“css selector”,”p.title”).text

description = explainitemDiv[j].find_element(“css selector”,”p.description”).text

explain ={title:description}

explainList.append(explain)

synergy ={“name”:name,”championsList”:championsList,”earlierList”:earlierList,”interimList”:interimList,”traitsList”:traitsList,”explainList”:explainList}

resultList.append(synergy)

jsonstr = json.dumps(resultList,ensure_ascii=False)#转json

print(“爬取结果:——–“)

print(jsonstr)

#写入保存

try:

f =open(“./source/json/synergy.json”,’r’)

f.close()

except IOError:

f = open(“./source/json/synergy.json”,’w’)

print(“新建了文件synergy.json”)

f = open(“./source/json/synergy.json”,’w+’,encoding=’utf-8′)

f.write(jsonstr)

f.close()

print(“阵容信息保存成功”)

if __name__ == ‘__main__’:

driver = webdriver.Chrome(executable_path=”C:FTF_Toolchromedriver.exe”)

# getBaseItem(driver)

# getItem(driver)

getItem_Link(driver)

# getRace(driver)

# getJob(driver)

# getHero(driver)

# getSynergies(driver)

driver.quit()

文章知识点与官方知识档案匹配,可进一步学习相关知识Python入门技能树 络爬虫urllib211389 人正在系统学习中 相关资源:开源的爬虫软件Heritrix3.1.0_开源爬虫-Java工具类资源-CSDN文库

声明:本站部分文章及图片源自用户投稿,如本站任何资料有侵权请您尽早请联系jinwei@zod.com.cn进行处理,非常感谢!

上一篇 2020年10月19日
下一篇 2020年10月19日

相关推荐