from lxml import etree
from lxml import html
import requests
# Browser-like User-Agent header sent with the request.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/51.0.2704.63 Safari/537.36'
}
# A timeout keeps the script from blocking forever if the server never answers.
page = requests.get("https://www.baidu.com", headers=headers, timeout=10)
# NOTE: this rebinds the name `html` (imported from lxml above) to the page
# source string; the lxml.html module is not reachable under that name afterwards.
html = page.text

# Parse the page source string into an element tree.
element = etree.HTML(html)

# Nodes carrying the hot-search titles. The original author lists this XPath
# as an alternative way to select them:
#   //*[@id="hotsearch-content-wrapper"]/li/a/span[2]
ps = element.xpath('//*[@class="title-content-title"]')

# Text content of each hot-search title node (None for a node without text).
text = [p.text for p in ps]
if not ps:
    print("空")

# <li> items of the hot-search list; each is read for its "data-index" attribute.
x = element.xpath('//*[@class="s-hotsearch-content"]/li')

# Rank of each hot-search entry (None when the attribute is missing).
index = [x1.get("data-index") for x1 in x]

print(text)
print(index)
def PP(index_array, text_array):
    """Pair each rank with the text at the same position and return a dict.

    Args:
        index_array: Iterable of ranks, used as dictionary keys.
        text_array: Iterable of texts, used as dictionary values.

    Returns:
        A dict mapping ``index_array[i]`` to ``text_array[i]``. If a key occurs
        more than once, the last pairing wins. If the two inputs differ in
        length, pairing stops at the shorter one instead of raising
        ``IndexError``.
    """
    return dict(zip(index_array, text_array))
# Map each rank to its hot-search title.
re_text = PP(index, text)


def _rank_key(item):
    """Return a sort key for a (rank, title) pair.

    Ranks that parse as integers sort first, in numeric order, so "10" comes
    after "2". Anything else (e.g. a missing attribute read as None) sorts
    afterwards by its string form.
    """
    rank = item[0]
    try:
        return (0, int(rank), "")
    except (TypeError, ValueError):
        return (1, 0, str(rank))


# Sort the pairs by rank, then rebuild a dict so iteration follows rank order.
last_text = dict(sorted(re_text.items(), key=_rank_key))
# 到此這篇關於 Python 獲取百度熱搜的完整代碼的文章就介紹到這了。更多相關 Python 百度熱搜內容，請搜索腳本之家以前的文章或繼續瀏覽下面的相關文章，希望大家以後多多支持腳本之家！