current position:Home>Python actual combat case, requests module, python implementation, obtaining dynamic charts

Python actual combat case, requests module, python implementation, obtaining dynamic charts

2022-02-02 06:41:53 Dai mubai

Time does not let you down; keep creating. This article is participating in the 2021 year-end summary essay contest.

Preface

Use Python to create dynamic charts. Without further ado ~

Let's start happily ~

Development tools

Python version: 3.6.4

Related modules:

re module;

requests module;

urllib module;

pandas module;

as well as some of Python's built-in modules.

Environment building

Install Python and add it to your environment variables, then use pip to install the required modules.

Take a look at the state of the 「Data visualization」 section on Bilibili in 2019: the top video has over 2 million views and more than 40,000 bullet comments.

1.png+

Baidu Index

To get Baidu Index data, you first need to log in to your Baidu account.

Take the keyword 「Glory of Kings」 as an example, with a custom date range of 2020-10-01 to 2020-10-10.

Using the browser's developer tools, we can see the data interface behind the chart.

2.jpg

However, looking at the response to that request, there is no readable data, because it is encrypted with JavaScript.

After finding a workaround, the crawl succeeds. The code is as follows:

import time
import json
import execjs
import datetime
import requests
from urllib.parse import urlencode


def get_data(keywords, startDate, endDate, area):
    """Fetch the encrypted Baidu Index series and the key used to decrypt it.

    Args:
        keywords: A list of keyword strings. A single string is also accepted
            and treated as one keyword.
        startDate: First day of the range as a string, e.g. '2020-10-01'.
        endDate: Last day of the range as a string, e.g. '2020-10-10'.
        area: Region code sent as the ``area`` query parameter; callers in
            this file pass 0 or a code such as '901'.

    Returns:
        A dict with two entries: ``"data"`` (the encrypted series taken from
        the first entry of ``userIndexes``) and ``"t"`` (the key string
        returned by the ``ptbk`` endpoint).

    Raises:
        requests.RequestException: If a request fails or times out.
        ValueError: If a response body is not valid JSON.
        KeyError, IndexError, TypeError: If a response does not have the
            expected structure.
    """
    # A bare string would otherwise be iterated character by character below,
    # turning every single character into its own keyword.
    if isinstance(keywords, str):
        keywords = [keywords]

    # Example of the resulting URL:
    # data_url = "http://index.baidu.com/api/SearchApi/index?area=0&word=[[%7B%22name%22:%22%E7%8E%8B%E8%80%85%E8%8D%A3%E8%80%80%22,%22wordType%22:1%7D]]&startDate=2020-10-01&endDate=2020-10-10"
    params = {
        'word': json.dumps([[{'name': keyword, 'wordType': 1}] for keyword in keywords]),
        'startDate': startDate,
        'endDate': endDate,
        'area': area
    }
    data_url = 'http://index.baidu.com/api/SearchApi/index?' + urlencode(params)
    headers = {
        # Paste the cookie copied from a logged-in browser session here.
        "Cookie": ' Yours cookie',
        "Referer": "http://index.baidu.com/v2/main/index.html",
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.90 Safari/537.36"
    }
    # Without a timeout a stalled connection would block the script forever.
    timeout = 10

    # First request: the encrypted series plus the uniqid that identifies it.
    res = requests.get(url=data_url, headers=headers, timeout=timeout).json()
    data = res["data"]["userIndexes"][0]["all"]["data"]
    uniqid = res["data"]["uniqid"]

    # Second request: the key string for this uniqid,
    # e.g. t = "ev-fxk9T8V1lwAL6,51348+.9270-%"
    t_url = "http://index.baidu.com/Interface/ptbk?uniqid={}".format(uniqid)
    rep = requests.get(url=t_url, headers=headers, timeout=timeout).json()
    t = rep["data"]
    return {"data": data, "t": t}


def get_search_index(word, startDate, endDate, area):
    """Return the decrypted index values for ``word`` over the given dates.

    The encrypted payload and its key come from ``get_data``; decryption is
    delegated to the ``decrypt`` function defined in
    ``parsing_data_function.js``, which is executed through ``execjs``.
    """
    payload = get_data(word, startDate, endDate, area)
    encrypted, key = payload["data"], payload["t"]

    # Load the JavaScript source that implements the decryption routine.
    with open('parsing_data_function.js', encoding='utf-8') as script:
        source = script.read()

    # Compile the source into a JS context and invoke decrypt(key, encrypted).
    return execjs.compile(source).call('decrypt', key, encrypted)


def get_date_list(begin_date, end_date):
    """Return every day from ``begin_date`` to ``end_date``, both inclusive.

    Args:
        begin_date: First day as a '%Y-%m-%d' string, e.g. '2020-10-01'.
        end_date: Last day, in the same format.

    Returns:
        A list of zero-padded 'YYYY-MM-DD' strings in ascending order. The
        list is empty when ``begin_date`` is later than ``end_date``.

    Raises:
        ValueError: If either argument does not match '%Y-%m-%d'.
    """
    fmt = "%Y-%m-%d"
    current = datetime.datetime.strptime(begin_date, fmt)
    last = datetime.datetime.strptime(end_date, fmt)
    one_day = datetime.timedelta(days=1)

    # Compare parsed dates rather than raw strings, so input that is not
    # zero-padded (e.g. '2020-1-5') is still ordered correctly.
    dates = []
    while current <= last:
        dates.append(current.strftime(fmt))
        current += one_day
    return dates


def get_area():
    """Download the daily index of one keyword for every region in ``areas``.

    For each region code the decrypted series is fetched through
    ``get_search_index``, printed, and appended to ``area.csv`` as
    ``region,value,date`` rows. A failure for one region is reported on
    stdout and the loop continues with the next region.
    """
    areas = {"901": " Shandong ", "902": " guizhou ", "903": " jiangxi ", "904": " Chongqing ", "905": " Inner Mongolia ", "906": " hubei ", "907": " liaoning ", "908": " hunan ", "909": " fujian ", "910": " Shanghai ", "911": " Beijing ", "912": " guangxi ", "913": " guangdong ", "914": " sichuan ", "915": " yunnan ", "916": " jiangsu ", "917": " Zhejiang ", "918": " qinghai ", "919": " ningxia ", "920": " hebei ", "921": " heilongjiang ", "922": " Ji Lin ", "923": " tianjin ", "924": " shaanxi ", "925": " gansu ", "926": " xinjiang ", "927": " Henan ", "928": " anhui ", "929": " shanxi ", "930": " hainan ", "931": " Taiwan ", "932": " Tibet ", "933": " Hong Kong ", "934": " Macau "}
    word = [' Glory of Kings ']
    startDate = '2020-10-01'
    endDate = '2020-10-10'
    # The date range is identical for every region, so build it once.
    dates = get_date_list(startDate, endDate)

    for area, region in areas.items():
        try:
            # Pause before each request (presumably to go easy on the API).
            time.sleep(1)
            res = get_search_index(word, startDate, endDate, area)
            result = res.split(',')
            # Open the file once per region instead of once per row.
            with open('area.csv', 'a+', encoding='utf-8') as f:
                for num, date in zip(result, dates):
                    print(region, num, date)
                    f.write(region + ',' + str(num) + ',' + date + '\n')
        except Exception as exc:
            # Stay best-effort, but report the failure instead of hiding it;
            # unlike a bare ``except`` this also lets Ctrl-C stop the script.
            print(region, 'failed:', repr(exc))


def get_word():
    """Download the daily index for each keyword in ``words``.

    Every keyword is queried with ``area`` set to 0. The decrypted series is
    printed and appended to ``word.csv`` as ``keyword,value,date`` rows. A
    failure for one keyword is reported on stdout and the loop continues
    with the next keyword.
    """
    words = [' Zhuge Dali ', ' Zhang Wei ', ' Hu Yifei ', ' LV ziqiao ', ' Chen Meijia ', ' Zhao Haitang ', ' Curry sauce ', ' Zeng Xiaoxian ', ' Qin Yumo ']
    startDate = '2020-10-01'
    endDate = '2020-10-10'
    area = 0
    # The date range is identical for every keyword, so build it once.
    dates = get_date_list(startDate, endDate)

    for word in words:
        try:
            # Pause before each request (presumably to go easy on the API).
            time.sleep(2)
            # The keyword is wrapped in a list because get_data iterates over
            # its keywords argument; a bare string would be split into
            # single characters, each sent as a separate keyword.
            res = get_search_index([word], startDate, endDate, area)
            result = res.split(',')
            # Open the file once per keyword instead of once per row.
            with open('word.csv', 'a+', encoding='utf-8') as f:
                for num, date in zip(result, dates):
                    print(word, num, date)
                    f.write(word + ',' + str(num) + ',' + date + '\n')
        except Exception as exc:
            # Stay best-effort, but report the failure instead of hiding it;
            # unlike a bare ``except`` this also lets Ctrl-C stop the script.
            print(word, 'failed:', repr(exc))


# Script entry point: collect the per-region data (area.csv) first,
# then the per-keyword data (word.csv).
get_area()
get_word()
 Copy code 

The resulting CSV files are shown below; the data comes in two forms.

One is the daily index data for multiple keywords; the other is the daily index data for a single keyword across provinces and cities.

4.png

With the data in hand, you can use Python to make an animated chart.

import pandas as pd
import bar_chart_race as bcr

# Load the scraped rows. The CSV has no header line, so the column names are
# supplied here.
# For the keyword data set, read 'word.csv' instead:
# df = pd.read_csv('word.csv', encoding='utf-8', header=None, names=['name', 'number', 'day'])
df = pd.read_csv('area.csv', encoding='utf-8', header=None, names=['name', 'number', 'day'])

# Pivot to wide form: one row per day, one column per name. Rows sharing the
# same day and name are averaged (pivot_table's default aggregation) and
# missing cells are filled with 0.
df_result = pd.pivot_table(df, values='number', index=['day'], columns=['name'], fill_value=0)

# Render the animated bar chart to a GIF.
# For the keyword data set:
# bcr.bar_chart_race(df_result, filename='word.gif', title=' Love apartment 5 The popularity ranking of the cast ')
# The title is double-quoted because it contains an apostrophe, which would
# terminate a single-quoted literal and make the script a SyntaxError.
bcr.bar_chart_race(df_result, filename='area.gif', title=" Ranking of the king's glory popularity in various provinces and cities in China ")
 Copy code 

Five lines of Python code; see how it works:

11.gif

12.gif

Weibo index

Search Baidu for the Sina Weibo index and open the site; you will find that the desktop web version cannot be used.

10.png

We just need to open the developer tools, switch the browser to mobile-device emulation, and refresh the page.

7.png

As you can see, the micro index interface now loads.

Add a keyword and look at the data interface that serves the index.

8.png

The request uses the POST method, and you do not need to log in to a Weibo account.

import re
import time
import json
import requests
import datetime


# Request headers, one "name: value" pair per line. Each header must sit on
# its own line: the parsing step below splits on line breaks, so a header
# block collapsed onto a single line would produce one bogus "accept" header
# containing everything else.
headers = """accept: application/json
accept-encoding: gzip, deflate, br
accept-language: zh-CN,zh;q=0.9
content-length: 50
content-type: application/x-www-form-urlencoded
cookie: ' Yours cookie'
origin: https://data.weibo.com
referer: https://data.weibo.com/index/newindex?visit_type=trend&wid=1011224685661
sec-fetch-mode: cors
sec-fetch-site: same-origin
user-agent: Mozilla/5.0 (iPhone; CPU iPhone OS 11_0 like Mac OS X) AppleWebKit/604.1.38 (KHTML, like Gecko) Version/11.0 Mobile/15A372 Safari/604.1
x-requested-with: XMLHttpRequest"""

# Convert the header text into a dict. Splitting at most once on ": " keeps
# values that themselves contain a colon (such as URLs) intact.
headers = dict(line.split(": ", 1) for line in headers.splitlines())
print(headers)

# Data interface that returns the chart series for a keyword ID.
url = 'https://data.weibo.com/index/ajax/newindex/getchartdata'


#  Get time list 
def get_date_list(begin_date, end_date):
    """Return every day from ``begin_date`` to ``end_date``, both inclusive.

    Args:
        begin_date: First day as a '%Y-%m-%d' string, e.g. '2019-01-01'.
        end_date: Last day, in the same format.

    Returns:
        A list of zero-padded 'YYYY-MM-DD' strings in ascending order. The
        list is empty when ``begin_date`` is later than ``end_date``.

    Raises:
        ValueError: If either argument does not match '%Y-%m-%d'.
    """
    fmt = "%Y-%m-%d"
    current = datetime.datetime.strptime(begin_date, fmt)
    last = datetime.datetime.strptime(end_date, fmt)
    one_day = datetime.timedelta(days=1)

    # Compare parsed dates rather than raw strings, so input that is not
    # zero-padded (e.g. '2019-1-5') is still ordered correctly.
    dates = []
    while current <= last:
        dates.append(current.strftime(fmt))
        current += one_day
    return dates


# Keywords to look up.
names = [' Tang Wei ', ' Ya wen zhu ', ' Deng Jiajia ', ' Qiao Zhenyu ', ' Wang Xueqi ', ' Zhang Yi Xing ', ' Yu Haoming ', ' Wu yue ', ' Liang Guanhua ', ' Li Xinliang ', ' Suke ', ' Sun Xiaoxiao ', ' Zhao hanyingzi ', ' Eric ', ' Wei Wei ']

# Endpoint that resolves a keyword to its keyword ID, and the pattern that
# extracts that ID from the response text. Both are the same for every
# keyword, so they are built once outside the loop.
url_id = 'https://data.weibo.com/index/ajax/newindex/searchword'
pattern = re.compile(r'li wid=\\\"(.*?)\\\" word')

# Fetch the micro index series for each keyword and append it to weibo.csv
# as ``name,value,date`` rows.
for name in names:
    try:
        # Step 1: look up the keyword ID.
        html_id = requests.post(url=url_id, data={'word': name}, headers=headers)
        matches = pattern.findall(html_id.text)
        if not matches:
            print(name, 'keyword ID not found, skipped')
            continue
        wid = matches[0]

        # Step 2: request the chart data for that ID.
        data = {
            'wid': wid,
            'dateGroup': '1month'
        }
        # Pause between the two requests (presumably to go easy on the API).
        time.sleep(2)
        html = requests.post(url=url, data=data, headers=headers)
        result = json.loads(html.text)

        # Step 3: save the series; 's' holds the values and 'x' the dates.
        if result['data']:
            trend = result['data'][0]['trend']
            # Open the file once per keyword instead of once per row.
            with open('weibo.csv', 'a+', encoding='utf-8') as f:
                for value, date in zip(trend['s'], trend['x']):
                    print(name, value, date)
                    f.write(name + ',' + str(value) + ',' + date + '\n')
    except Exception as exc:
        # Stay best-effort, but report the failure instead of hiding it;
        # unlike a bare ``except`` this also lets Ctrl-C stop the script.
        print(name, 'failed:', repr(exc))

 Copy code 

The data obtained:

9.png

Generate a dynamic chart in the same way:

import pandas as pd
import bar_chart_race as bcr

# Load the scraped rows. The CSV has no header line, so the column names are
# supplied here.
df = pd.read_csv('weibo.csv', encoding='utf-8', header=None, names=['name', 'number', 'day'])

# Pivot to wide form: one row per day, one column per name. Rows sharing the
# same day and name are averaged (pivot_table's default aggregation) and
# missing cells are filled with 0.
df_result = pd.pivot_table(df, values='number', index=['day'], columns=['name'], fill_value=0)
# print(df_result[:10])

# Render the animated bar chart to a GIF using only the first 10 rows (days)
# of the pivot table.
# The title is double-quoted because it contains an apostrophe, which would
# terminate a single-quoted literal and make the script a SyntaxError.
bcr.bar_chart_race(df_result[:10], filename='weibo.gif', title=" Daming Fenghua's popularity ranking of performers ")
 Copy code 

Result display

13.gif

copyright notice
Author: [Dai mubai]. Please include the original link when reprinting, thank you.
https://en.pythonmana.com/2022/02/202202020641523586.html

Random recommended