A Python crawler that scrapes the Douban movie ranking list and writes it to a CSV file for visual data analysis

2022-05-15 02:35:07 Emilyzhai

# 1. Crawl the content and write it to a CSV file
import requests
import re
import csv
# Douban movie list: fetch one page of the ranking and append it to a CSV file.
#
# NOTE(review): the URL is blank in this source and must be filled in before
# running. Per the original note, its `start` query parameter selects the
# starting position of each page, and every page holds 25 entries.
url = ""

headers = {
    "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/94.0.4606.81 Safari/537.36"
}

# `.*?` is a lazy match; parentheses capture the text to extract, and
# `?P<name>` names the group so the captured text can be retrieved by that
# name later. The pattern is precompiled once; re.S lets `.` span newlines.
# NOTE(review): the literal " People comment on " looks machine-translated;
# confirm it matches the text that actually appears in the page markup.
obj = re.compile(r'<li>.*?<span class="title">(?P<name>.*?)</span>.*?<br>(?P<year>.*?)&nbsp;'
                 r'.*?<span class="rating_num" property="v:average">(?P<score>.*?)</span>'
                 r'.*?<span>(?P<coment>.*?) People comment on </span>',re.S)


def parse_movies(page_content):
    """Extract one row per movie entry matched in *page_content*.

    Args:
        page_content: HTML text of a ranking page.

    Returns:
        A list of ``[name, year, score, coment]`` lists, in the order the
        named groups appear in the pattern. The year has its surrounding
        whitespace stripped.
    """
    rows = []
    for it in obj.finditer(page_content):
        dic = it.groupdict()  # named groups -> dict, in pattern order
        dic['year'] = dic['year'].strip()  # drop whitespace around the year
        rows.append(list(dic.values()))
    return rows


def crawl_to_csv(page_url=None, csv_path="data.csv"):
    """Download one ranking page and append its movies to a CSV file.

    Args:
        page_url: Page to fetch; defaults to the module-level ``url``.
        csv_path: CSV file to append to (created if missing).

    Returns:
        The number of rows written.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with an HTTP error status.
    """
    if page_url is None:
        page_url = url
    res = requests.get(page_url, headers=headers, timeout=10)
    res.raise_for_status()  # do not parse an error page as if it were data
    rows = parse_movies(res.text)
    # newline="" is what the csv module expects, so no blank lines are
    # inserted between rows on Windows; `with` guarantees the file is closed.
    with open(csv_path, "a", encoding="utf-8", newline="") as f:
        csv.writer(f).writerows(rows)
    return len(rows)


if __name__ == "__main__":
    crawl_to_csv()
# 2. Visualization: movies and their ratings

import csv
from matplotlib import pyplot
def load_ratings(csv_path="data.csv"):
    """Read movie names, scores and years from a CSV file.

    Assumes each data row is ``name, year, score, ...`` — the order of the
    named groups in the crawler's pattern; confirm against whatever wrote the
    file. Blank rows, and rows whose third column is not a number (for
    example a header row), are skipped.

    Args:
        csv_path: CSV file to read.

    Returns:
        A ``(name, score, year)`` tuple of equal-length lists; scores are
        floats, names and years are strings.
    """
    name, score, year = [], [], []
    with open(csv_path, "r", encoding="utf-8", newline="") as f:
        for row in csv.reader(f):
            if len(row) < 3:
                continue
            try:
                rating = float(row[2])
            except ValueError:
                continue  # header or malformed row: nothing to plot
            name.append(row[0])
            year.append(row[1])
            score.append(rating)
    return name, score, year


def plot_ratings(csv_path="data.csv"):
    """Show a bar chart of each movie's score read from *csv_path*."""
    name, score, _year = load_ratings(csv_path)
    pyplot.rcParams['font.sans-serif'] = ['SimHei']  # font able to render Chinese labels
    pyplot.bar(name, score)
    pyplot.title(" Movie charts ")
    pyplot.xlabel(" The movie ")
    pyplot.ylabel(" score ")
    pyplot.xticks(rotation=90)  # rotate the x-axis labels so long names do not overlap
    pyplot.show()


if __name__ == "__main__":
    plot_ratings()

