文章目录
相关文章
Python+Flask实现全国、全球疫情大数据可视化(二):网页页面布局+echarts可视化中国地图、世界地图、柱状图和折线图
Python+Flask实现全国、全球疫情大数据可视化(三):ajax读取mysql中的数据并将参数传递至echarts表格中
2021.8.1更新,由于之前使用的百度疫情数据接口,现在接口已经不可用了,所以这里我将数据接口换成了腾讯的。
一、实现效果
最近简单学习了一下flask,决定来做一个疫情大数据的网页出来。
话不多说先上效果图。还是比较喜欢这样的排版的。
二、数据获取地址
数据最初来源于百度提供的api接口(该接口现已不可用),现在改用腾讯提供的接口。打开腾讯疫情数据页面,就能看到国内疫情与国外疫情两个内容了。然后进入网页去找接口。那么接口我已经找到了,如下:
腾讯疫情数据页面
# Domestic (China) epidemic data endpoint
url = (
    "https://api.inews.qq.com/newsqa/v1/query/inner/publish/modules/list"
    "?modules=chinaDayList,chinaDayAddList,nowConfirmStatis,provinceCompare"
)
# Overseas epidemic data endpoint
url2 = (
    "https://api.inews.qq.com/newsqa/v1/automation/modules/list"
    "?modules=FAutoforeignList"
)
我们爬虫的编写思路就是分别爬取国内和国外的数据。
关于国内的数据,从上图可以看到,有四个参数chinaDayList,chinaDayAddList,nowConfirmStatis,provinceCompare
分别是每天汇总的疫情总数据、每天新增的数据、现存确诊统计数据、各省份现存确诊数据。爬虫代码如下
# -*- coding: utf-8 -*-
# @Time :2021/7/30 23:34
# @Author :lzh
# @File : new_spider.py
# @Software: PyCharm
import datetime
import pandas as pd
import requests
from sqlalchemy import create_engine
from translate import COUNTRIES_CH_EN_DICT
def traslate(word):
    """Look up the English name for a Chinese country name.

    Returns the value stored in ``COUNTRIES_CH_EN_DICT`` for ``word``, or the
    placeholder ``"未知地区"`` when ``word`` is not a key of that mapping.
    """
    if word in COUNTRIES_CH_EN_DICT:
        return COUNTRIES_CH_EN_DICT[word]
    return "未知地区"
# %%
def save_data(df, table_name, if_exists="append", need_translate=False):
    """Write a DataFrame to a table of the local ``myspider`` MySQL database.

    :param df: DataFrame to store. When ``need_translate`` is true, a ``name``
        column is added to it in place before it is written.
    :param table_name: name of the destination table.
    :param if_exists: forwarded to ``DataFrame.to_sql`` (``"fail"``,
        ``"replace"`` or ``"append"``).
    :param need_translate: when true, fill ``df['name']`` by applying
        ``traslate`` to every value of the ``疫情地区`` column.
    """
    if need_translate:
        df['name'] = df['疫情地区'].apply(traslate)
    # NOTE(review): the connection URL (host, database, user, password) is
    # hard-coded; adjust it to your own environment.
    engine = create_engine('mysql://root:123456@localhost:3306/myspider?charset=utf8')
    try:
        # index=False: the DataFrame index is not written as a column.
        df.to_sql(table_name, con=engine, if_exists=if_exists, index=False)
    finally:
        # A fresh engine is built on every call, so release its connection
        # pool instead of leaving the connections open.
        engine.dispose()
def crawl_china_data():
    """Fetch the domestic (China) epidemic modules from the Tencent API.

    :return: tuple ``(provinceCompare, chinaDayList, chinaDayAddList)`` read
        from the ``data`` object of the JSON response; an element is ``None``
        when the response does not contain that module.
    :raises requests.RequestException: on timeout, connection failure or an
        HTTP error status.
    """
    url = "https://api.inews.qq.com/newsqa/v1/query/inner/publish/modules/list?modules=chinaDayList,chinaDayAddList,nowConfirmStatis,provinceCompare"
    # Without a timeout, requests can block forever on a stalled connection.
    response = requests.get(url, timeout=10)
    # Fail loudly on 4xx/5xx instead of trying to parse an error page.
    response.raise_for_status()
    # "data" may be missing or null; fall back to an empty mapping.
    data = response.json().get("data") or {}
    provinceCompare = data.get("provinceCompare")  # per-province totals (updated daily)
    chinaDayList = data.get("chinaDayList")  # nationwide cumulative data for the recent month
    chinaDayAddList = data.get("chinaDayAddList")  # nationwide daily increments for the recent month
    return provinceCompare, chinaDayList, chinaDayAddList
# %%
def crawl_countries_data():
    """Fetch the per-country epidemic list from the Tencent API.

    :return: the ``FAutoforeignList`` value of the response's ``data`` object,
        or an empty list when it is absent.
    :raises requests.RequestException: on timeout, connection failure or an
        HTTP error status.
    """
    url = "https://api.inews.qq.com/newsqa/v1/automation/modules/list?modules=FAutoforeignList"
    # Without a timeout, requests can block forever on a stalled connection.
    response = requests.get(url, timeout=10)
    # Fail loudly on 4xx/5xx instead of trying to parse an error page.
    response.raise_for_status()
    # "data" may be missing or null; fall back to an empty mapping so the
    # lookup below cannot fail with AttributeError on None.
    data = response.json().get("data") or {}
    foreignList = data.get("FAutoforeignList", [])
    return foreignList
def _build_daily_frame(records):
    """Turn a list of daily records into a DataFrame with full ``date`` values."""
    frame = pd.DataFrame(records)
    # The year lives in "y" and the month/day in "date"; join them and parse.
    # NOTE(review): assumes "date" is formatted "MM.DD" and "y" is a string
    # year, as in the per-country records — confirm against the API payload.
    frame["date"] = pd.to_datetime(frame["y"] + "." + frame["date"], format="%Y.%m.%d")
    return frame


def parse_china_daily_data(day_list, day_add_list):
    """Parse and store the nationwide daily cumulative and daily-increment data.

    :param day_list: records of daily cumulative data; each needs ``y`` and
        ``date`` fields.
    :param day_add_list: records of daily new-case data; same requirements.
    :return: None. The frames are appended to the ``china_day`` and
        ``china_day_add`` tables through ``save_data``.
    """
    # Build both frames first so a malformed payload fails before any write.
    day_df = _build_daily_frame(day_list)
    day_add_df = _build_daily_frame(day_add_list)
    save_data(day_df, "china_day", "append")
    save_data(day_add_df, "china_day_add", "append")
def parse_countries_total_data(api_rtn_data):
    """Build and store the per-country epidemic table.

    :param api_rtn_data: iterable of per-country dicts. Each one is read for
        ``name``, ``date`` (``"month.day"``), ``y`` (year), ``nowConfirm``,
        ``heal``, ``dead`` and ``confirmAdd``.
    :return: the DataFrame written to the ``world_epidemic`` table (the table
        is replaced on every call), including the ``name`` column that
        ``save_data`` adds when translation is requested.
    """
    columns = ["疫情地区", "日期", "确诊", "治愈", "死亡", "新增死亡"]
    rows = []
    for country in api_rtn_data:
        month, day = country.get("date").split(".")
        # int() already accepts zero-padded text such as "07". The previous
        # manual stripping reduced "10" to "0" and made datetime.date raise
        # for every October record.
        report_date = datetime.date(int(country.get("y")), int(month), int(day))
        rows.append({
            "疫情地区": country.get("name", ""),
            "日期": report_date,
            "确诊": country.get("nowConfirm", 0),
            "治愈": country.get("heal", 0),
            "死亡": country.get("dead", 0),
            # NOTE(review): this column is labelled "新增死亡" but is filled
            # from "confirmAdd"; the label is kept because the stored column
            # name may be read elsewhere.
            "新增死亡": country.get("confirmAdd", 0),
        })
    # Passing columns keeps the schema even when the input is empty.
    df = pd.DataFrame(rows, columns=columns)
    save_data(df, "world_epidemic", "replace", True)
    return df
def parse_provinces_total_data(api_rtn_data):
    """Build and store the per-province epidemic table.

    :param api_rtn_data: mapping of province name to a dict that is read for
        ``nowConfirm``, ``heal``, ``dead`` and ``confirmAdd``.
    :return: the DataFrame appended to the ``china_total_epidemic`` table.
    """
    columns = ["疫情地区", "日期", "确诊", "治愈", "死亡", "新增死亡"]
    # Take the timestamp once so every row of this snapshot carries the same
    # value and the batch can be selected by equality on "日期".
    snapshot_time = datetime.datetime.now()
    rows = [
        {
            "疫情地区": province,
            "日期": snapshot_time,
            "确诊": total_data.get("nowConfirm", 0),
            "治愈": total_data.get("heal", 0),
            "死亡": total_data.get("dead", 0),
            # NOTE(review): this column is labelled "新增死亡" but is filled
            # from "confirmAdd"; the label is kept because the stored column
            # name may be read elsewhere.
            "新增死亡": total_data.get("confirmAdd", 0),
        }
        for province, total_data in api_rtn_data.items()
    ]
    # Passing columns keeps the schema even when the input is empty.
    df = pd.DataFrame(rows, columns=columns)
    save_data(df, 'china_total_epidemic')
    return df
# %%
def main():
    """Run one full crawl: domestic daily data, province totals, then countries."""
    province_totals, daily_totals, daily_additions = crawl_china_data()
    parse_china_daily_data(daily_totals, daily_additions)
    parse_provinces_total_data(province_totals)
    # The overseas data is fetched only after the domestic tables are written.
    foreign_countries = crawl_countries_data()
    parse_countries_total_data(foreign_countries)
# Run the crawl only when this file is executed as a script, then report completion.
if __name__ == "__main__":
    main()
    print("爬取完成")
注意:外国国家名称转换为英文
我们需要爬的数据是中国国内数据与全球国家的数据。由于可视化时需要用到echarts绘制世界地图,而爬取到的各国家名称是中文,下面需要将中文转换为英文。
转换字典如下:
# Maps the Chinese country/region names returned by the data API to the
# English names used as region keys on the world map.
COUNTRIES_CH_EN_DICT = {
    "索马里": "Somalia",
    "列支敦士登": "Liechtenstein",
    "摩洛哥": "Morocco",
    "西撒哈拉": "W. Sahara",
    "塞尔维亚": "Serbia",
    "阿富汗": "Afghanistan",
    "安哥拉": "Angola",
    "阿尔巴尼亚": "Albania",
    "安道尔共和国": "Andorra",
    "阿拉伯联合酋长国": "United Arab Emirates",
    "阿根廷": "Argentina",
    "亚美尼亚": "Armenia",
    "澳大利亚": "Australia",
    "奥地利": "Austria",
    "阿塞拜疆": "Azerbaijan",
    "布隆迪": "Burundi",
    "比利时": "Belgium",
    "贝宁": "Benin",
    "布基纳法索": "Burkina Faso",
    "孟加拉国": "Bangladesh",
    "保加利亚": "Bulgaria",
    "巴林": "Bahrain",
    "巴哈马": "Bahamas",
    "波斯尼亚和黑塞哥维那": "Bosnia and Herz.",
    "白俄罗斯": "Belarus",
    "伯利兹": "Belize",
    "百慕大": "Bermuda",
    "玻利维亚": "Bolivia",
    "巴西": "Brazil",
    "巴巴多斯": "Barbados",
    "文莱": "Brunei",
    "不丹": "Bhutan",
    "博茨瓦纳": "Botswana",
    "中非": "Central African Rep.",
    "加拿大": "Canada",
    "瑞士": "Switzerland",
    "智利": "Chile",
    "中国": "China",
    # The apostrophe was missing ("Côte dIvoire"), so the name could not
    # match the map region.
    "科特迪瓦": "Côte d'Ivoire",
    "喀麦隆": "Cameroon",
    "刚果民主共和国": "Dem. Rep. Congo",
    "刚果": "Congo",
    "哥伦比亚": "Colombia",
    "佛得角": "Cape Verde",
    "哥斯达黎加": "Costa Rica",
    "古巴": "Cuba",
    "北塞浦路斯": "N. Cyprus",
    "塞浦路斯": "Cyprus",
    "捷克": "Czech Rep.",
    "德国": "Germany",
    "吉布提": "Djibouti",
    "丹麦": "Denmark",
    # 多米尼加 is the Dominican Republic; Dominica is 多米尼克.
    "多米尼加": "Dominican Rep.",
    "多米尼克": "Dominica",
    "阿尔及利亚": "Algeria",
    "厄瓜多尔": "Ecuador",
    "埃及": "Egypt",
    "厄立特里亚": "Eritrea",
    "西班牙": "Spain",
    "爱沙尼亚": "Estonia",
    "埃塞俄比亚": "Ethiopia",
    "芬兰": "Finland",
    "斐济": "Fiji",
    "法国": "France",
    "加蓬": "Gabon",
    "英国": "United Kingdom",
    "格鲁吉亚": "Georgia",
    "加纳": "Ghana",
    "几内亚": "Guinea",
    "冈比亚": "Gambia",
    "几内亚比绍": "Guinea-Bissau",
    "赤道几内亚": "Eq. Guinea",
    "希腊": "Greece",
    "格林纳达": "Grenada",
    "格陵兰": "Greenland",
    "危地马拉": "Guatemala",
    "关岛": "Guam",
    "圭亚那": "Guyana",
    "洪都拉斯": "Honduras",
    "克罗地亚": "Croatia",
    "海地": "Haiti",
    "匈牙利": "Hungary",
    "印度尼西亚": "Indonesia",
    "印度": "India",
    "英属印度洋领土": "Br. Indian Ocean Ter.",
    "爱尔兰": "Ireland",
    "伊朗": "Iran",
    "伊拉克": "Iraq",
    "冰岛": "Iceland",
    "以色列": "Israel",
    "意大利": "Italy",
    "牙买加": "Jamaica",
    "约旦": "Jordan",
    "日本": "Japan",
    "锡亚琴冰川": "Siachen Glacier",
    "哈萨克斯坦": "Kazakhstan",
    "肯尼亚": "Kenya",
    # The original key is kept for compatibility; the correctly spelled
    # name is added so real data matches.
    "吉尔吉斯坦": "Kyrgyzstan",
    "吉尔吉斯斯坦": "Kyrgyzstan",
    "柬埔寨": "Cambodia",
    "韩国": "Korea",
    "科威特": "Kuwait",
    "老挝": "Lao PDR",
    "黎巴嫩": "Lebanon",
    "利比里亚": "Liberia",
    "利比亚": "Libya",
    "斯里兰卡": "Sri Lanka",
    "莱索托": "Lesotho",
    "立陶宛": "Lithuania",
    "卢森堡": "Luxembourg",
    "拉脱维亚": "Latvia",
    "摩尔多瓦": "Moldova",
    "马达加斯加": "Madagascar",
    "墨西哥": "Mexico",
    "马其顿": "Macedonia",
    "马里": "Mali",
    "马耳他": "Malta",
    "缅甸": "Myanmar",
    "黑山": "Montenegro",
    "蒙古": "Mongolia",
    "莫桑比克": "Mozambique",
    "毛里塔尼亚": "Mauritania",
    "毛里求斯": "Mauritius",
    "马拉维": "Malawi",
    "马来西亚": "Malaysia",
    "纳米比亚": "Namibia",
    "新喀里多尼亚": "New Caledonia",
    "尼日尔": "Niger",
    "尼日利亚": "Nigeria",
    "尼加拉瓜": "Nicaragua",
    "荷兰": "Netherlands",
    "挪威": "Norway",
    "尼泊尔": "Nepal",
    "新西兰": "New Zealand",
    "阿曼": "Oman",
    "巴基斯坦": "Pakistan",
    "巴拿马": "Panama",
    "秘鲁": "Peru",
    "菲律宾": "Philippines",
    "巴布亚新几内亚": "Papua New Guinea",
    "波兰": "Poland",
    "波多黎各": "Puerto Rico",
    "朝鲜": "Dem. Rep. Korea",
    "葡萄牙": "Portugal",
    "巴拉圭": "Paraguay",
    "巴勒斯坦": "Palestine",
    "卡塔尔": "Qatar",
    "罗马尼亚": "Romania",
    "俄罗斯": "Russia",
    "卢旺达": "Rwanda",
    "沙特阿拉伯": "Saudi Arabia",
    "苏丹": "Sudan",
    "南苏丹": "S. Sudan",
    "塞内加尔": "Senegal",
    "新加坡": "Singapore",
    "所罗门群岛": "Solomon Is.",
    "塞拉利昂": "Sierra Leone",
    "萨尔瓦多": "El Salvador",
    "苏里南": "Suriname",
    "斯洛伐克": "Slovakia",
    "斯洛文尼亚": "Slovenia",
    "瑞典": "Sweden",
    "斯威士兰": "Swaziland",
    "塞舌尔": "Seychelles",
    "叙利亚": "Syria",
    "乍得": "Chad",
    "多哥": "Togo",
    "泰国": "Thailand",
    "塔吉克斯坦": "Tajikistan",
    "土库曼斯坦": "Turkmenistan",
    "东帝汶": "Timor-Leste",
    "汤加": "Tonga",
    "特立尼达和多巴哥": "Trinidad and Tobago",
    "突尼斯": "Tunisia",
    "土耳其": "Turkey",
    "坦桑尼亚": "Tanzania",
    "乌干达": "Uganda",
    "乌克兰": "Ukraine",
    "乌拉圭": "Uruguay",
    "美国": "United States",
    "乌兹别克斯坦": "Uzbekistan",
    "委内瑞拉": "Venezuela",
    "越南": "Vietnam",
    "瓦努阿图": "Vanuatu",
    "也门": "Yemen",
    "南非": "South Africa",
    "赞比亚": "Zambia",
    "津巴布韦": "Zimbabwe",
    "奥兰群岛": "Aland",
    "美属萨摩亚": "American Samoa",
    "南极洲": "Fr. S. Antarctic Lands",
    "安提瓜和巴布达": "Antigua and Barb.",
    "科摩罗": "Comoros",
    "库拉索岛": "Curaçao",
    "开曼群岛": "Cayman Is.",
    "马尔维纳斯群岛(福克兰)": "Falkland Is.",
    "法罗群岛": "Faeroe Is.",
    "密克罗尼西亚": "Micronesia",
    "赫德岛和麦克唐纳群岛": "Heard I. and McDonald Is.",
    "曼岛": "Isle of Man",
    "泽西岛": "Jersey",
    "基里巴斯": "Kiribati",
    "圣卢西亚": "Saint Lucia",
    "北马里亚纳群岛": "N. Mariana Is.",
    "蒙特塞拉特": "Montserrat",
    "纽埃": "Niue",
    "帕劳": "Palau",
    "法属波利尼西亚": "Fr. Polynesia",
    "南乔治亚岛和南桑威奇群岛": "S. Geo. and S. Sandw. Is.",
    "圣赫勒拿": "Saint Helena",
    "圣皮埃尔和密克隆群岛": "St. Pierre and Miquelon",
    "圣多美和普林西比": "São Tomé and Principe",
    "特克斯和凯科斯群岛": "Turks and Caicos Is.",
    "圣文森特和格林纳丁斯": "St. Vin. and Gren.",
    "美属维尔京群岛": "U.S. Virgin Is.",
    "萨摩亚": "Samoa",
}
最后在保存数据时加上语句 df['name'] = df['疫情地区'].apply(traslate),将各国家的中文名翻译为英文,并作为新的一列加入到DataFrame中
三、数据保存
def save_data(df, table_name, if_exists="append", need_translate=False):
    """Write a DataFrame to a table of the local ``myspider`` MySQL database.

    :param df: DataFrame to store. When ``need_translate`` is true, a ``name``
        column is added to it in place before it is written.
    :param table_name: name of the destination table.
    :param if_exists: forwarded to ``DataFrame.to_sql`` (``"fail"``,
        ``"replace"`` or ``"append"``).
    :param need_translate: when true, fill ``df['name']`` by applying
        ``traslate`` to every value of the ``疫情地区`` column.
    """
    if need_translate:
        df['name'] = df['疫情地区'].apply(traslate)
    # NOTE(review): the connection URL (host, database, user, password) is
    # hard-coded; adjust it to your own environment.
    engine = create_engine('mysql://root:123456@localhost:3306/myspider?charset=utf8')
    try:
        # index=False: the DataFrame index is not written as a column.
        df.to_sql(table_name, con=engine, if_exists=if_exists, index=False)
    finally:
        # A fresh engine is built on every call, so release its connection
        # pool instead of leaving the connections open.
        engine.dispose()
# Note: the database name, user and password live in the connection string
# inside save_data — edit them there. Only the DataFrame and the destination
# table name are passed here, matching save_data(df, table_name, ...).
save_data(df, table_name)
数据库一共保存四张表,分别为最近一个月每天汇总的累计疫情数据china_day、最近一个月每天新增数据china_day_add、每天各省份的疫情数据china_total_epidemic、最近一个月全球各地区的疫情累计数据world_epidemic
最后保存到数据库中的数据格式如下
四、完整项目获取
关注一下公众号,回复"0007"即可get完整项目源码