# Stray banner from the code-hosting page ("code pull finished, the page will
# refresh automatically") — commented out so this file is valid Python.
# 代码拉取完成,页面将自动刷新
#! pip install branca==0.4.1 #0.3.1
# ! pip install wordcloud
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import seaborn as sns
import os
import string
import re
from datetime import datetime
import matplotlib.pyplot as plt
import seaborn as sns
import branca.colormap as cm
# from mpl_toolkits.basemap import Basemap
import requests
import folium
from folium import plugins
from folium.plugins import HeatMap
import branca.colormap
from nltk.tokenize import TweetTokenizer
from nltk.corpus import stopwords
from nltk import pos_tag, ne_chunk
from nltk.sentiment.vader import SentimentIntensityAnalyzer
from wordcloud import WordCloud
from tqdm import tqdm, notebook
from iso3166 import countries
import plotly.express as px
# %matplotlib inline
# Disable pandas display truncation so full frames print during exploration.
for _opt in ("max_columns", "max_rows", "max_colwidth", "width"):
    pd.set_option(f"display.{_opt}", None)
# Load the scraped tweet dataset and normalise its timestamp column.
covid = 'sa.csv'
df = pd.read_csv(covid, index_col=0)

# Parse timestamps and order the frame chronologically; derive a day-level
# key (the 'YYYY-MM-DD' part of the timestamp string) for later per-day
# aggregation.
df['date'] = pd.to_datetime(df['date'])
df = df.sort_values(by='date')
df['day'] = df['date'].astype(str).str.split(' ').str[0]
df.info()
# --- Aggregate tweet counts per day and country -----------------------------
# Count tweets per (day, raw user_location) pair.
hashtags_country = df.groupby(['day', 'user_location'])['user_name'].count().reset_index()
hashtags_country.columns = ['day', 'location', 'count']

# user_location is free text, presumably "City, Country/State"; keep the part
# after the first comma as the country/region candidate. Rows without a comma
# become NaN here and are dropped below.
hashtags_country['location'] = (
    hashtags_country['location'].str.split(',', expand=True)[1].str.strip()
)

# Map full ISO-3166 country names to their alpha-3 codes.
country_dict = {c.name: c.alpha3 for c in countries}
hashtags_country['alpha3'] = hashtags_country['location']
hashtags_country = hashtags_country.replace({"alpha3": country_dict})

# Keep rows that resolved to a country: the replace() above changed the value
# (location != alpha3), the location is a known US/UK alias, or it already
# mapped to 'USA'.
country_list = ['England', 'United States', 'United Kingdom', 'London', 'UK']
hashtags_country = hashtags_country[
    (hashtags_country['alpha3'] == 'USA') |
    (hashtags_country['location'].isin(country_list)) |
    (hashtags_country['location'] != hashtags_country['alpha3'])
]

# Collapse common aliases (and a few US state abbreviations) onto single
# USA / GBR buckets so they aggregate together.
gbr = ['England', 'United Kingdom', 'London', 'UK']
us = ['United States', 'NY', 'CA', 'GA']
hashtags_country = hashtags_country[hashtags_country['location'].notnull()]
hashtags_country.loc[hashtags_country['location'].isin(gbr), 'alpha3'] = 'GBR'
hashtags_country.loc[hashtags_country['location'].isin(us), 'alpha3'] = 'USA'
hashtags_country.loc[hashtags_country['alpha3'] == 'USA', 'location'] = 'USA'
hashtags_country.loc[hashtags_country['alpha3'] == 'GBR', 'location'] = 'United Kingdom'

# Re-aggregate after alias collapsing so each (day, location, alpha3) triple
# appears exactly once.
hashtags_country = hashtags_country.groupby(['day', 'location', 'alpha3'])['count'].sum().reset_index()

# The original had two bare expressions here (notebook display artifacts that
# are no-ops in a script); print a preview explicitly instead.
print(hashtags_country.head())
def plot_hashtag_map(data):
    """Render an animated choropleth of daily tweet counts per country.

    Expects *data* to have columns 'alpha3' (ISO-3166 alpha-3 code),
    'count', and 'day' (animation frame). Displays the figure via plotly's
    default renderer (opens the browser when run as a script).
    """
    figure = px.choropleth(
        data,
        locations="alpha3",
        color="count",
        hover_name="count",
        animation_frame="day",
        title='Dynamic of tweets count',
        projection="natural earth",
        color_continuous_scale=px.colors.sequential.Plasma,
        width=800,
        height=600,
    )
    figure.show()  # opens in the browser
plot_hashtag_map(hashtags_country)
# NOTE(review): the two lines below are a content-moderation notice injected
# by the code-hosting site, not part of the program; kept as comments so the
# file remains valid Python.
# 此处可能存在不合适展示的内容,页面不予展示。您可通过相关编辑功能自查并修改。
# 如您确认内容无涉及 不当用语 / 纯广告导流 / 暴力 / 低俗色情 / 侵权 / 盗版 / 虚假 / 无价值内容或违法国家有关法律法规的内容,可点击提交进行申诉,我们将尽快为您处理。