0 Star 1 Fork 0

Tony_幽城 / py-156431-Sentiment-Analysis

加入 Gitee
与超过 1200万 开发者一起发现、参与优秀开源项目,私有仓库也完全免费 :)
免费加入
该仓库未声明开源许可证文件(LICENSE),使用请关注具体项目描述及其代码上游依赖。
克隆/下载
sa_over_space_time.py 3.22 KB
一键复制 编辑 原始数据 按行查看 历史
w00406273 提交于 2021-05-05 16:30 . paper.md done.
#! pip install branca==0.4.1 #0.3.1
# ! pip install wordcloud
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import seaborn as sns
import os
import string
import re
from datetime import datetime
import matplotlib.pyplot as plt
import seaborn as sns
import branca.colormap as cm
# from mpl_toolkits.basemap import Basemap
import requests
import folium
from folium import plugins
from folium.plugins import HeatMap
import branca.colormap
from nltk.tokenize import TweetTokenizer
from nltk.corpus import stopwords
from nltk import pos_tag, ne_chunk
from nltk.sentiment.vader import SentimentIntensityAnalyzer
from pandas import DataFrame
from wordcloud import WordCloud
from tqdm import tqdm, notebook
from iso3166 import countries
import plotly.express as px
# %matplotlib inline
# Remove pandas' display limits (column count, row count, cell width and
# total width) so printed frames are not truncated.
pd.options.display.max_columns = None
pd.options.display.max_rows = None
pd.options.display.max_colwidth = None
pd.options.display.width = None
def read_data(path: str = 'sa.csv') -> DataFrame:
    """Load the sentiment CSV and add a per-day grouping column.

    Args:
        path: CSV file to read; its first column is used as the index.
            Defaults to ``'sa.csv'`` so existing zero-argument calls keep
            working unchanged.

    Returns:
        The frame sorted by its parsed ``date`` column, with an extra
        ``day`` column holding the text before the first space of each
        stringified timestamp.

    Raises:
        KeyError: if the file has no ``date`` column.
    """
    df = pd.read_csv(path, index_col=0)
    df['date'] = pd.to_datetime(df['date'])
    df = df.sort_values(['date'])
    # Index each split list element-wise rather than using ``expand=True``
    # followed by ``[0]``: the expanded frame has no column 0 when there are
    # no rows, which made an empty CSV fail with KeyError.
    df['day'] = df['date'].astype(str).str.split(' ').str[0]
    return df
def group_by_mean(df: DataFrame, key: str):
    """Average ``key`` for every (``day``, ``alpha3``) pair found in ``df``.

    Returns a frame in which ``day``, ``alpha3`` and ``key`` are ordinary
    columns, one row per pair.
    """
    grouping_columns = ['day', 'alpha3']
    per_group_mean = df.groupby(grouping_columns)[key].mean()
    # Move the (day, alpha3) index levels back into regular columns.
    return per_group_mean.reset_index()
def clean_location(df_day_loc_key: DataFrame):
    """Reduce free-text locations to a country and attach an ``alpha3`` code.

    The second comma-separated piece of each location string (whitespace
    stripped) is taken as the place name. Names matching an ``iso3166``
    country name are translated to that country's alpha-3 code; a few
    hand-listed aliases are mapped to ``GBR`` / ``USA``. Rows whose location
    is missing or could not be mapped are dropped.

    Args:
        df_day_loc_key: frame with a ``location`` column, or a
            ``user_location`` column that is renamed to ``location``.

    Returns:
        A new frame containing only the retained rows, with ``location``
        normalised and an added ``alpha3`` column. The input frame is not
        modified.
    """
    # Work on a copy: the column assignments below previously leaked into the
    # caller's frame whenever it already had a 'location' column.
    frame = df_day_loc_key.copy()
    if 'location' not in frame.columns:
        frame = frame.rename({'user_location': 'location'}, axis=1)

    pieces = frame['location'].str.split(',', expand=True)
    if 1 in pieces.columns:
        frame['location'] = pieces[1].str.strip()
    else:
        # No location contains a comma, so there is no second piece at all;
        # indexing column 1 unconditionally raised KeyError here.
        frame['location'] = None

    country_dict = {c.name: c.alpha3 for c in countries}
    frame['alpha3'] = frame['location']
    frame = frame.replace({"alpha3": country_dict})

    country_list = ['England', 'United States', 'United Kingdom', 'London', 'UK']
    # Keep rows that have a location and either already read 'USA', are one of
    # the hand-listed names, or were translated by the country table (location
    # and alpha3 then differ).
    keep = frame['location'].notnull() & (
        (frame['alpha3'] == 'USA')
        | frame['location'].isin(country_list)
        | (frame['location'] != frame['alpha3'])
    )
    # Explicit copy so the .loc writes below target an independent frame
    # rather than a filtered slice (avoids chained-assignment warnings).
    frame = frame[keep].copy()

    gbr = ['England', 'United Kingdom', 'London', 'UK']
    # NOTE(review): 'NY', 'CA' and 'GA' look unreachable - such rows are not in
    # country_list and are presumably not iso3166 country names, so the filter
    # above has already removed them. Confirm whether they were meant to be kept.
    us = ['United States', 'NY', 'CA', 'GA']
    frame.loc[frame['location'].isin(gbr), 'alpha3'] = 'GBR'
    frame.loc[frame['location'].isin(us), 'alpha3'] = 'USA'
    # Collapse every alias onto one display name per country.
    frame.loc[frame['alpha3'] == 'USA', 'location'] = 'USA'
    frame.loc[frame['alpha3'] == 'GBR', 'location'] = 'United Kingdom'
    return frame
# Script pipeline: load the CSV, normalise the locations, then average the
# 'sa' column per (day, alpha3) pair.
df = read_data()
df = clean_location(df)
# DataFrame.info() writes its summary itself and returns None, so wrapping it
# in print() only appended a stray "None" line to the output.
df.info()
df_groupby = group_by_mean(df, 'sa')
def plot_hashtag_map(data):
    """Show an animated world choropleth of the ``sa`` column.

    ``data`` is handed straight to ``px.choropleth``; its ``alpha3``, ``sa``
    and ``day`` columns are referenced by name, with ``day`` driving the
    animation frames.
    """
    choropleth_options = {
        'locations': "alpha3",
        'hover_name': "sa",
        'color': "sa",
        'animation_frame': "day",
        'projection': "natural earth",
        'color_continuous_scale': px.colors.sequential.Plasma,
        'title': 'Dynamic of sa',
        'width': 800,
        'height': 600,
    }
    figure = px.choropleth(data, **choropleth_options)
    figure.show()  # a browser is opened at this point
# Render the animated map from the per-(day, alpha3) means in df_groupby.
plot_hashtag_map(df_groupby)
马建仓 AI 助手
尝试更多
代码解读
代码找茬
代码优化
1
https://gitee.com/Tony36051/py-156431-sentiment-analysis.git
git@gitee.com:Tony36051/py-156431-sentiment-analysis.git
Tony36051
py-156431-sentiment-analysis
py-156431-Sentiment-Analysis
master

搜索帮助

344bd9b3 5694891 D2dac590 5694891