项目作者: alejandropuerto

项目描述 :
高级语言: Jupyter Notebook
项目地址: git://github.com/alejandropuerto/covid-19-death-comparison.git


  1. # Libraries
  2. import datetime
  3. import numpy as np
  4. import pandas as pd
  5. import seaborn as sns
  6. import matplotlib.pyplot as plt
  7. from dateutil.parser import parse
  8. from pandas import Series

Data from contralacorrupcion.mx

  1. # Read the contralacorrupcion.mx CSV; FECHA is parsed as dates and used as the index
  2. covid19 = pd.read_csv("actas-defuncion-covid-19-cdmx1.csv",parse_dates=['FECHA'], index_col = "FECHA")
  3. covid19.head()  # preview the first rows































































EDAD SEMANA MES RAZON ACTA
FECHA
2020-03-18 41 AÑOS 12 3 CHOQUE SEPTICO, NEUMONIA POR COVID 19 POR SARS… 7129
2020-03-23 61 AÑOS 13 3 INSUFICIENCIA RESPIRATORIA AGUDA, NEUMONIA VIR… 4459
2020-03-26 60 AÑOS 13 3 SINDROME DE INSUFICIENCIA RESPIRATORIA AGUDA, … 4591
2020-03-26 37 AÑOS 13 3 NEUMONIA POR CORONAVIRUS 7879
2020-03-26 63 AÑOS 13 3 CERVICOVAGINITIS PURULENTA, CARCINOMA EPIDERMO… 7829

  1. number_by_date = covid19.drop(['EDAD', 'SEMANA', 'MES', 'RAZON', 'ACTA'], axis=1)  # drop every data column: only the FECHA index remains, one row per record
  2. number_by_date














































FECHA
2020-03-18
2020-03-23
2020-03-26
2020-03-26
2020-03-26
2020-05-12
2020-05-12
2020-05-12
2020-05-12
2020-05-12

4579 rows × 0 columns


  1. number_by_date['MUERTES'] = 0  # placeholder column; the zeros themselves are never used, the rows are only counted per date in a later cell
  1. number_by_date



























































MUERTES
FECHA
2020-03-18 0
2020-03-23 0
2020-03-26 0
2020-03-26 0
2020-03-26 0
2020-05-12 0
2020-05-12 0
2020-05-12 0
2020-05-12 0
2020-05-12 0

4579 rows × 1 columns


  1. number_by_date.drop(number_by_date.head(2).index, inplace=True)  # drops by the index labels of the first two rows (2020-03-18 and 2020-03-23 in the preview above); label-based, so any other row sharing those dates is removed too
  1. number_by_date = number_by_date.groupby('FECHA').count()  # rows per date -> MUERTES becomes the per-day count
  2. number_by_date.head()



































MUERTES
FECHA
2020-03-26 3
2020-03-27 2
2020-03-28 1
2020-03-29 2
2020-03-30 3

Data from gob.mx

  1. covid19_oficial = pd.read_csv("200521COVID19MEXICO.csv", sep = ",",parse_dates = ["FECHA_DEF"], encoding ='latin1')  # NOTE(review): FECHA_DEF shows 9999-99-99 values in later output, so presumably parse_dates leaves the column as text - confirm
  2. covid19_oficial.head(10)















































































































































































































































































FECHA_ACTUALIZACION ID_REGISTRO ORIGEN SECTOR ENTIDAD_UM SEXO ENTIDAD_NAC ENTIDAD_RES MUNICIPIO_RES TIPO_PACIENTE CARDIOVASCULAR OBESIDAD RENAL_CRONICA TABAQUISMO OTRO_CASO RESULTADO MIGRANTE PAIS_NACIONALIDAD PAIS_ORIGEN UCI
0 2020-05-21 11e989 2 3 27 2 27 27 4 2 2 2 2 2 1 1 99 México 99 2
1 2020-05-21 1aad65 2 4 19 2 5 5 18 2 2 2 2 2 99 1 99 México 99 1
2 2020-05-21 04f631 2 4 14 1 14 14 67 1 2 2 2 2 99 1 99 México 99 97
3 2020-05-21 02556b 2 4 15 1 15 15 110 2 2 2 2 2 99 1 99 México 99 2
4 2020-05-21 0356d5 2 4 9 1 9 9 5 2 2 2 2 1 99 1 99 México 99 2
5 2020-05-21 1d2dfb 2 4 25 2 14 25 1 1 2 2 2 2 99 1 99 México 99 97
6 2020-05-21 1b3e2b 2 4 9 2 7 9 2 1 2 2 2 2 99 1 99 México 99 97
7 2020-05-21 0c0eef 2 4 21 1 21 21 114 2 2 1 2 2 99 1 99 México 99 2
8 2020-05-21 043ea2 2 4 27 2 27 27 4 1 2 2 2 2 99 1 99 México 99 97
9 2020-05-21 0bd39a 2 4 8 1 8 8 17 1 2 2 2 2 99 1 99 México 99 97

10 rows × 35 columns


  1. temp = covid19_oficial[['FECHA_DEF', 'ENTIDAD_RES']]  # keep only the FECHA_DEF and ENTIDAD_RES columns
  1. temp = temp.set_index('FECHA_DEF')  # index the rows by FECHA_DEF
  1. temp



























































ENTIDAD_RES
FECHA_DEF
2020-04-27 27
2020-04-03 5
9999-99-99 14
2020-04-20 15
9999-99-99 9
2020-05-14 15
9999-99-99 26
9999-99-99 26
9999-99-99 15
9999-99-99 9

201838 rows × 1 columns


  1. from_cdmx = temp.loc[(temp['ENTIDAD_RES'] == 9)]  # Code 9 corresponds to CDMX as stated in the data dictionary. NOTE(review): the result is a filtered selection of temp; pandas may flag in-place changes to it with SettingWithCopyWarning
  1. from_cdmx



























































ENTIDAD_RES
FECHA_DEF
9999-99-99 9
9999-99-99 9
9999-99-99 9
9999-99-99 9
2020-03-22 9
2020-05-05 9
9999-99-99 9
9999-99-99 9
9999-99-99 9
9999-99-99 9

46594 rows × 1 columns


  1. from_cdmx.drop(['ENTIDAD_RES'], axis=1, inplace=True, errors='ignore')
  1. C:\Users\User\Anaconda3\lib\site-packages\pandas\core\frame.py:3997: SettingWithCopyWarning:
  2. A value is trying to be set on a copy of a slice from a DataFrame
  3. See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  4. errors=errors,
  1. from_cdmx














































FECHA_DEF
9999-99-99
9999-99-99
9999-99-99
9999-99-99
2020-03-22
2020-05-05
9999-99-99
9999-99-99
9999-99-99
9999-99-99

46594 rows × 0 columns


  1. from_cdmx['MUERTES'] = 0
  1. C:\Users\User\Anaconda3\lib\site-packages\ipykernel_launcher.py:1: SettingWithCopyWarning:
  2. A value is trying to be set on a copy of a slice from a DataFrame.
  3. Try using .loc[row_indexer,col_indexer] = value instead
  4. See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  5. """Entry point for launching an IPython kernel.
  1. from_cdmx = from_cdmx.groupby('FECHA_DEF').count()
  1. from_cdmx.head()



































MUERTES
FECHA_DEF
2020-03-16 1
2020-03-22 1
2020-03-23 1
2020-03-25 1
2020-03-26 3

  1. from_cdmx.drop(from_cdmx.tail(1).index, inplace=True) #9999-99-99 date is dropped
  1. from_cdmx = from_cdmx.reset_index()
  1. from_cdmx = from_cdmx.loc[(from_cdmx['FECHA_DEF'] >= '2020-03-26') & (from_cdmx['FECHA_DEF'] <= '2020-05-12')]
  1. from_cdmx = from_cdmx.set_index('FECHA_DEF')
  1. print(from_cdmx.head(1))
  2. print(from_cdmx.tail(1))
  1. MUERTES
  2. FECHA_DEF
  3. 2020-03-26 3
  4. MUERTES
  5. FECHA_DEF
  6. 2020-05-12 60
  1. from_cdmx = from_cdmx.reset_index()  # move FECHA_DEF back to a column so it can be converted
  2. from_cdmx['FECHA_DEF'] = pd.to_datetime(from_cdmx['FECHA_DEF'])  # convert FECHA_DEF to datetime values
  1. from_cdmx = from_cdmx.set_index('FECHA_DEF')  # restore FECHA_DEF as the index, which the .loc[start:end] slices in the plot rely on

Visualization

  1. sns.set(rc={'figure.figsize':(14, 7)})  # figure size passed through seaborn's rc settings
  1. start, end = '2020-03', '2020-05'  # month strings used to slice both date-indexed frames
  1. fig, ax = plt.subplots()
  2. ax.plot(from_cdmx.loc[start:end],  # daily counts built from the gob.mx file
  3. marker='o', markersize=8, linestyle='-', label='gob.mx')
  4. ax.plot(number_by_date.loc[start:end],  # daily counts built from the contralacorrupcion.mx file
  5. marker='o', markersize=8, linestyle='-', label='contralacorrupcion.mx')
  6. ax.set_xlabel('Date')
  7. ax.set_ylabel('Number of Deaths')
  8. ax.set_title('COVID-19 death comparison in CDMX')
  9. ax.legend()
  1. <matplotlib.legend.Legend at 0x1946b7e5a58>

death comparison