Importamos librerías

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
%matplotlib inline

Households

# Load the household survey tables. Every file lives in the same directory,
# so the path is spelled out once instead of in each call.
HOUSEHOLDS_DIR = '/Users/servandotorres/Documents/data/households'

general = pd.read_csv(f'{HOUSEHOLDS_DIR}/general.csv')
economy = pd.read_csv(f'{HOUSEHOLDS_DIR}/economy.csv')
house_info = pd.read_csv(f'{HOUSEHOLDS_DIR}/households_info.csv')
social_services_access = pd.read_csv(f'{HOUSEHOLDS_DIR}/social_services_access.csv')
# Same economy data indexed by village; derived from the frame already in
# memory rather than parsing economy.csv a second time.
economy_1 = economy.set_index('village')

Empezamos el análisis

Dataset general

# Preview the first five rows of the general household table.
general.head(n=5)

.dataframe thead th {
    text-align: left;
}

.dataframe tbody tr th {
    vertical-align: top;
}

</style>

	id	gender	hh_head	relation_hh_head	gender_hh_head	district	ward	village	subvillage	house_condition
0	V4-01	Male	Yes	NaN	NaN	Muheza	Misalai	Kazita	Kwemuyu	The house is not good
1	V4-02	Female	Yes	NaN	NaN	Muheza	Misalai	Kazita	Kwemuyu	The house is not good
2	V4-03	Female	Yes	NaN	NaN	Muheza	Misalai	Kazita	Kwemuyu	The house is normal
3	V4-04	Female	Yes	NaN	NaN	Muheza	Misalai	Kazita	Kwemuyu	The house is normal
4	V4-05	Male	Yes	NaN	NaN	Muheza	Misalai	Kazita	Kwemuyu	Good house

# Missing values per column -- the data is somewhat dirty.
general.isna().sum()

id                    0
gender                0
hh_head               0
relation_hh_head    226
gender_hh_head      226
district              0
ward                  0
village               0
subvillage            0
house_condition      46
dtype: int64

# Dimensions of the table as (rows, columns). The preceding bare
# `general.head()` was dropped: it was not the cell's last expression, so
# its result was never displayed.
general.shape

(339, 10)

# Descriptive statistics for the general household table.
general.describe()

.dataframe thead th {
    text-align: left;
}

.dataframe tbody tr th {
    vertical-align: top;
}

</style>

	id	gender	hh_head	relation_hh_head	gender_hh_head	district	ward	village	subvillage	house_condition
count	339	339	339	113	113	339	339	339	339	293
unique	339	2	2	5	2	1	2	8	39	75
top	V3-09	Male	Yes	husband / wife	Male	Muheza	Misalai	Misalai	Mlalo	Normal house
freq	1	179	226	74	100	339	224	70	20	77

# Categorical description, transposed so each column becomes one row.
# Dtypes are selected by name: the `np.object` alias was removed in
# NumPy 1.24 and `pd.Categorical` is an array class, not a dtype.
general.describe(include=['object', 'category']).T

.dataframe thead th {
    text-align: left;
}

.dataframe tbody tr th {
    vertical-align: top;
}

</style>

	count	unique	top	freq
id	339	339	V3-09	1
gender	339	2	Male	179
hh_head	339	2	Yes	226
relation_hh_head	113	5	husband / wife	74
gender_hh_head	113	2	Male	100
district	339	1	Muheza	339
ward	339	2	Misalai	224
village	339	8	Misalai	70
subvillage	339	39	Mlalo	20
house_condition	293	75	Normal house	77

Economía

# Preview the first five rows of the economy table.
economy.head(n=5)

.dataframe thead th {
    text-align: left;
}

.dataframe tbody tr th {
    vertical-align: top;
}

</style>

	id	village	acres_own_land	certificate	acres_cultivate_land	land_use	decision_maker	savings
0	V4-01	Kazita	7.0	No	7.0	Crop cultivation	father	Yes
1	V4-02	Kazita	0.5	No	0.5	Crop cultivation	mother	No
2	V4-03	Kazita	4.0	No	1.0	Crop cultivation	mother	Yes
3	V4-04	Kazita	1.5	Yes	1.5	Crop cultivation	both	Yes
4	V4-05	Kazita	0.0	No	0.0	NaN	father	Yes

Visión global del tomador de decisiones

# Horizontal count plot: number of households per decision maker.
sns.countplot(data=economy, y='decision_maker')

<matplotlib.axes._subplots.AxesSubplot at 0x1131985c0>

# Null values per column of the economy table.
economy.isna().sum()

id                       0
village                  0
acres_own_land           0
acres_rent_land          2
acres_not_paying_land    2
certificate              1
acres_cultivate_land     2
land_use                 6
decision_maker           0
savings                  0
dtype: int64

Superficie en propiedad por pueblo

# Owned acreage per village, drawn on a wide 16x4 inch figure.
ax = sns.barplot(data=economy, x='village', y='acres_own_land')
ax.figure.set_size_inches(16, 4)

# Same chart, with bars split by land use and drawn in a grey palette.
ax = sns.barplot(data=economy, x='village', y='acres_own_land',
                 hue='land_use', palette='Greys')
ax.figure.set_size_inches(16, 4)

Propiedad

# Owned acreage per village.
ax = sns.barplot(data=economy, x='village', y='acres_own_land')
ax.figure.set_size_inches(16, 4)

No paga

# Acreage used without payment, per village.
ax = sns.barplot(data=economy, x='village', y='acres_not_paying_land')
ax.figure.set_size_inches(16, 4)

Alquila

# Rented acreage per village.
ax = sns.barplot(data=economy, x='village', y='acres_rent_land')
ax.figure.set_size_inches(16, 4)

# Preview the first five rows of the household-member table.
house_info.head(n=5)

.dataframe thead th {
    text-align: left;
}

.dataframe tbody tr th {
    vertical-align: top;
}

</style>

	id	village	household_member	gender	age	edu_level
0	V1-03	Misalai	1st member	male	80	Primary
1	V4-01	Kazita	2nd member	female	42	No formal education
2	V4-01	Kazita	3rd member	female	40	No formal education
3	V4-01	Kazita	1st member	male	55	Secundary
4	V4-01	Kazita	5th member	female	11	Primary

# Preview the first five rows of the social-services access table.
social_services_access.head(n=5)

.dataframe thead th {
    text-align: left;
}

.dataframe tbody tr th {
    vertical-align: top;
}

</style>

	id	village	primary_school	service_quality	secundary_school	service_quality.1	market_place	service_quality.2	dispensary	service_quality.3	...	agriculture_storage	service_quality.8	agricultural_extension_service	service_quality.9	veterinary	service_quality.10	forest_service	service_quality.11	water_service	service_quality.12
0	V4-01	Kazita	yes	Bad	no	No	no	No	no	No	...	no	No	no	No	no	No	no	No	no	No
1	V4-02	Kazita	yes	Good	yes	Good	no	No	no	No	...	no	No	no	No	yes	Bad	yes	Bad	no	No
2	V4-03	Kazita	yes	Good	yes	Good	yes	Good	yes	Good	...	no	No	no	No	no	No	yes	Good	yes	Good
3	V4-04	Kazita	yes	Good	yes	Good	yes	Good	no	No	...	no	No	no	No	yes	Good	yes	Good	yes	Good
4	V4-05	Kazita	yes	Good	yes	Good	yes	Good	yes	Good	...	no	No	no	No	no	No	no	No	yes	Good

5 rows × 28 columns

Water access

# Load the water-access survey tables. The directory is spelled out once,
# and the accidental second read of water_collection.csv is removed.
WATER_ACCESS_DIR = '/Users/servandotorres/Documents/data/water-access'

drinking_water_distance = pd.read_csv(f'{WATER_ACCESS_DIR}/drinking_water_distance.csv')
water_collection = pd.read_csv(f'{WATER_ACCESS_DIR}/water_collection.csv')
water_payment = pd.read_csv(f'{WATER_ACCESS_DIR}/water_payment.csv')
water_quality = pd.read_csv(f'{WATER_ACCESS_DIR}/water_quality.csv')
water_sanitation = pd.read_csv(f'{WATER_ACCESS_DIR}/water_sanitation.csv')

# Raw village column of the drinking-water distance table.
drinking_water_distance['village']

import numpy as np
# Sorted distinct village labels. NOTE(review): the recorded output includes
# a malformed label, '8 =Kwelumbizi', which likely needs cleaning.
np.unique(drinking_water_distance['village'].values)

array(['8 =Kwelumbizi', 'Kazita', 'Kizerui', 'Kwemsoso', 'Mgambo',
       'Misalai', 'Shambangeda', 'Zirai'], dtype=object)

# Preview the first five rows of the drinking-water distance table.
drinking_water_distance.head(n=5)

.dataframe thead th {
    text-align: left;
}

.dataframe tbody tr th {
    vertical-align: top;
}

</style>

	id	village	rainy_season	dry_season
0	V4-01	Kazita	1- 2 hours	1- 2 hours
1	V4-02	Kazita	5 - 10 min	5 - 10 min
2	V4-03	Kazita	5 - 10 min	5 - 10 min
3	V4-04	Kazita	1- 2 hours	1- 2 hours
4	V4-05	Kazita	< 5 min	< 5 min

# Preview the first rows of the water-collection table.
water_collection.head()
# Optional categorical summary (left disabled):
#   water_collection.describe(include=['object', 'category']).T

.dataframe thead th {
    text-align: left;
}

.dataframe tbody tr th {
    vertical-align: top;
}

</style>

	id	village	collector	no_buckets
0	V4-01	Kazita	Adult women	6 - 7
1	V4-02	Kazita	Adult women	1 - 2
2	V4-03	Kazita	Adult women	2 - 3
3	V4-04	Kazita	Adult women	5 - 6
4	V4-05	Kazita	Adult men	1 - 2

# Preview the first five rows of the water-payment table.
water_payment.head(n=5)

.dataframe thead th {
    text-align: left;
}

.dataframe tbody tr th {
    vertical-align: top;
}

</style>

	id	village	rainy_season	dry_season
0	V4-01	Kazita	No	No
1	V4-02	Kazita	No	No
2	V4-03	Kazita	No	No
3	V4-04	Kazita	No	No
4	V4-05	Kazita	No	No

# Count of households per dry-season water-payment answer.
sns.countplot(data=water_payment, y='dry_season')

<matplotlib.axes._subplots.AxesSubplot at 0x11346a400>

# Count of households per rainy-season water-payment answer.
sns.countplot(data=water_payment, y='rainy_season')

<matplotlib.axes._subplots.AxesSubplot at 0x1135ee710>

# Categorical summary of the water-quality table, one row per column.
# Dtypes are selected by name: the `np.object` alias was removed in
# NumPy 1.24 and `pd.Categorical` is an array class, not a dtype.
water_quality.describe(include=['object', 'category']).T

.dataframe thead th {
    text-align: left;
}

.dataframe tbody tr th {
    vertical-align: top;
}

</style>

	count	unique	top	freq
id	339	339	V3-09	1
village	339	8	Misalai	70
dry_season	339	5	Clean	152
rainy_season	339	5	Clean	127

Water Sanitation

# Preview the first five rows of the water-sanitation table.
water_sanitation.head(n=5)

.dataframe thead th {
    text-align: left;
}

.dataframe tbody tr th {
    vertical-align: top;
}

</style>

	id	village	floor_material	wall_material	roof_material	toilet_type	toilet_satisfaction	water_in_toilet	rubbish_pit	utensils_rack
0	V4-01	Kazita	earth	mud bricks	galvanised metal sheets / iron sheets	flush toilet with flushing system	fairly satisfied	Water bucket in the toilet	Yes	Yes
1	V4-02	Kazita	earth	mud bricks	galvanised metal sheets / iron sheets	simple pit latrine unimproved	unsatisifed	Water bucket in the toilet	Yes	Yes
2	V4-03	Kazita	earth	mud and poles or stones	galvanised metal sheets / iron sheets	flush toilet	very satisfied	Water bucket in the toilet	Yes	Yes
3	V4-04	Kazita	earth	mud and poles or stones	galvanised metal sheets / iron sheets	flush toilet with flushing system	fairly satisfied	No water in the toilet	No	Yes
4	V4-05	Kazita	earth	baked or burnt bricks	galvanised metal sheets / iron sheets	flush toilet with flushing system	fairly satisfied	Water bucket in the toilet	Yes	Yes

# Categorical summary of the water-sanitation table, one row per column.
# Dtypes are selected by name: the `np.object` alias was removed in
# NumPy 1.24 and `pd.Categorical` is an array class, not a dtype.
water_sanitation.describe(include=['object', 'category']).T

.dataframe thead th {
    text-align: left;
}

.dataframe tbody tr th {
    vertical-align: top;
}

</style>

	count	unique	top	freq
id	339	339	V3-09	1
village	339	8	Misalai	70
floor_material	339	4	earth	261
wall_material	339	6	mud and poles or stones	180
roof_material	339	5	galvanised metal sheets / iron sheets	291
toilet_type	339	6	simple pit latrine unimproved	189
toilet_satisfaction	339	4	fairly satisfied	157
water_in_toilet	337	4	Water bucket in the toilet	311
rubbish_pit	339	2	Yes	211
utensils_rack	339	2	Yes	202

# Distinct dry-season water-quality labels. Printed explicitly: only a
# cell's last expression is echoed, so the bare call was silently discarded.
print(np.unique(water_quality['dry_season']))
water_quality.head()

.dataframe thead th {
    text-align: left;
}

.dataframe tbody tr th {
    vertical-align: top;
}

</style>

	id	village	dry_season	rainy_season
0	V4-01	Kazita	Quite clean	Quite clean
1	V4-02	Kazita	Quite clean	Clean
2	V4-03	Kazita	Clean	Clean
3	V4-04	Kazita	Clean	Clean
4	V4-05	Kazita	Quite clean	Quite clean

# Count of households per dry-season water-quality label; the Axes is kept
# in `ax` and echoed as the cell result.
ax = sns.countplot(data=water_quality, y='dry_season')
ax

<matplotlib.axes._subplots.AxesSubplot at 0x11125cf98>

Calidad del agua por estación

Sequía

# Dry season: count of households per water-quality label.
sns.countplot(data=water_quality, y='dry_season')

<matplotlib.axes._subplots.AxesSubplot at 0x1a1acab860>

Lluvia

# Rainy season: count of households per water-quality label.
sns.countplot(data=water_quality, y='rainy_season')

<matplotlib.axes._subplots.AxesSubplot at 0x113643d30>

elenatorro / waterhack2018 Goto Github PK

waterhack2018's Introduction

Importamos librerías

Households

Empezamos el análisis

Dataset general

Economía

Visión global del tomador de decisiones

Superficie en propiedad por pueblo

Propiedad

No paga

Alquila

Water access

Water Sanitation

Calidad del agua por estación

Sequía

Lluvia

waterhack2018's People

Contributors

Stargazers

Watchers

Recommend Projects

Recommend Topics

Recommend Org