Giter Site home page Giter Site logo

yash9460 / gsp787-insights-from-data-with-bigquery-challenge-lab Goto Github PK

View Code? Open in Web Editor NEW
2.0 1.0 0.0 3 KB

The dataset and table that will be used for this analysis will be : bigquery-public-data.covid19_open_data.covid19_open_data. This repository contains country-level datasets of daily time-series data related to COVID-19 globally. It includes data relating to demographics, economy, epidemiology, geography, health, hospitalizations, mobility, government response, and weather.

gsp787-insights-from-data-with-bigquery-challenge-lab's Introduction

GSP787-Insights-from-Data-with-BigQuery-Challenge-Lab

The dataset and table that will be used for this analysis will be : bigquery-public-data.covid19_open_data.covid19_open_data. This repository contains country-level datasets of daily time-series data related to COVID-19 globally. It includes data relating to demographics, economy, epidemiology, geography, health, hospitalizations, mobility, government response, and weather.

Navigation Menu -> BigQuery.

Task - 1 : Total Confirmed Cases

SELECT sum(cumulative_confirmed) as total_cases_worldwide FROM `bigquery-public-data.covid19_open_data.covid19_open_data` where date='2020-04-15'

Task - 2 : Worst Affected Areas

with deaths_by_states as (

SELECT subregion1_name as state, sum(cumulative_deceased) as death_count

FROM `bigquery-public-data.covid19_open_data.covid19_open_data`

where country_name="United States of America" and date='2020-04-10' and subregion1_name is NOT NULL

group by subregion1_name

)

select count(*) as count_of_states

from deaths_by_states

where death_count > 100

Task - 3 : Identifying Hotspots

SELECT * FROM (

SELECT subregion1_name as state, sum(cumulative_confirmed) as total_confirmed_cases

FROM `bigquery-public-data.covid19_open_data.covid19_open_data`

WHERE country_code="US" AND date='2020-04-10' AND subregion1_name is NOT NULL

GROUP BY subregion1_name

ORDER BY total_confirmed_cases DESC ) WHERE total_confirmed_cases > 1000

Task - 4 : Fatality Ratio

SELECT sum(cumulative_confirmed) as total_confirmed_cases, sum(cumulative_deceased) as total_deaths, (sum(cumulative_deceased)/sum(cumulative_confirmed))*100 as case_fatality_ratio

FROM `bigquery-public-data.covid19_open_data.covid19_open_data`

where country_name="Italy" AND date BETWEEN '2020-04-01'and '2020-04-30'

Task - 5 : Identifying specific day

SELECT date

FROM `bigquery-public-data.covid19_open_data.covid19_open_data`

where country_name="Italy" and cumulative_deceased>10000

order by date asc

limit 1

Task - 6 : Finding days with zero net new cases :-

WITH india_cases_by_date AS (

SELECT

date,

SUM( cumulative_confirmed ) AS cases

FROM

`bigquery-public-data.covid19_open_data.covid19_open_data`

WHERE

country_name ="India"

AND date between '2020-02-21' and '2020-03-15'

GROUP BY

date

ORDER BY

date ASC

)

, india_previous_day_comparison AS

(SELECT

date,

cases,

LAG(cases) OVER(ORDER BY date) AS previous_day,

cases - LAG(cases) OVER(ORDER BY date) AS net_new_cases

FROM india_cases_by_date

)

select count(*)

from india_previous_day_comparison

where net_new_cases=0

Task - 7 : Doubling rate

WITH us_cases_by_date AS (

SELECT

date,

SUM(cumulative_confirmed) AS cases

FROM

`bigquery-public-data.covid19_open_data.covid19_open_data`

WHERE

country_name="United States of America"

AND date between '2020-03-22' and '2020-04-20'

GROUP BY

date

ORDER BY

date ASC

 )



, us_previous_day_comparison AS

(SELECT

date,

cases,

LAG(cases) OVER(ORDER BY date) AS previous_day,

cases - LAG(cases) OVER(ORDER BY date) AS net_new_cases,

(cases - LAG(cases) OVER(ORDER BY date))*100/LAG(cases) OVER(ORDER BY date) AS percentage_increase

FROM us_cases_by_date

)



select Date, cases as Confirmed_Cases_On_Day, previous_day as Confirmed_Cases_Previous_Day, percentage_increase as Percentage_Increase_In_Cases

from us_previous_day_comparison

where percentage_increase > 10

Task - 8 : Recovery rate

WITH cases_by_country AS (

SELECT

country_name AS country,

sum(cumulative_confirmed) AS cases,

sum(cumulative_recovered) AS recovered_cases

FROM

bigquery-public-data.covid19_open_data.covid19_open_data

WHERE

date = '2020-05-10'

GROUP BY

country_name

)



, recovered_rate AS

(SELECT

country, cases, recovered_cases,

(recovered_cases * 100)/cases AS recovery_rate

FROM cases_by_country

)



SELECT country, cases AS confirmed_cases, recovered_cases, recovery_rate

FROM recovered_rate

WHERE cases > 50000

ORDER BY recovery_rate desc

LIMIT 10

Task - 9 : CDGR - Cumulative Daily Growth Rate

WITH

france_cases AS (

SELECT

date,

SUM(cumulative_confirmed) AS total_cases

FROM

`bigquery-public-data.covid19_open_data.covid19_open_data`

WHERE

country_name="France"

AND date IN ('2020-01-24',

  '2020-05-10')

GROUP BY

date

ORDER BY

date)

, summary as (

SELECT

total_cases AS first_day_cases,

LEAD(total_cases) OVER(ORDER BY date) AS last_day_cases,

DATE_DIFF(LEAD(date) OVER(ORDER BY date),date, day) AS days_diff

FROM

france_cases

LIMIT 1

)

select first_day_cases, last_day_cases, days_diff, POW((last_day_cases/first_day_cases),(1/days_diff))-1 as cdgr

from summary

Task - 10 : Create a Datastudio report

SELECT

date, SUM(cumulative_confirmed) AS country_cases,

SUM(cumulative_deceased) AS country_deaths

FROM

bigquery-public-data.covid19_open_data.covid19_open_data

WHERE

date BETWEEN '2020-03-15'

AND '2020-04-30'

AND country_name ="United States of America"

GROUP BY date
  • Now, Click on Explore Data > Explore with Data Studio > Get Started 
    
  • Select Authorize
    
  • At right corner, Under the Available field, Drag the country_cases into the Dimension
    
  • Now select the line chart, and hence the task is completed.  
    

gsp787-insights-from-data-with-bigquery-challenge-lab's People

Contributors

yash9460 avatar

Stargazers

 avatar  avatar

Watchers

 avatar

Recommend Projects

  • React photo React

    A declarative, efficient, and flexible JavaScript library for building user interfaces.

  • Vue.js photo Vue.js

    ๐Ÿ–– Vue.js is a progressive, incrementally-adoptable JavaScript framework for building UI on the web.

  • Typescript photo Typescript

    TypeScript is a superset of JavaScript that compiles to clean JavaScript output.

  • TensorFlow photo TensorFlow

    An Open Source Machine Learning Framework for Everyone

  • Django photo Django

    The Web framework for perfectionists with deadlines.

  • D3 photo D3

    Bring data to life with SVG, Canvas and HTML. ๐Ÿ“Š๐Ÿ“ˆ๐ŸŽ‰

Recommend Topics

  • javascript

    JavaScript (JS) is a lightweight interpreted programming language with first-class functions.

  • web

    Some thing interesting about web. New door for the world.

  • server

    A server is a program made to process requests and deliver data to clients.

  • Machine learning

    Machine learning is a way of modeling and interpreting data that allows a piece of software to respond intelligently.

  • Game

    Some thing interesting about game, make everyone happy.

Recommend Org

  • Facebook photo Facebook

    We are working to build community through open source technology. NB: members must have two-factor auth.

  • Microsoft photo Microsoft

    Open source projects and samples from Microsoft.

  • Google photo Google

    Google โค๏ธ Open Source for everyone.

  • D3 photo D3

    Data-Driven Documents codes.