It would be cool if shadow_shift behaved like this:

I think that the best I can do now is this:

suggestion for `shadow_shift` and `label_missing` about naniar HOT 4 CLOSED

njtierney commented on May 22, 2024

suggestion for `shadow_shift` and `label_missing`

from naniar.

Comments (4)

njtierney commented on May 22, 2024

Something similar to this can be achieved with add_shadow_shift, except for the miss_label.

library(narnia)
library(tidyverse)

airquality %>%
  add_shadow_shift(vars = c("Ozone", "Solar.R")) %>%
  mutate(miss_label = label_missing_2d(Ozone,Solar.R)) %>%
  head()
#> # A tibble: 6 x 9
#>   Ozone Solar.R  Wind  Temp Month   Day Ozone_shift Solar.R_shift
#>   <int>   <int> <dbl> <int> <int> <int>       <dbl>         <dbl>
#> 1    41     190   7.4    67     5     1    41.00000     190.00000
#> 2    36     118   8.0    72     5     2    36.00000     118.00000
#> 3    12     149  12.6    74     5     3    12.00000     149.00000
#> 4    18     313  11.5    62     5     4    18.00000     313.00000
#> 5    NA      NA  14.3    56     5     5   -14.79436     -25.09977
#> 6    28      NA  14.9    66     5     6    28.00000     -19.64529
#> # ... with 1 more variables: miss_label <chr>

It would be interesting to add shadow columns for each variable added, and then a catch-all label.
Perhaps there is a more compact way of writing the code below:

airquality %>%
  select(Ozone, Solar.R) %>%
  add_shadow_shift(vars = c("Ozone", "Solar.R")) %>%
  cast_shadow(vars = c("Ozone", "Solar.R")) %>%
  select(-Ozone,-Solar.R) %>%
  mutate(any_missing = label_missings(airquality))
#> # A tibble: 153 x 5
#>    Ozone_shift Solar.R_shift Ozone_NA Solar.R_NA any_missing
#>          <dbl>         <dbl>   <fctr>     <fctr>       <chr>
#>  1    41.00000     190.00000      !NA        !NA Not Missing
#>  2    36.00000     118.00000      !NA        !NA Not Missing
#>  3    12.00000     149.00000      !NA        !NA Not Missing
#>  4    18.00000     313.00000      !NA        !NA Not Missing
#>  5   -14.94837     -28.02921       NA         NA     Missing
#>  6    28.00000     -32.93632      !NA         NA     Missing
#>  7    23.00000     299.00000      !NA        !NA Not Missing
#>  8    19.00000      99.00000      !NA        !NA Not Missing
#>  9     8.00000      19.00000      !NA        !NA Not Missing
#> 10   -16.26483     194.00000       NA        !NA     Missing
#> # ... with 143 more rows

from naniar.

njtierney commented on May 22, 2024

I think that the best I can do now is this:

library(narnia)
library(tidyverse)
#> Loading tidyverse: ggplot2
#> Loading tidyverse: tibble
#> Loading tidyverse: tidyr
#> Loading tidyverse: readr
#> Loading tidyverse: purrr
#> Loading tidyverse: dplyr
#> Conflicts with tidy packages ----------------------------------------------
#> filter(): dplyr, stats
#> lag():    dplyr, stats
aq_shift <- airquality %>%
  cast_shadow_shift(vars = c("Ozone", "Solar.R")) %>%
  add_label_missings()

aq_shift
#> # A tibble: 153 x 7
#>    Ozone Solar.R Ozone_NA Solar.R_NA Ozone_shift Solar.R_shift any_missing
#>    <int>   <int>   <fctr>     <fctr>       <dbl>         <dbl>       <chr>
#>  1    41     190      !NA        !NA    41.00000     190.00000 Not Missing
#>  2    36     118      !NA        !NA    36.00000     118.00000 Not Missing
#>  3    12     149      !NA        !NA    12.00000     149.00000 Not Missing
#>  4    18     313      !NA        !NA    18.00000     313.00000 Not Missing
#>  5    NA      NA       NA         NA   -17.38178     -29.47859     Missing
#>  6    28      NA      !NA         NA    28.00000     -33.72173     Missing
#>  7    23     299      !NA        !NA    23.00000     299.00000 Not Missing
#>  8    19      99      !NA        !NA    19.00000      99.00000 Not Missing
#>  9     8      19      !NA        !NA     8.00000      19.00000 Not Missing
#> 10    NA     194       NA        !NA   -11.54721     194.00000     Missing
#> # ... with 143 more rows

  ggplot(aq_shift,
         aes(x = Ozone_shift,
             y = Solar.R_shift,
             colour = any_missing)) + 
  geom_point()

This allows the user to get out the same data structure that powers geom_missing_point():

library(narnia)
  library(tidyverse)
#> Loading tidyverse: ggplot2
#> Loading tidyverse: tibble
#> Loading tidyverse: tidyr
#> Loading tidyverse: readr
#> Loading tidyverse: purrr
#> Loading tidyverse: dplyr
#> Conflicts with tidy packages ----------------------------------------------
#> filter(): dplyr, stats
#> lag():    dplyr, stats
  ggplot(airquality,
         aes(x = Ozone,
             y = Solar.R)) + 
    geom_missing_point()

from naniar.

njtierney commented on May 22, 2024

I have shortened this code to be a little more concise, with the (slightly verbose) function cast_shadow_shift_label:

library(tidyverse)
#> Loading tidyverse: ggplot2
#> Loading tidyverse: tibble
#> Loading tidyverse: tidyr
#> Loading tidyverse: readr
#> Loading tidyverse: purrr
#> Loading tidyverse: dplyr
#> Conflicts with tidy packages ----------------------------------------------
#> filter(): dplyr, stats
#> lag():    dplyr, stats
library(narnia)

# using cast is like transmute - it just casts shadow vars for the
# variables of interest; this facilitates plotting and other summaries
aq_shift <- airquality %>% cast_shadow_shift_label(c("Ozone", "Solar.R"))

aq_shift
#> # A tibble: 153 x 7
#>    Ozone Solar.R Ozone_NA Solar.R_NA Ozone_shift Solar.R_shift any_missing
#>    <int>   <int>   <fctr>     <fctr>       <dbl>         <dbl>       <chr>
#>  1    41     190      !NA        !NA    41.00000     190.00000 Not Missing
#>  2    36     118      !NA        !NA    36.00000     118.00000 Not Missing
#>  3    12     149      !NA        !NA    12.00000     149.00000 Not Missing
#>  4    18     313      !NA        !NA    18.00000     313.00000 Not Missing
#>  5    NA      NA       NA         NA   -17.13731     -17.57498     Missing
#>  6    28      NA      !NA         NA    28.00000     -32.76235     Missing
#>  7    23     299      !NA        !NA    23.00000     299.00000 Not Missing
#>  8    19      99      !NA        !NA    19.00000      99.00000 Not Missing
#>  9     8      19      !NA        !NA     8.00000      19.00000 Not Missing
#> 10    NA     194       NA        !NA   -14.16446     194.00000     Missing
#> # ... with 143 more rows

ggplot(aq_shift,
       aes(x = Ozone_shift,
           y = Solar.R_shift,
           colour = any_missing)) + 
  geom_point()

from naniar.

njtierney commented on May 22, 2024

Done!

from naniar.

suggestion for `shadow_shift` and `label_missing` about naniar HOT 4 CLOSED

Comments (4)

Related Issues (20)

Recommend Projects

React

Vue.js

Typescript

TensorFlow

Django

Laravel

D3

Recommend Topics

javascript

web

server

Machine learning

Visualization

Game

Recommend Org

Facebook

Microsoft

Google

Alibaba

D3

Tencent