본문 바로가기
석사과정

[Statistical Analysis with R] Advanced Tables & Plots

by JANIMUN 2021. 3. 2.

Tables 

table1::table1() function: produces descriptive statistics for the variables you specify.

ex) table1::table1 means: take the function table1() from the package table1 (package::function notation).

      table1::table1(~Pregnancies + Age + BMI + BloodPressure + Glucose | Diabetes, data = Pima_diabetes)

       yields: 

the qwraps2::summary_table() is another way to create a Table 1

  • table1::table1() only allows knitting to html
  • to make a table using table1::table1() is generally faster than using qwraps2::summary_table()
  • Custom-specification (i.e. to include IQR instead of Min., Max) is easier in qwraps2::summary_table(). 

*In the header of the R chunk, include results = "asis"; otherwise the table will not be rendered as a table, but printed as the raw commands underlying the formatting of the table:

```{r, results = "asis"}

```

In the R chunk, after loading the qwraps2 package, generate the table in R Markdown format with

> options(qwraps2_markup = "markdown")

 

>  our_summary <-
+ list("Diabetes" =

+  list("yes" = ~ qwraps2::n_perc0(Diabetes == "yes"),

+  "no" = ~ qwraps2::n_perc0(Diabetes == "no")),

+  "Age" =

+  list("Median (IQR)" = ~ qwraps2::median_iqr(Age)),

+  "BMI" =

+  list("Mean (SD)" = ~ qwraps2::mean_sd(BMI))

+   )

Preparation: Load the Pima diabetes dataset "Pima_diabetes" and transform the Diabetes variable into a factor with levels "yes", "no".

> library(qwraps2)

 

>  options(qwraps2_markup = "markdown")

 

>  our_summary <-
+ list("Diabetes" =

+  list("yes" = ~ qwraps2::n_perc0(Diabetes == "yes"),

+  "no" = ~ qwraps2::n_perc0(Diabetes == "no")),

+  "Age" =

+  list("Median (IQR)" = ~ qwraps2::median_iqr(Age)),

+  "BMI" =

+  list("Mean (SD)" = ~ qwraps2::mean_sd(BMI))
+ )

> summary_table(Pima_diabetes, our_summary)

 

  • In order to stratify the descriptive statistics in a table by a variable groupvariable, use the dplyr::group_by() function:
  • summary_table(dplyr::group_by(Pima_diabetes, groupvariable), our_summary)

# Summary specification for the stratified table.
# Each top-level entry is a row group; each inner entry is one row, given as a
# one-sided formula that is handed to qwraps2::summary_table().
# All helpers are namespace-qualified so the block does not depend on qwraps2
# being attached.
our_summary3 <-
  list("Number of pregnancies" =
       list("0-1" = ~ qwraps2::n_perc0(Pregnancies < 2,
                                       show_symbol = TRUE, na_rm = TRUE),
            "2-3" = ~ qwraps2::n_perc0(Pregnancies %in% 2:3,
                                       show_symbol = TRUE, na_rm = TRUE),
            "4-6" = ~ qwraps2::n_perc0(Pregnancies %in% 4:6,
                                       show_symbol = TRUE, na_rm = TRUE),
            "7-17" = ~ qwraps2::n_perc0(Pregnancies %in% 7:17,
                                        show_symbol = TRUE, na_rm = TRUE)),
       "Age" =
       list("Median (IQR)" = ~ qwraps2::median_iqr(Age)),
       "BMI" =
       list("Mean (SD)" = ~ qwraps2::mean_sd(BMI, denote_sd = "paren",
                                             na_rm = TRUE, show_n = "never")),
       "Blood Pressure" =
       list("Mean (SD)" = ~ qwraps2::mean_sd(BloodPressure, denote_sd = "paren",
                                             na_rm = TRUE, show_n = "never")),
       "Glucose" =
       list("Mean (SD)" = ~ qwraps2::mean_sd(Glucose, denote_sd = "paren",
                                             na_rm = TRUE, show_n = "never"))
       )

# Stratify the summary by diabetes status and print the resulting table.
table3 <- qwraps2::summary_table(
  dplyr::group_by(Pima_diabetes, Diabetes),
  our_summary3
)
table3

 



# Add a column with the overall (unstratified) statistics and format a bit.
table3_overall <- qwraps2::summary_table(Pima_diabetes, our_summary3)
table3_both <- cbind(table3_overall, table3)

# Build the column headers from the data instead of hard-coding group sizes,
# so each header always shows the size of the group it labels.
# NOTE(review): assumes the stratified columns of table3 appear in the same
# order as the values tabulated here (factor level order) - confirm.
group_sizes <- table(Pima_diabetes$Diabetes, useNA = "ifany")
group_sizes <- group_sizes[group_sizes > 0]
column_names <- c(
  paste0("Overall \\ (N = ", nrow(Pima_diabetes), ")"),
  paste0("Diabetes: ", names(group_sizes),
         " \\ (N = ", as.vector(group_sizes), ")")
)

print(table3_both,
      rtitle = "Summary Statistics",
      cnames = column_names)

 

Plots:

ggplot2: "the" package to produce nice graphics in R.

Allows to generate many different plots and customize all aspects of the plot.

Online reference: e.g. https://ggplot2.tidyverse.org/reference/.

Book: https://github.com/hadley/ggplot2-book.

Cheatsheets:

https://github.com/rstudio/cheatsheets/blob/master/data-visualization-2.1.pdf

 

> library(ggplot2)
> ggplot(data = Pima_diabetes, mapping = aes(x =

+ Pregnancies)) + geom_bar()


> # in short:
> ggplot(Pima_diabetes, aes(Pregnancies)) + geom_bar()

 ---> dataset, what is the x?, in which form it should be shown?

 

> ggplot(Pima_diabetes, aes(Diabetes, BMI)) + geom_boxplot()

 ---> dataset, what is the x,y?, in which form it should be shown?

 

> ggplot(Pima_diabetes, aes(BMI, Glucose, col = Diabetes)) + geom_point()

----> dataset, what is the x,y, color variable diabetes, in which form it should be shown?

 

many more examples:

# One-time setup: run manually if ggplot2 is not installed yet. Kept commented
# out so that sourcing or knitting this script does not reinstall the package
# on every run.
# install.packages("ggplot2")
library(ggplot2)

# Bar plot: one bar per distinct number of pregnancies.
ggplot(data = Pima_diabetes, mapping = aes(x = Pregnancies)) + geom_bar()
# Same plot, relying on positional arguments:
ggplot(Pima_diabetes, aes(Pregnancies)) + geom_bar()

# Histogram
ggplot(Pima_diabetes, aes(BMI)) + geom_histogram()

# Histogram and smoothed kernel density estimate.
# after_stat(density) puts the histogram on the density scale so both layers
# share one y axis; it replaces the deprecated stat(density) spelling.
ggplot(Pima_diabetes, aes(BMI, after_stat(density))) +
  geom_histogram() +
  geom_density(col = "red")

# Stratified smoothed kernel density estimate (outline colour, then fill)
ggplot(Pima_diabetes, aes(BMI, col = Diabetes)) +
  geom_density(position = "stack")
ggplot(Pima_diabetes, aes(BMI, fill = Diabetes)) +
  geom_density(position = "stack")

# Stratified boxplot
ggplot(Pima_diabetes, aes(Diabetes, BMI)) + geom_boxplot()

# Scatterplot
ggplot(Pima_diabetes, aes(BMI, Glucose)) + geom_point()

# Scatterplot using color for third variable
ggplot(Pima_diabetes, aes(BMI, Glucose, col = Diabetes)) + geom_point()

# A bit more fancy and complicated scatterplots:
# continuous colour scale for the number of pregnancies, one panel per
# diabetes status.
ggplot(Pima_diabetes, aes(BMI, Glucose, col = Pregnancies)) +
  geom_point() +
  scale_colour_gradientn(colours = rainbow(4)) +
  facet_wrap(~Diabetes)

# Dichotomise the number of pregnancies at 4.
# The levels are given explicitly so the two labels always line up with
# FALSE/TRUE, even if only one of the two values occurs in the data.
Pima_diabetes$ManyPregnancies <- factor(
  Pima_diabetes$Pregnancies >= 4,
  levels = c(FALSE, TRUE),
  labels = c("< 4 Pregnancies", ">= 4 Pregnancies")
)
ggplot(Pima_diabetes, aes(BMI, Glucose, col = ManyPregnancies)) +
  geom_point() +
  facet_wrap(~Diabetes)

 

# Flag women with four or more pregnancies and colour the BMI/glucose
# scatterplot by that flag, one panel per diabetes status.
# Explicit levels keep the label mapping fixed (FALSE -> "< 4", TRUE -> ">= 4")
# and avoid an error when only one of the two values is present.
many_pregnancies <- Pima_diabetes$Pregnancies >= 4
Pima_diabetes$ManyPregnancies <- factor(
  many_pregnancies,
  levels = c(FALSE, TRUE),
  labels = c("< 4 Pregnancies", ">= 4 Pregnancies")
)
ggplot(Pima_diabetes, aes(BMI, Glucose, col = ManyPregnancies)) +
  geom_point() +
  facet_wrap(~Diabetes)

 

The following two examples can only be knitted to html.
Interactive maps: 
```{r}
# install.packages("leaflet")
library(leaflet)

# Base map widget with a tile layer added; reused for both views below.
m <- addTiles(leaflet())

# Central Park. From here you can do spatial analysis, e.g. add one more
# variable such as noise level / sleep pattern, etc.
m %>%
  fitBounds(lng1 = -73.9, lat1 = 40.75, lng2 = -73.95, lat2 = 40.8) %>%
  addMeasure()

# Berlin, Germany, with a customised measuring tool.
m %>%
  fitBounds(
    lng1 = 13.76134, lat1 = 52.675499,
    lng2 = 13.0884, lat2 = 52.33812
  ) %>%
  addMeasure(
    position = "bottomleft",
    primaryLengthUnit = "meters",
    primaryAreaUnit = "sqmeters",
    activeColor = "#3D535D",
    completedColor = "#7D4479"
  )
```

Interactive ggplot2 plots:

```{r}
# install.packages("plotly")
library(plotly)

# Static ggplot2 scatterplot of glucose against BMI, coloured by diabetes
# status.
plot_BMI_glu_diab <- ggplot(Pima_diabetes, aes(BMI, Glucose, col = Diabetes)) +
  geom_point()
# ggplotly() turns the ggplot object into an interactive plotly widget.
plot_BMI_glu_diab_interactive <- ggplotly(plot_BMI_glu_diab)

plot_BMI_glu_diab_interactive
```

 

 

 

댓글