Tables
The table1::table1() function produces a table of descriptive statistics for the variables you choose.
ex) The notation table1::table1 means: take the function table1() from the package table1 (package::function).
table1::table1(~Pregnancies + Age + BMI + BloodPressure + Glucose | Diabetes, data = Pima_diabetes)
yields:
qwraps2::summary_table() is another way to create a Table 1. Comparison:
- table1::table1() only allows knitting to html.
- Making a table with table1::table1() is generally faster than with qwraps2::summary_table().
- Custom specification (e.g. showing the IQR instead of Min., Max.) is easier in qwraps2::summary_table().
*In the header of the R chunk, include results = "asis"; otherwise the table will not be printed as a table, but as the commands underlying the formatting of the table:
```{r, results = "asis"}
```
In the R chunk, after loading the qwraps2 package, generate the table in R Markdown format with
# Emit qwraps2 output as markdown so the table renders when knitting R Markdown.
options(qwraps2_markup = "markdown")

# Summary specification: a named list of row groups, each holding named
# one-sided formulas. The formulas are only stored here; they are evaluated
# against the data later, when the specification is passed to summary_table().
our_summary <-
  list(
    "Diabetes" =
      list(
        "yes" = ~ qwraps2::n_perc0(Diabetes == "yes"),
        "no" = ~ qwraps2::n_perc0(Diabetes == "no")
      ),
    "Age" =
      list("Median (IQR)" = ~ qwraps2::median_iqr(Age)),
    "BMI" =
      list("Mean (SD)" = ~ qwraps2::mean_sd(BMI))
  )
Preparation: Load the Pima diabetes dataset "Pima_diabetes" and transform the Diabetes variable to a factor with levels "yes", "no".
library(qwraps2)

# Emit qwraps2 output as markdown so the table renders when knitting R Markdown.
options(qwraps2_markup = "markdown")

# Summary specification: named row groups, each a list of named one-sided
# formulas that summary_table() evaluates against the data.
our_summary <-
  list(
    "Diabetes" =
      list(
        "yes" = ~ qwraps2::n_perc0(Diabetes == "yes"),
        "no" = ~ qwraps2::n_perc0(Diabetes == "no")
      ),
    "Age" =
      list("Median (IQR)" = ~ qwraps2::median_iqr(Age)),
    "BMI" =
      list("Mean (SD)" = ~ qwraps2::mean_sd(BMI))
  )

# Build (and auto-print) the unstratified table for the whole dataset.
summary_table(Pima_diabetes, our_summary)
- In order to stratify the descriptive statistics in a table by a variable groupvariable, use the dplyr::group_by() function:
- summary_table(dplyr::group_by(Pima_diabetes, groupvariable), our_summary)
# Summary specification for the third example table: named row groups, each a
# list of named one-sided formulas (stored here, evaluated by summary_table()).
# Every helper is namespace-qualified and drops missing values, so all rows are
# computed the same way.
our_summary3 <-
  list(
    # Pregnancy counts binned into four categories, shown as n (%).
    "Number of pregnancies" =
      list(
        "0-1" = ~ qwraps2::n_perc0(Pregnancies < 2, show_symbol = TRUE, na_rm = TRUE),
        "2-3" = ~ qwraps2::n_perc0(Pregnancies %in% 2:3, show_symbol = TRUE, na_rm = TRUE),
        "4-6" = ~ qwraps2::n_perc0(Pregnancies %in% 4:6, show_symbol = TRUE, na_rm = TRUE),
        "7-17" = ~ qwraps2::n_perc0(Pregnancies %in% 7:17, show_symbol = TRUE, na_rm = TRUE)
      ),
    "Age" =
      list("Median (IQR)" = ~ qwraps2::median_iqr(Age, na_rm = TRUE, show_n = "never")),
    "BMI" =
      list("Mean (SD)" = ~ qwraps2::mean_sd(BMI, denote_sd = "paren", na_rm = TRUE, show_n = "never")),
    "Blood Pressure" =
      list("Mean (SD)" = ~ qwraps2::mean_sd(BloodPressure, denote_sd = "paren", na_rm = TRUE, show_n = "never")),
    "Glucose" =
      list("Mean (SD)" = ~ qwraps2::mean_sd(Glucose, denote_sd = "paren", na_rm = TRUE, show_n = "never"))
  )
# Descriptive statistics stratified by diabetes status (one column per group).
table3 <- qwraps2::summary_table(
  dplyr::group_by(Pima_diabetes, Diabetes),
  our_summary3
)
table3

# Add a column with the overall statistics and format the header a bit.
table3_overall <- qwraps2::summary_table(Pima_diabetes, our_summary3)
table3_both <- cbind(table3_overall, table3)

# Take the group sizes and their order from the data instead of hard-coding
# them, so each header is guaranteed to match the column it sits on.
# NOTE(review): the previous literal headers paired "Diabetes" with N = 500;
# in the standard Pima data the diabetic group is the smaller one -- confirm.
# Assumes Diabetes is coded "yes"/"no" and that table() orders the groups the
# same way as the grouped summary (both follow the factor levels).
group_n <- table(Pima_diabetes$Diabetes)
group_n <- group_n[group_n > 0]
group_labels <- ifelse(names(group_n) == "yes", "Diabetes", "No Diabetes")

print(
  table3_both,
  rtitle = "Summary Statistics",
  cnames = c(
    sprintf("Overall \\ (N = %d)", nrow(Pima_diabetes)),
    sprintf("%s \\ (N = %d)", group_labels, as.integer(group_n))
  )
)
Plots:
ggplot2 is "the" package for producing nice graphics in R.
It can generate many different kinds of plots and lets you customize every aspect of a plot.
Online reference: e.g. https://ggplot2.tidyverse.org/reference/.
Book: https://github.com/hadley/ggplot2-book.
Cheatsheets:
https://github.com/rstudio/cheatsheets/blob/master/data-visualization-2.1.pdf
library(ggplot2)

# Bar plot of the number of pregnancies, written out with named arguments.
ggplot(data = Pima_diabetes, mapping = aes(x = Pregnancies)) +
  geom_bar()
# in short:
ggplot(Pima_diabetes, aes(Pregnancies)) + geom_bar()
# ---> dataset, what is the x?, in which form should it be shown?

ggplot(Pima_diabetes, aes(Diabetes, BMI)) + geom_boxplot()
# ---> dataset, what are x and y?, in which form should it be shown?

ggplot(Pima_diabetes, aes(BMI, Glucose, col = Diabetes)) + geom_point()
# ---> dataset, what are x and y, colour by the variable Diabetes, in which
#      form should it be shown?
many more examples:
# install.packages("ggplot2")  # run once, only if ggplot2 is not installed yet
library(ggplot2)

# Bar plot
ggplot(data = Pima_diabetes, mapping = aes(x = Pregnancies)) + geom_bar()

# same but in short:
ggplot(Pima_diabetes, aes(Pregnancies)) + geom_bar()

# Histogram
ggplot(Pima_diabetes, aes(BMI)) + geom_histogram()

# Histogram and smoothed kernel density estimate.
# after_stat(density) puts the histogram on the density scale so both layers
# share one y axis; it replaces the deprecated stat(density).
ggplot(Pima_diabetes, aes(BMI, after_stat(density))) +
  geom_histogram() +
  geom_density(col = "red")

# Stratified smoothed kernel density estimate
ggplot(Pima_diabetes, aes(BMI, col = Diabetes)) +
  geom_density(position = "stack")
ggplot(Pima_diabetes, aes(BMI, fill = Diabetes)) +
  geom_density(position = "stack")

# Stratified boxplot
ggplot(Pima_diabetes, aes(Diabetes, BMI)) + geom_boxplot()

# Scatterplot
ggplot(Pima_diabetes, aes(BMI, Glucose)) + geom_point()

# Scatterplot using color for third variable
ggplot(Pima_diabetes, aes(BMI, Glucose, col = Diabetes)) + geom_point()

# A bit more fancy and complicated scatterplots:
# continuous colour gradient for Pregnancies, one panel per Diabetes value.
ggplot(Pima_diabetes, aes(BMI, Glucose, col = Pregnancies)) +
  geom_point() +
  scale_colour_gradientn(colours = rainbow(4)) +
  facet_wrap(~Diabetes)
# Dichotomise the number of pregnancies (fewer than 4 vs. 4 or more) and use
# the result as the colour variable, with one panel per Diabetes value.
# The levels are given explicitly so each label is tied to the right logical
# value even if only one of FALSE/TRUE occurs in the data.
Pima_diabetes$ManyPregnancies <- factor(
  Pima_diabetes$Pregnancies >= 4,
  levels = c(FALSE, TRUE),
  labels = c("< 4 Pregnancies", ">= 4 Pregnancies")
)

ggplot(Pima_diabetes, aes(BMI, Glucose, col = ManyPregnancies)) +
  geom_point() +
  facet_wrap(~Diabetes)
The following two examples can only be knitted to html.
Interactive maps:
```{r}
# install.packages("leaflet")
library(leaflet)

# Base map widget with the default tile layer; both examples below reuse it.
m <- addTiles(leaflet())

# Central Park: zoom to the bounding box and add a measure control with its
# default settings. With this you can do spatial analysis, e.g. by adding one
# more variable such as noise level, sleep pattern, etc.
addMeasure(fitBounds(m, -73.9, 40.75, -73.95, 40.8))

# Berlin, Germany: same idea, with a customised measure control
# (position, metric units and colours).
addMeasure(
  fitBounds(m, 13.76134, 52.675499, 13.0884, 52.33812),
  position = "bottomleft",
  primaryLengthUnit = "meters",
  primaryAreaUnit = "sqmeters",
  activeColor = "#3D535D",
  completedColor = "#7D4479"
)
```
Interactive ggplot2 plots:
```{r}
# install.packages("plotly")
library(plotly)

# Build the static ggplot2 scatterplot first ...
plot_BMI_glu_diab <- ggplot(Pima_diabetes, aes(BMI, Glucose, col = Diabetes)) +
  geom_point()

# ... then convert it into an interactive plotly widget and display it.
plot_BMI_glu_diab_interactive <- ggplotly(plot_BMI_glu_diab)
plot_BMI_glu_diab_interactive
```
'석사과정' 카테고리의 다른 글
시험기간.. (0) | 2021.03.20 |
---|---|
[Statistical Analysis with R] Data Analysis (0) | 2021.03.02 |
[Statistical Analysis with R] Descriptive Analysis (0) | 2021.02.23 |
[Statistical Analysis with R] Data Manipulation (0) | 2021.02.16 |
[Statistical Analysis with R] R Markdown (0) | 2021.02.16 |
댓글