# Descriptive statistics and exploratory plots for daily step counts.
#
# Reads "Steps.csv" (columns used: Year, Month, Day, Steps), builds three
# plots and a one-row summary table, and prints them in interactive sessions.

library(tidyverse)
library(moments)
library(knitr)

# Data ----

# Namespace-qualified call: library(tidyverse) only attaches lubridate from
# tidyverse 2.0.0 onwards, so a bare make_date() fails on older installs.
df <- read_csv("Steps.csv") %>%
  mutate(Date = lubridate::make_date(Year, Month, Day))

# Plots ----

# Time series of steps; days with a missing count are drawn as crosses at
# y = 0, and dotted reference lines are drawn at 15000 and 25000 steps.
plot_scatter <- ggplot(df, aes(x = Date, y = Steps)) +
  geom_line(na.rm = TRUE) +
  geom_point(color = "red", na.rm = TRUE) +
  geom_point(
    data = filter(df, is.na(Steps)),
    aes(x = Date, y = 0),
    shape = 4,
    inherit.aes = FALSE
  ) +
  geom_hline(
    yintercept = c(15000, 25000),
    linetype = "dotted",
    color = "blue",
    linewidth = 1.2
  )

if (interactive()) {
  print(plot_scatter)
}

plot_boxplot <- ggplot(df, aes(y = Steps)) +
  geom_boxplot(na.rm = TRUE)

if (interactive()) {
  print(plot_boxplot)
}

# Histogram on the density scale so the kernel density estimate can be
# overlaid on the same axis.
plot_histogram <- ggplot(df, aes(x = Steps)) +
  geom_histogram(
    aes(y = after_stat(density)),
    binwidth = 1000,
    color = "white",
    alpha = 0.5,
    na.rm = TRUE
  ) +
  geom_density(
    fill = "steelblue",
    color = "black",
    alpha = 0.5,
    na.rm = TRUE
  )

if (interactive()) {
  print(plot_histogram)
}

# Helpers ----

#' Most frequent value of a vector
#'
#' @param x A vector.
#' @param na.rm If `TRUE` (default), missing values are dropped before
#'   counting; if `FALSE`, `NA` is counted like any other value.
#' @return A length-one vector of the same type as `x` holding the most
#'   frequent value. Ties are resolved in favour of the value that appears
#'   first in `x`. If there is nothing left to count, a single `NA` is
#'   returned.
mode_vec <- function(x, na.rm = TRUE) {
  if (na.rm) {
    x <- x[!is.na(x)]
  }
  # which.max() on an empty tabulation yields integer(0), which would make
  # this function return a zero-length result; return a typed NA instead.
  if (length(x) == 0L) {
    return(x[NA_integer_])
  }
  ux <- unique(x)
  ux[which.max(tabulate(match(x, ux)))]
}

#' Percentage of non-missing values strictly above a threshold
#'
#' @param x A numeric vector.
#' @param threshold A single number.
#' @return A number between 0 and 100, or `NaN` if `x` has no non-missing
#'   values.
pct_above <- function(x, threshold) {
  sum(x > threshold, na.rm = TRUE) / sum(!is.na(x)) * 100
}

# Summary statistics ----

# One-row table of descriptive statistics for Steps. Later expressions reuse
# columns created earlier in the same call (sd, mean, IQR, median).
# NOTE(review): this object masks the name of base::summary(); the name is
# kept because code outside this view may refer to it.
summary <- df %>%
  summarise(
    range = max(Steps, na.rm = TRUE) - min(Steps, na.rm = TRUE),
    min = min(Steps, na.rm = TRUE),
    max = max(Steps, na.rm = TRUE),
    mean = mean(Steps, na.rm = TRUE),
    median = median(Steps, na.rm = TRUE),
    # Steps are rounded to the nearest thousand before taking the mode.
    mode = mode_vec(round(Steps, -3)),
    sd = sd(Steps, na.rm = TRUE),
    variance = var(Steps, na.rm = TRUE),
    cv = sd / mean * 100,
    # moments::kurtosis() is Pearson's kurtosis (not excess kurtosis).
    kurtosis = kurtosis(Steps, na.rm = TRUE),
    p10 = quantile(Steps, 0.10, na.rm = TRUE),
    p90 = quantile(Steps, 0.90, na.rm = TRUE),
    Q1 = quantile(Steps, 0.25, na.rm = TRUE),
    Q2 = quantile(Steps, 0.50, na.rm = TRUE),
    Q3 = quantile(Steps, 0.75, na.rm = TRUE),
    IQR = IQR(Steps, na.rm = TRUE),
    cqv = IQR / median * 100,
    skewness = skewness(Steps, na.rm = TRUE),
    total = sum(Steps, na.rm = TRUE),
    # Number of rows with a recorded (non-missing) step count.
    n = sum(!is.na(Steps)),
    above10000 = pct_above(Steps, 10000),
    above15000 = pct_above(Steps, 15000),
    above20000 = pct_above(Steps, 20000),
    above25000 = pct_above(Steps, 25000),
    above30000 = pct_above(Steps, 30000)
  )

options(pillar.sigfig = 4)

# Thematic slices of the summary row, one per printed table.
central_tendency <- summary %>%
  select(mean, median, mode)
variability <- summary %>%
  select(range, min, max, sd, variance, cv)
shape <- summary %>%
  select(skewness, kurtosis)
percentiles <- summary %>%
  select(p10, Q1, Q2, Q3, p90, IQR, cqv)
overall <- summary %>%
  select(total, n)
above <- summary %>%
  select(above10000, above15000, above20000, above25000, above30000)

# Output ----

if (interactive()) {
  print(kable(central_tendency, digits = 2, caption = "Central Tendency"))
  print(kable(variability, digits = 2, caption = "Variability"))
  print(kable(shape, digits = 2, caption = "Distribution Shape"))
  print(kable(percentiles, digits = 2, caption = "Percentiles & Quartiles"))
  print(kable(overall, digits = 0, caption = "Overall Metrics"))
  print(kable(above, digits = 2, caption = "Percent above reference points"))
}