6.1 Setup

Show the code
source("_common.r")
Show the code
# Load the precomputed pipeline targets listed below into the session via
# targets::tar_load(), so the chunks that follow can refer to them by name.
tar_load(c(
  course_and_uni_per_visit,
  n_action,
  n_action_type,
  n_action_w_date,
  time_visit_wday,
  time_visit_wday_fingerprint,
  data_separated_filtered_date_uni_course,
  n_action_fingerprint,
  n_action_w_date_fingerprint,
  n_action_lt_500,
  n_action_lt_500_fingerprint
))

6.2 Aktionen pro idvisit/fingerprint - Mit den 499er-Daten

Show the code
# Drop every visit whose action count is exactly 499 and keep the rest under
# the name n_action_lt_500 (this replaces the target of the same name that was
# loaded above).
n_action_lt_500 <- filter(n_action, nr_max != 499)

# Distribution summary of the remaining action counts, rendered as a gt table
# with all numeric columns shown to two decimals.
describe_distribution(n_action_lt_500, nr_max) |>
  gt() |>
  fmt_number(decimals = 2, columns = where(is.numeric))
Variable Mean SD IQR Min Max Skewness Kurtosis n n_Missing
nr_max 61.88 88.53 77.00 1.00 496.00 2.27 5.47 13,626.00 0.00
Show the code
# Remove rows with exactly 499 actions, then keep only the first row for each
# fingerprint (all other columns are retained).
n_action_lt_500_fingerprint_unique <-
  distinct(
    filter(n_action_fingerprint, nr_max != 499),
    fingerprint,
    .keep_all = TRUE
  )

# Distribution summary of nr_max for the de-duplicated fingerprints, as a gt
# table with numeric columns formatted to two decimals.
describe_distribution(n_action_lt_500_fingerprint_unique, nr_max) |>
  gt() |>
  fmt_number(decimals = 2, columns = where(is.numeric))
Variable Mean SD IQR Min Max Skewness Kurtosis n n_Missing
nr_max 75.78 99.73 100.00 1.00 496.00 1.88 3.31 6,771.00 0.00

6.2.1 idvisit

Show the code
# Location and spread of the action counts per visit, rounded to whole
# numbers; these values are reused for the plot annotations below.
n_action_avg <- round(mean(n_action$nr_max), 0)
n_action_median <- round(median(n_action$nr_max), 0)
n_action_sd <- round(sd(n_action$nr_max), 0)
n_action_iqr <- round(IQR(n_action$nr_max), 0)

# Histogram of actions per visit. The vertical line marks the mean, the thick
# bar on the baseline spans mean - SD to mean + SD, and both get a text label.
ggplot(n_action) +
  geom_histogram(aes(x = nr_max)) +
  geom_vline(color = palette_okabe_ito()[1], xintercept = n_action_avg) +
  geom_segment(
    x = n_action_avg - n_action_sd,
    xend = n_action_avg + n_action_sd,
    y = 0,
    yend = 0,
    size = 2,
    color = palette_okabe_ito()[2]
  ) +
  annotate(
    "label",
    label = paste0("MW = ", n_action_avg),
    x = n_action_avg,
    y = 1500
  ) +
  annotate(
    "label",
    label = paste0("SD = ", n_action_sd),
    x = n_action_avg + n_action_sd,
    y = 0
  ) +
  labs(
    x = "Anzahl von Aktionen pro Visit",
    y = "n",
    caption = "Der vertikale Strich zeigt den Mittelwert; der horizontale MW±SD"
  ) +
  theme_minimal()

Show the code
#geom_label(aes(x = n_action_avg), y = 1, label = "Mean")

# Same histogram of actions per visit, this time annotated with the robust
# statistics: vertical line at the median, baseline bar from median - IQR to
# median + IQR.
ggplot(n_action) +
  geom_histogram(aes(x = nr_max)) +
  geom_vline(color = palette_okabe_ito()[1], xintercept = n_action_median) +
  geom_segment(
    x = n_action_median - n_action_iqr,
    xend = n_action_median + n_action_iqr,
    y = 0,
    yend = 0,
    size = 2,
    color = palette_okabe_ito()[2]
  ) +
  annotate(
    "label",
    label = paste0("Md = ", n_action_median),
    x = n_action_median,
    y = 1500
  ) +
  annotate(
    "label",
    label = paste0("IQR = ", n_action_iqr),
    x = n_action_median + n_action_iqr,
    y = 0
  ) +
  labs(
    x = "Anzahl von Aktionen pro Visit",
    y = "n",
    caption = "Der vertikale Strich zeigt den Median; der horizontale Median±IQR"
  ) +
  theme_minimal()

Show the code
#geom_label(aes(x = n_action_avg), y = 1, label = "Mean")
  • Mittelwert der Aktionen pro Visit: 80.
  • SD der Aktionen pro Visit: 123.
  • MD: 27.
  • IQR: 88.

6.2.2 fingerprint

Show the code
# Location and spread of nr_max in the fingerprint data, rounded to whole
# numbers; reused for the annotations in this and the following plot.
n_action_fingerprint_avg <- round(mean(n_action_fingerprint$nr_max), 0)
n_action_fingerprint_median <- round(median(n_action_fingerprint$nr_max), 0)
n_action_fingerprint_sd <- round(sd(n_action_fingerprint$nr_max), 0)
n_action_fingerprint_iqr <- round(IQR(n_action_fingerprint$nr_max), 0)

# Histogram of nr_max for the fingerprint data with the mean as a vertical
# line and mean ± SD as a thick bar on the baseline.
ggplot(n_action_fingerprint) +
  geom_histogram(aes(x = nr_max)) +
  geom_vline(
    color = palette_okabe_ito()[1],
    xintercept = n_action_fingerprint_avg
  ) +
  geom_segment(
    x = n_action_fingerprint_avg - n_action_fingerprint_sd,
    xend = n_action_fingerprint_avg + n_action_fingerprint_sd,
    y = 0,
    yend = 0,
    size = 2,
    color = palette_okabe_ito()[2]
  ) +
  annotate(
    "label",
    label = paste0("MW = ", n_action_fingerprint_avg),
    x = n_action_fingerprint_avg,
    y = 1500
  ) +
  annotate(
    "label",
    label = paste0("SD = ", n_action_fingerprint_sd),
    x = n_action_fingerprint_avg + n_action_fingerprint_sd,
    y = 0
  ) +
  labs(
    x = "Anzahl von Aktionen pro Visit",
    y = "n",
    caption = "Der vertikale Strich zeigt den Mittelwert; der horizontale MW±SD"
  ) +
  theme_minimal()

Show the code
#geom_label(aes(x = n_action_fingerprint_avg), y = 1, label = "Mean")

# Histogram of nr_max for the fingerprint data, annotated with the median
# (vertical line) and median ± IQR (thick bar on the baseline).
ggplot(n_action_fingerprint) +
  geom_histogram(aes(x = nr_max)) +
  geom_vline(
    color = palette_okabe_ito()[1],
    xintercept = n_action_fingerprint_median
  ) +
  geom_segment(
    x = n_action_fingerprint_median - n_action_fingerprint_iqr,
    xend = n_action_fingerprint_median + n_action_fingerprint_iqr,
    y = 0,
    yend = 0,
    size = 2,
    color = palette_okabe_ito()[2]
  ) +
  annotate(
    "label",
    label = paste0("Md = ", n_action_fingerprint_median),
    x = n_action_fingerprint_median,
    y = 1500
  ) +
  annotate(
    "label",
    label = paste0("IQR = ", n_action_fingerprint_iqr),
    x = n_action_fingerprint_median + n_action_fingerprint_iqr,
    y = 0
  ) +
  labs(
    x = "Anzahl von Aktionen pro Visit",
    y = "n",
    caption = "Der vertikale Strich zeigt den Median; der horizontale Median±IQR"
  ) +
  theme_minimal()

Show the code
#geom_label(aes(x = n_action_fingerprint_avg), y = 1, label = "Mean")

6.3 Ohne 499er-Daten

6.3.1 idvisit

Show the code
# Mean (rounded to whole numbers) and SD (two decimals) of actions per visit
# after the 499-action visits have been removed.
n_action_avg2 <- mean(n_action_lt_500$nr_max) |> round(0)
n_action_sd2 <- sd(n_action_lt_500$nr_max) |> round(2)

# Histogram of actions per visit without the 499-action visits. The vertical
# line marks the mean; the thick bar on the baseline spans mean ± SD.
n_action_lt_500 |>
  ggplot() +
  geom_histogram(aes(x = nr_max)) +
  labs(
    x = "Anzahl von Aktionen pro Visit",
    y = "n",
    title = "Verteilung der User-Aktionen pro Visit",
    caption = "Der vertikale Strich zeigt den Mittelwert; der horizontale die SD"
  ) +
  theme_minimal() +
  geom_vline(xintercept = n_action_avg2, color = palette_okabe_ito()[1]) +
  geom_segment(
    # Both ends must be centred on the mean of the filtered data
    # (n_action_avg2); the left end previously used the mean of the full data
    # (n_action_avg), which made the bar asymmetric around the vertical line.
    x = n_action_avg2 - n_action_sd2,
    y = 0,
    xend = n_action_avg2 + n_action_sd2,
    yend = 0,
    color = palette_okabe_ito()[2],
    size = 2
  ) +
  annotate(
    "label",
    x = n_action_avg2,
    y = 1500,
    label = paste0("MW = ", n_action_avg2)
  ) +
  annotate(
    "label",
    x = n_action_avg2 + n_action_sd2,
    y = 0,
    label = paste0("SD = ", n_action_sd2)
  )

Show the code
#geom_label(aes(x = n_action_avg), y = 1, label = "Mean")
  • Mittelwert der Aktionen pro Visit: 62.
  • SD der Aktionen pro Visit: 88.53.

6.3.2 fingerprint unique

Show the code
# Mean (rounded to whole numbers) and SD (two decimals) of nr_max for the
# de-duplicated fingerprints after the 499-action rows have been removed.
n_action_fingerprint_avg2 <- mean(n_action_lt_500_fingerprint_unique$nr_max) |>
  round(0)
n_action_fingerprint_sd2 <- sd(n_action_lt_500_fingerprint_unique$nr_max) |>
  round(2)

# Histogram of nr_max for the unique fingerprints without the 499-action rows.
# The vertical line marks the mean; the thick baseline bar spans mean ± SD.
n_action_lt_500_fingerprint_unique |>
  ggplot() +
  geom_histogram(aes(x = nr_max)) +
  labs(
    x = "Anzahl von Aktionen pro Visit",
    y = "n",
    title = "Verteilung der User-Aktionen pro Visit",
    caption = "Der vertikale Strich zeigt den Mittelwert; der horizontale die SD"
  ) +
  theme_minimal() +
  geom_vline(
    xintercept = n_action_fingerprint_avg2,
    color = palette_okabe_ito()[1]
  ) +
  geom_segment(
    # Both ends must be centred on the mean of the filtered, de-duplicated
    # data (n_action_fingerprint_avg2); the left end previously used
    # n_action_fingerprint_avg, the mean of the unfiltered fingerprint data.
    x = n_action_fingerprint_avg2 - n_action_fingerprint_sd2,
    y = 0,
    xend = n_action_fingerprint_avg2 + n_action_fingerprint_sd2,
    yend = 0,
    color = palette_okabe_ito()[2],
    size = 2
  ) +
  annotate(
    "label",
    x = n_action_fingerprint_avg2,
    y = 1500,
    label = paste0("MW = ", n_action_fingerprint_avg2)
  ) +
  annotate(
    "label",
    x = n_action_fingerprint_avg2 + n_action_fingerprint_sd2,
    y = 0,
    label = paste0("SD = ", n_action_fingerprint_sd2)
  )

Show the code
#geom_label(aes(x = n_action_avg), y = 1, label = "Mean")

6.4 Anzahl Aktionen im Zeitverlauf

6.4.1 Monat

6.4.1.1 idvisit

Show the code
# Monthly trend of actions per visit: one point per month at the mean, error
# bars of ± 1 SD (mult = 1), and a linear fit over all individual visits.
n_action_w_date |>
  ggplot(aes(x = month_date, y = nr_max)) +
  stat_summary(fun = mean, geom = "point", size = 2) +
  stat_summary(
    fun.data = mean_sdl,
    fun.args = list(mult = 1),
    geom = "errorbar",
    width = 0.2
  ) +
  geom_smooth(method = "lm") +
  # Title spelling corrected ("incresed" -> "increased").
  labs(title = "The number of actions per visit has increased over time")

Show the code
# Raw data behind the monthly trend: every visit as a jittered, mostly
# transparent point.
ggplot(n_action_w_date, aes(x = month_date, y = nr_max)) +
  geom_jitter(alpha = .1)

6.4.1.2 fingerprint unique

Show the code
# Keep the first row per fingerprint (all other columns retained).
n_action_w_date_fingerprint_unique <-
  n_action_w_date_fingerprint |>
  distinct(fingerprint, .keep_all = TRUE)

# Rectangle coordinates produced by the project helper comp_semester_rects()
# from the month_date column; the geom_rect() layers in this file read its
# xmin/xmax/ymin/ymax columns. Presumably these mark semester periods --
# confirm against the helper's definition.
rect_data <- comp_semester_rects(
  n_action_w_date_fingerprint_unique,
  col_date = "month_date"
)

# Monthly trend for the unique fingerprints: monthly mean as a point, ± 1 SD
# error bars (mult = 1), linear fit, and the shaded rectangles from rect_data.
n_action_w_date_fingerprint_unique |>
  ggplot(aes(x = month_date, y = nr_max)) +
  stat_summary(fun = mean, geom = "point", size = 2) +
  stat_summary(
    fun.data = mean_sdl,
    fun.args = list(mult = 1),
    geom = "errorbar",
    width = 0.2
  ) +
  geom_smooth(method = "lm") +
  # The data are one row per fingerprint, so the title now says "per
  # fingerprint" (as the weekly plot of the same data does) instead of
  # "per visit"; the spelling of "increased" is corrected as well.
  labs(title = "The number of actions per fingerprint has increased over time") +
  geom_rect(
    data = rect_data,
    aes(xmin = xmin, xmax = xmax, ymin = ymin, ymax = ymax),
    fill = "grey",
    alpha = 0.2,
    inherit.aes = FALSE # Essential to use the rect_data columns
  )

Show the code
# Raw data behind the monthly trend: every unique fingerprint as a jittered,
# mostly transparent point.
ggplot(n_action_w_date_fingerprint_unique, aes(x = month_date, y = nr_max)) +
  geom_jitter(alpha = .1)

6.4.2 Regression (Monat)

6.4.2.1 idvisit

Show the code
# Linear regression of actions per visit on the month variable.
lm(formula = nr_max ~ month_date, data = n_action_w_date)

Call:
lm(formula = nr_max ~ month_date, data = n_action_w_date)

Coefficients:
(Intercept)   month_date  
 -5.956e+02    3.937e-07  

6.4.2.2 fingerprint

Show the code
# Linear regression of nr_max on the month variable, fingerprint data.
lm(formula = nr_max ~ month_date, data = n_action_w_date_fingerprint)

Call:
lm(formula = nr_max ~ month_date, data = n_action_w_date_fingerprint)

Coefficients:
(Intercept)   month_date  
 -1.186e+03    7.503e-07  

6.4.2.3 fingerprint unique

Show the code
# Linear regression of nr_max on the month variable, one row per fingerprint.
lm(formula = nr_max ~ month_date, data = n_action_w_date_fingerprint_unique)

Call:
lm(formula = nr_max ~ month_date, data = n_action_w_date_fingerprint_unique)

Coefficients:
(Intercept)   month_date  
 -1.186e+03    7.503e-07  

6.4.3 Woche

6.4.3.1 idvisit

Show the code
# Weekly trend of actions per visit: weekly mean as a point, SD-based error
# bars, and a linear fit. week_date is converted to Date for the x axis.
n_action_w_date |>
  mutate(week_date = as.Date(week_date)) |>
  ggplot(aes(x = week_date, y = nr_max)) +
  stat_summary(fun = mean, geom = "point", size = 2) +
  # NOTE(review): unlike the monthly plots, no `mult` is passed here, so
  # mean_sdl() uses its default multiplier (2 SD per its documentation) --
  # confirm whether ± 1 SD was intended for consistency.
  stat_summary(fun.data = mean_sdl, geom = "errorbar", width = 0.2) +
  geom_smooth(method = "lm") +
  # Title spelling corrected ("incresed" -> "increased").
  labs(title = "The number of actions per visit has increased over time")

6.4.3.2 fingerprint

Show the code
# Weekly trend for the fingerprint data: weekly mean as a point, SD-based
# error bars, linear fit, and the shaded rectangles from rect_data.
n_action_w_date_fingerprint |>
  mutate(week_date = as.Date(week_date)) |>
  ggplot(aes(x = week_date, y = nr_max)) +
  stat_summary(fun = mean, geom = "point", size = 2) +
  # NOTE(review): no `mult` is passed, so mean_sdl() uses its default
  # multiplier (2 SD per its documentation), whereas the monthly plots use
  # mult = 1 -- confirm which is intended.
  stat_summary(fun.data = mean_sdl, geom = "errorbar", width = 0.2) +
  geom_smooth(method = "lm") +
  # Title spelling corrected ("incresed" -> "increased").
  labs(title = "The number of actions per fingerprint has increased over time") +
  # NOTE(review): rect_data is built from the month_date column while this
  # axis is a Date-converted week_date -- confirm the two are compatible.
  geom_rect(
    data = rect_data,
    aes(xmin = xmin, xmax = xmax, ymin = ymin, ymax = ymax),
    fill = "grey",
    alpha = 0.2,
    inherit.aes = FALSE # Essential to use the rect_data columns
  )

6.4.3.3 fingerprint unique

Show the code
# Keep the first row per fingerprint (all other columns retained). This is the
# same computation as in the monthly section and is repeated here so the chunk
# does not depend on it.
n_action_w_date_fingerprint_unique <-
  n_action_w_date_fingerprint |>
  distinct(fingerprint, .keep_all = TRUE)

# Weekly trend for the unique fingerprints: weekly mean as a point, SD-based
# error bars, linear fit, and the shaded rectangles from rect_data.
n_action_w_date_fingerprint_unique |>
  mutate(week_date = as.Date(week_date)) |>
  ggplot(aes(x = week_date, y = nr_max)) +
  stat_summary(fun = mean, geom = "point", size = 2) +
  # NOTE(review): no `mult` is passed, so mean_sdl() uses its default
  # multiplier (2 SD per its documentation), whereas the monthly plots use
  # mult = 1 -- confirm which is intended.
  stat_summary(fun.data = mean_sdl, geom = "errorbar", width = 0.2) +
  geom_smooth(method = "lm") +
  # Title spelling corrected ("incresed" -> "increased").
  labs(title = "The number of actions per fingerprint has increased over time") +
  # NOTE(review): rect_data is built from the month_date column while this
  # axis is a Date-converted week_date -- confirm the two are compatible.
  geom_rect(
    data = rect_data,
    aes(xmin = xmin, xmax = xmax, ymin = ymin, ymax = ymax),
    fill = "grey",
    alpha = 0.2,
    inherit.aes = FALSE # Essential to use the rect_data columns
  )

6.4.4 Regression (Woche)

6.4.4.1 idvisit

Show the code
# Linear regression of actions per visit on the week variable.
lm(formula = nr_max ~ week_date, data = n_action_w_date)

Call:
lm(formula = nr_max ~ week_date, data = n_action_w_date)

Coefficients:
(Intercept)    week_date  
  -5.93e+02     3.92e-07  

6.4.4.2 fingerprint

Show the code
# Linear regression of nr_max on the week variable, fingerprint data.
lm(formula = nr_max ~ week_date, data = n_action_w_date_fingerprint)

Call:
lm(formula = nr_max ~ week_date, data = n_action_w_date_fingerprint)

Coefficients:
(Intercept)    week_date  
 -1.178e+03    7.453e-07  

6.5 fingerprint unique

Show the code
# Linear regression of nr_max on the week variable, one row per fingerprint.
lm(formula = nr_max ~ week_date, data = n_action_w_date_fingerprint_unique)

Call:
lm(formula = nr_max ~ week_date, data = n_action_w_date_fingerprint_unique)

Coefficients:
(Intercept)    week_date  
 -1.178e+03    7.453e-07  

6.6 Gruppierung der Visits/fingerprints nach Anzahl der Aktionen

6.6.1 idvisit

Show the code
# Label every visit by its number of actions: fewer than 30 -> "glimpser",
# 30 to 299 -> "serious user", everything else -> "heavy user".
n_action_lt_500 <- mutate(
  n_action_lt_500,
  n_actions_type = case_when(
    nr_max < 30 ~ "glimpser",
    nr_max < 300 ~ "serious user",
    TRUE ~ "heavy user"
  )
)
Show the code
# Number of visits in each usage category, shown as a gt table.
gt(count(n_action_lt_500, n_actions_type))
n_actions_type n
glimpser 7388
heavy user 465
serious user 5773
Show the code
# Bar chart of the number of visits per usage category.
n_action_lt_500 |>
  ggplot(aes(x = n_actions_type)) +
  geom_bar()

6.6.1.1 fingerprint

Show the code
# Label every row of the fingerprint data by its number of actions: fewer than
# 30 -> "glimpser", 30 to 299 -> "serious user", everything else ->
# "heavy user".
n_action_lt_500_fingerprint <- mutate(
  n_action_lt_500_fingerprint,
  n_actions_type = case_when(
    nr_max < 30 ~ "glimpser",
    nr_max < 300 ~ "serious user",
    TRUE ~ "heavy user"
  )
)
Show the code
# Number of rows in each usage category for the fingerprint data, as a gt
# table.
gt(count(n_action_lt_500_fingerprint, n_actions_type))
n_actions_type n
glimpser 3269
heavy user 334
serious user 3168
Show the code
# Bar chart of the usage categories in the fingerprint data.
n_action_lt_500_fingerprint |>
  ggplot(aes(x = n_actions_type)) +
  geom_bar()

6.6.1.2 fingerprint unique

Show the code
# Label every unique fingerprint by its number of actions: fewer than 30 ->
# "glimpser", 30 to 299 -> "serious user", everything else -> "heavy user".
n_action_lt_500_fingerprint_unique <- mutate(
  n_action_lt_500_fingerprint_unique,
  n_actions_type = case_when(
    nr_max < 30 ~ "glimpser",
    nr_max < 300 ~ "serious user",
    TRUE ~ "heavy user"
  )
)
Show the code
# Number of unique fingerprints in each usage category, as a gt table.
gt(count(n_action_lt_500_fingerprint_unique, n_actions_type))
n_actions_type n
glimpser 3269
heavy user 334
serious user 3168
Show the code
# Bar chart of the usage categories among the unique fingerprints.
n_action_lt_500_fingerprint_unique |>
  ggplot(aes(x = n_actions_type)) +
  geom_bar()

6.7 Gruppierung der Visits im Zeitverlauf

6.7.1 idvisit

6.7.1.1 Absolutzahlen

Show the code
# Number of visits per usage category and month, drawn as one line per
# category with the rect_data rectangles shaded in the background.
#
# Each visit is classified first and the visits are counted afterwards. The
# previous version ran count(nr_max) before classifying, which collapsed all
# visits sharing an action count into one row; the second count() then tallied
# distinct nr_max values per category (e.g. at most 29 "glimpser" rows per
# month) rather than visits.
n_action_w_date |>
  mutate(
    n_actions_type = case_when(
      nr_max < 30 ~ "glimpser",
      nr_max < 300 ~ "serious user",
      TRUE ~ "heavy user"
    )
  ) |>
  count(month_date, n_actions_type) |>
  ggplot(aes(
    x = month_date,
    y = n,
    color = n_actions_type,
    group = n_actions_type
  )) +
  geom_point() +
  geom_line() +
  geom_rect(
    data = rect_data,
    aes(xmin = xmin, xmax = xmax, ymin = ymin, ymax = ymax),
    fill = "grey",
    alpha = 0.2,
    inherit.aes = FALSE # Essential to use the rect_data columns
  )

6.7.1.2 Relative Anteile

Show the code
# NOTE(review): nothing in this chunk calls a data.table function; the
# library() call is kept as-is, consider dropping it if it is not needed.
library(data.table) # Ensure data.table is loaded if not already
library(ggplot2)
library(scales) # Needed for label_percent()

# Share of each usage category among the visits of a month, shown as a
# stacked area chart normalised to 100 %.
#
# Visits are classified first and counted afterwards. The previous version ran
# count(nr_max) before classifying, so the following count() tallied distinct
# nr_max values per category and the proportions were shares of distinct
# action counts rather than shares of visits.
n_action_w_date |>
  mutate(
    n_actions_type = case_when(
      nr_max < 30 ~ "glimpser",
      nr_max < 300 ~ "serious user",
      TRUE ~ "heavy user"
    )
  ) |>
  # 1. Count the visits by month and type
  count(month_date, n_actions_type) |>
  # 2. Group ONLY by month_date
  group_by(month_date) |>
  # 3. Calculate the proportion (relative frequency) within each month
  mutate(
    proportion = n / sum(n)
  ) |>
  ungroup() |>
  # 4. Create the stacked area chart
  ggplot(aes(
    x = month_date,
    y = proportion,
    fill = n_actions_type # Use 'fill' for stacking
  )) +
  # Use geom_area() with position="fill" to stack and normalize to 100%
  geom_area(position = "fill") +

  # Optional: Customize the y-axis to show percentages
  scale_y_continuous(labels = scales::label_percent()) +

  # Optional: Add clear labels
  labs(
    y = "Proportion of Users",
    fill = "User Type",
    title = "User Type Distribution Over Time"
  )

6.7.2 fingerprint

6.7.2.1 Absolutzahlen

Show the code
# Number of rows of the fingerprint data per usage category and month, drawn
# as one line per category with the rect_data rectangles in the background.
#
# Each row is classified first and counted afterwards. The previous version
# ran count(nr_max) before classifying, so the second count() tallied distinct
# nr_max values per category instead of the underlying rows.
n_action_w_date_fingerprint |>
  mutate(
    n_actions_type = case_when(
      nr_max < 30 ~ "glimpser",
      nr_max < 300 ~ "serious user",
      TRUE ~ "heavy user"
    )
  ) |>
  count(month_date, n_actions_type) |>
  ggplot(aes(
    x = month_date,
    y = n,
    color = n_actions_type,
    group = n_actions_type
  )) +
  geom_point() +
  geom_line() +
  geom_rect(
    data = rect_data,
    aes(xmin = xmin, xmax = xmax, ymin = ymin, ymax = ymax),
    fill = "grey",
    alpha = 0.2,
    inherit.aes = FALSE # Essential to use the rect_data columns
  )

6.7.3 fingerprint unique

Show the code
# Number of unique fingerprints per usage category and month, drawn as one
# line per category with the rect_data rectangles in the background.
#
# Each fingerprint is classified first and counted afterwards. The previous
# version ran count(nr_max) before classifying, so the second count() tallied
# distinct nr_max values per category instead of fingerprints.
n_action_w_date_fingerprint_unique |>
  mutate(
    n_actions_type = case_when(
      nr_max < 30 ~ "glimpser",
      nr_max < 300 ~ "serious user",
      TRUE ~ "heavy user"
    )
  ) |>
  count(month_date, n_actions_type) |>
  ggplot(aes(
    x = month_date,
    y = n,
    color = n_actions_type,
    group = n_actions_type
  )) +
  geom_point() +
  geom_line() +
  geom_rect(
    data = rect_data,
    aes(xmin = xmin, xmax = xmax, ymin = ymin, ymax = ymax),
    fill = "grey",
    alpha = 0.2,
    inherit.aes = FALSE # Essential to use the rect_data columns
  )

6.7.3.1 Relative Anteile

Show the code
# NOTE(review): nothing in this chunk calls a data.table function; the
# library() call is kept as-is, consider dropping it if it is not needed.
library(data.table) # Ensure data.table is loaded if not already
library(ggplot2)
library(scales) # Needed for label_percent()

# Share of each usage category among the unique fingerprints of a month,
# shown as a stacked area chart normalised to 100 %.
#
# Fingerprints are classified first and counted afterwards. The previous
# version ran count(nr_max) before classifying, so the following count()
# tallied distinct nr_max values per category and the proportions were shares
# of distinct action counts rather than shares of fingerprints.
n_action_w_date_fingerprint_unique |>
  mutate(
    n_actions_type = case_when(
      nr_max < 30 ~ "glimpser",
      nr_max < 300 ~ "serious user",
      TRUE ~ "heavy user"
    )
  ) |>
  # 1. Count the fingerprints by month and type
  count(month_date, n_actions_type) |>
  # 2. Group ONLY by month_date
  group_by(month_date) |>
  # 3. Calculate the proportion (relative frequency) within each month
  mutate(
    proportion = n / sum(n)
  ) |>
  ungroup() |>
  # 4. Create the stacked area chart
  ggplot(aes(
    x = month_date,
    y = proportion,
    fill = n_actions_type # Use 'fill' for stacking
  )) +
  # Use geom_area() with position="fill" to stack and normalize to 100%
  geom_area(position = "fill") +

  # Optional: Customize the y-axis to show percentages
  scale_y_continuous(labels = scales::label_percent()) +

  # Optional: Add clear labels
  labs(
    y = "Proportion of Users",
    fill = "User Type",
    title = "User Type Distribution Over Time"
  )