
Transform every single case of a single case data frame
Source: R/transform-helper-functions.R, R/transform.scdf.R
transform.scdf.Rd
Takes an scdf and applies transformations to each individual case. This is useful to calculate new variables or to modify existing ones.
Usage
moving_median(x, lag = 1)
moving_mean(x, lag = 1)
local_regression(x, mt = 1:length(x), f = 0.2)
set_na_at(x, first_of, positions = 0)
center_at(x, at = TRUE, shift = 0, part = 0)
first_of(x, positions = 0)
across_cases(...)
all_cases(...)
rowwise(...)
# S3 method for class 'scdf'
transform(`_data`, ...)
Arguments
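- x
A vector. For first_of() this is a logical vector; for the other helper functions it is the vector of values to be processed (e.g. the dependent variable).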
- lag
Number of values surrounding a value to calculate the average
- mt
A vector with measurement times.
- f
the proportion of surrounding data influencing each data point.
- first_of
A logical vector
- positions
A numeric vector with relative positions to the first appearance of a TRUE value in x.
- at
A logical vector, e.g. phase == "A". The first TRUE value of that vector is the target position for centring. By default, this is the first position.
- shift
A value indicating a shift in measurement times for centring. E.g. shift = 4 will centre four measurement times after the position defined by the at and part arguments.
- part
A numeric value between 0 and 1. 0 refers to the first TRUE in the at vector, 1 to the last, and 0.5 to the midpoint of the sequence of TRUE values. E.g. if you want to centre at the middle of phase A, set at = phase == "A", part = 0.5. Note: decimals are rounded to integers.
- ...
Expressions.
- _data
An scdf.
Details
This function is a method of the generic transform() function.
Unlike the generic version, expressions are evaluated serially:
the result of one expression is used as the basis for subsequent computations.
Several helper functions can be used inside expressions:
- n(): returns the number of measurements in a case.
- all_cases(): extracts the values of a variable across all cases. Takes an expression as argument. For example: mean(all_cases(values)) calculates the mean of values across all cases. mean(all_cases(values[phase == "A"])) calculates the mean of all values where phase == "A".
- rowwise(): applies a calculation separately to each row. Example: rowwise(sum(values, mt, na.rm = TRUE)).
- across_cases(): creates new variables or replaces existing ones across all cases. Example: across_cases(values_ranked = rank(values, na.last = "keep")).
See also
Other data manipulation functions:
add_l2(),
as.data.frame.scdf(),
as_scdf(),
fill_missing(),
outlier(),
ranks(),
rescale(),
scdf(),
select_cases(),
set_vars(),
shift(),
smooth_cases(),
standardize(),
truncate_phase()
Examples
## Creates a single-case data frame with frequency distributions. The proportion and
## percentage of the frequencies are calculated with transform:
design <- design(
n = 3,
level = 5,
distribution = "binomial",
n_trials = 20,
start_value = 0.5
)
study <- random_scdf(design)
transform(study, proportion = values/trials, percentage = proportion * 100)
#> #A single-case data frame with three cases
#>
#> [case #1]: phase values mt trials proportion percentage
#> A 12 1 20 0.6 60
#> A 13 2 20 0.65 65
#> A 11 3 20 0.55 55
#> A 9 4 20 0.45 45
#> A 9 5 20 0.45 45
#> B 20 6 20 1 100
#> B 20 7 20 1 100
#> B 20 8 20 1 100
#> B 20 9 20 1 100
#> B 20 10 20 1 100
#> B 20 11 20 1 100
#> B 20 12 20 1 100
#> B 20 13 20 1 100
#> B 20 14 20 1 100
#> B 20 15 20 1 100
#> # ... up to five more rows
#> # two more cases
## Z standardize the dependent variable and add two new variables:
exampleAB |>
transform(
values = scale(values),
mean_values = mean(values),
sd_values = sd(values)
)
#> #A single-case data frame with three cases
#>
#> Johanna: values mt phase mean_values sd_values
#> -1.31 1 A 0 1
#> -1.4 2 A 0 1
#> -1.14 3 A 0 1
#> -0.97 4 A 0 1
#> -1.48 5 A 0 1
#> -0.71 6 B 0 1
#> -0.62 7 B 0 1
#> 0.15 8 B 0 1
#> -0.28 9 B 0 1
#> -0.45 10 B 0 1
#> 0.75 11 B 0 1
#> 0.06 12 B 0 1
#> 0.41 13 B 0 1
#> 1.1 14 B 0 1
#> 0.67 15 B 0 1
#> # ... up to five more rows
#> # two more cases
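## Use `all()` (inside transform() it extracts values across all cases, like
## `all_cases()`, rather than acting as base R's all()) to calculate global variables.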
exampleAB |>
transform(
values_center_case = values - mean(values[phase == "A"]),
values_center_global = values - mean(all(values[phase == "A"])),
value_dif = values_center_case - values_center_global
)
#> #A single-case data frame with three cases
#>
#> Johanna: values mt phase values_center_case values_center_global value_dif
#> 54 1 A -0.6 0.67 -1.27
#> 53 2 A -1.6 -0.33 -1.27
#> 56 3 A 1.4 2.67 -1.27
#> 58 4 A 3.4 4.67 -1.27
#> 52 5 A -2.6 -1.33 -1.27
#> 61 6 B 6.4 7.67 -1.27
#> 62 7 B 7.4 8.67 -1.27
#> 71 8 B 16.4 17.67 -1.27
#> 66 9 B 11.4 12.67 -1.27
#> 64 10 B 9.4 10.67 -1.27
#> 78 11 B 23.4 24.67 -1.27
#> 70 12 B 15.4 16.67 -1.27
#> 74 13 B 19.4 20.67 -1.27
#> 82 14 B 27.4 28.67 -1.27
#> 77 15 B 22.4 23.67 -1.27
#> # ... up to five more rows
#> # two more cases
## Use `across_cases` to calculate or replace a variable with values from
## all cases. E.g., standardize the dependent variable:
exampleABC |>
transform(
across_cases(values = scale(values))
)
#> #A single-case data frame with three cases
#>
#> Marie: values mt phase │ Rosalind: values mt phase │ Lise: values mt phase │
#> -0.24 1 A │ -1.02 1 A │ -0.81 1 A │
#> -0.39 2 A │ -1.45 2 A │ -1.16 2 A │
#> -0.1 3 A │ -1.02 3 A │ 0.11 3 A │
#> 0.11 4 A │ -0.67 4 A │ -0.6 4 A │
#> -0.74 5 A │ -0.53 5 A │ 0.32 5 A │
#> -1.16 6 A │ 0.25 6 A │ -0.32 6 A │
#> -1.23 7 A │ -0.46 7 A │ -1.87 7 A │
#> -0.17 8 A │ -1.73 8 A │ -1.16 8 A │
#> -1.16 9 A │ -0.74 9 A │ 0.88 9 A │
#> -1.59 10 A │ -0.1 10 A │ 0.11 10 A │
#> 1.52 11 B │ -0.1 11 A │ -1.02 11 A │
#> 0.25 12 B │ 0.25 12 A │ -1.16 12 A │
#> 0.6 13 B │ -0.46 13 A │ -1.02 13 A │
#> 1.52 14 B │ -1.09 14 A │ -1.8 14 A │
#> 0.6 15 B │ -0.88 15 A │ -0.74 15 A │
#> # ... up to 15 more rows
## Rank transform the values based on all cases vs. within each case:
exampleABC |>
transform(
across_cases(values_across = rank(values, na.last="keep")),
value_within = rank(values, na.last="keep")
)
#> #A single-case data frame with three cases
#>
#> Marie: values mt phase values_across value_within
#> 58 1 A 40 11
#> 56 2 A 37.5 9.5
#> 60 3 A 44.5 13
#> 63 4 A 50 14.5
#> 51 5 A 23.5 6.5
#> 45 6 A 10 3.5
#> 44 7 A 7 2
#> 59 8 A 41.5 12
#> 45 9 A 10 3.5
#> 39 10 A 5 1
#> 83 11 B 82 27
#> 65 12 B 55 16
#> 70 13 B 65.5 20.5
#> 83 14 B 82 27
#> 70 15 B 65.5 20.5
#> # ... up to 15 more rows
#> # two more cases
## Three helper functions to smooth the data
Huber2014$Berta |>
transform(
"compliance (moving median)" = moving_median(compliance),
"compliance (moving mean)" = moving_mean(compliance),
"compliance (local regression)" = local_regression(compliance, mt)
)
#> #A single-case data frame with one case
#>
#> Berta: compliance mt phase compliance (moving median) compliance (moving mean)
#> 25 1 A 25 25
#> 20.8 2 A 25 28.47
#> 39.6 3 A 39.6 47.69
#> 75 4 A 45 55.9
#> 45 5 A 45 38.5
#> 14.6 6 A 45 32.97
#> 45.8 7 A 45 37.36
#> 33.3 8 A 33.3 33.99
#> 31.3 9 A 32.5 32.6
#> 32.5 10 A 32.5 23.1
#> 4.2 11 B 32.5 24.37
#> 45.8 12 B 32.5 33.82
#> 31.3 13 B 31.3 24.84
#> 9.4 14 B 9.4 13.51
#> 6.3 15 B 6.3 8
#> compliance (local regression)
#> 22.02
#> 28.81
#> 39.6
#> 42.3
#> 45
#> 45.4
#> 45.8
#> 36.07
#> 32.2
#> 32.5
#> 39.15
#> 45.8
#> 29.41
#> 15.05
#> 6.58
#> # ... up to 14 more rows
## Function set_na_at() helps to set NAs at specific positions relative to the
## start of a phase. E.g., you want to replace with NA the first two values of
## phase A, and the first value of phase B together with its preceding value.
byHeart2011 |>
transform(
values = set_na_at(values, phase == "A", 0:1),
values = set_na_at(values, phase == "B", -1:0)
)
#> #A single-case data frame with 11 cases
#>
#> Lisa (Turkish): values mt phase │ Patrick (Spanish): values mt phase │
#> <NA> 1 A │ <NA> 1 A │
#> <NA> 2 A │ <NA> 2 A │
#> 0 3 A │ 3 3 A │
#> 0 4 A │ 0 4 A │
#> <NA> 5 A │ <NA> 5 A │
#> <NA> 6 B │ <NA> 6 B │
#> 5 7 B │ 8 7 B │
#> 6 8 B │ 8 8 B │
#> 7 9 B │ 8 9 B │
#> 10 10 B │ 12 10 B │
#> 10 11 B │ 13 11 B │
#> 15 12 B │ 13 12 B │
#> 16 13 B │ 15 13 B │
#> 14 14 B │ 14 14 B │
#> 17 15 B │ 15 15 B │
#> # ... up to 11 more rows
#> # nine more cases