| Title: | Extensions to dplyr's mutate |
|---|---|
| Description: | Extensions to dplyr's mutate. |
| Authors: | Gustavo Velásquez [aut, cre] |
| Maintainer: | Gustavo Velásquez <[email protected]> |
| License: | MIT + file LICENSE |
| Version: | 0.2.0 |
| Built: | 2025-10-26 01:40:20 UTC |
| Source: | https://github.com/gvelasq/mutagen |
This function takes a list and replaces all NULL values with NA. It is useful for working with list-columns in a data frame.
gen_na_listcol(x)
x |
A list or list-column to modify. |
Parallelization is supported via purrr::in_parallel().
A list with all NULL values replaced with NA.
library(dplyr, warn.conflicts = FALSE)
a <- mtcars %>%
  select(cyl, vs, am) %>%
  slice(1:6) %>%
  as_tibble() %>%
  mutate(listcol = list(NULL, "b", "c", "d", "e", "f"))
glimpse(a)
b <- a %>%
  mutate(across(starts_with("listcol"), gen_na_listcol))
glimpse(b)
This function calculates a column percent. The by argument calculates column percents within unique categories of grouping columns. The prop argument calculates a proportion rather than a percent.
gen_percent(data, col, by, prop = FALSE)
data |
A data frame. |
col |
The column to calculate the percent of, given as an unquoted column name (e.g. gear). |
by |
An optional character vector of columns to group by. |
prop |
If TRUE, a proportion is calculated rather than a percent. Defaults to FALSE. |
A double vector totaling 100 within col. If grouping columns are specified with by, the percent for each unique category of grouping columns will total 100 within col. If prop is specified, a double vector totaling 1 within col (or totaling 1 within unique categories of grouping columns specified with by).
library(dplyr, warn.conflicts = FALSE)
a <- as_tibble(mtcars)
gen_percent(a, gear)
b <- a %>%
  select(gear, cyl, carb) %>%
  arrange(gear, cyl, carb) %>%
  mutate(
    pct1 = gen_percent(., gear),
    pct2 = gen_percent(., gear, by = "cyl"),
    pct3 = gen_percent(., gear, by = c("cyl", "carb")),
    prop1 = gen_percent(., gear, prop = TRUE)
  )
b
This function performs a rowwise match of a set of supplied values across columns in a data frame. If any of the row values equal one of the supplied values, this function returns an integer 1 (1L) for that row, otherwise it returns an integer 0 (0L).
gen_rowany(data, cols, values)
data |
A data frame. |
cols |
<tidy-select> Columns to search for the supplied values; may be omitted, as in the examples. |
values |
A list of values to match. |
Parallelization is supported via purrr::in_parallel().
A binary integer vector containing an integer 1 (1L) for each row in which any supplied value was matched, and an integer 0 (0L) otherwise.
library(dplyr, warn.conflicts = FALSE)
a <- tibble(
  x = 1:3,
  y = rep(NA, 3),
  z = letters[1:3],
  aa = rep(FALSE, 3)
)
val <- list(1, NA, "a", FALSE)
val2 <- list(5, NaN, "d", Inf)
gen_rowany(a, values = val)
b <- a %>%
  mutate(
    q = gen_rowany(., values = val),
    r = gen_rowany(., values = val2)
  )
b
This function performs a rowwise count of columns in a data frame that match a set of supplied values.
gen_rowcount(data, cols, values)
data |
A data frame. |
cols |
<tidy-select> Columns to search for the supplied values; may be omitted, as in the examples. |
values |
A list of values to match. |
Parallelization is supported via purrr::in_parallel().
An integer vector with the number of matched values.
library(dplyr, warn.conflicts = FALSE)
a <- tibble(
  x = 1:3,
  y = rep(NA, 3),
  z = letters[1:3],
  aa = rep(FALSE, 3)
)
val <- list(1, NA, "a", FALSE)
gen_rowcount(a, values = val)
gen_rowcount(a, everything(), values = val)
gen_rowcount(a, starts_with(letters[25:26]), values = val)
b <- a %>%
  mutate(q = gen_rowcount(., values = val))
b
This function returns the rowwise first nonmissing value in a data frame.
gen_rowfirst(data, cols)
data |
A data frame. |
cols |
<tidy-select> Columns to search for nonmissing values; may be omitted, as in the examples. |
Parallelization is supported via purrr::in_parallel().
A vector of the rowwise first nonmissing value. The vector's type will be the common type of all rowwise nonmissing values.
library(dplyr, warn.conflicts = FALSE)
a <- tibble(
  x = c(1, NA, 2),
  y = c(NA, 3, NA),
  z = c(4, NA, 5)
)
gen_rowfirst(a)
gen_rowfirst(a, all_of(letters[25:26]))
b <- a %>%
  mutate(q = gen_rowfirst(.))
b
c <- a %>%
  mutate(w = c("a", TRUE, NA), .before = "x") %>%
  mutate(q = gen_rowfirst(.))
c # note that q is of type <chr>
This function returns the rowwise last nonmissing value in a data frame.
gen_rowlast(data, cols)
data |
A data frame. |
cols |
<tidy-select> Columns to search for nonmissing values; may be omitted, as in the examples. |
Parallelization is supported via purrr::in_parallel().
A vector of the rowwise last nonmissing value. The vector's type will be the common type of all rowwise nonmissing values.
library(dplyr, warn.conflicts = FALSE)
a <- tibble(
  x = c(1, NA, 2),
  y = c(NA, 3, NA),
  z = c(4, NA, 5)
)
gen_rowlast(a)
gen_rowlast(a, all_of(letters[24:25]))
b <- a %>%
  mutate(q = gen_rowlast(.))
b
c <- a %>%
  mutate(aa = c("a", TRUE, NA), .after = "z") %>%
  mutate(q = gen_rowlast(.))
c # note that q is of type <chr>
This function returns the rowwise maximum value in a data frame.
gen_rowmax(data, cols)
data |
A data frame. |
cols |
<tidy-select> Columns over which to take the rowwise maximum; may be omitted, as in the examples. |
Parallelization is supported via purrr::in_parallel().
A vector of the rowwise maximum value.
library(dplyr, warn.conflicts = FALSE)
a <- tibble(
  x = c(1, NA, 2),
  y = c(NA, 3, NA),
  z = c(4, NA, 5)
)
gen_rowmax(a)
gen_rowmax(a, everything())
gen_rowmax(a, starts_with(letters[24:25]))
b <- a %>%
  mutate(q = gen_rowmax(.))
b
This function returns the rowwise arithmetic mean value in a data frame.
gen_rowmean(data, cols)
data |
A data frame. |
cols |
<tidy-select> Columns over which to take the rowwise mean; may be omitted, as in the examples. |
Parallelization is supported via purrr::in_parallel().
A double vector of the rowwise arithmetic mean value. Missing values are ignored.
library(dplyr, warn.conflicts = FALSE)
a <- tibble(
  x = c(1, NA, 2),
  y = c(NA, 3, NA),
  z = c(4, NA, 5)
)
gen_rowmean(a)
gen_rowmean(a, everything())
gen_rowmean(a, all_of(letters[25:26]))
b <- a %>%
  mutate(q = gen_rowmean(.))
b
This function returns the rowwise median value in a data frame.
gen_rowmedian(data, cols)
data |
A data frame. |
cols |
<tidy-select> Columns over which to take the rowwise median; may be omitted, as in the examples. |
Parallelization is supported via purrr::in_parallel().
A double vector of the rowwise median value. Missing values are ignored.
library(dplyr, warn.conflicts = FALSE)
a <- tibble(
  x = c(1, NA, 2),
  y = c(2, 3, 2),
  z = c(4, NA, 5)
)
gen_rowmedian(a)
gen_rowmedian(a, everything())
gen_rowmedian(a, all_of(letters[25:26]))
b <- a %>%
  mutate(q = gen_rowmedian(.))
b
This function returns the rowwise minimum value in a data frame.
gen_rowmin(data, cols)
data |
A data frame. |
cols |
<tidy-select> Columns over which to take the rowwise minimum; may be omitted, as in the examples. |
Parallelization is supported via purrr::in_parallel().
A vector of the rowwise minimum value.
library(dplyr, warn.conflicts = FALSE)
a <- tibble(
  x = c(1, NA, 2),
  y = c(NA, 3, NA),
  z = c(4, NA, 5)
)
gen_rowmin(a)
gen_rowmin(a, everything())
gen_rowmin(a, starts_with(letters[25:26]))
b <- a %>%
  mutate(q = gen_rowmin(.))
b
This function returns the rowwise count of missing values in a data frame.
gen_rowmiss(data, cols)
data |
A data frame. |
cols |
<tidy-select> Columns in which to count missing values; may be omitted, as in the examples. |
Parallelization is supported via purrr::in_parallel().
An integer vector of the rowwise count of missing values.
library(dplyr, warn.conflicts = FALSE)
a <- tibble(
  x = c(1, NA, 2),
  y = c(NA, 3, NA),
  z = c(4, NA, 5)
)
gen_rowmiss(a)
gen_rowmiss(a, all_of(letters[25:26]))
b <- a %>%
  mutate(q = gen_rowmiss(.))
b
This function returns the rowwise nth nonmissing value in a data frame.
gen_rownth(data, cols, n)
data |
A data frame. |
cols |
<tidy-select> Columns to search for nonmissing values; may be omitted, as in the examples. |
n |
An integer vector of length 1 that specifies the position of the rowwise nth nonmissing value to search for. A negative integer will index from the end. |
Parallelization is supported via purrr::in_parallel().
A vector of the rowwise nth nonmissing value. The vector's type will be the common type of all rowwise nonmissing values.
library(dplyr, warn.conflicts = FALSE)
a <- tibble(
  x = c(1, NA, 2),
  y = c(NA, 3, NA),
  z = c(4, NA, 5)
)
gen_rownth(a, n = 1)
gen_rownth(a, n = 2)
gen_rownth(a, all_of(letters[25:26]), n = 1)
b <- a %>%
  mutate(q = gen_rownth(., n = 1), r = gen_rownth(., n = 2))
b
c <- a %>%
  mutate(w = c("a", TRUE, NA), .before = "x") %>%
  mutate(q = gen_rownth(., n = 1), r = gen_rownth(., n = 2))
c # note that q and r are of type <chr>