I'm trying to generalize the code here for more than one repeating column: rename list of dataframe columns to mimic joined suffixes
I have a list of data frames, and some column names are shared among several of the data frames in the list.
I want to use the same pattern as reduce(left_join, suffix = c("_x", "_y"), by="inAll")
to create new names
PATTERN:
- columns with matching names that are not joined on are given the suffix _x, then _y
- this continues with _x_x and _y_y and so on
- if the number of list items containing a repeated column is odd, the last occurrence gets no suffix
- this pattern should be applied to all columns except those passed to the function as excluded
(here "inAll")
library(dplyr)
library(purrr)
library(stringr)
# Example input: a named list of six data frames. `inAll` and `inAll_2` occur
# in every data frame, `inSome` and `inOthers` in only a few, and `only_one`
# in a single data frame.
dd <- list(
  data = list(
    ONE = data.frame(
      inAll = c(1.1, 1.2, 1.3),
      inAll_2 = c(1.4, 1.5, 1.6),
      inSome = c(1.7, 1.8, 1.9),
      only_one = c(1.10, 1.11, 1.12)
    ),
    TWO = data.frame(
      inAll = c(2.1, 2.2, 2.3),
      inAll_2 = c(2.4, 2.5, 2.6),
      inOthers = c(2.7, 2.8, 2.9)
    ),
    THREE = data.frame(
      inAll = c(3.1, 3.2, 3.3),
      inAll_2 = c(3.4, 3.5, 3.6)
    ),
    FOUR = data.frame(
      inAll = c(4.1, 4.2, 4.3),
      inAll_2 = c(4.4, 4.5, 4.6),
      inOthers = c(4.10, 4.11, 4.12),
      inSome = c(4.7, 4.8, 4.9)
    ),
    FIVE = data.frame(
      inAll = c(5.1, 5.2, 5.3),
      inAll_2 = c(5.4, 5.5, 5.6)
    ),
    SIX = data.frame(
      inAll = c(6.1, 6.2, 6.3),
      inAll_2 = c(6.4, 6.5, 6.6),
      inOthers = c(6.7, 6.8, 6.8)
    )
  )
)
Desired output
# Expected result of renaming `dd$data` while leaving `inAll` untouched:
# repeated columns get _x, _y, _x_x, _y_y, ... in list order.
dd[["data2"]] <- list(
  ONE = data.frame(
    inAll = c(1.1, 1.2, 1.3),
    inAll_2_x = c(1.4, 1.5, 1.6),
    inSome_x = c(1.7, 1.8, 1.9),
    only_one = c(1.10, 1.11, 1.12)
  ),
  TWO = data.frame(
    inAll = c(2.1, 2.2, 2.3),
    inAll_2_y = c(2.4, 2.5, 2.6),
    inOthers_x = c(2.7, 2.8, 2.9)
  ),
  THREE = data.frame(
    inAll = c(3.1, 3.2, 3.3),
    inAll_2_x_x = c(3.4, 3.5, 3.6)
  ),
  FOUR = data.frame(
    inAll = c(4.1, 4.2, 4.3),
    inAll_2_y_y = c(4.4, 4.5, 4.6),
    inOthers_y = c(4.10, 4.11, 4.12),
    inSome_y = c(4.7, 4.8, 4.9)
  ),
  FIVE = data.frame(
    inAll = c(5.1, 5.2, 5.3),
    inAll_2_x_x_x = c(5.4, 5.5, 5.6)
  ),
  SIX = data.frame(
    inAll = c(6.1, 6.2, 6.3),
    inAll_2_y_y_y = c(6.4, 6.5, 6.6),
    inOthers = c(6.7, 6.8, 6.8)
  )
)
How to get there:
Open to entirely different ideas!!!
#' Rename repeated columns across a list of data frames with _x/_y suffixes
#'
#' Walks the data frames in list order and, for every column name not listed
#' in `toExclude`, tags its successive occurrences with `_x`, `_y`, `_x_x`,
#' `_y_y`, `_x_x_x`, ... When a name occurs an odd number of times in total,
#' its last occurrence has no partner and keeps its original name (so a name
#' that occurs only once is never changed).
#'
#' @param data A list of data frames (named or unnamed).
#' @param toExclude Character vector of column names that must be left
#'   untouched, e.g. the join key(s).
#' @return A list of the same length and with the same names as `data`, in
#'   which each data frame has its columns renamed as described above. Column
#'   contents and order are unchanged.
new_names <- function(data, toExclude) {
  cols_per_df <- lapply(data, colnames)
  all_cols <- unlist(cols_per_df, use.names = FALSE)
  if (length(all_cols) == 0L) {
    # Nothing to rename (empty list, or only zero-column data frames).
    return(data)
  }

  # For every column, in list order: which occurrence of its name this is
  # (1st, 2nd, ...) and how many times the name occurs overall.
  position <- seq_along(all_cols)
  occurrence <- ave(position, all_cols, FUN = seq_along)
  total <- ave(position, all_cols, FUN = length)

  # Odd occurrences take "_x", even ones "_y"; the suffix is repeated once per
  # completed pair: 1, 2 -> depth 1; 3, 4 -> depth 2; and so on.
  suffix <- strrep(
    ifelse(occurrence %% 2L == 1L, "_x", "_y"),
    (occurrence + 1L) %/% 2L
  )

  # The last occurrence of an odd-count name has no partner: no suffix.
  is_unpaired <- occurrence == total & total %% 2L == 1L
  suffix[is_unpaired | all_cols %in% toExclude] <- ""

  # Hand the new names back to the data frame each column came from. The
  # explicit factor levels keep zero-column data frames in place.
  owner <- factor(
    rep(seq_along(data), lengths(cols_per_df)),
    levels = seq_along(data)
  )
  renamed <- unname(split(paste0(all_cols, suffix), owner))

  Map(
    function(df, nms) {
      colnames(df) <- nms
      df
    },
    data,
    renamed
  )
}
Function specifying data frame and column to exclude which should yield the desired output:
# Rename every repeated column in the example list, leaving `inAll` as is.
new_names(data = dd$data, toExclude = "inAll")
Any help, even with just what logic to use to get my desired end result would be really appreciated, thank you!