dynamically set unique column names in list of dataframes [R]

Question

I'm trying to generalize the code here for more than one repeating column: rename list of dataframe columns to mimic joined suffixes

I have a list of data frames in which some column names are repeated across different data frames. I want to create new names using the same pattern that reduce(left_join, suffix = c("_x", "_y"), by="inAll") would produce.

PATTERN:

columns that match but are not joined on are given the suffix _x, then _y
this continues with _x_x and _y_y, and so on
if the number of list items containing a repeated column is odd, the last one gets no suffix
this pattern should be applied to all columns except those specified in the function call ("inAll")

library(dplyr)
library(purrr)
library(stringr)

# Example input: a list of six data frames. Every frame has `inAll` and
# `inAll_2`; `inSome` and `inOthers` appear in only some frames, and
# `only_one` appears in a single frame.
dd <- list()
dd$data <- list(
  ONE = data.frame(
    inAll = c(1.1, 1.2, 1.3),
    inAll_2 = c(1.4, 1.5, 1.6),
    inSome = c(1.7, 1.8, 1.9),
    only_one = c(1.10, 1.11, 1.12)
  ),
  TWO = data.frame(
    inAll = c(2.1, 2.2, 2.3),
    inAll_2 = c(2.4, 2.5, 2.6),
    inOthers = c(2.7, 2.8, 2.9)
  ),
  THREE = data.frame(
    inAll = c(3.1, 3.2, 3.3),
    inAll_2 = c(3.4, 3.5, 3.6)
  ),
  FOUR = data.frame(
    inAll = c(4.1, 4.2, 4.3),
    inAll_2 = c(4.4, 4.5, 4.6),
    inOthers = c(4.10, 4.11, 4.12),
    inSome = c(4.7, 4.8, 4.9)
  ),
  FIVE = data.frame(
    inAll = c(5.1, 5.2, 5.3),
    inAll_2 = c(5.4, 5.5, 5.6)
  ),
  SIX = data.frame(
    inAll = c(6.1, 6.2, 6.3),
    inAll_2 = c(6.4, 6.5, 6.6),
    inOthers = c(6.7, 6.8, 6.8)
  )
)

Desired output

# Expected result: the same data as `dd$data`, with every column except
# `inAll` renamed. Repeated names get `_x` / `_y` in order of appearance,
# then `_x_x` / `_y_y`, and so on; names without a partner stay unchanged.
dd$data2 <- list(
  ONE = data.frame(
    inAll = c(1.1, 1.2, 1.3),
    inAll_2_x = c(1.4, 1.5, 1.6),
    inSome_x = c(1.7, 1.8, 1.9),
    only_one = c(1.10, 1.11, 1.12)
  ),
  TWO = data.frame(
    inAll = c(2.1, 2.2, 2.3),
    inAll_2_y = c(2.4, 2.5, 2.6),
    inOthers_x = c(2.7, 2.8, 2.9)
  ),
  THREE = data.frame(
    inAll = c(3.1, 3.2, 3.3),
    inAll_2_x_x = c(3.4, 3.5, 3.6)
  ),
  FOUR = data.frame(
    inAll = c(4.1, 4.2, 4.3),
    inAll_2_y_y = c(4.4, 4.5, 4.6),
    inOthers_y = c(4.10, 4.11, 4.12),
    inSome_y = c(4.7, 4.8, 4.9)
  ),
  FIVE = data.frame(
    inAll = c(5.1, 5.2, 5.3),
    inAll_2_x_x_x = c(5.4, 5.5, 5.6)
  ),
  SIX = data.frame(
    inAll = c(6.1, 6.2, 6.3),
    inAll_2_y_y_y = c(6.4, 6.5, 6.6),
    inOthers = c(6.7, 6.8, 6.8)
  )
)

How to get there:

Open to entirely different ideas!!!

new_names <- function(data, toExclude) {
  # Work-in-progress helper: builds the candidate suffixed names for every
  # column name that is not in `toExclude`, and returns them as a list with
  # one character vector per distinct column name.
  #
  # data      - list of data frames
  # toExclude - column names that must not receive a suffix

  # Count how often each column name occurs across all data frames.
  all_names <- unlist(map(data, ~colnames(.x)))
  nnames <- tibble(all_names) %>%
    rename("names" = 1) %>%
    group_by(names) %>%
    count() %>%
    filter(!names %in% toExclude)

  # For a name occurring k times, produce k/2 rounds of ('_x', '_y'), where
  # round i repeats the suffix i times ('_x', '_y', '_x_x', '_y_y', ...).
  # NOTE(review): for odd k the half is fractional; rep() and seq_len()
  # appear to truncate it, so the unpaired occurrence gets no candidate.
  build_suffixes <- function(k) {
    half <- k / 2
    strrep(rep(c('_x', '_y'), half), rep(seq_len(half), each = 2))
  }
  suffixes <- map(nnames$n, build_suffixes)

  # Still open: somehow apply these names to the right columns.
  map2(nnames$names, suffixes, function(nm, sfx) paste0(nm, sfx))
}

A function call specifying the list of data frames and the column to exclude, which should yield the desired output:

new_names(data = dd$data, toExclude = "inAll")

Any help, even with just what logic to use to get my desired end result would be really appreciated, thank you!

akrun · Accepted Answer · 2020-04-22 23:17:23Z

Here is one option: we extract the column names from the list, split them by name so that identical names are grouped together, and change each name according to how many times it is duplicated. We then relist the changed names back into the original list structure and use map2 to set them as the column names of the original data frames.

library(purrr)
library(dplyr)
library(stringr)
inp <- dd$data

# Columns that must keep their original name (the join keys). Any number of
# names can be listed here, e.g. c("inAll", "id").
exclude <- "inAll"

# Column names of each data frame, and the same names flattened into a single
# vector (in list order) so that they can be grouped and rewritten.
lst1 <- map(inp, names)
nm1 <- unlist(lst1)

# TRUE for every column occurrence that is eligible for a suffix.
i1 <- !nm1 %in% exclude

# unsplit() cannot rebuild a vector from an empty list, so the renaming is
# skipped when every column is excluded.
if (any(i1)) {
  # Group the occurrences of each column name, in order of appearance.
  lst2 <- split(nm1[i1], nm1[i1])

  # Only names that occur more than once get suffixes.
  i2 <- lengths(lst2) > 1
  lst2[i2] <- map(lst2[i2], function(occ) {
    # Occurrences are consumed in pairs: pair k gets "_x" and "_y", each
    # repeated k times. A trailing unpaired occurrence is left unchanged.
    n_pairs <- length(occ) %/% 2
    paired <- seq_len(2 * n_pairs)
    suffix <- strrep(
      rep(c("_x", "_y"), n_pairs),
      rep(seq_len(n_pairs), each = 2)
    )
    occ[paired] <- str_c(occ[paired], suffix)
    occ
  })

  # Put the rewritten names back in their original positions.
  nm1[i1] <- unsplit(lst2, nm1[i1])
}

# Restore the per-data-frame structure and apply the new column names.
out2 <- map2(inp, relist(nm1, skeleton = lst1), set_names)

Checking against the OP's desired output:

# Compare the renamed list with the hand-written expected result.
out <- dd$data2
identical(out2, out)
# [1] TRUE

Collectives™ on Stack Overflow

dynamically set unique column names in list of dataframes [R]

PATTERN:

Desired output

How to get there:

1 Answer 1

Comments

Linked

Hot Network Questions

Collectives™ on Stack Overflow

PATTERN:

Desired output

How to get there:

1 Answer 1

Comments

Linked

Related