Changing a script to use a list of data file names/variables?

Question

I have been using this template script to load and plot data:

library("ggplot2")
library("scales")
library("zoo")
library("lubridate")
library("grid")
library("gridExtra")
library("gtable")
library("labeling")

# Read the CSV, add a per-row total of every column except the first, and
# parse the `filenames` column into date-times with ymd_hms().
Template.sum.plot <- read.csv(
  "F:/br/RF/Template_SK.csv",
  header = TRUE, sep = ",", dec = ".", quote = ""
)
Template.sum.plot$Template_SK.SUM <- rowSums(Template.sum.plot[, -1])
Template.sum.plot$date.hour <- ymd_hms(Template.sum.plot$filenames)
Template.sum.plot <- as.data.frame(Template.sum.plot)

# Blue line: row totals over time. Red dashed lines: the two fixed
# timestamps. Dark line: 336-point rolling mean of the row totals.
Template_Total.SK.Data.Table.plotted <-
  ggplot(
    data = Template.sum.plot,
    aes(x = date.hour, y = Template_SK.SUM)
  ) +
  geom_line(color = "blue") +
  scale_y_continuous(labels = comma) +
  scale_x_datetime(
    position = "top",
    labels = date_format("%F"),
    date_breaks = "1 month"
  ) +
  theme(
    axis.title.x = element_blank(),
    plot.title = element_text(
      hjust = 0.5,
      size = 12,
      margin = margin(0, 0, 10, 0)
    ),
    panel.background = element_rect(fill = "white"),
    panel.grid.major = element_line(colour = "black", linetype = 3),
    panel.grid.minor = element_blank(),
    axis.ticks.x = element_blank()
  ) +
  labs(
    x = paste(
      "4WRM, ",
      format(Template.sum.plot$date.hour[1], "%Y-%m-%d %T %Z"),
      " to ",
      Template.sum.plot$date.hour[nrow(Template.sum.plot)]
    ),
    y = "PPM\n"
  ) +
  ggtitle("Template PON") +
  geom_vline(
    xintercept = as.numeric(
      Template.sum.plot$date.hour[
        which(Template.sum.plot$date.hour == "2019-07-12 06:00:00")
      ]
    ),
    linetype = 2,
    color = "red"
  ) +
  geom_vline(
    xintercept = as.numeric(
      Template.sum.plot$date.hour[
        which(Template.sum.plot$date.hour == "2019-09-16 06:00:00")
      ]
    ),
    linetype = 2,
    color = "red"
  ) +
  geom_line(
    aes(y = rollmean(Template_SK.SUM, 336, na.pad = TRUE)),
    color = "#111111"
  )

# Convert the plot to a grob and draw it into a 30 x 5 inch PNG.
png(
  filename = "F:/br/Template.SK.Data.Table.plot.png",
  width = 30, height = 5, units = "in", res = 600
)
grobz <- lapply(list(Template_Total.SK.Data.Table.plotted), ggplotGrob)
grobz.plot <- arrangeGrob(
  grobs = list(rbind(grobz[[1]], size = "last")),
  ncol = 1
)
grid.draw(grobz.plot)
dev.off()

I have been manually searching for 'Template' and replacing it with 'XXF' and 'XXGF' before running my script up until now, but I am wondering if I can make a list (e.g. list_data <- list(c("XXF","XXGF","XXGT"))) and swap each occurrence of 'Template' for the list names?

You can definitely swap out 'Template' in the string instances. glue::glue() is a nice way to do that. But why do you need to have the variables renamed? Aren't you just going to iterate over the list_data values to get the plots you need? — andrew_reece
– andrew_reece, Commented Jul 31, 2021 at 0:30
For a while I just had two data files (XXF and XXGF) to plot so I just made one script and replaced the variable name to get results for both, but now that I have more data files, using a list of each unique part of the file names seemed like a better, long-term solution. — cap
– cap, Commented Jul 31, 2021 at 1:12

Ronak Shah · Accepted Answer · 2021-07-31 02:17:07Z

You can write a function -

#' Read one summary CSV, plot its row totals, and save the plot as a PNG
#'
#' The PNG is written next to the CSV with the same base name
#' (e.g. "XXF_SK.csv" -> "XXF_SK.png"). The plot title is built from the
#' file name with the extension and a trailing "_SK" removed
#' (e.g. "XXF_SK.csv" -> "XXF PON").
#'
#' @param filename Path to a CSV file. It must contain a `filenames` column
#'   that `ymd_hms()` can parse; every column except the first is summed
#'   per row.
#' @return The path of the PNG that was written, invisibly.
read_and_plot <- function(filename) {
  # Strip the real extension instead of replacing the first "csv" found
  # anywhere in the name.
  stem <- tools::file_path_sans_ext(basename(filename))
  series_name <- sub("_SK$", "", stem)
  png_file <- file.path(dirname(filename), paste0(stem, ".png"))

  sum_data <- read.csv(
    filename,
    header = TRUE, sep = ",", dec = ".", quote = ""
  )
  sum_data$SK_sum <- rowSums(sum_data[, -1])
  sum_data$date.hour <- ymd_hms(sum_data$filenames)

  # x position(s) of the rows whose timestamp equals `stamp`; empty when
  # no row matches, in which case no line is drawn.
  marker_at <- function(stamp) {
    as.numeric(sum_data$date.hour[which(sum_data$date.hour == stamp)])
  }

  sum_plot <- ggplot(data = sum_data, aes(x = date.hour, y = SK_sum)) +
    geom_line(color = "blue") +
    scale_y_continuous(labels = comma) +
    scale_x_datetime(
      position = "top",
      labels = date_format("%F"),
      date_breaks = "1 month"
    ) +
    theme(
      axis.title.x = element_blank(),
      plot.title = element_text(
        hjust = 0.5,
        size = 12,
        margin = margin(0, 0, 10, 0)
      ),
      panel.background = element_rect(fill = "white"),
      panel.grid.major = element_line(colour = "black", linetype = 3),
      panel.grid.minor = element_blank(),
      axis.ticks.x = element_blank()
    ) +
    labs(
      x = paste(
        "4WRM, ",
        format(sum_data$date.hour[1], "%Y-%m-%d %T %Z"),
        " to ",
        sum_data$date.hour[nrow(sum_data)]
      ),
      y = "PPM\n"
    ) +
    # Title follows the input file rather than a fixed "Template" label.
    ggtitle(paste(series_name, "PON")) +
    geom_vline(
      xintercept = marker_at("2019-07-12 06:00:00"),
      linetype = 2,
      color = "red"
    ) +
    geom_vline(
      xintercept = marker_at("2019-09-16 06:00:00"),
      linetype = 2,
      color = "red"
    ) +
    # `fill = NA` is the non-deprecated spelling of `na.pad = TRUE`.
    geom_line(
      aes(y = rollmean(SK_sum, 336, fill = NA)),
      color = "#111111"
    )

  png(filename = png_file, width = 30, height = 5, units = "in", res = 600)
  # Close the device even if drawing fails, so no PNG device is left open.
  on.exit(dev.off(), add = TRUE)

  grobz <- ggplotGrob(sum_plot)
  grobz.plot <- arrangeGrob(grobs = list(rbind(grobz, size = "last")), ncol = 1)
  grid.draw(grobz.plot)

  invisible(png_file)
}

and apply it to each csv file in the directory -

# Full paths of every file in the directory whose name ends in ".csv"
all_files <- list.files(
  path = "F:/br/RF/",
  pattern = "\\.csv$",
  full.names = TRUE
)
# Run read_and_plot() once per file
lapply(X = all_files, FUN = read_and_plot)

Note that I removed lapply in grobz <- lapply(list(Template_Total.SK.Data.Table.plotted), ggplotGrob) since Template_Total.SK.Data.Table.plotted is just one plot. Because of this, the next line also doesn't need grobz[[1]]; using grobz alone is enough.

That worked! Is there a way to keep the unique plots from the function in R's stored variables so that I can make a separate script that will stack these plots into a single png using grobz? Something like this: png(filename="F:/br/Template.SK.Data.Table.plot.png", width=30, height=15, units="in", res=600) grobz <- lapply(list(XXF_Total.SK.Data.Table.plotted, XXGF_Total.SK.Data.Table.plotted, XXGT_Total.SK.Data.Table.plotted), ggplotGrob) grobz.plot <- arrangeGrob( grobs = list(rbind(grobz[[1]], grobz[[2]], grobz[[3]], size = "last")), ncol = 1) grid.draw(grobz.plot) dev.off()
You can return only the plot from the read_and_plot function and save the list of plots with list_plot <- lapply(all_files, read_and_plot).
That worked to save them into my R session, but I'm not sure how to use the stored list to make a single png with all the plots as grobz. I know I can use length(list_plot) to get the number of grobz, but how can I specify the order of the grobz in the png? That was why I was initially reading in the list order with list_data <- list(c("XXF","XXGF","XXGT")), because the list order = the grobz order for the final comparative png.

CzechInk · Accepted Answer · 2021-08-02 22:29:11Z

One option is to work with purrr::imap(). You could create a named list as you read-in and modify your plotting data, where the names of the list elements are the file names:

# Common directory of the input files. Keep the trailing slash: the file
# name is pasted directly after it below.
path <- "F:/br/RF/"

# File-name prefixes to read ("<prefix>_SK.csv"). The resulting list keeps
# this order.
my_files <- c("XXF", "XXGF", "XXGT")

# Read each file into one list element named after its prefix. Each data
# frame gets a per-row total of every column except the first (`SK_sum`)
# and the `filenames` column parsed into date-times (`date.hour`).
# setNames() wraps the lapply() call directly, so no pipe operator (and no
# package that exports one) is needed at this point.
sum_plot <- setNames(
  lapply(my_files, function(file.x) {
    df <- read.csv(
      file = paste0(path, file.x, "_SK.csv"),
      header = TRUE, sep = ",", dec = ".", quote = ""
    )
    df$SK_sum <- rowSums(df[, -1])
    df$date.hour <- ymd_hms(df$filenames)
    df
  }),
  my_files
)

Then, use purrr's imap() to work with the plotting data and associated file name simultaneously:

library(purrr)

# Replace each data frame in `sum_plot` with its plot. imap() passes the
# list element and its name, so the name can be used in the plot title.
sum_plot <- imap(sum_plot, function(df.x, file_name.x) {
  # Parse the marker timestamps the same way as `date.hour`, so the match
  # below compares date-times with date-times instead of date-times with
  # character strings.
  marker_times <- ymd_hms(c("2019-07-12 06:00:00", "2019-09-16 06:00:00"))

  ggplot(data = df.x, aes(x = date.hour, y = SK_sum)) +
    geom_line(color = "blue") +
    scale_y_continuous(labels = comma) +
    scale_x_datetime(
      position = "top",
      labels = date_format("%F"),
      date_breaks = "1 month"
    ) +
    theme(
      axis.title.x = element_blank(),
      plot.title = element_text(
        hjust = 0.5,
        size = 12,
        margin = margin(0, 0, 10, 0)
      ),
      panel.background = element_rect(fill = "white"),
      panel.grid.major = element_line(colour = "black", linetype = 3),
      panel.grid.minor = element_blank(),
      axis.ticks.x = element_blank()
    ) +
    labs(
      x = paste0(
        "4WRM, ",
        format(df.x$date.hour[1], "%Y-%m-%d %T %Z"),
        " to ",
        df.x$date.hour[nrow(df.x)]
      ),
      y = "PPM\n"
    ) +
    ggtitle(paste0(file_name.x, " PON")) +
    # One dashed red line per row whose timestamp is a marker time
    geom_vline(
      xintercept = as.numeric(
        df.x$date.hour[df.x$date.hour %in% marker_times]
      ),
      linetype = 2,
      color = "red"
    ) +
    # Rolling mean of the plotted column (not of the whole data frame);
    # `fill = NA` is the non-deprecated spelling of `na.pad = TRUE`.
    geom_line(
      aes(y = rollmean(SK_sum, 336, fill = NA)),
      color = "#111111"
    )
})

And to save the plots in a single png (as requested in the comments):

# view the plots, stacked in one column in list order
grid.arrange(grobs = sum_plot, ncol = 1)

# save the plots
# Each stacked plot gets the 5 in of height used for a single plot, so the
# figure grows with the number of plots instead of squeezing them all into
# 5 in. NOTE: ggsave() rejects sizes above 50 in unless `limitsize = FALSE`.
ggsave(
  filename = paste0(path, "Combined.SK.Data.Table.plot.png"),
  plot = arrangeGrob(grobs = sum_plot, ncol = 1),
  width = 30, height = 5 * length(sum_plot), units = "in", dpi = 600
)

I notice you removed the grobz. Is there a way that I can count the plots/files and still use grobz to stack them up at the end within one png though?
When I run the last section of code (the first two sections work fine; no errors) it gives NULL output: $XXF NULL $XXGF NULL $XXGT NULL

Collectives™ on Stack Overflow

Changing a script to use a list of data file names/variables?

2 Answers 2

3 Comments

2 Comments

Hot Network Questions

Collectives™ on Stack Overflow

2 Answers 2

3 Comments

2 Comments

Related