Skip to main content
For multi-file processing, the code in the BEGIN block must be moved into the FNR==1 {...} block, so that the timestamp array is regenerated for each input file
Source Link
αғsнιη
  • 41.9k
  • 17
  • 75
  • 117
# For every input CSV, write one file per minute of the previous day into a
# directory named <input-name>_<YYYYMMDD>; minutes with no matching row get a
# placeholder row ending in ",0,0,0,0".
gawk -F, '
# Remove all double quotes from each record before any rule examines it.
# (A gsub in BEGIN would act on an empty record, so it is done only here.)
{ gsub(/"/,"") }

# First line of every input file: rebuild the set of expected minutes (the
# matching rule below deletes entries as it consumes them), remember the
# header and create the output directory for this file.
FNR==1{
       delete timestamp;
       start=strftime("%Y %m %d 00 00 00", systime()-86400);
       for(min=0; min<1440; min++)
           timestamp[strftime("%F %H:%M", mktime(start)+min*60)]
       hdr=$0; yday=strftime("%Y%m%d", systime()-86400);
       fname=FILENAME; sub(/\.csv$/,"", fname); dirName=fname"_"yday;
       system("mkdir "dirName); next
     }

# Row whose first 16 characters of field 1 name a still-pending minute:
# write header plus row to its own file and mark the minute as handled.
(substr($1,1,16) in  timestamp){
       cp=$1; gsub(/[-: ]|00$/, "", cp);
       print hdr ORS $0 >(dirName"/"cp".csv");
       close(dirName"/"cp".csv");
       delete  timestamp[substr($1,1,16)] }

# After each input file: emit a placeholder file for every minute that no
# row claimed.
ENDFILE{ for (x in  timestamp){
             cpx=x; gsub(/[-: ]/, "", cpx);
             print hdr ORS x ",0,0,0,0" >(dirName"/"cpx".csv");
             close(dirName"/"cpx".csv")
     }
}' multiple*.csv
# For every input CSV, write one file per minute of the previous day into a
# directory named <input-name>_<YYYYMMDD>; minutes with no matching row get a
# placeholder row ending in ",0,0,0,0".
gawk -F, '
# Remove all double quotes from each record before it is examined.
{ gsub(/"/,"") }

# First line of every input file. The set of expected minutes is rebuilt
# here rather than once in BEGIN: the matching rule below deletes entries as
# it consumes them, so a set built only once would be incomplete for the
# second and later input files.
FNR==1{
       delete timestamp;
       start=strftime("%Y %m %d 00 00 00", systime()-86400);
       for(min=0; min<1440; min++)
           timestamp[strftime("%F %H:%M", mktime(start)+min*60)]
       hdr=$0; yday=strftime("%Y%m%d", systime()-86400);
       fname=FILENAME; sub(/\.csv$/,"", fname); dirName=fname"_"yday;
       system("mkdir "dirName); next
     }

# Row whose first 16 characters of field 1 name a still-pending minute:
# write header plus row to its own file and mark the minute as handled.
(substr($1,1,16) in  timestamp){
       cp=$1; gsub(/[-: ]|00$/, "", cp);
       print hdr ORS $0 >(dirName"/"cp".csv");
       close(dirName"/"cp".csv");
       delete  timestamp[substr($1,1,16)] }

# After each input file: emit a placeholder file for every minute that no
# row claimed.
ENDFILE{ for (x in  timestamp){
             cpx=x; gsub(/[-: ]/, "", cpx);
             print hdr ORS x ",0,0,0,0" >(dirName"/"cpx".csv");
             close(dirName"/"cpx".csv")
     }
}' multiple*.csv
# For every input CSV, write one file per minute of the previous day into a
# directory named <input-name>_<YYYYMMDD>; minutes with no matching row get a
# placeholder row ending in ",0,0,0,0".
gawk -F, '
# Remove all double quotes from each record before it is examined.
{ gsub(/"/,"") }

# First line of every input file: rebuild the set of expected minutes (the
# matching rule below deletes entries as it consumes them), remember the
# header and create the output directory for this file.
FNR==1{
       delete timestamp;
       start=strftime("%Y %m %d 00 00 00", systime()-86400);
       for(min=0; min<1440; min++)
           timestamp[strftime("%F %H:%M", mktime(start)+min*60)]
       hdr=$0; yday=strftime("%Y%m%d", systime()-86400);
       fname=FILENAME; sub(/\.csv$/,"", fname); dirName=fname"_"yday;
       # \047 is a single quote, written in octal because this awk program
       # itself sits inside shell single quotes. Quoting the name keeps file
       # names containing spaces intact; -p makes a rerun on the same day
       # succeed silently when the directory already exists.
       system("mkdir -p \047" dirName "\047"); next
     }

# Row whose first 16 characters of field 1 name a still-pending minute:
# write header plus row to its own file and mark the minute as handled.
(substr($1,1,16) in  timestamp){
       cp=$1; gsub(/[-: ]|00$/, "", cp);
       print hdr ORS $0 >(dirName"/"cp".csv");
       close(dirName"/"cp".csv");
       delete  timestamp[substr($1,1,16)] }

# After each input file: emit a placeholder file for every minute that no
# row claimed.
ENDFILE{ for (x in  timestamp){
             cpx=x; gsub(/[-: ]/, "", cpx);
             print hdr ORS x ",0,0,0,0" >(dirName"/"cpx".csv");
             close(dirName"/"cpx".csv")
     }
}' multiple*.csv
added 1 character in body
Source Link
αғsнιη
  • 41.9k
  • 17
  • 75
  • 117
# Split each input CSV into per-minute files for the previous day, one output
# directory (<input-name>_<YYYYMMDD>) per input file.
gawk -F, '
# Remove all double quotes from each record before it is examined.
{ gsub(/"/,"") }

# First line of every input file. The expected-minute set is regenerated
# here, not in BEGIN, because entries are deleted as rows consume them and
# each input file needs the full set of 1440 minutes.
FNR==1{
       delete timestamp;
       start=strftime("%Y %m %d 00 00 00", systime()-86400);
       for(min=0; min<1440; min++)
           timestamp[strftime("%F %H:%M", mktime(start)+min*60)]
       hdr=$0; yday=strftime("%Y%m%d", systime()-86400);
       fname=FILENAME; sub(/\.csv$/,"", fname); dirName=fname"_"yday;
       system("mkdir "dirName); next
     }

# Row for a still-pending minute: write header plus row, then retire the
# minute so that it gets no placeholder.
(substr($1,1,16) in  timestamp){
       cp=$1; gsub(/[-: ]|00$/, "", cp);
       print hdr ORS $0 >(dirName"/"cp".csv");
       close(dirName"/"cp".csv");
       delete  timestamp[substr($1,1,16)] }

# End of each input file: write a placeholder row for every unclaimed minute.
ENDFILE{ for (x in  timestamp){
             cpx=x; gsub(/[-: ]/, "", cpx);
             print hdr ORS x ",0,0,0,0" >(dirName"/"cpx".csv");
             close(dirName"/"cpx".csv")
     }
}' multiple*.csv
# Split each input CSV into per-minute files for the previous day, one output
# directory (<input-name>_<YYYYMMDD>) per input file.
gawk -F, '
# Remove all double quotes from each record before it is examined.
{ gsub(/"/,"") }

# First line of every input file. The expected-minute set is regenerated
# here, not in BEGIN, because entries are deleted as rows consume them and
# each input file needs the full set of 1440 minutes.
FNR==1{
       delete timestamp;
       start=strftime("%Y %m %d 00 00 00", systime()-86400);
       for(min=0; min<1440; min++)
           timestamp[strftime("%F %H:%M", mktime(start)+min*60)]
       hdr=$0; yday=strftime("%Y%m%d", systime()-86400);
       # The dot is escaped so that only a literal ".csv" suffix is removed;
       # an unescaped dot would match any character before "csv".
       fname=FILENAME; sub(/\.csv$/,"", fname); dirName=fname"_"yday;
       system("mkdir "dirName); next
     }

# Row for a still-pending minute: write header plus row, then retire the
# minute so that it gets no placeholder.
(substr($1,1,16) in  timestamp){
       cp=$1; gsub(/[-: ]|00$/, "", cp);
       print hdr ORS $0 >(dirName"/"cp".csv");
       close(dirName"/"cp".csv");
       delete  timestamp[substr($1,1,16)] }

# End of each input file: write a placeholder row for every unclaimed minute.
ENDFILE{ for (x in  timestamp){
             cpx=x; gsub(/[-: ]/, "", cpx);
             print hdr ORS x ",0,0,0,0" >(dirName"/"cpx".csv");
             close(dirName"/"cpx".csv")
     }
}' multiple*.csv
# Split each input CSV into per-minute files for the previous day, one output
# directory (<input-name>_<YYYYMMDD>) per input file.
gawk -F, '
# Remove all double quotes from each record before it is examined.
{ gsub(/"/,"") }

# First line of every input file. The expected-minute set is regenerated
# here, not in BEGIN, because entries are deleted as rows consume them and
# each input file needs the full set of 1440 minutes.
FNR==1{
       delete timestamp;
       start=strftime("%Y %m %d 00 00 00", systime()-86400);
       for(min=0; min<1440; min++)
           timestamp[strftime("%F %H:%M", mktime(start)+min*60)]
       hdr=$0; yday=strftime("%Y%m%d", systime()-86400);
       fname=FILENAME; sub(/\.csv$/,"", fname); dirName=fname"_"yday;
       system("mkdir "dirName); next
     }

# Row for a still-pending minute: write header plus row, then retire the
# minute so that it gets no placeholder.
(substr($1,1,16) in  timestamp){
       cp=$1; gsub(/[-: ]|00$/, "", cp);
       print hdr ORS $0 >(dirName"/"cp".csv");
       close(dirName"/"cp".csv");
       delete  timestamp[substr($1,1,16)] }

# End of each input file: write a placeholder row for every unclaimed minute.
ENDFILE{ for (x in  timestamp){
             cpx=x; gsub(/[-: ]/, "", cpx);
             print hdr ORS x ",0,0,0,0" >(dirName"/"cpx".csv");
             close(dirName"/"cpx".csv")
     }
}' multiple*.csv
added 990 characters in body
Source Link
αғsнιη
  • 41.9k
  • 17
  • 75
  • 117
# Variant that obtains the minute stamps by running date(1) instead of using
# gawk time functions. Writes one <stamp>.csv per minute into the current
# directory; minutes without a matching row get a ",0,0,0,0" placeholder.
awk -F, -v yesterday="$(date -d'-1day' +'%F')" '
BEGIN {
    # Run date once per minute offset (0..1439) relative to the yesterday
    # variable; each result, wrapped in double quotes, becomes a pending key.
    min = 0
    while (min < 1440) {
        cmd = "date +%F\" \"%T -d\"" min "minutes" yesterday "\""
        cmd | getline stamp
        close(cmd)
        timestamp["\"" stamp "\""]
        min++
    }
}

# The very first input line is the header; keep it and move on.
NR == 1 {
    hdr = $0
    next
}

# Field 1 matches a pending key: write header plus row, retire the key.
$1 in timestamp {
    out = $1
    gsub(/[-": ]/, "", out)
    out = out ".csv"
    print hdr ORS $0 > out
    close(out)
    delete timestamp[$1]
}

# Whatever keys remain were never seen in the input: write placeholders.
END {
    for (key in timestamp) {
        out = key
        gsub(/[-": ]/, "", out)
        out = out ".csv"
        print hdr ORS key ",0,0,0,0" > out
        close(out)
    }
}' infile
# Single-input variant: write one file per minute of the previous day into
# the directory dir_<YYYYMMDD>; minutes with no matching row get a
# placeholder row ending in ",0,0,0,0".
gawk -F, '
# Pre-compute every minute of the previous day as a "YYYY-MM-DD HH:MM" key.
BEGIN{ start=strftime("%Y %m %d 00 00 00", systime()-86400);
       for(min=0; min<1440; min++)
           timestamp[strftime("%F %H:%M", mktime(start)+min*60)]
     }

# Remove all double quotes from each record before it is examined.
{ gsub(/"/,"") }

# First input line: keep the header and create the output directory.
NR==1{
       hdr=$0; yday=strftime("dir_%Y%m%d", systime()-86400);
               system("mkdir "yday); next
     }

# Row for a still-pending minute: write header plus row, then retire it.
(substr($1,1,16) in  timestamp){
           cp=$1; gsub(/[-: ]|00$/, "", cp);
           print hdr ORS $0 >(yday"/"cp".csv");
           # close() must receive exactly the string used in the redirection.
           close(yday"/"cp".csv");
           delete  timestamp[substr($1,1,16)] }

# End of input: write a placeholder row for every unclaimed minute.
END{ for (x in  timestamp){
         cpx=x; gsub(/[-: ]/, "", cpx);
         print hdr ORS x ",0,0,0,0" >(yday"/"cpx".csv");
         close(yday"/"cpx".csv")
     }
}' infile
  • cp=$1: copy the first column into the variable cp, which is used for the processing below.
  • gsub(/[-: ]|00$/, "", cp);: strip the characters -, : and space from the cp variable, as well as the trailing double zero "00".
  • print hdr ORS $0 >(yday"/"cp".csv");: print the header line (kept in the hdr variable), an ORS (the Output Record Separator, a newline by default) and the entire line $0 into the corresponding directory/fileName.csv.
  • close(yday"/"cp".csv");: close() the file after writing.
  • delete timestamp[substr($1,1,16)]: and delete that timestamp from the array.

To process multiple files, splitting each input file into its own per-day directory:

# Split each input CSV into per-minute files for the previous day, one output
# directory (<input-name>_<YYYYMMDD>) per input file.
gawk -F, '
# Remove all double quotes from each record before it is examined.
{ gsub(/"/,"") }

# First line of every input file. The expected-minute set is regenerated
# here, not in BEGIN, because entries are deleted as rows consume them and
# each input file needs the full set of 1440 minutes.
FNR==1{
       delete timestamp;
       start=strftime("%Y %m %d 00 00 00", systime()-86400);
       for(min=0; min<1440; min++)
           timestamp[strftime("%F %H:%M", mktime(start)+min*60)]
       hdr=$0; yday=strftime("%Y%m%d", systime()-86400);
       # The dot is escaped so that only a literal ".csv" suffix is removed;
       # an unescaped dot would match any character before "csv".
       fname=FILENAME; sub(/\.csv$/,"", fname); dirName=fname"_"yday;
       system("mkdir "dirName); next
     }

# Row for a still-pending minute: write header plus row, then retire the
# minute so that it gets no placeholder.
(substr($1,1,16) in  timestamp){
       cp=$1; gsub(/[-: ]|00$/, "", cp);
       print hdr ORS $0 >(dirName"/"cp".csv");
       close(dirName"/"cp".csv");
       delete  timestamp[substr($1,1,16)] }

# End of each input file: write a placeholder row for every unclaimed minute.
ENDFILE{ for (x in  timestamp){
             cpx=x; gsub(/[-: ]/, "", cpx);
             print hdr ORS x ",0,0,0,0" >(dirName"/"cpx".csv");
             close(dirName"/"cpx".csv")
     }
}' multiple*.csv
# Variant that obtains the minute stamps by running date(1) instead of using
# gawk time functions. Writes one <stamp>.csv per minute into the current
# directory; minutes without a matching row get a ",0,0,0,0" placeholder.
awk -F, -v yesterday="$(date -d'-1day' +'%F')" '
BEGIN {
    # Run date once per minute offset (0..1439) relative to the yesterday
    # variable; each result, wrapped in double quotes, becomes a pending key.
    min = 0
    while (min < 1440) {
        cmd = "date +%F\" \"%T -d\"" min "minutes" yesterday "\""
        cmd | getline stamp
        close(cmd)
        timestamp["\"" stamp "\""]
        min++
    }
}

# The very first input line is the header; keep it and move on.
NR == 1 {
    hdr = $0
    next
}

# Field 1 matches a pending key: write header plus row, retire the key.
$1 in timestamp {
    out = $1
    gsub(/[-": ]/, "", out)
    out = out ".csv"
    print hdr ORS $0 > out
    close(out)
    delete timestamp[$1]
}

# Whatever keys remain were never seen in the input: write placeholders.
END {
    for (key in timestamp) {
        out = key
        gsub(/[-": ]/, "", out)
        out = out ".csv"
        print hdr ORS key ",0,0,0,0" > out
        close(out)
    }
}' infile
# Single-input variant: write one file per minute of the previous day into
# the directory dir_<YYYYMMDD>; minutes with no matching row get a
# placeholder row ending in ",0,0,0,0".
gawk -F, '
# Pre-compute every minute of the previous day as a "YYYY-MM-DD HH:MM" key.
BEGIN{ start=strftime("%Y %m %d 00 00 00", systime()-86400);
       for(min=0; min<1440; min++)
           timestamp[strftime("%F %H:%M", mktime(start)+min*60)]
     }
# Remove all double quotes from each record before it is examined.
{ gsub(/"/,"") }
# First input line: keep the header and create the output directory.
NR==1{
       hdr=$0; yday=strftime("dir_%Y%m%d", systime()-86400);
               system("mkdir "yday); next
     }
# Row for a still-pending minute: write header plus row, then retire it.
(substr($1,1,16) in  timestamp){
           cp=$1; gsub(/[-: ]|00$/, "", cp);
           print hdr ORS $0 >(yday"/"cp".csv");
           # close() must receive exactly the string used in the redirection;
           # a literal "yday/cp.csv" names a file that was never opened and
           # would leave the real output file open.
           close(yday"/"cp".csv");
           delete  timestamp[substr($1,1,16)] }
# End of input: write a placeholder row for every unclaimed minute.
END{ for (x in  timestamp){
         cpx=x; gsub(/[-: ]/, "", cpx);
         print hdr ORS x ",0,0,0,0" >(yday"/"cpx".csv");
         close(yday"/"cpx".csv")
     }
}' infile
  • cp=$1: copy the first column into the variable cp, which is used for the processing below.
  • gsub(/[-: ]|00$/, "", cp);: strip the characters -, : and space from the cp variable, as well as the trailing double zero "00".
  • print hdr ORS $0 >(yday"/"cp".csv");: print the header line (kept in the hdr variable), an ORS (the Output Record Separator, a newline by default) and the entire line $0 into the corresponding directory/fileName.csv.
  • close("yday/cp.csv");: close() the file after write.
  • delete timestamp[substr($1,1,16)]: and delete that timestamp from the array.
# Variant that obtains the minute stamps by running date(1) instead of using
# gawk time functions. Writes one <stamp>.csv per minute into the current
# directory; minutes without a matching row get a ",0,0,0,0" placeholder.
awk -F, -v yesterday="$(date -d'-1day' +'%F')" '
BEGIN {
    # Run date once per minute offset (0..1439) relative to the yesterday
    # variable; each result, wrapped in double quotes, becomes a pending key.
    min = 0
    while (min < 1440) {
        cmd = "date +%F\" \"%T -d\"" min "minutes" yesterday "\""
        cmd | getline stamp
        close(cmd)
        timestamp["\"" stamp "\""]
        min++
    }
}

# The very first input line is the header; keep it and move on.
NR == 1 {
    hdr = $0
    next
}

# Field 1 matches a pending key: write header plus row, retire the key.
$1 in timestamp {
    out = $1
    gsub(/[-": ]/, "", out)
    out = out ".csv"
    print hdr ORS $0 > out
    close(out)
    delete timestamp[$1]
}

# Whatever keys remain were never seen in the input: write placeholders.
END {
    for (key in timestamp) {
        out = key
        gsub(/[-": ]/, "", out)
        out = out ".csv"
        print hdr ORS key ",0,0,0,0" > out
        close(out)
    }
}' infile
# Single-input variant: write one file per minute of the previous day into
# the directory dir_<YYYYMMDD>; minutes with no matching row get a
# placeholder row ending in ",0,0,0,0".
gawk -F, '
BEGIN {
    # Register all 1440 minutes of the previous day as pending keys,
    # stepping in 60-second offsets from that day at 00:00:00.
    midnight = mktime(strftime("%Y %m %d 00 00 00", systime() - 86400))
    for (offset = 0; offset < 1440 * 60; offset += 60)
        timestamp[strftime("%F %H:%M", midnight + offset)]
}

# Drop every double quote from the record before it is examined.
{ gsub(/"/, "") }

# Header line: remember it and create the output directory.
NR == 1 {
    hdr = $0
    yday = strftime("dir_%Y%m%d", systime() - 86400)
    system("mkdir " yday)
    next
}

# Row for a pending minute: write header plus row, then retire the minute.
(substr($1, 1, 16) in timestamp) {
    minute = substr($1, 1, 16)
    out = $1
    gsub(/[-: ]|00$/, "", out)
    out = yday "/" out ".csv"
    print hdr ORS $0 > out
    close(out)
    delete timestamp[minute]
}

# Minutes still pending at end of input get a placeholder file.
END {
    for (slot in timestamp) {
        out = slot
        gsub(/[-: ]/, "", out)
        out = yday "/" out ".csv"
        print hdr ORS slot ",0,0,0,0" > out
        close(out)
    }
}' infile
  • cp=$1: copy the first column into the variable cp, which is used for the processing below.
  • gsub(/[-: ]|00$/, "", cp);: strip the characters -, : and space from the cp variable, as well as the trailing double zero "00".
  • print hdr ORS $0 >(yday"/"cp".csv");: print the header line (kept in the hdr variable), an ORS (the Output Record Separator, a newline by default) and the entire line $0 into the corresponding directory/fileName.csv.
  • close(yday"/"cp".csv");: close() the file after write.
  • delete timestamp[substr($1,1,16)]: and delete that timestamp from the array.

To process multiple files, splitting each input file into its own per-day directory:

# Split each input CSV into per-minute files for the previous day, one output
# directory (<input-name>_<YYYYMMDD>) per input file.
gawk -F, '
# Remove all double quotes from each record before it is examined.
{ gsub(/"/,"") }

# First line of every input file. The expected-minute set is regenerated
# here, not in BEGIN, because entries are deleted as rows consume them and
# each input file needs the full set of 1440 minutes.
FNR==1{
       delete timestamp;
       start=strftime("%Y %m %d 00 00 00", systime()-86400);
       for(min=0; min<1440; min++)
           timestamp[strftime("%F %H:%M", mktime(start)+min*60)]
       hdr=$0; yday=strftime("%Y%m%d", systime()-86400);
       # The dot is escaped so that only a literal ".csv" suffix is removed;
       # an unescaped dot would match any character before "csv".
       fname=FILENAME; sub(/\.csv$/,"", fname); dirName=fname"_"yday;
       system("mkdir "dirName); next
     }

# Row for a still-pending minute: write header plus row, then retire the
# minute so that it gets no placeholder.
(substr($1,1,16) in  timestamp){
       cp=$1; gsub(/[-: ]|00$/, "", cp);
       print hdr ORS $0 >(dirName"/"cp".csv");
       close(dirName"/"cp".csv");
       delete  timestamp[substr($1,1,16)] }

# End of each input file: write a placeholder row for every unclaimed minute.
ENDFILE{ for (x in  timestamp){
             cpx=x; gsub(/[-: ]/, "", cpx);
             print hdr ORS x ",0,0,0,0" >(dirName"/"cpx".csv");
             close(dirName"/"cpx".csv")
     }
}' multiple*.csv
deleted 20 characters in body
Source Link
αғsнιη
  • 41.9k
  • 17
  • 75
  • 117
Loading
added 4236 characters in body
Source Link
αғsнιη
  • 41.9k
  • 17
  • 75
  • 117
Loading
added 848 characters in body
Source Link
αғsнιη
  • 41.9k
  • 17
  • 75
  • 117
Loading
add GNU awk solution
Source Link
αғsнιη
  • 41.9k
  • 17
  • 75
  • 117
Loading
Source Link
αғsнιη
  • 41.9k
  • 17
  • 75
  • 117
Loading