2

I have this file.csv

"201707"|"51976551"|1|0|1|"20170702"
"201707"|"51955194"|1|0|0|"20170702"
"201707"|"51923555"|1|0|1|"20170702"
"201707"|"51976551"|1|0|1|"20170703"
"201707"|"51955194"|1|0|0|"20170703"
"201707"|"51923555"|1|0|1|"20170703"
"201707"|"51960597"|1|0|0|"20170703"

And my hope result is group by the number and sum the column 3, 4 and 5

"201707"|"51976551"|2|0|2
"201707"|"51955194"|2|0|0
"201707"|"51923555"|2|0|2
"201707"|"51960597"|1|0|0

I've tried with:

cat file.csv | awk -F"|" '
  { a[$2] += $3 }
  END {
    for (i in a) {
      printf "%s|%s\n", i, a[i];
    }
  }
'

And the result is:

"51976551"|2
"51955194"|2
"51923555"|2
"51960597"|1

Only shows the sum of third column, but I need 2 columns more. what should I do in this case?

0

2 Answers 2

4

Try:

$ awk -F"|" '{ a[$1 OFS $2]+=$3; b[$1 OFS $2]+=$4; c[$1 OFS $2]+=$5 }
  END {
    for (i in a) {
      print i, a[i], b[i], c[i];
    }
  }
' OFS=\| file.csv
"201707"|"51976551"|2|0|2
"201707"|"51960597"|1|0|0
"201707"|"51923555"|2|0|2
"201707"|"51955194"|2|0|0

How it works

  • -F"|"

    This sets the field separator on input to |.

  • a[$1 OFS $2]+=$3; b[$1 OFS $2]+=$4; c[$1 OFS $2]+=$5

    This keeps track of the totals of the third, fourth, and fifth columns.

  • END { for (i in a) { print i, a[i], b[i], c[i]; } }

    This prints out the results.

  • OFS=\|

    This tells awk to use | as the field separator on output.

Sign up to request clarification or add additional context in comments.

4 Comments

better to define key=$1 FS $2 and use it instead.
@karakfa Excellent suggestion! Thanks. (Upon reflection on your comment, I suspect OFS is better choice). Answer updated.
here doesn't matter since they are the same but in general OFS won't guarantee the composite key will be unique. whereas FS guarantees that.
Thanks John1024 and karafka, FS it's better, it showed correctly the sum of each column! You saved me!
1

To Preserve order:

By processing in END block

awk  'BEGIN{
            FS=OFS="|"
      }
      {
             k = $1 OFS $2; 
             if(!(k in t)){
                    o[++c]=k; 
                    t[k]
             } 
             for(i=3; i<=5; i++)
                    a[k OFS i]+=$i
       }
    END{
             for(i=1; i in o; i++)
             {
                 printf "%s", o[i]; 
                 for(j=3; j<=5; j++)
                     printf "%s%s", OFS, a[o[i] OFS j]; 
                 print ""
              }
        }
     ' infile

Or by reading same file twice (GNU awk)

awk  'BEGIN{
        FS=OFS="|"
      }
      function ps(f)
      {
           for(i=3;i<=5;i++)
           if(f)
           { 
                   a[k OFS i]+=$i; 
                   t[k] 
           }else 
                   s=(s ? s OFS :"") a[k OFS i]
       }
       {
         k=$1 OFS $2
       }
       FNR==NR{
         ps(1); 
         next
       }
       k in t{
         s=""; 
         ps();  
         print k, s; 
         delete t[k] 
       }
     ' infile infile

Input:

$ cat input
"201707"|"51976551"|1|0|1|"20170702"
"201707"|"51955194"|1|0|0|"20170702"
"201707"|"51923555"|1|0|1|"20170702"
"201707"|"51976551"|1|0|1|"20170703"
"201707"|"51955194"|1|0|0|"20170703"
"201707"|"51923555"|1|0|1|"20170703"
"201707"|"51960597"|1|0|0|"20170703"

Output-1:

$ awk  'BEGIN{FS=OFS="|"}{k = $1 OFS $2; if(!(k in t)){o[++c]=k; t[k]} for(i=3; i<=5; i++)a[k OFS i]+=$i}END{for(i=1; i in o; i++){printf "%s", o[i]; for(j=3; j<=5; j++)printf "%s%s", OFS, a[o[i] OFS j]; print ""}}' infile
"201707"|"51976551"|2|0|2
"201707"|"51955194"|2|0|0
"201707"|"51923555"|2|0|2
"201707"|"51960597"|1|0|0

Output-2:

$ awk  'BEGIN{FS=OFS="|"}function ps(f){for(i=3;i<=5;i++)if(f){ a[k OFS i]+=$i; t[k] }else s=(s ? s OFS :"") a[k OFS i]}{k=$1 OFS $2}FNR==NR{ps(1); next}k in t{s=""; ps();  print k, s; delete t[k] }' infile infile
"201707"|"51976551"|2|0|2
"201707"|"51955194"|2|0|0
"201707"|"51923555"|2|0|2
"201707"|"51960597"|1|0|0

Comments

Start asking to get answers

Find the answer to your question by asking.

Ask question

Explore related questions

See similar questions with these tags.