Revisions to How to repeat a row for each value of its corresponding column

added 53 characters in body

Source Link

edited May 10, 2017 at 12:14

30.9k
5
47
68

awk approach:

awk 'BEGIN{OFS="\t";print "pos" OFS "COL1"}{if(NR==1){for(f=2;f<=NF;f++) c[f]=$f;}
     else{for(i=2;i<=NF;i++) print $1,c[i],$i}}' real2.txt

The output:

pos     COL1
18691441        COL1    C
18691441        COL2    A
18691441        COL3    G
18691572        COL1    G
18691572        COL2    C
18691572        COL3    G
18691620        COL1    A
18691620        COL2    T
18691620        COL3    G
18691716        COL1    C
18691716        COL2    G
18691716        COL3    C

OFS="\t" - output field separator

print "pos" OFS "COL1" - prints header line

if(NR==1){for(f=2;f<=NF;f++) c[f]=$f; - collecting column names from the first/header line

for(i=2;i<=NF;i++) print $1, c[i], $i - printing each column (COL...) value "rowwise" regarding to respective pos column value and its corresponding column name.

awk approach:

awk 'BEGIN{OFS="\t";print "pos" OFS "COL1"}{if(NR==1){for(f=2;f<=NF;f++) c[f]=$f;}
     else{for(i=2;i<=NF;i++) print $1,c[i],$i}}' real2.txt

The output:

pos     COL1
18691441        COL1    C
18691441        COL2    A
18691441        COL3    G
18691572        COL1    G
18691572        COL2    C
18691572        COL3    G
18691620        COL1    A
18691620        COL2    T
18691620        COL3    G
18691716        COL1    C
18691716        COL2    G
18691716        COL3    C

OFS="\t" - output field separator

print "pos" OFS "COL1" - prints header line

NR>1 - start processing from 2nd line

for(i=2;i<=NF;i++) print $1, c[i], $i - printing each column (COL...) value "rowwise" regarding to respective pos column value and its corresponding column name.

awk approach:

awk 'BEGIN{OFS="\t";print "pos" OFS "COL1"}{if(NR==1){for(f=2;f<=NF;f++) c[f]=$f;}
     else{for(i=2;i<=NF;i++) print $1,c[i],$i}}' real2.txt

The output:

pos     COL1
18691441        COL1    C
18691441        COL2    A
18691441        COL3    G
18691572        COL1    G
18691572        COL2    C
18691572        COL3    G
18691620        COL1    A
18691620        COL2    T
18691620        COL3    G
18691716        COL1    C
18691716        COL2    G
18691716        COL3    C

OFS="\t" - output field separator

print "pos" OFS "COL1" - prints header line

if(NR==1){for(f=2;f<=NF;f++) c[f]=$f; - collecting column names from the first/header line

for(i=2;i<=NF;i++) print $1, c[i], $i - prints each column (COL...) value row-wise, together with its respective pos column value and its corresponding column name.

updated solution

Source Link

edited May 10, 2017 at 11:59

RomanPerekhrest

30.9k
5
47
68

awk approach:

awk 'BEGIN{OFS="\t";print "pos" OFS "COL1"}{if(NR==1){for(f=2;f<=NF;f++) c[f]=$f;}
     else{for(i=2;i<=NF;i++) print $1,c[i],$i}}' real2.txt

The output:

pos     COL1
18691441        COL1    C
18691441        COL2    A
18691441        COL3    G
18691572        COL1    G
18691572        COL2    C
18691572        COL3    G
18691620        COL1    A
18691620        COL2    T
18691620        COL3    G
18691716        COL1    C
18691716        COL2    G
18691716        COL3    C

OFS="\t" - output field separator

print "pos" OFS "COL1" - prints header line

NR>1 - start processing from 2nd line

for(i=2;i<=NF;i++) print $1, c[i], $i - printing each column (COL...) value "rowwise" regarding to respective pos column value and its corresponding column name.

awk approach:

awk 'BEGIN{OFS="\t"; print "pos" OFS "COL1"; colname[1]="COL1"; colname[2]="COL2"; colname[3]="COL3";}NR>1{for(i=2;i<=NF;i++) print $1, colname[i-1], $i}' real2.txt

The output:

pos     COL1
18691441        COL1    C
18691441        COL2    A
18691441        COL3    G
18691572        COL1    G
18691572        COL2    C
18691572        COL3    G
18691620        COL1    A
18691620        COL2    T
18691620        COL3    G
18691716        COL1    C
18691716        COL2    G
18691716        COL3    C

OFS="\t" - output field separator

print "pos" OFS "COL1" - prints header line

NR>1 - start processing from 2nd line

for(i=2;i<=NF;i++) print $1, colname[i-1], $i - printing each column (COL...) value "rowwise" regarding to respective pos column value and its corresponding column name.

awk approach:

awk 'BEGIN{OFS="\t";print "pos" OFS "COL1"}{if(NR==1){for(f=2;f<=NF;f++) c[f]=$f;}
     else{for(i=2;i<=NF;i++) print $1,c[i],$i}}' real2.txt

The output:

pos     COL1
18691441        COL1    C
18691441        COL2    A
18691441        COL3    G
18691572        COL1    G
18691572        COL2    C
18691572        COL3    G
18691620        COL1    A
18691620        COL2    T
18691620        COL3    G
18691716        COL1    C
18691716        COL2    G
18691716        COL3    C

OFS="\t" - output field separator

print "pos" OFS "COL1" - prints header line

NR>1 - start processing from 2nd line

for(i=2;i<=NF;i++) print $1, c[i], $i - printing each column (COL...) value "rowwise" regarding to respective pos column value and its corresponding column name.

I added the column names in the output

Source Link

edit approved May 10, 2017 at 11:53

onur güngör

1.3k
9
14

awk approach:

awk 'BEGIN{OFS="\t"; print "pos" OFS "COL1"; colname[1]="COL1"; colname[2]="COL2"; colname[3]="COL3";}NR>1{for(i=2;i<=NF;i++) print $1, colname[i-1], $i}' real2.txt

The output:

pos     COL1
18691441        COL1    C
18691441        COL2    A
18691441        COL3    G
18691572        COL1    G
18691572        COL2    C
18691572        COL3    G
18691620        COL1    A
18691620        COL2    T
18691620        COL3    G
18691716        COL1    C
18691716        COL2    G
18691716        COL3    C

OFS="\t" - output field separator

print "pos" OFS "COL1" - prints header line

NR>1 - start processing from 2nd line

for(i=2;i<=NF;i++) print $1, colname[i-1], $i - printing each column (COL...) value "rowwise" regarding to respective pos column value and its corresponding column name.

awk approach:

awk 'BEGIN{OFS="\t"; print "pos" OFS "COL1"}NR>1{for(i=2;i<=NF;i++) print $1,$i}' real2.txt

The output:

pos COL1
18691441    C
18691441    A
18691441    G
18691572    G
18691572    C
18691572    G
18691620    A
18691620    T
18691620    G
18691716    C
18691716    G
18691716    C

OFS="\t" - output field separator

print "pos" OFS "COL1" - prints header line

NR>1 - start processing from 2nd line

for(i=2;i<=NF;i++) print $1,$i - printing each column (COL...) value "rowwise" regarding to respective pos column value

awk approach:

awk 'BEGIN{OFS="\t"; print "pos" OFS "COL1"; colname[1]="COL1"; colname[2]="COL2"; colname[3]="COL3";}NR>1{for(i=2;i<=NF;i++) print $1, colname[i-1], $i}' real2.txt

The output:

pos     COL1
18691441        COL1    C
18691441        COL2    A
18691441        COL3    G
18691572        COL1    G
18691572        COL2    C
18691572        COL3    G
18691620        COL1    A
18691620        COL2    T
18691620        COL3    G
18691716        COL1    C
18691716        COL2    G
18691716        COL3    C

OFS="\t" - output field separator

print "pos" OFS "COL1" - prints header line

NR>1 - start processing from 2nd line

for(i=2;i<=NF;i++) print $1, colname[i-1], $i - printing each column (COL...) value "rowwise" regarding to respective pos column value and its corresponding column name.

added 4 characters in body

Source Link

edited May 10, 2017 at 7:47

RomanPerekhrest

30.9k
5
47
68

Loading

added 227 characters in body

Source Link

edited May 10, 2017 at 7:32

RomanPerekhrest

30.9k
5
47
68

Loading

Source Link

answered May 10, 2017 at 7:25

RomanPerekhrest

30.9k
5
47
68

Loading

Stack Exchange Network

Return to Answer