Skip to main content
added 53 characters in body
Source Link
RomanPerekhrest
  • 30.9k
  • 5
  • 47
  • 68

awk approach:

awk 'BEGIN{OFS="\t";print "pos" OFS "COL1"}{if(NR==1){for(f=2;f<=NF;f++) c[f]=$f;}
     else{for(i=2;i<=NF;i++) print $1,c[i],$i}}' real2.txt

The output:

pos     COL1
18691441        COL1    C
18691441        COL2    A
18691441        COL3    G
18691572        COL1    G
18691572        COL2    C
18691572        COL3    G
18691620        COL1    A
18691620        COL2    T
18691620        COL3    G
18691716        COL1    C
18691716        COL2    G
18691716        COL3    C

OFS="\t" - output field separator

print "pos" OFS "COL1" - prints header line

if(NR==1){for(f=2;f<=NF;f++) c[f]=$f;} - collecting column names from the first/header line

for(i=2;i<=NF;i++) print $1, c[i], $i - printing each column (COL...) value "rowwise" regarding to respective pos column value and its corresponding column name.

awk approach:

awk 'BEGIN{OFS="\t";print "pos" OFS "COL1"}{if(NR==1){for(f=2;f<=NF;f++) c[f]=$f;}
     else{for(i=2;i<=NF;i++) print $1,c[i],$i}}' real2.txt

The output:

pos     COL1
18691441        COL1    C
18691441        COL2    A
18691441        COL3    G
18691572        COL1    G
18691572        COL2    C
18691572        COL3    G
18691620        COL1    A
18691620        COL2    T
18691620        COL3    G
18691716        COL1    C
18691716        COL2    G
18691716        COL3    C

OFS="\t" - output field separator

print "pos" OFS "COL1" - prints header line

NR>1 - start processing from 2nd line

for(i=2;i<=NF;i++) print $1, c[i], $i - printing each column (COL...) value "rowwise" regarding to respective pos column value and its corresponding column name.

awk approach:

awk 'BEGIN{OFS="\t";print "pos" OFS "COL1"}{if(NR==1){for(f=2;f<=NF;f++) c[f]=$f;}
     else{for(i=2;i<=NF;i++) print $1,c[i],$i}}' real2.txt

The output:

pos     COL1
18691441        COL1    C
18691441        COL2    A
18691441        COL3    G
18691572        COL1    G
18691572        COL2    C
18691572        COL3    G
18691620        COL1    A
18691620        COL2    T
18691620        COL3    G
18691716        COL1    C
18691716        COL2    G
18691716        COL3    C

OFS="\t" - output field separator

print "pos" OFS "COL1" - prints header line

if(NR==1){for(f=2;f<=NF;f++) c[f]=$f;} - collecting column names from the first/header line

for(i=2;i<=NF;i++) print $1, c[i], $i - prints each column (COL...) value row-wise, alongside the pos value of its row and the corresponding column name.

updated solution
Source Link
RomanPerekhrest
  • 30.9k
  • 5
  • 47
  • 68

awk approach:

awk 'BEGIN{OFS="\t";print "pos" OFS "COL1"}{if(NR==1){for(f=2;f<=NF;f++) c[f]=$f;}
     else{for(i=2;i<=NF;i++) print $1,c[i],$i}}' real2.txt

The output:

pos     COL1
18691441        COL1    C
18691441        COL2    A
18691441        COL3    G
18691572        COL1    G
18691572        COL2    C
18691572        COL3    G
18691620        COL1    A
18691620        COL2    T
18691620        COL3    G
18691716        COL1    C
18691716        COL2    G
18691716        COL3    C

OFS="\t" - output field separator

print "pos" OFS "COL1" - prints header line

NR>1 - start processing from 2nd line

for(i=2;i<=NF;i++) print $1, c[i], $i - printing each column (COL...) value "rowwise" regarding to respective pos column value and its corresponding column name.

awk approach:

awk 'BEGIN{OFS="\t"; print "pos" OFS "COL1"; colname[1]="COL1"; colname[2]="COL2"; colname[3]="COL3";}NR>1{for(i=2;i<=NF;i++) print $1, colname[i-1], $i}' real2.txt

The output:

pos     COL1
18691441        COL1    C
18691441        COL2    A
18691441        COL3    G
18691572        COL1    G
18691572        COL2    C
18691572        COL3    G
18691620        COL1    A
18691620        COL2    T
18691620        COL3    G
18691716        COL1    C
18691716        COL2    G
18691716        COL3    C

OFS="\t" - output field separator

print "pos" OFS "COL1" - prints header line

NR>1 - start processing from 2nd line

for(i=2;i<=NF;i++) print $1, colname[i-1], $i - printing each column (COL...) value "rowwise" regarding to respective pos column value and its corresponding column name.

awk approach:

awk 'BEGIN{OFS="\t";print "pos" OFS "COL1"}{if(NR==1){for(f=2;f<=NF;f++) c[f]=$f;}
     else{for(i=2;i<=NF;i++) print $1,c[i],$i}}' real2.txt

The output:

pos     COL1
18691441        COL1    C
18691441        COL2    A
18691441        COL3    G
18691572        COL1    G
18691572        COL2    C
18691572        COL3    G
18691620        COL1    A
18691620        COL2    T
18691620        COL3    G
18691716        COL1    C
18691716        COL2    G
18691716        COL3    C

OFS="\t" - output field separator

print "pos" OFS "COL1" - prints header line

NR>1 - start processing from 2nd line

for(i=2;i<=NF;i++) print $1, c[i], $i - printing each column (COL...) value "rowwise" regarding to respective pos column value and its corresponding column name.

awk approach:

awk 'BEGIN{OFS="\t"; print "pos" OFS "COL1"; colname[1]="COL1"; colname[2]="COL2"; colname[3]="COL3";}NR>1{for(i=2;i<=NF;i++) print $1, colname[i-1], $i}' real2.txt

The output:

pos     COL1
18691441        COL1    C
18691441        COL2    A
18691441        COL3    G
18691572        COL1    G
18691572        COL2    C
18691572        COL3    G
18691620        COL1    A
18691620        COL2    T
18691620        COL3    G
18691716        COL1    C
18691716        COL2    G
18691716        COL3    C

OFS="\t" - output field separator

print "pos" OFS "COL1" - prints header line

NR>1 - start processing from 2nd line

for(i=2;i<=NF;i++) print $1, colname[i-1], $i - printing each column (COL...) value "rowwise" regarding to respective pos column value and its corresponding column name.

awk approach:

awk 'BEGIN{OFS="\t"; print "pos" OFS "COL1"}NR>1{for(i=2;i<=NF;i++) print $1,$i}' real2.txt

The output:

pos COL1
18691441    C
18691441    A
18691441    G
18691572    G
18691572    C
18691572    G
18691620    A
18691620    T
18691620    G
18691716    C
18691716    G
18691716    C

OFS="\t" - output field separator

print "pos" OFS "COL1" - prints header line

NR>1 - start processing from 2nd line

for(i=2;i<=NF;i++) print $1,$i - printing each column (COL...) value "rowwise" regarding to respective pos column value

awk approach:

awk 'BEGIN{OFS="\t"; print "pos" OFS "COL1"; colname[1]="COL1"; colname[2]="COL2"; colname[3]="COL3";}NR>1{for(i=2;i<=NF;i++) print $1, colname[i-1], $i}' real2.txt

The output:

pos     COL1
18691441        COL1    C
18691441        COL2    A
18691441        COL3    G
18691572        COL1    G
18691572        COL2    C
18691572        COL3    G
18691620        COL1    A
18691620        COL2    T
18691620        COL3    G
18691716        COL1    C
18691716        COL2    G
18691716        COL3    C

OFS="\t" - output field separator

print "pos" OFS "COL1" - prints header line

NR>1 - start processing from 2nd line

for(i=2;i<=NF;i++) print $1, colname[i-1], $i - printing each column (COL...) value "rowwise" regarding to respective pos column value and its corresponding column name.

added 4 characters in body
Source Link
RomanPerekhrest
  • 30.9k
  • 5
  • 47
  • 68
Loading
added 227 characters in body
Source Link
RomanPerekhrest
  • 30.9k
  • 5
  • 47
  • 68
Loading
Source Link
RomanPerekhrest
  • 30.9k
  • 5
  • 47
  • 68
Loading