BEGIN {
    # Input splitting is left at awk's default field separator; it can
    # still be overridden on the command line with "-F".
    # Output fields are joined with a single tab character.
    OFS = "\t"
}
{
    # Split this record into one output row per distinct field value.
    # Every row has as many columns as the input record; a value is
    # written only in the columns where it appeared and the remaining
    # columns are left empty.

    # Remember the column count up front: clearing $0 below resets NF.
    ncols = NF
    # Number of output rows allocated so far for this record.
    nrows = 0
    # rowof[value] -> index of the output row that collects that value.
    delete rowof
    # cell[row, col] -> text to place in that column of that output row.
    delete cell

    # Pass 1: assign every field to the row belonging to its value.
    for (c = 1; c <= ncols; c++) {
        # First occurrence of this value in the record: open a new row.
        if (!($c in rowof))
            rowof[$c] = ++nrows
        cell[rowof[$c], c] = $c
    }

    # Pass 2: rebuild and print each row.
    for (r = 1; r <= nrows; r++) {
        # Start from an empty record, then assign every column so that
        # the record is re-joined with OFS when printed.
        $0 = ""
        for (c = 1; c <= ncols; c++)
            $c = cell[r, c]
        print
    }
}
Testing on the given data:
$ awk -f script.awk file
1 1 1
2 2 2
3
4 4
5 5 5
1 1
2
3 3 3
Testing on other data:
$ cat file
a b c e
1 2 1 1
2 1 1 1
1 1 1 2
$ awk -f script.awk file
a
	b
		c
			e
1		1	1
	2
2
	1	1	1
1	1	1
			2