Return to Revisions

1 of 2

answered Dec 5, 2018 at 19:02

355.9k
42
735
1.1k

BEGIN {
    # Input splitting is left at awk's default (it can be overridden with
    # "-F" on the command line); output columns are joined with tabs.
    OFS = "\t"
}
{
    # Each input record is spread over one output row per distinct field
    # value, with every value staying in the column it came from.

    # Capture the column count up front: clearing $0 further down resets NF.
    width = NF

    # row_of[value] is the 1-based output row assigned to that value.
    # Rows are handed out in order of first appearance, so a record whose
    # fields are all different produces NF output rows.
    delete row_of
    rows = 0

    # cell[row, col] holds the text for one position of the output grid
    # (at most NF x NF positions). Positions that are never assigned read
    # back as empty strings.
    delete cell

    # Place every input field into the grid.
    for (col = 1; col <= width; ++col) {
        # First occurrence of this value within the record: open a new row.
        if (!($col in row_of))
            row_of[$col] = ++rows

        cell[row_of[$col], col] = $col
    }

    # Rebuild and print one record per grid row.
    for (row = 1; row <= rows; ++row) {
        $0 = ""

        # Assigning the fields one by one reassembles $0 using OFS.
        for (col = 1; col <= width; ++col)
            $col = cell[row, col]

        print
    }
}

Testing on the given data:

$ awk -f script.awk file
1       1       1
2       2       2
3
        4       4
5       5       5
1       1
                2
3       3       3

Testing on other data:

$ cat file
a b c e
1 2 1 1
2 1 1 1
1 1 1 2

$ awk -f script.awk file
a
        b
                c
                        e
1               1       1
        2
2
        1       1       1
1       1       1
                        2

answered Dec 5, 2018 at 19:02

Kusalananda ♦

355.9k
42
735
1.1k