Skip to main content
added 291 characters in body
Source Link
Raffa
  • 509
  • 2
  • 5

Assuming GNU awk (gawk). No fault-proofing is included, though (I am taking your word for it that "that is the literal document"), so:

$ cat file
foo
foobar
  bar
  baz
    bat
bar
$
$ gawk 'BEGIN {
    # Walk array "a" in ascending numeric index order so that the path
    # components come out from the shallowest indentation to the deepest.
    PROCINFO["sorted_in"] = "@ind_num_asc"
}

{
    # match() sets RLENGTH to the width of the leading blanks (0 = top level).
    match($0, /^[ \t]*/)
    if (RLENGTH == 0) {
        # The previous top-level line had no children: print it on its own.
        if (NR > 1 && length(a) == 1) {
            print a[0]
        }
        delete a
        a[0] = $0
        lsnum = RLENGTH
    }
    if (RLENGTH > lsnum) {
        # Forget components stored at deeper levels, otherwise a dedent
        # would keep a stale component at the end of the path.
        for (i in a) {
            if (i + 0 > RLENGTH) {
                delete a[i]
            }
        }
        lsnum = RLENGTH
        # The component is keyed by its indentation width.
        a[lsnum] = "." substr($0, RLENGTH + 1)
        p = 1
    }
    if (p == 1) {
        # Emit the whole path accumulated so far on one line.
        for (i in a) {
            printf "%s", a[i]
        }
        print ""
        lsnum = 0
        p = 0
    }
}

END {
    # A trailing top-level line without children has not been printed yet.
    if (length(a) == 1) {
        print a[0]
    }
}' file
foo
foobar.bar
foobar.baz
foobar.baz.bat
bar

That should work if the lines in your file are indented with either spaces or tabs. For a mix of the two, however, you might need to tweak it a bit (e.g. convert tabs into spaces or vice versa) to avoid colliding indices, which would set the wrong array elements and result in faulty output.

Assuming GNU awk (gawk). No fault-proofing is included, though (I am taking your word for it that "that is the literal document"), so:

$ cat file
foo
foobar
  bar
  baz
    bat
bar
$
$ gawk 'BEGIN {
    # Numeric (not string) index order: with string ordering an
    # indentation width of 10 would sort before a width of 2.
    PROCINFO["sorted_in"] = "@ind_num_asc"
}

{
    # match() sets RLENGTH to the width of the leading blanks (0 = top level).
    match($0, /^[ \t]*/)
    if (RLENGTH == 0) {
        # The previous top-level line had no children: print it on its own.
        if (NR > 1 && length(a) == 1) {
            print a[0]
        }
        delete a
        a[0] = $0
        lsnum = RLENGTH
    }
    if (RLENGTH > lsnum) {
        # Forget components stored at deeper levels, otherwise a dedent
        # would keep a stale component at the end of the path.
        for (i in a) {
            if (i + 0 > RLENGTH) {
                delete a[i]
            }
        }
        lsnum = RLENGTH
        # The component is keyed by its indentation width.
        a[lsnum] = "." substr($0, RLENGTH + 1)
        p = 1
    }
    if (p == 1) {
        # Emit the whole path accumulated so far on one line.
        for (i in a) {
            printf "%s", a[i]
        }
        print ""
        lsnum = 0
        p = 0
    }
}

END {
    # A trailing top-level line without children has not been printed yet.
    if (length(a) == 1) {
        print a[0]
    }
}' file
foo
foobar.bar
foobar.baz
foobar.baz.bat
bar

Assuming GNU awk (gawk). No fault-proofing is included, though (I am taking your word for it that "that is the literal document"), so:

$ cat file
foo
foobar
  bar
  baz
    bat
bar
$
$ gawk 'BEGIN {
    # Walk array "a" in ascending numeric index order so that the path
    # components come out from the shallowest indentation to the deepest.
    PROCINFO["sorted_in"] = "@ind_num_asc"
}

{
    # match() sets RLENGTH to the width of the leading blanks (0 = top level).
    match($0, /^[ \t]*/)
    if (RLENGTH == 0) {
        # The previous top-level line had no children: print it on its own.
        if (NR > 1 && length(a) == 1) {
            print a[0]
        }
        delete a
        a[0] = $0
        lsnum = RLENGTH
    }
    if (RLENGTH > lsnum) {
        # Forget components stored at deeper levels, otherwise a dedent
        # would keep a stale component at the end of the path.
        for (i in a) {
            if (i + 0 > RLENGTH) {
                delete a[i]
            }
        }
        lsnum = RLENGTH
        # The component is keyed by its indentation width.
        a[lsnum] = "." substr($0, RLENGTH + 1)
        p = 1
    }
    if (p == 1) {
        # Emit the whole path accumulated so far on one line.
        for (i in a) {
            printf "%s", a[i]
        }
        print ""
        lsnum = 0
        p = 0
    }
}

END {
    # A trailing top-level line without children has not been printed yet.
    if (length(a) == 1) {
        print a[0]
    }
}' file
foo
foobar.bar
foobar.baz
foobar.baz.bat
bar

That should work if the lines in your file are indented with either spaces or tabs. For a mix of the two, however, you might need to tweak it a bit (e.g. convert tabs into spaces or vice versa) to avoid colliding indices, which would set the wrong array elements and result in faulty output.

deleted 925 characters in body
Source Link
Raffa
  • 509
  • 2
  • 5
$ cat docfile
foo
foobar
  bar
  baz
    bat
bar
$
$ gawk 'BEGIN {
    # Walk array "a" in ascending numeric index order so that the path
    # components come out from the shallowest indentation to the deepest.
    PROCINFO["sorted_in"] = "@ind_num_asc"
}

{
    # match() sets RLENGTH to the width of the leading blanks (0 = top level).
    match($0, /^[ \t]*/)
    if (RLENGTH == 0) {
        # The previous top-level line had no children: print it on its own.
        if (NR > 1 && length(a) == 1) {
            print a[0]
        }
        delete a
        a[0] = $0
        lsnum = RLENGTH
    }
    if (RLENGTH > lsnum) {
        # Forget components stored at deeper levels, otherwise a dedent
        # would keep a stale component at the end of the path.
        for (i in a) {
            if (i + 0 > RLENGTH) {
                delete a[i]
            }
        }
        lsnum = RLENGTH
        # The component is keyed by its indentation width.
        a[lsnum] = "." substr($0, RLENGTH + 1)
        p = 1
    }
    if (RLENGTH == lsnum && p == 1) {
        # Emit the whole path accumulated so far on one line.
        for (i in a) {
            printf "%s", a[i]
        }
        print ""
        lsnum = 0
        p = 0
    }
}

END {
    # No NR > 1 guard here, so that a one-line input is still printed.
    if (length(a) == 1) {
        print a[0]
    }
}' doc
foo
foobar.bar
foobar.baz
foobar.baz.bat
bar

EDIT: here is the above script formatted by gawk -o- for readability:

BEGIN {
        # Walk array "a" in ascending numeric index order so that the path
        # components come out from the shallowest indentation to the deepest.
        PROCINFO["sorted_in"] = "@ind_num_asc"
}

{
        # match() sets RLENGTH to the width of the leading blanks (0 = top level).
        match($0, /^[ \t]*/)
        if (RLENGTH == 0) {
                # The previous top-level line had no children: print it on its own.
                if (NR > 1 && length(a) == 1) {
                        print a[0]
                }
                delete a
                a[0] = $0
                lsnum = RLENGTH
        }
        if (RLENGTH > lsnum) {
                # Forget components stored at deeper levels, otherwise a dedent
                # would keep a stale component at the end of the path.
                for (i in a) {
                        if (i + 0 > RLENGTH) {
                                delete a[i]
                        }
                }
                lsnum = RLENGTH
                # The component is keyed by its indentation width.
                a[lsnum] = "." substr($0, RLENGTH + 1)
                p = 1
        }
        if (RLENGTH == lsnum && p == 1) {
                # Emit the whole path accumulated so far on one line.
                for (i in a) {
                        printf "%s", a[i]
                }
                print ""
                lsnum = 0
                p = 0
        }
}

END {
        # No NR > 1 guard here, so that a one-line input is still printed.
        if (length(a) == 1) {
                print a[0]
        }
}
foo
foobar.bar
foobar.baz
foobar.baz.bat
bar
$ cat doc
foo
foobar
  bar
  baz
    bat
bar
$
$ gawk 'BEGIN {
    # Walk array "a" in ascending numeric index order so that the path
    # components come out from the shallowest indentation to the deepest.
    PROCINFO["sorted_in"] = "@ind_num_asc"
}

{
    # match() sets RLENGTH to the width of the leading blanks (0 = top level).
    match($0, /^[ \t]*/)
    if (RLENGTH == 0) {
        # The previous top-level line had no children: print it on its own.
        if (NR > 1 && length(a) == 1) {
            print a[0]
        }
        delete a
        a[0] = $0
        lsnum = RLENGTH
    }
    if (RLENGTH > lsnum) {
        # Forget components stored at deeper levels, otherwise a dedent
        # would keep a stale component at the end of the path.
        for (i in a) {
            if (i + 0 > RLENGTH) {
                delete a[i]
            }
        }
        lsnum = RLENGTH
        # The component is keyed by its indentation width.
        a[lsnum] = "." substr($0, RLENGTH + 1)
        p = 1
    }
    if (RLENGTH == lsnum && p == 1) {
        # Emit the whole path accumulated so far on one line.
        for (i in a) {
            printf "%s", a[i]
        }
        print ""
        lsnum = 0
        p = 0
    }
}

END {
    # No NR > 1 guard here, so that a one-line input is still printed.
    if (length(a) == 1) {
        print a[0]
    }
}' doc
foo
foobar.bar
foobar.baz
foobar.baz.bat
bar

EDIT: here is the above script formatted by gawk -o- for readability:

BEGIN {
        # Walk array "a" in ascending numeric index order so that the path
        # components come out from the shallowest indentation to the deepest.
        PROCINFO["sorted_in"] = "@ind_num_asc"
}

{
        # match() sets RLENGTH to the width of the leading blanks (0 = top level).
        match($0, /^[ \t]*/)
        if (RLENGTH == 0) {
                # The previous top-level line had no children: print it on its own.
                if (NR > 1 && length(a) == 1) {
                        print a[0]
                }
                delete a
                a[0] = $0
                lsnum = RLENGTH
        }
        if (RLENGTH > lsnum) {
                # Forget components stored at deeper levels, otherwise a dedent
                # would keep a stale component at the end of the path.
                for (i in a) {
                        if (i + 0 > RLENGTH) {
                                delete a[i]
                        }
                }
                lsnum = RLENGTH
                # The component is keyed by its indentation width.
                a[lsnum] = "." substr($0, RLENGTH + 1)
                p = 1
        }
        if (RLENGTH == lsnum && p == 1) {
                # Emit the whole path accumulated so far on one line.
                for (i in a) {
                        printf "%s", a[i]
                }
                print ""
                lsnum = 0
                p = 0
        }
}

END {
        # No NR > 1 guard here, so that a one-line input is still printed.
        if (length(a) == 1) {
                print a[0]
        }
}
$ cat file
foo
foobar
  bar
  baz
    bat
bar
$
$ gawk 'BEGIN {
    # Numeric (not string) index order: with string ordering an
    # indentation width of 10 would sort before a width of 2.
    PROCINFO["sorted_in"] = "@ind_num_asc"
}

{
    # match() sets RLENGTH to the width of the leading blanks (0 = top level).
    match($0, /^[ \t]*/)
    if (RLENGTH == 0) {
        # The previous top-level line had no children: print it on its own.
        if (NR > 1 && length(a) == 1) {
            print a[0]
        }
        delete a
        a[0] = $0
        lsnum = RLENGTH
    }
    if (RLENGTH > lsnum) {
        # Forget components stored at deeper levels, otherwise a dedent
        # would keep a stale component at the end of the path.
        for (i in a) {
            if (i + 0 > RLENGTH) {
                delete a[i]
            }
        }
        lsnum = RLENGTH
        # The component is keyed by its indentation width.
        a[lsnum] = "." substr($0, RLENGTH + 1)
        p = 1
    }
    if (p == 1) {
        # Emit the whole path accumulated so far on one line.
        for (i in a) {
            printf "%s", a[i]
        }
        print ""
        lsnum = 0
        p = 0
    }
}

END {
    # A trailing top-level line without children has not been printed yet.
    if (length(a) == 1) {
        print a[0]
    }
}' file
foo
foobar.bar
foobar.baz
foobar.baz.bat
bar
added 951 characters in body
Source Link
Ed Morton
  • 35.9k
  • 6
  • 25
  • 60

EDIT: here is the above script formatted by gawk -o- for readability:

BEGIN {
        # Walk array "a" in ascending numeric index order so that the path
        # components come out from the shallowest indentation to the deepest.
        PROCINFO["sorted_in"] = "@ind_num_asc"
}

{
        # match() sets RLENGTH to the width of the leading blanks (0 = top level).
        match($0, /^[ \t]*/)
        if (RLENGTH == 0) {
                # The previous top-level line had no children: print it on its own.
                if (NR > 1 && length(a) == 1) {
                        print a[0]
                }
                delete a
                a[0] = $0
                lsnum = RLENGTH
        }
        if (RLENGTH > lsnum) {
                # Forget components stored at deeper levels, otherwise a dedent
                # would keep a stale component at the end of the path.
                for (i in a) {
                        if (i + 0 > RLENGTH) {
                                delete a[i]
                        }
                }
                lsnum = RLENGTH
                # The component is keyed by its indentation width.
                a[lsnum] = "." substr($0, RLENGTH + 1)
                p = 1
        }
        if (RLENGTH == lsnum && p == 1) {
                # Emit the whole path accumulated so far on one line.
                for (i in a) {
                        printf "%s", a[i]
                }
                print ""
                lsnum = 0
                p = 0
        }
}

END {
        # No NR > 1 guard here, so that a one-line input is still printed.
        if (length(a) == 1) {
                print a[0]
        }
}

EDIT: here is the above script formatted by gawk -o- for readability:

BEGIN {
        # Walk array "a" in ascending numeric index order so that the path
        # components come out from the shallowest indentation to the deepest.
        PROCINFO["sorted_in"] = "@ind_num_asc"
}

{
        # match() sets RLENGTH to the width of the leading blanks (0 = top level).
        match($0, /^[ \t]*/)
        if (RLENGTH == 0) {
                # The previous top-level line had no children: print it on its own.
                if (NR > 1 && length(a) == 1) {
                        print a[0]
                }
                delete a
                a[0] = $0
                lsnum = RLENGTH
        }
        if (RLENGTH > lsnum) {
                # Forget components stored at deeper levels, otherwise a dedent
                # would keep a stale component at the end of the path.
                for (i in a) {
                        if (i + 0 > RLENGTH) {
                                delete a[i]
                        }
                }
                lsnum = RLENGTH
                # The component is keyed by its indentation width.
                a[lsnum] = "." substr($0, RLENGTH + 1)
                p = 1
        }
        if (RLENGTH == lsnum && p == 1) {
                # Emit the whole path accumulated so far on one line.
                for (i in a) {
                        printf "%s", a[i]
                }
                print ""
                lsnum = 0
                p = 0
        }
}

END {
        # No NR > 1 guard here, so that a one-line input is still printed.
        if (length(a) == 1) {
                print a[0]
        }
}
added "\t" for processing leading tabs as well
Source Link
Raffa
  • 509
  • 2
  • 5
Loading
Source Link
Raffa
  • 509
  • 2
  • 5
Loading