Return to Revisions

4 of 5

edited body

edited Dec 1, 2023 at 2:56

3.3k
4
28
56

Here's an idea:

In sh, escape all newline and backslash characters in each positional parameter, so that we could use the newline character as delimiter.
Pipe the newline-separated positional parameters to awk.
In awk, use quicksort to sort the parameters and write the newline-separated sorted parameters to standard output.
In sh, replace the unsorted list of positional parameters with the newline-separated sorted parameters from awk by splitting on newlines.
Unescape all newline and backslash characters in the list of sorted positional parameters.

Implementation:

#!/bin/sh
# Run this shell script to sort this list of positional parameters:
set -- \
    'Hello' \
    '  0  1  ' \
    '' \
    '*' \
    '\0150\0151' \
    "$(printf '\a\b\e\f\n\r\t\v')" \
    '\a\b\e\f\n\r\t\v%%\\' \
    'qwerty
uiop' \
    '

new

lines

'

printf '====== NOT YET SORTED ======\n'
for param; do
    printf '%s' "$param" | od -ab
done

quicksort() {
    for param; do
        # * Escape newlines (newline -> "\n" [two characters]).
        #   Rationale:
        #   * To be able to use newline as AWK record separator.
        #   * To be able to use it as the shell's $IFS for separating records.
        # * Escape backslashes (backslash -> "\\" [two characters]).
        #   Rationale:
        #   * To distinguish between escaped newlines and the two-character
        #     string "\n" (escaped version: "\\n" [three-character string]).
        #   * To avoid special interpretation of backslashes when passed to
        #     `printf '%b' ...`.
        #
        # `sed` solution adapted from:
        # https://unix.stackexchange.com/questions/761885/how-to-convert-all-newlines-to-n-in-posix-sh-strings
        printf '%s\n' "$param" | LC_ALL=C sed '
            :1
            $ ! {
                N
                b1
            }
            s/\\/\\\\/g
            s/\n/\\n/g'
    done | awk -f ./quicksort.awk
}

# Create temporary file.
tmpfile="$(printf 'mkstemp(template)' \
    | m4 -D template="${TMPDIR:-/tmp}/XXXXXX")" || exit 1
exec 3>"$tmpfile" 4<"$tmpfile"  # Open tmpfile for writing and reading.
rm -f -- "$tmpfile"

quicksort "$@" >&3 3>&-

set --
while IFS='
' read -r line; do
    # Unescape backslashes and newlines.
    # To preserve trailing newlines (if any), insert a trailing character 'x'
    # and later remove it.
    elem="$(printf '%bx' "$line")"
    set -- "$@" "${elem%x}"
done <&4 4<&-

printf '\n====== SORTED RESULTS ======\n'
for param; do
    printf '%s' "$param" | od -ab
done

This is quicksort.awk:

# Takes newline-separated records from standard input and sorts them according
# to the `compare` function. Outputs the sorted newline-separated records to
# standard output.
#
# Backslash and newline characters supplied within each input record must be
# escaped like this:
# * backslash character -> "\\" (two backslash characters)
# * newline character -> "\n" (backslash character followed by the "n" character)
#
# Backslash and newline characters within each output record will be escaped in
# the same manner.
#
# Usage: awk -f quicksort.awk
#
# Attribution:
# Functions `quicksort` and `quicksort_swap` are adapted from the public domain
# quicksort implementation by Arnold Robbins retrieved from
# https://git.savannah.gnu.org/cgit/gawk.git/tree/awklib/eg/lib/quicksort.awk?h=gawk-5.3.0

function compare(a, b) {
    # Unescape backslashes and newlines.
    gsub(/\\/, "\\", a)
    gsub(/\\/, "\\", b)
    gsub(/\\n/, "\n", a)
    gsub(/\\n/, "\n", b)

    # For sorting in ascending lexicographic order.
    return a < b
}

function quicksort(data, left, right,    i, last) {
    if (left >= right)  # Do nothing if array contains fewer than two elements.
        return

    quicksort_swap(data, left, int((left + right) / 2))
    last = left
    for (i = left + 1; i <= right; i++)
        if (compare(data[i], data[left]))
            quicksort_swap(data, ++last, i)
    quicksort_swap(data, left, last)
    quicksort(data, left, last - 1, less_than)
    quicksort(data, last + 1, right, less_than)
}

function quicksort_swap(data, i, j,    temp) {
    temp = data[i]
    data[i] = data[j]
    data[j] = temp
}

{
    lines[counter++] = $0
}

END {
    quicksort(lines, 0, counter - 1)
    for (k = 0; k < counter; k++)
        print lines[k]
}

Tested in:

Debian 12, Dash 0.5.12, GNU sed 4.9, Gawk 5.2.1
Debian 12, Dash 0.5.12, Busybox 1.35.0's sed and awk
FreeBSD 13.2's sh, sed, and awk

answered Nov 21, 2023 at 2:11

Flux

3.3k
4
28
56