Skip to main content
added 289 characters in body
Source Link
glenn jackman
  • 88.5k
  • 16
  • 124
  • 179

Take your existing one-file code and put it into a function:

# Print a random 200-character substring of the sequence in a FASTA file.
#
# The first line of the file (the header) is dropped, the remaining lines are
# joined into one sequence, and that sequence is staged in a temporary file.
#
# Arguments: $1 - path to the FASTA file
# Outputs:   the sample followed by a newline on stdout; when the sequence
#            holds 200 characters or fewer, the whole sequence is printed
# Returns:   0 on success, non-zero if the file is unreadable or a step fails
random_sample() {
    local fasta_file=$1
    local len=200
    local n r tmp sample status=0

    if [[ ! -r $fasta_file ]]; then
        printf 'random_sample: cannot read %s\n' "$fasta_file" >&2
        return 1
    fi

    tmp=$(mktemp) || return
    tail -n+2 < "$fasta_file" | tr -d '\n' > "$tmp"

    # wc -c instead of the GNU-only "stat -c"; the arithmetic expansion strips
    # any padding some wc implementations put around the number.
    n=$(( $(wc -c < "$tmp") ))

    if (( n > len )); then
        # 1-based start position chosen so a full window always fits.
        r=$(shuf -i1-"$((n - len + 1))" -n1) || status=$?
    else
        # Sequence is not longer than the window: start at the beginning.
        r=1
    fi

    if (( status == 0 )); then
        sample=$(tail -c+"$r" < "$tmp" | head -c"$len")
    fi

    # Remove the temporary file on every path, including failures.
    rm -f -- "$tmp"
    (( status == 0 )) || return "$status"
    printf '%s\n' "$sample"
}

Then you can do

# Write a sample of every FASTA file in the current directory to
# <name>_200_substring.fa next to it.
for file in *.fa; do
    # When nothing matches, the glob stays as the literal string "*.fa".
    if [[ ! -f $file ]]; then
        continue
    fi
    # Outputs of an earlier run also match *.fa; do not sample the samples.
    if [[ $file == *_200_substring.fa ]]; then
        continue
    fi
    random_sample "$file" > "${file%.fa}_200_substring.fa"
done

If the fasta files are not huge, I would not use a tmp file:

# Print a random 200-character substring of the sequence in a FASTA file,
# holding the whole sequence in a shell variable (no temporary file).
#
# The first line of the file (the header) is dropped and the remaining lines
# are joined into one sequence.
#
# Arguments: $1 - path to the FASTA file
# Outputs:   the sample followed by a newline on stdout; when the sequence
#            holds 200 characters or fewer, the whole sequence is printed
# Returns:   0 on success, non-zero if the file is unreadable or shuf fails
random_sample() {
    local fasta_file=$1
    local len=200
    local data n r

    if [[ ! -r $fasta_file ]]; then
        printf 'random_sample: cannot read %s\n' "$fasta_file" >&2
        return 1
    fi

    data=$(tail -n+2 < "$fasta_file" | tr -d '\n')
    n=${#data}

    if (( n > len )); then
        # 1-based start position chosen so a full window always fits.
        r=$(shuf -i1-"$((n - len + 1))" -n1) || return
    else
        # Sequence is not longer than the window: start at the beginning.
        r=1
    fi

    # Substring expansion is 0-based, hence r-1.
    printf '%s\n' "${data:r-1:len}"
}

And if the sequences are shorter than 32767 characters, bash's built-in `$RANDOM` (an integer from 0 to 32767) can pick the offset:

# Print a random 200-character substring of the sequence in a FASTA file,
# using only bash's $RANDOM to pick the start offset.
#
# The first line of the file (the header) is dropped and the remaining lines
# are joined into one sequence. $RANDOM yields 0..32767, so offsets beyond
# that are never chosen.
#
# Arguments: $1 - path to the FASTA file
# Outputs:   the sample followed by a newline on stdout; when the sequence
#            holds 200 characters or fewer, the whole sequence is printed
# Returns:   0 on success, non-zero if the file is unreadable
random_sample() {
    local fasta_file=$1
    local len=200
    local data n offset=0

    if [[ ! -r $fasta_file ]]; then
        printf 'random_sample: cannot read %s\n' "$fasta_file" >&2
        return 1
    fi

    data=$(tail -n+2 < "$fasta_file" | tr -d '\n')
    n=${#data}

    if (( n > len )); then
        # Limit the 0-based offset so a full window always fits; this also
        # avoids a modulo by zero on an empty sequence.
        offset=$(( RANDOM % (n - len + 1) ))
    fi

    printf '%s\n' "${data:offset:len}"
}

Take your existing one-file code and put it into a function:

# Print a random 200-character substring of the sequence in a FASTA file.
#
# The first line of the file (the header) is dropped, the remaining lines are
# joined into one sequence, and that sequence is staged in a temporary file.
#
# Arguments: $1 - path to the FASTA file
# Outputs:   the sample followed by a newline on stdout; when the sequence
#            holds 200 characters or fewer, the whole sequence is printed
# Returns:   0 on success, non-zero if the file is unreadable or a step fails
random_sample() {
    local fasta_file=$1
    local len=200
    local n r tmp sample status=0

    if [[ ! -r $fasta_file ]]; then
        printf 'random_sample: cannot read %s\n' "$fasta_file" >&2
        return 1
    fi

    tmp=$(mktemp) || return
    tail -n+2 < "$fasta_file" | tr -d '\n' > "$tmp"

    # wc -c instead of the GNU-only "stat -c"; the arithmetic expansion strips
    # any padding some wc implementations put around the number.
    n=$(( $(wc -c < "$tmp") ))

    if (( n > len )); then
        # 1-based start position chosen so a full window always fits.
        r=$(shuf -i1-"$((n - len + 1))" -n1) || status=$?
    else
        # Sequence is not longer than the window: start at the beginning.
        r=1
    fi

    if (( status == 0 )); then
        sample=$(tail -c+"$r" < "$tmp" | head -c"$len")
    fi

    # Remove the temporary file on every path, including failures.
    rm -f -- "$tmp"
    (( status == 0 )) || return "$status"
    printf '%s\n' "$sample"
}

Then you can do

# Write a sample of every FASTA file in the current directory to
# <name>_200_substring.fa next to it.
for file in *.fa; do
    # When nothing matches, the glob stays as the literal string "*.fa".
    if [[ ! -f $file ]]; then
        continue
    fi
    # Outputs of an earlier run also match *.fa; do not sample the samples.
    if [[ $file == *_200_substring.fa ]]; then
        continue
    fi
    random_sample "$file" > "${file%.fa}_200_substring.fa"
done

Take your existing one-file code and put it into a function:

# Print a random 200-character substring of the sequence in a FASTA file.
#
# The first line of the file (the header) is dropped, the remaining lines are
# joined into one sequence, and that sequence is staged in a temporary file.
#
# Arguments: $1 - path to the FASTA file
# Outputs:   the sample followed by a newline on stdout; when the sequence
#            holds 200 characters or fewer, the whole sequence is printed
# Returns:   0 on success, non-zero if the file is unreadable or a step fails
random_sample() {
    local fasta_file=$1
    local len=200
    local n r tmp sample status=0

    if [[ ! -r $fasta_file ]]; then
        printf 'random_sample: cannot read %s\n' "$fasta_file" >&2
        return 1
    fi

    tmp=$(mktemp) || return
    tail -n+2 < "$fasta_file" | tr -d '\n' > "$tmp"

    # wc -c instead of the GNU-only "stat -c"; the arithmetic expansion strips
    # any padding some wc implementations put around the number.
    n=$(( $(wc -c < "$tmp") ))

    if (( n > len )); then
        # 1-based start position chosen so a full window always fits.
        r=$(shuf -i1-"$((n - len + 1))" -n1) || status=$?
    else
        # Sequence is not longer than the window: start at the beginning.
        r=1
    fi

    if (( status == 0 )); then
        sample=$(tail -c+"$r" < "$tmp" | head -c"$len")
    fi

    # Remove the temporary file on every path, including failures.
    rm -f -- "$tmp"
    (( status == 0 )) || return "$status"
    printf '%s\n' "$sample"
}

Then you can do

# Write a sample of every FASTA file in the current directory to
# <name>_200_substring.fa next to it.
for file in *.fa; do
    # When nothing matches, the glob stays as the literal string "*.fa".
    if [[ ! -f $file ]]; then
        continue
    fi
    # Outputs of an earlier run also match *.fa; do not sample the samples.
    if [[ $file == *_200_substring.fa ]]; then
        continue
    fi
    random_sample "$file" > "${file%.fa}_200_substring.fa"
done

If the fasta files are not huge, I would not use a tmp file:

# Print a random 200-character substring of the sequence in a FASTA file,
# holding the whole sequence in a shell variable (no temporary file).
#
# The first line of the file (the header) is dropped and the remaining lines
# are joined into one sequence.
#
# Arguments: $1 - path to the FASTA file
# Outputs:   the sample followed by a newline on stdout; when the sequence
#            holds 200 characters or fewer, the whole sequence is printed
# Returns:   0 on success, non-zero if the file is unreadable or shuf fails
random_sample() {
    local fasta_file=$1
    local len=200
    local data n r

    if [[ ! -r $fasta_file ]]; then
        printf 'random_sample: cannot read %s\n' "$fasta_file" >&2
        return 1
    fi

    data=$(tail -n+2 < "$fasta_file" | tr -d '\n')
    n=${#data}

    if (( n > len )); then
        # 1-based start position chosen so a full window always fits.
        r=$(shuf -i1-"$((n - len + 1))" -n1) || return
    else
        # Sequence is not longer than the window: start at the beginning.
        r=1
    fi

    # Substring expansion is 0-based, hence r-1.
    printf '%s\n' "${data:r-1:len}"
}

And if the sequences are shorter than 32767 characters, bash's built-in `$RANDOM` (an integer from 0 to 32767) can pick the offset:

# Print a random 200-character substring of the sequence in a FASTA file,
# using only bash's $RANDOM to pick the start offset.
#
# The first line of the file (the header) is dropped and the remaining lines
# are joined into one sequence. $RANDOM yields 0..32767, so offsets beyond
# that are never chosen.
#
# Arguments: $1 - path to the FASTA file
# Outputs:   the sample followed by a newline on stdout; when the sequence
#            holds 200 characters or fewer, the whole sequence is printed
# Returns:   0 on success, non-zero if the file is unreadable
random_sample() {
    local fasta_file=$1
    local len=200
    local data n offset=0

    if [[ ! -r $fasta_file ]]; then
        printf 'random_sample: cannot read %s\n' "$fasta_file" >&2
        return 1
    fi

    data=$(tail -n+2 < "$fasta_file" | tr -d '\n')
    n=${#data}

    if (( n > len )); then
        # Limit the 0-based offset so a full window always fits; this also
        # avoids a modulo by zero on an empty sequence.
        offset=$(( RANDOM % (n - len + 1) ))
    fi

    printf '%s\n' "${data:offset:len}"
}
Source Link
glenn jackman
  • 88.5k
  • 16
  • 124
  • 179

Take your existing one-file code and put it into a function:

# Print a random 200-character substring of the sequence in a FASTA file.
#
# The first line of the file (the header) is dropped, the remaining lines are
# joined into one sequence, and that sequence is staged in a temporary file.
#
# Arguments: $1 - path to the FASTA file
# Outputs:   the sample followed by a newline on stdout; when the sequence
#            holds 200 characters or fewer, the whole sequence is printed
# Returns:   0 on success, non-zero if the file is unreadable or a step fails
random_sample() {
    local fasta_file=$1
    local len=200
    local n r tmp sample status=0

    if [[ ! -r $fasta_file ]]; then
        printf 'random_sample: cannot read %s\n' "$fasta_file" >&2
        return 1
    fi

    tmp=$(mktemp) || return
    tail -n+2 < "$fasta_file" | tr -d '\n' > "$tmp"

    # wc -c instead of the GNU-only "stat -c"; the arithmetic expansion strips
    # any padding some wc implementations put around the number.
    n=$(( $(wc -c < "$tmp") ))

    if (( n > len )); then
        # 1-based start position chosen so a full window always fits.
        r=$(shuf -i1-"$((n - len + 1))" -n1) || status=$?
    else
        # Sequence is not longer than the window: start at the beginning.
        r=1
    fi

    if (( status == 0 )); then
        sample=$(tail -c+"$r" < "$tmp" | head -c"$len")
    fi

    # Remove the temporary file on every path, including failures.
    rm -f -- "$tmp"
    (( status == 0 )) || return "$status"
    printf '%s\n' "$sample"
}

Then you can do

# Write a sample of every FASTA file in the current directory to
# <name>_200_substring.fa next to it.
for file in *.fa; do
    # When nothing matches, the glob stays as the literal string "*.fa".
    if [[ ! -f $file ]]; then
        continue
    fi
    # Outputs of an earlier run also match *.fa; do not sample the samples.
    if [[ $file == *_200_substring.fa ]]; then
        continue
    fi
    random_sample "$file" > "${file%.fa}_200_substring.fa"
done