This is pretty horrible code, but it works. I don't normally write Bash 😭; I usually code in Python.
This is a learning exercise for me, which is why I'm using Bash and why I'm not using any libraries.
I obviously don't expect anyone to correct the whole thing, but any main points on how to make this more bash-like would be greatly appreciated! 🙏
Sample input:
1,3,John smith,"BA, Economics",,Economics
2,4,Brian Jones,Master of Science,,Economics
3,5,Bill jones,"MSc, Biology",,Biology
Sample output:
1,3,John Smith,"BA, Economics",jsmith@ucla.edu,Economics
2,4,Brian Jones,Master of Science,bjones4@ucla.edu,Economics
3,5,Bill Jones,"Msc, Biology",bjones5@ucla.edu,Biology
Script:
#!/bin/bash
#
# Rewrites a student CSV: capitalises each word of the name column and fills
# in the email column, writing the result to students_new.csv.
#
# Usage: <script> <input_file>

# Exactly one argument (the input CSV) is required. Diagnostics go to stderr
# so they never get mixed into redirected output.
if (( $# != 1 )); then
    printf 'Usage: %s <input_file>\n' "$0" >&2
    exit 1
fi

input_file="$1"
output_file="students_new.csv"

# The input is read twice (once to count email prefixes, once to write the
# output), so it has to be a readable regular file; fail early if it is not.
if [[ ! -f "$input_file" || ! -r "$input_file" ]]; then
    printf 'Error: cannot read input file: %s\n' "$input_file" >&2
    exit 1
fi

# email prefix -> number of rows that produce that prefix
declare -A email_count=()
# Fields of the most recently parsed CSV line (filled by parse_line)
parsed_line=()
# Capitalises the first letter of every whitespace-separated word of a name.
# Despite the function name, every word is affected, not only the surname;
# the remaining letters of each word are left untouched.
# Arguments: $1 - full name, e.g. "John smith"
# Outputs:   the words joined by single spaces on stdout, e.g. "John Smith"
get_corrected_surname() {
    local -a words=()
    local word
    local result=""

    # Split on blanks with an explicit IFS so a caller-modified IFS cannot
    # change the result; read -a does not glob-expand the words.
    IFS=$' \t\n' read -r -a words <<< "$1"

    for word in "${words[@]}"; do
        # ${word^} upper-cases only the first character (bash 4+).
        result+="${result:+ }${word^}"
    done

    # printf instead of echo: a name such as "-n" must be printed literally.
    printf '%s\n' "$result"
}
# Builds the local part of an email address from a full name: the first
# character of the first word followed by the whole second word, lower-cased.
# Words after the second are ignored.
# Arguments: $1 - full name, e.g. "John smith"
# Outputs:   the prefix on stdout, e.g. "jsmith" (empty for an empty name)
get_email_prefix() {
    # ${1,,} lower-cases the whole string without spawning tr (bash 4+).
    local lowered=${1,,}
    local first_name surname _rest

    # Explicit IFS: split on blanks regardless of the caller's IFS. Anything
    # after the second word lands in _rest and is discarded.
    IFS=$' \t\n' read -r first_name surname _rest <<< "$lowered"

    printf '%s\n' "${first_name:0:1}${surname}"
}
# Produces the email address for one student.
# The address is "<prefix>@ucla.edu"; when more than one row shares the same
# prefix (according to email_count) the second argument is appended to the
# prefix to tell the addresses apart.
# Globals:   email_count (read)
# Arguments: $1 - full name
#            $2 - id appended to the prefix when the prefix is not unique
# Outputs:   the email address on stdout
get_email() {
    local name=$1
    local location_id=$2
    local email_prefix
    local seen=0

    email_prefix=$(get_email_prefix "$name")

    # Look the count up with a plain expansion instead of inside (( )):
    # arithmetic context would re-evaluate the key, which comes from the CSV.
    # An empty key is not a valid associative-array subscript, so skip it.
    if [[ -n "$email_prefix" ]]; then
        seen=${email_count[$email_prefix]:-0}
    fi

    if (( seen > 1 )); then
        email_prefix+=$location_id
    fi

    printf '%s\n' "${email_prefix}@ucla.edu"
}
# First pass over the input: counts how many rows produce each email prefix.
# Globals:   input_file (read), email_count (written)
# Arguments: none
# Note: the line is split on every comma, so this assumes the first three
# columns (id, class id, name) contain no quoted commas — TODO confirm for
# real data; later columns may contain them because they end up in _rest.
prep_email_count_array() {
    local id location_id full_name _rest email_prefix

    # The "|| [[ -n ... ]]" part makes sure a final line that lacks a
    # trailing newline is still processed (read fails on it but fills the
    # variables).
    while IFS=',' read -r id location_id full_name _rest \
        || [[ -n "${id}${location_id}${full_name}" ]]; do
        # Skip the header row.
        if [[ "$id" == "id" ]]; then
            continue
        fi

        email_prefix=$(get_email_prefix "$full_name")

        # An empty prefix (blank line / missing name) is not a valid
        # associative-array key; there is nothing to count for it.
        if [[ -z "$email_prefix" ]]; then
            continue
        fi

        email_count[$email_prefix]=$(( ${email_count[$email_prefix]:-0} + 1 ))
    done < "$input_file"
}
# Splits one CSV line into fields, honouring double-quoted fields that
# contain commas. Fields are stored verbatim, i.e. surrounding quotes (and
# doubled "" escapes inside them) are kept so a field can be written back
# out unchanged.
# Globals:   parsed_line (written) - array of fields; empty for an empty line
# Arguments: $1 - one line of CSV text
# Outputs:   nothing on stdout; the result is returned via parsed_line
parse_line() {
    local line=$1
    local field=""
    local char
    local in_quotes=0
    local i

    parsed_line=()

    # An empty line has no fields at all.
    if [[ -z "$line" ]]; then
        return 0
    fi

    for (( i = 0; i < ${#line}; i++ )); do
        char=${line:i:1}
        if [[ "$char" == '"' ]]; then
            # Every quote toggles the state. A doubled quote ("") inside a
            # quoted field toggles twice, so it leaves the state unchanged.
            in_quotes=$(( 1 - in_quotes ))
            field+=$char
        elif [[ "$char" == ',' ]] && (( in_quotes == 0 )); then
            # Unquoted comma: the current field is complete.
            parsed_line+=("$field")
            field=""
        else
            field+=$char
        fi
    done

    # Whatever follows the last separator is the final field, even when it
    # is empty (a line ending in a comma has an empty last column).
    parsed_line+=("$field")
}
# Second pass over the input: writes the header plus one corrected row per
# input row to the output file. For every row the name is passed through
# get_corrected_surname and the email column is replaced by get_email's
# result; the other columns are copied as parsed.
# Globals:   input_file (read), output_file (read; the file is overwritten),
#            parsed_line (written via parse_line)
# Arguments: none
parse_and_write() {
    local line id class_id full_name degree department
    local corrected_full_name email

    # Group the header and the loop so the output file is opened once
    # instead of being reopened for append on every row.
    {
        printf '%s\n' "id,class_id,name,degree,email,department"

        # "|| [[ -n $line ]]" keeps a last line without a trailing newline.
        while IFS= read -r line || [[ -n "$line" ]]; do
            # Blank lines carry no record; skip them instead of emitting
            # an empty row.
            if [[ -z "$line" ]]; then
                continue
            fi

            parse_line "$line"
            id=${parsed_line[0]}

            # Skip the header row of the input.
            if [[ "$id" == "id" ]]; then
                continue
            fi

            class_id=${parsed_line[1]}
            full_name=${parsed_line[2]}
            degree=${parsed_line[3]}
            # parsed_line[4] is the input's email column; it is ignored
            # because the email is always regenerated below.
            department=${parsed_line[5]}

            corrected_full_name=$(get_corrected_surname "$full_name")
            email=$(get_email "$full_name" "$class_id")

            printf '%s\n' \
                "$id,$class_id,$corrected_full_name,$degree,$email,$department"
        done < "$input_file"
    } > "$output_file"
}
# Pass 1: count how many rows share each email prefix. This has to finish
# before any row is written, because get_email consults these counts to
# decide whether a prefix needs the class id appended.
prep_email_count_array
# Pass 2: re-read the input and write the corrected rows to $output_file.
parse_and_write