I'm fairly new to x86-64 assembly programming and, as a learning exercise, tried to recreate parts of the `head` utility (targeting Linux).
Are there any stylistic issues?
The head function uses a buffer in the bss section for IO, but I've also seen people allocate buffers on the stack for similar purposes.
Is there a benefit to allocating buffers on the stack vs. bss aside from recursive calls?
The program is tested by comparing its output on a few files in examples with the output of head.
Any feedback would be greatly appreciated.
Project layout:
.
├── examples
│ ├── empty
│ ├── ex01
│ ├── ex02
│ └── to_be
├── headx64.s
├── Makefile
├── README.md
├── test.sh
└── UNLICENSE
Makefile:
# Builds headx64 from its GNU as source and links it directly with ld.
AS  := as
LD  := ld
BIN := headx64

# Delete a target whose recipe failed so a half-written file is never reused.
.DELETE_ON_ERROR:

# Link step. This is the first rule and therefore the default goal.
$(BIN): headx64.o
	$(LD) -o $@ $^

# Assemble with debug information for gdb.
headx64.o: headx64.s
	$(AS) -o $@ $< -ggdb

.PHONY: test clean

# Compare headx64 against the system head on the files in examples/.
test: $(BIN)
	bash test.sh examples

# Only regular files are produced by the build, so -r is not needed.
clean:
	rm -f $(BIN) *.o
test.sh:
#!/bin/bash
# PURPOSE: A simple test script that compares ./headx64 with the system head.
# USAGE:   test.sh DIRECTORY
#          Every regular file in DIRECTORY is tested on its own, then all
#          entries of DIRECTORY are passed in a single invocation.
# EXIT:    0 if every comparison matched, 1 if any differed, 2 on bad usage.

if [ $# -ne 1 ]; then
    echo "usage: $0 DIRECTORY" >&2
    exit 2
fi

directory=$1
status=0

# Private scratch files; removed on exit, even when the script is interrupted.
expected=$(mktemp) || exit 1
actual=$(mktemp) || exit 1
trap 'rm -f "$expected" "$actual"' EXIT

echo Testing:

# Test single files
for file in "$directory"/*; do
    # Skip sub-directories and the literal pattern left by an empty directory.
    [ -f "$file" ] || continue
    echo "Testing $file..."
    # '>' truncates, so an earlier mismatch cannot leak into this comparison.
    head "$file" > "$expected"
    ./headx64 "$file" > "$actual"
    if ! diff "$expected" "$actual"; then
        echo "head and headx64 differ on ${file##*/}"
        status=1
    fi
done

# Test all files at once
echo "Testing $directory/*..."
head "$directory"/* > "$expected"
./headx64 "$directory"/* > "$actual"
if ! diff "$expected" "$actual"; then
    echo head and headx64 differ on all files
    status=1
fi

echo All test ran!
exit $status
headx64.s:
# PURPOSE: Simple copy of the head command line utility.
# USAGE:   headx64 [FILE ...]
# AUTHOR:  Me ;)
# LICENSE: This code is part of the public domain.
# SYNTAX:  GNU as, AT&T operand order (source first, destination last).
# TARGET:  Linux x86-64, raw system calls, entry point _start.

#---CONSTANTS---#

# System call numbers (placed in rax before the syscall instruction)
.equ    SYS_READ,       0
.equ    SYS_WRITE,      1
.equ    SYS_OPEN,       2
.equ    SYS_CLOSE,      3
.equ    SYS_EXIT,       60

# Standard file descriptors
.equ    STDIN,          0
.equ    STDOUT,         1
.equ    STDERR,         2

# Process exit status codes
.equ    SUCCESS,        0               # everything was printed
.equ    FILE_ERROR,     1               # a file could not be opened
.equ    IO_ERROR,       2               # a read or write failed
#---DATA---#
# All strings below are constants: no code in this file writes to them, so
# they are placed in the read-only data section instead of writable .data.
.section .rodata

# Strings for header ("==> NAME <==")
header_left:
        .asciz  "==> "
header_right:
        .asciz  " <==\n"

# String representing stdin
file_stdin:
        .asciz  "-"

# Error messages
file_error_msg:
        .asciz  "couldn't open file!\n"
io_error_msg:
        .asciz  "io error!\n"

# Separator printed between two files
line_break:
        .asciz  "\n"

#---BUFFERS---#
.section .bss

# Buffer used in i/o: filled by read and flushed by write in head.
.equ    BUFFER_SIZE, 256
.lcomm  buffer, BUFFER_SIZE
.section .text
.globl _start

#---MAIN ROUTINE---#
# Program entry point.
# On entry (%rsp) holds argc and 8(%rsp, i, 8) holds argv[i].
#   - No file argument: stdin is passed to head.
#   - One file argument: that file is passed to head, without a header.
#   - Several file arguments: each file is preceded by "==> NAME <==" and
#     separated from the previous one by an empty line.
# Exit status: SUCCESS, FILE_ERROR (open failed) or IO_ERROR (read/write failed).
# Error messages are written to stderr so they never mix with the output.
# Register usage:
#   rbx - argc
#   r12 - Index into argv of the file being processed.
#   r13 - Descriptor of the file being processed (multi-file loop only).
_start:
        mov     (%rsp), %rbx
        mov     $1, %r12

        # Only a single file (or stdin)?
        cmp     $2, %rbx
        jle     .L_single_file

.L_args_loop:
        # Print header. fputs reports its status in rax (rdx only holds the
        # string length afterwards), so rax is what has to be tested.
        mov     $header_left, %rdi
        mov     $STDOUT, %rsi
        call    fputs
        test    %rax, %rax
        js      .L_io_error

        mov     8(%rsp, %r12, 8), %rdi
        mov     $STDOUT, %rsi
        call    fputs
        test    %rax, %rax
        js      .L_io_error

        mov     $header_right, %rdi
        mov     $STDOUT, %rsi
        call    fputs
        test    %rax, %rax
        js      .L_io_error

        # Open file
        mov     8(%rsp, %r12, 8), %rdi
        call    open_rdonly
        test    %rax, %rax
        js      .L_file_error
        mov     %rax, %r13              # head overwrites rdi but preserves r13

        # Print file content
        mov     %r13, %rdi
        call    head
        test    %rax, %rax
        js      .L_io_error

        # Release the descriptor so a long argument list cannot exhaust the
        # descriptor table. stdin stays open: "-" may be given more than once.
        cmp     $STDIN, %r13
        je      .L_start_fd_released
        mov     %r13, %rdi
        mov     $SYS_CLOSE, %rax
        syscall                         # result ignored: nothing left to do with fd
.L_start_fd_released:

        # All files read?
        inc     %r12
        cmp     %rbx, %r12
        jge     .L_success

        # Print nl and continue
        mov     $line_break, %rdi
        mov     $STDOUT, %rsi
        call    fputs
        test    %rax, %rax
        js      .L_io_error

        # Repeat
        jmp     .L_args_loop

.L_single_file:
        # Open file; read from stdin if no file was given
        mov     $file_stdin, %rdi
        cmp     $2, %rbx
        cmoveq  8(%rsp, %r12, 8), %rdi
        call    open_rdonly
        test    %rax, %rax
        js      .L_file_error

        # Print file content; the process exits right after, so no close
        mov     %rax, %rdi
        call    head
        test    %rax, %rax
        js      .L_io_error

        # Exit successfully
.L_success:
        mov     $SUCCESS, %rdi
        mov     $SYS_EXIT, %rax
        syscall

        # Print error message to stderr and exit
.L_file_error:
        mov     $file_error_msg, %rdi
        mov     $STDERR, %rsi
        call    fputs
        mov     $FILE_ERROR, %rdi
        mov     $SYS_EXIT, %rax
        syscall

.L_io_error:
        mov     $io_error_msg, %rdi
        mov     $STDERR, %rsi
        call    fputs
        mov     $IO_ERROR, %rdi
        mov     $SYS_EXIT, %rax
        syscall
#---SUBROUTINES---#
# Function open_rdonly
# Opens a file for reading, returns stdin if path == "-".
# PARAMETERS:
# path (rdi) - Path as a pointer to null-terminated string.
# RETURNS:
# (rax) - File descriptor on success, -1 otherwise.
# CLOBBERS:
# rsi, rdx, and rcx/r11 (overwritten by the syscall instruction).

# Arguments for the open system call
.equ    O_READONLY, 00                  # flags: read-only access
.equ    NO_MODE, 00                     # mode: unused, no file is created

.type open_rdonly, @function
open_rdonly:
        # path == "-"? (first byte is '-' and the second is the terminator)
        cmpb    $'-', (%rdi)
        jne     .L_not_stdin
        cmpb    $0, 1(%rdi)
        jne     .L_not_stdin
        mov     $STDIN, %rax
        ret

.L_not_stdin:
        # Open path
        mov     $O_READONLY, %rsi
        mov     $NO_MODE, %rdx
        mov     $SYS_OPEN, %rax
        syscall

        # The raw system call returns a negative error number on failure;
        # translate it to the documented -1.
        test    %rax, %rax
        jns     .L_open_done
        mov     $-1, %rax
.L_open_done:
        ret
# Function fputs
# Writes a null-terminated string to a file descriptor using one write
# system call. A failed or incomplete write is reported as an error.
# PARAMETERS:
# s (rdi) - Pointer to null-terminated string.
# file (rsi) - File descriptor.
# RETURNS:
# (rax) - 0 if no error occurred and -1 otherwise.
# ON RETURN:
# rdi = file, rsi = s, rdx = length of s; rcx and r11 are overwritten by
# the syscall instruction.
.type fputs, @function
fputs:
        # rdx = number of bytes before the terminating zero
        xor     %edx, %edx
        jmp     .L_fputs_test
.L_fputs_next:
        inc     %rdx
.L_fputs_test:
        cmpb    $0, (%rdi, %rdx)
        jne     .L_fputs_next

        # write(file, s, rdx): swap the two arguments into syscall order
        mov     %rdi, %rax
        mov     %rsi, %rdi
        mov     %rax, %rsi
        mov     $SYS_WRITE, %eax
        syscall                         # rdx (the length) survives the call

        # rax = (written < length) ? -1 : 0. The comparison is signed, so a
        # negative error code from write also yields -1.
        cmp     %rdx, %rax
        setl    %al
        movzbl  %al, %eax
        neg     %rax
        ret
# Function head
# Prints the first 10 lines in file to stdout.
# PARAMETERS:
# file (rdi) - File descriptor.
# RETURNS:
# (rax) - Returns 0 if no io error occurred and -1 otherwise.
#
# The file is read into buffer in chunks of at most BUFFER_SIZE bytes. Each
# chunk is scanned for newlines: a chunk without the 10th newline is written
# out completely and the next one is read; the chunk holding the 10th newline
# is written only up to and including that newline. At end of file a final
# write of zero bytes is issued.
# Register usage:
# rbx - Input file descriptor.
# r12 - Lines found.
# r13 - No. of bytes in buffer.
# r14 - No. of bytes of buffer scanned so far.
.type head, @function
head:
        # Save non-volatile registers
        push    %rbx
        push    %r12
        push    %r13
        push    %r14

        mov     %rdi, %rbx
        xor     %r12d, %r12d            # no lines found yet

.L_head_refill:
        # r13 = read(file, buffer, BUFFER_SIZE)
        mov     %rbx, %rdi
        mov     $buffer, %rsi
        mov     $BUFFER_SIZE, %edx
        mov     $SYS_READ, %eax
        syscall
        mov     %rax, %r13
        xor     %r14d, %r14d            # restart the scan at the buffer start

        # At EOF or error?
        test    %r13, %r13
        jz      .L_head_flush           # EOF: r14 == 0, so nothing is written
        js      .L_head_fail

.L_head_scan:
        # Whole chunk visited without reaching the 10th newline?
        cmp     %r13, %r14
        jge     .L_head_emit_chunk
        inc     %r14
        cmpb    $'\n', buffer-1(%r14)   # byte just consumed
        jne     .L_head_scan
        inc     %r12
        cmp     $10, %r12
        jne     .L_head_scan
        # 10th newline found: r14 counts the bytes up to and including it

.L_head_flush:
        # Print the first r14 bytes of the buffer and finish
        mov     $STDOUT, %edi
        mov     $buffer, %rsi
        mov     %r14, %rdx
        mov     $SYS_WRITE, %eax
        syscall
        cmp     %rax, %r14
        jne     .L_head_fail
        xor     %eax, %eax              # return 0
        jmp     .L_head_return

.L_head_emit_chunk:
        # Print the whole chunk, then fetch the next one
        mov     $STDOUT, %edi
        mov     $buffer, %rsi
        mov     %r13, %rdx
        mov     $SYS_WRITE, %eax
        syscall
        cmp     %r13, %rax
        jl      .L_head_fail
        jmp     .L_head_refill

.L_head_fail:
        mov     $-1, %rax

.L_head_return:
        # Restore non-volatile registers
        pop     %r14
        pop     %r13
        pop     %r12
        pop     %rbx
        ret