Re-implementing head in x86-64 assembly targeting linux

Question

I'm pretty new to x64 programming and tried to recreate parts of head for study (targeting linux).

Are there any stylistic issues?
The head function uses a buffer in the bss section for IO, but I've also seen people allocate buffers on the stack for similar purposes. Is there a benefit to allocating buffers on the stack vs. bss aside from recursive calls?

The program is tested by comparing its output on a few files in examples with the output of head.

Any feedback would be greatly appreciated.

Project layout:

.
├── examples
│   ├── empty
│   ├── ex01
│   ├── ex02
│   └── to_be
├── headx64.s
├── Makefile
├── README.md
├── test.sh
└── UNLICENSE

Makefile:

AS := as
LD := ld

BIN := headx64

.DELETE_ON_ERROR:

$(BIN): headx64.o
    $(LD) -o $@ $^

headx64.o: headx64.s
    $(AS) -o $@ $^ -ggdb

.PHONY: test clean

test: $(BIN)
    bash test.sh examples

clean:
    rm -rf $(BIN) *.o

test.sh:

#!/bin/bash
# PURPOSE: A simple test script.
# USAGE:   To test headx64 on the contents of a directory, use call test.sh DIRECTORY.

directory=$1
echo Testing:

# Test single files
for file in `ls $directory`; do
  echo Testing $directory/$file...

  (head $directory/$file) >> tmp1
  (./headx64 $directory/$file) >> tmp2
  diff tmp1 tmp2 || echo head and headx64 differ on $file
done

# Test all files at once
echo Testing $directory/*...

(head $directory/*) >> tmp1
(./headx64 $directory/*) >> tmp2
diff tmp1 tmp2 || echo head and headx64 differ on all files

rm -f tmp1 tmp2
echo All test ran!

headx64.s:

# PURPOSE: Simple copy of the head command line utility.
# USAGE:   headx64 [FILE ...]
# AUTHOR:  Me ;)
# LICENSE: This code is part of the public domain.


#---CONSTANTS---#

# Syscalls
.equ SYS_READ,  0
.equ SYS_WRITE, 1
.equ SYS_OPEN,  2
.equ SYS_CLOSE, 3
.equ SYS_EXIT,  60

# File descriptors
.equ STDIN,  0
.equ STDOUT, 1
.equ STDERR, 2

# Status codes
.equ SUCCESS,    0
.equ FILE_ERROR, 1
.equ IO_ERROR,   2


#---DATA---#

  .section .data

# Strings for header
header_left:
  .asciz "==> "
header_right:
  .asciz " <==\n"

# String representing stdin
file_stdin:
  .asciz "-"

# Error messages
file_error_msg:
  .asciz "couldn't open file!\n"
io_error_msg:
  .asciz "io error!\n"

line_break:
  .asciz "\n"


#---BUFFERS---#

  .section .bss

# Buffer used in i/o.
.equ BUFFER_SIZE, 256
.lcomm buffer, BUFFER_SIZE

  .section .text
  .globl _start


#---MAIN ROUTINE---#

_start:
  # Register usage:
  #     (rbx) - argc
  #     (r12) - Argument in argv.

  mov (%rsp), %rbx
  mov $1, %r12

  # Only a single file (or stdin)?
  cmp $2, %rbx
  jle .L_single_file

.L_args_loop:
  # Print header
  mov $header_left, %rdi
  mov $STDOUT, %rsi
  call fputs
  cmp $0, %rdx
  jl .L_io_error

  mov 8(%rsp, %r12, 8), %rdi
  mov $STDOUT, %rsi
  call fputs
  cmp $0, %rdx
  jl .L_io_error

  mov $header_right, %rdi
  mov $STDOUT, %rsi
  call fputs
  cmp $0, %rdx
  jl .L_io_error

.L_open_file:
  # Open file
  mov 8(%rsp, %r12, 8), %rdi
  call open_rdonly
  cmp $0, %rax
  jl .L_file_error

  # Print file content
  mov %rax, %rdi
  call head
  cmp $0, %rax
  jl .L_io_error

  # All files read?
  inc %r12
  cmp %rbx, %r12
  jge .L_success

  # Print nl and continue
  mov $line_break, %rdi
  mov $STDOUT, %rsi
  call fputs
  cmp $0, %rdx
  jl .L_io_error

  # Repeat
  jmp .L_args_loop

.L_single_file:
  # Open file
  mov $file_stdin, %rdi # read from stdin if no file was given
  cmp $2, %rbx
  cmoveq 8(%rsp, %r12, 8), %rdi
  call open_rdonly
  cmp $0, %rax
  jl .L_file_error

  # Print file content
  mov %rax, %rdi
  call head
  cmp $0, %rax
  jl .L_io_error

  # Exit successfully
.L_success:
  mov $SUCCESS, %rdi
  mov $SYS_EXIT, %rax
  syscall

  # Print error message and exit
.L_file_error:
  mov $file_error_msg, %rdi
  mov $STDOUT, %rsi
  call fputs

  mov $FILE_ERROR, %rdi
  mov $SYS_EXIT, %rax
  syscall

.L_io_error:
  mov $io_error_msg, %rdi
  mov $STDOUT, %rsi
  call fputs

  mov $IO_ERROR, %rdi
  mov $SYS_EXIT, %rax
  syscall



#---SUBROUTINES---#

# Function open_rdonly
# Opens a file for reading, returns stdin if path == "-".
# PARAMETERS:
#     path (rdi) - Path as a pointer to null-terminated string.
# RETURNS:
#     (rax) - File descriptor on success, -1 otherwise.
.type open_rdonly, @function
open_rdonly:
  # path == "-"?
  cmpb $'-', (%rdi)
  jne .L_not_stdin
  cmpb $0, 1(%rdi)
  jne .L_not_stdin
  mov $STDIN, %rax
  ret

.L_not_stdin:
  # Open path
  .equ O_READONLY, 00
  .equ NO_MODE, 00
  mov $O_READONLY, %rsi
  mov $O_READONLY, %rdx
  mov $SYS_OPEN, %rax
  syscall
  ret

# Function fputs
# Prints a null-terminated string to file.
# PARAMETERS:
#     s    (rdi) - Pointer to null-terminated string.
#     file (rsi) - File descriptor.
# RETURNS:
#     (rax) - 0 if no error occurred and -1 otherwise.
.type fputs, @function
fputs:
  # Find string length
  xor %rdx, %rdx
.L_strlen:
  inc %rdx
  cmpb $0, -1(%rdi, %rdx)
  jne .L_strlen
  dec %rdx

  # Print string
  xchg %rsi, %rdi
  push %rdx # Save strlen for later
  mov $SYS_WRITE, %rax
  syscall

  # Error?
  pop %rdx
  cmp %rdx, %rax
  jl .L_puts_error

  mov $0, %rax
  ret
  
.L_puts_error:
  mov $-1, %rax
  ret

# Function head
# Prints the first 10 lines in file to stdout.
# PARAMETERS:
#     file (rdi) - File descriptor.
# RETURNS:
#     (rax) - Returns 0 if no io error occurred and -1 otherwise.
.type head, @function
head:
  # The algorithm works by iterating through the buffer until a nl is found.
  # Register usage:
  #     rdx - Input file.
  #     r12 - Lines found.
  #     r13 - No. of characters in buffer.
  #     r14 - Position in buffer.

  # Save non-volatile registers
  push %r12
  push %r13
  push %r14

  # No lines found yet
  xor %r12, %r12

.L_fill_buffer:
  # Reset position
  xor %r14, %r14

  # Read buffer
  push %rdi # Save fd
  mov $buffer, %rsi
  mov $BUFFER_SIZE, %rdx
  mov $SYS_READ, %rax
  syscall
  pop %rdi
  mov %rax, %r13

  # At EOF or error?
  cmp $0, %rax
  je .L_head_done
  jl .L_head_error

.L_next_char:
  inc %r14

  # Print buffer and refresh if all characters were visited
  cmp %r13, %r14
  jg .L_print_and_refresh

  # Exit if we've read 10 lines and continue otherwise
  cmpb $'\n', buffer-1(%r14)
  jne .L_next_char

  inc %r12
  cmp $10, %r12
  je .L_head_done
  jmp .L_next_char

.L_print_and_refresh:
  push %rdi # Save fd
  mov $STDOUT, %rdi
  mov $buffer, %rsi
  mov %r13, %rdx
  mov $SYS_WRITE, %rax
  syscall
  pop %rdi

  # Error?
  cmp %r13, %rax
  jl .L_head_error

  # Refill buffer and continue
  jmp .L_fill_buffer

.L_head_error:
  # Restore non-volatile registers & return -1
  pop %r14
  pop %r13
  pop %r12
  mov $-1, %rax
  ret

.L_head_done:
  # Print remaining characters
  mov $STDOUT, %rdi
  mov $buffer, %rsi
  mov %r14, %rdx
  mov $SYS_WRITE, %rax
  syscall

  # Error?
  cmp %rax, %r14
  jne .L_head_error

  # Restore non-volatile registers & return 0
  pop %r14
  pop %r13
  pop %r12
  mov $0, %rax
  ret

Sep Roland · Accepted Answer · 2024-09-28 20:30:22Z

Are there any stylistic issues?

You can enhance the readability of your program a lot by aligning the instructions, the operands, and the tailcomments to their own columns:

push    %rdx                # Preserve strlen
mov     $SYS_WRITE, %eax
syscall                     # -> RAX
pop     %rdx                # Restore strlen

Seeing those nice #---CONSTANTS---#, #---DATA---#, and #---BUFFERS---# comments, it feels like the next comment is missing:

#---CODE---#

  .section .text
  .globl _start

I've also seen people allocate buffers on the stack for similar purposes. Is there a benefit to allocating buffers on the stack vs. bss aside from recursive calls?

Personal preference probably, but I'd say that working from a general purpose buffer is easiest. Though it is not too difficult to have a buffer in stack memory, the occasional push (like the one you were doing in .L_print_and_refresh) might make it a bit more error-prone and require more attention to detail.

The fputs function

Although you state that this function returns RAX = {-1,0}, four of its six callers instead are inspecting the RDX register! It is your own function so it is your choice: either inspect RAX or else state that you return RDX.

Because of the way you pass parameters RDI and RSI, you had to include the xchg %rsi, %rdi instruction prior to the SYS_WRITE system call. I suggest you modify all of the call sites so you pass the string pointer in RSI and the file descriptor in RDI which I believe is also a bit more common practice.

# Function fputs
# Prints a null-terminated string to file.
# PARAMETERS:
#     s    (rsi) - Pointer to null-terminated string.
#     file (rdi) - File descriptor.
# RETURNS:
#     (rax) - 0 if no error occurred and -1 otherwise.
.type fputs, @function

fputs:
  xor     %edx, %edx       # Find string length
.L_strlen:
  inc     %edx
  cmpb    $0, -1(%rsi, %rdx)
  jne     .L_strlen
  dec     %edx
  push    %rdx             # Print string
  mov     $SYS_WRITE, %eax
  syscall                  # -> RAX
  pop     %rdx
  cmp     %rdx, %rax
  mov     $0, %eax         # RAX=0
  je      .L_puts_ok
  dec     %rax             # RAX=-1
.L_puts_ok:
  ret

Use 32-bit registers whenever you can, but keep using 64-bit registers within addressing modes and for pushes and pops. Your string length is never going to exceed 4GB so you don't need the extra space that RDX offers. Remember that writing to the lowest 32 bits of a 64-bit register automatically zeroes the highest 32 bits. The benefit for us is that the assembler doesn't need to insert a REX prefix while encoding the instruction, and smaller code in general runs quicker.

The open_rdonly function

.equ O_READONLY, 00
.equ NO_MODE, 00
mov $O_READONLY, %rsi
mov $O_READONLY, %rdx

It is strange to see you loaded both %RSI and %RDX with the same constant. Also probably better to keep these equates in the dedicated CONSTANTS section.
You don't need two separate byte-sized compares to find out about stdin. Use cmpw $'-\0', (%rdi).

# Function open_rdonly
# Opens a file for reading, returns stdin if path == "-".
# PARAMETERS:
#     path (rdi) - Path as a pointer to null-terminated string.
# RETURNS:
#     (rax) - File descriptor on success, -1 otherwise.
.type open_rdonly, @function

open_rdonly:
  mov     $STDIN, %eax
  cmpw    $'-\0', (%rdi)   # path == "-" ?
  je      .L_stdin
  mov     $..., %esi
  mov     $..., %edx
  mov     $SYS_OPEN, %eax
  syscall
.L_stdin:
  ret

The head function

  # Error?
  cmp %r13, %rax
  jl .L_head_error

  # Refill buffer and continue
  jmp .L_fill_buffer

.L_head_error:

Because .L_head_error immediately follows, it is easy to avoid the extra jmp instruction. Just inverse the conditional branch:

  cmp     %r13, %rax      # Error?
  jnl     .L_fill_buffer  # Refill buffer and continue
.L_head_error:

Stack Exchange Network

Re-implementing head in x86-64 assembly targeting linux

Project layout:

Makefile:

test.sh:

headx64.s:

1 Answer 1

The fputs function

The open_rdonly function

The head function

You must log in to answer this question.

Hot Network Questions

Re-implementing head in x86-64 assembly targeting linux

Project layout:

Makefile:

test.sh:

headx64.s:

1 Answer 1

The fputs function

The open_rdonly function

The head function

You must log in to answer this question.

Related

Hot Network Questions