5
\$\begingroup\$

The task is to do a byte-by-byte comparison and print whether the files are equal or not.

Code:

dosseg

; newline -- move the cursor to the start of the next line by writing CR and
; then LF with DOS function 02h (INT 21h). Takes no arguments; loads AH and DL.
newline macro
    mov dl, 0Dh             ; Carriage return.
    mov ah, 02h             ; DOS: write the character in DL.
    int 21h

    mov dl, 0Ah             ; Line feed; AH is deliberately not reloaded.
    int 21h
endm

; err s -- print the '$'-terminated message s, then the DOS error code.
;   In:       AX = error code as returned by the failing INT 21h call.
;             s  = label of a '$'-terminated string in the data segment.
;   Clobbers: AX, DX.
; Only the low byte of the code is printed, as a single character, so the
; output is a proper decimal digit for codes 0-9 only.
err macro s
    push ax                 ; Function 09h returns with AL = '$', so save the code first.
    mov dx, offset s
    mov ah, 9               ; DOS: write '$'-terminated string at DS:DX.
    int 21h

    pop dx                  ; DL = low byte of the saved error code.
    add dl, '0'             ; Convert to ASCII.
    mov ah, 2               ; DOS: write the character in DL.
    int 21h
endm

; prompt s -- display the '$'-terminated string at label s using DOS
; function 09h (INT 21h). Loads AH and DX.
prompt macro s
    mov ah, 09h             ; DOS: write '$'-terminated string at DS:DX.
    mov dx, offset s
    int 21h
endm

; info s -- display the '$'-terminated status message at label s using DOS
; function 09h (INT 21h). Loads AH and DX. Same expansion as `prompt`.
info macro s
    mov ah, 09h             ; DOS: write '$'-terminated string at DS:DX.
    mov dx, offset s
    int 21h
endm

; getchar -- read one character from standard input with DOS function 01h
; (INT 21h). The character comes back in AL. The formal parameter c is not
; referenced by the expansion; callers invoke the macro without an argument.
getchar macro c
    mov ah, 01h             ; DOS: read a character (with echo) into AL.
    int 21h
endm

.model small
.stack 100h
; Data segment: filename buffers, DOS file handles, '$'-terminated messages
; for INT 21h function 09h, and the per-file read state used by main.
.data
f1          db 128 dup(0)     ; Filename 1; zero-filled, so the typed name ends up NUL-terminated.
f2          db 128 dup(0)     ; Filename 2; same layout as f1.
h1          dw ?              ; File handle 1 returned from DOS.
h2          dw ?              ; File handle 2.
prompt1     db 'Enter the first filename: $'
prompt2     db 'Enter the second filename: $'
oerr_msg    db 'Error: Failed to open file. Error code: $'
rerr_msg    db 'Error: Failed to read file. Error code: $'
eq_msg      db 'The files are equal.$'
neq_msg     db 'The files are unequal.$'
buf1        db 1              ; One-byte read buffer for file 1 (the 1 is an initial value, not a size).
buf2        db 1              ; One-byte read buffer for file 2 (same note as buf1).
eof1        db 0              ; Set to 1 once file 1 has reached EOF.
eof2        db 0              ; Set to 1 once file 2 has reached EOF.
.code
; main -- program entry point.
; Prompts for two filenames, opens both files read-only and compares them one
; byte at a time, then prints whether they are equal. Every handle that was
; successfully opened is closed before the program terminates through DOS
; function 4Ch with exit code 0.
main proc
    mov ax, @data
    mov ds, ax

    ; --- Input the filenames. ---
    prompt prompt1
    mov si, offset f1
read:
    getchar
    cmp al, 13              ; EOL?
    je  next
    cmp si, offset f1 + 127 ; Buffer full? The last byte is kept for the NUL.
    jae read                ; Yes: drop the character but keep reading to EOL.
    mov [si], al            ; Store the byte.
    inc si
    jmp read

next:
    mov byte ptr [si], 0    ; Terminate explicitly instead of relying on zero fill.
    newline                 ; Enter only echoed CR; move to a fresh line.
    prompt prompt2
    mov si, offset f2

read2:
    getchar
    cmp al, 13              ; EOL?
    je  open
    cmp si, offset f2 + 127 ; Same bound as for the first name.
    jae read2
    mov [si], al
    inc si
    jmp read2

open:
    mov byte ptr [si], 0
    newline

    ; --- Open the files using INT 21h ----
    mov ax, 3d00h           ; AH = 3Dh open file, AL = 0 read-only mode.
    mov dx, offset f1
    int 21h
    jc  open_error          ; Nothing is open yet, so no cleanup is needed.
    mov h1, ax              ; File handle returned in AX.

    mov ax, 3d00h
    mov dx, offset f2
    int 21h
    jc  open_error2
    mov h2, ax
    jmp read_files

open_error2:
    ; The first file is already open: close it, keeping the error code in AX.
    push ax
    mov ah, 3eh
    mov bx, h1
    int 21h
    pop ax

open_error:
    err oerr_msg
    jmp done

    ; --- Read both files whilst doing a byte-by-byte comparison ---
read_files:
    mov ah, 3fh             ; Read file.
    mov bx, h1              ; First file's handle.
    mov dx, offset buf1     ; Pointer to the buffer where the byte will be stored.
    mov cx, 1               ; Read 1 byte at a time.
    int 21h
    jc  read_error
    cmp ax, cx              ; Fewer bytes than requested means EOF.
    je  read_file2
    mov eof1, 1             ; Set eof1 to true.

read_file2:
    ; --- Read from the second file ---
    mov ah, 3fh             ; Read file.
    mov bx, h2              ; Second file's handle.
    mov dx, offset buf2     ; Pointer to the buffer where the byte will be stored.
    mov cx, 1               ; Read 1 byte at a time.
    int 21h
    jc  read_error
    cmp ax, cx              ; Fewer bytes than requested means EOF.
    je  compare_files
    mov eof2, 1             ; Set eof2 to true.

compare_files:
    ; Both at EOF    -> the files are equal.
    ; Exactly one    -> the files differ in length, so they are unequal.
    ; Neither        -> compare the two bytes just read and carry on if equal.
    cmp eof1, 1
    je  eof1_set

    cmp eof2, 1
    je  files_not_equal     ; Only the second file has ended.

    mov dl, buf1
    cmp dl, buf2
    je  read_files          ; Bytes match: continue with the next pair.
    jmp files_not_equal     ; Bytes are not equal. Bail out.

eof1_set:
    cmp eof2, 1
    jne files_not_equal     ; Only the first file has ended.
                            ; Otherwise fall through: both ended together.
files_equal:
    info eq_msg
    jmp cleanup

files_not_equal:
    info neq_msg
    jmp cleanup

read_error:
    err rerr_msg            ; Both files are open here; fall into cleanup.

cleanup:
    ; --- Close files ----
    mov ah, 3eh
    mov bx, h1
    int 21h

    mov ah, 3eh
    mov bx, h2
    int 21h

done:
    newline
    mov ax, 4c00h           ; AH = 4Ch terminate, AL = 0 exit code.
    int 21h
main endp
end main

Review Request:

Redundancies, duplication, simplifications. I haven't used assembly before, so anything and everything is welcome.

\$\endgroup\$
2
  • \$\begingroup\$ Not a review of your code or anything you did, but (as someone who used MS-DOS and programmed on an IBM XT) I wish profs would stop teaching assembly with 16-bit MS-DOS. It’s not transferable to how modern software engineers use assembly, either for device drivers or to optimize inner loops. 16-bit MS-DOS is not even simpler! You have to learn all about obsolete 64K segments you’ll never use again. And `as` (the GNU assembler) on Linux is free, so you could be assigning students to, for example, write 2-D graphics to the framebuffer and count the FPS. \$\endgroup\$ Commented Apr 15 at 17:43
  • \$\begingroup\$ In fact, this isn’t even how software was written for MS-DOS since Doom came out in ’93. \$\endgroup\$ Commented Apr 15 at 17:51

2 Answers 2

3
\$\begingroup\$

i - Still reading a byte at a time

; err s -- as posted in the question: print message s, then the error code.
err macro s 
 mov dx, offset s 
 mov ah, 9
 int 21h
 ; NOTE: DOS function 09h returns with AL = '$', so AL no longer holds the
 ; error code at this point and the character printed below is wrong.
 ; Print the error code in AX as a character (decimal).
 mov dl, al       ; Get the low byte of AX.
 add dl, '0'      ; Convert to ASCII (a valid digit only for codes 0-9).
 mov ah, 2        ; Print character
 int 21h
endm

Sadly, there's one error in the err macro! Despite the fact that the documentation might say otherwise, the DOS.PrintString function 09h does clobber the AL register. It returns with the character "$" in AL. This means your displaying of the error code currently can't possibly work. Another problem here is that simply adding '0' is enough for error codes from 1 to 9, but not for the many remaining error codes. Luckily DOS.OpenFile and DOS.ReadFile don't need to return error codes above 6.


The prompt and info macros are identical. Why not have just one and name it string?


Whenever similar tasks need to run, using a subroutine is often better. Retrieving the filenames AND opening the files is a good example. My OpenEx subroutine combines both these operations and your main code invokes the subroutine twice, each time with a different set of arguments (here in SI and DI):

    ...
    
    string prompt1
    mov    si, offset f1
    mov    di, offset h1
    call   OpenEx
    string prompt2
    mov    si, offset f2
    mov    di, offset h2
    call   OpenEx
    
    ...
    
done:
    newline
    mov    ax, 4C00h
    int    21h
; -------------------------
; OpenEx -- read a filename from the keyboard into the buffer at SI, open
; that file read-only and store the handle in the word at DI.
;   In:  SI = offset of the filename buffer, DI = offset of the handle word.
;   On failure control goes to open_error, which prints the message and
;   jumps to done instead of returning to the caller.
OpenEx:
    ; --- Input the filenames. ---
    push   si              ; (1) Remember where the name starts.
getname:
    getchar                ; -> AL
    cmp    al, 13          ; EOL?
    je     end_of_name
    mov    [si], al        ; Store the byte.
    inc    si
    jmp    getname
end_of_name:
    pop    si              ; (1) Back to the start of the name.
    ; --- Open the files using INT 21h ----
    mov    dx, si
    mov    ax, 3D00h       ; Open file in Read-only mode.
    int    21h
    jc     open_error
    mov    [di], ax        ; File handle returned in AX.
    ret
open_error:
    err    oerr_msg 
    jmp    done            ; Leaves the return address on the stack; done terminates the program.
; -------------------------
main endp
end main

You can simplify the task of checking for end-of-files by storing different values in eof1 and eof2. I suggest setting eof1 = 1 and eof2 = 2. A simple OR then can tell you if it is safe to compare bytes or that halting is in order:

    ; Decide from the two EOF flags whether to keep comparing, report equal,
    ; or report unequal. Assumes eof1 is stored as 0/1 and eof2 as 0/2.
    mov   al, eof1        ; {0,1}
    or    al, eof2        ; {0,2}
    jnz   detected_eof    ; AL is {1,2,3}
    mov   al, buf1        ; Continue comparing characters.
    cmp   al, buf2
    je    read_files
files_not_equal:          ; Bytes are not equal. Bail out.
    string neq_msg
    jmp   cleanup
detected_eof:             ; Flags are still those of the OR above (JNZ leaves them alone)
    jnp   files_not_equal ; AL is {1,2} Only ONE of the files ended
files_equal:              ; AL is {3} BOTH files ended together
    string eq_msg
    jmp   cleanup

The jnp conditional jump checks the parity flag. If the number of ON bits in a byte (here AL, as left by or al, eof2) is odd, the parity flag is clear and the parity is said to be "off" aka "no parity", so jnp jumps. If the number of ON bits is even, the parity flag is set and the parity is said to be "on" aka "parity", so jnp falls through.

0  00000000b  parity on
1  00000001b  parity off
2  00000010b  parity off
3  00000011b  parity on

Also note that I used AL instead of the DL register that you wrote. This uses a one byte shorter encoding in mov al, eof1 and in mov al, buf1.

ii - It gets simpler if we read more at once

It is certainly a valuable idea to read more than one byte at a time, and note that DOS already does that internally too! DOS uses several 512-byte buffers for the purpose of disk I/O.
Next is my implementation of this idea. If you study my code, you'll see a lot more stuff that I didn't include in my answer so far, eg. err is no longer a macro, but a full-fledged subroutine, and the buffers get re-used for different purposes.

dosseg

; newline -- write CR then LF with DOS function 02h (INT 21h).
; Takes no arguments; loads AH and DL.
newline macro
    mov dl, 0Dh             ; Carriage return.
    mov ah, 02h             ; DOS: write the character in DL.
    int 21h
    mov dl, 0Ah             ; Line feed; AH is deliberately not reloaded.
    int 21h
endm

; string s -- display the '$'-terminated string at label s using DOS
; function 09h (INT 21h). Loads AH and DX.
string macro s
    mov ah, 09h             ; DOS: write '$'-terminated string at DS:DX.
    mov dx, offset s
    int 21h
endm

.model small
.stack 100h
; Data segment: '$'-terminated messages for INT 21h function 09h, two
; 512-byte buffers and the two DOS file handles.
.data
prompt1     db 'Enter the first filename: $'
prompt2     db 'Enter the second filename: $'
oerr_msg    db 'Error: Failed to open file. Error code: $'
rerr_msg    db 'Error: Failed to read file. Error code: $'
eq_msg      db 'The files are equal.$'
neq_msg     db 'The files are unequal.$'
buf1        db 512 dup(?)     ; Buffer 1: first holds filename 1 (passed to OpenEx), then file data.
buf2        db 512 dup(?)     ; Buffer 2: first holds filename 2 (passed to OpenEx), then file data.
h1          dw ?              ; File handle 1.
h2          dw ?              ; File handle 2.

.code
; main -- program entry point.
; Prompts for two filenames, opens both files read-only and compares them in
; chunks of up to 512 bytes, then prints whether they are equal. The helper
; routines OpenEx, err and ReadSector follow the main flow inside this proc.
main proc
    mov    ax, @data
    mov    ds, ax
    mov    es, ax          ; REPE CMPSB compares DS:SI with ES:DI; ES is not DGROUP at entry
    cld                    ; String instructions must step SI and DI upward

    string prompt1
    mov    si, offset buf1 ; buf1 temporarily holds the first filename
    call   OpenEx          ; -> AX (DX SI)
    mov    h1, ax
    string prompt2
    mov    si, offset buf2 ; buf2 temporarily holds the second filename
    call   OpenEx          ; -> AX (DX SI)
    mov    h2, ax

ReadFiles:
    mov    si, offset buf1 ; SI = compare source, DX = read destination
    mov    dx, si
    mov    bx, h1
    call   ReadSector      ; -> AX (CX DX)
    mov    bp, ax          ; [0,512] bytes obtained from file 1

    mov    di, offset buf2 ; DI = compare destination, DX = read destination
    mov    dx, di
    mov    bx, h2
    call   ReadSector      ; -> AX (CX DX)
    mov    cx, ax          ; [0,512] bytes obtained from file 2

    cmp    bp, cx
    jne    files_not_equal ; Files have different lengths
    jcxz   files_equal     ; BOTH files ended together
    repe cmpsb             ; Compare all the bytes ([1,512]) in the buffers
    jne    files_not_equal ; Found a difference in current sector
    cmp    ax, 512         ; AX still holds the byte count of the last read
    je     ReadFiles       ; Got full buffers, so there could be more
files_equal:
    string eq_msg
    jmp   cleanup
files_not_equal:           ; Bytes are not equal. Bail out.
    string neq_msg

cleanup:                   ; --- Close files ----
    mov    bx, h1
    mov    ah, 3Eh
    int    21h
    mov    bx, h2
    mov    ah, 3Eh
    int    21h
done:
    newline
    mov    ax, 4C00h       ; Terminate with exit code 0
    int    21h
; -------------------------
; OpenEx -- read a filename from the keyboard into the buffer at SI, make it
; zero-terminated and open the file read-only.
; IN (si) OUT (ax = file handle) MOD (dx,si)
; On failure execution continues at open_error and never returns.
OpenEx:                    ; --- Input the filenames. ---
    mov    dx, si          ; DX keeps the start of the name for the open call
getname:
    mov    ah, 1           ; Read input
    int    21h             ; -> AL
    cmp    al, 13          ; EOL?
    je     end_of_name
    mov    [si], al        ; Store the byte.
    inc    si
    jmp    getname
end_of_name:               ; --- Open the files using INT 21h ----
    mov    ax, 3D00h       ; Open file in Read-only mode.
    mov    [si], al        ; Make zero-terminated filespec (AL = 0 here)
    int    21h
    jc     open_error
    ret
open_error:
    mov    dx, offset oerr_msg
; ---   ---   ---   ---   -
; err -- print the '$'-terminated message at DX followed by the low byte of
; the error code in AX as one character, then jump to done.
; IN (ax,dx)
err:
    push   ax              ; Preserve error code (function 09h changes AL)
    mov    ah, 9
    int    21h
    pop    dx              ; Restore error code
    add    dl, '0'         ; Convert to ASCII
    mov    ah, 2           ; Print character
    int    21h
    jmp    done
; -------------------------
; ReadSector -- read up to 512 bytes from handle BX into the buffer at DX.
; IN (bx,dx) OUT (ax = bytes read) MOD (cx,dx)
; On a read error execution continues at err and never returns.
ReadSector:
    mov   cx, 512
    mov   ah, 3Fh          ; Read file.
    int   21h              ; -> AX CF
    mov   dx, offset rerr_msg ; MOV leaves CF untouched for the JC below
    jc    err
    ret
; -------------------------
main endp
end main
\$\endgroup\$
3
\$\begingroup\$

Read Sectors, not Bytes

This is extremely slow, and especially slow if your disks are the spinning magnetic disks that were used with MS-DOS. Reading one byte from two files at a time would have required the drive spinning the disk, like a vinyl record, until the drive head is over the correct data, then spinning the same direction to the position of the second file, then spinning back to the position of the first file. Never do this with a floppy! It is much faster to read a large amount of data at once, which is why 16-bit systems had disk defragmenters that tried to make all files readable in a single operation.

Always read an entire sector into a buffer and compare the buffers (which you can do several times faster by using wider instructions). Back in the day, a common sector size was 512 bytes, but in the 32-bit era, filesystems began using 4,096-byte clusters (to simplify transferring them into pages of physical RAM). A device driver on a modern OS might use Direct Memory Access or Programmed I/O, which on some implementations would work best with aligned 64-KiB buffers.

Compare File Sizes

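If you were using the two default FCBs, INT 21h function 23h would retrieve the file sizes; with the file handles the posted code uses, seeking to the end of each file with INT 21h function 42h (AL = 2, CX:DX = 0) returns its size in DX:AX. Two files cannot be identical unless they are the same size, and once you know their size, you might be able to load both files into memory in their entirety (although you still might not want to).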

Use String Instructions

Once you have two chunks of the file in buffers that DS:SI and ES:DI point to, you can compare a byte at a time with CMPSB or four bytes at a time with CMPSD. You might load the element count into CX and write a LOOP that stops when you either reach the end of the buffer or when words of data compare unequal. Alternatively, you might place the element count in CX, use CLD to clear the direction flag (so that SI and DI advance upward), use REPE CMPSD, and check at the end to see whether the canary values were the first bytes that were different. This is a single-instruction inner loop that does not involve branch prediction.

\$\endgroup\$

You must log in to answer this question.

Start asking to get answers

Find the answer to your question by asking.

Ask question

Explore related questions

See similar questions with these tags.