Optimize e.asm

2018-12-10 12:47:43 +00:00 · 2018-12-10 12:47:43 +00:00 · 3b52f1d386
parent d2f145bc00
commit 3b52f1d386
1 changed files with 51 additions and 12 deletions
--- a/e.asm
+++ b/e.asm
@ -1,17 +1,56 @@
+; compile with:
+; $ [ny]asm -felf(32|64) -oe.o e.asm
+; $ (gcc|clang) -m(32|64) -oe e.o -nostdlib -nostartfiles
+
 section     .text
 global      _start

+; Per-mode aliases so the code below assembles for both i386 and x86-64:
+;   r(n)      - native-width register: r(cx) is ecx in 32-bit mode, rcx otherwise
+;   rarg0..2  - registers carrying the first three Linux syscall arguments
+;   SYS_write - syscall number of write(2) for the selected mode
+%if __BITS__ == 32
+%define r(n) e%+n
+%define SYS_write 4
+; NOTE: rarg0 is ebx here, i.e. the very same register as r(bx)
+%define rarg0 ebx
+%define rarg1 ecx
+%define rarg2 edx
+; in 32-bit mode every `syscall` in this file expands to `int 0x80`
+%define syscall int 0x80
+%else
+%define r(n) r%+n
+%define SYS_write 1
+%define rarg0 rdi
+%define rarg1 rsi
+%define rarg2 rdx
+; make memory operands RIP-relative by default
+default rel
+%endif
+
+; default capacity of a Linux pipe: 64 KiB (with 4 KiB pages)
+%define PIPE_SIZE 0x10000
+; file descriptor number of standard output
+%define STDOUT_FILENO 1
+
+; Instead of simply storing a char in .rodata and write(2)-ing it
+; over and over again, we first fill a buffer full of e's, and *then*
+; write the entire buffer. This is much faster than the first option,
+; because we only need to issue a syscall once every 65536 bytes. (Remember
+; that every syscall costs a switch into the kernel and back again.)
+
 _start:
+        ; Entry point (linked without libc or startup files).
+        ; allocate space for the message: a PIPE_SIZE-byte buffer on the stack
+        mov r(cx), PIPE_SIZE
+        mov r(bx), r(cx) ; keep a copy of the size: rep stosb below counts r(cx) down to 0
+        sub r(sp), r(cx) ; r(sp) now points at the lowest byte of the buffer

-	mov rax, 1
-	mov rdi, 1
-	mov rsi, msg
-	mov rdx, len
-	loop:
-	syscall
-	jmp loop                             
+        ; quick memset(3): rep stosb stores al at [r(di)] r(cx) times,
+        ; i.e. PIPE_SIZE copies of 'e' starting at r(sp)
+        ; NOTE(review): relies on the direction flag being clear at process entry
+        mov al, 'e'
+        mov r(di), r(sp)
+        rep stosb

-section     .data
+        ; Set up the write(2) arguments once; they are reused on every iteration.
+        ; Load the length BEFORE the fd: in 32-bit builds rarg0 *is* ebx, the
+        ; same register as r(bx), so loading the fd first would overwrite the
+        ; saved PIPE_SIZE and every write(2) would transfer a single byte.
+        mov rarg2, r(bx)        ; count = PIPE_SIZE (copy saved before rep stosb zeroed r(cx))
+        mov rarg1, r(sp)        ; buf   = start of the stack buffer
+        ; push+pop is actually a smaller encoding than mov for ints that fit within 8 bits
+        push STDOUT_FILENO
+        pop rarg0               ; fd    = stdout; r(sp) is back at the buffer start

-msg: db "e"                 
-len: equ $ - msg                             
+.loop:
+        ; set this within the loop because the syscall's return value is placed in r(ax)
+        ; (rarg0..rarg2 are loaded once before the loop and relied on to survive each call)
+        push SYS_write
+        pop r(ax)
+        syscall
+        ; the result of write(2) is never checked: errors and short writes are
+        ; ignored and the loop never exits on its own
+        jmp short .loop