From 3b52f1d3860b87eacd2b79e2d7f9109999e3f9a9 Mon Sep 17 00:00:00 2001
From: PoroCYon <3253268+PoroCYon@users.noreply.github.com>
Date: Mon, 10 Dec 2018 12:47:43 +0000
Subject: [PATCH] Optimize e.asm

Fill a PIPE_SIZE stack buffer with 'e' once and write(2) the whole
buffer in a loop, instead of issuing one syscall per byte.

The write length is kept in rarg2 from the start. On 32-bit builds
rarg0 is ebx, so a copy of the size parked in r(bx) would be overwritten
by the file-descriptor load and every write would be one byte long.

---
Note: the blob ids on the index line predate the rarg2 change to _start.

 e.asm | 65 +++++++++++++++++++++++++++++++++++++++++++++++++++++------------
 1 file changed, 53 insertions(+), 12 deletions(-)

diff --git a/e.asm b/e.asm
index 0dc586d..552fc09 100644
--- a/e.asm
+++ b/e.asm
@@ -1,17 +1,58 @@
+; compile with:
+; $ [ny]asm -felf(32|64) -oe.o e.asm
+; $ (gcc|clang) -m(32|64) -oe e.o -nostdlib -nostartfiles
+
 section .text
-global _start
+global _start
 
-_start:
+%if __BITS__ == 32
+%define r(n) e%+n
+%define SYS_write 4
+%define rarg0 ebx
+%define rarg1 ecx
+%define rarg2 edx
+%define syscall int 0x80
+%else
+%define r(n) r%+n
+%define SYS_write 1
+%define rarg0 rdi
+%define rarg1 rsi
+%define rarg2 rdx
+default rel
+%endif
 
-	mov rax, 1
-	mov rdi, 1
-	mov rsi, msg
-	mov rdx, len
-	loop:
-		syscall
-		jmp loop
+; size of a Linux pipe buffer
+%define PIPE_SIZE 0x10000
+%define STDOUT_FILENO 1
 
-section .data
+; Instead of simply storing a char in .rodata and write(2)-ing it
+; over and over again, we first fill a buffer full of e's, and *then*
+; write the entire buffer. This is much faster than the first option,
+; because we only need to issue a syscall once every 65536 bytes. (Remember
+; that doing a syscall requires the kernel to handle an interrupt etc etc etc.)
 
-msg: db "e"
-len: equ $ - msg
+_start:
+    ; allocate space for the message
+    mov r(cx), PIPE_SIZE
+    ; Keep the write(2) length in rarg2 right away: rep stosb counts r(cx)
+    ; down to zero, and on 32-bit r(bx) is rarg0, so stashing the size there
+    ; would be clobbered by the fd load below (making every write 1 byte).
+    mov rarg2, r(cx)
+    sub r(sp), r(cx)
+
+    ; quick memset(3): fill the buffer with r(cx) copies of al
+    mov al, 'e'
+    mov r(di), r(sp)
+    rep stosb
+
+    ; push+pop is actually a smaller encoding than mov for ints that fit within 8 bit
+    push STDOUT_FILENO
+    pop rarg0
+    mov rarg1, r(sp)
+
+.loop:
+    ; set this within the loop because the syscall's return value is placed in r(ax)
+    push SYS_write
+    pop r(ax)
+    syscall
+    jmp short .loop