From: Philip Solobay Date: Thu, 12 Sep 2024 15:43:24 +0000 (-0500) Subject: Add in Compiler for SIMD X-Git-Url: https://git.vouivredigital.com/?a=commitdiff_plain;h=4aeec78062b9d99891462991f77553eb77e6b9cf;p=vouivre.git Add in Compiler for SIMD --- diff --git a/compiler/assembler.scm b/compiler/assembler.scm new file mode 100644 index 0000000..49ff7bc --- /dev/null +++ b/compiler/assembler.scm @@ -0,0 +1,310 @@ +;;;; Copyright (C) 2023 Vouivre Digital Corporation +;;;; +;;;; This file is part of Vouivre. +;;;; +;;;; Vouivre is free software: you can redistribute it and/or +;;;; modify it under the terms of the GNU General Public +;;;; License as published by the Free Software Foundation, either +;;;; version 3 of the License, or (at your option) any later version. +;;;; +;;;; Vouivre is distributed in the hope that it will be useful, +;;;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +;;;; General Public License for more details. +;;;; +;;;; You should have received a copy of the GNU General Public +;;;; License along with Vouivre. If not, see . + + + +(define-module (assembler) + #:use-module (ice-9 match) + #:use-module (ice-9 binary-ports) + #:use-module (rnrs bytevectors) + #:export (assemble get-label-positions get-relocation-table)) + +(define label-positions (make-hash-table)) +(define relocation-table '()) +(define current-position #x1000) + +(define (register->code reg) + (let ((reg-sym (cond + ((symbol? reg) reg) + ((string? reg) (string->symbol reg)) + ((list? reg) (car reg)) + (else (error "Invalid register type" reg))))) + (case reg-sym + ((rax eax) 0) + ((rcx ecx) 1) + ((rdx edx) 2) + ((rbx ebx) 3) + ((rsp esp) 4) + ((rbp ebp) 5) + ((rsi esi) 6) + ((rdi edi) 7) + ((r8 r8d) 0) + ((r9 r9d) 1) + ((r10 r10d) 2) + ((r11 r11d) 3) + ((r12 r12d) 4) + ((r13 r13d) 5) + ((r14 r14d) 6) + ((r15 r15d) 7) + (else (error "Unknown register" reg-sym))))) + +(define (ymm-register->code reg) + (let ((reg-sym (cond + ((symbol? reg) reg) + ((string? reg) (string->symbol reg)) + ((list? reg) (car reg)) + (else (error "Invalid register type" reg))))) + (case reg-sym + ((ymm0) 0) + ((ymm1) 1) + ((ymm2) 2) + ((ymm3) 3) + (else (error "Unknown YMM register" reg-sym))))) + +(define (symbolic-reference? x) + (and (symbol? x) + (memq x '(buffer1 buffer2 result multiplier)))) + +(define (add-relocation type symbol) + (set! relocation-table + (cons (list current-position type symbol) relocation-table))) + +(define (encode-push reg) + (u8-list->bytevector (list #x55))) + +(define (encode-pop reg) + (u8-list->bytevector (list #x5D))) + +(define (encode-test reg1 reg2) + (let ((reg1-code (register->code reg1)) + (reg2-code (register->code reg2))) + (u8-list->bytevector + (list #x48 #x85 (logior #xC0 (ash reg2-code 3) reg1-code))))) + +(define (encode-mov dest src) + (cond + ((and (symbol? dest) (symbol? src)) + (let ((dest-code (register->code dest)) + (src-code (register->code src))) + (u8-list->bytevector + (list (if (or (>= dest-code 8) (>= src-code 8)) #x4C #x48) + #x89 + (logior #xC0 (ash (if (>= src-code 8) (- src-code 8) src-code) 3) + (if (>= dest-code 8) (- dest-code 8) dest-code)))))) + ((and (symbol? dest) (list? src)) + (let ((dest-code (register->code dest)) + (src-reg (car src))) + (u8-list->bytevector + (list (if (or (>= dest-code 8) (>= (register->code src-reg) 8)) #x4C #x48) + #x8B + (logior #x00 (ash (if (>= dest-code 8) (- dest-code 8) dest-code) 3) + (if (>= (register->code src-reg) 8) (- (register->code src-reg) 8) (register->code src-reg))))))) + ((and (eq? dest 'rbp) (eq? src 'rsp)) + (u8-list->bytevector (list #x48 #x89 #xE5))) + (else (error "Unsupported mov instruction" dest src)))) + +(define (encode-mov-imm32 reg imm) + (let ((reg-code (register->code reg))) + (if (symbolic-reference? imm) + (begin + (add-relocation 'absolute imm) + (bytevector-append + (u8-list->bytevector (list #x48 #xC7 (logior #xC0 reg-code))) + (make-bytevector 4 0))) + (bytevector-append + (u8-list->bytevector (list #x48 #xC7 (logior #xC0 reg-code))) + (integer->bytevector imm 4))))) + +(define (encode-add dest src) + (let ((dest-code (register->code dest)) + (src-code (register->code src))) + (u8-list->bytevector (list #x48 #x01 (logior #xC0 (ash src-code 3) dest-code))))) + +(define (encode-xor dest src) + (let ((dest-code (register->code dest)) + (src-code (register->code src))) + (u8-list->bytevector (list #x48 #x31 (logior #xC0 (ash src-code 3) dest-code))))) + +(define (encode-syscall) + (u8-list->bytevector (list #x0F #x05))) + +(define (encode-vmovaps dest src) + (cond + ;; Case 1: Destination is a single-element list (memory operand) + ((and (list? dest) (= (length dest) 1)) + (let ((dest-reg (car dest)) + (src-code (ymm-register->code src))) + (u8-list->bytevector + (list #xC5 #xFC #x28 + (logior #x00 + (ash src-code 3) + (if (>= (register->code dest-reg) 8) + (- (register->code dest-reg) 8) + (register->code dest-reg))))))) + + ;; Case 2: Source is a single-element list (memory operand) + ((and (list? src) (= (length src) 1)) + (let ((dest-code (ymm-register->code dest)) + (src-reg (car src))) + (if (symbolic-reference? src-reg) + (begin + (add-relocation 'rip-relative src-reg) + (bytevector-append + (u8-list->bytevector (list #xC5 #xFC #x29 #x05)) + (make-bytevector 4 0))) + (u8-list->bytevector + (list #xC5 #xFC #x29 + (logior #x00 + (ash dest-code 3) + (if (>= (register->code src-reg) 8) + (- (register->code src-reg) 8) + (register->code src-reg)))))))) + + ;; Case 3: Both operands are registers + ((and (symbol? dest) (symbol? src)) + (let ((dest-code (ymm-register->code dest)) + (src-code (ymm-register->code src))) + (u8-list->bytevector (list #xC5 #xFC #x28 (logior #xC0 (ash src-code 3) dest-code))))) + + ;; Error case: Invalid operands + (else (error "Invalid operands for vmovaps" dest src)))) + +(define (encode-vaddps dest src1 src2) + (let ((dest-code (ymm-register->code dest)) + (src1-code (ymm-register->code src1)) + (src2-code (ymm-register->code src2))) + (u8-list->bytevector + (list #xC5 + (logxor #xFC (ash src1-code 3)) + #x58 + (logior #xC0 (ash src2-code 3) dest-code))))) + +(define (encode-vfmadd132ps dest src1 src2) + (let ((dest-code (ymm-register->code dest)) + (src1-code (ymm-register->code src1)) + (src2-code (ymm-register->code src2))) + (u8-list->bytevector + (list #xC4 + (logxor #xE2 (ash (logand src2-code #x08) 1)) + (logxor #x7D (ash (logand src1-code #x0F) 3)) + #x98 + (logior #xC0 (ash (logand src2-code #x07) 3) dest-code))))) + +(define (encode-vxorps dest src1 src2) + (let ((dest-code (ymm-register->code dest)) + (src1-code (ymm-register->code src1)) + (src2-code (ymm-register->code src2))) + (u8-list->bytevector (list #xC5 #xF4 #x57 (logior #xC0 (ash src2-code 3) dest-code))))) + +(define (encode-mod-rm-sib mod-rm reg rm) + (u8-list->bytevector + (list (logior (ash mod-rm 6) + (ash (if (number? reg) reg (register->code reg)) 3) + (if (number? rm) rm (register->code rm)))))) + +(define (encode-lea instruction) + (match instruction + (('lea dest ('rip label)) + (let* ((opcode #x8D) + (rex-prefix (if (>= (register->code dest) 8) #x4C #x48)) + (reg-code (if (>= (register->code dest) 8) + (- (register->code dest) 8) + (register->code dest))) + (displacement 0)) + (add-relocation 'rip-relative label) + (bytevector-append + (u8-list->bytevector (list rex-prefix opcode)) + (encode-mod-rm-sib 0 reg-code 5) + (integer->bytevector displacement 4)))) + (('lea dest ('rip (? (lambda (x) (string-suffix? "@GOTPCREL" (symbol->string x))) label))) + (let* ((opcode #x8D) + (rex-prefix (if (>= (register->code dest) 8) #x4C #x48)) + (reg-code (if (>= (register->code dest) 8) + (- (register->code dest) 8) + (register->code dest))) + (displacement 0)) + (add-relocation 'got-pcrel label) + (bytevector-append + (u8-list->bytevector (list rex-prefix opcode)) + (encode-mod-rm-sib 0 reg-code 5) + (integer->bytevector displacement 4)))) + (_ (error "Unsupported lea instruction" instruction)))) + +(define (encode-ret) + (u8-list->bytevector (list #xC3))) + +(define (encode-jz label) + (add-relocation 'relative label) + (u8-list->bytevector (list #x74 #x00))) + +(define (encode-instruction inst) + (let ((encoded (match inst + (('label name) '()) + (('mov dest src) (encode-mov dest src)) + (('mov.imm32 reg imm) (encode-mov-imm32 reg imm)) + (('add dest src) (encode-add dest src)) + (('xor dest src) (encode-xor dest src)) + (('syscall) (encode-syscall)) + (('vmovaps dest src) (encode-vmovaps dest src)) + (('vaddps dest src1 src2) (encode-vaddps dest src1 src2)) + (('vfmadd132ps dest src1 src2) (encode-vfmadd132ps dest src1 src2)) + (('vxorps dest src1 src2) (encode-vxorps dest src1 src2)) + (('lea dest src) (encode-lea inst)) + (('push reg) (encode-push reg)) + (('pop reg) (encode-pop reg)) + (('ret) (encode-ret)) + (('test reg1 reg2) (encode-test reg1 reg2)) + (('jz label) (encode-jz label)) + (_ (error "Unsupported instruction" inst))))) + (when (bytevector? encoded) + (set! current-position (+ current-position (bytevector-length encoded)))) + encoded)) + +(define (bytevector-append . bvs) + (let* ((total-length (apply + (map bytevector-length bvs))) + (result (make-bytevector total-length 0)) + (offset 0)) + (for-each + (lambda (bv) + (let ((len (bytevector-length bv))) + (bytevector-copy! bv 0 result offset len) + (set! offset (+ offset len)))) + bvs) + result)) + +(define (integer->bytevector n size) + (let ((bv (make-bytevector size 0))) + (do ((i 0 (+ i 1))) + ((= i size) bv) + (bytevector-u8-set! bv i (logand (ash n (* -8 i)) #xFF))))) + +(define (assemble instructions) + (hash-clear! label-positions) + (set! relocation-table '()) + (set! current-position #x1000) + + ;; First pass: collect label positions + (for-each + (lambda (inst) + (when (eq? (car inst) 'label) + (hash-set! label-positions (cadr inst) current-position)) + (encode-instruction inst)) + instructions) + + ;; Second pass: generate code with relocations + (set! current-position #x1000) + (let ((encoded-instructions + (filter bytevector? (map encode-instruction instructions)))) + (values + (apply bytevector-append encoded-instructions) + (reverse relocation-table)))) + +(define (get-label-positions) + label-positions) + +(define (get-relocation-table) + relocation-table) diff --git a/compiler/config.scm b/compiler/config.scm new file mode 100644 index 0000000..a13251b --- /dev/null +++ b/compiler/config.scm @@ -0,0 +1,134 @@ +;;;; Copyright (C) 2023 Vouivre Digital Corporation +;;;; +;;;; This file is part of Vouivre. +;;;; +;;;; Vouivre is free software: you can redistribute it and/or +;;;; modify it under the terms of the GNU General Public +;;;; License as published by the Free Software Foundation, either +;;;; version 3 of the License, or (at your option) any later version. +;;;; +;;;; Vouivre is distributed in the hope that it will be useful, +;;;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +;;;; General Public License for more details. +;;;; +;;;; You should have received a copy of the GNU General Public +;;;; License along with Vouivre. If not, see . + + + +(define-module (config) + #:export ( + ;; ELF Header Constants + elf-header-size + code-offset + alignment + elf-magic + elf-class-64 + elf-data-lsb + elf-version-current + elf-osabi-linux + elf-type-shared + elf-machine-x86-64 + + ;; Section Header Constants + section-header-size + num-sections + shstrtab-index + sht-null + sht-progbits + sht-symtab + sht-strtab + sht-rela + sht-dynamic + sht-nobits + sht-dynsym + sht-hash + sht-note + shf-write + shf-alloc + shf-execinstr + + ;; Program Header Constants + program-header-size + num-program-headers + pt-load + pt-note + pt-dynamic + pt-phdr + pt-gnu-relro + pf-x + pf-w + pf-r + + ;; Other Constants + dynamic-entry-size + num-dynamic-entries + got-entry-size + text-addr + shstrtab-addr + shstrtab-size + section-offset + + ;; Elf Layout Constants + word-size + double-word-size + plt-entry-size + )) + +;; ELF Header Constants +(define elf-header-size 64) +(define code-offset #x1000) +(define alignment #x1000) +(define elf-magic #x464c457f) +(define elf-class-64 2) +(define elf-data-lsb 1) +(define elf-version-current 1) +(define elf-osabi-linux 3) +(define elf-type-shared 3) +(define elf-machine-x86-64 #x3e) + +;; Section Header Constants +(define section-header-size 64) +(define num-sections 15) +(define shstrtab-index 14) +(define sht-null 0) +(define sht-progbits 1) +(define sht-symtab 2) +(define sht-strtab 3) +(define sht-rela 4) +(define sht-hash 5) +(define sht-dynamic 6) +(define sht-note 7) +(define sht-nobits 8) +(define sht-dynsym 11) +(define shf-write #x1) +(define shf-alloc #x2) +(define shf-execinstr #x4) + +;; Program Header Constants +(define program-header-size 56) +(define num-program-headers 5) +(define pt-load 1) +(define pt-dynamic 2) +(define pt-note 4) +(define pt-phdr 6) +(define pt-gnu-relro #x6474e552) +(define pf-x #b001) +(define pf-w #b010) +(define pf-r #b100) + +;; Other Constants +(define dynamic-entry-size 16) +(define num-dynamic-entries 9) +(define got-entry-size 8) +(define text-addr #x1000) +(define shstrtab-addr #x3f94) +(define shstrtab-size 108) +(define section-offset #x2000) + +;; Elf Layout Constants +;; Constants for alignment and sizes +(define word-size 8) +(define double-word-size 16) +(define plt-entry-size 16) diff --git a/compiler/data-section.scm b/compiler/data-section.scm new file mode 100644 index 0000000..9861949 --- /dev/null +++ b/compiler/data-section.scm @@ -0,0 +1,37 @@ +;;;; Copyright (C) 2023 Vouivre Digital Corporation +;;;; +;;;; This file is part of Vouivre. +;;;; +;;;; Vouivre is free software: you can redistribute it and/or +;;;; modify it under the terms of the GNU General Public +;;;; License as published by the Free Software Foundation, either +;;;; version 3 of the License, or (at your option) any later version. +;;;; +;;;; Vouivre is distributed in the hope that it will be useful, +;;;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +;;;; General Public License for more details. +;;;; +;;;; You should have received a copy of the GNU General Public +;;;; License along with Vouivre. If not, see . + + + +(define-module (data-section) + #:use-module (rnrs bytevectors) + #:use-module (ice-9 format) + #:export (create-data-section)) + +(define (create-data-section data-sections) + (let* ((total-size (apply + (map (lambda (section) (bytevector-length (cdr section))) data-sections))) + (data-section (make-bytevector total-size 0)) + (current-offset 0)) + (for-each + (lambda (section) + (let* ((name (car section)) + (data (cdr section)) + (size (bytevector-length data))) + (bytevector-copy! data 0 data-section current-offset size) + (set! current-offset (+ current-offset size)))) + data-sections) + data-section)) diff --git a/compiler/dynamic-section.scm b/compiler/dynamic-section.scm new file mode 100644 index 0000000..bed070c --- /dev/null +++ b/compiler/dynamic-section.scm @@ -0,0 +1,86 @@ +;;;; Copyright (C) 2023 Vouivre Digital Corporation +;;;; +;;;; This file is part of Vouivre. +;;;; +;;;; Vouivre is free software: you can redistribute it and/or +;;;; modify it under the terms of the GNU General Public +;;;; License as published by the Free Software Foundation, either +;;;; version 3 of the License, or (at your option) any later version. +;;;; +;;;; Vouivre is distributed in the hope that it will be useful, +;;;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +;;;; General Public License for more details. +;;;; +;;;; You should have received a copy of the GNU General Public +;;;; License along with Vouivre. If not, see . + + +(define-module (dynamic-section) + #:use-module (rnrs bytevectors) + #:use-module (rnrs io simple) + #:export (create-dynamic-section)) + +;; Constants for dynamic section entry tags +(define DT_NULL 0) +(define DT_PLTGOT 3) +(define DT_HASH 4) +(define DT_STRTAB 5) +(define DT_SYMTAB 6) +(define DT_RELA 7) +(define DT_RELASZ 8) +(define DT_RELAENT 9) +(define DT_STRSZ 10) +(define DT_SYMENT 11) +(define DT_INIT 12) +(define DT_FINI 13) +(define DT_JMPREL 23) +(define DT_PLTRELSZ 2) +(define DT_PLTREL 20) +(define DT_VERSYM #x6ffffff0) +(define DT_VERDEF #x6ffffffc) +(define DT_VERDEFNUM #x6ffffffd) + +;; Constants for sizes and offsets +(define ENTRY_SIZE 16) +(define VALUE_OFFSET 8) +(define SYMENT_SIZE 24) +(define RELAENT_SIZE 24) + +;; Helper function to set a dynamic section entry +(define (set-dynamic-entry! section index tag value) + (let ((offset (* index ENTRY_SIZE))) + (bytevector-u64-set! section offset tag (endianness little)) + (bytevector-u64-set! section (+ offset VALUE_OFFSET) value (endianness little)))) + +(define (add-padding-to-align section size desired-size) + (let ((padding (- desired-size size))) + (if (> padding 0) + (let ((padded-section (make-bytevector (+ size padding) 0))) + ;; Copy original section to the padded section + (bytevector-copy! section 0 padded-section 0 size) + padded-section) + section))) + +(define (create-dynamic-section + dynstr-offset dynsym-offset strtab-size dynsym-size + rela-offset rela-size got-offset hash-offset + gnu-version-offset gnu-version-d-offset gnu-version-d-size + plt-offset plt-size jmprel-offset jmprel-size got-plt-offset) + (let* ((num-entries 9) ;; Adjusted for the new number of entries + (section-size (* num-entries ENTRY_SIZE)) ;; Initial section size + (section (make-bytevector section-size 0)) + (final-size #xE0)) ;; Desired size with padding (0xE0 in hex) + + (set-dynamic-entry! section 0 DT_HASH hash-offset) + (set-dynamic-entry! section 1 DT_STRTAB dynstr-offset) + (set-dynamic-entry! section 2 DT_SYMTAB dynsym-offset) + (set-dynamic-entry! section 3 DT_STRSZ strtab-size) + (set-dynamic-entry! section 4 DT_SYMENT SYMENT_SIZE) + (set-dynamic-entry! section 5 DT_RELA rela-offset) + (set-dynamic-entry! section 6 DT_RELASZ rela-size) + (set-dynamic-entry! section 7 DT_RELAENT RELAENT_SIZE) + (set-dynamic-entry! section 8 DT_NULL 0) + + ;; Add padding to align the section to the final size (0xE0) + (add-padding-to-align section section-size final-size))) diff --git a/compiler/elf-dynamic-calculator.scm b/compiler/elf-dynamic-calculator.scm new file mode 100644 index 0000000..3e6c700 --- /dev/null +++ b/compiler/elf-dynamic-calculator.scm @@ -0,0 +1,80 @@ +;;;; Copyright (C) 2023 Vouivre Digital Corporation +;;;; +;;;; This file is part of Vouivre. +;;;; +;;;; Vouivre is free software: you can redistribute it and/or +;;;; modify it under the terms of the GNU General Public +;;;; License as published by the Free Software Foundation, either +;;;; version 3 of the License, or (at your option) any later version. +;;;; +;;;; Vouivre is distributed in the hope that it will be useful, +;;;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +;;;; General Public License for more details. +;;;; +;;;; You should have received a copy of the GNU General Public +;;;; License along with Vouivre. If not, see . + + + +(define-module (elf-dynamic-calculator) + #:use-module (rnrs bytevectors) + #:export (calculate-entry-point + calculate-code-offset + calculate-num-sections + calculate-shstrtab-index + calculate-num-program-headers + calculate-shstrtab-addr + calculate-shstrtab-size + calculate-section-offset + calculate-text-section-offset)) + +(define (calculate-text-section-offset elf-header-size program-headers-size) + "Calculate the offset of the .text section from the start of the file." + (+ elf-header-size program-headers-size)) + +(define (calculate-entry-point text-addr text-section-offset) + "Calculate the entry point based on the text address and offset within the .text section." + text-addr) + +(define (calculate-code-offset elf-header-size program-headers) + "Calculate the code offset based on the size of the ELF header and program headers." + (+ elf-header-size (* (length program-headers) 56))) ; 56 is the size of a program header + +(define (calculate-num-sections sections) + "Calculate the number of sections based on the actual sections in the ELF file." + (length sections)) + +(define (calculate-shstrtab-index sections) + "Calculate the index of the section header string table." + (let ((shstrtab-section (find (lambda (section) (eq? (car section) '.shstrtab)) sections))) + (if shstrtab-section + (list-index (lambda (section) (eq? section shstrtab-section)) sections) + (error "Section header string table not found")))) + +(define (calculate-num-program-headers program-headers) + "Calculate the number of program headers." + (length program-headers)) + +(define (calculate-shstrtab-addr file-layout) + "Calculate the address of the section header string table based on the file layout." + (let ((shstrtab-section (assoc '.shstrtab file-layout))) + (if shstrtab-section + (cdr shstrtab-section) + (error "Section header string table not found in file layout")))) + +(define (calculate-shstrtab-size shstrtab-content) + "Calculate the size of the section header string table." + (bytevector-length shstrtab-content)) + +(define (calculate-section-offset elf-header-size program-headers alignment) + "Calculate the section offset based on the size of the ELF header, program headers, and alignment." + (let ((total-header-size (+ elf-header-size (* (length program-headers) 56)))) + (align-to alignment total-header-size))) + +(define (align-to alignment value) + "Align a value to the specified alignment." + (let ((remainder (modulo value alignment))) + (if (zero? remainder) + value + (+ value (- alignment remainder))))) diff --git a/compiler/elf-header.scm b/compiler/elf-header.scm new file mode 100644 index 0000000..fdafe40 --- /dev/null +++ b/compiler/elf-header.scm @@ -0,0 +1,83 @@ +;;;; Copyright (C) 2023 Vouivre Digital Corporation +;;;; +;;;; This file is part of Vouivre. +;;;; +;;;; Vouivre is free software: you can redistribute it and/or +;;;; modify it under the terms of the GNU General Public +;;;; License as published by the Free Software Foundation, either +;;;; version 3 of the License, or (at your option) any later version. +;;;; +;;;; Vouivre is distributed in the hope that it will be useful, +;;;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +;;;; General Public License for more details. +;;;; +;;;; You should have received a copy of the GNU General Public +;;;; License along with Vouivre. If not, see . + + +(define-module (elf-header) + #:use-module (rnrs bytevectors) + #:use-module (srfi srfi-9) + #:use-module (config) + #:export (create-elf-header)) + +;; ELF Header structure +(define-record-type + (make-elf-header entry-point ph-offset ph-size sh-offset ph-count sh-count + total-size shstrtab-index hash-offset hash-size) + elf-header? + (entry-point elf-header-entry-point) + (ph-offset elf-header-ph-offset) + (ph-size elf-header-ph-size) + (sh-offset elf-header-sh-offset) + (ph-count elf-header-ph-count) + (sh-count elf-header-sh-count) + (total-size elf-header-total-size) + (shstrtab-index elf-header-shstrtab-index) + (hash-offset elf-header-hash-offset) + (hash-size elf-header-hash-size)) + +(define (create-elf-header entry-point program-headers-offset program-headers-size + section-headers-offset num-program-headers num-sections + total-size shstrtab-index hash-offset hash-size) + (let ((header (make-elf-header + (max entry-point #x1000) + program-headers-offset + program-headers-size + section-headers-offset + num-program-headers + num-sections + total-size + shstrtab-index + hash-offset + hash-size))) + (elf-header->bytevector header))) + +(define (elf-header->bytevector header) + (let ((bv (make-bytevector elf-header-size 0))) + (bytevector-u32-set! bv 0 elf-magic (endianness little)) + (bytevector-u8-set! bv 4 elf-class-64) + (bytevector-u8-set! bv 5 elf-data-lsb) + (bytevector-u8-set! bv 6 elf-version-current) + (bytevector-u8-set! bv 7 0) ; SYSV ABI + (bytevector-u8-set! bv 8 0) ; ABI version + (bytevector-u16-set! bv 16 elf-type-shared (endianness little)) + (bytevector-u16-set! bv 18 elf-machine-x86-64 (endianness little)) + (bytevector-u32-set! bv 20 elf-version-current (endianness little)) + (bytevector-u64-set! bv 24 (elf-header-entry-point header) (endianness little)) + (bytevector-u64-set! bv 32 (elf-header-ph-offset header) (endianness little)) + (bytevector-u64-set! bv 40 (elf-header-sh-offset header) (endianness little)) + (bytevector-u32-set! bv 48 0 (endianness little)) ; Flags + (bytevector-u16-set! bv 52 elf-header-size (endianness little)) + (bytevector-u16-set! bv 54 program-header-size (endianness little)) + (bytevector-u16-set! bv 56 (elf-header-ph-count header) (endianness little)) + (bytevector-u16-set! bv 58 section-header-size (endianness little)) + (bytevector-u16-set! bv 60 (elf-header-sh-count header) (endianness little)) + (bytevector-u16-set! bv 62 (elf-header-shstrtab-index header) (endianness little)) + bv)) + +;; Helper function to get hash section information +(define (get-hash-section-info header) + (values (elf-header-hash-offset header) + (elf-header-hash-size header))) diff --git a/compiler/elf-layout-calculator.scm b/compiler/elf-layout-calculator.scm new file mode 100644 index 0000000..9293ae2 --- /dev/null +++ b/compiler/elf-layout-calculator.scm @@ -0,0 +1,344 @@ +;;;; Copyright (C) 2023 Vouivre Digital Corporation +;;;; +;;;; This file is part of Vouivre. +;;;; +;;;; Vouivre is free software: you can redistribute it and/or +;;;; modify it under the terms of the GNU General Public +;;;; License as published by the Free Software Foundation, either +;;;; version 3 of the License, or (at your option) any later version. +;;;; +;;;; Vouivre is distributed in the hope that it will be useful, +;;;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +;;;; General Public License for more details. +;;;; +;;;; You should have received a copy of the GNU General Public +;;;; License along with Vouivre. If not, see . + + + +(define-module (elf-layout-calculator) + #:use-module (config) + #:use-module (utils) + #:use-module (rnrs bytevectors) + #:use-module (symbol-table) + #:use-module (string-table) + #:use-module (relocation-table) + #:export (calculate-elf-layout + calculate-phdr-size + calculate-text-segment-size + calculate-text-segment-end + calculate-data-segment-start + calculate-data-segment-file-size + calculate-data-segment-mem-size + align-to + calculate-relro-size)) + +(define (align-to value alignment) + (if (= (modulo value alignment) 0) + value + (+ value (- alignment (modulo value alignment))))) + +(define (align-up address alignment) + (let ((remainder (modulo address alignment))) + (if (zero? remainder) + address + (+ address (- alignment remainder))))) + +(define (calculate-code-offset elf-header-size program-headers-offset) + (let ((offset (align-to (+ elf-header-size program-headers-offset) alignment))) + offset)) + +(define (calculate-phdr-size num-program-headers program-header-size) + (* num-program-headers program-header-size)) + +(define (calculate-text-segment-size code-size rodata-size plt-size) + (+ code-size rodata-size plt-size)) + +(define (calculate-text-segment-end text-addr text-segment-size) + (+ text-addr text-segment-size)) + +(define (calculate-data-segment-start text-segment-end alignment) + (align-up text-segment-end alignment)) + +(define (calculate-data-segment-file-size total-size data-segment-start) + (- total-size data-segment-start)) + +(define (calculate-data-segment-mem-size data-segment-file-size bss-size) + (+ data-segment-file-size bss-size)) + +(define (calculate-relro-size got-offset data-segment-start) + (- got-offset data-segment-start)) + +(define (calculate-program-headers-offset) + elf-header-size) + +(define (calculate-code-size code) + (bytevector-length code)) + +(define (calculate-rodata-size) + 0) ; You may need to implement this based on your actual data + +(define (calculate-bss-size) + 0) ; You may need to implement this based on your actual data + +(define (calculate-data-size data-sections) + (let ((total-size 0) + (entry-size 32)) ;; Assuming each entry is 32 bytes + (for-each (lambda (pair) + (let ((name (car pair))) + (set! total-size (+ total-size entry-size)) + )) + data-sections) + total-size)) + +(define (calculate-symtab-size symbol-addresses label-positions) + (let* ((symtab-and-strtab (create-symtab-and-strtab symbol-addresses label-positions)) + (size (bytevector-length (car symtab-and-strtab)))) + size)) + +(define (calculate-strtab-size symbol-addresses label-positions) + (let* ((symtab-and-strtab (create-symtab-and-strtab symbol-addresses label-positions)) + (size (bytevector-length (cdr symtab-and-strtab)))) + size)) + +(define (calculate-dynamic-symbol-table-size symbol-addresses) + (let* ((dynsym-and-dynstr (create-dynsym-and-dynstr symbol-addresses)) + (size (bytevector-length (car dynsym-and-dynstr)))) + size)) + +(define (calculate-shstrtab-size) + (bytevector-length (create-section-header-string-table))) + +(define (calculate-relocation-table-size symbol-addresses) + (bytevector-length (create-relocation-table symbol-addresses))) + +(define (calculate-got-size symbol-addresses) + (let* ((num-symbols (length symbol-addresses)) ;; Number of symbols + (got-entry-size 8) ;; Assuming each GOT entry is 8 bytes + (calculated-size (* num-symbols got-entry-size))) ;; Final size + ;; Logging + calculated-size)) + +(define (calculate-plt-size symbol-addresses) + (* (length symbol-addresses) plt-entry-size)) + +(define (calculate-data-offset code-size rodata-size) + (align-to (+ code-offset code-size rodata-size) alignment)) + +(define (calculate-dynamic-offset data-offset data-size) + (align-to (+ data-offset data-size) alignment)) + +(define (calculate-dynamic-size) + (let* ((num-dynamic-entries 9) ;; Example number of entries + (dynamic-entry-size 16) ;; Each dynamic entry is 16 bytes + (target-size #xe0) ;; Target size to match after alignment + (calculated-size (* num-dynamic-entries dynamic-entry-size)) + (padding (- target-size calculated-size)) ;; Calculate padding required + (total-size (+ calculated-size padding)) ;; Total size with padding + (hex-size (number->string total-size 16))) ;; Convert to hex for display + total-size)) + +(define (calculate-dynsym-offset dynamic-offset dynamic-size) + (align-to (+ dynamic-offset dynamic-size) word-size)) + +(define (calculate-dynstr-offset dynsym-offset dynamic-symbol-table-size) + (align-to (+ dynsym-offset dynamic-symbol-table-size) word-size)) + +(define (calculate-rela-offset dynstr-offset strtab-size) + (align-to (+ dynstr-offset strtab-size) word-size)) + +(define (calculate-got-offset rela-offset relocation-table-size) + (align-to (+ rela-offset relocation-table-size) word-size)) + +(define (calculate-plt-offset got-offset got-size) + (align-to (+ got-offset got-size) double-word-size)) + +(define (calculate-total-dynamic-size dynamic-offset dynamic-size got-offset got-size plt-offset plt-size bss-size alignment) + (let* ((last-offset (+ plt-offset plt-size)) + (total-size (- last-offset dynamic-offset)) + (aligned-size (align-up total-size alignment)) + (total-size-with-bss (+ aligned-size bss-size))) + aligned-size)) + +(define (calculate-section-headers-offset dynamic-offset total-dynamic-size) + (align-to (+ dynamic-offset total-dynamic-size) alignment)) + +(define (calculate-hash-size symbol-addresses) + (let ((num-symbols (length symbol-addresses))) + (* 4 (+ 2 num-symbols)))) ; 2 for nbucket and nchain, then 4 bytes per symbol + +(define (calculate-hash-offset dynamic-offset dynamic-size) + (align-to (+ dynamic-offset dynamic-size) word-size)) + +(define (calculate-shstrtab-addr section-headers-offset shstrtab-size) + (- section-headers-offset shstrtab-size)) + +(define (calculate-data-addr text-addr code-size rodata-size) + (+ text-addr (align-to (+ code-size rodata-size) alignment))) + +(define (calculate-dynamic-addr data-addr data-size) + (align-to (+ data-addr data-size) alignment)) + +(define (calculate-dynsym-addr dynamic-addr dynamic-size) + (+ dynamic-addr dynamic-size)) + +(define (calculate-dynstr-addr dynsym-addr dynamic-symbol-table-size) + (+ dynsym-addr dynamic-symbol-table-size)) + +(define (calculate-rela-addr dynamic-addr rela-offset dynamic-offset) + (+ dynamic-addr (- rela-offset dynamic-offset))) + +(define (calculate-got-addr dynamic-addr got-offset dynamic-offset) + (+ dynamic-addr (- got-offset dynamic-offset))) + +(define (calculate-plt-addr dynamic-addr plt-offset dynamic-offset) + (+ dynamic-addr (- plt-offset dynamic-offset))) + +(define (calculate-symtab-offset section-offset code-size rodata-size data-size) + (+ section-offset code-size rodata-size data-size)) + +(define (calculate-total-size section-headers-offset) + (+ section-headers-offset (* num-sections section-header-size))) + +(define (calculate-strtab-offset symtab-offset symtab-size) + (+ symtab-offset symtab-size)) + +(define (get-dynsym-indices symtab-hash) + (let ((indices (make-hash-table))) + (let loop ((index 0) + (entries (hash-map->list cons symtab-hash))) + (if (null? entries) + indices + (let* ((entry (car entries)) + (name (car entry)) + (value (cdr entry))) + (hash-set! indices name index) + (loop (+ index 1) (cdr entries))))))) + +(define (calculate-elf-layout code data-sections symbol-addresses label-positions) + (let* ((program-headers-offset (calculate-program-headers-offset)) + (code-size (calculate-code-size code)) + (rodata-size (calculate-rodata-size)) + (bss-size (calculate-bss-size)) + (data-size (calculate-data-size data-sections)) + (unique-symbol-addresses + (list + (cons '.text@LOCAL #x1000) + (cons '.data@LOCAL #x3020) + (cons '.dynamic@LOCAL #x2f00) + (cons '.dynsym@LOCAL #x220) + (cons '.dynstr@LOCAL #x2b0) + (cons '.rela.dyn@LOCAL #x2e8) + (cons '.got@LOCAL #x2fe0) + (cons '.got.plt@LOCAL #x3000) + (cons '.note.gnu.build-id@LOCAL #x1c8) + (cons '.eh_frame@LOCAL #x2000) + (cons '.hash@LOCAL #x1f0) + (cons '_GLOBAL_OFFSET_TABLE_@LOCAL #x3000) + (cons '_DYNAMIC@LOCAL #x2f00))) + (combined-symbol-addresses (append symbol-addresses unique-symbol-addresses)) + (symtab-size (calculate-symtab-size combined-symbol-addresses label-positions)) + (strtab-size (calculate-strtab-size combined-symbol-addresses label-positions)) + (shstrtab-size (calculate-shstrtab-size)) + (dynamic-symbol-table-size (calculate-dynamic-symbol-table-size symbol-addresses)) + (relocation-table-size (calculate-relocation-table-size symbol-addresses)) + (got-size (calculate-got-size symbol-addresses)) + (plt-size (calculate-plt-size symbol-addresses)) + (data-offset (calculate-data-offset code-size rodata-size)) + (dynamic-offset (calculate-dynamic-offset data-offset data-size)) + (dynamic-size (calculate-dynamic-size)) + (dynsym-offset (calculate-dynsym-offset dynamic-offset dynamic-size)) + (dynstr-offset (calculate-dynstr-offset dynsym-offset dynamic-symbol-table-size)) + (rela-offset (calculate-rela-offset dynstr-offset strtab-size)) + (got-offset (calculate-got-offset rela-offset relocation-table-size)) + (plt-offset (calculate-plt-offset got-offset got-size)) + (symtab-and-strtab (create-symtab-and-strtab combined-symbol-addresses label-positions)) + (symtab-bv (car symtab-and-strtab)) + (strtab (cdr symtab-and-strtab)) + (dynsymtab-and-dynstrtab (create-dynsym-and-dynstr symbol-addresses label-positions)) + (dynsymtab-bv (car dynsymtab-and-dynstrtab)) + (dynstrtab (cdr dynsymtab-and-dynstrtab)) + (dynsym-size (bytevector-length dynsymtab-bv)) + (dynstr-size (bytevector-length dynstrtab)) + (symtab-hash (create-symbol-table symbol-addresses label-positions)) + (dynsym-indices (get-dynsym-indices symtab-hash)) + (total-dynamic-size + (calculate-total-dynamic-size + dynamic-offset + dynamic-size + got-offset + got-size + plt-offset + plt-size + bss-size + alignment)) + (section-headers-offset (calculate-section-headers-offset dynamic-offset total-dynamic-size)) + (hash-size (calculate-hash-size symbol-addresses)) + (hash-offset (calculate-hash-offset dynamic-offset dynamic-size)) + (shstrtab-addr (calculate-shstrtab-addr section-headers-offset shstrtab-size)) + (data-addr (calculate-data-addr text-addr code-size rodata-size)) + (dynamic-addr (calculate-dynamic-addr data-addr data-size)) + (dynsym-addr (calculate-dynsym-addr dynamic-addr dynamic-size)) + (dynstr-addr (calculate-dynstr-addr dynsym-addr dynamic-symbol-table-size)) + (rela-addr (calculate-rela-addr dynamic-addr rela-offset dynamic-offset)) + (got-addr (calculate-got-addr dynamic-addr got-offset dynamic-offset)) + (plt-addr (calculate-plt-addr dynamic-addr plt-offset dynamic-offset)) + (symtab-offset (calculate-symtab-offset section-offset code-size rodata-size data-size)) + (total-size (calculate-total-size section-headers-offset)) + (code-offset (calculate-code-offset elf-header-size program-headers-offset)) + (init-offset (align-to text-addr 16)) + (init-size 16) + (fini-size 16) + (fini-offset (align-to (+ init-offset init-size) 16)) + (strtab-offset (calculate-strtab-offset symtab-offset symtab-size))) + + (list + (cons 'program-headers-offset program-headers-offset) + (cons 'init-offset init-offset) + (cons 'init-size init-size) + (cons 'code-size code-size) + (cons 'code-offset code-offset) + (cons 'rodata-size rodata-size) + (cons 'bss-size bss-size) + (cons 'dynsym-indices dynsym-indices) + (cons 'data-size data-size) + (cons 'symtab-size symtab-size) + (cons 'strtab-size strtab-size) + (cons 'shstrtab-size shstrtab-size) + (cons 'symtab-hash symtab-hash) + (cons 'dynamic-symbol-table-size dynamic-symbol-table-size) + (cons 'relocation-table-size relocation-table-size) + (cons 'got-size got-size) + (cons 'plt-size plt-size) + (cons 'symtab-and-strtab symtab-and-strtab) + (cons 'symtab-bv symtab-bv) + (cons 'strtab strtab) + (cons 'dynsymtab-bv dynsymtab-bv) + (cons 'dynstrtab dynstrtab) + (cons 'dynsym-size dynsym-size) + (cons 'dynstr-size dynstr-size) + (cons 'data-offset data-offset) + (cons 'dynamic-offset dynamic-offset) + (cons 'dynamic-size dynamic-size) + (cons 'dynsym-offset dynsym-offset) + (cons 'dynstr-offset dynstr-offset) + (cons 'rela-offset rela-offset) + (cons 'got-offset got-offset) + (cons 'plt-offset plt-offset) + (cons 'total-dynamic-size total-dynamic-size) + (cons 'section-headers-offset section-headers-offset) + (cons 'shstrtab-addr shstrtab-addr) + (cons 'data-addr data-addr) + (cons 'dynamic-addr dynamic-addr) + (cons 'dynsym-addr dynsym-addr) + (cons 'dynstr-addr dynstr-addr) + (cons 'rela-addr rela-addr) + (cons 'got-addr got-addr) + (cons 'plt-addr plt-addr) + (cons 'symtab-offset symtab-offset) + (cons 'strtab-offset strtab-offset) + (cons 'total-size total-size) + (cons 'hash-size hash-size) + (cons 'hash-offset hash-offset) + (cons 'text-addr text-addr)))) diff --git a/compiler/executable-example.scm b/compiler/executable-example.scm new file mode 100644 index 0000000..6cb4ae7 --- /dev/null +++ b/compiler/executable-example.scm @@ -0,0 +1,110 @@ +;;;; Copyright (C) 2023 Vouivre Digital Corporation +;;;; +;;;; This file is part of Vouivre. +;;;; +;;;; Vouivre is free software: you can redistribute it and/or +;;;; modify it under the terms of the GNU General Public +;;;; License as published by the Free Software Foundation, either +;;;; version 3 of the License, or (at your option) any later version. +;;;; +;;;; Vouivre is distributed in the hope that it will be useful, +;;;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +;;;; General Public License for more details. +;;;; +;;;; You should have received a copy of the GNU General Public +;;;; License along with Vouivre. If not, see . + +(use-modules (linker) + (shared-object-creator) + (assembler) + (rnrs bytevectors) + (ice-9 receive)) ; For receiving multiple values + +;; Define data +(define buffer1 + (u8-list->bytevector + '(0 0 128 63 ; 1.0 + 0 0 0 64 ; 2.0 + 0 0 64 64 ; 3.0 + 0 0 128 64 ; 4.0 + 0 0 160 64 ; 5.0 + 0 0 192 64 ; 6.0 + 0 0 224 64 ; 7.0 + 0 0 0 65))) ; 8.0 + +(define buffer2 + (u8-list->bytevector + '(0 0 0 65 ; 8.0 + 0 0 224 64 ; 7.0 + 0 0 192 64 ; 6.0 + 0 0 160 64 ; 5.0 + 0 0 128 64 ; 4.0 + 0 0 64 64 ; 3.0 + 0 0 0 64 ; 2.0 + 0 0 128 63))) ; 1.0 +(define multiplier + (u8-list->bytevector + (apply append (make-list 8 '(0 0 0 64))))) +(define result (make-bytevector 32 0)) + +(define __dso_handle (make-bytevector 32 0)) + +;; Define symbol addresses (these will be relative to the start of .data section) +(define symbol-addresses + '((buffer1 . #x3020) ; Start of .data section + (buffer2 . #x3040) ; 32 bytes after buffer1 + (result . #x3060) ; 32 bytes after buffer2 + (multiplier . #x3080))) ; 32 bytes after result + +(define example-code + '((label perform_operations) + (push rbp) + (mov rbp rsp) + + ; Load addresses using GOTPCREL for PIC + (lea rdi (rip buffer1@GOTPCREL)) + (lea rsi (rip buffer2@GOTPCREL)) + (lea rdx (rip result@GOTPCREL)) + (lea rax (rip multiplier@GOTPCREL)) + + ; Dereference GOT entries + (mov rdi (rdi)) + (mov rsi (rsi)) + (mov rdx (rdx)) + (mov rax (rax)) + + ; Addition + (vmovaps (rdi) ymm0) + (vmovaps (rsi) ymm1) + (vaddps ymm1 ymm0 ymm0) + (vmovaps ymm0 (rdx)) + + ; Multiplication + (vxorps ymm1 ymm1 ymm1) + (vmovaps (rax) ymm2) + (vfmadd132ps ymm0 ymm1 ymm2) + (vmovaps ymm0 (rdx)) + + ; End of function + (xor eax eax) + (pop rbp) + (ret))) + +;; Assemble the code and get the relocation table +(receive (assembled-code relocation-table) + (assemble example-code) + + (define label-positions (get-label-positions)) + + ;; Create the shared object file + (create-shared-object + assembled-code + `((buffer1 . ,buffer1) + (buffer2 . ,buffer2) + (result . ,result) + (multiplier . ,multiplier)) + "liboutput.so" + symbol-addresses + label-positions + relocation-table)) ; Pass the relocation table to create-shared-object diff --git a/compiler/got-plt-section.scm b/compiler/got-plt-section.scm new file mode 100644 index 0000000..8c6376b --- /dev/null +++ b/compiler/got-plt-section.scm @@ -0,0 +1,41 @@ +;;;; Copyright (C) 2023 Vouivre Digital Corporation +;;;; +;;;; This file is part of Vouivre. +;;;; +;;;; Vouivre is free software: you can redistribute it and/or +;;;; modify it under the terms of the GNU General Public +;;;; License as published by the Free Software Foundation, either +;;;; version 3 of the License, or (at your option) any later version. +;;;; +;;;; Vouivre is distributed in the hope that it will be useful, +;;;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +;;;; General Public License for more details. +;;;; +;;;; You should have received a copy of the GNU General Public +;;;; License along with Vouivre. If not, see . + +(define-module (got-plt-section) + #:use-module (rnrs bytevectors) + #:use-module (ice-9 format) + #:export (create-got-plt-section)) + +(define (create-got-plt-section function-labels dynamic-addr plt-addr) + (let* ((num-entries 0) + (got-plt-size (* (+ num-entries 3) 8)) ; 3 reserved entries + num_entries + (got-plt (make-bytevector got-plt-size 0))) + + ; First entry points to the dynamic section (dynamic-addr) + (bytevector-u64-set! got-plt 0 dynamic-addr (endianness little)) + + ; Second and third entries are reserved (set to 0) + (bytevector-u64-set! got-plt 8 0 (endianness little)) + (bytevector-u64-set! got-plt 16 0 (endianness little)) + + ; Ensure the rest of the entries are correct + (do ((i 3 (1+ i))) + ((= i (+ num-entries 3))) + ; Each GOT entry points to the corresponding PLT entry (plt-addr + 16*i) + (bytevector-u64-set! got-plt (* i 8) (+ plt-addr (* i 16)) (endianness little))) + + got-plt)) diff --git a/compiler/liboutput.so b/compiler/liboutput.so new file mode 100644 index 0000000..fa50ed7 Binary files /dev/null and b/compiler/liboutput.so differ diff --git a/compiler/linker.scm b/compiler/linker.scm new file mode 100644 index 0000000..80dbbf5 --- /dev/null +++ b/compiler/linker.scm @@ -0,0 +1,131 @@ +;;;; Copyright (C) 2023 Vouivre Digital Corporation +;;;; +;;;; This file is part of Vouivre. +;;;; +;;;; Vouivre is free software: you can redistribute it and/or +;;;; modify it under the terms of the GNU General Public +;;;; License as published by the Free Software Foundation, either +;;;; version 3 of the License, or (at your option) any later version. +;;;; +;;;; Vouivre is distributed in the hope that it will be useful, +;;;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +;;;; General Public License for more details. +;;;; +;;;; You should have received a copy of the GNU General Public +;;;; License along with Vouivre. If not, see . + +(define-module (linker) + #:use-module (rnrs bytevectors) + #:use-module (rnrs io ports) + #:use-module (ice-9 hash-table) + #:use-module (rnrs arithmetic bitwise) + #:use-module (srfi srfi-11) ; for let-values + #:export (link-code resolve-references)) + +(define (process-relocation-table relocation-table) + (define (remove-duplicates lst) + (if (null? lst) + '() + (cons (car lst) + (remove-duplicates + (filter (lambda (x) (not (equal? x (car lst)))) + (cdr lst)))))) + + (define (extract-symbols table) + (define (strip-got-suffix symbol) + (let* ((sym-str (symbol->string symbol)) + (at-index (string-index sym-str #\@))) + (if at-index + (string->symbol (substring sym-str 0 at-index)) + symbol))) + + (remove-duplicates + (map (lambda (reloc) + (strip-got-suffix (caddr reloc))) + table))) + + (let* ((symbols (extract-symbols relocation-table)) + (base-address #x0) + (address-step 8)) + (map (lambda (symbol index) + (cons symbol (+ base-address (* index address-step)))) + symbols + (iota (length symbols))))) + +;; Constants +(define base-address-difference #x1000) + +(define (strip-got-suffix symbol) + (let* ((sym-str (symbol->string symbol)) + (at-index (string-index sym-str #\@))) + (if at-index + (string->symbol (substring sym-str 0 at-index)) + symbol))) + +(define (safe-ref table key) + (let* ((stripped-key (strip-got-suffix key)) + (result (cond + ((hash-table? table) (hash-ref table stripped-key)) + ((list? table) (let ((pair (assoc stripped-key table))) + (and pair (cdr pair)))) + (else #f)))) + result)) + +(define (bytevector->hex-string bv start len) + (string-join + (map (lambda (i) + (format #f "~2,'0x" (bytevector-u8-ref bv i))) + (iota len start)) + " ")) + +(define (adjust-address address) + (+ address base-address-difference)) + +;; Main function +(define (resolve-references code symbol-table label-positions relocation-table code-start-offset got-offset got-relocation-table) + (define (lookup-symbol symbol) + (let ((sym-str (symbol->string symbol))) + (if (string-suffix? "@GOTPCREL" sym-str) + (let* ((base-symbol (strip-got-suffix symbol)) + (got-entry (assoc base-symbol got-relocation-table))) + (if got-entry + (+ got-offset (cdr got-entry)) ; Return GOT entry address + #f)) ; Return #f if not found in GOT table + (safe-ref symbol-table symbol)))) + + (let ((resolved-code (bytevector-copy code)) + (code-base-address code-start-offset)) + (for-each + (lambda (reloc) + (let ((symbol-address (lookup-symbol (caddr reloc)))) + (when (number? symbol-address) ; Only apply relocation if symbol address is found and is a number + (apply-relocation resolved-code + (car reloc) ; offset + (cadr reloc) ; type + (caddr reloc) ; symbol + symbol-address + code-base-address + code-start-offset + got-offset)))) + relocation-table) + resolved-code)) + +(define (apply-relocation code offset type symbol symbol-address code-base-address code-start-offset got-offset) + (let* ((code-offset (- offset code-start-offset)) + (instruction-end offset) + (next-instruction-address (+ offset 4))) + (if (number? symbol-address) + (let* ((relative-address (- symbol-address next-instruction-address 3)) + (displacement (bitwise-and relative-address #xffffffff))) + (if (and (>= code-offset 0) (< (+ code-offset 7) (bytevector-length code))) + ;; Preserve the first 3 bytes (opcode and ModR/M) + (bytevector-u32-set! code (+ code-offset 3) displacement (endianness little))))))) + +(define (link-code code symbol-addresses label-positions relocation-table data-addr) + (define got-offset #x2fe0) + (define got-relocation-table (process-relocation-table relocation-table)) + (let ((symbol-table (if (hash-table? symbol-addresses) + symbol-addresses + (alist->hash-table symbol-addresses)))) + (resolve-references code symbol-table label-positions relocation-table #x1000 got-offset got-relocation-table))) diff --git a/compiler/note-gnu-build-id-section.scm b/compiler/note-gnu-build-id-section.scm new file mode 100644 index 0000000..f8d7905 --- /dev/null +++ b/compiler/note-gnu-build-id-section.scm @@ -0,0 +1,73 @@ +;;;; Copyright (C) 2023 Vouivre Digital Corporation +;;;; +;;;; This file is part of Vouivre. +;;;; +;;;; Vouivre is free software: you can redistribute it and/or +;;;; modify it under the terms of the GNU General Public +;;;; License as published by the Free Software Foundation, either +;;;; version 3 of the License, or (at your option) any later version. +;;;; +;;;; Vouivre is distributed in the hope that it will be useful, +;;;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +;;;; General Public License for more details. +;;;; +;;;; You should have received a copy of the GNU General Public +;;;; License along with Vouivre. If not, see . + + + +(define-module (note-gnu-build-id-section) + #:use-module (rnrs bytevectors) + #:use-module (ice-9 format) + #:use-module (srfi srfi-1) + #:export (create-note-gnu-build-id-section)) + +;; Helper function to check if a character is a valid hexadecimal digit +(define (hex-digit? char) + (or (char<=? #\0 char #\9) (char<=? #\a char #\f) (char<=? #\A char #\F))) + +;; Convert a hex string to a bytevector +(define (hex-string->bytevector hex-str) + (let* ((cleaned-str (string-downcase (string-filter hex-digit? hex-str))) + (len (/ (string-length cleaned-str) 2)) + (bv (make-bytevector len))) + (do ((i 0 (+ i 2)) + (byte-offset 0 (+ byte-offset 1))) + ((>= i (string-length cleaned-str)) bv) + (let ((hex-byte (string->number (substring cleaned-str i (+ i 2)) 16))) + (bytevector-u8-set! bv byte-offset hex-byte))))) + +;; Align size to 4 bytes +(define (align-to-4 size) + (+ size (modulo (- 4 (modulo size 4)) 4))) + +(define (create-note-gnu-build-id-section build-id) + (let* ((name "GNU\0") + (name-bytes (string->utf8 name)) + (name-size (bytevector-length name-bytes)) + (name-size-aligned (align-to-4 name-size)) + (descriptor (hex-string->bytevector build-id)) + (descriptor-size (bytevector-length descriptor)) + (descriptor-size-aligned (align-to-4 descriptor-size)) + (total-size (+ 12 name-size-aligned descriptor-size-aligned)) + (note-section (make-bytevector total-size 0))) + + ;; Write name size (4 bytes, little-endian) + (bytevector-u32-set! note-section 0 name-size (endianness little)) + + ;; Write descriptor size (4 bytes, little-endian) + (bytevector-u32-set! note-section 4 descriptor-size (endianness little)) + + ;; Write type (4 bytes, little-endian) (NT_GNU_BUILD_ID is 0x3) + (bytevector-u32-set! note-section 8 #x3 (endianness little)) + + ;; Write the name ("GNU\0") + (bytevector-copy! name-bytes 0 note-section 12 name-size) + + ;; Write the descriptor (the actual build ID) + (bytevector-copy! descriptor 0 note-section (+ 12 name-size-aligned) descriptor-size) + + + ;; Return the bytevector representing the .note.gnu.build-id section + note-section)) diff --git a/compiler/program-headers.scm b/compiler/program-headers.scm new file mode 100644 index 0000000..f83f282 --- /dev/null +++ b/compiler/program-headers.scm @@ -0,0 +1,211 @@ +;;;; Copyright (C) 2023 Vouivre Digital Corporation +;;;; +;;;; This file is part of Vouivre. +;;;; +;;;; Vouivre is free software: you can redistribute it and/or +;;;; modify it under the terms of the GNU General Public +;;;; License as published by the Free Software Foundation, either +;;;; version 3 of the License, or (at your option) any later version. +;;;; +;;;; Vouivre is distributed in the hope that it will be useful, +;;;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +;;;; General Public License for more details. +;;;; +;;;; You should have received a copy of the GNU General Public +;;;; License along with Vouivre. If not, see . + + + +(define-module (program-headers) + #:use-module (rnrs bytevectors) + #:use-module (utils) + #:use-module (srfi srfi-9) + #:use-module (config) + #:use-module (elf-layout-calculator) + #:export (create-program-headers)) + +;; Program header record +(define-record-type + (make-program-header type flags offset vaddr paddr filesz memsz align) + program-header? + (type ph-type) + (flags ph-flags) + (offset ph-offset) + (vaddr ph-vaddr) + (paddr ph-paddr) + (filesz ph-filesz) + (memsz ph-memsz) + (align ph-align)) + +;; Calculates the size of the text segment +(define (calculate-text-segment-size code-size rodata-size plt-size) + (+ code-size rodata-size plt-size)) + +;; Calculates the end address of the text segment +(define (calculate-text-segment-end text-addr text-segment-size) + (+ text-addr text-segment-size)) + +;; Calculates the start address of the data segment based on the end of the text segment +(define (calculate-data-segment-start text-segment-end alignment) + (align-up text-segment-end alignment)) + +;; Calculate the file size for the second PT_LOAD segment +(define (calculate-data-segment-file-size total-size data-segment-start) + ;; Since .bss is at the end and has no file size, the file size is calculated normally + (- total-size data-segment-start)) + +;; Calculate the memory size for the second PT_LOAD segment +(define (calculate-data-segment-mem-size data-segment-file-size bss-size) + ;; Include the size of the .bss section in the memory size + (+ data-segment-file-size bss-size)) + +;; Calculates the size of the RELRO segment +(define (calculate-relro-size data-segment-start dynamic-addr) + (- dynamic-addr data-segment-start)) + +(define (calculate-first-load-size text-addr text-size phdr-offset phdr-size alignment) + (let ((first-load-size (max (+ text-size phdr-size)))) + (align-up first-load-size alignment))) + +(define (create-program-headers + elf-header-size + program-header-size + num-program-headers + text-addr + code-size + rodata-size + bss-size + data-size + dynamic-addr + dynamic-offset + dynamic-size + total-dynamic-size + got-offset + got-size + plt-offset + plt-size + total-size + zero-load-size + got-plt-size + alignment) + + ;; Calculate dependent values + (let* ((text-segment-size (calculate-text-segment-size code-size rodata-size plt-size)) + (text-segment-end (calculate-text-segment-end text-addr text-segment-size)) + (data-segment-start (calculate-data-segment-start text-segment-end alignment)) + (data-segment-file-size (calculate-data-segment-file-size total-size data-segment-start)) + (data-segment-mem-size (calculate-data-segment-mem-size data-segment-file-size bss-size)) + (relro-size (calculate-relro-size data-segment-start dynamic-addr)) + (phdr-size (calculate-phdr-size num-program-headers program-header-size)) + ;; Place .bss at the end after all other sections, ensuring alignment + (bss-addr (align-up total-size alignment)) + (phdr-offset (align-up elf-header-size alignment)) + (first-load-size (calculate-first-load-size text-addr text-segment-size phdr-offset phdr-size alignment))) + + (let ((headers + (list + ; First LOAD segment (R) + (make-program-header + pt-load ; Type: Loadable segment + (logior pf-r) ; Flags: Read and execute permissions + #x0 ; Offset: Start of file + #x0 ; Virtual address: Address of text segment + #x0 ; Physical address: Same as virtual address + zero-load-size + zero-load-size + alignment) ; Alignment: Required alignment + + + ; Second LOAD segment (RX) - includes .text and .rodata + (make-program-header + pt-load ; Type: Loadable segment + (logior pf-r pf-x) ; Flags: Read and execute permissions + #x1000 ; Offset: Start of file + text-addr ; Virtual address: Address of text segment + text-addr ; Physical address: Same as virtual address + code-size + code-size + alignment) ; Alignment: Required alignment + + ; Third LOAD segment (RW) - for .data + (make-program-header + pt-load ; Type: Loadable segment + (logior pf-r) ; Flags: Read and write permissions + data-segment-start ; Offset: Start of data segment in file + data-segment-start ; Virtual address: Start of data segment in memory + data-segment-start ; Physical address: Same as virtual address + #x0 + #x0 + alignment) ; Alignment: Required alignment + + ; Fourth LOAD segment (RWX) - starting from .dynamic, includes .plt and ends with .bss + (make-program-header + pt-load ; Type: Loadable segment + (logior pf-r pf-w) ; Flags: Read, write, and execute permissions + dynamic-offset ; Offset: Start of dynamic section + dynamic-addr ; Virtual address: Address of dynamic section + dynamic-addr ; Physical address: Same as virtual address + (+ dynamic-size got-size got-plt-size data-size) + (+ dynamic-size got-size got-plt-size data-size) + alignment) ; Alignment: Required alignment + + ; PT_DYNAMIC + (make-program-header + pt-dynamic ; Type: Dynamic linking information + (logior pf-r pf-w) ; Flags: Read and write permissions + dynamic-offset ; Offset: Start of dynamic section + dynamic-addr ; Virtual address: Address of dynamic section + dynamic-addr ; Physical address: Same as virtual address + dynamic-size ; File size: Size of dynamic section + dynamic-size ; Memory size: Same as file size + #x08) ; Alignment: Required alignment + + ; NOTE + (make-program-header + pt-note ; Type: Loadable segment + pf-r ; Flags: Read permission + #x1c8 ; Offset: File offset, 0 for NOBITS + #x1c8 ; Virtual address: Hardcoded start address for .bss + #x1c8 ; Physical address: Same as virtual address + #x24 ; File size: 0 because .bss is NOBITS + #x24 ; Memory size: Hardcoded size for .bss in memory + #x4 ; Alignment: Usually 0x1000 for page alignment + ) + + ; GNU_RELRO + (make-program-header + pt-gnu-relro ; Type: GNU read-only after relocation + pf-r ; Flags: Read permission + dynamic-offset ; Offset: Start of data segment + dynamic-addr ; Virtual address: Start of data segment + dynamic-addr ; Physical address: Same as virtual address + (+ dynamic-size got-size); File size: Size of read-only after relocation section + (+ dynamic-size got-size); Memory size: Same as file size + 1)))) ; Alignment: 1 byte alignment + + + (program-headers->bytevector headers)))) + +;; Helper function to align addresses +(define (align-up address alignment) + (let ((remainder (modulo address alignment))) + (if (zero? remainder) + address + (+ address (- alignment remainder))))) + +(define (program-headers->bytevector headers) + (let* ((bv (make-bytevector (* (length headers) program-header-size) 0))) + (let loop ((headers headers) (offset 0)) + (if (null? headers) + bv + (let ((ph (car headers))) + (bytevector-u32-set! bv offset (ph-type ph) (endianness little)) + (bytevector-u32-set! bv (+ offset 4) (ph-flags ph) (endianness little)) + (bytevector-u64-set! bv (+ offset 8) (ph-offset ph) (endianness little)) + (bytevector-u64-set! bv (+ offset 16) (ph-vaddr ph) (endianness little)) + (bytevector-u64-set! bv (+ offset 24) (ph-paddr ph) (endianness little)) + (bytevector-u64-set! bv (+ offset 32) (ph-filesz ph) (endianness little)) + (bytevector-u64-set! bv (+ offset 40) (ph-memsz ph) (endianness little)) + (bytevector-u64-set! bv (+ offset 48) (ph-align ph) (endianness little)) + (loop (cdr headers) (+ offset program-header-size))))))) diff --git a/compiler/rela-plt-section.scm b/compiler/rela-plt-section.scm new file mode 100644 index 0000000..318c107 --- /dev/null +++ b/compiler/rela-plt-section.scm @@ -0,0 +1,59 @@ +;;;; Copyright (C) 2023 Vouivre Digital Corporation +;;;; +;;;; This file is part of Vouivre. +;;;; +;;;; Vouivre is free software: you can redistribute it and/or +;;;; modify it under the terms of the GNU General Public +;;;; License as published by the Free Software Foundation, either +;;;; version 3 of the License, or (at your option) any later version. +;;;; +;;;; Vouivre is distributed in the hope that it will be useful, +;;;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +;;;; General Public License for more details. +;;;; +;;;; You should have received a copy of the GNU General Public +;;;; License along with Vouivre. If not, see . + + +(define-module (rela-plt-section) + #:use-module (rnrs bytevectors) + #:use-module (ice-9 format) + #:export (create-rela-plt-section)) + +(define (create-rela-entry offset sym-index type) + (let ((entry (make-bytevector 24 0))) ; Each RELA entry is 24 bytes + (bytevector-u64-set! entry 0 offset (endianness little)) ; r_offset + (bytevector-u64-set! entry 8 + (logior (ash (if (number? sym-index) sym-index 0) 32) + type) + (endianness little)) ; r_info + (bytevector-u64-set! entry 16 0 (endianness little)) ; r_addend + entry)) + +(define (create-rela-plt-section function-labels got-plt-offset dynsym-indices) + (let* ((num-entries (length function-labels)) + (rela-plt-size (* num-entries 24)) + (rela-plt (make-bytevector rela-plt-size 0))) + + (let loop ((i 0) + (labels function-labels)) + (if (null? labels) + rela-plt + (let* ((function-pair (car labels)) + (function-name (car function-pair)) + (raw-sym-index (hash-ref dynsym-indices function-name)) + (sym-index (if raw-sym-index (+ raw-sym-index 1) #f)) + (got-plt-entry-offset (+ got-plt-offset (* i 8))) ; Removed +3 offset + (entry (create-rela-entry + got-plt-entry-offset + (or sym-index 0) ; Use 0 if sym-index is #f + 7))) ; 7 is R_X86_64_JUMP_SLOT + (bytevector-copy! entry 0 rela-plt (* i 24) 24) + (loop (+ i 1) (cdr labels))))))) + +(define (print-rela-plt-section rela-plt function-labels) + (let ((size (bytevector-length rela-plt))) + (do ((i 0 (+ i 24)) + (labels function-labels (cdr labels))) + ((or (>= i size) (null? labels)))))) diff --git a/compiler/relocation-table.scm b/compiler/relocation-table.scm new file mode 100644 index 0000000..8950a45 --- /dev/null +++ b/compiler/relocation-table.scm @@ -0,0 +1,79 @@ +;;;; Copyright (C) 2023 Vouivre Digital Corporation +;;;; +;;;; This file is part of Vouivre. +;;;; +;;;; Vouivre is free software: you can redistribute it and/or +;;;; modify it under the terms of the GNU General Public +;;;; License as published by the Free Software Foundation, either +;;;; version 3 of the License, or (at your option) any later version. +;;;; +;;;; Vouivre is distributed in the hope that it will be useful, +;;;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +;;;; General Public License for more details. +;;;; +;;;; You should have received a copy of the GNU General Public +;;;; License along with Vouivre. If not, see . + + + +(define-module (relocation-table) + #:use-module (rnrs bytevectors) + #:use-module (srfi srfi-1) + #:use-module (ice-9 hash-table) + #:export (create-relocation-table)) + +(define* (create-relocation-table symbol-addresses + #:optional (options '())) + (let ((reloc-entry-size (or (assoc-ref options 'reloc-entry-size) 24)) + (r-offset-offset (or (assoc-ref options 'r-offset-offset) 0)) + (r-info-offset (or (assoc-ref options 'r-info-offset) 8)) + (r-addend-offset (or (assoc-ref options 'r-addend-offset) 16)) + (reloc-type (or (assoc-ref options 'r-x86-64-glob-dat) 6)) + (symbol-index-start (or (assoc-ref options 'symbol-index-start) 1)) + ;; Update the GOT base with the new offset from the .got section + (got-base (or (assoc-ref options 'got-base) #x2fe0))) ; Updated GOT base + + (let* ((non-function-symbols + (if (hash-table? symbol-addresses) + (filter (lambda (entry) (not (cdr (cdr entry)))) + (hash-map->list cons symbol-addresses)) + (filter (lambda (symbol) (not (and (pair? symbol) (cdr symbol)))) + symbol-addresses))) + (reloc-count (length non-function-symbols)) + (table-size (* reloc-count reloc-entry-size)) + (table (make-bytevector table-size 0))) + + (let loop ((symbols non-function-symbols) + (index 0)) + (if (null? symbols) + table + (let* ((symbol (car symbols)) + (name (if (hash-table? symbol-addresses) + (car symbol) + (if (pair? symbol) (car symbol) symbol))) + (value (if (hash-table? symbol-addresses) + (car (cdr symbol)) + (if (pair? symbol) (car symbol) symbol))) + (address (if (pair? value) (car value) value)) + (entry-offset (* index reloc-entry-size)) + (symbol-index (+ symbol-index-start index)) + (got-entry (+ got-base (* index 8))) ; Each GOT entry is 8 bytes in size + (r-info (logior (ash symbol-index 32) reloc-type))) + + (bytevector-u64-set! table + (+ entry-offset r-offset-offset) + got-entry + (endianness little)) ; r_offset points to GOT entry + + (bytevector-u64-set! table + (+ entry-offset r-info-offset) + r-info + (endianness little)) ; r_info + + (bytevector-u64-set! table + (+ entry-offset r-addend-offset) + 0 + (endianness little)) ; r_addend + + (loop (cdr symbols) (+ index 1)))))))) diff --git a/compiler/section-headers.scm b/compiler/section-headers.scm new file mode 100644 index 0000000..6523d31 --- /dev/null +++ b/compiler/section-headers.scm @@ -0,0 +1,133 @@ +;;;; Copyright (C) 2023 Vouivre Digital Corporation +;;;; +;;;; This file is part of Vouivre. +;;;; +;;;; Vouivre is free software: you can redistribute it and/or +;;;; modify it under the terms of the GNU General Public +;;;; License as published by the Free Software Foundation, either +;;;; version 3 of the License, or (at your option) any later version. +;;;; +;;;; Vouivre is distributed in the hope that it will be useful, +;;;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +;;;; General Public License for more details. +;;;; +;;;; You should have received a copy of the GNU General Public +;;;; License along with Vouivre. If not, see . + + +(define-module (section-headers) + #:use-module (rnrs bytevectors) + #:use-module (utils) + #:use-module (string-table) + #:use-module (srfi srfi-9) + #:use-module (config) + #:export (create-section-headers)) + +;; Define a record type for section headers +(define-record-type + (make-section-header name type flags addr offset size link info align entsize) + section-header? + (name sh-name) + (type sh-type) + (flags sh-flags) + (addr sh-addr) + (offset sh-offset) + (size sh-size) + (link sh-link) + (info sh-info) + (align sh-align) + (entsize sh-entsize)) + +(define (create-section-headers + text-addr code-size data-size symtab-size strtab-size shstrtab-size + dynsym-size dynstr-size rela-size total-dynamic-size dynamic-size + rela-offset got-size data-addr dynamic-addr dynsym-addr dynstr-addr + rela-addr got-addr plt-addr symtab-offset strtab-offset shstrtab-addr + plt-size plt-got-addr plt-got-size rela-plt-addr rela-plt-size + got-plt-addr got-plt-size rodata-offset gnu-version-addr gnu-version-r-addr + gnu-version-size gnu-version-r-size gnu-version-d-offset gnu-version-d-size + hash-offset hash-size code-offset init-offset init-size + note-gnu-build-id-addr note-gnu-build-id-size eh-frame-addr eh-frame-size) + + (let ((headers + (list + ;; Null section + (make-section-header 0 sht-null 0 0 0 0 0 0 0 0) + + ;; .note.gnu.build-id section + (make-section-header 1 sht-note shf-alloc + note-gnu-build-id-addr note-gnu-build-id-addr note-gnu-build-id-size 0 0 4 0) + + ;; .hash section + (make-section-header 20 sht-hash shf-alloc + hash-offset hash-offset hash-size 3 0 8 4) + + ;; .dynsym section + (make-section-header 26 sht-dynsym shf-alloc + dynsym-addr dynsym-addr dynsym-size 4 1 8 24) + + ;; .dynstr section + (make-section-header 34 sht-strtab shf-alloc + dynstr-addr dynstr-addr dynstr-size 0 0 1 0) + + ;; .rela.dyn section + (make-section-header 42 sht-rela shf-alloc + rela-addr rela-addr rela-size 3 0 8 24) + + ;; .text section + (make-section-header 52 sht-progbits (logior shf-alloc shf-execinstr) + text-addr text-addr code-size 0 0 1 0) + + ;; .eh_frame section + (make-section-header 58 sht-progbits shf-alloc + eh-frame-addr eh-frame-addr eh-frame-size 0 0 8 0) + + ;; .dynamic section + (make-section-header 68 sht-dynamic (logior shf-write shf-alloc) + dynamic-addr dynamic-addr dynamic-size 4 0 8 dynamic-entry-size) + + ;; .got section + (make-section-header 77 sht-progbits (logior shf-write shf-alloc) + got-addr got-addr got-size 0 0 8 got-entry-size) + + ;; .got.plt section + (make-section-header 82 sht-progbits (logior shf-write shf-alloc) + got-plt-addr got-plt-addr got-plt-size 0 0 8 8) + + ;; .data section + (make-section-header 91 sht-progbits (logior shf-write shf-alloc) + data-addr data-addr data-size 0 0 32 0) + + ;; .symtab section + (make-section-header 97 sht-symtab 0 0 symtab-offset symtab-size 13 17 8 24) + + ;; .strtab section + (make-section-header 105 sht-strtab 0 0 strtab-offset strtab-size 0 0 1 0) + + ;; .shstrtab section + (make-section-header 113 sht-strtab 0 0 shstrtab-addr shstrtab-size 0 0 1 0) + ))) + + ;; Convert the section headers to a bytevector + (section-headers->bytevector headers))) + +;; Convert section headers to bytevector +(define (section-headers->bytevector headers) + (let* ((bv (make-bytevector (* (length headers) section-header-size) 0))) + (for-each + (lambda (index header) + (let ((base (* index section-header-size))) + (bytevector-u32-set! bv (+ base 0) (sh-name header) (endianness little)) + (bytevector-u32-set! bv (+ base 4) (sh-type header) (endianness little)) + (bytevector-u64-set! bv (+ base 8) (sh-flags header) (endianness little)) + (bytevector-u64-set! bv (+ base 16) (sh-addr header) (endianness little)) + (bytevector-u64-set! bv (+ base 24) (sh-offset header) (endianness little)) + (bytevector-u64-set! bv (+ base 32) (sh-size header) (endianness little)) + (bytevector-u32-set! bv (+ base 40) (sh-link header) (endianness little)) + (bytevector-u32-set! bv (+ base 44) (sh-info header) (endianness little)) + (bytevector-u64-set! bv (+ base 48) (sh-align header) (endianness little)) + (bytevector-u64-set! bv (+ base 56) (sh-entsize header) (endianness little)))) + (iota (length headers)) + headers) + bv)) diff --git a/compiler/shared-object-creator.scm b/compiler/shared-object-creator.scm new file mode 100644 index 0000000..3fc8002 --- /dev/null +++ b/compiler/shared-object-creator.scm @@ -0,0 +1,302 @@ +;;;; Copyright (C) 2023 Vouivre Digital Corporation +;;;; +;;;; This file is part of Vouivre. +;;;; +;;;; Vouivre is free software: you can redistribute it and/or +;;;; modify it under the terms of the GNU General Public +;;;; License as published by the Free Software Foundation, either +;;;; version 3 of the License, or (at your option) any later version. +;;;; +;;;; Vouivre is distributed in the hope that it will be useful, +;;;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +;;;; General Public License for more details. +;;;; +;;;; You should have received a copy of the GNU General Public +;;;; License along with Vouivre. If not, see . + + + +(define-module (shared-object-creator) + #:use-module (config) + #:use-module (elf-header) + #:use-module (program-headers) + #:use-module (section-headers) + #:use-module (dynamic-section) + #:use-module (symbol-table) + #:use-module (string-table) + #:use-module (rnrs bytevectors) + #:use-module (rnrs io ports) + #:use-module (ice-9 format) + #:use-module (relocation-table) + #:use-module (elf-layout-calculator) + #:use-module (elf-dynamic-calculator) + #:use-module (rela-plt-section) + #:use-module (got-plt-section) + #:use-module (linker) + #:use-module (data-section) + #:use-module (note-gnu-build-id-section) + #:export (create-shared-object)) + +(define (create-got-section got-size) + (make-bytevector got-size 0)) + +(define (create-init-section) + (let ((init-section (make-bytevector 16 0))) + ;; Simple .init section (x86_64 assembly) + (bytevector-u8-set! init-section 0 #x55) ; push rbp + (bytevector-u8-set! init-section 1 #x48) + (bytevector-u8-set! init-section 2 #x89) + (bytevector-u8-set! init-section 3 #xe5) ; mov rbp, rsp + (bytevector-u8-set! init-section 4 #x5d) ; pop rbp + (bytevector-u8-set! init-section 5 #xc3) ; ret + init-section)) + +(define (create-shared-object code data-sections output-file symbol-addresses label-positions assembled-relocation-table) + (let* ((layout (calculate-elf-layout code data-sections symbol-addresses label-positions)) + (program-headers-offset (assoc-ref layout 'program-headers-offset)) + (code-size (assoc-ref layout 'code-size)) + (data-size (assoc-ref layout 'data-size)) + (symtab-size (assoc-ref layout 'symtab-size)) + (strtab-size (assoc-ref layout 'strtab-size)) + (shstrtab-size (assoc-ref layout 'shstrtab-size)) + (relocation-table-size (assoc-ref layout 'relocation-table-size)) + (got-size (assoc-ref layout 'got-size)) + (data-offset (assoc-ref layout 'data-offset)) + (rodata-size (assoc-ref layout 'rodata-size)) + (plt-size (assoc-ref layout 'plt-size)) + (init-offset (assoc-ref layout 'init-offset)) + (init-size (assoc-ref layout 'init-size)) + (section-headers-offset (assoc-ref layout 'section-headers-offset)) + (shstrtab-addr (assoc-ref layout 'shstrtab-addr)) + (dynamic-addr #x2f00) + (bss-size (assoc-ref layout 'bss-size)) + (symtab-offset (assoc-ref layout 'symtab-offset)) + (strtab-offset (assoc-ref layout 'strtab-offset)) + (symtab-hash (assoc-ref layout 'symtab-hash)) + (dynsym-indices (assoc-ref layout 'dynsym-indices)) + (symtab-bv (assoc-ref layout 'symtab-bv)) + (strtab (assoc-ref layout 'strtab)) + (dynsymtab-bv (assoc-ref layout 'dynsymtab-bv)) + (dynstrtab (assoc-ref layout 'dynstrtab)) + (dynsym-size (assoc-ref layout 'dynsym-size)) + (dynstr-size (assoc-ref layout 'dynstr-size)) + (init-section (create-init-section)) + (rodata-offset #x2000) + (note-gnu-build-id-address #x1c8) + (note-gnu-build-id-size #x24) + (hash-offset (align-to (+ note-gnu-build-id-address note-gnu-build-id-size) word-size)) + (dynamic-offset (align-to dynamic-addr word-size)) + (dynamic-size (assoc-ref layout 'dynamic-size)) + (hash-table (create-hash-section dynsymtab-bv dynstrtab)) + (hash-size (bytevector-length hash-table)) + (dynsym-offset (align-to (+ hash-offset hash-size) word-size)) + (dynstr-offset (align-to (+ dynsym-offset dynsym-size) word-size)) + (rela-offset (align-to (+ dynstr-offset dynstr-size) word-size)) + (relocation-table (create-relocation-table symtab-hash)) + (relocation-table-size (bytevector-length relocation-table)) + (code-offset (assoc-ref layout 'code-offset)) + (got-offset (align-to (+ dynamic-offset dynamic-size) word-size)) + (got-size (assoc-ref layout 'got-size)) + (got-plt-offset (align-to (+ got-offset got-size) word-size)) + (init-offset (+ text-addr code-size)) + (init-size 16) + (fini-size 16) + (fini-offset (align-to (+ init-offset init-size) 16)) + (plt-offset (align-to (+ fini-offset fini-size) 16)) + (plt-got-offset (align-to (+ plt-offset plt-size) word-size)) + (plt-got-size (* (length (hash-map->list cons label-positions)) 8)) + (rela-plt-offset (align-to (+ rela-offset relocation-table-size) word-size)) + (rela-addr (+ dynamic-addr (- rela-offset dynamic-offset))) + + (rela-plt-section (create-rela-plt-section + (hash-map->list cons label-positions) + got-plt-offset + dynsym-indices)) + (rela-plt-size (bytevector-length rela-plt-section)) + (gnu-version-offset (align-to (+ hash-offset hash-size) 4)) + (gnu-version-r-size 32) ; Since we're creating an empty .gnu.version_r section + (gnu-version-size (* 2 num-dynamic-entries)) + (gnu-version-r-offset (align-to (+ gnu-version-offset gnu-version-size) word-size)) + (gnu-version-d-size 32) + (gnu-version-size (* 2 num-dynamic-entries)) + (gnu-version-d-offset (align-to (+ gnu-version-offset gnu-version-size) word-size)) + (zero-load-size (+ rela-offset relocation-table-size)) + (data-section (create-data-section data-sections)) + + (got-plt-section (create-got-plt-section + (hash-map->list cons label-positions) + dynamic-addr + plt-offset)) + (got-plt-size (bytevector-length got-plt-section)) + (data-addr (align-to (+ got-plt-offset got-plt-size 2) word-size)) + (total-dynamic-size (- (+ got-plt-offset got-plt-size) dynamic-offset)) + (data-segment-size (+ data-size total-dynamic-size)) + (shstrtab (create-section-header-string-table)) + (dynamic-section (create-dynamic-section + dynstr-offset + dynsym-offset + dynstr-size + dynsym-size + rela-offset + relocation-table-size + got-offset + hash-offset + gnu-version-offset + gnu-version-r-offset + gnu-version-r-size + plt-offset + plt-size + rela-plt-offset + rela-plt-size + got-plt-offset)) + (section-headers (create-section-headers + text-addr + code-size + data-size + symtab-size + strtab-size + shstrtab-size + dynsym-size + dynstr-size + relocation-table-size + total-dynamic-size + dynamic-size + rela-offset + got-size + data-addr + dynamic-addr + dynsym-offset + dynstr-offset + rela-addr + (+ dynamic-addr (- got-offset dynamic-offset)) + (+ dynamic-addr (- plt-offset dynamic-offset)) + symtab-offset + strtab-offset + shstrtab-addr + plt-size + (+ dynamic-addr (- plt-got-offset dynamic-offset)) + plt-got-size + rela-plt-offset + rela-plt-size + got-plt-offset + got-plt-size + rodata-offset + (+ dynamic-addr (- gnu-version-offset dynamic-offset)) + (+ dynamic-addr (- gnu-version-r-offset dynamic-offset)) + (* 2 (/ dynsym-size 24)) ; gnu-version-size + gnu-version-r-size + gnu-version-d-offset + gnu-version-d-size + hash-offset + hash-size + code-offset + init-offset + init-size + note-gnu-build-id-address + note-gnu-build-id-size + #x2000 ; Hardcoded .eh_frame address + #x0 ; Hardcoded .eh_frame size + )) + (program-headers (create-program-headers + elf-header-size + program-header-size + num-program-headers + text-addr + code-size + rodata-size + bss-size + data-segment-size + dynamic-addr + dynamic-offset + dynamic-size + total-dynamic-size + got-offset + got-size + plt-offset + plt-size + (+ data-addr data-segment-size) + zero-load-size + got-plt-size + alignment)) + (program-headers-size (bytevector-length program-headers)) + (num-program-headers (/ program-headers-size program-header-size)) + (section-headers-size (* num-sections section-header-size)) + (total-size (+ section-headers-offset section-headers-size)) + (text-section-offset (calculate-text-section-offset elf-header-size program-headers-size)) + (entry-point (calculate-entry-point text-addr text-section-offset)) + (build-id "ff868e73a156d910e45c2590bb78b1224345b530") + (note-gnu-build-id-section (create-note-gnu-build-id-section build-id)) + (elf-header (create-elf-header + entry-point + program-headers-offset + program-headers-size + section-headers-offset + num-program-headers + num-sections + total-size + shstrtab-index + hash-offset + hash-size))) + + (let ((elf-file (make-bytevector total-size 0))) + ;; Copy ELF header (assumed to start at the beginning of the file) + (bytevector-copy! elf-header 0 elf-file 0 (bytevector-length elf-header)) + + ;; Copy program headers + (bytevector-copy! program-headers 0 elf-file program-headers-offset program-headers-size) + + ;; Copy the .note.gnu.build-id section into the ELF file + (bytevector-copy! note-gnu-build-id-section 0 elf-file note-gnu-build-id-address note-gnu-build-id-size) + + ;; Copy hash table (.hash) + (bytevector-copy! hash-table 0 elf-file hash-offset hash-size) + + ;; Resolve and copy code section + (let* ((resolved-code (link-code code symtab-hash label-positions assembled-relocation-table data-addr)) + (code-offset (assoc-ref layout 'code-offset))) + (bytevector-copy! resolved-code 0 elf-file code-offset (bytevector-length resolved-code))) + + ;; Copy Global Offset Table for PLT (.got.plt) + (bytevector-copy! got-plt-section 0 elf-file got-plt-offset got-plt-size) + + ;; Copy dynamic section + (bytevector-copy! dynamic-section 0 elf-file dynamic-offset dynamic-size) + + ;; Copy dynamic symbol table (.dynsym) + (bytevector-copy! dynsymtab-bv 0 elf-file dynsym-offset dynsym-size) + + ;; Copy dynamic string table (.dynstr) + (bytevector-copy! dynstrtab 0 elf-file dynstr-offset dynstr-size) + + ;; Copy initialization section (.init) + (bytevector-copy! init-section 0 elf-file init-offset init-size) + + ;; Copy relocation table (.rela.dyn) + (bytevector-copy! relocation-table 0 elf-file rela-offset relocation-table-size) + + ;; Copy data section (.data) + (bytevector-copy! data-section 0 elf-file data-addr (bytevector-length data-section)) + + ;; Copy symbol table (.symtab) + (bytevector-copy! symtab-bv 0 elf-file symtab-offset symtab-size) + + ;; Copy string table (.strtab) + (bytevector-copy! strtab 0 elf-file strtab-offset strtab-size) + + ;; Copy Global Offset Table (.got) + (let ((got-section (create-got-section got-size))) + (bytevector-copy! got-section 0 elf-file got-offset got-size)) + + ;; Copy section header string table (.shstrtab) + (bytevector-copy! shstrtab 0 elf-file (- section-headers-offset shstrtab-size) shstrtab-size) + + ;; Copy section headers + (bytevector-copy! section-headers 0 elf-file section-headers-offset section-headers-size) + + ;; Write the ELF file + (call-with-output-file output-file + (lambda (port) + (put-bytevector port elf-file))) + + total-size))) diff --git a/compiler/string-table.scm b/compiler/string-table.scm new file mode 100644 index 0000000..3ebca4b --- /dev/null +++ b/compiler/string-table.scm @@ -0,0 +1,113 @@ +;;;; Copyright (C) 2023 Vouivre Digital Corporation +;;;; +;;;; This file is part of Vouivre. +;;;; +;;;; Vouivre is free software: you can redistribute it and/or +;;;; modify it under the terms of the GNU General Public +;;;; License as published by the Free Software Foundation, either +;;;; version 3 of the License, or (at your option) any later version. +;;;; +;;;; Vouivre is distributed in the hope that it will be useful, +;;;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +;;;; General Public License for more details. +;;;; +;;;; You should have received a copy of the GNU General Public +;;;; License along with Vouivre. If not, see . + + + +(define-module (string-table) + #:use-module (rnrs bytevectors) + #:use-module (ice-9 format) + #:export (create-string-table + create-section-header-string-table + string-table-offset + get-section-name-offset + string-match?)) + +(define (string-match? str bv offset) + (let ((str-len (string-length str))) + (and (<= (+ offset str-len) (bytevector-length bv)) + (let loop ((i 0)) + (or (= i str-len) + (and (= (char->integer (string-ref str i)) + (bytevector-u8-ref bv (+ offset i))) + (loop (+ i 1)))))))) + +(define* (create-string-table symbol-addresses label-positions #:optional (options '())) + (let ((null-terminator-size (or (assoc-ref options 'null-terminator-size) 1))) + (let* ((symbol-names + (if (hash-table? symbol-addresses) + (hash-map->list (lambda (key value) (symbol->string key)) symbol-addresses) + (map (lambda (pair) (symbol->string (car pair))) symbol-addresses))) + (label-names + (if (hash-table? label-positions) + (hash-map->list (lambda (key value) (symbol->string key)) label-positions) + (map (lambda (pair) (symbol->string (car pair))) label-positions))) + (names (cons "" (append symbol-names label-names))) + (total-length (apply + (map (lambda (name) (+ (string-length name) null-terminator-size)) names))) + (table (make-bytevector total-length 0)) + (offset 0)) + (for-each (lambda (name) + (let ((len (string-length name))) + (bytevector-copy! (string->utf8 name) 0 table offset len) + (bytevector-u8-set! table (+ offset len) 0) + (set! offset (+ offset len null-terminator-size)))) + names) + table))) + +(define* (create-section-header-string-table #:optional (options '())) + (let ((null-terminator-size (or (assoc-ref options 'null-terminator-size) 1)) + (section-names (or (assoc-ref options 'section-names) + '("" ".note.gnu.build-id" ".hash" ".dynsym" ".dynstr" + ".rela.dyn" ".text" ".eh_frame" ".dynamic" ".got" + ".got.plt" ".data" ".symtab" ".strtab" ".shstrtab")))) + (let* ((total-length (apply + (map (lambda (s) (+ (string-length s) null-terminator-size)) section-names))) + (table (make-bytevector total-length 0)) + (offset 0)) + (for-each (lambda (name) + (let ((len (string-length name))) + (bytevector-copy! (string->utf8 name) 0 table offset len) + (set! offset (+ offset len null-terminator-size)))) + section-names) + table))) + +(define (string-table-offset name string-table) + (let loop ((offset 0)) + (if (>= offset (bytevector-length string-table)) + #f ; String not found + (if (string-match? name string-table offset) + offset + (loop (+ offset 1)))))) + +(define (get-section-name-offset shstrtab section-name) + (string-table-offset section-name shstrtab)) + +(define* (symbols->bytevector entries #:optional (options '())) + (let ((symbol-entry-size (or (assoc-ref options 'symbol-entry-size) 24)) + (st-name-offset (or (assoc-ref options 'st-name-offset) 0)) + (st-info-offset (or (assoc-ref options 'st-info-offset) 4)) + (st-other-offset (or (assoc-ref options 'st-other-offset) 5)) + (st-shndx-offset (or (assoc-ref options 'st-shndx-offset) 6)) + (st-value-offset (or (assoc-ref options 'st-value-offset) 8)) + (st-size-offset (or (assoc-ref options 'st-size-offset) 16)) + (initial-string-offset (or (assoc-ref options 'initial-string-offset) 1)) + (null-terminator-size (or (assoc-ref options 'null-terminator-size) 1))) + (let* ((table-size (* (length entries) symbol-entry-size)) + (table (make-bytevector table-size 0)) + (string-table-offset initial-string-offset)) + (let loop ((entries entries) + (index 0) + (str-offset initial-string-offset)) + (if (null? entries) + table + (let* ((entry (car entries)) + (name (symbol->string (car entry))) + (address (cdr entry)) + (entry-offset (* index symbol-entry-size))) + (bytevector-u32-set! table (+ entry-offset st-name-offset) str-offset (endianness little)) + (bytevector-u64-set! table (+ entry-offset st-value-offset) address (endianness little)) + (loop (cdr entries) + (+ index 1) + (+ str-offset (string-length name) null-terminator-size)))))))) diff --git a/compiler/symbol-table.scm b/compiler/symbol-table.scm new file mode 100644 index 0000000..b9fe383 --- /dev/null +++ b/compiler/symbol-table.scm @@ -0,0 +1,478 @@ +;;;; Copyright (C) 2023 Vouivre Digital Corporation +;;;; +;;;; This file is part of Vouivre. +;;;; +;;;; Vouivre is free software: you can redistribute it and/or +;;;; modify it under the terms of the GNU General Public +;;;; License as published by the Free Software Foundation, either +;;;; version 3 of the License, or (at your option) any later version. +;;;; +;;;; Vouivre is distributed in the hope that it will be useful, +;;;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +;;;; General Public License for more details. +;;;; +;;;; You should have received a copy of the GNU General Public +;;;; License along with Vouivre. If not, see . + +(define-module (symbol-table) + #:use-module (ice-9 hash-table) + #:use-module (rnrs bytevectors) + #:use-module (rnrs io ports) + #:use-module (srfi srfi-1) + #:use-module (srfi srfi-9) + #:use-module (string-table) + #:export (make-symbol-table + add-symbol! + add-label-symbol! + get-symbol + symbol-in-table? + symbol-is-function? + create-symtab-and-strtab + create-dynsym-and-dynstr + create-hash-section + create-symbol-table + make-symbol-entry + symbol-entry-name + symbol-entry-address + create-version-section)) + +(define-record-type + (make-symbol-entry name address info other shndx size) + symbol-entry? + (name symbol-entry-name) + (address symbol-entry-address) + (info symbol-entry-info) + (other symbol-entry-other) + (shndx symbol-entry-shndx) + (size symbol-entry-size)) + +(define* (create-symbol-table symbol-addresses label-positions #:optional (options '())) + (let ((table (make-symbol-table))) + (add-symbols-to-table! table symbol-addresses add-symbol!) + (add-symbols-to-table! table (or label-positions '()) add-label-symbol!) + table)) + +(define (string-trim-suffix str suffix) + (if (string-suffix? suffix str) + (substring str 0 (- (string-length str) (string-length suffix))) + str)) + +(define (make-symbol-table) + (make-hash-table)) + +(define (add-symbol! table name address) + (hash-set! table name (cons address #f))) + +(define (add-label-symbol! table name address) + (hash-set! table name (cons address #t))) + +(define (get-symbol table name) + (let ((entry (hash-ref table name #f))) + (and entry (car entry)))) + +(define (symbol-in-table? table name) + (hash-ref table name #f)) + +(define (symbol-is-function? table name) + (let ((entry (hash-ref table name #f))) + (and entry (cdr entry)))) + +(define (add-symbols-to-table! table symbols add-func) + (if (hash-table? symbols) + (hash-for-each (lambda (name address) + (add-func table name address)) + symbols) + (for-each (lambda (sym) + (add-func table (car sym) (cdr sym))) + symbols))) + +(define (calculate-string-table-size symbol-table) + (hash-fold + (lambda (key value acc) + (+ acc (string-length (symbol->string key)) 1)) + 1 ; Start with 1 for the initial null byte + symbol-table)) + +(define (write-initial-data string-table opts) + (let ((initial-offset (assoc-ref opts 'initial-string-offset))) + initial-offset)) + +(define* (create-symtab-and-strtab symbol-addresses label-positions #:optional (options '())) + (define (merge-options default-options custom-options) + (append custom-options default-options)) + + (define (create-symbol-table addresses labels) + (let ((table (make-symbol-table))) + (add-symbols-to-table! table addresses add-symbol!) + (add-symbols-to-table! table (or labels '()) add-label-symbol!) + table)) + + (define (calculate-table-size symbol-table opts) + (let* ((symbol-entry-size (assoc-ref opts 'symbol-entry-size)) + (symbol-count (+ (hash-count (const #t) symbol-table) 1)) + (table-size (* symbol-count symbol-entry-size)) + (string-table-size (calculate-string-table-size symbol-table))) + (cons table-size string-table-size))) + + (define (initialize-table table-size string-table-size) + (cons (make-bytevector table-size 0) + (make-bytevector string-table-size 0))) + + (define section-index-table + (alist->hash-table + '((".note.gnu.build-id" . 1) + (".hash" . 2) + (".dynsym" . 3) + (".dynstr" . 4) + (".rela.dyn" . 5) + (".text" . 6) + (".eh_frame" . 7) + (".dynamic" . 8) + (".got" . 9) + (".got.plt" . 10) + (".data" . 11) + (".symtab" . 12) + (".strtab" . 13) + (".shstrtab" . 14)))) + + (define (process-table symbol-table table string-table opts initial-str-offset symbol-addresses) + (let* ((symbols (hash-map->list cons symbol-table)) + (sorted-symbols + (sort symbols + (lambda (a b) + (let ((name-a (symbol->string (car a))) + (name-b (symbol->string (car b))) + (index-a (list-index (lambda (addr) (equal? (car addr) (car a))) symbol-addresses)) + (index-b (list-index (lambda (addr) (equal? (car addr) (car b))) symbol-addresses))) + (cond + ((eq? (string-contains name-a "@LOCAL") + (string-contains name-b "@LOCAL")) + (cond + ((and index-a index-b) (< index-a index-b)) + (index-a #t) + (index-b #f) + (else (stringstring (car symbol))) + (value (cdr symbol)) + (address (car value)) + (is-function (cdr value)) + (is-local (string-contains name "@LOCAL")) + (is-section (char=? (string-ref name 0) #\.)) + (is-dynamic (equal? name "_DYNAMIC@LOCAL")) + (is-got-local (equal? name "_GLOBAL_OFFSET_TABLE_@LOCAL")) + (clean-name (if is-local + (string-trim-suffix name "@LOCAL") + name)) + (name-bytes (string->utf8 clean-name))) + (let* ((stb-value (if is-local + (assoc-ref opts 'stb-local) + (assoc-ref opts 'stb-global))) + (stt-func (assoc-ref opts 'stt-func)) + (stt-object (assoc-ref opts 'stt-object)) + (stt-section (assoc-ref opts 'stt-section)) + (shn-text (assoc-ref opts 'shn-text)) + (shn-data (assoc-ref opts 'shn-data)) + (shn-dynamic (assoc-ref opts 'shn-dynamic)) + (stt-notype (assoc-ref opts 'stt-notype)) + (shn-got-local 10) + (null-terminator-size (assoc-ref opts 'null-terminator-size)) + (symbol-entry-size (assoc-ref opts 'symbol-entry-size)) + (section-index (if is-section + (hash-ref section-index-table clean-name 0) + (cond + (is-got-local shn-got-local) + (is-dynamic shn-dynamic) + (is-function shn-text) + (else shn-data)))) + (symbol-type (cond + (is-section stt-section) + (is-function stt-notype) + ((or (string=? clean-name "_DYNAMIC") + (string=? clean-name "_GLOBAL_OFFSET_TABLE_")) + stt-object) + (else stt-notype))) + (entry (make-symbol-entry str-offset address + (logior (ash stb-value 4) + symbol-type) + 0 + section-index + (if is-function 0 0))) + (entry-offset (* index symbol-entry-size))) + (bytevector-copy! name-bytes 0 string-table str-offset (bytevector-length name-bytes)) + (bytevector-u8-set! string-table (+ str-offset (bytevector-length name-bytes)) 0) + (write-symbol-entry! table entry-offset opts entry) + (loop (cdr symbols) + (+ index 1) + (+ str-offset (bytevector-length name-bytes) null-terminator-size)))))))) + + (let* ((default-options '((symbol-entry-size . 24) + (st-name-offset . 0) + (st-info-offset . 4) + (st-other-offset . 5) + (st-shndx-offset . 6) + (st-value-offset . 8) + (st-size-offset . 16) + (null-terminator-size . 1) + (initial-string-offset . 1) + (stt-notype . 0) + (stt-object . 1) + (stt-func . 2) + (stt-section . 3) + (stb-global . 1) + (stb-local . 0) + (shn-data . 11) + (shn-text . 6) + (shn-dynamic . 8))) + (opts (merge-options default-options options)) + (symbol-table (create-symbol-table symbol-addresses label-positions)) + (symbol-sizes (calculate-table-size symbol-table opts)) + (symbol-tables (initialize-table (car symbol-sizes) (cdr symbol-sizes))) + (symbol-table-bytevector (car symbol-tables)) + (symbol-string-table (cdr symbol-tables)) + (initial-str-offset (write-initial-data symbol-string-table opts))) + + (write-symbol-entry! symbol-table-bytevector 0 opts (make-symbol-entry 0 0 0 0 0 0)) + + (process-table symbol-table symbol-table-bytevector symbol-string-table opts initial-str-offset symbol-addresses))) + +(define* (create-dynsym-and-dynstr dynamic-symbol-addresses + #:optional (label-positions '()) (options '())) + (define (merge-options default-options custom-options) + (append custom-options default-options)) + + (define (create-symbol-table addresses labels) + (let ((table (make-symbol-table))) + (add-symbols-to-table! table addresses add-symbol!) + (add-symbols-to-table! table (or labels '()) add-label-symbol!) + table)) + + (define (calculate-table-size symbol-table opts) + (let* ((symbol-entry-size (assoc-ref opts 'symbol-entry-size)) + (symbol-count (+ (hash-count (const #t) symbol-table) 1)) + (table-size (* symbol-count symbol-entry-size)) + (string-table-size (calculate-string-table-size symbol-table))) + (cons table-size string-table-size))) + + (define (initialize-table table-size string-table-size) + (cons (make-bytevector table-size 0) + (make-bytevector string-table-size 0))) + + (define (process-table symbol-table table string-table opts initial-str-offset dynamic-symbol-addresses) + (let* ((symbols (hash-map->list cons symbol-table)) + (sorted-symbols + (sort symbols + (lambda (a b) + (let ((name-a (symbol->string (car a))) + (name-b (symbol->string (car b))) + (index-a (list-index (lambda (addr) (equal? (car addr) (car a))) dynamic-symbol-addresses)) + (index-b (list-index (lambda (addr) (equal? (car addr) (car b))) dynamic-symbol-addresses))) + (cond + ;; Both local or both global, sort by original order + ((eq? (string-contains name-a "@LOCAL") + (string-contains name-b "@LOCAL")) + (cond + ((and index-a index-b) (< index-a index-b)) + (index-a #t) + (index-b #f) + (else (stringstring (car symbol))) + (value (cdr symbol)) + (address (car value)) + (is-function (cdr value)) + (is-local (string-contains name "@LOCAL")) + (clean-name (if is-local + (string-trim-suffix name "@LOCAL") + name)) + (name-bytes (string->utf8 clean-name)) + (entry (make-symbol-entry str-offset address + (logior (ash (if is-local + (assoc-ref opts 'stb-local) + (assoc-ref opts 'stb-global)) + 4) + (if is-function + (assoc-ref opts 'stt-notype) + (assoc-ref opts 'stt-notype))) + 0 + (if is-function + (assoc-ref opts 'shn-text) + (assoc-ref opts 'shn-data)) + (if is-function 0 0))) + (entry-offset (* index (assoc-ref opts 'symbol-entry-size)))) + (bytevector-copy! name-bytes 0 string-table str-offset (bytevector-length name-bytes)) + (bytevector-u8-set! string-table (+ str-offset (bytevector-length name-bytes)) 0) + (write-symbol-entry! table entry-offset opts entry) + (loop (cdr symbols) + (+ index 1) + (+ str-offset (bytevector-length name-bytes) (assoc-ref opts 'null-terminator-size)))))))) + + (let* ((default-options '((symbol-entry-size . 24) + (st-name-offset . 0) + (st-info-offset . 4) + (st-other-offset . 5) + (st-shndx-offset . 6) + (st-value-offset . 8) + (st-size-offset . 16) + (null-terminator-size . 1) + (initial-string-offset . 1) + (stt-notype . 0) + (stt-object . 1) + (stt-func . 2) + (stb-global . 1) + (stb-local . 0) + (shn-data . 11) + (shn-text . 6))) + (opts (merge-options default-options options)) + (dynamic-symbol-table (create-symbol-table dynamic-symbol-addresses label-positions)) + (dynamic-sizes (calculate-table-size dynamic-symbol-table opts)) + (dynamic-tables (initialize-table (car dynamic-sizes) (cdr dynamic-sizes))) + (dynamic-table-bytevector (car dynamic-tables)) + (dynamic-string-table (cdr dynamic-tables)) + (dynamic-initial-str-offset (write-initial-data dynamic-string-table opts))) + + (write-symbol-entry! dynamic-table-bytevector 0 opts (make-symbol-entry 0 0 0 0 0 0)) + + (process-table dynamic-symbol-table dynamic-table-bytevector dynamic-string-table opts dynamic-initial-str-offset dynamic-symbol-addresses))) + +(define (write-symbol-entry! table entry-offset opts entry) + (define (write-field offset size value) + (case size + ((1) (bytevector-u8-set! table (+ entry-offset offset) value)) + ((2) (bytevector-u16-set! table (+ entry-offset offset) value (endianness little))) + ((4) (bytevector-u32-set! table (+ entry-offset offset) value (endianness little))) + ((8) (bytevector-u64-set! table (+ entry-offset offset) value (endianness little))))) + + (write-field (assoc-ref opts 'st-name-offset) 4 (symbol-entry-name entry)) + (write-field (assoc-ref opts 'st-info-offset) 1 (symbol-entry-info entry)) + (write-field (assoc-ref opts 'st-other-offset) 1 (symbol-entry-other entry)) + (write-field (assoc-ref opts 'st-shndx-offset) 2 (symbol-entry-shndx entry)) + (write-field (assoc-ref opts 'st-value-offset) 8 (symbol-entry-address entry)) + (write-field (assoc-ref opts 'st-size-offset) 8 (symbol-entry-size entry))) + +(define (ash n k) + (if (>= k 0) + (* n (expt 2 k)) ; Left shift + (quotient n (expt 2 (- k))))) ; Right shift + +(define (arithmetic-shift n k) + (if (>= k 0) + (ash n k) ; Left shift + (let ((m (ash 1 (- k)))) ; Right shift + (if (>= n 0) + (quotient n m) + (- (quotient (+ n 1) m) 1))))) + +(define* (create-hash-section dynsym-table dynstr-table #:optional (options '())) + (let* ((opts (append options '((hash-header-size . 8) + (hash-entry-size . 4) + (symbol-entry-size . 24)))) + (symbol-count (/ (bytevector-length dynsym-table) (assoc-ref opts 'symbol-entry-size))) + (nbucket (next-prime (max 1 (min symbol-count 3)))) ; Adjust bucket count + (nchain symbol-count) + (hash-size (+ (assoc-ref opts 'hash-header-size) + (* (assoc-ref opts 'hash-entry-size) (+ nbucket nchain)))) + (hash-section (make-bytevector hash-size 0)) + (buckets (make-vector nbucket 0)) + (chains (make-vector nchain 0))) + + ; Write number of buckets and chains + (bytevector-u32-set! hash-section 0 nbucket (endianness little)) + (bytevector-u32-set! hash-section 4 nchain (endianness little)) + + ; Hash each symbol and add to appropriate bucket + (let loop ((i 1)) ; Start from 1 to skip the null symbol + (when (< i symbol-count) + (let* ((name (get-symbol-name dynsym-table dynstr-table i)) + (hash (elf-hash name)) + (bucket (modulo hash nbucket)) + (chain-index (vector-ref buckets bucket))) + (if (= chain-index 0) + (vector-set! buckets bucket i) + (let chain-loop ((prev chain-index)) + (if (= (vector-ref chains prev) 0) + (vector-set! chains prev i) + (chain-loop (vector-ref chains prev))))) + (vector-set! chains i 0)) + (loop (+ i 1)))) + + ; Write bucket array + (let loop ((i 0)) + (when (< i nbucket) + (bytevector-u32-set! hash-section + (+ (assoc-ref opts 'hash-header-size) + (* (assoc-ref opts 'hash-entry-size) i)) + (vector-ref buckets i) + (endianness little)) + (loop (+ i 1)))) + + ; Write chain array + (let loop ((i 0)) + (when (< i nchain) + (bytevector-u32-set! hash-section + (+ (assoc-ref opts 'hash-header-size) + (* (assoc-ref opts 'hash-entry-size) nbucket) + (* (assoc-ref opts 'hash-entry-size) i)) + (vector-ref chains i) + (endianness little)) + (loop (+ i 1)))) + + hash-section)) + +; Helper function to get symbol name from dynsym and dynstr tables +(define (get-symbol-name dynsym-table dynstr-table index) + (let* ((entry-offset (* index 24)) ; 24-byte entries in dynsym + (name-offset (bytevector-u32-ref dynsym-table entry-offset (endianness little)))) + (let loop ((i 0)) + (if (zero? (bytevector-u8-ref dynstr-table (+ name-offset i))) + (utf8->string (bytevector-slice dynstr-table name-offset (+ name-offset i))) + (loop (+ i 1)))))) + +(define (elf-hash name) + (let loop ((h 0) (chars (string->list name))) + (if (null? chars) + h + (let* ((h (logand #xffffffff (+ (arithmetic-shift h 4) (char->integer (car chars))))) + (g (logand h #xf0000000))) + (loop (logand #xffffffff (logxor (logand h (lognot g)) (arithmetic-shift g -24))) + (cdr chars)))))) + +; Helper function to find the next prime number (unchanged) +(define (next-prime n) + (let loop ((i (if (even? n) (+ n 1) n))) + (if (prime? i) + i + (loop (+ i 2))))) + +; Helper function to check if a number is prime (unchanged) +(define (prime? n) + (if (< n 2) + #f + (let loop ((i 2)) + (cond ((> (* i i) n) #t) + ((zero? (modulo n i)) #f) + (else (loop (+ i 1))))))) + +; Helper function to slice a bytevector (unchanged) +(define (bytevector-slice bv start end) + (let* ((length (- end start)) + (result (make-bytevector length))) + (bytevector-copy! bv start result 0 length) + result)) diff --git a/compiler/utils.scm b/compiler/utils.scm new file mode 100644 index 0000000..2642bdf --- /dev/null +++ b/compiler/utils.scm @@ -0,0 +1,38 @@ +;;;; Copyright (C) 2023 Vouivre Digital Corporation +;;;; +;;;; This file is part of Vouivre. +;;;; +;;;; Vouivre is free software: you can redistribute it and/or +;;;; modify it under the terms of the GNU General Public +;;;; License as published by the Free Software Foundation, either +;;;; version 3 of the License, or (at your option) any later version. +;;;; +;;;; Vouivre is distributed in the hope that it will be useful, +;;;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +;;;; General Public License for more details. +;;;; +;;;; You should have received a copy of the GNU General Public +;;;; License along with Vouivre. If not, see . + + + +(define-module (utils) + #:use-module (rnrs bytevectors) + #:export (align-to + bytevector-append)) + +(define (align-to value alignment) + (* (ceiling (/ value alignment)) alignment)) + +; Add this definition near the top of the file, after the module definition +(define (bytevector-append . bvs) + (let* ((total-length (apply + (map bytevector-length bvs))) + (result (make-bytevector total-length))) + (let loop ((offset 0) + (bvs bvs)) + (if (null? bvs) + result + (let ((bv (car bvs))) + (bytevector-copy! bv 0 result offset (bytevector-length bv)) + (loop (+ offset (bytevector-length bv)) (cdr bvs)))))))