From 405c4f104975e5456ce6b28ad31fd996d3f4b657 Mon Sep 17 00:00:00 2001 From: Aleksey Veresov Date: Fri, 16 Oct 2020 12:45:33 +0300 Subject: Initial --- Makefile | 12 + ia32/arithmetic.sts | 103 +++++++++ ia32/branch.sts | 19 ++ ia32/core.sts | 26 +++ ia32/elf/begin.sts | 127 ++++++++++ ia32/elf/end.sts | 21 ++ ia32/ia32.sts | 7 + ia32/macro.sts | 50 ++++ ia32/memory.sts | 15 ++ ia32/stack.sts | 25 ++ ia32/syscall.sts | 50 ++++ main.sts | 26 +++ smack.c | 579 ++++++++++++++++++++++++++++++++++++++++++++++ strans.c | 652 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 14 files changed, 1712 insertions(+) create mode 100644 Makefile create mode 100644 ia32/arithmetic.sts create mode 100644 ia32/branch.sts create mode 100644 ia32/core.sts create mode 100644 ia32/elf/begin.sts create mode 100644 ia32/elf/end.sts create mode 100644 ia32/ia32.sts create mode 100644 ia32/macro.sts create mode 100644 ia32/memory.sts create mode 100644 ia32/stack.sts create mode 100644 ia32/syscall.sts create mode 100644 main.sts create mode 100644 smack.c create mode 100644 strans.c diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..fc94f79 --- /dev/null +++ b/Makefile @@ -0,0 +1,12 @@ +cryptrobber: main.sts strans smack + ./smack $< | ./strans > $@ + chmod 0755 $@ + +smack: smack.c + gcc $< -o $@ +strans: strans.c + gcc $< -o $@ + +.PHONY: clean +clean: + rm -f smack strans cryptrobber diff --git a/ia32/arithmetic.sts b/ia32/arithmetic.sts new file mode 100644 index 0000000..c9b4bf8 --- /dev/null +++ b/ia32/arithmetic.sts @@ -0,0 +1,103 @@ +defasm + +; pop eax +# 58 ?' +; add [esp], eax +# 01 ?' # 04 ?' # 24 ?' +next + +defasm - +; pop eax +# 58 ?' +; sub [esp], eax +# 29 ?' # 04 ?' # 24 ?' +next + +defasm / +; pop ebx +# 5b ?' +; pop eax +# 58 ?' +; xor edx, edx +# 31 ?' # d2 ?' +; div ebx +# f7 ?' # f3 ?' +; push edx +# 52 ?' +; push eax +# 50 ?' +next + +defasm = +; pop ebx +# 5b ?' +; pop eax +# 58 ?' +; cmp eax, ebx +# 39 ?' # d8 ?' +; sete al +# 0f ?' # 94 ?' 
# c0 ?' +; movzx eax, al +# 0f ?' # b6 ?' # c0 ?' +; push eax +# 50 ?' +next + +defasm != +; pop ebx +# 5b ?' +; pop eax +# 58 ?' +; cmp eax, ebx +# 39 ?' # d8 ?' +; setne al +# 0f ?' # 95 ?' # c0 ?' +; movzx eax, al +# 0f ?' # b6 ?' # c0 ?' +; push eax +# 50 ?' +next + +defasm != +; pop ebx +# 5b ?' +; pop eax +# 58 ?' +; cmp eax, ebx +# 39 ?' # d8 ?' +; setne al +# 0f ?' # 95 ?' # c0 ?' +; movzx eax, al +# 0f ?' # b6 ?' # c0 ?' +; push eax +# 50 ?' +next + +defasm < +; pop ebx +# 5b ?' +; pop eax +# 58 ?' +; cmp eax, ebx +# 39 ?' # d8 ?' +; setb al +# 0f ?' # 92 ?' # c0 ?' +; movzx eax, al +# 0f ?' # b6 ?' # c0 ?' +; push eax +# 50 ?' +next + +defasm > +; pop ebx +# 5b ?' +; pop eax +# 58 ?' +; cmp eax, ebx +# 39 ?' # d8 ?' +; seta al +# 0f ?' # 97 ?' # c0 ?' +; movzx eax, al +# 0f ?' # b6 ?' # c0 ?' +; push eax +# 50 ?' +next diff --git a/ia32/branch.sts b/ia32/branch.sts new file mode 100644 index 0000000..bedfbba --- /dev/null +++ b/ia32/branch.sts @@ -0,0 +1,19 @@ +defasm branch +; mov esi, [esi] +# 8b ?' # 36 ?' +; go next +next + +defasm 0branch +; pop eax +# 58 ?' +; add esi, 4 +# 83 ?' # c6 ?' # 4 ?' +; cmp eax, 0 +# 83 ?' # f8 ?' # 00 ?' +; jne +3 +# 75 ?' # 3 ?' +; mov esi, [esi - 4] +# 8b ?' # 76 ?' # fc ?' +; go next +next diff --git a/ia32/core.sts b/ia32/core.sts new file mode 100644 index 0000000..fd2d062 --- /dev/null +++ b/ia32/core.sts @@ -0,0 +1,26 @@ +defasm docol +; sub ebp, 4 +# 83 ?' # ed ?' # 4 ?' +; mov [ebp], esi +# 89 ?' # 75 ?' # 0 ?' +; add eax, 4 +# 83 ?' # c0 ?' # 4 ?' +; mov esi, eax +# 89 ?' # c6 ?' +; go next +# ad ?' # ff ?' # 20 ?' +next + +defasm exit +; mov esi, [ebp] +# 8b ?' # 75 ?' # 0 ?' +; add ebp, 4 +# 83 ?' # c5 ?' # 4 ?' +next + +defasm lit +; lodsd +# ad ?' +; push eax +# 50 ?' +next diff --git a/ia32/elf/begin.sts b/ia32/elf/begin.sts new file mode 100644 index 0000000..41c1efc --- /dev/null +++ b/ia32/elf/begin.sts @@ -0,0 +1,127 @@ +?create ?? 
_code_segment +_code_segment ?choose + +# 08048000 ?org +# 4 ?size +# 0 ?endianness + +?create ?? _data_segment +_data_segment ?choose + +# 09048000 ?org +# 4 ?size +# 0 ?endianness + +_code_segment ?choose + +; ELF header +; EI_MAG - ELF magic number +# 7f ?' # 45 ?' # 4c ?' # 46 ?' +; EI_CLASS - 32-bit format +# 1 ?' +; EI_DATA - little-endian +# 1 ?' +; EI_VERSION - current ELF header version +# 1 ?' +; EI_OSABI - Linux ABI +# 3 ?' +; EI_ABIVERSION - none +# 0 ?' +; EI_PAD - padding +# 7 ?res +; Executable file type +# 2 ?2. +; IA-32 architecture +# 3 ?2. +; ELF version +# 1 ?4. +; Entry point +?$ ?? _elf_entry +# 0 ?4. +; Program headers offset +# 34 ?4. +; Section headers offset +# 0 ?4. +; Extra flags +# 0 ?4. +; ELF header size +# 34 ?2. +; Program header size +# 20 ?2. +; Number of program headers +# 2 ?2. +; Section header size +# 28 ?2. +; Number of section headers +# 0 ?2. +; Section name section index +# 0 ?2. + +; ELF CODE PROGRAM HEADER +; Segment type +# 1 ?4. +; Segment offset +# 0 ?4. +; Segment virtual address +?$$ ?4. +; Segment physical address +?$$ ?4. +; Segment file size +?$ ?? _code_segment_file_size +# 0 ?4. +; Segment memory size +?$ ?? _code_segment_memory_size +# 0 ?4. +; Segment flags +# 5 ?4. +; Segment alignment +# 1000 ?4. + +; ELF DATA PROGRAM HEADER +; Segment type +# 1 ?4. +; Segment offset +?$ ?? _data_segment_offset +# 0 ?4. +; Segment virtual address +_data_segment ?choose +?$$ +_code_segment ?choose +?4. +; Segment physical address +_data_segment ?choose +?$$ +_code_segment ?choose +?4. +; Segment file size +?$ ?? _data_segment_file_size +# 0 ?4. +; Segment memory size +?$ ?? _data_segment_memory_size +# 0 ?4. +; Segment flags +# 6 ?4. +; Segment alignment +# 1000 ?4. + +; Return stack +_data_segment ?choose +1000 ?res +?$ ?dup ?? _return_stack +_code_segment ?choose + +; PROGRAM ENTRY CODE +?$ _elf_entry ?! +; mov ebp, _return_stack +# bd ?' +_return_stack ?. +; mov esi, _threaded_code_entry +# be ?' +?$ ?? 
_threaded_code_entry +# 0 ?. +; lodsd +# ad ?' +; jmp dword [eax] +# ff ?' # 20 ?' + +module ia32/ia32.sts diff --git a/ia32/elf/end.sts b/ia32/elf/end.sts new file mode 100644 index 0000000..7c80eec --- /dev/null +++ b/ia32/elf/end.sts @@ -0,0 +1,21 @@ +; Code segment alignment +# 1000 ?$ # 1000 ?mod ?- ?dup ?res + +; Code segment ELF sizes +?$ ?$$ ?- _code_segment_file_size ?! +?$ ?$$ ?- _code_segment_memory_size ?! + +; Data segment ELF offset +?$ ?$$ ?- +_code_segment ?choose +_data_segment_offset ?! + +; Data segment ELF sizes +_data_segment ?choose +?$ ?$$ ?- +_code_segment ?choose +_data_segment_file_size ?! +_data_segment ?choose +?$ ?$$ ?- +_code_segment ?choose +_data_segment_memory_size ?! diff --git a/ia32/ia32.sts b/ia32/ia32.sts new file mode 100644 index 0000000..bedec54 --- /dev/null +++ b/ia32/ia32.sts @@ -0,0 +1,7 @@ +module ia32/macro.sts +module ia32/core.sts +module ia32/stack.sts +module ia32/branch.sts +module ia32/memory.sts +module ia32/arithmetic.sts +module ia32/syscall.sts diff --git a/ia32/macro.sts b/ia32/macro.sts new file mode 100644 index 0000000..befcccb --- /dev/null +++ b/ia32/macro.sts @@ -0,0 +1,50 @@ +[ next 0 + # ad ?' # ff ?' # 20 ?' +] + +[ defasm 1 + [ %1% 0 + _%0% ?. + \] + ?$ ?? _%1% + ?$ # 4 ?+ ?. +] + +[ defword 1 + [ %1% 0 + _%0% ?. + \] + ?$ ?? _%1% + _docol # 4 ?+ ?. +] + +[ defarr 2 + _data_segment ?choose + ?$ ?? _%1%_arr + # %2% # 4 ?* ?res + _code_segment ?choose + defasm %1% + ; push _%1%_arr + # 68 ?' _%1%_arr ?. + next +] + +[ defvar 1 + defarr %1% 1 +] + +[ literal 0 + lit # %1% ?. +] + +[ set_entry 0 + ?$ _threaded_code_entry ?! +] + +[ if 0 + 0branch ?$ # 0 ?. +] + +[ fi 0 + ?$ ?swap ?! +] diff --git a/ia32/memory.sts b/ia32/memory.sts new file mode 100644 index 0000000..f176c13 --- /dev/null +++ b/ia32/memory.sts @@ -0,0 +1,15 @@ +defasm @ +; pop eax +# 58 ?' +; push dword [eax] +# ff ?' # 30 ?' +next + +defasm ! +; pop ebx +# 5b ?' +; pop eax +# 58 ?' +; mov [ebx], eax +# 89 ?' # 03 ?' 
+next diff --git a/ia32/stack.sts b/ia32/stack.sts new file mode 100644 index 0000000..a8ac749 --- /dev/null +++ b/ia32/stack.sts @@ -0,0 +1,25 @@ +defasm drop +; pop eax +# 58 ?' +next + +defasm dup +; push dword [esp] +# ff ?' # 34 ?' # 24 ?' +next + +defasm over +; push dword [esp + 4] +# ff ?' # 74 ?' # 24 ?' # 4 ?' +next + +defasm swap +; pop eax +# 58 ?' +; pop ebx +# 5b ?' +; push eax +# 50 ?' +; push ebx +# 53 ?' +next diff --git a/ia32/syscall.sts b/ia32/syscall.sts new file mode 100644 index 0000000..58a6ffd --- /dev/null +++ b/ia32/syscall.sts @@ -0,0 +1,50 @@ +defasm quit +; mov eax, 1 +# b8 ?' # 1 ?. +; xor ebx, ebx +# 31 ?' # db ?' +; int 0x80 +# cd ?' # 80 ?' + +_data_segment ?choose +?$ ?? _io_buffer +# 0 ?' +_code_segment ?choose + +defasm key +; mov eax, 3 +# b8 ?' # 3 ?. +; xor ebx, ebx +# 31 ?' # db ?' +; mov ecx, _io_buffer +# b9 ?' _io_buffer ?. +; mov edx, 1 +# ba ?' # 1 ?. +; int 0x80 +# cd ?' # 80 ?' +; xor ebx, ebx +# 31 ?' # db ?' +; mov bl, [_io_buffer] +# 8a ?' # 1d ?' _io_buffer ?. +; push ebx +# 53 ?' +; push eax +# 50 ?' +next + +defasm emit +; pop eax +# 58 ?' +; mov [_io_buffer], al +# a2 ?' _io_buffer ?. +; mov eax, 4 +# b8 ?' # 4 ?. +; mov ebx, 1 +# bb ?' # 1 ?. +; mov ecx, _io_buffer +# b9 ?' _io_buffer ?. +; mov edx, 1 +# ba ?' # 1 ?. +; int 0x80 +# cd ?' # 80 ?' +next diff --git a/main.sts b/main.sts new file mode 100644 index 0000000..7cbca9f --- /dev/null +++ b/main.sts @@ -0,0 +1,26 @@ +module ia32/elf/begin.sts + +defword myword +key if + emit +fi +drop +exit + +defarr my_array 3 +defvar my_variable + +set_entry +31 my_variable ! +my_variable @ emit +32 my_array ! +33 my_array 1 + ! +34 my_array 2 + ! 
+my_array @ emit
+my_array 1 + @ emit
+my_array 2 + @ emit
+35 36 swap emit emit
+myword
+quit
+
+module ia32/elf/end.sts
diff --git a/smack.c b/smack.c
new file mode 100644
index 0000000..f6f9f30
--- /dev/null
+++ b/smack.c
@@ -0,0 +1,579 @@
+/*
+	Simple Macro Generator
+	created by exegete
+*/
+
+/* FILE, fopen, fclose, fgetc, fprintf, printf, EOF */
+#include <stdio.h>
+/* malloc, free, exit */
+#include <stdlib.h>
+/* chdir */
+#include <unistd.h>
+
+/* One defined macro: its name, declared parameter count and body text */
+struct macro
+{
+	const char *lexem;
+	unsigned long long param_count;
+	const char *text;
+	struct macro *next;
+};
+
+/* One actual parameter (an owned lexem) of a macro call */
+struct macro_parameter
+{
+	char *lexem;
+	struct macro_parameter *next;
+};
+
+/*
+	One entry of the input stack. A stream reads either from a file
+	(fd is set) or from the text of a macro (fd is NULL, macro is set,
+	text_offset is the read position inside macro->text).
+*/
+struct input_stream
+{
+	FILE *fd;
+	const struct macro *macro;
+	unsigned long long text_offset;
+	struct macro_parameter *param_list;
+	struct input_stream *next;
+};
+
+/* File name of a file already pulled in through "module" */
+struct module
+{
+	const char *file_name;
+	struct module *next;
+};
+
+/* Exit statuses; each one is also the 1-based index into error_msgs */
+#define ERROR_STATUS_ARG 1
+#define ERROR_STATUS_OPEN 2
+#define ERROR_STATUS_CHDIR 3
+#define ERROR_STATUS_MALLOC 4
+#define ERROR_STATUS_EMPTY_STREAM_LIST 5
+#define ERROR_STATUS_INVALID_STREAM 6
+#define ERROR_STATUS_INVALID_MACRO 7
+#define ERROR_STATUS_INVALID_PARAMETER 8
+#define ERROR_STATUS_INVALID_CALL 9
+#define ERROR_STATUS_INVALID_INCLUDE 10
+#define ERROR_STATUS_INVALID_SHIELD 11
+
+const char *error_msgs[] =
+{
+	"Please, specify input file",
+	"Unable to open file: ",
+	"Unable to change working directory: ",
+	"Unable to allocate memory",
+	"Input stream list is empty",
+	"Input stream is invalid",
+	"Macro definition is invalid",
+	"Macro parameter is invalid",
+	"Macro call is invalid",
+	"Invalid include/module argument",
+	"Invalid '#' argument"
+};
+
+/*
+	Prints the message that belongs to status (an ERROR_STATUS_* value)
+	to stderr, followed by msg when msg is not NULL, and terminates
+	the process with status as its exit code.
+*/
+void exit_error(int status, const char *msg)
+{
+	if(msg)
+		fprintf(stderr, "Error: %s%s\n", error_msgs[status - 1], msg);
+	else
+		fprintf(stderr, "Error: %s\n", error_msgs[status - 1]);
+	exit(status);
+}
+
+/*
+	Pushes a new stream on top of *stream_list. Pass a FILE for a file
+	stream or a macro (with fd == NULL) for a macro text stream.
+*/
+void add_input_stream(struct input_stream **stream_list,
+	FILE *fd, const struct macro *macro)
+{
+	struct input_stream *head = *stream_list;
+	*stream_list = malloc(sizeof(struct input_stream));
+	if(!*stream_list) +
exit_error(ERROR_STATUS_MALLOC, NULL);
+	(*stream_list)->fd = fd;
+	(*stream_list)->macro = macro;
+	(*stream_list)->text_offset = 0;
+	(*stream_list)->param_list = NULL;
+	(*stream_list)->next = head;
+}
+
+/*
+	Pops the top stream: closes its file (if any) and frees its
+	parameters together with their lexems.
+	Returns non-zero while another stream is left on the list.
+*/
+int delete_input_stream(struct input_stream **stream_list)
+{
+	struct input_stream *head = *stream_list;
+	if(!head)
+		exit_error(ERROR_STATUS_EMPTY_STREAM_LIST, NULL);
+	*stream_list = (*stream_list)->next;
+	if(head->fd)
+		fclose(head->fd);
+	while(head->param_list)
+	{
+		struct macro_parameter *param_head = head->param_list;
+		head->param_list = param_head->next;
+		free(param_head->lexem);
+		free(param_head);
+	}
+	free(head);
+	return *stream_list != NULL;
+}
+
+/*
+	Appends lexem to the end of the parameter list of the top stream.
+	The stream takes ownership: delete_input_stream() frees lexem.
+*/
+void add_input_stream_parameter(struct input_stream *stream_list, char *lexem)
+{
+	struct macro_parameter **param = &stream_list->param_list;
+	while(*param)
+		param = &(*param)->next;
+	*param = malloc(sizeof(struct macro_parameter));
+	if(!*param)
+		exit_error(ERROR_STATUS_MALLOC, NULL);
+	(*param)->lexem = lexem;
+	(*param)->next = NULL;
+}
+
+/*
+	Returns the text for parameter number chosen of the top stream:
+	0 is the name of the macro being expanded, 1.. are its actual
+	parameters in call order. Exits with ERROR_STATUS_INVALID_CALL
+	when there is no such parameter.
+*/
+const char *get_input_stream_parameter(struct input_stream *stream_list,
+	unsigned long long chosen)
+{
+	struct macro_parameter *param = stream_list->param_list;
+	if(!chosen)
+	{
+		/* A file stream has no macro, so there is no name to give */
+		if(!stream_list->macro)
+			exit_error(ERROR_STATUS_INVALID_CALL, NULL);
+		return stream_list->macro->lexem;
+	}
+	chosen--;
+	for(; param && chosen > 0; chosen--)
+		param = param->next;
+	if(!param)
+		exit_error(ERROR_STATUS_INVALID_CALL, NULL);
+	return param->lexem;
+}
+
+#define BUFFER_INITIAL_SIZE 4
+
+/*
+	Grows *buffer to twice *buffer_size (or to BUFFER_INITIAL_SIZE when
+	it is still empty), keeping its contents.
+	Returns 1 on success. Returns 0 on failure, in which case *buffer
+	and *buffer_size are left untouched and still valid.
+*/
+int double_buffer(char **buffer, unsigned long long *buffer_size)
+{
+	unsigned long long new_size =
+		*buffer_size ? *buffer_size * 2 : BUFFER_INITIAL_SIZE;
+	char *new_buffer;
+
+	/* Doubling wrapped around: such a request can not be served */
+	if(new_size <= *buffer_size)
+		return 0;
+
+	/* realloc(NULL, n) acts as malloc(n), so the first call works too */
+	new_buffer = realloc(*buffer, new_size);
+	if(!new_buffer)
+		return 0;
+
+	*buffer = new_buffer;
+	*buffer_size = new_size;
+	return 1;
+}
+
+/*
+	Returns the next character of the text of the macro the top stream
+	reads from, or EOF once that text is exhausted.
+*/
+int read_symb_from_text(struct input_stream *stream_list)
+{
+	char symb;
+	if(!stream_list->macro) +
exit_error(ERROR_STATUS_INVALID_STREAM, NULL);
+	if(!stream_list->macro->text)
+		exit_error(ERROR_STATUS_INVALID_STREAM, NULL);
+	symb = stream_list->macro->text[stream_list->text_offset];
+	if(!symb)
+		return EOF;
+	stream_list->text_offset++;
+	/* Like fgetc(): never hand out a negative value that is not EOF */
+	return (unsigned char)symb;
+}
+
+#define COMMENT_SYMB ';'
+
+/*
+	Reads one character from the top stream (its file when it has one,
+	its macro text otherwise). A comment, from COMMENT_SYMB up to the
+	end of the line, is consumed and reported as the '\n' (or EOF)
+	that ended it.
+*/
+int read_real_symb(struct input_stream *stream_list)
+{
+	int symb;
+
+	if(!stream_list)
+		exit_error(ERROR_STATUS_EMPTY_STREAM_LIST, NULL);
+
+	if(stream_list->fd)
+		symb = fgetc(stream_list->fd);
+	else
+		symb = read_symb_from_text(stream_list);
+
+	if(symb == COMMENT_SYMB)
+	{
+		/* Keep reading from the source the comment started in;
+		   fd is NULL for a macro text stream */
+		while(symb != '\n' && symb != EOF)
+		{
+			if(stream_list->fd)
+				symb = fgetc(stream_list->fd);
+			else
+				symb = read_symb_from_text(stream_list);
+		}
+	}
+	return symb;
+}
+
+#define SHIELD_SYMB '\\'
+
+/*
+	Reads one character like read_real_symb(). When shield is not NULL
+	a SHIELD_SYMB is dropped, the character after it is returned
+	instead and *shield is set to 1 (0 for an unshielded character).
+*/
+int read_symb(struct input_stream *stream_list, int *shield)
+{
+	int symb = read_real_symb(stream_list);
+	if(shield)
+	{
+		*shield = 0;
+		if(symb == SHIELD_SYMB)
+		{
+			symb = read_real_symb(stream_list);
+			*shield = 1;
+		}
+	}
+	return symb;
+}
+
+/* Returns the value (0..15) of a hexadecimal digit or -1 for anything else */
+int is_symb_hex(char symb)
+{
+	if(symb >= '0' && symb <= '9')
+		return symb - '0';
+	else
+	if(symb >= 'A' && symb <= 'F')
+		return symb - 'A' + 10;
+	else
+	if(symb >= 'a' && symb <= 'f')
+		return symb - 'a' + 10;
+	else
+		return -1;
+}
+
+/*
+	Checks whether lexem is a hexadecimal number with an optional
+	leading '-'. Returns 1 and stores the value in *dest (when dest
+	is not NULL) if it is, returns 0 otherwise.
+*/
+int is_lexem_hex(const char *lexem, unsigned long long *dest)
+{
+	int sign = 0;
+	unsigned long long hex = 0;
+	if(*lexem == '-' && *(lexem + 1))
+	{
+		sign = 1;
+		lexem++;
+	}
+
+	for(; *lexem; lexem++)
+	{
+		int num = is_symb_hex(*lexem);
+		if(num != -1)
+			hex = (hex << 4) + num;
+		else
+			return 0;
+	}
+	hex = sign ?
~hex + 1 : hex; + if(dest) + *dest = hex; + return 1; +} + +int compare_lexems(const char *lexem_one, const char *lexem_two) +{ + for(; *lexem_one && *lexem_two; lexem_one++, lexem_two++) + if(*lexem_one != *lexem_two) + return 0; + return *lexem_one == *lexem_two; +} + +char *copy_lexem(const char *lexem) +{ + char *buffer = NULL; + unsigned long long buffer_size = 0; + unsigned long long index; + + for(index = 0;; index++) + { + if(index == buffer_size) + if(!double_buffer(&buffer, &buffer_size)) + exit_error(ERROR_STATUS_MALLOC, NULL); + buffer[index] = lexem[index]; + if(!lexem[index]) + break; + } + return buffer; +} + +#define EVAL_SYMB '%' + +unsigned long long get_eval_end(const char *lexem) +{ + unsigned long long counter = 0; + + for(; *lexem && *lexem != EVAL_SYMB; lexem++, counter++) + {} + if(!*lexem) + exit_error(ERROR_STATUS_INVALID_CALL, NULL); + return counter; +} + +char *param_eval(struct input_stream *stream_list, char *lexem) +{ + char *input_lexem = lexem; + char *buffer = NULL; + unsigned long long buffer_size = 0; + const char *param = NULL; + unsigned long long index = 0; + + for(;;) + { + char symb = param && *param ? *param : *lexem++; + param = !param || !*param ? 
NULL : param + 1;
+		if(symb == EVAL_SYMB)
+		{
+			/* lexem now points just past the opening EVAL_SYMB:
+			   cut out the number up to the closing one */
+			unsigned long long param_offset = get_eval_end(lexem);
+			unsigned long long param_num;
+			*(lexem + param_offset) = 0;
+			if(!is_lexem_hex(lexem, &param_num))
+				exit_error(ERROR_STATUS_INVALID_CALL, NULL);
+			lexem += param_offset + 1;
+			param = get_input_stream_parameter(stream_list,
+				param_num);
+			continue;
+		}
+		if(index == buffer_size)
+			if(!double_buffer(&buffer, &buffer_size))
+				exit_error(ERROR_STATUS_MALLOC, NULL);
+		buffer[index] = symb;
+		if(!symb)
+			break;
+		index++;
+	}
+	free(input_lexem);
+	return buffer;
+}
+
+/* Tells whether symb is one of the characters that separate lexems */
+int is_separator(int symb)
+{
+	return symb == ' ' || symb == '\n' || symb == '\t' || symb == '\r';
+}
+
+/*
+	Reads the next lexem from the top stream: skips unshielded
+	separators, collects characters up to the next unshielded separator
+	or EOF and expands parameter references with param_eval().
+	Returns a malloc'ed string, or NULL when the stream ends before
+	a lexem starts.
+*/
+char *read_lexem(struct input_stream *stream_list)
+{
+	char *buffer = NULL;
+	unsigned long long buffer_size = 0;
+	int symb;
+	int shield;
+	unsigned long long index;
+
+	for(;;)
+	{
+		symb = read_symb(stream_list, &shield);
+		if(is_separator(symb) && !shield)
+			continue;
+		if(symb == EOF)
+			return NULL;
+		break;
+	}
+
+	for(index = 0;; index++)
+	{
+		if(index == buffer_size)
+			if(!double_buffer(&buffer, &buffer_size))
+				exit_error(ERROR_STATUS_MALLOC, NULL);
+		if(symb == EOF || (is_separator(symb) && !shield))
+		{
+			buffer[index] = 0;
+			break;
+		}
+		buffer[index] = symb;
+		symb = read_symb(stream_list, &shield);
+	}
+	buffer = param_eval(stream_list, buffer);
+	return buffer;
+}
+
+#define MACRO_END_SYMB ']'
+
+/*
+	Collects the raw text of a macro body: every character up to the
+	first unshielded MACRO_END_SYMB (or EOF). Returns it as a
+	malloc'ed string.
+*/
+const char *read_macro_text(struct input_stream *stream_list)
+{
+	char *buffer = NULL;
+	unsigned long long buffer_size = 0;
+	unsigned long long index;
+
+	for(index = 0;; index++)
+	{
+		int shield;
+		int symb = read_symb(stream_list, &shield);
+		if(index == buffer_size)
+			if(!double_buffer(&buffer, &buffer_size))
+				exit_error(ERROR_STATUS_MALLOC, NULL);
+		if(symb == EOF || (symb == MACRO_END_SYMB && !shield))
+		{
+			buffer[index] = 0;
+			break;
+		}
+		buffer[index] = symb;
+	}
+	return buffer;
+}
+
+/*
+	Reads a macro definition (name, parameter count, text) from the
+	top stream and puts it at the head of *macro_list.
+*/
+void create_macro(struct input_stream *stream_list, struct macro **macro_list)
+{
+	char
*param_count_lexem; + struct macro *head = *macro_list; + *macro_list = malloc(sizeof(struct macro)); + if(!*macro_list) + exit_error(ERROR_STATUS_MALLOC, NULL); + (*macro_list)->lexem = read_lexem(stream_list); + if(!(*macro_list)->lexem) + exit_error(ERROR_STATUS_INVALID_MACRO, NULL); + param_count_lexem = read_lexem(stream_list); + if(!param_count_lexem || !is_lexem_hex(param_count_lexem, + &(*macro_list)->param_count)) + exit_error(ERROR_STATUS_INVALID_MACRO, NULL); + free(param_count_lexem); + (*macro_list)->text = read_macro_text(stream_list); + (*macro_list)->next = head; +} + +int find_module(struct module *module_list, const char *file_name) +{ + for(; module_list; module_list = module_list->next) + if(compare_lexems(module_list->file_name, file_name)) + return 1; + return 0; +} + +void add_module(struct module **module_list, const char *file_name) +{ + struct module *head = *module_list; + *module_list = malloc(sizeof(struct module)); + if(!*module_list) + exit_error(ERROR_STATUS_MALLOC, NULL); + (*module_list)->file_name = file_name; + (*module_list)->next = head; +} + +void include_file(struct input_stream **stream_list, + struct module **module_list) +{ + char *file_name = read_lexem(*stream_list); + FILE *fd; + if(!file_name) + exit_error(ERROR_STATUS_INVALID_INCLUDE, NULL); + if(module_list && find_module(*module_list, file_name)) + { + free(file_name); + return; + } + fd = fopen(file_name, "r"); + if(!fd) + exit_error(ERROR_STATUS_OPEN, file_name); + if(module_list) + add_module(module_list, file_name); + else + free(file_name); + add_input_stream(stream_list, fd, NULL); +} + +void push_parameter(struct input_stream *input_stream, + struct input_stream *actual_stream) +{ + char *lexem = read_lexem(input_stream); + if(!lexem) + exit_error(ERROR_STATUS_INVALID_PARAMETER, NULL); + add_input_stream_parameter(actual_stream, lexem); +} + +int find_macro(struct input_stream **stream_list, struct macro *macro_list, + const char *lexem) +{ + for(; 
macro_list; macro_list = macro_list->next) + { + if(compare_lexems(macro_list->lexem, lexem)) + { + struct input_stream *old_stream = *stream_list; + unsigned long long param_count = + macro_list->param_count; + add_input_stream(stream_list, NULL, macro_list); + for(; param_count > 0; param_count--) + push_parameter(old_stream, *stream_list); + return 1; + } + } + return 0; +} + +#define DIRECTORY_SEPARATOR_SYMB '/' + +unsigned long long find_last_dir_separator_pos(const char *file_name) +{ + const char *orig_file_name = file_name; + unsigned long long pos = 0; + for(; *file_name; file_name++) + if(*file_name == DIRECTORY_SEPARATOR_SYMB) + pos = file_name - orig_file_name; + return pos; +} + +void initialize_stream(struct input_stream **stream_list, + const char *file_name) +{ + char *dir_path = copy_lexem(file_name); + unsigned long long dir_sep_pos = find_last_dir_separator_pos(dir_path); + FILE *fd = fopen(file_name, "r"); + if(!fd) + exit_error(ERROR_STATUS_OPEN, file_name); + add_input_stream(stream_list, fd, NULL); + if(dir_sep_pos) + { + dir_path[dir_sep_pos] = 0; + if(chdir(dir_path) == -1) + exit_error(ERROR_STATUS_CHDIR, NULL); + } + free(dir_path); +} + +void print_lexem(const char *lexem) +{ + printf("%s ", lexem); +} + +#define MACRO_SHIELD_LEXEM "#" +#define MACRO_START_LEXEM "[" +#define MACRO_INCLUDE_LEXEM "include" +#define MACRO_MODULE_LEXEM "module" +#define MACRO_LIT_LEXEM "literal" + +int main(int argc, char **argv) +{ + struct input_stream *stream_list = NULL; + struct macro *macro_list = NULL; + struct module *module_list = NULL; + char *lexem; + + if(argc <= 1) + exit_error(ERROR_STATUS_ARG, NULL); + else + initialize_stream(&stream_list, argv[1]); + + for(;; free(lexem)) + { + lexem = read_lexem(stream_list); + if(!lexem) + { + if(delete_input_stream(&stream_list)) + continue; + else + break; + } + if(compare_lexems(lexem, MACRO_SHIELD_LEXEM)) + { + free(lexem); + lexem = read_lexem(stream_list); + if(!lexem) + 
exit_error(ERROR_STATUS_INVALID_SHIELD, NULL);
+			print_lexem(lexem);
+			continue;
+		}
+		if(compare_lexems(lexem, MACRO_START_LEXEM))
+		{
+			create_macro(stream_list, &macro_list);
+			continue;
+		}
+		if(compare_lexems(lexem, MACRO_INCLUDE_LEXEM))
+		{
+			include_file(&stream_list, NULL);
+			continue;
+		}
+		if(compare_lexems(lexem, MACRO_MODULE_LEXEM))
+		{
+			include_file(&stream_list, &module_list);
+			continue;
+		}
+		if(is_lexem_hex(lexem, NULL))
+		{
+			/* A number goes through the "literal" macro when the
+			   input has defined one; the number is its parameter */
+			if(find_macro(&stream_list, macro_list,
+				MACRO_LIT_LEXEM))
+			{
+				add_input_stream_parameter(stream_list,
+					copy_lexem(lexem));
+				continue;
+			}
+		}
+		if(find_macro(&stream_list, macro_list, lexem))
+			continue;
+		print_lexem(lexem);
+	}
+
+	return 0;
+}
diff --git a/strans.c b/strans.c
new file mode 100644
index 0000000..e3d834d
--- /dev/null
+++ b/strans.c
@@ -0,0 +1,652 @@
+/*
+	Simple Stack Translator
+	created by exegete
+*/
+
+/* fprintf, getchar, EOF */
+#include <stdio.h>
+/* malloc, free, exit */
+#include <stdlib.h>
+
+/* One value of the translator's data stack */
+struct stack_elem
+{
+	unsigned long long value;
+	struct stack_elem *next;
+};
+
+/* A named value */
+struct identifier
+{
+	const char *lexem;
+	unsigned long long value;
+	struct identifier *next;
+};
+
+/*
+	An output segment. buffer holds size compiled bytes out of
+	real_size allocated ones. pointer_address advances with every
+	compiled byte. base_address and base_offset translate an address
+	into a buffer index (address - base_address + base_offset).
+	data_size is the default item size in bytes, data_endianness
+	selects the byte order (0 puts the least significant byte first).
+*/
+struct segment
+{
+	char *buffer;
+	unsigned long long real_size;
+	unsigned long long size;
+	unsigned long long pointer_address;
+	unsigned long long base_address;
+	unsigned long long base_offset;
+	int data_size;
+	int data_endianness;
+	struct segment *next;
+};
+
+/*
+	Whole translator state. chosen_segment is the position of the
+	current segment inside seg_list, counted from 0.
+*/
+struct environment
+{
+	struct stack_elem *stack;
+	struct identifier *id_list;
+	struct segment *seg_list;
+	unsigned long long chosen_segment;
+};
+
+/* A built-in operator: its handler and the lexem it is bound to */
+struct operator
+{
+	void (*func)(struct environment *env);
+	const char *lexem;
+};
+
+/* Exit statuses; each one is also the 1-based index into error_msgs */
+#define ERROR_STATUS_STACK_OVERFLOW 1
+#define ERROR_STATUS_STACK_EMPTY 2
+#define ERROR_STATUS_ID_LIST_OVERFLOW 3
+#define ERROR_STATUS_SEG_LIST_OVERFLOW 4
+#define ERROR_STATUS_INVALID_SEGMENT 5
+#define ERROR_STATUS_WORD_BUFFER_OVERFLOW 6
+#define ERROR_STATUS_LEXEM_OVERFLOW 7
+#define ERROR_STATUS_UNDEFINED 8
+#define ERROR_STATUS_INVALID_ACCESS 9
+#define ERROR_STATUS_INVALID_WORD_SIZE 10
+#define ERROR_STATUS_WORD_ENDIANNESS_INVALID 11 + +const char *error_msgs[] = +{ + "Stack overflow", + "Stack is empty", + "Identifier list overflow", + "Segment list overflow", + "Invalid chosen segment", + "Word buffer overflow", + "Lexem is too big", + "Undefined lexem: ", + "Invalid segment access", + "Invalid segment data size", + "Invalid segment data endianness" +}; + +void exit_error(int status, const char *lexem) +{ + if(lexem) + fprintf(stderr, "Error: %s%s\n", error_msgs[status - 1], + lexem); + else + fprintf(stderr, "Error: %s\n", error_msgs[status - 1]); + exit(status); +} + +unsigned long long create_segment(struct environment *env) +{ + struct segment **chosen_segment = &env->seg_list; + unsigned long long segment_counter = 0; + for(; *chosen_segment; chosen_segment = &(*chosen_segment)->next) + segment_counter++; + *chosen_segment = malloc(sizeof(struct segment)); + if(!*chosen_segment) + exit_error(ERROR_STATUS_SEG_LIST_OVERFLOW, NULL); + (*chosen_segment)->buffer = NULL; + (*chosen_segment)->real_size = 0; + (*chosen_segment)->size = 0; + (*chosen_segment)->pointer_address = 0; + (*chosen_segment)->base_address = 0; + (*chosen_segment)->base_offset = 0; + (*chosen_segment)->data_size = 1; + (*chosen_segment)->data_endianness = 0; + (*chosen_segment)->next = NULL; + return segment_counter; +} + +struct segment *get_segment(struct environment *env) +{ + struct segment *chosen_segment = env->seg_list; + unsigned long long segment_counter = env->chosen_segment; + while(chosen_segment && segment_counter--) + chosen_segment = chosen_segment->next; + if(!chosen_segment) + exit_error(ERROR_STATUS_INVALID_SEGMENT, NULL); + return chosen_segment; +} + +void initialize_environment(struct environment *env) +{ + env->stack = NULL; + env->id_list = NULL; + env->seg_list = NULL; + env->chosen_segment = 0; +} + +int is_separator(int symb) +{ + return symb == ' ' || symb == '\n' || symb == '\t' || symb == '\r'; +} + +#define BUFFER_INITIAL_SIZE 4 + +int 
double_buffer(char **buffer, unsigned long long *buffer_size) +{ + char *tmp_pointer = *buffer; + unsigned long long tmp_size = *buffer_size; + + if(*buffer_size == 0) + *buffer_size = BUFFER_INITIAL_SIZE; + else + *buffer_size *= 2; + + *buffer = malloc(*buffer_size); + if(!*buffer) + return 0; + + for(; tmp_size > 0; tmp_size--) + (*buffer)[tmp_size - 1] = tmp_pointer[tmp_size - 1]; + + free(tmp_pointer); + return 1; +} + +char *read_lexem() +{ + char *buffer = NULL; + unsigned long long buffer_size = 0; + int symb; + unsigned long long index; + + while(is_separator(symb = getchar())) + {} + + if(symb == EOF) + return NULL; + + for(index = 0;; index++) + { + if(index == buffer_size) + if(!double_buffer(&buffer, &buffer_size)) + exit_error(ERROR_STATUS_LEXEM_OVERFLOW, NULL); + if(symb == EOF || is_separator(symb)) + { + buffer[index] = 0; + break; + } + buffer[index] = symb; + symb = getchar(); + } + return buffer; +} + +void push_stack_elem(struct stack_elem **stack, unsigned long long value) +{ + struct stack_elem *head = *stack; + *stack = malloc(sizeof(struct stack_elem)); + if(!*stack) + exit_error(ERROR_STATUS_STACK_OVERFLOW, NULL); + (*stack)->value = value; + (*stack)->next = head; +} + +unsigned long long pop_stack_elem(struct stack_elem **stack) +{ + struct stack_elem *head = *stack; + unsigned long long value; + if(!head) + exit_error(ERROR_STATUS_STACK_EMPTY, NULL); + value = head->value; + *stack = head->next; + free(head); + return value; +} + +int is_symb_hex(char symb) +{ + if(symb >= '0' && symb <= '9') + return symb - '0'; + else + if(symb >= 'A' && symb <= 'F') + return symb - 'A' + 10; + else + if(symb >= 'a' && symb <= 'f') + return symb - 'a' + 10; + else + return -1; +} + +int is_lexem_hex(struct environment *env, const char *lexem) +{ + int sign = 0; + unsigned long long hex = 0; + if(*lexem == '-' && *(lexem + 1)) + { + sign = 1; + lexem++; + } + + for(; *lexem; lexem++) + { + int num = is_symb_hex(*lexem); + if(num != -1) + hex = (hex << 
4) + num;
+		else
+			return 0;
+	}
+	hex = sign ? ~hex + 1 : hex;
+	push_stack_elem(&env->stack, hex);
+	return 1;
+}
+
+/* Returns 1 when both lexems are equal character by character, else 0 */
+int compare_lexems(const char *lexem_one, const char *lexem_two)
+{
+	for(; *lexem_one && *lexem_two; lexem_one++, lexem_two++)
+		if(*lexem_one != *lexem_two)
+			return 0;
+	return *lexem_one == *lexem_two;
+}
+
+/*
+	Puts a new identifier (lexem bound to value) at the head of
+	*id_list. lexem is stored as is, not copied. Exits with
+	ERROR_STATUS_ID_LIST_OVERFLOW when memory runs out.
+*/
+void add_identifier(struct identifier **id_list, const char *lexem,
+	unsigned long long value)
+{
+	/* Check the allocation itself; id_list only says where to link it */
+	struct identifier *node = malloc(sizeof(struct identifier));
+	if(!node)
+		exit_error(ERROR_STATUS_ID_LIST_OVERFLOW, NULL);
+	node->lexem = lexem;
+	node->value = value;
+	node->next = *id_list;
+	*id_list = node;
+}
+
+/*
+	Looks lexem up in the identifier list, most recently added first.
+	When found, pushes its value on the stack and returns 1;
+	returns 0 otherwise.
+*/
+int find_identifier(struct environment *env, const char *lexem)
+{
+	struct identifier *id_list = env->id_list;
+	for(; id_list; id_list = id_list->next)
+	{
+		if(compare_lexems(lexem, id_list->lexem))
+		{
+			push_stack_elem(&env->stack, id_list->value);
+			return 1;
+		}
+	}
+	return 0;
+}
+
+/*
+	Appends one byte to the chosen segment, growing its buffer when
+	it is full, and advances the segment's size and pointer address.
+*/
+void segment_compile_byte(struct environment *env, char byte)
+{
+	unsigned long long index = get_segment(env)->size;
+	if(index == get_segment(env)->real_size)
+		if(!double_buffer(&get_segment(env)->buffer,
+			&get_segment(env)->real_size))
+			exit_error(ERROR_STATUS_WORD_BUFFER_OVERFLOW, NULL);
+	get_segment(env)->buffer[index] = byte;
+	get_segment(env)->size++;
+	get_segment(env)->pointer_address++;
+}
+
+/*
+	Appends the size low-order bytes of value to the chosen segment,
+	in the byte order the segment is set to.
+*/
+void segment_compile(struct environment *env, unsigned long long value,
+	int size)
+{
+	int dir = get_segment(env)->data_endianness ?
-1 : 1; + int index = get_segment(env)->data_endianness * (size - 1); + for(; index < size && index >= 0; index += dir) + segment_compile_byte(env, value >> index * 8); +} + +unsigned long long convert_address(struct environment *env, + unsigned long long address, int size) +{ + unsigned long long pointer_address = + get_segment(env)->pointer_address; + unsigned long long base_address = get_segment(env)->base_address; + unsigned long long base_offset = get_segment(env)->base_offset; + if(address < base_address || address + size > pointer_address) + exit_error(ERROR_STATUS_INVALID_ACCESS, NULL); + return address - base_address + base_offset; +} + +unsigned long long segment_read(struct environment *env, + unsigned long long address, int size) +{ + unsigned long long value = 0; + int dir = get_segment(env)->data_endianness ? 1 : -1; + int index = get_segment(env)->data_endianness ? 0 : (size - 1); + address = convert_address(env, address, size); + for(; index < size && index >= 0; index += dir) + value = (value << 8) + + (get_segment(env)->buffer[address + index] & 0xFF); + return value; +} + +void segment_write(struct environment *env, unsigned long long value, + unsigned long long address, int size) +{ + int dir = get_segment(env)->data_endianness ? 
-1 : 1; + int index = get_segment(env)->data_endianness * (size - 1); + address = convert_address(env, address, size); + for(; index < size && index >= 0; index += dir) + { + get_segment(env)->buffer[address + index] = value; + value >>= 8; + } +} + +void translator_drop(struct environment *env) +{ + pop_stack_elem(&env->stack); +} + +void translator_dup(struct environment *env) +{ + unsigned long long value = pop_stack_elem(&env->stack); + push_stack_elem(&env->stack, value); + push_stack_elem(&env->stack, value); +} + +void translator_over(struct environment *env) +{ + unsigned long long value_one = pop_stack_elem(&env->stack); + unsigned long long value_two = pop_stack_elem(&env->stack); + push_stack_elem(&env->stack, value_two); + push_stack_elem(&env->stack, value_one); + push_stack_elem(&env->stack, value_two); +} + +void translator_swap(struct environment *env) +{ + unsigned long long value_one = pop_stack_elem(&env->stack); + unsigned long long value_two = pop_stack_elem(&env->stack); + push_stack_elem(&env->stack, value_one); + push_stack_elem(&env->stack, value_two); +} + +void translator_add(struct environment *env) +{ + unsigned long long first = pop_stack_elem(&env->stack); + unsigned long long second = pop_stack_elem(&env->stack); + push_stack_elem(&env->stack, second + first); +} + +void translator_sub(struct environment *env) +{ + unsigned long long first = pop_stack_elem(&env->stack); + unsigned long long second = pop_stack_elem(&env->stack); + push_stack_elem(&env->stack, second - first); +} + +void translator_mul(struct environment *env) +{ + unsigned long long first = pop_stack_elem(&env->stack); + unsigned long long second = pop_stack_elem(&env->stack); + push_stack_elem(&env->stack, second * first); +} + +void translator_div(struct environment *env) +{ + unsigned long long first = pop_stack_elem(&env->stack); + unsigned long long second = pop_stack_elem(&env->stack); + push_stack_elem(&env->stack, second / first); +} + +void 
translator_mod(struct environment *env) +{ + unsigned long long first = pop_stack_elem(&env->stack); + unsigned long long second = pop_stack_elem(&env->stack); + push_stack_elem(&env->stack, second % first); +} + +void translator_define_identifier(struct environment *env) +{ + unsigned long long value = pop_stack_elem(&env->stack); + add_identifier(&env->id_list, read_lexem(), value); +} + +void translator_compile_one(struct environment *env) +{ + unsigned long long value = pop_stack_elem(&env->stack); + segment_compile(env, value, 1); +} + +void translator_compile_two(struct environment *env) +{ + unsigned long long value = pop_stack_elem(&env->stack); + segment_compile(env, value, 2); +} + +void translator_compile_four(struct environment *env) +{ + unsigned long long value = pop_stack_elem(&env->stack); + segment_compile(env, value, 4); +} + +void translator_compile_eight(struct environment *env) +{ + unsigned long long value = pop_stack_elem(&env->stack); + segment_compile(env, value, 8); +} + +void translator_compile(struct environment *env) +{ + unsigned long long value = pop_stack_elem(&env->stack); + segment_compile(env, value, get_segment(env)->data_size); +} + +void translator_reserve(struct environment *env) +{ + unsigned long long count = pop_stack_elem(&env->stack); + while(count--) + segment_compile(env, 0, 1); +} + +void translator_read_one(struct environment *env) +{ + unsigned long long value = segment_read(env, + pop_stack_elem(&env->stack), 1); + push_stack_elem(&env->stack, value); +} + +void translator_read_two(struct environment *env) +{ + unsigned long long value = segment_read(env, + pop_stack_elem(&env->stack), 2); + push_stack_elem(&env->stack, value); +} + +void translator_read_four(struct environment *env) +{ + unsigned long long value = segment_read(env, + pop_stack_elem(&env->stack), 4); + push_stack_elem(&env->stack, value); +} + +void translator_read_eight(struct environment *env) +{ + unsigned long long value = segment_read(env, + 
pop_stack_elem(&env->stack), 8); + push_stack_elem(&env->stack, value); +} + +void translator_read(struct environment *env) +{ + unsigned long long value = segment_read(env, + pop_stack_elem(&env->stack), get_segment(env)->data_size); + push_stack_elem(&env->stack, value); +} + +void translator_write_one(struct environment *env) +{ + unsigned long long address = pop_stack_elem(&env->stack); + unsigned long long value = pop_stack_elem(&env->stack); + segment_write(env, value, address, 1); +} + +void translator_write_two(struct environment *env) +{ + unsigned long long address = pop_stack_elem(&env->stack); + unsigned long long value = pop_stack_elem(&env->stack); + segment_write(env, value, address, 2); +} + +void translator_write_four(struct environment *env) +{ + unsigned long long address = pop_stack_elem(&env->stack); + unsigned long long value = pop_stack_elem(&env->stack); + segment_write(env, value, address, 4); +} + +void translator_write_eight(struct environment *env) +{ + unsigned long long address = pop_stack_elem(&env->stack); + unsigned long long value = pop_stack_elem(&env->stack); + segment_write(env, value, address, 8); +} + +void translator_write(struct environment *env) +{ + unsigned long long address = pop_stack_elem(&env->stack); + unsigned long long value = pop_stack_elem(&env->stack); + segment_write(env, value, address, get_segment(env)->data_size); +} + +void translator_create_segment(struct environment *env) +{ + push_stack_elem(&env->stack, create_segment(env)); +} + +void translator_choose_segment(struct environment *env) +{ + env->chosen_segment = pop_stack_elem(&env->stack); +} + +void translator_set_base(struct environment *env) +{ + get_segment(env)->base_address = pop_stack_elem(&env->stack); + get_segment(env)->pointer_address = get_segment(env)->base_address; + get_segment(env)->base_offset = get_segment(env)->size; +} + +#define WORD_SIZE_MAX 8 + +void translator_set_data_size(struct environment *env) +{ + int target_size = 
pop_stack_elem(&env->stack); + if(target_size <= 0 || target_size > WORD_SIZE_MAX) + exit_error(ERROR_STATUS_INVALID_WORD_SIZE, NULL); + get_segment(env)->data_size = target_size; +} + +void translator_set_data_endianness(struct environment *env) +{ + int endianness = pop_stack_elem(&env->stack); + if(endianness > 1) + exit_error(ERROR_STATUS_WORD_ENDIANNESS_INVALID, NULL); + get_segment(env)->data_endianness = endianness; +} + +void translator_get_offset(struct environment *env) +{ + push_stack_elem(&env->stack, get_segment(env)->pointer_address); +} + +void translator_get_base(struct environment *env) +{ + push_stack_elem(&env->stack, get_segment(env)->base_address); +} + +void translator_print(struct environment *env) +{ + unsigned long long value = pop_stack_elem(&env->stack); + fprintf(stderr, "%llx\n", value); +} + +const struct operator translator_operators[] = +{ + { &translator_drop, "?drop" }, + { &translator_dup, "?dup" }, + { &translator_over, "?over" }, + { &translator_swap, "?swap" }, + { &translator_add, "?+" }, + { &translator_sub, "?-" }, + { &translator_mul, "?*" }, + { &translator_div, "?/" }, + { &translator_mod, "?mod" }, + { &translator_define_identifier, "??" }, + { &translator_compile_one, "?'" }, + { &translator_compile_one, "?1." }, + { &translator_compile_two, "?2." }, + { &translator_compile_four, "?4." }, + { &translator_compile_eight, "?8." }, + { &translator_compile, "?." }, + { &translator_reserve, "?res" }, + { &translator_read_one, "?1@" }, + { &translator_read_two, "?2@" }, + { &translator_read_four, "?4@" }, + { &translator_read_eight, "?8@" }, + { &translator_read, "?@" }, + { &translator_write_one, "?1!" }, + { &translator_write_two, "?2!" }, + { &translator_write_four, "?4!" }, + { &translator_write_eight, "?8!" }, + { &translator_write, "?!" 
}, + { &translator_create_segment, "?create" }, + { &translator_choose_segment, "?choose" }, + { &translator_set_base, "?org" }, + { &translator_set_data_size, "?size" }, + { &translator_set_data_endianness, "?endianness" }, + { &translator_get_offset, "?$" }, + { &translator_get_base, "?$$" }, + { &translator_print, "?print" }, +}; + +int execute_operator(struct environment *env, const char *lexem) +{ + int operators_count = + sizeof(translator_operators) / sizeof(*translator_operators); + int index; + + for(index = 0; index < operators_count; index++) + { + if(compare_lexems(lexem, translator_operators[index].lexem)) + { + (*translator_operators[index].func)(env); + break; + } + } + + return index != operators_count; +} + +void print_target_buffer(struct environment *env) +{ + struct segment *seg_list; + unsigned long long index; + for(seg_list = env->seg_list; seg_list; seg_list = seg_list->next) + for(index = 0; index < seg_list->size; index++) + putchar(seg_list->buffer[index]); +} + +int main() +{ + struct environment env; + char *lexem; + + initialize_environment(&env); + + for(;; free(lexem)) + { + lexem = read_lexem(); + if(!lexem) + break; + if(find_identifier(&env, lexem)) + continue; + if(is_lexem_hex(&env, lexem)) + continue; + if(!execute_operator(&env, lexem)) + exit_error(ERROR_STATUS_UNDEFINED, lexem); + } + + print_target_buffer(&env); + + return 0; +} -- cgit v1.2.3