diff --git a/README.md b/README.md
index 919eda0..df57647 100644
--- a/README.md
+++ b/README.md
@@ -8,7 +8,7 @@ Dependencies: wat2wasm from [wabt](https://github.com/WebAssembly/wabt/tree/main
 ```sh
 make
 make tests
-./wai tests/factorial.wasw
+./wai tests/factorial.wasm fac 4
 ```
 
 ## Resources
diff --git a/wai.c b/wai.c
index 48f897f..06235c2 100644
--- a/wai.c
+++ b/wai.c
@@ -5,7 +5,7 @@
  *
  * This program, named WAI, is a WebAssembly Interpreter.
  * Compile using make.
- * Usage: wai [FILE.wasm]
+ * Usage: wai [file.wasm] [function name] [function arguments ...]
  *
  * Copyright (C) 2025 orangrot
  *
@@ -25,11 +25,17 @@
 
 #include
 #include
+#include
 #include
 #include
 #include
 #include
-#include
+
+#define STACK_CAPACITY 1024
+#define MAX_FUNCTIONS 128
+#define MAX_FUNCTION_PARAMETERS 32
+#define MAX_FUNCTION_RESULTS 32
+#define MAX_EXPORT_NAME_LENGTH 128
 
 enum section {
     Section_Custom,
@@ -47,13 +53,34 @@ enum section {
     Section_Data_Count,
 };
 
-#define STACK_CAPACITY 1024
+enum TYPE {
+    TYPE_I32 = 0x7F,
+    TYPE_I64 = 0x7E,
+    TYPE_F32 = 0x7D,
+    TYPE_F64 = 0x7C,
+    TYPE_V128 = 0x7B,
+    TYPE_FUNCREF = 0x70,
+    TYPE_EXTERNREF = 0x6F
+};
+
 struct stack {
     u_char items[STACK_CAPACITY];
     size_t bytes;
 };
 
-#define MAX_FUNCTIONS 128
+struct func_type_t {
+    enum TYPE param[MAX_FUNCTION_PARAMETERS];
+    enum TYPE result[MAX_FUNCTION_RESULTS];
+    size_t num_params;
+    size_t num_results;
+};
+
+struct func_t {
+    size_t func_type_index;
+    size_t num_local_vars;
+    u_char *func_start_addr;
+};
+
 enum export_desc {
     Export_Func,
     Export_Table,
@@ -62,15 +89,16 @@ enum export_desc {
 };
 
 struct export_t {
-    u_char name[128];
+    char name[MAX_EXPORT_NAME_LENGTH];
     size_t name_length;
-    uint32_t index;
+    size_t func_index;
     enum export_desc description;
 };
 
 struct module {
-    struct type_t *types;
-    u_char *funcs[MAX_FUNCTIONS];
+    struct func_type_t func_types[MAX_FUNCTIONS]; // TYPE SECTION
+    u_char *code[MAX_FUNCTIONS]; // CODE SECTION
+    size_t func_to_func_type[MAX_FUNCTIONS]; // FUNCTION SECTION
     struct table_t *tables;
     struct mem_t *mems;
     struct global_t *globals;
@@ -81,6 +109,7 @@ struct module {
     struct export_t exports[MAX_FUNCTIONS];
     u_char *binary;
     struct stack stack;
+    size_t num_exports;
     int scope;
 };
 
@@ -96,16 +125,6 @@ enum INSTRUCTION {
     INSTR_LOCAL_GET = 0x20,
 };
 
-enum TYPE {
-    TYPE_I32 = 0x7F,
-    TYPE_I64 = 0x7E,
-    TYPE_F32 = 0x7D,
-    TYPE_F64 = 0x7C,
-    TYPE_V128= 0x7B,
-    TYPE_FUNCREF = 0x70,
-    TYPE_EXTERNREF = 0x6F
-};
-
 static const int TYPE_SIZE[] = {
     [TYPE_I32] = 4,
     [TYPE_I64] = 8,
@@ -116,6 +135,16 @@ static const int TYPE_SIZE[] = {
     [TYPE_EXTERNREF] = 16
 };
 
+static const char *TYPE_NAME[] = {
+    [TYPE_I32] = "I32",
+    [TYPE_I64] = "I64",
+    [TYPE_F32] = "F32",
+    [TYPE_F64] = "F64",
+    [TYPE_V128] = "V128",
+    [TYPE_FUNCREF] = "FREF",
+    [TYPE_EXTERNREF] = "EXTR"
+};
+
 struct value_t {
     enum TYPE type;
     union {
@@ -131,6 +160,55 @@ struct value_t {
 
 #define incr(i, len) i++; if (i >= len) {return -1;}
 
+/* Print a value followed by its type name in parentheses, e.g. "24 (I32)". */
+void print_value(struct value_t *value) {
+    void *number = &value->value;
+    switch (value->type) {
+        case TYPE_I32:
+            printf("%d", *(int32_t*)number);
+            break;
+        case TYPE_I64:
+            printf("%ld", *(int64_t*)number);
+            break;
+        case TYPE_F32:
+            printf("%f", *(float*)number);
+            break;
+        case TYPE_F64:
+            printf("%f", *(double*)number);
+            break;
+        case TYPE_V128: {
+            // printf has no conversion for 128-bit integers, so print the two
+            // 64-bit halves in memory order rather than passing __int128 to %ld.
+            uint64_t halves[2];
+            memcpy(halves, number, sizeof halves);
+            printf("0x%016llx:%016llx", (unsigned long long)halves[0], (unsigned long long)halves[1]);
+            break;
+        }
+        case TYPE_FUNCREF:
+            printf("%ld", *(int64_t*)number);
+            break;
+        case TYPE_EXTERNREF:
+            printf("%ld", *(int64_t*)number);
+            break;
+    }
+    printf(" (%s)", TYPE_NAME[value->type]);
+}
+
+/*
+ * Copy the nth value below the entry whose type tag is at s->items[from - 1]
+ * into *value, without modifying the stack (nth == 0 is that entry itself).
+ * Each entry is stored as its payload followed by a one-byte type tag.
+ */
+void stack_peak(struct stack *s, struct value_t *value, size_t nth, size_t from) {
+    int byte_i = from - 1;
+    for (int element_i = 0; element_i < nth && byte_i > 0; element_i++) {
+        // Step over the payload and its tag byte, matching stack_pop.
+        byte_i -= TYPE_SIZE[s->items[byte_i]] + 1;
+    }
+    value->type = s->items[byte_i];
+    memcpy(&value->value, &(s->items[byte_i - TYPE_SIZE[value->type]]), TYPE_SIZE[value->type]);
+}
+
 void stack_push(struct stack *s, const struct value_t *value) {
     size_t type_size = TYPE_SIZE[value->type];
     memcpy(&(s->items[s->bytes]), &value->value, type_size);
@@ -141,31 +219,10 @@ void stack_push(struct stack *s, const struct value_t *value) {
     for (int i = s->bytes - 1; i > 0; i -= TYPE_SIZE[s->items[i]] + 1) {
         enum TYPE t = s->items[i];
         size_t type_size = TYPE_SIZE[t];
-        void *value = &s->items[i - type_size];
-
-        switch (t) {
-            case TYPE_I32:
-                printf("%d (I32)", *(int32_t*)value);
-                break;
-            case TYPE_I64:
-                printf("%ld (I32)", *(int64_t*)value);
-                break;
-            case TYPE_F32:
-                printf("%f (F32)", *(float*)value);
-                break;
-            case TYPE_F64:
-                printf("%f (F64)", *(double*)value);
-                break;
-            case TYPE_V128:
-                printf("%ld (V128)", *(__int128*)value);
-                break;
-            case TYPE_FUNCREF:
-                printf("%ld (EREF)", *(int64_t*)value);
-                break;
-            case TYPE_EXTERNREF:
-                printf("%ld (EREF)", *(int64_t*)value);
-                break;
-        }
+        void *number = &s->items[i - type_size];
+        struct value_t stack_value = {0};
+        stack_peak(s, &stack_value, 0, i + 1);
+        print_value(&stack_value);
         printf(", ");
     }
     printf("\n");
@@ -181,30 +238,10 @@ void stack_pop(struct stack *s, struct value_t *value) {
     s->bytes -= TYPE_SIZE[value->type] + 1;
 }
 
-int parse_type(u_char *binary, int len) {
+int parse_function(struct module *module, size_t func_i, int len);
+int parse_instruction(struct module *module, u_char *binary, size_t func_i, size_t func_stack_begin, int len) {
     int i = 0;
-    enum TYPE param = binary[i];
-    printf("type %x\n", param);
-    incr(i, len);
-    switch (param) {
-        case TYPE_I32:
-        case TYPE_I64:
-        case TYPE_F32:
-        case TYPE_F64:
-        case TYPE_V128:
-        case TYPE_FUNCREF:
-        case TYPE_EXTERNREF:
-            break;
-        default:
-            return -1;
-    }
-    return i;
-}
-
-int parse_function(struct module *module, u_char *binary, double param, int len);
-int parse_instruction(struct module *module, u_char *binary, double param, int len) {
-    int i = 0;
-    enum INSTRUCTION instr = (u_char) binary[i];
+    enum INSTRUCTION instr = binary[i];
     u_char *instr_addr = &binary[i];
     struct value_t a = {0};
     struct value_t b = {0};
@@ -216,8 +253,8 @@ int parse_instruction(struct module *module, u_char *binary, double param, int l
         case INSTR_CALL: {
             int func_index = binary[i];
             incr(i, len);
-            stack_pop(&module->stack, &a);
-            parse_function(module, module->funcs[func_index], a.value.f64, len);
+            // stack_pop(&module->stack, &a);
+            parse_function(module, func_index, len);
             break;
         }
         case INSTR_ELSE:
@@ -271,7 +308,7 @@ int parse_instruction(struct module *module, u_char *binary, double param, int l
                 // TODO test condition with correct type.
                 // This might not matter since all types are false with 0x0
                 if (a.value.i64) {
-                    i += parse_instruction(module, &binary[i], param, len);
+                    i += parse_instruction(module, &binary[i], func_i, func_stack_begin, len);
                 } else {
                     incr(i, len);
                 }
@@ -281,7 +318,7 @@ int parse_instruction(struct module *module, u_char *binary, double param, int l
                 if (a.value.i64) {
                     incr(i, len);
                 } else {
-                    i += parse_instruction(module, &binary[i], param, len);
+                    i += parse_instruction(module, &binary[i], func_i, func_stack_begin, len);
                 }
             }
             incr(i, len);
@@ -289,8 +326,14 @@ int parse_instruction(struct module *module, u_char *binary, double param, int l
         }
         case INSTR_LOCAL_GET: {
             int local_index = binary[i];
-            incr(i, len);
-            stack_push(&module->stack, &(struct value_t) {.value.f64 = param, .type = TYPE_F64});
+            size_t func_type_i = module->func_to_func_type[func_i];
+            struct func_type_t *func_type = &module->func_types[func_type_i];
+            // TODO: take local variables into account in addition to parameters
+            int num_locals = func_type->num_params;
+            printf("num locals %d, %d\n", num_locals, num_locals - 1 - local_index);
+            stack_peak(&module->stack, &result, num_locals - 1 - local_index, func_stack_begin);
+            incr(i, len);
+            stack_push(&module->stack, &result);
             break;
         }
         default:
@@ -300,17 +343,34 @@ int parse_instruction(struct module *module, u_char *binary, double param, int l
     return i;
 }
 
-int parse_function(struct module *module, u_char *binary, double param, int len) {
+int parse_function(struct module *module, size_t func_i, int len) {
     int i = 0;
+    u_char *binary = module->code[func_i];
+    size_t func_type_i = module->func_to_func_type[func_i];
+    struct func_type_t *func_type = &module->func_types[func_type_i];
     int body_size = binary[i];
+    size_t func_stack_begin = module->stack.bytes;
+    size_t func_stack_end;
+    struct value_t result = {0};
+
     incr(i, len);
     // int local_decl_cound = binary[i];
     incr(i, len);
     module->scope = 1;
     while (binary[i] != INSTR_END) {
-        i += parse_instruction(module, &binary[i], param, len);
+        i += parse_instruction(module, &binary[i], func_i, func_stack_begin, len);
     }
     incr(i, len);
+
+    func_stack_end = module->stack.bytes;
+    module->stack.bytes = func_stack_begin;
+    for (size_t param_i = 0; param_i < func_type->num_params; param_i++) {
+        stack_pop(&module->stack, &result);
+    }
+    for (size_t result_i = 0; result_i < func_type->num_results; result_i++) {
+        stack_peak(&module->stack, &result, func_type->num_results - 1 - result_i, func_stack_end);
+        stack_push(&module->stack, &result);
+    }
     return i;
 }
 
@@ -327,23 +387,27 @@ int parse_section(struct module *module, u_char *binary, int len) {
             break;
         case Section_Type:
             printf("section: type\n");
-            int num_types = binary[i];
+            struct func_type_t *func_type;
+            size_t num_types = binary[i];
             incr(i, len);
-            for (int type_i = 0; type_i < num_types; type_i++) {
+            for (size_t type_i = 0; type_i < num_types; type_i++) {
                 if (binary[i] != 0x60) {
                     printf("expected function type, found %x\n", binary[i]);
                     return -1;
                 }
                 incr(i, len);
-                int num_params = binary[i];
+                func_type = &module->func_types[type_i];
+                func_type->num_params = binary[i];
                 incr(i, len);
-                for (int params_i = 0; params_i < num_params; params_i++) {
-                    i += (parse_type(&binary[i], len));
+                for (size_t param_i = 0; param_i < func_type->num_params; param_i++) {
+                    func_type->param[param_i] = binary[i];
+                    incr(i, len);
                 }
-                int num_results = binary[i];
+                func_type->num_results = binary[i];
                 incr(i, len);
-                for (int results_i = 0; results_i < num_results; results_i++) {
-                    i += (parse_type(&binary[i], len));
+                for (size_t result_i = 0; result_i < func_type->num_results; result_i++) {
+                    func_type->result[result_i] = binary[i];
+                    incr(i, len);
                 }
             }
             break;
@@ -351,9 +415,10 @@ int parse_section(struct module *module, u_char *binary, int len) {
             break;
         case Section_Function:
             printf("section: function\n");
-            int num_functions = binary[i];
+            size_t num_functions = binary[i];
             incr(i, len);
-            for (int function_i = 0; function_i < num_functions; function_i++) {
+            for (size_t function_i = 0; function_i < num_functions; function_i++) {
+                module->func_to_func_type[function_i] = binary[i];
                 incr(i, len);
             }
             break;
@@ -365,28 +430,54 @@ int parse_section(struct module *module, u_char *binary, int len) {
             break;
         case Section_Export:
             printf("section: exports\n");
-            int num_exports = binary[i];
+            module->num_exports = binary[i];
 
-            if(num_exports > MAX_FUNCTIONS) {
-			printf("Number of exports exceeds maximum number of functions in a module (%d)", MAX_FUNCTIONS);
-			return -1;
+            if(module->num_exports > MAX_FUNCTIONS) {
+                printf("Number of exports exceeds maximum number of functions in a module (%d)", MAX_FUNCTIONS);
+                return -1;
             }
             incr(i, len);
 
-            for (int exports_i = 0; exports_i < num_exports; exports_i++) {
-                struct export_t *export = &module->exports[i];
+            for (size_t exports_i = 0; exports_i < module->num_exports; exports_i++) {
+                struct export_t *export = &module->exports[exports_i];
                 export->name_length = binary[i];
                 incr(i, len);
-	
-                for (int si = 0; si < export->name_length; si++) {
-                    export->name[si] = binary[i];
+                // name[] must also hold the terminating NUL needed by the %s prints below.
+                if (export->name_length >= MAX_EXPORT_NAME_LENGTH) {
+                    printf("Export name length exceeds maximum (%d)\n", MAX_EXPORT_NAME_LENGTH - 1);
+                    return -1;
+                }
+
+                for (size_t si = 0; si < export->name_length; si++) {
+                    export->name[si] = (char) binary[i];
                     incr(i, len);
                 }
+                export->name[export->name_length] = '\0';
 
-                export->description = (int) binary[i];
+                export->description = (enum export_desc) binary[i];
                 incr(i, len);
-                export->index = (uint32_t) binary[i];
-                printf("export name: %s of type %d\n", export->name, export->description);
+                export->func_index = (size_t) binary[i];
+                printf("export name: %s of type %d\n", export->name, export->description);
+                if (export->description == Export_Func) {
+                    printf("exported function %s(", export->name);
+                    size_t func_type_index = module->func_to_func_type[export->func_index];
+                    struct func_type_t *func_type = &module->func_types[func_type_index];
+                    for (size_t param_i = 0; param_i < func_type->num_params; param_i++) {
+                        printf("%s", TYPE_NAME[func_type->param[param_i]]);
+                        if (param_i == func_type->num_params -1) {
+                            printf(") -> ");
+                        } else {
+                            printf(",");
+                        }
+                    }
+                    for (size_t result_i = 0; result_i < func_type->num_results; result_i++) {
+                        printf("%s", TYPE_NAME[func_type->result[result_i]]);
+                        if (result_i != func_type->num_results - 1) {
+                            printf(",");
+                        }
+                    }
+                    printf("\n");
+                }
                 incr(i, len);
             }
             break;
@@ -399,8 +490,10 @@ int parse_section(struct module *module, u_char *binary, int len) {
             int num_functions2 = binary[i];
             incr(i, len);
             for (int function_i = 0; function_i < num_functions2; function_i++) {
-                module->funcs[function_i] = &binary[i];
-                i += parse_function(module, &binary[i], 4, len);
+                module->code[function_i] = &binary[i];
+                stack_push(&module->stack, &(struct value_t) {.type = TYPE_F64, .value.f64 = 1});
+                i += parse_function(module, function_i, len);
+                stack_pop(&module->stack, &(struct value_t) {0});
             }
             // printf("result: %f\n", module->stack.items[0]);
             break;
@@ -417,7 +510,7 @@ int parse_section(struct module *module, u_char *binary, int len) {
     return i;
 }
 
-int parse_module(u_char *binary, size_t len) {
+int parse_module(struct module *module, u_char *binary, size_t len) {
     int i = 0;
     char *magic = "\0asm";
     while (i < 4) {
@@ -432,40 +525,92 @@ int parse_module(u_char *binary, size_t len) {
     i += 4;
     printf("addr %d\n", i);
 
-    struct module module = {0};
-    module.binary = binary;
+    module->binary = binary;
     while (i < len) {
-        i += parse_section(&module, &binary[i], len);
+        i += parse_section(module, &binary[i], len);
     }
     return i;
 }
 
 int main(int argc, char **argv) {
-	
-    if (argc != 2) {
-        fprintf(stderr, "Usage: %s [file]\n", argv[0]);
+    FILE *file;
+    u_char *binary;
+    struct stat st;
+    struct module module = {0};
+
+    // argv[2] (the function name) is read below, so at least three arguments are required.
+    if (argc < 3) {
+        fprintf(stderr, "Usage: %s [file] [function name] [function arguments ...]\n", argv[0]);
         return 1;
     };
 
-    FILE *file = fopen(argv[1], "r");
+    file = fopen(argv[1], "r");
     if (file == NULL) {
         fprintf(stderr, "Failed to open file\n");
-        fclose(file);
         return 1;
     }
 
-    struct stat st;
     stat(argv[1], &st);
     printf("size: %ld\n", st.st_size);
 
-    u_char *binary = malloc(st.st_size);
+    binary = malloc(st.st_size);
     fread(binary, st.st_size, 1, file);
+    fclose(file);
 
-    if (parse_module(binary, st.st_size) == -1) {
+    if (parse_module(&module, binary, st.st_size) == -1) {
         printf("error :(\n");
+        free(binary);
+        return 1;
     }
-	
+    printf("%zu\n", module.num_exports);
+    printf("%s\n", module.exports[0].name);
+    size_t export_search_i = 0;
+    while (export_search_i < module.num_exports &&
+            (strcmp(module.exports[export_search_i].name, argv[2]) != 0)) {
+        export_search_i++;
+    }
+    if (export_search_i == module.num_exports) {
+        printf("Provided function name %s not recognised. \n", argv[2]);
+        exit(1);
+    }
+    size_t function_search_i = module.exports[export_search_i].func_index;
+    struct func_type_t *func_type_search = &module.func_types[module.func_to_func_type[function_search_i]];
+    if (func_type_search->num_params != argc - 3) {
+        printf("Wrong number of function arguments provided. Got %d expected %zu. \n", argc - 3, func_type_search->num_params);
+        exit(1);
+    }
+
+    for (size_t param_i = 0; param_i < func_type_search->num_params; param_i++) {
+        enum TYPE param_type = func_type_search->param[param_i];
+        struct value_t param = {
+            .type = param_type,
+            .value = {0}
+        };
+        char *param_str = argv[param_i + 3];
+        switch (param_type) {
+            case TYPE_I32:
+                param.value.i32 = atoi(param_str);
+                break;
+            case TYPE_I64:
+                param.value.i64 = atoll(param_str);
+                break;
+            case TYPE_F32:
+                param.value.f32 = strtof(param_str, NULL);
+                break;
+            case TYPE_F64:
+                param.value.f64 = strtod(param_str, NULL);
+                break;
+            case TYPE_V128:
+            case TYPE_FUNCREF:
+            case TYPE_EXTERNREF:
+            default:
+                printf("%s, %s, %s unsupported\n", TYPE_NAME[TYPE_V128], TYPE_NAME[TYPE_FUNCREF], TYPE_NAME[TYPE_EXTERNREF]);
+                exit(1);
+        }
+        stack_push(&module.stack, &param);
+    }
+    parse_function(&module, function_search_i, 100);
+
     free(binary);
-    fclose(file);
     return 0;
 }