wai.h

WASM Interpreter
git clone git://source.orangerot.dev:/wai.h.git
Log | Files | Refs | README | LICENSE

wai.c (22364B)


      1 /*
      2  * SPDX-FileCopyrightText: 2025 orangerot <me@orangerot.dev>
      3  *
      4  * SPDX-License-Identifier: GPL-3.0
      5  *
      6  * This program, named WAI, is a WebAssembly Interpreter. 
      7  * Compile using make. 
      8  * Usage: wai [file.wasm] [function name] [function arguments ...]
      9  *
     10  * Copyright (C) 2025  orangerot <me@orangerot.dev>
     11  *
     12  * This program is free software: you can redistribute it and/or modify
     13  * it under the terms of the GNU General Public License as published by
     14  * the Free Software Foundation, either version 3 of the License, or
     15  * (at your option) any later version.
     16  *
     17  * This program is distributed in the hope that it will be useful,
     18  * but WITHOUT ANY WARRANTY; without even the implied warranty of
     19  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
     20  * GNU General Public License for more details.
     21  *
     22  * You should have received a copy of the GNU General Public License
     23  * along with this program.  If not, see <https://www.gnu.org/licenses/>.
     24  */
     25 
     26 #include <endian.h>
     27 #include <stddef.h>
     28 #include <stdint.h>
     29 #include <stdio.h>
     30 #include <stdlib.h>
     31 #include <string.h>
     32 #include <sys/stat.h>
     33 
     34 #define STACK_CAPACITY 1024
     35 #define MAX_FUNCTIONS 128
     36 #define MAX_FUNCTION_PARAMETERS 32
     37 #define MAX_FUNCTION_RESULTS 32
     38 #define MAX_EXPORT_NAME_LENGTH 128
     39 
     40 enum section {
     41   Section_Custom,
     42   Section_Type,
     43   Section_Import,
     44   Section_Function,
     45   Section_Table,
     46   Section_Memory,
     47   Section_Global,
     48   Section_Export,
     49   Section_Start,
     50   Section_Element,
     51   Section_Code,
     52   Section_Data,
     53   Section_Data_Count,
     54 };
     55 
     56 enum TYPE {
     57   TYPE_ANY = 0,
     58   TYPE_I32 = 0x7F,
     59   TYPE_I64 = 0x7E,
     60   TYPE_F32 = 0x7D,
     61   TYPE_F64 = 0x7C,
     62   TYPE_V128 = 0x7B,
     63   TYPE_FUNCREF = 0x70,
     64   TYPE_EXTERNREF = 0x6F
     65 };
     66 
     67 static const int TYPE_SIZE[] = {
     68   [TYPE_I32] = 4,
     69   [TYPE_I64] = 8,
     70   [TYPE_F32] = 4,
     71   [TYPE_F64] = 8,
     72   [TYPE_V128] = 16,
     73   [TYPE_FUNCREF] = 16,
     74   [TYPE_EXTERNREF] = 16
     75 };
     76 
     77 static const char *TYPE_NAME[] = {
     78   [TYPE_I32] = "I32",
     79   [TYPE_I64] = "I64",
     80   [TYPE_F32] = "F32",
     81   [TYPE_F64] = "F64",
     82   [TYPE_V128] = "V128",
     83   [TYPE_FUNCREF] = "FREF",
     84   [TYPE_EXTERNREF] = "EXTR",
     85   [TYPE_ANY] = "ANY",
     86 };
     87 
     88 struct stack {
     89   u_char items[STACK_CAPACITY];
     90   size_t bytes;
     91 };
     92 
     93 struct func_type_t {
     94   enum TYPE param[MAX_FUNCTION_PARAMETERS];
     95   enum TYPE result[MAX_FUNCTION_RESULTS];
     96   size_t num_params;
     97   size_t num_results;
     98 };
     99 
    100 struct func_t {
    101   size_t func_type_index;
    102   size_t num_local_vars;
    103   u_char *addr;
    104 };
    105 
    106 enum export_desc {
    107   Export_Func,
    108   Export_Table,
    109   Export_Mem,
    110   Export_Global,
    111 };
    112 
    113 struct export_t {
    114   char name[MAX_EXPORT_NAME_LENGTH];
    115   size_t name_length;
    116   size_t func_index;
    117   enum export_desc description;
    118 };
    119 
    120 struct module {
    121   struct func_type_t func_types[MAX_FUNCTIONS];
    122   struct func_t func[MAX_FUNCTIONS];
    123   struct table_t *tables;
    124   struct mem_t *mems;
    125   struct global_t *globals;
    126   struct elem_t *elems;
    127   struct data_t *datas;
    128   struct start_t *start;
    129   struct import_t *imports;
    130   struct export_t exports[MAX_FUNCTIONS];
    131   u_char *binary;
    132   struct stack stack;
    133   size_t num_exports;
    134 };
    135 
    136 struct value_t {
    137   enum TYPE type;
    138   union {
    139     int32_t i32; 
    140     int64_t i64; 
    141     float f32; 
    142     double f64; 
    143     __int128 v128; 
    144     int64_t funcref; 
    145     int64_t extref; 
    146   } value;
    147 };
    148 
    149 struct context {
    150   struct module *module;
    151   size_t func_i;
    152   size_t func_stack_begin;
    153 };
    154 
    155 #define incr(i, len) i++; if (i >= len) {return 0;}
    156 
    157 void print_value(struct value_t *value) {
    158   void *number = &value->value;
    159   switch (value->type) {
    160     case TYPE_I32:
    161       printf("%d", *(int32_t*)number);
    162       break;
    163     case TYPE_I64:
    164     case TYPE_ANY:
    165       printf("%ld", *(int64_t*)number);
    166       break;
    167     case TYPE_F32:
    168       printf("%f", *(float*)number);
    169       break;
    170     case TYPE_F64:
    171       printf("%f", *(double*)number);
    172       break;
    173     case TYPE_V128:
    174       printf("%f", (double) *(__int128*)number);
    175       break;
    176     case TYPE_FUNCREF:
    177       printf("%ld", *(int64_t*)number);
    178       break;
    179     case TYPE_EXTERNREF:
    180       printf("%ld", *(int64_t*)number);
    181       break;
    182   }
    183   printf(" (%s)", TYPE_NAME[value->type]);
    184 }
    185 
    186 void stack_peak(struct stack *s, struct value_t *value, size_t nth, size_t from) {
    187   int byte_i = from - 1; 
    188   for (size_t element_i = 0; element_i < nth && byte_i > 0; element_i++) {
    189     byte_i -= TYPE_SIZE[s->items[byte_i]];
    190   }
    191   value->type = s->items[byte_i];
    192   memcpy(&value->value, &(s->items[byte_i - TYPE_SIZE[value->type]]), TYPE_SIZE[value->type]);
    193 } 
    194 
    195 void stack_push(struct stack *s, const struct value_t *value) {
    196   size_t type_size = TYPE_SIZE[value->type];
    197   memcpy(&(s->items[s->bytes]), &value->value, type_size);
    198   s->items[s->bytes + type_size] = value->type;
    199   s->bytes += type_size + 1;
    200 
    201   printf("stack: ");
    202   for (int i = s->bytes - 1; i > 0; i -= TYPE_SIZE[s->items[i]] + 1) {
    203     struct value_t stack_value = {0};
    204     stack_peak(s, &stack_value, 0, i + 1);
    205     print_value(&stack_value);
    206     printf(", ");
    207   }
    208   printf("\n");
    209 } 
    210 
    211 void stack_top(struct stack *s, struct value_t *value) {
    212   value->type = s->items[s->bytes-1];
    213   memcpy(&value->value, &(s->items[s->bytes - 1 - TYPE_SIZE[value->type]]), TYPE_SIZE[value->type]);
    214 } 
    215 
    216 void stack_pop(struct stack *s, struct value_t *value) {
    217   stack_top(s, value);
    218   s->bytes -= TYPE_SIZE[value->type] + 1;
    219 }
    220 
    221 int parse_function(struct module *module, size_t func_i, size_t len);
    222 int parse_instruction(struct context context, u_char *binary, size_t len);
    223 
    224 #define PARAMS(...) __VA_ARGS__
    225 
    226 // https://webassembly.github.io/spec/core/appendix/index-instructions.html
    227 // OP(NAME, CODE, PARAM, NUM_RESULTS, LEN_IMMIDIATE, BODY)
    228 #define DEFINE_OPERATIONS(OP)                                                                                                         \
    229 OP(INSTR_F64_MUL,   0xa2, PARAMS(TYPE_F64, TYPE_F64), 1, 0, result->type = TYPE_F64; result->value.f64 = a->value.f64 * b->value.f64) \
    230 OP(INSTR_F64_SUB,   0xa1, PARAMS(TYPE_F64, TYPE_F64), 1, 0, result->type = TYPE_F64; result->value.f64 = b->value.f64 - a->value.f64) \
    231 OP(INSTR_F64_LT,    0x63, PARAMS(TYPE_F64, TYPE_F64), 1, 0, result->type = TYPE_F64; result->value.f64 = b->value.f64 < a->value.f64) \
    232 OP(INSTR_F64_CONST, 0x44, PARAMS(), 1, 8, result->type = TYPE_F64; result->value.f64 = *(double*)immidiate)                           \
    233 OP(INSTR_LOCAL_GET, 0x20, PARAMS(), 1, 1,                                                                                             \
    234   size_t func_type_i = context.module->func[context.func_i].func_type_index;                                                          \
    235   struct func_type_t *func_type = &context.module->func_types[func_type_i];                                                           \
    236   int num_locals = func_type->num_params + context.module->func[context.func_i].num_local_vars;                                       \
    237                                                                                                                                       \
    238   printf("num locals %d, %d\n", num_locals, num_locals - 1 - *(u_char*)immidiate);                                                    \
    239   stack_peak(&context.module->stack, result, num_locals - 1 - *(u_char*)immidiate, context.func_stack_begin);                         \
    240 )                                                                                                                                     \
    241 OP(INSTR_CALL, 0x10, PARAMS(), 0, 1, parse_function(context.module, *(u_char*)immidiate, len))                                        \
    242 OP(INSTR_IF, 0x04, PARAMS(TYPE_ANY), 0, 1,                                                                                            \
    243   size_t i = 0;                                                                                                                       \
    244   enum TYPE condition_type = *(u_char*) immidiate;                                                                                    \
    245   if (a->type != condition_type)                                                                                                      \
    246     printf("Wrong types!\n");                                                                                                         \
    247                                                                                                                                       \
    248   while (binary[i] != INSTR_ELSE) {                                                                                                   \
    249     if (a->value.i64) {                                                                                                               \
    250       i += parse_instruction(context, &binary[i], len);                                                                               \
    251     } else {                                                                                                                          \
    252       incr(i, len);                                                                                                                   \
    253     }                                                                                                                                 \
    254   }                                                                                                                                   \
    255   incr(i, len);                                                                                                                       \
    256   while (binary[i] != INSTR_END) {                                                                                                    \
    257     if (a->value.i64) {                                                                                                               \
    258       incr(i, len);                                                                                                                   \
    259     } else {                                                                                                                          \
    260       i += parse_instruction(context, &binary[i], len);                                                                               \
    261     }                                                                                                                                 \
    262   }                                                                                                                                   \
    263   incr(i, len);                                                                                                                       \
    264   return i;                                                                                                                           \
    265 )
    266 
    267 enum OP_CODES {
    268 #define AS_ENUM(NAME, CODE, PARAM, NUM_RESULTS, LEN_IMMIDIATE, BODY) NAME = CODE,
    269 DEFINE_OPERATIONS(AS_ENUM)
    270   INSTR_END = 0x0B,
    271   INSTR_ELSE = 0x05
    272 };  
    273 
    274 
    275 #define AS_FUNCTION(NAME, CODE, PARAM, NUM_RESULTS, LEN_IMMIDIATE, BODY)       \
    276   int exec_##NAME(struct context context, struct value_t *a,                   \
    277                   struct value_t *b, void *immidiate, struct value_t *result,  \
    278                   u_char *binary, size_t len) {                                \
    279     (void) context;                                                            \
    280     (void) a;                                                                  \
    281     (void) b;                                                                  \
    282     (void) immidiate;                                                          \
    283     (void) result;                                                             \
    284     (void) binary;                                                             \
    285     (void) len;                                                                \
    286     BODY;                                                                      \
    287     return 0;                                                                  \
    288   }
    289 DEFINE_OPERATIONS(AS_FUNCTION)
    290 
    291 struct instruction {
    292   size_t num_param;
    293   enum TYPE params[2];
    294   size_t len_immidiate;
    295   size_t num_results;
    296   int (*exec) (struct context context, struct value_t *a, struct value_t *b, void *immidiate, struct value_t *result, u_char *binary, size_t len);
    297 };
    298 
    299 struct instruction INSTRUCTIONS[] = {
    300 #define AS_INSTRUCTION(NAME, CODE, PARAM, NUM_RESULTS, LEN_IMMIDIATE, BODY)    \
    301   [NAME] = {                                                                   \
    302     .num_param = sizeof((enum TYPE[]) {PARAM}) / sizeof(enum TYPE),            \
    303     .params = {PARAM},                                                         \
    304     .num_results = NUM_RESULTS,                                                \
    305     .len_immidiate = LEN_IMMIDIATE,                                            \
    306     .exec = &exec_##NAME                                                       \
    307   },                                                                                                    
    308 DEFINE_OPERATIONS(AS_INSTRUCTION)
    309 };
    310 
    311 int parse_instruction(struct context context, u_char *binary, size_t len) {
    312   size_t i = 0;	
    313   enum OP_CODES op_code = binary[i];
    314   u_char *instr_addr = &binary[i];
    315   struct value_t result = {0};
    316   struct value_t arguments[2];
    317 
    318   incr(i, len);
    319 
    320   struct instruction *instr = &INSTRUCTIONS[op_code];
    321   if (instr->exec == NULL) {
    322     printf("not implemented/illegal instruction %x at %lx\n", op_code, instr_addr - context.module->binary);
    323     exit(1);
    324   };
    325 
    326   for (size_t param_i = 0; param_i < instr->num_param; param_i++) {
    327     stack_pop(&context.module->stack, &arguments[param_i]);
    328     if (instr->params[param_i] != TYPE_ANY && arguments[param_i].type != instr->params[param_i]) {
    329       printf("wrong type! %x\n", op_code);
    330     }
    331   }
    332   i += instr->exec(context, &arguments[0], &arguments[1], &binary[i], &result, &binary[i + instr->len_immidiate], len);
    333   i += instr->len_immidiate;
    334   if (instr->num_results) {
    335     stack_push(&context.module->stack, &result);
    336   }
    337 
    338   return i;
    339 }
    340 
    341 int parse_function(struct module *module, size_t func_i, size_t len) {
    342   size_t i = 0;	
    343   struct func_t *func = &module->func[func_i];
    344   u_char *binary = func->addr;
    345   size_t func_type_i = func->func_type_index;
    346   struct func_type_t *func_type = &module->func_types[func_type_i];
    347   // int body_size = binary[i];
    348   size_t func_stack_begin = module->stack.bytes;
    349   size_t func_stack_end;
    350   struct value_t result = {0};
    351   struct context context = {
    352     .module = module,
    353     .func_i = func_i,
    354     .func_stack_begin = func_stack_begin
    355   };
    356 
    357   incr(i, len);
    358   func->num_local_vars = binary[i];
    359   incr(i, len);
    360   for (size_t local_var_i = 0; local_var_i < func->num_local_vars; local_var_i++) {
    361     stack_push(&module->stack, &(struct value_t) {.type = binary[i], .value = {0}}); 
    362     incr(i, len);
    363   }
    364   while (binary[i] != INSTR_END) {
    365     i += parse_instruction(context, &binary[i], len);
    366   }
    367   incr(i, len);
    368   
    369   func_stack_end = module->stack.bytes;
    370   module->stack.bytes = func_stack_begin;
    371   for (size_t local_i = 0; local_i < func_type->num_params + func->num_local_vars; local_i++) {
    372     stack_pop(&module->stack, &result);
    373   }
    374   for (size_t result_i = 0; result_i < func_type->num_results; result_i++) {
    375     stack_peak(&module->stack, &result, func_type->num_results - 1 - result_i, func_stack_end);
    376     stack_push(&module->stack, &result);
    377   }
    378   return i;
    379 }
    380 
    381 int parse_section(struct module *module, u_char *binary, size_t len) {
    382   size_t i = 0;	
    383   enum section type = binary[i];
    384   incr(i, len);
    385   int size = binary[i];
    386   incr(i, len);
    387   printf("section %x with size %d\n", type, size);
    388 
    389   switch ((enum section) type) {
    390     case Section_Custom:
    391       break;
    392     case Section_Type: 
    393         printf("section: type\n");
    394         struct func_type_t *func_type;
    395         size_t num_types = binary[i];
    396         incr(i, len);
    397         for (size_t type_i = 0; type_i < num_types; type_i++) {
    398           if (binary[i] != 0x60) {
    399             printf("expected function type, found %x\n", binary[i]);
    400             return 0;
    401           }
    402           incr(i, len);
    403           func_type = &module->func_types[type_i];
    404           func_type->num_params = binary[i];
    405           incr(i, len);
    406           for (size_t param_i = 0; param_i < func_type->num_params; param_i++) {
    407             func_type->param[param_i] = binary[i];
    408             incr(i, len);
    409           }
    410           func_type->num_results = binary[i];
    411           incr(i, len);
    412           for (size_t result_i = 0; result_i < func_type->num_results; result_i++) {
    413             func_type->result[result_i] = binary[i];
    414             incr(i, len);
    415           }
    416         }
    417         break;
    418     case Section_Import:
    419       break;
    420     case Section_Function:
    421       printf("section: function\n");
    422       size_t num_functions = binary[i];
    423       incr(i, len);
    424       for (size_t function_i = 0; function_i < num_functions; function_i++) {
    425         module->func[function_i].func_type_index = binary[i];
    426         incr(i, len);
    427       }
    428       break;
    429     case Section_Table:
    430       break;
    431     case Section_Memory:
    432       break;
    433     case Section_Global:
    434       break;
    435     case Section_Export:
    436       printf("section: exports\n");
    437       module->num_exports = binary[i];
    438       
    439       if(module->num_exports > MAX_FUNCTIONS) {
    440         printf("Number of exports exceeds maximum number of functions in a module (%d)", MAX_FUNCTIONS);
    441         return 0;
    442       }
    443       
    444       incr(i, len);
    445       for (size_t exports_i = 0; exports_i < module->num_exports; exports_i++) {
    446         struct export_t *export = &module->exports[exports_i];
    447 
    448         export->name_length = binary[i];
    449         incr(i, len);
    450         
    451         for (size_t si = 0; si < export->name_length; si++) {
    452           export->name[si] = (char) binary[i];
    453           incr(i, len);
    454         }
    455         export->description = (enum export_desc) binary[i];
    456         incr(i, len);
    457         export->func_index = (size_t) binary[i];
    458         printf("export name: %s of type %d\n", export->name, export->description);
    459         if (export->description == Export_Func) {
    460           printf("exported function %s(", export->name);
    461           size_t func_type_index = module->func[export->func_index].func_type_index;
    462           struct func_type_t *func_type = &module->func_types[func_type_index];
    463           for (size_t param_i = 0; param_i < func_type->num_params; param_i++) {
    464             printf("%s", TYPE_NAME[func_type->param[param_i]]);
    465             if (param_i == func_type->num_params -1) {
    466               printf(") -> ");
    467             } else {
    468               printf(",");
    469             }
    470           }
    471           for (size_t result_i = 0; result_i < func_type->num_results; result_i++) {
    472             printf("%s", TYPE_NAME[func_type->result[result_i]]);
    473             if (result_i != func_type->num_params -1) {
    474               printf(",");
    475             }
    476           }
    477           printf("\n");
    478         }
    479         incr(i, len);
    480       }
    481       break;
    482     case Section_Start:
    483       break;
    484     case Section_Element:
    485       break;
    486     case Section_Code:
    487       printf("section: code\n");
    488       int num_functions2 = binary[i];
    489       incr(i, len);
    490       for (int function_i = 0; function_i < num_functions2; function_i++) {
    491         module->func[function_i].addr = &binary[i];
    492         stack_push(&module->stack, &(struct value_t) {.type = TYPE_F64, .value.f64 = 1});
    493         i += parse_function(module, function_i, len);
    494         stack_pop(&module->stack, &(struct value_t) {0});
    495       }
    496       // printf("result: %f\n", module->stack.items[0]);
    497       break;
    498     case Section_Data:
    499       break;
    500     case Section_Data_Count:
    501       break;
    502     default:
    503       fprintf(stderr, "expectet section\n");
    504       exit(1);
    505   }
    506 
    507   if (size == 0x0) {incr(i, len);}
    508   return i;
    509 }
    510 
    511 int parse_module(struct module *module, u_char *binary, size_t len) {
    512   size_t i = 0;	
    513   char *magic = "\0asm";
    514   while (i < 4) {
    515     if ((char) binary[i] != magic[i]) {
    516       fprintf(stderr, "no wasm magic\n");
    517       return 0;
    518     }
    519     incr(i, len);
    520   }
    521   printf("magic found\n");
    522   printf("wasm version: %d\n", le32toh(*(int*)&binary[i]));
    523   i += 4;
    524   printf("addr %zu\n", i);
    525 
    526   module->binary = binary;
    527 
    528   while (i < len) {
    529     i += parse_section(module, &binary[i], len);
    530   }
    531   return i;
    532 }
    533 
    534 int main(int argc, char **argv) {
    535   FILE *file;
    536   u_char *binary;
    537   struct stat st;
    538   struct module module = {0};
    539 
    540   if (argc < 3) {
    541     printf("Usage: %s [file] [function name] [function arguments ...]\n", argv[0]);
    542     exit(1);
    543   };
    544   file = fopen(argv[1], "r");
    545   if (file == NULL) {
    546     fprintf(stderr, "Failed to open file\n");
    547     fclose(file);
    548     return 1;
    549   }
    550   stat(argv[1], &st);
    551   printf("size: %ld\n", st.st_size);
    552   
    553   binary = malloc(st.st_size);
    554   fread(binary, st.st_size, 1, file);
    555   fclose(file);
    556 
    557   if (parse_module(&module, binary, st.st_size) == -1) {
    558     printf("error :(\n");
    559   }
    560   printf("%zu\n", module.num_exports);
    561   printf("%s\n", module.exports[0].name);
    562   size_t export_search_i = 0;
    563   while (export_search_i < module.num_exports && 
    564       (strcmp(module.exports[export_search_i].name, argv[2]) != 0)) {
    565     export_search_i++;
    566   }
    567   if (export_search_i == module.num_exports) {
    568     printf("Provided function name %s not recognised. \n", argv[2]);
    569     exit(1);
    570   }
    571   size_t function_search_i = module.exports[export_search_i].func_index;
    572   size_t function_search_type_index = module.func[function_search_i].func_type_index;
    573   struct func_type_t *func_type_search = &module.func_types[function_search_type_index];
    574   if (func_type_search->num_params != (size_t) (argc - 3)) {
    575     printf("Not enough function arguments provided. Got %d expected %zu. \n", argc - 3, func_type_search->num_params);
    576     exit(1);
    577   }
    578 
    579   for (size_t param_i = 0; param_i < func_type_search->num_params; param_i++) {
    580     enum TYPE param_type = func_type_search->param[param_i];
    581     struct value_t param = {
    582       .type = param_type,
    583       .value = {0}
    584     };
    585     char *param_str = argv[param_i + 3];
    586     switch (param_type) {
    587     case TYPE_I32:
    588       param.value.i32 = atoi(param_str);
    589       break;
    590     case TYPE_I64:
    591       param.value.i64 = atoll(param_str);
    592       break;
    593     case TYPE_F32:
    594       param.value.f32 = strtof(param_str, NULL);
    595       break;
    596     case TYPE_F64:
    597       param.value.f64 = strtod(param_str, NULL);
    598       break;
    599     case TYPE_V128:
    600     case TYPE_FUNCREF:
    601     case TYPE_EXTERNREF:
    602     default:
    603       printf("%s, %s, %s unsupported\n", TYPE_NAME[TYPE_V128], TYPE_NAME[TYPE_FUNCREF], TYPE_NAME[TYPE_EXTERNREF]);
    604       exit(1);
    605     }
    606     stack_push(&module.stack, &param);
    607   }
    608   parse_function(&module, function_search_i, 100);
    609 
    610   free(binary);
    611   return 0;
    612 }
    613