I would like to receive some advice on how to improve a small concatenative stack-based interpreter, executing a joy-like programming language. It is really minimal, and is in fact a subset of a larger version that should, a priori, be used as a compilation target.
The version I would like to review here, however, is functional and representative of the other's base. In short, I would like to have global opinions on what should be improved or prohibited, and I would also like to draw more attention to the way the interpreter manages function calls:
void evalfunc(Stack *stack, struct Function *function, struct Dico *dico)
{
for (size_t i = 0; i < function->body.size; i++)
evalop(stack, &function->body.array[i], dico);
}
I actually have the impression that it's quite rudimentary as a way of doing things, and it doesn't allow you to recursion infinitely since it uses the program's stack; and as in this style of language, the notion of loop doesn't exist, there's only recursion, I wonder if there wasn't something you could do to allow a function like these:
undefined = undefined
forever = doSomeStuff forever // If we need to execute an entire program in an "infinite loop" for example, it would be really nice
not to cause a stack-overflow, even if the recursion limit is most of the time unreachable and sufficient. But, in a superset version of this project, programs to be interpreted will likely perform operations as we would with a simple while(true) loop, and recursion is the only way to do it now; I would not like to have to implement something that goes beyond the philosophy of concatenative functional programming... But if there is no other way to do it, let me know :)
So, here are the files and the makefile (using GCC, compiled under Windows with mingw):
Makefile
CC = gcc
CCFLAGS = -g -W -Wall -Wno-missing-braces -O2 -Wno-unused-value
OBJS = Combinator.o Function.o Interpret.o RawCode.o Stack.o Show.o
all : main
main : ${OBJS} main.o
@echo Linking...
${CC} ${CCFLAGS} ${OBJS} main.o -o main
Stack.o : Stack.c
${CC} ${CCFLAGS} -c Stack.c
Combinator.o : Combinator.c
${CC} ${CCFLAGS} -c Combinator.c
Function.o : Function.c
${CC} ${CCFLAGS} -c Function.c
Interpret.o : Interpret.c
${CC} ${CCFLAGS} -c Interpret.c
RawCode.o : RawCode.c
${CC} ${CCFLAGS} -c RawCode.c
Show.o : Show.c
${CC} ${CCFLAGS} -c Show.c
Combinator.h
#pragma once
enum Combinator
{ POP, DUP, SWAP
, FLIP, ID, QUOTE
, UNQUOTE, UNKNOWN };
static const char Str_Combs[][8] =
{ "pop", "dup", "swap"
, "flip", "id", "quote"
, "unquote" };
enum Combinator string_to_comb(const char*);
Combinator.c
#include <string.h>
#include "Combinator.h"
enum Combinator string_to_comb(const char *s)
{
for (unsigned int i = 0; i < sizeof(Str_Combs) / sizeof(Str_Combs[0]); i++)
if (!strcmp(Str_Combs[i], s))
return (enum Combinator)i;
return UNKNOWN;
}
Function.h
#pragma once
#include <ctype.h>
#include "RawCode.h"
struct Function
{
char *name;
RawCode body;
};
struct Dico
{
struct Function *functions;
size_t size;
size_t used;
};
void init_dico(struct Dico *, size_t);
void push_dico(struct Dico *, struct Function);
struct Function make_Function(char *, RawCode);
struct Function *get_specific_function(char *, struct Dico *);
Function.c
#include <stdlib.h>
#include <string.h>
#include "Function.h"
#include "RawCode.h"
void init_dico(struct Dico *dico, size_t size)
{
dico->functions = (struct Function *)malloc(size * sizeof(struct Function));
dico->used = 0;
dico->size = size;
}
void push_dico(struct Dico *dico, struct Function function)
{
if (dico->used == dico->size)
{
dico->size *= 2;
dico->functions = (struct Function *)realloc(dico->functions, dico->size * sizeof(struct Function));
if (dico->functions == NULL)
perror("Out of vector memory");
}
dico->functions[dico->used++] = make_Function(function.name, function.body);
}
struct Function make_Function(char *name, RawCode body)
{
return (struct Function){.name = name, .body = body};
}
struct Function *get_specific_function(char *name, struct Dico *dico)
{
for (size_t i = 0; i < dico->used; i++)
if (!strcmp(dico->functions[i].name, name))
return &dico->functions[i];
return NULL;
}
Interpret.h
#pragma once
#include <ctype.h>
#include "Stack.h"
#include "RawCode.h"
#include "Combinator.h"
#include "LiteralOperation.h"
#include "Function.h"
// Execute the code from RawCode and return the resulting stack
Stack interpret(RawCode, struct Dico);
// Evaluate the given operation on the stack
void evalop(Stack *, struct Value *, struct Dico *);
// Evaluate the given combinator on the stack
void evalcomb(Stack *, enum Combinator, struct Dico *);
// Evalute a word on the stack
void evalword(Stack *, struct Dico *, char *);
// Evaluate a function on the stack
void evalfunc(Stack *, struct Function *, struct Dico *);
// Evalute the given literal operation on the stack
void evallo(Stack *, enum LiteralOperation, struct Dico *);
Interpret.c
#include <stdlib.h>
#include <string.h>
#include <stdbool.h>
#include "Interpret.h"
#include "RawCode.h"
#include "Function.h"
#include "Stack.h"
#include "Combinator.h"
#include "LiteralOperation.h"
#include "Show.h"
Stack interpret(RawCode rcode, struct Dico dico)
{
Stack runtime_stack;
init_stack(&runtime_stack, rcode.used);
for (size_t i = 0; i < rcode.used; i++)
evalop(&runtime_stack, &rcode.array[i], &dico);
return runtime_stack;
}
void evalop(Stack *stack, struct Value *value, struct Dico *dico)
{
switch (value->kind)
{
// If the value is a quotation or any literal, then just push it on the stack
case Val_Quotation ... Val_String:
push(stack, *value);
break;
// If this is a stack combinator, then apply it on the stack
case Val_Combinator:
evalcomb(stack, value->u.comb_.comb, dico);
break;
// If this is a "word" (a function), then evaluate it
case Val_Word:
evalword(stack, dico, value->u.word_.word);
break;
case Val_LiteralOperation:
evallo(stack, value->u.literalOperation_.literalOperation, dico);
break;
case Val_Empty:
printf("Empty stack!\n");
break;
}
}
void evalcomb(Stack *stack, enum Combinator comb, struct Dico *dico)
{
switch (comb)
{
case POP:
pop(stack);
break;
case DUP:
push(stack, *top_ptr(stack));
break;
case SWAP:
{
struct Value tmp = *topx_ptr(stack, 1);
*topx_ptr(stack, 1) = *topx_ptr(stack, 2);
*topx_ptr(stack, 2) = tmp;
}
break;
case FLIP:
{
struct Value tmp = *topx_ptr(stack, 1);
*topx_ptr(stack, 1) = *topx_ptr(stack, 3);
*topx_ptr(stack, 3) = tmp;
}
break;
case ID:
break;
case QUOTE:
{
RawCode *quote = (RawCode *)malloc(sizeof(*quote));
init_rcode(quote, 1);
push_rcode(quote, *top_ptr(stack));
*top_ptr(stack) = make_Val_Quotation(quote);
}
break;
case UNQUOTE:
{
struct Value quote = drop(stack);
for (size_t i = 0; i < quote.u.quote_.quote->used; i++)
evalop(stack, "e.u.quote_.quote->array[i], dico);
}
break;
case UNKNOWN:
default:
printf("Unknown combinator '%d'\n", (int)comb);
}
}
void evalfunc(Stack *stack, struct Function *function, struct Dico *dico)
{
for (size_t i = 0; i < function->body.size; i++)
evalop(stack, &function->body.array[i], dico);
}
void evalword(Stack *stack, struct Dico *dico, char *word)
{
struct Function *tmptr_function = get_specific_function(word, dico);
if (tmptr_function != NULL)
{
evalfunc(stack, tmptr_function, dico);
}
else if (!strcmp(word, "lowereq"))
{
// `x y lowereq` returns 1 or 0 depending on x is lower or equals to y
evalcomb(stack, SWAP, dico);
*top_ptr(stack) = make_Val_Integer((bool)(drop(stack).u.integer_.integer <= top(*stack).u.integer_.integer));
}
else if (!strcmp(word, "if"))
{
if ((bool)topx_ptr(stack, 3)->u.integer_.integer == true)
{
pop(stack);
evalcomb(stack, SWAP, dico);
pop(stack);
evalcomb(stack, UNQUOTE, dico);
}
else
{
evalcomb(stack, SWAP, dico);
pop(stack);
evalcomb(stack, SWAP, dico);
pop(stack);
evalcomb(stack, UNQUOTE, dico);
}
}
else
{
printf("Unknown word '%s'\n", word);
}
}
void evallo(Stack *stack, enum LiteralOperation lo, struct Dico *dico)
{
// We have to swap the top of the stack for more logic:
// `5 3 -` will give 3 - 5 with the method below,
// it's more logic for us to think that as `5 - 3`
evalcomb(stack, SWAP, dico);
switch (lo)
{
case ADD:
push(stack, make_Val_Integer(drop(stack).u.integer_.integer + drop(stack).u.integer_.integer));
break;
case SUB:
push(stack, make_Val_Integer(drop(stack).u.integer_.integer - drop(stack).u.integer_.integer));
break;
case MUL:
push(stack, make_Val_Integer(drop(stack).u.integer_.integer * drop(stack).u.integer_.integer));
break;
case DIV:
push(stack, make_Val_Integer(drop(stack).u.integer_.integer / drop(stack).u.integer_.integer));
break;
}
}
LiteralOperation.h
#pragma once
enum LiteralOperation
{ ADD, SUB, MUL, DIV };
RawCode.h
#pragma once
#include <ctype.h>
#include "Combinator.h"
#include "LiteralOperation.h"
typedef struct RawCode RawCode;
typedef enum Kind Kind;
struct Value {
enum Kind
{ Val_Quotation, Val_Integer, Val_String, Val_Word, Val_LiteralOperation, Val_Combinator, Val_Empty } kind;
union {
// Quotation
struct { RawCode* quote; } quote_;
// Integer literal
struct { int integer; } integer_;
// Char literal
struct { char character; } character_;
// char* literal
struct { char* string; } string_;
// A stack combinator
struct { enum Combinator comb; } comb_;
// A word on the stack (a function)
struct { char* word; } word_;
// A literal operation (+, -, *, /)
struct { enum LiteralOperation literalOperation; } literalOperation_;
} u;
};
struct Value make_Val_Quotation(RawCode*);
struct Value make_Val_Integer(int);
struct Value make_Val_string(char*);
struct Value make_Val_Word(char*);
struct Value make_Val_LiteralOperation(enum LiteralOperation);
struct Value make_Val_Combinator(enum Combinator);
struct RawCode {
struct Value* array;
size_t used;
size_t size;
};
void init_rcode(RawCode*, size_t);
void push_rcode(RawCode*, struct Value);
void pop_rcode(RawCode*);
struct Value drop_rcode(RawCode*);
struct Value top_rcode(RawCode);
// When the stack is empty
static const struct Value empty_value = { .kind = Val_Empty };
RawCode.c
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include "RawCode.h"
#include "Combinator.h"
#include "LiteralOperation.h"
struct Value make_Val_Quotation(RawCode *rcode)
{
return (struct Value){.kind = Val_Quotation, .u = {.quote_ = rcode}};
}
struct Value make_Val_Integer(int x)
{
return (struct Value){.kind = Val_Integer, .u = {.integer_ = x}};
}
struct Value make_Val_String(char *s)
{
return (struct Value){.kind = Val_String, .u = {.string_ = s}};
}
struct Value make_Val_Word(char *s)
{
return (struct Value){.kind = Val_Word, .u = {.word_ = s}};
}
struct Value make_Val_LiteralOperation(enum LiteralOperation lo)
{
return (struct Value){.kind = Val_LiteralOperation, .u = {.literalOperation_ = lo}};
}
struct Value make_Val_Combinator(enum Combinator comb)
{
return (struct Value){.kind = Val_Combinator, .u = {.comb_ = comb}};
}
void init_rcode(RawCode *rcode, size_t initSize)
{
rcode->array = (struct Value *)malloc(initSize * sizeof(struct Value));
rcode->used = 0;
rcode->size = initSize;
}
void push_rcode(RawCode *rcode, struct Value item)
{
if (rcode->used == rcode->size)
{
rcode->size *= 2;
rcode->array = (struct Value *)realloc(rcode->array, rcode->size * sizeof(struct Value));
if (rcode->array == NULL)
perror("Out of raw code memory");
}
rcode->array[rcode->used++] = item;
}
void pop_rcode(RawCode *rcode)
{
rcode->array[rcode->used--];
}
struct Value drop_rcode(RawCode *rcode)
{
struct Value last = top_rcode(*rcode);
pop_rcode(rcode);
return last;
}
struct Value top_rcode(RawCode rcode)
{
if (rcode.size == 0)
return empty_value;
return rcode.array[rcode.used - 1];
}
Show.h
#pragma once
#include "Stack.h"
#include "RawCode.h"
#include "Combinator.h"
char* showStack(Stack);
char* showValue(struct Value);
char* showLo(enum LiteralOperation);
char* showQuote(RawCode);
char* showComb(enum Combinator);
Show.c
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "Stack.h"
#include "Show.h"
#include "RawCode.h"
#include "Combinator.h"
char *showValue(struct Value value)
{
char *result;
switch (value.kind)
{
case Val_Integer:
__mingw_asprintf(&result, "%d", value.u.integer_.integer);
break;
case Val_String:
__mingw_asprintf(&result, "\"%s\"", value.u.string_.string);
break;
case Val_Word:
__mingw_asprintf(&result, "%s", value.u.word_.word);
break;
case Val_Quotation:
__mingw_asprintf(&result, "%s", showQuote(*value.u.quote_.quote));
break;
case Val_Combinator:
__mingw_asprintf(&result, "%s", showComb(value.u.comb_.comb));
break;
case Val_Empty:
return ""; // 'empty-stack
case Val_LiteralOperation:
__mingw_asprintf(&result, showLo(value.u.literalOperation_.literalOperation));
break;
default:
return "'Unknown";
}
return result;
}
char *showComb(enum Combinator comb)
{
return (char *)Str_Combs[(int)comb];
}
char *showLo(enum LiteralOperation lo)
{
switch (lo)
{
case ADD:
return "+";
case SUB:
return "-";
case MUL:
return "*";
case DIV:
return "/";
}
return "??";
}
char *showStack(Stack stack)
{
char *result;
__mingw_asprintf(&result, "[ ");
for (size_t i = 0; i < stack.used; i++)
{
if (stack.used >= 100 && i == 50)
{
i = stack.used - 1;
__mingw_asprintf(&result, "%s (...) ", result);
}
__mingw_asprintf(&result, "%s%s ", result, showValue(stack.array[i]));
}
__mingw_asprintf(&result, "%s]", result);
return result;
}
char *showQuote(RawCode rcode)
{
if (rcode.used >= 50)
return "[...]";
char *result;
__mingw_asprintf(&result, "[");
for (size_t i = 0; i < rcode.used; i++)
{
if (rcode.used >= 100 && i == 50)
{
i = rcode.used - 1;
__mingw_asprintf(&result, "%s (...) ", result);
}
__mingw_asprintf(&result, "%s%s ", result, showValue(rcode.array[i]));
}
__mingw_asprintf(&result, "%s\b]", result);
return result;
}
Stack.h
#pragma once
#include <stdio.h>
#include <stdbool.h>
#include "RawCode.h"
typedef struct Stack
{
struct Value *array;
size_t used;
size_t size;
} Stack;
void init_stack(Stack *, size_t);
// Adds an item to the top of the stack
void push(Stack *, struct Value);
// Removes the most recently added item
void pop(Stack *);
// Removes the most recently added item and returns it
struct Value drop(Stack *);
// Gets the last added item => the top of the stack
struct Value top(Stack);
struct Value *top_ptr(Stack *);
struct Value *topx_ptr(Stack *, const size_t);
Stack.c
#include <stdlib.h>
#include <string.h>
#include "Stack.h"
void init_stack(Stack *stack, const size_t initSize)
{
stack->array = (struct Value *)malloc(initSize * sizeof(struct Value));
stack->used = 0;
stack->size = initSize;
}
void push(Stack *stack, const struct Value item)
{
if (stack->used == stack->size)
{
stack->size *= 2;
stack->array = (struct Value *)realloc(stack->array, stack->size * sizeof(struct Value));
if (stack->array == NULL)
perror("Out of stack memory");
}
stack->array[stack->used++] = item;
}
void pop(Stack *stack)
{
stack->array[stack->used == 0 ? 0 : stack->used--];
}
struct Value drop(Stack *stack)
{
struct Value last = top(*stack);
pop(stack);
return last;
}
struct Value top(const Stack stack)
{
return stack.used == 0 ? empty_value : stack.array[stack.used - 1];
}
struct Value *top_ptr(Stack *stack)
{
return &stack->array[stack->used - 1];
}
struct Value *topx_ptr(Stack *stack, const size_t x)
{
return &stack->array[stack->used - x];
}
main.c
#include <stdlib.h>
#include <stdio.h>
#include "Interpret.h"
#include "RawCode.h"
#include "Function.h"
#include "Show.h"
// Define a function that just call itself
void _undefined(struct Dico *dico)
{
RawCode body;
init_rcode(&body, 1);
push_rcode(&body, make_Val_Word("undefined"));
push_dico(dico, make_Function("undefined", body));
}
// Define a function with a const value
void _const(struct Dico *dico)
{
RawCode body;
init_rcode(&body, 1);
push_rcode(&body, make_Val_Integer(42));
push_dico(dico, make_Function("const", body));
}
// Define the factorial function
void _fac(struct Dico *dico)
{
// fac = dup 1 lowereq [pop 1] [dup -- fac *] if
RawCode body;
init_rcode(&body, 6);
push_rcode(&body, make_Val_Combinator(DUP));
push_rcode(&body, make_Val_Integer(1));
push_rcode(&body, make_Val_Word("lowereq"));
RawCode *if_true = (RawCode *)malloc(sizeof(*if_true));
init_rcode(if_true, 2);
push_rcode(if_true, make_Val_Combinator(POP));
push_rcode(if_true, make_Val_Integer(1));
RawCode *if_false = (RawCode *)malloc(sizeof(*if_false));
init_rcode(if_false, 4);
push_rcode(if_false, make_Val_Combinator(DUP));
push_rcode(if_false, make_Val_Integer(1));
push_rcode(if_false, make_Val_LiteralOperation(SUB));
push_rcode(if_false, make_Val_Word("fac"));
push_rcode(if_false, make_Val_LiteralOperation(MUL));
push_rcode(&body, make_Val_Quotation(if_true));
push_rcode(&body, make_Val_Quotation(if_false));
push_rcode(&body, make_Val_Word("if"));
push_dico(dico, make_Function("fac", body));
}
int main(void)
{
struct Dico dico;
RawCode rcode;
init_dico(&dico, 1);
init_rcode(&rcode, 1);
_undefined(&dico);
_fac(&dico);
_const(&dico);
push_rcode(&rcode, make_Val_Integer(4000));
push_rcode(&rcode, make_Val_Word("fac"));
// So now, the raw code is like `5 fac`
// The interpreter will interpret that and return 120
Stack result = interpret(rcode, dico);
printf("%s\n", showStack(result));
return 0;
}
PS: I use a function specific to MINGW in the Show.c file, it is __mingw_asprintf. If there are compilation problems, it probably comes from there, so it has to be removed.
PPS: Here is a link to download the files.