From cf0354eb8e043fcd9c6c17756701972f948a16f1 Mon Sep 17 00:00:00 2001 From: Michael Smith Date: Thu, 22 Aug 2024 00:02:48 +0100 Subject: Rewrite the gamedata and entprops systems entirely This removes the horrible janky old KeyValues parser and replaces it with a couple of trivial ad-hoc text parsers. In doing so, make the format of the actual gamedata files more human-friendly too. We also gain support for nested SendTables in mkentprops, which are required to get at various things like player velocity. And, the actual string matching is made more efficient (or, at least, more scalable) by way of a cool radix tree thing which generates a bunch of switch cases on distinct characters. --- src/build/mkentprops.c | 447 ++++++++++++++++++++++++++++++++++--------------- src/build/mkgamedata.c | 345 +++++++++++++++++++++----------------- src/build/vec.h | 2 +- 3 files changed, 498 insertions(+), 296 deletions(-) (limited to 'src/build') diff --git a/src/build/mkentprops.c b/src/build/mkentprops.c index fdbb8af..4806996 100644 --- a/src/build/mkentprops.c +++ b/src/build/mkentprops.c @@ -17,13 +17,11 @@ #include #include #include +#include #include "../intdefs.h" -#include "../kv.h" #include "../langext.h" #include "../os.h" -#include "skiplist.h" -#include "vec.h" #ifdef _WIN32 #define fS "S" @@ -31,176 +29,349 @@ #define fS "s" #endif -static noreturn die(const char *s) { +static noreturn die(int status, const char *s) { fprintf(stderr, "mkentprops: %s\n", s); - exit(100); + exit(status); } - -struct prop { - const char *varname; /* the C global name */ - const char *propname; /* the entity property name */ - struct prop *next; -}; -struct vec_prop VEC(struct prop *); - -DECL_SKIPLIST(static, class, struct class, const char *, 4) -struct class { - const char *name; /* the entity class name */ - struct vec_prop props; - struct skiplist_hdr_class hdr; -}; -static inline int cmp_class(struct class *c, const char *s) { - return strcmp(c->name, s); +static noreturn dieparse(const os_char *file, int line, const char *s) { + fprintf(stderr, "mkentprops: %" fS ":%d: %s\n", file, line, s); + exit(2); } -static inline struct skiplist_hdr_class *hdr_class(struct class *c) { - return &c->hdr; + +static char *sbase; // input file contents - string values are indices off this + +// custom data structure of the day: zero-alloc half-SoA adaptive radix trees(!) + +#define ART_MAXNODES 16384 +static uchar art_firstbytes[ART_MAXNODES]; +static struct art_core { + int soff; // string offset to entire key chunk (including firstbyte) + u16 slen; // number of bytes in key chunk (including firstbyte, >=1) + u16 next; // sibling node (same prefix, different firstbyte) +} art_cores[ART_MAXNODES]; +// if node doesn't end in \0: child node (i.e. key appends more bytes) +// if node DOES end in \0: offset into art_leaves (below) +static u16 art_children[ART_MAXNODES]; +static int art_nnodes = 0; + +#define ART_NULL ((u16)-1) + +static inline int art_newnode(void) { + if (art_nnodes == ART_MAXNODES) die(2, "out of tree nodes"); + return art_nnodes++; } -DEF_SKIPLIST(static, class, cmp_class, hdr_class) -static struct skiplist_hdr_class classes = {0}; -static int nclasses = 0; - -struct parsestate { - const os_char *filename; - struct kv_parser *parser; - char *lastvar; -}; - -static noreturn badparse(struct parsestate *state, const char *e) { - fprintf(stderr, "mkentprops: %" fS ":%d:%d: parse error: %s", - state->filename, state->parser->line, state->parser->col, e); - exit(1); + +#define ART_MAXLEAVES 8192 +static struct art_leaf { + int varstr; // offset of string (generated variable), if any, or -1 if none + u16 subtree; // art index of subtree (nested SendTable), or -1 if none + u16 nsubs; // number of subtrees (used to short-circuit the runtime search) +} art_leaves[ART_MAXLEAVES]; +static int art_nleaves = 0; +static u16 art_root = ART_NULL; // ServerClasses (which point at SendTables) +static u16 nclasses = 0; // similar short circuit for ServerClasses + +#define VAR_NONE -1 + +// for quick iteration over var names to generate decls header without ART faff +#define MAXDECLS 4096 +static int decls[MAXDECLS]; +static int ndecls = 0; + +static inline int art_newleaf(void) { + if (art_nleaves == ART_MAXLEAVES) die(2, "out of leaf nodes"); + return art_nleaves++; } -static void kv_cb(enum kv_token type, const char *p, uint len, void *ctxt) { - struct parsestate *state = ctxt; - switch_exhaust_enum (kv_token, type) { - case KV_IDENT: case KV_IDENT_QUOTED: - state->lastvar = malloc(len + 1); - if (!state->lastvar) die("couldn't allocate memory"); - memcpy(state->lastvar, p, len); state->lastvar[len] = '\0'; - break; - case KV_NEST_START: badparse(state, "unexpected nested block"); - case KV_NEST_END: badparse(state, "unexpected closing brace"); - case KV_VAL: case KV_VAL_QUOTED:; - struct prop *prop = malloc(sizeof(*prop)); - if (!prop) die("couldn't allocate memory"); - prop->varname = state->lastvar; - char *classname = malloc(len + 1); - if (!classname) die("couldn't allocate memory"); - memcpy(classname, p, len); classname[len] = '\0'; - char *propname = strchr(classname, '/'); - if (!propname) { - badparse(state, "network name not in class/prop format"); +static struct art_lookup_ret { + bool isnew; // true if node created, false if found + u16 leafidx; // index of value (leaf) node +} art_lookup(u16 *art, int soff, int len) { + assume(len > 0); // everything must be null-terminated! + for (;;) { + const uchar *p = (const uchar *)sbase + soff; + u16 cur = *art; + if (cur == ART_NULL) { // append + int new = art_newnode(); + *art = new; + art_firstbytes[new] = *p; + art_cores[new].soff = soff; + art_cores[new].slen = len; + art_cores[new].next = ART_NULL; + int leaf = art_newleaf(); // N.B. firstbyte is already 0 here + art_children[new] = leaf; + return (struct art_lookup_ret){true, leaf}; + } + while (art_firstbytes[cur] == *p) { + int nodelen = art_cores[cur].slen; + int matchlen = 1; + const uchar *q = (const uchar *)sbase + art_cores[cur].soff; + for (; matchlen < nodelen; ++matchlen) { + if (p[matchlen] != q[matchlen]) { + // node and key diverge: split into child nodes + // (left is new part of key string, right is existing tail) + art_cores[cur].slen = matchlen; + int l = art_newnode(), r = art_newnode(); + art_firstbytes[l] = p[matchlen]; + art_cores[l].soff = soff + matchlen; + art_cores[l].slen = len - matchlen; + art_cores[l].next = r; + art_firstbytes[r] = q[matchlen]; + art_cores[r].soff = art_cores[cur].soff + matchlen; + art_cores[r].slen = nodelen - matchlen; + art_cores[r].next = ART_NULL; + art_children[r] = art_children[cur]; + art_children[cur] = l; + int leaf = art_newleaf(); + art_children[l] = leaf; + return (struct art_lookup_ret){true, leaf}; + } } - *propname = '\0'; ++propname; // split! - prop->propname = propname; - struct class *class = skiplist_get_class(&classes, classname); - if (!class) { - class = malloc(sizeof(*class)); - if (!class) die("couldn't allocate memory"); - *class = (struct class){.name = classname}; - skiplist_insert_class(&classes, classname, class); - ++nclasses; + if (matchlen == len) { + // node matches entire key: we have matched an existing entry + return (struct art_lookup_ret){false, art_children[cur]}; } - // (if class is already there just leak classname, no point freeing) - if (!vec_push(&class->props, prop)) die("couldn't append to array"); + // node is substring of key: descend into child nodes + soff += matchlen; + len -= matchlen; + cur = art_children[cur]; // note: this can't be ART_NULL (thus loop) + p = (const uchar *)sbase + soff; // XXX: kinda silly dupe + } + // if we didn't match this node, try the next sibling node. + // if sibling is null, we'll hit the append case above. + art = &art_cores[cur].next; + } +} + +static struct art_leaf *helpgetleaf(u16 *art, const char *s, int len, + const os_char *parsefile, int parseline, u16 *countvar) { + struct art_lookup_ret leaf = art_lookup(art, s - sbase, len); + if (leaf.isnew) { + art_leaves[leaf.leafidx].varstr = VAR_NONE; + art_leaves[leaf.leafidx].subtree = ART_NULL; + ++*countvar; + } + else if (parsefile) { // it's null if an existing entry is allowed + dieparse(parsefile, parseline, "duplicate property name"); + } + return art_leaves + leaf.leafidx; +} + +static inline void handleentry(char *k, char *v, int vlen, + const os_char *file, int line) { + if (ndecls == MAXDECLS) die(2, "out of declaration entries"); + decls[ndecls++] = k - sbase; + char *propname = memchr(v, '/', vlen); + if (!propname) { + dieparse(file, line, "network name not in class/property format"); + } + *propname++ = '\0'; + int sublen = propname - v; + if (sublen > 65535) { + dieparse(file, line, "network class name is far too long"); + } + vlen -= sublen; + struct art_leaf *leaf = helpgetleaf(&art_root, v, sublen, 0, 0, &nclasses); + u16 *subtree = &leaf->subtree; + for (;;) { + if (vlen > 65535) { + dieparse(file, line, "property (SendTable) name is far too long"); + } + char *nextpart = memchr(propname, '/', vlen); + if (!nextpart) { + leaf = helpgetleaf(subtree, propname, vlen, file, line, + &leaf->nsubs); + leaf->varstr = k - sbase; break; - case KV_COND_PREFIX: case KV_COND_SUFFIX: - badparse(state, "unexpected conditional"); + } + *nextpart++ = '\0'; + sublen = nextpart - propname; + leaf = helpgetleaf(subtree, propname, sublen, file, line, &leaf->nsubs); + subtree = &leaf->subtree; + vlen -= sublen; + propname = nextpart; } } -static inline noreturn diewrite(void) { die("couldn't write to file"); } +static void parse(const os_char *file, int len) { + char *s = sbase; // for convenience + if (s[len - 1] != '\n') dieparse(file, 0, "invalid text file (missing EOL)"); + enum { BOL = 0, KEY = 4, KWS = 8, VAL = 12, COM = 16, ERR = -1 }; + static const s8 statetrans[] = { + // layout: any, space|tab, #, \n + [BOL + 0] = KEY, [BOL + 1] = ERR, [BOL + 2] = COM, [BOL + 3] = BOL, + [KEY + 0] = KEY, [KEY + 1] = KWS, [KEY + 2] = ERR, [KEY + 3] = ERR, + [KWS + 0] = VAL, [KWS + 1] = KWS, [KWS + 2] = ERR, [KWS + 3] = ERR, + [VAL + 0] = VAL, [VAL + 1] = VAL, [VAL + 2] = COM, [VAL + 3] = BOL, + [COM + 0] = COM, [COM + 1] = COM, [COM + 2] = COM, [COM + 3] = BOL + }; + char *key, *val; + for (int state = BOL, i = 0, line = 1; i < len; ++i) { + int transidx = state; + switch (s[i]) { + case '\0': dieparse(file, line, "unexpected null byte"); + case ' ': case '\t': transidx += 1; break; + case '#': transidx += 2; break; + case '\n': transidx += 3; + } + int newstate = statetrans[transidx]; + if_cold (newstate == ERR) { + if (state == BOL) dieparse(file, line, "unexpected indentation"); + if (s[i] == '\n') dieparse(file, line, "unexpected end of line"); + dieparse(file, line, "unexpected comment"); + } + switch_exhaust (newstate) { + case KEY: if_cold (state != KEY) key = s + i; break; + case KWS: if_cold (state != KWS) s[i] = '\0'; break; + case VAL: if_cold (state == KWS) val = s + i; break; + case COM: case BOL: + if (state == VAL) { + int j = i; + while (s[j - 1] == ' ' || s[j - 1] == '\t') --j; + s[j] = '\0'; + int vallen = j - (val - s) + 1; + handleentry(key, val, vallen, file, line); + } + } + line += state == BOL; + state = newstate; + } +} -#define _(x) \ - if (fprintf(out, "%s\n", x) < 0) diewrite(); -#define F(f, ...) \ - if (fprintf(out, f "\n", __VA_ARGS__) < 0) diewrite(); +static inline noreturn diewrite(void) { die(100, "couldn't write to file"); } +#define _(x) if (fprintf(out, "%s\n", x) < 0) diewrite(); +#define _i(x) for (int i = 0; i < indent; ++i) fputc('\t', out); _(x) +#define F(f, ...) if (fprintf(out, f "\n", __VA_ARGS__) < 0) diewrite(); +#define Fi(...) for (int i = 0; i < indent; ++i) fputc('\t', out); F(__VA_ARGS__) #define H() \ _( "/* This file is autogenerated by src/build/mkentprops.c. DO NOT EDIT! */") \ _( "") -static void decls(FILE *out) { - for (struct class *c = classes.x[0]; c; c = c->hdr.x[0]) { - for (struct prop **pp = c->props.data; - pp - c->props.data < c->props.sz; ++pp) { -F( "extern bool has_%s;", (*pp)->varname) -F( "extern int %s;", (*pp)->varname) +static void dosendtables(FILE *out, u16 art, int indent) { +_i("switch (*p) {") + while (art != ART_NULL) { + if (art_cores[art].slen != 1) { + const char *tail = sbase + art_cores[art].soff + 1; + int len = art_cores[art].slen - 1; +Fi(" case '%c': if (!strncmp(p + 1, \"%.*s\", %d)) {", +art_firstbytes[art], len, tail, len) + } + else { +Fi(" case '%c': {", art_firstbytes[art]) } + int idx = art_children[art]; + // XXX: kind of a dumb and bad way to distinguish these. okay for now... + if (sbase[art_cores[art].soff + art_cores[art].slen - 1] != '\0') { + dosendtables(out, idx, indent + 1); + } + else { + if (art_leaves[idx].varstr != VAR_NONE) { +_i(" if (mem_loads32(mem_offset(sp, off_SP_type)) != DT_DataTable) {") +Fi(" %s = mem_loads32(mem_offset(sp, off_SP_offset));", +sbase + art_leaves[idx].varstr); +Fi(" --need;") +_i(" }") + } + if (art_leaves[idx].subtree != ART_NULL) { +_i(" if (mem_loads32(mem_offset(sp, off_SP_type)) == DT_DataTable) {") +_i(" const struct SendTable *st = mem_loadptr(mem_offset(sp, off_SP_subtable));") +_i(" // BEGIN SUBTABLE") +Fi(" for (int i = 0, need = %d; i < st->nprops && need; ++i) {", +art_leaves[idx].nsubs) +_i(" const struct SendProp *sp = mem_offset(st->props, sz_SendProp * i);") +_i(" const char *p = mem_loadptr(mem_offset(sp, off_SP_varname));") + dosendtables(out, art_leaves[idx].subtree, indent + 4); +_i(" }") +_i(" // END SUBTABLE") +_i(" }") + } + } +_i(" } break;") + art = art_cores[art].next; } +_i("}") } -static void defs(FILE *out) { - for (struct class *c = classes.x[0]; c; c = c->hdr.x[0]) { - for (struct prop **pp = c->props.data; - pp - c->props.data < c->props.sz; ++pp) { -F( "bool has_%s = false;", (*pp)->varname) -F( "int %s;", (*pp)->varname) +static void doclasses(FILE *out, u16 art, int indent) { +_i("switch (*p) {") + for (; art != ART_NULL; art = art_cores[art].next) { + if (art_cores[art].slen != 1) { + const char *tail = sbase + art_cores[art].soff + 1; + int len = art_cores[art].slen - 1; +Fi(" case '%c': if (!strncmp(p + 1, \"%.*s\", %d)) {", +art_firstbytes[art], len, tail, len) } - } -_( "") -_( "static void initentprops(struct ServerClass *class) {") -F( " for (int needclasses = %d; class; class = class->next) {", nclasses) - char *else1 = ""; - for (struct class *c = classes.x[0]; c; c = c->hdr.x[0]) { - // TODO(opt): some sort of PHF or trie instead of chained strcmp, if we - // ever have more than a few classes/properties? -F( " %sif (!strcmp(class->name, \"%s\")) {", else1, c->name) -_( " struct SendTable *st = class->table;") -F( " int needprops = %d;", c->props.sz) -_( " for (struct SendProp *p = st->props;") -_( " mem_diff(p, st->props) < st->nprops * sz_SendProp;") -_( " p = mem_offset(p, sz_SendProp)) {") -_( " const char *varname = mem_loadptr(mem_offset(p, off_SP_varname));") - char *else2 = ""; - for (struct prop **pp = c->props.data; - pp - c->props.data < c->props.sz; ++pp) { -F( " %sif (!strcmp(varname, \"%s\")) {", else2, (*pp)->propname) -F( " has_%s = true;", (*pp)->varname) -F( " %s = mem_loads32(mem_offset(p, off_SP_offset));", - (*pp)->varname) -_( " if (!--needprops) break;") -_( " }") - else2 = "else "; + else { +Fi(" case '%c': {", art_firstbytes[art]) } -_( " }") -_( " if (!--needclasses) break;") -_( " }") - else1 = "else "; + int idx = art_children[art]; + // XXX: same dumb-and-bad-ness as above. there must be a better way! + if (sbase[art_cores[art].soff + art_cores[art].slen - 1] != '\0') { +Fi(" p += %d;", art_cores[art].slen) + doclasses(out, art_children[art], indent + 2); + } + else { + assume(art_leaves[idx].varstr == VAR_NONE); + assume(art_leaves[idx].subtree != ART_NULL); +_i(" const struct SendTable *st = class->table;") +Fi(" for (int i = 0, need = %d; i < st->nprops && need; ++i) {", +art_leaves[idx].nsubs) + // note: annoyingly long line here, but the generated code gets + // super nested anyway, so there's no point in caring really + // XXX: basically a dupe of dosendtables() - fold into above? +_i(" const struct SendProp *sp = mem_offset(st->props, sz_SendProp * i);") +_i(" const char *p = mem_loadptr(mem_offset(sp, off_SP_varname));") + dosendtables(out, art_leaves[idx].subtree, indent + 3); +_i(" }") + } +_i(" } break;") + } +_i("}") +} + +static void dodecls(FILE *out) { + for (int i = 0; i < ndecls; ++i) { + const char *s = sbase + decls[i]; +F( "extern int %s;", s); +F( "#define has_%s (!!%s)", s, s); // offsets will NEVER be 0, due to vtable! + } +} + +static void doinit(FILE *out) { + for (int i = 0; i < ndecls; ++i) { + const char *s = sbase + decls[i]; +F( "int %s = 0;", s); } +_( "") +_( "static inline void initentprops(const struct ServerClass *class) {") +_( " const char *p = class->name;") +F( " for (int need = %d; need && class; class = class->next) {", nclasses) + doclasses(out, art_root, 2); _( " }") _( "}") } int OS_MAIN(int argc, os_char *argv[]) { - for (++argv; *argv; ++argv) { - int f = os_open_read(*argv); - if (f == -1) die("couldn't open file"); - struct kv_parser kv = {0}; - struct parsestate state = {*argv, &kv}; - char buf[1024]; - int nread; - while (nread = os_read(f, buf, sizeof(buf))) { - if (nread == -1) die("couldn't read file"); - if (!kv_parser_feed(&kv, buf, nread, &kv_cb, &state)) goto ep; - } - if (!kv_parser_done(&kv)) { -ep: fprintf(stderr, "mkentprops: %" fS ":%d:%d: bad syntax: %s\n", - *argv, kv.line, kv.col, kv.errmsg); - exit(1); - } - os_close(f); - } + if (argc != 2) die(1, "wrong number of arguments"); + int f = os_open_read(argv[1]); + if (f == -1) die(100, "couldn't open file"); + vlong len = os_fsize(f); + if (len > 1u << 30 - 1) die(2, "input file is far too large"); + sbase = malloc(len); + if (!sbase) die(100, "couldn't allocate memory"); + if (os_read(f, sbase, len) != len) die(100, "couldn't read file"); + os_close(f); + parse(argv[1], len); FILE *out = fopen(".build/include/entprops.gen.h", "wb"); - if (!out) die("couldn't open entprops.gen.h"); + if (!out) die(100, "couldn't open entprops.gen.h"); H(); - decls(out); + dodecls(out); out = fopen(".build/include/entpropsinit.gen.h", "wb"); - if (!out) die("couldn't open entpropsinit.gen.h"); + if (!out) die(100, "couldn't open entpropsinit.gen.h"); H(); - defs(out); + doinit(out); return 0; } diff --git a/src/build/mkgamedata.c b/src/build/mkgamedata.c index 266a411..1fce1cf 100644 --- a/src/build/mkgamedata.c +++ b/src/build/mkgamedata.c @@ -19,10 +19,8 @@ #include #include "../intdefs.h" -#include "../kv.h" #include "../langext.h" #include "../os.h" -#include "vec.h" #ifdef _WIN32 #define fS "S" @@ -30,214 +28,247 @@ #define fS "s" #endif -static noreturn die(const char *s) { - fprintf(stderr, "mkgamedata: %s\n", s); - exit(100); +static noreturn die(int status, const char *s) { + fprintf(stderr, "mkentprops: %s\n", s); + exit(status); +} + +// concatenated input file contents - string values are indices off this +static char *sbase = 0; + +static const os_char *const *srcnames; +static noreturn dieparse(int file, int line, const char *s) { + fprintf(stderr, "mkentprops: %" fS ":%d: %s\n", srcnames[file], line, s); + exit(2); +} + +#define MAXENTS 32768 +static int tags[MAXENTS]; // varname/gametype +static int exprs[MAXENTS]; +static uchar indents[MAXENTS]; // nesting level +static schar srcfiles[MAXENTS]; +static int srclines[MAXENTS]; +static int nents = 0; + +static inline void handleentry(char *k, char *v, int indent, + int file, int line) { + int previndent = nents ? indents[nents - 1] : -1; // meh + if_cold (indent > previndent + 1) { + dieparse(file, line, "excessive indentation"); + } + if_cold (indent == previndent && !exprs[nents - 1]) { + dieparse(file, line - 1, "missing a value and/or conditional(s)"); + } + if_cold (nents == MAXENTS) die(2, "out of array indices"); + tags[nents] = k - sbase; + exprs[nents] = v - sbase; // will produce garbage for null v. this is fine! + indents[nents] = indent; + srcfiles[nents] = file; + srclines[nents++] = line; } /* - * We keep the gamedata KV format as simple as possible. Default values are + * -- Quick file format documentation! -- + * + * We keep the gamedata format as simple as possible. Default values are * specified as direct key-value pairs: * - * + * * - * Game- or engine-specific values are set using blocks: + * Game- or engine-specific values are set using indented blocks: * - * { ... [default ] } + * + * + * # you can write EOL comments too! + * * * The most complicated it can get is if conditionals are nested, which - * basically translates directly into nested ifs: - * { { } } - * [however many entries...] + * basically translates directly into nested ifs. * - * If that doesn't make sense, just look at one of the existing data files and - * then it should be obvious. :^) - * - * Note: if `default` isn't given in a conditional block, that piece of gamedata - * is considered unavailable and modules that use it won't get initialised/used - * unless all the conditions are met. + * Just be aware that whitespace is significant, and you have to use tabs. + * Any and all future complaints about that decision SHOULD - and MUST - be + * directed to the Python Software Foundation and the authors of the POSIX + * Makefile specification. In that order. */ -struct vec_ent VEC(struct ent *); -struct ent { - const char *name; // (or condition tag, in a child node) - const char *defexpr; - struct vec_ent subents; - struct ent *parent; // to back up a level during parse -}; -// root only contains subents list but it's easier to use the same struct -static struct ent root = {0}; - -struct parsestate { - const os_char *filename; - struct kv_parser *parser; - struct ent *curent; // current ent lol - bool haddefault; // blegh; -}; - -static noreturn badparse(struct parsestate *state, const char *e) { - fprintf(stderr, "mkgamedata: %" fS ":%d:%d: parse error: %s", - state->filename, state->parser->line, state->parser->col, e); - exit(1); -} -static void kv_cb(enum kv_token type, const char *p, uint len, void *ctxt) { - struct parsestate *state = ctxt; - switch (type) { - case KV_IDENT: case KV_IDENT_QUOTED:; - if (len == 7 && !memcmp(p, "default", 7)) { // special case! - if (state->curent == &root) { - badparse(state, "unexpected default keyword at top level"); +static void parse(int file, char *s, int len) { + if (s[len - 1] != '\n') dieparse(file, 0, "invalid text file (missing EOL)"); + enum { BOL = 0, KEY = 4, KWS = 8, VAL = 12, COM = 16, ERR = -1 }; + static const s8 statetrans[] = { + // layout: any, space|tab, #, \n + [BOL + 0] = KEY, [BOL + 1] = BOL, [BOL + 2] = COM, [BOL + 3] = BOL, + [KEY + 0] = KEY, [KEY + 1] = KWS, [KEY + 2] = COM, [KEY + 3] = BOL, + [KWS + 0] = VAL, [KWS + 1] = KWS, [KWS + 2] = COM, [KWS + 3] = BOL, + [VAL + 0] = VAL, [VAL + 1] = VAL, [VAL + 2] = COM, [VAL + 3] = BOL, + [COM + 0] = COM, [COM + 1] = COM, [COM + 2] = COM, [COM + 3] = BOL + }; + char *key, *val = sbase; // 0 index by default (invalid value works as null) + for (int state = BOL, i = 0, line = 1, indent = 0; i < len; ++i) { + int transidx = state; + char c = s[i]; + switch (c) { + case '\0': dieparse(file, line, "unexpected null byte"); + case ' ': + if_cold (state == BOL) { + dieparse(file, line, "unexpected space at start of line"); } - struct ent *e = state->curent; - if (e->defexpr) { - badparse(state, "multiple default keywords"); - } - state->haddefault = true; + case '\t': + transidx += 1; break; - } - state->haddefault = false; - char *k = malloc(len + 1); - if (!k) die("couldn't allocate key string"); - // FIXME(?): should check and prevent duplicate keys probably! - // need table.h or something to avoid O(n^2) :) - memcpy(k, p, len); k[len] = '\0'; - struct ent *e = malloc(sizeof(*e)); - if (!e) die("couldn't allocate memory"); - e->name = k; - e->defexpr = 0; - e->subents = (struct vec_ent){0}; - if (!vec_push(&state->curent->subents, e)) { - die("couldn't append to array"); - } - e->parent = state->curent; - state->curent = e; - break; - case KV_NEST_START: - if (state->haddefault) badparse(state, "default cannot be a block"); - break; - case KV_NEST_END: - if (!state->curent->parent) { - badparse(state, "unexpected closing brace"); - } - state->curent = state->curent->parent; - break; - case KV_VAL: case KV_VAL_QUOTED:; - char *s = malloc(len + 1); - if (!s) die("couldn't allocate value string"); - memcpy(s, p, len); s[len] = '\0'; - state->curent->defexpr = s; - if (!state->haddefault) { - // a non-default value is just a node that itself only has a - // default value. - state->curent = state->curent->parent; - } - break; - case KV_COND_PREFIX: case KV_COND_SUFFIX: - badparse(state, "unexpected conditional"); + case '#': transidx += 2; break; + case '\n': transidx += 3; + } + int newstate = statetrans[transidx]; + switch_exhaust (newstate) { + case KEY: if_cold (state != KEY) key = s + i; break; + case KWS: if_cold (state != KWS) s[i] = '\0'; break; + case VAL: if_cold (state == KWS) val = s + i; break; + case BOL: + indent += state == BOL; + if_cold (indent > 255) { // this shouldn't happen if we're sober + dieparse(file, line, "exceeded max nesting level (255)"); + } + case COM: + if_hot (state != BOL) { + if (state != COM) { // blegh! + int j = i; + while (s[j - 1] == ' ' || s[j - 1] == '\t') --j; + s[j] = '\0'; + handleentry(key, val, indent, file, line); + } + val = sbase; // reset this again + } + } + if_cold (c == '\n') { // ugh, so much for state transitions. + indent = 0; + ++line; + } + state = newstate; } } -static inline noreturn diewrite(void) { die("couldn't write to file"); } - -#define _doindent \ - for (int _indent = 0; _indent < indent; ++_indent) { \ - if (fputs("\t", out) == -1) diewrite(); \ - } -#define _(x) \ - if (fprintf(out, "%s\n", x) < 0) diewrite(); -#define _i(x) _doindent _(x) -#define F(f, ...) \ - if (fprintf(out, f "\n", __VA_ARGS__) < 0) diewrite(); -#define Fi(...) _doindent F(__VA_ARGS__) +static inline noreturn diewrite(void) { die(100, "couldn't write to file"); } +#define _(x) if (fprintf(out, "%s\n", x) < 0) diewrite(); +#define _i(x) for (int i = 0; i < indent; ++i) fputc('\t', out); _(x) +#define F(f, ...) if (fprintf(out, f "\n", __VA_ARGS__) < 0) diewrite(); +#define Fi(...) for (int i = 0; i < indent; ++i) fputc('\t', out); F(__VA_ARGS__) #define H() \ _( "/* This file is autogenerated by src/build/mkgamedata.c. DO NOT EDIT! */") \ _( "") static void decls(FILE *out) { - for (struct ent *const *pp = root.subents.data; - pp - root.subents.data < root.subents.sz; ++pp) { - if ((*pp)->defexpr) { -F( "#define has_%s true", (*pp)->name) - if ((*pp)->subents.sz) { -F( "extern int %s;", (*pp)->name) - } - else { -F( "enum { %s = %s };", (*pp)->name, (*pp)->defexpr) - } + for (int i = 0; i < nents; ++i) { + if (indents[i] != 0) continue; +F( "#line %d \"%" fS "\"", srclines[i], srcnames[srcfiles[i]]) + if (exprs[i]) { // default value is specified - entry always exists + // *technically* this case is redundant - the other has_ macro would + // still work. however, having a distinct case makes the generated + // header a little easier to read at a glance. +F( "#define has_%s 1", sbase + tags[i]) } - else { -F( "extern bool has_%s;", (*pp)->name) -F( "extern int %s;", (*pp)->name) + else { // entry is missing unless a tag is matched + // implementation detail: INT_MIN is reserved for missing gamedata! + // XXX: for max robustness, should probably check for this in input? +F( "#define has_%s (%s != -2147483648)", sbase + tags[i], sbase + tags[i]) + } +F( "#line %d \"%" fS "\"", srclines[i], srcnames[srcfiles[i]]) + if_cold (i == nents - 1 || !indents[i + 1]) { // no tags - it's constant +F( "enum { %s = (%s) };", sbase + tags[i], sbase + exprs[i]) + } + else { // global variable intialised by gamedata_init() call +F( "extern int %s;", sbase + tags[i]); } } } -static void inits(FILE *out, const char *var, struct vec_ent *v, bool needhas, - int indent) { - for (struct ent *const *pp = v->data; pp - v->data < v->sz; ++pp) { -Fi("if (GAMETYPE_MATCHES(%s)) {", (*pp)->name) - if ((*pp)->defexpr) { - if (needhas) { -Fi(" has_%s = true;", var); +static void defs(FILE *out) { + for (int i = 0; i < nents; ++i) { + if (indents[i] != 0) continue; + if_hot (i < nents - 1 && indents[i + 1]) { +F( "#line %d \"%" fS "\"", srclines[i], srcnames[srcfiles[i]]) + if (exprs[i]) { +F( "int %s = (%s);", sbase + tags[i], sbase + exprs[i]) + } + else { +F( "int %s = -2147483648;", sbase + tags[i]) } -Fi(" %s = %s;", var, (*pp)->defexpr); } - inits(out, var, &(*pp)->subents, needhas && !(*pp)->defexpr, indent + 1); -_i("}") } } -static void defs(FILE *out) { - for (struct ent *const *pp = root.subents.data; - pp - root.subents.data < root.subents.sz; ++pp) { - if ((*pp)->defexpr) { - if ((*pp)->subents.sz) { -F( "int %s = %s;", (*pp)->name, (*pp)->defexpr); +static void init(FILE *out) { +_( "void gamedata_init(void) {") + int varidx; + int indent = 0; + for (int i = 0; i < nents; ++i) { + if (indents[i] < indents[i - 1]) { + for (; indent != indents[i]; --indent) { +_i("}") } } + if (indents[i] == 0) { + varidx = i; + continue; + } +F( "#line %d \"%" fS "\"", srclines[i], srcnames[srcfiles[i]]) + if (indents[i] > indents[i - 1]) { +Fi(" if (GAMETYPE_MATCHES(%s)) {", sbase + tags[i]) + ++indent; + } else { -F( "int %s;", (*pp)->name); -F( "bool has_%s = false;", (*pp)->name); +_i("}") +F( "#line %d \"%" fS "\"", srclines[i], srcnames[srcfiles[i]]) +Fi("else if (GAMETYPE_MATCHES(%s)) {", sbase + tags[i]) + } + if (exprs[i]) { +F( "#line %d \"%" fS "\"", srclines[i], srcnames[srcfiles[i]]) +Fi(" %s = (%s);", sbase + tags[varidx], sbase + exprs[i]) } } -_( "") -_( "void gamedata_init(void) {") - for (struct ent *const *pp = root.subents.data; - pp - root.subents.data < root.subents.sz; ++pp) { - inits(out, (*pp)->name, &(*pp)->subents, !(*pp)->defexpr, 1); + for (; indent != 0; --indent) { +_i("}") } _( "}") } int OS_MAIN(int argc, os_char *argv[]) { - for (++argv; *argv; ++argv) { - int fd = os_open_read(*argv); - if (fd == -1) die("couldn't open file"); - struct kv_parser kv = {0}; - struct parsestate state = {*argv, &kv, &root}; - char buf[1024]; - int nread; - while (nread = os_read(fd, buf, sizeof(buf))) { - if (nread == -1) die("couldn't read file"); - if (!kv_parser_feed(&kv, buf, nread, &kv_cb, &state)) goto ep; + srcnames = (const os_char *const *)argv; + int sbase_len = 0, sbase_max = 65536; + sbase = malloc(sbase_max); + if (!sbase) die(100, "couldn't allocate memory"); + int n = 1; + for (++argv; *argv; ++argv, ++n) { + int f = os_open_read(*argv); + if (f == -1) die(100, "couldn't open file"); + vlong len = os_fsize(f); + if (sbase_len + len > 1u << 29) { + die(2, "combined input files are far too large"); } - if (!kv_parser_done(&kv)) { -ep: fprintf(stderr, "mkgamedata: %" fS ":%d:%d: bad syntax: %s\n", - *argv, kv.line, kv.col, kv.errmsg); - exit(1); + if (sbase_len + len > sbase_max) { + fprintf(stderr, "mkgamedata: warning: need to resize string. " + "increase sbase_max to avoid this extra work!\n"); + sbase_max *= 4; + sbase = realloc(sbase, sbase_max); + if (!sbase) die(100, "couldn't grow memory allocation"); } - os_close(fd); + char *s = sbase + sbase_len; + if (os_read(f, s, len) != len) die(100, "couldn't read file"); + os_close(f); + parse(n, s, len); + sbase_len += len; } FILE *out = fopen(".build/include/gamedata.gen.h", "wb"); - if (!out) die("couldn't open gamedata.gen.h"); + if (!out) die(100, "couldn't open gamedata.gen.h"); H(); decls(out); out = fopen(".build/include/gamedatainit.gen.h", "wb"); - if (!out) die("couldn't open gamedatainit.gen.h"); + if (!out) die(100, "couldn't open gamedatainit.gen.h"); H(); defs(out); + _("") + init(out); return 0; } diff --git a/src/build/vec.h b/src/build/vec.h index 6b4dffb..6dfa645 100644 --- a/src/build/vec.h +++ b/src/build/vec.h @@ -62,7 +62,7 @@ static bool _vec_make_room(struct _vec *v, uint tsize, uint addcnt) { // internal: for reuse by vec0 #define _vec_push(v, val, slack) ( \ ((v)->sz + (slack) < (v)->max || \ - _vec_make_room((struct _vec *)(v), sizeof(val), 1)) && \ + _vec_make_room((struct _vec *)(v), sizeof(val), 1)) && /*NOLINT*/ \ ((v)->data[(v)->sz++ - slack] = (val), true) \ ) -- cgit v1.2.3