diff options
author | Michael Smith <mikesmiffy128@gmail.com> | 2024-08-22 00:02:48 +0100 |
---|---|---|
committer | Michael Smith <mikesmiffy128@gmail.com> | 2024-08-23 20:40:01 +0100 |
commit | cf0354eb8e043fcd9c6c17756701972f948a16f1 (patch) | |
tree | de931afc161f43e95d040ae655a6a2081aeb4ff2 /src/build/mkentprops.c | |
parent | 78323e416f79ef9c26bbd742082627bc45e116c1 (diff) |
Rewrite the gamedata and entprops systems entirely
This removes the horrible janky old KeyValues parser and replaces it
with a couple of trivial ad-hoc text parsers. In doing so, make the
format of the actual gamedata files more human-friendly too.
We also gain support for nested SendTables in mkentprops, which are
required to get at various things like player velocity. And, the actual
string matching is made more efficient (or, at least, more scalable) by
way of a cool radix tree thing which generates a bunch of switch cases
on distinct characters.
Diffstat (limited to 'src/build/mkentprops.c')
-rw-r--r-- | src/build/mkentprops.c | 447 |
1 files changed, 309 insertions, 138 deletions
diff --git a/src/build/mkentprops.c b/src/build/mkentprops.c index fdbb8af..4806996 100644 --- a/src/build/mkentprops.c +++ b/src/build/mkentprops.c @@ -17,13 +17,11 @@ #include <stdio.h> #include <stdlib.h> #include <string.h> +#include <sys/stat.h> #include "../intdefs.h" -#include "../kv.h" #include "../langext.h" #include "../os.h" -#include "skiplist.h" -#include "vec.h" #ifdef _WIN32 #define fS "S" @@ -31,176 +29,349 @@ #define fS "s" #endif -static noreturn die(const char *s) { +static noreturn die(int status, const char *s) { fprintf(stderr, "mkentprops: %s\n", s); - exit(100); + exit(status); } - -struct prop { - const char *varname; /* the C global name */ - const char *propname; /* the entity property name */ - struct prop *next; -}; -struct vec_prop VEC(struct prop *); - -DECL_SKIPLIST(static, class, struct class, const char *, 4) -struct class { - const char *name; /* the entity class name */ - struct vec_prop props; - struct skiplist_hdr_class hdr; -}; -static inline int cmp_class(struct class *c, const char *s) { - return strcmp(c->name, s); +static noreturn dieparse(const os_char *file, int line, const char *s) { + fprintf(stderr, "mkentprops: %" fS ":%d: %s\n", file, line, s); + exit(2); } -static inline struct skiplist_hdr_class *hdr_class(struct class *c) { - return &c->hdr; + +static char *sbase; // input file contents - string values are indices off this + +// custom data structure of the day: zero-alloc half-SoA adaptive radix trees(!) + +#define ART_MAXNODES 16384 +static uchar art_firstbytes[ART_MAXNODES]; +static struct art_core { + int soff; // string offset to entire key chunk (including firstbyte) + u16 slen; // number of bytes in key chunk (including firstbyte, >=1) + u16 next; // sibling node (same prefix, different firstbyte) +} art_cores[ART_MAXNODES]; +// if node doesn't end in \0: child node (i.e. key appends more bytes) +// if node DOES end in \0: offset into art_leaves (below) +static u16 art_children[ART_MAXNODES]; +static int art_nnodes = 0; + +#define ART_NULL ((u16)-1) + +static inline int art_newnode(void) { + if (art_nnodes == ART_MAXNODES) die(2, "out of tree nodes"); + return art_nnodes++; } -DEF_SKIPLIST(static, class, cmp_class, hdr_class) -static struct skiplist_hdr_class classes = {0}; -static int nclasses = 0; - -struct parsestate { - const os_char *filename; - struct kv_parser *parser; - char *lastvar; -}; - -static noreturn badparse(struct parsestate *state, const char *e) { - fprintf(stderr, "mkentprops: %" fS ":%d:%d: parse error: %s", - state->filename, state->parser->line, state->parser->col, e); - exit(1); + +#define ART_MAXLEAVES 8192 +static struct art_leaf { + int varstr; // offset of string (generated variable), if any, or -1 if none + u16 subtree; // art index of subtree (nested SendTable), or -1 if none + u16 nsubs; // number of subtrees (used to short-circuit the runtime search) +} art_leaves[ART_MAXLEAVES]; +static int art_nleaves = 0; +static u16 art_root = ART_NULL; // ServerClasses (which point at SendTables) +static u16 nclasses = 0; // similar short circuit for ServerClasses + +#define VAR_NONE -1 + +// for quick iteration over var names to generate decls header without ART faff +#define MAXDECLS 4096 +static int decls[MAXDECLS]; +static int ndecls = 0; + +static inline int art_newleaf(void) { + if (art_nleaves == ART_MAXLEAVES) die(2, "out of leaf nodes"); + return art_nleaves++; } -static void kv_cb(enum kv_token type, const char *p, uint len, void *ctxt) { - struct parsestate *state = ctxt; - switch_exhaust_enum (kv_token, type) { - case KV_IDENT: case KV_IDENT_QUOTED: - state->lastvar = malloc(len + 1); - if (!state->lastvar) die("couldn't allocate memory"); - memcpy(state->lastvar, p, len); state->lastvar[len] = '\0'; - break; - case KV_NEST_START: badparse(state, "unexpected nested block"); - case KV_NEST_END: badparse(state, "unexpected closing brace"); - case KV_VAL: case KV_VAL_QUOTED:; - struct prop *prop = malloc(sizeof(*prop)); - if (!prop) die("couldn't allocate memory"); - prop->varname = state->lastvar; - char *classname = malloc(len + 1); - if (!classname) die("couldn't allocate memory"); - memcpy(classname, p, len); classname[len] = '\0'; - char *propname = strchr(classname, '/'); - if (!propname) { - badparse(state, "network name not in class/prop format"); +static struct art_lookup_ret { + bool isnew; // true if node created, false if found + u16 leafidx; // index of value (leaf) node +} art_lookup(u16 *art, int soff, int len) { + assume(len > 0); // everything must be null-terminated! + for (;;) { + const uchar *p = (const uchar *)sbase + soff; + u16 cur = *art; + if (cur == ART_NULL) { // append + int new = art_newnode(); + *art = new; + art_firstbytes[new] = *p; + art_cores[new].soff = soff; + art_cores[new].slen = len; + art_cores[new].next = ART_NULL; + int leaf = art_newleaf(); // N.B. firstbyte is already 0 here + art_children[new] = leaf; + return (struct art_lookup_ret){true, leaf}; + } + while (art_firstbytes[cur] == *p) { + int nodelen = art_cores[cur].slen; + int matchlen = 1; + const uchar *q = (const uchar *)sbase + art_cores[cur].soff; + for (; matchlen < nodelen; ++matchlen) { + if (p[matchlen] != q[matchlen]) { + // node and key diverge: split into child nodes + // (left is new part of key string, right is existing tail) + art_cores[cur].slen = matchlen; + int l = art_newnode(), r = art_newnode(); + art_firstbytes[l] = p[matchlen]; + art_cores[l].soff = soff + matchlen; + art_cores[l].slen = len - matchlen; + art_cores[l].next = r; + art_firstbytes[r] = q[matchlen]; + art_cores[r].soff = art_cores[cur].soff + matchlen; + art_cores[r].slen = nodelen - matchlen; + art_cores[r].next = ART_NULL; + art_children[r] = art_children[cur]; + art_children[cur] = l; + int leaf = art_newleaf(); + art_children[l] = leaf; + return (struct art_lookup_ret){true, leaf}; + } } - *propname = '\0'; ++propname; // split! - prop->propname = propname; - struct class *class = skiplist_get_class(&classes, classname); - if (!class) { - class = malloc(sizeof(*class)); - if (!class) die("couldn't allocate memory"); - *class = (struct class){.name = classname}; - skiplist_insert_class(&classes, classname, class); - ++nclasses; + if (matchlen == len) { + // node matches entire key: we have matched an existing entry + return (struct art_lookup_ret){false, art_children[cur]}; } - // (if class is already there just leak classname, no point freeing) - if (!vec_push(&class->props, prop)) die("couldn't append to array"); + // node is substring of key: descend into child nodes + soff += matchlen; + len -= matchlen; + cur = art_children[cur]; // note: this can't be ART_NULL (thus loop) + p = (const uchar *)sbase + soff; // XXX: kinda silly dupe + } + // if we didn't match this node, try the next sibling node. + // if sibling is null, we'll hit the append case above. + art = &art_cores[cur].next; + } +} + +static struct art_leaf *helpgetleaf(u16 *art, const char *s, int len, + const os_char *parsefile, int parseline, u16 *countvar) { + struct art_lookup_ret leaf = art_lookup(art, s - sbase, len); + if (leaf.isnew) { + art_leaves[leaf.leafidx].varstr = VAR_NONE; + art_leaves[leaf.leafidx].subtree = ART_NULL; + ++*countvar; + } + else if (parsefile) { // it's null if an existing entry is allowed + dieparse(parsefile, parseline, "duplicate property name"); + } + return art_leaves + leaf.leafidx; +} + +static inline void handleentry(char *k, char *v, int vlen, + const os_char *file, int line) { + if (ndecls == MAXDECLS) die(2, "out of declaration entries"); + decls[ndecls++] = k - sbase; + char *propname = memchr(v, '/', vlen); + if (!propname) { + dieparse(file, line, "network name not in class/property format"); + } + *propname++ = '\0'; + int sublen = propname - v; + if (sublen > 65535) { + dieparse(file, line, "network class name is far too long"); + } + vlen -= sublen; + struct art_leaf *leaf = helpgetleaf(&art_root, v, sublen, 0, 0, &nclasses); + u16 *subtree = &leaf->subtree; + for (;;) { + if (vlen > 65535) { + dieparse(file, line, "property (SendTable) name is far too long"); + } + char *nextpart = memchr(propname, '/', vlen); + if (!nextpart) { + leaf = helpgetleaf(subtree, propname, vlen, file, line, + &leaf->nsubs); + leaf->varstr = k - sbase; break; - case KV_COND_PREFIX: case KV_COND_SUFFIX: - badparse(state, "unexpected conditional"); + } + *nextpart++ = '\0'; + sublen = nextpart - propname; + leaf = helpgetleaf(subtree, propname, sublen, file, line, &leaf->nsubs); + subtree = &leaf->subtree; + vlen -= sublen; + propname = nextpart; } } -static inline noreturn diewrite(void) { die("couldn't write to file"); } +static void parse(const os_char *file, int len) { + char *s = sbase; // for convenience + if (s[len - 1] != '\n') dieparse(file, 0, "invalid text file (missing EOL)"); + enum { BOL = 0, KEY = 4, KWS = 8, VAL = 12, COM = 16, ERR = -1 }; + static const s8 statetrans[] = { + // layout: any, space|tab, #, \n + [BOL + 0] = KEY, [BOL + 1] = ERR, [BOL + 2] = COM, [BOL + 3] = BOL, + [KEY + 0] = KEY, [KEY + 1] = KWS, [KEY + 2] = ERR, [KEY + 3] = ERR, + [KWS + 0] = VAL, [KWS + 1] = KWS, [KWS + 2] = ERR, [KWS + 3] = ERR, + [VAL + 0] = VAL, [VAL + 1] = VAL, [VAL + 2] = COM, [VAL + 3] = BOL, + [COM + 0] = COM, [COM + 1] = COM, [COM + 2] = COM, [COM + 3] = BOL + }; + char *key, *val; + for (int state = BOL, i = 0, line = 1; i < len; ++i) { + int transidx = state; + switch (s[i]) { + case '\0': dieparse(file, line, "unexpected null byte"); + case ' ': case '\t': transidx += 1; break; + case '#': transidx += 2; break; + case '\n': transidx += 3; + } + int newstate = statetrans[transidx]; + if_cold (newstate == ERR) { + if (state == BOL) dieparse(file, line, "unexpected indentation"); + if (s[i] == '\n') dieparse(file, line, "unexpected end of line"); + dieparse(file, line, "unexpected comment"); + } + switch_exhaust (newstate) { + case KEY: if_cold (state != KEY) key = s + i; break; + case KWS: if_cold (state != KWS) s[i] = '\0'; break; + case VAL: if_cold (state == KWS) val = s + i; break; + case COM: case BOL: + if (state == VAL) { + int j = i; + while (s[j - 1] == ' ' || s[j - 1] == '\t') --j; + s[j] = '\0'; + int vallen = j - (val - s) + 1; + handleentry(key, val, vallen, file, line); + } + } + line += state == BOL; + state = newstate; + } +} -#define _(x) \ - if (fprintf(out, "%s\n", x) < 0) diewrite(); -#define F(f, ...) \ - if (fprintf(out, f "\n", __VA_ARGS__) < 0) diewrite(); +static inline noreturn diewrite(void) { die(100, "couldn't write to file"); } +#define _(x) if (fprintf(out, "%s\n", x) < 0) diewrite(); +#define _i(x) for (int i = 0; i < indent; ++i) fputc('\t', out); _(x) +#define F(f, ...) if (fprintf(out, f "\n", __VA_ARGS__) < 0) diewrite(); +#define Fi(...) for (int i = 0; i < indent; ++i) fputc('\t', out); F(__VA_ARGS__) #define H() \ _( "/* This file is autogenerated by src/build/mkentprops.c. DO NOT EDIT! */") \ _( "") -static void decls(FILE *out) { - for (struct class *c = classes.x[0]; c; c = c->hdr.x[0]) { - for (struct prop **pp = c->props.data; - pp - c->props.data < c->props.sz; ++pp) { -F( "extern bool has_%s;", (*pp)->varname) -F( "extern int %s;", (*pp)->varname) +static void dosendtables(FILE *out, u16 art, int indent) { +_i("switch (*p) {") + while (art != ART_NULL) { + if (art_cores[art].slen != 1) { + const char *tail = sbase + art_cores[art].soff + 1; + int len = art_cores[art].slen - 1; +Fi(" case '%c': if (!strncmp(p + 1, \"%.*s\", %d)) {", +art_firstbytes[art], len, tail, len) + } + else { +Fi(" case '%c': {", art_firstbytes[art]) } + int idx = art_children[art]; + // XXX: kind of a dumb and bad way to distinguish these. okay for now... + if (sbase[art_cores[art].soff + art_cores[art].slen - 1] != '\0') { + dosendtables(out, idx, indent + 1); + } + else { + if (art_leaves[idx].varstr != VAR_NONE) { +_i(" if (mem_loads32(mem_offset(sp, off_SP_type)) != DT_DataTable) {") +Fi(" %s = mem_loads32(mem_offset(sp, off_SP_offset));", +sbase + art_leaves[idx].varstr); +Fi(" --need;") +_i(" }") + } + if (art_leaves[idx].subtree != ART_NULL) { +_i(" if (mem_loads32(mem_offset(sp, off_SP_type)) == DT_DataTable) {") +_i(" const struct SendTable *st = mem_loadptr(mem_offset(sp, off_SP_subtable));") +_i(" // BEGIN SUBTABLE") +Fi(" for (int i = 0, need = %d; i < st->nprops && need; ++i) {", +art_leaves[idx].nsubs) +_i(" const struct SendProp *sp = mem_offset(st->props, sz_SendProp * i);") +_i(" const char *p = mem_loadptr(mem_offset(sp, off_SP_varname));") + dosendtables(out, art_leaves[idx].subtree, indent + 4); +_i(" }") +_i(" // END SUBTABLE") +_i(" }") + } + } +_i(" } break;") + art = art_cores[art].next; } +_i("}") } -static void defs(FILE *out) { - for (struct class *c = classes.x[0]; c; c = c->hdr.x[0]) { - for (struct prop **pp = c->props.data; - pp - c->props.data < c->props.sz; ++pp) { -F( "bool has_%s = false;", (*pp)->varname) -F( "int %s;", (*pp)->varname) +static void doclasses(FILE *out, u16 art, int indent) { +_i("switch (*p) {") + for (; art != ART_NULL; art = art_cores[art].next) { + if (art_cores[art].slen != 1) { + const char *tail = sbase + art_cores[art].soff + 1; + int len = art_cores[art].slen - 1; +Fi(" case '%c': if (!strncmp(p + 1, \"%.*s\", %d)) {", +art_firstbytes[art], len, tail, len) } - } -_( "") -_( "static void initentprops(struct ServerClass *class) {") -F( " for (int needclasses = %d; class; class = class->next) {", nclasses) - char *else1 = ""; - for (struct class *c = classes.x[0]; c; c = c->hdr.x[0]) { - // TODO(opt): some sort of PHF or trie instead of chained strcmp, if we - // ever have more than a few classes/properties? -F( " %sif (!strcmp(class->name, \"%s\")) {", else1, c->name) -_( " struct SendTable *st = class->table;") -F( " int needprops = %d;", c->props.sz) -_( " for (struct SendProp *p = st->props;") -_( " mem_diff(p, st->props) < st->nprops * sz_SendProp;") -_( " p = mem_offset(p, sz_SendProp)) {") -_( " const char *varname = mem_loadptr(mem_offset(p, off_SP_varname));") - char *else2 = ""; - for (struct prop **pp = c->props.data; - pp - c->props.data < c->props.sz; ++pp) { -F( " %sif (!strcmp(varname, \"%s\")) {", else2, (*pp)->propname) -F( " has_%s = true;", (*pp)->varname) -F( " %s = mem_loads32(mem_offset(p, off_SP_offset));", - (*pp)->varname) -_( " if (!--needprops) break;") -_( " }") - else2 = "else "; + else { +Fi(" case '%c': {", art_firstbytes[art]) } -_( " }") -_( " if (!--needclasses) break;") -_( " }") - else1 = "else "; + int idx = art_children[art]; + // XXX: same dumb-and-bad-ness as above. there must be a better way! + if (sbase[art_cores[art].soff + art_cores[art].slen - 1] != '\0') { +Fi(" p += %d;", art_cores[art].slen) + doclasses(out, art_children[art], indent + 2); + } + else { + assume(art_leaves[idx].varstr == VAR_NONE); + assume(art_leaves[idx].subtree != ART_NULL); +_i(" const struct SendTable *st = class->table;") +Fi(" for (int i = 0, need = %d; i < st->nprops && need; ++i) {", +art_leaves[idx].nsubs) + // note: annoyingly long line here, but the generated code gets + // super nested anyway, so there's no point in caring really + // XXX: basically a dupe of dosendtables() - fold into above? +_i(" const struct SendProp *sp = mem_offset(st->props, sz_SendProp * i);") +_i(" const char *p = mem_loadptr(mem_offset(sp, off_SP_varname));") + dosendtables(out, art_leaves[idx].subtree, indent + 3); +_i(" }") + } +_i(" } break;") + } +_i("}") +} + +static void dodecls(FILE *out) { + for (int i = 0; i < ndecls; ++i) { + const char *s = sbase + decls[i]; +F( "extern int %s;", s); +F( "#define has_%s (!!%s)", s, s); // offsets will NEVER be 0, due to vtable! + } +} + +static void doinit(FILE *out) { + for (int i = 0; i < ndecls; ++i) { + const char *s = sbase + decls[i]; +F( "int %s = 0;", s); } +_( "") +_( "static inline void initentprops(const struct ServerClass *class) {") +_( " const char *p = class->name;") +F( " for (int need = %d; need && class; class = class->next) {", nclasses) + doclasses(out, art_root, 2); _( " }") _( "}") } int OS_MAIN(int argc, os_char *argv[]) { - for (++argv; *argv; ++argv) { - int f = os_open_read(*argv); - if (f == -1) die("couldn't open file"); - struct kv_parser kv = {0}; - struct parsestate state = {*argv, &kv}; - char buf[1024]; - int nread; - while (nread = os_read(f, buf, sizeof(buf))) { - if (nread == -1) die("couldn't read file"); - if (!kv_parser_feed(&kv, buf, nread, &kv_cb, &state)) goto ep; - } - if (!kv_parser_done(&kv)) { -ep: fprintf(stderr, "mkentprops: %" fS ":%d:%d: bad syntax: %s\n", - *argv, kv.line, kv.col, kv.errmsg); - exit(1); - } - os_close(f); - } + if (argc != 2) die(1, "wrong number of arguments"); + int f = os_open_read(argv[1]); + if (f == -1) die(100, "couldn't open file"); + vlong len = os_fsize(f); + if (len > 1u << 30 - 1) die(2, "input file is far too large"); + sbase = malloc(len); + if (!sbase) die(100, "couldn't allocate memory"); + if (os_read(f, sbase, len) != len) die(100, "couldn't read file"); + os_close(f); + parse(argv[1], len); FILE *out = fopen(".build/include/entprops.gen.h", "wb"); - if (!out) die("couldn't open entprops.gen.h"); + if (!out) die(100, "couldn't open entprops.gen.h"); H(); - decls(out); + dodecls(out); out = fopen(".build/include/entpropsinit.gen.h", "wb"); - if (!out) die("couldn't open entpropsinit.gen.h"); + if (!out) die(100, "couldn't open entpropsinit.gen.h"); H(); - defs(out); + doinit(out); return 0; } |