From 6818b362a776f0cc5a6068ed119dc2ebcbc5a9cc Mon Sep 17 00:00:00 2001 From: Michael Smith Date: Thu, 24 Feb 2022 00:47:05 +0000 Subject: Fix some old KV parser issues - Implement conditionals in the lexer and reject or ignore them in callbacks. This will allow something to use them later if needed. - Make error handling less stupid (return a bool instead of using the state struct). --- src/build/mkgamedata.c | 11 ++-- src/gameinfo.c | 12 ++-- src/kv.c | 169 +++++++++++++++++++++++++++++++++---------------- src/kv.h | 27 ++++---- 4 files changed, 142 insertions(+), 77 deletions(-) (limited to 'src') diff --git a/src/build/mkgamedata.c b/src/build/mkgamedata.c index ca2e130..e2e59ff 100644 --- a/src/build/mkgamedata.c +++ b/src/build/mkgamedata.c @@ -1,5 +1,5 @@ /* - * Copyright © 2021 Michael Smith + * Copyright © 2022 Michael Smith * * Permission to use, copy, modify, and/or distribute this software for any * purpose with or without fee is hereby granted, provided that the above @@ -144,6 +144,9 @@ static void kv_cb(enum kv_token type, const char *p, uint len, void *ctxt) { *ents_tail = e; ents_tail = &e->next; } + break; + case KV_COND_PREFIX: case KV_COND_SUFFIX: + badparse(state, "unexpected conditional"); } } @@ -165,11 +168,9 @@ int OS_MAIN(int argc, os_char *argv[]) { int nread; while (nread = read(fd, buf, sizeof(buf))) { if (nread == -1) die("couldn't read file"); - kv_parser_feed(&kv, buf, nread, &kv_cb, &state); - if (kv.state == KV_PARSER_ERROR) goto ep; + if (!kv_parser_feed(&kv, buf, nread, &kv_cb, &state)) goto ep; } - kv_parser_done(&kv); - if (kv.state == KV_PARSER_ERROR) { + if (!kv_parser_done(&kv)) { ep: fprintf(stderr, "mkgamedata: %" fS ":%d:%d: bad syntax: %s\n", *argv, kv.line, kv.col, kv.errmsg); exit(1); diff --git a/src/gameinfo.c b/src/gameinfo.c index a5f1a42..4af5df7 100644 --- a/src/gameinfo.c +++ b/src/gameinfo.c @@ -1,5 +1,5 @@ /* - * Copyright © 2021 Michael Smith + * Copyright © 2022 Michael Smith * * Permission to use, copy, 
modify, and/or distribute this software for any * purpose with or without fee is hereby granted, provided that the above @@ -228,6 +228,10 @@ static void kv_cb(enum kv_token type, const char *p, uint len, void *_ctxt) { break; case KV_NEST_END: if (ctxt->dontcarelvl) --ctxt->dontcarelvl; else --ctxt->nestlvl; + break; + case KV_COND_PREFIX: case KV_COND_SUFFIX: + con_warn("gameinfo: warning: just ignoring conditional \"%.*s\"", + len, p); } #undef MATCH } @@ -353,11 +357,9 @@ bool gameinfo_init(void) { strerror(errno)); goto e; } - kv_parser_feed(&kvp, buf, nread, &kv_cb, &ctxt); - if (kvp.state == KV_PARSER_ERROR) goto ep; + if (!kv_parser_feed(&kvp, buf, nread, &kv_cb, &ctxt)) goto ep; } - kv_parser_done(&kvp); - if (kvp.state == KV_PARSER_ERROR) goto ep; + if (!kv_parser_done(&kvp)) goto ep; close(fd); return true; diff --git a/src/kv.c b/src/kv.c index 8258b16..7ac48e1 100644 --- a/src/kv.c +++ b/src/kv.c @@ -1,5 +1,5 @@ /* - * Copyright © 2021 Michael Smith + * Copyright © 2022 Michael Smith * * Permission to use, copy, modify, and/or distribute this software for any * purpose with or without fee is hereby granted, provided that the above @@ -18,10 +18,22 @@ #include "intdefs.h" #include "kv.h" +#include "unreachable.h" #define EOF -1 -void kv_parser_feed(struct kv_parser *this, const char *in, uint sz, +// parser states, implemented by STATE() macros in kv_parser_feed() below. +// needs to be kept in sync! +enum { + ok, ok_slash, + ident, ident_slash, identq, + sep, sep_slash, condsep, condsep_slash, + cond_prefix, + val, val_slash, valq, afterval, afterval_slash, + cond_suffix +}; + +bool kv_parser_feed(struct kv_parser *this, const char *in, uint sz, kv_parser_cb cb, void *ctxt) { const char *p = in; short c; @@ -34,9 +46,8 @@ void kv_parser_feed(struct kv_parser *this, const char *in, uint sz, #define INCCOL() (*p == '\n' ? (++this->line, this->col = 0) : ++this->col) #define READ() (p == in + sz ? 
EOF : (INCCOL(), *p++)) #define ERROR(s) do { \ - this->state = KV_PARSER_ERROR; \ this->errmsg = s; \ - return; \ + return false; \ } while (0) #define OUT(c) do { \ if (this->outp - this->tokbuf == KV_TOKEN_MAX) { \ @@ -48,7 +59,7 @@ void kv_parser_feed(struct kv_parser *this, const char *in, uint sz, // note: multi-eval #define IS_WS(c) ((c) == ' ' || (c) == '\t' || (c) == '\n' || (c) == '\r') #define STATE(s) case s: s - #define HANDLE_EOF() do { case EOF: return; } while (0) + #define HANDLE_EOF() do { case EOF: return true; } while (0) #define SKIP_COMMENT(next) do { \ this->state = next; \ this->incomment = true; \ @@ -59,29 +70,31 @@ void kv_parser_feed(struct kv_parser *this, const char *in, uint sz, cb(type, this->tokbuf, this->outp - this->tokbuf, ctxt); \ this->outp = this->tokbuf; \ } while (0) - - // parser states, implemented by STATE() macros below - enum { - ok, - ok_slash, - ident, - ident_slash, - identq, - sep, - sep_slash, - val, - val_slash, - valq - }; + // prefix and suffix conditions are more or less the same, just in different + // contexts, because very good syntax yes. 
+ #define CONDSTATE(name, type, next) do { \ + STATE(name): \ + switch (c = READ()) { \ + HANDLE_EOF(); \ + CASE_WS: ERROR("unexpected whitespace in conditional"); \ + case '[': ERROR("unexpected opening bracket in conditional"); \ + case '{': case '}': ERROR("unexpected brace in conditional"); \ + case '/': ERROR("unexpected slash in conditional"); \ + case ']': CB(type); GOTO(next); \ + default: OUT(c); goto name; \ + } \ + } while (0) start: // special spaghetti so we don't have a million different comment states - if (this->incomment) while ((c = READ()) != '\n') if (c == EOF) return; + if (this->incomment) while ((c = READ()) != '\n') if (c == EOF) return true; this->incomment = false; switch (this->state) { STATE(ok): - switch (c = READ()) { + c = READ(); +ident_postread: + switch (c) { HANDLE_EOF(); CASE_WS: goto ok; case '#': ERROR("kv macros not supported"); @@ -94,6 +107,7 @@ STATE(ok): goto ok; case '"': GOTO(identq); case '/': GOTO(ok_slash); + case '[': case ']': ERROR("unexpected conditional bracket"); default: GOTO(ident); } @@ -101,7 +115,7 @@ STATE(ok_slash): switch (c = READ()) { HANDLE_EOF(); case '/': SKIP_COMMENT(ok); - default: OUT('/'); GOTO(ident); + default: GOTO(ident); } ident: @@ -115,10 +129,12 @@ case ident: // continue here char c_ = c; cb(KV_NEST_START, &c_, 1, ctxt); GOTO(ok); - case '}': case '"': ERROR("unexpected control character"); - CASE_WS: - CB(KV_IDENT); - GOTO(sep); + // XXX: assuming [ is a token break; haven't checked Valve's code + case '[': CB(KV_IDENT); GOTO(cond_prefix); + case '}': ERROR("unexpected closing brace"); + case ']': ERROR("unexpected closing bracket"); + case '"': ERROR("unexpected quote mark"); + CASE_WS: CB(KV_IDENT); GOTO(sep); case '/': GOTO(ident_slash); default: goto ident; } @@ -126,18 +142,14 @@ case ident: // continue here STATE(ident_slash): switch (c = READ()) { HANDLE_EOF(); - case '/': - CB(KV_IDENT); - SKIP_COMMENT(sep); - default: OUT('/'); GOTO(ident); + case '/': CB(KV_IDENT); 
SKIP_COMMENT(sep); + default: GOTO(ident); } STATE(identq): switch (c = READ()) { HANDLE_EOF(); - case '"': - CB(KV_IDENT_QUOTED); - GOTO(sep); + case '"': CB(KV_IDENT_QUOTED); GOTO(sep); default: OUT(c); goto identq; } @@ -145,14 +157,15 @@ STATE(sep): do c = READ(); while (IS_WS(c)); switch (c) { HANDLE_EOF(); - case '[': ERROR("conditionals not supported"); case '{':; char c_ = c; ++this->nestlvl; cb(KV_NEST_START, &c_, 1, ctxt); GOTO(ok); + case '[': GOTO(cond_prefix); case '"': GOTO(valq); - case '}': ERROR("unexpected control character"); + case '}': ERROR("unexpected closing brace"); + case ']': ERROR("unexpected closing bracket"); case '/': GOTO(sep_slash); default: GOTO(val); } @@ -161,7 +174,33 @@ STATE(sep_slash): switch (c = READ()) { HANDLE_EOF(); case '/': SKIP_COMMENT(sep); - default: OUT('/'); GOTO(val); + default: GOTO(val); + } + +CONDSTATE(cond_prefix, KV_COND_PREFIX, condsep); + +STATE(condsep): + do c = READ(); while (IS_WS(c)); + switch (c) { + HANDLE_EOF(); + case '{':; + char c_ = c; + ++this->nestlvl; + cb(KV_NEST_START, &c_, 1, ctxt); + GOTO(ok); + case '}': ERROR("unexpected closing brace"); + case '[': ERROR("unexpected opening bracket"); + case ']': ERROR("unexpected closing bracket"); + case '/': GOTO(condsep_slash); + // these conditions only go before braces because very good syntax + default: ERROR("unexpected string value after prefix condition"); + } + +STATE(condsep_slash): + switch (c = READ()) { + HANDLE_EOF(); + case '/': SKIP_COMMENT(condsep); + default: ERROR("unexpected string value after prefix condition"); } val: @@ -169,17 +208,18 @@ val: case val: // continue here switch (c = READ()) { HANDLE_EOF(); - case '{': case '"': ERROR("unexpected control character"); - // might get } with no whitespace + case '{': ERROR("unexpected opening brace"); + case ']': ERROR("unexpected closing bracket"); + case '"': ERROR("unexpected quotation mark"); + // might get [ or } with no whitespace case '}': CB(KV_VAL); --this->nestlvl; char 
c_ = c; cb(KV_NEST_END, &c_, 1, ctxt); - GOTO(ok); - CASE_WS: - CB(KV_VAL); - GOTO(ok); + GOTO(afterval); + case '[': CB(KV_VAL); GOTO(cond_suffix); + CASE_WS: CB(KV_VAL); GOTO(afterval); case '/': GOTO(val_slash); default: goto val; } @@ -187,23 +227,41 @@ case val: // continue here STATE(val_slash): switch (c = READ()) { HANDLE_EOF(); - case '/': - CB(KV_VAL); - SKIP_COMMENT(ok); - default: OUT('/'); GOTO(val); + case '/': CB(KV_VAL); SKIP_COMMENT(afterval); + default: GOTO(val); } STATE(valq): switch (c = READ()) { HANDLE_EOF(); - case '"': - CB(KV_VAL_QUOTED); - GOTO(ok); + case '"': CB(KV_VAL_QUOTED); GOTO(afterval); default: OUT(c); goto valq; } +STATE(afterval): + switch (c = READ()) { + HANDLE_EOF(); + CASE_WS: goto afterval; + case '[': GOTO(cond_suffix); + case '/': GOTO(afterval_slash); + // mildly dumb hack: if no conditional, we can just use the regular + // starting state handler to get next transition correct - just avoid + // double-reading the character + default: goto ident_postread; + } + +STATE(afterval_slash): + switch (c = READ()) { + HANDLE_EOF(); + case '/': SKIP_COMMENT(afterval); + default: GOTO(ident); + } + +CONDSTATE(cond_suffix, KV_COND_SUFFIX, ok); + } + #undef CONDSTATE #undef CB #undef GOTO #undef SKIP_COMMENT @@ -215,17 +273,20 @@ STATE(valq): #undef ERROR #undef READ #undef INCCOL + + unreachable; // pretty sure! 
} -void kv_parser_done(struct kv_parser *this) { - if (this->state > 0) { - this->state = -1; +bool kv_parser_done(struct kv_parser *this) { + if (this->state != ok && this->state != afterval) { this->errmsg = "unexpected end of input"; + return false; } - else if (this->state == 0 && this->nestlvl != 0) { - this->state = -1; + if (this->nestlvl != 0) { this->errmsg = "unterminated object (unbalanced braces)"; + return false; } + return true; } // vi: sw=4 ts=4 noet tw=80 cc=80 diff --git a/src/kv.h b/src/kv.h index 4ed459b..44dc896 100644 --- a/src/kv.h +++ b/src/kv.h @@ -34,8 +34,8 @@ */ struct kv_parser { ushort line, col; /* the current line and column in the text */ - schar state; /* internal, shouldn't usually be touched directly */ - bool incomment; /* internal */ + char state : 7; /* internal, shouldn't usually be touched directly */ + bool incomment : 1; /* internal */ ushort nestlvl; /* internal */ const char *errmsg; /* the error message, *IF* parsing just failed */ @@ -46,8 +46,6 @@ struct kv_parser { char tokbuf[KV_TOKEN_MAX]; }; -#define KV_PARSER_ERROR -1 - /* * These are the tokens that can be received by a kv_parser_cb (below). * The x-macro and string descriptions are given to allow for easy debug @@ -61,6 +59,8 @@ struct kv_parser { X(KV_IDENT_QUOTED, "quoted-ident") \ X(KV_VAL, "value") \ X(KV_VAL_QUOTED, "quoted-value") \ + X(KV_COND_PREFIX, "cond-prefix") \ + X(KV_COND_SUFFIX, "cond-suffix") \ X(KV_NEST_START, "object-start") \ X(KV_NEST_END, "object-end") @@ -76,20 +76,21 @@ typedef void (*kv_parser_cb)(enum kv_token type, const char *p, uint len, * read in from a file. * * The lexer is reentrant and can be fed arbitrarily sized blocks of data at a - * time. The function may return early in the event of an error; you must check - * if parser->state == KV_PARSER_ERROR between calls! Continuing to try parsing - * after an error is undefined. + * time. 
The function may return early in the event of an error; a return value + * of false indicates that this has happened, otherwise true is returned. + * + * In the event of an error, the errmsg, line and col fields of the parser + * struct can be used for diagnostics. */ -// FIXME: revise API usage so errors aren't passed through "state" value -void kv_parser_feed(struct kv_parser *this, const char *in, uint sz, +bool kv_parser_feed(struct kv_parser *this, const char *in, uint sz, kv_parser_cb cb, void *ctxt); /* - * This indicates that parsing is done; if the state is midway through a token - * this will be converted into an error state which can be checked in the same - * way as noted above. + * This indicates that parsing is done; if this is called at an unexpected time, + * a parsing error will result; this is indicated in the return value as with + * kv_parser_feed. */ -void kv_parser_done(struct kv_parser *this); +bool kv_parser_done(struct kv_parser *this); #endif -- cgit v1.2.3