summaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
author: Michael Smith <mikesmiffy128@gmail.com>, 2022-02-24 00:47:05 +0000
committer: Michael Smith <mikesmiffy128@gmail.com>, 2022-03-19 03:51:45 +0000
commit: 6818b362a776f0cc5a6068ed119dc2ebcbc5a9cc (patch)
tree: d2f32f226229cdfce0c61540396f4a7d3a4a8ced
parent: 98378138a521fa52758f1ed3501900e6c323c474 (diff)
Fix some old KV parser issues
- Implement conditionals in the lexer and reject or ignore them in callbacks.
  This will allow something to use them later if needed.
- Make error handling less stupid (return a bool instead of using the state
  struct).
-rw-r--r-- src/build/mkgamedata.c 11
-rw-r--r-- src/gameinfo.c 12
-rw-r--r-- src/kv.c 169
-rw-r--r-- src/kv.h 27
-rw-r--r-- test/kv.test.c 20
5 files changed, 155 insertions, 84 deletions
diff --git a/src/build/mkgamedata.c b/src/build/mkgamedata.c
index ca2e130..e2e59ff 100644
--- a/src/build/mkgamedata.c
+++ b/src/build/mkgamedata.c
@@ -1,5 +1,5 @@
/*
- * Copyright © 2021 Michael Smith <mikesmiffy128@gmail.com>
+ * Copyright © 2022 Michael Smith <mikesmiffy128@gmail.com>
*
* Permission to use, copy, modify, and/or distribute this software for any
* purpose with or without fee is hereby granted, provided that the above
@@ -144,6 +144,9 @@ static void kv_cb(enum kv_token type, const char *p, uint len, void *ctxt) {
*ents_tail = e;
ents_tail = &e->next;
}
+ break;
+ case KV_COND_PREFIX: case KV_COND_SUFFIX:
+ badparse(state, "unexpected conditional");
}
}
@@ -165,11 +168,9 @@ int OS_MAIN(int argc, os_char *argv[]) {
int nread;
while (nread = read(fd, buf, sizeof(buf))) {
if (nread == -1) die("couldn't read file");
- kv_parser_feed(&kv, buf, nread, &kv_cb, &state);
- if (kv.state == KV_PARSER_ERROR) goto ep;
+ if (!kv_parser_feed(&kv, buf, nread, &kv_cb, &state)) goto ep;
}
- kv_parser_done(&kv);
- if (kv.state == KV_PARSER_ERROR) {
+ if (!kv_parser_done(&kv)) {
ep: fprintf(stderr, "mkgamedata: %" fS ":%d:%d: bad syntax: %s\n",
*argv, kv.line, kv.col, kv.errmsg);
exit(1);
diff --git a/src/gameinfo.c b/src/gameinfo.c
index a5f1a42..4af5df7 100644
--- a/src/gameinfo.c
+++ b/src/gameinfo.c
@@ -1,5 +1,5 @@
/*
- * Copyright © 2021 Michael Smith <mikesmiffy128@gmail.com>
+ * Copyright © 2022 Michael Smith <mikesmiffy128@gmail.com>
*
* Permission to use, copy, modify, and/or distribute this software for any
* purpose with or without fee is hereby granted, provided that the above
@@ -228,6 +228,10 @@ static void kv_cb(enum kv_token type, const char *p, uint len, void *_ctxt) {
break;
case KV_NEST_END:
if (ctxt->dontcarelvl) --ctxt->dontcarelvl; else --ctxt->nestlvl;
+ break;
+ case KV_COND_PREFIX: case KV_COND_SUFFIX:
+ con_warn("gameinfo: warning: just ignoring conditional \"%.*s\"",
+ len, p);
}
#undef MATCH
}
@@ -353,11 +357,9 @@ bool gameinfo_init(void) {
strerror(errno));
goto e;
}
- kv_parser_feed(&kvp, buf, nread, &kv_cb, &ctxt);
- if (kvp.state == KV_PARSER_ERROR) goto ep;
+ if (!kv_parser_feed(&kvp, buf, nread, &kv_cb, &ctxt)) goto ep;
}
- kv_parser_done(&kvp);
- if (kvp.state == KV_PARSER_ERROR) goto ep;
+ if (!kv_parser_done(&kvp)) goto ep;
close(fd);
return true;
diff --git a/src/kv.c b/src/kv.c
index 8258b16..7ac48e1 100644
--- a/src/kv.c
+++ b/src/kv.c
@@ -1,5 +1,5 @@
/*
- * Copyright © 2021 Michael Smith <mikesmiffy128@gmail.com>
+ * Copyright © 2022 Michael Smith <mikesmiffy128@gmail.com>
*
* Permission to use, copy, modify, and/or distribute this software for any
* purpose with or without fee is hereby granted, provided that the above
@@ -18,10 +18,22 @@
#include "intdefs.h"
#include "kv.h"
+#include "unreachable.h"
#define EOF -1
-void kv_parser_feed(struct kv_parser *this, const char *in, uint sz,
+// parser states, implemented by STATE() macros in kv_parser_feed() below.
+// needs to be kept in sync!
+enum {
+ ok, ok_slash,
+ ident, ident_slash, identq,
+ sep, sep_slash, condsep, condsep_slash,
+ cond_prefix,
+ val, val_slash, valq, afterval, afterval_slash,
+ cond_suffix
+};
+
+bool kv_parser_feed(struct kv_parser *this, const char *in, uint sz,
kv_parser_cb cb, void *ctxt) {
const char *p = in;
short c;
@@ -34,9 +46,8 @@ void kv_parser_feed(struct kv_parser *this, const char *in, uint sz,
#define INCCOL() (*p == '\n' ? (++this->line, this->col = 0) : ++this->col)
#define READ() (p == in + sz ? EOF : (INCCOL(), *p++))
#define ERROR(s) do { \
- this->state = KV_PARSER_ERROR; \
this->errmsg = s; \
- return; \
+ return false; \
} while (0)
#define OUT(c) do { \
if (this->outp - this->tokbuf == KV_TOKEN_MAX) { \
@@ -48,7 +59,7 @@ void kv_parser_feed(struct kv_parser *this, const char *in, uint sz,
// note: multi-eval
#define IS_WS(c) ((c) == ' ' || (c) == '\t' || (c) == '\n' || (c) == '\r')
#define STATE(s) case s: s
- #define HANDLE_EOF() do { case EOF: return; } while (0)
+ #define HANDLE_EOF() do { case EOF: return true; } while (0)
#define SKIP_COMMENT(next) do { \
this->state = next; \
this->incomment = true; \
@@ -59,29 +70,31 @@ void kv_parser_feed(struct kv_parser *this, const char *in, uint sz,
cb(type, this->tokbuf, this->outp - this->tokbuf, ctxt); \
this->outp = this->tokbuf; \
} while (0)
-
- // parser states, implemented by STATE() macros below
- enum {
- ok,
- ok_slash,
- ident,
- ident_slash,
- identq,
- sep,
- sep_slash,
- val,
- val_slash,
- valq
- };
+ // prefix and suffix conditions are more or less the same, just in different
+ // contexts, because very good syntax yes.
+ #define CONDSTATE(name, type, next) do { \
+ STATE(name): \
+ switch (c = READ()) { \
+ HANDLE_EOF(); \
+ CASE_WS: ERROR("unexpected whitespace in conditional"); \
+ case '[': ERROR("unexpected opening bracket in conditional"); \
+ case '{': case '}': ERROR("unexpected brace in conditional"); \
+ case '/': ERROR("unexpected slash in conditional"); \
+ case ']': CB(type); GOTO(next); \
+ default: OUT(c); goto name; \
+ } \
+ } while (0)
start: // special spaghetti so we don't have a million different comment states
- if (this->incomment) while ((c = READ()) != '\n') if (c == EOF) return;
+ if (this->incomment) while ((c = READ()) != '\n') if (c == EOF) return true;
this->incomment = false;
switch (this->state) {
STATE(ok):
- switch (c = READ()) {
+ c = READ();
+ident_postread:
+ switch (c) {
HANDLE_EOF();
CASE_WS: goto ok;
case '#': ERROR("kv macros not supported");
@@ -94,6 +107,7 @@ STATE(ok):
goto ok;
case '"': GOTO(identq);
case '/': GOTO(ok_slash);
+ case '[': case ']': ERROR("unexpected conditional bracket");
default: GOTO(ident);
}
@@ -101,7 +115,7 @@ STATE(ok_slash):
switch (c = READ()) {
HANDLE_EOF();
case '/': SKIP_COMMENT(ok);
- default: OUT('/'); GOTO(ident);
+ default: GOTO(ident);
}
ident:
@@ -115,10 +129,12 @@ case ident: // continue here
char c_ = c;
cb(KV_NEST_START, &c_, 1, ctxt);
GOTO(ok);
- case '}': case '"': ERROR("unexpected control character");
- CASE_WS:
- CB(KV_IDENT);
- GOTO(sep);
+ // XXX: assuming [ is a token break; haven't checked Valve's code
+ case '[': CB(KV_IDENT); GOTO(cond_prefix);
+ case '}': ERROR("unexpected closing brace");
+ case ']': ERROR("unexpected closing bracket");
+ case '"': ERROR("unexpected quote mark");
+ CASE_WS: CB(KV_IDENT); GOTO(sep);
case '/': GOTO(ident_slash);
default: goto ident;
}
@@ -126,18 +142,14 @@ case ident: // continue here
STATE(ident_slash):
switch (c = READ()) {
HANDLE_EOF();
- case '/':
- CB(KV_IDENT);
- SKIP_COMMENT(sep);
- default: OUT('/'); GOTO(ident);
+ case '/': CB(KV_IDENT); SKIP_COMMENT(sep);
+ default: GOTO(ident);
}
STATE(identq):
switch (c = READ()) {
HANDLE_EOF();
- case '"':
- CB(KV_IDENT_QUOTED);
- GOTO(sep);
+ case '"': CB(KV_IDENT_QUOTED); GOTO(sep);
default: OUT(c); goto identq;
}
@@ -145,14 +157,15 @@ STATE(sep):
do c = READ(); while (IS_WS(c));
switch (c) {
HANDLE_EOF();
- case '[': ERROR("conditionals not supported");
case '{':;
char c_ = c;
++this->nestlvl;
cb(KV_NEST_START, &c_, 1, ctxt);
GOTO(ok);
+ case '[': GOTO(cond_prefix);
case '"': GOTO(valq);
- case '}': ERROR("unexpected control character");
+ case '}': ERROR("unexpected closing brace");
+ case ']': ERROR("unexpected closing bracket");
case '/': GOTO(sep_slash);
default: GOTO(val);
}
@@ -161,7 +174,33 @@ STATE(sep_slash):
switch (c = READ()) {
HANDLE_EOF();
case '/': SKIP_COMMENT(sep);
- default: OUT('/'); GOTO(val);
+ default: GOTO(val);
+ }
+
+CONDSTATE(cond_prefix, KV_COND_PREFIX, condsep);
+
+STATE(condsep):
+ do c = READ(); while (IS_WS(c));
+ switch (c) {
+ HANDLE_EOF();
+ case '{':;
+ char c_ = c;
+ ++this->nestlvl;
+ cb(KV_NEST_START, &c_, 1, ctxt);
+ GOTO(ok);
+ case '}': ERROR("unexpected closing brace");
+ case '[': ERROR("unexpected opening bracket");
+ case ']': ERROR("unexpected closing bracket");
+ case '/': GOTO(condsep_slash);
+ // these conditions only go before braces because very good syntax
+ default: ERROR("unexpected string value after prefix condition");
+ }
+
+STATE(condsep_slash):
+ switch (c = READ()) {
+ HANDLE_EOF();
+ case '/': SKIP_COMMENT(condsep);
+ default: ERROR("unexpected string value after prefix condition");
}
val:
@@ -169,17 +208,18 @@ val:
case val: // continue here
switch (c = READ()) {
HANDLE_EOF();
- case '{': case '"': ERROR("unexpected control character");
- // might get } with no whitespace
+ case '{': ERROR("unexpected opening brace");
+ case ']': ERROR("unexpected closing bracket");
+ case '"': ERROR("unexpected quotation mark");
+ // might get [ or } with no whitespace
case '}':
CB(KV_VAL);
--this->nestlvl;
char c_ = c;
cb(KV_NEST_END, &c_, 1, ctxt);
- GOTO(ok);
- CASE_WS:
- CB(KV_VAL);
- GOTO(ok);
+ GOTO(afterval);
+ case '[': CB(KV_VAL); GOTO(cond_suffix);
+ CASE_WS: CB(KV_VAL); GOTO(afterval);
case '/': GOTO(val_slash);
default: goto val;
}
@@ -187,23 +227,41 @@ case val: // continue here
STATE(val_slash):
switch (c = READ()) {
HANDLE_EOF();
- case '/':
- CB(KV_VAL);
- SKIP_COMMENT(ok);
- default: OUT('/'); GOTO(val);
+ case '/': CB(KV_VAL); SKIP_COMMENT(afterval);
+ default: GOTO(val);
}
STATE(valq):
switch (c = READ()) {
HANDLE_EOF();
- case '"':
- CB(KV_VAL_QUOTED);
- GOTO(ok);
+ case '"': CB(KV_VAL_QUOTED); GOTO(afterval);
default: OUT(c); goto valq;
}
+STATE(afterval):
+ switch (c = READ()) {
+ HANDLE_EOF();
+ CASE_WS: goto afterval;
+ case '[': GOTO(cond_suffix);
+ case '/': GOTO(afterval_slash);
+ // mildly dumb hack: if no conditional, we can just use the regular
+ // starting state handler to get next transition correct - just avoid
+ // double-reading the character
+ default: goto ident_postread;
+ }
+
+STATE(afterval_slash):
+ switch (c = READ()) {
+ HANDLE_EOF();
+ case '/': SKIP_COMMENT(afterval);
+ default: GOTO(ident);
+ }
+
+CONDSTATE(cond_suffix, KV_COND_SUFFIX, ok);
+
}
+ #undef CONDSTATE
#undef CB
#undef GOTO
#undef SKIP_COMMENT
@@ -215,17 +273,20 @@ STATE(valq):
#undef ERROR
#undef READ
#undef INCCOL
+
+ unreachable; // pretty sure!
}
-void kv_parser_done(struct kv_parser *this) {
- if (this->state > 0) {
- this->state = -1;
+bool kv_parser_done(struct kv_parser *this) {
+ if (this->state != ok && this->state != afterval) {
this->errmsg = "unexpected end of input";
+ return false;
}
- else if (this->state == 0 && this->nestlvl != 0) {
- this->state = -1;
+ if (this->nestlvl != 0) {
this->errmsg = "unterminated object (unbalanced braces)";
+ return false;
}
+ return true;
}
// vi: sw=4 ts=4 noet tw=80 cc=80
diff --git a/src/kv.h b/src/kv.h
index 4ed459b..44dc896 100644
--- a/src/kv.h
+++ b/src/kv.h
@@ -34,8 +34,8 @@
*/
struct kv_parser {
ushort line, col; /* the current line and column in the text */
- schar state; /* internal, shouldn't usually be touched directly */
- bool incomment; /* internal */
+ char state : 7; /* internal, shouldn't usually be touched directly */
+ bool incomment : 1; /* internal */
ushort nestlvl; /* internal */
const char *errmsg; /* the error message, *IF* parsing just failed */
@@ -46,8 +46,6 @@ struct kv_parser {
char tokbuf[KV_TOKEN_MAX];
};
-#define KV_PARSER_ERROR -1
-
/*
* These are the tokens that can be received by a kv_parser_cb (below).
* The x-macro and string descriptions are given to allow for easy debug
@@ -61,6 +59,8 @@ struct kv_parser {
X(KV_IDENT_QUOTED, "quoted-ident") \
X(KV_VAL, "value") \
X(KV_VAL_QUOTED, "quoted-value") \
+ X(KV_COND_PREFIX, "cond-prefix") \
+ X(KV_COND_SUFFIX, "cond-suffix") \
X(KV_NEST_START, "object-start") \
X(KV_NEST_END, "object-end")
@@ -76,20 +76,21 @@ typedef void (*kv_parser_cb)(enum kv_token type, const char *p, uint len,
* read in from a file.
*
* The lexer is reentrant and can be fed arbitrarily sized blocks of data at a
- * time. The function may return early in the event of an error; you must check
- * if parser->state == KV_PARSER_ERROR between calls! Continuing to try parsing
- * after an error is undefined.
+ * time. The function may return early in the event of an error; a return value
+ * of false indicates that this has happened, otherwise true is returned.
+ *
+ * In the event of an error, the errmsg, line and col fields of the parser
+ * struct can be used for diagnostics.
*/
-// FIXME: revise API usage so errors aren't passed through "state" value
-void kv_parser_feed(struct kv_parser *this, const char *in, uint sz,
+bool kv_parser_feed(struct kv_parser *this, const char *in, uint sz,
kv_parser_cb cb, void *ctxt);
/*
- * This indicates that parsing is done; if the state is midway through a token
- * this will be converted into an error state which can be checked in the same
- * way as noted above.
+ * This indicates that parsing is done; if this is called at an unexpected time,
+ * a parsing error will result; this is indicated in the return value as with
+ * kv_parser_feed.
*/
-void kv_parser_done(struct kv_parser *this);
+bool kv_parser_done(struct kv_parser *this);
#endif
diff --git a/test/kv.test.c b/test/kv.test.c
index cd08d16..12f2801 100644
--- a/test/kv.test.c
+++ b/test/kv.test.c
@@ -23,8 +23,14 @@ static void tokcb(enum kv_token type, const char *p, uint len,
}
static const char data[] =
-"KeyValues {\n\tKey/1\tVal1! \tKey2\nVal2// comment\n\"String Key\"// also comment\nVal3 Key4{ Key5 \"Value Five\" } // one more\n\t\n}"
-;
+"KeyValues {\n\
+ Key/1 Val1![conditional]\n\
+ Key2\n\
+Val2// comment\n\
+ \"String Key\" // also comment\n\
+ Val3 Key4 [conditional!]{ Key5 \"Value Five\" } // one more\n\
+} \n\
+";
static const int sz = sizeof(data) - 1;
TEST("parsing should work with any buffer size", 0) {
@@ -36,12 +42,12 @@ TEST("parsing should work with any buffer size", 0) {
if (chunk * chunksz + thischunk > sz) {
thischunk = sz - chunk * chunksz;
}
- kv_parser_feed(&kvp, data + chunk * chunksz, thischunk,
- tokcb, 0);
- if (kvp.state == KV_PARSER_ERROR) die(&kvp);
+ if (!kv_parser_feed(&kvp, data + chunk * chunksz, thischunk,
+ tokcb, 0)) {
+ die(&kvp);
+ }
}
- kv_parser_done(&kvp);
- if (kvp.state == KV_PARSER_ERROR) die(&kvp);
+ if (!kv_parser_done(&kvp)) die(&kvp);
}
return true;
}