From da6f343032cb01597dc7866e66f091adf3243a62 Mon Sep 17 00:00:00 2001 From: Michael Smith Date: Sat, 20 Nov 2021 03:10:50 +0000 Subject: Initial public snapshot With code from Bill. Thanks Bill! --- src/kv.c | 231 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 231 insertions(+) create mode 100644 src/kv.c (limited to 'src/kv.c') diff --git a/src/kv.c b/src/kv.c new file mode 100644 index 0000000..8258b16 --- /dev/null +++ b/src/kv.c @@ -0,0 +1,231 @@ +/* + * Copyright © 2021 Michael Smith + * + * Permission to use, copy, modify, and/or distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH + * REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY + * AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, + * INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM + * LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR + * OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR + * PERFORMANCE OF THIS SOFTWARE. + */ + +#include + +#include "intdefs.h" +#include "kv.h" + +#define EOF -1 + +void kv_parser_feed(struct kv_parser *this, const char *in, uint sz, + kv_parser_cb cb, void *ctxt) { + const char *p = in; + short c; + + // slight hack, makes init more convenient (just {0}) + if (!this->line) this->line = 1; + if (!this->outp) this->outp = this->tokbuf; + + // this is a big ol' blob of ugly state machine macro spaghetti - too bad! + #define INCCOL() (*p == '\n' ? (++this->line, this->col = 0) : ++this->col) + #define READ() (p == in + sz ? EOF : (INCCOL(), *p++)) + #define ERROR(s) do { \ + this->state = KV_PARSER_ERROR; \ + this->errmsg = s; \ + return; \ + } while (0) + #define OUT(c) do { \ + if (this->outp - this->tokbuf == KV_TOKEN_MAX) { \ + ERROR("token unreasonably large!"); \ + } \ + *this->outp++ = (c); \ + } while (0) + #define CASE_WS case ' ': case '\t': case '\n': case '\r' + // note: multi-eval + #define IS_WS(c) ((c) == ' ' || (c) == '\t' || (c) == '\n' || (c) == '\r') + #define STATE(s) case s: s + #define HANDLE_EOF() do { case EOF: return; } while (0) + #define SKIP_COMMENT(next) do { \ + this->state = next; \ + this->incomment = true; \ + goto start; \ + } while (0) + #define GOTO(s) do { this->state = s; goto s; } while (0) + #define CB(type) do { \ + cb(type, this->tokbuf, this->outp - this->tokbuf, ctxt); \ + this->outp = this->tokbuf; \ + } while (0) + + // parser states, implemented by STATE() macros below + enum { + ok, + ok_slash, + ident, + ident_slash, + identq, + sep, + sep_slash, + val, + val_slash, + valq + }; + +start: // special spaghetti so we don't have a million different comment states + if (this->incomment) while ((c = READ()) != '\n') if (c == EOF) return; + this->incomment = false; + +switch (this->state) { + +STATE(ok): + switch (c = READ()) { + HANDLE_EOF(); + CASE_WS: goto ok; + case '#': ERROR("kv macros not supported"); + case '{': ERROR("unexpected control character"); + case '}': + if (!this->nestlvl) ERROR("too many closing braces"); + --this->nestlvl; + char c_ = c; + cb(KV_NEST_END, &c_, 1, ctxt); + goto ok; + case '"': GOTO(identq); + case '/': GOTO(ok_slash); + default: GOTO(ident); + } + +STATE(ok_slash): + switch (c = READ()) { + HANDLE_EOF(); + case '/': SKIP_COMMENT(ok); + default: OUT('/'); GOTO(ident); + } + +ident: + OUT(c); +case ident: // continue here + switch (c = READ()) { + HANDLE_EOF(); + case '{': + CB(KV_IDENT); + ++this->nestlvl; + char c_ = c; + cb(KV_NEST_START, &c_, 1, ctxt); + GOTO(ok); + case '}': case '"': ERROR("unexpected control character"); + CASE_WS: + CB(KV_IDENT); + GOTO(sep); + case '/': GOTO(ident_slash); + default: goto ident; + } + +STATE(ident_slash): + switch (c = READ()) { + HANDLE_EOF(); + case '/': + CB(KV_IDENT); + SKIP_COMMENT(sep); + default: OUT('/'); GOTO(ident); + } + +STATE(identq): + switch (c = READ()) { + HANDLE_EOF(); + case '"': + CB(KV_IDENT_QUOTED); + GOTO(sep); + default: OUT(c); goto identq; + } + +STATE(sep): + do c = READ(); while (IS_WS(c)); + switch (c) { + HANDLE_EOF(); + case '[': ERROR("conditionals not supported"); + case '{':; + char c_ = c; + ++this->nestlvl; + cb(KV_NEST_START, &c_, 1, ctxt); + GOTO(ok); + case '"': GOTO(valq); + case '}': ERROR("unexpected control character"); + case '/': GOTO(sep_slash); + default: GOTO(val); + } + +STATE(sep_slash): + switch (c = READ()) { + HANDLE_EOF(); + case '/': SKIP_COMMENT(sep); + default: OUT('/'); GOTO(val); + } + +val: + OUT(c); +case val: // continue here + switch (c = READ()) { + HANDLE_EOF(); + case '{': case '"': ERROR("unexpected control character"); + // might get } with no whitespace + case '}': + CB(KV_VAL); + --this->nestlvl; + char c_ = c; + cb(KV_NEST_END, &c_, 1, ctxt); + GOTO(ok); + CASE_WS: + CB(KV_VAL); + GOTO(ok); + case '/': GOTO(val_slash); + default: goto val; + } + +STATE(val_slash): + switch (c = READ()) { + HANDLE_EOF(); + case '/': + CB(KV_VAL); + SKIP_COMMENT(ok); + default: OUT('/'); GOTO(val); + } + +STATE(valq): + switch (c = READ()) { + HANDLE_EOF(); + case '"': + CB(KV_VAL_QUOTED); + GOTO(ok); + default: OUT(c); goto valq; + } + +} + + #undef CB + #undef GOTO + #undef SKIP_COMMENT + #undef HANDLE_EOF + #undef STATE + #undef IS_WS + #undef CASE_WS + #undef OUT + #undef ERROR + #undef READ + #undef INCCOL +} + +void kv_parser_done(struct kv_parser *this) { + if (this->state > 0) { + this->state = -1; + this->errmsg = "unexpected end of input"; + } + else if (this->state == 0 && this->nestlvl != 0) { + this->state = -1; + this->errmsg = "unterminated object (unbalanced braces)"; + } +} + +// vi: sw=4 ts=4 noet tw=80 cc=80 -- cgit v1.2.3