From da6f343032cb01597dc7866e66f091adf3243a62 Mon Sep 17 00:00:00 2001
From: Michael Smith
Date: Sat, 20 Nov 2021 03:10:50 +0000
Subject: Initial public snapshot

With code from Bill. Thanks Bill!
---
 src/kv.h | 96 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 96 insertions(+)
 create mode 100644 src/kv.h

(limited to 'src/kv.h')

diff --git a/src/kv.h b/src/kv.h
new file mode 100644
index 0000000..6de2c67
--- /dev/null
+++ b/src/kv.h
@@ -0,0 +1,96 @@
+/*
+ * Copyright © 2021 Michael Smith
+ *
+ * Permission to use, copy, modify, and/or distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH
+ * REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
+ * AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT,
+ * INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
+ * LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR
+ * OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
+ * PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#ifndef INC_KV_H
+#define INC_KV_H
+
+#include <stdbool.h>
+
+#include "intdefs.h"
+
+/*
+ * Maximum length of a single token. Since this code is trying to avoid dynamic
+ * memory allocations, this arbitrary limit is chosen to accommodate all known
+ * "reasonable" tokens likely to come in any real files, probably.
+ */
+#define KV_TOKEN_MAX 512
+
+/*
+ * Contains all the state associated with parsing (lexing?) a KeyValues file.
+ * Should be zeroed out prior to the first call (initialise with `= {0};`).
+ */
+struct kv_parser {
+	ushort line, col;	/* the current line and column in the text */
+	schar state;		/* internal, shouldn't usually be touched directly */
+	bool incomment;		/* internal */
+	ushort nestlvl;		/* internal */
+	const char *errmsg;	/* the error message, *IF* parsing just failed */
+
+	// trying to avoid dynamic allocations - valve's own parser seems to have
+	// a similar limit as well and our use case doesn't really need to worry
+	// about stupid massive values, so it's fine
+	char *outp;					/* NOTE(review): cursor into tokbuf? confirm */
+	char tokbuf[KV_TOKEN_MAX];	/* internal; fixed-size token storage */
+};
+
+#define KV_PARSER_ERROR (-1) /* kv_parser.state value signalling an error */
+
+/*
+ * These are the tokens that can be received by a kv_parser_cb (below).
+ * The x-macro and string descriptions are given to allow for easy debug
+ * stringification. Note that this "parser" is really just lexing out these
+ * tokens - handling the actual structure of the file should be done in the
+ * callback. This is so that data can be streamed rather than all read into
+ * memory at once.
+ */
+#define KV_TOKENS(X) \
+	X(KV_IDENT, "ident") \
+	X(KV_IDENT_QUOTED, "quoted-ident") \
+	X(KV_VAL, "value") \
+	X(KV_VAL_QUOTED, "quoted-value") \
+	X(KV_NEST_START, "object-start") \
+	X(KV_NEST_END, "object-end")
+
+#define KV_TOKEN_ENUM_ENTRY(s, ignore) s, /* name only; string dropped */
+enum kv_token { KV_TOKENS(KV_TOKEN_ENUM_ENTRY) };
+#undef KV_TOKEN_ENUM_ENTRY
+
+typedef void (*kv_parser_cb)(enum kv_token type, const char *p, uint len,
+		void *ctxt);
+
+/*
+ * Feed a block of text into the lexer. This would usually be a block of data
+ * read in from a file.
+ *
+ * The lexer is reentrant and can be fed arbitrarily sized blocks of data at a
+ * time. The function may return early in the event of an error; you must check
+ * if parser->state == KV_PARSER_ERROR between calls! Continuing to try parsing
+ * after an error is undefined.
+ */
+// FIXME: revise API usage so errors aren't passed through "state" value
+void kv_parser_feed(struct kv_parser *this, const char *in, uint sz,
+		kv_parser_cb cb, void *ctxt);
+
+/*
+ * This indicates that parsing is done; if the state is midway through a token
+ * this will be converted into an error state, which can be checked in the same
+ * way as after kv_parser_feed(), i.e. parser->state == KV_PARSER_ERROR.
+ */
+void kv_parser_done(struct kv_parser *this);
+
+#endif
+
+// vi: sw=4 ts=4 noet tw=80 cc=80
-- 
cgit v1.2.3