diff options
Diffstat (limited to 'src/3p/chibicc/preprocess.c')
-rw-r--r-- | src/3p/chibicc/preprocess.c | 1208 |
1 files changed, 0 insertions, 1208 deletions
diff --git a/src/3p/chibicc/preprocess.c b/src/3p/chibicc/preprocess.c deleted file mode 100644 index cd8d1d8..0000000 --- a/src/3p/chibicc/preprocess.c +++ /dev/null @@ -1,1208 +0,0 @@ -// This file implements the C preprocessor. -// -// The preprocessor takes a list of tokens as an input and returns a -// new list of tokens as an output. -// -// The preprocessing language is designed in such a way that that's -// guaranteed to stop even if there is a recursive macro. -// Informally speaking, a macro is applied only once for each token. -// That is, if a macro token T appears in a result of direct or -// indirect macro expansion of T, T won't be expanded any further. -// For example, if T is defined as U, and U is defined as T, then -// token T is expanded to U and then to T and the macro expansion -// stops at that point. -// -// To achieve the above behavior, we attach for each token a set of -// macro names from which the token is expanded. The set is called -// "hideset". Hideset is initially empty, and every time we expand a -// macro, the macro name is added to the resulting tokens' hidesets. -// -// The above macro expansion algorithm is explained in this document -// written by Dave Prossor, which is used as a basis for the -// standard's wording: -// https://github.com/rui314/chibicc/wiki/cpp.algo.pdf - -#include "chibicc.h" - -typedef struct MacroParam MacroParam; -struct MacroParam { - MacroParam *next; - char *name; -}; - -typedef struct MacroArg MacroArg; -struct MacroArg { - MacroArg *next; - char *name; - bool is_va_args; - Token *tok; -}; - -typedef Token *macro_handler_fn(Token *); - -typedef struct Macro Macro; -struct Macro { - char *name; - bool is_objlike; // Object-like or function-like - MacroParam *params; - char *va_args_name; - Token *body; - macro_handler_fn *handler; -}; - -// `#if` can be nested, so we use a stack to manage nested `#if`s. -typedef struct CondIncl CondIncl; -struct CondIncl { - CondIncl *next; - enum { IN_THEN, IN_ELIF, IN_ELSE } ctx; - Token *tok; - bool included; -}; - -typedef struct Hideset Hideset; -struct Hideset { - Hideset *next; - char *name; -}; - -static HashMap macros; -static CondIncl *cond_incl; -static HashMap pragma_once; -static int include_next_idx; - -static Token *preprocess2(Token *tok); -static Macro *find_macro(Token *tok); - -static bool is_hash(Token *tok) { - return tok->at_bol && equal(tok, "#"); -} - -// Some preprocessor directives such as #include allow extraneous -// tokens before newline. This function skips such tokens. -static Token *skip_line(Token *tok) { - if (tok->at_bol) - return tok; - warn_tok(tok, "extra token"); - while (!tok->at_bol) - tok = tok->next; - return tok; -} - -static Token *copy_token(Token *tok) { - Token *t = calloc(1, sizeof(Token)); - *t = *tok; - t->next = NULL; - return t; -} - -static Token *new_eof(Token *tok) { - Token *t = copy_token(tok); - t->kind = TK_EOF; - t->len = 0; - return t; -} - -static Hideset *new_hideset(char *name) { - Hideset *hs = calloc(1, sizeof(Hideset)); - hs->name = name; - return hs; -} - -static Hideset *hideset_union(Hideset *hs1, Hideset *hs2) { - Hideset head = {}; - Hideset *cur = &head; - - for (; hs1; hs1 = hs1->next) - cur = cur->next = new_hideset(hs1->name); - cur->next = hs2; - return head.next; -} - -static bool hideset_contains(Hideset *hs, char *s, int len) { - for (; hs; hs = hs->next) - if (strlen(hs->name) == len && !strncmp(hs->name, s, len)) - return true; - return false; -} - -static Hideset *hideset_intersection(Hideset *hs1, Hideset *hs2) { - Hideset head = {}; - Hideset *cur = &head; - - for (; hs1; hs1 = hs1->next) - if (hideset_contains(hs2, hs1->name, strlen(hs1->name))) - cur = cur->next = new_hideset(hs1->name); - return head.next; -} - -static Token *add_hideset(Token *tok, Hideset *hs) { - Token head = {}; - Token *cur = &head; - - for (; tok; tok = tok->next) { - Token *t = copy_token(tok); - t->hideset = hideset_union(t->hideset, hs); - cur = cur->next = t; - } - return head.next; -} - -// Append tok2 to the end of tok1. -static Token *append(Token *tok1, Token *tok2) { - if (tok1->kind == TK_EOF) - return tok2; - - Token head = {}; - Token *cur = &head; - - for (; tok1->kind != TK_EOF; tok1 = tok1->next) - cur = cur->next = copy_token(tok1); - cur->next = tok2; - return head.next; -} - -static Token *skip_cond_incl2(Token *tok) { - while (tok->kind != TK_EOF) { - if (is_hash(tok) && - (equal(tok->next, "if") || equal(tok->next, "ifdef") || - equal(tok->next, "ifndef"))) { - tok = skip_cond_incl2(tok->next->next); - continue; - } - if (is_hash(tok) && equal(tok->next, "endif")) - return tok->next->next; - tok = tok->next; - } - return tok; -} - -// Skip until next `#else`, `#elif` or `#endif`. -// Nested `#if` and `#endif` are skipped. -static Token *skip_cond_incl(Token *tok) { - while (tok->kind != TK_EOF) { - if (is_hash(tok) && - (equal(tok->next, "if") || equal(tok->next, "ifdef") || - equal(tok->next, "ifndef"))) { - tok = skip_cond_incl2(tok->next->next); - continue; - } - - if (is_hash(tok) && - (equal(tok->next, "elif") || equal(tok->next, "else") || - equal(tok->next, "endif"))) - break; - tok = tok->next; - } - return tok; -} - -// Double-quote a given string and returns it. -static char *quote_string(char *str) { - int bufsize = 3; - for (int i = 0; str[i]; i++) { - if (str[i] == '\\' || str[i] == '"') - bufsize++; - bufsize++; - } - - char *buf = calloc(1, bufsize); - char *p = buf; - *p++ = '"'; - for (int i = 0; str[i]; i++) { - if (str[i] == '\\' || str[i] == '"') - *p++ = '\\'; - *p++ = str[i]; - } - *p++ = '"'; - *p++ = '\0'; - return buf; -} - -static Token *new_str_token(char *str, Token *tmpl) { - char *buf = quote_string(str); - return tokenize(new_file(tmpl->file->name, tmpl->file->file_no, buf)); -} - -// Copy all tokens until the next newline, terminate them with -// an EOF token and then returns them. This function is used to -// create a new list of tokens for `#if` arguments. -static Token *copy_line(Token **rest, Token *tok) { - Token head = {}; - Token *cur = &head; - - for (; !tok->at_bol; tok = tok->next) - cur = cur->next = copy_token(tok); - - cur->next = new_eof(tok); - *rest = tok; - return head.next; -} - -static Token *new_num_token(int val, Token *tmpl) { - char *buf = format("%d\n", val); - return tokenize(new_file(tmpl->file->name, tmpl->file->file_no, buf)); -} - -static Token *read_const_expr(Token **rest, Token *tok) { - tok = copy_line(rest, tok); - - Token head = {}; - Token *cur = &head; - - while (tok->kind != TK_EOF) { - // "defined(foo)" or "defined foo" becomes "1" if macro "foo" - // is defined. Otherwise "0". - if (equal(tok, "defined")) { - Token *start = tok; - bool has_paren = consume(&tok, tok->next, "("); - - if (tok->kind != TK_IDENT) - error_tok(start, "macro name must be an identifier"); - Macro *m = find_macro(tok); - tok = tok->next; - - if (has_paren) - tok = skip(tok, ")"); - - cur = cur->next = new_num_token(m ? 1 : 0, start); - continue; - } - - cur = cur->next = tok; - tok = tok->next; - } - - cur->next = tok; - return head.next; -} - -// Read and evaluate a constant expression. -static long eval_const_expr(Token **rest, Token *tok) { - Token *start = tok; - Token *expr = read_const_expr(rest, tok->next); - expr = preprocess2(expr); - - if (expr->kind == TK_EOF) - error_tok(start, "no expression"); - - // [https://www.sigbus.info/n1570#6.10.1p4] The standard requires - // we replace remaining non-macro identifiers with "0" before - // evaluating a constant expression. For example, `#if foo` is - // equivalent to `#if 0` if foo is not defined. - for (Token *t = expr; t->kind != TK_EOF; t = t->next) { - if (t->kind == TK_IDENT) { - Token *next = t->next; - *t = *new_num_token(0, t); - t->next = next; - } - } - - // Convert pp-numbers to regular numbers - convert_pp_tokens(expr); - - Token *rest2; - long val = const_expr(&rest2, expr); - if (rest2->kind != TK_EOF) - error_tok(rest2, "extra token"); - return val; -} - -static CondIncl *push_cond_incl(Token *tok, bool included) { - CondIncl *ci = calloc(1, sizeof(CondIncl)); - ci->next = cond_incl; - ci->ctx = IN_THEN; - ci->tok = tok; - ci->included = included; - cond_incl = ci; - return ci; -} - -static Macro *find_macro(Token *tok) { - if (tok->kind != TK_IDENT) - return NULL; - return hashmap_get2(¯os, tok->loc, tok->len); -} - -static Macro *add_macro(char *name, bool is_objlike, Token *body) { - Macro *m = calloc(1, sizeof(Macro)); - m->name = name; - m->is_objlike = is_objlike; - m->body = body; - hashmap_put(¯os, name, m); - return m; -} - -static MacroParam *read_macro_params(Token **rest, Token *tok, char **va_args_name) { - MacroParam head = {}; - MacroParam *cur = &head; - - while (!equal(tok, ")")) { - if (cur != &head) - tok = skip(tok, ","); - - if (equal(tok, "...")) { - *va_args_name = "__VA_ARGS__"; - *rest = skip(tok->next, ")"); - return head.next; - } - - if (tok->kind != TK_IDENT) - error_tok(tok, "expected an identifier"); - - if (equal(tok->next, "...")) { - *va_args_name = strndup(tok->loc, tok->len); - *rest = skip(tok->next->next, ")"); - return head.next; - } - - MacroParam *m = calloc(1, sizeof(MacroParam)); - m->name = strndup(tok->loc, tok->len); - cur = cur->next = m; - tok = tok->next; - } - - *rest = tok->next; - return head.next; -} - -static void read_macro_definition(Token **rest, Token *tok) { - if (tok->kind != TK_IDENT) - error_tok(tok, "macro name must be an identifier"); - char *name = strndup(tok->loc, tok->len); - tok = tok->next; - - if (!tok->has_space && equal(tok, "(")) { - // Function-like macro - char *va_args_name = NULL; - MacroParam *params = read_macro_params(&tok, tok->next, &va_args_name); - - Macro *m = add_macro(name, false, copy_line(rest, tok)); - m->params = params; - m->va_args_name = va_args_name; - } else { - // Object-like macro - add_macro(name, true, copy_line(rest, tok)); - } -} - -static MacroArg *read_macro_arg_one(Token **rest, Token *tok, bool read_rest) { - Token head = {}; - Token *cur = &head; - int level = 0; - - for (;;) { - if (level == 0 && equal(tok, ")")) - break; - if (level == 0 && !read_rest && equal(tok, ",")) - break; - - if (tok->kind == TK_EOF) - error_tok(tok, "premature end of input"); - - if (equal(tok, "(")) - level++; - else if (equal(tok, ")")) - level--; - - cur = cur->next = copy_token(tok); - tok = tok->next; - } - - cur->next = new_eof(tok); - - MacroArg *arg = calloc(1, sizeof(MacroArg)); - arg->tok = head.next; - *rest = tok; - return arg; -} - -static MacroArg * -read_macro_args(Token **rest, Token *tok, MacroParam *params, char *va_args_name) { - Token *start = tok; - tok = tok->next->next; - - MacroArg head = {}; - MacroArg *cur = &head; - - MacroParam *pp = params; - for (; pp; pp = pp->next) { - if (cur != &head) - tok = skip(tok, ","); - cur = cur->next = read_macro_arg_one(&tok, tok, false); - cur->name = pp->name; - } - - if (va_args_name) { - MacroArg *arg; - if (equal(tok, ")")) { - arg = calloc(1, sizeof(MacroArg)); - arg->tok = new_eof(tok); - } else { - if (pp != params) - tok = skip(tok, ","); - arg = read_macro_arg_one(&tok, tok, true); - } - arg->name = va_args_name;; - arg->is_va_args = true; - cur = cur->next = arg; - } else if (pp) { - error_tok(start, "too many arguments"); - } - - skip(tok, ")"); - *rest = tok; - return head.next; -} - -static MacroArg *find_arg(MacroArg *args, Token *tok) { - for (MacroArg *ap = args; ap; ap = ap->next) - if (tok->len == strlen(ap->name) && !strncmp(tok->loc, ap->name, tok->len)) - return ap; - return NULL; -} - -// Concatenates all tokens in `tok` and returns a new string. -static char *join_tokens(Token *tok, Token *end) { - // Compute the length of the resulting token. - int len = 1; - for (Token *t = tok; t != end && t->kind != TK_EOF; t = t->next) { - if (t != tok && t->has_space) - len++; - len += t->len; - } - - char *buf = calloc(1, len); - - // Copy token texts. - int pos = 0; - for (Token *t = tok; t != end && t->kind != TK_EOF; t = t->next) { - if (t != tok && t->has_space) - buf[pos++] = ' '; - strncpy(buf + pos, t->loc, t->len); - pos += t->len; - } - buf[pos] = '\0'; - return buf; -} - -// Concatenates all tokens in `arg` and returns a new string token. -// This function is used for the stringizing operator (#). -static Token *stringize(Token *hash, Token *arg) { - // Create a new string token. We need to set some value to its - // source location for error reporting function, so we use a macro - // name token as a template. - char *s = join_tokens(arg, NULL); - return new_str_token(s, hash); -} - -// Concatenate two tokens to create a new token. -static Token *paste(Token *lhs, Token *rhs) { - // Paste the two tokens. - char *buf = format("%.*s%.*s", lhs->len, lhs->loc, rhs->len, rhs->loc); - - // Tokenize the resulting string. - Token *tok = tokenize(new_file(lhs->file->name, lhs->file->file_no, buf)); - if (tok->next->kind != TK_EOF) - error_tok(lhs, "pasting forms '%s', an invalid token", buf); - return tok; -} - -static bool has_varargs(MacroArg *args) { - for (MacroArg *ap = args; ap; ap = ap->next) - if (!strcmp(ap->name, "__VA_ARGS__")) - return ap->tok->kind != TK_EOF; - return false; -} - -// Replace func-like macro parameters with given arguments. -static Token *subst(Token *tok, MacroArg *args) { - Token head = {}; - Token *cur = &head; - - while (tok->kind != TK_EOF) { - // "#" followed by a parameter is replaced with stringized actuals. - if (equal(tok, "#")) { - MacroArg *arg = find_arg(args, tok->next); - if (!arg) - error_tok(tok->next, "'#' is not followed by a macro parameter"); - cur = cur->next = stringize(tok, arg->tok); - tok = tok->next->next; - continue; - } - - // [GNU] If __VA_ARG__ is empty, `,##__VA_ARGS__` is expanded - // to the empty token list. Otherwise, its expaned to `,` and - // __VA_ARGS__. - if (equal(tok, ",") && equal(tok->next, "##")) { - MacroArg *arg = find_arg(args, tok->next->next); - if (arg && arg->is_va_args) { - if (arg->tok->kind == TK_EOF) { - tok = tok->next->next->next; - } else { - cur = cur->next = copy_token(tok); - tok = tok->next->next; - } - continue; - } - } - - if (equal(tok, "##")) { - if (cur == &head) - error_tok(tok, "'##' cannot appear at start of macro expansion"); - - if (tok->next->kind == TK_EOF) - error_tok(tok, "'##' cannot appear at end of macro expansion"); - - MacroArg *arg = find_arg(args, tok->next); - if (arg) { - if (arg->tok->kind != TK_EOF) { - *cur = *paste(cur, arg->tok); - for (Token *t = arg->tok->next; t->kind != TK_EOF; t = t->next) - cur = cur->next = copy_token(t); - } - tok = tok->next->next; - continue; - } - - *cur = *paste(cur, tok->next); - tok = tok->next->next; - continue; - } - - MacroArg *arg = find_arg(args, tok); - - if (arg && equal(tok->next, "##")) { - Token *rhs = tok->next->next; - - if (arg->tok->kind == TK_EOF) { - MacroArg *arg2 = find_arg(args, rhs); - if (arg2) { - for (Token *t = arg2->tok; t->kind != TK_EOF; t = t->next) - cur = cur->next = copy_token(t); - } else { - cur = cur->next = copy_token(rhs); - } - tok = rhs->next; - continue; - } - - for (Token *t = arg->tok; t->kind != TK_EOF; t = t->next) - cur = cur->next = copy_token(t); - tok = tok->next; - continue; - } - - // If __VA_ARG__ is empty, __VA_OPT__(x) is expanded to the - // empty token list. Otherwise, __VA_OPT__(x) is expanded to x. - if (equal(tok, "__VA_OPT__") && equal(tok->next, "(")) { - MacroArg *arg = read_macro_arg_one(&tok, tok->next->next, true); - if (has_varargs(args)) - for (Token *t = arg->tok; t->kind != TK_EOF; t = t->next) - cur = cur->next = t; - tok = skip(tok, ")"); - continue; - } - - // Handle a macro token. Macro arguments are completely macro-expanded - // before they are substituted into a macro body. - if (arg) { - Token *t = preprocess2(arg->tok); - t->at_bol = tok->at_bol; - t->has_space = tok->has_space; - for (; t->kind != TK_EOF; t = t->next) - cur = cur->next = copy_token(t); - tok = tok->next; - continue; - } - - // Handle a non-macro token. - cur = cur->next = copy_token(tok); - tok = tok->next; - continue; - } - - cur->next = tok; - return head.next; -} - -// If tok is a macro, expand it and return true. -// Otherwise, do nothing and return false. -static bool expand_macro(Token **rest, Token *tok) { - if (hideset_contains(tok->hideset, tok->loc, tok->len)) - return false; - - Macro *m = find_macro(tok); - if (!m) - return false; - - // Built-in dynamic macro application such as __LINE__ - if (m->handler) { - *rest = m->handler(tok); - (*rest)->next = tok->next; - return true; - } - - // Object-like macro application - if (m->is_objlike) { - Hideset *hs = hideset_union(tok->hideset, new_hideset(m->name)); - Token *body = add_hideset(m->body, hs); - for (Token *t = body; t->kind != TK_EOF; t = t->next) - t->origin = tok; - *rest = append(body, tok->next); - (*rest)->at_bol = tok->at_bol; - (*rest)->has_space = tok->has_space; - return true; - } - - // If a funclike macro token is not followed by an argument list, - // treat it as a normal identifier. - if (!equal(tok->next, "(")) - return false; - - // Function-like macro application - Token *macro_token = tok; - MacroArg *args = read_macro_args(&tok, tok, m->params, m->va_args_name); - Token *rparen = tok; - - // Tokens that consist a func-like macro invocation may have different - // hidesets, and if that's the case, it's not clear what the hideset - // for the new tokens should be. We take the interesection of the - // macro token and the closing parenthesis and use it as a new hideset - // as explained in the Dave Prossor's algorithm. - Hideset *hs = hideset_intersection(macro_token->hideset, rparen->hideset); - hs = hideset_union(hs, new_hideset(m->name)); - - Token *body = subst(m->body, args); - body = add_hideset(body, hs); - for (Token *t = body; t->kind != TK_EOF; t = t->next) - t->origin = macro_token; - *rest = append(body, tok->next); - (*rest)->at_bol = macro_token->at_bol; - (*rest)->has_space = macro_token->has_space; - return true; -} - -char *search_include_paths(char *filename) { - if (filename[0] == '/') - return filename; - - static HashMap cache; - char *cached = hashmap_get(&cache, filename); - if (cached) - return cached; - - // Search a file from the include paths. - for (int i = 0; i < include_paths.len; i++) { - char *path = format("%s/%s", include_paths.data[i], filename); - if (!file_exists(path)) - continue; - hashmap_put(&cache, filename, path); - include_next_idx = i + 1; - return path; - } - return NULL; -} - -static char *search_include_next(char *filename) { - for (; include_next_idx < include_paths.len; include_next_idx++) { - char *path = format("%s/%s", include_paths.data[include_next_idx], filename); - if (file_exists(path)) - return path; - } - return NULL; -} - -// Read an #include argument. -static char *read_include_filename(Token **rest, Token *tok, bool *is_dquote) { - // Pattern 1: #include "foo.h" - if (tok->kind == TK_STR) { - // A double-quoted filename for #include is a special kind of - // token, and we don't want to interpret any escape sequences in it. - // For example, "\f" in "C:\foo" is not a formfeed character but - // just two non-control characters, backslash and f. - // So we don't want to use token->str. - *is_dquote = true; - *rest = skip_line(tok->next); - return strndup(tok->loc + 1, tok->len - 2); - } - - // Pattern 2: #include <foo.h> - if (equal(tok, "<")) { - // Reconstruct a filename from a sequence of tokens between - // "<" and ">". - Token *start = tok; - - // Find closing ">". - for (; !equal(tok, ">"); tok = tok->next) - if (tok->at_bol || tok->kind == TK_EOF) - error_tok(tok, "expected '>'"); - - *is_dquote = false; - *rest = skip_line(tok->next); - return join_tokens(start->next, tok); - } - - // Pattern 3: #include FOO - // In this case FOO must be macro-expanded to either - // a single string token or a sequence of "<" ... ">". - if (tok->kind == TK_IDENT) { - Token *tok2 = preprocess2(copy_line(rest, tok)); - return read_include_filename(&tok2, tok2, is_dquote); - } - - error_tok(tok, "expected a filename"); -} - -// Detect the following "include guard" pattern. -// -// #ifndef FOO_H -// #define FOO_H -// ... -// #endif -static char *detect_include_guard(Token *tok) { - // Detect the first two lines. - if (!is_hash(tok) || !equal(tok->next, "ifndef")) - return NULL; - tok = tok->next->next; - - if (tok->kind != TK_IDENT) - return NULL; - - char *macro = strndup(tok->loc, tok->len); - tok = tok->next; - - if (!is_hash(tok) || !equal(tok->next, "define") || !equal(tok->next->next, macro)) - return NULL; - - // Read until the end of the file. - while (tok->kind != TK_EOF) { - if (!is_hash(tok)) { - tok = tok->next; - continue; - } - - if (equal(tok->next, "endif") && tok->next->next->kind == TK_EOF) - return macro; - - if (equal(tok, "if") || equal(tok, "ifdef") || equal(tok, "ifndef")) - tok = skip_cond_incl(tok->next); - else - tok = tok->next; - } - return NULL; -} - -static Token *include_file(Token *tok, char *path, Token *filename_tok) { - // Check for "#pragma once" - if (hashmap_get(&pragma_once, path)) - return tok; - - // If we read the same file before, and if the file was guarded - // by the usual #ifndef ... #endif pattern, we may be able to - // skip the file without opening it. - static HashMap include_guards; - char *guard_name = hashmap_get(&include_guards, path); - if (guard_name && hashmap_get(¯os, guard_name)) - return tok; - - Token *tok2 = tokenize_file(path); - if (!tok2) - error_tok(filename_tok, "%s: cannot open file: %s", path, strerror(errno)); - - guard_name = detect_include_guard(tok2); - if (guard_name) - hashmap_put(&include_guards, path, guard_name); - - return append(tok2, tok); -} - -// Read #line arguments -static void read_line_marker(Token **rest, Token *tok) { - Token *start = tok; - tok = preprocess(copy_line(rest, tok)); - - if (tok->kind != TK_NUM || tok->ty->kind != TY_INT) - error_tok(tok, "invalid line marker"); - start->file->line_delta = tok->val - start->line_no; - - tok = tok->next; - if (tok->kind == TK_EOF) - return; - - if (tok->kind != TK_STR) - error_tok(tok, "filename expected"); - start->file->display_name = tok->str; -} - -// Visit all tokens in `tok` while evaluating preprocessing -// macros and directives. -static Token *preprocess2(Token *tok) { - Token head = {}; - Token *cur = &head; - - while (tok->kind != TK_EOF) { - // If it is a macro, expand it. - if (expand_macro(&tok, tok)) - continue; - - // Pass through if it is not a "#". - if (!is_hash(tok)) { - tok->line_delta = tok->file->line_delta; - tok->filename = tok->file->display_name; - cur = cur->next = tok; - tok = tok->next; - continue; - } - - Token *start = tok; - tok = tok->next; - - if (equal(tok, "include")) { - bool is_dquote; - char *filename = read_include_filename(&tok, tok->next, &is_dquote); - - if (filename[0] != '/' && is_dquote) { - char *path = format("%s/%s", dirname(strdup(start->file->name)), filename); - if (file_exists(path)) { - tok = include_file(tok, path, start->next->next); - continue; - } - } - - char *path = search_include_paths(filename); - tok = include_file(tok, path ? path : filename, start->next->next); - continue; - } - - if (equal(tok, "include_next")) { - bool ignore; - char *filename = read_include_filename(&tok, tok->next, &ignore); - char *path = search_include_next(filename); - tok = include_file(tok, path ? path : filename, start->next->next); - continue; - } - - if (equal(tok, "define")) { - read_macro_definition(&tok, tok->next); - continue; - } - - if (equal(tok, "undef")) { - tok = tok->next; - if (tok->kind != TK_IDENT) - error_tok(tok, "macro name must be an identifier"); - undef_macro(strndup(tok->loc, tok->len)); - tok = skip_line(tok->next); - continue; - } - - if (equal(tok, "if")) { - long val = eval_const_expr(&tok, tok); - push_cond_incl(start, val); - if (!val) - tok = skip_cond_incl(tok); - continue; - } - - if (equal(tok, "ifdef")) { - bool defined = find_macro(tok->next); - push_cond_incl(tok, defined); - tok = skip_line(tok->next->next); - if (!defined) - tok = skip_cond_incl(tok); - continue; - } - - if (equal(tok, "ifndef")) { - bool defined = find_macro(tok->next); - push_cond_incl(tok, !defined); - tok = skip_line(tok->next->next); - if (defined) - tok = skip_cond_incl(tok); - continue; - } - - if (equal(tok, "elif")) { - if (!cond_incl || cond_incl->ctx == IN_ELSE) - error_tok(start, "stray #elif"); - cond_incl->ctx = IN_ELIF; - - if (!cond_incl->included && eval_const_expr(&tok, tok)) - cond_incl->included = true; - else - tok = skip_cond_incl(tok); - continue; - } - - if (equal(tok, "else")) { - if (!cond_incl || cond_incl->ctx == IN_ELSE) - error_tok(start, "stray #else"); - cond_incl->ctx = IN_ELSE; - tok = skip_line(tok->next); - - if (cond_incl->included) - tok = skip_cond_incl(tok); - continue; - } - - if (equal(tok, "endif")) { - if (!cond_incl) - error_tok(start, "stray #endif"); - cond_incl = cond_incl->next; - tok = skip_line(tok->next); - continue; - } - - if (equal(tok, "line")) { - read_line_marker(&tok, tok->next); - continue; - } - - if (tok->kind == TK_PP_NUM) { - read_line_marker(&tok, tok); - continue; - } - - if (equal(tok, "pragma") && equal(tok->next, "once")) { - hashmap_put(&pragma_once, tok->file->name, (void *)1); - tok = skip_line(tok->next->next); - continue; - } - - if (equal(tok, "pragma")) { - do { - tok = tok->next; - } while (!tok->at_bol); - continue; - } - - if (equal(tok, "error")) - error_tok(tok, "error"); - - // `#`-only line is legal. It's called a null directive. - if (tok->at_bol) - continue; - - error_tok(tok, "invalid preprocessor directive"); - } - - cur->next = tok; - return head.next; -} - -void define_macro(char *name, char *buf) { - Token *tok = tokenize(new_file("<built-in>", 1, buf)); - add_macro(name, true, tok); -} - -void undef_macro(char *name) { - hashmap_delete(¯os, name); -} - -static Macro *add_builtin(char *name, macro_handler_fn *fn) { - Macro *m = add_macro(name, true, NULL); - m->handler = fn; - return m; -} - -static Token *file_macro(Token *tmpl) { - while (tmpl->origin) - tmpl = tmpl->origin; - return new_str_token(tmpl->file->display_name, tmpl); -} - -static Token *line_macro(Token *tmpl) { - while (tmpl->origin) - tmpl = tmpl->origin; - int i = tmpl->line_no + tmpl->file->line_delta; - return new_num_token(i, tmpl); -} - -// __COUNTER__ is expanded to serial values starting from 0. -static Token *counter_macro(Token *tmpl) { - static int i = 0; - return new_num_token(i++, tmpl); -} - -// __TIMESTAMP__ is expanded to a string describing the last -// modification time of the current file. E.g. -// "Fri Jul 24 01:32:50 2020" -static Token *timestamp_macro(Token *tmpl) { - struct stat st; - if (stat(tmpl->file->name, &st) != 0) - return new_str_token("??? ??? ?? ??:??:?? ????", tmpl); - - char buf[30]; - ctime_r(&st.st_mtime, buf); - buf[24] = '\0'; - return new_str_token(buf, tmpl); -} - -static Token *base_file_macro(Token *tmpl) { - return new_str_token(base_file, tmpl); -} - -// __DATE__ is expanded to the current date, e.g. "May 17 2020". -static char *format_date(struct tm *tm) { - static char mon[][4] = { - "Jan", "Feb", "Mar", "Apr", "May", "Jun", - "Jul", "Aug", "Sep", "Oct", "Nov", "Dec", - }; - - return format("\"%s %2d %d\"", mon[tm->tm_mon], tm->tm_mday, tm->tm_year + 1900); -} - -// __TIME__ is expanded to the current time, e.g. "13:34:03". -static char *format_time(struct tm *tm) { - return format("\"%02d:%02d:%02d\"", tm->tm_hour, tm->tm_min, tm->tm_sec); -} - -void init_macros(void) { - // Define predefined macros - define_macro("_LP64", "1"); - define_macro("__C99_MACRO_WITH_VA_ARGS", "1"); - define_macro("__ELF__", "1"); - define_macro("__LP64__", "1"); - define_macro("__SIZEOF_DOUBLE__", "8"); - define_macro("__SIZEOF_FLOAT__", "4"); - define_macro("__SIZEOF_INT__", "4"); - define_macro("__SIZEOF_LONG_DOUBLE__", "8"); - define_macro("__SIZEOF_LONG_LONG__", "8"); - define_macro("__SIZEOF_LONG__", "8"); - define_macro("__SIZEOF_POINTER__", "8"); - define_macro("__SIZEOF_PTRDIFF_T__", "8"); - define_macro("__SIZEOF_SHORT__", "2"); - define_macro("__SIZEOF_SIZE_T__", "8"); - define_macro("__SIZE_TYPE__", "unsigned long"); - define_macro("__STDC_HOSTED__", "1"); - define_macro("__STDC_NO_COMPLEX__", "1"); - define_macro("__STDC_UTF_16__", "1"); - define_macro("__STDC_UTF_32__", "1"); - define_macro("__STDC_VERSION__", "201112L"); - define_macro("__STDC__", "1"); - define_macro("__USER_LABEL_PREFIX__", ""); - define_macro("__alignof__", "_Alignof"); - define_macro("__amd64", "1"); - define_macro("__amd64__", "1"); - define_macro("__chibicc__", "1"); - define_macro("__const__", "const"); - define_macro("__gnu_linux__", "1"); - define_macro("__inline__", "inline"); - define_macro("__linux", "1"); - define_macro("__linux__", "1"); - define_macro("__signed__", "signed"); - define_macro("__typeof__", "typeof"); - define_macro("__unix", "1"); - define_macro("__unix__", "1"); - define_macro("__volatile__", "volatile"); - define_macro("__x86_64", "1"); - define_macro("__x86_64__", "1"); - define_macro("linux", "1"); - define_macro("unix", "1"); - - add_builtin("__FILE__", file_macro); - add_builtin("__LINE__", line_macro); - add_builtin("__COUNTER__", counter_macro); - add_builtin("__TIMESTAMP__", timestamp_macro); - add_builtin("__BASE_FILE__", base_file_macro); - - time_t now = time(NULL); - struct tm *tm = localtime(&now); - define_macro("__DATE__", format_date(tm)); - define_macro("__TIME__", format_time(tm)); -} - -typedef enum { - STR_NONE, STR_UTF8, STR_UTF16, STR_UTF32, STR_WIDE, -} StringKind; - -static StringKind getStringKind(Token *tok) { - if (!strcmp(tok->loc, "u8")) - return STR_UTF8; - - switch (tok->loc[0]) { - case '"': return STR_NONE; - case 'u': return STR_UTF16; - case 'U': return STR_UTF32; - case 'L': return STR_WIDE; - } - unreachable(); -} - -// Concatenate adjacent string literals into a single string literal -// as per the C spec. -static void join_adjacent_string_literals(Token *tok) { - // First pass: If regular string literals are adjacent to wide - // string literals, regular string literals are converted to a wide - // type before concatenation. In this pass, we do the conversion. - for (Token *tok1 = tok; tok1->kind != TK_EOF;) { - if (tok1->kind != TK_STR || tok1->next->kind != TK_STR) { - tok1 = tok1->next; - continue; - } - - StringKind kind = getStringKind(tok1); - Type *basety = tok1->ty->base; - - for (Token *t = tok1->next; t->kind == TK_STR; t = t->next) { - StringKind k = getStringKind(t); - if (kind == STR_NONE) { - kind = k; - basety = t->ty->base; - } else if (k != STR_NONE && kind != k) { - error_tok(t, "unsupported non-standard concatenation of string literals"); - } - } - - if (basety->size > 1) - for (Token *t = tok1; t->kind == TK_STR; t = t->next) - if (t->ty->base->size == 1) - *t = *tokenize_string_literal(t, basety); - - while (tok1->kind == TK_STR) - tok1 = tok1->next; - } - - // Second pass: concatenate adjacent string literals. - for (Token *tok1 = tok; tok1->kind != TK_EOF;) { - if (tok1->kind != TK_STR || tok1->next->kind != TK_STR) { - tok1 = tok1->next; - continue; - } - - Token *tok2 = tok1->next; - while (tok2->kind == TK_STR) - tok2 = tok2->next; - - int len = tok1->ty->array_len; - for (Token *t = tok1->next; t != tok2; t = t->next) - len = len + t->ty->array_len - 1; - - char *buf = calloc(tok1->ty->base->size, len); - - int i = 0; - for (Token *t = tok1; t != tok2; t = t->next) { - memcpy(buf + i, t->str, t->ty->size); - i = i + t->ty->size - t->ty->base->size; - } - - *tok1 = *copy_token(tok1); - tok1->ty = array_of(tok1->ty->base, len); - tok1->str = buf; - tok1->next = tok2; - tok1 = tok2; - } -} - -// Entry point function of the preprocessor. -Token *preprocess(Token *tok) { - tok = preprocess2(tok); - if (cond_incl) - error_tok(cond_incl->tok, "unterminated conditional directive"); - convert_pp_tokens(tok); - join_adjacent_string_literals(tok); - - for (Token *t = tok; t; t = t->next) - t->line_no += t->line_delta; - return tok; -} |