summaryrefslogtreecommitdiffhomepage
path: root/src/3p/chibicc/chibicc.h
diff options
context:
space:
mode:
author Michael Smith <mikesmiffy128@gmail.com> 2021-11-20 03:10:50 +0000
committer Michael Smith <mikesmiffy128@gmail.com> 2021-11-20 03:18:08 +0000
commit da6f343032cb01597dc7866e66f091adf3243a62 (patch)
tree 870f8cb8e82bb42202ab92bea03fc6ab35ada7ca /src/3p/chibicc/chibicc.h
Initial public snapshot
With code from Bill. Thanks Bill!
Diffstat (limited to 'src/3p/chibicc/chibicc.h')
-rw-r--r-- src/3p/chibicc/chibicc.h 486
1 files changed, 486 insertions, 0 deletions
diff --git a/src/3p/chibicc/chibicc.h b/src/3p/chibicc/chibicc.h
new file mode 100644
index 0000000..1719bc5
--- /dev/null
+++ b/src/3p/chibicc/chibicc.h
@@ -0,0 +1,486 @@
+// include guards: upstream doesn't have these but we add them so we can cat
+// source files together (or #include them, in particular)
+#ifndef INC_CHIBICC_H
+#define INC_CHIBICC_H
+
+// note: removing defs/headers that aren't needed in tokenize.c and/or don't
+// exist on Windows, in order to get our stuff working. total hack; oh well.
+//#define _POSIX_C_SOURCE 200809L
+#include <assert.h>
+#include <ctype.h>
+#include <errno.h>
+//#include <glob.h>
+//#include <libgen.h>
+#include <stdarg.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+// <stdnoreturn.h> defines a `noreturn` macro, so including it would clash with
+// our own noreturn (_Noreturn void). tokenize.c has no noreturns anyway, and the
+// ones in this header have been changed to plain _Noreturn to avoid any conflict.
+//#include <stdnoreturn.h>
+#include <string.h>
+//#include <strings.h>
+#include <sys/stat.h>
+//#include <sys/types.h>
+//#include <sys/wait.h>
+#include <time.h>
+//#include <unistd.h>
+
+// exists on all Unixes but is normally hidden behind _GNU_SOURCE on Linux.
+// missing entirely on Windows (implemented in 3p/openbsd/asprintf.c for compat)
+int vasprintf(char **str, const char *fmt, va_list ap);
+
+#define MAX(x, y) ((x) < (y) ? (y) : (x)) // larger of x and y; the selected argument is evaluated twice, so avoid side effects
+#define MIN(x, y) ((x) < (y) ? (x) : (y)) // smaller of x and y; the selected argument is evaluated twice, so avoid side effects
+
+#if !defined(__GNUC__) && !defined(__clang__)
+# define __attribute__(x) // expands to nothing, so the format(printf, ...) annotations below are dropped on other compilers
+#endif
+
+typedef struct Type Type; // struct defined in the type.c section below
+typedef struct Node Node; // struct defined in the parse.c section below
+typedef struct Member Member; // struct defined in the type.c section below
+typedef struct Relocation Relocation; // struct defined in the parse.c section below
+typedef struct Hideset Hideset; // no struct definition visible in this header; only used through pointers here
+
+//
+// strings.c
+//
+
+typedef struct {
+ char **data; // presumably the stored string pointers - confirm in strings.c
+ int capacity; // presumably the number of slots allocated in data - confirm in strings.c
+ int len; // presumably the number of slots in use - confirm in strings.c
+} StringArray;
+
+void strarray_push(StringArray *arr, char *s);
+
+//
+// tokenize.c
+//
+
+// Token
+typedef enum {
+ TK_IDENT, // Identifiers
+ TK_PUNCT, // Punctuators
+ TK_KEYWORD, // Keywords
+ TK_STR, // String literals
+ TK_NUM, // Numeric literals
+ TK_PP_NUM, // Preprocessing numbers
+ TK_EOF, // End-of-file markers
+} TokenKind; // stored in Token.kind
+
+typedef struct {
+ char *name; // name, file_no and contents have the same names and types as new_file()'s parameters
+ int file_no;
+ char *contents;
+
+ // For #line directive
+ char *display_name;
+ int line_delta;
+} File;
+
+// Token type
+typedef struct Token Token;
+struct Token {
+ TokenKind kind; // Token kind
+ Token *next; // Next token
+ int64_t val; // If kind is TK_NUM, its value (integer field)
+ long double fval; // If kind is TK_NUM, its value (long double field; presumably for floating-point literals - confirm in tokenize.c)
+ char *loc; // Token location
+ int len; // Token length
+ Type *ty; // Used if TK_NUM or TK_STR
+ char *str; // String literal contents including terminating '\0'
+
+ File *file; // Source location
+ char *filename; // Filename
+ int line_no; // Line number
+ int line_delta; // Line-number adjustment; presumably mirrors File.line_delta ("For #line directive") - confirm in tokenize.c
+ bool at_bol; // True if this token is at beginning of line
+ bool has_space; // True if this token follows a space character
+ Hideset *hideset; // For macro expansion
+ Token *origin; // If this is expanded from a macro, the original token
+};
+
+_Noreturn void error(char *fmt, ...) __attribute__((format(printf, 1, 2))); // printf-style format; declared as never returning
+_Noreturn void error_at(char *loc, char *fmt, ...) __attribute__((format(printf, 2, 3))); // printf-style format; declared as never returning
+_Noreturn void error_tok(Token *tok, char *fmt, ...) __attribute__((format(printf, 2, 3))); // printf-style format; declared as never returning
+void warn_tok(Token *tok, char *fmt, ...) __attribute__((format(printf, 2, 3))); // printf-style format; unlike the error*() functions, not _Noreturn
+bool equal(Token *tok, char *op);
+Token *skip(Token *tok, char *op);
+bool consume(Token **rest, Token *tok, char *str);
+void convert_pp_tokens(Token *tok);
+File **get_input_files(void);
+File *new_file(char *name, int file_no, char *contents);
+Token *tokenize_string_literal(Token *tok, Type *basety);
+Token *tokenize(File *file);
+//Token *tokenize_file(char *filename);
+Token *tokenize_buf(const char *name, char *p);
+
+// note: replacing the memstream-based format() with a vasprintf-based version.
+// moved down here because it calls error(), which is declared above.
+//char *format(char *fmt, ...) __attribute__((format(printf, 1, 2)));
+__attribute__((format(printf, 1, 2)))
+static inline char *format(const char *fmt, ...) { // printf-style; returns the string that vasprintf() stores in ret
+ char *ret;
+ va_list va;
+ va_start(va, fmt);
+ if (vasprintf(&ret, fmt, va) == -1) error("couldn't allocate memory"); // error() is declared _Noreturn above. NOTE(review): va_end() is skipped on this path; only matters if error() could return
+ va_end(va);
+ return ret; // buffer comes from vasprintf(); it is not freed here
+}
+
+#define unreachable() /* NOTE(review): C23 <stddef.h> also defines unreachable(); possible clash if built as C23 */ \
+ error("internal error at %s:%d", __FILE__, __LINE__) // __FILE__/__LINE__ expand where the macro is used
+
+//
+// preprocess.c
+//
+
+char *search_include_paths(char *filename);
+void init_macros(void);
+void define_macro(char *name, char *buf);
+void undef_macro(char *name);
+Token *preprocess(Token *tok);
+
+//
+// parse.c
+//
+
+// Variable or function
+typedef struct Obj Obj;
+struct Obj { // the section comments below group fields by the kind of object they apply to
+ Obj *next;
+ char *name; // Variable name
+ Type *ty; // Type
+ Token *tok; // representative token
+ bool is_local; // local or global/function
+ int align; // alignment
+
+ // Local variable
+ int offset;
+
+ // Global variable or function
+ bool is_function;
+ bool is_definition;
+ bool is_static;
+
+ // Global variable
+ bool is_tentative;
+ bool is_tls;
+ char *init_data;
+ Relocation *rel; // see struct Relocation below
+
+ // Function
+ bool is_inline;
+ Obj *params;
+ Node *body;
+ Obj *locals;
+ Obj *va_area;
+ Obj *alloca_bottom;
+ int stack_size;
+
+ // Static inline function
+ bool is_live;
+ bool is_root;
+ StringArray refs; // StringArray is declared in the strings.c section above
+};
+
+// A global variable can be initialized either by a constant expression
+// or by a pointer to another global variable. This struct represents the
+// latter.
+typedef struct Relocation Relocation; // repeats the forward typedef near the top of this header (a repeated typedef is valid C11, not C99)
+struct Relocation {
+ Relocation *next;
+ int offset;
+ char **label;
+ long addend;
+};
+
+// AST node
+typedef enum {
+ ND_NULL_EXPR, // Do nothing
+ ND_ADD, // +
+ ND_SUB, // -
+ ND_MUL, // *
+ ND_DIV, // /
+ ND_NEG, // unary -
+ ND_MOD, // %
+ ND_BITAND, // &
+ ND_BITOR, // |
+ ND_BITXOR, // ^
+ ND_SHL, // <<
+ ND_SHR, // >>
+ ND_EQ, // ==
+ ND_NE, // !=
+ ND_LT, // <
+ ND_LE, // <=
+ ND_ASSIGN, // =
+ ND_COND, // ?:
+ ND_COMMA, // ,
+ ND_MEMBER, // . (struct member access)
+ ND_ADDR, // unary &
+ ND_DEREF, // unary *
+ ND_NOT, // !
+ ND_BITNOT, // ~
+ ND_LOGAND, // &&
+ ND_LOGOR, // ||
+ ND_RETURN, // "return"
+ ND_IF, // "if"
+ ND_FOR, // "for" or "while"
+ ND_DO, // "do"
+ ND_SWITCH, // "switch"
+ ND_CASE, // "case"
+ ND_BLOCK, // { ... }
+ ND_GOTO, // "goto"
+ ND_GOTO_EXPR, // "goto" labels-as-values
+ ND_LABEL, // Labeled statement
+ ND_LABEL_VAL, // [GNU] Labels-as-values
+ ND_FUNCALL, // Function call
+ ND_EXPR_STMT, // Expression statement
+ ND_STMT_EXPR, // Statement expression
+ ND_VAR, // Variable
+ ND_VLA_PTR, // VLA designator
+ ND_NUM, // Integer (NOTE(review): Node's "Numeric literal" fields include fval, so presumably floating constants too - confirm)
+ ND_CAST, // Type cast
+ ND_MEMZERO, // Zero-clear a stack variable
+ ND_ASM, // "asm"
+ ND_CAS, // Atomic compare-and-swap
+ ND_EXCH, // Atomic exchange
+} NodeKind; // stored in Node.kind
+
+// AST node type
+struct Node { // fields after rhs are grouped by the kind of node they belong to (see section comments)
+ NodeKind kind; // Node kind
+ Node *next; // Next node
+ Type *ty; // Type, e.g. int or pointer to int
+ Token *tok; // Representative token
+
+ Node *lhs; // Left-hand side
+ Node *rhs; // Right-hand side
+
+ // "if" or "for" statement
+ Node *cond;
+ Node *then;
+ Node *els;
+ Node *init;
+ Node *inc;
+
+ // "break" and "continue" labels
+ char *brk_label;
+ char *cont_label;
+
+ // Block or statement expression
+ Node *body;
+
+ // Struct member access
+ Member *member;
+
+ // Function call
+ Type *func_ty;
+ Node *args;
+ bool pass_by_stack;
+ Obj *ret_buffer;
+
+ // Goto or labeled statement, or labels-as-values
+ char *label;
+ char *unique_label;
+ Node *goto_next;
+
+ // Switch
+ Node *case_next;
+ Node *default_case;
+
+ // Case (begin/end presumably bound the range of values matched - confirm in parse.c)
+ long begin;
+ long end;
+
+ // "asm" string literal
+ char *asm_str;
+
+ // Atomic compare-and-swap
+ Node *cas_addr;
+ Node *cas_old;
+ Node *cas_new;
+
+ // Atomic op= operators
+ Obj *atomic_addr;
+ Node *atomic_expr;
+
+ // Variable
+ Obj *var;
+
+ // Numeric literal
+ int64_t val; // same type as Token.val
+ long double fval; // same type as Token.fval
+};
+
+Node *new_cast(Node *expr, Type *ty);
+int64_t const_expr(Token **rest, Token *tok);
+Obj *parse(Token *tok);
+
+//
+// type.c
+//
+
+typedef enum {
+ TY_VOID,
+ TY_BOOL,
+ TY_CHAR,
+ TY_SHORT,
+ TY_INT,
+ TY_LONG,
+ TY_FLOAT,
+ TY_DOUBLE,
+ TY_LDOUBLE,
+ TY_ENUM,
+ TY_PTR,
+ TY_FUNC,
+ TY_ARRAY,
+ TY_VLA, // variable-length array
+ TY_STRUCT,
+ TY_UNION,
+} TypeKind; // stored in Type.kind
+
+struct Type {
+ TypeKind kind; // one of the TY_* constants above
+ int size; // sizeof() value
+ int align; // alignment
+ bool is_unsigned; // unsigned or signed
+ bool is_atomic; // true if _Atomic
+ Type *origin; // for type compatibility check
+
+ // Pointer-to or array-of type. We intentionally use the same member
+ // to represent pointer/array duality in C.
+ //
+ // In many contexts in which a pointer is expected, we examine this
+ // member instead of "kind" member to determine whether a type is a
+ // pointer or not. That means in many contexts "array of T" is
+ // naturally handled as if it were "pointer to T", as required by
+ // the C spec.
+ Type *base;
+
+ // Declaration
+ Token *name;
+ Token *name_pos;
+
+ // Array
+ int array_len;
+
+ // Variable-length array
+ Node *vla_len; // # of elements
+ Obj *vla_size; // sizeof() value
+
+ // Struct
+ Member *members; // see struct Member below
+ bool is_flexible;
+ bool is_packed;
+
+ // Function type
+ Type *return_ty;
+ Type *params; // presumably a list chained through next - confirm in type.c
+ bool is_variadic;
+ Type *next;
+};
+
+// Struct member
+struct Member {
+ Member *next; // presumably the next entry in the list headed by Type.members - confirm in parse.c
+ Type *ty;
+ Token *tok; // for error message
+ Token *name;
+ int idx;
+ int align;
+ int offset;
+
+ // Bitfield
+ bool is_bitfield;
+ int bit_offset;
+ int bit_width;
+};
+
+extern Type *ty_void;
+extern Type *ty_bool;
+
+extern Type *ty_char;
+extern Type *ty_short;
+extern Type *ty_int;
+extern Type *ty_long;
+
+extern Type *ty_uchar;
+extern Type *ty_ushort;
+extern Type *ty_uint;
+extern Type *ty_ulong;
+
+extern Type *ty_float;
+extern Type *ty_double;
+extern Type *ty_ldouble;
+
+bool is_integer(Type *ty);
+bool is_flonum(Type *ty);
+bool is_numeric(Type *ty);
+bool is_compatible(Type *t1, Type *t2);
+Type *copy_type(Type *ty);
+Type *pointer_to(Type *base);
+Type *func_type(Type *return_ty);
+Type *array_of(Type *base, int size);
+Type *vla_of(Type *base, Node *expr);
+Type *enum_type(void);
+Type *struct_type(void);
+void add_type(Node *node);
+
+//
+// codegen.c
+//
+
+void codegen(Obj *prog, FILE *out);
+int align_to(int n, int align);
+
+//
+// unicode.c
+//
+
+int encode_utf8(char *buf, uint32_t c);
+uint32_t decode_utf8(char **new_pos, char *p);
+bool is_ident1(uint32_t c);
+bool is_ident2(uint32_t c);
+int display_width(char *p, int len);
+
+//
+// hashmap.c
+//
+
+typedef struct {
+ char *key;
+ int keylen; // presumably the length of key; the hashmap_*2() functions below take a matching keylen argument
+ void *val;
+} HashEntry;
+
+typedef struct {
+ HashEntry *buckets;
+ int capacity; // presumably the number of allocated buckets - confirm in hashmap.c
+ int used; // presumably the number of occupied buckets - confirm in hashmap.c
+} HashMap;
+
+void *hashmap_get(HashMap *map, char *key);
+void *hashmap_get2(HashMap *map, char *key, int keylen);
+void hashmap_put(HashMap *map, char *key, void *val);
+void hashmap_put2(HashMap *map, char *key, int keylen, void *val);
+void hashmap_delete(HashMap *map, char *key);
+void hashmap_delete2(HashMap *map, char *key, int keylen);
+void hashmap_test(void);
+
+//
+// main.c
+//
+
+bool file_exists(char *path);
+
+extern StringArray include_paths;
+extern bool opt_fpic;
+extern bool opt_fcommon;
+extern char *base_file;
+
+#endif