diff options
author | Michael Smith <mikesmiffy128@gmail.com> | 2021-11-20 03:10:50 +0000 |
---|---|---|
committer | Michael Smith <mikesmiffy128@gmail.com> | 2021-11-20 03:18:08 +0000 |
commit | da6f343032cb01597dc7866e66f091adf3243a62 (patch) | |
tree | 870f8cb8e82bb42202ab92bea03fc6ab35ada7ca /src/3p/chibicc/chibicc.h |
Initial public snapshot
With code from Bill. Thanks Bill!
Diffstat (limited to 'src/3p/chibicc/chibicc.h')
-rw-r--r-- | src/3p/chibicc/chibicc.h | 486 |
1 files changed, 486 insertions, 0 deletions
diff --git a/src/3p/chibicc/chibicc.h b/src/3p/chibicc/chibicc.h new file mode 100644 index 0000000..1719bc5 --- /dev/null +++ b/src/3p/chibicc/chibicc.h @@ -0,0 +1,486 @@ +// include guards: upstream doesn't have these but we add them so we can cat +// source files together (or #include them, in particular) +#ifndef INC_CHIBICC_H +#define INC_CHIBICC_H + +// note: removing defs/headers that aren't needed in tokenize.c and/or don't +// exist on Windows, in order to get our stuff working. total hack; oh well. +//#define _POSIX_C_SOURCE 200809L +#include <assert.h> +#include <ctype.h> +#include <errno.h> +//#include <glob.h> +//#include <libgen.h> +#include <stdarg.h> +#include <stdbool.h> +#include <stdint.h> +#include <stdio.h> +#include <stdlib.h> +// stdnoreturn means we can't use our noreturn (_Noreturn void) +// there are no noreturns in tokenize.c anyway, and the ones in this header have +// been changed to just _Noreturn to avoid any possible conflict +//#include <stdnoreturn.h> +#include <string.h> +//#include <strings.h> +#include <sys/stat.h> +//#include <sys/types.h> +//#include <sys/wait.h> +#include <time.h> +//#include <unistd.h> + +// exists on all Unixes but normally hidden _GNU_SOURCE on Linux. +// missing entirely on Windows (implemented in 3p/openbsd/asprintf.c for compat) +int vasprintf(char **str, const char *fmt, va_list ap); + +#define MAX(x, y) ((x) < (y) ? (y) : (x)) +#define MIN(x, y) ((x) < (y) ? (x) : (y)) + +#if !defined(__GNUC__) && !defined(__clang__) +# define __attribute__(x) +#endif + +typedef struct Type Type; +typedef struct Node Node; +typedef struct Member Member; +typedef struct Relocation Relocation; +typedef struct Hideset Hideset; + +// +// strings.c +// + +typedef struct { + char **data; + int capacity; + int len; +} StringArray; + +void strarray_push(StringArray *arr, char *s); + +// +// tokenize.c +// + +// Token +typedef enum { + TK_IDENT, // Identifiers + TK_PUNCT, // Punctuators + TK_KEYWORD, // Keywords + TK_STR, // String literals + TK_NUM, // Numeric literals + TK_PP_NUM, // Preprocessing numbers + TK_EOF, // End-of-file markers +} TokenKind; + +typedef struct { + char *name; + int file_no; + char *contents; + + // For #line directive + char *display_name; + int line_delta; +} File; + +// Token type +typedef struct Token Token; +struct Token { + TokenKind kind; // Token kind + Token *next; // Next token + int64_t val; // If kind is TK_NUM, its value + long double fval; // If kind is TK_NUM, its value + char *loc; // Token location + int len; // Token length + Type *ty; // Used if TK_NUM or TK_STR + char *str; // String literal contents including terminating '\0' + + File *file; // Source location + char *filename; // Filename + int line_no; // Line number + int line_delta; // Line number + bool at_bol; // True if this token is at beginning of line + bool has_space; // True if this token follows a space character + Hideset *hideset; // For macro expansion + Token *origin; // If this is expanded from a macro, the original token +}; + +_Noreturn void error(char *fmt, ...) __attribute__((format(printf, 1, 2))); +_Noreturn void error_at(char *loc, char *fmt, ...) __attribute__((format(printf, 2, 3))); +_Noreturn void error_tok(Token *tok, char *fmt, ...) __attribute__((format(printf, 2, 3))); +void warn_tok(Token *tok, char *fmt, ...) __attribute__((format(printf, 2, 3))); +bool equal(Token *tok, char *op); +Token *skip(Token *tok, char *op); +bool consume(Token **rest, Token *tok, char *str); +void convert_pp_tokens(Token *tok); +File **get_input_files(void); +File *new_file(char *name, int file_no, char *contents); +Token *tokenize_string_literal(Token *tok, Type *basety); +Token *tokenize(File *file); +//Token *tokenize_file(char *filename); +Token *tokenize_buf(const char *name, char *p); + +// note: replacing memstream-based format with asprintf version. moved down here +// as error() is declared above. +//char *format(char *fmt, ...) __attribute__((format(printf, 1, 2))); +__attribute__((format(printf, 1, 2))) +static inline char *format(const char *fmt, ...) { + char *ret; + va_list va; + va_start(va, fmt); + if (vasprintf(&ret, fmt, va) == -1) error("couldn't allocate memory"); + va_end(va); + return ret; +} + +#define unreachable() \ + error("internal error at %s:%d", __FILE__, __LINE__) + +// +// preprocess.c +// + +char *search_include_paths(char *filename); +void init_macros(void); +void define_macro(char *name, char *buf); +void undef_macro(char *name); +Token *preprocess(Token *tok); + +// +// parse.c +// + +// Variable or function +typedef struct Obj Obj; +struct Obj { + Obj *next; + char *name; // Variable name + Type *ty; // Type + Token *tok; // representative token + bool is_local; // local or global/function + int align; // alignment + + // Local variable + int offset; + + // Global variable or function + bool is_function; + bool is_definition; + bool is_static; + + // Global variable + bool is_tentative; + bool is_tls; + char *init_data; + Relocation *rel; + + // Function + bool is_inline; + Obj *params; + Node *body; + Obj *locals; + Obj *va_area; + Obj *alloca_bottom; + int stack_size; + + // Static inline function + bool is_live; + bool is_root; + StringArray refs; +}; + +// Global variable can be initialized either by a constant expression +// or a pointer to another global variable. This struct represents the +// latter. +typedef struct Relocation Relocation; +struct Relocation { + Relocation *next; + int offset; + char **label; + long addend; +}; + +// AST node +typedef enum { + ND_NULL_EXPR, // Do nothing + ND_ADD, // + + ND_SUB, // - + ND_MUL, // * + ND_DIV, // / + ND_NEG, // unary - + ND_MOD, // % + ND_BITAND, // & + ND_BITOR, // | + ND_BITXOR, // ^ + ND_SHL, // << + ND_SHR, // >> + ND_EQ, // == + ND_NE, // != + ND_LT, // < + ND_LE, // <= + ND_ASSIGN, // = + ND_COND, // ?: + ND_COMMA, // , + ND_MEMBER, // . (struct member access) + ND_ADDR, // unary & + ND_DEREF, // unary * + ND_NOT, // ! + ND_BITNOT, // ~ + ND_LOGAND, // && + ND_LOGOR, // || + ND_RETURN, // "return" + ND_IF, // "if" + ND_FOR, // "for" or "while" + ND_DO, // "do" + ND_SWITCH, // "switch" + ND_CASE, // "case" + ND_BLOCK, // { ... } + ND_GOTO, // "goto" + ND_GOTO_EXPR, // "goto" labels-as-values + ND_LABEL, // Labeled statement + ND_LABEL_VAL, // [GNU] Labels-as-values + ND_FUNCALL, // Function call + ND_EXPR_STMT, // Expression statement + ND_STMT_EXPR, // Statement expression + ND_VAR, // Variable + ND_VLA_PTR, // VLA designator + ND_NUM, // Integer + ND_CAST, // Type cast + ND_MEMZERO, // Zero-clear a stack variable + ND_ASM, // "asm" + ND_CAS, // Atomic compare-and-swap + ND_EXCH, // Atomic exchange +} NodeKind; + +// AST node type +struct Node { + NodeKind kind; // Node kind + Node *next; // Next node + Type *ty; // Type, e.g. int or pointer to int + Token *tok; // Representative token + + Node *lhs; // Left-hand side + Node *rhs; // Right-hand side + + // "if" or "for" statement + Node *cond; + Node *then; + Node *els; + Node *init; + Node *inc; + + // "break" and "continue" labels + char *brk_label; + char *cont_label; + + // Block or statement expression + Node *body; + + // Struct member access + Member *member; + + // Function call + Type *func_ty; + Node *args; + bool pass_by_stack; + Obj *ret_buffer; + + // Goto or labeled statement, or labels-as-values + char *label; + char *unique_label; + Node *goto_next; + + // Switch + Node *case_next; + Node *default_case; + + // Case + long begin; + long end; + + // "asm" string literal + char *asm_str; + + // Atomic compare-and-swap + Node *cas_addr; + Node *cas_old; + Node *cas_new; + + // Atomic op= operators + Obj *atomic_addr; + Node *atomic_expr; + + // Variable + Obj *var; + + // Numeric literal + int64_t val; + long double fval; +}; + +Node *new_cast(Node *expr, Type *ty); +int64_t const_expr(Token **rest, Token *tok); +Obj *parse(Token *tok); + +// +// type.c +// + +typedef enum { + TY_VOID, + TY_BOOL, + TY_CHAR, + TY_SHORT, + TY_INT, + TY_LONG, + TY_FLOAT, + TY_DOUBLE, + TY_LDOUBLE, + TY_ENUM, + TY_PTR, + TY_FUNC, + TY_ARRAY, + TY_VLA, // variable-length array + TY_STRUCT, + TY_UNION, +} TypeKind; + +struct Type { + TypeKind kind; + int size; // sizeof() value + int align; // alignment + bool is_unsigned; // unsigned or signed + bool is_atomic; // true if _Atomic + Type *origin; // for type compatibility check + + // Pointer-to or array-of type. We intentionally use the same member + // to represent pointer/array duality in C. + // + // In many contexts in which a pointer is expected, we examine this + // member instead of "kind" member to determine whether a type is a + // pointer or not. That means in many contexts "array of T" is + // naturally handled as if it were "pointer to T", as required by + // the C spec. + Type *base; + + // Declaration + Token *name; + Token *name_pos; + + // Array + int array_len; + + // Variable-length array + Node *vla_len; // # of elements + Obj *vla_size; // sizeof() value + + // Struct + Member *members; + bool is_flexible; + bool is_packed; + + // Function type + Type *return_ty; + Type *params; + bool is_variadic; + Type *next; +}; + +// Struct member +struct Member { + Member *next; + Type *ty; + Token *tok; // for error message + Token *name; + int idx; + int align; + int offset; + + // Bitfield + bool is_bitfield; + int bit_offset; + int bit_width; +}; + +extern Type *ty_void; +extern Type *ty_bool; + +extern Type *ty_char; +extern Type *ty_short; +extern Type *ty_int; +extern Type *ty_long; + +extern Type *ty_uchar; +extern Type *ty_ushort; +extern Type *ty_uint; +extern Type *ty_ulong; + +extern Type *ty_float; +extern Type *ty_double; +extern Type *ty_ldouble; + +bool is_integer(Type *ty); +bool is_flonum(Type *ty); +bool is_numeric(Type *ty); +bool is_compatible(Type *t1, Type *t2); +Type *copy_type(Type *ty); +Type *pointer_to(Type *base); +Type *func_type(Type *return_ty); +Type *array_of(Type *base, int size); +Type *vla_of(Type *base, Node *expr); +Type *enum_type(void); +Type *struct_type(void); +void add_type(Node *node); + +// +// codegen.c +// + +void codegen(Obj *prog, FILE *out); +int align_to(int n, int align); + +// +// unicode.c +// + +int encode_utf8(char *buf, uint32_t c); +uint32_t decode_utf8(char **new_pos, char *p); +bool is_ident1(uint32_t c); +bool is_ident2(uint32_t c); +int display_width(char *p, int len); + +// +// hashmap.c +// + +typedef struct { + char *key; + int keylen; + void *val; +} HashEntry; + +typedef struct { + HashEntry *buckets; + int capacity; + int used; +} HashMap; + +void *hashmap_get(HashMap *map, char *key); +void *hashmap_get2(HashMap *map, char *key, int keylen); +void hashmap_put(HashMap *map, char *key, void *val); +void hashmap_put2(HashMap *map, char *key, int keylen, void *val); +void hashmap_delete(HashMap *map, char *key); +void hashmap_delete2(HashMap *map, char *key, int keylen); +void hashmap_test(void); + +// +// main.c +// + +bool file_exists(char *path); + +extern StringArray include_paths; +extern bool opt_fpic; +extern bool opt_fcommon; +extern char *base_file; + +#endif |