summaryrefslogtreecommitdiffhomepage
path: root/src/3p/chibicc/main.c
diff options
context:
space:
mode:
authorMichael Smith <mikesmiffy128@gmail.com>2021-11-20 03:10:50 +0000
committerMichael Smith <mikesmiffy128@gmail.com>2021-11-20 03:18:08 +0000
commitda6f343032cb01597dc7866e66f091adf3243a62 (patch)
tree870f8cb8e82bb42202ab92bea03fc6ab35ada7ca /src/3p/chibicc/main.c
Initial public snapshot
With code from Bill. Thanks Bill!
Diffstat (limited to 'src/3p/chibicc/main.c')
-rw-r--r--src/3p/chibicc/main.c791
1 files changed, 791 insertions, 0 deletions
diff --git a/src/3p/chibicc/main.c b/src/3p/chibicc/main.c
new file mode 100644
index 0000000..ffaabf4
--- /dev/null
+++ b/src/3p/chibicc/main.c
@@ -0,0 +1,791 @@
+#include "chibicc.h"
+
+// Kind of an input file. It is either forced with -x (see parse_opt_x)
+// or derived from the file name extension (see get_file_type).
+typedef enum {
+  FILE_NONE, // no kind forced with -x
+  FILE_C,    // C source (".c", or "-x c")
+  FILE_ASM,  // assembly source (".s", or "-x assembler")
+  FILE_OBJ,  // object file (".o")
+  FILE_AR,   // archive (".a")
+  FILE_DSO,  // shared object (".so")
+} FileType;
+
+// ---- State with external linkage ----
+
+// Header search directories: -I and -idirafter arguments plus the
+// directories pushed by add_default_include_paths().
+StringArray include_paths;
+// -fcommon / -fno-common; enabled unless -fno-common is given.
+bool opt_fcommon = true;
+// -fpic / -fPIC
+bool opt_fpic;
+
+// ---- Command line options, filled in by parse_args() ----
+
+static FileType opt_x;          // -x <lang>
+static StringArray opt_include; // -include <file>, in command line order
+static bool opt_E;              // -E
+static bool opt_M;              // -M
+static bool opt_MD;             // -MD (also set by -MMD)
+static bool opt_MMD;            // -MMD
+static bool opt_MP;             // -MP
+static bool opt_S;              // -S
+static bool opt_c;              // -c
+static bool opt_cc1;            // -cc1
+static bool opt_hash_hash_hash; // -###
+static bool opt_static;         // -static
+static bool opt_shared;         // -shared
+static char *opt_MF;            // -MF <file>
+static char *opt_MT;            // -MT / -MQ targets, joined with spaces
+static char *opt_o;             // -o <file>
+
+// Arguments handed to the linker as they are: -Xlinker values, -s,
+// -static, -shared and -L directories.
+static StringArray ld_extra_args;
+// Copy of include_paths made by add_default_include_paths(); consulted
+// by in_std_include_path() for -MMD.
+static StringArray std_include_paths;
+
+// Value of -cc1-input.
+char *base_file;
+// Value of -cc1-output.
+static char *output_file;
+
+// Input files, together with the -l... and -Wl,... words, in command
+// line order.
+static StringArray input_paths;
+// Temporary files removed by cleanup().
+static StringArray tmpfiles;
+
+// Write the one-line synopsis to stderr and terminate the process with
+// exit status `status`.
+static void usage(int status) {
+  fputs("chibicc [ -o <path> ] <file>\n", stderr);
+  exit(status);
+}
+
+// Returns true if `arg` is an option whose value is passed as the next,
+// separate command line word (e.g. "-o" in "-o out").
+//
+// parse_args() uses this to reject a command line that ends right after
+// such an option. Besides the original eight entries, the table lists
+// every other option that parse_args() reads with argv[++i] ("-MQ",
+// "-D", "-U", "-L", "-cc1-input", "-cc1-output"); without them a
+// trailing "-D" or "-MQ" passes validation and NULL is used as its value.
+static bool take_arg(char *arg) {
+  static const char *const opts[] = {
+    "-o", "-I", "-idirafter", "-include", "-x", "-MF", "-MT", "-Xlinker",
+    "-MQ", "-D", "-U", "-L", "-cc1-input", "-cc1-output",
+  };
+
+  for (size_t i = 0; i < sizeof(opts) / sizeof(*opts); i++)
+    if (!strcmp(arg, opts[i]))
+      return true;
+  return false;
+}
+
+// Append the built-in header directories to include_paths, then copy
+// every entry that include_paths holds at that point into
+// std_include_paths (used for the -MMD option).
+//
+// argv0: the program's argv[0]; chibicc's own headers are expected in
+//        the "include" directory next to it.
+static void add_default_include_paths(char *argv0) {
+  static char *const system_dirs[] = {
+    "/usr/local/include",
+    "/usr/include/x86_64-linux-gnu",
+    "/usr/include",
+  };
+
+  // dirname() is given a private copy of argv0.
+  char *self_dir = dirname(strdup(argv0));
+  strarray_push(&include_paths, format("%s/include", self_dir));
+
+  for (int i = 0; i < sizeof(system_dirs) / sizeof(*system_dirs); i++)
+    strarray_push(&include_paths, system_dirs[i]);
+
+  for (int i = 0; i < include_paths.len; i++)
+    strarray_push(&std_include_paths, include_paths.data[i]);
+}
+
+// Handle the value of a -D option. "NAME=BODY" defines NAME as BODY;
+// a plain "NAME" defines NAME as "1".
+static void define(char *str) {
+  char *sep = strchr(str, '=');
+  if (!sep) {
+    define_macro(str, "1");
+    return;
+  }
+
+  char *name = strndup(str, sep - str);
+  define_macro(name, sep + 1);
+}
+
+// Translate the argument of -x into a FileType. Accepted values are
+// "c", "assembler" and "none"; anything else is reported with error().
+static FileType parse_opt_x(char *s) {
+  static const struct {
+    const char *name;
+    FileType type;
+  } langs[] = {
+    {"c", FILE_C},
+    {"assembler", FILE_ASM},
+    {"none", FILE_NONE},
+  };
+
+  for (int i = 0; i < sizeof(langs) / sizeof(*langs); i++)
+    if (strcmp(s, langs[i].name) == 0)
+      return langs[i].type;
+
+  error("<command line>: unknown argument for -x: %s", s);
+}
+
+// Return a newly allocated copy of `s` quoted for use in a makefile:
+//   '$'          becomes "$$"
+//   '#'          becomes "\#"
+//   ' ' or '\t'  gets a backslash in front of it, and every backslash
+//                directly preceding it is emitted a second time
+// All other characters are copied unchanged.
+static char *quote_makefile(char *s) {
+  // The quoted text is never longer than twice the input.
+  char *buf = calloc(1, strlen(s) * 2 + 1);
+  char *out = buf;
+
+  for (char *p = s; *p; p++) {
+    if (*p == '$') {
+      *out++ = '$';
+      *out++ = '$';
+    } else if (*p == '#') {
+      *out++ = '\\';
+      *out++ = '#';
+    } else if (*p == ' ' || *p == '\t') {
+      // Repeat the run of backslashes that ends right before *p.
+      for (char *q = p; q > s && q[-1] == '\\'; q--)
+        *out++ = '\\';
+      *out++ = '\\';
+      *out++ = *p;
+    } else {
+      *out++ = *p;
+    }
+  }
+  return buf;
+}
+
+// Parse the command line into the opt_* variables, include_paths,
+// ld_extra_args and input_paths.
+//
+// Words that are not options (and the -l... / -Wl,... words) are
+// collected in input_paths in command line order. -D and -U take effect
+// immediately through define() / undef_macro(). Directories given with
+// -idirafter are appended to include_paths after everything else seen
+// on the command line. An unknown option or an empty list of inputs is
+// reported with error().
+//
+// The order of the tests below matters: the exact-match test for an
+// option always precedes the prefix test for its joined form (for
+// example "-o" before "-o<path>").
+static void parse_args(int argc, char **argv) {
+  // Make sure that all command line options that take an argument
+  // have an argument. argv[argc] is NULL, so a trailing option is
+  // detected by reading the word after it.
+  for (int i = 1; i < argc; i++)
+    if (take_arg(argv[i]))
+      if (!argv[++i])
+        usage(1);
+
+  StringArray idirafter = {};
+
+  for (int i = 1; i < argc; i++) {
+    if (!strcmp(argv[i], "-###")) {
+      opt_hash_hash_hash = true;
+      continue;
+    }
+
+    if (!strcmp(argv[i], "-cc1")) {
+      opt_cc1 = true;
+      continue;
+    }
+
+    if (!strcmp(argv[i], "--help"))
+      usage(0);
+
+    if (!strcmp(argv[i], "-o")) {
+      opt_o = argv[++i];
+      continue;
+    }
+
+    if (!strncmp(argv[i], "-o", 2)) {
+      opt_o = argv[i] + 2;
+      continue;
+    }
+
+    if (!strcmp(argv[i], "-S")) {
+      opt_S = true;
+      continue;
+    }
+
+    if (!strcmp(argv[i], "-fcommon")) {
+      opt_fcommon = true;
+      continue;
+    }
+
+    if (!strcmp(argv[i], "-fno-common")) {
+      opt_fcommon = false;
+      continue;
+    }
+
+    if (!strcmp(argv[i], "-c")) {
+      opt_c = true;
+      continue;
+    }
+
+    if (!strcmp(argv[i], "-E")) {
+      opt_E = true;
+      continue;
+    }
+
+    // "-I dir": take_arg() lists "-I", so the directory may be a word
+    // of its own. Without this case an empty path was pushed and `dir`
+    // ended up in input_paths.
+    if (!strcmp(argv[i], "-I")) {
+      strarray_push(&include_paths, argv[++i]);
+      continue;
+    }
+
+    if (!strncmp(argv[i], "-I", 2)) {
+      strarray_push(&include_paths, argv[i] + 2);
+      continue;
+    }
+
+    if (!strcmp(argv[i], "-D")) {
+      define(argv[++i]);
+      continue;
+    }
+
+    if (!strncmp(argv[i], "-D", 2)) {
+      define(argv[i] + 2);
+      continue;
+    }
+
+    if (!strcmp(argv[i], "-U")) {
+      undef_macro(argv[++i]);
+      continue;
+    }
+
+    if (!strncmp(argv[i], "-U", 2)) {
+      undef_macro(argv[i] + 2);
+      continue;
+    }
+
+    if (!strcmp(argv[i], "-include")) {
+      strarray_push(&opt_include, argv[++i]);
+      continue;
+    }
+
+    if (!strcmp(argv[i], "-x")) {
+      opt_x = parse_opt_x(argv[++i]);
+      continue;
+    }
+
+    if (!strncmp(argv[i], "-x", 2)) {
+      opt_x = parse_opt_x(argv[i] + 2);
+      continue;
+    }
+
+    // -l<lib> and -Wl,<args> stay in input_paths so that their position
+    // relative to the input files is preserved; main() sorts them out.
+    if (!strncmp(argv[i], "-l", 2) || !strncmp(argv[i], "-Wl,", 4)) {
+      strarray_push(&input_paths, argv[i]);
+      continue;
+    }
+
+    if (!strcmp(argv[i], "-Xlinker")) {
+      strarray_push(&ld_extra_args, argv[++i]);
+      continue;
+    }
+
+    if (!strcmp(argv[i], "-s")) {
+      strarray_push(&ld_extra_args, "-s");
+      continue;
+    }
+
+    if (!strcmp(argv[i], "-M")) {
+      opt_M = true;
+      continue;
+    }
+
+    if (!strcmp(argv[i], "-MF")) {
+      opt_MF = argv[++i];
+      continue;
+    }
+
+    if (!strcmp(argv[i], "-MP")) {
+      opt_MP = true;
+      continue;
+    }
+
+    // Repeated -MT / -MQ options accumulate into one space-separated
+    // list of targets.
+    if (!strcmp(argv[i], "-MT")) {
+      if (opt_MT == NULL)
+        opt_MT = argv[++i];
+      else
+        opt_MT = format("%s %s", opt_MT, argv[++i]);
+      continue;
+    }
+
+    if (!strcmp(argv[i], "-MD")) {
+      opt_MD = true;
+      continue;
+    }
+
+    if (!strcmp(argv[i], "-MQ")) {
+      if (opt_MT == NULL)
+        opt_MT = quote_makefile(argv[++i]);
+      else
+        opt_MT = format("%s %s", opt_MT, quote_makefile(argv[++i]));
+      continue;
+    }
+
+    if (!strcmp(argv[i], "-MMD")) {
+      opt_MD = opt_MMD = true;
+      continue;
+    }
+
+    if (!strcmp(argv[i], "-fpic") || !strcmp(argv[i], "-fPIC")) {
+      opt_fpic = true;
+      continue;
+    }
+
+    if (!strcmp(argv[i], "-cc1-input")) {
+      base_file = argv[++i];
+      continue;
+    }
+
+    if (!strcmp(argv[i], "-cc1-output")) {
+      output_file = argv[++i];
+      continue;
+    }
+
+    // Store the directory that follows the option. The previous
+    // argv[i++] stored the word "-idirafter" itself.
+    if (!strcmp(argv[i], "-idirafter")) {
+      strarray_push(&idirafter, argv[++i]);
+      continue;
+    }
+
+    if (!strcmp(argv[i], "-static")) {
+      opt_static = true;
+      strarray_push(&ld_extra_args, "-static");
+      continue;
+    }
+
+    if (!strcmp(argv[i], "-shared")) {
+      opt_shared = true;
+      strarray_push(&ld_extra_args, "-shared");
+      continue;
+    }
+
+    if (!strcmp(argv[i], "-L")) {
+      strarray_push(&ld_extra_args, "-L");
+      strarray_push(&ld_extra_args, argv[++i]);
+      continue;
+    }
+
+    if (!strncmp(argv[i], "-L", 2)) {
+      strarray_push(&ld_extra_args, "-L");
+      strarray_push(&ld_extra_args, argv[i] + 2);
+      continue;
+    }
+
+    if (!strcmp(argv[i], "-hashmap-test")) {
+      hashmap_test();
+      exit(0);
+    }
+
+    // These options are ignored for now.
+    if (!strncmp(argv[i], "-O", 2) ||
+        !strncmp(argv[i], "-W", 2) ||
+        !strncmp(argv[i], "-g", 2) ||
+        !strncmp(argv[i], "-std=", 5) ||
+        !strcmp(argv[i], "-ffreestanding") ||
+        !strcmp(argv[i], "-fno-builtin") ||
+        !strcmp(argv[i], "-fno-omit-frame-pointer") ||
+        !strcmp(argv[i], "-fno-stack-protector") ||
+        !strcmp(argv[i], "-fno-strict-aliasing") ||
+        !strcmp(argv[i], "-m64") ||
+        !strcmp(argv[i], "-mno-red-zone") ||
+        !strcmp(argv[i], "-w"))
+      continue;
+
+    // A lone "-" is not an option; it is kept as an input path.
+    if (argv[i][0] == '-' && argv[i][1] != '\0')
+      error("unknown argument: %s", argv[i]);
+
+    strarray_push(&input_paths, argv[i]);
+  }
+
+  for (int i = 0; i < idirafter.len; i++)
+    strarray_push(&include_paths, idirafter.data[i]);
+
+  if (input_paths.len == 0)
+    error("no input files");
+
+  // -E implies that the input is the C macro language.
+  if (opt_E)
+    opt_x = FILE_C;
+}
+
+// Open `path` for writing and return the stream. A NULL path or "-"
+// selects stdout. A file that cannot be opened is reported with error().
+static FILE *open_file(char *path) {
+  bool to_stdout = !path || !strcmp(path, "-");
+  if (to_stdout)
+    return stdout;
+
+  FILE *fp = fopen(path, "w");
+  if (fp == NULL)
+    error("cannot open output file: %s: %s", path, strerror(errno));
+  return fp;
+}
+
+// Returns true if string `p` ends with string `q`.
+static bool endswith(char *p, char *q) {
+  int plen = strlen(p);
+  int qlen = strlen(q);
+
+  if (plen < qlen)
+    return false;
+  return strcmp(p + (plen - qlen), q) == 0;
+}
+
+// Build a file name from the last path component of `tmpl`: everything
+// from its last '.' on is dropped and `extn` is appended. Any directory
+// part of `tmpl` is not carried over into the result.
+static char *replace_extn(char *tmpl, char *extn) {
+  // basename() is given a private copy of the template.
+  char *name = basename(strdup(tmpl));
+  char *dot = strrchr(name, '.');
+  int stem_len = dot ? (int)(dot - name) : (int)strlen(name);
+  return format("%.*s%s", stem_len, name, extn);
+}
+
+// Remove every file recorded in tmpfiles. main() registers this
+// function with atexit().
+static void cleanup(void) {
+  int remaining = tmpfiles.len;
+  char **path = tmpfiles.data;
+
+  while (remaining-- > 0)
+    unlink(*path++);
+}
+
+// Create an empty file with a unique name under /tmp, record it in
+// tmpfiles and return its path. A mkstemp() failure is reported with
+// error().
+static char *create_tmpfile(void) {
+  char *name = strdup("/tmp/chibicc-XXXXXX");
+
+  int fd = mkstemp(name);
+  if (fd == -1)
+    error("mkstemp failed: %s", strerror(errno));
+  // Only the name is needed; the file is reopened by whoever writes it.
+  close(fd);
+
+  strarray_push(&tmpfiles, name);
+  return name;
+}
+
+// Run the command `argv` (NULL-terminated; argv[0] is looked up with
+// execvp) and wait for it to finish. If the command cannot be started
+// or its wait status is not zero, this process exits with status 1.
+//
+// With -### the command line is echoed to stderr first.
+static void run_subprocess(char **argv) {
+  // If -### is given, dump the subprocess's command line.
+  if (opt_hash_hash_hash) {
+    fprintf(stderr, "%s", argv[0]);
+    for (int i = 1; argv[i]; i++)
+      fprintf(stderr, " %s", argv[i]);
+    fprintf(stderr, "\n");
+  }
+
+  pid_t pid = fork();
+
+  // Without this check a failed fork() fell through to the wait below,
+  // which then failed at once and left `status` uninitialized.
+  if (pid == -1)
+    error("fork failed: %s", strerror(errno));
+
+  if (pid == 0) {
+    // Child process. Run a new command.
+    execvp(argv[0], argv);
+    fprintf(stderr, "exec failed: %s: %s\n", argv[0], strerror(errno));
+    _exit(1);
+  }
+
+  // Wait for the child process to finish; retry if a signal interrupts
+  // the wait.
+  int status = 0;
+  while (waitpid(pid, &status, 0) == -1)
+    if (errno != EINTR)
+      error("waitpid failed: %s", strerror(errno));
+
+  if (status != 0)
+    exit(1);
+}
+
+// Re-run this program as a subprocess with the original arguments
+// followed by "-cc1" and, when the respective pointer is not NULL,
+// "-cc1-input <input>" and "-cc1-output <output>".
+static void run_cc1(int argc, char **argv, char *input, char *output) {
+  // The 10 spare slots hold the words appended below (five at most)
+  // and the terminating NULL, which calloc() already provides.
+  char **args = calloc(argc + 10, sizeof(char *));
+  memcpy(args, argv, argc * sizeof(char *));
+
+  int n = argc;
+  args[n++] = "-cc1";
+
+  if (input) {
+    args[n++] = "-cc1-input";
+    args[n++] = input;
+  }
+
+  if (output) {
+    args[n++] = "-cc1-output";
+    args[n++] = output;
+  }
+
+  run_subprocess(args);
+}
+
+// Write the token list to the -o file, or to stdout when -o is not
+// given. Used for -E.
+//
+// A newline is written before every token that starts a line, except
+// the very first token; a single space is written before a token that
+// was preceded by whitespace within a line.
+static void print_tokens(Token *tok) {
+  FILE *out = open_file(opt_o ? opt_o : "-");
+  bool first = true;
+
+  while (tok->kind != TK_EOF) {
+    if (!first && tok->at_bol)
+      fprintf(out, "\n");
+    if (tok->has_space && !tok->at_bol)
+      fprintf(out, " ");
+    fprintf(out, "%.*s", tok->len, tok->loc);
+
+    first = false;
+    tok = tok->next;
+  }
+  fprintf(out, "\n");
+}
+
+// Returns true if `path` starts with one of the directories stored in
+// std_include_paths, followed immediately by a '/'.
+static bool in_std_include_path(char *path) {
+  for (int i = 0; i < std_include_paths.len; i++) {
+    char *dir = std_include_paths.data[i];
+    int dir_len = strlen(dir);
+
+    if (strncmp(dir, path, dir_len) != 0)
+      continue;
+    if (path[dir_len] == '/')
+      return true;
+  }
+  return false;
+}
+
+// If one of the -M options is given, the compiler writes the list of
+// input files in a format that the "make" command can read. This
+// feature is used to automate file dependency management.
+//
+// Destination, in order of precedence: the -MF file; with -MD, the -o
+// name (or the base file name) with its extension replaced by ".d";
+// the -o file; stdout.
+//
+// The rule's target is the -MT/-MQ text if given, otherwise the base
+// file name with ".o" as extension. With -MMD, files located under a
+// directory of std_include_paths are left out. With -MP, an empty rule
+// is added for every listed file except the first one.
+static void print_dependencies(void) {
+  char *path = "-";
+  if (opt_MF)
+    path = opt_MF;
+  else if (opt_MD)
+    path = replace_extn(opt_o ? opt_o : base_file, ".d");
+  else if (opt_o)
+    path = opt_o;
+
+  FILE *out = open_file(path);
+
+  char *target = opt_MT;
+  if (!target)
+    target = quote_makefile(replace_extn(base_file, ".o"));
+  fprintf(out, "%s:", target);
+
+  File **files = get_input_files();
+
+  for (File **f = files; *f; f++) {
+    char *name = (*f)->name;
+    if (opt_MMD && in_std_include_path(name))
+      continue;
+    fprintf(out, " \\\n %s", name);
+  }
+
+  fprintf(out, "\n\n");
+
+  if (!opt_MP)
+    return;
+
+  // files[0] is skipped here.
+  for (int i = 1; files[i]; i++) {
+    char *name = files[i]->name;
+    if (opt_MMD && in_std_include_path(name))
+      continue;
+    fprintf(out, "%s:\n\n", quote_makefile(name));
+  }
+}
+
+// Tokenize the file at `path`. If tokenize_file() returns NULL, the
+// failure is reported with error() together with the errno text.
+static Token *must_tokenize_file(char *path) {
+  Token *head = tokenize_file(path);
+  if (head == NULL)
+    error("%s: %s", path, strerror(errno));
+  return head;
+}
+
+// Concatenate two token lists and return the head of the result.
+//
+// If `tok1` is NULL or consists of the EOF token only, `tok2` is
+// returned unchanged. Otherwise the EOF token that terminates `tok1`
+// is unlinked and `tok2` is attached in its place.
+static Token *append_tokens(Token *tok1, Token *tok2) {
+  if (!tok1 || tok1->kind == TK_EOF)
+    return tok2;
+
+  // Find the last token before the terminating EOF.
+  Token *last = tok1;
+  for (; last->next->kind != TK_EOF; last = last->next)
+    ;
+
+  last->next = tok2;
+  return tok1;
+}
+
+// The compiler proper, run when -cc1 is given. It tokenizes the
+// -include files and base_file, preprocesses the combined token list,
+// and then does one of the following:
+//   -M        print the dependencies and stop
+//   -MD       print the dependencies and carry on
+//   -E        print the preprocessed tokens and stop
+//   otherwise parse, generate assembly and write it to output_file
+static void cc1(void) {
+  Token *tok = NULL;
+
+  // Process -include option
+  for (int i = 0; i < opt_include.len; i++) {
+    char *name = opt_include.data[i];
+
+    // A file that exists under the given name is used directly;
+    // anything else is looked up in the include paths.
+    char *path = name;
+    if (!file_exists(name)) {
+      path = search_include_paths(name);
+      if (!path)
+        error("-include: %s: %s", name, strerror(errno));
+    }
+
+    tok = append_tokens(tok, must_tokenize_file(path));
+  }
+
+  // Tokenize and preprocess the main file.
+  tok = append_tokens(tok, must_tokenize_file(base_file));
+  tok = preprocess(tok);
+
+  // If -M or -MD are given, print file dependencies.
+  if (opt_M || opt_MD) {
+    print_dependencies();
+    if (opt_M)
+      return;
+  }
+
+  // If -E is given, print out preprocessed C code as a result.
+  if (opt_E) {
+    print_tokens(tok);
+    return;
+  }
+
+  Obj *prog = parse(tok);
+
+  // Emit the assembly into a memory buffer first; the output file is
+  // opened only after code generation has finished.
+  char *asm_text;
+  size_t asm_len;
+  FILE *mem = open_memstream(&asm_text, &asm_len);
+  codegen(prog, mem);
+  fclose(mem);
+
+  // Write the assembly text to a file.
+  FILE *out = open_file(output_file);
+  fwrite(asm_text, asm_len, 1, out);
+  fclose(out);
+}
+
+// Run "as -c <input> -o <output>" as a subprocess.
+static void assemble(char *input, char *output) {
+  char *as_argv[6];
+  int n = 0;
+
+  as_argv[n++] = "as";
+  as_argv[n++] = "-c";
+  as_argv[n++] = input;
+  as_argv[n++] = "-o";
+  as_argv[n++] = output;
+  as_argv[n] = NULL;
+
+  run_subprocess(as_argv);
+}
+
+// Expand the glob `pattern` and return a newly allocated copy of the
+// last path in the result, or NULL if nothing matches.
+static char *find_file(char *pattern) {
+  glob_t matches = {};
+  char *found = NULL;
+
+  glob(pattern, 0, NULL, &matches);
+  if (matches.gl_pathc > 0) {
+    size_t last = matches.gl_pathc - 1;
+    found = strdup(matches.gl_pathv[last]);
+  }
+
+  globfree(&matches);
+  return found;
+}
+
+// Returns true if stat() succeeds on `path`.
+bool file_exists(char *path) {
+  struct stat info;
+  return stat(path, &info) == 0;
+}
+
+// Return the first of the known system library directories that
+// contains crti.o. If none does, the failure is reported with error().
+static char *find_libpath(void) {
+  static const struct {
+    char *probe;
+    char *dir;
+  } candidates[] = {
+    {"/usr/lib/x86_64-linux-gnu/crti.o", "/usr/lib/x86_64-linux-gnu"},
+    {"/usr/lib64/crti.o", "/usr/lib64"},
+  };
+
+  for (int i = 0; i < sizeof(candidates) / sizeof(*candidates); i++)
+    if (file_exists(candidates[i].probe))
+      return candidates[i].dir;
+
+  error("library path is not found");
+}
+
+// Return the directory that holds gcc's crtbegin.o. The known
+// locations are tried in order; the first pattern with a match decides.
+// If no pattern matches, the failure is reported with error().
+static char *find_gcc_libpath(void) {
+  static char *const patterns[] = {
+    "/usr/lib/gcc/x86_64-linux-gnu/*/crtbegin.o",
+    "/usr/lib/gcc/x86_64-pc-linux-gnu/*/crtbegin.o", // For Gentoo
+    "/usr/lib/gcc/x86_64-redhat-linux/*/crtbegin.o", // For Fedora
+    NULL,
+  };
+
+  for (char *const *pat = patterns; *pat; pat++) {
+    char *match = find_file(*pat);
+    if (match)
+      return dirname(match);
+  }
+
+  error("gcc library path is not found");
+}
+
+// Link `inputs` into `output` by running "ld" as a subprocess.
+//
+// The command line is assembled in this order: output and emulation,
+// startup objects (crt1.o only when not -shared), library search
+// directories, the dynamic linker (unless -static), ld_extra_args, the
+// inputs, the default libraries, and the closing crtend/crtn objects.
+static void run_linker(StringArray *inputs, char *output) {
+  // Library search directories, in search order. The list used to name
+  // /usr/lib/x86_64-linux-gnu twice; the repeated entry is dropped.
+  static char *const lib_dirs[] = {
+    "-L/usr/lib/x86_64-linux-gnu",
+    "-L/usr/lib64",
+    "-L/lib64",
+    "-L/usr/lib/x86_64-pc-linux-gnu",
+    "-L/usr/lib/x86_64-redhat-linux",
+    "-L/usr/lib",
+    "-L/lib",
+  };
+
+  StringArray arr = {};
+
+  strarray_push(&arr, "ld");
+  strarray_push(&arr, "-o");
+  strarray_push(&arr, output);
+  strarray_push(&arr, "-m");
+  strarray_push(&arr, "elf_x86_64");
+
+  char *libpath = find_libpath();
+  char *gcc_libpath = find_gcc_libpath();
+
+  if (opt_shared) {
+    strarray_push(&arr, format("%s/crti.o", libpath));
+    strarray_push(&arr, format("%s/crtbeginS.o", gcc_libpath));
+  } else {
+    strarray_push(&arr, format("%s/crt1.o", libpath));
+    strarray_push(&arr, format("%s/crti.o", libpath));
+    strarray_push(&arr, format("%s/crtbegin.o", gcc_libpath));
+  }
+
+  strarray_push(&arr, format("-L%s", gcc_libpath));
+  for (size_t i = 0; i < sizeof(lib_dirs) / sizeof(*lib_dirs); i++)
+    strarray_push(&arr, lib_dirs[i]);
+
+  if (!opt_static) {
+    strarray_push(&arr, "-dynamic-linker");
+    strarray_push(&arr, "/lib64/ld-linux-x86-64.so.2");
+  }
+
+  for (int i = 0; i < ld_extra_args.len; i++)
+    strarray_push(&arr, ld_extra_args.data[i]);
+
+  for (int i = 0; i < inputs->len; i++)
+    strarray_push(&arr, inputs->data[i]);
+
+  if (opt_static) {
+    strarray_push(&arr, "--start-group");
+    strarray_push(&arr, "-lgcc");
+    strarray_push(&arr, "-lgcc_eh");
+    strarray_push(&arr, "-lc");
+    strarray_push(&arr, "--end-group");
+  } else {
+    strarray_push(&arr, "-lc");
+    strarray_push(&arr, "-lgcc");
+    strarray_push(&arr, "--as-needed");
+    strarray_push(&arr, "-lgcc_s");
+    strarray_push(&arr, "--no-as-needed");
+  }
+
+  if (opt_shared)
+    strarray_push(&arr, format("%s/crtendS.o", gcc_libpath));
+  else
+    strarray_push(&arr, format("%s/crtend.o", gcc_libpath));
+
+  strarray_push(&arr, format("%s/crtn.o", libpath));
+  // run_subprocess() expects a NULL-terminated vector.
+  strarray_push(&arr, NULL);
+
+  run_subprocess(arr.data);
+}
+
+// Determine the kind of the input file `filename`. A kind forced with
+// -x wins; otherwise the file name extension decides. An unrecognized
+// extension is reported with error().
+static FileType get_file_type(char *filename) {
+  static const struct {
+    char *suffix;
+    FileType type;
+  } known[] = {
+    {".a", FILE_AR},
+    {".so", FILE_DSO},
+    {".o", FILE_OBJ},
+    {".c", FILE_C},
+    {".s", FILE_ASM},
+  };
+
+  if (opt_x != FILE_NONE)
+    return opt_x;
+
+  for (int i = 0; i < sizeof(known) / sizeof(*known); i++)
+    if (endswith(filename, known[i].suffix))
+      return known[i].type;
+
+  error("<command line>: unknown file extension: %s", filename);
+}
+
+// Entry point of the compiler driver.
+//
+// With -cc1 the process is the compiler proper: the default include
+// paths are added and cc1() runs. Otherwise the process is the driver:
+// it walks input_paths in command line order, runs the compiler (as a
+// -cc1 subprocess) and the assembler as required by -E / -M / -S / -c,
+// and finally links everything collected for the linker into the -o
+// file or "a.out".
+int main(int argc, char **argv) {
+  atexit(cleanup);
+  init_macros();
+  parse_args(argc, argv);
+
+  if (opt_cc1) {
+    add_default_include_paths(argv[0]);
+    cc1();
+    return 0;
+  }
+
+  // Logical OR throughout; the expression used to mix in a bitwise '|'.
+  if (input_paths.len > 1 && opt_o && (opt_c || opt_S || opt_E))
+    error("cannot specify '-o' with '-c,' '-S' or '-E' with multiple files");
+
+  StringArray ld_args = {};
+
+  for (int i = 0; i < input_paths.len; i++) {
+    char *input = input_paths.data[i];
+
+    if (!strncmp(input, "-l", 2)) {
+      strarray_push(&ld_args, input);
+      continue;
+    }
+
+    // "-Wl,a,b" passes "a" and "b" to the linker as separate words.
+    // strtok_r keeps the scan position in a local variable instead of
+    // the hidden static state of strtok.
+    if (!strncmp(input, "-Wl,", 4)) {
+      char *copy = strdup(input + 4);
+      char *save = NULL;
+      for (char *arg = strtok_r(copy, ",", &save); arg;
+           arg = strtok_r(NULL, ",", &save))
+        strarray_push(&ld_args, arg);
+      continue;
+    }
+
+    char *output;
+    if (opt_o)
+      output = opt_o;
+    else if (opt_S)
+      output = replace_extn(input, ".s");
+    else
+      output = replace_extn(input, ".o");
+
+    FileType type = get_file_type(input);
+
+    // Handle .o, .a or .so
+    if (type == FILE_OBJ || type == FILE_AR || type == FILE_DSO) {
+      strarray_push(&ld_args, input);
+      continue;
+    }
+
+    // Handle .s
+    if (type == FILE_ASM) {
+      if (!opt_S)
+        assemble(input, output);
+      continue;
+    }
+
+    assert(type == FILE_C);
+
+    // Just preprocess
+    if (opt_E || opt_M) {
+      run_cc1(argc, argv, input, NULL);
+      continue;
+    }
+
+    // Compile
+    if (opt_S) {
+      run_cc1(argc, argv, input, output);
+      continue;
+    }
+
+    // Compile and assemble
+    if (opt_c) {
+      char *tmp = create_tmpfile();
+      run_cc1(argc, argv, input, tmp);
+      assemble(tmp, output);
+      continue;
+    }
+
+    // Compile, assemble and link
+    char *tmp1 = create_tmpfile();
+    char *tmp2 = create_tmpfile();
+    run_cc1(argc, argv, input, tmp1);
+    assemble(tmp1, tmp2);
+    strarray_push(&ld_args, tmp2);
+    continue;
+  }
+
+  if (ld_args.len > 0)
+    run_linker(&ld_args, opt_o ? opt_o : "a.out");
+  return 0;
+}