blob: 2ff5c36be6773c95e7a43d4d1efd846cc9ce0d43 [file] [log] [blame]
#include "as.h"
#include "../cpu.h"
#include "../instructions.h"
#include "../mnemonics.h"
#include "map.h"

#include <ctype.h>
#include <errno.h>
#include <stdbool.h>
#include <stdlib.h>
#include <string.h>
10
/*
 * Kinds of operand an inst_t can carry (stored in inst_t.arg_type).
 * The values are consecutive starting at 0; print_inst() uses them as an
 * index, so the order must not change.
 */
enum
{
    ARG_16   = 0, /* Absolute 16 bit argument */
    ARG_8    = 1, /* Absolute 8 bit argument */
    ARG_8REL = 2, /* Relative 8 bit argument */
    ARG_REL  = 3, /* Relative label */
    ARG_ABS  = 4, /* Absolute label */
    ARG_IMP  = 5, /* Implied argument */
};
20
/*
 * ANSI terminal escape sequences used around diagnostics:
 * ERR switches the foreground to red (SGR 31), RESET restores the
 * default attributes (SGR 0).
 */
#define ERR   "\033[31m"
#define RESET "\033[0m"
23
/*
 * One parsed instruction: the opcode plus its operand.
 * arg_type holds one of the ARG_* constants and tells which member of the
 * anonymous union is meaningful (see print_inst()).
 */
typedef struct
{
    uint8_t opcode;   /* opcode byte chosen for the instruction */
    uint8_t arg_type; /* ARG_* constant describing the operand */
    uint16_t line;    /* presumably the source line number; never assigned
                         in the code visible here - TODO confirm */
    union
    {
        char label[32];    /* ARG_REL / ARG_ABS: label name */
        uint16_t long_arg; /* ARG_16: 16 bit value */
        uint8_t byte_arg;  /* ARG_8: 8 bit value */
        int8_t rel_arg;    /* ARG_8REL: signed 8 bit offset */
    };
} inst_t;
37
// strtok() variant that does not merge adjacent separators.
//
// Normal strtok() treats two separators in a row as one, i.e. "asdf\n\njkl"
// is 2 tokens. This function returns 3 tokens for that input, with an empty
// string in between.
//
// string: buffer to start tokenizing (modified in place), or NULL to
//         continue with the buffer from the previous call.
// token:  set of separator characters.
//
// Returns the next token, or NULL when the buffer is exhausted (or when no
// buffer has been supplied yet). A separator at the very end of the buffer
// does not produce a trailing empty token. The function keeps its position
// in a static variable.
char *strtok_fix(char *string, const char *token)
{
    static char *pos = NULL;

    if (string)
        pos = string;

    if (!pos || *pos == '\0')
        return NULL;

    char *start = pos;
    char *cut = start + strcspn(start, token);

    if (*cut)
    {
        // Found a separator: terminate the token and resume right after it.
        *cut = '\0';
        pos = cut + 1;
    }
    else
    {
        // Last token: park on the terminator so the next call returns NULL
        // instead of handing out this same token again.
        pos = cut;
    }

    return start;
}
67
swissChili97b5d8b2020-08-15 20:00:54 -070068void print_inst(inst_t *arg)
69{
70 char *arg_types =
71 "16 8 8RELREL ABS IMP ";
72
73 printf("\033[33mInst: %.4s $%x ", arg_types + arg->arg_type * 4, arg->opcode);
74
75 switch (arg->arg_type)
76 {
77 case ARG_16:
78 printf("%x", arg->long_arg);
79 break;
80 case ARG_8:
81 printf("%x", arg->byte_arg);
82 break;
83 case ARG_8REL:
84 printf("%d", arg->rel_arg);
85 break;
86 case ARG_REL:
87 case ARG_ABS:
88 printf("%s", arg->label);
89 break;
90 }
91
92 printf("\033[0m\n");
93}
94
/*
 * Returns true when c may appear in an identifier: a letter, a digit, or
 * one of '_', '-', '$', '.'. The NUL terminator is never an identifier
 * character.
 */
bool is_ident(char c)
{
    /* <ctype.h> functions require a value representable as unsigned char;
       passing a negative plain char is undefined behaviour. */
    unsigned char uc = (unsigned char)c;

    return isalnum(uc)
        || c == '_' || c == '-'
        || c == '$' || c == '.';
}
101
/*
 * Advances *code past any run of spaces and tabs.
 *
 * Returns the number of characters skipped. Newlines are not skipped.
 */
uint32_t skip_ws(char **code)
{
    char *cursor = *code;

    while (*cursor == ' ' || *cursor == '\t')
        cursor++;

    uint32_t skipped = (uint32_t)(cursor - *code);
    *code = cursor;
    return skipped;
}
111
/*
 * Advances *code to the start of the next line: everything up to a '\n' is
 * skipped and the '\n' itself is stepped over. At the end of the string the
 * cursor stops on the terminator.
 *
 * Returns the number of characters skipped before the newline / terminator
 * (the newline itself is not counted).
 */
uint32_t skip_to_eol(char **code)
{
    char *cursor = *code;
    uint32_t count = 0;

    while (*cursor != '\0' && *cursor != '\n')
    {
        cursor++;
        count++;
    }

    /* Step over the newline, but never past the terminator. */
    if (*cursor != '\0')
        cursor++;

    *code = cursor;
    return count;
}
124
/*
 * Parses a label reference (a run of identifier characters) at *code.
 *
 * On success the character following the name is overwritten with '\0' so
 * the name can be used as a C string, *code is left pointing at that
 * terminator, and a pointer to the name is returned.
 * Returns NULL (with *code unchanged) when no identifier character is
 * present.
 */
char *parse_label_name(char **code)
{
    char *start = *code;
    char *end = start;

    while (is_ident(*end))
        end++;

    *code = end;

    if (end == start)
        return NULL;

    /* Terminates the name in place; whatever followed it on the line is
       no longer visible to the caller. */
    *end = '\0';
    return start;
}
137
/*
 * Parses a label definition of the form `name:` at *code. Spaces and tabs
 * are allowed between the name and the colon.
 *
 * On success the name is NUL-terminated in place, *code is advanced past
 * the colon and a pointer to the name is returned.
 * Returns NULL and leaves *code untouched when the text is not a label
 * definition.
 */
char *parse_label(char **code)
{
    char *start = *code;
    char *name_end = start;

    while (is_ident(*name_end))
        name_end++;

    char *colon = name_end;
    skip_ws(&colon);

    if (name_end == start || *colon != ':')
        return NULL;

    /* Terminate at the end of the identifier, not at the colon, so that
       `name   :` does not yield a name with trailing whitespace. */
    *name_end = '\0';
    *code = colon + 1;

    return start;
}
158
/*
 * Parses an instruction mnemonic (a run of letters) at *code.
 *
 * On success the mnemonic is NUL-terminated in place and returned. If a
 * character followed the mnemonic it is overwritten by the terminator and
 * *code is moved just past it; if the mnemonic ran up to the end of the
 * string, *code is left on the existing terminator.
 * Returns NULL (with *code unchanged) when *code does not start with a
 * letter.
 */
char *parse_inst(char **code)
{
    char *start = *code;
    char *end = start;

    /* Cast: <ctype.h> functions are undefined for negative char values. */
    while (isalpha((unsigned char)*end))
        end++;

    if (end == start)
        return NULL;

    // Only step over a terminator we wrote ourselves. If the mnemonic
    // already ends at '\0' (an instruction with no argument), stepping
    // further would walk off the end of this line's buffer and parse
    // whatever follows it in memory as the argument.
    if (*end)
    {
        *end = '\0';
        end++;
    }

    *code = end;
    return start;
}
181
/*
 * Returns true when c ends the meaningful part of a line: a ';' (start of
 * a comment), a newline, or the string terminator.
 */
bool is_eol(char c)
{
    switch (c)
    {
    case ';':
    case '\n':
    case '\0':
        return true;
    default:
        return false;
    }
}
188
/*
 * Consumes the literal string p at *code.
 *
 * Returns true and advances *code past the match when the text at *code
 * starts with p (an empty p always matches). Returns false and leaves
 * *code untouched otherwise, including when only a prefix of p matched.
 */
bool skip(char **code, const char *p)
{
    char *cursor = *code;

    while (*p && *p == *cursor)
    {
        p++;
        cursor++;
    }

    if (*p)
        return false; /* mismatch: do not commit the partial advance */

    *code = cursor;
    return true;
}
198
/*
 * Parses an unsigned number at *code. A leading '$' selects hexadecimal,
 * otherwise the number is decimal. Spaces and tabs between the '$' and the
 * digits are skipped.
 *
 * On success stores the value in *num, advances *code past the digits and
 * returns true. Returns false and leaves *code and *num untouched when no
 * number is present, when the value is negative, or when it does not fit
 * in a long long.
 */
bool parse_num(char **code, uint64_t *num)
{
    char *cursor = *code;
    int base = 10;

    if (*cursor == '$')
    {
        base = 16;
        cursor++;
    }

    /* Same set of blanks that skip_ws() accepts. */
    while (*cursor == ' ' || *cursor == '\t')
        cursor++;

    char *endptr = cursor;
    errno = 0;
    long long val = strtoll(cursor, &endptr, base);

    /* No digits, out-of-range input, or a negative value (which would
       otherwise wrap to a huge unsigned number) are all rejected. */
    if (endptr == cursor || errno == ERANGE || val < 0)
        return false;

    *num = (uint64_t)val;
    *code = endptr;
    return true;
}
223
/*
 * Parses a number with parse_num() and accepts it only if it does not
 * exceed max.
 *
 * Returns true and stores the value in *num on success. Returns false,
 * leaving *num untouched, when no number could be parsed or the value is
 * larger than max; in the latter case *code has already been advanced past
 * the number by parse_num().
 */
bool parse_num_max(char **code, uint64_t *num, uint64_t max)
{
    uint64_t parsed;

    if (!parse_num(code, &parsed))
        return false;

    if (parsed > max)
        return false;

    *num = parsed;
    return true;
}
237
/*
 * Parses a number in the range 0..0xFF at *code.
 *
 * Returns true and stores the value in *num on success; returns false and
 * leaves *num untouched otherwise.
 */
bool parse_u8(char **code, uint8_t *num)
{
    uint64_t wide;
    bool ok = parse_num_max(code, &wide, 0xFF);

    if (ok)
        *num = (uint8_t)(wide & 0xFF);

    return ok;
}
247
/*
 * Parses a number in the range 0..0xFFFF at *code.
 *
 * Returns true and stores the value in *num on success; returns false and
 * leaves *num untouched otherwise.
 */
bool parse_u16(char **code, uint16_t *num)
{
    uint64_t wide;
    bool ok = parse_num_max(code, &wide, 0xFFFF);

    if (ok)
        *num = (uint16_t)(wide & 0xFFFF);

    return ok;
}
257
/*
 * Skips spaces and tabs at *code, then reports whether what follows ends
 * the line (a ';' comment, a newline or the string terminator).
 * *code is left on the first non-blank character.
 */
bool ws_end(char **code)
{
    skip_ws(code);

    char next = **code;
    return is_eol(next);
}
263
264bool parse_arg(char *code, int am, inst_t *inst)
265{
266 skip_ws(&code);
267
268 uint16_t num;
269 uint8_t num8;
270 char *lbl;
271
272 switch (am)
273 {
274 case AM_ACC:
275 case AM_IMP:
276 printf("Trying AM_IMP on '%.8s'\n", code);
277 skip_ws(&code);
278 if (is_eol(*code))
279 {
280 inst->arg_type = ARG_IMP;
281 return ws_end(&code);
282 }
283 break;
284
285 case AM_IMM:
286 printf("Trying AM_IMM on '%.8s'\n", code);
287 if (!skip(&code, "#"))
288 return false;
289 skip_ws(&code);
290 case AM_ZP:
291 if (parse_u8(&code, &num8))
292 {
293 inst->arg_type = ARG_8;
294 inst->byte_arg = num8;
295
296 return ws_end(&code);
297 }
298 break;
299
300 case AM_ABS:
301 if (parse_u16(&code, &num))
302 {
303 inst->arg_type = ARG_16;
304 inst->long_arg = num;
305 return true;
306 }
307 else if ((lbl = parse_label_name(&code)))
308 {
309 inst->arg_type = ARG_ABS;
310 strncpy(inst->label, lbl, 32);
311 return true;
312 }
313 break;
314
315 case AM_REL:
316 if (parse_u8(&code, &num8))
317 {
318 inst->arg_type = ARG_8REL;
319 inst->rel_arg = num;
320 return ws_end(&code);
321 }
322 else if ((lbl = parse_label_name(&code)))
323 {
324 inst->arg_type = ARG_REL;
325 strncpy(inst->label, lbl, 32);
326 return ws_end(&code);
327 }
328 break;
329
330 case AM_IND:
331 if (!skip(&code,"("))
332 return false;
333
334 if (!parse_u16(&code, &num))
335 return false;
336
337 if (!skip(&code, ")"))
338 return false;
339
340 inst->arg_type = ARG_16;
341 inst->long_arg = num;
342 return true;
343
344 case AM_AX:
345 case AM_ZPX:
346 case AM_AY:
347 case AM_ZPY:
348 if (am == AM_AX || am == AM_AY)
349 {
350 if (!parse_u16(&code, &num))
351 return false;
352 inst->arg_type = ARG_16;
353 inst->long_arg = num;
354 }
355 else
356 {
357 if (!parse_u8(&code, &num8))
358 return false;
359 inst->arg_type = ARG_8;
360 inst->byte_arg = num8;
361 }
362 if (!skip(&code, ","))
363 return false;
364
365 skip_ws(&code);
366
367 if (tolower(*code) != (am == AM_AY || am == AM_ZPY ? 'y' : 'x'))
368 return false;
369
370 return ws_end(&code);
371
372 case AM_ZIX:
373 if (!skip(&code, "("))
374 break;
375 skip_ws(&code);
376 if (!parse_u8(&code, &num8))
377 break;
378 skip_ws(&code);
379 if (!skip(&code, ","))
380 break;
381 skip_ws(&code);
382 if (tolower(*code) != 'x')
383 return false;
384 skip_ws(&code);
385
386 if (!skip(&code, ")"))
387 break;
388
389 inst->arg_type = ARG_8;
390 inst->byte_arg = num8;
391 return ws_end(&code);
392
393 case AM_ZIY:
394 if (!skip(&code, "("))
395 break;
396 skip_ws(&code);
397 if (!parse_u8(&code, &num8))
398 break;
399 skip_ws(&code);
400 if (!skip(&code, ")"))
401 break;
402 skip_ws(&code);
403 if (!skip(&code, ","))
404 break;
405 skip_ws(&code);
406 if (tolower(*code) != 'x')
407 break;
408
409 inst->arg_type = ARG_8;
410 inst->byte_arg = num8;
411 return ws_end(&code);
412 }
413 return false;
414}
415
416uint32_t assemble(char *code, FILE *out)
417{
swissChilica0d2e22020-08-16 15:09:25 -0700418 uintptr_t num_insts = 0,
419 pc = 0x600;
swissChili97b5d8b2020-08-15 20:00:54 -0700420 uint32_t line_no = 1;
swissChilica0d2e22020-08-16 15:09:25 -0700421 map_t *labels = new_map();
swissChili7acb4ce2020-08-16 20:16:10 -0700422 char *line,
423 *orig_line,
424 *line_start;
swissChili97b5d8b2020-08-15 20:00:54 -0700425
426 printf("Assembling File\n");
427 printf("%s\n", code);
428
swissChili7acb4ce2020-08-16 20:16:10 -0700429 orig_line = strtok_fix(code, "\n");
430
431 while (orig_line)
swissChili97b5d8b2020-08-15 20:00:54 -0700432 {
swissChili7acb4ce2020-08-16 20:16:10 -0700433 line = strdup(orig_line);
434 line_start = line;
435
436 if (*line == 0)
437 goto end_of_line;
438 printf("line %d: \033[36m%.12s\033[0m\n", line_no, line);
439
swissChili97b5d8b2020-08-15 20:00:54 -0700440 skip_ws(&line);
441
swissChili7acb4ce2020-08-16 20:16:10 -0700442 if (is_eol(*line))
443 {
444 printf("skip_ws() brought us to EOL\n");
445 goto end_of_line;
446 }
swissChili97b5d8b2020-08-15 20:00:54 -0700447
swissChilia4f49b52020-08-16 17:35:37 -0700448 char *label = parse_label(&line);
swissChili7acb4ce2020-08-16 20:16:10 -0700449 skip_ws(&code);
450 if (is_eol(*line))
451 goto end_of_line;
swissChilia4f49b52020-08-16 17:35:37 -0700452 char *mn = parse_inst(&line);
453 printf(" skipping %d ", skip_ws(&line));
454 //printf("\033[33m%s\033[0m\n", line);
455
456 bool no_argument = false;
457 printf("eol is %c ($%x)\n", *line, *line);
458 if (is_eol(*line))
459 {
460 no_argument = true;
461 printf("... no argument\n");
462 }
swissChili97b5d8b2020-08-15 20:00:54 -0700463 int32_t mnemonic = -1;
464
465 if (label)
466 {
swissChilica0d2e22020-08-16 15:09:25 -0700467 map_set(labels, label, (void *)pc);
swissChilia4f49b52020-08-16 17:35:37 -0700468 printf("Set label %s at $%lx\n", label, pc);
swissChili97b5d8b2020-08-15 20:00:54 -0700469 }
470
471 if (mn)
472 {
473#define MN(a) if (!strcasecmp(mn, #a)) \
swissChilica0d2e22020-08-16 15:09:25 -0700474 { \
swissChili97b5d8b2020-08-15 20:00:54 -0700475 mnemonic = a; \
swissChilica0d2e22020-08-16 15:09:25 -0700476 } \
swissChili97b5d8b2020-08-15 20:00:54 -0700477 else
478
swissChili7acb4ce2020-08-16 20:16:10 -0700479 MNEMONICS
480 {
481 printf(ERR "Could not parse instruction on line %d\n%s\n" RESET, line_no, orig_line);
482 goto cleanup;
483 }
swissChili97b5d8b2020-08-15 20:00:54 -0700484#undef MN
485
486 printf("Got instruction %s %d\n", mn, mnemonic);
487
488 inst_t arg;
489 // printf("Parsing '%s'\n", line);
490#define INST(_mn, am, op, len) \
swissChilia4f49b52020-08-16 17:35:37 -0700491 if ((no_argument && (_mn == AM_IMP || _mn == AM_ACC)) \
492 || (mnemonic == _mn && parse_arg(line, am, &arg))) \
493 { \
494 arg.opcode = op; \
495 pc += len; \
496 print_inst(&arg); \
497 } \
swissChili97b5d8b2020-08-15 20:00:54 -0700498 else
499
500 INSTRUCTIONS
501 {
502 printf("\033[31mCould not be parsed: %s '%s'\033[0m\n", mn, line);
503 }
504#undef INST
505 }
swissChili7acb4ce2020-08-16 20:16:10 -0700506 end_of_line:
507 line_no++;
508 printf("Line is %d\n", line_no);
509 orig_line = strtok_fix(NULL, "\n");
510 free(line_start);
swissChili97b5d8b2020-08-15 20:00:54 -0700511 }
512
swissChili7acb4ce2020-08-16 20:16:10 -0700513cleanup:
swissChili97b5d8b2020-08-15 20:00:54 -0700514 free_map(labels);
515
516 return num_insts;
517}