-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathsicas.py
executable file
·613 lines (556 loc) · 23.1 KB
/
sicas.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
#!/usr/bin/python
import argparse
import os
from sicxe import *
# A class to indicate assembly error
class AssembleError(Exception):
    """Raised when the assembler rejects the source program.

    Derives from ``Exception`` rather than ``BaseException`` so that it is
    handled like an ordinary application error and is not confused with
    interpreter-level signals such as ``KeyboardInterrupt`` or ``SystemExit``.
    """
# class to store info of each source statements
class Line:
    """One statement of the assembly source plus what was assembled for it."""

    def __init__(self, assembly, lineno):
        """Store the raw text and reset every assembled field.

        assembly -- the source line without its trailing newline
        lineno   -- 1-based line number in the source file
        """
        self.src = assembly
        # everything from the first '.' onward is treated as a comment
        self.assembly = assembly.split('.')[0]
        self.code = ""        # object code as an int; "" means "no code"
        self.lineno = lineno
        self.fmt = 0          # size of the object code in bytes
        self.loc = None       # address of the statement; None means "no address"
        self.base = -1        # BASE operand in effect for this line; -1 means none
        self.litpool = []     # (address, literal, code) tuples placed at this line

    def __str__(self):
        return self.assembly

    def __repr__(self):
        return str(self)

    # split the source statement into several tokens
    def tokenize(self):
        """Return the whitespace-separated tokens of the statement."""
        return self.assembly.split()

    # return a tuple for assembly listing
    def listing_tuple(self):
        """Return ``(lineno, address, source, object code)`` for the listing.

        The address and object code are upper-case hex strings, or "" when the
        line has none.  The address is also blanked for a line that carries a
        literal pool.
        """
        locfmt = ""
        codefmt = ""
        if self.loc is not None:
            locfmt = "%04X" % self.loc
        if self.code != "":
            # two hex digits per byte of object code
            codefmt = "%0*X" % (self.fmt * 2, self.code)
        if self.litpool:
            locfmt = ""
        return (self.lineno, locfmt, self.src.expandtabs(8), codefmt)
# class to store each program info
class Program:
def __init__(self, source):
self.source = os.path.basename(source)
self.name = ''
self.start_addr = 0x0
self.start_exec = -1
self.started = False
self.LOCCTR = 0
self.lineno = 0
self.content = list(Line(line.strip('\n'), lineno) for lineno, line in enumerate(open(source, "r").readlines(), 1))
self.symtab = PRELOAD_SYMTAB.copy()
self.littab = {}
self.endlitpool = []
self.base = -1
# print error message indicating the line number and throw the error
def error(self, msg, line = None):
if line == None:
line = self.current_line()
print("\n%s:%s" % (self.source, str(line.lineno)) + " " + str(line))
print("Error : " + msg + '\n')
raise AssembleError
# assemble the program
def assemble(self):
for line in self.content:
program.lineno += 1
line.loc = self.LOCCTR
line.base = self.base
if line.assembly == '':
line.loc = None
continue
tokens = line.tokenize()
self.lineno = line.lineno
if has_directives(self, tokens):
continue
elif has_instructions(self, tokens):
continue
else:
program.error("Except a directive, opcde or label.")
end_LITPOOL(self)
for k, v in self.symtab.items():
if type(v) == list:
program.error("Undefined symbol %s." % v)
# write assembly listing to file
def listing(self, filename):
stmt_len = len(max(self.content, key=lambda stmt: len(stmt.src)).src) + 10
fmt = "\n%%-8s%%-8s%%-%ds%%-10s" % stmt_len
with open(filename, "w") as f:
f.write(fmt % ("Lineno", "LOCCTR", "Source Statements", "Object Code"))
for line in self.content:
f.write(fmt % line.listing_tuple())
if any(line.litpool):
for lit in line.litpool:
code = lit[2]
if lit[1][0] == 'C':
code = "%%0%dX" % (len(lit[1]) - 3)
code = code % lit[2]
elif lit[1][0] == 'X':
code = "%02X" % lit[2]
f.write(fmt % ("", "%04X" % lit[0], "*\t".expandtabs(8) + lit[1], code))
for lit in self.endlitpool:
code = lit[2]
if lit[1][0] == 'C':
code = "%%0%dX" % (len(lit[1]) - 3)
code = code % lit[2]
elif lit[1][0] == 'X':
code = "%02X" % lit[2]
f.write(fmt % ("", "%04X" % lit[0], "*\t".expandtabs(8) + lit[1], code))
# get current line
def current_line(self):
return self.content[self.lineno - 1]
# output object file
def output(self, file_name):
with open(file_name, "w") as f:
f.write("H%-6s%06X%06X" % (self.name, self.start_addr, self.LOCCTR - self.start_addr))
M_list = []
newrecord = True
colcount = 0
i = 0
line = self.content[0]
last = line
brk = False
litpool = False
while i < len(self.content):
line = self.content[i]
if newrecord:
if line.loc == None:
i += 1
continue
rec = "\nT%06XLL" % line.loc
newrecord = False
colcount = 0
# INSTRUCTIONS OR BYTE/WORD
if line.code != "":
codefmt = "%%0%dX" % (line.fmt * 2)
code = codefmt % line.code
# other directives or empty line
else:
if any(line.litpool):
lit = line.litpool.pop(0)
code = lit[2]
codefmt = "%%0%dX" % (len(lit[1]) - 3)
code = codefmt % code
line.litpool = line.litpool[:1]
litpool = True
else:
i += 1
continue
# check if have to break the record
# too far to last instruction
if line.code != "" and line.loc != None and last.code != "" and (line.loc > last.loc + last.fmt * 2) and not brk:
newrecord = True
brk = True
# exceed record length
elif colcount + len(code) > 60:
newrecord = True
# need to relocate
if line.fmt == 4 and line.code & ((DEFAULT_ADDR ^ IMM_ADDR) << BYTESIZE):
M_list.append(line)
if newrecord:
rec = rec.replace("LL", "%02X" % (colcount // 2))
f.write(rec)
continue
else:
colcount += len(code)
rec += code
last = line
i += 1
if brk:
brk = False
for lit in self.endlitpool:
code = lit[2]
codefmt = "%%0%dX" % (len(lit[1]) - 3)
code = codefmt % code
line.litpool = line.litpool[:1]
litpool = True
colcount += len(code)
rec += code
rec = rec.replace("LL", "%02X" % (colcount // 2))
f.write(rec)
for line in M_list:
f.write("\nM%06X%02X" % (line.loc + 1, 5))
f.write("\nE%06X" % self.start_exec)
def handler_START(program, tokens):
    """Process ``NAME START ADDR``: record the program name and start address.

    ADDR is hexadecimal.  The location counter is set to the start address so
    that later addresses, and the program length computed from the location
    counter, are relative to it.  The START line itself gets no address.
    Problems are reported through ``program.error``.
    """
    if "START" not in tokens:
        return
    # validate format
    position = tokens.index("START")
    if program.started:
        program.error("Multiple START detected.")
    elif position == 0:
        program.error("Must specify a name for program before START.")
    elif position > 1:
        program.error("Multiple tokens were specified before START.")
    if len(tokens) < 3:
        program.error("Must specify a starting address after START.")

    program.name = tokens[0]
    if len(program.name) > 6:
        program.error("Program name must not longer than 6 characters.")
    try:
        program.start_addr = int(tokens[2], 16)
    except ValueError:
        program.error("%s is an invalid value for starting address (hexadecimal is required)." % tokens[2])
    program.LOCCTR = program.start_addr
    program.started = True
    program.current_line().loc = None
def handler_END(program, tokens):
    """Process END: the line gets no address, so clear its location."""
    end_line = program.current_line()
    end_line.loc = None
def handler_BYTE(program, tokens):
    """Process ``LABEL BYTE C'text'`` or ``LABEL BYTE X'hex'``.

    Stores the constant as the line's object code, defines LABEL at the
    current location (resolving references made before the definition) and
    advances the location counter by the constant's size in bytes.
    Problems are reported through ``program.error``.
    """
    # the length check comes before tokens[2] is touched
    if tokens[0] == "BYTE":
        program.error("Must specify a label for the allocated space.")
    elif len(tokens) < 3:
        program.error("Requires an value for BYTE.")
    elif tokens[2] == "BYTE":
        program.error("Multiple label were specified for BYTE.")

    label = tokens[0]
    value = tokens[2]
    kind = value[0]
    body = value[2:-1]          # the text between the quotes
    code = 0
    size = 0
    if kind == 'C':
        data = body.encode()
        if not data:
            program.error("Requires an value for BYTE.")
        # int.from_bytes keeps leading zero bytes correct, unlike "%2X" padding
        code = int.from_bytes(data, "big")
        size = len(data)
    elif kind == 'X':
        try:
            code = int(body, 16)
        except ValueError:
            program.error("The \"X\" requires a hex value, but %s is not." % body)
        # two hex digits per byte, rounding an odd digit count up
        size = max(1, (len(body) + 1) // 2)
    else:
        program.error("BYTE requires a C'...' or X'...' value, but got %s." % value)

    line = program.current_line()
    line.code = code
    line.fmt = size
    pending = program.symtab.get(label)
    if isinstance(pending, list):
        fill_forward(pending, program.LOCCTR, program)
    program.symtab[label] = program.LOCCTR
    program.LOCCTR += size
def handler_WORD(program, tokens):
    """Process ``LABEL WORD value``: emit one 3-byte constant.

    The value is parsed as hexadecimal, as in the original code.  It must fit
    in 24 bits; negative values are stored in two's complement.  LABEL is
    defined at the current location and the location counter advances by 3.
    Problems are reported through ``program.error``.
    """
    # the length check comes before tokens[2] is touched
    if tokens[0] == "WORD":
        program.error("Must specify a label for the allocated space.")
    elif len(tokens) < 3:
        program.error("Requires an value for WORD.")
    elif tokens[2] == "WORD":
        program.error("Multiple label were specified for WORD.")

    label = tokens[0]
    value = 0
    try:
        value = int(tokens[2], 16)
    except ValueError:
        program.error("Invalid hex value %s." % tokens[2])
    # accept what a 24-bit word can hold, signed or unsigned
    if not -(2**23) <= value < 2**24:
        program.error("Value exceed the range of a word.")

    line = program.current_line()
    line.code = value & 0xFFFFFF
    line.fmt = 3
    pending = program.symtab.get(label)
    if isinstance(pending, list):
        fill_forward(pending, program.LOCCTR, program)
    program.symtab[label] = program.LOCCTR
    program.LOCCTR += 3
def handler_RESW(program, tokens):
    """Process ``LABEL RESW n``: reserve ``n`` words (3 bytes each).

    ``n`` is decimal.  LABEL is defined at the current location, references
    made before the definition are resolved, and the location counter
    advances by ``3 * n``.  Problems are reported through ``program.error``.
    """
    # the length check comes before tokens[2] is touched
    if tokens[0] == "RESW":
        program.error("Must specify a label for the allocated space.")
    elif len(tokens) < 3:
        program.error("Requires an length for RESW.")
    elif tokens[2] == "RESW":
        program.error("Multiple label were specified for RESW.")

    label = tokens[0]
    count = 0
    try:
        count = int(tokens[2])
    except ValueError:
        program.error("%s is an invalid length for RESW (decimal is required)." % tokens[2])
    if count < 0:
        program.error("%s is an invalid length for RESW (decimal is required)." % tokens[2])

    pending = program.symtab.get(label)
    if isinstance(pending, list):
        fill_forward(pending, program.LOCCTR, program)
    program.symtab[label] = program.LOCCTR
    program.LOCCTR += count * 3
def handler_RESB(program, tokens):
    """Process ``LABEL RESB n``: reserve ``n`` bytes.

    ``n`` is decimal.  LABEL is defined at the current location, references
    made before the definition are resolved, and the location counter
    advances by ``n``.  Problems are reported through ``program.error``.
    """
    # the length check comes before tokens[2] is touched
    if tokens[0] == "RESB":
        program.error("Must specify a label for the allocated space.")
    elif len(tokens) < 3:
        program.error("Requires an length for RESB.")
    elif tokens[2] == "RESB":
        program.error("Multiple label were specified for RESB.")

    label = tokens[0]
    count = 0
    try:
        count = int(tokens[2])
    except ValueError:
        program.error("%s is an invalid length for RESB (decimal is required)." % tokens[2])
    if count < 0:
        program.error("%s is an invalid length for RESB (decimal is required)." % tokens[2])

    pending = program.symtab.get(label)
    if isinstance(pending, list):
        fill_forward(pending, program.LOCCTR, program)
    program.symtab[label] = program.LOCCTR
    program.LOCCTR += count
def handler_BASE(program, tokens):
    """Process ``BASE symbol``: remember the symbol used for base-relative addressing.

    The operand is the token that follows BASE, so a label in front of the
    directive does not shift it.  The BASE line itself gets no address.
    A missing operand is reported through ``program.error``.
    """
    position = tokens.index("BASE") if "BASE" in tokens else 0
    if position + 1 >= len(tokens):
        program.error("Requires a symbol for BASE.")
    program.base = tokens[position + 1]
    program.current_line().loc = None
def handler_NOBASE(program, tokens):
    """Process NOBASE: forget the base symbol; the line itself gets no address."""
    program.base = -1
    nobase_line = program.current_line()
    nobase_line.loc = None
def handler_LTORG(program, tokens):
    """Process LTORG: place every pending literal at the current location.

    A literal is pending while its ``program.littab`` entry is still the list
    of lines that reference it.  Each pending ``C'...'`` or ``X'...'`` literal
    is assigned the current address, the referencing lines are patched through
    ``fill_lit``, an ``(address, literal, code)`` tuple is appended to the
    current line's pool, and the location counter advances by the literal's
    size.  Literals that already have an address are skipped.
    """
    line = program.current_line()
    for key, pending in program.littab.items():
        # an int entry is the address of a literal placed by an earlier pool
        if not isinstance(pending, list):
            continue
        kind = key[0]
        body = key[2:-1]        # the text between the quotes
        code = 0
        if kind == 'C':
            data = body.encode()
            # int.from_bytes keeps leading zero bytes correct, unlike "%2X" padding
            code = int.from_bytes(data, "big")
            size = len(data)
        elif kind == 'X':
            try:
                code = int(body, 16)
            except ValueError:
                program.error("The \"X\" requires a hex value, but %s is not." % body)
            size = 1
        else:
            # other operands are left pending, as before
            continue
        fill_lit(pending, program.LOCCTR, program)
        # replacing the value of an existing key is safe while iterating
        program.littab[key] = program.LOCCTR
        line.litpool.append((program.LOCCTR, key, code))
        program.LOCCTR += size
def end_LITPOOL(program):
    """Place every literal that is still pending after the last source line.

    Works like an LTORG at the end of the program, except that the
    ``(address, literal, code)`` tuples go to ``program.endlitpool``.  ``code``
    is always an int, so the listing and object writers can format it.
    """
    for key, pending in program.littab.items():
        # an int entry is the address of a literal that was already placed
        if not isinstance(pending, list):
            continue
        kind = key[0]
        body = key[2:-1]        # the text between the quotes
        code = 0
        if kind == 'C':
            data = body.encode()
            # int.from_bytes keeps leading zero bytes correct, unlike "%2X" padding
            code = int.from_bytes(data, "big")
            size = len(data)
        elif kind == 'X':
            try:
                code = int(body, 16)
            except ValueError:
                program.error("The \"X\" requires a hex value, but %s is not." % body)
            size = 1
        else:
            # other operands are left pending, as before
            continue
        fill_lit(pending, program.LOCCTR, program)
        # replacing the value of an existing key is safe while iterating
        program.littab[key] = program.LOCCTR
        program.endlitpool.append((program.LOCCTR, key, code))
        program.LOCCTR += size
def handler_EQU(program, tokens):
    """Process ``LABEL EQU value``: define LABEL as a value.

    ``value`` may be ``*`` (the current location counter), a decimal number,
    or a symbol that is already defined.  References made to LABEL before this
    line are resolved through ``fill_forward``.  The location counter does not
    move.  Problems are reported through ``program.error``.
    """
    if tokens[0] == "EQU":
        program.error("Must specify a label before EQU.")
    elif len(tokens) < 3:
        program.error("Requires a value for EQU.")
    elif tokens[1] != "EQU":
        program.error("Multiple label were specified for EQU.")

    label = tokens[0]
    operand = tokens[2]
    existing = program.symtab.get(label)
    if existing is not None and not isinstance(existing, list):
        program.error("redefined symbol \"%s\"." % label)

    value = 0
    if operand == '*':
        value = program.LOCCTR
    elif operand.isnumeric():
        value = int(operand)
    else:
        target = program.symtab.get(operand)
        # a list means the operand has only been referenced, not defined
        if target is None or isinstance(target, list):
            program.error("EQU operand %s must be defined before it is used." % operand)
        value = target

    if isinstance(existing, list):
        fill_forward(existing, value, program)
    program.symtab[label] = value
# Directive table: maps each directive mnemonic to the handler that is called
# as handler(program, tokens) when the mnemonic appears in a statement.
DIRTAB = dict(
    START=handler_START,
    END=handler_END,
    BYTE=handler_BYTE,
    WORD=handler_WORD,
    RESB=handler_RESB,
    RESW=handler_RESW,
    BASE=handler_BASE,
    NOBASE=handler_NOBASE,
    LTORG=handler_LTORG,
    EQU=handler_EQU,
)
# fill the instructions which referencing foward symbols
def fill_forward(fwd_lst, addr, program):
    """Patch the lines that referenced a symbol before it was defined.

    fwd_lst -- pending references, tuples of ``(line, symbol, reftype)``
    addr    -- the address now being assigned to the symbol
    program -- the program being assembled

    For a ``REF_OP`` entry the symbol is the line's operand.  Format 3 lines
    get a PC-relative displacement when it fits, otherwise a base-relative one
    against the BASE symbol in effect for that line; format 4 lines get the
    address itself.  If that BASE symbol is not defined yet, the reference is
    re-queued under the BASE symbol as ``REF_BASE``.  For a ``REF_BASE`` entry
    ``addr`` is the base address and the displacement is taken from the
    operand symbol's address.  A displacement that does not fit is reported
    through ``program.error`` against the referencing line.
    """
    for entry in fwd_lst:
        line = entry[0]
        reftype = entry[-1]
        # tolerate two-element entries, which carry no operand symbol
        ref = entry[1] if len(entry) == 3 else None
        if reftype == REF_OP:
            if line.fmt == 3:
                disp = addr - (line.loc + line.fmt)
                if -2048 <= disp < 2048:
                    line.code |= (disp & 0xFFF) | PC_RELATIVE
                elif line.base != -1:
                    base_entry = program.symtab.get(line.base)
                    if base_entry is fwd_lst:
                        # the BASE symbol is the symbol being defined right now
                        base_entry = addr
                    # if base is defined
                    if base_entry is not None and not isinstance(base_entry, list):
                        disp = addr - base_entry
                        if 0 <= disp < 4096:
                            line.code |= (disp & 0xFFF) | BASE_RELATIVE
                        else:
                            program.error("no enough length to hold the displacement, try format 4.", line)
                    # forward base reference
                    elif base_entry is not None:
                        base_entry.append((line, ref, REF_BASE))
                    else:
                        program.symtab[line.base] = [(line, ref, REF_BASE)]
                else:
                    program.error("no enough length to hold the displacement, try format 4.", line)
            elif line.fmt == 4:
                line.code |= addr
        elif reftype == REF_BASE:
            target = program.symtab.get(ref) if ref is not None else None
            if target is None or isinstance(target, list):
                # the operand address is unknown, so no displacement can be computed
                program.error("no enough length to hold the displacement, try format 4.", line)
            disp = target - addr
            if 0 <= disp < 4096:
                line.code |= (disp & 0xFFF) | BASE_RELATIVE
            else:
                program.error("no enough length to hold the displacement, try format 4.", line)
def fill_lit(lit_lst, addr, program):
    """Patch the lines that reference a literal now placed at ``addr``.

    lit_lst -- the lines whose operand is the literal
    addr    -- the address assigned to the literal
    program -- the program being assembled

    Format 3 lines get a PC-relative displacement when it fits, otherwise a
    base-relative one against the BASE symbol in effect for that line; format
    4 lines get the address itself.  A displacement that does not fit, or a
    BASE symbol that is not defined yet, is reported through ``program.error``
    against the referencing line.
    """
    for line in lit_lst:
        if line.fmt == 3:
            disp = addr - (line.loc + line.fmt)
            if -2048 <= disp < 2048:
                line.code |= (disp & 0xFFF) | PC_RELATIVE
            elif line.base != -1:
                base_addr = program.symtab.get(line.base)
                if base_addr is None or isinstance(base_addr, list):
                    # A literal has no symbol-table entry, so the reference
                    # cannot be queued until the BASE symbol is defined.
                    program.error("BASE symbol %s must be defined before the literal is placed." % line.base, line)
                disp = addr - base_addr
                if 0 <= disp < 4096:
                    line.code |= (disp & 0xFFF) | BASE_RELATIVE
                else:
                    program.error("no enough length to hold the displacement, try format 4.", line)
            else:
                program.error("no enough length to hold the displacement, try format 4.", line)
        elif line.fmt == 4:
            line.code |= addr
def has_directives(program, tokens):
    """Run the handler of the first directive found in ``tokens``.

    Returns True when a directive was found and handled, False otherwise.
    """
    directive = next((tok for tok in tokens if tok in DIRTAB), None)
    if directive is None:
        return False
    DIRTAB[directive](program, tokens)
    return True
def has_instructions(program, tokens):
    """Assemble one instruction statement.

    ``tokens`` is ``[label] mnemonic [operand[,operand2]]``; a ``+`` prefix on
    the mnemonic selects format 4.  A leading label is defined at the current
    location.  The encoded instruction and its size are stored on the current
    line and the location counter advances by the size.

    Returns True when an instruction was assembled and False when ``tokens``
    contains no mnemonic.  Problems are reported through ``program.error``.
    """
    if not tokens:
        return False

    inst = ""
    operand = ""
    operand2 = ""
    fmt = 0
    for idx, token in enumerate(tokens):
        # instruction hasn't meet yet (instruction or label)
        if inst == "":
            # instruction without prefix
            if token in OPTAB:
                inst = token
            # instruction with prefix
            elif token[1:] in OPTAB:
                prefix = token[0]
                inst = token[1:]
                if prefix != '+':
                    program.error("invalid instruction prefix \"%s\"." % prefix)
                elif not (OPTAB[inst].inf & FORMAT4):
                    program.error("%s does not support format 4." % inst)
                else:
                    fmt = 4
            elif idx > 0:
                # only the first token may be a label; let the caller report it
                return False
            # is label
            else:
                label = token
                pending = program.symtab.get(label)
                if pending is not None and not isinstance(pending, list):
                    program.error("redefined symbol \"%s\"." % label)
                elif pending is not None:
                    fill_forward(pending, program.LOCCTR, program)
                program.symtab[label] = program.LOCCTR
        # instruction met (operand)
        else:
            if token.find(',') != -1:
                operand, operand2, *dummy = token.split(',')
                if any(dummy):
                    program.error("too many operands")
            else:
                operand = token

    # a statement without a mnemonic (for example a bare label) is not an instruction
    if inst == "":
        return False

    # compute the instruction format
    if fmt != 4:
        mask = OPTAB[inst].inf & (FORMAT1 | FORMAT2 | FORMAT3)
        # NOTE(review): assumes every OPTAB entry sets one of these format bits;
        # with none set this loop would not terminate -- confirm against sicxe.
        while mask != 0b1000:
            mask >>= 1
            fmt += 1
        fmt = (5 - fmt)

    # validate the foramt
    if (operand2 != "" and fmt != 2) and operand2 != 'X':
        program.error("Only format 2 insturctions allow two operands.")
    if fmt == 1 and operand != "":
        program.error("Format 1 instructions should not have any operands.")
    if fmt == 2 and operand == "":
        program.error("%s requires an operand." % inst)

    # generate opcode
    code = OPTAB[inst].opcode
    isLiteral = False

    # parse the prefix for format 3 & 4 instructions
    if (fmt == 3 or fmt == 4) and inst != "RSUB":
        if operand == "":
            program.error("%s requires an operand." % inst)
        prefix = ""
        # parse the operand
        if not operand[0].isalnum():
            prefix = operand[0]
            operand = operand[1:]
            if operand == "":
                program.error("%s requires an operand." % inst)

        # generate the addressing mask (nixbpe)
        mask = DEFAULT_ADDR
        if prefix == '#':
            mask = IMM_ADDR
        elif prefix == '@':
            mask = INDR_ADDR
        elif prefix == '=':
            isLiteral = True
        elif prefix != "":
            program.error("Unrecognized addressing prefix \"%s\"." % prefix)

        if operand2 == 'X':
            mask |= INDEX_ADDR
        if fmt == 4:
            mask |= EXTEND_FMT
        code |= mask
    # handle format 3/4 instruction which has no operand
    elif inst == "RSUB":
        code |= DEFAULT_ADDR

    # shift format 4 instructions
    if fmt == 4:
        code <<= BYTESIZE

    # generate operand
    if isLiteral:
        entry = program.littab.get(operand)
        if entry is None:
            # first use: queue the line until a literal pool places the literal
            program.littab[operand] = [program.current_line()]
        elif isinstance(entry, list):
            # still waiting for a pool: queue this line as well
            entry.append(program.current_line())
        elif fmt == 4:
            code |= entry
        else:
            # already placed: use it if PC-relative reaches it, else wait for the next pool
            disp = entry - (program.LOCCTR + fmt)
            if -2048 <= disp < 2048:
                code |= (disp & 0xFFF) | PC_RELATIVE
            else:
                program.littab[operand] = [program.current_line()]
    elif inst != "RSUB" and fmt != 1:
        target = program.symtab.get(operand)
        if operand.isnumeric():
            number = int(operand)
            if (fmt == 3 and number > 2**12 - 1) or (fmt == 4 and number > 2**20 - 1):
                program.error("operand with value = %d is out of range." % number)
            else:
                code |= number
        elif target is not None and not isinstance(target, list):
            if fmt == 2:
                # some format 2 instruction accept 2 operands
                if inst in ("SHIFTL", "SHIFTR") and operand2.isnumeric():
                    # a shift count n is encoded as n - 1 in the second field
                    count = int(operand2)
                    if not 1 <= count <= 16:
                        program.error("operand with value = %d is out of range." % count)
                    code |= count - 1
                elif inst in ["ADDR", "COMPR", "DIVR", "MULR", "RMO", "SHIFTL", "SHIFTR", "SUBR"]:
                    second = program.symtab.get(operand2)
                    if second is None or isinstance(second, list):
                        program.error("%s requires two register operands." % inst)
                    code |= second
                code |= target << 4
            elif fmt == 3:
                disp = target - (program.LOCCTR + fmt)
                # try to use PC-realtive
                if -2048 <= disp < 2048:
                    code |= (disp & 0xFFF) | PC_RELATIVE
                # try to use base-relative
                elif program.base != -1:
                    base_entry = program.symtab.get(program.base)
                    # if base is defined
                    if base_entry is not None and not isinstance(base_entry, list):
                        # range-check the raw displacement; masking first hid overflow
                        disp = target - base_entry
                        if 0 <= disp < 4096:
                            code |= (disp & 0xFFF) | BASE_RELATIVE
                        else:
                            program.error("no enough length to hold the displacement, try format 4.")
                    # forward base reference
                    elif base_entry is not None:
                        base_entry.append((program.current_line(), operand, REF_BASE))
                    else:
                        program.symtab[program.base] = [(program.current_line(), operand, REF_BASE)]
                else:
                    program.error("no enough length to hold the displacement, try format 4.")
            elif fmt == 4:
                code |= target
        elif target is not None:
            target.append((program.current_line(), operand, REF_OP))
        else:
            program.symtab[operand] = [(program.current_line(), operand, REF_OP)]

    # find the first executable location
    if program.start_exec == -1:
        program.start_exec = program.LOCCTR

    program.LOCCTR += fmt
    program.current_line().fmt = fmt
    program.current_line().code = code
    return True
if __name__ == "__main__":
    import sys

    # Parse the arguments
    parser = argparse.ArgumentParser(description="A Python SIC/XE Assembler")
    parser.add_argument('-o', '--output', help='the output file.', default='a.out')
    parser.add_argument('-L', '--listing', help='generate assembly listing.')
    parser.add_argument('input', nargs=1, help='the source assembly file(s).')
    args = parser.parse_args()

    print("SIC/XE Assembler")

    # Open files in the list
    try:
        # keep the module-level name "program": other code in this file may use it
        program = Program(args.input[0])
    except OSError as exc:
        print("Cannot read %s: %s" % (args.input[0], exc))
        sys.exit(1)

    try:
        print("\nStarting assemble %s ..." % program.source)
        program.assemble()
        print("Done.")
        # the listing is written before the object file, as before
        if args.listing:
            program.listing(args.listing)
        program.output(args.output)
    except AssembleError:
        print("Assemble failed.")
        # a non-zero status lets scripts and build tools detect the failure
        sys.exit(1)