1 /**
2  * DCPU-16 cpu disassembler
3  */
4 module dcpu.disassembler;
5 
import std.stdio, std.array, std.string, std.conv, std.getopt, std.regex;
7 import dcpu.microcode;
8 
9 public import std.typecons;
10 
11 private:
12 
/**
 * Builds the assembly text of one operand of an instruction.
 * Params:
 *  op        = Operand field to decode ("OpA" or "OpB")
 *  words     = words[0] is the instruction word; words[1] is the word read
 *              when the operand needs a "next word" value
 *  n_words   = Incremented by one when the operand consumes the "next word"
 * Returns: A string with the textual representation of the operand
 */
string operand(string op ) (ushort[] words, ref ushort n_words)
in {
  assert(words.length >= 2, "Need first word of instruction and the word that could contain the \"next word\" value");
} body{
  ushort field = decode!op(words[0]);

  // Accounts for the consumed "next word" and renders it with the given format.
  string withNextWord(string fmt) {
    n_words++;
    return format(fmt, words[1]);
  }

  switch (field) {
    // Plain registers
    case Operand.A: return "A";
    case Operand.B: return "B";
    case Operand.C: return "C";
    case Operand.X: return "X";
    case Operand.Y: return "Y";
    case Operand.Z: return "Z";
    case Operand.I: return "I";
    case Operand.J: return "J";

    // Register used as a pointer: [register]
    case Operand.Aptr: return "[A]";
    case Operand.Bptr: return "[B]";
    case Operand.Cptr: return "[C]";
    case Operand.Xptr: return "[X]";
    case Operand.Yptr: return "[Y]";
    case Operand.Zptr: return "[Z]";
    case Operand.Iptr: return "[I]";
    case Operand.Jptr: return "[J]";

    // Register used as a pointer plus the next word: [register + next word]
    case Operand.Aptr_word: return withNextWord("[A+ 0x%04X]");
    case Operand.Bptr_word: return withNextWord("[B+ 0x%04X]");
    case Operand.Cptr_word: return withNextWord("[C+ 0x%04X]");
    case Operand.Xptr_word: return withNextWord("[X+ 0x%04X]");
    case Operand.Yptr_word: return withNextWord("[Y+ 0x%04X]");
    case Operand.Zptr_word: return withNextWord("[Z+ 0x%04X]");
    case Operand.Iptr_word: return withNextWord("[I+ 0x%04X]");
    case Operand.Jptr_word: return withNextWord("[J+ 0x%04X]");

    // Stack access: the same code is PUSH as operand b and POP as operand a
    case Operand.POP_PUSH:
      return (op == "OpB") ? "PUSH" : "POP";
    case Operand.PEEK:
      return "PEEK";
    case Operand.PICK_word:
      return withNextWord("[SP+ 0x%04X]");

    // Special registers
    case Operand.SP: return "SP";
    case Operand.PC: return "PC";
    case Operand.EX: return "EX";

    // Next word, used as a pointer or as a literal value
    case Operand.NWord_ptr: return withNextWord("[0x%04X]");
    case Operand.NWord:     return withNextWord("0x%04X");

    // Anything else is a literal embedded in the operand field itself
    default:
      return format("0x%04X", cast(ushort)(field - Operand.Literal -1)); // -1 to 30
  }
}
135 
136 public:
137 
/**
 * Disassembles ONE instruction.
 * Params:
 *  words    = Instruction to disassemble: the instruction word followed by
 *             its two possible "next words" (at least 3 elements)
 *  n_words  = Receives the size, in words, of the disassembled instruction.
 *             It is 1 when the word is emitted as raw data ("DAT")
 * Returns: A string with the disassembled instruction, or "DAT 0x...." with the
 *          value of words[0] when the opcode is not a member of OpCode / ExtOpCode
 */
string disassamble(ushort[] words, out ushort n_words)
in {
  assert(words.length >= 3, "Instructions can ben 3 words long");
}body {
  ubyte opcode = decode!"OpCode"(words[0]);
  n_words = 1;
  // Operand a is decoded first, so its "next word" (if any) is words[1]
  string op_a = operand!"OpA"(words, n_words);

  if (opcode == OpCode.ExtOpCode) { // Non basic instruction
    opcode = decode!"ExtOpCode"(words[0]);
    foreach (s; __traits(allMembers, ExtOpCode)) {
      if (opcode == mixin("ExtOpCode." ~ s)) {
        return s ~ " " ~ op_a;
      }
    }
  } else { // Basic instruction
    // The "next word" of operand b comes after the one consumed by operand a,
    // so its index is the number of words counted so far
    ushort[] tmp = [words[0], words[n_words]];
    string op_b = operand!"OpB"(tmp, n_words);
    foreach (s; __traits(allMembers, OpCode)) {
      if (opcode == mixin("OpCode." ~ s)) {
        return s  ~ " " ~ op_b ~ ", " ~ op_a;
      }
    }
  }

  // Unknown opcode: only words[0] is emitted as data, so the reported size must
  // be one word. Otherwise the caller would skip the "next words" counted while
  // decoding the operands and they would be lost from the listing.
  n_words = 1;
  return format("DAT 0x%04X", words[0]);
}
176 
/**
 * Disassembles a slice of binary data.
 * Params:
 *  data    = Slice of DCPU-16 binary data. Only the first 65536 words are used
 *  comment = Append to each line a comment with the address and the hex machine code
 *  tab     = Prefix each line with a blank column
 *  offset  = Offset added to the address of each instruction
 * Returns: An associative array where the key is the address of an instruction
 *          (position in data plus offset, wrapped to 16 bits) and the value is
 *          its line of assembly code
 */
string[ushort] range_diassamble(in ushort[]data, bool comment = false, bool tab = false, ushort offset = 0)
in {
  assert(data.length > 0, "Can't disassamble empty data");
} body {
  // Chop to the maximum addressable amount of data
  immutable size_t limit = cast(size_t)ushort.max + 1;
  ushort[] slice = (data.length > limit) ? data[0..limit].dup : data.dup;

  string[ushort] ret;
  ushort n_words = 1;
  // pos is a size_t on purpose: a ushort counter wraps around before reaching
  // the end of a full 64 Kword image and the loop would never finish
  for (size_t pos = 0; pos < slice.length; pos += n_words) {
    string inst;

    if (slice.length - pos >= 3) {
      // Disassemble one instruction; n_words tells how far to jump to the next one
      inst = disassamble(slice[pos..pos+3], n_words);
    } else {
      // Fewer than 3 words left: pad with zeros to satisfy disassamble's contract
      ushort[] tmp = slice[pos..$] ~ cast(ushort[])[0, 0];
      inst = disassamble(tmp, n_words);
    }

    // The same wrapped address is used as key and in the comment
    immutable ushort addr = cast(ushort)(pos + offset);
    string line = tab ? "                 " ~ inst : inst;

    if (comment) { // Add a comment like: spaces ;[addr] - xxxx ....
      // Pad the instruction up to column 29; longer instructions get no padding
      for (size_t col = inst.length; col < 29; col++) {
        line ~= " ";
      }
      line ~= ";" ~ format("[%04X] - %04X ", addr, slice[pos]);

      // Remaining words of the instruction that really exist in the data
      for (size_t i = pos + 1; i < pos + n_words && i < slice.length; i++) {
        line ~= format("%04X ", slice[i]);
      }
    }

    ret[addr] = line;
  }

  return ret;
}
233 
/**
 * Auto-labels a listing of source code. In every line with a "SET PC, 0x...."
 * or "JSR 0x...." preceded by more than 6 characters, the literal address is
 * replaced by a "lbXXXX" label, and the line stored at the target address gets
 * its first 7 characters replaced by ":lbXXXX" (a new ":lbXXXX" line is
 * created if the table has no line at that address).
 * Params:
 *  code    = Associative array (address -> line of code). It is modified in place
 * Returns: The same table autolabeled
 */
ref string[ushort] auto_label(ref string[ushort] code) {
  auto reg = regex(r"(SET PC, 0x)|(JSR 0x)","g");
  ushort[] targets; // Jump addresses found, labeled after the iteration ends

  foreach (ref line; code) {
    auto m = match(line, reg);
    if (m.empty || m.pre.length <= 6) {
      continue;
    }

    string post = m.post;
    auto jmp = parse!ushort(post, cast(uint)16); // Get jump address

    // Drop the "0x" at the end of the hit and put the label name instead
    string labeled = m.pre ~ m.hit[0..$-2] ~ format("lb%04X ", jmp);
    if (m.post.length > 7) { // has comments: skip the 4 hex digits and one space
      labeled ~= m.post[5..$];
    }
    line = labeled;
    targets ~= jmp;
  }

  // Labels are applied here and not inside the foreach, because adding keys to
  // an associative array while iterating over it is not allowed
  foreach (jmp; targets) {
    string label = format(":lb%04X", jmp);
    if (auto existing = jmp in code) {
      if ((*existing).length >= 7) {
        *existing = label ~ (*existing)[7..$];
      } else {
        // Line too short to hold the label column: keep its text after the label
        *existing = label ~ " " ~ *existing;
      }
    } else {
      code[jmp] = label;
    }
  }

  return code;
}