/**
 * DCPU-16 cpu disassembler
 */
module dcpu.disassembler;

import std.stdio, std.array, std.string, std.conv, std.getopt, std.regex;
import dcpu.microcode;

public import std.typecons;

private:

/**
 * Gives the string representation of an operand.
 * Params:
 *  op      = Operand type ("OpA" or "OpB")
 *  words   = First instruction word, followed by the word that could contain
 *            the "next word" value of this operand
 *  n_words = Incremented by 1 if the operand uses a "next word"
 * Returns: A string that contains the operand representation
 */
string operand(string op) (ushort[] words, ref ushort n_words)
in {
  assert(words.length >= 2, "Need first word of instruction and the word that could contain the \"next word\" value");
} body {
  ushort code = decode!op(words[0]);

  switch (code) {
    case Operand.A: // Register x
      return "A";
    case Operand.B:
      return "B";
    case Operand.C:
      return "C";
    case Operand.X:
      return "X";
    case Operand.Y:
      return "Y";
    case Operand.Z:
      return "Z";
    case Operand.I:
      return "I";
    case Operand.J:
      return "J";

    case Operand.Aptr: // Register pointer [x]
      return "[A]";
    case Operand.Bptr:
      return "[B]";
    case Operand.Cptr:
      return "[C]";
    case Operand.Xptr:
      return "[X]";
    case Operand.Yptr:
      return "[Y]";
    case Operand.Zptr:
      return "[Z]";
    case Operand.Iptr:
      return "[I]";
    case Operand.Jptr:
      return "[J]";

    case Operand.Aptr_word: // Register pointer with added word
      n_words++;
      return format("[A+ 0x%04X]", words[1]);

    case Operand.Bptr_word:
      n_words++;
      return format("[B+ 0x%04X]", words[1]);

    case Operand.Cptr_word:
      n_words++;
      return format("[C+ 0x%04X]", words[1]);

    case Operand.Xptr_word:
      n_words++;
      return format("[X+ 0x%04X]", words[1]);

    case Operand.Yptr_word:
      n_words++;
      return format("[Y+ 0x%04X]", words[1]);

    case Operand.Zptr_word:
      n_words++;
      return format("[Z+ 0x%04X]", words[1]);

    case Operand.Iptr_word:
      n_words++;
      return format("[I+ 0x%04X]", words[1]);

    case Operand.Jptr_word:
      n_words++;
      return format("[J+ 0x%04X]", words[1]);

    case Operand.POP_PUSH: // Same encoding: PUSH when used as b, POP when used as a
      static if (op == "OpB") {
        return "PUSH";
      } else {
        return "POP";
      }

    case Operand.PEEK:
      return "PEEK";

    case Operand.PICK_word:
      n_words++;
      return format("[SP+ 0x%04X]", words[1]);

    case Operand.SP: // SP
      return "SP";

    case Operand.PC: // PC
      return "PC";

    case Operand.EX: // Overflow register
      return "EX";

    case Operand.NWord_ptr: // next word pointer
      n_words++;
      return format("[0x%04X]", words[1]);

    case Operand.NWord: // word literal
      n_words++;
      return format("0x%04X", words[1]);

    default: // Embedded literal; the cast makes -1 print as 0xFFFF
      return format("0x%04X", cast(ushort)(code - Operand.Literal -1)); // -1 to 30
  }
}

public:

/**
 * Disassembles ONE instruction.
 * Params:
 *  words   = Instruction to disassemble (a word and its two next words)
 *  n_words = Receives the size, in words, of the disassembled instruction
 * Returns: A string that contains the disassembled code. Words whose opcode
 *          matches no known mnemonic are emitted as "DAT 0x...." data.
 */
string disassamble(ushort[] words, out ushort n_words)
in {
  assert(words.length >= 3, "Instructions can ben 3 words long");
} body {
  ubyte opcode = decode!"OpCode"(words[0]);
  n_words = 1;
  // Operand a is decoded first: its "next word" (if any) is words[1]
  string op_a = operand!"OpA"(words, n_words);

  if (opcode == OpCode.ExtOpCode) { // Non basic instruction
    opcode = decode!"ExtOpCode"(words[0]);
    foreach (s; __traits(allMembers, ExtOpCode)) {
      if (opcode == mixin("ExtOpCode." ~ s)) {
        return s ~ " " ~ op_a;
      }
    }
    return format("DAT 0x%04X", words[0]); // Unknown extended opcode

  } else { // Decode operation
    // Operand b reads its "next word" from the first word not consumed by a
    ushort[] tmp = [words[0], words[n_words]];
    string op_b = operand!"OpB"(tmp, n_words);
    foreach (s; __traits(allMembers, OpCode)) {
      if (opcode == mixin("OpCode." ~ s)) {
        return s ~ " " ~ op_b ~ ", " ~ op_a;
      }
    }
    return format("DAT 0x%04X", words[0]); // Unknown basic opcode
  }
}

/**
 * Disassembles a slice of binary data.
 * Params:
 *  data    = Slice of DCPU-16 binary data
 *  comment = Add comments to assembly code with the address and hex machine code
 *  tab     = Prefix every instruction with a 16 columns wide indent
 *  offset  = Offset added to the address of each instruction
 * Returns: An associative array where the key is the address (position plus
 *          offset, wrapped to 16 bits) of each instruction and the value is
 *          its assembly text
 */
string[ushort] range_diassamble(in ushort[] data, bool comment = false, bool tab = false, ushort offset = 0)
in {
  assert(data.length > 0, "Can't disassamble empty data");
} body {
  enum size_t max_words = ushort.max + 1; // Maximum addressable data
  enum size_t comment_column = 29;        // Column where the comment begins
  enum size_t tab_width = 16;             // Width of the automatic indent

  // Chop to maximum addressable data. The check must look at the input, not
  // at the (still empty) working copy.
  ushort[] slice = (data.length > max_words ? data[0 .. max_words] : data).dup;

  string[ushort] ret;
  ushort n_words = 1;
  // size_t counter: a ushort would wrap around (and never terminate) when
  // the slice holds the full 65536 words.
  for (size_t pos = 0; pos < slice.length; pos += n_words) {
    string inst;

    if (pos + 3 <= slice.length) {
      // Disassemble one instruction; n_words tells how far to jump next
      inst = disassamble(slice[pos .. pos + 3], n_words);
    } else {
      // Near the end of data: pad with zeros so three words are available
      ushort[] tmp = slice[pos .. $] ~ cast(ushort[])[0, 0];
      inst = disassamble(tmp, n_words);
    }

    immutable ushort addr = cast(ushort)(pos + offset);
    string line = tab ? replicate(" ", tab_width) ~ inst : inst;

    if (comment) { // Add comment like: spaces ; [addr] - xxxx ....
      // Pad only when the instruction is shorter than the comment column;
      // the unsigned subtraction would underflow otherwise.
      if (inst.length < comment_column) {
        line ~= replicate(" ", comment_column - inst.length);
      }
      line ~= ";" ~ format("[%04X] - %04X ", addr, slice[pos]);

      // Remaining words of this instruction, clamped to the available data
      size_t end = pos + n_words;
      if (end > slice.length) {
        end = slice.length;
      }
      foreach (word; slice[pos + 1 .. end]) {
        line ~= format("%04X ", word);
      }
    }

    ret[addr] = line;
  }

  return ret;
}

/**
 * Auto labels a source code: literal jump targets of "SET PC, 0x...." and
 * "JSR 0x...." are replaced by "lbXXXX" labels and the target lines get a
 * ":lbXXXX" label definition. Only lines with more than 6 characters before
 * the instruction are processed, as the label definition overwrites the
 * first 7 columns of its line.
 * Params:
 *  code = Associative array that contains lines of code
 * Returns: The same table autolabeled
 */
ref string[ushort] auto_label(ref string[ushort] code) {
  auto reg = regex(r"(SET PC, 0x)|(JSR 0x)","g");
  ushort[] targets; // Jump addresses found, labeled after the scan

  foreach (ref line; code) {
    auto m = match(line, reg);
    if (m.empty || m.pre.length <= 6) {
      continue;
    }

    string post = m.post;
    auto jmp = parse!ushort(post, cast(uint)16); // Get jump address

    // Drop the "0x" of the hit and put the label name in place of the literal
    string labeled = m.pre ~ m.hit[0..$-2] ~ format("lb%04X ", jmp);
    if (m.post.length > 7) { // has comments
      labeled ~= m.post[5..$];
    }
    line = labeled;
    targets ~= jmp;
  }

  // Labels are written once the iteration is over: adding keys to an
  // associative array while iterating over it is not allowed.
  foreach (jmp; targets) {
    string label = format(":lb%04X", jmp);
    if (jmp in code) {
      string target = code[jmp];
      if (target.length >= label.length) {
        code[jmp] = label ~ target[label.length .. $];
      } else {
        // Line too short to overwrite its first columns: keep its content
        code[jmp] = label ~ " " ~ target;
      }
    } else {
      code[jmp] = label;
    }
  }

  return code;
}