- 192
- Posts
- 17
- Years
- Seen Oct 28, 2020
I wrote a program that is supposed to find all areas in the ROM that contain executable code. Although it doesn't always work the way it should, I want to share it. It outputs a file with the same size as the ROM: a 0x01 byte means "at this position in the ROM there is ARM code", 0x02 stands for THUMB code, and 0x03 marks data that is used directly by the code.
Now I have a few questions...
Code:
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
/* Fixed-width aliases; they assume a platform where int is 32 and short is 16 bits wide. */
typedef unsigned int uint32;
typedef unsigned short uint16;
typedef unsigned char uint8;
typedef signed int sint32;
typedef signed short sint16;
typedef signed char sint8;

/* Fetch the 32-bit (NA) or 16-bit (NT) value stored at address p of the rom buffer. */
#define NA(p) (*(uint32*)(rom+(p)))
#define NT(p) (*(uint16*)(rom+(p)))

/*
 * Opcode matchers: true when the current ARM opcode `ra` (OA) or THUMB opcode
 * `rt` (OT), masked with st, equals dt. Arguments and the whole expansion are
 * parenthesized so the macros stay correct next to `!`, `&`, `|` and friends.
 */
#define OA(st,dt) (((ra)&(st))==(dt))
#define OT(st,dt) (((rt)&(st))==(dt))
/* True when the current ARM opcode carries the "always" (0xE) condition. */
#define ALWAYS OA(0xF0000000,0xE0000000)

#define STACK_SIZE 100

/*
 * "Link wrapper": run statement f as a call. When `link` is set, a new
 * where_lr frame is opened (LR starts out in R14) and the previous stack id
 * is restored afterwards. The frame index saturates at STACK_SIZE-1 so deep
 * call chains can no longer write past the end of where_lr. The expansion is
 * a single compound statement; a trailing `;` at the call site is optional.
 * Expects `link`, `bsid` in the calling scope.
 */
#define LIWRP(f) {if(link){bsid=sid;if(sid+1<STACK_SIZE)sid++;where_lr[sid]=14;} f; if(link)sid=bsid;}

#define L printf("%08x ",reg[0]) // For debugging. Replace it if you want

/* Save / restore the simulated register file around a followed branch (needs a local reg_backup[16]). */
#define BU memcpy(reg_backup,reg,sizeof reg) // Backup registers
#define BD memcpy(reg,reg_backup,sizeof reg) // Restore registers

/* Marker bytes written to the map, replicated to the width of the marked item. */
#define MAP_ARM 0x01010101
#define MAP_THUMB 0x0202
#define MAP_DIR_REF 0x03030303

const uint32 base=0x08000000; // Address the first ROM byte is mapped to
uint32 len;                   // Size of the ROM image in bytes
uint8 *rom;                   // ROM image
uint8 *map;                   // Output map, same size as the ROM
uint32 reg[16];               // Simulated registers R0-R15
uint8 where_lr[STACK_SIZE]; // 0-15 Reg 16-256 Stack
static uint16 sid=0; // "Stack-id"
void arm(uint32);
void thumb(uint32);
/*
 * Tell whether pos lies inside the address window [base, base+len).
 * Returns 1 when it does and 0 otherwise.
 */
uint8 inrange(uint32 pos){
    if(pos<base){
        return 0;
    }
    return pos<base+len;
}
/*
 * Sign-extend the 24-bit immediate of an ARM B/BL/BLX opcode and scale it to
 * a byte offset (imm24 << 2). The result is a two's-complement value held in
 * a uint32 so it can be added to an address with plain wrap-around arithmetic.
 */
static uint32 arm_branch_offset(uint32 ra){
    uint32 off=(ra&0x00FFFFFF)<<2;
    if(off&0x02000000)off|=0xFC000000; // bit 25 is the sign bit after scaling
    return off;
}

/*
 * Trace ARM code linearly starting at address pos.
 *
 * Every visited word is marked MAP_ARM in the map. Branches are followed
 * recursively (into arm() or thumb()), words loaded by LDR are marked
 * MAP_DIR_REF and copied into the simulated register. Tracing stops at an
 * already marked byte, at the end of the ROM, at an unconditional branch
 * without link, or at an all-ones word.
 */
void arm(uint32 pos){
    uint32 reg_backup[16];
    uint16 bsid;
    uint8 link;
    if(!inrange(pos)){
        L;printf("Out of range: %08x\n",pos);
        return;
    }
    // Re-check the bounds on every step: the whole word must lie inside the ROM.
    for(;inrange(pos)&&inrange(pos+3)&&!map[pos];pos+=4){
        uint32 ra=NA(pos);
        if(ra==0xFFFFFFFF){
            // All-ones is not an ARM instruction. The original forced a
            // division by zero here (undefined behaviour); end this path
            // instead and leave the word unmarked.
            L;printf("Not code: %08x\n",pos);
            return;
        }
        (*(uint32*)(map+pos))=MAP_ARM;
        L;printf("ARM %08x: %08x\n",pos,ra);
        if(OA(0x0FFFFFD0,0x012FFF10)){ // B(L)X
            uint32 o=reg[ra&0xF];
            L;printf("BX R%d\n",ra&0xF);
            // The mask above ignores bit 5, which is what tells BLX from BX.
            link=(ra>>5)&1;
            BU;
            // Bit 0 of the target selects the instruction set.
            LIWRP(if(o&1)thumb(o&~1u);else arm(o));
            if(ALWAYS&&!link)return;
            BD;
        }else if(OA(0xFE000000,0xFA000000)){ // BLX
            L;printf("BLX\n");
            link=1;
            BU;
            // H (bit 24) contributes the halfword bit 1 of the THUMB target.
            LIWRP(thumb(pos+8+arm_branch_offset(ra)+((ra>>23)&0x2)));
            BD;
        }else if(OA(0xFE700000,0xE4100000)){ // LDR
            reg[15]=pos+8; // PC reads as the current instruction + 8
            uint32 o=ra&0xFFF;
            uint32 rn=reg[(ra>>16)&0xF];
            // U (bit 23) selects whether the offset is added or subtracted.
            uint32 addr=((ra>>23)&1)?rn+o:rn-o;
            if(inrange(addr)&&inrange(addr+3)){
                L;printf("LDR PC-Relative (%08x: %08x)\n",addr,NA(addr));
                reg[(ra>>12)&0xF]=NA(addr);
                (*(uint32*)(map+addr))=MAP_DIR_REF;
            }else{
                reg[(ra>>12)&0xF]=0;
                L;printf("LDR PC-Relative out of range\n");
            }
        }else if(OA(0x0E000000,0x0A000000)){ //B(L)
            L;printf("B(L)\n");
            link=(ra>>24)&1;
            BU;
            LIWRP(arm(pos+8+arm_branch_offset(ra)));
            if(ALWAYS&&!link)return;
            BD;
        }
    }
}
/*
 * Trace THUMB code linearly starting at address pos.
 *
 * Every visited halfword is marked MAP_THUMB in the map. Branches are
 * followed recursively, loaded words are marked MAP_DIR_REF and copied into
 * the simulated register, and where_lr[sid] tracks where the return address
 * currently lives (0-15: register, 16 and up: stack slot) so that function
 * returns can be recognised. Tracing stops at an already marked byte, at the
 * end of the ROM, or at an instruction that leaves the current path.
 */
void thumb(uint32 pos){
    uint32 reg_backup[16];
    sint32 jump_buf=0; // upper half of a long branch, kept between halfwords
    uint16 bsid;
    uint8 link;
    if(!inrange(pos)){
        L;printf("Out of range: %08x\n",pos);
        return;
    }
    // Re-check the bounds on every step: the whole halfword must lie inside the ROM.
    for(;inrange(pos)&&inrange(pos+1)&&!map[pos];pos+=2){
        uint16 rt=NT(pos);
        (*(uint16*)(map+pos))=MAP_THUMB;
        L;printf("THUMB %08x: %04x\n",pos,rt);
        if(OT(0xFF00,0x4700)){ // BX
            uint8 ri=(rt>>3)&0xF;
            if(ri==where_lr[sid]||ri==14){
                L;printf("Return\n");
                if(sid>0)sid--; // never wrap below the outermost frame
                return;
            }
            L;printf("BX (High-reg.)\n");
            {
                // PC reads as this instruction + 4; "bx pc" enters ARM code there.
                uint32 target=(ri==15)?((pos+4)&~3u):reg[ri];
                // Bit 0 of the target selects the instruction set.
                if(target&1)thumb(target&~1u);else arm(target);
            }
            // Only the linking form (bit 7 set) comes back to the next instruction.
            if(!(rt&0x80))return;
        }else if(OT(0xF800,0x4800)){ // LDR Ra, [$b]
            uint32 addr=((rt&0xFF)<<2)+((pos+4)&~2);
            if(inrange(addr)&&inrange(addr+3)){
                L;printf("LDR PC-Relative (%08x: %08x)\n",addr,NA(addr));
                reg[(rt>>8)&0x7]=NA(addr);
                (*(uint32*)(map+addr))=MAP_DIR_REF;
            }else{
                reg[(rt>>8)&0x7]=0;
                L;printf("LDR PC-Relative out of range\n");
            }
        }else if(OT(0xF800,0x6800)){ // LDR Ra, [Rb, #c]
            // The immediate is 5 bits wide (bits 10-6) and counts words.
            uint32 addr=reg[(rt>>3)&0x7]+(((rt>>6)&0x1F)<<2);
            if(inrange(addr)&&inrange(addr+3)){
                L;printf("LDR with immediate offset (%08x: %08x)\n",addr,NA(addr));
                reg[rt&0x7]=NA(addr);
                (*(uint32*)(map+addr))=MAP_DIR_REF;
            }else{
                reg[rt&0x7]=0;
                L;printf("LDR with immediate offset out of range\n");
            }
        }else if(OT(0xF800,0xE000)){ // B
            jump_buf=(rt&0x7FF)<<1;
            if(jump_buf&0x800)jump_buf-=0x1000; // sign-extend the 12-bit offset
            L;printf("B\n");
            thumb(pos+4+jump_buf);
            return;
        }else if(OT(0xF000,0xD000)){ // B (cond.)
            // Condition 0xF is SWI and 0xE is undefined: neither is a branch.
            if(((rt>>8)&0xF)<0xE){
                sint8 off=(sint8)(rt&0xFF);
                L;printf("B (cond.)\n");
                thumb(pos+4+off+off);
            }
        }else if(OT(0xFF78,0x4670)){ // MOV foo, LR
            // H1 (bit 7) is the high bit of the destination; bit 6 belongs to the source.
            where_lr[sid]=(rt&7)+((rt>>4)&0x8);
            L;printf("RL (R14) moved to R%d\n",where_lr[sid]);
        }else if(OT(0xFE00,0xB400)){ // PUSH
            if(rt&256){
                L;printf("Pushing LR (R14)\n");
                where_lr[sid]=16;
            }
            for(uint8 bit=8;bit>0;bit--){
                if(rt&(1<<(bit-1))){
                    L;printf("Pushing R%d\n",bit-1);
                    // Everything pushed on top moves LR one slot deeper.
                    if(where_lr[sid]>=16)where_lr[sid]++;
                }
            }
        }else if(OT(0xFE00,0xBC00)){ // POP
            if(rt&256){
                L;printf("Popping to PC (R15)\n");
                if(where_lr[sid]==16){
                    where_lr[sid]=15; // Just for clarification
                    L;printf("Popped LR (R14) to PC (R15)\n");
                    return;
                }
                L;printf("TODO: IMPLEMENT THIZ\n");
                if(where_lr[sid]>16)where_lr[sid]--;
                return;
            }
            for(uint8 bit=8;bit>0;bit--){
                if(rt&(1<<(bit-1))){
                    L;printf("Popping to R%d\n",bit-1);
                    if(where_lr[sid]==16){
                        L;printf("Popped LR (R14) to %d\n",bit-1);
                        where_lr[sid]=bit-1;
                    }
                    if(where_lr[sid]>16)where_lr[sid]--;
                }
            }
        }else if(OT(0xF800,0xF000)){ // Long branch 1
            jump_buf=(rt&0x7FF)<<12;
        }else if(OT(0xE800,0xE800)){ // Long branch 2
            jump_buf+=(rt&0x7FF)<<1;
            if(jump_buf>=0x00400000)jump_buf-=0x00800000; // sign-extend the 23-bit offset
            uint32 o=pos+2+jump_buf;
            L;printf("Long branch with link! (%08x to %08x){\n",pos,o);
            link=1;
            BU;
            // Bit 12 set: BL, stays in THUMB. Clear: BLX, word-aligned ARM target.
            LIWRP(if((rt>>12)&1)thumb(o);else arm(o&~3u));
            BD;
            L;printf("}\n");
        }
    }
}
int main(int argc, char **argv){
FILE *f;
if(argc!=3){L;printf("Usage: %s <rom.gba> <rom.map>",argv[0]);return 0;}
if(!(f=fopen(argv[1],"rb"))){L;printf("Couldn't open %s\n",argv[1]);return 0;}
fseek(f,0,SEEK_END); len=ftell(f); rom=malloc(len); map=malloc(len);
fseek(f,0,SEEK_SET); fread(rom,len,1,f);
fclose(f);
memset(map,0,len);
memset(reg,0,64);
rom-=base; map-=base;
arm(base);
thumb(0x080e607e);
rom+=base; map+=base;
if(!(f=fopen(argv[2],"wb"))){L;printf("Couldn't open %s\n",argv[2]);return 0;}
fwrite(map,len,1,f);
fclose(f);
}
Now I have a few questions...
- Pokemon Ruby reads (and even executes) a lot of data from the 0x03000000 area. How do these code segments get there, and where are their original positions in the ROM?
- Is it true that the third-generation games only use THUMB mode?
- What is the code at 081e082c good for? Spoiler:081e082c 4700 bx r0
081e082e 46c0 mov r8, r8
081e0830 4708 bx r1
081e0832 46c0 mov r8, r8
081e0834 4710 bx r2
081e0836 46c0 mov r8, r8
081e0838 4718 bx r3
081e083a 46c0 mov r8, r8
081e083c 4720 bx r4
081e083e 46c0 mov r8, r8
081e0840 4728 bx r5
081e0842 46c0 mov r8, r8
081e0844 4730 bx r6
081e0846 46c0 mov r8, r8
081e0848 4738 bx r7
081e084a 46c0 mov r8, r8
081e084c 4740 bx r8
081e084e 46c0 mov r8, r8
081e0850 4748 bx r9
081e0852 46c0 mov r8, r8
Last edited: