Thread: Research: Asm-scanner
View Single Post
  #1    
Old June 10th, 2010, 10:40 AM
knizz's Avatar
knizz
 
Join Date: Aug 2007
I wrote a program that is supposed to find all areas in the ROM that contain executable code. Although it doesn't always work the way it should, I want to share it. It outputs a file with the same size as the ROM. A 0x01 means "At this position in the ROM there is ARM code", 0x02 stands for THUMB code, and 0x03 for data that is used by the code directly.

Code:
// Copyright (c) 2010, David Kreuter
// Do what you want (with this code) cause a pirate is free... YOU ARE A PIRATE
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

// Fixed-width integer aliases used throughout the scanner.
// They are defined in terms of <stdint.h> so the widths the decoder relies on
// (32-bit ARM words, 16-bit THUMB halfwords, byte-sized map cells) hold on
// every platform instead of depending on the sizes of int/short/char.
typedef uint32_t uint32;
typedef uint16_t uint16;
typedef uint8_t  uint8;

typedef int32_t  sint32;
typedef int16_t  sint16;
typedef int8_t   sint8;

// Read a 32-bit word (NA) or 16-bit halfword (NT) at address p, relative to
// the global `rom` pointer. main() offsets `rom` by -base before scanning, so
// p is used as a GBA bus address.
#define NA(p) (*(uint32*)(rom+(p)))
#define NT(p) (*(uint16*)(rom+(p)))

// Opcode tests: true when the bits of the current opcode selected by mask st
// equal dt. OA reads the local `ra` (ARM word), OT the local `rt` (THUMB
// halfword). Fully parenthesized so they compose safely with !, && and ||.
#define OA(st,dt) (((ra)&(st))==(dt))
#define OT(st,dt) (((rt)&(st))==(dt))

// True when the ARM condition field (bits 31-28) of `ra` is 0xE.
#define ALWAYS OA(0xF0000000,0xE0000000)
// Number of entries in where_lr[].
#define STACK_SIZE 100

// "Link wrapper": run statement f; when the local `link` is set, run it in a
// fresh where_lr[] frame (initialised to 14, i.e. LR is in R14) and restore
// `sid` afterwards. Uses the locals `link` and `bsid` of the calling function.
// `sid` is capped at STACK_SIZE-1 so where_lr[] is never indexed out of
// bounds; beyond that depth the top frame is reused.
// Deliberately NOT wrapped in do{}while(0): one existing call site invokes it
// without a trailing semicolon.
#define LIWRP(f) if(link){bsid=sid;if(sid<STACK_SIZE-1)sid++;where_lr[sid]=14;} f; if(link)sid=bsid;
#define L printf("%08x ",reg[0]) // For debugging. Replace it if you want
#define BU memcpy(reg_backup,reg,sizeof(reg)) // Backup registers
#define BD memcpy(reg,reg_backup,sizeof(reg)) // Restore registers saved by BU

// Values written into the map. Each is a repeated byte so that a single
// 32-bit (or, for THUMB, 16-bit) store marks every byte of the access.
#define MAP_ARM 0x01010101
#define MAP_THUMB 0x0202
#define MAP_DIR_REF 0x03030303

// Address that corresponds to the first byte of the loaded ROM image.
const uint32 base = 0x08000000;
// Size of the loaded ROM image in bytes; set by main().
uint32 len;
// ROM contents. main() offsets this pointer by -base while scanning so it
// can be indexed with addresses in [base, base+len).
uint8 *rom;
// Output map, same size and addressing as rom (see the MAP_* values).
uint8 *map;

// Tracked register values; only updated by the load instructions that the
// scanner decodes.
uint32 reg[16];
// Per call-frame location of the return address:
//   0-15  -> it is held in that register,
//   >= 16 -> it is on the stack (16 plus the number of words pushed after it).
uint8 where_lr[STACK_SIZE];
// Index of the current frame in where_lr[] ("stack id").
static uint16 sid;

void arm(uint32 pos);
void thumb(uint32 pos);

// Returns non-zero when pos lies inside the loaded ROM image, i.e. in the
// half-open address interval [base, base+len).
uint8 inrange(uint32 pos){
	if(pos<base){
		return 0;
	}
	return pos<base+len;
}

// Decode the signed 24-bit word offset of an ARM B/BL/BLX(imm) opcode into a
// byte offset. Returned as uint32 so that adding it to an address wraps the
// way a negative offset should.
static uint32 arm_branch_offset(uint32 ra){
	uint32 off=(ra&0x00FFFFFF)<<2;
	if(ra&0x00800000)off|=0xFC000000; // sign-extend bit 23 of imm24
	return off;
}

// Scan ARM code starting at address pos.
//
// Every word visited is marked MAP_ARM in the map. Scanning stops at the
// first byte that is already mapped, at the end of the ROM image, at a
// 0xFFFFFFFF word, or after an unconditional branch without link. Branch
// targets are followed recursively (arm() or thumb() as appropriate);
// literal loads are recorded in reg[] and marked MAP_DIR_REF.
//
// pos: address of the first instruction; anything outside the ROM image is
//      reported and ignored.
void arm(uint32 pos){
	uint32 reg_backup[16];
	uint16 bsid;
	uint8 link;

	if(!inrange(pos)){
		L;printf("Out of range: %08x\n",pos);
		return;
	}
	// Also require the last byte of the word to be in range so the 32-bit
	// read/mark below never touches memory beyond the image.
	for(;inrange(pos)&&inrange(pos+3)&&!map[pos];pos+=4){
		uint32 ra=NA(pos);
		if(ra==0xFFFFFFFF){
			// The original code divided by zero here to stop the program.
			// Stop following this path instead so the map still gets written.
			// NOTE(review): presumably 0xFFFFFFFF is padding, not code - confirm.
			L;printf("ARM   %08x: %08x (not code, stopping)\n",pos,ra);
			return;
		}
		(*(uint32*)(map+pos))=MAP_ARM;
		L;printf("ARM   %08x: %08x\n",pos,ra);
		if(OA(0x0FFFFFD0,0x012FFF10)){ // BX / BLX Rm
			uint32 target=reg[ra&0xF];
			L;printf("BX R%d\n",(int)(ra&0xF));
			// Bit 5 is the only bit that differs between BX (...1x) and
			// BLX (...3x); bit 6 is forced to 0 by the mask above.
			link=(ra>>5)&1;
			BU;
			// Bit 0 of the target selects the instruction set.
			LIWRP(if(target&1)thumb(target-1);else arm(target&~3u));
			if(ALWAYS&&!link)return;
			BD;
		}else if(OA(0xFE000000,0xFA000000)){ // BLX imm (always switches to THUMB)
			L;printf("BLX\n");
			link=1;
			BU;
			// Bit 24 (H) supplies the halfword part of the target address.
			LIWRP(thumb(pos+8+arm_branch_offset(ra)+((ra>>23)&0x2)));
			BD;
		}else if(OA(0xFE700000,0xE4100000)){ // LDR Rd,[Rn,#+/-imm12], condition AL
			uint32 rn=(ra>>16)&0xF;
			uint32 rd=(ra>>12)&0xF;
			uint32 o=ra&0xFFF;
			uint32 addr;
			reg[15]=pos+8; // value PC has when used as base register
			// Bit 23 (U) selects add/subtract of the offset.
			// NOTE(review): bit 24 (P) is not examined, so post-indexed
			// loads are treated like pre-indexed ones, as before.
			addr=(ra&0x00800000)?reg[rn]+o:reg[rn]-o;
			if(inrange(addr)&&inrange(addr+3)){
				L;printf("LDR PC-Relative (%08x: %08x)\n",addr,NA(addr));
				reg[rd]=NA(addr);
				(*(uint32*)(map+addr))=MAP_DIR_REF;
			}else{
				reg[rd]=0;
				L;printf("LDR PC-Relative out of range\n");
			}
		}else if(OA(0x0E000000,0x0A000000)){ //B(L)
			L;printf("B(L)\n");
			link=(ra>>24)&1;
			BU;
			LIWRP(arm(pos+8+arm_branch_offset(ra)));
			if(ALWAYS&&!link)return;
			BD;
		}
	}
}

// Scan THUMB code starting at address pos.
//
// Every halfword visited is marked MAP_THUMB in the map. Scanning stops at
// the first byte that is already mapped, at the end of the ROM image, at a
// recognised return (BX to the register holding the return address, or POP
// to PC) or after an unconditional branch. Branch targets are followed
// recursively; literal/immediate-offset loads are recorded in reg[] and the
// loaded word is marked MAP_DIR_REF. where_lr[sid] tracks where the return
// address of the current frame lives (see its declaration).
//
// pos: address of the first instruction; anything outside the ROM image is
//      reported and ignored.
void thumb(uint32 pos){
	uint32 reg_backup[16];
	sint32 jump_buf=0;     // high part of a long branch, set by its first half
	uint16 bsid;
	uint8 link;
	uint8 have_prefix=0;   // set while the previous halfword was "long branch 1"

	if(!inrange(pos)){
		L;printf("Out of range: %08x\n",pos);
		return;
	}
	// Require both bytes of the halfword to be in range so scanning cannot
	// run past the end of the image.
	for(;inrange(pos)&&inrange(pos+1)&&!map[pos];pos+=2){
		uint16 rt=NT(pos);
		uint8 had_prefix=have_prefix;
		have_prefix=0;
		(*(uint16*)(map+pos))=MAP_THUMB;
		L;printf("THUMB %08x: %04x\n",pos,rt);

		if(OT(0xFF00,0x4700)){ // BX / BLX Rm
			uint8 ri=(rt>>3)&0xF;
			uint32 target;
			if(ri==where_lr[sid]||ri==14){
				L;printf("Return\n");
				if(sid>0)sid--; // never underflow the frame index
				return;
			}
			L;printf("BX (High-reg.)\n");
			// reg[15] is not maintained while scanning THUMB code, so BX PC
			// uses the architectural value: this instruction + 4, word aligned.
			target=(ri==15)?((pos+4)&~3u):reg[ri];
			// Bit 0 of the target selects the instruction set.
			if(target&1)thumb(target-1);else arm(target&~3u);
			// Bit 7 clear = plain BX: control does not come back here.
			if(!(rt&0x80))return;
		}else if(OT(0xF800,0x4800)){ // LDR Ra, [$b]
			uint32 addr=((rt&0xFF)<<2)+((pos+4)&~2);
			if(inrange(addr)&&inrange(addr+3)){
				L;printf("LDR PC-Relative (%08x: %08x)\n",addr,NA(addr));
				reg[(rt>>8)&0x7]=NA(addr);
				(*(uint32*)(map+addr))=MAP_DIR_REF;
			}else{
				reg[(rt>>8)&0x7]=0;
				L;printf("LDR PC-Relative out of range\n");
			}
		}else if(OT(0xF800,0x6800)){ // LDR Ra, [Rb, #c]
			// The offset is the 5-bit field in bits 10-6, scaled by 4.
			uint32 addr=reg[(rt>>3)&0x7]+(((rt>>6)&0x1F)<<2);
			if(inrange(addr)&&inrange(addr+3)){
				L;printf("LDR with immediate offset (%08x: %08x)\n",addr,NA(addr));
				reg[rt&0x7]=NA(addr);
				(*(uint32*)(map+addr))=MAP_DIR_REF;
			}else{
				reg[rt&0x7]=0;
				L;printf("LDR with immediate offset out of range\n");
			}
		}else if(OT(0xF800,0xE000)){ // B
			// 11-bit halfword offset -> 12-bit byte offset, sign bit is bit 11.
			jump_buf=(rt&0x7FF)<<1;
			if(jump_buf&0x800)jump_buf-=0x1000;
			L;printf("B\n");
			thumb(pos+4+jump_buf);
			return;
		}else if(OT(0xF000,0xD000)&&((rt>>8)&0xF)<0xE){ // B (cond.); 0xDE/0xDF are not branches
			sint32 off=rt&0xFF;
			if(off&0x80)off-=0x100; // sign-extend the 8-bit halfword offset
			L;printf("B (cond.)\n");
			thumb(pos+4+off+off);
		}else if(OT(0xFF78,0x4670)){ // MOV foo, LR
			// Destination = bits 2-0 plus bit 7 (H1) as bit 3.
			where_lr[sid]=(rt&7)|((rt>>4)&0x8);
			L;printf("RL (R14) moved to R%d\n",where_lr[sid]);
		}else if(OT(0xFE00,0xB400)){ // PUSH
			if(rt&256){
				L;printf("Pushing LR (R14)\n");
				where_lr[sid]=16;
			}
			for(uint8 bit=8;bit>0;bit--){
				if(rt&(1<<(bit-1))){
					L;printf("Pushing R%d\n",bit-1);
					// Each word pushed after LR moves LR one slot deeper.
					if(where_lr[sid]>=16)where_lr[sid]++;
				}
			}
		}else if(OT(0xFE00,0xBC00)){ // POP
			if(rt&256){
				L;printf("Popping to PC (R15)\n");
				if(where_lr[sid]==16){
					where_lr[sid]=15; // Just for clarification
					L;printf("Popped LR (R14) to PC (R15)\n");
					return;
				}
				L;printf("TODO: IMPLEMENT THIZ\n");
				if(where_lr[sid]>16)where_lr[sid]--;
				return;
			}
			for(uint8 bit=8;bit>0;bit--){
				if(rt&(1<<(bit-1))){
					L;printf("Popping to R%d\n",bit-1);
					if(where_lr[sid]==16){
						L;printf("Popped LR (R14) to %d\n",bit-1);
						where_lr[sid]=bit-1;
					}
					if(where_lr[sid]>16)where_lr[sid]--;
				}
			}
		}else if(OT(0xF800,0xF000)){ // Long branch 1
			jump_buf=(rt&0x7FF)<<12;
			have_prefix=1;
		}else if(OT(0xE800,0xE800)){ // Long branch 2
			// Without the first half there is no valid high part; ignore.
			if(had_prefix){
				jump_buf+=(rt&0x7FF)<<1;
				if(jump_buf>=0x00400000)jump_buf-=0x00800000; // sign-extend 23 bits
				uint32 o=pos+2+jump_buf;
				L;printf("Long branch with link! (%08x to %08x){\n",pos,o);
				link=1;
				BU;
				// Bit 12 set: target is THUMB; clear: target is ARM, word aligned.
				LIWRP(if(rt&0x1000)thumb(o);else arm(o&~3u));
				BD;
				L;printf("}\n");
			}
		}
	}
}

int main(int argc, char **argv){
	FILE *f;
	if(argc!=3){L;printf("Usage: %s <rom.gba> <rom.map>",argv[0]);return 0;}
	if(!(f=fopen(argv[1],"rb"))){L;printf("Couldn't open %s\n",argv[1]);return 0;}
	fseek(f,0,SEEK_END); len=ftell(f); rom=malloc(len); map=malloc(len);
	fseek(f,0,SEEK_SET); fread(rom,len,1,f);
	fclose(f);

	memset(map,0,len);
	memset(reg,0,64);
	rom-=base; map-=base;
	arm(base);
	thumb(0x080e607e);
	rom+=base; map+=base;

	if(!(f=fopen(argv[2],"wb"))){L;printf("Couldn't open %s\n",argv[2]);return 0;}
	fwrite(map,len,1,f);
	fclose(f);
}
Now I have a few questions...
  1. Pokemon Ruby reads (and even executes) a lot of data from the 0x03000000 area. How do these code segments get there, and where are their original positions in the ROM?
  2. Is it true that the 3rd-generation games only use THUMB mode?
  3. What is the code at 081e082c good for?
    Spoiler:
    081e082c 4700 bx r0
    081e082e 46c0 mov r8, r8
    081e0830 4708 bx r1
    081e0832 46c0 mov r8, r8
    081e0834 4710 bx r2
    081e0836 46c0 mov r8, r8
    081e0838 4718 bx r3
    081e083a 46c0 mov r8, r8
    081e083c 4720 bx r4
    081e083e 46c0 mov r8, r8
    081e0840 4728 bx r5
    081e0842 46c0 mov r8, r8
    081e0844 4730 bx r6
    081e0846 46c0 mov r8, r8
    081e0848 4738 bx r7
    081e084a 46c0 mov r8, r8
    081e084c 4740 bx r8
    081e084e 46c0 mov r8, r8
    081e0850 4748 bx r9
    081e0852 46c0 mov r8, r8
__________________
Firered IDA 6.1 DB: https://www.dropbox.com/s/hvvmxxoo1dkmdzc/firered.idb
VBA-M with lua scripting support (no longer in development)
Reply With Quote