• Our software update is now concluded. You will need to reset your password to log in. In order to do this, you will have to click "Log in" in the top right corner and then "Forgot your password?".
  • Forum moderator applications are now open! Click here for details.
  • Welcome to PokéCommunity! Register now and join one of the best fan communities on the 'net to talk Pokémon and more! We are not affiliated with The Pokémon Company or Nintendo.

Research: Asm-scanner

knizz

192
Posts
16
Years
  • Seen Oct 28, 2020
I wrote a program that is supposed to find all areas in the ROM that contain executable data. Although it doesn't always work the way it should, I want to share it. It outputs a file with the same size as the ROM. A 0x01 means "At this position in the ROM there is ARM code", 0x02 stands for THUMB, and 0x03 for data that is used by the code directly.

Code:
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/*
 * Fixed-width integer aliases used throughout the scanner.
 * They are built on the <stdint.h> exact-width types so that the widths the
 * opcode decoding relies on (32/16/8 bits) hold regardless of how wide the
 * platform's int/short/char happen to be.
 */
typedef uint32_t uint32;
typedef uint16_t uint16;
typedef uint8_t  uint8;

typedef int32_t  sint32;
typedef int16_t  sint16;
typedef int8_t   sint8;

// Read a 32-bit (NA) or 16-bit (NT) value from the ROM image at address p.
// main() biases rom by -base, so p is a bus address, not a file offset.
#define NA(p) (*(uint32*)(rom+(p)))
#define NT(p) (*(uint16*)(rom+(p)))

// Opcode matchers: true when the current opcode, masked with st, equals dt.
// They read the local variable ra (ARM opcode) / rt (THUMB opcode) of the
// function they are used in. Fully parenthesised so that the result can be
// negated or combined with other operators without precedence surprises.
#define OA(st,dt) (((ra)&(st))==(dt))
#define OT(st,dt) (((rt)&(st))==(dt))

// True when the ARM condition field of ra is "always" (0xE).
#define ALWAYS OA(0xF0000000,0xE0000000)
// Number of entries in where_lr.
#define STACK_SIZE 100

// "Link wrapper": run statement f as a call. When the local flag link is set,
// a new where_lr entry is opened (return address starts out in R14) and the
// previous sid is restored afterwards. sid is clamped to the last entry so
// that deep call chains cannot write past the end of where_lr.
// Expands to several statements on purpose: callers pass an if/else as f and
// some omit the trailing semicolon, so it cannot be a do{...}while(0).
#define LIWRP(f) if(link){bsid=sid;if(sid+1<STACK_SIZE)sid++;where_lr[sid]=14;} f; if(link)sid=bsid;
#define L printf("%08x ",reg[0]) // For debugging. Replace it if you want
#define BU memcpy(reg_backup,reg,sizeof(reg)) // Backup registers
#define BD memcpy(reg,reg_backup,sizeof(reg)) // Restore registers from the backup

// Values written into the map: one identical byte per classified ROM byte
// (4 bytes for an ARM opcode or a referenced word, 2 for a THUMB opcode).
#define MAP_ARM 0x01010101
#define MAP_THUMB 0x0202
#define MAP_DIR_REF 0x03030303

// Address that corresponds to offset 0 of the loaded ROM file.
// main() biases rom and map by -base so both can be indexed with such addresses.
const uint32 base=0x08000000;
// Size of the ROM file in bytes (set in main()).
uint32 len;
// Contents of the ROM file.
uint8 *rom;
// Output map, same size as the ROM: one classification byte per ROM byte
// (0 = not visited yet, otherwise one of the MAP_* byte values).
uint8 *map;

// Tracked register values; written by the LDR handlers in arm() and thumb().
uint32 reg[16];
// Indexed by sid: where the return address of the current call level lives.
uint8  where_lr[STACK_SIZE]; // 0-15 Reg 16-256 Stack
static uint16 sid=0; // "Stack-id"

// Forward declarations: the two scanners call each other.
void arm(uint32);
void thumb(uint32);

/* Returns non-zero when pos lies inside the loaded ROM, i.e. in [base, base+len). */
uint8 inrange(uint32 pos){
	if(pos<base){
		return 0;
	}
	return pos<base+len;
}

/*
 * Follow ARM-state code starting at address pos.
 *
 * Every visited opcode is marked MAP_ARM in the map. Scanning stops at the
 * end of the ROM, at bytes that are already marked, at an all-ones word, or
 * after an unconditional branch without link. Branch targets are followed
 * recursively (arm() or thumb() depending on the instruction); words loaded
 * by LDR are marked MAP_DIR_REF and stored in reg[].
 *
 * Fixes over the first version:
 *  - the loop re-checks the range so it cannot run off the end of the ROM;
 *  - an all-ones word ends the scan instead of crashing through 0/0;
 *  - BX/BLX: the link flag is bit 5 (bit 6 is always 0 for this pattern) and
 *    the target state is taken from bit 0 of the register value;
 *  - BLX imm: the H bit was masked over the whole sum (operator precedence);
 *  - B/BL/BLX imm: the 24-bit offset is sign-extended;
 *  - LDR: add/subtract is selected by the U bit (bit 23), and the address is
 *    range-checked before it is dereferenced, as thumb() already does.
 */
void arm(uint32 pos){
	uint32 reg_backup[16];
	uint16 bsid;
	uint8 link;
	
	// An opcode is 4 bytes wide, so its last byte has to be in range as well.
	if(!inrange(pos)||!inrange(pos+3)){
		L;printf("Out of range: %08x\n",pos);
		return;
	}
	for(;inrange(pos)&&inrange(pos+3)&&!map[pos];pos+=4){
		uint32 ra=NA(pos);
		(*(uint32*)(map+pos))=MAP_ARM;
		L;printf("ARM   %08x: %08x\n",pos,ra);
		if(ra==0xFFFFFFFF){
			// The original crashed on purpose here (0/0); stop this path instead.
			L;printf("Hit 0xFFFFFFFF at %08x, stopping\n",pos);
			return;
		}else if(OA(0x0FFFFFD0,0x012FFF10)){ // B(L)X
			uint32 o=reg[ra&0xF];
			L;printf("BX R%d\n",ra&0xF);
			link=(ra>>5)&1; // 0x012FFF1x = BX, 0x012FFF3x = BLX
			BU;
			// Bit 0 of the target selects THUMB (1) or ARM (0) state.
			LIWRP(if(o&1)thumb(o&~1u);else arm(o&~3u))
			if(ALWAYS&&!link)return;
			BD;
		}else if(OA(0xFE000000,0xFA000000)){ // BLX
			uint32 off=(ra&0xFFFFFF)<<2;
			if(off&0x02000000)off|=0xFC000000; // sign-extend the 26-bit byte offset
			L;printf("BLX\n");
			link=1;
			BU;
			// (ra>>23)&2 is the H bit (bit 24) shifted into the halfword position.
			LIWRP(thumb(pos+8+off+((ra>>23)&0x2)));
			BD;
		}else if(OA(0xFE700000,0xE4100000)){ // LDR
			reg[15]=pos+8; // value PC reads as while this opcode executes
			uint16 o=ra&0xFFF;
			uint32 addr=reg[(ra>>16)&0xF]+(((ra>>23)&1)?o:-o);
			if(inrange(addr)&&inrange(addr+3)){
				L;printf("LDR PC-Relative (%08x: %08x)\n",addr,NA(addr));
				reg[(ra>>12)&0xF]=NA(addr);
				(*(uint32*)(map+addr))=MAP_DIR_REF;
			}else{
				reg[(ra>>12)&0xF]=0;
				L;printf("LDR PC-Relative out of range\n");
			}
		}else if(OA(0x0E000000,0x0A000000)){ //B(L)
			uint32 off=(ra&0xFFFFFF)<<2;
			if(off&0x02000000)off|=0xFC000000; // sign-extend the 26-bit byte offset
			L;printf("B(L)\n");
			link=(ra>>24)&1;
			BU;
			LIWRP(arm(pos+8+off));
			if(ALWAYS&&!link)return;
			BD;
		}
	}
}

/*
 * Follow THUMB-state code starting at address pos.
 *
 * Every visited opcode is marked MAP_THUMB in the map. Scanning stops at the
 * end of the ROM, at bytes that are already marked, at a return (BX to the
 * register that holds LR, or POP {PC}), or after an unconditional branch.
 * Branch targets are followed recursively; words loaded by LDR are marked
 * MAP_DIR_REF and stored in reg[]. where_lr[sid] tracks where the return
 * address of the current call level currently lives.
 *
 * Fixes over the first version:
 *  - the loop re-checks the range so it cannot run off the end of the ROM;
 *  - BX: "!rt&0x80" parsed as "(!rt)&0x80" (always 0), so BX never ended the
 *    scan; the target state is now taken from bit 0 of the register value;
 *  - sid is no longer decremented below 0 on a return;
 *  - LDR Rd,[Rb,#imm]: the offset is the 5-bit field times 4;
 *  - B: the 11-bit offset is sign-extended (the old test could never fire);
 *  - B<cond>: condition 0xE/0xF (undefined / SWI) is not a branch;
 *  - MOV Rd,LR: the high bit of Rd is bit 7 (H1), not bit 6 (H2);
 *  - long branch: bit 12 of the suffix selects BL (THUMB) or BLX (ARM);
 *    jump_buf starts at 0 and is no longer clobbered by B.
 */
void thumb(uint32 pos){
	uint32 reg_backup[16];
	sint32 jump_buf=0; // high part of a long branch, set by its first half
	uint16 bsid;
	uint8 link;

	// An opcode is 2 bytes wide, so its last byte has to be in range as well.
	if(!inrange(pos)||!inrange(pos+1)){
		L;printf("Out of range: %08x\n",pos);
		return;
	}
	for(;inrange(pos)&&inrange(pos+1)&&!map[pos];pos+=2){
		uint16 rt=(*(uint16*)(rom+pos));
		(*(uint16*)(map+pos))=MAP_THUMB;
		L;printf("THUMB %08x: %04x\n",pos,rt);

		if(OT(0xFF00,0x4700)){ // BX
			uint8 ri=(rt>>3)&0xF;
			if(ri==where_lr[sid]||ri==14){
				L;printf("Return\n");
				if(sid>0)sid--; // never wrap below the outermost level
				return;
			}
			L;printf("BX (High-reg.)\n");
			// Bit 0 of the target selects THUMB (1) or ARM (0) state.
			if(reg[ri]&1)thumb(reg[ri]&~1u);else arm(reg[ri]&~3u);
			if(!(rt&0x80))return; // plain BX does not come back
		}else if(OT(0xF800,0x4800)){ // LDR Ra, [$b]
			uint32 addr=((rt&0xFF)<<2)+((pos+4)&~2);
			if(inrange(addr)&&inrange(addr+3)){
				L;printf("LDR PC-Relative (%08x: %08x)\n",addr,NA(addr));
				reg[(rt>>8)&0x7]=NA(addr);
				(*(uint32*)(map+addr))=MAP_DIR_REF;
			}else{
				reg[(rt>>8)&0x7]=0;
				L;printf("LDR PC-Relative out of range\n");
			}
		}else if(OT(0xF800,0x6800)){ // LDR Ra, [Rb, #c]
			// The immediate is a 5-bit word count (bits 10-6).
			uint32 addr=reg[(rt>>3)&0x7]+(((rt>>6)&0x1F)<<2);
			if(inrange(addr)&&inrange(addr+3)){
				L;printf("LDR with immediate offset (%08x: %08x)\n",addr,NA(addr));
				reg[rt&0x7]=NA(addr);
				(*(uint32*)(map+addr))=MAP_DIR_REF;
			}else{
				reg[rt&0x7]=0;
				L;printf("LDR with immediate offset out of range\n");
			}
		}else if(OT(0xF800,0xE000)){ // B
			sint32 off=(rt&0x7FF)<<1;
			if(off&0x800)off-=0x1000; // sign-extend the 12-bit byte offset
			L;printf("B\n");
			thumb(pos+4+off);
			return;
		}else if(OT(0xF000,0xD000)&&((rt>>8)&0xF)<0xE){ // B (cond.)
			sint32 off=rt&0xFF;
			if(off&0x80)off-=0x100; // sign-extend the 8-bit halfword offset
			L;printf("B (cond.)\n");
			thumb(pos+4+off+off);
		}else if(OT(0xFF78,0x4670)){ // MOV foo, LR
			// Destination = bits 2-0 plus H1 (bit 7) as bit 3.
			where_lr[sid]=(rt&7)+((rt>>4)&0x8);
			L;printf("RL (R14) moved to R%d\n",where_lr[sid]);
		}else if(OT(0xFE00,0xB400)){ // PUSH
			if(rt&256){
				L;printf("Pushing LR (R14)\n");
				where_lr[sid]=16;
			}
			// Each further pushed register moves a stacked LR one slot away.
			for(uint8 bit=8;bit>0;bit--){
				if(rt&(1<<(bit-1))){
					L;printf("Pushing R%d\n",bit-1);
					if(where_lr[sid]>=16)where_lr[sid]++;
				}
			}
		}else if(OT(0xFE00,0xBC00)){ // POP
			if(rt&256){
				L;printf("Popping to PC (R15)\n");
				if(where_lr[sid]==16){
					where_lr[sid]=15; // Just for clarification
					L;printf("Popped LR (R14) to PC (R15)\n");
					return;
				}
				L;printf("TODO: IMPLEMENT THIZ\n");
				if(where_lr[sid]>16)where_lr[sid]--;
				return;
			}
			for(uint8 bit=8;bit>0;bit--){
				if(rt&(1<<(bit-1))){
					L;printf("Popping to R%d\n",bit-1);
					if(where_lr[sid]==16){
						L;printf("Popped LR (R14) to %d\n",bit-1);
						where_lr[sid]=bit-1;
					}
					if(where_lr[sid]>16)where_lr[sid]--;
				}
			}
		}else if(OT(0xF800,0xF000)){ // Long branch 1
			jump_buf=(rt&0x7FF)<<12;
		}else if(OT(0xE800,0xE800)){ // Long branch 2
			jump_buf+=(rt&0x7FF)<<1;
			if(jump_buf>=0x00400000)jump_buf-=0x00800000; // sign-extend from bit 22
			uint32 o=pos+2+jump_buf;
			L;printf("Long branch with link! (%08x to %08x){\n",pos,o);
			link=1;
			BU;
			// Bit 12 set: BL (target is THUMB). Clear: BLX (target is ARM, word aligned).
			LIWRP(if(rt&0x1000)thumb(o);else arm(o&~3u))
			BD;
			jump_buf=0;
			L;printf("}\n");
		}
	}
}

int main(int argc, char **argv){
	FILE *f;
	if(argc!=3){L;printf("Usage: %s <rom.gba> <rom.map>",argv[0]);return 0;}
	if(!(f=fopen(argv[1],"rb"))){L;printf("Couldn't open %s\n",argv[1]);return 0;}
	fseek(f,0,SEEK_END); len=ftell(f); rom=malloc(len); map=malloc(len);
	fseek(f,0,SEEK_SET); fread(rom,len,1,f);
	fclose(f);

	memset(map,0,len);
	memset(reg,0,64);
	rom-=base; map-=base;
	arm(base);
	thumb(0x080e607e);
	rom+=base; map+=base;

	if(!(f=fopen(argv[2],"wb"))){L;printf("Couldn't open %s\n",argv[2]);return 0;}
	fwrite(map,len,1,f);
	fclose(f);
}

Now I have a few questions...
  1. Pokemon Ruby reads (and even executes) a lot of data from the 0x03000000-area. How do these code-segments get there and where are their original positions (in the ROM)?
  2. Is it true that the 3rd gen games only use THUMB-Mode?
  3. What is the code at 081e082c good for?
    Spoiler:
 
Last edited:

sonic1

ASM is my life now...
77
Posts
15
Years
Hi

That program sure is awesome (can't compile it xD)

About the 3 questions, I've come up with an answer for 2 of them (perhaps)

1. Have you ever thought about the structure of DMA (the thing that makes dynamic offsets in RAM in FR/LG/EM)? Well, not quite, as R/S don't have this... I don't know the structure of DMA well, but I formulated a hypothesis...
Well, imagine that there's a pointer to 020F000 (in RAM)... Now imagine that that offset has a pointer to another offset...
Like ROM POINTER>STATIC RAM POINTER>DYNAMIC RAM POINTER
I think it's the same with Ruby, except for the dynamic pointer... This was done to deter hackers without using DMA...

Now for the second question... YES, mainly; it only uses ARM when really needed (like in the ROM header, to set the ROM's starting position, and the ROM's start-up code being ARM too...)

Well thats all for now

Hope it helped
 

knizz

192
Posts
16
Years
  • Seen Oct 28, 2020
Now I'm relieved. I thought it was a bug's fault that my program didn't find any ARM code except in the header.
 

ZodiacDaGreat

Working on a Mobile System
429
Posts
17
Years
Uhm, hardly any routines in Pokemon are ARM based, besides the routine calling the main loop I think.

The IWRAM is used for data (speaking for Ruby), for example, game play time, Pokemon party data, RNG, and so much more. Some are loaded from the ROM, while some are stored; still others are updated at every cycle of gameplay.

Regarding the functions at 081e082c: these are all used for routine jumping/branching. When an address is loaded into a register and branching is required, those functions are called via BL (branch with link). The code after the bx is all dead code.

Edit: I don't think an ASM scanner is wise; a disassembler like IDA Pro can do the job better and more accurately. Uhm, that's my opinion only. Hope all this helps.
 

Xenesis

Syogun Changer
55
Posts
17
Years
Uhm, hardly any routines in Pokemon are ARM based, besides the routine calling the main loop I think.

You'll find this is the case with pretty well all GBA games for one simple reason: An ARM Opcode is 32 bits, a THUMB opcode (with the exception of the bl instruction) is 16 bits and the GBA's bus width for reading from cartridge is 16 bits. Thus, any ARM opcodes take two reads to load which is slow. Very slow. That being said, if you pre-load the code to areas in RAM (and you will find this happens) it will execute fine.

46C0 mov r8, r8 is a functional but completely useless opcode that in that particular situation is just being used as padding to make all of the bx rx opcodes word aligned, but some people also like to use it as a breakpoint opcode when debugging. Anyhow, that chunk of code would be used for launching subroutines, as the address range of a bl opcode is limited.
 

knizz

192
Posts
16
Years
  • Seen Oct 28, 2020
You create "map-files" by running this program.
asm-scanner(.exe) pokemon.gba pokemon.map

I tried out IDA Pro. It's very useful.
 

HackMew

Mewtwo Strikes Back
1,314
Posts
17
Years
  • Seen Oct 26, 2011
Now I have a few questions...
  1. Pokemon Ruby reads (and even executes) a lot of data from the 0x03000000-area. How do these code-segments get there and where are their original positions (in the ROM)?
  2. Is it true that the 3rd gen games only use THUMB-Mode?
  3. What is the code at 081e082c good for?
    Spoiler:

  1. Ruby (and all other 3rd gen games) copy those routines from the ROM. To track their original position you should use a debugger like VBA-SDL-H.
  2. 99% of the routines are THUMB, because they take less space and execute faster. Few of them, in particular the initialization ones, are coded in ARM though.
  3. Those are small helper routines used in most THUMB routines to call a particular subroutine stored at an arbitrary address.
 

knizz

192
Posts
16
Years
  • Seen Oct 28, 2020
I worked a lot on the successor of the "asm-scanner" and now it is finally in a presentable state. You can find the online-demo with Pokemon Ruby at: LINK REMOVED

On the right side there is a flow-chart of the rom that grows when you browse through the asm-code on the left side. (Move away the red box before clicking anything)

The program tries to simplify the asm-code like this:
Step 1:
mov r0, 0x08123456
ldr r2, r0, 0x00​
Step 2:
r0 = 0x08123456
r2 = *w(r0)​
Step 3:
*removed*
r2 = *w(0x08123456)​

Here is a "worst-case-screenshot"
 
Last edited:

Full Metal

C(++) Developer.
810
Posts
16
Years
:O
This looks like it could really help me learn asm ;D
so...this is like the olly of gba roms?
But most of that code didn't look like asm...hm...would be nice if it was shown side-by-side XP(no, i dont mean that as in it has an emulator built in, thats what VBA is for)
 

knizz

192
Posts
16
Years
  • Seen Oct 28, 2020
:O
This looks like it could really help me learn asm ;D
so...this is like the olly of gba roms?
But most of that code didn't look like asm...hm...would be nice if it was shown side-by-side XP(no, i dont mean that as in it has an emulator built in, thats what VBA is for)

In fact the first version displayed real asm-code. It doesn't look like asm *now* because I don't *want* it to look like asm. I described the transformation in my previous post.
 

HackMew

Mewtwo Strikes Back
1,314
Posts
17
Years
  • Seen Oct 26, 2011
Well, the non-ASM code in the picture is actually more confusing to read than pure ASM code from my point of view...

Code:
081de39c  b570 push {r4-r6,lr}
081de39e  1c02 add r2, r0, #0x0
081de3a0  481e ldr r0, [$081de41c] (=$03007ff0)
081de3a2  6804 ldr r4, [r0, #0x0]
081de3a4  20f0 mov r0, #0xf0
081de3a6  0300 lsl r0, r0, #0x0c
081de3a8  4010 and r0, r2
081de3aa  0c02 lsr r2, r0, #0x10
081de3ac  2600 mov r6, #0x0
081de3ae  7222 strb r2, [r4, #0x8]
081de3b0  491b ldr r1, [$081de420] (=$0842fae8)
081de3b2  1e50 sub r0, r2, #0x1
081de3b4  0040 lsl r0, r0, #0x01
081de3b6  1840 add r0, r0, r1
081de3b8  8805 ldrh r5, [r0, #0x0]
081de3ba  6125 str r5, [r4, #0x10]
081de3bc  20c6 mov r0, #0xc6
081de3be  00c0 lsl r0, r0, #0x03
081de3c0  1c29 add r1, r5, #0x0
081de3c2  f002 bl $081e0868
081de3c6  72e0 strb r0, [r4, #0xb]
081de3c8  4816 ldr r0, [$081de424] (=$00091d1b)
081de3ca  4368 mul r0, r5
081de3cc  4916 ldr r1, [$081de428] (=$00001388)
081de3ce  1840 add r0, r0, r1
081de3d0  4916 ldr r1, [$081de42c] (=$00002710)
081de3d2  f002 bl $081e0868
081de3d6  1c01 add r1, r0, #0x0
081de3d8  6161 str r1, [r4, #0x14]
081de3da  2080 mov r0, #0x80
081de3dc  0440 lsl r0, r0, #0x11
081de3de  f002 bl $081e0868
081de3e2  3001 add r0, #0x1
081de3e4  1040 asr r0, r0, #0x01
081de3e6  61a0 str r0, [r4, #0x18]
081de3e8  4811 ldr r0, [$081de430] (=$04000102)
081de3ea  8006 strh r6, [r0, #0x0]
081de3ec  4c11 ldr r4, [$081de434] (=$04000100)
081de3ee  4812 ldr r0, [$081de438] (=$00044940)
081de3f0  1c29 add r1, r5, #0x0

All those "+00" could easily be skipped to remove unnecessary complexity, for example. Pointer dereferencing is not a very good idea either. Oh, and brackets could help too.
 
Back
Top