@ created by ~ipatix~
@ revision 2.1
/* globals */
.global main_mixer @ entry label of the mixer (Thumb code)
.global main_mixer_end @ exported as well; the label itself is not defined in this part of the file
/*
 * game code definitions
 * Each value is only an identifier that gets compared against USED_GAME
 * in the "GAME CONFIGS" section further down.
 */
.equ GAME_BPED, 0 @ Pokemon Emerald (German)
.equ GAME_BPEE, 1 @ Pokemon Emerald (English)
.equ GAME_BPRE, 2 @ Pokemon Fire Red (English)
.equ GAME_KWJ6, 3 @ Kawa's Jukebox 2006
.equ GAME_AE7E, 4 @ Fire Emblem (US)
.equ GAME_BPRD, 5 @ Pokemon Fire Red (presumably the German release, by analogy with GAME_BPED - confirm)
/* SELECT USED GAME HERE */
.equ USED_GAME, GAME_BPRE @ CHOOSE YOUR GAME
/*
 * Frame lengths. The numeric suffix is presumably the output sample rate in Hz
 * and the value the number of samples per frame (TODO confirm, none of these
 * symbols is referenced in the visible part of the file).
 * The mixing loops below consume 8 samples per iteration, which is why a
 * length that is not a multiple of 8 cannot be used.
 */
.equ FRAME_LENGTH_5734, 0x60
.equ FRAME_LENGTH_7884, 0x84 @ THIS MODE IS NOT SUPPORTED BY THIS ENGINE BECAUSE IT DOESN'T USE AN 8 ALIGNED BUFFER LENGTH (0x84 = 132)
.equ FRAME_LENGTH_10512, 0xB0
.equ FRAME_LENGTH_13379, 0xE0 @ DEFAULT
.equ FRAME_LENGTH_15768, 0x108
.equ FRAME_LENGTH_18157, 0x130
.equ FRAME_LENGTH_21024, 0x160
.equ FRAME_LENGTH_26758, 0x1C0
.equ FRAME_LENGTH_31536, 0x210
.equ FRAME_LENGTH_36314, 0x260
.equ FRAME_LENGTH_40137, 0x2A0
.equ FRAME_LENGTH_42048, 0x2C0
/* per game addresses that the game configs map to decoder_buffer_target */
.equ DECODER_BUFFER_BPE, 0x03001300
.equ DECODER_BUFFER_BPR, 0x03002088
.equ DECODER_BUFFER_KWJ, 0x03005800
/* per game addresses that the game configs map to hq_buffer (the mixing work buffer) */
.equ BUFFER_IRAM_BPE, 0x03001AA8
.equ BUFFER_IRAM_BPR, 0x030028E0
.equ BUFFER_IRAM_KWJ, 0x03005840
.equ BUFFER_IRAM_AE7, 0x03006D60 @ PUT THE WORKBUFFER ADDRESS FOR FIRE EMBLEM HERE!!!
/*
 * stack variables
 * Offsets are relative to SP as it is on entry to main_mixer. Code that has
 * pushed registers in the meantime has to add the pushed size on top
 * (see loop_end_sub and fixed_freq_loop_end_handler).
 */
.equ ARG_FRAME_LENGTH, 0x0 @ TODO actually use this variable
.equ ARG_REMAIN_CHN, 0x4 @ This is the channel count variable
.equ ARG_BUFFER_POS, 0x8 @ stores the current output buffer pointer
.equ ARG_LOOP_START_POS, 0xC @ stores wave loop start position (absolute) in channel loop
.equ ARG_LOOP_LENGTH, 0x10 @ stores the wave loop length (loop end - loop start); 0 if the wave doesn't loop
@ .equ ARG_UKNOWN, 0x14
.equ ARG_VAR_AREA, 0x18 @ pointer to the engine's main work area
/* channel struct (offsets relative to the channel base pointer; consecutive channels are 0x40 bytes apart) */
.equ CHN_STATUS, 0x0 @ [byte] channel status bitfield
.equ CHN_MODE, 0x1 @ [byte] channel mode bitfield
.equ CHN_VOL_1, 0x2 @ [byte] volume right
.equ CHN_VOL_2, 0x3 @ [byte] volume left
.equ CHN_ATTACK, 0x4 @ [byte] wave attack summand
.equ CHN_DECAY, 0x5 @ [byte] wave decay factor
.equ CHN_SUSTAIN, 0x6 @ [byte] wave sustain level
.equ CHN_RELEASE, 0x7 @ [byte] wave release factor
.equ CHN_ADSR_LEVEL, 0x9 @ [byte] current envelope level
.equ CHN_FINAL_VOL_1, 0xA @ [byte] not used anymore!
.equ CHN_FINAL_VOL_2, 0xB @ [byte] not used anymore!
.equ CHN_ECHO_VOL, 0xC @ [byte] pseudo echo volume
.equ CHN_ECHO_REMAIN, 0xD @ [byte] pseudo echo length
.equ CHN_POSITION_REL, 0x18 @ [word] sample countdown in mixing loop
.equ CHN_FINE_POSITION, 0x1C @ [word] inter sample position (23 bits)
.equ CHN_FREQUENCY, 0x20 @ [word] sample rate (in Hz)
.equ CHN_WAVE_OFFSET, 0x24 @ [word] wave header pointer
.equ CHN_POSITION_ABS, 0x28 @ [word] points to the current position in the wave data (relative offset for compressed samples)
.equ CHN_BLOCK_COUNT, 0x3C @ [word] only used for compressed samples: contains the value of the block that is currently decoded
/* wave header struct (offsets relative to the pointer stored in CHN_WAVE_OFFSET) */
.equ WAVE_LOOP_FLAG, 0x3 @ [byte] 0x0 = oneshot; 0x40 = looped
.equ WAVE_FREQ, 0x4 @ [word] pitch adjustment value = mid-C samplerate * 1024
.equ WAVE_LOOP_START, 0x8 @ [word] loop start position
.equ WAVE_LENGTH, 0xC @ [word] loop end / wave end position
.equ WAVE_DATA, 0x10 @ [byte array] actual wave data
/* pulse wave synth configuration offsets (applied to the sample pointer inside init_synth) */
.equ SYNTH_BASE_WAVE_DUTY, 0x1 @ [byte]
.equ SYNTH_WIDTH_CHANGE_1, 0x2 @ [byte]
.equ SYNTH_MOD_AMOUNT, 0x3 @ [byte]
.equ SYNTH_WIDTH_CHANGE_2, 0x4 @ [byte]
/*
 * CHN_STATUS flags - 0x0 = OFF
 * The two lowest bits are not independent flags: together they hold the
 * envelope phase (3 = attack, 2 = decay, 1 = sustain). The ADSR code masks
 * them with #3 and steps to the next phase by subtracting 1.
 */
.equ FLAG_CHN_INIT, 0x80 @ [bit] write this value to init a channel
.equ FLAG_CHN_RELEASE, 0x40 @ [bit] write this value to release (fade out) the channel
.equ FLAG_CHN_COMP, 0x20 @ [bit] is wave being played compressed (yes/no)
.equ FLAG_CHN_LOOP, 0x10 @ [bit] loop (yes/no)
.equ FLAG_CHN_ECHO, 0x4 @ [bit] echo phase
.equ FLAG_CHN_ATTACK, 0x3 @ [phase value] attack phase
.equ FLAG_CHN_DECAY, 0x2 @ [phase value] decay phase
.equ FLAG_CHN_SUSTAIN, 0x1 @ [phase value] sustain phase
/* CHN_MODE flags */
.equ MODE_FIXED_FREQ, 0x8 @ [bit] set to disable resampling (i.e. playback with output rate)
.equ MODE_REVERSE, 0x10 @ [bit] set to reverse sample playback
.equ MODE_COMP, 0x30 @ [mask] is wave being played compressed or reversed, i.e. MODE_REVERSE | 0x20 (TODO: rename flag)
.equ MODE_SYNTH, 0x40 @ [bit] READ ONLY, indicates synthesized output
/* variables of the engine work area */
.equ VAR_REVERB, 0x5 @ [byte] 0-127 = reverb level
.equ VAR_MAX_CHN, 0x6 @ [byte] maximum channels to process
.equ VAR_MASTER_VOL, 0x7 @ [byte] PCM master volume
.equ VAR_DEF_PITCH_FAC, 0x18 @ [word] this value gets multiplied with the samplerate for the inter sample distance
.equ VAR_FIRST_CHN, 0x50 @ [CHN struct] relative offset to channel array
/* just some more defines */
.equ REG_DMA3_SRC, 0x040000D4 @ not referenced by name in the visible code; the DMA setup builds the same address from two immediates
.equ ARM_OP_LEN, 0x4 @ size of one ARM instruction in bytes
@#######################################
@*********** GAME CONFIGS **************
@ add the game's name above to the ASM .equ-s before creating new configs
@
@ Every config selects the following symbols:
@   hq_buffer              address of the mixing work buffer (emitted at hq_buffer_label)
@   decoder_buffer_target  buffer address for the sample decoder; not referenced in the
@                          mixer core, presumably only needed when ENABLE_DECOMPRESSION == 1
@   ALLOW_PAUSE            1 = channel init starts at the offset found in CHN_POSITION_REL
@   DMA_FIX                1 = the DMA3 registers get zeroed right after the sample transfer
@   ENABLE_DECOMPRESSION   1 = assemble the special (compressed / reverse) mixing code
@   PREVENT_CLIP           1 = the downsampler clamps samples instead of letting them wrap
@#######################################
@*********** IF GERMAN POKEMON EMERALD
.if USED_GAME==GAME_BPED
.equ hq_buffer, BUFFER_IRAM_BPE
.equ decoder_buffer_target, DECODER_BUFFER_BPE
.equ ALLOW_PAUSE, 1
.equ DMA_FIX, 1
.equ ENABLE_DECOMPRESSION, 1
.equ PREVENT_CLIP, 1
.endif
@*********** IF GERMAN POKEMON FIRE RED
.if USED_GAME==GAME_BPRD
.equ hq_buffer, BUFFER_IRAM_BPR
.equ decoder_buffer_target, DECODER_BUFFER_BPR
.equ ALLOW_PAUSE, 1
.equ DMA_FIX, 1
.equ ENABLE_DECOMPRESSION, 1
.equ PREVENT_CLIP, 1
.endif
@*********** IF ENGLISH POKEMON EMERALD
.if USED_GAME==GAME_BPEE
.equ hq_buffer, BUFFER_IRAM_BPE
.equ decoder_buffer_target, DECODER_BUFFER_BPE
.equ ALLOW_PAUSE, 1
.equ DMA_FIX, 1
.equ ENABLE_DECOMPRESSION, 1
.equ PREVENT_CLIP, 1
.endif
@*********** IF ENGLISH POKEMON FIRE RED
.if USED_GAME==GAME_BPRE
.equ hq_buffer, BUFFER_IRAM_BPR
.equ decoder_buffer_target, DECODER_BUFFER_BPR
.equ ALLOW_PAUSE, 1
.equ DMA_FIX, 1
.equ ENABLE_DECOMPRESSION, 1
.equ PREVENT_CLIP, 1
.endif
@*********** IF KAWAs JUKEBOX 2006
.if USED_GAME==GAME_KWJ6
.equ hq_buffer, BUFFER_IRAM_KWJ
.equ decoder_buffer_target, DECODER_BUFFER_KWJ
.equ ALLOW_PAUSE, 0
.equ DMA_FIX, 0
.equ ENABLE_DECOMPRESSION, 0
.equ PREVENT_CLIP, 1
.endif
@*********** IF US FIRE EMBLEM
@ no decoder_buffer_target here: decompression is disabled for this game
.if USED_GAME==GAME_AE7E
.equ hq_buffer, BUFFER_IRAM_AE7
.equ ALLOW_PAUSE, 0
.equ DMA_FIX, 0
.equ ENABLE_DECOMPRESSION, 0
.equ PREVENT_CLIP, 0
.endif
@*********** SANITY CHECK
@ If USED_GAME matched none of the configs above, nothing has been defined.
@ Stop right here with a readable message instead of failing later on an
@ unrelated looking line (or leaving hq_buffer as an unresolved symbol).
.ifndef hq_buffer
.error "USED_GAME does not match any game config: add a config block or fix USED_GAME"
.endif
@***********
.thumb
/*
 * main_mixer - entry of the mixer (Thumb).
 * Register and stack contents are prepared by the caller, which is not part
 * of this file. What the code below relies on:
 *   R0 = pointer to the engine work area (VAR_* offsets)
 *   R8 = buffer length in samples
 *   R4, R5, R6 = values used by the reverb code (see do_reverb)
 *   SP = base of the ARG_* stack variables
 */
main_mixer:
/* load Reverb level and check if we need to apply it */
LDRB R3, [R0, #VAR_REVERB]
LSR R3, R3, #2 @ R3 = reverb level / 4; sets Z if the result is 0
BEQ clear_buffer @ no reverb --> just zero the work buffer
ADR R1, do_reverb @ bit 0 of the address is clear, so BX switches to ARM
BX R1
.align 2
.arm
do_reverb:
/*
 * reverb is calculated by the following: new_sample = old_sample * reverb_level / 127
 * note that reverb is mono (both sides get mixed together)
 *
 * reverb gets applied to the frame we are currently looking at and the one after that
 * the magic below simply calculates the pointer for the one after the current one
 *
 * Inputs (set up by the caller, not visible here):
 *   R0 = work area pointer, R3 = reverb level / 4
 *   R5 = pointer to the current frame in the 8 bit output buffer
 *   R6 = byte offset from one channel's sample to the other channel's sample
 *   R8 = buffer length in samples
 * Output: the work buffer (hq_buffer) is filled with one word per sample.
 */
CMP R4, #2
ADDEQ R7, R0, #0x350 @ R4 == 2: the following frame starts at work area + 0x350
ADDNE R7, R5, R8 @ otherwise it directly follows the current frame
MOV R4, R8 @ R4 = sample countdown
ORR R3, R3, R3, LSL#16 @ reverb factor in both halfwords, one multiply scales both sides
STMFD SP!, {R8, LR} @ R8 and LR are used as scratch/pointer inside the loop
LDR LR, hq_buffer_label @ LR = write pointer into the work buffer
reverb_loop:
/* This loop does the reverb processing, two samples per iteration */
LDRSB R0, [R5, R6] @ sample #1: current frame, other channel
LDRSB R1, [R5], #1 @ sample #1: current frame, this channel
LDRSB R2, [R7, R6] @ sample #1: next frame, other channel
LDRSB R8, [R7], #1 @ sample #1: next frame, this channel
LDRSB R9, [R5, R6] @ sample #2: same four loads again
LDRSB R10, [R5], #1
LDRSB R11, [R7, R6]
LDRSB R12, [R7], #1
ADD R0, R0, R1
ADD R0, R0, R2
ADDS R0, R0, R8 @ R0 = sum of the four bytes of sample #1
ADDMI R0, R0, #0x4 @ negative sums get 4 added (presumably to round towards zero)
ADD R1, R9, R10
ADD R1, R1, R11
ADDS R1, R1, R12 @ R1 = sum of the four bytes of sample #2
ADDMI R1, R1, #0x4
MUL R0, R3, R0 @ scale by the reverb factor (both halfwords at once)
MUL R1, R3, R1
STMIA LR!, {R0, R1} @ write two samples to the work buffer
SUBS R4, R4, #2
BGT reverb_loop
/* end of loop */
LDMFD SP!, {R8, LR}
ADR R0, (adsr_setup+1) @ +1 sets bit 0 so that BX switches back to Thumb
BX R0
.thumb
clear_buffer:
/* In case reverb is disabled the buffer gets set to zero */
LDR R3, hq_buffer_label @ R3 = write pointer
MOV R1, R8 @ R1 = buffer length in samples (one word per sample)
MOV R4, #0
MOV R5, #0
MOV R6, #0
MOV R7, #0
/*
 * Setting the buffer to zero happens in a very efficient loop.
 * Each STMIA below clears 4 words = 4 samples. The main loop clears 16
 * samples per pass; the remainder of (length mod 16) is handled up front:
 * bit 2 of the length (4 samples) and bit 3 (8 samples) are shifted out
 * into the carry flag one after the other.
 * Lengths are expected to be multiples of 4; the main loop always runs at
 * least once.
 */
LSR R1, #3 @ carry = bit 2 of the length
BCC clear_buffer_align_8
STMIA R3!, {R4, R5, R6, R7} @ 4 extra samples
clear_buffer_align_8:
LSR R1, #1 @ carry = bit 3 of the length; R1 = length / 16
BCC clear_buffer_align_16
STMIA R3!, {R4, R5, R6, R7} @ 8 extra samples
STMIA R3!, {R4, R5, R6, R7}
clear_buffer_align_16:
/* This repeats until the buffer has been cleared */
STMIA R3!, {R4, R5, R6, R7}
STMIA R3!, {R4, R5, R6, R7}
STMIA R3!, {R4, R5, R6, R7}
STMIA R3!, {R4, R5, R6, R7}
SUB R1, #1 @ Thumb SUB updates the flags
BGT clear_buffer_align_16
/* loop end, execution falls through into adsr_setup */
adsr_setup:
/*
 * okay, before the actual mixing starts
 * the volume and envelope calculation happens
 * Reached from clear_buffer (fall through) or from do_reverb (BX).
 */
MOV R4, R8 @ R4 = buffer length
/* this copies the buffer length to a backup location inside the code
 * so that the ARM routines can reload it after using R8 as a counter
 * TODO: Move this variable to stack
 */
ADR R0, hq_buffer_length_label
STR R4, [R0]
/* init channel loop */
LDR R4, [SP, #ARG_VAR_AREA] @ R4 = main work area pointer
LDR R0, [R4, #VAR_DEF_PITCH_FAC] @ R0 = samplingrate pitch factor
MOV R12, R0 @ --> R12
LDRB R0, [R4, #VAR_MAX_CHN] @ load MAX channels to R0
ADD R4, #VAR_FIRST_CHN @ R4 = Base channel Offset (Channel #0)
mixer_entry:
/*
 * this is the main channel processing loop
 * R0 = remaining channel count, R4 = current channel struct, R12 = pitch factor
 */
STR R0, [SP, #ARG_REMAIN_CHN]
LDR R3, [R4, #CHN_WAVE_OFFSET] @ R3 = wave header pointer
LDRB R6, [R4, #CHN_STATUS] @ R6 = channel status, kept in R6 for the whole ADSR part
MOVS R0, #0xC7 @ mask = INIT | RELEASE | ECHO | phase bits
TST R0, R6 @ check if none of these flags is set
BEQ return_channel_null @ skip channel
/* check channel flags */
LSL R0, R6, #25 @ shift FLAG_CHN_INIT into CARRY, FLAG_CHN_RELEASE ends up in bit 31
BCC adsr_echo_check @ no init requested --> continue with normal channel procedure
/* check leftmost bit (release flag) */
BMI stop_channel_handler @ if the channel is initiated but on release it gets turned off immediately
/* channel init procedure */
MOVS R6, #FLAG_CHN_ATTACK @ set the channel status to ATTACK
MOVS R0, R3 @ R0 = CHN_WAVE_OFFSET
ADD R0, #WAVE_DATA @ R0 = wave data offset
/* Pokemon games seem to init channels differently than other m4a games */
.if ALLOW_PAUSE==0
/* start at the first sample, countdown = wave length */
STR R0, [R4, #CHN_POSITION_ABS]
LDR R0, [R3, #WAVE_LENGTH]
STR R0, [R4, #CHN_POSITION_REL]
.else
/* CHN_POSITION_REL holds a start offset on entry: begin there and shorten the countdown accordingly */
LDR R1, [R4, #CHN_POSITION_REL]
ADD R0, R0, R1
STR R0, [R4, #CHN_POSITION_ABS]
LDR R0, [R3, #WAVE_LENGTH]
SUB R0, R0, R1
STR R0, [R4, #CHN_POSITION_REL]
.endif
MOVS R5, #0 @ initial envelope = #0
STRB R5, [R4, #CHN_ADSR_LEVEL]
STR R5, [R4, #CHN_FINE_POSITION]
LDRB R2, [R3, #WAVE_LOOP_FLAG]
LSR R0, R2, #6 @ non zero if bit 6 (0x40 = looped) or bit 7 is set
BEQ adsr_attack_handler @ if loop disabled --> branch
/* loop enabled here */
MOVS R0, #FLAG_CHN_LOOP
ORR R6, R0 @ update channel status
B adsr_attack_handler
adsr_echo_check:
/* this is the normal ADSR procedure without init */
LDRB R5, [R4, #CHN_ADSR_LEVEL] @ R5 = current envelope level
LSL R0, R6, #29 @ echo flag --> bit 31
BPL adsr_release_check @ PL == echo flag not set
/* pseudo echo handler: count down the echo length */
LDRB R0, [R4, #CHN_ECHO_REMAIN]
SUB R0, #1 @ sets the flags; STRB below leaves them untouched
STRB R0, [R4, #CHN_ECHO_REMAIN]
BHI channel_vol_calc @ if echo still on (counter was > 1) --> branch
stop_channel_handler:
/* switch the channel off */
MOVS R0, #0
STRB R0, [R4, #CHN_STATUS]
return_channel_null:
/* go to end of the channel loop */
B check_remain_channels
adsr_release_check:
LSL R0, R6, #25 @ bit 31 = release bit
BPL adsr_decay_check @ if release == 0 --> branch
/* release handler: level = level * release factor / 256 */
LDRB R0, [R4, #CHN_RELEASE]
@SUB R0, #0xFF @ linear decay; TODO make option for triggering it
@SUB R0, #1
@ADD R5, R5, R0
MUL R5, R5, R0 @ default release algorithm
LSR R5, R5, #8
@BMI adsr_released_handler @ part of linear decay
BEQ adsr_released_handler @ release gone down to #0 --> branch
/* pseudo echo init handler */
LDRB R0, [R4, #CHN_ECHO_VOL]
CMP R5, R0
BHI channel_vol_calc @ if release still above echo level --> branch
adsr_released_handler:
/* level dropped to #0 or to the pseudo echo volume; also entered from the decay handler when the sustain level is #0 */
LDRB R5, [R4, #CHN_ECHO_VOL] @ TODO: replace with MOV R5, R0
CMP R5, #0
BEQ stop_channel_handler @ if pseudo echo vol = 0 --> branch
/* pseudo echo volume handler: keep playing at echo volume */
MOVS R0, #FLAG_CHN_ECHO
ORR R6, R0 @ set the echo flag
B adsr_update_status
adsr_decay_check:
/* check if decay is active */
MOVS R2, #3
AND R2, R6 @ separate phase status bits
CMP R2, #FLAG_CHN_DECAY
BNE adsr_attack_check @ decay not active --> branch
/* decay handler: level = level * decay factor / 256 */
LDRB R0, [R4, #CHN_DECAY]
MUL R5, R0
LSR R5, R5, #8
LDRB R0, [R4, #CHN_SUSTAIN]
CMP R5, R0
BHI channel_vol_calc @ sample didn't decay to the sustain level yet --> branch
/* sustain handler */
MOVS R5, R0 @ current level = sustain level
BEQ adsr_released_handler @ sustain level #0 --> branch
/* step to next phase otherwise */
B adsr_switchto_next
adsr_attack_check:
/* attack handler */
CMP R2, #FLAG_CHN_ATTACK
BNE channel_vol_calc @ if it isn't in attack phase, it has to be in sustain (no adsr change needed) --> branch
adsr_attack_handler:
/* apply attack summand; a freshly initialized channel enters here as well */
LDRB R0, [R4, #CHN_ATTACK]
ADD R5, R5, R0
CMP R5, #0xFF
BCC adsr_update_status @ still below 0xFF --> stay in attack phase
/* cap attack at 0xFF */
MOVS R5, #0xFF
adsr_switchto_next:
/* switch to next adsr phase (attack 3 --> decay 2 --> sustain 1) */
SUB R6, #1
adsr_update_status:
/* store channel status */
STRB R6, [R4, #CHN_STATUS]
channel_vol_calc:
/* store the calculated ADSR level */
STRB R5, [R4, #CHN_ADSR_LEVEL]
/* apply master volume: R5 = envelope level * (master volume + 1) */
LDR R0, [SP, #ARG_VAR_AREA]
LDRB R0, [R0, #VAR_MASTER_VOL]
ADD R0, #1
MUL R5, R0, R5
/* left side volume */
LDRB R0, [R4, #CHN_VOL_2]
MUL R0, R5
LSR R0, R0, #13
MOV R10, R0 @ R10 = left volume
/* right side volume */
LDRB R0, [R4, #CHN_VOL_1]
MUL R0, R5
LSR R0, R0, #13
MOV R11, R0 @ R11 = right volume
/*
 * Now we get closer to actual mixing:
 * For looped samples some additional operations are required
 */
MOVS R0, #FLAG_CHN_LOOP
AND R0, R6 @ R0 = 0 if the channel doesn't loop (this 0 is stored as loop length below)
BEQ mixing_loop_setup @ TODO: This label should rather be called "skip_loop_setup"
/* loop setup handler */
ADD R3, #WAVE_LOOP_START
LDMIA R3!, {R0, R1} @ R0 = loop start, R1 = loop end; R3 now points to the wave data
ADD R3, R0, R3 @ R3 = loop start position (absolute)
STR R3, [SP, #ARG_LOOP_START_POS] @ backup loop start
SUB R0, R1, R0 @ R0 = loop length
mixing_loop_setup:
/* do the rest of the setup */
STR R0, [SP, #ARG_LOOP_LENGTH] @ if loop is off --> R0 = 0x0
LDR R5, hq_buffer_label @ R5 = work buffer pointer
LDR R2, [R4, #CHN_POSITION_REL] @ remaining samples for channel
LDR R3, [R4, #CHN_POSITION_ABS] @ current stream position (abs)
LDRB R0, [R4, #CHN_MODE]
ADR R1, mixing_arm_setup @ bit 0 clear --> BX switches to ARM
BX R1
.align 2
/* literal words, read with PC relative loads from both the Thumb and the ARM code */
hq_buffer_label:
.word hq_buffer
hq_buffer_length_label: @ written by adsr_setup on every call; TODO: Replace with variable on stack
.word 0xFFFFFFFF
.arm
mixing_arm_setup:
/*
 * frequency and mixing loading routine
 * On entry: R0 = CHN_MODE, R2 = remaining samples, R3 = sample pointer,
 * R4 = channel struct, R5 = work buffer, R10/R11 = left/right volume,
 * R12 = pitch factor
 */
LDR R8, hq_buffer_length_label @ R8 = buffer length in samples
ORRS R11, R10, R11, LSL#16 @ R11 = 00RR00LL
BEQ switchto_thumb @ volume #0 --> branch and skip channel processing
/* normal processing otherwise */
TST R0, #MODE_FIXED_FREQ
BNE fixed_mixing_setup
TST R0, #MODE_COMP
BNE special_mixing @ compressed or reverse? --> branch
/* resampled playback of uncompressed data (or synth) from here on */
STMFD SP!, {R4, R9, R12}
/*
 * This mixer supports several kinds of synthesized sounds
 * They are triggered when the loop end = 0
 * This gets checked below
 */
MOVS R2, R2 @ sets Z if no samples remain
ORREQ R0, R0, #MODE_SYNTH
STREQB R0, [R4, #CHN_MODE]
ADD R4, R4, #CHN_FINE_POSITION
LDMIA R4, {R7, LR} @ R7 = Fine Position, LR = Frequency
MUL R4, R12, LR @ R4 = inter sample steps = output rate factor * samplerate
/* now the first samples get loaded */
LDRSB R6, [R3], #1
LDRSB R12, [R3]
TST R0, #MODE_SYNTH
BNE init_synth
/* in case no synth mode should be used, code continues here */
SUB R12, R12, R6 @ R12 = DELTA
/*
 * Mixing goes with volume ranges 0-127
 * They come in 0-255 --> divide by 2
 * Both halves are rounded: the carry of the shift rounds the low half,
 * the added 0x8000 rounds the high half; BIC removes what is left in bits 8-15
 */
MOVS R11, R11, LSR#1
ADC R11, R11, #0x8000
BIC R11, R11, #0xFF00
MOV R1, R7 @ R1 = inter sample position
/*
 * There are 2 different mixing codepaths for uncompressed data
 * path 1: fast mixing, but doesn't support loop or stop
 * path 2: not so fast but supports sample loops / stop
 * This checks if there are enough samples available for path 1.
 * important: R0 is expected to be #0
 * R0:R1 = fine position + step * buffer length; shifted right by 23 this
 * is the number of source samples the whole frame will consume
 */
UMLAL R1, R0, R4, R8
MOV R1, R1, LSR#23
ORR R0, R1, R0, LSL#9
CMP R2, R0 @ actual comparison
BLE split_sample_loading @ if not enough samples are available for path 1 --> branch
/*
 * This is the mixer path 1.
 * The interesting thing here is that the code will
 * buffer the needed samples on stack if enough space
 * on stack is available and not more than 0x400 bytes are needed
 */
SUB R2, R2, R0 @ R2 = remaining samples after this frame
LDR R10, stack_capacity
ADD R10, R10, R0
CMP R10, SP @ carry clear if stack_capacity + needed bytes < SP; flags stay valid until CMPCC
ADD R10, R3, R0 @ R10 = sample position after this frame
ADR R9, custom_stack_3
/*
 * R2 = remaining samples
 * R10 = final sample position
 * SP = original stack location
 * These values will get reloaded after channel processing
 * due to the lack of registers.
 */
STMIA R9, {R2, R10, SP}
CMPCC R0, #0x400 @ > 0x400 bytes --> read directly from ROM rather than buffered
BCS select_mixing_mode @ TODO rename
/*
 * The code below inits the DMA to read word aligned
 * samples from ROM to stack
 */
BIC R1, R3, #3 @ R1 = word aligned source address
MOV R9, #0x04000000
ADD R9, R9, #0xD4 @ R9 = 0x040000D4 (same value as REG_DMA3_SRC)
ADD R0, R0, #7
MOV R0, R0, LSR#2 @ R0 = number of words to copy
SUB SP, SP, R0, LSL#2 @ reserve the buffer on the stack
AND R3, R3, #3
ADD R3, R3, SP @ R3 = sample pointer inside the stack copy (keeps the byte alignment)
ORR LR, R0, #0x84000000 @ word count plus the control bits in the upper halfword
STMIA R9, {R1, SP, LR} @ source, destination, count/control: actually starts the DMA
/* Somehow is necessary for some games not to break */
.if DMA_FIX==1
MOV R0, #0
MOV R1, R0
MOV R2, R1
STMIA R9, {R0, R1, R2} @ zero the three DMA registers again
.endif
select_mixing_mode:
/*
 * This code decides which piece of code to load
 * depending on playback-rate / default-rate ratio.
 * Modes > 1.0 run with different volume levels.
 * A step of 0x800000 equals a ratio of 1.0. The three variants live in
 * math_resources, 6 instructions each. The flags set by the SUBS/SUBPLS
 * below stay intact across the ADR/ADD instructions in between.
 */
SUBS R4, R4, #0x800000 @ step - 1.0
MOVPL R11, R11, LSL#1 @ ratio >= 1.0: double the volume
ADR R0, math_resources @ loads the base pointer of the code
ADDPL R0, R0, #(ARM_OP_LEN*6) @ 6 instructions further
SUBPLS R4, R4, #0x800000 @ ratio >= 1.0: step - 2.0
ADDPL R0, R0, #(ARM_OP_LEN*6) @ ratio >= 2.0: third variant
ADDPL R4, R4, #0x800000 @ TODO how does restoring for > 2.0 ratios work?
LDR R2, function_pointer
CMP R0, R2 @ code doesn't need to be reloaded if it's already in place
BEQ mixing_init
/* This loads the needed code to RAM */
STR R0, function_pointer
LDMIA R0, {R0-R2, R8-R10} @ load 6 opcodes
ADR LR, runtime_created_routine
create_routine_loop:
/*
 * paste code to destination, see below for patterns
 * Opcodes 1-2 go to the first two NOPs of a block, opcodes 3-6 to the
 * four NOPs behind the MLANE. 0x98 bytes is the distance between the two
 * unrolled halves of mixing_loop, so every block gets patched in both halves.
 */
STMIA LR, {R0, R1}
ADD LR, LR, #0x98
STMIA LR, {R0, R1}
SUB LR, LR, #0x8C @ back to the first half, 0xC bytes into the block
STMIA LR, {R2, R8-R10}
ADD LR, LR, #0x98
STMIA LR, {R2, R8-R10}
SUB LR, LR, #0x80 @ start of the next block in the first half
ADDS R5, R5, #0x40000000 @ do that for 4 blocks: the top two bits of R5 serve as counter and wrap back on the 4th add
BCC create_routine_loop
LDR R8, hq_buffer_length_label @ R8 was overwritten with an opcode, reload the buffer length
mixing_init:
MOV R2, #0xFF000000 @ load the fine position overflow bitmask (used as R2, ASR#1 below)
mixing_loop:
/*
 * This is the actual processing and interpolation code loop; NOPs will be replaced by the code above
 * The loop is unrolled 8 times (two halves of 4 blocks). Do not add or
 * remove instructions here: create_routine_loop addresses the NOP slots
 * by fixed byte offsets.
 * R5 = work buffer pointer, R6 = base sample, R7 = fine position,
 * R8 = samples left in this frame, R11 = volume pair, R12 = DELTA
 */
LDMIA R5, {R0, R1, R10, LR} @ load 4 stereo samples to Registers
MUL R9, R7, R12
runtime_created_routine:
NOP @ Block #1
NOP
MLANE R0, R11, R9, R0
NOP
NOP
NOP
NOP
BIC R7, R7, R2, ASR#1
MULNE R9, R7, R12
NOP @ Block #2
NOP
MLANE R1, R11, R9, R1
NOP
NOP
NOP
NOP
BIC R7, R7, R2, ASR#1
MULNE R9, R7, R12
NOP @ Block #3
NOP
MLANE R10, R11, R9, R10
NOP
NOP
NOP
NOP
BIC R7, R7, R2, ASR#1
MULNE R9, R7, R12
NOP @ Block #4
NOP
MLANE LR, R11, R9, LR
NOP
NOP
NOP
NOP
BIC R7, R7, R2, ASR#1
STMIA R5!, {R0, R1, R10, LR} @ write 4 stereo samples
LDMIA R5, {R0, R1, R10, LR} @ load the next 4 stereo samples
MULNE R9, R7, R12
NOP @ Block #1
NOP
MLANE R0, R11, R9, R0
NOP
NOP
NOP
NOP
BIC R7, R7, R2, ASR#1
MULNE R9, R7, R12
NOP @ Block #2
NOP
MLANE R1, R11, R9, R1
NOP
NOP
NOP
NOP
BIC R7, R7, R2, ASR#1
MULNE R9, R7, R12
NOP @ Block #3
NOP
MLANE R10, R11, R9, R10
NOP
NOP
NOP
NOP
BIC R7, R7, R2, ASR#1
MULNE R9, R7, R12
NOP @ Block #4
NOP
MLANE LR, R11, R9, LR
NOP
NOP
NOP
NOP
BIC R7, R7, R2, ASR#1
STMIA R5!, {R0, R1, R10, LR} @ write 4 stereo samples
SUBS R8, R8, #8 @ subtract 8 from the sample count
BGT mixing_loop
/* restore previously saved values: R2 = remaining samples, R3 = final sample position, SP = original stack pointer */
ADR R12, custom_stack_3
LDMIA R12, {R2, R3, SP}
B mixing_end_func
@ work variables (these live inside the code and are written at runtime)
.align 2
custom_stack_3: @ backup of {remaining samples, final sample position, SP} while mixing_loop runs
.word 0x0, 0x0, 0x0
stack_capacity: @ compared against SP before samples get buffered on the stack
.word 0x03007910
function_pointer: @ address of the math_resources variant that is currently pasted into mixing_loop
.word 0x0
@ math resources, not directly used
@ Three variants of 6 instructions each. select_mixing_mode copies one of
@ them into the NOP slots of mixing_loop: the first 2 instructions in front
@ of the MLANE, the other 4 behind it.
math_resources:
MOV R9, R9, ASR#22 @ Frequency Lower than default Frequency
ADDS R9, R9, R6, LSL#1
ADDS R7, R7, R4
ADDPL R6, R12, R6
LDRPLSB R12, [R3, #1]!
SUBPLS R12, R12, R6
ADDS R9, R6, R9, ASR#23 @ Frequency < 2x && Frequency > default frequency
ADD R6, R12, R6
ADDS R7, R7, R4
LDRPLSB R6, [R3, #1]!
LDRSB R12, [R3, #1]!
SUBS R12, R12, R6
ADDS R9, R6, R9, ASR#23 @ Frequency >= 2x higher than default Frequency
ADD R7, R7, R4
ADD R3, R3, R7, LSR#23
LDRSB R6, [R3]
LDRSB R12, [R3, #1]!
SUBS R12, R12, R6
split_sample_loading:
/*
 * mixer path 2: one sample per iteration, reads straight from the wave
 * data and handles loop / end of sample
 */
ADD R5, R5, R8, LSL#2 @ R5 = End of HQ buffer, the current slot is addressed as R5 - R8 * 4
uncached_mixing_loop:
MUL R9, R7, R12 @ calc interpolated DELTA
MOV R9, R9, ASR#22 @ scale down the DELTA
ADDS R9, R9, R6, LSL#1 @ Add to Base Sample (upscaled to 8 bits again)
LDRNE R0, [R5, -R8, LSL#2] @ load sample from buffer
MLANE R0, R11, R9, R0 @ add it to the buffer sample
STRNE R0, [R5, -R8, LSL#2] @ write the sample
ADD R7, R7, R4 @ add the step size to the fine position
MOVS R9, R7, LSR#23 @ write the overflow amount to R9
BEQ uncached_mixing_load_skip @ skip the mixing load if it isn't required
SUBS R2, R2, R7, LSR#23 @ remove the overflow count from the remaining samples
BLLE loop_end_sub @ if the loop end is reached call the loop handler
SUBS R9, R9, #1 @ remove #1 from the overflow count
ADDEQ R6, R12, R6 @ new base sample is previous sample + DELTA
@RETURN LOCATION FROM LOOP HANDLER (it returns to LR + 8 and thereby skips the two instructions above)
LDRNESB R6, [R3, R9]! @ load new sample
LDRSB R12, [R3, #1]! @ load the delta sample (always required)
SUB R12, R12, R6 @ calc new DELTA
BIC R7, R7, #0x3F800000 @ clear the overflow from the fine position by using the bitmask
uncached_mixing_load_skip:
SUBS R8, R8, #1 @ reduce the sample count for the buffer by #1
BGT uncached_mixing_loop
mixing_end_func:
/* common exit of the resampling and synth paths */
SUB R3, R3, #1 @ reduce sample pointer by #1 (presumably because it points at the delta sample, one byte ahead - TODO confirm)
LDMFD SP!, {R4, R9, R12} @ pop values from stack
STR R7, [R4, #CHN_FINE_POSITION] @ store the fine position
B store_coarse_sample_pos @ jump over to code to store coarse channel position
loop_end_sub:
/*
 * called (BLLE) from uncached_mixing_loop when the remaining sample count
 * dropped to <= 0
 * looping: rewinds to the loop start and returns into the mixing loop
 * not looping: cleans up the stack and switches the channel off
 */
ADD R3, SP, #ARG_LOOP_START_POS+0xC @ prepare loop start and loop length loading (0xC due to the 3 registers pushed on the stack)
LDMIA R3, {R3, R6} @ R3 = Loop Start; R6 = Loop Length
CMP R6, #0 @ check if loop is enabled; if Loop is enabled R6 is != 0
RSBNE R9, R2, #0 @ the sample overflow from the resampling needs to get subtracted so the remaining samples is slightly less
ADDNE R2, R6, R2 @ R2 = add the loop length
ADDNE PC, LR, #8 @ return from the subroutine to 2 instructions after the actual return location
LDMFD SP!, {R4, R9, R12} @ restore registers from stack
B update_channel_status
fixed_freq_loop_end_handler:
/* same job for the fixed frequency mixer (only 2 registers on the stack there) */
LDR R2, [SP, #ARG_LOOP_LENGTH+0x8] @ load the loop length value
MOVS R6, R2 @ copy it to R6 and check if loop is disabled
LDRNE R3, [SP, #ARG_LOOP_START_POS+0x8] @ reset the sample pointer to the loop start position
BXNE LR @ if it loops return to mixing function, if it doesn't go on and end mixing
LDMFD SP!, {R4, R9}
update_channel_status:
STRB R6, [R4] @ if loop is disabled R6 = 0 and we can disable the channel by writing R6 to R4 (CHN_STATUS is at offset 0)
B switchto_thumb @ switch to thumb
/*
 * Instruction table for the fixed frequency mixer.
 * fixed_mixing_check_length copies 8 consecutive instructions (4 pairs)
 * out of this table, starting at pair number (sample pointer & 3).
 * A "LSL / ASR#24" pair sign extends one byte of the word in R10 into R6,
 * the "ASR#24 / LDMIA" pair extracts the top byte and loads the next word.
 */
fixed_math_resource: @ not executed, used to create mixing function
MOVS R6, R10, LSL#24
MOVS R6, R6, ASR#24
MOVS R6, R10, LSL#16
MOVS R6, R6, ASR#24
MOVS R6, R10, LSL#8
MOVS R6, R6, ASR#24
MOVS R6, R10, ASR#24
LDMIA R3!, {R10} @ load chunk of samples
MOVS R6, R10, LSL#24
MOVS R6, R6, ASR#24
MOVS R6, R10, LSL#16
MOVS R6, R6, ASR#24
MOVS R6, R10, LSL#8
MOVS R6, R6, ASR#24
LDMFD SP!, {R4, R9, R12} @ NOTE(review): beyond the 14 instructions the copy code can reach and no visible branch lands here - looks like dead code, confirm before removing
fixed_mixing_setup:
/*
 * fixed frequency mixer: one source sample per output sample, no interpolation
 * R2 = remaining samples, R3 = sample pointer, R5 = work buffer pointer,
 * R8 = samples left in this frame, R11 = volume pair
 */
STMFD SP!, {R4, R9} @ backup the channel pointer and R9
fixed_mixing_check_length:
MOV LR, R2 @ LR = remaining samples of the wave
CMP R2, R8 @
MOVGT LR, R8 @ if more samples remain than the buffer still needs, use the buffer's sample count instead
SUB LR, LR, #1 @ shorten samples to process by #1
MOVS LR, LR, LSR#2 @ calculate the amount of words to process (-1/4)
BEQ fixed_mixing_process_unaligned @ process sample by sample if there are <= 4 samples to process
SUB R8, R8, LR, LSL#2 @ subtract the amount of samples we need to process from the buffer length
SUB R2, R2, LR, LSL#2 @ subtract the amount of samples we need to process from the remaining samples
ADR R1, fixed_mixing_custom_routine
ADR R0, fixed_math_resource @ load the 2 pointers: R1 = routine to patch, R0 = instruction table
MOV R9, R3, LSL#30 @ move sample alignment bits to the leftmost position
ADD R0, R0, R9, LSR#27 @ alignment * 8 + resource offset = new resource offset
LDMIA R0!, {R6, R7, R9, R10} @ load 4 instructions
STMIA R1, {R6, R7} @ write the 1st 2 instructions
ADD R1, R1, #0xC @ move label pointer over to the next slot (2 NOPs + MLANE = 0xC bytes)
STMIA R1, {R9, R10} @ write 2nd block
ADD R1, R1, #0xC @ move label pointer to next block
LDMIA R0, {R6, R7, R9, R10} @ load instructions for block #3 and #4
STMIA R1, {R6, R7} @ write block #3
ADD R1, R1, #0xC @ ...
STMIA R1, {R9, R10} @ write block #4
LDMIA R3!, {R10} @ read the first word of samples (R3 is used as a word pointer here)
fixed_mixing_loop:
LDMIA R5, {R0, R1, R7, R9} @ load 4 samples from hq buffer
fixed_mixing_custom_routine:
/* the NOP pairs get replaced at runtime; each pair leaves one sample in R6 and sets Z if it is 0 */
NOP
NOP
MLANE R0, R11, R6, R0 @ add new sample if necessary
NOP
NOP
MLANE R1, R11, R6, R1
NOP
NOP
MLANE R7, R11, R6, R7
NOP
NOP
MLANE R9, R11, R6, R9
STMIA R5!, {R0, R1, R7, R9} @ write the samples to the work area buffer
SUBS LR, LR, #1 @ countdown the sample blocks to process
BNE fixed_mixing_loop @ if the end wasn't reached yet, repeat the loop
SUB R3, R3, #4 @ reduce sample position by #4, we'll need to load the samples again
fixed_mixing_process_unaligned:
MOV R1, #4 @ we need to repeat the loop #4 times to completely get rid of alignment errors
fixed_mixing_unaligned_loop:
LDR R0, [R5] @ load sample from buffer
LDRSB R6, [R3], #1 @ load sample from ROM to R6
MLA R0, R11, R6, R0 @ mix it into the buffer sample
STR R0, [R5], #4 @ write the sample to the buffer
SUBS R2, R2, #1 @ reduce the remaining samples by #1
BLEQ fixed_freq_loop_end_handler @ end of wave reached: loops back or stops the channel
SUBS R1, R1, #1
BGT fixed_mixing_unaligned_loop @ repeat the loop #4 times
SUBS R8, R8, #4 @ reduce the buffer's sample count by the #4 samples just written
BGT fixed_mixing_check_length @ go up to repeat the mixing procedure until the buffer is filled
LDMFD SP!, {R4, R9} @ pop registers from stack
store_coarse_sample_pos:
STR R2, [R4, #CHN_POSITION_REL] @ store relative and absolute sample position
STR R3, [R4, #CHN_POSITION_ABS]
switchto_thumb:
ADR R0, (check_remain_channels+1) @ load the label offset (+1 selects Thumb state) and switch to thumb
BX R0
.thumb
check_remain_channels:
/* end of the channel loop: every channel path ends up here */
LDR R0, [SP, #ARG_REMAIN_CHN] @ load the remaining channels
SUB R0, #1 @ reduce the amount by #1
BLE mixer_return @ end the mixing when finished processing all channels
ADD R4, #0x40 @ advance to the next channel struct (0x40 bytes per channel)
B mixer_entry
mixer_return:
/* all channels mixed: convert the work buffer to the 8 bit output (ARM code) */
ADR R0, downsampler
BX R0
downsampler_return:
/* epilogue: flag completion, drop the stack variables and restore the caller's registers */
LDR R0, [SP, #ARG_VAR_AREA] @ load the main var area to R0
LDR R3, mixer_finished_status @ load some status indication value to R3
STR R3, [R0] @ store this value to the main var area
ADD SP, SP, #0x1C @ release the ARG_* area (0x0 .. 0x18)
POP {R0-R7} @ the first four words are the saved R8-R11, R4-R7 get restored directly
MOV R8, R0
MOV R9, R1
MOV R10, R2
MOV R11, R3
POP {R3} @ return address
BX R3
.align 2
mixer_finished_status:
.word 0x68736D53 @ the bytes 'S','m','s','h' in memory order
.arm
/*
 * Byte distance between the two output planes written by the downsampler.
 * It used to be spelled out as a bare 0x630 in each of the two mutually
 * exclusive variants below; a single name keeps them in sync.
 * NOTE(review): presumably the size of one channel's part of the PCM output
 * buffer - confirm against the sound engine's buffer layout.
 */
.equ DOWNSAMPLER_PLANE_OFFSET, 0x630
/*
 * downsampler - converts the work buffer (one word per sample, two signed
 * 16 bit halves) into 8 bit output.
 * Reads:  hq_buffer, hq_buffer_length_label, [SP, #ARG_BUFFER_POS]
 * Writes: the high halfwords (mixed with CHN_VOL_1) as bytes to the output
 *         pointer, the low halfwords (mixed with CHN_VOL_2) as bytes to
 *         output pointer + DOWNSAMPLER_PLANE_OFFSET
 * Every sample is shifted right by 7 to get down to 8 bits.
 * Clobbers R0-R12; downsampler_return restores the caller's registers.
 */
downsampler:
LDR R10, hq_buffer_label @ R10 = read pointer
LDR R9, [SP, #ARG_BUFFER_POS] @ R9 = write pointer
LDR R8, hq_buffer_length_label @ R8 = sample countdown
MOV R11, #0xFF @ byte mask
.if PREVENT_CLIP==1
/* clamping variant, 2 samples per iteration */
MOV R12, #0xFFFFFFFF
MOV R12, R12, LSL#14 @ R12 = -0x4000, the lower clamp value
MOV R7, #DOWNSAMPLER_PLANE_OFFSET
downsampler_loop:
LDRSH R2, [R10], #2 @ sample #1 low half
LDRSH R0, [R10], #2 @ sample #1 high half
LDRSH R3, [R10], #2 @ sample #2 low half
LDRSH R1, [R10], #2 @ sample #2 high half
/* clamp each value to -0x4000 .. 0x3F80 so that the result fits a signed byte after the shift */
CMP R0, #0x4000
MOVGE R0, #0x3F80
CMP R0, #-0x4000
MOVLT R0, R12
CMP R1, #0x4000
MOVGE R1, #0x3F80
CMP R1, #-0x4000
MOVLT R1, R12
CMP R2, #0x4000
MOVGE R2, #0x3F80
CMP R2, #-0x4000
MOVLT R2, R12
CMP R3, #0x4000
MOVGE R3, #0x3F80
CMP R3, #-0x4000
MOVLT R3, R12
AND R0, R11, R0, ASR#7
AND R1, R11, R1, ASR#7
AND R2, R11, R2, ASR#7
AND R3, R11, R3, ASR#7
ORR R2, R2, R3, LSL#8 @ pack two bytes per side
ORR R0, R0, R1, LSL#8
STRH R2, [R9, R7]
STRH R0, [R9], #2
SUBS R8, #2
BGT downsampler_loop
.else
/* plain variant without clamping, 4 samples per iteration */
downsampler_loop:
LDRH R4, [R10], #2
LDRH R0, [R10], #2
LDRH R5, [R10], #2
LDRH R1, [R10], #2
LDRH R6, [R10], #2
LDRH R2, [R10], #2
LDRH R7, [R10], #2
LDRH R3, [R10], #2
AND R0, R11, R0, LSR#7
AND R1, R11, R1, LSR#7
AND R2, R11, R2, LSR#7
AND R3, R11, R3, LSR#7
AND R4, R11, R4, LSR#7
AND R5, R11, R5, LSR#7
AND R6, R11, R6, LSR#7
AND R7, R11, R7, LSR#7
ORR R4, R4, R5, LSL#8 @ pack four bytes per side
ORR R4, R4, R6, LSL#16
ORR R4, R4, R7, LSL#24
ORR R0, R0, R1, LSL#8
ORR R0, R0, R2, LSL#16
ORR R0, R0, R3, LSL#24
STR R4, [R9, #DOWNSAMPLER_PLANE_OFFSET]
STR R0, [R9], #4
SUBS R8, #4
BGT downsampler_loop
.endif
ADR R0, (downsampler_return+1) @ +1 selects Thumb state
BX R0
.align 2
init_synth:
/*
 * synth entry, reached from mixing_arm_setup when MODE_SYNTH is set
 * R12 = second byte of the wave data, it selects the synth type (0, 1 or other)
 * R3 = sample pointer (wave data + 1), R2 = synth state (gets stored back
 * to CHN_POSITION_REL via mixing_end_func), R4 = step, R7 = phase,
 * R11 = volume pair
 */
CMP R12, #0 @ $030057C4
BNE check_synth_type
/* synth type 0: pulse wave with a modulated duty cycle */
LDRB R6, [R3, #SYNTH_WIDTH_CHANGE_1] @ for saw wave -> 0xF0 (base duty cycle change)
ADD R2, R2, R6, LSL#24 @ add it to the current synth state
LDRB R6, [R3, #SYNTH_WIDTH_CHANGE_2] @ for saw wave -> 0x80 (base duty cycle change #2)
ADDS R6, R2, R6, LSL#24 @ add this to the synth state as well but keep the old value in R2 and put the new one in R6
MVNMI R6, R6 @ invert all bits if the result is negative (duty cycle > 50%)
MOV R10, R6, LSR#8 @ scale the value down by 8 bits (divide by 256) to R10
LDRB R1, [R3, #SYNTH_MOD_AMOUNT] @ for saw wave -> 0xE0
LDRB R0, [R3, #SYNTH_BASE_WAVE_DUTY] @ for saw wave -> 0x10 (base duty cycle offset)
MOV R0, R0, LSL#24 @ convert it to a usable duty cycle
MLA R6, R10, R1, R0 @ calculate the final duty cycle with the offset, and intensity * rotating duty cycle amount
STMFD SP!, {R2, R3, R9, R12} @ these registers hold buffer samples inside the loop
synth_type_0_loop:
/* per sample: add the volume while the phase is below the duty threshold, subtract it otherwise */
LDMIA R5, {R0-R3, R9, R10, R12, LR} @ load 8 samples
CMP R7, R6 @ Block #1
ADDCC R0, R0, R11, LSL#6
SUBCS R0, R0, R11, LSL#6
ADDS R7, R7, R4, LSL#3
CMP R7, R6 @ Block #2
ADDCC R1, R1, R11, LSL#6
SUBCS R1, R1, R11, LSL#6
ADDS R7, R7, R4, LSL#3
CMP R7, R6 @ Block #3
ADDCC R2, R2, R11, LSL#6
SUBCS R2, R2, R11, LSL#6
ADDS R7, R7, R4, LSL#3
CMP R7, R6 @ Block #4
ADDCC R3, R3, R11, LSL#6
SUBCS R3, R3, R11, LSL#6
ADDS R7, R7, R4, LSL#3
CMP R7, R6 @ Block #5
ADDCC R9, R9, R11, LSL#6
SUBCS R9, R9, R11, LSL#6
ADDS R7, R7, R4, LSL#3
CMP R7, R6 @ Block #6
ADDCC R10, R10, R11, LSL#6
SUBCS R10, R10, R11, LSL#6
ADDS R7, R7, R4, LSL#3
CMP R7, R6 @ Block #7
ADDCC R12, R12, R11, LSL#6
SUBCS R12, R12, R11, LSL#6
ADDS R7, R7, R4, LSL#3
CMP R7, R6 @ Block #8
ADDCC LR, LR, R11, LSL#6
SUBCS LR, LR, R11, LSL#6
ADDS R7, R7, R4, LSL#3
STMIA R5!, {R0-R3, R9, R10, R12, LR} @ write 8 samples
SUBS R8, R8, #8 @ remove #8 from sample count
BGT synth_type_0_loop
LDMFD SP!, {R2, R3, R9, R12}
B mixing_end_func
check_synth_type:
SUBS R12, R12, #1 @ remove #1 from the synth type byte and check if it's #0
BNE synth_type_2 @ if it still isn't it's synth type 2 (smooth pan flute)
/* synth type 1 */
MOV R6, #0x300 @ R6 = 0x300 (gets overwritten inside the loop before it is read)
MOV R11, R11, LSR#1 @ halve the volume
BIC R11, R11, #0xFF00 @ clear bad bits from division
MOV R12, #0x70 @ R12 = 0x70
synth_type_1_loop:
/* R2 carries the previous output over to the next sample (half of it gets added each time) */
LDMIA R5, {R0, R1, R10, LR} @ load 4 samples from memory
ADDS R7, R7, R4, LSL#3 @ Block #1 (some oscillator type code)
RSB R9, R12, R7, LSR#24
MOV R6, R7, LSL#1
SUB R9, R9, R6, LSR#27
ADDS R2, R9, R2, ASR#1
MLANE R0, R11, R2, R0
ADDS R7, R7, R4, LSL#3 @ Block #2
RSB R9, R12, R7, LSR#24
MOV R6, R7, LSL#1
SUB R9, R9, R6, LSR#27
ADDS R2, R9, R2, ASR#1
MLANE R1, R11, R2, R1
ADDS R7, R7, R4, LSL#3 @ Block #3
RSB R9, R12, R7, LSR#24
MOV R6, R7, LSL#1
SUB R9, R9, R6, LSR#27
ADDS R2, R9, R2, ASR#1
MLANE R10, R11, R2, R10
ADDS R7, R7, R4, LSL#3 @ Block #4
RSB R9, R12, R7, LSR#24
MOV R6, R7, LSL#1
SUB R9, R9, R6, LSR#27
ADDS R2, R9, R2, ASR#1
MLANE LR, R11, R2, LR
STMIA R5!, {R0, R1, R10, LR} @ write the 4 samples back
SUBS R8, R8, #4
BGT synth_type_1_loop
B mixing_end_func @ goto end
synth_type_2:
/*
 * synth type 2: the sign of the phase in R7 selects a rising
 * (phase >> 23) - 0x80 or a falling 0x180 - (phase >> 23) ramp
 */
MOV R6, #0x80 @ write base values to the registers
MOV R12, #0x180
synth_type_2_loop:
LDMIA R5, {R0, R1, R10, LR} @ load samples from work buffer
ADDS R7, R7, R4, LSL#3 @ Block #1
RSBPL R9, R6, R7, ASR#23
SUBMI R9, R12, R7, LSR#23
MLA R0, R11, R9, R0
ADDS R7, R7, R4, LSL#3 @ Block #2
RSBPL R9, R6, R7, ASR#23
SUBMI R9, R12, R7, LSR#23
MLA R1, R11, R9, R1
ADDS R7, R7, R4, LSL#3 @ Block #3
RSBPL R9, R6, R7, ASR#23
SUBMI R9, R12, R7, LSR#23
MLA R10, R11, R9, R10
ADDS R7, R7, R4, LSL#3 @ Block #4
RSBPL R9, R6, R7, ASR#23
SUBMI R9, R12, R7, LSR#23
MLA LR, R11, R9, LR
STMIA R5!, {R0, R1, R10, LR} @ store the samples back to the buffer
SUBS R8, R8, #4 @ subtract #4 from the remaining samples
BGT synth_type_2_loop
B mixing_end_func
@****************** SPECIAL MIXING ******************@
.if ENABLE_DECOMPRESSION==1
@ Entry of the special mixer (BDPCM compressed and/or reverse playback).
@ Registers as they are used below (they are set up outside this section):
@ R3 = sample position, R4 = channel pointer, R5 = output buffer pointer,
@ R8 = number of samples to mix, R11 = volume, R12 = default rate factor.
@ The first part runs once per channel (guarded by FLAG_CHN_COMP), the part
@ from setup_compressed_mixing_frequency on runs every time.
special_mixing: @ $03006BF8
LDR R6, [R4, #CHN_WAVE_OFFSET] @ load the wave header offset to R6
LDRB R0, [R4, #CHN_STATUS] @ channel status (named offset instead of a bare [R4])
TST R0, #FLAG_CHN_COMP @ check if the channel is initialized
BNE setup_compressed_mixing_frequency @ skip the setup procedure if it's running in compressed mode already
ORR R0, R0, #FLAG_CHN_COMP @ enable the flag in the channel status
STRB R0, [R4, #CHN_STATUS] @ store the channel status
LDRB R0, [R4, #CHN_MODE] @ load the channel mode byte
TST R0, #MODE_REVERSE @ check if reverse mode is not enabled
BEQ determine_compression @ if Reverse Mode isn't enabled we can directly check if the sample has to get decoded
@ Reverse mode: R3 = (2 * header + 0x20 + length) - R3, i.e. the position is
@ mirrored inside [header + 0x10, header + 0x10 + length].
LDR R1, [R6, #WAVE_LENGTH] @ load the amount of samples
ADD R1, R1, R6, LSL#1 @ + 2 * wave header address
ADD R1, R1, #0x20 @ + 2 * 0x10
SUB R3, R1, R3 @ mirror the position
STR R3, [R4, #CHN_POSITION_ABS] @ store the final seek position
determine_compression:
LDRH R0, [R6] @ load the compression flag from the sample header
CMP R0, #0 @ check if the compression is not enabled
BEQ setup_compressed_mixing_frequency @ skip the compression handler
SUB R3, R3, R6 @ compressed: make the position relative to the sample data,
SUB R3, R3, #0x10 @ which starts 0x10 bytes after the header address
STR R3, [R4, #CHN_POSITION_ABS] @ store the initial position (relative, not absolute)
setup_compressed_mixing_frequency:
STMFD SP!, {R4, R9, R12} @ not popped in this section (presumably by mixing_end_func - TODO confirm)
MOVS R11, R11, LSR#1 @ divide master volume by 2, C = bit shifted out
ADC R11, R11, #0x8000 @ add C and 0x8000 (rounds instead of truncating)
BIC R11, R11, #0xFF00 @ clear bits 8-15
LDR R7, [R4, #CHN_FINE_POSITION] @ load the fine position
LDR R1, [R4, #CHN_FREQUENCY] @ load the channel frequency
LDRB R0, [R4, #CHN_MODE] @ load the channel mode again
TST R0, #MODE_FIXED_FREQ @ check if fixed frequency mode is enabled
MOVNE R1, #0x800000 @ fixed frequency: sample step (kept in R1) = 1 << 23, one source sample per output sample
MULEQ R1, R12, R1 @ default rate factor * frequency = sample steps (operand order keeps Rd != Rm)
ADD R5, R5, R8, LSL#2 @ set the buffer pointer to the end of the channel, the loops index it with -R8
LDRH R0, [R6] @ load the codec type
CMP R0, #0 @ check if compression is disabled
BEQ uncompressed_mixing_reverse_check
MOV R0, #0xFF000000 @ set the current decoding block to "something very high" so that the first block always gets decoded
STR R0, [R4, #CHN_BLOCK_COUNT] @ write the last decoded block into the channel vars
LDRB R0, [R4, #CHN_MODE] @ load the channel mode once more
TST R0, #MODE_REVERSE @ test if reverse mode is enabled
BNE compressed_mixing_reverse_init @ reverse playback has its own init and loop
BL bdpcm_decoder @ load a sample from the stream to R12
MOV R6, R12 @ move the base sample to R6
ADD R3, R3, #1 @ increase stream position by #1
BL bdpcm_decoder @ load the next sample ...
SUB R12, R12, R6 @ ... and turn it into the delta to the base sample
@ execution continues with compressed_mixing_loop
@***** MIXING LOOP REGISTER USAGE ***********@
@ R0: Sample to modify from buffer
@ R1: sample steps, added to the fine position for every output sample
@ R2: remaining samples before loop/end
@ R3: sample position
@ R4: channel pointer
@ R5: pointer to the end of buffer
@ R6: Base sample
@ R7: fine position (fraction in bits 0-22, whole samples from bit 23 up)
@ R8: remaining samples for current buffer
@ R9: interpolated sample, later the number of whole samples to advance
@ R10: not used
@ R11: volume
@ R12: Delta Sample (next sample - base sample)
@ LR: overwritten by BL bdpcm_decoder / BLLE loop_end_sub
@********************************************@
@ Forward mixing of a BDPCM compressed sample with linear interpolation.
@ Every source sample is fetched through bdpcm_decoder (position in R3, result in R12).
compressed_mixing_loop:
MUL R9, R7, R12 @ delta sample * fine position = interpolated DELTA
MOV R9, R9, ASR#22 @ scale down: R9 = 2 * delta * fraction
ADDS R9, R9, R6, LSL#1 @ add 2 * base sample, Z is set when the result is silent
LDRNE R0, [R5, -R8, LSL#2] @ only for a non-zero sample: load the current buffer value,
MLANE R0, R11, R9, R0 @ add volume * sample to it
STRNE R0, [R5, -R8, LSL#2] @ and write it back
ADD R7, R7, R1 @ advance the fine position by the sample step
MOVS R9, R7, LSR#23 @ R9 = whole samples to advance, zero if none
BEQ compressed_mixing_load_skip @ no new samples need to be loaded
SUBS R2, R2, R7, LSR#23 @ remove the whole samples from the remaining sample count
BLLE loop_end_sub @ call the loop/ending handler if the countdown reached zero or something negative (defined outside this section; R9 is used again right after it)
SUBS R9, R9, #1 @ check if only one sample has to get loaded
ADDEQ R6, R12, R6 @ if so the new base is base + delta, i.e. the sample that is already decoded
BEQ compressed_mixing_base_load_skip
ADD R3, R3, R9 @ this add plus the decoder call replace LDRNESB R6, [R3, R9]!
BL bdpcm_decoder @ R12 = sample at the new position
MOV R6, R12 @ it becomes the base sample
compressed_mixing_base_load_skip:
ADD R3, R3, #1 @ this add plus the decoder call replace LDRSB R12, [R3, #1]!
BL bdpcm_decoder @ R12 = sample following the base
SUB R12, R12, R6 @ turn it into the delta
BIC R7, R7, #0x3F800000 @ clear bits 23-29 of the fine position (NOTE(review): bits 30-31 stay untouched, so this assumes fewer than 128 whole samples per step - confirm)
compressed_mixing_load_skip:
SUBS R8, R8, #1 @ remove #1 from the remaining samples
BGT compressed_mixing_loop
@SUB R3, R3, #1 @ sample pointer -1 (???); ALREADY DONE BY mixing_end_func
B mixing_end_func
@ Reverse mixing of a BDPCM compressed sample.
@ Same structure as the forward loop, but the position in R3 moves downwards.
@ R6 = base sample, R12 = delta to the sample one position below it;
@ after every load R3 is left on that lower (delta) position.
compressed_mixing_reverse_init:
SUB R3, R3, #1 @ subtract one from the reverse playback location initially
BL bdpcm_decoder @ fetch a sample from stream
MOV R6, R12 @ bdpcm_decoder returns base sample in R12 --> R6
SUB R3, R3, #1 @ seek one sample further backwards
BL bdpcm_decoder @ fetch the DELTA sample
SUB R12, R12, R6 @ calc the Delta value
compressed_mixing_reverse_loop:
MUL R9, R7, R12 @ delta sample * fine position = interpolated DELTA
MOV R9, R9, ASR#22 @ scale down: R9 = 2 * delta * fraction
ADDS R9, R9, R6, LSL#1 @ add 2 * base sample, Z is set when the result is silent
LDRNE R0, [R5, -R8, LSL#2] @ only for a non-zero sample: load the current buffer value,
MLANE R0, R11, R9, R0 @ add volume * sample to it
STRNE R0, [R5, -R8, LSL#2] @ and write it back
ADD R7, R7, R1 @ advance the fine position by the sample step
MOVS R9, R7, LSR#23 @ R9 = whole samples to advance, zero if none
BEQ compressed_mixing_reverse_load_skip @ skip sample loading if we don't need to load new samples from ROM
SUBS R2, R2, R7, LSR#23 @ remove the overflowed samples from the remaining samples
BLLE loop_end_sub @ if the sample playback finished go to end handler (defined outside this section; R9 is used again right after it)
SUBS R9, R9, #1 @ remove sample overflow count by #1
ADDEQ R6, R12, R6 @ make the previous delta sample the new base sample if only #1 sample needs to get loaded
BEQ compressed_mixing_reverse_base_load_skip @skip base sample loading
SUB R3, R3, R9 @ move down to the new base position
BL bdpcm_decoder @ R12 = sample at that position
MOV R6, R12 @ it becomes the base sample
compressed_mixing_reverse_base_load_skip:
SUB R3, R3, #1 @ one position further down
BL bdpcm_decoder @ R12 = sample below the base
SUB R12, R12, R6 @ turn it into the delta
BIC R7, R7, #0x3F800000 @ clear overflow bits (bits 23-29 of the fine position)
compressed_mixing_reverse_load_skip:
SUBS R8, R8, #1 @ one output sample done
BGT compressed_mixing_reverse_loop
@ADD R3, R3, #2 @ ???, copied from original code
@ R3 sits one below the base sample here. The init code above subtracts 1 before
@ its first fetch, and mixing_end_func presumably subtracts 1 as well (see the
@ note at the end of the forward loop), so +3 makes the next call start on the
@ current base sample again - TODO confirm against mixing_end_func.
ADD R3, R3, #3
B mixing_end_func
@ Uncompressed sample handled by the special mixer: only reverse playback is
@ mixed here, everything else leaves through mixing_end_func right away.
@ Samples are read directly as signed bytes at R3 (no decoder call).
@ R6 = base sample at [R3], R12 = delta to the sample at [R3 - 1].
uncompressed_mixing_reverse_check:
LDRB R0, [R4, #CHN_MODE] @ load the channel mode (named offset instead of #1) =$03006D84
TST R0, #MODE_REVERSE @ check if reverse mode is even enabled
BEQ mixing_end_func @ skip the channel if the mode is "awkward"
LDRSB R6, [R3, #-1]! @ step back one byte and load the base sample
LDRSB R12, [R3, #-1] @ load the sample below it (no writeback)
SUB R12, R12, R6 @ calculate DELTA
reverse_mixing_loop:
MUL R9, R7, R12 @ delta sample * fine position = interpolated DELTA
MOV R9, R9, ASR#22 @ scale down: R9 = 2 * delta * fraction
ADDS R9, R9, R6, LSL#1 @ add 2 * base sample, Z is set when the result is silent
LDRNE R0, [R5, -R8, LSL#2] @ only for a non-zero sample: load the current buffer value,
MLANE R0, R11, R9, R0 @ add volume * sample to it
STRNE R0, [R5, -R8, LSL#2] @ and write it back
ADD R7, R7, R1 @ advance the fine position by the sample step
MOVS R9, R7, LSR#23 @ R9 = whole samples to advance, zero if none
BEQ reverse_mixing_load_skip
SUBS R2, R2, R7, LSR#23 @ remove the whole samples from the remaining sample count
BLLE loop_end_sub @ loop/end handling (defined outside this section)
@ R9 was non-zero before the BLLE, so unless loop_end_sub changes it the NE
@ path below is the one taken and the base sample is reloaded from memory.
@ NOTE(review): ADDEQ only executes if loop_end_sub returns with R9 = 0 - confirm.
MOVS R9, R9 @ set Z from the whole sample count
ADDEQ R6, R12, R6 @ R9 == 0: base += delta
LDRNESB R6, [R3, -R9]! @ R9 != 0: move R3 down by R9 and load the new base sample
LDRSB R12, [R3, #-1] @ sample below the base (no writeback)
SUB R12, R12, R6 @ turn it into the delta
BIC R7, R7, #0x3F800000 @ cut off overflow count (bits 23-29) to get new fine position
reverse_mixing_load_skip:
SUBS R8, R8, #1 @ remaining samples -1
BGT reverse_mixing_loop @ continue loop if there are still samples to process
@ADD R3, R3, #1 @ copied from original code (???)
@ R3 sits on the base sample here. The entry code above pre-decrements by 1, and
@ mixing_end_func presumably subtracts 1 as well, hence +2 - TODO confirm.
ADD R3, R3, #2 @ =$03006DE8
B mixing_end_func
@**************** SPECIAL MIXING END ****************@
@************** SPECIAL MIXING LOOPING **************@
compressed_loop_end_sub:
@************ SPECIAL MIXING LOOPING END ************@
@****************** BDPCM DECODER ******************@
@ Returns the decoded sample at sample index R3 of the channel's wave data.
@ In:  R3 = sample index (relative to the start of the sample data)
@      R4 = channel pointer (CHN_BLOCK_COUNT, CHN_WAVE_OFFSET are read)
@ Out: R12 = sample, sign extended
@ R0, R2, R5-R7 are preserved (pushed/popped), R12 and the flags are clobbered.
@ Data layout as decoded below: blocks of 0x21 bytes holding 0x40 samples each.
@ Byte 0 is a raw 8 bit sample, of byte 1 only the low nibble is used, and
@ bytes 2-0x20 carry two nibbles each (high nibble first). Every nibble indexes
@ delta_lookup_table, and the looked-up value is added to the previous sample.
@ The whole block is decoded into the buffer at decoder_buffer, and the index of
@ the block is remembered in CHN_BLOCK_COUNT so that further reads from the same
@ block skip the decoding.
bdpcm_decoder: @ RETURNS SAMPLE FROM POSITION XXX in R12
STMFD SP!, {R0, R2, R5-R7, LR} @ push registers to make them free to use: R0, R2, R5, R6, R7, LR
MOV R0, R3, LSR#6 @ R0 = block index (position / 0x40 samples per block)
LDR R12, [R4, #CHN_BLOCK_COUNT] @ index of the block that was decoded last for this channel
CMP R0, R12
BEQ bdpcm_decoder_return @ same block: the buffer content is reused
STR R0, [R4, #CHN_BLOCK_COUNT] @ store the block position to Channel Vars
MOV R12, #0x21 @ load the block length to R12 (1 Block = 0x21 Bytes)
MUL R2, R12, R0 @ multiply the block count with the block length to calc actual byte position of current block
LDR R12, [R4, #CHN_WAVE_OFFSET] @ load the wave header offset to R12
ADD R2, R2, R12 @ add the wave header offset and 0x10 to get the actual position in ROM
ADD R2, R2, #0x10 @ (the sample data starts 0x10 bytes after the header address)
LDR R5, decoder_buffer @ load the decoder buffer pointer to R5
ADR R6, delta_lookup_table @ load the lookup table pointer to R6
MOV R7, #0x40 @ load the block sample count (0x40) to R7
LDRB LR, [R2], #1 @ load the first byte & sample from the wave data to LR (each block starts with a signed 8 bit pcm sample) LDRSB not necessary due to the 24 high bits being cut off anyway
STRB LR, [R5], #1 @ write the sample to the decoder buffer
LDRB R12, [R2], #1 @ load the second byte to R12, only its low nibble gets decoded (the branch skips the high nibble part)
B bdpcm_decoder_lsb
bdpcm_decoder_msb:
LDRB R12, [R2], #1 @ load the next 2 samples to get decoded
MOV R0, R12, LSR#4 @ separate the 4 MSBits
LDRSB R0, [R6, R0] @ load the differential value from the lookup table
ADD LR, LR, R0 @ add the decoded value to the previous sample value to calc the current samples' level
STRB LR, [R5], #1 @ write the output sample to the decoder buffer and increment buffer pointer
bdpcm_decoder_lsb:
AND R0, R12, #0xF @ separate the 4 LSBits
LDRSB R0, [R6, R0] @ put the 4 bit value into the lookup table and save the result to R0
ADD LR, LR, R0 @ add the value from the lookup table to the previous value to calc the new one
STRB LR, [R5], #1 @ store the decoded sample to the decoding buffer
SUBS R7, R7, #2 @ decrease the block sample counter by 2 (2 samples each byte) and check if it is still above 0
BGT bdpcm_decoder_msb @ if there are still samples to decode jump to the MSBits
bdpcm_decoder_return:
LDR R5, decoder_buffer @ reload the decoder buffer pointer to R5 (R5 was advanced while decoding)
AND R0, R3, #0x3F @ position inside the block (low 6 bits of the sample index)
LDRSB R12, [R5, R0] @ read the decoded sample from buffer, sign extended
LDMFD SP!, {R0, R2, R5-R7, PC} @ pop registers and return to the compressed sample mixer
@**************** END BDPCM DECODER *****************@
@ Literal word with the address of the block decode buffer, loaded PC-relative
@ by bdpcm_decoder (decoder_buffer_target itself is defined outside this section).
decoder_buffer:
.word decoder_buffer_target
@ Delta table for the 4 bit BDPCM codes. It is read with LDRSB, so the entries
@ are signed bytes:
@ 0, 1, 4, 9, 16, 25, 36, 49, -64, -49, -36, -25, -16, -9, -4, -1
delta_lookup_table:
.byte 0x0, 0x1, 0x4, 0x9, 0x10, 0x19, 0x24, 0x31, 0xC0, 0xCF, 0xDC, 0xE7, 0xF0, 0xF7, 0xFC, 0xFF
.endif
main_mixer_end:
.end