//////////////////////////////////////////////////////////
// CPU routines

#include <string.h>
#include <math.h>

#include "vb_io.h"
#include "v810_opt.h"
#include "v810_cpu.h"
#include "vb_vbt.h"
#include "vb_dsp.h"

#ifdef VB_DEBUGGER
#include "v810_cpuD.h"
#endif

//Options...
#include "vb_set.h"

////////////////////////////////////////////////////////////
// Globals
// CPU register file shared with the rest of the emulator.
// v810_trc() forces P_REG[0] back to 0 before every instruction, writes the
// JAL return address to P_REG[31], and uses P_REG[30] for the upper half of
// MUL/MULU results and for the DIV/DIVU remainder.
VB_WORD P_REG[32];  // Program registers pr0-pr31
VB_WORD S_REG[32];  // System registers sr0-sr31
VB_WORD PC;         // Program Counter

// Base clock count per instruction, indexed by the decoded opcode (0x00-0x4F).
// v810_trc() adds opcycle[opcode] to its running clock total for every
// instruction it executes and then applies its own per-opcode adjustments
// (taken branches, back-to-back loads/stores) on top of this base value.
const VB_BYTE opcycle[0x50] = {
	0x01,0x01,0x01,0x01,0x01,0x01,0x03,0x01,0x0D,0x26,0x0D,0x24,0x01,0x01,0x01,0x01,
	0x01,0x01,0x01,0x01,0x01,0x01,0x03,0x01,0x0F,0x0A,0x05,0x00,0x01,0x01,0x03,0x00, //CLI, HALT, LDSR, STSR, SEI -- Unknown clocks
	0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x01,0x01,0x03,0x03,0x01,0x01,0x01,0x01,
	0x01,0x01,0x0D,0x01,0x01,0x01,0x00,0x01,0x03,0x03,0x1A,0x05,0x01,0x01,0x00,0x01, //these are based on 16-bit bus!! (should be 32-bit?)
	0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01
};

// One instruction-cache block: 8 data bytes, handled by v810_trc() as two
// 4-byte subblocks.
typedef struct
{
	unsigned char BLK_DATA[8];
} ICH_BLK;

// Cache tag memory, one entry per block. As maintained by v810_trc(), the low
// 22 bits hold the tag and bits 22/23 are the valid bits of subblocks 0/1.
// Every entry starts at 0 so that all tags are initially invalid.
VB_WORD ICHT[128] = {0};

// Cache data memory, one block per tag entry.
ICH_BLK ICHD[128];

// Profiling counters updated by the cache model in v810_trc().
/*VB_INT64U*/ VB_WORD cache_hit_cnt = 0;
/*VB_INT64U*/ VB_WORD cache_miss_cnt = 0;
/*VB_INT64U*/ VB_WORD total_inst = 0;

// Debugger controls. Within this file they are only read by v810_trc(), and
// only in VB_DEBUGGER builds.
// play_breakpnt_en: nonzero makes v810_trc() return 1 when PC == play_breakpnt;
//                   it is cleared when the breakpoint triggers.
// mem_watchpnt_en:  nonzero logs loads/stores that hit mem_watchpnt; the value 2
//                   additionally makes v810_trc() return 1 (and clears the flag).
unsigned int play_breakpnt_en=0;
unsigned int play_breakpnt=0;
unsigned int mem_watchpnt_en=0;
unsigned int mem_watchpnt=0;

//////////////////////////////////////////////////////

// Load the CPU's reset values: r0 is cleared, PC is set to 0xFFFFFFF0, and
// the ECR, PSW, PIR and TKCW system registers get their initial contents.
// No other program or system register is touched.
void v810_reset() {
    P_REG[0] = 0;

    PC = 0xFFFFFFF0;

    S_REG[ECR]  = 0xFFF0;
    S_REG[PSW]  = 0x8000;
    S_REG[PIR]  = 0x5346;
    S_REG[TKCW] = 0x00E0;
}

// Main interpreter loop: fetch, decode and execute V810 instructions at PC.
//
// Every pass through the loop first calls serviceInt() and serviceDisplayInt()
// with the running clock count, then reads four instruction bytes from ROM or
// RAM, decodes the operands according to addr_mode[opcode], and executes the
// opcode, updating P_REG, S_REG, PC and the clock count.
//
// dbg_trc: when nonzero, return after executing a single instruction.
//
// Returns 0 when serviceDisplayInt() reports that the screen needs to be
// redrawn. Returns 1 when dbg_trc is set, when PC or the opcode is invalid,
// or (VB_DEBUGGER builds only) when a breakpoint or watchpoint is hit.
//
// opcode, clocks, lastop and lastclock are static, so their values carry over
// from one call to the next.
//
// Could be done a lot better, maybe later!
int v810_trc(int dbg_trc) {
    int lowB, highB, lowB2, highB2;             // the 4 bytes fetched for the instruction (either 16 or 32 bits are used)
    static int opcode;
	int arg1 = 0;
	int arg2 = 0;
	int arg3 = 0;
    int tmp2;
    int flags = 0;
    VB_INT64 temp = 0;
    VB_INT64U tempu = 0;
    int val = 0;
	static unsigned int clocks;
	static int lastop,lastclock;
#ifdef VB_DEBUGGER
	unsigned int Cache_Offset, Cache_Subblock, Cache_Index, Cache_Tag;
#endif
	VB_WORD addr;
	VB_HWORD h_data;
	VB_BYTE b_data;

	for(;;) {

		serviceInt(clocks);
		if (serviceDisplayInt(clocks)) return 0; //serviceDisplayInt() returns with 1 when the screen needs to be redrawn

        PC = (PC&0x07FFFFFE);

#ifdef VB_DEBUGGER
        // Interactive disassemble, remove in release...
        if(tVBOpt.DISASM) { //turn on and off
            v810_addDasm(PC);
        }

        //Special stack trace (of sorts), remove in release
        v810_add_queue(PC); // Circular queue of last 100 instructions

		if ((play_breakpnt_en)&&(PC==play_breakpnt))
		{
			vb_printf("\nBreakpoint waiting, exit to debugger");
			play_breakpnt_en=0;
			return 1;
		}
#endif
		// Fetch the instruction bytes from whichever region PC points into.
		if ((PC>>24) >= 0x07) { //ROM
			PC     = (PC & V810_ROM1.highaddr);
			lowB   = ((VB_BYTE *)(V810_ROM1.off + PC))[0];
			highB  = ((VB_BYTE *)(V810_ROM1.off + PC))[1];
			lowB2  = ((VB_BYTE *)(V810_ROM1.off + PC))[2];
			highB2 = ((VB_BYTE *)(V810_ROM1.off + PC))[3];
		}
		else if ((PC>>24) == 0x05) { //RAM
			PC     = (PC & V810_VB_RAM.highaddr);
			lowB   = ((VB_BYTE *)(V810_VB_RAM.off + PC))[0];
			highB  = ((VB_BYTE *)(V810_VB_RAM.off + PC))[1];
			lowB2  = ((VB_BYTE *)(V810_VB_RAM.off + PC))[2];
			highB2 = ((VB_BYTE *)(V810_VB_RAM.off + PC))[3];
		}
		else {
			vb_log_msg(10,"\nInvalid PC - %08X",PC);
#ifdef VB_DEBUGGER
			v810_dump_queue();
#endif
			return 1;
		}
#ifdef VB_DEBUGGER
		//some cache stuff, not actually used for execution, but for profiling cache use (could be used for proper wait states for cache usage)
		total_inst++;
		if (S_REG[CHCW]&0x02) //cache enabled
		{
			Cache_Offset=(PC&0x07);
			Cache_Subblock=Cache_Offset>>2;
			Cache_Index=((PC>>3)&0x7F);
			Cache_Tag=((PC>>10)&0x3FFFFF);
			if ((ICHT[Cache_Index]&0x3FFFFF)!=Cache_Tag) //cache miss if tag differs, write cache and set tag
			{
				//assuming even 16 bit instructions must fill the full 32 bit subblock (no valid bit for half subblock)
				ICHD[Cache_Index].BLK_DATA[(Cache_Subblock<<2)+0]=lowB;
				ICHD[Cache_Index].BLK_DATA[(Cache_Subblock<<2)+1]=highB;
				ICHD[Cache_Index].BLK_DATA[(Cache_Subblock<<2)+2]=lowB2;
				ICHD[Cache_Index].BLK_DATA[(Cache_Subblock<<2)+3]=highB2;
				ICHT[Cache_Index]=((1<<(22+Cache_Subblock))|Cache_Tag); //set tag
				cache_miss_cnt++;
			}
			else if (!(ICHT[Cache_Index]&(1<<(22+Cache_Subblock)))) //or if valid bit not set, write cache and set valid bit
			{
				//assuming even 16 bit instructions must fill the full 32 bit subblock (no valid bit for half subblock)
				ICHD[Cache_Index].BLK_DATA[(Cache_Subblock<<2)+0]=lowB;
				ICHD[Cache_Index].BLK_DATA[(Cache_Subblock<<2)+1]=highB;
				ICHD[Cache_Index].BLK_DATA[(Cache_Subblock<<2)+2]=lowB2;
				ICHD[Cache_Index].BLK_DATA[(Cache_Subblock<<2)+3]=highB2;
				ICHT[Cache_Index]|=(1<<(22+Cache_Subblock)); //set valid bit
				cache_miss_cnt++;
			}
			else //cache hit
			{
				cache_hit_cnt++;
			}
		}
#endif

        P_REG[0]=0; //Zero the Zero Reg!!!

		tmp2 = ((PC&0x00FFFFFF)>>1);

		// Remember the previous instruction (only if it was a valid opcode) so
		// the load/store/out cases below can adjust their clock counts.
		// NOTE(review): "opcode > 0" also skips MOV (opcode 0), so lastop and
		// lastclock are not refreshed after a MOV - confirm whether intended.
		if ((opcode >0) && (opcode < 0x50)) {
			lastop = opcode;
			lastclock = opcycle[opcode];
		}

        opcode = highB >> 2;
        if((highB & 0xE0) == 0x80)        // Special opcode format for
            opcode = (highB >> 1);    // type III instructions.

        if((opcode > 0x4F) || (opcode < 0)) {
            //Error Invalid opcode!
            vb_log_msg(10,"\n%08lx\t\t%2x %2x  ;Invalid Opcode", PC, lowB, highB);
			return 1;
        }

        clocks += opcycle[opcode];
		//decode opcode and arguments from packed instruction
        switch(addr_mode[opcode]) {
          case AM_I:       // Do the same either way =)
          case AM_II:
            arg1 = (lowB & 0x1F);
            arg2 = (lowB >> 5) + ((highB & 0x3) << 3);
            PC += 2;   // 16 bit instruction
            break;

          case AM_III:     // PC is advanced by the branch cases themselves
            arg1 = ((highB & 0x1) << 8) + (lowB & 0xFE);
            break;

          case AM_IV:      // PC is updated by JR/JAL themselves
            arg1 = ((highB & 0x3) << 24) + (lowB << 16) + (highB2 << 8) + lowB2;
            break;

          case AM_V:
            arg3 = (lowB >> 5) + ((highB & 0x3) << 3);
            arg2 = (lowB & 0x1F);
            arg1 = (highB2 << 8) + lowB2;
            PC += 4;   // 32 bit instruction
            break;

          case AM_VIa:  // Mode6 form1
            arg1 = (highB2 << 8) + lowB2;
            arg2 = (lowB & 0x1F);
            arg3 = (lowB >> 5) + ((highB & 0x3) << 3);
            PC += 4;   // 32 bit instruction
            break;

          case AM_VIb:  // Mode6 form2
            arg1 = (lowB >> 5) + ((highB & 0x3) << 3);
            arg2 = (highB2 << 8) + lowB2;                              //  what's the order??? 2,3,1 or 1,3,2
            arg3 = (lowB & 0x1F);
            PC += 4;   // 32 bit instruction
            break;

          case AM_VII:   // Unhandled
            vb_log_msg(6,"\n%08lx\t\t%2x %2x %2x %2x", PC, lowB, highB, lowB2, highB2);
            PC +=4; // 32 bit instruction
            break;

          case AM_VIII:  // Unhandled
            vb_log_msg(6,"\n%08lx\t\t%2x %2x %2x %2x", PC, lowB, highB, lowB2, highB2);
            PC += 4;   // 32 bit instruction
            break;

          case AM_IX:
            arg1 = (lowB & 0x1); // Mode ID, Ignore for now
            PC += 2;   // 16 bit instruction
            break;

          case AM_BSTR:  // Bit String Subopcodes
            arg1 = (lowB >> 5) + ((highB & 0x3) << 3);
            arg2 = (lowB & 0x1F);
            PC += 2;   // 16 bit instruction
            break;

          case AM_FPP:   // Floating Point Subcode
            arg1 = (lowB >> 5) + ((highB & 0x3) << 3);
            arg2 = (lowB & 0x1F);
            arg3 = ((highB2 >> 2)&0x3F);
            PC += 4;   // 32 bit instruction
            break;

          case AM_UDEF:  // Invalid opcode.
          default:           // Invalid opcode.
            vb_log_msg(6,"\n%08lx\t\t%2x %2x  ;Invalid Opcode", PC, lowB, highB);
            PC += 2;
            break;
        }

		//process opcode & set flags
        switch(opcode) {
          case MOV:
            P_REG[arg2] = P_REG[arg1];
            break;

          case ADD:
            flags = 0;
            temp = P_REG[arg2] + P_REG[arg1];
            // Set Flags
            if ((long)temp == 0) flags = flags | PSW_Z;
            if ((long)temp & 0x80000000)  flags = flags | PSW_S;
			if (temp < P_REG[arg2]) flags = flags | PSW_CY;
            if(((P_REG[arg2]^(~P_REG[arg1]))&(P_REG[arg2]^temp))&0x80000000) flags = flags | PSW_OV;

            S_REG[PSW] = (S_REG[PSW] & 0xFFFFFFF0)|flags;
            P_REG[arg2] = (long)temp;
            break;

          case SUB:
            flags = 0;
			temp = (VB_INT64)((VB_INT64U)(P_REG[arg2])-(VB_INT64U)(P_REG[arg1]));
            // Set Flags
            if ((long)temp == 0) flags = flags | PSW_Z;
            if ((long)temp & 0x80000000)  flags = flags | PSW_S;
            if(((P_REG[arg2]^P_REG[arg1])&(P_REG[arg2]^temp))&0x80000000) flags = flags | PSW_OV;
			if ((VB_INT64U)(temp) >> 32) flags = flags | PSW_CY;

            S_REG[PSW] = (S_REG[PSW] & 0xFFFFFFF0)|flags;
            P_REG[arg2] = (long)temp;
            break;

          case CMP:
            flags = 0;
			temp = (VB_INT64)((VB_INT64U)(P_REG[arg2])-(VB_INT64U)(P_REG[arg1]));
            // Set Flags
            if ((long)temp == 0) flags = flags | PSW_Z;
            if ((long)temp & 0x80000000)  flags = flags | PSW_S;
            if(((P_REG[arg2]^P_REG[arg1])&(P_REG[arg2]^temp))&0x80000000)
				flags = flags | PSW_OV;
			if ((VB_INT64U)(temp) >> 32) flags = flags | PSW_CY;
            S_REG[PSW] = (S_REG[PSW] & 0xFFFFFFF0)|flags;
            break;

          case SHL:
            flags = 0;
            val = P_REG[arg1] & 0x1F;
            // set CY before we destroy the register info....
            if((val != 0)&&((P_REG[arg2] >> (32 - val))&0x01)) flags = flags | PSW_CY;
            P_REG[arg2] = P_REG[arg2] << val;
            // Set Flags
            if (P_REG[arg2] == 0) flags = flags | PSW_Z;
            if (P_REG[arg2] & 0x80000000)  flags = flags | PSW_S;
            S_REG[PSW] = (S_REG[PSW] & 0xFFFFFFF0)|flags;
            break;

          case SHR:
            flags = 0;
            val = P_REG[arg1] & 0x1F;
            // set CY before we destroy the register info....
			if ((val) && ((P_REG[arg2] >> (val-1))&0x01)) flags = flags | PSW_CY;
            P_REG[arg2] = P_REG[arg2] >> val;
            // Set Flags
            if (P_REG[arg2] == 0) flags = flags | PSW_Z;
            if (P_REG[arg2] & 0x80000000)  flags = flags | PSW_S;
            S_REG[PSW] = (S_REG[PSW] & 0xFFFFFFF0)|flags;
            break;

          case JMP:
            PC = (P_REG[arg1] & 0xFFFFFFFE);
            break;

          case SAR:
            flags = 0;
            val = P_REG[arg1] & 0x1F;

			//carry is last bit shifted out
			//(this should be checked if compiled with a different compiler, as a right shift of a signed number in C isn't explicitly required to be the same as an arithmetic shift)
			if( (val) && ((((long)P_REG[arg2])>>(val-1))&0x01) ) //needs to be arithmetic shift, so cast as signed
				flags = flags | PSW_CY;

			P_REG[arg2] = ((long)P_REG[arg2]) >> val;

            // Set Flags
            if (P_REG[arg2] & 0x80000000)  flags = flags | PSW_S;
            if (!P_REG[arg2]) flags = flags | PSW_Z;
            S_REG[PSW] = (S_REG[PSW] & 0xFFFFFFF0)|flags;
            break;

          case MUL:
            flags=0;
            temp = (VB_INT64)(long)P_REG[arg1] * (VB_INT64)(long)P_REG[arg2]; //need to cast signed before casting to 64 bit to get sign extension
            P_REG[30]   = (long)(temp >> 32);
			P_REG[arg2] = (long)temp;

            // Set Flags (CY is preserved)
			if (temp != sign_32((long)temp)) flags = flags | PSW_OV;
            if ((long)temp == 0) flags = flags | PSW_Z;
            if ((long)temp & 0x80000000)  flags = flags | PSW_S;
            S_REG[PSW] = (S_REG[PSW] & (0xFFFFFFF0|PSW_CY))|flags;

            break;

          case DIV:
            flags = 0;
            if((long)P_REG[arg1] == 0) { // Div by zero error
                // Generate exception!
				v810_exp(8, 0xFF80);
            } else {
                if((P_REG[arg2]==0x80000000)&&(P_REG[arg1]==0xFFFFFFFF)) { //manual wording is unclear on this, should be checked w/ hardware (-2B/-1 shouldn't be negative, it should be +2B-1 if saturated like the manual seems to say)
					flags = flags |PSW_OV;
					P_REG[30]=0;
					P_REG[arg2] = 0x7FFFFFFF;//0x80000000;
				} else {
					temp        = (long)P_REG[arg2] % (long)P_REG[arg1];
					P_REG[arg2] = (long)P_REG[arg2] / (long)P_REG[arg1];
					if (arg2 != 30) P_REG[30] = (long)temp; // quotient wins when the destination is r30
				}

				// Set Flags (CY is preserved)
				if (P_REG[arg2] == 0) flags = flags | PSW_Z;
				if (P_REG[arg2] & 0x80000000)  flags = flags | PSW_S;
				S_REG[PSW] = (S_REG[PSW] & (0xFFFFFFF0|PSW_CY))|flags;
            }
            break;

          case MULU:
            flags = 0;
            tempu = (VB_INT64U)P_REG[arg1] * (VB_INT64U)P_REG[arg2];
            P_REG[30]   = (VB_WORD)(tempu >> 32);
			P_REG[arg2] = (VB_WORD)tempu;

            // Set Flags (CY is preserved)
			if (tempu != (VB_WORD)tempu) flags = flags | PSW_OV;
            if ((VB_WORD)tempu == 0) flags = flags | PSW_Z;
            if ((VB_WORD)tempu & 0x80000000)  flags = flags | PSW_S; //Does unsigned multiply set this?  The manual specifically states that DIVU does, guessing MULU does too?
            S_REG[PSW] = (S_REG[PSW] & (0xFFFFFFF0|PSW_CY))|flags;
            break;

          case DIVU:
            flags = 0;
            if(P_REG[arg1] == 0) { // Div by zero error
                // Generate exception!
				v810_exp(8, 0xFF80);
            } else {
				temp        = (VB_WORD)P_REG[arg2] % (VB_WORD)P_REG[arg1];
                P_REG[arg2] = (VB_WORD)P_REG[arg2] / (VB_WORD)P_REG[arg1];
				if (arg2 != 30) P_REG[30] = (VB_WORD)temp; // quotient wins when the destination is r30
                // Set Flags (CY is preserved)
                if (P_REG[arg2] == 0) flags = flags | PSW_Z;
                if (P_REG[arg2] & 0x80000000)  flags = flags | PSW_S;
                S_REG[PSW] = (S_REG[PSW] & (0xFFFFFFF0|PSW_CY))|flags;
            }
            break;

          case OR:
            flags = 0;
            P_REG[arg2] = P_REG[arg1] | P_REG[arg2];
            // Set Flags
            if (P_REG[arg2] == 0) flags = flags | PSW_Z;
            if (P_REG[arg2] & 0x80000000)  flags = flags | PSW_S;
            S_REG[PSW] = (S_REG[PSW] & (0xFFFFFFF0|PSW_CY))|flags;
            break;

          case AND:
            flags = 0;
            P_REG[arg2] = P_REG[arg1] & P_REG[arg2];
            // Set Flags
            if (P_REG[arg2] == 0) flags = flags | PSW_Z;
            if (P_REG[arg2] & 0x80000000)  flags = flags | PSW_S;
            S_REG[PSW] = (S_REG[PSW] & (0xFFFFFFF0|PSW_CY))|flags;
            break;

          case XOR:
            flags = 0;
            P_REG[arg2] = P_REG[arg1] ^ P_REG[arg2];
            // Set Flags
            if (P_REG[arg2] == 0) flags = flags | PSW_Z;
            if (P_REG[arg2] & 0x80000000)  flags = flags | PSW_S;
            S_REG[PSW] = (S_REG[PSW] & (0xFFFFFFF0|PSW_CY))|flags;
            break;

          case NOT:
            flags = 0;
			P_REG[arg2] = ~P_REG[arg1];
            // Set Flags
            if (P_REG[arg2] == 0) flags = flags | PSW_Z;
            if (P_REG[arg2] & 0x80000000)  flags = flags | PSW_S;
            S_REG[PSW] = (S_REG[PSW] & (0xFFFFFFF0|PSW_CY))|flags;
            break;

          case MOV_I:
            P_REG[arg2] = sign_5(arg1);
            break;

          case ADD_I:
            flags = 0;
            temp = P_REG[arg2] + sign_5(arg1);
            // Set Flags
            if ((long)temp == 0) flags = flags | PSW_Z;
            if ((long)temp & 0x80000000)  flags = flags | PSW_S;
            if(((P_REG[arg2]^(~sign_5(arg1)))&(P_REG[arg2]^temp))&0x80000000) flags = flags | PSW_OV;
			if (temp < P_REG[arg2]) flags = flags | PSW_CY;

            S_REG[PSW] = (S_REG[PSW] & 0xFFFFFFF0)|flags;
            P_REG[arg2] = (VB_WORD)temp;
            break;

          case SETF:
			//SETF may contain bugs
			// Destination becomes 1 when the selected condition holds, else 0.
			P_REG[arg2] = 0;
			switch (arg1 & 0x0F) {
				case COND_V:
					if (S_REG[PSW] & PSW_OV) P_REG[arg2] = 1;
					break;
				case COND_C:
					if (S_REG[PSW] & PSW_CY) P_REG[arg2] = 1;
					break;
				case COND_Z:
					if (S_REG[PSW] & PSW_Z) P_REG[arg2] = 1;
					break;
				case COND_NH:
					if (S_REG[PSW] & (PSW_CY|PSW_Z)) P_REG[arg2] = 1;
					break;
				case COND_S:
					if (S_REG[PSW] & PSW_S) P_REG[arg2] = 1;
					break;
				case COND_T:
					P_REG[arg2] = 1;
					break;
				case COND_LT:
					if ((!!(S_REG[PSW]&PSW_S))^(!!(S_REG[PSW]&PSW_OV))) P_REG[arg2] = 1;
					break;
				case COND_LE:
					if (((!!(S_REG[PSW]&PSW_S))^(!!(S_REG[PSW]&PSW_OV)))|(S_REG[PSW]&PSW_Z)) P_REG[arg2] = 1;
					break;
				case COND_NV:
					if (!(S_REG[PSW] & PSW_OV)) P_REG[arg2] = 1;
					break;
				case COND_NC:
					if (!(S_REG[PSW] & PSW_CY)) P_REG[arg2] = 1;
					break;
				case COND_NZ:
					if (!(S_REG[PSW] & PSW_Z)) P_REG[arg2] = 1;
					break;
				case COND_H:
					if (!(S_REG[PSW] & (PSW_CY|PSW_Z))) P_REG[arg2] = 1;
					break;
				case COND_NS:
					if (!(S_REG[PSW] & PSW_S)) P_REG[arg2] = 1;
					break;
				case COND_F:
					//always false! do nothing more
					break;
				case COND_GE:
					if (!((!!(S_REG[PSW]&PSW_S))^(!!(S_REG[PSW]&PSW_OV)))) P_REG[arg2] = 1;
					break;
				case COND_GT:
					if (!(((!!(S_REG[PSW]&PSW_S))^(!!(S_REG[PSW]&PSW_OV)))|(S_REG[PSW]&PSW_Z))) P_REG[arg2] = 1;
					break;
			}
			break;

          case CMP_I:
            flags = 0;
			temp = (VB_INT64)((VB_INT64U)(P_REG[arg2])-(VB_INT64U)(sign_5(arg1)));

            if ((long)temp == 0) flags = flags | PSW_Z;
            if ((long)temp & 0x80000000)  flags = flags | PSW_S;
            if(((P_REG[arg2]^(sign_5(arg1)))&(P_REG[arg2]^temp))&0x80000000) flags = flags | PSW_OV;
			if ((VB_INT64U)(temp) >> 32) flags = flags | PSW_CY;

            S_REG[PSW] = (S_REG[PSW] & 0xFFFFFFF0)|flags;
            break;

          case SHL_I:
            flags = 0;
            // set CY before we destroy the register info....
            if((arg1)&&((P_REG[arg2] >> (32 - arg1))&0x01)) flags = flags | PSW_CY;
            P_REG[arg2] = P_REG[arg2] << arg1;
            // Set Flags
            if (P_REG[arg2] == 0) flags = flags | PSW_Z;
            if (P_REG[arg2] & 0x80000000)  flags = flags | PSW_S;
            S_REG[PSW] = (S_REG[PSW] & 0xFFFFFFF0)|flags;
            break;

          case SHR_I:
            flags = 0;
            // set CY before we destroy the register info....
            if((arg1)&&((P_REG[arg2] >> (arg1-1))&0x01)) flags = flags | PSW_CY;
            P_REG[arg2] = P_REG[arg2] >> arg1;
            // Set Flags
            if (P_REG[arg2] == 0) flags = flags | PSW_Z;
            if (P_REG[arg2] & 0x80000000)  flags = flags | PSW_S;
            S_REG[PSW] = (S_REG[PSW] & 0xFFFFFFF0)|flags;
            break;

		  case CLI:
            S_REG[PSW] = S_REG[PSW] & (0xFFFFEFFF);
            break;

          case SAR_I:
            flags = 0;

			//(this should be checked if compiled with a different compiler, as a right shift of a signed number in C isn't explicitly required to be the same as an arithmetic shift)
			if( (arg1) && ((((long)P_REG[arg2])>>(arg1-1))&0x01) )
				flags = flags | PSW_CY;

			P_REG[arg2] = ((long)P_REG[arg2]) >> arg1;

            // Set Flags
            if (P_REG[arg2] & 0x80000000)  flags = flags | PSW_S;
            if (!P_REG[arg2]) flags = flags | PSW_Z;
            S_REG[PSW] = (S_REG[PSW] & 0xFFFFFFF0)|flags;
            break;

          case TRAP:
            vb_log_msg(6,"\nUnhandled opcode! trap");
            break;

          case LDSR:
            S_REG[(arg1 & 0x1F)] = P_REG[(arg2 & 0x1F)];
            break;

          case STSR:
            P_REG[(arg2 & 0x1F)] = S_REG[(arg1 & 0x1F)];
            break;

          case SEI:
            S_REG[PSW] = S_REG[PSW] | 0x00001000;
            break;

          // Conditional branches: a taken branch adds the displacement to PC
          // and costs 2 extra clocks; otherwise PC just moves past the
          // 16-bit instruction.
          case BV:
            if(S_REG[PSW]&PSW_OV) {
                PC += (sign_9(arg1) & 0xFFFFFFFE);
				clocks += 2;
            } else {
                PC +=2;
            }
            break;

          case BL: //same as BC
            if(S_REG[PSW]&PSW_CY) {
                PC += (sign_9(arg1) & 0xFFFFFFFE);
				clocks += 2;
            } else {
                PC +=2;
            }
            break;

          case BE: //same as BZ
            if(S_REG[PSW]&PSW_Z) {
                PC += (sign_9(arg1) & 0xFFFFFFFE);
				clocks += 2;
            } else {
                PC +=2;
            }
            break;

          case BNH:
            if((S_REG[PSW]&PSW_Z)||(S_REG[PSW]&PSW_CY)) {
                PC += (sign_9(arg1) & 0xFFFFFFFE);
				clocks += 2;
            } else {
                PC +=2;
            }
            break;

          case BN:
            if(S_REG[PSW]&PSW_S) {
                PC += (sign_9(arg1) & 0xFFFFFFFE);
				clocks += 2;
            } else {
                PC +=2;
            }
            break;

          case BR:
            PC += (sign_9(arg1) & 0xFFFFFFFE);
			clocks += 2;
            break;
          case BLT:
            if((!!(S_REG[PSW]&PSW_S))^(!!(S_REG[PSW]&PSW_OV))) {
                PC += (sign_9(arg1) & 0xFFFFFFFE);
				clocks += 2;
            } else {
                PC +=2;
            }
            break;

          case BLE:
            if(((!!(S_REG[PSW]&PSW_S))^(!!(S_REG[PSW]&PSW_OV)))||(S_REG[PSW]&PSW_Z)) {
                PC += (sign_9(arg1) & 0xFFFFFFFE);
				clocks += 2;
            } else {
                PC +=2;
            }
            break;

          case BNV:
            if(!(S_REG[PSW]&PSW_OV)) {
                PC += (sign_9(arg1) & 0xFFFFFFFE);
				clocks += 2;
            } else {
                PC +=2;
            }
            break;

          case BNL: //same as BNC
            if(!(S_REG[PSW]&PSW_CY)) {
                PC += (sign_9(arg1) & 0xFFFFFFFE);
				clocks += 2;
            } else {
                PC +=2;
            }
            break;

          case BNE: //same as BNZ
            if(!(S_REG[PSW] & PSW_Z)) {
                PC += (sign_9(arg1) & 0xFFFFFFFE);
				clocks += 2;
            } else {
                PC +=2;
            }
            break;

          case BH:
            if(!((S_REG[PSW]&PSW_Z)||(S_REG[PSW]&PSW_CY))) {
                PC += (sign_9(arg1) & 0xFFFFFFFE);
				clocks += 2;
            } else {
                PC +=2;
            }
            break;

          case BP:
            if(!(S_REG[PSW] & PSW_S)) {
                PC += (sign_9(arg1) & 0xFFFFFFFE);
				clocks += 2;
            } else {
                PC +=2;
            }
            break;

          case NOP:
            //It's a NOP, do nothing =)
            PC +=2;
            break;

          case BGE:
            if(!((!!(S_REG[PSW]&PSW_S))^(!!(S_REG[PSW]&PSW_OV)))) {
                PC += (sign_9(arg1) & 0xFFFFFFFE);
				clocks += 2;
            } else {
                PC +=2;
            }
            break;

          case BGT:
            if(!(((!!(S_REG[PSW]&PSW_S))^(!!(S_REG[PSW]&PSW_OV)))||(S_REG[PSW]&PSW_Z))) {
                PC += (sign_9(arg1) & 0xFFFFFFFE);
				clocks += 2;
            } else {
                PC +=2;
			}
			break;

          case JR:
            PC += (sign_26(arg1) & 0xFFFFFFFE);
            break;

          case JAL:
            P_REG[31]=PC+4; // return address: the instruction after this 32-bit JAL
            PC += (sign_26(arg1) & 0xFFFFFFFE);
            break;

          case MOVEA:
            P_REG[arg3] = P_REG[arg2] + sign_16(arg1);
            break;

          case ADDI:
            flags = 0;
            temp = P_REG[arg2] + sign_16(arg1);
            // Set Flags
            if ((long)temp == 0) flags = flags | PSW_Z;
            if ((long)temp & 0x80000000)  flags = flags | PSW_S;
            if (((P_REG[arg2]^(~sign_16(arg1)))&(P_REG[arg2]^temp))&0x80000000) flags = flags | PSW_OV;
			if (temp < P_REG[arg2]) flags = flags | PSW_CY;

            S_REG[PSW] = (S_REG[PSW] & 0xFFFFFFF0)|flags;
            P_REG[arg3] = (long)temp;
            break;

          case ORI:
            flags = 0;
            P_REG[arg3] = arg1 | P_REG[arg2];
            // Set Flags
            if (P_REG[arg3] == 0) flags = flags | PSW_Z;
            if (P_REG[arg3] & 0x80000000)  flags = flags | PSW_S;
            S_REG[PSW] = (S_REG[PSW] & (0xFFFFFFF0|PSW_CY))|flags;
            break;

          case ANDI:
            flags = 0;
            P_REG[arg3] = (arg1 & P_REG[arg2]);
            // Set Flags (arg1 is a 16-bit value here, so bit 31 of the result is never set)
            if (P_REG[arg3] == 0) flags = (flags | PSW_Z);
            S_REG[PSW] = (S_REG[PSW] & (0xFFFFFFF0|PSW_CY))|flags;
            break;

          case XORI:
			flags = 0;
			P_REG[arg3] = arg1 ^ P_REG[arg2];
			// Set Flags
			if (P_REG[arg3] == 0) flags = flags | PSW_Z;
			if (P_REG[arg3] & 0x80000000)  flags = flags | PSW_S;
			S_REG[PSW] = (S_REG[PSW] & (0xFFFFFFF0|PSW_CY))|flags;
			break;

          case MOVHI:
            P_REG[arg3] = (arg1 << 16) + P_REG[arg2];
			break;

          case RETI:
            //Return from Trap/Interrupt
            if(S_REG[PSW] & PSW_NP) { // Read the FE Reg
                PC = S_REG[FEPC];
                S_REG[PSW] = S_REG[FEPSW];
            } else { 	//Read the EI Reg Interrupt
                PC = S_REG[EIPC];
                S_REG[PSW] = S_REG[EIPSW];
            }
            break;

          case HALT:
            vb_log_msg(6,"\nUnhandled opcode! halt");
            break;

          case LD_B:
			tmp2 = (sign_16(arg1)+P_REG[arg2])&0x07FFFFFF;

#ifndef VB_DEBUGGER
			//avoid calling rbyte for most frequent cases
			if ((tmp2 & 0x7000000)==0x5000000) {
				P_REG[arg3] = sign_8(((VB_BYTE *)(V810_VB_RAM.off + (tmp2 & V810_VB_RAM.highaddr)))[0]);
			} else if ((tmp2 & 0x7000000)==0x7000000) {
				P_REG[arg3] = sign_8(((VB_BYTE *)(V810_ROM1.off + (tmp2 & V810_ROM1.highaddr)))[0]);
			} else
#endif
				P_REG[arg3] = sign_8(mem_rbyte(tmp2));
#ifdef VB_DEBUGGER
				if ((mem_watchpnt_en)&&(tmp2==(int)mem_watchpnt))
				{
					vb_log_msg(10,"\nPC=0x%08X Address 0x%08X read with byte 0x%02X",PC,tmp2,P_REG[arg3]);
					if (mem_watchpnt_en==2)
					{
						vb_log_msg(10,"\nBreakpoint waiting, exit to debugger");
						mem_watchpnt_en=0;
						return 1;
					}
				}
#endif

			//should be 3 clocks when executed alone, 2 when precedes another LD, or 1
			//when precedes an instruction with many clocks (I'm guessing FP, MUL, DIV, etc)
			if (lastclock < 6) {
				if ((lastop == LD_B) || (lastop == LD_H) || (lastop == LD_W)) clocks += 1;
				else clocks += 2;
			}
            break;

          case LD_H:
			tmp2 = (sign_16(arg1)+P_REG[arg2]) & 0x07FFFFFE;

#ifndef VB_DEBUGGER
			//avoid calling rhword for most frequent cases
			if((tmp2 & 0x7000000) == 0x5000000) {
				P_REG[arg3] = sign_16(((VB_HWORD *)(V810_VB_RAM.off + (tmp2 & V810_VB_RAM.highaddr)))[0]);
			} else if((tmp2 & 0x7000000) == 0x7000000) {
        		P_REG[arg3] = sign_16(((VB_HWORD *)(V810_ROM1.off + (tmp2 & V810_ROM1.highaddr)))[0]);
			} else if((tmp2 >= V810_VIPCREG.lowaddr)&&(tmp2 <=V810_VIPCREG.highaddr)) {
				P_REG[arg3] = sign_16(((VB_HWORD *)(&tVIPREG))[(tmp2&0x7E)>>1]);
			} else
#endif
	            P_REG[arg3] = sign_16(mem_rhword(tmp2));
#ifdef VB_DEBUGGER
				if ((mem_watchpnt_en)&&(tmp2==(int)mem_watchpnt))
				{
					vb_log_msg(10,"\nPC=0x%08X Address 0x%08X read with hword 0x%04X",PC,tmp2,P_REG[arg3]);
					if (mem_watchpnt_en==2)
					{
						vb_log_msg(10,"\nBreakpoint waiting, exit to debugger");
						mem_watchpnt_en=0;
						return 1;
					}
				}
#endif

            break;

          case LD_W:
			tmp2 = (sign_16(arg1)+P_REG[arg2]) & 0x07FFFFFC;

#ifndef VB_DEBUGGER
			//avoid calling rword for most frequent cases
			if((tmp2 & 0x7000000) == 0x5000000) {
				P_REG[arg3] = ((VB_WORD *)(V810_VB_RAM.off + (tmp2 & V810_VB_RAM.highaddr)))[0];
			} else if((tmp2 & 0x7000000) == 0x7000000) {
        		P_REG[arg3] = ((VB_WORD *)(V810_ROM1.off + (tmp2 & V810_ROM1.highaddr)))[0];
			} else
#endif
				P_REG[arg3] = mem_rword(tmp2);
#ifdef VB_DEBUGGER
				if ((mem_watchpnt_en)&&(tmp2==(int)mem_watchpnt))
				{
					vb_log_msg(10,"\nPC=0x%08X Address 0x%08X read with word 0x%08X",PC,tmp2,P_REG[arg3]);
					if (mem_watchpnt_en==2)
					{
						vb_log_msg(10,"\nBreakpoint waiting, exit to debugger");
						mem_watchpnt_en=0;
						return 1;
					}
				}
#endif


			if (lastclock < 6) {
				if ((lastop == LD_B) || (lastop == LD_H) || (lastop == LD_W)) clocks += 3;
				else clocks += 4;
			}
            break;

          case ST_B:
            addr=sign_16(arg2)+P_REG[arg3];
			b_data=P_REG[arg1]&0xFF;
			mem_wbyte(addr,b_data);
#ifdef VB_DEBUGGER
			if ((mem_watchpnt_en)&&(addr==mem_watchpnt))
			{
				vb_log_msg(10,"\nPC=0x%08X Address 0x%08X written with byte 0x%02X",PC, addr,b_data);
				if (mem_watchpnt_en==2)
				{
					vb_log_msg(10,"\nBreakpoint waiting, exit to debugger");
					mem_watchpnt_en=0;
					return 1;
				}
			}
#endif
			//clocks should be 2 clocks when follows another ST
			if (lastop == ST_B) clocks += 1;
            break;

          case ST_H:
            addr=(sign_16(arg2)+P_REG[arg3])&0xFFFFFFFE;
			h_data=P_REG[arg1]&0xFFFF;
			mem_whword(addr,h_data);
#ifdef VB_DEBUGGER
			if ((mem_watchpnt_en)&&(addr==mem_watchpnt))
			{
				vb_log_msg(10,"\nPC=0x%08X Address 0x%08X written with hword 0x%04X",PC, addr,h_data);
				if (mem_watchpnt_en==2)
				{
					vb_log_msg(10,"\nBreakpoint waiting, exit to debugger");
					mem_watchpnt_en=0;
					return 1;
				}
			}
#endif
			if (lastop == ST_H) clocks += 1;
            break;

          case ST_W:
			tmp2 = (sign_16(arg2)+P_REG[arg3]) & 0x07FFFFFC;

#ifndef VB_DEBUGGER
			//avoid calling wword for most frequent cases
			if((tmp2 & 0x7000000) == 0x5000000) {
		        ((VB_WORD *)(V810_VB_RAM.off + (tmp2 & V810_VB_RAM.highaddr)))[0] = P_REG[arg1];
			} else
#endif
	            mem_wword(tmp2,P_REG[arg1]);
#ifdef VB_DEBUGGER
				if ((mem_watchpnt_en)&&(tmp2==(int)mem_watchpnt))
				{
					vb_log_msg(10,"\nPC=0x%08X Address 0x%08X written with word 0x%08X",PC, tmp2,P_REG[arg1]);
					if (mem_watchpnt_en==2)
					{
						vb_log_msg(10,"\nBreakpoint waiting, exit to debugger");
						mem_watchpnt_en=0;
						return 1;
					}
				}
#endif

			if (lastop == ST_W) clocks += 3;
            break;

          case IN_B:
            P_REG[arg3] = port_rbyte(sign_16(arg1)+P_REG[arg2]);
            break;

          case IN_H:
            P_REG[arg3] = port_rhword((sign_16(arg1)+P_REG[arg2]) & 0xFFFFFFFE);
            break;

          case CAXI:
            vb_log_msg(6,"\nUnhandled opcode! caxi");
            break;

          case IN_W:
            P_REG[arg3] = port_rword((sign_16(arg1)+P_REG[arg2]) & 0xFFFFFFFC);
            break;

          case OUT_B:
            port_wbyte(sign_16(arg2)+P_REG[arg3],P_REG[arg1]&0xFF);
			//clocks should be 2 when follows another OUT
			if (lastop == OUT_B) clocks += 1;
            break;

          case OUT_H:
            port_whword((sign_16(arg2)+P_REG[arg3])&0xFFFFFFFE,P_REG[arg1]&0xFFFF);
			if (lastop == OUT_H) clocks += 1;
            break;

          case OUT_W:
            port_wword((sign_16(arg2)+P_REG[arg3])&0xFFFFFFFC,P_REG[arg1]);
			if (lastop == OUT_W) clocks += 3;
            break;

          case FPP:
			fpu_subop(arg3,arg1,arg2,&clocks);
            break;

          case BSTR:
            bstr_subop(arg2,arg1,&clocks);
            break;

          default:
            vb_log_msg(6,"\n%08lx\t\t%2x %2x  ;Invalid Opcode", PC, lowB, highB);
            break;
        }

		// Single-step mode: hand control back after one instruction.
		if(dbg_trc)
			return 1;
    }
}


//Bitstring routines, wrapper functions for bitstring instructions!
// Copy `len` bits out of emulated memory into the host buffer `str`.
//   str    - destination buffer; its first ((len/32)+1) words are cleared and
//            then filled so that string bit k lands in bit (k&31) of str[k>>5]
//   src    - address of the word holding the first bit (read via mem_rword)
//   srcoff - bit offset of the first bit, counted from `src`
//   len    - number of bits to copy
// A source word is fetched once up front and again each time the running bit
// position moves into a different word.
void get_bitstr(VB_WORD *str, VB_WORD src, VB_WORD srcoff, VB_WORD len) {
	VB_WORD bit, pos, word_idx, cur_word;

	memset(str, 0, (((len >> 5) + 1) << 2)); //((len/32)+1)*4 bytes

	word_idx = (srcoff >> 5);
	cur_word = mem_rword(src + (word_idx << 2));

	for (bit = 0; bit < len; bit++) {
		pos = bit + srcoff;

		//moved on to another source word: fetch it
		if ((pos >> 5) != word_idx) {
			word_idx = (pos >> 5);
			cur_word = mem_rword(src + (word_idx << 2));
		}

		str[bit >> 5] |= (((cur_word >> (pos & 0x1F)) & 1) << (bit & 0x1F));
	}
}

// Write `len` bits from the host buffer `str` into emulated memory.
//   str    - source buffer; string bit k is taken from bit (k&31) of str[k>>5]
//   dst    - address of the word receiving the first bit
//   dstoff - bit offset of the first bit, counted from `dst`
//   len    - number of bits to store
// Each destination word is read with mem_rword, patched bit by bit and written
// back with mem_wword, so bits outside the string keep their old value.
// A zero-length string touches no memory, and every modified word is written
// back exactly once.
void set_bitstr(VB_WORD *str, VB_WORD dst, VB_WORD dstoff, VB_WORD len) {
	VB_WORD i, pos, shift, word_idx, tword;
	int dirty = 0; //set while tword holds changes not yet written back

	if (len == 0)
		return; //nothing to store: avoid a pointless read/write-back cycle

	word_idx = (dstoff >> 5);
	tword = mem_rword(dst + (word_idx << 2));

	for (i = 0; i < len; i++) {
		pos = dstoff + i;

		//moved on to another destination word: fetch it
		if ((pos >> 5) != word_idx) {
			word_idx = (pos >> 5);
			tword = mem_rword(dst + (word_idx << 2));
		}

		//shift an unsigned one: shifting a signed int into bit 31 is undefined
		shift = (pos & 0x1F);
		tword &= ~(((VB_WORD)1) << shift);
		tword |= (((str[i >> 5] >> (i & 0x1F)) & 1) << shift);
		dirty = 1;

		//last bit of this word done: flush it
		if (!((pos + 1) & 0x1F)) {
			mem_wword(dst + (word_idx << 2), tword);
			dirty = 0;
		}
	}

	//flush a trailing, partially filled word
	if (dirty)
		mem_wword(dst + (word_idx << 2), tword);
}

// One row of clock-cost coefficients for the bit-string search instructions.
// NOTE(review): presumably cycles_1 / cycles_2 are fixed costs for one- and
// two-word strings and the *_mul / *_const pairs are per-word multipliers and
// constants for longer strings - confirm against the manual table these
// values were taken from. Nothing in this part of the file reads the tables.
typedef struct
{
	int cycles_1;
	int cycles_2;
	int cycles_p_mul;
	int cycles_p_const;
	int cycles_n_mul;
	int cycles_n_const;
	int cycles_none_mul;
	int cycles_none_const;
} SRCH_BSU_TYPE;

// Coefficients for the "upward" search variants, one row per case.
SRCH_BSU_TYPE srch_up_bsu_cycles[10]={
	//  1   2  p*  p+  n*  n+  -*  -+
	{   0,  0,  0,  0,  0,  0,  0, 13},
	{  31,  0,  0,  0,  0,  0,  0, 31},
	{  40, 43,  0,  0,  0,  0,  0,  0},
	{  30, 51,  0,  0,  0,  0,  0,  0},
	{  30, 56,  0,  0,  0,  0,  0,  0},
	{  40, 45,  0,  0,  0,  0,  0,  0},
	{  40, 45,  5, 35,  5, 33,  5, 34},
	{  30, 56,  5, 46,  5, 44,  5, 45},
	{  30, 56,  5, 46,  5, 46,  5, 40},
	{  40, 45,  5, 35,  5, 35,  5, 29}
};

// Coefficients for the "downward" search variants, one row per case.
SRCH_BSU_TYPE srch_dn_bsu_cycles[10]={
	//  1   2  p*  p+  n*  n+  -*  -+
	{   0,  0,  0,  0,  0,  0,  0, 15},
	{  28,  0,  0,  0,  0,  0,  0, 30},
	{  33, 52,  0,  0,  0,  0,  0,  0},
	{  33, 52,  0,  0,  0,  0,  0,  0},
	{  45, 52,  0,  0,  0,  0,  0,  0},
	{  45, 50,  0,  0,  0,  0,  0,  0},
	{  33, 59,  5, 49,  5, 49,  5, 43},
	{  33, 59,  5, 49,  5, 51,  5, 50},
	{  45, 50,  5, 40,  5, 42,  5, 41},
	{  45, 50,  5, 40,  5, 40,  5, 34}
};

// Clock cost of a bit-string search instruction.
// Meant to follow V810 User's Manual table 5-12, with `found` being the number
// of words until the match (or -1 if not found). Wait states are not modelled.
//
// Currently a placeholder: every argument is ignored and a flat cost of 30
// clocks is returned.
// TODO: replace with a table-driven calculation in the manner of
// calc_arith_bsu_clk (classify the operation from srcoff/dstoff/length, then
// use the one-word, two-word or "words * mul + const" cost of the matching
// row), drawing on srch_up_bsu_cycles / srch_dn_bsu_cycles according to
// up_or_dn.
int calc_srch_bsu_clk(int up_or_dn, int found, int src, int srcoff, int dst, int dstoff, int length) {
	enum { SRCH_BSU_PLACEHOLDER_CLOCKS = 30 };

	//not used yet; kept so the interface is ready for the real calculation
	(void)up_or_dn;
	(void)found;
	(void)src;
	(void)srcoff;
	(void)dst;
	(void)dstoff;
	(void)length;

	return SRCH_BSU_PLACEHOLDER_CLOCKS; //temp placeholder
}

// One row of clock-cost coefficients for the arithmetic/move bit-string
// instructions: fixed costs for a one-word and a two-word source string, and
// a multiplier/constant pair used for any other word count.
typedef struct
{
	int cycles_1;
	int cycles_2;
	int cycles_3_mul;
	int cycles_3_const;
} ARITH_BSU_TYPE;

// Indexed by the "type" computed in calc_arith_bsu_clk (row 0 is never selected).
ARITH_BSU_TYPE arith_bsu_cycles[8]={
	{  0,  0,  0,  0 },
	{ 38, 53, 12, 30 },
	{ 38, 54, 12, 31 },
	{ 43, 60, 12, 35 },
	{ 49, 61,  6, 36 },
	{ 38, 55, 12, 31 },
	{ 20,  0,  0,  0 },
	{ 43,  0,  0,  0 }
};

// Clock cost of an arithmetic/move bit-string instruction.
// Based on V810 User's Manual tables 5-13 and 5-14, with some assumptions made
// where the tables are incorrect, incomplete or confusing. Wait states are
// not taken into account.
//   src, dst       - string addresses (not used by the calculation)
//   srcoff, dstoff - bit offsets of the strings within their first word
//   length         - string length in bits
// Returns the number of clocks looked up in arith_bsu_cycles.
int calc_arith_bsu_clk(int src, int srcoff, int dst, int dstoff, int length)
{
	const int src_words = ((srcoff + length) >> 5) + 1;
	const int dst_words = ((dstoff + length) >> 5) + 1;
	const ARITH_BSU_TYPE *row;
	int type;

	//classify the operation
	if (length == 0)
		type = 6;
	else if ((src_words == 1) && (dst_words == 1) && (srcoff > dstoff))
		type = 7;
	else if (srcoff == dstoff)
		type = ((srcoff + length) & 31) ? 2 : 1; //1 when the string ends on a word boundary
	else if (src_words != dst_words)
		type = 4;
	else
		type = (dstoff == 0) ? 3 : 5;

	//look up the cost for the number of source words
	row = &arith_bsu_cycles[type];
	switch (src_words) {
	case 1:
		return row->cycles_1;
	case 2:
		return row->cycles_2;
	default:
		return (src_words * row->cycles_3_mul) + row->cycles_3_const;
	}
}

int bstr_subop(int sub_op, int arg1, unsigned int *clocks) {
	VB_WORD i,tmp[16384],tmp2[16384];

	VB_WORD dstoff = (P_REG[26] & 0x1F);
	VB_WORD srcoff = (P_REG[27] & 0x1F);
	VB_WORD len =     P_REG[28];
	VB_WORD dst =    (P_REG[29] & 0xFFFFFFFC);
	VB_WORD src =    (P_REG[30] & 0xFFFFFFFC);


    if(sub_op > 15) {
        vb_log_msg(10,"\n%08lx\tBSR Error: %04x", PC,sub_op);
		return 0;
    }

    switch(sub_op) {
	case SCH0BSU:
		vb_log_msg(10,"\nSCH0BSU, len: %08X, src: %08X, srcoff: %08X, dst: %08X, dstoff: %08X",len,src,srcoff,dst,dstoff);
		clocks+=calc_srch_bsu_clk(1,0,src,srcoff,dst,dstoff,len);
		break;

	case SCH0BSD:
		vb_log_msg(10,"\nSCH0BSD, len: %08X, src: %08X, srcoff: %08X, dst: %08X, dstoff: %08X",len,src,srcoff,dst,dstoff);
		clocks+=calc_srch_bsu_clk(0,0,src,srcoff,dst,dstoff,len);
		break;

	case SCH1BSU:
		vb_log_msg(10,"\nSCH1BSU, len: %08X, src: %08X, srcoff: %08X, dst: %08X, dstoff: %08X",len,src,srcoff,dst,dstoff);
		clocks+=calc_srch_bsu_clk(1,0,src,srcoff,dst,dstoff,len);
		break;

	case SCH1BSD:
		vb_log_msg(10,"\nSCH1BSD, len: %08X, src: %08X, srcoff: %08X, dst: %08X, dstoff: %08X",len,src,srcoff,dst,dstoff);
		clocks+=calc_srch_bsu_clk(0,0,src,srcoff,dst,dstoff,len);
		break;

	case ORBSU:
		get_bitstr(tmp,src,srcoff,len);
		get_bitstr(tmp2,dst,dstoff,len);
		for (i = 0; i < ((len>>5)+1); i++) tmp[i] |= tmp2[i];
		set_bitstr(tmp,dst,dstoff,len);
		clocks+=calc_arith_bsu_clk(src,srcoff,dst,dstoff,len);
		break;

	case ANDBSU:
		get_bitstr(tmp,src,srcoff,len);
		get_bitstr(tmp2,dst,dstoff,len);
		for (i = 0; i < ((len>>5)+1); i++) tmp[i] &= tmp2[i];
		set_bitstr(tmp,dst,dstoff,len);
		clocks+=calc_arith_bsu_clk(src,srcoff,dst,dstoff,len);
		break;

	case XORBSU:
		get_bitstr(tmp,src,srcoff,len);
		get_bitstr(tmp2,dst,dstoff,len);
		for (i = 0; i < ((len>>5)+1); i++) tmp[i] ^= tmp2[i];
		set_bitstr(tmp,dst,dstoff,len);
		clocks+=calc_arith_bsu_clk(src,srcoff,dst,dstoff,len);
		break;

	case MOVBSU:
		get_bitstr(tmp,src,srcoff,len);
		set_bitstr(tmp,dst,dstoff,len);
		clocks+=calc_arith_bsu_clk(src,srcoff,dst,dstoff,len);
		break;

	case ORNBSU:
		get_bitstr(tmp,src,srcoff,len);
		get_bitstr(tmp2,dst,dstoff,len);
		for (i = 0; i < ((len>>5)+1); i++) tmp[i] = (~tmp[i] | tmp2[i]);
		set_bitstr(tmp,dst,dstoff,len);
		clocks+=calc_arith_bsu_clk(src,srcoff,dst,dstoff,len);
		break;

	case ANDNBSU:
		get_bitstr(tmp,src,srcoff,len);
		get_bitstr(tmp2,dst,dstoff,len);
		for (i = 0; i < ((len>>5)+1); i++) tmp[i] = (~tmp[i] & tmp2[i]);
		set_bitstr(tmp,dst,dstoff,len);
		clocks+=calc_arith_bsu_clk(src,srcoff,dst,dstoff,len);
		break;

	case XORNBSU:
		get_bitstr(tmp,src,srcoff,len);
		get_bitstr(tmp2,dst,dstoff,len);
		for (i = 0; i < ((len>>5)+1); i++) tmp[i] = (~tmp[i] ^ tmp2[i]);
		set_bitstr(tmp,dst,dstoff,len);
		clocks+=calc_arith_bsu_clk(src,srcoff,dst,dstoff,len);
		break;

	case NOTBSU:
		get_bitstr(tmp,src,srcoff,len);
		for (i = 0; i < ((len>>5)+1); i++) tmp[i] = ~tmp[i];
		set_bitstr(tmp,dst,dstoff,len);
		clocks+=calc_arith_bsu_clk(src,srcoff,dst,dstoff,len);
		break;

	default:
        vb_log_msg(10,"\n%08lx\tBSR Error: %04x", PC,sub_op);
		break;
	}

	return 0;
}

// Execute one floating-point / extended sub-operation.
//   sub_op - sub-opcode; values above 15 or without a handler are logged and ignored
//   arg1   - index of the register that receives the result (and, for the
//            two-operand arithmetic forms, supplies the first operand)
//   arg2   - index of the second operand / source register
//   clocks - running clock counter of the caller; the cost of the operation
//            is added to the value it points to
// Clock costs are the best case of the range noted on each line (not sure how
// to determine what the actual value is).
// The floating-point forms reinterpret the 32-bit register contents as a
// float. They replace the low four PSW bits with the new flags, except
// CVT_SW/TRNC_SW which replace only the low three.
// Always returns 0.
int fpu_subop(int sub_op, int arg1, int arg2, unsigned int *clocks) {
	int i, flags = 0; // Set Flags, OV set to Zero
	float fTemp;
	int temp;

    if(sub_op > 15) {
        vb_log_msg(10,"\n%08lx\tFPU Error: %04x", PC, sub_op);
		return 0;
    }

	switch(sub_op) {
	case CMPF_S:
		//compare: only the flags are updated
		fTemp = (*((float *)&P_REG[arg1])) - (*((float *)&P_REG[arg2]));

		if (fTemp == 0.0F) flags = flags | PSW_Z;
		if (fTemp < 0.0F)  flags = flags | PSW_S | PSW_CY; //changed according to NEC docs
		S_REG[PSW] = (S_REG[PSW] & 0xFFFFFFF0)|flags;
		*clocks+=7; //7 to 10
		break;

	case CVT_WS:
		//integer to float
		fTemp = (float)((long)P_REG[arg2]);

		if (fTemp == 0) flags = flags | PSW_Z;
		if (fTemp < 0.0F)  flags = flags | PSW_S | PSW_CY; //changed according to NEC docs
		S_REG[PSW] = (S_REG[PSW] & 0xFFFFFFF0)|flags;

		P_REG[arg1] = *((VB_WORD *)&fTemp);
		*clocks+=5; //5 to 16
		break;

	case CVT_SW:
		//float to integer, rounding half away from zero
		if ((*((float *)&P_REG[arg2]))>=0.0) //round
			P_REG[arg1] = (long)floorf(*((float *)&P_REG[arg2])+0.5F);
		else
			P_REG[arg1] = (long)ceilf(*((float *)&P_REG[arg2])-0.5F);

		if (P_REG[arg1] == 0) flags = flags | PSW_Z;
		if (P_REG[arg1] & 0x80000000)  flags = flags | PSW_S;
		S_REG[PSW] = (S_REG[PSW] & 0xFFFFFFF8)|flags;
		*clocks+=9; //9 to 14
		break;

	case ADDF_S:
		fTemp = (*((float *)&P_REG[arg1])) + (*((float *)&P_REG[arg2]));

		if (fTemp == 0.0F) flags = flags | PSW_Z;
		if (fTemp < 0.0F)  flags = flags | PSW_S | PSW_CY; //changed according to NEC docs

		S_REG[PSW] = (S_REG[PSW] & 0xFFFFFFF0)|flags;

		P_REG[arg1] = *((VB_WORD *)&fTemp);
		*clocks+=9; //9 to 28
		break;

	case SUBF_S:
		fTemp = (*((float *)&P_REG[arg1])) - (*((float *)&P_REG[arg2]));

		if (fTemp == 0.0F) flags = flags | PSW_Z;
		if (fTemp < 0.0F)  flags = flags | PSW_S | PSW_CY; //changed according to NEC docs

		S_REG[PSW] = (S_REG[PSW] & 0xFFFFFFF0)|flags;

		P_REG[arg1] = *((VB_WORD *)&fTemp);
		*clocks+=12; //12 to 28
		break;

	case MULF_S:
		fTemp = (*((float *)&P_REG[arg1])) * (*((float *)&P_REG[arg2]));

		if (fTemp == 0.0F) flags = flags | PSW_Z;
		if (fTemp < 0.0F)  flags = flags | PSW_S | PSW_CY; //changed according to NEC docs
		S_REG[PSW] = (S_REG[PSW] & 0xFFFFFFF0)|flags;

		P_REG[arg1] = *((VB_WORD *)&fTemp);
		*clocks+=8; //8 to 30
		break;

	case DIVF_S:
		fTemp = (*((float *)&P_REG[arg1])) / (*((float *)&P_REG[arg2]));

		if (fTemp == 0.0F) flags = flags | PSW_Z;
		if (fTemp < 0.0F)  flags = flags | PSW_S | PSW_CY; //changed according to NEC docs

		S_REG[PSW] = (S_REG[PSW] & 0xFFFFFFF0)|flags;

		P_REG[arg1] = *((VB_WORD *)&fTemp);
		*clocks+=44; //always 44
		break;

	case XB:
		//swap the two low bytes, keep the upper halfword
		if (arg2) vb_log_msg(10,"\nXB Instruction, arg2 = r%d",arg2);
		P_REG[arg1] = ((P_REG[arg1]&0xFFFF0000) | (((P_REG[arg1]<<8)&0xFF00) | ((P_REG[arg1]>>8)&0xFF)));
		*clocks+=1; //just a guess
		break;

	case XH:
		//swap the two halfwords
		if (arg2) vb_log_msg(10,"\nXH Instruction, arg2 = r%d",arg2);
		P_REG[arg1] = (P_REG[arg1]<<16)|(P_REG[arg1]>>16);
		*clocks+=1; //just a guess
		break;

	case REV:
		//reverse the bit order of the source register
		temp = 0;
		for (i = 0; i < 32; i++) temp = ((temp << 1) | ((P_REG[arg2] >> i) & 1));
		P_REG[arg1] = temp;
		*clocks+=1; //just a guess
		break;

	case TRNC_SW:
		//float to integer, rounding toward zero
		if ((*((float *)&P_REG[arg2]))>=0.0) //truncate, round toward 0
			P_REG[arg1] = (long)floorf(*((float *)&P_REG[arg2]));
		else
			P_REG[arg1] = (long)ceilf(*((float *)&P_REG[arg2]));

		if (P_REG[arg1] == 0) flags = flags | PSW_Z;
		if (P_REG[arg1] & 0x80000000)  flags = flags | PSW_S;
		S_REG[PSW] = (S_REG[PSW] & 0xFFFFFFF8)|flags;
		*clocks+=8; //8 to 14
		break;

	case MPYHW:
		P_REG[arg1 & 0x1F] = (long)P_REG[arg1 & 0x1F] * (long)P_REG[arg2 & 0x1F]; //signed multiplication
		*clocks+=9; //always 9
		break;

	default:
        vb_log_msg(10,"\n%08lx\tFPU Error: %04x", PC, sub_op);
		break;
	}

	return 0;
}


// Advance the hardware timer and raise the timer interrupt when it expires.
//   cycles - current value of the running cycle counter
// The time of the last timer tick is kept in a function-local static.
//
// The timing needs a LOT of work: the count values come from an old clock
// cycle table and are far off.  FIXME
//
// The controller interrupt is intentionally not raised from here
// ("if ((!(tHReg.SCR & 0x80)) && (handle_input()&0xFFFC)) v810_int(0);"):
// it breaks the emulator unless it is evaluated on every cycle. FIXME: find
// out why.
void serviceInt(unsigned int cycles) {
	static unsigned int lasttime=0;

	if (!(tHReg.TCR & 0x01))
		return; // Timer disabled

	if (!((cycles-lasttime) > tHReg.tTRC))
		return; // tick interval not elapsed yet

	//one tick: count down (never below zero) and mirror into the low/high byte regs
	if (tHReg.tCount != 0)
		tHReg.tCount -= 1;
	tHReg.TLB = (tHReg.tCount&0xFF);
	tHReg.THB = ((tHReg.tCount>>8)&0xFF);
	lasttime = cycles;

	if (tHReg.tCount != 0)
		return;

	//expired
	tHReg.tCount = tHReg.tTHW; //reset counter
	tHReg.TCR |= 0x02; //Zero Status
	if (tHReg.TCR & 0x08)
		v810_int(1);
}

// Step the display (VIP) status registers and raise display interrupts.
//   cycles - current value of the running cycle counter
// Returns 1 only on the call that starts a new frame (first branch below),
// 0 on every other call.
// State kept between calls (function-local statics):
//   lastfb   - cycle count at the last row step / end of frame
//   rowcount - phase counter: 0x00-0x1B are stepped every 0x0A00 cycles,
//              0x1C-0x21 are the end-of-frame steps, then it wraps to 0
//   tmp1     - set once the frame-start work has been done for this frame
//   frames   - frame counter compared against FRMCYC
// Interrupt-pending bits (INTPND) are always set; v810_int(4) is only called
// when the matching INTENB bit is set (or VFHACK forces it at phase 0x1C).
// NOTE(review): the meaning of the individual DPSTTS/XPSTTS bit patterns
// cannot be told from this file - confirm against the VIP documentation.
int serviceDisplayInt(unsigned int cycles) {
	static unsigned int lastfb=0;
	static int rowcount,tmp1,frames=0;
	int gamestart;
	unsigned int tfb = (cycles-lastfb); // cycles elapsed since lastfb


	//Handle DPSTTS, XPSTTS, and Frame interrupts
	if (rowcount < 0x1C) {
		// frame start: done once per frame, shortly after phase 0 begins
		if ((rowcount == 0) && (tfb > 0x0210) && (!tmp1)) {
			tmp1=1;
			tVIPREG.XPSTTS &= 0x000F;
			tVIPREG.DPSTTS = ((tVIPREG.DPCTRL&0x0302)|0xC0);
			// bit 0x0008 is added only once more than FRMCYC frames have passed
			if (++frames > tVIPREG.FRMCYC) {
				frames = 0;
				gamestart = 0x0008;
			} else {
				gamestart = 0;
			}
			if (tVIPREG.INTENB&(0x0010|gamestart))
				v810_int(4);
			tVIPREG.INTPND |= (0x0010|gamestart);
			return 1;
		}
		if ((tfb > 0x0500) && (!(tVIPREG.XPSTTS&0x8000))) 
			tVIPREG.XPSTTS |= 0x8000;
		// step to the next phase; the current rowcount is published in XPSTTS bits 8 and up
		if (tfb > 0x0A00) {
			tVIPREG.XPSTTS = ((tVIPREG.XPSTTS&0xE0)|(rowcount<<8)|(tVIPREG.XPCTRL & 0x02));
			rowcount++;
			lastfb=cycles;
		}
		// note: tested after the step above, so this sees the incremented rowcount
		if ((rowcount == 0x12) && (tfb > 0x670))
			tVIPREG.DPSTTS = ((tVIPREG.DPCTRL&0x0302)|(tVIPREG.tFrame&1?0xD0:0xC4));
	} else {
		// end-of-frame steps: lastfb is not updated here, so the thresholds
		// below are all measured from the last row step
		if ((rowcount == 0x1C) && (tfb > 0x10000)) {			//0x100000
			tVIPREG.XPSTTS = (0x1B00|(tVIPREG.XPCTRL & 0x02));

			if(tVBOpt.VFHACK)					//vertical force hack
				v810_int(4);
			else if (tVIPREG.INTENB&0x4000) 
				v810_int(4);					//XPEND

			tVIPREG.INTPND |= 0x4000;				//(tVIPREG.INTENB&0x4000);
			rowcount++;
		} else if ((rowcount == 0x1D) && (tfb > 0x18000)) {		//0xE690
			tVIPREG.DPSTTS = ((tVIPREG.DPCTRL&0x0302)|0xC0);
			if (tVIPREG.INTENB&0x0002) 
				v810_int(4);					//LFBEND
			tVIPREG.INTPND |= 0x0002;				//(tVIPREG.INTENB&0x0002);
			rowcount++;
		} else if ((rowcount == 0x1E) && (tfb > 0x20000)) {		//0x15E70
			tVIPREG.DPSTTS = ((tVIPREG.DPCTRL&0x0302)|0x40);
			if (tVIPREG.INTENB&0x0004) 
				v810_int(4);					//RFBEND
			tVIPREG.INTPND |= 0x0004;				//(tVIPREG.INTENB&0x0004);
			rowcount++;
		} else if ((rowcount == 0x1F) && (tfb > 0x28000)) {		//0x1FAD8
			//tVIPREG.DPSTTS = ((tVIPREG.DPCTRL&0x0302)|((tVIPREG.tFrame&1)?0x48:0x60));
			tVIPREG.DPSTTS = ((tVIPREG.DPCTRL&0x0302)|((tVIPREG.tFrame&1)?0x60:0x48)); //if editing FB0, shouldn't be drawing FB0
			if (tVIPREG.INTENB&0x2000) 
				v810_int(4);					//SBHIT
			tVIPREG.INTPND |= 0x2000;
			rowcount++;
		} else if ((rowcount == 0x20) && (tfb > 0x38000)) {		//0x33FD8
			tVIPREG.DPSTTS = ((tVIPREG.DPCTRL&0x0302)|0x40);
			rowcount++;
		} else if ((rowcount == 0x21) && (tfb > 0x42000)) {
			// frame finished: restart the phase counter and re-arm the frame-start work
			tmp1=0;
			rowcount=0;
			// tFrame alternates between 1 and 2
			tVIPREG.tFrame++;
			if ((tVIPREG.tFrame < 1) || (tVIPREG.tFrame > 2)) tVIPREG.tFrame = 1;
			tVIPREG.XPSTTS = (0x1B00|(tVIPREG.tFrame<<2)|(tVIPREG.XPCTRL & 0x02));
			// clear 0x6000 bytes in each of two display-RAM regions selected by
			// tFrame, 0x10000 bytes apart (unconditionally; the XPSTTS test is disabled)
			//if (tVIPREG.XPSTTS&2) //clear screen buffer
			//{
				memset((VB_BYTE *)(V810_DISPLAY_RAM.off+(tVIPREG.tFrame-1)*0x8000),0,0x6000);
				memset((VB_BYTE *)(V810_DISPLAY_RAM.off+((tVIPREG.tFrame-1)+2)*0x8000),0,0x6000);
			//}
			lastfb=cycles;
		}
	}
	return 0;
}



// Generate interrupt #n.
//   iNum - interrupt level, 0x00-0x0F; anything larger is ignored
// The request is dropped when PSW has NP, EP or ID set, or when iNum is
// below the level currently held in the PSW interrupt-level field (PSW_IA).
// On acceptance PC and PSW are saved in EIPC/EIPSW, execution is redirected
// to 0xFFFFFE00 | (iNum << 4), ECR receives 0xFE00 | (iNum << 4), EP and ID
// are set and the PSW level field is replaced by iNum+1 (capped at 0x0F).
void v810_int(VB_WORD iNum) {
    VB_WORD level;

    vb_log_msg(4,"\nInt atempt %x",iNum);

    if (iNum > 0x0F) return;  // Invalid interrupt number...
    if((S_REG[PSW] & PSW_NP)) return;
    if((S_REG[PSW] & PSW_EP)) return; // Exception pending?
    if((S_REG[PSW] & PSW_ID)) return; // Interrupt disabled
    if(iNum < ((S_REG[PSW] & PSW_IA)>>16)) return; // Interrupt too low on the chain

    vb_log_msg(6,"\nInt %x",iNum);

    //Ready to generate the interrupt
    S_REG[EIPC]  = PC;
    S_REG[EIPSW] = S_REG[PSW];

    PC = 0xFFFFFE00 | (iNum << 4);

    S_REG[ECR] = 0xFE00 | (iNum << 4);
    S_REG[PSW] = S_REG[PSW] | PSW_EP;
    S_REG[PSW] = S_REG[PSW] | PSW_ID;

    level = iNum + 1;
    if (level > 0x0F)
        level = 0x0F;
    //Replace the level field rather than OR-ing into it: leftover bits of a
    //previous level would otherwise yield a wrong (too high) level
    S_REG[PSW] = (S_REG[PSW] & ~((VB_WORD)PSW_IA)) | (level << 16); //Set the interrupt level
}


// Generate exception #n.
// Exceptions are divide by zero, trap and invalid opcode; we can live without...
//   iNum  - exception number, 0x00-0x0F; anything larger is ignored
//   eCode - exception code; only the low 16 bits are used
// Nothing happens while the PSW interrupt-level field (PSW_IA) is non-zero.
// If PSW.EP is already set this is handled as a double exception: state goes
// to FEPC/FEPSW, the code is stored in the upper half of ECR, NP and ID are
// set and PC becomes 0xFFFFFFD0. Otherwise state goes to EIPC/EIPSW, the code
// is stored in ECR, EP and ID are set and PC becomes 0xFFFFFF00 | (iNum << 4).
// A separate fatal-exception path (PSW.NP already set, vector 0xFFFFFFE0) is
// not implemented.
void v810_exp(VB_WORD iNum, VB_WORD eCode) {
    const VB_WORD code = (eCode & 0xFFFF);

    // Invalid exception number, or interrupt pending
    if ((iNum > 0x0F) || (((S_REG[PSW] & PSW_IA)>>16) != 0))
        return;

    if (S_REG[PSW] & PSW_EP) {
        // Double exception
        S_REG[FEPC]  = PC;
        S_REG[FEPSW] = S_REG[PSW];
        S_REG[ECR]   = (code << 16); //Exception code, don't get it???
        S_REG[PSW]  |= PSW_NP;
        S_REG[PSW]  |= PSW_ID;

        PC = 0xFFFFFFD0;
    } else {
        // Regular exception
        S_REG[EIPC]  = PC;
        S_REG[EIPSW] = S_REG[PSW];
        S_REG[ECR]   = code; //Exception code, don't get it???
        S_REG[PSW]  |= PSW_EP;
        S_REG[PSW]  |= PSW_ID;

        PC = 0xFFFFFF00 | (iNum << 4);
    }
}



