// ----------------------------------------------------------------------------
// Copyright 2000, Paul Nettle. All rights reserved.
//
// Fast floating point substitution routines
//
// This file has been entered into the public domain by the author.
// ----------------------------------------------------------------------------
//
// COMPILE WITH WATCOM
//
// ----------------------------------------------------------------------------
//
// Routine Equivalent C code
// -----------------------------------------
// ICHOP (int) value
// FCHOP (float) ((int) value)
// IFLOOR (int) floor(value)
// FFLOOR floor(value)
// ICEIL (int) ceil(value)
// FCEIL ceil(value)
// FFRAC value - floor(value)
// SUB_PIX ceil(value) - value
// FTOX24 (int) (value * 0x1000000)
// FTOX16 (int) (value * 0x10000)
// FTOX8 (int) (value * 0x100)
// IS_NEG (bool) (value1 < 0.0f)
// IS_LESS (bool) (value1 < value2)
// IS_LEQUAL (bool) (value1 <= value2)
// IS_GREATER (bool) (value1 > value2)
// IS_GEQUAL (bool) (value1 >= value2)
//
// ----------------------------------------------------------------------------
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <time.h>
// ----------------------------------------------------------------------------
// Absolute value of `a`. The argument is expanded (and therefore evaluated)
// more than once, so do not pass expressions with side effects.
#define ABS(a) ((a) < 0 ? -(a) : (a))
// ----------------------------------------------------------------------------
// Shift tables used by the integer-only conversion routines. Each table is
// indexed by the float's biased exponent e (0..255) and holds two bytes per
// exponent: [e*2+1] is a right-shift count and [e*2+0] a left-shift count.
// initFPU() writes only one byte of each pair; the other keeps its zero
// initial value.
// chptab: plain truncation to integer (ICHOP2 / IFLOOR3 / ICEIL2)
static unsigned char chptab[512];
// f24tab: truncation to 8.24 fixed point (FTOX242)
static unsigned char f24tab[512];
// f16tab: truncation to 16.16 fixed point (FTOX162)
static unsigned char f16tab[512];
// fx8tab: truncation to 24.8 fixed point (FTOX82)
static unsigned char fx8tab[512];
// Per-exponent mask that IFLOOR3 / ICEIL2 AND with the mantissa to detect a
// non-zero fractional part; filled by initFPU()
static unsigned int msktab[256];
// x87 control word as it was when FINIT() ran
static unsigned short _savedControl;
// Copies of the saved control word with the rounding-control field changed;
// built by FINIT() and loaded with fldcw by the FPU-based routines
static unsigned short _fchopControl;
static unsigned short _ffloorControl;
static unsigned short _fceilControl;
// ----------------------------------------------------------------------------
// timerLo/timerHi: time-stamp counter captured by startTimer().
// timerAdjust: subtracted from every measurement by the test routines; it is
//              not assigned anywhere in the code visible here.
// writeValue: scratch dword the inline assembly uses to move values between
//             the FPU and the integer registers.
static unsigned int timerLo, timerHi, timerAdjust, writeValue;
// "Magic" doubles stored as {low dword, high dword}. Each is 1.5 * 2^N; adding
// one to a value and storing the sum as a double leaves the converted integer
// (or fixed-point) result in the low dword of the stored double.
// 0x43380000 -> 1.5 * 2^52: low dword = integer part
static unsigned int _t_conv_int[2] = {0,0x43380000};
// 0x42B80000 -> 1.5 * 2^44: low dword = value * 2^8 (24.8 fixed point)
static unsigned int _t_conv24_08[2] = {0,0x42B80000};
// 0x42380000 -> 1.5 * 2^36: low dword = value * 2^16 (16.16 fixed point)
static unsigned int _t_conv16_16[2] = {0,0x42380000};
// 0x41B80000 -> 1.5 * 2^28: low dword = value * 2^24 (8.24 fixed point)
static unsigned int _t_conv08_24[2] = {0,0x41B80000};
// Destination for the stored double; the routines read element [0] back
static unsigned int _t_fxfl[2];
// ----------------------------------------------------------------------------
// Starts a timing interval: reads the CPU time-stamp counter (the raw opcode
// bytes 0F 31 encode RDTSC) and stores the low half in timerLo and the high
// half in timerHi. Declared as clobbering EAX and EDX.
void startTimer();
#pragma aux startTimer = \
"db 0x0F, 0x31" \
"mov timerLo,eax" \
"mov timerHi,edx" \
modify exact [edx eax];
// ----------------------------------------------------------------------------
// Ends a timing interval: reads the time-stamp counter again (0F 31 = RDTSC)
// and returns its low 32 bits minus timerLo. The high half (EDX / timerHi) is
// ignored, so only the low 32 bits of the elapsed tick count are returned.
int stopTimer();
#pragma aux stopTimer = \
"db 0x0F, 0x31" \
"sub eax,timerLo" \
value [eax] \
modify exact [edx eax];
// ----------------------------------------------------------------------------
// Captures the current x87 control word in _savedControl and derives three
// variants that differ only in the rounding-control field (mask 0x0C00; the
// AND with 0xf3ff clears it before each OR):
//   _fchopControl  : 0x0C00
//   _ffloorControl : 0x0400
//   _fceilControl  : 0x0800
// As the names suggest, these select truncate / round-down / round-up.
// Must run before any routine that executes "fldcw" on one of these words.
// _savedControl is not reloaded anywhere in the code visible here.
// NOTE(review): the code writes AX and four static variables, yet the pragma
// declares "modify nomemory exact []"; the function is also declared to return
// short without a "value" clause. Confirm this is intended.
short FINIT();
#pragma aux FINIT = \
"fstcw _savedControl" \
"mov ax, _savedControl" \
"and ax, 0xf3ff" \
"or ax, 0x0C00" \
"mov _fchopControl, ax" \
"and ax, 0xf3ff" \
"or ax, 0x0400" \
"mov _ffloorControl, ax" \
"and ax, 0xf3ff" \
"or ax, 0x0800" \
"mov _fceilControl, ax" \
parm nomemory caller [] \
modify nomemory exact [];
// ----------------------------------------------------------------------------
// ICHOP: equivalent of (int) Val, done on the FPU.
// Val arrives on the x87 stack. The routine loads _fchopControl, stores the
// value as a 32-bit integer into writeValue (popping it) and returns that
// integer in EAX.
// Side effect: the FPU control word stays at _fchopControl afterwards.
// Requires FINIT() (via initFPU) to have been called.
// This routine executes in approximately 16 cycles
int ICHOP( float Val );
#pragma aux ICHOP = \
"fldcw _fchopControl" \
"fistp dword ptr writeValue" \
"mov eax,writeValue" \
value [eax] \
parm nomemory caller [8087] \
modify nomemory exact [eax 8087];
// ----------------------------------------------------------------------------
// ICHOP2: equivalent of (int) Val using integer instructions only.
// Val's raw bit pattern arrives in EAX. Steps:
//   - EAX = 23 fraction bits with bit 23 (0x800000) forced on
//   - EBX = biased exponent (bits 23..30 shifted down)
//   - EDX = 0 for a clear sign bit, -1 for a set sign bit (sar 31)
//   - shift EAX right by chptab[e*2+1], then left by chptab[e*2+0]
//   - xor/sub with EDX negates the result when the sign bit was set
// Requires initFPU() to have filled chptab.
// This routine executes in approximately 21 cycles
int ICHOP2( float Val );
#pragma aux ICHOP2 = \
"mov ebx,eax" \
"and eax,0x7fffff" \
"mov edx,ebx" \
"and ebx,0x7f800000" \
"shr ebx,23" \
"or eax,0x800000" \
"sar edx,31" \
"mov cl,chptab[ebx*2+1]" \
"sar eax,cl" \
"mov cl,chptab[ebx*2+0]" \
"sal eax,cl" \
"xor eax,edx" \
"sub eax,edx" \
value [eax] \
parm nomemory caller [eax] \
modify nomemory exact [eax ebx ecx edx];
// ----------------------------------------------------------------------------
// FCHOP: equivalent of (float) ((int) Val).
// Loads _fchopControl, stores Val as a 32-bit integer into writeValue and
// loads that integer back onto the x87 stack as the return value. The value
// therefore passes through a 32-bit integer.
// Side effect: the FPU control word stays at _fchopControl afterwards.
// This routine executes in approximately 19 cycles
float FCHOP( float Val );
#pragma aux FCHOP = \
"fldcw _fchopControl" \
"fistp dword ptr writeValue" \
"fild writeValue" \
value [8087] \
parm nomemory caller [8087] \
modify nomemory exact [8087];
// ----------------------------------------------------------------------------
// FCHOP2: alternative to FCHOP that never leaves the FPU. Loads _fchopControl
// and rounds the value on the x87 stack to an integer with frndint.
// Side effect: the FPU control word stays at _fchopControl afterwards.
// This routine executes in approximately 30 cycles
float FCHOP2( float Val );
#pragma aux FCHOP2 = \
"fldcw _fchopControl" \
"frndint" \
value [8087] \
parm nomemory caller [8087] \
modify nomemory exact [8087];
// ----------------------------------------------------------------------------
// IFLOOR: equivalent of (int) floor(Val), done on the FPU.
// Loads _ffloorControl, stores Val as a 32-bit integer into writeValue
// (popping it) and returns that integer in EAX.
// Side effect: the FPU control word stays at _ffloorControl afterwards.
// This routine executes in approximately 16 cycles
int IFLOOR( float Val );
#pragma aux IFLOOR = \
"fldcw _ffloorControl" \
"fistp dword ptr writeValue" \
"mov eax,writeValue" \
value [eax] \
parm nomemory caller [8087] \
modify nomemory exact [eax 8087];
// ----------------------------------------------------------------------------
// IFLOOR2: (int) floor(Val) via the "magic number" trick.
// Loads _ffloorControl, adds the double stored in _t_conv_int (1.5 * 2^52),
// stores the sum as a double into _t_fxfl and returns the low dword of that
// double in EAX. At that magnitude a double has no fraction bits left, so the
// low dword holds the rounded integer.
// Side effect: the FPU control word stays at _ffloorControl afterwards.
// This routine executes in approximately 16 cycles
int IFLOOR2( float Val );
#pragma aux IFLOOR2 = \
"fldcw _ffloorControl" \
"fadd qword ptr _t_conv_int" \
"fstp qword ptr _t_fxfl" \
"mov eax,_t_fxfl" \
value [eax] \
parm nomemory caller [8087] \
modify nomemory exact [eax 8087];
// ----------------------------------------------------------------------------
// IFLOOR3: (int) floor(Val) using integer instructions only.
// Performs the same truncation as ICHOP2 (mantissa with bit 23 forced on,
// shifted by the chptab pair for the exponent, then sign-corrected through
// EDX). ESI keeps a copy of the unshifted mantissa. Afterwards:
//   - sign bit clear (EDX == 0): the truncated value is returned unchanged
//   - sign bit set: if the mantissa has any bit inside msktab[exponent], the
//     result is decremented by one
// Because bit 23 is always forced on, -0.0f (0x80000000) returns -1; the test
// harness treats that input as an expected mismatch against floor().
// Requires initFPU() to have filled chptab and msktab.
// This routine executes in approximately 25 cycles
int IFLOOR3( float Val );
#pragma aux IFLOOR3 = \
"mov ebx,eax" \
"and eax,0x7fffff" \
"mov edx,ebx" \
"and ebx,0x7f800000" \
"shr ebx,23" \
"or eax,0x800000" \
"sar edx,31" \
"mov esi,eax" \
"mov cl,chptab[ebx*2+1]" \
"sar eax,cl" \
"mov cl,chptab[ebx*2+0]" \
"sal eax,cl" \
"xor eax,edx" \
"sub eax,edx" \
"or edx,edx" \
"jz l1" \
"and esi,msktab[ebx * 4]" \
"jz l1" \
"dec eax" \
"l1:" \
value [eax] \
parm nomemory caller [eax] \
modify nomemory exact [eax ebx ecx edx esi];
// ----------------------------------------------------------------------------
// FFLOOR: equivalent of floor(Val), returned as a float.
// Loads _ffloorControl, stores Val as a 32-bit integer into writeValue and
// loads that integer back onto the x87 stack. The value therefore passes
// through a 32-bit integer.
// Side effect: the FPU control word stays at _ffloorControl afterwards.
// This routine executes in approximately 19 cycles
float FFLOOR( float Val );
#pragma aux FFLOOR = \
"fldcw _ffloorControl" \
"fistp dword ptr writeValue" \
"fild writeValue" \
value [8087] \
parm nomemory caller [8087] \
modify nomemory exact [8087];
// ----------------------------------------------------------------------------
// FFLOOR2: alternative to FFLOOR that never leaves the FPU. Loads
// _ffloorControl and rounds the value on the x87 stack with frndint.
// Side effect: the FPU control word stays at _ffloorControl afterwards.
// This routine executes in approximately 30 cycles
float FFLOOR2( float Val );
#pragma aux FFLOOR2 = \
"fldcw _ffloorControl" \
"frndint" \
value [8087] \
parm nomemory caller [8087] \
modify nomemory exact [8087];
// ----------------------------------------------------------------------------
// ICEIL: equivalent of (int) ceil(Val), done on the FPU.
// Loads _fceilControl, stores Val as a 32-bit integer into writeValue
// (popping it) and returns that integer in EAX.
// Side effect: the FPU control word stays at _fceilControl afterwards.
// This routine executes in approximately 16 cycles
int ICEIL( float Val );
#pragma aux ICEIL = \
"fldcw _fceilControl" \
"fistp dword ptr writeValue" \
"mov eax,writeValue" \
value [eax] \
parm nomemory caller [8087] \
modify nomemory exact [eax 8087];
// ----------------------------------------------------------------------------
// ICEIL2: (int) ceil(Val) using integer instructions only.
//   - An all-zero bit pattern (+0.0f) returns 0 immediately; without this
//     early exit the forced bit 23 would be counted as a fraction.
//   - Otherwise the value is truncated as in ICHOP2 (mantissa with bit 23
//     forced on, shifted by the chptab pair, sign-corrected through EDX),
//     with ESI holding a copy of the unshifted mantissa.
//   - Sign bit set (EDX != 0): the truncated value is returned unchanged.
//   - Sign bit clear: if the mantissa has any bit inside msktab[exponent],
//     the result is incremented by one.
// Requires initFPU() to have filled chptab and msktab.
// This routine executes in approximately 26 cycles
int ICEIL2( float Val );
#pragma aux ICEIL2 = \
"or eax,eax" \
"jz l1" \
"mov ebx,eax" \
"and eax,0x7fffff" \
"mov edx,ebx" \
"and ebx,0x7f800000" \
"shr ebx,23" \
"sar edx,31" \
"or eax,0x800000" \
"mov esi,eax" \
"mov cl,chptab[ebx*2+1]" \
"sar eax,cl" \
"mov cl,chptab[ebx*2+0]" \
"sal eax,cl" \
"xor eax,edx" \
"sub eax,edx" \
"or edx,edx" \
"jnz l1" \
"and esi,msktab[ebx * 4]" \
"jz l1" \
"inc eax" \
"l1:" \
value [eax] \
parm nomemory caller [eax] \
modify nomemory exact [eax ebx ecx edx esi];
// ----------------------------------------------------------------------------
// FCEIL: equivalent of ceil(Val), returned as a float.
// Loads _fceilControl, stores Val as a 32-bit integer into writeValue and
// loads that integer back onto the x87 stack. The value therefore passes
// through a 32-bit integer.
// Side effect: the FPU control word stays at _fceilControl afterwards.
// This routine executes in approximately 19 cycles
float FCEIL( float Val );
#pragma aux FCEIL = \
"fldcw _fceilControl" \
"fistp dword ptr writeValue" \
"fild writeValue" \
value [8087] \
parm nomemory caller [8087] \
modify nomemory exact [8087];
// ----------------------------------------------------------------------------
// FCEIL2: alternative to FCEIL that never leaves the FPU. Loads _fceilControl
// and rounds the value on the x87 stack with frndint.
// Side effect: the FPU control word stays at _fceilControl afterwards.
// This routine executes in approximately 31 cycles
float FCEIL2( float Val );
#pragma aux FCEIL2 = \
"fldcw _fceilControl" \
"frndint" \
value [8087] \
parm nomemory caller [8087] \
modify nomemory exact [8087];
// ----------------------------------------------------------------------------
// FFRAC: equivalent of Val - floor(Val).
// Loads _ffloorControl, stores the rounded value as a 32-bit integer into
// writeValue WITHOUT popping (fist), then subtracts that integer from the
// value still on the x87 stack (fisub).
// Side effect: the FPU control word stays at _ffloorControl afterwards.
// This routine executes in approximately 22 cycles
float FFRAC( float Val );
#pragma aux FFRAC = \
"fldcw _ffloorControl" \
"fist dword ptr writeValue" \
"fisub dword ptr writeValue" \
value [8087] \
parm nomemory caller [8087] \
modify nomemory exact [8087];
// ----------------------------------------------------------------------------
// FFRAC2: alternative to FFRAC that never leaves the FPU. Loads
// _ffloorControl, duplicates the value, rounds the copy with frndint and
// subtracts it from the original (fsubp), leaving Val - floor(Val).
// Side effect: the FPU control word stays at _ffloorControl afterwards.
// This routine executes in approximately 34 cycles
float FFRAC2( float Val );
#pragma aux FFRAC2 = \
"fldcw _ffloorControl" \
"fld st(0)" \
"frndint" \
"fsubp st(1), st" \
value [8087] \
parm nomemory caller [8087] \
modify nomemory exact [8087];
// ----------------------------------------------------------------------------
// SUB_PIX: equivalent of ceil(Val) - Val.
// Loads _fceilControl, stores the rounded value as a 32-bit integer into
// writeValue WITHOUT popping (fist), then computes integer - value with the
// reversed subtract (fisubr).
// Side effect: the FPU control word stays at _fceilControl afterwards.
// This routine executes in approximately 22 cycles
float SUB_PIX( float Val );
#pragma aux SUB_PIX = \
"fldcw _fceilControl" \
"fist dword ptr writeValue" \
"fisubr dword ptr writeValue" \
value [8087] \
parm nomemory caller [8087] \
modify nomemory exact [8087];
// ----------------------------------------------------------------------------
// SUB_PIX2: alternative to SUB_PIX that never leaves the FPU. Loads
// _fceilControl, duplicates the value, rounds the copy with frndint and
// computes rounded - original with the reversed subtract (fsubrp).
// Side effect: the FPU control word stays at _fceilControl afterwards.
// This routine executes in approximately 34 cycles
float SUB_PIX2( float Val );
#pragma aux SUB_PIX2 = \
"fldcw _fceilControl" \
"fld st(0)" \
"frndint" \
"fsubrp st(1), st" \
value [8087] \
parm nomemory caller [8087] \
modify nomemory exact [8087];
// ----------------------------------------------------------------------------
// FTOX24: converts Val to 8.24 fixed point, i.e. approximately
// (int) (Val * 0x1000000).
// Adds the double stored in _t_conv08_24 (1.5 * 2^28), stores the sum as a
// double into _t_fxfl and returns its low dword in EAX.
// No control word is loaded here, so rounding follows whatever mode the FPU
// is currently in; the test harness accepts a difference of 1 from the C
// cast.
// This routine executes in approximately 10 cycles
int FTOX24( float Val );
#pragma aux FTOX24 = \
"fadd qword ptr _t_conv08_24" \
"fstp qword ptr _t_fxfl" \
"mov eax,_t_fxfl" \
value [eax] \
parm nomemory caller [8087] \
modify nomemory exact [eax 8087];
// ----------------------------------------------------------------------------
// FTOX242: 8.24 fixed-point conversion using integer instructions only.
// Same sequence as ICHOP2, but the shift counts come from f24tab: mantissa
// with bit 23 forced on, shifted right by f24tab[e*2+1] and left by
// f24tab[e*2+0], then negated through EDX when the sign bit is set.
// Requires initFPU() to have filled f24tab.
// This routine executes in approximately 21 cycles
int FTOX242( float Val );
#pragma aux FTOX242 = \
"mov ebx,eax" \
"and eax,0x7fffff" \
"mov edx,ebx" \
"and ebx,0x7f800000" \
"shr ebx,23" \
"or eax,0x800000" \
"sar edx,31" \
"mov cl,f24tab[ebx*2+1]" \
"sar eax,cl" \
"mov cl,f24tab[ebx*2+0]" \
"sal eax,cl" \
"xor eax,edx" \
"sub eax,edx" \
value [eax] \
parm nomemory caller [eax] \
modify nomemory exact [eax ebx ecx edx];
// ----------------------------------------------------------------------------
// FTOX16: converts Val to 16.16 fixed point, i.e. approximately
// (int) (Val * 0x10000).
// Adds the double stored in _t_conv16_16 (1.5 * 2^36), stores the sum as a
// double into _t_fxfl and returns its low dword in EAX.
// No control word is loaded here, so rounding follows whatever mode the FPU
// is currently in; the test harness accepts a difference of 1 from the C
// cast.
// This routine executes in approximately 10 cycles
int FTOX16( float Val );
#pragma aux FTOX16 = \
"fadd qword ptr _t_conv16_16" \
"fstp qword ptr _t_fxfl" \
"mov eax,_t_fxfl" \
value [eax] \
parm nomemory caller [8087] \
modify nomemory exact [eax 8087];
// ----------------------------------------------------------------------------
// FTOX162: 16.16 fixed-point conversion using integer instructions only.
// Same sequence as ICHOP2, but the shift counts come from f16tab: mantissa
// with bit 23 forced on, shifted right by f16tab[e*2+1] and left by
// f16tab[e*2+0], then negated through EDX when the sign bit is set.
// Requires initFPU() to have filled f16tab.
// This routine executes in approximately 21 cycles
int FTOX162( float Val );
#pragma aux FTOX162 = \
"mov ebx,eax" \
"and eax,0x7fffff" \
"mov edx,ebx" \
"and ebx,0x7f800000" \
"shr ebx,23" \
"or eax,0x800000" \
"sar edx,31" \
"mov cl,f16tab[ebx*2+1]" \
"sar eax,cl" \
"mov cl,f16tab[ebx*2+0]" \
"sal eax,cl" \
"xor eax,edx" \
"sub eax,edx" \
value [eax] \
parm nomemory caller [eax] \
modify nomemory exact [eax ebx ecx edx];
// ----------------------------------------------------------------------------
// FTOX8: converts Val to 24.8 fixed point, i.e. approximately
// (int) (Val * 0x100).
// Adds the double stored in _t_conv24_08 (1.5 * 2^44), stores the sum as a
// double into _t_fxfl and returns its low dword in EAX.
// No control word is loaded here, so rounding follows whatever mode the FPU
// is currently in.
// This routine executes in approximately 10 cycles
int FTOX8( float Val );
#pragma aux FTOX8 = \
"fadd qword ptr _t_conv24_08" \
"fstp qword ptr _t_fxfl" \
"mov eax,_t_fxfl" \
value [eax] \
parm nomemory caller [8087] \
modify nomemory exact [eax 8087];
// ----------------------------------------------------------------------------
// FTOX82: 24.8 fixed-point conversion using integer instructions only.
// Same sequence as ICHOP2, but the shift counts come from fx8tab: mantissa
// with bit 23 forced on, shifted right by fx8tab[e*2+1] and left by
// fx8tab[e*2+0], then negated through EDX when the sign bit is set.
// Requires initFPU() to have filled fx8tab.
// This routine executes in approximately 21 cycles
int FTOX82( float Val );
#pragma aux FTOX82 = \
"mov ebx,eax" \
"and eax,0x7fffff" \
"mov edx,ebx" \
"and ebx,0x7f800000" \
"shr ebx,23" \
"or eax,0x800000" \
"sar edx,31" \
"mov cl,fx8tab[ebx*2+1]" \
"sar eax,cl" \
"mov cl,fx8tab[ebx*2+0]" \
"sal eax,cl" \
"xor eax,edx" \
"sub eax,edx" \
value [eax] \
parm nomemory caller [eax] \
modify nomemory exact [eax ebx ecx edx];
// ----------------------------------------------------------------------------
// IS_NEG: returns the sign bit of Val's raw bit pattern (EAX shifted right by
// 31, result in AL). This matches (Val < 0.0f) except that -0.0f
// (0x80000000) also reports true; the test harness excludes that input.
bool IS_NEG( float Val );
#pragma aux IS_NEG = \
"shr eax,31" \
value [al] \
parm nomemory caller [eax] \
modify nomemory exact [eax];
// ----------------------------------------------------------------------------
// IS_LESS: Val1 < Val2, decided by integer arithmetic on the raw bit
// patterns. Val1 arrives in EAX and Val2 in EBX; the sign bit of (EAX - EBX)
// is returned in AL.
// Only meaningful when both inputs are non-negative (the test harness forces
// its operands to be >= +0.0f before calling); bit patterns of negative
// floats order the opposite way under an integer compare.
bool IS_LESS( float Val1, float Val2 );
#pragma aux IS_LESS = \
"sub eax,ebx" \
"shr eax,31" \
value [al] \
parm nomemory caller [eax] [ebx]\
modify nomemory exact [eax];
// ----------------------------------------------------------------------------
// IS_LEQUAL: Val1 <= Val2, decided by integer arithmetic on the raw bit
// patterns. Val1 arrives in EBX and Val2 in EAX. The routine computes
// (Val2 - Val1), takes its sign bit (1 when Val2 < Val1) and inverts it, so
// the result in AL is !(Val2 < Val1) == (Val1 <= Val2).
// The previous body was identical to IS_LESS and so returned false for equal
// inputs.
// Only meaningful when both inputs are non-negative; bit patterns of negative
// floats order the opposite way under an integer compare.
bool IS_LEQUAL( float Val1, float Val2 );
#pragma aux IS_LEQUAL = \
"sub eax,ebx" \
"shr eax,31" \
"xor eax,1" \
value [al] \
parm nomemory caller [ebx] [eax]\
modify nomemory exact [eax];
// ----------------------------------------------------------------------------
// IS_GREATER: Val1 > Val2, decided by integer arithmetic on the raw bit
// patterns. The registers are swapped relative to IS_LESS: Val1 arrives in
// EBX and Val2 in EAX, so the sign bit of (EAX - EBX) = (Val2 - Val1) is
// returned in AL.
// Only meaningful when both inputs are non-negative (the test harness forces
// its operands to be >= +0.0f before calling).
bool IS_GREATER( float Val1, float Val2 );
#pragma aux IS_GREATER = \
"sub eax,ebx" \
"shr eax,31" \
value [al] \
parm nomemory caller [ebx] [eax]\
modify nomemory exact [eax];
// ----------------------------------------------------------------------------
// IS_GEQUAL: Val1 >= Val2, decided by integer arithmetic on the raw bit
// patterns. Val1 arrives in EAX and Val2 in EBX. The routine computes
// (Val1 - Val2), takes its sign bit (1 when Val1 < Val2) and inverts it, so
// the result in AL is !(Val1 < Val2) == (Val1 >= Val2).
// The previous body was identical to IS_GREATER and so returned false for
// equal inputs.
// Only meaningful when both inputs are non-negative; bit patterns of negative
// floats order the opposite way under an integer compare.
bool IS_GEQUAL( float Val1, float Val2 );
#pragma aux IS_GEQUAL = \
"sub eax,ebx" \
"shr eax,31" \
"xor eax,1" \
value [al] \
parm nomemory caller [eax] [ebx]\
modify nomemory exact [eax];
// ----------------------------------------------------------------------------
// Stores EAX to writeValue and immediately loads writeValue into EBX (a
// write followed by a read of the same address). EAX is not initialised
// here, so the stored value is whatever the register held.
// NOTE(review): presumably a micro-benchmark probe for store-then-load cost;
// no caller is visible in this part of the file.
void writeRead();
#pragma aux writeRead = \
"mov writeValue,eax" \
"mov ebx,writeValue" \
modify nomemory exact [eax ebx];
// ----------------------------------------------------------------------------
// Loads writeValue twice, once into EAX and once into EBX (two reads of the
// same address, no write).
// NOTE(review): presumably the read-only counterpart of writeRead() for
// timing comparisons; no caller is visible in this part of the file.
void readRead();
#pragma aux readRead = \
"mov eax,writeValue" \
"mov ebx,writeValue" \
modify nomemory exact [eax ebx];
// ----------------------------------------------------------------------------
// Stores EAX to writeValue five times in a row. EAX is not initialised here,
// so the stored value is whatever the register held.
// NOTE(review): the name suggests this is meant to drain pending writes
// before a measurement; confirm against the callers, which are not visible in
// this part of the file.
void flushWriteBack();
#pragma aux flushWriteBack = \
"mov writeValue,eax" \
"mov writeValue,eax" \
"mov writeValue,eax" \
"mov writeValue,eax" \
"mov writeValue,eax" \
modify nomemory;
// ----------------------------------------------------------------------------
// Produces one pseudo-random float for the test routines.
// Five rand() results are combined as (a - b) * (c - d) / divisor, which gives
// values of either sign with a wide spread of magnitudes.
float makeTestValue()
{
	float a = rand();
	float b = rand();
	float c = rand();
	float d = rand();

	// Keep drawing until the divisor is non-zero
	float divisor = rand();
	while (divisor == 0.0f)
	{
		divisor = rand();
	}

	return (a - b) * (c - d) / divisor;
}
// ----------------------------------------------------------------------------
// Stores the shift count for biased exponent `e` into a two-byte-per-exponent
// table. The count is clamped to [-31, 31]; a negative count is stored as a
// left shift in slot [e*2+0], any other count as a right shift in [e*2+1].
static void setShiftEntry(unsigned char *table, int e, int shift)
{
	if (shift > 31) shift = 31;
	if (shift < -31) shift = -31;

	if (shift < 0) table[e*2+0] = (unsigned char) -shift;
	else table[e*2+1] = (unsigned char) shift;
}

// Builds the lookup tables used by the integer-only routines and then calls
// FINIT() to prepare the FPU control words. Call once before using any of the
// fast routines.
//
// The tables are small, and only a short run of entries in the middle of each
// one is hit for typical values, so the cache cost of using tables is minor.
void initFPU()
{
	for (int e = 0; e < 256; e++)
	{
		// 150 = exponent bias (127) + fraction bits (23): shifting the 24-bit
		// mantissa right by (150 - e) leaves the integer part of the value
		const int base = 150 - e;

		setShiftEntry(chptab, e, base);

		// The fixed-point tables keep 24, 16 or 8 more bits of the mantissa
		setShiftEntry(f24tab, e, base - 24);
		setShiftEntry(f16tab, e, base - 16);
		setShiftEntry(fx8tab, e, base - 8);

		// Mask with the low (150 - e) bits set, capped at all 32 bits; zero
		// once the exponent is large enough that nothing is shifted out
		int topBit = base - 1;
		if (topBit > 31) topBit = 31;
		msktab[e] = (topBit < 0) ? 0 : (unsigned int) 0xffffffff >> (31 - topBit);
	}

	// Finally, init the FPU routines
	FINIT();
}
// ----------------------------------------------------------------------------
// Times ICHOP() against the compiler's (int) cast on `count` random values and
// checks that both give the same result.
// Prints "OK" or "ERROR!" followed by the average ticks per call (fast vs
// reference). With count <= 0 no samples are taken and 0 ticks are reported.
void testICHOP(const int count)
{
	printf( " Testing ICHOP...." );

	unsigned int fastTime = 0, slowTime = 0, diffCount = 0;

	for (int i = 0; i < count; i++)
	{
		float testValue = makeTestValue();

		startTimer();
		int test = ICHOP(testValue);
		fastTime += stopTimer() - timerAdjust;

		startTimer();
		int real = (int) testValue;
		slowTime += stopTimer() - timerAdjust;

		if (test != real) diffCount++;
	}

	// Average per call; skipped when there are no samples to avoid dividing by 0
	if (count > 0)
	{
		fastTime /= (unsigned int) count;
		slowTime /= (unsigned int) count;
	}

	printf( "Done. %s ticks: %u vs %u\n", diffCount ? "ERROR!":"OK", fastTime, slowTime);
}
// ----------------------------------------------------------------------------
// Times ICHOP2() against the compiler's (int) cast on `count` random values
// and checks that both give the same result.
// Prints "OK" or "ERROR!" followed by the average ticks per call (fast vs
// reference). With count <= 0 no samples are taken and 0 ticks are reported.
void testICHOP2(const int count)
{
	printf( " Testing ICHOP2..." );

	unsigned int fastTime = 0, slowTime = 0, diffCount = 0;

	for (int i = 0; i < count; i++)
	{
		float testValue = makeTestValue();

		startTimer();
		int test = ICHOP2(testValue);
		fastTime += stopTimer() - timerAdjust;

		startTimer();
		int real = (int) testValue;
		slowTime += stopTimer() - timerAdjust;

		if (test != real) diffCount++;
	}

	// Average per call; skipped when there are no samples to avoid dividing by 0
	if (count > 0)
	{
		fastTime /= (unsigned int) count;
		slowTime /= (unsigned int) count;
	}

	printf( "Done. %s ticks: %u vs %u\n", diffCount ? "ERROR!":"OK", fastTime, slowTime);
}
// ----------------------------------------------------------------------------
// Times FCHOP() against a float -> int -> float round trip on `count` random
// values and checks that both give the same result.
// Prints "OK" or "ERROR!" followed by the average ticks per call (fast vs
// reference). With count <= 0 no samples are taken and 0 ticks are reported.
void testFCHOP(const int count)
{
	printf( " Testing FCHOP...." );

	unsigned int fastTime = 0, slowTime = 0, diffCount = 0;

	for (int i = 0; i < count; i++)
	{
		float testValue = makeTestValue();

		startTimer();
		float test = FCHOP(testValue);
		fastTime += stopTimer() - timerAdjust;

		startTimer();
		float real = (int) ((float) testValue);
		slowTime += stopTimer() - timerAdjust;

		if (test != real) diffCount++;
	}

	// Average per call; skipped when there are no samples to avoid dividing by 0
	if (count > 0)
	{
		fastTime /= (unsigned int) count;
		slowTime /= (unsigned int) count;
	}

	printf( "Done. %s ticks: %u vs %u\n", diffCount ? "ERROR!":"OK", fastTime, slowTime);
}
// ----------------------------------------------------------------------------
// Times FCHOP2() against a float -> int -> float round trip on `count` random
// values and checks that both give the same result.
// Prints "OK" or "ERROR!" followed by the average ticks per call (fast vs
// reference). With count <= 0 no samples are taken and 0 ticks are reported.
void testFCHOP2(const int count)
{
	printf( " Testing FCHOP2..." );

	unsigned int fastTime = 0, slowTime = 0, diffCount = 0;

	for (int i = 0; i < count; i++)
	{
		float testValue = makeTestValue();

		startTimer();
		float test = FCHOP2(testValue);
		fastTime += stopTimer() - timerAdjust;

		startTimer();
		float real = (int) ((float) testValue);
		slowTime += stopTimer() - timerAdjust;

		if (test != real) diffCount++;
	}

	// Average per call; skipped when there are no samples to avoid dividing by 0
	if (count > 0)
	{
		fastTime /= (unsigned int) count;
		slowTime /= (unsigned int) count;
	}

	printf( "Done. %s ticks: %u vs %u\n", diffCount ? "ERROR!":"OK", fastTime, slowTime);
}
// ----------------------------------------------------------------------------
// Times IFLOOR() against (int) floor() on `count` random values and checks
// that both give the same result.
// Prints "OK" or "ERROR!" followed by the average ticks per call (fast vs
// reference). With count <= 0 no samples are taken and 0 ticks are reported.
void testIFLOOR(const int count)
{
	printf( " Testing IFLOOR..." );

	unsigned int fastTime = 0, slowTime = 0, diffCount = 0;

	for (int i = 0; i < count; i++)
	{
		float testValue = makeTestValue();

		startTimer();
		int test = IFLOOR(testValue);
		fastTime += stopTimer() - timerAdjust;

		startTimer();
		int real = (int) ((float) floor(testValue));
		slowTime += stopTimer() - timerAdjust;

		if (test != real) diffCount++;
	}

	// Average per call; skipped when there are no samples to avoid dividing by 0
	if (count > 0)
	{
		fastTime /= (unsigned int) count;
		slowTime /= (unsigned int) count;
	}

	printf( "Done. %s ticks: %u vs %u\n", diffCount ? "ERROR!":"OK", fastTime, slowTime);
}
// ----------------------------------------------------------------------------
// Times IFLOOR2() against (int) floor() on `count` random values and checks
// that both give the same result.
// Prints "OK" or "ERROR!" followed by the average ticks per call (fast vs
// reference). With count <= 0 no samples are taken and 0 ticks are reported.
void testIFLOOR2(const int count)
{
	printf( " Testing IFLOOR2.." );

	unsigned int fastTime = 0, slowTime = 0, diffCount = 0;

	for (int i = 0; i < count; i++)
	{
		float testValue = makeTestValue();

		startTimer();
		int test = IFLOOR2(testValue);
		fastTime += stopTimer() - timerAdjust;

		startTimer();
		int real = (int) ((float) floor(testValue));
		slowTime += stopTimer() - timerAdjust;

		if (test != real) diffCount++;
	}

	// Average per call; skipped when there are no samples to avoid dividing by 0
	if (count > 0)
	{
		fastTime /= (unsigned int) count;
		slowTime /= (unsigned int) count;
	}

	printf( "Done. %s ticks: %u vs %u\n", diffCount ? "ERROR!":"OK", fastTime, slowTime);
}
// ----------------------------------------------------------------------------
// Times IFLOOR3() against (int) floor() on `count` random values and checks
// the results sample by sample.
// IFLOOR3 is expected to disagree with floor() for exactly one input, -0.0f
// (bit pattern 0x80000000). A sample counts as an error when it differs for
// any other input, or when it fails to differ for -0.0f. Checking each sample
// (rather than comparing two totals at the end) means a wrong result can no
// longer be cancelled out by an unexpected match elsewhere.
// Prints "OK" or "ERROR!" followed by the average ticks per call (fast vs
// reference). With count <= 0 no samples are taken and 0 ticks are reported.
void testIFLOOR3(const int count)
{
	printf( " Testing IFLOOR3.." );

	unsigned int fastTime = 0, slowTime = 0, diffCount = 0;

	for (int i = 0; i < count; i++)
	{
		float testValue = makeTestValue();

		startTimer();
		int test = IFLOOR3(testValue);
		fastTime += stopTimer() - timerAdjust;

		startTimer();
		int real = (int) ((float) floor(testValue));
		slowTime += stopTimer() - timerAdjust;

		// Expect -0.0f, and only -0.0f, to return different results
		const int isNegZero = ((*(unsigned int *) &testValue) == 0x80000000);
		const int differs = (test != real);
		if (differs != isNegZero) diffCount++;
	}

	// Average per call; skipped when there are no samples to avoid dividing by 0
	if (count > 0)
	{
		fastTime /= (unsigned int) count;
		slowTime /= (unsigned int) count;
	}

	printf( "Done. %s ticks: %u vs %u\n", diffCount ? "ERROR!":"OK", fastTime, slowTime);
}
// ----------------------------------------------------------------------------
// Times FFLOOR() against (float) floor() on `count` random values and checks
// that both give the same result.
// Prints "OK" or "ERROR!" followed by the average ticks per call (fast vs
// reference). With count <= 0 no samples are taken and 0 ticks are reported.
void testFFLOOR(const int count)
{
	printf( " Testing FFLOOR..." );

	unsigned int fastTime = 0, slowTime = 0, diffCount = 0;

	for (int i = 0; i < count; i++)
	{
		float testValue = makeTestValue();

		startTimer();
		float test = FFLOOR(testValue);
		fastTime += stopTimer() - timerAdjust;

		startTimer();
		float real = (float) floor(testValue);
		slowTime += stopTimer() - timerAdjust;

		if (test != real) diffCount++;
	}

	// Average per call; skipped when there are no samples to avoid dividing by 0
	if (count > 0)
	{
		fastTime /= (unsigned int) count;
		slowTime /= (unsigned int) count;
	}

	printf( "Done. %s ticks: %u vs %u\n", diffCount ? "ERROR!":"OK", fastTime, slowTime);
}
// ----------------------------------------------------------------------------
// Times FFLOOR2() against (float) floor() on `count` random values and checks
// that both give the same result.
// Prints "OK" or "ERROR!" followed by the average ticks per call (fast vs
// reference). With count <= 0 no samples are taken and 0 ticks are reported.
void testFFLOOR2(const int count)
{
	printf( " Testing FFLOOR2.." );

	unsigned int fastTime = 0, slowTime = 0, diffCount = 0;

	for (int i = 0; i < count; i++)
	{
		float testValue = makeTestValue();

		startTimer();
		float test = FFLOOR2(testValue);
		fastTime += stopTimer() - timerAdjust;

		startTimer();
		float real = (float) floor(testValue);
		slowTime += stopTimer() - timerAdjust;

		if (test != real) diffCount++;
	}

	// Average per call; skipped when there are no samples to avoid dividing by 0
	if (count > 0)
	{
		fastTime /= (unsigned int) count;
		slowTime /= (unsigned int) count;
	}

	printf( "Done. %s ticks: %u vs %u\n", diffCount ? "ERROR!":"OK", fastTime, slowTime);
}
// ----------------------------------------------------------------------------
// Times ICEIL() against (int) ceil() on `count` random values and checks that
// both give the same result.
// Prints "OK" or "ERROR!" followed by the average ticks per call (fast vs
// reference). With count <= 0 no samples are taken and 0 ticks are reported.
void testICEIL(const int count)
{
	printf( " Testing ICEIL...." );

	unsigned int fastTime = 0, slowTime = 0, diffCount = 0;

	for (int i = 0; i < count; i++)
	{
		float testValue = makeTestValue();

		startTimer();
		int test = ICEIL(testValue);
		fastTime += stopTimer() - timerAdjust;

		startTimer();
		int real = (int) ((float) ceil(testValue));
		slowTime += stopTimer() - timerAdjust;

		if (test != real) diffCount++;
	}

	// Average per call; skipped when there are no samples to avoid dividing by 0
	if (count > 0)
	{
		fastTime /= (unsigned int) count;
		slowTime /= (unsigned int) count;
	}

	printf( "Done. %s ticks: %u vs %u\n", diffCount ? "ERROR!":"OK", fastTime, slowTime);
}
// ----------------------------------------------------------------------------
// Times ICEIL2() against (int) ceil() on `count` random values and checks that
// both give the same result.
// Prints "OK" or "ERROR!" followed by the average ticks per call (fast vs
// reference). With count <= 0 no samples are taken and 0 ticks are reported.
void testICEIL2(const int count)
{
	printf( " Testing ICEIL2..." );

	unsigned int fastTime = 0, slowTime = 0, diffCount = 0;

	for (int i = 0; i < count; i++)
	{
		float testValue = makeTestValue();

		startTimer();
		int test = ICEIL2(testValue);
		fastTime += stopTimer() - timerAdjust;

		startTimer();
		int real = (int) ((float) ceil(testValue));
		slowTime += stopTimer() - timerAdjust;

		if (test != real) diffCount++;
	}

	// Average per call; skipped when there are no samples to avoid dividing by 0
	if (count > 0)
	{
		fastTime /= (unsigned int) count;
		slowTime /= (unsigned int) count;
	}

	printf( "Done. %s ticks: %u vs %u\n", diffCount ? "ERROR!":"OK", fastTime, slowTime);
}
// ----------------------------------------------------------------------------
// Times FCEIL() against (float) ceil() on `count` random values and checks
// that both give the same result.
// Prints "OK" or "ERROR!" followed by the average ticks per call (fast vs
// reference). With count <= 0 no samples are taken and 0 ticks are reported.
void testFCEIL(const int count)
{
	printf( " Testing FCEIL...." );

	unsigned int fastTime = 0, slowTime = 0, diffCount = 0;

	for (int i = 0; i < count; i++)
	{
		float testValue = makeTestValue();

		startTimer();
		float test = FCEIL(testValue);
		fastTime += stopTimer() - timerAdjust;

		startTimer();
		float real = (float) ceil(testValue);
		slowTime += stopTimer() - timerAdjust;

		if (test != real) diffCount++;
	}

	// Average per call; skipped when there are no samples to avoid dividing by 0
	if (count > 0)
	{
		fastTime /= (unsigned int) count;
		slowTime /= (unsigned int) count;
	}

	printf( "Done. %s ticks: %u vs %u\n", diffCount ? "ERROR!":"OK", fastTime, slowTime);
}
// ----------------------------------------------------------------------------
// Times FCEIL2() against (float) ceil() on `count` random values and checks
// that both give the same result.
// Prints "OK" or "ERROR!" followed by the average ticks per call (fast vs
// reference). With count <= 0 no samples are taken and 0 ticks are reported.
void testFCEIL2(const int count)
{
	printf( " Testing FCEIL2..." );

	unsigned int fastTime = 0, slowTime = 0, diffCount = 0;

	for (int i = 0; i < count; i++)
	{
		float testValue = makeTestValue();

		startTimer();
		float test = FCEIL2(testValue);
		fastTime += stopTimer() - timerAdjust;

		startTimer();
		float real = (float) ceil(testValue);
		slowTime += stopTimer() - timerAdjust;

		if (test != real) diffCount++;
	}

	// Average per call; skipped when there are no samples to avoid dividing by 0
	if (count > 0)
	{
		fastTime /= (unsigned int) count;
		slowTime /= (unsigned int) count;
	}

	printf( "Done. %s ticks: %u vs %u\n", diffCount ? "ERROR!":"OK", fastTime, slowTime);
}
// ----------------------------------------------------------------------------
// Times FFRAC() against value - floor(value) on `count` random values and
// checks that both give the same result.
// Prints "OK" or "ERROR!" followed by the average ticks per call (fast vs
// reference). With count <= 0 no samples are taken and 0 ticks are reported.
void testFFRAC(const int count)
{
	printf( " Testing FFRAC...." );

	unsigned int fastTime = 0, slowTime = 0, diffCount = 0;

	for (int i = 0; i < count; i++)
	{
		float testValue = makeTestValue();

		startTimer();
		float test = FFRAC(testValue);
		fastTime += stopTimer() - timerAdjust;

		startTimer();
		float real = testValue - (float) floor((double) testValue);
		slowTime += stopTimer() - timerAdjust;

		if (test != real) diffCount++;
	}

	// Average per call; skipped when there are no samples to avoid dividing by 0
	if (count > 0)
	{
		fastTime /= (unsigned int) count;
		slowTime /= (unsigned int) count;
	}

	printf( "Done. %s ticks: %u vs %u\n", diffCount ? "ERROR!":"OK", fastTime, slowTime);
}
// ----------------------------------------------------------------------------
// Times FFRAC2() against value - floor(value) on `count` random values and
// checks that both give the same result.
// Prints "OK" or "ERROR!" followed by the average ticks per call (fast vs
// reference). With count <= 0 no samples are taken and 0 ticks are reported.
void testFFRAC2(const int count)
{
	printf( " Testing FFRAC2..." );

	unsigned int fastTime = 0, slowTime = 0, diffCount = 0;

	for (int i = 0; i < count; i++)
	{
		float testValue = makeTestValue();

		startTimer();
		float test = FFRAC2(testValue);
		fastTime += stopTimer() - timerAdjust;

		startTimer();
		float real = testValue - (float) floor((double) testValue);
		slowTime += stopTimer() - timerAdjust;

		if (test != real) diffCount++;
	}

	// Average per call; skipped when there are no samples to avoid dividing by 0
	if (count > 0)
	{
		fastTime /= (unsigned int) count;
		slowTime /= (unsigned int) count;
	}

	printf( "Done. %s ticks: %u vs %u\n", diffCount ? "ERROR!":"OK", fastTime, slowTime);
}
// ----------------------------------------------------------------------------
// Times SUB_PIX() against ceil(value) - value on `count` random values and
// checks that both give the same result.
// Prints "OK" or "ERROR!" followed by the average ticks per call (fast vs
// reference). With count <= 0 no samples are taken and 0 ticks are reported.
void testSUB_PIX(const int count)
{
	printf( " Testing SUB_PIX.." );

	unsigned int fastTime = 0, slowTime = 0, diffCount = 0;

	for (int i = 0; i < count; i++)
	{
		float testValue = makeTestValue();

		startTimer();
		float test = SUB_PIX(testValue);
		fastTime += stopTimer() - timerAdjust;

		startTimer();
		float real = (float) ceil((double) testValue) - testValue;
		slowTime += stopTimer() - timerAdjust;

		if (test != real) diffCount++;
	}

	// Average per call; skipped when there are no samples to avoid dividing by 0
	if (count > 0)
	{
		fastTime /= (unsigned int) count;
		slowTime /= (unsigned int) count;
	}

	printf( "Done. %s ticks: %u vs %u\n", diffCount ? "ERROR!":"OK", fastTime, slowTime);
}
// ----------------------------------------------------------------------------
// Times SUB_PIX2() against ceil(value) - value on `count` random values and
// checks that both give the same result.
// Prints "OK" or "ERROR!" followed by the average ticks per call (fast vs
// reference). With count <= 0 no samples are taken and 0 ticks are reported.
void testSUB_PIX2(const int count)
{
	printf( " Testing SUB_PIX2." );

	unsigned int fastTime = 0, slowTime = 0, diffCount = 0;

	for (int i = 0; i < count; i++)
	{
		float testValue = makeTestValue();

		startTimer();
		float test = SUB_PIX2(testValue);
		fastTime += stopTimer() - timerAdjust;

		startTimer();
		float real = (float) ceil((double) testValue) - testValue;
		slowTime += stopTimer() - timerAdjust;

		if (test != real) diffCount++;
	}

	// Average per call; skipped when there are no samples to avoid dividing by 0
	if (count > 0)
	{
		fastTime /= (unsigned int) count;
		slowTime /= (unsigned int) count;
	}

	printf( "Done. %s ticks: %u vs %u\n", diffCount ? "ERROR!":"OK", fastTime, slowTime);
}
// ----------------------------------------------------------------------------
// Times IS_NEG() against (value < 0.0f) on `count` random values and checks
// that both agree. A mismatch on -0.0f (bit pattern 0x80000000) is ignored,
// because IS_NEG reports the sign bit and so treats -0.0f as negative.
// Prints "OK" or "ERROR!" followed by the average ticks per call (fast vs
// reference). With count <= 0 no samples are taken and 0 ticks are reported.
void testIS_NEG(const int count)
{
	printf( " Testing IS_NEG..." );

	unsigned int fastTime = 0, slowTime = 0, diffCount = 0;

	for (int i = 0; i < count; i++)
	{
		float testValue = makeTestValue();

		startTimer();
		bool test = IS_NEG(testValue);
		fastTime += stopTimer() - timerAdjust;

		startTimer();
		bool real = testValue < 0.0f;
		slowTime += stopTimer() - timerAdjust;

		if (test != real && *(int *) &testValue != 0x80000000) diffCount++;
	}

	// Average per call; skipped when there are no samples to avoid dividing by 0
	if (count > 0)
	{
		fastTime /= (unsigned int) count;
		slowTime /= (unsigned int) count;
	}

	printf( "Done. %s ticks: %u vs %u\n", diffCount ? "ERROR!":"OK", fastTime, slowTime);
}
// ----------------------------------------------------------------------------
// Times IS_LESS() against the < operator on `count` pairs of random values
// and checks that both agree. Operands are first made non-negative (-0.0f
// becomes +0.0f, negative values are negated), because the integer compare
// used by IS_LESS is only tested on that range.
// Prints "OK" or "ERROR!" followed by the average ticks per call (fast vs
// reference). With count <= 0 no samples are taken and 0 ticks are reported.
void testIS_LESS(const int count)
{
	printf( " Testing IS_LESS.." );

	unsigned int fastTime = 0, slowTime = 0, diffCount = 0;

	for (int i = 0; i < count; i++)
	{
		float testValue1 = makeTestValue();
		float testValue2 = makeTestValue();

		// Normalise both operands to non-negative values
		if (*(int *) &testValue1 == 0x80000000) testValue1 = 0.0f;
		if (*(int *) &testValue2 == 0x80000000) testValue2 = 0.0f;
		if (testValue1 < 0.0f) testValue1 = -testValue1;
		if (testValue2 < 0.0f) testValue2 = -testValue2;

		startTimer();
		bool test = IS_LESS(testValue1, testValue2);
		fastTime += stopTimer() - timerAdjust;

		startTimer();
		bool real = testValue1 < testValue2;
		slowTime += stopTimer() - timerAdjust;

		if (test != real) diffCount++;
	}

	// Average per call; skipped when there are no samples to avoid dividing by 0
	if (count > 0)
	{
		fastTime /= (unsigned int) count;
		slowTime /= (unsigned int) count;
	}

	printf( "Done. %s ticks: %u vs %u\n", diffCount ? "ERROR!":"OK", fastTime, slowTime);
}
// ----------------------------------------------------------------------------
// Times IS_LEQUAL() against the <= operator on `count` pairs of random values
// and checks that both agree. Operands are first made non-negative (-0.0f
// becomes +0.0f, negative values are negated), because the integer compare
// used by IS_LEQUAL is only tested on that range.
// Prints "OK" or "ERROR!" followed by the average ticks per call (fast vs
// reference). With count <= 0 no samples are taken and 0 ticks are reported.
void testIS_LEQUAL(const int count)
{
	printf( " Testing IS_LEQ..." );

	unsigned int fastTime = 0, slowTime = 0, diffCount = 0;

	for (int i = 0; i < count; i++)
	{
		float testValue1 = makeTestValue();
		float testValue2 = makeTestValue();

		// Normalise both operands to non-negative values
		if (*(int *) &testValue1 == 0x80000000) testValue1 = 0.0f;
		if (*(int *) &testValue2 == 0x80000000) testValue2 = 0.0f;
		if (testValue1 < 0.0f) testValue1 = -testValue1;
		if (testValue2 < 0.0f) testValue2 = -testValue2;

		startTimer();
		bool test = IS_LEQUAL(testValue1, testValue2);
		fastTime += stopTimer() - timerAdjust;

		startTimer();
		bool real = testValue1 <= testValue2;
		slowTime += stopTimer() - timerAdjust;

		if (test != real) diffCount++;
	}

	// Average per call; skipped when there are no samples to avoid dividing by 0
	if (count > 0)
	{
		fastTime /= (unsigned int) count;
		slowTime /= (unsigned int) count;
	}

	printf( "Done. %s ticks: %u vs %u\n", diffCount ? "ERROR!":"OK", fastTime, slowTime);
}
// ----------------------------------------------------------------------------
// Times IS_GREATER() against the > operator on `count` pairs of random values
// and checks that both agree. Operands are first made non-negative (-0.0f
// becomes +0.0f, negative values are negated), because the integer compare
// used by IS_GREATER is only tested on that range.
// Prints "OK" or "ERROR!" followed by the average ticks per call (fast vs
// reference). With count <= 0 no samples are taken and 0 ticks are reported.
void testIS_GREATER(const int count)
{
	printf( " Testing IS_GREAT." );

	unsigned int fastTime = 0, slowTime = 0, diffCount = 0;

	for (int i = 0; i < count; i++)
	{
		float testValue1 = makeTestValue();
		float testValue2 = makeTestValue();

		// Normalise both operands to non-negative values
		if (*(int *) &testValue1 == 0x80000000) testValue1 = 0.0f;
		if (*(int *) &testValue2 == 0x80000000) testValue2 = 0.0f;
		if (testValue1 < 0.0f) testValue1 = -testValue1;
		if (testValue2 < 0.0f) testValue2 = -testValue2;

		startTimer();
		bool test = IS_GREATER(testValue1, testValue2);
		fastTime += stopTimer() - timerAdjust;

		startTimer();
		bool real = testValue1 > testValue2;
		slowTime += stopTimer() - timerAdjust;

		if (test != real) diffCount++;
	}

	// Average per call; skipped when there are no samples to avoid dividing by 0
	if (count > 0)
	{
		fastTime /= (unsigned int) count;
		slowTime /= (unsigned int) count;
	}

	printf( "Done. %s ticks: %u vs %u\n", diffCount ? "ERROR!":"OK", fastTime, slowTime);
}
// ----------------------------------------------------------------------------
// Times IS_GEQUAL() against the >= operator on `count` pairs of random values
// and checks that both agree. Operands are first made non-negative (-0.0f
// becomes +0.0f, negative values are negated), because the integer compare
// used by IS_GEQUAL is only tested on that range.
// Prints "OK" or "ERROR!" followed by the average ticks per call (fast vs
// reference). With count <= 0 no samples are taken and 0 ticks are reported.
void testIS_GEQUAL(const int count)
{
	printf( " Testing IS_GEQ..." );

	unsigned int fastTime = 0, slowTime = 0, diffCount = 0;

	for (int i = 0; i < count; i++)
	{
		float testValue1 = makeTestValue();
		float testValue2 = makeTestValue();

		// Normalise both operands to non-negative values
		if (*(int *) &testValue1 == 0x80000000) testValue1 = 0.0f;
		if (*(int *) &testValue2 == 0x80000000) testValue2 = 0.0f;
		if (testValue1 < 0.0f) testValue1 = -testValue1;
		if (testValue2 < 0.0f) testValue2 = -testValue2;

		startTimer();
		bool test = IS_GEQUAL(testValue1, testValue2);
		fastTime += stopTimer() - timerAdjust;

		startTimer();
		bool real = testValue1 >= testValue2;
		slowTime += stopTimer() - timerAdjust;

		if (test != real) diffCount++;
	}

	// Average per call; skipped when there are no samples to avoid dividing by 0
	if (count > 0)
	{
		fastTime /= (unsigned int) count;
		slowTime /= (unsigned int) count;
	}

	printf( "Done. %s ticks: %u vs %u\n", diffCount ? "ERROR!":"OK", fastTime, slowTime);
}
// ----------------------------------------------------------------------------
// Times FTOX24() against (int) (value * 0x1000000) on `count` random values
// scaled into roughly [-128, 128]. A difference of up to 1 is accepted.
// Prints "OK" or "ERROR!" followed by the average ticks per call (fast vs
// reference). With count <= 0 no samples are taken and 0 ticks are reported.
void testFTOX24(const int count)
{
	printf( " Testing FTOX24..." );

	unsigned int fastTime = 0, slowTime = 0, diffCount = 0;

	// Largest magnitude makeTestValue() can return. Computed in float because
	// RAND_MAX * RAND_MAX overflows int whenever RAND_MAX exceeds 46340.
	const float maxTestVal = (float) RAND_MAX * (float) RAND_MAX;

	for (int i = 0; i < count; i++)
	{
		float testValue = (makeTestValue() / maxTestVal) * 128.0f;

		startTimer();
		int test = FTOX24(testValue);
		fastTime += stopTimer() - timerAdjust;

		startTimer();
		int real = (int) (testValue * (float) 0x1000000);
		slowTime += stopTimer() - timerAdjust;

		if (ABS(test - real) > 1) diffCount++;
	}

	// Average per call; skipped when there are no samples to avoid dividing by 0
	if (count > 0)
	{
		fastTime /= (unsigned int) count;
		slowTime /= (unsigned int) count;
	}

	printf( "Done. %s ticks: %u vs %u\n", diffCount ? "ERROR!":"OK", fastTime, slowTime);
}
// ----------------------------------------------------------------------------
// Validates and times FTOX242 against (int) (value * 0x1000000).
//
// Each iteration builds a test value by dividing makeTestValue() by
// MAX_TESTVAL and multiplying by 128, then times FTOX242 and the plain
// multiply-and-cast separately. Any difference between the two results counts
// as an error. Prints "OK" or "ERROR!" followed by the average tick count per
// call of the fast and the reference version.
//
// count: number of iterations to run. When it is <= 0 nothing is timed and
//        both averages are reported as 0.
void testFTOX242(const int count)
{
    printf( " Testing FTOX242.." );
    unsigned int fastTime = 0, slowTime = 0, diffCount = 0;
    for (int i = 0; i < count; i++)
    {
        // NOTE(review): RAND_MAX * RAND_MAX only fits in an int when RAND_MAX
        // is small (e.g. 32767) -- confirm before building with another library.
        // NOTE(review): assumes |makeTestValue()| <= MAX_TESTVAL, so that the
        // scaled value stays within +/-128 -- confirm against makeTestValue().
        enum {MAX_TESTVAL = RAND_MAX * RAND_MAX};
        float testValue = (makeTestValue() / (float) MAX_TESTVAL) * 128.0f;

        // Routine under test
        startTimer();
        int test = FTOX242(testValue);
        fastTime += stopTimer() - timerAdjust;

        // Reference conversion
        startTimer();
        int real = (int) (testValue * (float) 0x1000000);
        slowTime += stopTimer() - timerAdjust;

        // This variant must match the reference exactly
        if (test != real) diffCount++;
    }

    // Integer division keeps the averages exact (a float cannot represent
    // totals above 2^24 precisely); the guard avoids a division by zero.
    if (count > 0)
    {
        fastTime /= (unsigned int) count;
        slowTime /= (unsigned int) count;
    }

    // The averages are unsigned, so they are printed with %u.
    printf( "Done. %s ticks: %u vs %u\n", diffCount ? "ERROR!":"OK", fastTime, slowTime);
}
// ----------------------------------------------------------------------------
// Validates and times FTOX16 against (int) (value * 0x10000).
//
// Each iteration builds a test value by dividing makeTestValue() by
// MAX_TESTVAL and multiplying by 32767, then times FTOX16 and the plain
// multiply-and-cast separately. A result that differs from the reference by
// more than 1 counts as an error. Prints "OK" or "ERROR!" followed by the
// average tick count per call of the fast and the reference version.
//
// count: number of iterations to run. When it is <= 0 nothing is timed and
//        both averages are reported as 0.
void testFTOX16(const int count)
{
    printf( " Testing FTOX16..." );
    unsigned int fastTime = 0, slowTime = 0, diffCount = 0;
    for (int i = 0; i < count; i++)
    {
        // NOTE(review): RAND_MAX * RAND_MAX only fits in an int when RAND_MAX
        // is small (e.g. 32767) -- confirm before building with another library.
        // NOTE(review): assumes |makeTestValue()| <= MAX_TESTVAL, so that the
        // scaled value stays within +/-32767 -- confirm against makeTestValue().
        enum {MAX_TESTVAL = RAND_MAX * RAND_MAX};
        float testValue = (makeTestValue() / (float) MAX_TESTVAL) * 32767.0f;

        // Fast routine
        startTimer();
        int test = FTOX16(testValue);
        fastTime += stopTimer() - timerAdjust;

        // Reference conversion
        startTimer();
        int real = (int) (testValue * (float) 0x10000);
        slowTime += stopTimer() - timerAdjust;

        // The fast routine is allowed to be off by one unit
        if (ABS(test - real) > 1) diffCount++;
    }

    // Integer division keeps the averages exact (a float cannot represent
    // totals above 2^24 precisely); the guard avoids a division by zero.
    if (count > 0)
    {
        fastTime /= (unsigned int) count;
        slowTime /= (unsigned int) count;
    }

    // The averages are unsigned, so they are printed with %u.
    printf( "Done. %s ticks: %u vs %u\n", diffCount ? "ERROR!":"OK", fastTime, slowTime);
}
// ----------------------------------------------------------------------------
// Validates and times FTOX162 against (int) (value * 0x10000).
//
// Each iteration builds a test value by dividing makeTestValue() by
// MAX_TESTVAL and multiplying by 32767, then times FTOX162 and the plain
// multiply-and-cast separately. Any difference between the two results counts
// as an error. Prints "OK" or "ERROR!" followed by the average tick count per
// call of the fast and the reference version.
//
// count: number of iterations to run. When it is <= 0 nothing is timed and
//        both averages are reported as 0.
void testFTOX162(const int count)
{
    printf( " Testing FTOX162.." );
    unsigned int fastTime = 0, slowTime = 0, diffCount = 0;
    for (int i = 0; i < count; i++)
    {
        // NOTE(review): RAND_MAX * RAND_MAX only fits in an int when RAND_MAX
        // is small (e.g. 32767) -- confirm before building with another library.
        // NOTE(review): assumes |makeTestValue()| <= MAX_TESTVAL, so that the
        // scaled value stays within +/-32767 -- confirm against makeTestValue().
        enum {MAX_TESTVAL = RAND_MAX * RAND_MAX};
        float testValue = (makeTestValue() / (float) MAX_TESTVAL) * 32767.0f;

        // Routine under test
        startTimer();
        int test = FTOX162(testValue);
        fastTime += stopTimer() - timerAdjust;

        // Reference conversion
        startTimer();
        int real = (int) (testValue * (float) 0x10000);
        slowTime += stopTimer() - timerAdjust;

        // This variant must match the reference exactly
        if (test != real) diffCount++;
    }

    // Integer division keeps the averages exact (a float cannot represent
    // totals above 2^24 precisely); the guard avoids a division by zero.
    if (count > 0)
    {
        fastTime /= (unsigned int) count;
        slowTime /= (unsigned int) count;
    }

    // The averages are unsigned, so they are printed with %u.
    printf( "Done. %s ticks: %u vs %u\n", diffCount ? "ERROR!":"OK", fastTime, slowTime);
}
// ----------------------------------------------------------------------------
// Validates and times FTOX8 against (int) (value * 0x100).
//
// Each iteration builds a test value by dividing makeTestValue() by
// MAX_TESTVAL and multiplying by 8000000, then times FTOX8 and the plain
// multiply-and-cast separately. A result that differs from the reference by
// more than 1 counts as an error. Prints "OK" or "ERROR!" followed by the
// average tick count per call of the fast and the reference version.
//
// count: number of iterations to run. When it is <= 0 nothing is timed and
//        both averages are reported as 0.
void testFTOX8(const int count)
{
    printf( " Testing FTOX8...." );
    unsigned int fastTime = 0, slowTime = 0, diffCount = 0;
    for (int i = 0; i < count; i++)
    {
        // NOTE(review): RAND_MAX * RAND_MAX only fits in an int when RAND_MAX
        // is small (e.g. 32767) -- confirm before building with another library.
        // NOTE(review): assumes |makeTestValue()| <= MAX_TESTVAL, so that the
        // scaled value stays within +/-8000000 -- confirm against makeTestValue().
        enum {MAX_TESTVAL = RAND_MAX * RAND_MAX};
        float testValue = (makeTestValue() / (float) MAX_TESTVAL) * 8000000.0f;

        // Fast routine
        startTimer();
        int test = FTOX8(testValue);
        fastTime += stopTimer() - timerAdjust;

        // Reference conversion
        startTimer();
        int real = (int) (testValue * (float) 0x100);
        slowTime += stopTimer() - timerAdjust;

        // The fast routine is allowed to be off by one unit
        if (ABS(test - real) > 1) diffCount++;
    }

    // Integer division keeps the averages exact (a float cannot represent
    // totals above 2^24 precisely); the guard avoids a division by zero.
    if (count > 0)
    {
        fastTime /= (unsigned int) count;
        slowTime /= (unsigned int) count;
    }

    // The averages are unsigned, so they are printed with %u.
    printf( "Done. %s ticks: %u vs %u\n", diffCount ? "ERROR!":"OK", fastTime, slowTime);
}
// ----------------------------------------------------------------------------
// Validates and times FTOX82 against (int) (value * 0x100).
//
// Each iteration builds a test value by dividing makeTestValue() by
// MAX_TESTVAL and multiplying by 8000000, then times FTOX82 and the plain
// multiply-and-cast separately. Any difference between the two results counts
// as an error. Prints "OK" or "ERROR!" followed by the average tick count per
// call of the fast and the reference version.
//
// count: number of iterations to run. When it is <= 0 nothing is timed and
//        both averages are reported as 0.
void testFTOX82(const int count)
{
    printf( " Testing FTOX82..." );
    unsigned int fastTime = 0, slowTime = 0, diffCount = 0;
    for (int i = 0; i < count; i++)
    {
        // NOTE(review): RAND_MAX * RAND_MAX only fits in an int when RAND_MAX
        // is small (e.g. 32767) -- confirm before building with another library.
        // NOTE(review): assumes |makeTestValue()| <= MAX_TESTVAL, so that the
        // scaled value stays within +/-8000000 -- confirm against makeTestValue().
        enum {MAX_TESTVAL = RAND_MAX * RAND_MAX};
        float testValue = (makeTestValue() / (float) MAX_TESTVAL) * 8000000.0f;

        // Routine under test
        startTimer();
        int test = FTOX82(testValue);
        fastTime += stopTimer() - timerAdjust;

        // Reference conversion
        startTimer();
        int real = (int) (testValue * (float) 0x100);
        slowTime += stopTimer() - timerAdjust;

        // This variant must match the reference exactly
        if (test != real) diffCount++;
    }

    // Integer division keeps the averages exact (a float cannot represent
    // totals above 2^24 precisely); the guard avoids a division by zero.
    if (count > 0)
    {
        fastTime /= (unsigned int) count;
        slowTime /= (unsigned int) count;
    }

    // The averages are unsigned, so they are printed with %u.
    printf( "Done. %s ticks: %u vs %u\n", diffCount ? "ERROR!":"OK", fastTime, slowTime);
}
// ----------------------------------------------------------------------------
// Times the three memory helpers readRead(), writeRead() and flushWriteBack().
//
// Each iteration times one call of every helper, subtracting the measured
// timer overhead (timerAdjust). The averages per call are printed under the
// labels "r/r", "w/r" and "flush".
// NOTE(review): the labels suggest read-after-read, read-after-write and a
// write-back flush -- confirm against the helpers' definitions.
//
// count: number of iterations to run. When it is <= 0 nothing is timed and
//        all three averages are reported as 0.
void testMemSpeed(const int count)
{
    printf( " Testing mem spd.." );
    unsigned int fastTime = 0, slowTime = 0, flushTime = 0;
    for (int i = 0; i < count; i++)
    {
        startTimer();
        readRead();
        fastTime += stopTimer() - timerAdjust;

        startTimer();
        writeRead();
        slowTime += stopTimer() - timerAdjust;

        startTimer();
        flushWriteBack();
        flushTime += stopTimer() - timerAdjust;
    }

    // Integer division keeps the averages exact (a float cannot represent
    // totals above 2^24 precisely); the guard avoids a division by zero.
    if (count > 0)
    {
        fastTime  /= (unsigned int) count;
        slowTime  /= (unsigned int) count;
        flushTime /= (unsigned int) count;
    }

    // The averages are unsigned, so they are printed with %u.
    printf( "Done. r/r ticks: %u w/r ticks: %u flush: %u\n", fastTime, slowTime, flushTime);
}
// ----------------------------------------------------------------------------
// Program entry point: calibrates the timer, then runs every test routine.
//
// argv[1], when present, is the number of iterations each test performs
// (default 1000000). It must be a positive integer, because every test
// averages its timings over that count.
//
// Returns 0 after all tests have run, or EXIT_FAILURE when the iteration
// count is not positive.
int main(int argc, char *argv[])
{
    // Unbuffered output, so each "Testing ..." label shows up before its test runs
    setbuf(stdout, 0);

    // Warn the user about the /op option
#ifndef __SW_OP
    printf( "---------------------------------------------------------\n" );
    printf( "\aThis program was not compiled with /op. You can expect\n" );
    printf( "problems since Watcom is less accurate without /op.\n " );
    printf( "---------------------------------------------------------\n" );
    printf( "\n" );
#endif

    // Init the floating point routines
    initFPU();

    // Calculate the timer routine overhead: time an empty start/stop pair many
    // times and keep the average, which the tests subtract from each sample
    printf( "Syncronizing the timer..." );
    const unsigned int timerTestCount = 1000000;
    for (unsigned int pass = 0; pass < timerTestCount; pass++)
    {
        startTimer();
        timerAdjust += stopTimer();
    }
    // Integer division: exact, unlike an average taken through a float
    timerAdjust /= timerTestCount;
    printf( "Done. Timer adjustment is %u cycles.\n", timerAdjust );

    // Prime the random number generator
    srand((unsigned int) time(NULL));

    // Get the run count
    int count = 1000000;
    if (argc > 1) count = atoi(argv[1]);

    // atoi() yields 0 for non-numeric text; zero or negative counts would make
    // every test divide by zero when it averages its timings
    if (count <= 0)
    {
        printf( "The iteration count must be a positive integer.\n" );
        return EXIT_FAILURE;
    }

    // Start testing
    printf( "Testing each routine with %d iterations.\n", count );

    printf( "\nFastest routines:\n" );
    testICHOP(count);
    testFCHOP(count);
    testIFLOOR(count);
    testFFLOOR(count);
    testICEIL(count);
    testFCEIL(count);
    testFFRAC(count);
    testSUB_PIX(count);
    testIS_NEG(count);
    testIS_LESS(count);
    testIS_LEQUAL(count);
    testIS_GREATER(count);
    testIS_GEQUAL(count);
    testFTOX24(count);
    testFTOX16(count);
    testFTOX8(count);

    printf( "\nMedium-speed routines:\n" );
    testICHOP2(count);
    testFCHOP2(count);
    testIFLOOR2(count);
    testFFLOOR2(count);
    testICEIL2(count);
    testFCEIL2(count);
    testFFRAC2(count);
    testSUB_PIX2(count);
    testFTOX242(count);
    testFTOX162(count);
    testFTOX82(count);

    printf( "\nSlowest routines:\n" );
    testIFLOOR3(count);

    printf( "\nTesting memory:\n" );
    testMemSpeed(count);

    return 0;
}
// ----------------------------------------------------------------------------
|