8turbo is really turning into a real 8turbo

This commit is contained in:
cnlohr 2019-04-29 01:28:52 -04:00
parent 1432f22b77
commit cd56e249bc
2 changed files with 180 additions and 244 deletions

View file

@ -1,3 +1,5 @@
//NOTE DO NOT EDIT THIS FILE WITHOUT ALSO EDITING DFT8TURBO!!!
#include <stdint.h> #include <stdint.h>
#include <stdlib.h> #include <stdlib.h>
#include "DFT12Small.h" #include "DFT12Small.h"
@ -32,9 +34,6 @@
#define FINAL_DECIMATE (1) #define FINAL_DECIMATE (1)
#endif #endif
//Right now, we need 8*freqs*octaves bytes.
//This is bad.
//What can we do to fix it?
//4x the hits (sin/cos and we need to do it once for each edge) //4x the hits (sin/cos and we need to do it once for each edge)
//8x for selecting a higher octave. //8x for selecting a higher octave.
@ -199,116 +198,104 @@ void Small12BitRun( int8_t adcval )
if( adcv < -128 ) adcv = -128; if( adcv < -128 ) adcv = -128;
running_integral += adcv>>INITIAL_DECIMATE; running_integral += adcv>>INITIAL_DECIMATE;
#define dprintf( ... )
uint32_t action = actiontable[actiontableplace++]; uint32_t action = actiontable[actiontableplace++];
int n; int n;
dprintf( "%4d ", actiontableplace ); for( n = 0; n < MAX_FREQS; n++, action>>=1 )
for( n = 0; n < MAX_FREQS; n++ )
{ {
if( action & (1<<n) ) if( !( action & 1 ) ) continue;
int ao = which_octave_for_op[n];
ao++;
if( ao >= NR_OF_OPS ) ao = 0;
which_octave_for_op[n] = ao;
int op = optable[ao];
if( op == 255 )
continue;
//int octaveplace = op & 0xf;
//Tricky: We share the integral with SIN and COS.
//We don't need to. It would produce a slightly cleaner signal. See: NOTE 3
uint8_t octave = op & 0xf;
uint8_t intindex = octave * MAX_FREQS + n;
//int invoct = OCTAVES-1-octaveplace;
int16_t diff;
if( op & 0x10 ) //ADD
{ {
int ao = which_octave_for_op[n]; diff = integral_at[intindex] - running_integral;
int op = optable[ao]; }
ao++; else //SUBTRACT
if( ao >= NR_OF_OPS ) ao = 0; {
which_octave_for_op[n] = ao; diff = running_integral - integral_at[intindex];
}
if( op == 255 ) integral_at[intindex] = running_integral;
{
dprintf( "*" ); //NOP
}
else
{
//int octaveplace = op & 0xf;
//Tricky: We share the integral with SIN and COS.
//We don't need to. It would produce a slightly cleaner signal. See: NOTE 3
uint8_t octave = op & 0xf;
uint8_t intindex = octave * MAX_FREQS + n;
//int invoct = OCTAVES-1-octaveplace;
int16_t diff;
if( op & 0x10 ) //ADD
{
diff = integral_at[intindex] - running_integral;
dprintf( "%c", 'a' + (op & 0xf) );
}
else //SUBTRACT
{
diff = running_integral - integral_at[intindex];
dprintf( "%c", 'A' + (op & 0xf) );
}
integral_at[intindex] = running_integral;
#ifdef TWELVEBIT #ifdef TWELVEBIT
if( diff > 2000 || diff < -2000 ) printf( "!!!!!!!!!!!! %d !!!!!!!!!!!\n", diff ); if( diff > 2000 || diff < -2000 ) printf( "!!!!!!!!!!!! %d !!!!!!!!!!!\n", diff );
#elif defined( EIGHTBIT ) #elif defined( EIGHTBIT )
if( diff > 124 || diff < -124 ) printf( "!!!!!!!!!!!! %d !!!!!!!!!!!\n", diff ); if( diff > 124 || diff < -124 ) printf( "!!!!!!!!!!!! %d !!!!!!!!!!!\n", diff );
#endif #endif
uint8_t idx = ( intindex << 1 ); //uint8_t idx = ( intindex << 1 );
if( op&(1<<6) ) intindex<<=1;
{
idx |= 1;
}
//printf( "%d: %d + %d * %d >> 8 - %d\n", idx, cossindata[idx], diff, mulmux[idx/2], cossindata[idx]>>4 ); if( op&(1<<6) )
{
intindex |= 1;
}
uint8_t mulmuxval = mulmux[n]; //printf( "%d: %d + %d * %d >> 8 - %d\n", intindex, cossindata[intindex], diff, mulmux[intindex/2], cossindata[intindex]>>4 );
uint8_t mulmuxval = mulmux[n];
//Do you live on a super lame processor? {NOTE 4} //Do you live on a super lame processor? {NOTE 4}
//If you do, you might not have good signed multiply operations. So, an alternative mechanism is found here. //If you do, you might not have good signed multiply operations. So, an alternative mechanism is found here.
// +) Able to more cleanly crush to an 8-bit multiply. // +) Able to more cleanly crush to an 8-bit multiply.
// +) Gets extra bit of precision back, i.e. the sign bit is now used as a data bit. // +) Gets extra bit of precision back, i.e. the sign bit is now used as a data bit.
// -) More than 1 line of C code. Requires possible double invert. // -) More than 1 line of C code. Requires possible double invert.
#if 1 #if 1
//Terrible processor, i.e. PMS133 //Terrible processor, i.e. PMS133
if( 0 && diff < 0 ) if( 0 && diff < 0 )
{ {
diff *= -1; diff *= -1;
diff >>= (OCTAVES-1-octave); diff >>= (OCTAVES-1-octave);
if( diff > 250 ) printf( "!!!!!!!**** %d ****!!!!!!!\n", diff ); if( diff > 250 ) printf( "!!!!!!!**** %d ****!!!!!!!\n", diff );
diff = (uint16_t)diff * (uint16_t)mulmuxval; diff = (uint16_t)diff * (uint16_t)mulmuxval;
diff >>= INTEGRATOR_DECIMATE; diff >>= INTEGRATOR_DECIMATE;
diff *= -1; diff *= -1;
}
else
{
diff >>= (OCTAVES-1-octave);
if( diff > 250 ) printf( "!!!!!!!**** %d ****!!!!!!!\n", diff );
diff = (uint16_t)diff * (uint16_t)mulmuxval;
diff >>= INTEGRATOR_DECIMATE;
}
#else
//Decent processor, i.e. ATTiny85.
diff = ((diff>>(OCTAVES-1-octave)) * mulmuxval ) >> 6;
#endif
cossindata[idx] = cossindata[idx]
+ diff
- (cossindata[idx]>>4)
;
#ifdef EIGHTBIT
if( cossindata[idx] > 0 ) cossindata[idx]--;
if( cossindata[idx] < 0 ) cossindata[idx]++;
#endif
}
} }
else else
{ {
dprintf( " " ); diff >>= (OCTAVES-1-octave);
}
if( diff > 250 ) printf( "!!!!!!!**** %d ****!!!!!!!\n", diff );
diff = (uint16_t)diff * (uint16_t)mulmuxval;
diff >>= INTEGRATOR_DECIMATE;
}
#else
//Decent processor, i.e. ATTiny85.
diff = ((diff>>(OCTAVES-1-octave)) * mulmuxval ) >> 6;
#endif
cossindata[intindex] = cossindata[intindex]
+ diff
- (cossindata[intindex]>>4)
;
#ifdef EIGHTBIT
if( cossindata[intindex] > 0 ) cossindata[intindex]--;
if( cossindata[intindex] < 0 ) cossindata[intindex]++;
#endif
} }
dprintf( "\n" );
} }

View file

@ -1,3 +1,5 @@
//NOTE DO NOT EDIT THIS FILE WITHOUT ALSO EDITING DFT12SMALL!!!
#include <stdint.h> #include <stdint.h>
#include <stdlib.h> #include <stdlib.h>
#include "DFT8Turbo.h" #include "DFT8Turbo.h"
@ -5,7 +7,6 @@
#include <stdio.h> #include <stdio.h>
#define MAX_FREQS (12) #define MAX_FREQS (12)
#define OCTAVES (4) #define OCTAVES (4)
@ -15,67 +16,24 @@
You should test with extreme cases, like square wave sweeps in, etc. You should test with extreme cases, like square wave sweeps in, etc.
*/ */
//#define TWELVEBIT
#define EIGHTBIT
#ifdef TWELVEBIT
//No larger than 12-bit signed values for integration or sincos
#define FRONTEND_AMPLITUDE (0)
#define INITIAL_DECIMATE (2)
#define INTEGRATOR_DECIMATE (8)
#define FINAL_DECIMATE (4)
#elif defined( EIGHTBIT )
//No larger than 8-bit signed values for integration or sincos //No larger than 8-bit signed values for integration or sincos
#define FRONTEND_AMPLITUDE (2) #define FRONTEND_AMPLITUDE (2)
#define INITIAL_DECIMATE (5) //Yurgh... only 3 bits of ADC data. That's 8 unique levels :( #define INITIAL_DECIMATE (5) //Yurgh... only 3 bits of ADC data. That's 8 unique levels :(
#define INTEGRATOR_DECIMATE (8) #define INTEGRATOR_DECIMATE (8)
#define FINAL_DECIMATE (1) #define FINAL_DECIMATE (1)
#endif
//Right now, we need 8*freqs*octaves bytes.
//This is bad. #define OPTABLETYPE uint16_t //Make uint8_t if on attiny.
//What can we do to fix it?
//4x the hits (sin/cos and we need to do it once for each edge) //4x the hits (sin/cos and we need to do it once for each edge)
//8x for selecting a higher octave. //8x for selecting a higher octave.
#define FREQREBASE 8.0 #define FREQREBASE 8.0
#define TARGFREQ 10000.0 #define TARGFREQ 10000.0
/* Tradeoff guide:
* We will optimize for RAM size here.
* INITIAL_DECIMATE; A larger decimation: {NOTE 1}
+) Reduces the bit depth needed for the integral map.
If you use "1" and a fully saturated map (highest note is every sample), it will not overflow a signed 12-bit number.
-) Increases noise.
With full-scale: 0->1 minimal 1->2 minimal 2->3 significantly noticeable, 3->4 major.
If sound is quieter, it matters more. Not sure with other changes in system. (2) seems ok.
-) If you make it (1) or (0) You can't do an 8-bit multiply and keep the output in a signed range.
Also, other things, like frequency of hits can manipulate the maximum bit depth needed for integral map.
* If you weight the bins in advance see "mulmux", you can: {NOTE 2}
+) potentially use shallower bit depth but
-) have to compute the multiply every time you update the bin.
* You can use a modified-square-wave which only integrates for 1/2 of the duty cycle. {NOTE 3}
+) uses 1/2 the integral memory.
-) Not as pretty of an output. See "integral_at"
*TODO: Investigate using all unsigned (to make multiply and/or 12-bit storage easier)
*TODO: Consider a mode which has 16-bit integrals, but still 8-bit cossin data.
So, the idea here is we would keep a running total of the current ADC value, kept away in a int16_t.
It is constantly summing, so we can take an integral of it. Or rather an integral range.
Over time, we perform operations like adding or subtracting from a current place. It basically is
a DFT where the kernel is computed using square waves (or modified square waves)
*/
//These live in RAM. //These live in RAM.
int16_t running_integral; //Realistically treat as 12-bits on ramjet8 int8_t running_integral; //Realistically treat as 12-bits on ramjet8
int16_t integral_at[MAX_FREQS*OCTAVES]; //For ramjet8, make 12-bits int8_t integral_at[MAX_FREQS*OCTAVES]; //For ramjet8, make 12-bits
int32_t cossindata[MAX_FREQS*OCTAVES*2]; //Contains COS and SIN data. (32-bit for now, will be 16-bit, potentially even 8.) int8_t cossindata[MAX_FREQS*OCTAVES*2]; //Contains COS and SIN data. (32-bit for now, will be 16-bit, potentially even 8.)
uint8_t which_octave_for_op[MAX_FREQS]; //counts up, tells you which ocative you are operating on. PUT IN RAM. uint8_t which_octave_for_op[MAX_FREQS]; //counts up, tells you which ocative you are operating on. PUT IN RAM.
uint8_t actiontableplace; uint8_t actiontableplace;
@ -84,13 +42,13 @@ uint8_t actiontableplace;
// 255 = DO NOT OPERATE // 255 = DO NOT OPERATE
// bits 0..3 unfolded octave, i.e. sin/cos are offset by one. // bits 0..3 unfolded octave, i.e. sin/cos are offset by one.
// bit 4 = add or subtract. // bit 4 = add or subtract.
uint8_t optable[NR_OF_OPS]; //PUT IN FLASH OPTABLETYPE optable[NR_OF_OPS]; //PUT IN FLASH
#define ACTIONTABLESIZE 256 #define ACTIONTABLESIZE 256
uint16_t actiontable[ACTIONTABLESIZE]; //PUT IN FLASH // If there are more than 8 freqbins, this must be a uint16_t, otherwise if more than 16, 32. uint16_t actiontable[ACTIONTABLESIZE]; //PUT IN FLASH // If there are more than 8 freqbins, this must be a uint16_t, otherwise if more than 16, 32.
//Format is //Format is
uint8_t mulmux[MAX_FREQS]; //PUT IN FLASH OPTABLETYPE mulmux[MAX_FREQS]; //PUT IN FLASH
static int Setup( float * frequencies, int bins ) static int Setup( float * frequencies, int bins )
{ {
@ -199,117 +157,112 @@ void Turbo8BitRun( int8_t adcval )
if( adcv < -128 ) adcv = -128; if( adcv < -128 ) adcv = -128;
running_integral += adcv>>INITIAL_DECIMATE; running_integral += adcv>>INITIAL_DECIMATE;
#define dprintf( ... ) uint16_t action = actiontable[actiontableplace++];
uint8_t n;
uint32_t action = actiontable[actiontableplace++]; //Counts are approximate counts for PMS133
int n;
dprintf( "%4d ", actiontableplace ); for( n = 0; //1CYC
for( n = 0; n < MAX_FREQS; n++ ) n < MAX_FREQS; //2CYC
n++, //1CYC
action>>=1 //2CYC
)
{ {
if( action & (1<<n) ) //Everything inside this loop is executed ~3/4 * MAX_FREQS. so.. ~9x.
//If op @ 4MHz, we get 44 cycles in here.
//If no operation is scheduled, continue.
if( !( action & 1 ) ) continue; //1CYC
uint8_t ao = which_octave_for_op[n]; //4CYC
ao++; //1CYC
if( ao >= NR_OF_OPS ) ao = 0; //2CYC
which_octave_for_op[n] = ao; //2CYC (idxm)
uint8_t op = optable[ao]; //"theoretically" 3CYC (if you align things right)
//1CYC (Put A into specific RAM location)
//If we are on the one thing we aren't supposed to operate within, cancel.
if( op == 255 ) continue; //2CYC (if op is in A)
//Tricky: We share the integral with SIN and COS.
//We don't need to. It would produce a slightly cleaner signal. See: NOTE 3
uint8_t octave = op & 0xf; //1CYC (if op is in A)
uint8_t intindex = octave * MAX_FREQS //Load mulop with 12 [2CYC]; mul [1CYC]
+ n; //Add [1CYC]
//[1CYC] more cycle to write A into RAM[(intindex)
//int invoct = OCTAVES-1-octaveplace;
int8_t diff;
if( op & 0x10 ) //ADD //2CYC
{ {
int ao = which_octave_for_op[n]; diff = integral_at[intindex] //Assume "IntIndex" is in A, add integral_at to A [1], move A to an index [1]. [2] to read into acc. [4CYC]
int op = optable[ao]; - running_integral; //1CYC to subtract.
ao++; //1CYC to write diff into a memory location.
if( ao >= NR_OF_OPS ) ao = 0; }
which_octave_for_op[n] = ao; else //SUBTRACT
{
diff = running_integral - integral_at[intindex];
}
if( op == 255 ) //30 cycles so far.
{
dprintf( "*" ); //NOP
}
else
{
//int octaveplace = op & 0xf;
//Tricky: We share the integral with SIN and COS. integral_at[intindex] = running_integral; //[3CYC]
//We don't need to. It would produce a slightly cleaner signal. See: NOTE 3
uint8_t octave = op & 0xf;
uint8_t intindex = octave * MAX_FREQS + n;
//int invoct = OCTAVES-1-octaveplace; //if( diff > 124 || diff < -124 ) printf( "!!!!!!!!!!!! %d !!!!!!!!!!!\n", diff );
int16_t diff;
//uint8_t idx = ( intindex << 1 ); //Overwrite intindex.
intindex <<= 1; //1CYC
if( op & 0x10 ) //ADD if( op&(1<<6) ) //2CYC
{ {
diff = integral_at[intindex] - running_integral; intindex |= 1; //1CYC
dprintf( "%c", 'a' + (op & 0xf) ); }
}
else //SUBTRACT
{
diff = running_integral - integral_at[intindex];
dprintf( "%c", 'A' + (op & 0xf) );
}
integral_at[intindex] = running_integral; uint8_t mulmuxval = mulmux[n]; //[4CYC]
#ifdef TWELVEBIT
if( diff > 2000 || diff < -2000 ) printf( "!!!!!!!!!!!! %d !!!!!!!!!!!\n", diff );
#elif defined( EIGHTBIT )
if( diff > 124 || diff < -124 ) printf( "!!!!!!!!!!!! %d !!!!!!!!!!!\n", diff );
#endif
uint8_t idx = ( intindex << 1 );
if( op&(1<<6) )
{
idx |= 1;
}
//printf( "%d: %d + %d * %d >> 8 - %d\n", idx, cossindata[idx], diff, mulmux[idx/2], cossindata[idx]>>4 );
uint8_t mulmuxval = mulmux[n];
//Do you live on a super lame processor? {NOTE 4} //Do you live on a super lame processor? {NOTE 4}
//If you do, you might not have good signed multiply operations. So, an alternative mechanism is found here. //If you do, you might not have good signed multiply operations. So, an alternative mechanism is found here.
// +) Able to more cleanly crush to an 8-bit multiply. // +) Able to more cleanly crush to an 8-bit multiply.
// +) Gets extra bit of precision back, i.e. the sign bit is now used as a data bit. // +) Gets extra bit of precision back, i.e. the sign bit is now used as a data bit.
// -) More than 1 line of C code. Requires possible double invert. // -) More than 1 line of C code. Requires possible double invert.
#if 1 #if 1
//Terrible processor, i.e. PMS133 //rough processor, i.e. PMS133
if( 0 && diff < 0 ) if( diff < 0 ) //[2CYC]
{ {
diff *= -1; diff *= -1; //[1CYC]
diff >>= (OCTAVES-1-octave); diff >>= (OCTAVES-1-octave); // ???TRICKY???
//if( diff > 250 ) printf( "!!!!!!!**** %d ****!!!!!!!\n", diff );
if( diff > 250 ) printf( "!!!!!!!**** %d ****!!!!!!!\n", diff ); diff = ((uint16_t)diff * (uint16_t)mulmuxval)>>INTEGRATOR_DECIMATE; //[3CYC]
diff *= -1; //[1CYC]
diff = (uint16_t)diff * (uint16_t)mulmuxval;
diff >>= INTEGRATOR_DECIMATE;
diff *= -1;
}
else
{
diff >>= (OCTAVES-1-octave);
if( diff > 250 ) printf( "!!!!!!!**** %d ****!!!!!!!\n", diff );
diff = (uint16_t)diff * (uint16_t)mulmuxval;
diff >>= INTEGRATOR_DECIMATE;
}
#else
//Decent processor, i.e. ATTiny85.
diff = ((diff>>(OCTAVES-1-octave)) * mulmuxval ) >> 6;
#endif
cossindata[idx] = cossindata[idx]
+ diff
- (cossindata[idx]>>4)
;
#ifdef EIGHTBIT
if( cossindata[idx] > 0 ) cossindata[idx]--;
if( cossindata[idx] < 0 ) cossindata[idx]++;
#endif
}
} }
else else
{ {
dprintf( " " ); diff >>= (OCTAVES-1-octave);
} //if( diff > 250 ) printf( "!!!!!!!**** %d ****!!!!!!!\n", diff );
} diff = ((uint16_t)diff * (uint16_t)mulmuxval)>>INTEGRATOR_DECIMATE;
dprintf( "\n" ); }
//@48 cycles :( :( :(
#else
//Decent processor, i.e. ATTiny85.
diff = ((diff>>(OCTAVES-1-octave)) * mulmuxval ) >> 6;
#endif
//printf( "%d\n", diff );
cossindata[intindex] = cossindata[intindex]
+ diff
- (cossindata[intindex]>>4)
;
if( cossindata[intindex] > 0 ) cossindata[intindex]--;
if( cossindata[intindex] < 0 ) cossindata[intindex]++;
}
} }
@ -344,13 +297,9 @@ void DoDFT8BitTurbo( float * outbins, float * frequencies, int bins, const float
{ {
outbins[i] = sqrt((float)mux)/50.0; outbins[i] = sqrt((float)mux)/50.0;
#ifdef TWELVEBIT if( abs( cossindata[i*2+0] ) > 120 || abs( cossindata[i*2+1] ) > 120 )
if( abs( cossindata[i*2+0] ) > 1000 || abs( cossindata[i*2+1] ) > 1000 ) printf( "CS OVF %d/%d/%d/%f\n", i, cossindata[i*2+0], cossindata[i*2+1],outbins[i] );
printf( "CS OVF %d/%d/%d/%f\n", i, cossindata[i*2+0], cossindata[i*2+1],outbins[i] );
#elif defined( EIGHTBIT )
if( abs( cossindata[i*2+0] ) > 120 || abs( cossindata[i*2+1] ) > 120 )
printf( "CS OVF %d/%d/%d/%f\n", i, cossindata[i*2+0], cossindata[i*2+1],outbins[i] );
#endif
} }
} }
#endif #endif