Switch over to making 8Turbo *actually* turbo.

This commit is contained in:
cnlohr 2019-04-29 00:04:28 -04:00
parent 8677baebd3
commit 1432f22b77
4 changed files with 477 additions and 105 deletions

View file

@ -28,7 +28,6 @@ channels = 2
#10000 / 2^4{octaves} / 8 #10000 / 2^4{octaves} / 8
base_hz = 82.41 base_hz = 82.41
samplerate = 10000 samplerate = 10000
freqbins = 12 freqbins = 12
octaves = 4 octaves = 4

359
embeddedcommon/DFT12Small.c Normal file
View file

@ -0,0 +1,359 @@
#include <stdint.h>
#include <stdlib.h>
#include "DFT12Small.h"
#include <math.h>
#include <stdio.h>
//Number of frequency bins handled per octave. Sizes mulmux[] and which_octave_for_op[],
//and is the stride between octaves in integral_at[] / cossindata[].
#define MAX_FREQS (12)
//Number of octaves that share each bin's op schedule (see optable[]).
#define OCTAVES (4)
/*
General procedure - use this code with uint16_t or uint32_t buffers, and make sure none of the
alarms (the "!!!!" and "CS OVF" printfs further down) go off.
All of the paths still require no more than an 8-bit multiply.
You should test with extreme cases, like square wave sweeps, etc.
*/
//Precision profile selection. If neither TWELVEBIT nor EIGHTBIT is defined, the four tuning
//macros below are left undefined.
//#define TWELVEBIT
#define EIGHTBIT
#ifdef TWELVEBIT
//No larger than 12-bit signed values for integration or sincos
//NOTE(review): Small12BitRun multiplies every input sample by FRONTEND_AMPLITUDE, so a value of
//0 zeroes the input in this profile - confirm the intended value.
#define FRONTEND_AMPLITUDE (0)
#define INITIAL_DECIMATE (2)
#define INTEGRATOR_DECIMATE (8)
#define FINAL_DECIMATE (4)
#elif defined( EIGHTBIT )
//No larger than 8-bit signed values for integration or sincos
//FRONTEND_AMPLITUDE: gain multiplied into each sample before it is clamped to -128..127.
#define FRONTEND_AMPLITUDE (2)
//INITIAL_DECIMATE: right shift applied to the clamped sample before it is integrated.
#define INITIAL_DECIMATE (5) //Yurgh... only 3 bits of ADC data. That's 8 unique levels :(
//INTEGRATOR_DECIMATE: right shift applied after the difference is multiplied by its mulmux weight.
#define INTEGRATOR_DECIMATE (8)
//FINAL_DECIMATE: right shift applied to cossindata when the output magnitudes are computed.
#define FINAL_DECIMATE (1)
#endif
//Right now, we need 8*freqs*octaves bytes.
//This is bad.
//What can we do to fix it?
//4x the hits (sin/cos, and we need to do it once for each edge)
//8x for selecting a higher octave.
//FREQREBASE: every entry of frequencies[] is divided by this when the action table is built.
#define FREQREBASE 8.0
//TARGFREQ: only used for the percent-error figure printed by Setup.
#define TARGFREQ 10000.0
/* Tradeoff guide:
* We will optimize for RAM size here.
* INITIAL_DECIMATE; A larger decimation: {NOTE 1}
+) Reduces the bit depth needed for the integral map.
If you use "1" and a fully saturated map (highest note is every sample), it will not overflow a signed 12-bit number.
-) Increases noise.
With full-scale: 0->1 minimal, 1->2 minimal, 2->3 significantly noticeable, 3->4 major.
If sound is quieter, it matters more. Not sure with other changes in system. (2) seems ok.
-) If you make it (1) or (0) you can't do an 8-bit multiply and keep the output in a signed range.
Also, other things, like frequency of hits, can manipulate the maximum bit depth needed for the integral map.
* If you weight the bins in advance (see "mulmux"), you can: {NOTE 2}
+) potentially use shallower bit depth, but
-) have to compute the multiply every time you update the bin.
* You can use a modified square wave which only integrates for 1/2 of the duty cycle. {NOTE 3}
+) Uses 1/2 the integral memory.
-) Not as pretty of an output. See "integral_at"
*TODO: Investigate using all unsigned (to make multiply and/or 12-bit storage easier)
*TODO: Consider a mode which has 16-bit integrals, but still 8-bit cossin data.
So, the idea here is we keep a running total of the current ADC value, kept away in an int16_t.
It is constantly summing, so we can take an integral of it. Or rather an integral over a range.
Over time, we perform operations like adding or subtracting from a current place. It basically is
a DFT where the kernel is computed using square waves (or modified square waves).
*/
//These live in RAM.
//Running sum of the scaled, clamped and decimated input samples.
int16_t running_integral; //Realistically treat as 12-bits on ramjet8
//Value of running_integral captured at the last op for each slot; slot = octave*MAX_FREQS + bin.
int16_t integral_at[MAX_FREQS*OCTAVES]; //For ramjet8, make 12-bits
//Two accumulators per slot, at index slot*2 and slot*2+1; bit 6 of the op picks which one is updated.
int32_t cossindata[MAX_FREQS*OCTAVES*2]; //Contains COS and SIN data. (32-bit for now, will be 16-bit, potentially even 8.)
//Per-bin cursor into optable[]; it advances each time the bin is hit and wraps at NR_OF_OPS.
uint8_t which_octave_for_op[MAX_FREQS]; //counts up, tells you which octave you are operating on. PUT IN RAM.
//Read position in actiontable[]; being a uint8_t it wraps after 256 entries.
uint8_t actiontableplace;
#define NR_OF_OPS (4<<OCTAVES)
//Format of an optable entry:
// 255 = DO NOT OPERATE
// bits 0..3 = octave index.
// bit 4 = add or subtract (selects the sign of the integral difference).
// bit 5 = set the first time a given octave/accumulator pair appears in the table (not read by Small12BitRun).
// bit 6 = selects the odd cossindata entry of the slot instead of the even one.
uint8_t optable[NR_OF_OPS]; //PUT IN FLASH
#define ACTIONTABLESIZE 256
//Format of an actiontable entry: bit n set means bin n performs its next op on this sample.
uint16_t actiontable[ACTIONTABLESIZE]; //PUT IN FLASH // If there are more than 8 freqbins, this must be a uint16_t, otherwise if more than 16, 32.
//Per-bin 8-bit weight computed by Setup as highestf / frequencies[bin] * 255.
uint8_t mulmux[MAX_FREQS]; //PUT IN FLASH
/*
 * Builds the three lookup tables used by Small12BitRun():
 *   mulmux[]      - per-bin 8-bit weight, highestf / frequencies[bin] * 255.
 *   actiontable[] - for each of the ACTIONTABLESIZE sample slots, a bitmask of the bins that
 *                   perform an op on that sample.
 *   optable[]     - the repeating schedule of ops (octave, sign, accumulator) each bin steps through.
 *
 * frequencies: array of at least `bins` entries. The first MAX_FREQS entries feed mulmux[], the
 *              last MAX_FREQS entries feed actiontable[]. Each entry is used as a divisor of the
 *              table length, so it presumably is a period in samples rather than Hz - TODO confirm
 *              against the caller.
 * bins:        number of entries in frequencies; must be at least MAX_FREQS.
 *
 * Returns 0 on success, -1 if the arguments cannot be used (the tables are left untouched).
 * Terminates the process with a failure status if a bin would need a hit on every table slot.
 */
static int Setup( float * frequencies, int bins )
{
	int i;
	printf( "BINS: %d\n", bins );

	//With fewer than MAX_FREQS entries both loops below would index outside frequencies[].
	if( frequencies == NULL || bins < MAX_FREQS )
	{
		fprintf( stderr, "Error: Need at least %d frequency bins, got %d.\n", MAX_FREQS, bins );
		return -1;
	}

	float highestf = frequencies[MAX_FREQS-1];
	for( i = 0; i < MAX_FREQS; i++ )
	{
		mulmux[i] = (uint8_t)( highestf / frequencies[i] * 255 + 0.5 );
		printf( "MM: %d %f / %f\n", mulmux[i], frequencies[i], highestf );
	}

	for( i = bins-MAX_FREQS; i < bins; i++ )
	{
		int topbin = i - (bins-MAX_FREQS);
		float f = frequencies[i]/FREQREBASE;
		float hits_per_table = (float)ACTIONTABLESIZE/f;
		//TRICKY: You might think you need an even number of hits (sin/cos), but you don't!
		//It can flip sin/cos each time through the table!
		int dhrpertable = (int)(hits_per_table+.5);
		float err = (TARGFREQ/((float)ACTIONTABLESIZE/dhrpertable) - (float)TARGFREQ/f)/((float)TARGFREQ/f);

		//Perform an op every X samples. How well does this map into one pass of the action table?
		printf( "%d %f -> hits per %d: %f %d (%.2f%% error)\n", topbin, f, ACTIONTABLESIZE, (float)ACTIONTABLESIZE/f, dhrpertable, err * 100.0 );

		if( dhrpertable >= ACTIONTABLESIZE )
		{
			fprintf( stderr, "Error: Too many hits.\n" );
			exit( EXIT_FAILURE ); //This is a failure; do not report success to the shell.
		}

		//Spread dhrpertable hits evenly over the table with a fractional accumulator.
		//Tricky: We need to start fvadv off at such a place that there won't be a hiccup when
		// going back around to 0. I believe this is done by setting fvadv to 0.5 initially. Unsure.
		float advance_per_step = dhrpertable/(float)ACTIONTABLESIZE;
		float fvadv = 0.5;
		int j;
		int countset = 0;
		for( j = 0; j < ACTIONTABLESIZE; j++ )
		{
			if( fvadv >= 0.5 )
			{
				actiontable[j] |= 1<<topbin;
				fvadv -= 1.0;
				countset++;
			}
			fvadv += advance_per_step;
		}
		printf( " countset: %d\n", countset );
	}

	int phaseinop[OCTAVES] = { 0 };               //Ops emitted so far, per octave.
	int already_hit_octaveplace[OCTAVES*2] = { 0 }; //Seen-flags per (octave, accumulator) pair.
	for( i = 0; i < NR_OF_OPS; i++ )
	{
		int val = i & ((1<<OCTAVES)-1);

		//No bit set in this slot: emit a nop. Handling it up front also keeps the scan below from
		//shifting past the width of an int, which is undefined behaviour.
		if( val == 0 )
		{
			optable[i] = 255;
			continue;
		}

		//Count trailing zero bits: 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, ...
		int trailingzeroes = 0;
		while( ((val >> trailingzeroes) & 1) == 0 )
		{
			trailingzeroes++;
		}

		int octave = OCTAVES-1-trailingzeroes; //Actually do octave 0 least often.
		int iop = phaseinop[octave]++;
		int toop = octave;
		int toopmon = (octave<<1) | (iop & 1);

		//If it's the first time an octave happened this set, flag it. This may be used later in the process.
		if( !already_hit_octaveplace[toopmon] )
		{
			already_hit_octaveplace[toopmon] = 1;
			toop |= 1<<5;
		}

		//Odd ops for an octave go to the second accumulator of the slot.
		if( iop & 1 )
		{
			toop |= 1<<6;
		}

		//Handle add/subtract bit.
		if( iop & 2 ) toop |= 1<<4;

		optable[i] = toop;
	}
	return 0;
}
void Small12BitRun( int8_t adcval )
{
int16_t adcv = adcval;
adcv *= FRONTEND_AMPLITUDE;
if( adcv > 127 ) adcv = 127;
if( adcv < -128 ) adcv = -128;
running_integral += adcv>>INITIAL_DECIMATE;
#define dprintf( ... )
uint32_t action = actiontable[actiontableplace++];
int n;
dprintf( "%4d ", actiontableplace );
for( n = 0; n < MAX_FREQS; n++ )
{
if( action & (1<<n) )
{
int ao = which_octave_for_op[n];
int op = optable[ao];
ao++;
if( ao >= NR_OF_OPS ) ao = 0;
which_octave_for_op[n] = ao;
if( op == 255 )
{
dprintf( "*" ); //NOP
}
else
{
//int octaveplace = op & 0xf;
//Tricky: We share the integral with SIN and COS.
//We don't need to. It would produce a slightly cleaner signal. See: NOTE 3
uint8_t octave = op & 0xf;
uint8_t intindex = octave * MAX_FREQS + n;
//int invoct = OCTAVES-1-octaveplace;
int16_t diff;
if( op & 0x10 ) //ADD
{
diff = integral_at[intindex] - running_integral;
dprintf( "%c", 'a' + (op & 0xf) );
}
else //SUBTRACT
{
diff = running_integral - integral_at[intindex];
dprintf( "%c", 'A' + (op & 0xf) );
}
integral_at[intindex] = running_integral;
#ifdef TWELVEBIT
if( diff > 2000 || diff < -2000 ) printf( "!!!!!!!!!!!! %d !!!!!!!!!!!\n", diff );
#elif defined( EIGHTBIT )
if( diff > 124 || diff < -124 ) printf( "!!!!!!!!!!!! %d !!!!!!!!!!!\n", diff );
#endif
uint8_t idx = ( intindex << 1 );
if( op&(1<<6) )
{
idx |= 1;
}
//printf( "%d: %d + %d * %d >> 8 - %d\n", idx, cossindata[idx], diff, mulmux[idx/2], cossindata[idx]>>4 );
uint8_t mulmuxval = mulmux[n];
//Do you live on a super lame processor? {NOTE 4}
//If you do, you might not have good signed multiply operations. So, an alternative mechanism is found here.
// +) Able to more cleanly crush to an 8-bit multiply.
// +) Gets extra bit of precision back, i.e. the sign bit is now used as a data bit.
// -) More than 1 line of C code. Requires possible double invert.
#if 1
//Terrible processor, i.e. PMS133
if( 0 && diff < 0 )
{
diff *= -1;
diff >>= (OCTAVES-1-octave);
if( diff > 250 ) printf( "!!!!!!!**** %d ****!!!!!!!\n", diff );
diff = (uint16_t)diff * (uint16_t)mulmuxval;
diff >>= INTEGRATOR_DECIMATE;
diff *= -1;
}
else
{
diff >>= (OCTAVES-1-octave);
if( diff > 250 ) printf( "!!!!!!!**** %d ****!!!!!!!\n", diff );
diff = (uint16_t)diff * (uint16_t)mulmuxval;
diff >>= INTEGRATOR_DECIMATE;
}
#else
//Decent processor, i.e. ATTiny85.
diff = ((diff>>(OCTAVES-1-octave)) * mulmuxval ) >> 6;
#endif
cossindata[idx] = cossindata[idx]
+ diff
- (cossindata[idx]>>4)
;
#ifdef EIGHTBIT
if( cossindata[idx] > 0 ) cossindata[idx]--;
if( cossindata[idx] < 0 ) cossindata[idx]++;
#endif
}
}
else
{
dprintf( " " );
}
}
dprintf( "\n" );
}
void DoDFT12BitSmall( float * outbins, float * frequencies, int bins, const float * databuffer, int place_in_data_buffer, int size_of_data_buffer, float q, float speedup )
{
static int is_setup;
if( !is_setup ) { is_setup = 1; Setup( frequencies, bins ); }
static int last_place;
int i;
for( i = last_place; i != place_in_data_buffer; i = (i+1)%size_of_data_buffer )
{
int16_t ifr1 = (int16_t)( ((databuffer[i]) ) * 4095 );
Small12BitRun( ifr1>>5 ); //5 = Actually only feed algorithm numbers from -128 to 127.
}
last_place = place_in_data_buffer;
static int idiv;
idiv++;
#if 1
for( i = 0; i < bins; i++ )
{
int iss = cossindata[i*2+0]>>FINAL_DECIMATE;
int isc = cossindata[i*2+1]>>FINAL_DECIMATE;
int mux = iss * iss + isc * isc;
if( mux <= 0 )
{
outbins[i] = 0;
}
else
{
outbins[i] = sqrt((float)mux)/50.0;
#ifdef TWELVEBIT
if( abs( cossindata[i*2+0] ) > 1000 || abs( cossindata[i*2+1] ) > 1000 )
printf( "CS OVF %d/%d/%d/%f\n", i, cossindata[i*2+0], cossindata[i*2+1],outbins[i] );
#elif defined( EIGHTBIT )
if( abs( cossindata[i*2+0] ) > 120 || abs( cossindata[i*2+1] ) > 120 )
printf( "CS OVF %d/%d/%d/%f\n", i, cossindata[i*2+0], cossindata[i*2+1],outbins[i] );
#endif
}
}
#endif
}

View file

@ -0,0 +1,9 @@
#ifndef DFT12SMALL_H
#define DFT12SMALL_H 1

/*
 * Integer, small-memory DFT front end.
 *
 * Note: Frequencies must be precompiled; they are only read on the first call, when the
 * internal lookup tables are built.
 *
 * outbins:              receives `bins` output magnitudes.
 * frequencies:          `bins` precomputed frequency entries (read on the first call only).
 * bins:                 number of entries in outbins and frequencies.
 * databuffer:           circular buffer of input samples; -1..1 maps to the full input range.
 * place_in_data_buffer: index one past the newest sample in databuffer.
 * size_of_data_buffer:  number of entries in databuffer.
 * q, speedup:           not used by this implementation.
 */
void DoDFT12BitSmall( float * outbins, float * frequencies, int bins, const float * databuffer, int place_in_data_buffer, int size_of_data_buffer, float q, float speedup );

#endif

View file

@ -5,9 +5,32 @@
#include <stdio.h> #include <stdio.h>
#define MAX_FREQS (12) #define MAX_FREQS (12)
#define OCTAVES (4) #define OCTAVES (4)
#define INITIAL_DECIMATE 1
/*
General procedure - use this code, with uint16_t or uint32_t buffers, and make sure none of the alarms go off.
All of the paths still require no more than an 8-bit multiply.
You should test with extreme cases, like square wave sweeps in, etc.
*/
//#define TWELVEBIT
#define EIGHTBIT
#ifdef TWELVEBIT
//No larger than 12-bit signed values for integration or sincos
#define FRONTEND_AMPLITUDE (0)
#define INITIAL_DECIMATE (2)
#define INTEGRATOR_DECIMATE (8)
#define FINAL_DECIMATE (4)
#elif defined( EIGHTBIT )
//No larger than 8-bit signed values for integration or sincos
#define FRONTEND_AMPLITUDE (2)
#define INITIAL_DECIMATE (5) //Yurgh... only 3 bits of ADC data. That's 8 unique levels :(
#define INTEGRATOR_DECIMATE (8)
#define FINAL_DECIMATE (1)
#endif
//Right now, we need 8*freqs*octaves bytes. //Right now, we need 8*freqs*octaves bytes.
//This is bad. //This is bad.
@ -22,13 +45,13 @@
* We will optimize for RAM size here. * We will optimize for RAM size here.
* INITIAL_DECIMATE; A larger decimation: {NOTE 1} * INITIAL_DECIMATE; A larger decimation: {NOTE 1}
+) Reduces the bit depth needed for the integral map. +) Reduces the bit depth needed for the integral map.
If you use "1" and a fully saturted map (highest note is every sample), it will not overflow a signed 12-bit number. If you use "1" and a fully saturted map (highest note is every sample), it will not overflow a signed 12-bit number.
-) Increases noise. -) Increases noise.
With full-scale: 0->1 minimal 1->2 minimal 2->3 significantly noticable, 3->4 major. With full-scale: 0->1 minimal 1->2 minimal 2->3 significantly noticable, 3->4 major.
If sound is quieter, it matters more. I recommend no less than 1. If sound is quieter, it matters more. Not sure with other changes in system. (2) seems ok.
-) If you make it (1) or (0) You can't do an 8-bit multiply and keep the output in a signed range.
Also, other things, like frequency of hits can manipulate the maximum bit depth needed for integral map. Also, other things, like frequency of hits can manipulate the maximum bit depth needed for integral map.
* If you weight the bins in advance see "mulmux", you can: {NOTE 2} * If you weight the bins in advance see "mulmux", you can: {NOTE 2}
@ -40,8 +63,7 @@
-) Not as pretty of an output. See "integral_at" -) Not as pretty of an output. See "integral_at"
*TODO: Investigate using all unsigned (to make multiply and/or 12-bit storage easier) *TODO: Investigate using all unsigned (to make multiply and/or 12-bit storage easier)
*TODO: Consider a mode which has 16-bit integrals, but still 8-bit cossin data.
So, the idea here is we would keep a running total of the current ADC value, kept away in a int16_t. So, the idea here is we would keep a running total of the current ADC value, kept away in a int16_t.
It is constantly summing, so we can take an integral of it. Or rather an integral range. It is constantly summing, so we can take an integral of it. Or rather an integral range.
@ -55,6 +77,7 @@ int16_t running_integral; //Realistically treat as 12-bits on ramjet8
int16_t integral_at[MAX_FREQS*OCTAVES]; //For ramjet8, make 12-bits int16_t integral_at[MAX_FREQS*OCTAVES]; //For ramjet8, make 12-bits
int32_t cossindata[MAX_FREQS*OCTAVES*2]; //Contains COS and SIN data. (32-bit for now, will be 16-bit, potentially even 8.) int32_t cossindata[MAX_FREQS*OCTAVES*2]; //Contains COS and SIN data. (32-bit for now, will be 16-bit, potentially even 8.)
uint8_t which_octave_for_op[MAX_FREQS]; //counts up, tells you which ocative you are operating on. PUT IN RAM. uint8_t which_octave_for_op[MAX_FREQS]; //counts up, tells you which ocative you are operating on. PUT IN RAM.
uint8_t actiontableplace;
#define NR_OF_OPS (4<<OCTAVES) #define NR_OF_OPS (4<<OCTAVES)
//Format is: //Format is:
@ -63,21 +86,19 @@ uint8_t which_octave_for_op[MAX_FREQS]; //counts up, tells you which ocative you
// bit 4 = add or subtract. // bit 4 = add or subtract.
uint8_t optable[NR_OF_OPS]; //PUT IN FLASH uint8_t optable[NR_OF_OPS]; //PUT IN FLASH
#define ACTIONTABLESIZE 256 #define ACTIONTABLESIZE 256
uint16_t actiontable[ACTIONTABLESIZE]; //PUT IN FLASH // If there are more than 8 freqbins, this must be a uint16_t, otherwise if more than 16, 32. uint16_t actiontable[ACTIONTABLESIZE]; //PUT IN FLASH // If there are more than 8 freqbins, this must be a uint16_t, otherwise if more than 16, 32.
uint8_t actiontableplace;
//Format is //Format is
uint8_t mulmux[MAX_FREQS*OCTAVES]; //PUT IN FLASH uint8_t mulmux[MAX_FREQS]; //PUT IN FLASH
static int Setup( float * frequencies, int bins ) static int Setup( float * frequencies, int bins )
{ {
int i; int i;
printf( "BINS: %d\n", bins ); printf( "BINS: %d\n", bins );
float highestf = frequencies[bins-1]; float highestf = frequencies[MAX_FREQS-1];
for( i = 0; i < bins; i++ ) for( i = 0; i < MAX_FREQS; i++ )
{ {
mulmux[i] = (uint8_t)( highestf / frequencies[i] * 255 + 0.5 ); mulmux[i] = (uint8_t)( highestf / frequencies[i] * 255 + 0.5 );
printf( "MM: %d %f / %f\n", mulmux[i], frequencies[i], highestf ); printf( "MM: %d %f / %f\n", mulmux[i], frequencies[i], highestf );
@ -141,14 +162,19 @@ static int Setup( float * frequencies, int bins )
{ {
longestzeroes = OCTAVES-1-longestzeroes; //Actually do octave 0 least often. longestzeroes = OCTAVES-1-longestzeroes; //Actually do octave 0 least often.
int iop = phaseinop[longestzeroes]++; int iop = phaseinop[longestzeroes]++;
int toop = (longestzeroes<<1) | (iop & 1); int toop = longestzeroes;
int toopmon = (longestzeroes<<1) | (iop & 1);
//if it's the first time an octave happened this set, flag it. This may be used later in the process. //if it's the first time an octave happened this set, flag it. This may be used later in the process.
if( !already_hit_octaveplace[toop] ) if( !already_hit_octaveplace[toopmon] )
{ {
already_hit_octaveplace[toop] = 1; already_hit_octaveplace[toopmon] = 1;
toop |= 1<<5; toop |= 1<<5;
} }
if( iop & 1 )
{
toop |= 1<<6;
}
//Handle add/subtract bit. //Handle add/subtract bit.
if( iop & 2 ) toop |= 1<<4; if( iop & 2 ) toop |= 1<<4;
@ -165,28 +191,13 @@ static int Setup( float * frequencies, int bins )
} }
#if 0
int16_t running_integral;
int16_t integral_at[MAX_FREQS*OCTAVES];
int16_t cossindata[MAX_FREQS*OCTAVES*2]; //Contains COS and SIN data.
uint8_t which_octave_for_op[MAX_FREQS]; //counts up, tells you which ocative you are operating on. PUT IN RAM.
#define NR_OF_OPS (4<<OCTAVES)
//Format is:
// 255 = DO NOT OPERATE
// bits 0..3 unfolded octave, i.e. sin/cos are offset by one.
// bit 4 = add or subtract.
uint8_t optable[NR_OF_OPS]; //PUT IN FLASH
#define ACTIONTABLESIZE 256
uint32_t actiontable[ACTIONTABLESIZE]; //PUT IN FLASH
//Format is
#endif
void Turbo8BitRun( int8_t adcval ) void Turbo8BitRun( int8_t adcval )
{ {
running_integral += adcval>>INITIAL_DECIMATE; int16_t adcv = adcval;
adcv *= FRONTEND_AMPLITUDE;
if( adcv > 127 ) adcv = 127;
if( adcv < -128 ) adcv = -128;
running_integral += adcv>>INITIAL_DECIMATE;
#define dprintf( ... ) #define dprintf( ... )
@ -209,11 +220,12 @@ void Turbo8BitRun( int8_t adcval )
} }
else else
{ {
int octaveplace = op & 0xf; //int octaveplace = op & 0xf;
//Tricky: We share the integral with SIN and COS. //Tricky: We share the integral with SIN and COS.
//We don't need to. It would produce a slightly cleaner signal. See: NOTE 3 //We don't need to. It would produce a slightly cleaner signal. See: NOTE 3
int intindex = (octaveplace>>1) * MAX_FREQS + n; uint8_t octave = op & 0xf;
uint8_t intindex = octave * MAX_FREQS + n;
//int invoct = OCTAVES-1-octaveplace; //int invoct = OCTAVES-1-octaveplace;
int16_t diff; int16_t diff;
@ -221,45 +233,74 @@ void Turbo8BitRun( int8_t adcval )
if( op & 0x10 ) //ADD if( op & 0x10 ) //ADD
{ {
diff = integral_at[intindex] - running_integral; diff = integral_at[intindex] - running_integral;
dprintf( "%c", 'a' + octaveplace ); dprintf( "%c", 'a' + (op & 0xf) );
} }
else //SUBTRACT else //SUBTRACT
{ {
diff = running_integral - integral_at[intindex]; diff = running_integral - integral_at[intindex];
dprintf( "%c", 'A' + octaveplace ); dprintf( "%c", 'A' + (op & 0xf) );
} }
if( diff > 2000 || diff < -2000 ) printf( "!!!!!!!!!!!! %d !!!!!!!!!!!\n", diff );
integral_at[intindex] = running_integral; integral_at[intindex] = running_integral;
int idx = intindex * 2 + (octaveplace&1); #ifdef TWELVEBIT
if( diff > 2000 || diff < -2000 ) printf( "!!!!!!!!!!!! %d !!!!!!!!!!!\n", diff );
#elif defined( EIGHTBIT )
if( diff > 124 || diff < -124 ) printf( "!!!!!!!!!!!! %d !!!!!!!!!!!\n", diff );
#endif
//if( n == 1 ) printf( "%d %d %d %d\n", n, idx, diff, op & 0x10 ); uint8_t idx = ( intindex << 1 );
//dprintf( "%d\n", idx ); if( op&(1<<6) )
#if 0
//Apply IIR operation 1; This is rough because the Q changes and goes higher as a function of frequency. This is probably a bad move.
cossindata[idx] += diff>>4;
if( op & 0x20 )
{ {
cossindata[idx] = cossindata[idx] idx |= 1;
- (cossindata[idx]>>2);
} }
#else
//Apply IIR.
//printf( "%d: %d + %d * %d >> 8 - %d\n", idx, cossindata[idx], diff, mulmux[idx/2], cossindata[idx]>>4 ); //printf( "%d: %d + %d * %d >> 8 - %d\n", idx, cossindata[idx], diff, mulmux[idx/2], cossindata[idx]>>4 );
uint8_t mulmuxval = mulmux[n];
//Do you live on a super lame processor? {NOTE 4}
//If you do, you might not have good signed multiply operations. So, an alternative mechanism is found here.
// +) Able to more cleanly crush to an 8-bit multiply.
// +) Gets extra bit of precision back, i.e. the sign bit is now used as a data bit.
// -) More than 1 line of C code. Requires possible double invert.
#if 1
//Terrible processor, i.e. PMS133
if( 0 && diff < 0 )
{
diff *= -1;
diff >>= (OCTAVES-1-octave);
if( diff > 250 ) printf( "!!!!!!!**** %d ****!!!!!!!\n", diff );
diff = (uint16_t)diff * (uint16_t)mulmuxval;
diff >>= INTEGRATOR_DECIMATE;
diff *= -1;
}
else
{
diff >>= (OCTAVES-1-octave);
if( diff > 250 ) printf( "!!!!!!!**** %d ****!!!!!!!\n", diff );
diff = (uint16_t)diff * (uint16_t)mulmuxval;
diff >>= INTEGRATOR_DECIMATE;
}
#else
//Decent processor, i.e. ATTiny85.
diff = ((diff>>(OCTAVES-1-octave)) * mulmuxval ) >> 6;
#endif
cossindata[idx] = cossindata[idx] cossindata[idx] = cossindata[idx]
+ (((int32_t)diff * (int32_t)mulmux[idx/2])>>6) + diff
- (cossindata[idx]>>4) - (cossindata[idx]>>4)
; ;
// if( cossindata[idx] > 2047 ) cossindata[idx] = 2047;
// if( cossindata[idx] < -2048 ) cossindata[idx] = -2048; #ifdef EIGHTBIT
if( cossindata[idx] > 0 ) cossindata[idx]--;
if( cossindata[idx] < 0 ) cossindata[idx]++;
#endif #endif
// if( cossindata[idx] > 1 ) cossindata[idx]--;
// if( cossindata[idx] < -1 ) cossindata[idx]++;
// if( cossindata[idx] > 16 ) cossindata[idx]-=8;
// if( cossindata[idx] < -16 ) cossindata[idx]+=8;
} }
} }
else else
@ -269,27 +310,6 @@ void Turbo8BitRun( int8_t adcval )
} }
dprintf( "\n" ); dprintf( "\n" );
#if 0
uint32_t actions = *(placeintable++);
if( placeintable == &actiontable[ACTIONTABLESIZE] ) placeintable = actiontable;
int b;
for( b = 0; b < MAX_FREQS; b++ )
{
if( ! ((1<<b) & actions) ) continue;
//If we get here, we need to do an action.
int op = which_octave_for_op[b]++;
int sinorcos = op & 1;
op >>= 1;
int octavebit = op & ((1<<OCTAVES)-1);
if( !octavebit ) { continue; } //XXX TRICKY: In our octavebit table, we have 1 0 and 1 1 entry. 2, 3, 4, etc. are ok. So, if we hit a 0, we abort.
int whichoctave = highbit_table[octavebit];
//Ok, actually we need to also know whether you're on SIN or COS.
//if( b == 0 ) printf( "%d\n", whichoctave );
//XXX TODO Optimization: Use a table, since octavebit can only be 0...31.
}
#endif
} }
@ -303,7 +323,7 @@ void DoDFT8BitTurbo( float * outbins, float * frequencies, int bins, const float
for( i = last_place; i != place_in_data_buffer; i = (i+1)%size_of_data_buffer ) for( i = last_place; i != place_in_data_buffer; i = (i+1)%size_of_data_buffer )
{ {
int16_t ifr1 = (int16_t)( ((databuffer[i]) ) * 4095 ); int16_t ifr1 = (int16_t)( ((databuffer[i]) ) * 4095 );
Turbo8BitRun( ifr1>>5 ); //6 = Actually only feed algorithm numbers from -64 to 63. Turbo8BitRun( ifr1>>5 ); //5 = Actually only feed algorithm numbers from -128 to 127.
} }
last_place = place_in_data_buffer; last_place = place_in_data_buffer;
@ -312,25 +332,9 @@ void DoDFT8BitTurbo( float * outbins, float * frequencies, int bins, const float
#if 1 #if 1
for( i = 0; i < bins; i++ ) for( i = 0; i < bins; i++ )
{ {
outbins[i] = 0; int iss = cossindata[i*2+0]>>FINAL_DECIMATE;
} int isc = cossindata[i*2+1]>>FINAL_DECIMATE;
for( i = 0; i < bins; i++ )
{
int iss = cossindata[i*2+0]>>8;
int isc = cossindata[i*2+1]>>8;
int issdiv = 0;
int iscdiv = 0;
int FWDOFFSET = 19;//MAX_FREQS*3/2;
if( i < bins-FWDOFFSET )
{
issdiv = cossindata[(i+FWDOFFSET)*2+0]/256;
iscdiv = cossindata[(i+FWDOFFSET)*2+1]/256;
}
int mux = iss * iss + isc * isc; int mux = iss * iss + isc * isc;
int muxdiv = issdiv * issdiv + iscdiv * iscdiv;
//if( (idiv % 100) > 50 ) { printf( "*" ); mux -= muxdiv; }
//mux -= muxdiv;
if( mux <= 0 ) if( mux <= 0 )
{ {
@ -338,16 +342,17 @@ void DoDFT8BitTurbo( float * outbins, float * frequencies, int bins, const float
} }
else else
{ {
//if( i == 0 )
//printf( "MUX: %d %d = %d\n", isc, iss, mux );
outbins[i] = sqrt((float)mux)/50.0; outbins[i] = sqrt((float)mux)/50.0;
if( abs( cossindata[i*2+0] ) > 2000 || abs( cossindata[i*2+1] ) > 2000 ) #ifdef TWELVEBIT
printf( "%d/%d/%d/%f ", i, cossindata[i*2+0], cossindata[i*2+1],outbins[i] ); if( abs( cossindata[i*2+0] ) > 1000 || abs( cossindata[i*2+1] ) > 1000 )
//outbins[i] = (cossindata[i*2+0]/10000.0); printf( "CS OVF %d/%d/%d/%f\n", i, cossindata[i*2+0], cossindata[i*2+1],outbins[i] );
#elif defined( EIGHTBIT )
if( abs( cossindata[i*2+0] ) > 120 || abs( cossindata[i*2+1] ) > 120 )
printf( "CS OVF %d/%d/%d/%f\n", i, cossindata[i*2+0], cossindata[i*2+1],outbins[i] );
#endif
} }
} }
printf( "\n" );
#endif #endif
} }