313 lines
9.1 KiB
C
313 lines
9.1 KiB
C
//NOTE DO NOT EDIT THIS FILE WITHOUT ALSO EDITING DFT12SMALL!!!
|
|
|
|
#include <stdint.h>
|
|
#include <stdlib.h>
|
|
#include "DFT8Turbo.h"
|
|
#include <math.h>
|
|
|
|
#include <stdio.h>
|
|
|
|
#define MAX_FREQS (12)
|
|
#define OCTAVES (4)
|
|
|
|
/*
|
|
General procedure - use this code, with uint16_t or uint32_t buffers, and make sure none of the alarms go off.
|
|
All of the paths still require no more than an 8-bit multiply.
|
|
You should test with extreme cases, like square wave sweeps in, etc.
|
|
*/
|
|
|
|
//No larger than 8-bit signed values for integration or sincos
|
|
#define FRONTEND_AMPLITUDE (2)
|
|
#define INITIAL_DECIMATE (5) //Yurgh... only 3 bits of ADC data. That's 8 unique levels :(
|
|
#define INTEGRATOR_DECIMATE (8)
|
|
#define FINAL_DECIMATE (1)
|
|
|
|
|
|
#define OPTABLETYPE uint16_t //Make uint8_t if on attiny.
|
|
|
|
//4x the hits (sin/cos and we need to do it once for each edge)
|
|
//8x for selecting a higher octave.
|
|
#define FREQREBASE 8.0
|
|
#define TARGFREQ 10000.0
|
|
|
|
//These live in RAM.
|
|
int8_t running_integral; //Realistically treat as 12-bits on ramjet8
|
|
int8_t integral_at[MAX_FREQS*OCTAVES]; //For ramjet8, make 12-bits
|
|
int8_t cossindata[MAX_FREQS*OCTAVES*2]; //Contains COS and SIN data. (32-bit for now, will be 16-bit, potentially even 8.)
|
|
uint8_t which_octave_for_op[MAX_FREQS]; //counts up, tells you which ocative you are operating on. PUT IN RAM.
|
|
uint8_t actiontableplace;
|
|
|
|
#define NR_OF_OPS (4<<OCTAVES) /*64*/
|
|
//Format is:
|
|
// 255 = DO NOT OPERATE
|
|
// bits 0..3 unfolded octave, i.e. sin/cos are offset by one.
|
|
// bit 4 = add or subtract.
|
|
OPTABLETYPE optable[NR_OF_OPS]; //PUT IN FLASH
|
|
|
|
#define ACTIONTABLESIZE 256
|
|
uint16_t actiontable[ACTIONTABLESIZE]; //PUT IN FLASH // If there are more than 8 freqbins, this must be a uint16_t, otherwise if more than 16, 32.
|
|
//Format is
|
|
|
|
OPTABLETYPE mulmux[MAX_FREQS]; //PUT IN FLASH
|
|
|
|
static int Setup( float * frequencies, int bins )
|
|
{
|
|
int i;
|
|
printf( "BINS: %d\n", bins );
|
|
|
|
float highestf = frequencies[MAX_FREQS-1];
|
|
for( i = 0; i < MAX_FREQS; i++ )
|
|
{
|
|
mulmux[i] = (uint8_t)( highestf / frequencies[i] * 255 + 0.5 );
|
|
printf( "MM: %d %f / %f\n", mulmux[i], frequencies[i], highestf );
|
|
}
|
|
|
|
for( i = bins-MAX_FREQS; i < bins; i++ )
|
|
{
|
|
int topbin = i - (bins-MAX_FREQS);
|
|
float f = frequencies[i]/FREQREBASE;
|
|
float hits_per_table = (float)ACTIONTABLESIZE/f;
|
|
int dhrpertable = (int)(hits_per_table+.5);//TRICKY: You might think you need to have even number of hits (sin/cos), but you don't! It can flip sin/cos each time through the table!
|
|
float err = (TARGFREQ/((float)ACTIONTABLESIZE/dhrpertable) - (float)TARGFREQ/f)/((float)TARGFREQ/f);
|
|
//Perform an op every X samples. How well does this map into units of 1024?
|
|
printf( "%d %f -> hits per %d: %f %d (%.2f%% error)\n", topbin, f, ACTIONTABLESIZE, (float)ACTIONTABLESIZE/f, dhrpertable, err * 100.0 );
|
|
if( dhrpertable >= ACTIONTABLESIZE )
|
|
{
|
|
fprintf( stderr, "Error: Too many hits.\n" );
|
|
exit(0);
|
|
}
|
|
|
|
float advance_per_step = dhrpertable/(float)ACTIONTABLESIZE;
|
|
float fvadv = 0.5;
|
|
int j;
|
|
int countset = 0;
|
|
|
|
//Tricky: We need to start fadv off at such a place that there won't be a hicchup when going back around to 0.
|
|
// I believe this is done by setting fvadv to 0.5 initially. Unsure.
|
|
|
|
for( j = 0; j < ACTIONTABLESIZE; j++ )
|
|
{
|
|
if( fvadv >= 0.5 )
|
|
{
|
|
actiontable[j] |= 1<<topbin;
|
|
fvadv -= 1.0;
|
|
countset++;
|
|
}
|
|
fvadv += advance_per_step;
|
|
}
|
|
printf( " countset: %d\n", countset );
|
|
}
|
|
//exit(1);
|
|
|
|
|
|
int phaseinop[OCTAVES] = { 0 };
|
|
int already_hit_octaveplace[OCTAVES*2] = { 0 };
|
|
for( i = 0; i < NR_OF_OPS; i++ )
|
|
{
|
|
int longestzeroes = 0;
|
|
int val = i & ((1<<OCTAVES)-1);
|
|
for( longestzeroes = 0; longestzeroes < 255 && ( ((val >> longestzeroes) & 1) == 0 ); longestzeroes++ );
|
|
//longestzeroes goes: 255, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, ...
|
|
//This isn't great, because we need to also know whether we are attacking the SIN side or the COS side, and if it's + or -.
|
|
//We can actually decide that out.
|
|
|
|
if( longestzeroes == 255 )
|
|
{
|
|
//This is a nop. Emit a nop.
|
|
optable[i] = 255;
|
|
}
|
|
else
|
|
{
|
|
longestzeroes = OCTAVES-1-longestzeroes; //Actually do octave 0 least often.
|
|
int iop = phaseinop[longestzeroes]++;
|
|
int toop = longestzeroes;
|
|
int toopmon = (longestzeroes<<1) | (iop & 1);
|
|
|
|
//if it's the first time an octave happened this set, flag it. This may be used later in the process.
|
|
if( !already_hit_octaveplace[toopmon] )
|
|
{
|
|
already_hit_octaveplace[toopmon] = 1;
|
|
toop |= 1<<5;
|
|
}
|
|
if( iop & 1 )
|
|
{
|
|
toop |= 1<<6;
|
|
}
|
|
|
|
//Handle add/subtract bit.
|
|
if( iop & 2 ) toop |= 1<<4;
|
|
|
|
optable[i] = toop;
|
|
|
|
//printf( " %d %d %d\n", iop, val, longestzeroes );
|
|
}
|
|
//printf( "HBT: %d = %d\n", i, optable[i] );
|
|
}
|
|
//exit(1);
|
|
|
|
return 0;
|
|
}
|
|
|
|
|
|
void Turbo8BitRun( int8_t adcval )
|
|
{
|
|
int16_t adcv = adcval;
|
|
adcv *= FRONTEND_AMPLITUDE;
|
|
if( adcv > 127 ) adcv = 127;
|
|
if( adcv < -128 ) adcv = -128;
|
|
running_integral += adcv>>INITIAL_DECIMATE;
|
|
|
|
uint16_t action = actiontable[actiontableplace++];
|
|
uint8_t n;
|
|
|
|
//Counts are approximate counts for PMS133
|
|
|
|
for( n = 0; //1CYC
|
|
n < MAX_FREQS; //2CYC
|
|
n++, //1CYC
|
|
action>>=1 //2CYC
|
|
)
|
|
{
|
|
//Everything inside this loop is executed ~3/4 * MAX_FREQS per audio sample. so.. ~9x.
|
|
//If op @ 4MHz, we get 44 cycles in here.
|
|
|
|
//If no operation is scheduled, continue.
|
|
if( !( action & 1 ) ) continue; //1CYC
|
|
|
|
uint8_t ao = which_octave_for_op[n]; //4CYC
|
|
ao++; //1CYC
|
|
if( ao >= NR_OF_OPS ) ao = 0; //2CYC
|
|
which_octave_for_op[n] = ao; //2CYC (idxm)
|
|
|
|
uint8_t op = optable[ao]; //"theoretically" 3CYC (if you align things right)
|
|
//1CYC (Put A into specific RAM location)
|
|
|
|
//If we are on the one thing we aren't supposed to operate within, cancel.
|
|
if( op == 255 ) continue; //2CYC (if op is in A)
|
|
|
|
//Tricky: We share the integral with SIN and COS.
|
|
//We don't need to. It would produce a slightly cleaner signal. See: NOTE 3
|
|
uint8_t octave = op & 0xf; //1CYC (if op is in A)
|
|
|
|
|
|
uint8_t intindex = octave * MAX_FREQS //Load mulop with 12 [2CYC]; mul [1CYC]
|
|
+ n; //Add [1CYC]
|
|
//[1CYC] more cycle to write A into RAM[(intindex)
|
|
//int invoct = OCTAVES-1-octaveplace;
|
|
int8_t diff;
|
|
|
|
if( op & 0x10 ) //ADD //2CYC
|
|
{
|
|
diff = integral_at[intindex] //Assume "IntIndex" is in A, add integral_at to A [1], move A to an index [1]. [2] to read into acc. [4CYC]
|
|
- running_integral; //1CYC to subtract.
|
|
//1CYC to write diff into a memory location.
|
|
}
|
|
else //SUBTRACT
|
|
{
|
|
diff = running_integral - integral_at[intindex];
|
|
}
|
|
|
|
//30 cycles so far.
|
|
|
|
integral_at[intindex] = running_integral; //[3CYC]
|
|
|
|
//if( diff > 124 || diff < -124 ) printf( "!!!!!!!!!!!! %d !!!!!!!!!!!\n", diff );
|
|
|
|
//uint8_t idx = ( intindex << 1 ); //Overwrite intindex.
|
|
intindex <<= 1; //1CYC
|
|
|
|
if( op&(1<<6) ) //2CYC
|
|
{
|
|
intindex |= 1; //1CYC
|
|
}
|
|
|
|
uint8_t mulmuxval = mulmux[n]; //[4CYC]
|
|
|
|
|
|
//Do you live on a super lame processor? {NOTE 4}
|
|
//If you do, you might not have good signed multiply operations. So, an alternative mechanism is found here.
|
|
// +) Able to more cleanly crush to an 8-bit multiply.
|
|
// +) Gets extra bit of precision back, i.e. the sign bit is now used as a data bit.
|
|
// -) More than 1 line of C code. Requires possible double invert.
|
|
#if 1
|
|
//rough processor, i.e. PMS133
|
|
if( diff < 0 ) //[2CYC]
|
|
{
|
|
diff *= -1; //[1CYC]
|
|
diff >>= (OCTAVES-1-octave); // ???TRICKY??? Should this be a multiply?
|
|
|
|
//if( diff > 250 ) printf( "!!!!!!!**** %d ****!!!!!!!\n", diff );
|
|
|
|
diff = ((uint16_t)diff * (uint16_t)mulmuxval)>>INTEGRATOR_DECIMATE; //[3CYC]
|
|
diff *= -1; //[1CYC]
|
|
}
|
|
else
|
|
{
|
|
diff >>= (OCTAVES-1-octave);
|
|
//if( diff > 250 ) printf( "!!!!!!!**** %d ****!!!!!!!\n", diff );
|
|
diff = ((uint16_t)diff * (uint16_t)mulmuxval)>>INTEGRATOR_DECIMATE;
|
|
}
|
|
|
|
//@48 cycles :( :( :(
|
|
|
|
#else
|
|
//Decent processor, i.e. ATTiny85.
|
|
diff = ((diff>>(OCTAVES-1-octave)) * mulmuxval ) >> 6;
|
|
#endif
|
|
//printf( "%d\n", diff );
|
|
|
|
int8_t tmp =
|
|
cossindata[intindex] //[3CYC]
|
|
+ diff //[1CYC]
|
|
- (cossindata[intindex]>>4) //[2CYC]
|
|
;
|
|
|
|
if( tmp > 0 ) tmp--; //2CYC
|
|
if( tmp < 0 ) tmp++; //2CYC
|
|
cossindata[intindex] = tmp; //2CYC
|
|
//60ish cycles :( :( :(
|
|
}
|
|
}
|
|
|
|
|
|
void DoDFT8BitTurbo( float * outbins, float * frequencies, int bins, const float * databuffer, int place_in_data_buffer, int size_of_data_buffer, float q, float speedup )
|
|
{
|
|
static int is_setup;
|
|
if( !is_setup ) { is_setup = 1; Setup( frequencies, bins ); }
|
|
static int last_place;
|
|
int i;
|
|
|
|
for( i = last_place; i != place_in_data_buffer; i = (i+1)%size_of_data_buffer )
|
|
{
|
|
int16_t ifr1 = (int16_t)( ((databuffer[i]) ) * 4095 );
|
|
Turbo8BitRun( ifr1>>5 ); //5 = Actually only feed algorithm numbers from -128 to 127.
|
|
}
|
|
last_place = place_in_data_buffer;
|
|
|
|
static int idiv;
|
|
idiv++;
|
|
#if 1
|
|
for( i = 0; i < bins; i++ )
|
|
{
|
|
int iss = cossindata[i*2+0]>>FINAL_DECIMATE;
|
|
int isc = cossindata[i*2+1]>>FINAL_DECIMATE;
|
|
int mux = iss * iss + isc * isc;
|
|
|
|
if( mux <= 0 )
|
|
{
|
|
outbins[i] = 0;
|
|
}
|
|
else
|
|
{
|
|
outbins[i] = sqrt((float)mux)/50.0;
|
|
|
|
if( abs( cossindata[i*2+0] ) > 120 || abs( cossindata[i*2+1] ) > 120 )
|
|
printf( "CS OVF %d/%d/%d/%f\n", i, cossindata[i*2+0], cossindata[i*2+1],outbins[i] );
|
|
|
|
}
|
|
}
|
|
#endif
|
|
}
|
|
|
|
|