try another method for turbo operations.
This commit is contained in:
parent
0d23075125
commit
0a056db03d
|
@ -17,245 +17,120 @@
|
|||
uint8_t current_time; //1 byte
|
||||
uint8_t placecode[MAX_FREQS];
|
||||
*/
|
||||
//OK... We don't have enough RAM to sum everything... can we do something wacky with multiple octaves to sum everything better?
|
||||
//i.e.
|
||||
//
|
||||
// 4332322132212210
|
||||
//
|
||||
// ++++++++++++++++-----------------
|
||||
// ++++++++--------
|
||||
// ++++----++++----
|
||||
// ++--++--++--++--
|
||||
// +-+-+-+-+-+-+-+-
|
||||
//
|
||||
// Don't forget we need to do this for sin and cos.
|
||||
// Can we instead of making this plusses, make it a multiplier?
|
||||
// How can we handle sin+cos?
|
||||
//
|
||||
// Is it possible to do this for every frame? I.e. for each of the 24 notes, multiply with their current place in table?
|
||||
// That's interesting. It's not like a sin table.
|
||||
// There is no "multiply" in the attiny instruction set for attiny85.
|
||||
// There is, however for attiny402
|
||||
/*
|
||||
So, the idea here is we would keep a running total of the current ADC value, kept away in a int16_t.
|
||||
It is constantly summing, so we can take an integral of it. Or rather an integral range.
|
||||
|
||||
//Question: Can we do five octaves, or does this need to be balanced?
|
||||
//Question2: Should we weight higher octaves?
|
||||
Over time, we perform operations like adding or subtracting from a current place.
|
||||
|
||||
|
||||
//ATTiny402: 256x8 RAM, 4096x8 FLASH LPM: 3 cycles + FMUL: 2 cycles << Do stacked sin waves?
|
||||
//ATtiny85: 512x8 RAM, 8192x8 FLASH LPM: 3 cycles + NO MULTIPLY << Do square waves?
|
||||
|
||||
|
||||
/* Approaches:
|
||||
|
||||
on ATtiny402: Stacked sin approach.
|
||||
Say 16 MHz, though 12 MHz is interesting...
|
||||
16k SPS: 1k cycles per; say 24 bins per; 41 cycles per bin = hard. But is it too hard?
|
||||
20 cycles per s/c.
|
||||
read place in stacked table (8? bits) 3 cycles
|
||||
|
||||
//Inner loop = 17 cycles.
|
||||
read stacked table (8 bits), 3 cycles
|
||||
fractional multiply table with current value. 2 cycles
|
||||
read current running for note 2 cycles (LDS = 3 cycles)
|
||||
subtract a shifted version, to make it into an IIR. (4 cycles)
|
||||
add in current values. (2 cycles)
|
||||
store data back to ram (2 cycles)
|
||||
advance place in stacked table (8?bits) 1 cycle
|
||||
|
||||
store place in stacked table (8? bits) 3 cycles?
|
||||
|
||||
//What if we chunk ADC updates into groups of 4 or 8?
|
||||
//This is looking barely possible.
|
||||
|
||||
on attiny85: scheduled adds/subtracts (like a stacked-square-wave-table)
|
||||
//XXX TODO!
|
||||
|
||||
NOTE:
|
||||
Optimizations:
|
||||
Only use 16 bins, lets action table be 16-bits wide.
|
||||
*/
|
||||
|
||||
/* Ok... Let's think about the ATTiny402. 256x8 RAM + 4096x8 FLASH.
|
||||
int16_t running_integral;
|
||||
int16_t cossindata[MAX_FREQS*OCTAVES*2]; //Contains COS and SIN data.
|
||||
|
||||
* We can create a table which has all octaves overlaid.
|
||||
* We would need to keep track of:
|
||||
* 12 x 2 x 2 = 48 bytes = Current sin/cos values.
|
||||
* 12 x 2 = 24 bytes = Current place in table. = 72 bytes
|
||||
* We would need to store:
|
||||
* The layered lookup table. If possible, keep @ 256 bytes to simplify math ops.
|
||||
* The speed by which each note needs to advance.
|
||||
* We would need to:
|
||||
* Read current running place. X 8 cycles
|
||||
* Use that place to look up into sin table. 3 cycles
|
||||
* Read running val 4 cycles best case
|
||||
* Multiply out the sin + IIR 5 cycles
|
||||
* Store running val 4 cycles best case
|
||||
* Cos-advance that place to look up into sin table. 4 cycles
|
||||
* Read running val 4 cycles best case
|
||||
* Multiply out the sin + IIR 5 cycles
|
||||
* Store running val 4 cycles best case.
|
||||
* Read how much to advance X by. 4 cycles
|
||||
* (Cos^2+Sin^2) 8?
|
||||
* Store it. 4 cycles best case.
|
||||
* = 48 x 12 = 576 cycles. Assume 10 MHz @ 16k SPS. We're OK (625 cycles available per sample)
|
||||
*/
|
||||
|
||||
// Observation: The two tables are actually mirror images of each other, well diagonally mirrored. That's odd. But, would take CPU to exploit.
|
||||
|
||||
#define SSTABLESIZE 256
|
||||
int8_t spikysin_interleved_cos[SSTABLESIZE][2];
|
||||
uint32_t advancespeed[MAX_FREQS];
|
||||
uint8_t which_octave_for_op[MAX_FREQS]; //counts up, tells you which ocative you are operating on.
|
||||
uint8_t highbit_table[2<<OCTAVES]; //PUT IN FLASH
|
||||
|
||||
static int CompTableWithPhase( int nelements, float phase, int scaling )
|
||||
{
|
||||
int highest = 0;
|
||||
int i;
|
||||
for( i = 0; i < nelements; i++ )
|
||||
{
|
||||
float taued = i * 3.141592 * 2.0 / nelements;
|
||||
int o;
|
||||
float combsin = 0;
|
||||
for( o = 0; o < OCTAVES; o++ )
|
||||
{
|
||||
combsin += sin( taued * (1<<o) + phase);
|
||||
}
|
||||
combsin /= OCTAVES;
|
||||
int csadapt = combsin * scaling - 0.5; //No value is higher with five octaves. XXX TODO Lookout. If you change # of octaves, need to change this, too.
|
||||
|
||||
if( csadapt > highest ) highest = csadapt;
|
||||
if( -csadapt > highest ) highest = -csadapt;
|
||||
#define ACTIONTABLESIZE 512
|
||||
|
||||
if( csadapt > 127 ) csadapt = 127;
|
||||
if( csadapt < -128 ) csadapt = -128; //tricky: Keep balanced.
|
||||
spikysin_interleved_cos[i][0] = csadapt;
|
||||
|
||||
float combcos = 0;
|
||||
for( o = 0; o < OCTAVES; o++ )
|
||||
{
|
||||
combcos += cos( taued * (1<<o) + phase );
|
||||
}
|
||||
combcos /= OCTAVES;
|
||||
csadapt = combcos * scaling - 0.5; //No value is higher with five octaves. XXX TODO Lookout. If you change # of octaves, need to change this, too.
|
||||
|
||||
if( csadapt > highest ) highest = csadapt;
|
||||
if( -csadapt > highest ) highest = -csadapt;
|
||||
|
||||
if( csadapt > 127 ) csadapt = 127;
|
||||
if( csadapt < -128 ) csadapt = -128; //tricky: Keep balanced.
|
||||
spikysin_interleved_cos[i][1] = csadapt;
|
||||
}
|
||||
return highest;
|
||||
}
|
||||
uint16_t * placeintable;
|
||||
|
||||
//Put this in flash.
|
||||
uint32_t actiontable[ACTIONTABLESIZE];
|
||||
|
||||
static int Setup( float * frequencies, int bins )
|
||||
{
|
||||
int i;
|
||||
|
||||
//Since start position/phase is arbitrary, we should try several to see which gives us the best dynamic range.
|
||||
float tryphase = 0;
|
||||
|
||||
float bestphase = 0;
|
||||
int highest_val_at_best_phase = 1000000;
|
||||
|
||||
for( tryphase = 0; tryphase < 3.14159; tryphase += 0.001 )
|
||||
printf( "BINS: %d\n", bins );
|
||||
for( i = bins-MAX_FREQS; i < bins; i++ )
|
||||
{
|
||||
int highest = CompTableWithPhase( SSTABLESIZE, tryphase, 65536 );
|
||||
if( highest < highest_val_at_best_phase )
|
||||
int topbin = i - (bins-MAX_FREQS);
|
||||
float f = frequencies[i]/2.0; //2x the hits (sin/cos)
|
||||
float hits_per_table = (float)ACTIONTABLESIZE/f;
|
||||
int dhrpertable = (int)(hits_per_table+.5);//TRICKY: You might think you need to have even number of hits (sin/cos), but you don't! It can flip sin/cos each time through the table!
|
||||
float err = (8000./((float)ACTIONTABLESIZE/dhrpertable) - 8000./f)/(8000./f);
|
||||
//Perform an op every X samples. How well does this map into units of 1024?
|
||||
printf( "%d %f -> hits per 1024: %f %d (%f error)\n", topbin, f, (float)ACTIONTABLESIZE/f, dhrpertable, err * 100.0 );
|
||||
|
||||
float advance_per_step = dhrpertable/(float)ACTIONTABLESIZE;
|
||||
float fvadv = 0.0;
|
||||
int j;
|
||||
int actions = 0;
|
||||
int countset = 0;
|
||||
|
||||
//XXX TODO Tricky: We need to start fadv off at such a place that there won't be a hicchup when going back around to 0.
|
||||
|
||||
for( j = 0; j < ACTIONTABLESIZE; j++ )
|
||||
{
|
||||
highest_val_at_best_phase = highest;
|
||||
bestphase = tryphase;
|
||||
if( fvadv >= 0.5 )
|
||||
{
|
||||
actiontable[j] |= 1<<topbin;
|
||||
fvadv -= 1.0;
|
||||
countset++;
|
||||
}
|
||||
fvadv += advance_per_step;
|
||||
}
|
||||
printf( " countset: %d\n", countset );
|
||||
}
|
||||
printf( "Best comp: %f : %d\n", bestphase, highest_val_at_best_phase );
|
||||
|
||||
//Set this because we would overflow the sinm and cosm regs if we don't. This is sort of like a master volume.
|
||||
//use this as that input volume knob thing.
|
||||
float further_reduce = 1.0;
|
||||
|
||||
CompTableWithPhase( SSTABLESIZE, bestphase, (65536*128*further_reduce)/highest_val_at_best_phase );
|
||||
|
||||
// for( i = 0; i < SSTABLESIZE; i++ )
|
||||
// {
|
||||
// printf( "%d %d\n", spikysin_interleved_cos[i*2+0], spikysin_interleved_cos[i*2+1] );
|
||||
// }
|
||||
|
||||
for( i = 0; i < MAX_FREQS; i++ )
|
||||
for( i = 0; i < (1<<OCTAVES); i++ )
|
||||
{
|
||||
//frequencies[i] = SPS / Freq
|
||||
// Need to decide how quickly we sweep through the table.
|
||||
advancespeed[i] = 65536 * 256.0 /* fixed point */ * 256.0 /* size of table */ / frequencies[i];
|
||||
//printf( "%f\n", frequencies[i] );
|
||||
int longestzeroes = 0;
|
||||
for( longestzeroes = 0; longestzeroes < 255 && ( ((i >> longestzeroes) & 1) == 0 ); longestzeroes++ );
|
||||
//longestzeroes goes: 255, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, ...
|
||||
//This isn't great, because we need to also know whether we are attacking the SIN side or the COS side.
|
||||
highbit_table[i] = longestzeroes;
|
||||
}
|
||||
return 0;
|
||||
//Repeat the highbit table in the second half.
|
||||
//XXX PICK UP HERE
|
||||
//Encode into highbit_table which cell is being operated on
|
||||
//Also, do the * MAX_FREQS here. That will
|
||||
|
||||
|
||||
|
||||
placeintable = actiontable;
|
||||
// for( i = 0; i < ACTIONTABLESIZE; i++ ) printf( "%08x\n", actiontable[i] );
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
uint8_t spikysin_interleved_cos[256*2];
|
||||
uint16_t advancespeed[MAX_FREQS];
|
||||
*/
|
||||
|
||||
float toutbins[MAX_FREQS];
|
||||
int16_t running_integral;
|
||||
int16_t cossindata[MAX_FREQS*OCTAVES*2];
|
||||
uint8_t which_octave_for_op[MAX_FREQS]; //counts up, tells you which ocative you are operating on.
|
||||
uint16_t * placeintable;
|
||||
|
||||
struct notedat
|
||||
{
|
||||
uint32_t time;
|
||||
int32_t sinm;
|
||||
int32_t cosm;
|
||||
};
|
||||
//Put this in flash.
|
||||
uint32_t actiontable[ACTIONTABLESIZE];
|
||||
|
||||
static struct notedat nd[MAX_FREQS];
|
||||
|
||||
void Turbo8BitRun( int8_t adcval )
|
||||
{
|
||||
int i;
|
||||
for( i = 0; i < MAX_FREQS; i++ )
|
||||
uint32_t actions = *(placeintable++);
|
||||
if( placeintable == &actiontable[ACTIONTABLESIZE] ) placeintable = actiontable;
|
||||
int b;
|
||||
for( b = 0; b < MAX_FREQS; b++ )
|
||||
{
|
||||
uint32_t ct = nd[i].time;
|
||||
int32_t muxres;
|
||||
int32_t running;
|
||||
int32_t rdesc, rdess;
|
||||
uint8_t * spikysintable = &spikysin_interleved_cos[(ct>>24)][0];
|
||||
if( ! ((1<<b) & actions) ) continue;
|
||||
//If we get here, we need to do an action.
|
||||
int op = which_octave_for_op[b]++;
|
||||
int sinorcos = op & 1;
|
||||
op >>= 1;
|
||||
int octavebit = op & ((1<<OCTAVES)-1);
|
||||
if( !octavebit ) { continue; } //XXX TRICKY: In our octavebit table, we have 1 0 and 1 1 entry. 2, 3, 4, etc. are ok. So, if we hit a 0, we abort.
|
||||
int whichoctave = highbit_table[octavebit];
|
||||
|
||||
int8_t ss = *(spikysintable++);
|
||||
//Ok, actually we need to also know whether you're on SIN or COS.
|
||||
|
||||
#define DECIR 8
|
||||
|
||||
muxres = ((int16_t)adcval * ss + (1<<(DECIR-1)) ) >> (DECIR);
|
||||
running = nd[i].cosm;
|
||||
running += muxres;
|
||||
rdesc = running >> 8;
|
||||
running -= rdesc >> 3;
|
||||
|
||||
nd[i].cosm = running;
|
||||
if( i == 0) printf( "MRX %5d %9d %9d %9d %9d\n", muxres, adcval, ss, running, nd[i].sinm );
|
||||
int8_t sc = *(spikysintable++);
|
||||
muxres = ((int16_t)adcval * sc + (1<<(DECIR-1)) ) >> (DECIR);
|
||||
running = nd[i].sinm;
|
||||
running += muxres;
|
||||
|
||||
rdess = running>>8;
|
||||
running -= rdess >> 3;
|
||||
|
||||
nd[i].sinm = running;
|
||||
|
||||
nd[i].time = ct + advancespeed[i];
|
||||
|
||||
toutbins[i] = rdess * rdess + rdesc * rdesc;
|
||||
//printf( "%d %d = %f %p\n", rdess, rdesc, toutbins[i], &toutbins[i] );
|
||||
//if( b == 0 ) printf( "%d\n", whichoctave );
|
||||
//XXX TODO Optimization: Use a table, since octavebit can only be 0...31.
|
||||
}
|
||||
|
||||
static uint8_t stater;
|
||||
/* stater++;
|
||||
if( stater == 16 )
|
||||
{
|
||||
stater = 0;
|
||||
for( i = 0; i < MAX_FREQS; i++ )
|
||||
{
|
||||
nd[i].sinm -= nd[i].sinm >> 12;
|
||||
nd[i].cosm -= nd[i].cosm >> 12;
|
||||
nd[i].sinm += 8;
|
||||
nd[i].cosm += 8;
|
||||
}
|
||||
}*/
|
||||
}
|
||||
|
||||
|
||||
|
@ -269,12 +144,11 @@ void DoDFT8BitTurbo( float * outbins, float * frequencies, int bins, const float
|
|||
for( i = last_place; i != place_in_data_buffer; i = (i+1)%size_of_data_buffer )
|
||||
{
|
||||
int16_t ifr1 = (int16_t)( ((databuffer[i]) ) * 4095 );
|
||||
//ifr1 += 4095;
|
||||
//ifr1 += 512;
|
||||
Turbo8BitRun( ifr1>>5 ); //6 = Actually only feed algorithm numbers from -64 to 63.
|
||||
}
|
||||
last_place = place_in_data_buffer;
|
||||
|
||||
#if 0
|
||||
for( i = 0; i < bins; i++ )
|
||||
{
|
||||
outbins[i] = 0;
|
||||
|
@ -289,7 +163,7 @@ void DoDFT8BitTurbo( float * outbins, float * frequencies, int bins, const float
|
|||
printf( "MUX: %d %d\n", isc, iss );
|
||||
outbins[i+MAX_FREQS] = sqrt(mux)/200.0;
|
||||
}
|
||||
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
|
|
295
embeddedcommon/DFT8Turbo.c.attic
Normal file
295
embeddedcommon/DFT8Turbo.c.attic
Normal file
|
@ -0,0 +1,295 @@
|
|||
#include <stdint.h>
|
||||
#include <stdlib.h>
|
||||
#include "DFT8Turbo.h"
|
||||
#include <math.h>
|
||||
|
||||
#include <stdio.h>
|
||||
|
||||
/* Number of frequency bins tracked; sizes advancespeed[], toutbins[] and nd[] and bounds the per-sample loop. */
#define MAX_FREQS (24)
|
||||
/* Number of octaves (frequency doublings: 1x, 2x, 4x, ...) summed into every lookup-table entry. */
#define OCTAVES (5)
|
||||
|
||||
|
||||
/*
|
||||
* The first thought was using an integration map and only operating when we need to, to pull the data out.
|
||||
* Now we're doing the thing below this block comment
|
||||
int16_t accumulated_total; //2 bytes
|
||||
int16_t last_accumulated_total_at_bin[MAX_FREQS*2]; //24 * 2 * sizeof(int16_t) = 96 bytes.
|
||||
uint8_t current_time; //1 byte
|
||||
uint8_t placecode[MAX_FREQS];
|
||||
*/
|
||||
//OK... We don't have enough RAM to sum everything... can we do something wacky with multiple octaves to sum everything better?
|
||||
//i.e.
|
||||
//
|
||||
// 4332322132212210
|
||||
//
|
||||
// ++++++++++++++++-----------------
|
||||
// ++++++++--------
|
||||
// ++++----++++----
|
||||
// ++--++--++--++--
|
||||
// +-+-+-+-+-+-+-+-
|
||||
//
|
||||
// Don't forget we need to do this for sin and cos.
|
||||
// Can we instead of making this plusses, make it a multiplier?
|
||||
// How can we handle sin+cos?
|
||||
//
|
||||
// Is it possible to do this for every frame? I.e. for each of the 24 notes, multiply with their current place in table?
|
||||
// That's interesting. It's not like a sin table.
|
||||
// There is no "multiply" in the attiny instruction set for attiny85.
|
||||
// There is, however for attiny402
|
||||
|
||||
//Question: Can we do five octaves, or does this need to be balanced?
|
||||
//Question2: Should we weight higher octaves?
|
||||
|
||||
|
||||
//ATTiny402: 256x8 RAM, 4096x8 FLASH LPM: 3 cycles + FMUL: 2 cycles << Do stacked sin waves?
|
||||
//ATtiny85: 512x8 RAM, 8192x8 FLASH LPM: 3 cycles + NO MULTIPLY << Do square waves?
|
||||
|
||||
|
||||
/* Approaches:
|
||||
|
||||
on ATtiny402: Stacked sin approach.
|
||||
Say 16 MHz, though 12 MHz is interesting...
|
||||
16k SPS: 1k cycles per; say 24 bins per; 41 cycles per bin = hard. But is it too hard?
|
||||
20 cycles per s/c.
|
||||
read place in stacked table (8? bits) 3 cycles
|
||||
|
||||
//Inner loop = 17 cycles.
|
||||
read stacked table (8 bits), 3 cycles
|
||||
fractional multiply table with current value. 2 cycles
|
||||
read current running for note 2 cycles (LDS = 3 cycles)
|
||||
subtract a shifted version, to make it into an IIR. (4 cycles)
|
||||
add in current values. (2 cycles)
|
||||
store data back to ram (2 cycles)
|
||||
advance place in stacked table (8?bits) 1 cycle
|
||||
|
||||
store place in stacked table (8? bits) 3 cycles?
|
||||
|
||||
//What if we chunk ADC updates into groups of 4 or 8?
|
||||
//This is looking barely possible.
|
||||
|
||||
on attiny85: scheduled adds/subtracts (like a stacked-square-wave-table)
|
||||
//XXX TODO!
|
||||
|
||||
*/
|
||||
|
||||
/* Ok... Let's think about the ATTiny402. 256x8 RAM + 4096x8 FLASH.
|
||||
|
||||
* We can create a table which has all octaves overlaid.
|
||||
* We would need to keep track of:
|
||||
* 12 x 2 x 2 = 48 bytes = Current sin/cos values.
|
||||
* 12 x 2 = 24 bytes = Current place in table. = 72 bytes
|
||||
* We would need to store:
|
||||
* The layered lookup table. If possible, keep @ 256 bytes to simplify math ops.
|
||||
* The speed by which each note needs to advance.
|
||||
* We would need to:
|
||||
* Read current running place. X 8 cycles
|
||||
* Use that place to look up into sin table. 3 cycles
|
||||
* Read running val 4 cycles best case
|
||||
* Multiply out the sin + IIR 5 cycles
|
||||
* Store running val 4 cycles best case
|
||||
* Cos-advance that place to look up into sin table. 4 cycles
|
||||
* Read running val 4 cycles best case
|
||||
* Multiply out the sin + IIR 5 cycles
|
||||
* Store running val 4 cycles best case.
|
||||
* Read how much to advance X by. 4 cycles
|
||||
* (Cos^2+Sin^2) 8?
|
||||
* Store it. 4 cycles best case.
|
||||
* = 48 x 12 = 576 cycles. Assume 10 MHz @ 16k SPS. We're OK (625 cycles available per sample)
|
||||
*/
|
||||
|
||||
// Observation: The two tables are actually mirror images of each other, well diagonally mirrored. That's odd. But, would take CPU to exploit.
|
||||
|
||||
/* Number of phase steps in the stacked lookup table; 256 lets the top byte of a 32-bit phase index it directly. */
#define SSTABLESIZE 256
|
||||
/* Stacked lookup table filled by CompTableWithPhase: [n][0] holds the octave-summed sine and
   [n][1] the octave-summed cosine for phase step n, scaled and clamped to the int8_t range. */
int8_t spikysin_interleved_cos[SSTABLESIZE][2];
|
||||
/* Per-bin 32-bit fixed-point phase increment, added to nd[i].time once per sample; computed in Setup. */
uint32_t advancespeed[MAX_FREQS];
|
||||
|
||||
static int CompTableWithPhase( int nelements, float phase, int scaling )
|
||||
{
|
||||
int highest = 0;
|
||||
int i;
|
||||
for( i = 0; i < nelements; i++ )
|
||||
{
|
||||
float taued = i * 3.141592 * 2.0 / nelements;
|
||||
int o;
|
||||
float combsin = 0;
|
||||
for( o = 0; o < OCTAVES; o++ )
|
||||
{
|
||||
combsin += sin( taued * (1<<o) + phase);
|
||||
}
|
||||
combsin /= OCTAVES;
|
||||
int csadapt = combsin * scaling - 0.5; //No value is higher with five octaves. XXX TODO Lookout. If you change # of octaves, need to change this, too.
|
||||
|
||||
if( csadapt > highest ) highest = csadapt;
|
||||
if( -csadapt > highest ) highest = -csadapt;
|
||||
|
||||
if( csadapt > 127 ) csadapt = 127;
|
||||
if( csadapt < -128 ) csadapt = -128; //tricky: Keep balanced.
|
||||
spikysin_interleved_cos[i][0] = csadapt;
|
||||
|
||||
float combcos = 0;
|
||||
for( o = 0; o < OCTAVES; o++ )
|
||||
{
|
||||
combcos += cos( taued * (1<<o) + phase );
|
||||
}
|
||||
combcos /= OCTAVES;
|
||||
csadapt = combcos * scaling - 0.5; //No value is higher with five octaves. XXX TODO Lookout. If you change # of octaves, need to change this, too.
|
||||
|
||||
if( csadapt > highest ) highest = csadapt;
|
||||
if( -csadapt > highest ) highest = -csadapt;
|
||||
|
||||
if( csadapt > 127 ) csadapt = 127;
|
||||
if( csadapt < -128 ) csadapt = -128; //tricky: Keep balanced.
|
||||
spikysin_interleved_cos[i][1] = csadapt;
|
||||
}
|
||||
return highest;
|
||||
}
|
||||
|
||||
|
||||
static int Setup( float * frequencies, int bins )
|
||||
{
|
||||
int i;
|
||||
|
||||
//Since start position/phase is arbitrary, we should try several to see which gives us the best dynamic range.
|
||||
float tryphase = 0;
|
||||
|
||||
float bestphase = 0;
|
||||
int highest_val_at_best_phase = 1000000;
|
||||
|
||||
for( tryphase = 0; tryphase < 3.14159; tryphase += 0.001 )
|
||||
{
|
||||
int highest = CompTableWithPhase( SSTABLESIZE, tryphase, 65536 );
|
||||
if( highest < highest_val_at_best_phase )
|
||||
{
|
||||
highest_val_at_best_phase = highest;
|
||||
bestphase = tryphase;
|
||||
}
|
||||
}
|
||||
printf( "Best comp: %f : %d\n", bestphase, highest_val_at_best_phase );
|
||||
|
||||
//Set this because we would overflow the sinm and cosm regs if we don't. This is sort of like a master volume.
|
||||
//use this as that input volume knob thing.
|
||||
float further_reduce = 1.0;
|
||||
|
||||
CompTableWithPhase( SSTABLESIZE, bestphase, (65536*128*further_reduce)/highest_val_at_best_phase );
|
||||
|
||||
// for( i = 0; i < SSTABLESIZE; i++ )
|
||||
// {
|
||||
// printf( "%d %d\n", spikysin_interleved_cos[i*2+0], spikysin_interleved_cos[i*2+1] );
|
||||
// }
|
||||
|
||||
for( i = 0; i < MAX_FREQS; i++ )
|
||||
{
|
||||
//frequencies[i] = SPS / Freq
|
||||
// Need to decide how quickly we sweep through the table.
|
||||
advancespeed[i] = 65536 * 256.0 /* fixed point */ * 256.0 /* size of table */ / frequencies[i];
|
||||
//printf( "%f\n", frequencies[i] );
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
uint8_t spikysin_interleved_cos[256*2];
|
||||
uint16_t advancespeed[MAX_FREQS];
|
||||
*/
|
||||
|
||||
/* Per-bin power (sum of the squared, down-shifted accumulators), rewritten by Turbo8BitRun on every sample. */
float toutbins[MAX_FREQS];
|
||||
|
||||
/* Running state kept for one frequency bin by Turbo8BitRun. */
struct notedat
|
||||
{
|
||||
uint32_t time; /* 32-bit phase accumulator; its top 8 bits select the lookup-table row. */
|
||||
int32_t sinm; /* Leaky accumulator fed by table column 1 (built from cos() in CompTableWithPhase). */
|
||||
int32_t cosm; /* Leaky accumulator fed by table column 0 (built from sin() in CompTableWithPhase). */
|
||||
};
|
||||
|
||||
/* One running state per frequency bin; zero-initialised as a file-scope static. */
static struct notedat nd[MAX_FREQS];
|
||||
|
||||
void Turbo8BitRun( int8_t adcval )
|
||||
{
|
||||
int i;
|
||||
for( i = 0; i < MAX_FREQS; i++ )
|
||||
{
|
||||
uint32_t ct = nd[i].time;
|
||||
int32_t muxres;
|
||||
int32_t running;
|
||||
int32_t rdesc, rdess;
|
||||
uint8_t * spikysintable = &spikysin_interleved_cos[(ct>>24)][0];
|
||||
|
||||
int8_t ss = *(spikysintable++);
|
||||
|
||||
#define DECIR 8
|
||||
|
||||
muxres = ((int16_t)adcval * ss + (1<<(DECIR-1)) ) >> (DECIR);
|
||||
running = nd[i].cosm;
|
||||
running += muxres;
|
||||
rdesc = running >> 8;
|
||||
running -= rdesc >> 3;
|
||||
|
||||
nd[i].cosm = running;
|
||||
if( i == 0) printf( "MRX %5d %9d %9d %9d %9d\n", muxres, adcval, ss, running, nd[i].sinm );
|
||||
int8_t sc = *(spikysintable++);
|
||||
muxres = ((int16_t)adcval * sc + (1<<(DECIR-1)) ) >> (DECIR);
|
||||
running = nd[i].sinm;
|
||||
running += muxres;
|
||||
|
||||
rdess = running>>8;
|
||||
running -= rdess >> 3;
|
||||
|
||||
nd[i].sinm = running;
|
||||
|
||||
nd[i].time = ct + advancespeed[i];
|
||||
|
||||
toutbins[i] = rdess * rdess + rdesc * rdesc;
|
||||
//printf( "%d %d = %f %p\n", rdess, rdesc, toutbins[i], &toutbins[i] );
|
||||
}
|
||||
|
||||
static uint8_t stater;
|
||||
/* stater++;
|
||||
if( stater == 16 )
|
||||
{
|
||||
stater = 0;
|
||||
for( i = 0; i < MAX_FREQS; i++ )
|
||||
{
|
||||
nd[i].sinm -= nd[i].sinm >> 12;
|
||||
nd[i].cosm -= nd[i].cosm >> 12;
|
||||
nd[i].sinm += 8;
|
||||
nd[i].cosm += 8;
|
||||
}
|
||||
}*/
|
||||
}
|
||||
|
||||
|
||||
void DoDFT8BitTurbo( float * outbins, float * frequencies, int bins, const float * databuffer, int place_in_data_buffer, int size_of_data_buffer, float q, float speedup )
|
||||
{
|
||||
static int is_setup;
|
||||
if( !is_setup ) { is_setup = 1; Setup( frequencies, bins ); }
|
||||
static int last_place;
|
||||
int i;
|
||||
|
||||
for( i = last_place; i != place_in_data_buffer; i = (i+1)%size_of_data_buffer )
|
||||
{
|
||||
int16_t ifr1 = (int16_t)( ((databuffer[i]) ) * 4095 );
|
||||
//ifr1 += 4095;
|
||||
//ifr1 += 512;
|
||||
Turbo8BitRun( ifr1>>5 ); //6 = Actually only feed algorithm numbers from -64 to 63.
|
||||
}
|
||||
last_place = place_in_data_buffer;
|
||||
|
||||
for( i = 0; i < bins; i++ )
|
||||
{
|
||||
outbins[i] = 0;
|
||||
}
|
||||
for( i = 0; i < MAX_FREQS; i++ )
|
||||
{
|
||||
int iss = nd[i].sinm>>8;
|
||||
int isc = nd[i].cosm>>8;
|
||||
int mux = iss * iss + isc * isc;
|
||||
if( mux == 0 ) mux = 1;
|
||||
if( i == 0 )
|
||||
printf( "MUX: %d %d\n", isc, iss );
|
||||
outbins[i+MAX_FREQS] = sqrt(mux)/200.0;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
9
embeddedcommon/DFT8Turbo.h.attic
Normal file
9
embeddedcommon/DFT8Turbo.h.attic
Normal file
|
@ -0,0 +1,9 @@
|
|||
#ifndef _DFT8TURBO_H
|
||||
#define _DFT8TURBO_H
|
||||
|
||||
/* Note: Frequencies must be precompiled. */
|
||||
|
||||
/*
 * Run the 8-bit "turbo" DFT over the samples added to the circular
 * `databuffer` (of `size_of_data_buffer` entries) since the previous call, up
 * to but not including index `place_in_data_buffer`, and write the results
 * into `outbins` (`bins` entries).  `frequencies` holds one entry per bin.
 * NOTE(review): the accompanying implementation does not read `q` or
 * `speedup` - confirm whether callers rely on them.
 */
void DoDFT8BitTurbo( float * outbins, float * frequencies, int bins, const float * databuffer, int place_in_data_buffer, int size_of_data_buffer, float q, float speedup );
|
||||
|
||||
#endif
|
||||
|
Loading…
Reference in a new issue