try another method for turbo operations.

2019-04-20 01:05:05 -07:00 · 2019-04-20 01:05:05 -07:00 · 0a056db03d
commit 0a056db03d
parent 0d23075125
3 changed files with 384 additions and 206 deletions
--- a/embeddedcommon/DFT8Turbo.c
+++ b/embeddedcommon/DFT8Turbo.c
@ -17,245 +17,120 @@
 		uint8_t current_time;								//1 byte
 		uint8_t placecode[MAX_FREQS];
 */
-//OK... We don't have enough ram to sum everything... can we do something wacky with multiple ocatives to sum everything better?
-//i.e.
-//
-// 4332322132212210
-//
-// ++++++++++++++++-----------------
-// ++++++++--------
-// ++++----++++----
-// ++--++--++--++--
-// +-+-+-+-+-+-+-+-
-//
-// Don't forget we need to do this for sin and cos.
-// Can we instead of making this plusses, make it a multiplier?
-// How can we handle sin+cos?
-//
-// Is it possible to do this for every frame?  I.e. for each of the 24 notes, multiply with their current place in table?
-//  That's interesting.  It's not like a sin table.
-// There is no "multiply" in the attiny instruction set for attiny85.
-// There is, however for attiny402
+/* 
+	So, the idea here is that we keep a running total of the current ADC value, stored in an int16_t.
+	It is constantly summing, so we can take an integral of it.  Or rather an integral range.

-//Question:  Can we do five octaves, or does this need to be balanced?
-//Question2: Should we weight higher octaves?
+	Over time, we perform operations like adding or subtracting from a current place.


-//ATTiny402: 256x8 RAM, 4096x8 FLASH  LPM: 3 cycles + FMUL: 2 cycles  << Do stacked sin waves?
-//ATtiny85:  512x8 RAM, 8192x8 FLASH  LPM: 3 cycles + NO MULTIPLY     << Do square waves?
-
-
-/* Approaches:
-
-  on ATtiny402:  Stacked sin approach.
-   Say 16 MHz, though 12 MHz is interesting...
-   16k SPS: 1k cycles per; say 24 bins per; 41 cycles per bin = hard.  But is it too hard?
-   20 cycles per s/c.
-		read place in stacked table (8? bits) 3 cycles
-
-		//Inner loop = 17 cycles.
-		read stacked table (8 bits), 3 cycles
-		fractional multiply table with current value. 2 cycles
-		read current running for note 2 cycles  (LDS = 3 cycles)
-		subtract a shifted version, to make it into an IIR. (4 cycles)
-		add in current values. (2 cycles)
-		store data back to ram (2 cycles)
-		advance place in stacked table (8?bits) 1 cycle
-
-		store place in stacked table (8? bits) 3 cycles?
-
-	//What if we chunk ADC updates into groups of 4 or 8?
-	//This is looking barely possible.
-
-	on attiny85: scheduled adds/subtracts (like a stacked-square-wave-table)
-		//XXX TODO!
-
+NOTE: 
+	Optimizations:
+		Only use 16 bins, which lets the action table be 16 bits wide.
 */

-/* Ok... Let's think about the ATTiny402.  256x8 RAM + 4096x8 FLASH.
+int16_t running_integral;
+int16_t cossindata[MAX_FREQS*OCTAVES*2]; //Contains COS and SIN data.

-	* We can create a table which has all octaves overlaid.
-	* We would need to keep track of:
-		* 12 x 2 x 2 = 48 bytes = Current sin/cos values.
-		* 12 x 2 = 24 bytes = Current place in table.  = 72 bytes
-	* We would need to store:
-		* The layered lookup table.  If possible, keep @ 256 bytes to simplify math ops.
-		* The speed by which each note needs to advance.
-	* We would need to:
-		* Read current running place. X                8 cycles
-		* Use that place to look up into sin table.    3 cycles
-		* Read running val  4 cycles best case
-		* Multiply out the sin + IIR                   5 cycles
-		* Store running val 4 cycles best case
-		* Cos-advance that place to look up into sin table.    4 cycles
-		* Read running val 4 cycles best case
-		* Multiply out the sin + IIR                   5 cycles
-		* Store running val 4 cycles best case.
-		* Read how much to advance X by.               4 cycles
-        * (Cos^2+Sin^2)                                8?
-		* Store it.                                    4 cycles best case.
-        *                                                  = 48 x 12 = 576 cycles.  Assume 10 MHz @ 16k SPS.  We're OK (625 samples)
-*/

-// Observation: The two tables are actually mirror images of each other, well diagonally mirrored.  That's odd.  But, would take CPU to exploit.

-#define SSTABLESIZE 256
-int8_t  spikysin_interleved_cos[SSTABLESIZE][2];
-uint32_t advancespeed[MAX_FREQS];
+uint8_t  which_octave_for_op[MAX_FREQS]; //counts up, tells you which octave you are operating on.
+uint8_t  highbit_table[2<<OCTAVES]; //PUT IN FLASH

-static int CompTableWithPhase( int nelements, float phase, int scaling )
-{
-	int highest = 0;
-	int i;
-	for( i = 0; i < nelements; i++ )
-	{
-		float taued = i * 3.141592 * 2.0 / nelements;
-		int o;
-		float combsin = 0;
-		for( o = 0; o < OCTAVES; o++ )
-		{
-			combsin += sin( taued * (1<<o) + phase);
-		}
-		combsin /= OCTAVES;
-		int csadapt =  combsin * scaling - 0.5;	//No value is higher with five octaves.  XXX TODO Lookout.  If you change # of octaves, need to change this, too.

-		if( csadapt > highest ) highest = csadapt;
-		if( -csadapt > highest ) highest = -csadapt;
+#define ACTIONTABLESIZE 512

-		if( csadapt > 127 ) csadapt = 127;
-		if( csadapt < -128 ) csadapt = -128;  //tricky: Keep balanced.
-		spikysin_interleved_cos[i][0] = csadapt;
-
-		float combcos = 0;
-		for( o = 0; o < OCTAVES; o++ )
-		{
-			combcos += cos( taued * (1<<o) + phase );
-		}
-		combcos /= OCTAVES;
-		csadapt = combcos * scaling - 0.5;	//No value is higher with five octaves.  XXX TODO Lookout.  If you change # of octaves, need to change this, too.
-
-		if( csadapt > highest ) highest = csadapt;
-		if( -csadapt > highest ) highest = -csadapt;
-
-		if( csadapt > 127 ) csadapt = 127;
-		if( csadapt < -128 ) csadapt = -128;  //tricky: Keep balanced.
-		spikysin_interleved_cos[i][1] = csadapt;
-	}
-	return highest;
-}
+uint16_t * placeintable;

+//Put this in flash.
+uint32_t actiontable[ACTIONTABLESIZE];

 static int Setup( float * frequencies, int bins )
 {
 	int i;
-
-	//Since start position/phase is arbitrary, we should try several to see which gives us the best dynamic range.
-	float tryphase = 0;
-
-	float bestphase = 0;
-	int highest_val_at_best_phase = 1000000;
-
-	for( tryphase = 0; tryphase < 3.14159; tryphase += 0.001 )
+	printf( "BINS: %d\n", bins );
+	for( i = bins-MAX_FREQS; i < bins; i++ )
 	{
-		int highest = CompTableWithPhase( SSTABLESIZE, tryphase, 65536 );
-		if( highest < highest_val_at_best_phase )
+		int topbin = i - (bins-MAX_FREQS);
+		float f = frequencies[i]/2.0; //2x the hits (sin/cos)
+		float hits_per_table = (float)ACTIONTABLESIZE/f;
+		int dhrpertable = (int)(hits_per_table+.5);//TRICKY: You might think you need to have even number of hits (sin/cos), but you don't!  It can flip sin/cos each time through the table!
+		float err = (8000./((float)ACTIONTABLESIZE/dhrpertable) - 8000./f)/(8000./f);
+		//Perform an op every X samples.  How well does this map onto one pass through the action table (ACTIONTABLESIZE entries; the printf below still labels it "1024")?
+		printf( "%d %f -> hits per 1024: %f %d (%f error)\n", topbin, f, (float)ACTIONTABLESIZE/f, dhrpertable, err * 100.0 );
+
+		float advance_per_step = dhrpertable/(float)ACTIONTABLESIZE;
+		float fvadv = 0.0;
+		int j;
+		int actions = 0;
+		int countset = 0;
+
+		//XXX TODO Tricky: We need to start fvadv off at such a place that there won't be a hiccup when wrapping back around to 0.
+
+		for( j = 0; j < ACTIONTABLESIZE; j++ )
 		{
-			highest_val_at_best_phase = highest;
-			bestphase = tryphase;
+			if( fvadv >= 0.5 )
+			{
+				actiontable[j] |= 1<<topbin;
+				fvadv -= 1.0;
+				countset++;
+			}
+			fvadv += advance_per_step;
 		}
+		printf( "   countset: %d\n", countset );
 	}
-	printf( "Best comp: %f : %d\n", bestphase, highest_val_at_best_phase );

-	//Set this because we would overflow the sinm and cosm regs if we don't.  This is sort of like a master volume.
-	//use this as that input volume knob thing.
-	float further_reduce = 1.0;
-
-	CompTableWithPhase( SSTABLESIZE, bestphase, (65536*128*further_reduce)/highest_val_at_best_phase );
-
-//	for( i = 0; i < SSTABLESIZE; i++ )
-//	{
-//		printf( "%d %d\n", spikysin_interleved_cos[i*2+0], spikysin_interleved_cos[i*2+1] );
-//	}
-
-	for( i = 0; i < MAX_FREQS; i++ )
+	for( i = 0; i < (1<<OCTAVES); i++ )
 	{
-		//frequencies[i] = SPS / Freq
-		// Need to decide how quickly we sweep through the table.
-		advancespeed[i] = 65536 * 256.0 /* fixed point */ * 256.0 /* size of table */ / frequencies[i];
-		//printf( "%f\n", frequencies[i] );
+		int longestzeroes = 0;
+		for( longestzeroes = 0; longestzeroes < 255 && ( ((i >> longestzeroes) & 1) == 0 ); longestzeroes++ );
+		//longestzeroes goes: 255, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, ...
+		//This isn't great, because we need to also know whether we are attacking the SIN side or the COS side.
+		highbit_table[i] = longestzeroes;
 	}
-	return 0;
+	//Repeat the highbit table in the second half.
+	//XXX PICK UP HERE
+	//Encode into highbit_table which cell is being operated on
+	//Also, do the * MAX_FREQS here.  That will 
+
+
+
+	placeintable = actiontable;
+	//	for( i = 0; i < ACTIONTABLESIZE; i++ )		printf( "%08x\n", actiontable[i] );
 }


-/*
-uint8_t  spikysin_interleved_cos[256*2];
-uint16_t advancespeed[MAX_FREQS];
-*/

-float toutbins[MAX_FREQS];
+int16_t running_integral;
+int16_t cossindata[MAX_FREQS*OCTAVES*2];
+uint8_t  which_octave_for_op[MAX_FREQS]; //counts up, tells you which octave you are operating on.
+uint16_t * placeintable;

-struct notedat
-{
-	uint32_t time;
-	int32_t sinm;
-	int32_t cosm;
-};
+//Put this in flash.
+uint32_t actiontable[ACTIONTABLESIZE];

-static struct notedat nd[MAX_FREQS];

 void Turbo8BitRun( int8_t adcval )
 {
-	int i;
-	for( i = 0; i < MAX_FREQS; i++ )
+	uint32_t actions = *(placeintable++);
+	if( placeintable == &actiontable[ACTIONTABLESIZE] ) placeintable = actiontable;
+	int b;
+	for( b = 0; b < MAX_FREQS; b++ )
 	{
-		uint32_t ct = nd[i].time;
-		int32_t muxres;
-		int32_t running;
-		int32_t rdesc, rdess;
-		uint8_t * spikysintable = &spikysin_interleved_cos[(ct>>24)][0];
+		if( ! ((1<<b) & actions) ) continue;
+		//If we get here, we need to do an action.
+		int op = which_octave_for_op[b]++;
+		int sinorcos = op & 1;
+		op >>= 1;
+		int octavebit = op & ((1<<OCTAVES)-1);
+		if( !octavebit ) { continue; } //XXX TRICKY: In our octavebit table, we have 1 0 and 1 1 entry. 2, 3, 4, etc. are ok.  So, if we hit a 0, we abort.
+		int whichoctave = highbit_table[octavebit];

-		int8_t  ss = *(spikysintable++);
+		//Ok, actually we need to also know whether you're on SIN or COS.

-		#define DECIR 8
-
-		muxres = ((int16_t)adcval * ss + (1<<(DECIR-1)) ) >> (DECIR);
-		running = nd[i].cosm;
-		running += muxres;
-		rdesc = running >> 8;
-		running -= rdesc >> 3;
-
-		nd[i].cosm = running;
-if( i == 0) printf( "MRX %5d  %9d %9d  %9d %9d\n", muxres, adcval, ss, running, nd[i].sinm );
-		int8_t  sc = *(spikysintable++);
-		muxres = ((int16_t)adcval * sc + (1<<(DECIR-1)) ) >> (DECIR);
-		running = nd[i].sinm;
-		running += muxres;
-
-		rdess = running>>8;
-		running -= rdess >> 3;
-
-		nd[i].sinm = running;
-
-		nd[i].time = ct + advancespeed[i];
-
-		toutbins[i] = rdess * rdess + rdesc * rdesc;
-		//printf( "%d %d = %f %p\n", rdess, rdesc, toutbins[i], &toutbins[i] );
+		//if( b == 0 ) printf( "%d\n", whichoctave );
+		//XXX TODO Optimization: Use a table, since octavebit can only be 0...31.
 	}
-
-	static uint8_t stater;
-/*	stater++;
-	if( stater == 16 )
-	{
-		stater = 0;
-		for( i = 0; i < MAX_FREQS; i++ )
-		{
-			nd[i].sinm -= nd[i].sinm >> 12;
-			nd[i].cosm -= nd[i].cosm >> 12;
-			nd[i].sinm += 8;
-			nd[i].cosm += 8;
-		}
-	}*/
 }


@ -269,12 +144,11 @@ void DoDFT8BitTurbo( float * outbins, float * frequencies, int bins, const float
 	for( i = last_place; i != place_in_data_buffer; i = (i+1)%size_of_data_buffer )
 	{
 		int16_t ifr1 = (int16_t)( ((databuffer[i]) ) * 4095 );
-		//ifr1 += 4095;
-		//ifr1 += 512;
 		Turbo8BitRun( ifr1>>5 ); //6 = Actually only feed algorithm numbers from -64 to 63.
 	}
 	last_place = place_in_data_buffer;

+#if 0
 	for( i = 0; i < bins; i++ )
 	{
 		outbins[i] = 0;
@ -289,7 +163,7 @@ void DoDFT8BitTurbo( float * outbins, float * frequencies, int bins, const float
 		printf( "MUX: %d %d\n", isc, iss );
 		outbins[i+MAX_FREQS] = sqrt(mux)/200.0;
 	} 
-
+#endif
 }


--- a/embeddedcommon/DFT8Turbo.c.attic
+++ b/embeddedcommon/DFT8Turbo.c.attic
@ -0,0 +1,295 @@
+#include <stdint.h>
+#include <stdlib.h>
+#include "DFT8Turbo.h"
+#include <math.h>
+
+#include <stdio.h>
+
+#define MAX_FREQS (24)
+#define OCTAVES   (5)
+
+
+/*
+	* The first thought was using an integration map and only operating when we need to, to pull the data out.
+	* Now we're doing the thing below this block comment
+		int16_t accumulated_total;							//2 bytes
+		int16_t last_accumulated_total_at_bin[MAX_FREQS*2];  //24 * 2 * sizeof(int16_t) = 96 bytes.
+		uint8_t current_time;								//1 byte
+		uint8_t placecode[MAX_FREQS];
+*/
+//OK... We don't have enough ram to sum everything... can we do something wacky with multiple ocatives to sum everything better?
+//i.e.
+//
+// 4332322132212210
+//
+// ++++++++++++++++-----------------
+// ++++++++--------
+// ++++----++++----
+// ++--++--++--++--
+// +-+-+-+-+-+-+-+-
+//
+// Don't forget we need to do this for sin and cos.
+// Can we instead of making this plusses, make it a multiplier?
+// How can we handle sin+cos?
+//
+// Is it possible to do this for every frame?  I.e. for each of the 24 notes, multiply with their current place in table?
+//  That's interesting.  It's not like a sin table.
+// There is no "multiply" in the attiny instruction set for attiny85.
+// There is, however for attiny402
+
+//Question:  Can we do five octaves, or does this need to be balanced?
+//Question2: Should we weight higher octaves?
+
+
+//ATTiny402: 256x8 RAM, 4096x8 FLASH  LPM: 3 cycles + FMUL: 2 cycles  << Do stacked sin waves?
+//ATtiny85:  512x8 RAM, 8192x8 FLASH  LPM: 3 cycles + NO MULTIPLY     << Do square waves?
+
+
+/* Approaches:
+
+  on ATtiny402:  Stacked sin approach.
+   Say 16 MHz, though 12 MHz is interesting...
+   16k SPS: 1k cycles per; say 24 bins per; 41 cycles per bin = hard.  But is it too hard?
+   20 cycles per s/c.
+		read place in stacked table (8? bits) 3 cycles
+
+		//Inner loop = 17 cycles.
+		read stacked table (8 bits), 3 cycles
+		fractional multiply table with current value. 2 cycles
+		read current running for note 2 cycles  (LDS = 3 cycles)
+		subtract a shifted version, to make it into an IIR. (4 cycles)
+		add in current values. (2 cycles)
+		store data back to ram (2 cycles)
+		advance place in stacked table (8?bits) 1 cycle
+
+		store place in stacked table (8? bits) 3 cycles?
+
+	//What if we chunk ADC updates into groups of 4 or 8?
+	//This is looking barely possible.
+
+	on attiny85: scheduled adds/subtracts (like a stacked-square-wave-table)
+		//XXX TODO!
+
+*/
+
+/* Ok... Let's think about the ATTiny402.  256x8 RAM + 4096x8 FLASH.
+
+	* We can create a table which has all octaves overlaid.
+	* We would need to keep track of:
+		* 12 x 2 x 2 = 48 bytes = Current sin/cos values.
+		* 12 x 2 = 24 bytes = Current place in table.  = 72 bytes
+	* We would need to store:
+		* The layered lookup table.  If possible, keep @ 256 bytes to simplify math ops.
+		* The speed by which each note needs to advance.
+	* We would need to:
+		* Read current running place. X                8 cycles
+		* Use that place to look up into sin table.    3 cycles
+		* Read running val  4 cycles best case
+		* Multiply out the sin + IIR                   5 cycles
+		* Store running val 4 cycles best case
+		* Cos-advance that place to look up into sin table.    4 cycles
+		* Read running val 4 cycles best case
+		* Multiply out the sin + IIR                   5 cycles
+		* Store running val 4 cycles best case.
+		* Read how much to advance X by.               4 cycles
+        * (Cos^2+Sin^2)                                8?
+		* Store it.                                    4 cycles best case.
+        *                                                  = 48 x 12 = 576 cycles.  Assume 10 MHz @ 16k SPS.  We're OK (625 samples)
+*/
+
+// Observation: The two tables are actually mirror images of each other, well diagonally mirrored.  That's odd.  But, would take CPU to exploit.
+
+#define SSTABLESIZE 256
+int8_t  spikysin_interleved_cos[SSTABLESIZE][2];
+uint32_t advancespeed[MAX_FREQS];
+
+static int CompTableWithPhase( int nelements, float phase, int scaling )
+{
+	int highest = 0;
+	int i;
+	for( i = 0; i < nelements; i++ )
+	{
+		float taued = i * 3.141592 * 2.0 / nelements;
+		int o;
+		float combsin = 0;
+		for( o = 0; o < OCTAVES; o++ )
+		{
+			combsin += sin( taued * (1<<o) + phase);
+		}
+		combsin /= OCTAVES;
+		int csadapt =  combsin * scaling - 0.5;	//No value is higher with five octaves.  XXX TODO Lookout.  If you change # of octaves, need to change this, too.
+
+		if( csadapt > highest ) highest = csadapt;
+		if( -csadapt > highest ) highest = -csadapt;
+
+		if( csadapt > 127 ) csadapt = 127;
+		if( csadapt < -128 ) csadapt = -128;  //tricky: Keep balanced.
+		spikysin_interleved_cos[i][0] = csadapt;
+
+		float combcos = 0;
+		for( o = 0; o < OCTAVES; o++ )
+		{
+			combcos += cos( taued * (1<<o) + phase );
+		}
+		combcos /= OCTAVES;
+		csadapt = combcos * scaling - 0.5;	//No value is higher with five octaves.  XXX TODO Lookout.  If you change # of octaves, need to change this, too.
+
+		if( csadapt > highest ) highest = csadapt;
+		if( -csadapt > highest ) highest = -csadapt;
+
+		if( csadapt > 127 ) csadapt = 127;
+		if( csadapt < -128 ) csadapt = -128;  //tricky: Keep balanced.
+		spikysin_interleved_cos[i][1] = csadapt;
+	}
+	return highest;
+}
+
+
+static int Setup( float * frequencies, int bins )
+{
+	int i;
+
+	//Since start position/phase is arbitrary, we should try several to see which gives us the best dynamic range.
+	float tryphase = 0;
+
+	float bestphase = 0;
+	int highest_val_at_best_phase = 1000000;
+
+	for( tryphase = 0; tryphase < 3.14159; tryphase += 0.001 )
+	{
+		int highest = CompTableWithPhase( SSTABLESIZE, tryphase, 65536 );
+		if( highest < highest_val_at_best_phase )
+		{
+			highest_val_at_best_phase = highest;
+			bestphase = tryphase;
+		}
+	}
+	printf( "Best comp: %f : %d\n", bestphase, highest_val_at_best_phase );
+
+	//Set this because we would overflow the sinm and cosm regs if we don't.  This is sort of like a master volume.
+	//use this as that input volume knob thing.
+	float further_reduce = 1.0;
+
+	CompTableWithPhase( SSTABLESIZE, bestphase, (65536*128*further_reduce)/highest_val_at_best_phase );
+
+//	for( i = 0; i < SSTABLESIZE; i++ )
+//	{
+//		printf( "%d %d\n", spikysin_interleved_cos[i*2+0], spikysin_interleved_cos[i*2+1] );
+//	}
+
+	for( i = 0; i < MAX_FREQS; i++ )
+	{
+		//frequencies[i] = SPS / Freq
+		// Need to decide how quickly we sweep through the table.
+		advancespeed[i] = 65536 * 256.0 /* fixed point */ * 256.0 /* size of table */ / frequencies[i];
+		//printf( "%f\n", frequencies[i] );
+	}
+	return 0;
+}
+
+
+/*
+uint8_t  spikysin_interleved_cos[256*2];
+uint16_t advancespeed[MAX_FREQS];
+*/
+
+float toutbins[MAX_FREQS];
+
+struct notedat
+{
+	uint32_t time;
+	int32_t sinm;
+	int32_t cosm;
+};
+
+static struct notedat nd[MAX_FREQS];
+
+void Turbo8BitRun( int8_t adcval )
+{
+	int i;
+	for( i = 0; i < MAX_FREQS; i++ )
+	{
+		uint32_t ct = nd[i].time;
+		int32_t muxres;
+		int32_t running;
+		int32_t rdesc, rdess;
+		uint8_t * spikysintable = &spikysin_interleved_cos[(ct>>24)][0];
+
+		int8_t  ss = *(spikysintable++);
+
+		#define DECIR 8
+
+		muxres = ((int16_t)adcval * ss + (1<<(DECIR-1)) ) >> (DECIR);
+		running = nd[i].cosm;
+		running += muxres;
+		rdesc = running >> 8;
+		running -= rdesc >> 3;
+
+		nd[i].cosm = running;
+if( i == 0) printf( "MRX %5d  %9d %9d  %9d %9d\n", muxres, adcval, ss, running, nd[i].sinm );
+		int8_t  sc = *(spikysintable++);
+		muxres = ((int16_t)adcval * sc + (1<<(DECIR-1)) ) >> (DECIR);
+		running = nd[i].sinm;
+		running += muxres;
+
+		rdess = running>>8;
+		running -= rdess >> 3;
+
+		nd[i].sinm = running;
+
+		nd[i].time = ct + advancespeed[i];
+
+		toutbins[i] = rdess * rdess + rdesc * rdesc;
+		//printf( "%d %d = %f %p\n", rdess, rdesc, toutbins[i], &toutbins[i] );
+	}
+
+	static uint8_t stater;
+/*	stater++;
+	if( stater == 16 )
+	{
+		stater = 0;
+		for( i = 0; i < MAX_FREQS; i++ )
+		{
+			nd[i].sinm -= nd[i].sinm >> 12;
+			nd[i].cosm -= nd[i].cosm >> 12;
+			nd[i].sinm += 8;
+			nd[i].cosm += 8;
+		}
+	}*/
+}
+
+
+void DoDFT8BitTurbo( float * outbins, float * frequencies, int bins, const float * databuffer, int place_in_data_buffer, int size_of_data_buffer, float q, float speedup )
+{
+	static int is_setup;
+	if( !is_setup ) { is_setup = 1; Setup( frequencies, bins ); }
+	static int last_place;
+	int i;
+
+	for( i = last_place; i != place_in_data_buffer; i = (i+1)%size_of_data_buffer )
+	{
+		int16_t ifr1 = (int16_t)( ((databuffer[i]) ) * 4095 );
+		//ifr1 += 4095;
+		//ifr1 += 512;
+		Turbo8BitRun( ifr1>>5 ); //6 = Actually only feed algorithm numbers from -64 to 63.
+	}
+	last_place = place_in_data_buffer;
+
+	for( i = 0; i < bins; i++ )
+	{
+		outbins[i] = 0;
+	}
+	for( i = 0; i < MAX_FREQS; i++ )
+	{
+		int iss = nd[i].sinm>>8;
+		int isc = nd[i].cosm>>8;
+		int mux = iss * iss + isc * isc;
+		if( mux == 0 ) mux = 1;
+		if( i == 0 )
+		printf( "MUX: %d %d\n", isc, iss );
+		outbins[i+MAX_FREQS] = sqrt(mux)/200.0;
+	} 
+
+}
+
+
--- a/embeddedcommon/DFT8Turbo.h.attic
+++ b/embeddedcommon/DFT8Turbo.h.attic
@ -0,0 +1,9 @@
+#ifndef _DFT8TURBO_H
+#define _DFT8TURBO_H
+
+/* Note: Frequencies must be precompiled. */
+
+void DoDFT8BitTurbo( float * outbins, float * frequencies, int bins, const float * databuffer, int place_in_data_buffer, int size_of_data_buffer, float q, float speedup );
+
+#endif
+