diff --git a/embeddedcommon/DFT12Small.c b/embeddedcommon/DFT12Small.c
index 41c9fdd..6d04241 100644
--- a/embeddedcommon/DFT12Small.c
+++ b/embeddedcommon/DFT12Small.c
@@ -1,3 +1,5 @@
+//NOTE DO NOT EDIT THIS FILE WITHOUT ALSO EDITING DFT8TURBO!!!
+
 #include <stdint.h>
 #include <stdlib.h>
 #include "DFT12Small.h"
@@ -32,9 +34,6 @@
 #define FINAL_DECIMATE (1)
 #endif
 
-//Right now, we need 8*freqs*octaves bytes.
-//This is bad.
-//What can we do to fix it?
 
 //4x the hits (sin/cos and we need to do it once for each edge)
 //8x for selecting a higher octave.
@@ -199,116 +198,104 @@ void Small12BitRun( int8_t adcval )
 	if( adcv < -128 ) adcv = -128;
 	running_integral += adcv>>INITIAL_DECIMATE;
 
-#define dprintf( ... )
-
 	uint32_t action = actiontable[actiontableplace++];
 	int n;
-	dprintf( "%4d ", actiontableplace );
-	for( n = 0; n < MAX_FREQS; n++ )
+	for( n = 0; n < MAX_FREQS; n++, action>>=1 )
 	{
-		if( action & (1<<n) )
+		if( !( action & 1 ) ) continue;
+
+		int ao = which_octave_for_op[n];
+		ao++;
+		if( ao >= NR_OF_OPS ) ao = 0;
+		which_octave_for_op[n] = ao;
+
+		int op = optable[ao];
+
+		if( op == 255 )
+			continue;
+
+		//int octaveplace = op & 0xf;
+
+		//Tricky: We share the integral with SIN and COS.
+		//We don't need to. It would produce a slightly cleaner signal. See: NOTE 3
+		uint8_t octave = op & 0xf;
+		uint8_t intindex = octave * MAX_FREQS + n;
+
+		//int invoct = OCTAVES-1-octaveplace;
+		int16_t diff;
+
+		if( op & 0x10 )	//ADD
 		{
-			int ao = which_octave_for_op[n];
-			int op = optable[ao];
-			ao++;
-			if( ao >= NR_OF_OPS ) ao = 0;
-			which_octave_for_op[n] = ao;
+			diff = integral_at[intindex] - running_integral;
+		}
+		else	//SUBTRACT
+		{
+			diff = running_integral - integral_at[intindex];
+		}
 
-			if( op == 255 )
-			{
-				dprintf( "*" );	//NOP
-			}
-			else
-			{
-				//int octaveplace = op & 0xf;
-
-				//Tricky: We share the integral with SIN and COS.
-				//We don't need to. It would produce a slightly cleaner signal. See: NOTE 3
-				uint8_t octave = op & 0xf;
-				uint8_t intindex = octave * MAX_FREQS + n;
-
-				//int invoct = OCTAVES-1-octaveplace;
-				int16_t diff;
-
-				if( op & 0x10 )	//ADD
-				{
-					diff = integral_at[intindex] - running_integral;
-					dprintf( "%c", 'a' + (op & 0xf) );
-				}
-				else	//SUBTRACT
-				{
-					diff = running_integral - integral_at[intindex];
-					dprintf( "%c", 'A' + (op & 0xf) );
-				}
-
-				integral_at[intindex] = running_integral;
+		integral_at[intindex] = running_integral;
 
 #ifdef TWELVEBIT
-				if( diff > 2000 || diff < -2000 ) printf( "!!!!!!!!!!!! %d !!!!!!!!!!!\n", diff );
+		if( diff > 2000 || diff < -2000 ) printf( "!!!!!!!!!!!! %d !!!!!!!!!!!\n", diff );
 #elif defined( EIGHTBIT )
-				if( diff > 124 || diff < -124 ) printf( "!!!!!!!!!!!! %d !!!!!!!!!!!\n", diff );
+		if( diff > 124 || diff < -124 ) printf( "!!!!!!!!!!!! %d !!!!!!!!!!!\n", diff );
 #endif
 
-				uint8_t idx = ( intindex << 1 );
-				if( op&(1<<6) )
-				{
-					idx |= 1;
-				}
+		//uint8_t idx = ( intindex << 1 );
+		intindex<<=1;
 
-				//printf( "%d: %d + %d * %d >> 8 - %d\n", idx, cossindata[idx], diff, mulmux[idx/2], cossindata[idx]>>4 );
+		if( op&(1<<6) )
+		{
+			intindex |= 1;
+		}
 
-				uint8_t mulmuxval = mulmux[n];
+		//printf( "%d: %d + %d * %d >> 8 - %d\n", intindex, cossindata[intindex], diff, mulmux[intindex/2], cossindata[intindex]>>4 );
+
+		uint8_t mulmuxval = mulmux[n];
 
 
-				//Do you live on a super lame processor? {NOTE 4}
-				//If you do, you might not have good signed multiply operations. So, an alternative mechanism is found here.
-				//	+) Able to more cleanly crush to an 8-bit multiply.
-				//	+) Gets extra bit of precision back, i.e. the sign bit is now used as a data bit.
-				//	-) More than 1 line of C code.  Requires possible double invert.
+		//Do you live on a super lame processor? {NOTE 4}
+		//If you do, you might not have good signed multiply operations. So, an alternative mechanism is found here.
+		//	+) Able to more cleanly crush to an 8-bit multiply.
+		//	+) Gets extra bit of precision back, i.e. the sign bit is now used as a data bit.
+		//	-) More than 1 line of C code.  Requires possible double invert.
 #if 1
-				//Terrible processor, i.e. PMS133
-				if( 0 && diff < 0 )
-				{
-					diff *= -1;
-					diff >>= (OCTAVES-1-octave);
+		//Terrible processor, i.e. PMS133
+		if( 0 && diff < 0 )
+		{
+			diff *= -1;
+			diff >>= (OCTAVES-1-octave);
 
-					if( diff > 250 ) printf( "!!!!!!!**** %d ****!!!!!!!\n", diff );
+			if( diff > 250 ) printf( "!!!!!!!**** %d ****!!!!!!!\n", diff );
 
-					diff = (uint16_t)diff * (uint16_t)mulmuxval;
-					diff >>= INTEGRATOR_DECIMATE;
+			diff = (uint16_t)diff * (uint16_t)mulmuxval;
+			diff >>= INTEGRATOR_DECIMATE;
 
-					diff *= -1;
-				}
-				else
-				{
-					diff >>= (OCTAVES-1-octave);
-
-					if( diff > 250 ) printf( "!!!!!!!**** %d ****!!!!!!!\n", diff );
-
-					diff = (uint16_t)diff * (uint16_t)mulmuxval;
-					diff >>= INTEGRATOR_DECIMATE;
-				}	
-#else
-				//Decent processor, i.e. ATTiny85.
-				diff = ((diff>>(OCTAVES-1-octave)) * mulmuxval ) >> 6;
-#endif
-				cossindata[idx] = cossindata[idx] 
-					+ diff
-					- (cossindata[idx]>>4)
-					;
-
-#ifdef EIGHTBIT
-				if( cossindata[idx] > 0 ) cossindata[idx]--;
-				if( cossindata[idx] < 0 ) cossindata[idx]++;
-#endif
-			}
+			diff *= -1;
 		}
 		else
 		{
-			dprintf( " " );
-		}
+			diff >>= (OCTAVES-1-octave);
+
+			if( diff > 250 ) printf( "!!!!!!!**** %d ****!!!!!!!\n", diff );
+
+			diff = (uint16_t)diff * (uint16_t)mulmuxval;
+			diff >>= INTEGRATOR_DECIMATE;
+		}	
+#else
+		//Decent processor, i.e. ATTiny85.
+		diff = ((diff>>(OCTAVES-1-octave)) * mulmuxval ) >> 6;
+#endif
+		cossindata[intindex] = cossindata[intindex] 
+			+ diff
+			- (cossindata[intindex]>>4)
+			;
+
+#ifdef EIGHTBIT
+		if( cossindata[intindex] > 0 ) cossindata[intindex]--;
+		if( cossindata[intindex] < 0 ) cossindata[intindex]++;
+#endif
 	}
-	dprintf( "\n" );
 
 }
 
diff --git a/embeddedcommon/DFT8Turbo.c b/embeddedcommon/DFT8Turbo.c
index 1471270..fd07df4 100644
--- a/embeddedcommon/DFT8Turbo.c
+++ b/embeddedcommon/DFT8Turbo.c
@@ -1,3 +1,5 @@
+//NOTE DO NOT EDIT THIS FILE WITHOUT ALSO EDITING DFT12SMALL!!!
+
 #include <stdint.h>
 #include <stdlib.h>
 #include "DFT8Turbo.h"
@@ -5,7 +7,6 @@
 
 #include <stdio.h>
 
-
 #define MAX_FREQS (12)
 #define OCTAVES   (4)
 
@@ -15,67 +16,24 @@
 		You should test with extreme cases, like square wave sweeps in, etc.
 */
 
-//#define TWELVEBIT
-#define EIGHTBIT
-
-#ifdef TWELVEBIT
-//No larger than 12-bit signed values for integration or sincos 
-#define FRONTEND_AMPLITUDE (0)
-#define INITIAL_DECIMATE (2)
-#define INTEGRATOR_DECIMATE (8)
-#define FINAL_DECIMATE (4)
-#elif defined( EIGHTBIT )
 //No larger than 8-bit signed values for integration or sincos
 #define FRONTEND_AMPLITUDE (2)
 #define INITIAL_DECIMATE (5) //Yurgh... only 3 bits of ADC data.  That's 8 unique levels :(
 #define INTEGRATOR_DECIMATE (8)
 #define FINAL_DECIMATE (1)
-#endif
 
-//Right now, we need 8*freqs*octaves bytes.
-//This is bad.
-//What can we do to fix it?
+
+#define OPTABLETYPE uint16_t	//Make uint8_t if on attiny.
 
 //4x the hits (sin/cos and we need to do it once for each edge)
 //8x for selecting a higher octave.
 #define FREQREBASE 8.0 
 #define TARGFREQ 10000.0
 
-/* Tradeoff guide:
-
-	* We will optimize for RAM size here.
-
-	* INITIAL_DECIMATE; A larger decimation: {NOTE 1}
-		+) Reduces the bit depth needed for the integral map.
-			If you use "1" and a fully saturted map (highest note is every sample), it will not overflow a signed 12-bit number.
-		-) Increases noise.  
-			With full-scale: 0->1 minimal 1->2 minimal 2->3 significantly noticable, 3->4 major.
-			If sound is quieter, it matters more.  Not sure with other changes in system. (2) seems ok.
-		-) If you make it (1) or (0) You can't do an 8-bit multiply and keep the output in a signed range.
-	Also, other things, like frequency of hits can manipulate the maximum bit depth needed for integral map.
-
-	* If you weight the bins in advance see "mulmux", you can:	{NOTE 2}
-		+) potentially use shallower bit depth but
-		-) have to compute the multiply every time you update the bin.
-
-	* You can use a modified-square-wave which only integrates for 1/2 of the duty cycle. {NOTE 3}
-		+) uses 1/2 the integral memory.
-		-) Not as pretty of an output.  See "integral_at"
-
-	*TODO: Investigate using all unsigned (to make multiply and/or 12-bit storage easier)
-	*TODO: Consider a mode which has 16-bit integrals, but still 8-bit cossin data.
-
-	So, the idea here is we would keep a running total of the current ADC value, kept away in a int16_t.
-	It is constantly summing, so we can take an integral of it.  Or rather an integral range.
-
-	Over time, we perform operations like adding or subtracting from a current place.  It basically is
-	a DFT where the kernel is computed using square waves (or modified square waves)
-*/
-
 //These live in RAM.
-int16_t running_integral; //Realistically treat as 12-bits on ramjet8
-int16_t integral_at[MAX_FREQS*OCTAVES];	//For ramjet8, make 12-bits
-int32_t cossindata[MAX_FREQS*OCTAVES*2]; //Contains COS and SIN data.  (32-bit for now, will be 16-bit, potentially even 8.)
+int8_t running_integral; //Running sum of the decimated ADC samples. 8-bit signed in this file (the old "12-bits on ramjet8" remark predates the switch to int8_t).
+int8_t integral_at[MAX_FREQS*OCTAVES];	//Value of running_integral saved the last time each (octave,freq) slot was operated on. 8-bit signed here.
+int8_t cossindata[MAX_FREQS*OCTAVES*2]; //Contains COS and SIN data, two entries per integral_at slot (low index bit picks which). 8-bit signed here, no longer 32-bit.
 uint8_t which_octave_for_op[MAX_FREQS]; //counts up, tells you which ocative you are operating on.  PUT IN RAM.
 uint8_t actiontableplace;
 
@@ -84,13 +42,13 @@ uint8_t actiontableplace;
 //  255 = DO NOT OPERATE
 // bits 0..3 unfolded octave, i.e. sin/cos are offset by one.
 // bit 4 = add or subtract.
-uint8_t  optable[NR_OF_OPS]; //PUT IN FLASH
+OPTABLETYPE  optable[NR_OF_OPS]; //PUT IN FLASH
 
 #define ACTIONTABLESIZE 256
 uint16_t actiontable[ACTIONTABLESIZE]; //PUT IN FLASH // If there are more than 8 freqbins, this must be a uint16_t, otherwise if more than 16, 32.
 //Format is
 
-uint8_t mulmux[MAX_FREQS];	//PUT IN FLASH
+OPTABLETYPE mulmux[MAX_FREQS];	//PUT IN FLASH
 
 static int Setup( float * frequencies, int bins )
 {
@@ -199,117 +157,112 @@ void Turbo8BitRun( int8_t adcval )
 	if( adcv < -128 ) adcv = -128;
 	running_integral += adcv>>INITIAL_DECIMATE;
 
-#define dprintf( ... )
+	uint16_t action = actiontable[actiontableplace++];
+	uint8_t n;
 
-	uint32_t action = actiontable[actiontableplace++];
-	int n;
-	dprintf( "%4d ", actiontableplace );
-	for( n = 0; n < MAX_FREQS; n++ )
+	//Counts are approximate counts for PMS133
+
+	for( n = 0; 			//1CYC
+		 n < MAX_FREQS; 	//2CYC
+		 n++, 				//1CYC
+			action>>=1 		//2CYC
+		)
 	{
-		if( action & (1<<n) )
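+		//Everything inside this loop is executed for ~3/4 of the MAX_FREQS bins on each call, so ~9x.
+		//If running @ 4MHz with TARGFREQ of 10kHz, that is 400 cycles per sample, so we get ~44 cycles in here per iteration.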
+
+		//If no operation is scheduled, continue.
+		if( !( action & 1 ) ) continue;		//1CYC
+
+		uint8_t ao = which_octave_for_op[n];	//4CYC
+		ao++;									//1CYC
+		if( ao >= NR_OF_OPS ) ao = 0;			//2CYC
+		which_octave_for_op[n] = ao;			//2CYC (idxm)
+
+		uint8_t op = optable[ao];				//"theoretically" 3CYC (if you align things right)
+												//1CYC (Put A into specific RAM location)
+
+		//If we are on the one thing we aren't supposed to operate within, cancel.
+		if( op == 255 )	continue;				//2CYC (if op is in A)
+
+		//Tricky: We share the integral with SIN and COS.
+		//We don't need to. It would produce a slightly cleaner signal. See: NOTE 3
+		uint8_t octave = op & 0xf;				//1CYC (if op is in A)
+
+
+		uint8_t intindex = octave * MAX_FREQS //Load mulop with 12 [2CYC]; mul [1CYC]
+			 + n;								//Add [1CYC]
+												//[1CYC] one more cycle to write A into RAM (intindex)
+		//int invoct = OCTAVES-1-octaveplace;
+		int8_t diff;
+
+		if( op & 0x10 )	//ADD		//2CYC
 		{
-			int ao = which_octave_for_op[n];
-			int op = optable[ao];
-			ao++;
-			if( ao >= NR_OF_OPS ) ao = 0;
-			which_octave_for_op[n] = ao;
+			diff = integral_at[intindex]		//Assume "IntIndex" is in A, add integral_at to A [1], move A to an index [1]. [2] to read into acc. [4CYC]
+				 - running_integral;			//1CYC to subtract.
+												//1CYC to write diff into a memory location.
+		}
+		else	//SUBTRACT
+		{
+			diff = running_integral - integral_at[intindex];
+		}
 
-			if( op == 255 )
-			{
-				dprintf( "*" );	//NOP
-			}
-			else
-			{
-				//int octaveplace = op & 0xf;
+		//30 cycles so far.
 
-				//Tricky: We share the integral with SIN and COS.
-				//We don't need to. It would produce a slightly cleaner signal. See: NOTE 3
-				uint8_t octave = op & 0xf;
-				uint8_t intindex = octave * MAX_FREQS + n;
+		integral_at[intindex] = running_integral;	//[3CYC]
 
-				//int invoct = OCTAVES-1-octaveplace;
-				int16_t diff;
+		//if( diff > 124 || diff < -124 ) printf( "!!!!!!!!!!!! %d !!!!!!!!!!!\n", diff );
+		
+		//uint8_t idx = ( intindex << 1 );	//Overwrite intindex.
+		intindex <<= 1; //1CYC
 
-				if( op & 0x10 )	//ADD
-				{
-					diff = integral_at[intindex] - running_integral;
-					dprintf( "%c", 'a' + (op & 0xf) );
-				}
-				else	//SUBTRACT
-				{
-					diff = running_integral - integral_at[intindex];
-					dprintf( "%c", 'A' + (op & 0xf) );
-				}
+		if( op&(1<<6) )	//2CYC
+		{
+			intindex |= 1; //1CYC
+		}
 
-				integral_at[intindex] = running_integral;
-
-#ifdef TWELVEBIT
-				if( diff > 2000 || diff < -2000 ) printf( "!!!!!!!!!!!! %d !!!!!!!!!!!\n", diff );
-#elif defined( EIGHTBIT )
-				if( diff > 124 || diff < -124 ) printf( "!!!!!!!!!!!! %d !!!!!!!!!!!\n", diff );
-#endif
-
-				uint8_t idx = ( intindex << 1 );
-				if( op&(1<<6) )
-				{
-					idx |= 1;
-				}
-
-				//printf( "%d: %d + %d * %d >> 8 - %d\n", idx, cossindata[idx], diff, mulmux[idx/2], cossindata[idx]>>4 );
-
-				uint8_t mulmuxval = mulmux[n];
+		uint8_t mulmuxval = mulmux[n];	//[4CYC]
 
 
-				//Do you live on a super lame processor? {NOTE 4}
-				//If you do, you might not have good signed multiply operations. So, an alternative mechanism is found here.
-				//	+) Able to more cleanly crush to an 8-bit multiply.
-				//	+) Gets extra bit of precision back, i.e. the sign bit is now used as a data bit.
-				//	-) More than 1 line of C code.  Requires possible double invert.
+		//Do you live on a super lame processor? {NOTE 4}
+		//If you do, you might not have good signed multiply operations. So, an alternative mechanism is found here.
+		//	+) Able to more cleanly crush to an 8-bit multiply.
+		//	+) Gets extra bit of precision back, i.e. the sign bit is now used as a data bit.
+		//	-) More than 1 line of C code.  Requires possible double invert.
 #if 1
-				//Terrible processor, i.e. PMS133
-				if( 0 && diff < 0 )
-				{
-					diff *= -1;
-					diff >>= (OCTAVES-1-octave);
+		//rough processor, i.e. PMS133
+		if( diff < 0 )		//[2CYC]
+		{
+			diff *= -1;		//[1CYC]
+			diff >>= (OCTAVES-1-octave); // ???TRICKY???
+			//if( diff > 250 ) printf( "!!!!!!!**** %d ****!!!!!!!\n", diff );
 
-					if( diff > 250 ) printf( "!!!!!!!**** %d ****!!!!!!!\n", diff );
-
-					diff = (uint16_t)diff * (uint16_t)mulmuxval;
-					diff >>= INTEGRATOR_DECIMATE;
-
-					diff *= -1;
-				}
-				else
-				{
-					diff >>= (OCTAVES-1-octave);
-
-					if( diff > 250 ) printf( "!!!!!!!**** %d ****!!!!!!!\n", diff );
-
-					diff = (uint16_t)diff * (uint16_t)mulmuxval;
-					diff >>= INTEGRATOR_DECIMATE;
-				}	
-#else
-				//Decent processor, i.e. ATTiny85.
-				diff = ((diff>>(OCTAVES-1-octave)) * mulmuxval ) >> 6;
-#endif
-				cossindata[idx] = cossindata[idx] 
-					+ diff
-					- (cossindata[idx]>>4)
-					;
-
-#ifdef EIGHTBIT
-				if( cossindata[idx] > 0 ) cossindata[idx]--;
-				if( cossindata[idx] < 0 ) cossindata[idx]++;
-#endif
-			}
+			diff = ((uint16_t)diff * (uint16_t)mulmuxval)>>INTEGRATOR_DECIMATE; //[3CYC]
+			diff *= -1; //[1CYC]
 		}
 		else
 		{
-			dprintf( " " );
-		}
-	}
-	dprintf( "\n" );
+			diff >>= (OCTAVES-1-octave);
+			//if( diff > 250 ) printf( "!!!!!!!**** %d ****!!!!!!!\n", diff );
+			diff = ((uint16_t)diff * (uint16_t)mulmuxval)>>INTEGRATOR_DECIMATE;
+		}	
 
+		//@48 cycles :( :( :(
+
+#else
+		//Decent processor, i.e. ATTiny85.
+		diff = ((diff>>(OCTAVES-1-octave)) * mulmuxval ) >> 6;
+#endif
+		//printf( "%d\n", diff );
+
+		cossindata[intindex] = cossindata[intindex] 
+			+ diff
+			- (cossindata[intindex]>>4)
+			;
+
+		if( cossindata[intindex] > 0 ) cossindata[intindex]--;
+		if( cossindata[intindex] < 0 ) cossindata[intindex]++;
+	}
 }
 
 
@@ -344,13 +297,9 @@ void DoDFT8BitTurbo( float * outbins, float * frequencies, int bins, const float
 		{
 			outbins[i] = sqrt((float)mux)/50.0;
 
-#ifdef TWELVEBIT
-		if( abs( cossindata[i*2+0] ) > 1000 || abs( cossindata[i*2+1] ) > 1000 )
-			printf( "CS OVF %d/%d/%d/%f\n", i, cossindata[i*2+0], cossindata[i*2+1],outbins[i] );
-#elif defined( EIGHTBIT )
-		if( abs( cossindata[i*2+0] ) > 120 || abs( cossindata[i*2+1] ) > 120 )
-			printf( "CS OVF %d/%d/%d/%f\n", i, cossindata[i*2+0], cossindata[i*2+1],outbins[i] );
-#endif
+			if( abs( cossindata[i*2+0] ) > 120 || abs( cossindata[i*2+1] ) > 120 )
+				printf( "CS OVF %d/%d/%d/%f\n", i, cossindata[i*2+0], cossindata[i*2+1],outbins[i] );
+
 		}
 	} 
 #endif