diff --git a/colorchord2/Makefile b/colorchord2/Makefile
index e83c55c..1299852 100644
--- a/colorchord2/Makefile
+++ b/colorchord2/Makefile
@@ -17,7 +17,7 @@ LDLIBS:=-lpthread -lasound -lm -lpulse-simple -lpulse -ludev -lrt
 CFLAGS:=-g -O0 -flto -Wall -ffast-math -I../embeddedcommon -I. -DICACHE_FLASH_ATTR=
 EXTRALIBS:=-lusb-1.0
 
-colorchord : os_generic.o main.o  dft.o decompose.o filter.o color.o notefinder.o util.o outdrivers.o $(RAWDRAW) $(SOUND) $(OUTS) parameters.o chash.o hook.o ../embeddedcommon/DFT32.o configs.o ../embeddedcommon/DFT8Turbo.o
+colorchord : os_generic.o main.o  dft.o decompose.o filter.o color.o notefinder.o util.o outdrivers.o $(RAWDRAW) $(SOUND) $(OUTS) parameters.o chash.o hook.o ../embeddedcommon/DFT32.o configs.o ../embeddedcommon/DFT8Turbo.o ../embeddedcommon/DFT8Padauk.o
 	gcc -o $@ $^ $(CFLAGS) $(LDLIBS) $(EXTRALIBS) $(RAWDRAWLIBS)
 
 
diff --git a/colorchord2/notefinder.c b/colorchord2/notefinder.c
index 1f0a01c..02bf0b7 100644
--- a/colorchord2/notefinder.c
+++ b/colorchord2/notefinder.c
@@ -12,6 +12,7 @@
 #include "decompose.h"
 #include "DFT32.h"
 #include "DFT8Turbo.h"
+#include "DFT8Padauk.h"
 
 struct NoteFinder * CreateNoteFinder( int spsRec )
 {
@@ -203,6 +204,9 @@ void RunNoteFinder( struct NoteFinder * nf, const float * audio_stream, int head
 	case 5:
 		DoDFT8BitTurbo( dftbins, nf->frequencies, freqs, audio_stream, head, buffersize, nf->dft_q, nf->dft_speedup );
 		break;
+	case 6:
+		DoDFT8BitPadauk( dftbins, nf->frequencies, freqs, audio_stream, head, buffersize, nf->dft_q, nf->dft_speedup );
+		break;
 	default:
 		fprintf( stderr, "Error: No DFT Seleced\n" );
 	}
diff --git a/colorchord2/turbo8bit.conf b/colorchord2/turbo8bit.conf
index d9ce3d4..2d4bdcc 100644
--- a/colorchord2/turbo8bit.conf
+++ b/colorchord2/turbo8bit.conf
@@ -30,7 +30,7 @@ base_hz = 82.41
 samplerate = 10000
 freqbins = 12
 octaves = 4
-
+do_progressive_dft=6
 
 
 slope = 0
diff --git a/embeddedcommon/DFT8Padauk.c b/embeddedcommon/DFT8Padauk.c
new file mode 100644
index 0000000..bc1a976
--- /dev/null
+++ b/embeddedcommon/DFT8Padauk.c
@@ -0,0 +1,327 @@
+//NOTE DO NOT EDIT THIS FILE WITHOUT ALSO EDITING DFT12SMALL!!!
+
+#include <stdint.h>
+#include <stdlib.h>
+#include "DFT8Turbo.h"
+#include <math.h>
+
+#include <stdio.h>
+
+#define MAX_FREQS (12)
+#define OCTAVES   (4)
+
+/*
+	General procedure - use this code, with uint16_t or uint32_t buffers, and make sure none of the alarms go off.
+		All of the paths still require no more than an 8-bit multiply.
+		You should test with extreme cases, like square wave sweeps in, etc.
+*/
+
+//No larger than 8-bit signed values for integration or sincos
+#define FRONTEND_AMPLITUDE (2)
+#define INITIAL_DECIMATE (5) //Yurgh... only 3 bits of ADC data.  That's 8 unique levels :(
+#define INTEGRATOR_DECIMATE (8)
+#define FINAL_DECIMATE (1)
+
+
+#define OPTABLETYPE uint16_t	//Make uint8_t if on attiny.
+
+//4x the hits (sin/cos and we need to do it once for each edge)
+//8x for selecting a higher octave.
+#define FREQREBASE 8.0 
+#define TARGFREQ 10000.0
+
+//These live in RAM.
+int8_t running_integral; //Realistically treat as 12-bits on ramjet8
+int8_t integral_at[MAX_FREQS*OCTAVES];	//For ramjet8, make 12-bits
+int8_t cossindata[MAX_FREQS*OCTAVES*2]; //Contains COS and SIN data.  (32-bit for now, will be 16-bit, potentially even 8.)
+uint8_t which_octave_for_op[MAX_FREQS]; //counts up, tells you which ocative you are operating on.  PUT IN RAM.
+uint8_t actiontableplace;
+
+#define NR_OF_OPS (4<<OCTAVES) /*64*/
+//Format is:
+//  255 = DO NOT OPERATE
+// bits 0..3 unfolded octave, i.e. sin/cos are offset by one.
+// bit 4 = add or subtract.
+OPTABLETYPE  optable[NR_OF_OPS]; //PUT IN FLASH
+
+#define ACTIONTABLESIZE 256
+uint16_t actiontable[ACTIONTABLESIZE]; //PUT IN FLASH // If there are more than 8 freqbins, this must be a uint16_t, otherwise if more than 16, 32.
+//Format is
+
+OPTABLETYPE mulmux[MAX_FREQS];	//PUT IN FLASH
+
+static int Setup( float * frequencies, int bins )
+{
+	int i;
+	printf( "BINS: %d\n", bins );
+
+	float highestf = frequencies[MAX_FREQS-1];
+	for( i = 0; i < MAX_FREQS; i++ )
+	{
+		mulmux[i] = (uint8_t)( highestf / frequencies[i] * 255 + 0.5 );
+		printf( "MM: %d  %f / %f\n", mulmux[i], frequencies[i], highestf );
+	}
+
+	for( i = bins-MAX_FREQS; i < bins; i++ )
+	{
+		int topbin = i - (bins-MAX_FREQS);
+		float f = frequencies[i]/FREQREBASE; 
+		float hits_per_table = (float)ACTIONTABLESIZE/f;
+		int dhrpertable = (int)(hits_per_table+.5);//TRICKY: You might think you need to have even number of hits (sin/cos), but you don't!  It can flip sin/cos each time through the table!
+		float err = (TARGFREQ/((float)ACTIONTABLESIZE/dhrpertable) - (float)TARGFREQ/f)/((float)TARGFREQ/f);
+		//Perform an op every X samples.  How well does this map into units of 1024?
+		printf( "%d %f -> hits per %d: %f %d (%.2f%% error)\n", topbin, f, ACTIONTABLESIZE, (float)ACTIONTABLESIZE/f, dhrpertable, err * 100.0 );
+		if( dhrpertable >= ACTIONTABLESIZE )
+		{
+			fprintf( stderr, "Error: Too many hits.\n" );
+			exit(0);
+		}
+
+		float advance_per_step = dhrpertable/(float)ACTIONTABLESIZE;
+		float fvadv = 0.5;
+		int j;
+		int countset = 0;
+
+		//Tricky: We need to start fadv off at such a place that there won't be a hicchup when going back around to 0.
+		//	I believe this is done by setting fvadv to 0.5 initially.  Unsure.
+
+		for( j = 0; j < ACTIONTABLESIZE; j++ )
+		{
+			if( fvadv >= 0.5 )
+			{
+				actiontable[j] |= 1<<(MAX_FREQS-1-topbin);	//XXX-DEPARTURE (reversing the table symbols)
+				fvadv -= 1.0;
+				countset++;
+			}
+			fvadv += advance_per_step;
+		}
+		printf( "   countset: %d\n", countset );
+	}
+	//exit(1);
+
+
+	int phaseinop[OCTAVES] = { 0 };
+	int already_hit_octaveplace[OCTAVES*2] = { 0 };
+	for( i = 0; i < NR_OF_OPS; i++ )
+	{
+		int longestzeroes = 0;
+		int val = i & ((1<<OCTAVES)-1);
+		for( longestzeroes = 0; longestzeroes < 255 && ( ((val >> longestzeroes) & 1) == 0 ); longestzeroes++ );
+		//longestzeroes goes: 255, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, ...
+		//This isn't great, because we need to also know whether we are attacking the SIN side or the COS side, and if it's + or -.
+		//We can actually decide that out.
+
+		if( longestzeroes == 255 )
+		{
+			//This is a nop.  Emit a nop.
+			optable[i] = 255;
+		}
+		else
+		{
+			longestzeroes = OCTAVES-1-longestzeroes;	//Actually do octave 0 least often.
+			int iop = phaseinop[longestzeroes]++;
+			int toop = longestzeroes;
+			int toopmon = (longestzeroes<<1) | (iop & 1);
+
+			//if it's the first time an octave happened this set, flag it. This may be used later in the process.
+			if( !already_hit_octaveplace[toopmon] )
+			{
+				already_hit_octaveplace[toopmon] = 1;
+				toop |= 1<<5;
+			}
+			if( iop & 1 )
+			{
+				toop |= 1<<6;
+			}
+
+			//Handle add/subtract bit.
+			if( iop & 2 ) toop |= 1<<4;
+
+			optable[i] = toop;
+
+			//printf( "  %d %d %d\n", iop, val, longestzeroes );
+		}
+		//printf( "HBT: %d = %d\n", i, optable[i] );
+	}
+	//exit(1);
+
+	return 0;
+}
+
+
+static uint16_t action;
+static uint8_t note;
+static uint8_t * memptr;
+static uint16_t * romptr;
+static uint8_t op;
+static uint8_t octave;
+static uint8_t intindex;
+static int8_t diff;
+static uint8_t tmp;
+
+void Padauk8BitRun( int8_t adcval )
+{
+	int16_t adcv = adcval;
+	adcv *= FRONTEND_AMPLITUDE;
+	if( adcv > 127 ) adcv = 127;
+	if( adcv < -128 ) adcv = -128;
+	running_integral += adcv>>INITIAL_DECIMATE;
+
+	uint8_t acc;
+	uint8_t * accM;
+	uint8_t mul2;
+
+	action = actiontable[actiontableplace++];
+
+	//Counts are approximate counts for PMS133
+
+	for( note = MAX_FREQS; 	
+		 note; 				//1CYC/PAIRED
+		 note--, 			//1CYC/PAIRED (dzsn)
+			action>>=1 		//2CYC (slc x2)
+		)
+	{
+		//Everything inside this loop is executed ~3/4 * MAX_FREQS per audio sample. so.. ~9x.
+		//If op @ 4MHz, we get 44 cycles in here.
+
+		//If no operation is scheduled, continue.
+		if( !( action & 1 ) ) continue;		//1CYC
+
+		accM = which_octave_for_op - 1;			//1CYC
+		accM = accM + note;						//1CYC
+		memptr = accM;							//1CYC
+		acc = *memptr;							//2CYC (idxm)
+		acc++;									//1CYC
+		if( acc == NR_OF_OPS )					//2CYC
+			acc = 0;
+		*memptr = acc;							//2CYC (idxm)
+
+		accM = (uint8_t*)optable + acc*2;		//1CYC
+		romptr = (uint16_t*)accM;				//1CYC
+		acc = *romptr;							//2CYC (ldtabl)
+
+		//If we are on the one thing we aren't supposed to operate within, cancel.
+		if( acc == 255 )	continue;			//2CYC
+
+		op = acc;								//1CYC									
+
+		//21 cycles so far.
+
+		acc = MAX_FREQS;						//1CYC
+		mul2 = acc;								//1CYC
+		acc = op;								//1CYC
+		acc = acc & 0xf;						//1CYC
+		octave = acc;							//1CYC
+		acc = acc * mul2;						//2CYC
+		acc = acc + note;						//1CYC
+		intindex = acc;							//1CYC
+		accM = (uint8_t*)integral_at-1 + acc;	//1CYC
+		memptr = accM;							//1CYC
+
+		if( op & 0x10 )	//ADD					//2CYC
+		{
+			acc = *memptr;						//2CYC
+			acc = acc - running_integral;		//1CYC
+			diff = acc;							//1CYC
+		}
+		else	//SUBTRACT
+		{
+			acc = *memptr;						//2CYC
+			tmp = acc;							//1CYC
+			acc = running_integral;				//1CYC
+			acc = acc - tmp;					//1CYC
+			diff = acc;							//1CYC
+		}
+
+		acc = running_integral;					//1CYC
+		*memptr = acc;							//2CYC
+
+		//42 AVERAGE cycles so far.
+		//??? Something below here is wrong???  Or near here??? XXX TODO PICK UP HERE!!!
+		intindex <<= 1; 						//1CYC
+		if( op&(1<<6) )							//2CYC
+		{
+			intindex |= 1;						//1CYC
+		}
+
+		accM = (uint8_t*)(mulmux - 1);			//1CYC
+		accM = accM + note*2;					//1CYC
+		romptr = accM;							//1CYC
+		acc = *romptr;							//2CYC
+		mul2 = acc;								//1CYC
+
+		if( diff < 0 )							//[2CYC] (t0sn on MSB)
+		{
+			diff *= -1;							//[1CYC] (neg M)
+			diff >>= (OCTAVES-1-octave); // ???TRICKY???  Should this be a multiply?
+
+			//if( diff > 250 ) printf( "!!!!!!!**** %d ****!!!!!!!\n", diff );
+
+			diff = ((uint16_t)diff * (uint16_t)mul2)>>INTEGRATOR_DECIMATE; //[3CYC]
+			diff *= -1; //[1CYC]
+		}
+		else
+		{
+			diff >>= (OCTAVES-1-octave);
+			//if( diff > 250 ) printf( "!!!!!!!**** %d ****!!!!!!!\n", diff );
+			diff = ((uint16_t)diff * (uint16_t)mul2)>>INTEGRATOR_DECIMATE;
+		}	
+
+		//@48 cycles :( :( :(
+
+		//printf( "%d\n", diff );
+
+		int8_t tmp = 
+			cossindata[intindex] 	//[3CYC]
+			+ diff					//[1CYC]
+			- (cossindata[intindex]>>4)	//[2CYC]
+			;
+
+		if( tmp > 0 ) tmp--;	//2CYC
+		if( tmp < 0 ) tmp++;	//2CYC
+		cossindata[intindex] = tmp;	//2CYC
+		//60ish cycles :( :( :(
+	}
+}
+
+
+void DoDFT8BitPadauk( float * outbins, float * frequencies, int bins, const float * databuffer, int place_in_data_buffer, int size_of_data_buffer, float q, float speedup )
+{
+	static int is_setup;
+	if( !is_setup ) { is_setup = 1; Setup( frequencies, bins ); }
+	static int last_place;
+	int i;
+
+	for( i = last_place; i != place_in_data_buffer; i = (i+1)%size_of_data_buffer )
+	{
+		int16_t ifr1 = (int16_t)( ((databuffer[i]) ) * 4095 );
+		Padauk8BitRun( ifr1>>5 ); //5 = Actually only feed algorithm numbers from -128 to 127.
+	}
+	last_place = place_in_data_buffer;
+
+	static int idiv;
+	idiv++;
+#if 1
+	for( i = 0; i < bins; i++ )
+	{
+		int iss = cossindata[i*2+0]>>FINAL_DECIMATE;
+		int isc = cossindata[i*2+1]>>FINAL_DECIMATE;
+		int mux = iss * iss + isc * isc;
+
+		if( mux <= 0 ) 
+		{
+			outbins[i] = 0;
+		}
+		else
+		{
+			outbins[i] = sqrt((float)mux)/50.0;
+
+			if( abs( cossindata[i*2+0] ) > 120 || abs( cossindata[i*2+1] ) > 120 )
+				printf( "CS OVF %d/%d/%d/%f\n", i, cossindata[i*2+0], cossindata[i*2+1],outbins[i] );
+
+		}
+	} 
+#endif
+}
+
+
diff --git a/embeddedcommon/DFT8Padauk.h b/embeddedcommon/DFT8Padauk.h
new file mode 100644
index 0000000..cb6387a
--- /dev/null
+++ b/embeddedcommon/DFT8Padauk.h
@@ -0,0 +1,9 @@
+#ifndef _DFT8PADAUK_H
+#define _DFT8PADAUK_H
+
+/* Note: Frequencies must be precompiled. */
+
+void DoDFT8BitPadauk( float * outbins, float * frequencies, int bins, const float * databuffer, int place_in_data_buffer, int size_of_data_buffer, float q, float speedup );
+
+#endif
+
diff --git a/embeddedcommon/DFT8Turbo.c b/embeddedcommon/DFT8Turbo.c
index e4661a2..6645a41 100644
--- a/embeddedcommon/DFT8Turbo.c
+++ b/embeddedcommon/DFT8Turbo.c
@@ -37,7 +37,7 @@ int8_t cossindata[MAX_FREQS*OCTAVES*2]; //Contains COS and SIN data.  (32-bit fo
 uint8_t which_octave_for_op[MAX_FREQS]; //counts up, tells you which ocative you are operating on.  PUT IN RAM.
 uint8_t actiontableplace;
 
-#define NR_OF_OPS (4<<OCTAVES)
+#define NR_OF_OPS (4<<OCTAVES) /*64*/
 //Format is:
 //  255 = DO NOT OPERATE
 // bits 0..3 unfolded octave, i.e. sin/cos are offset by one.
@@ -168,7 +168,7 @@ void Turbo8BitRun( int8_t adcval )
 			action>>=1 		//2CYC
 		)
 	{
-		//Everything inside this loop is executed ~3/4 * MAX_FREQS. so.. ~9x.
+		//Everything inside this loop is executed ~3/4 * MAX_FREQS per audio sample. so.. ~9x.
 		//If op @ 4MHz, we get 44 cycles in here.
 
 		//If no operation is scheduled, continue.