From f2a1086c973def52f47aed732352ccd63d2061fd Mon Sep 17 00:00:00 2001
From: cnlohr <charles@cnlohr.com>
Date: Fri, 3 Apr 2015 14:10:25 -0400
Subject: [PATCH] Move the embedded stuff into architecture-specific folders

---
 DFT32.c                                  | 342 +++++++++++++++++++++++
 DFT32.h                                  |  69 +++++
 Makefile                                 |  11 +-
 default.conf                             |   3 +-
 dft.c                                    |  10 +-
 dft.h                                    |   7 +-
 embeddednf.c                             |  27 +-
 embeddednf.h                             |  11 +-
 embeddedx86/Makefile                     |  13 +
 embeddedcc.c => embeddedx86/embeddedcc.c |   5 +-
 notefinder.c                             |   4 +
 11 files changed, 479 insertions(+), 23 deletions(-)
 create mode 100644 DFT32.c
 create mode 100644 DFT32.h
 create mode 100644 embeddedx86/Makefile
 rename embeddedcc.c => embeddedx86/embeddedcc.c (87%)

diff --git a/DFT32.c b/DFT32.c
new file mode 100644
index 0000000..ded5ea7
--- /dev/null
+++ b/DFT32.c
@@ -0,0 +1,342 @@
+#include "DFT32.h"
+#include <string.h>
+
+#ifndef CCEMBEDDED
+#include <stdlib.h>
+#include <stdio.h>
+#include <math.h>
+static float * goutbins;
+#endif
+
+uint16_t embeddedbins32[FIXBINS]; //Per-bin magnitudes; rewritten by UpdateOutputBins32().
+
+//NOTES to self:
+//
+// Let's say we want to try this on an AVR.
+//  24 bins, 5 octaves = 120 bins.
+// 20 MHz clock / 4.8k sps = 4096 IPS = 34 clocks per bin = :(
+//  We can do two at the same time, this frees us up some 
+
+static uint8_t Sdonefirstrun; //Set to 1 by SetupDFTProgressive32(); tested by DoDFTProgressive32().
+
+//A table of precomputed sin() values: one full cycle in 256 steps, ranging
+//-1500 to +1500.  If we increase the range, it may cause overflows elsewhere.
+const int16_t Ssinonlytable[256] = {
+             0,    36,    73,   110,   147,   183,   220,   256,
+           292,   328,   364,   400,   435,   470,   505,   539,
+           574,   607,   641,   674,   707,   739,   771,   802,
+           833,   863,   893,   922,   951,   979,  1007,  1034,
+          1060,  1086,  1111,  1135,  1159,  1182,  1204,  1226,
+          1247,  1267,  1286,  1305,  1322,  1339,  1355,  1371,
+          1385,  1399,  1412,  1424,  1435,  1445,  1455,  1463,
+          1471,  1477,  1483,  1488,  1492,  1495,  1498,  1499,
+          1500,  1499,  1498,  1495,  1492,  1488,  1483,  1477,
+          1471,  1463,  1455,  1445,  1435,  1424,  1412,  1399,
+          1385,  1371,  1356,  1339,  1322,  1305,  1286,  1267,
+          1247,  1226,  1204,  1182,  1159,  1135,  1111,  1086,
+          1060,  1034,  1007,   979,   951,   922,   893,   863,
+           833,   802,   771,   739,   707,   674,   641,   607,
+           574,   539,   505,   470,   435,   400,   364,   328,
+           292,   256,   220,   183,   147,   110,    73,    36,
+             0,   -36,   -73,  -110,  -146,  -183,  -219,  -256,
+          -292,  -328,  -364,  -399,  -435,  -470,  -505,  -539,
+          -573,  -607,  -641,  -674,  -706,  -739,  -771,  -802,
+          -833,  -863,  -893,  -922,  -951,  -979, -1007, -1034,
+         -1060, -1086, -1111, -1135, -1159, -1182, -1204, -1226,
+         -1247, -1267, -1286, -1305, -1322, -1339, -1355, -1371,
+         -1385, -1399, -1412, -1424, -1435, -1445, -1454, -1463,
+         -1471, -1477, -1483, -1488, -1492, -1495, -1498, -1499,
+         -1500, -1499, -1498, -1495, -1492, -1488, -1483, -1477,
+         -1471, -1463, -1455, -1445, -1435, -1424, -1412, -1399,
+         -1385, -1371, -1356, -1339, -1322, -1305, -1286, -1267,
+         -1247, -1226, -1204, -1182, -1159, -1135, -1111, -1086,
+         -1060, -1034, -1007,  -979,  -951,  -923,  -893,  -863,
+          -833,  -802,  -771,  -739,  -707,  -674,  -641,  -608,
+          -574,  -540,  -505,  -470,  -435,  -400,  -364,  -328,
+          -292,  -256,  -220,  -183,  -147,  -110,   -73,   -37,};
+
+
+/** The above table was created using the following code:
+#include <math.h>
+#include <stdio.h>
+#include <stdint.h>
+
+int16_t Ssintable[256]; //Actually, just [sin].
+
+int main()
+{
+	int i;
+	for( i = 0; i < 256; i++ )
+	{
+		Ssintable[i] = (int16_t)((sinf( i / 256.0 * 6.283 ) * 1500.0));
+	}
+
+	printf( "const int16_t Ssinonlytable[256] = {" );
+	for( i = 0; i < 256; i++ )
+	{
+		if( !(i & 0x7 ) )
+		{
+			printf( "\n\t" );
+		}
+		printf( "%6d," ,Ssintable[i] );
+	}
+	printf( "};\n" );
+} */
+
+
+
+uint16_t Sdatspace32A[FIXBINS*2];  //(advances,places): one phase pair per bin
+int32_t Sdatspace32B[FIXBINS*2];  //(isses,icses): running sin/cos sums per bin
+int32_t Sdatspace32BOut[FIXBINS*2];  //(isses,icses): snapshot copied from Sdatspace32B by HandleInt
+//Octave scheduling state used by HandleInt.
+//Sdo_this_octave is filled in by SetupDFTProgressive32.
+static uint8_t Sdo_this_octave[BINCYCLE];  //octave to service in each schedule slot
+static int32_t Saccum_octavebins[OCTAVES];  //samples summed since each octave last ran
+static uint8_t Swhichoctaveplace;  //current index into Sdo_this_octave
+uint16_t embeddedbins[FIXBINS]; //NOTE(review): never written in this file, and dft.c defines the same symbol - confirm both objects can be linked together.
+
+//From: http://stackoverflow.com/questions/1100090/looking-for-an-efficient-integer-square-root-algorithm-for-arm-thumb2
+/**
+ * \brief    Fast Square root algorithm, with rounding
+ *
+ * This does arithmetic rounding of the result. That is, if the real answer
+ * would have a fractional part of 0.5 or greater, the result is rounded up to
+ * the next integer.
+ *      - SquareRootRounded(2) --> 1
+ *      - SquareRootRounded(3) --> 2
+ *      - SquareRootRounded(4) --> 2
+ *      - SquareRootRounded(6) --> 2
+ *      - SquareRootRounded(7) --> 3
+ *      - SquareRootRounded(8) --> 3
+ *      - SquareRootRounded(9) --> 3
+ * The result saturates at 65535 rather than wrapping to 0 for huge inputs.
+ * \param[in] a_nInput - unsigned integer for which to find the square root
+ *
+ * \return Integer square root of the input value, rounded to nearest.
+ */
+static uint16_t SquareRootRounded(uint32_t a_nInput)
+{
+    uint32_t op  = a_nInput;
+    uint32_t res = 0;
+    uint32_t one = 1uL << 30; // The second-to-top bit is set: use 1u << 14 for uint16_t type; use 1uL<<30 for uint32_t type
+
+
+    // "one" starts at the highest power of four <= the argument.
+    while (one > op)
+    {
+        one >>= 2;
+    }
+
+    while (one != 0)
+    {
+        if (op >= res + one)
+        {
+            op = op - (res + one);
+            res = res +  2 * one;
+        }
+        res >>= 1;
+        one >>= 2;
+    }
+
+    /* Do arithmetic rounding to nearest integer; never round 65535 up to 65536 (it would return 0) */
+    if (op > res && res < UINT16_MAX)
+    {
+        res++;
+    }
+
+    return (uint16_t)res;
+}
+
+void UpdateOutputBins32()
+{
+	//Converts the (sin,cos) snapshot in Sdatspace32BOut into one magnitude per
+	//bin: embeddedbins32[] always, plus the float output buffer on PC builds.
+	int i;
+	const int32_t * ipt = &Sdatspace32BOut[0]; //int32_t, not int: the two types may differ.
+	for( i = 0; i < FIXBINS; i++ )
+	{
+		int16_t isps = *(ipt++)>>16; //Only the top 16 bits of each sum are used.
+		int16_t ispc = *(ipt++)>>16;
+		int octave = i / FIXBPERO;
+		//Each square is at most 2^30; add them unsigned so the sum cannot overflow.
+		uint32_t mux = (uint32_t)( (int32_t)isps * isps ) + (uint32_t)( (int32_t)ispc * ispc );
+#ifndef CCEMBEDDED
+		if( goutbins ) //Stays NULL until DoDFTProgressive32 supplies a buffer.
+			goutbins[i] = sqrtf( (float)mux ) / ( (78<<DFTIIR)*(1<<octave) ); //reasonable (but arbitrary amplification)
+#endif
+		//bump it up so we don't lose a lot of detail at high freqs.
+		embeddedbins32[i] = ( (uint32_t)SquareRootRounded( mux ) << 1 ) >> octave;
+	}
+}
+
+static void HandleInt( int16_t sample )
+{
+	int i;
+	uint16_t adv;
+	uint8_t localipl;
+	//Fetch the octave scheduled for this call, then step (and wrap) the schedule index.
+	uint8_t oct = Sdo_this_octave[Swhichoctaveplace];
+	Swhichoctaveplace ++;
+	Swhichoctaveplace &= BINCYCLE-1;
+
+	if( oct > 128 )
+	{
+		//Special: This is when we can update everything.
+		//(SetupDFTProgressive32 stores OCTAVES-j-1, which wraps to 255 for the last slot.)
+		int32_t * bins = &Sdatspace32B[0];
+		int32_t * binsOut = &Sdatspace32BOut[0];
+
+		for( i = 0; i < FIXBINS; i++ )
+		{
+			//First for the SIN then the COS.
+			int32_t val = *(bins);
+			*(binsOut++) = val;
+			*(bins++) -= val>>DFTIIR;
+			//Same for the COS: publish the sum, then reduce it by val>>DFTIIR.
+			val = *(bins);
+			*(binsOut++) = val;
+			*(bins++) -= val>>DFTIIR;
+		}
+		return;
+	}
+
+	//Every octave's accumulator collects the new sample.
+	for( i = 0; i < OCTAVES;i++ )
+	{
+		Saccum_octavebins[i] += sample;
+	}
+	//Point at this octave's (advance,place) pairs and (sin,cos) sums.
+	uint16_t * dsA = &Sdatspace32A[oct*FIXBPERO*2];
+	int32_t * dsB = &Sdatspace32B[oct*FIXBPERO*2];
+	//Use the scaled-down sum gathered since this octave last ran, then restart it.
+	sample = Saccum_octavebins[oct]>>(OCTAVES-oct);
+	Saccum_octavebins[oct] = 0;
+
+	for( i = 0; i < FIXBPERO; i++ )
+	{
+		adv = *(dsA++);
+		localipl = *(dsA) >> 8;
+		*(dsA++) += adv;
+		//The top 8 bits of the pre-advance place index the 256-entry sine table.
+		*(dsB++) += (Ssinonlytable[localipl] * sample);
+		//Get the cosine (1/4 wavelength out-of-phase with sin)
+		localipl += 64;
+		*(dsB++) += (Ssinonlytable[localipl] * sample);
+	}
+}
+
+int SetupDFTProgressive32()
+{
+	//Builds the octave schedule: a slot handles octave OCTAVES-1-k, where k is
+	//the position of the lowest clear bit of the slot number (4 3 4 2 4 3 4 ...
+	//when OCTAVES is 5).  The last slot has no clear bit below OCTAVES and
+	//wraps to 255.  Returns 0 on success, -1 on an internal error.
+	int slot;
+
+	Sdonefirstrun = 1;
+	for( slot = 0; slot < BINCYCLE; slot++ )
+	{
+		int lowzero = 0;
+
+		//Scan upward for the first bit of the slot number that is clear.
+		while( lowzero <= OCTAVES && ( slot & (1<<lowzero) ) )
+		{
+			lowzero++;
+		}
+
+		if( lowzero > OCTAVES )
+		{
+#ifndef CCEMBEDDED
+			fprintf( stderr, "Error: algorithm fault.\n" );
+			exit( -1 );
+#endif
+			return -1;
+		}
+		Sdo_this_octave[slot] = OCTAVES-lowzero-1;
+	}
+	return 0;
+}
+
+
+
+void UpdateBins32( const uint16_t * frequencies )
+{
+	//Stores one advance per bin in the even slots of Sdatspace32A; every octave
+	//reuses the same FIXBPERO entries of frequencies.  Odd slots are untouched.
+	int bin;
+	uint16_t * advance = &Sdatspace32A[0];
+	for( bin = 0; bin < FIXBINS; bin++, advance += 2 )
+		*advance = frequencies[bin%FIXBPERO];
+}
+
+void PushSample32( int16_t dat )
+{
+	int pass; //HandleInt runs twice per sample, i.e. two schedule slots each.
+	for( pass = 0; pass < 2; pass++ ) HandleInt( dat );
+}
+
+
+#ifndef CCEMBEDDED
+
+void UpdateBinsForDFT32( const float * frequencies )
+{
+	int i; //Advance = 65536/entry, read from the last FIXBPERO-sized group of frequencies.
+	for( i = 0; i < FIXBINS; i++ )
+	{
+		double adv = 65536.0/frequencies[(i%FIXBPERO) + (FIXBPERO*(OCTAVES-1))];
+		Sdatspace32A[i*2] = ( adv >= 65535.0 ) ? 65535 : ( adv > 0 ) ? (uint16_t)adv : 0; //Clamp: an out-of-range conversion is undefined.
+	}
+}
+
+#endif
+
+
+#ifndef CCEMBEDDED
+
+void DoDFTProgressive32( float * outbins, float * frequencies, int bins, const float * databuffer, int place_in_data_buffer, int size_of_data_buffer, float q, float speedup )
+{
+	//Runs every sample between the previous call's position and
+	//place_in_data_buffer through the integer DFT, then writes FIXBINS float
+	//magnitudes to outbins.  bins must equal FIXBINS; q and speedup are unused.
+	static float backupbins[FIXBINS];
+	int i;
+	static int last_place;
+
+	if( bins > 0 ) memset( outbins, 0, bins * sizeof( float ) );
+
+	//Check the size before touching FIXBINS entries, so that a shorter outbins
+	//is never overrun and never kept as the global output pointer.
+	if( FIXBINS != bins )
+	{
+		fprintf( stderr, "Error: Bins was reconfigured.  skippy requires a constant number of bins.\n" );
+		return;
+	}
+	//A head outside the ring buffer would make the walk below never terminate.
+	if( size_of_data_buffer <= 0 || place_in_data_buffer < 0 || place_in_data_buffer >= size_of_data_buffer ) return;
+
+	goutbins = outbins;
+	memcpy( outbins, backupbins, sizeof( backupbins ) );
+
+	if( !Sdonefirstrun )
+	{
+		SetupDFTProgressive32();
+		Sdonefirstrun = 1;
+	}
+	UpdateBinsForDFT32( frequencies );
+
+	for( i = last_place % size_of_data_buffer; i != place_in_data_buffer; i = (i+1)%size_of_data_buffer )
+	{
+		int16_t ifr1 = (int16_t)( ((databuffer[i]) ) * 4095 );
+		HandleInt( ifr1 );
+		HandleInt( ifr1 );
+	}
+	UpdateOutputBins32();
+	last_place = place_in_data_buffer;
+	memcpy( backupbins, outbins, sizeof( backupbins ) );
+}
+
+#endif
+
+
+
+
diff --git a/DFT32.h b/DFT32.h
new file mode 100644
index 0000000..6bce18d
--- /dev/null
+++ b/DFT32.h
@@ -0,0 +1,69 @@
+#ifndef _DFT32_H
+#define _DFT32_H
+
+#include <stdint.h>
+
+//A 32-bit version of the DFT used for ColorChord.
+//This header makes it convenient to use for an embedded system.
+//The 32-bit DFT avoids some bit shifts, however it uses slightly
+//more RAM and it uses a lot of 32-bit arithmetic.
+//
+//This is basically a clone of "ProgressiveIntegerSkippy" and changes
+//made here should be backported there as well.
+
+//You can # define these to be other things elsewhere.
+#ifndef OCTAVES
+#define OCTAVES  5
+#endif
+
+#ifndef FIXBPERO
+#define FIXBPERO 24
+#endif
+
+#ifndef FIXBINS
+#define FIXBINS  (FIXBPERO*OCTAVES)
+#endif
+
+#ifndef BINCYCLE
+#define BINCYCLE (1<<OCTAVES)
+#endif
+
+//You may increase this past 5 but if you do, the amplitude of your incoming signal
+//must decrease.  Increasing this value makes responses slower.  Lower values are
+//more responsive.
+#ifndef DFTIIR
+#define DFTIIR 4
+#endif
+
+//Everything the integer one buys, except it only calculates 2 octaves worth of notes per audio frame.
+//This is sort of working, but still have some quality issues.
+//It would theoretically be fast enough to work on an AVR.
+//NOTE: This is the only DFT available to the embedded port of ColorChord
+#ifndef CCEMBEDDED
+void DoDFTProgressive32( float * outbins, float * frequencies, int bins, const float * databuffer, int place_in_data_buffer, int size_of_data_buffer, float q, float speedup );
+#endif
+
+//It's actually split into a few functions, which you can call on your own:
+int SetupDFTProgressive32();  //Call at start. Returns nonzero if error.
+void UpdateBins32( const uint16_t * frequencies );
+
+//Call this to push on new frames of sound. 
+//Though it accepts an int16, it actually only takes -4095 to +4095. (13-bit)
+//Any more and you will exceed the accumulators and it will cause an overflow.
+void PushSample32( int16_t dat );
+
+#ifndef CCEMBEDDED
+void UpdateBinsForDFT32( const float * frequencies ); //Update the frequencies
+#endif
+
+void UpdateOutputBins32();
+
+//Whenever you need to read the bins, you can do it from here.
+//These outputs are limited to 0..~2047, this makes it possible
+//for you to process with uint16_t's more easily.
+//This is only refreshed when UpdateOutputBins32() is called.
+extern uint16_t embeddedbins32[];  //[FIXBINS]
+
+
+#endif
+
diff --git a/Makefile b/Makefile
index 6046917..dda437d 100644
--- a/Makefile
+++ b/Makefile
@@ -13,17 +13,10 @@ LDLIBS:=-lpthread -lasound -lm -lpulse-simple -lpulse
 CFLAGS:=-g -Os -flto -Wall
 EXTRALIBS:=-lusb-1.0
 
-colorchord : os_generic.o main.o  dft.o decompose.o filter.o color.o sort.o notefinder.o util.o outdrivers.o $(RAWDRAW) $(SOUND) $(OUTS) parameters.o chash.o 
+colorchord : os_generic.o main.o  dft.o decompose.o filter.o color.o sort.o notefinder.o util.o outdrivers.o $(RAWDRAW) $(SOUND) $(OUTS) parameters.o chash.o DFT32.o
 	gcc -o $@ $^ $(CFLAGS) $(LDLIBS) $(EXTRALIBS) $(RAWDRAWLIBS)
 
-embeddedcc : os_generic.c embeddedcc.c dft.c embeddednf.c
-	gcc -o $@ $^ $(CFLAGS) -DCCEMBEDDED $(LDFLAGS) $(EXTRALIBS) $(RAWDRAWLIBS)
-
-runembedded : embeddedcc
-	parec --format=u8 --rate=8000 --channels=1 --device=alsa_output.pci-0000_00_1b.0.analog-stereo.monitor | ./embeddedcc 
-
-
-colorchord.exe : os_generic.c main.c  dft.c decompose.c filter.c color.c sort.c notefinder.c util.c outdrivers.c DrawFunctions.c parameters.c chash.c WinDriver.c sound.c sound_null.c sound_win.c OutputVoronoi.c DisplayArray.c OutputLinear.c DisplayPie.c DisplayNetwork.c
+colorchord.exe : os_generic.c main.c  dft.c decompose.c filter.c color.c sort.c notefinder.c util.c outdrivers.c DrawFunctions.c parameters.c chash.c WinDriver.c sound.c sound_null.c sound_win.c OutputVoronoi.c DisplayArray.c OutputLinear.c DisplayPie.c DisplayNetwork.c DFT32.c
 	$(WINGCC) $(WINGCCFLAGS) -o $@ $^ $(WINLDFLAGS)
 
 
diff --git a/default.conf b/default.conf
index 502124b..0ccc958 100644
--- a/default.conf
+++ b/default.conf
@@ -52,7 +52,8 @@ octaves = 5
 # 1 = DFT Progressive
 # 2 = DFT Progressive Integer	
 # 3 = DFT Progressive Integer Skippy
-do_progressive_dft = 3
+# 4 = Integer, 32-Bit, Progressive, Skippy.
+do_progressive_dft = 4
 
 filter_iter = 2
 filter_strength = .5
diff --git a/dft.c b/dft.c
index 75aab1c..d9e574a 100644
--- a/dft.c
+++ b/dft.c
@@ -348,7 +348,7 @@ void DoDFTProgressiveInteger( float * outbins, float * frequencies, int bins, co
 
 static uint8_t Sdonefirstrun;
 //int8_t Ssintable[512]; //Actually [sin][cos] pairs.
-const int8_t Ssintable[512] = {
+static const int8_t Ssintable[512] = {
            0, 127,   3, 126,   6, 126,   9, 126,  12, 126,  15, 126,  18, 125,  21, 125,
           24, 124,  27, 123,  30, 123,  33, 122,  36, 121,  39, 120,  42, 119,  45, 118,
           48, 117,  51, 116,  54, 114,  57, 113,  59, 112,  62, 110,  65, 108,  67, 107,
@@ -397,7 +397,7 @@ int main()
 		Ssintable[i*2+1] = (int8_t)((cosf( i / 256.0 * 6.283 ) * 127.0));
 	}
 
-	printf( "const int8_t Ssintable[512] = {" );
+	printf( "static const int8_t Ssintable[512] = {" );
 	for( i = 0; i < 512; i++ )
 	{
 		if( !(i & 0xf ) )
@@ -415,9 +415,9 @@ int main()
 uint16_t Sdatspace[FIXBINS*4];  //(advances,places,isses,icses)
 
 //For 
-uint8_t Sdo_this_octave[BINCYCLE];
-int16_t Saccum_octavebins[OCTAVES];
-uint8_t Swhichoctaveplace;
+static uint8_t Sdo_this_octave[BINCYCLE];
+static int16_t Saccum_octavebins[OCTAVES];
+static uint8_t Swhichoctaveplace;
 uint16_t embeddedbins[FIXBINS]; //This is updated every time the DFT hits the octavecount, or 1/32 updates.
 
 //From: http://stackoverflow.com/questions/1100090/looking-for-an-efficient-integer-square-root-algorithm-for-arm-thumb2
diff --git a/dft.h b/dft.h
index 531d0cc..19d845e 100644
--- a/dft.h
+++ b/dft.h
@@ -53,8 +53,13 @@ void Push8BitIntegerSkippy( int8_t dat ); //Call this to push on new frames of s
 #define FIXBPERO 24
 #endif
 
+#ifndef FIXBINS
 #define FIXBINS  (FIXBPERO*OCTAVES)
+#endif
+
+#ifndef BINCYCLE
 #define BINCYCLE (1<<OCTAVES)
+#endif
 
 //This variable determins how much to nerf the current sample of the DFT.
 //I've found issues when this is smaller, but bigger values do have a negative
@@ -65,7 +70,7 @@ void Push8BitIntegerSkippy( int8_t dat ); //Call this to push on new frames of s
 
 //Whenever you need to read the bins, you can do it from here.
 extern uint16_t Sdatspace[];  //(advances,places,isses,icses)
-extern uint16_t embeddedbins[]; //This is updated every time the DFT hits the octavecount, or 1/32 updates.
+extern uint16_t embeddedbins[]; //This is updated every time the DFT hits the octavecount, or every BINCYCLE updates.
 
 #endif
 
diff --git a/embeddednf.c b/embeddednf.c
index 07e7930..ff69855 100644
--- a/embeddednf.c
+++ b/embeddednf.c
@@ -57,7 +57,11 @@ void UpdateFreqs()
 		fbins[i] = ( 65536.0 ) / ( DFREQ ) * frq * 16;
 	}
 
+#ifdef USE_32DFT
+	UpdateBins32( fbins );
+#else
 	UpdateBinsForProgressiveIntegerSkippyInt( fbins );
+#endif
 }
 
 void Init()
@@ -82,7 +86,11 @@ void Init()
 	}
 
 	//Step 1: Initialize the Integer DFT.
+#ifdef USE_32DFT
+	SetupDFTProgressive32();
+#else
 	SetupDFTProgressiveIntegerSkippy();
+#endif
 
 	//Step 2: Set up the frequency list.  You could do this multiple times
 	//if you want to change the loadout of the frequencies.
@@ -93,10 +101,18 @@ void HandleFrameInfo()
 {
 	int i, j, k;
 
+#ifdef USE_32DFT
+	uint16_t * strens;
+	UpdateOutputBins32();
+	strens = embeddedbins32;
+#else
+	uint16_t * strens = embeddedbins;
+#endif
+
 	//Copy out the bins from the DFT to our fuzzed bins.
 	for( i = 0; i < FIXBINS; i++ )
 	{
-		fuzzed_bins[i] = (fuzzed_bins[i] + (embeddedbins[i]>>FUZZ_IIR_BITS) -
+		fuzzed_bins[i] = (fuzzed_bins[i] + (strens[i]>>FUZZ_IIR_BITS) -
 			(fuzzed_bins[i]>>FUZZ_IIR_BITS));
 	}
 
@@ -322,21 +338,22 @@ void HandleFrameInfo()
 	}
 
 	//We now have notes!!!
-/*
+#if 1
 	for( i = 0; i < MAXNOTES; i++ )
 	{
 		if( note_peak_freqs[i] == 255 ) continue;
 		printf( "(%3d %4d %4d) ", note_peak_freqs[i], note_peak_amps[i], note_peak_amps2[i] );
 	}
 	printf( "\n") ;
-*/
+#endif
 
-/*
+#if 0
 	for( i = 0; i < FIXBPERO; i++ )
 	{
 		printf( "%5d ", folded_bins[i] );
 	}
-	printf( "\n" );*/
+	printf( "\n" );
+#endif
 }
 
 
diff --git a/embeddednf.h b/embeddednf.h
index a28e930..bc953da 100644
--- a/embeddednf.h
+++ b/embeddednf.h
@@ -1,7 +1,9 @@
 #ifndef _EMBEDDEDNF_H
 #define _EMBEDDEDNF_H
 
-#include "dft.h"
+//Use a 32-bit DFT.  It won't work for AVRs, but for any 32-bit systems where
+//they can multiply quickly, this is the bees knees.
+#define USE_32DFT
 
 #define DFREQ     8000
 #define BASE_FREQ 55.0  // You may make this a float.
@@ -32,6 +34,13 @@
 #define AMP_1_NERFING_BITS 5
 #define AMP_2_NERFING_BITS 3
 
+
+#ifdef USE_32DFT
+#include "DFT32.h"
+#else
+#include "dft.h"
+#endif
+
 extern uint16_t folded_bins[]; //[FIXBPERO] <- The folded fourier output.
 extern uint16_t fuzzed_bins[]; //[FIXBINS]  <- The Full DFT after IIR, Blur and Taper
 
diff --git a/embeddedx86/Makefile b/embeddedx86/Makefile
new file mode 100644
index 0000000..5a0d78b
--- /dev/null
+++ b/embeddedx86/Makefile
@@ -0,0 +1,13 @@
+all : embeddedcc
+# -DCCEMBEDDED compiles out the CCEMBEDDED-guarded sections of DFT32.c; -I.. finds the shared headers.
+CFLAGS:=-Ofast -DCCEMBEDDED -I.. -flto -m32
+LDFLAGS:=-ffunction-sections -Wl,--gc-sections -fno-asynchronous-unwind-tables -Wl,--strip-all
+# Build the demo straight from the shared sources in the parent directory.
+embeddedcc :  ../embeddednf.c ../DFT32.c embeddedcc.c 
+	gcc -o $@ $^ $(CFLAGS) $(LDFLAGS)
+# Pipe 8 kHz unsigned 8-bit mono audio from parec into the demo's stdin.
+runembedded : embeddedcc
+	parec --format=u8 --rate=8000 --channels=1 --device=alsa_output.pci-0000_00_1b.0.analog-stereo.monitor | ./embeddedcc 
+# Remove the binary and editor backup files.
+clean :
+	rm -rf embeddedcc *~
diff --git a/embeddedcc.c b/embeddedx86/embeddedcc.c
similarity index 87%
rename from embeddedcc.c
rename to embeddedx86/embeddedcc.c
index ee6f066..127d68f 100644
--- a/embeddedcc.c
+++ b/embeddedx86/embeddedcc.c
@@ -5,7 +5,6 @@
 
 #include <stdio.h>
 #include "embeddednf.h"
-#include "dft.h"
 
 int main()
 {
@@ -15,7 +14,11 @@ int main()
 	while( ( ci = getchar() ) != EOF )
 	{
 		int cs = ci - 0x80;
+#ifdef USE_32DFT
+		PushSample32( ((int8_t)cs)*32 );
+#else
 		Push8BitIntegerSkippy( (int8_t)cs );
+#endif
 		//printf( "%d ", cs ); fflush( stdout );
 		wf++;
 		if( wf == 64 )
diff --git a/notefinder.c b/notefinder.c
index 3322929..8f7b8f0 100644
--- a/notefinder.c
+++ b/notefinder.c
@@ -9,6 +9,7 @@
 #include "filter.h"
 #include "decompose.h"
 #include "sort.h"
+#include "DFT32.h"
 
 struct NoteFinder * CreateNoteFinder( int spsRec )
 {
@@ -193,6 +194,9 @@ void RunNoteFinder( struct NoteFinder * nf, const float * audio_stream, int head
 	case 3:
 		DoDFTProgressiveIntegerSkippy( dftbins, nf->frequencies, freqs, audio_stream, head, buffersize, nf->dft_q, nf->dft_speedup );
 		break;
+	case 4:
+		DoDFTProgressive32( dftbins, nf->frequencies, freqs, audio_stream, head, buffersize, nf->dft_q, nf->dft_speedup );
+		break;
 	default:
 		fprintf( stderr, "Error: No DFT Seleced\n" );
 	}