diff --git a/DFT32.c b/DFT32.c new file mode 100644 index 0000000..ded5ea7 --- /dev/null +++ b/DFT32.c @@ -0,0 +1,342 @@ +#include "DFT32.h" +#include + +#ifndef CCEMBEDDED +#include +#include +#include +static float * goutbins; +#endif + +uint16_t embeddedbins32[FIXBINS]; + +//NOTES to self: +// +// Let's say we want to try this on an AVR. +// 24 bins, 5 octaves = 120 bins. +// 20 MHz clock / 4.8k sps = 4096 IPS = 34 clocks per bin = :( +// We can do two at the same time, this frees us up some + +static uint8_t Sdonefirstrun; + +//A table of precomputed sin() values. Ranging -1500 to +1500 +//If we increase this, it may cause overflows elsewhere in code. +const int16_t Ssinonlytable[256] = { + 0, 36, 73, 110, 147, 183, 220, 256, + 292, 328, 364, 400, 435, 470, 505, 539, + 574, 607, 641, 674, 707, 739, 771, 802, + 833, 863, 893, 922, 951, 979, 1007, 1034, + 1060, 1086, 1111, 1135, 1159, 1182, 1204, 1226, + 1247, 1267, 1286, 1305, 1322, 1339, 1355, 1371, + 1385, 1399, 1412, 1424, 1435, 1445, 1455, 1463, + 1471, 1477, 1483, 1488, 1492, 1495, 1498, 1499, + 1500, 1499, 1498, 1495, 1492, 1488, 1483, 1477, + 1471, 1463, 1455, 1445, 1435, 1424, 1412, 1399, + 1385, 1371, 1356, 1339, 1322, 1305, 1286, 1267, + 1247, 1226, 1204, 1182, 1159, 1135, 1111, 1086, + 1060, 1034, 1007, 979, 951, 922, 893, 863, + 833, 802, 771, 739, 707, 674, 641, 607, + 574, 539, 505, 470, 435, 400, 364, 328, + 292, 256, 220, 183, 147, 110, 73, 36, + 0, -36, -73, -110, -146, -183, -219, -256, + -292, -328, -364, -399, -435, -470, -505, -539, + -573, -607, -641, -674, -706, -739, -771, -802, + -833, -863, -893, -922, -951, -979, -1007, -1034, + -1060, -1086, -1111, -1135, -1159, -1182, -1204, -1226, + -1247, -1267, -1286, -1305, -1322, -1339, -1355, -1371, + -1385, -1399, -1412, -1424, -1435, -1445, -1454, -1463, + -1471, -1477, -1483, -1488, -1492, -1495, -1498, -1499, + -1500, -1499, -1498, -1495, -1492, -1488, -1483, -1477, + -1471, -1463, -1455, -1445, -1435, -1424, -1412, -1399, + -1385, -1371, -1356, -1339, -1322, -1305, -1286, -1267, + -1247, -1226, -1204, -1182, -1159, -1135, -1111, -1086, + -1060, -1034, -1007, -979, -951, -923, -893, -863, + -833, -802, -771, -739, -707, -674, -641, -608, + -574, -540, -505, -470, -435, -400, -364, -328, + -292, -256, -220, -183, -147, -110, -73, -37,}; + + +/** The above table was created using the following code: +#include +#include +#include + +int16_t Ssintable[256]; //Actually, just [sin]. + +int main() +{ + int i; + for( i = 0; i < 256; i++ ) + { + Ssintable[i] = (int16_t)((sinf( i / 256.0 * 6.283 ) * 1500.0)); + } + + printf( "const int16_t Ssinonlytable[256] = {" ); + for( i = 0; i < 256; i++ ) + { + if( !(i & 0x7 ) ) + { + printf( "\n\t" ); + } + printf( "%6d," ,Ssintable[i] ); + } + printf( "};\n" ); +} */ + + + +uint16_t Sdatspace32A[FIXBINS*2]; //(advances,places) +int32_t Sdatspace32B[FIXBINS*2]; //(isses,icses) +int32_t Sdatspace32BOut[FIXBINS*2]; //(isses,icses) + +//For +static uint8_t Sdo_this_octave[BINCYCLE]; +static int32_t Saccum_octavebins[OCTAVES]; +static uint8_t Swhichoctaveplace; +uint16_t embeddedbins[FIXBINS]; //This is updated every time the DFT hits the octavecount, or 1/32 updates. + +//From: http://stackoverflow.com/questions/1100090/looking-for-an-efficient-integer-square-root-algorithm-for-arm-thumb2 +/** + * \brief Fast Square root algorithm, with rounding + * + * This does arithmetic rounding of the result. That is, if the real answer + * would have a fractional part of 0.5 or greater, the result is rounded up to + * the next integer. + * - SquareRootRounded(2) --> 1 + * - SquareRootRounded(3) --> 2 + * - SquareRootRounded(4) --> 2 + * - SquareRootRounded(6) --> 2 + * - SquareRootRounded(7) --> 3 + * - SquareRootRounded(8) --> 3 + * - SquareRootRounded(9) --> 3 + * + * \param[in] a_nInput - unsigned integer for which to find the square root + * + * \return Integer square root of the input value. + */ +static uint16_t SquareRootRounded(uint32_t a_nInput) +{ + uint32_t op = a_nInput; + uint32_t res = 0; + uint32_t one = 1uL << 30; // The second-to-top bit is set: use 1u << 14 for uint16_t type; use 1uL<<30 for uint32_t type + + + // "one" starts at the highest power of four <= than the argument. + while (one > op) + { + one >>= 2; + } + + while (one != 0) + { + if (op >= res + one) + { + op = op - (res + one); + res = res + 2 * one; + } + res >>= 1; + one >>= 2; + } + + /* Do arithmetic rounding to nearest integer */ + if (op > res) + { + res++; + } + + return res; +} + +void UpdateOutputBins32() +{ + int i; + int * ipt = &Sdatspace32BOut[0]; + for( i = 0; i < FIXBINS; i++ ) + { + int16_t isps = *(ipt++)>>16; + int16_t ispc = *(ipt++)>>16; + + int octave = i / FIXBPERO; + +#ifndef CCEMBEDDED + uint32_t mux = ( (isps) * (isps)) + ((ispc) * (ispc)); + goutbins[i] = sqrtf( (float)mux ); + goutbins[i] /= (78<> octave; + } +} + +static void HandleInt( int16_t sample ) +{ + int i; + uint16_t adv; + uint8_t localipl; + + uint8_t oct = Sdo_this_octave[Swhichoctaveplace]; + Swhichoctaveplace ++; + Swhichoctaveplace &= BINCYCLE-1; + + if( oct > 128 ) + { + //Special: This is when we can update everything. + + int32_t * bins = &Sdatspace32B[0]; + int32_t * binsOut = &Sdatspace32BOut[0]; + + for( i = 0; i < FIXBINS; i++ ) + { + //First for the SIN then the COS. + int32_t val = *(bins); + *(binsOut++) = val; + *(bins++) -= val>>DFTIIR; + + val = *(bins); + *(binsOut++) = val; + *(bins++) -= val>>DFTIIR; + } + return; + } + + + for( i = 0; i < OCTAVES;i++ ) + { + Saccum_octavebins[i] += sample; + } + + uint16_t * dsA = &Sdatspace32A[oct*FIXBPERO*2]; + int32_t * dsB = &Sdatspace32B[oct*FIXBPERO*2]; + + sample = Saccum_octavebins[oct]>>(OCTAVES-oct); + Saccum_octavebins[oct] = 0; + + for( i = 0; i < FIXBPERO; i++ ) + { + adv = *(dsA++); + localipl = *(dsA) >> 8; + *(dsA++) += adv; + + *(dsB++) += (Ssinonlytable[localipl] * sample); + //Get the cosine (1/4 wavelength out-of-phase with sin) + localipl += 64; + *(dsB++) += (Ssinonlytable[localipl] * sample); + } +} + +int SetupDFTProgressive32() +{ + int i; + int j; + //Sdatspace = malloc(FIXBPERO*OCTAVES*8); + //memset(Sdatspace,0,FIXBPERO*OCTAVES*8); + //printf( "MS: %d\n", FIXBPERO*OCTAVES*8); + Sdonefirstrun = 1; + + for( i = 0; i < BINCYCLE; i++ ) + { + // Sdo_this_octave = + // 4 3 4 2 4 3 4 ... + //search for "first" zero + + for( j = 0; j <= OCTAVES; j++ ) + { + if( ((1< OCTAVES ) + { +#ifndef CCEMBEDDED + fprintf( stderr, "Error: algorithm fault.\n" ); + exit( -1 ); +#endif + return -1; + } + Sdo_this_octave[i] = OCTAVES-j-1; + } + return 0; +} + + + +void UpdateBins32( const uint16_t * frequencies ) +{ + int i; + for( i = 0; i < FIXBINS; i++ ) + { + uint16_t freq = frequencies[i%FIXBPERO]; + Sdatspace32A[i*2] = freq;// / oneoveroctave; + } +} + +void PushSample32( int16_t dat ) +{ + HandleInt( dat ); + HandleInt( dat ); +} + + +#ifndef CCEMBEDDED + +void UpdateBinsForDFT32( const float * frequencies ) +{ + int i; + for( i = 0; i < FIXBINS; i++ ) + { + float freq = frequencies[(i%FIXBPERO) + (FIXBPERO*(OCTAVES-1))]; + Sdatspace32A[i*2] = (65536.0/freq);// / oneoveroctave; + } +} + +#endif + + +#ifndef CCEMBEDDED + +void DoDFTProgressive32( float * outbins, float * frequencies, int bins, const float * databuffer, int place_in_data_buffer, int size_of_data_buffer, float q, float speedup ) +{ + static float backupbins[FIXBINS]; + int i; + static int last_place; + + memset( outbins, 0, bins * sizeof( float ) ); + goutbins = outbins; + + memcpy( outbins, backupbins, FIXBINS*4 ); + + if( FIXBINS != bins ) + { + fprintf( stderr, "Error: Bins was reconfigured. skippy requires a constant number of bins.\n" ); + return; + } + + +//printf( "SKIPPY\n" ); + + if( !Sdonefirstrun ) + { + SetupDFTProgressive32(); + Sdonefirstrun = 1; + } + + UpdateBinsForDFT32( frequencies ); + + for( i = last_place; i != place_in_data_buffer; i = (i+1)%size_of_data_buffer ) + { + int16_t ifr1 = (int16_t)( ((databuffer[i]) ) * 4095 ); + HandleInt( ifr1 ); + HandleInt( ifr1 ); + } + + UpdateOutputBins32(); + + last_place = place_in_data_buffer; + + memcpy( backupbins, outbins, FIXBINS*4 ); +} + +#endif + + + + diff --git a/DFT32.h b/DFT32.h new file mode 100644 index 0000000..6bce18d --- /dev/null +++ b/DFT32.h @@ -0,0 +1,69 @@ +#ifndef _DFT32_H +#define _DFT32_H + +#include + +//A 32-bit version of the DFT used for ColorChord. +//This header makes it convenient to use for an embedded system. +//The 32-bit DFT avoids some bit shifts, however it uses slightly +//more RAM and it uses a lot of 32-bit arithmatic. +// +//This is basically a clone of "ProgressiveIntegerSkippy" and changes +//made here should be backported there as well. + +//You can # define these to be other things elsewhere. +#ifndef OCTAVES +#define OCTAVES 5 +#endif + +#ifndef FIXBPERO +#define FIXBPERO 24 +#endif + +#ifndef FIXBINS +#define FIXBINS (FIXBPERO*OCTAVES) +#endif + +#ifndef BINCYCLE +#define BINCYCLE (1<>FUZZ_IIR_BITS) - + fuzzed_bins[i] = (fuzzed_bins[i] + (strens[i]>>FUZZ_IIR_BITS) - (fuzzed_bins[i]>>FUZZ_IIR_BITS)); } @@ -322,21 +338,22 @@ void HandleFrameInfo() } //We now have notes!!! -/* +#if 1 for( i = 0; i < MAXNOTES; i++ ) { if( note_peak_freqs[i] == 255 ) continue; printf( "(%3d %4d %4d) ", note_peak_freqs[i], note_peak_amps[i], note_peak_amps2[i] ); } printf( "\n") ; -*/ +#endif -/* +#if 0 for( i = 0; i < FIXBPERO; i++ ) { printf( "%5d ", folded_bins[i] ); } - printf( "\n" );*/ + printf( "\n" ); +#endif } diff --git a/embeddednf.h b/embeddednf.h index a28e930..bc953da 100644 --- a/embeddednf.h +++ b/embeddednf.h @@ -1,7 +1,9 @@ #ifndef _EMBEDDEDNF_H #define _EMBEDDEDNF_H -#include "dft.h" +//Use a 32-bit DFT. It won't work for AVRs, but for any 32-bit systems where +//they can multiply quickly, this is the bees knees. +#define USE_32DFT #define DFREQ 8000 #define BASE_FREQ 55.0 // You may make this a float. @@ -32,6 +34,13 @@ #define AMP_1_NERFING_BITS 5 #define AMP_2_NERFING_BITS 3 + +#ifdef USE_32DFT +#include "DFT32.h" +#else +#include "dft.h" +#endif + extern uint16_t folded_bins[]; //[FIXBPERO] <- The folded fourier output. extern uint16_t fuzzed_bins[]; //[FIXBINS] <- The Full DFT after IIR, Blur and Taper diff --git a/embeddedx86/Makefile b/embeddedx86/Makefile new file mode 100644 index 0000000..5a0d78b --- /dev/null +++ b/embeddedx86/Makefile @@ -0,0 +1,13 @@ +all : embeddedcc + +CFLAGS:=-Ofast -DCCEMBEDDED -I.. -flto -m32 +LDFLAGS:=-ffunction-sections -Wl,--gc-sections -fno-asynchronous-unwind-tables -Wl,--strip-all + +embeddedcc : ../embeddednf.c ../DFT32.c embeddedcc.c + gcc -o $@ $^ $(CFLAGS) $(LDFLAGS) + +runembedded : embeddedcc + parec --format=u8 --rate=8000 --channels=1 --device=alsa_output.pci-0000_00_1b.0.analog-stereo.monitor | ./embeddedcc + +clean : + rm -rf embeddedcc *~ diff --git a/embeddedcc.c b/embeddedx86/embeddedcc.c similarity index 87% rename from embeddedcc.c rename to embeddedx86/embeddedcc.c index ee6f066..127d68f 100644 --- a/embeddedcc.c +++ b/embeddedx86/embeddedcc.c @@ -5,7 +5,6 @@ #include #include "embeddednf.h" -#include "dft.h" int main() { @@ -15,7 +14,11 @@ int main() while( ( ci = getchar() ) != EOF ) { int cs = ci - 0x80; +#ifdef USE_32DFT + PushSample32( ((int8_t)cs)*32 ); +#else Push8BitIntegerSkippy( (int8_t)cs ); +#endif //printf( "%d ", cs ); fflush( stdout ); wf++; if( wf == 64 ) diff --git a/notefinder.c b/notefinder.c index 3322929..8f7b8f0 100644 --- a/notefinder.c +++ b/notefinder.c @@ -9,6 +9,7 @@ #include "filter.h" #include "decompose.h" #include "sort.h" +#include "DFT32.h" struct NoteFinder * CreateNoteFinder( int spsRec ) { @@ -193,6 +194,9 @@ void RunNoteFinder( struct NoteFinder * nf, const float * audio_stream, int head case 3: DoDFTProgressiveIntegerSkippy( dftbins, nf->frequencies, freqs, audio_stream, head, buffersize, nf->dft_q, nf->dft_speedup ); break; + case 4: + DoDFTProgressive32( dftbins, nf->frequencies, freqs, audio_stream, head, buffersize, nf->dft_q, nf->dft_speedup ); + break; default: fprintf( stderr, "Error: No DFT Seleced\n" ); }