//Copyright 2015 <>< Charles Lohr under the ColorChord License. #include "DFT32.h" #include #ifndef CCEMBEDDED #include #include #include static float * goutbins; #endif uint16_t embeddedbins32[FIXBINS]; //NOTES to self: // // Let's say we want to try this on an AVR. // 24 bins, 5 octaves = 120 bins. // 20 MHz clock / 4.8k sps = 4096 IPS = 34 clocks per bin = :( // We can do two at the same time, this frees us up some static uint8_t Sdonefirstrun; //A table of precomputed sin() values. Ranging -1500 to +1500 //If we increase this, it may cause overflows elsewhere in code. const int16_t Ssinonlytable[256] = { 0, 36, 73, 110, 147, 183, 220, 256, 292, 328, 364, 400, 435, 470, 505, 539, 574, 607, 641, 674, 707, 739, 771, 802, 833, 863, 893, 922, 951, 979, 1007, 1034, 1060, 1086, 1111, 1135, 1159, 1182, 1204, 1226, 1247, 1267, 1286, 1305, 1322, 1339, 1355, 1371, 1385, 1399, 1412, 1424, 1435, 1445, 1455, 1463, 1471, 1477, 1483, 1488, 1492, 1495, 1498, 1499, 1500, 1499, 1498, 1495, 1492, 1488, 1483, 1477, 1471, 1463, 1455, 1445, 1435, 1424, 1412, 1399, 1385, 1371, 1356, 1339, 1322, 1305, 1286, 1267, 1247, 1226, 1204, 1182, 1159, 1135, 1111, 1086, 1060, 1034, 1007, 979, 951, 922, 893, 863, 833, 802, 771, 739, 707, 674, 641, 607, 574, 539, 505, 470, 435, 400, 364, 328, 292, 256, 220, 183, 147, 110, 73, 36, 0, -36, -73, -110, -146, -183, -219, -256, -292, -328, -364, -399, -435, -470, -505, -539, -573, -607, -641, -674, -706, -739, -771, -802, -833, -863, -893, -922, -951, -979, -1007, -1034, -1060, -1086, -1111, -1135, -1159, -1182, -1204, -1226, -1247, -1267, -1286, -1305, -1322, -1339, -1355, -1371, -1385, -1399, -1412, -1424, -1435, -1445, -1454, -1463, -1471, -1477, -1483, -1488, -1492, -1495, -1498, -1499, -1500, -1499, -1498, -1495, -1492, -1488, -1483, -1477, -1471, -1463, -1455, -1445, -1435, -1424, -1412, -1399, -1385, -1371, -1356, -1339, -1322, -1305, -1286, -1267, -1247, -1226, -1204, -1182, -1159, -1135, -1111, -1086, -1060, -1034, -1007, -979, -951, -923, -893, -863, -833, -802, -771, -739, -707, -674, -641, -608, -574, -540, -505, -470, -435, -400, -364, -328, -292, -256, -220, -183, -147, -110, -73, -37,}; /** The above table was created using the following code: #include #include #include int16_t Ssintable[256]; //Actually, just [sin]. int main() { int i; for( i = 0; i < 256; i++ ) { Ssintable[i] = (int16_t)((sinf( i / 256.0 * 6.283 ) * 1500.0)); } printf( "const int16_t Ssinonlytable[256] = {" ); for( i = 0; i < 256; i++ ) { if( !(i & 0x7 ) ) { printf( "\n\t" ); } printf( "%6d," ,Ssintable[i] ); } printf( "};\n" ); } */ uint16_t Sdatspace32A[FIXBINS*2]; //(advances,places) int32_t Sdatspace32B[FIXBINS*2]; //(isses,icses) //This is updated every time the DFT hits the octavecount, or 1/32 updates. int32_t Sdatspace32BOut[FIXBINS*2]; //(isses,icses) //Sdo_this_octave is a scheduling state for the running SIN/COS states for //each bin. We have to execute the highest octave every time, however, we can //get away with updating the next octave down every-other-time, then the next //one down yet, every-other-time from that one. That way, no matter how many //octaves we have, we only need to update FIXBPERO*2 DFT bins. static uint8_t Sdo_this_octave[BINCYCLE]; static int32_t Saccum_octavebins[OCTAVES]; static uint8_t Swhichoctaveplace; // uint16_t embeddedbins[FIXBINS]; //From: http://stackoverflow.com/questions/1100090/looking-for-an-efficient-integer-square-root-algorithm-for-arm-thumb2 /** * \brief Fast Square root algorithm, with rounding * * This does arithmetic rounding of the result. That is, if the real answer * would have a fractional part of 0.5 or greater, the result is rounded up to * the next integer. * - SquareRootRounded(2) --> 1 * - SquareRootRounded(3) --> 2 * - SquareRootRounded(4) --> 2 * - SquareRootRounded(6) --> 2 * - SquareRootRounded(7) --> 3 * - SquareRootRounded(8) --> 3 * - SquareRootRounded(9) --> 3 * * \param[in] a_nInput - unsigned integer for which to find the square root * * \return Integer square root of the input value. */ static uint16_t SquareRootRounded(uint32_t a_nInput) { uint32_t op = a_nInput; uint32_t res = 0; uint32_t one = 1uL << 30; // The second-to-top bit is set: use 1u << 14 for uint16_t type; use 1uL<<30 for uint32_t type // "one" starts at the highest power of four <= than the argument. while (one > op) { one >>= 2; } while (one != 0) { if (op >= res + one) { op = op - (res + one); res = res + 2 * one; } res >>= 1; one >>= 2; } /* Do arithmetic rounding to nearest integer */ if (op > res) { res++; } return res; } void UpdateOutputBins32() { int i; int32_t * ipt = &Sdatspace32BOut[0]; for( i = 0; i < FIXBINS; i++ ) { int16_t isps = *(ipt++)>>16; int16_t ispc = *(ipt++)>>16; int octave = i / FIXBPERO; //If we are running DFT32 on regular ColorChord, then we will need to //also update goutbins[]... But if we're on embedded systems, we only //update embeddedbins32. #ifndef CCEMBEDDED uint32_t mux = ( (isps) * (isps)) + ((ispc) * (ispc)); goutbins[i] = sqrtf( (float)mux ); //reasonable (but arbitrary amplification) goutbins[i] /= (78<> octave; } } static void HandleInt( int16_t sample ) { int i; uint16_t adv; uint8_t localipl; uint8_t oct = Sdo_this_octave[Swhichoctaveplace]; Swhichoctaveplace ++; Swhichoctaveplace &= BINCYCLE-1; for( i = 0; i < OCTAVES;i++ ) { Saccum_octavebins[i] += sample; } if( oct > 128 ) { //Special: This is when we can update everything. //This gets run once out of every (1<>DFTIIR; val = *(bins); *(binsOut++) = val; *(bins++) -= val>>DFTIIR; } return; } uint16_t * dsA = &Sdatspace32A[oct*FIXBPERO*2]; int32_t * dsB = &Sdatspace32B[oct*FIXBPERO*2]; sample = Saccum_octavebins[oct]>>(OCTAVES-oct); Saccum_octavebins[oct] = 0; for( i = 0; i < FIXBPERO; i++ ) { adv = *(dsA++); localipl = *(dsA) >> 8; *(dsA++) += adv; *(dsB++) += (Ssinonlytable[localipl] * sample); //Get the cosine (1/4 wavelength out-of-phase with sin) localipl += 64; *(dsB++) += (Ssinonlytable[localipl] * sample); } } int SetupDFTProgressive32() { int i; int j; Sdonefirstrun = 1; Sdo_this_octave[0] = 0xff; for( i = 0; i < BINCYCLE-1; i++ ) { // Sdo_this_octave = // 255 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4 is case for 5 octaves. // Initial state is special one, then at step i do octave = Sdo_this_octave with averaged samples from last update of that octave //search for "first" zero for( j = 0; j <= OCTAVES; j++ ) { if( ((1< OCTAVES ) { #ifndef CCEMBEDDED fprintf( stderr, "Error: algorithm fault.\n" ); exit( -1 ); #endif return -1; } Sdo_this_octave[i+1] = OCTAVES-j-1; } return 0; } void UpdateBins32( const uint16_t * frequencies ) { int i; for( i = 0; i < FIXBINS; i++ ) { uint16_t freq = frequencies[i%FIXBPERO]; Sdatspace32A[i*2] = freq;// / oneoveroctave; } } void PushSample32( int16_t dat ) { HandleInt( dat ); HandleInt( dat ); } #ifndef CCEMBEDDED void UpdateBinsForDFT32( const float * frequencies ) { int i; for( i = 0; i < FIXBINS; i++ ) { float freq = frequencies[(i%FIXBPERO) + (FIXBPERO*(OCTAVES-1))]; Sdatspace32A[i*2] = (65536.0/freq);// / oneoveroctave; } } #endif #ifndef CCEMBEDDED void DoDFTProgressive32( float * outbins, float * frequencies, int bins, const float * databuffer, int place_in_data_buffer, int size_of_data_buffer, float q, float speedup ) { static float backupbins[FIXBINS]; int i; static int last_place; memset( outbins, 0, bins * sizeof( float ) ); goutbins = outbins; memcpy( outbins, backupbins, FIXBINS*4 ); if( FIXBINS != bins ) { fprintf( stderr, "Error: Bins was reconfigured. skippy requires a constant number of bins (%d != %d).\n", FIXBINS, bins ); return; } if( !Sdonefirstrun ) { SetupDFTProgressive32(); Sdonefirstrun = 1; } UpdateBinsForDFT32( frequencies ); for( i = last_place; i != place_in_data_buffer; i = (i+1)%size_of_data_buffer ) { int16_t ifr1 = (int16_t)( ((databuffer[i]) ) * 4095 ); HandleInt( ifr1 ); HandleInt( ifr1 ); } UpdateOutputBins32(); last_place = place_in_data_buffer; memcpy( backupbins, outbins, FIXBINS*4 ); } #endif