359 lines
9.2 KiB
C
359 lines
9.2 KiB
C
//Copyright 2015 <>< Charles Lohr under the ColorChord License.
|
|
|
|
#include "DFT32.h"
|
|
#include <string.h>
|
|
|
|
#ifndef CCEMBEDDED
|
|
#include <stdlib.h>
|
|
#include <stdio.h>
|
|
#include <math.h>
|
|
static float * goutbins;
|
|
#endif
|
|
|
|
uint16_t embeddedbins32[FIXBINS];
|
|
|
|
//NOTES to self:
|
|
//
|
|
// Let's say we want to try this on an AVR.
|
|
// 24 bins, 5 octaves = 120 bins.
|
|
// 20 MHz clock / 4.8k sps = 4096 IPS = 34 clocks per bin = :(
|
|
// We can do two at the same time, this frees us up some
|
|
|
|
static uint8_t Sdonefirstrun;
|
|
|
|
//A table of precomputed sin() values. Ranging -1500 to +1500
|
|
//If we increase this, it may cause overflows elsewhere in code.
|
|
const int16_t Ssinonlytable[256] = {
|
|
0, 36, 73, 110, 147, 183, 220, 256,
|
|
292, 328, 364, 400, 435, 470, 505, 539,
|
|
574, 607, 641, 674, 707, 739, 771, 802,
|
|
833, 863, 893, 922, 951, 979, 1007, 1034,
|
|
1060, 1086, 1111, 1135, 1159, 1182, 1204, 1226,
|
|
1247, 1267, 1286, 1305, 1322, 1339, 1355, 1371,
|
|
1385, 1399, 1412, 1424, 1435, 1445, 1455, 1463,
|
|
1471, 1477, 1483, 1488, 1492, 1495, 1498, 1499,
|
|
1500, 1499, 1498, 1495, 1492, 1488, 1483, 1477,
|
|
1471, 1463, 1455, 1445, 1435, 1424, 1412, 1399,
|
|
1385, 1371, 1356, 1339, 1322, 1305, 1286, 1267,
|
|
1247, 1226, 1204, 1182, 1159, 1135, 1111, 1086,
|
|
1060, 1034, 1007, 979, 951, 922, 893, 863,
|
|
833, 802, 771, 739, 707, 674, 641, 607,
|
|
574, 539, 505, 470, 435, 400, 364, 328,
|
|
292, 256, 220, 183, 147, 110, 73, 36,
|
|
0, -36, -73, -110, -146, -183, -219, -256,
|
|
-292, -328, -364, -399, -435, -470, -505, -539,
|
|
-573, -607, -641, -674, -706, -739, -771, -802,
|
|
-833, -863, -893, -922, -951, -979, -1007, -1034,
|
|
-1060, -1086, -1111, -1135, -1159, -1182, -1204, -1226,
|
|
-1247, -1267, -1286, -1305, -1322, -1339, -1355, -1371,
|
|
-1385, -1399, -1412, -1424, -1435, -1445, -1454, -1463,
|
|
-1471, -1477, -1483, -1488, -1492, -1495, -1498, -1499,
|
|
-1500, -1499, -1498, -1495, -1492, -1488, -1483, -1477,
|
|
-1471, -1463, -1455, -1445, -1435, -1424, -1412, -1399,
|
|
-1385, -1371, -1356, -1339, -1322, -1305, -1286, -1267,
|
|
-1247, -1226, -1204, -1182, -1159, -1135, -1111, -1086,
|
|
-1060, -1034, -1007, -979, -951, -923, -893, -863,
|
|
-833, -802, -771, -739, -707, -674, -641, -608,
|
|
-574, -540, -505, -470, -435, -400, -364, -328,
|
|
-292, -256, -220, -183, -147, -110, -73, -37,};
|
|
|
|
|
|
/** The above table was created using the following code:
|
|
#include <math.h>
|
|
#include <stdio.h>
|
|
#include <stdint.h>
|
|
|
|
int16_t Ssintable[256]; //Actually, just [sin].
|
|
|
|
int main()
|
|
{
|
|
int i;
|
|
for( i = 0; i < 256; i++ )
|
|
{
|
|
Ssintable[i] = (int16_t)((sinf( i / 256.0 * 6.283 ) * 1500.0));
|
|
}
|
|
|
|
printf( "const int16_t Ssinonlytable[256] = {" );
|
|
for( i = 0; i < 256; i++ )
|
|
{
|
|
if( !(i & 0x7 ) )
|
|
{
|
|
printf( "\n\t" );
|
|
}
|
|
printf( "%6d," ,Ssintable[i] );
|
|
}
|
|
printf( "};\n" );
|
|
} */
|
|
|
|
|
|
|
|
uint16_t Sdatspace32A[FIXBINS*2]; //(advances,places)
|
|
int32_t Sdatspace32B[FIXBINS*2]; //(isses,icses)
|
|
|
|
//This is updated every time the DFT hits the octavecount, or 1/32 updates.
|
|
int32_t Sdatspace32BOut[FIXBINS*2]; //(isses,icses)
|
|
|
|
//Sdo_this_octave is a scheduling state for the running SIN/COS states for
|
|
//each bin. We have to execute the highest octave every time, however, we can
|
|
//get away with updating the next octave down every-other-time, then the next
|
|
//one down yet, every-other-time from that one. That way, no matter how many
|
|
//octaves we have, we only need to update FIXBPERO*2 DFT bins.
|
|
static uint8_t Sdo_this_octave[BINCYCLE];
|
|
|
|
static int32_t Saccum_octavebins[OCTAVES];
|
|
static uint8_t Swhichoctaveplace;
|
|
|
|
//
|
|
uint16_t embeddedbins[FIXBINS];
|
|
|
|
//From: http://stackoverflow.com/questions/1100090/looking-for-an-efficient-integer-square-root-algorithm-for-arm-thumb2
|
|
/**
|
|
* \brief Fast Square root algorithm, with rounding
|
|
*
|
|
* This does arithmetic rounding of the result. That is, if the real answer
|
|
* would have a fractional part of 0.5 or greater, the result is rounded up to
|
|
* the next integer.
|
|
* - SquareRootRounded(2) --> 1
|
|
* - SquareRootRounded(3) --> 2
|
|
* - SquareRootRounded(4) --> 2
|
|
* - SquareRootRounded(6) --> 2
|
|
* - SquareRootRounded(7) --> 3
|
|
* - SquareRootRounded(8) --> 3
|
|
* - SquareRootRounded(9) --> 3
|
|
*
|
|
* \param[in] a_nInput - unsigned integer for which to find the square root
|
|
*
|
|
* \return Integer square root of the input value.
|
|
*/
|
|
static uint16_t SquareRootRounded(uint32_t a_nInput)
|
|
{
|
|
uint32_t op = a_nInput;
|
|
uint32_t res = 0;
|
|
uint32_t one = 1uL << 30; // The second-to-top bit is set: use 1u << 14 for uint16_t type; use 1uL<<30 for uint32_t type
|
|
|
|
|
|
// "one" starts at the highest power of four <= than the argument.
|
|
while (one > op)
|
|
{
|
|
one >>= 2;
|
|
}
|
|
|
|
while (one != 0)
|
|
{
|
|
if (op >= res + one)
|
|
{
|
|
op = op - (res + one);
|
|
res = res + 2 * one;
|
|
}
|
|
res >>= 1;
|
|
one >>= 2;
|
|
}
|
|
|
|
/* Do arithmetic rounding to nearest integer */
|
|
if (op > res)
|
|
{
|
|
res++;
|
|
}
|
|
|
|
return res;
|
|
}
|
|
|
|
void UpdateOutputBins32()
|
|
{
|
|
int i;
|
|
int32_t * ipt = &Sdatspace32BOut[0];
|
|
for( i = 0; i < FIXBINS; i++ )
|
|
{
|
|
int16_t isps = *(ipt++)>>16;
|
|
int16_t ispc = *(ipt++)>>16;
|
|
|
|
int octave = i / FIXBPERO;
|
|
|
|
//If we are running DFT32 on regular ColorChord, then we will need to
|
|
//also update goutbins[]... But if we're on embedded systems, we only
|
|
//update embeddedbins32.
|
|
#ifndef CCEMBEDDED
|
|
uint32_t mux = ( (isps) * (isps)) + ((ispc) * (ispc));
|
|
goutbins[i] = sqrtf( (float)mux );
|
|
//reasonable (but arbitrary amplification)
|
|
goutbins[i] /= (78<<DFTIIR)*(1<<octave);
|
|
#endif
|
|
|
|
uint32_t rmux = ( (isps) * (isps)) + ((ispc) * (ispc));
|
|
|
|
//bump up all outputs here, so when we nerf it by bit shifting by
|
|
//ctave we don't lose a lot of detail.
|
|
rmux = SquareRootRounded( rmux ) << 1;
|
|
|
|
embeddedbins32[i] = rmux >> octave;
|
|
}
|
|
}
|
|
|
|
static void HandleInt( int16_t sample )
|
|
{
|
|
int i;
|
|
uint16_t adv;
|
|
uint8_t localipl;
|
|
|
|
uint8_t oct = Sdo_this_octave[Swhichoctaveplace];
|
|
Swhichoctaveplace ++;
|
|
Swhichoctaveplace &= BINCYCLE-1;
|
|
|
|
if( oct > 128 )
|
|
{
|
|
//Special: This is when we can update everything.
|
|
//This gets run one out of every 1/(1<<OCTAVES) times.
|
|
//It handles updating part of the DFT.
|
|
int32_t * bins = &Sdatspace32B[0];
|
|
int32_t * binsOut = &Sdatspace32BOut[0];
|
|
|
|
for( i = 0; i < FIXBINS; i++ )
|
|
{
|
|
//First for the SIN then the COS.
|
|
int32_t val = *(bins);
|
|
*(binsOut++) = val;
|
|
*(bins++) -= val>>DFTIIR;
|
|
|
|
val = *(bins);
|
|
*(binsOut++) = val;
|
|
*(bins++) -= val>>DFTIIR;
|
|
}
|
|
return;
|
|
}
|
|
|
|
|
|
for( i = 0; i < OCTAVES;i++ )
|
|
{
|
|
Saccum_octavebins[i] += sample;
|
|
}
|
|
|
|
uint16_t * dsA = &Sdatspace32A[oct*FIXBPERO*2];
|
|
int32_t * dsB = &Sdatspace32B[oct*FIXBPERO*2];
|
|
|
|
sample = Saccum_octavebins[oct]>>(OCTAVES-oct);
|
|
Saccum_octavebins[oct] = 0;
|
|
|
|
for( i = 0; i < FIXBPERO; i++ )
|
|
{
|
|
adv = *(dsA++);
|
|
localipl = *(dsA) >> 8;
|
|
*(dsA++) += adv;
|
|
|
|
*(dsB++) += (Ssinonlytable[localipl] * sample);
|
|
//Get the cosine (1/4 wavelength out-of-phase with sin)
|
|
localipl += 64;
|
|
*(dsB++) += (Ssinonlytable[localipl] * sample);
|
|
}
|
|
}
|
|
|
|
int SetupDFTProgressive32()
|
|
{
|
|
int i;
|
|
int j;
|
|
|
|
Sdonefirstrun = 1;
|
|
|
|
for( i = 0; i < BINCYCLE; i++ )
|
|
{
|
|
// Sdo_this_octave =
|
|
// 4 3 4 2 4 3 4 ...
|
|
//search for "first" zero
|
|
|
|
for( j = 0; j <= OCTAVES; j++ )
|
|
{
|
|
if( ((1<<j) & i) == 0 ) break;
|
|
}
|
|
if( j > OCTAVES )
|
|
{
|
|
#ifndef CCEMBEDDED
|
|
fprintf( stderr, "Error: algorithm fault.\n" );
|
|
exit( -1 );
|
|
#endif
|
|
return -1;
|
|
}
|
|
Sdo_this_octave[i] = OCTAVES-j-1;
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
|
|
|
|
void UpdateBins32( const uint16_t * frequencies )
|
|
{
|
|
int i;
|
|
for( i = 0; i < FIXBINS; i++ )
|
|
{
|
|
uint16_t freq = frequencies[i%FIXBPERO];
|
|
Sdatspace32A[i*2] = freq;// / oneoveroctave;
|
|
}
|
|
}
|
|
|
|
void PushSample32( int16_t dat )
|
|
{
|
|
HandleInt( dat );
|
|
HandleInt( dat );
|
|
}
|
|
|
|
|
|
#ifndef CCEMBEDDED
|
|
|
|
void UpdateBinsForDFT32( const float * frequencies )
|
|
{
|
|
int i;
|
|
for( i = 0; i < FIXBINS; i++ )
|
|
{
|
|
float freq = frequencies[(i%FIXBPERO) + (FIXBPERO*(OCTAVES-1))];
|
|
Sdatspace32A[i*2] = (65536.0/freq);// / oneoveroctave;
|
|
}
|
|
}
|
|
|
|
#endif
|
|
|
|
|
|
#ifndef CCEMBEDDED
|
|
|
|
void DoDFTProgressive32( float * outbins, float * frequencies, int bins, const float * databuffer, int place_in_data_buffer, int size_of_data_buffer, float q, float speedup )
|
|
{
|
|
static float backupbins[FIXBINS];
|
|
int i;
|
|
static int last_place;
|
|
|
|
memset( outbins, 0, bins * sizeof( float ) );
|
|
goutbins = outbins;
|
|
|
|
memcpy( outbins, backupbins, FIXBINS*4 );
|
|
|
|
if( FIXBINS != bins )
|
|
{
|
|
fprintf( stderr, "Error: Bins was reconfigured. skippy requires a constant number of bins (%d != %d).\n", FIXBINS, bins );
|
|
return;
|
|
}
|
|
|
|
if( !Sdonefirstrun )
|
|
{
|
|
SetupDFTProgressive32();
|
|
Sdonefirstrun = 1;
|
|
}
|
|
|
|
UpdateBinsForDFT32( frequencies );
|
|
|
|
for( i = last_place; i != place_in_data_buffer; i = (i+1)%size_of_data_buffer )
|
|
{
|
|
int16_t ifr1 = (int16_t)( ((databuffer[i]) ) * 4095 );
|
|
HandleInt( ifr1 );
|
|
HandleInt( ifr1 );
|
|
}
|
|
|
|
UpdateOutputBins32();
|
|
|
|
last_place = place_in_data_buffer;
|
|
|
|
memcpy( backupbins, outbins, FIXBINS*4 );
|
|
}
|
|
|
|
#endif
|
|
|
|
|
|
|
|
|