colorchord/embeddedcommon/DFT32.c

359 lines
9.2 KiB
C

//Copyright 2015 <>< Charles Lohr under the ColorChord License.
#include "DFT32.h"
#include <string.h>
#ifndef CCEMBEDDED
#include <stdlib.h>
#include <stdio.h>
#include <math.h>
static float * goutbins;
#endif
uint16_t embeddedbins32[FIXBINS];
//NOTES to self:
//
// Let's say we want to try this on an AVR.
// 24 bins, 5 octaves = 120 bins.
// 20 MHz clock / 4.8k sps = 4096 IPS = 34 clocks per bin = :(
// We can do two at the same time, this frees us up some
static uint8_t Sdonefirstrun;
//A table of precomputed sin() values. Ranging -1500 to +1500
//If we increase this, it may cause overflows elsewhere in code.
const int16_t Ssinonlytable[256] = {
0, 36, 73, 110, 147, 183, 220, 256,
292, 328, 364, 400, 435, 470, 505, 539,
574, 607, 641, 674, 707, 739, 771, 802,
833, 863, 893, 922, 951, 979, 1007, 1034,
1060, 1086, 1111, 1135, 1159, 1182, 1204, 1226,
1247, 1267, 1286, 1305, 1322, 1339, 1355, 1371,
1385, 1399, 1412, 1424, 1435, 1445, 1455, 1463,
1471, 1477, 1483, 1488, 1492, 1495, 1498, 1499,
1500, 1499, 1498, 1495, 1492, 1488, 1483, 1477,
1471, 1463, 1455, 1445, 1435, 1424, 1412, 1399,
1385, 1371, 1356, 1339, 1322, 1305, 1286, 1267,
1247, 1226, 1204, 1182, 1159, 1135, 1111, 1086,
1060, 1034, 1007, 979, 951, 922, 893, 863,
833, 802, 771, 739, 707, 674, 641, 607,
574, 539, 505, 470, 435, 400, 364, 328,
292, 256, 220, 183, 147, 110, 73, 36,
0, -36, -73, -110, -146, -183, -219, -256,
-292, -328, -364, -399, -435, -470, -505, -539,
-573, -607, -641, -674, -706, -739, -771, -802,
-833, -863, -893, -922, -951, -979, -1007, -1034,
-1060, -1086, -1111, -1135, -1159, -1182, -1204, -1226,
-1247, -1267, -1286, -1305, -1322, -1339, -1355, -1371,
-1385, -1399, -1412, -1424, -1435, -1445, -1454, -1463,
-1471, -1477, -1483, -1488, -1492, -1495, -1498, -1499,
-1500, -1499, -1498, -1495, -1492, -1488, -1483, -1477,
-1471, -1463, -1455, -1445, -1435, -1424, -1412, -1399,
-1385, -1371, -1356, -1339, -1322, -1305, -1286, -1267,
-1247, -1226, -1204, -1182, -1159, -1135, -1111, -1086,
-1060, -1034, -1007, -979, -951, -923, -893, -863,
-833, -802, -771, -739, -707, -674, -641, -608,
-574, -540, -505, -470, -435, -400, -364, -328,
-292, -256, -220, -183, -147, -110, -73, -37,};
/** The above table was created using the following code:
#include <math.h>
#include <stdio.h>
#include <stdint.h>
int16_t Ssintable[256]; //Actually, just [sin].
int main()
{
int i;
for( i = 0; i < 256; i++ )
{
Ssintable[i] = (int16_t)((sinf( i / 256.0 * 6.283 ) * 1500.0));
}
printf( "const int16_t Ssinonlytable[256] = {" );
for( i = 0; i < 256; i++ )
{
if( !(i & 0x7 ) )
{
printf( "\n\t" );
}
printf( "%6d," ,Ssintable[i] );
}
printf( "};\n" );
} */
uint16_t Sdatspace32A[FIXBINS*2]; //(advances,places)
int32_t Sdatspace32B[FIXBINS*2]; //(isses,icses)
//This is updated every time the DFT hits the octavecount, or 1/32 updates.
int32_t Sdatspace32BOut[FIXBINS*2]; //(isses,icses)
//Sdo_this_octave is a scheduling state for the running SIN/COS states for
//each bin. We have to execute the highest octave every time, however, we can
//get away with updating the next octave down every-other-time, then the next
//one down yet, every-other-time from that one. That way, no matter how many
//octaves we have, we only need to update FIXBPERO*2 DFT bins.
static uint8_t Sdo_this_octave[BINCYCLE];
static int32_t Saccum_octavebins[OCTAVES];
static uint8_t Swhichoctaveplace;
//
uint16_t embeddedbins[FIXBINS];
//From: http://stackoverflow.com/questions/1100090/looking-for-an-efficient-integer-square-root-algorithm-for-arm-thumb2
/**
* \brief Fast Square root algorithm, with rounding
*
* This does arithmetic rounding of the result. That is, if the real answer
* would have a fractional part of 0.5 or greater, the result is rounded up to
* the next integer.
* - SquareRootRounded(2) --> 1
* - SquareRootRounded(3) --> 2
* - SquareRootRounded(4) --> 2
* - SquareRootRounded(6) --> 2
* - SquareRootRounded(7) --> 3
* - SquareRootRounded(8) --> 3
* - SquareRootRounded(9) --> 3
*
* \param[in] a_nInput - unsigned integer for which to find the square root
*
* \return Integer square root of the input value.
*/
static uint16_t SquareRootRounded(uint32_t a_nInput)
{
uint32_t op = a_nInput;
uint32_t res = 0;
uint32_t one = 1uL << 30; // The second-to-top bit is set: use 1u << 14 for uint16_t type; use 1uL<<30 for uint32_t type
// "one" starts at the highest power of four <= than the argument.
while (one > op)
{
one >>= 2;
}
while (one != 0)
{
if (op >= res + one)
{
op = op - (res + one);
res = res + 2 * one;
}
res >>= 1;
one >>= 2;
}
/* Do arithmetic rounding to nearest integer */
if (op > res)
{
res++;
}
return res;
}
void UpdateOutputBins32()
{
int i;
int32_t * ipt = &Sdatspace32BOut[0];
for( i = 0; i < FIXBINS; i++ )
{
int16_t isps = *(ipt++)>>16;
int16_t ispc = *(ipt++)>>16;
int octave = i / FIXBPERO;
//If we are running DFT32 on regular ColorChord, then we will need to
//also update goutbins[]... But if we're on embedded systems, we only
//update embeddedbins32.
#ifndef CCEMBEDDED
uint32_t mux = ( (isps) * (isps)) + ((ispc) * (ispc));
goutbins[i] = sqrtf( (float)mux );
//reasonable (but arbitrary amplification)
goutbins[i] /= (78<<DFTIIR)*(1<<octave);
#endif
uint32_t rmux = ( (isps) * (isps)) + ((ispc) * (ispc));
//bump up all outputs here, so when we nerf it by bit shifting by
//ctave we don't lose a lot of detail.
rmux = SquareRootRounded( rmux ) << 1;
embeddedbins32[i] = rmux >> octave;
}
}
static void HandleInt( int16_t sample )
{
int i;
uint16_t adv;
uint8_t localipl;
uint8_t oct = Sdo_this_octave[Swhichoctaveplace];
Swhichoctaveplace ++;
Swhichoctaveplace &= BINCYCLE-1;
if( oct > 128 )
{
//Special: This is when we can update everything.
//This gets run one out of every 1/(1<<OCTAVES) times.
//It handles updating part of the DFT.
int32_t * bins = &Sdatspace32B[0];
int32_t * binsOut = &Sdatspace32BOut[0];
for( i = 0; i < FIXBINS; i++ )
{
//First for the SIN then the COS.
int32_t val = *(bins);
*(binsOut++) = val;
*(bins++) -= val>>DFTIIR;
val = *(bins);
*(binsOut++) = val;
*(bins++) -= val>>DFTIIR;
}
return;
}
for( i = 0; i < OCTAVES;i++ )
{
Saccum_octavebins[i] += sample;
}
uint16_t * dsA = &Sdatspace32A[oct*FIXBPERO*2];
int32_t * dsB = &Sdatspace32B[oct*FIXBPERO*2];
sample = Saccum_octavebins[oct]>>(OCTAVES-oct);
Saccum_octavebins[oct] = 0;
for( i = 0; i < FIXBPERO; i++ )
{
adv = *(dsA++);
localipl = *(dsA) >> 8;
*(dsA++) += adv;
*(dsB++) += (Ssinonlytable[localipl] * sample);
//Get the cosine (1/4 wavelength out-of-phase with sin)
localipl += 64;
*(dsB++) += (Ssinonlytable[localipl] * sample);
}
}
int SetupDFTProgressive32()
{
int i;
int j;
Sdonefirstrun = 1;
for( i = 0; i < BINCYCLE; i++ )
{
// Sdo_this_octave =
// 4 3 4 2 4 3 4 ...
//search for "first" zero
for( j = 0; j <= OCTAVES; j++ )
{
if( ((1<<j) & i) == 0 ) break;
}
if( j > OCTAVES )
{
#ifndef CCEMBEDDED
fprintf( stderr, "Error: algorithm fault.\n" );
exit( -1 );
#endif
return -1;
}
Sdo_this_octave[i] = OCTAVES-j-1;
}
return 0;
}
void UpdateBins32( const uint16_t * frequencies )
{
int i;
for( i = 0; i < FIXBINS; i++ )
{
uint16_t freq = frequencies[i%FIXBPERO];
Sdatspace32A[i*2] = freq;// / oneoveroctave;
}
}
void PushSample32( int16_t dat )
{
HandleInt( dat );
HandleInt( dat );
}
#ifndef CCEMBEDDED
void UpdateBinsForDFT32( const float * frequencies )
{
int i;
for( i = 0; i < FIXBINS; i++ )
{
float freq = frequencies[(i%FIXBPERO) + (FIXBPERO*(OCTAVES-1))];
Sdatspace32A[i*2] = (65536.0/freq);// / oneoveroctave;
}
}
#endif
#ifndef CCEMBEDDED
void DoDFTProgressive32( float * outbins, float * frequencies, int bins, const float * databuffer, int place_in_data_buffer, int size_of_data_buffer, float q, float speedup )
{
static float backupbins[FIXBINS];
int i;
static int last_place;
memset( outbins, 0, bins * sizeof( float ) );
goutbins = outbins;
memcpy( outbins, backupbins, FIXBINS*4 );
if( FIXBINS != bins )
{
fprintf( stderr, "Error: Bins was reconfigured. skippy requires a constant number of bins (%d != %d).\n", FIXBINS, bins );
return;
}
if( !Sdonefirstrun )
{
SetupDFTProgressive32();
Sdonefirstrun = 1;
}
UpdateBinsForDFT32( frequencies );
for( i = last_place; i != place_in_data_buffer; i = (i+1)%size_of_data_buffer )
{
int16_t ifr1 = (int16_t)( ((databuffer[i]) ) * 4095 );
HandleInt( ifr1 );
HandleInt( ifr1 );
}
UpdateOutputBins32();
last_place = place_in_data_buffer;
memcpy( backupbins, outbins, FIXBINS*4 );
}
#endif