Move the embedded stuff into architecture-specific folders

This commit is contained in:
cnlohr 2015-04-03 14:10:25 -04:00
parent 09be0f349b
commit f2a1086c97
11 changed files with 479 additions and 23 deletions

342
DFT32.c Normal file
View file

@ -0,0 +1,342 @@
#include "DFT32.h"
#include <string.h>
//Desktop-only pieces: the float output path needs libc/libm, none of which
//are pulled in when CCEMBEDDED is defined.
#ifndef CCEMBEDDED
#include <stdlib.h>
#include <stdio.h>
#include <math.h>
//Float output buffer most recently handed to DoDFTProgressive32();
//UpdateOutputBins32() writes its float magnitudes through this pointer.
static float * goutbins;
#endif
//Integer magnitude of every bin, written by UpdateOutputBins32().
uint16_t embeddedbins32[FIXBINS];
//NOTES to self:
//
// Let's say we want to try this on an AVR.
// 24 bins, 5 octaves = 120 bins.
// 20 MHz clock / 4.8k sps ~= 4096 instructions per sample ~= 34 clocks per bin = :(
// We can do two at the same time, this frees us up some
//Set to 1 by SetupDFTProgressive32(); DoDFTProgressive32() checks it so the
//setup only runs once.
static uint8_t Sdonefirstrun;
//A table of precomputed sin() values covering one full period in 256 steps,
//scaled to the range -1500 to +1500.
//HandleInt() indexes it with the top 8 bits of a bin's 16-bit phase value and
//reads the cosine from the same table at index+64 (a quarter period later,
//with 8-bit wraparound).
//If we increase the scale, it may cause overflows elsewhere in code.
const int16_t Ssinonlytable[256] = {
0, 36, 73, 110, 147, 183, 220, 256,
292, 328, 364, 400, 435, 470, 505, 539,
574, 607, 641, 674, 707, 739, 771, 802,
833, 863, 893, 922, 951, 979, 1007, 1034,
1060, 1086, 1111, 1135, 1159, 1182, 1204, 1226,
1247, 1267, 1286, 1305, 1322, 1339, 1355, 1371,
1385, 1399, 1412, 1424, 1435, 1445, 1455, 1463,
1471, 1477, 1483, 1488, 1492, 1495, 1498, 1499,
1500, 1499, 1498, 1495, 1492, 1488, 1483, 1477,
1471, 1463, 1455, 1445, 1435, 1424, 1412, 1399,
1385, 1371, 1356, 1339, 1322, 1305, 1286, 1267,
1247, 1226, 1204, 1182, 1159, 1135, 1111, 1086,
1060, 1034, 1007, 979, 951, 922, 893, 863,
833, 802, 771, 739, 707, 674, 641, 607,
574, 539, 505, 470, 435, 400, 364, 328,
292, 256, 220, 183, 147, 110, 73, 36,
0, -36, -73, -110, -146, -183, -219, -256,
-292, -328, -364, -399, -435, -470, -505, -539,
-573, -607, -641, -674, -706, -739, -771, -802,
-833, -863, -893, -922, -951, -979, -1007, -1034,
-1060, -1086, -1111, -1135, -1159, -1182, -1204, -1226,
-1247, -1267, -1286, -1305, -1322, -1339, -1355, -1371,
-1385, -1399, -1412, -1424, -1435, -1445, -1454, -1463,
-1471, -1477, -1483, -1488, -1492, -1495, -1498, -1499,
-1500, -1499, -1498, -1495, -1492, -1488, -1483, -1477,
-1471, -1463, -1455, -1445, -1435, -1424, -1412, -1399,
-1385, -1371, -1356, -1339, -1322, -1305, -1286, -1267,
-1247, -1226, -1204, -1182, -1159, -1135, -1111, -1086,
-1060, -1034, -1007, -979, -951, -923, -893, -863,
-833, -802, -771, -739, -707, -674, -641, -608,
-574, -540, -505, -470, -435, -400, -364, -328,
-292, -256, -220, -183, -147, -110, -73, -37,};
/** The above table was created using the following code:
#include <math.h>
#include <stdio.h>
#include <stdint.h>
int16_t Ssintable[256]; //Actually, just [sin].
int main()
{
int i;
for( i = 0; i < 256; i++ )
{
Ssintable[i] = (int16_t)((sinf( i / 256.0 * 6.283 ) * 1500.0));
}
printf( "const int16_t Ssinonlytable[256] = {" );
for( i = 0; i < 256; i++ )
{
if( !(i & 0x7 ) )
{
printf( "\n\t" );
}
printf( "%6d," ,Ssintable[i] );
}
printf( "};\n" );
} */
//Per-bin phase state, stored as (advance, place) pairs: "advance" is the
//amount added to "place" each time the bin is processed; the top 8 bits of
//"place" index the sine table.
uint16_t Sdatspace32A[FIXBINS*2]; //(advances,places)
//Running per-bin accumulators, stored as (sin, cos) pairs.
int32_t Sdatspace32B[FIXBINS*2]; //(isses,icses)
//Snapshot of Sdatspace32B taken on the "update everything" slot of the
//schedule; this is what UpdateOutputBins32() reads.
int32_t Sdatspace32BOut[FIXBINS*2]; //(isses,icses)
//Schedule: which octave HandleInt() processes on each of the BINCYCLE slots.
static uint8_t Sdo_this_octave[BINCYCLE];
//Per-octave sum of the samples received since that octave was last processed.
static int32_t Saccum_octavebins[OCTAVES];
//Current position in Sdo_this_octave.
static uint8_t Swhichoctaveplace;
//NOTE(review): nothing in the visible part of this file reads or writes
//embeddedbins (the 32-bit DFT publishes through embeddedbins32), and dft.c
//appears to define a symbol with the same name -- confirm whether this
//definition is still needed, since duplicate definitions can fail to link.
uint16_t embeddedbins[FIXBINS]; //This is updated every time the DFT hits the octavecount, or 1/32 updates.
//From: http://stackoverflow.com/questions/1100090/looking-for-an-efficient-integer-square-root-algorithm-for-arm-thumb2
/**
 * \brief Fast integer square root, rounded to the nearest integer.
 *
 * Classic bit-by-bit (digit-by-digit) square root, followed by arithmetic
 * rounding: if the real answer would have a fractional part of 0.5 or
 * greater, the result is rounded up to the next integer.
 *      - SquareRootRounded(2) --> 1
 *      - SquareRootRounded(3) --> 2
 *      - SquareRootRounded(4) --> 2
 *      - SquareRootRounded(6) --> 2
 *      - SquareRootRounded(7) --> 3
 *      - SquareRootRounded(8) --> 3
 *      - SquareRootRounded(9) --> 3
 *
 * The result saturates at 65535: for inputs just below 2^32 the rounded root
 * would be 65536, which does not fit the uint16_t return type and would
 * otherwise silently wrap to 0.
 *
 * \param[in] a_nInput - unsigned integer for which to find the square root
 *
 * \return Rounded integer square root of the input value, at most 65535.
 */
static uint16_t SquareRootRounded(uint32_t a_nInput)
{
	uint32_t op  = a_nInput;
	uint32_t res = 0;
	uint32_t one = 1uL << 30; // The second-to-top bit is set: use 1u << 14 for uint16_t type; use 1uL<<30 for uint32_t type

	// "one" starts at the highest power of four <= the argument.
	while (one > op)
	{
		one >>= 2;
	}

	while (one != 0)
	{
		if (op >= res + one)
		{
			op = op - (res + one);
			res = res + 2 * one;
		}
		res >>= 1;
		one >>= 2;
	}

	/* Do arithmetic rounding to nearest integer: op now holds the remainder. */
	if (op > res)
	{
		res++;
	}

	/* Rounding can push the root to 65536; clamp so the narrowing is lossless. */
	if (res > 0xFFFFu)
	{
		res = 0xFFFFu;
	}
	return (uint16_t)res;
}
//Convert the latest accumulator snapshot (Sdatspace32BOut) into magnitudes.
//
//For every bin the top 16 bits of the sin and cos accumulators are combined
//into sqrt(sin^2 + cos^2). The integer result is doubled, shifted right by
//the bin's octave number and stored in embeddedbins32[]. On non-embedded
//builds the float magnitude is also written to the buffer last handed to
//DoDFTProgressive32(), if there is one.
void UpdateOutputBins32()
{
	int i;
	//Element type must match the array: "int" is not guaranteed to be 32 bits.
	const int32_t * ipt = &Sdatspace32BOut[0];
	for( i = 0; i < FIXBINS; i++ )
	{
		int16_t isps = *(ipt++)>>16;
		int16_t ispc = *(ipt++)>>16;
		int octave = i / FIXBPERO;

		//Square in 32 bits and add as unsigned: each square is at most 2^30,
		//but the sum can reach 2^31, which would overflow a signed int.
		uint32_t mux = (uint32_t)( (int32_t)isps * isps ) + (uint32_t)( (int32_t)ispc * ispc );

#ifndef CCEMBEDDED
		//goutbins stays NULL until DoDFTProgressive32() has been called once.
		if( goutbins )
		{
			goutbins[i] = sqrtf( (float)mux );
			goutbins[i] /= (78<<DFTIIR)*(1<<octave); //reasonable (but arbitrary amplification)
		}
#endif

		//bump it up so we don't lose a lot of detail at high freqs.
		uint32_t rmux = (uint32_t)SquareRootRounded( mux ) << 1;
		embeddedbins32[i] = rmux >> octave;
	}
}
//Run one step of the octave schedule with the given input sample.
//
//Each call looks up which octave is due (Sdo_this_octave) and advances the
//schedule cursor. A schedule value above 128 marks the "publish" slot: the
//accumulators are copied to Sdatspace32BOut and decayed by 1/2^DFTIIR, and
//the sample is not consumed. Otherwise the sample is added to every octave's
//pending sum, and the due octave's sum is scaled down, cleared and mixed
//against the sine/cosine table for each of that octave's FIXBPERO bins.
static void HandleInt( int16_t sample )
{
	int n;
	const uint8_t slot = Sdo_this_octave[Swhichoctaveplace];

	//Step the cursor, wrapping at BINCYCLE.
	Swhichoctaveplace = (uint8_t)( Swhichoctaveplace + 1 );
	Swhichoctaveplace &= BINCYCLE-1;

	if( slot > 128 )
	{
		//Special: publish every (sin,cos) accumulator, then let it leak.
		for( n = 0; n < FIXBINS*2; n++ )
		{
			const int32_t snapshot = Sdatspace32B[n];
			Sdatspace32BOut[n] = snapshot;
			Sdatspace32B[n] -= snapshot>>DFTIIR;
		}
		return;
	}

	//Every octave sees every sample; each drains its own sum when it is due.
	for( n = 0; n < OCTAVES; n++ )
	{
		Saccum_octavebins[n] += sample;
	}

	{
		uint16_t * const phase = &Sdatspace32A[slot*FIXBPERO*2]; //(advance,place) pairs
		int32_t  * const accum = &Sdatspace32B[slot*FIXBPERO*2]; //(sin,cos) pairs
		const int16_t decimated = Saccum_octavebins[slot]>>(OCTAVES-slot);

		Saccum_octavebins[slot] = 0;

		for( n = 0; n < FIXBPERO; n++ )
		{
			const uint16_t step   = phase[2*n];
			const uint8_t  sinidx = phase[2*n+1] >> 8;
			//Cosine is 1/4 wavelength out-of-phase with sine; wraps at 256.
			const uint8_t  cosidx = (uint8_t)( sinidx + 64 );

			phase[2*n+1] += step;
			accum[2*n]   += (Ssinonlytable[sinidx] * decimated);
			accum[2*n+1] += (Ssinonlytable[cosidx] * decimated);
		}
	}
}
//Build the octave schedule used by HandleInt() and mark setup as done.
//
//For each slot the number of consecutive 1 bits at the bottom of the slot
//index (i.e. the position of its lowest 0 bit) selects the octave:
//slot value = OCTAVES - position - 1. The resulting pattern looks like
//  4 3 4 2 4 3 4 ...
//Returns 0 on success, -1 if no zero bit is found within OCTAVES+1 bits
//(desktop builds print an error and exit instead).
int SetupDFTProgressive32()
{
	int slot;

	Sdonefirstrun = 1;

	for( slot = 0; slot < BINCYCLE; slot++ )
	{
		//Walk up from bit 0 until the first cleared bit.
		int bit = 0;
		while( bit <= OCTAVES && ( slot & (1<<bit) ) != 0 )
		{
			bit++;
		}

		if( bit > OCTAVES )
		{
#ifndef CCEMBEDDED
			fprintf( stderr, "Error: algorithm fault.\n" );
			exit( -1 );
#endif
			return -1;
		}

		Sdo_this_octave[slot] = OCTAVES-bit-1;
	}

	return 0;
}
//Load the per-bin phase advances from an integer table.
//
//frequencies must hold FIXBPERO entries; the same FIXBPERO values are reused
//for every octave. Only the "advance" half of each (advance,place) pair in
//Sdatspace32A is written.
void UpdateBins32( const uint16_t * frequencies )
{
	int bin  = 0;
	int note = 0; //bin index within its octave

	while( bin < FIXBINS )
	{
		Sdatspace32A[bin*2] = frequencies[note];
		bin++;
		note++;
		if( note == FIXBPERO )
		{
			note = 0;
		}
	}
}
//Push one new audio sample into the DFT.
//The sample is run through two consecutive steps of the octave schedule.
void PushSample32( int16_t dat )
{
	int pass;
	for( pass = 0; pass < 2; pass++ )
	{
		HandleInt( dat );
	}
}
#ifndef CCEMBEDDED
//Load the per-bin phase advances from a float frequency table.
//
//Only the last octave's worth of entries (indices FIXBPERO*(OCTAVES-1) and
//up) is read; each bin's advance is 65536.0 divided by the entry for its
//position within the octave, and the same values are reused for all octaves.
void UpdateBinsForDFT32( const float * frequencies )
{
	const float * const top_octave = frequencies + (FIXBPERO*(OCTAVES-1));
	int bin;

	for( bin = 0; bin < FIXBINS; bin++ )
	{
		const int note = bin % FIXBPERO;
		float freq = top_octave[note];
		Sdatspace32A[bin*2] = (65536.0/freq);
	}
}
#endif
#ifndef CCEMBEDDED
//Desktop entry point: feed all samples that arrived since the previous call
//through the 32-bit integer DFT and write the resulting magnitudes to outbins.
//
//  outbins               - output, "bins" floats; must be FIXBINS long.
//  frequencies           - float frequency table passed to UpdateBinsForDFT32().
//  bins                  - number of output bins; anything but FIXBINS is rejected.
//  databuffer            - circular buffer of float samples.
//  place_in_data_buffer  - current write position in databuffer; samples from
//                          the position seen on the previous call up to (not
//                          including) this one are consumed.
//  size_of_data_buffer   - length of databuffer, used for wraparound.
//  q, speedup            - unused by this implementation.
//
//On a bin-count mismatch an error is printed, outbins is left zeroed and no
//DFT state is touched.
void DoDFTProgressive32( float * outbins, float * frequencies, int bins, const float * databuffer, int place_in_data_buffer, int size_of_data_buffer, float q, float speedup )
{
	static float backupbins[FIXBINS];
	static int last_place;
	int i;

	(void)q;
	(void)speedup;

	if( bins > 0 )
	{
		memset( outbins, 0, bins * sizeof( float ) );
	}

	//Validate BEFORE copying FIXBINS floats into outbins: if the caller's
	//buffer is shorter than FIXBINS the copy would run off its end.
	if( FIXBINS != bins )
	{
		fprintf( stderr, "Error: Bins was reconfigured.  skippy requires a constant number of bins.\n" );
		return;
	}

	goutbins = outbins;
	memcpy( outbins, backupbins, sizeof( backupbins ) );

	//printf( "SKIPPY\n" );

	if( !Sdonefirstrun )
	{
		SetupDFTProgressive32();
		Sdonefirstrun = 1;
	}

	UpdateBinsForDFT32( frequencies );

	for( i = last_place; i != place_in_data_buffer; i = (i+1)%size_of_data_buffer )
	{
		//The integer core only accepts -4095..+4095; clamp so an over-range
		//float sample cannot overflow the int16_t or the accumulators.
		float s = databuffer[i];
		if( s > 1.0f )
		{
			s = 1.0f;
		}
		else if( s < -1.0f )
		{
			s = -1.0f;
		}
		int16_t ifr1 = (int16_t)( s * 4095 );
		HandleInt( ifr1 );
		HandleInt( ifr1 );
	}

	UpdateOutputBins32();

	last_place = place_in_data_buffer;

	//Keep a copy so the next call starts from the last published values.
	memcpy( backupbins, outbins, sizeof( backupbins ) );
}
#endif

69
DFT32.h Normal file
View file

@ -0,0 +1,69 @@
#ifndef _DFT32_H
#define _DFT32_H
#include <stdint.h>
//A 32-bit version of the DFT used for ColorChord.
//This header makes it convenient to use for an embedded system.
//The 32-bit DFT avoids some bit shifts, however it uses slightly
//more RAM and it uses a lot of 32-bit arithmetic.
//
//This is basically a clone of "ProgressiveIntegerSkippy" and changes
//made here should be backported there as well.
//You can #define these to be other things elsewhere (before including
//this header).
//Number of octaves analysed.
#ifndef OCTAVES
#define OCTAVES 5
#endif
//Number of bins per octave.
#ifndef FIXBPERO
#define FIXBPERO 24
#endif
//Total number of bins.
#ifndef FIXBINS
#define FIXBINS (FIXBPERO*OCTAVES)
#endif
//Length of the octave schedule, in processing steps.
#ifndef BINCYCLE
#define BINCYCLE (1<<OCTAVES)
#endif
//You may increase this past 5 but if you do, the amplitude of your incoming signal
//must decrease. Increasing this value makes responses slower. Lower values are
//more responsive.
#ifndef DFTIIR
#define DFTIIR 4
#endif
//Everything the integer one buys, except it only calculates 2 octaves worth of notes per audio frame.
//This is sort of working, but still has some quality issues.
//It would theoretically be fast enough to work on an AVR.
//NOTE: This is the only DFT available to the embedded port of ColorChord
//q and speedup are accepted for signature compatibility with the other DFTs.
#ifndef CCEMBEDDED
void DoDFTProgressive32( float * outbins, float * frequencies, int bins, const float * databuffer, int place_in_data_buffer, int size_of_data_buffer, float q, float speedup );
#endif
//It's actually split into a few functions, which you can call on your own:
int SetupDFTProgressive32(); //Call at start. Returns nonzero if error.
//Set the per-bin phase advances from a table of FIXBPERO integer values.
void UpdateBins32( const uint16_t * frequencies );
//Call this to push on new frames of sound.
//Though it accepts an int16, it actually only takes -4095 to +4095. (13-bit)
//Any more and you will exceed the accumulators and it will cause an overflow.
void PushSample32( int16_t dat );
#ifndef CCEMBEDDED
void UpdateBinsForDFT32( const float * frequencies ); //Update the frequencies
#endif
//Recompute embeddedbins32[] from the most recently published accumulators.
void UpdateOutputBins32();
//Whenever you need to read the bins, you can do it from here.
//These outputs are limited to 0..~2047, this makes it possible
//for you to process with uint16_t's more easily.
//The values only change when UpdateOutputBins32() is called; the accumulators
//it reads are published once per BINCYCLE processing steps.
extern uint16_t embeddedbins32[]; //[FIXBINS]
#endif

View file

@ -13,17 +13,10 @@ LDLIBS:=-lpthread -lasound -lm -lpulse-simple -lpulse
CFLAGS:=-g -Os -flto -Wall
EXTRALIBS:=-lusb-1.0
colorchord : os_generic.o main.o dft.o decompose.o filter.o color.o sort.o notefinder.o util.o outdrivers.o $(RAWDRAW) $(SOUND) $(OUTS) parameters.o chash.o
colorchord : os_generic.o main.o dft.o decompose.o filter.o color.o sort.o notefinder.o util.o outdrivers.o $(RAWDRAW) $(SOUND) $(OUTS) parameters.o chash.o DFT32.o
gcc -o $@ $^ $(CFLAGS) $(LDLIBS) $(EXTRALIBS) $(RAWDRAWLIBS)
embeddedcc : os_generic.c embeddedcc.c dft.c embeddednf.c
gcc -o $@ $^ $(CFLAGS) -DCCEMBEDDED $(LDFLAGS) $(EXTRALIBS) $(RAWDRAWLIBS)
runembedded : embeddedcc
parec --format=u8 --rate=8000 --channels=1 --device=alsa_output.pci-0000_00_1b.0.analog-stereo.monitor | ./embeddedcc
colorchord.exe : os_generic.c main.c dft.c decompose.c filter.c color.c sort.c notefinder.c util.c outdrivers.c DrawFunctions.c parameters.c chash.c WinDriver.c sound.c sound_null.c sound_win.c OutputVoronoi.c DisplayArray.c OutputLinear.c DisplayPie.c DisplayNetwork.c
colorchord.exe : os_generic.c main.c dft.c decompose.c filter.c color.c sort.c notefinder.c util.c outdrivers.c DrawFunctions.c parameters.c chash.c WinDriver.c sound.c sound_null.c sound_win.c OutputVoronoi.c DisplayArray.c OutputLinear.c DisplayPie.c DisplayNetwork.c DFT32.c
$(WINGCC) $(WINGCCFLAGS) -o $@ $^ $(WINLDFLAGS)

View file

@ -52,7 +52,8 @@ octaves = 5
# 1 = DFT Progressive
# 2 = DFT Progressive Integer
# 3 = DFT Progressive Integer Skippy
do_progressive_dft = 3
# 4 = Integer, 32-Bit, Progressive, Skippy.
do_progressive_dft = 4
filter_iter = 2
filter_strength = .5

10
dft.c
View file

@ -348,7 +348,7 @@ void DoDFTProgressiveInteger( float * outbins, float * frequencies, int bins, co
static uint8_t Sdonefirstrun;
//int8_t Ssintable[512]; //Actually [sin][cos] pairs.
const int8_t Ssintable[512] = {
static const int8_t Ssintable[512] = {
0, 127, 3, 126, 6, 126, 9, 126, 12, 126, 15, 126, 18, 125, 21, 125,
24, 124, 27, 123, 30, 123, 33, 122, 36, 121, 39, 120, 42, 119, 45, 118,
48, 117, 51, 116, 54, 114, 57, 113, 59, 112, 62, 110, 65, 108, 67, 107,
@ -397,7 +397,7 @@ int main()
Ssintable[i*2+1] = (int8_t)((cosf( i / 256.0 * 6.283 ) * 127.0));
}
printf( "const int8_t Ssintable[512] = {" );
printf( "static const int8_t Ssintable[512] = {" );
for( i = 0; i < 512; i++ )
{
if( !(i & 0xf ) )
@ -415,9 +415,9 @@ int main()
uint16_t Sdatspace[FIXBINS*4]; //(advances,places,isses,icses)
//For
uint8_t Sdo_this_octave[BINCYCLE];
int16_t Saccum_octavebins[OCTAVES];
uint8_t Swhichoctaveplace;
static uint8_t Sdo_this_octave[BINCYCLE];
static int16_t Saccum_octavebins[OCTAVES];
static uint8_t Swhichoctaveplace;
uint16_t embeddedbins[FIXBINS]; //This is updated every time the DFT hits the octavecount, or 1/32 updates.
//From: http://stackoverflow.com/questions/1100090/looking-for-an-efficient-integer-square-root-algorithm-for-arm-thumb2

7
dft.h
View file

@ -53,8 +53,13 @@ void Push8BitIntegerSkippy( int8_t dat ); //Call this to push on new frames of s
#define FIXBPERO 24
#endif
#ifndef FIXBINS
#define FIXBINS (FIXBPERO*OCTAVES)
#endif
#ifndef BINCYCLE
#define BINCYCLE (1<<OCTAVES)
#endif
//This variable determines how much to nerf the current sample of the DFT.
//I've found issues when this is smaller, but bigger values do have a negative
@ -65,7 +70,7 @@ void Push8BitIntegerSkippy( int8_t dat ); //Call this to push on new frames of s
//Whenever you need to read the bins, you can do it from here.
extern uint16_t Sdatspace[]; //(advances,places,isses,icses)
extern uint16_t embeddedbins[]; //This is updated every time the DFT hits the octavecount, or 1/32 updates.
extern uint16_t embeddedbins[]; //This is updated every time the DFT hits the octavecount, or every BINCYCLE updates.
#endif

View file

@ -57,7 +57,11 @@ void UpdateFreqs()
fbins[i] = ( 65536.0 ) / ( DFREQ ) * frq * 16;
}
#ifdef USE_32DFT
UpdateBins32( fbins );
#else
UpdateBinsForProgressiveIntegerSkippyInt( fbins );
#endif
}
void Init()
@ -82,7 +86,11 @@ void Init()
}
//Step 1: Initialize the Integer DFT.
#ifdef USE_32DFT
SetupDFTProgressive32();
#else
SetupDFTProgressiveIntegerSkippy();
#endif
//Step 2: Set up the frequency list. You could do this multiple times
//if you want to change the loadout of the frequencies.
@ -93,10 +101,18 @@ void HandleFrameInfo()
{
int i, j, k;
#ifdef USE_32DFT
uint16_t * strens;
UpdateOutputBins32();
strens = embeddedbins32;
#else
uint16_t * strens = embeddedbins;
#endif
//Copy out the bins from the DFT to our fuzzed bins.
for( i = 0; i < FIXBINS; i++ )
{
fuzzed_bins[i] = (fuzzed_bins[i] + (embeddedbins[i]>>FUZZ_IIR_BITS) -
fuzzed_bins[i] = (fuzzed_bins[i] + (strens[i]>>FUZZ_IIR_BITS) -
(fuzzed_bins[i]>>FUZZ_IIR_BITS));
}
@ -322,21 +338,22 @@ void HandleFrameInfo()
}
//We now have notes!!!
/*
#if 1
for( i = 0; i < MAXNOTES; i++ )
{
if( note_peak_freqs[i] == 255 ) continue;
printf( "(%3d %4d %4d) ", note_peak_freqs[i], note_peak_amps[i], note_peak_amps2[i] );
}
printf( "\n") ;
*/
#endif
/*
#if 0
for( i = 0; i < FIXBPERO; i++ )
{
printf( "%5d ", folded_bins[i] );
}
printf( "\n" );*/
printf( "\n" );
#endif
}

View file

@ -1,7 +1,9 @@
#ifndef _EMBEDDEDNF_H
#define _EMBEDDEDNF_H
#include "dft.h"
//Use a 32-bit DFT. It won't work for AVRs, but for any 32-bit systems where
//they can multiply quickly, this is the bees knees.
#define USE_32DFT
#define DFREQ 8000
#define BASE_FREQ 55.0 // You may make this a float.
@ -32,6 +34,13 @@
#define AMP_1_NERFING_BITS 5
#define AMP_2_NERFING_BITS 3
#ifdef USE_32DFT
#include "DFT32.h"
#else
#include "dft.h"
#endif
extern uint16_t folded_bins[]; //[FIXBPERO] <- The folded fourier output.
extern uint16_t fuzzed_bins[]; //[FIXBINS] <- The Full DFT after IIR, Blur and Taper

13
embeddedx86/Makefile Normal file
View file

@ -0,0 +1,13 @@
# Builds the embedded ColorChord test program as a 32-bit x86 binary.
all : embeddedcc
# -DCCEMBEDDED selects the embedded code paths; -I.. finds the shared headers
# in the parent directory; -m32 builds for 32-bit x86.
CFLAGS:=-Ofast -DCCEMBEDDED -I.. -flto -m32
# Section garbage collection and stripping keep the binary small.
LDFLAGS:=-ffunction-sections -Wl,--gc-sections -fno-asynchronous-unwind-tables -Wl,--strip-all
# The note finder and 32-bit DFT come from the parent directory.
embeddedcc : ../embeddednf.c ../DFT32.c embeddedcc.c
gcc -o $@ $^ $(CFLAGS) $(LDFLAGS)
# Pipe 8 kHz, unsigned 8-bit, mono audio from parec into the program.
# NOTE(review): the --device name looks machine-specific -- adjust as needed.
runembedded : embeddedcc
parec --format=u8 --rate=8000 --channels=1 --device=alsa_output.pci-0000_00_1b.0.analog-stereo.monitor | ./embeddedcc
# Remove the binary and editor backup files.
clean :
rm -rf embeddedcc *~

View file

@ -5,7 +5,6 @@
#include <stdio.h>
#include "embeddednf.h"
#include "dft.h"
int main()
{
@ -15,7 +14,11 @@ int main()
while( ( ci = getchar() ) != EOF )
{
int cs = ci - 0x80;
#ifdef USE_32DFT
PushSample32( ((int8_t)cs)*32 );
#else
Push8BitIntegerSkippy( (int8_t)cs );
#endif
//printf( "%d ", cs ); fflush( stdout );
wf++;
if( wf == 64 )

View file

@ -9,6 +9,7 @@
#include "filter.h"
#include "decompose.h"
#include "sort.h"
#include "DFT32.h"
struct NoteFinder * CreateNoteFinder( int spsRec )
{
@ -193,6 +194,9 @@ void RunNoteFinder( struct NoteFinder * nf, const float * audio_stream, int head
case 3:
DoDFTProgressiveIntegerSkippy( dftbins, nf->frequencies, freqs, audio_stream, head, buffersize, nf->dft_q, nf->dft_speedup );
break;
case 4:
DoDFTProgressive32( dftbins, nf->frequencies, freqs, audio_stream, head, buffersize, nf->dft_q, nf->dft_speedup );
break;
default:
fprintf( stderr, "Error: No DFT Seleced\n" );
}