diff --git a/colorchord2/Makefile b/colorchord2/Makefile index f393fc3..1299852 100644 --- a/colorchord2/Makefile +++ b/colorchord2/Makefile @@ -17,7 +17,7 @@ LDLIBS:=-lpthread -lasound -lm -lpulse-simple -lpulse -ludev -lrt CFLAGS:=-g -O0 -flto -Wall -ffast-math -I../embeddedcommon -I. -DICACHE_FLASH_ATTR= EXTRALIBS:=-lusb-1.0 -colorchord : os_generic.o main.o dft.o decompose.o filter.o color.o notefinder.o util.o outdrivers.o $(RAWDRAW) $(SOUND) $(OUTS) parameters.o chash.o hook.o ../embeddedcommon/DFT32.o configs.o +colorchord : os_generic.o main.o dft.o decompose.o filter.o color.o notefinder.o util.o outdrivers.o $(RAWDRAW) $(SOUND) $(OUTS) parameters.o chash.o hook.o ../embeddedcommon/DFT32.o configs.o ../embeddedcommon/DFT8Turbo.o ../embeddedcommon/DFT8Padauk.o gcc -o $@ $^ $(CFLAGS) $(LDLIBS) $(EXTRALIBS) $(RAWDRAWLIBS) @@ -26,4 +26,4 @@ colorchord.exe : os_generic.c main.c dft.c decompose.c filter.c color.c notefin clean : - rm -rf *.o *~ colorchord colorchord.exe embeddedcc + rm -rf *.o *~ ../embeddedcommon/*.o colorchord colorchord.exe embeddedcc diff --git a/colorchord2/colorchord.exe b/colorchord2/colorchord.exe deleted file mode 100644 index 7afcc43..0000000 Binary files a/colorchord2/colorchord.exe and /dev/null differ diff --git a/colorchord2/default.conf b/colorchord2/default.conf index c8d2a9d..297c676 100644 --- a/colorchord2/default.conf +++ b/colorchord2/default.conf @@ -58,8 +58,9 @@ octaves = 5 # 1 = DFT Progressive # 2 = DFT Progressive Integer # 3 = DFT Progressive Integer Skippy -# 4 = Integer, 32-Bit, Progressive, Skippy. -do_progressive_dft = 4 +# 4 = Integer, 32-Bit, Progressive, Skippy. (wow, this actually works) +# 5 = 8-bit turbo test. 
+do_progressive_dft = 5 filter_iter = 2 diff --git a/colorchord2/main.c b/colorchord2/main.c index adc00c6..02f3a5c 100644 --- a/colorchord2/main.c +++ b/colorchord2/main.c @@ -54,6 +54,7 @@ float cpu_autolimit_interval = 0.016; REGISTER_PARAM( cpu_autolimit_interval, P int sample_channel = -1;REGISTER_PARAM( sample_channel, PAINT ); int showfps = 0; REGISTER_PARAM( showfps, PAINT ); float in_amplitude = 1; REGISTER_PARAM( in_amplitude, PAFLOAT ); +int shim_sinewave = 0; REGISTER_PARAM( shim_sinewave, PAINT ); struct NoteFinder * nf; @@ -96,6 +97,9 @@ void HandleMotion( int x, int y, int mask ) void SoundCB( float * out, float * in, int samplesr, int * samplesp, struct SoundDriver * sd ) { + static og_sema_t tss; + if( !tss ) tss = OGCreateSema(); + else OGLockSema( tss ); int channelin = sd->channelsRec; // int channelout = sd->channelsPlay; //*samplesp = 0; @@ -106,53 +110,90 @@ void SoundCB( float * out, float * in, int samplesr, int * samplesp, struct Soun int i; int j; - for( i = 0; i < samplesr; i++ ) + if( out ) { - if( out ) + for( i = 0; i < samplesr; i++ ) { for( j = 0; j < channelin; j++ ) { out[i*channelin+j] = 0; } } + } - if( sample_channel < 0 ) - { - float fo = 0; - for( j = 0; j < channelin; j++ ) - { - float f = in[i*channelin+j]; - if( f >= -1 && f <= 1 ) - { - fo += f; - } - else - { - fo += (f>0)?1:-1; -// printf( "Sound fault A %d/%d %d/%d %f\n", j, channelin, i, samplesr, f ); - } - } + if( shim_sinewave ) + { + static double sinplace; + static double sinfreq = 0; + static int msp; - fo /= channelin; - sound[soundhead] = fo*in_amplitude; - soundhead = (soundhead+1)%SOUNDCBSIZE; - } - else + for( i = 0; i < samplesr; i++ ) { - float f = in[i*channelin+sample_channel]; + sinfreq = 3.14159 * 2 * 110 * pow( 2, 5.0/12 ) / 16000; +// sinfreq += .000001; +// if( sinfreq > .2 ) sinfreq = 0; + sinplace += sinfreq; + if( sinplace > (3.14159*2) ) sinplace -= 3.14159 * 2; + + msp++; + float f = sin( sinplace ); + //if( msp % 20000 > 10000 ) f = 0; if( f 
> 1 || f < -1 ) { f = (f>0)?1:-1; } - //printf( "Sound fault B %d/%d\n", i, samplesr ); sound[soundhead] = f*in_amplitude; soundhead = (soundhead+1)%SOUNDCBSIZE; - } } + else + { + if( sample_channel < 0 ) + { + for( i = 0; i < samplesr; i++ ) + { + float fo = 0; + for( j = 0; j < channelin; j++ ) + { + float f = in[i*channelin+j]; + if( f >= -1 && f <= 1 ) + { + fo += f; + } + else + { + fo += (f>0)?1:-1; + // printf( "Sound fault A %d/%d %d/%d %f\n", j, channelin, i, samplesr, f ); + } + } + + fo /= channelin; + sound[soundhead] = fo*in_amplitude; + soundhead = (soundhead+1)%SOUNDCBSIZE; + } + } + else + { + for( i = 0; i < samplesr; i++ ) + { + float f = in[i*channelin+sample_channel]; + + if( f > 1 || f < -1 ) + { + f = (f>0)?1:-1; + } + + + //printf( "Sound fault B %d/%d\n", i, samplesr ); + sound[soundhead] = f*in_amplitude; + soundhead = (soundhead+1)%SOUNDCBSIZE; + } + } + } + SoundEventHappened( samplesr, in, 0, channelin ); if( out ) @@ -160,6 +201,8 @@ void SoundCB( float * out, float * in, int samplesr, int * samplesp, struct Soun SoundEventHappened( samplesr, out, 1, sd->channelsPlay ); } *samplesp = samplesr; + OGUnlockSema( tss ); + } int main(int argc, char ** argv) diff --git a/colorchord2/notefinder.c b/colorchord2/notefinder.c index f0e1178..02bf0b7 100644 --- a/colorchord2/notefinder.c +++ b/colorchord2/notefinder.c @@ -11,6 +11,8 @@ #include "filter.h" #include "decompose.h" #include "DFT32.h" +#include "DFT8Turbo.h" +#include "DFT8Padauk.h" struct NoteFinder * CreateNoteFinder( int spsRec ) { @@ -199,6 +201,12 @@ void RunNoteFinder( struct NoteFinder * nf, const float * audio_stream, int head case 4: DoDFTProgressive32( dftbins, nf->frequencies, freqs, audio_stream, head, buffersize, nf->dft_q, nf->dft_speedup ); break; + case 5: + DoDFT8BitTurbo( dftbins, nf->frequencies, freqs, audio_stream, head, buffersize, nf->dft_q, nf->dft_speedup ); + break; + case 6: + DoDFT8BitPadauk( dftbins, nf->frequencies, freqs, audio_stream, head, buffersize, 
nf->dft_q, nf->dft_speedup ); + break; default: fprintf( stderr, "Error: No DFT Seleced\n" ); } diff --git a/colorchord2/turbo8bit.conf b/colorchord2/turbo8bit.conf new file mode 100644 index 0000000..039af99 --- /dev/null +++ b/colorchord2/turbo8bit.conf @@ -0,0 +1,95 @@ +# This is the configuration file for colorchord. +# Most values are already defaulted in the software. +# This file is constantly checked for new versions. +# \r, and ; are used as terminators, so you can put +# multiple entries on the same line. + +#Whether to limit the control loop to ~60ish FPS. +cpu_autolimit = 1 + +#General GUI properties. +title = PA Test +set_screenx = 720 +set_screeny = 480 + +#Sound properties. +buffer = 384 +play = 0 +rec = 1 +channels = 2 + + + +# This matters for CC Turbo8 +# What is the base note? I.e. the lowest note. +# Note that it won't have very much impact until an octave up though! + +#These two are carefully selected. You should pick a base note such that it fully saturates the sample frequency. +#10000 / 2^4{octaves} / 8 +base_hz = 82.41 +samplerate = 10000 +freqbins = 12 +octaves = 4 +do_progressive_dft=6 + + +slope = 0 +wininput = -1 + +#Compiled version will default this. +#sound_source = ALSA +#-1 indicates left and right, 0 left, 1 right. + +sample_channel = -1 +sourcename = default +#alsa_output.pci-0000_00_1f.3.analog-stereo.monitor +#default +# alsa_output.pci-0000_00_1b.0.analog-stereo.monitor +#alsa_output.pci-0000_00_1f.3.analog-stereo.monitor << New laptop +#use pactl list | grep pci- | grep monitor + +################################## +# General ColorChord properties. # +################################## + +# How much to amplify the incoming signal. +amplify = 2.0 + + +# This is only used when dealing with the slow decompose (now defunct) +# decompose_iterations = 1000 +# default_sigma = 1.4000 + + +# For the final note information... How much to slack everything? 
+note_attach_amp_iir = 0.3500 +note_attach_amp_iir2 = 0.250 +note_attach_freq_iir = 0.3000 + +#How many bins a note can jump from frame to frame to be considered a slide. +#This is used to prevent notes from popping in and out a lot. +note_combine_distance = 0.5000 +note_jumpability = 1.8000 +note_minimum_new_distribution_value = 0.0200 +note_out_chop = 0.05000 + +#compress_coefficient = 4.0 +#compress_exponent = .5 + + +#======================================================================= +#Outputs + + +shim_sinewave = 0 + +#This is a Voronoi thing: +outdrivers = OutputVoronoi, DisplayArray +lightx = 64 +lighty = 32 +fromsides = 1 +shape_cutoff = 0.03 +satamp = 5.000 +amppow = 2.510 +distpow = 1.500 + diff --git a/embedded8266/esp82xx b/embedded8266/esp82xx index a08b471..113e0d1 160000 --- a/embedded8266/esp82xx +++ b/embedded8266/esp82xx @@ -1 +1 @@ -Subproject commit a08b47184b3fcf04172ecc0b6a1aee9c90e5d92d +Subproject commit 113e0d1a182cd138510f748abf2854c0e84cfa23 diff --git a/embeddedcommon/DFT12Small.c b/embeddedcommon/DFT12Small.c new file mode 100644 index 0000000..6d04241 --- /dev/null +++ b/embeddedcommon/DFT12Small.c @@ -0,0 +1,346 @@ +//NOTE DO NOT EDIT THIS FILE WITHOUT ALSO EDITING DFT8TURBO!!! + +#include +#include +#include "DFT12Small.h" +#include + +#include + + +#define MAX_FREQS (12) +#define OCTAVES (4) + +/* + General procedure - use this code, with uint16_t or uint32_t buffers, and make sure none of the alarms go off. + All of the paths still require no more than an 8-bit multiply. + You should test with extreme cases, like square wave sweeps in, etc. 
+*/ + +//#define TWELVEBIT +#define EIGHTBIT + +#ifdef TWELVEBIT +//No larger than 12-bit signed values for integration or sincos +#define FRONTEND_AMPLITUDE (0) +#define INITIAL_DECIMATE (2) +#define INTEGRATOR_DECIMATE (8) +#define FINAL_DECIMATE (4) +#elif defined( EIGHTBIT ) +//No larger than 8-bit signed values for integration or sincos +#define FRONTEND_AMPLITUDE (2) +#define INITIAL_DECIMATE (5) //Yurgh... only 3 bits of ADC data. That's 8 unique levels :( +#define INTEGRATOR_DECIMATE (8) +#define FINAL_DECIMATE (1) +#endif + + +//4x the hits (sin/cos and we need to do it once for each edge) +//8x for selecting a higher octave. +#define FREQREBASE 8.0 +#define TARGFREQ 10000.0 + +/* Tradeoff guide: + + * We will optimize for RAM size here. + + * INITIAL_DECIMATE; A larger decimation: {NOTE 1} + +) Reduces the bit depth needed for the integral map. + If you use "1" and a fully saturated map (highest note is every sample), it will not overflow a signed 12-bit number. + -) Increases noise. + With full-scale: 0->1 minimal, 1->2 minimal, 2->3 significantly noticeable, 3->4 major. + If sound is quieter, it matters more. Not sure with other changes in system. (2) seems ok. + -) If you make it (1) or (0) You can't do an 8-bit multiply and keep the output in a signed range. + Also, other things, like frequency of hits can manipulate the maximum bit depth needed for integral map. + + * If you weight the bins in advance (see "mulmux"), you can: {NOTE 2} + +) potentially use shallower bit depth but + -) have to compute the multiply every time you update the bin. + + * You can use a modified-square-wave which only integrates for 1/2 of the duty cycle. {NOTE 3} + +) uses 1/2 the integral memory. + -) Not as pretty of an output. See "integral_at" + + *TODO: Investigate using all unsigned (to make multiply and/or 12-bit storage easier) + *TODO: Consider a mode which has 16-bit integrals, but still 8-bit cossin data. 
+ + So, the idea here is we would keep a running total of the current ADC value, kept away in a int16_t. + It is constantly summing, so we can take an integral of it. Or rather an integral range. + + Over time, we perform operations like adding or subtracting from a current place. It basically is + a DFT where the kernel is computed using square waves (or modified square waves) +*/ + +//These live in RAM. +int16_t running_integral; //Realistically treat as 12-bits on ramjet8 +int16_t integral_at[MAX_FREQS*OCTAVES]; //For ramjet8, make 12-bits +int32_t cossindata[MAX_FREQS*OCTAVES*2]; //Contains COS and SIN data. (32-bit for now, will be 16-bit, potentially even 8.) +uint8_t which_octave_for_op[MAX_FREQS]; //counts up, tells you which ocative you are operating on. PUT IN RAM. +uint8_t actiontableplace; + +#define NR_OF_OPS (4< hits per %d: %f %d (%.2f%% error)\n", topbin, f, ACTIONTABLESIZE, (float)ACTIONTABLESIZE/f, dhrpertable, err * 100.0 ); + if( dhrpertable >= ACTIONTABLESIZE ) + { + fprintf( stderr, "Error: Too many hits.\n" ); + exit(0); + } + + float advance_per_step = dhrpertable/(float)ACTIONTABLESIZE; + float fvadv = 0.5; + int j; + int countset = 0; + + //Tricky: We need to start fadv off at such a place that there won't be a hicchup when going back around to 0. + // I believe this is done by setting fvadv to 0.5 initially. Unsure. + + for( j = 0; j < ACTIONTABLESIZE; j++ ) + { + if( fvadv >= 0.5 ) + { + actiontable[j] |= 1<> longestzeroes) & 1) == 0 ); longestzeroes++ ); + //longestzeroes goes: 255, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, ... + //This isn't great, because we need to also know whether we are attacking the SIN side or the COS side, and if it's + or -. + //We can actually decide that out. + + if( longestzeroes == 255 ) + { + //This is a nop. Emit a nop. + optable[i] = 255; + } + else + { + longestzeroes = OCTAVES-1-longestzeroes; //Actually do octave 0 least often. 
+ int iop = phaseinop[longestzeroes]++; + int toop = longestzeroes; + int toopmon = (longestzeroes<<1) | (iop & 1); + + //if it's the first time an octave happened this set, flag it. This may be used later in the process. + if( !already_hit_octaveplace[toopmon] ) + { + already_hit_octaveplace[toopmon] = 1; + toop |= 1<<5; + } + if( iop & 1 ) + { + toop |= 1<<6; + } + + //Handle add/subtract bit. + if( iop & 2 ) toop |= 1<<4; + + optable[i] = toop; + + //printf( " %d %d %d\n", iop, val, longestzeroes ); + } + //printf( "HBT: %d = %d\n", i, optable[i] ); + } + //exit(1); + + return 0; +} + + +void Small12BitRun( int8_t adcval ) +{ + int16_t adcv = adcval; + adcv *= FRONTEND_AMPLITUDE; + if( adcv > 127 ) adcv = 127; + if( adcv < -128 ) adcv = -128; + running_integral += adcv>>INITIAL_DECIMATE; + + uint32_t action = actiontable[actiontableplace++]; + int n; + for( n = 0; n < MAX_FREQS; n++, action>>=1 ) + { + if( !( action & 1 ) ) continue; + + int ao = which_octave_for_op[n]; + ao++; + if( ao >= NR_OF_OPS ) ao = 0; + which_octave_for_op[n] = ao; + + int op = optable[ao]; + + if( op == 255 ) + continue; + + //int octaveplace = op & 0xf; + + //Tricky: We share the integral with SIN and COS. + //We don't need to. It would produce a slightly cleaner signal. See: NOTE 3 + uint8_t octave = op & 0xf; + uint8_t intindex = octave * MAX_FREQS + n; + + //int invoct = OCTAVES-1-octaveplace; + int16_t diff; + + if( op & 0x10 ) //ADD + { + diff = integral_at[intindex] - running_integral; + } + else //SUBTRACT + { + diff = running_integral - integral_at[intindex]; + } + + integral_at[intindex] = running_integral; + +#ifdef TWELVEBIT + if( diff > 2000 || diff < -2000 ) printf( "!!!!!!!!!!!! %d !!!!!!!!!!!\n", diff ); +#elif defined( EIGHTBIT ) + if( diff > 124 || diff < -124 ) printf( "!!!!!!!!!!!! 
%d !!!!!!!!!!!\n", diff ); +#endif + + //uint8_t idx = ( intindex << 1 ); + intindex<<=1; + + if( op&(1<<6) ) + { + intindex |= 1; + } + + //printf( "%d: %d + %d * %d >> 8 - %d\n", intindex, cossindata[intindex], diff, mulmux[intindex/2], cossindata[intindex]>>4 ); + + uint8_t mulmuxval = mulmux[n]; + + + //Do you live on a super lame processor? {NOTE 4} + //If you do, you might not have good signed multiply operations. So, an alternative mechanism is found here. + // +) Able to more cleanly crush to an 8-bit multiply. + // +) Gets extra bit of precision back, i.e. the sign bit is now used as a data bit. + // -) More than 1 line of C code. Requires possible double invert. +#if 1 + //Terrible processor, i.e. PMS133 + if( 0 && diff < 0 ) + { + diff *= -1; + diff >>= (OCTAVES-1-octave); + + if( diff > 250 ) printf( "!!!!!!!**** %d ****!!!!!!!\n", diff ); + + diff = (uint16_t)diff * (uint16_t)mulmuxval; + diff >>= INTEGRATOR_DECIMATE; + + diff *= -1; + } + else + { + diff >>= (OCTAVES-1-octave); + + if( diff > 250 ) printf( "!!!!!!!**** %d ****!!!!!!!\n", diff ); + + diff = (uint16_t)diff * (uint16_t)mulmuxval; + diff >>= INTEGRATOR_DECIMATE; + } +#else + //Decent processor, i.e. ATTiny85. 
+ diff = ((diff>>(OCTAVES-1-octave)) * mulmuxval ) >> 6; +#endif + cossindata[intindex] = cossindata[intindex] + + diff + - (cossindata[intindex]>>4) + ; + +#ifdef EIGHTBIT + if( cossindata[intindex] > 0 ) cossindata[intindex]--; + if( cossindata[intindex] < 0 ) cossindata[intindex]++; +#endif + } + +} + + +void DoDFT12BitSmall( float * outbins, float * frequencies, int bins, const float * databuffer, int place_in_data_buffer, int size_of_data_buffer, float q, float speedup ) +{ + static int is_setup; + if( !is_setup ) { is_setup = 1; Setup( frequencies, bins ); } + static int last_place; + int i; + + for( i = last_place; i != place_in_data_buffer; i = (i+1)%size_of_data_buffer ) + { + int16_t ifr1 = (int16_t)( ((databuffer[i]) ) * 4095 ); + Small12BitRun( ifr1>>5 ); //5 = Actually only feed algorithm numbers from -128 to 127. + } + last_place = place_in_data_buffer; + + static int idiv; + idiv++; +#if 1 + for( i = 0; i < bins; i++ ) + { + int iss = cossindata[i*2+0]>>FINAL_DECIMATE; + int isc = cossindata[i*2+1]>>FINAL_DECIMATE; + int mux = iss * iss + isc * isc; + + if( mux <= 0 ) + { + outbins[i] = 0; + } + else + { + outbins[i] = sqrt((float)mux)/50.0; + +#ifdef TWELVEBIT + if( abs( cossindata[i*2+0] ) > 1000 || abs( cossindata[i*2+1] ) > 1000 ) + printf( "CS OVF %d/%d/%d/%f\n", i, cossindata[i*2+0], cossindata[i*2+1],outbins[i] ); +#elif defined( EIGHTBIT ) + if( abs( cossindata[i*2+0] ) > 120 || abs( cossindata[i*2+1] ) > 120 ) + printf( "CS OVF %d/%d/%d/%f\n", i, cossindata[i*2+0], cossindata[i*2+1],outbins[i] ); +#endif + } + } +#endif +} + + diff --git a/embeddedcommon/DFT12Small.h b/embeddedcommon/DFT12Small.h new file mode 100644 index 0000000..13506e6 --- /dev/null +++ b/embeddedcommon/DFT12Small.h @@ -0,0 +1,9 @@ +#ifndef _DFT12SMALL_H +#define _DFT12SMALL_H + +/* Note: Frequencies must be precompiled. DoDFT12BitSmall feeds every sample added to databuffer since its previous call into the DFT and writes one magnitude per bin to outbins; q and speedup are not read by this implementation. 
*/ + +void DoDFT12BitSmall( float * outbins, float * frequencies, int bins, const float * databuffer, int place_in_data_buffer, int size_of_data_buffer, float q, float speedup ); + +#endif + diff --git a/embeddedcommon/DFT32.c b/embeddedcommon/DFT32.c index 21df4dd..b587c6c 100644 --- a/embeddedcommon/DFT32.c +++ b/embeddedcommon/DFT32.c @@ -353,6 +353,3 @@ void DoDFTProgressive32( float * outbins, float * frequencies, int bins, const f #endif - - - diff --git a/embeddedcommon/DFT8Padauk.c b/embeddedcommon/DFT8Padauk.c new file mode 100644 index 0000000..0194799 --- /dev/null +++ b/embeddedcommon/DFT8Padauk.c @@ -0,0 +1,360 @@ +//NOTE DO NOT EDIT THIS FILE WITHOUT ALSO EDITING DFT12SMALL!!! +//WARNING: DFT8Turbo, DFT12Small is currently the only one that's actually working. +//THIS FILE DOES NOT CURRENTLY WORK. + +#include +#include +#include "DFT8Turbo.h" +#include + +#include + +#define MAX_FREQS (12) +#define OCTAVES (4) +/* Backporting notes: + * Change loop to only check if the output table says it's complete. + * Pre-multiply octaves in optable. +*/ + +/* + General procedure - use this code, with uint16_t or uint32_t buffers, and make sure none of the alarms go off. + All of the paths still require no more than an 8-bit multiply. + You should test with extreme cases, like square wave sweeps in, etc. +*/ + +//No larger than 8-bit signed values for integration or sincos +#define FRONTEND_AMPLITUDE (2) +#define INITIAL_DECIMATE (5) //Yurgh... only 3 bits of ADC data. That's 8 unique levels :( +#define INTEGRATOR_DECIMATE (8) +#define FINAL_DECIMATE (1) + + +#define OPTABLETYPE uint16_t //Make uint8_t if on attiny. + +//4x the hits (sin/cos and we need to do it once for each edge) +//8x for selecting a higher octave. +#define FREQREBASE 8.0 +#define TARGFREQ 10000.0 + +//These live in RAM. 
+int8_t running_integral; //Realistically treat as 12-bits on ramjet8 +int8_t integral_at[MAX_FREQS*OCTAVES]; //For ramjet8, make 12-bits +int8_t cossindata[MAX_FREQS*OCTAVES*2]; //Contains COS and SIN data. (32-bit for now, will be 16-bit, potentially even 8.) +uint8_t which_octave_for_op[MAX_FREQS]; //counts up, tells you which ocative you are operating on. PUT IN RAM. +uint8_t actiontableplace; + +#define NR_OF_OPS (4< hits per %d: %f %d (%.2f%% error)\n", topbin, f, ACTIONTABLESIZE, (float)ACTIONTABLESIZE/f, dhrpertable, err * 100.0 ); + if( dhrpertable >= ACTIONTABLESIZE ) + { + fprintf( stderr, "Error: Too many hits.\n" ); + exit(0); + } + + float advance_per_step = dhrpertable/(float)ACTIONTABLESIZE; + float fvadv = 0.5; + int j; + int countset = 0; + + //Tricky: We need to start fadv off at such a place that there won't be a hicchup when going back around to 0. + // I believe this is done by setting fvadv to 0.5 initially. Unsure. + + for( j = 0; j < ACTIONTABLESIZE; j++ ) + { + if( fvadv >= 0.5 ) + { + actiontable[j] |= 1<<(MAX_FREQS-1-topbin); //XXX-DEPARTURE (reversing the table symbols) + fvadv -= 1.0; + countset++; + } + fvadv += advance_per_step; + } + printf( " countset: %d\n", countset ); + } + //exit(1); + + + int phaseinop[OCTAVES] = { 0 }; + int already_hit_octaveplace[OCTAVES*2] = { 0 }; + for( i = 0; i < NR_OF_OPS; i++ ) + { + int longestzeroes = 0; + int val = i & ((1<> longestzeroes) & 1) == 0 ); longestzeroes++ ); + //longestzeroes goes: 255, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, ... + //This isn't great, because we need to also know whether we are attacking the SIN side or the COS side, and if it's + or -. + //We can actually decide that out. + + if( longestzeroes == 255 ) + { + //This is a nop. Emit a nop. + optable[i] = 65535; + } + else + { + longestzeroes = OCTAVES-1-longestzeroes; //Actually do octave 0 least often. 
+ int iop = phaseinop[longestzeroes]++; + int toop = longestzeroes; + int toopmon = (longestzeroes<<1) | (iop & 1); + + //if it's the first time an octave happened this set, flag it. This may be used later in the process. + if( !already_hit_octaveplace[toopmon] ) + { + already_hit_octaveplace[toopmon] = 1; + toop |= 1<<5; + } + + if( iop & 1 ) + { + toop |= 1<<6; + } + + //Handle add/subtract bit. + if( iop & 2 ) toop |= 1<<4; + + optable[i] = toop | ((longestzeroes*MAX_FREQS*2+(iop & 1))<<8); + + //printf( " %d %d %d\n", iop, val, longestzeroes ); + } + //printf( "HBT: %d = %d\n", i, optable[i] ); + } + //exit(1); + + return 0; +} + + +static uint16_t action; +static uint8_t note; +static uint8_t * memptr; +static uint16_t * romptr; +static uint8_t op; +static uint8_t note_offset; //Offset of current note. +static uint8_t octave; +static uint8_t intindex; +static int8_t diff; +static uint8_t tmp; + +void Padauk8BitRun( int8_t adcval ) +{ + int16_t adcv = adcval; + adcv *= FRONTEND_AMPLITUDE; + if( adcv > 127 ) adcv = 127; + if( adcv < -128 ) adcv = -128; + running_integral += adcv>>INITIAL_DECIMATE; + + uint8_t acc; + uint8_t * accM; + uint8_t mul2; + + action = actiontable[actiontableplace++]; + + //Counts are approximate counts for PMS133 + + for( note = MAX_FREQS; + note; //1CYC/PAIRED + note--, //1CYC/PAIRED (dzsn) + action>>=1 //2CYC (slc x2) + ) + { + //Everything inside this loop is executed ~3/4 * MAX_FREQS per audio sample. so.. ~9x. + //If op @ 4MHz, we get 44 cycles in here. I don't think we can do it. + + //If no operation is scheduled, continue. + if( !( action & 1 ) ) continue; //1CYC + + accM = which_octave_for_op - 1; //1CYC + accM = accM + note; //1CYC + //accM now points to the memory address containing which step we're on. + //We can use that to figure out which octave we should operate with. + memptr = accM; //1CYC + acc = *memptr; //2CYC (idxm) + acc++; //1CYC + //acc now contains the actual place we are indexing off of. 
+ //If it overflows, be sure to reset it. + if( acc == NR_OF_OPS+1 ) + { + acc = 1; + continue; + } + //We then update the memory with the new data. + *memptr = acc; //2CYC (idxm) + + //Now, we look up in optable what we're supposed to do. + accM = ((uint8_t*)optable) + acc*2; //1CYC -> ROM dad is stored in word pairs. + romptr = (uint16_t*)accM; //1CYC + acc = *romptr; //2CYC (ldtabl) + + //If we are on the one operation we aren't supposed to operate within, we should cancel and loop around. + //XXX XXX XXX XXX XXX This is wrong. We should probably handle this logic above. + //XXX XXX XXX XXX XXX Logic handled above. XXX PICK UP HERE!!! + printf( "+ %d %d %d\n", note, acc, *memptr ); + //if( acc == 255 ) //2CYC + //{ + // //This way, when we loop back around, it will be at index 0, and everything should flow gracefully. + // *memptr = 255; + // continue; + //} + if( acc == 255 ) + { + //We dun goofed. + fprintf( stderr, "Goofed.\n" ); + exit( 0 ); + } + + //This actually reads the current octave specifier into "op" + //BIT7: add or subtract + //BIT6: reset + //BIT5: Even or odd? + //BITS 0..4 = Which octave. + op = acc; //1CYC + + acc = (*romptr)>>8; //2CYC (ldtabh) -> Contains memory offset of which note to use. + note_offset = acc; + acc = acc + note; //1CYC + accM = (uint8_t*)integral_at-1 + acc; //1CYC + memptr = accM; //1CYC + acc = *memptr; //2CYC idxm + + //acc now contains the running integral of the last time we were on this cell. + if( op & (1<<7) ) //ADD //2CYC + { + acc = acc - running_integral; //1CYC + } + else //SUBTRACT + { + tmp = acc; //1CYC + acc = running_integral; //1CYC + acc = acc - tmp; //1CYC + } + + diff = acc; //1CYC + + //Assume 2 extra cycles of overhead for if/else. //2 CYC + + acc = running_integral; //1CYC + //Store the current running integral back into this note's running integral for next time. + *memptr = acc; //2CYC + + // op = info about what op we're on. WARNING: Bitfield. + // diff = how much to add to current value. 
+ // note_offset = index of current operative note position. + octave = op & 0x1f; //XXX TODO + + printf( "%d %d %d %d\n", op, diff, note_offset, octave ); + accM = (uint8_t*)(mulmux - 1); //1CYC + accM = accM + note*2; //1CYC + romptr = accM; //1CYC + acc = *romptr; //2CYC + mul2 = acc; //1CYC + + if( diff < 0 ) //[2CYC] (t0sn on MSB) + { + diff *= -1; //[1CYC] (neg M) + diff >>= (OCTAVES-1-octave); // ???TRICKY??? Should this be a multiply? + + //if( diff > 250 ) printf( "!!!!!!!**** %d ****!!!!!!!\n", diff ); + + diff = ((uint16_t)diff * (uint16_t)mul2)>>INTEGRATOR_DECIMATE; //[3CYC] + diff *= -1; //[1CYC] + } + else + { + diff >>= (OCTAVES-1-octave); + //if( diff > 250 ) printf( "!!!!!!!**** %d ****!!!!!!!\n", diff ); + diff = ((uint16_t)diff * (uint16_t)mul2)>>INTEGRATOR_DECIMATE; + } + + //@48 cycles :( :( :( + + //printf( "%d\n", diff ); + + int8_t tmp = + cossindata[intindex] //[3CYC] + + diff //[1CYC] + - (cossindata[intindex]>>4) //[2CYC] + ; + + if( tmp > 0 ) tmp--; //2CYC + if( tmp < 0 ) tmp++; //2CYC + cossindata[intindex] = tmp; //2CYC + //60ish cycles :( :( :( + } +} + + +void DoDFT8BitPadauk( float * outbins, float * frequencies, int bins, const float * databuffer, int place_in_data_buffer, int size_of_data_buffer, float q, float speedup ) +{ + static int is_setup; + if( !is_setup ) { is_setup = 1; Setup( frequencies, bins ); } + static int last_place; + int i; + + for( i = last_place; i != place_in_data_buffer; i = (i+1)%size_of_data_buffer ) + { + int16_t ifr1 = (int16_t)( ((databuffer[i]) ) * 4095 ); + Padauk8BitRun( ifr1>>5 ); //5 = Actually only feed algorithm numbers from -128 to 127. 
+ } + last_place = place_in_data_buffer; + + static int idiv; + idiv++; +#if 1 + for( i = 0; i < bins; i++ ) + { + int iss = cossindata[i*2+0]>>FINAL_DECIMATE; + int isc = cossindata[i*2+1]>>FINAL_DECIMATE; + int mux = iss * iss + isc * isc; + + if( mux <= 0 ) + { + outbins[i] = 0; + } + else + { + outbins[i] = sqrt((float)mux)/50.0; + + if( abs( cossindata[i*2+0] ) > 120 || abs( cossindata[i*2+1] ) > 120 ) + printf( "CS OVF %d/%d/%d/%f\n", i, cossindata[i*2+0], cossindata[i*2+1],outbins[i] ); + + } + } +#endif +} + + diff --git a/embeddedcommon/DFT8Padauk.h b/embeddedcommon/DFT8Padauk.h new file mode 100644 index 0000000..cb6387a --- /dev/null +++ b/embeddedcommon/DFT8Padauk.h @@ -0,0 +1,9 @@ +#ifndef _DFT8PADAUK_H +#define _DFT8PADAUK_H + +/* Note: Frequencies must be precompiled. */ + +void DoDFT8BitPadauk( float * outbins, float * frequencies, int bins, const float * databuffer, int place_in_data_buffer, int size_of_data_buffer, float q, float speedup ); + +#endif + diff --git a/embeddedcommon/DFT8Turbo.c b/embeddedcommon/DFT8Turbo.c new file mode 100644 index 0000000..6645a41 --- /dev/null +++ b/embeddedcommon/DFT8Turbo.c @@ -0,0 +1,312 @@ +//NOTE DO NOT EDIT THIS FILE WITHOUT ALSO EDITING DFT12SMALL!!! + +#include +#include +#include "DFT8Turbo.h" +#include + +#include + +#define MAX_FREQS (12) +#define OCTAVES (4) + +/* + General procedure - use this code, with uint16_t or uint32_t buffers, and make sure none of the alarms go off. + All of the paths still require no more than an 8-bit multiply. + You should test with extreme cases, like square wave sweeps in, etc. +*/ + +//No larger than 8-bit signed values for integration or sincos +#define FRONTEND_AMPLITUDE (2) +#define INITIAL_DECIMATE (5) //Yurgh... only 3 bits of ADC data. That's 8 unique levels :( +#define INTEGRATOR_DECIMATE (8) +#define FINAL_DECIMATE (1) + + +#define OPTABLETYPE uint16_t //Make uint8_t if on attiny. 
+ +//4x the hits (sin/cos and we need to do it once for each edge) +//8x for selecting a higher octave. +#define FREQREBASE 8.0 +#define TARGFREQ 10000.0 + +//These live in RAM. +int8_t running_integral; //Realistically treat as 12-bits on ramjet8 +int8_t integral_at[MAX_FREQS*OCTAVES]; //For ramjet8, make 12-bits +int8_t cossindata[MAX_FREQS*OCTAVES*2]; //Contains COS and SIN data. (32-bit for now, will be 16-bit, potentially even 8.) +uint8_t which_octave_for_op[MAX_FREQS]; //counts up, tells you which ocative you are operating on. PUT IN RAM. +uint8_t actiontableplace; + +#define NR_OF_OPS (4< hits per %d: %f %d (%.2f%% error)\n", topbin, f, ACTIONTABLESIZE, (float)ACTIONTABLESIZE/f, dhrpertable, err * 100.0 ); + if( dhrpertable >= ACTIONTABLESIZE ) + { + fprintf( stderr, "Error: Too many hits.\n" ); + exit(0); + } + + float advance_per_step = dhrpertable/(float)ACTIONTABLESIZE; + float fvadv = 0.5; + int j; + int countset = 0; + + //Tricky: We need to start fadv off at such a place that there won't be a hicchup when going back around to 0. + // I believe this is done by setting fvadv to 0.5 initially. Unsure. + + for( j = 0; j < ACTIONTABLESIZE; j++ ) + { + if( fvadv >= 0.5 ) + { + actiontable[j] |= 1<> longestzeroes) & 1) == 0 ); longestzeroes++ ); + //longestzeroes goes: 255, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, ... + //This isn't great, because we need to also know whether we are attacking the SIN side or the COS side, and if it's + or -. + //We can actually decide that out. + + if( longestzeroes == 255 ) + { + //This is a nop. Emit a nop. + optable[i] = 255; + } + else + { + longestzeroes = OCTAVES-1-longestzeroes; //Actually do octave 0 least often. + int iop = phaseinop[longestzeroes]++; + int toop = longestzeroes; + int toopmon = (longestzeroes<<1) | (iop & 1); + + //if it's the first time an octave happened this set, flag it. This may be used later in the process. 
+ if( !already_hit_octaveplace[toopmon] ) + { + already_hit_octaveplace[toopmon] = 1; + toop |= 1<<5; + } + if( iop & 1 ) + { + toop |= 1<<6; + } + + //Handle add/subtract bit. + if( iop & 2 ) toop |= 1<<4; + + optable[i] = toop; + + //printf( " %d %d %d\n", iop, val, longestzeroes ); + } + //printf( "HBT: %d = %d\n", i, optable[i] ); + } + //exit(1); + + return 0; +} + + +void Turbo8BitRun( int8_t adcval ) +{ + int16_t adcv = adcval; + adcv *= FRONTEND_AMPLITUDE; + if( adcv > 127 ) adcv = 127; + if( adcv < -128 ) adcv = -128; + running_integral += adcv>>INITIAL_DECIMATE; + + uint16_t action = actiontable[actiontableplace++]; + uint8_t n; + + //Counts are approximate counts for PMS133 + + for( n = 0; //1CYC + n < MAX_FREQS; //2CYC + n++, //1CYC + action>>=1 //2CYC + ) + { + //Everything inside this loop is executed ~3/4 * MAX_FREQS per audio sample. so.. ~9x. + //If op @ 4MHz, we get 44 cycles in here. + + //If no operation is scheduled, continue. + if( !( action & 1 ) ) continue; //1CYC + + uint8_t ao = which_octave_for_op[n]; //4CYC + ao++; //1CYC + if( ao >= NR_OF_OPS ) ao = 0; //2CYC + which_octave_for_op[n] = ao; //2CYC (idxm) + + uint8_t op = optable[ao]; //"theoretically" 3CYC (if you align things right) + //1CYC (Put A into specific RAM location) + + //If we are on the one thing we aren't supposed to operate within, cancel. + if( op == 255 ) continue; //2CYC (if op is in A) + + //Tricky: We share the integral with SIN and COS. + //We don't need to. It would produce a slightly cleaner signal. See: NOTE 3 + uint8_t octave = op & 0xf; //1CYC (if op is in A) + + + uint8_t intindex = octave * MAX_FREQS //Load mulop with 12 [2CYC]; mul [1CYC] + + n; //Add [1CYC] + //[1CYC] more cycle to write A into RAM[(intindex) + //int invoct = OCTAVES-1-octaveplace; + int8_t diff; + + if( op & 0x10 ) //ADD //2CYC + { + diff = integral_at[intindex] //Assume "IntIndex" is in A, add integral_at to A [1], move A to an index [1]. [2] to read into acc. 
[4CYC] + - running_integral; //1CYC to subtract. + //1CYC to write diff into a memory location. + } + else //SUBTRACT + { + diff = running_integral - integral_at[intindex]; + } + + //30 cycles so far. + + integral_at[intindex] = running_integral; //[3CYC] + + //if( diff > 124 || diff < -124 ) printf( "!!!!!!!!!!!! %d !!!!!!!!!!!\n", diff ); + + //uint8_t idx = ( intindex << 1 ); //Overwrite intindex. + intindex <<= 1; //1CYC + + if( op&(1<<6) ) //2CYC + { + intindex |= 1; //1CYC + } + + uint8_t mulmuxval = mulmux[n]; //[4CYC] + + + //Do you live on a super lame processor? {NOTE 4} + //If you do, you might not have good signed multiply operations. So, an alternative mechanism is found here. + // +) Able to more cleanly crush to an 8-bit multiply. + // +) Gets extra bit of precision back, i.e. the sign bit is now used as a data bit. + // -) More than 1 line of C code. Requires possible double invert. +#if 1 + //rough processor, i.e. PMS133 + if( diff < 0 ) //[2CYC] + { + diff *= -1; //[1CYC] + diff >>= (OCTAVES-1-octave); // ???TRICKY??? Should this be a multiply? + + //if( diff > 250 ) printf( "!!!!!!!**** %d ****!!!!!!!\n", diff ); + + diff = ((uint16_t)diff * (uint16_t)mulmuxval)>>INTEGRATOR_DECIMATE; //[3CYC] + diff *= -1; //[1CYC] + } + else + { + diff >>= (OCTAVES-1-octave); + //if( diff > 250 ) printf( "!!!!!!!**** %d ****!!!!!!!\n", diff ); + diff = ((uint16_t)diff * (uint16_t)mulmuxval)>>INTEGRATOR_DECIMATE; + } + + //@48 cycles :( :( :( + +#else + //Decent processor, i.e. ATTiny85. 
+ diff = ((diff>>(OCTAVES-1-octave)) * mulmuxval ) >> 6; +#endif + //printf( "%d\n", diff ); + + int8_t tmp = + cossindata[intindex] //[3CYC] + + diff //[1CYC] + - (cossindata[intindex]>>4) //[2CYC] + ; + + if( tmp > 0 ) tmp--; //2CYC + if( tmp < 0 ) tmp++; //2CYC + cossindata[intindex] = tmp; //2CYC + //60ish cycles :( :( :( + } +} + + +void DoDFT8BitTurbo( float * outbins, float * frequencies, int bins, const float * databuffer, int place_in_data_buffer, int size_of_data_buffer, float q, float speedup ) +{ + static int is_setup; + if( !is_setup ) { is_setup = 1; Setup( frequencies, bins ); } + static int last_place; + int i; + + for( i = last_place; i != place_in_data_buffer; i = (i+1)%size_of_data_buffer ) + { + int16_t ifr1 = (int16_t)( ((databuffer[i]) ) * 4095 ); + Turbo8BitRun( ifr1>>5 ); //5 = Actually only feed algorithm numbers from -128 to 127. + } + last_place = place_in_data_buffer; + + static int idiv; + idiv++; +#if 1 + for( i = 0; i < bins; i++ ) + { + int iss = cossindata[i*2+0]>>FINAL_DECIMATE; + int isc = cossindata[i*2+1]>>FINAL_DECIMATE; + int mux = iss * iss + isc * isc; + + if( mux <= 0 ) + { + outbins[i] = 0; + } + else + { + outbins[i] = sqrt((float)mux)/50.0; + + if( abs( cossindata[i*2+0] ) > 120 || abs( cossindata[i*2+1] ) > 120 ) + printf( "CS OVF %d/%d/%d/%f\n", i, cossindata[i*2+0], cossindata[i*2+1],outbins[i] ); + + } + } +#endif +} + + diff --git a/embeddedcommon/DFT8Turbo.c.attic b/embeddedcommon/DFT8Turbo.c.attic new file mode 100644 index 0000000..9c7b7b4 --- /dev/null +++ b/embeddedcommon/DFT8Turbo.c.attic @@ -0,0 +1,295 @@ +#include +#include +#include "DFT8Turbo.h" +#include + +#include + +#define MAX_FREQS (24) +#define OCTAVES (5) + + +/* + * The first thought was using an integration map and only operating when we need to, to pull the data out. 
+ * Now we're doing the thing below this block comment + int16_t accumulated_total; //2 bytes + int16_t last_accumulated_total_at_bin[MAX_FREQS*2]; //24 * 2 * sizeof(int16_t) = 96 bytes. + uint8_t current_time; //1 byte + uint8_t placecode[MAX_FREQS]; +*/ +//OK... We don't have enough ram to sum everything... can we do something wacky with multiple octaves to sum everything better? +//i.e. +// +// 4332322132212210 +// +// ++++++++++++++++----------------- +// ++++++++-------- +// ++++----++++---- +// ++--++--++--++-- +// +-+-+-+-+-+-+-+- +// +// Don't forget we need to do this for sin and cos. +// Instead of making these pluses, can we make it a multiplier? +// How can we handle sin+cos? +// +// Is it possible to do this for every frame? I.e. for each of the 24 notes, multiply with their current place in table? +// That's interesting. It's not like a sin table. +// There is no "multiply" in the attiny instruction set for attiny85. +// There is, however, for attiny402. + +//Question: Can we do five octaves, or does this need to be balanced? +//Question2: Should we weight higher octaves? + + +//ATTiny402: 256x8 RAM, 4096x8 FLASH LPM: 3 cycles + FMUL: 2 cycles << Do stacked sin waves? +//ATtiny85: 512x8 RAM, 8192x8 FLASH LPM: 3 cycles + NO MULTIPLY << Do square waves? + + +/* Approaches: + + on ATtiny402: Stacked sin approach. + Say 16 MHz, though 12 MHz is interesting... + 16k SPS: 1k cycles per; say 24 bins per; 41 cycles per bin = hard. But is it too hard? + 20 cycles per s/c. + read place in stacked table (8? bits) 3 cycles + + //Inner loop = 17 cycles. + read stacked table (8 bits), 3 cycles + fractional multiply table with current value. 2 cycles + read current running for note 2 cycles (LDS = 3 cycles) + subtract a shifted version, to make it into an IIR. (4 cycles) + add in current values. (2 cycles) + store data back to ram (2 cycles) + advance place in stacked table (8?bits) 1 cycle + + store place in stacked table (8? bits) 3 cycles? 
+ + //What if we chunk ADC updates into groups of 4 or 8? + //This is looking barely possible. + + on attiny85: scheduled adds/subtracts (like a stacked-square-wave-table) + //XXX TODO! + +*/ + +/* Ok... Let's think about the ATTiny402. 256x8 RAM + 4096x8 FLASH. + + * We can create a table which has all octaves overlaid. + * We would need to keep track of: + * 12 x 2 x 2 = 48 bytes = Current sin/cos values. + * 12 x 2 = 24 bytes = Current place in table. = 72 bytes + * We would need to store: + * The layered lookup table. If possible, keep @ 256 bytes to simplify math ops. + * The speed by which each note needs to advance. + * We would need to: + * Read current running place. X 8 cycles + * Use that place to look up into sin table. 3 cycles + * Read running val 4 cycles best case + * Multiply out the sin + IIR 5 cycles + * Store running val 4 cycles best case + * Cos-advance that place to look up into sin table. 4 cycles + * Read running val 4 cycles best case + * Multiply out the sin + IIR 5 cycles + * Store running val 4 cycles best case. + * Read how much to advance X by. 4 cycles + * (Cos^2+Sin^2) 8? + * Store it. 4 cycles best case. + * = 48 x 12 = 576 cycles. Assume 10 MHz @ 16k SPS. We're OK (625 samples) +*/ + +// Observation: The two tables are actually mirror images of each other, well diagonally mirrored. That's odd. But, would take CPU to exploit. + +#define SSTABLESIZE 256 +int8_t spikysin_interleved_cos[SSTABLESIZE][2]; +uint32_t advancespeed[MAX_FREQS]; + +static int CompTableWithPhase( int nelements, float phase, int scaling ) +{ + int highest = 0; + int i; + for( i = 0; i < nelements; i++ ) + { + float taued = i * 3.141592 * 2.0 / nelements; + int o; + float combsin = 0; + for( o = 0; o < OCTAVES; o++ ) + { + combsin += sin( taued * (1< highest ) highest = csadapt; + if( -csadapt > highest ) highest = -csadapt; + + if( csadapt > 127 ) csadapt = 127; + if( csadapt < -128 ) csadapt = -128; //tricky: Keep balanced. 
+ spikysin_interleved_cos[i][0] = csadapt; + + float combcos = 0; + for( o = 0; o < OCTAVES; o++ ) + { + combcos += cos( taued * (1< highest ) highest = csadapt; + if( -csadapt > highest ) highest = -csadapt; + + if( csadapt > 127 ) csadapt = 127; + if( csadapt < -128 ) csadapt = -128; //tricky: Keep balanced. + spikysin_interleved_cos[i][1] = csadapt; + } + return highest; +} + + +static int Setup( float * frequencies, int bins ) +{ + int i; + + //Since start position/phase is arbitrary, we should try several to see which gives us the best dynamic range. + float tryphase = 0; + + float bestphase = 0; + int highest_val_at_best_phase = 1000000; + + for( tryphase = 0; tryphase < 3.14159; tryphase += 0.001 ) + { + int highest = CompTableWithPhase( SSTABLESIZE, tryphase, 65536 ); + if( highest < highest_val_at_best_phase ) + { + highest_val_at_best_phase = highest; + bestphase = tryphase; + } + } + printf( "Best comp: %f : %d\n", bestphase, highest_val_at_best_phase ); + + //Set this because we would overflow the sinm and cosm regs if we don't. This is sort of like a master volume. + //use this as that input volume knob thing. + float further_reduce = 1.0; + + CompTableWithPhase( SSTABLESIZE, bestphase, (65536*128*further_reduce)/highest_val_at_best_phase ); + +// for( i = 0; i < SSTABLESIZE; i++ ) +// { +// printf( "%d %d\n", spikysin_interleved_cos[i*2+0], spikysin_interleved_cos[i*2+1] ); +// } + + for( i = 0; i < MAX_FREQS; i++ ) + { + //frequencies[i] = SPS / Freq + // Need to decide how quickly we sweep through the table. 
+ advancespeed[i] = 65536 * 256.0 /* fixed point */ * 256.0 /* size of table */ / frequencies[i]; + //printf( "%f\n", frequencies[i] ); + } + return 0; +} + + +/* +uint8_t spikysin_interleved_cos[256*2]; +uint16_t advancespeed[MAX_FREQS]; +*/ + +float toutbins[MAX_FREQS]; + +struct notedat +{ + uint32_t time; + int32_t sinm; + int32_t cosm; +}; + +static struct notedat nd[MAX_FREQS]; + +void Turbo8BitRun( int8_t adcval ) +{ + int i; + for( i = 0; i < MAX_FREQS; i++ ) + { + uint32_t ct = nd[i].time; + int32_t muxres; + int32_t running; + int32_t rdesc, rdess; + uint8_t * spikysintable = &spikysin_interleved_cos[(ct>>24)][0]; + + int8_t ss = *(spikysintable++); + + #define DECIR 8 + + muxres = ((int16_t)adcval * ss + (1<<(DECIR-1)) ) >> (DECIR); + running = nd[i].cosm; + running += muxres; + rdesc = running >> 8; + running -= rdesc >> 3; + + nd[i].cosm = running; +if( i == 0) printf( "MRX %5d %9d %9d %9d %9d\n", muxres, adcval, ss, running, nd[i].sinm ); + int8_t sc = *(spikysintable++); + muxres = ((int16_t)adcval * sc + (1<<(DECIR-1)) ) >> (DECIR); + running = nd[i].sinm; + running += muxres; + + rdess = running>>8; + running -= rdess >> 3; + + nd[i].sinm = running; + + nd[i].time = ct + advancespeed[i]; + + toutbins[i] = rdess * rdess + rdesc * rdesc; + //printf( "%d %d = %f %p\n", rdess, rdesc, toutbins[i], &toutbins[i] ); + } + + static uint8_t stater; +/* stater++; + if( stater == 16 ) + { + stater = 0; + for( i = 0; i < MAX_FREQS; i++ ) + { + nd[i].sinm -= nd[i].sinm >> 12; + nd[i].cosm -= nd[i].cosm >> 12; + nd[i].sinm += 8; + nd[i].cosm += 8; + } + }*/ +} + + +void DoDFT8BitTurbo( float * outbins, float * frequencies, int bins, const float * databuffer, int place_in_data_buffer, int size_of_data_buffer, float q, float speedup ) +{ + static int is_setup; + if( !is_setup ) { is_setup = 1; Setup( frequencies, bins ); } + static int last_place; + int i; + + for( i = last_place; i != place_in_data_buffer; i = (i+1)%size_of_data_buffer ) + { + int16_t ifr1 = 
(int16_t)( ((databuffer[i]) ) * 4095 ); + //ifr1 += 4095; + //ifr1 += 512; + Turbo8BitRun( ifr1>>5 ); //6 = Actually only feed algorithm numbers from -64 to 63. + } + last_place = place_in_data_buffer; + + for( i = 0; i < bins; i++ ) + { + outbins[i] = 0; + } + for( i = 0; i < MAX_FREQS; i++ ) + { + int iss = nd[i].sinm>>8; + int isc = nd[i].cosm>>8; + int mux = iss * iss + isc * isc; + if( mux == 0 ) mux = 1; + if( i == 0 ) + printf( "MUX: %d %d\n", isc, iss ); + outbins[i+MAX_FREQS] = sqrt(mux)/200.0; + } + +} + + diff --git a/embeddedcommon/DFT8Turbo.h b/embeddedcommon/DFT8Turbo.h new file mode 100644 index 0000000..257cf89 --- /dev/null +++ b/embeddedcommon/DFT8Turbo.h @@ -0,0 +1,9 @@ +#ifndef _DFT8TURBO_H +#define _DFT8TURBO_H + +/* Note: Frequencies must be precompiled. */ + +void DoDFT8BitTurbo( float * outbins, float * frequencies, int bins, const float * databuffer, int place_in_data_buffer, int size_of_data_buffer, float q, float speedup ); + +#endif + diff --git a/embeddedcommon/DFT8Turbo.h.attic b/embeddedcommon/DFT8Turbo.h.attic new file mode 100644 index 0000000..257cf89 --- /dev/null +++ b/embeddedcommon/DFT8Turbo.h.attic @@ -0,0 +1,9 @@ +#ifndef _DFT8TURBO_H +#define _DFT8TURBO_H + +/* Note: Frequencies must be precompiled. */ + +void DoDFT8BitTurbo( float * outbins, float * frequencies, int bins, const float * databuffer, int place_in_data_buffer, int size_of_data_buffer, float q, float speedup ); + +#endif + diff --git a/embeddedcommon/embeddednf.h b/embeddedcommon/embeddednf.h index d360020..51d3672 100644 --- a/embeddedcommon/embeddednf.h +++ b/embeddedcommon/embeddednf.h @@ -32,7 +32,7 @@ //We take the raw signal off of the #ifndef FILTER_BLUR_PASSES -#define FILTER_BLUR_PASSES 2 +#define FILTER_BLUR_PASSES 1 #endif //Determines bit shifts for where notes lie. We represent notes with an