From 1432f22b775cd3aca32ccd3f14f528fd6cec907c Mon Sep 17 00:00:00 2001 From: cnlohr Date: Mon, 29 Apr 2019 00:04:28 -0400 Subject: [PATCH] Switch over to making 8Turbo *actually* turbo. --- colorchord2/turbo8bit.conf | 1 - embeddedcommon/DFT12Small.c | 359 ++++++++++++++++++++++++++++++++++++ embeddedcommon/DFT12Small.h | 9 + embeddedcommon/DFT8Turbo.c | 213 ++++++++++----------- 4 files changed, 477 insertions(+), 105 deletions(-) create mode 100644 embeddedcommon/DFT12Small.c create mode 100644 embeddedcommon/DFT12Small.h diff --git a/colorchord2/turbo8bit.conf b/colorchord2/turbo8bit.conf index 53fb5e0..d9ce3d4 100644 --- a/colorchord2/turbo8bit.conf +++ b/colorchord2/turbo8bit.conf @@ -28,7 +28,6 @@ channels = 2 #10000 / 2^4{octaves} / 8 base_hz = 82.41 samplerate = 10000 - freqbins = 12 octaves = 4 diff --git a/embeddedcommon/DFT12Small.c b/embeddedcommon/DFT12Small.c new file mode 100644 index 0000000..41c9fdd --- /dev/null +++ b/embeddedcommon/DFT12Small.c @@ -0,0 +1,359 @@ +#include +#include +#include "DFT12Small.h" +#include + +#include + + +#define MAX_FREQS (12) +#define OCTAVES (4) + +/* + General procedure - use this code, with uint16_t or uint32_t buffers, and make sure none of the alarms go off. + All of the paths still require no more than an 8-bit multiply. + You should test with extreme cases, like square wave sweeps in, etc. +*/ + +//#define TWELVEBIT +#define EIGHTBIT + +#ifdef TWELVEBIT +//No larger than 12-bit signed values for integration or sincos +#define FRONTEND_AMPLITUDE (0) +#define INITIAL_DECIMATE (2) +#define INTEGRATOR_DECIMATE (8) +#define FINAL_DECIMATE (4) +#elif defined( EIGHTBIT ) +//No larger than 8-bit signed values for integration or sincos +#define FRONTEND_AMPLITUDE (2) +#define INITIAL_DECIMATE (5) //Yurgh... only 3 bits of ADC data. That's 8 unique levels :( +#define INTEGRATOR_DECIMATE (8) +#define FINAL_DECIMATE (1) +#endif + +//Right now, we need 8*freqs*octaves bytes. +//This is bad. +//What can we do to fix it? + +//4x the hits (sin/cos and we need to do it once for each edge) +//8x for selecting a higher octave. +#define FREQREBASE 8.0 +#define TARGFREQ 10000.0 + +/* Tradeoff guide: + + * We will optimize for RAM size here. + + * INITIAL_DECIMATE; A larger decimation: {NOTE 1} + +) Reduces the bit depth needed for the integral map. + If you use "1" and a fully saturted map (highest note is every sample), it will not overflow a signed 12-bit number. + -) Increases noise. + With full-scale: 0->1 minimal 1->2 minimal 2->3 significantly noticable, 3->4 major. + If sound is quieter, it matters more. Not sure with other changes in system. (2) seems ok. + -) If you make it (1) or (0) You can't do an 8-bit multiply and keep the output in a signed range. + Also, other things, like frequency of hits can manipulate the maximum bit depth needed for integral map. + + * If you weight the bins in advance see "mulmux", you can: {NOTE 2} + +) potentially use shallower bit depth but + -) have to compute the multiply every time you update the bin. + + * You can use a modified-square-wave which only integrates for 1/2 of the duty cycle. {NOTE 3} + +) uses 1/2 the integral memory. + -) Not as pretty of an output. See "integral_at" + + *TODO: Investigate using all unsigned (to make multiply and/or 12-bit storage easier) + *TODO: Consider a mode which has 16-bit integrals, but still 8-bit cossin data. + + So, the idea here is we would keep a running total of the current ADC value, kept away in a int16_t. + It is constantly summing, so we can take an integral of it. Or rather an integral range. + + Over time, we perform operations like adding or subtracting from a current place. It basically is + a DFT where the kernel is computed using square waves (or modified square waves) +*/ + +//These live in RAM. +int16_t running_integral; //Realistically treat as 12-bits on ramjet8 +int16_t integral_at[MAX_FREQS*OCTAVES]; //For ramjet8, make 12-bits +int32_t cossindata[MAX_FREQS*OCTAVES*2]; //Contains COS and SIN data. (32-bit for now, will be 16-bit, potentially even 8.) +uint8_t which_octave_for_op[MAX_FREQS]; //counts up, tells you which ocative you are operating on. PUT IN RAM. +uint8_t actiontableplace; + +#define NR_OF_OPS (4< hits per %d: %f %d (%.2f%% error)\n", topbin, f, ACTIONTABLESIZE, (float)ACTIONTABLESIZE/f, dhrpertable, err * 100.0 ); + if( dhrpertable >= ACTIONTABLESIZE ) + { + fprintf( stderr, "Error: Too many hits.\n" ); + exit(0); + } + + float advance_per_step = dhrpertable/(float)ACTIONTABLESIZE; + float fvadv = 0.5; + int j; + int countset = 0; + + //Tricky: We need to start fadv off at such a place that there won't be a hicchup when going back around to 0. + // I believe this is done by setting fvadv to 0.5 initially. Unsure. + + for( j = 0; j < ACTIONTABLESIZE; j++ ) + { + if( fvadv >= 0.5 ) + { + actiontable[j] |= 1<> longestzeroes) & 1) == 0 ); longestzeroes++ ); + //longestzeroes goes: 255, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, ... + //This isn't great, because we need to also know whether we are attacking the SIN side or the COS side, and if it's + or -. + //We can actually decide that out. + + if( longestzeroes == 255 ) + { + //This is a nop. Emit a nop. + optable[i] = 255; + } + else + { + longestzeroes = OCTAVES-1-longestzeroes; //Actually do octave 0 least often. + int iop = phaseinop[longestzeroes]++; + int toop = longestzeroes; + int toopmon = (longestzeroes<<1) | (iop & 1); + + //if it's the first time an octave happened this set, flag it. This may be used later in the process. + if( !already_hit_octaveplace[toopmon] ) + { + already_hit_octaveplace[toopmon] = 1; + toop |= 1<<5; + } + if( iop & 1 ) + { + toop |= 1<<6; + } + + //Handle add/subtract bit. + if( iop & 2 ) toop |= 1<<4; + + optable[i] = toop; + + //printf( " %d %d %d\n", iop, val, longestzeroes ); + } + //printf( "HBT: %d = %d\n", i, optable[i] ); + } + //exit(1); + + return 0; +} + + +void Small12BitRun( int8_t adcval ) +{ + int16_t adcv = adcval; + adcv *= FRONTEND_AMPLITUDE; + if( adcv > 127 ) adcv = 127; + if( adcv < -128 ) adcv = -128; + running_integral += adcv>>INITIAL_DECIMATE; + +#define dprintf( ... ) + + uint32_t action = actiontable[actiontableplace++]; + int n; + dprintf( "%4d ", actiontableplace ); + for( n = 0; n < MAX_FREQS; n++ ) + { + if( action & (1<= NR_OF_OPS ) ao = 0; + which_octave_for_op[n] = ao; + + if( op == 255 ) + { + dprintf( "*" ); //NOP + } + else + { + //int octaveplace = op & 0xf; + + //Tricky: We share the integral with SIN and COS. + //We don't need to. It would produce a slightly cleaner signal. See: NOTE 3 + uint8_t octave = op & 0xf; + uint8_t intindex = octave * MAX_FREQS + n; + + //int invoct = OCTAVES-1-octaveplace; + int16_t diff; + + if( op & 0x10 ) //ADD + { + diff = integral_at[intindex] - running_integral; + dprintf( "%c", 'a' + (op & 0xf) ); + } + else //SUBTRACT + { + diff = running_integral - integral_at[intindex]; + dprintf( "%c", 'A' + (op & 0xf) ); + } + + integral_at[intindex] = running_integral; + +#ifdef TWELVEBIT + if( diff > 2000 || diff < -2000 ) printf( "!!!!!!!!!!!! %d !!!!!!!!!!!\n", diff ); +#elif defined( EIGHTBIT ) + if( diff > 124 || diff < -124 ) printf( "!!!!!!!!!!!! %d !!!!!!!!!!!\n", diff ); +#endif + + uint8_t idx = ( intindex << 1 ); + if( op&(1<<6) ) + { + idx |= 1; + } + + //printf( "%d: %d + %d * %d >> 8 - %d\n", idx, cossindata[idx], diff, mulmux[idx/2], cossindata[idx]>>4 ); + + uint8_t mulmuxval = mulmux[n]; + + + //Do you live on a super lame processor? {NOTE 4} + //If you do, you might not have good signed multiply operations. So, an alternative mechanism is found here. + // +) Able to more cleanly crush to an 8-bit multiply. + // +) Gets extra bit of precision back, i.e. the sign bit is now used as a data bit. + // -) More than 1 line of C code. Requires possible double invert. +#if 1 + //Terrible processor, i.e. PMS133 + if( 0 && diff < 0 ) + { + diff *= -1; + diff >>= (OCTAVES-1-octave); + + if( diff > 250 ) printf( "!!!!!!!**** %d ****!!!!!!!\n", diff ); + + diff = (uint16_t)diff * (uint16_t)mulmuxval; + diff >>= INTEGRATOR_DECIMATE; + + diff *= -1; + } + else + { + diff >>= (OCTAVES-1-octave); + + if( diff > 250 ) printf( "!!!!!!!**** %d ****!!!!!!!\n", diff ); + + diff = (uint16_t)diff * (uint16_t)mulmuxval; + diff >>= INTEGRATOR_DECIMATE; + } +#else + //Decent processor, i.e. ATTiny85. + diff = ((diff>>(OCTAVES-1-octave)) * mulmuxval ) >> 6; +#endif + cossindata[idx] = cossindata[idx] + + diff + - (cossindata[idx]>>4) + ; + +#ifdef EIGHTBIT + if( cossindata[idx] > 0 ) cossindata[idx]--; + if( cossindata[idx] < 0 ) cossindata[idx]++; +#endif + } + } + else + { + dprintf( " " ); + } + } + dprintf( "\n" ); + +} + + +void DoDFT12BitSmall( float * outbins, float * frequencies, int bins, const float * databuffer, int place_in_data_buffer, int size_of_data_buffer, float q, float speedup ) +{ + static int is_setup; + if( !is_setup ) { is_setup = 1; Setup( frequencies, bins ); } + static int last_place; + int i; + + for( i = last_place; i != place_in_data_buffer; i = (i+1)%size_of_data_buffer ) + { + int16_t ifr1 = (int16_t)( ((databuffer[i]) ) * 4095 ); + Small12BitRun( ifr1>>5 ); //5 = Actually only feed algorithm numbers from -128 to 127. + } + last_place = place_in_data_buffer; + + static int idiv; + idiv++; +#if 1 + for( i = 0; i < bins; i++ ) + { + int iss = cossindata[i*2+0]>>FINAL_DECIMATE; + int isc = cossindata[i*2+1]>>FINAL_DECIMATE; + int mux = iss * iss + isc * isc; + + if( mux <= 0 ) + { + outbins[i] = 0; + } + else + { + outbins[i] = sqrt((float)mux)/50.0; + +#ifdef TWELVEBIT + if( abs( cossindata[i*2+0] ) > 1000 || abs( cossindata[i*2+1] ) > 1000 ) + printf( "CS OVF %d/%d/%d/%f\n", i, cossindata[i*2+0], cossindata[i*2+1],outbins[i] ); +#elif defined( EIGHTBIT ) + if( abs( cossindata[i*2+0] ) > 120 || abs( cossindata[i*2+1] ) > 120 ) + printf( "CS OVF %d/%d/%d/%f\n", i, cossindata[i*2+0], cossindata[i*2+1],outbins[i] ); +#endif + } + } +#endif +} + + diff --git a/embeddedcommon/DFT12Small.h b/embeddedcommon/DFT12Small.h new file mode 100644 index 0000000..13506e6 --- /dev/null +++ b/embeddedcommon/DFT12Small.h @@ -0,0 +1,9 @@ +#ifndef _DFT8TURBO_H +#define _DFT8TURBO_H + +/* Note: Frequencies must be precompiled. */ + +void DoDFT12BitSmall( float * outbins, float * frequencies, int bins, const float * databuffer, int place_in_data_buffer, int size_of_data_buffer, float q, float speedup ); + +#endif + diff --git a/embeddedcommon/DFT8Turbo.c b/embeddedcommon/DFT8Turbo.c index db9101e..1471270 100644 --- a/embeddedcommon/DFT8Turbo.c +++ b/embeddedcommon/DFT8Turbo.c @@ -5,9 +5,32 @@ #include + #define MAX_FREQS (12) #define OCTAVES (4) -#define INITIAL_DECIMATE 1 + +/* + General procedure - use this code, with uint16_t or uint32_t buffers, and make sure none of the alarms go off. + All of the paths still require no more than an 8-bit multiply. + You should test with extreme cases, like square wave sweeps in, etc. +*/ + +//#define TWELVEBIT +#define EIGHTBIT + +#ifdef TWELVEBIT +//No larger than 12-bit signed values for integration or sincos +#define FRONTEND_AMPLITUDE (0) +#define INITIAL_DECIMATE (2) +#define INTEGRATOR_DECIMATE (8) +#define FINAL_DECIMATE (4) +#elif defined( EIGHTBIT ) +//No larger than 8-bit signed values for integration or sincos +#define FRONTEND_AMPLITUDE (2) +#define INITIAL_DECIMATE (5) //Yurgh... only 3 bits of ADC data. That's 8 unique levels :( +#define INTEGRATOR_DECIMATE (8) +#define FINAL_DECIMATE (1) +#endif //Right now, we need 8*freqs*octaves bytes. //This is bad. @@ -22,13 +45,13 @@ * We will optimize for RAM size here. - * INITIAL_DECIMATE; A larger decimation: {NOTE 1} +) Reduces the bit depth needed for the integral map. If you use "1" and a fully saturted map (highest note is every sample), it will not overflow a signed 12-bit number. -) Increases noise. With full-scale: 0->1 minimal 1->2 minimal 2->3 significantly noticable, 3->4 major. - If sound is quieter, it matters more. I recommend no less than 1. + If sound is quieter, it matters more. Not sure with other changes in system. (2) seems ok. + -) If you make it (1) or (0) You can't do an 8-bit multiply and keep the output in a signed range. Also, other things, like frequency of hits can manipulate the maximum bit depth needed for integral map. * If you weight the bins in advance see "mulmux", you can: {NOTE 2} @@ -40,8 +63,7 @@ -) Not as pretty of an output. See "integral_at" *TODO: Investigate using all unsigned (to make multiply and/or 12-bit storage easier) - - + *TODO: Consider a mode which has 16-bit integrals, but still 8-bit cossin data. So, the idea here is we would keep a running total of the current ADC value, kept away in a int16_t. It is constantly summing, so we can take an integral of it. Or rather an integral range. @@ -55,6 +77,7 @@ int16_t running_integral; //Realistically treat as 12-bits on ramjet8 int16_t integral_at[MAX_FREQS*OCTAVES]; //For ramjet8, make 12-bits int32_t cossindata[MAX_FREQS*OCTAVES*2]; //Contains COS and SIN data. (32-bit for now, will be 16-bit, potentially even 8.) uint8_t which_octave_for_op[MAX_FREQS]; //counts up, tells you which ocative you are operating on. PUT IN RAM. +uint8_t actiontableplace; #define NR_OF_OPS (4<>INITIAL_DECIMATE; + int16_t adcv = adcval; + adcv *= FRONTEND_AMPLITUDE; + if( adcv > 127 ) adcv = 127; + if( adcv < -128 ) adcv = -128; + running_integral += adcv>>INITIAL_DECIMATE; #define dprintf( ... ) @@ -209,11 +220,12 @@ void Turbo8BitRun( int8_t adcval ) } else { - int octaveplace = op & 0xf; + //int octaveplace = op & 0xf; //Tricky: We share the integral with SIN and COS. //We don't need to. It would produce a slightly cleaner signal. See: NOTE 3 - int intindex = (octaveplace>>1) * MAX_FREQS + n; + uint8_t octave = op & 0xf; + uint8_t intindex = octave * MAX_FREQS + n; //int invoct = OCTAVES-1-octaveplace; int16_t diff; @@ -221,45 +233,74 @@ void Turbo8BitRun( int8_t adcval ) if( op & 0x10 ) //ADD { diff = integral_at[intindex] - running_integral; - dprintf( "%c", 'a' + octaveplace ); + dprintf( "%c", 'a' + (op & 0xf) ); } else //SUBTRACT { diff = running_integral - integral_at[intindex]; - dprintf( "%c", 'A' + octaveplace ); + dprintf( "%c", 'A' + (op & 0xf) ); } - if( diff > 2000 || diff < -2000 ) printf( "!!!!!!!!!!!! %d !!!!!!!!!!!\n", diff ); - integral_at[intindex] = running_integral; - int idx = intindex * 2 + (octaveplace&1); +#ifdef TWELVEBIT + if( diff > 2000 || diff < -2000 ) printf( "!!!!!!!!!!!! %d !!!!!!!!!!!\n", diff ); +#elif defined( EIGHTBIT ) + if( diff > 124 || diff < -124 ) printf( "!!!!!!!!!!!! %d !!!!!!!!!!!\n", diff ); +#endif - //if( n == 1 ) printf( "%d %d %d %d\n", n, idx, diff, op & 0x10 ); - //dprintf( "%d\n", idx ); - -#if 0 - //Apply IIR operation 1; This is rough because the Q changes and goes higher as a function of frequency. This is probably a bad move. - cossindata[idx] += diff>>4; - if( op & 0x20 ) + uint8_t idx = ( intindex << 1 ); + if( op&(1<<6) ) { - cossindata[idx] = cossindata[idx] - - (cossindata[idx]>>2); + idx |= 1; } -#else - //Apply IIR. + //printf( "%d: %d + %d * %d >> 8 - %d\n", idx, cossindata[idx], diff, mulmux[idx/2], cossindata[idx]>>4 ); + + uint8_t mulmuxval = mulmux[n]; + + + //Do you live on a super lame processor? {NOTE 4} + //If you do, you might not have good signed multiply operations. So, an alternative mechanism is found here. + // +) Able to more cleanly crush to an 8-bit multiply. + // +) Gets extra bit of precision back, i.e. the sign bit is now used as a data bit. + // -) More than 1 line of C code. Requires possible double invert. +#if 1 + //Terrible processor, i.e. PMS133 + if( 0 && diff < 0 ) + { + diff *= -1; + diff >>= (OCTAVES-1-octave); + + if( diff > 250 ) printf( "!!!!!!!**** %d ****!!!!!!!\n", diff ); + + diff = (uint16_t)diff * (uint16_t)mulmuxval; + diff >>= INTEGRATOR_DECIMATE; + + diff *= -1; + } + else + { + diff >>= (OCTAVES-1-octave); + + if( diff > 250 ) printf( "!!!!!!!**** %d ****!!!!!!!\n", diff ); + + diff = (uint16_t)diff * (uint16_t)mulmuxval; + diff >>= INTEGRATOR_DECIMATE; + } +#else + //Decent processor, i.e. ATTiny85. + diff = ((diff>>(OCTAVES-1-octave)) * mulmuxval ) >> 6; +#endif cossindata[idx] = cossindata[idx] - + (((int32_t)diff * (int32_t)mulmux[idx/2])>>6) + + diff - (cossindata[idx]>>4) ; - // if( cossindata[idx] > 2047 ) cossindata[idx] = 2047; - // if( cossindata[idx] < -2048 ) cossindata[idx] = -2048; + +#ifdef EIGHTBIT + if( cossindata[idx] > 0 ) cossindata[idx]--; + if( cossindata[idx] < 0 ) cossindata[idx]++; #endif - // if( cossindata[idx] > 1 ) cossindata[idx]--; - // if( cossindata[idx] < -1 ) cossindata[idx]++; - // if( cossindata[idx] > 16 ) cossindata[idx]-=8; - // if( cossindata[idx] < -16 ) cossindata[idx]+=8; } } else @@ -269,27 +310,6 @@ void Turbo8BitRun( int8_t adcval ) } dprintf( "\n" ); -#if 0 - uint32_t actions = *(placeintable++); - if( placeintable == &actiontable[ACTIONTABLESIZE] ) placeintable = actiontable; - int b; - for( b = 0; b < MAX_FREQS; b++ ) - { - if( ! ((1<>= 1; - int octavebit = op & ((1<>5 ); //6 = Actually only feed algorithm numbers from -64 to 63. + Turbo8BitRun( ifr1>>5 ); //5 = Actually only feed algorithm numbers from -128 to 127. } last_place = place_in_data_buffer; @@ -312,25 +332,9 @@ void DoDFT8BitTurbo( float * outbins, float * frequencies, int bins, const float #if 1 for( i = 0; i < bins; i++ ) { - outbins[i] = 0; - } - for( i = 0; i < bins; i++ ) - { - int iss = cossindata[i*2+0]>>8; - int isc = cossindata[i*2+1]>>8; - int issdiv = 0; - int iscdiv = 0; - int FWDOFFSET = 19;//MAX_FREQS*3/2; - if( i < bins-FWDOFFSET ) - { - issdiv = cossindata[(i+FWDOFFSET)*2+0]/256; - iscdiv = cossindata[(i+FWDOFFSET)*2+1]/256; - } + int iss = cossindata[i*2+0]>>FINAL_DECIMATE; + int isc = cossindata[i*2+1]>>FINAL_DECIMATE; int mux = iss * iss + isc * isc; - int muxdiv = issdiv * issdiv + iscdiv * iscdiv; - - //if( (idiv % 100) > 50 ) { printf( "*" ); mux -= muxdiv; } - //mux -= muxdiv; if( mux <= 0 ) { @@ -338,16 +342,17 @@ void DoDFT8BitTurbo( float * outbins, float * frequencies, int bins, const float } else { - //if( i == 0 ) - //printf( "MUX: %d %d = %d\n", isc, iss, mux ); outbins[i] = sqrt((float)mux)/50.0; - if( abs( cossindata[i*2+0] ) > 2000 || abs( cossindata[i*2+1] ) > 2000 ) - printf( "%d/%d/%d/%f ", i, cossindata[i*2+0], cossindata[i*2+1],outbins[i] ); - //outbins[i] = (cossindata[i*2+0]/10000.0); +#ifdef TWELVEBIT + if( abs( cossindata[i*2+0] ) > 1000 || abs( cossindata[i*2+1] ) > 1000 ) + printf( "CS OVF %d/%d/%d/%f\n", i, cossindata[i*2+0], cossindata[i*2+1],outbins[i] ); +#elif defined( EIGHTBIT ) + if( abs( cossindata[i*2+0] ) > 120 || abs( cossindata[i*2+1] ) > 120 ) + printf( "CS OVF %d/%d/%d/%f\n", i, cossindata[i*2+0], cossindata[i*2+1],outbins[i] ); +#endif } } - printf( "\n" ); #endif }