diff --git a/embeddedcommon/DFT12Small.c b/embeddedcommon/DFT12Small.c index 41c9fdd..6d04241 100644 --- a/embeddedcommon/DFT12Small.c +++ b/embeddedcommon/DFT12Small.c @@ -1,3 +1,5 @@ +//NOTE DO NOT EDIT THIS FILE WITHOUT ALSO EDITING DFT8TURBO!!! + #include #include #include "DFT12Small.h" @@ -32,9 +34,6 @@ #define FINAL_DECIMATE (1) #endif -//Right now, we need 8*freqs*octaves bytes. -//This is bad. -//What can we do to fix it? //4x the hits (sin/cos and we need to do it once for each edge) //8x for selecting a higher octave. @@ -199,116 +198,104 @@ void Small12BitRun( int8_t adcval ) if( adcv < -128 ) adcv = -128; running_integral += adcv>>INITIAL_DECIMATE; -#define dprintf( ... ) - uint32_t action = actiontable[actiontableplace++]; int n; - dprintf( "%4d ", actiontableplace ); - for( n = 0; n < MAX_FREQS; n++ ) + for( n = 0; n < MAX_FREQS; n++, action>>=1 ) { - if( action & (1<= NR_OF_OPS ) ao = 0; + which_octave_for_op[n] = ao; + + int op = optable[ao]; + + if( op == 255 ) + continue; + + //int octaveplace = op & 0xf; + + //Tricky: We share the integral with SIN and COS. + //We don't need to. It would produce a slightly cleaner signal. See: NOTE 3 + uint8_t octave = op & 0xf; + uint8_t intindex = octave * MAX_FREQS + n; + + //int invoct = OCTAVES-1-octaveplace; + int16_t diff; + + if( op & 0x10 ) //ADD { - int ao = which_octave_for_op[n]; - int op = optable[ao]; - ao++; - if( ao >= NR_OF_OPS ) ao = 0; - which_octave_for_op[n] = ao; + diff = integral_at[intindex] - running_integral; + } + else //SUBTRACT + { + diff = running_integral - integral_at[intindex]; + } - if( op == 255 ) - { - dprintf( "*" ); //NOP - } - else - { - //int octaveplace = op & 0xf; - - //Tricky: We share the integral with SIN and COS. - //We don't need to. It would produce a slightly cleaner signal. See: NOTE 3 - uint8_t octave = op & 0xf; - uint8_t intindex = octave * MAX_FREQS + n; - - //int invoct = OCTAVES-1-octaveplace; - int16_t diff; - - if( op & 0x10 ) //ADD - { - diff = integral_at[intindex] - running_integral; - dprintf( "%c", 'a' + (op & 0xf) ); - } - else //SUBTRACT - { - diff = running_integral - integral_at[intindex]; - dprintf( "%c", 'A' + (op & 0xf) ); - } - - integral_at[intindex] = running_integral; + integral_at[intindex] = running_integral; #ifdef TWELVEBIT - if( diff > 2000 || diff < -2000 ) printf( "!!!!!!!!!!!! %d !!!!!!!!!!!\n", diff ); + if( diff > 2000 || diff < -2000 ) printf( "!!!!!!!!!!!! %d !!!!!!!!!!!\n", diff ); #elif defined( EIGHTBIT ) - if( diff > 124 || diff < -124 ) printf( "!!!!!!!!!!!! %d !!!!!!!!!!!\n", diff ); + if( diff > 124 || diff < -124 ) printf( "!!!!!!!!!!!! %d !!!!!!!!!!!\n", diff ); #endif - uint8_t idx = ( intindex << 1 ); - if( op&(1<<6) ) - { - idx |= 1; - } + //uint8_t idx = ( intindex << 1 ); + intindex<<=1; - //printf( "%d: %d + %d * %d >> 8 - %d\n", idx, cossindata[idx], diff, mulmux[idx/2], cossindata[idx]>>4 ); + if( op&(1<<6) ) + { + intindex |= 1; + } - uint8_t mulmuxval = mulmux[n]; + //printf( "%d: %d + %d * %d >> 8 - %d\n", intindex, cossindata[intindex], diff, mulmux[intindex/2], cossindata[intindex]>>4 ); + + uint8_t mulmuxval = mulmux[n]; - //Do you live on a super lame processor? {NOTE 4} - //If you do, you might not have good signed multiply operations. So, an alternative mechanism is found here. - // +) Able to more cleanly crush to an 8-bit multiply. - // +) Gets extra bit of precision back, i.e. the sign bit is now used as a data bit. - // -) More than 1 line of C code. Requires possible double invert. + //Do you live on a super lame processor? {NOTE 4} + //If you do, you might not have good signed multiply operations. So, an alternative mechanism is found here. + // +) Able to more cleanly crush to an 8-bit multiply. + // +) Gets extra bit of precision back, i.e. the sign bit is now used as a data bit. + // -) More than 1 line of C code. Requires possible double invert. #if 1 - //Terrible processor, i.e. PMS133 - if( 0 && diff < 0 ) - { - diff *= -1; - diff >>= (OCTAVES-1-octave); + //Terrible processor, i.e. PMS133 + if( 0 && diff < 0 ) + { + diff *= -1; + diff >>= (OCTAVES-1-octave); - if( diff > 250 ) printf( "!!!!!!!**** %d ****!!!!!!!\n", diff ); + if( diff > 250 ) printf( "!!!!!!!**** %d ****!!!!!!!\n", diff ); - diff = (uint16_t)diff * (uint16_t)mulmuxval; - diff >>= INTEGRATOR_DECIMATE; + diff = (uint16_t)diff * (uint16_t)mulmuxval; + diff >>= INTEGRATOR_DECIMATE; - diff *= -1; - } - else - { - diff >>= (OCTAVES-1-octave); - - if( diff > 250 ) printf( "!!!!!!!**** %d ****!!!!!!!\n", diff ); - - diff = (uint16_t)diff * (uint16_t)mulmuxval; - diff >>= INTEGRATOR_DECIMATE; - } -#else - //Decent processor, i.e. ATTiny85. - diff = ((diff>>(OCTAVES-1-octave)) * mulmuxval ) >> 6; -#endif - cossindata[idx] = cossindata[idx] - + diff - - (cossindata[idx]>>4) - ; - -#ifdef EIGHTBIT - if( cossindata[idx] > 0 ) cossindata[idx]--; - if( cossindata[idx] < 0 ) cossindata[idx]++; -#endif - } + diff *= -1; } else { - dprintf( " " ); - } + diff >>= (OCTAVES-1-octave); + + if( diff > 250 ) printf( "!!!!!!!**** %d ****!!!!!!!\n", diff ); + + diff = (uint16_t)diff * (uint16_t)mulmuxval; + diff >>= INTEGRATOR_DECIMATE; + } +#else + //Decent processor, i.e. ATTiny85. + diff = ((diff>>(OCTAVES-1-octave)) * mulmuxval ) >> 6; +#endif + cossindata[intindex] = cossindata[intindex] + + diff + - (cossindata[intindex]>>4) + ; + +#ifdef EIGHTBIT + if( cossindata[intindex] > 0 ) cossindata[intindex]--; + if( cossindata[intindex] < 0 ) cossindata[intindex]++; +#endif } - dprintf( "\n" ); } diff --git a/embeddedcommon/DFT8Turbo.c b/embeddedcommon/DFT8Turbo.c index 1471270..fd07df4 100644 --- a/embeddedcommon/DFT8Turbo.c +++ b/embeddedcommon/DFT8Turbo.c @@ -1,3 +1,5 @@ +//NOTE DO NOT EDIT THIS FILE WITHOUT ALSO EDITING DFT12SMALL!!! + #include #include #include "DFT8Turbo.h" @@ -5,7 +7,6 @@ #include - #define MAX_FREQS (12) #define OCTAVES (4) @@ -15,67 +16,24 @@ You should test with extreme cases, like square wave sweeps in, etc. */ -//#define TWELVEBIT -#define EIGHTBIT - -#ifdef TWELVEBIT -//No larger than 12-bit signed values for integration or sincos -#define FRONTEND_AMPLITUDE (0) -#define INITIAL_DECIMATE (2) -#define INTEGRATOR_DECIMATE (8) -#define FINAL_DECIMATE (4) -#elif defined( EIGHTBIT ) //No larger than 8-bit signed values for integration or sincos #define FRONTEND_AMPLITUDE (2) #define INITIAL_DECIMATE (5) //Yurgh... only 3 bits of ADC data. That's 8 unique levels :( #define INTEGRATOR_DECIMATE (8) #define FINAL_DECIMATE (1) -#endif -//Right now, we need 8*freqs*octaves bytes. -//This is bad. -//What can we do to fix it? + +#define OPTABLETYPE uint16_t //Make uint8_t if on attiny. //4x the hits (sin/cos and we need to do it once for each edge) //8x for selecting a higher octave. #define FREQREBASE 8.0 #define TARGFREQ 10000.0 -/* Tradeoff guide: - - * We will optimize for RAM size here. - - * INITIAL_DECIMATE; A larger decimation: {NOTE 1} - +) Reduces the bit depth needed for the integral map. - If you use "1" and a fully saturted map (highest note is every sample), it will not overflow a signed 12-bit number. - -) Increases noise. - With full-scale: 0->1 minimal 1->2 minimal 2->3 significantly noticable, 3->4 major. - If sound is quieter, it matters more. Not sure with other changes in system. (2) seems ok. - -) If you make it (1) or (0) You can't do an 8-bit multiply and keep the output in a signed range. - Also, other things, like frequency of hits can manipulate the maximum bit depth needed for integral map. - - * If you weight the bins in advance see "mulmux", you can: {NOTE 2} - +) potentially use shallower bit depth but - -) have to compute the multiply every time you update the bin. - - * You can use a modified-square-wave which only integrates for 1/2 of the duty cycle. {NOTE 3} - +) uses 1/2 the integral memory. - -) Not as pretty of an output. See "integral_at" - - *TODO: Investigate using all unsigned (to make multiply and/or 12-bit storage easier) - *TODO: Consider a mode which has 16-bit integrals, but still 8-bit cossin data. - - So, the idea here is we would keep a running total of the current ADC value, kept away in a int16_t. - It is constantly summing, so we can take an integral of it. Or rather an integral range. - - Over time, we perform operations like adding or subtracting from a current place. It basically is - a DFT where the kernel is computed using square waves (or modified square waves) -*/ - //These live in RAM. -int16_t running_integral; //Realistically treat as 12-bits on ramjet8 -int16_t integral_at[MAX_FREQS*OCTAVES]; //For ramjet8, make 12-bits -int32_t cossindata[MAX_FREQS*OCTAVES*2]; //Contains COS and SIN data. (32-bit for now, will be 16-bit, potentially even 8.) +int8_t running_integral; //Realistically treat as 12-bits on ramjet8 +int8_t integral_at[MAX_FREQS*OCTAVES]; //For ramjet8, make 12-bits +int8_t cossindata[MAX_FREQS*OCTAVES*2]; //Contains COS and SIN data. (32-bit for now, will be 16-bit, potentially even 8.) uint8_t which_octave_for_op[MAX_FREQS]; //counts up, tells you which ocative you are operating on. PUT IN RAM. uint8_t actiontableplace; @@ -84,13 +42,13 @@ uint8_t actiontableplace; // 255 = DO NOT OPERATE // bits 0..3 unfolded octave, i.e. sin/cos are offset by one. // bit 4 = add or subtract. -uint8_t optable[NR_OF_OPS]; //PUT IN FLASH +OPTABLETYPE optable[NR_OF_OPS]; //PUT IN FLASH #define ACTIONTABLESIZE 256 uint16_t actiontable[ACTIONTABLESIZE]; //PUT IN FLASH // If there are more than 8 freqbins, this must be a uint16_t, otherwise if more than 16, 32. //Format is -uint8_t mulmux[MAX_FREQS]; //PUT IN FLASH +OPTABLETYPE mulmux[MAX_FREQS]; //PUT IN FLASH static int Setup( float * frequencies, int bins ) { @@ -199,117 +157,112 @@ void Turbo8BitRun( int8_t adcval ) if( adcv < -128 ) adcv = -128; running_integral += adcv>>INITIAL_DECIMATE; -#define dprintf( ... ) + uint16_t action = actiontable[actiontableplace++]; + uint8_t n; - uint32_t action = actiontable[actiontableplace++]; - int n; - dprintf( "%4d ", actiontableplace ); - for( n = 0; n < MAX_FREQS; n++ ) + //Counts are approximate counts for PMS133 + + for( n = 0; //1CYC + n < MAX_FREQS; //2CYC + n++, //1CYC + action>>=1 //2CYC + ) { - if( action & (1<= NR_OF_OPS ) ao = 0; //2CYC + which_octave_for_op[n] = ao; //2CYC (idxm) + + uint8_t op = optable[ao]; //"theoretically" 3CYC (if you align things right) + //1CYC (Put A into specific RAM location) + + //If we are on the one thing we aren't supposed to operate within, cancel. + if( op == 255 ) continue; //2CYC (if op is in A) + + //Tricky: We share the integral with SIN and COS. + //We don't need to. It would produce a slightly cleaner signal. See: NOTE 3 + uint8_t octave = op & 0xf; //1CYC (if op is in A) + + + uint8_t intindex = octave * MAX_FREQS //Load mulop with 12 [2CYC]; mul [1CYC] + + n; //Add [1CYC] + //[1CYC] more cycle to write A into RAM[(intindex) + //int invoct = OCTAVES-1-octaveplace; + int8_t diff; + + if( op & 0x10 ) //ADD //2CYC { - int ao = which_octave_for_op[n]; - int op = optable[ao]; - ao++; - if( ao >= NR_OF_OPS ) ao = 0; - which_octave_for_op[n] = ao; + diff = integral_at[intindex] //Assume "IntIndex" is in A, add integral_at to A [1], move A to an index [1]. [2] to read into acc. [4CYC] + - running_integral; //1CYC to subtract. + //1CYC to write diff into a memory location. + } + else //SUBTRACT + { + diff = running_integral - integral_at[intindex]; + } - if( op == 255 ) - { - dprintf( "*" ); //NOP - } - else - { - //int octaveplace = op & 0xf; + //30 cycles so far. - //Tricky: We share the integral with SIN and COS. - //We don't need to. It would produce a slightly cleaner signal. See: NOTE 3 - uint8_t octave = op & 0xf; - uint8_t intindex = octave * MAX_FREQS + n; + integral_at[intindex] = running_integral; //[3CYC] - //int invoct = OCTAVES-1-octaveplace; - int16_t diff; + //if( diff > 124 || diff < -124 ) printf( "!!!!!!!!!!!! %d !!!!!!!!!!!\n", diff ); + + //uint8_t idx = ( intindex << 1 ); //Overwrite intindex. + intindex <<= 1; //1CYC - if( op & 0x10 ) //ADD - { - diff = integral_at[intindex] - running_integral; - dprintf( "%c", 'a' + (op & 0xf) ); - } - else //SUBTRACT - { - diff = running_integral - integral_at[intindex]; - dprintf( "%c", 'A' + (op & 0xf) ); - } + if( op&(1<<6) ) //2CYC + { + intindex |= 1; //1CYC + } - integral_at[intindex] = running_integral; - -#ifdef TWELVEBIT - if( diff > 2000 || diff < -2000 ) printf( "!!!!!!!!!!!! %d !!!!!!!!!!!\n", diff ); -#elif defined( EIGHTBIT ) - if( diff > 124 || diff < -124 ) printf( "!!!!!!!!!!!! %d !!!!!!!!!!!\n", diff ); -#endif - - uint8_t idx = ( intindex << 1 ); - if( op&(1<<6) ) - { - idx |= 1; - } - - //printf( "%d: %d + %d * %d >> 8 - %d\n", idx, cossindata[idx], diff, mulmux[idx/2], cossindata[idx]>>4 ); - - uint8_t mulmuxval = mulmux[n]; + uint8_t mulmuxval = mulmux[n]; //[4CYC] - //Do you live on a super lame processor? {NOTE 4} - //If you do, you might not have good signed multiply operations. So, an alternative mechanism is found here. - // +) Able to more cleanly crush to an 8-bit multiply. - // +) Gets extra bit of precision back, i.e. the sign bit is now used as a data bit. - // -) More than 1 line of C code. Requires possible double invert. + //Do you live on a super lame processor? {NOTE 4} + //If you do, you might not have good signed multiply operations. So, an alternative mechanism is found here. + // +) Able to more cleanly crush to an 8-bit multiply. + // +) Gets extra bit of precision back, i.e. the sign bit is now used as a data bit. + // -) More than 1 line of C code. Requires possible double invert. #if 1 - //Terrible processor, i.e. PMS133 - if( 0 && diff < 0 ) - { - diff *= -1; - diff >>= (OCTAVES-1-octave); + //rough processor, i.e. PMS133 + if( diff < 0 ) //[2CYC] + { + diff *= -1; //[1CYC] + diff >>= (OCTAVES-1-octave); // ???TRICKY??? + //if( diff > 250 ) printf( "!!!!!!!**** %d ****!!!!!!!\n", diff ); - if( diff > 250 ) printf( "!!!!!!!**** %d ****!!!!!!!\n", diff ); - - diff = (uint16_t)diff * (uint16_t)mulmuxval; - diff >>= INTEGRATOR_DECIMATE; - - diff *= -1; - } - else - { - diff >>= (OCTAVES-1-octave); - - if( diff > 250 ) printf( "!!!!!!!**** %d ****!!!!!!!\n", diff ); - - diff = (uint16_t)diff * (uint16_t)mulmuxval; - diff >>= INTEGRATOR_DECIMATE; - } -#else - //Decent processor, i.e. ATTiny85. - diff = ((diff>>(OCTAVES-1-octave)) * mulmuxval ) >> 6; -#endif - cossindata[idx] = cossindata[idx] - + diff - - (cossindata[idx]>>4) - ; - -#ifdef EIGHTBIT - if( cossindata[idx] > 0 ) cossindata[idx]--; - if( cossindata[idx] < 0 ) cossindata[idx]++; -#endif - } + diff = ((uint16_t)diff * (uint16_t)mulmuxval)>>INTEGRATOR_DECIMATE; //[3CYC] + diff *= -1; //[1CYC] } else { - dprintf( " " ); - } - } - dprintf( "\n" ); + diff >>= (OCTAVES-1-octave); + //if( diff > 250 ) printf( "!!!!!!!**** %d ****!!!!!!!\n", diff ); + diff = ((uint16_t)diff * (uint16_t)mulmuxval)>>INTEGRATOR_DECIMATE; + } + //@48 cycles :( :( :( + +#else + //Decent processor, i.e. ATTiny85. + diff = ((diff>>(OCTAVES-1-octave)) * mulmuxval ) >> 6; +#endif + //printf( "%d\n", diff ); + + cossindata[intindex] = cossindata[intindex] + + diff + - (cossindata[intindex]>>4) + ; + + if( cossindata[intindex] > 0 ) cossindata[intindex]--; + if( cossindata[intindex] < 0 ) cossindata[intindex]++; + } } @@ -344,13 +297,9 @@ void DoDFT8BitTurbo( float * outbins, float * frequencies, int bins, const float { outbins[i] = sqrt((float)mux)/50.0; -#ifdef TWELVEBIT - if( abs( cossindata[i*2+0] ) > 1000 || abs( cossindata[i*2+1] ) > 1000 ) - printf( "CS OVF %d/%d/%d/%f\n", i, cossindata[i*2+0], cossindata[i*2+1],outbins[i] ); -#elif defined( EIGHTBIT ) - if( abs( cossindata[i*2+0] ) > 120 || abs( cossindata[i*2+1] ) > 120 ) - printf( "CS OVF %d/%d/%d/%f\n", i, cossindata[i*2+0], cossindata[i*2+1],outbins[i] ); -#endif + if( abs( cossindata[i*2+0] ) > 120 || abs( cossindata[i*2+1] ) > 120 ) + printf( "CS OVF %d/%d/%d/%f\n", i, cossindata[i*2+0], cossindata[i*2+1],outbins[i] ); + } } #endif