First inroads to turbo8
This commit is contained in:
		
							parent
							
								
									8e628ab602
								
							
						
					
					
						commit
						b9dc46c701
					
				
					 7 changed files with 384 additions and 6 deletions
				
			
		|  | @ -17,7 +17,7 @@ LDLIBS:=-lpthread -lasound -lm -lpulse-simple -lpulse -ludev -lrt | |||
| CFLAGS:=-g -O0 -flto -Wall -ffast-math -I../embeddedcommon -I. -DICACHE_FLASH_ATTR= | ||||
| EXTRALIBS:=-lusb-1.0 | ||||
| 
 | ||||
| colorchord : os_generic.o main.o  dft.o decompose.o filter.o color.o notefinder.o util.o outdrivers.o $(RAWDRAW) $(SOUND) $(OUTS) parameters.o chash.o hook.o ../embeddedcommon/DFT32.o configs.o | ||||
| colorchord : os_generic.o main.o  dft.o decompose.o filter.o color.o notefinder.o util.o outdrivers.o $(RAWDRAW) $(SOUND) $(OUTS) parameters.o chash.o hook.o ../embeddedcommon/DFT32.o configs.o ../embeddedcommon/DFT8Turbo.o | ||||
| 	gcc -o $@ $^ $(CFLAGS) $(LDLIBS) $(EXTRALIBS) $(RAWDRAWLIBS) | ||||
| 
 | ||||
| 
 | ||||
|  |  | |||
|  | @ -58,8 +58,9 @@ octaves = 5 | |||
| # 1 = DFT Progressive | ||||
| # 2 = DFT Progressive Integer	 | ||||
| # 3 = DFT Progressive Integer Skippy | ||||
| # 4 = Integer, 32-Bit, Progressive, Skippy. | ||||
| do_progressive_dft = 4 | ||||
| # 4 = Integer, 32-Bit, Progressive, Skippy. (wow, this actually works) | ||||
| # 5 = 8-bit turbo test. | ||||
| do_progressive_dft = 5 | ||||
| 
 | ||||
| 
 | ||||
| filter_iter = 2 | ||||
|  |  | |||
|  | @ -11,6 +11,7 @@ | |||
| #include "filter.h" | ||||
| #include "decompose.h" | ||||
| #include "DFT32.h" | ||||
| #include "DFT8Turbo.h" | ||||
| 
 | ||||
| struct NoteFinder * CreateNoteFinder( int spsRec ) | ||||
| { | ||||
|  | @ -199,6 +200,9 @@ void RunNoteFinder( struct NoteFinder * nf, const float * audio_stream, int head | |||
| 	case 4: | ||||
| 		DoDFTProgressive32( dftbins, nf->frequencies, freqs, audio_stream, head, buffersize, nf->dft_q, nf->dft_speedup ); | ||||
| 		break; | ||||
| 	case 5: | ||||
| 		DoDFT8BitTurbo( dftbins, nf->frequencies, freqs, audio_stream, head, buffersize, nf->dft_q, nf->dft_speedup ); | ||||
| 		break; | ||||
| 	default: | ||||
| 		fprintf( stderr, "Error: No DFT Seleced\n" ); | ||||
| 	} | ||||
|  |  | |||
							
								
								
									
										103
									
								
								colorchord2/turbo8bit.conf
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										103
									
								
								colorchord2/turbo8bit.conf
									
										
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,103 @@ | |||
| # This is the configuration file for colorchord. | ||||
| # Most values are already defaulted in the software. | ||||
| # This file is constantly checked for new versions. | ||||
| # \r, and ; are used as terminators, so you can put | ||||
| # multiple entries on the same line. | ||||
| 
 | ||||
| #Whether to limit the control loop to ~60ish FPS. | ||||
| cpu_autolimit = 1 | ||||
| 
 | ||||
| #General GUI properties. | ||||
| title = PA Test | ||||
| set_screenx = 720 | ||||
| set_screeny = 480 | ||||
| 
 | ||||
| #Sound properties. | ||||
| buffer = 384 | ||||
| play = 0 | ||||
| rec = 1 | ||||
| channels = 2 | ||||
| samplerate = 16000 | ||||
| wininput = -1 | ||||
| 
 | ||||
| #Compiled version will default this. | ||||
| #sound_source = ALSA | ||||
| #-1 indicates left and right, 0 left, 1 right. | ||||
| 
 | ||||
| sample_channel = -1 | ||||
| sourcename = default | ||||
| #alsa_output.pci-0000_00_1f.3.analog-stereo.monitor | ||||
| #default | ||||
| # alsa_output.pci-0000_00_1b.0.analog-stereo.monitor | ||||
| #alsa_output.pci-0000_00_1f.3.analog-stereo.monitor << New laptop | ||||
| #use pactl list | grep pci- | grep monitor | ||||
| 
 | ||||
| ################################## | ||||
| # General ColorChord properties. # | ||||
| ################################## | ||||
| 
 | ||||
| # How much to amplify the incoming signal. | ||||
| amplify = 2.0 | ||||
| 
 | ||||
| # What is the base note?  I.e. the lowest note.  | ||||
| # Note that it won't have very much impact until an octave up though! | ||||
| base_hz = 110 | ||||
| 
 | ||||
| # This is only used when dealing with the slow decompose (now defunct) | ||||
| # decompose_iterations = 1000 | ||||
| # default_sigma = 1.4000 | ||||
| 
 | ||||
| # DFT properties for the DFT up top. | ||||
| dft_iir = 0.6 | ||||
| dft_q = 20.0000 | ||||
| dft_speedup = 1000.0000 | ||||
| octaves = 5 | ||||
| 
 | ||||
| # Should we use a progressive DFT? | ||||
| # 0 = DFT Quick | ||||
| # 1 = DFT Progressive | ||||
| # 2 = DFT Progressive Integer	 | ||||
| # 3 = DFT Progressive Integer Skippy | ||||
| # 4 = Integer, 32-Bit, Progressive, Skippy. (wow, this actually works) | ||||
| # 5 = 8-bit turbo test. | ||||
| do_progressive_dft = 5 | ||||
| 
 | ||||
| 
 | ||||
| filter_iter = 2 | ||||
| filter_strength = .5 | ||||
| 
 | ||||
| # How many bins per octave to use? | ||||
| freqbins = 24 | ||||
| 
 | ||||
| # For the final note information... How much to slack everything? | ||||
| note_attach_amp_iir = 0.3500 | ||||
| note_attach_amp_iir2 = 0.250 | ||||
| note_attach_freq_iir = 0.3000 | ||||
| 
 | ||||
| #How many bins a note can jump from frame to frame to be considered a slide. | ||||
| #this is used to prevent notes from popping in and out a lot. | ||||
| note_combine_distance = 0.5000 | ||||
| note_jumpability = 1.8000 | ||||
| note_minimum_new_distribution_value = 0.0200 | ||||
| note_out_chop = 0.05000 | ||||
| 
 | ||||
| #compress_coefficient = 4.0 | ||||
| #compress_exponent = .5 | ||||
| 
 | ||||
| 
 | ||||
| #======================================================================= | ||||
| #Outputs | ||||
| 
 | ||||
| 
 | ||||
| # This is a Voronoi thing: | ||||
| outdrivers = OutputVoronoi, DisplayArray | ||||
| lightx = 64 | ||||
| lighty = 32 | ||||
| fromsides = 1 | ||||
| shape_cutoff = 0.03 | ||||
| satamp = 5.000 | ||||
| amppow = 2.510 | ||||
| distpow = 1.500 | ||||
| 
 | ||||
| 
 | ||||
| 
 | ||||
|  | @ -353,6 +353,3 @@ void DoDFTProgressive32( float * outbins, float * frequencies, int bins, const f | |||
| 
 | ||||
| #endif | ||||
| 
 | ||||
| 
 | ||||
| 
 | ||||
| 
 | ||||
|  |  | |||
							
								
								
									
										264
									
								
								embeddedcommon/DFT8Turbo.c
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										264
									
								
								embeddedcommon/DFT8Turbo.c
									
										
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,264 @@ | |||
| #include <stdint.h> | ||||
| #include <stdlib.h> | ||||
| #include "DFT8Turbo.h" | ||||
| #include <math.h> | ||||
| 
 | ||||
| #include <stdio.h> | ||||
| 
 | ||||
| #define MAX_FREQS (24) | ||||
| #define OCTAVES   (5) | ||||
| 
 | ||||
| 
 | ||||
| /*
 | ||||
| 	* The first thought was using an integration map and only operating when we need to, to pull the data out. | ||||
| 	* Now we're doing the thing below this block comment | ||||
| 		int16_t accumulated_total;							//2 bytes
 | ||||
| 		int16_t last_accumulated_total_at_bin[MAX_FREQS*2];  //24 * 2 * sizeof(int16_t) = 96 bytes.
 | ||||
| 		uint8_t current_time;								//1 byte
 | ||||
| 		uint8_t placecode[MAX_FREQS]; | ||||
| */ | ||||
| //OK... We don't have enough RAM to sum everything... can we do something wacky with multiple octaves to sum everything better?
 | ||||
| //i.e.
 | ||||
| //
 | ||||
| // 4332322132212210
 | ||||
| //
 | ||||
| // ++++++++++++++++-----------------
 | ||||
| // ++++++++--------
 | ||||
| // ++++----++++----
 | ||||
| // ++--++--++--++--
 | ||||
| // +-+-+-+-+-+-+-+-
 | ||||
| //
 | ||||
| // Don't forget we need to do this for sin and cos.
 | ||||
| // Can we instead of making this plusses, make it a multiplier?
 | ||||
| // How can we handle sin+cos?
 | ||||
| //
 | ||||
| // Is it possible to do this for every frame?  I.e. for each of the 24 notes, multiply with their current place in table?
 | ||||
| //  That's interesting.  It's not like a sin table.
 | ||||
| // There is no "multiply" in the attiny instruction set for attiny85.
 | ||||
| // There is, however for attiny402
 | ||||
| 
 | ||||
| //Question:  Can we do five octaves, or does this need to be balanced?
 | ||||
| //Question2: Should we weight higher octaves?
 | ||||
| 
 | ||||
| 
 | ||||
| //ATTiny402: 256x8 RAM, 4096x8 FLASH  LPM: 3 cycles + FMUL: 2 cycles  << Do stacked sin waves?
 | ||||
| //ATtiny85:  512x8 RAM, 8192x8 FLASH  LPM: 3 cycles + NO MULTIPLY     << Do square waves?
 | ||||
| 
 | ||||
| 
 | ||||
| /* Approaches:
 | ||||
| 
 | ||||
|   on ATtiny402:  Stacked sin approach. | ||||
|    Say 16 MHz, though 12 MHz is interesting... | ||||
|    16k SPS: 1k cycles per; say 24 bins per; 41 cycles per bin = hard.  But is it too hard? | ||||
|    20 cycles per s/c. | ||||
| 		read place in stacked table (8? bits) 3 cycles | ||||
| 
 | ||||
| 		//Inner loop = 17 cycles.
 | ||||
| 		read stacked table (8 bits), 3 cycles | ||||
| 		fractional multiply table with current value. 2 cycles | ||||
| 		read current running for note 2 cycles  (LDS = 3 cycles) | ||||
| 		subtract a shifted version, to make it into an IIR. (4 cycles) | ||||
| 		add in current values. (2 cycles) | ||||
| 		store data back to ram (2 cycles) | ||||
| 		advance place in stacked table (8?bits) 1 cycle | ||||
| 
 | ||||
| 		store place in stacked table (8? bits) 3 cycles? | ||||
| 
 | ||||
| 	//What if we chunk ADC updates into groups of 4 or 8?
 | ||||
| 	//This is looking barely possible.
 | ||||
| 
 | ||||
| 	on attiny85: scheduled adds/subtracts (like a stacked-square-wave-table) | ||||
| 		//XXX TODO!
 | ||||
| 
 | ||||
| */ | ||||
| 
 | ||||
| /* Ok... Let's think about the ATTiny402.  256x8 RAM + 4096x8 FLASH.
 | ||||
| 
 | ||||
| 	* We can create a table which has all octaves overlaid. | ||||
| 	* We would need to keep track of: | ||||
| 		* 12 x 2 x 2 = 48 bytes = Current sin/cos values. | ||||
| 		* 12 x 2 = 24 bytes = Current place in table.  = 72 bytes | ||||
| 	* We would need to store: | ||||
| 		* The layered lookup table.  If possible, keep @ 256 bytes to simplify math ops. | ||||
| 		* The speed by which each note needs to advance. | ||||
| 	* We would need to: | ||||
| 		* Read current running place. X                8 cycles | ||||
| 		* Use that place to look up into sin table.    3 cycles | ||||
| 		* Read running val  4 cycles best case | ||||
| 		* Multiply out the sin + IIR                   5 cycles | ||||
| 		* Store running val 4 cycles best case | ||||
| 		* Cos-advance that place to look up into sin table.    4 cycles | ||||
| 		* Read running val 4 cycles best case | ||||
| 		* Multiply out the sin + IIR                   5 cycles | ||||
| 		* Store running val 4 cycles best case. | ||||
| 		* Read how much to advance X by.               4 cycles | ||||
|         * (Cos^2+Sin^2)                                8? | ||||
| 		* Store it.                                    4 cycles best case. | ||||
|         *                                                  = 48 x 12 = 576 cycles.  Assume 10 MHz @ 16k SPS.  We're OK (625 cycles available per sample) | ||||
| */ | ||||
| 
 | ||||
| // Observation: The two tables are actually mirror images of each other, well diagonally mirrored.  That's odd.  But, would take CPU to exploit.
 | ||||
| 
 | ||||
| #define SSTABLESIZE 256 | ||||
| int8_t  spikysin_interleved_cos[SSTABLESIZE*2]; | ||||
| uint16_t advancespeed[MAX_FREQS]; | ||||
| 
 | ||||
| static int CompTableWithPhase( int nelements, float phase, int scaling ) | ||||
| { | ||||
| 	int highest = 0; | ||||
| 	int i; | ||||
| 	for( i = 0; i < nelements; i++ ) | ||||
| 	{ | ||||
| 		float taued = i * 3.141592 * 2.0 / nelements; | ||||
| 		int o; | ||||
| 		float combsin = 0; | ||||
| 		for( o = 0; o < OCTAVES; o++ ) | ||||
| 		{ | ||||
| 			combsin += sin( taued * (1<<o) + phase); | ||||
| 		} | ||||
| 		combsin /= OCTAVES; | ||||
| 		int csadapt =  combsin * scaling;	//No value is higher with five octaves.  XXX TODO Lookout.  If you change # of octaves, need to change this, too.
 | ||||
| 
 | ||||
| 		if( csadapt > highest ) highest = csadapt; | ||||
| 		if( -csadapt > highest ) highest = -csadapt; | ||||
| 
 | ||||
| 		if( csadapt > 127 ) csadapt = 127; | ||||
| 		if( csadapt < -127 ) csadapt = -127;  //tricky: Keep balanced.
 | ||||
| 		spikysin_interleved_cos[i*2+0] = csadapt; | ||||
| 
 | ||||
| 		float combcos = 0; | ||||
| 		for( o = 0; o < OCTAVES; o++ ) | ||||
| 		{ | ||||
| 			combcos += cos( taued * (1<<o) + phase ); | ||||
| 		} | ||||
| 		combcos /= OCTAVES; | ||||
| 		csadapt = combcos * scaling;	//No value is higher with five octaves.  XXX TODO Lookout.  If you change # of octaves, need to change this, too.
 | ||||
| 
 | ||||
| 		if( csadapt > highest ) highest = csadapt; | ||||
| 		if( -csadapt > highest ) highest = -csadapt; | ||||
| 
 | ||||
| 		if( csadapt > 127 ) csadapt = 127; | ||||
| 		if( csadapt < -127 ) csadapt = -127;  //tricky: Keep balanced.
 | ||||
| 		spikysin_interleved_cos[i*2+1] = csadapt; | ||||
| 	} | ||||
| 	return highest; | ||||
| } | ||||
| 
 | ||||
| 
 | ||||
| static int Setup( float * frequencies, int bins ) | ||||
| { | ||||
| 	int i; | ||||
| 
 | ||||
| 	//Since start position/phase is arbitrary, we should try several to see which gives us the best dynamic range.
 | ||||
| 	float tryphase = 0; | ||||
| 
 | ||||
| 	float bestphase = 0; | ||||
| 	int highest_val_at_best_phase = 1000000; | ||||
| 
 | ||||
| 	for( tryphase = 0; tryphase < 3.14159; tryphase += 0.001 ) | ||||
| 	{ | ||||
| 		int highest = CompTableWithPhase( SSTABLESIZE, tryphase, 65536 ); | ||||
| 		if( highest < highest_val_at_best_phase ) | ||||
| 		{ | ||||
| 			highest_val_at_best_phase = highest; | ||||
| 			bestphase = tryphase; | ||||
| 		} | ||||
| 	} | ||||
| 	printf( "Best comp: %f : %d\n", bestphase, highest_val_at_best_phase ); | ||||
| 
 | ||||
| 	CompTableWithPhase( SSTABLESIZE, bestphase, (65536*128)/highest_val_at_best_phase ); | ||||
| 
 | ||||
| //	for( i = 0; i < SSTABLESIZE; i++ )
 | ||||
| //	{
 | ||||
| //		printf( "%d %d\n", spikysin_interleved_cos[i*2+0], spikysin_interleved_cos[i*2+1] );
 | ||||
| //	}
 | ||||
| 
 | ||||
| 	for( i = 0; i < MAX_FREQS; i++ ) | ||||
| 	{ | ||||
| 		//frequencies[i] = SPS / Freq
 | ||||
| 		// Need to decide how quickly we sweep through the table.
 | ||||
| 		advancespeed[i] = 256.0 /* fixed point */ * 256.0 /* size of table */ / frequencies[i]; | ||||
| 		//printf( "%f\n", frequencies[i] );
 | ||||
| 	} | ||||
| 	return 0; | ||||
| } | ||||
| 
 | ||||
| 
 | ||||
| /*
 | ||||
| uint8_t  spikysin_interleved_cos[256*2]; | ||||
| uint16_t advancespeed[MAX_FREQS]; | ||||
| */ | ||||
| 
 | ||||
| float toutbins[MAX_FREQS]; | ||||
| 
 | ||||
/* Per-note running state for the turbo DFT. */
struct notedat
{
	uint16_t time;	/* Phase accumulator; the top 8 bits select the table entry. */
	int32_t sinm;	/* Leaky running sum of sample * table value (sin side). */
	int32_t cosm;	/* Leaky running sum of sample * table value (cos side). */
};
| 
 | ||||
| static struct notedat nd[MAX_FREQS]; | ||||
| 
 | ||||
| void Turbo8BitRun( int8_t adcval ) | ||||
| { | ||||
| 	int i; | ||||
| 	for( i = 0; i < MAX_FREQS; i++ ) | ||||
| 	{ | ||||
| 		uint16_t ct = nd[i].time; | ||||
| 		int32_t muxres; | ||||
| 		int32_t running; | ||||
| 		int32_t rdesc, rdess; | ||||
| 		int8_t  ss = spikysin_interleved_cos[(ct>>8) + 0]; | ||||
| 		muxres = ((int16_t)adcval * ss) >> 8; | ||||
| 		running = nd[i].cosm; | ||||
| 		running += muxres; | ||||
| 		rdesc = running >> 8; | ||||
| 		running -= rdesc>>6; | ||||
| 		nd[i].cosm = running; | ||||
| 
 | ||||
| 		int8_t  sc = spikysin_interleved_cos[(ct>>8) + 1]; | ||||
| 		muxres = ((int16_t)adcval * sc) >> 8; | ||||
| 		running = nd[i].sinm; | ||||
| 		running += muxres; | ||||
| 		rdess = running>>8; | ||||
| 		running -= rdess>>6; | ||||
| 		nd[i].sinm = running; | ||||
| 
 | ||||
| 		nd[i].time = ct + advancespeed[i]; | ||||
| 		toutbins[i] = rdess * rdess + rdesc * rdesc; | ||||
| 		//printf( "%d %d = %f %p\n", rdess, rdesc, toutbins[i], &toutbins[i] );
 | ||||
| 	} | ||||
| } | ||||
| 
 | ||||
| 
 | ||||
| void DoDFT8BitTurbo( float * outbins, float * frequencies, int bins, const float * databuffer, int place_in_data_buffer, int size_of_data_buffer, float q, float speedup ) | ||||
| { | ||||
| 	static int is_setup; | ||||
| 	if( !is_setup ) { is_setup = 1; Setup( frequencies, bins ); } | ||||
| 	static int last_place; | ||||
| 	int i; | ||||
| 
 | ||||
| 	for( i = last_place; i != place_in_data_buffer; i = (i+1)%size_of_data_buffer ) | ||||
| 	{ | ||||
| 		int16_t ifr1 = (int16_t)( ((databuffer[i]) ) * 4095 ); | ||||
| 		//ifr1 += 4095;
 | ||||
| 		Turbo8BitRun( ifr1>>5 ); | ||||
| 	} | ||||
| 
 | ||||
| 	for( i = 0; i < bins; i++ ) | ||||
| 	{ | ||||
| 		outbins[i] = 0; | ||||
| 	} | ||||
| 	for( i = 0; i < MAX_FREQS; i++ ) | ||||
| 	{ | ||||
| 		int iss = nd[i].sinm; | ||||
| 		int isc = nd[i].cosm; | ||||
| 		int mux = iss * iss + isc * isc; | ||||
| 		if( mux == 0 ) mux = 1; | ||||
| 		outbins[i+MAX_FREQS] = sqrt(mux)/1000.0; | ||||
| 	}  | ||||
| 
 | ||||
| } | ||||
| 
 | ||||
| 
 | ||||
							
								
								
									
										9
									
								
								embeddedcommon/DFT8Turbo.h
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										9
									
								
								embeddedcommon/DFT8Turbo.h
									
										
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,9 @@ | |||
| #ifndef _DFT8TURBO_H | ||||
| #define _DFT8TURBO_H | ||||
| 
 | ||||
| /* Note: Frequencies must be precompiled. */ | ||||
| 
 | ||||
| void DoDFT8BitTurbo( float * outbins, float * frequencies, int bins, const float * databuffer, int place_in_data_buffer, int size_of_data_buffer, float q, float speedup ); | ||||
| 
 | ||||
| #endif | ||||
| 
 | ||||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue