First inroads to turbo8
This commit is contained in:
parent
8e628ab602
commit
b9dc46c701
|
@ -17,7 +17,7 @@ LDLIBS:=-lpthread -lasound -lm -lpulse-simple -lpulse -ludev -lrt
|
||||||
CFLAGS:=-g -O0 -flto -Wall -ffast-math -I../embeddedcommon -I. -DICACHE_FLASH_ATTR=
|
CFLAGS:=-g -O0 -flto -Wall -ffast-math -I../embeddedcommon -I. -DICACHE_FLASH_ATTR=
|
||||||
EXTRALIBS:=-lusb-1.0
|
EXTRALIBS:=-lusb-1.0
|
||||||
|
|
||||||
colorchord : os_generic.o main.o dft.o decompose.o filter.o color.o notefinder.o util.o outdrivers.o $(RAWDRAW) $(SOUND) $(OUTS) parameters.o chash.o hook.o ../embeddedcommon/DFT32.o configs.o
|
colorchord : os_generic.o main.o dft.o decompose.o filter.o color.o notefinder.o util.o outdrivers.o $(RAWDRAW) $(SOUND) $(OUTS) parameters.o chash.o hook.o ../embeddedcommon/DFT32.o configs.o ../embeddedcommon/DFT8Turbo.o
|
||||||
gcc -o $@ $^ $(CFLAGS) $(LDLIBS) $(EXTRALIBS) $(RAWDRAWLIBS)
|
gcc -o $@ $^ $(CFLAGS) $(LDLIBS) $(EXTRALIBS) $(RAWDRAWLIBS)
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -58,8 +58,9 @@ octaves = 5
|
||||||
# 1 = DFT Progressive
|
# 1 = DFT Progressive
|
||||||
# 2 = DFT Progressive Integer
|
# 2 = DFT Progressive Integer
|
||||||
# 3 = DFT Progressive Integer Skippy
|
# 3 = DFT Progressive Integer Skippy
|
||||||
# 4 = Integer, 32-Bit, Progressive, Skippy.
|
# 4 = Integer, 32-Bit, Progressive, Skippy. (wow, this actually works)
|
||||||
do_progressive_dft = 4
|
# 5 = 8-bit turbo test.
|
||||||
|
do_progressive_dft = 5
|
||||||
|
|
||||||
|
|
||||||
filter_iter = 2
|
filter_iter = 2
|
||||||
|
|
|
@ -11,6 +11,7 @@
|
||||||
#include "filter.h"
|
#include "filter.h"
|
||||||
#include "decompose.h"
|
#include "decompose.h"
|
||||||
#include "DFT32.h"
|
#include "DFT32.h"
|
||||||
|
#include "DFT8Turbo.h"
|
||||||
|
|
||||||
struct NoteFinder * CreateNoteFinder( int spsRec )
|
struct NoteFinder * CreateNoteFinder( int spsRec )
|
||||||
{
|
{
|
||||||
|
@ -199,6 +200,9 @@ void RunNoteFinder( struct NoteFinder * nf, const float * audio_stream, int head
|
||||||
case 4:
|
case 4:
|
||||||
DoDFTProgressive32( dftbins, nf->frequencies, freqs, audio_stream, head, buffersize, nf->dft_q, nf->dft_speedup );
|
DoDFTProgressive32( dftbins, nf->frequencies, freqs, audio_stream, head, buffersize, nf->dft_q, nf->dft_speedup );
|
||||||
break;
|
break;
|
||||||
|
case 5:
|
||||||
|
DoDFT8BitTurbo( dftbins, nf->frequencies, freqs, audio_stream, head, buffersize, nf->dft_q, nf->dft_speedup );
|
||||||
|
break;
|
||||||
default:
|
default:
|
||||||
fprintf( stderr, "Error: No DFT Seleced\n" );
|
fprintf( stderr, "Error: No DFT Seleced\n" );
|
||||||
}
|
}
|
||||||
|
|
103
colorchord2/turbo8bit.conf
Normal file
103
colorchord2/turbo8bit.conf
Normal file
|
@ -0,0 +1,103 @@
|
||||||
|
# This is the configuration file for colorchord.
|
||||||
|
# Most values are already defaulted in the software.
|
||||||
|
# This file is constantly checked for new versions.
|
||||||
|
# \r, and ; are used as terminators, so you can put
|
||||||
|
# multiple entries on the same line.
|
||||||
|
|
||||||
|
#Whether to limit the control loop to ~60ish FPS.
|
||||||
|
cpu_autolimit = 1
|
||||||
|
|
||||||
|
#General GUI properties.
|
||||||
|
title = PA Test
|
||||||
|
set_screenx = 720
|
||||||
|
set_screeny = 480
|
||||||
|
|
||||||
|
#Sound properties.
|
||||||
|
buffer = 384
|
||||||
|
play = 0
|
||||||
|
rec = 1
|
||||||
|
channels = 2
|
||||||
|
samplerate = 16000
|
||||||
|
wininput = -1
|
||||||
|
|
||||||
|
#Compiled version will default this.
|
||||||
|
#sound_source = ALSA
|
||||||
|
#-1 indicates left and right, 0 left, 1 right.
|
||||||
|
|
||||||
|
sample_channel = -1
|
||||||
|
sourcename = default
|
||||||
|
#alsa_output.pci-0000_00_1f.3.analog-stereo.monitor
|
||||||
|
#default
|
||||||
|
# alsa_output.pci-0000_00_1b.0.analog-stereo.monitor
|
||||||
|
#alsa_output.pci-0000_00_1f.3.analog-stereo.monitor << New laptop
|
||||||
|
#use pactl list | grep pci- | grep monitor
|
||||||
|
|
||||||
|
##################################
|
||||||
|
# General ColorChord properties. #
|
||||||
|
##################################
|
||||||
|
|
||||||
|
# How much to amplify the incoming signal.
|
||||||
|
amplify = 2.0
|
||||||
|
|
||||||
|
# What is the base note? I.e. the lowest note.
|
||||||
|
# Note that it won't have very much impact until an octave up though!
|
||||||
|
base_hz = 110
|
||||||
|
|
||||||
|
# This is only used when dealing with the slow decompose (now defunct)
|
||||||
|
# decompose_iterations = 1000
|
||||||
|
# default_sigma = 1.4000
|
||||||
|
|
||||||
|
# DFT properties for the DFT up top.
|
||||||
|
dft_iir = 0.6
|
||||||
|
dft_q = 20.0000
|
||||||
|
dft_speedup = 1000.0000
|
||||||
|
octaves = 5
|
||||||
|
|
||||||
|
# Should we use a progressive DFT?
|
||||||
|
# 0 = DFT Quick
|
||||||
|
# 1 = DFT Progressive
|
||||||
|
# 2 = DFT Progressive Integer
|
||||||
|
# 3 = DFT Progressive Integer Skippy
|
||||||
|
# 4 = Integer, 32-Bit, Progressive, Skippy. (wow, this actually works)
|
||||||
|
# 5 = 8-bit turbo test.
|
||||||
|
do_progressive_dft = 5
|
||||||
|
|
||||||
|
|
||||||
|
filter_iter = 2
|
||||||
|
filter_strength = .5
|
||||||
|
|
||||||
|
# How many bins per octave to use?
|
||||||
|
freqbins = 24
|
||||||
|
|
||||||
|
# For the final note information... How much to slack everything?
|
||||||
|
note_attach_amp_iir = 0.3500
|
||||||
|
note_attach_amp_iir2 = 0.250
|
||||||
|
note_attach_freq_iir = 0.3000
|
||||||
|
|
||||||
|
#How many bins a note can jump from frame to frame to be considered a slide.
|
||||||
|
#this is used to prevent notes from popping in and out a lot.
|
||||||
|
note_combine_distance = 0.5000
|
||||||
|
note_jumpability = 1.8000
|
||||||
|
note_minimum_new_distribution_value = 0.0200
|
||||||
|
note_out_chop = 0.05000
|
||||||
|
|
||||||
|
#compress_coefficient = 4.0
|
||||||
|
#compress_exponent = .5
|
||||||
|
|
||||||
|
|
||||||
|
#=======================================================================
|
||||||
|
#Outputs
|
||||||
|
|
||||||
|
|
||||||
|
# This is a Voronoi thing:
|
||||||
|
outdrivers = OutputVoronoi, DisplayArray
|
||||||
|
lightx = 64
|
||||||
|
lighty = 32
|
||||||
|
fromsides = 1
|
||||||
|
shape_cutoff = 0.03
|
||||||
|
satamp = 5.000
|
||||||
|
amppow = 2.510
|
||||||
|
distpow = 1.500
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -353,6 +353,3 @@ void DoDFTProgressive32( float * outbins, float * frequencies, int bins, const f
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
264
embeddedcommon/DFT8Turbo.c
Normal file
264
embeddedcommon/DFT8Turbo.c
Normal file
|
@ -0,0 +1,264 @@
|
||||||
|
#include <stdint.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include "DFT8Turbo.h"
|
||||||
|
#include <math.h>
|
||||||
|
|
||||||
|
#include <stdio.h>
|
||||||
|
|
||||||
|
#define MAX_FREQS (24)
|
||||||
|
#define OCTAVES (5)
|
||||||
|
|
||||||
|
|
||||||
|
/*
|
||||||
|
* The first thought was using an integration map and only operating when we need to, to pull the data out.
|
||||||
|
* Now we're doing the thing below this block comment
|
||||||
|
int16_t accumulated_total; //2 bytes
|
||||||
|
int16_t last_accumulated_total_at_bin[MAX_FREQS*2]; //24 * 2 * sizeof(int16_t) = 96 bytes.
|
||||||
|
uint8_t current_time; //1 byte
|
||||||
|
uint8_t placecode[MAX_FREQS];
|
||||||
|
*/
|
||||||
|
//OK... We don't have enough RAM to sum everything... can we do something wacky with multiple octaves to sum everything better?
|
||||||
|
//i.e.
|
||||||
|
//
|
||||||
|
// 4332322132212210
|
||||||
|
//
|
||||||
|
// ++++++++++++++++-----------------
|
||||||
|
// ++++++++--------
|
||||||
|
// ++++----++++----
|
||||||
|
// ++--++--++--++--
|
||||||
|
// +-+-+-+-+-+-+-+-
|
||||||
|
//
|
||||||
|
// Don't forget we need to do this for sin and cos.
|
||||||
|
// Can we instead of making this plusses, make it a multiplier?
|
||||||
|
// How can we handle sin+cos?
|
||||||
|
//
|
||||||
|
// Is it possible to do this for every frame? I.e. for each of the 24 notes, multiply with their current place in table?
|
||||||
|
// That's interesting. It's not like a sin table.
|
||||||
|
// There is no "multiply" in the attiny instruction set for attiny85.
|
||||||
|
// There is, however for attiny402
|
||||||
|
|
||||||
|
//Question: Can we do five octaves, or does this need to be balanced?
|
||||||
|
//Question2: Should we weight higher octaves?
|
||||||
|
|
||||||
|
|
||||||
|
//ATTiny402: 256x8 RAM, 4096x8 FLASH LPM: 3 cycles + FMUL: 2 cycles << Do stacked sin waves?
|
||||||
|
//ATtiny85: 512x8 RAM, 8192x8 FLASH LPM: 3 cycles + NO MULTIPLY << Do square waves?
|
||||||
|
|
||||||
|
|
||||||
|
/* Approaches:
|
||||||
|
|
||||||
|
on ATtiny402: Stacked sin approach.
|
||||||
|
Say 16 MHz, though 12 MHz is interesting...
|
||||||
|
16k SPS: 1k cycles per; say 24 bins per; 41 cycles per bin = hard. But is it too hard?
|
||||||
|
20 cycles per s/c.
|
||||||
|
read place in stacked table (8? bits) 3 cycles
|
||||||
|
|
||||||
|
//Inner loop = 17 cycles.
|
||||||
|
read stacked table (8 bits), 3 cycles
|
||||||
|
fractional multiply table with current value. 2 cycles
|
||||||
|
read current running for note 2 cycles (LDS = 3 cycles)
|
||||||
|
subtract a shifted version, to make it into an IIR. (4 cycles)
|
||||||
|
add in current values. (2 cycles)
|
||||||
|
store data back to ram (2 cycles)
|
||||||
|
advance place in stacked table (8?bits) 1 cycle
|
||||||
|
|
||||||
|
store place in stacked table (8? bits) 3 cycles?
|
||||||
|
|
||||||
|
//What if we chunk ADC updates into groups of 4 or 8?
|
||||||
|
//This is looking barely possible.
|
||||||
|
|
||||||
|
on attiny85: scheduled adds/subtracts (like a stacked-square-wave-table)
|
||||||
|
//XXX TODO!
|
||||||
|
|
||||||
|
*/
|
||||||
|
|
||||||
|
/* Ok... Let's think about the ATTiny402. 256x8 RAM + 4096x8 FLASH.
|
||||||
|
|
||||||
|
* We can create a table which has all octaves overlaid.
|
||||||
|
* We would need to keep track of:
|
||||||
|
* 12 x 2 x 2 = 48 bytes = Current sin/cos values.
|
||||||
|
* 12 x 2 = 24 bytes = Current place in table. = 72 bytes
|
||||||
|
* We would need to store:
|
||||||
|
* The layered lookup table. If possible, keep @ 256 bytes to simplify math ops.
|
||||||
|
* The speed by which each note needs to advance.
|
||||||
|
* We would need to:
|
||||||
|
* Read current running place. X 8 cycles
|
||||||
|
* Use that place to look up into sin table. 3 cycles
|
||||||
|
* Read running val 4 cycles best case
|
||||||
|
* Multiply out the sin + IIR 5 cycles
|
||||||
|
* Store running val 4 cycles best case
|
||||||
|
* Cos-advance that place to look up into sin table. 4 cycles
|
||||||
|
* Read running val 4 cycles best case
|
||||||
|
* Multiply out the sin + IIR 5 cycles
|
||||||
|
* Store running val 4 cycles best case.
|
||||||
|
* Read how much to advance X by. 4 cycles
|
||||||
|
* (Cos^2+Sin^2) 8?
|
||||||
|
* Store it. 4 cycles best case.
|
||||||
|
* = 48 x 12 = 576 cycles. Assume 10 MHz @ 16k SPS: that leaves 625 cycles per sample, so we're OK.
|
||||||
|
*/
|
||||||
|
|
||||||
|
// Observation: The two tables are actually mirror images of each other, well diagonally mirrored. That's odd. But, would take CPU to exploit.
|
||||||
|
|
||||||
|
#define SSTABLESIZE 256
|
||||||
|
int8_t spikysin_interleved_cos[SSTABLESIZE*2];
|
||||||
|
uint16_t advancespeed[MAX_FREQS];
|
||||||
|
|
||||||
|
static int CompTableWithPhase( int nelements, float phase, int scaling )
|
||||||
|
{
|
||||||
|
int highest = 0;
|
||||||
|
int i;
|
||||||
|
for( i = 0; i < nelements; i++ )
|
||||||
|
{
|
||||||
|
float taued = i * 3.141592 * 2.0 / nelements;
|
||||||
|
int o;
|
||||||
|
float combsin = 0;
|
||||||
|
for( o = 0; o < OCTAVES; o++ )
|
||||||
|
{
|
||||||
|
combsin += sin( taued * (1<<o) + phase);
|
||||||
|
}
|
||||||
|
combsin /= OCTAVES;
|
||||||
|
int csadapt = combsin * scaling; //No value is higher with five octaves. XXX TODO Lookout. If you change # of octaves, need to change this, too.
|
||||||
|
|
||||||
|
if( csadapt > highest ) highest = csadapt;
|
||||||
|
if( -csadapt > highest ) highest = -csadapt;
|
||||||
|
|
||||||
|
if( csadapt > 127 ) csadapt = 127;
|
||||||
|
if( csadapt < -127 ) csadapt = -127; //tricky: Keep balanced.
|
||||||
|
spikysin_interleved_cos[i*2+0] = csadapt;
|
||||||
|
|
||||||
|
float combcos = 0;
|
||||||
|
for( o = 0; o < OCTAVES; o++ )
|
||||||
|
{
|
||||||
|
combcos += cos( taued * (1<<o) + phase );
|
||||||
|
}
|
||||||
|
combcos /= OCTAVES;
|
||||||
|
csadapt = combcos * scaling; //No value is higher with five octaves. XXX TODO Lookout. If you change # of octaves, need to change this, too.
|
||||||
|
|
||||||
|
if( csadapt > highest ) highest = csadapt;
|
||||||
|
if( -csadapt > highest ) highest = -csadapt;
|
||||||
|
|
||||||
|
if( csadapt > 127 ) csadapt = 127;
|
||||||
|
if( csadapt < -127 ) csadapt = -127; //tricky: Keep balanced.
|
||||||
|
spikysin_interleved_cos[i*2+1] = csadapt;
|
||||||
|
}
|
||||||
|
return highest;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
static int Setup( float * frequencies, int bins )
|
||||||
|
{
|
||||||
|
int i;
|
||||||
|
|
||||||
|
//Since start position/phase is arbitrary, we should try several to see which gives us the best dynamic range.
|
||||||
|
float tryphase = 0;
|
||||||
|
|
||||||
|
float bestphase = 0;
|
||||||
|
int highest_val_at_best_phase = 1000000;
|
||||||
|
|
||||||
|
for( tryphase = 0; tryphase < 3.14159; tryphase += 0.001 )
|
||||||
|
{
|
||||||
|
int highest = CompTableWithPhase( SSTABLESIZE, tryphase, 65536 );
|
||||||
|
if( highest < highest_val_at_best_phase )
|
||||||
|
{
|
||||||
|
highest_val_at_best_phase = highest;
|
||||||
|
bestphase = tryphase;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
printf( "Best comp: %f : %d\n", bestphase, highest_val_at_best_phase );
|
||||||
|
|
||||||
|
CompTableWithPhase( SSTABLESIZE, bestphase, (65536*128)/highest_val_at_best_phase );
|
||||||
|
|
||||||
|
// for( i = 0; i < SSTABLESIZE; i++ )
|
||||||
|
// {
|
||||||
|
// printf( "%d %d\n", spikysin_interleved_cos[i*2+0], spikysin_interleved_cos[i*2+1] );
|
||||||
|
// }
|
||||||
|
|
||||||
|
for( i = 0; i < MAX_FREQS; i++ )
|
||||||
|
{
|
||||||
|
//frequencies[i] = SPS / Freq
|
||||||
|
// Need to decide how quickly we sweep through the table.
|
||||||
|
advancespeed[i] = 256.0 /* fixed point */ * 256.0 /* size of table */ / frequencies[i];
|
||||||
|
//printf( "%f\n", frequencies[i] );
|
||||||
|
}
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/*
|
||||||
|
uint8_t spikysin_interleved_cos[256*2];
|
||||||
|
uint16_t advancespeed[MAX_FREQS];
|
||||||
|
*/
|
||||||
|
|
||||||
|
float toutbins[MAX_FREQS];
|
||||||
|
|
||||||
|
struct notedat
|
||||||
|
{
|
||||||
|
uint16_t time;
|
||||||
|
int32_t sinm;
|
||||||
|
int32_t cosm;
|
||||||
|
};
|
||||||
|
|
||||||
|
static struct notedat nd[MAX_FREQS];
|
||||||
|
|
||||||
|
void Turbo8BitRun( int8_t adcval )
|
||||||
|
{
|
||||||
|
int i;
|
||||||
|
for( i = 0; i < MAX_FREQS; i++ )
|
||||||
|
{
|
||||||
|
uint16_t ct = nd[i].time;
|
||||||
|
int32_t muxres;
|
||||||
|
int32_t running;
|
||||||
|
int32_t rdesc, rdess;
|
||||||
|
int8_t ss = spikysin_interleved_cos[(ct>>8) + 0];
|
||||||
|
muxres = ((int16_t)adcval * ss) >> 8;
|
||||||
|
running = nd[i].cosm;
|
||||||
|
running += muxres;
|
||||||
|
rdesc = running >> 8;
|
||||||
|
running -= rdesc>>6;
|
||||||
|
nd[i].cosm = running;
|
||||||
|
|
||||||
|
int8_t sc = spikysin_interleved_cos[(ct>>8) + 1];
|
||||||
|
muxres = ((int16_t)adcval * sc) >> 8;
|
||||||
|
running = nd[i].sinm;
|
||||||
|
running += muxres;
|
||||||
|
rdess = running>>8;
|
||||||
|
running -= rdess>>6;
|
||||||
|
nd[i].sinm = running;
|
||||||
|
|
||||||
|
nd[i].time = ct + advancespeed[i];
|
||||||
|
toutbins[i] = rdess * rdess + rdesc * rdesc;
|
||||||
|
//printf( "%d %d = %f %p\n", rdess, rdesc, toutbins[i], &toutbins[i] );
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
void DoDFT8BitTurbo( float * outbins, float * frequencies, int bins, const float * databuffer, int place_in_data_buffer, int size_of_data_buffer, float q, float speedup )
|
||||||
|
{
|
||||||
|
static int is_setup;
|
||||||
|
if( !is_setup ) { is_setup = 1; Setup( frequencies, bins ); }
|
||||||
|
static int last_place;
|
||||||
|
int i;
|
||||||
|
|
||||||
|
for( i = last_place; i != place_in_data_buffer; i = (i+1)%size_of_data_buffer )
|
||||||
|
{
|
||||||
|
int16_t ifr1 = (int16_t)( ((databuffer[i]) ) * 4095 );
|
||||||
|
//ifr1 += 4095;
|
||||||
|
Turbo8BitRun( ifr1>>5 );
|
||||||
|
}
|
||||||
|
|
||||||
|
for( i = 0; i < bins; i++ )
|
||||||
|
{
|
||||||
|
outbins[i] = 0;
|
||||||
|
}
|
||||||
|
for( i = 0; i < MAX_FREQS; i++ )
|
||||||
|
{
|
||||||
|
int iss = nd[i].sinm;
|
||||||
|
int isc = nd[i].cosm;
|
||||||
|
int mux = iss * iss + isc * isc;
|
||||||
|
if( mux == 0 ) mux = 1;
|
||||||
|
outbins[i+MAX_FREQS] = sqrt(mux)/1000.0;
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
|
9
embeddedcommon/DFT8Turbo.h
Normal file
9
embeddedcommon/DFT8Turbo.h
Normal file
|
@ -0,0 +1,9 @@
|
||||||
|
#ifndef DFT8TURBO_H
#define DFT8TURBO_H

/* Note: Frequencies must be precompiled. */

/*
 * Experimental 8-bit "turbo" DFT.
 *
 *   outbins               output array of `bins` floats; results are written
 *                         starting one octave (24 entries) in, so callers should
 *                         provide at least 48 entries.
 *   frequencies           per-bin period in samples (SPS / Freq).
 *   bins                  number of entries in `outbins` / `frequencies`.
 *   databuffer            circular buffer of float samples.
 *   place_in_data_buffer  current head index into `databuffer`.
 *   size_of_data_buffer   number of entries in `databuffer`.
 *   q, speedup            present for parity with the other DFT entry points.
 */
void DoDFT8BitTurbo( float * outbins, float * frequencies, int bins, const float * databuffer, int place_in_data_buffer, int size_of_data_buffer, float q, float speedup );

#endif
|
||||||
|
|
Loading…
Reference in a new issue