diff --git a/CMakeLists.txt b/CMakeLists.txt index fc90682..6550818 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -251,7 +251,7 @@ add_test(NAME radae_tx_basic ./inference.sh model19_check3/checkpoints/checkpoint_epoch_100.pth wav/brian_g8sez.wav /dev/null \ --rate_Fs --pilots --pilot_eq --eq_ls --cp 0.004 --bottleneck 3 --auxdata --write_rx rx.f32 --correct_freq_offset; \ cat features_in.f32 | python3 radae_txe.py --model model19_check3/checkpoints/checkpoint_epoch_100.pth --txbpf > rx.f32 - cat rx.f32 | python3 radae_rxe.py --model model19_check3/checkpoints/checkpoint_epoch_100.pth -v 1 > features_txs_out.f32; \ + cat rx.f32 | python3 radae_rxe.py --model model19_check3/checkpoints/checkpoint_epoch_100.pth -v 2 > features_txs_out.f32; \ python3 loss.py features_in.f32 features_txs_out.f32 --loss_test 0.15 --acq_time_test 0.5 --clip_start 5") set_tests_properties(radae_tx_basic PROPERTIES PASS_REGULAR_EXPRESSION "PASS") @@ -291,10 +291,10 @@ add_test(NAME rx_streaming # basic test of streaming rx, run rx in vanilla and streaming, compare add_test(NAME radae_rx_basic COMMAND sh -c "cd ${CMAKE_SOURCE_DIR}; \ - ./inference.sh model17/checkpoints/checkpoint_epoch_100.pth wav/brian_g8sez.wav /dev/null \ - --EbNodB 10 --freq_offset 11 \ + ./inference.sh model19_check3/checkpoints/checkpoint_epoch_100.pth wav/brian_g8sez.wav /dev/null \ + --EbNodB 10 --freq_offset 11 --prepend_noise 1 --append_noise 1 --end_of_over --auxdata \ --rate_Fs --pilots --pilot_eq --eq_ls --cp 0.004 --bottleneck 3 --write_rx rx.f32 --correct_freq_offset; \ - cat rx.f32 | PYTHONPATH='../' python3 radae_rxe.py --model model17/checkpoints/checkpoint_epoch_100.pth -v 1 --noauxdata > features_rxs_out.f32; \ + cat rx.f32 | PYTHONPATH='../' python3 radae_rxe.py -v 2 > features_rxs_out.f32; \ python3 loss.py features_in.f32 features_rxs_out.f32 --loss_test 0.15 --acq_time_test 0.5") set_tests_properties(radae_rx_basic PROPERTIES PASS_REGULAR_EXPRESSION "PASS") @@ -309,7 +309,7 @@ add_test(NAME 
radae_rx_awgn --rate_Fs --pilots --pilot_eq --eq_ls --cp 0.004 --bottleneck 3 --time_offset -16 --write_rx rx.f32 \ --prepend_noise 1 --append_noise 3 --end_of_over --auxdata --correct_freq_offset; \ cat rx.f32 | python3 radae_rxe.py --model model19_check3/checkpoints/checkpoint_epoch_100.pth -v 2 > features_rx_out.f32; \ - python3 loss.py features_in.f32 features_rx_out.f32 --loss 0.3 --acq_time_test 1.0 --clip_end 100") + python3 loss.py features_in.f32 features_rx_out.f32 --loss 0.3 --acq_time_test 1.0 --clip_end 300") set_tests_properties(radae_rx_awgn PROPERTIES PASS_REGULAR_EXPRESSION "PASS") # SNR=0dB MPP @@ -488,7 +488,8 @@ add_test(NAME radae_tx_embed_c add_test(NAME radae_rx_embed_c COMMAND sh -c "cd ${CMAKE_SOURCE_DIR}; \ ./inference.sh model19_check3/checkpoints/checkpoint_epoch_100.pth wav/brian_g8sez.wav /dev/null \ - --rate_Fs --pilots --pilot_eq --eq_ls --cp 0.004 --bottleneck 3 --auxdata --write_rx rx.f32 --correct_freq_offset; \ + --EbNodB 100 --freq_offset 0 --append_noise 1 --end_of_over --auxdata \ + --rate_Fs --pilots --pilot_eq --eq_ls --cp 0.004 --bottleneck 3 --write_rx rx.f32 --correct_freq_offset; \ cat rx.f32 | PYTHONPATH='.' 
${CMAKE_CURRENT_BINARY_DIR}/src/radae_rx > features_out.f32; python3 loss.py features_in.f32 features_out.f32 --loss_test 0.15 --acq_time_test 0.5") set_tests_properties(radae_rx_embed_c PROPERTIES PASS_REGULAR_EXPRESSION "PASS") @@ -551,6 +552,41 @@ add_test(NAME c_decoder_aux_mpp python3 loss.py features_in.f32 features_c.f32 --loss 0.3 --clip_start 300") set_tests_properties(c_decoder_aux_mpp PROPERTIES PASS_REGULAR_EXPRESSION "PASS") +# EOO data ------------------------------------------------------------------------------------------- + +# Python Tx & Rx, no noise +add_test(NAME radae_eoo_data_py + COMMAND sh -c "cd ${CMAKE_SOURCE_DIR}; \ + ${CMAKE_CURRENT_BINARY_DIR}/src/lpcnet_demo -features wav/brian_g8sez.wav features_in.f32; \ + cat features_in.f32 | python3 radae_txe.py --eoo_data_test > rx.f32; \ + cat rx.f32 | python3 radae_rxe.py -v 2 --eoo_data_test > /dev/null") + set_tests_properties(radae_eoo_data_py PROPERTIES PASS_REGULAR_EXPRESSION "PASS") + +# C tx & rx, no noise. Note Python radae_txe.py just generates eoo_tx.f32 for C radae_tx +add_test(NAME radae_eoo_data_c + COMMAND sh -c "cd ${CMAKE_SOURCE_DIR}; \ + ${CMAKE_CURRENT_BINARY_DIR}/src/lpcnet_demo -features wav/brian_g8sez.wav features_in.f32; \ + cat features_in.f32 | python3 radae_txe.py --eoo_data_test > /dev/null; \ + cat features_in.f32 | PYTHONPATH='.' ${CMAKE_CURRENT_BINARY_DIR}/src/radae_tx > rx.f32; \ + cat rx.f32 | PYTHONPATH='.' ${CMAKE_CURRENT_BINARY_DIR}/src/radae_rx > /dev/null; \ + python3 eoo_ber.py eoo_tx.f32 eoo_rx.f32") + set_tests_properties(radae_eoo_data_c PROPERTIES PASS_REGULAR_EXPRESSION "PASS") + +# C tx & rx, over a multipath channel, we set up 5 "overs", each with an EOO chunk of data. Just one of them needs +# to have a BER < 5% for a PASS. 
About 6dB SNR +add_test(NAME radae_eoo_data_mpp + COMMAND sh -c "cd ${CMAKE_SOURCE_DIR}; test/make_g.sh; \ + ${CMAKE_CURRENT_BINARY_DIR}/src/lpcnet_demo -features wav/brian_g8sez.wav features_in.f32; \ + cat features_in.f32 | python3 radae_txe.py --eoo_data_test > /dev/null; \ + cat features_in.f32 | PYTHONPATH='.' ${CMAKE_CURRENT_BINARY_DIR}/src/radae_tx > tx.f32; \ + cat tx.f32 tx.f32 tx.f32 tx.f32 tx.f32 > tx_2.f32; + cat tx_2.f32 | python3 f32toint16.py --real --scale 8192 > tx.raw; \ + ${CODEC2_DEV_BUILD_DIR}/src/ch tx.raw rx.raw --No -24 --mpp --fading_dir .; \ + cat rx.raw | python3 int16tof32.py --zeropad > rx.f32; \ + cat rx.f32 | PYTHONPATH='.' ${CMAKE_CURRENT_BINARY_DIR}/src/radae_rx > /dev/null; \ + python3 eoo_ber.py eoo_tx.f32 eoo_rx.f32") + set_tests_properties(radae_eoo_data_mpp PROPERTIES PASS_REGULAR_EXPRESSION "PASS") + # BBFM ----------------------------------------------------------------------------------------------- # single carrier modem internal (inside single_carrier class) tests diff --git a/README.md b/README.md index 023b9b3..673fb43 100644 --- a/README.md +++ b/README.md @@ -51,7 +51,6 @@ The RDOVAE derived Python source code is released under the two-clause BSD licen | stateful_encoder.[py,sh] | Inference test that compares stateful to vanilla encoder | | radae_tx.[py,sh] | streaming RADAE encoder and helper script | | radae_rx.[py,sh] | streaming RADAE decoder and helper script | -| resource_est.py | WIP estimate CPU/memory resources | | radae_base.py | Shared ML code between models | | radae/bbfm.py | Baseband FM PyTorch model | | train_bbfm.py | Training for BBFM model | diff --git a/eoo_ber.py b/eoo_ber.py new file mode 100644 index 0000000..554a437 --- /dev/null +++ b/eoo_ber.py @@ -0,0 +1,18 @@ +import sys +import numpy as np + +# bits are in float form, e.g. 
+/-1 or +/-1000 +tx_bits = np.fromfile(sys.argv[1], dtype=np.float32) +rx_bits = np.fromfile(sys.argv[2], dtype=np.float32) +bits_per_frame = len(tx_bits) +n_frames = len(rx_bits)//bits_per_frame +n_ok_frames = 0 +for f in range(n_frames): + n_errors = sum(rx_bits[f*bits_per_frame:(f+1)*bits_per_frame]*tx_bits < 0) + ber = n_errors/bits_per_frame + print(f"frame received! BER: {ber:5.2f}") + if ber < 0.05: + n_ok_frames += 1 +print(f"EOO frames received: {n_frames} n_ok_frames: {n_ok_frames}", file=sys.stderr) +if n_ok_frames: + print("PASS", file=sys.stderr) diff --git a/inference.py b/inference.py index f66a18c..75f148b 100644 --- a/inference.py +++ b/inference.py @@ -57,7 +57,7 @@ parser.add_argument('--rx_gain', type=float, default=1.0, help='gain to apply to --write_rx samples (default 1.0)') parser.add_argument('--write_tx', type=str, default="", help='path to output file of rate Fs tx samples in ..IQIQ...f32 format') parser.add_argument('--phase_offset', type=float, default=0, help='phase offset in rads') -parser.add_argument('--freq_offset', type=float, help='freq offset in Hz') +parser.add_argument('--freq_offset', type=float, default=0, help='freq offset in Hz') parser.add_argument('--time_offset', type=int, default=0, help='sampling time offset in samples') parser.add_argument('--df_dt', type=float, default=0, help='rate of change of freq offset in Hz/s') parser.add_argument('--gain', type=float, default=1.0, help='rx gain (defaul 1.0)') @@ -240,7 +240,7 @@ n_errors = int(torch.sum(x < 0)) n_bits = int(torch.numel(x)) BER = n_errors/n_bits - print(f"loss: {loss:5.3f} BER: {BER:5.3f}") + print(f"loss: {loss:5.3f} Auxdata BER: {BER:5.3f}") else: print(f"loss: {loss:5.3f}") if args.loss_test > 0.0: @@ -264,6 +264,14 @@ # appends a frame containing a final pilot so the last RADAE frame # has a good phase reference, and two "end of over" symbols eoo = model.eoo + + # this is messy! 
- continue phase, freq and dF/dt track from inside forward() + freq = torch.zeros_like(eoo) + freq[:,] = model.freq_offset*torch.ones_like(eoo) + model.df_dt*torch.arange(eoo.shape[1])/model.Fs + omega = freq*2*torch.pi/model.Fs + lin_phase = torch.cumsum(omega,dim=1) + lin_phase = torch.exp(1j*lin_phase) + eoo = eoo*lin_phase*model.final_phase eoo = eoo + sigma*torch.randn_like(eoo) rx = torch.concatenate([rx,eoo],dim=1) if args.prepend_noise > 0.0: diff --git a/radae/dsp.py b/radae/dsp.py index 0233a73..b3b2fe5 100644 --- a/radae/dsp.py +++ b/radae/dsp.py @@ -344,7 +344,7 @@ def transmitter_one(self, z, num_timesteps_at_rate_Rs): # Single modem frame streaming receiver. TODO: is there a better way to pass a bunch of constants around? class receiver_one(): - def __init__(self,latent_dim,Fs,M,Ncp,Wfwd,Nc,Ns,w,P,bottleneck,pilot_gain,time_offset,coarse_mag): + def __init__(self,latent_dim,Fs,M,Ncp,Wfwd,Nc,Ns,w,P,Pend,bottleneck,pilot_gain,time_offset,coarse_mag): self.latent_dim = latent_dim self.Fs = Fs self.M = M @@ -354,6 +354,7 @@ def __init__(self,latent_dim,Fs,M,Ncp,Wfwd,Nc,Ns,w,P,bottleneck,pilot_gain,time_ self.Ns = Ns self.w = w self.P = P + self.Pend = Pend self.bottleneck = bottleneck self.pilot_gain = pilot_gain self.time_offset = time_offset @@ -415,7 +416,7 @@ def do_pilot_eq_one(self, num_modem_frames, rx_sym_pilots): return rx_sym_pilots # One frame version of rate Fs receiver for streaming implementation - def receiver_one(self, rx): + def receiver_one(self, rx, endofover): Ns = self.Ns + 1 # we expect: Pilots - data symbols - Pilots @@ -431,21 +432,32 @@ def receiver_one(self, rx): # DFT to transform M time domain samples to Nc carriers rx_sym = torch.matmul(rx_dash, self.Wfwd) - # Pilot based EQ rx_sym_pilots = torch.reshape(rx_sym,(1, num_modem_frames, num_timesteps_at_rate_Rs, self.Nc)) - rx_sym_pilots = self.do_pilot_eq_one(num_modem_frames,rx_sym_pilots) - rx_sym = torch.ones(1, num_modem_frames, self.Ns, self.Nc, dtype=torch.complex64) - 
rx_sym = rx_sym_pilots[:,:,1:self.Ns+1,:] - - # demap QPSK symbols - rx_sym = torch.reshape(rx_sym, (1, -1, self.latent_dim//2)) - z_hat = torch.zeros(1,rx_sym.shape[1], self.latent_dim) - - z_hat[:,:,::2] = rx_sym.real - z_hat[:,:,1::2] = rx_sym.imag - + if not endofover: + # Pilot based least squares EQ + rx_sym_pilots = self.do_pilot_eq_one(num_modem_frames,rx_sym_pilots) + rx_sym = rx_sym_pilots[:,:,1:self.Ns+1,:] + rx_sym = torch.reshape(rx_sym, (1, -1, self.latent_dim//2)) + z_hat = torch.zeros(1,rx_sym.shape[1], self.latent_dim) + + z_hat[:,:,::2] = rx_sym.real + z_hat[:,:,1::2] = rx_sym.imag + else: + # Simpler (but lower performance) EQ as average of pilots, as LS set up for PDDDDP, rather than our PEDDDE + for c in range(self.Nc): + phase_offset = torch.angle(rx_sym_pilots[0,0,0,c]/self.P[c] + + rx_sym_pilots[0,0,1,c]/self.Pend[c] + + rx_sym_pilots[0,0,Ns,c]/self.Pend[c]) + rx_sym_pilots[:,:,:Ns+1,c] *= torch.exp(-1j*phase_offset) + rx_sym = torch.reshape(rx_sym_pilots[:,:,2:Ns,:],(1,(Ns-2)*self.Nc)) + z_hat = torch.zeros(1,(Ns-2)*self.Nc*2) + + z_hat[:,::2] = rx_sym.real + z_hat[:,1::2] = rx_sym.imag + return z_hat + # Generate root raised cosine (Root Nyquist) filter coefficients # thanks http://www.dsplog.com/db-install/wp-content/uploads/2008/05/raised_cosine_filter.m diff --git a/radae/radae.py b/radae/radae.py index 09fbe27..00dec6e 100644 --- a/radae/radae.py +++ b/radae/radae.py @@ -199,12 +199,12 @@ def __init__(self, self.pilot_gain = pilot_backoff*self.M/(Nc**0.5) self.d_samples = int(self.multipath_delay * self.Fs) # multipath delay in samples - self.Ncp = int(cyclic_prefix*self.Fs) # set up End Of Over sequence # Normal frame ...PDDDDP... # EOO frame ...PE000E... 
# Key: P = self.p_cp, D = data symbols, E = self.pend_cp, 0 = zeros + if self.Ncp: M = self.M Ncp = self.Ncp @@ -217,7 +217,10 @@ def __init__(self, if self.bottleneck == 3: eoo = torch.tanh(torch.abs(eoo)) * torch.exp(1j*torch.angle(eoo)) self.eoo = eoo - + + # experimental EOO data symbols (quick and dirty supplementary txt channel) + self.Nseoo = (Ns-1)*Nc # number of EOO data symbols + print(f"Rs: {Rs:5.2f} Rs': {Rs_dash:5.2f} Ts': {Ts_dash:5.3f} Nsmf: {Nsmf:3d} Ns: {Ns:3d} Nc: {Nc:3d} M: {self.M:d} Ncp: {self.Ncp:d}", file=sys.stderr) self.Tmf = Tmf @@ -435,6 +438,22 @@ def est_snr(self, r, time_offset=0): SNR_est = Ct/(torch.dot(torch.conj(p),p) - Ct) return SNR_est.real + def set_eoo_bits(self, eoo_bits): + Ns = self.Ns; Ncp = self.Ncp; M = self.M; Nc = self.Nc; Nmf = int((Ns+1)*(M+Ncp)) + + eoo_syms = eoo_bits[::2] + 1j*eoo_bits[1::2] + eoo_syms = torch.reshape(eoo_syms,(1,Ns-1,Nc)) + + eoo_tx = torch.matmul(eoo_syms,self.Winv) + if self.Ncp: + eoo_tx_cp = torch.zeros((1,Ns-1,self.M+Ncp),dtype=torch.complex64) + eoo_tx_cp[:,:,Ncp:] = eoo_tx + eoo_tx_cp[:,:,:Ncp] = eoo_tx_cp[:,:,-Ncp:] + eoo_tx = torch.reshape(eoo_tx_cp,(1,(Ns-1)*(self.M+Ncp)))*self.pilot_gain + if self.bottleneck == 3: + eoo_tx = torch.tanh(torch.abs(eoo_tx)) * torch.exp(1j*torch.angle(eoo_tx)) + self.eoo[0,2*(M+Ncp):Nmf] = eoo_tx + def forward(self, features, H, G=None): (num_batches, num_ten_ms_timesteps, num_features) = features.shape @@ -482,6 +501,7 @@ def forward(self, features, H, G=None): tx_before_channel = None rx = None + self.final_phase = torch.tensor(1,dtype=torch.complex64) if self.rate_Fs: num_timesteps_at_rate_Fs = num_timesteps_at_rate_Rs*self.M @@ -530,6 +550,7 @@ def forward(self, features, H, G=None): lin_phase = torch.cumsum(omega,dim=1) lin_phase = torch.exp(1j*lin_phase) tx = tx*lin_phase + self.final_phase = lin_phase[:,-1] # insert per sequence random phase and freq offset (training time) if self.freq_rand: diff --git a/radae/radae_base.py b/radae/radae_base.py 
index 95d8790..bab6236 100644 --- a/radae/radae_base.py +++ b/radae/radae_base.py @@ -185,7 +185,7 @@ def __init__(self, feature_dim, output_dim, bottleneck = 1): self.z_dense = nn.Linear(864, self.output_dim) nb_params = sum(p.numel() for p in self.parameters()) - print(f"encoder: {nb_params} weights", file=sys.stderr) + #print(f"encoder: {nb_params} weights", file=sys.stderr) # initialize weights self.apply(init_weights) @@ -251,7 +251,7 @@ def __init__(self, feature_dim, output_dim, bottleneck = 1): self.z_dense = nn.Linear(864, self.output_dim) nb_params = sum(p.numel() for p in self.parameters()) - print(f"encoder: {nb_params} weights", file=sys.stderr) + #print(f"encoder: {nb_params} weights", file=sys.stderr) # initialize weights self.apply(init_weights) @@ -326,7 +326,7 @@ def __init__(self, input_dim, output_dim): self.glu5 = GLU(96) nb_params = sum(p.numel() for p in self.parameters()) - print(f"decoder: {nb_params} weights", file=sys.stderr) + #print(f"decoder: {nb_params} weights", file=sys.stderr) # initialize weights self.apply(init_weights) @@ -393,7 +393,7 @@ def __init__(self, input_dim, output_dim): self.glu5 = GLU(96) nb_params = sum(p.numel() for p in self.parameters()) - print(f"decoder: {nb_params} weights", file=sys.stderr) + #print(f"decoder: {nb_params} weights", file=sys.stderr) # initialize weights self.apply(init_weights) diff --git a/radae_rxe.py b/radae_rxe.py index 2cdaafd..df4d84c 100644 --- a/radae_rxe.py +++ b/radae_rxe.py @@ -54,7 +54,8 @@ # P(accept|false) = binocdf(8,24,0.5) = 3.2E-3 class radae_rx: - def __init__(self, model_name, latent_dim = 80, auxdata = True, bottleneck = 3, bpf_en=True, v=2, disable_unsync=False, foff_err=0, bypass_dec=False): + def __init__(self, model_name, latent_dim = 80, auxdata = True, bottleneck = 3, bpf_en=True, v=2, + disable_unsync=False, foff_err=0, bypass_dec=False, eoo_data_test=False): self.latent_dim = latent_dim self.auxdata = auxdata @@ -64,6 +65,7 @@ def __init__(self, model_name, 
latent_dim = 80, auxdata = True, bottleneck = 3, self.disable_unsync = disable_unsync self.foff_err = foff_err self.bypass_dec = bypass_dec + self.eoo_data_test = eoo_data_test print(f"bypass_dec: {bypass_dec} foff_err: {foff_err:f}", file=sys.stderr) @@ -85,7 +87,7 @@ def __init__(self, model_name, latent_dim = 80, auxdata = True, bottleneck = 3, assert self.model.coarse_mag self.receiver = receiver_one(model.latent_dim,model.Fs,model.M,model.Ncp,model.Wfwd,model.Nc, - model.Ns,model.w,model.P,model.bottleneck,model.pilot_gain, + model.Ns,model.w,model.P,model.Pend,model.bottleneck,model.pilot_gain, model.time_offset,model.coarse_mag) M = model.M @@ -115,7 +117,7 @@ def __init__(self, model_name, latent_dim = 80, auxdata = True, bottleneck = 3, # Stateful decoder wasn't present during training, so we need to load weights from existing decoder model.core_decoder_statefull_load_state_dict() - # number of input floats per processing frame + # number of output floats per processing frame if not self.bypass_dec: self.n_floats_out = model.Nzmf*model.enc_stride*nb_total_features else: @@ -142,6 +144,9 @@ def __init__(self, model_name, latent_dim = 80, auxdata = True, bottleneck = 3, def get_n_features_out(self): return self.model.Nzmf*self.model.dec_stride*nb_total_features + def get_n_eoo_features_out(self): + return self.model.Nseoo + def get_n_floats_out(self): return self.n_floats_out @@ -156,6 +161,9 @@ def get_sync(self): def sum_uw_errors(self,new_uw_errors): self.uw_errors += new_uw_errors + + def get_Neoo_bits(self): + return self.model.Nseoo*self.model.bps def do_radae_rx(self, buffer_complex, floats_out): acq = self.acq @@ -212,21 +220,19 @@ def do_radae_rx(self, buffer_complex, floats_out): uw_fail = True self.uw_errors = 0 - if not endofover: - # correct frequency offset, note we preserve state of phase - # TODO do we need preserve state of phase? 
We're passing entire vector and there isn't any memory (I think) - w = 2*np.pi*self.fmax/Fs - rx_phase_vec = np.zeros(Nmf+M+Ncp,np.csingle) - for n in range(Nmf+M+Ncp): - self.rx_phase = self.rx_phase*np.exp(-1j*w) - rx_phase_vec[n] = self.rx_phase - rx1 = rx_buf[self.tmax-Ncp:self.tmax-Ncp+Nmf+M+Ncp] - rx = torch.tensor(rx1*rx_phase_vec, dtype=torch.complex64) - - # run through RADAE receiver DSP - z_hat = receiver.receiver_one(rx) - valid_output = True - + # correct frequency offset, note we preserve state of phase (TODO: I don't think we need to) + w = 2*np.pi*self.fmax/Fs + rx_phase_vec = np.zeros(Nmf+M+Ncp,np.csingle) + for n in range(Nmf+M+Ncp): + self.rx_phase = self.rx_phase*np.exp(-1j*w) + rx_phase_vec[n] = self.rx_phase + rx1 = rx_buf[self.tmax-Ncp:self.tmax-Ncp+Nmf+M+Ncp] + rx = torch.tensor(rx1*rx_phase_vec, dtype=torch.complex64) + + # run through RADAE receiver DSP + z_hat = receiver.receiver_one(rx, endofover) + valid_output = not endofover + if v == 2 or (v == 1 and (self.state == "search" or self.state == "candidate" or prev_state == "candidate")): print(f"{self.mf:3d} state: {self.state:10s} valid: {candidate:d} {endofover:d} {self.valid_count:2d} Dthresh: {acq.Dthresh:8.2f} ", end='', file=sys.stderr) print(f"Dtmax12: {acq.Dtmax12:8.2f} {acq.Dtmax12_eoo:8.2f} tmax: {self.tmax:4d} fmax: {self.fmax:6.2f}", end='', file=sys.stderr) @@ -305,7 +311,16 @@ def do_radae_rx(self, buffer_complex, floats_out): else: np.copyto(floats_out, z_hat.cpu().detach().numpy().flatten().astype('float32')) - return valid_output + if endofover: + z_hat = z_hat.cpu().detach().numpy().flatten() + np.copyto(floats_out,np.concatenate([z_hat,np.zeros(len(floats_out)-len(z_hat))])) + + # possible return cases + # valid_output | endofover | Description + # 0 0 Nothing returned + # 1 0 valid speech output (either z_hat or features, depending on bypass_dec) + # 0 1 EOO data output + return valid_output | endofover<<1 if __name__ == '__main__': parser = 
argparse.ArgumentParser(description='RADAE streaming receiver, IQ.f32 on stdin to features.f32 on stdout') @@ -316,11 +331,13 @@ def do_radae_rx(self, buffer_complex, floats_out): parser.add_argument('--no_stdout', action='store_false', dest='use_stdout', help='disable the use of stdout (e.g. with python3 -m cProfile)') parser.add_argument('--foff_err', type=float, default=0.0, help='Artifical freq offset error after first sync to test false sync (default 0.0)') parser.add_argument('--bypass_dec', action='store_true', help='Bypass core decoder, write z_hat to stdout') + parser.add_argument('--eoo_data_test', action='store_true', help='experimental EOO data test - count bit errors') parser.set_defaults(auxdata=True) parser.set_defaults(use_stdout=True) args = parser.parse_args() - rx = radae_rx(args.model_name,auxdata=args.auxdata,v=args.v,disable_unsync=args.disable_unsync,foff_err=args.foff_err, bypass_dec=args.bypass_dec) + rx = radae_rx(args.model_name,auxdata=args.auxdata,v=args.v,disable_unsync=args.disable_unsync,foff_err=args.foff_err, + bypass_dec=args.bypass_dec,eoo_data_test=args.eoo_data_test) # allocate storage for output features floats_out = np.zeros(rx.get_n_floats_out(),dtype=np.float32) @@ -329,6 +346,16 @@ def do_radae_rx(self, buffer_complex, floats_out): if len(buffer) != rx.get_nin()*struct.calcsize("ff"): break buffer_complex = np.frombuffer(buffer,np.csingle) - valid_output = rx.do_radae_rx(buffer_complex, floats_out) - if valid_output and args.use_stdout: + ret = rx.do_radae_rx(buffer_complex, floats_out) + if (ret & 1) and args.use_stdout: sys.stdout.buffer.write(floats_out) + if (ret & 2) and args.eoo_data_test: + # create a RNG with same sequence for BER testing with separate tx and rx + seed = 65647; rng = np.random.default_rng(seed) + tx_bits = np.sign(rng.random(rx.get_Neoo_bits())-0.5) + n_bits = len(tx_bits) + n_errors = sum(floats_out[:n_bits]*tx_bits < 0) + ber = n_errors/n_bits + print(f"EOO data n_bits: {n_bits} n_errors: 
{n_errors} BER: {ber:5.2f}", file=sys.stderr) + if ber < 0.05: + print("PASS", file=sys.stderr) diff --git a/radae_txe.py b/radae_txe.py index 4e09eb8..528135e 100644 --- a/radae_txe.py +++ b/radae_txe.py @@ -97,7 +97,13 @@ def get_n_floats_in(self): def get_Nmf(self): return self.Nmf def get_Neoo(self): - return self.Neoo + return self.Neoo + def get_Neoo_bits(self): + return self.model.Nseoo*self.model.bps + def set_eoo_bits(self,eoo_bits): + print("setting bits!", file=sys.stderr) + print(eoo_bits[0:8],file=sys.stderr) + self.model.set_eoo_bits(torch.tensor(eoo_bits, dtype=torch.float32)) def do_radae_tx(self,buffer_f32,tx_out): model = self.model @@ -143,10 +149,18 @@ def do_eoo(self,tx_out): parser.add_argument('--noauxdata', dest="auxdata", action='store_false', help='disable injection of auxillary data symbols') parser.add_argument('--txbpf', action='store_true', help='enable Tx BPF') parser.add_argument('--bypass_enc', action='store_true', help='Bypass core encoder, read z from stdin') + parser.add_argument('--eoo_data_test', action='store_true', help='experimental EOO data test - tx test frame') parser.set_defaults(auxdata=True) args = parser.parse_args() tx = radae_tx(model_name=args.model_name, auxdata=args.auxdata, txbpf_en=args.txbpf, bypass_enc=args.bypass_enc) - + + if args.eoo_data_test: + # create a RNG with same sequence for BER testing with separate tx and rx + seed = 65647; rng = np.random.default_rng(seed) + tx_bits = np.sign(rng.random(tx.get_Neoo_bits())-0.5, dtype=np.float32) + tx.set_eoo_bits(tx_bits) + tx_bits.tofile("eoo_tx.f32") + tx_out = np.zeros(tx.Nmf,dtype=np.csingle) while True: buffer = sys.stdin.buffer.read(tx.n_floats_in*struct.calcsize("f")) @@ -159,3 +173,8 @@ def do_eoo(self,tx_out): eoo_out = np.zeros(tx.Neoo,dtype=np.csingle) tx.do_eoo(eoo_out) sys.stdout.buffer.write(eoo_out) + if args.eoo_data_test: + # trailing silence so Rx has enough samples to process EOO frame + eoo_out = np.zeros(tx.Neoo,dtype=np.csingle) + 
sys.stdout.buffer.write(eoo_out) + diff --git a/resource_est.py b/resource_est.py deleted file mode 100644 index 76c658b..0000000 --- a/resource_est.py +++ /dev/null @@ -1,92 +0,0 @@ -""" -/* Estimate CPU and memory requirements */ - -/* Copyright (c) 2024 David Rowe */ - -/* - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions - are met: - - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER - OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
-*/ -""" - -def gru(n_infeat, n_outfeat, multiplies, adds): - # for one encoder timestep - # Ref: https://pytorch.org/docs/stable/generated/torch.nn.GRU.html - - # step 1: r_t - multiplies += 2*n_infeat*n_outfeat - adds += 2*n_infeat*n_outfeat + 3*n_outfeat - - # step 2: z_t - multiplies += 2*n_infeat*n_outfeat - adds += 2*n_infeat*n_outfeat + 3*n_outfeat - - # step 3: n_t - multiplies += 2*n_infeat*n_outfeat + n_outfeat - adds += 2*n_infeat*n_outfeat + 3*n_outfeat - - # step 4: h_t - multiplies += 2*n_outfeat - adds += 2*n_outfeat - - # TODO sigmoid and tanh (treat as look ups?) - return multiplies,adds - -def conv1d(n_inchan, n_outchan, kernel_size, multiplies, adds): - # for one encoder timestep - multiplies += kernel_size*n_inchan*n_outchan - adds += kernel_size*n_inchan*n_outchan + n_outchan - - # TODO tanh - return multiplies,adds - -def linear(n_infeat, n_outfeat, multiplies, adds): - multiplies += n_infeat*n_outfeat - adds += n_infeat*n_outfeat + n_outfeat - - return multiplies,adds - -if __name__ == '__main__': - Tz = 0.04 # one encoder timestep - - gru_multiplies,gru_adds = gru(64,64,0,0) - gru_multiplies,gru_adds = gru(224,64, gru_multiplies,gru_adds) - gru_multiplies,gru_adds = gru(384,64, gru_multiplies,gru_adds) - gru_multiplies,gru_adds = gru(544,64, gru_multiplies,gru_adds) - gru_multiplies,gru_adds = gru(704,64, gru_multiplies,gru_adds) - print(f"GRU per encoder call multiplies: {gru_multiplies:d} adds: {gru_adds}") - - conv1d_multiplies,conv1d_adds = conv1d(128,96, 2, 0, 0) - conv1d_multiplies,conv1d_adds = conv1d(288,96, 2, conv1d_multiplies,conv1d_adds) - conv1d_multiplies,conv1d_adds = conv1d(448,96, 2, conv1d_multiplies,conv1d_adds) - conv1d_multiplies,conv1d_adds = conv1d(544,96, 2, conv1d_multiplies,conv1d_adds) - conv1d_multiplies,conv1d_adds = conv1d(768,96, 2, conv1d_multiplies,conv1d_adds) - print(f"Conv1D per encoder call multiplies: {conv1d_multiplies:d} adds: {conv1d_adds}") - - linear_multiplies,linear_adds = linear(4*22,64,0,0) - 
linear_multiplies,linear_adds = linear(864,80,linear_multiplies,linear_adds) - print(f"Linear per encoder call multiplies: {linear_multiplies:d} adds: {linear_adds}") - - mmacs = (gru_multiplies+conv1d_multiplies+linear_multiplies)/Tz/1E6 - print(f"CoreEncoder MMACs per second: {mmacs}") - diff --git a/src/radae_rx.c b/src/radae_rx.c index 4dba7ba..774accb 100644 --- a/src/radae_rx.c +++ b/src/radae_rx.c @@ -27,7 +27,11 @@ int main(int argc, char *argv[]) int n_rx_in = rade_nin_max(r); RADE_COMP rx_in[n_rx_in]; int nin = rade_nin(r); - + int n_eoo_bits = rade_n_eoo_bits(r); + FILE *feoo = fopen("eoo_rx.f32","wb"); assert(feoo != NULL); + int has_eoo_out; + float eoo_out[n_eoo_bits]; + #ifdef _WIN32 // Note: freopen() returns NULL if filename is NULL, so // we have to use setmode() to make it a binary stream instead. @@ -36,15 +40,19 @@ int main(int argc, char *argv[]) #endif // _WIN32 while((size_t)nin == fread(rx_in, sizeof(RADE_COMP), nin, stdin)) { - int n_out = rade_rx(r,features_out,rx_in); + int n_out = rade_rx(r,features_out,&has_eoo_out,eoo_out,rx_in); if (n_out) { fwrite(features_out, sizeof(float), n_features_out, stdout); fflush(stdout); } + if (has_eoo_out) { + fwrite(eoo_out, sizeof(float), n_eoo_bits, feoo); + } nin = rade_nin(r); } rade_close(r); rade_finalize(); + fclose(feoo); return 0; } diff --git a/src/radae_tx.c b/src/radae_tx.c index 4f08dfc..55b8b4f 100644 --- a/src/radae_tx.c +++ b/src/radae_tx.c @@ -1,5 +1,6 @@ #include #include +#include #ifdef _WIN32 // For _setmode(). 
#include @@ -21,6 +22,16 @@ int main(void) int n_tx_eoo_out = rade_n_tx_eoo_out(r); RADE_COMP tx_eoo_out[n_tx_eoo_out]; + FILE *feoo_bits = fopen("eoo_tx.f32","rb"); + if (feoo_bits) { + int n_eoo_bits = rade_n_eoo_bits(r); + float eoo_bits[n_eoo_bits]; + int ret = fread(eoo_bits, sizeof(float), n_eoo_bits, feoo_bits); + assert(ret == n_eoo_bits); + rade_tx_set_eoo_bits(r, eoo_bits); + fclose(feoo_bits); + } + #ifdef _WIN32 // Note: freopen() returns NULL if filename is NULL, so // we have to use setmode() to make it a binary stream instead. @@ -36,6 +47,10 @@ int main(void) rade_tx_eoo(r,tx_eoo_out); fwrite(tx_eoo_out, sizeof(RADE_COMP), n_tx_eoo_out, stdout); + // extra silence buf to let Rx finish processing EOO + memset(tx_eoo_out,0,sizeof(tx_eoo_out)); + fwrite(tx_eoo_out, sizeof(RADE_COMP), n_tx_eoo_out, stdout); + rade_close(r); rade_finalize(); diff --git a/src/rade_api.c b/src/rade_api.c index 715c3dd..2e9b577 100644 --- a/src/rade_api.c +++ b/src/rade_api.c @@ -64,7 +64,7 @@ struct rade { npy_intp Nmf, Neoo; npy_intp nin, nin_max; - npy_intp n_features_in, n_features_out; + npy_intp n_features_in, n_features_out, n_eoo_bits; RADEEnc enc_model; RADEEncState enc_state; @@ -75,6 +75,8 @@ struct rade { RADE_COMP *tx_out; PyObject *pMeth_radae_tx_eoo, *pArgs_radae_tx_eoo; RADE_COMP *tx_eoo_out; + PyObject *pMeth_radae_set_eoo_bits, *pArgs_radae_set_eoo_bits; + float *eoo_bits; RADEDec dec_model; RADEDecState dec_state; @@ -131,6 +133,7 @@ int rade_tx_open(struct rade *r) { char *python_module_name = "radae_txe"; char *do_radae_tx_meth_name = "do_radae_tx"; char *do_eoo_meth_name = "do_eoo"; + char *set_eoo_bits_meth_name = "set_eoo_bits"; // Load module of Python code pName = PyUnicode_DecodeFSDefault(python_module_name); @@ -153,7 +156,10 @@ int rade_tx_open(struct rade *r) { r->n_floats_in = (int)call_getter(r->pInst_radae_tx, "get_n_floats_in"); r->Nmf = (int)call_getter(r->pInst_radae_tx, "get_Nmf"); r->Neoo = (int)call_getter(r->pInst_radae_tx, 
"get_Neoo"); - fprintf(stderr, "n_features_in: %d n_floats_in: %d Nmf: %d Neoo: %d\n", (int)r->n_features_in, (int)r->n_floats_in, (int)r->Nmf, (int)r->Neoo); + // num floats is 2 x number of complex QPSK symbols + r->n_eoo_bits = (int)call_getter(r->pInst_radae_tx, "get_Neoo_bits"); + fprintf(stderr, "n_features_in: %d n_floats_in: %d Nmf: %d Neoo: %d n_eoo_bits: %d\n", + (int)r->n_features_in, (int)r->n_floats_in, (int)r->Nmf, (int)r->Neoo, (int)r->n_eoo_bits); // RADAE Tx --------------------------------------------------------- @@ -177,7 +183,7 @@ int rade_tx_open(struct rade *r) { check_error(pValue, "setting up numpy array", "tx_out"); PyTuple_SetItem(r->pArgs_radae_tx, 1, pValue); - // End of Over -------------------------------------------------------- + // End of Over Samples -------------------------------------------------- r->pMeth_radae_tx_eoo = PyObject_GetAttrString(r->pInst_radae_tx, do_eoo_meth_name); check_error(r->pMeth_radae_tx_eoo, "finding", do_eoo_meth_name); @@ -191,6 +197,20 @@ int rade_tx_open(struct rade *r) { check_error(pValue, "setting up numpy array", "tx_eoo_out"); PyTuple_SetItem(r->pArgs_radae_tx_eoo, 0, pValue); + // Set End Of Over Bits ------------------------------------------------- + + r->pMeth_radae_set_eoo_bits = PyObject_GetAttrString(r->pInst_radae_tx, set_eoo_bits_meth_name); + check_error(r->pMeth_radae_set_eoo_bits, "finding", set_eoo_bits_meth_name); + check_callable(r->pMeth_radae_set_eoo_bits, set_eoo_bits_meth_name, "not callable"); + r->pArgs_radae_set_eoo_bits = PyTuple_New(1); + + // Python arg is a numpy array used for output to C + r->eoo_bits = (float*)malloc(sizeof(float)*r->n_eoo_bits); + assert(r->eoo_bits != NULL); + pValue = PyArray_SimpleNewFromData(1, &r->n_eoo_bits, NPY_FLOAT, r->eoo_bits); + check_error(pValue, "setting up numpy array", "eoo_bits"); + PyTuple_SetItem(r->pArgs_radae_set_eoo_bits, 0, pValue); + if (r->flags & RADE_USE_C_ENCODER) { if (init_radeenc(&r->enc_model, radeenc_arrays, 
r->num_features*RADE_FRAMES_PER_STEP) != 0) { fprintf(stderr, "Error initialising built-in C encoder model\n"); @@ -208,12 +228,15 @@ void rade_tx_close(struct rade *r) { Py_DECREF(r->pMeth_radae_tx); Py_DECREF(r->pMeth_radae_tx_eoo); Py_DECREF(r->pArgs_radae_tx_eoo); + Py_DECREF(r->pMeth_radae_set_eoo_bits); + Py_DECREF(r->pArgs_radae_set_eoo_bits); Py_DECREF(r->pInst_radae_tx); Py_DECREF(r->pModule_radae_tx); free(r->floats_in); free(r->tx_out); free(r->tx_eoo_out); + free(r->eoo_bits); } // returns 0 for success @@ -252,7 +275,8 @@ int rade_rx_open(struct rade *r) { r->n_floats_out = (int)call_getter(r->pInst_radae_rx, "get_n_floats_out"); r->nin_max = (int)call_getter(r->pInst_radae_rx, "get_nin_max"); r->nin = (int)call_getter(r->pInst_radae_rx, "get_nin"); - fprintf(stderr, "n_features_out: %d n_floats_out: %d nin_max: %d nin: %d\n", (int)r->n_features_out, (int)r->n_floats_out, (int)r->nin_max, (int)r->nin); + fprintf(stderr, "n_features_out: %d n_eoo_bits: %d n_floats_out: %d nin_max: %d nin: %d\n", + (int)r->n_features_out, (int)r->n_eoo_bits, (int)r->n_floats_out, (int)r->nin_max, (int)r->nin); r->pMeth_radae_rx = PyObject_GetAttrString(r->pInst_radae_rx, do_radae_rx_meth_name); check_error(r->pMeth_radae_rx, "finding", do_radae_rx_meth_name); @@ -360,10 +384,17 @@ int rade_n_tx_out(struct rade *r) { assert(r != NULL); return (int)r->Nmf; } int rade_n_tx_eoo_out(struct rade *r) { assert(r != NULL); return (int)r->Neoo; } int rade_nin_max(struct rade *r) { assert(r != NULL); return r->nin_max; } int rade_nin(struct rade *r) { assert(r != NULL); return r->nin; } +int rade_n_features_in_out(struct rade *r) { assert(r != NULL); return r->n_features_in; } +int rade_n_eoo_bits(struct rade *r) { assert(r != NULL); return r->n_eoo_bits; } -int rade_n_features_in_out(struct rade *r) { - assert(r != NULL); - return r->n_features_in; +RADE_EXPORT void rade_tx_set_eoo_bits(struct rade *r, float eoo_bits[]) { + assert(r != NULL); + assert(eoo_bits != NULL); + 
PyGILState_STATE gstate = PyGILState_Ensure(); + fprintf(stderr, "n_eoo_bits: %ld\n", r->n_eoo_bits); + memcpy(r->eoo_bits, eoo_bits, sizeof(float)*r->n_eoo_bits); + PyObject_CallObject(r->pMeth_radae_set_eoo_bits, r->pArgs_radae_set_eoo_bits); + PyGILState_Release(gstate); } int rade_tx(struct rade *r, RADE_COMP tx_out[], float floats_in[]) { @@ -426,7 +457,7 @@ int rade_tx_eoo(struct rade *r, RADE_COMP tx_eoo_out[]) { return r->Neoo; } -int rade_rx(struct rade *r, float features_out[], RADE_COMP rx_in[]) { +int rade_rx(struct rade *r, float features_out[], int *has_eoo_out, float eoo_out[], RADE_COMP rx_in[]) { PyObject *pValue; assert(r != NULL); assert(features_out != NULL); @@ -438,8 +469,10 @@ int rade_rx(struct rade *r, float features_out[], RADE_COMP rx_in[]) { memcpy(r->rx_in, rx_in, sizeof(RADE_COMP)*(r->nin)); pValue = PyObject_CallObject(r->pMeth_radae_rx, r->pArgs_radae_rx); check_error(pValue, "return value", "from do_rx_radae"); - long valid_out = PyLong_AsLong(pValue); - + long ret = PyLong_AsLong(pValue); + int valid_out = ret & 0x1; + int endofover = ret & 0x2; + if (valid_out) { if (r->flags & RADE_USE_C_DECODER) { // sanity check: need integer number of latent vecs @@ -482,6 +515,12 @@ int rade_rx(struct rade *r, float features_out[], RADE_COMP rx_in[]) { } } + *has_eoo_out = 0; + if (endofover) { + memcpy(eoo_out, r->floats_out, sizeof(float)*(r->n_eoo_bits)); + *has_eoo_out = 1; + } + // sample nin so we have an updated copy r->nin = (int)call_getter(r->pInst_radae_rx, "get_nin"); diff --git a/src/rade_api.h b/src/rade_api.h index 836df2f..631e990 100644 --- a/src/rade_api.h +++ b/src/rade_api.h @@ -96,11 +96,16 @@ RADE_EXPORT int rade_n_tx_out(struct rade *r); RADE_EXPORT int rade_n_tx_eoo_out(struct rade *r); RADE_EXPORT int rade_nin_max(struct rade *r); RADE_EXPORT int rade_n_features_in_out(struct rade *r); +RADE_EXPORT int rade_n_eoo_bits(struct rade *r); // note vocoder is not encapsulated in API in this version // returns number of 
RADE_COMP samples written to tx_out[] RADE_EXPORT int rade_tx(struct rade *r, RADE_COMP tx_out[], float features_in[]); +// Set the rade_n_eoo_bits() bits to be sent in the EOO frame, which are +// in +/- 1 float form (note NOT 1 or 0) +RADE_EXPORT void rade_tx_set_eoo_bits(struct rade *r, float eoo_bits[]); + // call this for the final frame at the end of over // returns the number of RADE_COMP samples written to tx_eoo_out[] RADE_EXPORT int rade_tx_eoo(struct rade *r, RADE_COMP tx_eoo_out[]); @@ -109,8 +114,10 @@ RADE_EXPORT int rade_tx_eoo(struct rade *r, RADE_COMP tx_eoo_out[]); RADE_EXPORT int rade_nin(struct rade *r); // returns non-zero if features_out[] contains valid output. The number -// returned is the number of samples written to features_out[] -RADE_EXPORT int rade_rx(struct rade *r, float features_out[], RADE_COMP rx_in[]); +// returned is the number of samples written to features_out[]. If the +// has_eoo_out is set, eoo_out[] contains End of Over soft decision bits +// from QPSK symbols in ..IQIQI... order +RADE_EXPORT int rade_rx(struct rade *r, float features_out[], int *has_eoo_out, float eoo_out[], RADE_COMP rx_in[]); // returns non-zero if Rx is currently in sync RADE_EXPORT int rade_sync(struct rade *r);