-
Notifications
You must be signed in to change notification settings - Fork 1.3k
RP2040/235x: Push DMA data to PIO TX FIFO through ping-pong #4784
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,6 +1,7 @@ | ||
| //! PIO driver. | ||
| use core::future::Future; | ||
| use core::marker::PhantomData; | ||
| use core::ops::ControlFlow; | ||
| use core::pin::Pin as FuturePin; | ||
| use core::sync::atomic::{AtomicU8, AtomicU32, Ordering, compiler_fence}; | ||
| use core::task::{Context, Poll}; | ||
|
|
@@ -530,6 +531,95 @@ impl<'d, PIO: Instance, const SM: usize> StateMachineTx<'d, PIO, SM> { | |
| pub fn dma_push_repeated<'a, C: Channel, W: Word>(&'a mut self, ch: Peri<'a, C>, len: usize) -> Transfer<'a, C> { | ||
| unsafe { dma::write_repeated(ch, PIO::PIO.txf(SM).as_ptr(), len, Self::dreq()) } | ||
| } | ||
|
|
||
| /// Feed the TX FIFO a continuous stream of data using two alternating buffers. | ||
| /// | ||
| /// The initial data in each buffer isn't immediately sent. Instead, the callback will be called once before the DMA | ||
| /// transfer starts, to initialize the first buffer. After this, the callback will be called each time a new | ||
| /// transfer starts to provide the data that will be sent with the transfer after it. The user is responsible for | ||
| /// ensuring that the callback finishes in time for the buffers to swap. | ||
| pub async fn dma_push_ping_pong<'a, C1: Channel, C2: Channel, W: Word, F>( | ||
| &'a mut self, | ||
| mut ch1: Peri<'a, C1>, | ||
| mut ch2: Peri<'a, C2>, | ||
| data1: &'a mut [W], | ||
| data2: &'a mut [W], | ||
| mut fill_buffer_callback: F, | ||
| ) where | ||
| F: FnMut(&mut [W]) -> ControlFlow<()>, | ||
| { | ||
| let init_dma_channel = |regs: pac::dma::Channel, chain_target: u8, buffer: &[W]| { | ||
| regs.read_addr().write_value(buffer.as_ptr() as u32); | ||
| regs.write_addr().write_value(PIO::PIO.txf(SM).as_ptr() as u32); | ||
|
|
||
| #[cfg(feature = "rp2040")] | ||
| regs.trans_count().write(|w| *w = buffer.len() as u32); | ||
| #[cfg(feature = "_rp235x")] | ||
| regs.trans_count().write(|w| w.set_count(buffer.len() as u32)); | ||
|
|
||
| // don't use trigger register since we don't want the channel to start yet | ||
| regs.al1_ctrl().write(|w| { | ||
| // SAFETY: this register is an alias for ctrl_trig, see embassy-rs/rp-pac#12 | ||
| let w: &mut rp_pac::dma::regs::CtrlTrig = unsafe { core::mem::transmute(w) }; | ||
| w.set_treq_sel(Self::dreq()); | ||
| w.set_data_size(W::size()); | ||
| w.set_incr_read(true); | ||
| w.set_incr_write(false); | ||
| w.set_en(true); | ||
|
|
||
| // trigger other channel when finished | ||
| w.set_chain_to(chain_target); | ||
| }); | ||
| }; | ||
|
|
||
| // initialize both DMA channels | ||
| init_dma_channel(ch1.regs(), ch2.number(), data1); | ||
| init_dma_channel(ch2.regs(), ch1.number(), data2); | ||
|
|
||
| trace!("Fill initial ping buffer"); | ||
| if let ControlFlow::Break(()) = fill_buffer_callback(data1) { | ||
| return; | ||
| } | ||
|
|
||
| // trigger ping dma channel by writing to a TRIG register | ||
| ch1.regs().ctrl_trig().modify(|_| {}); | ||
|
|
||
| loop { | ||
| trace!("Fill pong buffer"); | ||
| if let ControlFlow::Break(()) = fill_buffer_callback(data2) { | ||
| break; | ||
| } | ||
|
|
||
| trace!("Waiting for ping transfer to finish"); | ||
| Transfer::new(ch1.reborrow()).await; | ||
|
|
||
| // re-init DMA 1 (without triggering it) | ||
| ch1.regs().read_addr().write_value(data1.as_ptr() as u32); | ||
|
|
||
| trace!("Fill ping buffer"); | ||
| if let ControlFlow::Break(()) = fill_buffer_callback(data1) { | ||
| break; | ||
| } | ||
|
|
||
| trace!("Waiting for pong transfer"); | ||
|
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Technically you should have a fence here, before the other transfer is started. Though I'm quite sure that an await point makes for a fence anyway, so perhaps it's not an issue. If you disabled the channel for the duration of the callback, then you would need a fence before re-enabling the channel. Otherwise the compiler could reorder some writes from inside the callback to after the re-enable. |
||
| Transfer::new(ch2.reborrow()).await; | ||
|
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. How does this behave in case the callback runs for too long? Can it potentially hang forever? |
||
|
|
||
| // re-init DMA 2 (without triggering it) | ||
| ch2.regs().read_addr().write_value(data2.as_ptr() as u32); | ||
| } | ||
|
|
||
| // turn off DMA channels | ||
| ch1.regs().al1_ctrl().modify(|w| { | ||
| // SAFETY: this register is an alias for ctrl_trig, see embassy-rs/rp-pac#12 | ||
| let w: &mut rp_pac::dma::regs::CtrlTrig = unsafe { core::mem::transmute(w) }; | ||
| w.set_en(false); | ||
| }); | ||
| ch2.regs().al1_ctrl().modify(|w| { | ||
| // SAFETY: this register is an alias for ctrl_trig, see embassy-rs/rp-pac#12 | ||
| let w: &mut rp_pac::dma::regs::CtrlTrig = unsafe { core::mem::transmute(w) }; | ||
| w.set_en(false); | ||
| }); | ||
| } | ||
| } | ||
|
|
||
| /// A type representing a single PIO state machine. | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,104 @@ | ||
| //! This example shows how to feed alternating buffers to the PIO without downtime. | ||
|
|
||
| #![no_std] | ||
| #![no_main] | ||
| use defmt::info; | ||
| use embassy_executor::Spawner; | ||
| use embassy_rp::bind_interrupts; | ||
| use embassy_rp::peripherals::PIO0; | ||
| use embassy_rp::pio::program::pio_asm; | ||
| use embassy_rp::pio::{Config, Direction, InterruptHandler, Pio}; | ||
| use {defmt_rtt as _, panic_probe as _}; | ||
|
|
||
| bind_interrupts!(struct Irqs { | ||
| PIO0_IRQ_0 => InterruptHandler<PIO0>; | ||
| }); | ||
|
|
||
| /// The desired samples/second to output | ||
| const SAMPLE_RATE: u32 = 16_000; | ||
|
|
||
| #[embassy_executor::main] | ||
| async fn main(_spawner: Spawner) { | ||
| let mut p = embassy_rp::init(Default::default()); | ||
| let mut pio = Pio::new(p.PIO0, Irqs); | ||
|
|
||
| const PIO_OUTPUT_RATE: u32 = 2; // PIO clock cycles per output sample (the program below is two instructions: PULL, then OUT) | ||
| let clock_freq = embassy_rp::clocks::clk_sys_freq(); | ||
| let divider = clock_freq / PIO_OUTPUT_RATE / SAMPLE_RATE; | ||
| info!("PIO base divider: {}", divider); | ||
|
|
||
| let pio_program = pio_asm!( | ||
| ".origin 0" | ||
| ".wrap_target" | ||
| "PULL" | ||
| "OUT PINS, 8" | ||
| ".wrap" | ||
| ); | ||
|
|
||
| let pio_pins = [ | ||
| &pio.common.make_pio_pin(p.PIN_5), | ||
| &pio.common.make_pio_pin(p.PIN_6), | ||
| &pio.common.make_pio_pin(p.PIN_7), | ||
| &pio.common.make_pio_pin(p.PIN_8), | ||
| &pio.common.make_pio_pin(p.PIN_9), | ||
| &pio.common.make_pio_pin(p.PIN_10), | ||
| &pio.common.make_pio_pin(p.PIN_11), | ||
| &pio.common.make_pio_pin(p.PIN_12), | ||
| ]; | ||
|
|
||
| let mut cfg = Config::default(); | ||
| cfg.use_program(&pio.common.load_program(&pio_program.program), &[]); | ||
| cfg.clock_divider = (divider as u16).into(); | ||
| cfg.set_out_pins(&pio_pins); | ||
|
|
||
| pio.sm0.set_pin_dirs(Direction::Out, &pio_pins); | ||
| pio.sm0.set_config(&cfg); | ||
| pio.sm0.set_enable(true); | ||
|
|
||
| let tx = pio.sm0.tx(); | ||
|
|
||
| let mut buffer_1 = [0x0u8; 128]; | ||
| let mut buffer_2 = [0x0u8; 128]; | ||
|
|
||
| let mut sample_index = 0usize; | ||
| tx.dma_push_ping_pong( | ||
| p.DMA_CH0.reborrow(), | ||
| p.DMA_CH1.reborrow(), | ||
| &mut buffer_1, | ||
| &mut buffer_2, | ||
| |buf| { | ||
| info!("In start of fill callback, index={}", sample_index); | ||
| if sample_index > 100_000 { | ||
| buf.iter_mut().for_each(|b| *b = 0); | ||
| return core::ops::ControlFlow::Break(()); | ||
| } | ||
|
|
||
| for b in buf.iter_mut() { | ||
| // generate a 440 Hz sine wave; `time` is this sample's timestamp in seconds (sample_index / SAMPLE_RATE) | ||
| let time = sample_index as f32 / SAMPLE_RATE as f32; | ||
| let wave = fast_sin(time * 440. * core::f32::consts::PI * 2.); | ||
|
|
||
| // map [-1, 1] onto [0, 255]; the float-to-u8 `as` cast saturates, so a value of 256.0 becomes 255 instead of wrapping | ||
| *b = ((wave + 1.) / 2. * 256.) as u8; | ||
|
|
||
| sample_index += 1; | ||
| } | ||
|
|
||
| core::ops::ControlFlow::Continue(()) | ||
| }, | ||
| ) | ||
| .await; | ||
|
|
||
| // streaming has ended: push one zero byte to reset the pin state | ||
| tx.dma_push(p.DMA_CH0, &[0u8; 1], false).await; | ||
| } | ||
|
|
||
| /// Quadratic approximation of `sin(x)`: scales `x` by `1 / (2π)`, wraps the result into `[-0.5, 0.5)` and evaluates `-4 * 2u * (1 - |2u|)` on it. Based on https://bmtechjournal.wordpress.com/2020/05/27/super-fast-quadratic-sinusoid-approximation/ | ||
| fn fast_sin(x: f32) -> f32 { | ||
| use num_traits::float::FloatCore as _; | ||
|
|
||
| let fake_sin_2 = |x: f32| 2.0 * x * (1.0 - (2.0 * x).abs()); | ||
| let range_limiter_2 = |x: f32| x - x.floor() - 0.5; | ||
|
|
||
| -4.0 * fake_sin_2(range_limiter_2(x / (2.0 * core::f32::consts::PI))) | ||
| } |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I'm not sure this is sound. If the callback is too slow and `ch1` is triggered while it's still executing, we're holding onto a mutable borrow while another immutable access (by the DMA) is happening. Perhaps the channel should be disabled for the duration of this callback? Or the function should be unsafe?
If you went with marking the function as unsafe, I'm not sure how you could even guarantee soundness of this without analyzing the whole system, as other executing tasks could block the CPU for long enough to make the callback unable to meet its deadline.