From 6498f85de3078ad7c3206c22c690ecb3d0fa71bb Mon Sep 17 00:00:00 2001 From: Ian McIntyre Date: Thu, 17 Jul 2025 20:50:40 -0400 Subject: Rewrite descriptors with atomics The memory ordering ensures that operations to normal memory are synchronized with operations on device memory, at runtime. I've seen this play out in the transmit path: writes to the transmit buffer's flags weren't reaching memory by the time the MAC was checking, resulting in missing packets. Moving the fence works, but it's better to use atomics. --- src/bd.rs | 84 +++++++++++++++++++--------------------------------------- src/bd/rxbd.rs | 69 ++++++++++++----------------------------------- src/bd/txbd.rs | 68 +++++++++++++---------------------------------- 3 files changed, 62 insertions(+), 159 deletions(-) (limited to 'src') diff --git a/src/bd.rs b/src/bd.rs index 5508770..9d7ac3c 100644 --- a/src/bd.rs +++ b/src/bd.rs @@ -3,28 +3,6 @@ //! Buffer descriptors (BD) are defined with register access layer (RAL) compatibility. //! These definitions come from the i.MX RT 1170 reference manual, revision 2. -macro_rules! bdfields { - ($name:ident, $type:ty $(, $field:ident [ offset = $offset:expr, bits = $bits:expr, $( $enum:ident = $val:expr $(,)? )* ] $(,)? )*) => { - pub mod $name { - $( - pub mod $field { - #![allow(non_snake_case, non_upper_case_globals, dead_code)] - // Assuming all fields are read-write. - pub mod R {} - pub mod W {} - pub mod RW { - $( - pub const $enum: $type = $val; - )* - } - pub const offset: $type = $offset; - pub const mask: $type = ((1 << $bits) - 1) << offset; - } - )* - } - }; -} - pub(crate) mod rxbd; pub(crate) mod txbd; @@ -114,18 +92,17 @@ impl IoBuffers { pub fn take(&'static mut self) -> IoSlices<'static, txbd::TxBD> { self.init(|descriptors, buffers| { for (descriptor, buffer) in descriptors.iter_mut().zip(buffers.iter_mut()) { - ral_registers::write_reg!( - txbd, - descriptor, - data_buffer_pointer, - buffer.0.as_mut_ptr() as _ - ); + descriptor + .data_buffer_pointer + .store(buffer.0.as_mut_ptr() as _, Ordering::SeqCst); } // When the DMA engine reaches this descriptor, it needs to wrap // around to the first descriptor. if let Some(descriptor) = descriptors.last_mut() { - ral_registers::modify_reg!(txbd, descriptor, flags, wrap: 1); + descriptor + .flags + .fetch_or(txbd::FLAGS_WRAP, Ordering::SeqCst); } }) } @@ -135,20 +112,19 @@ impl IoBuffers { pub fn take(&'static mut self) -> IoSlices<'static, rxbd::RxBD> { self.init(|descriptors, buffers| { for (descriptor, buffer) in descriptors.iter_mut().zip(buffers.iter_mut()) { - ral_registers::write_reg!( - rxbd, - descriptor, - data_buffer_pointer, - buffer.0.as_mut_ptr() as _ - ); + descriptor + .data_buffer_pointer + .store(buffer.0.as_mut_ptr() as _, Ordering::Relaxed); // Zero all other flags. - ral_registers::write_reg!(rxbd, descriptor, flags, empty: 1); + descriptor.flags.store(rxbd::FLAGS_EMPTY, Ordering::SeqCst); } // When the DMA engine reaches this descriptor, it needs to wrap // around to the first descriptor. if let Some(descriptor) = descriptors.last_mut() { - ral_registers::modify_reg!(rxbd, descriptor, flags, wrap: 1); + descriptor + .flags + .fetch_or(rxbd::FLAGS_WRAP, Ordering::SeqCst); } }) } @@ -214,19 +190,13 @@ pub type RxToken<'a> = IoToken<'a, rxbd::RxBD, crate::RxReady<'a>>; impl ReceiveSlices<'_> { pub(crate) fn next_token<'a>(&'a mut self, ready: crate::RxReady<'a>) -> Option> { - self.next_impl( - |rxbd| ral_registers::read_reg!(rxbd, rxbd, flags, empty == 0), - ready, - ) + self.next_impl(|rxbd| !rxbd.is_empty(), ready) } } impl TransmitSlices<'_> { pub(crate) fn next_token<'a>(&'a mut self, ready: crate::TxReady<'a>) -> Option> { - self.next_impl( - |txbd| ral_registers::read_reg!(txbd, txbd, flags, ready == 0), - ready, - ) + self.next_impl(|txbd| !txbd.is_ready(), ready) } } @@ -241,19 +211,18 @@ impl smoltcp::phy::TxToken for TxToken<'_> { // lifetimes. let buffer = unsafe { assert!(len <= self.mtu); - let ptr = - ral_registers::read_reg!(txbd, self.descriptor, data_buffer_pointer) as *mut u8; + let ptr = self.descriptor.data_buffer_pointer.load(Ordering::Relaxed) as *mut u8; core::slice::from_raw_parts_mut(ptr, len) }; let result = f(buffer); - core::sync::atomic::fence(Ordering::SeqCst); - ral_registers::write_reg!(txbd, self.descriptor, data_length, len as _); - ral_registers::modify_reg!(txbd, self.descriptor, flags, - ready: 1, - last_in: 1, - transmit_crc: 1, + self.descriptor + .data_length + .store(len as _, Ordering::Relaxed); + self.descriptor.flags.fetch_or( + txbd::FLAGS_READY | txbd::FLAGS_LAST_IN | txbd::FLAGS_TRANSMIT_CRC, + Ordering::SeqCst, ); self.ready.consume(); *self.index = self.next; @@ -269,15 +238,16 @@ impl smoltcp::phy::RxToken for RxToken<'_> { // Safety: hardware will not exceed our maximum frame length. We know that // the pointer is valid; see discussion above. let buffer = unsafe { - let len = ral_registers::read_reg!(rxbd, self.descriptor, data_length) as usize; + let len = self.descriptor.data_length.load(Ordering::Relaxed) as usize; assert!(len <= self.mtu); - let ptr = - ral_registers::read_reg!(rxbd, self.descriptor, data_buffer_pointer) as *mut u8; + let ptr = self.descriptor.data_buffer_pointer.load(Ordering::Relaxed) as *mut u8; core::slice::from_raw_parts_mut(ptr, len) }; let result = f(buffer); - ral_registers::modify_reg!(rxbd, self.descriptor, flags, empty: 1); + self.descriptor + .flags + .fetch_or(rxbd::FLAGS_EMPTY, Ordering::SeqCst); self.ready.consume(); *self.index = self.next; result diff --git a/src/bd/rxbd.rs b/src/bd/rxbd.rs index 8523d6e..ae85a57 100644 --- a/src/bd/rxbd.rs +++ b/src/bd/rxbd.rs @@ -1,65 +1,30 @@ //! Enhanced receive buffer descriptor layout and fields. -use ral_registers::{RORegister, RWRegister}; +use core::sync::atomic::{AtomicU16, AtomicU32, Ordering}; #[repr(C)] pub struct RxBD { - pub data_length: RORegister, - pub flags: RWRegister, - pub data_buffer_pointer: RWRegister, - pub status: RWRegister, - pub control: RWRegister, - pub checksum: RORegister, - pub header: RORegister, + pub data_length: AtomicU16, + pub flags: AtomicU16, + pub data_buffer_pointer: AtomicU32, + pub status: AtomicU16, + pub control: AtomicU16, + pub checksum: AtomicU16, + pub header: AtomicU16, _reserved0: [u16; 1], - pub last_bdu: RWRegister, - pub timestamp_1588: RORegister, + pub last_bdu: AtomicU16, + pub timestamp_1588: AtomicU32, _reserved1: [u16; 4], } -bdfields!(flags, u16, - empty [ offset = 15, bits = 1, ], - ro1 [ offset = 14, bits = 1, ], - wrap [ offset = 13, bits = 1, ], - ro2 [ offset = 12, bits = 1, ], - last [ offset = 11, bits = 1, ], +pub const FLAGS_EMPTY: u16 = 1 << 15; +pub const FLAGS_WRAP: u16 = 1 << 13; - miss [ offset = 8, bits = 1, ], - broadcast [ offset = 7, bits = 1, ], - multicast [ offset = 6, bits = 1, ], - length_violation [ offset = 5, bits = 1, ], - non_octet_violation [ offset = 4, bits = 1, ], - - crc_error [ offset = 2, bits = 1, ], - overrun [ offset = 1, bits = 1, ], - truncated [ offset = 0, bits = 1, ], -); - -bdfields!(status, u16, - vlan_priority [ offset = 13, bits = 3, ], - ip_checksum_error [ offset = 5, bits = 1, ], - protocol_checksum_error [ offset = 4, bits = 1, ], - vlan [ offset = 2, bits = 1, ], - ipv6 [ offset = 1, bits = 1, ], - frag [ offset = 0, bits = 1, ], -); - -bdfields!(control, u16, - mac_error [ offset = 15, bits = 1, ], - phy_error [ offset = 10, bits = 1, ], - collision [ offset = 9, bits = 1, ], - unicast [ offset = 8, bits = 1, ], - interrupt [ offset = 7, bits = 1, ], -); - -bdfields!(header, u16, - length [ offset = 11, bits = 5, ], - protocol [ offset = 0, bits = 8, ], -); - -bdfields!(last_bdu, u16, - last_bdu [ offset = 15, bits = 1, ], -); +impl RxBD { + pub(crate) fn is_empty(&self) -> bool { + self.flags.load(Ordering::SeqCst) & FLAGS_EMPTY != 0 + } +} #[cfg(test)] mod tests { diff --git a/src/bd/txbd.rs b/src/bd/txbd.rs index 9550eff..b8265a6 100644 --- a/src/bd/txbd.rs +++ b/src/bd/txbd.rs @@ -1,52 +1,31 @@ //! Enhanced transmit buffer descriptor layout and fields. -use ral_registers::RWRegister; +use core::sync::atomic::{AtomicU16, AtomicU32, Ordering}; #[repr(C)] pub struct TxBD { - pub data_length: RWRegister, - pub flags: RWRegister, - pub data_buffer_pointer: RWRegister, - pub errors: RWRegister, - pub control: RWRegister, - pub launch_time: RWRegister, + pub data_length: AtomicU16, + pub flags: AtomicU16, + pub data_buffer_pointer: AtomicU32, + pub errors: AtomicU16, + pub control: AtomicU16, + pub launch_time: AtomicU32, _reserved0: [u16; 1], - pub last_bdu: RWRegister, - pub timestamp_1588: RWRegister, + pub last_bdu: AtomicU16, + pub timestamp_1588: AtomicU32, _reserved1: [u16; 4], } -bdfields!(flags, u16, - ready [ offset = 15, bits = 1, ], - to1 [ offset = 14, bits = 1, ], - wrap [ offset = 13, bits = 1, ], - to2 [ offset = 12, bits = 1, ], - last_in [ offset = 11, bits = 1, ], - transmit_crc [ offset = 10, bits = 1, ], -); +pub const FLAGS_READY: u16 = 1 << 15; +pub const FLAGS_WRAP: u16 = 1 << 13; +pub const FLAGS_LAST_IN: u16 = 1 << 11; +pub const FLAGS_TRANSMIT_CRC: u16 = 1 << 10; -bdfields!(errors, u16, - transmit [ offset = 15, bits = 1, ], - underflow [ offset = 13, bits = 1, ], - excess_collision [ offset = 12, bits = 1, ], - frame_error [ offset = 11, bits = 1, ], - late_collision [ offset = 10, bits = 1, ], - overflow [ offset = 9, bits = 1, ], - timestamp [ offset = 8, bits = 1, ], -); - -bdfields!(control, u16, - interrupt [ offset = 14, bits = 1, ], - timestamp [ offset = 13, bits = 1, ], - pins [ offset = 12, bits = 1, ], - iins [ offset = 11, bits = 1, ], - utlt [ offset = 8, bits = 1, ], - ftype [ offset = 4, bits = 4, NON_AVB = 0, AVB_A = 1, AVB_B = 2 ], -); - -bdfields!(last_bdu, u16, - last_bdu [ offset = 15, bits = 1, ], -); +impl TxBD { + pub(crate) fn is_ready(&self) -> bool { + self.flags.load(Ordering::SeqCst) & FLAGS_READY != 0 + } +} #[cfg(test)] mod tests { @@ -77,15 +56,4 @@ mod tests { addr_of!(txbd.timestamp_1588).cast() ); } - - #[test] - fn bdfields_enum() { - let txbd = zeroed(); - ral_registers::modify_reg!(super, &txbd, control, ftype: AVB_B); - assert_eq!(txbd.control.read(), 0x2 << 4); - ral_registers::modify_reg!(super, &txbd, control, interrupt: 1); - assert_eq!(txbd.control.read(), 0x2 << 4 | 1 << 14); - ral_registers::modify_reg!(super, &txbd, control, ftype: 0); - assert_eq!(txbd.control.read(), 1 << 14); - } } -- cgit v1.2.3