 use std::io::Read;
 use std::mem;
 use std::net::Ipv4Addr;
+use std::num::Wrapping;
 use std::sync::{Arc, Mutex};
 
 use libc::EAGAIN;
-use log::{error, warn};
+use log::error;
 use utils::eventfd::EventFd;
 use utils::net::mac::MacAddr;
-use utils::u64_to_usize;
-use vm_memory::GuestMemoryError;
+use utils::{u64_to_usize, usize_to_u64};
+use vm_memory::{GuestAddress, GuestMemory, GuestMemoryError};
 
 use crate::devices::virtio::device::{DeviceState, IrqTrigger, IrqType, VirtioDevice};
 use crate::devices::virtio::gen::virtio_blk::VIRTIO_F_VERSION_1;
 use crate::devices::virtio::gen::virtio_net::{
     virtio_net_hdr_v1, VIRTIO_NET_F_CSUM, VIRTIO_NET_F_GUEST_CSUM, VIRTIO_NET_F_GUEST_TSO4,
     VIRTIO_NET_F_GUEST_TSO6, VIRTIO_NET_F_GUEST_UFO, VIRTIO_NET_F_HOST_TSO4,
-    VIRTIO_NET_F_HOST_TSO6, VIRTIO_NET_F_HOST_UFO, VIRTIO_NET_F_MAC,
+    VIRTIO_NET_F_HOST_TSO6, VIRTIO_NET_F_HOST_UFO, VIRTIO_NET_F_MAC, VIRTIO_NET_F_MRG_RXBUF,
 };
 use crate::devices::virtio::gen::virtio_ring::VIRTIO_RING_F_EVENT_IDX;
 use crate::devices::virtio::iovec::IoVecBuffer;
@@ -32,7 +33,7 @@ use crate::devices::virtio::net::tap::Tap;
 use crate::devices::virtio::net::{
     gen, NetError, NetQueue, MAX_BUFFER_SIZE, NET_QUEUE_SIZES, RX_INDEX, TX_INDEX,
 };
-use crate::devices::virtio::queue::{DescriptorChain, Queue};
+use crate::devices::virtio::queue::{Queue, UsedElement};
 use crate::devices::virtio::{ActivateError, TYPE_NET};
 use crate::devices::{report_net_event_fail, DeviceError};
 use crate::dumbo::pdu::arp::ETH_IPV4_FRAME_LEN;
@@ -47,14 +48,14 @@ const FRAME_HEADER_MAX_LEN: usize = PAYLOAD_OFFSET + ETH_IPV4_FRAME_LEN;
 
 #[derive(Debug, thiserror::Error, displaydoc::Display)]
 enum FrontendError {
-    /// Add user.
-    AddUsed,
-    /// Descriptor chain too mall.
-    DescriptorChainTooSmall,
     /// Empty queue.
     EmptyQueue,
     /// Guest memory error: {0}
     GuestMemory(GuestMemoryError),
+    /// Attempt to write an empty packet.
+    AttemptToWriteEmptyPacket,
+    /// Attempt to use more descriptor chains (heads) than allowed.
+    MaxHeadsUsed,
     /// Read only descriptor.
     ReadOnlyDescriptor,
 }
@@ -103,6 +104,20 @@ pub struct ConfigSpace {
103104// SAFETY: `ConfigSpace` contains only PODs in `repr(C)` or `repr(transparent)`, without padding.
104105unsafe impl ByteValued for ConfigSpace { }
105106
107+ // This struct contains information about partially
108+ // written packet.
109+ #[ derive( Debug ) ]
110+ struct PartialWrite {
111+ // Amount of bytes written so far.
112+ bytes_written : usize ,
113+ // Amount of descriptor heads used for the packet.
114+ used_heads : u16 ,
115+ // Guest address of the first buffer used for the packet.
116+ // This will be used to set number of descriptors heads used
117+ // to store the whole packet.
118+ packet_start_addr : GuestAddress ,
119+ }
120+
106121/// VirtIO network device.
107122///
108123/// It emulates a network device able to exchange L2 frames between the guest
@@ -127,6 +142,7 @@ pub struct Net {
127142
128143 rx_bytes_read : usize ,
129144 rx_frame_buf : [ u8 ; MAX_BUFFER_SIZE ] ,
145+ rx_partial_write : Option < PartialWrite > ,
130146
131147 tx_frame_headers : [ u8 ; frame_hdr_len ( ) ] ,
132148
@@ -161,6 +177,7 @@ impl Net {
161177 | 1 << VIRTIO_NET_F_HOST_TSO4
162178 | 1 << VIRTIO_NET_F_HOST_TSO6
163179 | 1 << VIRTIO_NET_F_HOST_UFO
180+ | 1 << VIRTIO_NET_F_MRG_RXBUF
164181 | 1 << VIRTIO_F_VERSION_1
165182 | 1 << VIRTIO_RING_F_EVENT_IDX ;
166183
@@ -191,6 +208,7 @@ impl Net {
191208 rx_deferred_frame : false ,
192209 rx_bytes_read : 0 ,
193210 rx_frame_buf : [ 0u8 ; MAX_BUFFER_SIZE ] ,
211+ rx_partial_write : None ,
194212 tx_frame_headers : [ 0u8 ; frame_hdr_len ( ) ] ,
195213 irq_trigger : IrqTrigger :: new ( ) . map_err ( NetError :: EventFd ) ?,
196214 config_space,
@@ -319,7 +337,17 @@ impl Net {
         }
 
         // Attempt frame delivery.
-        let success = self.write_frame_to_guest();
+        let success = loop {
+            // We retry writing a frame if there were internal errors.
+            // Each new attempt uses fresh descriptor chains, up to the
+            // point of consuming all available descriptors if they are
+            // all bad.
+            match self.write_frame_to_guest() {
+                Ok(()) => break true,
+                Err(FrontendError::EmptyQueue) => break false,
+                _ => (),
+            };
+        };
 
         // Undo the tokens consumption if guest delivery failed.
         if !success {
@@ -330,108 +358,186 @@ impl Net {
         success
     }
 
-    /// Write a slice in a descriptor chain
-    ///
-    /// # Errors
-    ///
-    /// Returns an error if the descriptor chain is too short or
-    /// an inappropriate (read only) descriptor is found in the chain
-    fn write_to_descriptor_chain(
-        mem: &GuestMemoryMmap,
-        data: &[u8],
-        head: DescriptorChain,
-        net_metrics: &NetDeviceMetrics,
-    ) -> Result<(), FrontendError> {
-        let mut chunk = data;
-        let mut next_descriptor = Some(head);
+    /// Write the packet contained in the internal buffer into guest-provided
+    /// descriptor chains.
+    fn write_frame_to_guest(&mut self) -> Result<(), FrontendError> {
+        // This is safe since we checked in the event handler that the device is activated.
+        let mem = self.device_state.mem().unwrap();
+
+        if self.queues[RX_INDEX].is_empty(mem) {
+            self.metrics.no_rx_avail_buffer.inc();
+            return Err(FrontendError::EmptyQueue);
+        }
+
+        let next_used = self.queues[RX_INDEX].next_used;
+        let actual_size = self.queues[RX_INDEX].actual_size();
+
+        let (mut slice, mut packet_start_addr, mut used_heads) =
+            if let Some(pw) = &self.rx_partial_write {
+                (
+                    &self.rx_frame_buf[pw.bytes_written..self.rx_bytes_read],
+                    Some(pw.packet_start_addr),
+                    pw.used_heads,
+                )
+            } else {
+                (&self.rx_frame_buf[..self.rx_bytes_read], None, 0)
+            };
 
-        while let Some(descriptor) = &next_descriptor {
-            if !descriptor.is_write_only() {
-                return Err(FrontendError::ReadOnlyDescriptor);
+        let max_used_heads = if self.has_feature(u64::from(VIRTIO_NET_F_MRG_RXBUF)) {
+            // There is no real limit on how many heads we can use, but we will
+            // never use more than the queue has.
+            u16::MAX
+        } else {
+            // Without VIRTIO_NET_F_MRG_RXBUF only 1 head can be used for the packet.
+            1
+        };
+
+        let mut error = None;
+        while !slice.is_empty() && error.is_none() {
+            if used_heads == max_used_heads {
+                error = Some(FrontendError::MaxHeadsUsed);
+                break;
             }
 
-            let len = std::cmp::min(chunk.len(), descriptor.len as usize);
-            match mem.write_slice(&chunk[..len], descriptor.addr) {
-                Ok(()) => {
-                    net_metrics.rx_count.inc();
-                    chunk = &chunk[len..];
+            let Some(head_desc) = self.queues[RX_INDEX].pop_or_enable_notification(mem) else {
+                break;
+            };
+
+            let head_desc_index = head_desc.index;
+            let mut desc_len = 0;
+
+            // If this is the first head of the packet, save it for later.
+            if packet_start_addr.is_none() {
+                packet_start_addr = Some(head_desc.addr);
+            }
+
+            // Write to the descriptor chain as much as possible.
+            let mut desc = Some(head_desc);
+            while !slice.is_empty() && desc.is_some() {
+                let d = desc.unwrap();
+
+                if !d.is_write_only() {
+                    error = Some(FrontendError::ReadOnlyDescriptor);
+                    break;
                 }
-                Err(err) => {
-                    error!("Failed to write slice: {:?}", err);
+                let len = slice.len().min(d.len as usize);
+                if let Err(err) = mem.write_slice(&slice[..len], d.addr) {
                     if let GuestMemoryError::PartialBuffer { .. } = err {
-                        net_metrics.rx_partial_writes.inc();
+                        self.metrics.rx_partial_writes.inc();
                     }
-                    return Err(FrontendError::GuestMemory(err));
+                    error = Some(FrontendError::GuestMemory(err));
+                    break;
+                } else {
+                    desc_len += len;
+                    slice = &slice[len..];
                 }
-            }
 
-            // If chunk is empty we are done here.
-            if chunk.is_empty() {
-                let len = data.len() as u64;
-                net_metrics.rx_bytes_count.add(len);
-                net_metrics.rx_packets_count.inc();
-                return Ok(());
+                desc = d.next_descriptor();
             }
 
-            next_descriptor = descriptor.next_descriptor();
+            // At this point the descriptor chain has been processed.
+            // We add it to the used ring.
+            let next_used_index = (next_used + Wrapping(used_heads)).0 % actual_size;
+            let used_element = UsedElement {
+                id: u32::from(head_desc_index),
+                len: u32::try_from(desc_len).unwrap(),
+            };
+            // We don't update the queue's internals just yet.
+            // This is done to prevent giving the guest information
+            // about descriptor heads used for partially written packets.
+            // SAFETY:
+            // This should never panic as we provide an index within
+            // correct bounds.
+            self.queues[RX_INDEX]
+                .write_used_ring(mem, next_used_index, used_element)
+                .unwrap();
+
+            used_heads += 1;
        }
 
-        warn!("Receiving buffer is too small to hold frame of current size");
-        Err(FrontendError::DescriptorChainTooSmall)
-    }
+        let packet_start_addr =
+            packet_start_addr.ok_or(FrontendError::AttemptToWriteEmptyPacket)?;
 
-    // Copies a single frame from `self.rx_frame_buf` into the guest.
-    fn do_write_frame_to_guest(&mut self) -> Result<(), FrontendError> {
-        // This is safe since we checked in the event handler that the device is activated.
-        let mem = self.device_state.mem().unwrap();
+        let mut end_packet_processing = || {
+            // Update the queue's internals as this is the end of this
+            // packet being written.
+            self.queues[RX_INDEX].next_used += Wrapping(used_heads);
+            self.queues[RX_INDEX].num_added += Wrapping(used_heads);
 
-        let queue = &mut self.queues[RX_INDEX];
-        let head_descriptor = queue.pop_or_enable_notification(mem).ok_or_else(|| {
-            self.metrics.no_rx_avail_buffer.inc();
-            FrontendError::EmptyQueue
-        })?;
-        let head_index = head_descriptor.index;
+            // Update the used ring with what we used to process the packet.
+            self.queues[RX_INDEX].set_used_ring_idx((next_used + Wrapping(used_heads)).0, mem);
 
-        let result = Self::write_to_descriptor_chain(
-            mem,
-            &self.rx_frame_buf[..self.rx_bytes_read],
-            head_descriptor,
-            &self.metrics,
-        );
-        // Mark the descriptor chain as used. If an error occurred, skip the descriptor chain.
-        let used_len = if result.is_err() {
-            self.metrics.rx_fails.inc();
-            0
-        } else {
-            // Safe to unwrap because a frame must be smaller than 2^16 bytes.
-            u32::try_from(self.rx_bytes_read).unwrap()
+            // Clear partial write info if there was one.
+            self.rx_partial_write = None;
        };
-        queue.add_used(mem, head_index, used_len).map_err(|err| {
-            error!("Failed to add available descriptor {}: {}", head_index, err);
-            FrontendError::AddUsed
-        })?;
 
-        result
-    }
+        if let Some(err) = error {
+            // There was an error during the write.
+            end_packet_processing();
 
-    // Copies a single frame from `self.rx_frame_buf` into the guest. In case of an error retries
-    // the operation if possible. Returns true if the operation was successfull.
-    fn write_frame_to_guest(&mut self) -> bool {
-        let max_iterations = self.queues[RX_INDEX].actual_size();
-        for _ in 0..max_iterations {
-            match self.do_write_frame_to_guest() {
-                Ok(()) => return true,
-                Err(FrontendError::EmptyQueue) | Err(FrontendError::AddUsed) => {
-                    return false;
-                }
-                Err(_) => {
-                    // retry
-                    continue;
-                }
+            self.metrics.rx_fails.inc();
+
+            // `next_used` is pointing at the first descriptor used to process the packet.
+            // We used `used_heads` descriptors to process the packet. Go over all of them
+            // and overwrite them with 0 len to discard them.
+            for i in 0..used_heads {
+                let next_used_index = (next_used + Wrapping(i)).0 % actual_size;
+
+                // SAFETY:
+                // This should never panic as we provide an index within
+                // correct bounds.
+                let mut used_element = self.queues[RX_INDEX]
+                    .read_used_ring(mem, next_used_index)
+                    .unwrap();
+                used_element.len = 0;
+                self.queues[RX_INDEX]
+                    .write_used_ring(mem, next_used_index, used_element)
+                    .unwrap();
            }
-        }
 
-        false
+            Err(err)
+        } else if slice.is_empty() {
+            // The packet was fully written.
+            end_packet_processing();
+
+            self.metrics
+                .rx_bytes_count
+                .add(usize_to_u64(self.rx_bytes_read));
+            self.metrics.rx_packets_count.inc();
+
+            // Update the number of descriptor heads used to store the packet.
+            // SAFETY:
+            // The packet_start_addr is a valid guest address and we check
+            // memory boundaries.
+            #[allow(clippy::transmute_ptr_to_ref)]
+            let header: &mut virtio_net_hdr_v1 = unsafe {
+                let header_slice = mem
+                    .get_slice(packet_start_addr, std::mem::size_of::<virtio_net_hdr_v1>())
+                    .map_err(FrontendError::GuestMemory)?;
+                std::mem::transmute(header_slice.ptr_guard_mut().as_ptr())
+            };
+            header.num_buffers = used_heads;
+
+            Ok(())
+        } else {
+            // The packet could not be fully written to the guest.
+            // Save the necessary info to use during the next invocation.
+            self.metrics.rx_partial_writes.inc();
+
+            if let Some(pw) = &mut self.rx_partial_write {
+                pw.bytes_written = self.rx_bytes_read - slice.len();
+                pw.used_heads = used_heads;
+            } else {
+                let pw = PartialWrite {
+                    bytes_written: self.rx_bytes_read - slice.len(),
+                    used_heads,
+                    packet_start_addr,
+                };
+                self.rx_partial_write = Some(pw);
+            }
+
+            Err(FrontendError::EmptyQueue)
+        }
    }
 
     // Tries to detour the frame to MMDS and if MMDS doesn't accept it, sends it on the host TAP.
@@ -1027,6 +1133,7 @@ pub mod tests {
             | 1 << VIRTIO_NET_F_HOST_TSO4
             | 1 << VIRTIO_NET_F_HOST_TSO6
             | 1 << VIRTIO_NET_F_HOST_UFO
+            | 1 << VIRTIO_NET_F_MRG_RXBUF
            | 1 << VIRTIO_F_VERSION_1
            | 1 << VIRTIO_RING_F_EVENT_IDX;
 
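Note: the accounting in the diff above boils down to splitting one received frame across as many RX descriptor heads as needed and reporting that count in the virtio-net header. A minimal, self-contained sketch of that count (hypothetical helper heads_needed, assuming every head holds the same number of writable bytes, which real chains need not):

// Models the value written into `num_buffers` when VIRTIO_NET_F_MRG_RXBUF
// is negotiated; returns None if the frame cannot be delivered.
fn heads_needed(frame_len: usize, buf_len: usize, mrg_rxbuf: bool) -> Option<u16> {
    assert!(buf_len > 0);
    // Without VIRTIO_NET_F_MRG_RXBUF the whole frame must fit in one head.
    let max_heads = if mrg_rxbuf { usize::from(u16::MAX) } else { 1 };
    // Round up; even an empty frame still consumes one head for the header.
    let heads = ((frame_len + buf_len - 1) / buf_len).max(1);
    (heads <= max_heads).then(|| u16::try_from(heads).unwrap())
}

fn main() {
    // A 4000-byte frame over 1526-byte buffers spans 3 heads, so num_buffers = 3.
    assert_eq!(heads_needed(4000, 1526, true), Some(3));
    // The same frame cannot be delivered when MRG_RXBUF is off.
    assert_eq!(heads_needed(4000, 1526, false), None);
}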