Edit

IABSD.fr/src/sys/net/ifq.h

Branch :

  • Show log

    Commit

  • Author : dlg
    Date : 2020-07-07 00:00:03
    Hash : 57b86033
    Message : add kstats for rx queues (ifiqs) and transmit queues (ifqs). this means you can observe what the network stack is trying to do when it's working with a nic driver that supports multiple rings. a nic with only one set of rings still gets queues though, and this still exports their stats. here is a small example of what kstat(8) currently outputs for these stats: em0:0:rxq:0 packets: 2292 packets bytes: 229846 bytes qdrops: 0 packets errors: 0 packets qlen: 0 packets em0:0:txq:0 packets: 1297 packets bytes: 193413 bytes qdrops: 0 packets errors: 0 packets qlen: 0 packets maxqlen: 511 packets oactive: false

  • sys/net/ifq.h
  • /*	$OpenBSD: ifq.h,v 1.32 2020/07/07 00:00:03 dlg Exp $ */
    
    /*
     * Copyright (c) 2015 David Gwynne <dlg@openbsd.org>
     *
     * Permission to use, copy, modify, and distribute this software for any
     * purpose with or without fee is hereby granted, provided that the above
     * copyright notice and this permission notice appear in all copies.
     *
     * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
     * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
     * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
     * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
     * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
     * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
     * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
     */
    
    #ifndef _NET_IFQ_H_
    #define _NET_IFQ_H_
    
    struct ifnet;
    struct kstat;
    
    struct ifq_ops;
    
    /*
     * Interface send queue. Sits between the network stack and a driver's
     * transmit path; a device with several independent transmit rings has
     * one struct ifqueue per ring.
     */
    struct ifqueue {
    	/* the interface this queue was initialised against */
    	struct ifnet		*ifq_if;
    	/* NOTE(review): presumably the softnet taskq serving this queue -- confirm */
    	struct taskq		*ifq_softnet;
    	union {
    		/* per-ring driver state; drivers set it after attaching queues */
    		void			*_ifq_softc;
    		/*
    		 * a ring's sndq is found by looking up an array of pointers.
    		 * by default we only have one sndq and the default drivers
    		 * don't use ifq_softc, so we can borrow it for the map until
    		 * we need to allocate a proper map.
    		 */
    		struct ifqueue		*_ifq_ifqs[1];
    	} _ifq_ptr;
    /* shorthand accessors for the union members above */
    #define ifq_softc		 _ifq_ptr._ifq_softc
    #define ifq_ifqs		 _ifq_ptr._ifq_ifqs
    
    	/* mbuf handling */
    	/* the queue lock; mbufs must not be freed while it is held */
    	struct mutex		 ifq_mtx;
    	/* ops of the currently attached traffic conditioner */
    	const struct ifq_ops	*ifq_ops;
    	/* NOTE(review): presumably the conditioner's private state -- confirm */
    	void			*ifq_q;
    	/* NOTE(review): presumably mbufs waiting to be freed outside ifq_mtx */
    	struct mbuf_list	 ifq_free;
    	/* number of queued packets, as read by ifq_len() */
    	unsigned int		 ifq_len;
    	/* non-zero while the driver has marked the queue output-active */
    	unsigned int		 ifq_oactive;
    
    	/* statistics */
    	uint64_t		 ifq_packets;
    	uint64_t		 ifq_bytes;
    	uint64_t		 ifq_qdrops;
    	uint64_t		 ifq_errors;
    	uint64_t		 ifq_mcasts;
    
    	/* kstat object associated with this queue */
    	struct kstat		*ifq_kstat;
    
    	/* work serialisation */
    	struct mutex		 ifq_task_mtx;
    	struct task_list	 ifq_task_list;
    	void			*ifq_serializer;
    	struct task		 ifq_bundle;
    
    	/* work to be serialised */
    	struct task		 ifq_start;
    	/* dispatched through ifq_serialize() by ifq_restart() */
    	struct task		 ifq_restart;
    
    	/* properties */
    	/* written by ifq_set_maxlen() */
    	unsigned int		 ifq_maxlen;
    	/* NOTE(review): presumably this queue's index within the ifnet -- confirm */
    	unsigned int		 ifq_idx;
    };
    
    /*
     * Interface input queue (ifiq): the receive-side counterpart of
     * struct ifqueue.
     */
    struct ifiqueue {
    	/* the interface this queue was initialised against */
    	struct ifnet		*ifiq_if;
    	/* NOTE(review): presumably the softnet taskq serving this queue -- confirm */
    	struct taskq		*ifiq_softnet;
    	union {
    		/*
    		 * same layout trick as struct ifqueue: a softc pointer
    		 * sharing storage with a one-entry array of queue pointers.
    		 */
    		void			*_ifiq_softc;
    		struct ifiqueue		*_ifiq_ifiqs[1];
    	} _ifiq_ptr;
    /* shorthand accessors for the union members above */
    #define ifiq_softc		 _ifiq_ptr._ifiq_softc
    #define ifiq_ifiqs		 _ifiq_ptr._ifiq_ifiqs
    
    	struct mutex		 ifiq_mtx;
    	/* mbuf list inspected by ifiq_len() and ifiq_empty() */
    	struct mbuf_list	 ifiq_ml;
    	struct task		 ifiq_task;
    	/* NOTE(review): meaning of "pressure" is not visible here -- see ifq.c */
    	unsigned int		 ifiq_pressure;
    
    	/* counters */
    	uint64_t		 ifiq_packets;
    	uint64_t		 ifiq_bytes;
    	uint64_t		 ifiq_qdrops;
    	uint64_t		 ifiq_errors;
    	uint64_t		 ifiq_mcasts;
    	uint64_t		 ifiq_noproto;
    
    	/* kstat object associated with this queue */
    	struct kstat		*ifiq_kstat;
    
    	/* properties */
    	/* NOTE(review): presumably this queue's index within the ifnet -- confirm */
    	unsigned int		 ifiq_idx;
    };
    
    #ifdef _KERNEL
    
    /*
     * NOTE(review): presumably the default maximum queue length, in packets.
     * It is not referenced elsewhere in this header -- confirm against users.
     */
    #define IFQ_MAXLEN		256
    
    /*
     *
     * Interface Send Queues
     *
     * struct ifqueue sits between the network stack and a driver's
     * transmission of packets. The high level view is that when the stack
     * has finished generating a packet it hands it to a driver for
     * transmission. It does this by queueing the packet on an ifqueue and
     * notifying the driver to start transmission of the queued packets.
     *
     * A network device may have multiple contexts for the transmission
     * of packets, ie, independent transmit rings. Such a network device,
     * represented by a struct ifnet, would then have multiple ifqueue
     * structures, each of which maps to an independent transmit ring.
     *
     * struct ifqueue also provides the point where conditioning of
     * traffic (ie, priq and hfsc) is implemented, and provides some
     * infrastructure to assist in the implementation of network drivers.
     *
     * = ifq API
     *
     * The ifq API provides functions for three distinct consumers:
     *
     * 1. The network stack
     * 2. Traffic QoS/conditioning implementations
     * 3. Network drivers
     *
     * == Network Stack API
     *
     * The network stack is responsible for initialising and destroying
     * the ifqueue structures, changing the traffic conditioner on an
     * interface, enqueuing packets for transmission, and notifying
     * the driver to start transmission of a particular ifqueue.
     *
     * === ifq_init()
     *
     * During if_attach(), the network stack calls ifq_init to initialise
     * the ifqueue structure. By default it configures the priq traffic
     * conditioner.
     *
     * === ifq_destroy()
     *
     * The network stack calls ifq_destroy() during if_detach to tear down
     * the ifqueue structure. It frees the traffic conditioner state, and
     * frees any mbufs that were left queued.
     *
     * === ifq_attach()
     *
     * ifq_attach() is used to replace the current traffic conditioner on
     * the ifqueue. All the pending mbufs are removed from the previous
     * conditioner and requeued on the new.
     *
     * === ifq_idx()
     *
     * ifq_idx() selects a specific ifqueue from the current ifnet
     * structure for use in the transmission of the mbuf.
     *
     * === ifq_enqueue()
     *
     * ifq_enqueue() attempts to fit an mbuf onto the ifqueue. The
     * current traffic conditioner may drop a packet to make space on the
     * queue.
     *
     * === ifq_start()
     *
     * Once a packet has been successfully queued with ifq_enqueue(),
     * the network card is notified with a call to ifq_start().
     * Calls to ifq_start() run in the ifqueue serialisation context,
     * guaranteeing that only one instance of ifp->if_qstart() will be
     * running on behalf of a specific ifqueue in the system at any point
     * in time.
     *
     * == Traffic conditioners API
     *
     * The majority of interaction between struct ifqueue and a traffic
     * conditioner occurs via the callbacks a traffic conditioner provides
     * in an instance of struct ifq_ops.
     *
     * XXX document ifqop_*
     *
     * The ifqueue API implements the locking on behalf of the conditioning
     * implementations so conditioners only have to reject or keep mbufs.
     * If something needs to inspect a conditioner's internals, the queue lock
     * needs to be taken to allow for a consistent or safe view. The queue
     * lock may be taken and released with ifq_q_enter() and ifq_q_leave().
     *
     * === ifq_q_enter()
     *
     * Code wishing to access a conditioner's internals may take the queue
     * lock with ifq_q_enter(). The caller must pass a reference to the
     * conditioner's ifq_ops structure so the infrastructure can ensure the
     * caller is able to understand the internals. ifq_q_enter() returns
     * a pointer to the conditioner's internal structures, or NULL if the
     * ifq_ops did not match the current conditioner.
     *
     * === ifq_q_leave()
     *
     * The queue lock acquired with ifq_q_enter() is released with
     * ifq_q_leave().
     *
     * === ifq_mfreem() and ifq_mfreeml()
     *
     * A goal of the API is to avoid freeing an mbuf while mutexes are
     * held. Because the ifq API manages the lock on behalf of the backend
     * ifqops, the backend should not directly free mbufs. If a conditioner
     * backend needs to drop a packet during the handling of ifqop_deq_begin,
     * it may free it by calling ifq_mfreem(). This accounts for the drop,
     * and schedules the free of the mbuf outside the hold of ifq_mtx.
     * ifq_mfreeml() takes an mbuf list as an argument instead.
     *
     *
     * == Network Driver API
     *
     * The API used by network drivers is mostly documented in the
     * ifq_dequeue(9) manpage except for ifq_serialize().
     *
     * === ifq_serialize()
     *
     * A driver may run arbitrary work in the ifqueue serialiser context
     * via ifq_serialize(). The work to be done is represented by a task
     * that has been prepared with task_set.
     *
     * The work will be run in series with any other work dispatched by
     * ifq_start(), ifq_restart(), or other ifq_serialize() calls.
     *
     * Because the work may be run on another CPU, the lifetime of the
     * task and the work it represents can extend beyond the end of the
     * call to ifq_serialize() that dispatched it.
     *
     *
     * = ifqueue work serialisation
     *
     * ifqueues provide a mechanism to dispatch work to be run in a single
     * context. Work in this mechanism is represented by task structures.
     *
     * The tasks are run in a context similar to a taskq serviced by a
     * single kernel thread, except the work is run immediately by the
     * first CPU that dispatches work. If a second CPU attempts to dispatch
     * additional tasks while the first is still running, it will be queued
     * to be run by the first CPU. The second CPU will return immediately.
     *
     * = MP Safe Network Drivers
     *
     * An MP safe network driver is one in which its start routine can be
     * called by the network stack without holding the big kernel lock.
     *
     * == Attach
     *
     * A driver advertises its ability to run its start routine without
     * the kernel lock by setting the IFXF_MPSAFE flag in ifp->if_xflags
     * before calling if_attach(). Advertising an MPSAFE start routine
     * also implies that the driver understands that a network card can
     * have multiple rings or transmit queues, and therefore provides an
     * if_qstart function (which takes an ifqueue pointer) instead of an
     * if_start function (which takes an ifnet pointer).
     *
     * If the hardware supports multiple transmit rings, it advertises
     * support for multiple rings to the network stack with if_attach_queues()
     * after the call to if_attach(). if_attach_queues allocates a struct
     * ifqueue for each hardware ring, which can then be initialised by
     * the driver with data for each ring.
     *
     *	void	drv_start(struct ifqueue *);
     *
     *	void
     *	drv_attach()
     *	{
     *	...
     *		ifp->if_xflags = IFXF_MPSAFE;
     *		ifp->if_qstart = drv_start;
     *		if_attach(ifp);
     *
     *		if_attach_queues(ifp, DRV_NUM_TX_RINGS);
     *		for (i = 0; i < DRV_NUM_TX_RINGS; i++) {
     *			struct ifqueue *ifq = ifp->if_ifqs[i];
     *			struct drv_tx_ring *ring = &sc->sc_tx_rings[i];
     *
     *			ifq->ifq_softc = ring;
     *			ring->ifq = ifq;
     *		}
     *	}
     *
     * The network stack will then call ifp->if_qstart via ifq_start()
     * to guarantee there is only one instance of that function running
     * for each ifq in the system, and to serialise it with other work
     * the driver may provide.
     *
     * == Initialise
     *
     * When the stack requests an interface be brought up (ie, drv_ioctl()
     * is called to handle SIOCSIFFLAGS with IFF_UP set in ifp->if_flags)
     * drivers should set IFF_RUNNING in ifp->if_flags, and then call
     * ifq_clr_oactive() against each ifq.
     *
     * == if_start
     *
     * ifq_start() checks that IFF_RUNNING is set in ifp->if_flags, that
     * ifq_is_oactive() does not return true, and that there are pending
     * packets to transmit via a call to ifq_len(). Therefore, drivers are
     * no longer responsible for doing this themselves.
     *
     * If a driver should not transmit packets while its link is down, use
     * ifq_purge() to flush pending packets from the transmit queue.
     *
     * Drivers for hardware should use the following pattern to transmit
     * packets:
     *
     *	void
     *	drv_start(struct ifqueue *ifq)
     *	{
     *		struct drv_tx_ring *ring = ifq->ifq_softc;
     *		struct ifnet *ifp = ifq->ifq_if;
     *		struct drv_softc *sc = ifp->if_softc;
     *		struct mbuf *m;
     *		int kick = 0;
     *
     *		if (NO_LINK) {
     *			ifq_purge(ifq);
     *			return;
     *		}
     *
     *		for (;;) {
     *			if (NO_SPACE(ring)) {
     *				ifq_set_oactive(ifq);
     *				break;
     *			}
     *
     *			m = ifq_dequeue(ifq);
     *			if (m == NULL)
     *				break;
     *
     *			if (drv_encap(sc, ring, m) != 0) { // map and fill ring
     *				m_freem(m);
     *				continue;
     *			}
     *
     *			bpf_mtap();
     *		}
     *
     *		drv_kick(ring); // notify hw of new descriptors on the ring
     *	 }
     *
     * == Transmission completion
     *
     * The following pattern should be used for transmit queue interrupt
     * processing:
     *
     *	void
     *	drv_txeof(struct drv_tx_ring *ring)
     *	{
     *		struct ifqueue *ifq = ring->ifq;
     *
     *		while (COMPLETED_PKTS(ring)) {
     *			// unmap packets, m_freem() the mbufs.
     *		}
     *
     *		if (ifq_is_oactive(ifq))
     *			ifq_restart(ifq);
     *	}
     *
     * == Stop
     *
     * Bringing an interface down (ie, IFF_UP was cleared in ifp->if_flags)
     * should clear IFF_RUNNING in ifp->if_flags, and guarantee the start
     * routine is not running before freeing any resources it uses:
     *
     *	void
     *	drv_down(struct drv_softc *sc)
     *	{
     *		struct ifnet *ifp = &sc->sc_if;
     *		struct ifqueue *ifq;
     *		int i;
     *
     *		CLR(ifp->if_flags, IFF_RUNNING);
     *		DISABLE_INTERRUPTS();
     *
     *		for (i = 0; i < sc->sc_num_queues; i++) {
     *			ifq = ifp->if_ifqs[i];
     *			ifq_barrier(ifq);
     *		}
     *
     *		intr_barrier(sc->sc_ih);
     *
     *		FREE_RESOURCES();
     *
     *		for (i = 0; i < sc->sc_num_queues; i++) {
     *			ifq = ifp->if_ifqs[i];
     *			ifq_clr_oactive(ifq);
     *		}
     *	}
     *
     */
    
    /*
     * Callbacks a traffic conditioner provides to the ifqueue layer. The
     * ifqueue API takes the queue lock on the conditioner's behalf, so these
     * must not free mbufs directly; use ifq_mfreem()/ifq_mfreeml() instead.
     */
    struct ifq_ops {
    	/*
    	 * Called via ifq_idx() with the number of ifqueues and the mbuf;
    	 * returns the index of the ifqueue to use for transmission.
    	 */
    	unsigned int		 (*ifqop_idx)(unsigned int,
    				    const struct mbuf *);
    	/*
    	 * NOTE(review): presumably queues the mbuf and returns a packet
    	 * to drop (or NULL) -- confirm against ifq_enqueue().
    	 */
    	struct mbuf		*(*ifqop_enq)(struct ifqueue *, struct mbuf *);
    	/*
    	 * Begin a dequeue. May drop packets with ifq_mfreem() while
    	 * running. NOTE(review): the void ** looks like a cookie handed
    	 * back to ifqop_deq_commit -- confirm.
    	 */
    	struct mbuf		*(*ifqop_deq_begin)(struct ifqueue *, void **);
    	void			 (*ifqop_deq_commit)(struct ifqueue *,
    				    struct mbuf *, void *);
    	/*
    	 * NOTE(review): presumably moves every queued mbuf onto the
    	 * supplied list -- confirm.
    	 */
    	void			 (*ifqop_purge)(struct ifqueue *,
    				    struct mbuf_list *);
    	/*
    	 * NOTE(review): allocate/free conditioner state; the meaning of
    	 * the unsigned int and void * arguments is not visible here.
    	 */
    	void			*(*ifqop_alloc)(unsigned int, void *);
    	void			 (*ifqop_free)(unsigned int, void *);
    };
    
    /*
     * Ops of the priq traffic conditioner, the one ifq_init() configures by
     * default. ifq_is_priq() compares a queue's ops against this pointer.
     */
    extern const struct ifq_ops * const ifq_priq_ops;
    
    /*
     * Interface send queues.
     */
    
    /* network stack: set up, change conditioner, tear down */
    void		 ifq_init(struct ifqueue *, struct ifnet *, unsigned int);
    void		 ifq_attach(struct ifqueue *, const struct ifq_ops *, void *);
    void		 ifq_destroy(struct ifqueue *);
    /* NOTE(review): presumably adds this queue's counters to if_data -- confirm */
    void		 ifq_add_data(struct ifqueue *, struct if_data *);
    /* network stack: queue a packet, then notify the driver */
    int		 ifq_enqueue(struct ifqueue *, struct mbuf *);
    void		 ifq_start(struct ifqueue *);
    /* driver dequeue API, documented in ifq_dequeue(9) */
    struct mbuf	*ifq_deq_begin(struct ifqueue *);
    void		 ifq_deq_commit(struct ifqueue *, struct mbuf *);
    void		 ifq_deq_rollback(struct ifqueue *, struct mbuf *);
    struct mbuf	*ifq_dequeue(struct ifqueue *);
    int		 ifq_hdatalen(struct ifqueue *);
    /* conditioners: account a drop and defer the free until ifq_mtx is released */
    void		 ifq_mfreem(struct ifqueue *, struct mbuf *);
    void		 ifq_mfreeml(struct ifqueue *, struct mbuf_list *);
    /* flush pending packets from the transmit queue */
    unsigned int	 ifq_purge(struct ifqueue *);
    /* take/release the queue lock to inspect a conditioner's internals */
    void		*ifq_q_enter(struct ifqueue *, const struct ifq_ops *);
    void		 ifq_q_leave(struct ifqueue *, void *);
    /* run a task in the ifqueue serialiser context */
    void		 ifq_serialize(struct ifqueue *, struct task *);
    /* used when stopping, so the start routine is not running during teardown */
    void		 ifq_barrier(struct ifqueue *);
    
    
    /* NOTE(review): sleeping dequeue; argument meanings are not visible here */
    int		 ifq_deq_sleep(struct ifqueue *, struct mbuf **, int, int,
    		     const char *, volatile unsigned int *,
    		     volatile unsigned int *);
    
    /*
     * Queue length helpers. ifq_len() reads the packet count, ifq_empty()
     * is true when that count is zero, and ifq_set_maxlen() stores a new
     * maximum length (the expression evaluates to the stored value).
     */
    #define	ifq_len(_q)			((_q)->ifq_len)
    #define	ifq_empty(_q)			((_q)->ifq_len == 0)
    #define	ifq_set_maxlen(_q, _n)		((_q)->ifq_maxlen = (_n))
    
    /*
     * Report whether the priq conditioner is the one currently attached:
     * non-zero when ifq's ops pointer is ifq_priq_ops, zero otherwise.
     */
    static inline int
    ifq_is_priq(struct ifqueue *ifq)
    {
    	const struct ifq_ops *cur = ifq->ifq_ops;
    
    	return (cur == ifq_priq_ops);
    }
    
    /*
     * Mark the queue output-active. Drivers do this from their start
     * routine when the transmit ring has no space left.
     */
    static inline void
    ifq_set_oactive(struct ifqueue *ifq)
    {
    	unsigned int *oactive = &ifq->ifq_oactive;
    
    	*oactive = 1;
    }
    
    /*
     * Clear the output-active mark. Drivers call this against each ifq
     * when the interface is brought up and again after it is stopped.
     */
    static inline void
    ifq_clr_oactive(struct ifqueue *ifq)
    {
    	unsigned int *oactive = &ifq->ifq_oactive;
    
    	*oactive = 0;
    }
    
    /*
     * Return the queue's output-active mark: non-zero once
     * ifq_set_oactive() has been called and not yet cleared.
     */
    static inline unsigned int
    ifq_is_oactive(struct ifqueue *ifq)
    {
    	unsigned int oactive = ifq->ifq_oactive;
    
    	return (oactive);
    }
    
    /*
     * Dispatch the queue's restart task in the ifqueue serialiser context.
     * Transmit completion handlers call this when the queue is oactive.
     */
    static inline void
    ifq_restart(struct ifqueue *ifq)
    {
    	struct task *restart = &ifq->ifq_restart;
    
    	ifq_serialize(ifq, restart);
    }
    
    static inline unsigned int
    ifq_idx(struct ifqueue *ifq, unsigned int nifqs, const struct mbuf *m)
    {
    	return ((*ifq->ifq_ops->ifqop_idx)(nifqs, m));
    }
    
    /*
     * ifiq: interface input (receive) queues.
     *
     * NOTE(review): the return values of ifiq_input() and ifiq_enqueue()
     * are not documented in this header -- see ifq.c before relying on them.
     */
    
    void		 ifiq_init(struct ifiqueue *, struct ifnet *, unsigned int);
    void		 ifiq_destroy(struct ifiqueue *);
    int		 ifiq_input(struct ifiqueue *, struct mbuf_list *);
    int		 ifiq_enqueue(struct ifiqueue *, struct mbuf *);
    void		 ifiq_add_data(struct ifiqueue *, struct if_data *);
    
    /* length and emptiness of an input queue's pending mbuf list */
    #define	ifiq_len(_iq)			ml_len(&(_iq)->ifiq_ml)
    #define	ifiq_empty(_iq)			ml_empty(&(_iq)->ifiq_ml)
    
    #endif /* _KERNEL */
    
    #endif /* _NET_IFQ_H_ */