Edit

IABSD.fr/src/sys/dev/dt/dt_dev.c

Branch :

  • Show log

    Commit

  • Author : mpi
    Date : 2024-04-06 11:18:02
    Hash : c2c0b0cf
    Message : Remove (unused) in-kernel event filtering, it's currently dead code. All events are currently exported to userland in order to support complex filters. If this becomes a bottleneck it should be possible to translate (some) user-land filters to in-kernel filters. Prodded by a diff from Christian Ludwig to also trace the tracing program. ok claudio@

  • sys/dev/dt/dt_dev.c
  • /*	$OpenBSD: dt_dev.c,v 1.33 2024/04/06 11:18:02 mpi Exp $ */
    
    /*
     * Copyright (c) 2019 Martin Pieuchot <mpi@openbsd.org>
     *
     * Permission to use, copy, modify, and distribute this software for any
     * purpose with or without fee is hereby granted, provided that the above
     * copyright notice and this permission notice appear in all copies.
     *
     * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
     * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
     * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
     * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
     * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
     * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
     * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
     */
    
    #include <sys/types.h>
    #include <sys/systm.h>
    #include <sys/param.h>
    #include <sys/clockintr.h>
    #include <sys/device.h>
    #include <sys/exec_elf.h>
    #include <sys/malloc.h>
    #include <sys/proc.h>
    #include <sys/ptrace.h>
    
    #include <dev/dt/dtvar.h>
    
    /*
     * Number of frames to skip in stack traces.
     *
     * The number of frames required to execute dt(4) profiling code
     * depends on the probe, context, architecture and possibly the
     * compiler.
     *
     * Static probes (tracepoints) are executed in the context of the
     * current thread and only need to skip frames up to the recording
     * function.  For example the syscall provider:
     *
     *	dt_prov_syscall_entry+0x141
     *	syscall+0x205		<--- start here
     *	Xsyscall+0x128
     *
     * Probes executed in their own context, like the profile provider,
     * need to skip the frames of that context which are different for
     * every architecture.  For example the profile provider executed
     * from hardclock(9) on amd64:
     *
     *	dt_prov_profile_enter+0x6e
     *	hardclock+0x1a9
     *	lapic_clockintr+0x3f
     *	Xresume_lapic_ltimer+0x26
     *	acpicpu_idle+0x1d2	<---- start here.
     *	sched_idle+0x225
     *	proc_trampoline+0x1c
     */
    /*
     * Per-architecture frame counts handed to stacktrace_save_at():
     * DT_FA_PROFILE when recording on behalf of a profiling probe,
     * DT_FA_STATIC otherwise.
     */
    #if defined(__amd64__)
    #define DT_FA_PROFILE	5
    #define DT_FA_STATIC	2
    #elif defined(__i386__)
    #define DT_FA_PROFILE	5
    #define DT_FA_STATIC	2
    #elif defined(__macppc__)
    #define DT_FA_PROFILE  5
    #define DT_FA_STATIC   2
    #elif defined(__octeon__)
    #define DT_FA_PROFILE	6
    #define DT_FA_STATIC	2
    #elif defined(__powerpc64__)
    #define DT_FA_PROFILE	6
    #define DT_FA_STATIC	2
    #elif defined(__sparc64__)
    #define DT_FA_PROFILE	7
    #define DT_FA_STATIC	1
    #else
    /* Architectures not listed above: do not skip any frame. */
    #define DT_FA_STATIC	0
    #define DT_FA_PROFILE	0
    #endif
    
    #define DT_EVTRING_SIZE	16	/* # of slots in per PCB event ring */
    
    /* Debug print hook; it expands to nothing, so its arguments are discarded. */
    #define DPRINTF(x...) /* nothing */
    
    /*
     * Descriptor associated with each program opening /dev/dt.  It is used
     * to keep track of enabled PCBs.
     *
     *  Locks used to protect struct members in this file:
     *	I	immutable after being set in dtopen()
     *	m	per-softc mutex
     *	K	kernel lock
     */
    struct dt_softc {
    	SLIST_ENTRY(dt_softc)	 ds_next;	/* [K] descriptor list */
    	int			 ds_unit;	/* [I] D_CLONE unique unit */
    	pid_t			 ds_pid;	/* [I] PID of tracing program */
    
    	/* Per-softc mutex protecting the members marked [m]. */
    	struct mutex		 ds_mtx;
    
    	struct dt_pcb_list	 ds_pcbs;	/* [K] list of enabled PCBs */
    	struct dt_evt		*ds_bufqueue;	/* [K] copy evts to userland */
    	size_t			 ds_bufqlen;	/* [K] length of the queue */
    	int			 ds_recording;	/* [K] currently recording? */
    	int			 ds_evtcnt;	/* [m] # of readable evts */
    
    	/* Counters */
    	uint64_t		 ds_readevt;	/* [m] # of events read */
    	uint64_t		 ds_dropevt;	/* [m] # of events dropped */
    };
    
    SLIST_HEAD(, dt_softc) dtdev_list;	/* [K] list of open /dev/dt nodes */
    
    /*
     * Probes are created during dt_attach() and never modified/freed during
     * the lifetime of the system.  That's why we consider them as [I]mmutable.
     */
    unsigned int			dt_nprobes;	/* [I] # of probes available */
    SIMPLEQ_HEAD(, dt_probe)	dt_probe_list;	/* [I] list of probes */
    
    /*
     * Write-locked while PCBs are linked to, or unlinked from, their probe
     * when recording is started or stopped.
     */
    struct rwlock			dt_lock = RWLOCK_INITIALIZER("dtlk");
    volatile uint32_t		dt_tracing = 0;	/* [K] # of processes tracing */
    
    /*
     * dtopen() fails with EPERM while this is zero.
     * NOTE(review): it is not written in this file; presumably set through
     * a sysctl -- confirm.
     */
    int allowdt;
    
    /* Device entry points. */
    void	dtattach(struct device *, struct device *, void *);
    int	dtopen(dev_t, int, int, struct proc *);
    int	dtclose(dev_t, int, int, struct proc *);
    int	dtread(dev_t, struct uio *, int);
    int	dtioctl(dev_t, u_long, caddr_t, int, struct proc *);
    
    /* Find the descriptor of an open unit, or NULL. */
    struct	dt_softc *dtlookup(int);
    
    /* ioctl(2) request handlers, dispatched from dtioctl(). */
    int	dt_ioctl_list_probes(struct dt_softc *, struct dtioc_probe *);
    int	dt_ioctl_get_args(struct dt_softc *, struct dtioc_arg *);
    int	dt_ioctl_get_stats(struct dt_softc *, struct dtioc_stat *);
    int	dt_ioctl_record_start(struct dt_softc *);
    void	dt_ioctl_record_stop(struct dt_softc *);
    int	dt_ioctl_probe_enable(struct dt_softc *, struct dtioc_req *);
    int	dt_ioctl_probe_disable(struct dt_softc *, struct dtioc_req *);
    int	dt_ioctl_get_auxbase(struct dt_softc *, struct dtioc_getaux *);
    
    /* Drain events from a PCB ring into a linear queue; used by dtread(). */
    int	dt_pcb_ring_copy(struct dt_pcb *, struct dt_evt *, size_t, uint64_t *);
    
    /*
     * Attach routine: initialize the list of open descriptors and the list
     * of probes, then run the init routine of every provider.
     *
     * The value returned by each provider init routine is added to
     * `dt_nprobes', the number of probes available.  The kprobe provider
     * is only initialized on kernels built with DDBPROF.  None of the
     * arguments is used.
     */
    void
    dtattach(struct device *parent, struct device *self, void *aux)
    {
    	SLIST_INIT(&dtdev_list);
    	SIMPLEQ_INIT(&dt_probe_list);
    
    	/* Init providers */
    	dt_nprobes += dt_prov_profile_init();
    	dt_nprobes += dt_prov_syscall_init();
    	dt_nprobes += dt_prov_static_init();
    #ifdef DDBPROF
    	dt_nprobes += dt_prov_kprobe_init();
    #endif
    }
    
    /*
     * Open a /dev/dt unit: allocate its descriptor together with the queue
     * used to copy events to userland, then link the descriptor on the list
     * of open nodes.
     *
     * Returns EPERM while `allowdt' is zero, ENOMEM when an allocation
     * fails, EBUSY when the unit already has a descriptor, 0 on success.
     */
    int
    dtopen(dev_t dev, int flags, int mode, struct proc *p)
    {
    	/* Enough space to empty 2 full rings of events in a single read. */
    	const size_t nslots = 2 * DT_EVTRING_SIZE;
    	struct dt_softc *sc;
    	struct dt_evt *evq;
    	int unit = minor(dev);
    
    	if (!allowdt)
    		return EPERM;
    
    	sc = malloc(sizeof(*sc), M_DEVBUF, M_WAITOK|M_CANFAIL|M_ZERO);
    	if (sc == NULL)
    		return ENOMEM;
    
    	evq = mallocarray(nslots, sizeof(*evq), M_DEVBUF, M_WAITOK|M_CANFAIL);
    	if (evq == NULL) {
    		free(sc, M_DEVBUF, sizeof(*sc));
    		return ENOMEM;
    	}
    
    	/* no sleep after this point */
    	if (dtlookup(unit) != NULL) {
    		free(evq, M_DEVBUF, nslots * sizeof(*evq));
    		free(sc, M_DEVBUF, sizeof(*sc));
    		return EBUSY;
    	}
    
    	/* Fill in the descriptor before making it visible on the list. */
    	mtx_init(&sc->ds_mtx, IPL_HIGH);
    	TAILQ_INIT(&sc->ds_pcbs);
    	sc->ds_unit = unit;
    	sc->ds_pid = p->p_p->ps_pid;
    	sc->ds_bufqueue = evq;
    	sc->ds_bufqlen = nslots;
    	sc->ds_evtcnt = 0;
    	sc->ds_readevt = 0;
    	sc->ds_dropevt = 0;
    
    	SLIST_INSERT_HEAD(&dtdev_list, sc, ds_next);
    
    	DPRINTF("dt%d: pid %d open\n", sc->ds_unit, sc->ds_pid);
    
    	return 0;
    }
    
    /*
     * Close a /dev/dt unit: unlink its descriptor, stop recording, release
     * its PCBs and free the event queue and the descriptor itself.
     *
     * The descriptor must exist.  Always returns 0.
     */
    int
    dtclose(dev_t dev, int flags, int mode, struct proc *p)
    {
    	struct dt_softc *sc = dtlookup(minor(dev));
    
    	KASSERT(sc != NULL);
    
    	DPRINTF("dt%d: pid %d close\n", sc->ds_unit, sc->ds_pid);
    
    	/* From now on dtlookup() no longer finds this unit. */
    	SLIST_REMOVE(&dtdev_list, sc, dt_softc, ds_next);
    
    	/* Recording is stopped before the PCBs are released. */
    	dt_ioctl_record_stop(sc);
    	dt_pcb_purge(&sc->ds_pcbs);
    
    	free(sc->ds_bufqueue, M_DEVBUF, sc->ds_bufqlen * sizeof(struct dt_evt));
    	free(sc, M_DEVBUF, sizeof(*sc));
    
    	return 0;
    }
    
    /*
     * read(2) entry point: copy recorded events to userland.
     *
     * Sleeps, interruptibly, until the descriptor has at least one readable
     * event.  The rings of the enabled PCBs are then drained into the
     * descriptor's queue, which is copied out with a single uiomove().
     *
     * Returns 0 on success, EMSGSIZE when the request size computes to less
     * than one event, the error of an interrupted sleep, or the error
     * reported by uiomove().
     */
    int
    dtread(dev_t dev, struct uio *uio, int flags)
    {
    	struct dt_softc *sc;
    	struct dt_evt *estq;
    	struct dt_pcb *dp;
    	int error = 0, unit = minor(dev);
    	size_t qlen, count, read = 0;
    	uint64_t dropped = 0;
    
    	sc = dtlookup(unit);
    	KASSERT(sc != NULL);
    
    	count = howmany(uio->uio_resid, sizeof(struct dt_evt));
    	if (count < 1)
    		return (EMSGSIZE);
    
    	/* Wait for dt_pcb_ring_consume() to signal a readable event. */
    	while (!sc->ds_evtcnt) {
    		sleep_setup(sc, PWAIT | PCATCH, "dtread");
    		error = sleep_finish(0, !sc->ds_evtcnt);
    		if (error == EINTR || error == ERESTART)
    			break;
    	}
    	if (error)
    		return error;
    
    	/* Never copy more events than the queue can hold. */
    	estq = sc->ds_bufqueue;
    	qlen = MIN(sc->ds_bufqlen, count);
    
    	KERNEL_ASSERT_LOCKED();
    	TAILQ_FOREACH(dp, &sc->ds_pcbs, dp_snext) {
    		count = dt_pcb_ring_copy(dp, estq, qlen, &dropped);
    		read += count;
    		estq += count; /* pointer arithmetic */
    		qlen -= count;
    		if (qlen == 0)
    			break;
    	}
    
    	/* A failed copy-out must be reported instead of silently ignored. */
    	if (read > 0) {
    		error = uiomove(sc->ds_bufqueue,
    		    read * sizeof(struct dt_evt), uio);
    	}
    
    	/*
    	 * The events have left the rings whether or not the copy-out
    	 * succeeded, so the accounting is updated in both cases.
    	 */
    	mtx_enter(&sc->ds_mtx);
    	sc->ds_evtcnt -= read;
    	sc->ds_readevt += read;
    	sc->ds_dropevt += dropped;
    	mtx_leave(&sc->ds_mtx);
    
    	return error;
    }
    
    /*
     * ioctl(2) entry point.
     *
     * DTIOCGPLIST, DTIOCGARGS and DTIOCGSTATS are served to any caller.
     * DTIOCRECORD, DTIOCPRBENABLE, DTIOCPRBDISABLE and DTIOCGETAUXBASE
     * additionally require suser() to succeed.  Any other request fails
     * with ENOTTY.
     */
    int
    dtioctl(dev_t dev, u_long cmd, caddr_t addr, int flag, struct proc *p)
    {
    	struct dt_softc *sc;
    	int error;
    
    	sc = dtlookup(minor(dev));
    	KASSERT(sc != NULL);
    
    	/* Unprivileged requests are answered right away. */
    	switch (cmd) {
    	case DTIOCGPLIST:
    		return dt_ioctl_list_probes(sc, (struct dtioc_probe *)addr);
    	case DTIOCGARGS:
    		return dt_ioctl_get_args(sc, (struct dtioc_arg *)addr);
    	case DTIOCGSTATS:
    		return dt_ioctl_get_stats(sc, (struct dtioc_stat *)addr);
    	case DTIOCRECORD:
    	case DTIOCPRBENABLE:
    	case DTIOCPRBDISABLE:
    	case DTIOCGETAUXBASE:
    		/* root only ioctl(2) */
    		break;
    	default:
    		return ENOTTY;
    	}
    
    	error = suser(p);
    	if (error != 0)
    		return error;
    
    	if (cmd == DTIOCRECORD) {
    		/* A non-zero argument starts recording, zero stops it. */
    		if (*(int *)addr)
    			error = dt_ioctl_record_start(sc);
    		else
    			dt_ioctl_record_stop(sc);
    	} else if (cmd == DTIOCPRBENABLE) {
    		error = dt_ioctl_probe_enable(sc, (struct dtioc_req *)addr);
    	} else if (cmd == DTIOCPRBDISABLE) {
    		error = dt_ioctl_probe_disable(sc, (struct dtioc_req *)addr);
    	} else if (cmd == DTIOCGETAUXBASE) {
    		error = dt_ioctl_get_auxbase(sc, (struct dtioc_getaux *)addr);
    	} else {
    		/* Filtered out by the switch above. */
    		KASSERT(0);
    	}
    
    	return error;
    }
    
    /*
     * Return the descriptor whose unit number is `unit', or NULL if no open
     * /dev/dt node uses it.  The kernel lock must be held.
     */
    struct dt_softc *
    dtlookup(int unit)
    {
    	struct dt_softc *sc;
    
    	KERNEL_ASSERT_LOCKED();
    
    	SLIST_FOREACH(sc, &dtdev_list, ds_next) {
    		if (sc->ds_unit == unit)
    			return sc;
    	}
    
    	return NULL;
    }
    
    /*
     * DTIOCGPLIST: copy the description of every probe to the userland
     * array `dtpr_probes'.
     *
     * On entry `dtpr_size' is the size in bytes of that array; on return it
     * holds the size needed for the complete list.  A size of 0 only
     * queries the needed size.
     *
     * Returns 0 on success, ENOSPC if the array is too small for all probes
     * or the error reported by copyout().
     */
    int
    dt_ioctl_list_probes(struct dt_softc *sc, struct dtioc_probe *dtpr)
    {
    	struct dtioc_probe_info info, *uinfo;
    	struct dt_probe *dtp;
    	size_t avail;
    	int error;
    
    	avail = dtpr->dtpr_size;
    	dtpr->dtpr_size = dt_nprobes * sizeof(*uinfo);
    	if (avail == 0)
    		return 0;
    
    	uinfo = dtpr->dtpr_probes;
    	SIMPLEQ_FOREACH(dtp, &dt_probe_list, dtp_next) {
    		if (avail < sizeof(*uinfo))
    			return ENOSPC;
    
    		/* Build the entry in kernel memory, fully zeroed first. */
    		memset(&info, 0, sizeof(info));
    		info.dtpi_pbn = dtp->dtp_pbn;
    		info.dtpi_nargs = dtp->dtp_nargs;
    		strlcpy(info.dtpi_prov, dtp->dtp_prov->dtpv_name,
    		    sizeof(info.dtpi_prov));
    		strlcpy(info.dtpi_func, dtp->dtp_func, sizeof(info.dtpi_func));
    		strlcpy(info.dtpi_name, dtp->dtp_name, sizeof(info.dtpi_name));
    
    		error = copyout(&info, uinfo, sizeof(*uinfo));
    		if (error)
    			return error;
    
    		uinfo++;
    		avail -= sizeof(*uinfo);
    	}
    
    	return 0;
    }
    
    /*
     * DTIOCGARGS: copy the argument types of probe `dtar_pbn' to the
     * userland array `dtar_args'.
     *
     * On entry `dtar_size' is the size in bytes of that array.  A size of 0
     * only queries the space needed, reported back in `dtar_size'.
     * Otherwise `dtar_size' is set to the number of bytes copied out.
     * Sizes are counted in elements of struct dtioc_arg_info, which is the
     * type being copied out and the unit used by the space checks below.
     *
     * Returns 0 on success, EINVAL for an unknown probe number, ENOSPC if
     * the array is too small, or the error reported by copyout().
     */
    int
    dt_ioctl_get_args(struct dt_softc *sc, struct dtioc_arg *dtar)
    {
    	struct dtioc_arg_info info, *dtai;
    	struct dt_probe *dtp;
    	size_t size, n, t;
    	uint32_t pbn;
    	int error = 0;
    
    	pbn = dtar->dtar_pbn;
    	if (pbn == 0 || pbn > dt_nprobes)
    		return EINVAL;
    
    	SIMPLEQ_FOREACH(dtp, &dt_probe_list, dtp_next) {
    		if (pbn == dtp->dtp_pbn)
    			break;
    	}
    	if (dtp == NULL)
    		return EINVAL;
    
    	if (dtp->dtp_sysnum != 0) {
    		/* currently not supported for system calls */
    		dtar->dtar_size = 0;
    		return 0;
    	}
    
    	size = dtar->dtar_size;
    	dtar->dtar_size = dtp->dtp_nargs * sizeof(*dtai);
    	if (size == 0)
    		return 0;
    
    	/* `t' counts the entries actually copied out. */
    	t = 0;
    	dtai = dtar->dtar_args;
    	for (n = 0; n < dtp->dtp_nargs; n++) {
    		if (size < sizeof(*dtai)) {
    			error = ENOSPC;
    			break;
    		}
    		/* Arguments without a recorded type are skipped. */
    		if (n >= DTMAXARGTYPES || dtp->dtp_argtype[n] == NULL)
    			continue;
    		memset(&info, 0, sizeof(info));
    		info.dtai_pbn = dtp->dtp_pbn;
    		info.dtai_argn = t++;
    		strlcpy(info.dtai_argtype, dtp->dtp_argtype[n],
    		    sizeof(info.dtai_argtype));
    		error = copyout(&info, dtai, sizeof(*dtai));
    		if (error)
    			break;
    		size -= sizeof(*dtai);
    		dtai++;
    	}
    	dtar->dtar_size = t * sizeof(*dtai);
    
    	return error;
    }
    
    int
    dt_ioctl_get_stats(struct dt_softc *sc, struct dtioc_stat *dtst)
    {
    	mtx_enter(&sc->ds_mtx);
    	dtst->dtst_readevt = sc->ds_readevt;
    	dtst->dtst_dropevt = sc->ds_dropevt;
    	mtx_leave(&sc->ds_mtx);
    
    	return 0;
    }
    
    /*
     * Start recording: link every enabled PCB of `sc' on the PCB list of its
     * probe and bump the recording counters of the probe and its provider.
     *
     * Returns EBUSY if the descriptor is already recording, ENOENT if it has
     * no enabled PCB, 0 on success.
     */
    int
    dt_ioctl_record_start(struct dt_softc *sc)
    {
    	uint64_t now;
    	struct dt_pcb *dp;
    
    	if (sc->ds_recording)
    		return EBUSY;
    
    	KERNEL_ASSERT_LOCKED();
    	if (TAILQ_EMPTY(&sc->ds_pcbs))
    		return ENOENT;
    
    	rw_enter_write(&dt_lock);
    	/* Single time base for the first expiration of all clock interrupts. */
    	now = nsecuptime();
    	TAILQ_FOREACH(dp, &sc->ds_pcbs, dp_snext) {
    		struct dt_probe *dtp = dp->dp_dtp;
    
    		SMR_SLIST_INSERT_HEAD_LOCKED(&dtp->dtp_pcbs, dp, dp_pnext);
    		dtp->dtp_recording++;
    		dtp->dtp_prov->dtpv_recording++;
    
    		/*
    		 * PCBs with a period get a clock interrupt bound to their
    		 * CPU, running dt_clock() and first due one period from now.
    		 */
    		if (dp->dp_nsecs != 0) {
    			clockintr_bind(&dp->dp_clockintr, dp->dp_cpu, dt_clock,
    			    dp);
    			clockintr_schedule(&dp->dp_clockintr,
    			    now + dp->dp_nsecs);
    		}
    	}
    	rw_exit_write(&dt_lock);
    
    	sc->ds_recording = 1;
    	dt_tracing++;
    
    	return 0;
    }
    
    /*
     * Stop recording: unlink every enabled PCB of `sc' from the PCB list of
     * its probe and drop the recording counters of the probe and its
     * provider.  Does nothing if the descriptor is not recording.
     *
     * The PCBs stay on `ds_pcbs'; they are not freed here.
     */
    void
    dt_ioctl_record_stop(struct dt_softc *sc)
    {
    	struct dt_pcb *dp;
    
    	if (!sc->ds_recording)
    		return;
    
    	DPRINTF("dt%d: pid %d disable\n", sc->ds_unit, sc->ds_pid);
    
    	dt_tracing--;
    	sc->ds_recording = 0;
    
    	rw_enter_write(&dt_lock);
    	TAILQ_FOREACH(dp, &sc->ds_pcbs, dp_snext) {
    		struct dt_probe *dtp = dp->dp_dtp;
    
    		/*
    		 * Set an execution barrier to ensure the shared
    		 * reference to dp is inactive.
    		 */
    		if (dp->dp_nsecs != 0)
    			clockintr_unbind(&dp->dp_clockintr, CL_BARRIER);
    
    		dtp->dtp_recording--;
    		dtp->dtp_prov->dtpv_recording--;
    		SMR_SLIST_REMOVE_LOCKED(&dtp->dtp_pcbs, dp, dt_pcb, dp_pnext);
    	}
    	rw_exit_write(&dt_lock);
    
    	/* Wait until readers cannot access the PCBs. */
    	smr_barrier();
    }
    
    int
    dt_ioctl_probe_enable(struct dt_softc *sc, struct dtioc_req *dtrq)
    {
    	struct dt_pcb_list plist;
    	struct dt_probe *dtp;
    	int error;
    
    	SIMPLEQ_FOREACH(dtp, &dt_probe_list, dtp_next) {
    		if (dtp->dtp_pbn == dtrq->dtrq_pbn)
    			break;
    	}
    	if (dtp == NULL)
    		return ENOENT;
    
    	TAILQ_INIT(&plist);
    	error = dtp->dtp_prov->dtpv_alloc(dtp, sc, &plist, dtrq);
    	if (error)
    		return error;
    
    	DPRINTF("dt%d: pid %d enable %u : %b\n", sc->ds_unit, sc->ds_pid,
    	    dtrq->dtrq_pbn, (unsigned int)dtrq->dtrq_evtflags, DTEVT_FLAG_BITS);
    
    	/* Append all PCBs to this instance */
    	TAILQ_CONCAT(&sc->ds_pcbs, &plist, dp_snext);
    
    	return 0;
    }
    
    /*
     * DTIOCPRBDISABLE: ask the provider of probe `dtrq_pbn' to release what
     * it allocated for this descriptor.  Providers without a dealloc
     * routine are left untouched.
     *
     * Returns ENOENT for an unknown probe number, the error reported by the
     * provider's dealloc routine, or 0 on success.
     */
    int
    dt_ioctl_probe_disable(struct dt_softc *sc, struct dtioc_req *dtrq)
    {
    	struct dt_probe *dtp;
    	int error;
    
    	/* `dtp' is NULL after the loop when no probe matched. */
    	SIMPLEQ_FOREACH(dtp, &dt_probe_list, dtp_next) {
    		if (dtp->dtp_pbn == dtrq->dtrq_pbn)
    			break;
    	}
    	if (dtp == NULL)
    		return ENOENT;
    
    	if (dtp->dtp_prov->dtpv_dealloc) {
    		error = dtp->dtp_prov->dtpv_dealloc(dtp, sc, dtrq);
    		if (error)
    			return error;
    	}
    
    	/* The format needs one conversion per argument, probe number included. */
    	DPRINTF("dt%d: pid %d dealloc %u\n", sc->ds_unit, sc->ds_pid,
    	    dtrq->dtrq_pbn);
    
    	return 0;
    }
    
    int
    dt_ioctl_get_auxbase(struct dt_softc *sc, struct dtioc_getaux *dtga)
    {
    	struct uio uio;
    	struct iovec iov;
    	struct process *pr;
    	struct proc *p = curproc;
    	AuxInfo auxv[ELF_AUX_ENTRIES];
    	int i, error;
    
    	dtga->dtga_auxbase = 0;
    
    	if ((pr = prfind(dtga->dtga_pid)) == NULL)
    		return ESRCH;
    
    	iov.iov_base = auxv;
    	iov.iov_len = sizeof(auxv);
    	uio.uio_iov = &iov;
    	uio.uio_iovcnt = 1;
    	uio.uio_offset = pr->ps_auxinfo;
    	uio.uio_resid = sizeof(auxv);
    	uio.uio_segflg = UIO_SYSSPACE;
    	uio.uio_procp = p;
    	uio.uio_rw = UIO_READ;
    
    	error = process_domem(p, pr, &uio, PT_READ_D);
    	if (error)
    		return error;
    
    	for (i = 0; i < ELF_AUX_ENTRIES; i++)
    		if (auxv[i].au_id == AUX_base)
    			dtga->dtga_auxbase = auxv[i].au_v;
    
    	return 0;
    }
    
    /*
     * Allocate a zeroed probe named `func':`name' belonging to provider
     * `dtpv'.  The allocation does not sleep.  The strings are referenced,
     * not copied.
     *
     * Returns the new probe, with an empty PCB list and `dtp_sysnum' set to
     * -1, or NULL if no memory is available.
     */
    struct dt_probe *
    dt_dev_alloc_probe(const char *func, const char *name, struct dt_provider *dtpv)
    {
    	struct dt_probe *dtp;
    
    	dtp = malloc(sizeof(*dtp), M_DT, M_NOWAIT|M_ZERO);
    	if (dtp != NULL) {
    		SMR_SLIST_INIT(&dtp->dtp_pcbs);
    		dtp->dtp_prov = dtpv;
    		dtp->dtp_func = func;
    		dtp->dtp_name = name;
    		dtp->dtp_sysnum = -1;
    		dtp->dtp_ref = 0;
    	}
    
    	return dtp;
    }
    
    /*
     * Give `dtp' the next probe number and append it to the global list of
     * probes.  Probe numbers start at 1 and grow by one per registration.
     */
    void
    dt_dev_register_probe(struct dt_probe *dtp)
    {
    	static uint64_t last_pbn;
    
    	last_pbn++;
    	dtp->dtp_pbn = last_pbn;
    
    	SIMPLEQ_INSERT_TAIL(&dt_probe_list, dtp, dtp_next);
    }
    
    /*
     * Allocate a zeroed PCB, together with its event ring of DT_EVTRING_SIZE
     * slots, tying probe `dtp' to descriptor `sc'.
     *
     * Returns the new PCB or NULL if an allocation fails, in which case
     * nothing stays allocated.
     */
    struct dt_pcb *
    dt_pcb_alloc(struct dt_probe *dtp, struct dt_softc *sc)
    {
    	struct dt_pcb *dp;
    
    	dp = malloc(sizeof(*dp), M_DT, M_WAITOK|M_CANFAIL|M_ZERO);
    	if (dp == NULL)
    		return NULL;
    
    	dp->dp_ring = mallocarray(DT_EVTRING_SIZE, sizeof(*dp->dp_ring), M_DT,
    	    M_WAITOK|M_CANFAIL|M_ZERO);
    	if (dp->dp_ring == NULL) {
    		/* Releases the PCB allocated above. */
    		dt_pcb_free(dp);
    		return NULL;
    	}
    
    	mtx_init(&dp->dp_mtx, IPL_HIGH);
    	dp->dp_sc = sc;
    	dp->dp_dtp = dtp;
    
    	return dp;
    }
    
    /*
     * Release a PCB and its event ring.  A NULL `dp' is accepted and
     * ignored.
     */
    void
    dt_pcb_free(struct dt_pcb *dp)
    {
    	if (dp != NULL) {
    		free(dp->dp_ring, M_DT, DT_EVTRING_SIZE * sizeof(*dp->dp_ring));
    		free(dp, M_DT, sizeof(*dp));
    	}
    }
    
    /*
     * Empty `plist', freeing every PCB that was linked on it.
     */
    void
    dt_pcb_purge(struct dt_pcb_list *plist)
    {
    	struct dt_pcb *dp;
    
    	while (!TAILQ_EMPTY(plist)) {
    		dp = TAILQ_FIRST(plist);
    		TAILQ_REMOVE(plist, dp, dp_snext);
    		dt_pcb_free(dp);
    	}
    }
    
    /*
     * Get a reference to the next free event state from the ring.
     *
     * The slot at index `dp_cons' is zeroed and pre-filled with the probe
     * number, CPU, PID, TID and timestamp.  Depending on `dp_evtflags' it
     * also receives the command name and the kernel and/or userland stack.
     * `profiling' selects how many frames are skipped in the kernel stack:
     * DT_FA_PROFILE if non-zero, DT_FA_STATIC otherwise.
     *
     * On success the slot is returned with `dp_mtx' still held; the mutex
     * is released by dt_pcb_ring_consume().  If the ring is full the drop
     * counter of the PCB is incremented, the mutex is released and NULL is
     * returned.
     */
    struct dt_evt *
    dt_pcb_ring_get(struct dt_pcb *dp, int profiling)
    {
    	struct proc *p = curproc;
    	struct dt_evt *dtev;
    	int distance;
    
    	mtx_enter(&dp->dp_mtx);
    	/*
    	 * The ring is full when filling the current slot would advance
    	 * `dp_cons' onto `dp_prod', with or without wrapping.
    	 */
    	distance = dp->dp_prod - dp->dp_cons;
    	if (distance == 1 || distance == (1 - DT_EVTRING_SIZE)) {
    		/* read(2) isn't finished */
    		dp->dp_dropevt++;
    		mtx_leave(&dp->dp_mtx);
    		return NULL;
    	}
    
    	/*
    	 * Save states in next free event slot.
    	 */
    	dtev = &dp->dp_ring[dp->dp_cons];
    	memset(dtev, 0, sizeof(*dtev));
    
    	dtev->dtev_pbn = dp->dp_dtp->dtp_pbn;
    	dtev->dtev_cpu = cpu_number();
    	dtev->dtev_pid = p->p_p->ps_pid;
    	dtev->dtev_tid = p->p_tid + THREAD_PID_OFFSET;
    	nanotime(&dtev->dtev_tsp);
    
    	if (ISSET(dp->dp_evtflags, DTEVT_EXECNAME))
    		strlcpy(dtev->dtev_comm, p->p_p->ps_comm, sizeof(dtev->dtev_comm));
    
    	if (ISSET(dp->dp_evtflags, DTEVT_KSTACK)) {
    		if (profiling)
    			stacktrace_save_at(&dtev->dtev_kstack, DT_FA_PROFILE);
    		else
    			stacktrace_save_at(&dtev->dtev_kstack, DT_FA_STATIC);
    	}
    	if (ISSET(dp->dp_evtflags, DTEVT_USTACK))
    		stacktrace_save_utrace(&dtev->dtev_ustack);
    
    	/* Returns with dp_mtx held. */
    	return dtev;
    }
    
    /*
     * Commit the event slot `dtev' obtained from dt_pcb_ring_get().
     *
     * Must be called with `dp_mtx' held and `dtev' being the slot at index
     * `dp_cons'.  The index is advanced, the mutex released, the count of
     * readable events of the owning descriptor incremented and any thread
     * sleeping on that descriptor woken up.
     */
    void
    dt_pcb_ring_consume(struct dt_pcb *dp, struct dt_evt *dtev)
    {
    	MUTEX_ASSERT_LOCKED(&dp->dp_mtx);
    	KASSERT(dtev == &dp->dp_ring[dp->dp_cons]);
    
    	dp->dp_cons = (dp->dp_cons + 1) % DT_EVTRING_SIZE;
    	/* Drops the mutex taken by dt_pcb_ring_get(). */
    	mtx_leave(&dp->dp_mtx);
    
    	mtx_enter(&dp->dp_sc->ds_mtx);
    	dp->dp_sc->ds_evtcnt++;
    	mtx_leave(&dp->dp_sc->ds_mtx);
    	/* dtread() sleeps on the descriptor while no event is readable. */
    	wakeup(dp->dp_sc);
    }
    
    /*
     * Copy at most `qlen' events from `dp', producing the same amount
     * of free slots.
     *
     * In this ring `dp_cons' is the index of the slot filled next by
     * dt_pcb_ring_get()/dt_pcb_ring_consume(), and `dp_prod' is the index of
     * the next event to copy out; events live in [dp_prod, dp_cons).
     *
     * The events are copied to `estq' in at most two runs: from `dp_prod'
     * up to `dp_cons' or the end of the ring, then, if the ring wrapped,
     * from slot 0 up to `dp_cons'.  When at least one event is copied, the
     * drop counter of the PCB is added to `*dropped' and reset.
     *
     * Returns the number of events copied.  `qlen' must not be 0.
     */
    int
    dt_pcb_ring_copy(struct dt_pcb *dp, struct dt_evt *estq, size_t qlen,
        uint64_t *dropped)
    {
    	size_t count, copied = 0;
    	unsigned int cons, prod;
    
    	KASSERT(qlen > 0);
    
    	mtx_enter(&dp->dp_mtx);
    	cons = dp->dp_cons;
    	prod = dp->dp_prod;
    
    	/* Length of the first contiguous run of events. */
    	if (cons < prod)
    		count = DT_EVTRING_SIZE - prod;
    	else
    		count = cons - prod;
    
    	/* Nothing to copy: leave the drop counter untouched. */
    	if (count == 0)
    		goto out;
    
    	*dropped += dp->dp_dropevt;
    	dp->dp_dropevt = 0;
    
    	count = MIN(count, qlen);
    
    	memcpy(&estq[0], &dp->dp_ring[prod], count * sizeof(*estq));
    	copied += count;
    
    	/* Produce */
    	prod = (prod + count) % DT_EVTRING_SIZE;
    
    	/* If the queue is full or the ring didn't wrap, stop here. */
    	if (qlen == copied || prod != 0 || cons == 0)
    		goto out;
    
    	/* Second run: `prod' is 0 here, so it ends up equal to `count'. */
    	count = MIN(cons, (qlen - copied));
    	memcpy(&estq[copied], &dp->dp_ring[0], count * sizeof(*estq));
    	copied += count;
    	prod += count;
    
    out:
    	dp->dp_prod = prod;
    	mtx_leave(&dp->dp_mtx);
    	return copied;
    }