Edit

IABSD.fr/src/sys/dev/pv/hypervic.c

Branch :

  • Show log

    Commit

  • Author : jsg
    Date : 2024-05-24 10:05:55
    Hash : b4155af8
    Message : remove unneeded includes

  • sys/dev/pv/hypervic.c
  • /*-
     * Copyright (c) 2009-2016 Microsoft Corp.
     * Copyright (c) 2012 NetApp Inc.
     * Copyright (c) 2012 Citrix Inc.
     * Copyright (c) 2016 Mike Belopuhov <mike@esdenera.com>
     * All rights reserved.
     *
     * Redistribution and use in source and binary forms, with or without
     * modification, are permitted provided that the following conditions
     * are met:
     * 1. Redistributions of source code must retain the above copyright
     *    notice unmodified, this list of conditions, and the following
     *    disclaimer.
     * 2. Redistributions in binary form must reproduce the above copyright
     *    notice, this list of conditions and the following disclaimer in the
     *    documentation and/or other materials provided with the distribution.
     *
     * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
     * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
     * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
     * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
     * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
     * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
     * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
     * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
     * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     */
    
    /*
     * The OpenBSD port was done under funding by Esdenera Networks GmbH.
     */
    
    #include <sys/param.h>
    
    /* Hyperv requires locked atomic operations */
    #ifndef MULTIPROCESSOR
    #define _HYPERVMPATOMICS
    #define MULTIPROCESSOR
    #endif
    #include <sys/atomic.h>
    #ifdef _HYPERVMPATOMICS
    #undef MULTIPROCESSOR
    #undef _HYPERVMPATOMICS
    #endif
    
    #include <sys/systm.h>
    #include <sys/malloc.h>
    #include <sys/kernel.h>
    #include <sys/device.h>
    #include <sys/pool.h>
    #include <sys/task.h>
    #include <sys/sensors.h>
    
    #include <machine/bus.h>
    
    #include <net/if.h>
    #include <net/if_dl.h>
    #include <netinet/in.h>
    #include <netinet/if_ether.h>
    
    #include <dev/pv/pvvar.h>
    #include <dev/pv/hypervreg.h>
    #include <dev/pv/hypervvar.h>
    #include <dev/pv/hypervicreg.h>
    
    struct hv_ic_dev;
    
    #define NKVPPOOLS			4
    #define MAXPOOLENTS			1023
    
    /*
     * One key/value pair kept in a KVP pool.  The key and value buffers are
     * half the size of the HV_KVP_MAX_*_SIZE limits because the import path
     * stores one byte per two-byte UTF-16LE unit.
     */
    struct kvp_entry {
    	/* Index handed out at creation time, used by kvp_pool_export() */
    	int				kpe_index;
    	/* One of the HV_KVP_REG_* value types */
    	uint32_t			kpe_valtype;
    	uint8_t				kpe_key[HV_KVP_MAX_KEY_SIZE / 2];
    	uint8_t				kpe_val[HV_KVP_MAX_VAL_SIZE / 2];
    	/* Linkage on the owning pool's kvp_entries list */
    	TAILQ_ENTRY(kvp_entry)		kpe_entry;
    };
    TAILQ_HEAD(kvp_list, kvp_entry);
    
    /*
     * A single KVP pool: a list of entries, the mutex that the kvp_pool_*()
     * routines take around every list access, and the counter from which new
     * entries get their kpe_index (masked with MAXPOOLENTS).
     */
    struct kvp_pool {
    	struct kvp_list			kvp_entries;
    	struct mutex			kvp_lock;
    	u_int				kvp_index;
    };
    
    /* Allocator for struct kvp_entry, set up by hv_kvp_attach() */
    struct pool				kvp_entry_pool;
    
    /*
     * Private state of the KVP service (stored in dv_priv): one kvp_pool per
     * HV_KVP_POOL_* index.
     */
    struct hv_kvp {
    	struct kvp_pool			kvp_pool[NKVPPOOLS];
    };
    
    /*
     * Attach routines and channel callbacks of the individual integration
     * components, referenced from the hv_ic_devs[] table, plus hv_kvop(),
     * the key/value accessor that hv_kvp_attach() registers with pvbus.
     */
    int	hv_heartbeat_attach(struct hv_ic_dev *);
    void	hv_heartbeat(void *);
    int	hv_kvp_attach(struct hv_ic_dev *);
    void	hv_kvp(void *);
    int	hv_kvop(void *, int, char *, char *, size_t);
    int	hv_shutdown_attach(struct hv_ic_dev *);
    void	hv_shutdown(void *);
    int	hv_timesync_attach(struct hv_ic_dev *);
    void	hv_timesync(void *);
    
    /*
     * Table of the integration components handled by this file.  The first
     * four members are static configuration; dv_ch, dv_buf and dv_priv start
     * out zeroed and are filled in while attaching.
     */
    static struct hv_ic_dev {
    	const char		 *dv_name;
    	const struct hv_guid	 *dv_type;
    	int			(*dv_attach)(struct hv_ic_dev *);
    	void			(*dv_handler)(void *);
    	struct hv_channel	 *dv_ch;
    	uint8_t			 *dv_buf;
    	void			 *dv_priv;
    } hv_ic_devs[] = {
    	{
    		.dv_name	= "heartbeat",
    		.dv_type	= &hv_guid_heartbeat,
    		.dv_attach	= hv_heartbeat_attach,
    		.dv_handler	= hv_heartbeat
    	},
    	{
    		.dv_name	= "kvp",
    		.dv_type	= &hv_guid_kvp,
    		.dv_attach	= hv_kvp_attach,
    		.dv_handler	= hv_kvp
    	},
    	{
    		.dv_name	= "shutdown",
    		.dv_type	= &hv_guid_shutdown,
    		.dv_attach	= hv_shutdown_attach,
    		.dv_handler	= hv_shutdown
    	},
    	{
    		.dv_name	= "timesync",
    		.dv_type	= &hv_guid_timesync,
    		.dv_attach	= hv_timesync_attach,
    		.dv_handler	= hv_timesync
    	}
    };
    
    /*
     * Pool names accepted by kvp_poolname().  poolnamelen counts the
     * terminating NUL, so the strncasecmp() there is a whole-string match.
     */
    static const struct {
    	enum hv_kvp_pool		 poolidx;
    	const char			*poolname;
    	size_t				 poolnamelen;
    } kvp_pools[] = {
    	{
    		.poolidx	= HV_KVP_POOL_EXTERNAL,
    		.poolname	= "External",
    		.poolnamelen	= sizeof("External")
    	},
    	{
    		.poolidx	= HV_KVP_POOL_GUEST,
    		.poolname	= "Guest",
    		.poolnamelen	= sizeof("Guest")
    	},
    	{
    		.poolidx	= HV_KVP_POOL_AUTO,
    		.poolname	= "Auto",
    		.poolnamelen	= sizeof("Auto")
    	},
    	{
    		.poolidx	= HV_KVP_POOL_AUTO_EXTERNAL,
    		.poolname	= "Guest/Parameters",
    		.poolnamelen	= sizeof("Guest/Parameters")
    	}
    };
    
    /*
     * Initial contents of the 'Auto' pool; hv_kvp_attach() inserts every row
     * as a string value.
     */
    static const struct {
    	int				 keyidx;
    	const char			*keyname;
    	const char			*value;
    } kvp_pool_auto[] = {
    	{ .keyidx = 0, .keyname = "FullyQualifiedDomainName",
    	  .value = hostname },
    	{ .keyidx = 1, .keyname = "IntegrationServicesVersion",
    	  .value = "6.6.6" },
    	{ .keyidx = 2, .keyname = "NetworkAddressIPv4",
    	  .value = "127.0.0.1" },
    	{ .keyidx = 3, .keyname = "NetworkAddressIPv6",
    	  .value = "::1" },
    	{ .keyidx = 4, .keyname = "OSBuildNumber",
    	  .value = osversion },
    	{ .keyidx = 5, .keyname = "OSName",
    	  .value = ostype },
    	/* The major version is a fixed string, not derived from osrelease */
    	{ .keyidx = 6, .keyname = "OSMajorVersion",
    	  .value = "6" },
    	/* Skips the first two characters of osrelease */
    	{ .keyidx = 7, .keyname = "OSMinorVersion",
    	  .value = &osrelease[2] },
    	{ .keyidx = 8, .keyname = "OSVersion",
    	  .value = osrelease },
    	/* Values as specified in SYSTEM_INFO.wProcessorArchitecture */
    #ifdef __amd64__
    	{ .keyidx = 9, .keyname = "ProcessorArchitecture",
    	  .value = "9" }
    #else
    	{ .keyidx = 9, .keyname = "ProcessorArchitecture",
    	  .value = "0" }
    #endif
    };
    
    /*
     * Walk the hv_ic_devs[] table and, for every service that has a matching
     * offered channel, run its attach routine and open the channel with the
     * service handler as callback.  The names of the services that came up
     * are printed on a single line prefixed with the device name.
     */
    void
    hv_attach_icdevs(struct hv_softc *sc)
    {
    	struct hv_ic_dev *icd;
    	struct hv_channel *chan;
    	int n, attached = 0;
    
    	for (n = 0; n < nitems(hv_ic_devs); n++) {
    		icd = &hv_ic_devs[n];
    
    		/* Find an offered, non-monitor channel of the right type */
    		TAILQ_FOREACH(chan, &sc->sc_channels, ch_entry) {
    			if (chan->ch_state == HV_CHANSTATE_OFFERED &&
    			    !(chan->ch_flags & CHF_MONITOR) &&
    			    memcmp(icd->dv_type, &chan->ch_type,
    			    sizeof(chan->ch_type)) == 0)
    				break;
    		}
    		if (chan == NULL)
    			continue;
    
    		icd->dv_ch = chan;
    
    		/*
    		 * None of these services is performance critical, so
    		 * batched reading is switched off.  Some of them (KVP
    		 * for one) can only cope with a single host message
    		 * at a time anyway.
    		 */
    		chan->ch_flags &= ~CHF_BATCHED;
    
    		if (icd->dv_attach != NULL && icd->dv_attach(icd) != 0)
    			continue;
    
    		if (hv_channel_open(chan, VMBUS_IC_BUFRINGSIZE, NULL, 0,
    		    icd->dv_handler, icd) != 0) {
    			printf("%s: failed to open channel for %s\n",
    			    sc->sc_dev.dv_xname, icd->dv_name);
    			continue;
    		}
    		evcount_attach(&chan->ch_evcnt, icd->dv_name, &sc->sc_idtvec);
    
    		if (attached)
    			printf(", %s", icd->dv_name);
    		else {
    			printf("%s: %s", sc->sc_dev.dv_xname, icd->dv_name);
    			attached = 1;
    		}
    	}
    	if (attached)
    		printf("\n");
    }
    
    /*
     * Answer a version negotiation request in place.
     *
     * The message carries ic_fwver_cnt framework versions followed by
     * ic_msgver_cnt message versions in ic_ver[].  From each group one version
     * is picked that does not exceed the supported fwver/msgver, and the
     * message is rewritten to carry exactly that pair.  *rlen is raised, if
     * necessary, so that the reply covers the header plus two versions.
     *
     * NOTE(review): the counts taken from the message are not compared
     * against the number of bytes actually received -- confirm the callers'
     * buffers make this safe.
     */
    static inline void
    hv_ic_negotiate(struct vmbus_icmsg_hdr *hdr, uint32_t *rlen, uint32_t fwver,
        uint32_t msgver)
    {
    	struct vmbus_icmsg_negotiate *msg;
    	uint16_t propmin, propmaj, chosenmaj, chosenmin;
    	int i;
    
    	msg = (struct vmbus_icmsg_negotiate *)hdr;
    
    	/*
    	 * Framework version: a proposal replaces the current choice when its
    	 * major is strictly higher and its minor is not lower, as long as
    	 * neither exceeds what we support.
    	 */
    	chosenmaj = chosenmin = 0;
    	for (i = 0; i < msg->ic_fwver_cnt; i++) {
    		propmaj = VMBUS_ICVER_MAJOR(msg->ic_ver[i]);
    		propmin = VMBUS_ICVER_MINOR(msg->ic_ver[i]);
    		if (propmaj > chosenmaj &&
    		    propmaj <= VMBUS_ICVER_MAJOR(fwver) &&
    		    propmin >= chosenmin &&
    		    propmin <= VMBUS_ICVER_MINOR(fwver)) {
    			chosenmaj = propmaj;
    			chosenmin = propmin;
    		}
    	}
    	fwver = VMBUS_IC_VERSION(chosenmaj, chosenmin);
    
    	/* Message version: same rule; i continues where the loop above ended */
    	chosenmaj = chosenmin = 0;
    	for (; i < msg->ic_fwver_cnt + msg->ic_msgver_cnt; i++) {
    		propmaj = VMBUS_ICVER_MAJOR(msg->ic_ver[i]);
    		propmin = VMBUS_ICVER_MINOR(msg->ic_ver[i]);
    		if (propmaj > chosenmaj &&
    		    propmaj <= VMBUS_ICVER_MAJOR(msgver) &&
    		    propmin >= chosenmin &&
    		    propmin <= VMBUS_ICVER_MINOR(msgver)) {
    			chosenmaj = propmaj;
    			chosenmin = propmin;
    		}
    	}
    	msgver = VMBUS_IC_VERSION(chosenmaj, chosenmin);
    
    	/* Only now overwrite the proposal list with the chosen pair */
    	msg->ic_fwver_cnt = 1;
    	msg->ic_ver[0] = fwver;
    	msg->ic_msgver_cnt = 1;
    	msg->ic_ver[1] = msgver;
    	hdr->ic_dsize = sizeof(*msg) + 2 * sizeof(uint32_t) -
    	    sizeof(struct vmbus_icmsg_hdr);
    	if (*rlen < sizeof(*msg) + 2 * sizeof(uint32_t))
    		*rlen = sizeof(*msg) + 2 * sizeof(uint32_t);
    }
    
    /*
     * Allocate the one-page, zeroed receive buffer of the heartbeat service.
     * Returns 0 on success and -1 when the allocation fails.
     */
    int
    hv_heartbeat_attach(struct hv_ic_dev *dv)
    {
    	struct hv_softc *sc = dv->dv_ch->ch_sc;
    	int mflags = M_ZERO;
    
    	/* Only sleep for memory once the system is past autoconf */
    	mflags |= cold ? M_NOWAIT : M_WAITOK;
    
    	dv->dv_buf = malloc(PAGE_SIZE, M_DEVBUF, mflags);
    	if (dv->dv_buf != NULL)
    		return (0);
    
    	printf("%s: failed to allocate receive buffer\n",
    	    sc->sc_dev.dv_xname);
    	return (-1);
    }
    
    void
    hv_heartbeat(void *arg)
    {
    	struct hv_ic_dev *dv = arg;
    	struct hv_channel *ch = dv->dv_ch;
    	struct hv_softc *sc = ch->ch_sc;
    	struct vmbus_icmsg_hdr *hdr;
    	struct vmbus_icmsg_heartbeat *msg;
    	uint64_t rid;
    	uint32_t rlen;
    	int rv;
    
    	rv = hv_channel_recv(ch, dv->dv_buf, PAGE_SIZE, &rlen, &rid, 0);
    	if (rv || rlen == 0) {
    		if (rv != EAGAIN)
    			DPRINTF("%s: heartbeat rv=%d rlen=%u\n",
    			    sc->sc_dev.dv_xname, rv, rlen);
    		return;
    	}
    	if (rlen < sizeof(struct vmbus_icmsg_hdr)) {
    		DPRINTF("%s: heartbeat short read rlen=%u\n",
    			    sc->sc_dev.dv_xname, rlen);
    		return;
    	}
    	hdr = (struct vmbus_icmsg_hdr *)dv->dv_buf;
    	switch (hdr->ic_type) {
    	case VMBUS_ICMSG_TYPE_NEGOTIATE:
    		hv_ic_negotiate(hdr, &rlen, VMBUS_IC_VERSION(3, 0),
    		    VMBUS_IC_VERSION(3, 0));
    		break;
    	case VMBUS_ICMSG_TYPE_HEARTBEAT:
    		msg = (struct vmbus_icmsg_heartbeat *)hdr;
    		msg->ic_seq += 1;
    		break;
    	default:
    		printf("%s: unhandled heartbeat message type %u\n",
    		    sc->sc_dev.dv_xname, hdr->ic_type);
    		return;
    	}
    	hdr->ic_flags = VMBUS_ICMSG_FLAG_TRANSACTION | VMBUS_ICMSG_FLAG_RESPONSE;
    	hv_channel_send(ch, dv->dv_buf, rlen, rid, VMBUS_CHANPKT_TYPE_INBAND, 0);
    }
    
    /* Task body queued by hv_shutdown(): hand the request over to pvbus */
    static void
    hv_shutdown_task(void *arg)
    {
    	pvbus_shutdown(&((struct hv_softc *)arg)->sc_dev);
    }
    
    /*
     * Allocate the one-page, zeroed receive buffer of the shutdown service
     * and prepare the task that performs the actual shutdown.  Returns 0 on
     * success and -1 when the allocation fails.
     */
    int
    hv_shutdown_attach(struct hv_ic_dev *dv)
    {
    	struct hv_softc *sc = dv->dv_ch->ch_sc;
    	int mflags = M_ZERO;
    
    	/* Only sleep for memory once the system is past autoconf */
    	mflags |= cold ? M_NOWAIT : M_WAITOK;
    
    	dv->dv_buf = malloc(PAGE_SIZE, M_DEVBUF, mflags);
    	if (dv->dv_buf == NULL) {
    		printf("%s: failed to allocate receive buffer\n",
    		    sc->sc_dev.dv_xname);
    		return (-1);
    	}
    
    	task_set(&sc->sc_sdtask, hv_shutdown_task, sc);
    	return (0);
    }
    
    void
    hv_shutdown(void *arg)
    {
    	struct hv_ic_dev *dv = arg;
    	struct hv_channel *ch = dv->dv_ch;
    	struct hv_softc *sc = ch->ch_sc;
    	struct vmbus_icmsg_hdr *hdr;
    	struct vmbus_icmsg_shutdown *msg;
    	uint64_t rid;
    	uint32_t rlen;
    	int rv, shutdown = 0;
    
    	rv = hv_channel_recv(ch, dv->dv_buf, PAGE_SIZE, &rlen, &rid, 0);
    	if (rv || rlen == 0) {
    		if (rv != EAGAIN)
    			DPRINTF("%s: shutdown rv=%d rlen=%u\n",
    			    sc->sc_dev.dv_xname, rv, rlen);
    		return;
    	}
    	if (rlen < sizeof(struct vmbus_icmsg_hdr)) {
    		DPRINTF("%s: shutdown short read rlen=%u\n",
    			    sc->sc_dev.dv_xname, rlen);
    		return;
    	}
    	hdr = (struct vmbus_icmsg_hdr *)dv->dv_buf;
    	switch (hdr->ic_type) {
    	case VMBUS_ICMSG_TYPE_NEGOTIATE:
    		hv_ic_negotiate(hdr, &rlen, VMBUS_IC_VERSION(3, 0),
    		    VMBUS_IC_VERSION(3, 0));
    		break;
    	case VMBUS_ICMSG_TYPE_SHUTDOWN:
    		msg = (struct vmbus_icmsg_shutdown *)hdr;
    		if (msg->ic_haltflags == 0 || msg->ic_haltflags == 1) {
    			shutdown = 1;
    			hdr->ic_status = VMBUS_ICMSG_STATUS_OK;
    		} else
    			hdr->ic_status = VMBUS_ICMSG_STATUS_FAIL;
    		break;
    	default:
    		printf("%s: unhandled shutdown message type %u\n",
    		    sc->sc_dev.dv_xname, hdr->ic_type);
    		return;
    	}
    
    	hdr->ic_flags = VMBUS_ICMSG_FLAG_TRANSACTION | VMBUS_ICMSG_FLAG_RESPONSE;
    	hv_channel_send(ch, dv->dv_buf, rlen, rid, VMBUS_CHANPKT_TYPE_INBAND, 0);
    
    	if (shutdown)
    		task_add(systq, &sc->sc_sdtask);
    }
    
    /*
     * Allocate the one-page, zeroed receive buffer of the timesync service
     * and register a time delta sensor named after the device.  Returns 0 on
     * success and -1 when the allocation fails.
     */
    int
    hv_timesync_attach(struct hv_ic_dev *dv)
    {
    	struct hv_softc *sc = dv->dv_ch->ch_sc;
    	int mflags = M_ZERO;
    
    	/* Only sleep for memory once the system is past autoconf */
    	mflags |= cold ? M_NOWAIT : M_WAITOK;
    
    	dv->dv_buf = malloc(PAGE_SIZE, M_DEVBUF, mflags);
    	if (dv->dv_buf == NULL) {
    		printf("%s: failed to allocate receive buffer\n",
    		    sc->sc_dev.dv_xname);
    		return (-1);
    	}
    
    	/* The sensor has no reading until the first sample arrives */
    	sc->sc_sensor.type = SENSOR_TIMEDELTA;
    	sc->sc_sensor.status = SENSOR_S_UNKNOWN;
    
    	strlcpy(sc->sc_sensordev.xname, sc->sc_dev.dv_xname,
    	    sizeof(sc->sc_sensordev.xname));
    	sensor_attach(&sc->sc_sensordev, &sc->sc_sensor);
    	sensordev_install(&sc->sc_sensordev);
    
    	return (0);
    }
    
    void
    hv_timesync(void *arg)
    {
    	struct hv_ic_dev *dv = arg;
    	struct hv_channel *ch = dv->dv_ch;
    	struct hv_softc *sc = ch->ch_sc;
    	struct vmbus_icmsg_hdr *hdr;
    	struct vmbus_icmsg_timesync *msg;
    	struct timespec guest, host, diff;
    	uint64_t tns;
    	uint64_t rid;
    	uint32_t rlen;
    	int rv;
    
    	rv = hv_channel_recv(ch, dv->dv_buf, PAGE_SIZE, &rlen, &rid, 0);
    	if (rv || rlen == 0) {
    		if (rv != EAGAIN)
    			DPRINTF("%s: timesync rv=%d rlen=%u\n",
    			    sc->sc_dev.dv_xname, rv, rlen);
    		return;
    	}
    	if (rlen < sizeof(struct vmbus_icmsg_hdr)) {
    		DPRINTF("%s: timesync short read rlen=%u\n",
    			    sc->sc_dev.dv_xname, rlen);
    		return;
    	}
    	hdr = (struct vmbus_icmsg_hdr *)dv->dv_buf;
    	switch (hdr->ic_type) {
    	case VMBUS_ICMSG_TYPE_NEGOTIATE:
    		hv_ic_negotiate(hdr, &rlen, VMBUS_IC_VERSION(3, 0),
    		    VMBUS_IC_VERSION(3, 0));
    		break;
    	case VMBUS_ICMSG_TYPE_TIMESYNC:
    		msg = (struct vmbus_icmsg_timesync *)hdr;
    		if (msg->ic_tsflags == VMBUS_ICMSG_TS_FLAG_SAMPLE) {
    			microtime(&sc->sc_sensor.tv);
    			nanotime(&guest);
    			tns = (msg->ic_hvtime - 116444736000000000LL) * 100;
    			host.tv_sec = tns / 1000000000LL;
    			host.tv_nsec = tns % 1000000000LL;
    			timespecsub(&guest, &host, &diff);
    			sc->sc_sensor.value = (int64_t)diff.tv_sec *
    			    1000000000LL + diff.tv_nsec;
    			sc->sc_sensor.status = SENSOR_S_OK;
    		}
    		break;
    	default:
    		printf("%s: unhandled timesync message type %u\n",
    		    sc->sc_dev.dv_xname, hdr->ic_type);
    		return;
    	}
    
    	hdr->ic_flags = VMBUS_ICMSG_FLAG_TRANSACTION | VMBUS_ICMSG_FLAG_RESPONSE;
    	hv_channel_send(ch, dv->dv_buf, rlen, rid, VMBUS_CHANPKT_TYPE_INBAND, 0);
    }
    
    /*
     * Widen slen bytes from src into two-byte little-endian units at dst by
     * appending a zero high byte to each.  dlen is the size of dst and is
     * only used for the assertion.  Returns the number of bytes written.
     */
    static inline int
    copyout_utf16le(void *dst, const void *src, size_t dlen, size_t slen)
    {
    	const uint8_t *in = src;
    	uint8_t *out = dst;
    	int n, written = 0;
    
    	KASSERT(dlen >= slen * 2);
    
    	for (n = 0; n < slen; n++) {
    		out[written++] = in[n];
    		out[written++] = '\0';
    	}
    	return (written);
    }
    
    /*
     * Narrow a UTF-16LE buffer of slen bytes at src into single bytes at dst
     * by keeping only the low byte of every two-byte unit.  dlen is the size
     * of dst and is only used for the assertion.  Returns the number of
     * bytes written.
     */
    static inline int
    copyin_utf16le(void *dst, const void *src, size_t dlen, size_t slen)
    {
    	const uint8_t *sp = src;
    	uint8_t *dp = dst;
    	int i, j;
    
    	/*
    	 * The loop below produces one byte per started unit, i.e. the
    	 * count rounds up for an odd slen, so the check has to round up
    	 * as well or it would let a one byte overrun through.
    	 */
    	KASSERT(dlen >= (slen + 1) / 2);
    
    	for (i = j = 0; i < slen; i += 2, j++)
    		dp[j] = sp[i];
    	return (j);
    }
    
    /*
     * Compare the byte string key against the UTF-16LE string ukey, looking
     * only at the low byte of each of the ukeylen / 2 (rounded up) units.
     * Returns 0 when they agree and the absolute difference of the first
     * mismatching pair otherwise.
     */
    static inline int
    keycmp_utf16le(const uint8_t *key, const uint8_t *ukey, size_t ukeylen)
    {
    	const uint8_t *kp = key;
    	size_t off;
    
    	for (off = 0; off < ukeylen; off += 2, kp++) {
    		uint8_t a = *kp;
    		uint8_t b = ukey[off];
    
    		if (a == b)
    			continue;
    		return (a > b ? a - b : b - a);
    	}
    	return (0);
    }
    
    /* Bring a pool into its empty state: no entries, index counter at zero */
    static void
    kvp_pool_init(struct kvp_pool *kvpl)
    {
    	kvpl->kvp_index = 0;
    	mtx_init(&kvpl->kvp_lock, IPL_NET);
    	TAILQ_INIT(&kvpl->kvp_entries);
    }
    
    /*
     * Add a new entry to a pool.
     *
     * key is a NUL-terminated byte string.  For HV_KVP_REG_SZ val is a
     * NUL-terminated string that is copied (and truncated if necessary) into
     * the value buffer; for any other valtype vallen raw bytes are copied.
     *
     * Returns 0 on success, ERANGE if the key or a raw value doesn't fit,
     * EEXIST if the key is already present and ENOMEM if no entry could be
     * allocated.
     */
    static int
    kvp_pool_insert(struct kvp_pool *kvpl, const char *key, const char *val,
        uint32_t vallen, uint32_t valtype)
    {
    	struct kvp_entry *kpe;
    	size_t keylen = strlen(key);
    
    	/*
    	 * The key has to fit including its terminator: a truncated key
    	 * could never be found again by the strcmp() based lookups.
    	 */
    	if (keylen >= sizeof(kpe->kpe_key))
    		return (ERANGE);
    	/* Raw values are copied with memcpy() and must not overrun */
    	if (valtype != HV_KVP_REG_SZ && vallen > sizeof(kpe->kpe_val))
    		return (ERANGE);
    
    	mtx_enter(&kvpl->kvp_lock);
    
    	TAILQ_FOREACH(kpe, &kvpl->kvp_entries, kpe_entry) {
    		if (strcmp(kpe->kpe_key, key) == 0) {
    			mtx_leave(&kvpl->kvp_lock);
    			return (EEXIST);
    		}
    	}
    
    	/* PR_NOWAIT: we're holding a mutex and must not sleep */
    	kpe = pool_get(&kvp_entry_pool, PR_ZERO | PR_NOWAIT);
    	if (kpe == NULL) {
    		mtx_leave(&kvpl->kvp_lock);
    		return (ENOMEM);
    	}
    
    	strlcpy(kpe->kpe_key, key, sizeof(kpe->kpe_key));
    
    	/* Bound the value by the value buffer, not by the key size */
    	if ((kpe->kpe_valtype = valtype) == HV_KVP_REG_SZ)
    		strlcpy(kpe->kpe_val, val, sizeof(kpe->kpe_val));
    	else
    		memcpy(kpe->kpe_val, val, vallen);
    
    	kpe->kpe_index = kvpl->kvp_index++ & MAXPOOLENTS;
    
    	TAILQ_INSERT_TAIL(&kvpl->kvp_entries, kpe, kpe_entry);
    
    	mtx_leave(&kvpl->kvp_lock);
    
    	return (0);
    }
    
    /*
     * Replace the value of an existing entry.
     *
     * Arguments are the same as for kvp_pool_insert().  Returns 0 on success,
     * ERANGE if the key or a raw value doesn't fit and ENOENT if there is no
     * entry with this key.
     */
    static int
    kvp_pool_update(struct kvp_pool *kvpl, const char *key, const char *val,
        uint32_t vallen, uint32_t valtype)
    {
    	struct kvp_entry *kpe;
    	size_t keylen = strlen(key);
    
    	/* Stored keys always leave room for the terminator */
    	if (keylen >= sizeof(kpe->kpe_key))
    		return (ERANGE);
    	/* Raw values are copied with memcpy() and must not overrun */
    	if (valtype != HV_KVP_REG_SZ && vallen > sizeof(kpe->kpe_val))
    		return (ERANGE);
    
    	mtx_enter(&kvpl->kvp_lock);
    
    	TAILQ_FOREACH(kpe, &kvpl->kvp_entries, kpe_entry) {
    		if (strcmp(kpe->kpe_key, key) == 0)
    			break;
    	}
    	if (kpe == NULL) {
    		mtx_leave(&kvpl->kvp_lock);
    		return (ENOENT);
    	}
    
    	/* Bound the value by the value buffer, not by the key size */
    	if ((kpe->kpe_valtype = valtype) == HV_KVP_REG_SZ)
    		strlcpy(kpe->kpe_val, val, sizeof(kpe->kpe_val));
    	else
    		memcpy(kpe->kpe_val, val, vallen);
    
    	mtx_leave(&kvpl->kvp_lock);
    
    	return (0);
    }
    
    /*
     * Store a key/value pair received from the host.
     *
     * key and val are UTF-16LE buffers of keylen and vallen bytes.  If an
     * entry with a matching key exists its value is replaced, otherwise a
     * new entry is created.  Both are stored narrowed to one byte per unit.
     *
     * Returns 0 on success, ERANGE if either length exceeds the protocol
     * limit and ENOMEM if no entry could be allocated.
     */
    static int
    kvp_pool_import(struct kvp_pool *kvpl, const char *key, uint32_t keylen,
        const char *val, uint32_t vallen, uint32_t valtype)
    {
    	struct kvp_entry *kpe;
    
    	if (keylen > HV_KVP_MAX_KEY_SIZE ||
    	    vallen > HV_KVP_MAX_VAL_SIZE)
    		return (ERANGE);
    
    	mtx_enter(&kvpl->kvp_lock);
    
    	TAILQ_FOREACH(kpe, &kvpl->kvp_entries, kpe_entry) {
    		if (keycmp_utf16le(kpe->kpe_key, key, keylen) == 0)
    			break;
    	}
    	if (kpe == NULL) {
    		/* PR_NOWAIT: we're holding a mutex and must not sleep */
    		kpe = pool_get(&kvp_entry_pool, PR_ZERO | PR_NOWAIT);
    		if (kpe == NULL) {
    			mtx_leave(&kvpl->kvp_lock);
    			return (ENOMEM);
    		}
    
    		copyin_utf16le(kpe->kpe_key, key, sizeof(kpe->kpe_key),
    		    keylen);
    		/*
    		 * A maximum length key without terminator would fill
    		 * the whole buffer; other routines run strlen() and
    		 * strcmp() on it, so make sure it always ends.
    		 */
    		kpe->kpe_key[sizeof(kpe->kpe_key) - 1] = '\0';
    
    		kpe->kpe_index = kvpl->kvp_index++ & MAXPOOLENTS;
    
    		TAILQ_INSERT_TAIL(&kvpl->kvp_entries, kpe, kpe_entry);
    	}
    
    	copyin_utf16le(kpe->kpe_val, val, sizeof(kpe->kpe_val), vallen);
    	/* Same as for the key: the value is read back as a string */
    	kpe->kpe_val[sizeof(kpe->kpe_val) - 1] = '\0';
    	kpe->kpe_valtype = valtype;
    
    	mtx_leave(&kvpl->kvp_lock);
    
    	return (0);
    }
    
    /*
     * Hand the entry with the given index out to the host: key and value are
     * written as UTF-16LE including their terminators, and the resulting
     * byte counts and the value type are stored through the pointers.
     * Returns 0 on success and ENOENT if no entry carries this index.
     */
    static int
    kvp_pool_export(struct kvp_pool *kvpl, uint32_t index, char *key,
        uint32_t *keylen, char *val, uint32_t *vallen, uint32_t *valtype)
    {
    	struct kvp_entry *ent;
    	int error = ENOENT;
    
    	mtx_enter(&kvpl->kvp_lock);
    
    	TAILQ_FOREACH(ent, &kvpl->kvp_entries, kpe_entry) {
    		if (ent->kpe_index != index)
    			continue;
    
    		*keylen = copyout_utf16le(key, ent->kpe_key,
    		    HV_KVP_MAX_KEY_SIZE, strlen(ent->kpe_key) + 1);
    		*vallen = copyout_utf16le(val, ent->kpe_val,
    		    HV_KVP_MAX_VAL_SIZE, strlen(ent->kpe_val) + 1);
    		*valtype = ent->kpe_valtype;
    		error = 0;
    		break;
    	}
    
    	mtx_leave(&kvpl->kvp_lock);
    
    	return (error);
    }
    
    /*
     * Delete the entry whose key matches the UTF-16LE key of keylen bytes.
     * Returns 0 on success, ERANGE if keylen exceeds the protocol limit and
     * ENOENT if there's no such entry.
     */
    static int
    kvp_pool_remove(struct kvp_pool *kvpl, const char *key, uint32_t keylen)
    {
    	struct kvp_entry *kpe;
    
    	/*
    	 * keylen comes from the host; without this bound the comparison
    	 * below could be made to read past the stored key, just like
    	 * kvp_pool_import() guards against.
    	 */
    	if (keylen > HV_KVP_MAX_KEY_SIZE)
    		return (ERANGE);
    
    	mtx_enter(&kvpl->kvp_lock);
    
    	TAILQ_FOREACH(kpe, &kvpl->kvp_entries, kpe_entry) {
    		if (keycmp_utf16le(kpe->kpe_key, key, keylen) == 0)
    			break;
    	}
    	if (kpe == NULL) {
    		mtx_leave(&kvpl->kvp_lock);
    		return (ENOENT);
    	}
    
    	TAILQ_REMOVE(&kvpl->kvp_entries, kpe, kpe_entry);
    
    	mtx_leave(&kvpl->kvp_lock);
    
    	/* The entry is off the list, free it without holding the lock */
    	pool_put(&kvp_entry_pool, kpe);
    
    	return (0);
    }
    
    /*
     * Look an entry up by key and render its value as a string into val.
     * String values are copied, 32 and 64 bit values are printed in decimal;
     * val is left alone for any other value type.
     * Returns 0 on success, ERANGE if the caller's buffer is smaller than
     * HV_KVP_MAX_VAL_SIZE / 2 and ENOENT if the key isn't in the pool.
     */
    static int
    kvp_pool_extract(struct kvp_pool *kvpl, const char *key, char *val,
        uint32_t vallen)
    {
    	struct kvp_entry *ent;
    	int error;
    
    	if (vallen < HV_KVP_MAX_VAL_SIZE / 2)
    		return (ERANGE);
    
    	mtx_enter(&kvpl->kvp_lock);
    
    	TAILQ_FOREACH(ent, &kvpl->kvp_entries, kpe_entry) {
    		if (strcmp(ent->kpe_key, key) == 0)
    			break;
    	}
    
    	if (ent == NULL)
    		error = ENOENT;
    	else {
    		if (ent->kpe_valtype == HV_KVP_REG_SZ)
    			strlcpy(val, ent->kpe_val, HV_KVP_MAX_VAL_SIZE / 2);
    		else if (ent->kpe_valtype == HV_KVP_REG_U32)
    			snprintf(val, HV_KVP_MAX_VAL_SIZE / 2, "%u",
    			    *(uint32_t *)ent->kpe_val);
    		else if (ent->kpe_valtype == HV_KVP_REG_U64)
    			snprintf(val, HV_KVP_MAX_VAL_SIZE / 2, "%llu",
    			    *(uint64_t *)ent->kpe_val);
    		error = 0;
    	}
    
    	mtx_leave(&kvpl->kvp_lock);
    
    	return (error);
    }
    
    /*
     * Copy the key of the entry at list position next (counting from zero)
     * into key and store its length including the terminator in *keylen.
     * Returns 0 on success and -1 if the list has no such position.
     */
    static int
    kvp_pool_keys(struct kvp_pool *kvpl, int next, char *key, size_t *keylen)
    {
    	struct kvp_entry *ent;
    	int skipped = 0, rv = -1;
    
    	mtx_enter(&kvpl->kvp_lock);
    
    	TAILQ_FOREACH(ent, &kvpl->kvp_entries, kpe_entry) {
    		if (skipped < next) {
    			skipped++;
    			continue;
    		}
    		*keylen = strlen(ent->kpe_key) + 1;
    		strlcpy(key, ent->kpe_key, *keylen);
    		rv = 0;
    		break;
    	}
    
    	mtx_leave(&kvpl->kvp_lock);
    
    	return (rv);
    }
    
    int
    hv_kvp_attach(struct hv_ic_dev *dv)
    {
    	struct hv_channel *ch = dv->dv_ch;
    	struct hv_softc *sc = ch->ch_sc;
    	struct hv_kvp *kvp;
    	int i;
    
    	dv->dv_buf = malloc(2 * PAGE_SIZE, M_DEVBUF, M_ZERO |
    	    (cold ? M_NOWAIT : M_WAITOK));
    	if (dv->dv_buf == NULL) {
    		printf("%s: failed to allocate receive buffer\n",
    		    sc->sc_dev.dv_xname);
    		return (-1);
    	}
    
    	dv->dv_priv = malloc(sizeof(struct hv_kvp), M_DEVBUF, M_ZERO |
    	    (cold ? M_NOWAIT : M_WAITOK));
    	if (dv->dv_priv == NULL) {
    		free(dv->dv_buf, M_DEVBUF, 2 * PAGE_SIZE);
    		printf("%s: failed to allocate KVP private data\n",
    		    sc->sc_dev.dv_xname);
    		return (-1);
    	}
    	kvp = dv->dv_priv;
    
    	pool_init(&kvp_entry_pool, sizeof(struct kvp_entry), 0, IPL_NET, 0,
    	    "hvkvpl", NULL);
    
    	for (i = 0; i < NKVPPOOLS; i++)
    		kvp_pool_init(&kvp->kvp_pool[i]);
    
    	/* Initialize 'Auto' pool */
    	for (i = 0; i < nitems(kvp_pool_auto); i++) {
    		if (kvp_pool_insert(&kvp->kvp_pool[HV_KVP_POOL_AUTO],
    		    kvp_pool_auto[i].keyname, kvp_pool_auto[i].value,
    		    strlen(kvp_pool_auto[i].value), HV_KVP_REG_SZ))
    			DPRINTF("%s: failed to insert into 'Auto' pool\n",
    			    sc->sc_dev.dv_xname);
    	}
    
    	sc->sc_pvbus->hv_kvop = hv_kvop;
    	sc->sc_pvbus->hv_arg = dv;
    
    	return (0);
    }
    
    /*
     * Convert one hexadecimal digit character (either case) to its value.
     * Returns -1 for anything that isn't a hex digit.
     */
    static int
    nibble(int ch)
    {
    	int digit = -1;
    
    	if ('0' <= ch && ch <= '9')
    		digit = ch - '0';
    	else if ('A' <= ch && ch <= 'F')
    		digit = ch - 'A' + 10;
    	else if ('a' <= ch && ch <= 'f')
    		digit = ch - 'a' + 10;
    
    	return (digit);
    }
    
    /*
     * Report an address and netmask of the interface with the given MAC.
     *
     * mac is a UTF-16LE string of the form "XX:XX:XX:XX:XX:XX".  *family
     * selects the wanted address family (ADDR_FAMILY_*) on input and is set
     * to the family of the reported address on return.  addr and netmask
     * are output buffers of addrlen bytes each and receive UTF-16LE strings.
     *
     * With ADDR_FAMILY_NONE the first IPv4 address wins; failing that the
     * first non-link-local IPv6 address, and failing that the last
     * link-local one.
     *
     * Returns 0 on success and -1 if the MAC can't be parsed, the family is
     * unknown, or no matching interface or address exists.
     */
    static int
    kvp_get_ip_info(struct hv_kvp *kvp, const uint8_t *mac, uint8_t *family,
        uint8_t *addr, uint8_t *netmask, size_t addrlen)
    {
    	struct ifnet *ifp;
    	struct ifaddr *ifa, *ifa6, *ifa6ll;
    	struct sockaddr_in *sin;
    	struct sockaddr_in6 *sin6, sa6;
    	uint8_t	enaddr[ETHER_ADDR_LEN];
    	uint8_t ipaddr[INET6_ADDRSTRLEN];
    	int i, j, lo, hi, s, af, rv = -1;
    
    	/*
    	 * Convert from the UTF-16LE string format to binary: every
    	 * character takes two bytes, so an "XX:" group is six bytes.
    	 */
    	for (i = 0, j = 0; j < ETHER_ADDR_LEN; i += 6) {
    		if ((hi = nibble(mac[i])) == -1 ||
    		    (lo = nibble(mac[i+2])) == -1)
    			return (-1);
    		enaddr[j++] = hi << 4 | lo;
    	}
    
    	switch (*family) {
    	case ADDR_FAMILY_NONE:
    		af = AF_UNSPEC;
    		break;
    	case ADDR_FAMILY_IPV4:
    		af = AF_INET;
    		break;
    	case ADDR_FAMILY_IPV6:
    		af = AF_INET6;
    		break;
    	default:
    		return (-1);
    	}
    
    	KERNEL_LOCK();
    	s = splnet();
    
    	TAILQ_FOREACH(ifp, &ifnetlist, if_list) {
    		if (!memcmp(LLADDR(ifp->if_sadl), enaddr, ETHER_ADDR_LEN))
    			break;
    	}
    	if (ifp == NULL)
    		goto done;
    
    	ifa6 = ifa6ll = NULL;
    
    	/* Try to find a best matching address, preferring IPv4 */
    	TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) {
    		/*
    		 * First IPv4 address is always a best match unless
    		 * we were asked for an IPv6 address.
    		 */
    		if ((af == AF_INET || af == AF_UNSPEC) &&
    		    (ifa->ifa_addr->sa_family == AF_INET)) {
    			af = AF_INET;
    			goto found;
    		}
    		if ((af == AF_INET6 || af == AF_UNSPEC) &&
    		    (ifa->ifa_addr->sa_family == AF_INET6)) {
    			if (!IN6_IS_ADDR_LINKLOCAL(
    			    &satosin6(ifa->ifa_addr)->sin6_addr)) {
    				/* Done if we're looking for an IPv6 address */
    				if (af == AF_INET6)
    					goto found;
    				/* Stick to the first one */
    				if (ifa6 == NULL)
    					ifa6 = ifa;
    			} else	/* Pick the last one */
    				ifa6ll = ifa;
    		}
    	}
    	/* If we haven't found any IPv4 or IPv6 direct matches... */
    	if (ifa == NULL) {
    		/* ... try the first global IPv6 address... */
    		if (ifa6 != NULL)
    			ifa = ifa6;
    		/* ... or the last link-local...  */
    		else if (ifa6ll != NULL)
    			ifa = ifa6ll;
    		else
    			goto done;
    	}
     found:
    	/*
    	 * The copies below always transfer a full INET{,6}_ADDRSTRLEN
    	 * worth of bytes to the host.  Clear the scratch buffer before
    	 * each conversion so that whatever follows the terminator is
    	 * zeros rather than stale kernel stack contents.
    	 */
    	switch (af) {
    	case AF_INET:
    		sin = satosin(ifa->ifa_addr);
    		memset(ipaddr, 0, sizeof(ipaddr));
    		inet_ntop(AF_INET, &sin->sin_addr, ipaddr, sizeof(ipaddr));
    		copyout_utf16le(addr, ipaddr, addrlen, INET_ADDRSTRLEN);
    
    		sin = satosin(ifa->ifa_netmask);
    		memset(ipaddr, 0, sizeof(ipaddr));
    		inet_ntop(AF_INET, &sin->sin_addr, ipaddr, sizeof(ipaddr));
    		copyout_utf16le(netmask, ipaddr, addrlen, INET_ADDRSTRLEN);
    
    		*family = ADDR_FAMILY_IPV4;
    		break;
    	case AF_UNSPEC:
    	case AF_INET6:
    		sin6 = satosin6(ifa->ifa_addr);
    		if (IN6_IS_SCOPE_EMBED(&sin6->sin6_addr)) {
    			/* Report the address without the embedded scope */
    			sa6 = *satosin6(ifa->ifa_addr);
    			sa6.sin6_addr.s6_addr16[1] = 0;
    			sin6 = &sa6;
    		}
    		memset(ipaddr, 0, sizeof(ipaddr));
    		inet_ntop(AF_INET6, &sin6->sin6_addr, ipaddr, sizeof(ipaddr));
    		copyout_utf16le(addr, ipaddr, addrlen, INET6_ADDRSTRLEN);
    
    		sin6 = satosin6(ifa->ifa_netmask);
    		memset(ipaddr, 0, sizeof(ipaddr));
    		inet_ntop(AF_INET6, &sin6->sin6_addr, ipaddr, sizeof(ipaddr));
    		copyout_utf16le(netmask, ipaddr, addrlen, INET6_ADDRSTRLEN);
    
    		*family = ADDR_FAMILY_IPV6;
    		break;
    	}
    	rv = 0;
    
     done:
    	splx(s);
    	KERNEL_UNLOCK();
    
    	return (rv);
    }
    
    /*
     * Carry out a single KVP request from the host and leave the result code
     * in the message header (HV_KVP_S_OK or HV_KVP_S_CONT); the caller sends
     * the message back.
     *
     * The pool routines are called from inside the conditions below, so the
     * short-circuit order of each chain matters: a pool routine only runs if
     * the pool check in front of it succeeded.
     */
    static void
    hv_kvp_process(struct hv_kvp *kvp, struct vmbus_icmsg_kvp *msg)
    {
    	union hv_kvp_hdr *kvh = &msg->ic_kvh;
    	union hv_kvp_msg *kvm = &msg->ic_kvm;
    
    	switch (kvh->kvh_op) {
    	case HV_KVP_OP_SET:
    		/* Only 'Guest/Parameters' and 'External' accept host writes */
    		if (kvh->kvh_pool == HV_KVP_POOL_AUTO_EXTERNAL &&
    		    kvp_pool_import(&kvp->kvp_pool[HV_KVP_POOL_AUTO_EXTERNAL],
    		    kvm->kvm_val.kvm_key, kvm->kvm_val.kvm_keylen,
    		    kvm->kvm_val.kvm_val, kvm->kvm_val.kvm_vallen,
    		    kvm->kvm_val.kvm_valtype)) {
    			DPRINTF("%s: failed to import into 'Guest/Parameters'"
    			    " pool\n", __func__);
    			kvh->kvh_err = HV_KVP_S_CONT;
    		} else if (kvh->kvh_pool == HV_KVP_POOL_EXTERNAL &&
    		    kvp_pool_import(&kvp->kvp_pool[HV_KVP_POOL_EXTERNAL],
    		    kvm->kvm_val.kvm_key, kvm->kvm_val.kvm_keylen,
    		    kvm->kvm_val.kvm_val, kvm->kvm_val.kvm_vallen,
    		    kvm->kvm_val.kvm_valtype)) {
    			DPRINTF("%s: failed to import into 'External' pool\n",
    			    __func__);
    			kvh->kvh_err = HV_KVP_S_CONT;
    		} else if (kvh->kvh_pool != HV_KVP_POOL_AUTO_EXTERNAL &&
    		    kvh->kvh_pool != HV_KVP_POOL_EXTERNAL) {
    			/* Any other pool is refused */
    			kvh->kvh_err = HV_KVP_S_CONT;
    		} else
    			kvh->kvh_err = HV_KVP_S_OK;
    		break;
    	case HV_KVP_OP_DELETE:
    		/* Deleting is only possible in the 'External' pool */
    		if (kvh->kvh_pool != HV_KVP_POOL_EXTERNAL ||
    		    kvp_pool_remove(&kvp->kvp_pool[HV_KVP_POOL_EXTERNAL],
    		    kvm->kvm_del.kvm_key, kvm->kvm_del.kvm_keylen)) {
    			DPRINTF("%s: failed to remove from 'External' pool\n",
    			    __func__);
    			kvh->kvh_err = HV_KVP_S_CONT;
    		} else
    			kvh->kvh_err = HV_KVP_S_OK;
    		break;
    	case HV_KVP_OP_ENUMERATE:
    		/*
    		 * 'Auto' and 'Guest' entries are exported by index, a
    		 * failed lookup yields HV_KVP_S_CONT.  For every other
    		 * pool the message is left as is and HV_KVP_S_OK returned.
    		 */
    		if (kvh->kvh_pool == HV_KVP_POOL_AUTO &&
    		    kvp_pool_export(&kvp->kvp_pool[HV_KVP_POOL_AUTO],
    		    kvm->kvm_enum.kvm_index, kvm->kvm_enum.kvm_key,
    		    &kvm->kvm_enum.kvm_keylen, kvm->kvm_enum.kvm_val,
    		    &kvm->kvm_enum.kvm_vallen, &kvm->kvm_enum.kvm_valtype))
    			kvh->kvh_err = HV_KVP_S_CONT;
    		else if (kvh->kvh_pool == HV_KVP_POOL_GUEST &&
    		    kvp_pool_export(&kvp->kvp_pool[HV_KVP_POOL_GUEST],
    		    kvm->kvm_enum.kvm_index, kvm->kvm_enum.kvm_key,
    		    &kvm->kvm_enum.kvm_keylen, kvm->kvm_enum.kvm_val,
    		    &kvm->kvm_enum.kvm_vallen, &kvm->kvm_enum.kvm_valtype))
    			kvh->kvh_err = HV_KVP_S_CONT;
    		else
    			kvh->kvh_err = HV_KVP_S_OK;
    		break;
    	case HV_KVP_OP_GET_IP_INFO:
    		/* Handled for message versions with a major of up to 4 */
    		if (VMBUS_ICVER_MAJOR(msg->ic_hdr.ic_msgver) <= 4) {
    			struct vmbus_icmsg_kvp_addr *amsg;
    			struct hv_kvp_msg_addr *kva;
    
    			/* This request uses its own message layout */
    			amsg = (struct vmbus_icmsg_kvp_addr *)msg;
    			kva = &amsg->ic_kvm;
    
    			if (kvp_get_ip_info(kvp, kva->kvm_mac,
    			    &kva->kvm_family, kva->kvm_addr,
    			    kva->kvm_netmask, sizeof(kva->kvm_addr)))
    				kvh->kvh_err = HV_KVP_S_CONT;
    			else
    				kvh->kvh_err = HV_KVP_S_OK;
    		} else {
    			DPRINTF("KVP GET_IP_INFO fw %u.%u msg %u.%u dsize=%u\n",
    			    VMBUS_ICVER_MAJOR(msg->ic_hdr.ic_fwver),
    			    VMBUS_ICVER_MINOR(msg->ic_hdr.ic_fwver),
    			    VMBUS_ICVER_MAJOR(msg->ic_hdr.ic_msgver),
    			    VMBUS_ICVER_MINOR(msg->ic_hdr.ic_msgver),
    			    msg->ic_hdr.ic_dsize);
    			kvh->kvh_err = HV_KVP_S_CONT;
    		}
    		break;
    	default:
    		/* Unsupported operation */
    		DPRINTF("KVP message op %u pool %u\n", kvh->kvh_op,
    		    kvh->kvh_pool);
    		kvh->kvh_err = HV_KVP_S_CONT;
    	}
    }
    
    void
    hv_kvp(void *arg)
    {
    	struct hv_ic_dev *dv = arg;
    	struct hv_channel *ch = dv->dv_ch;
    	struct hv_softc *sc = ch->ch_sc;
    	struct hv_kvp *kvp = dv->dv_priv;
    	struct vmbus_icmsg_hdr *hdr;
    	uint64_t rid;
    	uint32_t fwver, msgver, rlen;
    	int rv;
    
    	for (;;) {
    		rv = hv_channel_recv(ch, dv->dv_buf, 2 * PAGE_SIZE,
    		    &rlen, &rid, 0);
    		if (rv || rlen == 0) {
    			if (rv != EAGAIN)
    				DPRINTF("%s: kvp rv=%d rlen=%u\n",
    				    sc->sc_dev.dv_xname, rv, rlen);
    			return;
    		}
    		if (rlen < sizeof(struct vmbus_icmsg_hdr)) {
    			DPRINTF("%s: kvp short read rlen=%u\n",
    			    sc->sc_dev.dv_xname, rlen);
    			return;
    		}
    		hdr = (struct vmbus_icmsg_hdr *)dv->dv_buf;
    		switch (hdr->ic_type) {
    		case VMBUS_ICMSG_TYPE_NEGOTIATE:
    			switch (sc->sc_proto) {
    			case VMBUS_VERSION_WS2008:
    				fwver = VMBUS_IC_VERSION(1, 0);
    				msgver = VMBUS_IC_VERSION(1, 0);
    				break;
    			case VMBUS_VERSION_WIN7:
    				fwver = VMBUS_IC_VERSION(3, 0);
    				msgver = VMBUS_IC_VERSION(3, 0);
    				break;
    			default:
    				fwver = VMBUS_IC_VERSION(3, 0);
    				msgver = VMBUS_IC_VERSION(4, 0);
    			}
    			hv_ic_negotiate(hdr, &rlen, fwver, msgver);
    			break;
    		case VMBUS_ICMSG_TYPE_KVP:
    			if (hdr->ic_dsize >= sizeof(union hv_kvp_hdr))
    				hv_kvp_process(kvp,
    				    (struct vmbus_icmsg_kvp *)hdr);
    			else
    				printf("%s: message too short: %u\n",
    				    sc->sc_dev.dv_xname, hdr->ic_dsize);
    			break;
    		default:
    			printf("%s: unhandled kvp message type %u\n",
    			    sc->sc_dev.dv_xname, hdr->ic_type);
    			continue;
    		}
    		hdr->ic_flags = VMBUS_ICMSG_FLAG_TRANSACTION |
    		    VMBUS_ICMSG_FLAG_RESPONSE;
    		hv_channel_send(ch, dv->dv_buf, rlen, rid,
    		    VMBUS_CHANPKT_TYPE_INBAND, 0);
    	}
    }
    
    /*
     * Split "<pool>/<key>" at the last slash and look the pool part up in
     * kvp_pools[] (case insensitive).  The slash is overwritten with a NUL
     * in any case; on a match *key is advanced to the part after it and the
     * pool index is returned, otherwise the result is -1.
     */
    static int
    kvp_poolname(char **key)
    {
    	char *slash;
    	int n, found = -1;
    
    	slash = strrchr(*key, '/');
    	if (slash == NULL)
    		return (found);
    
    	/* Terminate the pool name so it can be compared as a string */
    	*slash = '\0';
    
    	for (n = 0; n < nitems(kvp_pools); n++) {
    		if (strncasecmp(*key, kvp_pools[n].poolname,
    		    kvp_pools[n].poolnamelen) != 0)
    			continue;
    		found = kvp_pools[n].poolidx;
    		break;
    	}
    	if (found < 0)
    		return (found);
    
    	*key = slash + 1;
    	return (found);
    }
    
    /*
     * Key/value accessor registered with pvbus by hv_kvp_attach().
     *
     * key has the form "<pool>/<key>"; note that kvp_poolname() modifies it
     * in place.  With an empty <key> the keys of the pool are listed into
     * val, separated by newlines, regardless of op.  Otherwise
     * PVBUS_KVWRITE updates an existing key in the 'Auto' pool or inserts a
     * new one into the 'Guest' pool, and any other op reads the value of the
     * key into val.
     *
     * Returns 0 on success, EINVAL for an unknown pool or a write to a pool
     * that doesn't allow it, ERANGE if val is too small, or the error of the
     * pool routine that was called.
     */
    int
    hv_kvop(void *arg, int op, char *key, char *val, size_t vallen)
    {
    	struct hv_ic_dev *dv = arg;
    	struct hv_kvp *kvp = dv->dv_priv;
    	struct kvp_pool *kvpl;
    	int next, pool, error = 0;
    	char *vp = val;
    	size_t keylen;
    
    	pool = kvp_poolname(&key);
    	if (pool == -1)
    		return (EINVAL);
    
    	kvpl = &kvp->kvp_pool[pool];
    	if (strlen(key) == 0) {
    		/* vp is where the next key goes, one past the last newline */
    		for (next = 0; next < MAXPOOLENTS; next++) {
    			/* Insist on room for a key of maximum length */
    			if (val + vallen < vp + HV_KVP_MAX_KEY_SIZE / 2)
    				return (ERANGE);
    			if (kvp_pool_keys(kvpl, next, vp, &keylen))
    				goto out;
    			if (strlcat(val, "\n", vallen) >= vallen)
    				return (ERANGE);
    			/* keylen counts the NUL that the newline replaced */
    			vp += keylen;
    		}
     out:
    		/* Drop the newline after the last key */
    		if (vp > val)
    			*(vp - 1) = '\0';
    		return (0);
    	}
    
    	if (op == PVBUS_KVWRITE) {
    		if (pool == HV_KVP_POOL_AUTO)
    			error = kvp_pool_update(kvpl, key, val, vallen,
    			    HV_KVP_REG_SZ);
    		else if (pool == HV_KVP_POOL_GUEST)
    			error = kvp_pool_insert(kvpl, key, val, vallen,
    			    HV_KVP_REG_SZ);
    		else
    			error = EINVAL;
    	} else
    		error = kvp_pool_extract(kvpl, key, val, vallen);
    
    	return (error);
    }