// SPDX-License-Identifier: GPL-2.0-or-later /* * Virtio vhost-user driver * * Copyright(c) 2019 Intel Corporation * * This driver allows virtio devices to be used over a vhost-user socket. * * Guest devices can be instantiated by kernel module or command line * parameters. One device will be created for each parameter. Syntax: * * virtio_uml.device=:[:] * where: * := vhost-user socket path to connect * := virtio device id (as in virtio_ids.h) * := (optional) platform device id * * example: * virtio_uml.device=/var/uml.socket:1 * * Based on Virtio MMIO driver by Pawel Moll, copyright 2011-2014, ARM Ltd. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include "vhost_user.h" #define MAX_SUPPORTED_QUEUE_SIZE 256 #define to_virtio_uml_device(_vdev) \ container_of(_vdev, struct virtio_uml_device, vdev) struct virtio_uml_platform_data { u32 virtio_device_id; const char *socket_path; struct work_struct conn_broken_wk; struct platform_device *pdev; }; struct virtio_uml_device { struct virtio_device vdev; struct platform_device *pdev; struct virtio_uml_platform_data *pdata; spinlock_t sock_lock; int sock, req_fd, irq; u64 features; u64 protocol_features; u8 status; u8 registered:1; u8 suspended:1; u8 no_vq_suspend:1; u8 config_changed_irq:1; uint64_t vq_irq_vq_map; int recv_rc; }; struct virtio_uml_vq_info { int kick_fd, call_fd; char name[32]; bool suspended; }; extern unsigned long long physmem_size, highmem; #define vu_err(vu_dev, ...) dev_err(&(vu_dev)->pdev->dev, ##__VA_ARGS__) /* Vhost-user protocol */ static int full_sendmsg_fds(int fd, const void *buf, unsigned int len, const int *fds, unsigned int fds_num) { int rc; do { rc = os_sendmsg_fds(fd, buf, len, fds, fds_num); if (rc > 0) { buf += rc; len -= rc; fds = NULL; fds_num = 0; } } while (len && (rc >= 0 || rc == -EINTR)); if (rc < 0) return rc; return 0; } static int full_read(int fd, void *buf, int len, bool abortable) { int rc; if (!len) return 0; do { rc = os_read_file(fd, buf, len); if (rc > 0) { buf += rc; len -= rc; } } while (len && (rc > 0 || rc == -EINTR || (!abortable && rc == -EAGAIN))); if (rc < 0) return rc; if (rc == 0) return -ECONNRESET; return 0; } static int vhost_user_recv_header(int fd, struct vhost_user_msg *msg) { return full_read(fd, msg, sizeof(msg->header), true); } static int vhost_user_recv(struct virtio_uml_device *vu_dev, int fd, struct vhost_user_msg *msg, size_t max_payload_size, bool wait) { size_t size; int rc; /* * In virtio time-travel mode, we're handling all the vhost-user * FDs by polling them whenever appropriate. However, we may get * into a situation where we're sending out an interrupt message * to a device (e.g. a net device) and need to handle a simulation * time message while doing so, e.g. one that tells us to update * our idea of how long we can run without scheduling. * * Thus, we need to not just read() from the given fd, but need * to also handle messages for the simulation time - this function * does that for us while waiting for the given fd to be readable. */ if (wait) time_travel_wait_readable(fd); rc = vhost_user_recv_header(fd, msg); if (rc) return rc; size = msg->header.size; if (size > max_payload_size) return -EPROTO; return full_read(fd, &msg->payload, size, false); } static void vhost_user_check_reset(struct virtio_uml_device *vu_dev, int rc) { struct virtio_uml_platform_data *pdata = vu_dev->pdata; if (rc != -ECONNRESET) return; if (!vu_dev->registered) return; vu_dev->registered = 0; schedule_work(&pdata->conn_broken_wk); } static int vhost_user_recv_resp(struct virtio_uml_device *vu_dev, struct vhost_user_msg *msg, size_t max_payload_size) { int rc = vhost_user_recv(vu_dev, vu_dev->sock, msg, max_payload_size, true); if (rc) { vhost_user_check_reset(vu_dev, rc); return rc; } if (msg->header.flags != (VHOST_USER_FLAG_REPLY | VHOST_USER_VERSION)) return -EPROTO; return 0; } static int vhost_user_recv_u64(struct virtio_uml_device *vu_dev, u64 *value) { struct vhost_user_msg msg; int rc = vhost_user_recv_resp(vu_dev, &msg, sizeof(msg.payload.integer)); if (rc) return rc; if (msg.header.size != sizeof(msg.payload.integer)) return -EPROTO; *value = msg.payload.integer; return 0; } static int vhost_user_recv_req(struct virtio_uml_device *vu_dev, struct vhost_user_msg *msg, size_t max_payload_size) { int rc = vhost_user_recv(vu_dev, vu_dev->req_fd, msg, max_payload_size, false); if (rc) return rc; if ((msg->header.flags & ~VHOST_USER_FLAG_NEED_REPLY) != VHOST_USER_VERSION) return -EPROTO; return 0; } static int vhost_user_send(struct virtio_uml_device *vu_dev, bool need_response, struct vhost_user_msg *msg, int *fds, size_t num_fds) { size_t size = sizeof(msg->header) + msg->header.size; unsigned long flags; bool request_ack; int rc; msg->header.flags |= VHOST_USER_VERSION; /* * The need_response flag indicates that we already need a response, * e.g. to read the features. In these cases, don't request an ACK as * it is meaningless. Also request an ACK only if supported. */ request_ack = !need_response; if (!(vu_dev->protocol_features & BIT_ULL(VHOST_USER_PROTOCOL_F_REPLY_ACK))) request_ack = false; if (request_ack) msg->header.flags |= VHOST_USER_FLAG_NEED_REPLY; spin_lock_irqsave(&vu_dev->sock_lock, flags); rc = full_sendmsg_fds(vu_dev->sock, msg, size, fds, num_fds); if (rc < 0) goto out; if (request_ack) { uint64_t status; rc = vhost_user_recv_u64(vu_dev, &status); if (rc) goto out; if (status) { vu_err(vu_dev, "slave reports error: %llu\n", status); rc = -EIO; goto out; } } out: spin_unlock_irqrestore(&vu_dev->sock_lock, flags); return rc; } static int vhost_user_send_no_payload(struct virtio_uml_device *vu_dev, bool need_response, u32 request) { struct vhost_user_msg msg = { .header.request = request, }; return vhost_user_send(vu_dev, need_response, &msg, NULL, 0); } static int vhost_user_send_no_payload_fd(struct virtio_uml_device *vu_dev, u32 request, int fd) { struct vhost_user_msg msg = { .header.request = request, }; return vhost_user_send(vu_dev, false, &msg, &fd, 1); } static int vhost_user_send_u64(struct virtio_uml_device *vu_dev, u32 request, u64 value) { struct vhost_user_msg msg = { .header.request = request, .header.size = sizeof(msg.payload.integer), .payload.integer = value, }; return vhost_user_send(vu_dev, false, &msg, NULL, 0); } static int vhost_user_set_owner(struct virtio_uml_device *vu_dev) { return vhost_user_send_no_payload(vu_dev, false, VHOST_USER_SET_OWNER); } static int vhost_user_get_features(struct virtio_uml_device *vu_dev, u64 *features) { int rc = vhost_user_send_no_payload(vu_dev, true, VHOST_USER_GET_FEATURES); if (rc) return rc; return vhost_user_recv_u64(vu_dev, features); } static int vhost_user_set_features(struct virtio_uml_device *vu_dev, u64 features) { return vhost_user_send_u64(vu_dev, VHOST_USER_SET_FEATURES, features); } static int vhost_user_get_protocol_features(struct virtio_uml_device *vu_dev, u64 *protocol_features) { int rc = vhost_user_send_no_payload(vu_dev, true, VHOST_USER_GET_PROTOCOL_FEATURES); if (rc) return rc; return vhost_user_recv_u64(vu_dev, protocol_features); } static int vhost_user_set_protocol_features(struct virtio_uml_device *vu_dev, u64 protocol_features) { return vhost_user_send_u64(vu_dev, VHOST_USER_SET_PROTOCOL_FEATURES, protocol_features); } static void vhost_user_reply(struct virtio_uml_device *vu_dev, struct vhost_user_msg *msg, int response) { struct vhost_user_msg reply = { .payload.integer = response, }; size_t size = sizeof(reply.header) + sizeof(reply.payload.integer); int rc; reply.header = msg->header; reply.header.flags &= ~VHOST_USER_FLAG_NEED_REPLY; reply.header.flags |= VHOST_USER_FLAG_REPLY; reply.header.size = sizeof(reply.payload.integer); rc = full_sendmsg_fds(vu_dev->req_fd, &reply, size, NULL, 0); if (rc) vu_err(vu_dev, "sending reply to slave request failed: %d (size %zu)\n", rc, size); } static irqreturn_t vu_req_read_message(struct virtio_uml_device *vu_dev, struct time_travel_event *ev) { struct virtqueue *vq; int response = 1; struct { struct vhost_user_msg msg; u8 extra_payload[512]; } msg; int rc; irqreturn_t irq_rc = IRQ_NONE; while (1) { rc = vhost_user_recv_req(vu_dev, &msg.msg, sizeof(msg.msg.payload) + sizeof(msg.extra_payload)); if (rc) break; switch (msg.msg.header.request) { case VHOST_USER_SLAVE_CONFIG_CHANGE_MSG: vu_dev->config_changed_irq = true; response = 0; break; case VHOST_USER_SLAVE_VRING_CALL: virtio_device_for_each_vq((&vu_dev->vdev), vq) { if (vq->index == msg.msg.payload.vring_state.index) { response = 0; vu_dev->vq_irq_vq_map |= BIT_ULL(vq->index); break; } } break; case VHOST_USER_SLAVE_IOTLB_MSG: /* not supported - VIRTIO_F_ACCESS_PLATFORM */ case VHOST_USER_SLAVE_VRING_HOST_NOTIFIER_MSG: /* not supported - VHOST_USER_PROTOCOL_F_HOST_NOTIFIER */ default: vu_err(vu_dev, "unexpected slave request %d\n", msg.msg.header.request); } if (ev && !vu_dev->suspended) time_travel_add_irq_event(ev); if (msg.msg.header.flags & VHOST_USER_FLAG_NEED_REPLY) vhost_user_reply(vu_dev, &msg.msg, response); irq_rc = IRQ_HANDLED; } /* mask EAGAIN as we try non-blocking read until socket is empty */ vu_dev->recv_rc = (rc == -EAGAIN) ? 0 : rc; return irq_rc; } static irqreturn_t vu_req_interrupt(int irq, void *data) { struct virtio_uml_device *vu_dev = data; irqreturn_t ret = IRQ_HANDLED; if (!um_irq_timetravel_handler_used()) ret = vu_req_read_message(vu_dev, NULL); if (vu_dev->recv_rc) { vhost_user_check_reset(vu_dev, vu_dev->recv_rc); } else if (vu_dev->vq_irq_vq_map) { struct virtqueue *vq; virtio_device_for_each_vq((&vu_dev->vdev), vq) { if (vu_dev->vq_irq_vq_map & BIT_ULL(vq->index)) vring_interrupt(0 /* ignored */, vq); } vu_dev->vq_irq_vq_map = 0; } else if (vu_dev->config_changed_irq) { virtio_config_changed(&vu_dev->vdev); vu_dev->config_changed_irq = false; } return ret; } static void vu_req_interrupt_comm_handler(int irq, int fd, void *data, struct time_travel_event *ev) { vu_req_read_message(data, ev); } static int vhost_user_init_slave_req(struct virtio_uml_device *vu_dev) { int rc, req_fds[2]; /* Use a pipe for slave req fd, SIGIO is not supported for eventfd */ rc = os_pipe(req_fds, true, true); if (rc < 0) return rc; vu_dev->req_fd = req_fds[0]; rc = um_request_irq_tt(UM_IRQ_ALLOC, vu_dev->req_fd, IRQ_READ, vu_req_interrupt, IRQF_SHARED, vu_dev->pdev->name, vu_dev, vu_req_interrupt_comm_handler); if (rc < 0) goto err_close; vu_dev->irq = rc; rc = vhost_user_send_no_payload_fd(vu_dev, VHOST_USER_SET_SLAVE_REQ_FD, req_fds[1]); if (rc) goto err_free_irq; goto out; err_free_irq: um_free_irq(vu_dev->irq, vu_dev); err_close: os_close_file(req_fds[0]); out: /* Close unused write end of request fds */ os_close_file(req_fds[1]); return rc; } static int vhost_user_init(struct virtio_uml_device *vu_dev) { int rc = vhost_user_set_owner(vu_dev); if (rc) return rc; rc = vhost_user_get_features(vu_dev, &vu_dev->features); if (rc) return rc; if (vu_dev->features & BIT_ULL(VHOST_USER_F_PROTOCOL_FEATURES)) { rc = vhost_user_get_protocol_features(vu_dev, &vu_dev->protocol_features); if (rc) return rc; vu_dev->protocol_features &= VHOST_USER_SUPPORTED_PROTOCOL_F; rc = vhost_user_set_protocol_features(vu_dev, vu_dev->protocol_features); if (rc) return rc; } if (vu_dev->protocol_features & BIT_ULL(VHOST_USER_PROTOCOL_F_SLAVE_REQ)) { rc = vhost_user_init_slave_req(vu_dev); if (rc) return rc; } return 0; } static void vhost_user_get_config(struct virtio_uml_device *vu_dev, u32 offset, void *buf, u32 len) { u32 cfg_size = offset + len; struct vhost_user_msg *msg; size_t payload_size = sizeof(msg->payload.config) + cfg_size; size_t msg_size = sizeof(msg->header) + payload_size; int rc; if (!(vu_dev->protocol_features & BIT_ULL(VHOST_USER_PROTOCOL_F_CONFIG))) return; msg = kzalloc(msg_size, GFP_KERNEL); if (!msg) return; msg->header.request = VHOST_USER_GET_CONFIG; msg->header.size = payload_size; msg->payload.config.offset = 0; msg->payload.config.size = cfg_size; rc = vhost_user_send(vu_dev, true, msg, NULL, 0); if (rc) { vu_err(vu_dev, "sending VHOST_USER_GET_CONFIG failed: %d\n", rc); goto free; } rc = vhost_user_recv_resp(vu_dev, msg, msg_size); if (rc) { vu_err(vu_dev, "receiving VHOST_USER_GET_CONFIG response failed: %d\n", rc); goto free; } if (msg->header.size != payload_size || msg->payload.config.size != cfg_size) { rc = -EPROTO; vu_err(vu_dev, "Invalid VHOST_USER_GET_CONFIG sizes (payload %d expected %zu, config %u expected %u)\n", msg->header.size, payload_size, msg->payload.config.size, cfg_size); goto free; } memcpy(buf, msg->payload.config.payload + offset, len); free: kfree(msg); } static void vhost_user_set_config(struct virtio_uml_device *vu_dev, u32 offset, const void *buf, u32 len) { struct vhost_user_msg *msg; size_t payload_size = sizeof(msg->payload.config) + len; size_t msg_size = sizeof(msg->header) + payload_size; int rc; if (!(vu_dev->protocol_features & BIT_ULL(VHOST_USER_PROTOCOL_F_CONFIG))) return; msg = kzalloc(msg_size, GFP_KERNEL); if (!msg) return; msg->header.request = VHOST_USER_SET_CONFIG; msg->header.size = payload_size; msg->payload.config.offset = offset; msg->payload.config.size = len; memcpy(msg->payload.config.payload, buf, len); rc = vhost_user_send(vu_dev, false, msg, NULL, 0); if (rc) vu_err(vu_dev, "sending VHOST_USER_SET_CONFIG failed: %d\n", rc); kfree(msg); } static int vhost_user_init_mem_region(u64 addr, u64 size, int *fd_out, struct vhost_user_mem_region *region_out) { unsigned long long mem_offset; int rc = phys_mapping(addr, &mem_offset); if (WARN(rc < 0, "phys_mapping of 0x%llx returned %d\n", addr, rc)) return -EFAULT; *fd_out = rc; region_out->guest_addr = addr; region_out->user_addr = addr; region_out->size = size; region_out->mmap_offset = mem_offset; /* Ensure mapping is valid for the entire region */ rc = phys_mapping(addr + size - 1, &mem_offset); if (WARN(rc != *fd_out, "phys_mapping of 0x%llx failed: %d != %d\n", addr + size - 1, rc, *fd_out)) return -EFAULT; return 0; } static int vhost_user_set_mem_table(struct virtio_uml_device *vu_dev) { struct vhost_user_msg msg = { .header.request = VHOST_USER_SET_MEM_TABLE, .header.size = sizeof(msg.payload.mem_regions), .payload.mem_regions.num = 1, }; unsigned long reserved = uml_reserved - uml_physmem; int fds[2]; int rc; /* * This is a bit tricky, see also the comment with setup_physmem(). * * Essentially, setup_physmem() uses a file to mmap() our physmem, * but the code and data we *already* have is omitted. To us, this * is no difference, since they both become part of our address * space and memory consumption. To somebody looking in from the * outside, however, it is different because the part of our memory * consumption that's already part of the binary (code/data) is not * mapped from the file, so it's not visible to another mmap from * the file descriptor. * * Thus, don't advertise this space to the vhost-user slave. This * means that the slave will likely abort or similar when we give * it an address from the hidden range, since it's not marked as * a valid address, but at least that way we detect the issue and * don't just have the slave read an all-zeroes buffer from the * shared memory file, or write something there that we can never * see (depending on the direction of the virtqueue traffic.) * * Since we usually don't want to use .text for virtio buffers, * this effectively means that you cannot use * 1) global variables, which are in the .bss and not in the shm * file-backed memory * 2) the stack in some processes, depending on where they have * their stack (or maybe only no interrupt stack?) * * The stack is already not typically valid for DMA, so this isn't * much of a restriction, but global variables might be encountered. * * It might be possible to fix it by copying around the data that's * between bss_start and where we map the file now, but it's not * something that you typically encounter with virtio drivers, so * it didn't seem worthwhile. */ rc = vhost_user_init_mem_region(reserved, physmem_size - reserved, &fds[0], &msg.payload.mem_regions.regions[0]); if (rc < 0) return rc; if (highmem) { msg.payload.mem_regions.num++; rc = vhost_user_init_mem_region(__pa(end_iomem), highmem, &fds[1], &msg.payload.mem_regions.regions[1]); if (rc < 0) return rc; } return vhost_user_send(vu_dev, false, &msg, fds, msg.payload.mem_regions.num); } static int vhost_user_set_vring_state(struct virtio_uml_device *vu_dev, u32 request, u32 index, u32 num) { struct vhost_user_msg msg = { .header.request = request, .header.size = sizeof(msg.payload.vring_state), .payload.vring_state.index = index, .payload.vring_state.num = num, }; return vhost_user_send(vu_dev, false, &msg, NULL, 0); } static int vhost_user_set_vring_num(struct virtio_uml_device *vu_dev, u32 index, u32 num) { return vhost_user_set_vring_state(vu_dev, VHOST_USER_SET_VRING_NUM, index, num); } static int vhost_user_set_vring_base(struct virtio_uml_device *vu_dev, u32 index, u32 offset) { return vhost_user_set_vring_state(vu_dev, VHOST_USER_SET_VRING_BASE, index, offset); } static int vhost_user_set_vring_addr(struct virtio_uml_device *vu_dev, u32 index, u64 desc, u64 used, u64 avail, u64 log) { struct vhost_user_msg msg = { .header.request = VHOST_USER_SET_VRING_ADDR, .header.size = sizeof(msg.payload.vring_addr), .payload.vring_addr.index = index, .payload.vring_addr.desc = desc, .payload.vring_addr.used = used, .payload.vring_addr.avail = avail, .payload.vring_addr.log = log, }; return vhost_user_send(vu_dev, false, &msg, NULL, 0); } static int vhost_user_set_vring_fd(struct virtio_uml_device *vu_dev, u32 request, int index, int fd) { struct vhost_user_msg msg = { .header.request = request, .header.size = sizeof(msg.payload.integer), .payload.integer = index, }; if (index & ~VHOST_USER_VRING_INDEX_MASK) return -EINVAL; if (fd < 0) { msg.payload.integer |= VHOST_USER_VRING_POLL_MASK; return vhost_user_send(vu_dev, false, &msg, NULL, 0); } return vhost_user_send(vu_dev, false, &msg, &fd, 1); } static int vhost_user_set_vring_call(struct virtio_uml_device *vu_dev, int index, int fd) { return vhost_user_set_vring_fd(vu_dev, VHOST_USER_SET_VRING_CALL, index, fd); } static int vhost_user_set_vring_kick(struct virtio_uml_device *vu_dev, int index, int fd) { return vhost_user_set_vring_fd(vu_dev, VHOST_USER_SET_VRING_KICK, index, fd); } static int vhost_user_set_vring_enable(struct virtio_uml_device *vu_dev, u32 index, bool enable) { if (!(vu_dev->features & BIT_ULL(VHOST_USER_F_PROTOCOL_FEATURES))) return 0; return vhost_user_set_vring_state(vu_dev, VHOST_USER_SET_VRING_ENABLE, index, enable); } /* Virtio interface */ static bool vu_notify(struct virtqueue *vq) { struct virtio_uml_vq_info *info = vq->priv; const uint64_t n = 1; int rc; if (info->suspended) return true; time_travel_propagate_time(); if (info->kick_fd < 0) { struct virtio_uml_device *vu_dev; vu_dev = to_virtio_uml_device(vq->vdev); return vhost_user_set_vring_state(vu_dev, VHOST_USER_VRING_KICK, vq->index, 0) == 0; } do { rc = os_write_file(info->kick_fd, &n, sizeof(n)); } while (rc == -EINTR); return !WARN(rc != sizeof(n), "write returned %d\n", rc); } static irqreturn_t vu_interrupt(int irq, void *opaque) { struct virtqueue *vq = opaque; struct virtio_uml_vq_info *info = vq->priv; uint64_t n; int rc; irqreturn_t ret = IRQ_NONE; do { rc = os_read_file(info->call_fd, &n, sizeof(n)); if (rc == sizeof(n)) ret |= vring_interrupt(irq, vq); } while (rc == sizeof(n) || rc == -EINTR); WARN(rc != -EAGAIN, "read returned %d\n", rc); return ret; } static void vu_get(struct virtio_device *vdev, unsigned offset, void *buf, unsigned len) { struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev); vhost_user_get_config(vu_dev, offset, buf, len); } static void vu_set(struct virtio_device *vdev, unsigned offset, const void *buf, unsigned len) { struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev); vhost_user_set_config(vu_dev, offset, buf, len); } static u8 vu_get_status(struct virtio_device *vdev) { struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev); return vu_dev->status; } static void vu_set_status(struct virtio_device *vdev, u8 status) { struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev); vu_dev->status = status; } static void vu_reset(struct virtio_device *vdev) { struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev); vu_dev->status = 0; } static void vu_del_vq(struct virtqueue *vq) { struct virtio_uml_vq_info *info = vq->priv; if (info->call_fd >= 0) { struct virtio_uml_device *vu_dev; vu_dev = to_virtio_uml_device(vq->vdev); um_free_irq(vu_dev->irq, vq); os_close_file(info->call_fd); } if (info->kick_fd >= 0) os_close_file(info->kick_fd); vring_del_virtqueue(vq); kfree(info); } static void vu_del_vqs(struct virtio_device *vdev) { struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev); struct virtqueue *vq, *n; u64 features; /* Note: reverse order as a workaround to a decoding bug in snabb */ list_for_each_entry_reverse(vq, &vdev->vqs, list) WARN_ON(vhost_user_set_vring_enable(vu_dev, vq->index, false)); /* Ensure previous messages have been processed */ WARN_ON(vhost_user_get_features(vu_dev, &features)); list_for_each_entry_safe(vq, n, &vdev->vqs, list) vu_del_vq(vq); } static int vu_setup_vq_call_fd(struct virtio_uml_device *vu_dev, struct virtqueue *vq) { struct virtio_uml_vq_info *info = vq->priv; int call_fds[2]; int rc; /* no call FD needed/desired in this case */ if (vu_dev->protocol_features & BIT_ULL(VHOST_USER_PROTOCOL_F_INBAND_NOTIFICATIONS) && vu_dev->protocol_features & BIT_ULL(VHOST_USER_PROTOCOL_F_SLAVE_REQ)) { info->call_fd = -1; return 0; } /* Use a pipe for call fd, since SIGIO is not supported for eventfd */ rc = os_pipe(call_fds, true, true); if (rc < 0) return rc; info->call_fd = call_fds[0]; rc = um_request_irq(vu_dev->irq, info->call_fd, IRQ_READ, vu_interrupt, IRQF_SHARED, info->name, vq); if (rc < 0) goto close_both; rc = vhost_user_set_vring_call(vu_dev, vq->index, call_fds[1]); if (rc) goto release_irq; goto out; release_irq: um_free_irq(vu_dev->irq, vq); close_both: os_close_file(call_fds[0]); out: /* Close (unused) write end of call fds */ os_close_file(call_fds[1]); return rc; } static struct virtqueue *vu_setup_vq(struct virtio_device *vdev, unsigned index, vq_callback_t *callback, const char *name, bool ctx) { struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev); struct platform_device *pdev = vu_dev->pdev; struct virtio_uml_vq_info *info; struct virtqueue *vq; int num = MAX_SUPPORTED_QUEUE_SIZE; int rc; info = kzalloc(sizeof(*info), GFP_KERNEL); if (!info) { rc = -ENOMEM; goto error_kzalloc; } snprintf(info->name, sizeof(info->name), "%s.%d-%s", pdev->name, pdev->id, name); vq = vring_create_virtqueue(index, num, PAGE_SIZE, vdev, true, true, ctx, vu_notify, callback, info->name); if (!vq) { rc = -ENOMEM; goto error_create; } vq->priv = info; vq->num_max = num; num = virtqueue_get_vring_size(vq); if (vu_dev->protocol_features & BIT_ULL(VHOST_USER_PROTOCOL_F_INBAND_NOTIFICATIONS)) { info->kick_fd = -1; } else { rc = os_eventfd(0, 0); if (rc < 0) goto error_kick; info->kick_fd = rc; } rc = vu_setup_vq_call_fd(vu_dev, vq); if (rc) goto error_call; rc = vhost_user_set_vring_num(vu_dev, index, num); if (rc) goto error_setup; rc = vhost_user_set_vring_base(vu_dev, index, 0); if (rc) goto error_setup; rc = vhost_user_set_vring_addr(vu_dev, index, virtqueue_get_desc_addr(vq), virtqueue_get_used_addr(vq), virtqueue_get_avail_addr(vq), (u64) -1); if (rc) goto error_setup; return vq; error_setup: if (info->call_fd >= 0) { um_free_irq(vu_dev->irq, vq); os_close_file(info->call_fd); } error_call: if (info->kick_fd >= 0) os_close_file(info->kick_fd); error_kick: vring_del_virtqueue(vq); error_create: kfree(info); error_kzalloc: return ERR_PTR(rc); } static int vu_find_vqs(struct virtio_device *vdev, unsigned nvqs, struct virtqueue *vqs[], vq_callback_t *callbacks[], const char * const names[], const bool *ctx, struct irq_affinity *desc) { struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev); int i, queue_idx = 0, rc; struct virtqueue *vq; /* not supported for now */ if (WARN_ON(nvqs > 64)) return -EINVAL; rc = vhost_user_set_mem_table(vu_dev); if (rc) return rc; for (i = 0; i < nvqs; ++i) { if (!names[i]) { vqs[i] = NULL; continue; } vqs[i] = vu_setup_vq(vdev, queue_idx++, callbacks[i], names[i], ctx ? ctx[i] : false); if (IS_ERR(vqs[i])) { rc = PTR_ERR(vqs[i]); goto error_setup; } } list_for_each_entry(vq, &vdev->vqs, list) { struct virtio_uml_vq_info *info = vq->priv; if (info->kick_fd >= 0) { rc = vhost_user_set_vring_kick(vu_dev, vq->index, info->kick_fd); if (rc) goto error_setup; } rc = vhost_user_set_vring_enable(vu_dev, vq->index, true); if (rc) goto error_setup; } return 0; error_setup: vu_del_vqs(vdev); return rc; } static u64 vu_get_features(struct virtio_device *vdev) { struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev); return vu_dev->features; } static int vu_finalize_features(struct virtio_device *vdev) { struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev); u64 supported = vdev->features & VHOST_USER_SUPPORTED_F; vring_transport_features(vdev); vu_dev->features = vdev->features | supported; return vhost_user_set_features(vu_dev, vu_dev->features); } static const char *vu_bus_name(struct virtio_device *vdev) { struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev); return vu_dev->pdev->name; } static const struct virtio_config_ops virtio_uml_config_ops = { .get = vu_get, .set = vu_set, .get_status = vu_get_status, .set_status = vu_set_status, .reset = vu_reset, .find_vqs = vu_find_vqs, .del_vqs = vu_del_vqs, .get_features = vu_get_features, .finalize_features = vu_finalize_features, .bus_name = vu_bus_name, }; static void virtio_uml_release_dev(struct device *d) { struct virtio_device *vdev = container_of(d, struct virtio_device, dev); struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev); time_travel_propagate_time(); /* might not have been opened due to not negotiating the feature */ if (vu_dev->req_fd >= 0) { um_free_irq(vu_dev->irq, vu_dev); os_close_file(vu_dev->req_fd); } os_close_file(vu_dev->sock); kfree(vu_dev); } void virtio_uml_set_no_vq_suspend(struct virtio_device *vdev, bool no_vq_suspend) { struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev); if (WARN_ON(vdev->config != &virtio_uml_config_ops)) return; vu_dev->no_vq_suspend = no_vq_suspend; dev_info(&vdev->dev, "%sabled VQ suspend\n", no_vq_suspend ? "dis" : "en"); } static void vu_of_conn_broken(struct work_struct *wk) { struct virtio_uml_platform_data *pdata; struct virtio_uml_device *vu_dev; pdata = container_of(wk, struct virtio_uml_platform_data, conn_broken_wk); vu_dev = platform_get_drvdata(pdata->pdev); virtio_break_device(&vu_dev->vdev); /* * We can't remove the device from the devicetree so the only thing we * can do is warn. */ WARN_ON(1); } /* Platform device */ static struct virtio_uml_platform_data * virtio_uml_create_pdata(struct platform_device *pdev) { struct device_node *np = pdev->dev.of_node; struct virtio_uml_platform_data *pdata; int ret; if (!np) return ERR_PTR(-EINVAL); pdata = devm_kzalloc(&pdev->dev, sizeof(*pdata), GFP_KERNEL); if (!pdata) return ERR_PTR(-ENOMEM); INIT_WORK(&pdata->conn_broken_wk, vu_of_conn_broken); pdata->pdev = pdev; ret = of_property_read_string(np, "socket-path", &pdata->socket_path); if (ret) return ERR_PTR(ret); ret = of_property_read_u32(np, "virtio-device-id", &pdata->virtio_device_id); if (ret) return ERR_PTR(ret); return pdata; } static int virtio_uml_probe(struct platform_device *pdev) { struct virtio_uml_platform_data *pdata = pdev->dev.platform_data; struct virtio_uml_device *vu_dev; int rc; if (!pdata) { pdata = virtio_uml_create_pdata(pdev); if (IS_ERR(pdata)) return PTR_ERR(pdata); } vu_dev = kzalloc(sizeof(*vu_dev), GFP_KERNEL); if (!vu_dev) return -ENOMEM; vu_dev->pdata = pdata; vu_dev->vdev.dev.parent = &pdev->dev; vu_dev->vdev.dev.release = virtio_uml_release_dev; vu_dev->vdev.config = &virtio_uml_config_ops; vu_dev->vdev.id.device = pdata->virtio_device_id; vu_dev->vdev.id.vendor = VIRTIO_DEV_ANY_ID; vu_dev->pdev = pdev; vu_dev->req_fd = -1; time_travel_propagate_time(); do { rc = os_connect_socket(pdata->socket_path); } while (rc == -EINTR); if (rc < 0) goto error_free; vu_dev->sock = rc; spin_lock_init(&vu_dev->sock_lock); rc = vhost_user_init(vu_dev); if (rc) goto error_init; platform_set_drvdata(pdev, vu_dev); device_set_wakeup_capable(&vu_dev->vdev.dev, true); rc = register_virtio_device(&vu_dev->vdev); if (rc) put_device(&vu_dev->vdev.dev); vu_dev->registered = 1; return rc; error_init: os_close_file(vu_dev->sock); error_free: kfree(vu_dev); return rc; } static void virtio_uml_remove(struct platform_device *pdev) { struct virtio_uml_device *vu_dev = platform_get_drvdata(pdev); unregister_virtio_device(&vu_dev->vdev); } /* Command line device list */ static void vu_cmdline_release_dev(struct device *d) { } static struct device vu_cmdline_parent = { .init_name = "virtio-uml-cmdline", .release = vu_cmdline_release_dev, }; static bool vu_cmdline_parent_registered; static int vu_cmdline_id; static int vu_unregister_cmdline_device(struct device *dev, void *data) { struct platform_device *pdev = to_platform_device(dev); struct virtio_uml_platform_data *pdata = pdev->dev.platform_data; kfree(pdata->socket_path); platform_device_unregister(pdev); return 0; } static void vu_conn_broken(struct work_struct *wk) { struct virtio_uml_platform_data *pdata; struct virtio_uml_device *vu_dev; pdata = container_of(wk, struct virtio_uml_platform_data, conn_broken_wk); vu_dev = platform_get_drvdata(pdata->pdev); virtio_break_device(&vu_dev->vdev); vu_unregister_cmdline_device(&pdata->pdev->dev, NULL); } static int vu_cmdline_set(const char *device, const struct kernel_param *kp) { const char *ids = strchr(device, ':'); unsigned int virtio_device_id; int processed, consumed, err; char *socket_path; struct virtio_uml_platform_data pdata, *ppdata; struct platform_device *pdev; if (!ids || ids == device) return -EINVAL; processed = sscanf(ids, ":%u%n:%d%n", &virtio_device_id, &consumed, &vu_cmdline_id, &consumed); if (processed < 1 || ids[consumed]) return -EINVAL; if (!vu_cmdline_parent_registered) { err = device_register(&vu_cmdline_parent); if (err) { pr_err("Failed to register parent device!\n"); put_device(&vu_cmdline_parent); return err; } vu_cmdline_parent_registered = true; } socket_path = kmemdup_nul(device, ids - device, GFP_KERNEL); if (!socket_path) return -ENOMEM; pdata.virtio_device_id = (u32) virtio_device_id; pdata.socket_path = socket_path; pr_info("Registering device virtio-uml.%d id=%d at %s\n", vu_cmdline_id, virtio_device_id, socket_path); pdev = platform_device_register_data(&vu_cmdline_parent, "virtio-uml", vu_cmdline_id++, &pdata, sizeof(pdata)); err = PTR_ERR_OR_ZERO(pdev); if (err) goto free; ppdata = pdev->dev.platform_data; ppdata->pdev = pdev; INIT_WORK(&ppdata->conn_broken_wk, vu_conn_broken); return 0; free: kfree(socket_path); return err; } static int vu_cmdline_get_device(struct device *dev, void *data) { struct platform_device *pdev = to_platform_device(dev); struct virtio_uml_platform_data *pdata = pdev->dev.platform_data; char *buffer = data; unsigned int len = strlen(buffer); snprintf(buffer + len, PAGE_SIZE - len, "%s:%d:%d\n", pdata->socket_path, pdata->virtio_device_id, pdev->id); return 0; } static int vu_cmdline_get(char *buffer, const struct kernel_param *kp) { buffer[0] = '\0'; if (vu_cmdline_parent_registered) device_for_each_child(&vu_cmdline_parent, buffer, vu_cmdline_get_device); return strlen(buffer) + 1; } static const struct kernel_param_ops vu_cmdline_param_ops = { .set = vu_cmdline_set, .get = vu_cmdline_get, }; device_param_cb(device, &vu_cmdline_param_ops, NULL, S_IRUSR); __uml_help(vu_cmdline_param_ops, "virtio_uml.device=:[:]\n" " Configure a virtio device over a vhost-user socket.\n" " See virtio_ids.h for a list of possible virtio device id values.\n" " Optionally use a specific platform_device id.\n\n" ); static void vu_unregister_cmdline_devices(void) { if (vu_cmdline_parent_registered) { device_for_each_child(&vu_cmdline_parent, NULL, vu_unregister_cmdline_device); device_unregister(&vu_cmdline_parent); vu_cmdline_parent_registered = false; } } /* Platform driver */ static const struct of_device_id virtio_uml_match[] = { { .compatible = "virtio,uml", }, { } }; MODULE_DEVICE_TABLE(of, virtio_uml_match); static int virtio_uml_suspend(struct platform_device *pdev, pm_message_t state) { struct virtio_uml_device *vu_dev = platform_get_drvdata(pdev); if (!vu_dev->no_vq_suspend) { struct virtqueue *vq; virtio_device_for_each_vq((&vu_dev->vdev), vq) { struct virtio_uml_vq_info *info = vq->priv; info->suspended = true; vhost_user_set_vring_enable(vu_dev, vq->index, false); } } if (!device_may_wakeup(&vu_dev->vdev.dev)) { vu_dev->suspended = true; return 0; } return irq_set_irq_wake(vu_dev->irq, 1); } static int virtio_uml_resume(struct platform_device *pdev) { struct virtio_uml_device *vu_dev = platform_get_drvdata(pdev); if (!vu_dev->no_vq_suspend) { struct virtqueue *vq; virtio_device_for_each_vq((&vu_dev->vdev), vq) { struct virtio_uml_vq_info *info = vq->priv; info->suspended = false; vhost_user_set_vring_enable(vu_dev, vq->index, true); } } vu_dev->suspended = false; if (!device_may_wakeup(&vu_dev->vdev.dev)) return 0; return irq_set_irq_wake(vu_dev->irq, 0); } static struct platform_driver virtio_uml_driver = { .probe = virtio_uml_probe, .remove_new = virtio_uml_remove, .driver = { .name = "virtio-uml", .of_match_table = virtio_uml_match, }, .suspend = virtio_uml_suspend, .resume = virtio_uml_resume, }; static int __init virtio_uml_init(void) { return platform_driver_register(&virtio_uml_driver); } static void __exit virtio_uml_exit(void) { platform_driver_unregister(&virtio_uml_driver); vu_unregister_cmdline_devices(); } module_init(virtio_uml_init); module_exit(virtio_uml_exit); __uml_exitcall(virtio_uml_exit); MODULE_DESCRIPTION("UML driver for vhost-user virtio devices"); MODULE_LICENSE("GPL");