diff options
Diffstat (limited to 'Documentation')
-rw-r--r-- | Documentation/devicetree/bindings/mailbox/mailbox.txt | 38 | ||||
-rw-r--r-- | Documentation/devicetree/bindings/pwm/pwm-fsl-ftm.txt | 19 | ||||
-rw-r--r-- | Documentation/devicetree/bindings/pwm/pwm-rockchip.txt | 4 | ||||
-rw-r--r-- | Documentation/devicetree/bindings/thermal/imx-thermal.txt | 5 | ||||
-rw-r--r-- | Documentation/devicetree/bindings/watchdog/cadence-wdt.txt | 24 | ||||
-rw-r--r-- | Documentation/devicetree/bindings/watchdog/fsl-imx-wdt.txt | 3 | ||||
-rw-r--r-- | Documentation/devicetree/bindings/watchdog/meson6-wdt.txt | 13 | ||||
-rw-r--r-- | Documentation/devicetree/bindings/watchdog/qcom-wdt.txt | 24 | ||||
-rw-r--r-- | Documentation/devicetree/bindings/watchdog/samsung-wdt.txt | 1 | ||||
-rw-r--r-- | Documentation/kernel-parameters.txt | 14 | ||||
-rw-r--r-- | Documentation/mailbox.txt | 122 | ||||
-rw-r--r-- | Documentation/scsi/osd.txt | 3 | ||||
-rw-r--r-- | Documentation/target/tcmu-design.txt | 378 |
13 files changed, 639 insertions, 9 deletions
diff --git a/Documentation/devicetree/bindings/mailbox/mailbox.txt b/Documentation/devicetree/bindings/mailbox/mailbox.txt new file mode 100644 index 000000000000..1a2cd3d266db --- /dev/null +++ b/Documentation/devicetree/bindings/mailbox/mailbox.txt @@ -0,0 +1,38 @@ +* Generic Mailbox Controller and client driver bindings + +Generic binding to provide a way for Mailbox controller drivers to +assign appropriate mailbox channel to client drivers. + +* Mailbox Controller + +Required property: +- #mbox-cells: Must be at least 1. Number of cells in a mailbox + specifier. + +Example: + mailbox: mailbox { + ... + #mbox-cells = <1>; + }; + + +* Mailbox Client + +Required property: +- mboxes: List of phandle and mailbox channel specifiers. + +Optional property: +- mbox-names: List of identifier strings for each mailbox channel + required by the client. The use of this property + is discouraged in favor of using index in list of + 'mboxes' while requesting a mailbox. Instead the + platforms may define channel indices, in DT headers, + to something legible. + +Example: + pwr_cntrl: power { + ... + mbox-names = "pwr-ctrl", "rpc"; + mboxes = <&mailbox 0 + &mailbox 1>; + }; diff --git a/Documentation/devicetree/bindings/pwm/pwm-fsl-ftm.txt b/Documentation/devicetree/bindings/pwm/pwm-fsl-ftm.txt index 0bda229a6171..3899d6a557c1 100644 --- a/Documentation/devicetree/bindings/pwm/pwm-fsl-ftm.txt +++ b/Documentation/devicetree/bindings/pwm/pwm-fsl-ftm.txt @@ -1,5 +1,20 @@ Freescale FlexTimer Module (FTM) PWM controller +The same FTM PWM device can have a different endianness on different SoCs. The +device tree provides a property to describing this so that an operating system +device driver can handle all variants of the device. Refer to the table below +for the endianness of the FTM PWM block as integrated into the existing SoCs: + + SoC | FTM-PWM endianness + --------+------------------- + Vybrid | LE + LS1 | BE + LS2 | LE + +Please see ../regmap/regmap.txt for more detail about how to specify endian +modes in device tree. + + Required properties: - compatible: Should be "fsl,vf610-ftm-pwm". - reg: Physical base address and length of the controller's registers @@ -16,7 +31,8 @@ Required properties: - pinctrl-names: Must contain a "default" entry. - pinctrl-NNN: One property must exist for each entry in pinctrl-names. See pinctrl/pinctrl-bindings.txt for details of the property values. - +- big-endian: Boolean property, required if the FTM PWM registers use a big- + endian rather than little-endian layout. Example: @@ -32,4 +48,5 @@ pwm0: pwm@40038000 { <&clks VF610_CLK_FTM0_EXT_FIX_EN>; pinctrl-names = "default"; pinctrl-0 = <&pinctrl_pwm0_1>; + big-endian; }; diff --git a/Documentation/devicetree/bindings/pwm/pwm-rockchip.txt b/Documentation/devicetree/bindings/pwm/pwm-rockchip.txt index d47d15a6a298..b8be3d09ee26 100644 --- a/Documentation/devicetree/bindings/pwm/pwm-rockchip.txt +++ b/Documentation/devicetree/bindings/pwm/pwm-rockchip.txt @@ -7,8 +7,8 @@ Required properties: "rockchip,vop-pwm": found integrated in VOP on RK3288 SoC - reg: physical base address and length of the controller's registers - clocks: phandle and clock specifier of the PWM reference clock - - #pwm-cells: should be 2. See pwm.txt in this directory for a - description of the cell format. + - #pwm-cells: must be 2 (rk2928) or 3 (rk3288). See pwm.txt in this directory + for a description of the cell format. Example: diff --git a/Documentation/devicetree/bindings/thermal/imx-thermal.txt b/Documentation/devicetree/bindings/thermal/imx-thermal.txt index 1f0f67234a91..3c67bd50aa10 100644 --- a/Documentation/devicetree/bindings/thermal/imx-thermal.txt +++ b/Documentation/devicetree/bindings/thermal/imx-thermal.txt @@ -1,7 +1,10 @@ * Temperature Monitor (TEMPMON) on Freescale i.MX SoCs Required properties: -- compatible : "fsl,imx6q-thermal" +- compatible : "fsl,imx6q-tempmon" for i.MX6Q, "fsl,imx6sx-tempmon" for i.MX6SX. + i.MX6SX has two more IRQs than i.MX6Q, one is IRQ_LOW and the other is IRQ_PANIC, + when temperature is below than low threshold, IRQ_LOW will be triggered, when temperature + is higher than panic threshold, system will auto reboot by SRC module. - fsl,tempmon : phandle pointer to system controller that contains TEMPMON control registers, e.g. ANATOP on imx6q. - fsl,tempmon-data : phandle pointer to fuse controller that contains TEMPMON diff --git a/Documentation/devicetree/bindings/watchdog/cadence-wdt.txt b/Documentation/devicetree/bindings/watchdog/cadence-wdt.txt new file mode 100644 index 000000000000..c3a36ee45552 --- /dev/null +++ b/Documentation/devicetree/bindings/watchdog/cadence-wdt.txt @@ -0,0 +1,24 @@ +Zynq Watchdog Device Tree Bindings +------------------------------------------- + +Required properties: +- compatible : Should be "cdns,wdt-r1p2". +- clocks : This is pclk (APB clock). +- interrupts : This is wd_irq - watchdog timeout interrupt. +- interrupt-parent : Must be core interrupt controller. + +Optional properties +- reset-on-timeout : If this property exists, then a reset is done + when watchdog times out. +- timeout-sec : Watchdog timeout value (in seconds). + +Example: + watchdog@f8005000 { + compatible = "cdns,wdt-r1p2"; + clocks = <&clkc 45>; + interrupt-parent = <&intc>; + interrupts = <0 9 1>; + reg = <0xf8005000 0x1000>; + reset-on-timeout; + timeout-sec = <10>; + }; diff --git a/Documentation/devicetree/bindings/watchdog/fsl-imx-wdt.txt b/Documentation/devicetree/bindings/watchdog/fsl-imx-wdt.txt index e52ba2da868c..8dab6fd024aa 100644 --- a/Documentation/devicetree/bindings/watchdog/fsl-imx-wdt.txt +++ b/Documentation/devicetree/bindings/watchdog/fsl-imx-wdt.txt @@ -7,7 +7,8 @@ Required properties: Optional property: - big-endian: If present the watchdog device's registers are implemented - in big endian mode, otherwise in little mode. + in big endian mode, otherwise in native mode(same with CPU), for more + detail please see: Documentation/devicetree/bindings/regmap/regmap.txt. Examples: diff --git a/Documentation/devicetree/bindings/watchdog/meson6-wdt.txt b/Documentation/devicetree/bindings/watchdog/meson6-wdt.txt new file mode 100644 index 000000000000..9200fc2d508c --- /dev/null +++ b/Documentation/devicetree/bindings/watchdog/meson6-wdt.txt @@ -0,0 +1,13 @@ +Meson SoCs Watchdog timer + +Required properties: + +- compatible : should be "amlogic,meson6-wdt" +- reg : Specifies base physical address and size of the registers. + +Example: + +wdt: watchdog@c1109900 { + compatible = "amlogic,meson6-wdt"; + reg = <0xc1109900 0x8>; +}; diff --git a/Documentation/devicetree/bindings/watchdog/qcom-wdt.txt b/Documentation/devicetree/bindings/watchdog/qcom-wdt.txt new file mode 100644 index 000000000000..4726924d034e --- /dev/null +++ b/Documentation/devicetree/bindings/watchdog/qcom-wdt.txt @@ -0,0 +1,24 @@ +Qualcomm Krait Processor Sub-system (KPSS) Watchdog +--------------------------------------------------- + +Required properties : +- compatible : shall contain only one of the following: + + "qcom,kpss-wdt-msm8960" + "qcom,kpss-wdt-apq8064" + "qcom,kpss-wdt-ipq8064" + +- reg : shall contain base register location and length +- clocks : shall contain the input clock + +Optional properties : +- timeout-sec : shall contain the default watchdog timeout in seconds, + if unset, the default timeout is 30 seconds + +Example: + watchdog@208a038 { + compatible = "qcom,kpss-wdt-ipq8064"; + reg = <0x0208a038 0x40>; + clocks = <&sleep_clk>; + timeout-sec = <10>; + }; diff --git a/Documentation/devicetree/bindings/watchdog/samsung-wdt.txt b/Documentation/devicetree/bindings/watchdog/samsung-wdt.txt index cfff37511aac..8f3d96af81d7 100644 --- a/Documentation/devicetree/bindings/watchdog/samsung-wdt.txt +++ b/Documentation/devicetree/bindings/watchdog/samsung-wdt.txt @@ -9,6 +9,7 @@ Required properties: (a) "samsung,s3c2410-wdt" for Exynos4 and previous SoCs (b) "samsung,exynos5250-wdt" for Exynos5250 (c) "samsung,exynos5420-wdt" for Exynos5420 + (c) "samsung,exynos7-wdt" for Exynos7 - reg : base physical address of the controller and length of memory mapped region. diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 7dbe5ec9d9cd..74339c57b914 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -1015,10 +1015,14 @@ bytes respectively. Such letter suffixes can also be entirely omitted. Format: {"off" | "on" | "skip[mbr]"} efi= [EFI] - Format: { "old_map" } + Format: { "old_map", "nochunk", "noruntime" } old_map [X86-64]: switch to the old ioremap-based EFI runtime services mapping. 32-bit still uses this one by default. + nochunk: disable reading files in "chunks" in the EFI + boot stub, as chunking can cause problems with some + firmware implementations. + noruntime : disable EFI runtime services support efi_no_storage_paranoia [EFI; X86] Using this parameter you can use more than 50% of @@ -2232,7 +2236,7 @@ bytes respectively. Such letter suffixes can also be entirely omitted. nodsp [SH] Disable hardware DSP at boot time. - noefi [X86] Disable EFI runtime services support. + noefi Disable EFI runtime services support. noexec [IA-64] @@ -3465,6 +3469,12 @@ bytes respectively. Such letter suffixes can also be entirely omitted. e.g. base its process migration decisions on it. Default is on. + topology_updates= [KNL, PPC, NUMA] + Format: {off} + Specify if the kernel should ignore (off) + topology updates sent by the hypervisor to this + LPAR. + tp720= [HW,PS2] tpm_suspend_pcr=[HW,TPM] diff --git a/Documentation/mailbox.txt b/Documentation/mailbox.txt new file mode 100644 index 000000000000..60f43ff629aa --- /dev/null +++ b/Documentation/mailbox.txt @@ -0,0 +1,122 @@ + The Common Mailbox Framework + Jassi Brar <jaswinder.singh@linaro.org> + + This document aims to help developers write client and controller +drivers for the API. But before we start, let us note that the +client (especially) and controller drivers are likely going to be +very platform specific because the remote firmware is likely to be +proprietary and implement non-standard protocol. So even if two +platforms employ, say, PL320 controller, the client drivers can't +be shared across them. Even the PL320 driver might need to accommodate +some platform specific quirks. So the API is meant mainly to avoid +similar copies of code written for each platform. Having said that, +nothing prevents the remote f/w to also be Linux based and use the +same api there. However none of that helps us locally because we only +ever deal at client's protocol level. + Some of the choices made during implementation are the result of this +peculiarity of this "common" framework. + + + + Part 1 - Controller Driver (See include/linux/mailbox_controller.h) + + Allocate mbox_controller and the array of mbox_chan. +Populate mbox_chan_ops, except peek_data() all are mandatory. +The controller driver might know a message has been consumed +by the remote by getting an IRQ or polling some hardware flag +or it can never know (the client knows by way of the protocol). +The method in order of preference is IRQ -> Poll -> None, which +the controller driver should set via 'txdone_irq' or 'txdone_poll' +or neither. + + + Part 2 - Client Driver (See include/linux/mailbox_client.h) + + The client might want to operate in blocking mode (synchronously +send a message through before returning) or non-blocking/async mode (submit +a message and a callback function to the API and return immediately). + + +struct demo_client { + struct mbox_client cl; + struct mbox_chan *mbox; + struct completion c; + bool async; + /* ... */ +}; + +/* + * This is the handler for data received from remote. The behaviour is purely + * dependent upon the protocol. This is just an example. + */ +static void message_from_remote(struct mbox_client *cl, void *mssg) +{ + struct demo_client *dc = container_of(mbox_client, + struct demo_client, cl); + if (dc->aysnc) { + if (is_an_ack(mssg)) { + /* An ACK to our last sample sent */ + return; /* Or do something else here */ + } else { /* A new message from remote */ + queue_req(mssg); + } + } else { + /* Remote f/w sends only ACK packets on this channel */ + return; + } +} + +static void sample_sent(struct mbox_client *cl, void *mssg, int r) +{ + struct demo_client *dc = container_of(mbox_client, + struct demo_client, cl); + complete(&dc->c); +} + +static void client_demo(struct platform_device *pdev) +{ + struct demo_client *dc_sync, *dc_async; + /* The controller already knows async_pkt and sync_pkt */ + struct async_pkt ap; + struct sync_pkt sp; + + dc_sync = kzalloc(sizeof(*dc_sync), GFP_KERNEL); + dc_async = kzalloc(sizeof(*dc_async), GFP_KERNEL); + + /* Populate non-blocking mode client */ + dc_async->cl.dev = &pdev->dev; + dc_async->cl.rx_callback = message_from_remote; + dc_async->cl.tx_done = sample_sent; + dc_async->cl.tx_block = false; + dc_async->cl.tx_tout = 0; /* doesn't matter here */ + dc_async->cl.knows_txdone = false; /* depending upon protocol */ + dc_async->async = true; + init_completion(&dc_async->c); + + /* Populate blocking mode client */ + dc_sync->cl.dev = &pdev->dev; + dc_sync->cl.rx_callback = message_from_remote; + dc_sync->cl.tx_done = NULL; /* operate in blocking mode */ + dc_sync->cl.tx_block = true; + dc_sync->cl.tx_tout = 500; /* by half a second */ + dc_sync->cl.knows_txdone = false; /* depending upon protocol */ + dc_sync->async = false; + + /* ASync mailbox is listed second in 'mboxes' property */ + dc_async->mbox = mbox_request_channel(&dc_async->cl, 1); + /* Populate data packet */ + /* ap.xxx = 123; etc */ + /* Send async message to remote */ + mbox_send_message(dc_async->mbox, &ap); + + /* Sync mailbox is listed first in 'mboxes' property */ + dc_sync->mbox = mbox_request_channel(&dc_sync->cl, 0); + /* Populate data packet */ + /* sp.abc = 123; etc */ + /* Send message to remote in blocking mode */ + mbox_send_message(dc_sync->mbox, &sp); + /* At this point 'sp' has been sent */ + + /* Now wait for async chan to be done */ + wait_for_completion(&dc_async->c); +} diff --git a/Documentation/scsi/osd.txt b/Documentation/scsi/osd.txt index da162f7fd5f5..5a9879bad073 100644 --- a/Documentation/scsi/osd.txt +++ b/Documentation/scsi/osd.txt @@ -184,8 +184,7 @@ Any problems, questions, bug reports, lonely OSD nights, please email: More up-to-date information can be found on: http://open-osd.org -Boaz Harrosh <bharrosh@panasas.com> -Benny Halevy <bhalevy@panasas.com> +Boaz Harrosh <ooo@electrozaur.com> References ========== diff --git a/Documentation/target/tcmu-design.txt b/Documentation/target/tcmu-design.txt new file mode 100644 index 000000000000..5518465290bf --- /dev/null +++ b/Documentation/target/tcmu-design.txt @@ -0,0 +1,378 @@ +Contents: + +1) TCM Userspace Design + a) Background + b) Benefits + c) Design constraints + d) Implementation overview + i. Mailbox + ii. Command ring + iii. Data Area + e) Device discovery + f) Device events + g) Other contingencies +2) Writing a user pass-through handler + a) Discovering and configuring TCMU uio devices + b) Waiting for events on the device(s) + c) Managing the command ring +3) Command filtering and pass_level +4) A final note + + +TCM Userspace Design +-------------------- + +TCM is another name for LIO, an in-kernel iSCSI target (server). +Existing TCM targets run in the kernel. TCMU (TCM in Userspace) +allows userspace programs to be written which act as iSCSI targets. +This document describes the design. + +The existing kernel provides modules for different SCSI transport +protocols. TCM also modularizes the data storage. There are existing +modules for file, block device, RAM or using another SCSI device as +storage. These are called "backstores" or "storage engines". These +built-in modules are implemented entirely as kernel code. + +Background: + +In addition to modularizing the transport protocol used for carrying +SCSI commands ("fabrics"), the Linux kernel target, LIO, also modularizes +the actual data storage as well. These are referred to as "backstores" +or "storage engines". The target comes with backstores that allow a +file, a block device, RAM, or another SCSI device to be used for the +local storage needed for the exported SCSI LUN. Like the rest of LIO, +these are implemented entirely as kernel code. + +These backstores cover the most common use cases, but not all. One new +use case that other non-kernel target solutions, such as tgt, are able +to support is using Gluster's GLFS or Ceph's RBD as a backstore. The +target then serves as a translator, allowing initiators to store data +in these non-traditional networked storage systems, while still only +using standard protocols themselves. + +If the target is a userspace process, supporting these is easy. tgt, +for example, needs only a small adapter module for each, because the +modules just use the available userspace libraries for RBD and GLFS. + +Adding support for these backstores in LIO is considerably more +difficult, because LIO is entirely kernel code. Instead of undertaking +the significant work to port the GLFS or RBD APIs and protocols to the +kernel, another approach is to create a userspace pass-through +backstore for LIO, "TCMU". + + +Benefits: + +In addition to allowing relatively easy support for RBD and GLFS, TCMU +will also allow easier development of new backstores. TCMU combines +with the LIO loopback fabric to become something similar to FUSE +(Filesystem in Userspace), but at the SCSI layer instead of the +filesystem layer. A SUSE, if you will. + +The disadvantage is there are more distinct components to configure, and +potentially to malfunction. This is unavoidable, but hopefully not +fatal if we're careful to keep things as simple as possible. + +Design constraints: + +- Good performance: high throughput, low latency +- Cleanly handle if userspace: + 1) never attaches + 2) hangs + 3) dies + 4) misbehaves +- Allow future flexibility in user & kernel implementations +- Be reasonably memory-efficient +- Simple to configure & run +- Simple to write a userspace backend + + +Implementation overview: + +The core of the TCMU interface is a memory region that is shared +between kernel and userspace. Within this region is: a control area +(mailbox); a lockless producer/consumer circular buffer for commands +to be passed up, and status returned; and an in/out data buffer area. + +TCMU uses the pre-existing UIO subsystem. UIO allows device driver +development in userspace, and this is conceptually very close to the +TCMU use case, except instead of a physical device, TCMU implements a +memory-mapped layout designed for SCSI commands. Using UIO also +benefits TCMU by handling device introspection (e.g. a way for +userspace to determine how large the shared region is) and signaling +mechanisms in both directions. + +There are no embedded pointers in the memory region. Everything is +expressed as an offset from the region's starting address. This allows +the ring to still work if the user process dies and is restarted with +the region mapped at a different virtual address. + +See target_core_user.h for the struct definitions. + +The Mailbox: + +The mailbox is always at the start of the shared memory region, and +contains a version, details about the starting offset and size of the +command ring, and head and tail pointers to be used by the kernel and +userspace (respectively) to put commands on the ring, and indicate +when the commands are completed. + +version - 1 (userspace should abort if otherwise) +flags - none yet defined. +cmdr_off - The offset of the start of the command ring from the start +of the memory region, to account for the mailbox size. +cmdr_size - The size of the command ring. This does *not* need to be a +power of two. +cmd_head - Modified by the kernel to indicate when a command has been +placed on the ring. +cmd_tail - Modified by userspace to indicate when it has completed +processing of a command. + +The Command Ring: + +Commands are placed on the ring by the kernel incrementing +mailbox.cmd_head by the size of the command, modulo cmdr_size, and +then signaling userspace via uio_event_notify(). Once the command is +completed, userspace updates mailbox.cmd_tail in the same way and +signals the kernel via a 4-byte write(). When cmd_head equals +cmd_tail, the ring is empty -- no commands are currently waiting to be +processed by userspace. + +TCMU commands start with a common header containing "len_op", a 32-bit +value that stores the length, as well as the opcode in the lowest +unused bits. Currently only two opcodes are defined, TCMU_OP_PAD and +TCMU_OP_CMD. When userspace encounters a command with PAD opcode, it +should skip ahead by the bytes in "length". (The kernel inserts PAD +entries to ensure each CMD entry fits contigously into the circular +buffer.) + +When userspace handles a CMD, it finds the SCSI CDB (Command Data +Block) via tcmu_cmd_entry.req.cdb_off. This is an offset from the +start of the overall shared memory region, not the entry. The data +in/out buffers are accessible via tht req.iov[] array. Note that +each iov.iov_base is also an offset from the start of the region. + +TCMU currently does not support BIDI operations. + +When completing a command, userspace sets rsp.scsi_status, and +rsp.sense_buffer if necessary. Userspace then increments +mailbox.cmd_tail by entry.hdr.length (mod cmdr_size) and signals the +kernel via the UIO method, a 4-byte write to the file descriptor. + +The Data Area: + +This is shared-memory space after the command ring. The organization +of this area is not defined in the TCMU interface, and userspace +should access only the parts referenced by pending iovs. + + +Device Discovery: + +Other devices may be using UIO besides TCMU. Unrelated user processes +may also be handling different sets of TCMU devices. TCMU userspace +processes must find their devices by scanning sysfs +class/uio/uio*/name. For TCMU devices, these names will be of the +format: + +tcm-user/<hba_num>/<device_name>/<subtype>/<path> + +where "tcm-user" is common for all TCMU-backed UIO devices. <hba_num> +and <device_name> allow userspace to find the device's path in the +kernel target's configfs tree. Assuming the usual mount point, it is +found at: + +/sys/kernel/config/target/core/user_<hba_num>/<device_name> + +This location contains attributes such as "hw_block_size", that +userspace needs to know for correct operation. + +<subtype> will be a userspace-process-unique string to identify the +TCMU device as expecting to be backed by a certain handler, and <path> +will be an additional handler-specific string for the user process to +configure the device, if needed. The name cannot contain ':', due to +LIO limitations. + +For all devices so discovered, the user handler opens /dev/uioX and +calls mmap(): + +mmap(NULL, size, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0) + +where size must be equal to the value read from +/sys/class/uio/uioX/maps/map0/size. + + +Device Events: + +If a new device is added or removed, a notification will be broadcast +over netlink, using a generic netlink family name of "TCM-USER" and a +multicast group named "config". This will include the UIO name as +described in the previous section, as well as the UIO minor +number. This should allow userspace to identify both the UIO device and +the LIO device, so that after determining the device is supported +(based on subtype) it can take the appropriate action. + + +Other contingencies: + +Userspace handler process never attaches: + +- TCMU will post commands, and then abort them after a timeout period + (30 seconds.) + +Userspace handler process is killed: + +- It is still possible to restart and re-connect to TCMU + devices. Command ring is preserved. However, after the timeout period, + the kernel will abort pending tasks. + +Userspace handler process hangs: + +- The kernel will abort pending tasks after a timeout period. + +Userspace handler process is malicious: + +- The process can trivially break the handling of devices it controls, + but should not be able to access kernel memory outside its shared + memory areas. + + +Writing a user pass-through handler (with example code) +------------------------------------------------------- + +A user process handing a TCMU device must support the following: + +a) Discovering and configuring TCMU uio devices +b) Waiting for events on the device(s) +c) Managing the command ring: Parsing operations and commands, + performing work as needed, setting response fields (scsi_status and + possibly sense_buffer), updating cmd_tail, and notifying the kernel + that work has been finished + +First, consider instead writing a plugin for tcmu-runner. tcmu-runner +implements all of this, and provides a higher-level API for plugin +authors. + +TCMU is designed so that multiple unrelated processes can manage TCMU +devices separately. All handlers should make sure to only open their +devices, based opon a known subtype string. + +a) Discovering and configuring TCMU UIO devices: + +(error checking omitted for brevity) + +int fd, dev_fd; +char buf[256]; +unsigned long long map_len; +void *map; + +fd = open("/sys/class/uio/uio0/name", O_RDONLY); +ret = read(fd, buf, sizeof(buf)); +close(fd); +buf[ret-1] = '\0'; /* null-terminate and chop off the \n */ + +/* we only want uio devices whose name is a format we expect */ +if (strncmp(buf, "tcm-user", 8)) + exit(-1); + +/* Further checking for subtype also needed here */ + +fd = open(/sys/class/uio/%s/maps/map0/size, O_RDONLY); +ret = read(fd, buf, sizeof(buf)); +close(fd); +str_buf[ret-1] = '\0'; /* null-terminate and chop off the \n */ + +map_len = strtoull(buf, NULL, 0); + +dev_fd = open("/dev/uio0", O_RDWR); +map = mmap(NULL, map_len, PROT_READ|PROT_WRITE, MAP_SHARED, dev_fd, 0); + + +b) Waiting for events on the device(s) + +while (1) { + char buf[4]; + + int ret = read(dev_fd, buf, 4); /* will block */ + + handle_device_events(dev_fd, map); +} + + +c) Managing the command ring + +#include <linux/target_core_user.h> + +int handle_device_events(int fd, void *map) +{ + struct tcmu_mailbox *mb = map; + struct tcmu_cmd_entry *ent = (void *) mb + mb->cmdr_off + mb->cmd_tail; + int did_some_work = 0; + + /* Process events from cmd ring until we catch up with cmd_head */ + while (ent != (void *)mb + mb->cmdr_off + mb->cmd_head) { + + if (tcmu_hdr_get_op(&ent->hdr) == TCMU_OP_CMD) { + uint8_t *cdb = (void *)mb + ent->req.cdb_off; + bool success = true; + + /* Handle command here. */ + printf("SCSI opcode: 0x%x\n", cdb[0]); + + /* Set response fields */ + if (success) + ent->rsp.scsi_status = SCSI_NO_SENSE; + else { + /* Also fill in rsp->sense_buffer here */ + ent->rsp.scsi_status = SCSI_CHECK_CONDITION; + } + } + else { + /* Do nothing for PAD entries */ + } + + /* update cmd_tail */ + mb->cmd_tail = (mb->cmd_tail + tcmu_hdr_get_len(&ent->hdr)) % mb->cmdr_size; + ent = (void *) mb + mb->cmdr_off + mb->cmd_tail; + did_some_work = 1; + } + + /* Notify the kernel that work has been finished */ + if (did_some_work) { + uint32_t buf = 0; + + write(fd, &buf, 4); + } + + return 0; +} + + +Command filtering and pass_level +-------------------------------- + +TCMU supports a "pass_level" option with valid values of 0 or 1. When +the value is 0 (the default), nearly all SCSI commands received for +the device are passed through to the handler. This allows maximum +flexibility but increases the amount of code required by the handler, +to support all mandatory SCSI commands. If pass_level is set to 1, +then only IO-related commands are presented, and the rest are handled +by LIO's in-kernel command emulation. The commands presented at level +1 include all versions of: + +READ +WRITE +WRITE_VERIFY +XDWRITEREAD +WRITE_SAME +COMPARE_AND_WRITE +SYNCHRONIZE_CACHE +UNMAP + + +A final note +------------ + +Please be careful to return codes as defined by the SCSI +specifications. These are different than some values defined in the +scsi/scsi.h include file. For example, CHECK CONDITION's status code +is 2, not 1. |