From cb2d956dd329caa11b5ece454dc52253aa038e73 Mon Sep 17 00:00:00 2001 From: Luciano Coelho Date: Mon, 17 Feb 2014 16:52:35 +0200 Subject: cfg80211: refactor cfg80211_can_use_iftype_chan() Separate the code that counts the interface types and channels from the code that check the interface combinations. The new function that checks for combinations is exported so it can be called by the drivers. This is done in preparation for moving the interface combinations checks out of cfg80211. Signed-off-by: Luciano Coelho Signed-off-by: Johannes Berg --- Documentation/DocBook/80211.tmpl | 1 + 1 file changed, 1 insertion(+) (limited to 'Documentation') diff --git a/Documentation/DocBook/80211.tmpl b/Documentation/DocBook/80211.tmpl index 044b76436e83..d9b9416c989f 100644 --- a/Documentation/DocBook/80211.tmpl +++ b/Documentation/DocBook/80211.tmpl @@ -100,6 +100,7 @@ !Finclude/net/cfg80211.h wdev_priv !Finclude/net/cfg80211.h ieee80211_iface_limit !Finclude/net/cfg80211.h ieee80211_iface_combination +!Finclude/net/cfg80211.h cfg80211_check_combinations Actions and configuration -- cgit v1.2.3 From a1a8b4594f8d30e650dad19832e17552219e5694 Mon Sep 17 00:00:00 2001 From: Clement Perrochaud Date: Wed, 2 Apr 2014 09:03:27 +0000 Subject: NFC: pn544: i2c: Add DTS Documentation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Describe the properties used by the PN544 NFC controller driver. Signed-off-by: Clément Perrochaud Signed-off-by: Samuel Ortiz --- .../devicetree/bindings/net/nfc/pn544.txt | 35 ++++++++++++++++++++++ 1 file changed, 35 insertions(+) create mode 100644 Documentation/devicetree/bindings/net/nfc/pn544.txt (limited to 'Documentation') diff --git a/Documentation/devicetree/bindings/net/nfc/pn544.txt b/Documentation/devicetree/bindings/net/nfc/pn544.txt new file mode 100644 index 000000000000..dab69f36167c --- /dev/null +++ b/Documentation/devicetree/bindings/net/nfc/pn544.txt @@ -0,0 +1,35 @@ +* NXP Semiconductors PN544 NFC Controller + +Required properties: +- compatible: Should be "nxp,pn544-i2c". +- clock-frequency: I²C work frequency. +- reg: address on the bus +- interrupt-parent: phandle for the interrupt gpio controller +- interrupts: GPIO interrupt to which the chip is connected +- enable-gpios: Output GPIO pin used for enabling/disabling the PN544 +- firmware-gpios: Output GPIO pin used to enter firmware download mode + +Optional SoC Specific Properties: +- pinctrl-names: Contains only one value - "default". +- pintctrl-0: Specifies the pin control groups used for this controller. + +Example (for ARM-based BeagleBone with PN544 on I2C2): + +&i2c2 { + + status = "okay"; + + pn544: pn544@28 { + + compatible = "nxp,pn544-i2c"; + + reg = <0x28>; + clock-frequency = <400000>; + + interrupt-parent = <&gpio1>; + interrupts = <17 GPIO_ACTIVE_HIGH>; + + enable-gpios = <&gpio3 21 GPIO_ACTIVE_HIGH>; + firmware-gpios = <&gpio3 19 GPIO_ACTIVE_HIGH>; + }; +}; -- cgit v1.2.3 From 3b82637813a9ea9f79ac6315341c59bfe408bf11 Mon Sep 17 00:00:00 2001 From: "Mark A. Greer" Date: Tue, 25 Mar 2014 08:54:41 -0700 Subject: NFC: trf7970a: Document the 'autosuspend-delay' DTS property The trf7970a driver recently had support added for the 'autosuspend-delay' property so document it. Signed-off-by: Mark A. Greer Signed-off-by: Samuel Ortiz --- Documentation/devicetree/bindings/net/nfc/trf7970a.txt | 2 ++ 1 file changed, 2 insertions(+) (limited to 'Documentation') diff --git a/Documentation/devicetree/bindings/net/nfc/trf7970a.txt b/Documentation/devicetree/bindings/net/nfc/trf7970a.txt index 8dd3ef7bc56b..1e436133685f 100644 --- a/Documentation/devicetree/bindings/net/nfc/trf7970a.txt +++ b/Documentation/devicetree/bindings/net/nfc/trf7970a.txt @@ -12,6 +12,7 @@ Required properties: Optional SoC Specific Properties: - pinctrl-names: Contains only one value - "default". - pintctrl-0: Specifies the pin control groups used for this controller. +- autosuspend-delay: Specify autosuspend delay in milliseconds. Example (for ARM-based BeagleBone with TRF7970A on SPI1): @@ -29,6 +30,7 @@ Example (for ARM-based BeagleBone with TRF7970A on SPI1): ti,enable-gpios = <&gpio2 2 GPIO_ACTIVE_LOW>, <&gpio2 5 GPIO_ACTIVE_LOW>; vin-supply = <&ldo3_reg>; + autosuspend-delay = <30000>; status = "okay"; }; }; -- cgit v1.2.3 From 4cd3675ebf74d7f559038ded6aa8088e4099a83d Mon Sep 17 00:00:00 2001 From: Chema Gonzalez Date: Mon, 21 Apr 2014 09:21:24 -0700 Subject: filter: added BPF random opcode Added a new ancillary load (bpf call in eBPF parlance) that produces a 32-bit random number. We are implementing it as an ancillary load (instead of an ISA opcode) because (a) it is simpler, (b) allows easy JITing, and (c) seems more in line with generic ISAs that do not have "get a random number" as a instruction, but as an OS call. The main use for this ancillary load is to perform random packet sampling. Signed-off-by: Chema Gonzalez Acked-by: Alexei Starovoitov Acked-by: Daniel Borkmann Signed-off-by: David S. Miller --- Documentation/networking/filter.txt | 13 +++++++++++++ include/linux/filter.h | 1 + include/uapi/linux/filter.h | 3 ++- net/core/filter.c | 12 ++++++++++++ tools/net/bpf_exp.l | 1 + tools/net/bpf_exp.y | 11 ++++++++++- 6 files changed, 39 insertions(+), 2 deletions(-) (limited to 'Documentation') diff --git a/Documentation/networking/filter.txt b/Documentation/networking/filter.txt index 81f940f4e884..82e1cb0b3da8 100644 --- a/Documentation/networking/filter.txt +++ b/Documentation/networking/filter.txt @@ -281,6 +281,7 @@ Possible BPF extensions are shown in the following table: cpu raw_smp_processor_id() vlan_tci vlan_tx_tag_get(skb) vlan_pr vlan_tx_tag_present(skb) + rand prandom_u32() These extensions can also be prefixed with '#'. Examples for low-level BPF: @@ -308,6 +309,18 @@ Examples for low-level BPF: ret #-1 drop: ret #0 +** icmp random packet sampling, 1 in 4 + ldh [12] + jne #0x800, drop + ldb [23] + jneq #1, drop + # get a random uint32 number + ld rand + mod #4 + jneq #1, drop + ret #-1 + drop: ret #0 + ** SECCOMP filter example: ld [4] /* offsetof(struct seccomp_data, arch) */ diff --git a/include/linux/filter.h b/include/linux/filter.h index 024fd03e5d18..759abf78dd61 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -223,6 +223,7 @@ enum { BPF_S_ANC_VLAN_TAG, BPF_S_ANC_VLAN_TAG_PRESENT, BPF_S_ANC_PAY_OFFSET, + BPF_S_ANC_RANDOM, }; #endif /* __LINUX_FILTER_H__ */ diff --git a/include/uapi/linux/filter.h b/include/uapi/linux/filter.h index 8eb9ccaa5b48..253b4d42cf2b 100644 --- a/include/uapi/linux/filter.h +++ b/include/uapi/linux/filter.h @@ -130,7 +130,8 @@ struct sock_fprog { /* Required for SO_ATTACH_FILTER. */ #define SKF_AD_VLAN_TAG 44 #define SKF_AD_VLAN_TAG_PRESENT 48 #define SKF_AD_PAY_OFFSET 52 -#define SKF_AD_MAX 56 +#define SKF_AD_RANDOM 56 +#define SKF_AD_MAX 60 #define SKF_NET_OFF (-0x100000) #define SKF_LL_OFF (-0x200000) diff --git a/net/core/filter.c b/net/core/filter.c index cd58614660cf..78a636e60a0b 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -643,6 +643,12 @@ static u64 __get_raw_cpu_id(u64 ctx, u64 A, u64 X, u64 r4, u64 r5) return raw_smp_processor_id(); } +/* note that this only generates 32-bit random numbers */ +static u64 __get_random_u32(u64 ctx, u64 A, u64 X, u64 r4, u64 r5) +{ + return (u64)prandom_u32(); +} + /* Register mappings for user programs. */ #define A_REG 0 #define X_REG 7 @@ -779,6 +785,7 @@ static bool convert_bpf_extensions(struct sock_filter *fp, case SKF_AD_OFF + SKF_AD_NLATTR: case SKF_AD_OFF + SKF_AD_NLATTR_NEST: case SKF_AD_OFF + SKF_AD_CPU: + case SKF_AD_OFF + SKF_AD_RANDOM: /* arg1 = ctx */ insn->code = BPF_ALU64 | BPF_MOV | BPF_X; insn->a_reg = ARG1_REG; @@ -812,6 +819,9 @@ static bool convert_bpf_extensions(struct sock_filter *fp, case SKF_AD_OFF + SKF_AD_CPU: insn->imm = __get_raw_cpu_id - __bpf_call_base; break; + case SKF_AD_OFF + SKF_AD_RANDOM: + insn->imm = __get_random_u32 - __bpf_call_base; + break; } break; @@ -1362,6 +1372,7 @@ int sk_chk_filter(struct sock_filter *filter, unsigned int flen) ANCILLARY(VLAN_TAG); ANCILLARY(VLAN_TAG_PRESENT); ANCILLARY(PAY_OFFSET); + ANCILLARY(RANDOM); } /* ancillary operation unknown or unsupported */ @@ -1746,6 +1757,7 @@ void sk_decode_filter(struct sock_filter *filt, struct sock_filter *to) [BPF_S_ANC_VLAN_TAG] = BPF_LD|BPF_B|BPF_ABS, [BPF_S_ANC_VLAN_TAG_PRESENT] = BPF_LD|BPF_B|BPF_ABS, [BPF_S_ANC_PAY_OFFSET] = BPF_LD|BPF_B|BPF_ABS, + [BPF_S_ANC_RANDOM] = BPF_LD|BPF_B|BPF_ABS, [BPF_S_LD_W_LEN] = BPF_LD|BPF_W|BPF_LEN, [BPF_S_LD_W_IND] = BPF_LD|BPF_W|BPF_IND, [BPF_S_LD_H_IND] = BPF_LD|BPF_H|BPF_IND, diff --git a/tools/net/bpf_exp.l b/tools/net/bpf_exp.l index bf7be77ddd62..833a96611da6 100644 --- a/tools/net/bpf_exp.l +++ b/tools/net/bpf_exp.l @@ -92,6 +92,7 @@ extern void yyerror(const char *str); "#"?("cpu") { return K_CPU; } "#"?("vlan_tci") { return K_VLANT; } "#"?("vlan_pr") { return K_VLANP; } +"#"?("rand") { return K_RAND; } ":" { return ':'; } "," { return ','; } diff --git a/tools/net/bpf_exp.y b/tools/net/bpf_exp.y index d15efc989ef5..e6306c51c26f 100644 --- a/tools/net/bpf_exp.y +++ b/tools/net/bpf_exp.y @@ -56,7 +56,7 @@ static void bpf_set_jmp_label(char *label, enum jmp_type type); %token OP_LDXI %token K_PKT_LEN K_PROTO K_TYPE K_NLATTR K_NLATTR_NEST K_MARK K_QUEUE K_HATYPE -%token K_RXHASH K_CPU K_IFIDX K_VLANT K_VLANP K_POFF +%token K_RXHASH K_CPU K_IFIDX K_VLANT K_VLANP K_POFF K_RAND %token ':' ',' '[' ']' '(' ')' 'x' 'a' '+' 'M' '*' '&' '#' '%' @@ -164,6 +164,9 @@ ldb | OP_LDB K_POFF { bpf_set_curr_instr(BPF_LD | BPF_B | BPF_ABS, 0, 0, SKF_AD_OFF + SKF_AD_PAY_OFFSET); } + | OP_LDB K_RAND { + bpf_set_curr_instr(BPF_LD | BPF_B | BPF_ABS, 0, 0, + SKF_AD_OFF + SKF_AD_RANDOM); } ; ldh @@ -212,6 +215,9 @@ ldh | OP_LDH K_POFF { bpf_set_curr_instr(BPF_LD | BPF_H | BPF_ABS, 0, 0, SKF_AD_OFF + SKF_AD_PAY_OFFSET); } + | OP_LDH K_RAND { + bpf_set_curr_instr(BPF_LD | BPF_H | BPF_ABS, 0, 0, + SKF_AD_OFF + SKF_AD_RANDOM); } ; ldi @@ -265,6 +271,9 @@ ld | OP_LD K_POFF { bpf_set_curr_instr(BPF_LD | BPF_W | BPF_ABS, 0, 0, SKF_AD_OFF + SKF_AD_PAY_OFFSET); } + | OP_LD K_RAND { + bpf_set_curr_instr(BPF_LD | BPF_W | BPF_ABS, 0, 0, + SKF_AD_OFF + SKF_AD_RANDOM); } | OP_LD 'M' '[' number ']' { bpf_set_curr_instr(BPF_LD | BPF_MEM, 0, 0, $4); } | OP_LD '[' 'x' '+' number ']' { -- cgit v1.2.3 From 2d283862dc62daead9db0dc89cd0d0351e91f765 Mon Sep 17 00:00:00 2001 From: Alexey Charkov Date: Tue, 22 Apr 2014 19:28:09 +0400 Subject: net: via-rhine: add OF bus binding This should make the driver usable with VIA/WonderMedia ARM-based Systems-on-Chip integrated Rhine III adapters. Note that these are always in MMIO mode, and don't have any known EEPROM. Signed-off-by: Alexey Charkov Acked-by: Rob Herring Signed-off-by: David S. Miller --- .../devicetree/bindings/net/via-rhine.txt | 17 ++ arch/arm/boot/dts/vt8500.dtsi | 6 + arch/arm/boot/dts/wm8650.dtsi | 6 + arch/arm/boot/dts/wm8850.dtsi | 6 + drivers/net/ethernet/via/Kconfig | 2 +- drivers/net/ethernet/via/via-rhine.c | 307 +++++++++++++-------- 6 files changed, 229 insertions(+), 115 deletions(-) create mode 100644 Documentation/devicetree/bindings/net/via-rhine.txt (limited to 'Documentation') diff --git a/Documentation/devicetree/bindings/net/via-rhine.txt b/Documentation/devicetree/bindings/net/via-rhine.txt new file mode 100644 index 000000000000..334eca2bf937 --- /dev/null +++ b/Documentation/devicetree/bindings/net/via-rhine.txt @@ -0,0 +1,17 @@ +* VIA Rhine 10/100 Network Controller + +Required properties: +- compatible : Should be "via,vt8500-rhine" for integrated + Rhine controllers found in VIA VT8500, WonderMedia WM8950 + and similar. These are listed as 1106:3106 rev. 0x84 on the + virtual PCI bus under vendor-provided kernels +- reg : Address and length of the io space +- interrupts : Should contain the controller interrupt line + +Examples: + +ethernet@d8004000 { + compatible = "via,vt8500-rhine"; + reg = <0xd8004000 0x100>; + interrupts = <10>; +}; diff --git a/arch/arm/boot/dts/vt8500.dtsi b/arch/arm/boot/dts/vt8500.dtsi index 51d0e912c8f5..1929ad390d88 100644 --- a/arch/arm/boot/dts/vt8500.dtsi +++ b/arch/arm/boot/dts/vt8500.dtsi @@ -165,5 +165,11 @@ reg = <0xd8100000 0x10000>; interrupts = <48>; }; + + ethernet@d8004000 { + compatible = "via,vt8500-rhine"; + reg = <0xd8004000 0x100>; + interrupts = <10>; + }; }; }; diff --git a/arch/arm/boot/dts/wm8650.dtsi b/arch/arm/boot/dts/wm8650.dtsi index 7525982262ac..b1c59a766a13 100644 --- a/arch/arm/boot/dts/wm8650.dtsi +++ b/arch/arm/boot/dts/wm8650.dtsi @@ -218,5 +218,11 @@ reg = <0xd8100000 0x10000>; interrupts = <48>; }; + + ethernet@d8004000 { + compatible = "via,vt8500-rhine"; + reg = <0xd8004000 0x100>; + interrupts = <10>; + }; }; }; diff --git a/arch/arm/boot/dts/wm8850.dtsi b/arch/arm/boot/dts/wm8850.dtsi index d98386dd2882..8fbccfbe75f3 100644 --- a/arch/arm/boot/dts/wm8850.dtsi +++ b/arch/arm/boot/dts/wm8850.dtsi @@ -298,5 +298,11 @@ bus-width = <4>; sdon-inverted; }; + + ethernet@d8004000 { + compatible = "via,vt8500-rhine"; + reg = <0xd8004000 0x100>; + interrupts = <10>; + }; }; }; diff --git a/drivers/net/ethernet/via/Kconfig b/drivers/net/ethernet/via/Kconfig index 8a049a2b4474..f66ddaee0c87 100644 --- a/drivers/net/ethernet/via/Kconfig +++ b/drivers/net/ethernet/via/Kconfig @@ -19,7 +19,7 @@ if NET_VENDOR_VIA config VIA_RHINE tristate "VIA Rhine support" - depends on PCI + depends on (PCI || USE_OF) select CRC32 select MII ---help--- diff --git a/drivers/net/ethernet/via/via-rhine.c b/drivers/net/ethernet/via/via-rhine.c index 20b83f11004a..4fa92012ceac 100644 --- a/drivers/net/ethernet/via/via-rhine.c +++ b/drivers/net/ethernet/via/via-rhine.c @@ -94,6 +94,10 @@ static const int multicast_filter_limit = 32; #include #include #include +#include +#include +#include +#include #include #include #include @@ -279,6 +283,15 @@ static DEFINE_PCI_DEVICE_TABLE(rhine_pci_tbl) = { }; MODULE_DEVICE_TABLE(pci, rhine_pci_tbl); +/* OpenFirmware identifiers for platform-bus devices + * The .data field is currently only used to store chip revision + * (for quirks etc.) + */ +static struct of_device_id rhine_of_tbl[] = { + { .compatible = "via,vt8500-rhine", .data = (void *)0x84 }, + { } /* terminate list */ +}; +MODULE_DEVICE_TABLE(of, rhine_of_tbl); /* Offsets to the device registers. */ enum register_offsets { @@ -847,7 +860,8 @@ static void rhine_hw_init(struct net_device *dev, long pioaddr) msleep(5); /* Reload EEPROM controlled bytes cleared by soft reset */ - rhine_reload_eeprom(pioaddr, dev); + if (dev_is_pci(dev->dev.parent)) + rhine_reload_eeprom(pioaddr, dev); } static const struct net_device_ops rhine_netdev_ops = { @@ -868,125 +882,55 @@ static const struct net_device_ops rhine_netdev_ops = { #endif }; -static int rhine_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) +static int rhine_init_one_common(struct device *hwdev, int revision, + long pioaddr, void __iomem *ioaddr, int irq) { struct net_device *dev; struct rhine_private *rp; - struct device *hwdev = &pdev->dev; - int revision = pdev->revision; - int i, rc; - u32 quirks; - long pioaddr; - long memaddr; - void __iomem *ioaddr; - int io_size, phy_id; + int i, rc, phy_id; const char *name; -#ifdef USE_MMIO - int bar = 1; -#else - int bar = 0; -#endif - -/* when built into the kernel, we only print version if device is found */ -#ifndef MODULE - pr_info_once("%s\n", version); -#endif - - io_size = 256; - phy_id = 0; - quirks = 0; - name = "Rhine"; - if (revision < VTunknown0) { - quirks = rqRhineI; - io_size = 128; - } else if (revision >= VT6102) { - quirks = rqWOL | rqForceReset; - if (revision < VT6105) { - name = "Rhine II"; - quirks |= rqStatusWBRace; /* Rhine-II exclusive */ - } else { - phy_id = 1; /* Integrated PHY, phy_id fixed to 1 */ - if (revision >= VT6105_B0) - quirks |= rq6patterns; - if (revision < VT6105M) - name = "Rhine III"; - else - name = "Rhine III (Management Adapter)"; - } - } - - rc = pci_enable_device(pdev); - if (rc) - goto err_out; /* this should always be supported */ rc = dma_set_mask(hwdev, DMA_BIT_MASK(32)); if (rc) { dev_err(hwdev, "32-bit DMA addresses not supported by the card!?\n"); - goto err_out_pci_disable; - } - - /* sanity check */ - if ((pci_resource_len(pdev, 0) < io_size) || - (pci_resource_len(pdev, 1) < io_size)) { - rc = -EIO; - dev_err(hwdev, "Insufficient PCI resources, aborting\n"); - goto err_out_pci_disable; + goto err_out; } - pioaddr = pci_resource_start(pdev, 0); - memaddr = pci_resource_start(pdev, 1); - - pci_set_master(pdev); - dev = alloc_etherdev(sizeof(struct rhine_private)); if (!dev) { rc = -ENOMEM; - goto err_out_pci_disable; + goto err_out; } SET_NETDEV_DEV(dev, hwdev); rp = netdev_priv(dev); rp->dev = dev; rp->revision = revision; - rp->quirks = quirks; rp->pioaddr = pioaddr; + rp->base = ioaddr; + rp->irq = irq; rp->msg_enable = netif_msg_init(debug, RHINE_MSG_DEFAULT); - rc = pci_request_regions(pdev, DRV_NAME); - if (rc) - goto err_out_free_netdev; - - ioaddr = pci_iomap(pdev, bar, io_size); - if (!ioaddr) { - rc = -EIO; - dev_err(hwdev, - "ioremap failed for device %s, region 0x%X @ 0x%lX\n", - dev_name(hwdev), io_size, memaddr); - goto err_out_free_res; - } - -#ifdef USE_MMIO - enable_mmio(pioaddr, quirks); - - /* Check that selected MMIO registers match the PIO ones */ - i = 0; - while (mmio_verify_registers[i]) { - int reg = mmio_verify_registers[i++]; - unsigned char a = inb(pioaddr+reg); - unsigned char b = readb(ioaddr+reg); - if (a != b) { - rc = -EIO; - dev_err(hwdev, - "MMIO do not match PIO [%02x] (%02x != %02x)\n", - reg, a, b); - goto err_out_unmap; + phy_id = 0; + name = "Rhine"; + if (revision < VTunknown0) { + rp->quirks = rqRhineI; + } else if (revision >= VT6102) { + rp->quirks = rqWOL | rqForceReset; + if (revision < VT6105) { + name = "Rhine II"; + rp->quirks |= rqStatusWBRace; /* Rhine-II exclusive */ + } else { + phy_id = 1; /* Integrated PHY, phy_id fixed to 1 */ + if (revision >= VT6105_B0) + rp->quirks |= rq6patterns; + if (revision < VT6105M) + name = "Rhine III"; + else + name = "Rhine III (Management Adapter)"; } } -#endif /* USE_MMIO */ - - rp->base = ioaddr; - rp->irq = pdev->irq; u64_stats_init(&rp->tx_stats.syncp); u64_stats_init(&rp->rx_stats.syncp); @@ -1039,16 +983,10 @@ static int rhine_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) /* dev->name not defined before register_netdev()! */ rc = register_netdev(dev); if (rc) - goto err_out_unmap; + goto err_out_free_netdev; netdev_info(dev, "VIA %s at 0x%lx, %pM, IRQ %d\n", - name, -#ifdef USE_MMIO - memaddr, -#else - (long)ioaddr, -#endif - dev->dev_addr, rp->irq); + name, (long)ioaddr, dev->dev_addr, rp->irq); dev_set_drvdata(hwdev, dev); @@ -1079,18 +1017,126 @@ static int rhine_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) return 0; +err_out_free_netdev: + free_netdev(dev); +err_out: + return rc; +} + +static int rhine_init_one_pci(struct pci_dev *pdev, + const struct pci_device_id *ent) +{ + struct device *hwdev = &pdev->dev; + int i, rc; + long pioaddr, memaddr; + void __iomem *ioaddr; + int io_size = pdev->revision < VTunknown0 ? 128 : 256; + u32 quirks = pdev->revision < VTunknown0 ? rqRhineI : 0; +#ifdef USE_MMIO + int bar = 1; +#else + int bar = 0; +#endif + +/* when built into the kernel, we only print version if device is found */ +#ifndef MODULE + pr_info_once("%s\n", version); +#endif + + rc = pci_enable_device(pdev); + if (rc) + goto err_out; + + /* sanity check */ + if ((pci_resource_len(pdev, 0) < io_size) || + (pci_resource_len(pdev, 1) < io_size)) { + rc = -EIO; + dev_err(hwdev, "Insufficient PCI resources, aborting\n"); + goto err_out_pci_disable; + } + + pioaddr = pci_resource_start(pdev, 0); + memaddr = pci_resource_start(pdev, 1); + + pci_set_master(pdev); + + rc = pci_request_regions(pdev, DRV_NAME); + if (rc) + goto err_out_pci_disable; + + ioaddr = pci_iomap(pdev, bar, io_size); + if (!ioaddr) { + rc = -EIO; + dev_err(hwdev, + "ioremap failed for device %s, region 0x%X @ 0x%lX\n", + dev_name(hwdev), io_size, memaddr); + goto err_out_free_res; + } + +#ifdef USE_MMIO + enable_mmio(pioaddr, quirks); + + /* Check that selected MMIO registers match the PIO ones */ + i = 0; + while (mmio_verify_registers[i]) { + int reg = mmio_verify_registers[i++]; + unsigned char a = inb(pioaddr+reg); + unsigned char b = readb(ioaddr+reg); + + if (a != b) { + rc = -EIO; + dev_err(hwdev, + "MMIO do not match PIO [%02x] (%02x != %02x)\n", + reg, a, b); + goto err_out_unmap; + } + } +#endif /* USE_MMIO */ + + rc = rhine_init_one_common(&pdev->dev, pdev->revision, + pioaddr, ioaddr, pdev->irq); + if (!rc) + return 0; + err_out_unmap: pci_iounmap(pdev, ioaddr); err_out_free_res: pci_release_regions(pdev); -err_out_free_netdev: - free_netdev(dev); err_out_pci_disable: pci_disable_device(pdev); err_out: return rc; } +static int rhine_init_one_platform(struct platform_device *pdev) +{ + const struct of_device_id *match; + u32 revision; + int irq; + struct resource *res; + void __iomem *ioaddr; + + match = of_match_device(rhine_of_tbl, &pdev->dev); + if (!match) + return -EINVAL; + + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + ioaddr = devm_ioremap_resource(&pdev->dev, res); + if (IS_ERR(ioaddr)) + return PTR_ERR(ioaddr); + + irq = irq_of_parse_and_map(pdev->dev.of_node, 0); + if (!irq) + return -EINVAL; + + revision = (u32)match->data; + if (!revision) + return -EINVAL; + + return rhine_init_one_common(&pdev->dev, revision, + (long)ioaddr, ioaddr, irq); +} + static int alloc_ring(struct net_device* dev) { struct rhine_private *rp = netdev_priv(dev); @@ -2297,7 +2343,7 @@ static int rhine_close(struct net_device *dev) } -static void rhine_remove_one(struct pci_dev *pdev) +static void rhine_remove_one_pci(struct pci_dev *pdev) { struct net_device *dev = pci_get_drvdata(pdev); struct rhine_private *rp = netdev_priv(dev); @@ -2311,7 +2357,21 @@ static void rhine_remove_one(struct pci_dev *pdev) pci_disable_device(pdev); } -static void rhine_shutdown (struct pci_dev *pdev) +static int rhine_remove_one_platform(struct platform_device *pdev) +{ + struct net_device *dev = platform_get_drvdata(pdev); + struct rhine_private *rp = netdev_priv(dev); + + unregister_netdev(dev); + + iounmap(rp->base); + + free_netdev(dev); + + return 0; +} + +static void rhine_shutdown_pci(struct pci_dev *pdev) { struct net_device *dev = pci_get_drvdata(pdev); struct rhine_private *rp = netdev_priv(dev); @@ -2378,7 +2438,7 @@ static int rhine_suspend(struct device *device) netif_device_detach(dev); if (dev_is_pci(device)) - rhine_shutdown(to_pci_dev(device)); + rhine_shutdown_pci(to_pci_dev(device)); return 0; } @@ -2418,15 +2478,26 @@ static SIMPLE_DEV_PM_OPS(rhine_pm_ops, rhine_suspend, rhine_resume); #endif /* !CONFIG_PM_SLEEP */ -static struct pci_driver rhine_driver = { +static struct pci_driver rhine_driver_pci = { .name = DRV_NAME, .id_table = rhine_pci_tbl, - .probe = rhine_init_one, - .remove = rhine_remove_one, - .shutdown = rhine_shutdown, + .probe = rhine_init_one_pci, + .remove = rhine_remove_one_pci, + .shutdown = rhine_shutdown_pci, .driver.pm = RHINE_PM_OPS, }; +static struct platform_driver rhine_driver_platform = { + .probe = rhine_init_one_platform, + .remove = rhine_remove_one_platform, + .driver = { + .name = DRV_NAME, + .owner = THIS_MODULE, + .of_match_table = rhine_of_tbl, + .pm = RHINE_PM_OPS, + } +}; + static struct dmi_system_id rhine_dmi_table[] __initdata = { { .ident = "EPIA-M", @@ -2447,6 +2518,8 @@ static struct dmi_system_id rhine_dmi_table[] __initdata = { static int __init rhine_init(void) { + int ret_pci, ret_platform; + /* when a module, this is printed whether or not devices are found in probe */ #ifdef MODULE pr_info("%s\n", version); @@ -2459,13 +2532,19 @@ static int __init rhine_init(void) else if (avoid_D3) pr_info("avoid_D3 set\n"); - return pci_register_driver(&rhine_driver); + ret_pci = pci_register_driver(&rhine_driver_pci); + ret_platform = platform_driver_register(&rhine_driver_platform); + if ((ret_pci < 0) && (ret_platform < 0)) + return ret_pci; + + return 0; } static void __exit rhine_cleanup(void) { - pci_unregister_driver(&rhine_driver); + platform_driver_unregister(&rhine_driver_platform); + pci_unregister_driver(&rhine_driver_pci); } -- cgit v1.2.3 From f05b42eaa22cd7c6736d31316e6046c5127f8721 Mon Sep 17 00:00:00 2001 From: Mahesh Bandewar Date: Tue, 22 Apr 2014 16:30:20 -0700 Subject: bonding: Added bond_tlb_xmit() for tlb mode. Re-organized the xmit function for the lb mode separating tlb xmit from the alb mode. This will enable use of the hashing policies like 802.3ad mode. Also extended use of xmit-hash-policy to tlb mode. Now the tlb-mode defaults to BOND_XMIT_POLICY_LAYER2 if the xmit policy module parameter is not set (just like 802.3ad, or Xor mode). Change-Id: I140257403d272df75f477b380207338d0f04963e Signed-off-by: Mahesh Bandewar Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- Documentation/networking/bonding.txt | 2 +- drivers/net/bonding/bond_alb.c | 26 ++++++++++++++++++++++++++ drivers/net/bonding/bond_alb.h | 1 + drivers/net/bonding/bond_main.c | 6 ++++-- drivers/net/bonding/bond_options.c | 2 +- 5 files changed, 33 insertions(+), 4 deletions(-) (limited to 'Documentation') diff --git a/Documentation/networking/bonding.txt b/Documentation/networking/bonding.txt index a383c00392d0..a97c567f24e8 100644 --- a/Documentation/networking/bonding.txt +++ b/Documentation/networking/bonding.txt @@ -769,7 +769,7 @@ use_carrier xmit_hash_policy Selects the transmit hash policy to use for slave selection in - balance-xor and 802.3ad modes. Possible values are: + balance-xor, 802.3ad, and tlb modes. Possible values are: layer2 diff --git a/drivers/net/bonding/bond_alb.c b/drivers/net/bonding/bond_alb.c index 5cd36016c393..153232ed4b3f 100644 --- a/drivers/net/bonding/bond_alb.c +++ b/drivers/net/bonding/bond_alb.c @@ -1381,6 +1381,32 @@ out: return NETDEV_TX_OK; } +int bond_tlb_xmit(struct sk_buff *skb, struct net_device *bond_dev) +{ + struct bonding *bond = netdev_priv(bond_dev); + struct ethhdr *eth_data; + struct slave *tx_slave = NULL; + u32 hash_index; + + skb_reset_mac_header(skb); + eth_data = eth_hdr(skb); + + /* Do not TX balance any multicast or broadcast */ + if (!is_multicast_ether_addr(eth_data->h_dest)) { + switch (skb->protocol) { + case htons(ETH_P_IP): + case htons(ETH_P_IPX): + /* In case of IPX, it will falback to L2 hash */ + case htons(ETH_P_IPV6): + hash_index = bond_xmit_hash(bond, skb); + tx_slave = tlb_choose_channel(bond, hash_index & 0xFF, skb->len); + break; + } + } + + return bond_do_alb_xmit(skb, bond, tx_slave); +} + int bond_alb_xmit(struct sk_buff *skb, struct net_device *bond_dev) { struct bonding *bond = netdev_priv(bond_dev); diff --git a/drivers/net/bonding/bond_alb.h b/drivers/net/bonding/bond_alb.h index e09dd4bfafff..5fc76c01636c 100644 --- a/drivers/net/bonding/bond_alb.h +++ b/drivers/net/bonding/bond_alb.h @@ -175,6 +175,7 @@ void bond_alb_deinit_slave(struct bonding *bond, struct slave *slave); void bond_alb_handle_link_change(struct bonding *bond, struct slave *slave, char link); void bond_alb_handle_active_change(struct bonding *bond, struct slave *new_slave); int bond_alb_xmit(struct sk_buff *skb, struct net_device *bond_dev); +int bond_tlb_xmit(struct sk_buff *skb, struct net_device *bond_dev); void bond_alb_monitor(struct work_struct *); int bond_alb_set_mac_address(struct net_device *bond_dev, void *addr); void bond_alb_clear_vlan(struct bonding *bond, unsigned short vlan_id); diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index c7046350c4b5..1fd32a16cbc5 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -3774,8 +3774,9 @@ static netdev_tx_t __bond_start_xmit(struct sk_buff *skb, struct net_device *dev case BOND_MODE_8023AD: return bond_3ad_xmit_xor(skb, dev); case BOND_MODE_ALB: - case BOND_MODE_TLB: return bond_alb_xmit(skb, dev); + case BOND_MODE_TLB: + return bond_tlb_xmit(skb, dev); default: /* Should never happen, mode already checked */ pr_err("%s: Error: Unknown bonding mode %d\n", @@ -3996,7 +3997,8 @@ static int bond_check_params(struct bond_params *params) if (xmit_hash_policy) { if ((bond_mode != BOND_MODE_XOR) && - (bond_mode != BOND_MODE_8023AD)) { + (bond_mode != BOND_MODE_8023AD) && + (bond_mode != BOND_MODE_TLB)) { pr_info("xmit_hash_policy param is irrelevant in mode %s\n", bond_mode_name(bond_mode)); } else { diff --git a/drivers/net/bonding/bond_options.c b/drivers/net/bonding/bond_options.c index 724e30fa20b9..dc3893841752 100644 --- a/drivers/net/bonding/bond_options.c +++ b/drivers/net/bonding/bond_options.c @@ -199,7 +199,7 @@ static const struct bond_option bond_opts[] = { [BOND_OPT_XMIT_HASH] = { .id = BOND_OPT_XMIT_HASH, .name = "xmit_hash_policy", - .desc = "balance-xor and 802.3ad hashing method", + .desc = "balance-xor, 802.3ad, and tlb hashing method", .values = bond_xmit_hashtype_tbl, .set = bond_option_xmit_hash_policy_set }, -- cgit v1.2.3 From e9f0fb88493570200b8dc1cc02d3e676412d25bc Mon Sep 17 00:00:00 2001 From: Mahesh Bandewar Date: Tue, 22 Apr 2014 16:30:22 -0700 Subject: bonding: Add tlb_dynamic_lb parameter for tlb mode The aggresive load balancing causes packet re-ordering as active flows are moved from a slave to another within the group. Sometime this aggresive lb is not necessary if the preference is for less re-ordering. This parameter if used with value "0" disables this dynamic flow shuffling minimizing packet re-ordering. Of course the side effect is that it has to live with the static load balancing that the hashing distribution provides. This impact is less severe if the correct xmit-hashing-policy is used for the tlb setup. The default value of the parameter is set to "1" mimicing the earlier behavior. Ran the netperf test with 200 stream for 1 min between two hosts with 4x1G trunk (xmit-lb mode with xmit-policy L3+4) before and after these changes. Following was the command used for those 200 instances - netperf -t TCP_RR -l 60 -s 5 -H -- -r81920,81920 Transactions per second: Before change: 1,367.11 After change: 1,470.65 Change-Id: Ie3f75c77282cf602e83a6e833c6eb164e72a0990 Signed-off-by: Mahesh Bandewar Signed-off-by: David S. Miller --- Documentation/networking/bonding.txt | 42 ++++++++++++++++++++++++++++++------ drivers/net/bonding/bond_alb.c | 19 ++++++++++++---- drivers/net/bonding/bond_main.c | 4 +++- drivers/net/bonding/bond_options.c | 27 +++++++++++++++++++++++ drivers/net/bonding/bond_options.h | 1 + drivers/net/bonding/bond_sysfs.c | 29 +++++++++++++++++++++++++ drivers/net/bonding/bonding.h | 1 + 7 files changed, 111 insertions(+), 12 deletions(-) (limited to 'Documentation') diff --git a/Documentation/networking/bonding.txt b/Documentation/networking/bonding.txt index a97c567f24e8..9c723ecd0025 100644 --- a/Documentation/networking/bonding.txt +++ b/Documentation/networking/bonding.txt @@ -585,13 +585,19 @@ mode balance-tlb or 5 Adaptive transmit load balancing: channel bonding that - does not require any special switch support. The - outgoing traffic is distributed according to the - current load (computed relative to the speed) on each - slave. Incoming traffic is received by the current - slave. If the receiving slave fails, another slave - takes over the MAC address of the failed receiving - slave. + does not require any special switch support. + + In tlb_dynamic_lb=1 mode; the outgoing traffic is + distributed according to the current load (computed + relative to the speed) on each slave. + + In tlb_dynamic_lb=0 mode; the load balancing based on + current load is disabled and the load is distributed + only using the hash distribution. + + Incoming traffic is received by the current slave. + If the receiving slave fails, another slave takes over + the MAC address of the failed receiving slave. Prerequisite: @@ -736,6 +742,28 @@ primary_reselect This option was added for bonding version 3.6.0. +tlb_dynamic_lb + + Specifies if dynamic shuffling of flows is enabled in tlb + mode. The value has no effect on any other modes. + + The default behavior of tlb mode is to shuffle active flows across + slaves based on the load in that interval. This gives nice lb + characteristics but can cause packet reordering. If re-ordering is + a concern use this variable to disable flow shuffling and rely on + load balancing provided solely by the hash distribution. + xmit-hash-policy can be used to select the appropriate hashing for + the setup. + + The sysfs entry can be used to change the setting per bond device + and the initial value is derived from the module parameter. The + sysfs entry is allowed to be changed only if the bond device is + down. + + The default value is "1" that enables flow shuffling while value "0" + disables it. This option was added in bonding driver 3.7.1 + + updelay Specifies the time, in milliseconds, to wait before enabling a diff --git a/drivers/net/bonding/bond_alb.c b/drivers/net/bonding/bond_alb.c index 153232ed4b3f..70de039dad2e 100644 --- a/drivers/net/bonding/bond_alb.c +++ b/drivers/net/bonding/bond_alb.c @@ -1356,7 +1356,8 @@ static int bond_do_alb_xmit(struct sk_buff *skb, struct bonding *bond, if (!tx_slave) { /* unbalanced or unassigned, send through primary */ tx_slave = rcu_dereference(bond->curr_active_slave); - bond_info->unbalanced_load += skb->len; + if (bond->params.tlb_dynamic_lb) + bond_info->unbalanced_load += skb->len; } if (tx_slave && SLAVE_IS_OK(tx_slave)) { @@ -1369,7 +1370,7 @@ static int bond_do_alb_xmit(struct sk_buff *skb, struct bonding *bond, goto out; } - if (tx_slave) { + if (tx_slave && bond->params.tlb_dynamic_lb) { _lock_tx_hashtbl(bond); __tlb_clear_slave(bond, tx_slave, 0); _unlock_tx_hashtbl(bond); @@ -1399,11 +1400,21 @@ int bond_tlb_xmit(struct sk_buff *skb, struct net_device *bond_dev) /* In case of IPX, it will falback to L2 hash */ case htons(ETH_P_IPV6): hash_index = bond_xmit_hash(bond, skb); - tx_slave = tlb_choose_channel(bond, hash_index & 0xFF, skb->len); + if (bond->params.tlb_dynamic_lb) { + tx_slave = tlb_choose_channel(bond, + hash_index & 0xFF, + skb->len); + } else { + struct list_head *iter; + int idx = hash_index % bond->slave_cnt; + + bond_for_each_slave_rcu(bond, tx_slave, iter) + if (--idx < 0) + break; + } break; } } - return bond_do_alb_xmit(skb, bond, tx_slave); } diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 1fd32a16cbc5..9d08e007d853 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -3096,7 +3096,8 @@ static int bond_open(struct net_device *bond_dev) */ if (bond_alb_initialize(bond, (bond->params.mode == BOND_MODE_ALB))) return -ENOMEM; - queue_delayed_work(bond->wq, &bond->alb_work, 0); + if (bond->params.tlb_dynamic_lb) + queue_delayed_work(bond->wq, &bond->alb_work, 0); } if (bond->params.miimon) /* link check interval, in milliseconds. */ @@ -4304,6 +4305,7 @@ static int bond_check_params(struct bond_params *params) params->min_links = min_links; params->lp_interval = lp_interval; params->packets_per_slave = packets_per_slave; + params->tlb_dynamic_lb = 1; /* Default value */ if (packets_per_slave > 0) { params->reciprocal_packets_per_slave = reciprocal_value(packets_per_slave); diff --git a/drivers/net/bonding/bond_options.c b/drivers/net/bonding/bond_options.c index dc3893841752..9fba7a1e6d51 100644 --- a/drivers/net/bonding/bond_options.c +++ b/drivers/net/bonding/bond_options.c @@ -70,6 +70,8 @@ static int bond_option_mode_set(struct bonding *bond, const struct bond_opt_value *newval); static int bond_option_slaves_set(struct bonding *bond, const struct bond_opt_value *newval); +static int bond_option_tlb_dynamic_lb_set(struct bonding *bond, + const struct bond_opt_value *newval); static const struct bond_opt_value bond_mode_tbl[] = { @@ -179,6 +181,12 @@ static const struct bond_opt_value bond_lp_interval_tbl[] = { { NULL, -1, 0}, }; +static const struct bond_opt_value bond_tlb_dynamic_lb_tbl[] = { + { "off", 0, 0}, + { "on", 1, BOND_VALFLAG_DEFAULT}, + { NULL, -1, 0} +}; + static const struct bond_option bond_opts[] = { [BOND_OPT_MODE] = { .id = BOND_OPT_MODE, @@ -364,6 +372,15 @@ static const struct bond_option bond_opts[] = { .flags = BOND_OPTFLAG_RAWVAL, .set = bond_option_slaves_set }, + [BOND_OPT_TLB_DYNAMIC_LB] = { + .id = BOND_OPT_TLB_DYNAMIC_LB, + .name = "dynamic_lb", + .desc = "Enable dynamic flow shuffling", + .unsuppmodes = BOND_MODE_ALL_EX(BIT(BOND_MODE_TLB)), + .values = bond_tlb_dynamic_lb_tbl, + .flags = BOND_OPTFLAG_IFDOWN, + .set = bond_option_tlb_dynamic_lb_set, + }, { } }; @@ -1337,3 +1354,13 @@ err_no_cmd: ret = -EPERM; goto out; } + +static int bond_option_tlb_dynamic_lb_set(struct bonding *bond, + const struct bond_opt_value *newval) +{ + pr_info("%s: Setting dynamic-lb to %s (%llu)\n", + bond->dev->name, newval->string, newval->value); + bond->params.tlb_dynamic_lb = newval->value; + + return 0; +} diff --git a/drivers/net/bonding/bond_options.h b/drivers/net/bonding/bond_options.h index 12be9e1bfb0c..c1860f06145a 100644 --- a/drivers/net/bonding/bond_options.h +++ b/drivers/net/bonding/bond_options.h @@ -62,6 +62,7 @@ enum { BOND_OPT_RESEND_IGMP, BOND_OPT_LP_INTERVAL, BOND_OPT_SLAVES, + BOND_OPT_TLB_DYNAMIC_LB, BOND_OPT_LAST }; diff --git a/drivers/net/bonding/bond_sysfs.c b/drivers/net/bonding/bond_sysfs.c index 0e8b268da0a0..431892f1a4ce 100644 --- a/drivers/net/bonding/bond_sysfs.c +++ b/drivers/net/bonding/bond_sysfs.c @@ -1039,6 +1039,34 @@ static ssize_t bonding_store_lp_interval(struct device *d, static DEVICE_ATTR(lp_interval, S_IRUGO | S_IWUSR, bonding_show_lp_interval, bonding_store_lp_interval); +static ssize_t bonding_show_tlb_dynamic_lb(struct device *d, + struct device_attribute *attr, + char *buf) +{ + struct bonding *bond = to_bond(d); + return sprintf(buf, "%d\n", bond->params.tlb_dynamic_lb); +} + +static ssize_t bonding_store_tlb_dynamic_lb(struct device *d, + struct device_attribute *attr, + const char *buf, + size_t count) +{ + struct bonding *bond = to_bond(d); + int ret; + + ret = bond_opt_tryset_rtnl(bond, BOND_OPT_TLB_DYNAMIC_LB, + (char *)buf); + if (!ret) + ret = count; + + return ret; +} + +static DEVICE_ATTR(tlb_dynamic_lb, S_IRUGO | S_IWUSR, + bonding_show_tlb_dynamic_lb, + bonding_store_tlb_dynamic_lb); + static ssize_t bonding_show_packets_per_slave(struct device *d, struct device_attribute *attr, char *buf) @@ -1099,6 +1127,7 @@ static struct attribute *per_bond_attrs[] = { &dev_attr_min_links.attr, &dev_attr_lp_interval.attr, &dev_attr_packets_per_slave.attr, + &dev_attr_tlb_dynamic_lb.attr, NULL, }; diff --git a/drivers/net/bonding/bonding.h b/drivers/net/bonding/bonding.h index c0948ca26389..c1c7c2f12ac4 100644 --- a/drivers/net/bonding/bonding.h +++ b/drivers/net/bonding/bonding.h @@ -174,6 +174,7 @@ struct bond_params { int resend_igmp; int lp_interval; int packets_per_slave; + int tlb_dynamic_lb; struct reciprocal_value reciprocal_packets_per_slave; }; -- cgit v1.2.3 From 0dbfc8fd32f3dbe3069969fd6120ce3c61964c40 Mon Sep 17 00:00:00 2001 From: Alexander Aring Date: Thu, 24 Apr 2014 19:09:06 +0200 Subject: devicetree: add at86rf230 bindings This patch adds devicetree bindings for the at86rf230 IEEE 802.15.4 SPI device driver. Signed-off-by: Alexander Aring Signed-off-by: David S. Miller --- .../bindings/net/ieee802154/at86rf230.txt | 23 ++++++++++++++++++++++ 1 file changed, 23 insertions(+) create mode 100644 Documentation/devicetree/bindings/net/ieee802154/at86rf230.txt (limited to 'Documentation') diff --git a/Documentation/devicetree/bindings/net/ieee802154/at86rf230.txt b/Documentation/devicetree/bindings/net/ieee802154/at86rf230.txt new file mode 100644 index 000000000000..d3bbdded4cbe --- /dev/null +++ b/Documentation/devicetree/bindings/net/ieee802154/at86rf230.txt @@ -0,0 +1,23 @@ +* AT86RF230 IEEE 802.15.4 * + +Required properties: + - compatible: should be "atmel,at86rf230", "atmel,at86rf231", + "atmel,at86rf233" or "atmel,at86rf212" + - spi-max-frequency: maximal bus speed, should be set to 7500000 depends + sync or async operation mode + - reg: the chipselect index + - interrupts: the interrupt generated by the device + +Optional properties: + - reset-gpio: GPIO spec for the rstn pin + - sleep-gpio: GPIO spec for the slp_tr pin + +Example: + + at86rf231@0 { + compatible = "atmel,at86rf231"; + spi-max-frequency = <7500000>; + reg = <0>; + interrupts = <19 1>; + interrupt-parent = <&gpio3>; + }; -- cgit v1.2.3 From fcecaeb026f9b35e679c12aed8a92bee3a673da0 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Thu, 24 Apr 2014 18:08:58 -0700 Subject: Documentation: add Broadcom SYSTEMPORT Device Tree bindings Add the Device Tree bindings documentation for the Broadcom SYSTEMPORT Ethernet MAC controller hardware. Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- .../bindings/net/broadcom-systemport.txt | 29 ++++++++++++++++++++++ 1 file changed, 29 insertions(+) create mode 100644 Documentation/devicetree/bindings/net/broadcom-systemport.txt (limited to 'Documentation') diff --git a/Documentation/devicetree/bindings/net/broadcom-systemport.txt b/Documentation/devicetree/bindings/net/broadcom-systemport.txt new file mode 100644 index 000000000000..1b7600e022dd --- /dev/null +++ b/Documentation/devicetree/bindings/net/broadcom-systemport.txt @@ -0,0 +1,29 @@ +* Broadcom BCM7xxx Ethernet Systemport Controller (SYSTEMPORT) + +Required properties: +- compatible: should be one of "brcm,systemport-v1.00" or "brcm,systemport" +- reg: address and length of the register set for the device. +- interrupts: interrupts for the device, first cell must be for the the rx + interrupts, and the second cell should be for the transmit queues +- local-mac-address: Ethernet MAC address (48 bits) of this adapter +- phy-mode: Should be a string describing the PHY interface to the + Ethernet switch/PHY, see Documentation/devicetree/bindings/net/ethernet.txt +- fixed-link: see Documentation/devicetree/bindings/net/fsl-tsec-phy.txt for + the property specific details + +Optional properties: +- systemport,num-tier2-arb: number of tier 2 arbiters, an integer +- systemport,num-tier1-arb: number of tier 1 arbiters, an integer +- systemport,num-txq: number of HW transmit queues, an integer +- systemport,num-rxq: number of HW receive queues, an integer + +Example: +ethernet@f04a0000 { + compatible = "brcm,systemport-v1.00"; + reg = <0xf04a0000 0x4650>; + local-mac-address = [ 00 11 22 33 44 55 ]; + fixed-link = <0 1 1000 0 0>; + phy-mode = "gmii"; + interrupts = <0x0 0x16 0x0>, + <0x0 0x17 0x0>; +}; -- cgit v1.2.3 From 6d48f44b7b2af67b33c1ae5994b8f642685c8bc8 Mon Sep 17 00:00:00 2001 From: Grygorii Strashko Date: Wed, 30 Apr 2014 15:23:33 +0300 Subject: mdio_bus: implement devm_mdiobus_alloc/devm_mdiobus_free Add a resource managed devm_mdiobus_alloc[_size]()/devm_mdiobus_free() to automatically clean up MDIO bus alocations made by MDIO drivers, thus leading to simplified MDIO drivers code. Cc: Florian Fainelli Cc: Sergei Shtylyov Acked-and-tested-by: Lad, Prabhakar Signed-off-by: Grygorii Strashko Signed-off-by: David S. Miller --- Documentation/driver-model/devres.txt | 5 +++ drivers/net/phy/mdio_bus.c | 67 +++++++++++++++++++++++++++++++++++ include/linux/phy.h | 7 ++++ 3 files changed, 79 insertions(+) (limited to 'Documentation') diff --git a/Documentation/driver-model/devres.txt b/Documentation/driver-model/devres.txt index 4f7897e99cba..c74e04494ade 100644 --- a/Documentation/driver-model/devres.txt +++ b/Documentation/driver-model/devres.txt @@ -308,3 +308,8 @@ SLAVE DMA ENGINE SPI devm_spi_register_master() + +MDIO + devm_mdiobus_alloc() + devm_mdiobus_alloc_size() + devm_mdiobus_free() diff --git a/drivers/net/phy/mdio_bus.c b/drivers/net/phy/mdio_bus.c index 76f54b32a120..68a9a3867c0f 100644 --- a/drivers/net/phy/mdio_bus.c +++ b/drivers/net/phy/mdio_bus.c @@ -69,6 +69,73 @@ struct mii_bus *mdiobus_alloc_size(size_t size) } EXPORT_SYMBOL(mdiobus_alloc_size); +static void _devm_mdiobus_free(struct device *dev, void *res) +{ + mdiobus_free(*(struct mii_bus **)res); +} + +static int devm_mdiobus_match(struct device *dev, void *res, void *data) +{ + struct mii_bus **r = res; + + if (WARN_ON(!r || !*r)) + return 0; + + return *r == data; +} + +/** + * devm_mdiobus_alloc_size - Resource-managed mdiobus_alloc_size() + * @dev: Device to allocate mii_bus for + * @sizeof_priv: Space to allocate for private structure. + * + * Managed mdiobus_alloc_size. mii_bus allocated with this function is + * automatically freed on driver detach. + * + * If an mii_bus allocated with this function needs to be freed separately, + * devm_mdiobus_free() must be used. + * + * RETURNS: + * Pointer to allocated mii_bus on success, NULL on failure. + */ +struct mii_bus *devm_mdiobus_alloc_size(struct device *dev, int sizeof_priv) +{ + struct mii_bus **ptr, *bus; + + ptr = devres_alloc(_devm_mdiobus_free, sizeof(*ptr), GFP_KERNEL); + if (!ptr) + return NULL; + + /* use raw alloc_dr for kmalloc caller tracing */ + bus = mdiobus_alloc_size(sizeof_priv); + if (bus) { + *ptr = bus; + devres_add(dev, ptr); + } else { + devres_free(ptr); + } + + return bus; +} +EXPORT_SYMBOL_GPL(devm_mdiobus_alloc); + +/** + * devm_mdiobus_free - Resource-managed mdiobus_free() + * @dev: Device this mii_bus belongs to + * @bus: the mii_bus associated with the device + * + * Free mii_bus allocated with devm_mdiobus_alloc_size(). + */ +void devm_mdiobus_free(struct device *dev, struct mii_bus *bus) +{ + int rc; + + rc = devres_release(dev, _devm_mdiobus_free, + devm_mdiobus_match, bus); + WARN_ON(rc); +} +EXPORT_SYMBOL_GPL(devm_mdiobus_free); + /** * mdiobus_release - mii_bus device release callback * @d: the target struct device that contains the mii_bus diff --git a/include/linux/phy.h b/include/linux/phy.h index 51d15f684e7e..864ddafad8cc 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -198,6 +198,13 @@ static inline struct mii_bus *mdiobus_alloc(void) int mdiobus_register(struct mii_bus *bus); void mdiobus_unregister(struct mii_bus *bus); void mdiobus_free(struct mii_bus *bus); +struct mii_bus *devm_mdiobus_alloc_size(struct device *dev, int sizeof_priv); +static inline struct mii_bus *devm_mdiobus_alloc(struct device *dev) +{ + return devm_mdiobus_alloc_size(dev, 0); +} + +void devm_mdiobus_free(struct device *dev, struct mii_bus *bus); struct phy_device *mdiobus_scan(struct mii_bus *bus, int addr); int mdiobus_read(struct mii_bus *bus, int addr, u32 regnum); int mdiobus_write(struct mii_bus *bus, int addr, u32 regnum, u16 val); -- cgit v1.2.3 From dfee07cceff94af498a8465353554e0b63c3b152 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Thu, 1 May 2014 08:16:03 -0700 Subject: net: filter: doc: expand and improve BPF documentation In particular, this patch tries to clarify internal BPF calling convention and adds internal BPF examples, JIT guide, use cases. Signed-off-by: Alexei Starovoitov Signed-off-by: Daniel Borkmann Signed-off-by: David S. Miller --- Documentation/networking/filter.txt | 158 +++++++++++++++++++++++++++++++++++- 1 file changed, 154 insertions(+), 4 deletions(-) (limited to 'Documentation') diff --git a/Documentation/networking/filter.txt b/Documentation/networking/filter.txt index 82e1cb0b3da8..748fd385535d 100644 --- a/Documentation/networking/filter.txt +++ b/Documentation/networking/filter.txt @@ -613,7 +613,7 @@ Some core changes of the new internal format: Therefore, BPF calling convention is defined as: - * R0 - return value from in-kernel function + * R0 - return value from in-kernel function, and exit value for BPF program * R1 - R5 - arguments from BPF program to in-kernel function * R6 - R9 - callee saved registers that in-kernel function will preserve * R10 - read-only frame pointer to access stack @@ -659,9 +659,140 @@ Some core changes of the new internal format: - Introduces bpf_call insn and register passing convention for zero overhead calls from/to other kernel functions: - After a kernel function call, R1 - R5 are reset to unreadable and R0 has a - return type of the function. Since R6 - R9 are callee saved, their state is - preserved across the call. + Before an in-kernel function call, the internal BPF program needs to + place function arguments into R1 to R5 registers to satisfy calling + convention, then the interpreter will take them from registers and pass + to in-kernel function. If R1 - R5 registers are mapped to CPU registers + that are used for argument passing on given architecture, the JIT compiler + doesn't need to emit extra moves. Function arguments will be in the correct + registers and BPF_CALL instruction will be JITed as single 'call' HW + instruction. This calling convention was picked to cover common call + situations without performance penalty. + + After an in-kernel function call, R1 - R5 are reset to unreadable and R0 has + a return value of the function. Since R6 - R9 are callee saved, their state + is preserved across the call. + + For example, consider three C functions: + + u64 f1() { return (*_f2)(1); } + u64 f2(u64 a) { return f3(a + 1, a); } + u64 f3(u64 a, u64 b) { return a - b; } + + GCC can compile f1, f3 into x86_64: + + f1: + movl $1, %edi + movq _f2(%rip), %rax + jmp *%rax + f3: + movq %rdi, %rax + subq %rsi, %rax + ret + + Function f2 in BPF may look like: + + f2: + bpf_mov R2, R1 + bpf_add R1, 1 + bpf_call f3 + bpf_exit + + If f2 is JITed and the pointer stored to '_f2'. The calls f1 -> f2 -> f3 and + returns will be seamless. Without JIT, __sk_run_filter() interpreter needs to + be used to call into f2. + + For practical reasons all BPF programs have only one argument 'ctx' which is + already placed into R1 (e.g. on __sk_run_filter() startup) and the programs + can call kernel functions with up to 5 arguments. Calls with 6 or more arguments + are currently not supported, but these restrictions can be lifted if necessary + in the future. + + On 64-bit architectures all register map to HW registers one to one. For + example, x86_64 JIT compiler can map them as ... + + R0 - rax + R1 - rdi + R2 - rsi + R3 - rdx + R4 - rcx + R5 - r8 + R6 - rbx + R7 - r13 + R8 - r14 + R9 - r15 + R10 - rbp + + ... since x86_64 ABI mandates rdi, rsi, rdx, rcx, r8, r9 for argument passing + and rbx, r12 - r15 are callee saved. + + Then the following internal BPF pseudo-program: + + bpf_mov R6, R1 /* save ctx */ + bpf_mov R2, 2 + bpf_mov R3, 3 + bpf_mov R4, 4 + bpf_mov R5, 5 + bpf_call foo + bpf_mov R7, R0 /* save foo() return value */ + bpf_mov R1, R6 /* restore ctx for next call */ + bpf_mov R2, 6 + bpf_mov R3, 7 + bpf_mov R4, 8 + bpf_mov R5, 9 + bpf_call bar + bpf_add R0, R7 + bpf_exit + + After JIT to x86_64 may look like: + + push %rbp + mov %rsp,%rbp + sub $0x228,%rsp + mov %rbx,-0x228(%rbp) + mov %r13,-0x220(%rbp) + mov %rdi,%rbx + mov $0x2,%esi + mov $0x3,%edx + mov $0x4,%ecx + mov $0x5,%r8d + callq foo + mov %rax,%r13 + mov %rbx,%rdi + mov $0x2,%esi + mov $0x3,%edx + mov $0x4,%ecx + mov $0x5,%r8d + callq bar + add %r13,%rax + mov -0x228(%rbp),%rbx + mov -0x220(%rbp),%r13 + leaveq + retq + + Which is in this example equivalent in C to: + + u64 bpf_filter(u64 ctx) + { + return foo(ctx, 2, 3, 4, 5) + bar(ctx, 6, 7, 8, 9); + } + + In-kernel functions foo() and bar() with prototype: u64 (*)(u64 arg1, u64 + arg2, u64 arg3, u64 arg4, u64 arg5); will receive arguments in proper + registers and place their return value into '%rax' which is R0 in BPF. + Prologue and epilogue are emitted by JIT and are implicit in the + interpreter. R0-R5 are scratch registers, so BPF program needs to preserve + them across the calls as defined by calling convention. + + For example the following program is invalid: + + bpf_mov R1, 1 + bpf_call foo + bpf_mov R0, R1 + bpf_exit + + After the call the registers R1-R5 contain junk values and cannot be read. + In the future a BPF verifier can be used to validate internal BPF programs. Also in the new design, BPF is limited to 4096 insns, which means that any program will terminate quickly and will only call a fixed number of kernel @@ -676,6 +807,25 @@ A program, that is translated internally consists of the following elements: op:16, jt:8, jf:8, k:32 ==> op:8, a_reg:4, x_reg:4, off:16, imm:32 +So far 87 internal BPF instructions were implemented. 8-bit 'op' opcode field +has room for new instructions. Some of them may use 16/24/32 byte encoding. New +instructions must be multiple of 8 bytes to preserve backward compatibility. + +Internal BPF is a general purpose RISC instruction set. Not every register and +every instruction are used during translation from original BPF to new format. +For example, socket filters are not using 'exclusive add' instruction, but +tracing filters may do to maintain counters of events, for example. Register R9 +is not used by socket filters either, but more complex filters may be running +out of registers and would have to resort to spill/fill to stack. + +Internal BPF can used as generic assembler for last step performance +optimizations, socket filters and seccomp are using it as assembler. Tracing +filters may use it as assembler to generate code from kernel. In kernel usage +may not be bounded by security considerations, since generated internal BPF code +may be optimizing internal code path and not being exposed to the user space. +Safety of internal BPF can come from a verifier (TBD). In such use cases as +described, it may be used as safe instruction set. + Just like the original BPF, the new format runs within a controlled environment, is deterministic and the kernel can easily prove that. The safety of the program can be determined in two steps: first step does depth-first-search to disallow -- cgit v1.2.3 From 4b405efbe12de28b26289282b431323d73992381 Mon Sep 17 00:00:00 2001 From: Hubert Chaumette Date: Tue, 6 May 2014 09:40:16 +0200 Subject: Update Micrel KSZ90x1 binding documentation Renames micrel-ksz9021.txt to micrel-ksz90x1.txt and adds documentation for the KSZ9031 binding from patch 1. Also adds step increment information, and note about phy fixups. Signed-off-by: Hubert Chaumette Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- .../devicetree/bindings/net/micrel-ksz9021.txt | 49 ------------- .../devicetree/bindings/net/micrel-ksz90x1.txt | 83 ++++++++++++++++++++++ 2 files changed, 83 insertions(+), 49 deletions(-) delete mode 100644 Documentation/devicetree/bindings/net/micrel-ksz9021.txt create mode 100644 Documentation/devicetree/bindings/net/micrel-ksz90x1.txt (limited to 'Documentation') diff --git a/Documentation/devicetree/bindings/net/micrel-ksz9021.txt b/Documentation/devicetree/bindings/net/micrel-ksz9021.txt deleted file mode 100644 index 997a63f1aea1..000000000000 --- a/Documentation/devicetree/bindings/net/micrel-ksz9021.txt +++ /dev/null @@ -1,49 +0,0 @@ -Micrel KSZ9021 Gigabit Ethernet PHY - -Some boards require special tuning values, particularly when it comes to -clock delays. You can specify clock delay values by adding -micrel-specific properties to an Ethernet OF device node. - -All skew control options are specified in picoseconds. The minimum -value is 0, and the maximum value is 3000. - -Optional properties: - - rxc-skew-ps : Skew control of RXC pad - - rxdv-skew-ps : Skew control of RX CTL pad - - txc-skew-ps : Skew control of TXC pad - - txen-skew-ps : Skew control of TX_CTL pad - - rxd0-skew-ps : Skew control of RX data 0 pad - - rxd1-skew-ps : Skew control of RX data 1 pad - - rxd2-skew-ps : Skew control of RX data 2 pad - - rxd3-skew-ps : Skew control of RX data 3 pad - - txd0-skew-ps : Skew control of TX data 0 pad - - txd1-skew-ps : Skew control of TX data 1 pad - - txd2-skew-ps : Skew control of TX data 2 pad - - txd3-skew-ps : Skew control of TX data 3 pad - -Examples: - - /* Attach to an Ethernet device with autodetected PHY */ - &enet { - rxc-skew-ps = <3000>; - rxdv-skew-ps = <0>; - txc-skew-ps = <3000>; - txen-skew-ps = <0>; - status = "okay"; - }; - - /* Attach to an explicitly-specified PHY */ - mdio { - phy0: ethernet-phy@0 { - rxc-skew-ps = <3000>; - rxdv-skew-ps = <0>; - txc-skew-ps = <3000>; - txen-skew-ps = <0>; - reg = <0>; - }; - }; - ethernet@70000 { - status = "okay"; - phy = <&phy0>; - phy-mode = "rgmii-id"; - }; diff --git a/Documentation/devicetree/bindings/net/micrel-ksz90x1.txt b/Documentation/devicetree/bindings/net/micrel-ksz90x1.txt new file mode 100644 index 000000000000..692076fda0e5 --- /dev/null +++ b/Documentation/devicetree/bindings/net/micrel-ksz90x1.txt @@ -0,0 +1,83 @@ +Micrel KSZ9021/KSZ9031 Gigabit Ethernet PHY + +Some boards require special tuning values, particularly when it comes to +clock delays. You can specify clock delay values by adding +micrel-specific properties to an Ethernet OF device node. + +Note that these settings are applied after any phy-specific fixup from +phy_fixup_list (see phy_init_hw() from drivers/net/phy/phy_device.c), +and therefore may overwrite them. + +KSZ9021: + + All skew control options are specified in picoseconds. The minimum + value is 0, the maximum value is 3000, and it is incremented by 200ps + steps. + + Optional properties: + + - rxc-skew-ps : Skew control of RXC pad + - rxdv-skew-ps : Skew control of RX CTL pad + - txc-skew-ps : Skew control of TXC pad + - txen-skew-ps : Skew control of TX CTL pad + - rxd0-skew-ps : Skew control of RX data 0 pad + - rxd1-skew-ps : Skew control of RX data 1 pad + - rxd2-skew-ps : Skew control of RX data 2 pad + - rxd3-skew-ps : Skew control of RX data 3 pad + - txd0-skew-ps : Skew control of TX data 0 pad + - txd1-skew-ps : Skew control of TX data 1 pad + - txd2-skew-ps : Skew control of TX data 2 pad + - txd3-skew-ps : Skew control of TX data 3 pad + +KSZ9031: + + All skew control options are specified in picoseconds. The minimum + value is 0, and the maximum is property-dependent. The increment + step is 60ps. + + Optional properties: + + Maximum value of 1860: + + - rxc-skew-ps : Skew control of RX clock pad + - txc-skew-ps : Skew control of TX clock pad + + Maximum value of 900: + + - rxdv-skew-ps : Skew control of RX CTL pad + - txen-skew-ps : Skew control of TX CTL pad + - rxd0-skew-ps : Skew control of RX data 0 pad + - rxd1-skew-ps : Skew control of RX data 1 pad + - rxd2-skew-ps : Skew control of RX data 2 pad + - rxd3-skew-ps : Skew control of RX data 3 pad + - txd0-skew-ps : Skew control of TX data 0 pad + - txd1-skew-ps : Skew control of TX data 1 pad + - txd2-skew-ps : Skew control of TX data 2 pad + - txd3-skew-ps : Skew control of TX data 3 pad + +Examples: + + /* Attach to an Ethernet device with autodetected PHY */ + &enet { + rxc-skew-ps = <3000>; + rxdv-skew-ps = <0>; + txc-skew-ps = <3000>; + txen-skew-ps = <0>; + status = "okay"; + }; + + /* Attach to an explicitly-specified PHY */ + mdio { + phy0: ethernet-phy@0 { + rxc-skew-ps = <3000>; + rxdv-skew-ps = <0>; + txc-skew-ps = <3000>; + txen-skew-ps = <0>; + reg = <0>; + }; + }; + ethernet@70000 { + status = "okay"; + phy = <&phy0>; + phy-mode = "rgmii-id"; + }; -- cgit v1.2.3 From d415fa1b88748d664b7b6a310dd8e699d2686cf7 Mon Sep 17 00:00:00 2001 From: Mugunthan V N Date: Fri, 9 May 2014 19:07:34 +0530 Subject: drivers: net: cpsw-phy-sel: add dra7xx support for phy sel Add dra7xx support for selecting the phy mode which is present in control module of dra7xx SoC Signed-off-by: Mugunthan V N Signed-off-by: David S. Miller --- .../devicetree/bindings/net/cpsw-phy-sel.txt | 3 +- drivers/net/ethernet/ti/cpsw-phy-sel.c | 57 +++++++++++++++++++++- 2 files changed, 58 insertions(+), 2 deletions(-) (limited to 'Documentation') diff --git a/Documentation/devicetree/bindings/net/cpsw-phy-sel.txt b/Documentation/devicetree/bindings/net/cpsw-phy-sel.txt index 7ff57a119f81..d9da911b6eea 100644 --- a/Documentation/devicetree/bindings/net/cpsw-phy-sel.txt +++ b/Documentation/devicetree/bindings/net/cpsw-phy-sel.txt @@ -2,7 +2,8 @@ TI CPSW Phy mode Selection Device Tree Bindings ----------------------------------------------- Required properties: -- compatible : Should be "ti,am3352-cpsw-phy-sel" +- compatible : Should be "ti,am3352-cpsw-phy-sel" for am335x platform and + "ti,dra7xx-cpsw-phy-sel" for dra7xx platform - reg : physical base address and size of the cpsw registers map - reg-names : names of the register map given in "reg" node diff --git a/drivers/net/ethernet/ti/cpsw-phy-sel.c b/drivers/net/ethernet/ti/cpsw-phy-sel.c index 86b5dce04642..b93838db9a72 100644 --- a/drivers/net/ethernet/ti/cpsw-phy-sel.c +++ b/drivers/net/ethernet/ti/cpsw-phy-sel.c @@ -29,6 +29,8 @@ #define AM33XX_GMII_SEL_RMII2_IO_CLK_EN BIT(7) #define AM33XX_GMII_SEL_RMII1_IO_CLK_EN BIT(6) +#define GMII_SEL_MODE_MASK 0x3 + struct cpsw_phy_sel_priv { struct device *dev; u32 __iomem *gmii_sel; @@ -65,7 +67,7 @@ static void cpsw_gmii_sel_am3352(struct cpsw_phy_sel_priv *priv, break; }; - mask = 0x3 << (slave * 2) | BIT(slave + 6); + mask = GMII_SEL_MODE_MASK << (slave * 2) | BIT(slave + 6); mode <<= slave * 2; if (priv->rmii_clock_external) { @@ -81,6 +83,55 @@ static void cpsw_gmii_sel_am3352(struct cpsw_phy_sel_priv *priv, writel(reg, priv->gmii_sel); } +static void cpsw_gmii_sel_dra7xx(struct cpsw_phy_sel_priv *priv, + phy_interface_t phy_mode, int slave) +{ + u32 reg; + u32 mask; + u32 mode = 0; + + reg = readl(priv->gmii_sel); + + switch (phy_mode) { + case PHY_INTERFACE_MODE_RMII: + mode = AM33XX_GMII_SEL_MODE_RMII; + break; + + case PHY_INTERFACE_MODE_RGMII: + case PHY_INTERFACE_MODE_RGMII_ID: + case PHY_INTERFACE_MODE_RGMII_RXID: + case PHY_INTERFACE_MODE_RGMII_TXID: + mode = AM33XX_GMII_SEL_MODE_RGMII; + break; + + case PHY_INTERFACE_MODE_MII: + default: + mode = AM33XX_GMII_SEL_MODE_MII; + break; + }; + + switch (slave) { + case 0: + mask = GMII_SEL_MODE_MASK; + break; + case 1: + mask = GMII_SEL_MODE_MASK << 4; + mode <<= 4; + break; + default: + dev_err(priv->dev, "invalid slave number...\n"); + return; + } + + if (priv->rmii_clock_external) + dev_err(priv->dev, "RMII External clock is not supported\n"); + + reg &= ~mask; + reg |= mode; + + writel(reg, priv->gmii_sel); +} + static struct platform_driver cpsw_phy_sel_driver; static int match(struct device *dev, void *data) { @@ -112,6 +163,10 @@ static const struct of_device_id cpsw_phy_sel_id_table[] = { .compatible = "ti,am3352-cpsw-phy-sel", .data = &cpsw_gmii_sel_am3352, }, + { + .compatible = "ti,dra7xx-cpsw-phy-sel", + .data = &cpsw_gmii_sel_dra7xx, + }, {} }; MODULE_DEVICE_TABLE(of, cpsw_phy_sel_id_table); -- cgit v1.2.3 From b80b93096bc05f013418e315ccc544ce2e8efc28 Mon Sep 17 00:00:00 2001 From: Mugunthan V N Date: Fri, 9 May 2014 19:07:35 +0530 Subject: drivers: net: cpsw-phy-sel: add am43xx platform support AM43xx phy mode selection is similar to AM33xx platform, so adding only the compatibility string to the driver Signed-off-by: Mugunthan V N Signed-off-by: David S. Miller --- Documentation/devicetree/bindings/net/cpsw-phy-sel.txt | 1 + drivers/net/ethernet/ti/cpsw-phy-sel.c | 4 ++++ 2 files changed, 5 insertions(+) (limited to 'Documentation') diff --git a/Documentation/devicetree/bindings/net/cpsw-phy-sel.txt b/Documentation/devicetree/bindings/net/cpsw-phy-sel.txt index d9da911b6eea..764c0c79b43d 100644 --- a/Documentation/devicetree/bindings/net/cpsw-phy-sel.txt +++ b/Documentation/devicetree/bindings/net/cpsw-phy-sel.txt @@ -4,6 +4,7 @@ TI CPSW Phy mode Selection Device Tree Bindings Required properties: - compatible : Should be "ti,am3352-cpsw-phy-sel" for am335x platform and "ti,dra7xx-cpsw-phy-sel" for dra7xx platform + "ti,am43xx-cpsw-phy-sel" for am43xx platform - reg : physical base address and size of the cpsw registers map - reg-names : names of the register map given in "reg" node diff --git a/drivers/net/ethernet/ti/cpsw-phy-sel.c b/drivers/net/ethernet/ti/cpsw-phy-sel.c index b93838db9a72..aa8bf45e53dc 100644 --- a/drivers/net/ethernet/ti/cpsw-phy-sel.c +++ b/drivers/net/ethernet/ti/cpsw-phy-sel.c @@ -167,6 +167,10 @@ static const struct of_device_id cpsw_phy_sel_id_table[] = { .compatible = "ti,dra7xx-cpsw-phy-sel", .data = &cpsw_gmii_sel_dra7xx, }, + { + .compatible = "ti,am43xx-cpsw-phy-sel", + .data = &cpsw_gmii_sel_am3352, + }, {} }; MODULE_DEVICE_TABLE(of, cpsw_phy_sel_id_table); -- cgit v1.2.3 From a563babeb5fbe721a046adf6f1cdc02e0a207c8d Mon Sep 17 00:00:00 2001 From: Bjørn Mork Date: Sun, 11 May 2014 10:47:14 +0200 Subject: net: cdc_mbim: add driver documentation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit An initial attempt on describing some of the odd APIs provided by this driver. Cc: Greg Suarez Signed-off-by: Bjørn Mork Signed-off-by: David S. Miller --- Documentation/networking/cdc_mbim.txt | 339 ++++++++++++++++++++++++++++++++++ 1 file changed, 339 insertions(+) create mode 100644 Documentation/networking/cdc_mbim.txt (limited to 'Documentation') diff --git a/Documentation/networking/cdc_mbim.txt b/Documentation/networking/cdc_mbim.txt new file mode 100644 index 000000000000..a15ea602aa52 --- /dev/null +++ b/Documentation/networking/cdc_mbim.txt @@ -0,0 +1,339 @@ + cdc_mbim - Driver for CDC MBIM Mobile Broadband modems + ======================================================== + +The cdc_mbim driver supports USB devices conforming to the "Universal +Serial Bus Communications Class Subclass Specification for Mobile +Broadband Interface Model" [1], which is a further development of +"Universal Serial Bus Communications Class Subclass Specifications for +Network Control Model Devices" [2] optimized for Mobile Broadband +devices, aka "3G/LTE modems". + + +Command Line Parameters +======================= + +The cdc_mbim driver has no parameters of its own. But the probing +behaviour for NCM 1.0 backwards compatible MBIM functions (an +"NCM/MBIM function" as defined in section 3.2 of [1]) is affected +by a cdc_ncm driver parameter: + +prefer_mbim +----------- +Type: Boolean +Valid Range: N/Y (0-1) +Default Value: Y (MBIM is preferred) + +This parameter sets the system policy for NCM/MBIM functions. Such +functions will be handled by either the cdc_ncm driver or the cdc_mbim +driver depending on the prefer_mbim setting. Setting prefer_mbim=N +makes the cdc_mbim driver ignore these functions and lets the cdc_ncm +driver handle them instead. + +The parameter is writable, and can be changed at any time. A manual +unbind/bind is required to make the change effective for NCM/MBIM +functions bound to the "wrong" driver + + +Basic usage +=========== + +MBIM functions are inactive when unmanaged. The cdc_mbim driver only +provides an userspace interface to the MBIM control channel, and will +not participate in the management of the function. This implies that a +userspace MBIM management application always is required to enable a +MBIM function. + +Such userspace applications includes, but are not limited to: + - mbimcli (included with the libmbim [3] library), and + - ModemManager [4] + +Establishing a MBIM IP session reequires at least these actions by the +management application: + - open the control channel + - configure network connection settings + - connect to network + - configure IP interface + +Management application development +---------------------------------- +The driver <-> userspace interfaces are described below. The MBIM +control channel protocol is described in [1]. + + +MBIM control channel userspace ABI +================================== + +/dev/cdc-wdmX character device +------------------------------ +The driver creates a two-way pipe to the MBIM function control channel +using the cdc-wdm driver as a subdriver. The userspace end of the +control channel pipe is a /dev/cdc-wdmX character device. + +The cdc_mbim driver does not process or police messages on the control +channel. The channel is fully delegated to the userspace management +application. It is therefore up to this application to ensure that it +complies with all the control channel requirements in [1]. + +The cdc-wdmX device is created as a child of the MBIM control +interface USB device. The character device associated with a specific +MBIM function can be looked up using sysfs. For example: + + bjorn@nemi:~$ ls /sys/bus/usb/drivers/cdc_mbim/2-4:2.12/usbmisc + cdc-wdm0 + + bjorn@nemi:~$ grep . /sys/bus/usb/drivers/cdc_mbim/2-4:2.12/usbmisc/cdc-wdm0/dev + 180:0 + + +USB configuration descriptors +----------------------------- +The wMaxControlMessage field of the CDC MBIM functional descriptor +limits the maximum control message size. The managament application is +responsible for negotiating a control message size complying with the +requirements in section 9.3.1 of [1], taking this descriptor field +into consideration. + +The userspace application can access the CDC MBIM functional +descriptor of a MBIM function using either of the two USB +configuration descriptor kernel interfaces described in [6] or [7]. + +See also the ioctl documentation below. + + +Fragmentation +------------- +The userspace application is responsible for all control message +fragmentation and defragmentaion, as described in section 9.5 of [1]. + + +/dev/cdc-wdmX write() +--------------------- +The MBIM control messages from the management application *must not* +exceed the negotiated control message size. + + +/dev/cdc-wdmX read() +-------------------- +The management application *must* accept control messages of up the +negotiated control message size. + + +/dev/cdc-wdmX ioctl() +-------------------- +IOCTL_WDM_MAX_COMMAND: Get Maximum Command Size +This ioctl returns the wMaxControlMessage field of the CDC MBIM +functional descriptor for MBIM devices. This is intended as a +convenience, eliminating the need to parse the USB descriptors from +userspace. + + #include + #include + #include + #include + #include + int main() + { + __u16 max; + int fd = open("/dev/cdc-wdm0", O_RDWR); + if (!ioctl(fd, IOCTL_WDM_MAX_COMMAND, &max)) + printf("wMaxControlMessage is %d\n", max); + } + + +Custom device services +---------------------- +The MBIM specification allows vendors to freely define additional +services. This is fully supported by the cdc_mbim driver. + +Support for new MBIM services, including vendor specified services, is +implemented entirely in userspace, like the rest of the MBIM control +protocol + +New services should be registered in the MBIM Registry [5]. + + + +MBIM data channel userspace ABI +=============================== + +wwanY network device +-------------------- +The cdc_mbim driver represents the MBIM data channel as a single +network device of the "wwan" type. This network device is initially +mapped to MBIM IP session 0. + + +Multiplexed IP sessions (IPS) +----------------------------- +MBIM allows multiplexing up to 256 IP sessions over a single USB data +channel. The cdc_mbim driver models such IP sessions as 802.1q VLAN +subdevices of the master wwanY device, mapping MBIM IP session Z to +VLAN ID Z for all values of Z greater than 0. + +The device maximum Z is given in the MBIM_DEVICE_CAPS_INFO structure +described in section 10.5.1 of [1]. + +The userspace management application is responsible for adding new +VLAN links prior to establishing MBIM IP sessions where the SessionId +is greater than 0. These links can be added by using the normal VLAN +kernel interfaces, either ioctl or netlink. + +For example, adding a link for a MBIM IP session with SessionId 3: + + ip link add link wwan0 name wwan0.3 type vlan id 3 + +The driver will automatically map the "wwan0.3" network device to MBIM +IP session 3. + + +Device Service Streams (DSS) +---------------------------- +MBIM also allows up to 256 non-IP data streams to be multiplexed over +the same shared USB data channel. The cdc_mbim driver models these +sessions as another set of 802.1q VLAN subdevices of the master wwanY +device, mapping MBIM DSS session A to VLAN ID (256 + A) for all values +of A. + +The device maximum A is given in the MBIM_DEVICE_SERVICES_INFO +structure described in section 10.5.29 of [1]. + +The DSS VLAN subdevices are used as a practical interface between the +shared MBIM data channel and a MBIM DSS aware userspace application. +It is not intended to be presented as-is to an end user. The +assumption is that an userspace application initiating a DSS session +also takes care of the necessary framing of the DSS data, presenting +the stream to the end user in an appropriate way for the stream type. + +The network device ABI requires a dummy ethernet header for every DSS +data frame being transported. The contents of this header is +arbitrary, with the following exceptions: + - TX frames using an IP protocol (0x0800 or 0x86dd) will be dropped + - RX frames will have the protocol field set to ETH_P_802_3 (but will + not be properly formatted 802.3 frames) + - RX frames will have the destination address set to the hardware + address of the master device + +The DSS supporting userspace management application is responsible for +adding the dummy ethernet header on TX and stripping it on RX. + +This is a simple example using tools commonly available, exporting +DssSessionId 5 as a pty character device pointed to by a /dev/nmea +symlink: + + ip link add link wwan0 name wwan0.dss5 type vlan id 261 + ip link set dev wwan0.dss5 up + socat INTERFACE:wwan0.dss5,type=2 PTY:,echo=0,link=/dev/nmea + +This is only an example, most suitable for testing out a DSS +service. Userspace applications supporting specific MBIM DSS services +are expected to use the tools and programming interfaces required by +that service. + +Note that adding VLAN links for DSS sessions is entirely optional. A +management application may instead choose to bind a packet socket +directly to the master network device, using the received VLAN tags to +map frames to the correct DSS session and adding 18 byte VLAN ethernet +headers with the appropriate tag on TX. In this case using a socket +filter is recommended, matching only the DSS VLAN subset. This avoid +unnecessary copying of unrelated IP session data to userspace. For +example: + + static struct sock_filter dssfilter[] = { + /* use special negative offsets to get VLAN tag */ + BPF_STMT(BPF_LD|BPF_B|BPF_ABS, SKF_AD_OFF + SKF_AD_VLAN_TAG_PRESENT), + BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, 1, 0, 6), /* true */ + + /* verify DSS VLAN range */ + BPF_STMT(BPF_LD|BPF_H|BPF_ABS, SKF_AD_OFF + SKF_AD_VLAN_TAG), + BPF_JUMP(BPF_JMP|BPF_JGE|BPF_K, 256, 0, 4), /* 256 is first DSS VLAN */ + BPF_JUMP(BPF_JMP|BPF_JGE|BPF_K, 512, 3, 0), /* 511 is last DSS VLAN */ + + /* verify ethertype */ + BPF_STMT(BPF_LD|BPF_H|BPF_ABS, 2 * ETH_ALEN), + BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, ETH_P_802_3, 0, 1), + + BPF_STMT(BPF_RET|BPF_K, (u_int)-1), /* accept */ + BPF_STMT(BPF_RET|BPF_K, 0), /* ignore */ + }; + + + +Tagged IP session 0 VLAN +------------------------ +As described above, MBIM IP session 0 is treated as special by the +driver. It is initially mapped to untagged frames on the wwanY +network device. + +This mapping implies a few restrictions on multiplexed IPS and DSS +sessions, which may not always be practical: + - no IPS or DSS session can use a frame size greater than the MTU on + IP session 0 + - no IPS or DSS session can be in the up state unless the network + device representing IP session 0 also is up + +These problems can be avoided by optionally making the driver map IP +session 0 to a VLAN subdevice, similar to all other IP sessions. This +behaviour is triggered by adding a VLAN link for the magic VLAN ID +4094. The driver will then immediately start mapping MBIM IP session +0 to this VLAN, and will drop untagged frames on the master wwanY +device. + +Tip: It might be less confusing to the end user to name this VLAN +subdevice after the MBIM SessionID instead of the VLAN ID. For +example: + + ip link add link wwan0 name wwan0.0 type vlan id 4094 + + +VLAN mapping +------------ + +Summarizing the cdc_mbim driver mapping described above, we have this +relationship between VLAN tags on the wwanY network device and MBIM +sessions on the shared USB data channel: + + VLAN ID MBIM type MBIM SessionID Notes + --------------------------------------------------------- + untagged IPS 0 a) + 1 - 255 IPS 1 - 255 + 256 - 511 DSS 0 - 255 + 512 - 4093 b) + 4094 IPS 0 c) + + a) if no VLAN ID 4094 link exists, else dropped + b) unsupported VLAN range, unconditionally dropped + c) if a VLAN ID 4094 link exists, else dropped + + + + +References +========== + +[1] USB Implementers Forum, Inc. - "Universal Serial Bus + Communications Class Subclass Specification for Mobile Broadband + Interface Model", Revision 1.0 (Errata 1), May 1, 2013 + - http://www.usb.org/developers/docs/devclass_docs/ + +[2] USB Implementers Forum, Inc. - "Universal Serial Bus + Communications Class Subclass Specifications for Network Control + Model Devices", Revision 1.0 (Errata 1), November 24, 2010 + - http://www.usb.org/developers/docs/devclass_docs/ + +[3] libmbim - "a glib-based library for talking to WWAN modems and + devices which speak the Mobile Interface Broadband Model (MBIM) + protocol" + - http://www.freedesktop.org/wiki/Software/libmbim/ + +[4] ModemManager - "a DBus-activated daemon which controls mobile + broadband (2G/3G/4G) devices and connections" + - http://www.freedesktop.org/wiki/Software/ModemManager/ + +[5] "MBIM (Mobile Broadband Interface Model) Registry" + - http://compliance.usb.org/mbim/ + +[6] "/proc/bus/usb filesystem output" + - Documentation/usb/proc_usb_info.txt + +[7] "/sys/bus/usb/devices/.../descriptors" + - Documentation/ABI/stable/sysfs-bus-usb -- cgit v1.2.3 From 3be2a49e5c08d268f8af0dd4fe89a24ea8cdc339 Mon Sep 17 00:00:00 2001 From: Thomas Petazzoni Date: Fri, 16 May 2014 16:14:05 +0200 Subject: of: provide a binding for fixed link PHYs Some Ethernet MACs have a "fixed link", and are not connected to a normal MDIO-managed PHY device. For those situations, a Device Tree binding allows to describe a "fixed link" using a special PHY node. This patch adds: * A documentation for the fixed PHY Device Tree binding. * An of_phy_is_fixed_link() function that an Ethernet driver can call on its PHY phandle to find out whether it's a fixed link PHY or not. It should typically be used to know if of_phy_register_fixed_link() should be called. * An of_phy_register_fixed_link() function that instantiates the fixed PHY into the PHY subsystem, so that when the driver calls of_phy_connect(), the PHY device associated to the OF node will be found. These two additional functions also support the old fixed-link Device Tree binding used on PowerPC platforms, so that ultimately, the network device drivers for those platforms could be converted to use of_phy_is_fixed_link() and of_phy_register_fixed_link() instead of of_phy_connect_fixed_link(), while keeping compatibility with their respective Device Tree bindings. Signed-off-by: Thomas Petazzoni Reviewed-by: Florian Fainelli Tested-by: Florian Fainelli Signed-off-by: David S. Miller --- .../devicetree/bindings/net/fixed-link.txt | 30 ++++++++++ drivers/of/of_mdio.c | 67 ++++++++++++++++++++++ include/linux/of_mdio.h | 15 +++++ 3 files changed, 112 insertions(+) create mode 100644 Documentation/devicetree/bindings/net/fixed-link.txt (limited to 'Documentation') diff --git a/Documentation/devicetree/bindings/net/fixed-link.txt b/Documentation/devicetree/bindings/net/fixed-link.txt new file mode 100644 index 000000000000..e956de1be935 --- /dev/null +++ b/Documentation/devicetree/bindings/net/fixed-link.txt @@ -0,0 +1,30 @@ +Fixed link Device Tree binding +------------------------------ + +Some Ethernet MACs have a "fixed link", and are not connected to a +normal MDIO-managed PHY device. For those situations, a Device Tree +binding allows to describe a "fixed link". + +Such a fixed link situation is described by creating a 'fixed-link' +sub-node of the Ethernet MAC device node, with the following +properties: + +* 'speed' (integer, mandatory), to indicate the link speed. Accepted + values are 10, 100 and 1000 +* 'full-duplex' (boolean, optional), to indicate that full duplex is + used. When absent, half duplex is assumed. +* 'pause' (boolean, optional), to indicate that pause should be + enabled. +* 'asym-pause' (boolean, optional), to indicate that asym_pause should + be enabled. + +Example: + +ethernet@0 { + ... + fixed-link { + speed = <1000>; + full-duplex; + }; + ... +}; diff --git a/drivers/of/of_mdio.c b/drivers/of/of_mdio.c index 9a95831bd065..1def0bb5cb37 100644 --- a/drivers/of/of_mdio.c +++ b/drivers/of/of_mdio.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include #include @@ -301,3 +302,69 @@ struct phy_device *of_phy_attach(struct net_device *dev, return phy_attach_direct(dev, phy, flags, iface) ? NULL : phy; } EXPORT_SYMBOL(of_phy_attach); + +#if defined(CONFIG_FIXED_PHY) +/* + * of_phy_is_fixed_link() and of_phy_register_fixed_link() must + * support two DT bindings: + * - the old DT binding, where 'fixed-link' was a property with 5 + * cells encoding various informations about the fixed PHY + * - the new DT binding, where 'fixed-link' is a sub-node of the + * Ethernet device. + */ +bool of_phy_is_fixed_link(struct device_node *np) +{ + struct device_node *dn; + int len; + + /* New binding */ + dn = of_get_child_by_name(np, "fixed-link"); + if (dn) { + of_node_put(dn); + return true; + } + + /* Old binding */ + if (of_get_property(np, "fixed-link", &len) && + len == (5 * sizeof(__be32))) + return true; + + return false; +} +EXPORT_SYMBOL(of_phy_is_fixed_link); + +int of_phy_register_fixed_link(struct device_node *np) +{ + struct fixed_phy_status status = {}; + struct device_node *fixed_link_node; + const __be32 *fixed_link_prop; + int len; + + /* New binding */ + fixed_link_node = of_get_child_by_name(np, "fixed-link"); + if (fixed_link_node) { + status.link = 1; + status.duplex = of_property_read_bool(np, "full-duplex"); + if (of_property_read_u32(fixed_link_node, "speed", &status.speed)) + return -EINVAL; + status.pause = of_property_read_bool(np, "pause"); + status.asym_pause = of_property_read_bool(np, "asym-pause"); + of_node_put(fixed_link_node); + return fixed_phy_register(PHY_POLL, &status, np); + } + + /* Old binding */ + fixed_link_prop = of_get_property(np, "fixed-link", &len); + if (fixed_link_prop && len == (5 * sizeof(__be32))) { + status.link = 1; + status.duplex = be32_to_cpu(fixed_link_prop[1]); + status.speed = be32_to_cpu(fixed_link_prop[2]); + status.pause = be32_to_cpu(fixed_link_prop[3]); + status.asym_pause = be32_to_cpu(fixed_link_prop[4]); + return fixed_phy_register(PHY_POLL, &status, np); + } + + return -ENODEV; +} +EXPORT_SYMBOL(of_phy_register_fixed_link); +#endif diff --git a/include/linux/of_mdio.h b/include/linux/of_mdio.h index 881a7c3571f4..0aa367e316cb 100644 --- a/include/linux/of_mdio.h +++ b/include/linux/of_mdio.h @@ -72,4 +72,19 @@ static inline struct mii_bus *of_mdio_find_bus(struct device_node *mdio_np) } #endif /* CONFIG_OF */ +#if defined(CONFIG_OF) && defined(CONFIG_FIXED_PHY) +extern int of_phy_register_fixed_link(struct device_node *np); +extern bool of_phy_is_fixed_link(struct device_node *np); +#else +static inline int of_phy_register_fixed_link(struct device_node *np) +{ + return -ENOSYS; +} +static inline bool of_phy_is_fixed_link(struct device_node *np) +{ + return false; +} +#endif + + #endif /* __LINUX_OF_MDIO_H */ -- cgit v1.2.3 From 277bd320e7dd8b77de6d3b6b9757b08fcca0108b Mon Sep 17 00:00:00 2001 From: Oliver Hartkopp Date: Wed, 2 Apr 2014 20:25:27 +0200 Subject: can: add documentation for CAN filter usage optimisation To benefit from special filters for single SFF or single EFF CAN identifier subscriptions the CAN_EFF_FLAG bit and the CAN_RTR_FLAG bit has to be set together with the CAN_(SFF|EFF)_MASK in can_filter.mask. Signed-off-by: Oliver Hartkopp Signed-off-by: Marc Kleine-Budde --- Documentation/networking/can.txt | 35 +++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) (limited to 'Documentation') diff --git a/Documentation/networking/can.txt b/Documentation/networking/can.txt index 2fa44cbe81b7..cdd381c5311d 100644 --- a/Documentation/networking/can.txt +++ b/Documentation/networking/can.txt @@ -469,6 +469,41 @@ solution for a couple of reasons: having this 'send only' use-case we may remove the receive list in the Kernel to save a little (really a very little!) CPU usage. + 4.1.1.1 CAN filter usage optimisation + + The CAN filters are processed in per-device filter lists at CAN frame + reception time. To reduce the number of checks that need to be performed + while walking through the filter lists the CAN core provides an optimized + filter handling when the filter subscription focusses on a single CAN ID. + + For the possible 2048 SFF CAN identifiers the identifier is used as an index + to access the corresponding subscription list without any further checks. + For the 2^29 possible EFF CAN identifiers a 10 bit XOR folding is used as + hash function to retrieve the EFF table index. + + To benefit from the optimized filters for single CAN identifiers the + CAN_SFF_MASK or CAN_EFF_MASK have to be set into can_filter.mask together + with set CAN_EFF_FLAG and CAN_RTR_FLAG bits. A set CAN_EFF_FLAG bit in the + can_filter.mask makes clear that it matters whether a SFF or EFF CAN ID is + subscribed. E.g. in the example from above + + rfilter[0].can_id = 0x123; + rfilter[0].can_mask = CAN_SFF_MASK; + + both SFF frames with CAN ID 0x123 and EFF frames with 0xXXXXX123 can pass. + + To filter for only 0x123 (SFF) and 0x12345678 (EFF) CAN identifiers the + filter has to be defined in this way to benefit from the optimized filters: + + struct can_filter rfilter[2]; + + rfilter[0].can_id = 0x123; + rfilter[0].can_mask = (CAN_EFF_FLAG | CAN_RTR_FLAG | CAN_SFF_MASK); + rfilter[1].can_id = 0x12345678 | CAN_EFF_FLAG; + rfilter[1].can_mask = (CAN_EFF_FLAG | CAN_RTR_FLAG | CAN_EFF_MASK); + + setsockopt(s, SOL_CAN_RAW, CAN_RAW_FILTER, &rfilter, sizeof(rfilter)); + 4.1.2 RAW socket option CAN_RAW_ERR_FILTER As described in chapter 3.4 the CAN interface driver can generate so -- cgit v1.2.3 From f517a5f370de4171afe1487d67345d7eef1f1822 Mon Sep 17 00:00:00 2001 From: Christophe Ricard Date: Tue, 13 May 2014 22:03:40 +0200 Subject: NFC: dts: st21nfca_i2c: Add DTS Documentation Describe the properties used by the st21nfca NFC controller driver. Signed-off-by: Christophe Ricard Signed-off-by: Samuel Ortiz --- .../devicetree/bindings/net/nfc/st21nfca.txt | 33 ++++++++++++++++++++++ 1 file changed, 33 insertions(+) create mode 100644 Documentation/devicetree/bindings/net/nfc/st21nfca.txt (limited to 'Documentation') diff --git a/Documentation/devicetree/bindings/net/nfc/st21nfca.txt b/Documentation/devicetree/bindings/net/nfc/st21nfca.txt new file mode 100644 index 000000000000..4724fe669172 --- /dev/null +++ b/Documentation/devicetree/bindings/net/nfc/st21nfca.txt @@ -0,0 +1,33 @@ +* STMicroelectronics SAS. ST21NFCA NFC Controller + +Required properties: +- compatible: Should be "st,st21nfca-i2c". +- clock-frequency: I²C work frequency. +- reg: address on the bus +- interrupt-parent: phandle for the interrupt gpio controller +- interrupts: GPIO interrupt to which the chip is connected +- enable-gpios: Output GPIO pin used for enabling/disabling the ST21NFCA + +Optional SoC Specific Properties: +- pinctrl-names: Contains only one value - "default". +- pintctrl-0: Specifies the pin control groups used for this controller. + +Example (for ARM-based BeagleBoard xM with ST21NFCA on I2C2): + +&i2c2 { + + status = "okay"; + + st21nfca: st21nfca@1 { + + compatible = "st,st21nfca_i2c"; + + reg = <0x01>; + clock-frequency = <400000>; + + interrupt-parent = <&gpio5>; + interrupts = <2 IRQ_TYPE_LEVEL_LOW>; + + enable-gpios = <&gpio5 29 GPIO_ACTIVE_HIGH>; + }; +}; -- cgit v1.2.3 From 91c1d980d6013dec4292309aa1700af36b400477 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Thu, 22 May 2014 09:47:43 -0700 Subject: Documentation: devicetree: add old and deprecated 'fixed-link' Update the fixed-link Device Tree binding documentation to contain information about the old and deprecated 5-digit 'fixed-link' property. Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- Documentation/devicetree/bindings/net/fixed-link.txt | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'Documentation') diff --git a/Documentation/devicetree/bindings/net/fixed-link.txt b/Documentation/devicetree/bindings/net/fixed-link.txt index e956de1be935..82bf7e0f47b6 100644 --- a/Documentation/devicetree/bindings/net/fixed-link.txt +++ b/Documentation/devicetree/bindings/net/fixed-link.txt @@ -18,6 +18,18 @@ properties: * 'asym-pause' (boolean, optional), to indicate that asym_pause should be enabled. +Old, deprecated 'fixed-link' binding: + +* A 'fixed-link' property in the Ethernet MAC node, with 5 cells, of the + form with the following accepted values: + - a: emulated PHY ID, choose any but but unique to the all specified + fixed-links, from 0 to 31 + - b: duplex configuration: 0 for half duplex, 1 for full duplex + - c: link speed in Mbits/sec, accepted values are: 10, 100 and 1000 + - d: pause configuration: 0 for no pause, 1 for pause + - e: asymmetric pause configuration: 0 for no asymmetric pause, 1 for + asymmetric pause + Example: ethernet@0 { -- cgit v1.2.3 From ae21888f9ef34fc2584b6caceb93f0b496dd21d5 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Thu, 22 May 2014 09:47:44 -0700 Subject: Documentation: devicetree: net: refer to fixed-link.txt Update the Freescale TSEC PHY, Broadcom GENET & SYSTEMPORT Device Tree binding documentation to refer to the fixed-link Device Tree binding in fixed-link.txt. Reviewed-by: Thomas Petazzoni Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- Documentation/devicetree/bindings/net/broadcom-bcmgenet.txt | 2 +- Documentation/devicetree/bindings/net/broadcom-systemport.txt | 2 +- Documentation/devicetree/bindings/net/fsl-tsec-phy.txt | 5 +---- 3 files changed, 3 insertions(+), 6 deletions(-) (limited to 'Documentation') diff --git a/Documentation/devicetree/bindings/net/broadcom-bcmgenet.txt b/Documentation/devicetree/bindings/net/broadcom-bcmgenet.txt index f2febb94550e..451fef26b4df 100644 --- a/Documentation/devicetree/bindings/net/broadcom-bcmgenet.txt +++ b/Documentation/devicetree/bindings/net/broadcom-bcmgenet.txt @@ -24,7 +24,7 @@ Optional properties: - fixed-link: When the GENET interface is connected to a MoCA hardware block or when operating in a RGMII to RGMII type of connection, or when the MDIO bus is voluntarily disabled, this property should be used to describe the "fixed link". - See Documentation/devicetree/bindings/net/fsl-tsec-phy.txt for information on + See Documentation/devicetree/bindings/net/fixed-link.txt for information on the property specifics Required child nodes: diff --git a/Documentation/devicetree/bindings/net/broadcom-systemport.txt b/Documentation/devicetree/bindings/net/broadcom-systemport.txt index 1b7600e022dd..c183ea90d9bc 100644 --- a/Documentation/devicetree/bindings/net/broadcom-systemport.txt +++ b/Documentation/devicetree/bindings/net/broadcom-systemport.txt @@ -8,7 +8,7 @@ Required properties: - local-mac-address: Ethernet MAC address (48 bits) of this adapter - phy-mode: Should be a string describing the PHY interface to the Ethernet switch/PHY, see Documentation/devicetree/bindings/net/ethernet.txt -- fixed-link: see Documentation/devicetree/bindings/net/fsl-tsec-phy.txt for +- fixed-link: see Documentation/devicetree/bindings/net/fixed-link.txt for the property specific details Optional properties: diff --git a/Documentation/devicetree/bindings/net/fsl-tsec-phy.txt b/Documentation/devicetree/bindings/net/fsl-tsec-phy.txt index 737cdef4f903..be6ea8960f20 100644 --- a/Documentation/devicetree/bindings/net/fsl-tsec-phy.txt +++ b/Documentation/devicetree/bindings/net/fsl-tsec-phy.txt @@ -42,10 +42,7 @@ Properties: interrupt. For TSEC and eTSEC devices, the first interrupt is transmit, the second is receive, and the third is error. - phy-handle : See ethernet.txt file in the same directory. - - fixed-link : where a is emulated phy id - choose any, - but unique to the all specified fixed-links, b is duplex - 0 half, - 1 full, c is link speed - d#10/d#100/d#1000, d is pause - 0 no - pause, 1 pause, e is asym_pause - 0 no asym_pause, 1 asym_pause. + - fixed-link : See fixed-link.txt in the same directory. - phy-connection-type : See ethernet.txt file in the same directory. This property is only really needed if the connection is of type "rgmii-id", as all other connection types are detected by hardware. -- cgit v1.2.3 From 04caa489301c5e01ac4e5d1c13abcecb2041300b Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Fri, 23 May 2014 18:43:59 +0200 Subject: net: filter: doc: add section for BPF test suite Mention the recently added test suite in the documentation file. Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- Documentation/networking/filter.txt | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'Documentation') diff --git a/Documentation/networking/filter.txt b/Documentation/networking/filter.txt index 748fd385535d..dc7dcc8c8184 100644 --- a/Documentation/networking/filter.txt +++ b/Documentation/networking/filter.txt @@ -833,6 +833,20 @@ loops and other CFG validation; second step starts from the first insn and descends all possible paths. It simulates execution of every insn and observes the state change of registers and stack. +Testing +------- + +Next to the BPF toolchain, the kernel also ships a test module that contains +various test cases for classic and internal BPF that can be executed against +the BPF interpreter and JIT compiler. It can be found in lib/test_bpf.c and +enabled via Kconfig: + + CONFIG_TEST_BPF=m + +After the module has been built and installed, the test suite can be executed +via insmod or modprobe against 'test_bpf' module. Results of the test cases +including timings in nsec can be found in the kernel log (dmesg). + Misc ---- -- cgit v1.2.3 From 481300ccf3ef5a79bb7ba59f77c96035134606d9 Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Fri, 23 May 2014 12:57:17 -0700 Subject: devicetree: bindings: Properly document micrel ks8851 SPI chips The ks8851 SPI ethernet wasn't documented, but we documented the optional regulator supply for it under the mll based ethernet chip. Furthermore, that compatible string needed another 'l'. Fix all of this and document the optional vdd-io and reset-gpios properties. Cc: Nishanth Menon Cc: Rob Herring Cc: Pawel Moll Cc: Mark Rutland Cc: Ian Campbell Cc: Kumar Gala Cc: Signed-off-by: Stephen Boyd Signed-off-by: David S. Miller --- Documentation/devicetree/bindings/net/micrel-ks8851.txt | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) (limited to 'Documentation') diff --git a/Documentation/devicetree/bindings/net/micrel-ks8851.txt b/Documentation/devicetree/bindings/net/micrel-ks8851.txt index d54d0cc79487..bbdf9a7359a2 100644 --- a/Documentation/devicetree/bindings/net/micrel-ks8851.txt +++ b/Documentation/devicetree/bindings/net/micrel-ks8851.txt @@ -1,9 +1,18 @@ -Micrel KS8851 Ethernet mac +Micrel KS8851 Ethernet mac (MLL) Required properties: -- compatible = "micrel,ks8851-ml" of parallel interface +- compatible = "micrel,ks8851-mll" of parallel interface - reg : 2 physical address and size of registers for data and command - interrupts : interrupt connection +Micrel KS8851 Ethernet mac (SPI) + +Required properties: +- compatible = "micrel,ks8851" or the deprecated "ks8851" +- reg : chip select number +- interrupts : interrupt connection + Optional properties: -- vdd-supply: supply for Ethernet mac +- vdd-supply: analog 3.3V supply for Ethernet mac +- vdd-io-supply: digital 1.8V IO supply for Ethernet mac +- reset-gpios: reset_n input pin -- cgit v1.2.3 From 4ea7ceea5189f018800b9ed3b354bb4e2c39cd1a Mon Sep 17 00:00:00 2001 From: Christophe Ricard Date: Tue, 20 May 2014 21:45:04 +0200 Subject: NFC: dts: st21nfca_i2c: Correct minor typo in documentation Replaced st,st21nfca-i2c by st,st21nfca_i2c to be concistent with below configuration and driver in drivers/nfc/st21nfca/ Signed-off-by: Christophe Ricard Signed-off-by: Samuel Ortiz --- Documentation/devicetree/bindings/net/nfc/st21nfca.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'Documentation') diff --git a/Documentation/devicetree/bindings/net/nfc/st21nfca.txt b/Documentation/devicetree/bindings/net/nfc/st21nfca.txt index 4724fe669172..e4faa2e8dfeb 100644 --- a/Documentation/devicetree/bindings/net/nfc/st21nfca.txt +++ b/Documentation/devicetree/bindings/net/nfc/st21nfca.txt @@ -1,7 +1,7 @@ * STMicroelectronics SAS. ST21NFCA NFC Controller Required properties: -- compatible: Should be "st,st21nfca-i2c". +- compatible: Should be "st,st21nfca_i2c". - clock-frequency: I²C work frequency. - reg: address on the bus - interrupt-parent: phandle for the interrupt gpio controller -- cgit v1.2.3 From 79499bca6c921d61f3ee5d20d71fb344ece1fdd2 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Fri, 23 May 2014 16:35:40 -0700 Subject: net: sysfs: add missing phys_port_id documentation Add documentation for the phys_port_id sysfs attribute of a network device. Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- Documentation/ABI/testing/sysfs-class-net | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'Documentation') diff --git a/Documentation/ABI/testing/sysfs-class-net b/Documentation/ABI/testing/sysfs-class-net index d922060e455d..416c5d59f52e 100644 --- a/Documentation/ABI/testing/sysfs-class-net +++ b/Documentation/ABI/testing/sysfs-class-net @@ -169,6 +169,14 @@ Description: "unknown", "notpresent", "down", "lowerlayerdown", "testing", "dormant", "up". +What: /sys/class/net//phys_port_id +Date: July 2013 +KernelVersion: 3.12 +Contact: netdev@vger.kernel.org +Description: + Indicates the interface unique physical port identifier within + the NIC, as a string. + What: /sys/class/net//speed Date: October 2009 KernelVersion: 2.6.33 -- cgit v1.2.3 From 1a02ef76acfa0e172e7c3b08e4efb89db84ef0a5 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Fri, 23 May 2014 16:35:41 -0700 Subject: net: sysfs: add documentation entries for /sys/class//queues Add sysfs documentation for the various attributes of a network interface exposed in /sys/class//queues/{rx,tx}-/ Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- Documentation/ABI/testing/sysfs-class-net-queues | 79 ++++++++++++++++++++++++ 1 file changed, 79 insertions(+) create mode 100644 Documentation/ABI/testing/sysfs-class-net-queues (limited to 'Documentation') diff --git a/Documentation/ABI/testing/sysfs-class-net-queues b/Documentation/ABI/testing/sysfs-class-net-queues new file mode 100644 index 000000000000..5e9aeb91d355 --- /dev/null +++ b/Documentation/ABI/testing/sysfs-class-net-queues @@ -0,0 +1,79 @@ +What: /sys/class//queues/rx-/rps_cpus +Date: March 2010 +KernelVersion: 2.6.35 +Contact: netdev@vger.kernel.org +Description: + Mask of the CPU(s) currently enabled to participate into the + Receive Packet Steering packet processing flow for this + network device queue. Possible values depend on the number + of available CPU(s) in the system. + +What: /sys/class//queues/rx-/rps_flow_cnt +Date: April 2010 +KernelVersion: 2.6.35 +Contact: netdev@vger.kernel.org +Description: + Number of Receive Packet Steering flows being currently + processed by this particular network device receive queue. + +What: /sys/class//queues/tx-/tx_timeout +Date: November 2011 +KernelVersion: 3.3 +Contact: netdev@vger.kernel.org +Description: + Indicates the number of transmit timeout events seen by this + network interface transmit queue. + +What: /sys/class//queues/tx-/xps_cpus +Date: November 2010 +KernelVersion: 2.6.38 +Contact: netdev@vger.kernel.org +Description: + Mask of the CPU(s) currently enabled to participate into the + Transmit Packet Steering packet processing flow for this + network device transmit queue. Possible vaules depend on the + number of available CPU(s) in the system. + +What: /sys/class//queues/tx-/byte_queue_limits/hold_time +Date: November 2011 +KernelVersion: 3.3 +Contact: netdev@vger.kernel.org +Description: + Indicates the hold time in milliseconds to measure the slack + of this particular network device transmit queue. + Default value is 1000. + +What: /sys/class//queues/tx-/byte_queue_limits/inflight +Date: November 2011 +KernelVersion: 3.3 +Contact: netdev@vger.kernel.org +Description: + Indicates the number of bytes (objects) in flight on this + network device transmit queue. + +What: /sys/class//queues/tx-/byte_queue_limits/limit +Date: November 2011 +KernelVersion: 3.3 +Contact: netdev@vger.kernel.org +Description: + Indicates the current limit of bytes allowed to be queued + on this network device transmit queue. This value is clamped + to be within the bounds defined by limit_max and limit_min. + +What: /sys/class//queues/tx-/byte_queue_limits/limit_max +Date: November 2011 +KernelVersion: 3.3 +Contact: netdev@vger.kernel.org +Description: + Indicates the absolute maximum limit of bytes allowed to be + queued on this network device transmit queue. See + include/linux/dynamic_queue_limits.h for the default value. + +What: /sys/class//queues/tx-/byte_queue_limits/limit_min +Date: November 2011 +KernelVersion: 3.3 +Contact: netdev@vger.kernel.org +Description: + Indicates the absolute minimum limit of bytes allowed to be + queued on this network device transmit queue. Default value is + 0. -- cgit v1.2.3 From 6044f9700645264c52f69033a2b4457b856202f7 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Fri, 23 May 2014 16:35:42 -0700 Subject: net: sysfs: document /sys/class/net/statistics/* Document the network device statistics counter that are exposed as sysfs attributes. Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- .../ABI/testing/sysfs-class-net-statistics | 201 +++++++++++++++++++++ 1 file changed, 201 insertions(+) create mode 100644 Documentation/ABI/testing/sysfs-class-net-statistics (limited to 'Documentation') diff --git a/Documentation/ABI/testing/sysfs-class-net-statistics b/Documentation/ABI/testing/sysfs-class-net-statistics new file mode 100644 index 000000000000..397118de7b5e --- /dev/null +++ b/Documentation/ABI/testing/sysfs-class-net-statistics @@ -0,0 +1,201 @@ +What: /sys/class//statistics/collisions +Date: April 2005 +KernelVersion: 2.6.12 +Contact: netdev@vger.kernel.org +Description: + Indicates the number of collisions seen by this network device. + This value might not be relevant with all MAC layers. + +What: /sys/class//statistics/multicast +Date: April 2005 +KernelVersion: 2.6.12 +Contact: netdev@vger.kernel.org +Description: + Indicates the number of multicast packets received by this + network device. + +What: /sys/class//statistics/rx_bytes +Date: April 2005 +KernelVersion: 2.6.12 +Contact: netdev@vger.kernel.org +Description: + Indicates the number of bytes received by this network device. + See the network driver for the exact meaning of when this + value is incremented. + +What: /sys/class//statistics/rx_compressed +Date: April 2005 +KernelVersion: 2.6.12 +Contact: netdev@vger.kernel.org +Description: + Indicates the number of compressed packets received by this + network device. This value might only be relevant for interfaces + that support packet compression (e.g: PPP). + +What: /sys/class//statistics/rx_crc_errors +Date: April 2005 +KernelVersion: 2.6.12 +Contact: netdev@vger.kernel.org +Description: + Indicates the number of packets received with a CRC (FCS) error + by this network device. Note that the specific meaning might + depend on the MAC layer used by the interface. + +What: /sys/class//statistics/rx_dropped +Date: April 2005 +KernelVersion: 2.6.12 +Contact: netdev@vger.kernel.org +Description: + Indicates the number of packets received by the network device + but dropped, that are not forwarded to the upper layers for + packet processing. See the network driver for the exact + meaning of this value. + +What: /sys/class//statistics/rx_fifo_errors +Date: April 2005 +KernelVersion: 2.6.12 +Contact: netdev@vger.kernel.org +Description: + Indicates the number of receive FIFO errors seen by this + network device. See the network driver for the exact + meaning of this value. + +What: /sys/class//statistics/rx_frame_errors +Date: April 2005 +KernelVersion: 2.6.12 +Contact: netdev@vger.kernel.org +Description: + Indicates the number of received frames with error, such as + alignment errors. Note that the specific meaning depends on + on the MAC layer protocol used. See the network driver for + the exact meaning of this value. + +What: /sys/class//statistics/rx_length_errors +Date: April 2005 +KernelVersion: 2.6.12 +Contact: netdev@vger.kernel.org +Description: + Indicates the number of received error packet with a length + error, oversized or undersized. See the network driver for the + exact meaning of this value. + +What: /sys/class//statistics/rx_missed_errors +Date: April 2005 +KernelVersion: 2.6.12 +Contact: netdev@vger.kernel.org +Description: + Indicates the number of received packets that have been missed + due to lack of capacity in the receive side. See the network + driver for the exact meaning of this value. + +What: /sys/class//statistics/rx_over_errors +Date: April 2005 +KernelVersion: 2.6.12 +Contact: netdev@vger.kernel.org +Description: + Indicates the number of received packets that are oversized + compared to what the network device is configured to accept + (e.g: larger than MTU). See the network driver for the exact + meaning of this value. + +What: /sys/class//statistics/rx_packets +Date: April 2005 +KernelVersion: 2.6.12 +Contact: netdev@vger.kernel.org +Description: + Indicates the total number of good packets received by this + network device. + +What: /sys/class//statistics/tx_aborted_errors +Date: April 2005 +KernelVersion: 2.6.12 +Contact: netdev@vger.kernel.org +Description: + Indicates the number of packets that have been aborted + during transmission by a network device (e.g: because of + a medium collision). See the network driver for the exact + meaning of this value. + +What: /sys/class//statistics/tx_bytes +Date: April 2005 +KernelVersion: 2.6.12 +Contact: netdev@vger.kernel.org +Description: + Indicates the number of bytes transmitted by a network + device. See the network driver for the exact meaning of this + value, in particular whether this accounts for all successfully + transmitted packets or all packets that have been queued for + transmission. + +What: /sys/class//statistics/tx_carrier_errors +Date: April 2005 +KernelVersion: 2.6.12 +Contact: netdev@vger.kernel.org +Description: + Indicates the number of packets that could not be transmitted + because of carrier errors (e.g: physical link down). See the + network driver for the exact meaning of this value. + +What: /sys/class//statistics/tx_compressed +Date: April 2005 +KernelVersion: 2.6.12 +Contact: netdev@vger.kernel.org +Description: + Indicates the number of transmitted compressed packets. Note + this might only be relevant for devices that support + compression (e.g: PPP). + +What: /sys/class//statistics/tx_dropped +Date: April 2005 +KernelVersion: 2.6.12 +Contact: netdev@vger.kernel.org +Description: + Indicates the number of packets dropped during transmission. + See the driver for the exact reasons as to why the packets were + dropped. + +What: /sys/class//statistics/tx_errors +Date: April 2005 +KernelVersion: 2.6.12 +Contact: netdev@vger.kernel.org +Description: + Indicates the number of packets in error during transmission by + a network device. See the driver for the exact reasons as to + why the packets were dropped. + +What: /sys/class//statistics/tx_fifo_errors +Date: April 2005 +KernelVersion: 2.6.12 +Contact: netdev@vger.kernel.org +Description: + Indicates the number of packets having caused a transmit + FIFO error. See the driver for the exact reasons as to why the + packets were dropped. + +What: /sys/class//statistics/tx_heartbeat_errors +Date: April 2005 +KernelVersion: 2.6.12 +Contact: netdev@vger.kernel.org +Description: + Indicates the number of packets transmitted that have been + reported as heartbeat errors. See the driver for the exact + reasons as to why the packets were dropped. + +What: /sys/class//statistics/tx_packets +Date: April 2005 +KernelVersion: 2.6.12 +Contact: netdev@vger.kernel.org +Description: + Indicates the number of packets transmitted by a network + device. See the driver for whether this reports the number of all + attempted or successful transmissions. + +What: /sys/class//statistics/tx_window_errors +Date: April 2005 +KernelVersion: 2.6.12 +Contact: netdev@vger.kernel.org +Description: + Indicates the number of packets not successfully transmitted + due to a window collision. The specific meaning depends on the + MAC layer used. On Ethernet this is usually used to report + late collisions errors. -- cgit v1.2.3 From e649c648469f947b4fa2ad79dd37510cdbafdce7 Mon Sep 17 00:00:00 2001 From: Kedareswara rao Appana Date: Wed, 21 May 2014 17:11:29 +0530 Subject: can: Add xilinx CAN device tree bindings documentation Add xilinx CAN bindings documentation. Signed-off-by: Kedareswara rao Appana Acked-by: Rob Herring Signed-off-by: Marc Kleine-Budde --- .../devicetree/bindings/net/can/xilinx_can.txt | 44 ++++++++++++++++++++++ 1 file changed, 44 insertions(+) create mode 100644 Documentation/devicetree/bindings/net/can/xilinx_can.txt (limited to 'Documentation') diff --git a/Documentation/devicetree/bindings/net/can/xilinx_can.txt b/Documentation/devicetree/bindings/net/can/xilinx_can.txt new file mode 100644 index 000000000000..fe38847d8e26 --- /dev/null +++ b/Documentation/devicetree/bindings/net/can/xilinx_can.txt @@ -0,0 +1,44 @@ +Xilinx Axi CAN/Zynq CANPS controller Device Tree Bindings +--------------------------------------------------------- + +Required properties: +- compatible : Should be "xlnx,zynq-can-1.0" for Zynq CAN + controllers and "xlnx,axi-can-1.00.a" for Axi CAN + controllers. +- reg : Physical base address and size of the Axi CAN/Zynq + CANPS registers map. +- interrupts : Property with a value describing the interrupt + number. +- interrupt-parent : Must be core interrupt controller +- clock-names : List of input clock names - "can_clk", "pclk" + (For CANPS), "can_clk" , "s_axi_aclk"(For AXI CAN) + (See clock bindings for details). +- clocks : Clock phandles (see clock bindings for details). +- tx-fifo-depth : Can Tx fifo depth. +- rx-fifo-depth : Can Rx fifo depth. + + +Example: + +For Zynq CANPS Dts file: + zynq_can_0: can@e0008000 { + compatible = "xlnx,zynq-can-1.0"; + clocks = <&clkc 19>, <&clkc 36>; + clock-names = "can_clk", "pclk"; + reg = <0xe0008000 0x1000>; + interrupts = <0 28 4>; + interrupt-parent = <&intc>; + tx-fifo-depth = <0x40>; + rx-fifo-depth = <0x40>; + }; +For Axi CAN Dts file: + axi_can_0: axi-can@40000000 { + compatible = "xlnx,axi-can-1.00.a"; + clocks = <&clkc 0>, <&clkc 1>; + clock-names = "can_clk","s_axi_aclk" ; + reg = <0x40000000 0x10000>; + interrupt-parent = <&intc>; + interrupts = <0 59 1>; + tx-fifo-depth = <0x40>; + rx-fifo-depth = <0x40>; + }; -- cgit v1.2.3 From e3cfeefc0e491af00155525131285821e26f7715 Mon Sep 17 00:00:00 2001 From: Bjørn Mork Date: Fri, 30 May 2014 09:31:10 +0200 Subject: net: cdc_ncm: document the sysfs API MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adding documentation for all the driver specific sysfs attributes. Cc: Peter Stuge Signed-off-by: Bjørn Mork Signed-off-by: David S. Miller --- Documentation/ABI/testing/sysfs-class-net-cdc_ncm | 149 ++++++++++++++++++++++ 1 file changed, 149 insertions(+) create mode 100644 Documentation/ABI/testing/sysfs-class-net-cdc_ncm (limited to 'Documentation') diff --git a/Documentation/ABI/testing/sysfs-class-net-cdc_ncm b/Documentation/ABI/testing/sysfs-class-net-cdc_ncm new file mode 100644 index 000000000000..5cedf72df358 --- /dev/null +++ b/Documentation/ABI/testing/sysfs-class-net-cdc_ncm @@ -0,0 +1,149 @@ +What: /sys/class/net//cdc_ncm/min_tx_pkt +Date: May 2014 +KernelVersion: 3.16 +Contact: Bjørn Mork +Description: + The driver will pad NCM Transfer Blocks (NTBs) longer + than this to tx_max, allowing the device to receive + tx_max sized frames with no terminating short + packet. NTBs shorter than this limit are transmitted + as-is, without any padding, and are terminated with a + short USB packet. + + Padding to tx_max allows the driver to transmit NTBs + back-to-back without any interleaving short USB + packets. This reduces the number of short packet + interrupts in the device, and represents a tradeoff + between USB bus bandwidth and device DMA optimization. + + Set to 0 to pad all frames. Set greater than tx_max to + disable all padding. + +What: /sys/class/net//cdc_ncm/rx_max +Date: May 2014 +KernelVersion: 3.16 +Contact: Bjørn Mork +Description: + The maximum NTB size for RX. Cannot exceed the + maximum value supported by the device. Must allow at + least one max sized datagram plus headers. + + The actual limits are device dependent. See + dwNtbInMaxSize. + + Note: Some devices will silently ignore changes to + this value, resulting in oversized NTBs and + corresponding framing errors. + +What: /sys/class/net//cdc_ncm/tx_max +Date: May 2014 +KernelVersion: 3.16 +Contact: Bjørn Mork +Description: + The maximum NTB size for TX. Cannot exceed the + maximum value supported by the device. Must allow at + least one max sized datagram plus headers. + + The actual limits are device dependent. See + dwNtbOutMaxSize. + +What: /sys/class/net//cdc_ncm/tx_timer_usecs +Date: May 2014 +KernelVersion: 3.16 +Contact: Bjørn Mork +Description: + Datagram aggregation timeout in µs. The driver will + wait up to 3 times this timeout for more datagrams to + aggregate before transmitting an NTB frame. + + Valid range: 5 to 4000000 + + Set to 0 to disable aggregation. + +The following read-only attributes all represent fields of the +structure defined in section 6.2.1 "GetNtbParameters" of "Universal +Serial Bus Communications Class Subclass Specifications for Network +Control Model Devices" (CDC NCM), Revision 1.0 (Errata 1), November +24, 2010 from USB Implementers Forum, Inc. The descriptions are +quoted from table 6-3 of CDC NCM: "NTB Parameter Structure". + +What: /sys/class/net//cdc_ncm/bmNtbFormatsSupported +Date: May 2014 +KernelVersion: 3.16 +Contact: Bjørn Mork +Description: + Bit 0: 16-bit NTB supported (set to 1) + Bit 1: 32-bit NTB supported + Bits 2 – 15: reserved (reset to zero; must be ignored by host) + +What: /sys/class/net//cdc_ncm/dwNtbInMaxSize +Date: May 2014 +KernelVersion: 3.16 +Contact: Bjørn Mork +Description: + IN NTB Maximum Size in bytes + +What: /sys/class/net//cdc_ncm/wNdpInDivisor +Date: May 2014 +KernelVersion: 3.16 +Contact: Bjørn Mork +Description: + Divisor used for IN NTB Datagram payload alignment + +What: /sys/class/net//cdc_ncm/wNdpInPayloadRemainder +Date: May 2014 +KernelVersion: 3.16 +Contact: Bjørn Mork +Description: + Remainder used to align input datagram payload within + the NTB: (Payload Offset) mod (wNdpInDivisor) = + wNdpInPayloadRemainder + +What: /sys/class/net//cdc_ncm/wNdpInAlignment +Date: May 2014 +KernelVersion: 3.16 +Contact: Bjørn Mork +Description: + NDP alignment modulus for NTBs on the IN pipe. Shall + be a power of 2, and shall be at least 4. + +What: /sys/class/net//cdc_ncm/dwNtbOutMaxSize +Date: May 2014 +KernelVersion: 3.16 +Contact: Bjørn Mork +Description: + OUT NTB Maximum Size + +What: /sys/class/net//cdc_ncm/wNdpOutDivisor +Date: May 2014 +KernelVersion: 3.16 +Contact: Bjørn Mork +Description: + OUT NTB Datagram alignment modulus + +What: /sys/class/net//cdc_ncm/wNdpOutPayloadRemainder +Date: May 2014 +KernelVersion: 3.16 +Contact: Bjørn Mork +Description: + Remainder used to align output datagram payload + offsets within the NTB: Padding, shall be transmitted + as zero by function, and ignored by host. (Payload + Offset) mod (wNdpOutDivisor) = wNdpOutPayloadRemainder + +What: /sys/class/net//cdc_ncm/wNdpOutAlignment +Date: May 2014 +KernelVersion: 3.16 +Contact: Bjørn Mork +Description: + NDP alignment modulus for use in NTBs on the OUT + pipe. Shall be a power of 2, and shall be at least 4. + +What: /sys/class/net//cdc_ncm/wNtbOutMaxDatagrams +Date: May 2014 +KernelVersion: 3.16 +Contact: Bjørn Mork +Description: + Maximum number of datagrams that the host may pack + into a single OUT NTB. Zero means that the device + imposes no limit. -- cgit v1.2.3 From f70e9d88a105db9a8cf4dc4d5675d0a54def70c9 Mon Sep 17 00:00:00 2001 From: Zhangfei Gao Date: Tue, 3 Jun 2014 13:49:36 +0800 Subject: Documentation: add Device tree bindings for Hisilicon hix5hd2 ethernet Signed-off-by: Zhangfei Gao Signed-off-by: David S. Miller --- .../bindings/net/hisilicon-hix5hd2-gmac.txt | 36 ++++++++++++++++++++++ 1 file changed, 36 insertions(+) create mode 100644 Documentation/devicetree/bindings/net/hisilicon-hix5hd2-gmac.txt (limited to 'Documentation') diff --git a/Documentation/devicetree/bindings/net/hisilicon-hix5hd2-gmac.txt b/Documentation/devicetree/bindings/net/hisilicon-hix5hd2-gmac.txt new file mode 100644 index 000000000000..75d398bb1fbb --- /dev/null +++ b/Documentation/devicetree/bindings/net/hisilicon-hix5hd2-gmac.txt @@ -0,0 +1,36 @@ +Hisilicon hix5hd2 gmac controller + +Required properties: +- compatible: should be "hisilicon,hix5hd2-gmac". +- reg: specifies base physical address(s) and size of the device registers. + The first region is the MAC register base and size. + The second region is external interface control register. +- interrupts: should contain the MAC interrupt. +- #address-cells: must be <1>. +- #size-cells: must be <0>. +- phy-mode: see ethernet.txt [1]. +- phy-handle: see ethernet.txt [1]. +- mac-address: see ethernet.txt [1]. +- clocks: clock phandle and specifier pair. + +- PHY subnode: inherits from phy binding [2] + +[1] Documentation/devicetree/bindings/net/ethernet.txt +[2] Documentation/devicetree/bindings/net/phy.txt + +Example: + gmac0: ethernet@f9840000 { + compatible = "hisilicon,hix5hd2-gmac"; + reg = <0xf9840000 0x1000>,<0xf984300c 0x4>; + interrupts = <0 71 4>; + #address-cells = <1>; + #size-cells = <0>; + phy-mode = "mii"; + phy-handle = <&phy2>; + mac-address = [00 00 00 00 00 00]; + clocks = <&clock HIX5HD2_MAC0_CLK>; + + phy2: ethernet-phy@2 { + reg = <2>; + }; + }; -- cgit v1.2.3 From 7c123b6a1c5ce1d7900b4ef7bab5f0491e4f674e Mon Sep 17 00:00:00 2001 From: "Lendacky, Thomas" Date: Thu, 5 Jun 2014 09:15:00 -0500 Subject: amd-xgbe: AMD 10GbE device bindings documentation This patch provides the documentation of the device bindings for the AMD 10GbE platform driver. Signed-off-by: Tom Lendacky Signed-off-by: David S. Miller --- .../devicetree/bindings/net/amd-xgbe-phy.txt | 17 +++++++++++ Documentation/devicetree/bindings/net/amd-xgbe.txt | 34 ++++++++++++++++++++++ 2 files changed, 51 insertions(+) create mode 100644 Documentation/devicetree/bindings/net/amd-xgbe-phy.txt create mode 100644 Documentation/devicetree/bindings/net/amd-xgbe.txt (limited to 'Documentation') diff --git a/Documentation/devicetree/bindings/net/amd-xgbe-phy.txt b/Documentation/devicetree/bindings/net/amd-xgbe-phy.txt new file mode 100644 index 000000000000..d01ed63d3ebb --- /dev/null +++ b/Documentation/devicetree/bindings/net/amd-xgbe-phy.txt @@ -0,0 +1,17 @@ +* AMD 10GbE PHY driver (amd-xgbe-phy) + +Required properties: +- compatible: Should be "amd,xgbe-phy-seattle-v1a" and + "ethernet-phy-ieee802.3-c45" +- reg: Address and length of the register sets for the device + - SerDes Rx/Tx registers + - SerDes integration registers (1/2) + - SerDes integration registers (2/2) + +Example: + xgbe_phy@e1240800 { + compatible = "amd,xgbe-phy-seattle-v1a", "ethernet-phy-ieee802.3-c45"; + reg = <0 0xe1240800 0 0x00400>, + <0 0xe1250000 0 0x00060>, + <0 0xe1250080 0 0x00004>; + }; diff --git a/Documentation/devicetree/bindings/net/amd-xgbe.txt b/Documentation/devicetree/bindings/net/amd-xgbe.txt new file mode 100644 index 000000000000..ea0c7908a3b8 --- /dev/null +++ b/Documentation/devicetree/bindings/net/amd-xgbe.txt @@ -0,0 +1,34 @@ +* AMD 10GbE driver (amd-xgbe) + +Required properties: +- compatible: Should be "amd,xgbe-seattle-v1a" +- reg: Address and length of the register sets for the device + - MAC registers + - PCS registers +- interrupt-parent: Should be the phandle for the interrupt controller + that services interrupts for this device +- interrupts: Should contain the amd-xgbe interrupt +- clocks: Should be the DMA clock for the amd-xgbe device (used for + calculating the correct Rx interrupt watchdog timer value on a DMA + channel for coalescing) +- clock-names: Should be the name of the DMA clock, "dma_clk" +- phy-handle: See ethernet.txt file in the same directory +- phy-mode: See ethernet.txt file in the same directory + +Optional properties: +- mac-address: mac address to be assigned to the device. Can be overridden + by UEFI. + +Example: + xgbe@e0700000 { + compatible = "amd,xgbe-seattle-v1a"; + reg = <0 0xe0700000 0 0x80000>, + <0 0xe0780000 0 0x80000>; + interrupt-parent = <&gic>; + interrupts = <0 325 4>; + clocks = <&xgbe_clk>; + clock-names = "dma_clk"; + phy-handle = <&phy>; + phy-mode = "xgmii"; + mac-address = [ 02 a1 a2 a3 a4 a5 ]; + }; -- cgit v1.2.3 From e430f34ee5192c84bcabd3c79ab7e2388b5eec74 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Fri, 6 Jun 2014 14:46:06 -0700 Subject: net: filter: cleanup A/X name usage The macro 'A' used in internal BPF interpreter: #define A regs[insn->a_reg] was easily confused with the name of classic BPF register 'A', since 'A' would mean two different things depending on context. This patch is trying to clean up the naming and clarify its usage in the following way: - A and X are names of two classic BPF registers - BPF_REG_A denotes internal BPF register R0 used to map classic register A in internal BPF programs generated from classic - BPF_REG_X denotes internal BPF register R7 used to map classic register X in internal BPF programs generated from classic - internal BPF instruction format: struct sock_filter_int { __u8 code; /* opcode */ __u8 dst_reg:4; /* dest register */ __u8 src_reg:4; /* source register */ __s16 off; /* signed offset */ __s32 imm; /* signed immediate constant */ }; - BPF_X/BPF_K is 1 bit used to encode source operand of instruction In classic: BPF_X - means use register X as source operand BPF_K - means use 32-bit immediate as source operand In internal: BPF_X - means use 'src_reg' register as source operand BPF_K - means use 32-bit immediate as source operand Suggested-by: Chema Gonzalez Signed-off-by: Alexei Starovoitov Acked-by: Daniel Borkmann Acked-by: Chema Gonzalez Signed-off-by: David S. Miller --- Documentation/networking/filter.txt | 2 +- arch/x86/net/bpf_jit_comp.c | 260 ++++++++++++++++++------------------ include/linux/filter.h | 156 ++++++++++++---------- net/core/filter.c | 198 +++++++++++++-------------- 4 files changed, 314 insertions(+), 302 deletions(-) (limited to 'Documentation') diff --git a/Documentation/networking/filter.txt b/Documentation/networking/filter.txt index 58c443926647..9f49b8690500 100644 --- a/Documentation/networking/filter.txt +++ b/Documentation/networking/filter.txt @@ -805,7 +805,7 @@ to seccomp_data, for converted BPF filters R1 points to a skb. A program, that is translated internally consists of the following elements: - op:16, jt:8, jf:8, k:32 ==> op:8, a_reg:4, x_reg:4, off:16, imm:32 + op:16, jt:8, jf:8, k:32 ==> op:8, dst_reg:4, src_reg:4, off:16, imm:32 So far 87 internal BPF instructions were implemented. 8-bit 'op' opcode field has room for new instructions. Some of them may use 16/24/32 byte encoding. New diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index 080f3f071bb0..99bef86ed6df 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -64,10 +64,10 @@ static inline bool is_simm32(s64 value) return value == (s64) (s32) value; } -/* mov A, X */ -#define EMIT_mov(A, X) \ - do {if (A != X) \ - EMIT3(add_2mod(0x48, A, X), 0x89, add_2reg(0xC0, A, X)); \ +/* mov dst, src */ +#define EMIT_mov(DST, SRC) \ + do {if (DST != SRC) \ + EMIT3(add_2mod(0x48, DST, SRC), 0x89, add_2reg(0xC0, DST, SRC)); \ } while (0) static int bpf_size_to_x86_bytes(int bpf_size) @@ -194,16 +194,16 @@ static inline u8 add_2mod(u8 byte, u32 r1, u32 r2) return byte; } -/* encode dest register 'a_reg' into x64 opcode 'byte' */ -static inline u8 add_1reg(u8 byte, u32 a_reg) +/* encode 'dst_reg' register into x64 opcode 'byte' */ +static inline u8 add_1reg(u8 byte, u32 dst_reg) { - return byte + reg2hex[a_reg]; + return byte + reg2hex[dst_reg]; } -/* encode dest 'a_reg' and src 'x_reg' registers into x64 opcode 'byte' */ -static inline u8 add_2reg(u8 byte, u32 a_reg, u32 x_reg) +/* encode 'dst_reg' and 'src_reg' registers into x64 opcode 'byte' */ +static inline u8 add_2reg(u8 byte, u32 dst_reg, u32 src_reg) { - return byte + reg2hex[a_reg] + (reg2hex[x_reg] << 3); + return byte + reg2hex[dst_reg] + (reg2hex[src_reg] << 3); } struct jit_context { @@ -286,9 +286,9 @@ static int do_jit(struct sk_filter *bpf_prog, int *addrs, u8 *image, } for (i = 0; i < insn_cnt; i++, insn++) { - const s32 K = insn->imm; - u32 a_reg = insn->a_reg; - u32 x_reg = insn->x_reg; + const s32 imm32 = insn->imm; + u32 dst_reg = insn->dst_reg; + u32 src_reg = insn->src_reg; u8 b1 = 0, b2 = 0, b3 = 0; s64 jmp_offset; u8 jmp_cond; @@ -315,32 +315,32 @@ static int do_jit(struct sk_filter *bpf_prog, int *addrs, u8 *image, case BPF_XOR: b2 = 0x31; break; } if (BPF_CLASS(insn->code) == BPF_ALU64) - EMIT1(add_2mod(0x48, a_reg, x_reg)); - else if (is_ereg(a_reg) || is_ereg(x_reg)) - EMIT1(add_2mod(0x40, a_reg, x_reg)); - EMIT2(b2, add_2reg(0xC0, a_reg, x_reg)); + EMIT1(add_2mod(0x48, dst_reg, src_reg)); + else if (is_ereg(dst_reg) || is_ereg(src_reg)) + EMIT1(add_2mod(0x40, dst_reg, src_reg)); + EMIT2(b2, add_2reg(0xC0, dst_reg, src_reg)); break; - /* mov A, X */ + /* mov dst, src */ case BPF_ALU64 | BPF_MOV | BPF_X: - EMIT_mov(a_reg, x_reg); + EMIT_mov(dst_reg, src_reg); break; - /* mov32 A, X */ + /* mov32 dst, src */ case BPF_ALU | BPF_MOV | BPF_X: - if (is_ereg(a_reg) || is_ereg(x_reg)) - EMIT1(add_2mod(0x40, a_reg, x_reg)); - EMIT2(0x89, add_2reg(0xC0, a_reg, x_reg)); + if (is_ereg(dst_reg) || is_ereg(src_reg)) + EMIT1(add_2mod(0x40, dst_reg, src_reg)); + EMIT2(0x89, add_2reg(0xC0, dst_reg, src_reg)); break; - /* neg A */ + /* neg dst */ case BPF_ALU | BPF_NEG: case BPF_ALU64 | BPF_NEG: if (BPF_CLASS(insn->code) == BPF_ALU64) - EMIT1(add_1mod(0x48, a_reg)); - else if (is_ereg(a_reg)) - EMIT1(add_1mod(0x40, a_reg)); - EMIT2(0xF7, add_1reg(0xD8, a_reg)); + EMIT1(add_1mod(0x48, dst_reg)); + else if (is_ereg(dst_reg)) + EMIT1(add_1mod(0x40, dst_reg)); + EMIT2(0xF7, add_1reg(0xD8, dst_reg)); break; case BPF_ALU | BPF_ADD | BPF_K: @@ -354,9 +354,9 @@ static int do_jit(struct sk_filter *bpf_prog, int *addrs, u8 *image, case BPF_ALU64 | BPF_OR | BPF_K: case BPF_ALU64 | BPF_XOR | BPF_K: if (BPF_CLASS(insn->code) == BPF_ALU64) - EMIT1(add_1mod(0x48, a_reg)); - else if (is_ereg(a_reg)) - EMIT1(add_1mod(0x40, a_reg)); + EMIT1(add_1mod(0x48, dst_reg)); + else if (is_ereg(dst_reg)) + EMIT1(add_1mod(0x40, dst_reg)); switch (BPF_OP(insn->code)) { case BPF_ADD: b3 = 0xC0; break; @@ -366,10 +366,10 @@ static int do_jit(struct sk_filter *bpf_prog, int *addrs, u8 *image, case BPF_XOR: b3 = 0xF0; break; } - if (is_imm8(K)) - EMIT3(0x83, add_1reg(b3, a_reg), K); + if (is_imm8(imm32)) + EMIT3(0x83, add_1reg(b3, dst_reg), imm32); else - EMIT2_off32(0x81, add_1reg(b3, a_reg), K); + EMIT2_off32(0x81, add_1reg(b3, dst_reg), imm32); break; case BPF_ALU64 | BPF_MOV | BPF_K: @@ -377,23 +377,23 @@ static int do_jit(struct sk_filter *bpf_prog, int *addrs, u8 *image, * use 'mov eax, imm32' (which zero-extends imm32) * to save 2 bytes */ - if (K < 0) { + if (imm32 < 0) { /* 'mov rax, imm32' sign extends imm32 */ - b1 = add_1mod(0x48, a_reg); + b1 = add_1mod(0x48, dst_reg); b2 = 0xC7; b3 = 0xC0; - EMIT3_off32(b1, b2, add_1reg(b3, a_reg), K); + EMIT3_off32(b1, b2, add_1reg(b3, dst_reg), imm32); break; } case BPF_ALU | BPF_MOV | BPF_K: /* mov %eax, imm32 */ - if (is_ereg(a_reg)) - EMIT1(add_1mod(0x40, a_reg)); - EMIT1_off32(add_1reg(0xB8, a_reg), K); + if (is_ereg(dst_reg)) + EMIT1(add_1mod(0x40, dst_reg)); + EMIT1_off32(add_1reg(0xB8, dst_reg), imm32); break; - /* A %= X, A /= X, A %= K, A /= K */ + /* dst %= src, dst /= src, dst %= imm32, dst /= imm32 */ case BPF_ALU | BPF_MOD | BPF_X: case BPF_ALU | BPF_DIV | BPF_X: case BPF_ALU | BPF_MOD | BPF_K: @@ -406,14 +406,14 @@ static int do_jit(struct sk_filter *bpf_prog, int *addrs, u8 *image, EMIT1(0x52); /* push rdx */ if (BPF_SRC(insn->code) == BPF_X) - /* mov r11, X */ - EMIT_mov(AUX_REG, x_reg); + /* mov r11, src_reg */ + EMIT_mov(AUX_REG, src_reg); else - /* mov r11, K */ - EMIT3_off32(0x49, 0xC7, 0xC3, K); + /* mov r11, imm32 */ + EMIT3_off32(0x49, 0xC7, 0xC3, imm32); - /* mov rax, A */ - EMIT_mov(BPF_REG_0, a_reg); + /* mov rax, dst_reg */ + EMIT_mov(BPF_REG_0, dst_reg); /* xor edx, edx * equivalent to 'xor rdx, rdx', but one byte less @@ -421,7 +421,7 @@ static int do_jit(struct sk_filter *bpf_prog, int *addrs, u8 *image, EMIT2(0x31, 0xd2); if (BPF_SRC(insn->code) == BPF_X) { - /* if (X == 0) return 0 */ + /* if (src_reg == 0) return 0 */ /* cmp r11, 0 */ EMIT4(0x49, 0x83, 0xFB, 0x00); @@ -457,8 +457,8 @@ static int do_jit(struct sk_filter *bpf_prog, int *addrs, u8 *image, EMIT1(0x5A); /* pop rdx */ EMIT1(0x58); /* pop rax */ - /* mov A, r11 */ - EMIT_mov(a_reg, AUX_REG); + /* mov dst_reg, r11 */ + EMIT_mov(dst_reg, AUX_REG); break; case BPF_ALU | BPF_MUL | BPF_K: @@ -468,15 +468,15 @@ static int do_jit(struct sk_filter *bpf_prog, int *addrs, u8 *image, EMIT1(0x50); /* push rax */ EMIT1(0x52); /* push rdx */ - /* mov r11, A */ - EMIT_mov(AUX_REG, a_reg); + /* mov r11, dst_reg */ + EMIT_mov(AUX_REG, dst_reg); if (BPF_SRC(insn->code) == BPF_X) - /* mov rax, X */ - EMIT_mov(BPF_REG_0, x_reg); + /* mov rax, src_reg */ + EMIT_mov(BPF_REG_0, src_reg); else - /* mov rax, K */ - EMIT3_off32(0x48, 0xC7, 0xC0, K); + /* mov rax, imm32 */ + EMIT3_off32(0x48, 0xC7, 0xC0, imm32); if (BPF_CLASS(insn->code) == BPF_ALU64) EMIT1(add_1mod(0x48, AUX_REG)); @@ -491,8 +491,8 @@ static int do_jit(struct sk_filter *bpf_prog, int *addrs, u8 *image, EMIT1(0x5A); /* pop rdx */ EMIT1(0x58); /* pop rax */ - /* mov A, r11 */ - EMIT_mov(a_reg, AUX_REG); + /* mov dst_reg, r11 */ + EMIT_mov(dst_reg, AUX_REG); break; /* shifts */ @@ -503,39 +503,39 @@ static int do_jit(struct sk_filter *bpf_prog, int *addrs, u8 *image, case BPF_ALU64 | BPF_RSH | BPF_K: case BPF_ALU64 | BPF_ARSH | BPF_K: if (BPF_CLASS(insn->code) == BPF_ALU64) - EMIT1(add_1mod(0x48, a_reg)); - else if (is_ereg(a_reg)) - EMIT1(add_1mod(0x40, a_reg)); + EMIT1(add_1mod(0x48, dst_reg)); + else if (is_ereg(dst_reg)) + EMIT1(add_1mod(0x40, dst_reg)); switch (BPF_OP(insn->code)) { case BPF_LSH: b3 = 0xE0; break; case BPF_RSH: b3 = 0xE8; break; case BPF_ARSH: b3 = 0xF8; break; } - EMIT3(0xC1, add_1reg(b3, a_reg), K); + EMIT3(0xC1, add_1reg(b3, dst_reg), imm32); break; case BPF_ALU | BPF_END | BPF_FROM_BE: - switch (K) { + switch (imm32) { case 16: /* emit 'ror %ax, 8' to swap lower 2 bytes */ EMIT1(0x66); - if (is_ereg(a_reg)) + if (is_ereg(dst_reg)) EMIT1(0x41); - EMIT3(0xC1, add_1reg(0xC8, a_reg), 8); + EMIT3(0xC1, add_1reg(0xC8, dst_reg), 8); break; case 32: /* emit 'bswap eax' to swap lower 4 bytes */ - if (is_ereg(a_reg)) + if (is_ereg(dst_reg)) EMIT2(0x41, 0x0F); else EMIT1(0x0F); - EMIT1(add_1reg(0xC8, a_reg)); + EMIT1(add_1reg(0xC8, dst_reg)); break; case 64: /* emit 'bswap rax' to swap 8 bytes */ - EMIT3(add_1mod(0x48, a_reg), 0x0F, - add_1reg(0xC8, a_reg)); + EMIT3(add_1mod(0x48, dst_reg), 0x0F, + add_1reg(0xC8, dst_reg)); break; } break; @@ -543,117 +543,117 @@ static int do_jit(struct sk_filter *bpf_prog, int *addrs, u8 *image, case BPF_ALU | BPF_END | BPF_FROM_LE: break; - /* ST: *(u8*)(a_reg + off) = imm */ + /* ST: *(u8*)(dst_reg + off) = imm */ case BPF_ST | BPF_MEM | BPF_B: - if (is_ereg(a_reg)) + if (is_ereg(dst_reg)) EMIT2(0x41, 0xC6); else EMIT1(0xC6); goto st; case BPF_ST | BPF_MEM | BPF_H: - if (is_ereg(a_reg)) + if (is_ereg(dst_reg)) EMIT3(0x66, 0x41, 0xC7); else EMIT2(0x66, 0xC7); goto st; case BPF_ST | BPF_MEM | BPF_W: - if (is_ereg(a_reg)) + if (is_ereg(dst_reg)) EMIT2(0x41, 0xC7); else EMIT1(0xC7); goto st; case BPF_ST | BPF_MEM | BPF_DW: - EMIT2(add_1mod(0x48, a_reg), 0xC7); + EMIT2(add_1mod(0x48, dst_reg), 0xC7); st: if (is_imm8(insn->off)) - EMIT2(add_1reg(0x40, a_reg), insn->off); + EMIT2(add_1reg(0x40, dst_reg), insn->off); else - EMIT1_off32(add_1reg(0x80, a_reg), insn->off); + EMIT1_off32(add_1reg(0x80, dst_reg), insn->off); - EMIT(K, bpf_size_to_x86_bytes(BPF_SIZE(insn->code))); + EMIT(imm32, bpf_size_to_x86_bytes(BPF_SIZE(insn->code))); break; - /* STX: *(u8*)(a_reg + off) = x_reg */ + /* STX: *(u8*)(dst_reg + off) = src_reg */ case BPF_STX | BPF_MEM | BPF_B: /* emit 'mov byte ptr [rax + off], al' */ - if (is_ereg(a_reg) || is_ereg(x_reg) || + if (is_ereg(dst_reg) || is_ereg(src_reg) || /* have to add extra byte for x86 SIL, DIL regs */ - x_reg == BPF_REG_1 || x_reg == BPF_REG_2) - EMIT2(add_2mod(0x40, a_reg, x_reg), 0x88); + src_reg == BPF_REG_1 || src_reg == BPF_REG_2) + EMIT2(add_2mod(0x40, dst_reg, src_reg), 0x88); else EMIT1(0x88); goto stx; case BPF_STX | BPF_MEM | BPF_H: - if (is_ereg(a_reg) || is_ereg(x_reg)) - EMIT3(0x66, add_2mod(0x40, a_reg, x_reg), 0x89); + if (is_ereg(dst_reg) || is_ereg(src_reg)) + EMIT3(0x66, add_2mod(0x40, dst_reg, src_reg), 0x89); else EMIT2(0x66, 0x89); goto stx; case BPF_STX | BPF_MEM | BPF_W: - if (is_ereg(a_reg) || is_ereg(x_reg)) - EMIT2(add_2mod(0x40, a_reg, x_reg), 0x89); + if (is_ereg(dst_reg) || is_ereg(src_reg)) + EMIT2(add_2mod(0x40, dst_reg, src_reg), 0x89); else EMIT1(0x89); goto stx; case BPF_STX | BPF_MEM | BPF_DW: - EMIT2(add_2mod(0x48, a_reg, x_reg), 0x89); + EMIT2(add_2mod(0x48, dst_reg, src_reg), 0x89); stx: if (is_imm8(insn->off)) - EMIT2(add_2reg(0x40, a_reg, x_reg), insn->off); + EMIT2(add_2reg(0x40, dst_reg, src_reg), insn->off); else - EMIT1_off32(add_2reg(0x80, a_reg, x_reg), + EMIT1_off32(add_2reg(0x80, dst_reg, src_reg), insn->off); break; - /* LDX: a_reg = *(u8*)(x_reg + off) */ + /* LDX: dst_reg = *(u8*)(src_reg + off) */ case BPF_LDX | BPF_MEM | BPF_B: /* emit 'movzx rax, byte ptr [rax + off]' */ - EMIT3(add_2mod(0x48, x_reg, a_reg), 0x0F, 0xB6); + EMIT3(add_2mod(0x48, src_reg, dst_reg), 0x0F, 0xB6); goto ldx; case BPF_LDX | BPF_MEM | BPF_H: /* emit 'movzx rax, word ptr [rax + off]' */ - EMIT3(add_2mod(0x48, x_reg, a_reg), 0x0F, 0xB7); + EMIT3(add_2mod(0x48, src_reg, dst_reg), 0x0F, 0xB7); goto ldx; case BPF_LDX | BPF_MEM | BPF_W: /* emit 'mov eax, dword ptr [rax+0x14]' */ - if (is_ereg(a_reg) || is_ereg(x_reg)) - EMIT2(add_2mod(0x40, x_reg, a_reg), 0x8B); + if (is_ereg(dst_reg) || is_ereg(src_reg)) + EMIT2(add_2mod(0x40, src_reg, dst_reg), 0x8B); else EMIT1(0x8B); goto ldx; case BPF_LDX | BPF_MEM | BPF_DW: /* emit 'mov rax, qword ptr [rax+0x14]' */ - EMIT2(add_2mod(0x48, x_reg, a_reg), 0x8B); + EMIT2(add_2mod(0x48, src_reg, dst_reg), 0x8B); ldx: /* if insn->off == 0 we can save one extra byte, but * special case of x86 r13 which always needs an offset * is not worth the hassle */ if (is_imm8(insn->off)) - EMIT2(add_2reg(0x40, x_reg, a_reg), insn->off); + EMIT2(add_2reg(0x40, src_reg, dst_reg), insn->off); else - EMIT1_off32(add_2reg(0x80, x_reg, a_reg), + EMIT1_off32(add_2reg(0x80, src_reg, dst_reg), insn->off); break; - /* STX XADD: lock *(u32*)(a_reg + off) += x_reg */ + /* STX XADD: lock *(u32*)(dst_reg + off) += src_reg */ case BPF_STX | BPF_XADD | BPF_W: /* emit 'lock add dword ptr [rax + off], eax' */ - if (is_ereg(a_reg) || is_ereg(x_reg)) - EMIT3(0xF0, add_2mod(0x40, a_reg, x_reg), 0x01); + if (is_ereg(dst_reg) || is_ereg(src_reg)) + EMIT3(0xF0, add_2mod(0x40, dst_reg, src_reg), 0x01); else EMIT2(0xF0, 0x01); goto xadd; case BPF_STX | BPF_XADD | BPF_DW: - EMIT3(0xF0, add_2mod(0x48, a_reg, x_reg), 0x01); + EMIT3(0xF0, add_2mod(0x48, dst_reg, src_reg), 0x01); xadd: if (is_imm8(insn->off)) - EMIT2(add_2reg(0x40, a_reg, x_reg), insn->off); + EMIT2(add_2reg(0x40, dst_reg, src_reg), insn->off); else - EMIT1_off32(add_2reg(0x80, a_reg, x_reg), + EMIT1_off32(add_2reg(0x80, dst_reg, src_reg), insn->off); break; /* call */ case BPF_JMP | BPF_CALL: - func = (u8 *) __bpf_call_base + K; + func = (u8 *) __bpf_call_base + imm32; jmp_offset = func - (image + addrs[i]); if (ctx->seen_ld_abs) { EMIT2(0x41, 0x52); /* push %r10 */ @@ -663,9 +663,9 @@ xadd: if (is_imm8(insn->off)) */ jmp_offset += 4; } - if (!K || !is_simm32(jmp_offset)) { + if (!imm32 || !is_simm32(jmp_offset)) { pr_err("unsupported bpf func %d addr %p image %p\n", - K, func, image); + imm32, func, image); return -EINVAL; } EMIT1_off32(0xE8, jmp_offset); @@ -682,21 +682,21 @@ xadd: if (is_imm8(insn->off)) case BPF_JMP | BPF_JGE | BPF_X: case BPF_JMP | BPF_JSGT | BPF_X: case BPF_JMP | BPF_JSGE | BPF_X: - /* cmp a_reg, x_reg */ - EMIT3(add_2mod(0x48, a_reg, x_reg), 0x39, - add_2reg(0xC0, a_reg, x_reg)); + /* cmp dst_reg, src_reg */ + EMIT3(add_2mod(0x48, dst_reg, src_reg), 0x39, + add_2reg(0xC0, dst_reg, src_reg)); goto emit_cond_jmp; case BPF_JMP | BPF_JSET | BPF_X: - /* test a_reg, x_reg */ - EMIT3(add_2mod(0x48, a_reg, x_reg), 0x85, - add_2reg(0xC0, a_reg, x_reg)); + /* test dst_reg, src_reg */ + EMIT3(add_2mod(0x48, dst_reg, src_reg), 0x85, + add_2reg(0xC0, dst_reg, src_reg)); goto emit_cond_jmp; case BPF_JMP | BPF_JSET | BPF_K: - /* test a_reg, imm32 */ - EMIT1(add_1mod(0x48, a_reg)); - EMIT2_off32(0xF7, add_1reg(0xC0, a_reg), K); + /* test dst_reg, imm32 */ + EMIT1(add_1mod(0x48, dst_reg)); + EMIT2_off32(0xF7, add_1reg(0xC0, dst_reg), imm32); goto emit_cond_jmp; case BPF_JMP | BPF_JEQ | BPF_K: @@ -705,13 +705,13 @@ xadd: if (is_imm8(insn->off)) case BPF_JMP | BPF_JGE | BPF_K: case BPF_JMP | BPF_JSGT | BPF_K: case BPF_JMP | BPF_JSGE | BPF_K: - /* cmp a_reg, imm8/32 */ - EMIT1(add_1mod(0x48, a_reg)); + /* cmp dst_reg, imm8/32 */ + EMIT1(add_1mod(0x48, dst_reg)); - if (is_imm8(K)) - EMIT3(0x83, add_1reg(0xF8, a_reg), K); + if (is_imm8(imm32)) + EMIT3(0x83, add_1reg(0xF8, dst_reg), imm32); else - EMIT2_off32(0x81, add_1reg(0xF8, a_reg), K); + EMIT2_off32(0x81, add_1reg(0xF8, dst_reg), imm32); emit_cond_jmp: /* convert BPF opcode to x86 */ switch (BPF_OP(insn->code)) { @@ -773,27 +773,27 @@ emit_jmp: func = sk_load_word; goto common_load; case BPF_LD | BPF_ABS | BPF_W: - func = CHOOSE_LOAD_FUNC(K, sk_load_word); + func = CHOOSE_LOAD_FUNC(imm32, sk_load_word); common_load: ctx->seen_ld_abs = true; jmp_offset = func - (image + addrs[i]); if (!func || !is_simm32(jmp_offset)) { pr_err("unsupported bpf func %d addr %p image %p\n", - K, func, image); + imm32, func, image); return -EINVAL; } if (BPF_MODE(insn->code) == BPF_ABS) { /* mov %esi, imm32 */ - EMIT1_off32(0xBE, K); + EMIT1_off32(0xBE, imm32); } else { - /* mov %rsi, x_reg */ - EMIT_mov(BPF_REG_2, x_reg); - if (K) { - if (is_imm8(K)) + /* mov %rsi, src_reg */ + EMIT_mov(BPF_REG_2, src_reg); + if (imm32) { + if (is_imm8(imm32)) /* add %esi, imm8 */ - EMIT3(0x83, 0xC6, K); + EMIT3(0x83, 0xC6, imm32); else /* add %esi, imm32 */ - EMIT2_off32(0x81, 0xC6, K); + EMIT2_off32(0x81, 0xC6, imm32); } } /* skb pointer is in R6 (%rbx), it will be copied into @@ -808,13 +808,13 @@ common_load: ctx->seen_ld_abs = true; func = sk_load_half; goto common_load; case BPF_LD | BPF_ABS | BPF_H: - func = CHOOSE_LOAD_FUNC(K, sk_load_half); + func = CHOOSE_LOAD_FUNC(imm32, sk_load_half); goto common_load; case BPF_LD | BPF_IND | BPF_B: func = sk_load_byte; goto common_load; case BPF_LD | BPF_ABS | BPF_B: - func = CHOOSE_LOAD_FUNC(K, sk_load_byte); + func = CHOOSE_LOAD_FUNC(imm32, sk_load_byte); goto common_load; case BPF_JMP | BPF_EXIT: diff --git a/include/linux/filter.h b/include/linux/filter.h index f0c2ad43b4af..a7e3c48d73a7 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -78,161 +78,173 @@ enum { /* Helper macros for filter block array initializers. */ -/* ALU ops on registers, bpf_add|sub|...: A += X */ +/* ALU ops on registers, bpf_add|sub|...: dst_reg += src_reg */ -#define BPF_ALU64_REG(OP, A, X) \ +#define BPF_ALU64_REG(OP, DST, SRC) \ ((struct sock_filter_int) { \ .code = BPF_ALU64 | BPF_OP(OP) | BPF_X, \ - .a_reg = A, \ - .x_reg = X, \ + .dst_reg = DST, \ + .src_reg = SRC, \ .off = 0, \ .imm = 0 }) -#define BPF_ALU32_REG(OP, A, X) \ +#define BPF_ALU32_REG(OP, DST, SRC) \ ((struct sock_filter_int) { \ .code = BPF_ALU | BPF_OP(OP) | BPF_X, \ - .a_reg = A, \ - .x_reg = X, \ + .dst_reg = DST, \ + .src_reg = SRC, \ .off = 0, \ .imm = 0 }) -/* ALU ops on immediates, bpf_add|sub|...: A += IMM */ +/* ALU ops on immediates, bpf_add|sub|...: dst_reg += imm32 */ -#define BPF_ALU64_IMM(OP, A, IMM) \ +#define BPF_ALU64_IMM(OP, DST, IMM) \ ((struct sock_filter_int) { \ .code = BPF_ALU64 | BPF_OP(OP) | BPF_K, \ - .a_reg = A, \ - .x_reg = 0, \ + .dst_reg = DST, \ + .src_reg = 0, \ .off = 0, \ .imm = IMM }) -#define BPF_ALU32_IMM(OP, A, IMM) \ +#define BPF_ALU32_IMM(OP, DST, IMM) \ ((struct sock_filter_int) { \ .code = BPF_ALU | BPF_OP(OP) | BPF_K, \ - .a_reg = A, \ - .x_reg = 0, \ + .dst_reg = DST, \ + .src_reg = 0, \ .off = 0, \ .imm = IMM }) /* Endianess conversion, cpu_to_{l,b}e(), {l,b}e_to_cpu() */ -#define BPF_ENDIAN(TYPE, A, LEN) \ +#define BPF_ENDIAN(TYPE, DST, LEN) \ ((struct sock_filter_int) { \ .code = BPF_ALU | BPF_END | BPF_SRC(TYPE), \ - .a_reg = A, \ - .x_reg = 0, \ + .dst_reg = DST, \ + .src_reg = 0, \ .off = 0, \ .imm = LEN }) -/* Short form of mov, A = X */ +/* Short form of mov, dst_reg = src_reg */ -#define BPF_MOV64_REG(A, X) \ +#define BPF_MOV64_REG(DST, SRC) \ ((struct sock_filter_int) { \ .code = BPF_ALU64 | BPF_MOV | BPF_X, \ - .a_reg = A, \ - .x_reg = X, \ + .dst_reg = DST, \ + .src_reg = SRC, \ .off = 0, \ .imm = 0 }) -#define BPF_MOV32_REG(A, X) \ +#define BPF_MOV32_REG(DST, SRC) \ ((struct sock_filter_int) { \ .code = BPF_ALU | BPF_MOV | BPF_X, \ - .a_reg = A, \ - .x_reg = X, \ + .dst_reg = DST, \ + .src_reg = SRC, \ .off = 0, \ .imm = 0 }) -/* Short form of mov, A = IMM */ +/* Short form of mov, dst_reg = imm32 */ -#define BPF_MOV64_IMM(A, IMM) \ +#define BPF_MOV64_IMM(DST, IMM) \ ((struct sock_filter_int) { \ .code = BPF_ALU64 | BPF_MOV | BPF_K, \ - .a_reg = A, \ - .x_reg = 0, \ + .dst_reg = DST, \ + .src_reg = 0, \ .off = 0, \ .imm = IMM }) -#define BPF_MOV32_IMM(A, IMM) \ +#define BPF_MOV32_IMM(DST, IMM) \ ((struct sock_filter_int) { \ .code = BPF_ALU | BPF_MOV | BPF_K, \ - .a_reg = A, \ - .x_reg = 0, \ + .dst_reg = DST, \ + .src_reg = 0, \ .off = 0, \ .imm = IMM }) -/* Short form of mov based on type, BPF_X: A = X, BPF_K: A = IMM */ +/* Short form of mov based on type, BPF_X: dst_reg = src_reg, BPF_K: dst_reg = imm32 */ -#define BPF_MOV64_RAW(TYPE, A, X, IMM) \ +#define BPF_MOV64_RAW(TYPE, DST, SRC, IMM) \ ((struct sock_filter_int) { \ .code = BPF_ALU64 | BPF_MOV | BPF_SRC(TYPE), \ - .a_reg = A, \ - .x_reg = X, \ + .dst_reg = DST, \ + .src_reg = SRC, \ .off = 0, \ .imm = IMM }) -#define BPF_MOV32_RAW(TYPE, A, X, IMM) \ +#define BPF_MOV32_RAW(TYPE, DST, SRC, IMM) \ ((struct sock_filter_int) { \ .code = BPF_ALU | BPF_MOV | BPF_SRC(TYPE), \ - .a_reg = A, \ - .x_reg = X, \ + .dst_reg = DST, \ + .src_reg = SRC, \ .off = 0, \ .imm = IMM }) -/* Direct packet access, R0 = *(uint *) (skb->data + OFF) */ +/* Direct packet access, R0 = *(uint *) (skb->data + imm32) */ -#define BPF_LD_ABS(SIZE, OFF) \ +#define BPF_LD_ABS(SIZE, IMM) \ ((struct sock_filter_int) { \ .code = BPF_LD | BPF_SIZE(SIZE) | BPF_ABS, \ - .a_reg = 0, \ - .x_reg = 0, \ + .dst_reg = 0, \ + .src_reg = 0, \ .off = 0, \ - .imm = OFF }) + .imm = IMM }) -/* Indirect packet access, R0 = *(uint *) (skb->data + X + OFF) */ +/* Indirect packet access, R0 = *(uint *) (skb->data + src_reg + imm32) */ -#define BPF_LD_IND(SIZE, X, OFF) \ +#define BPF_LD_IND(SIZE, SRC, IMM) \ ((struct sock_filter_int) { \ .code = BPF_LD | BPF_SIZE(SIZE) | BPF_IND, \ - .a_reg = 0, \ - .x_reg = X, \ + .dst_reg = 0, \ + .src_reg = SRC, \ .off = 0, \ - .imm = OFF }) + .imm = IMM }) -/* Memory store, A = *(uint *) (X + OFF), and vice versa */ +/* Memory load, dst_reg = *(uint *) (src_reg + off16) */ -#define BPF_LDX_MEM(SIZE, A, X, OFF) \ +#define BPF_LDX_MEM(SIZE, DST, SRC, OFF) \ ((struct sock_filter_int) { \ .code = BPF_LDX | BPF_SIZE(SIZE) | BPF_MEM, \ - .a_reg = A, \ - .x_reg = X, \ + .dst_reg = DST, \ + .src_reg = SRC, \ .off = OFF, \ .imm = 0 }) -#define BPF_STX_MEM(SIZE, A, X, OFF) \ +/* Memory store, *(uint *) (dst_reg + off16) = src_reg */ + +#define BPF_STX_MEM(SIZE, DST, SRC, OFF) \ ((struct sock_filter_int) { \ .code = BPF_STX | BPF_SIZE(SIZE) | BPF_MEM, \ - .a_reg = A, \ - .x_reg = X, \ + .dst_reg = DST, \ + .src_reg = SRC, \ .off = OFF, \ .imm = 0 }) -/* Conditional jumps against registers, if (A 'op' X) goto pc + OFF */ +/* Memory store, *(uint *) (dst_reg + off16) = imm32 */ + +#define BPF_ST_MEM(SIZE, DST, OFF, IMM) \ + ((struct sock_filter_int) { \ + .code = BPF_ST | BPF_SIZE(SIZE) | BPF_MEM, \ + .dst_reg = DST, \ + .src_reg = 0, \ + .off = OFF, \ + .imm = IMM }) + +/* Conditional jumps against registers, if (dst_reg 'op' src_reg) goto pc + off16 */ -#define BPF_JMP_REG(OP, A, X, OFF) \ +#define BPF_JMP_REG(OP, DST, SRC, OFF) \ ((struct sock_filter_int) { \ .code = BPF_JMP | BPF_OP(OP) | BPF_X, \ - .a_reg = A, \ - .x_reg = X, \ + .dst_reg = DST, \ + .src_reg = SRC, \ .off = OFF, \ .imm = 0 }) -/* Conditional jumps against immediates, if (A 'op' IMM) goto pc + OFF */ +/* Conditional jumps against immediates, if (dst_reg 'op' imm32) goto pc + off16 */ -#define BPF_JMP_IMM(OP, A, IMM, OFF) \ +#define BPF_JMP_IMM(OP, DST, IMM, OFF) \ ((struct sock_filter_int) { \ .code = BPF_JMP | BPF_OP(OP) | BPF_K, \ - .a_reg = A, \ - .x_reg = 0, \ + .dst_reg = DST, \ + .src_reg = 0, \ .off = OFF, \ .imm = IMM }) @@ -241,18 +253,18 @@ enum { #define BPF_EMIT_CALL(FUNC) \ ((struct sock_filter_int) { \ .code = BPF_JMP | BPF_CALL, \ - .a_reg = 0, \ - .x_reg = 0, \ + .dst_reg = 0, \ + .src_reg = 0, \ .off = 0, \ .imm = ((FUNC) - __bpf_call_base) }) /* Raw code statement block */ -#define BPF_RAW_INSN(CODE, A, X, OFF, IMM) \ +#define BPF_RAW_INSN(CODE, DST, SRC, OFF, IMM) \ ((struct sock_filter_int) { \ .code = CODE, \ - .a_reg = A, \ - .x_reg = X, \ + .dst_reg = DST, \ + .src_reg = SRC, \ .off = OFF, \ .imm = IMM }) @@ -261,8 +273,8 @@ enum { #define BPF_EXIT_INSN() \ ((struct sock_filter_int) { \ .code = BPF_JMP | BPF_EXIT, \ - .a_reg = 0, \ - .x_reg = 0, \ + .dst_reg = 0, \ + .src_reg = 0, \ .off = 0, \ .imm = 0 }) @@ -287,8 +299,8 @@ enum { struct sock_filter_int { __u8 code; /* opcode */ - __u8 a_reg:4; /* dest register */ - __u8 x_reg:4; /* source register */ + __u8 dst_reg:4; /* dest register */ + __u8 src_reg:4; /* source register */ __s16 off; /* signed offset */ __s32 imm; /* signed immediate constant */ }; diff --git a/net/core/filter.c b/net/core/filter.c index 6bd2e350e751..b3f21751b238 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -59,12 +59,12 @@ #define BPF_R10 regs[BPF_REG_10] /* Named registers */ -#define A regs[insn->a_reg] -#define X regs[insn->x_reg] +#define DST regs[insn->dst_reg] +#define SRC regs[insn->src_reg] #define FP regs[BPF_REG_FP] #define ARG1 regs[BPF_REG_ARG1] #define CTX regs[BPF_REG_CTX] -#define K insn->imm +#define IMM insn->imm /* No hurry in this branch * @@ -264,7 +264,7 @@ static unsigned int __sk_run_filter(void *ctx, const struct sock_filter_int *ins FP = (u64) (unsigned long) &stack[ARRAY_SIZE(stack)]; ARG1 = (u64) (unsigned long) ctx; - /* Register for user BPF programs need to be reset first. */ + /* Registers used in classic BPF programs need to be reset first. */ regs[BPF_REG_A] = 0; regs[BPF_REG_X] = 0; @@ -274,16 +274,16 @@ select_insn: /* ALU */ #define ALU(OPCODE, OP) \ ALU64_##OPCODE##_X: \ - A = A OP X; \ + DST = DST OP SRC; \ CONT; \ ALU_##OPCODE##_X: \ - A = (u32) A OP (u32) X; \ + DST = (u32) DST OP (u32) SRC; \ CONT; \ ALU64_##OPCODE##_K: \ - A = A OP K; \ + DST = DST OP IMM; \ CONT; \ ALU_##OPCODE##_K: \ - A = (u32) A OP (u32) K; \ + DST = (u32) DST OP (u32) IMM; \ CONT; ALU(ADD, +) @@ -296,92 +296,92 @@ select_insn: ALU(MUL, *) #undef ALU ALU_NEG: - A = (u32) -A; + DST = (u32) -DST; CONT; ALU64_NEG: - A = -A; + DST = -DST; CONT; ALU_MOV_X: - A = (u32) X; + DST = (u32) SRC; CONT; ALU_MOV_K: - A = (u32) K; + DST = (u32) IMM; CONT; ALU64_MOV_X: - A = X; + DST = SRC; CONT; ALU64_MOV_K: - A = K; + DST = IMM; CONT; ALU64_ARSH_X: - (*(s64 *) &A) >>= X; + (*(s64 *) &DST) >>= SRC; CONT; ALU64_ARSH_K: - (*(s64 *) &A) >>= K; + (*(s64 *) &DST) >>= IMM; CONT; ALU64_MOD_X: - if (unlikely(X == 0)) + if (unlikely(SRC == 0)) return 0; - tmp = A; - A = do_div(tmp, X); + tmp = DST; + DST = do_div(tmp, SRC); CONT; ALU_MOD_X: - if (unlikely(X == 0)) + if (unlikely(SRC == 0)) return 0; - tmp = (u32) A; - A = do_div(tmp, (u32) X); + tmp = (u32) DST; + DST = do_div(tmp, (u32) SRC); CONT; ALU64_MOD_K: - tmp = A; - A = do_div(tmp, K); + tmp = DST; + DST = do_div(tmp, IMM); CONT; ALU_MOD_K: - tmp = (u32) A; - A = do_div(tmp, (u32) K); + tmp = (u32) DST; + DST = do_div(tmp, (u32) IMM); CONT; ALU64_DIV_X: - if (unlikely(X == 0)) + if (unlikely(SRC == 0)) return 0; - do_div(A, X); + do_div(DST, SRC); CONT; ALU_DIV_X: - if (unlikely(X == 0)) + if (unlikely(SRC == 0)) return 0; - tmp = (u32) A; - do_div(tmp, (u32) X); - A = (u32) tmp; + tmp = (u32) DST; + do_div(tmp, (u32) SRC); + DST = (u32) tmp; CONT; ALU64_DIV_K: - do_div(A, K); + do_div(DST, IMM); CONT; ALU_DIV_K: - tmp = (u32) A; - do_div(tmp, (u32) K); - A = (u32) tmp; + tmp = (u32) DST; + do_div(tmp, (u32) IMM); + DST = (u32) tmp; CONT; ALU_END_TO_BE: - switch (K) { + switch (IMM) { case 16: - A = (__force u16) cpu_to_be16(A); + DST = (__force u16) cpu_to_be16(DST); break; case 32: - A = (__force u32) cpu_to_be32(A); + DST = (__force u32) cpu_to_be32(DST); break; case 64: - A = (__force u64) cpu_to_be64(A); + DST = (__force u64) cpu_to_be64(DST); break; } CONT; ALU_END_TO_LE: - switch (K) { + switch (IMM) { case 16: - A = (__force u16) cpu_to_le16(A); + DST = (__force u16) cpu_to_le16(DST); break; case 32: - A = (__force u32) cpu_to_le32(A); + DST = (__force u32) cpu_to_le32(DST); break; case 64: - A = (__force u64) cpu_to_le64(A); + DST = (__force u64) cpu_to_le64(DST); break; } CONT; @@ -401,85 +401,85 @@ select_insn: insn += insn->off; CONT; JMP_JEQ_X: - if (A == X) { + if (DST == SRC) { insn += insn->off; CONT_JMP; } CONT; JMP_JEQ_K: - if (A == K) { + if (DST == IMM) { insn += insn->off; CONT_JMP; } CONT; JMP_JNE_X: - if (A != X) { + if (DST != SRC) { insn += insn->off; CONT_JMP; } CONT; JMP_JNE_K: - if (A != K) { + if (DST != IMM) { insn += insn->off; CONT_JMP; } CONT; JMP_JGT_X: - if (A > X) { + if (DST > SRC) { insn += insn->off; CONT_JMP; } CONT; JMP_JGT_K: - if (A > K) { + if (DST > IMM) { insn += insn->off; CONT_JMP; } CONT; JMP_JGE_X: - if (A >= X) { + if (DST >= SRC) { insn += insn->off; CONT_JMP; } CONT; JMP_JGE_K: - if (A >= K) { + if (DST >= IMM) { insn += insn->off; CONT_JMP; } CONT; JMP_JSGT_X: - if (((s64) A) > ((s64) X)) { + if (((s64) DST) > ((s64) SRC)) { insn += insn->off; CONT_JMP; } CONT; JMP_JSGT_K: - if (((s64) A) > ((s64) K)) { + if (((s64) DST) > ((s64) IMM)) { insn += insn->off; CONT_JMP; } CONT; JMP_JSGE_X: - if (((s64) A) >= ((s64) X)) { + if (((s64) DST) >= ((s64) SRC)) { insn += insn->off; CONT_JMP; } CONT; JMP_JSGE_K: - if (((s64) A) >= ((s64) K)) { + if (((s64) DST) >= ((s64) IMM)) { insn += insn->off; CONT_JMP; } CONT; JMP_JSET_X: - if (A & X) { + if (DST & SRC) { insn += insn->off; CONT_JMP; } CONT; JMP_JSET_K: - if (A & K) { + if (DST & IMM) { insn += insn->off; CONT_JMP; } @@ -488,15 +488,15 @@ select_insn: return BPF_R0; /* STX and ST and LDX*/ -#define LDST(SIZEOP, SIZE) \ - STX_MEM_##SIZEOP: \ - *(SIZE *)(unsigned long) (A + insn->off) = X; \ - CONT; \ - ST_MEM_##SIZEOP: \ - *(SIZE *)(unsigned long) (A + insn->off) = K; \ - CONT; \ - LDX_MEM_##SIZEOP: \ - A = *(SIZE *)(unsigned long) (X + insn->off); \ +#define LDST(SIZEOP, SIZE) \ + STX_MEM_##SIZEOP: \ + *(SIZE *)(unsigned long) (DST + insn->off) = SRC; \ + CONT; \ + ST_MEM_##SIZEOP: \ + *(SIZE *)(unsigned long) (DST + insn->off) = IMM; \ + CONT; \ + LDX_MEM_##SIZEOP: \ + DST = *(SIZE *)(unsigned long) (SRC + insn->off); \ CONT; LDST(B, u8) @@ -504,16 +504,16 @@ select_insn: LDST(W, u32) LDST(DW, u64) #undef LDST - STX_XADD_W: /* lock xadd *(u32 *)(A + insn->off) += X */ - atomic_add((u32) X, (atomic_t *)(unsigned long) - (A + insn->off)); + STX_XADD_W: /* lock xadd *(u32 *)(dst_reg + off16) += src_reg */ + atomic_add((u32) SRC, (atomic_t *)(unsigned long) + (DST + insn->off)); CONT; - STX_XADD_DW: /* lock xadd *(u64 *)(A + insn->off) += X */ - atomic64_add((u64) X, (atomic64_t *)(unsigned long) - (A + insn->off)); + STX_XADD_DW: /* lock xadd *(u64 *)(dst_reg + off16) += src_reg */ + atomic64_add((u64) SRC, (atomic64_t *)(unsigned long) + (DST + insn->off)); CONT; - LD_ABS_W: /* BPF_R0 = ntohl(*(u32 *) (skb->data + K)) */ - off = K; + LD_ABS_W: /* BPF_R0 = ntohl(*(u32 *) (skb->data + imm32)) */ + off = IMM; load_word: /* BPF_LD + BPD_ABS and BPF_LD + BPF_IND insns are * only appearing in the programs where ctx == @@ -527,51 +527,51 @@ load_word: * BPF_R6-BPF_R9, and store return value into BPF_R0. * * Implicit input: - * ctx + * ctx == skb == BPF_R6 == CTX * * Explicit input: - * X == any register - * K == 32-bit immediate + * SRC == any register + * IMM == 32-bit immediate * * Output: * BPF_R0 - 8/16/32-bit skb data converted to cpu endianness */ - ptr = load_pointer((struct sk_buff *) ctx, off, 4, &tmp); + ptr = load_pointer((struct sk_buff *) CTX, off, 4, &tmp); if (likely(ptr != NULL)) { BPF_R0 = get_unaligned_be32(ptr); CONT; } return 0; - LD_ABS_H: /* BPF_R0 = ntohs(*(u16 *) (skb->data + K)) */ - off = K; + LD_ABS_H: /* BPF_R0 = ntohs(*(u16 *) (skb->data + imm32)) */ + off = IMM; load_half: - ptr = load_pointer((struct sk_buff *) ctx, off, 2, &tmp); + ptr = load_pointer((struct sk_buff *) CTX, off, 2, &tmp); if (likely(ptr != NULL)) { BPF_R0 = get_unaligned_be16(ptr); CONT; } return 0; - LD_ABS_B: /* BPF_R0 = *(u8 *) (ctx + K) */ - off = K; + LD_ABS_B: /* BPF_R0 = *(u8 *) (skb->data + imm32) */ + off = IMM; load_byte: - ptr = load_pointer((struct sk_buff *) ctx, off, 1, &tmp); + ptr = load_pointer((struct sk_buff *) CTX, off, 1, &tmp); if (likely(ptr != NULL)) { BPF_R0 = *(u8 *)ptr; CONT; } return 0; - LD_IND_W: /* BPF_R0 = ntohl(*(u32 *) (skb->data + X + K)) */ - off = K + X; + LD_IND_W: /* BPF_R0 = ntohl(*(u32 *) (skb->data + src_reg + imm32)) */ + off = IMM + SRC; goto load_word; - LD_IND_H: /* BPF_R0 = ntohs(*(u16 *) (skb->data + X + K)) */ - off = K + X; + LD_IND_H: /* BPF_R0 = ntohs(*(u16 *) (skb->data + src_reg + imm32)) */ + off = IMM + SRC; goto load_half; - LD_IND_B: /* BPF_R0 = *(u8 *) (skb->data + X + K) */ - off = K + X; + LD_IND_B: /* BPF_R0 = *(u8 *) (skb->data + src_reg + imm32) */ + off = IMM + SRC; goto load_byte; default_label: @@ -675,7 +675,7 @@ static bool convert_bpf_extensions(struct sock_filter *fp, case SKF_AD_OFF + SKF_AD_PROTOCOL: BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, protocol) != 2); - /* A = *(u16 *) (ctx + offsetof(protocol)) */ + /* A = *(u16 *) (CTX + offsetof(protocol)) */ *insn++ = BPF_LDX_MEM(BPF_H, BPF_REG_A, BPF_REG_CTX, offsetof(struct sk_buff, protocol)); /* A = ntohs(A) [emitting a nop or swap16] */ @@ -741,7 +741,7 @@ static bool convert_bpf_extensions(struct sock_filter *fp, BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, vlan_tci) != 2); BUILD_BUG_ON(VLAN_TAG_PRESENT != 0x1000); - /* A = *(u16 *) (ctx + offsetof(vlan_tci)) */ + /* A = *(u16 *) (CTX + offsetof(vlan_tci)) */ *insn++ = BPF_LDX_MEM(BPF_H, BPF_REG_A, BPF_REG_CTX, offsetof(struct sk_buff, vlan_tci)); if (fp->k == SKF_AD_OFF + SKF_AD_VLAN_TAG) { @@ -760,13 +760,13 @@ static bool convert_bpf_extensions(struct sock_filter *fp, case SKF_AD_OFF + SKF_AD_NLATTR_NEST: case SKF_AD_OFF + SKF_AD_CPU: case SKF_AD_OFF + SKF_AD_RANDOM: - /* arg1 = ctx */ + /* arg1 = CTX */ *insn++ = BPF_MOV64_REG(BPF_REG_ARG1, BPF_REG_CTX); /* arg2 = A */ *insn++ = BPF_MOV64_REG(BPF_REG_ARG2, BPF_REG_A); /* arg3 = X */ *insn++ = BPF_MOV64_REG(BPF_REG_ARG3, BPF_REG_X); - /* Emit call(ctx, arg2=A, arg3=X) */ + /* Emit call(arg1=CTX, arg2=A, arg3=X) */ switch (fp->k) { case SKF_AD_OFF + SKF_AD_PAY_OFFSET: *insn = BPF_EMIT_CALL(__skb_get_pay_offset); @@ -941,12 +941,12 @@ do_pass: */ *insn++ = BPF_MOV32_IMM(BPF_REG_TMP, fp->k); - insn->a_reg = BPF_REG_A; - insn->x_reg = BPF_REG_TMP; + insn->dst_reg = BPF_REG_A; + insn->src_reg = BPF_REG_TMP; bpf_src = BPF_X; } else { - insn->a_reg = BPF_REG_A; - insn->x_reg = BPF_REG_X; + insn->dst_reg = BPF_REG_A; + insn->src_reg = BPF_REG_X; insn->imm = fp->k; bpf_src = BPF_SRC(fp->code); } -- cgit v1.2.3 From e4ad403269ff0ecdfb137b2a72349c30941cec7a Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Tue, 10 Jun 2014 17:44:06 +0200 Subject: net: filter: mention eBPF terminology as well Since the term eBPF is used anyway on mailing list discussions, lets also document that in the main BPF documentation file and replace a couple of occurrences with eBPF terminology to be more clear. Signed-off-by: Alexei Starovoitov Signed-off-by: Daniel Borkmann Signed-off-by: David S. Miller --- Documentation/networking/filter.txt | 85 +++++++++++++++++++------------------ 1 file changed, 43 insertions(+), 42 deletions(-) (limited to 'Documentation') diff --git a/Documentation/networking/filter.txt b/Documentation/networking/filter.txt index 9f49b8690500..1c7fc6baed84 100644 --- a/Documentation/networking/filter.txt +++ b/Documentation/networking/filter.txt @@ -561,42 +561,43 @@ toolchain for developing and testing the kernel's JIT compiler. BPF kernel internals -------------------- -Internally, for the kernel interpreter, a different BPF instruction set +Internally, for the kernel interpreter, a different instruction set format with similar underlying principles from BPF described in previous paragraphs is being used. However, the instruction set format is modelled closer to the underlying architecture to mimic native instruction sets, so -that a better performance can be achieved (more details later). +that a better performance can be achieved (more details later). This new +ISA is called 'eBPF' or 'internal BPF' interchangeably. (Note: eBPF which +originates from [e]xtended BPF is not the same as BPF extensions! While +eBPF is an ISA, BPF extensions date back to classic BPF's 'overloading' +of BPF_LD | BPF_{B,H,W} | BPF_ABS instruction.) It is designed to be JITed with one to one mapping, which can also open up -the possibility for GCC/LLVM compilers to generate optimized BPF code through -a BPF backend that performs almost as fast as natively compiled code. +the possibility for GCC/LLVM compilers to generate optimized eBPF code through +an eBPF backend that performs almost as fast as natively compiled code. The new instruction set was originally designed with the possible goal in -mind to write programs in "restricted C" and compile into BPF with a optional +mind to write programs in "restricted C" and compile into eBPF with a optional GCC/LLVM backend, so that it can just-in-time map to modern 64-bit CPUs with -minimal performance overhead over two steps, that is, C -> BPF -> native code. +minimal performance overhead over two steps, that is, C -> eBPF -> native code. Currently, the new format is being used for running user BPF programs, which includes seccomp BPF, classic socket filters, cls_bpf traffic classifier, team driver's classifier for its load-balancing mode, netfilter's xt_bpf extension, PTP dissector/classifier, and much more. They are all internally converted by the kernel into the new instruction set representation and run -in the extended interpreter. For in-kernel handlers, this all works -transparently by using sk_unattached_filter_create() for setting up the -filter, resp. sk_unattached_filter_destroy() for destroying it. The macro -SK_RUN_FILTER(filter, ctx) transparently invokes the right BPF function to -run the filter. 'filter' is a pointer to struct sk_filter that we got from -sk_unattached_filter_create(), and 'ctx' the given context (e.g. skb pointer). -All constraints and restrictions from sk_chk_filter() apply before a -conversion to the new layout is being done behind the scenes! - -Currently, for JITing, the user BPF format is being used and current BPF JIT -compilers reused whenever possible. In other words, we do not (yet!) perform -a JIT compilation in the new layout, however, future work will successively -migrate traditional JIT compilers into the new instruction format as well, so -that they will profit from the very same benefits. Thus, when speaking about -JIT in the following, a JIT compiler (TBD) for the new instruction format is -meant in this context. +in the eBPF interpreter. For in-kernel handlers, this all works transparently +by using sk_unattached_filter_create() for setting up the filter, resp. +sk_unattached_filter_destroy() for destroying it. The macro +SK_RUN_FILTER(filter, ctx) transparently invokes eBPF interpreter or JITed +code to run the filter. 'filter' is a pointer to struct sk_filter that we +got from sk_unattached_filter_create(), and 'ctx' the given context (e.g. +skb pointer). All constraints and restrictions from sk_chk_filter() apply +before a conversion to the new layout is being done behind the scenes! + +Currently, the classic BPF format is being used for JITing on most of the +architectures. Only x86-64 performs JIT compilation from eBPF instruction set, +however, future work will migrate other JIT compilers as well, so that they +will profit from the very same benefits. Some core changes of the new internal format: @@ -605,35 +606,35 @@ Some core changes of the new internal format: The old format had two registers A and X, and a hidden frame pointer. The new layout extends this to be 10 internal registers and a read-only frame pointer. Since 64-bit CPUs are passing arguments to functions via registers - the number of args from BPF program to in-kernel function is restricted + the number of args from eBPF program to in-kernel function is restricted to 5 and one register is used to accept return value from an in-kernel function. Natively, x86_64 passes first 6 arguments in registers, aarch64/ sparcv9/mips64 have 7 - 8 registers for arguments; x86_64 has 6 callee saved registers, and aarch64/sparcv9/mips64 have 11 or more callee saved registers. - Therefore, BPF calling convention is defined as: + Therefore, eBPF calling convention is defined as: - * R0 - return value from in-kernel function, and exit value for BPF program - * R1 - R5 - arguments from BPF program to in-kernel function + * R0 - return value from in-kernel function, and exit value for eBPF program + * R1 - R5 - arguments from eBPF program to in-kernel function * R6 - R9 - callee saved registers that in-kernel function will preserve * R10 - read-only frame pointer to access stack - Thus, all BPF registers map one to one to HW registers on x86_64, aarch64, - etc, and BPF calling convention maps directly to ABIs used by the kernel on + Thus, all eBPF registers map one to one to HW registers on x86_64, aarch64, + etc, and eBPF calling convention maps directly to ABIs used by the kernel on 64-bit architectures. On 32-bit architectures JIT may map programs that use only 32-bit arithmetic and may let more complex programs to be interpreted. - R0 - R5 are scratch registers and BPF program needs spill/fill them if - necessary across calls. Note that there is only one BPF program (== one BPF - main routine) and it cannot call other BPF functions, it can only call - predefined in-kernel functions, though. + R0 - R5 are scratch registers and eBPF program needs spill/fill them if + necessary across calls. Note that there is only one eBPF program (== one + eBPF main routine) and it cannot call other eBPF functions, it can only + call predefined in-kernel functions, though. - Register width increases from 32-bit to 64-bit: Still, the semantics of the original 32-bit ALU operations are preserved - via 32-bit subregisters. All BPF registers are 64-bit with 32-bit lower + via 32-bit subregisters. All eBPF registers are 64-bit with 32-bit lower subregisters that zero-extend into 64-bit if they are being written to. That behavior maps directly to x86_64 and arm64 subregister definition, but makes other JITs more difficult. @@ -644,8 +645,8 @@ Some core changes of the new internal format: Operation is 64-bit, because on 64-bit architectures, pointers are also 64-bit wide, and we want to pass 64-bit values in/out of kernel functions, - so 32-bit BPF registers would otherwise require to define register-pair - ABI, thus, there won't be able to use a direct BPF register to HW register + so 32-bit eBPF registers would otherwise require to define register-pair + ABI, thus, there won't be able to use a direct eBPF register to HW register mapping and JIT would need to do combine/split/move operations for every register in and out of the function, which is complex, bug prone and slow. Another reason is the use of atomic 64-bit counters. @@ -690,7 +691,7 @@ Some core changes of the new internal format: subq %rsi, %rax ret - Function f2 in BPF may look like: + Function f2 in eBPF may look like: f2: bpf_mov R2, R1 @@ -702,7 +703,7 @@ Some core changes of the new internal format: returns will be seamless. Without JIT, __sk_run_filter() interpreter needs to be used to call into f2. - For practical reasons all BPF programs have only one argument 'ctx' which is + For practical reasons all eBPF programs have only one argument 'ctx' which is already placed into R1 (e.g. on __sk_run_filter() startup) and the programs can call kernel functions with up to 5 arguments. Calls with 6 or more arguments are currently not supported, but these restrictions can be lifted if necessary @@ -779,9 +780,9 @@ Some core changes of the new internal format: In-kernel functions foo() and bar() with prototype: u64 (*)(u64 arg1, u64 arg2, u64 arg3, u64 arg4, u64 arg5); will receive arguments in proper - registers and place their return value into '%rax' which is R0 in BPF. + registers and place their return value into '%rax' which is R0 in eBPF. Prologue and epilogue are emitted by JIT and are implicit in the - interpreter. R0-R5 are scratch registers, so BPF program needs to preserve + interpreter. R0-R5 are scratch registers, so eBPF program needs to preserve them across the calls as defined by calling convention. For example the following program is invalid: @@ -792,12 +793,12 @@ Some core changes of the new internal format: bpf_exit After the call the registers R1-R5 contain junk values and cannot be read. - In the future a BPF verifier can be used to validate internal BPF programs. + In the future an eBPF verifier can be used to validate internal BPF programs. -Also in the new design, BPF is limited to 4096 insns, which means that any +Also in the new design, eBPF is limited to 4096 insns, which means that any program will terminate quickly and will only call a fixed number of kernel functions. Original BPF and the new format are two operand instructions, -which helps to do one-to-one mapping between BPF insn and x86 insn during JIT. +which helps to do one-to-one mapping between eBPF insn and x86 insn during JIT. The input context pointer for invoking the interpreter function is generic, its content is defined by a specific use case. For seccomp register R1 points -- cgit v1.2.3 From 783e327b69e24924055359a4e5779d04c052974a Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Tue, 10 Jun 2014 17:44:07 +0200 Subject: net: filter: document internal instruction encoding This patch adds a description of eBPFs instruction encoding in order to bring the documentation in line with the implementation. Signed-off-by: Alexei Starovoitov Signed-off-by: Daniel Borkmann Signed-off-by: David S. Miller --- Documentation/networking/filter.txt | 161 ++++++++++++++++++++++++++++++++++++ 1 file changed, 161 insertions(+) (limited to 'Documentation') diff --git a/Documentation/networking/filter.txt b/Documentation/networking/filter.txt index 1c7fc6baed84..ee78eba78a9d 100644 --- a/Documentation/networking/filter.txt +++ b/Documentation/networking/filter.txt @@ -834,6 +834,167 @@ loops and other CFG validation; second step starts from the first insn and descends all possible paths. It simulates execution of every insn and observes the state change of registers and stack. +eBPF opcode encoding +-------------------- + +eBPF is reusing most of the opcode encoding from classic to simplify conversion +of classic BPF to eBPF. For arithmetic and jump instructions the 8-bit 'code' +field is divided into three parts: + + +----------------+--------+--------------------+ + | 4 bits | 1 bit | 3 bits | + | operation code | source | instruction class | + +----------------+--------+--------------------+ + (MSB) (LSB) + +Three LSB bits store instruction class which is one of: + + Classic BPF classes: eBPF classes: + + BPF_LD 0x00 BPF_LD 0x00 + BPF_LDX 0x01 BPF_LDX 0x01 + BPF_ST 0x02 BPF_ST 0x02 + BPF_STX 0x03 BPF_STX 0x03 + BPF_ALU 0x04 BPF_ALU 0x04 + BPF_JMP 0x05 BPF_JMP 0x05 + BPF_RET 0x06 [ class 6 unused, for future if needed ] + BPF_MISC 0x07 BPF_ALU64 0x07 + +When BPF_CLASS(code) == BPF_ALU or BPF_JMP, 4th bit encodes source operand ... + + BPF_K 0x00 + BPF_X 0x08 + + * in classic BPF, this means: + + BPF_SRC(code) == BPF_X - use register X as source operand + BPF_SRC(code) == BPF_K - use 32-bit immediate as source operand + + * in eBPF, this means: + + BPF_SRC(code) == BPF_X - use 'src_reg' register as source operand + BPF_SRC(code) == BPF_K - use 32-bit immediate as source operand + +... and four MSB bits store operation code. + +If BPF_CLASS(code) == BPF_ALU or BPF_ALU64 [ in eBPF ], BPF_OP(code) is one of: + + BPF_ADD 0x00 + BPF_SUB 0x10 + BPF_MUL 0x20 + BPF_DIV 0x30 + BPF_OR 0x40 + BPF_AND 0x50 + BPF_LSH 0x60 + BPF_RSH 0x70 + BPF_NEG 0x80 + BPF_MOD 0x90 + BPF_XOR 0xa0 + BPF_MOV 0xb0 /* eBPF only: mov reg to reg */ + BPF_ARSH 0xc0 /* eBPF only: sign extending shift right */ + BPF_END 0xd0 /* eBPF only: endianness conversion */ + +If BPF_CLASS(code) == BPF_JMP, BPF_OP(code) is one of: + + BPF_JA 0x00 + BPF_JEQ 0x10 + BPF_JGT 0x20 + BPF_JGE 0x30 + BPF_JSET 0x40 + BPF_JNE 0x50 /* eBPF only: jump != */ + BPF_JSGT 0x60 /* eBPF only: signed '>' */ + BPF_JSGE 0x70 /* eBPF only: signed '>=' */ + BPF_CALL 0x80 /* eBPF only: function call */ + BPF_EXIT 0x90 /* eBPF only: function return */ + +So BPF_ADD | BPF_X | BPF_ALU means 32-bit addition in both classic BPF +and eBPF. There are only two registers in classic BPF, so it means A += X. +In eBPF it means dst_reg = (u32) dst_reg + (u32) src_reg; similarly, +BPF_XOR | BPF_K | BPF_ALU means A ^= imm32 in classic BPF and analogous +src_reg = (u32) src_reg ^ (u32) imm32 in eBPF. + +Classic BPF is using BPF_MISC class to represent A = X and X = A moves. +eBPF is using BPF_MOV | BPF_X | BPF_ALU code instead. Since there are no +BPF_MISC operations in eBPF, the class 7 is used as BPF_ALU64 to mean +exactly the same operations as BPF_ALU, but with 64-bit wide operands +instead. So BPF_ADD | BPF_X | BPF_ALU64 means 64-bit addition, i.e.: +dst_reg = dst_reg + src_reg + +Classic BPF wastes the whole BPF_RET class to represent a single 'ret' +operation. Classic BPF_RET | BPF_K means copy imm32 into return register +and perform function exit. eBPF is modeled to match CPU, so BPF_JMP | BPF_EXIT +in eBPF means function exit only. The eBPF program needs to store return +value into register R0 before doing a BPF_EXIT. Class 6 in eBPF is currently +unused and reserved for future use. + +For load and store instructions the 8-bit 'code' field is divided as: + + +--------+--------+-------------------+ + | 3 bits | 2 bits | 3 bits | + | mode | size | instruction class | + +--------+--------+-------------------+ + (MSB) (LSB) + +Size modifier is one of ... + + BPF_W 0x00 /* word */ + BPF_H 0x08 /* half word */ + BPF_B 0x10 /* byte */ + BPF_DW 0x18 /* eBPF only, double word */ + +... which encodes size of load/store operation: + + B - 1 byte + H - 2 byte + W - 4 byte + DW - 8 byte (eBPF only) + +Mode modifier is one of: + + BPF_IMM 0x00 /* classic BPF only, reserved in eBPF */ + BPF_ABS 0x20 + BPF_IND 0x40 + BPF_MEM 0x60 + BPF_LEN 0x80 /* classic BPF only, reserved in eBPF */ + BPF_MSH 0xa0 /* classic BPF only, reserved in eBPF */ + BPF_XADD 0xc0 /* eBPF only, exclusive add */ + +eBPF has two non-generic instructions: (BPF_ABS | | BPF_LD) and +(BPF_IND | | BPF_LD) which are used to access packet data. + +They had to be carried over from classic to have strong performance of +socket filters running in eBPF interpreter. These instructions can only +be used when interpreter context is a pointer to 'struct sk_buff' and +have seven implicit operands. Register R6 is an implicit input that must +contain pointer to sk_buff. Register R0 is an implicit output which contains +the data fetched from the packet. Registers R1-R5 are scratch registers +and must not be used to store the data across BPF_ABS | BPF_LD or +BPF_IND | BPF_LD instructions. + +These instructions have implicit program exit condition as well. When +eBPF program is trying to access the data beyond the packet boundary, +the interpreter will abort the execution of the program. JIT compilers +therefore must preserve this property. src_reg and imm32 fields are +explicit inputs to these instructions. + +For example: + + BPF_IND | BPF_W | BPF_LD means: + + R0 = ntohl(*(u32 *) (((struct sk_buff *) R6)->data + src_reg + imm32)) + and R1 - R5 were scratched. + +Unlike classic BPF instruction set, eBPF has generic load/store operations: + +BPF_MEM | | BPF_STX: *(size *) (dst_reg + off) = src_reg +BPF_MEM | | BPF_ST: *(size *) (dst_reg + off) = imm32 +BPF_MEM | | BPF_LDX: dst_reg = *(size *) (src_reg + off) +BPF_XADD | BPF_W | BPF_STX: lock xadd *(u32 *)(dst_reg + off16) += src_reg +BPF_XADD | BPF_DW | BPF_STX: lock xadd *(u64 *)(dst_reg + off16) += src_reg + +Where size is one of: BPF_B or BPF_H or BPF_W or BPF_DW. Note that 1 and +2 byte atomic increments are not supported. + Testing ------- -- cgit v1.2.3