diff options
Diffstat (limited to 'arch/tile')
75 files changed, 6573 insertions, 1102 deletions
diff --git a/arch/tile/Kconfig b/arch/tile/Kconfig index 1eb308cb711a..e11b5fcb70eb 100644 --- a/arch/tile/Kconfig +++ b/arch/tile/Kconfig @@ -58,6 +58,9 @@ config ARCH_SUPPORTS_OPTIMIZED_INLINING config ARCH_PHYS_ADDR_T_64BIT def_bool y +config ARCH_DMA_ADDR_T_64BIT + def_bool y + config LOCKDEP_SUPPORT def_bool y @@ -96,6 +99,7 @@ config HVC_TILE config TILE def_bool y + select HAVE_KVM if !TILEGX select GENERIC_FIND_FIRST_BIT select GENERIC_FIND_NEXT_BIT select USE_GENERIC_SMP_HELPERS @@ -113,8 +117,6 @@ config TILE # config HUGETLB_PAGE_SIZE_VARIABLE -mainmenu "Linux/TILE Kernel Configuration" - # Please note: TILE-Gx support is not yet finalized; this is # the preliminary support. TILE-Gx drivers are only provided # with the alpha or beta test versions for Tilera customers. @@ -236,9 +238,9 @@ choice If you are not absolutely sure what you are doing, leave this option alone! - config VMSPLIT_375G + config VMSPLIT_3_75G bool "3.75G/0.25G user/kernel split (no kernel networking)" - config VMSPLIT_35G + config VMSPLIT_3_5G bool "3.5G/0.5G user/kernel split" config VMSPLIT_3G bool "3G/1G user/kernel split" @@ -252,8 +254,8 @@ endchoice config PAGE_OFFSET hex - default 0xF0000000 if VMSPLIT_375G - default 0xE0000000 if VMSPLIT_35G + default 0xF0000000 if VMSPLIT_3_75G + default 0xE0000000 if VMSPLIT_3_5G default 0xB0000000 if VMSPLIT_3G_OPT default 0x80000000 if VMSPLIT_2G default 0x40000000 if VMSPLIT_1G @@ -314,10 +316,31 @@ config HARDWALL bool "Hardwall support to allow access to user dynamic network" default y +config KERNEL_PL + int "Processor protection level for kernel" + range 1 2 + default "1" + ---help--- + This setting determines the processor protection level the + kernel will be built to run at. Generally you should use + the default value here. 
+ endmenu # Tilera-specific configuration menu "Bus options" +config PCI + bool "PCI support" + default y + select PCI_DOMAINS + ---help--- + Enable PCI root complex support, so PCIe endpoint devices can + be attached to the Tile chip. Many, but not all, PCI devices + are supported under Tilera's root complex driver. + +config PCI_DOMAINS + bool + config NO_IOMEM def_bool !PCI @@ -354,3 +377,5 @@ source "security/Kconfig" source "crypto/Kconfig" source "lib/Kconfig" + +source "arch/tile/kvm/Kconfig" diff --git a/arch/tile/Makefile b/arch/tile/Makefile index fd8f6bb5face..17acce70569b 100644 --- a/arch/tile/Makefile +++ b/arch/tile/Makefile @@ -26,8 +26,9 @@ $(error Set TILERA_ROOT or CROSS_COMPILE when building $(ARCH) on $(HOST_ARCH)) endif endif - +ifneq ($(CONFIG_DEBUG_EXTRA_FLAGS),"") KBUILD_CFLAGS += $(CONFIG_DEBUG_EXTRA_FLAGS) +endif LIBGCC_PATH := $(shell $(CC) $(KBUILD_CFLAGS) -print-libgcc-file-name) @@ -49,6 +50,20 @@ head-y := arch/tile/kernel/head_$(BITS).o libs-y += arch/tile/lib/ libs-y += $(LIBGCC_PATH) - # See arch/tile/Kbuild for content of core part of the kernel core-y += arch/tile/ + +core-$(CONFIG_KVM) += arch/tile/kvm/ + +ifdef TILERA_ROOT +INSTALL_PATH ?= $(TILERA_ROOT)/tile/boot +endif + +install: + install -D -m 755 vmlinux $(INSTALL_PATH)/vmlinux-$(KERNELRELEASE) + install -D -m 644 .config $(INSTALL_PATH)/config-$(KERNELRELEASE) + install -D -m 644 System.map $(INSTALL_PATH)/System.map-$(KERNELRELEASE) + +define archhelp + echo ' install - install kernel into $(INSTALL_PATH)' +endef diff --git a/arch/tile/include/arch/sim.h b/arch/tile/include/arch/sim.h new file mode 100644 index 000000000000..74b7c1624d34 --- /dev/null +++ b/arch/tile/include/arch/sim.h @@ -0,0 +1,619 @@ +/* + * Copyright 2010 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. 
+ * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + */ + +/** + * @file + * + * Provides an API for controlling the simulator at runtime. + */ + +/** + * @addtogroup arch_sim + * @{ + * + * An API for controlling the simulator at runtime. + * + * The simulator's behavior can be modified while it is running. + * For example, human-readable trace output can be enabled and disabled + * around code of interest. + * + * There are two ways to modify simulator behavior: + * programmatically, by calling various sim_* functions, and + * interactively, by entering commands like "sim set functional true" + * at the tile-monitor prompt. Typing "sim help" at that prompt provides + * a list of interactive commands. + * + * All interactive commands can also be executed programmatically by + * passing a string to the sim_command function. + */ + +#ifndef __ARCH_SIM_H__ +#define __ARCH_SIM_H__ + +#include <arch/sim_def.h> +#include <arch/abi.h> + +#ifndef __ASSEMBLER__ + +#include <arch/spr_def.h> + + +/** + * Return true if the current program is running under a simulator, + * rather than on real hardware. If running on hardware, other "sim_xxx()" + * calls have no useful effect. + */ +static inline int +sim_is_simulator(void) +{ + return __insn_mfspr(SPR_SIM_CONTROL) != 0; +} + + +/** + * Checkpoint the simulator state to a checkpoint file. + * + * The checkpoint file name is either the default or the name specified + * on the command line with "--checkpoint-file". + */ +static __inline void +sim_checkpoint(void) +{ + __insn_mtspr(SPR_SIM_CONTROL, SIM_CONTROL_CHECKPOINT); +} + + +/** + * Report whether or not various kinds of simulator tracing are enabled. 
+ * + * @return The bitwise OR of these values: + * + * SIM_TRACE_CYCLES (--trace-cycles), + * SIM_TRACE_ROUTER (--trace-router), + * SIM_TRACE_REGISTER_WRITES (--trace-register-writes), + * SIM_TRACE_DISASM (--trace-disasm), + * SIM_TRACE_STALL_INFO (--trace-stall-info) + * SIM_TRACE_MEMORY_CONTROLLER (--trace-memory-controller) + * SIM_TRACE_L2_CACHE (--trace-l2) + * SIM_TRACE_LINES (--trace-lines) + */ +static __inline unsigned int +sim_get_tracing(void) +{ + return __insn_mfspr(SPR_SIM_CONTROL) & SIM_TRACE_FLAG_MASK; +} + + +/** + * Turn on or off different kinds of simulator tracing. + * + * @param mask Either one of these special values: + * + * SIM_TRACE_NONE (turns off tracing), + * SIM_TRACE_ALL (turns on all possible tracing). + * + * or the bitwise OR of these values: + * + * SIM_TRACE_CYCLES (--trace-cycles), + * SIM_TRACE_ROUTER (--trace-router), + * SIM_TRACE_REGISTER_WRITES (--trace-register-writes), + * SIM_TRACE_DISASM (--trace-disasm), + * SIM_TRACE_STALL_INFO (--trace-stall-info) + * SIM_TRACE_MEMORY_CONTROLLER (--trace-memory-controller) + * SIM_TRACE_L2_CACHE (--trace-l2) + * SIM_TRACE_LINES (--trace-lines) + */ +static __inline void +sim_set_tracing(unsigned int mask) +{ + __insn_mtspr(SPR_SIM_CONTROL, SIM_TRACE_SPR_ARG(mask)); +} + + +/** + * Request dumping of different kinds of simulator state. 
+ * + * @param mask Either this special value: + * + * SIM_DUMP_ALL (dump all known state) + * + * or the bitwise OR of these values: + * + * SIM_DUMP_REGS (the register file), + * SIM_DUMP_SPRS (the SPRs), + * SIM_DUMP_ITLB (the iTLB), + * SIM_DUMP_DTLB (the dTLB), + * SIM_DUMP_L1I (the L1 I-cache), + * SIM_DUMP_L1D (the L1 D-cache), + * SIM_DUMP_L2 (the L2 cache), + * SIM_DUMP_SNREGS (the switch register file), + * SIM_DUMP_SNITLB (the switch iTLB), + * SIM_DUMP_SNL1I (the switch L1 I-cache), + * SIM_DUMP_BACKTRACE (the current backtrace) + */ +static __inline void +sim_dump(unsigned int mask) +{ + __insn_mtspr(SPR_SIM_CONTROL, SIM_DUMP_SPR_ARG(mask)); +} + + +/** + * Print a string to the simulator stdout. + * + * @param str The string to be written; a newline is automatically added. + */ +static __inline void +sim_print_string(const char* str) +{ + int i; + for (i = 0; str[i] != 0; i++) + { + __insn_mtspr(SPR_SIM_CONTROL, SIM_CONTROL_PUTC | + (str[i] << _SIM_CONTROL_OPERATOR_BITS)); + } + __insn_mtspr(SPR_SIM_CONTROL, SIM_CONTROL_PUTC | + (SIM_PUTC_FLUSH_STRING << _SIM_CONTROL_OPERATOR_BITS)); +} + + +/** + * Execute a simulator command string. + * + * Type 'sim help' at the tile-monitor prompt to learn what commands + * are available. Note the use of the tile-monitor "sim" command to + * pass commands to the simulator. + * + * The argument to sim_command() does not include the leading "sim" + * prefix used at the tile-monitor prompt; for example, you might call + * sim_command("trace disasm"). + */ +static __inline void +sim_command(const char* str) +{ + int c; + do + { + c = *str++; + __insn_mtspr(SPR_SIM_CONTROL, SIM_CONTROL_COMMAND | + (c << _SIM_CONTROL_OPERATOR_BITS)); + } + while (c); +} + + + +#ifndef __DOXYGEN__ + +/** + * The underlying implementation of "_sim_syscall()". + * + * We use extra "and" instructions to ensure that all the values + * we are passing to the simulator are actually valid in the registers + * (i.e. 
returned from memory) prior to the SIM_CONTROL spr. + */ +static __inline int _sim_syscall0(int val) +{ + long result; + __asm__ __volatile__ ("mtspr SIM_CONTROL, r0" + : "=R00" (result) : "R00" (val)); + return result; +} + +static __inline int _sim_syscall1(int val, long arg1) +{ + long result; + __asm__ __volatile__ ("{ and zero, r1, r1; mtspr SIM_CONTROL, r0 }" + : "=R00" (result) : "R00" (val), "R01" (arg1)); + return result; +} + +static __inline int _sim_syscall2(int val, long arg1, long arg2) +{ + long result; + __asm__ __volatile__ ("{ and zero, r1, r2; mtspr SIM_CONTROL, r0 }" + : "=R00" (result) + : "R00" (val), "R01" (arg1), "R02" (arg2)); + return result; +} + +/* Note that _sim_syscall3() and higher are technically at risk of + receiving an interrupt right before the mtspr bundle, in which case + the register values for arguments 3 and up may still be in flight + to the core from a stack frame reload. */ + +static __inline int _sim_syscall3(int val, long arg1, long arg2, long arg3) +{ + long result; + __asm__ __volatile__ ("{ and zero, r3, r3 };" + "{ and zero, r1, r2; mtspr SIM_CONTROL, r0 }" + : "=R00" (result) + : "R00" (val), "R01" (arg1), "R02" (arg2), + "R03" (arg3)); + return result; +} + +static __inline int _sim_syscall4(int val, long arg1, long arg2, long arg3, + long arg4) +{ + long result; + __asm__ __volatile__ ("{ and zero, r3, r4 };" + "{ and zero, r1, r2; mtspr SIM_CONTROL, r0 }" + : "=R00" (result) + : "R00" (val), "R01" (arg1), "R02" (arg2), + "R03" (arg3), "R04" (arg4)); + return result; +} + +static __inline int _sim_syscall5(int val, long arg1, long arg2, long arg3, + long arg4, long arg5) +{ + long result; + __asm__ __volatile__ ("{ and zero, r3, r4; and zero, r5, r5 };" + "{ and zero, r1, r2; mtspr SIM_CONTROL, r0 }" + : "=R00" (result) + : "R00" (val), "R01" (arg1), "R02" (arg2), + "R03" (arg3), "R04" (arg4), "R05" (arg5)); + return result; +} + + +/** + * Make a special syscall to the simulator itself, if running under + * 
simulation. This is used as the implementation of other functions + * and should not be used outside this file. + * + * @param syscall_num The simulator syscall number. + * @param nr The number of additional arguments provided. + * + * @return Varies by syscall. + */ +#define _sim_syscall(syscall_num, nr, args...) \ + _sim_syscall##nr( \ + ((syscall_num) << _SIM_CONTROL_OPERATOR_BITS) | SIM_CONTROL_SYSCALL, args) + + +/* Values for the "access_mask" parameters below. */ +#define SIM_WATCHPOINT_READ 1 +#define SIM_WATCHPOINT_WRITE 2 +#define SIM_WATCHPOINT_EXECUTE 4 + + +static __inline int +sim_add_watchpoint(unsigned int process_id, + unsigned long address, + unsigned long size, + unsigned int access_mask, + unsigned long user_data) +{ + return _sim_syscall(SIM_SYSCALL_ADD_WATCHPOINT, 5, process_id, + address, size, access_mask, user_data); +} + + +static __inline int +sim_remove_watchpoint(unsigned int process_id, + unsigned long address, + unsigned long size, + unsigned int access_mask, + unsigned long user_data) +{ + return _sim_syscall(SIM_SYSCALL_REMOVE_WATCHPOINT, 5, process_id, + address, size, access_mask, user_data); +} + + +/** + * Return value from sim_query_watchpoint. + */ +struct SimQueryWatchpointStatus +{ + /** + * 0 if a watchpoint fired, 1 if no watchpoint fired, or -1 for + * error (meaning a bad process_id). + */ + int syscall_status; + + /** + * The address of the watchpoint that fired (this is the address + * passed to sim_add_watchpoint, not an address within that range + * that actually triggered the watchpoint). + */ + unsigned long address; + + /** The arbitrary user_data installed by sim_add_watchpoint. 
*/ + unsigned long user_data; +}; + + +static __inline struct SimQueryWatchpointStatus +sim_query_watchpoint(unsigned int process_id) +{ + struct SimQueryWatchpointStatus status; + long val = SIM_CONTROL_SYSCALL | + (SIM_SYSCALL_QUERY_WATCHPOINT << _SIM_CONTROL_OPERATOR_BITS); + __asm__ __volatile__ ("{ and zero, r1, r1; mtspr SIM_CONTROL, r0 }" + : "=R00" (status.syscall_status), + "=R01" (status.address), + "=R02" (status.user_data) + : "R00" (val), "R01" (process_id)); + return status; +} + + +/* On the simulator, confirm lines have been evicted everywhere. */ +static __inline void +sim_validate_lines_evicted(unsigned long long pa, unsigned long length) +{ +#ifdef __LP64__ + _sim_syscall(SIM_SYSCALL_VALIDATE_LINES_EVICTED, 2, pa, length); +#else + _sim_syscall(SIM_SYSCALL_VALIDATE_LINES_EVICTED, 4, + 0 /* dummy */, (long)(pa), (long)(pa >> 32), length); +#endif +} + + +#endif /* !__DOXYGEN__ */ + + + + +/** + * Modify the shaping parameters of a shim. + * + * @param shim The shim to modify. One of: + * SIM_CONTROL_SHAPING_GBE_0 + * SIM_CONTROL_SHAPING_GBE_1 + * SIM_CONTROL_SHAPING_GBE_2 + * SIM_CONTROL_SHAPING_GBE_3 + * SIM_CONTROL_SHAPING_XGBE_0 + * SIM_CONTROL_SHAPING_XGBE_1 + * + * @param type The type of shaping. This should be the same type of + * shaping that is already in place on the shim. One of: + * SIM_CONTROL_SHAPING_MULTIPLIER + * SIM_CONTROL_SHAPING_PPS + * SIM_CONTROL_SHAPING_BPS + * + * @param units The magnitude of the rate. One of: + * SIM_CONTROL_SHAPING_UNITS_SINGLE + * SIM_CONTROL_SHAPING_UNITS_KILO + * SIM_CONTROL_SHAPING_UNITS_MEGA + * SIM_CONTROL_SHAPING_UNITS_GIGA + * + * @param rate The rate to which to change it. This must fit in + * SIM_CONTROL_SHAPING_RATE_BITS bits or a warning is issued and + * the shaping is not changed. + * + * @return 0 if no problems were detected in the arguments to sim_set_shaping + * or 1 if problems were detected (for example, rate does not fit in 17 bits). 
+ */ +static __inline int +sim_set_shaping(unsigned shim, + unsigned type, + unsigned units, + unsigned rate) +{ + if ((rate & ~((1 << SIM_CONTROL_SHAPING_RATE_BITS) - 1)) != 0) + return 1; + + __insn_mtspr(SPR_SIM_CONTROL, SIM_SHAPING_SPR_ARG(shim, type, units, rate)); + return 0; +} + +#ifdef __tilegx__ + +/** Enable a set of mPIPE links. Pass a -1 link_mask to enable all links. */ +static __inline void +sim_enable_mpipe_links(unsigned mpipe, unsigned long link_mask) +{ + __insn_mtspr(SPR_SIM_CONTROL, + (SIM_CONTROL_ENABLE_MPIPE_LINK_MAGIC_BYTE | + (mpipe << 8) | (1 << 16) | ((uint_reg_t)link_mask << 32))); +} + +/** Disable a set of mPIPE links. Pass a -1 link_mask to disable all links. */ +static __inline void +sim_disable_mpipe_links(unsigned mpipe, unsigned long link_mask) +{ + __insn_mtspr(SPR_SIM_CONTROL, + (SIM_CONTROL_ENABLE_MPIPE_LINK_MAGIC_BYTE | + (mpipe << 8) | (0 << 16) | ((uint_reg_t)link_mask << 32))); +} + +#endif /* __tilegx__ */ + + +/* + * An API for changing "functional" mode. + */ + +#ifndef __DOXYGEN__ + +#define sim_enable_functional() \ + __insn_mtspr(SPR_SIM_CONTROL, SIM_CONTROL_ENABLE_FUNCTIONAL) + +#define sim_disable_functional() \ + __insn_mtspr(SPR_SIM_CONTROL, SIM_CONTROL_DISABLE_FUNCTIONAL) + +#endif /* !__DOXYGEN__ */ + + +/* + * Profiler support. + */ + +/** + * Turn profiling on for the current task. + * + * Note that this has no effect if run in an environment without + * profiling support (thus, the proper flags to the simulator must + * be supplied). + */ +static __inline void +sim_profiler_enable(void) +{ + __insn_mtspr(SPR_SIM_CONTROL, SIM_CONTROL_PROFILER_ENABLE); +} + + +/** Turn profiling off for the current task. */ +static __inline void +sim_profiler_disable(void) +{ + __insn_mtspr(SPR_SIM_CONTROL, SIM_CONTROL_PROFILER_DISABLE); +} + + +/** + * Turn profiling on or off for the current task. + * + * @param enabled If true, turns on profiling. If false, turns it off. 
+ * + * Note that this has no effect if run in an environment without + * profiling support (thus, the proper flags to the simulator must + * be supplied). + */ +static __inline void +sim_profiler_set_enabled(int enabled) +{ + int val = + enabled ? SIM_CONTROL_PROFILER_ENABLE : SIM_CONTROL_PROFILER_DISABLE; + __insn_mtspr(SPR_SIM_CONTROL, val); +} + + +/** + * Return true if and only if profiling is currently enabled + * for the current task. + * + * This returns false even if sim_profiler_enable() was called + * if the current execution environment does not support profiling. + */ +static __inline int +sim_profiler_is_enabled(void) +{ + return ((__insn_mfspr(SPR_SIM_CONTROL) & SIM_PROFILER_ENABLED_MASK) != 0); +} + + +/** + * Reset profiling counters to zero for the current task. + * + * Resetting can be done while profiling is enabled. It does not affect + * the chip-wide profiling counters. + */ +static __inline void +sim_profiler_clear(void) +{ + __insn_mtspr(SPR_SIM_CONTROL, SIM_CONTROL_PROFILER_CLEAR); +} + + +/** + * Enable specified chip-level profiling counters. + * + * Does not affect the per-task profiling counters. + * + * @param mask Either this special value: + * + * SIM_CHIP_ALL (enables all chip-level components). + * + * or the bitwise OR of these values: + * + * SIM_CHIP_MEMCTL (enable all memory controllers) + * SIM_CHIP_XAUI (enable all XAUI controllers) + * SIM_CHIP_MPIPE (enable all MPIPE controllers) + */ +static __inline void +sim_profiler_chip_enable(unsigned int mask) +{ + __insn_mtspr(SPR_SIM_CONTROL, SIM_PROFILER_CHIP_ENABLE_SPR_ARG(mask)); +} + + +/** + * Disable specified chip-level profiling counters. + * + * Does not affect the per-task profiling counters. + * + * @param mask Either this special value: + * + * SIM_CHIP_ALL (disables all chip-level components). 
+ * + * or the bitwise OR of these values: + * + * SIM_CHIP_MEMCTL (disable all memory controllers) + * SIM_CHIP_XAUI (disable all XAUI controllers) + * SIM_CHIP_MPIPE (disable all MPIPE controllers) + */ +static __inline void +sim_profiler_chip_disable(unsigned int mask) +{ + __insn_mtspr(SPR_SIM_CONTROL, SIM_PROFILER_CHIP_DISABLE_SPR_ARG(mask)); +} + + +/** + * Reset specified chip-level profiling counters to zero. + * + * Does not affect the per-task profiling counters. + * + * @param mask Either this special value: + * + * SIM_CHIP_ALL (clears all chip-level components). + * + * or the bitwise OR of these values: + * + * SIM_CHIP_MEMCTL (clear all memory controllers) + * SIM_CHIP_XAUI (clear all XAUI controllers) + * SIM_CHIP_MPIPE (clear all MPIPE controllers) + */ +static __inline void +sim_profiler_chip_clear(unsigned int mask) +{ + __insn_mtspr(SPR_SIM_CONTROL, SIM_PROFILER_CHIP_CLEAR_SPR_ARG(mask)); +} + + +/* + * Event support. + */ + +#ifndef __DOXYGEN__ + +static __inline void +sim_event_begin(unsigned int x) +{ +#if defined(__tile__) && !defined(__NO_EVENT_SPR__) + __insn_mtspr(SPR_EVENT_BEGIN, x); +#endif +} + +static __inline void +sim_event_end(unsigned int x) +{ +#if defined(__tile__) && !defined(__NO_EVENT_SPR__) + __insn_mtspr(SPR_EVENT_END, x); +#endif +} + +#endif /* !__DOXYGEN__ */ + +#endif /* !__ASSEMBLER__ */ + +#endif /* !__ARCH_SIM_H__ */ + +/** @} */ diff --git a/arch/tile/include/arch/sim_def.h b/arch/tile/include/arch/sim_def.h index 6418fbde063e..7a17082c3773 100644 --- a/arch/tile/include/arch/sim_def.h +++ b/arch/tile/include/arch/sim_def.h @@ -1,477 +1,461 @@ -// Copyright 2010 Tilera Corporation. All Rights Reserved. -// -// This program is free software; you can redistribute it and/or -// modify it under the terms of the GNU General Public License -// as published by the Free Software Foundation, version 2. 
-// -// This program is distributed in the hope that it will be useful, but -// WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or -// NON INFRINGEMENT. See the GNU General Public License for -// more details. - -//! @file -//! -//! Some low-level simulator definitions. -//! +/* + * Copyright 2010 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + */ + +/** + * @file + * + * Some low-level simulator definitions. + */ #ifndef __ARCH_SIM_DEF_H__ #define __ARCH_SIM_DEF_H__ -//! Internal: the low bits of the SIM_CONTROL_* SPR values specify -//! the operation to perform, and the remaining bits are -//! an operation-specific parameter (often unused). -//! +/** + * Internal: the low bits of the SIM_CONTROL_* SPR values specify + * the operation to perform, and the remaining bits are + * an operation-specific parameter (often unused). + */ #define _SIM_CONTROL_OPERATOR_BITS 8 -//== Values which can be written to SPR_SIM_CONTROL. +/* + * Values which can be written to SPR_SIM_CONTROL. + */ -//! If written to SPR_SIM_CONTROL, stops profiling. -//! +/** If written to SPR_SIM_CONTROL, stops profiling. */ #define SIM_CONTROL_PROFILER_DISABLE 0 -//! If written to SPR_SIM_CONTROL, starts profiling. -//! +/** If written to SPR_SIM_CONTROL, starts profiling. */ #define SIM_CONTROL_PROFILER_ENABLE 1 -//! If written to SPR_SIM_CONTROL, clears profiling counters. -//! +/** If written to SPR_SIM_CONTROL, clears profiling counters. 
*/ #define SIM_CONTROL_PROFILER_CLEAR 2 -//! If written to SPR_SIM_CONTROL, checkpoints the simulator. -//! +/** If written to SPR_SIM_CONTROL, checkpoints the simulator. */ #define SIM_CONTROL_CHECKPOINT 3 -//! If written to SPR_SIM_CONTROL, combined with a mask (shifted by 8), -//! sets the tracing mask to the given mask. See "sim_set_tracing()". -//! +/** + * If written to SPR_SIM_CONTROL, combined with a mask (shifted by 8), + * sets the tracing mask to the given mask. See "sim_set_tracing()". + */ #define SIM_CONTROL_SET_TRACING 4 -//! If written to SPR_SIM_CONTROL, combined with a mask (shifted by 8), -//! dumps the requested items of machine state to the log. -//! +/** + * If written to SPR_SIM_CONTROL, combined with a mask (shifted by 8), + * dumps the requested items of machine state to the log. + */ #define SIM_CONTROL_DUMP 5 -//! If written to SPR_SIM_CONTROL, clears chip-level profiling counters. -//! +/** If written to SPR_SIM_CONTROL, clears chip-level profiling counters. */ #define SIM_CONTROL_PROFILER_CHIP_CLEAR 6 -//! If written to SPR_SIM_CONTROL, disables chip-level profiling. -//! +/** If written to SPR_SIM_CONTROL, disables chip-level profiling. */ #define SIM_CONTROL_PROFILER_CHIP_DISABLE 7 -//! If written to SPR_SIM_CONTROL, enables chip-level profiling. -//! +/** If written to SPR_SIM_CONTROL, enables chip-level profiling. */ #define SIM_CONTROL_PROFILER_CHIP_ENABLE 8 -//! If written to SPR_SIM_CONTROL, enables chip-level functional mode -//! +/** If written to SPR_SIM_CONTROL, enables chip-level functional mode */ #define SIM_CONTROL_ENABLE_FUNCTIONAL 9 -//! If written to SPR_SIM_CONTROL, disables chip-level functional mode. -//! +/** If written to SPR_SIM_CONTROL, disables chip-level functional mode. */ #define SIM_CONTROL_DISABLE_FUNCTIONAL 10 -//! If written to SPR_SIM_CONTROL, enables chip-level functional mode. -//! All tiles must perform this write for functional mode to be enabled. -//! 
Ignored in naked boot mode unless --functional is specified. -//! WARNING: Only the hypervisor startup code should use this! -//! +/** + * If written to SPR_SIM_CONTROL, enables chip-level functional mode. + * All tiles must perform this write for functional mode to be enabled. + * Ignored in naked boot mode unless --functional is specified. + * WARNING: Only the hypervisor startup code should use this! + */ #define SIM_CONTROL_ENABLE_FUNCTIONAL_BARRIER 11 -//! If written to SPR_SIM_CONTROL, combined with a character (shifted by 8), -//! writes a string directly to the simulator output. Written to once for -//! each character in the string, plus a final NUL. Instead of NUL, -//! you can also use "SIM_PUTC_FLUSH_STRING" or "SIM_PUTC_FLUSH_BINARY". -//! -// ISSUE: Document the meaning of "newline", and the handling of NUL. -// +/** + * If written to SPR_SIM_CONTROL, combined with a character (shifted by 8), + * writes a string directly to the simulator output. Written to once for + * each character in the string, plus a final NUL. Instead of NUL, + * you can also use "SIM_PUTC_FLUSH_STRING" or "SIM_PUTC_FLUSH_BINARY". + */ +/* ISSUE: Document the meaning of "newline", and the handling of NUL. */ #define SIM_CONTROL_PUTC 12 -//! If written to SPR_SIM_CONTROL, clears the --grind-coherence state for -//! this core. This is intended to be used before a loop that will -//! invalidate the cache by loading new data and evicting all current data. -//! Generally speaking, this API should only be used by system code. -//! +/** + * If written to SPR_SIM_CONTROL, clears the --grind-coherence state for + * this core. This is intended to be used before a loop that will + * invalidate the cache by loading new data and evicting all current data. + * Generally speaking, this API should only be used by system code. + */ #define SIM_CONTROL_GRINDER_CLEAR 13 -//! If written to SPR_SIM_CONTROL, shuts down the simulator. -//! +/** If written to SPR_SIM_CONTROL, shuts down the simulator. 
*/ #define SIM_CONTROL_SHUTDOWN 14 -//! If written to SPR_SIM_CONTROL, combined with a pid (shifted by 8), -//! indicates that a fork syscall just created the given process. -//! +/** + * If written to SPR_SIM_CONTROL, combined with a pid (shifted by 8), + * indicates that a fork syscall just created the given process. + */ #define SIM_CONTROL_OS_FORK 15 -//! If written to SPR_SIM_CONTROL, combined with a pid (shifted by 8), -//! indicates that an exit syscall was just executed by the given process. -//! +/** + * If written to SPR_SIM_CONTROL, combined with a pid (shifted by 8), + * indicates that an exit syscall was just executed by the given process. + */ #define SIM_CONTROL_OS_EXIT 16 -//! If written to SPR_SIM_CONTROL, combined with a pid (shifted by 8), -//! indicates that the OS just switched to the given process. -//! +/** + * If written to SPR_SIM_CONTROL, combined with a pid (shifted by 8), + * indicates that the OS just switched to the given process. + */ #define SIM_CONTROL_OS_SWITCH 17 -//! If written to SPR_SIM_CONTROL, combined with a character (shifted by 8), -//! indicates that an exec syscall was just executed. Written to once for -//! each character in the executable name, plus a final NUL. -//! +/** + * If written to SPR_SIM_CONTROL, combined with a character (shifted by 8), + * indicates that an exec syscall was just executed. Written to once for + * each character in the executable name, plus a final NUL. + */ #define SIM_CONTROL_OS_EXEC 18 -//! If written to SPR_SIM_CONTROL, combined with a character (shifted by 8), -//! indicates that an interpreter (PT_INTERP) was loaded. Written to once -//! for each character in "ADDR:PATH", plus a final NUL, where "ADDR" is a -//! hex load address starting with "0x", and "PATH" is the executable name. -//! +/** + * If written to SPR_SIM_CONTROL, combined with a character (shifted by 8), + * indicates that an interpreter (PT_INTERP) was loaded. 
Written to once + * for each character in "ADDR:PATH", plus a final NUL, where "ADDR" is a + * hex load address starting with "0x", and "PATH" is the executable name. + */ #define SIM_CONTROL_OS_INTERP 19 -//! If written to SPR_SIM_CONTROL, combined with a character (shifted by 8), -//! indicates that a dll was loaded. Written to once for each character -//! in "ADDR:PATH", plus a final NUL, where "ADDR" is a hexadecimal load -//! address starting with "0x", and "PATH" is the executable name. -//! +/** + * If written to SPR_SIM_CONTROL, combined with a character (shifted by 8), + * indicates that a dll was loaded. Written to once for each character + * in "ADDR:PATH", plus a final NUL, where "ADDR" is a hexadecimal load + * address starting with "0x", and "PATH" is the executable name. + */ #define SIM_CONTROL_DLOPEN 20 -//! If written to SPR_SIM_CONTROL, combined with a character (shifted by 8), -//! indicates that a dll was unloaded. Written to once for each character -//! in "ADDR", plus a final NUL, where "ADDR" is a hexadecimal load -//! address starting with "0x". -//! +/** + * If written to SPR_SIM_CONTROL, combined with a character (shifted by 8), + * indicates that a dll was unloaded. Written to once for each character + * in "ADDR", plus a final NUL, where "ADDR" is a hexadecimal load + * address starting with "0x". + */ #define SIM_CONTROL_DLCLOSE 21 -//! If written to SPR_SIM_CONTROL, combined with a flag (shifted by 8), -//! indicates whether to allow data reads to remotely-cached -//! dirty cache lines to be cached locally without grinder warnings or -//! assertions (used by Linux kernel fast memcpy). -//! +/** + * If written to SPR_SIM_CONTROL, combined with a flag (shifted by 8), + * indicates whether to allow data reads to remotely-cached + * dirty cache lines to be cached locally without grinder warnings or + * assertions (used by Linux kernel fast memcpy). + */ #define SIM_CONTROL_ALLOW_MULTIPLE_CACHING 22 -//! 
If written to SPR_SIM_CONTROL, enables memory tracing. -//! +/** If written to SPR_SIM_CONTROL, enables memory tracing. */ #define SIM_CONTROL_ENABLE_MEM_LOGGING 23 -//! If written to SPR_SIM_CONTROL, disables memory tracing. -//! +/** If written to SPR_SIM_CONTROL, disables memory tracing. */ #define SIM_CONTROL_DISABLE_MEM_LOGGING 24 -//! If written to SPR_SIM_CONTROL, changes the shaping parameters of one of -//! the gbe or xgbe shims. Must specify the shim id, the type, the units, and -//! the rate, as defined in SIM_SHAPING_SPR_ARG. -//! +/** + * If written to SPR_SIM_CONTROL, changes the shaping parameters of one of + * the gbe or xgbe shims. Must specify the shim id, the type, the units, and + * the rate, as defined in SIM_SHAPING_SPR_ARG. + */ #define SIM_CONTROL_SHAPING 25 -//! If written to SPR_SIM_CONTROL, combined with character (shifted by 8), -//! requests that a simulator command be executed. Written to once for each -//! character in the command, plus a final NUL. -//! +/** + * If written to SPR_SIM_CONTROL, combined with character (shifted by 8), + * requests that a simulator command be executed. Written to once for each + * character in the command, plus a final NUL. + */ #define SIM_CONTROL_COMMAND 26 -//! If written to SPR_SIM_CONTROL, indicates that the simulated system -//! is panicking, to allow debugging via --debug-on-panic. -//! +/** + * If written to SPR_SIM_CONTROL, indicates that the simulated system + * is panicking, to allow debugging via --debug-on-panic. + */ #define SIM_CONTROL_PANIC 27 -//! If written to SPR_SIM_CONTROL, triggers a simulator syscall. -//! See "sim_syscall()" for more info. -//! +/** + * If written to SPR_SIM_CONTROL, triggers a simulator syscall. + * See "sim_syscall()" for more info. + */ #define SIM_CONTROL_SYSCALL 32 -//! If written to SPR_SIM_CONTROL, combined with a pid (shifted by 8), -//! provides the pid that subsequent SIM_CONTROL_OS_FORK writes should -//! 
use as the pid, rather than the default previous SIM_CONTROL_OS_SWITCH. -//! +/** + * If written to SPR_SIM_CONTROL, combined with a pid (shifted by 8), + * provides the pid that subsequent SIM_CONTROL_OS_FORK writes should + * use as the pid, rather than the default previous SIM_CONTROL_OS_SWITCH. + */ #define SIM_CONTROL_OS_FORK_PARENT 33 -//! If written to SPR_SIM_CONTROL, combined with a mPIPE shim number -//! (shifted by 8), clears the pending magic data section. The cleared -//! pending magic data section and any subsequently appended magic bytes -//! will only take effect when the classifier blast programmer is run. +/** + * If written to SPR_SIM_CONTROL, combined with a mPIPE shim number + * (shifted by 8), clears the pending magic data section. The cleared + * pending magic data section and any subsequently appended magic bytes + * will only take effect when the classifier blast programmer is run. + */ #define SIM_CONTROL_CLEAR_MPIPE_MAGIC_BYTES 34 -//! If written to SPR_SIM_CONTROL, combined with a mPIPE shim number -//! (shifted by 8) and a byte of data (shifted by 16), appends that byte -//! to the shim's pending magic data section. The pending magic data -//! section takes effect when the classifier blast programmer is run. +/** + * If written to SPR_SIM_CONTROL, combined with a mPIPE shim number + * (shifted by 8) and a byte of data (shifted by 16), appends that byte + * to the shim's pending magic data section. The pending magic data + * section takes effect when the classifier blast programmer is run. + */ #define SIM_CONTROL_APPEND_MPIPE_MAGIC_BYTE 35 -//! If written to SPR_SIM_CONTROL, combined with a mPIPE shim number -//! (shifted by 8), an enable=1/disable=0 bit (shifted by 16), and a -//! mask of links (shifted by 32), enable or disable the corresponding -//! mPIPE links. 
+/** + * If written to SPR_SIM_CONTROL, combined with a mPIPE shim number + * (shifted by 8), an enable=1/disable=0 bit (shifted by 16), and a + * mask of links (shifted by 32), enable or disable the corresponding + * mPIPE links. + */ #define SIM_CONTROL_ENABLE_MPIPE_LINK_MAGIC_BYTE 36 -//== Syscall numbers for use with "sim_syscall()". -//! Syscall number for sim_add_watchpoint(). -//! +/* + * Syscall numbers for use with "sim_syscall()". + */ + +/** Syscall number for sim_add_watchpoint(). */ #define SIM_SYSCALL_ADD_WATCHPOINT 2 -//! Syscall number for sim_remove_watchpoint(). -//! +/** Syscall number for sim_remove_watchpoint(). */ #define SIM_SYSCALL_REMOVE_WATCHPOINT 3 -//! Syscall number for sim_query_watchpoint(). -//! +/** Syscall number for sim_query_watchpoint(). */ #define SIM_SYSCALL_QUERY_WATCHPOINT 4 -//! Syscall number that asserts that the cache lines whose 64-bit PA -//! is passed as the second argument to sim_syscall(), and over a -//! range passed as the third argument, are no longer in cache. -//! The simulator raises an error if this is not the case. -//! +/** + * Syscall number that asserts that the cache lines whose 64-bit PA + * is passed as the second argument to sim_syscall(), and over a + * range passed as the third argument, are no longer in cache. + * The simulator raises an error if this is not the case. + */ #define SIM_SYSCALL_VALIDATE_LINES_EVICTED 5 -//== Bit masks which can be shifted by 8, combined with -//== SIM_CONTROL_SET_TRACING, and written to SPR_SIM_CONTROL. +/* + * Bit masks which can be shifted by 8, combined with + * SIM_CONTROL_SET_TRACING, and written to SPR_SIM_CONTROL. + */ -//! @addtogroup arch_sim -//! @{ +/** + * @addtogroup arch_sim + * @{ + */ -//! Enable --trace-cycle when passed to simulator_set_tracing(). -//! +/** Enable --trace-cycle when passed to simulator_set_tracing(). */ #define SIM_TRACE_CYCLES 0x01 -//! Enable --trace-router when passed to simulator_set_tracing(). -//! 
+/** Enable --trace-router when passed to simulator_set_tracing(). */ #define SIM_TRACE_ROUTER 0x02 -//! Enable --trace-register-writes when passed to simulator_set_tracing(). -//! +/** Enable --trace-register-writes when passed to simulator_set_tracing(). */ #define SIM_TRACE_REGISTER_WRITES 0x04 -//! Enable --trace-disasm when passed to simulator_set_tracing(). -//! +/** Enable --trace-disasm when passed to simulator_set_tracing(). */ #define SIM_TRACE_DISASM 0x08 -//! Enable --trace-stall-info when passed to simulator_set_tracing(). -//! +/** Enable --trace-stall-info when passed to simulator_set_tracing(). */ #define SIM_TRACE_STALL_INFO 0x10 -//! Enable --trace-memory-controller when passed to simulator_set_tracing(). -//! +/** Enable --trace-memory-controller when passed to simulator_set_tracing(). */ #define SIM_TRACE_MEMORY_CONTROLLER 0x20 -//! Enable --trace-l2 when passed to simulator_set_tracing(). -//! +/** Enable --trace-l2 when passed to simulator_set_tracing(). */ #define SIM_TRACE_L2_CACHE 0x40 -//! Enable --trace-lines when passed to simulator_set_tracing(). -//! +/** Enable --trace-lines when passed to simulator_set_tracing(). */ #define SIM_TRACE_LINES 0x80 -//! Turn off all tracing when passed to simulator_set_tracing(). -//! +/** Turn off all tracing when passed to simulator_set_tracing(). */ #define SIM_TRACE_NONE 0 -//! Turn on all tracing when passed to simulator_set_tracing(). -//! +/** Turn on all tracing when passed to simulator_set_tracing(). */ #define SIM_TRACE_ALL (-1) -//! @} +/** @} */ -//! Computes the value to write to SPR_SIM_CONTROL to set tracing flags. -//! +/** Computes the value to write to SPR_SIM_CONTROL to set tracing flags. */ #define SIM_TRACE_SPR_ARG(mask) \ (SIM_CONTROL_SET_TRACING | ((mask) << _SIM_CONTROL_OPERATOR_BITS)) -//== Bit masks which can be shifted by 8, combined with -//== SIM_CONTROL_DUMP, and written to SPR_SIM_CONTROL. 
+/* + * Bit masks which can be shifted by 8, combined with + * SIM_CONTROL_DUMP, and written to SPR_SIM_CONTROL. + */ -//! @addtogroup arch_sim -//! @{ +/** + * @addtogroup arch_sim + * @{ + */ -//! Dump the general-purpose registers. -//! +/** Dump the general-purpose registers. */ #define SIM_DUMP_REGS 0x001 -//! Dump the SPRs. -//! +/** Dump the SPRs. */ #define SIM_DUMP_SPRS 0x002 -//! Dump the ITLB. -//! +/** Dump the ITLB. */ #define SIM_DUMP_ITLB 0x004 -//! Dump the DTLB. -//! +/** Dump the DTLB. */ #define SIM_DUMP_DTLB 0x008 -//! Dump the L1 I-cache. -//! +/** Dump the L1 I-cache. */ #define SIM_DUMP_L1I 0x010 -//! Dump the L1 D-cache. -//! +/** Dump the L1 D-cache. */ #define SIM_DUMP_L1D 0x020 -//! Dump the L2 cache. -//! +/** Dump the L2 cache. */ #define SIM_DUMP_L2 0x040 -//! Dump the switch registers. -//! +/** Dump the switch registers. */ #define SIM_DUMP_SNREGS 0x080 -//! Dump the switch ITLB. -//! +/** Dump the switch ITLB. */ #define SIM_DUMP_SNITLB 0x100 -//! Dump the switch L1 I-cache. -//! +/** Dump the switch L1 I-cache. */ #define SIM_DUMP_SNL1I 0x200 -//! Dump the current backtrace. -//! +/** Dump the current backtrace. */ #define SIM_DUMP_BACKTRACE 0x400 -//! Only dump valid lines in caches. -//! +/** Only dump valid lines in caches. */ #define SIM_DUMP_VALID_LINES 0x800 -//! Dump everything that is dumpable. -//! +/** Dump everything that is dumpable. */ #define SIM_DUMP_ALL (-1 & ~SIM_DUMP_VALID_LINES) -// @} +/** @} */ -//! Computes the value to write to SPR_SIM_CONTROL to dump machine state. -//! +/** Computes the value to write to SPR_SIM_CONTROL to dump machine state. */ #define SIM_DUMP_SPR_ARG(mask) \ (SIM_CONTROL_DUMP | ((mask) << _SIM_CONTROL_OPERATOR_BITS)) -//== Bit masks which can be shifted by 8, combined with -//== SIM_CONTROL_PROFILER_CHIP_xxx, and written to SPR_SIM_CONTROL. +/* + * Bit masks which can be shifted by 8, combined with + * SIM_CONTROL_PROFILER_CHIP_xxx, and written to SPR_SIM_CONTROL. + */ -//! 
@addtogroup arch_sim -//! @{ +/** + * @addtogroup arch_sim + * @{ + */ -//! Use with with SIM_PROFILER_CHIP_xxx to control the memory controllers. -//! +/** Use with SIM_PROFILER_CHIP_xxx to control the memory controllers. */ #define SIM_CHIP_MEMCTL 0x001 -//! Use with with SIM_PROFILER_CHIP_xxx to control the XAUI interface. -//! +/** Use with SIM_PROFILER_CHIP_xxx to control the XAUI interface. */ #define SIM_CHIP_XAUI 0x002 -//! Use with with SIM_PROFILER_CHIP_xxx to control the PCIe interface. -//! +/** Use with SIM_PROFILER_CHIP_xxx to control the PCIe interface. */ #define SIM_CHIP_PCIE 0x004 -//! Use with with SIM_PROFILER_CHIP_xxx to control the MPIPE interface. -//! +/** Use with SIM_PROFILER_CHIP_xxx to control the MPIPE interface. */ #define SIM_CHIP_MPIPE 0x008 -//! Reference all chip devices. -//! +/** Use with SIM_PROFILER_CHIP_xxx to control the TRIO interface. */ +#define SIM_CHIP_TRIO 0x010 + +/** Reference all chip devices. */ #define SIM_CHIP_ALL (-1) -//! @} +/** @} */ -//! Computes the value to write to SPR_SIM_CONTROL to clear chip statistics. -//! +/** Computes the value to write to SPR_SIM_CONTROL to clear chip statistics. */ #define SIM_PROFILER_CHIP_CLEAR_SPR_ARG(mask) \ (SIM_CONTROL_PROFILER_CHIP_CLEAR | ((mask) << _SIM_CONTROL_OPERATOR_BITS)) -//! Computes the value to write to SPR_SIM_CONTROL to disable chip statistics. -//! +/** Computes the value to write to SPR_SIM_CONTROL to disable chip statistics.*/ #define SIM_PROFILER_CHIP_DISABLE_SPR_ARG(mask) \ (SIM_CONTROL_PROFILER_CHIP_DISABLE | ((mask) << _SIM_CONTROL_OPERATOR_BITS)) -//! Computes the value to write to SPR_SIM_CONTROL to enable chip statistics. -//! +/** Computes the value to write to SPR_SIM_CONTROL to enable chip statistics. */ #define SIM_PROFILER_CHIP_ENABLE_SPR_ARG(mask) \ (SIM_CONTROL_PROFILER_CHIP_ENABLE | ((mask) << _SIM_CONTROL_OPERATOR_BITS)) -// Shim bitrate controls. +/* Shim bitrate controls. */ -//!
The number of bits used to store the shim id. -//! +/** The number of bits used to store the shim id. */ #define SIM_CONTROL_SHAPING_SHIM_ID_BITS 3 -//! @addtogroup arch_sim -//! @{ +/** + * @addtogroup arch_sim + * @{ + */ -//! Change the gbe 0 bitrate. -//! +/** Change the gbe 0 bitrate. */ #define SIM_CONTROL_SHAPING_GBE_0 0x0 -//! Change the gbe 1 bitrate. -//! +/** Change the gbe 1 bitrate. */ #define SIM_CONTROL_SHAPING_GBE_1 0x1 -//! Change the gbe 2 bitrate. -//! +/** Change the gbe 2 bitrate. */ #define SIM_CONTROL_SHAPING_GBE_2 0x2 -//! Change the gbe 3 bitrate. -//! +/** Change the gbe 3 bitrate. */ #define SIM_CONTROL_SHAPING_GBE_3 0x3 -//! Change the xgbe 0 bitrate. -//! +/** Change the xgbe 0 bitrate. */ #define SIM_CONTROL_SHAPING_XGBE_0 0x4 -//! Change the xgbe 1 bitrate. -//! +/** Change the xgbe 1 bitrate. */ #define SIM_CONTROL_SHAPING_XGBE_1 0x5 -//! The type of shaping to do. -//! +/** The type of shaping to do. */ #define SIM_CONTROL_SHAPING_TYPE_BITS 2 -//! Control the multiplier. -//! +/** Control the multiplier. */ #define SIM_CONTROL_SHAPING_MULTIPLIER 0 -//! Control the PPS. -//! +/** Control the PPS. */ #define SIM_CONTROL_SHAPING_PPS 1 -//! Control the BPS. -//! +/** Control the BPS. */ #define SIM_CONTROL_SHAPING_BPS 2 -//! The number of bits for the units for the shaping parameter. -//! +/** The number of bits for the units for the shaping parameter. */ #define SIM_CONTROL_SHAPING_UNITS_BITS 2 -//! Provide a number in single units. -//! +/** Provide a number in single units. */ #define SIM_CONTROL_SHAPING_UNITS_SINGLE 0 -//! Provide a number in kilo units. -//! +/** Provide a number in kilo units. */ #define SIM_CONTROL_SHAPING_UNITS_KILO 1 -//! Provide a number in mega units. -//! +/** Provide a number in mega units. */ #define SIM_CONTROL_SHAPING_UNITS_MEGA 2 -//! Provide a number in giga units. -//! +/** Provide a number in giga units. */ #define SIM_CONTROL_SHAPING_UNITS_GIGA 3 -// @} +/** @} */ -//! 
How many bits are available for the rate. -//! +/** How many bits are available for the rate. */ #define SIM_CONTROL_SHAPING_RATE_BITS \ (32 - (_SIM_CONTROL_OPERATOR_BITS + \ SIM_CONTROL_SHAPING_SHIM_ID_BITS + \ SIM_CONTROL_SHAPING_TYPE_BITS + \ SIM_CONTROL_SHAPING_UNITS_BITS)) -//! Computes the value to write to SPR_SIM_CONTROL to change a bitrate. -//! +/** Computes the value to write to SPR_SIM_CONTROL to change a bitrate. */ #define SIM_SHAPING_SPR_ARG(shim, type, units, rate) \ (SIM_CONTROL_SHAPING | \ ((shim) | \ @@ -483,30 +467,36 @@ SIM_CONTROL_SHAPING_UNITS_BITS))) << _SIM_CONTROL_OPERATOR_BITS) -//== Values returned when reading SPR_SIM_CONTROL. -// ISSUE: These names should share a longer common prefix. +/* + * Values returned when reading SPR_SIM_CONTROL. + * ISSUE: These names should share a longer common prefix. + */ -//! When reading SPR_SIM_CONTROL, the mask of simulator tracing bits -//! (SIM_TRACE_xxx values). -//! +/** + * When reading SPR_SIM_CONTROL, the mask of simulator tracing bits + * (SIM_TRACE_xxx values). + */ #define SIM_TRACE_FLAG_MASK 0xFFFF -//! When reading SPR_SIM_CONTROL, the mask for whether profiling is enabled. -//! +/** When reading SPR_SIM_CONTROL, the mask for whether profiling is enabled. */ #define SIM_PROFILER_ENABLED_MASK 0x10000 -//== Special arguments for "SIM_CONTROL_PUTC". +/* + * Special arguments for "SIM_CONTROL_PUTC". + */ -//! Flag value for forcing a PUTC string-flush, including -//! coordinate/cycle prefix and newline. -//! +/** + * Flag value for forcing a PUTC string-flush, including + * coordinate/cycle prefix and newline. + */ #define SIM_PUTC_FLUSH_STRING 0x100 -//! Flag value for forcing a PUTC binary-data-flush, which skips the -//! prefix and does not append a newline. -//! +/** + * Flag value for forcing a PUTC binary-data-flush, which skips the + * prefix and does not append a newline. 
+ */ #define SIM_PUTC_FLUSH_BINARY 0x101 -#endif //__ARCH_SIM_DEF_H__ +#endif /* __ARCH_SIM_DEF_H__ */ diff --git a/arch/tile/include/arch/spr_def.h b/arch/tile/include/arch/spr_def.h index c8fdbd9a45e6..442fcba0d122 100644 --- a/arch/tile/include/arch/spr_def.h +++ b/arch/tile/include/arch/spr_def.h @@ -12,8 +12,93 @@ * more details. */ +/* + * In addition to including the proper base SPR definition file, depending + * on machine architecture, this file defines several macros which allow + * kernel code to use protection-level dependent SPRs without worrying + * about which PL it's running at. In these macros, the PL that the SPR + * or interrupt number applies to is replaced by K. + */ + +#if CONFIG_KERNEL_PL != 1 && CONFIG_KERNEL_PL != 2 +#error CONFIG_KERNEL_PL must be 1 or 2 +#endif + +/* Concatenate 4 strings. */ +#define __concat4(a, b, c, d) a ## b ## c ## d +#define _concat4(a, b, c, d) __concat4(a, b, c, d) + #ifdef __tilegx__ #include <arch/spr_def_64.h> + +/* TILE-Gx dependent, protection-level dependent SPRs. 
*/ + +#define SPR_INTERRUPT_MASK_K \ + _concat4(SPR_INTERRUPT_MASK_, CONFIG_KERNEL_PL,,) +#define SPR_INTERRUPT_MASK_SET_K \ + _concat4(SPR_INTERRUPT_MASK_SET_, CONFIG_KERNEL_PL,,) +#define SPR_INTERRUPT_MASK_RESET_K \ + _concat4(SPR_INTERRUPT_MASK_RESET_, CONFIG_KERNEL_PL,,) +#define SPR_INTERRUPT_VECTOR_BASE_K \ + _concat4(SPR_INTERRUPT_VECTOR_BASE_, CONFIG_KERNEL_PL,,) + +#define SPR_IPI_MASK_K \ + _concat4(SPR_IPI_MASK_, CONFIG_KERNEL_PL,,) +#define SPR_IPI_MASK_RESET_K \ + _concat4(SPR_IPI_MASK_RESET_, CONFIG_KERNEL_PL,,) +#define SPR_IPI_MASK_SET_K \ + _concat4(SPR_IPI_MASK_SET_, CONFIG_KERNEL_PL,,) +#define SPR_IPI_EVENT_K \ + _concat4(SPR_IPI_EVENT_, CONFIG_KERNEL_PL,,) +#define SPR_IPI_EVENT_RESET_K \ + _concat4(SPR_IPI_EVENT_RESET_, CONFIG_KERNEL_PL,,) +#define SPR_IPI_MASK_SET_K \ + _concat4(SPR_IPI_MASK_SET_, CONFIG_KERNEL_PL,,) +#define INT_IPI_K \ + _concat4(INT_IPI_, CONFIG_KERNEL_PL,,) + +#define SPR_SINGLE_STEP_CONTROL_K \ + _concat4(SPR_SINGLE_STEP_CONTROL_, CONFIG_KERNEL_PL,,) +#define SPR_SINGLE_STEP_EN_K_K \ + _concat4(SPR_SINGLE_STEP_EN_, CONFIG_KERNEL_PL, _, CONFIG_KERNEL_PL) +#define INT_SINGLE_STEP_K \ + _concat4(INT_SINGLE_STEP_, CONFIG_KERNEL_PL,,) + #else #include <arch/spr_def_32.h> + +/* TILEPro dependent, protection-level dependent SPRs. */ + +#define SPR_INTERRUPT_MASK_K_0 \ + _concat4(SPR_INTERRUPT_MASK_, CONFIG_KERNEL_PL, _0,) +#define SPR_INTERRUPT_MASK_K_1 \ + _concat4(SPR_INTERRUPT_MASK_, CONFIG_KERNEL_PL, _1,) +#define SPR_INTERRUPT_MASK_SET_K_0 \ + _concat4(SPR_INTERRUPT_MASK_SET_, CONFIG_KERNEL_PL, _0,) +#define SPR_INTERRUPT_MASK_SET_K_1 \ + _concat4(SPR_INTERRUPT_MASK_SET_, CONFIG_KERNEL_PL, _1,) +#define SPR_INTERRUPT_MASK_RESET_K_0 \ + _concat4(SPR_INTERRUPT_MASK_RESET_, CONFIG_KERNEL_PL, _0,) +#define SPR_INTERRUPT_MASK_RESET_K_1 \ + _concat4(SPR_INTERRUPT_MASK_RESET_, CONFIG_KERNEL_PL, _1,) + #endif + +/* Generic protection-level dependent SPRs. 
*/ + +#define SPR_SYSTEM_SAVE_K_0 \ + _concat4(SPR_SYSTEM_SAVE_, CONFIG_KERNEL_PL, _0,) +#define SPR_SYSTEM_SAVE_K_1 \ + _concat4(SPR_SYSTEM_SAVE_, CONFIG_KERNEL_PL, _1,) +#define SPR_SYSTEM_SAVE_K_2 \ + _concat4(SPR_SYSTEM_SAVE_, CONFIG_KERNEL_PL, _2,) +#define SPR_SYSTEM_SAVE_K_3 \ + _concat4(SPR_SYSTEM_SAVE_, CONFIG_KERNEL_PL, _3,) +#define SPR_EX_CONTEXT_K_0 \ + _concat4(SPR_EX_CONTEXT_, CONFIG_KERNEL_PL, _0,) +#define SPR_EX_CONTEXT_K_1 \ + _concat4(SPR_EX_CONTEXT_, CONFIG_KERNEL_PL, _1,) +#define SPR_INTCTRL_K_STATUS \ + _concat4(SPR_INTCTRL_, CONFIG_KERNEL_PL, _STATUS,) +#define INT_INTCTRL_K \ + _concat4(INT_INTCTRL_, CONFIG_KERNEL_PL,,) diff --git a/arch/tile/include/arch/spr_def_32.h b/arch/tile/include/arch/spr_def_32.h index b4fc06864df6..bbc1f4c924ee 100644 --- a/arch/tile/include/arch/spr_def_32.h +++ b/arch/tile/include/arch/spr_def_32.h @@ -56,58 +56,93 @@ #define SPR_EX_CONTEXT_1_1__ICS_SHIFT 2 #define SPR_EX_CONTEXT_1_1__ICS_RMASK 0x1 #define SPR_EX_CONTEXT_1_1__ICS_MASK 0x4 +#define SPR_EX_CONTEXT_2_0 0x4605 +#define SPR_EX_CONTEXT_2_1 0x4606 +#define SPR_EX_CONTEXT_2_1__PL_SHIFT 0 +#define SPR_EX_CONTEXT_2_1__PL_RMASK 0x3 +#define SPR_EX_CONTEXT_2_1__PL_MASK 0x3 +#define SPR_EX_CONTEXT_2_1__ICS_SHIFT 2 +#define SPR_EX_CONTEXT_2_1__ICS_RMASK 0x1 +#define SPR_EX_CONTEXT_2_1__ICS_MASK 0x4 #define SPR_FAIL 0x4e09 #define SPR_INTCTRL_0_STATUS 0x4a07 #define SPR_INTCTRL_1_STATUS 0x4807 +#define SPR_INTCTRL_2_STATUS 0x4607 #define SPR_INTERRUPT_CRITICAL_SECTION 0x4e0a #define SPR_INTERRUPT_MASK_0_0 0x4a08 #define SPR_INTERRUPT_MASK_0_1 0x4a09 #define SPR_INTERRUPT_MASK_1_0 0x4809 #define SPR_INTERRUPT_MASK_1_1 0x480a +#define SPR_INTERRUPT_MASK_2_0 0x4608 +#define SPR_INTERRUPT_MASK_2_1 0x4609 #define SPR_INTERRUPT_MASK_RESET_0_0 0x4a0a #define SPR_INTERRUPT_MASK_RESET_0_1 0x4a0b #define SPR_INTERRUPT_MASK_RESET_1_0 0x480b #define SPR_INTERRUPT_MASK_RESET_1_1 0x480c +#define SPR_INTERRUPT_MASK_RESET_2_0 0x460a +#define SPR_INTERRUPT_MASK_RESET_2_1 
0x460b #define SPR_INTERRUPT_MASK_SET_0_0 0x4a0c #define SPR_INTERRUPT_MASK_SET_0_1 0x4a0d #define SPR_INTERRUPT_MASK_SET_1_0 0x480d #define SPR_INTERRUPT_MASK_SET_1_1 0x480e +#define SPR_INTERRUPT_MASK_SET_2_0 0x460c +#define SPR_INTERRUPT_MASK_SET_2_1 0x460d #define SPR_MPL_DMA_CPL_SET_0 0x5800 #define SPR_MPL_DMA_CPL_SET_1 0x5801 +#define SPR_MPL_DMA_CPL_SET_2 0x5802 #define SPR_MPL_DMA_NOTIFY_SET_0 0x3800 #define SPR_MPL_DMA_NOTIFY_SET_1 0x3801 +#define SPR_MPL_DMA_NOTIFY_SET_2 0x3802 #define SPR_MPL_INTCTRL_0_SET_0 0x4a00 #define SPR_MPL_INTCTRL_0_SET_1 0x4a01 +#define SPR_MPL_INTCTRL_0_SET_2 0x4a02 #define SPR_MPL_INTCTRL_1_SET_0 0x4800 #define SPR_MPL_INTCTRL_1_SET_1 0x4801 +#define SPR_MPL_INTCTRL_1_SET_2 0x4802 +#define SPR_MPL_INTCTRL_2_SET_0 0x4600 +#define SPR_MPL_INTCTRL_2_SET_1 0x4601 +#define SPR_MPL_INTCTRL_2_SET_2 0x4602 #define SPR_MPL_SN_ACCESS_SET_0 0x0800 #define SPR_MPL_SN_ACCESS_SET_1 0x0801 +#define SPR_MPL_SN_ACCESS_SET_2 0x0802 #define SPR_MPL_SN_CPL_SET_0 0x5a00 #define SPR_MPL_SN_CPL_SET_1 0x5a01 +#define SPR_MPL_SN_CPL_SET_2 0x5a02 #define SPR_MPL_SN_FIREWALL_SET_0 0x2c00 #define SPR_MPL_SN_FIREWALL_SET_1 0x2c01 +#define SPR_MPL_SN_FIREWALL_SET_2 0x2c02 #define SPR_MPL_SN_NOTIFY_SET_0 0x2a00 #define SPR_MPL_SN_NOTIFY_SET_1 0x2a01 +#define SPR_MPL_SN_NOTIFY_SET_2 0x2a02 #define SPR_MPL_UDN_ACCESS_SET_0 0x0c00 #define SPR_MPL_UDN_ACCESS_SET_1 0x0c01 +#define SPR_MPL_UDN_ACCESS_SET_2 0x0c02 #define SPR_MPL_UDN_AVAIL_SET_0 0x4000 #define SPR_MPL_UDN_AVAIL_SET_1 0x4001 +#define SPR_MPL_UDN_AVAIL_SET_2 0x4002 #define SPR_MPL_UDN_CA_SET_0 0x3c00 #define SPR_MPL_UDN_CA_SET_1 0x3c01 +#define SPR_MPL_UDN_CA_SET_2 0x3c02 #define SPR_MPL_UDN_COMPLETE_SET_0 0x1400 #define SPR_MPL_UDN_COMPLETE_SET_1 0x1401 +#define SPR_MPL_UDN_COMPLETE_SET_2 0x1402 #define SPR_MPL_UDN_FIREWALL_SET_0 0x3000 #define SPR_MPL_UDN_FIREWALL_SET_1 0x3001 +#define SPR_MPL_UDN_FIREWALL_SET_2 0x3002 #define SPR_MPL_UDN_REFILL_SET_0 0x1000 #define SPR_MPL_UDN_REFILL_SET_1 
0x1001 +#define SPR_MPL_UDN_REFILL_SET_2 0x1002 #define SPR_MPL_UDN_TIMER_SET_0 0x3600 #define SPR_MPL_UDN_TIMER_SET_1 0x3601 +#define SPR_MPL_UDN_TIMER_SET_2 0x3602 #define SPR_MPL_WORLD_ACCESS_SET_0 0x4e00 #define SPR_MPL_WORLD_ACCESS_SET_1 0x4e01 +#define SPR_MPL_WORLD_ACCESS_SET_2 0x4e02 #define SPR_PASS 0x4e0b #define SPR_PERF_COUNT_0 0x4205 #define SPR_PERF_COUNT_1 0x4206 #define SPR_PERF_COUNT_CTL 0x4207 +#define SPR_PERF_COUNT_DN_CTL 0x4210 #define SPR_PERF_COUNT_STS 0x4208 #define SPR_PROC_STATUS 0x4f00 #define SPR_SIM_CONTROL 0x4e0c @@ -124,6 +159,10 @@ #define SPR_SYSTEM_SAVE_1_1 0x4901 #define SPR_SYSTEM_SAVE_1_2 0x4902 #define SPR_SYSTEM_SAVE_1_3 0x4903 +#define SPR_SYSTEM_SAVE_2_0 0x4700 +#define SPR_SYSTEM_SAVE_2_1 0x4701 +#define SPR_SYSTEM_SAVE_2_2 0x4702 +#define SPR_SYSTEM_SAVE_2_3 0x4703 #define SPR_TILE_COORD 0x4c17 #define SPR_TILE_RTF_HWM 0x4e10 #define SPR_TILE_TIMER_CONTROL 0x3205 diff --git a/arch/tile/include/asm/backtrace.h b/arch/tile/include/asm/backtrace.h index 758ca4619d50..f18887d82399 100644 --- a/arch/tile/include/asm/backtrace.h +++ b/arch/tile/include/asm/backtrace.h @@ -146,7 +146,10 @@ enum { CALLER_SP_IN_R52_BASE = 4, - CALLER_SP_OFFSET_BASE = 8 + CALLER_SP_OFFSET_BASE = 8, + + /* Marks the entry point of certain functions. 
*/ + ENTRY_POINT_INFO_OP = 16 }; diff --git a/arch/tile/include/asm/bitops.h b/arch/tile/include/asm/bitops.h index 6832b4be8990..6d4f0ff2c68c 100644 --- a/arch/tile/include/asm/bitops.h +++ b/arch/tile/include/asm/bitops.h @@ -120,6 +120,7 @@ static inline unsigned long __arch_hweight64(__u64 w) #include <asm-generic/bitops/const_hweight.h> #include <asm-generic/bitops/lock.h> +#include <asm-generic/bitops/find.h> #include <asm-generic/bitops/sched.h> #include <asm-generic/bitops/ext2-non-atomic.h> #include <asm-generic/bitops/minix.h> diff --git a/arch/tile/include/asm/cacheflush.h b/arch/tile/include/asm/cacheflush.h index c5741da4eeac..14a3f8556ace 100644 --- a/arch/tile/include/asm/cacheflush.h +++ b/arch/tile/include/asm/cacheflush.h @@ -137,4 +137,56 @@ static inline void finv_buffer(void *buffer, size_t size) mb_incoherent(); } +/* + * Flush & invalidate a VA range that is homed remotely on a single core, + * waiting until the memory controller holds the flushed values. + */ +static inline void finv_buffer_remote(void *buffer, size_t size) +{ + char *p; + int i; + + /* + * Flush and invalidate the buffer out of the local L1/L2 + * and request the home cache to flush and invalidate as well. + */ + __finv_buffer(buffer, size); + + /* + * Wait for the home cache to acknowledge that it has processed + * all the flush-and-invalidate requests. This does not mean + * that the flushed data has reached the memory controller yet, + * but it does mean the home cache is processing the flushes. + */ + __insn_mf(); + + /* + * Issue a load to the last cache line, which can't complete + * until all the previously-issued flushes to the same memory + * controller have also completed. 
If we weren't striping + * memory, that one load would be sufficient, but since we may + * be, we also need to back up to the last load issued to + * another memory controller, which would be the point where + * we crossed an 8KB boundary (the granularity of striping + * across memory controllers). Keep backing up and doing this + * until we are before the beginning of the buffer, or have + * hit all the controllers. + */ + for (i = 0, p = (char *)buffer + size - 1; + i < (1 << CHIP_LOG_NUM_MSHIMS()) && p >= (char *)buffer; + ++i) { + const unsigned long STRIPE_WIDTH = 8192; + + /* Force a load instruction to issue. */ + *(volatile char *)p; + + /* Jump to end of previous stripe. */ + p -= STRIPE_WIDTH; + p = (char *)((unsigned long)p | (STRIPE_WIDTH - 1)); + } + + /* Wait for the loads (and thus flushes) to have completed. */ + __insn_mf(); +} + #endif /* _ASM_TILE_CACHEFLUSH_H */ diff --git a/arch/tile/include/asm/compat.h b/arch/tile/include/asm/compat.h index 8b60ec8b2d19..c3ae570c0a5d 100644 --- a/arch/tile/include/asm/compat.h +++ b/arch/tile/include/asm/compat.h @@ -216,15 +216,16 @@ struct compat_siginfo; struct compat_sigaltstack; long compat_sys_execve(const char __user *path, const compat_uptr_t __user *argv, - const compat_uptr_t __user *envp); + const compat_uptr_t __user *envp, struct pt_regs *); long compat_sys_rt_sigaction(int sig, struct compat_sigaction __user *act, struct compat_sigaction __user *oact, size_t sigsetsize); long compat_sys_rt_sigqueueinfo(int pid, int sig, struct compat_siginfo __user *uinfo); -long compat_sys_rt_sigreturn(void); +long compat_sys_rt_sigreturn(struct pt_regs *); long compat_sys_sigaltstack(const struct compat_sigaltstack __user *uss_ptr, - struct compat_sigaltstack __user *uoss_ptr); + struct compat_sigaltstack __user *uoss_ptr, + struct pt_regs *); long compat_sys_truncate64(char __user *filename, u32 dummy, u32 low, u32 high); long compat_sys_ftruncate64(unsigned int fd, u32 dummy, u32 low, u32 high); long 
compat_sys_pread64(unsigned int fd, char __user *ubuf, size_t count, @@ -255,4 +256,12 @@ long tile_compat_sys_ptrace(compat_long_t request, compat_long_t pid, /* Tilera Linux syscalls that don't have "compat" versions. */ #define compat_sys_flush_cache sys_flush_cache +/* These are the intvec_64.S trampolines. */ +long _compat_sys_execve(const char __user *path, + const compat_uptr_t __user *argv, + const compat_uptr_t __user *envp); +long _compat_sys_sigaltstack(const struct compat_sigaltstack __user *uss_ptr, + struct compat_sigaltstack __user *uoss_ptr); +long _compat_sys_rt_sigreturn(void); + #endif /* _ASM_TILE_COMPAT_H */ diff --git a/arch/tile/include/asm/highmem.h b/arch/tile/include/asm/highmem.h index d155db6fa9bd..b2a6c5de79ab 100644 --- a/arch/tile/include/asm/highmem.h +++ b/arch/tile/include/asm/highmem.h @@ -23,7 +23,6 @@ #include <linux/interrupt.h> #include <linux/threads.h> -#include <asm/kmap_types.h> #include <asm/tlbflush.h> #include <asm/homecache.h> @@ -60,12 +59,12 @@ void *kmap_fix_kpte(struct page *page, int finished); /* This macro is used only in map_new_virtual() to map "page". 
*/ #define kmap_prot page_to_kpgprot(page) -void kunmap_atomic_notypecheck(void *kvaddr, enum km_type type); -void *kmap_atomic_pfn(unsigned long pfn, enum km_type type); -void *kmap_atomic_prot_pfn(unsigned long pfn, enum km_type type, pgprot_t prot); +void *__kmap_atomic(struct page *page); +void __kunmap_atomic(void *kvaddr); +void *kmap_atomic_pfn(unsigned long pfn); +void *kmap_atomic_prot_pfn(unsigned long pfn, pgprot_t prot); struct page *kmap_atomic_to_page(void *ptr); -void *kmap_atomic_prot(struct page *page, enum km_type type, pgprot_t prot); -void *kmap_atomic(struct page *page, enum km_type type); +void *kmap_atomic_prot(struct page *page, pgprot_t prot); void kmap_atomic_fix_kpte(struct page *page, int finished); #define flush_cache_kmaps() do { } while (0) diff --git a/arch/tile/include/asm/io.h b/arch/tile/include/asm/io.h index ee43328713ab..d3cbb9b14cbe 100644 --- a/arch/tile/include/asm/io.h +++ b/arch/tile/include/asm/io.h @@ -55,9 +55,6 @@ extern void iounmap(volatile void __iomem *addr); #define ioremap_writethrough(physaddr, size) ioremap(physaddr, size) #define ioremap_fullcache(physaddr, size) ioremap(physaddr, size) -void __iomem *ioport_map(unsigned long port, unsigned int len); -extern inline void ioport_unmap(void __iomem *addr) {} - #define mmiowb() /* Conversion between virtual and physical mappings. */ @@ -189,12 +186,22 @@ static inline void memcpy_toio(volatile void __iomem *dst, const void *src, * we never run, uses them unconditionally. 
*/ -static inline int ioport_panic(void) +static inline long ioport_panic(void) { panic("inb/outb and friends do not exist on tile"); return 0; } +static inline void __iomem *ioport_map(unsigned long port, unsigned int len) +{ + return (void __iomem *) ioport_panic(); +} + +static inline void ioport_unmap(void __iomem *addr) +{ + ioport_panic(); +} + static inline u8 inb(unsigned long addr) { return ioport_panic(); diff --git a/arch/tile/include/asm/irqflags.h b/arch/tile/include/asm/irqflags.h index a11d4837ee4d..641e4ff3d805 100644 --- a/arch/tile/include/asm/irqflags.h +++ b/arch/tile/include/asm/irqflags.h @@ -47,53 +47,53 @@ int __n = (n); \ int __mask = 1 << (__n & 0x1f); \ if (__n < 32) \ - __insn_mtspr(SPR_INTERRUPT_MASK_SET_1_0, __mask); \ + __insn_mtspr(SPR_INTERRUPT_MASK_SET_K_0, __mask); \ else \ - __insn_mtspr(SPR_INTERRUPT_MASK_SET_1_1, __mask); \ + __insn_mtspr(SPR_INTERRUPT_MASK_SET_K_1, __mask); \ } while (0) #define interrupt_mask_reset(n) do { \ int __n = (n); \ int __mask = 1 << (__n & 0x1f); \ if (__n < 32) \ - __insn_mtspr(SPR_INTERRUPT_MASK_RESET_1_0, __mask); \ + __insn_mtspr(SPR_INTERRUPT_MASK_RESET_K_0, __mask); \ else \ - __insn_mtspr(SPR_INTERRUPT_MASK_RESET_1_1, __mask); \ + __insn_mtspr(SPR_INTERRUPT_MASK_RESET_K_1, __mask); \ } while (0) #define interrupt_mask_check(n) ({ \ int __n = (n); \ (((__n < 32) ? 
\ - __insn_mfspr(SPR_INTERRUPT_MASK_1_0) : \ - __insn_mfspr(SPR_INTERRUPT_MASK_1_1)) \ + __insn_mfspr(SPR_INTERRUPT_MASK_K_0) : \ + __insn_mfspr(SPR_INTERRUPT_MASK_K_1)) \ >> (__n & 0x1f)) & 1; \ }) #define interrupt_mask_set_mask(mask) do { \ unsigned long long __m = (mask); \ - __insn_mtspr(SPR_INTERRUPT_MASK_SET_1_0, (unsigned long)(__m)); \ - __insn_mtspr(SPR_INTERRUPT_MASK_SET_1_1, (unsigned long)(__m>>32)); \ + __insn_mtspr(SPR_INTERRUPT_MASK_SET_K_0, (unsigned long)(__m)); \ + __insn_mtspr(SPR_INTERRUPT_MASK_SET_K_1, (unsigned long)(__m>>32)); \ } while (0) #define interrupt_mask_reset_mask(mask) do { \ unsigned long long __m = (mask); \ - __insn_mtspr(SPR_INTERRUPT_MASK_RESET_1_0, (unsigned long)(__m)); \ - __insn_mtspr(SPR_INTERRUPT_MASK_RESET_1_1, (unsigned long)(__m>>32)); \ + __insn_mtspr(SPR_INTERRUPT_MASK_RESET_K_0, (unsigned long)(__m)); \ + __insn_mtspr(SPR_INTERRUPT_MASK_RESET_K_1, (unsigned long)(__m>>32)); \ } while (0) #else #define interrupt_mask_set(n) \ - __insn_mtspr(SPR_INTERRUPT_MASK_SET_1, (1UL << (n))) + __insn_mtspr(SPR_INTERRUPT_MASK_SET_K, (1UL << (n))) #define interrupt_mask_reset(n) \ - __insn_mtspr(SPR_INTERRUPT_MASK_RESET_1, (1UL << (n))) + __insn_mtspr(SPR_INTERRUPT_MASK_RESET_K, (1UL << (n))) #define interrupt_mask_check(n) \ - ((__insn_mfspr(SPR_INTERRUPT_MASK_1) >> (n)) & 1) + ((__insn_mfspr(SPR_INTERRUPT_MASK_K) >> (n)) & 1) #define interrupt_mask_set_mask(mask) \ - __insn_mtspr(SPR_INTERRUPT_MASK_SET_1, (mask)) + __insn_mtspr(SPR_INTERRUPT_MASK_SET_K, (mask)) #define interrupt_mask_reset_mask(mask) \ - __insn_mtspr(SPR_INTERRUPT_MASK_RESET_1, (mask)) + __insn_mtspr(SPR_INTERRUPT_MASK_RESET_K, (mask)) #endif /* * The set of interrupts we want active if irqs are enabled. * Note that in particular, the tile timer interrupt comes and goes * from this set, since we have no other way to turn off the timer. 
- * Likewise, INTCTRL_1 is removed and re-added during device + * Likewise, INTCTRL_K is removed and re-added during device * interrupts, as is the the hardwall UDN_FIREWALL interrupt. * We use a low bit (MEM_ERROR) as our sentinel value and make sure it * is always claimed as an "active interrupt" so we can query that bit @@ -170,14 +170,14 @@ DECLARE_PER_CPU(unsigned long long, interrupts_enabled_mask); /* Return 0 or 1 to indicate whether interrupts are currently disabled. */ #define IRQS_DISABLED(tmp) \ - mfspr tmp, INTERRUPT_MASK_1; \ + mfspr tmp, SPR_INTERRUPT_MASK_K; \ andi tmp, tmp, 1 /* Load up a pointer to &interrupts_enabled_mask. */ #define GET_INTERRUPTS_ENABLED_MASK_PTR(reg) \ - moveli reg, hw2_last(interrupts_enabled_mask); \ - shl16insli reg, reg, hw1(interrupts_enabled_mask); \ - shl16insli reg, reg, hw0(interrupts_enabled_mask); \ + moveli reg, hw2_last(interrupts_enabled_mask); \ + shl16insli reg, reg, hw1(interrupts_enabled_mask); \ + shl16insli reg, reg, hw0(interrupts_enabled_mask); \ add reg, reg, tp /* Disable interrupts. */ @@ -185,18 +185,18 @@ DECLARE_PER_CPU(unsigned long long, interrupts_enabled_mask); moveli tmp0, hw2_last(LINUX_MASKABLE_INTERRUPTS); \ shl16insli tmp0, tmp0, hw1(LINUX_MASKABLE_INTERRUPTS); \ shl16insli tmp0, tmp0, hw0(LINUX_MASKABLE_INTERRUPTS); \ - mtspr INTERRUPT_MASK_SET_1, tmp0 + mtspr SPR_INTERRUPT_MASK_SET_K, tmp0 /* Disable ALL synchronous interrupts (used by NMI entry). */ #define IRQ_DISABLE_ALL(tmp) \ movei tmp, -1; \ - mtspr INTERRUPT_MASK_SET_1, tmp + mtspr SPR_INTERRUPT_MASK_SET_K, tmp /* Enable interrupts. */ #define IRQ_ENABLE(tmp0, tmp1) \ GET_INTERRUPTS_ENABLED_MASK_PTR(tmp0); \ ld tmp0, tmp0; \ - mtspr INTERRUPT_MASK_RESET_1, tmp0 + mtspr SPR_INTERRUPT_MASK_RESET_K, tmp0 #else /* !__tilegx__ */ @@ -210,14 +210,14 @@ DECLARE_PER_CPU(unsigned long long, interrupts_enabled_mask); * (making the original code's write of the "high" mask word idempotent). 
*/ #define IRQS_DISABLED(tmp) \ - mfspr tmp, INTERRUPT_MASK_1_0; \ + mfspr tmp, SPR_INTERRUPT_MASK_K_0; \ shri tmp, tmp, INT_MEM_ERROR; \ andi tmp, tmp, 1 /* Load up a pointer to &interrupts_enabled_mask. */ #define GET_INTERRUPTS_ENABLED_MASK_PTR(reg) \ - moveli reg, lo16(interrupts_enabled_mask); \ - auli reg, reg, ha16(interrupts_enabled_mask);\ + moveli reg, lo16(interrupts_enabled_mask); \ + auli reg, reg, ha16(interrupts_enabled_mask); \ add reg, reg, tp /* Disable interrupts. */ @@ -227,16 +227,16 @@ DECLARE_PER_CPU(unsigned long long, interrupts_enabled_mask); moveli tmp1, lo16(LINUX_MASKABLE_INTERRUPTS) \ }; \ { \ - mtspr INTERRUPT_MASK_SET_1_0, tmp0; \ + mtspr SPR_INTERRUPT_MASK_SET_K_0, tmp0; \ auli tmp1, tmp1, ha16(LINUX_MASKABLE_INTERRUPTS) \ }; \ - mtspr INTERRUPT_MASK_SET_1_1, tmp1 + mtspr SPR_INTERRUPT_MASK_SET_K_1, tmp1 /* Disable ALL synchronous interrupts (used by NMI entry). */ #define IRQ_DISABLE_ALL(tmp) \ movei tmp, -1; \ - mtspr INTERRUPT_MASK_SET_1_0, tmp; \ - mtspr INTERRUPT_MASK_SET_1_1, tmp + mtspr SPR_INTERRUPT_MASK_SET_K_0, tmp; \ + mtspr SPR_INTERRUPT_MASK_SET_K_1, tmp /* Enable interrupts. */ #define IRQ_ENABLE(tmp0, tmp1) \ @@ -246,8 +246,8 @@ DECLARE_PER_CPU(unsigned long long, interrupts_enabled_mask); addi tmp1, tmp0, 4 \ }; \ lw tmp1, tmp1; \ - mtspr INTERRUPT_MASK_RESET_1_0, tmp0; \ - mtspr INTERRUPT_MASK_RESET_1_1, tmp1 + mtspr SPR_INTERRUPT_MASK_RESET_K_0, tmp0; \ + mtspr SPR_INTERRUPT_MASK_RESET_K_1, tmp1 #endif /* diff --git a/arch/tile/include/asm/kmap_types.h b/arch/tile/include/asm/kmap_types.h index 1480106d1c05..3d0f20246260 100644 --- a/arch/tile/include/asm/kmap_types.h +++ b/arch/tile/include/asm/kmap_types.h @@ -16,28 +16,42 @@ #define _ASM_TILE_KMAP_TYPES_H /* - * In TILE Linux each set of four of these uses another 16MB chunk of - * address space, given 64 tiles and 64KB pages, so we only enable - * ones that are required by the kernel configuration. 
+ * In 32-bit TILE Linux we have to balance the desire to have a lot of + * nested atomic mappings with the fact that large page sizes and many + * processors chew up address space quickly. In a typical + * 64-processor, 64KB-page layout build, making KM_TYPE_NR one larger + * adds 4MB of required address-space. For now we leave KM_TYPE_NR + * set to depth 8. */ enum km_type { + KM_TYPE_NR = 8 +}; + +/* + * We provide dummy definitions of all the stray values that used to be + * required for kmap_atomic() and no longer are. + */ +enum { KM_BOUNCE_READ, KM_SKB_SUNRPC_DATA, KM_SKB_DATA_SOFTIRQ, KM_USER0, KM_USER1, KM_BIO_SRC_IRQ, + KM_BIO_DST_IRQ, + KM_PTE0, + KM_PTE1, KM_IRQ0, KM_IRQ1, KM_SOFTIRQ0, KM_SOFTIRQ1, - KM_MEMCPY0, - KM_MEMCPY1, -#if defined(CONFIG_HIGHPTE) - KM_PTE0, - KM_PTE1, -#endif - KM_TYPE_NR + KM_SYNC_ICACHE, + KM_SYNC_DCACHE, + KM_UML_USERCOPY, + KM_IRQ_PTE, + KM_NMI, + KM_NMI_PTE, + KM_KDB }; #endif /* _ASM_TILE_KMAP_TYPES_H */ diff --git a/arch/tile/include/asm/mman.h b/arch/tile/include/asm/mman.h index 4c6811e3e8dc..81b8fc348d63 100644 --- a/arch/tile/include/asm/mman.h +++ b/arch/tile/include/asm/mman.h @@ -23,6 +23,7 @@ #define MAP_POPULATE 0x0040 /* populate (prefault) pagetables */ #define MAP_NONBLOCK 0x0080 /* do not block on IO */ #define MAP_GROWSDOWN 0x0100 /* stack-like segment */ +#define MAP_STACK MAP_GROWSDOWN /* provide convenience alias */ #define MAP_LOCKED 0x0200 /* pages are locked */ #define MAP_NORESERVE 0x0400 /* don't check for reservations */ #define MAP_DENYWRITE 0x0800 /* ETXTBSY */ diff --git a/arch/tile/include/asm/page.h b/arch/tile/include/asm/page.h index 7d90641cf18d..7979a45430d3 100644 --- a/arch/tile/include/asm/page.h +++ b/arch/tile/include/asm/page.h @@ -199,17 +199,17 @@ static inline __attribute_const__ int get_order(unsigned long size) * If you want more physical memory than this then see the CONFIG_HIGHMEM * option in the kernel configuration. 
* - * The top two 16MB chunks in the table below (VIRT and HV) are - * unavailable to Linux. Since the kernel interrupt vectors must live - * at 0xfd000000, we map all of the bottom of RAM at this address with - * a huge page table entry to minimize its ITLB footprint (as well as - * at PAGE_OFFSET). The last architected requirement is that user - * interrupt vectors live at 0xfc000000, so we make that range of - * memory available to user processes. The remaining regions are sized - * as shown; after the first four addresses, we show "typical" values, - * since the actual addresses depend on kernel #defines. + * The top 16MB chunk in the table below is unavailable to Linux. Since + * the kernel interrupt vectors must live at either 0xfe000000 or 0xfd000000 + * (depending on whether the kernel is at PL2 or PL1), we map all of the + * bottom of RAM at this address with a huge page table entry to minimize + * its ITLB footprint (as well as at PAGE_OFFSET). The last architected + * requirement is that user interrupt vectors live at 0xfc000000, so we + * make that range of memory available to user processes. The remaining + * regions are sized as shown; the first four addresses use the PL 1 + * values, and after that, we show "typical" values, since the actual + * addresses depend on kernel #defines. 
* - * MEM_VIRT_INTRPT 0xff000000 * MEM_HV_INTRPT 0xfe000000 * MEM_SV_INTRPT (kernel code) 0xfd000000 * MEM_USER_INTRPT (user vector) 0xfc000000 @@ -221,9 +221,14 @@ static inline __attribute_const__ int get_order(unsigned long size) */ #define MEM_USER_INTRPT _AC(0xfc000000, UL) +#if CONFIG_KERNEL_PL == 1 #define MEM_SV_INTRPT _AC(0xfd000000, UL) #define MEM_HV_INTRPT _AC(0xfe000000, UL) -#define MEM_VIRT_INTRPT _AC(0xff000000, UL) +#else +#define MEM_GUEST_INTRPT _AC(0xfd000000, UL) +#define MEM_SV_INTRPT _AC(0xfe000000, UL) +#define MEM_HV_INTRPT _AC(0xff000000, UL) +#endif #define INTRPT_SIZE 0x4000 diff --git a/arch/tile/include/asm/pci-bridge.h b/arch/tile/include/asm/pci-bridge.h deleted file mode 100644 index e853b0e2793b..000000000000 --- a/arch/tile/include/asm/pci-bridge.h +++ /dev/null @@ -1,117 +0,0 @@ -/* - * Copyright 2010 Tilera Corporation. All Rights Reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation, version 2. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or - * NON INFRINGEMENT. See the GNU General Public License for - * more details. - */ - -#ifndef _ASM_TILE_PCI_BRIDGE_H -#define _ASM_TILE_PCI_BRIDGE_H - -#include <linux/ioport.h> -#include <linux/pci.h> - -struct device_node; -struct pci_controller; - -/* - * pci_io_base returns the memory address at which you can access - * the I/O space for PCI bus number `bus' (or NULL on error). 
- */ -extern void __iomem *pci_bus_io_base(unsigned int bus); -extern unsigned long pci_bus_io_base_phys(unsigned int bus); -extern unsigned long pci_bus_mem_base_phys(unsigned int bus); - -/* Allocate a new PCI host bridge structure */ -extern struct pci_controller *pcibios_alloc_controller(void); - -/* Helper function for setting up resources */ -extern void pci_init_resource(struct resource *res, unsigned long start, - unsigned long end, int flags, char *name); - -/* Get the PCI host controller for a bus */ -extern struct pci_controller *pci_bus_to_hose(int bus); - -/* - * Structure of a PCI controller (host bridge) - */ -struct pci_controller { - int index; /* PCI domain number */ - struct pci_bus *root_bus; - - int first_busno; - int last_busno; - - int hv_cfg_fd[2]; /* config{0,1} fds for this PCIe controller */ - int hv_mem_fd; /* fd to Hypervisor for MMIO operations */ - - struct pci_ops *ops; - - int irq_base; /* Base IRQ from the Hypervisor */ - int plx_gen1; /* flag for PLX Gen 1 configuration */ - - /* Address ranges that are routed to this controller/bridge. */ - struct resource mem_resources[3]; -}; - -static inline struct pci_controller *pci_bus_to_host(struct pci_bus *bus) -{ - return bus->sysdata; -} - -extern void setup_indirect_pci_nomap(struct pci_controller *hose, - void __iomem *cfg_addr, void __iomem *cfg_data); -extern void setup_indirect_pci(struct pci_controller *hose, - u32 cfg_addr, u32 cfg_data); -extern void setup_grackle(struct pci_controller *hose); - -extern unsigned char common_swizzle(struct pci_dev *, unsigned char *); - -/* - * The following code swizzles for exactly one bridge. The routine - * common_swizzle below handles multiple bridges. But there are a - * some boards that don't follow the PCI spec's suggestion so we - * break this piece out separately. 
- */ -static inline unsigned char bridge_swizzle(unsigned char pin, - unsigned char idsel) -{ - return (((pin-1) + idsel) % 4) + 1; -} - -/* - * The following macro is used to lookup irqs in a standard table - * format for those PPC systems that do not already have PCI - * interrupts properly routed. - */ -/* FIXME - double check this */ -#define PCI_IRQ_TABLE_LOOKUP ({ \ - long _ctl_ = -1; \ - if (idsel >= min_idsel && idsel <= max_idsel && pin <= irqs_per_slot) \ - _ctl_ = pci_irq_table[idsel - min_idsel][pin-1]; \ - _ctl_; \ -}) - -/* - * Scan the buses below a given PCI host bridge and assign suitable - * resources to all devices found. - */ -extern int pciauto_bus_scan(struct pci_controller *, int); - -#ifdef CONFIG_PCI -extern unsigned long pci_address_to_pio(phys_addr_t address); -#else -static inline unsigned long pci_address_to_pio(phys_addr_t address) -{ - return (unsigned long)-1; -} -#endif - -#endif /* _ASM_TILE_PCI_BRIDGE_H */ diff --git a/arch/tile/include/asm/pci.h b/arch/tile/include/asm/pci.h index b0c15da2d5d5..c3fc458a0d32 100644 --- a/arch/tile/include/asm/pci.h +++ b/arch/tile/include/asm/pci.h @@ -15,7 +15,29 @@ #ifndef _ASM_TILE_PCI_H #define _ASM_TILE_PCI_H -#include <asm/pci-bridge.h> +#include <linux/pci.h> + +/* + * Structure of a PCI controller (host bridge) + */ +struct pci_controller { + int index; /* PCI domain number */ + struct pci_bus *root_bus; + + int first_busno; + int last_busno; + + int hv_cfg_fd[2]; /* config{0,1} fds for this PCIe controller */ + int hv_mem_fd; /* fd to Hypervisor for MMIO operations */ + + struct pci_ops *ops; + + int irq_base; /* Base IRQ from the Hypervisor */ + int plx_gen1; /* flag for PLX Gen 1 configuration */ + + /* Address ranges that are routed to this controller/bridge. 
*/ + struct resource mem_resources[3]; +}; /* * The hypervisor maps the entirety of CPA-space as bus addresses, so @@ -24,56 +46,12 @@ */ #define PCI_DMA_BUS_IS_PHYS 1 -struct pci_controller *pci_bus_to_hose(int bus); -unsigned char __init common_swizzle(struct pci_dev *dev, unsigned char *pinp); int __init tile_pci_init(void); -void pci_iounmap(struct pci_dev *dev, void __iomem *addr); -void __iomem *pci_iomap(struct pci_dev *dev, int bar, unsigned long max); -void __devinit pcibios_fixup_bus(struct pci_bus *bus); -int __devinit _tile_cfg_read(struct pci_controller *hose, - int bus, - int slot, - int function, - int offset, - int size, - u32 *val); -int __devinit _tile_cfg_write(struct pci_controller *hose, - int bus, - int slot, - int function, - int offset, - int size, - u32 val); +void __iomem *pci_iomap(struct pci_dev *dev, int bar, unsigned long max); +static inline void pci_iounmap(struct pci_dev *dev, void __iomem *addr) {} -/* - * These are used to to config reads and writes in the early stages of - * setup before the driver infrastructure has been set up enough to be - * able to do config reads and writes. - */ -#define early_cfg_read(where, size, value) \ - _tile_cfg_read(controller, \ - current_bus, \ - pci_slot, \ - pci_fn, \ - where, \ - size, \ - value) - -#define early_cfg_write(where, size, value) \ - _tile_cfg_write(controller, \ - current_bus, \ - pci_slot, \ - pci_fn, \ - where, \ - size, \ - value) - - - -#define PCICFG_BYTE 1 -#define PCICFG_WORD 2 -#define PCICFG_DWORD 4 +void __devinit pcibios_fixup_bus(struct pci_bus *bus); #define TILE_NUM_PCIE 2 @@ -88,33 +66,33 @@ static inline int pci_proc_domain(struct pci_bus *bus) } /* - * I/O space is currently not supported. + * pcibios_assign_all_busses() tells whether or not the bus numbers + * should be reassigned, in case the BIOS didn't do it correctly, or + * in case we don't have a BIOS and we want to let Linux do it. 
*/ +static inline int pcibios_assign_all_busses(void) +{ + return 1; +} -#define TILE_PCIE_LOWER_IO 0x0 -#define TILE_PCIE_UPPER_IO 0x10000 -#define TILE_PCIE_PCIE_IO_SIZE 0x0000FFFF - -#define _PAGE_NO_CACHE 0 -#define _PAGE_GUARDED 0 - - -#define pcibios_assign_all_busses() pci_assign_all_buses -extern int pci_assign_all_buses; - +/* + * No special bus mastering setup handling. + */ static inline void pcibios_set_master(struct pci_dev *dev) { - /* No special bus mastering setup handling */ } #define PCIBIOS_MIN_MEM 0 -#define PCIBIOS_MIN_IO TILE_PCIE_LOWER_IO +#define PCIBIOS_MIN_IO 0 /* * This flag tells if the platform is TILEmpower that needs * special configuration for the PLX switch chip. */ -extern int blade_pci; +extern int tile_plx_gen1; + +/* Use any cpu for PCI. */ +#define cpumask_of_pcibus(bus) cpu_online_mask /* implement the pci_ DMA API in terms of the generic device dma_ one */ #include <asm-generic/pci-dma-compat.h> @@ -122,7 +100,4 @@ extern int blade_pci; /* generic pci stuff */ #include <asm-generic/pci.h> -/* Use any cpu for PCI. 
*/ -#define cpumask_of_pcibus(bus) cpu_online_mask - #endif /* _ASM_TILE_PCI_H */ diff --git a/arch/tile/include/asm/pgtable.h b/arch/tile/include/asm/pgtable.h index b3367379d537..a6604e9485da 100644 --- a/arch/tile/include/asm/pgtable.h +++ b/arch/tile/include/asm/pgtable.h @@ -344,18 +344,11 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot) #define pgd_offset_k(address) pgd_offset(&init_mm, address) #if defined(CONFIG_HIGHPTE) -extern pte_t *_pte_offset_map(pmd_t *, unsigned long address, enum km_type); -#define pte_offset_map(dir, address) \ - _pte_offset_map(dir, address, KM_PTE0) -#define pte_offset_map_nested(dir, address) \ - _pte_offset_map(dir, address, KM_PTE1) -#define pte_unmap(pte) kunmap_atomic(pte, KM_PTE0) -#define pte_unmap_nested(pte) kunmap_atomic(pte, KM_PTE1) +extern pte_t *pte_offset_map(pmd_t *, unsigned long address); +#define pte_unmap(pte) kunmap_atomic(pte) #else #define pte_offset_map(dir, address) pte_offset_kernel(dir, address) -#define pte_offset_map_nested(dir, address) pte_offset_map(dir, address) #define pte_unmap(pte) do { } while (0) -#define pte_unmap_nested(pte) do { } while (0) #endif /* Clear a non-executable kernel PTE and flush it from the TLB. */ diff --git a/arch/tile/include/asm/processor.h b/arch/tile/include/asm/processor.h index ccd5f8425688..a9e7c8760334 100644 --- a/arch/tile/include/asm/processor.h +++ b/arch/tile/include/asm/processor.h @@ -292,8 +292,18 @@ extern int kstack_hash; /* Are we using huge pages in the TLB for kernel data? */ extern int kdata_huge; +/* Support standard Linux prefetching. */ +#define ARCH_HAS_PREFETCH +#define prefetch(x) __builtin_prefetch(x) #define PREFETCH_STRIDE CHIP_L2_LINE_SIZE() +/* Bring a value into the L1D, faulting the TLB if necessary. */ +#ifdef __tilegx__ +#define prefetch_L1(x) __insn_prefetch_l1_fault((void *)(x)) +#else +#define prefetch_L1(x) __insn_prefetch_L1((void *)(x)) +#endif + #else /* __ASSEMBLY__ */ /* Do some slow action (e.g. read a slow SPR). 
*/ @@ -328,18 +338,21 @@ extern int kdata_huge; * Note that assembly code assumes that USER_PL is zero. */ #define USER_PL 0 -#define KERNEL_PL 1 +#if CONFIG_KERNEL_PL == 2 +#define GUEST_PL 1 +#endif +#define KERNEL_PL CONFIG_KERNEL_PL -/* SYSTEM_SAVE_1_0 holds the current cpu number ORed with ksp0. */ +/* SYSTEM_SAVE_K_0 holds the current cpu number ORed with ksp0. */ #define CPU_LOG_MASK_VALUE 12 #define CPU_MASK_VALUE ((1 << CPU_LOG_MASK_VALUE) - 1) #if CONFIG_NR_CPUS > CPU_MASK_VALUE # error Too many cpus! #endif #define raw_smp_processor_id() \ - ((int)__insn_mfspr(SPR_SYSTEM_SAVE_1_0) & CPU_MASK_VALUE) + ((int)__insn_mfspr(SPR_SYSTEM_SAVE_K_0) & CPU_MASK_VALUE) #define get_current_ksp0() \ - (__insn_mfspr(SPR_SYSTEM_SAVE_1_0) & ~CPU_MASK_VALUE) + (__insn_mfspr(SPR_SYSTEM_SAVE_K_0) & ~CPU_MASK_VALUE) #define next_current_ksp0(task) ({ \ unsigned long __ksp0 = task_ksp0(task); \ int __cpu = raw_smp_processor_id(); \ diff --git a/arch/tile/include/asm/ptrace.h b/arch/tile/include/asm/ptrace.h index 4a02bb073979..ac6d343129d3 100644 --- a/arch/tile/include/asm/ptrace.h +++ b/arch/tile/include/asm/ptrace.h @@ -62,8 +62,8 @@ struct pt_regs { pt_reg_t lr; /* aliases regs[TREG_LR] */ /* Saved special registers. 
*/ - pt_reg_t pc; /* stored in EX_CONTEXT_1_0 */ - pt_reg_t ex1; /* stored in EX_CONTEXT_1_1 (PL and ICS bit) */ + pt_reg_t pc; /* stored in EX_CONTEXT_K_0 */ + pt_reg_t ex1; /* stored in EX_CONTEXT_K_1 (PL and ICS bit) */ pt_reg_t faultnum; /* fault number (INT_SWINT_1 for syscall) */ pt_reg_t orig_r0; /* r0 at syscall entry, else zero */ pt_reg_t flags; /* flags (see below) */ diff --git a/arch/tile/include/asm/signal.h b/arch/tile/include/asm/signal.h index c1ee1d61d44c..81d92a45cd4b 100644 --- a/arch/tile/include/asm/signal.h +++ b/arch/tile/include/asm/signal.h @@ -25,7 +25,7 @@ #if defined(__KERNEL__) && !defined(__ASSEMBLY__) struct pt_regs; -int restore_sigcontext(struct pt_regs *, struct sigcontext __user *, long *); +int restore_sigcontext(struct pt_regs *, struct sigcontext __user *); int setup_sigcontext(struct sigcontext __user *, struct pt_regs *); void do_signal(struct pt_regs *regs); #endif diff --git a/arch/tile/include/asm/stat.h b/arch/tile/include/asm/stat.h index 3dc90fa92c70..b16e5db8f0e7 100644 --- a/arch/tile/include/asm/stat.h +++ b/arch/tile/include/asm/stat.h @@ -1 +1,4 @@ +#ifdef CONFIG_COMPAT +#define __ARCH_WANT_STAT64 /* Used for compat_sys_stat64() etc. */ +#endif #include <asm-generic/stat.h> diff --git a/arch/tile/include/asm/syscalls.h b/arch/tile/include/asm/syscalls.h index ce99ffefeacf..3b5507c31eae 100644 --- a/arch/tile/include/asm/syscalls.h +++ b/arch/tile/include/asm/syscalls.h @@ -32,8 +32,9 @@ extern void *compat_sys_call_table[]; /* * Note that by convention, any syscall which requires the current - * register set takes an additional "struct pt_regs *" pointer; the - * sys_xxx() function just adds the pointer and tail-calls to _sys_xxx(). + * register set takes an additional "struct pt_regs *" pointer; a + * _sys_xxx() trampoline in intvec*.S just sets up the pointer and + * jumps to sys_xxx(). 
*/ /* kernel/sys.c */ @@ -43,66 +44,17 @@ long sys32_fadvise64(int fd, u32 offset_lo, u32 offset_hi, int sys32_fadvise64_64(int fd, u32 offset_lo, u32 offset_hi, u32 len_lo, u32 len_hi, int advice); long sys_flush_cache(void); -long sys_mmap2(unsigned long addr, unsigned long len, - unsigned long prot, unsigned long flags, - unsigned long fd, unsigned long pgoff); -#ifdef __tilegx__ -long sys_mmap(unsigned long addr, unsigned long len, - unsigned long prot, unsigned long flags, - unsigned long fd, off_t pgoff); +#ifndef __tilegx__ /* No mmap() in the 32-bit kernel. */ +#define sys_mmap sys_mmap #endif -/* kernel/process.c */ -long sys_clone(unsigned long clone_flags, unsigned long newsp, - void __user *parent_tid, void __user *child_tid); -long _sys_clone(unsigned long clone_flags, unsigned long newsp, - void __user *parent_tid, void __user *child_tid, - struct pt_regs *regs); -long sys_fork(void); -long _sys_fork(struct pt_regs *regs); -long sys_vfork(void); -long _sys_vfork(struct pt_regs *regs); -long sys_execve(const char __user *filename, - const char __user *const __user *argv, - const char __user *const __user *envp); -long _sys_execve(const char __user *filename, - const char __user *const __user *argv, - const char __user *const __user *envp, struct pt_regs *regs); - -/* kernel/signal.c */ -long sys_sigaltstack(const stack_t __user *, stack_t __user *); -long _sys_sigaltstack(const stack_t __user *, stack_t __user *, - struct pt_regs *); -long sys_rt_sigreturn(void); -long _sys_rt_sigreturn(struct pt_regs *regs); - -/* platform-independent functions */ -long sys_rt_sigsuspend(sigset_t __user *unewset, size_t sigsetsize); -long sys_rt_sigaction(int sig, const struct sigaction __user *act, - struct sigaction __user *oact, size_t sigsetsize); - #ifndef __tilegx__ /* mm/fault.c */ -int sys_cmpxchg_badaddr(unsigned long address); -int _sys_cmpxchg_badaddr(unsigned long address, struct pt_regs *); +long sys_cmpxchg_badaddr(unsigned long address, struct pt_regs 
*); +long _sys_cmpxchg_badaddr(unsigned long address); #endif #ifdef CONFIG_COMPAT -long compat_sys_execve(const char __user *path, - const compat_uptr_t __user *argv, - const compat_uptr_t __user *envp); -long _compat_sys_execve(const char __user *path, - const compat_uptr_t __user *argv, - const compat_uptr_t __user *envp, - struct pt_regs *regs); -long compat_sys_sigaltstack(const struct compat_sigaltstack __user *uss_ptr, - struct compat_sigaltstack __user *uoss_ptr); -long _compat_sys_sigaltstack(const struct compat_sigaltstack __user *uss_ptr, - struct compat_sigaltstack __user *uoss_ptr, - struct pt_regs *regs); -long compat_sys_rt_sigreturn(void); -long _compat_sys_rt_sigreturn(struct pt_regs *regs); - /* These four are not defined for 64-bit, but serve as "compat" syscalls. */ long sys_fcntl64(unsigned int fd, unsigned int cmd, unsigned long arg); long sys_fstat64(unsigned long fd, struct stat64 __user *statbuf); @@ -110,4 +62,15 @@ long sys_truncate64(const char __user *path, loff_t length); long sys_ftruncate64(unsigned int fd, loff_t length); #endif +/* These are the intvec*.S trampolines. */ +long _sys_sigaltstack(const stack_t __user *, stack_t __user *); +long _sys_rt_sigreturn(void); +long _sys_clone(unsigned long clone_flags, unsigned long newsp, + void __user *parent_tid, void __user *child_tid); +long _sys_execve(const char __user *filename, + const char __user *const __user *argv, + const char __user *const __user *envp); + +#include <asm-generic/syscalls.h> + #endif /* _ASM_TILE_SYSCALLS_H */ diff --git a/arch/tile/include/asm/system.h b/arch/tile/include/asm/system.h index f749be327ce0..5388850deeb2 100644 --- a/arch/tile/include/asm/system.h +++ b/arch/tile/include/asm/system.h @@ -89,6 +89,10 @@ #define get_cycles_low() __insn_mfspr(SPR_CYCLE) /* just get all 64 bits */ #endif +#if !CHIP_HAS_MF_WAITS_FOR_VICTIMS() +int __mb_incoherent(void); /* Helper routine for mb_incoherent(). 
*/ +#endif + /* Fence to guarantee visibility of stores to incoherent memory. */ static inline void mb_incoherent(void) @@ -97,7 +101,6 @@ mb_incoherent(void) #if !CHIP_HAS_MF_WAITS_FOR_VICTIMS() { - int __mb_incoherent(void); #if CHIP_HAS_TILE_WRITE_PENDING() const unsigned long WRITE_TIMEOUT_CYCLES = 400; unsigned long start = get_cycles_low(); @@ -161,7 +164,7 @@ extern struct task_struct *_switch_to(struct task_struct *prev, /* Helper function for _switch_to(). */ extern struct task_struct *__switch_to(struct task_struct *prev, struct task_struct *next, - unsigned long new_system_save_1_0); + unsigned long new_system_save_k_0); /* Address that switched-away from tasks are at. */ extern unsigned long get_switch_to_pc(void); @@ -214,13 +217,6 @@ int hardwall_deactivate(struct task_struct *task); } while (0) #endif -/* Invoke the simulator "syscall" mechanism (see arch/tile/kernel/entry.S). */ -extern int _sim_syscall(int syscall_num, ...); -#define sim_syscall(syscall_num, ...) \ - _sim_syscall(SIM_CONTROL_SYSCALL + \ - ((syscall_num) << _SIM_CONTROL_OPERATOR_BITS), \ - ## __VA_ARGS__) - /* * Kernel threads can check to see if they need to migrate their * stack whenever they return from a context switch; for user diff --git a/arch/tile/include/asm/traps.h b/arch/tile/include/asm/traps.h index 432a9c15c8a2..d06e35f57201 100644 --- a/arch/tile/include/asm/traps.h +++ b/arch/tile/include/asm/traps.h @@ -59,4 +59,8 @@ void do_hardwall_trap(struct pt_regs *, int fault_num); void do_breakpoint(struct pt_regs *, int fault_num); +#ifdef __tilegx__ +void gx_singlestep_handle(struct pt_regs *, int fault_num); +#endif + #endif /* _ASM_TILE_SYSCALLS_H */ diff --git a/arch/tile/include/asm/unistd.h b/arch/tile/include/asm/unistd.h index f2e3ff485333..b35c2db71199 100644 --- a/arch/tile/include/asm/unistd.h +++ b/arch/tile/include/asm/unistd.h @@ -41,6 +41,7 @@ __SYSCALL(__NR_cmpxchg_badaddr, sys_cmpxchg_badaddr) #ifdef CONFIG_COMPAT #define __ARCH_WANT_SYS_LLSEEK #endif 
+#define __ARCH_WANT_SYS_NEWFSTATAT #endif #endif /* _ASM_TILE_UNISTD_H */ diff --git a/arch/tile/include/hv/drv_xgbe_impl.h b/arch/tile/include/hv/drv_xgbe_impl.h new file mode 100644 index 000000000000..3a73b2b44913 --- /dev/null +++ b/arch/tile/include/hv/drv_xgbe_impl.h @@ -0,0 +1,300 @@ +/* + * Copyright 2010 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + */ + +/** + * @file drivers/xgbe/impl.h + * Implementation details for the NetIO library. + */ + +#ifndef __DRV_XGBE_IMPL_H__ +#define __DRV_XGBE_IMPL_H__ + +#include <hv/netio_errors.h> +#include <hv/netio_intf.h> +#include <hv/drv_xgbe_intf.h> + + +/** How many groups we have (log2). */ +#define LOG2_NUM_GROUPS (12) +/** How many groups we have. */ +#define NUM_GROUPS (1 << LOG2_NUM_GROUPS) + +/** Number of output requests we'll buffer per tile. */ +#define EPP_REQS_PER_TILE (32) + +/** Words used in an eDMA command without checksum acceleration. */ +#define EDMA_WDS_NO_CSUM 8 +/** Words used in an eDMA command with checksum acceleration. */ +#define EDMA_WDS_CSUM 10 +/** Total available words in the eDMA command FIFO. */ +#define EDMA_WDS_TOTAL 128 + + +/* + * FIXME: These definitions are internal and should have underscores! + * NOTE: The actual numeric values here are intentional and allow us to + * optimize the concept "if small ... else if large ... else ...", by + * checking for the low bit being set, and then for non-zero. + * These are used as array indices, so they must have the values (0, 1, 2) + * in some order. 
+ */ +#define SIZE_SMALL (1) /**< Small packet queue. */ +#define SIZE_LARGE (2) /**< Large packet queue. */ +#define SIZE_JUMBO (0) /**< Jumbo packet queue. */ + +/** The number of "SIZE_xxx" values. */ +#define NETIO_NUM_SIZES 3 + + +/* + * Default numbers of packets for IPP drivers. These values are chosen + * such that CIPP1 will not overflow its L2 cache. + */ + +/** The default number of small packets. */ +#define NETIO_DEFAULT_SMALL_PACKETS 2750 +/** The default number of large packets. */ +#define NETIO_DEFAULT_LARGE_PACKETS 2500 +/** The default number of jumbo packets. */ +#define NETIO_DEFAULT_JUMBO_PACKETS 250 + + +/** Log2 of the size of a memory arena. */ +#define NETIO_ARENA_SHIFT 24 /* 16 MB */ +/** Size of a memory arena. */ +#define NETIO_ARENA_SIZE (1 << NETIO_ARENA_SHIFT) + + +/** A queue of packets. + * + * This structure partially defines a queue of packets waiting to be + * processed. The queue as a whole is written to by an interrupt handler and + * read by non-interrupt code; this data structure is what's touched by the + * interrupt handler. The other part of the queue state, the read offset, is + * kept in user space, not in hypervisor space, so it is in a separate data + * structure. + * + * The read offset (__packet_receive_read in the user part of the queue + * structure) points to the next packet to be read. When the read offset is + * equal to the write offset, the queue is empty; therefore the queue must + * contain one more slot than the required maximum queue size. + * + * Here's an example of all 3 state variables and what they mean. All + * pointers move left to right. + * + * @code + * I I V V V V I I I I + * 0 1 2 3 4 5 6 7 8 9 10 + * ^ ^ ^ ^ + * | | | + * | | __last_packet_plus_one + * | __buffer_write + * __packet_receive_read + * @endcode + * + * This queue has 10 slots, and thus can hold 9 packets (_last_packet_plus_one + * = 10). 
The read pointer is at 2, and the write pointer is at 6; thus, + * there are valid, unread packets in slots 2, 3, 4, and 5. The remaining + * slots are invalid (do not contain a packet). + */ +typedef struct { + /** Byte offset of the next notify packet to be written: zero for the first + * packet on the queue, sizeof (netio_pkt_t) for the second packet on the + * queue, etc. */ + volatile uint32_t __packet_write; + + /** Offset of the packet after the last valid packet (i.e., when any + * pointer is incremented to this value, it wraps back to zero). */ + uint32_t __last_packet_plus_one; +} +__netio_packet_queue_t; + + +/** A queue of buffers. + * + * This structure partially defines a queue of empty buffers which have been + * obtained via requests to the IPP. (The elements of the queue are packet + * handles, which are transformed into a full netio_pkt_t when the buffer is + * retrieved.) The queue as a whole is written to by an interrupt handler and + * read by non-interrupt code; this data structure is what's touched by the + * interrupt handler. The other parts of the queue state, the read offset and + * requested write offset, are kept in user space, not in hypervisor space, so + * they are in a separate data structure. + * + * The read offset (__buffer_read in the user part of the queue structure) + * points to the next buffer to be read. When the read offset is equal to the + * write offset, the queue is empty; therefore the queue must contain one more + * slot than the required maximum queue size. + * + * The requested write offset (__buffer_requested_write in the user part of + * the queue structure) points to the slot which will hold the next buffer we + * request from the IPP, once we get around to sending such a request. When + * the requested write offset is equal to the write offset, no requests for + * new buffers are outstanding; when the requested write offset is one greater + * than the read offset, no more requests may be sent. 
+ * + * Note that, unlike the packet_queue, the buffer_queue places incoming + * buffers at decreasing addresses. This makes the check for "is it time to + * wrap the buffer pointer" cheaper in the assembly code which receives new + * buffers, and means that the value which defines the queue size, + * __last_buffer, is different than in the packet queue. Also, the offset + * used in the packet_queue is already scaled by the size of a packet; here we + * use unscaled slot indices for the offsets. (These differences are + * historical, and in the future it's possible that the packet_queue will look + * more like this queue.) + * + * @code + * Here's an example of all 4 state variables and what they mean. Remember: + * all pointers move right to left. + * + * V V V I I R R V V V + * 0 1 2 3 4 5 6 7 8 9 + * ^ ^ ^ ^ + * | | | | + * | | | __last_buffer + * | | __buffer_write + * | __buffer_requested_write + * __buffer_read + * @endcode + * + * This queue has 10 slots, and thus can hold 9 buffers (_last_buffer = 9). + * The read pointer is at 2, and the write pointer is at 6; thus, there are + * valid, unread buffers in slots 2, 1, 0, 9, 8, and 7. The requested write + * pointer is at 4; thus, requests have been made to the IPP for buffers which + * will be placed in slots 6 and 5 when they arrive. Finally, the remaining + * slots are invalid (do not contain a buffer). + */ +typedef struct +{ + /** Ordinal number of the next buffer to be written: 0 for the first slot in + * the queue, 1 for the second slot in the queue, etc. */ + volatile uint32_t __buffer_write; + + /** Ordinal number of the last buffer (i.e., when any pointer is decremented + * below zero, it is reloaded with this value). */ + uint32_t __last_buffer; +} +__netio_buffer_queue_t; + + +/** + * An object for providing Ethernet packets to a process. + */ +typedef struct __netio_queue_impl_t +{ + /** The queue of packets waiting to be received. 
*/ + __netio_packet_queue_t __packet_receive_queue; + /** The intr bit mask that IDs this device. */ + unsigned int __intr_id; + /** Offset to queues of empty buffers, one per size. */ + uint32_t __buffer_queue[NETIO_NUM_SIZES]; + /** The address of the first EPP tile, or -1 if no EPP. */ + /* ISSUE: Actually this is always "0" or "~0". */ + uint32_t __epp_location; + /** The queue ID that this queue represents. */ + unsigned int __queue_id; + /** Number of acknowledgements received. */ + volatile uint32_t __acks_received; + /** Last completion number received for packet_sendv. */ + volatile uint32_t __last_completion_rcv; + /** Number of packets allowed to be outstanding. */ + uint32_t __max_outstanding; + /** First VA available for packets. */ + void* __va_0; + /** First VA in second range available for packets. */ + void* __va_1; + /** Padding to align the "__packets" field to the size of a netio_pkt_t. */ + uint32_t __padding[3]; + /** The packets themselves. */ + netio_pkt_t __packets[0]; +} +netio_queue_impl_t; + + +/** + * An object for managing the user end of a NetIO queue. + */ +typedef struct __netio_queue_user_impl_t +{ + /** The next incoming packet to be read. */ + uint32_t __packet_receive_read; + /** The next empty buffers to be read, one index per size. */ + uint8_t __buffer_read[NETIO_NUM_SIZES]; + /** Where the empty buffer we next request from the IPP will go, one index + * per size. */ + uint8_t __buffer_requested_write[NETIO_NUM_SIZES]; + /** PCIe interface flag. */ + uint8_t __pcie; + /** Number of packets left to be received before we send a credit update. */ + uint32_t __receive_credit_remaining; + /** Value placed in __receive_credit_remaining when it reaches zero. */ + uint32_t __receive_credit_interval; + /** First fast I/O routine index. */ + uint32_t __fastio_index; + /** Number of acknowledgements expected. */ + uint32_t __acks_outstanding; + /** Last completion number requested. 
*/ + uint32_t __last_completion_req; + /** File descriptor for driver. */ + int __fd; +} +netio_queue_user_impl_t; + + +#define NETIO_GROUP_CHUNK_SIZE 64 /**< Max # groups in one IPP request */ +#define NETIO_BUCKET_CHUNK_SIZE 64 /**< Max # buckets in one IPP request */ + + +/** Internal structure used to convey packet send information to the + * hypervisor. FIXME: Actually, it's not used for that anymore, but + * netio_packet_send() still uses it internally. + */ +typedef struct +{ + uint16_t flags; /**< Packet flags (__NETIO_SEND_FLG_xxx) */ + uint16_t transfer_size; /**< Size of packet */ + uint32_t va; /**< VA of start of packet */ + __netio_pkt_handle_t handle; /**< Packet handle */ + uint32_t csum0; /**< First checksum word */ + uint32_t csum1; /**< Second checksum word */ +} +__netio_send_cmd_t; + + +/** Flags used in two contexts: + * - As the "flags" member in the __netio_send_cmd_t, above; used only + * for netio_pkt_send_{prepare,commit}. + * - As part of the flags passed to the various send packet fast I/O calls. + */ + +/** Need acknowledgement on this packet. Note that some code in the + * normal send_pkt fast I/O handler assumes that this is equal to 1. */ +#define __NETIO_SEND_FLG_ACK 0x1 + +/** Do checksum on this packet. (Only used with the __netio_send_cmd_t; + * normal packet sends use a special fast I/O index to denote checksumming, + * and multi-segment sends test the checksum descriptor.) */ +#define __NETIO_SEND_FLG_CSUM 0x2 + +/** Get a completion on this packet. Only used with multi-segment sends. */ +#define __NETIO_SEND_FLG_COMPLETION 0x4 + +/** Position of the number-of-extra-segments value in the flags word. + Only used with multi-segment sends. */ +#define __NETIO_SEND_FLG_XSEG_SHIFT 3 + +/** Width of the number-of-extra-segments value in the flags word. 
*/ +#define __NETIO_SEND_FLG_XSEG_WIDTH 2 + +#endif /* __DRV_XGBE_IMPL_H__ */ diff --git a/arch/tile/include/hv/drv_xgbe_intf.h b/arch/tile/include/hv/drv_xgbe_intf.h new file mode 100644 index 000000000000..146e47d5334b --- /dev/null +++ b/arch/tile/include/hv/drv_xgbe_intf.h @@ -0,0 +1,615 @@ +/* + * Copyright 2010 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + */ + +/** + * @file drv_xgbe_intf.h + * Interface to the hypervisor XGBE driver. + */ + +#ifndef __DRV_XGBE_INTF_H__ +#define __DRV_XGBE_INTF_H__ + +/** + * An object for forwarding VAs and PAs to the hypervisor. + * @ingroup types + * + * This allows the supervisor to specify a number of areas of memory to + * store packet buffers. + */ +typedef struct +{ + /** The physical address of the memory. */ + HV_PhysAddr pa; + /** Page table entry for the memory. This is only used to derive the + * memory's caching mode; the PA bits are ignored. */ + HV_PTE pte; + /** The virtual address of the memory. */ + HV_VirtAddr va; + /** Size (in bytes) of the memory area. */ + int size; + +} +netio_ipp_address_t; + +/** The various pread/pwrite offsets into the hypervisor-level driver. + * @ingroup types + */ +typedef enum +{ + /** Inform the Linux driver of the address of the NetIO arena memory. + * This offset is actually only used to convey information from netio + * to the Linux driver; it never makes it from there to the hypervisor. + * Write-only; takes a uint32_t specifying the VA address. 
*/ + NETIO_FIXED_ADDR = 0x5000000000000000ULL, + + /** Inform the Linux driver of the size of the NetIO arena memory. + * This offset is actually only used to convey information from netio + * to the Linux driver; it never makes it from there to the hypervisor. + * Write-only; takes a uint32_t specifying the VA size. */ + NETIO_FIXED_SIZE = 0x5100000000000000ULL, + + /** Register current tile with IPP. Write then read: write, takes a + * netio_input_config_t, read returns a pointer to a netio_queue_impl_t. */ + NETIO_IPP_INPUT_REGISTER_OFF = 0x6000000000000000ULL, + + /** Unregister current tile from IPP. Write-only, takes a dummy argument. */ + NETIO_IPP_INPUT_UNREGISTER_OFF = 0x6100000000000000ULL, + + /** Start packets flowing. Write-only, takes a dummy argument. */ + NETIO_IPP_INPUT_INIT_OFF = 0x6200000000000000ULL, + + /** Stop packets flowing. Write-only, takes a dummy argument. */ + NETIO_IPP_INPUT_UNINIT_OFF = 0x6300000000000000ULL, + + /** Configure group (typically we group on VLAN). Write-only: takes an + * array of netio_group_t's, low 24 bits of the offset is the base group + * number times the size of a netio_group_t. */ + NETIO_IPP_INPUT_GROUP_CFG_OFF = 0x6400000000000000ULL, + + /** Configure bucket. Write-only: takes an array of netio_bucket_t's, low + * 24 bits of the offset is the base bucket number times the size of a + * netio_bucket_t. */ + NETIO_IPP_INPUT_BUCKET_CFG_OFF = 0x6500000000000000ULL, + + /** Get/set a parameter. Read or write: read or write data is the parameter + * value, low 32 bits of the offset is a __netio_getset_offset_t. */ + NETIO_IPP_PARAM_OFF = 0x6600000000000000ULL, + + /** Get fast I/O index. Read-only; returns a 4-byte base index value. */ + NETIO_IPP_GET_FASTIO_OFF = 0x6700000000000000ULL, + + /** Configure hijack IP address. Packets with this IPv4 dest address + * go to bucket NETIO_NUM_BUCKETS - 1. Write-only: takes an IP address + * in some standard form. FIXME: Define the form! 
*/ + NETIO_IPP_INPUT_HIJACK_CFG_OFF = 0x6800000000000000ULL, + + /** + * Offsets beyond this point are reserved for the supervisor (although that + * enforcement must be done by the supervisor driver itself). + */ + NETIO_IPP_USER_MAX_OFF = 0x6FFFFFFFFFFFFFFFULL, + + /** Register I/O memory. Write-only, takes a netio_ipp_address_t. */ + NETIO_IPP_IOMEM_REGISTER_OFF = 0x7000000000000000ULL, + + /** Unregister I/O memory. Write-only, takes a netio_ipp_address_t. */ + NETIO_IPP_IOMEM_UNREGISTER_OFF = 0x7100000000000000ULL, + + /* Offsets greater than 0x7FFFFFFF can't be used directly from Linux + * userspace code due to limitations in the pread/pwrite syscalls. */ + + /** Drain LIPP buffers. */ + NETIO_IPP_DRAIN_OFF = 0xFA00000000000000ULL, + + /** Supply a netio_ipp_address_t to be used as shared memory for the + * LEPP command queue. */ + NETIO_EPP_SHM_OFF = 0xFB00000000000000ULL, + + /* 0xFC... is currently unused. */ + + /** Stop IPP/EPP tiles. Write-only, takes a dummy argument. */ + NETIO_IPP_STOP_SHIM_OFF = 0xFD00000000000000ULL, + + /** Start IPP/EPP tiles. Write-only, takes a dummy argument. */ + NETIO_IPP_START_SHIM_OFF = 0xFE00000000000000ULL, + + /** Supply packet arena. Write-only, takes an array of + * netio_ipp_address_t values. */ + NETIO_IPP_ADDRESS_OFF = 0xFF00000000000000ULL, +} netio_hv_offset_t; + +/** Extract the base offset from an offset */ +#define NETIO_BASE_OFFSET(off) ((off) & 0xFF00000000000000ULL) +/** Extract the local offset from an offset */ +#define NETIO_LOCAL_OFFSET(off) ((off) & 0x00FFFFFFFFFFFFFFULL) + + +/** + * Get/set offset. + */ +typedef union +{ + struct + { + uint64_t addr:48; /**< Class-specific address */ + unsigned int class:8; /**< Class (e.g., NETIO_PARAM) */ + unsigned int opcode:8; /**< High 8 bits of NETIO_IPP_PARAM_OFF */ + } + bits; /**< Bitfields */ + uint64_t word; /**< Aggregated value to use as the offset */ +} +__netio_getset_offset_t; + +/** + * Fast I/O index offsets (must be contiguous). 
+ */ +typedef enum +{ + NETIO_FASTIO_ALLOCATE = 0, /**< Get empty packet buffer */ + NETIO_FASTIO_FREE_BUFFER = 1, /**< Give buffer back to IPP */ + NETIO_FASTIO_RETURN_CREDITS = 2, /**< Give credits to IPP */ + NETIO_FASTIO_SEND_PKT_NOCK = 3, /**< Send a packet, no checksum */ + NETIO_FASTIO_SEND_PKT_CK = 4, /**< Send a packet, with checksum */ + NETIO_FASTIO_SEND_PKT_VEC = 5, /**< Send a vector of packets */ + NETIO_FASTIO_SENDV_PKT = 6, /**< Sendv one packet */ + NETIO_FASTIO_NUM_INDEX = 7, /**< Total number of fast I/O indices */ +} netio_fastio_index_t; + +/** 3-word return type for Fast I/O call. */ +typedef struct +{ + int err; /**< Error code. */ + uint32_t val0; /**< Value. Meaning depends upon the specific call. */ + uint32_t val1; /**< Value. Meaning depends upon the specific call. */ +} netio_fastio_rv3_t; + +/** 0-argument fast I/O call */ +int __netio_fastio0(uint32_t fastio_index); +/** 1-argument fast I/O call */ +int __netio_fastio1(uint32_t fastio_index, uint32_t arg0); +/** 3-argument fast I/O call, 3-word return value */ +netio_fastio_rv3_t __netio_fastio3_rv3(uint32_t fastio_index, uint32_t arg0, + uint32_t arg1, uint32_t arg2); +/** 4-argument fast I/O call */ +int __netio_fastio4(uint32_t fastio_index, uint32_t arg0, uint32_t arg1, + uint32_t arg2, uint32_t arg3); +/** 6-argument fast I/O call */ +int __netio_fastio6(uint32_t fastio_index, uint32_t arg0, uint32_t arg1, + uint32_t arg2, uint32_t arg3, uint32_t arg4, uint32_t arg5); +/** 9-argument fast I/O call */ +int __netio_fastio9(uint32_t fastio_index, uint32_t arg0, uint32_t arg1, + uint32_t arg2, uint32_t arg3, uint32_t arg4, uint32_t arg5, + uint32_t arg6, uint32_t arg7, uint32_t arg8); + +/** Allocate an empty packet. + * @param fastio_index Fast I/O index. + * @param size Size of the packet to allocate. + */ +#define __netio_fastio_allocate(fastio_index, size) \ + __netio_fastio1((fastio_index) + NETIO_FASTIO_ALLOCATE, size) + +/** Free a buffer.
+ * @param fastio_index Fast I/O index. + * @param handle Handle for the packet to free. + */ +#define __netio_fastio_free_buffer(fastio_index, handle) \ + __netio_fastio1((fastio_index) + NETIO_FASTIO_FREE_BUFFER, handle) + +/** Increment our receive credits. + * @param fastio_index Fast I/O index. + * @param credits Number of credits to add. + */ +#define __netio_fastio_return_credits(fastio_index, credits) \ + __netio_fastio1((fastio_index) + NETIO_FASTIO_RETURN_CREDITS, credits) + +/** Send packet, no checksum. + * @param fastio_index Fast I/O index. + * @param ackflag Nonzero if we want an ack. + * @param size Size of the packet. + * @param va Virtual address of start of packet. + * @param handle Packet handle. + */ +#define __netio_fastio_send_pkt_nock(fastio_index, ackflag, size, va, handle) \ + __netio_fastio4((fastio_index) + NETIO_FASTIO_SEND_PKT_NOCK, ackflag, \ + size, va, handle) + +/** Send packet, calculate checksum. + * @param fastio_index Fast I/O index. + * @param ackflag Nonzero if we want an ack. + * @param size Size of the packet. + * @param va Virtual address of start of packet. + * @param handle Packet handle. + * @param csum0 Shim checksum header. + * @param csum1 Checksum seed. + */ +#define __netio_fastio_send_pkt_ck(fastio_index, ackflag, size, va, handle, \ + csum0, csum1) \ + __netio_fastio6((fastio_index) + NETIO_FASTIO_SEND_PKT_CK, ackflag, \ + size, va, handle, csum0, csum1) + + +/** Format for the "csum0" argument to the __netio_fastio_send routines + * and LEPP. Note that this is currently exactly identical to the + * ShimProtocolOffloadHeader. + */ +typedef union +{ + struct + { + unsigned int start_byte:7; /**< The first byte to be checksummed */ + unsigned int count:14; /**< Number of bytes to be checksummed. */ + unsigned int destination_byte:7; /**< The byte to write the checksum to. */ + unsigned int reserved:4; /**< Reserved. */ + } bits; /**< Decomposed method of access. */ + unsigned int word; /**< To send out the IDN. 
*/ +} __netio_checksum_header_t; + + +/** Sendv packet with 1 or 2 segments. + * @param fastio_index Fast I/O index. + * @param flags Ack/csum/notify flags in low 3 bits; number of segments minus + * 1 in next 2 bits; expected checksum in high 16 bits. + * @param confno Confirmation number to request, if notify flag set. + * @param csum0 Checksum descriptor; if zero, no checksum. + * @param va_F Virtual address of first segment. + * @param va_L Virtual address of last segment, if 2 segments. + * @param len_F_L Length of first segment in low 16 bits; length of last + * segment, if 2 segments, in high 16 bits. + */ +#define __netio_fastio_sendv_pkt_1_2(fastio_index, flags, confno, csum0, \ + va_F, va_L, len_F_L) \ + __netio_fastio6((fastio_index) + NETIO_FASTIO_SENDV_PKT, flags, confno, \ + csum0, va_F, va_L, len_F_L) + +/** Send packet on PCIe interface. + * @param fastio_index Fast I/O index. + * @param flags Ack/csum/notify flags in low 3 bits. + * @param confno Confirmation number to request, if notify flag set. + * @param csum0 Checksum descriptor; Hard wired 0, not needed for PCIe. + * @param va_F Virtual address of the packet buffer. + * @param va_L Virtual address of last segment, if 2 segments. Hard wired 0. + * @param len_F_L Length of the packet buffer in low 16 bits. + */ +#define __netio_fastio_send_pcie_pkt(fastio_index, flags, confno, csum0, \ + va_F, va_L, len_F_L) \ + __netio_fastio6((fastio_index) + PCIE_FASTIO_SENDV_PKT, flags, confno, \ + csum0, va_F, va_L, len_F_L) + +/** Sendv packet with 3 or 4 segments. + * @param fastio_index Fast I/O index. + * @param flags Ack/csum/notify flags in low 3 bits; number of segments minus + * 1 in next 2 bits; expected checksum in high 16 bits. + * @param confno Confirmation number to request, if notify flag set. + * @param csum0 Checksum descriptor; if zero, no checksum. + * @param va_F Virtual address of first segment. 
+ * @param va_L Virtual address of last segment (third segment if 3 segments, + * fourth segment if 4 segments). + * @param len_F_L Length of first segment in low 16 bits; length of last + * segment in high 16 bits. + * @param va_M0 Virtual address of "middle 0" segment; this segment is sent + * second when there are three segments, and third if there are four. + * @param va_M1 Virtual address of "middle 1" segment; this segment is sent + * second when there are four segments. + * @param len_M0_M1 Length of middle 0 segment in low 16 bits; length of middle + * 1 segment, if 4 segments, in high 16 bits. + */ +#define __netio_fastio_sendv_pkt_3_4(fastio_index, flags, confno, csum0, va_F, \ + va_L, len_F_L, va_M0, va_M1, len_M0_M1) \ + __netio_fastio9((fastio_index) + NETIO_FASTIO_SENDV_PKT, flags, confno, \ + csum0, va_F, va_L, len_F_L, va_M0, va_M1, len_M0_M1) + +/** Send vector of packets. + * @param fastio_index Fast I/O index. + * @param seqno Number of packets transmitted so far on this interface; + * used to decide which packets should be acknowledged. + * @param nentries Number of entries in vector. + * @param va Virtual address of start of vector entry array. + * @return 3-word netio_fastio_rv3_t structure. The structure's err member + * is an error code, or zero if no error. The val0 member is the + * updated value of seqno; it has been incremented by 1 for each + * packet sent. That increment may be less than nentries if an + * error occurred, or if some of the entries in the vector contain + * handles equal to NETIO_PKT_HANDLE_NONE. The val1 member is the + * updated value of nentries; it has been decremented by 1 for each + * vector entry processed. Again, that decrement may be less than + * nentries (leaving the returned value positive) if an error + * occurred.
+ */ +#define __netio_fastio_send_pkt_vec(fastio_index, seqno, nentries, va) \ + __netio_fastio3_rv3((fastio_index) + NETIO_FASTIO_SEND_PKT_VEC, seqno, \ + nentries, va) + + +/** An egress DMA command for LEPP. */ +typedef struct +{ + /** Is this a TSO transfer? + * + * NOTE: This field is always 0, to distinguish it from + * lepp_tso_cmd_t. It must come first! + */ + uint8_t tso : 1; + + /** Unused padding bits. */ + uint8_t _unused : 3; + + /** Should this packet be sent directly from caches instead of DRAM, + * using hash-for-home to locate the packet data? + */ + uint8_t hash_for_home : 1; + + /** Should we compute a checksum? */ + uint8_t compute_checksum : 1; + + /** Is this the final buffer for this packet? + * + * A single packet can be split over several input buffers (a "gather" + * operation). This flag indicates that this is the last buffer + * in a packet. + */ + uint8_t end_of_packet : 1; + + /** Should LEPP advance 'comp_busy' when this DMA is fully finished? */ + uint8_t send_completion : 1; + + /** High bits of Client Physical Address of the start of the buffer + * to be egressed. + * + * NOTE: Only 6 bits are actually needed here, as CPAs are + * currently 38 bits. So two bits could be scavenged from this. + */ + uint8_t cpa_hi; + + /** The number of bytes to be egressed. */ + uint16_t length; + + /** Low 32 bits of Client Physical Address of the start of the buffer + * to be egressed. + */ + uint32_t cpa_lo; + + /** Checksum information (only used if 'compute_checksum'). */ + __netio_checksum_header_t checksum_data; + +} lepp_cmd_t; + + +/** A chunk of physical memory for a TSO egress. */ +typedef struct +{ + /** The low bits of the CPA. */ + uint32_t cpa_lo; + /** The high bits of the CPA. */ + uint16_t cpa_hi : 15; + /** Should this packet be sent directly from caches instead of DRAM, + * using hash-for-home to locate the packet data? + */ + uint16_t hash_for_home : 1; + /** The length in bytes. 
*/ + uint16_t length; +} lepp_frag_t; + + +/** An LEPP command that handles TSO. */ +typedef struct +{ + /** Is this a TSO transfer? + * + * NOTE: This field is always 1, to distinguish it from + * lepp_cmd_t. It must come first! + */ + uint8_t tso : 1; + + /** Unused padding bits. */ + uint8_t _unused : 7; + + /** Size of the header[] array in bytes. It must be in the range + * [40, 127], which are the smallest header for a TCP packet over + * Ethernet and the maximum possible prepend size supported by + * hardware, respectively. Note that the array storage must be + * padded out to a multiple of four bytes so that the following + * LEPP command is aligned properly. + */ + uint8_t header_size; + + /** Byte offset of the IP header in header[]. */ + uint8_t ip_offset; + + /** Byte offset of the TCP header in header[]. */ + uint8_t tcp_offset; + + /** The number of bytes to use for the payload of each packet, + * except of course the last one, which may not have enough bytes. + * This means that each Ethernet packet except the last will have a + * size of header_size + payload_size. + */ + uint16_t payload_size; + + /** The length of the 'frags' array that follows this struct. */ + uint16_t num_frags; + + /** The actual frags. */ + lepp_frag_t frags[0 /* Variable-sized; num_frags entries. */]; + + /* + * The packet header template logically follows frags[], + * but you can't declare that in C. + * + * uint32_t header[header_size_in_words_rounded_up]; + */ + +} lepp_tso_cmd_t; + + +/** An LEPP completion ring entry. */ +typedef void* lepp_comp_t; + + +/** Maximum number of frags for one TSO command. This is adapted from + * linux's "MAX_SKB_FRAGS", and presumably over-estimates by one, for + * our page size of exactly 65536. We add one for a "body" fragment. + */ +#define LEPP_MAX_FRAGS (65536 / HV_PAGE_SIZE_SMALL + 2 + 1) + +/** Total number of bytes needed for an lepp_tso_cmd_t. 
*/ +#define LEPP_TSO_CMD_SIZE(num_frags, header_size) \ + (sizeof(lepp_tso_cmd_t) + \ + (num_frags) * sizeof(lepp_frag_t) + \ + (((header_size) + 3) & -4)) + +/** The size of the lepp "cmd" queue. */ +#define LEPP_CMD_QUEUE_BYTES \ + (((CHIP_L2_CACHE_SIZE() - 2 * CHIP_L2_LINE_SIZE()) / \ + (sizeof(lepp_cmd_t) + sizeof(lepp_comp_t))) * sizeof(lepp_cmd_t)) + +/** The largest possible command that can go in lepp_queue_t::cmds[]. */ +#define LEPP_MAX_CMD_SIZE LEPP_TSO_CMD_SIZE(LEPP_MAX_FRAGS, 128) + +/** The largest possible value of lepp_queue_t::cmd_{head, tail} (inclusive). + */ +#define LEPP_CMD_LIMIT \ + (LEPP_CMD_QUEUE_BYTES - LEPP_MAX_CMD_SIZE) + +/** The maximum number of completions in an LEPP queue. */ +#define LEPP_COMP_QUEUE_SIZE \ + ((LEPP_CMD_LIMIT + sizeof(lepp_cmd_t) - 1) / sizeof(lepp_cmd_t)) + +/** Increment an index modulo the queue size. */ +#define LEPP_QINC(var) \ + (var = __insn_mnz(var - (LEPP_COMP_QUEUE_SIZE - 1), var + 1)) + +/** A queue used to convey egress commands from the client to LEPP. */ +typedef struct +{ + /** Index of first completion not yet processed by user code. + * If this is equal to comp_busy, there are no such completions. + * + * NOTE: This is only read/written by the user. + */ + unsigned int comp_head; + + /** Index of first completion record not yet completed. + * If this is equal to comp_tail, there are no such completions. + * This index gets advanced (modulo LEPP_COMP_QUEUE_SIZE) whenever + * a command with the 'send_completion' bit set is finished. + * + * NOTE: This is only written by LEPP, only read by the user. + */ + volatile unsigned int comp_busy; + + /** Index of the first empty slot in the completion ring. + * Entries from this up to but not including comp_head (in ring order) + * can be filled in with completion data. + * + * NOTE: This is only read/written by the user. + */ + unsigned int comp_tail; + + /** Byte index of first command enqueued for LEPP but not yet processed.
+ * + * This is always divisible by sizeof(void*) and always <= LEPP_CMD_LIMIT. + * + * NOTE: LEPP advances this counter as soon as it no longer needs + * the cmds[] storage for this entry, but the transfer is not actually + * complete (i.e. the buffer pointed to by the command is no longer + * needed) until comp_busy advances. + * + * If this is equal to cmd_tail, the ring is empty. + * + * NOTE: This is only written by LEPP, only read by the user. + */ + volatile unsigned int cmd_head; + + /** Byte index of first empty slot in the command ring. This field can + * be incremented up to but not equal to cmd_head (because that would + * mean the ring is empty). + * + * This is always divisible by sizeof(void*) and always <= LEPP_CMD_LIMIT. + * + * NOTE: This is read/written by the user, only read by LEPP. + */ + volatile unsigned int cmd_tail; + + /** A ring of variable-sized egress DMA commands. + * + * NOTE: Only written by the user, only read by LEPP. + */ + char cmds[LEPP_CMD_QUEUE_BYTES] + __attribute__((aligned(CHIP_L2_LINE_SIZE()))); + + /** A ring of user completion data. + * NOTE: Only read/written by the user. + */ + lepp_comp_t comps[LEPP_COMP_QUEUE_SIZE] + __attribute__((aligned(CHIP_L2_LINE_SIZE()))); +} lepp_queue_t; + + +/** An internal helper function for determining the number of entries + * available in a ring buffer, given that there is one sentinel. + */ +static inline unsigned int +_lepp_num_free_slots(unsigned int head, unsigned int tail) +{ + /* + * One entry is reserved for use as a sentinel, to distinguish + * "empty" from "full". So we compute + * (head - tail - 1) % LEPP_COMP_QUEUE_SIZE, but without using a slow % operation. + */ + return (head - tail - 1) + ((head <= tail) ? LEPP_COMP_QUEUE_SIZE : 0); +} + + +/** Returns how many new comp entries can be enqueued.
*/ +static inline unsigned int +lepp_num_free_comp_slots(const lepp_queue_t* q) +{ + return _lepp_num_free_slots(q->comp_head, q->comp_tail); +} + +static inline int +lepp_qsub(int v1, int v2) +{ + int delta = v1 - v2; + return delta + ((delta >> 31) & LEPP_COMP_QUEUE_SIZE); +} + + +/** FIXME: Check this from linux, via a new "pwrite()" call. */ +#define LIPP_VERSION 1 + + +/** We use exactly two bytes of alignment padding. */ +#define LIPP_PACKET_PADDING 2 + +/** The minimum size of a "small" buffer (including the padding). */ +#define LIPP_SMALL_PACKET_SIZE 128 + +/* + * NOTE: The following two values should total to less than around + * 13582, to keep the total size used for "lipp_state_t" below 64K. + */ + +/** The maximum number of "small" buffers. + * This is enough for 53 network cpus with 128 credits. Note that + * if these are exhausted, we will fall back to using large buffers. + */ +#define LIPP_SMALL_BUFFERS 6785 + +/** The maximum number of "large" buffers. + * This is enough for 53 network cpus with 128 credits. + */ +#define LIPP_LARGE_BUFFERS 6785 + +#endif /* __DRV_XGBE_INTF_H__ */ diff --git a/arch/tile/include/hv/hypervisor.h b/arch/tile/include/hv/hypervisor.h index 9bd303a141b2..f672544cd4f9 100644 --- a/arch/tile/include/hv/hypervisor.h +++ b/arch/tile/include/hv/hypervisor.h @@ -1003,37 +1003,37 @@ int hv_console_write(HV_VirtAddr bytes, int len); * when these occur in a client's interrupt critical section, they must * be delivered through the downcall mechanism. * - * A downcall is initially delivered to the client as an INTCTRL_1 - * interrupt. Upon entry to the INTCTRL_1 vector, the client must - * immediately invoke the hv_downcall_dispatch service. This service - * will not return; instead it will cause one of the client's actual - * downcall-handling interrupt vectors to be entered. 
The EX_CONTEXT - * registers in the client will be set so that when the client irets, - * it will return to the code which was interrupted by the INTCTRL_1 - * interrupt. - * - * Under some circumstances, the firing of INTCTRL_1 can race with + * A downcall is initially delivered to the client as an INTCTRL_CL + * interrupt, where CL is the client's PL. Upon entry to the INTCTRL_CL + * vector, the client must immediately invoke the hv_downcall_dispatch + * service. This service will not return; instead it will cause one of + * the client's actual downcall-handling interrupt vectors to be entered. + * The EX_CONTEXT registers in the client will be set so that when the + * client irets, it will return to the code which was interrupted by the + * INTCTRL_CL interrupt. + * + * Under some circumstances, the firing of INTCTRL_CL can race with * the lowering of a device interrupt. In such a case, the * hv_downcall_dispatch service may issue an iret instruction instead * of entering one of the client's actual downcall-handling interrupt * vectors. This will return execution to the location that was - * interrupted by INTCTRL_1. + * interrupted by INTCTRL_CL. * * Any saving of registers should be done by the actual handling - * vectors; no registers should be changed by the INTCTRL_1 handler. + * vectors; no registers should be changed by the INTCTRL_CL handler. * In particular, the client should not use a jal instruction to invoke * the hv_downcall_dispatch service, as that would overwrite the client's * lr register. Note that the hv_downcall_dispatch service may overwrite * one or more of the client's system save registers. * - * The client must not modify the INTCTRL_1_STATUS SPR. The hypervisor + * The client must not modify the INTCTRL_CL_STATUS SPR. The hypervisor * will set this register to cause a downcall to happen, and will clear * it when no further downcalls are pending. 
* - * When a downcall vector is entered, the INTCTRL_1 interrupt will be + * When a downcall vector is entered, the INTCTRL_CL interrupt will be * masked. When the client is done processing a downcall, and is ready * to accept another, it must unmask this interrupt; if more downcalls - * are pending, this will cause the INTCTRL_1 vector to be reentered. + * are pending, this will cause the INTCTRL_CL vector to be reentered. * Currently the following interrupt vectors can be entered through a * downcall: * diff --git a/arch/tile/include/hv/netio_errors.h b/arch/tile/include/hv/netio_errors.h new file mode 100644 index 000000000000..e1591bff61b5 --- /dev/null +++ b/arch/tile/include/hv/netio_errors.h @@ -0,0 +1,122 @@ +/* + * Copyright 2010 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + */ + +/** + * Error codes returned from NetIO routines. + */ + +#ifndef __NETIO_ERRORS_H__ +#define __NETIO_ERRORS_H__ + +/** + * @addtogroup error + * + * @brief The error codes returned by NetIO functions. + * + * NetIO functions return 0 (defined as ::NETIO_NO_ERROR) on success, and + * a negative value if an error occurs. + * + * In cases where a NetIO function failed due to an error reported by + * system libraries, the error code will be the negation of the + * system errno at the time of failure. The @ref netio_strerror() + * function will deliver error strings for both NetIO and system error + * codes. + * + * @{ + */ + +/** The set of all NetIO errors.
*/ +typedef enum +{ + /** Operation successfully completed. */ + NETIO_NO_ERROR = 0, + + /** A packet was successfully retrieved from an input queue. */ + NETIO_PKT = 0, + + /** Largest NetIO error number. */ + NETIO_ERR_MAX = -701, + + /** The tile is not registered with the IPP. */ + NETIO_NOT_REGISTERED = -701, + + /** No packet was available to retrieve from the input queue. */ + NETIO_NOPKT = -702, + + /** The requested function is not implemented. */ + NETIO_NOT_IMPLEMENTED = -703, + + /** On a registration operation, the target queue already has the maximum + * number of tiles registered for it, and no more may be added. On a + * packet send operation, the output queue is full and nothing more can + * be queued until some of the queued packets are actually transmitted. */ + NETIO_QUEUE_FULL = -704, + + /** The calling process or thread is not bound to exactly one CPU. */ + NETIO_BAD_AFFINITY = -705, + + /** Cannot allocate memory on requested controllers. */ + NETIO_CANNOT_HOME = -706, + + /** On a registration operation, the IPP specified is not configured + * to support the options requested; for instance, the application + * wants a specific type of tagged headers which the configured IPP + * doesn't support. Or, the supplied configuration information is + * not self-consistent, or is out of range; for instance, specifying + * both NETIO_RECV and NETIO_NO_RECV, or asking for more than + * NETIO_MAX_SEND_BUFFERS to be preallocated. On a VLAN or bucket + * configure operation, the number of items, or the base item, was + * out of range. + */ + NETIO_BAD_CONFIG = -707, + + /** Too many tiles have registered to transmit packets. */ + NETIO_TOOMANY_XMIT = -708, + + /** Packet transmission was attempted on a queue which was registered + with transmit disabled. */ + NETIO_UNREG_XMIT = -709, + + /** This tile is already registered with the IPP. */ + NETIO_ALREADY_REGISTERED = -710, + + /** The Ethernet link is down. The application should try again later. 
*/ + NETIO_LINK_DOWN = -711, + + /** An invalid memory buffer has been specified. This may be an unmapped + * virtual address, or one which does not meet alignment requirements. + * For netio_input_register(), this error may be returned when multiple + * processes specify different memory regions to be used for NetIO + * buffers. That can happen if these processes specify explicit memory + * regions with the ::NETIO_FIXED_BUFFER_VA flag, or if tmc_cmem_init() + * has not been called by a common ancestor of the processes. + */ + NETIO_FAULT = -712, + + /** Cannot combine user-managed shared memory and cache coherence. */ + NETIO_BAD_CACHE_CONFIG = -713, + + /** Smallest NetIO error number. */ + NETIO_ERR_MIN = -713, + +#ifndef __DOXYGEN__ + /** Used internally to mean that no response is needed; never returned to + * an application. */ + NETIO_NO_RESPONSE = 1 +#endif +} netio_error_t; + +/** @} */ + +#endif /* __NETIO_ERRORS_H__ */ diff --git a/arch/tile/include/hv/netio_intf.h b/arch/tile/include/hv/netio_intf.h new file mode 100644 index 000000000000..8d20972aba2c --- /dev/null +++ b/arch/tile/include/hv/netio_intf.h @@ -0,0 +1,2975 @@ +/* + * Copyright 2010 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + */ + +/** + * NetIO interface structures and macros. 
+ */ + +#ifndef __NETIO_INTF_H__ +#define __NETIO_INTF_H__ + +#include <hv/netio_errors.h> + +#ifdef __KERNEL__ +#include <linux/types.h> +#else +#include <stdint.h> +#endif + +#if !defined(__HV__) && !defined(__BOGUX__) && !defined(__KERNEL__) +#include <assert.h> +#define netio_assert assert /**< Enable assertions from macros */ +#else +#define netio_assert(...) ((void)(0)) /**< Disable assertions from macros */ +#endif + +/* + * If none of these symbols are defined, we're building libnetio in an + * environment where we have pthreads, so we'll enable locking. + */ +#if !defined(__HV__) && !defined(__BOGUX__) && !defined(__KERNEL__) && \ + !defined(__NEWLIB__) +#define _NETIO_PTHREAD /**< Include a mutex in netio_queue_t below */ + +/* + * If NETIO_UNLOCKED is defined, we don't use per-cpu locks on + * per-packet NetIO operations. We still do pthread locking on things + * like netio_input_register, though. This is used for building + * libnetio_unlocked. + */ +#ifndef NETIO_UNLOCKED + +/* Avoid PLT overhead by using our own inlined per-cpu lock. */ +#include <sched.h> +typedef int _netio_percpu_mutex_t; + +static __inline int +_netio_percpu_mutex_init(_netio_percpu_mutex_t* lock) +{ + *lock = 0; + return 0; +} + +static __inline int +_netio_percpu_mutex_lock(_netio_percpu_mutex_t* lock) +{ + while (__builtin_expect(__insn_tns(lock), 0)) + sched_yield(); + return 0; +} + +static __inline int +_netio_percpu_mutex_unlock(_netio_percpu_mutex_t* lock) +{ + *lock = 0; + return 0; +} + +#else /* NETIO_UNLOCKED */ + +/* Don't do any locking for per-packet NetIO operations. */ +typedef int _netio_percpu_mutex_t; +#define _netio_percpu_mutex_init(L) +#define _netio_percpu_mutex_lock(L) +#define _netio_percpu_mutex_unlock(L) + +#endif /* NETIO_UNLOCKED */ +#endif /* !__HV__, !__BOGUX__, !__KERNEL__, !__NEWLIB__ */ + +/** How many tiles can register for a given queue. + * @ingroup setup */ +#define NETIO_MAX_TILES_PER_QUEUE 64 + + +/** Largest permissible queue identifier.
+ * @ingroup setup */ +#define NETIO_MAX_QUEUE_ID 255 + + +#ifndef __DOXYGEN__ + +/* Metadata packet checksum/ethertype flags. */ + +/** The L4 checksum has not been calculated. */ +#define _NETIO_PKT_NO_L4_CSUM_SHIFT 0 +#define _NETIO_PKT_NO_L4_CSUM_RMASK 1 +#define _NETIO_PKT_NO_L4_CSUM_MASK \ + (_NETIO_PKT_NO_L4_CSUM_RMASK << _NETIO_PKT_NO_L4_CSUM_SHIFT) + +/** The L3 checksum has not been calculated. */ +#define _NETIO_PKT_NO_L3_CSUM_SHIFT 1 +#define _NETIO_PKT_NO_L3_CSUM_RMASK 1 +#define _NETIO_PKT_NO_L3_CSUM_MASK \ + (_NETIO_PKT_NO_L3_CSUM_RMASK << _NETIO_PKT_NO_L3_CSUM_SHIFT) + +/** The L3 checksum is incorrect (or perhaps has not been calculated). */ +#define _NETIO_PKT_BAD_L3_CSUM_SHIFT 2 +#define _NETIO_PKT_BAD_L3_CSUM_RMASK 1 +#define _NETIO_PKT_BAD_L3_CSUM_MASK \ + (_NETIO_PKT_BAD_L3_CSUM_RMASK << _NETIO_PKT_BAD_L3_CSUM_SHIFT) + +/** The Ethernet packet type is unrecognized. */ +#define _NETIO_PKT_TYPE_UNRECOGNIZED_SHIFT 3 +#define _NETIO_PKT_TYPE_UNRECOGNIZED_RMASK 1 +#define _NETIO_PKT_TYPE_UNRECOGNIZED_MASK \ + (_NETIO_PKT_TYPE_UNRECOGNIZED_RMASK << \ + _NETIO_PKT_TYPE_UNRECOGNIZED_SHIFT) + +/* Metadata packet type flags. */ + +/** Where the packet type bits are; this field is the index into + * _netio_pkt_info. */ +#define _NETIO_PKT_TYPE_SHIFT 4 +#define _NETIO_PKT_TYPE_RMASK 0x3F + +/** How many VLAN tags the packet has, and, if we have two, which one we + * actually grouped on. A VLAN within a proprietary (Marvell or Broadcom) + * tag is counted here. */ +#define _NETIO_PKT_VLAN_SHIFT 4 +#define _NETIO_PKT_VLAN_RMASK 0x3 +#define _NETIO_PKT_VLAN_MASK \ + (_NETIO_PKT_VLAN_RMASK << _NETIO_PKT_VLAN_SHIFT) +#define _NETIO_PKT_VLAN_NONE 0 /* No VLAN tag. */ +#define _NETIO_PKT_VLAN_ONE 1 /* One VLAN tag. */ +#define _NETIO_PKT_VLAN_TWO_OUTER 2 /* Two VLAN tags, outer one used. */ +#define _NETIO_PKT_VLAN_TWO_INNER 3 /* Two VLAN tags, inner one used. */ + +/** Which proprietary tags the packet has. 
*/ +#define _NETIO_PKT_TAG_SHIFT 6 +#define _NETIO_PKT_TAG_RMASK 0x3 +#define _NETIO_PKT_TAG_MASK \ + (_NETIO_PKT_TAG_RMASK << _NETIO_PKT_TAG_SHIFT) +#define _NETIO_PKT_TAG_NONE 0 /* No proprietary tags. */ +#define _NETIO_PKT_TAG_MRVL 1 /* Marvell HyperG.Stack tags. */ +#define _NETIO_PKT_TAG_MRVL_EXT 2 /* HyperG.Stack extended tags. */ +#define _NETIO_PKT_TAG_BRCM 3 /* Broadcom HiGig tags. */ + +/** Whether a packet has an LLC + SNAP header. */ +#define _NETIO_PKT_SNAP_SHIFT 8 +#define _NETIO_PKT_SNAP_RMASK 0x1 +#define _NETIO_PKT_SNAP_MASK \ + (_NETIO_PKT_SNAP_RMASK << _NETIO_PKT_SNAP_SHIFT) + +/* NOTE: Bits 9 and 10 are unused. */ + +/** Length of any custom data before the L2 header, in words. */ +#define _NETIO_PKT_CUSTOM_LEN_SHIFT 11 +#define _NETIO_PKT_CUSTOM_LEN_RMASK 0x1F +#define _NETIO_PKT_CUSTOM_LEN_MASK \ + (_NETIO_PKT_CUSTOM_LEN_RMASK << _NETIO_PKT_CUSTOM_LEN_SHIFT) + +/** The L4 checksum is incorrect (or perhaps has not been calculated). */ +#define _NETIO_PKT_BAD_L4_CSUM_SHIFT 16 +#define _NETIO_PKT_BAD_L4_CSUM_RMASK 0x1 +#define _NETIO_PKT_BAD_L4_CSUM_MASK \ + (_NETIO_PKT_BAD_L4_CSUM_RMASK << _NETIO_PKT_BAD_L4_CSUM_SHIFT) + +/** Length of the L2 header, in words. */ +#define _NETIO_PKT_L2_LEN_SHIFT 17 +#define _NETIO_PKT_L2_LEN_RMASK 0x1F +#define _NETIO_PKT_L2_LEN_MASK \ + (_NETIO_PKT_L2_LEN_RMASK << _NETIO_PKT_L2_LEN_SHIFT) + + +/* Flags in minimal packet metadata. */ + +/** We need an eDMA checksum on this packet. */ +#define _NETIO_PKT_NEED_EDMA_CSUM_SHIFT 0 +#define _NETIO_PKT_NEED_EDMA_CSUM_RMASK 1 +#define _NETIO_PKT_NEED_EDMA_CSUM_MASK \ + (_NETIO_PKT_NEED_EDMA_CSUM_RMASK << _NETIO_PKT_NEED_EDMA_CSUM_SHIFT) + +/* Data within the packet information table. */ + +/* Note that, for efficiency, code which uses these fields assumes that none + * of the shift values below are zero. See uses below for an explanation. */ + +/** Offset within the L2 header of the innermost ethertype (in halfwords). 
*/ +#define _NETIO_PKT_INFO_ETYPE_SHIFT 6 +#define _NETIO_PKT_INFO_ETYPE_RMASK 0x1F + +/** Offset within the L2 header of the VLAN tag (in halfwords). */ +#define _NETIO_PKT_INFO_VLAN_SHIFT 11 +#define _NETIO_PKT_INFO_VLAN_RMASK 0x1F + +#endif + + +/** The size of a memory buffer representing a small packet. + * @ingroup egress */ +#define SMALL_PACKET_SIZE 256 + +/** The size of a memory buffer representing a large packet. + * @ingroup egress */ +#define LARGE_PACKET_SIZE 2048 + +/** The size of a memory buffer representing a jumbo packet. + * @ingroup egress */ +#define JUMBO_PACKET_SIZE (12 * 1024) + + +/* Common ethertypes. + * @ingroup ingress */ +/** @{ */ +/** The ethertype of IPv4. */ +#define ETHERTYPE_IPv4 (0x0800) +/** The ethertype of ARP. */ +#define ETHERTYPE_ARP (0x0806) +/** The ethertype of VLANs. */ +#define ETHERTYPE_VLAN (0x8100) +/** The ethertype of a Q-in-Q header. */ +#define ETHERTYPE_Q_IN_Q (0x9100) +/** The ethertype of IPv6. */ +#define ETHERTYPE_IPv6 (0x86DD) +/** The ethertype of MPLS. */ +#define ETHERTYPE_MPLS (0x8847) +/** @} */ + + +/** The possible return values of NETIO_PKT_STATUS. + * @ingroup ingress + */ +typedef enum +{ + /** No problems were detected with this packet. */ + NETIO_PKT_STATUS_OK, + /** The packet is undersized; this is expected behavior if the packet's + * ethertype is unrecognized, but otherwise the packet is likely corrupt. */ + NETIO_PKT_STATUS_UNDERSIZE, + /** The packet is oversized and some trailing bytes have been discarded. + This is expected behavior for short packets, since it's impossible to + precisely determine the amount of padding which may have been added to + them to make them meet the minimum Ethernet packet size. */ + NETIO_PKT_STATUS_OVERSIZE, + /** The packet was judged to be corrupt by hardware (for instance, it had + a bad CRC, or part of it was discarded due to lack of buffer space in + the I/O shim) and should be discarded. 
*/ + NETIO_PKT_STATUS_BAD +} netio_pkt_status_t; + + +/** Log2 of how many buckets we have. */ +#define NETIO_LOG2_NUM_BUCKETS (10) + +/** How many buckets we have. + * @ingroup ingress */ +#define NETIO_NUM_BUCKETS (1 << NETIO_LOG2_NUM_BUCKETS) + + +/** + * @brief A group-to-bucket identifier. + * + * @ingroup setup + * + * This tells us what to do with a given group. + */ +typedef union { + /** The header broken down into bits. */ + struct { + /** Whether we should balance on L4, if available */ + unsigned int __balance_on_l4:1; + /** Whether we should balance on L3, if available */ + unsigned int __balance_on_l3:1; + /** Whether we should balance on L2, if available */ + unsigned int __balance_on_l2:1; + /** Reserved for future use */ + unsigned int __reserved:1; + /** The base bucket to use to send traffic */ + unsigned int __bucket_base:NETIO_LOG2_NUM_BUCKETS; + /** The mask to apply to the balancing value. This must be one less + * than a power of two, e.g. 0x3 or 0xFF. + */ + unsigned int __bucket_mask:NETIO_LOG2_NUM_BUCKETS; + /** Pad to 32 bits */ + unsigned int __padding:(32 - 4 - 2 * NETIO_LOG2_NUM_BUCKETS); + } bits; + /** To send out the IDN. */ + unsigned int word; +} +netio_group_t; + + +/** + * @brief A VLAN-to-bucket identifier. + * + * @ingroup setup + * + * This tells us what to do with a given VLAN. + */ +typedef netio_group_t netio_vlan_t; + + +/** + * A bucket-to-queue mapping. + * @ingroup setup + */ +typedef unsigned char netio_bucket_t; + + +/** + * A packet size can always fit in a netio_size_t. + * @ingroup setup + */ +typedef unsigned int netio_size_t; + + +/** + * @brief Ethernet standard (ingress) packet metadata. + * + * @ingroup ingress + * + * This is additional data associated with each packet. + * This structure is opaque and accessed through the @ref ingress. 
+ * + * Also, the buffer population operation currently assumes that standard + * metadata is at least as large as minimal metadata, and will need to be + * modified if that is no longer the case. + */ +typedef struct +{ +#ifdef __DOXYGEN__ + /** This structure is opaque. */ + unsigned char opaque[24]; +#else + /** The overall ordinal of the packet */ + unsigned int __packet_ordinal; + /** The ordinal of the packet within the group */ + unsigned int __group_ordinal; + /** The best flow hash IPP could compute. */ + unsigned int __flow_hash; + /** Flags pertaining to checksum calculation, packet type, etc. */ + unsigned int __flags; + /** The first word of "user data". */ + unsigned int __user_data_0; + /** The second word of "user data". */ + unsigned int __user_data_1; +#endif +} +netio_pkt_metadata_t; + + +/** To ensure that the L3 header is aligned mod 4, the L2 header should be + * aligned mod 4 plus 2, since every supported L2 header is 4n + 2 bytes + * long. The standard way to do this is to simply add 2 bytes of padding + * before the L2 header. + */ +#define NETIO_PACKET_PADDING 2 + + + +/** + * @brief Ethernet minimal (egress) packet metadata. + * + * @ingroup egress + * + * This structure represents information about packets which have + * been processed by @ref netio_populate_buffer() or + * @ref netio_populate_prepend_buffer(). This structure is opaque + * and accessed through the @ref egress. + * + * @internal This structure is actually copied into the memory used by + * standard metadata, which is assumed to be large enough. + */ +typedef struct +{ +#ifdef __DOXYGEN__ + /** This structure is opaque. */ + unsigned char opaque[14]; +#else + /** The offset of the L2 header from the start of the packet data. */ + unsigned short l2_offset; + /** The offset of the L3 header from the start of the packet data. */ + unsigned short l3_offset; + /** Where to write the checksum. */ + unsigned char csum_location; + /** Where to start checksumming from. 
*/ + unsigned char csum_start; + /** Flags pertaining to checksum calculation etc. */ + unsigned short flags; + /** The L2 length of the packet. */ + unsigned short l2_length; + /** The checksum with which to seed the checksum generator. */ + unsigned short csum_seed; + /** How much to checksum. */ + unsigned short csum_length; +#endif +} +netio_pkt_minimal_metadata_t; + + +#ifndef __DOXYGEN__ + +/** + * @brief An I/O notification header. + * + * This is the first word of data received from an I/O shim in a notification + * packet. It contains framing and status information. + */ +typedef union +{ + unsigned int word; /**< The whole word. */ + /** The various fields. */ + struct + { + unsigned int __channel:7; /**< Resource channel. */ + unsigned int __type:4; /**< Type. */ + unsigned int __ack:1; /**< Whether an acknowledgement is needed. */ + unsigned int __reserved:1; /**< Reserved. */ + unsigned int __protocol:1; /**< A protocol-specific word is added. */ + unsigned int __status:2; /**< Status of the transfer. */ + unsigned int __framing:2; /**< Framing of the transfer. */ + unsigned int __transfer_size:14; /**< Transfer size in bytes (total). */ + } bits; +} +__netio_pkt_notif_t; + + +/** + * Returns the base address of the packet. + */ +#define _NETIO_PKT_HANDLE_BASE(p) \ + ((unsigned char*)((p).word & 0xFFFFFFC0)) + +/** + * Returns the base address of the packet. + */ +#define _NETIO_PKT_BASE(p) \ + _NETIO_PKT_HANDLE_BASE(p->__packet) + +/** + * @brief An I/O notification packet (second word) + * + * This is the second word of data received from an I/O shim in a notification + * packet. This is the virtual address of the packet buffer, plus some flag + * bits. (The virtual address of the packet is always 256-byte aligned so we + * have room for 8 bits' worth of flags in the low 8 bits.) + * + * @internal + * NOTE: The low two bits must contain "__queue", so the "packet size" + * (SIZE_SMALL, SIZE_LARGE, or SIZE_JUMBO) can be determined quickly. 
+ * + * If __addr or __offset are moved, _NETIO_PKT_BASE + * (defined right below this) must be changed. + */ +typedef union +{ + unsigned int word; /**< The whole word. */ + /** The various fields. */ + struct + { + /** Which queue the packet will be returned to once it is sent back to + the IPP. This is one of the SIZE_xxx values. */ + unsigned int __queue:2; + + /** The IPP handle of the sending IPP. */ + unsigned int __ipp_handle:2; + + /** Reserved for future use. */ + unsigned int __reserved:1; + + /** If 1, this packet has minimal (egress) metadata; otherwise, it + has standard (ingress) metadata. */ + unsigned int __minimal:1; + + /** Offset of the metadata within the packet. This value is multiplied + * by 64 and added to the base packet address to get the metadata + * address. Note that this field is aligned within the word such that + * you can easily extract the metadata address with a 26-bit mask. */ + unsigned int __offset:2; + + /** The top 24 bits of the packet's virtual address. */ + unsigned int __addr:24; + } bits; +} +__netio_pkt_handle_t; + +#endif /* !__DOXYGEN__ */ + + +/** + * @brief A handle for an I/O packet's storage. + * @ingroup ingress + * + * netio_pkt_handle_t encodes the concept of a ::netio_pkt_t with its + * packet metadata removed. It is a much smaller type that exists to + * facilitate applications where the full ::netio_pkt_t type is too + * large, such as those that cache enormous numbers of packets or wish + * to transmit packet descriptors over the UDN. + * + * Because there is no metadata, most ::netio_pkt_t operations cannot be + * performed on a netio_pkt_handle_t. It supports only + * netio_free_handle() (to free the buffer) and + * NETIO_PKT_CUSTOM_DATA_H() (to access a pointer to its contents). + * The application must acquire any additional metadata it wants from the + * original ::netio_pkt_t and record it separately. 
+ * + * A netio_pkt_handle_t can be extracted from a ::netio_pkt_t by calling + * NETIO_PKT_HANDLE(). An invalid handle (analogous to NULL) can be + * created by assigning the value ::NETIO_PKT_HANDLE_NONE. A handle can + * be tested for validity with NETIO_PKT_HANDLE_IS_VALID(). + */ +typedef struct +{ + unsigned int word; /**< Opaque bits. */ +} netio_pkt_handle_t; + +/** + * @brief A packet descriptor. + * + * @ingroup ingress + * @ingroup egress + * + * This data structure represents a packet. The structure is manipulated + * through the @ref ingress and the @ref egress. + * + * While the contents of a netio_pkt_t are opaque, the structure itself is + * portable. This means that it may be shared between all tiles which have + * done a netio_input_register() call for the interface on which the pkt_t + * was initially received (via netio_get_packet()) or retrieved (via + * netio_get_buffer()). The contents of a netio_pkt_t can be transmitted to + * another tile via shared memory, or via a UDN message, or by other means. + * The destination tile may then use the pkt_t as if it had originally been + * received locally; it may read or write the packet's data, read its + * metadata, free the packet, send the packet, transfer the netio_pkt_t to + * yet another tile, and so forth. + * + * Once a netio_pkt_t has been transferred to a second tile, the first tile + * should not reference the original copy; in particular, if more than one + * tile frees or sends the same netio_pkt_t, the IPP's packet free lists will + * become corrupted. Note also that each tile which reads or modifies + * packet data must obey the memory coherency rules outlined in @ref input. + */ +typedef struct +{ +#ifdef __DOXYGEN__ + /** This structure is opaque. */ + unsigned char opaque[32]; +#else + /** For an ingress packet (one with standard metadata), this is the + * notification header we got from the I/O shim. 
For an egress packet + * (one with minimal metadata), this word is zero if the packet has not + * been populated, and nonzero if it has. */ + __netio_pkt_notif_t __notif_header; + + /** Virtual address of the packet buffer, plus state flags. */ + __netio_pkt_handle_t __packet; + + /** Metadata associated with the packet. */ + netio_pkt_metadata_t __metadata; +#endif +} +netio_pkt_t; + + +#ifndef __DOXYGEN__ + +#define __NETIO_PKT_NOTIF_HEADER(pkt) ((pkt)->__notif_header) +#define __NETIO_PKT_IPP_HANDLE(pkt) ((pkt)->__packet.bits.__ipp_handle) +#define __NETIO_PKT_QUEUE(pkt) ((pkt)->__packet.bits.__queue) +#define __NETIO_PKT_NOTIF_HEADER_M(mda, pkt) ((pkt)->__notif_header) +#define __NETIO_PKT_IPP_HANDLE_M(mda, pkt) ((pkt)->__packet.bits.__ipp_handle) +#define __NETIO_PKT_MINIMAL(pkt) ((pkt)->__packet.bits.__minimal) +#define __NETIO_PKT_QUEUE_M(mda, pkt) ((pkt)->__packet.bits.__queue) +#define __NETIO_PKT_FLAGS_M(mda, pkt) ((mda)->__flags) + +/* Packet information table, used by the attribute access functions below. */ +extern const uint16_t _netio_pkt_info[]; + +#endif /* !__DOXYGEN__ */ + + +#ifndef __DOXYGEN__ +/* These macros are deprecated and will disappear in a future MDE release. */ +#define NETIO_PKT_GOOD_CHECKSUM(pkt) \ + NETIO_PKT_L4_CSUM_CORRECT(pkt) +#define NETIO_PKT_GOOD_CHECKSUM_M(mda, pkt) \ + NETIO_PKT_L4_CSUM_CORRECT_M(mda, pkt) +#endif /* !__DOXYGEN__ */ + + +/* Packet attribute access functions. */ + +/** Return a pointer to the metadata for a packet. + * @ingroup ingress + * + * Calling this function once and passing the result to other retrieval + * functions with a "_M" suffix usually improves performance. This + * function must be called on an 'ingress' packet (i.e. one retrieved + * by @ref netio_get_packet(), on which @ref netio_populate_buffer() or + * @ref netio_populate_prepend_buffer() have not been called). Use of this + * function on an 'egress' packet will cause an assertion failure. + * + * @param[in] pkt Packet on which to operate.
+ * @return A pointer to the packet's standard metadata. + */ +static __inline netio_pkt_metadata_t* +NETIO_PKT_METADATA(netio_pkt_t* pkt) +{ + netio_assert(!pkt->__packet.bits.__minimal); + return &pkt->__metadata; +} + + +/** Return a pointer to the minimal metadata for a packet. + * @ingroup egress + * + * Calling this function once and passing the result to other retrieval + * functions with a "_MM" suffix usually improves performance. This + * function must be called on an 'egress' packet (i.e. one on which + * @ref netio_populate_buffer() or @ref netio_populate_prepend_buffer() + * have been called, or one retrieved by @ref netio_get_buffer()). Use of + * this function on an 'ingress' packet will cause an assertion failure. + * + * @param[in] pkt Packet on which to operate. + * @return A pointer to the packet's minimal metadata. + */ +static __inline netio_pkt_minimal_metadata_t* +NETIO_PKT_MINIMAL_METADATA(netio_pkt_t* pkt) +{ + netio_assert(pkt->__packet.bits.__minimal); + return (netio_pkt_minimal_metadata_t*) &pkt->__metadata; +} + + +/** Determine whether a packet has 'minimal' metadata. + * @ingroup pktfuncs + * + * This function will return nonzero if the packet is an 'egress' + * packet (i.e. one on which @ref netio_populate_buffer() or + * @ref netio_populate_prepend_buffer() have been called, or one + * retrieved by @ref netio_get_buffer()), and zero if the packet + * is an 'ingress' packet (i.e. one retrieved by @ref netio_get_packet(), + * which has not been converted into an 'egress' packet). + * + * @param[in] pkt Packet on which to operate. + * @return Nonzero if the packet has minimal metadata. + */ +static __inline unsigned int +NETIO_PKT_IS_MINIMAL(netio_pkt_t* pkt) +{ + return pkt->__packet.bits.__minimal; +} + + +/** Return a handle for a packet's storage. + * @ingroup pktfuncs + * + * @param[in] pkt Packet on which to operate. + * @return A handle for the packet's storage.
+ */ +static __inline netio_pkt_handle_t +NETIO_PKT_HANDLE(netio_pkt_t* pkt) +{ + netio_pkt_handle_t h; + h.word = pkt->__packet.word; + return h; +} + + +/** A special reserved value indicating the absence of a packet handle. + * + * @ingroup pktfuncs + */ +#define NETIO_PKT_HANDLE_NONE ((netio_pkt_handle_t) { 0 }) + + +/** Test whether a packet handle is valid. + * + * Applications may wish to use the reserved value NETIO_PKT_HANDLE_NONE + * to indicate no packet at all. This function tests to see if a packet + * handle is a real handle, not this special reserved value. + * + * @ingroup pktfuncs + * + * @param[in] handle Handle on which to operate. + * @return One if the packet handle is valid, else zero. + */ +static __inline unsigned int +NETIO_PKT_HANDLE_IS_VALID(netio_pkt_handle_t handle) +{ + return handle.word != 0; +} + + + +/** Return a pointer to the start of the packet's custom header. + * A custom header may or may not be present, depending upon the IPP; its + * contents and alignment are also IPP-dependent. Currently, none of the + * standard IPPs supplied by Tilera produce a custom header. If present, + * the custom header precedes the L2 header in the packet buffer. + * @ingroup ingress + * + * @param[in] handle Handle on which to operate. + * @return A pointer to start of the packet. + */ +static __inline unsigned char* +NETIO_PKT_CUSTOM_DATA_H(netio_pkt_handle_t handle) +{ + return _NETIO_PKT_HANDLE_BASE(handle) + NETIO_PACKET_PADDING; +} + + +/** Return the length of the packet's custom header. + * A custom header may or may not be present, depending upon the IPP; its + * contents and alignment are also IPP-dependent. Currently, none of the + * standard IPPs supplied by Tilera produce a custom header. If present, + * the custom header precedes the L2 header in the packet buffer. + * + * @ingroup ingress + * + * @param[in] mda Pointer to packet's standard metadata. + * @param[in] pkt Packet on which to operate. 
+ * @return The length of the packet's custom header, in bytes. + */ +static __inline netio_size_t +NETIO_PKT_CUSTOM_HEADER_LENGTH_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt) +{ + /* + * Note that we effectively need to extract a quantity from the flags word + * which is measured in words, and then turn it into bytes by shifting + * it left by 2. We do this all at once by just shifting right two less + * bits, and shifting the mask up two bits. + */ + return ((mda->__flags >> (_NETIO_PKT_CUSTOM_LEN_SHIFT - 2)) & + (_NETIO_PKT_CUSTOM_LEN_RMASK << 2)); +} + + +/** Return the length of the packet, starting with the custom header. + * A custom header may or may not be present, depending upon the IPP; its + * contents and alignment are also IPP-dependent. Currently, none of the + * standard IPPs supplied by Tilera produce a custom header. If present, + * the custom header precedes the L2 header in the packet buffer. + * @ingroup ingress + * + * @param[in] mda Pointer to packet's standard metadata. + * @param[in] pkt Packet on which to operate. + * @return The length of the packet, in bytes. + */ +static __inline netio_size_t +NETIO_PKT_CUSTOM_LENGTH_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt) +{ + return (__NETIO_PKT_NOTIF_HEADER(pkt).bits.__transfer_size - + NETIO_PACKET_PADDING); +} + + +/** Return a pointer to the start of the packet's custom header. + * A custom header may or may not be present, depending upon the IPP; its + * contents and alignment are also IPP-dependent. Currently, none of the + * standard IPPs supplied by Tilera produce a custom header. If present, + * the custom header precedes the L2 header in the packet buffer. + * @ingroup ingress + * + * @param[in] mda Pointer to packet's standard metadata. + * @param[in] pkt Packet on which to operate. + * @return A pointer to start of the packet. 
+ */ +static __inline unsigned char* +NETIO_PKT_CUSTOM_DATA_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt) +{ + return NETIO_PKT_CUSTOM_DATA_H(NETIO_PKT_HANDLE(pkt)); +} + + +/** Return the length of the packet's L2 (Ethernet plus VLAN or SNAP) header. + * @ingroup ingress + * + * @param[in] mda Pointer to packet's standard metadata. + * @param[in] pkt Packet on which to operate. + * @return The length of the packet's L2 header, in bytes. + */ +static __inline netio_size_t +NETIO_PKT_L2_HEADER_LENGTH_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt) +{ + /* + * Note that we effectively need to extract a quantity from the flags word + * which is measured in words, and then turn it into bytes by shifting + * it left by 2. We do this all at once by just shifting right two less + * bits, and shifting the mask up two bits. We then add two bytes. + */ + return ((mda->__flags >> (_NETIO_PKT_L2_LEN_SHIFT - 2)) & + (_NETIO_PKT_L2_LEN_RMASK << 2)) + 2; +} + + +/** Return the length of the packet, starting with the L2 (Ethernet) header. + * @ingroup ingress + * + * @param[in] mda Pointer to packet's standard metadata. + * @param[in] pkt Packet on which to operate. + * @return The length of the packet, in bytes. + */ +static __inline netio_size_t +NETIO_PKT_L2_LENGTH_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt) +{ + return (NETIO_PKT_CUSTOM_LENGTH_M(mda, pkt) - + NETIO_PKT_CUSTOM_HEADER_LENGTH_M(mda,pkt)); +} + + +/** Return a pointer to the start of the packet's L2 (Ethernet) header. + * @ingroup ingress + * + * @param[in] mda Pointer to packet's standard metadata. + * @param[in] pkt Packet on which to operate. + * @return A pointer to start of the packet. + */ +static __inline unsigned char* +NETIO_PKT_L2_DATA_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt) +{ + return (NETIO_PKT_CUSTOM_DATA_M(mda, pkt) + + NETIO_PKT_CUSTOM_HEADER_LENGTH_M(mda, pkt)); +} + + +/** Retrieve the length of the packet, starting with the L3 (generally, + * the IP) header. 
+ * @ingroup ingress + * + * @param[in] mda Pointer to packet's standard metadata. + * @param[in] pkt Packet on which to operate. + * @return Length of the packet's L3 header and data, in bytes. + */ +static __inline netio_size_t +NETIO_PKT_L3_LENGTH_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt) +{ + return (NETIO_PKT_L2_LENGTH_M(mda, pkt) - + NETIO_PKT_L2_HEADER_LENGTH_M(mda,pkt)); +} + + +/** Return a pointer to the packet's L3 (generally, the IP) header. + * @ingroup ingress + * + * Note that we guarantee word alignment of the L3 header. + * + * @param[in] mda Pointer to packet's standard metadata. + * @param[in] pkt Packet on which to operate. + * @return A pointer to the packet's L3 header. + */ +static __inline unsigned char* +NETIO_PKT_L3_DATA_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt) +{ + return (NETIO_PKT_L2_DATA_M(mda, pkt) + + NETIO_PKT_L2_HEADER_LENGTH_M(mda, pkt)); +} + + +/** Return the ordinal of the packet. + * @ingroup ingress + * + * Each packet is given an ordinal number when it is delivered by the IPP. + * In the medium term, the ordinal is unique and monotonically increasing, + * being incremented by 1 for each packet; the ordinal of the first packet + * delivered after the IPP starts is zero. (Since the ordinal is of finite + * size, given enough input packets, it will eventually wrap around to zero; + * in the long term, therefore, ordinals are not unique.) The ordinals + * handed out by different IPPs are not disjoint, so two packets from + * different IPPs may have identical ordinals. Packets dropped by the + * IPP or by the I/O shim are not assigned ordinals. + * + * @param[in] mda Pointer to packet's standard metadata. + * @param[in] pkt Packet on which to operate. + * @return The packet's per-IPP packet ordinal. + */ +static __inline unsigned int +NETIO_PKT_ORDINAL_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt) +{ + return mda->__packet_ordinal; +} + + +/** Return the per-group ordinal of the packet. 
+ * @ingroup ingress + * + * Each packet is given a per-group ordinal number when it is + * delivered by the IPP. By default, the group is the packet's VLAN, + * although IPP can be recompiled to use different values. In + * the medium term, the ordinal is unique and monotonically + * increasing, being incremented by 1 for each packet; the ordinal of + * the first packet distributed to a particular group is zero. + * (Since the ordinal is of finite size, given enough input packets, + * it will eventually wrap around to zero; in the long term, + * therefore, ordinals are not unique.) The ordinals handed out by + * different IPPs are not disjoint, so two packets from different IPPs + * may have identical ordinals; similarly, packets distributed to + * different groups may have identical ordinals. Packets dropped by + * the IPP or by the I/O shim are not assigned ordinals. + * + * @param[in] mda Pointer to packet's standard metadata. + * @param[in] pkt Packet on which to operate. + * @return The packet's per-IPP, per-group ordinal. + */ +static __inline unsigned int +NETIO_PKT_GROUP_ORDINAL_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt) +{ + return mda->__group_ordinal; +} + + +/** Return the VLAN ID assigned to the packet. + * @ingroup ingress + * + * This value is usually contained within the packet header. + * + * This value will be zero if the packet does not have a VLAN tag, or if + * this value was not extracted from the packet. + * + * @param[in] mda Pointer to packet's standard metadata. + * @param[in] pkt Packet on which to operate. + * @return The packet's VLAN ID. 
+ */ +static __inline unsigned short +NETIO_PKT_VLAN_ID_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt) +{ + int vl = (mda->__flags >> _NETIO_PKT_VLAN_SHIFT) & _NETIO_PKT_VLAN_RMASK; + unsigned short* pkt_p; + int index; + unsigned short val; + /* The VLAN field of the flags says there is no VLAN ID: report 0. */ + if (vl == _NETIO_PKT_VLAN_NONE) + return 0; + /* View the L2 data as 16-bit words; the packet type field of the flags + * selects the _netio_pkt_info entry that holds the word index of the + * VLAN value. */ + pkt_p = (unsigned short*) NETIO_PKT_L2_DATA_M(mda, pkt); + index = (mda->__flags >> _NETIO_PKT_TYPE_SHIFT) & _NETIO_PKT_TYPE_RMASK; + + val = pkt_p[(_netio_pkt_info[index] >> _NETIO_PKT_INFO_VLAN_SHIFT) & + _NETIO_PKT_INFO_VLAN_RMASK]; + /* Swap the two bytes of val (32-bit byte swap, then >> 16) and keep the + * low 12 bits. NOTE(review): __insn_bytex is presumably the TILE + * intrinsic equivalent of __builtin_bswap32 -- confirm. */ +#ifdef __TILECC__ + return (__insn_bytex(val) >> 16) & 0xFFF; +#else + return (__builtin_bswap32(val) >> 16) & 0xFFF; +#endif +} + + +/** Return the ethertype of the packet. + * @ingroup ingress + * + * This value is usually contained within the packet header. + * + * This value is reliable if @ref NETIO_PKT_ETHERTYPE_RECOGNIZED_M() + * returns true, and otherwise, may not be well defined. + * + * @param[in] mda Pointer to packet's standard metadata. + * @param[in] pkt Packet on which to operate. + * @return The packet's ethertype. + */ +static __inline unsigned short +NETIO_PKT_ETHERTYPE_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt) +{ + unsigned short* pkt_p = (unsigned short*) NETIO_PKT_L2_DATA_M(mda, pkt); + int index = (mda->__flags >> _NETIO_PKT_TYPE_SHIFT) & _NETIO_PKT_TYPE_RMASK; + + unsigned short val = + pkt_p[(_netio_pkt_info[index] >> _NETIO_PKT_INFO_ETYPE_SHIFT) & + _NETIO_PKT_INFO_ETYPE_RMASK]; + /* Swap the two bytes of val: 32-bit byte swap, then >> 16. + * NOTE(review): unlike NETIO_PKT_VLAN_ID_M() there is no __TILECC__ + * branch here -- confirm that is intentional. */ + return __builtin_bswap32(val) >> 16; +} + + +/** Return the flow hash computed on the packet. + * @ingroup ingress + * + * For TCP and UDP packets, this hash is calculated by hashing together + * the "5-tuple" values, specifically the source IP address, destination + * IP address, protocol type, source port and destination port. + * The hash value is intended to be helpful for millions of distinct + * flows.
+ * + * For IPv4 or IPv6 packets which are neither TCP nor UDP, the flow hash is + * derived by hashing together the source and destination IP addresses. + * + * For MPLS-encapsulated packets, the flow hash is derived by hashing + * the first MPLS label. + * + * For all other packets the flow hash is computed from the source + * and destination Ethernet addresses. + * + * The hash is symmetric, meaning it produces the same value if the + * source and destination are swapped. The only exceptions are + * tunneling protocols 0x04 (IP in IP Encapsulation), 0x29 (Simple + * Internet Protocol), 0x2F (General Routing Encapsulation) and 0x32 + * (Encap Security Payload), which use only the destination address + * since the source address is not meaningful. + * + * @param[in] mda Pointer to packet's standard metadata. + * @param[in] pkt Packet on which to operate. + * @return The packet's 32-bit flow hash. + */ +static __inline unsigned int +NETIO_PKT_FLOW_HASH_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt) +{ /* The hash is read from the metadata, not recomputed; pkt is unused. */ + return mda->__flow_hash; +} + + +/** Return the first word of "user data" for the packet. + * + * The contents of the user data words depend on the IPP. + * + * When using the standard ipp1, ipp2, or ipp4 sub-drivers, the first + * word of user data contains the least significant bits of the 64-bit + * arrival cycle count (see @c get_cycle_count_low()). + * + * See the <em>System Programmer's Guide</em> for details. + * + * @ingroup ingress + * + * @param[in] mda Pointer to packet's standard metadata. + * @param[in] pkt Packet on which to operate (not read by this accessor; + * the word is taken from the metadata alone). + * @return The packet's first word of "user data". + */ +static __inline unsigned int +NETIO_PKT_USER_DATA_0_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt) +{ + return mda->__user_data_0; +} + + +/** Return the second word of "user data" for the packet. + * + * The contents of the user data words depend on the IPP.
+ * + * When using the standard ipp1, ipp2, or ipp4 sub-drivers, the second + * word of user data contains the most significant bits of the 64-bit + * arrival cycle count (see @c get_cycle_count_high()). + * + * See the <em>System Programmer's Guide</em> for details. + * + * @ingroup ingress + * + * @param[in] mda Pointer to packet's standard metadata. + * @param[in] pkt Packet on which to operate. + * @return The packet's second word of "user data". + */ +static __inline unsigned int +NETIO_PKT_USER_DATA_1_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt) +{ + return mda->__user_data_1; +} + + +/** Determine whether the L4 (TCP/UDP) checksum was calculated. + * @ingroup ingress + * + * @param[in] mda Pointer to packet's standard metadata. + * @param[in] pkt Packet on which to operate. + * @return Nonzero (1) if the L4 checksum was calculated, i.e. the + * _NETIO_PKT_NO_L4_CSUM_MASK flag is clear; zero otherwise. + */ +static __inline unsigned int +NETIO_PKT_L4_CSUM_CALCULATED_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt) +{ + return !(mda->__flags & _NETIO_PKT_NO_L4_CSUM_MASK); +} + + +/** Determine whether the L4 (TCP/UDP) checksum was calculated and found to + * be correct. + * @ingroup ingress + * + * @param[in] mda Pointer to packet's standard metadata. + * @param[in] pkt Packet on which to operate. + * @return Nonzero (1) if the checksum was calculated and is correct, i.e. + * neither _NETIO_PKT_BAD_L4_CSUM_MASK nor _NETIO_PKT_NO_L4_CSUM_MASK is + * set; zero otherwise. + */ +static __inline unsigned int +NETIO_PKT_L4_CSUM_CORRECT_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt) +{ + return !(mda->__flags & + (_NETIO_PKT_BAD_L4_CSUM_MASK | _NETIO_PKT_NO_L4_CSUM_MASK)); +} + + +/** Determine whether the L3 (IP) checksum was calculated. + * @ingroup ingress + * + * @param[in] mda Pointer to packet's standard metadata. + * @param[in] pkt Packet on which to operate. + * @return Nonzero if the L3 (IP) checksum was calculated.
+*/ +static __inline unsigned int +NETIO_PKT_L3_CSUM_CALCULATED_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt) +{ + return !(mda->__flags & _NETIO_PKT_NO_L3_CSUM_MASK); +} + + +/** Determine whether the L3 (IP) checksum was calculated and found to be + * correct. + * @ingroup ingress + * + * @param[in] mda Pointer to packet's standard metadata. + * @param[in] pkt Packet on which to operate. + * @return Nonzero (1) if the checksum was calculated and is correct, i.e. + * neither _NETIO_PKT_BAD_L3_CSUM_MASK nor _NETIO_PKT_NO_L3_CSUM_MASK is + * set; zero otherwise. + */ +static __inline unsigned int +NETIO_PKT_L3_CSUM_CORRECT_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt) +{ + return !(mda->__flags & + (_NETIO_PKT_BAD_L3_CSUM_MASK | _NETIO_PKT_NO_L3_CSUM_MASK)); +} + + +/** Determine whether the ethertype was recognized and L3 packet data was + * processed. + * @ingroup ingress + * + * @param[in] mda Pointer to packet's standard metadata. + * @param[in] pkt Packet on which to operate. + * @return Nonzero (1) if the ethertype was recognized and L3 packet data was + * processed, i.e. the _NETIO_PKT_TYPE_UNRECOGNIZED_MASK flag is clear. + */ +static __inline unsigned int +NETIO_PKT_ETHERTYPE_RECOGNIZED_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt) +{ + return !(mda->__flags & _NETIO_PKT_TYPE_UNRECOGNIZED_MASK); +} + + +/** Retrieve the status of a packet and any errors that may have occurred + * during ingress processing (length mismatches, CRC errors, etc.). + * @ingroup ingress + * + * Note that packets for which @ref NETIO_PKT_ETHERTYPE_RECOGNIZED_M() + * returns zero are always reported as underlength, as there is no a priori + * means to determine their length. Normally, applications should use + * @ref NETIO_PKT_BAD_M() instead of explicitly checking status with this + * function. + * + * @param[in] mda Pointer to packet's standard metadata. + * @param[in] pkt Packet on which to operate. + * @return The packet's status.
+ */ +static __inline netio_pkt_status_t +NETIO_PKT_STATUS_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt) +{ /* The status is read from the packet's notification header; mda is unused. */ + return (netio_pkt_status_t) __NETIO_PKT_NOTIF_HEADER(pkt).bits.__status; +} + + +/** Report whether a packet is bad (i.e., was shorter than expected based on + * its headers, or had a bad CRC). + * @ingroup ingress + * + * Note that this function does not verify L3 or L4 checksums. + * + * A packet is reported as bad only when bit 0 of its status is set and, + * in addition, either its ethertype was recognized or its status is + * exactly NETIO_PKT_STATUS_BAD. + * + * @param[in] mda Pointer to packet's standard metadata. + * @param[in] pkt Packet on which to operate. + * @return Nonzero if the packet is bad and should be discarded. + */ +static __inline unsigned int +NETIO_PKT_BAD_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt) +{ + return ((NETIO_PKT_STATUS_M(mda, pkt) & 1) && + (NETIO_PKT_ETHERTYPE_RECOGNIZED_M(mda, pkt) || + NETIO_PKT_STATUS_M(mda, pkt) == NETIO_PKT_STATUS_BAD)); +} + + +/** Return the length of the packet, starting with the L2 (Ethernet) header. + * @ingroup egress + * + * @param[in] mmd Pointer to packet's minimal metadata. + * @param[in] pkt Packet on which to operate (not read by this accessor). + * @return The length of the packet, in bytes. + */ +static __inline netio_size_t +NETIO_PKT_L2_LENGTH_MM(netio_pkt_minimal_metadata_t* mmd, netio_pkt_t* pkt) +{ + return mmd->l2_length; +} + + +/** Return the length of the L2 (Ethernet) header. + * @ingroup egress + * + * The length is computed as the metadata's l3_offset minus its l2_offset. + * + * @param[in] mmd Pointer to packet's minimal metadata. + * @param[in] pkt Packet on which to operate (not read by this accessor). + * @return The length of the packet's L2 header, in bytes. + */ +static __inline netio_size_t +NETIO_PKT_L2_HEADER_LENGTH_MM(netio_pkt_minimal_metadata_t* mmd, + netio_pkt_t* pkt) +{ + return mmd->l3_offset - mmd->l2_offset; +} + + +/** Return the length of the packet, starting with the L3 (IP) header. + * @ingroup egress + * + * @param[in] mmd Pointer to packet's minimal metadata. + * @param[in] pkt Packet on which to operate. + * @return Length of the packet's L3 header and data, in bytes.
+ */ +static __inline netio_size_t +NETIO_PKT_L3_LENGTH_MM(netio_pkt_minimal_metadata_t* mmd, netio_pkt_t* pkt) +{ + return (NETIO_PKT_L2_LENGTH_MM(mmd, pkt) - + NETIO_PKT_L2_HEADER_LENGTH_MM(mmd, pkt)); +} + + +/** Return a pointer to the packet's L3 (generally, the IP) header. + * @ingroup egress + * + * Note that we guarantee word alignment of the L3 header. + * + * The pointer is the packet base address plus the metadata's l3_offset. + * + * @param[in] mmd Pointer to packet's minimal metadata. + * @param[in] pkt Packet on which to operate. + * @return A pointer to the packet's L3 header. + */ +static __inline unsigned char* +NETIO_PKT_L3_DATA_MM(netio_pkt_minimal_metadata_t* mmd, netio_pkt_t* pkt) +{ + return _NETIO_PKT_BASE(pkt) + mmd->l3_offset; +} + + +/** Return a pointer to the packet's L2 (Ethernet) header. + * @ingroup egress + * + * The pointer is the packet base address plus the metadata's l2_offset. + * + * @param[in] mmd Pointer to packet's minimal metadata. + * @param[in] pkt Packet on which to operate. + * @return A pointer to the start of the packet. + */ +static __inline unsigned char* +NETIO_PKT_L2_DATA_MM(netio_pkt_minimal_metadata_t* mmd, netio_pkt_t* pkt) +{ + return _NETIO_PKT_BASE(pkt) + mmd->l2_offset; +} + + +/** Retrieve the status of a packet and any errors that may have occurred + * during ingress processing (length mismatches, CRC errors, etc.). + * @ingroup ingress + * + * Note that packets for which @ref NETIO_PKT_ETHERTYPE_RECOGNIZED() + * returns zero are always reported as underlength, as there is no a priori + * means to determine their length. Normally, applications should use + * @ref NETIO_PKT_BAD() instead of explicitly checking status with this + * function. + * + * This function asserts, via netio_assert(), that the packet is not + * marked as carrying minimal metadata. + * + * @param[in] pkt Packet on which to operate. + * @return The packet's status. + */ +static __inline netio_pkt_status_t +NETIO_PKT_STATUS(netio_pkt_t* pkt) +{ + netio_assert(!pkt->__packet.bits.__minimal); + + return (netio_pkt_status_t) __NETIO_PKT_NOTIF_HEADER(pkt).bits.__status; +} + + +/** Report whether a packet is bad (i.e., was shorter than expected based on + * its headers, or had a bad CRC).
+ * @ingroup ingress + * + * Note that this function does not verify L3 or L4 checksums. + * + * @param[in] pkt Packet on which to operate. + * @return Nonzero if the packet is bad and should be discarded. + */ +static __inline unsigned int +NETIO_PKT_BAD(netio_pkt_t* pkt) +{ + netio_pkt_metadata_t* mda = NETIO_PKT_METADATA(pkt); + /* Look up the standard metadata, then defer to the _M variant. */ + return NETIO_PKT_BAD_M(mda, pkt); +} + + +/** Return the length of the packet's custom header. + * A custom header may or may not be present, depending upon the IPP; its + * contents and alignment are also IPP-dependent. Currently, none of the + * standard IPPs supplied by Tilera produce a custom header. If present, + * the custom header precedes the L2 header in the packet buffer. + * + * This wrapper always uses the packet's standard metadata, obtained with + * NETIO_PKT_METADATA(); it performs no NETIO_PKT_IS_MINIMAL() check. + * @ingroup pktfuncs + * + * @param[in] pkt Packet on which to operate. + * @return The length of the packet's custom header, in bytes. + */ +static __inline netio_size_t +NETIO_PKT_CUSTOM_HEADER_LENGTH(netio_pkt_t* pkt) +{ + netio_pkt_metadata_t* mda = NETIO_PKT_METADATA(pkt); + + return NETIO_PKT_CUSTOM_HEADER_LENGTH_M(mda, pkt); +} + + +/** Return the length of the packet, starting with the custom header. + * A custom header may or may not be present, depending upon the IPP; its + * contents and alignment are also IPP-dependent. Currently, none of the + * standard IPPs supplied by Tilera produce a custom header. If present, + * the custom header precedes the L2 header in the packet buffer. + * + * This wrapper always uses the packet's standard metadata, obtained with + * NETIO_PKT_METADATA(); it performs no NETIO_PKT_IS_MINIMAL() check. + * @ingroup pktfuncs + * + * @param[in] pkt Packet on which to operate. + * @return The length of the packet, in bytes. + */ +static __inline netio_size_t +NETIO_PKT_CUSTOM_LENGTH(netio_pkt_t* pkt) +{ + netio_pkt_metadata_t* mda = NETIO_PKT_METADATA(pkt); + + return NETIO_PKT_CUSTOM_LENGTH_M(mda, pkt); +} + + +/** Return a pointer to the packet's custom header. + * A custom header may or may not be present, depending upon the IPP; its + * contents and alignment are also IPP-dependent. Currently, none of the + * standard IPPs supplied by Tilera produce a custom header.
If present, + * the custom header precedes the L2 header in the packet buffer. + * @ingroup pktfuncs + * + * @param[in] pkt Packet on which to operate. + * @return A pointer to start of the packet. + */ +static __inline unsigned char* +NETIO_PKT_CUSTOM_DATA(netio_pkt_t* pkt) +{ + netio_pkt_metadata_t* mda = NETIO_PKT_METADATA(pkt); + + return NETIO_PKT_CUSTOM_DATA_M(mda, pkt); +} + + +/** Return the length of the packet's L2 (Ethernet plus VLAN or SNAP) header. + * @ingroup pktfuncs + * + * Works for packets with either kind of metadata: NETIO_PKT_IS_MINIMAL() + * selects between the _MM (minimal) and _M (standard) accessor. + * + * @param[in] pkt Packet on which to operate. + * @return The length of the packet's L2 header, in bytes. + */ +static __inline netio_size_t +NETIO_PKT_L2_HEADER_LENGTH(netio_pkt_t* pkt) +{ + if (NETIO_PKT_IS_MINIMAL(pkt)) + { + netio_pkt_minimal_metadata_t* mmd = NETIO_PKT_MINIMAL_METADATA(pkt); + + return NETIO_PKT_L2_HEADER_LENGTH_MM(mmd, pkt); + } + else + { + netio_pkt_metadata_t* mda = NETIO_PKT_METADATA(pkt); + + return NETIO_PKT_L2_HEADER_LENGTH_M(mda, pkt); + } +} + + +/** Return the length of the packet, starting with the L2 (Ethernet) header. + * @ingroup pktfuncs + * + * Works for packets with either kind of metadata: NETIO_PKT_IS_MINIMAL() + * selects between the _MM (minimal) and _M (standard) accessor. + * + * @param[in] pkt Packet on which to operate. + * @return The length of the packet, in bytes. + */ +static __inline netio_size_t +NETIO_PKT_L2_LENGTH(netio_pkt_t* pkt) +{ + if (NETIO_PKT_IS_MINIMAL(pkt)) + { + netio_pkt_minimal_metadata_t* mmd = NETIO_PKT_MINIMAL_METADATA(pkt); + + return NETIO_PKT_L2_LENGTH_MM(mmd, pkt); + } + else + { + netio_pkt_metadata_t* mda = NETIO_PKT_METADATA(pkt); + + return NETIO_PKT_L2_LENGTH_M(mda, pkt); + } +} + + +/** Return a pointer to the packet's L2 (Ethernet) header. + * @ingroup pktfuncs + * + * @param[in] pkt Packet on which to operate. + * @return A pointer to start of the packet.
+ */ +static __inline unsigned char* +NETIO_PKT_L2_DATA(netio_pkt_t* pkt) +{ /* Minimal-metadata packets use the _MM accessor; all others use _M. */ + if (NETIO_PKT_IS_MINIMAL(pkt)) + { + netio_pkt_minimal_metadata_t* mmd = NETIO_PKT_MINIMAL_METADATA(pkt); + + return NETIO_PKT_L2_DATA_MM(mmd, pkt); + } + else + { + netio_pkt_metadata_t* mda = NETIO_PKT_METADATA(pkt); + + return NETIO_PKT_L2_DATA_M(mda, pkt); + } +} + + +/** Retrieve the length of the packet, starting with the L3 (generally, the IP) + * header. + * @ingroup pktfuncs + * + * Works for packets with either kind of metadata: NETIO_PKT_IS_MINIMAL() + * selects between the _MM (minimal) and _M (standard) accessor. + * + * @param[in] pkt Packet on which to operate. + * @return Length of the packet's L3 header and data, in bytes. + */ +static __inline netio_size_t +NETIO_PKT_L3_LENGTH(netio_pkt_t* pkt) +{ + if (NETIO_PKT_IS_MINIMAL(pkt)) + { + netio_pkt_minimal_metadata_t* mmd = NETIO_PKT_MINIMAL_METADATA(pkt); + + return NETIO_PKT_L3_LENGTH_MM(mmd, pkt); + } + else + { + netio_pkt_metadata_t* mda = NETIO_PKT_METADATA(pkt); + + return NETIO_PKT_L3_LENGTH_M(mda, pkt); + } +} + + +/** Return a pointer to the packet's L3 (generally, the IP) header. + * @ingroup pktfuncs + * + * Note that we guarantee word alignment of the L3 header. + * + * Works for packets with either kind of metadata: NETIO_PKT_IS_MINIMAL() + * selects between the _MM (minimal) and _M (standard) accessor. + * + * @param[in] pkt Packet on which to operate. + * @return A pointer to the packet's L3 header. + */ +static __inline unsigned char* +NETIO_PKT_L3_DATA(netio_pkt_t* pkt) +{ + if (NETIO_PKT_IS_MINIMAL(pkt)) + { + netio_pkt_minimal_metadata_t* mmd = NETIO_PKT_MINIMAL_METADATA(pkt); + + return NETIO_PKT_L3_DATA_MM(mmd, pkt); + } + else + { + netio_pkt_metadata_t* mda = NETIO_PKT_METADATA(pkt); + + return NETIO_PKT_L3_DATA_M(mda, pkt); + } +} + + +/** Return the ordinal of the packet. + * @ingroup ingress + * + * Each packet is given an ordinal number when it is delivered by the IPP. + * In the medium term, the ordinal is unique and monotonically increasing, + * being incremented by 1 for each packet; the ordinal of the first packet + * delivered after the IPP starts is zero.
(Since the ordinal is of finite + * size, given enough input packets, it will eventually wrap around to zero; + * in the long term, therefore, ordinals are not unique.) The ordinals + * handed out by different IPPs are not disjoint, so two packets from + * different IPPs may have identical ordinals. Packets dropped by the + * IPP or by the I/O shim are not assigned ordinals. + * + * + * @param[in] pkt Packet on which to operate. + * @return The packet's per-IPP packet ordinal. + */ +static __inline unsigned int +NETIO_PKT_ORDINAL(netio_pkt_t* pkt) +{ + netio_pkt_metadata_t* mda = NETIO_PKT_METADATA(pkt); + /* Look up the standard metadata, then defer to the _M variant. */ + return NETIO_PKT_ORDINAL_M(mda, pkt); +} + + +/** Return the per-group ordinal of the packet. + * @ingroup ingress + * + * Each packet is given a per-group ordinal number when it is + * delivered by the IPP. By default, the group is the packet's VLAN, + * although the IPP can be recompiled to use different values. In + * the medium term, the ordinal is unique and monotonically + * increasing, being incremented by 1 for each packet; the ordinal of + * the first packet distributed to a particular group is zero. + * (Since the ordinal is of finite size, given enough input packets, + * it will eventually wrap around to zero; in the long term, + * therefore, ordinals are not unique.) The ordinals handed out by + * different IPPs are not disjoint, so two packets from different IPPs + * may have identical ordinals; similarly, packets distributed to + * different groups may have identical ordinals. Packets dropped by + * the IPP or by the I/O shim are not assigned ordinals. + * + * This wrapper obtains the standard metadata with NETIO_PKT_METADATA() + * and calls NETIO_PKT_GROUP_ORDINAL_M(). + * + * @param[in] pkt Packet on which to operate. + * @return The packet's per-IPP, per-group ordinal. + */ +static __inline unsigned int +NETIO_PKT_GROUP_ORDINAL(netio_pkt_t* pkt) +{ + netio_pkt_metadata_t* mda = NETIO_PKT_METADATA(pkt); + + return NETIO_PKT_GROUP_ORDINAL_M(mda, pkt); +} + + +/** Return the VLAN ID assigned to the packet.
+ * @ingroup ingress + * + * This is usually also contained within the packet header. If the packet + * does not have a VLAN tag, the VLAN ID returned by this function is zero. + * + * @param[in] pkt Packet on which to operate. + * @return The packet's VLAN ID. + */ +static __inline unsigned short +NETIO_PKT_VLAN_ID(netio_pkt_t* pkt) +{ + netio_pkt_metadata_t* mda = NETIO_PKT_METADATA(pkt); + /* Look up the standard metadata, then defer to the _M variant. */ + return NETIO_PKT_VLAN_ID_M(mda, pkt); +} + + +/** Return the ethertype of the packet. + * @ingroup ingress + * + * This value is reliable if @ref NETIO_PKT_ETHERTYPE_RECOGNIZED() + * returns true, and otherwise, may not be well defined. + * + * This wrapper obtains the standard metadata with NETIO_PKT_METADATA() + * and calls NETIO_PKT_ETHERTYPE_M(). + * + * @param[in] pkt Packet on which to operate. + * @return The packet's ethertype. + */ +static __inline unsigned short +NETIO_PKT_ETHERTYPE(netio_pkt_t* pkt) +{ + netio_pkt_metadata_t* mda = NETIO_PKT_METADATA(pkt); + + return NETIO_PKT_ETHERTYPE_M(mda, pkt); +} + + +/** Return the flow hash computed on the packet. + * @ingroup ingress + * + * For TCP and UDP packets, this hash is calculated by hashing together + * the "5-tuple" values, specifically the source IP address, destination + * IP address, protocol type, source port and destination port. + * The hash value is intended to be helpful for millions of distinct + * flows. + * + * For IPv4 or IPv6 packets which are neither TCP nor UDP, the flow hash is + * derived by hashing together the source and destination IP addresses. + * + * For MPLS-encapsulated packets, the flow hash is derived by hashing + * the first MPLS label. + * + * For all other packets the flow hash is computed from the source + * and destination Ethernet addresses. + * + * The hash is symmetric, meaning it produces the same value if the + * source and destination are swapped.
The only exceptions are + * tunneling protocols 0x04 (IP in IP Encapsulation), 0x29 (Simple + * Internet Protocol), 0x2F (General Routing Encapsulation) and 0x32 + * (Encap Security Payload), which use only the destination address + * since the source address is not meaningful. + * + * @param[in] pkt Packet on which to operate. + * @return The packet's 32-bit flow hash. + */ +static __inline unsigned int +NETIO_PKT_FLOW_HASH(netio_pkt_t* pkt) +{ + netio_pkt_metadata_t* mda = NETIO_PKT_METADATA(pkt); + /* Look up the standard metadata, then defer to the _M variant. */ + return NETIO_PKT_FLOW_HASH_M(mda, pkt); +} + + +/** Return the first word of "user data" for the packet. + * + * The contents of the user data words depend on the IPP. + * + * When using the standard ipp1, ipp2, or ipp4 sub-drivers, the first + * word of user data contains the least significant bits of the 64-bit + * arrival cycle count (see @c get_cycle_count_low()). + * + * See the <em>System Programmer's Guide</em> for details. + * + * This wrapper obtains the standard metadata with NETIO_PKT_METADATA() + * and calls NETIO_PKT_USER_DATA_0_M(). + * + * @ingroup ingress + * + * @param[in] pkt Packet on which to operate. + * @return The packet's first word of "user data". + */ +static __inline unsigned int +NETIO_PKT_USER_DATA_0(netio_pkt_t* pkt) +{ + netio_pkt_metadata_t* mda = NETIO_PKT_METADATA(pkt); + + return NETIO_PKT_USER_DATA_0_M(mda, pkt); +} + + +/** Return the second word of "user data" for the packet. + * + * The contents of the user data words depend on the IPP. + * + * When using the standard ipp1, ipp2, or ipp4 sub-drivers, the second + * word of user data contains the most significant bits of the 64-bit + * arrival cycle count (see @c get_cycle_count_high()). + * + * See the <em>System Programmer's Guide</em> for details. + * + * @ingroup ingress + * + * @param[in] pkt Packet on which to operate. + * @return The packet's second word of "user data".
+ */ +static __inline unsigned int +NETIO_PKT_USER_DATA_1(netio_pkt_t* pkt) +{ + netio_pkt_metadata_t* mda = NETIO_PKT_METADATA(pkt); + + return NETIO_PKT_USER_DATA_1_M(mda, pkt); +} + + +/** Determine whether the L4 (TCP/UDP) checksum was calculated. + * @ingroup ingress + * + * @param[in] pkt Packet on which to operate. + * @return Nonzero if the L4 checksum was calculated. + */ +static __inline unsigned int +NETIO_PKT_L4_CSUM_CALCULATED(netio_pkt_t* pkt) +{ + netio_pkt_metadata_t* mda = NETIO_PKT_METADATA(pkt); + + return NETIO_PKT_L4_CSUM_CALCULATED_M(mda, pkt); +} + + +/** Determine whether the L4 (TCP/UDP) checksum was calculated and found to + * be correct. + * @ingroup ingress + * + * The result is zero both when the checksum is bad and when it was not + * calculated at all; use @ref NETIO_PKT_L4_CSUM_CALCULATED() to tell the + * two cases apart. + * + * @param[in] pkt Packet on which to operate. + * @return Nonzero if the checksum was calculated and is correct. + */ +static __inline unsigned int +NETIO_PKT_L4_CSUM_CORRECT(netio_pkt_t* pkt) +{ + netio_pkt_metadata_t* mda = NETIO_PKT_METADATA(pkt); + + return NETIO_PKT_L4_CSUM_CORRECT_M(mda, pkt); +} + + +/** Determine whether the L3 (IP) checksum was calculated. + * @ingroup ingress + * + * @param[in] pkt Packet on which to operate. + * @return Nonzero if the L3 (IP) checksum was calculated. + */ +static __inline unsigned int +NETIO_PKT_L3_CSUM_CALCULATED(netio_pkt_t* pkt) +{ + netio_pkt_metadata_t* mda = NETIO_PKT_METADATA(pkt); + + return NETIO_PKT_L3_CSUM_CALCULATED_M(mda, pkt); +} + + +/** Determine whether the L3 (IP) checksum was calculated and found to be + * correct. + * @ingroup ingress + * + * The result is zero both when the checksum is bad and when it was not + * calculated at all; use @ref NETIO_PKT_L3_CSUM_CALCULATED() to tell the + * two cases apart. + * + * @param[in] pkt Packet on which to operate. + * @return Nonzero if the checksum was calculated and is correct. + */ +static __inline unsigned int +NETIO_PKT_L3_CSUM_CORRECT(netio_pkt_t* pkt) +{ + netio_pkt_metadata_t* mda = NETIO_PKT_METADATA(pkt); + + return NETIO_PKT_L3_CSUM_CORRECT_M(mda, pkt); +} + + +/** Determine whether the Ethertype was recognized and L3 packet data was + * processed. + * @ingroup ingress + * + * @param[in] pkt Packet on which to operate.
+ * @return Nonzero if the Ethertype was recognized and L3 packet data was + * processed. + */ +static __inline unsigned int +NETIO_PKT_ETHERTYPE_RECOGNIZED(netio_pkt_t* pkt) +{ + netio_pkt_metadata_t* mda = NETIO_PKT_METADATA(pkt); + + return NETIO_PKT_ETHERTYPE_RECOGNIZED_M(mda, pkt); +} + + +/** Set an egress packet's L2 length, using a metadata pointer to speed the + * computation. + * @ingroup egress + * + * Only the l2_length field of the minimal metadata is written. + * + * @param[in,out] mmd Pointer to packet's minimal metadata. + * @param[in] pkt Packet on which to operate (not read by this function). + * @param[in] len Packet L2 length, in bytes. + */ +static __inline void +NETIO_PKT_SET_L2_LENGTH_MM(netio_pkt_minimal_metadata_t* mmd, netio_pkt_t* pkt, + int len) +{ + mmd->l2_length = len; +} + + +/** Set an egress packet's L2 length. + * @ingroup egress + * + * The packet's minimal metadata is obtained with + * NETIO_PKT_MINIMAL_METADATA() and passed to NETIO_PKT_SET_L2_LENGTH_MM(). + * + * @param[in,out] pkt Packet on which to operate. + * @param[in] len Packet L2 length, in bytes. + */ +static __inline void +NETIO_PKT_SET_L2_LENGTH(netio_pkt_t* pkt, int len) +{ + netio_pkt_minimal_metadata_t* mmd = NETIO_PKT_MINIMAL_METADATA(pkt); + + NETIO_PKT_SET_L2_LENGTH_MM(mmd, pkt, len); +} + + +/** Set an egress packet's L2 header length, using a metadata pointer to + * speed the computation. + * @ingroup egress + * + * It is not normally necessary to call this routine; only the L2 length, + * not the header length, is needed to transmit a packet. It may be useful if + * the egress packet will later be processed by code which expects to use + * functions like @ref NETIO_PKT_L3_DATA() to get a pointer to the L3 payload. + * + * The header length is recorded by setting l3_offset to l2_offset plus + * the given length, so the result depends on the current l2_offset value. + * + * @param[in,out] mmd Pointer to packet's minimal metadata. + * @param[in] pkt Packet on which to operate. + * @param[in] len Packet L2 header length, in bytes. + */ +static __inline void +NETIO_PKT_SET_L2_HEADER_LENGTH_MM(netio_pkt_minimal_metadata_t* mmd, + netio_pkt_t* pkt, int len) +{ + mmd->l3_offset = mmd->l2_offset + len; +} + + +/** Set an egress packet's L2 header length.
+ * @ingroup egress + * + * It is not normally necessary to call this routine; only the L2 length, + * not the header length, is needed to transmit a packet. It may be useful if + * the egress packet will later be processed by code which expects to use + * functions like @ref NETIO_PKT_L3_DATA() to get a pointer to the L3 payload. + * + * @param[in,out] pkt Packet on which to operate. + * @param[in] len Packet L2 header length, in bytes. + */ +static __inline void +NETIO_PKT_SET_L2_HEADER_LENGTH(netio_pkt_t* pkt, int len) +{ + netio_pkt_minimal_metadata_t* mmd = NETIO_PKT_MINIMAL_METADATA(pkt); + /* Fetch the minimal metadata, then defer to the _MM variant. */ + NETIO_PKT_SET_L2_HEADER_LENGTH_MM(mmd, pkt, len); +} + + +/** Set up an egress packet for hardware checksum computation, using a + * metadata pointer to speed the operation. + * @ingroup egress + * + * NetIO provides the ability to automatically calculate a standard + * 16-bit Internet checksum on transmitted packets. The application + * may specify the point in the packet where the checksum starts, the + * number of bytes to be checksummed, and the two bytes in the packet + * which will be replaced with the completed checksum. (If the range + * of bytes to be checksummed includes the bytes to be replaced, the + * initial values of those bytes will be included in the checksum.) + * + * For some protocols, the packet checksum covers data which is not present + * in the packet, or is at least not contiguous to the main data payload. + * For instance, the TCP checksum includes a "pseudo-header" which includes + * the source and destination IP addresses of the packet. To accommodate + * this, the checksum engine may be "seeded" with an initial value, which + * the application would need to compute based on the specific protocol's + * requirements.
Note that the seed is given in host byte order (little- + * endian), not network byte order (big-endian); code written to compute a + * pseudo-header checksum in network byte order will need to byte-swap it + * before use as the seed. + * + * Note that the checksum is computed as part of the transmission process, + * so it will not be present in the packet upon completion of this routine. + * + * @param[in,out] mmd Pointer to packet's minimal metadata. + * @param[in] pkt Packet on which to operate. + * @param[in] start Offset within L2 packet of the first byte to include in + * the checksum. + * @param[in] length Number of bytes to include in the checksum. + * @param[in] location Offset within L2 packet of the first of the two bytes + * to be replaced with the calculated checksum. + * @param[in] seed Initial value of the running checksum before any of the + * packet data is added. + */ +static __inline void +NETIO_PKT_DO_EGRESS_CSUM_MM(netio_pkt_minimal_metadata_t* mmd, + netio_pkt_t* pkt, int start, int length, + int location, uint16_t seed) +{ /* Store the four checksum parameters in the minimal metadata, then set + * _NETIO_PKT_NEED_EDMA_CSUM_MASK in its flags; pkt is not touched. */ + mmd->csum_start = start; + mmd->csum_length = length; + mmd->csum_location = location; + mmd->csum_seed = seed; + mmd->flags |= _NETIO_PKT_NEED_EDMA_CSUM_MASK; +} + + +/** Set up an egress packet for hardware checksum computation. + * @ingroup egress + * + * NetIO provides the ability to automatically calculate a standard + * 16-bit Internet checksum on transmitted packets. The application + * may specify the point in the packet where the checksum starts, the + * number of bytes to be checksummed, and the two bytes in the packet + * which will be replaced with the completed checksum. (If the range + * of bytes to be checksummed includes the bytes to be replaced, the + * initial values of those bytes will be included in the checksum.) + * + * For some protocols, the packet checksum covers data which is not present + * in the packet, or is at least not contiguous to the main data payload.
+ * For instance, the TCP checksum includes a "pseudo-header" which includes + * the source and destination IP addresses of the packet. To accommodate + * this, the checksum engine may be "seeded" with an initial value, which + * the application would need to compute based on the specific protocol's + * requirements. Note that the seed is given in host byte order (little- + * endian), not network byte order (big-endian); code written to compute a + * pseudo-header checksum in network byte order will need to byte-swap it + * before use as the seed. + * + * Note that the checksum is computed as part of the transmission process, + * so it will not be present in the packet upon completion of this routine. + * + * @param[in,out] pkt Packet on which to operate. + * @param[in] start Offset within L2 packet of the first byte to include in + * the checksum. + * @param[in] length Number of bytes to include in the checksum. + * @param[in] location Offset within L2 packet of the first of the two bytes + * to be replaced with the calculated checksum. + * @param[in] seed Initial value of the running checksum before any of the + * packet data is added. + */ +static __inline void +NETIO_PKT_DO_EGRESS_CSUM(netio_pkt_t* pkt, int start, int length, + int location, uint16_t seed) +{ + netio_pkt_minimal_metadata_t* mmd = NETIO_PKT_MINIMAL_METADATA(pkt); + /* Fetch the minimal metadata, then defer to the _MM variant. */ + NETIO_PKT_DO_EGRESS_CSUM_MM(mmd, pkt, start, length, location, seed); +} + + +/** Return the number of bytes which could be prepended to a packet, using a + * metadata pointer to speed the operation. + * See @ref netio_populate_prepend_buffer() to get a full description of + * prepending. + * + * @param[in,out] mda Pointer to packet's standard metadata. + * @param[in] pkt Packet on which to operate. + * @return The number of bytes which could be prepended to the packet.
+ */ +static __inline int +NETIO_PKT_PREPEND_AVAIL_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt) +{ /* (__offset << 6) is the packet's offset field multiplied by 64. */ + return (pkt->__packet.bits.__offset << 6) + + NETIO_PKT_CUSTOM_HEADER_LENGTH_M(mda, pkt); +} + + +/** Return the number of bytes which could be prepended to a packet, using a + * metadata pointer to speed the operation. + * See @ref netio_populate_prepend_buffer() to get a full description of + * prepending. + * @ingroup egress + * + * @param[in] mmd Pointer to packet's minimal metadata. + * @param[in] pkt Packet on which to operate. + * @return The number of bytes which could be prepended: the packet's + * offset field multiplied by 64, plus the metadata's l2_offset. + */ +static __inline int +NETIO_PKT_PREPEND_AVAIL_MM(netio_pkt_minimal_metadata_t* mmd, netio_pkt_t* pkt) +{ + return (pkt->__packet.bits.__offset << 6) + mmd->l2_offset; +} + + +/** Return the number of bytes which could be prepended to a packet. + * See @ref netio_populate_prepend_buffer() to get a full description of + * prepending. + * @ingroup egress + * + * Works for packets with either kind of metadata: NETIO_PKT_IS_MINIMAL() + * selects between the _MM (minimal) and _M (standard) variant. + * + * @param[in] pkt Packet on which to operate. + * @return The number of bytes which could be prepended to the packet. + */ +static __inline int +NETIO_PKT_PREPEND_AVAIL(netio_pkt_t* pkt) +{ + if (NETIO_PKT_IS_MINIMAL(pkt)) + { + netio_pkt_minimal_metadata_t* mmd = NETIO_PKT_MINIMAL_METADATA(pkt); + + return NETIO_PKT_PREPEND_AVAIL_MM(mmd, pkt); + } + else + { + netio_pkt_metadata_t* mda = NETIO_PKT_METADATA(pkt); + + return NETIO_PKT_PREPEND_AVAIL_M(mda, pkt); + } +} + + +/** Flush a packet's minimal metadata from the cache, using a metadata pointer + * to speed the operation. + * @ingroup egress + * + * The function body is empty here, so calling it has no effect. + * + * @param[in] mmd Pointer to packet's minimal metadata. + * @param[in] pkt Packet on which to operate. + */ +static __inline void +NETIO_PKT_FLUSH_MINIMAL_METADATA_MM(netio_pkt_minimal_metadata_t* mmd, + netio_pkt_t* pkt) +{ +} + + +/** Invalidate a packet's minimal metadata from the cache, using a metadata + * pointer to speed the operation. + * @ingroup egress + * + * @param[in] mmd Pointer to packet's minimal metadata. + * @param[in] pkt Packet on which to operate.
+ */ +static __inline void +NETIO_PKT_INV_MINIMAL_METADATA_MM(netio_pkt_minimal_metadata_t* mmd, + netio_pkt_t* pkt) +{ +} + + +/** Flush and then invalidate a packet's minimal metadata from the cache, + * using a metadata pointer to speed the operation. + * @ingroup egress + * + * @param[in] mmd Pointer to packet's minimal metadata. + * @param[in] pkt Packet on which to operate. + */ +static __inline void +NETIO_PKT_FLUSH_INV_MINIMAL_METADATA_MM(netio_pkt_minimal_metadata_t* mmd, + netio_pkt_t* pkt) +{ +} + + +/** Flush a packet's metadata from the cache, using a metadata pointer + * to speed the operation. + * @ingroup ingress + * + * @param[in] mda Pointer to packet's metadata. + * @param[in] pkt Packet on which to operate. + */ +static __inline void +NETIO_PKT_FLUSH_METADATA_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt) +{ +} + + +/** Invalidate a packet's metadata from the cache, using a metadata + * pointer to speed the operation. + * @ingroup ingress + * + * @param[in] mda Pointer to packet's metadata. + * @param[in] pkt Packet on which to operate. + */ +static __inline void +NETIO_PKT_INV_METADATA_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt) +{ +} + + +/** Flush and then invalidate a packet's metadata from the cache, + * using a metadata pointer to speed the operation. + * @ingroup ingress + * + * @param[in] mda Pointer to packet's metadata. + * @param[in] pkt Packet on which to operate. + */ +static __inline void +NETIO_PKT_FLUSH_INV_METADATA_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt) +{ +} + + +/** Flush a packet's minimal metadata from the cache. + * @ingroup egress + * + * @param[in] pkt Packet on which to operate. + */ +static __inline void +NETIO_PKT_FLUSH_MINIMAL_METADATA(netio_pkt_t* pkt) +{ +} + + +/** Invalidate a packet's minimal metadata from the cache. + * @ingroup egress + * + * @param[in] pkt Packet on which to operate.
+ */ +static __inline void +NETIO_PKT_INV_MINIMAL_METADATA(netio_pkt_t* pkt) +{ +} + + +/** Flush and then invalidate a packet's minimal metadata from the cache. + * @ingroup egress + * + * @param[in] pkt Packet on which to operate. + */ +static __inline void +NETIO_PKT_FLUSH_INV_MINIMAL_METADATA(netio_pkt_t* pkt) +{ +} + + +/** Flush a packet's metadata from the cache. + * @ingroup ingress + * + * @param[in] pkt Packet on which to operate. + */ +static __inline void +NETIO_PKT_FLUSH_METADATA(netio_pkt_t* pkt) +{ +} + + +/** Invalidate a packet's metadata from the cache. + * @ingroup ingress + * + * @param[in] pkt Packet on which to operate. + */ +static __inline void +NETIO_PKT_INV_METADATA(netio_pkt_t* pkt) +{ +} + + +/** Flush and then invalidate a packet's metadata from the cache. + * @ingroup ingress + * + * @param[in] pkt Packet on which to operate. + */ +static __inline void +NETIO_PKT_FLUSH_INV_METADATA(netio_pkt_t* pkt) +{ +} + +/** Number of NUMA nodes we can distribute buffers to. + * @ingroup setup */ +#define NETIO_NUM_NODE_WEIGHTS 16 + +/** + * @brief An object for specifying the characteristics of NetIO communication + * endpoint. + * + * @ingroup setup + * + * The @ref netio_input_register() function uses this structure to define + * how an application tile will communicate with an IPP. + * + * + * Future updates to NetIO may add new members to this structure, + * which can affect the success of the registration operation. Thus, + * if dynamically initializing the structure, applications are urged to + * zero it out first, for example: + * + * @code + * netio_input_config_t config; + * memset(&config, 0, sizeof (config)); + * config.flags = NETIO_RECV | NETIO_XMIT_CSUM | NETIO_TAG_NONE; + * config.num_receive_packets = NETIO_MAX_RECEIVE_PKTS; + * config.queue_id = 0; + * . + * . + * . 
+ * @endcode + * + * since that guarantees that any unused structure members, including + * members which did not exist when the application was first developed, + * will not have unexpected values. + * + * If statically initializing the structure, we strongly recommend use of + * C99-style named initializers, for example: + * + * @code + * netio_input_config_t config = { + * .flags = NETIO_RECV | NETIO_XMIT_CSUM | NETIO_TAG_NONE, + * .num_receive_packets = NETIO_MAX_RECEIVE_PKTS, + * .queue_id = 0, + * }; + * @endcode + * + * instead of the old-style structure initialization: + * + * @code + * // Bad example! Currently equivalent to the above, but don't do this. + * netio_input_config_t config = { + * NETIO_RECV | NETIO_XMIT_CSUM | NETIO_TAG_NONE, NETIO_MAX_RECEIVE_PKTS, 0 + * }; + * @endcode + * + * since the C99 style requires no changes to the code if elements of the + * config structure are rearranged. (It also makes the initialization much + * easier to understand.) + * + * Except for items which address a particular tile's transmit or receive + * characteristics, such as the ::NETIO_RECV flag, applications are advised + * to specify the same set of configuration data on all registrations. + * This prevents differing results if multiple tiles happen to do their + * registration operations in a different order on different invocations of + * the application. This is particularly important for things like link + * management flags, and buffer size and homing specifications. + * + * Unless the ::NETIO_FIXED_BUFFER_VA flag is specified in flags, the NetIO + * buffer pool is automatically created and mapped into the application's + * virtual address space at an address chosen by the operating system, + * using the common memory (cmem) facility in the Tilera Multicore + * Components library. The cmem facility allows multiple processes to gain + * access to shared memory which is mapped into each process at an + * identical virtual address.
In order for this to work, the processes + * must have a common ancestor, which must create the common memory using + * tmc_cmem_init(). + * + * In programs using the iLib process creation API, or in programs which use + * only one process (which include programs using the pthreads library), + * tmc_cmem_init() is called automatically. All other applications + * must call it explicitly, before any child processes which might call + * netio_input_register() are created. + */ +typedef struct +{ + /** Registration characteristics. + + This value determines several characteristics of the registration; + flags for different types of behavior are ORed together to make the + final flag value. Generally applications should specify exactly + one flag from each of the following categories: + + - Whether the application will be receiving packets on this queue + (::NETIO_RECV or ::NETIO_NO_RECV). + + - Whether the application will be transmitting packets on this queue, + and if so, whether it will request egress checksum calculation + (::NETIO_XMIT, ::NETIO_XMIT_CSUM, or ::NETIO_NO_XMIT). It is + legal to call netio_get_buffer() without one of the XMIT flags, + as long as ::NETIO_RECV is specified; in this case, the retrieved + buffers must be passed to another tile for transmission. + + - Whether the application expects any vendor-specific tags in + its packets' L2 headers (::NETIO_TAG_NONE, ::NETIO_TAG_BRCM, + or ::NETIO_TAG_MRVL). This must match the configuration of the + target IPP. + + To accommodate applications written to previous versions of the NetIO + interface, none of the flags above are currently required; if omitted, + NetIO behaves more or less as if ::NETIO_RECV | ::NETIO_XMIT_CSUM | + ::NETIO_TAG_NONE were used. 
However, explicit specification of + the relevant flags allows NetIO to do a better job of resource + allocation, allows earlier detection of certain configuration errors, + and may enable advanced features or higher performance in the future, + so their use is strongly recommended. + + Note that specifying ::NETIO_NO_RECV along with ::NETIO_NO_XMIT + is a special case, intended primarily for use by programs which + retrieve network statistics or do link management operations. + When these flags are both specified, the resulting queue may not + be used with NetIO routines other than netio_get(), netio_set(), + and netio_input_unregister(). See @ref link for more information + on link management. + + Other flags are optional; their use is described below. + */ + int flags; + + /** Interface name. This is a string which identifies the specific + Ethernet controller hardware to be used. The format of the string + is a device type and a device index, separated by a slash; so, + the first 10 Gigabit Ethernet controller is named "xgbe/0", while + the second 10/100/1000 Megabit Ethernet controller is named "gbe/1". + */ + const char* interface; + + /** Receive packet queue size. This specifies the maximum number + of ingress packets that can be received on this queue without + being retrieved by @ref netio_get_packet(). If the IPP's distribution + algorithm calls for a packet to be sent to this queue, and this + number of packets are already pending there, the new packet + will either be discarded, or sent to another tile registered + for the same queue_id (see @ref drops). This value must + be at least ::NETIO_MIN_RECEIVE_PKTS, can always be at least + ::NETIO_MAX_RECEIVE_PKTS, and may be larger than that on certain + interfaces. + */ + int num_receive_packets; + + /** The queue ID being requested. Legal values for this range from 0 + to ::NETIO_MAX_QUEUE_ID, inclusive. 
::NETIO_MAX_QUEUE_ID is always + greater than or equal to the number of tiles; this allows one queue + for each tile, plus at least one additional queue. Some applications + may wish to use the additional queue as a destination for unwanted + packets, since packets delivered to queues for which no tiles have + registered are discarded. + */ + unsigned int queue_id; + + /** Maximum number of small send buffers to be held in the local empty + buffer cache. This specifies the size of the area which holds + empty small egress buffers requested from the IPP but not yet + retrieved via @ref netio_get_buffer(). This value must be greater + than zero if the application will ever use @ref netio_get_buffer() + to allocate empty small egress buffers; it may be no larger than + ::NETIO_MAX_SEND_BUFFERS. See @ref epp for more details on empty + buffer caching. + */ + int num_send_buffers_small_total; + + /** Number of small send buffers to be preallocated at registration. + If this value is nonzero, the specified number of empty small egress + buffers will be requested from the IPP during the netio_input_register + operation; this may speed the execution of @ref netio_get_buffer(). + This may be no larger than @ref num_send_buffers_small_total. See @ref + epp for more details on empty buffer caching. + */ + int num_send_buffers_small_prealloc; + + /** Maximum number of large send buffers to be held in the local empty + buffer cache. This specifies the size of the area which holds empty + large egress buffers requested from the IPP but not yet retrieved via + @ref netio_get_buffer(). This value must be greater than zero if the + application will ever use @ref netio_get_buffer() to allocate empty + large egress buffers; it may be no larger than ::NETIO_MAX_SEND_BUFFERS. + See @ref epp for more details on empty buffer caching. + */ + int num_send_buffers_large_total; + + /** Number of large send buffers to be preallocated at registration. 
+ If this value is nonzero, the specified number of empty large egress + buffers will be requested from the IPP during the netio_input_register + operation; this may speed the execution of @ref netio_get_buffer(). + This may be no larger than @ref num_send_buffers_large_total. See @ref + epp for more details on empty buffer caching. + */ + int num_send_buffers_large_prealloc; + + /** Maximum number of jumbo send buffers to be held in the local empty + buffer cache. This specifies the size of the area which holds empty + jumbo egress buffers requested from the IPP but not yet retrieved via + @ref netio_get_buffer(). This value must be greater than zero if the + application will ever use @ref netio_get_buffer() to allocate empty + jumbo egress buffers; it may be no larger than ::NETIO_MAX_SEND_BUFFERS. + See @ref epp for more details on empty buffer caching. + */ + int num_send_buffers_jumbo_total; + + /** Number of jumbo send buffers to be preallocated at registration. + If this value is nonzero, the specified number of empty jumbo egress + buffers will be requested from the IPP during the netio_input_register + operation; this may speed the execution of @ref netio_get_buffer(). + This may be no larger than @ref num_send_buffers_jumbo_total. See @ref + epp for more details on empty buffer caching. + */ + int num_send_buffers_jumbo_prealloc; + + /** Total packet buffer size. This determines the total size, in bytes, + of the NetIO buffer pool. Note that the maximum number of available + buffers of each size is determined during hypervisor configuration + (see the <em>System Programmer's Guide</em> for details); this just + influences how much host memory is allocated for those buffers. + + The buffer pool is allocated from common memory, which will be + automatically initialized if needed. If your buffer pool is larger + than 240 MB, you might need to explicitly call @c tmc_cmem_init(), + as described in the Application Libraries Reference Manual (UG227). 
+ + Packet buffers are currently allocated in chunks of 16 MB; this + value will be rounded up to the next larger multiple of 16 MB. + If this value is zero, a default of 32 MB will be used; this was + the value used by previous versions of NetIO. Note that taking this + default also affects the placement of buffers on Linux NUMA nodes. + See @ref buffer_node_weights for an explanation of buffer placement. + + In order to successfully allocate packet buffers, Linux must have + available huge pages on the relevant Linux NUMA nodes. See the + <em>System Programmer's Guide</em> for information on configuring + huge page support in Linux. + */ + uint64_t total_buffer_size; + + /** Buffer placement weighting factors. + + This array specifies the relative amount of buffering to place + on each of the available Linux NUMA nodes. This array is + indexed by the NUMA node, and the values in the array are + proportional to the amount of buffer space to allocate on that + node. + + If memory striping is enabled in the Hypervisor, then there is + only one logical NUMA node (node 0). In that case, NetIO will by + default ignore the suggested buffer node weights, and buffers + will be striped across the physical memory controllers. See + UG209 System Programmer's Guide for a description of the + hypervisor option that controls memory striping. + + If memory striping is disabled, then there are up to four NUMA + nodes, corresponding to the four DDRAM controllers in the TILE + processor architecture. See UG100 Tile Processor Architecture + Overview for a diagram showing the location of each of the DDRAM + controllers relative to the tile array. + + For instance, if memory striping is disabled, the following + configuration structure: + + @code + netio_input_config_t config = { + . + . + . + .total_buffer_size = 4 * 16 * 1024 * 1024, + .buffer_node_weights = { 1, 0, 1, 0 }, + }; + @endcode + + would result in 32 MB of buffers being placed on controller 0, and + 32 MB on controller 2.
(Since buffers are allocated in units of + 16 MB, some sets of weights will not be able to be matched exactly.) + + For the weights to be effective, @ref total_buffer_size must be + nonzero. If @ref total_buffer_size is zero, causing the default + 32 MB of buffer space to be used, then any specified weights will + be ignored, and buffers will be positioned as they were in previous + versions of NetIO: + + - For xgbe/0 and gbe/0, 16 MB of buffers will be placed on controller 1, + and the other 16 MB will be placed on controller 2. + + - For xgbe/1 and gbe/1, 16 MB of buffers will be placed on controller 2, + and the other 16 MB will be placed on controller 3. + + If @ref total_buffer_size is nonzero, but all weights are zero, + then all buffer space will be allocated on Linux NUMA node zero. + + By default, the specified buffer placement is treated as a hint; + if sufficient free memory is not available on the specified + controllers, the buffers will be allocated elsewhere. However, + if the ::NETIO_STRICT_HOMING flag is specified in @ref flags, then a + failure to allocate buffer space exactly as requested will cause the + registration operation to fail with an error of ::NETIO_CANNOT_HOME. + + Note that maximal network performance cannot be achieved with + only one memory controller. + */ + uint8_t buffer_node_weights[NETIO_NUM_NODE_WEIGHTS]; + + /** Fixed virtual address for packet buffers. Only valid when + ::NETIO_FIXED_BUFFER_VA is specified in @ref flags; see the + description of that flag for details. + */ + void* fixed_buffer_va; + + /** + Maximum number of outstanding send packet requests. This value is + only relevant when an EPP is in use; it determines the number of + slots in the EPP's outgoing packet queue which this tile is allowed + to consume, and thus the number of packets which may be sent before + the sending tile must wait for an acknowledgment from the EPP.
+ Modifying this value is generally only helpful when using @ref + netio_send_packet_vector(), where it can help improve performance by + allowing a single vector send operation to process more packets. + Typically it is not specified, and the default, which divides the + outgoing packet slots evenly between all tiles on the chip, is used. + + If a registration asks for more outgoing packet queue slots than are + available, ::NETIO_TOOMANY_XMIT will be returned. The total number + of packet queue slots which are available for all tiles for each EPP + is subject to change, but is currently ::NETIO_TOTAL_SENDS_OUTSTANDING. + + + This value is ignored if ::NETIO_XMIT is not specified in flags. + If you want to specify a large value here for a specific tile, you are + advised to specify NETIO_NO_XMIT on other, non-transmitting tiles so + that they do not consume a default number of packet slots. Any tile + transmitting is required to have at least ::NETIO_MIN_SENDS_OUTSTANDING + slots allocated to it; values less than that will be silently + increased by the NetIO library. + */ + int num_sends_outstanding; +} +netio_input_config_t; + + +/** Registration flags; used in the @ref netio_input_config_t structure. + * @addtogroup setup + */ +/** @{ */ + +/** Fail a registration request if we can't put packet buffers + on the specified memory controllers. */ +#define NETIO_STRICT_HOMING 0x00000002 + +/** This application expects no tags on its L2 headers. */ +#define NETIO_TAG_NONE 0x00000004 + +/** This application expects Marvell extended tags on its L2 headers. */ +#define NETIO_TAG_MRVL 0x00000008 + +/** This application expects Broadcom tags on its L2 headers. */ +#define NETIO_TAG_BRCM 0x00000010 + +/** This registration may call routines which receive packets. */ +#define NETIO_RECV 0x00000020 + +/** This registration may not call routines which receive packets. */ +#define NETIO_NO_RECV 0x00000040 + +/** This registration may call routines which transmit packets. 
*/ +#define NETIO_XMIT 0x00000080 + +/** This registration may call routines which transmit packets with + checksum acceleration. */ +#define NETIO_XMIT_CSUM 0x00000100 + +/** This registration may not call routines which transmit packets. */ +#define NETIO_NO_XMIT 0x00000200 + +/** This registration wants NetIO buffers mapped at an application-specified + virtual address. + + NetIO buffers are by default created by the TMC common memory facility, + which must be configured by a common ancestor of all processes sharing + a network interface. When this flag is specified, NetIO buffers are + instead mapped at an address chosen by the application (and specified + in @ref netio_input_config_t::fixed_buffer_va). This allows multiple + unrelated but cooperating processes to share a NetIO interface. + All processes sharing the same interface must specify this flag, + and all must specify the same fixed virtual address. + + @ref netio_input_config_t::fixed_buffer_va must be a + multiple of 16 MB, and the packet buffers will occupy @ref + netio_input_config_t::total_buffer_size bytes of virtual address + space, beginning at that address. If any of those virtual addresses + are currently occupied by other memory objects, like application or + shared library code or data, @ref netio_input_register() will return + ::NETIO_FAULT. While it is impossible to provide a fixed_buffer_va + which will work for all applications, a good first guess might be to + use 0xb0000000 minus @ref netio_input_config_t::total_buffer_size. + If that fails, it might be helpful to consult the running application's + virtual address description file (/proc/<em>pid</em>/maps) to see + which regions of virtual address space are available. + */ +#define NETIO_FIXED_BUFFER_VA 0x00000400 + +/** This registration call will not complete unless the network link + is up. 
The process will wait several seconds for this to happen (the + precise interval is link-dependent), but if the link does not come up, + ::NETIO_LINK_DOWN will be returned. This flag is the default if + ::NETIO_NOREQUIRE_LINK_UP is not specified. Note that this flag by + itself does not request that the link be brought up; that can be done + with the ::NETIO_AUTO_LINK_UPDN or ::NETIO_AUTO_LINK_UP flags (the + latter is the default if no NETIO_AUTO_LINK_xxx flags are specified), + or by explicitly setting the link's desired state via netio_set(). + If the link is not brought up by one of those methods, and this flag + is specified, the registration operation will return ::NETIO_LINK_DOWN. + This flag is ignored if it is specified along with ::NETIO_NO_XMIT and + ::NETIO_NO_RECV. See @ref link for more information on link + management. + */ +#define NETIO_REQUIRE_LINK_UP 0x00000800 + +/** This registration call will complete even if the network link is not up. + Whenever the link is not up, packets will not be sent or received: + netio_get_packet() will return ::NETIO_NOPKT once all queued packets + have been drained, and netio_send_packet() and similar routines will + return NETIO_QUEUE_FULL once the outgoing packet queue in the EPP + or the I/O shim is full. See @ref link for more information on link + management. + */ +#define NETIO_NOREQUIRE_LINK_UP 0x00001000 + +#ifndef __DOXYGEN__ +/* + * These are part of the implementation of the NETIO_AUTO_LINK_xxx flags, + * but should not be used directly by applications, and are thus not + * documented. + */ +#define _NETIO_AUTO_UP 0x00002000 +#define _NETIO_AUTO_DN 0x00004000 +#define _NETIO_AUTO_PRESENT 0x00008000 +#endif + +/** Set the desired state of the link to up, allowing any speeds which are + supported by the link hardware, as part of this registration operation. + Do not take down the link automatically. This is the default if + no other NETIO_AUTO_LINK_xxx flags are specified. 
This flag is ignored + if it is specified along with ::NETIO_NO_XMIT and ::NETIO_NO_RECV. + See @ref link for more information on link management. + */ +#define NETIO_AUTO_LINK_UP (_NETIO_AUTO_PRESENT | _NETIO_AUTO_UP) + +/** Set the desired state of the link to up, allowing any speeds which are + supported by the link hardware, as part of this registration operation. + Set the desired state of the link to down the next time no tiles are + registered for packet reception or transmission. This flag is ignored + if it is specified along with ::NETIO_NO_XMIT and ::NETIO_NO_RECV. + See @ref link for more information on link management. + */ +#define NETIO_AUTO_LINK_UPDN (_NETIO_AUTO_PRESENT | _NETIO_AUTO_UP | \ + _NETIO_AUTO_DN) + +/** Set the desired state of the link to down the next time no tiles are + registered for packet reception or transmission. This flag is ignored + if it is specified along with ::NETIO_NO_XMIT and ::NETIO_NO_RECV. + See @ref link for more information on link management. + */ +#define NETIO_AUTO_LINK_DN (_NETIO_AUTO_PRESENT | _NETIO_AUTO_DN) + +/** Do not bring up the link automatically as part of this registration + operation. Do not take down the link automatically. This flag + is ignored if it is specified along with ::NETIO_NO_XMIT and + ::NETIO_NO_RECV. See @ref link for more information on link management. + */ +#define NETIO_AUTO_LINK_NONE _NETIO_AUTO_PRESENT + + +/** Minimum number of receive packets. */ +#define NETIO_MIN_RECEIVE_PKTS 16 + +/** Lower bound on the maximum number of receive packets; may be higher + than this on some interfaces. */ +#define NETIO_MAX_RECEIVE_PKTS 128 + +/** Maximum number of send buffers, per packet size. */ +#define NETIO_MAX_SEND_BUFFERS 16 + +/** Number of EPP queue slots, and thus outstanding sends, per EPP. */ +#define NETIO_TOTAL_SENDS_OUTSTANDING 2015 + +/** Minimum number of EPP queue slots, and thus outstanding sends, per + * transmitting tile. 
*/ +#define NETIO_MIN_SENDS_OUTSTANDING 16 + + +/**@}*/ + +#ifndef __DOXYGEN__ + +/** + * An object for providing Ethernet packets to a process. + */ +struct __netio_queue_impl_t; + +/** + * An object for managing the user end of a NetIO queue. + */ +struct __netio_queue_user_impl_t; + +#endif /* !__DOXYGEN__ */ + + +/** A netio_queue_t describes a NetIO communications endpoint. + * @ingroup setup + */ +typedef struct +{ +#ifdef __DOXYGEN__ + uint8_t opaque[8]; /**< This is an opaque structure. */ +#else + struct __netio_queue_impl_t* __system_part; /**< The system part. */ + struct __netio_queue_user_impl_t* __user_part; /**< The user part. */ +#ifdef _NETIO_PTHREAD + _netio_percpu_mutex_t lock; /**< Queue lock. */ +#endif +#endif +} +netio_queue_t; + + +/** + * @brief Packet send context. + * + * @ingroup egress + * + * Packet send context for use with netio_send_packet_prepare and _commit. + */ +typedef struct +{ +#ifdef __DOXYGEN__ + uint8_t opaque[44]; /**< This is an opaque structure. */ +#else + uint8_t flags; /**< Defined below */ + uint8_t datalen; /**< Number of valid words pointed to by data. */ + uint32_t request[9]; /**< Request to be sent to the EPP or shim. Note + that this is smaller than the 11-word maximum + request size, since some constant values are + not saved in the context. */ + uint32_t *data; /**< Data to be sent to the EPP or shim via IDN. */ +#endif +} +netio_send_pkt_context_t; + + +#ifndef __DOXYGEN__ +#define SEND_PKT_CTX_USE_EPP 1 /**< We're sending to an EPP. */ +#define SEND_PKT_CTX_SEND_CSUM 2 /**< Request includes a checksum. */ +#endif + +/** + * @brief Packet vector entry. + * + * @ingroup egress + * + * This data structure is used with netio_send_packet_vector() to send multiple + * packets with one NetIO call. The structure should be initialized by + * calling netio_pkt_vector_set(), rather than by setting the fields + * directly. 
+ * + * This structure is guaranteed to be a power of two in size, no + * bigger than one L2 cache line, and to be aligned modulo its size. + */ +typedef struct +#ifndef __DOXYGEN__ +__attribute__((aligned(8))) +#endif +{ + /** Reserved for use by the user application. When initialized with + * the netio_pkt_vector_set() function, this field is guaranteed + * to be visible to readers only after all other fields are already + * visible. This way it can be used as a valid flag or generation + * counter. */ + uint8_t user_data; + + /* Structure members below this point should not be accessed directly by + * applications, as they may change in the future. */ + + /** Low 8 bits of the packet address to send. The high bits are + * acquired from the 'handle' field. */ + uint8_t buffer_address_low; + + /** Number of bytes to transmit. */ + uint16_t size; + + /** The raw handle from a netio_pkt_t. If this is NETIO_PKT_HANDLE_NONE, + * this vector entry will be skipped and no packet will be transmitted. */ + netio_pkt_handle_t handle; +} +netio_pkt_vector_entry_t; + + +/** + * @brief Initialize fields in a packet vector entry. + * + * @ingroup egress + * + * @param[out] v Pointer to the vector entry to be initialized. + * @param[in] pkt Packet to be transmitted when the vector entry is passed to + * netio_send_packet_vector(). Note that the packet's attributes + * (e.g., its L2 offset and length) are captured at the time this + * routine is called; subsequent changes in those attributes will not + * be reflected in the packet which is actually transmitted. + * Changes in the packet's contents, however, will be so reflected. + * If this is NULL, no packet will be transmitted. + * @param[in] user_data User data to be set in the vector entry. + * This function guarantees that the "user_data" field will become + * visible to a reader only after all other fields have become visible.
+ * This allows a structure in a ring buffer to be written and read + * by a polling reader without any locks or other synchronization. + */ +static __inline void +netio_pkt_vector_set(volatile netio_pkt_vector_entry_t* v, netio_pkt_t* pkt, + uint8_t user_data) +{ + if (pkt) + { + /* Capture the low 8 bits of the L2 data address and the L2 length, using + * whichever metadata form (minimal or standard) the packet carries. */ + if (NETIO_PKT_IS_MINIMAL(pkt)) + { + netio_pkt_minimal_metadata_t* mmd = + (netio_pkt_minimal_metadata_t*) &pkt->__metadata; + v->buffer_address_low = (uintptr_t) NETIO_PKT_L2_DATA_MM(mmd, pkt) & 0xFF; + v->size = NETIO_PKT_L2_LENGTH_MM(mmd, pkt); + } + else + { + netio_pkt_metadata_t* mda = &pkt->__metadata; + v->buffer_address_low = (uintptr_t) NETIO_PKT_L2_DATA_M(mda, pkt) & 0xFF; + v->size = NETIO_PKT_L2_LENGTH_M(mda, pkt); + } + v->handle.word = pkt->__packet.word; + } + else + { + /* NULL packet: only the handle is written; buffer_address_low and size + * keep whatever values they already held. */ + v->handle.word = 0; /* Set handle to NETIO_PKT_HANDLE_NONE. */ + } + + /* Empty asm with a "memory" clobber: a compiler-level barrier, so the + * compiler cannot move the stores above past the user_data store below. + * NOTE(review): this constrains the compiler only; whether a hardware + * memory fence is also required is not visible here. */ + __asm__("" : : : "memory"); + + v->user_data = user_data; +} + + +/** + * Flags and structures for @ref netio_get() and @ref netio_set(). + * @ingroup config + */ + +/** @{ */ +/** Parameter class; addr is a NETIO_PARAM_xxx value. */ +#define NETIO_PARAM 0 +/** Interface MAC address. This address is only valid with @ref netio_get(). + * The value is a 6-byte MAC address. Depending upon the overall system + * design, a MAC address may or may not be available for each interface. */ +#define NETIO_PARAM_MAC 0 + +/** Determine whether to suspend output on the receipt of pause frames. + * If the value is nonzero, the I/O shim will suspend output when a pause + * frame is received. If the value is zero, pause frames will be ignored. */ +#define NETIO_PARAM_PAUSE_IN 1 + +/** Determine whether to send pause frames if the I/O shim packet FIFOs are + * nearly full. If the value is zero, pause frames are not sent. If + * the value is nonzero, it is the delay value which will be sent in any + * pause frames which are output, in units of 512 bit times. */ +#define NETIO_PARAM_PAUSE_OUT 2 + +/** Jumbo frame support. The value is a 4-byte integer.
If the value is + * nonzero, the MAC will accept frames of up to 10240 bytes. If the value + * is zero, the MAC will only accept frames of up to 1544 bytes. */ +#define NETIO_PARAM_JUMBO 3 + +/** I/O shim's overflow statistics register. The value is two 16-bit integers. + * The first 16-bit value (or the low 16 bits, if the value is treated as a + * 32-bit number) is the count of packets which were completely dropped and + * not delivered by the shim. The second 16-bit value (or the high 16 bits, + * if the value is treated as a 32-bit number) is the count of packets + * which were truncated and thus only partially delivered by the shim. This + * register is automatically reset to zero after it has been read. + */ +#define NETIO_PARAM_OVERFLOW 4 + +/** IPP statistics. This address is only valid with @ref netio_get(). The + * value is a netio_stat_t structure. Unlike the I/O shim statistics, the + * IPP statistics are not all reset to zero on read; see the description + * of the netio_stat_t for details. */ +#define NETIO_PARAM_STAT 5 + +/** Possible link state. The value is a combination of "NETIO_LINK_xxx" + * flags. With @ref netio_get(), this will indicate which flags are + * actually supported by the hardware. + * + * For historical reasons, specifying this value to netio_set() will have + * the same behavior as using ::NETIO_PARAM_LINK_CONFIG, but this usage is + * discouraged. + */ +#define NETIO_PARAM_LINK_POSSIBLE_STATE 6 + +/** Link configuration. The value is a combination of "NETIO_LINK_xxx" flags. + * With @ref netio_set(), this will attempt to immediately bring up the + * link using whichever of the requested flags are supported by the + * hardware, or take down the link if the flags are zero; if this is + * not possible, an error will be returned. Many programs will want + * to use ::NETIO_PARAM_LINK_DESIRED_STATE instead. 
+ * + * For historical reasons, specifying this value to netio_get() will + * have the same behavior as using ::NETIO_PARAM_LINK_POSSIBLE_STATE, + * but this usage is discouraged. + */ +#define NETIO_PARAM_LINK_CONFIG NETIO_PARAM_LINK_POSSIBLE_STATE + +/** Current link state. This address is only valid with @ref netio_get(). + * The value is zero or more of the "NETIO_LINK_xxx" flags, ORed together. + * If the link is down, the value ANDed with NETIO_LINK_SPEED will be + * zero; if the link is up, the value ANDed with NETIO_LINK_SPEED will + * result in exactly one of the NETIO_LINK_xxx values, indicating the + * current speed. */ +#define NETIO_PARAM_LINK_CURRENT_STATE 7 + +/** Variant symbol for current state, retained for compatibility with + * pre-MDE-2.1 programs. */ +#define NETIO_PARAM_LINK_STATUS NETIO_PARAM_LINK_CURRENT_STATE + +/** Packet Coherence protocol. This address is only valid with @ref netio_get(). + * The value is nonzero if the interface is configured for cache-coherent DMA. + */ +#define NETIO_PARAM_COHERENT 8 + +/** Desired link state. The value is a combination of "NETIO_LINK_xxx" + * flags, which specify the desired state for the link. With @ref + * netio_set(), this will, in the background, attempt to bring up the link + * using whichever of the requested flags are reasonable, or take down the + * link if the flags are zero. The actual link up or down operation may + * happen after this call completes. If the link state changes in the + * future, the system will continue to try to get back to the desired link + * state; for instance, if the link is brought up successfully, and then + * the network cable is disconnected, the link will go down. However, the + * desired state of the link is still up, so if the cable is reconnected, + * the link will be brought up again. 
+ * + * With @ref netio_get(), this will indicate the desired state for the + * link, as set with a previous netio_set() call, or implicitly by a + * netio_input_register() or netio_input_unregister() operation. This may + * not reflect the current state of the link; to get that, use + * ::NETIO_PARAM_LINK_CURRENT_STATE. */ +#define NETIO_PARAM_LINK_DESIRED_STATE 9 + +/** NetIO statistics structure. Retrieved using the ::NETIO_PARAM_STAT + * address passed to @ref netio_get(). */ +typedef struct +{ + /** Number of packets which have been received by the IPP and forwarded + * to a tile's receive queue for processing. This value wraps at its + * maximum, and is not cleared upon read. */ + uint32_t packets_received; + + /** Number of packets which have been dropped by the IPP, because they could + * not be received, or could not be forwarded to a tile. The former happens + * when the IPP does not have a free packet buffer of suitable size for an + * incoming frame. The latter happens when all potential destination tiles + * for a packet, as defined by the group, bucket, and queue configuration, + * have full receive queues. This value wraps at its maximum, and is not + * cleared upon read. */ + uint32_t packets_dropped; + + /* + * Note: the #defines after each of the following four one-byte values + * denote their location within the third word of the netio_stat_t. They + * are intended for use only by the IPP implementation and are thus omitted + * from the Doxygen output. + */ + + /** Number of packets dropped because no worker was able to accept a new + * packet. This value saturates at its maximum, and is cleared upon + * read. */ + uint8_t drops_no_worker; +#ifndef __DOXYGEN__ +#define NETIO_STAT_DROPS_NO_WORKER 0 +#endif + + /** Number of packets dropped because no small buffers were available. + * This value saturates at its maximum, and is cleared upon read. 
*/ + uint8_t drops_no_smallbuf; +#ifndef __DOXYGEN__ +#define NETIO_STAT_DROPS_NO_SMALLBUF 1 +#endif + + /** Number of packets dropped because no large buffers were available. + * This value saturates at its maximum, and is cleared upon read. */ + uint8_t drops_no_largebuf; +#ifndef __DOXYGEN__ +#define NETIO_STAT_DROPS_NO_LARGEBUF 2 +#endif + + /** Number of packets dropped because no jumbo buffers were available. + * This value saturates at its maximum, and is cleared upon read. */ + uint8_t drops_no_jumbobuf; +#ifndef __DOXYGEN__ +#define NETIO_STAT_DROPS_NO_JUMBOBUF 3 +#endif +} +netio_stat_t; + + +/** Link can run, should run, or is running at 10 Mbps. */ +#define NETIO_LINK_10M 0x01 + +/** Link can run, should run, or is running at 100 Mbps. */ +#define NETIO_LINK_100M 0x02 + +/** Link can run, should run, or is running at 1 Gbps. */ +#define NETIO_LINK_1G 0x04 + +/** Link can run, should run, or is running at 10 Gbps. */ +#define NETIO_LINK_10G 0x08 + +/** Link should run at the highest speed supported by the link and by + * the device connected to the link. Only usable as a value for + * the link's desired state; never returned as a value for the current + * or possible states. */ +#define NETIO_LINK_ANYSPEED 0x10 + +/** All legal link speeds. */ +#define NETIO_LINK_SPEED (NETIO_LINK_10M | \ + NETIO_LINK_100M | \ + NETIO_LINK_1G | \ + NETIO_LINK_10G | \ + NETIO_LINK_ANYSPEED) + + +/** MAC register class. Addr is a register offset within the MAC. + * Registers within the XGbE and GbE MACs are documented in the Tile + * Processor I/O Device Guide (UG104). MAC registers start at address + * 0x4000, and do not include the MAC_INTERFACE registers. */ +#define NETIO_MAC 1 + +/** MDIO register class (IEEE 802.3 clause 22 format). Addr is the "addr" + * member of a netio_mdio_addr_t structure. */ +#define NETIO_MDIO 2 + +/** MDIO register class (IEEE 802.3 clause 45 format). Addr is the "addr" + * member of a netio_mdio_addr_t structure. 
*/ +#define NETIO_MDIO_CLAUSE45 3 + +/** NetIO MDIO address type. Retrieved or provided using the ::NETIO_MDIO + * address passed to @ref netio_get() or @ref netio_set(). */ +typedef union +{ + struct + { + unsigned int reg:16; /**< MDIO register offset. For clause 22 access, + must be less than 32. */ + unsigned int phy:5; /**< Which MDIO PHY to access. */ + unsigned int dev:5; /**< Which MDIO device to access within that PHY. + Applicable for clause 45 access only; ignored + for clause 22 access. */ + } + bits; /**< Container for bitfields. */ + uint64_t addr; /**< Value to pass to @ref netio_get() or + * @ref netio_set(). */ +} +netio_mdio_addr_t; + +/** @} */ + +#endif /* __NETIO_INTF_H__ */ diff --git a/arch/tile/kernel/Makefile b/arch/tile/kernel/Makefile index 112b1e248f05..b4c8e8ec45dc 100644 --- a/arch/tile/kernel/Makefile +++ b/arch/tile/kernel/Makefile @@ -15,3 +15,4 @@ obj-$(CONFIG_SMP) += smpboot.o smp.o tlb.o obj-$(CONFIG_MODULES) += module.o obj-$(CONFIG_EARLY_PRINTK) += early_printk.o obj-$(CONFIG_KEXEC) += machine_kexec.o relocate_kernel.o +obj-$(CONFIG_PCI) += pci.o diff --git a/arch/tile/kernel/backtrace.c b/arch/tile/kernel/backtrace.c index d3c41c1ff6bd..55a6a74974b4 100644 --- a/arch/tile/kernel/backtrace.c +++ b/arch/tile/kernel/backtrace.c @@ -369,6 +369,10 @@ static void find_caller_pc_and_caller_sp(CallerLocation *location, /* Weird; reserved value, ignore it. */ continue; } + if (info_operand & ENTRY_POINT_INFO_OP) { + /* This info op is ignored by the backtracer. */ + continue; + } /* Skip info ops which are not in the * "one_ago" mode we want right now. 
diff --git a/arch/tile/kernel/compat.c b/arch/tile/kernel/compat.c index b1e06d041555..dbc213adf5e1 100644 --- a/arch/tile/kernel/compat.c +++ b/arch/tile/kernel/compat.c @@ -21,7 +21,6 @@ #include <linux/kdev_t.h> #include <linux/fs.h> #include <linux/fcntl.h> -#include <linux/smp_lock.h> #include <linux/uaccess.h> #include <linux/signal.h> #include <asm/syscalls.h> @@ -148,14 +147,20 @@ long tile_compat_sys_msgrcv(int msqid, #define compat_sys_readahead sys32_readahead #define compat_sys_sync_file_range compat_sys_sync_file_range2 -/* The native 64-bit "struct stat" matches the 32-bit "struct stat64". */ -#define compat_sys_stat64 sys_newstat -#define compat_sys_lstat64 sys_newlstat -#define compat_sys_fstat64 sys_newfstat -#define compat_sys_fstatat64 sys_newfstatat +/* We leverage the "struct stat64" type for 32-bit time_t/nsec. */ +#define compat_sys_stat64 sys_stat64 +#define compat_sys_lstat64 sys_lstat64 +#define compat_sys_fstat64 sys_fstat64 +#define compat_sys_fstatat64 sys_fstatat64 -/* Pass full 64-bit values through ptrace. */ -#define compat_sys_ptrace tile_compat_sys_ptrace +/* The native sys_ptrace dynamically handles compat binaries. */ +#define compat_sys_ptrace sys_ptrace + +/* Call the trampolines to manage pt_regs where necessary. 
*/ +#define compat_sys_execve _compat_sys_execve +#define compat_sys_sigaltstack _compat_sys_sigaltstack +#define compat_sys_rt_sigreturn _compat_sys_rt_sigreturn +#define sys_clone _sys_clone /* * Note that we can't include <linux/unistd.h> here since the header diff --git a/arch/tile/kernel/compat_signal.c b/arch/tile/kernel/compat_signal.c index 9c710db43f13..dbb0dfc7bece 100644 --- a/arch/tile/kernel/compat_signal.c +++ b/arch/tile/kernel/compat_signal.c @@ -15,7 +15,6 @@ #include <linux/sched.h> #include <linux/mm.h> #include <linux/smp.h> -#include <linux/smp_lock.h> #include <linux/kernel.h> #include <linux/signal.h> #include <linux/errno.h> @@ -256,9 +255,9 @@ int copy_siginfo_from_user32(siginfo_t *to, struct compat_siginfo __user *from) return err; } -long _compat_sys_sigaltstack(const struct compat_sigaltstack __user *uss_ptr, - struct compat_sigaltstack __user *uoss_ptr, - struct pt_regs *regs) +long compat_sys_sigaltstack(const struct compat_sigaltstack __user *uss_ptr, + struct compat_sigaltstack __user *uoss_ptr, + struct pt_regs *regs) { stack_t uss, uoss; int ret; @@ -291,12 +290,12 @@ long _compat_sys_sigaltstack(const struct compat_sigaltstack __user *uss_ptr, return ret; } -long _compat_sys_rt_sigreturn(struct pt_regs *regs) +/* The assembly shim for this function arranges to ignore the return value. 
*/ +long compat_sys_rt_sigreturn(struct pt_regs *regs) { struct compat_rt_sigframe __user *frame = (struct compat_rt_sigframe __user *) compat_ptr(regs->sp); sigset_t set; - long r0; if (!access_ok(VERIFY_READ, frame, sizeof(*frame))) goto badframe; @@ -309,13 +308,13 @@ long _compat_sys_rt_sigreturn(struct pt_regs *regs) recalc_sigpending(); spin_unlock_irq(&current->sighand->siglock); - if (restore_sigcontext(regs, &frame->uc.uc_mcontext, &r0)) + if (restore_sigcontext(regs, &frame->uc.uc_mcontext)) goto badframe; - if (_compat_sys_sigaltstack(&frame->uc.uc_stack, NULL, regs) != 0) + if (compat_sys_sigaltstack(&frame->uc.uc_stack, NULL, regs) != 0) goto badframe; - return r0; + return 0; badframe: force_sig(SIGSEGV, current); diff --git a/arch/tile/kernel/early_printk.c b/arch/tile/kernel/early_printk.c index 2c54fd43a8a0..493a0e66d916 100644 --- a/arch/tile/kernel/early_printk.c +++ b/arch/tile/kernel/early_printk.c @@ -54,7 +54,7 @@ void early_printk(const char *fmt, ...) void early_panic(const char *fmt, ...) { va_list ap; - raw_local_irq_disable_all(); + arch_local_irq_disable_all(); va_start(ap, fmt); early_printk("Kernel panic - not syncing: "); early_vprintk(fmt, ap); diff --git a/arch/tile/kernel/entry.S b/arch/tile/kernel/entry.S index 3d01383b1b0e..fd8dc42abdcb 100644 --- a/arch/tile/kernel/entry.S +++ b/arch/tile/kernel/entry.S @@ -15,7 +15,9 @@ #include <linux/linkage.h> #include <linux/unistd.h> #include <asm/irqflags.h> +#include <asm/processor.h> #include <arch/abi.h> +#include <arch/spr_def.h> #ifdef __tilegx__ #define bnzt bnezt @@ -25,28 +27,6 @@ STD_ENTRY(current_text_addr) { move r0, lr; jrp lr } STD_ENDPROC(current_text_addr) -STD_ENTRY(_sim_syscall) - /* - * Wait for r0-r9 to be ready (and lr on the off chance we - * want the syscall to locate its caller), then make a magic - * simulator syscall. - * - * We carefully stall until the registers are readable in case they - * are the target of a slow load, etc. 
so that tile-sim will - * definitely be able to read all of them inside the magic syscall. - * - * Technically this is wrong for r3-r9 and lr, since an interrupt - * could come in and restore the registers with a slow load right - * before executing the mtspr. We may need to modify tile-sim to - * explicitly stall for this case, but we do not yet have - * a way to implement such a stall. - */ - { and zero, lr, r9 ; and zero, r8, r7 } - { and zero, r6, r5 ; and zero, r4, r3 } - { and zero, r2, r1 ; mtspr SIM_CONTROL, r0 } - { jrp lr } - STD_ENDPROC(_sim_syscall) - /* * Implement execve(). The i386 code has a note that forking from kernel * space results in no copy on write until the execve, so we should be @@ -102,7 +82,7 @@ STD_ENTRY(KBacktraceIterator_init_current) STD_ENTRY(cpu_idle_on_new_stack) { move sp, r1 - mtspr SYSTEM_SAVE_1_0, r2 + mtspr SPR_SYSTEM_SAVE_K_0, r2 } jal free_thread_info j cpu_idle @@ -124,15 +104,15 @@ STD_ENTRY(smp_nap) STD_ENTRY(_cpu_idle) { lnk r0 - movei r1, 1 + movei r1, KERNEL_PL } { addli r0, r0, _cpu_idle_nap - . mtspr INTERRUPT_CRITICAL_SECTION, r1 } - IRQ_ENABLE(r2, r3) /* unmask, but still with ICS set */ - mtspr EX_CONTEXT_1_1, r1 /* PL1, ICS clear */ - mtspr EX_CONTEXT_1_0, r0 + IRQ_ENABLE(r2, r3) /* unmask, but still with ICS set */ + mtspr SPR_EX_CONTEXT_K_1, r1 /* Kernel PL, ICS clear */ + mtspr SPR_EX_CONTEXT_K_0, r0 iret .global _cpu_idle_nap _cpu_idle_nap: diff --git a/arch/tile/kernel/hardwall.c b/arch/tile/kernel/hardwall.c index 584b965dc824..e910530436e6 100644 --- a/arch/tile/kernel/hardwall.c +++ b/arch/tile/kernel/hardwall.c @@ -151,12 +151,12 @@ enum direction_protect { static void enable_firewall_interrupts(void) { - raw_local_irq_unmask_now(INT_UDN_FIREWALL); + arch_local_irq_unmask_now(INT_UDN_FIREWALL); } static void disable_firewall_interrupts(void) { - raw_local_irq_mask_now(INT_UDN_FIREWALL); + arch_local_irq_mask_now(INT_UDN_FIREWALL); } /* Set up hardwall on this cpu based on the passed hardwall_info. 
*/ @@ -768,6 +768,7 @@ static int hardwall_release(struct inode *inode, struct file *file) } static const struct file_operations dev_hardwall_fops = { + .open = nonseekable_open, .unlocked_ioctl = hardwall_ioctl, #ifdef CONFIG_COMPAT .compat_ioctl = hardwall_compat_ioctl, diff --git a/arch/tile/kernel/head_32.S b/arch/tile/kernel/head_32.S index 2b4f6c091701..90e7c4435693 100644 --- a/arch/tile/kernel/head_32.S +++ b/arch/tile/kernel/head_32.S @@ -23,6 +23,7 @@ #include <asm/asm-offsets.h> #include <hv/hypervisor.h> #include <arch/chip.h> +#include <arch/spr_def.h> /* * This module contains the entry code for kernel images. It performs the @@ -76,7 +77,7 @@ ENTRY(_start) } 1: - /* Get our processor number and save it away in SAVE_1_0. */ + /* Get our processor number and save it away in SAVE_K_0. */ jal hv_inquire_topology mulll_uu r4, r1, r2 /* r1 == y, r2 == width */ add r4, r4, r0 /* r0 == x, so r4 == cpu == y*width + x */ @@ -124,7 +125,7 @@ ENTRY(_start) lw r0, r0 lw sp, r1 or r4, sp, r4 - mtspr SYSTEM_SAVE_1_0, r4 /* save ksp0 + cpu */ + mtspr SPR_SYSTEM_SAVE_K_0, r4 /* save ksp0 + cpu */ addi sp, sp, -STACK_TOP_DELTA { move lr, zero /* stop backtraces in the called function */ diff --git a/arch/tile/kernel/intvec_32.S b/arch/tile/kernel/intvec_32.S index 8f58bdff20d7..5eed4a02bf62 100644 --- a/arch/tile/kernel/intvec_32.S +++ b/arch/tile/kernel/intvec_32.S @@ -32,8 +32,8 @@ # error "No support for kernel preemption currently" #endif -#if INT_INTCTRL_1 < 32 || INT_INTCTRL_1 >= 48 -# error INT_INTCTRL_1 coded to set high interrupt mask +#if INT_INTCTRL_K < 32 || INT_INTCTRL_K >= 48 +# error INT_INTCTRL_K coded to set high interrupt mask #endif #define PTREGS_PTR(reg, ptreg) addli reg, sp, C_ABI_SAVE_AREA_SIZE + (ptreg) @@ -132,8 +132,8 @@ intvec_\vecname: /* Temporarily save a register so we have somewhere to work. 
*/ - mtspr SYSTEM_SAVE_1_1, r0 - mfspr r0, EX_CONTEXT_1_1 + mtspr SPR_SYSTEM_SAVE_K_1, r0 + mfspr r0, SPR_EX_CONTEXT_K_1 /* The cmpxchg code clears sp to force us to reset it here on fault. */ { @@ -167,18 +167,18 @@ intvec_\vecname: * The page_fault handler may be downcalled directly by the * hypervisor even when Linux is running and has ICS set. * - * In this case the contents of EX_CONTEXT_1_1 reflect the + * In this case the contents of EX_CONTEXT_K_1 reflect the * previous fault and can't be relied on to choose whether or * not to reinitialize the stack pointer. So we add a test - * to see whether SYSTEM_SAVE_1_2 has the high bit set, + * to see whether SYSTEM_SAVE_K_2 has the high bit set, * and if so we don't reinitialize sp, since we must be coming * from Linux. (In fact the precise case is !(val & ~1), * but any Linux PC has to have the high bit set.) * - * Note that the hypervisor *always* sets SYSTEM_SAVE_1_2 for + * Note that the hypervisor *always* sets SYSTEM_SAVE_K_2 for * any path that turns into a downcall to one of our TLB handlers. */ - mfspr r0, SYSTEM_SAVE_1_2 + mfspr r0, SPR_SYSTEM_SAVE_K_2 { blz r0, 0f /* high bit in S_S_1_2 is for a PC to use */ move r0, sp @@ -187,12 +187,12 @@ intvec_\vecname: 2: /* - * SYSTEM_SAVE_1_0 holds the cpu number in the low bits, and + * SYSTEM_SAVE_K_0 holds the cpu number in the low bits, and * the current stack top in the higher bits. So we recover * our stack top by just masking off the low bits, then * point sp at the top aligned address on the actual stack page. 
*/ - mfspr r0, SYSTEM_SAVE_1_0 + mfspr r0, SPR_SYSTEM_SAVE_K_0 mm r0, r0, zero, LOG2_THREAD_SIZE, 31 0: @@ -254,7 +254,7 @@ intvec_\vecname: sw sp, r3 addli sp, sp, PTREGS_OFFSET_PC - PTREGS_OFFSET_REG(3) } - mfspr r0, EX_CONTEXT_1_0 + mfspr r0, SPR_EX_CONTEXT_K_0 .ifc \processing,handle_syscall /* * Bump the saved PC by one bundle so that when we return, we won't @@ -267,7 +267,7 @@ intvec_\vecname: sw sp, r0 addli sp, sp, PTREGS_OFFSET_EX1 - PTREGS_OFFSET_PC } - mfspr r0, EX_CONTEXT_1_1 + mfspr r0, SPR_EX_CONTEXT_K_1 { sw sp, r0 addi sp, sp, PTREGS_OFFSET_FAULTNUM - PTREGS_OFFSET_EX1 @@ -289,7 +289,7 @@ intvec_\vecname: .endif addli sp, sp, PTREGS_OFFSET_REG(0) - PTREGS_OFFSET_FAULTNUM } - mfspr r0, SYSTEM_SAVE_1_1 /* Original r0 */ + mfspr r0, SPR_SYSTEM_SAVE_K_1 /* Original r0 */ { sw sp, r0 addi sp, sp, -PTREGS_OFFSET_REG(0) - 4 @@ -309,12 +309,12 @@ intvec_\vecname: * See discussion below at "finish_interrupt_save". */ .ifc \c_routine, do_page_fault - mfspr r2, SYSTEM_SAVE_1_3 /* address of page fault */ - mfspr r3, SYSTEM_SAVE_1_2 /* info about page fault */ + mfspr r2, SPR_SYSTEM_SAVE_K_3 /* address of page fault */ + mfspr r3, SPR_SYSTEM_SAVE_K_2 /* info about page fault */ .else .ifc \vecnum, INT_DOUBLE_FAULT { - mfspr r2, SYSTEM_SAVE_1_2 /* double fault info from HV */ + mfspr r2, SPR_SYSTEM_SAVE_K_2 /* double fault info from HV */ movei r3, 0 } .else @@ -467,7 +467,7 @@ intvec_\vecname: /* Load tp with our per-cpu offset. */ #ifdef CONFIG_SMP { - mfspr r20, SYSTEM_SAVE_1_0 + mfspr r20, SPR_SYSTEM_SAVE_K_0 moveli r21, lo16(__per_cpu_offset) } { @@ -487,7 +487,7 @@ intvec_\vecname: * We load flags in r32 here so we can jump to .Lrestore_regs * directly after do_page_fault_ics() if necessary. 
*/ - mfspr r32, EX_CONTEXT_1_1 + mfspr r32, SPR_EX_CONTEXT_K_1 { andi r32, r32, SPR_EX_CONTEXT_1_1__PL_MASK /* mask off ICS */ PTREGS_PTR(r21, PTREGS_OFFSET_FLAGS) @@ -957,11 +957,11 @@ STD_ENTRY(interrupt_return) pop_reg_zero r21, r3, sp, PTREGS_OFFSET_EX1 - PTREGS_OFFSET_PC pop_reg_zero lr, r4, sp, PTREGS_OFFSET_REG(52) - PTREGS_OFFSET_EX1 { - mtspr EX_CONTEXT_1_0, r21 + mtspr SPR_EX_CONTEXT_K_0, r21 move r5, zero } { - mtspr EX_CONTEXT_1_1, lr + mtspr SPR_EX_CONTEXT_K_1, lr andi lr, lr, SPR_EX_CONTEXT_1_1__PL_MASK /* mask off ICS */ } @@ -1020,7 +1020,7 @@ STD_ENTRY(interrupt_return) /* Set r1 to errno if we are returning an error, otherwise zero. */ { - moveli r29, 1024 + moveli r29, 4096 sub r1, zero, r0 } slt_u r29, r1, r29 @@ -1199,7 +1199,7 @@ STD_ENTRY(interrupt_return) STD_ENDPROC(interrupt_return) /* - * This interrupt variant clears the INT_INTCTRL_1 interrupt mask bit + * This interrupt variant clears the INT_INTCTRL_K interrupt mask bit * before returning, so we can properly get more downcalls. */ .pushsection .text.handle_interrupt_downcall,"ax" @@ -1208,11 +1208,11 @@ handle_interrupt_downcall: check_single_stepping normal, .Ldispatch_downcall .Ldispatch_downcall: - /* Clear INTCTRL_1 from the set of interrupts we ever enable. */ + /* Clear INTCTRL_K from the set of interrupts we ever enable. */ GET_INTERRUPTS_ENABLED_MASK_PTR(r30) { addi r30, r30, 4 - movei r31, INT_MASK(INT_INTCTRL_1) + movei r31, INT_MASK(INT_INTCTRL_K) } { lw r20, r30 @@ -1227,7 +1227,7 @@ handle_interrupt_downcall: } FEEDBACK_REENTER(handle_interrupt_downcall) - /* Allow INTCTRL_1 to be enabled next time we enable interrupts. */ + /* Allow INTCTRL_K to be enabled next time we enable interrupts. */ lw r20, r30 or r20, r20, r31 sw r30, r20 @@ -1342,8 +1342,8 @@ handle_syscall: lw r20, r20 /* Jump to syscall handler. 
*/ - jalr r20; .Lhandle_syscall_link: - FEEDBACK_REENTER(handle_syscall) + jalr r20 +.Lhandle_syscall_link: /* value of "lr" after "jalr r20" above */ /* * Write our r0 onto the stack so it gets restored instead @@ -1352,6 +1352,9 @@ handle_syscall: PTREGS_PTR(r29, PTREGS_OFFSET_REG(0)) sw r29, r0 +.Lsyscall_sigreturn_skip: + FEEDBACK_REENTER(handle_syscall) + /* Do syscall trace again, if requested. */ lw r30, r31 andi r30, r30, _TIF_SYSCALL_TRACE @@ -1472,7 +1475,12 @@ handle_ill: lw r26, r24 sw r28, r26 - /* Clear TIF_SINGLESTEP */ + /* + * Clear TIF_SINGLESTEP to prevent recursion if we execute an ill. + * The normal non-arch flow redundantly clears TIF_SINGLESTEP, but we + * need to clear it here and can't really impose on all other arches. + * So what's another write between friends? + */ GET_THREAD_INFO(r0) addi r1, r0, THREAD_INFO_FLAGS_OFFSET @@ -1509,7 +1517,7 @@ handle_ill: /* Various stub interrupt handlers and syscall handlers */ STD_ENTRY_LOCAL(_kernel_double_fault) - mfspr r1, EX_CONTEXT_1_0 + mfspr r1, SPR_EX_CONTEXT_K_0 move r2, lr move r3, sp move r4, r52 @@ -1518,34 +1526,44 @@ STD_ENTRY_LOCAL(_kernel_double_fault) STD_ENDPROC(_kernel_double_fault) STD_ENTRY_LOCAL(bad_intr) - mfspr r2, EX_CONTEXT_1_0 + mfspr r2, SPR_EX_CONTEXT_K_0 panic "Unhandled interrupt %#x: PC %#lx" STD_ENDPROC(bad_intr) /* Put address of pt_regs in reg and jump. */ #define PTREGS_SYSCALL(x, reg) \ - STD_ENTRY(x); \ + STD_ENTRY(_##x); \ { \ PTREGS_PTR(reg, PTREGS_OFFSET_BASE); \ - j _##x \ + j x \ }; \ - STD_ENDPROC(x) + STD_ENDPROC(_##x) + +/* + * Special-case sigreturn to not write r0 to the stack on return. + * This is technically more efficient, but it also avoids difficulties + * in the 64-bit OS when handling 32-bit compat code, since we must not + * sign-extend r0 for the sigreturn return-value case. 
+ */ +#define PTREGS_SYSCALL_SIGRETURN(x, reg) \ + STD_ENTRY(_##x); \ + addli lr, lr, .Lsyscall_sigreturn_skip - .Lhandle_syscall_link; \ + { \ + PTREGS_PTR(reg, PTREGS_OFFSET_BASE); \ + j x \ + }; \ + STD_ENDPROC(_##x) PTREGS_SYSCALL(sys_execve, r3) PTREGS_SYSCALL(sys_sigaltstack, r2) -PTREGS_SYSCALL(sys_rt_sigreturn, r0) +PTREGS_SYSCALL_SIGRETURN(sys_rt_sigreturn, r0) +PTREGS_SYSCALL(sys_cmpxchg_badaddr, r1) -/* Save additional callee-saves to pt_regs, put address in reg and jump. */ -#define PTREGS_SYSCALL_ALL_REGS(x, reg) \ - STD_ENTRY(x); \ - push_extra_callee_saves reg; \ - j _##x; \ - STD_ENDPROC(x) - -PTREGS_SYSCALL_ALL_REGS(sys_fork, r0) -PTREGS_SYSCALL_ALL_REGS(sys_vfork, r0) -PTREGS_SYSCALL_ALL_REGS(sys_clone, r4) -PTREGS_SYSCALL_ALL_REGS(sys_cmpxchg_badaddr, r1) +/* Save additional callee-saves to pt_regs, put address in r4 and jump. */ +STD_ENTRY(_sys_clone) + push_extra_callee_saves r4 + j sys_clone + STD_ENDPROC(_sys_clone) /* * This entrypoint is taken for the cmpxchg and atomic_update fast @@ -1558,12 +1576,14 @@ PTREGS_SYSCALL_ALL_REGS(sys_cmpxchg_badaddr, r1) * to be available to it on entry. It does not modify any callee-save * registers (including "lr"). It does not check what PL it is being * called at, so you'd better not call it other than at PL0. + * The <atomic.h> wrapper assumes it only clobbers r20-r29, so if + * it ever is necessary to use more registers, be aware. * * It does not use the stack, but since it might be re-interrupted by * a page fault which would assume the stack was valid, it does * save/restore the stack pointer and zero it out to make sure it gets reset. * Since we always keep interrupts disabled, the hypervisor won't - * clobber our EX_CONTEXT_1_x registers, so we don't save/restore them + * clobber our EX_CONTEXT_K_x registers, so we don't save/restore them * (other than to advance the PC on return). 
* * We have to manually validate the user vs kernel address range @@ -1769,7 +1789,7 @@ ENTRY(sys_cmpxchg) /* Do slow mtspr here so the following "mf" waits less. */ { move sp, r27 - mtspr EX_CONTEXT_1_0, r28 + mtspr SPR_EX_CONTEXT_K_0, r28 } mf @@ -1788,7 +1808,7 @@ ENTRY(sys_cmpxchg) } { move sp, r27 - mtspr EX_CONTEXT_1_0, r28 + mtspr SPR_EX_CONTEXT_K_0, r28 } iret @@ -1816,7 +1836,7 @@ ENTRY(sys_cmpxchg) #endif /* Issue the slow SPR here while the tns result is in flight. */ - mfspr r28, EX_CONTEXT_1_0 + mfspr r28, SPR_EX_CONTEXT_K_0 { addi r28, r28, 8 /* return to the instruction after the swint1 */ @@ -1904,7 +1924,7 @@ ENTRY(sys_cmpxchg) .Lcmpxchg64_mismatch: { move sp, r27 - mtspr EX_CONTEXT_1_0, r28 + mtspr SPR_EX_CONTEXT_K_0, r28 } mf { @@ -1985,8 +2005,13 @@ int_unalign: int_hand INT_PERF_COUNT, PERF_COUNT, \ op_handle_perf_interrupt, handle_nmi int_hand INT_INTCTRL_3, INTCTRL_3, bad_intr +#if CONFIG_KERNEL_PL == 2 + dc_dispatch INT_INTCTRL_2, INTCTRL_2 + int_hand INT_INTCTRL_1, INTCTRL_1, bad_intr +#else int_hand INT_INTCTRL_2, INTCTRL_2, bad_intr dc_dispatch INT_INTCTRL_1, INTCTRL_1 +#endif int_hand INT_INTCTRL_0, INTCTRL_0, bad_intr int_hand INT_MESSAGE_RCV_DWNCL, MESSAGE_RCV_DWNCL, \ hv_message_intr, handle_interrupt_downcall diff --git a/arch/tile/kernel/irq.c b/arch/tile/kernel/irq.c index 9a27d563fc30..128805ef8f2c 100644 --- a/arch/tile/kernel/irq.c +++ b/arch/tile/kernel/irq.c @@ -26,7 +26,7 @@ #define IS_HW_CLEARED 1 /* - * The set of interrupts we enable for raw_local_irq_enable(). + * The set of interrupts we enable for arch_local_irq_enable(). * This is initialized to have just a single interrupt that the kernel * doesn't actually use as a sentinel. During kernel init, * interrupts are added as the kernel gets prepared to support them. @@ -61,9 +61,9 @@ static DEFINE_SPINLOCK(available_irqs_lock); #if CHIP_HAS_IPI() /* Use SPRs to manipulate device interrupts. 
*/ -#define mask_irqs(irq_mask) __insn_mtspr(SPR_IPI_MASK_SET_1, irq_mask) -#define unmask_irqs(irq_mask) __insn_mtspr(SPR_IPI_MASK_RESET_1, irq_mask) -#define clear_irqs(irq_mask) __insn_mtspr(SPR_IPI_EVENT_RESET_1, irq_mask) +#define mask_irqs(irq_mask) __insn_mtspr(SPR_IPI_MASK_SET_K, irq_mask) +#define unmask_irqs(irq_mask) __insn_mtspr(SPR_IPI_MASK_RESET_K, irq_mask) +#define clear_irqs(irq_mask) __insn_mtspr(SPR_IPI_EVENT_RESET_K, irq_mask) #else /* Use HV to manipulate device interrupts. */ #define mask_irqs(irq_mask) hv_disable_intr(irq_mask) @@ -89,16 +89,16 @@ void tile_dev_intr(struct pt_regs *regs, int intnum) * masked by a previous interrupt. Then, mask out the ones * we're going to handle. */ - unsigned long masked = __insn_mfspr(SPR_IPI_MASK_1); - original_irqs = __insn_mfspr(SPR_IPI_EVENT_1) & ~masked; - __insn_mtspr(SPR_IPI_MASK_SET_1, original_irqs); + unsigned long masked = __insn_mfspr(SPR_IPI_MASK_K); + original_irqs = __insn_mfspr(SPR_IPI_EVENT_K) & ~masked; + __insn_mtspr(SPR_IPI_MASK_SET_K, original_irqs); #else /* * Hypervisor performs the equivalent of the Gx code above and * then puts the pending interrupt mask into a system save reg * for us to find. */ - original_irqs = __insn_mfspr(SPR_SYSTEM_SAVE_1_3); + original_irqs = __insn_mfspr(SPR_SYSTEM_SAVE_K_3); #endif remaining_irqs = original_irqs; @@ -225,7 +225,7 @@ void __cpuinit setup_irq_regs(void) /* Enable interrupt delivery. 
*/ unmask_irqs(~0UL); #if CHIP_HAS_IPI() - raw_local_irq_unmask(INT_IPI_1); + arch_local_irq_unmask(INT_IPI_K); #endif } diff --git a/arch/tile/kernel/machine_kexec.c b/arch/tile/kernel/machine_kexec.c index ba7a265d6179..0d8b9e933487 100644 --- a/arch/tile/kernel/machine_kexec.c +++ b/arch/tile/kernel/machine_kexec.c @@ -182,13 +182,13 @@ static void kexec_find_and_set_command_line(struct kimage *image) if ((entry & IND_SOURCE)) { void *va = - kmap_atomic_pfn(entry >> PAGE_SHIFT, KM_USER0); + kmap_atomic_pfn(entry >> PAGE_SHIFT); r = kexec_bn2cl(va); if (r) { command_line = r; break; } - kunmap_atomic(va, KM_USER0); + kunmap_atomic(va); } } @@ -198,7 +198,7 @@ static void kexec_find_and_set_command_line(struct kimage *image) hverr = hv_set_command_line( (HV_VirtAddr) command_line, strlen(command_line)); - kunmap_atomic(command_line, KM_USER0); + kunmap_atomic(command_line); } else { pr_info("%s: no command line found; making empty\n", __func__); diff --git a/arch/tile/kernel/messaging.c b/arch/tile/kernel/messaging.c index 6d23ed271d10..0858ee6b520f 100644 --- a/arch/tile/kernel/messaging.c +++ b/arch/tile/kernel/messaging.c @@ -34,7 +34,7 @@ void __cpuinit init_messaging(void) panic("hv_register_message_state: error %d", rc); /* Make sure downcall interrupts will be enabled. */ - raw_local_irq_unmask(INT_INTCTRL_1); + arch_local_irq_unmask(INT_INTCTRL_K); } void hv_message_intr(struct pt_regs *regs, int intnum) diff --git a/arch/tile/kernel/pci.c b/arch/tile/kernel/pci.c new file mode 100644 index 000000000000..a1ee25be9ad9 --- /dev/null +++ b/arch/tile/kernel/pci.c @@ -0,0 +1,621 @@ +/* + * Copyright 2010 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. 
+ * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + */ + +#include <linux/kernel.h> +#include <linux/pci.h> +#include <linux/delay.h> +#include <linux/string.h> +#include <linux/init.h> +#include <linux/capability.h> +#include <linux/sched.h> +#include <linux/errno.h> +#include <linux/bootmem.h> +#include <linux/irq.h> +#include <linux/io.h> +#include <linux/uaccess.h> + +#include <asm/processor.h> +#include <asm/sections.h> +#include <asm/byteorder.h> +#include <asm/hv_driver.h> +#include <hv/drv_pcie_rc_intf.h> + + +/* + * Initialization flow and process + * ------------------------------- + * + * This file contains the routines to search for PCI buses, + * enumerate the buses, and configure any attached devices. + * + * There are two entry points here: + * 1) tile_pci_init + * This sets up the pci_controller structs, and opens the + * FDs to the hypervisor. This is called from setup_arch() early + * in the boot process. + * 2) pcibios_init + * This probes the PCI bus(es) for any attached hardware. It's + * called by subsys_initcall. All of the real work is done by the + * generic Linux PCI layer. + * + */ + +/* + * This flag tells if the platform is TILEmpower that needs + * special configuration for the PLX switch chip. + */ +int __write_once tile_plx_gen1; + +static struct pci_controller controllers[TILE_NUM_PCIE]; +static int num_controllers; + +static struct pci_ops tile_cfg_ops; + + +/* + * We don't need to worry about the alignment of resources. + */ +resource_size_t pcibios_align_resource(void *data, const struct resource *res, + resource_size_t size, resource_size_t align) +{ + return res->start; +} +EXPORT_SYMBOL(pcibios_align_resource); + +/* + * Open a FD to the hypervisor PCI device. 
+ * + * controller_id is the controller number, config type is 0 or 1 for + * config0 or config1 operations. + */ +static int __init tile_pcie_open(int controller_id, int config_type) +{ + char filename[32]; + int fd; + + sprintf(filename, "pcie/%d/config%d", controller_id, config_type); + + fd = hv_dev_open((HV_VirtAddr)filename, 0); + + return fd; +} + + +/* + * Get the IRQ numbers from the HV and set up the handlers for them. + */ +static int __init tile_init_irqs(int controller_id, + struct pci_controller *controller) +{ + char filename[32]; + int fd; + int ret; + int x; + struct pcie_rc_config rc_config; + + sprintf(filename, "pcie/%d/ctl", controller_id); + fd = hv_dev_open((HV_VirtAddr)filename, 0); + if (fd < 0) { + pr_err("PCI: hv_dev_open(%s) failed\n", filename); + return -1; + } + ret = hv_dev_pread(fd, 0, (HV_VirtAddr)(&rc_config), + sizeof(rc_config), PCIE_RC_CONFIG_MASK_OFF); + hv_dev_close(fd); + if (ret != sizeof(rc_config)) { + pr_err("PCI: wanted %zd bytes, got %d\n", + sizeof(rc_config), ret); + return -1; + } + /* Record irq_base so that we can map INTx to IRQ # later. */ + controller->irq_base = rc_config.intr; + + for (x = 0; x < 4; x++) + tile_irq_activate(rc_config.intr + x, + TILE_IRQ_HW_CLEAR); + + if (rc_config.plx_gen1) + controller->plx_gen1 = 1; + + return 0; +} + +/* + * First initialization entry point, called from setup_arch(). + * + * Find valid controllers and fill in pci_controller structs for each + * of them. + * + * Returns the number of controllers discovered. + */ +int __init tile_pci_init(void) +{ + int i; + + pr_info("PCI: Searching for controllers...\n"); + + /* Do any configuration we need before using the PCIe */ + + for (i = 0; i < TILE_NUM_PCIE; i++) { + int hv_cfg_fd0 = -1; + int hv_cfg_fd1 = -1; + int hv_mem_fd = -1; + char name[32]; + struct pci_controller *controller; + + /* + * Open the fd to the HV. If it fails then this + * device doesn't exist. 
+ */ + hv_cfg_fd0 = tile_pcie_open(i, 0); + if (hv_cfg_fd0 < 0) + continue; + hv_cfg_fd1 = tile_pcie_open(i, 1); + if (hv_cfg_fd1 < 0) { + pr_err("PCI: Couldn't open config fd to HV " + "for controller %d\n", i); + goto err_cont; + } + + sprintf(name, "pcie/%d/mem", i); + hv_mem_fd = hv_dev_open((HV_VirtAddr)name, 0); + if (hv_mem_fd < 0) { + pr_err("PCI: Could not open mem fd to HV!\n"); + goto err_cont; + } + + pr_info("PCI: Found PCI controller #%d\n", i); + + controller = &controllers[num_controllers]; + + if (tile_init_irqs(i, controller)) { + pr_err("PCI: Could not initialize " + "IRQs, aborting.\n"); + goto err_cont; + } + + controller->index = num_controllers; + controller->hv_cfg_fd[0] = hv_cfg_fd0; + controller->hv_cfg_fd[1] = hv_cfg_fd1; + controller->hv_mem_fd = hv_mem_fd; + controller->first_busno = 0; + controller->last_busno = 0xff; + controller->ops = &tile_cfg_ops; + + num_controllers++; + continue; + +err_cont: + if (hv_cfg_fd0 >= 0) + hv_dev_close(hv_cfg_fd0); + if (hv_cfg_fd1 >= 0) + hv_dev_close(hv_cfg_fd1); + if (hv_mem_fd >= 0) + hv_dev_close(hv_mem_fd); + continue; + } + + /* + * Before using the PCIe, see if we need to do any platform-specific + * configuration, such as the PLX switch Gen 1 issue on TILEmpower. + */ + for (i = 0; i < num_controllers; i++) { + struct pci_controller *controller = &controllers[i]; + + if (controller->plx_gen1) + tile_plx_gen1 = 1; + } + + return num_controllers; +} + +/* + * (pin - 1) converts from the PCI standard's [1:4] convention to + * a normal [0:3] range. + */ +static int tile_map_irq(struct pci_dev *dev, u8 slot, u8 pin) +{ + struct pci_controller *controller = + (struct pci_controller *)dev->sysdata; + return (pin - 1) + controller->irq_base; +} + + +static void __init fixup_read_and_payload_sizes(void) +{ + struct pci_dev *dev = NULL; + int smallest_max_payload = 0x1; /* Tile maxes out at 256 bytes. */ + int max_read_size = 0x2; /* Limit to 512 byte reads. 
*/ + u16 new_values; + + /* Scan for the smallest maximum payload size. */ + while ((dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) { + int pcie_caps_offset; + u32 devcap; + int max_payload; + + pcie_caps_offset = pci_find_capability(dev, PCI_CAP_ID_EXP); + if (pcie_caps_offset == 0) + continue; + + pci_read_config_dword(dev, pcie_caps_offset + PCI_EXP_DEVCAP, + &devcap); + max_payload = devcap & PCI_EXP_DEVCAP_PAYLOAD; + if (max_payload < smallest_max_payload) + smallest_max_payload = max_payload; + } + + /* Now, set the max_payload_size for all devices to that value. */ + new_values = (max_read_size << 12) | (smallest_max_payload << 5); + while ((dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) { + int pcie_caps_offset; + u16 devctl; + + pcie_caps_offset = pci_find_capability(dev, PCI_CAP_ID_EXP); + if (pcie_caps_offset == 0) + continue; + + pci_read_config_word(dev, pcie_caps_offset + PCI_EXP_DEVCTL, + &devctl); + devctl &= ~(PCI_EXP_DEVCTL_PAYLOAD | PCI_EXP_DEVCTL_READRQ); + devctl |= new_values; + pci_write_config_word(dev, pcie_caps_offset + PCI_EXP_DEVCTL, + devctl); + } +} + + +/* + * Second PCI initialization entry point, called by subsys_initcall. + * + * The controllers have been set up by the time we get here, by a call to + * tile_pci_init. + */ +static int __init pcibios_init(void) +{ + int i; + + pr_info("PCI: Probing PCI hardware\n"); + + /* + * Delay a bit in case devices aren't ready. Some devices are + * known to require at least 20ms here, but we use a more + * conservative value. + */ + mdelay(250); + + /* Scan all of the recorded PCI controllers. */ + for (i = 0; i < num_controllers; i++) { + struct pci_controller *controller = &controllers[i]; + struct pci_bus *bus; + + pr_info("PCI: initializing controller #%d\n", i); + + /* + * This comes from the generic Linux PCI driver. + * + * It reads the PCI tree for this bus into the Linux + * data structures. 
+ * + * This is inlined in linux/pci.h and calls into + * pci_scan_bus_parented() in probe.c. + */ + bus = pci_scan_bus(0, controller->ops, controller); + controller->root_bus = bus; + controller->last_busno = bus->subordinate; + + } + + /* Do machine dependent PCI interrupt routing */ + pci_fixup_irqs(pci_common_swizzle, tile_map_irq); + + /* + * This comes from the generic Linux PCI driver. + * + * It allocates all of the resources (I/O memory, etc) + * associated with the devices read in above. + */ + + pci_assign_unassigned_resources(); + + /* Configure the max_read_size and max_payload_size values. */ + fixup_read_and_payload_sizes(); + + /* Record the I/O resources in the PCI controller structure. */ + for (i = 0; i < num_controllers; i++) { + struct pci_bus *root_bus = controllers[i].root_bus; + struct pci_bus *next_bus; + struct pci_dev *dev; + + list_for_each_entry(dev, &root_bus->devices, bus_list) { + /* Find the PCI host controller, ie. the 1st bridge. */ + if ((dev->class >> 8) == PCI_CLASS_BRIDGE_PCI && + (PCI_SLOT(dev->devfn) == 0)) { + next_bus = dev->subordinate; + controllers[i].mem_resources[0] = + *next_bus->resource[0]; + controllers[i].mem_resources[1] = + *next_bus->resource[1]; + controllers[i].mem_resources[2] = + *next_bus->resource[2]; + + break; + } + } + + } + + return 0; +} +subsys_initcall(pcibios_init); + +/* + * No bus fixups needed. + */ +void __devinit pcibios_fixup_bus(struct pci_bus *bus) +{ + /* Nothing needs to be done. */ +} + +/* + * This can be called from the generic PCI layer, but doesn't need to + * do anything. + */ +char __devinit *pcibios_setup(char *str) +{ + /* Nothing needs to be done. */ + return str; +} + +/* + * This is called from the generic Linux layer. + */ +void __init pcibios_update_irq(struct pci_dev *dev, int irq) +{ + pci_write_config_byte(dev, PCI_INTERRUPT_LINE, irq); +} + +/* + * Enable memory and/or address decoding, as appropriate, for the + * device described by the 'dev' struct. 
+ * + * This is called from the generic PCI layer, and can be called + * for bridges or endpoints. + */ +int pcibios_enable_device(struct pci_dev *dev, int mask) +{ + u16 cmd, old_cmd; + u8 header_type; + int i; + struct resource *r; + + pci_read_config_byte(dev, PCI_HEADER_TYPE, &header_type); + + pci_read_config_word(dev, PCI_COMMAND, &cmd); + old_cmd = cmd; + if ((header_type & 0x7F) == PCI_HEADER_TYPE_BRIDGE) { + /* + * For bridges, we enable both memory and I/O decoding + * in all cases. + */ + cmd |= PCI_COMMAND_IO; + cmd |= PCI_COMMAND_MEMORY; + } else { + /* + * For endpoints, we enable memory and/or I/O decoding + * only if they have a memory resource of that type. + */ + for (i = 0; i < 6; i++) { + r = &dev->resource[i]; + if (r->flags & IORESOURCE_UNSET) { + pr_err("PCI: Device %s not available " + "because of resource collisions\n", + pci_name(dev)); + return -EINVAL; + } + if (r->flags & IORESOURCE_IO) + cmd |= PCI_COMMAND_IO; + if (r->flags & IORESOURCE_MEM) + cmd |= PCI_COMMAND_MEMORY; + } + } + + /* + * We only write the command if it changed. + */ + if (cmd != old_cmd) + pci_write_config_word(dev, PCI_COMMAND, cmd); + return 0; +} + +void __iomem *pci_iomap(struct pci_dev *dev, int bar, unsigned long max) +{ + unsigned long start = pci_resource_start(dev, bar); + unsigned long len = pci_resource_len(dev, bar); + unsigned long flags = pci_resource_flags(dev, bar); + + if (!len) + return NULL; + if (max && len > max) + len = max; + + if (!(flags & IORESOURCE_MEM)) { + pr_info("PCI: Trying to map invalid resource %#lx\n", flags); + start = 0; + } + + return (void __iomem *)start; +} +EXPORT_SYMBOL(pci_iomap); + + +/**************************************************************** + * + * Tile PCI config space read/write routines + * + ****************************************************************/ + +/* + * These are the normal read and write ops + * These are expanded with macros from pci_bus_read_config_byte() etc.
+ * + * devfn is the combined PCI slot & function. + * + * offset is in bytes, from the start of config space for the + * specified bus & slot. + */ + +static int __devinit tile_cfg_read(struct pci_bus *bus, + unsigned int devfn, + int offset, + int size, + u32 *val) +{ + struct pci_controller *controller = bus->sysdata; + int busnum = bus->number & 0xff; + int slot = (devfn >> 3) & 0x1f; + int function = devfn & 0x7; + u32 addr; + int config_mode = 1; + + /* + * There is no bridge between the Tile and bus 0, so we + * use config0 to talk to bus 0. + * + * If we're talking to a bus other than zero then we + * must have found a bridge. + */ + if (busnum == 0) { + /* + * We fake an empty slot for (busnum == 0) && (slot > 0), + * since there is only one slot on bus 0. + */ + if (slot) { + *val = 0xFFFFFFFF; + return 0; + } + config_mode = 0; + } + + addr = busnum << 20; /* Bus in 27:20 */ + addr |= slot << 15; /* Slot (device) in 19:15 */ + addr |= function << 12; /* Function is in 14:12 */ + addr |= (offset & 0xFFF); /* byte address in 0:11 */ + + return hv_dev_pread(controller->hv_cfg_fd[config_mode], 0, + (HV_VirtAddr)(val), size, addr); +} + + +/* + * See tile_cfg_read() for relevant comments. + * Note that "val" is the value to write, not a pointer to that value. + */ +static int __devinit tile_cfg_write(struct pci_bus *bus, + unsigned int devfn, + int offset, + int size, + u32 val) +{ + struct pci_controller *controller = bus->sysdata; + int busnum = bus->number & 0xff; + int slot = (devfn >> 3) & 0x1f; + int function = devfn & 0x7; + u32 addr; + int config_mode = 1; + HV_VirtAddr valp = (HV_VirtAddr)&val; + + /* + * For bus 0 slot 0 we use config 0 accesses. + */ + if (busnum == 0) { + /* + * We fake an empty slot for (busnum == 0) && (slot > 0), + * since there is only one slot on bus 0.
+ */ + if (slot) + return 0; + config_mode = 0; + } + + addr = busnum << 20; /* Bus in 27:20 */ + addr |= slot << 15; /* Slot (device) in 19:15 */ + addr |= function << 12; /* Function is in 14:12 */ + addr |= (offset & 0xFFF); /* byte address in 0:11 */ + +#ifdef __BIG_ENDIAN + /* Point to the correct part of the 32-bit "val". */ + valp += 4 - size; +#endif + + return hv_dev_pwrite(controller->hv_cfg_fd[config_mode], 0, + valp, size, addr); +} + + +static struct pci_ops tile_cfg_ops = { + .read = tile_cfg_read, + .write = tile_cfg_write, +}; + + +/* + * In the following, each PCI controller's mem_resources[1] + * represents its (non-prefetchable) PCI memory resource. + * mem_resources[0] and mem_resources[2] refer to its PCI I/O and + * prefetchable PCI memory resources, respectively. + * For more details, see pci_setup_bridge() in setup-bus.c. + * By comparing the target PCI memory address against the + * end address of controller 0, we can determine the controller + * that should accept the PCI memory access. 
+ */ +#define TILE_READ(size, type) \ +type _tile_read##size(unsigned long addr) \ +{ \ + type val; \ + int idx = 0; \ + if (addr > controllers[0].mem_resources[1].end && \ + addr > controllers[0].mem_resources[2].end) \ + idx = 1; \ + if (hv_dev_pread(controllers[idx].hv_mem_fd, 0, \ + (HV_VirtAddr)(&val), sizeof(type), addr)) \ + pr_err("PCI: read %zd bytes at 0x%lX failed\n", \ + sizeof(type), addr); \ + return val; \ +} \ +EXPORT_SYMBOL(_tile_read##size) + +TILE_READ(b, u8); +TILE_READ(w, u16); +TILE_READ(l, u32); +TILE_READ(q, u64); + +#define TILE_WRITE(size, type) \ +void _tile_write##size(type val, unsigned long addr) \ +{ \ + int idx = 0; \ + if (addr > controllers[0].mem_resources[1].end && \ + addr > controllers[0].mem_resources[2].end) \ + idx = 1; \ + if (hv_dev_pwrite(controllers[idx].hv_mem_fd, 0, \ + (HV_VirtAddr)(&val), sizeof(type), addr)) \ + pr_err("PCI: write %zd bytes at 0x%lX failed\n", \ + sizeof(type), addr); \ +} \ +EXPORT_SYMBOL(_tile_write##size) + +TILE_WRITE(b, u8); +TILE_WRITE(w, u16); +TILE_WRITE(l, u32); +TILE_WRITE(q, u64); diff --git a/arch/tile/kernel/process.c b/arch/tile/kernel/process.c index 84c29111756c..e90eb53173b0 100644 --- a/arch/tile/kernel/process.c +++ b/arch/tile/kernel/process.c @@ -212,11 +212,19 @@ int copy_thread(unsigned long clone_flags, unsigned long sp, childregs->sp = sp; /* override with new user stack pointer */ /* + * If CLONE_SETTLS is set, set "tp" in the new task to "r4", + * which is passed in as arg #5 to sys_clone(). + */ + if (clone_flags & CLONE_SETTLS) + childregs->tp = regs->regs[4]; + + /* * Copy the callee-saved registers from the passed pt_regs struct * into the context-switch callee-saved registers area. - * We have to restore the callee-saved registers since we may - * be cloning a userspace task with userspace register state, - * and we won't be unwinding the same kernel frames to restore them. 
+ * This way when we start the interrupt-return sequence, the + * callee-save registers will be correctly in registers, which + * is how we assume the compiler leaves them as we start doing + * the normal return-from-interrupt path after calling C code. * Zero out the C ABI save area to mark the top of the stack. */ ksp = (unsigned long) childregs; @@ -304,15 +312,25 @@ int dump_task_regs(struct task_struct *tsk, elf_gregset_t *regs) /* Allow user processes to access the DMA SPRs */ void grant_dma_mpls(void) { +#if CONFIG_KERNEL_PL == 2 + __insn_mtspr(SPR_MPL_DMA_CPL_SET_1, 1); + __insn_mtspr(SPR_MPL_DMA_NOTIFY_SET_1, 1); +#else __insn_mtspr(SPR_MPL_DMA_CPL_SET_0, 1); __insn_mtspr(SPR_MPL_DMA_NOTIFY_SET_0, 1); +#endif } /* Forbid user processes from accessing the DMA SPRs */ void restrict_dma_mpls(void) { +#if CONFIG_KERNEL_PL == 2 + __insn_mtspr(SPR_MPL_DMA_CPL_SET_2, 1); + __insn_mtspr(SPR_MPL_DMA_NOTIFY_SET_2, 1); +#else __insn_mtspr(SPR_MPL_DMA_CPL_SET_1, 1); __insn_mtspr(SPR_MPL_DMA_NOTIFY_SET_1, 1); +#endif } /* Pause the DMA engine, then save off its state registers. */ @@ -523,19 +541,15 @@ struct task_struct *__sched _switch_to(struct task_struct *prev, * Switch kernel SP, PC, and callee-saved registers. * In the context of the new task, return the old task pointer * (i.e. the task that actually called __switch_to). - * Pass the value to use for SYSTEM_SAVE_1_0 when we reset our sp. + * Pass the value to use for SYSTEM_SAVE_K_0 when we reset our sp. */ return __switch_to(prev, next, next_current_ksp0(next)); } -long _sys_fork(struct pt_regs *regs) -{ - return do_fork(SIGCHLD, regs->sp, regs, 0, NULL, NULL); -} - -long _sys_clone(unsigned long clone_flags, unsigned long newsp, - void __user *parent_tidptr, void __user *child_tidptr, - struct pt_regs *regs) +/* Note there is an implicit fifth argument if (clone_flags & CLONE_SETTLS). 
*/ +SYSCALL_DEFINE5(clone, unsigned long, clone_flags, unsigned long, newsp, + void __user *, parent_tidptr, void __user *, child_tidptr, + struct pt_regs *, regs) { if (!newsp) newsp = regs->sp; @@ -543,18 +557,13 @@ long _sys_clone(unsigned long clone_flags, unsigned long newsp, parent_tidptr, child_tidptr); } -long _sys_vfork(struct pt_regs *regs) -{ - return do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, regs->sp, - regs, 0, NULL, NULL); -} - /* * sys_execve() executes a new program. */ -long _sys_execve(const char __user *path, - const char __user *const __user *argv, - const char __user *const __user *envp, struct pt_regs *regs) +SYSCALL_DEFINE4(execve, const char __user *, path, + const char __user *const __user *, argv, + const char __user *const __user *, envp, + struct pt_regs *, regs) { long error; char *filename; @@ -570,9 +579,10 @@ out: } #ifdef CONFIG_COMPAT -long _compat_sys_execve(const char __user *path, - const compat_uptr_t __user *argv, - const compat_uptr_t __user *envp, struct pt_regs *regs) +long compat_sys_execve(const char __user *path, + const compat_uptr_t __user *argv, + const compat_uptr_t __user *envp, + struct pt_regs *regs) { long error; char *filename; diff --git a/arch/tile/kernel/ptrace.c b/arch/tile/kernel/ptrace.c index 7161bd03d2fd..e92e40527d6d 100644 --- a/arch/tile/kernel/ptrace.c +++ b/arch/tile/kernel/ptrace.c @@ -32,25 +32,6 @@ void user_disable_single_step(struct task_struct *child) } /* - * This routine will put a word on the process's privileged stack. 
- */ -static void putreg(struct task_struct *task, - unsigned long addr, unsigned long value) -{ - unsigned int regno = addr / sizeof(unsigned long); - struct pt_regs *childregs = task_pt_regs(task); - childregs->regs[regno] = value; - childregs->flags |= PT_FLAGS_RESTORE_REGS; -} - -static unsigned long getreg(struct task_struct *task, unsigned long addr) -{ - unsigned int regno = addr / sizeof(unsigned long); - struct pt_regs *childregs = task_pt_regs(task); - return childregs->regs[regno]; -} - -/* * Called by kernel/ptrace.c when detaching.. */ void ptrace_disable(struct task_struct *child) @@ -64,61 +45,80 @@ void ptrace_disable(struct task_struct *child) clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE); } -long arch_ptrace(struct task_struct *child, long request, long addr, long data) +long arch_ptrace(struct task_struct *child, long request, + unsigned long addr, unsigned long data) { - unsigned long __user *datap; + unsigned long __user *datap = (long __user __force *)data; unsigned long tmp; - int i; long ret = -EIO; - -#ifdef CONFIG_COMPAT - if (task_thread_info(current)->status & TS_COMPAT) - data = (u32)data; - if (task_thread_info(child)->status & TS_COMPAT) - addr = (u32)addr; -#endif - datap = (unsigned long __user __force *)data; + char *childreg; + struct pt_regs copyregs; + int ex1_offset; switch (request) { case PTRACE_PEEKUSR: /* Read register from pt_regs. */ - if (addr & (sizeof(data)-1)) - break; - if (addr < 0 || addr >= PTREGS_SIZE) + if (addr >= PTREGS_SIZE) break; - tmp = getreg(child, addr); /* Read register */ - ret = put_user(tmp, datap); + childreg = (char *)task_pt_regs(child) + addr; +#ifdef CONFIG_COMPAT + if (is_compat_task()) { + if (addr & (sizeof(compat_long_t)-1)) + break; + ret = put_user(*(compat_long_t *)childreg, + (compat_long_t __user *)datap); + } else +#endif + { + if (addr & (sizeof(long)-1)) + break; + ret = put_user(*(long *)childreg, datap); + } break; case PTRACE_POKEUSR: /* Write register in pt_regs. 
*/ - if (addr & (sizeof(data)-1)) + if (addr >= PTREGS_SIZE) break; - if (addr < 0 || addr >= PTREGS_SIZE) - break; - putreg(child, addr, data); /* Write register */ + childreg = (char *)task_pt_regs(child) + addr; + + /* Guard against overwrites of the privilege level. */ + ex1_offset = PTREGS_OFFSET_EX1; +#if defined(CONFIG_COMPAT) && defined(__BIG_ENDIAN) + if (is_compat_task()) /* point at low word */ + ex1_offset += sizeof(compat_long_t); +#endif + if (addr == ex1_offset) + data = PL_ICS_EX1(USER_PL, EX1_ICS(data)); + +#ifdef CONFIG_COMPAT + if (is_compat_task()) { + if (addr & (sizeof(compat_long_t)-1)) + break; + *(compat_long_t *)childreg = data; + } else +#endif + { + if (addr & (sizeof(long)-1)) + break; + *(long *)childreg = data; + } ret = 0; break; case PTRACE_GETREGS: /* Get all registers from the child. */ - if (!access_ok(VERIFY_WRITE, datap, PTREGS_SIZE)) - break; - for (i = 0; i < PTREGS_SIZE; i += sizeof(long)) { - ret = __put_user(getreg(child, i), datap); - if (ret != 0) - break; - datap++; + if (copy_to_user(datap, task_pt_regs(child), + sizeof(struct pt_regs)) == 0) { + ret = 0; } break; case PTRACE_SETREGS: /* Set all registers in the child. 
*/ - if (!access_ok(VERIFY_READ, datap, PTREGS_SIZE)) - break; - for (i = 0; i < PTREGS_SIZE; i += sizeof(long)) { - ret = __get_user(tmp, datap); - if (ret != 0) - break; - putreg(child, i, tmp); - datap++; + if (copy_from_user(&copyregs, datap, + sizeof(struct pt_regs)) == 0) { + copyregs.ex1 = + PL_ICS_EX1(USER_PL, EX1_ICS(copyregs.ex1)); + *task_pt_regs(child) = copyregs; + ret = 0; } break; diff --git a/arch/tile/kernel/reboot.c b/arch/tile/kernel/reboot.c index acd86d20beba..baa3d905fee2 100644 --- a/arch/tile/kernel/reboot.c +++ b/arch/tile/kernel/reboot.c @@ -27,7 +27,7 @@ void machine_halt(void) { warn_early_printk(); - raw_local_irq_disable_all(); + arch_local_irq_disable_all(); smp_send_stop(); hv_halt(); } @@ -35,14 +35,14 @@ void machine_halt(void) void machine_power_off(void) { warn_early_printk(); - raw_local_irq_disable_all(); + arch_local_irq_disable_all(); smp_send_stop(); hv_power_off(); } void machine_restart(char *cmd) { - raw_local_irq_disable_all(); + arch_local_irq_disable_all(); smp_send_stop(); hv_restart((HV_VirtAddr) "vmlinux", (HV_VirtAddr) cmd); } diff --git a/arch/tile/kernel/regs_32.S b/arch/tile/kernel/regs_32.S index e88d6e122783..caa13101c264 100644 --- a/arch/tile/kernel/regs_32.S +++ b/arch/tile/kernel/regs_32.S @@ -85,7 +85,7 @@ STD_ENTRY_SECTION(__switch_to, .sched.text) { /* Update sp and ksp0 simultaneously to avoid backtracer warnings.
*/ move sp, r13 - mtspr SYSTEM_SAVE_1_0, r2 + mtspr SPR_SYSTEM_SAVE_K_0, r2 } FOR_EACH_CALLEE_SAVED_REG(LOAD_REG) .L__switch_to_pc: diff --git a/arch/tile/kernel/setup.c b/arch/tile/kernel/setup.c index e7d54c73d5c1..f18573643ed1 100644 --- a/arch/tile/kernel/setup.c +++ b/arch/tile/kernel/setup.c @@ -30,8 +30,6 @@ #include <linux/timex.h> #include <asm/setup.h> #include <asm/sections.h> -#include <asm/sections.h> -#include <asm/cacheflush.h> #include <asm/cacheflush.h> #include <asm/pgalloc.h> #include <asm/mmu_context.h> @@ -187,11 +185,11 @@ early_param("vmalloc", parse_vmalloc); #ifdef CONFIG_HIGHMEM /* - * Determine for each controller where its lowmem is mapped and how - * much of it is mapped there. On controller zero, the first few - * megabytes are mapped at 0xfd000000 as code, so in principle we - * could start our data mappings higher up, but for now we don't - * bother, to avoid additional confusion. + * Determine for each controller where its lowmem is mapped and how much of + * it is mapped there. On controller zero, the first few megabytes are + * already mapped in as code at MEM_SV_INTRPT, so in principle we could + * start our data mappings higher up, but for now we don't bother, to avoid + * additional confusion. * * One question is whether, on systems with more than 768 Mb and * controllers of different sizes, to map in a proportionate amount of @@ -311,7 +309,7 @@ static void __init setup_memory(void) #endif /* We are using a char to hold the cpu_2_node[] mapping */ - BUG_ON(MAX_NUMNODES > 127); + BUILD_BUG_ON(MAX_NUMNODES > 127); /* Discover the ranges of memory available to us */ for (i = 0; ; ++i) { @@ -842,7 +840,7 @@ static int __init topology_init(void) for_each_online_node(i) register_one_node(i); - for_each_present_cpu(i) + for (i = 0; i < smp_height * smp_width; ++i) register_cpu(&cpu_devices[i], i); return 0; @@ -870,11 +868,14 @@ void __cpuinit setup_cpu(int boot) /* Allow asynchronous TLB interrupts. 
*/ #if CHIP_HAS_TILE_DMA() - raw_local_irq_unmask(INT_DMATLB_MISS); - raw_local_irq_unmask(INT_DMATLB_ACCESS); + arch_local_irq_unmask(INT_DMATLB_MISS); + arch_local_irq_unmask(INT_DMATLB_ACCESS); #endif #if CHIP_HAS_SN_PROC() - raw_local_irq_unmask(INT_SNITLB_MISS); + arch_local_irq_unmask(INT_SNITLB_MISS); +#endif +#ifdef __tilegx__ + arch_local_irq_unmask(INT_SINGLE_STEP_K); #endif /* @@ -893,11 +894,12 @@ void __cpuinit setup_cpu(int boot) #endif /* - * Set the MPL for interrupt control 0 to user level. - * This includes access to the SYSTEM_SAVE and EX_CONTEXT SPRs, - * as well as the PL 0 interrupt mask. + * Set the MPL for interrupt control 0 & 1 to the corresponding + * values. This includes access to the SYSTEM_SAVE and EX_CONTEXT + * SPRs, as well as the interrupt mask. */ __insn_mtspr(SPR_MPL_INTCTRL_0_SET_0, 1); + __insn_mtspr(SPR_MPL_INTCTRL_1_SET_1, 1); /* Initialize IRQ support for this cpu. */ setup_irq_regs(); @@ -1033,7 +1035,7 @@ static void __init validate_va(void) * In addition, make sure we CAN'T use the end of memory, since * we use the last chunk of each pgd for the pgd_list. 
*/ - int i, fc_fd_ok = 0; + int i, user_kernel_ok = 0; unsigned long max_va = 0; unsigned long list_va = ((PGD_LIST_OFFSET / sizeof(pgd_t)) << PGDIR_SHIFT); @@ -1044,13 +1046,13 @@ static void __init validate_va(void) break; if (range.start <= MEM_USER_INTRPT && range.start + range.size >= MEM_HV_INTRPT) - fc_fd_ok = 1; + user_kernel_ok = 1; if (range.start == 0) max_va = range.size; BUG_ON(range.start + range.size > list_va); } - if (!fc_fd_ok) - early_panic("Hypervisor not configured for VAs 0xfc/0xfd\n"); + if (!user_kernel_ok) + early_panic("Hypervisor not configured for user/kernel VAs\n"); if (max_va == 0) early_panic("Hypervisor not configured for low VAs\n"); if (max_va < KERNEL_HIGH_VADDR) @@ -1334,6 +1336,10 @@ static void __init pcpu_fc_populate_pte(unsigned long addr) pte_t *pte; BUG_ON(pgd_addr_invalid(addr)); + if (addr < VMALLOC_START || addr >= VMALLOC_END) + panic("PCPU addr %#lx outside vmalloc range %#lx..%#lx;" + " try increasing CONFIG_VMALLOC_RESERVE\n", + addr, VMALLOC_START, VMALLOC_END); pgd = swapper_pg_dir + pgd_index(addr); pud = pud_offset(pgd, addr); diff --git a/arch/tile/kernel/signal.c b/arch/tile/kernel/signal.c index ce183aa1492c..1260321155f1 100644 --- a/arch/tile/kernel/signal.c +++ b/arch/tile/kernel/signal.c @@ -16,7 +16,6 @@ #include <linux/sched.h> #include <linux/mm.h> #include <linux/smp.h> -#include <linux/smp_lock.h> #include <linux/kernel.h> #include <linux/signal.h> #include <linux/errno.h> @@ -41,8 +40,8 @@ #define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP))) -long _sys_sigaltstack(const stack_t __user *uss, - stack_t __user *uoss, struct pt_regs *regs) +SYSCALL_DEFINE3(sigaltstack, const stack_t __user *, uss, + stack_t __user *, uoss, struct pt_regs *, regs) { return do_sigaltstack(uss, uoss, regs->sp); } @@ -53,7 +52,7 @@ long _sys_sigaltstack(const stack_t __user *uss, */ int restore_sigcontext(struct pt_regs *regs, - struct sigcontext __user *sc, long *pr0) + struct sigcontext __user *sc) { int err = 0; 
int i; @@ -71,19 +70,20 @@ int restore_sigcontext(struct pt_regs *regs, for (i = 0; i < sizeof(struct pt_regs)/sizeof(long); ++i) err |= __get_user(regs->regs[i], &sc->gregs[i]); + /* Ensure that the PL is always set to USER_PL. */ + regs->ex1 = PL_ICS_EX1(USER_PL, EX1_ICS(regs->ex1)); + regs->faultnum = INT_SWINT_1_SIGRETURN; - err |= __get_user(*pr0, &sc->gregs[0]); return err; } -/* sigreturn() returns long since it restores r0 in the interrupted code. */ -long _sys_rt_sigreturn(struct pt_regs *regs) +/* The assembly shim for this function arranges to ignore the return value. */ +SYSCALL_DEFINE1(rt_sigreturn, struct pt_regs *, regs) { struct rt_sigframe __user *frame = (struct rt_sigframe __user *)(regs->sp); sigset_t set; - long r0; if (!access_ok(VERIFY_READ, frame, sizeof(*frame))) goto badframe; @@ -96,13 +96,13 @@ long _sys_rt_sigreturn(struct pt_regs *regs) recalc_sigpending(); spin_unlock_irq(&current->sighand->siglock); - if (restore_sigcontext(regs, &frame->uc.uc_mcontext, &r0)) + if (restore_sigcontext(regs, &frame->uc.uc_mcontext)) goto badframe; if (do_sigaltstack(&frame->uc.uc_stack, NULL, regs->sp) == -EFAULT) goto badframe; - return r0; + return 0; badframe: force_sig(SIGSEGV, current); @@ -330,7 +330,7 @@ void do_signal(struct pt_regs *regs) current_thread_info()->status &= ~TS_RESTORE_SIGMASK; } - return; + goto done; } /* Did we come from a system call? */ @@ -358,4 +358,8 @@ void do_signal(struct pt_regs *regs) current_thread_info()->status &= ~TS_RESTORE_SIGMASK; sigprocmask(SIG_SETMASK, &current->saved_sigmask, NULL); } + +done: + /* Avoid double syscall restart if there are nested signals. */ + regs->faultnum = INT_SWINT_1_SIGRETURN; } diff --git a/arch/tile/kernel/single_step.c b/arch/tile/kernel/single_step.c index 5ec4b9c651f2..1eb3b39e36c7 100644 --- a/arch/tile/kernel/single_step.c +++ b/arch/tile/kernel/single_step.c @@ -15,7 +15,7 @@ * Derived from iLib's single-stepping code. */ -#ifndef __tilegx__ /* No support for single-step yet.
*/ +#ifndef __tilegx__ /* Hardware support for single step unavailable. */ /* These functions are only used on the TILE platform */ #include <linux/slab.h> @@ -660,4 +660,75 @@ void single_step_once(struct pt_regs *regs) regs->pc += 8; } +#else +#include <linux/smp.h> +#include <linux/ptrace.h> +#include <arch/spr_def.h> + +static DEFINE_PER_CPU(unsigned long, ss_saved_pc); + + +/* + * Called directly on the occasion of an interrupt. + * + * If the process doesn't have single step set, then we use this as an + * opportunity to turn single step off. + * + * It has been mentioned that we could conditionally turn off single stepping + * on each entry into the kernel and rely on single_step_once to turn it + * on for the processes that matter (as we already do), but this + * implementation is somewhat more efficient in that we muck with registers + * once on a bum interrupt rather than on every entry into the kernel. + * + * If SINGLE_STEP_CONTROL_K has CANCELED set, then an interrupt occurred, + * so we have to run through this process again before we can say that an + * instruction has executed. + * + * swint will set CANCELED, but it's a legitimate instruction. Fortunately + * it changes the PC. If it hasn't changed, then we know that the interrupt + * wasn't generated by swint and we'll need to run this process again before + * we can say an instruction has executed. + * + * If either CANCELED == 0 or the PC's changed, we send out SIGTRAPs and get + * on with our lives. 
+ */ + +void gx_singlestep_handle(struct pt_regs *regs, int fault_num) +{ + unsigned long *ss_pc = &__get_cpu_var(ss_saved_pc); + struct thread_info *info = (void *)current_thread_info(); + int is_single_step = test_ti_thread_flag(info, TIF_SINGLESTEP); + unsigned long control = __insn_mfspr(SPR_SINGLE_STEP_CONTROL_K); + + if (is_single_step == 0) { + __insn_mtspr(SPR_SINGLE_STEP_EN_K_K, 0); + + } else if ((*ss_pc != regs->pc) || + (!(control & SPR_SINGLE_STEP_CONTROL_1__CANCELED_MASK))) { + + ptrace_notify(SIGTRAP); + control |= SPR_SINGLE_STEP_CONTROL_1__CANCELED_MASK; + control |= SPR_SINGLE_STEP_CONTROL_1__INHIBIT_MASK; + __insn_mtspr(SPR_SINGLE_STEP_CONTROL_K, control); + } +} + + +/* + * Called from need_singlestep. Set up the control registers and the enable + * register, then return back. + */ + +void single_step_once(struct pt_regs *regs) +{ + unsigned long *ss_pc = &__get_cpu_var(ss_saved_pc); + unsigned long control = __insn_mfspr(SPR_SINGLE_STEP_CONTROL_K); + + *ss_pc = regs->pc; + control |= SPR_SINGLE_STEP_CONTROL_1__CANCELED_MASK; + control |= SPR_SINGLE_STEP_CONTROL_1__INHIBIT_MASK; + __insn_mtspr(SPR_SINGLE_STEP_CONTROL_K, control); + __insn_mtspr(SPR_SINGLE_STEP_EN_K_K, 1 << USER_PL); +} + #endif /* !__tilegx__ */ diff --git a/arch/tile/kernel/smp.c b/arch/tile/kernel/smp.c index 1cb5ec79de04..9575b37a8b75 100644 --- a/arch/tile/kernel/smp.c +++ b/arch/tile/kernel/smp.c @@ -115,7 +115,7 @@ static void smp_start_cpu_interrupt(void) static void smp_stop_cpu_interrupt(void) { set_cpu_online(smp_processor_id(), 0); - raw_local_irq_disable_all(); + arch_local_irq_disable_all(); for (;;) asm("nap"); } @@ -212,7 +212,7 @@ void __init ipi_init(void) tile.x = cpu_x(cpu); tile.y = cpu_y(cpu); - if (hv_get_ipi_pte(tile, 1, &pte) != 0) + if (hv_get_ipi_pte(tile, KERNEL_PL, &pte) != 0) panic("Failed to initialize IPI for cpu %d\n", cpu); offset = hv_pte_get_pfn(pte) << PAGE_SHIFT; diff --git a/arch/tile/kernel/smpboot.c b/arch/tile/kernel/smpboot.c index 
74d62d098edf..b949edcec200 100644 --- a/arch/tile/kernel/smpboot.c +++ b/arch/tile/kernel/smpboot.c @@ -18,7 +18,6 @@ #include <linux/mm.h> #include <linux/sched.h> #include <linux/kernel_stat.h> -#include <linux/smp_lock.h> #include <linux/bootmem.h> #include <linux/notifier.h> #include <linux/cpu.h> diff --git a/arch/tile/kernel/stack.c b/arch/tile/kernel/stack.c index ea2e0ce28380..0d54106be3d6 100644 --- a/arch/tile/kernel/stack.c +++ b/arch/tile/kernel/stack.c @@ -30,6 +30,10 @@ #include <arch/abi.h> #include <arch/interrupts.h> +#define KBT_ONGOING 0 /* Backtrace still ongoing */ +#define KBT_DONE 1 /* Backtrace cleanly completed */ +#define KBT_RUNNING 2 /* Can't run backtrace on a running task */ +#define KBT_LOOP 3 /* Backtrace entered a loop */ /* Is address on the specified kernel stack? */ static int in_kernel_stack(struct KBacktraceIterator *kbt, VirtualAddress sp) @@ -207,11 +211,11 @@ static int KBacktraceIterator_next_item_inclusive( for (;;) { do { if (!KBacktraceIterator_is_sigreturn(kbt)) - return 1; + return KBT_ONGOING; } while (backtrace_next(&kbt->it)); if (!KBacktraceIterator_restart(kbt)) - return 0; + return KBT_DONE; } } @@ -264,7 +268,7 @@ void KBacktraceIterator_init(struct KBacktraceIterator *kbt, kbt->pgtable = NULL; kbt->verbose = 0; /* override in caller if desired */ kbt->profile = 0; /* override in caller if desired */ - kbt->end = 0; + kbt->end = KBT_ONGOING; kbt->new_context = 0; if (is_current) { HV_PhysAddr pgdir_pa = hv_inquire_context().page_table; @@ -290,7 +294,7 @@ void KBacktraceIterator_init(struct KBacktraceIterator *kbt, if (regs == NULL) { if (is_current || t->state == TASK_RUNNING) { /* Can't do this; we need registers */ - kbt->end = 1; + kbt->end = KBT_RUNNING; return; } pc = get_switch_to_pc(); @@ -305,26 +309,29 @@ void KBacktraceIterator_init(struct KBacktraceIterator *kbt, } backtrace_init(&kbt->it, read_memory_func, kbt, pc, lr, sp, r52); - kbt->end = !KBacktraceIterator_next_item_inclusive(kbt); + kbt->end = 
KBacktraceIterator_next_item_inclusive(kbt); } EXPORT_SYMBOL(KBacktraceIterator_init); int KBacktraceIterator_end(struct KBacktraceIterator *kbt) { - return kbt->end; + return kbt->end != KBT_ONGOING; } EXPORT_SYMBOL(KBacktraceIterator_end); void KBacktraceIterator_next(struct KBacktraceIterator *kbt) { + VirtualAddress old_pc = kbt->it.pc, old_sp = kbt->it.sp; kbt->new_context = 0; - if (!backtrace_next(&kbt->it) && - !KBacktraceIterator_restart(kbt)) { - kbt->end = 1; - return; - } - - kbt->end = !KBacktraceIterator_next_item_inclusive(kbt); + if (!backtrace_next(&kbt->it) && !KBacktraceIterator_restart(kbt)) { + kbt->end = KBT_DONE; + return; + } + kbt->end = KBacktraceIterator_next_item_inclusive(kbt); + if (old_pc == kbt->it.pc && old_sp == kbt->it.sp) { + /* Trapped in a loop; give up. */ + kbt->end = KBT_LOOP; + } } EXPORT_SYMBOL(KBacktraceIterator_next); @@ -387,6 +394,8 @@ void tile_show_stack(struct KBacktraceIterator *kbt, int headers) break; } } + if (kbt->end == KBT_LOOP) + pr_err("Stack dump stopped; next frame identical to this one\n"); if (headers) pr_err("Stack dump complete\n"); } diff --git a/arch/tile/kernel/sys.c b/arch/tile/kernel/sys.c index f0f87eab8c39..e2187d24a9b4 100644 --- a/arch/tile/kernel/sys.c +++ b/arch/tile/kernel/sys.c @@ -20,7 +20,6 @@ #include <linux/sched.h> #include <linux/mm.h> #include <linux/smp.h> -#include <linux/smp_lock.h> #include <linux/syscalls.h> #include <linux/mman.h> #include <linux/file.h> @@ -110,6 +109,15 @@ SYSCALL_DEFINE6(mmap, unsigned long, addr, unsigned long, len, #define sys_sync_file_range sys_sync_file_range2 #endif +/* Call the trampolines to manage pt_regs where necessary. 
*/ +#define sys_execve _sys_execve +#define sys_sigaltstack _sys_sigaltstack +#define sys_rt_sigreturn _sys_rt_sigreturn +#define sys_clone _sys_clone +#ifndef __tilegx__ +#define sys_cmpxchg_badaddr _sys_cmpxchg_badaddr +#endif + /* * Note that we can't include <linux/unistd.h> here since the header * guard will defeat us; <asm/unistd.h> checks for __SYSCALL as well. diff --git a/arch/tile/kernel/time.c b/arch/tile/kernel/time.c index 6bed820e1421..f2e156e44692 100644 --- a/arch/tile/kernel/time.c +++ b/arch/tile/kernel/time.c @@ -132,7 +132,7 @@ static int tile_timer_set_next_event(unsigned long ticks, { BUG_ON(ticks > MAX_TICK); __insn_mtspr(SPR_TILE_TIMER_CONTROL, ticks); - raw_local_irq_unmask_now(INT_TILE_TIMER); + arch_local_irq_unmask_now(INT_TILE_TIMER); return 0; } @@ -143,7 +143,7 @@ static int tile_timer_set_next_event(unsigned long ticks, static void tile_timer_set_mode(enum clock_event_mode mode, struct clock_event_device *evt) { - raw_local_irq_mask_now(INT_TILE_TIMER); + arch_local_irq_mask_now(INT_TILE_TIMER); } /* @@ -172,7 +172,7 @@ void __cpuinit setup_tile_timer(void) evt->cpumask = cpumask_of(smp_processor_id()); /* Start out with timer not firing. */ - raw_local_irq_mask_now(INT_TILE_TIMER); + arch_local_irq_mask_now(INT_TILE_TIMER); /* Register tile timer. */ clockevents_register_device(evt); @@ -188,7 +188,7 @@ void do_timer_interrupt(struct pt_regs *regs, int fault_num) * Mask the timer interrupt here, since we are a oneshot timer * and there are now by definition no events pending. 
*/ - raw_local_irq_mask(INT_TILE_TIMER); + arch_local_irq_mask(INT_TILE_TIMER); /* Track time spent here in an interrupt context */ irq_enter(); diff --git a/arch/tile/kernel/traps.c b/arch/tile/kernel/traps.c index 0f362dc2c57f..5474fc2e77e8 100644 --- a/arch/tile/kernel/traps.c +++ b/arch/tile/kernel/traps.c @@ -260,7 +260,7 @@ void __kprobes do_trap(struct pt_regs *regs, int fault_num, address = regs->pc; break; case INT_UNALIGN_DATA: -#ifndef __tilegx__ /* FIXME: GX: no single-step yet */ +#ifndef __tilegx__ /* Emulated support for single step debugging */ if (unaligned_fixup >= 0) { struct single_step_state *state = current_thread_info()->step_state; @@ -278,7 +278,7 @@ void __kprobes do_trap(struct pt_regs *regs, int fault_num, case INT_DOUBLE_FAULT: /* * For double fault, "reason" is actually passed as - * SYSTEM_SAVE_1_2, the hypervisor's double-fault info, so + * SYSTEM_SAVE_K_2, the hypervisor's double-fault info, so * we can provide the original fault number rather than * the uninteresting "INT_DOUBLE_FAULT" so the user can * learn what actually struck while PL0 ICS was set. diff --git a/arch/tile/kvm/Kconfig b/arch/tile/kvm/Kconfig new file mode 100644 index 000000000000..b88f9c047781 --- /dev/null +++ b/arch/tile/kvm/Kconfig @@ -0,0 +1,38 @@ +# +# KVM configuration +# + +source "virt/kvm/Kconfig" + +menuconfig VIRTUALIZATION + bool "Virtualization" + ---help--- + Say Y here to get to see options for using your Linux host to run + other operating systems inside virtual machines (guests). + This option alone does not add any kernel code. + + If you say N, all options in this submenu will be skipped and + disabled. + +if VIRTUALIZATION + +config KVM + tristate "Kernel-based Virtual Machine (KVM) support" + depends on HAVE_KVM && MODULES && EXPERIMENTAL + select PREEMPT_NOTIFIERS + select ANON_INODES + ---help--- + Support hosting paravirtualized guest machines. 
+ + This module provides access to the hardware capabilities through + a character device node named /dev/kvm. + + To compile this as a module, choose M here: the module + will be called kvm. + + If unsure, say N. + +source drivers/vhost/Kconfig +source drivers/virtio/Kconfig + +endif # VIRTUALIZATION diff --git a/arch/tile/lib/Makefile b/arch/tile/lib/Makefile index 746dc81ed3c4..93122d5b1558 100644 --- a/arch/tile/lib/Makefile +++ b/arch/tile/lib/Makefile @@ -3,8 +3,8 @@ # lib-y = cacheflush.o checksum.o cpumask.o delay.o \ - mb_incoherent.o uaccess.o \ - memcpy_$(BITS).o memchr_$(BITS).o memmove_$(BITS).o memset_$(BITS).o \ + mb_incoherent.o uaccess.o memmove.o \ + memcpy_$(BITS).o memchr_$(BITS).o memset_$(BITS).o \ strchr_$(BITS).o strlen_$(BITS).o ifeq ($(CONFIG_TILEGX),y) diff --git a/arch/tile/lib/atomic_32.c b/arch/tile/lib/atomic_32.c index 8040b42a8eea..7a5cc706ab62 100644 --- a/arch/tile/lib/atomic_32.c +++ b/arch/tile/lib/atomic_32.c @@ -300,7 +300,7 @@ void __init __init_atomic_per_cpu(void) #else /* ATOMIC_LOCKS_FOUND_VIA_TABLE() */ /* Validate power-of-two and "bigger than cpus" assumption */ - BUG_ON(ATOMIC_HASH_SIZE & (ATOMIC_HASH_SIZE-1)); + BUILD_BUG_ON(ATOMIC_HASH_SIZE & (ATOMIC_HASH_SIZE-1)); BUG_ON(ATOMIC_HASH_SIZE < nr_cpu_ids); /* @@ -314,17 +314,17 @@ void __init __init_atomic_per_cpu(void) BUG_ON((unsigned long)atomic_locks % PAGE_SIZE != 0); /* The locks must all fit on one page. */ - BUG_ON(ATOMIC_HASH_SIZE * sizeof(int) > PAGE_SIZE); + BUILD_BUG_ON(ATOMIC_HASH_SIZE * sizeof(int) > PAGE_SIZE); /* * We use the page offset of the atomic value's address as * an index into atomic_locks, excluding the low 3 bits. * That should not produce more indices than ATOMIC_HASH_SIZE. */ - BUG_ON((PAGE_SIZE >> 3) > ATOMIC_HASH_SIZE); + BUILD_BUG_ON((PAGE_SIZE >> 3) > ATOMIC_HASH_SIZE); #endif /* ATOMIC_LOCKS_FOUND_VIA_TABLE() */ /* The futex code makes this assumption, so we validate it here. 
*/ - BUG_ON(sizeof(atomic_t) != sizeof(int)); + BUILD_BUG_ON(sizeof(atomic_t) != sizeof(int)); } diff --git a/arch/tile/lib/exports.c b/arch/tile/lib/exports.c index ce5dbf56578f..1509c5597653 100644 --- a/arch/tile/lib/exports.c +++ b/arch/tile/lib/exports.c @@ -45,6 +45,9 @@ EXPORT_SYMBOL(__copy_from_user_zeroing); EXPORT_SYMBOL(__copy_in_user_inatomic); #endif +/* arch/tile/lib/mb_incoherent.S */ +EXPORT_SYMBOL(__mb_incoherent); + /* hypervisor glue */ #include <hv/hypervisor.h> EXPORT_SYMBOL(hv_dev_open); diff --git a/arch/tile/lib/memchr_32.c b/arch/tile/lib/memchr_32.c index 6235283b4859..cc3d9badf030 100644 --- a/arch/tile/lib/memchr_32.c +++ b/arch/tile/lib/memchr_32.c @@ -18,12 +18,24 @@ void *memchr(const void *s, int c, size_t n) { + const uint32_t *last_word_ptr; + const uint32_t *p; + const char *last_byte_ptr; + uintptr_t s_int; + uint32_t goal, before_mask, v, bits; + char *ret; + + if (__builtin_expect(n == 0, 0)) { + /* Don't dereference any memory if the array is empty. */ + return NULL; + } + /* Get an aligned pointer. */ - const uintptr_t s_int = (uintptr_t) s; - const uint32_t *p = (const uint32_t *)(s_int & -4); + s_int = (uintptr_t) s; + p = (const uint32_t *)(s_int & -4); /* Create four copies of the byte for which we are looking. */ - const uint32_t goal = 0x01010101 * (uint8_t) c; + goal = 0x01010101 * (uint8_t) c; /* Read the first word, but munge it so that bytes before the array * will not match goal. @@ -31,23 +43,14 @@ void *memchr(const void *s, int c, size_t n) * Note that this shift count expression works because we know * shift counts are taken mod 32. */ - const uint32_t before_mask = (1 << (s_int << 3)) - 1; - uint32_t v = (*p | before_mask) ^ (goal & before_mask); + before_mask = (1 << (s_int << 3)) - 1; + v = (*p | before_mask) ^ (goal & before_mask); /* Compute the address of the last byte. 
*/ - const char *const last_byte_ptr = (const char *)s + n - 1; + last_byte_ptr = (const char *)s + n - 1; /* Compute the address of the word containing the last byte. */ - const uint32_t *const last_word_ptr = - (const uint32_t *)((uintptr_t) last_byte_ptr & -4); - - uint32_t bits; - char *ret; - - if (__builtin_expect(n == 0, 0)) { - /* Don't dereference any memory if the array is empty. */ - return NULL; - } + last_word_ptr = (const uint32_t *)((uintptr_t) last_byte_ptr & -4); while ((bits = __insn_seqb(v, goal)) == 0) { if (__builtin_expect(p == last_word_ptr, 0)) { diff --git a/arch/tile/lib/memcpy_32.S b/arch/tile/lib/memcpy_32.S index 30c3b7ebb55d..2a419a6122db 100644 --- a/arch/tile/lib/memcpy_32.S +++ b/arch/tile/lib/memcpy_32.S @@ -10,14 +10,16 @@ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or * NON INFRINGEMENT. See the GNU General Public License for * more details. - * - * This file shares the implementation of the userspace memcpy and - * the kernel's memcpy, copy_to_user and copy_from_user. */ #include <arch/chip.h> +/* + * This file shares the implementation of the userspace memcpy and + * the kernel's memcpy, copy_to_user and copy_from_user. + */ + #include <linux/linkage.h> /* On TILE64, we wrap these functions via arch/tile/lib/memcpy_tile64.c */ @@ -53,9 +55,9 @@ */ ENTRY(__copy_from_user_inatomic) .type __copy_from_user_inatomic, @function - FEEDBACK_ENTER_EXPLICIT(__copy_from_user_inatomic, \ + FEEDBACK_ENTER_EXPLICIT(__copy_from_user_inatomic, \ .text.memcpy_common, \ - .Lend_memcpy_common - __copy_from_user_inatomic) + .Lend_memcpy_common - __copy_from_user_inatomic) { movei r29, IS_COPY_FROM_USER; j memcpy_common } .size __copy_from_user_inatomic, . 
- __copy_from_user_inatomic @@ -64,7 +66,7 @@ ENTRY(__copy_from_user_inatomic) */ ENTRY(__copy_from_user_zeroing) .type __copy_from_user_zeroing, @function - FEEDBACK_REENTER(__copy_from_user_inatomic) + FEEDBACK_REENTER(__copy_from_user_inatomic) { movei r29, IS_COPY_FROM_USER_ZEROING; j memcpy_common } .size __copy_from_user_zeroing, . - __copy_from_user_zeroing @@ -74,13 +76,13 @@ ENTRY(__copy_from_user_zeroing) */ ENTRY(__copy_to_user_inatomic) .type __copy_to_user_inatomic, @function - FEEDBACK_REENTER(__copy_from_user_inatomic) + FEEDBACK_REENTER(__copy_from_user_inatomic) { movei r29, IS_COPY_TO_USER; j memcpy_common } .size __copy_to_user_inatomic, . - __copy_to_user_inatomic ENTRY(memcpy) .type memcpy, @function - FEEDBACK_REENTER(__copy_from_user_inatomic) + FEEDBACK_REENTER(__copy_from_user_inatomic) { movei r29, IS_MEMCPY } .size memcpy, . - memcpy /* Fall through */ @@ -157,35 +159,35 @@ EX: { sw r0, r3; addi r0, r0, 4; addi r2, r2, -4 } { addi r3, r1, 60; andi r9, r9, -64 } #if CHIP_HAS_WH64() - /* No need to prefetch dst, we'll just do the wh64 - * right before we copy a line. + /* No need to prefetch dst, we'll just do the wh64 + * right before we copy a line. */ #endif EX: { lw r5, r3; addi r3, r3, 64; movei r4, 1 } - /* Intentionally stall for a few cycles to leave L2 cache alone. */ - { bnzt zero, .; move r27, lr } + /* Intentionally stall for a few cycles to leave L2 cache alone. */ + { bnzt zero, .; move r27, lr } EX: { lw r6, r3; addi r3, r3, 64 } - /* Intentionally stall for a few cycles to leave L2 cache alone. */ - { bnzt zero, . } + /* Intentionally stall for a few cycles to leave L2 cache alone. */ + { bnzt zero, . } EX: { lw r7, r3; addi r3, r3, 64 } #if !CHIP_HAS_WH64() - /* Prefetch the dest */ - /* Intentionally stall for a few cycles to leave L2 cache alone. */ - { bnzt zero, . } - /* Use a real load to cause a TLB miss if necessary. We aren't using - * r28, so this should be fine. 
- */ + /* Prefetch the dest */ + /* Intentionally stall for a few cycles to leave L2 cache alone. */ + { bnzt zero, . } + /* Use a real load to cause a TLB miss if necessary. We aren't using + * r28, so this should be fine. + */ EX: { lw r28, r9; addi r9, r9, 64 } - /* Intentionally stall for a few cycles to leave L2 cache alone. */ - { bnzt zero, . } - { prefetch r9; addi r9, r9, 64 } - /* Intentionally stall for a few cycles to leave L2 cache alone. */ - { bnzt zero, . } - { prefetch r9; addi r9, r9, 64 } + /* Intentionally stall for a few cycles to leave L2 cache alone. */ + { bnzt zero, . } + { prefetch r9; addi r9, r9, 64 } + /* Intentionally stall for a few cycles to leave L2 cache alone. */ + { bnzt zero, . } + { prefetch r9; addi r9, r9, 64 } #endif - /* Intentionally stall for a few cycles to leave L2 cache alone. */ - { bz zero, .Lbig_loop2 } + /* Intentionally stall for a few cycles to leave L2 cache alone. */ + { bz zero, .Lbig_loop2 } /* On entry to this loop: * - r0 points to the start of dst line 0 @@ -197,7 +199,7 @@ EX: { lw r28, r9; addi r9, r9, 64 } * to some "safe" recently loaded address. * - r5 contains *(r1 + 60) [i.e. last word of source line 0] * - r6 contains *(r1 + 64 + 60) [i.e. last word of source line 1] - * - r9 contains ((r0 + 63) & -64) + * - r9 contains ((r0 + 63) & -64) * [start of next dst cache line.] */ @@ -208,137 +210,137 @@ EX: { lw r28, r9; addi r9, r9, 64 } /* Copy line 0, first stalling until r5 is ready. */ EX: { move r12, r5; lw r16, r1 } { bz r4, .Lcopy_8_check; slti_u r8, r2, 8 } - /* Prefetch several lines ahead. */ + /* Prefetch several lines ahead. */ EX: { lw r5, r3; addi r3, r3, 64 } - { jal .Lcopy_line } + { jal .Lcopy_line } /* Copy line 1, first stalling until r6 is ready. */ EX: { move r12, r6; lw r16, r1 } { bz r4, .Lcopy_8_check; slti_u r8, r2, 8 } - /* Prefetch several lines ahead. */ + /* Prefetch several lines ahead. 
*/ EX: { lw r6, r3; addi r3, r3, 64 } { jal .Lcopy_line } /* Copy line 2, first stalling until r7 is ready. */ EX: { move r12, r7; lw r16, r1 } { bz r4, .Lcopy_8_check; slti_u r8, r2, 8 } - /* Prefetch several lines ahead. */ + /* Prefetch several lines ahead. */ EX: { lw r7, r3; addi r3, r3, 64 } - /* Use up a caches-busy cycle by jumping back to the top of the - * loop. Might as well get it out of the way now. - */ - { j .Lbig_loop } + /* Use up a caches-busy cycle by jumping back to the top of the + * loop. Might as well get it out of the way now. + */ + { j .Lbig_loop } /* On entry: * - r0 points to the destination line. * - r1 points to the source line. - * - r3 is the next prefetch address. + * - r3 is the next prefetch address. * - r9 holds the last address used for wh64. * - r12 = WORD_15 - * - r16 = WORD_0. - * - r17 == r1 + 16. - * - r27 holds saved lr to restore. + * - r16 = WORD_0. + * - r17 == r1 + 16. + * - r27 holds saved lr to restore. * * On exit: * - r0 is incremented by 64. * - r1 is incremented by 64, unless that would point to a word - * beyond the end of the source array, in which case it is redirected - * to point to an arbitrary word already in the cache. + * beyond the end of the source array, in which case it is redirected + * to point to an arbitrary word already in the cache. * - r2 is decremented by 64. - * - r3 is unchanged, unless it points to a word beyond the - * end of the source array, in which case it is redirected - * to point to an arbitrary word already in the cache. - * Redirecting is OK since if we are that close to the end - * of the array we will not come back to this subroutine - * and use the contents of the prefetched address. + * - r3 is unchanged, unless it points to a word beyond the + * end of the source array, in which case it is redirected + * to point to an arbitrary word already in the cache. 
+ * Redirecting is OK since if we are that close to the end + * of the array we will not come back to this subroutine + * and use the contents of the prefetched address. * - r4 is nonzero iff r2 >= 64. - * - r9 is incremented by 64, unless it points beyond the - * end of the last full destination cache line, in which - * case it is redirected to a "safe address" that can be - * clobbered (sp - 64) + * - r9 is incremented by 64, unless it points beyond the + * end of the last full destination cache line, in which + * case it is redirected to a "safe address" that can be + * clobbered (sp - 64) * - lr contains the value in r27. */ /* r26 unused */ .Lcopy_line: - /* TODO: when r3 goes past the end, we would like to redirect it - * to prefetch the last partial cache line (if any) just once, for the - * benefit of the final cleanup loop. But we don't want to - * prefetch that line more than once, or subsequent prefetches - * will go into the RTF. But then .Lbig_loop should unconditionally - * branch to top of loop to execute final prefetch, and its - * nop should become a conditional branch. - */ - - /* We need two non-memory cycles here to cover the resources - * used by the loads initiated by the caller. - */ - { add r15, r1, r2 } + /* TODO: when r3 goes past the end, we would like to redirect it + * to prefetch the last partial cache line (if any) just once, for the + * benefit of the final cleanup loop. But we don't want to + * prefetch that line more than once, or subsequent prefetches + * will go into the RTF. But then .Lbig_loop should unconditionally + * branch to top of loop to execute final prefetch, and its + * nop should become a conditional branch. + */ + + /* We need two non-memory cycles here to cover the resources + * used by the loads initiated by the caller. + */ + { add r15, r1, r2 } .Lcopy_line2: - { slt_u r13, r3, r15; addi r17, r1, 16 } + { slt_u r13, r3, r15; addi r17, r1, 16 } - /* NOTE: this will stall for one cycle as L1 is busy. 
*/ + /* NOTE: this will stall for one cycle as L1 is busy. */ - /* Fill second L1D line. */ + /* Fill second L1D line. */ EX: { lw r17, r17; addi r1, r1, 48; mvz r3, r13, r1 } /* r17 = WORD_4 */ #if CHIP_HAS_WH64() - /* Prepare destination line for writing. */ + /* Prepare destination line for writing. */ EX: { wh64 r9; addi r9, r9, 64 } #else - /* Prefetch dest line */ + /* Prefetch dest line */ { prefetch r9; addi r9, r9, 64 } #endif - /* Load seven words that are L1D hits to cover wh64 L2 usage. */ + /* Load seven words that are L1D hits to cover wh64 L2 usage. */ - /* Load the three remaining words from the last L1D line, which - * we know has already filled the L1D. - */ + /* Load the three remaining words from the last L1D line, which + * we know has already filled the L1D. + */ EX: { lw r4, r1; addi r1, r1, 4; addi r20, r1, 16 } /* r4 = WORD_12 */ EX: { lw r8, r1; addi r1, r1, 4; slt_u r13, r20, r15 }/* r8 = WORD_13 */ EX: { lw r11, r1; addi r1, r1, -52; mvz r20, r13, r1 } /* r11 = WORD_14 */ - /* Load the three remaining words from the first L1D line, first - * stalling until it has filled by "looking at" r16. - */ + /* Load the three remaining words from the first L1D line, first + * stalling until it has filled by "looking at" r16. + */ EX: { lw r13, r1; addi r1, r1, 4; move zero, r16 } /* r13 = WORD_1 */ EX: { lw r14, r1; addi r1, r1, 4 } /* r14 = WORD_2 */ EX: { lw r15, r1; addi r1, r1, 8; addi r10, r0, 60 } /* r15 = WORD_3 */ - /* Load second word from the second L1D line, first - * stalling until it has filled by "looking at" r17. - */ + /* Load second word from the second L1D line, first + * stalling until it has filled by "looking at" r17. + */ EX: { lw r19, r1; addi r1, r1, 4; move zero, r17 } /* r19 = WORD_5 */ - /* Store last word to the destination line, potentially dirtying it - * for the first time, which keeps the L2 busy for two cycles. 
- */ + /* Store last word to the destination line, potentially dirtying it + * for the first time, which keeps the L2 busy for two cycles. + */ EX: { sw r10, r12 } /* store(WORD_15) */ - /* Use two L1D hits to cover the sw L2 access above. */ + /* Use two L1D hits to cover the sw L2 access above. */ EX: { lw r10, r1; addi r1, r1, 4 } /* r10 = WORD_6 */ EX: { lw r12, r1; addi r1, r1, 4 } /* r12 = WORD_7 */ - /* Fill third L1D line. */ + /* Fill third L1D line. */ EX: { lw r18, r1; addi r1, r1, 4 } /* r18 = WORD_8 */ - /* Store first L1D line. */ + /* Store first L1D line. */ EX: { sw r0, r16; addi r0, r0, 4; add r16, r0, r2 } /* store(WORD_0) */ EX: { sw r0, r13; addi r0, r0, 4; andi r16, r16, -64 } /* store(WORD_1) */ EX: { sw r0, r14; addi r0, r0, 4; slt_u r16, r9, r16 } /* store(WORD_2) */ #if CHIP_HAS_WH64() EX: { sw r0, r15; addi r0, r0, 4; addi r13, sp, -64 } /* store(WORD_3) */ #else - /* Back up the r9 to a cache line we are already storing to + /* Back up the r9 to a cache line we are already storing to * if it gets past the end of the dest vector. Strictly speaking, * we don't need to back up to the start of a cache line, but it's free * and tidy, so why not? - */ + */ EX: { sw r0, r15; addi r0, r0, 4; andi r13, r0, -64 } /* store(WORD_3) */ #endif - /* Store second L1D line. */ + /* Store second L1D line. */ EX: { sw r0, r17; addi r0, r0, 4; mvz r9, r16, r13 }/* store(WORD_4) */ EX: { sw r0, r19; addi r0, r0, 4 } /* store(WORD_5) */ EX: { sw r0, r10; addi r0, r0, 4 } /* store(WORD_6) */ @@ -348,30 +350,30 @@ EX: { lw r13, r1; addi r1, r1, 4; move zero, r18 } /* r13 = WORD_9 */ EX: { lw r14, r1; addi r1, r1, 4 } /* r14 = WORD_10 */ EX: { lw r15, r1; move r1, r20 } /* r15 = WORD_11 */ - /* Store third L1D line. */ + /* Store third L1D line. 
*/ EX: { sw r0, r18; addi r0, r0, 4 } /* store(WORD_8) */ EX: { sw r0, r13; addi r0, r0, 4 } /* store(WORD_9) */ EX: { sw r0, r14; addi r0, r0, 4 } /* store(WORD_10) */ EX: { sw r0, r15; addi r0, r0, 4 } /* store(WORD_11) */ - /* Store rest of fourth L1D line. */ + /* Store rest of fourth L1D line. */ EX: { sw r0, r4; addi r0, r0, 4 } /* store(WORD_12) */ - { + { EX: sw r0, r8 /* store(WORD_13) */ - addi r0, r0, 4 + addi r0, r0, 4 /* Will r2 be > 64 after we subtract 64 below? */ - shri r4, r2, 7 - } - { + shri r4, r2, 7 + } + { EX: sw r0, r11 /* store(WORD_14) */ - addi r0, r0, 8 - /* Record 64 bytes successfully copied. */ - addi r2, r2, -64 - } + addi r0, r0, 8 + /* Record 64 bytes successfully copied. */ + addi r2, r2, -64 + } { jrp lr; move lr, r27 } - /* Convey to the backtrace library that the stack frame is size + /* Convey to the backtrace library that the stack frame is size * zero, and the real return address is on the stack rather than * in 'lr'. */ diff --git a/arch/tile/lib/memcpy_tile64.c b/arch/tile/lib/memcpy_tile64.c index dfedea7b266b..f7d4a6ad61e8 100644 --- a/arch/tile/lib/memcpy_tile64.c +++ b/arch/tile/lib/memcpy_tile64.c @@ -54,7 +54,7 @@ typedef unsigned long (*memcpy_t)(void *, const void *, unsigned long); * we must run with interrupts disabled to avoid the risk of some * other code seeing the incoherent data in our cache. (Recall that * our cache is indexed by PA, so even if the other code doesn't use - * our KM_MEMCPY virtual addresses, they'll still hit in cache using + * our kmap_atomic virtual addresses, they'll still hit in cache using * the normal VAs that aren't supposed to hit in cache.) 
*/ static void memcpy_multicache(void *dest, const void *source, @@ -64,6 +64,7 @@ static void memcpy_multicache(void *dest, const void *source, unsigned long flags, newsrc, newdst; pmd_t *pmdp; pte_t *ptep; + int type0, type1; int cpu = get_cpu(); /* @@ -77,7 +78,8 @@ static void memcpy_multicache(void *dest, const void *source, sim_allow_multiple_caching(1); /* Set up the new dest mapping */ - idx = FIX_KMAP_BEGIN + (KM_TYPE_NR * cpu) + KM_MEMCPY0; + type0 = kmap_atomic_idx_push(); + idx = FIX_KMAP_BEGIN + (KM_TYPE_NR * cpu) + type0; newdst = __fix_to_virt(idx) + ((unsigned long)dest & (PAGE_SIZE-1)); pmdp = pmd_offset(pud_offset(pgd_offset_k(newdst), newdst), newdst); ptep = pte_offset_kernel(pmdp, newdst); @@ -87,7 +89,8 @@ static void memcpy_multicache(void *dest, const void *source, } /* Set up the new source mapping */ - idx += (KM_MEMCPY0 - KM_MEMCPY1); + type1 = kmap_atomic_idx_push(); + idx += (type0 - type1); src_pte = hv_pte_set_nc(src_pte); src_pte = hv_pte_clear_writable(src_pte); /* be paranoid */ newsrc = __fix_to_virt(idx) + ((unsigned long)source & (PAGE_SIZE-1)); @@ -119,6 +122,8 @@ static void memcpy_multicache(void *dest, const void *source, * We're done: notify the simulator that all is back to normal, * and re-enable interrupts and pre-emption. 
*/ + kmap_atomic_idx_pop(); + kmap_atomic_idx_pop(); sim_allow_multiple_caching(0); local_irq_restore(flags); put_cpu(); diff --git a/arch/tile/lib/memmove_32.c b/arch/tile/lib/memmove.c index fd615ae6ade7..fd615ae6ade7 100644 --- a/arch/tile/lib/memmove_32.c +++ b/arch/tile/lib/memmove.c diff --git a/arch/tile/lib/memset_32.c b/arch/tile/lib/memset_32.c index d014c1fbcbc2..57dbb3a5bff8 100644 --- a/arch/tile/lib/memset_32.c +++ b/arch/tile/lib/memset_32.c @@ -18,6 +18,7 @@ #include <linux/string.h> #include <linux/module.h> +#undef memset void *memset(void *s, int c, size_t n) { diff --git a/arch/tile/lib/spinlock_32.c b/arch/tile/lib/spinlock_32.c index 485e24d62c6b..5cd1c4004eca 100644 --- a/arch/tile/lib/spinlock_32.c +++ b/arch/tile/lib/spinlock_32.c @@ -167,23 +167,30 @@ void arch_write_lock_slow(arch_rwlock_t *rwlock, u32 val) * when we compare them. */ u32 my_ticket_; + u32 iterations = 0; - /* Take out the next ticket; this will also stop would-be readers. */ - if (val & 1) - val = get_rwlock(rwlock); - rwlock->lock = __insn_addb(val, 1 << WR_NEXT_SHIFT); + /* + * Wait until there are no readers, then bump up the next + * field and capture the ticket value. + */ + for (;;) { + if (!(val & 1)) { + if ((val >> RD_COUNT_SHIFT) == 0) + break; + rwlock->lock = val; + } + delay_backoff(iterations++); + val = __insn_tns((int *)&rwlock->lock); + } - /* Extract my ticket value from the original word. */ + /* Take out the next ticket and extract my ticket value. */ + rwlock->lock = __insn_addb(val, 1 << WR_NEXT_SHIFT); my_ticket_ = val >> WR_NEXT_SHIFT; - /* - * Wait until the "current" field matches our ticket, and - * there are no remaining readers. - */ + /* Wait until the "current" field matches our ticket. 
*/ for (;;) { u32 curr_ = val >> WR_CURR_SHIFT; - u32 readers = val >> RD_COUNT_SHIFT; - u32 delta = ((my_ticket_ - curr_) & WR_MASK) + !!readers; + u32 delta = ((my_ticket_ - curr_) & WR_MASK); if (likely(delta == 0)) break; diff --git a/arch/tile/lib/strlen_32.c b/arch/tile/lib/strlen_32.c index f26f88e11e4a..4974292a5534 100644 --- a/arch/tile/lib/strlen_32.c +++ b/arch/tile/lib/strlen_32.c @@ -16,6 +16,8 @@ #include <linux/string.h> #include <linux/module.h> +#undef strlen + size_t strlen(const char *s) { /* Get an aligned pointer. */ diff --git a/arch/tile/mm/fault.c b/arch/tile/mm/fault.c index 704f3e8a4385..dcebfc831cd6 100644 --- a/arch/tile/mm/fault.c +++ b/arch/tile/mm/fault.c @@ -24,7 +24,6 @@ #include <linux/mman.h> #include <linux/mm.h> #include <linux/smp.h> -#include <linux/smp_lock.h> #include <linux/interrupt.h> #include <linux/init.h> #include <linux/tty.h> @@ -66,10 +65,10 @@ static noinline void force_sig_info_fault(int si_signo, int si_code, #ifndef __tilegx__ /* * Synthesize the fault a PL0 process would get by doing a word-load of - * an unaligned address or a high kernel address. Called indirectly - * from sys_cmpxchg() in kernel/intvec.S. + * an unaligned address or a high kernel address. */ -int _sys_cmpxchg_badaddr(unsigned long address, struct pt_regs *regs) +SYSCALL_DEFINE2(cmpxchg_badaddr, unsigned long, address, + struct pt_regs *, regs) { if (address >= PAGE_OFFSET) force_sig_info_fault(SIGSEGV, SEGV_MAPERR, address, @@ -563,10 +562,10 @@ do_sigbus: /* * When we take an ITLB or DTLB fault or access violation in the * supervisor while the critical section bit is set, the hypervisor is - * reluctant to write new values into the EX_CONTEXT_1_x registers, + * reluctant to write new values into the EX_CONTEXT_K_x registers, * since that might indicate we have not yet squirreled the SPR * contents away and can thus safely take a recursive interrupt. - * Accordingly, the hypervisor passes us the PC via SYSTEM_SAVE_1_2. 
+ * Accordingly, the hypervisor passes us the PC via SYSTEM_SAVE_K_2. * * Note that this routine is called before homecache_tlb_defer_enter(), * which means that we can properly unlock any atomics that might @@ -610,7 +609,7 @@ struct intvec_state do_page_fault_ics(struct pt_regs *regs, int fault_num, * fault. We didn't set up a kernel stack on initial entry to * sys_cmpxchg, but instead had one set up by the fault, which * (because sys_cmpxchg never releases ICS) came to us via the - * SYSTEM_SAVE_1_2 mechanism, and thus EX_CONTEXT_1_[01] are + * SYSTEM_SAVE_K_2 mechanism, and thus EX_CONTEXT_K_[01] are * still referencing the original user code. We release the * atomic lock and rewrite pt_regs so that it appears that we * came from user-space directly, and after we finish the diff --git a/arch/tile/mm/highmem.c b/arch/tile/mm/highmem.c index 12ab137e7d4f..31dbbd9afe47 100644 --- a/arch/tile/mm/highmem.c +++ b/arch/tile/mm/highmem.c @@ -56,50 +56,6 @@ void kunmap(struct page *page) } EXPORT_SYMBOL(kunmap); -static void debug_kmap_atomic_prot(enum km_type type) -{ -#ifdef CONFIG_DEBUG_HIGHMEM - static unsigned warn_count = 10; - - if (unlikely(warn_count == 0)) - return; - - if (unlikely(in_interrupt())) { - if (in_irq()) { - if (type != KM_IRQ0 && type != KM_IRQ1 && - type != KM_BIO_SRC_IRQ && - /* type != KM_BIO_DST_IRQ && */ - type != KM_BOUNCE_READ) { - WARN_ON(1); - warn_count--; - } - } else if (!irqs_disabled()) { /* softirq */ - if (type != KM_IRQ0 && type != KM_IRQ1 && - type != KM_SOFTIRQ0 && type != KM_SOFTIRQ1 && - type != KM_SKB_SUNRPC_DATA && - type != KM_SKB_DATA_SOFTIRQ && - type != KM_BOUNCE_READ) { - WARN_ON(1); - warn_count--; - } - } - } - - if (type == KM_IRQ0 || type == KM_IRQ1 || type == KM_BOUNCE_READ || - type == KM_BIO_SRC_IRQ /* || type == KM_BIO_DST_IRQ */) { - if (!irqs_disabled()) { - WARN_ON(1); - warn_count--; - } - } else if (type == KM_SOFTIRQ0 || type == KM_SOFTIRQ1) { - if (irq_count() == 0 && !irqs_disabled()) { - WARN_ON(1); - 
 warn_count--; - } - } -#endif -} - /* * Describe a single atomic mapping of a page on a given cpu at a * given address, and allow it to be linked into a list. @@ -240,10 +196,10 @@ void kmap_atomic_fix_kpte(struct page *page, int finished) * When holding an atomic kmap it is not legal to sleep, so atomic * kmaps are appropriate for short, tight code paths only. */ -void *kmap_atomic_prot(struct page *page, enum km_type type, pgprot_t prot) +void *kmap_atomic_prot(struct page *page, pgprot_t prot) { - enum fixed_addresses idx; unsigned long vaddr; + int idx, type; pte_t *pte; /* even !CONFIG_PREEMPT needs this, for in_atomic in do_page_fault */ @@ -255,8 +211,7 @@ void *kmap_atomic_prot(struct page *page, enum km_type type, pgprot_t prot) if (!PageHighMem(page)) return page_address(page); - debug_kmap_atomic_prot(type); - + type = kmap_atomic_idx_push(); idx = type + KM_TYPE_NR*smp_processor_id(); vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx); pte = kmap_get_pte(vaddr); @@ -269,28 +224,35 @@ void *kmap_atomic_prot(struct page *page, enum km_type type, pgprot_t prot) } EXPORT_SYMBOL(kmap_atomic_prot); -void *kmap_atomic(struct page *page, enum km_type type) +void *__kmap_atomic(struct page *page) { /* PAGE_NONE is a magic value that tells us to check immutability. */ - return kmap_atomic_prot(page, type, PAGE_NONE); + return kmap_atomic_prot(page, PAGE_NONE); } -EXPORT_SYMBOL(kmap_atomic); +EXPORT_SYMBOL(__kmap_atomic); -void kunmap_atomic_notypecheck(void *kvaddr, enum km_type type) +void __kunmap_atomic(void *kvaddr) { unsigned long vaddr = (unsigned long) kvaddr & PAGE_MASK; - enum fixed_addresses idx = type + KM_TYPE_NR*smp_processor_id(); - /* - * Force other mappings to Oops if they try to access this pte without - * first remapping it. Keeping stale mappings around is a bad idea.
- */ - if (vaddr == __fix_to_virt(FIX_KMAP_BEGIN+idx)) { + if (vaddr >= __fix_to_virt(FIX_KMAP_END) && + vaddr <= __fix_to_virt(FIX_KMAP_BEGIN)) { pte_t *pte = kmap_get_pte(vaddr); pte_t pteval = *pte; + int idx, type; + + type = kmap_atomic_idx(); + idx = type + KM_TYPE_NR*smp_processor_id(); + + /* + * Force other mappings to Oops if they try to access this pte + * without first remapping it. Keeping stale mappings around + * is a bad idea. + */ BUG_ON(!pte_present(pteval) && !pte_migrating(pteval)); kmap_atomic_unregister(pte_page(pteval), vaddr); kpte_clear_flush(pte, vaddr); + kmap_atomic_idx_pop(); } else { /* Must be a lowmem page */ BUG_ON(vaddr < PAGE_OFFSET); @@ -300,19 +262,19 @@ void kunmap_atomic_notypecheck(void *kvaddr, enum km_type type) arch_flush_lazy_mmu_mode(); pagefault_enable(); } -EXPORT_SYMBOL(kunmap_atomic_notypecheck); +EXPORT_SYMBOL(__kunmap_atomic); /* * This API is supposed to allow us to map memory without a "struct page". * Currently we don't support this, though this may change in the future. */ -void *kmap_atomic_pfn(unsigned long pfn, enum km_type type) +void *kmap_atomic_pfn(unsigned long pfn) { - return kmap_atomic(pfn_to_page(pfn), type); + return kmap_atomic(pfn_to_page(pfn)); } -void *kmap_atomic_prot_pfn(unsigned long pfn, enum km_type type, pgprot_t prot) +void *kmap_atomic_prot_pfn(unsigned long pfn, pgprot_t prot) { - return kmap_atomic_prot(pfn_to_page(pfn), type, prot); + return kmap_atomic_prot(pfn_to_page(pfn), prot); } struct page *kmap_atomic_to_page(void *ptr) diff --git a/arch/tile/mm/homecache.c b/arch/tile/mm/homecache.c index fb3b4a55cec4..d78df3a6ee15 100644 --- a/arch/tile/mm/homecache.c +++ b/arch/tile/mm/homecache.c @@ -37,6 +37,8 @@ #include <asm/pgalloc.h> #include <asm/homecache.h> +#include <arch/sim.h> + #include "migrate.h" @@ -217,13 +219,6 @@ static unsigned long cache_flush_length(unsigned long length) return (length >= CHIP_L2_CACHE_SIZE()) ? 
HV_FLUSH_EVICT_L2 : length; } -/* On the simulator, confirm lines have been evicted everywhere. */ -static void validate_lines_evicted(unsigned long pfn, size_t length) -{ - sim_syscall(SIM_SYSCALL_VALIDATE_LINES_EVICTED, - (HV_PhysAddr)pfn << PAGE_SHIFT, length); -} - /* Flush a page out of whatever cache(s) it is in. */ void homecache_flush_cache(struct page *page, int order) { @@ -234,7 +229,7 @@ void homecache_flush_cache(struct page *page, int order) homecache_mask(page, pages, &home_mask); flush_remote(pfn, length, &home_mask, 0, 0, 0, NULL, NULL, 0); - validate_lines_evicted(pfn, pages * PAGE_SIZE); + sim_validate_lines_evicted(PFN_PHYS(pfn), pages * PAGE_SIZE); } diff --git a/arch/tile/mm/hugetlbpage.c b/arch/tile/mm/hugetlbpage.c index 24688b697a8d..201a582c4137 100644 --- a/arch/tile/mm/hugetlbpage.c +++ b/arch/tile/mm/hugetlbpage.c @@ -21,7 +21,6 @@ #include <linux/mm.h> #include <linux/hugetlb.h> #include <linux/pagemap.h> -#include <linux/smp_lock.h> #include <linux/slab.h> #include <linux/err.h> #include <linux/sysctl.h> diff --git a/arch/tile/mm/init.c b/arch/tile/mm/init.c index d89c9eacd162..0b9ce69b0ee5 100644 --- a/arch/tile/mm/init.c +++ b/arch/tile/mm/init.c @@ -988,8 +988,12 @@ static long __write_once initfree = 1; /* Select whether to free (1) or mark unusable (0) the __init pages. */ static int __init set_initfree(char *str) { - strict_strtol(str, 0, &initfree); - pr_info("initfree: %s free init pages\n", initfree ? "will" : "won't"); + long val; + if (strict_strtol(str, 0, &val)) { + initfree = val; + pr_info("initfree: %s free init pages\n", + initfree ? "will" : "won't"); + } return 1; } __setup("initfree=", set_initfree); @@ -1060,7 +1064,7 @@ void free_initmem(void) /* * Free the pages mapped from 0xc0000000 that correspond to code - * pages from 0xfd000000 that we won't use again after init. + * pages from MEM_SV_INTRPT that we won't use again after init. 
*/ free_init_pages("unused kernel text", (unsigned long)_sinittext - text_delta, diff --git a/arch/tile/mm/pgtable.c b/arch/tile/mm/pgtable.c index 335c24621c41..1f5430c53d0d 100644 --- a/arch/tile/mm/pgtable.c +++ b/arch/tile/mm/pgtable.c @@ -134,9 +134,9 @@ void __set_fixmap(enum fixed_addresses idx, unsigned long phys, pgprot_t flags) } #if defined(CONFIG_HIGHPTE) -pte_t *_pte_offset_map(pmd_t *dir, unsigned long address, enum km_type type) +pte_t *_pte_offset_map(pmd_t *dir, unsigned long address) { - pte_t *pte = kmap_atomic(pmd_page(*dir), type) + + pte_t *pte = kmap_atomic(pmd_page(*dir)) + (pmd_ptfn(*dir) << HV_LOG2_PAGE_TABLE_ALIGN) & ~PAGE_MASK; return &pte[pte_index(address)]; } |