author    Linus Torvalds <torvalds@linux-foundation.org>  2017-05-09 10:10:15 -0700
committer Linus Torvalds <torvalds@linux-foundation.org>  2017-05-09 10:10:15 -0700
commit    4a1e31c68e9f40be32838944931178b0d9ed9162 (patch)
tree      98b2cd1280ec759543a07a9517db04706ebf5de5 /arch/arc/mm
parent    c6778ff813d2ca3e3c8733c87dc8b6831a64578b (diff)
parent    cf4100d1cddcd243f80ac2af2e4c4273919ff225 (diff)
Merge tag 'arc-4.12-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/vgupta/arc
Pull ARC updates from Vineet Gupta:
- AXS10x platform clk updates for I2S, PGU
- add region based cache flush operation for ARCv2 cores (see the C sketch
  after the commit list below)
- enforce PAE40 dependency on HIGHMEM
- ptrace support for additional regs in ARCv2 cores
- fix build failure in linux-next due to a header include ordering change
* tag 'arc-4.12-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/vgupta/arc:
Revert "ARCv2: Allow enabling PAE40 w/o HIGHMEM"
ARC: mm: fix build failure in linux-next for UP builds
ARCv2: ptrace: provide regset for accumulator/r30 regs
elf: Add ARCv2 specific core note section
ARCv2: mm: micro-optimize region flush generated code
ARCv2: mm: Merge 2 updates to DC_CTRL for region flush
ARCv2: mm: Implement cache region flush operations
ARC: mm: Move full_page computation into cache version agnostic wrapper
arc: axs10x: Fix ARC PGU default clock frequency
arc: axs10x: Add DT bindings for I2S audio playback
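
The headline change of this pull is worth a concrete illustration. Below is a minimal, self-contained C sketch contrasting the two schemes the diff switches between: the classic per-line loop (one aux-register write per cache line) and the new ARCv2 region operation (two writes bracketing the whole range, after which hardware iterates). The register numbers and the write_aux_reg() stub are illustrative placeholders, not the kernel's real definitions; only the control flow mirrors the patch.

#include <stdint.h>
#include <stdio.h>

#define L1_CACHE_BYTES   64                         /* assumed line size */
#define CACHE_LINE_MASK  (~(uintptr_t)(L1_CACHE_BYTES - 1))

/* Illustrative stand-ins for the ARC aux registers named in the diff */
enum { ARC_REG_DC_FLDL = 1, ARC_REG_DC_STARTR, ARC_REG_DC_ENDR };

/* Stub: the kernel's write_aux_reg() is an ARC-specific register accessor */
static void write_aux_reg(unsigned int reg, uintptr_t val)
{
	printf("aux[%u] <- 0x%lx\n", reg, (unsigned long)val);
}

/* Old scheme: floor/ceil the range, then one write per cache line */
static void flush_per_line(uintptr_t paddr, unsigned long sz)
{
	int num_lines = (sz + (paddr & ~CACHE_LINE_MASK) + L1_CACHE_BYTES - 1)
			/ L1_CACHE_BYTES;

	paddr &= CACHE_LINE_MASK;
	while (num_lines-- > 0) {
		write_aux_reg(ARC_REG_DC_FLDL, paddr);
		paddr += L1_CACHE_BYTES;
	}
}

/* New scheme: two writes delimit the region; hardware walks the lines */
static void flush_region(uintptr_t paddr, unsigned long sz)
{
	/* ENDR must be programmed before STARTR; ENDR is exclusive */
	write_aux_reg(ARC_REG_DC_ENDR, paddr + sz);
	write_aux_reg(ARC_REG_DC_STARTR, paddr);
	/* the real code then polls DC_CTRL.FS for completion */
}

int main(void)
{
	flush_per_line(0x80001240, 256);   /* 4 register writes */
	flush_region(0x80001240, 256);     /* 2 register writes */
	return 0;
}

For a line-aligned, multi-line range the register traffic drops from one write per line to a constant two writes, which is the micro-optimization the series is after.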
Diffstat (limited to 'arch/arc/mm')
-rw-r--r--  arch/arc/mm/cache.c | 111
1 file changed, 98 insertions(+), 13 deletions(-)
diff --git a/arch/arc/mm/cache.c b/arch/arc/mm/cache.c
index 928562967f3c..a867575a758b 100644
--- a/arch/arc/mm/cache.c
+++ b/arch/arc/mm/cache.c
@@ -21,6 +21,10 @@
 #include <asm/cachectl.h>
 #include <asm/setup.h>
 
+#ifdef CONFIG_ISA_ARCV2
+#define USE_RGN_FLSH	1
+#endif
+
 static int l2_line_sz;
 static int ioc_exists;
 int slc_enable = 1, ioc_enable = 1;
@@ -28,7 +32,7 @@ unsigned long perip_base = ARC_UNCACHED_ADDR_SPACE; /* legacy value for boot */
 unsigned long perip_end = 0xFFFFFFFF; /* legacy value */
 
 void (*_cache_line_loop_ic_fn)(phys_addr_t paddr, unsigned long vaddr,
-			       unsigned long sz, const int cacheop);
+			       unsigned long sz, const int op, const int full_page);
 
 void (*__dma_cache_wback_inv)(phys_addr_t start, unsigned long sz);
 void (*__dma_cache_inv)(phys_addr_t start, unsigned long sz);
@@ -233,11 +237,10 @@ slc_chk:
 
 static inline
 void __cache_line_loop_v2(phys_addr_t paddr, unsigned long vaddr,
-			  unsigned long sz, const int op)
+			  unsigned long sz, const int op, const int full_page)
 {
 	unsigned int aux_cmd;
 	int num_lines;
-	const int full_page = __builtin_constant_p(sz) && sz == PAGE_SIZE;
 
 	if (op == OP_INV_IC) {
 		aux_cmd = ARC_REG_IC_IVIL;
@@ -279,11 +282,10 @@ void __cache_line_loop_v2(phys_addr_t paddr, unsigned long vaddr,
  */
 static inline
 void __cache_line_loop_v3(phys_addr_t paddr, unsigned long vaddr,
-			  unsigned long sz, const int op)
+			  unsigned long sz, const int op, const int full_page)
 {
 	unsigned int aux_cmd, aux_tag;
 	int num_lines;
-	const int full_page = __builtin_constant_p(sz) && sz == PAGE_SIZE;
 
 	if (op == OP_INV_IC) {
 		aux_cmd = ARC_REG_IC_IVIL;
@@ -334,6 +336,8 @@ void __cache_line_loop_v3(phys_addr_t paddr, unsigned long vaddr,
 	}
 }
 
+#ifndef USE_RGN_FLSH
+
 /*
  * In HS38x (MMU v4), I-cache is VIPT (can alias), D-cache is PIPT
  * Here's how cache ops are implemented
@@ -349,17 +353,16 @@ void __cache_line_loop_v3(phys_addr_t paddr, unsigned long vaddr,
  */
 static inline
 void __cache_line_loop_v4(phys_addr_t paddr, unsigned long vaddr,
-			  unsigned long sz, const int cacheop)
+			  unsigned long sz, const int op, const int full_page)
 {
 	unsigned int aux_cmd;
 	int num_lines;
-	const int full_page_op = __builtin_constant_p(sz) && sz == PAGE_SIZE;
 
-	if (cacheop == OP_INV_IC) {
+	if (op == OP_INV_IC) {
 		aux_cmd = ARC_REG_IC_IVIL;
 	} else {
 		/* d$ cmd: INV (discard or wback-n-discard) OR FLUSH (wback) */
-		aux_cmd = cacheop & OP_INV ? ARC_REG_DC_IVDL : ARC_REG_DC_FLDL;
+		aux_cmd = op & OP_INV ? ARC_REG_DC_IVDL : ARC_REG_DC_FLDL;
 	}
 
 	/* Ensure we properly floor/ceil the non-line aligned/sized requests
@@ -368,7 +371,7 @@ void __cache_line_loop_v4(phys_addr_t paddr, unsigned long vaddr,
 	 * -@paddr will be cache-line aligned already (being page aligned)
 	 * -@sz will be integral multiple of line size (being page sized).
 	 */
-	if (!full_page_op) {
+	if (!full_page) {
 		sz += paddr & ~CACHE_LINE_MASK;
 		paddr &= CACHE_LINE_MASK;
 	}
@@ -381,7 +384,7 @@ void __cache_line_loop_v4(phys_addr_t paddr, unsigned long vaddr,
 	 * - (and needs to be written before the lower 32 bits)
 	 */
 	if (is_pae40_enabled()) {
-		if (cacheop == OP_INV_IC)
+		if (op == OP_INV_IC)
 			/*
 			 * Non aliasing I-cache in HS38,
 			 * aliasing I-cache handled in __cache_line_loop_v3()
@@ -397,6 +400,55 @@ void __cache_line_loop_v4(phys_addr_t paddr, unsigned long vaddr,
 	}
 }
 
+#else
+
+/*
+ * optimized flush operation which takes a region as opposed to iterating per line
+ */
+static inline
+void __cache_line_loop_v4(phys_addr_t paddr, unsigned long vaddr,
+			  unsigned long sz, const int op, const int full_page)
+{
+	unsigned int s, e;
+
+	/* Only for Non aliasing I-cache in HS38 */
+	if (op == OP_INV_IC) {
+		s = ARC_REG_IC_IVIR;
+		e = ARC_REG_IC_ENDR;
+	} else {
+		s = ARC_REG_DC_STARTR;
+		e = ARC_REG_DC_ENDR;
+	}
+
+	if (!full_page) {
+		/* for any leading gap between @paddr and start of cache line */
+		sz += paddr & ~CACHE_LINE_MASK;
+		paddr &= CACHE_LINE_MASK;
+
+		/*
+		 * account for any trailing gap to end of cache line
+		 * this is equivalent to DIV_ROUND_UP() in line ops above
+		 */
+		sz += L1_CACHE_BYTES - 1;
+	}
+
+	if (is_pae40_enabled()) {
+		/* TBD: check if crossing 4TB boundary */
+		if (op == OP_INV_IC)
+			write_aux_reg(ARC_REG_IC_PTAG_HI, (u64)paddr >> 32);
+		else
+			write_aux_reg(ARC_REG_DC_PTAG_HI, (u64)paddr >> 32);
+	}
+
+	/* ENDR needs to be set ahead of START */
+	write_aux_reg(e, paddr + sz);	/* ENDR is exclusive */
+	write_aux_reg(s, paddr);
+
+	/* caller waits on DC_CTRL.FS */
+}
+
+#endif
+
 #if (CONFIG_ARC_MMU_VER < 3)
 #define __cache_line_loop	__cache_line_loop_v2
 #elif (CONFIG_ARC_MMU_VER == 3)
@@ -411,6 +463,11 @@ void __cache_line_loop_v4(phys_addr_t paddr, unsigned long vaddr,
  * Machine specific helpers for Entire D-Cache or Per Line ops
  */
 
+#ifndef USE_RGN_FLSH
+/*
+ * this version avoids extra read/write of DC_CTRL for flush or invalidate ops
+ * in the non region flush regime (such as for ARCompact)
+ */
 static inline void __before_dc_op(const int op)
 {
 	if (op == OP_FLUSH_N_INV) {
@@ -424,6 +481,32 @@ static inline void __before_dc_op(const int op)
 	}
 }
 
+#else
+
+static inline void __before_dc_op(const int op)
+{
+	const unsigned int ctl = ARC_REG_DC_CTRL;
+	unsigned int val = read_aux_reg(ctl);
+
+	if (op == OP_FLUSH_N_INV) {
+		val |= DC_CTRL_INV_MODE_FLUSH;
+	}
+
+	if (op != OP_INV_IC) {
+		/*
+		 * Flush / Invalidate is provided by DC_CTRL.RNG_OP 0 or 1
+		 * combined Flush-n-invalidate uses DC_CTRL.IM = 1 set above
+		 */
+		val &= ~DC_CTRL_RGN_OP_MSK;
+		if (op & OP_INV)
+			val |= DC_CTRL_RGN_OP_INV;
+	}
+	write_aux_reg(ctl, val);
+}
+
+#endif
+
+
 static inline void __after_dc_op(const int op)
 {
 	if (op & OP_FLUSH) {
@@ -486,13 +569,14 @@ static void __dc_enable(void)
 static inline void __dc_line_op(phys_addr_t paddr, unsigned long vaddr,
 				unsigned long sz, const int op)
 {
+	const int full_page = __builtin_constant_p(sz) && sz == PAGE_SIZE;
 	unsigned long flags;
 
 	local_irq_save(flags);
 
 	__before_dc_op(op);
 
-	__cache_line_loop(paddr, vaddr, sz, op);
+	__cache_line_loop(paddr, vaddr, sz, op, full_page);
 
 	__after_dc_op(op);
 
@@ -521,10 +605,11 @@ static inline void
 __ic_line_inv_vaddr_local(phys_addr_t paddr, unsigned long vaddr,
 			  unsigned long sz)
 {
+	const int full_page = __builtin_constant_p(sz) && sz == PAGE_SIZE;
 	unsigned long flags;
 
 	local_irq_save(flags);
-	(*_cache_line_loop_ic_fn)(paddr, vaddr, sz, OP_INV_IC);
+	(*_cache_line_loop_ic_fn)(paddr, vaddr, sz, OP_INV_IC, full_page);
 	local_irq_restore(flags);
 }
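
One subtle bit in the region variant above is the floor/ceil arithmetic for requests that are not page sized: the start address is floored to a line boundary and the size is padded so the exclusive ENDR still covers the last partially-touched line. Here is a standalone C demo of just that computation; the address and size are made-up values, only the arithmetic is taken from the patch.

#include <stdio.h>
#include <stdint.h>

#define L1_CACHE_BYTES  64   /* assumed line size */
#define CACHE_LINE_MASK (~(uintptr_t)(L1_CACHE_BYTES - 1))

int main(void)
{
	uintptr_t paddr = 0x80001234;   /* not line aligned */
	unsigned long sz = 100;         /* not a multiple of the line size */

	/* leading gap between @paddr and the start of its cache line */
	sz += paddr & ~CACHE_LINE_MASK;
	paddr &= CACHE_LINE_MASK;

	/* trailing gap: equivalent to DIV_ROUND_UP() in the per-line loop */
	sz += L1_CACHE_BYTES - 1;

	printf("STARTR = 0x%lx\n", (unsigned long)paddr);   /* 0x80001200 */
	printf("ENDR   = 0x%lx (exclusive)\n", (unsigned long)(paddr + sz));
	return 0;
}

Over-covering by part of a line at the end is harmless for flush/invalidate semantics, which is why a cheap size pad suffices instead of an exact round-up of the end address.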