diff options
author | Thierry Reding <treding@nvidia.com> | 2014-06-04 12:09:58 +0200 |
---|---|---|
committer | Thierry Reding <treding@nvidia.com> | 2014-06-04 12:09:58 +0200 |
commit | 61fdd9a3b603f7cd4bd0dc19f229405139d345e3 (patch) | |
tree | 315ab44d75cd6bc49b2e87aa683121fe04c634d0 | |
parent | bc186a6d2a5305a7f6440207c8823f597b6c72d0 (diff) | |
parent | 87c486fad18ee93d7da8955b21aa499c4c01118b (diff) |
Merge branch 'staging/gk20a' into staging/master
50 files changed, 1951 insertions, 98 deletions
diff --git a/Documentation/devicetree/bindings/gpu/nvidia,gk20a.txt b/Documentation/devicetree/bindings/gpu/nvidia,gk20a.txt new file mode 100644 index 00000000000..59abdd0a6b5 --- /dev/null +++ b/Documentation/devicetree/bindings/gpu/nvidia,gk20a.txt @@ -0,0 +1,43 @@ +NVIDIA GK20A Graphics Processing Unit + +Required properties: +- compatible: "nvidia,<chip>-<gpu>" + Currently recognized values: + - nvidia,tegra124-gk20a +- reg: Physical base address and length of the controller's registers. + Must contain two entries: + - first entry for bar0 + - second entry for bar1 +- interrupts: Must contain an entry for each entry in interrupt-names. + See ../interrupt-controller/interrupts.txt for details. +- interrupt-names: Must include the following entries: + - stall + - nonstall +- vdd-supply: regulator for supply voltage. +- clocks: Must contain an entry for each entry in clock-names. + See ../clocks/clock-bindings.txt for details. +- clock-names: Must include the following entries: + - gpu + - pwr +- resets: Must contain an entry for each entry in reset-names. + See ../reset/reset.txt for details. 
+- reset-names: Must include the following entries: + - gpu + +Example: + + gpu@0,57000000 { + compatible = "nvidia,gk20a"; + reg = <0x0 0x57000000 0x0 0x01000000>, + <0x0 0x58000000 0x0 0x01000000>; + interrupts = <GIC_SPI 157 IRQ_TYPE_LEVEL_HIGH>, + <GIC_SPI 158 IRQ_TYPE_LEVEL_HIGH>; + interrupt-names = "stall", "nonstall"; + vdd-supply = <&vdd_gpu>; + clocks = <&tegra_car TEGRA124_CLK_GPU>, + <&tegra_car TEGRA124_CLK_PLL_P_OUT5>; + clock-names = "gpu", "pwr"; + resets = <&tegra_car 184>; + reset-names = "gpu"; + status = "disabled"; + }; diff --git a/arch/arm/boot/dts/tegra124-jetson-tk1.dts b/arch/arm/boot/dts/tegra124-jetson-tk1.dts index e31fb61a81d..15a194d1277 100644 --- a/arch/arm/boot/dts/tegra124-jetson-tk1.dts +++ b/arch/arm/boot/dts/tegra124-jetson-tk1.dts @@ -30,6 +30,12 @@ }; }; + gpu@0,57000000 { + status = "okay"; + + vdd-supply = <&vdd_gpu>; + }; + pinmux: pinmux@0,70000868 { pinctrl-names = "default"; pinctrl-0 = <&state_default>; @@ -1505,7 +1511,7 @@ regulator-always-on; }; - sd6 { + vdd_gpu: sd6 { regulator-name = "+VDD_GPU_AP"; regulator-min-microvolt = <650000>; regulator-max-microvolt = <1200000>; diff --git a/arch/arm/boot/dts/tegra124-venice2.dts b/arch/arm/boot/dts/tegra124-venice2.dts index f0bb8424402..86970ee4870 100644 --- a/arch/arm/boot/dts/tegra124-venice2.dts +++ b/arch/arm/boot/dts/tegra124-venice2.dts @@ -42,6 +42,12 @@ }; }; + gpu@0,57000000 { + status = "okay"; + + vdd-supply = <&vdd_gpu>; + }; + pinmux: pinmux@0,70000868 { pinctrl-names = "default"; pinctrl-0 = <&pinmux_default>; @@ -726,7 +732,7 @@ regulator-always-on; }; - sd6 { + vdd_gpu: sd6 { regulator-name = "+VDD_GPU_AP"; regulator-min-microvolt = <650000>; regulator-max-microvolt = <1200000>; diff --git a/arch/arm/boot/dts/tegra124.dtsi b/arch/arm/boot/dts/tegra124.dtsi index 011831ffd39..4a40cc54304 100644 --- a/arch/arm/boot/dts/tegra124.dtsi +++ b/arch/arm/boot/dts/tegra124.dtsi @@ -103,6 +103,21 @@ (GIC_CPU_MASK_SIMPLE(4) | IRQ_TYPE_LEVEL_HIGH)>; }; + 
gpu@0,57000000 { + compatible = "nvidia,gk20a"; + reg = <0x0 0x57000000 0x0 0x01000000>, + <0x0 0x58000000 0x0 0x01000000>; + interrupts = <GIC_SPI 157 IRQ_TYPE_LEVEL_HIGH>, + <GIC_SPI 158 IRQ_TYPE_LEVEL_HIGH>; + interrupt-names = "stall", "nonstall"; + clocks = <&tegra_car TEGRA124_CLK_GPU>, + <&tegra_car TEGRA124_CLK_PLL_P_OUT5>; + clock-names = "gpu", "pwr"; + resets = <&tegra_car 184>; + reset-names = "gpu"; + status = "disabled"; + }; + timer@0,60005000 { compatible = "nvidia,tegra124-timer", "nvidia,tegra20-timer"; reg = <0x0 0x60005000 0x0 0x400>; diff --git a/arch/arm/configs/tegra_defconfig b/arch/arm/configs/tegra_defconfig index fb25e2982f6..860165422dd 100644 --- a/arch/arm/configs/tegra_defconfig +++ b/arch/arm/configs/tegra_defconfig @@ -90,7 +90,6 @@ CONFIG_RFKILL_INPUT=y CONFIG_RFKILL_GPIO=y CONFIG_DEVTMPFS=y CONFIG_DEVTMPFS_MOUNT=y -# CONFIG_FIRMWARE_IN_KERNEL is not set CONFIG_DMA_CMA=y CONFIG_CMA_SIZE_MBYTES=64 CONFIG_MTD=y @@ -293,3 +292,11 @@ CONFIG_CRYPTO_TWOFISH=y # CONFIG_CRYPTO_ANSI_CPRNG is not set CONFIG_CRYPTO_DEV_TEGRA_AES=y CONFIG_CRC_CCITT=y +CONFIG_DRM_NOUVEAU=y +CONFIG_NOUVEAU_DEBUG=5 +CONFIG_NOUVEAU_DEBUG_DEFAULT=3 +CONFIG_FW_LOADER=y +CONFIG_FIRMWARE_IN_KERNEL=y +CONFIG_EXTRA_FIRMWARE="nouveau/nvea_fuc409c nouveau/nvea_fuc409d nouveau/nvea_fuc41ac nouveau/nvea_fuc41ad" +CONFIG_EXTRA_FIRMWARE_DIR="firmware" +CONFIG_FW_LOADER_USER_HELPER=y diff --git a/arch/arm/mach-tegra/tegra.c b/arch/arm/mach-tegra/tegra.c index 15ac9fcc96b..972fc87ec18 100644 --- a/arch/arm/mach-tegra/tegra.c +++ b/arch/arm/mach-tegra/tegra.c @@ -73,6 +73,8 @@ u32 tegra_uart_config[3] = { static void __init tegra_init_early(void) { of_register_trusted_foundations(); + /* workaround to avoid FIFO regs freeze on T124 */ + writel(0x1, IO_ADDRESS(0x70019000 + 0x650)); tegra_apb_io_init(); tegra_init_fuse(); tegra_cpu_reset_handler_init(); diff --git a/drivers/gpu/drm/nouveau/Kconfig b/drivers/gpu/drm/nouveau/Kconfig index 637c29a3312..b8834ad55eb 100644 --- 
a/drivers/gpu/drm/nouveau/Kconfig +++ b/drivers/gpu/drm/nouveau/Kconfig @@ -25,6 +25,14 @@ config DRM_NOUVEAU help Choose this option for open-source nVidia support. +config NOUVEAU_PLATFORM_DRIVER + bool + depends on DRM_NOUVEAU + default y if ARCH_TEGRA + help + Support for Nouveau platform driver, used for integrated GPUs as found + on NVIDIA Tegra K1. + config NOUVEAU_DEBUG int "Maximum debug level" depends on DRM_NOUVEAU diff --git a/drivers/gpu/drm/nouveau/Makefile b/drivers/gpu/drm/nouveau/Makefile index b7d21626477..5d1de264fc9 100644 --- a/drivers/gpu/drm/nouveau/Makefile +++ b/drivers/gpu/drm/nouveau/Makefile @@ -64,6 +64,7 @@ nouveau-y += core/subdev/clock/nva3.o nouveau-y += core/subdev/clock/nvaa.o nouveau-y += core/subdev/clock/nvc0.o nouveau-y += core/subdev/clock/nve0.o +nouveau-y += core/subdev/clock/gk20a.o nouveau-y += core/subdev/clock/pllnv04.o nouveau-y += core/subdev/clock/pllnva3.o nouveau-y += core/subdev/devinit/base.o @@ -102,6 +103,7 @@ nouveau-y += core/subdev/fb/nvaa.o nouveau-y += core/subdev/fb/nvaf.o nouveau-y += core/subdev/fb/nvc0.o nouveau-y += core/subdev/fb/nve0.o +nouveau-y += core/subdev/fb/gk20a.o nouveau-y += core/subdev/fb/gm107.o nouveau-y += core/subdev/fb/ramnv04.o nouveau-y += core/subdev/fb/ramnv10.o @@ -117,6 +119,7 @@ nouveau-y += core/subdev/fb/ramnva3.o nouveau-y += core/subdev/fb/ramnvaa.o nouveau-y += core/subdev/fb/ramnvc0.o nouveau-y += core/subdev/fb/ramnve0.o +nouveau-y += core/subdev/fb/ramgk20a.o nouveau-y += core/subdev/fb/ramgm107.o nouveau-y += core/subdev/fb/sddr3.o nouveau-y += core/subdev/fb/gddr5.o @@ -136,10 +139,12 @@ nouveau-y += core/subdev/i2c/nv94.o nouveau-y += core/subdev/i2c/nvd0.o nouveau-y += core/subdev/ibus/nvc0.o nouveau-y += core/subdev/ibus/nve0.o +nouveau-y += core/subdev/ibus/gk20a.o nouveau-y += core/subdev/instmem/base.o nouveau-y += core/subdev/instmem/nv04.o nouveau-y += core/subdev/instmem/nv40.o nouveau-y += core/subdev/instmem/nv50.o +nouveau-y += core/subdev/instmem/gk20a.o 
nouveau-y += core/subdev/ltcg/gf100.o nouveau-y += core/subdev/ltcg/gm107.o nouveau-y += core/subdev/mc/base.o @@ -245,6 +250,7 @@ nouveau-y += core/engine/fifo/nv50.o nouveau-y += core/engine/fifo/nv84.o nouveau-y += core/engine/fifo/nvc0.o nouveau-y += core/engine/fifo/nve0.o +nouveau-y += core/engine/fifo/gk20a.o nouveau-y += core/engine/fifo/nv108.o nouveau-y += core/engine/graph/ctxnv40.o nouveau-y += core/engine/graph/ctxnv50.o @@ -255,6 +261,7 @@ nouveau-y += core/engine/graph/ctxnvc8.o nouveau-y += core/engine/graph/ctxnvd7.o nouveau-y += core/engine/graph/ctxnvd9.o nouveau-y += core/engine/graph/ctxnve4.o +nouveau-y += core/engine/graph/ctxgk20a.o nouveau-y += core/engine/graph/ctxnvf0.o nouveau-y += core/engine/graph/ctxnv108.o nouveau-y += core/engine/graph/ctxgm107.o @@ -275,6 +282,7 @@ nouveau-y += core/engine/graph/nvc8.o nouveau-y += core/engine/graph/nvd7.o nouveau-y += core/engine/graph/nvd9.o nouveau-y += core/engine/graph/nve4.o +nouveau-y += core/engine/graph/gk20a.o nouveau-y += core/engine/graph/nvf0.o nouveau-y += core/engine/graph/nv108.o nouveau-y += core/engine/graph/gm107.o @@ -310,6 +318,7 @@ nouveau-y += nouveau_ttm.o nouveau_sgdma.o nouveau_bo.o nouveau_gem.o nouveau-y += nouveau_prime.o nouveau_abi16.o nouveau-y += nv04_fence.o nv10_fence.o nv17_fence.o nouveau-y += nv50_fence.o nv84_fence.o nvc0_fence.o +nouveau-y += nouveau_debug.o # drm/kms nouveau-y += nouveau_bios.o nouveau_fbcon.o nouveau_display.o @@ -325,6 +334,9 @@ nouveau-y += nv50_display.o # drm/pm nouveau-y += nouveau_hwmon.o nouveau_sysfs.o +# platform driver +nouveau-$(CONFIG_NOUVEAU_PLATFORM_DRIVER) += nouveau_platform.o + # other random bits nouveau-$(CONFIG_COMPAT) += nouveau_ioc32.o ifdef CONFIG_X86 diff --git a/drivers/gpu/drm/nouveau/core/engine/device/nve0.c b/drivers/gpu/drm/nouveau/core/engine/device/nve0.c index 9784cbf8a9d..5b4204f19de 100644 --- a/drivers/gpu/drm/nouveau/core/engine/device/nve0.c +++ b/drivers/gpu/drm/nouveau/core/engine/device/nve0.c @@ 
-156,6 +156,24 @@ nve0_identify(struct nouveau_device *device) device->oclass[NVDEV_ENGINE_PPP ] = &nvc0_ppp_oclass; device->oclass[NVDEV_ENGINE_PERFMON] = &nve0_perfmon_oclass; break; + case 0xea: + device->cname = "GK20A"; + device->oclass[NVDEV_SUBDEV_CLOCK ] = &gk20a_clock_oclass; + device->oclass[NVDEV_SUBDEV_MC ] = nvc3_mc_oclass; + device->oclass[NVDEV_SUBDEV_BUS ] = nvc0_bus_oclass; + device->oclass[NVDEV_SUBDEV_TIMER ] = &gk20a_timer_oclass; + device->oclass[NVDEV_SUBDEV_FB ] = gk20a_fb_oclass; + device->oclass[NVDEV_SUBDEV_IBUS ] = &gk20a_ibus_oclass; + device->oclass[NVDEV_SUBDEV_INSTMEM] = gk20a_instmem_oclass; + device->oclass[NVDEV_SUBDEV_VM ] = &nvc0_vmmgr_oclass; + device->oclass[NVDEV_SUBDEV_BAR ] = &nvc0_bar_oclass; + device->oclass[NVDEV_ENGINE_DMAOBJ ] = &nvd0_dmaeng_oclass; + device->oclass[NVDEV_ENGINE_FIFO ] = gk20a_fifo_oclass; + device->oclass[NVDEV_ENGINE_SW ] = nvc0_software_oclass; + device->oclass[NVDEV_ENGINE_GR ] = gk20a_graph_oclass; + device->oclass[NVDEV_ENGINE_COPY2 ] = &nve0_copy2_oclass; + device->oclass[NVDEV_ENGINE_PERFMON] = &nve0_perfmon_oclass; + break; case 0xf0: device->cname = "GK110"; device->oclass[NVDEV_SUBDEV_VBIOS ] = &nouveau_bios_oclass; diff --git a/drivers/gpu/drm/nouveau/core/engine/fifo/gk20a.c b/drivers/gpu/drm/nouveau/core/engine/fifo/gk20a.c new file mode 100644 index 00000000000..327456eae96 --- /dev/null +++ b/drivers/gpu/drm/nouveau/core/engine/fifo/gk20a.c @@ -0,0 +1,35 @@ +/* + * Copyright (c) 2014, NVIDIA CORPORATION. All rights reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ + +#include "nve0.h" + +struct nouveau_oclass * +gk20a_fifo_oclass = &(struct nve0_fifo_impl) { + .base.handle = NV_ENGINE(FIFO, 0xea), + .base.ofuncs = &(struct nouveau_ofuncs) { + .ctor = nve0_fifo_ctor, + .dtor = nve0_fifo_dtor, + .init = nve0_fifo_init, + .fini = nve0_fifo_fini, + }, + .channels = 128, +}.base; diff --git a/drivers/gpu/drm/nouveau/core/engine/fifo/nve0.h b/drivers/gpu/drm/nouveau/core/engine/fifo/nve0.h index 014344ebee6..e96b32bb1bb 100644 --- a/drivers/gpu/drm/nouveau/core/engine/fifo/nve0.h +++ b/drivers/gpu/drm/nouveau/core/engine/fifo/nve0.h @@ -8,6 +8,7 @@ int nve0_fifo_ctor(struct nouveau_object *, struct nouveau_object *, struct nouveau_object **); void nve0_fifo_dtor(struct nouveau_object *); int nve0_fifo_init(struct nouveau_object *); +int nve0_fifo_fini(struct nouveau_object *, bool); struct nve0_fifo_impl { struct nouveau_oclass base; diff --git a/drivers/gpu/drm/nouveau/core/engine/graph/ctxgk20a.c b/drivers/gpu/drm/nouveau/core/engine/graph/ctxgk20a.c new file mode 100644 index 00000000000..224ee0287ab --- /dev/null +++ b/drivers/gpu/drm/nouveau/core/engine/graph/ctxgk20a.c @@ -0,0 +1,53 @@ +/* + * Copyright (c) 2014, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include "ctxnvc0.h" + +static const struct nvc0_graph_pack +gk20a_grctx_pack_mthd[] = { + { nve4_grctx_init_a097_0, 0xa297 }, + { nvc0_grctx_init_902d_0, 0x902d }, + {} +}; + +struct nouveau_oclass * +gk20a_grctx_oclass = &(struct nvc0_grctx_oclass) { + .base.handle = NV_ENGCTX(GR, 0xea), + .base.ofuncs = &(struct nouveau_ofuncs) { + .ctor = nvc0_graph_context_ctor, + .dtor = nvc0_graph_context_dtor, + .init = _nouveau_graph_context_init, + .fini = _nouveau_graph_context_fini, + .rd32 = _nouveau_graph_context_rd32, + .wr32 = _nouveau_graph_context_wr32, + }, + .main = nve4_grctx_generate_main, + .mods = nve4_grctx_generate_mods, + .unkn = nve4_grctx_generate_unkn, + .hub = nve4_grctx_pack_hub, + .gpc = nve4_grctx_pack_gpc, + .zcull = nvc0_grctx_pack_zcull, + .tpc = nve4_grctx_pack_tpc, + .ppc = nve4_grctx_pack_ppc, + .icmd = nve4_grctx_pack_icmd, + .mthd = gk20a_grctx_pack_mthd, +}.base; diff --git a/drivers/gpu/drm/nouveau/core/engine/graph/ctxnvc0.h b/drivers/gpu/drm/nouveau/core/engine/graph/ctxnvc0.h index 9c815d1f99e..8da8b627b9d 100644 --- a/drivers/gpu/drm/nouveau/core/engine/graph/ctxnvc0.h +++ b/drivers/gpu/drm/nouveau/core/engine/graph/ctxnvc0.h @@ -69,7 +69,9 @@ extern struct nouveau_oclass *nvd7_grctx_oclass; extern struct nouveau_oclass *nvd9_grctx_oclass; extern struct nouveau_oclass *nve4_grctx_oclass; +extern struct nouveau_oclass *gk20a_grctx_oclass; void nve4_grctx_generate_main(struct nvc0_graph_priv *, struct nvc0_grctx *); +void 
nve4_grctx_generate_mods(struct nvc0_graph_priv *, struct nvc0_grctx *); void nve4_grctx_generate_unkn(struct nvc0_graph_priv *); void nve4_grctx_generate_r418bb8(struct nvc0_graph_priv *); @@ -151,6 +153,13 @@ extern const struct nvc0_graph_init nve4_grctx_init_gpm_0[]; extern const struct nvc0_graph_init nve4_grctx_init_pes_0[]; +extern const struct nvc0_graph_pack nve4_grctx_pack_hub[]; +extern const struct nvc0_graph_pack nve4_grctx_pack_gpc[]; +extern const struct nvc0_graph_pack nve4_grctx_pack_tpc[]; +extern const struct nvc0_graph_pack nve4_grctx_pack_ppc[]; +extern const struct nvc0_graph_pack nve4_grctx_pack_icmd[]; +extern const struct nvc0_graph_init nve4_grctx_init_a097_0[]; + extern const struct nvc0_graph_pack nvf0_grctx_pack_mthd[]; extern const struct nvc0_graph_init nvf0_grctx_init_pri_0[]; diff --git a/drivers/gpu/drm/nouveau/core/engine/graph/ctxnve4.c b/drivers/gpu/drm/nouveau/core/engine/graph/ctxnve4.c index 49a14b116a5..f722089945d 100644 --- a/drivers/gpu/drm/nouveau/core/engine/graph/ctxnve4.c +++ b/drivers/gpu/drm/nouveau/core/engine/graph/ctxnve4.c @@ -272,13 +272,13 @@ nve4_grctx_init_icmd_0[] = { {} }; -static const struct nvc0_graph_pack +const struct nvc0_graph_pack nve4_grctx_pack_icmd[] = { { nve4_grctx_init_icmd_0 }, {} }; -static const struct nvc0_graph_init +const struct nvc0_graph_init nve4_grctx_init_a097_0[] = { { 0x000800, 8, 0x40, 0x00000000 }, { 0x000804, 8, 0x40, 0x00000000 }, @@ -697,7 +697,7 @@ nve4_grctx_init_be_0[] = { {} }; -static const struct nvc0_graph_pack +const struct nvc0_graph_pack nve4_grctx_pack_hub[] = { { nvc0_grctx_init_main_0 }, { nve4_grctx_init_fe_0 }, @@ -737,7 +737,7 @@ nve4_grctx_init_gpm_0[] = { {} }; -static const struct nvc0_graph_pack +const struct nvc0_graph_pack nve4_grctx_pack_gpc[] = { { nvc0_grctx_init_gpc_unk_0 }, { nvd9_grctx_init_prop_0 }, @@ -802,7 +802,7 @@ nve4_grctx_init_sm_0[] = { {} }; -static const struct nvc0_graph_pack +const struct nvc0_graph_pack nve4_grctx_pack_tpc[] = { { 
nvd7_grctx_init_pe_0 }, { nve4_grctx_init_tex_0 }, @@ -826,7 +826,7 @@ nve4_grctx_init_cbm_0[] = { {} }; -static const struct nvc0_graph_pack +const struct nvc0_graph_pack nve4_grctx_pack_ppc[] = { { nve4_grctx_init_pes_0 }, { nve4_grctx_init_cbm_0 }, @@ -838,7 +838,7 @@ nve4_grctx_pack_ppc[] = { * PGRAPH context implementation ******************************************************************************/ -static void +void nve4_grctx_generate_mods(struct nvc0_graph_priv *priv, struct nvc0_grctx *info) { u32 magic[GPC_MAX][2]; @@ -957,7 +957,7 @@ nve4_grctx_generate_main(struct nvc0_graph_priv *priv, struct nvc0_grctx *info) struct nvc0_grctx_oclass *oclass = (void *)nv_engine(priv)->cclass; int i; - nv_mask(priv, 0x000260, 0x00000001, 0x00000000); + //nv_mask(priv, 0x000260, 0x00000001, 0x00000000); nvc0_graph_mmio(priv, oclass->hub); nvc0_graph_mmio(priv, oclass->gpc); @@ -991,7 +991,7 @@ nve4_grctx_generate_main(struct nvc0_graph_priv *priv, struct nvc0_grctx *info) nvc0_graph_icmd(priv, oclass->icmd); nv_wr32(priv, 0x404154, 0x00000400); nvc0_graph_mthd(priv, oclass->mthd); - nv_mask(priv, 0x000260, 0x00000001, 0x00000001); + //nv_mask(priv, 0x000260, 0x00000001, 0x00000001); nv_mask(priv, 0x418800, 0x00200000, 0x00200000); nv_mask(priv, 0x41be10, 0x00800000, 0x00800000); diff --git a/drivers/gpu/drm/nouveau/core/engine/graph/gk20a.c b/drivers/gpu/drm/nouveau/core/engine/graph/gk20a.c new file mode 100644 index 00000000000..83048a56430 --- /dev/null +++ b/drivers/gpu/drm/nouveau/core/engine/graph/gk20a.c @@ -0,0 +1,47 @@ +/* + * Copyright (c) 2014, NVIDIA CORPORATION. All rights reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ + +#include "nvc0.h" +#include "ctxnvc0.h" + +static struct nouveau_oclass +gk20a_graph_sclass[] = { + { 0x902d, &nouveau_object_ofuncs }, + { 0xa040, &nouveau_object_ofuncs }, + { 0xa297, &nouveau_object_ofuncs }, + { 0xa0c0, &nouveau_object_ofuncs }, + {} +}; + +struct nouveau_oclass * +gk20a_graph_oclass = &(struct nvc0_graph_oclass) { + .base.handle = NV_ENGINE(GR, 0xea), + .base.ofuncs = &(struct nouveau_ofuncs) { + .ctor = nvc0_graph_ctor, + .dtor = nvc0_graph_dtor, + .init = nve4_graph_init, + .fini = nve4_graph_fini, + }, + .cclass = &gk20a_grctx_oclass, + .sclass = gk20a_graph_sclass, + .mmio = nve4_graph_pack_mmio, +}.base; diff --git a/drivers/gpu/drm/nouveau/core/engine/graph/nvc0.c b/drivers/gpu/drm/nouveau/core/engine/graph/nvc0.c index f3c7329da0a..7a6cd1e2f29 100644 --- a/drivers/gpu/drm/nouveau/core/engine/graph/nvc0.c +++ b/drivers/gpu/drm/nouveau/core/engine/graph/nvc0.c @@ -894,6 +894,10 @@ nvc0_graph_init_fw(struct nvc0_graph_priv *priv, u32 fuc_base, nv_wr32(priv, fuc_base + 0x0188, i >> 6); nv_wr32(priv, fuc_base + 0x0184, code->data[i]); } + + /* code must be padded to 0x40 words */ + for (; i & 0x3f; i++) + nv_wr32(priv, fuc_base + 0x0184, 0); } static void @@ -942,17 +946,17 @@ nvc0_graph_init_ctxctl(struct nvc0_graph_priv *priv) { struct nvc0_graph_oclass *oclass = (void *)nv_object(priv)->oclass; struct nvc0_grctx_oclass *cclass = (void *)nv_engine(priv)->cclass; - u32 r000260; + //u32 r000260; int i; if (priv->firmware) { /* load fuc microcode */ - r000260 = nv_mask(priv, 0x000260, 0x00000001, 0x00000000); + //r000260 = nv_mask(priv, 0x000260, 0x00000001, 0x00000000); nvc0_graph_init_fw(priv, 0x409000, &priv->fuc409c, &priv->fuc409d); nvc0_graph_init_fw(priv, 0x41a000, &priv->fuc41ac, &priv->fuc41ad); - nv_wr32(priv, 0x000260, r000260); + //nv_wr32(priv, 0x000260, r000260); /* start both of them running */ nv_wr32(priv, 0x409840, 0xffffffff); @@ -1039,7 +1043,7 @@ nvc0_graph_init_ctxctl(struct nvc0_graph_priv *priv) } /* load HUB 
microcode */ - r000260 = nv_mask(priv, 0x000260, 0x00000001, 0x00000000); + //r000260 = nv_mask(priv, 0x000260, 0x00000001, 0x00000000); nv_wr32(priv, 0x4091c0, 0x01000000); for (i = 0; i < oclass->fecs.ucode->data.size / 4; i++) nv_wr32(priv, 0x4091c4, oclass->fecs.ucode->data.data[i]); @@ -1062,7 +1066,7 @@ nvc0_graph_init_ctxctl(struct nvc0_graph_priv *priv) nv_wr32(priv, 0x41a188, i >> 6); nv_wr32(priv, 0x41a184, oclass->gpccs.ucode->code.data[i]); } - nv_wr32(priv, 0x000260, r000260); + //nv_wr32(priv, 0x000260, r000260); /* load register lists */ nvc0_graph_init_csdata(priv, cclass->hub, 0x409000, 0x000, 0x000000); @@ -1259,10 +1263,14 @@ nvc0_graph_ctor(struct nouveau_object *parent, struct nouveau_object *engine, struct nvc0_graph_oclass *oclass = (void *)bclass; struct nouveau_device *device = nv_device(parent); struct nvc0_graph_priv *priv; + bool use_ext_fw, enable; int ret, i; - ret = nouveau_graph_create(parent, engine, bclass, - (oclass->fecs.ucode != NULL), &priv); + use_ext_fw = nouveau_boolopt(device->cfgopt, "NvGrUseFW", + oclass->fecs.ucode == NULL); + enable = use_ext_fw || oclass->fecs.ucode != NULL; + + ret = nouveau_graph_create(parent, engine, bclass, enable, &priv); *pobject = nv_object(priv); if (ret) return ret; @@ -1272,7 +1280,7 @@ nvc0_graph_ctor(struct nouveau_object *parent, struct nouveau_object *engine, priv->base.units = nvc0_graph_units; - if (nouveau_boolopt(device->cfgopt, "NvGrUseFW", false)) { + if (use_ext_fw) { nv_info(priv, "using external firmware\n"); if (nvc0_graph_ctor_fw(priv, "fuc409c", &priv->fuc409c) || nvc0_graph_ctor_fw(priv, "fuc409d", &priv->fuc409d) || diff --git a/drivers/gpu/drm/nouveau/core/engine/graph/nvc0.h b/drivers/gpu/drm/nouveau/core/engine/graph/nvc0.h index 90d44616c87..75203a99d90 100644 --- a/drivers/gpu/drm/nouveau/core/engine/graph/nvc0.h +++ b/drivers/gpu/drm/nouveau/core/engine/graph/nvc0.h @@ -116,6 +116,7 @@ int nvc0_graph_ctor(struct nouveau_object *, struct nouveau_object *, struct 
nouveau_object **); void nvc0_graph_dtor(struct nouveau_object *); int nvc0_graph_init(struct nouveau_object *); +int nve4_graph_fini(struct nouveau_object *, bool); int nve4_graph_init(struct nouveau_object *); extern struct nouveau_oclass nvc0_graph_sclass[]; @@ -217,6 +218,7 @@ extern const struct nvc0_graph_init nve4_graph_init_main_0[]; extern const struct nvc0_graph_init nve4_graph_init_tpccs_0[]; extern const struct nvc0_graph_init nve4_graph_init_pe_0[]; extern const struct nvc0_graph_init nve4_graph_init_be_0[]; +extern const struct nvc0_graph_pack nve4_graph_pack_mmio[]; extern const struct nvc0_graph_init nvf0_graph_init_fe_0[]; extern const struct nvc0_graph_init nvf0_graph_init_sked_0[]; diff --git a/drivers/gpu/drm/nouveau/core/engine/graph/nve4.c b/drivers/gpu/drm/nouveau/core/engine/graph/nve4.c index f7c01121717..51e0c075ad3 100644 --- a/drivers/gpu/drm/nouveau/core/engine/graph/nve4.c +++ b/drivers/gpu/drm/nouveau/core/engine/graph/nve4.c @@ -151,7 +151,7 @@ nve4_graph_init_be_0[] = { {} }; -static const struct nvc0_graph_pack +const struct nvc0_graph_pack nve4_graph_pack_mmio[] = { { nve4_graph_init_main_0 }, { nvc0_graph_init_fe_0 }, @@ -189,7 +189,7 @@ nve4_graph_pack_mmio[] = { * PGRAPH engine/subdev functions ******************************************************************************/ -static int +int nve4_graph_fini(struct nouveau_object *object, bool suspend) { struct nvc0_graph_priv *priv = (void *)object; diff --git a/drivers/gpu/drm/nouveau/core/include/engine/fifo.h b/drivers/gpu/drm/nouveau/core/include/engine/fifo.h index 26b6b2bb111..b639eb2c74f 100644 --- a/drivers/gpu/drm/nouveau/core/include/engine/fifo.h +++ b/drivers/gpu/drm/nouveau/core/include/engine/fifo.h @@ -109,6 +109,7 @@ extern struct nouveau_oclass *nv50_fifo_oclass; extern struct nouveau_oclass *nv84_fifo_oclass; extern struct nouveau_oclass *nvc0_fifo_oclass; extern struct nouveau_oclass *nve0_fifo_oclass; +extern struct nouveau_oclass *gk20a_fifo_oclass; extern 
struct nouveau_oclass *nv108_fifo_oclass; void nv04_fifo_intr(struct nouveau_subdev *); diff --git a/drivers/gpu/drm/nouveau/core/include/engine/graph.h b/drivers/gpu/drm/nouveau/core/include/engine/graph.h index 871edfdf3d5..8c1d4772da0 100644 --- a/drivers/gpu/drm/nouveau/core/include/engine/graph.h +++ b/drivers/gpu/drm/nouveau/core/include/engine/graph.h @@ -68,6 +68,7 @@ extern struct nouveau_oclass *nvc8_graph_oclass; extern struct nouveau_oclass *nvd7_graph_oclass; extern struct nouveau_oclass *nvd9_graph_oclass; extern struct nouveau_oclass *nve4_graph_oclass; +extern struct nouveau_oclass *gk20a_graph_oclass; extern struct nouveau_oclass *nvf0_graph_oclass; extern struct nouveau_oclass *nv108_graph_oclass; extern struct nouveau_oclass *gm107_graph_oclass; diff --git a/drivers/gpu/drm/nouveau/core/include/subdev/clock.h b/drivers/gpu/drm/nouveau/core/include/subdev/clock.h index 8f4ced75444..e330503f101 100644 --- a/drivers/gpu/drm/nouveau/core/include/subdev/clock.h +++ b/drivers/gpu/drm/nouveau/core/include/subdev/clock.h @@ -134,6 +134,7 @@ extern struct nouveau_oclass *nvaa_clock_oclass; extern struct nouveau_oclass nva3_clock_oclass; extern struct nouveau_oclass nvc0_clock_oclass; extern struct nouveau_oclass nve0_clock_oclass; +extern struct nouveau_oclass gk20a_clock_oclass; int nv04_clock_pll_set(struct nouveau_clock *, u32 type, u32 freq); int nv04_clock_pll_calc(struct nouveau_clock *, struct nvbios_pll *, diff --git a/drivers/gpu/drm/nouveau/core/include/subdev/fb.h b/drivers/gpu/drm/nouveau/core/include/subdev/fb.h index 58c7ccdebb0..871e73914b2 100644 --- a/drivers/gpu/drm/nouveau/core/include/subdev/fb.h +++ b/drivers/gpu/drm/nouveau/core/include/subdev/fb.h @@ -105,6 +105,7 @@ extern struct nouveau_oclass *nvaa_fb_oclass; extern struct nouveau_oclass *nvaf_fb_oclass; extern struct nouveau_oclass *nvc0_fb_oclass; extern struct nouveau_oclass *nve0_fb_oclass; +extern struct nouveau_oclass *gk20a_fb_oclass; extern struct nouveau_oclass 
*gm107_fb_oclass; #include <subdev/bios/ramcfg.h> diff --git a/drivers/gpu/drm/nouveau/core/include/subdev/ibus.h b/drivers/gpu/drm/nouveau/core/include/subdev/ibus.h index 88814f159d8..31df634c0fd 100644 --- a/drivers/gpu/drm/nouveau/core/include/subdev/ibus.h +++ b/drivers/gpu/drm/nouveau/core/include/subdev/ibus.h @@ -30,5 +30,6 @@ nouveau_ibus(void *obj) extern struct nouveau_oclass nvc0_ibus_oclass; extern struct nouveau_oclass nve0_ibus_oclass; +extern struct nouveau_oclass gk20a_ibus_oclass; #endif diff --git a/drivers/gpu/drm/nouveau/core/include/subdev/instmem.h b/drivers/gpu/drm/nouveau/core/include/subdev/instmem.h index c1df26f3230..6264660bedc 100644 --- a/drivers/gpu/drm/nouveau/core/include/subdev/instmem.h +++ b/drivers/gpu/drm/nouveau/core/include/subdev/instmem.h @@ -48,5 +48,6 @@ nouveau_instmem(void *obj) extern struct nouveau_oclass *nv04_instmem_oclass; extern struct nouveau_oclass *nv40_instmem_oclass; extern struct nouveau_oclass *nv50_instmem_oclass; +extern struct nouveau_oclass *gk20a_instmem_oclass; #endif diff --git a/drivers/gpu/drm/nouveau/core/os.h b/drivers/gpu/drm/nouveau/core/os.h index d0ced94ca54..fc84a258f91 100644 --- a/drivers/gpu/drm/nouveau/core/os.h +++ b/drivers/gpu/drm/nouveau/core/os.h @@ -21,6 +21,7 @@ #include <linux/interrupt.h> #include <linux/log2.h> #include <linux/pm_runtime.h> +#include <asm/cacheflush.h> #include <asm/unaligned.h> @@ -38,4 +39,23 @@ #endif /* def __BIG_ENDIAN else */ #endif /* !ioread32_native */ +#if defined(__arm__) + +static inline void +nv_cpu_cache_flush_area(void *va, size_t size) +{ + phys_addr_t pa = virt_to_phys(va); + __cpuc_flush_dcache_area(va, size); + outer_flush_range(pa, pa + size); +} + +#else + +static inline void +nv_cpu_cache_flush_area(void *va, size_t size) +{ +} + +#endif /* defined(__arm__) */ + #endif diff --git a/drivers/gpu/drm/nouveau/core/subdev/bar/base.c b/drivers/gpu/drm/nouveau/core/subdev/bar/base.c index bdf594116f3..73b1ed20c8d 100644 --- 
a/drivers/gpu/drm/nouveau/core/subdev/bar/base.c +++ b/drivers/gpu/drm/nouveau/core/subdev/bar/base.c @@ -118,8 +118,10 @@ nouveau_bar_create_(struct nouveau_object *parent, if (ret) return ret; - bar->iomem = ioremap(nv_device_resource_start(device, 3), - nv_device_resource_len(device, 3)); + if (nv_device_resource_len(device, 3) != 0) + bar->iomem = ioremap(nv_device_resource_start(device, 3), + nv_device_resource_len(device, 3)); + return 0; } diff --git a/drivers/gpu/drm/nouveau/core/subdev/bar/nvc0.c b/drivers/gpu/drm/nouveau/core/subdev/bar/nvc0.c index 3f30db62e65..ca8139b9ab2 100644 --- a/drivers/gpu/drm/nouveau/core/subdev/bar/nvc0.c +++ b/drivers/gpu/drm/nouveau/core/subdev/bar/nvc0.c @@ -30,14 +30,16 @@ #include "priv.h" +struct nvc0_bar_priv_vm { + struct nouveau_gpuobj *mem; + struct nouveau_gpuobj *pgd; + struct nouveau_vm *vm; +}; + struct nvc0_bar_priv { struct nouveau_bar base; spinlock_t lock; - struct { - struct nouveau_gpuobj *mem; - struct nouveau_gpuobj *pgd; - struct nouveau_vm *vm; - } bar[2]; + struct nvc0_bar_priv_vm bar[2]; }; static int @@ -79,87 +81,87 @@ nvc0_bar_unmap(struct nouveau_bar *bar, struct nouveau_vma *vma) } static int -nvc0_bar_ctor(struct nouveau_object *parent, struct nouveau_object *engine, - struct nouveau_oclass *oclass, void *data, u32 size, - struct nouveau_object **pobject) +nvc0_bar_init_vm(struct nvc0_bar_priv *priv, struct nvc0_bar_priv_vm *bar_vm, + int bar_nr) { - struct nouveau_device *device = nv_device(parent); - struct nvc0_bar_priv *priv; - struct nouveau_gpuobj *mem; + struct nouveau_device *device = nv_device(&priv->base); struct nouveau_vm *vm; + resource_size_t bar_len; int ret; - ret = nouveau_bar_create(parent, engine, oclass, &priv); - *pobject = nv_object(priv); - if (ret) - return ret; - - /* BAR3 */ ret = nouveau_gpuobj_new(nv_object(priv), NULL, 0x1000, 0, 0, - &priv->bar[0].mem); - mem = priv->bar[0].mem; + &bar_vm->mem); if (ret) return ret; ret = nouveau_gpuobj_new(nv_object(priv), NULL, 
0x8000, 0, 0, - &priv->bar[0].pgd); + &bar_vm->pgd); if (ret) return ret; - ret = nouveau_vm_new(device, 0, nv_device_resource_len(device, 3), 0, &vm); + bar_len = nv_device_resource_len(device, bar_nr); + + ret = nouveau_vm_new(device, 0, bar_len, 0, &vm); if (ret) return ret; atomic_inc(&vm->engref[NVDEV_SUBDEV_BAR]); - ret = nouveau_gpuobj_new(nv_object(priv), NULL, - (nv_device_resource_len(device, 3) >> 12) * 8, - 0x1000, NVOBJ_FLAG_ZERO_ALLOC, - &vm->pgt[0].obj[0]); - vm->pgt[0].refcount[0] = 1; - if (ret) - return ret; + /* + * Bootstrap page table lookup. + */ + if (bar_nr == 3) { + ret = nouveau_gpuobj_new(nv_object(priv), NULL, + (bar_len >> 12) * 8, 0x1000, + NVOBJ_FLAG_ZERO_ALLOC, + &vm->pgt[0].obj[0]); + vm->pgt[0].refcount[0] = 1; + if (ret) + return ret; + } - ret = nouveau_vm_ref(vm, &priv->bar[0].vm, priv->bar[0].pgd); + ret = nouveau_vm_ref(vm, &bar_vm->vm, bar_vm->pgd); nouveau_vm_ref(NULL, &vm, NULL); if (ret) return ret; - nv_wo32(mem, 0x0200, lower_32_bits(priv->bar[0].pgd->addr)); - nv_wo32(mem, 0x0204, upper_32_bits(priv->bar[0].pgd->addr)); - nv_wo32(mem, 0x0208, lower_32_bits(nv_device_resource_len(device, 3) - 1)); - nv_wo32(mem, 0x020c, upper_32_bits(nv_device_resource_len(device, 3) - 1)); + nv_wo32(bar_vm->mem, 0x0200, lower_32_bits(bar_vm->pgd->addr)); + nv_wo32(bar_vm->mem, 0x0204, upper_32_bits(bar_vm->pgd->addr)); + nv_wo32(bar_vm->mem, 0x0208, lower_32_bits(bar_len - 1)); + nv_wo32(bar_vm->mem, 0x020c, upper_32_bits(bar_len - 1)); - /* BAR1 */ - ret = nouveau_gpuobj_new(nv_object(priv), NULL, 0x1000, 0, 0, - &priv->bar[1].mem); - mem = priv->bar[1].mem; - if (ret) - return ret; + return 0; +} - ret = nouveau_gpuobj_new(nv_object(priv), NULL, 0x8000, 0, 0, - &priv->bar[1].pgd); - if (ret) - return ret; +static int +nvc0_bar_ctor(struct nouveau_object *parent, struct nouveau_object *engine, + struct nouveau_oclass *oclass, void *data, u32 size, + struct nouveau_object **pobject) +{ + struct nouveau_device *device = 
nv_device(parent); + struct nvc0_bar_priv *priv; + bool has_bar3 = nv_device_resource_len(device, 3) != 0; + int ret; - ret = nouveau_vm_new(device, 0, nv_device_resource_len(device, 1), 0, &vm); + ret = nouveau_bar_create(parent, engine, oclass, &priv); + *pobject = nv_object(priv); if (ret) return ret; - atomic_inc(&vm->engref[NVDEV_SUBDEV_BAR]); + /* BAR3 */ + if (has_bar3) { + ret = nvc0_bar_init_vm(priv, &priv->bar[0], 3); + if (ret) + return ret; + priv->base.alloc = nouveau_bar_alloc; + priv->base.kmap = nvc0_bar_kmap; + } - ret = nouveau_vm_ref(vm, &priv->bar[1].vm, priv->bar[1].pgd); - nouveau_vm_ref(NULL, &vm, NULL); + /* BAR1 */ + ret = nvc0_bar_init_vm(priv, &priv->bar[1], 1); if (ret) return ret; - nv_wo32(mem, 0x0200, lower_32_bits(priv->bar[1].pgd->addr)); - nv_wo32(mem, 0x0204, upper_32_bits(priv->bar[1].pgd->addr)); - nv_wo32(mem, 0x0208, lower_32_bits(nv_device_resource_len(device, 1) - 1)); - nv_wo32(mem, 0x020c, upper_32_bits(nv_device_resource_len(device, 1) - 1)); - - priv->base.alloc = nouveau_bar_alloc; - priv->base.kmap = nvc0_bar_kmap; priv->base.umap = nvc0_bar_umap; priv->base.unmap = nvc0_bar_unmap; priv->base.flush = nv84_bar_flush; @@ -201,7 +203,9 @@ nvc0_bar_init(struct nouveau_object *object) nv_mask(priv, 0x100c80, 0x00000001, 0x00000000); nv_wr32(priv, 0x001704, 0x80000000 | priv->bar[1].mem->addr >> 12); - nv_wr32(priv, 0x001714, 0xc0000000 | priv->bar[0].mem->addr >> 12); + if (priv->bar[0].mem) + nv_wr32(priv, 0x001714, + 0xc0000000 | priv->bar[0].mem->addr >> 12); return 0; } diff --git a/drivers/gpu/drm/nouveau/core/subdev/clock/gk20a.c b/drivers/gpu/drm/nouveau/core/subdev/clock/gk20a.c new file mode 100644 index 00000000000..ac80513ddea --- /dev/null +++ b/drivers/gpu/drm/nouveau/core/subdev/clock/gk20a.c @@ -0,0 +1,496 @@ +/* + * Copyright (c) 2014, NVIDIA CORPORATION. All rights reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ * + * Shamelessly ripped off from ChromeOS's gk20a/clk_pllg.c + * + */ + +#define MHZ (1000 * 1000) + +#define MASK(w) ((1 << w) - 1) + +#define SYS_GPCPLL_CFG_BASE 0x00137000 +#define GPC_BCASE_GPCPLL_CFG_BASE 0x00132800 + +#define GPCPLL_CFG (SYS_GPCPLL_CFG_BASE + 0) +#define GPCPLL_CFG_ENABLE BIT(0) +#define GPCPLL_CFG_IDDQ BIT(1) +#define GPCPLL_CFG_LOCK_DET_OFF BIT(4) +#define GPCPLL_CFG_LOCK BIT(17) + +#define GPCPLL_COEFF (SYS_GPCPLL_CFG_BASE + 4) +#define GPCPLL_COEFF_M_SHIFT 0 +#define GPCPLL_COEFF_N_SHIFT 8 +#define GPCPLL_COEFF_P_SHIFT 16 + +#define GPCPLL_CFG2 (SYS_GPCPLL_CFG_BASE + 0xc) +#define GPCPLL_CFG2_SETUP2_SHIFT 16 +#define GPCPLL_CFG2_PLL_STEPA_SHIFT 24 + +#define GPCPLL_CFG3 (SYS_GPCPLL_CFG_BASE + 0x18) +#define GPCPLL_CFG3_PLL_STEPB_SHIFT 16 + +#define GPCPLL_NDIV_SLOWDOWN (SYS_GPCPLL_CFG_BASE + 0x1c) +#define GPCPLL_NDIV_SLOWDOWN_NDIV_LO_SHIFT 0 +#define GPCPLL_NDIV_SLOWDOWN_NDIV_MID_SHIFT 8 +#define GPCPLL_NDIV_SLOWDOWN_STEP_SIZE_LO2MID_SHIFT 16 +#define GPCPLL_NDIV_SLOWDOWN_SLOWDOWN_USING_PLL_SHIFT 22 +#define GPCPLL_NDIV_SLOWDOWN_EN_DYNRAMP_SHIFT 31 + +#define SEL_VCO (SYS_GPCPLL_CFG_BASE + 0x100) +#define SEL_VCO_GPC2CLK_OUT_SHIFT 0 + +#define GPC2CLK_OUT (SYS_GPCPLL_CFG_BASE + 0x250) +#define GPC2CLK_OUT_SDIV14_INDIV4_WIDTH 1 +#define GPC2CLK_OUT_SDIV14_INDIV4_SHIFT 31 +#define GPC2CLK_OUT_SDIV14_INDIV4_MODE 1 +#define GPC2CLK_OUT_VCODIV_WIDTH 6 +#define GPC2CLK_OUT_VCODIV_SHIFT 8 +#define GPC2CLK_OUT_VCODIV1 0 +#define GPC2CLK_OUT_VCODIV_MASK (MASK(GPC2CLK_OUT_VCODIV_WIDTH) << \ + GPC2CLK_OUT_VCODIV_SHIFT) +#define GPC2CLK_OUT_BYPDIV_WIDTH 6 +#define GPC2CLK_OUT_BYPDIV_SHIFT 0 +#define GPC2CLK_OUT_BYPDIV31 0x3c +#define GPC2CLK_OUT_INIT_MASK ((MASK(GPC2CLK_OUT_SDIV14_INDIV4_WIDTH) << \ + GPC2CLK_OUT_SDIV14_INDIV4_SHIFT)\ + | (MASK(GPC2CLK_OUT_VCODIV_WIDTH) << GPC2CLK_OUT_VCODIV_SHIFT)\ + | (MASK(GPC2CLK_OUT_BYPDIV_WIDTH) << GPC2CLK_OUT_BYPDIV_SHIFT)) +#define GPC2CLK_OUT_INIT_VAL ((GPC2CLK_OUT_SDIV14_INDIV4_MODE << \ + 
GPC2CLK_OUT_SDIV14_INDIV4_SHIFT) \ + | (GPC2CLK_OUT_VCODIV1 << GPC2CLK_OUT_VCODIV_SHIFT) \ + | (GPC2CLK_OUT_BYPDIV31 << GPC2CLK_OUT_BYPDIV_SHIFT)) + +#define GPC_BCAST_NDIV_SLOWDOWN_DEBUG (GPC_BCASE_GPCPLL_CFG_BASE + 0xa0) +#define GPC_BCAST_NDIV_SLOWDOWN_DEBUG_PLL_DYNRAMP_DONE_SYNCED_SHIFT 24 +#define GPC_BCAST_NDIV_SLOWDOWN_DEBUG_PLL_DYNRAMP_DONE_SYNCED_MASK \ + (0x1 << GPC_BCAST_NDIV_SLOWDOWN_DEBUG_PLL_DYNRAMP_DONE_SYNCED_SHIFT) + +#include <linux/types.h> + +static u8 pl_to_div[] = { +/* PL: 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14 */ +/* p: */ 1, 2, 3, 4, 5, 6, 8, 10, 12, 16, 12, 16, 20, 24, 32, +}; + +struct gk20a_clk_pllg_params { + u32 min_freq, max_freq; /* MHz */ + u32 min_vco, max_vco; /* MHz */ + u32 min_u, max_u; /* MHz */ + u32 min_m, max_m; + u32 min_n, max_n; + u32 min_pl, max_pl; +}; + +const struct gk20a_clk_pllg_params gk20a_pllg_params = { + .min_freq = 144, .max_freq = 2064, + .min_vco = 1000, .max_vco = 2064, + .min_u = 12, .max_u = 38, + .min_m = 1, .max_m = 255, + .min_n = 8, .max_n = 255, + .min_pl = 1, .max_pl = 32, +}; + +#include <subdev/clock.h> +#include <subdev/timer.h> + +#include <nouveau_platform.h> + +#include <linux/clk.h> +#include <linux/delay.h> + +struct gk20a_clock_priv { + struct nouveau_clock base; + const struct gk20a_clk_pllg_params *params; + u32 m, n, pl; + unsigned long parent_rate; + bool enabled; + bool init; +}; +#define to_gk20a_clock(base) container_of(base, struct gk20a_clock_priv, base) + +static unsigned long +gk20a_pllg_calc_rate(struct gk20a_clock_priv *priv) +{ + unsigned long rate; + int divider; + + rate = priv->parent_rate * priv->n; + divider = priv->m * pl_to_div[priv->pl]; + do_div(rate, divider); + + return rate; +} + +static int +gk20a_pllg_calc_mnp(struct gk20a_clock_priv *priv, unsigned long rate) +{ + unsigned int target_clk_f, ref_clk_f, target_freq; + unsigned int min_vco_f, max_vco_f; + u32 low_pl, high_pl, best_pl; + unsigned int target_vco_f, vco_f; + u32 best_m, best_n; + unsigned 
int u_f; + u32 m, n, n2; + u32 delta, lwv, best_delta = ~0; + int pl; + + target_clk_f = rate * 2 / MHZ; + ref_clk_f = priv->parent_rate / MHZ; + + max_vco_f = priv->params->max_vco; + min_vco_f = priv->params->min_vco; + best_m = priv->params->max_m; + best_n = priv->params->min_n; + best_pl = priv->params->min_pl; + + target_vco_f = target_clk_f + target_clk_f / 50; + if (max_vco_f < target_vco_f) + max_vco_f = target_vco_f; + + /* min_pl <= high_pl <= max_pl */ + high_pl = (max_vco_f + target_vco_f - 1) / target_vco_f; + high_pl = min(high_pl, priv->params->max_pl); + high_pl = max(high_pl, priv->params->min_pl); + + /* min_pl <= low_pl <= max_pl */ + low_pl = min_vco_f / target_vco_f; + low_pl = min(low_pl, priv->params->max_pl); + low_pl = max(low_pl, priv->params->min_pl); + + /* Find Indices of high_pl and low_pl */ + for (pl = 0; pl < ARRAY_SIZE(pl_to_div) - 1; pl++) { + if (pl_to_div[pl] >= low_pl) { + low_pl = pl; + break; + } + } + for (pl = 0; pl < ARRAY_SIZE(pl_to_div) - 1; pl++) { + if (pl_to_div[pl] >= high_pl) { + high_pl = pl; + break; + } + } + + /* Select lowest possible VCO */ + for (pl = low_pl; pl <= high_pl; pl++) { + target_vco_f = target_clk_f * pl_to_div[pl]; + for (m = priv->params->min_m; m <= priv->params->max_m; m++) { + u_f = ref_clk_f / m; + + if (u_f < priv->params->min_u) + break; + if (u_f > priv->params->max_u) + continue; + + n = (target_vco_f * m) / ref_clk_f; + n2 = ((target_vco_f * m) + (ref_clk_f - 1)) / ref_clk_f; + + if (n > priv->params->max_n) + break; + + for (; n <= n2; n++) { + if (n < priv->params->min_n) + continue; + if (n > priv->params->max_n) + break; + + vco_f = ref_clk_f * n / m; + + if (vco_f >= min_vco_f && vco_f <= max_vco_f) { + lwv = (vco_f + (pl_to_div[pl] / 2)) + / pl_to_div[pl]; + delta = abs(lwv - target_clk_f); + + if (delta < best_delta) { + best_delta = delta; + best_m = m; + best_n = n; + best_pl = pl; + + if (best_delta == 0) + goto found_match; + } + } + } + } + } + +found_match: + 
WARN_ON(best_delta == ~0); + + if (best_delta != 0) + nv_debug(priv, "no best match for target @ %dMHz on gpc_pll", + target_clk_f); + + priv->m = best_m; + priv->n = best_n; + priv->pl = best_pl; + + target_freq = gk20a_pllg_calc_rate(priv) / MHZ; + + nv_debug(priv, "actual target freq %d MHz, M %d, N %d, PL %d(div%d)\n", + target_freq, priv->m, priv->n, priv->pl, pl_to_div[priv->pl]); + + return 0; +} + +static int +gk20a_pllg_slide(struct gk20a_clock_priv *priv, u32 n) +{ + u32 val; + int ramp_timeout; + + /* get old coefficients */ + val = nv_rd32(priv, GPCPLL_COEFF); + /* do nothing if NDIV is the same */ + if (n == ((val >> GPCPLL_COEFF_N_SHIFT) & 0xff)) + return 0; + + /* setup */ + nv_mask(priv, GPCPLL_CFG2, 0xff << GPCPLL_CFG2_PLL_STEPA_SHIFT, + 0x2b << GPCPLL_CFG2_PLL_STEPA_SHIFT); + nv_mask(priv, GPCPLL_CFG3, 0xff << GPCPLL_CFG3_PLL_STEPB_SHIFT, + 0xb << GPCPLL_CFG3_PLL_STEPB_SHIFT); + + /* pll slowdown mode */ + nv_mask(priv, GPCPLL_NDIV_SLOWDOWN, + BIT(GPCPLL_NDIV_SLOWDOWN_SLOWDOWN_USING_PLL_SHIFT), + BIT(GPCPLL_NDIV_SLOWDOWN_SLOWDOWN_USING_PLL_SHIFT)); + + /* new ndiv ready for ramp */ + val = nv_rd32(priv, GPCPLL_COEFF); + val &= ~(0xff << GPCPLL_COEFF_N_SHIFT); + val |= (n & 0xff) << GPCPLL_COEFF_N_SHIFT; + udelay(1); + nv_wr32(priv, GPCPLL_COEFF, val); + + /* dynamic ramp to new ndiv */ + val = nv_rd32(priv, GPCPLL_NDIV_SLOWDOWN); + val |= 0x1 << GPCPLL_NDIV_SLOWDOWN_EN_DYNRAMP_SHIFT; + udelay(1); + nv_wr32(priv, GPCPLL_NDIV_SLOWDOWN, val); + + for (ramp_timeout = 500; ramp_timeout > 0; ramp_timeout--) { + udelay(1); + val = nv_rd32(priv, GPC_BCAST_NDIV_SLOWDOWN_DEBUG); + if (val & GPC_BCAST_NDIV_SLOWDOWN_DEBUG_PLL_DYNRAMP_DONE_SYNCED_MASK) + break; + } + + /* exit slowdown mode */ + nv_mask(priv, GPCPLL_NDIV_SLOWDOWN, + BIT(GPCPLL_NDIV_SLOWDOWN_SLOWDOWN_USING_PLL_SHIFT) | + BIT(GPCPLL_NDIV_SLOWDOWN_EN_DYNRAMP_SHIFT), 0); + nv_rd32(priv, GPCPLL_NDIV_SLOWDOWN); + + if (ramp_timeout <= 0) { + nv_error(priv, "gpcpll dynamic ramp timeout\n"); + return 
-ETIMEDOUT; + } + + return 0; +} + +static void +_gk20a_pllg_disable(struct gk20a_clock_priv *priv) +{ + /* disable PLL */ + nv_mask(priv, GPCPLL_CFG, GPCPLL_CFG_ENABLE, 0); + nv_rd32(priv, GPCPLL_CFG); +} + + +static int +_gk20a_pllg_program_mnp(struct gk20a_clock_priv *priv, bool allow_slide) +{ + u32 val, cfg; + u32 m_old, pl_old, n_lo; + + /* get old coefficients */ + val = nv_rd32(priv, GPCPLL_COEFF); + m_old = (val >> GPCPLL_COEFF_M_SHIFT) & 0xff; + pl_old = (val >> GPCPLL_COEFF_P_SHIFT) & 0xff; + + /* do NDIV slide if there is no change in M and PL */ + cfg = nv_rd32(priv, GPCPLL_CFG); + if (allow_slide && priv->m == m_old && priv->pl == pl_old && + (cfg & GPCPLL_CFG_ENABLE)) { + return gk20a_pllg_slide(priv, priv->n); + } + + /* slide down to NDIV_LO */ + n_lo = DIV_ROUND_UP(m_old * priv->params->min_vco, + priv->parent_rate / MHZ); + if (allow_slide && (cfg & GPCPLL_CFG_ENABLE)) { + int ret = gk20a_pllg_slide(priv, n_lo); + if (ret) + return ret; + } + + /* split FO-to-bypass jump in halfs by setting out divider 1:2 */ + nv_mask(priv, GPC2CLK_OUT, GPC2CLK_OUT_VCODIV_MASK, + 0x2 << GPC2CLK_OUT_VCODIV_SHIFT); + + /* put PLL in bypass before programming it */ + val = nv_rd32(priv, SEL_VCO); + val &= ~(0x1 << SEL_VCO_GPC2CLK_OUT_SHIFT); + udelay(2); + nv_wr32(priv, SEL_VCO, val); + + /* get out from IDDQ */ + val = nv_rd32(priv, GPCPLL_CFG); + if (val & GPCPLL_CFG_IDDQ) { + val &= ~GPCPLL_CFG_IDDQ; + nv_wr32(priv, GPCPLL_CFG, val); + nv_rd32(priv, GPCPLL_CFG); + udelay(2); + } + + _gk20a_pllg_disable(priv); + + nv_debug(priv, "%s: m=%d n=%d pl=%d\n", __func__, priv->m, priv->n, + priv->pl); + + n_lo = DIV_ROUND_UP(priv->m * priv->params->min_vco, + priv->parent_rate / MHZ); + val = priv->m << GPCPLL_COEFF_M_SHIFT; + val |= (allow_slide ? 
n_lo : priv->n) << GPCPLL_COEFF_N_SHIFT; + val |= priv->pl << GPCPLL_COEFF_P_SHIFT; + nv_wr32(priv, GPCPLL_COEFF, val); + + /* enable PLL */ + nv_mask(priv, GPCPLL_CFG, GPCPLL_CFG_ENABLE, GPCPLL_CFG_ENABLE); + + val = nv_rd32(priv, GPCPLL_CFG); + if (val & GPCPLL_CFG_LOCK_DET_OFF) { + val &= ~GPCPLL_CFG_LOCK_DET_OFF; + nv_wr32(priv, GPCPLL_CFG, val); + } + + if (!nouveau_timer_wait_eq(priv, 300000, GPCPLL_CFG, GPCPLL_CFG_LOCK, + GPCPLL_CFG_LOCK)) { + nv_error(priv, "%s: timeout waiting for pllg lock\n", __func__); + return -ETIMEDOUT; + } + + /* switch to VCO mode */ + nv_mask(priv, SEL_VCO, 0, BIT(SEL_VCO_GPC2CLK_OUT_SHIFT)); + + /* restore out divider 1:1 */ + val = nv_rd32(priv, GPC2CLK_OUT); + val &= ~GPC2CLK_OUT_VCODIV_MASK; + udelay(2); + nv_wr32(priv, GPC2CLK_OUT, val); + + /* slide up to new NDIV */ + return allow_slide ? gk20a_pllg_slide(priv, priv->n) : 0; +} + +static int +gk20a_pllg_program_mnp(struct gk20a_clock_priv *priv) +{ + int err; + + err = _gk20a_pllg_program_mnp(priv, true); + if (err) + err = _gk20a_pllg_program_mnp(priv, false); + + return err; +} + +static void +gk20a_pllg_init(struct gk20a_clock_priv *priv) +{ + nv_mask(priv, GPC2CLK_OUT, GPC2CLK_OUT_INIT_MASK, GPC2CLK_OUT_INIT_VAL); + + priv->init = true; +} + +static int +gk20a_pllg_enable(struct gk20a_clock_priv *priv) +{ + int err; + + if (!priv->init) + gk20a_pllg_init(priv); + + err = gk20a_pllg_program_mnp(priv); + if (!err) + priv->enabled = true; + + return err; +} + +static int +gk20a_clock_init(struct nouveau_object *object) +{ + struct gk20a_clock_priv *priv = (void *)object; + int ret; + + ret = gk20a_pllg_calc_mnp(priv, 72 * MHZ); + if (ret) { + nv_error(priv, "cannot compute clock parameters\n"); + return ret; + } + + ret = gk20a_pllg_enable(priv); + if (ret) { + nv_error(priv, "cannot initialize PLLG\n"); + return ret; + } + + return 0; +} + +static int +gk20a_clock_ctor(struct nouveau_object *parent, struct nouveau_object *engine, + struct nouveau_oclass *oclass, void 
*data, u32 size, + struct nouveau_object **pobject) +{ + struct gk20a_clock_priv *priv; + struct nouveau_platform_gpu *plat = to_platform_gpu(nv_device(parent)); + int ret; + + ret = nouveau_subdev_create(parent, engine, oclass, 0, "CLK", "clock", + &priv); + *pobject = nv_object(priv); + if (ret) + return ret; + + priv->params = &gk20a_pllg_params; + priv->parent_rate = clk_get_rate(plat->clk); + + nv_info(priv, "parent clk rate: %ld\n", priv->parent_rate); + + return 0; +} + +struct nouveau_oclass +gk20a_clock_oclass = { + .handle = NV_SUBDEV(CLOCK, 0xea), + .ofuncs = &(struct nouveau_ofuncs) { + .ctor = gk20a_clock_ctor, + .dtor = _nouveau_subdev_dtor, + .init = gk20a_clock_init, + .fini = _nouveau_subdev_fini, + }, +}; diff --git a/drivers/gpu/drm/nouveau/core/subdev/fb/gk20a.c b/drivers/gpu/drm/nouveau/core/subdev/fb/gk20a.c new file mode 100644 index 00000000000..a16024a7477 --- /dev/null +++ b/drivers/gpu/drm/nouveau/core/subdev/fb/gk20a.c @@ -0,0 +1,56 @@ +/* + * Copyright (c) 2014, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include "nvc0.h" + +struct gk20a_fb_priv { + struct nouveau_fb base; +}; + +static int +gk20a_fb_ctor(struct nouveau_object *parent, struct nouveau_object *engine, + struct nouveau_oclass *oclass, void *data, u32 size, + struct nouveau_object **pobject) +{ + struct gk20a_fb_priv *priv; + int ret; + + ret = nouveau_fb_create(parent, engine, oclass, &priv); + *pobject = nv_object(priv); + if (ret) + return ret; + + return 0; +} + +struct nouveau_oclass * +gk20a_fb_oclass = &(struct nouveau_fb_impl) { + .base.handle = NV_SUBDEV(FB, 0xea), + .base.ofuncs = &(struct nouveau_ofuncs) { + .ctor = gk20a_fb_ctor, + .dtor = _nouveau_fb_dtor, + .init = _nouveau_fb_init, + .fini = _nouveau_fb_fini, + }, + .memtype = nvc0_fb_memtype_valid, + .ram = &gk20a_ram_oclass, +}.base; diff --git a/drivers/gpu/drm/nouveau/core/subdev/fb/priv.h b/drivers/gpu/drm/nouveau/core/subdev/fb/priv.h index da74c889aed..82273f832e4 100644 --- a/drivers/gpu/drm/nouveau/core/subdev/fb/priv.h +++ b/drivers/gpu/drm/nouveau/core/subdev/fb/priv.h @@ -32,6 +32,7 @@ extern struct nouveau_oclass nva3_ram_oclass; extern struct nouveau_oclass nvaa_ram_oclass; extern struct nouveau_oclass nvc0_ram_oclass; extern struct nouveau_oclass nve0_ram_oclass; +extern struct nouveau_oclass gk20a_ram_oclass; extern struct nouveau_oclass gm107_ram_oclass; int nouveau_sddr3_calc(struct nouveau_ram *ram); diff --git a/drivers/gpu/drm/nouveau/core/subdev/fb/ramgk20a.c b/drivers/gpu/drm/nouveau/core/subdev/fb/ramgk20a.c new file mode 100644 index 00000000000..1b8b91dc5ba --- /dev/null +++ b/drivers/gpu/drm/nouveau/core/subdev/fb/ramgk20a.c @@ -0,0 +1,150 @@ +/* + * Copyright (c) 2014, NVIDIA CORPORATION. All rights reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ + +#include "priv.h" +#include "ramgk20a.h" + +#include <subdev/fb.h> + +#include <linux/types.h> +#include <linux/mm.h> +#include <linux/dma-mapping.h> + +static void +gk20a_ram_put(struct nouveau_fb *pfb, struct nouveau_mem **pmem) +{ + struct device *dev = nv_device_base(nv_device(pfb)); + struct gk20a_mem *mem = to_gk20a_mem(*pmem); + + *pmem = NULL; + if (unlikely(mem == NULL)) + return; + + if (likely(mem->cpuaddr)) + dma_free_coherent(dev, mem->base.size << PAGE_SHIFT, + mem->cpuaddr, mem->handle); + + kfree(mem->base.pages); + kfree(mem); +} + +static int +gk20a_ram_get(struct nouveau_fb *pfb, u64 size, u32 align, u32 ncmin, + u32 memtype, struct nouveau_mem **pmem) +{ + struct device *dev = nv_device_base(nv_device(pfb)); + struct gk20a_mem *mem; + u32 type = memtype & 0xff; + u32 npages, order; + int i; + + nv_debug(pfb, "%s: size: %llx align: %x, ncmin: %x\n", __func__, size, + align, ncmin); + + npages = size >> PAGE_SHIFT; + if (npages == 0) + npages = 1; + + if (align == 0) + align = PAGE_SIZE; + align >>= PAGE_SHIFT; + + /* round alignment to the next power of 2, if needed */ + order = fls(align); + if ((align & (align - 1)) == 0) + order--; + align = BIT(order); + + /* ensure returned address is correctly aligned */ + npages = max(align, npages); + + mem = kzalloc(sizeof(*mem), GFP_KERNEL); + if (!mem) + return -ENOMEM; + + mem->base.size = npages; + mem->base.memtype = type; + + mem->base.pages = kzalloc(sizeof(dma_addr_t) * npages, GFP_KERNEL); + if (!mem->base.pages) { + kfree(mem); + return -ENOMEM; + } + + *pmem = &mem->base; + + mem->cpuaddr = dma_alloc_coherent(dev, npages << PAGE_SHIFT, + &mem->handle, GFP_KERNEL); + if (!mem->cpuaddr) { + nv_error(pfb, "%s: cannot allocate memory!\n", __func__); + gk20a_ram_put(pfb, pmem); + return -ENOMEM; + } + + align <<= PAGE_SHIFT; + + /* alignment check */ + if (unlikely(mem->handle & (align - 1))) + nv_warn(pfb, "memory not aligned as requested: %pad (0x%x)\n", + &mem->handle, align); + + 
nv_debug(pfb, "alloc size: 0x%x, align: 0x%x, paddr: %pad, vaddr: %p\n", + npages << PAGE_SHIFT, align, &mem->handle, mem->cpuaddr); + + for (i = 0; i < npages; i++) + mem->base.pages[i] = mem->handle + (PAGE_SIZE * i); + + mem->base.offset = (u64)mem->base.pages[0]; + + return 0; +} + +static int +gk20a_ram_ctor(struct nouveau_object *parent, struct nouveau_object *engine, + struct nouveau_oclass *oclass, void *data, u32 datasize, + struct nouveau_object **pobject) +{ + struct nouveau_ram *ram; + int ret; + + ret = nouveau_ram_create(parent, engine, oclass, &ram); + *pobject = nv_object(ram); + if (ret) + return ret; + ram->type = NV_MEM_TYPE_STOLEN; + ram->size = get_num_physpages() << PAGE_SHIFT; + + ram->get = gk20a_ram_get; + ram->put = gk20a_ram_put; + + return 0; +} + +struct nouveau_oclass +gk20a_ram_oclass = { + .ofuncs = &(struct nouveau_ofuncs) { + .ctor = gk20a_ram_ctor, + .dtor = _nouveau_ram_dtor, + .init = _nouveau_ram_init, + .fini = _nouveau_ram_fini, + }, +}; diff --git a/drivers/gpu/drm/nouveau/core/subdev/fb/ramgk20a.h b/drivers/gpu/drm/nouveau/core/subdev/fb/ramgk20a.h new file mode 100644 index 00000000000..105ad258169 --- /dev/null +++ b/drivers/gpu/drm/nouveau/core/subdev/fb/ramgk20a.h @@ -0,0 +1,38 @@ +/* + * Copyright (c) 2014, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + + +#ifndef __NVKM_FB_RAMGK20A_H__ +#define __NVKM_FB_RAMGK20A_H__ + +#include <linux/types.h> +#include <subdev/fb.h> + +struct gk20a_mem { + struct nouveau_mem base; + void *cpuaddr; + dma_addr_t handle; +}; + +#define to_gk20a_mem(m) container_of(m, struct gk20a_mem, base) + +#endif diff --git a/drivers/gpu/drm/nouveau/core/subdev/ibus/gk20a.c b/drivers/gpu/drm/nouveau/core/subdev/ibus/gk20a.c new file mode 100644 index 00000000000..245f0ebaa6a --- /dev/null +++ b/drivers/gpu/drm/nouveau/core/subdev/ibus/gk20a.c @@ -0,0 +1,103 @@ +/* + * Copyright (c) 2014, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include <subdev/ibus.h> +#include <subdev/timer.h> + +struct gk20a_ibus_priv { + struct nouveau_ibus base; +}; + +static void +gk20a_ibus_init_priv_ring(struct gk20a_ibus_priv *priv) +{ + nv_mask(priv, 0x137250, 0x3f, 0); + + nv_mask(priv, 0x000200, 0x20, 0); + usleep_range(20, 30); + nv_mask(priv, 0x000200, 0x20, 0x20); + + nv_wr32(priv, 0x12004c, 0x4); + nv_wr32(priv, 0x122204, 0x2); + nv_rd32(priv, 0x122204); +} + +static void +gk20a_ibus_intr(struct nouveau_subdev *subdev) +{ + struct gk20a_ibus_priv *priv = (void *)subdev; + u32 status0 = nv_rd32(priv, 0x120058); + + if (status0 & 0x7) { + nv_debug(priv, "resetting priv ring\n"); + gk20a_ibus_init_priv_ring(priv); + } + + /* Acknowledge interrupt */ + nv_mask(priv, 0x12004c, 0x2, 0x2); + + if (!nv_wait(subdev, 0x12004c, 0x3f, 0x00)) + nv_warn(priv, "timeout waiting for ringmaster ack\n"); +} + +static int +gk20a_ibus_init(struct nouveau_object *object) +{ + struct gk20a_ibus_priv *priv = (void *)object; + int ret; + + ret = _nouveau_ibus_init(object); + if (ret) + return ret; + + gk20a_ibus_init_priv_ring(priv); + + return 0; +} + +static int +gk20a_ibus_ctor(struct nouveau_object *parent, struct nouveau_object *engine, + struct nouveau_oclass *oclass, void *data, u32 size, + struct nouveau_object **pobject) +{ + struct gk20a_ibus_priv *priv; + int ret; + + ret = nouveau_ibus_create(parent, engine, oclass, &priv); + *pobject = nv_object(priv); + if (ret) + return ret; + + nv_subdev(priv)->intr = gk20a_ibus_intr; + return 0; +} + +struct nouveau_oclass +gk20a_ibus_oclass = { + .handle = NV_SUBDEV(IBUS, 0xea), + .ofuncs = &(struct nouveau_ofuncs) { + .ctor = gk20a_ibus_ctor, + .dtor = _nouveau_ibus_dtor, + .init = gk20a_ibus_init, + 
.fini = _nouveau_ibus_fini, + }, +}; diff --git a/drivers/gpu/drm/nouveau/core/subdev/instmem/gk20a.c b/drivers/gpu/drm/nouveau/core/subdev/instmem/gk20a.c new file mode 100644 index 00000000000..0bcbba09eb0 --- /dev/null +++ b/drivers/gpu/drm/nouveau/core/subdev/instmem/gk20a.c @@ -0,0 +1,138 @@ +/* + * Copyright (c) 2014, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ + +#include <subdev/fb.h> + +#include "priv.h" +#include "core/subdev/fb/ramgk20a.h" + +struct gk20a_instmem_priv { + struct nouveau_instmem base; +}; + +struct gk20a_instobj_priv { + struct nouveau_instobj base; + struct nouveau_mem *mem; +}; + +static u32 +gk20a_instobj_rd32(struct nouveau_object *object, u64 offset) +{ + struct gk20a_instobj_priv *node = (void *)object; + struct gk20a_mem *mem = to_gk20a_mem(node->mem); + + return ((u32 *)mem->cpuaddr)[offset / 4]; +} + +static void +gk20a_instobj_wr32(struct nouveau_object *object, u64 offset, u32 data) +{ + struct gk20a_instobj_priv *node = (void *)object; + struct gk20a_mem *mem = to_gk20a_mem(node->mem); + + ((u32 *)mem->cpuaddr)[offset / 4] = data; +} + +static void +gk20a_instobj_dtor(struct nouveau_object *object) +{ + struct gk20a_instobj_priv *node = (void *)object; + struct nouveau_fb *pfb = nouveau_fb(object); + + pfb->ram->put(pfb, &node->mem); + nouveau_instobj_destroy(&node->base); +} + +static int +gk20a_instobj_ctor(struct nouveau_object *parent, struct nouveau_object *engine, + struct nouveau_oclass *oclass, void *data, u32 size, + struct nouveau_object **pobject) +{ + struct nouveau_fb *pfb = nouveau_fb(parent); + struct nouveau_instobj_args *args = data; + struct gk20a_instobj_priv *node; + int ret; + + args->size = max((args->size + 4095) & ~4095, (u32)4096); + args->align = max((args->align + 4095) & ~4095, (u32)4096); + + ret = nouveau_instobj_create(parent, engine, oclass, &node); + *pobject = nv_object(node); + if (ret) + return ret; + + ret = pfb->ram->get(pfb, args->size, args->align, 0, 0x800, &node->mem); + if (ret) + return ret; + + node->base.addr = node->mem->offset; + node->base.size = node->mem->size << 12; + node->mem->page_shift = 12; + return 0; +} + +static struct nouveau_instobj_impl +gk20a_instobj_oclass = { + .base.ofuncs = &(struct nouveau_ofuncs) { + .ctor = gk20a_instobj_ctor, + .dtor = gk20a_instobj_dtor, + .init = _nouveau_instobj_init, + .fini = 
_nouveau_instobj_fini, + .rd32 = gk20a_instobj_rd32, + .wr32 = gk20a_instobj_wr32, + }, +}; + +static int +gk20a_instmem_fini(struct nouveau_object *object, bool suspend) +{ + struct gk20a_instmem_priv *priv = (void *)object; + return nouveau_instmem_fini(&priv->base, suspend); +} + +static int +gk20a_instmem_ctor(struct nouveau_object *parent, struct nouveau_object *engine, + struct nouveau_oclass *oclass, void *data, u32 size, + struct nouveau_object **pobject) +{ + struct gk20a_instmem_priv *priv; + int ret; + + ret = nouveau_instmem_create(parent, engine, oclass, &priv); + *pobject = nv_object(priv); + if (ret) + return ret; + + return 0; +} + +struct nouveau_oclass * +gk20a_instmem_oclass = &(struct nouveau_instmem_impl) { + .base.handle = NV_SUBDEV(INSTMEM, 0xea), + .base.ofuncs = &(struct nouveau_ofuncs) { + .ctor = gk20a_instmem_ctor, + .dtor = _nouveau_instmem_dtor, + .init = _nouveau_instmem_init, + .fini = gk20a_instmem_fini, + }, + .instobj = &gk20a_instobj_oclass.base, +}.base; diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.c b/drivers/gpu/drm/nouveau/nouveau_bo.c index b6dc85c614b..f00ae18003f 100644 --- a/drivers/gpu/drm/nouveau/nouveau_bo.c +++ b/drivers/gpu/drm/nouveau/nouveau_bo.c @@ -33,6 +33,7 @@ #include <subdev/fb.h> #include <subdev/vm.h> #include <subdev/bar.h> +#include <subdev/timer.h> #include "nouveau_drm.h" #include "nouveau_dma.h" @@ -407,6 +408,8 @@ nouveau_bo_validate(struct nouveau_bo *nvbo, bool interruptible, { int ret; + nouveau_bo_sync_for_device(nvbo); + ret = ttm_bo_validate(&nvbo->bo, &nvbo->placement, interruptible, no_wait_gpu); if (ret) @@ -435,8 +438,10 @@ nouveau_bo_wr16(struct nouveau_bo *nvbo, unsigned index, u16 val) mem = &mem[index]; if (is_iomem) iowrite16_native(val, (void __force __iomem *)mem); - else + else { *mem = val; + nv_cpu_cache_flush_area(mem, 2); + } } u32 @@ -459,8 +464,10 @@ nouveau_bo_wr32(struct nouveau_bo *nvbo, unsigned index, u32 val) mem = &mem[index]; if (is_iomem) iowrite32_native(val, (void 
__force __iomem *)mem); - else + else { *mem = val; + nv_cpu_cache_flush_area(mem, 4); + } } static struct ttm_tt * @@ -487,6 +494,38 @@ nouveau_bo_invalidate_caches(struct ttm_bo_device *bdev, uint32_t flags) return 0; } +void +nouveau_bo_sync_for_cpu(struct nouveau_bo *nvbo) +{ + struct nouveau_device *device; + struct ttm_tt *ttm = nvbo->bo.ttm; + + device = nouveau_dev(nouveau_bdev(ttm->bdev)->dev); + + if (nvbo->bo.ttm && nvbo->bo.ttm->caching_state == tt_cached) + ttm_dma_tt_cache_sync_for_cpu((struct ttm_dma_tt *)nvbo->bo.ttm, + nv_device_base(device)); +} + +void +nouveau_bo_sync_for_device(struct nouveau_bo *nvbo) +{ + struct ttm_tt *ttm = nvbo->bo.ttm; + + if (ttm && ttm->caching_state == tt_cached) { + struct nouveau_device *device; + + device = nouveau_dev(nouveau_bdev(ttm->bdev)->dev); + + ttm_dma_tt_cache_sync_for_device((struct ttm_dma_tt *)ttm, + nv_device_base(device)); + + nv_wr32(device, 0x70004, 0x00000001); + if (!nv_wait(device, 0x070004, 0x00000001, 0x00000000)) + nv_warn(device, "L2 invalidate timeout\n"); + } +} + static int nouveau_bo_init_mem_type(struct ttm_bo_device *bdev, uint32_t type, struct ttm_mem_type_manager *man) @@ -511,7 +550,11 @@ nouveau_bo_init_mem_type(struct ttm_bo_device *bdev, uint32_t type, TTM_MEMTYPE_FLAG_MAPPABLE; man->available_caching = TTM_PL_FLAG_UNCACHED | TTM_PL_FLAG_WC; +#if defined(__arm__) + man->default_caching = TTM_PL_FLAG_UNCACHED; +#else man->default_caching = TTM_PL_FLAG_WC; +#endif break; case TTM_PL_TT: if (nv_device(drm->device)->card_type >= NV_50) diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.h b/drivers/gpu/drm/nouveau/nouveau_bo.h index ff17c1f432f..50f721eac65 100644 --- a/drivers/gpu/drm/nouveau/nouveau_bo.h +++ b/drivers/gpu/drm/nouveau/nouveau_bo.h @@ -89,6 +89,9 @@ int nouveau_bo_vma_add(struct nouveau_bo *, struct nouveau_vm *, struct nouveau_vma *); void nouveau_bo_vma_del(struct nouveau_bo *, struct nouveau_vma *); +void nouveau_bo_sync_for_cpu(struct nouveau_bo *); +void 
nouveau_bo_sync_for_device(struct nouveau_bo *); + /* TODO: submit equivalent to TTM generic API upstream? */ static inline void __iomem * nvbo_kmap_obj_iovirtual(struct nouveau_bo *nvbo) diff --git a/drivers/gpu/drm/nouveau/nouveau_debug.c b/drivers/gpu/drm/nouveau/nouveau_debug.c new file mode 100644 index 00000000000..92b5f5a2bb7 --- /dev/null +++ b/drivers/gpu/drm/nouveau/nouveau_debug.c @@ -0,0 +1,185 @@ + +#include <linux/highmem.h> +#include <linux/printk.h> +#include <core/object.h> +#include <core/gpuobj.h> +#include <engine/fifo.h> +#include <subdev/vm.h> +#include <nouveau_drm.h> +#include <nouveau_chan.h> +#include <nouveau_bo.h> + +static u32 +pfn_for_vaddr(u32 pgtpfn, u64 vaddr) +{ + u32 pde = vaddr >> 26; + u32 pte = (vaddr & 0x3ffffff) >> 12; + u32 ptepfn; + struct page *p = pfn_to_page(pgtpfn); + u32 *pgt = kmap(p); + u32 *pet; + u32 ret; + ptepfn = pgt[pde * 2 + 1] >> 4; + kunmap(p); + + p = pfn_to_page(ptepfn); + pet = kmap(p); + ret = pet[pte * 2] >> 4; + kunmap(p); + + return ret; +} + +void +dump_pte(u32 ptepfn, u32 base) +{ + struct page *p = pfn_to_page(ptepfn); + u32 *pte = kmap(p); + int k; + + for (k = 0; k < 512; k++) { + u32 mappfn = pte[0] >> 4; + bool ptevol = pte[1] & 0x1; + u32 pteflags = pte[1] & (0x7 << 1); + if (pte[0] & 1) { + printk(" %08x -> %08x VOL=%d FLAGS=%x\n", base * 0x4000000 + k * 0x1000, mappfn << 12, ptevol, pteflags); + } + pte += 2; + } + + kunmap(p); +} + +void +dump_pgt(u32 pgtpfn) +{ + struct page *p = pfn_to_page(pgtpfn); + u32 *pgt = kmap(p); + int j; + + for (j = 0; j < 128; j++) { + int flags = pgt[1] & 0xf; + int ptepfn = pgt[1] >> 4; + /* PTE valid */ + if ((flags & 0x3) != 0) { + dump_pte(ptepfn, j); + } + pgt += 2; + } + + kunmap(p); +} + +void +dump_ramuser(struct nouveau_channel *ch) +{ + struct nouveau_fifo_chan *chan = (struct nouveau_fifo_chan *) ch->object; + /* Read the values from the snoop area to get up-to-date information */ + u32 *ramuser = chan->user; + u32 get, get_hi, put, put_hi; + /* 
Read the low fields first as specified in the doc */ + get = ramuser[17]; + rmb(); + get_hi = ramuser[24]; + rmb(); + put = ramuser[16]; + rmb(); + put_hi = ramuser[19]; + rmb(); + printk(" PUT: %08x%08x\n", put_hi, put); + printk(" GET: %08x%08x\n", get_hi, get); + printk(" GP_PUT: %08x\n", ramuser[35]); + printk(" GP_GET: %08x\n", ramuser[34]); +} + +void dump_pb(u32 pbpfn, u32 start, u32 len) +{ + struct page *p = pfn_to_page(pbpfn); + u32 *pb = kmap(p) + start; + u32 i; + + for (i = start; i < start + len; i++) + printk("%08x: %08x\n", (pbpfn << PAGE_SHIFT) + i * 4, pb[i]); + + kunmap(p); +} + +void dump_gpentries(struct nouveau_channel *ch, u32 gppfn, u32 pgtpfn) +{ + struct nouveau_fifo_chan *chan = (struct nouveau_fifo_chan *) ch->object; + struct page *p = pfn_to_page(gppfn); + u32 *gp = kmap(p); + u32 *ramuser = chan->user; + u32 gp0, gp1; + u64 addr; + u32 len; + u32 gp_get, gp_put, i; + + gp_get = ramuser[34]; + gp_put = ramuser[35]; + + for (i = gp_get; i <= gp_put; i++) { + gp0 = gp[i * 2]; + gp1 = gp[i * 2 + 1]; + addr = ((u64)gp0) | (((u64)(gp1 & 0xff)) << 32); + len = ((gp1 & 0x7ffffc00) >> 10); + printk(" addr: %llx len: %x\n", addr, len); + if (addr != 0) + dump_pb(pfn_for_vaddr(pgtpfn, addr), addr & (~PAGE_MASK), len); + } + + kunmap(p); +} + +void dump_ramin(struct nouveau_channel *ch, u32 raminpfn) +{ + struct page *p = pfn_to_page(raminpfn); + u32 *ramin = kmap(p); + u32 pgtpfn; + u64 gpbase; + + pgtpfn = (ramin[128] & (~0x3)) >> PAGE_SHIFT; + + gpbase = (((u64)(ramin[19] & 0xff)) << 32) | (ramin[18] & 0xfffffff8); + + printk(" GP_BASE: %llx\n", gpbase); + printk(" phys: %x\n", pfn_for_vaddr(pgtpfn, gpbase)); + printk(" GP ENTRIES:\n"); + dump_gpentries(ch, pfn_for_vaddr(pgtpfn, gpbase), pgtpfn); + printk(" USERD: %08x%08x\n", ramin[3], ramin[2]); + dump_ramuser(ch); + printk(" PGT: %02x%08x %02x%08x\n", ramin[129], ramin[128], ramin[131], ramin[130]); + dump_pgt(pgtpfn); + kunmap(p); +} + +void dump_channel(struct nouveau_channel *chan) +{ + 
struct nouveau_device *device = nv_device(chan->drm->device); + struct nouveau_fifo_chan *fchan = (struct nouveau_fifo_chan *) chan->object; + + u32 raminpfn = nv_rd32(device, 0x800000 + (fchan->chid * 8)) & 0xfffff; + printk("Channel 0x%x, RAMIN: %x\n", fchan->chid, raminpfn << PAGE_SHIFT); + + dump_ramin(chan, raminpfn); +} + +/* +void +dump_runlist(struct nouveau_object *priv, u32 nchan, struct nouveau_gpuobj *cur) +{ + int i; + + for (i = 0; i < nchan; i++) { + u32 val = nv_ro32(cur, i * 8); + u32 chst1 = nv_rd32(priv, 0x800000 + (val * 8)); + u32 chst2 = nv_rd32(priv, 0x800004 + (val * 8)); + u32 raminpfn = chst1 & 0xfffff; + if ((chst1 & 0x80000000) && val) { + printk(" channel %x %08x %08x\n", val, chst1, chst2); + printk(" RAMIN: %08x\n", raminpfn << PAGE_SHIFT); + dump_ramin(raminpfn); + } + } +} +*/
\ No newline at end of file diff --git a/drivers/gpu/drm/nouveau/nouveau_debug.h b/drivers/gpu/drm/nouveau/nouveau_debug.h new file mode 100644 index 00000000000..b5a3a420926 --- /dev/null +++ b/drivers/gpu/drm/nouveau/nouveau_debug.h @@ -0,0 +1,11 @@ + +struct nouveau_object; +struct nouveau_gpuobj; +struct nouveau_channel; + +void dump_pte(u32 ptepfn, u32 base); +void dump_pgt(u32 pgtpfn); +void dump_ramuser(struct nouveau_channel *chan); +void dump_ramin(struct nouveau_channel *chan, u32 raminpfn); +void dump_channel(struct nouveau_channel *chan); +/*void dump_runlist(struct nouveau_object *priv, u32 nchan, struct nouveau_gpuobj *cur);*/
\ No newline at end of file diff --git a/drivers/gpu/drm/nouveau/nouveau_drm.c b/drivers/gpu/drm/nouveau/nouveau_drm.c index e8ae68a9aaf..e838fb8624c 100644 --- a/drivers/gpu/drm/nouveau/nouveau_drm.c +++ b/drivers/gpu/drm/nouveau/nouveau_drm.c @@ -172,6 +172,11 @@ nouveau_accel_init(struct nouveau_drm *drm) return; } + if (device->chipset == 0xea) { + /* gk20a does not have CE0/CE1 */ + arg0 = NVE0_CHANNEL_IND_ENGINE_GR; + arg1 = 1; + } else if (device->card_type >= NV_E0) { ret = nouveau_channel_new(drm, &drm->client, NVDRM_DEVICE, NVDRM_CHAN + 1, @@ -359,6 +364,9 @@ nouveau_drm_load(struct drm_device *dev, unsigned long flags) dev->dev_private = drm; drm->dev = dev; + if (dev->platformdev) + platform_set_drvdata(dev->platformdev, dev); + nouveau_client(drm)->debug = nouveau_dbgopt(nouveau_debug, "DRM"); INIT_LIST_HEAD(&drm->clients); @@ -1004,23 +1012,19 @@ nouveau_drm_pci_driver = { .driver.pm = &nouveau_pm_ops, }; -int nouveau_drm_platform_probe(struct platform_device *pdev) +int nouveau_drm_platform_device_create(struct platform_device *pdev, int length, + void **pobject) { - struct nouveau_device *device; - int ret; - - ret = nouveau_device_create(pdev, NOUVEAU_BUS_PLATFORM, - nouveau_platform_name(pdev), - dev_name(&pdev->dev), nouveau_config, - nouveau_debug, &device); - - ret = drm_platform_init(&driver, pdev); - if (ret) { - nouveau_object_ref(NULL, (struct nouveau_object **)&device); - return ret; - } + return nouveau_device_create_(pdev, NOUVEAU_BUS_PLATFORM, + nouveau_platform_name(pdev), + dev_name(&pdev->dev), + nouveau_config, nouveau_debug, length, + pobject); +} - return ret; +int nouveau_drm_platform_device_init(struct platform_device *pdev) +{ + return drm_platform_init(&driver, pdev); } static int __init @@ -1036,6 +1040,8 @@ nouveau_drm_init(void) if (!nouveau_modeset) return 0; + nouveau_platform_driver_init(); + nouveau_register_dsm_handler(); return drm_pci_init(&driver, &nouveau_drm_pci_driver); } @@ -1048,6 +1054,8 @@ 
nouveau_drm_exit(void) drm_pci_exit(&driver, &nouveau_drm_pci_driver); nouveau_unregister_dsm_handler(); + + nouveau_platform_driver_exit(); } module_init(nouveau_drm_init); diff --git a/drivers/gpu/drm/nouveau/nouveau_drm.h b/drivers/gpu/drm/nouveau/nouveau_drm.h index 7efbafaf7c1..178f8cfb148 100644 --- a/drivers/gpu/drm/nouveau/nouveau_drm.h +++ b/drivers/gpu/drm/nouveau/nouveau_drm.h @@ -157,6 +157,27 @@ nouveau_dev(struct drm_device *dev) int nouveau_pmops_suspend(struct device *); int nouveau_pmops_resume(struct device *); +int nouveau_drm_platform_device_create(struct platform_device *, int, void **); +int nouveau_drm_platform_device_init(struct platform_device *); + +#if IS_ENABLED(CONFIG_NOUVEAU_PLATFORM_DRIVER) + +int nouveau_platform_driver_init(void); +void nouveau_platform_driver_exit(void); + +#else + +static inline int nouveau_platform_driver_init(void) +{ + return 0; +} + +static inline void nouveau_platform_driver_exit(void) +{ +} + +#endif + #define NV_FATAL(cli, fmt, args...) nv_fatal((cli), fmt, ##args) #define NV_ERROR(cli, fmt, args...) nv_error((cli), fmt, ##args) #define NV_WARN(cli, fmt, args...) nv_warn((cli), fmt, ##args) diff --git a/drivers/gpu/drm/nouveau/nouveau_gem.c b/drivers/gpu/drm/nouveau/nouveau_gem.c index c90c0dc0afe..b7e42fdc963 100644 --- a/drivers/gpu/drm/nouveau/nouveau_gem.c +++ b/drivers/gpu/drm/nouveau/nouveau_gem.c @@ -897,7 +897,13 @@ nouveau_gem_ioctl_cpu_prep(struct drm_device *dev, void *data, ret = ttm_bo_wait(&nvbo->bo, true, true, no_wait); spin_unlock(&nvbo->bo.bdev->fence_lock); drm_gem_object_unreference_unlocked(gem); - return ret; + + if (ret) + return ret; + + nouveau_bo_sync_for_cpu(nvbo); + + return 0; } int diff --git a/drivers/gpu/drm/nouveau/nouveau_platform.c b/drivers/gpu/drm/nouveau/nouveau_platform.c new file mode 100644 index 00000000000..fc720376554 --- /dev/null +++ b/drivers/gpu/drm/nouveau/nouveau_platform.c @@ -0,0 +1,193 @@ +/* + * Copyright (c) 2014, NVIDIA CORPORATION. 
All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ + +#include <linux/clk.h> +#include <linux/io.h> +#include <linux/module.h> +#include <linux/platform_device.h> +#include <linux/of.h> +#include <linux/reset.h> +#include <linux/regulator/consumer.h> +#include <linux/tegra-powergate.h> + +#include "engine/device.h" +#include "nouveau_drm.h" +#include "nouveau_platform.h" + +static int nouveau_platform_power_up(struct nouveau_platform_gpu *gpu) +{ + int err; + + err = regulator_enable(gpu->vdd); + if (err) + goto err_power; + + err = clk_prepare_enable(gpu->clk); + if (err) + goto err_clk; + err = clk_prepare_enable(gpu->clk_pwr); + if (err) + goto err_clk_pwr; + clk_set_rate(gpu->clk_pwr, 204000000); + udelay(10); + + reset_control_assert(gpu->rst); + udelay(10); + + err = tegra_powergate_remove_clamping(TEGRA_POWERGATE_3D); + if (err) + goto err_clamp; + udelay(10); + + reset_control_deassert(gpu->rst); + udelay(10); + + return 0; + +err_clamp: + clk_disable_unprepare(gpu->clk_pwr); +err_clk_pwr: + clk_disable_unprepare(gpu->clk); +err_clk: + regulator_disable(gpu->vdd); +err_power: + return err; +} + +static int nouveau_platform_power_down(struct nouveau_platform_gpu *gpu) +{ + int err; + + reset_control_assert(gpu->rst); + udelay(10); + + clk_disable_unprepare(gpu->clk_pwr); + clk_disable_unprepare(gpu->clk); + udelay(10); + + err = regulator_disable(gpu->vdd); + if (err) + return err; + + return 0; +} + +static int nouveau_platform_probe(struct platform_device *pdev) +{ + struct nouveau_platform_gpu *gpu; + struct regulator *vdd; + struct reset_control *rst; + struct clk *clk, *pwr; + int err; + + /* + * get the resources we need before we allocate the device' memory + * in case we need to return -EPROBE_DEFER + */ + vdd = devm_regulator_get(&pdev->dev, "vdd"); + if (IS_ERR(vdd)) + return PTR_ERR(vdd); + + rst = devm_reset_control_get(&pdev->dev, "gpu"); + if (IS_ERR(rst)) + return PTR_ERR(rst); + + clk = devm_clk_get(&pdev->dev, "gpu"); + if (IS_ERR(clk)) + return PTR_ERR(clk); + + pwr = 
devm_clk_get(&pdev->dev, "pwr"); + if (IS_ERR(pwr)) + return PTR_ERR(pwr); + + err = nouveau_drm_platform_device_create(pdev, sizeof(*gpu), + (void **)&gpu); + if (err) + return err; + + gpu->vdd = vdd; + gpu->rst = rst; + gpu->clk = clk; + gpu->clk_pwr = pwr; + + err = nouveau_platform_power_up(gpu); + if (err) + goto err_probe; + + err = nouveau_drm_platform_device_init(pdev); + if (err) + goto err_probe; + + return 0; + +err_probe: + nouveau_object_ref(NULL, (struct nouveau_object **)&gpu->device); + return err; +} + +static int nouveau_platform_remove(struct platform_device *pdev) +{ + struct drm_device *drm_dev = platform_get_drvdata(pdev); + struct nouveau_device *device = nouveau_dev(drm_dev); + struct nouveau_platform_gpu *gpu = to_platform_gpu(device); + int err; + + drm_dev->irq_enabled = false; + drm_put_dev(drm_dev); + + err = nouveau_platform_power_down(gpu); + if (err) + return err; + + nouveau_object_ref(NULL, (struct nouveau_object **)&device); + nouveau_object_debug(); + + return 0; +} + +#if IS_ENABLED(CONFIG_OF) +static const struct of_device_id nouveau_platform_match[] = { + { .compatible = "nvidia,gk20a" }, + { } +}; + +MODULE_DEVICE_TABLE(of, nouveau_platform_match); +#endif + +struct platform_driver nouveau_platform_driver = { + .driver = { + .name = "nouveau", + .of_match_table = of_match_ptr(nouveau_platform_match), + }, + .probe = nouveau_platform_probe, + .remove = nouveau_platform_remove, +}; + +int __init nouveau_platform_driver_init(void) +{ + return platform_driver_register(&nouveau_platform_driver); +} + +void __exit nouveau_platform_driver_exit(void) +{ + platform_driver_unregister(&nouveau_platform_driver); +} diff --git a/drivers/gpu/drm/nouveau/nouveau_platform.h b/drivers/gpu/drm/nouveau/nouveau_platform.h new file mode 100644 index 00000000000..847b3671e60 --- /dev/null +++ b/drivers/gpu/drm/nouveau/nouveau_platform.h @@ -0,0 +1,44 @@ +/* + * Copyright (c) 2014, NVIDIA CORPORATION. All rights reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ + +#ifndef __NOUVEAU_PLATFORM_H__ +#define __NOUVEAU_PLATFORM_H__ + +#include "core/device.h" + +struct reset_control; +struct clk; +struct regulator; + +struct nouveau_platform_gpu { + struct nouveau_device device; + + struct reset_control *rst; + struct clk *clk; + struct clk *clk_pwr; + + struct regulator *vdd; +}; +#define to_platform_gpu(d) container_of(d, struct nouveau_platform_gpu, device) + + +#endif diff --git a/drivers/gpu/drm/ttm/ttm_bo_util.c b/drivers/gpu/drm/ttm/ttm_bo_util.c index 1df856f7856..30e5d90cb7b 100644 --- a/drivers/gpu/drm/ttm/ttm_bo_util.c +++ b/drivers/gpu/drm/ttm/ttm_bo_util.c @@ -500,7 +500,7 @@ pgprot_t ttm_io_prot(uint32_t caching_flags, pgprot_t tmp) pgprot_val(tmp) |= _PAGE_GUARDED; } #endif -#if defined(__ia64__) +#if defined(__ia64__) || defined(__arm__) if (caching_flags & TTM_PL_FLAG_WC) tmp = pgprot_writecombine(tmp); else diff --git a/drivers/gpu/drm/ttm/ttm_tt.c b/drivers/gpu/drm/ttm/ttm_tt.c index 75f31909004..66c16ad35f7 100644 --- a/drivers/gpu/drm/ttm/ttm_tt.c +++ b/drivers/gpu/drm/ttm/ttm_tt.c @@ -38,6 +38,7 @@ #include <linux/swap.h> #include <linux/slab.h> #include <linux/export.h> +#include <linux/dma-mapping.h> #include <drm/drm_cache.h> #include <drm/drm_mem_util.h> #include <drm/ttm/ttm_module.h> @@ -248,6 +249,30 @@ void ttm_dma_tt_fini(struct ttm_dma_tt *ttm_dma) } EXPORT_SYMBOL(ttm_dma_tt_fini); +void ttm_dma_tt_cache_sync_for_device(struct ttm_dma_tt *ttm_dma, + struct device *dev) +{ + unsigned long i; + + for (i = 0; i < ttm_dma->ttm.num_pages; i++) { + dma_sync_single_for_device(dev, ttm_dma->dma_address[i], + PAGE_SIZE, DMA_TO_DEVICE); + } +} +EXPORT_SYMBOL(ttm_dma_tt_cache_sync_for_device); + +void ttm_dma_tt_cache_sync_for_cpu(struct ttm_dma_tt *ttm_dma, + struct device *dev) +{ + unsigned long i; + + for (i = 0; i < ttm_dma->ttm.num_pages; i++) { + dma_sync_single_for_cpu(dev, ttm_dma->dma_address[i], + PAGE_SIZE, DMA_FROM_DEVICE); + } +} +EXPORT_SYMBOL(ttm_dma_tt_cache_sync_for_cpu); + void 
ttm_tt_unbind(struct ttm_tt *ttm) { int ret; diff --git a/firmware/nouveau/nvea_fuc409c b/firmware/nouveau/nvea_fuc409c Binary files differnew file mode 100644 index 00000000000..176bcc184e2 --- /dev/null +++ b/firmware/nouveau/nvea_fuc409c diff --git a/firmware/nouveau/nvea_fuc409d b/firmware/nouveau/nvea_fuc409d Binary files differnew file mode 100644 index 00000000000..e6997c0252c --- /dev/null +++ b/firmware/nouveau/nvea_fuc409d diff --git a/firmware/nouveau/nvea_fuc41ac b/firmware/nouveau/nvea_fuc41ac Binary files differnew file mode 100644 index 00000000000..143a923b953 --- /dev/null +++ b/firmware/nouveau/nvea_fuc41ac diff --git a/firmware/nouveau/nvea_fuc41ad b/firmware/nouveau/nvea_fuc41ad Binary files differnew file mode 100644 index 00000000000..b14ead08bdf --- /dev/null +++ b/firmware/nouveau/nvea_fuc41ad diff --git a/include/drm/ttm/ttm_bo_driver.h b/include/drm/ttm/ttm_bo_driver.h index a5183da3ef9..52fb709568f 100644 --- a/include/drm/ttm/ttm_bo_driver.h +++ b/include/drm/ttm/ttm_bo_driver.h @@ -41,6 +41,7 @@ #include <linux/fs.h> #include <linux/spinlock.h> #include <linux/reservation.h> +#include <linux/device.h> struct ttm_backend_func { /** @@ -690,6 +691,33 @@ extern int ttm_tt_swapout(struct ttm_tt *ttm, */ extern void ttm_tt_unpopulate(struct ttm_tt *ttm); +/** + * ttm_dma_tt_cache_sync_for_device: + * + * @ttm A struct ttm_tt of the type returned by ttm_dma_tt_init. + * @dev A struct device representing the device to which to sync. + * + * This function will flush the CPU caches on arches where snooping in the + * TT is not available. On fully coherent arches this will turn into an (almost) + * noop. This makes sure that data written by the CPU is visible to the device. + */ +extern void ttm_dma_tt_cache_sync_for_device(struct ttm_dma_tt *ttm_dma, + struct device *dev); + +/** + * ttm_dma_tt_cache_sync_for_cpu: + * + * @ttm A struct ttm_tt of the type returned by ttm_dma_tt_init. 
+ * @dev A struct device representing the device from which to sync. + * + * This function will invalidate the CPU caches on arches where snooping in the + * TT is not available. On fully coherent arches this will turn into an (almost) + * noop. This makes sure that the CPU does not read any stale cached or + * prefetched data. + */ +extern void ttm_dma_tt_cache_sync_for_cpu(struct ttm_dma_tt *ttm_dma, + struct device *dev); + /* * ttm_bo.c */ |