63 files changed, 2828 insertions, 797 deletions
diff --git a/drivers/misc/Kconfig b/drivers/misc/Kconfig
index c55b63750757..7f0d48f406e3 100644
--- a/drivers/misc/Kconfig
+++ b/drivers/misc/Kconfig
@@ -8,7 +8,6 @@ menu "Misc devices"
 config SENSORS_LIS3LV02D
 	tristate
 	depends on INPUT
-	select INPUT_POLLDEV
 
 config AD525X_DPOT
 	tristate "Analog Devices Digital Potentiometers"
@@ -326,14 +325,14 @@ config SENSORS_TSL2550
 	  will be called tsl2550.
 
 config SENSORS_BH1770
-         tristate "BH1770GLC / SFH7770 combined ALS - Proximity sensor"
-         depends on I2C
-         ---help---
-           Say Y here if you want to build a driver for BH1770GLC (ROHM) or
+	 tristate "BH1770GLC / SFH7770 combined ALS - Proximity sensor"
+	 depends on I2C
+	 ---help---
+	   Say Y here if you want to build a driver for BH1770GLC (ROHM) or
 	   SFH7770 (Osram) combined ambient light and proximity sensor chip.
 
-           To compile this driver as a module, choose M here: the
-           module will be called bh1770glc. If unsure, say N here.
+	   To compile this driver as a module, choose M here: the
+	   module will be called bh1770glc. If unsure, say N here.
 
 config SENSORS_APDS990X
 	 tristate "APDS990X combined als and proximity sensors"
@@ -438,8 +437,8 @@ config PCI_ENDPOINT_TEST
 	select CRC32
 	tristate "PCI Endpoint Test driver"
 	---help---
-           Enable this configuration option to enable the host side test driver
-           for PCI Endpoint.
+	   Enable this configuration option to enable the host side test driver
+	   for PCI Endpoint.
 
 config XILINX_SDFEC
 	tristate "Xilinx SDFEC 16"
diff --git a/drivers/misc/atmel_tclib.c b/drivers/misc/atmel_tclib.c
index 08b5b639d77f..7de7840f613c 100644
--- a/drivers/misc/atmel_tclib.c
+++ b/drivers/misc/atmel_tclib.c
@@ -109,7 +109,6 @@ static int __init tc_probe(struct platform_device *pdev)
 	struct atmel_tc *tc;
 	struct clk	*clk;
 	int		irq;
-	struct resource	*r;
 	unsigned int	i;
 
 	if (of_get_child_count(pdev->dev.of_node))
@@ -133,8 +132,7 @@ static int __init tc_probe(struct platform_device *pdev)
 	if (IS_ERR(tc->slow_clk))
 		return PTR_ERR(tc->slow_clk);
 
-	r = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	tc->regs = devm_ioremap_resource(&pdev->dev, r);
+	tc->regs = devm_platform_ioremap_resource(pdev, 0);
 	if (IS_ERR(tc->regs))
 		return PTR_ERR(tc->regs);
 
diff --git a/drivers/misc/cardreader/Makefile b/drivers/misc/cardreader/Makefile
index d9bff5a2217e..1f56267ed2f4 100644
--- a/drivers/misc/cardreader/Makefile
+++ b/drivers/misc/cardreader/Makefile
@@ -1,5 +1,5 @@
 # SPDX-License-Identifier: GPL-2.0-only
 obj-$(CONFIG_MISC_ALCOR_PCI)	+= alcor_pci.o
 obj-$(CONFIG_MISC_RTSX_PCI)	+= rtsx_pci.o
-rtsx_pci-objs := rtsx_pcr.o rts5209.o rts5229.o rtl8411.o rts5227.o rts5249.o rts5260.o
+rtsx_pci-objs := rtsx_pcr.o rts5209.o rts5229.o rtl8411.o rts5227.o rts5249.o rts5260.o rts5261.o
 obj-$(CONFIG_MISC_RTSX_USB)	+= rtsx_usb.o
diff --git a/drivers/misc/cardreader/rts5260.c b/drivers/misc/cardreader/rts5260.c
index 40a6d199f2ea..4214f02a17fd 100644
--- a/drivers/misc/cardreader/rts5260.c
+++ b/drivers/misc/cardreader/rts5260.c
@@ -191,7 +191,6 @@ static int sd_set_sample_push_timing_sd30(struct rtsx_pcr *pcr)
 
 static int rts5260_card_power_on(struct rtsx_pcr *pcr, int card)
 {
-	int err = 0;
 	struct rtsx_cr_option *option = &pcr->option;
 
 	if (option->ocp_en)
@@ -231,7 +230,7 @@ static int rts5260_card_power_on(struct rtsx_pcr *pcr, int card)
 
 	rtsx_pci_write_register(pcr, REG_PRE_RW_MODE, EN_INFINITE_MODE, 0);
 
-	return err;
+	return 0;
 }
 
 static int rts5260_switch_output_voltage(struct rtsx_pcr *pcr, u8 voltage)
diff --git a/drivers/misc/cardreader/rts5261.c b/drivers/misc/cardreader/rts5261.c
new file mode 100644
index 000000000000..32dcec2e9dfd
--- /dev/null
+++ b/drivers/misc/cardreader/rts5261.c
@@ -0,0 +1,792 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/* Driver for Realtek PCI-Express card reader
+ *
+ * Copyright(c) 2018-2019 Realtek Semiconductor Corp. All rights reserved.
+ *
+ * Author:
+ *   Rui FENG <rui_feng@realsil.com.cn>
+ *   Wei WANG <wei_wang@realsil.com.cn>
+ */
+
+#include <linux/module.h>
+#include <linux/delay.h>
+#include <linux/rtsx_pci.h>
+
+#include "rts5261.h"
+#include "rtsx_pcr.h"
+
+static u8 rts5261_get_ic_version(struct rtsx_pcr *pcr)
+{
+	u8 val;
+
+	rtsx_pci_read_register(pcr, DUMMY_REG_RESET_0, &val);
+	return val & IC_VERSION_MASK;
+}
+
+static void rts5261_fill_driving(struct rtsx_pcr *pcr, u8 voltage)
+{
+	u8 driving_3v3[4][3] = {
+		{0x13, 0x13, 0x13},
+		{0x96, 0x96, 0x96},
+		{0x7F, 0x7F, 0x7F},
+		{0x96, 0x96, 0x96},
+	};
+	u8 driving_1v8[4][3] = {
+		{0x99, 0x99, 0x99},
+		{0x3A, 0x3A, 0x3A},
+		{0xE6, 0xE6, 0xE6},
+		{0xB3, 0xB3, 0xB3},
+	};
+	u8 (*driving)[3], drive_sel;
+
+	if (voltage == OUTPUT_3V3) {
+		driving = driving_3v3;
+		drive_sel = pcr->sd30_drive_sel_3v3;
+	} else {
+		driving = driving_1v8;
+		drive_sel = pcr->sd30_drive_sel_1v8;
+	}
+
+	rtsx_pci_write_register(pcr, SD30_CLK_DRIVE_SEL,
+			 0xFF, driving[drive_sel][0]);
+
+	rtsx_pci_write_register(pcr, SD30_CMD_DRIVE_SEL,
+			 0xFF, driving[drive_sel][1]);
+
+	rtsx_pci_write_register(pcr, SD30_DAT_DRIVE_SEL,
+			 0xFF, driving[drive_sel][2]);
+}
+
+static void rtsx5261_fetch_vendor_settings(struct rtsx_pcr *pcr)
+{
+	u32 reg;
+	/* 0x814~0x817 */
+	rtsx_pci_read_config_dword(pcr, PCR_SETTING_REG2, &reg);
+	pcr_dbg(pcr, "Cfg 0x%x: 0x%x\n", PCR_SETTING_REG2, reg);
+
+	if (!rts5261_vendor_setting_valid(reg)) {
+		pcr_dbg(pcr, "skip fetch vendor setting\n");
+		return;
+	}
+
+	pcr->card_drive_sel &= 0x3F;
+	pcr->card_drive_sel |= rts5261_reg_to_card_drive_sel(reg);
+
+	if (rts5261_reg_check_reverse_socket(reg))
+		pcr->flags |= PCR_REVERSE_SOCKET;
+
+	/* 0x724~0x727 */
+	rtsx_pci_read_config_dword(pcr, PCR_SETTING_REG1, &reg);
+	pcr_dbg(pcr, "Cfg 0x%x: 0x%x\n", PCR_SETTING_REG1, reg);
+
+	pcr->aspm_en = rts5261_reg_to_aspm(reg);
+	pcr->sd30_drive_sel_1v8 = rts5261_reg_to_sd30_drive_sel_1v8(reg);
+	pcr->sd30_drive_sel_3v3 = rts5261_reg_to_sd30_drive_sel_3v3(reg);
+}
+
+static void rts5261_force_power_down(struct rtsx_pcr *pcr, u8 pm_state)
+{
+	/* Set relink_time to 0 */
+	rtsx_pci_write_register(pcr, AUTOLOAD_CFG_BASE + 1, MASK_8_BIT_DEF, 0);
+	rtsx_pci_write_register(pcr, AUTOLOAD_CFG_BASE + 2, MASK_8_BIT_DEF, 0);
+	rtsx_pci_write_register(pcr, AUTOLOAD_CFG_BASE + 3,
+				RELINK_TIME_MASK, 0);
+
+	if (pm_state == HOST_ENTER_S3)
+		rtsx_pci_write_register(pcr, pcr->reg_pm_ctrl3,
+					D3_DELINK_MODE_EN, D3_DELINK_MODE_EN);
+
+	rtsx_pci_write_register(pcr, RTS5261_REG_FPDCTL,
+		SSC_POWER_DOWN, SSC_POWER_DOWN);
+}
+
+static int rts5261_enable_auto_blink(struct rtsx_pcr *pcr)
+{
+	return rtsx_pci_write_register(pcr, OLT_LED_CTL,
+		LED_SHINE_MASK, LED_SHINE_EN);
+}
+
+static int rts5261_disable_auto_blink(struct rtsx_pcr *pcr)
+{
+	return rtsx_pci_write_register(pcr, OLT_LED_CTL,
+		LED_SHINE_MASK, LED_SHINE_DISABLE);
+}
+
+static int rts5261_turn_on_led(struct rtsx_pcr *pcr)
+{
+	return rtsx_pci_write_register(pcr, GPIO_CTL,
+		0x02, 0x02);
+}
+
+static int rts5261_turn_off_led(struct rtsx_pcr *pcr)
+{
+	return rtsx_pci_write_register(pcr, GPIO_CTL,
+		0x02, 0x00);
+}
+
+/* SD Pull Control Enable:
+ *     SD_DAT[3:0] ==> pull up
+ *     SD_CD       ==> pull up
+ *     SD_WP       ==> pull up
+ *     SD_CMD      ==> pull up
+ *     SD_CLK      ==> pull down
+ */
+static const u32 rts5261_sd_pull_ctl_enable_tbl[] = {
+	RTSX_REG_PAIR(CARD_PULL_CTL2, 0xAA),
+	RTSX_REG_PAIR(CARD_PULL_CTL3, 0xE9),
+	0,
+};
+
+/* SD Pull Control Disable:
+ *     SD_DAT[3:0] ==> pull down
+ *     SD_CD       ==> pull up
+ *     SD_WP       ==> pull down
+ *     SD_CMD      ==> pull down
+ *     SD_CLK      ==> pull down
+ */
+static const u32 rts5261_sd_pull_ctl_disable_tbl[] = {
+	RTSX_REG_PAIR(CARD_PULL_CTL2, 0x55),
+	RTSX_REG_PAIR(CARD_PULL_CTL3, 0xD5),
+	0,
+};
+
+static int rts5261_sd_set_sample_push_timing_sd30(struct rtsx_pcr *pcr)
+{
+	rtsx_pci_write_register(pcr, SD_CFG1, SD_MODE_SELECT_MASK
+		| SD_ASYNC_FIFO_NOT_RST, SD_30_MODE | SD_ASYNC_FIFO_NOT_RST);
+	rtsx_pci_write_register(pcr, CLK_CTL, CLK_LOW_FREQ, CLK_LOW_FREQ);
+	rtsx_pci_write_register(pcr, CARD_CLK_SOURCE, 0xFF,
+			CRC_VAR_CLK0 | SD30_FIX_CLK | SAMPLE_VAR_CLK1);
+	rtsx_pci_write_register(pcr, CLK_CTL, CLK_LOW_FREQ, 0);
+
+	return 0;
+}
+
+static int rts5261_card_power_on(struct rtsx_pcr *pcr, int card)
+{
+	struct rtsx_cr_option *option = &pcr->option;
+
+	if (option->ocp_en)
+		rtsx_pci_enable_ocp(pcr);
+
+
+	rtsx_pci_write_register(pcr, RTS5261_LDO1_CFG1,
+			RTS5261_LDO1_TUNE_MASK, RTS5261_LDO1_33);
+	rtsx_pci_write_register(pcr, RTS5261_LDO1233318_POW_CTL,
+			RTS5261_LDO1_POWERON, RTS5261_LDO1_POWERON);
+
+	rtsx_pci_write_register(pcr, RTS5261_LDO1233318_POW_CTL,
+			RTS5261_LDO3318_POWERON, RTS5261_LDO3318_POWERON);
+
+	msleep(20);
+
+	rtsx_pci_write_register(pcr, CARD_OE, SD_OUTPUT_EN, SD_OUTPUT_EN);
+
+	/* Initialize SD_CFG1 register */
+	rtsx_pci_write_register(pcr, SD_CFG1, 0xFF,
+			SD_CLK_DIVIDE_128 | SD_20_MODE | SD_BUS_WIDTH_1BIT);
+
+	rtsx_pci_write_register(pcr, SD_SAMPLE_POINT_CTL,
+			0xFF, SD20_RX_POS_EDGE);
+	rtsx_pci_write_register(pcr, SD_PUSH_POINT_CTL, 0xFF, 0);
+	rtsx_pci_write_register(pcr, CARD_STOP, SD_STOP | SD_CLR_ERR,
+			SD_STOP | SD_CLR_ERR);
+
+	/* Reset SD_CFG3 register */
+	rtsx_pci_write_register(pcr, SD_CFG3, SD30_CLK_END_EN, 0);
+	rtsx_pci_write_register(pcr, REG_SD_STOP_SDCLK_CFG,
+			SD30_CLK_STOP_CFG_EN | SD30_CLK_STOP_CFG1 |
+			SD30_CLK_STOP_CFG0, 0);
+
+	if (pcr->extra_caps & EXTRA_CAPS_SD_SDR50 ||
+	    pcr->extra_caps & EXTRA_CAPS_SD_SDR104)
+		rts5261_sd_set_sample_push_timing_sd30(pcr);
+
+	return 0;
+}
+
+static int rts5261_switch_output_voltage(struct rtsx_pcr *pcr, u8 voltage)
+{
+	int err;
+	u16 val = 0;
+
+	rtsx_pci_write_register(pcr, RTS5261_CARD_PWR_CTL,
+			RTS5261_PUPDC, RTS5261_PUPDC);
+
+	switch (voltage) {
+	case OUTPUT_3V3:
+		rtsx_pci_read_phy_register(pcr, PHY_TUNE, &val);
+		val |= PHY_TUNE_SDBUS_33;
+		err = rtsx_pci_write_phy_register(pcr, PHY_TUNE, val);
+		if (err < 0)
+			return err;
+
+		rtsx_pci_write_register(pcr, RTS5261_DV3318_CFG,
+				RTS5261_DV3318_TUNE_MASK, RTS5261_DV3318_33);
+		rtsx_pci_write_register(pcr, SD_PAD_CTL,
+				SD_IO_USING_1V8, 0);
+		break;
+	case OUTPUT_1V8:
+		rtsx_pci_read_phy_register(pcr, PHY_TUNE, &val);
+		val &= ~PHY_TUNE_SDBUS_33;
+		err = rtsx_pci_write_phy_register(pcr, PHY_TUNE, val);
+		if (err < 0)
+			return err;
+
+		rtsx_pci_write_register(pcr, RTS5261_DV3318_CFG,
+				RTS5261_DV3318_TUNE_MASK, RTS5261_DV3318_18);
+		rtsx_pci_write_register(pcr, SD_PAD_CTL,
+				SD_IO_USING_1V8, SD_IO_USING_1V8);
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	/* set pad drive */
+	rts5261_fill_driving(pcr, voltage);
+
+	return 0;
+}
+
+static void rts5261_stop_cmd(struct rtsx_pcr *pcr)
+{
+	rtsx_pci_writel(pcr, RTSX_HCBCTLR, STOP_CMD);
+	rtsx_pci_writel(pcr, RTSX_HDBCTLR, STOP_DMA);
+	rtsx_pci_write_register(pcr, RTS5260_DMA_RST_CTL_0,
+				RTS5260_DMA_RST | RTS5260_ADMA3_RST,
+				RTS5260_DMA_RST | RTS5260_ADMA3_RST);
+	rtsx_pci_write_register(pcr, RBCTL, RB_FLUSH, RB_FLUSH);
+}
+
+static void rts5261_card_before_power_off(struct rtsx_pcr *pcr)
+{
+	rts5261_stop_cmd(pcr);
+	rts5261_switch_output_voltage(pcr, OUTPUT_3V3);
+
+}
+
+static void rts5261_enable_ocp(struct rtsx_pcr *pcr)
+{
+	u8 val = 0;
+
+	val = SD_OCP_INT_EN | SD_DETECT_EN;
+	rtsx_pci_write_register(pcr, REG_OCPCTL, 0xFF, val);
+
+}
+
+static void rts5261_disable_ocp(struct rtsx_pcr *pcr)
+{
+	u8 mask = 0;
+
+	mask = SD_OCP_INT_EN | SD_DETECT_EN;
+	rtsx_pci_write_register(pcr, REG_OCPCTL, mask, 0);
+	rtsx_pci_write_register(pcr, RTS5261_LDO1_CFG0,
+			RTS5261_LDO1_OCP_EN | RTS5261_LDO1_OCP_LMT_EN, 0);
+
+}
+
+static int rts5261_card_power_off(struct rtsx_pcr *pcr, int card)
+{
+	int err = 0;
+
+	rts5261_card_before_power_off(pcr);
+	err = rtsx_pci_write_register(pcr, RTS5261_LDO1233318_POW_CTL,
+				RTS5261_LDO_POWERON_MASK, 0);
+
+	if (pcr->option.ocp_en)
+		rtsx_pci_disable_ocp(pcr);
+
+	return err;
+}
+
+static void rts5261_init_ocp(struct rtsx_pcr *pcr)
+{
+	struct rtsx_cr_option *option = &pcr->option;
+
+	if (option->ocp_en) {
+		u8 mask, val;
+
+		rtsx_pci_write_register(pcr, RTS5261_LDO1_CFG0,
+			RTS5261_LDO1_OCP_EN | RTS5261_LDO1_OCP_LMT_EN,
+			RTS5261_LDO1_OCP_EN | RTS5261_LDO1_OCP_LMT_EN);
+
+		rtsx_pci_write_register(pcr, RTS5261_LDO1_CFG0,
+			RTS5261_LDO1_OCP_THD_MASK, option->sd_800mA_ocp_thd);
+
+		rtsx_pci_write_register(pcr, RTS5261_LDO1_CFG0,
+			RTS5261_LDO1_OCP_LMT_THD_MASK,
+			RTS5261_LDO1_LMT_THD_2000);
+
+		mask = SD_OCP_GLITCH_MASK;
+		val = pcr->hw_param.ocp_glitch;
+		rtsx_pci_write_register(pcr, REG_OCPGLITCH, mask, val);
+
+		rts5261_enable_ocp(pcr);
+	} else {
+		rtsx_pci_write_register(pcr, RTS5261_LDO1_CFG0,
+			RTS5261_LDO1_OCP_EN | RTS5261_LDO1_OCP_LMT_EN, 0);
+	}
+}
+
+static void rts5261_clear_ocpstat(struct rtsx_pcr *pcr)
+{
+	u8 mask = 0;
+	u8 val = 0;
+
+	mask = SD_OCP_INT_CLR | SD_OC_CLR;
+	val = SD_OCP_INT_CLR | SD_OC_CLR;
+
+	rtsx_pci_write_register(pcr, REG_OCPCTL, mask, val);
+
+	udelay(10);
+	rtsx_pci_write_register(pcr, REG_OCPCTL, mask, 0);
+
+}
+
+static void rts5261_process_ocp(struct rtsx_pcr *pcr)
+{
+	if (!pcr->option.ocp_en)
+		return;
+
+	rtsx_pci_get_ocpstat(pcr, &pcr->ocp_stat);
+
+	if (pcr->ocp_stat & (SD_OC_NOW | SD_OC_EVER)) {
+		rts5261_card_power_off(pcr, RTSX_SD_CARD);
+		rtsx_pci_write_register(pcr, CARD_OE, SD_OUTPUT_EN, 0);
+		rts5261_clear_ocpstat(pcr);
+		pcr->ocp_stat = 0;
+	}
+
+}
+
+static int rts5261_init_from_hw(struct rtsx_pcr *pcr)
+{
+	int retval;
+	u32 lval, i;
+	u8 valid, efuse_valid, tmp;
+
+	rtsx_pci_write_register(pcr, RTS5261_REG_PME_FORCE_CTL,
+		REG_EFUSE_POR | REG_EFUSE_POWER_MASK,
+		REG_EFUSE_POR | REG_EFUSE_POWERON);
+	udelay(1);
+	rtsx_pci_write_register(pcr, RTS5261_EFUSE_ADDR,
+		RTS5261_EFUSE_ADDR_MASK, 0x00);
+	rtsx_pci_write_register(pcr, RTS5261_EFUSE_CTL,
+		RTS5261_EFUSE_ENABLE | RTS5261_EFUSE_MODE_MASK,
+		RTS5261_EFUSE_ENABLE);
+
+	/* Wait transfer end */
+	for (i = 0; i < MAX_RW_REG_CNT; i++) {
+		rtsx_pci_read_register(pcr, RTS5261_EFUSE_CTL, &tmp);
+		if ((tmp & 0x80) == 0)
+			break;
+	}
+	rtsx_pci_read_register(pcr, RTS5261_EFUSE_READ_DATA, &tmp);
+	efuse_valid = ((tmp & 0x0C) >> 2);
+	pcr_dbg(pcr, "Load efuse valid: 0x%x\n", efuse_valid);
+
+	if (efuse_valid == 0) {
+		retval = rtsx_pci_read_config_dword(pcr,
+			PCR_SETTING_REG2, &lval);
+		if (retval != 0)
+			pcr_dbg(pcr, "read 0x814 DW fail\n");
+		pcr_dbg(pcr, "DW from 0x814: 0x%x\n", lval);
+		/* 0x816 */
+		valid = (u8)((lval >> 16) & 0x03);
+		pcr_dbg(pcr, "0x816: %d\n", valid);
+	}
+	rtsx_pci_write_register(pcr, RTS5261_REG_PME_FORCE_CTL,
+		REG_EFUSE_POR, 0);
+	pcr_dbg(pcr, "Disable efuse por!\n");
+
+	rtsx_pci_read_config_dword(pcr, PCR_SETTING_REG2, &lval);
+	lval = lval & 0x00FFFFFF;
+	retval = rtsx_pci_write_config_dword(pcr, PCR_SETTING_REG2, lval);
+	if (retval != 0)
+		pcr_dbg(pcr, "write config fail\n");
+
+	return retval;
+}
+
+static void rts5261_init_from_cfg(struct rtsx_pcr *pcr)
+{
+	u32 lval;
+	struct rtsx_cr_option *option = &pcr->option;
+
+	rtsx_pci_read_config_dword(pcr, PCR_ASPM_SETTING_REG1, &lval);
+
+	if (lval & ASPM_L1_1_EN_MASK)
+		rtsx_set_dev_flag(pcr, ASPM_L1_1_EN);
+	else
+		rtsx_clear_dev_flag(pcr, ASPM_L1_1_EN);
+
+	if (lval & ASPM_L1_2_EN_MASK)
+		rtsx_set_dev_flag(pcr, ASPM_L1_2_EN);
+	else
+		rtsx_clear_dev_flag(pcr, ASPM_L1_2_EN);
+
+	if (lval & PM_L1_1_EN_MASK)
+		rtsx_set_dev_flag(pcr, PM_L1_1_EN);
+	else
+		rtsx_clear_dev_flag(pcr, PM_L1_1_EN);
+
+	if (lval & PM_L1_2_EN_MASK)
+		rtsx_set_dev_flag(pcr, PM_L1_2_EN);
+	else
+		rtsx_clear_dev_flag(pcr, PM_L1_2_EN);
+
+	rtsx_pci_write_register(pcr, ASPM_FORCE_CTL, 0xFF, 0);
+	if (option->ltr_en) {
+		u16 val;
+
+		pcie_capability_read_word(pcr->pci, PCI_EXP_DEVCTL2, &val);
+		if (val & PCI_EXP_DEVCTL2_LTR_EN) {
+			option->ltr_enabled = true;
+			option->ltr_active = true;
+			rtsx_set_ltr_latency(pcr, option->ltr_active_latency);
+		} else {
+			option->ltr_enabled = false;
+		}
+	}
+
+	if (rtsx_check_dev_flag(pcr, ASPM_L1_1_EN | ASPM_L1_2_EN
+				| PM_L1_1_EN | PM_L1_2_EN))
+		option->force_clkreq_0 = false;
+	else
+		option->force_clkreq_0 = true;
+}
+
+static int rts5261_extra_init_hw(struct rtsx_pcr *pcr)
+{
+	struct rtsx_cr_option *option = &pcr->option;
+
+	rtsx_pci_write_register(pcr, RTS5261_AUTOLOAD_CFG1,
+			CD_RESUME_EN_MASK, CD_RESUME_EN_MASK);
+
+	rts5261_init_from_cfg(pcr);
+	rts5261_init_from_hw(pcr);
+
+	/* power off efuse */
+	rtsx_pci_write_register(pcr, RTS5261_REG_PME_FORCE_CTL,
+			REG_EFUSE_POWER_MASK, REG_EFUSE_POWEROFF);
+	rtsx_pci_write_register(pcr, L1SUB_CONFIG1,
+			AUX_CLK_ACTIVE_SEL_MASK, MAC_CKSW_DONE);
+	rtsx_pci_write_register(pcr, L1SUB_CONFIG3, 0xFF, 0);
+
+	rtsx_pci_write_register(pcr, RTS5261_AUTOLOAD_CFG4,
+			RTS5261_AUX_CLK_16M_EN, 0);
+
+	/* Release PRSNT# */
+	rtsx_pci_write_register(pcr, RTS5261_AUTOLOAD_CFG4,
+			RTS5261_FORCE_PRSNT_LOW, 0);
+	rtsx_pci_write_register(pcr, FUNC_FORCE_CTL,
+			FUNC_FORCE_UPME_XMT_DBG, FUNC_FORCE_UPME_XMT_DBG);
+
+	rtsx_pci_write_register(pcr, PCLK_CTL,
+			PCLK_MODE_SEL, PCLK_MODE_SEL);
+
+	rtsx_pci_write_register(pcr, PM_EVENT_DEBUG, PME_DEBUG_0, PME_DEBUG_0);
+	rtsx_pci_write_register(pcr, PM_CLK_FORCE_CTL, CLK_PM_EN, CLK_PM_EN);
+
+	/* LED shine disabled, set initial shine cycle period */
+	rtsx_pci_write_register(pcr, OLT_LED_CTL, 0x0F, 0x02);
+
+	/* Configure driving */
+	rts5261_fill_driving(pcr, OUTPUT_3V3);
+
+	/*
+	 * If u_force_clkreq_0 is enabled, CLKREQ# PIN will be forced
+	 * to drive low, and we forcibly request clock.
+	 */
+	if (option->force_clkreq_0)
+		rtsx_pci_write_register(pcr, PETXCFG,
+				 FORCE_CLKREQ_DELINK_MASK, FORCE_CLKREQ_LOW);
+	else
+		rtsx_pci_write_register(pcr, PETXCFG,
+				 FORCE_CLKREQ_DELINK_MASK, FORCE_CLKREQ_HIGH);
+
+	rtsx_pci_write_register(pcr, pcr->reg_pm_ctrl3, 0x10, 0x00);
+	rtsx_pci_write_register(pcr, RTS5261_REG_PME_FORCE_CTL,
+			FORCE_PM_CONTROL | FORCE_PM_VALUE, FORCE_PM_CONTROL);
+
+	/* Clear Enter RTD3_cold Information*/
+	rtsx_pci_write_register(pcr, RTS5261_FW_CTL,
+		RTS5261_INFORM_RTD3_COLD, 0);
+
+	return 0;
+}
+
+static void rts5261_enable_aspm(struct rtsx_pcr *pcr, bool enable)
+{
+	struct rtsx_cr_option *option = &pcr->option;
+	u8 val = 0;
+
+	if (pcr->aspm_enabled == enable)
+		return;
+
+	if (option->dev_aspm_mode == DEV_ASPM_DYNAMIC) {
+		val = pcr->aspm_en;
+		rtsx_pci_update_cfg_byte(pcr, pcr->pcie_cap + PCI_EXP_LNKCTL,
+					 ASPM_MASK_NEG, val);
+	} else if (option->dev_aspm_mode == DEV_ASPM_BACKDOOR) {
+		u8 mask = FORCE_ASPM_VAL_MASK | FORCE_ASPM_CTL0;
+
+		val = FORCE_ASPM_CTL0;
+		val |= (pcr->aspm_en & 0x02);
+		rtsx_pci_write_register(pcr, ASPM_FORCE_CTL, mask, val);
+		val = pcr->aspm_en;
+		rtsx_pci_update_cfg_byte(pcr, pcr->pcie_cap + PCI_EXP_LNKCTL,
+					 ASPM_MASK_NEG, val);
+	}
+	pcr->aspm_enabled = enable;
+
+}
+
+static void rts5261_disable_aspm(struct rtsx_pcr *pcr, bool enable)
+{
+	struct rtsx_cr_option *option = &pcr->option;
+	u8 val = 0;
+
+	if (pcr->aspm_enabled == enable)
+		return;
+
+	if (option->dev_aspm_mode == DEV_ASPM_DYNAMIC) {
+		val = 0;
+		rtsx_pci_update_cfg_byte(pcr, pcr->pcie_cap + PCI_EXP_LNKCTL,
+					 ASPM_MASK_NEG, val);
+	} else if (option->dev_aspm_mode == DEV_ASPM_BACKDOOR) {
+		u8 mask = FORCE_ASPM_VAL_MASK | FORCE_ASPM_CTL0;
+
+		val = 0;
+		rtsx_pci_update_cfg_byte(pcr, pcr->pcie_cap + PCI_EXP_LNKCTL,
+					 ASPM_MASK_NEG, val);
+		val = FORCE_ASPM_CTL0;
+		rtsx_pci_write_register(pcr, ASPM_FORCE_CTL, mask, val);
+	}
+	rtsx_pci_write_register(pcr, SD_CFG1, SD_ASYNC_FIFO_NOT_RST, 0);
+	udelay(10);
+	pcr->aspm_enabled = enable;
+}
+
+static void rts5261_set_aspm(struct rtsx_pcr *pcr, bool enable)
+{
+	if (enable)
+		rts5261_enable_aspm(pcr, true);
+	else
+		rts5261_disable_aspm(pcr, false);
+}
+
+static void rts5261_set_l1off_cfg_sub_d0(struct rtsx_pcr *pcr, int active)
+{
+	struct rtsx_cr_option *option = &pcr->option;
+	int aspm_L1_1, aspm_L1_2;
+	u8 val = 0;
+
+	aspm_L1_1 = rtsx_check_dev_flag(pcr, ASPM_L1_1_EN);
+	aspm_L1_2 = rtsx_check_dev_flag(pcr, ASPM_L1_2_EN);
+
+	if (active) {
+		/* run, latency: 60us */
+		if (aspm_L1_1)
+			val = option->ltr_l1off_snooze_sspwrgate;
+	} else {
+		/* l1off, latency: 300us */
+		if (aspm_L1_2)
+			val = option->ltr_l1off_sspwrgate;
+	}
+
+	rtsx_set_l1off_sub(pcr, val);
+}
+
+static const struct pcr_ops rts5261_pcr_ops = {
+	.fetch_vendor_settings = rtsx5261_fetch_vendor_settings,
+	.turn_on_led = rts5261_turn_on_led,
+	.turn_off_led = rts5261_turn_off_led,
+	.extra_init_hw = rts5261_extra_init_hw,
+	.enable_auto_blink = rts5261_enable_auto_blink,
+	.disable_auto_blink = rts5261_disable_auto_blink,
+	.card_power_on = rts5261_card_power_on,
+	.card_power_off = rts5261_card_power_off,
+	.switch_output_voltage = rts5261_switch_output_voltage,
+	.force_power_down = rts5261_force_power_down,
+	.stop_cmd = rts5261_stop_cmd,
+	.set_aspm = rts5261_set_aspm,
+	.set_l1off_cfg_sub_d0 = rts5261_set_l1off_cfg_sub_d0,
+	.enable_ocp = rts5261_enable_ocp,
+	.disable_ocp = rts5261_disable_ocp,
+	.init_ocp = rts5261_init_ocp,
+	.process_ocp = rts5261_process_ocp,
+	.clear_ocpstat = rts5261_clear_ocpstat,
+};
+
+static inline u8 double_ssc_depth(u8 depth)
+{
+	return ((depth > 1) ? (depth - 1) : depth);
+}
+
+int rts5261_pci_switch_clock(struct rtsx_pcr *pcr, unsigned int card_clock,
+		u8 ssc_depth, bool initial_mode, bool double_clk, bool vpclk)
+{
+	int err, clk;
+	u8 n, clk_divider, mcu_cnt, div;
+	static const u8 depth[] = {
+		[RTSX_SSC_DEPTH_4M] = RTS5261_SSC_DEPTH_4M,
+		[RTSX_SSC_DEPTH_2M] = RTS5261_SSC_DEPTH_2M,
+		[RTSX_SSC_DEPTH_1M] = RTS5261_SSC_DEPTH_1M,
+		[RTSX_SSC_DEPTH_500K] = RTS5261_SSC_DEPTH_512K,
+	};
+
+	if (initial_mode) {
+		/* We use 250k(around) here, in initial stage */
+		clk_divider = SD_CLK_DIVIDE_128;
+		card_clock = 30000000;
+	} else {
+		clk_divider = SD_CLK_DIVIDE_0;
+	}
+	err = rtsx_pci_write_register(pcr, SD_CFG1,
+			SD_CLK_DIVIDE_MASK, clk_divider);
+	if (err < 0)
+		return err;
+
+	card_clock /= 1000000;
+	pcr_dbg(pcr, "Switch card clock to %dMHz\n", card_clock);
+
+	clk = card_clock;
+	if (!initial_mode && double_clk)
+		clk = card_clock * 2;
+	pcr_dbg(pcr, "Internal SSC clock: %dMHz (cur_clock = %d)\n",
+		clk, pcr->cur_clock);
+
+	if (clk == pcr->cur_clock)
+		return 0;
+
+	if (pcr->ops->conv_clk_and_div_n)
+		n = (u8)pcr->ops->conv_clk_and_div_n(clk, CLK_TO_DIV_N);
+	else
+		n = (u8)(clk - 4);
+	if ((clk <= 4) || (n > 396))
+		return -EINVAL;
+
+	mcu_cnt = (u8)(125/clk + 3);
+	if (mcu_cnt > 15)
+		mcu_cnt = 15;
+
+	div = CLK_DIV_1;
+	while ((n < MIN_DIV_N_PCR - 4) && (div < CLK_DIV_8)) {
+		if (pcr->ops->conv_clk_and_div_n) {
+			int dbl_clk = pcr->ops->conv_clk_and_div_n(n,
+					DIV_N_TO_CLK) * 2;
+			n = (u8)pcr->ops->conv_clk_and_div_n(dbl_clk,
+					CLK_TO_DIV_N);
+		} else {
+			n = (n + 4) * 2 - 4;
+		}
+		div++;
+	}
+
+	n = (n / 2);
+	pcr_dbg(pcr, "n = %d, div = %d\n", n, div);
+
+	ssc_depth = depth[ssc_depth];
+	if (double_clk)
+		ssc_depth = double_ssc_depth(ssc_depth);
+
+	if (ssc_depth) {
+		if (div == CLK_DIV_2) {
+			if (ssc_depth > 1)
+				ssc_depth -= 1;
+			else
+				ssc_depth = RTS5261_SSC_DEPTH_8M;
+		} else if (div == CLK_DIV_4) {
+			if (ssc_depth > 2)
+				ssc_depth -= 2;
+			else
+				ssc_depth = RTS5261_SSC_DEPTH_8M;
+		} else if (div == CLK_DIV_8) {
+			if (ssc_depth > 3)
+				ssc_depth -= 3;
+			else
+				ssc_depth = RTS5261_SSC_DEPTH_8M;
+		}
+	} else {
+		ssc_depth = 0;
+	}
+	pcr_dbg(pcr, "ssc_depth = %d\n", ssc_depth);
+
+	rtsx_pci_init_cmd(pcr);
+	rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, CLK_CTL,
+				CLK_LOW_FREQ, CLK_LOW_FREQ);
+	rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, CLK_DIV,
+			0xFF, (div << 4) | mcu_cnt);
+	rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, SSC_CTL1, SSC_RSTB, 0);
+	rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, SSC_CTL2,
+			SSC_DEPTH_MASK, ssc_depth);
+	rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, SSC_DIV_N_0, 0xFF, n);
+	rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, SSC_CTL1, SSC_RSTB, SSC_RSTB);
+	if (vpclk) {
+		rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, SD_VPCLK0_CTL,
+				PHASE_NOT_RESET, 0);
+		rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, SD_VPCLK1_CTL,
+				PHASE_NOT_RESET, 0);
+		rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, SD_VPCLK0_CTL,
+				PHASE_NOT_RESET, PHASE_NOT_RESET);
+		rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, SD_VPCLK1_CTL,
+				PHASE_NOT_RESET, PHASE_NOT_RESET);
+	}
+
+	err = rtsx_pci_send_cmd(pcr, 2000);
+	if (err < 0)
+		return err;
+
+	/* Wait SSC clock stable */
+	udelay(SSC_CLOCK_STABLE_WAIT);
+	err = rtsx_pci_write_register(pcr, CLK_CTL, CLK_LOW_FREQ, 0);
+	if (err < 0)
+		return err;
+
+	pcr->cur_clock = clk;
+	return 0;
+
+}
+
+void rts5261_init_params(struct rtsx_pcr *pcr)
+{
+	struct rtsx_cr_option *option = &pcr->option;
+	struct rtsx_hw_param *hw_param = &pcr->hw_param;
+
+	pcr->extra_caps = EXTRA_CAPS_SD_SDR50 | EXTRA_CAPS_SD_SDR104;
+	pcr->num_slots = 1;
+	pcr->ops = &rts5261_pcr_ops;
+
+	pcr->flags = 0;
+	pcr->card_drive_sel = RTSX_CARD_DRIVE_DEFAULT;
+	pcr->sd30_drive_sel_1v8 = CFG_DRIVER_TYPE_B;
+	pcr->sd30_drive_sel_3v3 = CFG_DRIVER_TYPE_B;
+	pcr->aspm_en = ASPM_L1_EN;
+	pcr->tx_initial_phase = SET_CLOCK_PHASE(20, 27, 16);
+	pcr->rx_initial_phase = SET_CLOCK_PHASE(24, 6, 5);
+
+	pcr->ic_version = rts5261_get_ic_version(pcr);
+	pcr->sd_pull_ctl_enable_tbl = rts5261_sd_pull_ctl_enable_tbl;
+	pcr->sd_pull_ctl_disable_tbl = rts5261_sd_pull_ctl_disable_tbl;
+
+	pcr->reg_pm_ctrl3 = RTS5261_AUTOLOAD_CFG3;
+
+	option->dev_flags = (LTR_L1SS_PWR_GATE_CHECK_CARD_EN
+				| LTR_L1SS_PWR_GATE_EN);
+	option->ltr_en = true;
+
+	/* init latency of active, idle, L1OFF to 60us, 300us, 3ms */
+	option->ltr_active_latency = LTR_ACTIVE_LATENCY_DEF;
+	option->ltr_idle_latency = LTR_IDLE_LATENCY_DEF;
+	option->ltr_l1off_latency = LTR_L1OFF_LATENCY_DEF;
+	option->l1_snooze_delay = L1_SNOOZE_DELAY_DEF;
+	option->ltr_l1off_sspwrgate = 0x7F;
+	option->ltr_l1off_snooze_sspwrgate = 0x78;
+	option->dev_aspm_mode = DEV_ASPM_DYNAMIC;
+
+	option->ocp_en = 1;
+	hw_param->interrupt_en |= SD_OC_INT_EN;
+	hw_param->ocp_glitch =  SD_OCP_GLITCH_800U;
+	option->sd_800mA_ocp_thd =  RTS5261_LDO1_OCP_THD_1040;
+}
diff --git a/drivers/misc/cardreader/rts5261.h b/drivers/misc/cardreader/rts5261.h
new file mode 100644
index 000000000000..ebfdd236a553
--- /dev/null
+++ b/drivers/misc/cardreader/rts5261.h
@@ -0,0 +1,233 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/* Driver for Realtek PCI-Express card reader
+ *
+ * Copyright(c) 2018-2019 Realtek Semiconductor Corp. All rights reserved.
+ *
+ * Author:
+ *   Rui FENG <rui_feng@realsil.com.cn>
+ *   Wei WANG <wei_wang@realsil.com.cn>
+ */
+#ifndef RTS5261_H
+#define RTS5261_H
+
+/*New add*/
+#define rts5261_vendor_setting_valid(reg)	((reg) & 0x010000)
+#define rts5261_reg_to_aspm(reg)		(((reg) >> 28) ^ 0x03)
+#define rts5261_reg_check_reverse_socket(reg)	((reg) & 0x04)
+#define rts5261_reg_to_card_drive_sel(reg)	((((reg) >> 6) & 0x01) << 6)
+#define rts5261_reg_to_sd30_drive_sel_1v8(reg)	(((reg) >> 22) ^ 0x03)
+#define rts5261_reg_to_sd30_drive_sel_3v3(reg)	(((reg) >> 16) ^ 0x03)
+
+
+#define RTS5261_AUTOLOAD_CFG0		0xFF7B
+#define RTS5261_AUTOLOAD_CFG1		0xFF7C
+#define RTS5261_AUTOLOAD_CFG2		0xFF7D
+#define RTS5261_AUTOLOAD_CFG3		0xFF7E
+#define RTS5261_AUTOLOAD_CFG4		0xFF7F
+#define RTS5261_FORCE_PRSNT_LOW		(1 << 6)
+#define RTS5261_AUX_CLK_16M_EN		(1 << 5)
+
+#define RTS5261_REG_VREF		0xFE97
+#define RTS5261_PWD_SUSPND_EN		(1 << 4)
+
+#define RTS5261_PAD_H3L1		0xFF79
+#define PAD_GPIO_H3L1			(1 << 3)
+
+/* SSC_CTL2 0xFC12 */
+#define RTS5261_SSC_DEPTH_MASK		0x07
+#define RTS5261_SSC_DEPTH_DISALBE	0x00
+#define RTS5261_SSC_DEPTH_8M		0x01
+#define RTS5261_SSC_DEPTH_4M		0x02
+#define RTS5261_SSC_DEPTH_2M		0x03
+#define RTS5261_SSC_DEPTH_1M		0x04
+#define RTS5261_SSC_DEPTH_512K		0x05
+#define RTS5261_SSC_DEPTH_256K		0x06
+#define RTS5261_SSC_DEPTH_128K		0x07
+
+/* efuse control register*/
+#define RTS5261_EFUSE_CTL		0xFC30
+#define RTS5261_EFUSE_ENABLE		0x80
+/* EFUSE_MODE: 0=READ 1=PROGRAM */
+#define RTS5261_EFUSE_MODE_MASK		0x40
+#define RTS5261_EFUSE_PROGRAM		0x40
+
+#define RTS5261_EFUSE_ADDR		0xFC31
+#define	RTS5261_EFUSE_ADDR_MASK		0x3F
+
+#define RTS5261_EFUSE_WRITE_DATA	0xFC32
+#define RTS5261_EFUSE_READ_DATA		0xFC34
+
+/* DMACTL 0xFE2C */
+#define RTS5261_DMA_PACK_SIZE_MASK	0xF0
+
+/* FW config info register */
+#define RTS5261_FW_CFG_INFO0		0xFF50
+#define RTS5261_FW_EXPRESS_TEST_MASK	(0x01<<0)
+#define RTS5261_FW_EA_MODE_MASK		(0x01<<5)
+
+/* FW config register */
+#define RTS5261_FW_CFG0			0xFF54
+#define RTS5261_FW_ENTER_EXPRESS	(0x01<<0)
+
+#define RTS5261_FW_CFG1			0xFF55
+#define RTS5261_SYS_CLK_SEL_MCU_CLK	(0x01<<7)
+#define RTS5261_CRC_CLK_SEL_MCU_CLK	(0x01<<6)
+#define RTS5261_FAKE_MCU_CLOCK_GATING	(0x01<<5)
+/*MCU_bus_mode_sel: 0=real 8051 1=fake mcu*/
+#define RTS5261_MCU_BUS_SEL_MASK	(0x01<<4)
+/*MCU_clock_sel:VerA 00=aux16M 01=aux400K 1x=REFCLK100M*/
+/*MCU_clock_sel:VerB 00=aux400K 01=aux16M 10=REFCLK100M*/
+#define RTS5261_MCU_CLOCK_SEL_MASK	(0x03<<2)
+#define RTS5261_MCU_CLOCK_SEL_16M	(0x01<<2)
+#define RTS5261_MCU_CLOCK_GATING	(0x01<<1)
+#define RTS5261_DRIVER_ENABLE_FW	(0x01<<0)
+
+/* FW status register */
+#define RTS5261_FW_STATUS		0xFF56
+#define RTS5261_EXPRESS_LINK_FAIL_MASK	(0x01<<7)
+
+/* FW control register */
+#define RTS5261_FW_CTL			0xFF5F
+#define RTS5261_INFORM_RTD3_COLD	(0x01<<5)
+
+#define RTS5261_REG_FPDCTL		0xFF60
+
+#define RTS5261_REG_LDO12_CFG		0xFF6E
+#define RTS5261_LDO12_VO_TUNE_MASK	(0x07<<1)
+#define RTS5261_LDO12_115		(0x03<<1)
+#define RTS5261_LDO12_120		(0x04<<1)
+#define RTS5261_LDO12_125		(0x05<<1)
+#define RTS5261_LDO12_130		(0x06<<1)
+#define RTS5261_LDO12_135		(0x07<<1)
+
+/* LDO control register */
+#define RTS5261_CARD_PWR_CTL		0xFD50
+#define RTS5261_SD_CLK_ISO		(0x01<<7)
+#define RTS5261_PAD_SD_DAT_FW_CTRL	(0x01<<6)
+#define RTS5261_PUPDC			(0x01<<5)
+#define RTS5261_SD_CMD_ISO		(0x01<<4)
+#define RTS5261_SD_DAT_ISO_MASK		(0x0F<<0)
+
+#define RTS5261_LDO1233318_POW_CTL	0xFF70
+#define RTS5261_LDO3318_POWERON		(0x01<<3)
+#define RTS5261_LDO3_POWERON		(0x01<<2)
+#define RTS5261_LDO2_POWERON		(0x01<<1)
+#define RTS5261_LDO1_POWERON		(0x01<<0)
+#define RTS5261_LDO_POWERON_MASK	(0x0F<<0)
+
+#define RTS5261_DV3318_CFG		0xFF71
+#define RTS5261_DV3318_TUNE_MASK	(0x07<<4)
+#define RTS5261_DV3318_18		(0x02<<4)
+#define RTS5261_DV3318_19		(0x04<<4)
+#define RTS5261_DV3318_33		(0x07<<4)
+
+#define RTS5261_LDO1_CFG0		0xFF72
+#define RTS5261_LDO1_OCP_THD_MASK	(0x07<<5)
+#define RTS5261_LDO1_OCP_EN		(0x01<<4)
+#define RTS5261_LDO1_OCP_LMT_THD_MASK	(0x03<<2)
+#define RTS5261_LDO1_OCP_LMT_EN		(0x01<<1)
+
+/* CRD6603-433 190319 request changed */
+#define RTS5261_LDO1_OCP_THD_740	(0x00<<5)
+#define RTS5261_LDO1_OCP_THD_800	(0x01<<5)
+#define RTS5261_LDO1_OCP_THD_860	(0x02<<5)
+#define RTS5261_LDO1_OCP_THD_920	(0x03<<5)
+#define RTS5261_LDO1_OCP_THD_980	(0x04<<5)
+#define RTS5261_LDO1_OCP_THD_1040	(0x05<<5)
+#define RTS5261_LDO1_OCP_THD_1100	(0x06<<5)
+#define RTS5261_LDO1_OCP_THD_1160	(0x07<<5)
+
+#define RTS5261_LDO1_LMT_THD_450	(0x00<<2)
+#define RTS5261_LDO1_LMT_THD_1000	(0x01<<2)
+#define RTS5261_LDO1_LMT_THD_1500	(0x02<<2)
+#define RTS5261_LDO1_LMT_THD_2000	(0x03<<2)
+
+#define RTS5261_LDO1_CFG1		0xFF73
+#define RTS5261_LDO1_TUNE_MASK		(0x07<<1)
+#define RTS5261_LDO1_18			(0x05<<1)
+#define RTS5261_LDO1_33			(0x07<<1)
+#define RTS5261_LDO1_PWD_MASK		(0x01<<0)
+
+#define RTS5261_LDO2_CFG0		0xFF74
+#define RTS5261_LDO2_OCP_THD_MASK	(0x07<<5)
+#define RTS5261_LDO2_OCP_EN		(0x01<<4)
+#define RTS5261_LDO2_OCP_LMT_THD_MASK	(0x03<<2)
+#define RTS5261_LDO2_OCP_LMT_EN		(0x01<<1)
+
+#define RTS5261_LDO2_OCP_THD_620	(0x00<<5)
+#define RTS5261_LDO2_OCP_THD_650	(0x01<<5)
+#define RTS5261_LDO2_OCP_THD_680	(0x02<<5)
+#define RTS5261_LDO2_OCP_THD_720	(0x03<<5)
+#define RTS5261_LDO2_OCP_THD_750	(0x04<<5)
+#define RTS5261_LDO2_OCP_THD_780	(0x05<<5)
+#define RTS5261_LDO2_OCP_THD_810	(0x06<<5)
+#define RTS5261_LDO2_OCP_THD_840	(0x07<<5)
+
+#define RTS5261_LDO2_CFG1		0xFF75
+#define RTS5261_LDO2_TUNE_MASK		(0x07<<1)
+#define RTS5261_LDO2_18			(0x05<<1)
+#define RTS5261_LDO2_33			(0x07<<1)
+#define RTS5261_LDO2_PWD_MASK		(0x01<<0)
+
+#define RTS5261_LDO3_CFG0		0xFF76
+#define RTS5261_LDO3_OCP_THD_MASK	(0x07<<5)
+#define RTS5261_LDO3_OCP_EN		(0x01<<4)
+#define RTS5261_LDO3_OCP_LMT_THD_MASK	(0x03<<2)
+#define RTS5261_LDO3_OCP_LMT_EN		(0x01<<1)
+
+#define RTS5261_LDO3_OCP_THD_620	(0x00<<5)
+#define RTS5261_LDO3_OCP_THD_650	(0x01<<5)
+#define RTS5261_LDO3_OCP_THD_680	(0x02<<5)
+#define RTS5261_LDO3_OCP_THD_720	(0x03<<5)
+#define RTS5261_LDO3_OCP_THD_750	(0x04<<5)
+#define RTS5261_LDO3_OCP_THD_780	(0x05<<5)
+#define RTS5261_LDO3_OCP_THD_810	(0x06<<5)
+#define RTS5261_LDO3_OCP_THD_840	(0x07<<5)
+
+#define RTS5261_LDO3_CFG1		0xFF77
+#define RTS5261_LDO3_TUNE_MASK		(0x07<<1)
+#define RTS5261_LDO3_18			(0x05<<1)
+#define RTS5261_LDO3_33			(0x07<<1)
+#define RTS5261_LDO3_PWD_MASK		(0x01<<0)
+
+#define RTS5261_REG_PME_FORCE_CTL	0xFF78
+#define FORCE_PM_CONTROL		0x20
+#define FORCE_PM_VALUE			0x10
+#define REG_EFUSE_BYPASS		0x08
+#define REG_EFUSE_POR			0x04
+#define REG_EFUSE_POWER_MASK		0x03
+#define REG_EFUSE_POWERON		0x03
+#define REG_EFUSE_POWEROFF		0x00
+
+
+/* Single LUN, support SD/SD EXPRESS */
+#define DEFAULT_SINGLE		0
+#define SD_LUN			1
+#define SD_EXPRESS_LUN		2
+
+/* For Change_FPGA_SSCClock Function */
+#define MULTIPLY_BY_1    0x00
+#define MULTIPLY_BY_2    0x01
+#define MULTIPLY_BY_3    0x02
+#define MULTIPLY_BY_4    0x03
+#define MULTIPLY_BY_5    0x04
+#define MULTIPLY_BY_6    0x05
+#define MULTIPLY_BY_7    0x06
+#define MULTIPLY_BY_8    0x07
+#define MULTIPLY_BY_9    0x08
+#define MULTIPLY_BY_10   0x09
+
+#define DIVIDE_BY_2      0x01
+#define DIVIDE_BY_3      0x02
+#define DIVIDE_BY_4      0x03
+#define DIVIDE_BY_5      0x04
+#define DIVIDE_BY_6      0x05
+#define DIVIDE_BY_7      0x06
+#define DIVIDE_BY_8      0x07
+#define DIVIDE_BY_9      0x08
+#define DIVIDE_BY_10     0x09
+
+int rts5261_pci_switch_clock(struct rtsx_pcr *pcr, unsigned int card_clock,
+		u8 ssc_depth, bool initial_mode, bool double_clk, bool vpclk);
+
+#endif /* RTS5261_H */
diff --git a/drivers/misc/cardreader/rtsx_pcr.c b/drivers/misc/cardreader/rtsx_pcr.c
index b4a66b64f742..fd7b2167103d 100644
--- a/drivers/misc/cardreader/rtsx_pcr.c
+++ b/drivers/misc/cardreader/rtsx_pcr.c
@@ -22,6 +22,7 @@
 #include <asm/unaligned.h>
 
 #include "rtsx_pcr.h"
+#include "rts5261.h"
 
 static bool msi_en = true;
 module_param(msi_en, bool, S_IRUGO | S_IWUSR);
@@ -34,9 +35,6 @@ static struct mfd_cell rtsx_pcr_cells[] = {
 	[RTSX_SD_CARD] = {
 		.name = DRV_NAME_RTSX_PCI_SDMMC,
 	},
-	[RTSX_MS_CARD] = {
-		.name = DRV_NAME_RTSX_PCI_MS,
-	},
 };
 
 static const struct pci_device_id rtsx_pci_ids[] = {
@@ -51,6 +49,7 @@ static const struct pci_device_id rtsx_pci_ids[] = {
 	{ PCI_DEVICE(0x10EC, 0x524A), PCI_CLASS_OTHERS << 16, 0xFF0000 },
 	{ PCI_DEVICE(0x10EC, 0x525A), PCI_CLASS_OTHERS << 16, 0xFF0000 },
 	{ PCI_DEVICE(0x10EC, 0x5260), PCI_CLASS_OTHERS << 16, 0xFF0000 },
+	{ PCI_DEVICE(0x10EC, 0x5261), PCI_CLASS_OTHERS << 16, 0xFF0000 },
 	{ 0, }
 };
 
@@ -438,8 +437,16 @@ static void rtsx_pci_add_sg_tbl(struct rtsx_pcr *pcr,
 
 	if (end)
 		option |= RTSX_SG_END;
-	val = ((u64)addr << 32) | ((u64)len << 12) | option;
 
+	if (PCI_PID(pcr) == PID_5261) {
+		if (len > 0xFFFF)
+			val = ((u64)addr << 32) | (((u64)len & 0xFFFF) << 16)
+				| (((u64)len >> 16) << 6) | option;
+		else
+			val = ((u64)addr << 32) | ((u64)len << 16) | option;
+	} else {
+		val = ((u64)addr << 32) | ((u64)len << 12) | option;
+	}
 	put_unaligned_le64(val, ptr);
 	pcr->sgi++;
 }
@@ -684,7 +691,6 @@ int rtsx_pci_card_pull_ctl_disable(struct rtsx_pcr *pcr, int card)
 	else
 		return -EINVAL;
 
-
 	return rtsx_pci_set_pull_ctl(pcr, tbl);
 }
 EXPORT_SYMBOL_GPL(rtsx_pci_card_pull_ctl_disable);
@@ -735,6 +741,10 @@ int rtsx_pci_switch_clock(struct rtsx_pcr *pcr, unsigned int card_clock,
 		[RTSX_SSC_DEPTH_250K] = SSC_DEPTH_250K,
 	};
 
+	if (PCI_PID(pcr) == PID_5261)
+		return rts5261_pci_switch_clock(pcr, card_clock,
+				ssc_depth, initial_mode, double_clk, vpclk);
+
 	if (initial_mode) {
 		/* We use 250k(around) here, in initial stage */
 		clk_divider = SD_CLK_DIVIDE_128;
@@ -1253,7 +1263,15 @@ static int rtsx_pci_init_hw(struct rtsx_pcr *pcr)
 	rtsx_pci_enable_bus_int(pcr);
 
 	/* Power on SSC */
-	err = rtsx_pci_write_register(pcr, FPDCTL, SSC_POWER_DOWN, 0);
+	if (PCI_PID(pcr) == PID_5261) {
+		/* Gating real mcu clock */
+		err = rtsx_pci_write_register(pcr, RTS5261_FW_CFG1,
+			RTS5261_MCU_CLOCK_GATING, 0);
+		err = rtsx_pci_write_register(pcr, RTS5261_REG_FPDCTL,
+			SSC_POWER_DOWN, 0);
+	} else {
+		err = rtsx_pci_write_register(pcr, FPDCTL, SSC_POWER_DOWN, 0);
+	}
 	if (err < 0)
 		return err;
 
@@ -1283,7 +1301,12 @@ static int rtsx_pci_init_hw(struct rtsx_pcr *pcr)
 	/* Enable SSC Clock */
 	rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, SSC_CTL1,
 			0xFF, SSC_8X_EN | SSC_SEL_4M);
-	rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, SSC_CTL2, 0xFF, 0x12);
+	if (PCI_PID(pcr) == PID_5261)
+		rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, SSC_CTL2, 0xFF,
+			RTS5261_SSC_DEPTH_2M);
+	else
+		rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, SSC_CTL2, 0xFF, 0x12);
+
 	/* Disable cd_pwr_save */
 	rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, CHANGE_LINK_STATE, 0x16, 0x10);
 	/* Clear Link Ready Interrupt */
@@ -1314,6 +1337,7 @@ static int rtsx_pci_init_hw(struct rtsx_pcr *pcr)
 	case PID_524A:
 	case PID_525A:
 	case PID_5260:
+	case PID_5261:
 		rtsx_pci_write_register(pcr, PM_CLK_FORCE_CTL, 1, 1);
 		break;
 	default:
@@ -1393,9 +1417,14 @@ static int rtsx_pci_init_chip(struct rtsx_pcr *pcr)
 	case 0x5286:
 		rtl8402_init_params(pcr);
 		break;
+
 	case 0x5260:
 		rts5260_init_params(pcr);
 		break;
+
+	case 0x5261:
+		rts5261_init_params(pcr);
+		break;
 	}
 
 	pcr_dbg(pcr, "PID: 0x%04x, IC version: 0x%02x\n",
diff --git a/drivers/misc/cardreader/rtsx_pcr.h b/drivers/misc/cardreader/rtsx_pcr.h
index 98f729263dc1..ed391df52f4f 100644
--- a/drivers/misc/cardreader/rtsx_pcr.h
+++ b/drivers/misc/cardreader/rtsx_pcr.h
@@ -53,6 +53,7 @@ void rts524a_init_params(struct rtsx_pcr *pcr);
 void rts525a_init_params(struct rtsx_pcr *pcr);
 void rtl8411b_init_params(struct rtsx_pcr *pcr);
 void rts5260_init_params(struct rtsx_pcr *pcr);
+void rts5261_init_params(struct rtsx_pcr *pcr);
 
 static inline u8 map_sd_drive(int idx)
 {
diff --git a/drivers/misc/cxl/flash.c b/drivers/misc/cxl/flash.c
index 4d6836f19489..cb9cca35a226 100644
--- a/drivers/misc/cxl/flash.c
+++ b/drivers/misc/cxl/flash.c
@@ -473,12 +473,6 @@ static long device_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 		return -EINVAL;
 }
 
-static long device_compat_ioctl(struct file *file, unsigned int cmd,
-				unsigned long arg)
-{
-	return device_ioctl(file, cmd, arg);
-}
-
 static int device_close(struct inode *inode, struct file *file)
 {
 	struct cxl *adapter = file->private_data;
@@ -514,7 +508,7 @@ static const struct file_operations fops = {
 	.owner		= THIS_MODULE,
 	.open		= device_open,
 	.unlocked_ioctl	= device_ioctl,
-	.compat_ioctl	= device_compat_ioctl,
+	.compat_ioctl	= compat_ptr_ioctl,
 	.release	= device_close,
 };
 
diff --git a/drivers/misc/eeprom/at24.c b/drivers/misc/eeprom/at24.c
index 2cccd82a3106..0681d5fdd538 100644
--- a/drivers/misc/eeprom/at24.c
+++ b/drivers/misc/eeprom/at24.c
@@ -716,9 +716,12 @@ static int at24_probe(struct i2c_client *client)
 		return -ENODEV;
 	}
 
-	dev_info(dev, "%u byte %s EEPROM, %s, %u bytes/write\n",
-		 byte_len, client->name,
-		 writable ? "writable" : "read-only", at24->write_max);
+	if (writable)
+		dev_info(dev, "%u byte %s EEPROM, writable, %u bytes/write\n",
+			 byte_len, client->name, at24->write_max);
+	else
+		dev_info(dev, "%u byte %s EEPROM, read-only\n",
+			 byte_len, client->name);
 
 	return 0;
 }
diff --git a/drivers/misc/eeprom/eeprom.c b/drivers/misc/eeprom/eeprom.c
index 2cfe3d4ae144..226b5efa6a77 100644
--- a/drivers/misc/eeprom/eeprom.c
+++ b/drivers/misc/eeprom/eeprom.c
@@ -175,6 +175,10 @@ static int eeprom_probe(struct i2c_client *client,
 		}
 	}
 
+	/* Let the users know they are using deprecated driver */
+	dev_notice(&client->dev,
+		   "eeprom driver is deprecated, please use at24 instead\n");
+
 	/* create the sysfs eeprom file */
 	return sysfs_create_bin_file(&client->dev.kobj, &eeprom_attr);
 }
diff --git a/drivers/misc/fastrpc.c b/drivers/misc/fastrpc.c
index 1b1a794d639d..ae4ee27a63c4 100644
--- a/drivers/misc/fastrpc.c
+++ b/drivers/misc/fastrpc.c
@@ -32,8 +32,9 @@
 #define FASTRPC_CTX_MAX (256)
 #define FASTRPC_INIT_HANDLE	1
 #define FASTRPC_CTXID_MASK (0xFF0)
-#define INIT_FILELEN_MAX (64 * 1024 * 1024)
+#define INIT_FILELEN_MAX (2 * 1024 * 1024)
 #define FASTRPC_DEVICE_NAME	"fastrpc"
+#define ADSP_MMAP_ADD_PAGES 0x1000
 
 /* Retrives number of input buffers from the scalars parameter */
 #define REMOTE_SCALARS_INBUFS(sc)	(((sc) >> 16) & 0x0ff)
@@ -66,6 +67,8 @@
 /* Remote Method id table */
 #define FASTRPC_RMID_INIT_ATTACH	0
 #define FASTRPC_RMID_INIT_RELEASE	1
+#define FASTRPC_RMID_INIT_MMAP		4
+#define FASTRPC_RMID_INIT_MUNMAP	5
 #define FASTRPC_RMID_INIT_CREATE	6
 #define FASTRPC_RMID_INIT_CREATE_ATTR	7
 #define FASTRPC_RMID_INIT_CREATE_STATIC	8
@@ -89,6 +92,23 @@ struct fastrpc_remote_arg {
 	u64 len;
 };
 
+struct fastrpc_mmap_rsp_msg {
+	u64 vaddr;
+};
+
+struct fastrpc_mmap_req_msg {
+	s32 pgid;
+	u32 flags;
+	u64 vaddr;
+	s32 num;
+};
+
+struct fastrpc_munmap_req_msg {
+	s32 pgid;
+	u64 vaddr;
+	u64 size;
+};
+
 struct fastrpc_msg {
 	int pid;		/* process group id */
 	int tid;		/* thread id */
@@ -123,6 +143,9 @@ struct fastrpc_buf {
 	/* Lock for dma buf attachments */
 	struct mutex lock;
 	struct list_head attachments;
+	/* mmap support */
+	struct list_head node; /* list of user requested mmaps */
+	uintptr_t raddr;
 };
 
 struct fastrpc_dma_buf_attachment {
@@ -192,6 +215,7 @@ struct fastrpc_user {
 	struct list_head user;
 	struct list_head maps;
 	struct list_head pending;
+	struct list_head mmaps;
 
 	struct fastrpc_channel_ctx *cctx;
 	struct fastrpc_session_ctx *sctx;
@@ -269,6 +293,7 @@ static int fastrpc_buf_alloc(struct fastrpc_user *fl, struct device *dev,
 		return -ENOMEM;
 
 	INIT_LIST_HEAD(&buf->attachments);
+	INIT_LIST_HEAD(&buf->node);
 	mutex_init(&buf->lock);
 
 	buf->fl = fl;
@@ -276,6 +301,7 @@ static int fastrpc_buf_alloc(struct fastrpc_user *fl, struct device *dev,
 	buf->phys = 0;
 	buf->size = size;
 	buf->dev = dev;
+	buf->raddr = 0;
 
 	buf->virt = dma_alloc_coherent(dev, buf->size, (dma_addr_t *)&buf->phys,
 				       GFP_KERNEL);
@@ -934,8 +960,13 @@ static int fastrpc_internal_invoke(struct fastrpc_user *fl,  u32 kernel,
 	if (err)
 		goto bail;
 
-	/* Wait for remote dsp to respond or time out */
-	err = wait_for_completion_interruptible(&ctx->work);
+	if (kernel) {
+		if (!wait_for_completion_timeout(&ctx->work, 10 * HZ))
+			err = -ETIMEDOUT;
+	} else {
+		err = wait_for_completion_interruptible(&ctx->work);
+	}
+
 	if (err)
 		goto bail;
 
@@ -954,12 +985,13 @@ static int fastrpc_internal_invoke(struct fastrpc_user *fl,  u32 kernel,
 	}
 
 bail:
-	/* We are done with this compute context, remove it from pending list */
-	spin_lock(&fl->lock);
-	list_del(&ctx->node);
-	spin_unlock(&fl->lock);
-	fastrpc_context_put(ctx);
-
+	if (err != -ERESTARTSYS && err != -ETIMEDOUT) {
+		/* We are done with this compute context */
+		spin_lock(&fl->lock);
+		list_del(&ctx->node);
+		spin_unlock(&fl->lock);
+		fastrpc_context_put(ctx);
+	}
 	if (err)
 		dev_dbg(fl->sctx->dev, "Error: Invoke Failed %d\n", err);
 
@@ -1131,6 +1163,7 @@ static int fastrpc_device_release(struct inode *inode, struct file *file)
 	struct fastrpc_channel_ctx *cctx = fl->cctx;
 	struct fastrpc_invoke_ctx *ctx, *n;
 	struct fastrpc_map *map, *m;
+	struct fastrpc_buf *buf, *b;
 	unsigned long flags;
 
 	fastrpc_release_current_dsp_process(fl);
@@ -1152,6 +1185,11 @@ static int fastrpc_device_release(struct inode *inode, struct file *file)
 		fastrpc_map_put(map);
 	}
 
+	list_for_each_entry_safe(buf, b, &fl->mmaps, node) {
+		list_del(&buf->node);
+		fastrpc_buf_free(buf);
+	}
+
 	fastrpc_session_free(cctx, fl->sctx);
 	fastrpc_channel_ctx_put(cctx);
 
@@ -1180,6 +1218,7 @@ static int fastrpc_device_open(struct inode *inode, struct file *filp)
 	mutex_init(&fl->mutex);
 	INIT_LIST_HEAD(&fl->pending);
 	INIT_LIST_HEAD(&fl->maps);
+	INIT_LIST_HEAD(&fl->mmaps);
 	INIT_LIST_HEAD(&fl->user);
 	fl->tgid = current->tgid;
 	fl->cctx = cctx;
@@ -1285,6 +1324,148 @@ static int fastrpc_invoke(struct fastrpc_user *fl, char __user *argp)
 	return err;
 }
 
+static int fastrpc_req_munmap_impl(struct fastrpc_user *fl,
+				   struct fastrpc_req_munmap *req)
+{
+	struct fastrpc_invoke_args args[1] = { [0] = { 0 } };
+	struct fastrpc_buf *buf, *b;
+	struct fastrpc_munmap_req_msg req_msg;
+	struct device *dev = fl->sctx->dev;
+	int err;
+	u32 sc;
+
+	spin_lock(&fl->lock);
+	list_for_each_entry_safe(buf, b, &fl->mmaps, node) {
+		if ((buf->raddr == req->vaddrout) && (buf->size == req->size))
+			break;
+		buf = NULL;
+	}
+	spin_unlock(&fl->lock);
+
+	if (!buf) {
+		dev_err(dev, "mmap not in list\n");
+		return -EINVAL;
+	}
+
+	req_msg.pgid = fl->tgid;
+	req_msg.size = buf->size;
+	req_msg.vaddr = buf->raddr;
+
+	args[0].ptr = (u64) (uintptr_t) &req_msg;
+	args[0].length = sizeof(req_msg);
+
+	sc = FASTRPC_SCALARS(FASTRPC_RMID_INIT_MUNMAP, 1, 0);
+	err = fastrpc_internal_invoke(fl, true, FASTRPC_INIT_HANDLE, sc,
+				      &args[0]);
+	if (!err) {
+		dev_dbg(dev, "unmmap\tpt 0x%09lx OK\n", buf->raddr);
+		spin_lock(&fl->lock);
+		list_del(&buf->node);
+		spin_unlock(&fl->lock);
+		fastrpc_buf_free(buf);
+	} else {
+		dev_err(dev, "unmmap\tpt 0x%09lx ERROR\n", buf->raddr);
+	}
+
+	return err;
+}
+
+static int fastrpc_req_munmap(struct fastrpc_user *fl, char __user *argp)
+{
+	struct fastrpc_req_munmap req;
+
+	if (copy_from_user(&req, argp, sizeof(req)))
+		return -EFAULT;
+
+	return fastrpc_req_munmap_impl(fl, &req);
+}
+
+static int fastrpc_req_mmap(struct fastrpc_user *fl, char __user *argp)
+{
+	struct fastrpc_invoke_args args[3] = { [0 ... 2] = { 0 } };
+	struct fastrpc_buf *buf = NULL;
+	struct fastrpc_mmap_req_msg req_msg;
+	struct fastrpc_mmap_rsp_msg rsp_msg;
+	struct fastrpc_req_munmap req_unmap;
+	struct fastrpc_phy_page pages;
+	struct fastrpc_req_mmap req;
+	struct device *dev = fl->sctx->dev;
+	int err;
+	u32 sc;
+
+	if (copy_from_user(&req, argp, sizeof(req)))
+		return -EFAULT;
+
+	if (req.flags != ADSP_MMAP_ADD_PAGES) {
+		dev_err(dev, "flag not supported 0x%x\n", req.flags);
+		return -EINVAL;
+	}
+
+	if (req.vaddrin) {
+		dev_err(dev, "adding user allocated pages is not supported\n");
+		return -EINVAL;
+	}
+
+	err = fastrpc_buf_alloc(fl, fl->sctx->dev, req.size, &buf);
+	if (err) {
+		dev_err(dev, "failed to allocate buffer\n");
+		return err;
+	}
+
+	req_msg.pgid = fl->tgid;
+	req_msg.flags = req.flags;
+	req_msg.vaddr = req.vaddrin;
+	req_msg.num = sizeof(pages);
+
+	args[0].ptr = (u64) (uintptr_t) &req_msg;
+	args[0].length = sizeof(req_msg);
+
+	pages.addr = buf->phys;
+	pages.size = buf->size;
+
+	args[1].ptr = (u64) (uintptr_t) &pages;
+	args[1].length = sizeof(pages);
+
+	args[2].ptr = (u64) (uintptr_t) &rsp_msg;
+	args[2].length = sizeof(rsp_msg);
+
+	sc = FASTRPC_SCALARS(FASTRPC_RMID_INIT_MMAP, 2, 1);
+	err = fastrpc_internal_invoke(fl, true, FASTRPC_INIT_HANDLE, sc,
+				      &args[0]);
+	if (err) {
+		dev_err(dev, "mmap error (len 0x%08llx)\n", buf->size);
+		goto err_invoke;
+	}
+
+	/* update the buffer to be able to deallocate the memory on the DSP */
+	buf->raddr = (uintptr_t) rsp_msg.vaddr;
+
+	/* let the client know the address to use */
+	req.vaddrout = rsp_msg.vaddr;
+
+	spin_lock(&fl->lock);
+	list_add_tail(&buf->node, &fl->mmaps);
+	spin_unlock(&fl->lock);
+
+	if (copy_to_user((void __user *)argp, &req, sizeof(req))) {
+		/* unmap the memory and release the buffer */
+		req_unmap.vaddrout = buf->raddr;
+		req_unmap.size = buf->size;
+		fastrpc_req_munmap_impl(fl, &req_unmap);
+		return -EFAULT;
+	}
+
+	dev_dbg(dev, "mmap\t\tpt 0x%09lx OK [len 0x%08llx]\n",
+		buf->raddr, buf->size);
+
+	return 0;
+
+err_invoke:
+	fastrpc_buf_free(buf);
+
+	return err;
+}
+
 static long fastrpc_device_ioctl(struct file *file, unsigned int cmd,
 				 unsigned long arg)
 {
@@ -1305,6 +1486,12 @@ static long fastrpc_device_ioctl(struct file *file, unsigned int cmd,
 	case FASTRPC_IOCTL_ALLOC_DMA_BUFF:
 		err = fastrpc_dmabuf_alloc(fl, argp);
 		break;
+	case FASTRPC_IOCTL_MMAP:
+		err = fastrpc_req_mmap(fl, argp);
+		break;
+	case FASTRPC_IOCTL_MUNMAP:
+		err = fastrpc_req_munmap(fl, argp);
+		break;
 	default:
 		err = -ENOTTY;
 		break;
@@ -1430,8 +1617,8 @@ static int fastrpc_rpmsg_probe(struct rpmsg_device *rpdev)
 		return -ENOMEM;
 
 	data->miscdev.minor = MISC_DYNAMIC_MINOR;
-	data->miscdev.name = kasprintf(GFP_KERNEL, "fastrpc-%s",
-				domains[domain_id]);
+	data->miscdev.name = devm_kasprintf(rdev, GFP_KERNEL, "fastrpc-%s",
+					    domains[domain_id]);
 	data->miscdev.fops = &fastrpc_fops;
 	err = misc_register(&data->miscdev);
 	if (err)
diff --git a/drivers/misc/genwqe/card_dev.c b/drivers/misc/genwqe/card_dev.c
index 0e34c0568fed..040a0bda3125 100644
--- a/drivers/misc/genwqe/card_dev.c
+++ b/drivers/misc/genwqe/card_dev.c
@@ -1215,34 +1215,13 @@ static long genwqe_ioctl(struct file *filp, unsigned int cmd,
 	return rc;
 }
 
-#if defined(CONFIG_COMPAT)
-/**
- * genwqe_compat_ioctl() - Compatibility ioctl
- *
- * Called whenever a 32-bit process running under a 64-bit kernel
- * performs an ioctl on /dev/genwqe<n>_card.
- *
- * @filp:        file pointer.
- * @cmd:         command.
- * @arg:         user argument.
- * Return:       zero on success or negative number on failure.
- */
-static long genwqe_compat_ioctl(struct file *filp, unsigned int cmd,
-				unsigned long arg)
-{
-	return genwqe_ioctl(filp, cmd, arg);
-}
-#endif /* defined(CONFIG_COMPAT) */
-
 static const struct file_operations genwqe_fops = {
 	.owner		= THIS_MODULE,
 	.open		= genwqe_open,
 	.fasync		= genwqe_fasync,
 	.mmap		= genwqe_mmap,
 	.unlocked_ioctl	= genwqe_ioctl,
-#if defined(CONFIG_COMPAT)
-	.compat_ioctl   = genwqe_compat_ioctl,
-#endif
+	.compat_ioctl   = compat_ptr_ioctl,
 	.release	= genwqe_release,
 };
 
diff --git a/drivers/misc/habanalabs/command_submission.c b/drivers/misc/habanalabs/command_submission.c
index a9ac045dcfde..8850f475a413 100644
--- a/drivers/misc/habanalabs/command_submission.c
+++ b/drivers/misc/habanalabs/command_submission.c
@@ -65,6 +65,18 @@ static void cs_put(struct hl_cs *cs)
 	kref_put(&cs->refcount, cs_do_release);
 }
 
+static bool is_cb_patched(struct hl_device *hdev, struct hl_cs_job *job)
+{
+	/*
+	 * Patched CB is created for external queues jobs, and for H/W queues
+	 * jobs if the user CB was allocated by driver and MMU is disabled.
+	 */
+	return (job->queue_type == QUEUE_TYPE_EXT ||
+			(job->queue_type == QUEUE_TYPE_HW &&
+					job->is_kernel_allocated_cb &&
+					!hdev->mmu_enable));
+}
+
 /*
  * cs_parser - parse the user command submission
  *
@@ -91,11 +103,13 @@ static int cs_parser(struct hl_fpriv *hpriv, struct hl_cs_job *job)
 	parser.patched_cb = NULL;
 	parser.user_cb = job->user_cb;
 	parser.user_cb_size = job->user_cb_size;
-	parser.ext_queue = job->ext_queue;
+	parser.queue_type = job->queue_type;
+	parser.is_kernel_allocated_cb = job->is_kernel_allocated_cb;
 	job->patched_cb = NULL;
 
 	rc = hdev->asic_funcs->cs_parser(hdev, &parser);
-	if (job->ext_queue) {
+
+	if (is_cb_patched(hdev, job)) {
 		if (!rc) {
 			job->patched_cb = parser.patched_cb;
 			job->job_cb_size = parser.patched_cb_size;
@@ -124,7 +138,7 @@ static void free_job(struct hl_device *hdev, struct hl_cs_job *job)
 {
 	struct hl_cs *cs = job->cs;
 
-	if (job->ext_queue) {
+	if (is_cb_patched(hdev, job)) {
 		hl_userptr_delete_list(hdev, &job->userptr_list);
 
 		/*
@@ -140,6 +154,19 @@ static void free_job(struct hl_device *hdev, struct hl_cs_job *job)
 		}
 	}
 
+	/* For H/W queue jobs, if a user CB was allocated by driver and MMU is
+	 * enabled, the user CB isn't released in cs_parser() and thus should be
+	 * released here.
+	 */
+	if (job->queue_type == QUEUE_TYPE_HW &&
+			job->is_kernel_allocated_cb && hdev->mmu_enable) {
+		spin_lock(&job->user_cb->lock);
+		job->user_cb->cs_cnt--;
+		spin_unlock(&job->user_cb->lock);
+
+		hl_cb_put(job->user_cb);
+	}
+
 	/*
 	 * This is the only place where there can be multiple threads
 	 * modifying the list at the same time
@@ -150,7 +177,8 @@ static void free_job(struct hl_device *hdev, struct hl_cs_job *job)
 
 	hl_debugfs_remove_job(hdev, job);
 
-	if (job->ext_queue)
+	if (job->queue_type == QUEUE_TYPE_EXT ||
+			job->queue_type == QUEUE_TYPE_HW)
 		cs_put(cs);
 
 	kfree(job);
@@ -387,18 +415,13 @@ static void job_wq_completion(struct work_struct *work)
 	free_job(hdev, job);
 }
 
-static struct hl_cb *validate_queue_index(struct hl_device *hdev,
-					struct hl_cb_mgr *cb_mgr,
-					struct hl_cs_chunk *chunk,
-					bool *ext_queue)
+static int validate_queue_index(struct hl_device *hdev,
+				struct hl_cs_chunk *chunk,
+				enum hl_queue_type *queue_type,
+				bool *is_kernel_allocated_cb)
 {
 	struct asic_fixed_properties *asic = &hdev->asic_prop;
 	struct hw_queue_properties *hw_queue_prop;
-	u32 cb_handle;
-	struct hl_cb *cb;
-
-	/* Assume external queue */
-	*ext_queue = true;
 
 	hw_queue_prop = &asic->hw_queues_props[chunk->queue_index];
 
@@ -406,20 +429,29 @@ static struct hl_cb *validate_queue_index(struct hl_device *hdev,
 			(hw_queue_prop->type == QUEUE_TYPE_NA)) {
 		dev_err(hdev->dev, "Queue index %d is invalid\n",
 			chunk->queue_index);
-		return NULL;
+		return -EINVAL;
 	}
 
 	if (hw_queue_prop->driver_only) {
 		dev_err(hdev->dev,
 			"Queue index %d is restricted for the kernel driver\n",
 			chunk->queue_index);
-		return NULL;
-	} else if (hw_queue_prop->type == QUEUE_TYPE_INT) {
-		*ext_queue = false;
-		return (struct hl_cb *) (uintptr_t) chunk->cb_handle;
+		return -EINVAL;
 	}
 
-	/* Retrieve CB object */
+	*queue_type = hw_queue_prop->type;
+	*is_kernel_allocated_cb = !!hw_queue_prop->requires_kernel_cb;
+
+	return 0;
+}
+
+static struct hl_cb *get_cb_from_cs_chunk(struct hl_device *hdev,
+					struct hl_cb_mgr *cb_mgr,
+					struct hl_cs_chunk *chunk)
+{
+	struct hl_cb *cb;
+	u32 cb_handle;
+
 	cb_handle = (u32) (chunk->cb_handle >> PAGE_SHIFT);
 
 	cb = hl_cb_get(hdev, cb_mgr, cb_handle);
@@ -444,7 +476,8 @@ release_cb:
 	return NULL;
 }
 
-struct hl_cs_job *hl_cs_allocate_job(struct hl_device *hdev, bool ext_queue)
+struct hl_cs_job *hl_cs_allocate_job(struct hl_device *hdev,
+		enum hl_queue_type queue_type, bool is_kernel_allocated_cb)
 {
 	struct hl_cs_job *job;
 
@@ -452,12 +485,14 @@ struct hl_cs_job *hl_cs_allocate_job(struct hl_device *hdev, bool ext_queue)
 	if (!job)
 		return NULL;
 
-	job->ext_queue = ext_queue;
+	job->queue_type = queue_type;
+	job->is_kernel_allocated_cb = is_kernel_allocated_cb;
 
-	if (job->ext_queue) {
+	if (is_cb_patched(hdev, job))
 		INIT_LIST_HEAD(&job->userptr_list);
+
+	if (job->queue_type == QUEUE_TYPE_EXT)
 		INIT_WORK(&job->finish_work, job_wq_completion);
-	}
 
 	return job;
 }
@@ -470,7 +505,7 @@ static int _hl_cs_ioctl(struct hl_fpriv *hpriv, void __user *chunks,
 	struct hl_cs_job *job;
 	struct hl_cs *cs;
 	struct hl_cb *cb;
-	bool ext_queue_present = false;
+	bool int_queues_only = true;
 	u32 size_to_copy;
 	int rc, i, parse_cnt;
 
@@ -514,23 +549,33 @@ static int _hl_cs_ioctl(struct hl_fpriv *hpriv, void __user *chunks,
 	/* Validate ALL the CS chunks before submitting the CS */
 	for (i = 0, parse_cnt = 0 ; i < num_chunks ; i++, parse_cnt++) {
 		struct hl_cs_chunk *chunk = &cs_chunk_array[i];
-		bool ext_queue;
+		enum hl_queue_type queue_type;
+		bool is_kernel_allocated_cb;
+
+		rc = validate_queue_index(hdev, chunk, &queue_type,
+						&is_kernel_allocated_cb);
+		if (rc)
+			goto free_cs_object;
 
-		cb = validate_queue_index(hdev, &hpriv->cb_mgr, chunk,
-					&ext_queue);
-		if (ext_queue) {
-			ext_queue_present = true;
+		if (is_kernel_allocated_cb) {
+			cb = get_cb_from_cs_chunk(hdev, &hpriv->cb_mgr, chunk);
 			if (!cb) {
 				rc = -EINVAL;
 				goto free_cs_object;
 			}
+		} else {
+			cb = (struct hl_cb *) (uintptr_t) chunk->cb_handle;
 		}
 
-		job = hl_cs_allocate_job(hdev, ext_queue);
+		if (queue_type == QUEUE_TYPE_EXT || queue_type == QUEUE_TYPE_HW)
+			int_queues_only = false;
+
+		job = hl_cs_allocate_job(hdev, queue_type,
+						is_kernel_allocated_cb);
 		if (!job) {
 			dev_err(hdev->dev, "Failed to allocate a new job\n");
 			rc = -ENOMEM;
-			if (ext_queue)
+			if (is_kernel_allocated_cb)
 				goto release_cb;
 			else
 				goto free_cs_object;
@@ -540,7 +585,7 @@ static int _hl_cs_ioctl(struct hl_fpriv *hpriv, void __user *chunks,
 		job->cs = cs;
 		job->user_cb = cb;
 		job->user_cb_size = chunk->cb_size;
-		if (job->ext_queue)
+		if (is_kernel_allocated_cb)
 			job->job_cb_size = cb->size;
 		else
 			job->job_cb_size = chunk->cb_size;
@@ -553,10 +598,11 @@ static int _hl_cs_ioctl(struct hl_fpriv *hpriv, void __user *chunks,
 		/*
 		 * Increment CS reference. When CS reference is 0, CS is
 		 * done and can be signaled to user and free all its resources
-		 * Only increment for JOB on external queues, because only
-		 * for those JOBs we get completion
+		 * Only increment for JOB on external or H/W queues, because
+		 * only for those JOBs we get completion
 		 */
-		if (job->ext_queue)
+		if (job->queue_type == QUEUE_TYPE_EXT ||
+				job->queue_type == QUEUE_TYPE_HW)
 			cs_get(cs);
 
 		hl_debugfs_add_job(hdev, job);
@@ -570,9 +616,9 @@ static int _hl_cs_ioctl(struct hl_fpriv *hpriv, void __user *chunks,
 		}
 	}
 
-	if (!ext_queue_present) {
+	if (int_queues_only) {
 		dev_err(hdev->dev,
-			"Reject CS %d.%llu because no external queues jobs\n",
+			"Reject CS %d.%llu because only internal queues jobs are present\n",
 			cs->ctx->asid, cs->sequence);
 		rc = -EINVAL;
 		goto free_cs_object;
@@ -580,9 +626,10 @@ static int _hl_cs_ioctl(struct hl_fpriv *hpriv, void __user *chunks,
 
 	rc = hl_hw_queue_schedule_cs(cs);
 	if (rc) {
-		dev_err(hdev->dev,
-			"Failed to submit CS %d.%llu to H/W queues, error %d\n",
-			cs->ctx->asid, cs->sequence, rc);
+		if (rc != -EAGAIN)
+			dev_err(hdev->dev,
+				"Failed to submit CS %d.%llu to H/W queues, error %d\n",
+				cs->ctx->asid, cs->sequence, rc);
 		goto free_cs_object;
 	}
 
diff --git a/drivers/misc/habanalabs/debugfs.c b/drivers/misc/habanalabs/debugfs.c
index 87f37ac31ccd..20413e350343 100644
--- a/drivers/misc/habanalabs/debugfs.c
+++ b/drivers/misc/habanalabs/debugfs.c
@@ -307,45 +307,57 @@ static inline u64 get_hop0_addr(struct hl_ctx *ctx)
 			(ctx->asid * ctx->hdev->asic_prop.mmu_hop_table_size);
 }
 
-static inline u64 get_hop0_pte_addr(struct hl_ctx *ctx, u64 hop_addr,
-		u64 virt_addr)
+static inline u64 get_hopN_pte_addr(struct hl_ctx *ctx, u64 hop_addr,
+					u64 virt_addr, u64 mask, u64 shift)
 {
 	return hop_addr + ctx->hdev->asic_prop.mmu_pte_size *
-			((virt_addr & HOP0_MASK) >> HOP0_SHIFT);
+			((virt_addr & mask) >> shift);
 }
 
-static inline u64 get_hop1_pte_addr(struct hl_ctx *ctx, u64 hop_addr,
-		u64 virt_addr)
+static inline u64 get_hop0_pte_addr(struct hl_ctx *ctx,
+					struct hl_mmu_properties *mmu_specs,
+					u64 hop_addr, u64 vaddr)
 {
-	return hop_addr + ctx->hdev->asic_prop.mmu_pte_size *
-			((virt_addr & HOP1_MASK) >> HOP1_SHIFT);
+	return get_hopN_pte_addr(ctx, hop_addr, vaddr, mmu_specs->hop0_mask,
+					mmu_specs->hop0_shift);
 }
 
-static inline u64 get_hop2_pte_addr(struct hl_ctx *ctx, u64 hop_addr,
-		u64 virt_addr)
+static inline u64 get_hop1_pte_addr(struct hl_ctx *ctx,
+					struct hl_mmu_properties *mmu_specs,
+					u64 hop_addr, u64 vaddr)
 {
-	return hop_addr + ctx->hdev->asic_prop.mmu_pte_size *
-			((virt_addr & HOP2_MASK) >> HOP2_SHIFT);
+	return get_hopN_pte_addr(ctx, hop_addr, vaddr, mmu_specs->hop1_mask,
+					mmu_specs->hop1_shift);
 }
 
-static inline u64 get_hop3_pte_addr(struct hl_ctx *ctx, u64 hop_addr,
-		u64 virt_addr)
+static inline u64 get_hop2_pte_addr(struct hl_ctx *ctx,
+					struct hl_mmu_properties *mmu_specs,
+					u64 hop_addr, u64 vaddr)
 {
-	return hop_addr + ctx->hdev->asic_prop.mmu_pte_size *
-			((virt_addr & HOP3_MASK) >> HOP3_SHIFT);
+	return get_hopN_pte_addr(ctx, hop_addr, vaddr, mmu_specs->hop2_mask,
+					mmu_specs->hop2_shift);
 }
 
-static inline u64 get_hop4_pte_addr(struct hl_ctx *ctx, u64 hop_addr,
-		u64 virt_addr)
+static inline u64 get_hop3_pte_addr(struct hl_ctx *ctx,
+					struct hl_mmu_properties *mmu_specs,
+					u64 hop_addr, u64 vaddr)
 {
-	return hop_addr + ctx->hdev->asic_prop.mmu_pte_size *
-			((virt_addr & HOP4_MASK) >> HOP4_SHIFT);
+	return get_hopN_pte_addr(ctx, hop_addr, vaddr, mmu_specs->hop3_mask,
+					mmu_specs->hop3_shift);
+}
+
+static inline u64 get_hop4_pte_addr(struct hl_ctx *ctx,
+					struct hl_mmu_properties *mmu_specs,
+					u64 hop_addr, u64 vaddr)
+{
+	return get_hopN_pte_addr(ctx, hop_addr, vaddr, mmu_specs->hop4_mask,
+					mmu_specs->hop4_shift);
 }
 
 static inline u64 get_next_hop_addr(u64 curr_pte)
 {
 	if (curr_pte & PAGE_PRESENT_MASK)
-		return curr_pte & PHYS_ADDR_MASK;
+		return curr_pte & HOP_PHYS_ADDR_MASK;
 	else
 		return ULLONG_MAX;
 }
@@ -355,7 +367,10 @@ static int mmu_show(struct seq_file *s, void *data)
 	struct hl_debugfs_entry *entry = s->private;
 	struct hl_dbg_device_entry *dev_entry = entry->dev_entry;
 	struct hl_device *hdev = dev_entry->hdev;
+	struct asic_fixed_properties *prop = &hdev->asic_prop;
+	struct hl_mmu_properties *mmu_prop;
 	struct hl_ctx *ctx;
+	bool is_dram_addr;
 
 	u64 hop0_addr = 0, hop0_pte_addr = 0, hop0_pte = 0,
 		hop1_addr = 0, hop1_pte_addr = 0, hop1_pte = 0,
@@ -377,33 +392,39 @@ static int mmu_show(struct seq_file *s, void *data)
 		return 0;
 	}
 
+	is_dram_addr = hl_mem_area_inside_range(virt_addr, prop->dmmu.page_size,
+				prop->va_space_dram_start_address,
+				prop->va_space_dram_end_address);
+
+	mmu_prop = is_dram_addr ? &prop->dmmu : &prop->pmmu;
+
 	mutex_lock(&ctx->mmu_lock);
 
 	/* the following lookup is copied from unmap() in mmu.c */
 
 	hop0_addr = get_hop0_addr(ctx);
-	hop0_pte_addr = get_hop0_pte_addr(ctx, hop0_addr, virt_addr);
+	hop0_pte_addr = get_hop0_pte_addr(ctx, mmu_prop, hop0_addr, virt_addr);
 	hop0_pte = hdev->asic_funcs->read_pte(hdev, hop0_pte_addr);
 	hop1_addr = get_next_hop_addr(hop0_pte);
 
 	if (hop1_addr == ULLONG_MAX)
 		goto not_mapped;
 
-	hop1_pte_addr = get_hop1_pte_addr(ctx, hop1_addr, virt_addr);
+	hop1_pte_addr = get_hop1_pte_addr(ctx, mmu_prop, hop1_addr, virt_addr);
 	hop1_pte = hdev->asic_funcs->read_pte(hdev, hop1_pte_addr);
 	hop2_addr = get_next_hop_addr(hop1_pte);
 
 	if (hop2_addr == ULLONG_MAX)
 		goto not_mapped;
 
-	hop2_pte_addr = get_hop2_pte_addr(ctx, hop2_addr, virt_addr);
+	hop2_pte_addr = get_hop2_pte_addr(ctx, mmu_prop, hop2_addr, virt_addr);
 	hop2_pte = hdev->asic_funcs->read_pte(hdev, hop2_pte_addr);
 	hop3_addr = get_next_hop_addr(hop2_pte);
 
 	if (hop3_addr == ULLONG_MAX)
 		goto not_mapped;
 
-	hop3_pte_addr = get_hop3_pte_addr(ctx, hop3_addr, virt_addr);
+	hop3_pte_addr = get_hop3_pte_addr(ctx, mmu_prop, hop3_addr, virt_addr);
 	hop3_pte = hdev->asic_funcs->read_pte(hdev, hop3_pte_addr);
 
 	if (!(hop3_pte & LAST_MASK)) {
@@ -412,7 +433,8 @@ static int mmu_show(struct seq_file *s, void *data)
 		if (hop4_addr == ULLONG_MAX)
 			goto not_mapped;
 
-		hop4_pte_addr = get_hop4_pte_addr(ctx, hop4_addr, virt_addr);
+		hop4_pte_addr = get_hop4_pte_addr(ctx, mmu_prop, hop4_addr,
+							virt_addr);
 		hop4_pte = hdev->asic_funcs->read_pte(hdev, hop4_pte_addr);
 		if (!(hop4_pte & PAGE_PRESENT_MASK))
 			goto not_mapped;
@@ -506,6 +528,12 @@ static int engines_show(struct seq_file *s, void *data)
 	struct hl_dbg_device_entry *dev_entry = entry->dev_entry;
 	struct hl_device *hdev = dev_entry->hdev;
 
+	if (atomic_read(&hdev->in_reset)) {
+		dev_warn_ratelimited(hdev->dev,
+				"Can't check device idle during reset\n");
+		return 0;
+	}
+
 	hdev->asic_funcs->is_device_idle(hdev, NULL, s);
 
 	return 0;
@@ -534,41 +562,50 @@ static int device_va_to_pa(struct hl_device *hdev, u64 virt_addr,
 				u64 *phys_addr)
 {
 	struct hl_ctx *ctx = hdev->compute_ctx;
+	struct asic_fixed_properties *prop = &hdev->asic_prop;
+	struct hl_mmu_properties *mmu_prop;
 	u64 hop_addr, hop_pte_addr, hop_pte;
-	u64 offset_mask = HOP4_MASK | OFFSET_MASK;
+	u64 offset_mask = HOP4_MASK | FLAGS_MASK;
 	int rc = 0;
+	bool is_dram_addr;
 
 	if (!ctx) {
 		dev_err(hdev->dev, "no ctx available\n");
 		return -EINVAL;
 	}
 
+	is_dram_addr = hl_mem_area_inside_range(virt_addr, prop->dmmu.page_size,
+				prop->va_space_dram_start_address,
+				prop->va_space_dram_end_address);
+
+	mmu_prop = is_dram_addr ? &prop->dmmu : &prop->pmmu;
+
 	mutex_lock(&ctx->mmu_lock);
 
 	/* hop 0 */
 	hop_addr = get_hop0_addr(ctx);
-	hop_pte_addr = get_hop0_pte_addr(ctx, hop_addr, virt_addr);
+	hop_pte_addr = get_hop0_pte_addr(ctx, mmu_prop, hop_addr, virt_addr);
 	hop_pte = hdev->asic_funcs->read_pte(hdev, hop_pte_addr);
 
 	/* hop 1 */
 	hop_addr = get_next_hop_addr(hop_pte);
 	if (hop_addr == ULLONG_MAX)
 		goto not_mapped;
-	hop_pte_addr = get_hop1_pte_addr(ctx, hop_addr, virt_addr);
+	hop_pte_addr = get_hop1_pte_addr(ctx, mmu_prop, hop_addr, virt_addr);
 	hop_pte = hdev->asic_funcs->read_pte(hdev, hop_pte_addr);
 
 	/* hop 2 */
 	hop_addr = get_next_hop_addr(hop_pte);
 	if (hop_addr == ULLONG_MAX)
 		goto not_mapped;
-	hop_pte_addr = get_hop2_pte_addr(ctx, hop_addr, virt_addr);
+	hop_pte_addr = get_hop2_pte_addr(ctx, mmu_prop, hop_addr, virt_addr);
 	hop_pte = hdev->asic_funcs->read_pte(hdev, hop_pte_addr);
 
 	/* hop 3 */
 	hop_addr = get_next_hop_addr(hop_pte);
 	if (hop_addr == ULLONG_MAX)
 		goto not_mapped;
-	hop_pte_addr = get_hop3_pte_addr(ctx, hop_addr, virt_addr);
+	hop_pte_addr = get_hop3_pte_addr(ctx, mmu_prop, hop_addr, virt_addr);
 	hop_pte = hdev->asic_funcs->read_pte(hdev, hop_pte_addr);
 
 	if (!(hop_pte & LAST_MASK)) {
@@ -576,10 +613,11 @@ static int device_va_to_pa(struct hl_device *hdev, u64 virt_addr,
 		hop_addr = get_next_hop_addr(hop_pte);
 		if (hop_addr == ULLONG_MAX)
 			goto not_mapped;
-		hop_pte_addr = get_hop4_pte_addr(ctx, hop_addr, virt_addr);
+		hop_pte_addr = get_hop4_pte_addr(ctx, mmu_prop, hop_addr,
+							virt_addr);
 		hop_pte = hdev->asic_funcs->read_pte(hdev, hop_pte_addr);
 
-		offset_mask = OFFSET_MASK;
+		offset_mask = FLAGS_MASK;
 	}
 
 	if (!(hop_pte & PAGE_PRESENT_MASK))
@@ -608,6 +646,11 @@ static ssize_t hl_data_read32(struct file *f, char __user *buf,
 	u32 val;
 	ssize_t rc;
 
+	if (atomic_read(&hdev->in_reset)) {
+		dev_warn_ratelimited(hdev->dev, "Can't read during reset\n");
+		return 0;
+	}
+
 	if (*ppos)
 		return 0;
 
@@ -637,6 +680,11 @@ static ssize_t hl_data_write32(struct file *f, const char __user *buf,
 	u32 value;
 	ssize_t rc;
 
+	if (atomic_read(&hdev->in_reset)) {
+		dev_warn_ratelimited(hdev->dev, "Can't write during reset\n");
+		return 0;
+	}
+
 	rc = kstrtouint_from_user(buf, count, 16, &value);
 	if (rc)
 		return rc;
diff --git a/drivers/misc/habanalabs/device.c b/drivers/misc/habanalabs/device.c
index 459fee70a597..b155e9549076 100644
--- a/drivers/misc/habanalabs/device.c
+++ b/drivers/misc/habanalabs/device.c
@@ -42,12 +42,10 @@ static void hpriv_release(struct kref *ref)
 {
 	struct hl_fpriv *hpriv;
 	struct hl_device *hdev;
-	struct hl_ctx *ctx;
 
 	hpriv = container_of(ref, struct hl_fpriv, refcount);
 
 	hdev = hpriv->hdev;
-	ctx = hpriv->ctx;
 
 	put_pid(hpriv->taskpid);
 
@@ -889,13 +887,19 @@ again:
 	/* Go over all the queues, release all CS and their jobs */
 	hl_cs_rollback_all(hdev);
 
-	/* Kill processes here after CS rollback. This is because the process
-	 * can't really exit until all its CSs are done, which is what we
-	 * do in cs rollback
-	 */
-	if (from_hard_reset_thread)
+	if (hard_reset) {
+		/* Kill processes here after CS rollback. This is because the
+		 * process can't really exit until all its CSs are done, which
+		 * is what we do in cs rollback
+		 */
 		device_kill_open_processes(hdev);
 
+		/* Flush the Event queue workers to make sure no other thread is
+		 * reading or writing to registers during the reset
+		 */
+		flush_workqueue(hdev->eq_wq);
+	}
+
 	/* Release kernel context */
 	if ((hard_reset) && (hl_ctx_put(hdev->kernel_ctx) == 1))
 		hdev->kernel_ctx = NULL;
diff --git a/drivers/misc/habanalabs/firmware_if.c b/drivers/misc/habanalabs/firmware_if.c
index ea2ca67fbfbf..f5bd03171dac 100644
--- a/drivers/misc/habanalabs/firmware_if.c
+++ b/drivers/misc/habanalabs/firmware_if.c
@@ -143,10 +143,7 @@ int hl_fw_test_cpu_queue(struct hl_device *hdev)
 			sizeof(test_pkt), HL_DEVICE_TIMEOUT_USEC, &result);
 
 	if (!rc) {
-		if (result == ARMCP_PACKET_FENCE_VAL)
-			dev_info(hdev->dev,
-				"queue test on CPU queue succeeded\n");
-		else
+		if (result != ARMCP_PACKET_FENCE_VAL)
 			dev_err(hdev->dev,
 				"CPU queue test failed (0x%08lX)\n", result);
 	} else {
diff --git a/drivers/misc/habanalabs/goya/goya.c b/drivers/misc/habanalabs/goya/goya.c
index 6fba14b81f90..c8d16aa4382c 100644
--- a/drivers/misc/habanalabs/goya/goya.c
+++ b/drivers/misc/habanalabs/goya/goya.c
@@ -72,6 +72,9 @@
  *
  */
 
+#define GOYA_UBOOT_FW_FILE	"habanalabs/goya/goya-u-boot.bin"
+#define GOYA_LINUX_FW_FILE	"habanalabs/goya/goya-fit.itb"
+
 #define GOYA_MMU_REGS_NUM		63
 
 #define GOYA_DMA_POOL_BLK_SIZE		0x100		/* 256 bytes */
@@ -337,17 +340,20 @@ void goya_get_fixed_properties(struct hl_device *hdev)
 	for (i = 0 ; i < NUMBER_OF_EXT_HW_QUEUES ; i++) {
 		prop->hw_queues_props[i].type = QUEUE_TYPE_EXT;
 		prop->hw_queues_props[i].driver_only = 0;
+		prop->hw_queues_props[i].requires_kernel_cb = 1;
 	}
 
 	for (; i < NUMBER_OF_EXT_HW_QUEUES + NUMBER_OF_CPU_HW_QUEUES ; i++) {
 		prop->hw_queues_props[i].type = QUEUE_TYPE_CPU;
 		prop->hw_queues_props[i].driver_only = 1;
+		prop->hw_queues_props[i].requires_kernel_cb = 0;
 	}
 
 	for (; i < NUMBER_OF_EXT_HW_QUEUES + NUMBER_OF_CPU_HW_QUEUES +
 			NUMBER_OF_INT_HW_QUEUES; i++) {
 		prop->hw_queues_props[i].type = QUEUE_TYPE_INT;
 		prop->hw_queues_props[i].driver_only = 0;
+		prop->hw_queues_props[i].requires_kernel_cb = 0;
 	}
 
 	for (; i < HL_MAX_QUEUES; i++)
@@ -377,6 +383,23 @@ void goya_get_fixed_properties(struct hl_device *hdev)
 	prop->mmu_hop0_tables_total_size = HOP0_TABLES_TOTAL_SIZE;
 	prop->dram_page_size = PAGE_SIZE_2MB;
 
+	prop->dmmu.hop0_shift = HOP0_SHIFT;
+	prop->dmmu.hop1_shift = HOP1_SHIFT;
+	prop->dmmu.hop2_shift = HOP2_SHIFT;
+	prop->dmmu.hop3_shift = HOP3_SHIFT;
+	prop->dmmu.hop4_shift = HOP4_SHIFT;
+	prop->dmmu.hop0_mask = HOP0_MASK;
+	prop->dmmu.hop1_mask = HOP1_MASK;
+	prop->dmmu.hop2_mask = HOP2_MASK;
+	prop->dmmu.hop3_mask = HOP3_MASK;
+	prop->dmmu.hop4_mask = HOP4_MASK;
+	prop->dmmu.huge_page_size = PAGE_SIZE_2MB;
+
+	/* No difference between PMMU and DMMU except of page size */
+	memcpy(&prop->pmmu, &prop->dmmu, sizeof(prop->dmmu));
+	prop->dmmu.page_size = PAGE_SIZE_2MB;
+	prop->pmmu.page_size = PAGE_SIZE_4KB;
+
 	prop->va_space_host_start_address = VA_HOST_SPACE_START;
 	prop->va_space_host_end_address = VA_HOST_SPACE_END;
 	prop->va_space_dram_start_address = VA_DDR_SPACE_START;
@@ -393,6 +416,9 @@ void goya_get_fixed_properties(struct hl_device *hdev)
 	prop->tpc_enabled_mask = TPC_ENABLED_MASK;
 	prop->pcie_dbi_base_address = mmPCIE_DBI_BASE;
 	prop->pcie_aux_dbi_reg_addr = CFG_BASE + mmPCIE_AUX_DBI;
+
+	strncpy(prop->armcp_info.card_name, GOYA_DEFAULT_CARD_NAME,
+		CARD_NAME_MAX_LEN);
 }
 
 /*
@@ -1454,6 +1480,9 @@ static void goya_init_golden_registers(struct hl_device *hdev)
 				1 << TPC0_NRTR_SCRAMB_EN_VAL_SHIFT);
 		WREG32(mmTPC0_NRTR_NON_LIN_SCRAMB + offset,
 				1 << TPC0_NRTR_NON_LIN_SCRAMB_EN_SHIFT);
+
+		WREG32_FIELD(TPC0_CFG_MSS_CONFIG, offset,
+				ICACHE_FETCH_LINE_NUM, 2);
 	}
 
 	WREG32(mmDMA_NRTR_SCRAMB_EN, 1 << DMA_NRTR_SCRAMB_EN_VAL_SHIFT);
@@ -1533,7 +1562,6 @@ static void goya_init_mme_cmdq(struct hl_device *hdev)
 	u32 mtr_base_lo, mtr_base_hi;
 	u32 so_base_lo, so_base_hi;
 	u32 gic_base_lo, gic_base_hi;
-	u64 qman_base_addr;
 
 	mtr_base_lo = lower_32_bits(CFG_BASE + mmSYNC_MNGR_MON_PAY_ADDRL_0);
 	mtr_base_hi = upper_32_bits(CFG_BASE + mmSYNC_MNGR_MON_PAY_ADDRL_0);
@@ -1545,9 +1573,6 @@ static void goya_init_mme_cmdq(struct hl_device *hdev)
 	gic_base_hi =
 		upper_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR);
 
-	qman_base_addr = hdev->asic_prop.sram_base_address +
-				MME_QMAN_BASE_OFFSET;
-
 	WREG32(mmMME_CMDQ_CP_MSG_BASE0_ADDR_LO, mtr_base_lo);
 	WREG32(mmMME_CMDQ_CP_MSG_BASE0_ADDR_HI, mtr_base_hi);
 	WREG32(mmMME_CMDQ_CP_MSG_BASE1_ADDR_LO,	so_base_lo);
@@ -2141,13 +2166,11 @@ static void goya_halt_engines(struct hl_device *hdev, bool hard_reset)
  */
 static int goya_push_uboot_to_device(struct hl_device *hdev)
 {
-	char fw_name[200];
 	void __iomem *dst;
 
-	snprintf(fw_name, sizeof(fw_name), "habanalabs/goya/goya-u-boot.bin");
 	dst = hdev->pcie_bar[SRAM_CFG_BAR_ID] + UBOOT_FW_OFFSET;
 
-	return hl_fw_push_fw_to_device(hdev, fw_name, dst);
+	return hl_fw_push_fw_to_device(hdev, GOYA_UBOOT_FW_FILE, dst);
 }
 
 /*
@@ -2160,13 +2183,11 @@ static int goya_push_uboot_to_device(struct hl_device *hdev)
  */
 static int goya_push_linux_to_device(struct hl_device *hdev)
 {
-	char fw_name[200];
 	void __iomem *dst;
 
-	snprintf(fw_name, sizeof(fw_name), "habanalabs/goya/goya-fit.itb");
 	dst = hdev->pcie_bar[DDR_BAR_ID] + LINUX_FW_OFFSET;
 
-	return hl_fw_push_fw_to_device(hdev, fw_name, dst);
+	return hl_fw_push_fw_to_device(hdev, GOYA_LINUX_FW_FILE, dst);
 }
 
 static int goya_pldm_init_cpu(struct hl_device *hdev)
@@ -2291,6 +2312,10 @@ static int goya_init_cpu(struct hl_device *hdev, u32 cpu_timeout)
 		10000,
 		cpu_timeout);
 
+	/* Read U-Boot version now in case we will later fail */
+	goya_read_device_fw_version(hdev, FW_COMP_UBOOT);
+	goya_read_device_fw_version(hdev, FW_COMP_PREBOOT);
+
 	if (rc) {
 		dev_err(hdev->dev, "Error in ARM u-boot!");
 		switch (status) {
@@ -2328,6 +2353,11 @@ static int goya_init_cpu(struct hl_device *hdev, u32 cpu_timeout)
 				"ARM status %d - u-boot stopped by user\n",
 				status);
 			break;
+		case CPU_BOOT_STATUS_TS_INIT_FAIL:
+			dev_err(hdev->dev,
+				"ARM status %d - Thermal Sensor initialization failed\n",
+				status);
+			break;
 		default:
 			dev_err(hdev->dev,
 				"ARM status %d - Invalid status code\n",
@@ -2337,10 +2367,6 @@ static int goya_init_cpu(struct hl_device *hdev, u32 cpu_timeout)
 		return -EIO;
 	}
 
-	/* Read U-Boot version now in case we will later fail */
-	goya_read_device_fw_version(hdev, FW_COMP_UBOOT);
-	goya_read_device_fw_version(hdev, FW_COMP_PREBOOT);
-
 	if (!hdev->fw_loading) {
 		dev_info(hdev->dev, "Skip loading FW\n");
 		goto out;
@@ -2453,7 +2479,8 @@ int goya_mmu_init(struct hl_device *hdev)
 	WREG32_AND(mmSTLB_STLB_FEATURE_EN,
 			(~STLB_STLB_FEATURE_EN_FOLLOWER_EN_MASK));
 
-	hdev->asic_funcs->mmu_invalidate_cache(hdev, true);
+	hdev->asic_funcs->mmu_invalidate_cache(hdev, true,
+					VM_TYPE_USERPTR | VM_TYPE_PHYS_PACK);
 
 	WREG32(mmMMU_MMU_ENABLE, 1);
 	WREG32(mmMMU_SPI_MASK, 0xF);
@@ -2978,9 +3005,6 @@ int goya_test_queue(struct hl_device *hdev, u32 hw_queue_id)
 			"H/W queue %d test failed (scratch(0x%08llX) == 0x%08X)\n",
 			hw_queue_id, (unsigned long long) fence_dma_addr, tmp);
 		rc = -EIO;
-	} else {
-		dev_info(hdev->dev, "queue test on H/W queue %d succeeded\n",
-			hw_queue_id);
 	}
 
 free_pkt:
@@ -3925,7 +3949,7 @@ static int goya_parse_cb_no_ext_queue(struct hl_device *hdev,
 		return 0;
 
 	dev_err(hdev->dev,
-		"Internal CB address %px + 0x%x is not in SRAM nor in DRAM\n",
+		"Internal CB address 0x%px + 0x%x is not in SRAM nor in DRAM\n",
 		parser->user_cb, parser->user_cb_size);
 
 	return -EFAULT;
@@ -3935,7 +3959,7 @@ int goya_cs_parser(struct hl_device *hdev, struct hl_cs_parser *parser)
 {
 	struct goya_device *goya = hdev->asic_specific;
 
-	if (!parser->ext_queue)
+	if (parser->queue_type == QUEUE_TYPE_INT)
 		return goya_parse_cb_no_ext_queue(hdev, parser);
 
 	if (goya->hw_cap_initialized & HW_CAP_MMU)
@@ -4606,7 +4630,7 @@ static int goya_memset_device_memory(struct hl_device *hdev, u64 addr, u64 size,
 		lin_dma_pkt++;
 	} while (--lin_dma_pkts_cnt);
 
-	job = hl_cs_allocate_job(hdev, true);
+	job = hl_cs_allocate_job(hdev, QUEUE_TYPE_EXT, true);
 	if (!job) {
 		dev_err(hdev->dev, "Failed to allocate a new job\n");
 		rc = -ENOMEM;
@@ -4835,13 +4859,15 @@ static void goya_mmu_prepare(struct hl_device *hdev, u32 asid)
 		goya_mmu_prepare_reg(hdev, goya_mmu_regs[i], asid);
 }
 
-static void goya_mmu_invalidate_cache(struct hl_device *hdev, bool is_hard)
+static void goya_mmu_invalidate_cache(struct hl_device *hdev, bool is_hard,
+					u32 flags)
 {
 	struct goya_device *goya = hdev->asic_specific;
 	u32 status, timeout_usec;
 	int rc;
 
-	if (!(goya->hw_cap_initialized & HW_CAP_MMU))
+	if (!(goya->hw_cap_initialized & HW_CAP_MMU) ||
+		hdev->hard_reset_pending)
 		return;
 
 	/* no need in L1 only invalidation in Goya */
@@ -4880,7 +4906,8 @@ static void goya_mmu_invalidate_cache_range(struct hl_device *hdev,
 	u32 status, timeout_usec, inv_data, pi;
 	int rc;
 
-	if (!(goya->hw_cap_initialized & HW_CAP_MMU))
+	if (!(goya->hw_cap_initialized & HW_CAP_MMU) ||
+		hdev->hard_reset_pending)
 		return;
 
 	/* no need in L1 only invalidation in Goya */
@@ -5137,7 +5164,8 @@ static const struct hl_asic_funcs goya_funcs = {
 	.init_iatu = goya_init_iatu,
 	.rreg = hl_rreg,
 	.wreg = hl_wreg,
-	.halt_coresight = goya_halt_coresight
+	.halt_coresight = goya_halt_coresight,
+	.get_clk_rate = goya_get_clk_rate
 };
 
 /*
diff --git a/drivers/misc/habanalabs/goya/goyaP.h b/drivers/misc/habanalabs/goya/goyaP.h
index 89b6574f8e4f..c3230cb6e25c 100644
--- a/drivers/misc/habanalabs/goya/goyaP.h
+++ b/drivers/misc/habanalabs/goya/goyaP.h
@@ -233,4 +233,6 @@ void goya_cpu_accessible_dma_pool_free(struct hl_device *hdev, size_t size,
 					void *vaddr);
 void goya_mmu_remove_device_cpu_mappings(struct hl_device *hdev);
 
+int goya_get_clk_rate(struct hl_device *hdev, u32 *cur_clk, u32 *max_clk);
+
 #endif /* GOYAP_H_ */
diff --git a/drivers/misc/habanalabs/goya/goya_coresight.c b/drivers/misc/habanalabs/goya/goya_coresight.c
index b4d406af1bed..c1ee6e2b5dff 100644
--- a/drivers/misc/habanalabs/goya/goya_coresight.c
+++ b/drivers/misc/habanalabs/goya/goya_coresight.c
@@ -8,6 +8,7 @@
 #include "goyaP.h"
 #include "include/goya/goya_coresight.h"
 #include "include/goya/asic_reg/goya_regs.h"
+#include "include/goya/asic_reg/goya_masks.h"
 
 #include <uapi/misc/habanalabs.h>
 
@@ -377,33 +378,32 @@ static int goya_config_etr(struct hl_device *hdev,
 		struct hl_debug_params *params)
 {
 	struct hl_debug_params_etr *input;
-	u64 base_reg = mmPSOC_ETR_BASE - CFG_BASE;
 	u32 val;
 	int rc;
 
-	WREG32(base_reg + 0xFB0, CORESIGHT_UNLOCK);
+	WREG32(mmPSOC_ETR_LAR, CORESIGHT_UNLOCK);
 
-	val = RREG32(base_reg + 0x304);
+	val = RREG32(mmPSOC_ETR_FFCR);
 	val |= 0x1000;
-	WREG32(base_reg + 0x304, val);
+	WREG32(mmPSOC_ETR_FFCR, val);
 	val |= 0x40;
-	WREG32(base_reg + 0x304, val);
+	WREG32(mmPSOC_ETR_FFCR, val);
 
-	rc = goya_coresight_timeout(hdev, base_reg + 0x304, 6, false);
+	rc = goya_coresight_timeout(hdev, mmPSOC_ETR_FFCR, 6, false);
 	if (rc) {
 		dev_err(hdev->dev, "Failed to %s ETR on timeout, error %d\n",
 				params->enable ? "enable" : "disable", rc);
 		return rc;
 	}
 
-	rc = goya_coresight_timeout(hdev, base_reg + 0xC, 2, true);
+	rc = goya_coresight_timeout(hdev, mmPSOC_ETR_STS, 2, true);
 	if (rc) {
 		dev_err(hdev->dev, "Failed to %s ETR on timeout, error %d\n",
 				params->enable ? "enable" : "disable", rc);
 		return rc;
 	}
 
-	WREG32(base_reg + 0x20, 0);
+	WREG32(mmPSOC_ETR_CTL, 0);
 
 	if (params->enable) {
 		input = params->input;
@@ -423,25 +423,26 @@ static int goya_config_etr(struct hl_device *hdev,
 			return -EINVAL;
 		}
 
-		WREG32(base_reg + 0x34, 0x3FFC);
-		WREG32(base_reg + 0x4, input->buffer_size);
-		WREG32(base_reg + 0x28, input->sink_mode);
-		WREG32(base_reg + 0x110, 0x700);
-		WREG32(base_reg + 0x118,
+		WREG32(mmPSOC_ETR_BUFWM, 0x3FFC);
+		WREG32(mmPSOC_ETR_RSZ, input->buffer_size);
+		WREG32(mmPSOC_ETR_MODE, input->sink_mode);
+		WREG32(mmPSOC_ETR_AXICTL,
+				0x700 | PSOC_ETR_AXICTL_PROTCTRLBIT1_SHIFT);
+		WREG32(mmPSOC_ETR_DBALO,
 				lower_32_bits(input->buffer_address));
-		WREG32(base_reg + 0x11C,
+		WREG32(mmPSOC_ETR_DBAHI,
 				upper_32_bits(input->buffer_address));
-		WREG32(base_reg + 0x304, 3);
-		WREG32(base_reg + 0x308, 0xA);
-		WREG32(base_reg + 0x20, 1);
+		WREG32(mmPSOC_ETR_FFCR, 3);
+		WREG32(mmPSOC_ETR_PSCR, 0xA);
+		WREG32(mmPSOC_ETR_CTL, 1);
 	} else {
-		WREG32(base_reg + 0x34, 0);
-		WREG32(base_reg + 0x4, 0x400);
-		WREG32(base_reg + 0x118, 0);
-		WREG32(base_reg + 0x11C, 0);
-		WREG32(base_reg + 0x308, 0);
-		WREG32(base_reg + 0x28, 0);
-		WREG32(base_reg + 0x304, 0);
+		WREG32(mmPSOC_ETR_BUFWM, 0);
+		WREG32(mmPSOC_ETR_RSZ, 0x400);
+		WREG32(mmPSOC_ETR_DBALO, 0);
+		WREG32(mmPSOC_ETR_DBAHI, 0);
+		WREG32(mmPSOC_ETR_PSCR, 0);
+		WREG32(mmPSOC_ETR_MODE, 0);
+		WREG32(mmPSOC_ETR_FFCR, 0);
 
 		if (params->output_size >= sizeof(u64)) {
 			u32 rwp, rwphi;
@@ -451,8 +452,8 @@ static int goya_config_etr(struct hl_device *hdev,
 			 * the buffer is set in the RWP register (lower 32
 			 * bits), and in the RWPHI register (upper 8 bits).
 			 */
-			rwp = RREG32(base_reg + 0x18);
-			rwphi = RREG32(base_reg + 0x3c) & 0xff;
+			rwp = RREG32(mmPSOC_ETR_RWP);
+			rwphi = RREG32(mmPSOC_ETR_RWPHI) & 0xff;
 			*(u64 *) params->output = ((u64) rwphi << 32) | rwp;
 		}
 	}
diff --git a/drivers/misc/habanalabs/goya/goya_hwmgr.c b/drivers/misc/habanalabs/goya/goya_hwmgr.c
index a2a700c3d597..b2ebc01e27f4 100644
--- a/drivers/misc/habanalabs/goya/goya_hwmgr.c
+++ b/drivers/misc/habanalabs/goya/goya_hwmgr.c
@@ -32,6 +32,37 @@ void goya_set_pll_profile(struct hl_device *hdev, enum hl_pll_frequency freq)
 	}
 }
 
+int goya_get_clk_rate(struct hl_device *hdev, u32 *cur_clk, u32 *max_clk)
+{
+	long value;
+
+	if (hl_device_disabled_or_in_reset(hdev))
+		return -ENODEV;
+
+	value = hl_get_frequency(hdev, MME_PLL, false);
+
+	if (value < 0) {
+		dev_err(hdev->dev, "Failed to retrieve device max clock %ld\n",
+			value);
+		return value;
+	}
+
+	*max_clk = (value / 1000 / 1000);
+
+	value = hl_get_frequency(hdev, MME_PLL, true);
+
+	if (value < 0) {
+		dev_err(hdev->dev,
+			"Failed to retrieve device current clock %ld\n",
+			value);
+		return value;
+	}
+
+	*cur_clk = (value / 1000 / 1000);
+
+	return 0;
+}
+
 static ssize_t mme_clk_show(struct device *dev, struct device_attribute *attr,
 				char *buf)
 {
diff --git a/drivers/misc/habanalabs/habanalabs.h b/drivers/misc/habanalabs/habanalabs.h
index 75862be53c60..00c949f4ccd1 100644
--- a/drivers/misc/habanalabs/habanalabs.h
+++ b/drivers/misc/habanalabs/habanalabs.h
@@ -40,8 +40,6 @@
 
 #define HL_MAX_QUEUES			128
 
-#define HL_MAX_JOBS_PER_CS		64
-
 /* MUST BE POWER OF 2 and larger than 1 */
 #define HL_MAX_PENDING_CS		64
 
@@ -85,12 +83,15 @@ struct hl_fpriv;
  * @QUEUE_TYPE_INT: internal queue that performs DMA inside the device's
  *			memories and/or operates the compute engines.
  * @QUEUE_TYPE_CPU: S/W queue for communication with the device's CPU.
+ * @QUEUE_TYPE_HW: queue of DMA and compute engines jobs, for which completion
+ *                 notifications are sent by H/W.
  */
 enum hl_queue_type {
 	QUEUE_TYPE_NA,
 	QUEUE_TYPE_EXT,
 	QUEUE_TYPE_INT,
-	QUEUE_TYPE_CPU
+	QUEUE_TYPE_CPU,
+	QUEUE_TYPE_HW
 };
 
 /**
@@ -98,10 +99,13 @@ enum hl_queue_type {
  * @type: queue type.
  * @driver_only: true if only the driver is allowed to send a job to this queue,
  *               false otherwise.
+ * @requires_kernel_cb: true if a CB handle must be provided for jobs on this
+ *                      queue, false otherwise (a CB address must be provided).
  */
 struct hw_queue_properties {
 	enum hl_queue_type	type;
 	u8			driver_only;
+	u8			requires_kernel_cb;
 };
 
 /**
@@ -110,8 +114,8 @@ struct hw_queue_properties {
  * @VM_TYPE_PHYS_PACK: mapping of DRAM memory to device virtual address.
  */
 enum vm_type_t {
-	VM_TYPE_USERPTR,
-	VM_TYPE_PHYS_PACK
+	VM_TYPE_USERPTR = 0x1,
+	VM_TYPE_PHYS_PACK = 0x2
 };
 
 /**
@@ -127,12 +131,44 @@ enum hl_device_hw_state {
 };
 
 /**
+ * struct hl_mmu_properties - ASIC specific MMU address translation properties.
+ * @hop0_shift: shift of hop 0 mask.
+ * @hop1_shift: shift of hop 1 mask.
+ * @hop2_shift: shift of hop 2 mask.
+ * @hop3_shift: shift of hop 3 mask.
+ * @hop4_shift: shift of hop 4 mask.
+ * @hop0_mask: mask to get the PTE address in hop 0.
+ * @hop1_mask: mask to get the PTE address in hop 1.
+ * @hop2_mask: mask to get the PTE address in hop 2.
+ * @hop3_mask: mask to get the PTE address in hop 3.
+ * @hop4_mask: mask to get the PTE address in hop 4.
+ * @page_size: default page size used to allocate memory.
+ * @huge_page_size: page size used to allocate memory with huge pages.
+ */
+struct hl_mmu_properties {
+	u64	hop0_shift;
+	u64	hop1_shift;
+	u64	hop2_shift;
+	u64	hop3_shift;
+	u64	hop4_shift;
+	u64	hop0_mask;
+	u64	hop1_mask;
+	u64	hop2_mask;
+	u64	hop3_mask;
+	u64	hop4_mask;
+	u32	page_size;
+	u32	huge_page_size;
+};
+
+/**
  * struct asic_fixed_properties - ASIC specific immutable properties.
  * @hw_queues_props: H/W queues properties.
  * @armcp_info: received various information from ArmCP regarding the H/W, e.g.
  *		available sensors.
  * @uboot_ver: F/W U-boot version.
  * @preboot_ver: F/W Preboot version.
+ * @dmmu: DRAM MMU address translation properties.
+ * @pmmu: PCI (host) MMU address translation properties.
  * @sram_base_address: SRAM physical start address.
  * @sram_end_address: SRAM physical end address.
  * @sram_user_base_address - SRAM physical start address for user access.
@@ -169,53 +205,55 @@ enum hl_device_hw_state {
  * @psoc_pci_pll_nf: PCI PLL NF value.
  * @psoc_pci_pll_od: PCI PLL OD value.
  * @psoc_pci_pll_div_factor: PCI PLL DIV FACTOR 1 value.
- * @completion_queues_count: number of completion queues.
  * @high_pll: high PLL frequency used by the device.
  * @cb_pool_cb_cnt: number of CBs in the CB pool.
  * @cb_pool_cb_size: size of each CB in the CB pool.
  * @tpc_enabled_mask: which TPCs are enabled.
+ * @completion_queues_count: number of completion queues.
  */
 struct asic_fixed_properties {
 	struct hw_queue_properties	hw_queues_props[HL_MAX_QUEUES];
-	struct armcp_info	armcp_info;
-	char			uboot_ver[VERSION_MAX_LEN];
-	char			preboot_ver[VERSION_MAX_LEN];
-	u64			sram_base_address;
-	u64			sram_end_address;
-	u64			sram_user_base_address;
-	u64			dram_base_address;
-	u64			dram_end_address;
-	u64			dram_user_base_address;
-	u64			dram_size;
-	u64			dram_pci_bar_size;
-	u64			max_power_default;
-	u64			va_space_host_start_address;
-	u64			va_space_host_end_address;
-	u64			va_space_dram_start_address;
-	u64			va_space_dram_end_address;
-	u64			dram_size_for_default_page_mapping;
-	u64			pcie_dbi_base_address;
-	u64			pcie_aux_dbi_reg_addr;
-	u64			mmu_pgt_addr;
-	u64			mmu_dram_default_page_addr;
-	u32			mmu_pgt_size;
-	u32			mmu_pte_size;
-	u32			mmu_hop_table_size;
-	u32			mmu_hop0_tables_total_size;
-	u32			dram_page_size;
-	u32			cfg_size;
-	u32			sram_size;
-	u32			max_asid;
-	u32			num_of_events;
-	u32			psoc_pci_pll_nr;
-	u32			psoc_pci_pll_nf;
-	u32			psoc_pci_pll_od;
-	u32			psoc_pci_pll_div_factor;
-	u32			high_pll;
-	u32			cb_pool_cb_cnt;
-	u32			cb_pool_cb_size;
-	u8			completion_queues_count;
-	u8			tpc_enabled_mask;
+	struct armcp_info		armcp_info;
+	char				uboot_ver[VERSION_MAX_LEN];
+	char				preboot_ver[VERSION_MAX_LEN];
+	struct hl_mmu_properties	dmmu;
+	struct hl_mmu_properties	pmmu;
+	u64				sram_base_address;
+	u64				sram_end_address;
+	u64				sram_user_base_address;
+	u64				dram_base_address;
+	u64				dram_end_address;
+	u64				dram_user_base_address;
+	u64				dram_size;
+	u64				dram_pci_bar_size;
+	u64				max_power_default;
+	u64				va_space_host_start_address;
+	u64				va_space_host_end_address;
+	u64				va_space_dram_start_address;
+	u64				va_space_dram_end_address;
+	u64				dram_size_for_default_page_mapping;
+	u64				pcie_dbi_base_address;
+	u64				pcie_aux_dbi_reg_addr;
+	u64				mmu_pgt_addr;
+	u64				mmu_dram_default_page_addr;
+	u32				mmu_pgt_size;
+	u32				mmu_pte_size;
+	u32				mmu_hop_table_size;
+	u32				mmu_hop0_tables_total_size;
+	u32				dram_page_size;
+	u32				cfg_size;
+	u32				sram_size;
+	u32				max_asid;
+	u32				num_of_events;
+	u32				psoc_pci_pll_nr;
+	u32				psoc_pci_pll_nf;
+	u32				psoc_pci_pll_od;
+	u32				psoc_pci_pll_div_factor;
+	u32				high_pll;
+	u32				cb_pool_cb_cnt;
+	u32				cb_pool_cb_size;
+	u8				tpc_enabled_mask;
+	u8				completion_queues_count;
 };
 
 /**
@@ -236,8 +274,6 @@ struct hl_dma_fence {
  * Command Buffers
  */
 
-#define HL_MAX_CB_SIZE		0x200000	/* 2MB */
-
 /**
  * struct hl_cb_mgr - describes a Command Buffer Manager.
  * @cb_lock: protects cb_handles.
@@ -481,8 +517,8 @@ enum hl_pll_frequency {
  * @get_events_stat: retrieve event queue entries histogram.
  * @read_pte: read MMU page table entry from DRAM.
  * @write_pte: write MMU page table entry to DRAM.
- * @mmu_invalidate_cache: flush MMU STLB cache, either with soft (L1 only) or
- *                        hard (L0 & L1) flush.
+ * @mmu_invalidate_cache: flush MMU STLB host/DRAM cache, either with soft
+ *                        (L1 only) or hard (L0 & L1) flush.
  * @mmu_invalidate_cache_range: flush specific MMU STLB cache lines with
  *                              ASID-VA-size mask.
  * @send_heartbeat: send is-alive packet to ArmCP and verify response.
@@ -502,6 +538,7 @@ enum hl_pll_frequency {
  * @rreg: Read a register. Needed for simulator support.
  * @wreg: Write a register. Needed for simulator support.
  * @halt_coresight: stop the ETF and ETR traces.
+ * @get_clk_rate: Retrieve the ASIC current and maximum clock rate in MHz
  */
 struct hl_asic_funcs {
 	int (*early_init)(struct hl_device *hdev);
@@ -562,7 +599,8 @@ struct hl_asic_funcs {
 				u32 *size);
 	u64 (*read_pte)(struct hl_device *hdev, u64 addr);
 	void (*write_pte)(struct hl_device *hdev, u64 addr, u64 val);
-	void (*mmu_invalidate_cache)(struct hl_device *hdev, bool is_hard);
+	void (*mmu_invalidate_cache)(struct hl_device *hdev, bool is_hard,
+					u32 flags);
 	void (*mmu_invalidate_cache_range)(struct hl_device *hdev, bool is_hard,
 			u32 asid, u64 va, u64 size);
 	int (*send_heartbeat)(struct hl_device *hdev);
@@ -584,6 +622,7 @@ struct hl_asic_funcs {
 	u32 (*rreg)(struct hl_device *hdev, u32 reg);
 	void (*wreg)(struct hl_device *hdev, u32 reg, u32 val);
 	void (*halt_coresight)(struct hl_device *hdev);
+	int (*get_clk_rate)(struct hl_device *hdev, u32 *cur_clk, u32 *max_clk);
 };
 
 
@@ -688,7 +727,7 @@ struct hl_ctx_mgr {
  * @sgt: pointer to the scatter-gather table that holds the pages.
  * @dir: for DMA unmapping, the direction must be supplied, so save it.
  * @debugfs_list: node in debugfs list of command submissions.
- * @addr: user-space virtual pointer to the start of the memory area.
+ * @addr: user-space virtual address of the start of the memory area.
  * @size: size of the memory area to pin & map.
  * @dma_mapped: true if the SG was mapped to DMA addresses, false otherwise.
  */
@@ -752,11 +791,14 @@ struct hl_cs {
  * @userptr_list: linked-list of userptr mappings that belong to this job and
  *			wait for completion.
  * @debugfs_list: node in debugfs list of command submission jobs.
+ * @queue_type: the type of the H/W queue this job is submitted to.
  * @id: the id of this job inside a CS.
  * @hw_queue_id: the id of the H/W queue this job is submitted to.
  * @user_cb_size: the actual size of the CB we got from the user.
  * @job_cb_size: the actual size of the CB that we put on the queue.
- * @ext_queue: whether the job is for external queue or internal queue.
+ * @is_kernel_allocated_cb: true if the CB handle we got from the user holds a
+ *                          handle to a kernel-allocated CB object, false
+ *                          otherwise (SRAM/DRAM/host address).
  */
 struct hl_cs_job {
 	struct list_head	cs_node;
@@ -766,39 +808,44 @@ struct hl_cs_job {
 	struct work_struct	finish_work;
 	struct list_head	userptr_list;
 	struct list_head	debugfs_list;
+	enum hl_queue_type	queue_type;
 	u32			id;
 	u32			hw_queue_id;
 	u32			user_cb_size;
 	u32			job_cb_size;
-	u8			ext_queue;
+	u8			is_kernel_allocated_cb;
 };
 
 /**
- * struct hl_cs_parser - command submission paerser properties.
+ * struct hl_cs_parser - command submission parser properties.
  * @user_cb: the CB we got from the user.
  * @patched_cb: in case of patching, this is internal CB which is submitted on
  *		the queue instead of the CB we got from the IOCTL.
  * @job_userptr_list: linked-list of userptr mappings that belong to the related
  *			job and wait for completion.
  * @cs_sequence: the sequence number of the related CS.
+ * @queue_type: the type of the H/W queue this job is submitted to.
  * @ctx_id: the ID of the context the related CS belongs to.
  * @hw_queue_id: the id of the H/W queue this job is submitted to.
  * @user_cb_size: the actual size of the CB we got from the user.
  * @patched_cb_size: the size of the CB after parsing.
- * @ext_queue: whether the job is for external queue or internal queue.
  * @job_id: the id of the related job inside the related CS.
+ * @is_kernel_allocated_cb: true if the CB handle we got from the user holds a
+ *                          handle to a kernel-allocated CB object, false
+ *                          otherwise (SRAM/DRAM/host address).
  */
 struct hl_cs_parser {
 	struct hl_cb		*user_cb;
 	struct hl_cb		*patched_cb;
 	struct list_head	*job_userptr_list;
 	u64			cs_sequence;
+	enum hl_queue_type	queue_type;
 	u32			ctx_id;
 	u32			hw_queue_id;
 	u32			user_cb_size;
 	u32			patched_cb_size;
-	u8			ext_queue;
 	u8			job_id;
+	u8			is_kernel_allocated_cb;
 };
 
 
@@ -1048,9 +1095,10 @@ void hl_wreg(struct hl_device *hdev, u32 reg, u32 val);
 
 #define REG_FIELD_SHIFT(reg, field) reg##_##field##_SHIFT
 #define REG_FIELD_MASK(reg, field) reg##_##field##_MASK
-#define WREG32_FIELD(reg, field, val)	\
-	WREG32(mm##reg, (RREG32(mm##reg) & ~REG_FIELD_MASK(reg, field)) | \
-			(val) << REG_FIELD_SHIFT(reg, field))
+#define WREG32_FIELD(reg, offset, field, val)	\
+	WREG32(mm##reg + offset, (RREG32(mm##reg + offset) & \
+				~REG_FIELD_MASK(reg, field)) | \
+				(val) << REG_FIELD_SHIFT(reg, field))
 
 /* Timeout should be longer when working with simulator but cap the
  * increased timeout to some maximum
@@ -1501,7 +1549,8 @@ int hl_cb_pool_init(struct hl_device *hdev);
 int hl_cb_pool_fini(struct hl_device *hdev);
 
 void hl_cs_rollback_all(struct hl_device *hdev);
-struct hl_cs_job *hl_cs_allocate_job(struct hl_device *hdev, bool ext_queue);
+struct hl_cs_job *hl_cs_allocate_job(struct hl_device *hdev,
+		enum hl_queue_type queue_type, bool is_kernel_allocated_cb);
 
 void goya_set_asic_funcs(struct hl_device *hdev);
 
@@ -1513,7 +1562,7 @@ void hl_vm_fini(struct hl_device *hdev);
 
 int hl_pin_host_memory(struct hl_device *hdev, u64 addr, u64 size,
 			struct hl_userptr *userptr);
-int hl_unpin_host_memory(struct hl_device *hdev, struct hl_userptr *userptr);
+void hl_unpin_host_memory(struct hl_device *hdev, struct hl_userptr *userptr);
 void hl_userptr_delete_list(struct hl_device *hdev,
 				struct list_head *userptr_list);
 bool hl_userptr_is_pinned(struct hl_device *hdev, u64 addr, u32 size,
diff --git a/drivers/misc/habanalabs/habanalabs_ioctl.c b/drivers/misc/habanalabs/habanalabs_ioctl.c
index 66d9c710073c..6474b868ef27 100644
--- a/drivers/misc/habanalabs/habanalabs_ioctl.c
+++ b/drivers/misc/habanalabs/habanalabs_ioctl.c
@@ -60,11 +60,16 @@ static int hw_ip_info(struct hl_device *hdev, struct hl_info_args *args)
 	hw_ip.tpc_enabled_mask = prop->tpc_enabled_mask;
 	hw_ip.sram_size = prop->sram_size - sram_kmd_size;
 	hw_ip.dram_size = prop->dram_size - dram_kmd_size;
-	if (hw_ip.dram_size > 0)
+	if (hw_ip.dram_size > PAGE_SIZE)
 		hw_ip.dram_enabled = 1;
 	hw_ip.num_of_events = prop->num_of_events;
-	memcpy(hw_ip.armcp_version,
-		prop->armcp_info.armcp_version, VERSION_MAX_LEN);
+
+	memcpy(hw_ip.armcp_version, prop->armcp_info.armcp_version,
+		min(VERSION_MAX_LEN, HL_INFO_VERSION_MAX_LEN));
+
+	memcpy(hw_ip.card_name, prop->armcp_info.card_name,
+		min(CARD_NAME_MAX_LEN, HL_INFO_CARD_NAME_MAX_LEN));
+
 	hw_ip.armcp_cpld_version = le32_to_cpu(prop->armcp_info.cpld_version);
 	hw_ip.psoc_pci_pll_nr = prop->psoc_pci_pll_nr;
 	hw_ip.psoc_pci_pll_nf = prop->psoc_pci_pll_nf;
@@ -179,17 +184,14 @@ static int debug_coresight(struct hl_device *hdev, struct hl_debug_args *args)
 		goto out;
 	}
 
-	if (output) {
-		if (copy_to_user((void __user *) (uintptr_t) args->output_ptr,
-					output,
-					args->output_size)) {
-			dev_err(hdev->dev,
-				"copy to user failed in debug ioctl\n");
-			rc = -EFAULT;
-			goto out;
-		}
+	if (output && copy_to_user((void __user *) (uintptr_t) args->output_ptr,
+					output, args->output_size)) {
+		dev_err(hdev->dev, "copy to user failed in debug ioctl\n");
+		rc = -EFAULT;
+		goto out;
 	}
 
+
 out:
 	kfree(params);
 	kfree(output);
@@ -221,6 +223,41 @@ static int device_utilization(struct hl_device *hdev, struct hl_info_args *args)
 		min((size_t) max_size, sizeof(device_util))) ? -EFAULT : 0;
 }
 
+static int get_clk_rate(struct hl_device *hdev, struct hl_info_args *args)
+{
+	struct hl_info_clk_rate clk_rate = {0};
+	u32 max_size = args->return_size;
+	void __user *out = (void __user *) (uintptr_t) args->return_pointer;
+	int rc;
+
+	if ((!max_size) || (!out))
+		return -EINVAL;
+
+	rc = hdev->asic_funcs->get_clk_rate(hdev, &clk_rate.cur_clk_rate_mhz,
+						&clk_rate.max_clk_rate_mhz);
+	if (rc)
+		return rc;
+
+	return copy_to_user(out, &clk_rate,
+		min((size_t) max_size, sizeof(clk_rate))) ? -EFAULT : 0;
+}
+
+static int get_reset_count(struct hl_device *hdev, struct hl_info_args *args)
+{
+	struct hl_info_reset_count reset_count = {0};
+	u32 max_size = args->return_size;
+	void __user *out = (void __user *) (uintptr_t) args->return_pointer;
+
+	if ((!max_size) || (!out))
+		return -EINVAL;
+
+	reset_count.hard_reset_cnt = hdev->hard_reset_cnt;
+	reset_count.soft_reset_cnt = hdev->soft_reset_cnt;
+
+	return copy_to_user(out, &reset_count,
+		min((size_t) max_size, sizeof(reset_count))) ? -EFAULT : 0;
+}
+
 static int _hl_info_ioctl(struct hl_fpriv *hpriv, void *data,
 				struct device *dev)
 {
@@ -239,6 +276,9 @@ static int _hl_info_ioctl(struct hl_fpriv *hpriv, void *data,
 	case HL_INFO_DEVICE_STATUS:
 		return device_status_info(hdev, args);
 
+	case HL_INFO_RESET_COUNT:
+		return get_reset_count(hdev, args);
+
 	default:
 		break;
 	}
@@ -271,6 +311,10 @@ static int _hl_info_ioctl(struct hl_fpriv *hpriv, void *data,
 		rc = hw_events_info(hdev, true, args);
 		break;
 
+	case HL_INFO_CLK_RATE:
+		rc = get_clk_rate(hdev, args);
+		break;
+
 	default:
 		dev_err(dev, "Invalid request %d\n", args->op);
 		rc = -ENOTTY;
@@ -406,9 +450,8 @@ static long _hl_ioctl(struct file *filep, unsigned int cmd, unsigned long arg,
 
 	retcode = func(hpriv, kdata);
 
-	if (cmd & IOC_OUT)
-		if (copy_to_user((void __user *)arg, kdata, usize))
-			retcode = -EFAULT;
+	if ((cmd & IOC_OUT) && copy_to_user((void __user *)arg, kdata, usize))
+		retcode = -EFAULT;
 
 out_err:
 	if (retcode)
diff --git a/drivers/misc/habanalabs/hw_queue.c b/drivers/misc/habanalabs/hw_queue.c
index 55b383b2a116..91579dde9262 100644
--- a/drivers/misc/habanalabs/hw_queue.c
+++ b/drivers/misc/habanalabs/hw_queue.c
@@ -58,8 +58,8 @@ out:
 }
 
 /*
- * ext_queue_submit_bd - Submit a buffer descriptor to an external queue
- *
+ * ext_and_hw_queue_submit_bd() - Submit a buffer descriptor to an external or a
+ *                                H/W queue.
  * @hdev: pointer to habanalabs device structure
  * @q: pointer to habanalabs queue structure
  * @ctl: BD's control word
@@ -73,8 +73,8 @@ out:
  * This function must be called when the scheduler mutex is taken
  *
  */
-static void ext_queue_submit_bd(struct hl_device *hdev, struct hl_hw_queue *q,
-				u32 ctl, u32 len, u64 ptr)
+static void ext_and_hw_queue_submit_bd(struct hl_device *hdev,
+			struct hl_hw_queue *q, u32 ctl, u32 len, u64 ptr)
 {
 	struct hl_bd *bd;
 
@@ -174,6 +174,45 @@ static int int_queue_sanity_checks(struct hl_device *hdev,
 }
 
 /*
+ * hw_queue_sanity_checks() - Perform some sanity checks on a H/W queue.
+ * @hdev: Pointer to hl_device structure.
+ * @q: Pointer to hl_hw_queue structure.
+ * @num_of_entries: How many entries to check for space.
+ *
+ * Perform the following:
+ * - Make sure we have enough space in the completion queue.
+ *   This check also ensures that there is enough space in the h/w queue, as
+ *   both queues are of the same size.
+ * - Reserve space in the completion queue (needs to be reversed if there
+ *   is a failure down the road before the actual submission of work).
+ *
+ * Both operations are done using the "free_slots_cnt" field of the completion
+ * queue. The CI counters of the queue and the completion queue are not
+ * needed/used for the H/W queue type.
+ */
+static int hw_queue_sanity_checks(struct hl_device *hdev, struct hl_hw_queue *q,
+					int num_of_entries)
+{
+	atomic_t *free_slots =
+			&hdev->completion_queue[q->hw_queue_id].free_slots_cnt;
+
+	/*
+	 * Check we have enough space in the completion queue.
+	 * Add -1 to counter (decrement) unless counter was already 0.
+	 * In that case, CQ is full so we can't submit a new CB.
+	 * atomic_add_unless will return 0 if counter was already 0.
+	 */
+	if (atomic_add_negative(num_of_entries * -1, free_slots)) {
+		dev_dbg(hdev->dev, "No space for %d entries on CQ %d\n",
+			num_of_entries, q->hw_queue_id);
+		atomic_add(num_of_entries, free_slots);
+		return -EAGAIN;
+	}
+
+	return 0;
+}
+
+/*
  * hl_hw_queue_send_cb_no_cmpl - send a single CB (not a JOB) without completion
  *
  * @hdev: pointer to hl_device structure
@@ -188,7 +227,7 @@ int hl_hw_queue_send_cb_no_cmpl(struct hl_device *hdev, u32 hw_queue_id,
 				u32 cb_size, u64 cb_ptr)
 {
 	struct hl_hw_queue *q = &hdev->kernel_queues[hw_queue_id];
-	int rc;
+	int rc = 0;
 
 	/*
 	 * The CPU queue is a synchronous queue with an effective depth of
@@ -206,11 +245,18 @@ int hl_hw_queue_send_cb_no_cmpl(struct hl_device *hdev, u32 hw_queue_id,
 		goto out;
 	}
 
-	rc = ext_queue_sanity_checks(hdev, q, 1, false);
-	if (rc)
-		goto out;
+	/*
+	 * hl_hw_queue_send_cb_no_cmpl() is called for queues of a H/W queue
+	 * type only on init phase, when the queues are empty and being tested,
+	 * so there is no need for sanity checks.
+	 */
+	if (q->queue_type != QUEUE_TYPE_HW) {
+		rc = ext_queue_sanity_checks(hdev, q, 1, false);
+		if (rc)
+			goto out;
+	}
 
-	ext_queue_submit_bd(hdev, q, 0, cb_size, cb_ptr);
+	ext_and_hw_queue_submit_bd(hdev, q, 0, cb_size, cb_ptr);
 
 out:
 	if (q->queue_type != QUEUE_TYPE_CPU)
@@ -220,14 +266,14 @@ out:
 }
 
 /*
- * ext_hw_queue_schedule_job - submit an JOB to an external queue
+ * ext_queue_schedule_job - submit a JOB to an external queue
  *
  * @job: pointer to the job that needs to be submitted to the queue
  *
  * This function must be called when the scheduler mutex is taken
  *
  */
-static void ext_hw_queue_schedule_job(struct hl_cs_job *job)
+static void ext_queue_schedule_job(struct hl_cs_job *job)
 {
 	struct hl_device *hdev = job->cs->ctx->hdev;
 	struct hl_hw_queue *q = &hdev->kernel_queues[job->hw_queue_id];
@@ -260,7 +306,7 @@ static void ext_hw_queue_schedule_job(struct hl_cs_job *job)
 	 * H/W queues is done under the scheduler mutex
 	 *
 	 * No need to check if CQ is full because it was already
-	 * checked in hl_queue_sanity_checks
+	 * checked in ext_queue_sanity_checks
 	 */
 	cq = &hdev->completion_queue[q->hw_queue_id];
 	cq_addr = cq->bus_address + cq->pi * sizeof(struct hl_cq_entry);
@@ -274,18 +320,18 @@ static void ext_hw_queue_schedule_job(struct hl_cs_job *job)
 
 	cq->pi = hl_cq_inc_ptr(cq->pi);
 
-	ext_queue_submit_bd(hdev, q, ctl, len, ptr);
+	ext_and_hw_queue_submit_bd(hdev, q, ctl, len, ptr);
 }
 
 /*
- * int_hw_queue_schedule_job - submit an JOB to an internal queue
+ * int_queue_schedule_job - submit a JOB to an internal queue
  *
  * @job: pointer to the job that needs to be submitted to the queue
  *
  * This function must be called when the scheduler mutex is taken
  *
  */
-static void int_hw_queue_schedule_job(struct hl_cs_job *job)
+static void int_queue_schedule_job(struct hl_cs_job *job)
 {
 	struct hl_device *hdev = job->cs->ctx->hdev;
 	struct hl_hw_queue *q = &hdev->kernel_queues[job->hw_queue_id];
@@ -308,6 +354,60 @@ static void int_hw_queue_schedule_job(struct hl_cs_job *job)
 }
 
 /*
+ * hw_queue_schedule_job - submit a JOB to a H/W queue
+ *
+ * @job: pointer to the job that needs to be submitted to the queue
+ *
+ * This function must be called when the scheduler mutex is taken
+ *
+ */
+static void hw_queue_schedule_job(struct hl_cs_job *job)
+{
+	struct hl_device *hdev = job->cs->ctx->hdev;
+	struct hl_hw_queue *q = &hdev->kernel_queues[job->hw_queue_id];
+	struct hl_cq *cq;
+	u64 ptr;
+	u32 offset, ctl, len;
+
+	/*
+	 * Upon PQE completion, COMP_DATA is used as the write data to the
+	 * completion queue (QMAN HBW message), and COMP_OFFSET is used as the
+	 * write address offset in the SM block (QMAN LBW message).
+	 * The write address offset is calculated as "COMP_OFFSET << 2".
+	 */
+	offset = job->cs->sequence & (HL_MAX_PENDING_CS - 1);
+	ctl = ((offset << BD_CTL_COMP_OFFSET_SHIFT) & BD_CTL_COMP_OFFSET_MASK) |
+		((q->pi << BD_CTL_COMP_DATA_SHIFT) & BD_CTL_COMP_DATA_MASK);
+
+	len = job->job_cb_size;
+
+	/*
+	 * A patched CB is created only if a user CB was allocated by driver and
+	 * MMU is disabled. If MMU is enabled, the user CB should be used
+	 * instead. If the user CB wasn't allocated by driver, assume that it
+	 * holds an address.
+	 */
+	if (job->patched_cb)
+		ptr = job->patched_cb->bus_address;
+	else if (job->is_kernel_allocated_cb)
+		ptr = job->user_cb->bus_address;
+	else
+		ptr = (u64) (uintptr_t) job->user_cb;
+
+	/*
+	 * No need to protect pi_offset because scheduling to the
+	 * H/W queues is done under the scheduler mutex
+	 *
+	 * No need to check if CQ is full because it was already
+	 * checked in hw_queue_sanity_checks
+	 */
+	cq = &hdev->completion_queue[q->hw_queue_id];
+	cq->pi = hl_cq_inc_ptr(cq->pi);
+
+	ext_and_hw_queue_submit_bd(hdev, q, ctl, len, ptr);
+}
+
+/*
  * hl_hw_queue_schedule_cs - schedule a command submission
  *
  * @job        : pointer to the CS
@@ -330,23 +430,34 @@ int hl_hw_queue_schedule_cs(struct hl_cs *cs)
 	}
 
 	q = &hdev->kernel_queues[0];
-	/* This loop assumes all external queues are consecutive */
 	for (i = 0, cq_cnt = 0 ; i < HL_MAX_QUEUES ; i++, q++) {
-		if (q->queue_type == QUEUE_TYPE_EXT) {
-			if (cs->jobs_in_queue_cnt[i]) {
+		if (cs->jobs_in_queue_cnt[i]) {
+			switch (q->queue_type) {
+			case QUEUE_TYPE_EXT:
 				rc = ext_queue_sanity_checks(hdev, q,
-					cs->jobs_in_queue_cnt[i], true);
-				if (rc)
-					goto unroll_cq_resv;
-				cq_cnt++;
-			}
-		} else if (q->queue_type == QUEUE_TYPE_INT) {
-			if (cs->jobs_in_queue_cnt[i]) {
+						cs->jobs_in_queue_cnt[i], true);
+				break;
+			case QUEUE_TYPE_INT:
 				rc = int_queue_sanity_checks(hdev, q,
-					cs->jobs_in_queue_cnt[i]);
-				if (rc)
-					goto unroll_cq_resv;
+						cs->jobs_in_queue_cnt[i]);
+				break;
+			case QUEUE_TYPE_HW:
+				rc = hw_queue_sanity_checks(hdev, q,
+						cs->jobs_in_queue_cnt[i]);
+				break;
+			default:
+				dev_err(hdev->dev, "Queue type %d is invalid\n",
+					q->queue_type);
+				rc = -EINVAL;
+				break;
 			}
+
+			if (rc)
+				goto unroll_cq_resv;
+
+			if (q->queue_type == QUEUE_TYPE_EXT ||
+					q->queue_type == QUEUE_TYPE_HW)
+				cq_cnt++;
 		}
 	}
 
@@ -373,21 +484,30 @@ int hl_hw_queue_schedule_cs(struct hl_cs *cs)
 	}
 
 	list_for_each_entry_safe(job, tmp, &cs->job_list, cs_node)
-		if (job->ext_queue)
-			ext_hw_queue_schedule_job(job);
-		else
-			int_hw_queue_schedule_job(job);
+		switch (job->queue_type) {
+		case QUEUE_TYPE_EXT:
+			ext_queue_schedule_job(job);
+			break;
+		case QUEUE_TYPE_INT:
+			int_queue_schedule_job(job);
+			break;
+		case QUEUE_TYPE_HW:
+			hw_queue_schedule_job(job);
+			break;
+		default:
+			break;
+		}
 
 	cs->submitted = true;
 
 	goto out;
 
 unroll_cq_resv:
-	/* This loop assumes all external queues are consecutive */
 	q = &hdev->kernel_queues[0];
 	for (i = 0 ; (i < HL_MAX_QUEUES) && (cq_cnt > 0) ; i++, q++) {
-		if ((q->queue_type == QUEUE_TYPE_EXT) &&
-				(cs->jobs_in_queue_cnt[i])) {
+		if ((q->queue_type == QUEUE_TYPE_EXT ||
+				q->queue_type == QUEUE_TYPE_HW) &&
+				cs->jobs_in_queue_cnt[i]) {
 			atomic_t *free_slots =
 				&hdev->completion_queue[i].free_slots_cnt;
 			atomic_add(cs->jobs_in_queue_cnt[i], free_slots);
@@ -414,8 +534,8 @@ void hl_hw_queue_inc_ci_kernel(struct hl_device *hdev, u32 hw_queue_id)
 	q->ci = hl_queue_inc_ptr(q->ci);
 }
 
-static int ext_and_cpu_hw_queue_init(struct hl_device *hdev,
-				struct hl_hw_queue *q, bool is_cpu_queue)
+static int ext_and_cpu_queue_init(struct hl_device *hdev, struct hl_hw_queue *q,
+					bool is_cpu_queue)
 {
 	void *p;
 	int rc;
@@ -465,7 +585,7 @@ free_queue:
 	return rc;
 }
 
-static int int_hw_queue_init(struct hl_device *hdev, struct hl_hw_queue *q)
+static int int_queue_init(struct hl_device *hdev, struct hl_hw_queue *q)
 {
 	void *p;
 
@@ -485,18 +605,38 @@ static int int_hw_queue_init(struct hl_device *hdev, struct hl_hw_queue *q)
 	return 0;
 }
 
-static int cpu_hw_queue_init(struct hl_device *hdev, struct hl_hw_queue *q)
+static int cpu_queue_init(struct hl_device *hdev, struct hl_hw_queue *q)
+{
+	return ext_and_cpu_queue_init(hdev, q, true);
+}
+
+static int ext_queue_init(struct hl_device *hdev, struct hl_hw_queue *q)
 {
-	return ext_and_cpu_hw_queue_init(hdev, q, true);
+	return ext_and_cpu_queue_init(hdev, q, false);
 }
 
-static int ext_hw_queue_init(struct hl_device *hdev, struct hl_hw_queue *q)
+static int hw_queue_init(struct hl_device *hdev, struct hl_hw_queue *q)
 {
-	return ext_and_cpu_hw_queue_init(hdev, q, false);
+	void *p;
+
+	p = hdev->asic_funcs->asic_dma_alloc_coherent(hdev,
+						HL_QUEUE_SIZE_IN_BYTES,
+						&q->bus_address,
+						GFP_KERNEL | __GFP_ZERO);
+	if (!p)
+		return -ENOMEM;
+
+	q->kernel_address = (u64) (uintptr_t) p;
+
+	/* Make sure read/write pointers are initialized to start of queue */
+	q->ci = 0;
+	q->pi = 0;
+
+	return 0;
 }
 
 /*
- * hw_queue_init - main initialization function for H/W queue object
+ * queue_init - main initialization function for H/W queue object
  *
  * @hdev: pointer to hl_device device structure
  * @q: pointer to hl_hw_queue queue structure
@@ -505,7 +645,7 @@ static int ext_hw_queue_init(struct hl_device *hdev, struct hl_hw_queue *q)
  * Allocate dma-able memory for the queue and initialize fields
  * Returns 0 on success
  */
-static int hw_queue_init(struct hl_device *hdev, struct hl_hw_queue *q,
+static int queue_init(struct hl_device *hdev, struct hl_hw_queue *q,
 			u32 hw_queue_id)
 {
 	int rc;
@@ -516,21 +656,20 @@ static int hw_queue_init(struct hl_device *hdev, struct hl_hw_queue *q,
 
 	switch (q->queue_type) {
 	case QUEUE_TYPE_EXT:
-		rc = ext_hw_queue_init(hdev, q);
+		rc = ext_queue_init(hdev, q);
 		break;
-
 	case QUEUE_TYPE_INT:
-		rc = int_hw_queue_init(hdev, q);
+		rc = int_queue_init(hdev, q);
 		break;
-
 	case QUEUE_TYPE_CPU:
-		rc = cpu_hw_queue_init(hdev, q);
+		rc = cpu_queue_init(hdev, q);
+		break;
+	case QUEUE_TYPE_HW:
+		rc = hw_queue_init(hdev, q);
 		break;
-
 	case QUEUE_TYPE_NA:
 		q->valid = 0;
 		return 0;
-
 	default:
 		dev_crit(hdev->dev, "wrong queue type %d during init\n",
 			q->queue_type);
@@ -554,7 +693,7 @@ static int hw_queue_init(struct hl_device *hdev, struct hl_hw_queue *q,
  *
  * Free the queue memory
  */
-static void hw_queue_fini(struct hl_device *hdev, struct hl_hw_queue *q)
+static void queue_fini(struct hl_device *hdev, struct hl_hw_queue *q)
 {
 	if (!q->valid)
 		return;
@@ -612,7 +751,7 @@ int hl_hw_queues_create(struct hl_device *hdev)
 			i < HL_MAX_QUEUES ; i++, q_ready_cnt++, q++) {
 
 		q->queue_type = asic->hw_queues_props[i].type;
-		rc = hw_queue_init(hdev, q, i);
+		rc = queue_init(hdev, q, i);
 		if (rc) {
 			dev_err(hdev->dev,
 				"failed to initialize queue %d\n", i);
@@ -624,7 +763,7 @@ int hl_hw_queues_create(struct hl_device *hdev)
 
 release_queues:
 	for (i = 0, q = hdev->kernel_queues ; i < q_ready_cnt ; i++, q++)
-		hw_queue_fini(hdev, q);
+		queue_fini(hdev, q);
 
 	kfree(hdev->kernel_queues);
 
@@ -637,7 +776,7 @@ void hl_hw_queues_destroy(struct hl_device *hdev)
 	int i;
 
 	for (i = 0, q = hdev->kernel_queues ; i < HL_MAX_QUEUES ; i++, q++)
-		hw_queue_fini(hdev, q);
+		queue_fini(hdev, q);
 
 	kfree(hdev->kernel_queues);
 }
diff --git a/drivers/misc/habanalabs/include/goya/asic_reg/goya_masks.h b/drivers/misc/habanalabs/include/goya/asic_reg/goya_masks.h
index 8618891d5afa..3c44ef3a23ed 100644
--- a/drivers/misc/habanalabs/include/goya/asic_reg/goya_masks.h
+++ b/drivers/misc/habanalabs/include/goya/asic_reg/goya_masks.h
@@ -260,4 +260,6 @@
 #define DMA_QM_3_GLBL_CFG1_DMA_STOP_SHIFT DMA_QM_0_GLBL_CFG1_DMA_STOP_SHIFT
 #define DMA_QM_4_GLBL_CFG1_DMA_STOP_SHIFT DMA_QM_0_GLBL_CFG1_DMA_STOP_SHIFT
 
+#define PSOC_ETR_AXICTL_PROTCTRLBIT1_SHIFT                           1
+
 #endif /* ASIC_REG_GOYA_MASKS_H_ */
diff --git a/drivers/misc/habanalabs/include/goya/asic_reg/goya_regs.h b/drivers/misc/habanalabs/include/goya/asic_reg/goya_regs.h
index 19b0f0ef1d0b..fce490e6a231 100644
--- a/drivers/misc/habanalabs/include/goya/asic_reg/goya_regs.h
+++ b/drivers/misc/habanalabs/include/goya/asic_reg/goya_regs.h
@@ -84,6 +84,7 @@
 #include "tpc6_rtr_regs.h"
 #include "tpc7_nrtr_regs.h"
 #include "tpc0_eml_cfg_regs.h"
+#include "psoc_etr_regs.h"
 
 #include "psoc_global_conf_masks.h"
 #include "dma_macro_masks.h"
diff --git a/drivers/misc/habanalabs/include/goya/asic_reg/psoc_etr_regs.h b/drivers/misc/habanalabs/include/goya/asic_reg/psoc_etr_regs.h
new file mode 100644
index 000000000000..b7c33e025db5
--- /dev/null
+++ b/drivers/misc/habanalabs/include/goya/asic_reg/psoc_etr_regs.h
@@ -0,0 +1,114 @@
+/* SPDX-License-Identifier: GPL-2.0
+ *
+ * Copyright 2016-2018 HabanaLabs, Ltd.
+ * All Rights Reserved.
+ *
+ */
+
+/************************************
+ ** This is an auto-generated file **
+ **       DO NOT EDIT BELOW        **
+ ************************************/
+
+#ifndef ASIC_REG_PSOC_ETR_REGS_H_
+#define ASIC_REG_PSOC_ETR_REGS_H_
+
+/*
+ *****************************************
+ *   PSOC_ETR (Prototype: ETR)
+ *****************************************
+ */
+
+#define mmPSOC_ETR_RSZ                                               0x2C43004
+
+#define mmPSOC_ETR_STS                                               0x2C4300C
+
+#define mmPSOC_ETR_RRD                                               0x2C43010
+
+#define mmPSOC_ETR_RRP                                               0x2C43014
+
+#define mmPSOC_ETR_RWP                                               0x2C43018
+
+#define mmPSOC_ETR_TRG                                               0x2C4301C
+
+#define mmPSOC_ETR_CTL                                               0x2C43020
+
+#define mmPSOC_ETR_RWD                                               0x2C43024
+
+#define mmPSOC_ETR_MODE                                              0x2C43028
+
+#define mmPSOC_ETR_LBUFLEVEL                                         0x2C4302C
+
+#define mmPSOC_ETR_CBUFLEVEL                                         0x2C43030
+
+#define mmPSOC_ETR_BUFWM                                             0x2C43034
+
+#define mmPSOC_ETR_RRPHI                                             0x2C43038
+
+#define mmPSOC_ETR_RWPHI                                             0x2C4303C
+
+#define mmPSOC_ETR_AXICTL                                            0x2C43110
+
+#define mmPSOC_ETR_DBALO                                             0x2C43118
+
+#define mmPSOC_ETR_DBAHI                                             0x2C4311C
+
+#define mmPSOC_ETR_FFSR                                              0x2C43300
+
+#define mmPSOC_ETR_FFCR                                              0x2C43304
+
+#define mmPSOC_ETR_PSCR                                              0x2C43308
+
+#define mmPSOC_ETR_ITMISCOP0                                         0x2C43EE0
+
+#define mmPSOC_ETR_ITTRFLIN                                          0x2C43EE8
+
+#define mmPSOC_ETR_ITATBDATA0                                        0x2C43EEC
+
+#define mmPSOC_ETR_ITATBCTR2                                         0x2C43EF0
+
+#define mmPSOC_ETR_ITATBCTR1                                         0x2C43EF4
+
+#define mmPSOC_ETR_ITATBCTR0                                         0x2C43EF8
+
+#define mmPSOC_ETR_ITCTRL                                            0x2C43F00
+
+#define mmPSOC_ETR_CLAIMSET                                          0x2C43FA0
+
+#define mmPSOC_ETR_CLAIMCLR                                          0x2C43FA4
+
+#define mmPSOC_ETR_LAR                                               0x2C43FB0
+
+#define mmPSOC_ETR_LSR                                               0x2C43FB4
+
+#define mmPSOC_ETR_AUTHSTATUS                                        0x2C43FB8
+
+#define mmPSOC_ETR_DEVID                                             0x2C43FC8
+
+#define mmPSOC_ETR_DEVTYPE                                           0x2C43FCC
+
+#define mmPSOC_ETR_PERIPHID4                                         0x2C43FD0
+
+#define mmPSOC_ETR_PERIPHID5                                         0x2C43FD4
+
+#define mmPSOC_ETR_PERIPHID6                                         0x2C43FD8
+
+#define mmPSOC_ETR_PERIPHID7                                         0x2C43FDC
+
+#define mmPSOC_ETR_PERIPHID0                                         0x2C43FE0
+
+#define mmPSOC_ETR_PERIPHID1                                         0x2C43FE4
+
+#define mmPSOC_ETR_PERIPHID2                                         0x2C43FE8
+
+#define mmPSOC_ETR_PERIPHID3                                         0x2C43FEC
+
+#define mmPSOC_ETR_COMPID0                                           0x2C43FF0
+
+#define mmPSOC_ETR_COMPID1                                           0x2C43FF4
+
+#define mmPSOC_ETR_COMPID2                                           0x2C43FF8
+
+#define mmPSOC_ETR_COMPID3                                           0x2C43FFC
+
+#endif /* ASIC_REG_PSOC_ETR_REGS_H_ */
diff --git a/drivers/misc/habanalabs/include/hl_boot_if.h b/drivers/misc/habanalabs/include/hl_boot_if.h
index 4cd04c090285..2853a2de8cf6 100644
--- a/drivers/misc/habanalabs/include/hl_boot_if.h
+++ b/drivers/misc/habanalabs/include/hl_boot_if.h
@@ -20,6 +20,8 @@ enum cpu_boot_status {
 	CPU_BOOT_STATUS_DRAM_INIT_FAIL,
 	CPU_BOOT_STATUS_FIT_CORRUPTED,
 	CPU_BOOT_STATUS_UBOOT_NOT_READY,
+	CPU_BOOT_STATUS_RESERVED,
+	CPU_BOOT_STATUS_TS_INIT_FAIL,
 };
 
 enum kmd_msg {
diff --git a/drivers/misc/habanalabs/include/hw_ip/mmu/mmu_general.h b/drivers/misc/habanalabs/include/hw_ip/mmu/mmu_general.h
index 71ea3c3e8ba3..a6851a9d3f03 100644
--- a/drivers/misc/habanalabs/include/hw_ip/mmu/mmu_general.h
+++ b/drivers/misc/habanalabs/include/hw_ip/mmu/mmu_general.h
@@ -12,18 +12,16 @@
 #define PAGE_SHIFT_2MB			21
 #define PAGE_SIZE_2MB			(_AC(1, UL) << PAGE_SHIFT_2MB)
 #define PAGE_SIZE_4KB			(_AC(1, UL) << PAGE_SHIFT_4KB)
-#define PAGE_MASK_2MB			(~(PAGE_SIZE_2MB - 1))
 
 #define PAGE_PRESENT_MASK		0x0000000000001ull
 #define SWAP_OUT_MASK			0x0000000000004ull
 #define LAST_MASK			0x0000000000800ull
-#define PHYS_ADDR_MASK			0xFFFFFFFFFFFFF000ull
 #define HOP0_MASK			0x3000000000000ull
 #define HOP1_MASK			0x0FF8000000000ull
 #define HOP2_MASK			0x0007FC0000000ull
 #define HOP3_MASK			0x000003FE00000ull
 #define HOP4_MASK			0x00000001FF000ull
-#define OFFSET_MASK			0x0000000000FFFull
+#define FLAGS_MASK			0x0000000000FFFull
 
 #define HOP0_SHIFT			48
 #define HOP1_SHIFT			39
@@ -31,8 +29,7 @@
 #define HOP3_SHIFT			21
 #define HOP4_SHIFT			12
 
-#define PTE_PHYS_ADDR_SHIFT		12
-#define PTE_PHYS_ADDR_MASK		~OFFSET_MASK
+#define HOP_PHYS_ADDR_MASK		(~FLAGS_MASK)
 
 #define HL_PTE_SIZE			sizeof(u64)
 #define HOP_TABLE_SIZE			PAGE_SIZE_4KB
diff --git a/drivers/misc/habanalabs/include/qman_if.h b/drivers/misc/habanalabs/include/qman_if.h
index bf59bbe27fdc..0fdb49188ed7 100644
--- a/drivers/misc/habanalabs/include/qman_if.h
+++ b/drivers/misc/habanalabs/include/qman_if.h
@@ -23,6 +23,8 @@ struct hl_bd {
 #define HL_BD_SIZE			sizeof(struct hl_bd)
 
 /*
+ * S/W CTL FIELDS.
+ *
  * BD_CTL_REPEAT_VALID tells the CP whether the repeat field in the BD CTL is
  * valid. 1 means the repeat field is valid, 0 means not-valid,
  * i.e. repeat == 1
@@ -34,6 +36,16 @@ struct hl_bd {
 #define BD_CTL_SHADOW_INDEX_MASK	0x00000FFF
 
 /*
+ * H/W CTL FIELDS
+ */
+
+#define BD_CTL_COMP_OFFSET_SHIFT	16
+#define BD_CTL_COMP_OFFSET_MASK		0x00FF0000
+
+#define BD_CTL_COMP_DATA_SHIFT		0
+#define BD_CTL_COMP_DATA_MASK		0x0000FFFF
+
+/*
  * COMPLETION QUEUE
  */
 
diff --git a/drivers/misc/habanalabs/memory.c b/drivers/misc/habanalabs/memory.c
index 365fb0cb8dff..6c72cb4eff54 100644
--- a/drivers/misc/habanalabs/memory.c
+++ b/drivers/misc/habanalabs/memory.c
@@ -13,7 +13,6 @@
 #include <linux/slab.h>
 #include <linux/genalloc.h>
 
-#define PGS_IN_2MB_PAGE	(PAGE_SIZE_2MB >> PAGE_SHIFT)
 #define HL_MMU_DEBUG	0
 
 /*
@@ -159,20 +158,19 @@ pages_pack_err:
 }
 
 /*
- * get_userptr_from_host_va - initialize userptr structure from given host
- *                            virtual address
- *
- * @hdev                : habanalabs device structure
- * @args                : parameters containing the virtual address and size
- * @p_userptr           : pointer to result userptr structure
+ * dma_map_host_va - DMA mapping of the given host virtual address.
+ * @hdev: habanalabs device structure
+ * @addr: the host virtual address of the memory area
+ * @size: the size of the memory area
+ * @p_userptr: pointer to result userptr structure
  *
  * This function does the following:
  * - Allocate userptr structure
  * - Pin the given host memory using the userptr structure
  * - Perform DMA mapping to have the DMA addresses of the pages
  */
-static int get_userptr_from_host_va(struct hl_device *hdev,
-		struct hl_mem_in *args, struct hl_userptr **p_userptr)
+static int dma_map_host_va(struct hl_device *hdev, u64 addr, u64 size,
+				struct hl_userptr **p_userptr)
 {
 	struct hl_userptr *userptr;
 	int rc;
@@ -183,8 +181,7 @@ static int get_userptr_from_host_va(struct hl_device *hdev,
 		goto userptr_err;
 	}
 
-	rc = hl_pin_host_memory(hdev, args->map_host.host_virt_addr,
-			args->map_host.mem_size, userptr);
+	rc = hl_pin_host_memory(hdev, addr, size, userptr);
 	if (rc) {
 		dev_err(hdev->dev, "Failed to pin host memory\n");
 		goto pin_err;
@@ -215,16 +212,16 @@ userptr_err:
 }
 
 /*
- * free_userptr - free userptr structure
- *
- * @hdev                : habanalabs device structure
- * @userptr             : userptr to free
+ * dma_unmap_host_va - DMA unmapping of the given host virtual address.
+ * @hdev: habanalabs device structure
+ * @userptr: userptr to free
  *
  * This function does the following:
  * - Unpins the physical pages
  * - Frees the userptr structure
  */
-static void free_userptr(struct hl_device *hdev, struct hl_userptr *userptr)
+static void dma_unmap_host_va(struct hl_device *hdev,
+				struct hl_userptr *userptr)
 {
 	hl_unpin_host_memory(hdev, userptr);
 	kfree(userptr);
@@ -253,10 +250,9 @@ static void dram_pg_pool_do_release(struct kref *ref)
 }
 
 /*
- * free_phys_pg_pack   - free physical page pack
- *
- * @hdev               : habanalabs device structure
- * @phys_pg_pack       : physical page pack to free
+ * free_phys_pg_pack - free physical page pack
+ * @hdev: habanalabs device structure
+ * @phys_pg_pack: physical page pack to free
  *
  * This function does the following:
  * - For DRAM memory only, iterate over the pack and free each physical block
@@ -264,7 +260,7 @@ static void dram_pg_pool_do_release(struct kref *ref)
  * - Free the hl_vm_phys_pg_pack structure
  */
 static void free_phys_pg_pack(struct hl_device *hdev,
-		struct hl_vm_phys_pg_pack *phys_pg_pack)
+				struct hl_vm_phys_pg_pack *phys_pg_pack)
 {
 	struct hl_vm *vm = &hdev->vm;
 	u64 i;
@@ -519,8 +515,8 @@ static inline int add_va_block(struct hl_device *hdev,
  * - Return the start address of the virtual block
  */
 static u64 get_va_block(struct hl_device *hdev,
-		struct hl_va_range *va_range, u64 size, u64 hint_addr,
-		bool is_userptr)
+			struct hl_va_range *va_range, u64 size, u64 hint_addr,
+			bool is_userptr)
 {
 	struct hl_vm_va_block *va_block, *new_va_block = NULL;
 	u64 valid_start, valid_size, prev_start, prev_end, page_mask,
@@ -528,18 +524,17 @@ static u64 get_va_block(struct hl_device *hdev,
 	u32 page_size;
 	bool add_prev = false;
 
-	if (is_userptr) {
+	if (is_userptr)
 		/*
 		 * We cannot know if the user allocated memory with huge pages
 		 * or not, hence we continue with the biggest possible
 		 * granularity.
 		 */
-		page_size = PAGE_SIZE_2MB;
-		page_mask = PAGE_MASK_2MB;
-	} else {
-		page_size = hdev->asic_prop.dram_page_size;
-		page_mask = ~((u64)page_size - 1);
-	}
+		page_size = hdev->asic_prop.pmmu.huge_page_size;
+	else
+		page_size = hdev->asic_prop.dmmu.page_size;
+
+	page_mask = ~((u64)page_size - 1);
 
 	mutex_lock(&va_range->lock);
 
@@ -549,7 +544,6 @@ static u64 get_va_block(struct hl_device *hdev,
 		/* calc the first possible aligned addr */
 		valid_start = va_block->start;
 
-
 		if (valid_start & (page_size - 1)) {
 			valid_start &= page_mask;
 			valid_start += page_size;
@@ -561,7 +555,6 @@ static u64 get_va_block(struct hl_device *hdev,
 
 		if (valid_size >= size &&
 			(!new_va_block || valid_size < res_valid_size)) {
-
 			new_va_block = va_block;
 			res_valid_start = valid_start;
 			res_valid_size = valid_size;
@@ -631,11 +624,10 @@ static u32 get_sg_info(struct scatterlist *sg, dma_addr_t *dma_addr)
 
 /*
  * init_phys_pg_pack_from_userptr - initialize physical page pack from host
- *                                   memory
- *
- * @ctx                : current context
- * @userptr            : userptr to initialize from
- * @pphys_pg_pack      : res pointer
+ *                                  memory
+ * @ctx: current context
+ * @userptr: userptr to initialize from
+ * @pphys_pg_pack: result pointer
  *
  * This function does the following:
  * - Pin the physical pages related to the given virtual block
@@ -643,16 +635,19 @@ static u32 get_sg_info(struct scatterlist *sg, dma_addr_t *dma_addr)
  *   virtual block
  */
 static int init_phys_pg_pack_from_userptr(struct hl_ctx *ctx,
-		struct hl_userptr *userptr,
-		struct hl_vm_phys_pg_pack **pphys_pg_pack)
+				struct hl_userptr *userptr,
+				struct hl_vm_phys_pg_pack **pphys_pg_pack)
 {
+	struct hl_mmu_properties *mmu_prop = &ctx->hdev->asic_prop.pmmu;
 	struct hl_vm_phys_pg_pack *phys_pg_pack;
 	struct scatterlist *sg;
 	dma_addr_t dma_addr;
 	u64 page_mask, total_npages;
-	u32 npages, page_size = PAGE_SIZE;
+	u32 npages, page_size = PAGE_SIZE,
+		huge_page_size = mmu_prop->huge_page_size;
 	bool first = true, is_huge_page_opt = true;
 	int rc, i, j;
+	u32 pgs_in_huge_page = huge_page_size >> __ffs(page_size);
 
 	phys_pg_pack = kzalloc(sizeof(*phys_pg_pack), GFP_KERNEL);
 	if (!phys_pg_pack)
@@ -675,14 +670,14 @@ static int init_phys_pg_pack_from_userptr(struct hl_ctx *ctx,
 
 		total_npages += npages;
 
-		if ((npages % PGS_IN_2MB_PAGE) ||
-					(dma_addr & (PAGE_SIZE_2MB - 1)))
+		if ((npages % pgs_in_huge_page) ||
+					(dma_addr & (huge_page_size - 1)))
 			is_huge_page_opt = false;
 	}
 
 	if (is_huge_page_opt) {
-		page_size = PAGE_SIZE_2MB;
-		total_npages /= PGS_IN_2MB_PAGE;
+		page_size = huge_page_size;
+		do_div(total_npages, pgs_in_huge_page);
 	}
 
 	page_mask = ~(((u64) page_size) - 1);
@@ -714,7 +709,7 @@ static int init_phys_pg_pack_from_userptr(struct hl_ctx *ctx,
 			dma_addr += page_size;
 
 			if (is_huge_page_opt)
-				npages -= PGS_IN_2MB_PAGE;
+				npages -= pgs_in_huge_page;
 			else
 				npages--;
 		}
@@ -731,19 +726,18 @@ page_pack_arr_mem_err:
 }
 
 /*
- * map_phys_page_pack - maps the physical page pack
- *
- * @ctx                : current context
- * @vaddr              : start address of the virtual area to map from
- * @phys_pg_pack       : the pack of physical pages to map to
+ * map_phys_pg_pack - maps the physical page pack.
+ * @ctx: current context
+ * @vaddr: start address of the virtual area to map from
+ * @phys_pg_pack: the pack of physical pages to map to
  *
  * This function does the following:
  * - Maps each chunk of virtual memory to matching physical chunk
  * - Stores number of successful mappings in the given argument
- * - Returns 0 on success, error code otherwise.
+ * - Returns 0 on success, error code otherwise
  */
-static int map_phys_page_pack(struct hl_ctx *ctx, u64 vaddr,
-		struct hl_vm_phys_pg_pack *phys_pg_pack)
+static int map_phys_pg_pack(struct hl_ctx *ctx, u64 vaddr,
+				struct hl_vm_phys_pg_pack *phys_pg_pack)
 {
 	struct hl_device *hdev = ctx->hdev;
 	u64 next_vaddr = vaddr, paddr, mapped_pg_cnt = 0, i;
@@ -783,6 +777,36 @@ err:
 	return rc;
 }
 
+/*
+ * unmap_phys_pg_pack - unmaps the physical page pack
+ * @ctx: current context
+ * @vaddr: start address of the virtual area to unmap
+ * @phys_pg_pack: the pack of physical pages to unmap
+ */
+static void unmap_phys_pg_pack(struct hl_ctx *ctx, u64 vaddr,
+				struct hl_vm_phys_pg_pack *phys_pg_pack)
+{
+	struct hl_device *hdev = ctx->hdev;
+	u64 next_vaddr, i;
+	u32 page_size;
+
+	page_size = phys_pg_pack->page_size;
+	next_vaddr = vaddr;
+
+	for (i = 0 ; i < phys_pg_pack->npages ; i++, next_vaddr += page_size) {
+		if (hl_mmu_unmap(ctx, next_vaddr, page_size))
+			dev_warn_ratelimited(hdev->dev,
+			"unmap failed for vaddr: 0x%llx\n", next_vaddr);
+
+		/*
+		 * unmapping on Palladium can be really long, so avoid a CPU
+		 * soft lockup bug by sleeping a little between unmapping pages
+		 */
+		if (hdev->pldm)
+			usleep_range(500, 1000);
+	}
+}
+
 static int get_paddr_from_handle(struct hl_ctx *ctx, struct hl_mem_in *args,
 				u64 *paddr)
 {
@@ -839,7 +863,10 @@ static int map_device_va(struct hl_ctx *ctx, struct hl_mem_in *args,
 	*device_addr = 0;
 
 	if (is_userptr) {
-		rc = get_userptr_from_host_va(hdev, args, &userptr);
+		u64 addr = args->map_host.host_virt_addr,
+			size = args->map_host.mem_size;
+
+		rc = dma_map_host_va(hdev, addr, size, &userptr);
 		if (rc) {
 			dev_err(hdev->dev, "failed to get userptr from va\n");
 			return rc;
@@ -850,7 +877,7 @@ static int map_device_va(struct hl_ctx *ctx, struct hl_mem_in *args,
 		if (rc) {
 			dev_err(hdev->dev,
 				"unable to init page pack for vaddr 0x%llx\n",
-				args->map_host.host_virt_addr);
+				addr);
 			goto init_page_pack_err;
 		}
 
@@ -909,7 +936,7 @@ static int map_device_va(struct hl_ctx *ctx, struct hl_mem_in *args,
 
 	mutex_lock(&ctx->mmu_lock);
 
-	rc = map_phys_page_pack(ctx, ret_vaddr, phys_pg_pack);
+	rc = map_phys_pg_pack(ctx, ret_vaddr, phys_pg_pack);
 	if (rc) {
 		mutex_unlock(&ctx->mmu_lock);
 		dev_err(hdev->dev, "mapping page pack failed for handle %u\n",
@@ -917,7 +944,7 @@ static int map_device_va(struct hl_ctx *ctx, struct hl_mem_in *args,
 		goto map_err;
 	}
 
-	hdev->asic_funcs->mmu_invalidate_cache(hdev, false);
+	hdev->asic_funcs->mmu_invalidate_cache(hdev, false, *vm_type);
 
 	mutex_unlock(&ctx->mmu_lock);
 
@@ -955,7 +982,7 @@ shared_err:
 		free_phys_pg_pack(hdev, phys_pg_pack);
 init_page_pack_err:
 	if (is_userptr)
-		free_userptr(hdev, userptr);
+		dma_unmap_host_va(hdev, userptr);
 
 	return rc;
 }
@@ -965,20 +992,20 @@ init_page_pack_err:
  *
  * @ctx                 : current context
  * @vaddr               : device virtual address to unmap
+ * @ctx_free            : true if in context free flow, false otherwise.
  *
  * This function does the following:
  * - Unmap the physical pages related to the given virtual address
  * - return the device virtual block to the virtual block list
  */
-static int unmap_device_va(struct hl_ctx *ctx, u64 vaddr)
+static int unmap_device_va(struct hl_ctx *ctx, u64 vaddr, bool ctx_free)
 {
 	struct hl_device *hdev = ctx->hdev;
 	struct hl_vm_phys_pg_pack *phys_pg_pack = NULL;
 	struct hl_vm_hash_node *hnode = NULL;
 	struct hl_userptr *userptr = NULL;
+	struct hl_va_range *va_range;
 	enum vm_type_t *vm_type;
-	u64 next_vaddr, i;
-	u32 page_size;
 	bool is_userptr;
 	int rc;
 
@@ -1003,9 +1030,10 @@ static int unmap_device_va(struct hl_ctx *ctx, u64 vaddr)
 
 	if (*vm_type == VM_TYPE_USERPTR) {
 		is_userptr = true;
+		va_range = &ctx->host_va_range;
 		userptr = hnode->ptr;
 		rc = init_phys_pg_pack_from_userptr(ctx, userptr,
-				&phys_pg_pack);
+							&phys_pg_pack);
 		if (rc) {
 			dev_err(hdev->dev,
 				"unable to init page pack for vaddr 0x%llx\n",
@@ -1014,6 +1042,7 @@ static int unmap_device_va(struct hl_ctx *ctx, u64 vaddr)
 		}
 	} else if (*vm_type == VM_TYPE_PHYS_PACK) {
 		is_userptr = false;
+		va_range = &ctx->dram_va_range;
 		phys_pg_pack = hnode->ptr;
 	} else {
 		dev_warn(hdev->dev,
@@ -1029,42 +1058,41 @@ static int unmap_device_va(struct hl_ctx *ctx, u64 vaddr)
 		goto mapping_cnt_err;
 	}
 
-	page_size = phys_pg_pack->page_size;
-	vaddr &= ~(((u64) page_size) - 1);
-
-	next_vaddr = vaddr;
+	vaddr &= ~(((u64) phys_pg_pack->page_size) - 1);
 
 	mutex_lock(&ctx->mmu_lock);
 
-	for (i = 0 ; i < phys_pg_pack->npages ; i++, next_vaddr += page_size) {
-		if (hl_mmu_unmap(ctx, next_vaddr, page_size))
-			dev_warn_ratelimited(hdev->dev,
-			"unmap failed for vaddr: 0x%llx\n", next_vaddr);
-
-		/* unmapping on Palladium can be really long, so avoid a CPU
-		 * soft lockup bug by sleeping a little between unmapping pages
-		 */
-		if (hdev->pldm)
-			usleep_range(500, 1000);
-	}
+	unmap_phys_pg_pack(ctx, vaddr, phys_pg_pack);
 
-	hdev->asic_funcs->mmu_invalidate_cache(hdev, true);
+	/*
+	 * During context free this function is called in a loop to clean all
+	 * the context mappings. Hence the cache invalidation can be called once
+	 * at the loop end rather than for each iteration
+	 */
+	if (!ctx_free)
+		hdev->asic_funcs->mmu_invalidate_cache(hdev, true, *vm_type);
 
 	mutex_unlock(&ctx->mmu_lock);
 
-	if (add_va_block(hdev,
-			is_userptr ? &ctx->host_va_range : &ctx->dram_va_range,
-			vaddr,
-			vaddr + phys_pg_pack->total_size - 1))
-		dev_warn(hdev->dev, "add va block failed for vaddr: 0x%llx\n",
-				vaddr);
+	/*
+	 * No point in maintaining the free VA block list if the context is
+	 * closing as the list will be freed anyway
+	 */
+	if (!ctx_free) {
+		rc = add_va_block(hdev, va_range, vaddr,
+					vaddr + phys_pg_pack->total_size - 1);
+		if (rc)
+			dev_warn(hdev->dev,
+					"add va block failed for vaddr: 0x%llx\n",
+					vaddr);
+	}
 
 	atomic_dec(&phys_pg_pack->mapping_cnt);
 	kfree(hnode);
 
 	if (is_userptr) {
 		free_phys_pg_pack(hdev, phys_pg_pack);
-		free_userptr(hdev, userptr);
+		dma_unmap_host_va(hdev, userptr);
 	}
 
 	return 0;
@@ -1189,8 +1217,8 @@ int hl_mem_ioctl(struct hl_fpriv *hpriv, void *data)
 		break;
 
 	case HL_MEM_OP_UNMAP:
-		rc = unmap_device_va(ctx,
-				args->in.unmap.device_virt_addr);
+		rc = unmap_device_va(ctx, args->in.unmap.device_virt_addr,
+					false);
 		break;
 
 	default:
@@ -1203,20 +1231,72 @@ out:
 	return rc;
 }
 
+static int get_user_memory(struct hl_device *hdev, u64 addr, u64 size,
+				u32 npages, u64 start, u32 offset,
+				struct hl_userptr *userptr)
+{
+	int rc;
+
+	if (!access_ok((void __user *) (uintptr_t) addr, size)) {
+		dev_err(hdev->dev, "user pointer is invalid - 0x%llx\n", addr);
+		return -EFAULT;
+	}
+
+	userptr->vec = frame_vector_create(npages);
+	if (!userptr->vec) {
+		dev_err(hdev->dev, "Failed to create frame vector\n");
+		return -ENOMEM;
+	}
+
+	rc = get_vaddr_frames(start, npages, FOLL_FORCE | FOLL_WRITE,
+				userptr->vec);
+
+	if (rc != npages) {
+		dev_err(hdev->dev,
+			"Failed to map host memory, user ptr probably wrong\n");
+		if (rc < 0)
+			goto destroy_framevec;
+		rc = -EFAULT;
+		goto put_framevec;
+	}
+
+	if (frame_vector_to_pages(userptr->vec) < 0) {
+		dev_err(hdev->dev,
+			"Failed to translate frame vector to pages\n");
+		rc = -EFAULT;
+		goto put_framevec;
+	}
+
+	rc = sg_alloc_table_from_pages(userptr->sgt,
+					frame_vector_pages(userptr->vec),
+					npages, offset, size, GFP_ATOMIC);
+	if (rc < 0) {
+		dev_err(hdev->dev, "failed to create SG table from pages\n");
+		goto put_framevec;
+	}
+
+	return 0;
+
+put_framevec:
+	put_vaddr_frames(userptr->vec);
+destroy_framevec:
+	frame_vector_destroy(userptr->vec);
+	return rc;
+}
+
 /*
- * hl_pin_host_memory - pins a chunk of host memory
- *
- * @hdev                : pointer to the habanalabs device structure
- * @addr                : the user-space virtual address of the memory area
- * @size                : the size of the memory area
- * @userptr	        : pointer to hl_userptr structure
+ * hl_pin_host_memory - pins a chunk of host memory.
+ * @hdev: pointer to the habanalabs device structure
+ * @addr: the host virtual address of the memory area
+ * @size: the size of the memory area
+ * @userptr: pointer to hl_userptr structure
  *
  * This function does the following:
  * - Pins the physical pages
- * - Create a SG list from those pages
+ * - Create an SG list from those pages
  */
 int hl_pin_host_memory(struct hl_device *hdev, u64 addr, u64 size,
-			struct hl_userptr *userptr)
+					struct hl_userptr *userptr)
 {
 	u64 start, end;
 	u32 npages, offset;
@@ -1227,11 +1307,6 @@ int hl_pin_host_memory(struct hl_device *hdev, u64 addr, u64 size,
 		return -EINVAL;
 	}
 
-	if (!access_ok((void __user *) (uintptr_t) addr, size)) {
-		dev_err(hdev->dev, "user pointer is invalid - 0x%llx\n", addr);
-		return -EFAULT;
-	}
-
 	/*
 	 * If the combination of the address and size requested for this memory
 	 * region causes an integer overflow, return error.
@@ -1244,6 +1319,14 @@ int hl_pin_host_memory(struct hl_device *hdev, u64 addr, u64 size,
 		return -EINVAL;
 	}
 
+	/*
+	 * This function can be called also from data path, hence use atomic
+	 * always as it is not a big allocation.
+	 */
+	userptr->sgt = kzalloc(sizeof(*userptr->sgt), GFP_ATOMIC);
+	if (!userptr->sgt)
+		return -ENOMEM;
+
 	start = addr & PAGE_MASK;
 	offset = addr & ~PAGE_MASK;
 	end = PAGE_ALIGN(addr + size);
@@ -1254,42 +1337,12 @@ int hl_pin_host_memory(struct hl_device *hdev, u64 addr, u64 size,
 	userptr->dma_mapped = false;
 	INIT_LIST_HEAD(&userptr->job_node);
 
-	userptr->vec = frame_vector_create(npages);
-	if (!userptr->vec) {
-		dev_err(hdev->dev, "Failed to create frame vector\n");
-		return -ENOMEM;
-	}
-
-	rc = get_vaddr_frames(start, npages, FOLL_FORCE | FOLL_WRITE,
-				userptr->vec);
-
-	if (rc != npages) {
-		dev_err(hdev->dev,
-			"Failed to map host memory, user ptr probably wrong\n");
-		if (rc < 0)
-			goto destroy_framevec;
-		rc = -EFAULT;
-		goto put_framevec;
-	}
-
-	if (frame_vector_to_pages(userptr->vec) < 0) {
+	rc = get_user_memory(hdev, addr, size, npages, start, offset,
+				userptr);
+	if (rc) {
 		dev_err(hdev->dev,
-			"Failed to translate frame vector to pages\n");
-		rc = -EFAULT;
-		goto put_framevec;
-	}
-
-	userptr->sgt = kzalloc(sizeof(*userptr->sgt), GFP_ATOMIC);
-	if (!userptr->sgt) {
-		rc = -ENOMEM;
-		goto put_framevec;
-	}
-
-	rc = sg_alloc_table_from_pages(userptr->sgt,
-					frame_vector_pages(userptr->vec),
-					npages, offset, size, GFP_ATOMIC);
-	if (rc < 0) {
-		dev_err(hdev->dev, "failed to create SG table from pages\n");
+			"failed to get user memory for address 0x%llx\n",
+			addr);
 		goto free_sgt;
 	}
 
@@ -1299,34 +1352,28 @@ int hl_pin_host_memory(struct hl_device *hdev, u64 addr, u64 size,
 
 free_sgt:
 	kfree(userptr->sgt);
-put_framevec:
-	put_vaddr_frames(userptr->vec);
-destroy_framevec:
-	frame_vector_destroy(userptr->vec);
 	return rc;
 }
 
 /*
- * hl_unpin_host_memory - unpins a chunk of host memory
- *
- * @hdev                : pointer to the habanalabs device structure
- * @userptr             : pointer to hl_userptr structure
+ * hl_unpin_host_memory - unpins a chunk of host memory.
+ * @hdev: pointer to the habanalabs device structure
+ * @userptr: pointer to hl_userptr structure
  *
  * This function does the following:
  * - Unpins the physical pages related to the host memory
  * - Free the SG list
  */
-int hl_unpin_host_memory(struct hl_device *hdev, struct hl_userptr *userptr)
+void hl_unpin_host_memory(struct hl_device *hdev, struct hl_userptr *userptr)
 {
 	struct page **pages;
 
 	hl_debugfs_remove_userptr(hdev, userptr);
 
 	if (userptr->dma_mapped)
-		hdev->asic_funcs->hl_dma_unmap_sg(hdev,
-				userptr->sgt->sgl,
-				userptr->sgt->nents,
-				userptr->dir);
+		hdev->asic_funcs->hl_dma_unmap_sg(hdev, userptr->sgt->sgl,
+							userptr->sgt->nents,
+							userptr->dir);
 
 	pages = frame_vector_pages(userptr->vec);
 	if (!IS_ERR(pages)) {
@@ -1342,8 +1389,6 @@ int hl_unpin_host_memory(struct hl_device *hdev, struct hl_userptr *userptr)
 
 	sg_free_table(userptr->sgt);
 	kfree(userptr->sgt);
-
-	return 0;
 }
 
 /*
@@ -1542,43 +1587,16 @@ int hl_vm_ctx_init(struct hl_ctx *ctx)
  * @hdev                : pointer to the habanalabs structure
  * va_range             : pointer to virtual addresses range
  *
- * This function initializes the following:
- * - Checks that the given range contains the whole initial range
+ * This function does the following:
  * - Frees the virtual addresses block list and its lock
  */
 static void hl_va_range_fini(struct hl_device *hdev,
 		struct hl_va_range *va_range)
 {
-	struct hl_vm_va_block *va_block;
-
-	if (list_empty(&va_range->list)) {
-		dev_warn(hdev->dev,
-				"va list should not be empty on cleanup!\n");
-		goto out;
-	}
-
-	if (!list_is_singular(&va_range->list)) {
-		dev_warn(hdev->dev,
-			"va list should not contain multiple blocks on cleanup!\n");
-		goto free_va_list;
-	}
-
-	va_block = list_first_entry(&va_range->list, typeof(*va_block), node);
-
-	if (va_block->start != va_range->start_addr ||
-		va_block->end != va_range->end_addr) {
-		dev_warn(hdev->dev,
-			"wrong va block on cleanup, from 0x%llx to 0x%llx\n",
-				va_block->start, va_block->end);
-		goto free_va_list;
-	}
-
-free_va_list:
 	mutex_lock(&va_range->lock);
 	clear_va_list_locked(hdev, &va_range->list);
 	mutex_unlock(&va_range->lock);
 
-out:
 	mutex_destroy(&va_range->lock);
 }
 
@@ -1613,21 +1631,31 @@ void hl_vm_ctx_fini(struct hl_ctx *ctx)
 
 	hl_debugfs_remove_ctx_mem_hash(hdev, ctx);
 
-	if (!hash_empty(ctx->mem_hash))
-		dev_notice(hdev->dev, "ctx is freed while it has va in use\n");
+	/*
+	 * Clearly something went wrong on hard reset so no point in printing
+	 * another side effect error
+	 */
+	if (!hdev->hard_reset_pending && !hash_empty(ctx->mem_hash))
+		dev_notice(hdev->dev,
+				"ctx %d is freed while it has va in use\n",
+				ctx->asid);
 
 	hash_for_each_safe(ctx->mem_hash, i, tmp_node, hnode, node) {
 		dev_dbg(hdev->dev,
 			"hl_mem_hash_node of vaddr 0x%llx of asid %d is still alive\n",
 			hnode->vaddr, ctx->asid);
-		unmap_device_va(ctx, hnode->vaddr);
+		unmap_device_va(ctx, hnode->vaddr, true);
 	}
 
+	/* invalidate the cache once after the unmapping loop */
+	hdev->asic_funcs->mmu_invalidate_cache(hdev, true, VM_TYPE_USERPTR);
+	hdev->asic_funcs->mmu_invalidate_cache(hdev, true, VM_TYPE_PHYS_PACK);
+
 	spin_lock(&vm->idr_lock);
 	idr_for_each_entry(&vm->phys_pg_pack_handles, phys_pg_list, i)
 		if (phys_pg_list->asid == ctx->asid) {
 			dev_dbg(hdev->dev,
-				"page list 0x%p of asid %d is still alive\n",
+				"page list 0x%px of asid %d is still alive\n",
 				phys_pg_list, ctx->asid);
 			atomic64_sub(phys_pg_list->total_size,
 					&hdev->dram_used_mem);
diff --git a/drivers/misc/habanalabs/mmu.c b/drivers/misc/habanalabs/mmu.c
index 176c315836f1..6262b26e2086 100644
--- a/drivers/misc/habanalabs/mmu.c
+++ b/drivers/misc/habanalabs/mmu.c
@@ -25,10 +25,9 @@ static struct pgt_info *get_pgt_info(struct hl_ctx *ctx, u64 hop_addr)
 	return pgt_info;
 }
 
-static void free_hop(struct hl_ctx *ctx, u64 hop_addr)
+static void _free_hop(struct hl_ctx *ctx, struct pgt_info *pgt_info)
 {
 	struct hl_device *hdev = ctx->hdev;
-	struct pgt_info *pgt_info = get_pgt_info(ctx, hop_addr);
 
 	gen_pool_free(hdev->mmu_pgt_pool, pgt_info->phys_addr,
 			hdev->asic_prop.mmu_hop_table_size);
@@ -37,6 +36,13 @@ static void free_hop(struct hl_ctx *ctx, u64 hop_addr)
 	kfree(pgt_info);
 }
 
+static void free_hop(struct hl_ctx *ctx, u64 hop_addr)
+{
+	struct pgt_info *pgt_info = get_pgt_info(ctx, hop_addr);
+
+	_free_hop(ctx, pgt_info);
+}
+
 static u64 alloc_hop(struct hl_ctx *ctx)
 {
 	struct hl_device *hdev = ctx->hdev;
@@ -105,8 +111,8 @@ static inline void write_pte(struct hl_ctx *ctx, u64 shadow_pte_addr, u64 val)
 	 * clear the 12 LSBs and translate the shadow hop to its associated
 	 * physical hop, and add back the original 12 LSBs.
 	 */
-	u64 phys_val = get_phys_addr(ctx, val & PTE_PHYS_ADDR_MASK) |
-				(val & OFFSET_MASK);
+	u64 phys_val = get_phys_addr(ctx, val & HOP_PHYS_ADDR_MASK) |
+				(val & FLAGS_MASK);
 
 	ctx->hdev->asic_funcs->write_pte(ctx->hdev,
 					get_phys_addr(ctx, shadow_pte_addr),
@@ -159,7 +165,7 @@ static inline int put_pte(struct hl_ctx *ctx, u64 hop_addr)
 	 */
 	num_of_ptes_left = pgt_info->num_of_ptes;
 	if (!num_of_ptes_left)
-		free_hop(ctx, hop_addr);
+		_free_hop(ctx, pgt_info);
 
 	return num_of_ptes_left;
 }
@@ -171,35 +177,50 @@ static inline u64 get_hopN_pte_addr(struct hl_ctx *ctx, u64 hop_addr,
 			((virt_addr & mask) >> shift);
 }
 
-static inline u64 get_hop0_pte_addr(struct hl_ctx *ctx, u64 hop_addr, u64 vaddr)
+static inline u64 get_hop0_pte_addr(struct hl_ctx *ctx,
+					struct hl_mmu_properties *mmu_prop,
+					u64 hop_addr, u64 vaddr)
 {
-	return get_hopN_pte_addr(ctx, hop_addr, vaddr, HOP0_MASK, HOP0_SHIFT);
+	return get_hopN_pte_addr(ctx, hop_addr, vaddr, mmu_prop->hop0_mask,
+					mmu_prop->hop0_shift);
 }
 
-static inline u64 get_hop1_pte_addr(struct hl_ctx *ctx, u64 hop_addr, u64 vaddr)
+static inline u64 get_hop1_pte_addr(struct hl_ctx *ctx,
+					struct hl_mmu_properties *mmu_prop,
+					u64 hop_addr, u64 vaddr)
 {
-	return get_hopN_pte_addr(ctx, hop_addr, vaddr, HOP1_MASK, HOP1_SHIFT);
+	return get_hopN_pte_addr(ctx, hop_addr, vaddr, mmu_prop->hop1_mask,
+					mmu_prop->hop1_shift);
 }
 
-static inline u64 get_hop2_pte_addr(struct hl_ctx *ctx, u64 hop_addr, u64 vaddr)
+static inline u64 get_hop2_pte_addr(struct hl_ctx *ctx,
+					struct hl_mmu_properties *mmu_prop,
+					u64 hop_addr, u64 vaddr)
 {
-	return get_hopN_pte_addr(ctx, hop_addr, vaddr, HOP2_MASK, HOP2_SHIFT);
+	return get_hopN_pte_addr(ctx, hop_addr, vaddr, mmu_prop->hop2_mask,
+					mmu_prop->hop2_shift);
 }
 
-static inline u64 get_hop3_pte_addr(struct hl_ctx *ctx, u64 hop_addr, u64 vaddr)
+static inline u64 get_hop3_pte_addr(struct hl_ctx *ctx,
+					struct hl_mmu_properties *mmu_prop,
+					u64 hop_addr, u64 vaddr)
 {
-	return get_hopN_pte_addr(ctx, hop_addr, vaddr, HOP3_MASK, HOP3_SHIFT);
+	return get_hopN_pte_addr(ctx, hop_addr, vaddr, mmu_prop->hop3_mask,
+					mmu_prop->hop3_shift);
 }
 
-static inline u64 get_hop4_pte_addr(struct hl_ctx *ctx, u64 hop_addr, u64 vaddr)
+static inline u64 get_hop4_pte_addr(struct hl_ctx *ctx,
+					struct hl_mmu_properties *mmu_prop,
+					u64 hop_addr, u64 vaddr)
 {
-	return get_hopN_pte_addr(ctx, hop_addr, vaddr, HOP4_MASK, HOP4_SHIFT);
+	return get_hopN_pte_addr(ctx, hop_addr, vaddr, mmu_prop->hop4_mask,
+					mmu_prop->hop4_shift);
 }
 
 static inline u64 get_next_hop_addr(struct hl_ctx *ctx, u64 curr_pte)
 {
 	if (curr_pte & PAGE_PRESENT_MASK)
-		return curr_pte & PHYS_ADDR_MASK;
+		return curr_pte & HOP_PHYS_ADDR_MASK;
 	else
 		return ULLONG_MAX;
 }
@@ -288,23 +309,23 @@ static int dram_default_mapping_init(struct hl_ctx *ctx)
 	}
 
 	/* need only pte 0 in hops 0 and 1 */
-	pte_val = (hop1_addr & PTE_PHYS_ADDR_MASK) | PAGE_PRESENT_MASK;
+	pte_val = (hop1_addr & HOP_PHYS_ADDR_MASK) | PAGE_PRESENT_MASK;
 	write_pte(ctx, hop0_addr, pte_val);
 
-	pte_val = (hop2_addr & PTE_PHYS_ADDR_MASK) | PAGE_PRESENT_MASK;
+	pte_val = (hop2_addr & HOP_PHYS_ADDR_MASK) | PAGE_PRESENT_MASK;
 	write_pte(ctx, hop1_addr, pte_val);
 	get_pte(ctx, hop1_addr);
 
 	hop2_pte_addr = hop2_addr;
 	for (i = 0 ; i < num_of_hop3 ; i++) {
-		pte_val = (ctx->dram_default_hops[i] & PTE_PHYS_ADDR_MASK) |
+		pte_val = (ctx->dram_default_hops[i] & HOP_PHYS_ADDR_MASK) |
 				PAGE_PRESENT_MASK;
 		write_pte(ctx, hop2_pte_addr, pte_val);
 		get_pte(ctx, hop2_addr);
 		hop2_pte_addr += HL_PTE_SIZE;
 	}
 
-	pte_val = (prop->mmu_dram_default_page_addr & PTE_PHYS_ADDR_MASK) |
+	pte_val = (prop->mmu_dram_default_page_addr & HOP_PHYS_ADDR_MASK) |
 			LAST_MASK | PAGE_PRESENT_MASK;
 
 	for (i = 0 ; i < num_of_hop3 ; i++) {
@@ -400,8 +421,6 @@ int hl_mmu_init(struct hl_device *hdev)
 	if (!hdev->mmu_enable)
 		return 0;
 
-	/* MMU H/W init was already done in device hw_init() */
-
 	hdev->mmu_pgt_pool =
 			gen_pool_create(__ffs(prop->mmu_hop_table_size), -1);
 
@@ -427,6 +446,8 @@ int hl_mmu_init(struct hl_device *hdev)
 		goto err_pool_add;
 	}
 
+	/* MMU H/W init will be done in device hw_init() */
+
 	return 0;
 
 err_pool_add:
@@ -450,10 +471,10 @@ void hl_mmu_fini(struct hl_device *hdev)
 	if (!hdev->mmu_enable)
 		return;
 
+	/* MMU H/W fini was already done in device hw_fini() */
+
 	kvfree(hdev->mmu_shadow_hop0);
 	gen_pool_destroy(hdev->mmu_pgt_pool);
-
-	/* MMU H/W fini will be done in device hw_fini() */
 }
 
 /**
@@ -501,36 +522,36 @@ void hl_mmu_ctx_fini(struct hl_ctx *ctx)
 	dram_default_mapping_fini(ctx);
 
 	if (!hash_empty(ctx->mmu_shadow_hash))
-		dev_err(hdev->dev, "ctx is freed while it has pgts in use\n");
+		dev_err(hdev->dev, "ctx %d is freed while it has pgts in use\n",
+			ctx->asid);
 
 	hash_for_each_safe(ctx->mmu_shadow_hash, i, tmp, pgt_info, node) {
-		dev_err(hdev->dev,
+		dev_err_ratelimited(hdev->dev,
 			"pgt_info of addr 0x%llx of asid %d was not destroyed, num_ptes: %d\n",
 			pgt_info->phys_addr, ctx->asid, pgt_info->num_of_ptes);
-		free_hop(ctx, pgt_info->shadow_addr);
+		_free_hop(ctx, pgt_info);
 	}
 
 	mutex_destroy(&ctx->mmu_lock);
 }
 
-static int _hl_mmu_unmap(struct hl_ctx *ctx, u64 virt_addr)
+static int _hl_mmu_unmap(struct hl_ctx *ctx, u64 virt_addr, bool is_dram_addr)
 {
 	struct hl_device *hdev = ctx->hdev;
 	struct asic_fixed_properties *prop = &hdev->asic_prop;
+	struct hl_mmu_properties *mmu_prop;
 	u64 hop0_addr = 0, hop0_pte_addr = 0,
 		hop1_addr = 0, hop1_pte_addr = 0,
 		hop2_addr = 0, hop2_pte_addr = 0,
 		hop3_addr = 0, hop3_pte_addr = 0,
 		hop4_addr = 0, hop4_pte_addr = 0,
 		curr_pte;
-	bool is_dram_addr, is_huge, clear_hop3 = true;
+	bool is_huge, clear_hop3 = true;
 
-	is_dram_addr = hl_mem_area_inside_range(virt_addr, PAGE_SIZE_2MB,
-				prop->va_space_dram_start_address,
-				prop->va_space_dram_end_address);
+	mmu_prop = is_dram_addr ? &prop->dmmu : &prop->pmmu;
 
 	hop0_addr = get_hop0_addr(ctx);
-	hop0_pte_addr = get_hop0_pte_addr(ctx, hop0_addr, virt_addr);
+	hop0_pte_addr = get_hop0_pte_addr(ctx, mmu_prop, hop0_addr, virt_addr);
 
 	curr_pte = *(u64 *) (uintptr_t) hop0_pte_addr;
 
@@ -539,7 +560,7 @@ static int _hl_mmu_unmap(struct hl_ctx *ctx, u64 virt_addr)
 	if (hop1_addr == ULLONG_MAX)
 		goto not_mapped;
 
-	hop1_pte_addr = get_hop1_pte_addr(ctx, hop1_addr, virt_addr);
+	hop1_pte_addr = get_hop1_pte_addr(ctx, mmu_prop, hop1_addr, virt_addr);
 
 	curr_pte = *(u64 *) (uintptr_t) hop1_pte_addr;
 
@@ -548,7 +569,7 @@ static int _hl_mmu_unmap(struct hl_ctx *ctx, u64 virt_addr)
 	if (hop2_addr == ULLONG_MAX)
 		goto not_mapped;
 
-	hop2_pte_addr = get_hop2_pte_addr(ctx, hop2_addr, virt_addr);
+	hop2_pte_addr = get_hop2_pte_addr(ctx, mmu_prop, hop2_addr, virt_addr);
 
 	curr_pte = *(u64 *) (uintptr_t) hop2_pte_addr;
 
@@ -557,7 +578,7 @@ static int _hl_mmu_unmap(struct hl_ctx *ctx, u64 virt_addr)
 	if (hop3_addr == ULLONG_MAX)
 		goto not_mapped;
 
-	hop3_pte_addr = get_hop3_pte_addr(ctx, hop3_addr, virt_addr);
+	hop3_pte_addr = get_hop3_pte_addr(ctx, mmu_prop, hop3_addr, virt_addr);
 
 	curr_pte = *(u64 *) (uintptr_t) hop3_pte_addr;
 
@@ -575,7 +596,8 @@ static int _hl_mmu_unmap(struct hl_ctx *ctx, u64 virt_addr)
 		if (hop4_addr == ULLONG_MAX)
 			goto not_mapped;
 
-		hop4_pte_addr = get_hop4_pte_addr(ctx, hop4_addr, virt_addr);
+		hop4_pte_addr = get_hop4_pte_addr(ctx, mmu_prop, hop4_addr,
+							virt_addr);
 
 		curr_pte = *(u64 *) (uintptr_t) hop4_pte_addr;
 
@@ -584,7 +606,7 @@ static int _hl_mmu_unmap(struct hl_ctx *ctx, u64 virt_addr)
 
 	if (hdev->dram_default_page_mapping && is_dram_addr) {
 		u64 default_pte = (prop->mmu_dram_default_page_addr &
-				PTE_PHYS_ADDR_MASK) | LAST_MASK |
+				HOP_PHYS_ADDR_MASK) | LAST_MASK |
 					PAGE_PRESENT_MASK;
 		if (curr_pte == default_pte) {
 			dev_err(hdev->dev,
@@ -667,25 +689,36 @@ not_mapped:
 int hl_mmu_unmap(struct hl_ctx *ctx, u64 virt_addr, u32 page_size)
 {
 	struct hl_device *hdev = ctx->hdev;
+	struct asic_fixed_properties *prop = &hdev->asic_prop;
+	struct hl_mmu_properties *mmu_prop;
 	u64 real_virt_addr;
 	u32 real_page_size, npages;
 	int i, rc;
+	bool is_dram_addr;
 
 	if (!hdev->mmu_enable)
 		return 0;
 
+	is_dram_addr = hl_mem_area_inside_range(virt_addr, prop->dmmu.page_size,
+				prop->va_space_dram_start_address,
+				prop->va_space_dram_end_address);
+
+	mmu_prop = is_dram_addr ? &prop->dmmu : &prop->pmmu;
+
 	/*
-	 * The H/W handles mapping of 4KB/2MB page. Hence if the host page size
-	 * is bigger, we break it to sub-pages and unmap them separately.
+	 * The H/W handles mapping of specific page sizes. Hence if the page
+	 * size is bigger, we break it to sub-pages and unmap them separately.
 	 */
-	if ((page_size % PAGE_SIZE_2MB) == 0) {
-		real_page_size = PAGE_SIZE_2MB;
-	} else if ((page_size % PAGE_SIZE_4KB) == 0) {
-		real_page_size = PAGE_SIZE_4KB;
+	if ((page_size % mmu_prop->huge_page_size) == 0) {
+		real_page_size = mmu_prop->huge_page_size;
+	} else if ((page_size % mmu_prop->page_size) == 0) {
+		real_page_size = mmu_prop->page_size;
 	} else {
 		dev_err(hdev->dev,
-			"page size of %u is not 4KB nor 2MB aligned, can't unmap\n",
-				page_size);
+			"page size of %u is not %uKB nor %uMB aligned, can't unmap\n",
+			page_size,
+			mmu_prop->page_size >> 10,
+			mmu_prop->huge_page_size >> 20);
 
 		return -EFAULT;
 	}
@@ -694,7 +727,7 @@ int hl_mmu_unmap(struct hl_ctx *ctx, u64 virt_addr, u32 page_size)
 	real_virt_addr = virt_addr;
 
 	for (i = 0 ; i < npages ; i++) {
-		rc = _hl_mmu_unmap(ctx, real_virt_addr);
+		rc = _hl_mmu_unmap(ctx, real_virt_addr, is_dram_addr);
 		if (rc)
 			return rc;
 
@@ -705,10 +738,11 @@ int hl_mmu_unmap(struct hl_ctx *ctx, u64 virt_addr, u32 page_size)
 }
 
 static int _hl_mmu_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr,
-		u32 page_size)
+			u32 page_size, bool is_dram_addr)
 {
 	struct hl_device *hdev = ctx->hdev;
 	struct asic_fixed_properties *prop = &hdev->asic_prop;
+	struct hl_mmu_properties *mmu_prop;
 	u64 hop0_addr = 0, hop0_pte_addr = 0,
 		hop1_addr = 0, hop1_pte_addr = 0,
 		hop2_addr = 0, hop2_pte_addr = 0,
@@ -716,21 +750,19 @@ static int _hl_mmu_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr,
 		hop4_addr = 0, hop4_pte_addr = 0,
 		curr_pte = 0;
 	bool hop1_new = false, hop2_new = false, hop3_new = false,
-		hop4_new = false, is_huge, is_dram_addr;
+		hop4_new = false, is_huge;
 	int rc = -ENOMEM;
 
+	mmu_prop = is_dram_addr ? &prop->dmmu : &prop->pmmu;
+
 	/*
-	 * This mapping function can map a 4KB/2MB page. For 2MB page there are
-	 * only 3 hops rather than 4. Currently the DRAM allocation uses 2MB
-	 * pages only but user memory could have been allocated with one of the
-	 * two page sizes. Since this is a common code for all the three cases,
-	 * we need this hugs page check.
+	 * This mapping function can map a page or a huge page. For huge page
+	 * there are only 3 hops rather than 4. Currently the DRAM allocation
+	 * uses huge pages only but user memory could have been allocated with
+	 * one of the two page sizes. Since this is a common code for all the
+	 * three cases, we need this hugs page check.
 	 */
-	is_huge = page_size == PAGE_SIZE_2MB;
-
-	is_dram_addr = hl_mem_area_inside_range(virt_addr, page_size,
-				prop->va_space_dram_start_address,
-				prop->va_space_dram_end_address);
+	is_huge = page_size == mmu_prop->huge_page_size;
 
 	if (is_dram_addr && !is_huge) {
 		dev_err(hdev->dev, "DRAM mapping should use huge pages only\n");
@@ -738,28 +770,28 @@ static int _hl_mmu_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr,
 	}
 
 	hop0_addr = get_hop0_addr(ctx);
-	hop0_pte_addr = get_hop0_pte_addr(ctx, hop0_addr, virt_addr);
+	hop0_pte_addr = get_hop0_pte_addr(ctx, mmu_prop, hop0_addr, virt_addr);
 	curr_pte = *(u64 *) (uintptr_t) hop0_pte_addr;
 
 	hop1_addr = get_alloc_next_hop_addr(ctx, curr_pte, &hop1_new);
 	if (hop1_addr == ULLONG_MAX)
 		goto err;
 
-	hop1_pte_addr = get_hop1_pte_addr(ctx, hop1_addr, virt_addr);
+	hop1_pte_addr = get_hop1_pte_addr(ctx, mmu_prop, hop1_addr, virt_addr);
 	curr_pte = *(u64 *) (uintptr_t) hop1_pte_addr;
 
 	hop2_addr = get_alloc_next_hop_addr(ctx, curr_pte, &hop2_new);
 	if (hop2_addr == ULLONG_MAX)
 		goto err;
 
-	hop2_pte_addr = get_hop2_pte_addr(ctx, hop2_addr, virt_addr);
+	hop2_pte_addr = get_hop2_pte_addr(ctx, mmu_prop, hop2_addr, virt_addr);
 	curr_pte = *(u64 *) (uintptr_t) hop2_pte_addr;
 
 	hop3_addr = get_alloc_next_hop_addr(ctx, curr_pte, &hop3_new);
 	if (hop3_addr == ULLONG_MAX)
 		goto err;
 
-	hop3_pte_addr = get_hop3_pte_addr(ctx, hop3_addr, virt_addr);
+	hop3_pte_addr = get_hop3_pte_addr(ctx, mmu_prop, hop3_addr, virt_addr);
 	curr_pte = *(u64 *) (uintptr_t) hop3_pte_addr;
 
 	if (!is_huge) {
@@ -767,13 +799,14 @@ static int _hl_mmu_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr,
 		if (hop4_addr == ULLONG_MAX)
 			goto err;
 
-		hop4_pte_addr = get_hop4_pte_addr(ctx, hop4_addr, virt_addr);
+		hop4_pte_addr = get_hop4_pte_addr(ctx, mmu_prop, hop4_addr,
+							virt_addr);
 		curr_pte = *(u64 *) (uintptr_t) hop4_pte_addr;
 	}
 
 	if (hdev->dram_default_page_mapping && is_dram_addr) {
 		u64 default_pte = (prop->mmu_dram_default_page_addr &
-					PTE_PHYS_ADDR_MASK) | LAST_MASK |
+					HOP_PHYS_ADDR_MASK) | LAST_MASK |
 						PAGE_PRESENT_MASK;
 
 		if (curr_pte != default_pte) {
@@ -813,7 +846,7 @@ static int _hl_mmu_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr,
 		goto err;
 	}
 
-	curr_pte = (phys_addr & PTE_PHYS_ADDR_MASK) | LAST_MASK
+	curr_pte = (phys_addr & HOP_PHYS_ADDR_MASK) | LAST_MASK
 			| PAGE_PRESENT_MASK;
 
 	if (is_huge)
@@ -823,25 +856,25 @@ static int _hl_mmu_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr,
 
 	if (hop1_new) {
 		curr_pte =
-			(hop1_addr & PTE_PHYS_ADDR_MASK) | PAGE_PRESENT_MASK;
+			(hop1_addr & HOP_PHYS_ADDR_MASK) | PAGE_PRESENT_MASK;
 		write_pte(ctx, hop0_pte_addr, curr_pte);
 	}
 	if (hop2_new) {
 		curr_pte =
-			(hop2_addr & PTE_PHYS_ADDR_MASK) | PAGE_PRESENT_MASK;
+			(hop2_addr & HOP_PHYS_ADDR_MASK) | PAGE_PRESENT_MASK;
 		write_pte(ctx, hop1_pte_addr, curr_pte);
 		get_pte(ctx, hop1_addr);
 	}
 	if (hop3_new) {
 		curr_pte =
-			(hop3_addr & PTE_PHYS_ADDR_MASK) | PAGE_PRESENT_MASK;
+			(hop3_addr & HOP_PHYS_ADDR_MASK) | PAGE_PRESENT_MASK;
 		write_pte(ctx, hop2_pte_addr, curr_pte);
 		get_pte(ctx, hop2_addr);
 	}
 
 	if (!is_huge) {
 		if (hop4_new) {
-			curr_pte = (hop4_addr & PTE_PHYS_ADDR_MASK) |
+			curr_pte = (hop4_addr & HOP_PHYS_ADDR_MASK) |
 					PAGE_PRESENT_MASK;
 			write_pte(ctx, hop3_pte_addr, curr_pte);
 			get_pte(ctx, hop3_addr);
@@ -890,25 +923,36 @@ err:
 int hl_mmu_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr, u32 page_size)
 {
 	struct hl_device *hdev = ctx->hdev;
+	struct asic_fixed_properties *prop = &hdev->asic_prop;
+	struct hl_mmu_properties *mmu_prop;
 	u64 real_virt_addr, real_phys_addr;
 	u32 real_page_size, npages;
 	int i, rc, mapped_cnt = 0;
+	bool is_dram_addr;
 
 	if (!hdev->mmu_enable)
 		return 0;
 
+	is_dram_addr = hl_mem_area_inside_range(virt_addr, prop->dmmu.page_size,
+				prop->va_space_dram_start_address,
+				prop->va_space_dram_end_address);
+
+	mmu_prop = is_dram_addr ? &prop->dmmu : &prop->pmmu;
+
 	/*
-	 * The H/W handles mapping of 4KB/2MB page. Hence if the host page size
-	 * is bigger, we break it to sub-pages and map them separately.
+	 * The H/W handles mapping of specific page sizes. Hence if the page
+	 * size is bigger, we break it to sub-pages and map them separately.
 	 */
-	if ((page_size % PAGE_SIZE_2MB) == 0) {
-		real_page_size = PAGE_SIZE_2MB;
-	} else if ((page_size % PAGE_SIZE_4KB) == 0) {
-		real_page_size = PAGE_SIZE_4KB;
+	if ((page_size % mmu_prop->huge_page_size) == 0) {
+		real_page_size = mmu_prop->huge_page_size;
+	} else if ((page_size % mmu_prop->page_size) == 0) {
+		real_page_size = mmu_prop->page_size;
 	} else {
 		dev_err(hdev->dev,
-			"page size of %u is not 4KB nor 2MB aligned, can't map\n",
-				page_size);
+			"page size of %u is not %dKB nor %dMB aligned, can't unmap\n",
+			page_size,
+			mmu_prop->page_size >> 10,
+			mmu_prop->huge_page_size >> 20);
 
 		return -EFAULT;
 	}
@@ -923,7 +967,7 @@ int hl_mmu_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr, u32 page_size)
 
 	for (i = 0 ; i < npages ; i++) {
 		rc = _hl_mmu_map(ctx, real_virt_addr, real_phys_addr,
-				real_page_size);
+				real_page_size, is_dram_addr);
 		if (rc)
 			goto err;
 
@@ -937,7 +981,7 @@ int hl_mmu_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr, u32 page_size)
 err:
 	real_virt_addr = virt_addr;
 	for (i = 0 ; i < mapped_cnt ; i++) {
-		if (_hl_mmu_unmap(ctx, real_virt_addr))
+		if (_hl_mmu_unmap(ctx, real_virt_addr, is_dram_addr))
 			dev_warn_ratelimited(hdev->dev,
 				"failed to unmap va: 0x%llx\n", real_virt_addr);
 
diff --git a/drivers/misc/hpilo.h b/drivers/misc/hpilo.h
index 94dfb9e40e29..1aa433a7f66c 100644
--- a/drivers/misc/hpilo.h
+++ b/drivers/misc/hpilo.h
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  * linux/drivers/char/hpilo.h
  *
diff --git a/drivers/misc/ibmvmc.h b/drivers/misc/ibmvmc.h
index e140ada8fe2c..0e1756fffeae 100644
--- a/drivers/misc/ibmvmc.h
+++ b/drivers/misc/ibmvmc.h
@@ -1,5 +1,5 @@
-/* SPDX-License-Identifier: GPL-2.0+
- *
+/* SPDX-License-Identifier: GPL-2.0+ */
+/*
  * linux/drivers/misc/ibmvmc.h
  *
  * IBM Power Systems Virtual Management Channel Support.
diff --git a/drivers/misc/lis3lv02d/lis3lv02d.c b/drivers/misc/lis3lv02d/lis3lv02d.c
index 057d7bbde402..dd65cedf3b12 100644
--- a/drivers/misc/lis3lv02d/lis3lv02d.c
+++ b/drivers/misc/lis3lv02d/lis3lv02d.c
@@ -16,7 +16,7 @@
 #include <linux/types.h>
 #include <linux/platform_device.h>
 #include <linux/interrupt.h>
-#include <linux/input-polldev.h>
+#include <linux/input.h>
 #include <linux/delay.h>
 #include <linux/wait.h>
 #include <linux/poll.h>
@@ -434,23 +434,23 @@ int lis3lv02d_poweron(struct lis3lv02d *lis3)
 EXPORT_SYMBOL_GPL(lis3lv02d_poweron);
 
 
-static void lis3lv02d_joystick_poll(struct input_polled_dev *pidev)
+static void lis3lv02d_joystick_poll(struct input_dev *input)
 {
-	struct lis3lv02d *lis3 = pidev->private;
+	struct lis3lv02d *lis3 = input_get_drvdata(input);
 	int x, y, z;
 
 	mutex_lock(&lis3->mutex);
 	lis3lv02d_get_xyz(lis3, &x, &y, &z);
-	input_report_abs(pidev->input, ABS_X, x);
-	input_report_abs(pidev->input, ABS_Y, y);
-	input_report_abs(pidev->input, ABS_Z, z);
-	input_sync(pidev->input);
+	input_report_abs(input, ABS_X, x);
+	input_report_abs(input, ABS_Y, y);
+	input_report_abs(input, ABS_Z, z);
+	input_sync(input);
 	mutex_unlock(&lis3->mutex);
 }
 
-static void lis3lv02d_joystick_open(struct input_polled_dev *pidev)
+static int lis3lv02d_joystick_open(struct input_dev *input)
 {
-	struct lis3lv02d *lis3 = pidev->private;
+	struct lis3lv02d *lis3 = input_get_drvdata(input);
 
 	if (lis3->pm_dev)
 		pm_runtime_get_sync(lis3->pm_dev);
@@ -461,12 +461,14 @@ static void lis3lv02d_joystick_open(struct input_polled_dev *pidev)
 	 * Update coordinates for the case where poll interval is 0 and
 	 * the chip in running purely under interrupt control
 	 */
-	lis3lv02d_joystick_poll(pidev);
+	lis3lv02d_joystick_poll(input);
+
+	return 0;
 }
 
-static void lis3lv02d_joystick_close(struct input_polled_dev *pidev)
+static void lis3lv02d_joystick_close(struct input_dev *input)
 {
-	struct lis3lv02d *lis3 = pidev->private;
+	struct lis3lv02d *lis3 = input_get_drvdata(input);
 
 	atomic_set(&lis3->wake_thread, 0);
 	if (lis3->pm_dev)
@@ -497,7 +499,7 @@ out:
 
 static void lis302dl_interrupt_handle_click(struct lis3lv02d *lis3)
 {
-	struct input_dev *dev = lis3->idev->input;
+	struct input_dev *dev = lis3->idev;
 	u8 click_src;
 
 	mutex_lock(&lis3->mutex);
@@ -677,26 +679,19 @@ int lis3lv02d_joystick_enable(struct lis3lv02d *lis3)
 	if (lis3->idev)
 		return -EINVAL;
 
-	lis3->idev = input_allocate_polled_device();
-	if (!lis3->idev)
+	input_dev = input_allocate_device();
+	if (!input_dev)
 		return -ENOMEM;
 
-	lis3->idev->poll = lis3lv02d_joystick_poll;
-	lis3->idev->open = lis3lv02d_joystick_open;
-	lis3->idev->close = lis3lv02d_joystick_close;
-	lis3->idev->poll_interval = MDPS_POLL_INTERVAL;
-	lis3->idev->poll_interval_min = MDPS_POLL_MIN;
-	lis3->idev->poll_interval_max = MDPS_POLL_MAX;
-	lis3->idev->private = lis3;
-	input_dev = lis3->idev->input;
-
 	input_dev->name       = "ST LIS3LV02DL Accelerometer";
 	input_dev->phys       = DRIVER_NAME "/input0";
 	input_dev->id.bustype = BUS_HOST;
 	input_dev->id.vendor  = 0;
 	input_dev->dev.parent = &lis3->pdev->dev;
 
-	set_bit(EV_ABS, input_dev->evbit);
+	input_dev->open = lis3lv02d_joystick_open;
+	input_dev->close = lis3lv02d_joystick_close;
+
 	max_val = (lis3->mdps_max_val * lis3->scale) / LIS3_ACCURACY;
 	if (lis3->whoami == WAI_12B) {
 		fuzz = LIS3_DEFAULT_FUZZ_12B;
@@ -712,17 +707,32 @@ int lis3lv02d_joystick_enable(struct lis3lv02d *lis3)
 	input_set_abs_params(input_dev, ABS_Y, -max_val, max_val, fuzz, flat);
 	input_set_abs_params(input_dev, ABS_Z, -max_val, max_val, fuzz, flat);
 
+	input_set_drvdata(input_dev, lis3);
+	lis3->idev = input_dev;
+
+	err = input_setup_polling(input_dev, lis3lv02d_joystick_poll);
+	if (err)
+		goto err_free_input;
+
+	input_set_poll_interval(input_dev, MDPS_POLL_INTERVAL);
+	input_set_min_poll_interval(input_dev, MDPS_POLL_MIN);
+	input_set_max_poll_interval(input_dev, MDPS_POLL_MAX);
+
 	lis3->mapped_btns[0] = lis3lv02d_get_axis(abs(lis3->ac.x), btns);
 	lis3->mapped_btns[1] = lis3lv02d_get_axis(abs(lis3->ac.y), btns);
 	lis3->mapped_btns[2] = lis3lv02d_get_axis(abs(lis3->ac.z), btns);
 
-	err = input_register_polled_device(lis3->idev);
-	if (err) {
-		input_free_polled_device(lis3->idev);
-		lis3->idev = NULL;
-	}
+	err = input_register_device(lis3->idev);
+	if (err)
+		goto err_free_input;
 
+	return 0;
+
+err_free_input:
+	input_free_device(input_dev);
+	lis3->idev = NULL;
 	return err;
+
 }
 EXPORT_SYMBOL_GPL(lis3lv02d_joystick_enable);
 
@@ -738,8 +748,7 @@ void lis3lv02d_joystick_disable(struct lis3lv02d *lis3)
 
 	if (lis3->irq)
 		misc_deregister(&lis3->miscdev);
-	input_unregister_polled_device(lis3->idev);
-	input_free_polled_device(lis3->idev);
+	input_unregister_device(lis3->idev);
 	lis3->idev = NULL;
 }
 EXPORT_SYMBOL_GPL(lis3lv02d_joystick_disable);
@@ -895,10 +904,9 @@ static void lis3lv02d_8b_configure(struct lis3lv02d *lis3,
 			(p->click_thresh_y << 4));
 
 		if (lis3->idev) {
-			struct input_dev *input_dev = lis3->idev->input;
-			input_set_capability(input_dev, EV_KEY, BTN_X);
-			input_set_capability(input_dev, EV_KEY, BTN_Y);
-			input_set_capability(input_dev, EV_KEY, BTN_Z);
+			input_set_capability(lis3->idev, EV_KEY, BTN_X);
+			input_set_capability(lis3->idev, EV_KEY, BTN_Y);
+			input_set_capability(lis3->idev, EV_KEY, BTN_Z);
 		}
 	}
 
diff --git a/drivers/misc/lis3lv02d/lis3lv02d.h b/drivers/misc/lis3lv02d/lis3lv02d.h
index 1b0c99883c57..c394c0b08519 100644
--- a/drivers/misc/lis3lv02d/lis3lv02d.h
+++ b/drivers/misc/lis3lv02d/lis3lv02d.h
@@ -6,7 +6,7 @@
  *  Copyright (C) 2008-2009 Eric Piel
  */
 #include <linux/platform_device.h>
-#include <linux/input-polldev.h>
+#include <linux/input.h>
 #include <linux/regulator/consumer.h>
 #include <linux/miscdevice.h>
 
@@ -281,7 +281,7 @@ struct lis3lv02d {
 					* (1/1000th of earth gravity)
 					*/
 
-	struct input_polled_dev	*idev;     /* input device */
+	struct input_dev	*idev;     /* input device */
 	struct platform_device	*pdev;     /* platform device */
 	struct regulator_bulk_data regulators[2];
 	atomic_t		count;     /* interrupt count after last read */
diff --git a/drivers/misc/lkdtm/bugs.c b/drivers/misc/lkdtm/bugs.c
index 7284a22b1a09..a4fdad04809a 100644
--- a/drivers/misc/lkdtm/bugs.c
+++ b/drivers/misc/lkdtm/bugs.c
@@ -12,6 +12,10 @@
 #include <linux/sched/task_stack.h>
 #include <linux/uaccess.h>
 
+#ifdef CONFIG_X86_32
+#include <asm/desc.h>
+#endif
+
 struct lkdtm_list {
 	struct list_head node;
 };
@@ -337,3 +341,38 @@ void lkdtm_UNSET_SMEP(void)
 	pr_err("FAIL: this test is x86_64-only\n");
 #endif
 }
+
+#ifdef CONFIG_X86_32
+void lkdtm_DOUBLE_FAULT(void)
+{
+	/*
+	 * Trigger #DF by setting the stack limit to zero.  This clobbers
+	 * a GDT TLS slot, which is okay because the current task will die
+	 * anyway due to the double fault.
+	 */
+	struct desc_struct d = {
+		.type = 3,	/* expand-up, writable, accessed data */
+		.p = 1,		/* present */
+		.d = 1,		/* 32-bit */
+		.g = 0,		/* limit in bytes */
+		.s = 1,		/* not system */
+	};
+
+	local_irq_disable();
+	write_gdt_entry(get_cpu_gdt_rw(smp_processor_id()),
+			GDT_ENTRY_TLS_MIN, &d, DESCTYPE_S);
+
+	/*
+	 * Put our zero-limit segment in SS and then trigger a fault.  The
+	 * 4-byte access to (%esp) will fault with #SS, and the attempt to
+	 * deliver the fault will recursively cause #SS and result in #DF.
+	 * This whole process happens while NMIs and MCEs are blocked by the
+	 * MOV SS window.  This is nice because an NMI with an invalid SS
+	 * would also double-fault, resulting in the NMI or MCE being lost.
+	 */
+	asm volatile ("movw %0, %%ss; addl $0, (%%esp)" ::
+		      "r" ((unsigned short)(GDT_ENTRY_TLS_MIN << 3)));
+
+	panic("tried to double fault but didn't die\n");
+}
+#endif
diff --git a/drivers/misc/lkdtm/core.c b/drivers/misc/lkdtm/core.c
index cbc4c9045a99..ee0d6e721441 100644
--- a/drivers/misc/lkdtm/core.c
+++ b/drivers/misc/lkdtm/core.c
@@ -171,6 +171,9 @@ static const struct crashtype crashtypes[] = {
 	CRASHTYPE(USERCOPY_KERNEL_DS),
 	CRASHTYPE(STACKLEAK_ERASING),
 	CRASHTYPE(CFI_FORWARD_PROTO),
+#ifdef CONFIG_X86_32
+	CRASHTYPE(DOUBLE_FAULT),
+#endif
 };
 
 
diff --git a/drivers/misc/lkdtm/lkdtm.h b/drivers/misc/lkdtm/lkdtm.h
index ab446e0bde97..c56d23e37643 100644
--- a/drivers/misc/lkdtm/lkdtm.h
+++ b/drivers/misc/lkdtm/lkdtm.h
@@ -28,6 +28,9 @@ void lkdtm_CORRUPT_USER_DS(void);
 void lkdtm_STACK_GUARD_PAGE_LEADING(void);
 void lkdtm_STACK_GUARD_PAGE_TRAILING(void);
 void lkdtm_UNSET_SMEP(void);
+#ifdef CONFIG_X86_32
+void lkdtm_DOUBLE_FAULT(void);
+#endif
 
 /* lkdtm_heap.c */
 void __init lkdtm_heap_init(void);
diff --git a/drivers/misc/lkdtm/refcount.c b/drivers/misc/lkdtm/refcount.c
index 0a146b32da13..de7c5ab528d9 100644
--- a/drivers/misc/lkdtm/refcount.c
+++ b/drivers/misc/lkdtm/refcount.c
@@ -6,14 +6,6 @@
 #include "lkdtm.h"
 #include <linux/refcount.h>
 
-#ifdef CONFIG_REFCOUNT_FULL
-#define REFCOUNT_MAX		(UINT_MAX - 1)
-#define REFCOUNT_SATURATED	UINT_MAX
-#else
-#define REFCOUNT_MAX		INT_MAX
-#define REFCOUNT_SATURATED	(INT_MIN / 2)
-#endif
-
 static void overflow_check(refcount_t *ref)
 {
 	switch (refcount_read(ref)) {
@@ -127,7 +119,7 @@ void lkdtm_REFCOUNT_DEC_ZERO(void)
 static void check_negative(refcount_t *ref, int start)
 {
 	/*
-	 * CONFIG_REFCOUNT_FULL refuses to move a refcount at all on an
+	 * refcount_t refuses to move a refcount at all on an
 	 * over-sub, so we have to track our starting position instead of
 	 * looking only at zero-pinning.
 	 */
@@ -210,7 +202,6 @@ static void check_from_zero(refcount_t *ref)
 
 /*
  * A refcount_inc() from zero should pin to zero or saturate and may WARN.
- * Only CONFIG_REFCOUNT_FULL provides this protection currently.
  */
 void lkdtm_REFCOUNT_INC_ZERO(void)
 {
diff --git a/drivers/misc/mei/bus-fixup.c b/drivers/misc/mei/bus-fixup.c
index 0a2b99e1af45..9ad9c01ddf41 100644
--- a/drivers/misc/mei/bus-fixup.c
+++ b/drivers/misc/mei/bus-fixup.c
@@ -46,8 +46,6 @@ static const uuid_le mei_nfc_info_guid = MEI_UUID_NFC_INFO;
  */
 static void number_of_connections(struct mei_cl_device *cldev)
 {
-	dev_dbg(&cldev->dev, "running hook %s\n", __func__);
-
 	if (cldev->me_cl->props.max_number_of_connections > 1)
 		cldev->do_match = 0;
 }
@@ -59,8 +57,6 @@ static void number_of_connections(struct mei_cl_device *cldev)
  */
 static void blacklist(struct mei_cl_device *cldev)
 {
-	dev_dbg(&cldev->dev, "running hook %s\n", __func__);
-
 	cldev->do_match = 0;
 }
 
@@ -71,8 +67,6 @@ static void blacklist(struct mei_cl_device *cldev)
  */
 static void whitelist(struct mei_cl_device *cldev)
 {
-	dev_dbg(&cldev->dev, "running hook %s\n", __func__);
-
 	cldev->do_match = 1;
 }
 
@@ -256,7 +250,6 @@ static void mei_wd(struct mei_cl_device *cldev)
 {
 	struct pci_dev *pdev = to_pci_dev(cldev->dev.parent);
 
-	dev_dbg(&cldev->dev, "running hook %s\n", __func__);
 	if (pdev->device == MEI_DEV_ID_WPT_LP ||
 	    pdev->device == MEI_DEV_ID_SPT ||
 	    pdev->device == MEI_DEV_ID_SPT_H)
@@ -410,8 +403,6 @@ static void mei_nfc(struct mei_cl_device *cldev)
 
 	bus = cldev->bus;
 
-	dev_dbg(&cldev->dev, "running hook %s\n", __func__);
-
 	mutex_lock(&bus->device_lock);
 	/* we need to connect to INFO GUID */
 	cl = mei_cl_alloc_linked(bus);
diff --git a/drivers/misc/mei/bus.c b/drivers/misc/mei/bus.c
index 985bd4fd3328..a0a495c95e3c 100644
--- a/drivers/misc/mei/bus.c
+++ b/drivers/misc/mei/bus.c
@@ -791,11 +791,44 @@ static ssize_t modalias_show(struct device *dev, struct device_attribute *a,
 }
 static DEVICE_ATTR_RO(modalias);
 
+static ssize_t max_conn_show(struct device *dev, struct device_attribute *a,
+			     char *buf)
+{
+	struct mei_cl_device *cldev = to_mei_cl_device(dev);
+	u8 maxconn = mei_me_cl_max_conn(cldev->me_cl);
+
+	return scnprintf(buf, PAGE_SIZE, "%d", maxconn);
+}
+static DEVICE_ATTR_RO(max_conn);
+
+static ssize_t fixed_show(struct device *dev, struct device_attribute *a,
+			  char *buf)
+{
+	struct mei_cl_device *cldev = to_mei_cl_device(dev);
+	u8 fixed = mei_me_cl_fixed(cldev->me_cl);
+
+	return scnprintf(buf, PAGE_SIZE, "%d", fixed);
+}
+static DEVICE_ATTR_RO(fixed);
+
+static ssize_t max_len_show(struct device *dev, struct device_attribute *a,
+			    char *buf)
+{
+	struct mei_cl_device *cldev = to_mei_cl_device(dev);
+	u32 maxlen = mei_me_cl_max_len(cldev->me_cl);
+
+	return scnprintf(buf, PAGE_SIZE, "%u", maxlen);
+}
+static DEVICE_ATTR_RO(max_len);
+
 static struct attribute *mei_cldev_attrs[] = {
 	&dev_attr_name.attr,
 	&dev_attr_uuid.attr,
 	&dev_attr_version.attr,
 	&dev_attr_modalias.attr,
+	&dev_attr_max_conn.attr,
+	&dev_attr_fixed.attr,
+	&dev_attr_max_len.attr,
 	NULL,
 };
 ATTRIBUTE_GROUPS(mei_cldev);
@@ -873,15 +906,16 @@ static const struct device_type mei_cl_device_type = {
 
 /**
  * mei_cl_bus_set_name - set device name for me client device
+ *  <controller>-<client device>
+ *  Example: 0000:00:16.0-55213584-9a29-4916-badf-0fb7ed682aeb
  *
  * @cldev: me client device
  */
 static inline void mei_cl_bus_set_name(struct mei_cl_device *cldev)
 {
-	dev_set_name(&cldev->dev, "mei:%s:%pUl:%02X",
-		     cldev->name,
-		     mei_me_cl_uuid(cldev->me_cl),
-		     mei_me_cl_ver(cldev->me_cl));
+	dev_set_name(&cldev->dev, "%s-%pUl",
+		     dev_name(cldev->bus->dev),
+		     mei_me_cl_uuid(cldev->me_cl));
 }
 
 /**
diff --git a/drivers/misc/mei/client.h b/drivers/misc/mei/client.h
index c1f9e810cf81..2f8954def591 100644
--- a/drivers/misc/mei/client.h
+++ b/drivers/misc/mei/client.h
@@ -69,6 +69,42 @@ static inline u8 mei_me_cl_ver(const struct mei_me_client *me_cl)
 	return me_cl->props.protocol_version;
 }
 
+/**
+ * mei_me_cl_max_conn - return me client max number of connections
+ *
+ * @me_cl: me client
+ *
+ * Return: me client max number of connections
+ */
+static inline u8 mei_me_cl_max_conn(const struct mei_me_client *me_cl)
+{
+	return me_cl->props.max_number_of_connections;
+}
+
+/**
+ * mei_me_cl_fixed - return me client fixed address, if any
+ *
+ * @me_cl: me client
+ *
+ * Return: me client fixed address
+ */
+static inline u8 mei_me_cl_fixed(const struct mei_me_client *me_cl)
+{
+	return me_cl->props.fixed_address;
+}
+
+/**
+ * mei_me_cl_max_len - return me client max msg length
+ *
+ * @me_cl: me client
+ *
+ * Return: me client max msg length
+ */
+static inline u32 mei_me_cl_max_len(const struct mei_me_client *me_cl)
+{
+	return me_cl->props.max_msg_length;
+}
+
 /*
  * MEI IO Functions
  */
diff --git a/drivers/misc/mei/hdcp/mei_hdcp.c b/drivers/misc/mei/hdcp/mei_hdcp.c
index c681f6fab342..93027fd96c71 100644
--- a/drivers/misc/mei/hdcp/mei_hdcp.c
+++ b/drivers/misc/mei/hdcp/mei_hdcp.c
@@ -27,18 +27,6 @@
 
 #include "mei_hdcp.h"
 
-static inline u8 mei_get_ddi_index(enum port port)
-{
-	switch (port) {
-	case PORT_A:
-		return MEI_DDI_A;
-	case PORT_B ... PORT_F:
-		return (u8)port;
-	default:
-		return MEI_DDI_INVALID_PORT;
-	}
-}
-
 /**
  * mei_hdcp_initiate_session() - Initiate a Wired HDCP2.2 Tx Session in ME FW
  * @dev: device corresponding to the mei_cl_device
@@ -69,7 +57,8 @@ mei_hdcp_initiate_session(struct device *dev, struct hdcp_port_data *data,
 				WIRED_CMD_BUF_LEN_INITIATE_HDCP2_SESSION_IN;
 
 	session_init_in.port.integrated_port_type = data->port_type;
-	session_init_in.port.physical_port = mei_get_ddi_index(data->port);
+	session_init_in.port.physical_port = (u8)data->fw_ddi;
+	session_init_in.port.attached_transcoder = (u8)data->fw_tc;
 	session_init_in.protocol = data->protocol;
 
 	byte = mei_cldev_send(cldev, (u8 *)&session_init_in,
@@ -138,7 +127,8 @@ mei_hdcp_verify_receiver_cert_prepare_km(struct device *dev,
 				WIRED_CMD_BUF_LEN_VERIFY_RECEIVER_CERT_IN;
 
 	verify_rxcert_in.port.integrated_port_type = data->port_type;
-	verify_rxcert_in.port.physical_port = mei_get_ddi_index(data->port);
+	verify_rxcert_in.port.physical_port = (u8)data->fw_ddi;
+	verify_rxcert_in.port.attached_transcoder = (u8)data->fw_tc;
 
 	verify_rxcert_in.cert_rx = rx_cert->cert_rx;
 	memcpy(verify_rxcert_in.r_rx, &rx_cert->r_rx, HDCP_2_2_RRX_LEN);
@@ -208,7 +198,8 @@ mei_hdcp_verify_hprime(struct device *dev, struct hdcp_port_data *data,
 	send_hprime_in.header.buffer_len = WIRED_CMD_BUF_LEN_AKE_SEND_HPRIME_IN;
 
 	send_hprime_in.port.integrated_port_type = data->port_type;
-	send_hprime_in.port.physical_port = mei_get_ddi_index(data->port);
+	send_hprime_in.port.physical_port = (u8)data->fw_ddi;
+	send_hprime_in.port.attached_transcoder = (u8)data->fw_tc;
 
 	memcpy(send_hprime_in.h_prime, rx_hprime->h_prime,
 	       HDCP_2_2_H_PRIME_LEN);
@@ -265,7 +256,8 @@ mei_hdcp_store_pairing_info(struct device *dev, struct hdcp_port_data *data,
 					WIRED_CMD_BUF_LEN_SEND_PAIRING_INFO_IN;
 
 	pairing_info_in.port.integrated_port_type = data->port_type;
-	pairing_info_in.port.physical_port = mei_get_ddi_index(data->port);
+	pairing_info_in.port.physical_port = (u8)data->fw_ddi;
+	pairing_info_in.port.attached_transcoder = (u8)data->fw_tc;
 
 	memcpy(pairing_info_in.e_kh_km, pairing_info->e_kh_km,
 	       HDCP_2_2_E_KH_KM_LEN);
@@ -323,7 +315,8 @@ mei_hdcp_initiate_locality_check(struct device *dev,
 	lc_init_in.header.buffer_len = WIRED_CMD_BUF_LEN_INIT_LOCALITY_CHECK_IN;
 
 	lc_init_in.port.integrated_port_type = data->port_type;
-	lc_init_in.port.physical_port = mei_get_ddi_index(data->port);
+	lc_init_in.port.physical_port = (u8)data->fw_ddi;
+	lc_init_in.port.attached_transcoder = (u8)data->fw_tc;
 
 	byte = mei_cldev_send(cldev, (u8 *)&lc_init_in, sizeof(lc_init_in));
 	if (byte < 0) {
@@ -378,7 +371,8 @@ mei_hdcp_verify_lprime(struct device *dev, struct hdcp_port_data *data,
 					WIRED_CMD_BUF_LEN_VALIDATE_LOCALITY_IN;
 
 	verify_lprime_in.port.integrated_port_type = data->port_type;
-	verify_lprime_in.port.physical_port = mei_get_ddi_index(data->port);
+	verify_lprime_in.port.physical_port = (u8)data->fw_ddi;
+	verify_lprime_in.port.attached_transcoder = (u8)data->fw_tc;
 
 	memcpy(verify_lprime_in.l_prime, rx_lprime->l_prime,
 	       HDCP_2_2_L_PRIME_LEN);
@@ -435,7 +429,8 @@ static int mei_hdcp_get_session_key(struct device *dev,
 	get_skey_in.header.buffer_len = WIRED_CMD_BUF_LEN_GET_SESSION_KEY_IN;
 
 	get_skey_in.port.integrated_port_type = data->port_type;
-	get_skey_in.port.physical_port = mei_get_ddi_index(data->port);
+	get_skey_in.port.physical_port = (u8)data->fw_ddi;
+	get_skey_in.port.attached_transcoder = (u8)data->fw_tc;
 
 	byte = mei_cldev_send(cldev, (u8 *)&get_skey_in, sizeof(get_skey_in));
 	if (byte < 0) {
@@ -499,7 +494,8 @@ mei_hdcp_repeater_check_flow_prepare_ack(struct device *dev,
 					WIRED_CMD_BUF_LEN_VERIFY_REPEATER_IN;
 
 	verify_repeater_in.port.integrated_port_type = data->port_type;
-	verify_repeater_in.port.physical_port = mei_get_ddi_index(data->port);
+	verify_repeater_in.port.physical_port = (u8)data->fw_ddi;
+	verify_repeater_in.port.attached_transcoder = (u8)data->fw_tc;
 
 	memcpy(verify_repeater_in.rx_info, rep_topology->rx_info,
 	       HDCP_2_2_RXINFO_LEN);
@@ -569,7 +565,8 @@ static int mei_hdcp_verify_mprime(struct device *dev,
 			WIRED_CMD_BUF_LEN_REPEATER_AUTH_STREAM_REQ_MIN_IN;
 
 	verify_mprime_in.port.integrated_port_type = data->port_type;
-	verify_mprime_in.port.physical_port = mei_get_ddi_index(data->port);
+	verify_mprime_in.port.physical_port = (u8)data->fw_ddi;
+	verify_mprime_in.port.attached_transcoder = (u8)data->fw_tc;
 
 	memcpy(verify_mprime_in.m_prime, stream_ready->m_prime,
 	       HDCP_2_2_MPRIME_LEN);
@@ -630,7 +627,8 @@ static int mei_hdcp_enable_authentication(struct device *dev,
 	enable_auth_in.header.buffer_len = WIRED_CMD_BUF_LEN_ENABLE_AUTH_IN;
 
 	enable_auth_in.port.integrated_port_type = data->port_type;
-	enable_auth_in.port.physical_port = mei_get_ddi_index(data->port);
+	enable_auth_in.port.physical_port = (u8)data->fw_ddi;
+	enable_auth_in.port.attached_transcoder = (u8)data->fw_tc;
 	enable_auth_in.stream_type = data->streams[0].stream_type;
 
 	byte = mei_cldev_send(cldev, (u8 *)&enable_auth_in,
@@ -684,7 +682,8 @@ mei_hdcp_close_session(struct device *dev, struct hdcp_port_data *data)
 				WIRED_CMD_BUF_LEN_CLOSE_SESSION_IN;
 
 	session_close_in.port.integrated_port_type = data->port_type;
-	session_close_in.port.physical_port = mei_get_ddi_index(data->port);
+	session_close_in.port.physical_port = (u8)data->fw_ddi;
+	session_close_in.port.attached_transcoder = (u8)data->fw_tc;
 
 	byte = mei_cldev_send(cldev, (u8 *)&session_close_in,
 			      sizeof(session_close_in));
diff --git a/drivers/misc/mei/hdcp/mei_hdcp.h b/drivers/misc/mei/hdcp/mei_hdcp.h
index e4b1cd54c853..18ffc773fa18 100644
--- a/drivers/misc/mei/hdcp/mei_hdcp.h
+++ b/drivers/misc/mei/hdcp/mei_hdcp.h
@@ -184,8 +184,11 @@ struct hdcp_cmd_no_data {
 /* Uniquely identifies the hdcp port being addressed for a given command. */
 struct hdcp_port_id {
 	u8	integrated_port_type;
+	/* physical_port is used until Gen11.5. Must be zero for Gen11.5+ */
 	u8	physical_port;
-	u16	reserved;
+	/* attached_transcoder is for Gen11.5+. Set to zero for <Gen11.5 */
+	u8	attached_transcoder;
+	u8	reserved;
 } __packed;
 
 /*
@@ -362,16 +365,4 @@ struct wired_cmd_repeater_auth_stream_req_out {
 	struct hdcp_cmd_header	header;
 	struct hdcp_port_id	port;
 } __packed;
-
-enum mei_fw_ddi {
-	MEI_DDI_INVALID_PORT = 0x0,
-
-	MEI_DDI_B = 1,
-	MEI_DDI_C,
-	MEI_DDI_D,
-	MEI_DDI_E,
-	MEI_DDI_F,
-	MEI_DDI_A = 7,
-	MEI_DDI_RANGE_END = MEI_DDI_A,
-};
 #endif /* __MEI_HDCP_H__ */
diff --git a/drivers/misc/mei/hw-me-regs.h b/drivers/misc/mei/hw-me-regs.h
index c09f8bb49495..7cd67fb2365d 100644
--- a/drivers/misc/mei/hw-me-regs.h
+++ b/drivers/misc/mei/hw-me-regs.h
@@ -81,6 +81,7 @@
 
 #define MEI_DEV_ID_CMP_LP     0x02e0  /* Comet Point LP */
 #define MEI_DEV_ID_CMP_LP_3   0x02e4  /* Comet Point LP 3 (iTouch) */
+#define MEI_DEV_ID_CMP_V      0xA3BA  /* Comet Point Lake V */
 
 #define MEI_DEV_ID_ICP_LP     0x34E0  /* Ice Lake Point LP */
 
@@ -162,7 +163,8 @@ access to ME_CBD */
 #define ME_IS_HRA         0x00000002
 /* ME Interrupt Enable HRA - host read only access to ME_IE */
 #define ME_IE_HRA         0x00000001
-
+/* TRC control shadow register */
+#define ME_TRC            0x00000030
 
 /* H_HPG_CSR register bits */
 #define H_HPG_CSR_PGIHEXR 0x00000001
diff --git a/drivers/misc/mei/hw-me.c b/drivers/misc/mei/hw-me.c
index c4f6991d3028..668418d7ea77 100644
--- a/drivers/misc/mei/hw-me.c
+++ b/drivers/misc/mei/hw-me.c
@@ -1,6 +1,6 @@
 // SPDX-License-Identifier: GPL-2.0
 /*
- * Copyright (c) 2003-2018, Intel Corporation. All rights reserved.
+ * Copyright (c) 2003-2019, Intel Corporation. All rights reserved.
  * Intel Management Engine Interface (Intel MEI) Linux driver
  */
 
@@ -173,6 +173,27 @@ static inline void mei_me_d0i3c_write(struct mei_device *dev, u32 reg)
 }
 
 /**
+ * mei_me_trc_status - read trc status register
+ *
+ * @dev: mei device
+ * @trc: trc status register value
+ *
+ * Return: 0 on success, error otherwise
+ */
+static int mei_me_trc_status(struct mei_device *dev, u32 *trc)
+{
+	struct mei_me_hw *hw = to_me_hw(dev);
+
+	if (!hw->cfg->hw_trc_supported)
+		return -EOPNOTSUPP;
+
+	*trc = mei_me_reg_read(hw, ME_TRC);
+	trace_mei_reg_read(dev->dev, "ME_TRC", ME_TRC, *trc);
+
+	return 0;
+}
+
+/**
  * mei_me_fw_status - read fw status register from pci config space
  *
  * @dev: mei device
@@ -183,20 +204,19 @@ static inline void mei_me_d0i3c_write(struct mei_device *dev, u32 reg)
 static int mei_me_fw_status(struct mei_device *dev,
 			    struct mei_fw_status *fw_status)
 {
-	struct pci_dev *pdev = to_pci_dev(dev->dev);
 	struct mei_me_hw *hw = to_me_hw(dev);
 	const struct mei_fw_status *fw_src = &hw->cfg->fw_status;
 	int ret;
 	int i;
 
-	if (!fw_status)
+	if (!fw_status || !hw->read_fws)
 		return -EINVAL;
 
 	fw_status->count = fw_src->count;
 	for (i = 0; i < fw_src->count && i < MEI_FW_STATUS_MAX; i++) {
-		ret = pci_read_config_dword(pdev, fw_src->status[i],
-					    &fw_status->status[i]);
-		trace_mei_pci_cfg_read(dev->dev, "PCI_CFG_HSF_X",
+		ret = hw->read_fws(dev, fw_src->status[i],
+				   &fw_status->status[i]);
+		trace_mei_pci_cfg_read(dev->dev, "PCI_CFG_HFS_X",
 				       fw_src->status[i],
 				       fw_status->status[i]);
 		if (ret)
@@ -210,19 +230,26 @@ static int mei_me_fw_status(struct mei_device *dev,
  * mei_me_hw_config - configure hw dependent settings
  *
  * @dev: mei device
+ *
+ * Return:
+ *  * -EINVAL when read_fws is not set
+ *  * 0 on success
+ *
  */
-static void mei_me_hw_config(struct mei_device *dev)
+static int mei_me_hw_config(struct mei_device *dev)
 {
-	struct pci_dev *pdev = to_pci_dev(dev->dev);
 	struct mei_me_hw *hw = to_me_hw(dev);
 	u32 hcsr, reg;
 
+	if (WARN_ON(!hw->read_fws))
+		return -EINVAL;
+
 	/* Doesn't change in runtime */
 	hcsr = mei_hcsr_read(dev);
 	hw->hbuf_depth = (hcsr & H_CBD) >> 24;
 
 	reg = 0;
-	pci_read_config_dword(pdev, PCI_CFG_HFS_1, &reg);
+	hw->read_fws(dev, PCI_CFG_HFS_1, &reg);
 	trace_mei_pci_cfg_read(dev->dev, "PCI_CFG_HFS_1", PCI_CFG_HFS_1, reg);
 	hw->d0i3_supported =
 		((reg & PCI_CFG_HFS_1_D0I3_MSK) == PCI_CFG_HFS_1_D0I3_MSK);
@@ -233,6 +260,8 @@ static void mei_me_hw_config(struct mei_device *dev)
 		if (reg & H_D0I3C_I3)
 			hw->pg_state = MEI_PG_ON;
 	}
+
+	return 0;
 }
 
 /**
@@ -269,7 +298,7 @@ static inline void me_intr_disable(struct mei_device *dev, u32 hcsr)
 }
 
 /**
- * mei_me_intr_clear - clear and stop interrupts
+ * me_intr_clear - clear and stop interrupts
  *
  * @dev: the device structure
  * @hcsr: supplied hcsr register value
@@ -323,9 +352,9 @@ static void mei_me_intr_disable(struct mei_device *dev)
  */
 static void mei_me_synchronize_irq(struct mei_device *dev)
 {
-	struct pci_dev *pdev = to_pci_dev(dev->dev);
+	struct mei_me_hw *hw = to_me_hw(dev);
 
-	synchronize_irq(pdev->irq);
+	synchronize_irq(hw->irq);
 }
 
 /**
@@ -1294,6 +1323,7 @@ end:
 
 static const struct mei_hw_ops mei_me_hw_ops = {
 
+	.trc_status = mei_me_trc_status,
 	.fw_status = mei_me_fw_status,
 	.pg_state  = mei_me_pg_state,
 
@@ -1384,6 +1414,9 @@ static bool mei_me_fw_type_sps(struct pci_dev *pdev)
 	.dma_size[DMA_DSCR_DEVICE] = SZ_128K, \
 	.dma_size[DMA_DSCR_CTRL] = PAGE_SIZE
 
+#define MEI_CFG_TRC \
+	.hw_trc_supported = 1
+
 /* ICH Legacy devices */
 static const struct mei_cfg mei_me_ich_cfg = {
 	MEI_CFG_ICH_HFS,
@@ -1432,6 +1465,14 @@ static const struct mei_cfg mei_me_pch12_cfg = {
 	MEI_CFG_DMA_128,
 };
 
+/* Tiger Lake and newer devices */
+static const struct mei_cfg mei_me_pch15_cfg = {
+	MEI_CFG_PCH8_HFS,
+	MEI_CFG_FW_VER_SUPP,
+	MEI_CFG_DMA_128,
+	MEI_CFG_TRC,
+};
+
 /*
  * mei_cfg_list - A list of platform platform specific configurations.
  * Note: has to be synchronized with  enum mei_cfg_idx.
@@ -1446,6 +1487,7 @@ static const struct mei_cfg *const mei_cfg_list[] = {
 	[MEI_ME_PCH8_CFG] = &mei_me_pch8_cfg,
 	[MEI_ME_PCH8_SPS_CFG] = &mei_me_pch8_sps_cfg,
 	[MEI_ME_PCH12_CFG] = &mei_me_pch12_cfg,
+	[MEI_ME_PCH15_CFG] = &mei_me_pch15_cfg,
 };
 
 const struct mei_cfg *mei_me_get_cfg(kernel_ulong_t idx)
@@ -1461,19 +1503,19 @@ const struct mei_cfg *mei_me_get_cfg(kernel_ulong_t idx)
 /**
  * mei_me_dev_init - allocates and initializes the mei device structure
  *
- * @pdev: The pci device structure
+ * @parent: device associated with physical device (pci/platform)
  * @cfg: per device generation config
  *
  * Return: The mei_device pointer on success, NULL on failure.
  */
-struct mei_device *mei_me_dev_init(struct pci_dev *pdev,
+struct mei_device *mei_me_dev_init(struct device *parent,
 				   const struct mei_cfg *cfg)
 {
 	struct mei_device *dev;
 	struct mei_me_hw *hw;
 	int i;
 
-	dev = devm_kzalloc(&pdev->dev, sizeof(struct mei_device) +
+	dev = devm_kzalloc(parent, sizeof(struct mei_device) +
 			   sizeof(struct mei_me_hw), GFP_KERNEL);
 	if (!dev)
 		return NULL;
@@ -1483,7 +1525,7 @@ struct mei_device *mei_me_dev_init(struct pci_dev *pdev,
 	for (i = 0; i < DMA_DSCR_NUM; i++)
 		dev->dr_dscr[i].size = cfg->dma_size[i];
 
-	mei_device_init(dev, &pdev->dev, &mei_me_hw_ops);
+	mei_device_init(dev, parent, &mei_me_hw_ops);
 	hw->cfg = cfg;
 
 	dev->fw_f_fw_ver_supported = cfg->fw_ver_supported;
diff --git a/drivers/misc/mei/hw-me.h b/drivers/misc/mei/hw-me.h
index 1d8794828cbc..4a8d4dcd5a91 100644
--- a/drivers/misc/mei/hw-me.h
+++ b/drivers/misc/mei/hw-me.h
@@ -1,6 +1,6 @@
 /* SPDX-License-Identifier: GPL-2.0 */
 /*
- * Copyright (c) 2012-2018, Intel Corporation. All rights reserved.
+ * Copyright (c) 2012-2019, Intel Corporation. All rights reserved.
  * Intel Management Engine Interface (Intel MEI) Linux driver
  */
 
@@ -21,12 +21,14 @@
  * @quirk_probe: device exclusion quirk
  * @dma_size: device DMA buffers size
  * @fw_ver_supported: is fw version retrievable from FW
+ * @hw_trc_supported: does the hw support trc register
  */
 struct mei_cfg {
 	const struct mei_fw_status fw_status;
 	bool (*quirk_probe)(struct pci_dev *pdev);
 	size_t dma_size[DMA_DSCR_NUM];
 	u32 fw_ver_supported:1;
+	u32 hw_trc_supported:1;
 };
 
 
@@ -42,16 +44,20 @@ struct mei_cfg {
  *
  * @cfg: per device generation config and ops
  * @mem_addr: io memory address
+ * @irq: irq number
  * @pg_state: power gating state
  * @d0i3_supported: di03 support
  * @hbuf_depth: depth of hardware host/write buffer in slots
+ * @read_fws: read FW status register handler
  */
 struct mei_me_hw {
 	const struct mei_cfg *cfg;
 	void __iomem *mem_addr;
+	int irq;
 	enum mei_pg_state pg_state;
 	bool d0i3_supported;
 	u8 hbuf_depth;
+	int (*read_fws)(const struct mei_device *dev, int where, u32 *val);
 };
 
 #define to_me_hw(dev) (struct mei_me_hw *)((dev)->hw)
@@ -74,6 +80,7 @@ struct mei_me_hw {
  *                         servers platforms with quirk for
  *                         SPS firmware exclusion.
  * @MEI_ME_PCH12_CFG:      Platform Controller Hub Gen12 and newer
+ * @MEI_ME_PCH15_CFG:      Platform Controller Hub Gen15 and newer
  * @MEI_ME_NUM_CFG:        Upper Sentinel.
  */
 enum mei_cfg_idx {
@@ -86,12 +93,13 @@ enum mei_cfg_idx {
 	MEI_ME_PCH8_CFG,
 	MEI_ME_PCH8_SPS_CFG,
 	MEI_ME_PCH12_CFG,
+	MEI_ME_PCH15_CFG,
 	MEI_ME_NUM_CFG,
 };
 
 const struct mei_cfg *mei_me_get_cfg(kernel_ulong_t idx);
 
-struct mei_device *mei_me_dev_init(struct pci_dev *pdev,
+struct mei_device *mei_me_dev_init(struct device *parent,
 				   const struct mei_cfg *cfg);
 
 int mei_me_pg_enter_sync(struct mei_device *dev);
diff --git a/drivers/misc/mei/hw-txe.c b/drivers/misc/mei/hw-txe.c
index 5e58656b8e19..785b260b3ae9 100644
--- a/drivers/misc/mei/hw-txe.c
+++ b/drivers/misc/mei/hw-txe.c
@@ -1,6 +1,6 @@
 // SPDX-License-Identifier: GPL-2.0
 /*
- * Copyright (c) 2013-2014, Intel Corporation. All rights reserved.
+ * Copyright (c) 2013-2019, Intel Corporation. All rights reserved.
  * Intel Management Engine Interface (Intel MEI) Linux driver
  */
 
@@ -660,14 +660,16 @@ static int mei_txe_fw_status(struct mei_device *dev,
 }
 
 /**
- *  mei_txe_hw_config - configure hardware at the start of the devices
+ * mei_txe_hw_config - configure hardware at the start of the devices
  *
  * @dev: the device structure
  *
  * Configure hardware at the start of the device should be done only
  *   once at the device probe time
+ *
+ * Return: always 0
  */
-static void mei_txe_hw_config(struct mei_device *dev)
+static int mei_txe_hw_config(struct mei_device *dev)
 {
 
 	struct mei_txe_hw *hw = to_txe_hw(dev);
@@ -677,6 +679,8 @@ static void mei_txe_hw_config(struct mei_device *dev)
 
 	dev_dbg(dev->dev, "aliveness_resp = 0x%08x, readiness = 0x%08x.\n",
 		hw->aliveness, hw->readiness);
+
+	return 0;
 }
 
 /**
diff --git a/drivers/misc/mei/init.c b/drivers/misc/mei/init.c
index b9fef773e71b..bcee77768b91 100644
--- a/drivers/misc/mei/init.c
+++ b/drivers/misc/mei/init.c
@@ -1,6 +1,6 @@
 // SPDX-License-Identifier: GPL-2.0
 /*
- * Copyright (c) 2012-2018, Intel Corporation. All rights reserved.
+ * Copyright (c) 2012-2019, Intel Corporation. All rights reserved.
  * Intel Management Engine Interface (Intel MEI) Linux driver
  */
 
@@ -190,7 +190,9 @@ int mei_start(struct mei_device *dev)
 	/* acknowledge interrupt and stop interrupts */
 	mei_clear_interrupts(dev);
 
-	mei_hw_config(dev);
+	ret = mei_hw_config(dev);
+	if (ret)
+		goto err;
 
 	dev_dbg(dev->dev, "reset in start the mei device.\n");
 
diff --git a/drivers/misc/mei/main.c b/drivers/misc/mei/main.c
index 7310b476323c..f17297f2943d 100644
--- a/drivers/misc/mei/main.c
+++ b/drivers/misc/mei/main.c
@@ -533,24 +533,6 @@ out:
 }
 
 /**
- * mei_compat_ioctl - the compat IOCTL function
- *
- * @file: pointer to file structure
- * @cmd: ioctl command
- * @data: pointer to mei message structure
- *
- * Return: 0 on success , <0 on error
- */
-#ifdef CONFIG_COMPAT
-static long mei_compat_ioctl(struct file *file,
-			unsigned int cmd, unsigned long data)
-{
-	return mei_ioctl(file, cmd, (unsigned long)compat_ptr(data));
-}
-#endif
-
-
-/**
  * mei_poll - the poll function
  *
  * @file: pointer to file structure
@@ -701,6 +683,29 @@ static int mei_fasync(int fd, struct file *file, int band)
 }
 
 /**
+ * trc_show - mei device trc attribute show method
+ *
+ * @device: device pointer
+ * @attr: attribute pointer
+ * @buf:  char out buffer
+ *
+ * Return: number of the bytes printed into buf or error
+ */
+static ssize_t trc_show(struct device *device,
+			struct device_attribute *attr, char *buf)
+{
+	struct mei_device *dev = dev_get_drvdata(device);
+	u32 trc;
+	int ret;
+
+	ret = mei_trc_status(dev, &trc);
+	if (ret)
+		return ret;
+	return sprintf(buf, "%08X\n", trc);
+}
+static DEVICE_ATTR_RO(trc);
+
+/**
  * fw_status_show - mei device fw_status attribute show method
  *
  * @device: device pointer
@@ -887,6 +892,7 @@ static struct attribute *mei_attrs[] = {
 	&dev_attr_tx_queue_limit.attr,
 	&dev_attr_fw_ver.attr,
 	&dev_attr_dev_state.attr,
+	&dev_attr_trc.attr,
 	NULL
 };
 ATTRIBUTE_GROUPS(mei);
@@ -898,9 +904,7 @@ static const struct file_operations mei_fops = {
 	.owner = THIS_MODULE,
 	.read = mei_read,
 	.unlocked_ioctl = mei_ioctl,
-#ifdef CONFIG_COMPAT
-	.compat_ioctl = mei_compat_ioctl,
-#endif
+	.compat_ioctl = compat_ptr_ioctl,
 	.open = mei_open,
 	.release = mei_release,
 	.write = mei_write,
diff --git a/drivers/misc/mei/mei_dev.h b/drivers/misc/mei/mei_dev.h
index 0f2141178299..76f8ff5ff974 100644
--- a/drivers/misc/mei/mei_dev.h
+++ b/drivers/misc/mei/mei_dev.h
@@ -1,6 +1,6 @@
 /* SPDX-License-Identifier: GPL-2.0 */
 /*
- * Copyright (c) 2003-2018, Intel Corporation. All rights reserved.
+ * Copyright (c) 2003-2019, Intel Corporation. All rights reserved.
  * Intel Management Engine Interface (Intel MEI) Linux driver
  */
 
@@ -260,6 +260,7 @@ struct mei_cl {
  * @hw_config        : configure hw
  *
  * @fw_status        : get fw status registers
+ * @trc_status       : get trc status register
  * @pg_state         : power gating state of the device
  * @pg_in_transition : is device now in pg transition
  * @pg_is_enabled    : is power gating enabled
@@ -287,9 +288,11 @@ struct mei_hw_ops {
 	bool (*hw_is_ready)(struct mei_device *dev);
 	int (*hw_reset)(struct mei_device *dev, bool enable);
 	int (*hw_start)(struct mei_device *dev);
-	void (*hw_config)(struct mei_device *dev);
+	int (*hw_config)(struct mei_device *dev);
 
 	int (*fw_status)(struct mei_device *dev, struct mei_fw_status *fw_sts);
+	int (*trc_status)(struct mei_device *dev, u32 *trc);
+
 	enum mei_pg_state (*pg_state)(struct mei_device *dev);
 	bool (*pg_in_transition)(struct mei_device *dev);
 	bool (*pg_is_enabled)(struct mei_device *dev);
@@ -614,9 +617,9 @@ void mei_irq_compl_handler(struct mei_device *dev, struct list_head *cmpl_list);
  */
 
 
-static inline void mei_hw_config(struct mei_device *dev)
+static inline int mei_hw_config(struct mei_device *dev)
 {
-	dev->ops->hw_config(dev);
+	return dev->ops->hw_config(dev);
 }
 
 static inline enum mei_pg_state mei_pg_state(struct mei_device *dev)
@@ -711,6 +714,13 @@ static inline int mei_count_full_read_slots(struct mei_device *dev)
 	return dev->ops->rdbuf_full_slots(dev);
 }
 
+static inline int mei_trc_status(struct mei_device *dev, u32 *trc)
+{
+	if (dev->ops->trc_status)
+		return dev->ops->trc_status(dev, trc);
+	return -EOPNOTSUPP;
+}
+
 static inline int mei_fw_status(struct mei_device *dev,
 				struct mei_fw_status *fw_status)
 {
diff --git a/drivers/misc/mei/pci-me.c b/drivers/misc/mei/pci-me.c
index 3dca63eddaa0..c845b7e40f26 100644
--- a/drivers/misc/mei/pci-me.c
+++ b/drivers/misc/mei/pci-me.c
@@ -98,12 +98,13 @@ static const struct pci_device_id mei_me_pci_tbl[] = {
 
 	{MEI_PCI_DEVICE(MEI_DEV_ID_CMP_LP, MEI_ME_PCH12_CFG)},
 	{MEI_PCI_DEVICE(MEI_DEV_ID_CMP_LP_3, MEI_ME_PCH8_CFG)},
+	{MEI_PCI_DEVICE(MEI_DEV_ID_CMP_V, MEI_ME_PCH12_CFG)},
 
 	{MEI_PCI_DEVICE(MEI_DEV_ID_ICP_LP, MEI_ME_PCH12_CFG)},
 
-	{MEI_PCI_DEVICE(MEI_DEV_ID_TGP_LP, MEI_ME_PCH12_CFG)},
+	{MEI_PCI_DEVICE(MEI_DEV_ID_TGP_LP, MEI_ME_PCH15_CFG)},
 
-	{MEI_PCI_DEVICE(MEI_DEV_ID_MCC, MEI_ME_PCH12_CFG)},
+	{MEI_PCI_DEVICE(MEI_DEV_ID_MCC, MEI_ME_PCH15_CFG)},
 	{MEI_PCI_DEVICE(MEI_DEV_ID_MCC_4, MEI_ME_PCH8_CFG)},
 
 	/* required last entry */
@@ -120,6 +121,13 @@ static inline void mei_me_set_pm_domain(struct mei_device *dev) {}
 static inline void mei_me_unset_pm_domain(struct mei_device *dev) {}
 #endif /* CONFIG_PM */
 
+static int mei_me_read_fws(const struct mei_device *dev, int where, u32 *val)
+{
+	struct pci_dev *pdev = to_pci_dev(dev->dev);
+
+	return pci_read_config_dword(pdev, where, val);
+}
+
 /**
  * mei_me_quirk_probe - probe for devices that doesn't valid ME interface
  *
@@ -191,13 +199,15 @@ static int mei_me_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 	}
 
 	/* allocates and initializes the mei dev structure */
-	dev = mei_me_dev_init(pdev, cfg);
+	dev = mei_me_dev_init(&pdev->dev, cfg);
 	if (!dev) {
 		err = -ENOMEM;
 		goto end;
 	}
 	hw = to_me_hw(dev);
 	hw->mem_addr = pcim_iomap_table(pdev)[0];
+	hw->irq = pdev->irq;
+	hw->read_fws = mei_me_read_fws;
 
 	pci_enable_msi(pdev);
 
diff --git a/drivers/misc/mic/Kconfig b/drivers/misc/mic/Kconfig
index 948f45bbf135..b6841ba6d922 100644
--- a/drivers/misc/mic/Kconfig
+++ b/drivers/misc/mic/Kconfig
@@ -1,8 +1,6 @@
 # SPDX-License-Identifier: GPL-2.0-only
 menu "Intel MIC & related support"
 
-comment "Intel MIC Bus Driver"
-
 config INTEL_MIC_BUS
 	tristate "Intel MIC Bus Driver"
 	depends on 64BIT && PCI && X86
@@ -18,8 +16,6 @@ config INTEL_MIC_BUS
 	  OS and tools for MIC to use with this driver are available from
 	  <http://software.intel.com/en-us/mic-developer>.
 
-comment "SCIF Bus Driver"
-
 config SCIF_BUS
 	tristate "SCIF Bus Driver"
 	depends on 64BIT && PCI && X86
@@ -35,8 +31,6 @@ config SCIF_BUS
 	  OS and tools for MIC to use with this driver are available from
 	  <http://software.intel.com/en-us/mic-developer>.
 
-comment "VOP Bus Driver"
-
 config VOP_BUS
 	tristate "VOP Bus Driver"
 	help
@@ -51,8 +45,6 @@ config VOP_BUS
 	  OS and tools for MIC to use with this driver are available from
 	  <http://software.intel.com/en-us/mic-developer>.
 
-comment "Intel MIC Host Driver"
-
 config INTEL_MIC_HOST
 	tristate "Intel MIC Host Driver"
 	depends on 64BIT && PCI && X86
@@ -71,8 +63,6 @@ config INTEL_MIC_HOST
 	  OS and tools for MIC to use with this driver are available from
 	  <http://software.intel.com/en-us/mic-developer>.
 
-comment "Intel MIC Card Driver"
-
 config INTEL_MIC_CARD
 	tristate "Intel MIC Card Driver"
 	depends on 64BIT && X86
@@ -90,8 +80,6 @@ config INTEL_MIC_CARD
 	  For more information see
 	  <http://software.intel.com/en-us/mic-developer>.
 
-comment "SCIF Driver"
-
 config SCIF
 	tristate "SCIF Driver"
 	depends on 64BIT && PCI && X86 && SCIF_BUS && IOMMU_SUPPORT
@@ -110,8 +98,6 @@ config SCIF
 	  OS and tools for MIC to use with this driver are available from
 	  <http://software.intel.com/en-us/mic-developer>.
 
-comment "Intel MIC Coprocessor State Management (COSM) Drivers"
-
 config MIC_COSM
 	tristate "Intel MIC Coprocessor State Management (COSM) Drivers"
 	depends on 64BIT && PCI && X86 && SCIF
@@ -128,8 +114,6 @@ config MIC_COSM
 	  OS and tools for MIC to use with this driver are available from
 	  <http://software.intel.com/en-us/mic-developer>.
 
-comment "VOP Driver"
-
 config VOP
 	tristate "VOP Driver"
 	depends on VOP_BUS
diff --git a/drivers/misc/ocxl/ocxl_internal.h b/drivers/misc/ocxl/ocxl_internal.h
index 97415afd79f3..345bf843a38e 100644
--- a/drivers/misc/ocxl/ocxl_internal.h
+++ b/drivers/misc/ocxl/ocxl_internal.h
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0+
+/* SPDX-License-Identifier: GPL-2.0+ */
 // Copyright 2017 IBM Corp.
 #ifndef _OCXL_INTERNAL_H_
 #define _OCXL_INTERNAL_H_
diff --git a/drivers/misc/ocxl/trace.h b/drivers/misc/ocxl/trace.h
index 024f417e7e01..17e21cb2addd 100644
--- a/drivers/misc/ocxl/trace.h
+++ b/drivers/misc/ocxl/trace.h
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0+
+/* SPDX-License-Identifier: GPL-2.0+ */
 // Copyright 2017 IBM Corp.
 #undef TRACE_SYSTEM
 #define TRACE_SYSTEM ocxl
diff --git a/drivers/misc/sgi-gru/gruprocfs.c b/drivers/misc/sgi-gru/gruprocfs.c
index 3a8d76d1ccae..2817f4751306 100644
--- a/drivers/misc/sgi-gru/gruprocfs.c
+++ b/drivers/misc/sgi-gru/gruprocfs.c
@@ -119,7 +119,7 @@ static int mcs_statistics_show(struct seq_file *s, void *p)
 		"cch_interrupt_sync", "cch_deallocate", "tfh_write_only",
 		"tfh_write_restart", "tgh_invalidate"};
 
-	seq_printf(s, "%-20s%12s%12s%12s\n", "#id", "count", "aver-clks", "max-clks");
+	seq_puts(s, "#id                        count   aver-clks    max-clks\n");
 	for (op = 0; op < mcsop_last; op++) {
 		count = atomic_long_read(&mcs_op_statistics[op].count);
 		total = atomic_long_read(&mcs_op_statistics[op].total);
@@ -165,8 +165,7 @@ static int cch_seq_show(struct seq_file *file, void *data)
 	const char *mode[] = { "??", "UPM", "INTR", "OS_POLL" };
 
 	if (gid == 0)
-		seq_printf(file, "#%5s%5s%6s%7s%9s%6s%8s%8s\n", "gid", "bid",
-			   "ctx#", "asid", "pid", "cbrs", "dsbytes", "mode");
+		seq_puts(file, "#  gid  bid  ctx#   asid      pid  cbrs dsbytes    mode\n");
 	if (gru)
 		for (i = 0; i < GRU_NUM_CCH; i++) {
 			ts = gru->gs_gts[i];
@@ -191,10 +190,8 @@ static int gru_seq_show(struct seq_file *file, void *data)
 	struct gru_state *gru = GID_TO_GRU(gid);
 
 	if (gid == 0) {
-		seq_printf(file, "#%5s%5s%7s%6s%6s%8s%6s%6s\n", "gid", "nid",
-			   "ctx", "cbr", "dsr", "ctx", "cbr", "dsr");
-		seq_printf(file, "#%5s%5s%7s%6s%6s%8s%6s%6s\n", "", "", "busy",
-			   "busy", "busy", "free", "free", "free");
+		seq_puts(file, "#  gid  nid    ctx   cbr   dsr     ctx   cbr   dsr\n");
+		seq_puts(file, "#             busy  busy  busy    free  free  free\n");
 	}
 	if (gru) {
 		ctxfree = GRU_NUM_CCH - gru->gs_active_contexts;
diff --git a/drivers/misc/sram.c b/drivers/misc/sram.c
index f30448bf3a63..6c1a23cb3e8c 100644
--- a/drivers/misc/sram.c
+++ b/drivers/misc/sram.c
@@ -340,8 +340,6 @@ static const struct of_device_id sram_dt_ids[] = {
 static int sram_probe(struct platform_device *pdev)
 {
 	struct sram_dev *sram;
-	struct resource *res;
-	size_t size;
 	int ret;
 	int (*init_func)(void);
 
@@ -351,25 +349,14 @@ static int sram_probe(struct platform_device *pdev)
 
 	sram->dev = &pdev->dev;
 
-	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	if (!res) {
-		dev_err(sram->dev, "found no memory resource\n");
-		return -EINVAL;
-	}
-
-	size = resource_size(res);
-
-	if (!devm_request_mem_region(sram->dev, res->start, size, pdev->name)) {
-		dev_err(sram->dev, "could not request region for resource\n");
-		return -EBUSY;
-	}
-
 	if (of_property_read_bool(pdev->dev.of_node, "no-memory-wc"))
-		sram->virt_base = devm_ioremap(sram->dev, res->start, size);
+		sram->virt_base = devm_platform_ioremap_resource(pdev, 0);
 	else
-		sram->virt_base = devm_ioremap_wc(sram->dev, res->start, size);
-	if (!sram->virt_base)
-		return -ENOMEM;
+		sram->virt_base = devm_platform_ioremap_resource_wc(pdev, 0);
+	if (IS_ERR(sram->virt_base)) {
+		dev_err(&pdev->dev, "could not map SRAM registers\n");
+		return PTR_ERR(sram->virt_base);
+	}
 
 	sram->pool = devm_gen_pool_create(sram->dev, ilog2(SRAM_GRANULARITY),
 					  NUMA_NO_NODE, NULL);
@@ -382,7 +369,8 @@ static int sram_probe(struct platform_device *pdev)
 	else
 		clk_prepare_enable(sram->clk);
 
-	ret = sram_reserve_regions(sram, res);
+	ret = sram_reserve_regions(sram,
+			platform_get_resource(pdev, IORESOURCE_MEM, 0));
 	if (ret)
 		goto err_disable_clk;
 
diff --git a/drivers/misc/ti-st/st_core.c b/drivers/misc/ti-st/st_core.c
index 7d9e23aa0b92..2ae9948a91e1 100644
--- a/drivers/misc/ti-st/st_core.c
+++ b/drivers/misc/ti-st/st_core.c
@@ -708,7 +708,6 @@ EXPORT_SYMBOL_GPL(st_unregister);
  */
 static int st_tty_open(struct tty_struct *tty)
 {
-	int err = 0;
 	struct st_data_s *st_gdata;
 	pr_info("%s ", __func__);
 
@@ -731,7 +730,8 @@ static int st_tty_open(struct tty_struct *tty)
 	 */
 	st_kim_complete(st_gdata->kim_data);
 	pr_debug("done %s", __func__);
-	return err;
+
+	return 0;
 }
 
 static void st_tty_close(struct tty_struct *tty)
diff --git a/drivers/misc/vmw_vmci/vmci_driver.c b/drivers/misc/vmw_vmci/vmci_driver.c
index 819e35995d32..cbb706dabede 100644
--- a/drivers/misc/vmw_vmci/vmci_driver.c
+++ b/drivers/misc/vmw_vmci/vmci_driver.c
@@ -28,6 +28,10 @@ MODULE_PARM_DESC(disable_guest,
 static bool vmci_guest_personality_initialized;
 static bool vmci_host_personality_initialized;
 
+static DEFINE_MUTEX(vmci_vsock_mutex); /* protects vmci_vsock_transport_cb */
+static vmci_vsock_cb vmci_vsock_transport_cb;
+static bool vmci_vsock_cb_host_called;
+
 /*
  * vmci_get_context_id() - Gets the current context ID.
  *
@@ -45,6 +49,69 @@ u32 vmci_get_context_id(void)
 }
 EXPORT_SYMBOL_GPL(vmci_get_context_id);
 
+/*
+ * vmci_register_vsock_callback() - Register the VSOCK vmci_transport callback.
+ *
+ * The callback will be called when the first host or guest becomes active,
+ * or if they are already active when this function is called.
+ * To unregister the callback, call this function with NULL parameter.
+ *
+ * Returns 0 on success. -EBUSY if a callback is already registered.
+ */
+int vmci_register_vsock_callback(vmci_vsock_cb callback)
+{
+	int err = 0;
+
+	mutex_lock(&vmci_vsock_mutex);
+
+	if (vmci_vsock_transport_cb && callback) {
+		err = -EBUSY;
+		goto out;
+	}
+
+	vmci_vsock_transport_cb = callback;
+
+	if (!vmci_vsock_transport_cb) {
+		vmci_vsock_cb_host_called = false;
+		goto out;
+	}
+
+	if (vmci_guest_code_active())
+		vmci_vsock_transport_cb(false);
+
+	if (vmci_host_users() > 0) {
+		vmci_vsock_cb_host_called = true;
+		vmci_vsock_transport_cb(true);
+	}
+
+out:
+	mutex_unlock(&vmci_vsock_mutex);
+	return err;
+}
+EXPORT_SYMBOL_GPL(vmci_register_vsock_callback);
+
+void vmci_call_vsock_callback(bool is_host)
+{
+	mutex_lock(&vmci_vsock_mutex);
+
+	if (!vmci_vsock_transport_cb)
+		goto out;
+
+	/* In the host, this function could be called multiple times,
+	 * but we want to register it only once.
+	 */
+	if (is_host) {
+		if (vmci_vsock_cb_host_called)
+			goto out;
+
+		vmci_vsock_cb_host_called = true;
+	}
+
+	vmci_vsock_transport_cb(is_host);
+out:
+	mutex_unlock(&vmci_vsock_mutex);
+}
+
 static int __init vmci_drv_init(void)
 {
 	int vmci_err;
diff --git a/drivers/misc/vmw_vmci/vmci_driver.h b/drivers/misc/vmw_vmci/vmci_driver.h
index aab81b67670c..990682480bf6 100644
--- a/drivers/misc/vmw_vmci/vmci_driver.h
+++ b/drivers/misc/vmw_vmci/vmci_driver.h
@@ -36,10 +36,12 @@ extern struct pci_dev *vmci_pdev;
 
 u32 vmci_get_context_id(void);
 int vmci_send_datagram(struct vmci_datagram *dg);
+void vmci_call_vsock_callback(bool is_host);
 
 int vmci_host_init(void);
 void vmci_host_exit(void);
 bool vmci_host_code_active(void);
+int vmci_host_users(void);
 
 int vmci_guest_init(void);
 void vmci_guest_exit(void);
diff --git a/drivers/misc/vmw_vmci/vmci_guest.c b/drivers/misc/vmw_vmci/vmci_guest.c
index 7a84a48c75da..cc8eeb361fcd 100644
--- a/drivers/misc/vmw_vmci/vmci_guest.c
+++ b/drivers/misc/vmw_vmci/vmci_guest.c
@@ -637,6 +637,8 @@ static int vmci_guest_probe_device(struct pci_dev *pdev,
 		  vmci_dev->iobase + VMCI_CONTROL_ADDR);
 
 	pci_set_drvdata(pdev, vmci_dev);
+
+	vmci_call_vsock_callback(false);
 	return 0;
 
 err_free_irq:
diff --git a/drivers/misc/vmw_vmci/vmci_host.c b/drivers/misc/vmw_vmci/vmci_host.c
index 833e2bd248a5..ce16d6b99295 100644
--- a/drivers/misc/vmw_vmci/vmci_host.c
+++ b/drivers/misc/vmw_vmci/vmci_host.c
@@ -108,6 +108,11 @@ bool vmci_host_code_active(void)
 	     atomic_read(&vmci_host_active_users) > 0);
 }
 
+int vmci_host_users(void)
+{
+	return atomic_read(&vmci_host_active_users);
+}
+
 /*
  * Called on open of /dev/vmci.
  */
@@ -338,6 +343,8 @@ static int vmci_host_do_init_context(struct vmci_host_dev *vmci_host_dev,
 	vmci_host_dev->ct_type = VMCIOBJ_CONTEXT;
 	atomic_inc(&vmci_host_active_users);
 
+	vmci_call_vsock_callback(true);
+
 	retval = 0;
 
 out:
@@ -961,7 +968,7 @@ static const struct file_operations vmuser_fops = {
 	.release	= vmci_host_close,
 	.poll		= vmci_host_poll,
 	.unlocked_ioctl	= vmci_host_unlocked_ioctl,
-	.compat_ioctl	= vmci_host_unlocked_ioctl,
+	.compat_ioctl	= compat_ptr_ioctl,
 };
 
 static struct miscdevice vmci_host_miscdev = {