summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorRamesh Errabolu <Ramesh.Errabolu@amd.com>2020-09-29 12:15:13 -0500
committerAlex Deucher <alexander.deucher@amd.com>2020-09-30 15:26:27 -0400
commitf2fa07b39fafb2a5f49c71a504862c5efa57d03e (patch)
tree8450b33c3f7b6eda43227491d455f7a5af015b4a
parent43a4bc828c5b156f08024fd0a966c5c2a3f09af1 (diff)
drm/amd/amdkfd: Surface files in Sysfs to allow users to get number ofamd-drm-next-5.10-2020-09-30drm-next-5.10
compute units that are in use. [Why] Allow user to know how many compute units (CU) are in use at any given moment. [How] Surface files in Sysfs that allow user to determine the number of compute units that are in use for a given process. One Sysfs file is used per device. Signed-off-by: Ramesh Errabolu <Ramesh.Errabolu@amd.com> Reviewed-By: Harish Kasiviswanathan <Harish.Kasiviswanathan@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_priv.h25
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_process.c71
2 files changed, 94 insertions, 2 deletions
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
index 56f92cfff591..b7be5c5751b7 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
@@ -705,6 +705,31 @@ struct kfd_process_device {
struct kobject *kobj_stats;
unsigned int doorbell_index;
+
+ /*
+ * @cu_occupancy: Reports occupancy of Compute Units (CU) of a process
+ * that is associated with device encoded by "this" struct instance. The
+ * value reflects CU usage by all of the waves launched by this process
+ * on this device. A very important property of occupancy parameter is
+ * that its value is a snapshot of current use.
+ *
+ * Following is to be noted regarding how this parameter is reported:
+ *
+ * The number of waves that a CU can launch is limited by couple of
+ * parameters. These are encoded by struct amdgpu_cu_info instance
+ * that is part of every device definition. For GFX9 devices this
+ * translates to 40 waves (simd_per_cu * max_waves_per_simd) when waves
+ * do not use scratch memory and 32 waves (max_scratch_slots_per_cu)
+ * when they do use scratch memory. This could change for future
+ * devices and therefore this example should be considered as a guide.
+ *
+ * All CU's of a device are available for the process. This may not be true
+ * under certain conditions - e.g. CU masking.
+ *
+ * Finally number of CU's that are occupied by a process is affected by both
+ * number of CU's a device has along with number of other competing processes
+ */
+ struct attribute attr_cu_occupancy;
};
#define qpd_to_pdd(x) container_of(x, struct kfd_process_device, qpd)
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
index 17d909c86f50..2807e1c4d59b 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
@@ -249,6 +249,52 @@ cleanup:
}
}
+/**
+ * @kfd_get_cu_occupancy() - Collect number of waves in-flight on this device
+ * by current process. Translates acquired wave count into number of compute units
+ * that are occupied.
+ *
+ * @atr: Handle of attribute that allows reporting of wave count. The attribute
+ * handle encapsulates GPU device it is associated with, thereby allowing collection
+ * of waves in flight, etc
+ *
+ * @buffer: Handle of user provided buffer updated with wave count
+ *
+ * Return: Number of bytes written to user buffer or an error value
+ */
+static int kfd_get_cu_occupancy(struct attribute *attr, char *buffer)
+{
+ int cu_cnt;
+ int wave_cnt;
+ int max_waves_per_cu;
+ struct kfd_dev *dev = NULL;
+ struct kfd_process *proc = NULL;
+ struct kfd_process_device *pdd = NULL;
+
+ pdd = container_of(attr, struct kfd_process_device, attr_cu_occupancy);
+ dev = pdd->dev;
+ if (dev->kfd2kgd->get_cu_occupancy == NULL)
+ return -EINVAL;
+
+ cu_cnt = 0;
+ proc = pdd->process;
+ if (pdd->qpd.queue_count == 0) {
+ pr_debug("Gpu-Id: %d has no active queues for process %d\n",
+ dev->id, proc->pasid);
+ return snprintf(buffer, PAGE_SIZE, "%d\n", cu_cnt);
+ }
+
+ /* Collect wave count from device if it supports */
+ wave_cnt = 0;
+ max_waves_per_cu = 0;
+ dev->kfd2kgd->get_cu_occupancy(dev->kgd, proc->pasid, &wave_cnt,
+ &max_waves_per_cu);
+
+ /* Translate wave count to number of compute units */
+ cu_cnt = (wave_cnt + (max_waves_per_cu - 1)) / max_waves_per_cu;
+ return snprintf(buffer, PAGE_SIZE, "%d\n", cu_cnt);
+}
+
static ssize_t kfd_procfs_show(struct kobject *kobj, struct attribute *attr,
char *buffer)
{
@@ -344,6 +390,7 @@ static ssize_t kfd_procfs_queue_show(struct kobject *kobj,
return 0;
}
+
static ssize_t kfd_procfs_stats_show(struct kobject *kobj,
struct attribute *attr, char *buffer)
{
@@ -359,8 +406,13 @@ static ssize_t kfd_procfs_stats_show(struct kobject *kobj,
PAGE_SIZE,
"%llu\n",
jiffies64_to_msecs(evict_jiffies));
- } else
+
+ /* Sysfs handle that gets CU occupancy is per device */
+ } else if (strcmp(attr->name, "cu_occupancy") == 0) {
+ return kfd_get_cu_occupancy(attr, buffer);
+ } else {
pr_err("Invalid attribute");
+ }
return 0;
}
@@ -466,6 +518,7 @@ static int kfd_procfs_add_sysfs_stats(struct kfd_process *p)
* Create sysfs files for each GPU:
* - proc/<pid>/stats_<gpuid>/
* - proc/<pid>/stats_<gpuid>/evicted_ms
+ * - proc/<pid>/stats_<gpuid>/cu_occupancy
*/
list_for_each_entry(pdd, &p->per_device_data, per_device_list) {
struct kobject *kobj_stats;
@@ -496,6 +549,19 @@ static int kfd_procfs_add_sysfs_stats(struct kfd_process *p)
if (ret)
pr_warn("Creating eviction stats for gpuid %d failed",
(int)pdd->dev->id);
+
+ /* Add sysfs file to report compute unit occupancy */
+ if (pdd->dev->kfd2kgd->get_cu_occupancy != NULL) {
+ pdd->attr_cu_occupancy.name = "cu_occupancy";
+ pdd->attr_cu_occupancy.mode = KFD_SYSFS_FILE_MODE;
+ sysfs_attr_init(&pdd->attr_cu_occupancy);
+ ret = sysfs_create_file(kobj_stats,
+ &pdd->attr_cu_occupancy);
+ if (ret)
+ pr_warn("Creating %s failed for gpuid: %d",
+ pdd->attr_cu_occupancy.name,
+ (int)pdd->dev->id);
+ }
}
err:
return ret;
@@ -537,7 +603,6 @@ static int kfd_procfs_add_sysfs_files(struct kfd_process *p)
return ret;
}
-
void kfd_procfs_del_queue(struct queue *q)
{
if (!q)
@@ -909,6 +974,8 @@ static void kfd_process_wq_release(struct work_struct *work)
sysfs_remove_file(p->kobj, &pdd->attr_vram);
sysfs_remove_file(p->kobj, &pdd->attr_sdma);
sysfs_remove_file(p->kobj, &pdd->attr_evict);
+ if (pdd->dev->kfd2kgd->get_cu_occupancy != NULL)
+ sysfs_remove_file(p->kobj, &pdd->attr_cu_occupancy);
kobject_del(pdd->kobj_stats);
kobject_put(pdd->kobj_stats);
pdd->kobj_stats = NULL;