1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
|
// SPDX-License-Identifier: MIT
/*
* Copyright © 2021-2023 Intel Corporation
* Copyright (C) 2021-2002 Red Hat
*/
#include <drm/drm_managed.h>
#include <drm/drm_mm.h>
#include <drm/ttm/ttm_device.h>
#include <drm/ttm/ttm_placement.h>
#include <drm/ttm/ttm_range_manager.h>
#include "generated/xe_wa_oob.h"
#include "regs/xe_gt_regs.h"
#include "regs/xe_regs.h"
#include "xe_bo.h"
#include "xe_device.h"
#include "xe_gt.h"
#include "xe_mmio.h"
#include "xe_res_cursor.h"
#include "xe_ttm_stolen_mgr.h"
#include "xe_ttm_vram_mgr.h"
#include "xe_wa.h"
/*
 * Stolen-memory manager state: an embedded VRAM manager plus the CPU-side
 * and GPU-side base offsets of the stolen region.
 */
struct xe_ttm_stolen_mgr {
/* Embedded VRAM manager; to_stolen_mgr() recovers this struct from base.manager */
struct xe_ttm_vram_mgr base;
/*
 * PCI base offset used to build CPU/bus addresses. Stays 0 (zeroed
 * allocation) when no CPU access path was set up, in which case
 * xe_ttm_stolen_io_mem_reserve() fails with -EIO.
 */
resource_size_t io_base;
/* GPU base offset; this is the value xe_ttm_stolen_gpu_offset() returns */
resource_size_t stolen_base;
/*
 * Write-combined ioremap starting at io_base, created by
 * xe_ttm_stolen_mgr_init() only when the I/O size is non-zero; NULL
 * otherwise.
 */
void __iomem *mapping;
};
/*
 * Recover the stolen manager that embeds @man (as base.manager).
 * Only meaningful for the manager registered for XE_PL_STOLEN.
 */
static inline struct xe_ttm_stolen_mgr *
to_stolen_mgr(struct ttm_resource_manager *man)
{
	struct xe_ttm_stolen_mgr *stolen;

	stolen = container_of(man, struct xe_ttm_stolen_mgr, base.manager);

	return stolen;
}
/**
* xe_ttm_stolen_cpu_access_needs_ggtt() - If we can't directly CPU access
* stolen, can we then fallback to mapping through the GGTT.
* @xe: xe device
*
* Some older integrated platforms don't support reliable CPU access for stolen,
* however on such hardware we can always use the mappable part of the GGTT for
* CPU access. Check if that's the case for this device.
*/
bool xe_ttm_stolen_cpu_access_needs_ggtt(struct xe_device *xe)
{
return GRAPHICS_VERx100(xe) < 1270 && !IS_DGFX(xe);
}
/*
 * Locate stolen memory on discrete devices.
 *
 * Fills in mgr->stolen_base from the DSM base register (made relative to the
 * root tile) and, when the region fits in the LMEM BAR, mgr->io_base as well.
 * If it does not fit, io_base is left untouched.
 *
 * Returns the stolen size rounded down to 1M, or 0 if the DSM base lies
 * beyond the tile's physical VRAM size.
 */
static s64 detect_bar2_dgfx(struct xe_device *xe, struct xe_ttm_stolen_mgr *mgr)
{
	struct pci_dev *pdev = to_pci_dev(xe->drm.dev);
	struct xe_tile *root_tile = xe_device_get_root_tile(xe);
	struct xe_gt *mmio_gt = xe_root_mmio_gt(xe);
	u64 vram_size = root_tile->mem.vram.actual_physical_size;
	u64 vram_offset = root_tile->mem.vram.io_start - xe->mem.vram.io_start;
	u64 dsm_size;

	/* Use DSM base address instead for stolen memory */
	mgr->stolen_base = (xe_mmio_read64_2x32(mmio_gt, DSMBASE) & BDSM_MASK) - vram_offset;
	if (drm_WARN_ON(&xe->drm, vram_size < mgr->stolen_base))
		return 0;

	/* Stolen runs from the DSM base to the end of the tile's VRAM */
	dsm_size = vram_size - mgr->stolen_base;

	/* Only expose a CPU path if the region fits in the actual BAR */
	if (mgr->stolen_base + dsm_size <= pci_resource_len(pdev, LMEM_BAR))
		mgr->io_base = root_tile->mem.vram.io_start + mgr->stolen_base;

	/*
	 * A few KB of platform dependent reserved memory may sit at the end of
	 * vram without being part of the DSM. That portion is always smaller
	 * than the DSM granularity, so rounding the size down to the DSM
	 * granularity keeps it out of the managed range.
	 */
	return ALIGN_DOWN(dsm_size, SZ_1M);
}
/*
 * Decode the WOPCM size field of STOLEN_RESERVED into bytes.
 *
 * Encodings 0x0-0x3 map to (1 << field) MiB; encodings 0x5-0x6 map to
 * (1 << (field - 1)) MiB. Any other encoding triggers a WARN and yields 0.
 */
static u32 get_wopcm_size(struct xe_device *xe)
{
	u64 field;

	field = xe_mmio_read64_2x32(xe_root_mmio_gt(xe), STOLEN_RESERVED);
	field = REG_FIELD_GET64(WOPCM_SIZE_MASK, field);

	if (field == 0x5 || field == 0x6) {
		/* These two encodings are offset by one from the shift count */
		field--;
	} else if (field > 0x3) {
		WARN(1, "Missing case wopcm_size=%llx\n", field);
		return 0;
	}

	return (1U << field) * SZ_1M;
}
static u32 detect_bar2_integrated(struct xe_device *xe, struct xe_ttm_stolen_mgr *mgr)
{
struct pci_dev *pdev = to_pci_dev(xe->drm.dev);
struct xe_gt *media_gt = xe_device_get_root_tile(xe)->media_gt;
u32 stolen_size, wopcm_size;
u32 ggc, gms;
ggc = xe_mmio_read32(xe_root_mmio_gt(xe), GGC);
/*
* Check GGMS: it should be fixed 0x3 (8MB), which corresponds to the
* GTT size
*/
if (drm_WARN_ON(&xe->drm, (ggc & GGMS_MASK) != GGMS_MASK))
return 0;
/*
* Graphics >= 1270 uses the offset to the GSMBASE as address in the
* PTEs, together with the DM flag being set. Previously there was no
* such flag so the address was the io_base.
*
* DSMBASE = GSMBASE + 8MB
*/
mgr->stolen_base = SZ_8M;
mgr->io_base = pci_resource_start(pdev, 2) + mgr->stolen_base;
/* return valid GMS value, -EIO if invalid */
gms = REG_FIELD_GET(GMS_MASK, ggc);
switch (gms) {
case 0x0 ... 0x04:
stolen_size = gms * 32 * SZ_1M;
break;
case 0xf0 ... 0xfe:
stolen_size = (gms - 0xf0 + 1) * 4 * SZ_1M;
break;
default:
return 0;
}
/* Carve out the top of DSM as it contains the reserved WOPCM region */
wopcm_size = get_wopcm_size(xe);
if (drm_WARN_ON(&xe->drm, !wopcm_size))
return 0;
stolen_size -= wopcm_size;
if (media_gt && XE_WA(media_gt, 14019821291)) {
u64 gscpsmi_base = xe_mmio_read64_2x32(media_gt, GSCPSMI_BASE)
& ~GENMASK_ULL(5, 0);
/*
* This workaround is primarily implemented by the BIOS. We
* just need to figure out whether the BIOS has applied the
* workaround (meaning the programmed address falls within
* the DSM) and, if so, reserve that part of the DSM to
* prevent accidental reuse. The DSM location should be just
* below the WOPCM.
*/
if (gscpsmi_base >= mgr->io_base &&
gscpsmi_base < mgr->io_base + stolen_size) {
xe_gt_dbg(media_gt,
"Reserving %llu bytes of DSM for Wa_14019821291\n",
mgr->io_base + stolen_size - gscpsmi_base);
stolen_size = gscpsmi_base - mgr->io_base;
}
}
if (drm_WARN_ON(&xe->drm, stolen_size + SZ_8M > pci_resource_len(pdev, 2)))
return 0;
return stolen_size;
}
extern struct resource intel_graphics_stolen_res;

/*
 * Locate stolen memory from the intel_graphics_stolen_res resource.
 *
 * Only implemented for CONFIG_X86. There, io_base is set to the start of
 * PCI BAR 2 and stolen_base to the start of the stolen resource, and the
 * resource size is returned. On other architectures this returns 0 without
 * touching @mgr.
 */
static u64 detect_stolen(struct xe_device *xe, struct xe_ttm_stolen_mgr *mgr)
{
#ifndef CONFIG_X86
	return 0;
#else
	/* Stolen memory is x86 only */
	mgr->stolen_base = intel_graphics_stolen_res.start;

	/* Map into GGTT */
	mgr->io_base = pci_resource_start(to_pci_dev(xe->drm.dev), 2);

	return resource_size(&intel_graphics_stolen_res);
#endif
}
/**
 * xe_ttm_stolen_mgr_init() - Set up the TTM manager for stolen memory
 * @xe: xe device
 *
 * Allocates the stolen manager, detects the stolen region with the method
 * matching the platform (discrete, integrated with GRAPHICS_VERx100() >= 1270,
 * or older integrated), and registers it for XE_PL_STOLEN. When the region is
 * directly CPU accessible it is also mapped write-combined.
 *
 * The function returns nothing: every failure (allocation failure, no stolen
 * memory detected, manager init error) is only logged and leaves the device
 * without stolen memory support.
 */
void xe_ttm_stolen_mgr_init(struct xe_device *xe)
{
	struct xe_ttm_stolen_mgr *mgr = drmm_kzalloc(&xe->drm, sizeof(*mgr), GFP_KERNEL);
	struct pci_dev *pdev = to_pci_dev(xe->drm.dev);
	u64 stolen_size, io_size, pgsize;
	int err;

	/* The detect helpers below write through mgr, so it must be valid */
	if (!mgr) {
		drm_dbg_kms(&xe->drm, "Stolen mgr init failed\n");
		return;
	}

	if (IS_DGFX(xe))
		stolen_size = detect_bar2_dgfx(xe, mgr);
	else if (GRAPHICS_VERx100(xe) >= 1270)
		stolen_size = detect_bar2_integrated(xe, mgr);
	else
		stolen_size = detect_stolen(xe, mgr);

	if (!stolen_size) {
		drm_dbg_kms(&xe->drm, "No stolen memory support\n");
		return;
	}

	/* Allocation granularity: at least one CPU page, 64K when required */
	pgsize = xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K ? SZ_64K : SZ_4K;
	if (pgsize < PAGE_SIZE)
		pgsize = PAGE_SIZE;

	/*
	 * We don't try to attempt partial visible support for stolen vram,
	 * since stolen is always at the end of vram, and the BAR size is pretty
	 * much always 256M, with small-bar.
	 */
	io_size = 0;
	if (mgr->io_base && !xe_ttm_stolen_cpu_access_needs_ggtt(xe))
		io_size = stolen_size;

	err = __xe_ttm_vram_mgr_init(xe, &mgr->base, XE_PL_STOLEN, stolen_size,
				     io_size, pgsize);
	if (err) {
		drm_dbg_kms(&xe->drm, "Stolen mgr init failed: %i\n", err);
		return;
	}

	drm_dbg_kms(&xe->drm, "Initialized stolen memory support with %llu bytes\n",
		    stolen_size);

	/*
	 * A failed ioremap leaves mapping NULL; the bar2 reserve path checks
	 * for that before handing out a CPU address.
	 */
	if (io_size)
		mgr->mapping = devm_ioremap_wc(&pdev->dev, mgr->io_base, io_size);
}
/*
 * Compute the I/O address of @offset within the stolen @bo.
 *
 * When CPU access has to go through the GGTT, the result is io_base plus the
 * BO's GGTT address plus @offset. Otherwise it is io_base plus the start of
 * the resource cursor positioned at @offset. Warns if io_base is not set.
 */
u64 xe_ttm_stolen_io_offset(struct xe_bo *bo, u32 offset)
{
	struct xe_device *xe = xe_bo_device(bo);
	struct ttm_resource_manager *man = ttm_manager_type(&xe->ttm, XE_PL_STOLEN);
	struct xe_ttm_stolen_mgr *stolen = to_stolen_mgr(man);
	struct xe_res_cursor cursor;

	XE_WARN_ON(!stolen->io_base);

	if (!xe_ttm_stolen_cpu_access_needs_ggtt(xe)) {
		xe_res_first(bo->ttm.resource, offset, 4096, &cursor);
		return stolen->io_base + cursor.start;
	}

	return stolen->io_base + xe_bo_ggtt_addr(bo) + offset;
}
static int __xe_ttm_stolen_io_mem_reserve_bar2(struct xe_device *xe,
struct xe_ttm_stolen_mgr *mgr,
struct ttm_resource *mem)
{
struct xe_res_cursor cur;
if (!mgr->io_base)
return -EIO;
xe_res_first(mem, 0, 4096, &cur);
mem->bus.offset = cur.start;
drm_WARN_ON(&xe->drm, !(mem->placement & TTM_PL_FLAG_CONTIGUOUS));
if (mem->placement & TTM_PL_FLAG_CONTIGUOUS && mgr->mapping)
mem->bus.addr = (u8 __force *)mgr->mapping + mem->bus.offset;
mem->bus.offset += mgr->io_base;
mem->bus.is_iomem = true;
mem->bus.caching = ttm_write_combined;
return 0;
}
/*
 * Fill in the bus placement of @mem when CPU access goes through the GGTT.
 *
 * On CONFIG_X86, bus.offset is the BO's GGTT address plus io_base. The BO
 * must have been created with XE_BO_CREATE_GGTT_BIT, otherwise this warns
 * and returns -EIO. Without CONFIG_X86 this always warns and returns -EIO.
 */
static int __xe_ttm_stolen_io_mem_reserve_stolen(struct xe_device *xe,
						 struct xe_ttm_stolen_mgr *mgr,
						 struct ttm_resource *mem)
{
#ifdef CONFIG_X86
	struct xe_bo *stolen_bo = ttm_to_xe_bo(mem->bo);

	XE_WARN_ON(IS_DGFX(xe));

	/* XXX: Require BO to be mapped to GGTT? */
	if (drm_WARN_ON(&xe->drm, !(stolen_bo->flags & XE_BO_CREATE_GGTT_BIT)))
		return -EIO;

	/* GGTT is always contiguously mapped */
	mem->bus.offset = xe_bo_ggtt_addr(stolen_bo) + mgr->io_base;
	mem->bus.caching = ttm_write_combined;
	mem->bus.is_iomem = true;

	return 0;
#else
	/* How is it even possible to get here without gen12 stolen? */
	drm_WARN_ON(&xe->drm, 1);

	return -EIO;
#endif
}
/*
 * Reserve I/O space for a stolen-memory resource.
 *
 * Dispatches to the GGTT-based or the BAR-based helper depending on
 * xe_ttm_stolen_cpu_access_needs_ggtt().
 *
 * Returns the helper's result, or -EIO when no stolen manager is registered
 * or it has no io_base.
 */
int xe_ttm_stolen_io_mem_reserve(struct xe_device *xe, struct ttm_resource *mem)
{
	struct ttm_resource_manager *man = ttm_manager_type(&xe->ttm, XE_PL_STOLEN);
	struct xe_ttm_stolen_mgr *stolen = NULL;

	if (man)
		stolen = to_stolen_mgr(man);

	if (!stolen || !stolen->io_base)
		return -EIO;

	return xe_ttm_stolen_cpu_access_needs_ggtt(xe) ?
		__xe_ttm_stolen_io_mem_reserve_stolen(xe, stolen, mem) :
		__xe_ttm_stolen_io_mem_reserve_bar2(xe, stolen, mem);
}
/*
 * Return the GPU base offset (stolen_base) of the stolen manager registered
 * for XE_PL_STOLEN. The manager is dereferenced without a NULL check.
 */
u64 xe_ttm_stolen_gpu_offset(struct xe_device *xe)
{
	struct ttm_resource_manager *man = ttm_manager_type(&xe->ttm, XE_PL_STOLEN);
	struct xe_ttm_stolen_mgr *stolen = to_stolen_mgr(man);

	return stolen->stolen_base;
}
|