summaryrefslogtreecommitdiff
path: root/optimus.tex
blob: 365050ab1c433ec28d5793a0b7c3aa9438f4fbcf (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
%\logo{\includegraphics[width=1.5cm]{imgs/ensib.jpg}}
\documentclass[11pt,english,compress]{beamer}
\usepackage[utf8]{inputenc}
\usepackage{cmap}
\usepackage{verbatim}
\usepackage{eurosym}
\usepackage{lmodern}
\usepackage{listings}

\useoutertheme{smoothbars}
\useinnertheme[shadow=true]{rounded}
\usecolortheme{orchid}
\usecolortheme{whale}
\title{Optimus and cross-device synchronization support}
\subtitle{What's done and what needs to be done?}
\author{Maarten Lankhorst}
\institute{Canonical Ltd}
%\logo{\includegraphics[width=1.5cm]{imgs/ensib.jpg}}

\AtBeginSection[]{
  \begin{frame}{Summary}
  \small \tableofcontents[currentsection, hideothersubsections]
  \end{frame} 
}

\begin{document}

\lstset{breakatwhitespace=true,
language=C++,
columns=fullflexible,
keepspaces=true,
breaklines=true,
showstringspaces=false,
extendedchars=true}

\setbeamertemplate{navigation symbols}{}

\begin{frame}
	\titlepage
\end{frame}

\section{Architecture}
	\subsection{The parts involved}
		\begin{frame}
			\begin{block}{The primitives}
				\begin{itemize}
					\item DMA-BUF
					\item X.org server
					\item individual DDX drivers
					\item xrandr
				\end{itemize}
			\end{block}
		\end{frame}

		\begin{frame}
			\begin{block}{DMA-BUF}
				\begin{itemize}
					\item All kernel video drivers have to be modified to support DMA-BUF
					\item DMA-BUF objects can be imported and exported as fds.
					\item No synchronization is done yet, expect tearing or worse.
				\end{itemize}
			\end{block}
		\end{frame}

		\begin{frame}
			\begin{block}{X.org server, DDX and xrandr}
				\begin{itemize}
					\item Platform bus support is added for supporting dynamic GPU hotplugging
					\item xrandr 1.4 is used to configure GPU slaves and connect them to masters
					\item Each DDX needs to add support for platform bus, and also:
					\begin{itemize}
					\item code for being an output slave (xf86-video-modesetting, intel)
						\begin{itemize}
						\item USB DisplayLink devices that have no hardware acceleration of their own, or intel when a display is connected to nouveau.
						\end{itemize}
					\item code for being an offload slave (xf86-video-ati/nouveau)
						\begin{itemize}
						\item Optimus devices where the slave has more powerful hardware than the master.
						\end{itemize}
					\item code for being an offload/output master (xf86-video-ati/intel/nouveau)
						\begin{itemize}
						\item Required to support offload/output slaves. Offload master is untested for ati and nouveau.
						\end{itemize}
					\end{itemize}
					\item code for switching between muxed GPUs (WIP)
						\begin{itemize}
						\item Robustness extension
						\item Smooth switching
						\end{itemize}
				\end{itemize}
			\end{block}
		\end{frame}

	\subsection{Synchronization}
		\begin{frame}
			\begin{block}{The problem}
				\begin{itemize}
					\item Multiple devices involved
					\item Arbitrary number of buffers shared in an arbitrary order between those devices
					\item Preferably no deadlock, either on the CPU or because GPU devices are waiting on each other for buffer use completion
				\end{itemize}
			\end{block}
		\end{frame}

		\begin{frame}
			\begin{block}{Example deadlock}
				\begin{itemize}
					\item 2 devices, devA and devB.
					\item devB imports bufA, devA imports bufB.
					\item Both want to use bufA and bufB, but want to reserve them in opposite order.
					\item Deadlock! Both hold a buffer and wait for the other.
					\item This can happen on the CPU if you're lucky, or on both GPUs if unlucky.
				\end{itemize}
			\end{block}
		\end{frame}

		\begin{frame}
			\begin{block}{TTM Style reservations}
 				\begin{itemize}
					\item Literally pick up the code from TTM that manages reservations.
					\item Use reservation\textunderscore ticket for reserving multiple objects.
					\item Use fence as the cross-device synchronization primitive.
				\end{itemize}
			\end{block}
		\end{frame}

		\begin{frame}
			\begin{block}{Fence API (WIP!)}
				\begin{itemize}
					\item Work in progress, not even the name is finalized.
					\item Dumbest possible primitive for synchronization
					\item Signaled upon completion, software and hardware waiters can be waiting on completion.
					\item Hardware fences might unblock other hardware.
					\item Object might have a single exclusive or multiple shared fences.
				\end{itemize}
			\end{block}
		\end{frame}


		\begin{frame}
			\begin{block}{Reservation API (WIP!)}
				\begin{itemize}
					\item Work in progress, not even the name is finalized.
					\item lockdep annotations have been added, will pick up most common errors.
					\item reservation\textunderscore ticket for performing and annotating multi-object reservations,
					      passed to object\textunderscore reserve.
					\item reservation\textunderscore object is a primitive that is used for synchronization,
					      and also contains pointers to fences.
					\item Eviction support is still a TODO!
				\end{itemize}
			\end{block}
		\end{frame}

		\begin{frame}
			\begin{block}{Fence API rules}
				\begin{itemize}
					\item When holding a reservation on an object, the fence members can be read and written.
					\item Any fence calls must be made after reserving and before unreserving.
					\item Only one new fence needs to be allocated for all reservation buffers held.
					\item BUF\textunderscore MAX\textunderscore SHARED\textunderscore FENCE shared slots, 1 exclusive slot.
					\item For a new shared fence, wait on the last exclusive fence before starting.
					\item If you request exclusive access, you should wait on all previous shared fences before starting
					      or, if there are none, wait on the last exclusive fence.
				\end{itemize}
			\end{block}
		\end{frame}

\section{Demo}
\section{Questions}

\end{document}