diff options
author | Connor Abbott <cwabbott0@gmail.com> | 2015-05-30 14:30:12 -0400 |
committer | Dylan Baker <dylan@pnwbakers.com> | 2016-12-19 11:32:39 -0800 |
commit | 30e9f1c4d663a3aa3d43070356ccef8c6b9daa33 (patch) | |
tree | ac1703e67b4bfa9cd614e67bb9309f0e52c4a96b | |
parent | eb10d050a8a842ed947e3a493099d5604f83963f (diff) |
nir: add initial documentation
-rw-r--r-- | src/glsl/nir/docs/Makefile | 177 | ||||
-rw-r--r-- | src/glsl/nir/docs/source/conf.py | 262 | ||||
-rw-r--r-- | src/glsl/nir/docs/source/control_flow.rst | 412 | ||||
-rw-r--r-- | src/glsl/nir/docs/source/index.rst | 24 | ||||
-rw-r--r-- | src/glsl/nir/docs/source/instructions.rst | 286 | ||||
-rw-r--r-- | src/glsl/nir/docs/source/intro.rst | 161 | ||||
-rw-r--r-- | src/glsl/nir/docs/source/metadata.rst | 34 | ||||
-rw-r--r-- | src/glsl/nir/docs/source/variables.rst | 85 |
8 files changed, 1441 insertions, 0 deletions
diff --git a/src/glsl/nir/docs/Makefile b/src/glsl/nir/docs/Makefile new file mode 100644 index 0000000000..c54a5ec2e4 --- /dev/null +++ b/src/glsl/nir/docs/Makefile @@ -0,0 +1,177 @@ +# Makefile for Sphinx documentation +# + +# You can set these variables from the command line. +SPHINXOPTS = +SPHINXBUILD = sphinx-build +PAPER = +BUILDDIR = build + +# User-friendly check for sphinx-build +ifeq ($(shell which $(SPHINXBUILD) >/dev/null 2>&1; echo $$?), 1) +$(error The '$(SPHINXBUILD)' command was not found. Make sure you have Sphinx installed, then set the SPHINXBUILD environment variable to point to the full path of the '$(SPHINXBUILD)' executable. Alternatively you can add the directory with the executable to your PATH. If you don't have Sphinx installed, grab it from http://sphinx-doc.org/) +endif + +# Internal variables. +PAPEROPT_a4 = -D latex_paper_size=a4 +PAPEROPT_letter = -D latex_paper_size=letter +ALLSPHINXOPTS = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) source +# the i18n builder cannot share the environment and doctrees with the others +I18NSPHINXOPTS = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) source + +.PHONY: help clean html dirhtml singlehtml pickle json htmlhelp qthelp devhelp epub latex latexpdf latexpdfja text man texinfo info changes linkcheck doctest gettext xml pseudoxml + +help: + @echo "Please use \`make <target>' where <target> is one of" + @echo " html to make standalone HTML files" + @echo " dirhtml to make HTML files named index.html in directories" + @echo " singlehtml to make a single large HTML file" + @echo " pickle to make pickle files" + @echo " json to make JSON files" + @echo " htmlhelp to make HTML files and a HTML help project" + @echo " qthelp to make HTML files and a qthelp project" + @echo " devhelp to make HTML files and a Devhelp project" + @echo " epub to make an epub" + @echo " latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter" + @echo " latexpdf to make LaTeX files and run them through pdflatex" + @echo " latexpdfja to make LaTeX
files and run them through platex/dvipdfmx" + @echo " text to make text files" + @echo " man to make manual pages" + @echo " texinfo to make Texinfo files" + @echo " info to make Texinfo files and run them through makeinfo" + @echo " gettext to make PO message catalogs" + @echo " changes to make an overview of all changed/added/deprecated items" + @echo " xml to make Docutils-native XML files" + @echo " pseudoxml to make pseudoxml-XML files for display purposes" + @echo " linkcheck to check all external links for integrity" + @echo " doctest to run all doctests embedded in the documentation (if enabled)" + +clean: # aborts when BUILDDIR is empty so the glob below can never expand to /* + rm -rf $(if $(strip $(BUILDDIR)),$(BUILDDIR)/*,$(error BUILDDIR is empty)) + +html: + $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html + @echo + @echo "Build finished. The HTML pages are in $(BUILDDIR)/html." + +dirhtml: + $(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml + @echo + @echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml." + +singlehtml: + $(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml + @echo + @echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml." + +pickle: + $(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle + @echo + @echo "Build finished; now you can process the pickle files." + +json: + $(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json + @echo + @echo "Build finished; now you can process the JSON files." + +htmlhelp: + $(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp + @echo + @echo "Build finished; now you can run HTML Help Workshop with the" \ + ".hhp project file in $(BUILDDIR)/htmlhelp."
+ +qthelp: + $(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp + @echo + @echo "Build finished; now you can run "qcollectiongenerator" with the" \ + ".qhcp project file in $(BUILDDIR)/qthelp, like this:" + @echo "# qcollectiongenerator $(BUILDDIR)/qthelp/NIR.qhcp" + @echo "To view the help file:" + @echo "# assistant -collectionFile $(BUILDDIR)/qthelp/NIR.qhc" + +devhelp: + $(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp + @echo + @echo "Build finished." + @echo "To view the help file:" + @echo "# mkdir -p $$HOME/.local/share/devhelp/NIR" + @echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/NIR" + @echo "# devhelp" + +epub: + $(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub + @echo + @echo "Build finished. The epub file is in $(BUILDDIR)/epub." + +latex: + $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex + @echo + @echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex." + @echo "Run \`make' in that directory to run these through (pdf)latex" \ + "(use \`make latexpdf' here to do that automatically)." + +latexpdf: + $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex + @echo "Running LaTeX files through pdflatex..." + $(MAKE) -C $(BUILDDIR)/latex all-pdf + @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." + +latexpdfja: + $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex + @echo "Running LaTeX files through platex and dvipdfmx..." + $(MAKE) -C $(BUILDDIR)/latex all-pdf-ja + @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." + +text: + $(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text + @echo + @echo "Build finished. The text files are in $(BUILDDIR)/text." + +man: + $(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man + @echo + @echo "Build finished. The manual pages are in $(BUILDDIR)/man." + +texinfo: + $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo + @echo + @echo "Build finished. The Texinfo files are in $(BUILDDIR)/texinfo."
+ @echo "Run \`make' in that directory to run these through makeinfo" \ + "(use \`make info' here to do that automatically)." + +info: # the sub-make is started through the MAKE variable so -j, -n and the jobserver are passed down + $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo + @echo "Running Texinfo files through makeinfo..." + $(MAKE) -C $(BUILDDIR)/texinfo info + @echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo." + +gettext: + $(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale + @echo + @echo "Build finished. The message catalogs are in $(BUILDDIR)/locale." + +changes: + $(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes + @echo + @echo "The overview file is in $(BUILDDIR)/changes." + +linkcheck: + $(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck + @echo + @echo "Link check complete; look for any errors in the above output " \ + "or in $(BUILDDIR)/linkcheck/output.txt." + +doctest: + $(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest + @echo "Testing of doctests in the sources finished, look at the " \ + "results in $(BUILDDIR)/doctest/output.txt." + +xml: + $(SPHINXBUILD) -b xml $(ALLSPHINXOPTS) $(BUILDDIR)/xml + @echo + @echo "Build finished. The XML files are in $(BUILDDIR)/xml." + +pseudoxml: + $(SPHINXBUILD) -b pseudoxml $(ALLSPHINXOPTS) $(BUILDDIR)/pseudoxml + @echo + @echo "Build finished. The pseudo-XML files are in $(BUILDDIR)/pseudoxml." diff --git a/src/glsl/nir/docs/source/conf.py b/src/glsl/nir/docs/source/conf.py new file mode 100644 index 0000000000..37b2cb7b23 --- /dev/null +++ b/src/glsl/nir/docs/source/conf.py @@ -0,0 +1,262 @@ +# -*- coding: utf-8 -*- +# +# NIR documentation build configuration file, created by +# sphinx-quickstart on Fri May 22 15:55:45 2015. +# +# This file is execfile()d with the current directory set to its +# containing dir. +# +# Note that not all possible configuration values are present in this +# autogenerated file. +# +# All configuration values have a default; values that are commented out +# serve to show the default.
+ +import sys +import os + +# If extensions (or modules to document with autodoc) are in another directory, +# add these directories to sys.path here. If the directory is relative to the +# documentation root, use os.path.abspath to make it absolute, like shown here. +#sys.path.insert(0, os.path.abspath('.')) + +# -- General configuration ------------------------------------------------ + +# If your documentation needs a minimal Sphinx version, state it here. +#needs_sphinx = '1.0' + +# Add any Sphinx extension module names here, as strings. They can be +# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom +# ones. +extensions = [ + 'sphinx.ext.todo', + 'sphinx.ext.pngmath', + 'sphinx.ext.graphviz', +] + +# Add any paths that contain templates here, relative to this directory. +templates_path = ['_templates'] + +# The suffix of source filenames. +source_suffix = '.rst' + +# The encoding of source files. +#source_encoding = 'utf-8-sig' + +# The master toctree document. +master_doc = 'index' + +# General information about the project. +project = u'NIR' +copyright = u'2015, Connor Abbott' + +# The version info for the project you're documenting, acts as replacement for +# |version| and |release|, also used in various other places throughout the +# built documents. +# +# The short X.Y version. +version = '0.0.1' +# The full version, including alpha/beta/rc tags. +release = '0.0.1' + +# The language for content autogenerated by Sphinx. Refer to documentation +# for a list of supported languages. +#language = None + +# There are two options for replacing |today|: either, you set today to some +# non-false value, then it is used: +#today = '' +# Else, today_fmt is used as the format for a strftime call. +#today_fmt = '%B %d, %Y' + +# List of patterns, relative to source directory, that match files and +# directories to ignore when looking for source files. 
+exclude_patterns = [] + +# The reST default role (used for this markup: `text`) to use for all +# documents. +#default_role = None + +# If true, '()' will be appended to :func: etc. cross-reference text. +#add_function_parentheses = True + +# If true, the current module name will be prepended to all description +# unit titles (such as .. function::). +#add_module_names = True + +# If true, sectionauthor and moduleauthor directives will be shown in the +# output. They are ignored by default. +#show_authors = False + +# The name of the Pygments (syntax highlighting) style to use. +pygments_style = 'sphinx' + +# A list of ignored prefixes for module index sorting. +#modindex_common_prefix = [] + +# If true, keep warnings as "system message" paragraphs in the built documents. +#keep_warnings = False + + +# -- Options for HTML output ---------------------------------------------- + +# The theme to use for HTML and HTML Help pages. See the documentation for +# a list of builtin themes. +html_theme = 'default' + +# Theme options are theme-specific and customize the look and feel of a theme +# further. For a list of options available for each theme, see the +# documentation. +#html_theme_options = {} + +# Add any paths that contain custom themes here, relative to this directory. +#html_theme_path = [] + +# The name for this set of Sphinx documents. If None, it defaults to +# "<project> v<release> documentation". +#html_title = None + +# A shorter title for the navigation bar. Default is the same as html_title. +#html_short_title = None + +# The name of an image file (relative to this directory) to place at the top +# of the sidebar. +#html_logo = None + +# The name of an image file (within the static path) to use as favicon of the +# docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32 +# pixels large. +#html_favicon = None + +# Add any paths that contain custom static files (such as style sheets) here, +# relative to this directory. 
They are copied after the builtin static files, +# so a file named "default.css" will overwrite the builtin "default.css". +html_static_path = ['_static'] + +# Add any extra paths that contain custom files (such as robots.txt or +# .htaccess) here, relative to this directory. These files are copied +# directly to the root of the documentation. +#html_extra_path = [] + +# If not '', a 'Last updated on:' timestamp is inserted at every page bottom, +# using the given strftime format. +#html_last_updated_fmt = '%b %d, %Y' + +# If true, SmartyPants will be used to convert quotes and dashes to +# typographically correct entities. +#html_use_smartypants = True + +# Custom sidebar templates, maps document names to template names. +#html_sidebars = {} + +# Additional templates that should be rendered to pages, maps page names to +# template names. +#html_additional_pages = {} + +# If false, no module index is generated. +#html_domain_indices = True + +# If false, no index is generated. +#html_use_index = True + +# If true, the index is split into individual pages for each letter. +#html_split_index = False + +# If true, links to the reST sources are added to the pages. +#html_show_sourcelink = True + +# If true, "Created using Sphinx" is shown in the HTML footer. Default is True. +#html_show_sphinx = True + +# If true, "(C) Copyright ..." is shown in the HTML footer. Default is True. +#html_show_copyright = True + +# If true, an OpenSearch description file will be output, and all pages will +# contain a <link> tag referring to it. The value of this option must be the +# base URL from which the finished HTML is served. +#html_use_opensearch = '' + +# This is the file name suffix for HTML files (e.g. ".xhtml"). +#html_file_suffix = None + +# Output file base name for HTML help builder. +htmlhelp_basename = 'NIRdoc' + + +# -- Options for LaTeX output --------------------------------------------- + +latex_elements = { +# The paper size ('letterpaper' or 'a4paper'). 
+#'papersize': 'letterpaper', + +# The font size ('10pt', '11pt' or '12pt'). +#'pointsize': '10pt', + +# Additional stuff for the LaTeX preamble. +#'preamble': '', +} + +# Grouping the document tree into LaTeX files. List of tuples +# (source start file, target name, title, +# author, documentclass [howto, manual, or own class]). +latex_documents = [ + ('index', 'NIR.tex', u'NIR Documentation', + u'Connor Abbott', 'manual'), +] + +# The name of an image file (relative to this directory) to place at the top of +# the title page. +#latex_logo = None + +# For "manual" documents, if this is true, then toplevel headings are parts, +# not chapters. +#latex_use_parts = False + +# If true, show page references after internal links. +#latex_show_pagerefs = False + +# If true, show URL addresses after external links. +#latex_show_urls = False + +# Documents to append as an appendix to all manuals. +#latex_appendices = [] + +# If false, no module index is generated. +#latex_domain_indices = True + + +# -- Options for manual page output --------------------------------------- + +# One entry per manual page. List of tuples +# (source start file, name, description, authors, manual section). +man_pages = [ + ('index', 'nir', u'NIR Documentation', + [u'Connor Abbott'], 1) +] + +# If true, show URL addresses after external links. +#man_show_urls = False + + +# -- Options for Texinfo output ------------------------------------------- + +# Grouping the document tree into Texinfo files. List of tuples +# (source start file, target name, title, author, +# dir menu entry, description, category) +texinfo_documents = [ + ('index', 'NIR', u'NIR Documentation', + u'Connor Abbott', 'NIR', 'One line description of project.', + 'Miscellaneous'), +] + +# Documents to append as an appendix to all manuals. +#texinfo_appendices = [] + +# If false, no module index is generated. +#texinfo_domain_indices = True + +# How to display URL addresses: 'footnote', 'no', or 'inline'. 
+#texinfo_show_urls = 'footnote' + +# If true, do not generate a @detailmenu in the "Top" node's menu. +#texinfo_no_detailmenu = False diff --git a/src/glsl/nir/docs/source/control_flow.rst b/src/glsl/nir/docs/source/control_flow.rst new file mode 100644 index 0000000000..e10e30bf57 --- /dev/null +++ b/src/glsl/nir/docs/source/control_flow.rst @@ -0,0 +1,412 @@ +Control Flow +============ + +Background +---------- + +Traditional Compilers +~~~~~~~~~~~~~~~~~~~~~ + +In most IR's, functions consist of *basic blocks* (often shortened to just +*blocks*), which are a series of instructions that always execute linearly from +the beginning to the end, and the *edges* that connect them. Each basic block +ends with a *jump* or *branch* that transfers execution either unconditionally +to one basic block or conditionally to two (or sometimes more) blocks. Together, +the basic blocks and edges comprise the *Control Flow Graph* or CFG. For +example, this snippet of code: + +:: + + while (foo) { + ... + if (baz) { + ... + } else { + ... + } + if (bar) { + ... + } + } + ... + +could be translated to a CFG that looks like: + +.. graphviz:: + + digraph { + + post [label="..."]; + + /* force 'post' to be at the bottom*/ + {rank="sink" post} + + header [label="foo?"]; + block1 [label="...\nbaz?"]; + then [label="...\nbar?"]; + else [label="...\nbar?"]; + block2 [label="...\nfoo?"]; + + header -> block1; + header -> post; + block1 -> then; + block1 -> else; + then -> header; + else -> header; + then -> block2; + else -> block2; + block2 -> block1; + block2 -> post; + } + +Note that the CFG is a little different from the original code, since it's +been optimized somewhat; for example, we've folded the second ``if`` into the +then and else branches of the first. This is a good thing for traditional, +scalar, hardware, but as we'll see, these types of optimizations are usually +unnecessary and sometimes harmful for GPU's. 
However, this is the standard +model that most literature on compiler theory assumes. + +GPU's +~~~~~ + +A unique feature of most GPU's is that they are designed to run many different +instances of the same program in lock step in order to reduce the size of the +scheduling hardware by sharing it between many different "cores." When control +flow *diverges*, meaning that two different instances (fragments, +vertices, etc.) branch in different directions, the GPU will take both +sides of the branch. For example, if both thread 1 and thread 2 are currently +in block A, and thread 1 wants to branch to block B while thread 2 wants to +branch to block C, then the GPU will first branch to block B with thread 1 +enabled and thread 2 disabled, and then when execution reaches a predefined +"merge block," the GPU will jump back to block C while flipping the enabled +threads and run until the merge block is reached, at which point the control +flow has *converged* and both thread 1 and thread 2 can be enabled. + +Although most GPU's do something like this internally, the details of how it +works can vary greatly from vendor to vendor and generation to generation. +Some GPU's, such as the Mali T6xx series, give each instance a separate +program counter and don't keep track of divergence at all! All Intel chips +have instructions such as ``IF``, ``ELSE``, ``ENDIF``, and ``WHILE``, and on +all current generations, they're faster than the branch-with-merge +instructions described above (although they do a similar thing under the +hood). Also, the rules as to when control flow re-converges can vary based on +the implementation as well as choices made in the backend. + +There's one place besides modifying control-flow that these details matter: +*cross-channel operations*. There are a few operations that GPU's can do where +separate instances (sometimes also called *channels* or *lanes*) can share +information.
One important example is the operation of taking a derivative in +screen-space in fragment shaders, where the value of the input is exchanged +between fragments in a 2x2 block. Derivatives can be taken either explicitly, +or implicitly in order to calculate the appropriate level of detail when +sampling from a texture. Source languages such as GLSL require that these +derivatives be taken in *uniform control flow*, meaning that every channel +must be enabled or disabled together, in order that the inputs to the +derivative are well-defined, and most backends take advantage of this +guarantee. Therefore, optimizations in any intermediate IR must respect this +invariant, which means that the IR must have a good idea of when control flow +diverges. For example, in the following snippet: + +:: + + vec2 color = texture(tex, coordinate * 2.0) + if (foo) { + /* the only time we ever use 'color' */ + output = color; + } + +we can only push the texture operation into the ``if`` if ``foo`` is +*uniform*, meaning it takes the same value for each fragment, since the +texture takes an implicit derivative. + +The NIR Control Flow Model +-------------------------- + +In order to support many different backends, as well as maintain the +structured control information currently given by source languages such as +GLSL and required by some backends such as Intel, NIR uses a control flow +model that explicitly contains structured control flow elements such as loops +and if statements. This approach also gives a clear model for when control +flow converges and diverges that's guaranteed to be supported by every GPU. + +Nevertheless, there's still the need to support the vast existing literature +that takes basic blocks as fundamental. So NIR includes basic blocks as a +primitive as well. Control flow in NIR consists of a *control flow tree* whose +elements are if statements and infinite loops, and whose leaves are basic +blocks. 
In addition, the *successors* of each block, which are the blocks that +a given block branches to, and the *predecessors* of each block, which are the +blocks that branch to a given block, are always kept up-to-date. Finally, +there's the *start block*, which is the first block in the function, and *end +block*, which is a fake basic block that return statements point to. +Together, the start block, end block, and graph created by the successors and +predecessors form the control flow graph that complements the control flow +tree. For example, this: + +:: + + void main(void) { + if (foo) + return; + + while (bar) { + if (baz) + continue; + + ... + } + } + +would become, in NIR: + +.. graphviz:: + + digraph { + clusterrank="local"; + subgraph cluster_main { + style="solid"; + label="main"; + start [label="(start)"]; + start -> then1_block; + start -> else1_block; + + subgraph cluster_if1 { + style="filled"; + fillcolor="lightgrey"; + label="if (foo)"; + + subgraph cluster_then1 { + label="then"; + fillcolor="white"; + + then1_block [label="return"]; + } + + subgraph cluster_else1 { + label="else"; + fillcolor="white"; + + else1_block [label="(empty)"]; + } + } + + pre_loop [label="(empty)"]; + else1_block -> pre_loop; + pre_loop -> loop_header; + + subgraph cluster_loop { + style="filled"; + fillcolor="lightgrey"; + label="loop"; + + loop_header [label="(empty)"]; + then3_block -> loop_header [constraint=false]; + loop_header -> then2_block; + loop_header -> else2_block; + + subgraph cluster_if2 { + fillcolor="white"; + label="if (!bar)"; + + subgraph cluster_then2 { + fillcolor="lightgrey"; + label="then"; + + then2_block [label="break"]; + } + + subgraph cluster_else2 { + fillcolor="lightgrey"; + label="else"; + + else2_block [label="(empty)"]; + } + } + + loop_middle_block [label="(empty)"]; + else2_block -> loop_middle_block; + loop_middle_block -> then3_block; + loop_middle_block -> else3_block; + + subgraph cluster_if3 { + fillcolor="white"; + label="if 
(baz)"; + + subgraph cluster_then3 { + fillcolor="lightgrey"; + label="then"; + + then3_block [label="continue"]; + } + + subgraph cluster_else3 { + fillcolor="lightgrey"; + label="else"; + + else3_block [label="(empty)"]; + } + } + + loop_end_block [label="...", rank="max"]; + else3_block -> loop_end_block; + loop_end_block -> loop_header [constraint=false]; + } + + post_loop [label="(empty)"]; + then2_block -> post_loop; + loop_end_block -> post_loop [style="invis"]; + + post_loop -> end_block; + then1_block -> end_block; + + end_block [label="(end)"]; + } + } + +where the if statements and loops are represented by boxes and the basic +blocks are represented by ovals. One thing that may be initially surprising is +that if statements always have at least one empty basic block in the "else" +portion -- that is, if-then statements are turned into if-then-else +statements. This helps optimizations that push operations into if statements, +since there could be a statement that only ever executes when the condition is +false, and adding the empty block creates a place where those statements can +be moved. On the basic block level, creating the empty block removes a +*critical edge*, which is an edge from a block with more than one successor to +another with more than one predecessor. Consider this if-then statement: + +:: + + if (foo) { + bar = 1; + } + ... + +and its basic block representation: + +.. graphviz:: + + digraph { + pre [label="foo?"]; + then [label="bar = 1;"]; + post [label="..."]; + + pre -> then; + pre -> post [color="red"]; + then -> post; + } + +The red edge is a critical edge, since it's one of two incoming edges and one +of two outgoing edges. Before running optimizations like Partial Redundancy +Elimination (PRE) and Global Code Motion (GCM) whose aim is to move code into +less frequently executed paths, most compilers will *split* the critical edge +by inserting an empty basic block: + +.. 
graphviz:: + + digraph { + pre [label="foo?"]; + then [label="bar = 1;"]; + else [label="(empty)"]; + post [label="..."]; + + pre -> then; + pre -> else; + then -> post; + else -> post; + } + +However, in basic-block-focused compilers, keeping critical edges split all +the time would interfere with other optimizations that aim to reduce the +number of jumps that have to be executed. But because NIR keeps control flow +structured, those sorts of optimizations are either done very differently or +not done at all, and therefore it makes sense to always keep critical edges +split. It's for the same reason that NIR doesn't have a "predicated break" or +"predicated continue" instruction, which is supported by most GPU's: they add +critical edges to the CFG and prevent the compiler from being able to make +code execute only when the break or continue executes. In both cases, it's +easy enough for the backend to perform the optimizations to remove the extra +blocks if necessary. + +We've now explained why most of the extra empty basic blocks were inserted in +the example NIR control flow, but there's still one left. There's an empty +block in between the first if statement and the loop, so that the then and +else branches branch to the empty block and then to the first block of the +loop instead of jumping directly to the loop. Clearly, it isn't there to +remove a critical edge. So why insert it? Well, imagine that there was a +statement in the loop that we determined to be *loop-independent*, so that we +could move it outside the loop, but it was used inside the loop so we couldn't +move it after the loop. The empty block before the loop then comes in handy as +a place to move it. Just as splitting critical edges helps optimizations such +as PRE, inserting so-called *padding blocks* before and after the loop can +help optimizations that do Loop-Invariant Code Motion (LICM), including GCM. 
+ +Putting it Together +~~~~~~~~~~~~~~~~~~~ + +We can put all the rules we've created into a guide for constructing the +control flow tree. To do this, we'll need a few different data types: + +* A *control flow node* (often shortened to "cf node" and defined as + ``nir_cf_node`` in nir.h) is the base class for everything in the control + flow tree. It can be a loop, an if statement, or a basic block. +* A *control flow list* (often shortened to "cf list") is a list of control + flow nodes that corresponds to a series of statements in GLSL. It's used to + represent the body of a function and a loop as well as the then and else + branches of an if statement. In NIR, it's implemented as an intrusive + linked list. +* An *if statement* (defined as ``nir_if``) contains a control flow list for + the then and else branches as well as a condition. +* A *loop* (defined as ``nir_loop``) is an infinite loop (the only way to + exit is through ``break`` statements). It only contains a control flow list + representing the body. +* A *basic block*, in addition to its previous definition, is now a leaf of + the control-flow tree. In NIR, basic blocks are defined in a structure + called ``nir_block``. + +as well as two rules, which together will cover both the if-then-else and loop +padding situations: a control flow list must end and begin with a basic block +and must contain one (and exactly one) block between each non-block control +flow node (i.e. loop or if statement). That is, control flow lists must look +like: + +:: + + block + loop/if + block + loop/if + ... + loop/if + block + +and they have to consist of at least one (possibly empty) basic block. +Finally, there is a class of instructions called "jump instructions", defined +as ``nir_jump_instr`` in nir.h, which is how breaks, continues, and returns +are represented in NIR. Note that "multilevel breaks" and "multilevel +continues", i.e. 
jumping to a loop outside of the innermost one, are currently +not supported, although they may be in the future. There must be at most one +jump instruction per basic block, and it must be at the end of the block. + +If you aren't sure, you should go and convince yourself that the example NIR +control flow given earlier satisfies all these rules, in addition to being +free of critical edges. + +Modifying Control Flow +---------------------- + +We've seen that there are two complementary ways of describing control flow +in NIR, the control flow tree and the control flow graph, which contain +redundant information. To ease the burden of keeping both forms up-to-date, +core NIR provides a number of helpers for rewriting the control flow graph. +They allow you to manipulate the program as if it consists of a series of +statements, like in GLSL, while "under the hood" they guarantee that the +control flow tree is in the correct form and the successors and predecessors +of the basic blocks involved are updated. Currently, these functions include: + +* ``nir_cf_node_insert_before`` +* ``nir_cf_node_insert_after`` +* ``nir_cf_node_insert_begin`` +* ``nir_cf_node_insert_end`` +* ``nir_cf_node_remove`` + +For details see nir.h. + diff --git a/src/glsl/nir/docs/source/index.rst b/src/glsl/nir/docs/source/index.rst new file mode 100644 index 0000000000..a29df31631 --- /dev/null +++ b/src/glsl/nir/docs/source/index.rst @@ -0,0 +1,24 @@ + +NIR Documentation +================= + +Contents: + +.. 
toctree:: + :maxdepth: 2 + + intro + control_flow + instructions + variables + metadata + + + +Indices and tables +================== + +* :ref:`genindex` +* :ref:`modindex` +* :ref:`search` + diff --git a/src/glsl/nir/docs/source/instructions.rst b/src/glsl/nir/docs/source/instructions.rst new file mode 100644 index 0000000000..22b43605df --- /dev/null +++ b/src/glsl/nir/docs/source/instructions.rst @@ -0,0 +1,286 @@ +Instructions +============ + +The basic unit of computation in NIR is the *instruction*. An instruction can +be one of the various types listed below. Each instruction type is a derived +class of ``nir_instr``. Instructions occur in *basic blocks*; each basic block +consists of a list of instructions which is executed from beginning to end. + +ALU Instructions +---------------- + +ALU instructions represent simple operations, such as addition, +multiplication, comparison, etc., that take a certain number of arguments and +return a result that only depends on the arguments. A good rule of thumb is +that only things which can be constant folded should be ALU operations. If it +can't be constant folded, then it should probably be an intrinsic instead. + +ALU operations are *typeless*, meaning that they're only defined to convert a +certain bitpattern input to another bitpattern output. intBitsToFloat() and +friends are implicit. Boolean true is defined to be ~0 (NIR_TRUE) and false +is defined to be NIR_FALSE. + +Each ALU instruction has an *opcode*, which is a member of an enum +(``nir_op``) that describes what it does as well as how many arguments it +takes. Associated with each opcode is an info structure (``nir_op_info``), +which shows how many arguments the opcode takes as well as information such as +whether the opcode is commutative (``op a b == op b a``) or associative (``(op +(op a b) c) == (op a (op b c))``). The info structure for each opcode may be +accessed through a global array called ``nir_op_infos`` that's indexed by the +opcode. 
+ +Even though ALU operations are typeless, each opcode also has an "ALU type" +which can be floating-point, boolean, integer, or unsigned integer. The ALU +type mainly helps backends which use the absolute value, negate, and saturate +modifiers (normally not used by core NIR) -- there's some generic +infrastructure in NIR which will fold iabs and ineg operations into integer +sources, as well as fabs and fneg for floating-point sources, although most +core NIR optimizations will assume that they are kept separate. In addition, +if an operation takes a boolean argument, then the argument may be assumed to +be either NIR_TRUE or NIR_FALSE, and if an operation's result has a boolean +type, then it may produce only NIR_TRUE or NIR_FALSE. + +ALU opcodes also have the notion of *size*, or the number of components. ALU +opcodes are either *non-per-component*, in which case the destination as well +as each of the arguments are explicitly sized, or *per-component*. +Per-component opcodes have the destination size as well as at least one of +the argument sizes set to 0. The sources with their size set to 0 are known +as the *per-component sources*. Conceptually, for per-component instructions, +the destination is computed by looping over each component and computing some +function which depends only on the matching component of the per-component +sources as well as possibly all the components of the non-per-component +sources. In pseudocode: + +:: + + for each component "comp": + dest.comp = some_func(per_comp_src1.comp, per_comp_src2.comp, ..., + non_per_comp_src) + + +Both the info table entry and the enum values are generated from a Python +script called nir_opcodes.py which, when imported, creates an ``opcodes`` +list which contains objects of the ``Opcode`` class. Inside nir_opcodes.py, +opcodes are created using the ``opcode`` function, which constructs the +object and adds it to the list, as well as various helper functions which call +``opcode``. 
For example, the following line in nir_opcodes.py: + +:: + + binop("fmul", tfloat, commutative + associative, "src0 * src1") + +creates a declaration of a nir_op_fmul member of the ``nir_op`` enum, which is +defined in the generated file nir_opcodes.h, as well as the following entry in +the nir_op_infos array (defined in nir_opcodes.c): + +:: + + { + .name = "fmul", + .num_inputs = 2, + .output_size = 0, + .output_type = nir_type_float, + .input_sizes = { + 0, 0 + }, + .input_types = { + nir_type_float, nir_type_float + }, + .algebraic_properties = + NIR_OP_IS_COMMUTATIVE | NIR_OP_IS_ASSOCIATIVE + }, + +The ``src0 * src1`` part of the definition isn't just documentation; it's +actually used to generate code that can constant fold the operation. +Currently, every ALU operation must have a description of how it should be +constant-folded, which makes documenting the operation (including any corner +cases) much simpler in most cases, as well as obviating the need to deal with +per-component and non-per-component subtleties -- the pseudocode above is +implemented for you, and all you have to do is write the ``some_func``. In +this case, the definition of ``fmul`` also creates the following code in +nir_constant_expressions.c: + +:: + + static nir_const_value + evaluate_fmul(unsigned num_components, nir_const_value *_src) + { + nir_const_value _dst_val = { { {0, 0, 0, 0} } }; + + + for (unsigned _i = 0; _i < num_components; _i++) { + float src0 = _src[0].f[_i]; + float src1 = _src[1].f[_i]; + + float dst = src0 * src1; + + _dst_val.f[_i] = dst; + } + + return _dst_val; + } + +as well as the following case in ``nir_eval_const_opcode``: + +:: + + case nir_op_fmul: { + return evaluate_fmul(num_components, src); + break; + } + +For more information on the format of the constant expression strings, see +the documentation for the ``Opcode`` class in nir_opcodes.py. 
+ +Intrinsic Instructions +---------------------- + +Intrinsics are like the stateful sidekicks to ALU instructions; they include +mainly various different kinds of loads/stores, as well as execution +barriers. Similar to ALU instructions, there is an enum of opcodes +(``nir_intrinsic_op``) as well as a table containing information for each +opcode (``nir_intrinsic_infos``). Intrinsics may or may not have a +destination, and they may also include 1 or more constant indices (integers). +Also similar to ALU instructions, both destinations and sources include a +size that's part of the opcode, and both may be made per-component by setting +their size to 0, in which case the size is obtained from the +``num_components`` field of the instruction. Finally, intrinsics may include +one or more variable dereferences, although these are usually lowered away +before they reach the driver. + +Unlike ALU instructions, which can be freely reordered and deleted as long as +they still produce the same result and satisfy the constraints imposed by SSA +form, intrinsics have a few rules regarding how they may be reordered. +Currently, they're rather conservative, but it's expected that they'll get +more refined in the future. There are two flags that are part of +``nir_intrinsic_infos``: ``NIR_INTRINSIC_CAN_REORDER`` and +``NIR_INTRINSIC_CAN_DELETE``. If an intrinsic can be reordered, then it can be +reordered with respect to *any* other instruction; to prevent two intrinsics +from being reordered with respect to each other, both must not have "can +reorder." If an intrinsic can be deleted, then its only dependencies are on +whatever uses its result, and if it's unused then it can be deleted. For +example, if two intrinsic opcodes are for reading and writing to a common +resource, then the store opcode should have neither flag set, and the load +instruction should have only the "can delete" flag set. 
Note that load +instructions can't be reordered with respect to each other, and both load and +store instructions can't be reordered with respect to other loads/stores to +resources which don't alias with the resource you're reading/writing; this is +a deficiency of the model, which is expected to change when more +sophisticated analyses are implemented. + +Two especially important intrinsics are ``load_var`` and ``store_var``, +through which all loads and stores to variables occur. Most accesses (besides +accesses to textures and buffers) to variables happen through these +instructions in core NIR, although they can be lowered to loads/stores to +registers, inputs, outputs, etc. with actual indices before they reach the +backend. + +Unlike ALU instructions, intrinsics haven't yet been converted to the new +Python way of specifying opcodes. Instead, intrinsic opcodes are defined in a +header file, nir_intrinsics.h, which expands to a series of ``INTRINSIC`` +macros. nir_intrinsics.h is included twice, once in nir.h to create the +``nir_intrinsic_op``, and another time in ``nir_intrinsics.c`` to create the +``nir_intrinsic_infos`` array. For example, here's the definition of the +``store_var`` intrinsic: + +:: + + INTRINSIC(store_var, 1, ARR(0), false, 0, 1, 0, 0) + +This says that ``store_var`` has one source of size 0 (and thus is +per-component), has no destination, one variable, no indices, and no semantic +flags (it can't be reordered and can't be deleted). It creates the +nir_intrinsic_store_var enum member, as well as the corresponding entry in +``nir_intrinsic_infos``. + +Call Instructions +----------------- + +Call instructions in NIR are pretty simple. They contain a pointer to the +overload that they reference. Arguments are passed through dereferences, which +may be copied from, copied to, or both depending on whether the matching +parameter in the overload is an input, an output, or both. 
In addition, +there's a return dereference (NULL for functions with void return type) which +gets overwritten with the return value of the function. + +Jump Instructions +----------------- + +A jump instruction in NIR is a break, a continue, or a return. Returns don't +include a value; instead, functions that return a value fill out a +specially-designated variable which is the return variable. For more +information, see :doc:`Control Flow </control_flow>`. + +Texture Instructions +-------------------- + +Even though texture instructions *could* be supported as intrinsics, the +vast number of combinations means that doing so is practically impossible. +Instead, NIR has a dedicated texture instruction. There's still an array of +sources, except that each source also has a *type* associated with it. There +are various source types, each corresponding to a piece of information that +the different texture operations require. There can be at most one source of +each type. In addition, there are several texture operations: + + +* ``nir_texop_tex``: normal texture lookup. +* ``nir_texop_txb``: texture lookup with LOD bias. +* ``nir_texop_txl``: texture lookup with explicit LOD. +* ``nir_texop_txd``: texture lookup with partial derivatives. +* ``nir_texop_txf``: texel fetch with explicit LOD. +* ``nir_texop_txf_ms``: multisample texture fetch. +* ``nir_texop_txs``: query texture size. +* ``nir_texop_lod``: texture lod query. +* ``nir_texop_tg4``: texture gather. +* ``nir_texop_query_levels``: texture levels query. + +It's assumed that frontends will only insert the source types that are needed +given the sampler type and the operation. + +Like a lot of other resources, there are two ways to represent a sampler in +NIR: either using a variable dereference, or as an index in a single flat +array. 
When using an index, there is various information stored in the +texture instruction itself so that backends which need to know the type of +the sampler, whether it's a cube or array sampler, etc. can have that +information even in the lowered form. + +Constant-Load Instructions +-------------------------- + +This instruction creates a constant SSA value. Note that writing to a +register isn't supported; instead, you can use a constant load instruction +plus a move to a register. + +Undef Instructions +------------------ + +Creates an undefined SSA value. At each use of the value, each of the bits +can be assumed to be whatever the implementation or optimization passes deem +convenient. Similar in semantics to a register that's read before it's written. + +Phi Instructions +---------------- + +From Instructions.h in LLVM: + +:: + + // PHINode - The PHINode class is used to represent the magical mystical PHI + // node, that can not exist in nature, but can be synthesized in a computer + // scientist's overactive imagination. + +Phi nodes contain a list of sources matched to predecessor blocks, where +there must be one source for each predecessor block. Conceptually, when a +certain predecessor block branches to the block with the phi node, the +source corresponding to the predecessor block is copied to the destination of +the phi node. If there's more than one phi node in a block, then this +process happens in parallel. Phi nodes must be at the beginning of a block, +i.e. each block must consist of any phi instructions followed by any non-phi +nodes. + +Parallel Copy Instructions +-------------------------- + +Copies a list of registers or SSA values to another list of registers or SSA +values in parallel. Only used internally by the from-SSA pass. 
+ diff --git a/src/glsl/nir/docs/source/intro.rst b/src/glsl/nir/docs/source/intro.rst new file mode 100644 index 0000000000..7c0f78c501 --- /dev/null +++ b/src/glsl/nir/docs/source/intro.rst @@ -0,0 +1,161 @@ +Introduction +============ + +What is NIR? +------------ + +NIR is an Intermediate Representation (IR) that's designed for the needs of +graphics drivers in Mesa. It sits between a frontend that translates another +language or IR such as GLSL IR or TGSI to NIR and the driver's own backend IR. +It includes several optimization passes, as well as some features that are +useful for making translating to the driver's own IR easier, although it is +*not* intended to replace the backend and as such doesn't support +backend-specific tasks such as scheduling, register allocation, etc. + +NIR was designed with several goals in mind: + +* To be backend-agnostic. There are some features, such as registers, that + require some level of backend involvement, but no core NIR optimization pass + depends on the semantics of these features. Instead, almost all interaction + besides simple operations such as addition and subtraction is described using + loads and stores to *variables* that are similar to variables in GLSL. +* To natively understand constructs, such as structured control flow, that are + common in GPUs but not elsewhere. +* To be compatible with the extensive body of literature around compiler + technology. NIR natively supports Static Single Assignment (SSA), which is + a prerequisite for many important optimizations, and all of its optimizations + assume SSA. Furthermore, NIR structures programs into *basic blocks*, which + is often assumed by compiler papers, and it supports several analyses such as + dominance analysis and liveness analysis that are often used by + optimizations. All of these things greatly reduce the hassle of translating + an idea described in a paper into code. 
+ +One thing that NIR is *not* designed to be is a library for users outside of +Mesa. It's not possible to extend NIR at run-time to add additional +operations, although it's flexible enough that it's usually easy to do at +compile time. Furthermore, there is no stable API; it's expected that +producers and consumers will live in-tree so we can update them if we have to +make breaking changes. + +Organization +------------ + +NIR is written in C, although in a very object-oriented manner. Structures +and enums are typedef'ed: + +:: + + typedef struct nir_foo { + /* stuff */ + } nir_foo; + + typedef enum { + nir_enum_thing1, + nir_enum_thing2, + nir_enum_thing3, + } nir_enum; + +and inheritance is done through embedding structures. Downcasting is done +through inline functions defined by the NIR_DEFINE_CAST macro. For example, +here's how an animal structure inherited by cows, cats, and dogs would be +defined: + +:: + + typedef enum { + nir_animal_type_cow, + nir_animal_type_dog, + nir_animal_type_cat, + } nir_animal_type; + + typedef struct { + nir_animal_type type; + /* stuff */ + } nir_animal; + + typedef struct { + nir_animal animal; + } nir_cow; + + typedef struct { + nir_animal animal; + } nir_dog; + + typedef struct { + nir_animal animal; + } nir_cat; + + NIR_DEFINE_CAST(nir_animal_as_cow, nir_animal, nir_cow, animal) + NIR_DEFINE_CAST(nir_animal_as_dog, nir_animal, nir_dog, animal) + NIR_DEFINE_CAST(nir_animal_as_cat, nir_animal, nir_cat, animal) + +Datastructures +~~~~~~~~~~~~~~ + +The core IR consists of various structures defined in nir.h, as well as +functions for creating, destroying, and manipulating them. Currently, these +structures include: + +* ``nir_shader``: represents a linked or unlinked shader. This may contain one + or more functions as well as global registers and variables and other + whole-shader type information. Right now, a ``nir_shader`` usually only + contains one function called "main", but that may change. 
+* ``nir_function``: represents a GLSL-style overloaded function, for linking + purposes. It includes the name as well as a list of overloads. +* ``nir_function_overload``: represents a declaration or definition of a + function overload. If it's a declaration, then the ``impl`` field will be + NULL, and if it's a definition then ``impl`` will point to a + ``nir_function_impl``. +* ``nir_function_impl``: contains function-local stuff such as local + variables and registers. It's also the root of the *control flow tree*. +* ``nir_cf_node``: represents a node in the control flow tree. For more + information, see :doc:`Control Flow </control_flow>`. + + * ``nir_if`` + * ``nir_loop`` + * ``nir_block`` + +* ``nir_instr``: the base class for instructions in NIR. Each ``nir_block`` + has a list of ``nir_instr``'s. For more information, see :doc:`Instructions + </instructions>`. + + * ``nir_alu_instr`` + * ``nir_call_instr`` + * ``nir_jump_instr`` + * ``nir_tex_instr`` + * ``nir_intrinsic_instr`` + * ``nir_load_const_instr`` + * ``nir_ssa_undef_instr`` + * ``nir_phi_instr`` + * ``nir_parallel_copy_instr`` + +* ``nir_dest`` +* ``nir_src`` +* ``nir_ssa_def`` +* ``nir_register`` +* ``nir_variable`` +* ``nir_deref`` + * ``nir_deref_var`` + * ``nir_deref_struct`` + * ``nir_deref_array`` + +Printing +~~~~~~~~ + +NIR includes a function called ``nir_print_shader()`` for printing the +contents of a shader to a given ``FILE *``, which can be useful for +debugging. In addition, ``nir_print_instr()`` is exposed, which can be useful +for examining instructions in the debugger. + +Validation +~~~~~~~~~~ + +There are various bits of redundant information as well as various invariants +which must be satisfied in the IR. Often, passes will have bugs which result +in those invariants being broken or the information left incorrect, which may +only blow up much later when some other pass or analysis relies on that +information. 
To make debugging those sorts of problems much easier, NIR has a +validation pass, ``nir_validate_shader()``, which makes sure that the shader +is valid. It's a no-op on release builds, but when debugging it catches many +bugs at the source instead of much later. + diff --git a/src/glsl/nir/docs/source/metadata.rst b/src/glsl/nir/docs/source/metadata.rst new file mode 100644 index 0000000000..9dc1b8700c --- /dev/null +++ b/src/glsl/nir/docs/source/metadata.rst @@ -0,0 +1,34 @@ +Metadata +======== + +Many of the optimization/lowering passes in NIR require different bits of +metadata that are provided by different analysis passes. Currently, this +metadata includes: + +* dominance information +* SSA value liveness +* source-order block indices + +and it's expected that there will be more in the future. The metadata itself +is currently directly embedded inside the IR datastructures (for example, each +basic block contains information about its parent and children in the +dominance tree), but we still need a way to calculate the metadata only when +actually required. In order to do this, there's a simple API made of two +functions: + +* ``nir_metadata_require()``: Declares that the given metadata (an OR of enum + values) is required. The function automatically calls all of the required + analysis passes for you and, upon its return, the requested metadata is + available and current. +* ``nir_metadata_preserve()``: Called to declare what metadata (if any) was + preserved by the given pass. If the pass didn't touch anything, it doesn't + need to call this function. However, if it adds/removes instructions or + modifies the CFG in any way, it needs to call ``nir_metadata_preserve()``. + The ``nir_metadata_preserve()`` function takes an OR of all of the bits of + metadata that are preserved. That way as new metadata gets added, we don't + have to update every optimization pass to dirty it. 
+ +Unfortunately, there's no way to guarantee that you actually call +``nir_metadata_preserve()`` if you change the shader, so if you don't... +shame on you. + diff --git a/src/glsl/nir/docs/source/variables.rst b/src/glsl/nir/docs/source/variables.rst new file mode 100644 index 0000000000..12ddd1ed45 --- /dev/null +++ b/src/glsl/nir/docs/source/variables.rst @@ -0,0 +1,85 @@ +Variables +========= + +NIR is designed to include backend-independent ways to represent things if +possible, while allowing backends the ability to lower some things if +convenient. One of the main mechanisms for backend independence is through +*variables*, which are based on GLSL variables (the implementation is mostly +taken from GLSL IR). Variables are logical instead of physical, meaning that +all accesses to them are unaware of any layout issues (even though in some +cases, such as UBO's, the API already defines the layout), and they may not +contain any pointers; NIR doesn't even have the concept of a pointer. + +Dereferences +------------ + +All accesses to variables occur through *dereferences*, which let you select +which part of the variable you want to modify or access. A dereference is a +singly-linked list which starts with a pointer to the variable itself and +selects structure members or array elements according to the type of the +variable. The type of the *result of doing* the dereference is also stored as +part of the dereference, to make working with them easier. + +One way that the dereference system is more powerful than GLSL's is that it +supports so-called "wildcard" array dereferences, which are used for array +copies (the ``copy_var`` intrinsic). For example, this allows us to support +something like: + +:: + + struct s { + vec4 a, b; + }; + + struct s foo[10], bar[10]; + //... + foo[*].a = bar[*].a + +where we can copy part of each element of the array. This is useful because it +helps make converting part of the variable into SSA easier. 
For example, if it +turns out that ``foo[*].a`` (i.e. all the elements ``foo[0].a``, ``foo[1].a``, +``foo[2].a``, etc.) is always accessed directly while ``foo[*].b`` is +sometimes accessed indirectly, we can convert ``foo[*].a`` to SSA values while +only keeping around ``foo[*].b``. Therefore, we lower array copies like + +:: + + foo = bar; + +to multiple wildcard copies like + +:: + + foo[*].a = bar[*].a; + foo[*].b = bar[*].b; + +and some of them may be eliminated by the out-of-SSA pass. + +Variables vs. Registers, Inputs/Outputs, etc. +--------------------------------------------- + +Variables are the core NIR way of handling most things that aren't SSA values, +and frontends should prefer emitting code that uses variables as they're +guaranteed to work with the core NIR optimization passes. However, variables +aren't always the easiest thing for backends to work with. Backends work +directly with addresses and locations, and turning variable dereferences into +those involves some work and creates code that then needs to be cleaned up. +For this reason, there are various mechanisms in NIR that can replace most of +the uses of variables and allow the driver's lowering pass (or a common +lowering pass used by the driver) to convert variable references to references +to a flat address space. Some of those include: + +* Registers (really, virtual registers) can be used almost everywhere SSA + values can be used. This means that they can be used either as a replacement + for normal local or global variables (i.e. per-instance variables that can + only be loaded or stored), or as a way to make NIR shaders almost entirely + free of SSA. +* For inputs, outputs, and uniforms, there are load/store intrinsics that + take a direct (constant) and indirect index that are added together. +* For textures, similarly there are places to add direct and indirect indices + for the sampler that are complementary to the variables. 
+ +There are some cases, such as alternate interpolation and image load/store, +where the backend still currently has to deal directly with variables, but +those are mostly due to technical restrictions and may change in the future. + |