author    | Connor Abbott <cwabbott0@gmail.com>      | 2015-05-30 14:30:12 -0400
committer | Dylan Baker <dylan@pnwbakers.com>        | 2016-12-19 11:32:39 -0800
commit    | 30e9f1c4d663a3aa3d43070356ccef8c6b9daa33 (patch)
tree      | ac1703e67b4bfa9cd614e67bb9309f0e52c4a96b
parent    | eb10d050a8a842ed947e3a493099d5604f83963f (diff)
nir: add initial documentation
-rw-r--r-- | src/glsl/nir/docs/Makefile                | 177
-rw-r--r-- | src/glsl/nir/docs/source/conf.py          | 262
-rw-r--r-- | src/glsl/nir/docs/source/control_flow.rst | 412
-rw-r--r-- | src/glsl/nir/docs/source/index.rst        |  24
-rw-r--r-- | src/glsl/nir/docs/source/instructions.rst | 286
-rw-r--r-- | src/glsl/nir/docs/source/intro.rst        | 161
-rw-r--r-- | src/glsl/nir/docs/source/metadata.rst     |  34
-rw-r--r-- | src/glsl/nir/docs/source/variables.rst    |  85
8 files changed, 1441 insertions, 0 deletions
diff --git a/src/glsl/nir/docs/Makefile b/src/glsl/nir/docs/Makefile new file mode 100644 index 0000000000..c54a5ec2e4 --- /dev/null +++ b/src/glsl/nir/docs/Makefile @@ -0,0 +1,177 @@ +# Makefile for Sphinx documentation +# + +# You can set these variables from the command line. +SPHINXOPTS = +SPHINXBUILD = sphinx-build +PAPER = +BUILDDIR = build + +# User-friendly check for sphinx-build +ifeq ($(shell which $(SPHINXBUILD) >/dev/null 2>&1; echo $$?), 1) +$(error The '$(SPHINXBUILD)' command was not found. Make sure you have Sphinx installed, then set the SPHINXBUILD environment variable to point to the full path of the '$(SPHINXBUILD)' executable. Alternatively you can add the directory with the executable to your PATH. If you don't have Sphinx installed, grab it from http://sphinx-doc.org/) +endif + +# Internal variables. +PAPEROPT_a4 = -D latex_paper_size=a4 +PAPEROPT_letter = -D latex_paper_size=letter +ALLSPHINXOPTS = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) source +# the i18n builder cannot share the environment and doctrees with the others +I18NSPHINXOPTS = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) source + +.PHONY: help clean html dirhtml singlehtml pickle json htmlhelp qthelp devhelp epub latex latexpdf text man changes linkcheck doctest gettext + +help: + @echo "Please use \`make <target>' where <target> is one of" + @echo " html to make standalone HTML files" + @echo " dirhtml to make HTML files named index.html in directories" + @echo " singlehtml to make a single large HTML file" + @echo " pickle to make pickle files" + @echo " json to make JSON files" + @echo " htmlhelp to make HTML files and a HTML help project" + @echo " qthelp to make HTML files and a qthelp project" + @echo " devhelp to make HTML files and a Devhelp project" + @echo " epub to make an epub" + @echo " latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter" + @echo " latexpdf to make LaTeX files and run them through pdflatex" + @echo " latexpdfja to make LaTeX files and run them through platex/dvipdfmx" + @echo " text to make text files" + @echo " man to make manual pages" + @echo " texinfo to make Texinfo files" + @echo " info to make Texinfo files and run them through makeinfo" + @echo " gettext to make PO message catalogs" + @echo " changes to make an overview of all changed/added/deprecated items" + @echo " xml to make Docutils-native XML files" + @echo " pseudoxml to make pseudoxml-XML files for display purposes" + @echo " linkcheck to check all external links for integrity" + @echo " doctest to run all doctests embedded in the documentation (if enabled)" + +clean: + rm -rf $(BUILDDIR)/* + +html: + $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html + @echo + @echo "Build finished. The HTML pages are in $(BUILDDIR)/html." + +dirhtml: + $(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml + @echo + @echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml." + +singlehtml: + $(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml + @echo + @echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml." + +pickle: + $(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle + @echo + @echo "Build finished; now you can process the pickle files." + +json: + $(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json + @echo + @echo "Build finished; now you can process the JSON files." 
+ +htmlhelp: + $(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp + @echo + @echo "Build finished; now you can run HTML Help Workshop with the" \ + ".hhp project file in $(BUILDDIR)/htmlhelp." + +qthelp: + $(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp + @echo + @echo "Build finished; now you can run "qcollectiongenerator" with the" \ + ".qhcp project file in $(BUILDDIR)/qthelp, like this:" + @echo "# qcollectiongenerator $(BUILDDIR)/qthelp/NIR.qhcp" + @echo "To view the help file:" + @echo "# assistant -collectionFile $(BUILDDIR)/qthelp/NIR.qhc" + +devhelp: + $(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp + @echo + @echo "Build finished." + @echo "To view the help file:" + @echo "# mkdir -p $$HOME/.local/share/devhelp/NIR" + @echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/NIR" + @echo "# devhelp" + +epub: + $(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub + @echo + @echo "Build finished. The epub file is in $(BUILDDIR)/epub." + +latex: + $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex + @echo + @echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex." + @echo "Run \`make' in that directory to run these through (pdf)latex" \ + "(use \`make latexpdf' here to do that automatically)." + +latexpdf: + $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex + @echo "Running LaTeX files through pdflatex..." + $(MAKE) -C $(BUILDDIR)/latex all-pdf + @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." + +latexpdfja: + $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex + @echo "Running LaTeX files through platex and dvipdfmx..." + $(MAKE) -C $(BUILDDIR)/latex all-pdf-ja + @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." + +text: + $(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text + @echo + @echo "Build finished. The text files are in $(BUILDDIR)/text." + +man: + $(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man + @echo + @echo "Build finished. The manual pages are in $(BUILDDIR)/man." + +texinfo: + $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo + @echo + @echo "Build finished. The Texinfo files are in $(BUILDDIR)/texinfo." + @echo "Run \`make' in that directory to run these through makeinfo" \ + "(use \`make info' here to do that automatically)." + +info: + $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo + @echo "Running Texinfo files through makeinfo..." + make -C $(BUILDDIR)/texinfo info + @echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo." + +gettext: + $(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale + @echo + @echo "Build finished. The message catalogs are in $(BUILDDIR)/locale." + +changes: + $(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes + @echo + @echo "The overview file is in $(BUILDDIR)/changes." + +linkcheck: + $(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck + @echo + @echo "Link check complete; look for any errors in the above output " \ + "or in $(BUILDDIR)/linkcheck/output.txt." + +doctest: + $(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest + @echo "Testing of doctests in the sources finished, look at the " \ + "results in $(BUILDDIR)/doctest/output.txt." + +xml: + $(SPHINXBUILD) -b xml $(ALLSPHINXOPTS) $(BUILDDIR)/xml + @echo + @echo "Build finished. The XML files are in $(BUILDDIR)/xml." + +pseudoxml: + $(SPHINXBUILD) -b pseudoxml $(ALLSPHINXOPTS) $(BUILDDIR)/pseudoxml + @echo + @echo "Build finished. 
The pseudo-XML files are in $(BUILDDIR)/pseudoxml." diff --git a/src/glsl/nir/docs/source/conf.py b/src/glsl/nir/docs/source/conf.py new file mode 100644 index 0000000000..37b2cb7b23 --- /dev/null +++ b/src/glsl/nir/docs/source/conf.py @@ -0,0 +1,262 @@ +# -*- coding: utf-8 -*- +# +# NIR documentation build configuration file, created by +# sphinx-quickstart on Fri May 22 15:55:45 2015. +# +# This file is execfile()d with the current directory set to its +# containing dir. +# +# Note that not all possible configuration values are present in this +# autogenerated file. +# +# All configuration values have a default; values that are commented out +# serve to show the default. + +import sys +import os + +# If extensions (or modules to document with autodoc) are in another directory, +# add these directories to sys.path here. If the directory is relative to the +# documentation root, use os.path.abspath to make it absolute, like shown here. +#sys.path.insert(0, os.path.abspath('.')) + +# -- General configuration ------------------------------------------------ + +# If your documentation needs a minimal Sphinx version, state it here. +#needs_sphinx = '1.0' + +# Add any Sphinx extension module names here, as strings. They can be +# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom +# ones. +extensions = [ + 'sphinx.ext.todo', + 'sphinx.ext.pngmath', + 'sphinx.ext.graphviz', +] + +# Add any paths that contain templates here, relative to this directory. +templates_path = ['_templates'] + +# The suffix of source filenames. +source_suffix = '.rst' + +# The encoding of source files. +#source_encoding = 'utf-8-sig' + +# The master toctree document. +master_doc = 'index' + +# General information about the project. +project = u'NIR' +copyright = u'2015, Connor Abbott' + +# The version info for the project you're documenting, acts as replacement for +# |version| and |release|, also used in various other places throughout the +# built documents. +# +# The short X.Y version. +version = '0.0.1' +# The full version, including alpha/beta/rc tags. +release = '0.0.1' + +# The language for content autogenerated by Sphinx. Refer to documentation +# for a list of supported languages. +#language = None + +# There are two options for replacing |today|: either, you set today to some +# non-false value, then it is used: +#today = '' +# Else, today_fmt is used as the format for a strftime call. +#today_fmt = '%B %d, %Y' + +# List of patterns, relative to source directory, that match files and +# directories to ignore when looking for source files. +exclude_patterns = [] + +# The reST default role (used for this markup: `text`) to use for all +# documents. +#default_role = None + +# If true, '()' will be appended to :func: etc. cross-reference text. +#add_function_parentheses = True + +# If true, the current module name will be prepended to all description +# unit titles (such as .. function::). +#add_module_names = True + +# If true, sectionauthor and moduleauthor directives will be shown in the +# output. They are ignored by default. +#show_authors = False + +# The name of the Pygments (syntax highlighting) style to use. +pygments_style = 'sphinx' + +# A list of ignored prefixes for module index sorting. +#modindex_common_prefix = [] + +# If true, keep warnings as "system message" paragraphs in the built documents. +#keep_warnings = False + + +# -- Options for HTML output ---------------------------------------------- + +# The theme to use for HTML and HTML Help pages. 
See the documentation for +# a list of builtin themes. +html_theme = 'default' + +# Theme options are theme-specific and customize the look and feel of a theme +# further. For a list of options available for each theme, see the +# documentation. +#html_theme_options = {} + +# Add any paths that contain custom themes here, relative to this directory. +#html_theme_path = [] + +# The name for this set of Sphinx documents. If None, it defaults to +# "<project> v<release> documentation". +#html_title = None + +# A shorter title for the navigation bar. Default is the same as html_title. +#html_short_title = None + +# The name of an image file (relative to this directory) to place at the top +# of the sidebar. +#html_logo = None + +# The name of an image file (within the static path) to use as favicon of the +# docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32 +# pixels large. +#html_favicon = None + +# Add any paths that contain custom static files (such as style sheets) here, +# relative to this directory. They are copied after the builtin static files, +# so a file named "default.css" will overwrite the builtin "default.css". +html_static_path = ['_static'] + +# Add any extra paths that contain custom files (such as robots.txt or +# .htaccess) here, relative to this directory. These files are copied +# directly to the root of the documentation. +#html_extra_path = [] + +# If not '', a 'Last updated on:' timestamp is inserted at every page bottom, +# using the given strftime format. +#html_last_updated_fmt = '%b %d, %Y' + +# If true, SmartyPants will be used to convert quotes and dashes to +# typographically correct entities. +#html_use_smartypants = True + +# Custom sidebar templates, maps document names to template names. +#html_sidebars = {} + +# Additional templates that should be rendered to pages, maps page names to +# template names. +#html_additional_pages = {} + +# If false, no module index is generated. +#html_domain_indices = True + +# If false, no index is generated. +#html_use_index = True + +# If true, the index is split into individual pages for each letter. +#html_split_index = False + +# If true, links to the reST sources are added to the pages. +#html_show_sourcelink = True + +# If true, "Created using Sphinx" is shown in the HTML footer. Default is True. +#html_show_sphinx = True + +# If true, "(C) Copyright ..." is shown in the HTML footer. Default is True. +#html_show_copyright = True + +# If true, an OpenSearch description file will be output, and all pages will +# contain a <link> tag referring to it. The value of this option must be the +# base URL from which the finished HTML is served. +#html_use_opensearch = '' + +# This is the file name suffix for HTML files (e.g. ".xhtml"). +#html_file_suffix = None + +# Output file base name for HTML help builder. +htmlhelp_basename = 'NIRdoc' + + +# -- Options for LaTeX output --------------------------------------------- + +latex_elements = { +# The paper size ('letterpaper' or 'a4paper'). +#'papersize': 'letterpaper', + +# The font size ('10pt', '11pt' or '12pt'). +#'pointsize': '10pt', + +# Additional stuff for the LaTeX preamble. +#'preamble': '', +} + +# Grouping the document tree into LaTeX files. List of tuples +# (source start file, target name, title, +# author, documentclass [howto, manual, or own class]). 
+latex_documents = [ + ('index', 'NIR.tex', u'NIR Documentation', + u'Connor Abbott', 'manual'), +] + +# The name of an image file (relative to this directory) to place at the top of +# the title page. +#latex_logo = None + +# For "manual" documents, if this is true, then toplevel headings are parts, +# not chapters. +#latex_use_parts = False + +# If true, show page references after internal links. +#latex_show_pagerefs = False + +# If true, show URL addresses after external links. +#latex_show_urls = False + +# Documents to append as an appendix to all manuals. +#latex_appendices = [] + +# If false, no module index is generated. +#latex_domain_indices = True + + +# -- Options for manual page output --------------------------------------- + +# One entry per manual page. List of tuples +# (source start file, name, description, authors, manual section). +man_pages = [ + ('index', 'nir', u'NIR Documentation', + [u'Connor Abbott'], 1) +] + +# If true, show URL addresses after external links. +#man_show_urls = False + + +# -- Options for Texinfo output ------------------------------------------- + +# Grouping the document tree into Texinfo files. List of tuples +# (source start file, target name, title, author, +# dir menu entry, description, category) +texinfo_documents = [ + ('index', 'NIR', u'NIR Documentation', + u'Connor Abbott', 'NIR', 'One line description of project.', + 'Miscellaneous'), +] + +# Documents to append as an appendix to all manuals. +#texinfo_appendices = [] + +# If false, no module index is generated. +#texinfo_domain_indices = True + +# How to display URL addresses: 'footnote', 'no', or 'inline'. +#texinfo_show_urls = 'footnote' + +# If true, do not generate a @detailmenu in the "Top" node's menu. +#texinfo_no_detailmenu = False diff --git a/src/glsl/nir/docs/source/control_flow.rst b/src/glsl/nir/docs/source/control_flow.rst new file mode 100644 index 0000000000..e10e30bf57 --- /dev/null +++ b/src/glsl/nir/docs/source/control_flow.rst @@ -0,0 +1,412 @@ +Control Flow +============ + +Background +---------- + +Traditional Compilers +~~~~~~~~~~~~~~~~~~~~~ + +In most IR's, functions consist of *basic blocks* (often shortened to just +*blocks*), which are a series of instructions that always execute linearly from +the beginning to the end, and the *edges* that connect them. Each basic block +ends with a *jump* or *branch* that transfers execution either unconditionally +to one basic block or conditionally to two (or sometimes more) blocks. Together, +the basic blocks and edges comprise the *Control Flow Graph* or CFG. For +example, this snippet of code: + +:: + + while (foo) { + ... + if (baz) { + ... + } else { + ... + } + if (bar) { + ... + } + } + ... + +could be translated to a CFG that looks like: + +.. graphviz:: + + digraph { + + post [label="..."]; + + /* force 'post' to be at the bottom*/ + {rank="sink" post} + + header [label="foo?"]; + block1 [label="...\nbaz?"]; + then [label="...\nbar?"]; + else [label="...\nbar?"]; + block2 [label="...\nfoo?"]; + + header -> block1; + header -> post; + block1 -> then; + block1 -> else; + then -> header; + else -> header; + then -> block2; + else -> block2; + block2 -> block1; + block2 -> post; + } + +Note that the CFG is a little different from the original code, since it's +been optimized somewhat; for example, we've folded the second ``if`` into the +then and else branches of the first. 
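To make the textbook picture concrete, here is a minimal C sketch of the kind of data structure such a compiler might use for basic blocks and the CFG. It is only an illustration of the model described above; the types and field names are invented for this example and are not NIR's (NIR's actual structures are introduced later in this chapter and in the instructions chapter).

::

    #include <stddef.h>

    struct instr;                       /* opaque instruction type */

    /* A textbook basic block: a straight-line run of instructions plus the
     * edges that leave it (hypothetical types, for illustration only). */
    struct basic_block {
       struct instr **instrs;           /* executed linearly, first to last */
       size_t num_instrs;

       /* The terminator decides where control goes next: either one
        * unconditional successor, or two successors chosen by a condition. */
       struct basic_block *succs[2];
       unsigned num_succs;

       /* Predecessors: blocks whose terminator can branch here. */
       struct basic_block **preds;
       size_t num_preds;
    };

    /* The control flow graph is just the set of blocks plus a distinguished
     * entry block; the edges are implied by succs/preds. */
    struct cfg {
       struct basic_block *entry;
       struct basic_block **blocks;
       size_t num_blocks;
    };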
Optimizations like the folding above are a good thing for traditional, scalar hardware, but as we'll see, they are usually unnecessary and sometimes harmful for GPUs. However, this is the standard model that most of the literature on compiler theory assumes.

GPUs
~~~~

A unique feature of most GPUs is that they are designed to run many different instances of the same program in lock step, in order to reduce the size of the scheduling hardware by sharing it between many different "cores." When control flow *diverges*, meaning that two different instances (fragments, vertices, etc.) branch in different directions, the GPU will take both sides of the branch. For example, if both thread 1 and thread 2 are currently in block A, and thread 1 wants to branch to block B while thread 2 wants to branch to block C, then the GPU will first branch to block B with thread 1 enabled and thread 2 disabled. When execution reaches a predefined "merge block," the GPU will jump back to block C with the enabled threads flipped and run until the merge block is reached again, at which point control flow has *converged* and both thread 1 and thread 2 can be enabled.

Although most GPUs do something like this internally, the details of how it works can vary greatly from vendor to vendor and generation to generation. Some GPUs, such as the Mali T6xx series, give each instance a separate program counter and don't keep track of divergence at all! All Intel chips have instructions such as ``IF``, ``ELSE``, ``ENDIF``, and ``WHILE``, and on all current generations they're faster than the branch-with-merge instructions described above (although they do a similar thing under the hood). Also, the rules for when control flow re-converges can vary based on the implementation as well as on choices made in the backend.

There's one place besides modifying control flow where these details matter: *cross-channel operations*. There are a few operations that GPUs can do where separate instances (sometimes also called *channels* or *lanes*) share information. One important example is taking a screen-space derivative in fragment shaders, where the value of the input is exchanged between fragments in a 2x2 block. Derivatives can be taken either explicitly, or implicitly in order to calculate the appropriate level of detail when sampling from a texture. Source languages such as GLSL require that these derivatives be taken in *uniform control flow*, meaning that every channel must be enabled or disabled together, so that the inputs to the derivative are well-defined, and most backends take advantage of this guarantee. Therefore, optimizations in any intermediate IR must respect this invariant, which means that the IR must have a good idea of when control flow diverges. For example, in the following snippet:

::

    vec4 color = texture(tex, coordinate * 2.0);
    if (foo) {
        /* the only time we ever use 'color' */
        output = color;
    }

we can only push the texture operation into the ``if`` if ``foo`` is *uniform*, meaning that it takes the same value for each fragment, since the texture lookup takes an implicit derivative.
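The branch-with-merge behaviour described above can be pictured as maintaining an execution mask over the lanes. The following C sketch is purely conceptual -- it is not how any particular GPU, or NIR itself, implements divergence, and every name in it is made up for the example -- but it shows why a cross-channel operation such as a derivative is only well-defined in uniform control flow: inside a divergent branch, some lanes are disabled and have nothing meaningful to contribute.

::

    #include <stdbool.h>
    #include <stdint.h>

    #define NUM_LANES 4

    /* One step of a divergent if: run the then-side with only the lanes whose
     * condition is true, then the else-side with the remaining lanes, and
     * re-enable everything at the merge point. */
    static void
    run_divergent_if(uint32_t exec_mask, const bool cond[NUM_LANES],
                     void (*run_then)(uint32_t mask),
                     void (*run_else)(uint32_t mask))
    {
       uint32_t then_mask = 0, else_mask = 0;

       for (unsigned lane = 0; lane < NUM_LANES; lane++) {
          if (!(exec_mask & (1u << lane)))
             continue;                  /* lane already disabled */
          if (cond[lane])
             then_mask |= 1u << lane;
          else
             else_mask |= 1u << lane;
       }

       if (then_mask)
          run_then(then_mask);          /* lanes in else_mask are disabled */
       if (else_mask)
          run_else(else_mask);          /* lanes in then_mask are disabled */

       /* Merge block: control flow has re-converged and the original mask is
        * restored.  A screen-space derivative is only well-defined where the
        * mask is the full, uniform mask, because it needs valid values from
        * every lane in the 2x2 quad. */
       (void)exec_mask;
    }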
+ +The NIR Control Flow Model +-------------------------- + +In order to support many different backends, as well as maintain the +structured control information currently given by source languages such as +GLSL and required by some backends such as Intel, NIR uses a control flow +model that explicitly contains structured control flow elements such as loops +and if statements. This approach also gives a clear model for when control +flow converges and diverges that's guaranteed to be supported by every GPU. + +Nevertheless, there's still the need to support the vast existing literature +that takes basic blocks as fundamental. So NIR includes basic blocks as a +primitive as well. Control flow in NIR consists of a *control flow tree* whose +elements are if statements and infinite loops, and whose leaves are basic +blocks. In addition, the *successors* of each block, which are the blocks that +a given block branches to, and the *predecessors* of each block, which are the +blocks that branch to a given block, are always kept up-to-date. Finally, +there's the *start block*, which is the first block in the function, and *end +block*, which is a fake basic block that return statements point to. +Together, the start block, end block, and graph created by the successors and +predecessors form the control flow graph that complements the control flow +tree. For example, this: + +:: + + void main(void) { + if (foo) + return; + + while (bar) { + if (baz) + continue; + + ... + } + } + +would become, in NIR: + +.. graphviz:: + + digraph { + clusterrank="local"; + subgraph cluster_main { + style="solid"; + label="main"; + start [label="(start)"]; + start -> then1_block; + start -> else1_block; + + subgraph cluster_if1 { + style="filled"; + fillcolor="lightgrey"; + label="if (foo)"; + + subgraph cluster_then1 { + label="then"; + fillcolor="white"; + + then1_block [label="return"]; + } + + subgraph cluster_else1 { + label="else"; + fillcolor="white"; + + else1_block [label="(empty)"]; + } + } + + pre_loop [label="(empty)"]; + else1_block -> pre_loop; + pre_loop -> loop_header; + + subgraph cluster_loop { + style="filled"; + fillcolor="lightgrey"; + label="loop"; + + loop_header [label="(empty)"]; + then3_block -> loop_header [constraint=false]; + loop_header -> then2_block; + loop_header -> else2_block; + + subgraph cluster_if2 { + fillcolor="white"; + label="if (!bar)"; + + subgraph cluster_then2 { + fillcolor="lightgrey"; + label="then"; + + then2_block [label="break"]; + } + + subgraph cluster_else2 { + fillcolor="lightgrey"; + label="else"; + + else2_block [label="(empty)"]; + } + } + + loop_middle_block [label="(empty)"]; + else2_block -> loop_middle_block; + loop_middle_block -> then3_block; + loop_middle_block -> else3_block; + + subgraph cluster_if3 { + fillcolor="white"; + label="if (baz)"; + + subgraph cluster_then3 { + fillcolor="lightgrey"; + label="then"; + + then3_block [label="continue"]; + } + + subgraph cluster_else3 { + fillcolor="lightgrey"; + label="else"; + + else3_block [label="(empty)"]; + } + } + + loop_end_block [label="...", rank="max"]; + else3_block -> loop_end_block; + loop_end_block -> loop_header [constraint=false]; + } + + post_loop [label="(empty)"]; + then2_block -> post_loop; + loop_end_block -> post_loop [style="invis"]; + + post_loop -> end_block; + then1_block -> end_block; + + end_block [label="(end)"]; + } + } + +where the if statements and loops are represented by boxes and the basic +blocks are represented by ovals. 
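The structure in the figure maps almost directly onto code. Below is a toy C model of such a control flow tree -- leaves are blocks, interior nodes are if statements and loops, and each interior node owns one or more control flow lists -- written to follow the description above rather than the real definitions in nir.h (``nir_cf_node``, ``nir_if``, ``nir_loop``, and ``nir_block``, described below); the type, field, and function names here are invented for illustration.

::

    #include <stddef.h>

    enum cf_node_type { CF_NODE_BLOCK, CF_NODE_IF, CF_NODE_LOOP };

    struct cf_node {
       enum cf_node_type type;
       struct cf_node *next;            /* next sibling in the enclosing cf list */
    };

    struct cf_block {                   /* leaf: a basic block */
       struct cf_node node;
       /* instructions, successors, predecessors, ... */
    };

    struct cf_if {                      /* interior node: if statement */
       struct cf_node node;
       struct cf_node *then_list;       /* cf list for the then branch */
       struct cf_node *else_list;       /* cf list for the else branch */
    };

    struct cf_loop {                    /* interior node: infinite loop */
       struct cf_node node;
       struct cf_node *body;            /* cf list for the loop body */
    };

    /* Recursively visit every basic block in a cf list, in source order.
     * Casting back from the embedded cf_node works because it is the first
     * member of each derived struct. */
    static void
    visit_blocks(struct cf_node *list, void (*cb)(struct cf_block *))
    {
       for (struct cf_node *n = list; n != NULL; n = n->next) {
          switch (n->type) {
          case CF_NODE_BLOCK:
             cb((struct cf_block *)n);
             break;
          case CF_NODE_IF: {
             struct cf_if *if_stmt = (struct cf_if *)n;
             visit_blocks(if_stmt->then_list, cb);
             visit_blocks(if_stmt->else_list, cb);
             break;
          }
          case CF_NODE_LOOP:
             visit_blocks(((struct cf_loop *)n)->body, cb);
             break;
          }
       }
    }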
One thing that may be initially surprising is +that if statements always have at least one empty basic block in the "else" +portion -- that is, if-then statements are turned into if-then-else +statements. This helps optimizations that push operations into if statements, +since there could be a statement that only ever executes when the condition is +false, and adding the empty block creates a place where those statements can +be moved. On the basic block level, creating the empty block removes a +*critical edge*, which is an edge from a block with more than one successor to +another with more than one predecessor. Consider this if-then statement: + +:: + + if (foo) { + bar = 1; + } + ... + +and its basic block representation: + +.. graphviz:: + + digraph { + pre [label="foo?"]; + then [label="bar = 1;"]; + post [label="..."]; + + pre -> then; + pre -> post [color="red"]; + then -> post; + } + +The red edge is a critical edge, since its one of two incoming edges and one +of two outgoing edges. Before running optimizations like Partial Redundancy +Elimination (PRE) and Global Code Motion (GCM) whose aim is to move code into +less frequently executed paths, most compilers will *split* the critical edge +by inserting an empty basic block: + +.. graphviz:: + + digraph { + pre [label="foo?"]; + then [label="bar = 1;"]; + else [label="(empty)"]; + post [label="..."]; + + pre -> then; + pre -> else; + then -> post; + else -> post; + } + +However, in basic-block-focused compilers, keeping critical edges split all +the time would interfere with other optimizations that aim to reduce the +number of jumps that have to be executed. But because NIR keeps control flow +structured, those sorts of optimizations are either done very differently or +not done at all, and therefore it makes sense to always keep critical edges +split. It's for the same reason that NIR doesn't have a "predicated break" or +"predicated continue" instruction, which is supported by most GPU's: they add +critical edges to the CFG and prevent the compiler from being able to make +code execute only when the break or continue executes. In both cases, it's +easy enough for the backend to perform the optimizations to remove the extra +blocks if necessary. + +We've now explained why most of the extra empty basic blocks were inserted in +the example NIR control flow, but there's still one left. There's an empty +block in between the first if statement and the loop, so that the then and +else branches branch to the empty block and then to the first block of the +loop instead of jumping directly to the loop. Clearly, it isn't there to +remove a critical edge. So why insert it? Well, imagine that there was a +statement in the loop that we determined to be *loop-independent*, so that we +could move it outside the loop, but it was used inside the loop so we couldn't +move it after the loop. The empty block before the loop then comes in handy as +a place to move it. Just as splitting critical edges helps optimizations such +as PRE, inserting so-called *padding blocks* before and after the loop can +help optimizations that do Loop-Invariant Code Motion (LICM), including GCM. + +Putting it Together +~~~~~~~~~~~~~~~~~~~ + +We can put all the rules we've created into a guide for constructing the +control flow tree. To do this, we'll need a few different data types: + +* A *control flow node* (often shortened to "cf node" and defined as + ``nir_cf_node`` in nir.h) is the base class for everything in the control + flow tree. 
It can be a loop, an if statement, or a basic block.
* A *control flow list* (often shortened to "cf list") is a list of control flow nodes that corresponds to a series of statements in GLSL. It's used to represent the body of a function or loop, as well as the then and else branches of an if statement. In NIR, it's implemented as an intrusive linked list.
* An *if statement* (defined as ``nir_if``) contains a control flow list for each of the then and else branches, as well as a condition.
* A *loop* (defined as ``nir_loop``) is an infinite loop (the only way to exit is through ``break`` statements). It only contains a control flow list representing the body.
* A *basic block*, in addition to its previous definition, is now a leaf of the control-flow tree. In NIR, basic blocks are defined in a structure called ``nir_block``.

as well as two rules which, together, cover both the if-then-else and loop-padding situations: a control flow list must begin and end with a basic block, and it must contain exactly one block between each pair of non-block control flow nodes (i.e. loops or if statements). That is, control flow lists must look like:

::

    block
    loop/if
    block
    loop/if
    ...
    loop/if
    block

and they have to consist of at least one (possibly empty) basic block. Finally, there is a class of instructions called "jump instructions", defined as ``nir_jump_instr`` in nir.h, which is how breaks, continues, and returns are represented in NIR. Note that "multilevel breaks" and "multilevel continues", i.e. jumping out to a loop other than the innermost one, are currently not supported, although they may be in the future. There must be at most one jump instruction per basic block, and it must be at the end of the block.

If you aren't sure, go and convince yourself that the example NIR control flow given earlier satisfies all of these rules, in addition to being free of critical edges.

Modifying Control Flow
----------------------

We've seen that there are two complementary ways of describing control flow in NIR, the control flow tree and the control flow graph, and that they contain redundant information. To ease the burden of keeping both forms up to date, core NIR provides a number of helpers for rewriting the control flow graph. They allow you to manipulate the program as if it consisted of a series of statements, as in GLSL, while "under the hood" they guarantee that the control flow tree stays in the correct form and that the successors and predecessors of the basic blocks involved are updated. Currently, these functions include:

* ``nir_cf_node_insert_before``
* ``nir_cf_node_insert_after``
* ``nir_cf_node_insert_begin``
* ``nir_cf_node_insert_end``
* ``nir_cf_node_remove``

For details, see nir.h.

diff --git a/src/glsl/nir/docs/source/index.rst b/src/glsl/nir/docs/source/index.rst new file mode 100644 index 0000000000..a29df31631 --- /dev/null +++ b/src/glsl/nir/docs/source/index.rst @@ -0,0 +1,24 @@

NIR Documentation
=================

Contents:

..
toctree:: + :maxdepth: 2 + + intro + control_flow + instructions + variables + metadata + + + +Indices and tables +================== + +* :ref:`genindex` +* :ref:`modindex` +* :ref:`search` + diff --git a/src/glsl/nir/docs/source/instructions.rst b/src/glsl/nir/docs/source/instructions.rst new file mode 100644 index 0000000000..22b43605df --- /dev/null +++ b/src/glsl/nir/docs/source/instructions.rst @@ -0,0 +1,286 @@ +Instructions +============ + +The basic unit of computation in NIR is the *instruction*. An instruction can +be one of the various types listed below. Each instruction type is a derived +class of ``nir_instr``. Instructions occur in *basic blocks*; each basic block +consists of a list of instructions which is executed from beginning to end. + +ALU Instructions +---------------- + +ALU instructions represent simple operations, such as addition, +multiplication, comparison, etc., that take a certain number of arguments and +return a result that only depends on the arguments. A good rule of thumb is +that only things which can be constant folded should be ALU operations. If it +can't be constant folded, then it should probably be an intrinsic instead. + +ALU operations are *typeless*, meaning that they're only defined to convert a +certain bitpattern input to another bitpattern output. intBitsToFloat() and +friends are implicit. Boolean true is defined to be ~0 (NIR_TRUE) and false +is defined to be NIR_FALSE. + +Each ALU instruction has an *opcode*, which is a member of an enum +(``nir_op``) that describes what it does as well as how many arguments it +takes. Associated with each opcode is an info structure (``nir_op_info``), +which shows how many arguments the opcode takes as well as information such as +whether the opcode is commutative (``op a b == op b a``) or associative (``(op +(op a b) c) == (op a (op b c))``). The info structure for each opcode may be +accessed through a global array called ``nir_op_infos`` that's indexed by the +opcode. + +Even though ALU operations are typeless, each opcode also has an "ALU type" +which can be floating-point, boolean, integer, or unsigned integer. The ALU +type mainly helps backends which use the absolute value, negate, and saturate +modifiers (normally not used by core NIR) -- there's some generic +infrastructure in NIR which will fold iabs and ineg operations into integer +sources, as well as fabs and fneg for floating-point sources, although most +core NIR optimizations will assume that they are kept separate. In addition, +if an operation takes a boolean argument, then the argument may be assumed to +be either NIR_TRUE or NIR_FALSE, and if an operation's result has a boolean +type, then it may only produce only NIR_TRUE or NIR_FALSE. + +ALU opcodes also have the notion of *size*, or the number of components. ALU +opcodes are either *non-per-component*, in which case the destination as well +as each of the arguments are explicitly sized, or *per-component*. +Per-component opcodes have the destination size as well as at least one of +the argument sizes set to 0. The sources with their size set to 0 are known +as the *per-component sources*. Conceptually, for per-component instructions, +the destination is computed by looping over each component and computing some +function which depends only on the matching component of the per-component +sources as well as possibly all the components of the non-per-component +sources. 
In pseudocode: + +:: + + for each component "comp": + dest.comp = some_func(per_comp_src1.comp, per_comp_src2.comp, ..., + non_per_comp_src) + + +Both the info table entry and the enum values are generated from a Python +script called nir_opcodes.py which, when imported, creates an ``opcodes`` +list which contains objects of the ``Opcode`` class. Inside nir_opcodes.py, +opcodes are created using the ``opcode`` function, which constructs the +object and adds it to the list, as well as various helper functions which call +``opcode``. For example, the following line in nir_opcodes.py: + +:: + + binop("fmul", tfloat, commutative + associative, "src0 * src1") + +creates a declaration of a nir_op_fmul member of the ``nir_op`` enum, which is +defined in the generated file nir_opcodes.h, as well as the following entry in +the nir_op_infos array (defined in nir_opcodes.c): + +:: + + { + .name = "fmul", + .num_inputs = 2, + .output_size = 0, + .output_type = nir_type_float, + .input_sizes = { + 0, 0 + }, + .input_types = { + nir_type_float, nir_type_float + }, + .algebraic_properties = + NIR_OP_IS_COMMUTATIVE | NIR_OP_IS_ASSOCIATIVE + }, + +The ``src0 * src1`` part of the definition isn't just documentation; it's +actually used to generate code that can constant fold the operation. +Currently, every ALU operation must have a description of how it should be +constant-folded, which makes documenting the operation (including any corner +cases) much simpler in most cases, as well as obviating the need to deal with +per-component and non-per-component subtleties -- the pseudocode above is +implemented for you, and all you have to do is write the ``some_func``. In +this case, the definition of ``fmul`` also creates the following code in +nir_constant_expressions.c: + +:: + + static nir_const_value + evaluate_fmul(unsigned num_components, nir_const_value *_src) + { + nir_const_value _dst_val = { { {0, 0, 0, 0} } }; + + + for (unsigned _i = 0; _i < num_components; _i++) { + float src0 = _src[0].f[_i]; + float src1 = _src[1].f[_i]; + + float dst = src0 * src1; + + _dst_val.f[_i] = dst; + } + + return _dst_val; + } + +as well as the following case in ``nir_eval_const_opcode``: + +:: + + case nir_op_fmul: { + return evaluate_fmul(num_components, src); + break; + } + +For more information on the format of the constant expression strings, see +the documentation for the ``Opcode`` class in nir_opcodes.py. + +Intrinsic Instructions +---------------------- + +Intrinsics are like the stateful sidekicks to ALU instructions; they include +mainly various different kinds of loads/stores, as well as execution +barriers. Similar to ALU instructions, there is an enum of opcodes +(``nir_intrinsic_op``) as well as a table containing information for each +opcode (``nir_intrinsic_infos``). Intrinsics may or may not have a +destination, and they may also include 1 or more constant indices (integers). +Also similar to ALU instructions, both destinations and sources include a +size that's part of the opcode, and both may be made per-component by setting +their size to 0, in which case the size is obtained from the +``num_components`` field of the instruction. Finally, intrinsics may include +one or more variable dereferences, although these are usually lowered away +before they reach the driver. + +Unlike ALU instructions, which can be freely reordered and deleted as long as +they still produce the same result and satisfy the constaints imposed by SSA +form, intrinsics have a few rules regarding how they may be reordered. 
Currently, they're rather conservative, but it's expected that they'll become more refined in the future. There are two flags that are part of ``nir_intrinsic_infos``: ``NIR_INTRINSIC_CAN_REORDER`` and ``NIR_INTRINSIC_CAN_DELETE``. If an intrinsic can be reordered, then it can be reordered with respect to *any* other instruction; to prevent two intrinsics from being reordered with respect to each other, neither may have "can reorder" set. If an intrinsic can be deleted, then its only dependencies are on whatever uses its result, and if it's unused then it can be deleted. For example, if two intrinsic opcodes are for reading from and writing to a common resource, then the store opcode should have neither flag set, and the load opcode should have only the "can delete" flag set. Note that this means loads can't be reordered with respect to each other, and that loads and stores can't be reordered even with respect to loads/stores of resources that don't alias the one being read or written; this is a deficiency of the model, which is expected to change when more sophisticated analyses are implemented.

Two especially important intrinsics are ``load_var`` and ``store_var``, through which all loads and stores to variables occur. Most accesses to variables (besides accesses to textures and buffers) happen through these instructions in core NIR, although they can be lowered to loads/stores of registers, inputs, outputs, etc. with actual indices before they reach the backend.

Unlike ALU instructions, intrinsics haven't yet been converted to the new Python way of specifying opcodes. Instead, intrinsic opcodes are defined in a header file, nir_intrinsics.h, which expands to a series of ``INTRINSIC`` macros. nir_intrinsics.h is included twice, once in nir.h to create the ``nir_intrinsic_op`` enum, and again in ``nir_intrinsics.c`` to create the ``nir_intrinsic_infos`` array. For example, here's the definition of the ``store_var`` intrinsic:

::

    INTRINSIC(store_var, 1, ARR(0), false, 0, 1, 0, 0)

This says that ``store_var`` has one source of size 0 (and thus is per-component), no destination, one variable, no indices, and no semantic flags (it can't be reordered and can't be deleted). It creates the nir_intrinsic_store_var enum member, as well as the corresponding entry in ``nir_intrinsic_infos``.

Call Instructions
-----------------

Call instructions in NIR are pretty simple. They contain a pointer to the overload that they reference. Arguments are passed through dereferences, which may be copied from, copied to, or both, depending on whether the matching parameter in the overload is an input, an output, or both. In addition, there's a return dereference (NULL for functions with a void return type) which gets overwritten with the return value of the function.

Jump Instructions
-----------------

A jump instruction in NIR is a break, a continue, or a return. Returns don't include a value; functions that return a value instead fill out a specially designated return variable. For more information, see :doc:`Control Flow </control_flow>`.

Texture Instructions
--------------------

Even though texture instructions *could* be supported as intrinsics, the vast number of combinations means that doing so is practically impossible. Instead, NIR has a dedicated texture instruction. There's still an array of sources, except that each source also has a *type* associated with it.
There are various source types, each corresponding to a piece of information that the different texture operations require. There can be at most one source of each type. In addition, there are several texture operations:

* ``nir_texop_tex``: normal texture lookup.
* ``nir_texop_txb``: texture lookup with LOD bias.
* ``nir_texop_txl``: texture lookup with explicit LOD.
* ``nir_texop_txd``: texture lookup with partial derivatives.
* ``nir_texop_txf``: texel fetch with explicit LOD.
* ``nir_texop_txf_ms``: multisample texture fetch.
* ``nir_texop_txs``: query texture size.
* ``nir_texop_lod``: texture LOD query.
* ``nir_texop_tg4``: texture gather.
* ``nir_texop_query_levels``: texture levels query.

It's assumed that frontends will only insert the source types that are needed given the sampler type and the operation.

Like a lot of other resources, there are two ways to represent a sampler in NIR: either using a variable dereference, or as an index into a single flat array. When using an index, various information is stored in the texture instruction itself, so that backends which need to know the type of the sampler, whether it's a cube or array sampler, etc. still have that information in the lowered form.

Constant-Load Instructions
--------------------------

This instruction creates a constant SSA value. Note that writing to a register isn't supported; instead, you can use a constant-load instruction followed by a move to a register.

Undef Instructions
------------------

Creates an undefined SSA value. At each use of the value, each of the bits can be assumed to be whatever the implementation or optimization passes deem convenient. Similar in semantics to a register that's read before it's written.

Phi Instructions
----------------

From Instructions.h in LLVM:

::

    // PHINode - The PHINode class is used to represent the magical mystical PHI
    // node, that can not exist in nature, but can be synthesized in a computer
    // scientist's overactive imagination.

Phi nodes contain a list of sources matched to predecessor blocks; there must be one source for each predecessor block. Conceptually, when a certain predecessor block branches to the block with the phi node, the source corresponding to that predecessor block is copied to the destination of the phi node. If there's more than one phi node in a block, then this process happens in parallel. Phi nodes must be at the beginning of a block, i.e. each block must consist of any phi instructions followed by any non-phi instructions.

Parallel Copy Instructions
--------------------------

Copies a list of registers or SSA values to another list of registers or SSA values in parallel. Only used internally by the from-SSA pass.

diff --git a/src/glsl/nir/docs/source/intro.rst b/src/glsl/nir/docs/source/intro.rst new file mode 100644 index 0000000000..7c0f78c501 --- /dev/null +++ b/src/glsl/nir/docs/source/intro.rst @@ -0,0 +1,161 @@

Introduction
============

What is NIR?
------------

NIR is an Intermediate Representation (IR) that's designed for the needs of graphics drivers in Mesa. It sits between a frontend that translates another language or IR, such as GLSL IR or TGSI, to NIR and the driver's own backend IR.
+It includes several optimization passes, as well as some features that are +useful for making translating to the driver's own IR easier, although it is +*not* intended to replace the backend and as such doesn't support +backend-specific tasks such as scheduling, register allocation, etc. + +NIR was designed with several goals in mind: + +* To be backend-agnostic. There are some features, such as registers, that + require some level of backend involvement, but no core NIR optimization pass + depends on the semantics of these features. Instead, almost all interaction + besides simple operations such as addition and subtraction is described using + loads and stores to *variables* that are similar to variables in GLSL. +* To natively understand constructs, such as structured control flow, that are + common in GPU's but not elsewhere. +* To be compatible with the extensive body of literature around compiler + technology. NIR natively supports Single Static Assignment (SSA), which is + a prerequisite for many important optimizations, and all of its optimizations + assume SSA. Furthermore, NIR structures programs into *basic blocks*, which + is often assumed by compiler papers, and it supports several analyses such as + dominance analysis and liveness analysis that are often used by + optimizations. All of these things greatly reduce the hassle of translating + an idea described in a paper into code. + +One thing that NIR is *not* designed to be is a library for users outside of +Mesa. It's not possible to extend NIR at run-time to add additional +operations, although it's flexible enough that it's usually easy to do at +compile time. Furthermore, there is no stable API; it's expected that +producers and consumers will live in-tree so we can update them if we have to +make breaking changes. + +Organization +------------ + +NIR is written in C, although in a very object-oriented manner. Structures +and enums are typedef'ed: + +:: + + typedef struct nir_foo { + /* stuff */ + } nir_foo; + + typedef enum { + nir_enum_thing1, + nir_enum_thing2, + nir_enum_thing3, + } nir_enum; + +and inheritance is done through embedding structures. Upcasting is done +through inline functions defined by the NIR_DEFINE_CAST macro. For example, +here's how an animal structure inherited by cows, cats, and dogs would be +defined: + +:: + + typedef enum { + nir_animal_type_cow, + nir_animal_type_dog, + nir_animal_type_cat, + } nir_animal_type; + + typedef struct { + nir_animal_type type; + /* stuff */ + } nir_animal; + + typedef struct { + nir_animal animal; + } nir_cow; + + typedef struct { + nir_animal animal; + } nir_dog; + + typedef struct { + nir_animal animal; + } nir_cat; + + NIR_DEFINE_CAST(nir_animal_as_cow, nir_animal, nir_cow, animal) + NIR_DEFINE_CAST(nir_animal_as_dog, nir_animal, nir_dog, animal) + NIR_DEFINE_CAST(nir_animal_as_cat, nir_animal, nir_cat, animal) + +Datastructures +~~~~~~~~~~~~~~ + +The core IR consists of various structures defined in nir.h, as well as +functions for creating, destroying, and manipulating them. Currently, these +structures include: + +* ``nir_shader``: represents a linked or unlinked shader. This may contain one + or more functions as well as global registers and variables and other + whole-shader type information. Right now, a ``nir_shader`` usually only + contains one function called "main", but that may change. +* ``nir_function``: represents a GLSL-style overloaded function, for linking + purposes. It includes the name as well as a list of overloads. 
+* ``nir_function_overload``: represents a declaration or definition of a + function overload. If it's a declaration, then the ``impl`` field will be + NULL, and if it's a definition then ``impl`` will point to a + ``nir_function_impl``. +* ``nir_function_impl``: contains function-local stuff such as local + variables and registers. It's also the root of the *control flow tree*. +* ``nir_cf_node``: represents a node in the control flow tree. For more + information, see :doc:`Control Flow </control_flow>`. + + * ``nir_if`` + * ``nir_loop`` + * ``nir_block`` + +* ``nir_instr``: the base class for instructions in NIR. Each ``nir_block`` + has a list of ``nir_instr``'s. For more information, see :doc:`Instructions + </instructions>`. + + * ``nir_alu_instr`` + * ``nir_call_instr`` + * ``nir_jump_instr`` + * ``nir_tex_instr`` + * ``nir_intrinsic_instr`` + * ``nir_load_const_instr`` + * ``nir_ssa_undef_instr`` + * ``nir_phi_instr`` + * ``nir_parallel_copy_instr`` + +* ``nir_dest`` +* ``nir_src`` +* ``nir_ssa_def`` +* ``nir_register`` +* ``nir_variable`` +* ``nir_deref`` + * ``nir_deref_var`` + * ``nir_deref_struct`` + * ``nir_deref_array`` + +Printing +~~~~~~~~ + +NIR includes a function called ``nir_print_shader()`` for printing the +contents of a shader to a given ``FILE *``, which can be useful for +debugging. In addition, ``nir_print_instr()`` is exposed, which can be useful +for examining instructions in the debugger. + +Validation +~~~~~~~~~~ + +There are various bits of redundant information as well as various invariants +which must be satisfied in the IR. Often, passes will have bugs which result +in those invariants being broken or the information left incorrect, which may +only blow up much later when some other pass or analysis relies on that +information. To make debugging those sorts of problems much easier, NIR has a +validation pass, ``nir_validate_shader()``, which makes sure that the shader +is valid. It's a no-op on release builds, but when debugging it catches many +bugs at the source instead of much later. + diff --git a/src/glsl/nir/docs/source/metadata.rst b/src/glsl/nir/docs/source/metadata.rst new file mode 100644 index 0000000000..9dc1b8700c --- /dev/null +++ b/src/glsl/nir/docs/source/metadata.rst @@ -0,0 +1,34 @@ +Metadata +======== + +Many of the optimization/lowering passes in NIR require different bits of +metadata that are provided by different analysis passes. Currently, this +metadata includes: + +* dominance information +* SSA value liveness +* source-order block indices + +and it's expected that there will be more in the future. The metadata itself +is currently directly embedded inside the IR datastructures (for example, each +basic block contains information about its parent and children in the +dominance tree), but we still need a way to calculate the metadata only when +actually required. In order to do this, there's a simple API made of two +functions: + +* ``nir_metadata_require()``: Declares that the given metadata (an OR of enum + values) is required. The function automatically calls all of the required + analysis passes for you and, upon its return, the requested metadata is + available and current. +* ``nir_metadata_preserve()``: Called to declare what metadata (if any) was + preserved by the given pass. If the pass didn't touch anything, it doesn't + need to call this function. However, if it adds/removes instructions or + modifies the CFG in any way, it needs to call ``nir_metadata_preserve()``. 
+ The ``nir_metadata_preserve()`` function takes an OR of all of the bits of + metadata that are preserved. That way as new metadata gets added, we don't + have to update every optimization pass to dirty it. + +Unfortunately, there's no way to guarantee that you actually call +``nir_metadata_preserve()`` if you change the shader, so if you don't... +shame on you. + diff --git a/src/glsl/nir/docs/source/variables.rst b/src/glsl/nir/docs/source/variables.rst new file mode 100644 index 0000000000..12ddd1ed45 --- /dev/null +++ b/src/glsl/nir/docs/source/variables.rst @@ -0,0 +1,85 @@ +Variables +========= + +NIR is designed to include backend-independent ways to represent things if +possible, while allowing backends the ability to lower some things if +convenient. One of the main mechanisms for backend independence is through +*variables*, which are based on GLSL variables (the implementation is mostly +taken from GLSL IR). Variables are logical instead of physical, meaning that +all accesses to them are unaware of any layout issues (even though in some +cases, such as UBO's, the API already defines the layout), and they may not +contain any pointers; NIR doesn't even have the concept of a pointer. + +Dereferences +------------ + +All accesses to variables occur through *dereferences*, which let you select +which part of the variable you want to modify or access. A dereference is a +singly-linked list which starts with a pointer to the variable itself and +selects structure members or array elements according to the type of the +variable. The type of the *result of doing* the dereference is also stored as +part of the dereference, to make working with them easier. + +One way that the dereference system is more powerful than GLSL's is that it +supports so-called "wildcard" array dereferences, which are used for array +copies (the ``copy_var`` intrinsic). For example, this allows us to support +something like: + +:: + + struct s { + vec4 a, b; + }; + + struct s foo[10], bar[10]; + //... + foo[*].a = bar[*].a + +where we can copy part of each element of the array. This is useful because it +helps make converting part of the variable into SSA easier. For example, if it +turns out that ``foo[*].a`` (i.e. all the elements ``foo[0].a``, ``foo[1].a``, +``foo[2].a``, etc.) is always accessed directly while ``foo[*].b`` is +sometimes accessed indirectly, we can convert ``foo[*].a`` to SSA values while +only keeping around ``foo[*].b``. Therefore, we lower array copies like + +:: + + foo = bar; + +to multiple wildcard copies like + +:: + + foo[*].a = bar[*].a; + foo[*].b = bar[*].b; + +and some of them may be eliminated by the out-of-SSA pass. + +Variables vs. Registers, Inputs/Outputs, etc. +--------------------------------------------- + +Variables are the core NIR way of handling most things that aren't SSA values, +and frontends should prefer emitting code that uses variables as they're +guaranteed to work with the core NIR optimization passes. However, variables +aren't always the easiest thing for backends to work with. Backends work +directly with addresses and locations, and turning variable dereferences into +those involves some work and creates code that then needs to be cleaned up. +For this reason, there are various mechanisms in NIR that can replace most of +the uses of variables and allow the driver's lowering pass (or a common +lowering pass used by the driver) to convert variable references to references +to a flat address space. 
Some of those include:

* Registers (really, virtual registers) can be used almost everywhere SSA values can be used. This means that they can be used either as a replacement for normal local or global variables (i.e. per-instance variables that can only be loaded or stored), or as a way to make NIR shaders almost entirely free of SSA.
* For inputs, outputs, and uniforms, there are load/store intrinsics that take a direct (constant) index and an indirect index that are added together.
* For textures, there are similarly places to add direct and indirect indices for the sampler, complementary to the variables.

There are some cases, such as alternate interpolation modes and image load/store, where the backend still currently has to deal directly with variables, but those are mostly due to technical restrictions and may change in the future.
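As a closing illustration, here is a small C sketch of the singly-linked dereference chain described in the Dereferences section, for an access like ``foo[i].a``. The real structures are ``nir_deref_var``, ``nir_deref_struct``, and ``nir_deref_array`` in nir.h; the simplified types and field names below are invented for this example and leave out details such as the stored result type and wildcard array dereferences.

::

    #include <stdbool.h>

    struct nir_variable;                /* logical variable, as described above */

    enum deref_kind { DEREF_VAR, DEREF_ARRAY, DEREF_STRUCT };

    struct deref {
       enum deref_kind kind;
       struct deref *child;             /* next link in the chain, or NULL */
    };

    struct deref_var {                  /* head of every chain */
       struct deref deref;
       struct nir_variable *var;
    };

    struct deref_array {
       struct deref deref;
       unsigned base_offset;            /* direct (constant) part of the index */
       bool has_indirect;               /* plus an optional indirect index */
    };

    struct deref_struct {
       struct deref deref;
       unsigned member;                 /* index of the structure member */
    };

    /*
     * foo[i].a is then the chain:
     *
     *   deref_var(foo) -> deref_array(base 0, indirect i) -> deref_struct(.a)
     *
     * and a wildcard copy such as foo[*].a = bar[*].a uses an array deref
     * that selects every element instead of a single index.
     */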