summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Laura Ekstrand <laura@jlekstrand.net> 2018-05-17 14:46:33 -0700
committer Laura Ekstrand <laura@jlekstrand.net> 2018-06-15 16:06:42 -0700
commit 45cf206c08444780464f636b621d634471f39d3e (patch)
tree 2afc2411d026961b60dcd36ca5ac58842e869d8e
parent fe5839491dc9642e6852a6a71863df6525ed97b0 (diff)
docs: Add python script that converts html to rst.
Use Beautiful Soup to fix bad html, then use pandoc for converting to rst.
-rwxr-xr-x docs/rstConverter.py 23
1 files changed, 23 insertions, 0 deletions
diff --git a/docs/rstConverter.py b/docs/rstConverter.py
new file mode 100755
index 0000000000..5321fdde8b
--- /dev/null
+++ b/docs/rstConverter.py
@@ -0,0 +1,23 @@
+#!/usr/bin/python3
+import glob
+import subprocess
+from bs4 import BeautifulSoup
+
+pages = glob.glob("*.html")
+pages += glob.glob("relnotes/*.html")
+for filename in pages:
+ # Fix some annoyingly bad html.
+ with open(filename) as f:
+ soup = BeautifulSoup(f, 'html5lib')
+ soup.find("div", "header").extract() # Get rid of old header
+ soup.iframe.extract() # Get rid of old contents bar.
+ soup.find("div", "content").unwrap() # Strip the content div.
+
+ # Write out the better html.
+ with open(filename, 'wt') as f:
+ f.write(str(soup))
+
+ # Convert to rst with pandoc.
+ name = filename.split(".html")[0]
+ bashCmd = "pandoc " + filename + " -o " + name + ".rst"
+ subprocess.run(bashCmd.split())