diff options
author | Laura Ekstrand <laura@jlekstrand.net> | 2018-05-17 14:46:33 -0700 |
---|---|---|
committer | Laura Ekstrand <laura@jlekstrand.net> | 2018-06-15 16:06:42 -0700 |
commit | 45cf206c08444780464f636b621d634471f39d3e (patch) | |
tree | 2afc2411d026961b60dcd36ca5ac58842e869d8e | |
parent | fe5839491dc9642e6852a6a71863df6525ed97b0 (diff) |
docs: Add python script that converts html to rst.
Use Beautiful Soup to fix bad html, then use pandoc for converting to
rst.
-rwxr-xr-x | docs/rstConverter.py | 23 |
1 files changed, 23 insertions, 0 deletions
#!/usr/bin/python3
# Origin: docs/rstConverter.py (new file, mode 100755).
"""Convert the HTML pages in the current directory and relnotes/ to rst.

Each page is first cleaned up in place with Beautiful Soup (the old header
div and contents iframe are removed and the content div is unwrapped), then
handed to pandoc, which writes a sibling ``.rst`` file.
"""
import glob
import os
import subprocess
import sys

from bs4 import BeautifulSoup


def _clean_html(filename):
    """Rewrite *filename* in place without the old page chrome.

    Elements that are absent from a page are skipped, so a page lacking
    the header div, the iframe or the content div is still rewritten
    instead of aborting the whole run with an AttributeError.
    """
    # Read raw bytes so Beautiful Soup works out the document encoding
    # itself rather than relying on the locale's default.
    with open(filename, 'rb') as f:
        soup = BeautifulSoup(f, 'html5lib')

    header = soup.find("div", "header")
    if header is not None:
        header.extract()  # Get rid of old header.
    if soup.iframe is not None:
        soup.iframe.extract()  # Get rid of old contents bar.
    content = soup.find("div", "content")
    if content is not None:
        content.unwrap()  # Strip the content div, keeping its children.

    # Write out the better html. pandoc expects UTF-8 input, so the
    # encoding is explicit instead of locale-dependent.
    with open(filename, 'w', encoding='utf-8') as f:
        f.write(str(soup))


def _convert_to_rst(filename):
    """Run pandoc on *filename*; return True if it exited with status 0.

    The output path is *filename* with its extension replaced by ``.rst``.
    """
    # splitext only strips the final extension, unlike split(".html"),
    # which cuts at the first ".html" anywhere in the path.
    name = os.path.splitext(filename)[0]
    # Pass an argument list (not a whitespace-split string) so that paths
    # containing spaces reach pandoc intact.
    result = subprocess.run(["pandoc", filename, "-o", name + ".rst"])
    return result.returncode == 0


pages = glob.glob("*.html")
pages += glob.glob("relnotes/*.html")
failed = []
for filename in pages:
    _clean_html(filename)
    # Keep going after a pandoc failure so one bad page does not block the
    # rest; the failures are reported together below.
    if not _convert_to_rst(filename):
        failed.append(filename)

if failed:
    # Exit non-zero with the list of pages pandoc could not convert.
    sys.exit("pandoc failed for: " + ", ".join(failed))