summaryrefslogtreecommitdiff
path: root/ooo-help-parser.py
blob: 31c0a88274484326032fc816d2c4cdf8aa69c2df (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
#!/usr/bin/env python

import sys, os, os.path, optparse
sys.path.append(sys.path[0]+"/source")

import globals, expatimpl, docbook, node, tree

def processTreeFiles (tree_dir):
    """Parse all .tree files in tree_dir and build the document tree.

    Only regular files located directly in tree_dir whose extension is
    exactly '.tree' are parsed; subdirectories are not descended into.
    Each file is parsed with expatimpl.TreeParser and its root node is
    registered under the file's base name (extension removed).  The
    collected roots are passed to tree.TreeBuilder, and the root produced
    by the builder is returned.

    If tree_dir is not an existing directory, an error is reported through
    globals.error and the process exits with status 1.
    """
    if not os.path.isdir(tree_dir):
        globals.error("Specified tree directory is invalid")
        sys.exit(1)

    # Parse the tree files to build dom structures.
    rootNodes = {}
    for entry in os.listdir(tree_dir):
        # os.path.join copes with a trailing separator on tree_dir, so no
        # manual stripping is needed.
        path = os.path.join(tree_dir, entry)
        if not os.path.isfile(path):
            continue

        basename, ext = os.path.splitext(entry)
        if ext != '.tree':
            continue

        # The with-block guarantees the handle is released even when
        # reading fails part-way through.
        with open(path, 'r') as stream:
            strm = stream.read()

        p = expatimpl.TreeParser(strm)
        p.parse()
        rootNodes[basename] = p.root

    # Build document tree.
    builder = tree.TreeBuilder(rootNodes)
    builder.build()
    return builder.root

def walkDirs (fpaths):
    """Collect the paths of all .xhp files named by, or found under, fpaths.

    A directory entry is searched recursively and every .xhp file found is
    reported as '<containing directory>/<file name>'.  A regular-file entry
    is reported as an absolute path, and only if it carries the .xhp
    extension.  Entries that are neither a directory nor a regular file
    are skipped.
    """
    def hasXHPExtension (path):
        return os.path.splitext(path)[1] == '.xhp'

    collected = []
    for location in fpaths:
        if os.path.isdir(location):
            # Gather matching files from every level below this directory.
            for parent, _subdirs, names in os.walk(location):
                collected.extend(
                    [parent + '/' + name for name in names if hasXHPExtension(name)])
        elif os.path.isfile(location) and hasXHPExtension(location):
            collected.append(os.path.abspath(location))

    return collected

def parseAllXHPFiles (filepaths):
    """Parse each file in filepaths with expatimpl.XHPParser.

    Returns a 3-tuple (rootNodes, embedNodes, filesParsed):

      rootNodes   -- maps the parser-reported file name to the parsed root
      embedNodes  -- maps the same file name to the parser's 'ids' value
      filesParsed -- number of files that were parsed

    A file whose parser reports no file name (None) is still counted in
    filesParsed but is not added to either dictionary.
    """
    filesParsed = 0
    rootNodes = {}
    embedNodes = {}
    for fpath in filepaths:
        # The with-block guarantees the handle is released even when
        # reading fails part-way through.
        with open(fpath, 'r') as stream:
            strm = stream.read()

        p = expatimpl.XHPParser(strm)
        p.parse()
        if p.filename is not None:
            # startswith() is used rather than indexing so that an empty
            # file name does not raise IndexError.
            if p.filename.startswith('/'):
                # Remove leading '/' if exists.  We do this because some of the
                # file names don't begin with '/' while the majority of them do.
                # We need to make this consistent.
                p.filename = p.filename[1:]
            rootNodes[p.filename] = p.root
            embedNodes[p.filename] = p.ids
        filesParsed += 1
    return rootNodes, embedNodes, filesParsed

def main ():
    """Command-line entry point.

    Parses the command line, processes the tree files found in the
    directory given with -t/--tree-dir, then parses every .xhp file named
    by (or found under) the positional arguments.  The result is written
    to the file given with -o/--output, or to standard output when that
    option is absent.

    With --debug-tree only the tree structure is printed and the process
    exits with status 0.  With --no-convert the parsed xhp structures are
    printed without converting them to docbook.

    Exits with status 1 when no positional argument is given, when the
    tree directory option is missing, or when the output path is a
    directory.
    """
    parser = optparse.OptionParser()
    parser.set_defaults(output=None)
    parser.add_option("-o", "--output", dest="output", help="write output to FILE", metavar="FILE")
    parser.add_option("-t", "--tree-dir", dest="tree_dir", help="Directory where the tree files are located.  Tree files are expected to have .tree extension.")
    parser.add_option("--debug-tree", action="store_true", dest="debug_tree",
        help="Output the tree structure for debugging purposes.", default=False)
    parser.add_option("--no-convert", action="store_false", dest="convert",
        help="Don't convert to docbook but simply output the parsed raw xhp structure", default=True)
    options, args = parser.parse_args()
    if not args:
        parser.print_help()
        sys.exit(1)

    if options.tree_dir is None:
        globals.error("Tree file directory is not provided.")
        parser.print_help()
        sys.exit(1)

    fd = sys.stdout
    if options.output is not None:
        if os.path.isdir(options.output):
            globals.error("cannot create output file: " + options.output)
            sys.exit(1)
        fd = open(options.output, 'w')

    try:
        # Process the tree files first
        treeroot = processTreeFiles(options.tree_dir)
        if options.debug_tree:
            node.prettyPrint(fd, treeroot)
            # SystemExit still passes through the finally clause below, so
            # the output file is closed before the process terminates.
            sys.exit(0)

        # walk all directories and collect all files.
        filepaths = walkDirs(args)

        xhproots, embeds, filesParsed = parseAllXHPFiles(filepaths)

        if options.convert:
            converter = docbook.DocBookConverter(treeroot, xhproots, embeds)
            converter.convert()
            converter.prettyPrint(fd)
        else:
            for filename in xhproots.keys():
                node.prettyPrint(fd, xhproots[filename])

        globals.info("%d files have been processed"%filesParsed)
    finally:
        # Close only a file we opened ourselves; standard output is left
        # alone on every exit path.
        if fd is not sys.stdout:
            fd.close()


# Run main() only when this file is executed as a script, not when it is
# imported as a module.
if __name__ == '__main__':
    main()