#!/usr/bin/env perl #-*- Mode: perl; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- # XML printing, scanning and parsing. # # Copyright (C) 2000-2001 Ximian, Inc. # # Authors: Hans Petter Jansson # Arturo Espinosa # Kenneth Christiansen # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU Library General Public License as published # by the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU Library General Public License for more details. # # You should have received a copy of the GNU Library General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. require "___scriptsdir___/util.pl"; require "___scriptsdir___/general.pl"; require "___scriptsdir___/debug.pl"; # --- XML print formatting --- # # &xst_xml_enter: Call after entering a block. Increases indent level. # &xst_xml_leave: Call before leaving a block. Decreases indent level. # &xst_xml_print_indent: Call before printing a line. Indents to current level. # &xst_xml_print_vspace: Ensures there is a vertical space of one and only one line. # &xst_xml_print: Indent, then print all arguments. Just for sugar. my $xst_indent_level = 0; my $xst_have_vspace = 0; my @xst_xml_stack; sub xst_xml_print_begin { my ($name) = @_; $name = $xst_name if !$name; &xst_xml_print_string ("\n"); &xst_xml_print_string ("\n\n"); &xst_xml_print_string ("<$name>\n"); &xst_xml_enter (); &xst_xml_print_vspace (); &xst_xml_print_line ("\n"); &xst_xml_print_vspace (); } sub xst_xml_print_request_end { &xst_xml_print_string ("\n\n"); } sub xst_xml_print_end { my ($name) = @_; $name = $xst_name if !$name; &xst_xml_print_vspace (); &xst_xml_print_line ("\n"); &xst_xml_print_vspace (); &xst_xml_leave (); &xst_xml_print_string ("\n"); } sub xst_xml_enter { $xst_indent_level += 2; } sub xst_xml_leave { $xst_indent_level -= 2; } sub xst_xml_print_string { print $_[0]; &xst_debug_print_string_to_file ("out.xml", $_[0]); } sub xst_xml_format_indent { $xst_have_vspace = 0; return " " x $xst_indent_level; } sub xst_xml_print_indent { &xst_xml_print_string(&xst_xml_format_indent ()); } sub xst_xml_print_vspace { if (not $xst_have_vspace) { &xst_xml_print_string ("\n"); $xst_have_vspace = 1; } } sub xst_xml_print_line { my $line; $line = join ("", @_); $line =~ tr/\n//d; &xst_xml_print_indent (); &xst_xml_print_string ($line . "\n"); } sub xst_xml_format_pcdata # (name, pcdata) { my ($name, $pcdata) = @_; if ($pcdata) { return "<$name>$pcdata"; # xst_xml_print_line ("<$name>$pcdata"); } } sub xst_xml_print_pcdata # (name, pcdata) { my ($name, $pcdata) = @_; my $line = xst_xml_format_pcdata ($name, $pcdata); &xst_xml_print_line ($line); } sub xst_xml_format_state_tag { my ($name, $state) = @_; my $boolean = xst_print_boolean_truefalse ($state); return "<$name state='$boolean'/>"; } sub xst_xml_print_state_tag { my ($name, $state) = @_; my $state_tag = xst_xml_format_state_tag ($name, $state); &xst_xml_print_line ($state_tag); } # Pass a hash and the keys whose items are scalars. Print val. sub xst_xml_print_scalars { my ($h, @scalar_keys) = @_; my ($i, $val); @scalar_keys = sort @scalar_keys; while ($i = shift @scalar_keys) { $val = &xst_xml_quote ($$h{$i}); &xst_xml_print_line ("<$i>$val\n") if exists $$h{$i}; } } # Pass a hash and the keys whose items are arrays. Print val foreach val # in the array at hash{key} sub xst_xml_print_arrays { my ($h, @array_keys) = @_; my ($i, $j, $val); foreach $i (sort @array_keys) { if (exists $$h{$i}) { &xst_xml_print_vspace (); foreach $j (@{$$h{$i}}) { $val = &xst_xml_quote ($j); &xst_xml_print_line ("<$i>$val\n"); } } } } # Pass a hash, create a parent tag $tag and print val for every # value pair in the hash. sub xst_xml_print_hash { my ($hash, $tag) = @_; my ($j, $val); &xst_xml_print_vspace (); &xst_xml_print_line ("<$tag>\n"); &xst_xml_enter (); foreach $j (sort keys (%$hash)) { $val = &xst_xml_quote ($$hash{$j}); &xst_xml_print_line ("<$j>$val\n"); } &xst_xml_leave (); &xst_xml_print_line ("\n"); } sub xst_xml_print_hash_hash { my ($h, $tag) = @_; my $i; foreach $i (sort keys %$h) { &xst_xml_print_hash ($$h{$i}, $tag); } } sub xst_xml_container_enter # (name) { my ($container) = @_; ## xst_xml_stack is not my, as it is defined at top, ## so it is global push @xst_xml_stack, $container; &xst_xml_print_line ("<$container>"); &xst_xml_enter(); } sub xst_xml_container_leave { ## checks if there is a start tag if ($#xst_xml_stack >= 0) { my $current_container = pop @xst_xml_stack; &xst_xml_leave (); &xst_xml_print_line (""); } } sub xst_xml_print_container # (name, @strings) { my ($name, @strings) = @_; if (@strings) { &xst_xml_container_enter ($name); foreach $tag (@strings) { &xst_xml_print_line ("$tag"); } &xst_xml_container_leave (); } } # --- XML printing from in-memory model --- # sub xst_xml_model_print_attributes { my ($tree) = @_; my ($attrs, $string); $attrs = @$tree [0]; for $attr (keys %$attrs) { $string .= " " . $attr . "=\"" . $$attrs{$attr} . "\""; } return $string; } sub xst_xml_model_print_recurse { my ($tree, $indent) = @_; my ($string); my @children = @$tree; shift @children; # Attributes while (@children) { my $branch = @children [1]; if (@children [0] eq "__unparsed__") { $string .= "<" . @children [1] . ">"; } elsif (@children [0] eq "0") { $string .= @children [1]; } elsif (@$branch == 1) # Empty tag. { $string .= "<" . @children [0] . &xst_xml_model_print_attributes ($branch) . "/>"; } else { $string .= "<" . @children [0] . &xst_xml_model_print_attributes ($branch) . ">"; $string .= &xst_xml_model_print_recurse ($branch); $string .= ""; } shift @children; shift @children; } # if ($branch) # { # return &xst_xml_get_attribute ($branch, $property) if $property ne ""; # return &xst_xml_get_pcdata ($branch); # } return $string; } sub xst_xml_model_print { my ($tree) = @_; my ($string); $string = &xst_xml_model_print_recurse ($tree); chomp $string; $string .= "\n"; return $string; } # --- XML scanning --- # # This code tries to replace XML::Parser scanning from stdin in tree mode. sub xst_xml_scan_make_kid_array { my %hash = {}; my @sublist; @attr = $_[0] =~ /[^\t\n\r ]+[\t\n\r ]*([a-zA-Z0-9_-]+)[ \t\n\r]*\=[ \t\n\r\"\']*([a-zA-Z0-9_\.\\\/-]+)/g; %hash = @attr; push (@sublist, \%hash); return \@sublist; } sub xst_xml_scan_recurse { my ($xst_xml_scan_list, $list_arg) = @_; my @list; if ($list_arg ne undef) { @list = $$list_arg[0]; } while (@$xst_xml_scan_list) { $el = shift @$xst_xml_scan_list; if (($el eq "") || $el =~ /^\<[!?].*\>$/s) { next; } # Empty strings, PI and DTD must go. if ($el =~ /^\<.*\/\>$/s) # Empty. { $el =~ /^\<([a-zA-Z0-9_-]+).*\/\>$/s; push (@list, $1); push (@list, &xst_xml_scan_make_kid_array ($el)); } elsif ($el =~ /^\<\/.*\>$/s) # End. { last; } elsif ($el =~ /^\<.*\>$/s) # Start. { $el =~ /^\<([a-zA-Z0-9_-]+).*\>$/s; push (@list, $1); $sublist = &xst_xml_scan_make_kid_array ($el); push (@list, &xst_xml_scan_recurse ($xst_xml_scan_list, $sublist)); next; } elsif ($el ne "") # PCDATA. { push (@list, 0); push (@list, "$el"); } } return \@list; } sub xst_xml_read_file { my ($file) = @_; my ($doc, $i); local *INPUT_FILE; open INPUT_FILE, $file; $doc .= $i while ($i = ); close INPUT_FILE; return $doc; } sub xst_xml_read_compressed_file { my ($file) = @_; my ($doc, $i, $gunzip); local *INPUT_FILE; $gunzip = &xst_file_locate_tool ("gunzip"); if (!$gunzip) { return undef; } open INPUT_FILE, "$gunzip -c $file 2>/dev/null |"; $doc .= $i while ($i = ); close INPUT_FILE; if (length ($doc) < 4) # Allow for one blank line from gzip, '\n\r'. { $doc = undef; } return $doc; } sub xst_xml_read_stdin { my ($i, $doc); while (! ($i =~ /^$/)) { $doc .= $i; $i = ; last if $i eq undef; } return $doc; } # (file, tool) If no file specified, reads stdin. # If tool is an xst_tool, stores the read buffer in # $$tool{"xml_doc"}. sub xst_xml_scan { my ($file, $tool) = @_; my ($doc, @tree, @xst_xml_scan_list); $file = $xst_input_file if $file eq undef; if ($file) { $doc = &xst_xml_read_file ($file); } else { $doc = &xst_xml_read_stdin (); } &xst_debug_print_log_to_file ("in.xml", $doc); $$tool{"xml_doc"} = $doc if (&xst_is_tool ($tool)); @xst_xml_scan_list = ($doc =~ /([^\<]*)(\<[^\>]*\>)[ \t\n\r]*/mg); # pcdata, tag, pcdata, tag, ... $tree = &xst_xml_scan_recurse (\@xst_xml_scan_list); return $tree; } # XML scanning that preserves more exact attributes of the scanned XML. sub xst_xml_model_scan_recurse { my @list; if (@_) { @list = $_[0]->[0]; } while (@xst_xml_scan_list) { $el = $xst_xml_scan_list[0]; shift @xst_xml_scan_list; if ($el eq "") # Empty strings. { next; } elsif ($el =~ /^\<[!?].*\>$/s) # PI and DTD. { $el =~ /^\<([^\>]+)\>$/s; push (@list, "__unparsed__"); push (@list, $1); } elsif ($el =~ /^\<.*\/\>$/s) # Empty. { $el =~ /^\<([a-zA-Z0-9_-]+).*\/\>$/s; push (@list, $1); push (@list, &xst_xml_scan_make_kid_array ($el)); } elsif ($el =~ /^\<\/.*\>$/s) # End. { last; } elsif ($el =~ /^\<.*\>$/s) # Start. { $el =~ /^\<([a-zA-Z0-9_-]+).*\>$/s; push (@list, $1); $sublist = &xst_xml_scan_make_kid_array ($el); push (@list, &xst_xml_model_scan_recurse ($sublist)); next; } elsif ($el ne "") # PCDATA. { push (@list, 0); push (@list, "$el"); } } return \@list; } sub xst_xml_model_scan # (file) If no file specified, reads stdin. { my ($file) = @_; my ($doc, $tree, $compressed); $file = $xst_input_file if $file eq undef; if ($file) { $doc = &xst_xml_read_compressed_file ($file); if (!$doc) { $doc = &xst_xml_read_file ($file); $compressed = 0; } else { $compressed = 1; } } else { return undef, 0; } @xst_xml_scan_list = ($doc =~ /([^\<]*)(\<[^\>]*\>)/mg); # pcdata, tag, pcdata, tag, ... $tree = &xst_xml_model_scan_recurse; return $tree, $compressed; } sub xst_xml_model_save { my ($model, $file, $compressed) = @_; my $fd; if ($compressed == 1) { $fd = &xst_file_open_write_compressed ($file); } else { $fd = &xst_file_open_write_from_names ($file); } if ($fd == -1) { return -1; } print $fd &xst_xml_model_print ($model); close $fd; return 0; } # Quote/unquote. @xst_xml_entities = ( "<", '<', ">", '>', "'", '\'', """, '"', "&", '&' ); sub xst_xml_quote { my $in = $_[0]; my $out = ""; my @xe; my $joined = 0; my @clist = split (//, $in); while (@clist) { # Find character and join its entity equivalent. # If none found, simply join the character. $joined = 0; # Cumbersome. for (@xe = @xst_xml_entities; @xe && !$joined; ) { if ($xe [1] eq $clist [0]) { $out = join ('', $out, $xe [0]); $joined = 1; } shift @xe; shift @xe; } if (!$joined) { $out = join ('', $out, $clist [0]); } shift @clist; } return $out; } sub xst_xml_unquote { my $ret = $_[0]; my $i; #print STDERR "INI U: $ret\n"; for ($i = 0; $xst_xml_entities[$i] ne undef; $i += 2) { $ret =~ s/$xst_xml_entities[$i]/$xst_xml_entities[$i + 1]/g; } #print STDERR "END U: $ret\n"; return $ret; } # --- XML parsing --- # sub xst_xml_get_pcdata { my $tree = $_[0]; my $retval; shift @$tree; # Skip attributes. while (@$tree) { if ($$tree[0] == 0) { $retval = &xst_xml_unquote ($$tree[1]); &xst_debug_print_line ("xst_xml_get_pcdata: $retval"); return ($retval); } shift @$tree; shift @$tree; } return ""; } # Compresses node into a word and returns it. sub xst_xml_get_word { my $tree = $_[0]; my $retval; $retval = &xst_xml_get_pcdata ($tree); $retval =~ tr/ \n\r\t\f//d; return $retval; } # Compresses node into a size and returns it. sub xst_xml_get_size { my $tree = $_[0]; my $retval; $retval = &xst_xml_get_word ($tree); if ($retval =~ /Mb$/) { $retval =~ tr/ Mb//d; $retval *= 1024; } return $retval; } # Replaces misc. whitespace with spaces and returns text. sub xst_xml_get_text { my $tree = $_[0]; my $retval; $retval = &xst_xml_get_pcdata ($tree); my $type = ref ($retval); if (!$type) { $retval =~ tr/\n\r\t\f/ /; } return $retval; } sub xst_xml_get_attribute { my ($tree, $attr) = @_; return $$tree[0]->{$attr}; } sub xst_xml_get_state { my ($tree) = @_; # Check attribute; 'yes', 'true', 'no', 'false'. return &xst_read_boolean ($$tree[0]->{state}); } # XML model operations. # Locate a node from the branch leading up to it. sub xst_xml_model_find { my ($model, $varpath) = @_; my ($branch, @path); $branch = $model; @path = split /\//, $varpath; for $elem (@path) { next if ($elem eq ""); my @children = @$branch; shift @children; # Attributes $branch = undef; while (@children) { if (@children [0] eq $elem) { shift @children; $branch = shift @children; last; } shift @children; shift @children; } last if ($branch == undef); } return $branch; } # Add a branch to another branch. Allows duplicates. sub xst_xml_model_add { my ($model, $varpath, $addpath) = @_; my ($branch, @path); @path = split /\//, $addpath; $branch = &xst_xml_model_find ($model, $varpath); if ($branch == undef) { return -1; } for $elem (@path) { my %hash; my @list = (); push @list, \%hash; push @$branch, $elem; push @$branch, \@list; $branch = \@list; } return 0; } # Ensure a branch exists, by extending the branch with given elements, if needed. sub xst_xml_model_ensure { my ($model, $varpath) = @_; my ($branch, @path); $branch = $model; @path = split /\//, $varpath; for $elem (@path) { next if ($elem eq ""); my @children = @$branch; my $parent_branch = $branch; shift @children; # Attributes $branch = undef; while (@children) { if (@children [0] eq $elem) { shift @children; $branch = shift @children; last; } shift @children; shift @children; } if ($branch == undef) { my %hash; my @list = (); $branch = \@list; push @list, \%hash; push @$parent_branch, $elem; push @$parent_branch, $branch; } } return $branch; } sub xst_xml_model_remove { my ($model, $varpath, $tag) = @_; my ($branch, $i); @path = split /\//, $addpath; $branch = &xst_xml_model_find ($model, $varpath); if ($branch == undef) { return -1; } for ($i = 1; $i < @$branch; $i += 2) { if (@$branch [$i] eq $tag) { @$branch = (@$branch [0 .. $i - 1], @$branch [$i + 2 .. @$branch - 1]); return 0; } } return -1; } sub xst_xml_model_get_children { my ($branch) = @_; my (@children); if (!$branch) { return \@children; } for ($i = 1; $i < @$branch; $i += 2) { if (@$branch [$i] ne "__unparsed__" && @$branch [$i] ne "0") { push @children, @$branch [$i + 1]; } } return \@children; } sub xst_xml_model_get_pcdata { my ($branch) = @_; my ($i); for ($i = 1; $i < @$branch; $i += 2) { if ($$branch [$i] == 0) { my $retval = &xst_xml_unquote ($$branch [$i + 1]); return ($retval); } } return ""; } sub xst_xml_model_set_pcdata { my ($branch, $pcdata) = @_; @$branch = (@$branch [0]); $$branch [1] = 0; $$branch [2] = &xst_xml_quote ($pcdata); } sub xst_xml_model_get_attribute { my ($branch, $attr) = @_; return $$branch[0]->{$attr}; } sub xst_xml_model_set_attribute { my ($branch, $attr, $value) = @_; return $$branch[0]->{$attr} = $value; } 1;