#!/usr/bin/perl
# HTML-to-XML conversion script for AppleScript reference material
# Ron Hale-Evans, a-rhale
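# Usage: h2x C:\MyAppleScriptDirectory
# (searches the given directories recursively; defaults to the current directory.
#  Reads excel.csv and word.csv from, and writes ssout.csv to, the directory the
#  script is started in.)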

use warnings;
use diagnostics;
use File::Find;

# With no directories on the command line, search the current directory.
@ARGV = ('.') if !@ARGV;

# set_defaults
#
# Resets the per-file section globals to their template values. Takes no
# arguments; everything is communicated through package globals. Sections
# that grab_proto_sections does not find in the HTML keep the placeholder
# text assigned here.
sub set_defaults
{
# These sections are optional; init them with a single newline so that
# interpolating them later never triggers an "uninitialized" warning.
$properties = "\n";
$elements = "\n";
$parameters = "\n";

# XML prolog and opening <conceptual> element. The <!--HEADER--> marker is
# stripped again by convert_tags.
$header = <<HEADEND;
<!--HEADER-->
<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="C:/docstudio/SupportFiles/maml_html_custom.xsl"?>
<conceptual xsi:schemaLocation="http://schemas.microsoft.com/maml/2004/10 
file:///C:/Program%20Files/Microsoft%20DocStudio%202005/MAML/Maml.xsd" 
xml:lang="en-US" 
contentType="macHelp" 
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" 
xmlns="http://schemas.microsoft.com/maml/2004/10" 
xmlns:dev="http://schemas.microsoft.com/maml/dev/2004/10" 
xmlns:command="http://schemas.microsoft.com/maml/dev/command/2004/10">
HEADEND

# Placeholder body, used when no introductory paragraphs are found.
$body = <<BODYEND;
<!--BODY-->
<para>
<commandInline>
<?xm-replace_text Required class title?>
</commandInline>
</para>
<para>
<?xm-replace_text Required intro paragraph?>
</para>
<para>
<?xm-replace_text Optional paragraph (0-n)?>
</para>
BODYEND

# Placeholder Example section.
$example = <<EXAMPEND;
<!--EXAMPLE-->
<section>
<title>Example</title>
<para>
<?xm-replace_text Paragraph (1-n)?>
</para>
<dev:code>
<?xm-replace_text Code block?>
</dev:code>
</section>
EXAMPEND

# Placeholder Notes section.
$notes = <<NOTESEND;
<!--NOTES-->
<section>
<title>Notes</title>
<para>
<?xm-replace_text Notes paragraph (0-1)?>
</para>
<dev:code>
<?xm-replace_text Code block (0-1)?>
</dev:code>
<para>
<embedObject>
<objectUri href="eead9dd1-9dc9-412c-9661-45ab954fcdc5" mimeType="image/gif"/>
</embedObject>
</para>
<alertSet class="note">
<alert>
<para>
<?xm-replace_text Enter para text of alert here (0-1)?>
</para>
</alert>
</alertSet>
</section>
NOTESEND

# Tail appended by grab_proto_sections after Remarks text taken from the
# HTML; it also supplies the closing </section>.
$notestail = <<NOTESTAILEND;
<para>
<embedObject>
<objectUri href="eead9dd1-9dc9-412c-9661-45ab954fcdc5" mimeType="image/gif"/>
</embedObject>
</para>
<alertSet class="note">
<alert>
<para>
<?xm-replace_text Enter para text of alert here (0-1)?>
</para>
</alert>
</alertSet>
</section>
NOTESTAILEND

# Closes <sections>/<content> and adds two placeholder related links.
$footer = <<FOOTEND;
<!--FOOTER-->
</sections> 
</content> 
<relatedLinks> 
<navigationLink> 
<linkText>
TBD
</linkText>
<uri href="DocStudioGUID"/>
</navigationLink> 
<navigationLink> 
<linkText>
TBD
</linkText>
<uri href="DocStudioGUID"/> 
</navigationLink> 
</relatedLinks>
</conceptual>
FOOTEND

# Placeholder Syntax section (command pages only).
$syntax = <<SYNTAXEND;
<!--SYNTAX-->
<section>
<title>Syntax
</title>
<dev:code>
<?xm-replace_text Code Block (1-n)?>
</dev:code>
</section>
SYNTAXEND

# Errors section; never filled from the HTML.
$errors = <<ERRSEND;
<!--ERRORS-->
<section>
<title>Errors
</title>
</section>
ERRSEND

# Result section template; convert_result replaces the "Enter para text
# here (1-n)" placeholder inside it.
$result = <<RESULTEND;
<!--RESULT-->
<section>
<title>Result
</title>
<para>
<?xm-replace_text Enter para text here (1-n)?>
</para>
<para>
<embedObject>
<objectUri href="eead9dd1-9dc9-412c-9661-45ab954fcdc5" mimeType="image/gif"/>
</embedObject>
</para>
</section>
RESULTEND

# Pristine copy of the template, kept because $result is overwritten by
# grab_proto_sections when the HTML supplies a result.
$default_result = $result;

# Opening of the two-column Properties table (header row only).
$proptable = <<PROPTABLEEND;
<table>
<row> 
<headerEntry> 
<para>
Name
</para>
</headerEntry> 
<headerEntry> 
<para>
Description
</para>
</headerEntry> 
</row>
PROPTABLEEND

# Opening of the three-column Elements table (header row only).
$elemtable = <<ELEMTABLEEND;
<table>
<row>
<headerEntry>
<para> 
Name
</para>
</headerEntry>
<headerEntry>
<para>
Class
</para>
</headerEntry>
<headerEntry>
<para>
Description
</para>
</headerEntry>
</row>
ELEMTABLEEND

# Title plus opening of the four-column Parameters table (header row only).
$paramtable = <<PARAMTABLEEND;
<title>Parameters
</title>
<!--Table can have 1-n rows.-->
<table>
<row>
<headerEntry>
<para>Parameter
</para>
</headerEntry>
<headerEntry>
<para>Description
</para>
</headerEntry>
<headerEntry>
<para>Class
</para>
</headerEntry>
<headerEntry>
<para>Default value
</para>
</headerEntry>
</row>
PARAMTABLEEND
}

# stub_tables
#
# Swaps every HTML <table>...</table> in the slurped page ($et) for a
# one-paragraph placeholder, so table markup never reaches the section
# extraction that follows.
sub stub_tables
{
    return $et =~ s/<table.*?<\/table>/<para>\[TABLE GOES HERE\]<\/para>/sgi;
}



# grab_proto_sections
#
# Cuts the slurped HTML page in $et into raw, still-HTML chunks and stores
# them in the section globals ($header, $body, $properties, $elements,
# $example, $notes, $syntax, $parameters, $result). A section that is not
# found keeps whatever value it had on entry. Also sets $objName (the class
# or command name taken from the <H1> heading) and $hheader.
#
# Takes no arguments and works purely through package globals.
sub grab_proto_sections
{
    # The following XML sections have no direct equivalent in HTML and are not searched for:
    # Footer, Errors

    # Start from a known name for every file. Without this reset, a page
    # with no usable <H1> silently kept the previous file's $objName, which
    # the caller then uses for the spreadsheet row and the output filename.
    $objName = "UNKNOWN";

    if ($et =~ m/(.*<\/H1>)/si)
    {
        $hheader = "<!--HEADER-->\n$1\n\n"; #HTML header

        # The name is whatever follows the "Word: " style prefix inside the
        # heading. An empty (or false) capture keeps the UNKNOWN default.
        if ($hheader =~ m/(<H1.*?\/A>)(\w*\:\s*)(.*?)(<\/H1>)/si && $3)
        {
            $objName = $3;
            $objName =~ s/\s+/ /sgi;
        }
    }

    # Always open <title>/<content>: the footer unconditionally closes
    # </content>, so skipping this for heading-less pages left the document
    # unbalanced.
    $header = "$header\n<title>$objName</title>\n<content>\n\n";

    # Introductory paragraphs: from the first class-T paragraph after the
    # heading up to the first class-RL paragraph.
    if ($et =~ m/(<H1.*?)(<P class=["]*T["]*>[A-Z].*?)(<P class=["]*RL["]*>)/si)
    {
        $body = "<!--BODY-->\n<para>\n<commandInline>$objName</commandInline>\n</para>\n$2\n\n";
    }

    # Properties: greedy, runs from the heading to the last </P> in the page.
    if ($et =~ m/(<P class=["]*RL["]*>Properties<\/P>.*<\/P>)/si)
    {
        $properties = "<!--PROPERTIES-->\n$1\n</section>\n\n";
    }

    # Elements: only the heading ($1) and the empty table header are kept.
    # NOTE(review): the element paragraphs captured in $2/$3 are discarded
    # here, so convert_elements never sees any rows. Confirm whether that is
    # intentional.
    if ($et =~ m/(<P class=["]*RL["]*>Elements<\/P>)(.*)(<P class=["]*T["]*><B>.*?<\/B><\/P>\s*)(<P\s*class=["]*RL["]*>)/si)
    {
        $elements = "<!--ELEMENTS-->\n$1\n$elemtable\n</section>\n\n";
    }

    # Example: heading through the last </CODE></PRE> in the page.
    if ($et =~ m/(<P class=["]*RL[NSR]*["]*><B>Example<\/B><\/P>.*<\/CODE><\/PRE>)/si)
    {
        $example = "<!--EXAMPLE-->\n$1\n</section>\n\n";
    }

    # Remarks become Notes; $notestail supplies the closing markup.
    if ($et =~ m/(<P class=["]*RL["]*><B>Remarks<\/B><\/P>.*?)(<P class=["]*RL["]*>)/si)
    {
        $notes = "<!--NOTES-->\n$1\n$notestail\n\n";
    }

    # Syntax: heading through the bold part of the first class-SYN paragraph.
    if ($et =~ m/(<P class=["]*RL[NSR]*["]*><B>Syntax<\/B><\/P>.*?)(<P class=["]*SYN["]*><B>.*?<\/B>)/si)
    {
        $syntax = "<!--SYNTAX-->\n$1$2\n</section>\n\n";
    }

    # Parameters: from the first class-DT1 paragraph to the next RL heading.
    if ($et =~ m/(<P class=["]*DT1["]*>.*?)(<P class=["]*RL[NSR]*["]*)/si)
    {
        $parameters = "<!--PARAMETERS-->\n$paramtable\n$1\n</section>\n\n";
    }

    # Result: the text between the bold command name and the first
    # "Required"/"Optional" marker in the class-SYN paragraph.
    if ($et =~ m/(<P class=["]*SYN["]*>.*?<\/P>)/si)
    {
        $result = $1;
        $result =~ s/&nbsp;//sgi;
        $result =~ s/<P class=["]*SYN["]*><B>.*?<\/B>\s*(.*?)(Required|Optional).*/$1/si;
    }
}


# convert_properties
#
# Rewrites the raw Properties chunk in $properties: the heading becomes a
# <section> with a title followed by the $proptable header, and each
# italic-name paragraph followed by a class-"ind" description paragraph
# becomes one two-column table row. Works on package globals only.
sub convert_properties
{
    # Heading: drop the bold wrapper first, then open the section and table.
    $properties =~ s/<B>Properties<\/B>/Properties/sgi;
    $properties =~ s/<P\s*class=["]*RL["]*>Properties<\/P>/\n<section>\n<title>Properties<\/title>\n$proptable/sgi;

    # Ordinary property: <I>name</I> paragraph + indented description.
    $properties =~ s/<P.*?><I>(.*?)<\/I><\/P>\s*<P\s*class\=["]*ind["]*>(.*?)<\/P>/
	<row>
	<entry>
	<para>
	<codeInline>$1<\/codeInline>
        <\/para>
	<\/entry>
	<entry>
	<para>$2<\/para>
	<\/entry>
	<\/row>/sgix;

    # Special case: the "&lt;Inheritance&gt;" pseudo-property.
    $properties =~ s/(<P.*?>\&lt\;<I>)(Inheritance)(<\/I>\&gt\;)(.*?)<\/P>\s*<P\s*class=["]*ind["]*>(.*?)<\/P>/
	<row>
	<entry>
	<para>
	<codeInline>$2<\/codeInline>
	<\/para>
	<\/entry>
	<entry>
	<para>$5<\/para>
	<\/entry>
	<\/row>/sgix;
}

# convert_elements
#
# Rewrites the raw Elements chunk in $elements: the heading becomes a
# <section> with a title, and each bold class-T paragraph becomes a
# three-column table row (name, class TBD, description TBD). The result is
# also echoed to STDOUT. Works on package globals only.
sub convert_elements
{
    $elements =~ s/<P class=["]*RL["]*>Elements<\/P>/<section>\n<title>Elements<\/title>/si;

    # This pattern is compiled with /x, under which a literal space is
    # ignored; the original "<P class=" therefore meant "<Pclass=" and could
    # never match real HTML. \s* spells the separator explicitly, as the
    # other /x patterns in this file already do.
    $elements =~ s/(<P\s*class=["]*T["]*><B>)([\w\s]*?)(<\/B><\/P>)/
        <row>
	<entry>
	<para>
	<codeInline>$2<\/codeInline>
	<\/para>
	<\/entry>
	<entry>
	<para>TBD<\/para>
	<\/entry>
	<entry>
	<para>TBD<\/para>
	<\/entry>
	<\/row>/sgix;

    # NOTE(review): looks like leftover debugging output; kept so the
    # script's STDOUT does not change.
    print "$elements\n";
}


# convert_parameters
#
# Rewrites the raw Parameters chunk in $parameters: non-breaking spaces are
# dropped and each class-DT1 paragraph becomes a four-column table row
# (parameter, description, class TBD, default value TBD). Works on package
# globals only.
sub convert_parameters
{
    $parameters =~ s/&nbsp;//sgi;

    # Accept the class attribute with or without quotes. grab_proto_sections
    # collects both forms (class=["]*DT1["]*), but this pattern used to
    # convert only the unquoted one, leaving quoted paragraphs as bare text.
    $parameters =~ s/<P\s*class\=["]*dt1["]*>\s*([\[]*\s*<B>.*?<\/B>)(.*?)<\/P>/
        <row>
	<entry>
	<para>
	<codeInline>$1<\/codeInline>
	<\/para>
	<\/entry>
	<entry>
	<para>$2<\/para>
	<\/entry>
	<entry>
	<para>TBD<\/para>
	<\/entry>
	<entry>
	<para>TBD<\/para>
	<\/entry>
	<\/row>/sgix;
}

# convert_example
#
# Turns the raw Example chunk in $example (heading, one class-T paragraph
# and one class-CT <PRE><CODE> block) into a <section> containing a title,
# a <para> and a <dev:code> element. Text outside the matched span is left
# as it is. Works on package globals only.
sub convert_example
{
    return $example =~ s/(<P class\=["]*RL\w*["]*>\s*<B>Example<\/B>\s*<\/P>\s*<P class\=["]*T["]*>)(.*)(<\/P>\s*<PRE class\=["]*CT["]*>\s*<CODE>)(.*)(<\/CODE>\s*<\/PRE>)/<section>\n<title>Example<\/title><para>$2<\/para><dev:code>$4<\/dev:code><\/section>/sgi;
}

# convert_notes
#
# Replaces the "Remarks" heading in $notes with the opening of a Notes
# section. Done in two passes: the first inserts an empty, already-closed
# section in place of the heading (its trailing lazy group always captures
# nothing), the second reopens any empty Notes section so the following
# text ends up inside it. Works on package globals only.
sub convert_notes
{
    # Pass 1: heading -> empty <section>...</section>.
    $notes =~ s/(<P class=["]*RL["]*><B>Remarks<\/B><\/P>)(.*?)/<section>\n<title>Notes<\/title>\n$2\n<\/section>/sgi;

    # Pass 2: drop the premature </section> again.
    return $notes =~ s/<section>\s*<title>Notes<\/title>\s*<\/section>/<section>\n<title>Notes<\/title>\n/sgi;
}

# convert_syntax
#
# Rewrites the raw Syntax chunk in $syntax: the class-RL heading opens a
# titled <section>, and the bold part of each class-SYN paragraph becomes a
# <dev:code> element followed by the section close. Works on package
# globals only.
sub convert_syntax
{
    # Heading -> section opening.
    $syntax =~ s/<P class=["]*RL["]*><B>Syntax<\/B><\/P>/<section><title>Syntax<\/title>/sgi;

    # Bold command signature -> code block.
    return $syntax =~ s/(<P class=["]*SYN["]*><B>)(.*?)(<\/B>)/<dev:code>$2<\/dev:code><\/section>/sgi;
}

# convert_result
#
# Drops the current $result text into the "Enter para text here (1-n)"
# placeholder of the Result template held in $default_result, then makes
# the filled-in template the new $result. Note that $default_result itself
# is modified. Works on package globals only.
sub convert_result
{
    $default_result =~ s/<\?xm-replace_text Enter para text here \(1-n\)\?>/$result/sgi;

    return $result = $default_result;
}

# convert_proto_sections
#
# Runs the per-section converters that apply to the current page type:
# $fileType 1 (class page) converts properties and elements, anything else
# (command page) converts syntax, parameters and result. Example and notes
# are converted last for both kinds.
sub convert_proto_sections
{
    if ($fileType == 1)
    {
        convert_properties();
        convert_elements();
    }
    else
    {
        convert_syntax();
        convert_parameters();
        convert_result();
    }

    # Common to classes and commands.
    convert_example();
    convert_notes();
}

# concatenate_sections
#
# Joins the individual section globals into the complete document text in
# $outText. Class pages ($fileType 1) get properties and elements; every
# other page is treated as a command and gets syntax, parameters, result
# and the errors stub instead.
sub concatenate_sections
{
    $outText = ($fileType == 1)
        ? "$header\n$body\n<sections>\n\n$properties\n$elements\n$example\n$notes\n$footer"
        : "$header\n$body\n<sections>\n\n$syntax\n$parameters\n$result\n$example\n$notes\n$errors\n$footer";
}


# insert_guid
#
# Takes the text of one <navigationLink>...</navigationLink> element and
# returns it with
#   * the link text reduced to plain text (tags removed, whitespace
#     collapsed and trimmed), and
#   * the uri href replaced by a GUID.
# The link text is looked up in %excel or %word, chosen by $appCell. A found
# value of 1-4 characters is left-padded with "f" into the last group of an
# otherwise all-"f" GUID. In every other case (no table entry, or a value of
# any other length) the all-"e" placeholder GUID is used.
#
# Input that does not have the expected linkText/uri shape is returned
# unchanged. The match used to be unchecked, so such input was rebuilt from
# stale or undefined capture variables.
sub insert_guid
{
    my ($navlink) = @_;

    my ($open, $text, $middle, $guid, $close) = $navlink =~
        m/(<navigationLink>\s*<linkText>)(.*?)(<\/linkText>\s*<uri href=")(.*?)("\/>\s*<\/navigationLink>)/si
        or return $navlink;

    # Reduce the link text to the bare name used as key in the GUID tables.
    $text =~ s/<.*?>//sgi;
    $text =~ s/\s+/ /sgi;
    $text =~ s/^\s+//;
    $text =~ s/\s+$//;

    if ($appCell eq "AS_Excel")
    {
        $guid = "$excel{$text}" if exists($excel{$text});
    }
    elsif ($appCell eq "AS_Word")
    {
        $guid = "$word{$text}" if exists($word{$text});
    }

    # Short table values are ids, not GUIDs: pad them to the 12-character
    # final group. Anything else is treated as "no GUID known".
    my $id_length = length($guid);
    if ($id_length >= 1 && $id_length <= 4)
    {
        $guid = "ffffffff-ffff-ffff-ffff-" . ("f" x (12 - $id_length)) . $guid;
    }
    else
    {
        $guid = "eeeeeeee-eeee-eeee-eeee-eeeeeeeeeeee";
    }

    return "$open$text$middle$guid$close";
}


# grab_data_for_spreadsheet
#
# Classifies the current input file by a marker in its name ($in_file) and
# records the outcome in $appCell, $typeCell and $fileType (2 = command
# page, 1 = class page, 0 = neither). Markers are tried in the order listed;
# a file that matches none is reported on STDOUT.
sub grab_data_for_spreadsheet
{
    # marker, application, reference type, file type
    my @file_kinds = (
        [ qr/xlmth/i, "AS_Excel", "AppleScript_Command_Reference", 2 ],
        [ qr/xlobj/i, "AS_Excel", "AppleScript_Class_Reference",   1 ],
        [ qr/womth/i, "AS_Word",  "AppleScript_Command_Reference", 2 ],
        [ qr/woobj/i, "AS_Word",  "AppleScript_Class_Reference",   1 ],
    );

    foreach my $kind (@file_kinds)
    {
        my ($marker, $app, $type, $code) = @$kind;
        next unless $in_file =~ $marker;

        $appCell  = $app;
        $typeCell = $type;
        return $fileType = $code;
    }

    $appCell  = "UNKNOWN";
    $typeCell = "UNKNOWN";
    $fileType = 0;
    print "$in_file not a class or command\n";
}

# grab_guids
#
# Loads the name-to-GUID table for one application.
#   $_[0]  application name, "excel" or "word"; the data is read from
#          "./<name>.csv" and stored in %excel or %word respectively.
#          Any other name reads the file but stores nothing.
# Each usable line has at least three comma-separated fields; the second is
# the key and everything after the second comma is the value. Every stored
# pair is echoed to STDOUT. Dies if the file cannot be opened.
sub grab_guids
{
    my ($app) = @_;
    my $guidfile = "$app.csv";

    open(my $guid_fh, '<', "./$guidfile")
	or die "Couldn't open GUID file: $!\n";

    while (my $line = <$guid_fh>)
    {
	# Strip the line ending, including a CR left over from CRLF files,
	# so it cannot end up inside the stored GUID.
	$line =~ s/[\r\n]+\z//;

	# Skip blank or malformed lines. The match used to be unchecked, so
	# such lines re-used the captures of the previous line.
	next unless $line =~ m/(.*?),(.*?),(.*?)$/;
	my ($name, $guid) = ($2, $3);

	if ($app eq "excel")
	{
	    $excel{$name} = $guid;
	    print "$name = $excel{$name}\n";
	}
	elsif ($app eq "word")
	{
	    $word{$name} = $guid;
	    print "$name = $word{$name}\n";
	}
    }
    close $guid_fh;
}


# convert_tags
#
# Final clean-up pass over the assembled document in $outText: maps the
# remaining HTML tags to their MAML equivalents, repairs the table, row,
# entry and para nesting that the section converters leave unbalanced,
# replaces special characters, resolves hyperlinks through insert_guid and
# finally normalises whitespace. The substitutions are strictly
# order-dependent; do not reorder them.
#
# Works on the package globals $outText and $proptable.
sub convert_tags
{
    # HTML tags to XML equivalents
    $outText =~ s/<p>/<para>/sgi;
    $outText =~ s/<P class\=["]*\w*["]*>/<para>/sgi;
    $outText =~ s/<\/p>/<\/para>/sgi;

    # Fix up section/table/row/entry nesting
    $outText =~ s/<\/para>\s*<section>/<\/para><\/section><section>/sgi;
    $outText =~ s/(<\/row>)(.*?)(<row>)/$1\n\n$2/sgi;
    $outText =~ s/<\/row>\s*<entry>/<\/row>\n<row>\n<entry>/sgi;
    $outText =~ s/(<\/entry>\s*<\/row>\s*)(<\/section>)/$1<\/table>\n$2/sgi;
    $outText =~ s/(<\/section>)(\s*<\/section>)/$1/sgi;
    $outText =~ s/<table>\s*<headerEntry>/<table>\n<row>\n<headerEntry>/sgi;
    $outText =~ s/<\/row>\s*<\/para>\s*<\/entry>\s*<\/row>/<\/row>/sgi;
    $outText =~ s/(<\/entry>\s*<\/row>)(.*?)(<row>)/$2$1\n\n$3/sgi;
    $outText =~ s/<\/entry>/<\/para><\/entry>/sgi;
    $outText =~ s/<\/para>\s*<\/table>/<\/para>\n<\/entry>\n<\/row>\n<\/table>/sgi;
    $outText =~ s/<\/para>\s*<\/para>/<\/para>/sgi;
    $outText =~ s/(<\/para>\s*<\/entry>\s*<\/row>)\s*<\/para>\s*<\/entry>\s*<\/row>/$1/sgi;
    $outText =~ s/(<\/para>)\s*(<entry>)/$1\n<\/entry>\n$2/sgi;
    $outText =~ s/(<table>)\s*<\/para>\s*<\/entry>\s*<\/row>\s*(<row>)/$1$2/sgi;

    # Lists, images: TBD
    $outText =~ s/<UL.*?\/UL>/\<para>List goes here!<\/para>/sgi;
    $outText =~ s/<IMG.*?>/ TBD:IMAGE /sgi;

    # The XML version doesn't use special fonts
    $outText =~ s/<FONT.*?>(.*?)<\/FONT>/$1/sgi;

    # Special characters, etc.
    # NOTE(review): the literal characters these patterns matched were not
    # readable in the copy under review; the patterns appeared empty, and an
    # empty pattern makes Perl re-use the last successful pattern. They are
    # restored here as explicit Windows-1252 byte values chosen from the
    # trailing comments. Confirm against the encoding of the source HTML,
    # in particular the second "em dash" (assumed to be the en dash).
    $outText =~ s/\xAC/&#0172;/sgi; # line continuation character
    $outText =~ s/\x92/'/sgi;    # apostrophe
    $outText =~ s/\x93/"/sgi;    # left double quote
    $outText =~ s/\x94/"/sgi;    # right double quote
    $outText =~ s/\x97/--/sgi;   # em dash
    $outText =~ s/\x96/--/sgi;   # en dash (assumed, see note above)
    $outText =~ s/&nbsp;//sgi;   # non-breakable space
    $outText =~ s/<br>//sgi;     # hard line break
    $outText =~ s/\xAB/&lt;&lt;/sgi;     # left guillemets
    $outText =~ s/\xBB/&gt;&gt;/sgi;     # right guillemets

    # AppleScript keywords
    $outText =~ s/<B>(.*?)<\/B>/<codeInline>$1<\/codeInline>/sgi;
    $outText =~ s/<I>(.*?)<\/I>/<codeInline>$1<\/codeInline>/sgi;
    $outText =~ s/<I>/<codeInline>/sgi;

    # Code examples
    $outText =~ s/<PRE class=["]*CT["]*><CODE>(.*?)<\/CODE><\/PRE>/<dev:code>$1<\/dev:code>/sgi;
    $outText =~ s/<P class=["]*T["]*><CODE>(.*?)<\/CODE><\/P>/<dev:code>$1<\/dev:code>/sgi;
    $outText =~ s/<PRE class=["]*CT["]*>(.*?)<\/PRE>/<dev:code>$1<\/dev:code>/sgi;
    $outText =~ s/<CODE>/<codeInline>/sgi;
    $outText =~ s/<\/CODE>/<\/codeInline>/sgi;
    $outText =~ s/<dev:code>\s*<codeInline>/<dev:code>/sgi;
    $outText =~ s/<\/codeInline>\s*<\/dev:code>/<\/dev:code>/sgi;
    $outText =~ s/(<linkText>)\s*<codeInline>(.*?)<\/codeInline>\s*(<\/linkText>)/$1$2$3/sgi;

    # This recondite kluge will go away eventually
    # (re-inserts the Properties table header after earlier passes damaged it)
    $outText =~ s/(<title>Properties\s*<\/title>\s*)<table>\s*<row>\s*<headerEntry>/$1$proptable/sgi;

    # Hyperlinks: every <A HREF> becomes a navigationLink with a placeholder
    # GUID, then insert_guid looks up the real one from the link text.
    $outText =~ s/<A\s*HREF=\"(.*?)\">(.*?)<\/A>/<navigationLink>\n<linkText>\n$2\n<\/linkText>\n<uri href="eeeeeeee-eeee-eeee-eeee-eeeeeeeeeeee"\/>\n<\/navigationLink>/sgi;
    $outText =~ s/(<navigationLink>.*?<\/navigationLink>)/&insert_guid($1)/esgi;

    # More fixes
    $outText =~ s/<\/B>//sgi;
    $outText =~ s/<\/I>//sgi;
    $outText =~ s/<para>\s*<para>/<para>/sgi;
    $outText =~ s/<!--.*?-->//sgi;
    $outText =~ s/(<para>\s*<codeInline>[^<>]*?)(<\/para>)/$1\n<\/codeInline>\n$2/sgi;
    $outText =~ s/<B>/<codeInline>/sgi;
    # Unwrap links from codeInline. The replacement used to end in a stray
    # ")" that was written into the output after every such link.
    $outText =~ s/(<codeInline>)\s*(<navigationLink>)\s*(.*?)\s*(<\/navigationLink>)\s*(<\/codeInline>)/$2\n$3\n$4\n/sgi;
    $outText =~ s/<\/codeInline>\s*<codeInline>//sgi;
    $outText =~ s/(<para>)([^<>]*?)(<codeInline>)([^<>]*?)(<\/para>)/$1\n$2\n$3\n$4<\/codeInline>\n$5/sgi;
    $outText =~ s/<para>\s*<codeInline>Syntax\s*<\/codeInline>\s*<\/para>/<section>\n<title>Syntax\n<\/title>/sgi;
    $outText =~ s/<codeInline>\s*<codeInline>/<codeInline>/sgi;
    $outText =~ s/<\/codeInline>\s*<\/codeInline>/<\/codeInline>/sgi;
    $outText =~ s/(<para>)([^<>]*?)(<para>)/$1$2<\/para>\n$3/sgi;
    $outText =~ s/<PRE\s.*?>(.*)<\/PRE>/$1/sgi;
    $outText =~ s/<\/section>\s*<title>/<\/section><section><title>/sgi;
    $outText =~ s/<row>\s*<entry>\s*<para>\s*<codeInline>Note\s*<\/codeInline>(.*?)<\/para>\s*<\/section>/
      <row>
      <entry>
      <para>
      <codeInline>Note<\/codeInline>\ $1
      <\/para>
      <\/entry>
      <\/row>
      <\/section>/sgix;
    $outText =~ s/<\/para>\s*<\/entry>\s*<\/row>\s*<para>/<\/para><para>/sgi;
    $outText =~ s/(<codeInline>[^<>]*?)(<codeInline>)/$1/sgi;
    $outText =~ s/(<\/para>\s*)(<section>\s*<title>Syntax\s*<\/title>)/$1\n<sections>\n$2/sgi;
    $outText =~ s/(<sections>)(.*?)(<sections>)/$1$2/sgi;
    $outText =~ s/(<\/para>\s*)(<section>\s*<title>Syntax\s*<\/title>)/$1/sgi;
    $outText =~ s/(<section>\s*<title>Result\s*<\/title>\s*<para>\s*)(<section>\s*<title>Result\s*<\/title>\s*<para>)/$1/sgi;
    $outText =~ s/(<\/para>\s*)(<\/section>\s*<section>\s*<title>Result)/$1<\/entry><\/row><\/table>$2/sgi;


    # Close broken paragraphs and clean up afterwards: put a </para> in
    # front of every <para>, then remove the ones that are not needed.
    $outText =~ s/<para>/<\/para>\n<para>/sgi;
    $outText =~ s/(<\/para>\s*)<\/para>/$1/sgi;
    $outText =~ s/(<content>\s*)<\/para>/$1/sgi;
    $outText =~ s/(<\/title>\s*)<\/para>/$1/sgi;
    $outText =~ s/(<\/dev:code>\s*)<\/para>/$1/sgi;
    $outText =~ s/(<alert>\s*)<\/para>/$1/sgi;
    $outText =~ s/(<headerEntry>\s*)<\/para>/$1/sgi;
    $outText =~ s/(<entry>\s*)<\/para>/$1/sgi;
    $outText =~ s/(<table>\s*)<\/para>/$1/sgi;
    $outText =~ s/(<\/row>\s*)<\/para>/$1/sgi;
    $outText =~ s/(<\/section>\s*)<\/para>/$1/sgi;
    $outText =~ s/<\/row>\s*<para>/<\/row>\n<row>\n<entry>\n<para>/sgi;

    # Close broken entries, likewise
    $outText =~ s/<entry>/<\/entry>\n<entry>/sgi;
    $outText =~ s/(<\/entry>\s*)<\/entry>/$1/sgi;
    $outText =~ s/(<row>\s*)<\/entry>/$1/sgi;

    # Tidy: one tag per line, blank line before each row, no leading space
    $outText =~ s/\s*</\n</sgi;
    $outText =~ s/\n\n/\n/sgi;
    $outText =~ s/\s*<!--HEADER-->\s*//sgi;
    $outText =~ s/(<!--)/\n$1/sgi;
    $outText =~ s/(<row>)/\n$1/sgi;
    $outText =~ s/^\s*//si;
}


# process_file
#
# File::Find callback. For every file whose name ends in .htm or .html
# (taken from $_), slurps the page into $et, classifies it, and (when it
# is a class or command page) runs the conversion pipeline, appends a row
# to the SSOUT spreadsheet and writes the resulting XML.
#
# The output file is named after the GUID-table entry for $objName when
# there is one, otherwise after the input file with its extension replaced
# by .xml. Dies if the input or output file cannot be opened, or if the
# output file cannot be closed.
sub process_file 
{
    return unless $_ =~ /\.html?$/i;

    print "Processing $_...\n";
    $in_file = $_;
    open(my $in_fh, '<', $in_file)
	or die "Couldn't open input file: $!\n";

    # Read in whole file as one big chunk; $/ is restored automatically.
    $et = do { local $/; <$in_fh> };
    close $in_fh;
    $et = "" unless defined $et;

    grab_data_for_spreadsheet();
    return unless $fileType;

    set_defaults();
    stub_tables();
    grab_proto_sections();
    convert_proto_sections();
    concatenate_sections();
    convert_tags();

    print SSOUT "$appCell,$typeCell,$objName\n";

    # Replace the whole extension. The previous unanchored s/\.htm/\.xml/
    # turned "page.html" into "page.xmll".
    my $out_file = $in_file;
    $out_file =~ s/\.html?$/.xml/i;

    if ($appCell eq "AS_Excel")
    {
	$out_file = "$excel{$objName}.xml" if exists($excel{$objName});
    }
    elsif ($appCell eq "AS_Word")
    {
	$out_file = "$word{$objName}.xml" if exists($word{$objName});
    }

    open(my $out_fh, '>', $out_file)
	or die "Couldn't open output file: $!\n";
    print "Writing to $out_file\n\n";
    print {$out_fh} "$outText";

    # Buffered write errors only surface at close time.
    close $out_fh
	or die "Couldn't close output file: $!\n";
}

# Main program: load both GUID tables, open the spreadsheet that receives
# one row per converted page, then convert every HTML file found below the
# directories named on the command line.
grab_guids("excel");
grab_guids("word");

open(SSOUT, '>', 'ssout.csv')
    or die "Couldn't open output spreadsheet: $!\n";

find(\&process_file, @ARGV);

# Check the close as well: buffered write errors are only reported here.
close(SSOUT)
    or die "Couldn't close output spreadsheet: $!\n";
