#!/usr/bin/perl -w
# -*- cperl -*-
#
# gtk-doc - GTK DocBook documentation generator.
# Copyright (C) 1998  Damon Chaplin
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
#

#############################################################################
# Script      : gtkdoc-scan
# Description : Extracts declarations of functions, macros, enums, structs
#                and unions from header files.
#
#                It is called with a module name, an optional source directory,
#                an optional output directory, and the header files to scan.
#
#                It outputs all declarations found to a file named
#                '$MODULE-decl.txt', and the list of decarations to another
#                file '$MODULE-decl-list.txt'.
#
#                This second list file is typically copied to
#                '$MODULE-sections.txt' and organized into sections ready to
#                output the SGML pages.
#############################################################################

use strict;
use Getopt::Long;
use Cwd qw(realpath);

push @INC, '/usr/local/share/gtk-doc/data';
require "gtkdoc-common.pl";

# Options

# name of documentation module
my $MODULE;
my $OUTPUT_DIR;
my @SOURCE_DIRS;
my $IGNORE_HEADERS = "";
my $REBUILD_TYPES;
my $REBUILD_SECTIONS;
my $PRINT_VERSION;
my $PRINT_HELP;
# regexp matching cpp symbols which surround deprecated stuff
# e.g. 'GTK_ENABLE_BROKEN|GTK_DISABLE_DEPRECATED'
# these are detected if they are used as #if FOO, #ifndef FOO, or #ifdef FOO
my $DEPRECATED_GUARDS;
# regexp matching decorators that should be ignored
my $IGNORE_DECORATORS;

my @get_types = ();

# do not read files twice; checking it here permits to give both srcdir and
# builddir as --source-dir without fear of duplicities
my %seen_headers;


run() unless caller; # Run program unless loaded as a module


sub run {
    my %optctl = (module => \$MODULE,
                  'source-dir' => \@SOURCE_DIRS,
                  'ignore-headers' => \$IGNORE_HEADERS,
                  'output-dir' => \$OUTPUT_DIR,
                  'rebuild-types' => \$REBUILD_TYPES,
                  'rebuild-sections' => \$REBUILD_SECTIONS,
                  'version' => \$PRINT_VERSION,
                  'help' => \$PRINT_HELP,
                  'deprecated-guards' => \$DEPRECATED_GUARDS,
                  'ignore-decorators' => \$IGNORE_DECORATORS);
    GetOptions(\%optctl, "module=s", "source-dir:s", "ignore-headers:s",
               "output-dir:s", "rebuild-types", "rebuild-sections", "version",
               "help", "deprecated-guards:s", "ignore-decorators:s");
    
    if ($PRINT_VERSION) {
        print "1.25\n";
        exit 0;
    }
    
    if (!$MODULE) {
        $PRINT_HELP = 1;
    }
    
    if ($PRINT_HELP) {
        print <<EOF;
gtkdoc-scan version 1.25 - scan header files for public symbols

--module=MODULE_NAME       Name of the doc module being parsed
--source-dir=DIRNAME       Directories containing the source files to scan
--ignore-headers=FILES     A space-separated list of header files/dirs not to
                           scan
--output-dir=DIRNAME       The directory where the results are stored
--deprecated-guards=GUARDS A |-separated list of symbols used as deprecation
                           guards
--ignore-decorators=DECS   A |-separated list of addition decorators in
                           declarations that should be ignored
--rebuild-sections         Rebuild (overwrite) the MODULE-sections.txt file
--rebuild-types            Automatically recreate the MODULE.types file using
                           all the *_get_type() functions found
--version                  Print the version of this program
--help                     Print this help
EOF
        exit 0;
    }
    
    $DEPRECATED_GUARDS = $DEPRECATED_GUARDS ? $DEPRECATED_GUARDS : "does_not_match_any_cpp_symbols_at_all_nope";
    
    $IGNORE_DECORATORS = $IGNORE_DECORATORS || "(?=no)match";
    
    $OUTPUT_DIR = $OUTPUT_DIR ? $OUTPUT_DIR : ".";
    
    if (!-d ${OUTPUT_DIR}) {
        mkdir($OUTPUT_DIR, 0755) || die "Cannot create $OUTPUT_DIR: $!";
    }
    
    my $old_decl_list = "${OUTPUT_DIR}/$MODULE-decl-list.txt";
    my $new_decl_list = "${OUTPUT_DIR}/$MODULE-decl-list.new";
    my $old_decl = "${OUTPUT_DIR}/$MODULE-decl.txt";
    my $new_decl = "${OUTPUT_DIR}/$MODULE-decl.new";
    my $old_types = "${OUTPUT_DIR}/$MODULE.types";
    my $new_types = "${OUTPUT_DIR}/$MODULE.types.new";
    my $sections_file = "${OUTPUT_DIR}/$MODULE-sections.txt";
    
    # If this is the very first run then we create the .types file automatically.
    if (! -e $sections_file && ! -e $old_types) {
        $REBUILD_TYPES = 1;
    }
    
    open (DECLLIST, ">$new_decl_list")
        || die "Can't open $new_decl_list";
    open (DECL, ">$new_decl")
        || die "Can't open $new_decl";
    if ($REBUILD_TYPES) {
        open (TYPES, ">$new_types")
            || die "Can't open $new_types";
    }
    
    my %section_list = ();
    my $file;
   
    # The header files to scan are passed in as command-line args.
    for $file (@ARGV) {
        &ScanHeader ($file, \%section_list);
    }
    
    for my $dir (@SOURCE_DIRS) {
        &ScanHeaders ($dir, \%section_list);
    }
    
    ## FIXME: sort by key and output
    #print DECLLIST $section_list;
    my $section;
    foreach $section (sort(keys %section_list)) {
        print DECLLIST "$section_list{$section}";
    }
    
    close (DECLLIST);
    close (DECL);
    
    if ($REBUILD_TYPES) {
        my $func;
    
        foreach $func (sort(@get_types)) {
           print TYPES "$func\n";
        }
        close (TYPES);
        &UpdateFileIfChanged ($old_types, $new_types, 1);
    
        # remove the file if empty
        if (scalar (@get_types) == 0) {
            unlink ("$new_types");
        }
    }
    
    &UpdateFileIfChanged ($old_decl_list, $new_decl_list, 1);
    &UpdateFileIfChanged ($old_decl, $new_decl, 1);
    
    # If there is no MODULE-sections.txt file yet or we are asked to rebuild it,
    # we copy the MODULE-decl-list.txt file into its place. The user can tweak it
    # later if they want.
    if ($REBUILD_SECTIONS || ! -e $sections_file) {
      `cp $old_decl_list $sections_file`;
    }
    
    # If there is no MODULE-overrides.txt file we create an empty one
    # because EXTRA_DIST in gtk-doc.make requires it.
    my $overrides_file = "${OUTPUT_DIR}/$MODULE-overrides.txt";
    if (! -e $overrides_file) {
      `touch $overrides_file`;
    }
}


#############################################################################
# Function    : ScanHeaders
# Description : This scans a directory tree looking for header files.
#
# Arguments   : $source_dir - the directory to scan.
#               $section_list - a reference to the hashmap of sections.
#############################################################################

sub ScanHeaders {
    my ($source_dir, $section_list) = @_;
    #("Scanning source directory: $source_dir");

    # This array holds any subdirectories found.
    my (@subdirs) = ();

    opendir (SRCDIR, $source_dir)
        || die "Can't open source directory $source_dir: $!";
    my $file;
    foreach $file (readdir (SRCDIR)) {
        if ($file eq '.' || $file eq '..' || $file =~ /^\./) {
            next;
        } elsif (-d "$source_dir/$file") {
            push (@subdirs, $file);
        } elsif ($file =~ m/\.h$/) {
            &ScanHeader ("$source_dir/$file", $section_list);
        }
    }
    closedir (SRCDIR);

    # Now recursively scan the subdirectories.
    my $dir;
    foreach $dir (@subdirs) {
        next if ($IGNORE_HEADERS =~ m/(\s|^)\Q${dir}\E(\s|$)/);
        &ScanHeaders ("$source_dir/$dir", $section_list);
    }
}


#############################################################################
# Function    : ScanHeader
# Description : This scans a header file, looking for declarations of
#                functions, macros, typedefs, structs and unions, which it
#                outputs to the DECL file.
# Arguments   : $input_file - the header file to scan.
#               $section_list - a reference to the hashmap of sections.
# Returns     : it adds declarations to the appropriate list.
#############################################################################

sub ScanHeader {
    my ($input_file, $section_list) = @_;

    my $list = "";                  # Holds the resulting list of declarations.
    my $title = "";                 # Holds the title of the section    
    my ($in_comment) = 0;                  # True if we are in a comment.
    my ($in_declaration) = "";          # The type of declaration we are in, e.g.
                                  #   'function' or 'macro'.
    my ($skip_block) = 0;                  # True if we should skip a block.
    my ($symbol);                  # The current symbol being declared.
    my ($decl);                          # Holds the declaration of the current symbol.
    my ($ret_type);                  # For functions and function typedefs this
                                  #   holds the function's return type.
    my ($pre_previous_line) = "";   # The pre-previous line read in - some Gnome
                                  #   functions have the return type on one
                                  #   line, the function name on the next,
                                  #   and the rest of the declaration after.
    my ($previous_line) = "";          # The previous line read in - some Gnome
                                  #   functions have the return type on one line
                                  #   and the rest of the declaration after.
    my ($first_macro) = 1;          # Used to try to skip the standard #ifdef XXX
                                  #   #define XXX at the start of headers.
    my ($level);                          # Used to handle structs/unions which contain
                                  #   nested structs or unions.
    my @objects = ();                  # Holds declarations that look like GtkObject
                                  #   subclasses, which we remove from the list.
    my ($internal) = 0;             # Set to 1 for internal symbols, we need to
                                    #   fully parse, but don't add them to docs
    my %forward_decls = ();         # hashtable of forward declarations, we skip
                                    #   them if we find the real declaration
                                    #   later.
    my %doc_comments = ();          # hastable of doc-comment we found. We can
                                    # use that to put element to the right
                                    # sction in the generated section-file 

    my $file_basename;

    my $deprecated_conditional_nest = 0;
    my $ignore_conditional_nest = 0;

    my $deprecated = "";
    my $doc_comment = "";

    # Don't scan headers twice
    my $canonical_input_file = realpath $input_file;
    if (exists $seen_headers{$canonical_input_file}) {
        #("File already scanned: $input_file");
        return;
    }
    $seen_headers{$canonical_input_file} = 1;

    if ($input_file =~ m/^.*[\/\\](.*)\.h+$/) {
        $file_basename = $1;
    } else {
        LogWarning(__FILE__,__LINE__,"Can't find basename of file $input_file");
        $file_basename = $input_file;
    }

    # Check if the basename is in the list of headers to ignore.
    if ($IGNORE_HEADERS =~ m/(\s|^)\Q${file_basename}\E\.h(\s|$)/) {
        #("File ignored: $input_file");
        return;
    }
    # Check if the full name is in the list of headers to ignore.
    if ($IGNORE_HEADERS =~ m/(\s|^)\Q${input_file}\E(\s|$)/) {
        #("File ignored: $input_file");
        return;
    }

    if (! -f $input_file) {
        LogWarning(__FILE__,__LINE__,"File doesn't exist: $input_file");
        return;
    }

    #("Scanning $input_file");

    open(INPUT, $input_file)
        || die "Can't open $input_file: $!";
    while(<INPUT>) {
        # If this is a private header, skip it.
        if (m%^\s*/\*\s*<\s*private_header\s*>\s*\*/%) {
            close(INPUT);
            return;
        }

        # Skip to the end of the current comment.
        if ($in_comment) {
            #("Comment: $_");
            $doc_comment .= $_;
            if (m%\*/%) {
                if ($doc_comment =~ m/\* ([a-zA-Z][a-zA-Z0-9_]+):/) {
                  $doc_comments{lc($1)} = 1;
                }
                $in_comment = 0;
                $doc_comment = "";
            }
            next;
        }

        # Keep a count of #if, #ifdef, #ifndef nesting,
        # and if we enter a deprecation-symbol-bracketed
        # zone, take note.
        if (m/^\s*#\s*if(?:n?def\b|\s+!?\s*defined\s*\()\s*(\w+)/) {
            my $define_name = $1;
            if ($deprecated_conditional_nest < 1 and $define_name =~ /$DEPRECATED_GUARDS/) {
                 $deprecated_conditional_nest = 1;
            } elsif ($deprecated_conditional_nest >= 1) {
                 $deprecated_conditional_nest += 1;
            }
            if ($ignore_conditional_nest == 0 and $define_name =~ /__GTK_DOC_IGNORE__/) {
                 $ignore_conditional_nest = 1;
            } elsif ($ignore_conditional_nest > 0) {
                 $ignore_conditional_nest += 1;
            }
        } elsif (m/^\s*#\sif/) {
            if ($deprecated_conditional_nest >= 1) {
                 $deprecated_conditional_nest += 1;
            }
            if ($ignore_conditional_nest > 0) {
                 $ignore_conditional_nest += 1;
            }
        } elsif (m/^\s*#endif/) {
            if ($deprecated_conditional_nest >= 1) {
                $deprecated_conditional_nest -= 1;
            }
            if ($ignore_conditional_nest > 0) {
                $ignore_conditional_nest -= 1;
            }
        }

        # If we find a line containing _DEPRECATED, we hope that this is
        # attribute based deprecation and also treat this as a deprecation
        # guard, unless it's a macro definition.
        if ($deprecated_conditional_nest == 0 and m/_DEPRECATED/) {
            unless (m/^\s*#\s*(if*|define)/ or $in_declaration eq "enum") {
                #("Found deprecation annotation (decl: '$in_declaration'): $_");
                $deprecated_conditional_nest += 0.1;
            }
        }

        # set global that is used later when we do AddSymbolToList
        if ($deprecated_conditional_nest > 0) {
            $deprecated = "<DEPRECATED/>\n";
        } else {
            $deprecated = "";
        }

        if($ignore_conditional_nest) {
            next;
        }

        if (!$in_declaration) {
            # Skip top-level comments.
            if (s%^\s*/\*%%) {
                if (m%\*/%) {
                    #("Found one-line comment: $_");
                } else {
                    $in_comment = 1;
                    $doc_comment = $_;
                    #("Found start of comment: $_");
                }
                next;
            }

            #("0: $_");

            # MACROS

            if (m/^\s*#\s*define\s+(\w+)/) {
                $symbol = $1;
                $decl = $_;
                # We assume all macros which start with '_' are private, but
                # we accept '_' itself which is the standard gettext macro.
                # We also try to skip the first macro if it looks like the
                # standard #ifndef HEADER_FILE #define HEADER_FILE etc.
                # And we only want TRUE & FALSE defined in GLib (libdefs.h in
                # libgnome also defines them if they are not already defined).
                if (($symbol !~ m/^_/
                     && ($previous_line !~ m/#ifndef\s+$symbol/
                         || $first_macro == 0)
                     && (($symbol ne 'TRUE' && $symbol ne 'FALSE')
                         || $MODULE eq 'glib'))
                    || $symbol eq "_") {
                    $in_declaration = "macro";
                    #("Macro: $symbol");
                } else {
                    #("skipping Macro: $symbol");
                    $in_declaration = "macro";
                    $internal = 1;
                }
                $first_macro = 0;


            # TYPEDEF'D FUNCTIONS (i.e. user functions)

            #                        $1                                $3            $4             $5
            } elsif (m/^\s*typedef\s+((const\s+|G_CONST_RETURN\s+)?\w+)(\s+const)?\s*(\**)\s*\(\*\s*(\w+)\)\s*\(/) {
                my $p3 = defined($3) ? $3 : "";
                $ret_type = "$1$p3 $4";
                $symbol = $5;
                $decl = $';
                $in_declaration = "user_function";
                #("user function (1): $symbol, Returns: $ret_type");

            #                                                       $1                                $3            $4             $5
            } elsif (($previous_line =~ m/^\s*typedef\s*/) && m/^\s*((const\s+|G_CONST_RETURN\s+)?\w+)(\s+const)?\s*(\**)\s*\(\*\s*(\w+)\)\s*\(/) {
                my $p3 = defined($3) ? $3 : "";
                $ret_type = "$1$p3 $4";
                $symbol = $5;
                $decl = $';
                $in_declaration = "user_function";
                #("user function (2): $symbol, Returns: $ret_type");

            #                                                       $1            $2
            } elsif (($previous_line =~ m/^\s*typedef\s*/) && m/^\s*(\**)\s*\(\*\s*(\w+)\)\s*\(/) {
                $ret_type = $1;
                $symbol = $2;
                $decl = $';
                #                                     $1                                $3
                if ($previous_line =~ m/^\s*typedef\s*((const\s+|G_CONST_RETURN\s+)?\w+)(\s+const)?\s*/) {
                    my $p3 = defined($3) ? $3 : "";
                    $ret_type = "$1$p3 ".$ret_type;
                    $in_declaration = "user_function";
                    #("user function (3): $symbol, Returns: $ret_type");

                }
            # FUNCTION POINTER VARIABLES
            #                                                                     $1                                $3            $4             $5
            } elsif (m/^\s*(?:\b(?:extern|G_INLINE_FUNC|${IGNORE_DECORATORS})\s*)*((const\s+|G_CONST_RETURN\s+)?\w+)(\s+const)?\s*(\**)\s*\(\*\s*(\w+)\)\s*\(/o) {
                my $p3 = defined($3) ? $3 : "";
                $ret_type = "$1$p3 $4";
                $symbol = $5;
                $decl = $';
                $in_declaration = "user_function";
                #("function pointer variable: $symbol, Returns: $ret_type");

            # ENUMS

            } elsif (s/^\s*enum\s+_?(\w+)\s+\{/enum $1 {/) {
                # We assume that 'enum _<enum_name> {' is really the
                # declaration of enum <enum_name>.
                $symbol = $1;
                #("plain enum: $symbol");
                $decl = $_;
                $in_declaration = "enum";

            } elsif (m/^\s*typedef\s+enum\s+_?(\w+)\s+\1\s*;/) {
                # We skip 'typedef enum <enum_name> _<enum_name>;' as the enum will
                # be declared elsewhere.
                #("skipping enum typedef: $1");

            } elsif (m/^\s*typedef\s+enum/) {
                $symbol = "";
                #("typedef enum: -");
                $decl = $_;
                $in_declaration = "enum";


            # STRUCTS AND UNIONS

            } elsif (m/^\s*typedef\s+(struct|union)\s+_(\w+)\s+\2\s*;/) {
                # We've found a 'typedef struct _<name> <name>;'
                # This could be an opaque data structure, so we output an
                # empty declaration. If the structure is actually found that
                # will override this.
                my $structsym = uc $1;
                #("$structsym typedef: $2");
                $forward_decls{$2} = "<$structsym>\n<NAME>$2</NAME>\n$deprecated</$structsym>\n"

            } elsif (m/^\s*(?:struct|union)\s+_(\w+)\s*;/) {
                # Skip private structs/unions.
                #("private struct/union");

            } elsif (m/^\s*(struct|union)\s+(\w+)\s*;/) {
                # Do a similar thing for normal structs as for typedefs above.
                # But we output the declaration as well in this case, so we
                # can differentiate it from a typedef.
                my $structsym = uc $1;
                #("$structsym: $2");
                $forward_decls{$2} = "<$structsym>\n<NAME>$2</NAME>\n$_$deprecated</$structsym>\n";

            } elsif (m/^\s*typedef\s+(struct|union)\s*\w*\s*{/) {
                $symbol = "";
                $decl = $_;
                $level = 0;
                $in_declaration = $1;
                #("typedef struct/union $1");

            # OTHER TYPEDEFS

            } elsif (m/^\s*typedef\s+(?:struct|union)\s+\w+[\s\*]+(\w+)\s*;/) {
                #("Found struct/union(*) typedef $1: $_");
                if (&AddSymbolToList (\$list, $1)) {
                    print DECL "<TYPEDEF>\n<NAME>$1</NAME>\n$deprecated$_</TYPEDEF>\n";
                }

            } elsif (m/^\s*(G_GNUC_EXTENSION\s+)?typedef\s+(.+[\s\*])(\w+)(\s*\[[^\]]+\])*\s*;/) {
                if ($2 !~ m/^struct\s/ && $2 !~ m/^union\s/) {
                    #("Found typedef: $_");
                    if (&AddSymbolToList (\$list, $3)) {
                        print DECL "<TYPEDEF>\n<NAME>$3</NAME>\n$deprecated$_</TYPEDEF>\n";
                    }
                }
            } elsif (m/^\s*typedef\s+/) {
                #("Skipping typedef: $_");


            # VARIABLES (extern'ed variables)

            } elsif (m/^\s*(extern|[A-Za-z_]+VAR|${IGNORE_DECORATORS})\s+((const\s+|signed\s+|unsigned\s+|long\s+|short\s+)*\w+)(\s+\*+|\*+|\s)\s*(const\s+)*([A-Za-z]\w*)\s*;/o) {
                $symbol = $6;
                s/^\s*([A-Za-z_]+VAR)\b/extern/;
                $decl = $_;
                #("Possible extern var $symbol: $decl");
                if (&AddSymbolToList (\$list, $symbol)) {
                    print DECL "<VARIABLE>\n<NAME>$symbol</NAME>\n$deprecated$decl</VARIABLE>\n";
                }


            # VARIABLES

            } elsif (m/^\s*((const\s+|signed\s+|unsigned\s+|long\s+|short\s+)*\w+)(\s+\*+|\*+|\s)\s*(const\s+)*([A-Za-z]\w*)\s*\=/) {
                $symbol = $5;
                $decl = $_;
                #("Possible global var $symbol: $decl");
                if (&AddSymbolToList (\$list, $symbol)) {
                    print DECL "<VARIABLE>\n<NAME>$symbol</NAME>\n$deprecated$decl</VARIABLE>\n";
                }

            # G_DECLARE_*

            } elsif (m/.*G_DECLARE_(FINAL_TYPE|DERIVABLE_TYPE|INTERFACE)\s*\(/) {
                $in_declaration = "g-declare";
                $symbol = "G_DECLARE_$1";
                $decl = $';

            # FUNCTIONS

            # We assume that functions which start with '_' are private, so
            # we skip them.
            #                                                                     $1                                                                                                    $2                                                          $3
            } elsif (m/^\s*(?:\b(?:extern|G_INLINE_FUNC|${IGNORE_DECORATORS})\s*)*((?:const\s+|G_CONST_RETURN\s+|signed\s+|unsigned\s+|long\s+|short\s+|struct\s+|union\s+|enum\s+)*\w+)((?:\s+|\*)+(?:\s*(?:\*+|\bconst\b|\bG_CONST_RETURN\b))*)\s*(_[A-Za-z]\w*)\s*\(/o) {
                $ret_type = $1;
                if (defined ($2)) { $ret_type .= " $2"; }
                $symbol = $3;
                $decl = $';
                #("internal Function: $symbol, Returns: [$1][$2]");
                $in_declaration = "function";
                $internal = 1;
                if (m/^\s*G_INLINE_FUNC/) {
                    #("skip block after inline function");
                    # now we we need to skip a whole { } block
                    $skip_block = 1;
                }

            #                                                                     $1                                                                                                    $2                                                          $3
            } elsif (m/^\s*(?:\b(?:extern|G_INLINE_FUNC|${IGNORE_DECORATORS})\s*)*((?:const\s+|G_CONST_RETURN\s+|signed\s+|unsigned\s+|long\s+|short\s+|struct\s+|union\s+|enum\s+)*\w+)((?:\s+|\*)+(?:\s*(?:\*+|\bconst\b|\bG_CONST_RETURN\b))*)\s*([A-Za-z]\w*)\s*\(/o) {
                $ret_type = $1;
                if (defined ($2)) { $ret_type .= " $2"; }
                $symbol = $3;
                $decl = $';
                #("Function (1): $symbol, Returns: [$1][$2]");
                $in_declaration = "function";
                if (m/^\s*G_INLINE_FUNC/) {
                    #("skip block after inline function");
                    # now we we need to skip a whole { } block
                    $skip_block = 1;
                }

            # Try to catch function declarations which have the return type on
            # the previous line. But we don't want to catch complete functions
            # which have been declared G_INLINE_FUNC, e.g. g_bit_nth_lsf in
            # glib, or 'static inline' functions.
            } elsif (m/^\s*([A-Za-z]\w*)\s*\(/) {
                $symbol = $1;
                $decl = $';

                if ($previous_line !~ m/^\s*G_INLINE_FUNC/) {
                    if ($previous_line !~ m/^\s*static\s+/) {
                        #                                                                     $1                                                                                                   $2
                        if ($previous_line =~ m/^\s*(?:\b(?:extern|${IGNORE_DECORATORS})\s*)*((?:const\s+|G_CONST_RETURN\s+|signed\s+|unsigned\s+|long\s+|short\s+|struct\s+|union\s+|enum\s+)*\w+)((?:\s*(?:\*+|\bconst\b|\bG_CONST_RETURN\b))*)\s*$/o) {
                            $ret_type = $1;
                            if (defined ($2)) { $ret_type .= " $2"; }
                            #("Function  (2): $symbol, Returns: $ret_type");
                            $in_declaration = "function";
                        }
                    } else {
                        #("skip block after inline function");
                        # now we we need to skip a whole { } block
                        $skip_block = 1;
                        #                                                                                  $1                                                                                                    $2
                        if ($previous_line =~ m/^\s*(?:\b(?:extern|static|inline|${IGNORE_DECORATORS})\s*)*((?:const\s+|G_CONST_RETURN\s+|signed\s+|unsigned\s+|long\s+|short\s+|struct\s+|union\s+|enum\s+)*\w+)((?:\s*(?:\*+|\bconst\b|\bG_CONST_RETURN\b))*)\s*$/o) {
                            $ret_type = $1;
                            if (defined ($2)) { $ret_type .= " $2"; }
                            #("Function  (3): $symbol, Returns: $ret_type");
                            $in_declaration = "function";
                        }
                    }
                }
                else {
                    if ($previous_line !~ m/^\s*static\s+/) {
                        #("skip block after inline function");
                        # now we we need to skip a whole { } block
                        $skip_block = 1;
                        #                                                                                  $1                                                                                                    $2
                        if ($previous_line =~ m/^\s*(?:\b(?:extern|G_INLINE_FUNC|${IGNORE_DECORATORS})\s*)*((?:const\s+|G_CONST_RETURN\s+|signed\s+|unsigned\s+|long\s+|short\s+|struct\s+|union\s+|enum\s+)*\w+)((?:\s*(?:\*+|\bconst\b|\bG_CONST_RETURN\b))*)\s*$/o) {
                            $ret_type = $1;
                            if (defined ($2)) { $ret_type .= " $2"; }
                            #("Function (4): $symbol, Returns: $ret_type");
                            $in_declaration = "function";
                        }
                    }
                }

            # Try to catch function declarations with the return type and name
            # on the previous line(s), and the start of the parameters on this.
            } elsif (m/^\s*\(/) {
                $decl = $';
                if ($previous_line =~ m/^\s*(?:\b(?:extern|G_INLINE_FUNC|${IGNORE_DECORATORS})\s*)*((?:const\s+|G_CONST_RETURN\s+|signed\s+|unsigned\s+|enum\s+)*\w+)(\s+\*+|\*+|\s)\s*([A-Za-z]\w*)\s*$/o) {
                    $ret_type = "$1 $2";
                    $symbol = $3;
                    #("Function (5): $symbol, Returns: $ret_type");
                    $in_declaration = "function";

                } elsif ($previous_line =~ m/^\s*\w+\s*$/
                         && $pre_previous_line =~ m/^\s*(?:\b(?:extern|G_INLINE_FUNC|${IGNORE_DECORATORS})\s*)*((?:const\s+|G_CONST_RETURN\s+|signed\s+|unsigned\s+|struct\s+|union\s+|enum\s+)*\w+(?:\**\s+\**(?:const|G_CONST_RETURN))?(?:\s+|\s*\*+))\s*$/o) {
                    $ret_type = $1;
                    $ret_type =~ s/\s*\n//;
                    $in_declaration = "function";

                    $symbol = $previous_line;
                    $symbol =~ s/^\s+//;
                    $symbol =~ s/\s*\n//;
                    #("Function (6): $symbol, Returns: $ret_type");
                }

            #} elsif (m/^extern\s+/) {
                #print "DEBUG: Skipping extern: $_";


            # STRUCTS

            } elsif (m/^\s*struct\s+_?(\w+)\s*\*/) {
                # Skip 'struct _<struct_name> *', since it could be a
                # return type on its own line.

            } elsif (m/^\s*struct\s+_?(\w+)/) {
                # We assume that 'struct _<struct_name>' is really the
                # declaration of struct <struct_name>.
                $symbol = $1;
                $decl = $_;
                 # we will find the correct level as below we do $level += tr/{//;
                $level = 0;
                $in_declaration = "struct";
                #("Struct(_): $symbol");


            # UNIONS

            } elsif (m/^\s*union\s+_(\w+)\s*\*/) {
                    # Skip 'union _<union_name> *' (see above)
            } elsif (m/^\s*union\s+_(\w+)/) {
                $symbol = $1;
                $decl = $_;
                $level = 0;
                $in_declaration = "union";
                #("Union(_): $symbol");
            }

        } else {
            #("1: [$skip_block] $_");
            # If we were already in the middle of a declaration, we simply add
            # the current line onto the end of it.
            if ($skip_block == 0) {
                $decl .= $_;
            } else {
                # Remove all nested pairs of curly braces.
                while ($_ =~ s/{[^{]*}//g) { }
                # Then hope at most one remains in the line...
                if (m%(.*?){%) {
                    if ($skip_block == 1) {
                        $decl .= $1;
                    }
                    $skip_block += 1;
                } elsif (m%}%) {
                    $skip_block -= 1;
                    if ($skip_block == 1) {
                        # this is a hack to detect the end of declaration
                        $decl .= ";";
                        $skip_block = 0;
                        #("2: ---");
                    }
                } else {
                    if ($skip_block == 1) {
                        $decl .= $_;
                    }
                }
            }
        }

        #if ($in_declaration ne '') {
        #    print "$in_declaration = $decl\n";
        #}

        if ($in_declaration eq "g-declare") {
            if ($decl =~ s/\s*(\w+)\s*,\s*(\w+)\s*,\s*(\w+)\s*,\s*(\w+)\s*,\s*(\w+)\s*\).*$//) {
                my $ModuleObjName = $1;
                my $module_obj_name = $2;
                if ($REBUILD_TYPES) {
                    push (@get_types, "${module_obj_name}_get_type");
                }
                $forward_decls{$ModuleObjName} = "<STRUCT>\n<NAME>$ModuleObjName</NAME>\n$deprecated</STRUCT>\n";
                if ($symbol =~ /^G_DECLARE_DERIVABLE/) {
                    $forward_decls{"${ModuleObjName}Class"} = "<STRUCT>\n<NAME>${ModuleObjName}Class</NAME>\n$deprecated</STRUCT>\n";
                }
                if ($symbol =~ /^G_DECLARE_INTERFACE/) {
                    $forward_decls{"${ModuleObjName}Interface"} = "<STRUCT>\n<NAME>${ModuleObjName}Interface</NAME>\n$deprecated</STRUCT>\n";
                }
                $in_declaration = "";
            }
        }

        # Note that sometimes functions end in ') G_GNUC_PRINTF (2, 3);' or
        # ') __attribute__ (...);'.
        if ($in_declaration eq 'function') {
            if ($decl =~ s/\)\s*(G_GNUC_.*|.*DEPRECATED.*|${IGNORE_DECORATORS}\s*|__attribute__\s*\(.*\)\s*)*;.*$//s) {
                if ($internal == 0) {
                     $decl =~ s%/\*.*?\*/%%gs;        # remove comments.
                     #$decl =~ s/^\s+//;                # remove leading whitespace.
                     #$decl =~ s/\s+$//;                # remove trailing whitespace.
                     $decl =~ s/\s*\n\s*/ /gs;        # consolidate whitespace at start
                                                   # and end of lines.
                     $ret_type =~ s%/\*.*?\*/%%g;        # remove comments in ret type.
                     if (&AddSymbolToList (\$list, $symbol)) {
                         print DECL "<FUNCTION>\n<NAME>$symbol</NAME>\n$deprecated<RETURNS>$ret_type</RETURNS>\n$decl\n</FUNCTION>\n";
                         if ($REBUILD_TYPES) {
                             # check if this looks like a get_type function and if so remember
                             if (($symbol =~ m/_get_type$/) && ($ret_type =~ m/GType/) && ($decl =~ m/^(void|)$/)) {
                                 #("Adding get-type: [$ret_type] [$symbol] [$decl]\tfrom $input_file");
                                 push (@get_types, $symbol);
                             }
                         }
                     }
                } else {
                     $internal = 0;
                }
                $deprecated_conditional_nest = int($deprecated_conditional_nest);
                $in_declaration = "";
                $skip_block = 0;
            }
        }

        if ($in_declaration eq 'user_function') {
            if ($decl =~ s/\).*$//) {
                if (&AddSymbolToList (\$list, $symbol)) {
                    print DECL "<USER_FUNCTION>\n<NAME>$symbol</NAME>\n$deprecated<RETURNS>$ret_type</RETURNS>\n$decl</USER_FUNCTION>\n";
                }
                $deprecated_conditional_nest = int($deprecated_conditional_nest);
                $in_declaration = "";
            }
        }

        if ($in_declaration eq 'macro') {
            if ($decl !~ m/\\\s*$/) {
                if ($internal == 0) {
                    if (&AddSymbolToList (\$list, $symbol)) {
                        print DECL "<MACRO>\n<NAME>$symbol</NAME>\n$deprecated$decl</MACRO>\n";
                    }
                } else {
                    $internal = 0;
                }
                $deprecated_conditional_nest = int($deprecated_conditional_nest);
                $in_declaration = "";
            }
        }

        if ($in_declaration eq 'enum') {
            if ($decl =~ m/\}\s*(\w+)?;\s*$/) {
                if ($symbol eq "") {
                    $symbol = $1;
                }
                if (&AddSymbolToList (\$list, $symbol)) {
                    print DECL "<ENUM>\n<NAME>$symbol</NAME>\n$deprecated$decl</ENUM>\n";
                }
                $deprecated_conditional_nest = int($deprecated_conditional_nest);
                $in_declaration = "";
            }
        }

        # We try to handle nested stucts/unions, but unmatched brackets in
        # comments will cause problems.
        if ($in_declaration eq 'struct' or $in_declaration eq 'union') {
            if (($level <= 1) && ($decl =~ m/\n\}\s*(\w*);\s*$/)) {
                if ($symbol eq "") {
                    $symbol = $1;
                }

                if ($symbol =~ m/^(\S+)(Class|Iface|Interface)\b/) {
                    my $objectname = $1;
                    #("Found object: $1");
                    $title = "<TITLE>$objectname</TITLE>\n";
                    push (@objects, $objectname);
                }
                #("Store struct: $symbol");
                if (&AddSymbolToList (\$list, $symbol)) {
                    my $structsym = uc $in_declaration;
                    print DECL "<$structsym>\n<NAME>$symbol</NAME>\n$deprecated$decl</$structsym>\n";
                    if (defined($forward_decls{$symbol})) {
                        undef($forward_decls{$symbol});
                    }
                }
                $deprecated_conditional_nest = int($deprecated_conditional_nest);
                $in_declaration = "";
            } else {
                # We use tr to count the brackets in the line, and adjust
                # $level accordingly.
                $level += tr/{//;
                $level -= tr/}//;
                #("struct/union level : $level");
            }
        }

        $pre_previous_line = $previous_line;
        $previous_line = $_;
    }
    close(INPUT);

    # print remaining forward declarations
    foreach $symbol (sort(keys %forward_decls)) {
        if (defined($forward_decls{$symbol})) {
            &AddSymbolToList (\$list, $symbol);
            print DECL $forward_decls{$symbol};
        }
    }

    # add title
    $list = "$title$list";
    
    #("Scanning $input_file done\n");
    
    # Try to separate the standard macros and functions, placing them at the
    # end of the current section, in a subsection named 'Standard'.
    # do this in a loop to catch object, enums and flags
    my ($class,$lclass,$prefix,$lprefix);
    my ($standard_decl) = "";
    do {
        if ($list =~ m/^(\S+)_IS_(\S*)_CLASS\n/m) {
            $prefix = $1;
            $lprefix = lc($prefix);
            $class = $2;
            $lclass = lc($class);
            #("Found gobject type '${prefix}_$class' from is_class macro\n");
        } elsif ($list =~ m/^(\S+)_IS_(\S*)\n/m) {
            $prefix = $1;
            $lprefix = lc($prefix);
            $class = $2;
            $lclass = lc($class);
            #("Found gobject type '${prefix}_$class' from is_ macro\n");
        } elsif ($list =~ m/^(\S+?)_(\S*)_get_type\n/m) {
            $lprefix = $1;
            $prefix = uc($lprefix);
            $lclass = $2;
            $class = uc($lclass);
            #("Found gobject type '${prefix}_$class' from get_type function\n");
        } else {
          $class = $lclass = "";
        }
    
        if ($class ne "") {
            my ($cclass) = $lclass;
            $cclass =~ s/_//g;
            my ($type) = $lprefix.$cclass;
            
            if ($list =~ s/^${type}Private\n//im)               { $standard_decl .= $&; }
            
            # We only leave XxYy* in the normal section if they have docs 
            if (! defined($doc_comments{$type})) {
              #("  Hide instance docs for $type");
              if ($list =~ s/^${type}\n//im)                    { $standard_decl .= $&; }
            }
            if (! defined($doc_comments{$type."class"})) {
              #("  Hide class docs for $type");
              if ($list =~ s/^${type}Class\n//im)               { $standard_decl .= $&; }
            }
            if (! defined($doc_comments{$type."interface"})) {
              #("  Hide iface docs for $type");
              if ($list =~ s/^$type}Interface\n//im)            { $standard_decl .= $&; }
            }
            if (! defined($doc_comments{$type."iface"})) {
              #("  Hide iface docs for $type");
              if ($list =~ s/${type}Iface\n//im)                { $standard_decl .= $&; }
            }
            
            while ($list =~ s/^\S+_IS_$class\n//m)              { $standard_decl .= $&; }
            while ($list =~ s/^\S+_TYPE_$class\n//m)            { $standard_decl .= $&; }
            while ($list =~ s/^\S+_${lclass}_get_type\n//m)     { $standard_decl .= $&; }
            while ($list =~ s/^\S+_${class}_CLASS\n//m)         { $standard_decl .= $&; }
            while ($list =~ s/^\S+_IS_${class}_CLASS\n//m)      { $standard_decl .= $&; }
            while ($list =~ s/^\S+_${class}_GET_CLASS\n//m)     { $standard_decl .= $&; }
            while ($list =~ s/^\S+_${class}_GET_IFACE\n//m)     { $standard_decl .= $&; }
            while ($list =~ s/^\S+_${class}_GET_INTERFACE\n//m) { $standard_decl .= $&; }
    
            # We do this one last, otherwise it tends to be caught by the IS_$class macro
            while ($list =~ s/^\S+_$class\n//m)                 { $standard_decl .= $&; }
            
            #("Decl '".join(",",split("\n",$list))."'\n");
            #("Std  '".join(",",split("\n",$standard_decl))."'\n");
        }
    } while ($class ne "");
    if ($standard_decl ne "") {
      # sort the symbols
      $standard_decl=join("\n",sort(split("\n",$standard_decl)))."\n";
      $list .= "<SUBSECTION Standard>\n$standard_decl";
    }
    if ($list ne "") {
        $$section_list{$file_basename} .= "<SECTION>\n<FILE>$file_basename</FILE>\n$list</SECTION>\n\n";
    }
}


#############################################################################
# Function    : AddSymbolToList
# Description : This adds the symbol to the list of declarations, but only if
#                it is not already in the list.
# Arguments   : $list - reference to the list of symbols, one on each line.
#                $symbol - the symbol to add to the list.
#############################################################################

sub AddSymbolToList {
    my ($list, $symbol) = @_;

    if ($$list =~ m/\b\Q$symbol\E\b/) {
         #print "Symbol $symbol already in list. skipping\n";
         # we return 0 to skip outputting another entry to -decl.txt
         # this is to avoid redeclarations (e.g. in conditional
         # sections).
        return 0;
    }
    $$list .= "$symbol\n";
    return 1;
}
