diff options
author | Bill Meier <wmeier@newsguy.com> | 2013-01-28 22:23:32 +0000 |
---|---|---|
committer | Bill Meier <wmeier@newsguy.com> | 2013-01-28 22:23:32 +0000 |
commit | d1ae197ae4f645538752c9ee4bf85023be71eeda (patch) | |
tree | 40bf802a399a0ca0dfb359186ef071e2302849e3 | |
parent | 7540ccc0ed1fbb3f9d58920cd57068328a3a863f (diff) |
Re-implemention of checkhf.pl:
Main objective: reduce the number of false positives.
(Committed as a patch to the original
checkhf.pl written by Joerg Mayer so as to
keep the history for the original program).
svn path=/trunk/; revision=47331
-rwxr-xr-x | tools/checkhf.pl | 605 |
1 files changed, 397 insertions, 208 deletions
diff --git a/tools/checkhf.pl b/tools/checkhf.pl index e9bde6ddfd..458ac08ee7 100755 --- a/tools/checkhf.pl +++ b/tools/checkhf.pl @@ -1,20 +1,12 @@ #!/usr/bin/perl - -my $debug = 0; -# 0: off -# 1: specific debug -# 2: full debug - # -# find unbalanced hf_ variables: Compare hf_ variable usage with the hf_ variables -# declared in the hf_register_info array. +# Copyright 2013, William Meier (See AUTHORS file) # -# Usage: checkhf.pl <file or files> - -# $Id$ - +# Validate hf_... usage for a dissector file; +# +# Usage: checkhf.pl [--debug=?] <file or files> # -# Copyright 2005 Joerg Mayer (see AUTHORS file) +# $Id$ # # Wireshark - Network traffic analyzer # By Gerald Combs <gerald@wireshark.org> @@ -33,210 +25,407 @@ my $debug = 0; # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - -# -# Example: -# ~/work/wireshark/trunk/epan/dissectors> ../../tools/checkhf.pl packet-afs.c -# Unused entry: packet-afs.c, hf_afs_ubik_voteend -# Unused entry: packet-afs.c, hf_afs_ubik_errcode -# Unused entry: packet-afs.c, hf_afs_ubik_votetype -# NO ARRAY: packet-afs.c, hf_afs_fs_ipaddr # -# or checkhf.pl packet-*.c, which will check all the dissector files. -# -# NOTE: This tool currently generates false positives! + +## Note: This program is a re-implementation of the +## original checkhf.pl written and (C) by Joerg Mayer. +## The overall objective of the new implementation was to reduce +## the number of false positives which occurred with the +## original checkhf.pl +## +## ----- (The following is extracted from the original checkhf.pl with thanks to Joerg) ------- +## Example: +## ~/work/wireshark/trunk/epan/dissectors> ../../tools/checkhf.pl packet-afs.c +## Unused entry: packet-afs.c, hf_afs_ubik_voteend +## Unused entry: packet-afs.c, hf_afs_ubik_errcode +## Unused entry: packet-afs.c, hf_afs_ubik_votetype +## ERROR: NO ARRAY: packet-afs.c, hf_afs_fs_ipaddr +## +## or checkhf.pl packet-*.c, which will check all the dissector files. +## +## NOTE: This tool currently generates false positives! +## +## The "NO ARRAY" messages - if accurate - points to an error that will +## cause (t|wire)shark to terminate with an assertion when a packet containing +## this particular element is being dissected. +## +## The "Unused entry" message indicates the opposite: We define an entry but +## never use it in a proto_...add... function. +## ------------------------------------------------------------------------------------ + +# ------------------------------------------------------------------------------------ +# Main # -# The "NO ARRAY" messages - if accurate - points to an error that will -# cause (t|wire)shark to terminate with an assertion when a packet containing -# this particular element is being dissected. +# Logic: +# 1. Clean the input: remove comments and code under '#if 0'. +# 2. hfDefs: +# Find (and remove from input) list of static hf_ variable +# definitions ('static int hf_... ;') +# 2. hfArrayEntries: +# Find (and remove from input) list of hf_... variables +# referenced in the hf[] entries; +# 3. hfUsage: +# From the remaining input, extract list of all strings of form hf_... +# (which may include strings which are not actually valid +# hf_... variable references). +# 4. Checks: +# If entries in hfDefs not in hfUsage then "unused"; +# If entries in hfDefs not in hfArrayEntries then "ERROR: NO ARRAY"; + +use strict; +use warnings; + +use Getopt::Long; + +my $helpFlag = ''; +my $debug = 0; # default: off; 1=cmt; 2=#if0; 3=hfDefs; 4=hfArrayEntry; 5=hfusage (See code) + +my $sts = GetOptions( + 'debug=i' => \$debug, + 'help|?' => \$helpFlag + ); +if (!$sts || $helpFlag || !$ARGV[0]) { + usage(); +} + +while (my $fileName = $ARGV[0]) { + shift; + + my ($fileContents, $fileCleanedContents); + my ($hfDefHRef, $hfArrayEntryHRef, $hfUsageHRef); + my ($unUsedHRef, $noArrayHRef); + + read_file(\$fileName, \$fileContents); + + $fileCleanedContents = $fileContents; + + remove_comments(\$fileCleanedContents, $fileName); + remove_if0_code(\$fileCleanedContents, $fileName); + + $hfDefHRef = find_remove_hf_defs(\$fileCleanedContents, $fileName); + $hfArrayEntryHRef = find_remove_hf_array_entries(\$fileCleanedContents, $fileName); + $hfUsageHRef = find_hf_usage(\$fileCleanedContents); + +# Tests (See above) +# 1. Are all the hfDef entries in hfUsage ? +# if not: "Unused entry:" # -# The "Unused entry" message indicates the opposite: We define an entry but -# never use it in a proto_...add... function. + $unUsedHRef = diff_hash($hfDefHRef, $hfUsageHRef); + print_list("Unused entry: $fileName, ", $unUsedHRef); + +# 2. Are all the hfDef entries in hfArrayEntry ? +# (Note: if hfDef is "unused", don't check for same in hfArrayEntry) +# if not: "ERROR: NO ARRAY" + + $noArrayHRef = diff_hash($hfDefHRef, $hfArrayEntryHRef); + $noArrayHRef = diff_hash($noArrayHRef, $unUsedHRef); # Remove "unused" hf_... from noArray list + print_list("ERROR: NO ARRAY: $fileName, ", $noArrayHRef); +} + +# --------------------------------------------------------------------- # +sub usage { + print "Usage: $0 [--debug=n] Filename [...] #debug: 1=cmt; 2=#if0; 3=hfDefs; 4=hfArrayEntry; 5=hfUsage\n"; + exit(1); +} -use warnings; -use strict; +# --------------------------------------------------------------------- +# action: read contents of a file to specified string +# arg: fileNameRef, fileContentsRef +# returns: fileContentsRef (containing the contents of the file) + +sub read_file { + my ($fileNameRef, $fileContentsRef) = @_; + + die "No such file: \"$$fileNameRef\"\n" if (! -e $$fileNameRef); + + # delete leading './' + $$fileNameRef =~ s{ ^ \. / } {}xo; + + $$fileContentsRef = ''; + # Read in the file (ouch, but it's easier that way) + open(FCI, "<", $$fileNameRef) || die("Couldn't open $$fileNameRef"); + while (<FCI>) { + $$fileContentsRef .= $_; + } + close(FCI); + + return $fileContentsRef; +} + +# --------------------------------------------------------------------- +# action: Create a hash containing entries in 'a' that are not in 'b' +# arg: aHRef, bHref +# returns: pointer to hash + +sub diff_hash { + my ($aHRef, $bHRef) = @_; + + my %diffs; + + @diffs{grep {! exists $$bHRef{$_}} keys %$aHRef} = {}; # each key in the new hash + # will have value 'undef' + + return \%diffs; +} + +# --------------------------------------------------------------------- +# action: print a list +# arg: hdr, listHRef +# returns: nothing + +sub print_list { + my ($hdr, $listHRef) = @_; -my $D; - -my %elements; -my $element; -my %skip; - -my $state; -my $newstate; -# "s_unknown", -# "s_declared", -# "s_used", -# "s_array", -# "s_usedarray", -# "s_error" - -my $type; -# "t_declaration"; -# "t_usage"; -# "t_array"; - -my $restofline; -my $currfile = ""; - -my $comment = 0; -my $brace = 0; - -my $error = 0; - -sub printprevfile { - my $state; - - foreach $element (keys %elements) { - $state = $elements{$element}; - $debug>=2 && print "$currfile, $element: PRINT $state\n"; - if ($state eq "s_usedarray") { - # Everything is fine - } elsif ($state eq "s_used") { - # A value is declared and used but the hf_ array - # entry is missing - print "ERROR: NO ARRAY: $currfile, $element\n"; - $error = 1; - } elsif ($state eq "s_array") { - # A value is declared, has an hf_ array entry - # but it is never used - print "Unused entry: $currfile, $element\n"; - } elsif ($state eq "s_declared") { - # A value is declared but no error value exists. - # This doesn't matter as it isn't used either. - print "Declared only entry: $currfile, $element\n" - } elsif ($state eq "s_unknown") { - print "UNKNOWN: $currfile, $element\n"; - } else { - die "Impossible: State $state for $currfile, $element\n"; - } - } + print + map {"$hdr$_\n"} + sort + keys %$listHRef; } -while (<>) { - if ($currfile !~ /$ARGV/) { - &printprevfile(); - # New file - reset array and state - $currfile = $ARGV; - %elements = ( ); - %skip = ( "hf_register_info" => 1 ); - $state = "s_unknown"; - } - # opening then closing comment - if (/(.*?)\/\*.*\*\/(.*)/) { - $comment = 0; - $_ = "$1$2"; - # closing then opening comment - } elsif (/.*?\*\/(.*?)\/\*/) { - $comment = 1; - $_ = "$1"; - # opening comment - } elsif (/(.*?)\/\*/) { - $comment = 1; - $_ = "$1"; - # closing comment - } elsif (/\*\/(.*?)/) { - $comment = 0; - $_ = "$1"; - } elsif ($comment == 1) { - next; - } - # unhandled: more than one complete comment per line - - chomp; - if ($debug) { - $D = " ($_)"; - } else { - $D = ""; - } - - # Read input - if (/^\s*(?:static\s+)?.*int\s+(hf_\w*)\s*=\s*-1\s*;/) { - $element = $1; - $debug && print "t_declaration for $element$D\n"; - $type = "t_declaration"; - if (/^\s*static/) { - # ignore: static declarations without any use are detected by the compiler - next; +# ------------ +# action: remove comments from input string +# arg: codeRef, fileName +# returns: codeRef + +sub remove_comments { + my ($codeRef, $fileName) = @_; + + # The below Regexp is based on one from: + # http://aspn.activestate.com/ASPN/Cookbook/Rx/Recipe/59811 + # It is in the public domain. + # A complicated regex which matches C-style comments. + my $CCommentRegEx = qr{ / [*] [^*]* [*]+ (?: [^/*] [^*]* [*]+ )* / }xo; + + $$codeRef =~ s {$CCommentRegEx} []xg; + + ($debug == 1) && print "==> After Remove Comments: code: [$fileName]\n$$codeRef\n===<\n"; + + return $codeRef +} + +# ------------- +# action: remove '#if 0'd code from the input string +# args codeRef, fileName +# returns: codeRef +# +# Essentially: Use s//patsub/meg to pass each line to patsub. +# patsub monitors #if/#if 0/etc and determines +# if a particular code line should be removed. +# XXX: This is probably pretty inefficient; +# I could imagine using another approach such as converting +# the input string to an array of lines and then making +# a pass through the array deleting lines as needed. + +{ # block begin +my ($if_lvl, $if0_lvl, $if0); # shared vars + + sub remove_if0_code { + my ($codeRef, $fileName) = @_; + + my ($preprocRegEx) = qr { + ( # $1 [complete line) + ^ + (?: # non-capturing + \s* \# \s* + (if \s 0| if | else | endif) # $2 (only if #...) + ) ? + .* + $ + ) + }xom; + + ($if_lvl, $if0_lvl, $if0) = (0,0,0); + $$codeRef =~ s{ $preprocRegEx }{patsub($1,$2)}xegm; + + ($debug == 2) && print "==> After Remove if0: code: [$fileName]\n$$codeRef\n===<\n"; + return $codeRef; + } + + sub patsub { + if ($debug == 99) { + print "-->$_[0]\n"; + (defined $_[1]) && print " >$_[1]<\n"; + } + + # #if/#if 0/#else/#ndif processing + if (defined $_[1]) { + my ($if) = $_[1]; + if ($if eq 'if') { + $if_lvl += 1; + } elsif ($if eq 'if 0') { + $if_lvl += 1; + if ($if0_lvl == 0) { + $if0_lvl = $if_lvl; + $if0 = 1; # inside #if 0 + } + } elsif ($if eq 'else') { + if ($if0_lvl == $if_lvl) { + $if0 = 0; + } + } elsif ($if eq 'endif') { + if ($if0_lvl == $if_lvl) { + $if0 = 0; + $if0_lvl = 0; + } + $if_lvl -= 1; + if ($if_lvl < 0) { + die "patsub: #if/#endif mismatch" } - $debug && print "extern declaration: t_usage for $element$D\n"; - $type = "t_usage"; # hack: decl is extern so pretend we've seen a usage - # Skip function parameter declarations with hf_ names - } elsif (/(int\s+?|int\s*?\*\s*?|header_field_info\s+?|header_field_info\s*?\*\s*?|hf_register_info\s+?|hf_register_info\s*?\*\s*?|->\s*?)(hf_\w*)\W(.*)/) { - $element = $2; - $restofline = $3; - $debug && print "Setting skip for $element$D\n"; - $skip{$element} = 1; - # Handle functions with multiple hf_ parameters - while ($restofline =~ /(int\s+?|int\s*?\*\s*?|header_field_info\s+?|header_field_info\s*?\*\s*?|hf_register_info\s+?|hf_register_info\s*?\*\s*?|->\s*?)(hf_\w*)\W(.*)/) { - $element = $2; - $restofline = $3; - $debug && print "Setting skip for $element$D\n"; - $skip{$element} = 1; - } - next; - } elsif ($brace == 1 && /^\s*&\s*(hf_\w*)\W+/) { - $element = $1; - $debug && print "t_array1 for $element$D\n"; - $type = "t_array"; - } elsif (/^\s*\{\s*?&\s*?(hf_\w*)\W+/) { - $element = $1; - $debug && print "t_array2 for $element$D\n"; - $type = "t_array"; - # Order matters: catch all remaining hf_ lines - } elsif (/\w*\s*=\s*(hf_\w*)\s*;/) { - # Catches: variable = hf_* - # But we don't check if the variable actually gets used... - $element = $1; - $debug && print "in variable usage for $element$D\n"; - next if ($skip{$element}); - $debug && print "set t_usage for $element$D\n"; - $type = "t_usage"; - } elsif (/\W(hf_\w*)\W*/) { - $element = $1; - $debug && print "in usage for $element$D\n"; - next if ($skip{$element}); - $debug && print "set t_usage for $element$D\n"; - $type = "t_usage"; - } else { - # Line with only a { - if (/^\s+\{\s*$/) { - $brace = 1; - } else { - $brace = 0; - $debug && print "else: $D\n"; - } - next; - } - - # Get current state - if (!defined($elements{$element})) { - $state = "s_unknown"; - } else { - $state = $elements{$element}; - } - - # current state + input ==> new state - # we currently ignore t_declaration - if ($state eq "s_error") { - $newstate = $state; - } elsif ($state eq "s_unknown" && $type eq "t_usage") { - $newstate = "s_used"; - } elsif ($state eq "s_unknown" && $type eq "t_array") { - $newstate = "s_array"; - } elsif ($state eq "s_used" && $type eq "t_array") { - $newstate = "s_usedarray"; - } elsif ($state eq "s_array" && $type eq "t_usage") { - $newstate = "s_usedarray"; - } else { - $newstate = $state; - } - $elements{$element} = $newstate; - $debug>=2 && print "$currfile, $element: SET $state + $type => $newstate$D\n"; + } + return $_[0]; # don't remove preprocessor lines themselves + } + + # not preprocessor line: See if under #if 0: If so, remove + if ($if0 == 1) { + return ''; # remove + } + return $_[0]; + } +} # block end + +# --------------------------------------------------------------------- +# action: Create a hash containing an entry (hf_... => 1) for each +# 'static g?int hf_...' definition (including array names) +# in the input string. +# Remove each definition found from the input string. +# args: codeRef, fileName +# returns: ref to the hash + +sub find_remove_hf_defs { + my ($codeRef, $fileName) = @_; + + # Build pattern to match any of the following + # static g?int hf_foo = -1; + # static g?int hf_foo = HF_EMPTY; + # static g?int hf_foo[xxx]; + # static g?int hf_foo[xxx] = { + + # p1: 'static g?int hf_foo' + my $p1RegEx = qr { + ^ + \s* + static + \s+ + g?int + \s+ + (hf_[a-zA-Z0-9_]+) # hf_.. + }xom; + + # p2a: ' = -1;' or ' = HF_EMPTY;' + my $p2aRegEx = qr { + \s* = \s* + (?: + - \s* 1 | HF_EMPTY + ) + \s* ; + }xom; + + # p2b: '[xxx];' or '[xxx] = {' + my $p2bRegEx = qr ! + \s* \[ [^]]+ \] \s* + (?: + = \s* \{ | ; + ) + !xom; + + my $hfDefRegEx = qr { $p1RegEx (?: $p2aRegEx | $p2bRegEx ) }xom; + + my %hfDefs; + while ($$codeRef =~ m{ $hfDefRegEx }xogm) { + #print "$1\n"; + $hfDefs{$1} = 1; + } + ($debug == 3) && print_hash("VD: $fileName", \%hfDefs); # VariableDefinition + + # remove all + $$codeRef =~ s{ $hfDefRegEx }{}xiogm; + ($debug == 3) && print "==> After remove hfDefs: code: [$fileName]\n$$codeRef\n===<\n"; + + return \%hfDefs; } -&printprevfile(); -exit $error; +# --------------------------------------------------------------------- +# action: Create a hash containing an entry (hf_...) for each hf[] entry. +# Remove each hf[] entry found from the input string. +# args: codeRef, fileName +# returns: ref to the hfArrayEntry hash -__END__ +sub find_remove_hf_array_entries { + my ($codeRef, $fileName) = @_; + +# hf[] entry regex (to extract an hf_index_name and associated field type) + my $hfArrayEntryRegEx = qr { + \{ + \s* + & \s* ( [a-zA-Z0-9_]+ ) # &hf + (?: + \s* \[ [^]]+ \] # optional array ref + ) ? + \s* , \s* + \{ \s* + .+? # Fix: (a bit dangerous) + \s* , \s* + (FT_[a-zA-Z0-9_]+) # field type + \s* , \s* + .+? # Fix: (also a bit dangerous) + \s* , \s* + HFILL ,? # HFILL + \s* \} + }xos; + + # find all the hf[] entries (searching $$codeRef). + # Create a hash keyed by the hf_... string + my %hfArrayEntry; + while ($$codeRef =~ m{ $hfArrayEntryRegEx }xgos) { + ($debug == 98) && print "+++ $1 $2\n"; + $hfArrayEntry{$1} = 1; + } + + ($debug == 4) && print_hash("AE: $fileName", \%hfArrayEntry); # ArrayEntry + + # now remove all + $$codeRef =~ s{ $hfArrayEntryRegEx }{}xgois; + ($debug == 4) && print "==> After remove hfArrayEntry: code: [$fileName]\n$$codeRef\n===<\n"; + + return \%hfArrayEntry; +} + +# --------------------------------------------------------------------- +# action: create hash of all hf_... strings remaining in input string. +# arga: codeRef, fileName +# return: ref to hfUsage hash +# +# The hash will include *all* strings of form hf_... +# which are in the input string (even strings which +# aren't actually vars). +# We don't care since we'll be checking only +# known valid vars against these strings. + +sub find_hf_usage { + my ($codeRef, $fileName) = @_; + + my $hfUsageRegEx = qr { + \b ( hf_[a-zA-Z0-9_]+ ) # hf_... + }ox; + my %hfUsage; + + while ($$codeRef =~ m{ $hfUsageRegEx }xog) { + #print "$1\n"; + $hfUsage{$1} += 1; + } + + ($debug == 5) && print_hash("VU: $fileName", \%hfUsage); # VariableUsage + + return \%hfUsage; +} + +# --------------------------------------------------------------------- +sub print_hash { + my ($title, $HRef) = @_; + + ##print "==> $title\n"; + for my $k (sort keys %$HRef) { + printf "%-40.40s %s\n", $title, $k; + } +} |