diff options
author | Moshe Kaplan <me@moshekaplan.com> | 2022-06-28 16:35:54 -0400 |
---|---|---|
committer | Gerald Combs <gerald@wireshark.org> | 2022-06-28 21:59:53 +0000 |
commit | cdb83a370faf9215b2fb4ab2404f4150fadaff4e (patch) | |
tree | 31a4d2281721944b05424d5a4071e9f2a9121e33 | |
parent | a2272362ce182cc92f5e71cf8c53bac8a1bc6b1b (diff) |
tools: Port make-sminmpec.pl to make-sminmpec.py
Port make-sminmpec.pl to Python.
Now uses an explicit destination path,
instead of a hardcoded path relative to
the script's location on disk.
Ping #18152
-rwxr-xr-x | tools/make-sminmpec.pl | 94 | ||||
-rwxr-xr-x | tools/make-sminmpec.py | 104 |
2 files changed, 104 insertions, 94 deletions
diff --git a/tools/make-sminmpec.pl b/tools/make-sminmpec.pl
deleted file mode 100755
index 432d794f1b..0000000000
--- a/tools/make-sminmpec.pl
+++ /dev/null
@@ -1,94 +0,0 @@
-#!/usr/bin/perl -w
-# create the enterprises file from
-# https://www.iana.org/assignments/enterprise-numbers/enterprise-numbers
-#
-# Wireshark - Network traffic analyzer
-# By Gerald Combs <gerald@wireshark.org>
-# Copyright 2004 Gerald Combs
-#
-# SPDX-License-Identifier: GPL-2.0-or-later
-
-use strict;
-use File::Spec;
-
-my ($vol, $script_dir) = File::Spec->splitpath( __FILE__ );
-my $root_dir = File::Spec->catpath($vol, $script_dir, "..");
-chdir($root_dir) || die("Can't find $root_dir");
-
-my $in = shift;
-
-$in = "https://www.iana.org/assignments/enterprise-numbers/enterprise-numbers" unless(defined $in);
-
-my @in_lines;
-my $revision = '2014-04-27';
-
-my $min_entries = 100;
-my $smi_total = 0;
-
-if($in =~ m/^https?:/i) {
-    eval "require LWP::UserAgent;";
-    die "LWP isn't installed. It is part of the standard Perl module libwww." if $@;
-
-    my $agent = LWP::UserAgent->new;
-    $agent->env_proxy;
-    $agent->agent("Wireshark make-sminmpec.pl/$revision");
-
-    warn "starting to fetch $in ...\n";
-
-    my $request = HTTP::Request->new(GET => $in);
-
-    my $result = $agent->request($request);
-
-    if ($result->code eq 200) {
-        warn "done fetching $in\n";
-        @in_lines = split /\n/, $result->content;
-    } else {
-        die "request for $in failed with result code:" . $result->code;
-    }
-} else {
-    open IN, "< $in";
-    @in_lines = <IN>;
-    close IN;
-}
-
-my $body = '';
-my $code;
-my $name;
-my $last_updated = "(last updated ???)";
-my $end_of_document = 0;
-
-for(@in_lines) {
-    chomp;
-
-    if (/^(\d+)/) {
-        $code = sprintf("%d", $1);
-    } elsif (/^ ?(\S.*)/ ) { # up to three spaces because of formatting errors in the source
-        $name = $1;
-        next if (/^\s*\(?\s*unassigned/i);
-        $name =~ s/\s+$//;
-        $name =~ s/ \((formerly .*)\)/\t# $1/;
-        $body .= "\n$code\t$name";
-    } elsif (/\(last updated/i) {
-        $last_updated = $_;
-    } elsif (/^ *End of Document/) {
-        $end_of_document = 1;
-    }
-}
-
-die "\"End of Document\" not found. Truncated source file?" unless ($end_of_document);
-
-open OUT, "> enterprises.tsv";
-
-print OUT <<"_SMINMPEC";
-#
-# generated from https://www.iana.org/assignments/enterprise-numbers/enterprise-numbers
-# run "tools/make-sminmpec.pl [infile]" to regenerate
-#
-# The format used here is: <NUMERICAL_ID><SPACE><NAME>
-# Where SPACE can be any sequence of spaces and tabs.
-#
-# $last_updated
-$body
-_SMINMPEC
-
-close OUT;
diff --git a/tools/make-sminmpec.py b/tools/make-sminmpec.py
new file mode 100755
index 0000000000..c6cfaeca00
--- /dev/null
+++ b/tools/make-sminmpec.py
@@ -0,0 +1,104 @@
+#!/usr/bin/env python3
+# create the enterprises.tsv file from
+# https://www.iana.org/assignments/enterprise-numbers/enterprise-numbers
+# or an offline copy
+#
+# Copyright 2022 by Moshe Kaplan
+# Based on make-sminmpec.pl by Gerald Combs
+#
+# Wireshark - Network traffic analyzer
+# By Gerald Combs <gerald@wireshark.org>
+# Copyright 2004 Gerald Combs
+#
+# SPDX-License-Identifier: GPL-2.0-or-later
+
+import argparse
+import re
+import urllib.request
+
+
+ENTERPRISE_NUMBERS_URL = "https://www.iana.org/assignments/enterprise-numbers/enterprise-numbers"
+
+ENTERPRISES_HEADER = """\
+#
+# generated from https://www.iana.org/assignments/enterprise-numbers/enterprise-numbers
+# run "tools/make-sminmpec.py [infile] outfile" to regenerate
+#
+# The format used here is: <NUMERICAL_ID><SPACE><NAME>
+# Where SPACE can be any sequence of spaces and tabs.
+#
+"""
+
+DECIMAL_PATTERN = r"^(\d+)"
+# Two or three leading spaces ("up to three" because of formatting errors in
+# the source).  The count is spelled as a quantifier so the pattern does not
+# depend on a literal run of spaces, and so unindented lines such as
+# "End of Document" are never taken for an organization.
+ORGANIZATION_PATTERN = r"^ {2,3}(\S.*)"
+FORMERLY_PATTERN = r" \((formerly .*)\)"
+
+
+def generate_enterprise_files(file_content):
+    """Build the text of enterprises.tsv from the enterprise-numbers list.
+
+    file_content is the whole source document as a single string.  Returns
+    ENTERPRISES_HEADER, a "# <last updated line>" comment and one
+    "<decimal><TAB><organization>" line per entry, as one string.
+    Raises Exception if no "End of Document" line is present.
+    """
+    # We only care about the "Decimal" and "Organization",
+    # not the contact or email
+    org_lines = []
+    last_updated = ""
+    end_seen = False
+    decimal = None
+    for line in file_content.splitlines():
+        decimal_match = re.match(DECIMAL_PATTERN, line)
+        if decimal_match:
+            # Same normalization as the Perl script's sprintf("%d").
+            decimal = str(int(decimal_match.group(1)))
+        elif re.match(ORGANIZATION_PATTERN, line):
+            if decimal is None:
+                # Indented text before the first number is not an entry.
+                continue
+            organization = line.strip()
+            if organization.lower() == "unassigned":
+                continue
+            organization = re.sub(FORMERLY_PATTERN, r"\t# \1", organization)
+            org_lines.append(decimal + "\t" + organization)
+        elif "last updated" in line.lower():
+            last_updated = line
+        elif "end of document" in line.lower():
+            end_seen = True
+
+    if not end_seen:
+        raise Exception('"End of Document" not found. Truncated source file?')
+
+    last_updated_line = "# " + last_updated + "\n\n"
+    output = ENTERPRISES_HEADER + last_updated_line + "\n".join(org_lines) + "\n"
+    return output
+
+
+def main():
+    """Read the list from infile (or ENTERPRISE_NUMBERS_URL) and write outfile."""
+    parser = argparse.ArgumentParser(description="Create the enterprises.tsv file.")
+    parser.add_argument('infile', nargs='?')
+    parser.add_argument('outfile', nargs=1)
+    parsed_args = parser.parse_args()
+
+    if parsed_args.infile:
+        with open(parsed_args.infile, encoding='utf-8') as fh:
+            data = fh.read()
+    else:
+        with urllib.request.urlopen(ENTERPRISE_NUMBERS_URL) as f:
+            if f.status != 200:
+                # f.status is an int; convert it before joining it to the message.
+                raise Exception("request for " + ENTERPRISE_NUMBERS_URL + " failed with result code " + str(f.status))
+            data = f.read().decode('utf-8')
+
+    enterprises_content = generate_enterprise_files(data)
+    with open(parsed_args.outfile[0], encoding='utf-8', mode='w') as fh:
+        fh.write(enterprises_content)
+
+
+if __name__ == "__main__":
+    main()
+# +""" + +DECIMAL_PATTERN = r"^(\d+)" +# up to three spaces because of formatting errors in the source +ORGANIZATION_PATTERN = r"^ ?(\S.*)" +FORMERLY_PATTERN = r" \((formerly .*)\)" + + +def generate_enterprise_files(file_content): + # We only care about the "Decimal" and "Organization", + # not the contact or email + org_lines = [] + last_updated = "" + end_seen = False + for line in file_content.splitlines(): + decimal_match = re.match(DECIMAL_PATTERN, line) + if decimal_match: + decimal = decimal_match.group(0) + elif re.match(ORGANIZATION_PATTERN, line): + organization = line.strip() + if organization.lower() == "unassigned": + continue + organization = re.sub(FORMERLY_PATTERN, r"\t# \1", organization) + org_lines += [decimal + "\t" + organization] + elif "last updated" in line.lower(): + last_updated = line + elif "end of document" in line.lower(): + end_seen = True + + if not end_seen: + raise Exception('"End of Document" not found. Truncated source file?') + + last_updated_line = "# " + last_updated + "\n\n" + output = ENTERPRISES_HEADER + last_updated_line + "\n".join(org_lines) + "\n" + return output + + +def main(): + parser = argparse.ArgumentParser(description="Create the enterprises.tsv file.") + parser.add_argument('infile', nargs='?') + parser.add_argument('outfile', nargs=1) + parsed_args = parser.parse_args() + + if parsed_args.infile: + with open(parsed_args.infile, encoding='utf-8') as fh: + data = fh.read() + else: + with urllib.request.urlopen(ENTERPRISE_NUMBERS_URL) as f: + if f.status != 200: + raise Exception("request for " + ENTERPRISE_NUMBERS_URL + " failed with result code " + f.status) + data = f.read().decode('utf-8') + + enterprises_content = generate_enterprise_files(data) + with open(parsed_args.outfile[0], encoding='utf-8', mode='w') as fh: + fh.write(enterprises_content) + + +if __name__ == "__main__": + main() |