aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Moshe Kaplan <me@moshekaplan.com> 2022-06-28 16:35:54 -0400
committer Gerald Combs <gerald@wireshark.org> 2022-06-28 21:59:53 +0000
commit cdb83a370faf9215b2fb4ab2404f4150fadaff4e (patch)
tree 31a4d2281721944b05424d5a4071e9f2a9121e33
parent a2272362ce182cc92f5e71cf8c53bac8a1bc6b1b (diff)
tools: Port make-sminmpec.pl to make-sminmpec.py
Port make-sminmpec.pl to Python. Now uses an explicit destination path, instead of a hardcoded path relative to the script's location on disk. Ping #18152
-rwxr-xr-xtools/make-sminmpec.pl94
-rwxr-xr-xtools/make-sminmpec.py88
2 files changed, 88 insertions, 94 deletions
diff --git a/tools/make-sminmpec.pl b/tools/make-sminmpec.pl
deleted file mode 100755
index 432d794f1b..0000000000
--- a/tools/make-sminmpec.pl
+++ /dev/null
@@ -1,94 +0,0 @@
-#!/usr/bin/perl -w
-# create the enterprises file from
-# https://www.iana.org/assignments/enterprise-numbers/enterprise-numbers
-#
-# Wireshark - Network traffic analyzer
-# By Gerald Combs <gerald@wireshark.org>
-# Copyright 2004 Gerald Combs
-#
-# SPDX-License-Identifier: GPL-2.0-or-later
-
-use strict;
-use File::Spec;
-
-my ($vol, $script_dir) = File::Spec->splitpath( __FILE__ );
-my $root_dir = File::Spec->catpath($vol, $script_dir, "..");
-chdir($root_dir) || die("Can't find $root_dir");
-
-my $in = shift;
-
-$in = "https://www.iana.org/assignments/enterprise-numbers/enterprise-numbers" unless(defined $in);
-
-my @in_lines;
-my $revision = '2014-04-27';
-
-my $min_entries = 100;
-my $smi_total = 0;
-
-if($in =~ m/^https?:/i) {
- eval "require LWP::UserAgent;";
- die "LWP isn't installed. It is part of the standard Perl module libwww." if $@;
-
- my $agent = LWP::UserAgent->new;
- $agent->env_proxy;
- $agent->agent("Wireshark make-sminmpec.pl/$revision");
-
- warn "starting to fetch $in ...\n";
-
- my $request = HTTP::Request->new(GET => $in);
-
- my $result = $agent->request($request);
-
- if ($result->code eq 200) {
- warn "done fetching $in\n";
- @in_lines = split /\n/, $result->content;
- } else {
- die "request for $in failed with result code:" . $result->code;
- }
-} else {
- open IN, "< $in";
- @in_lines = <IN>;
- close IN;
-}
-
-my $body = '';
-my $code;
-my $name;
-my $last_updated = "(last updated ???)";
-my $end_of_document = 0;
-
-for(@in_lines) {
- chomp;
-
- if (/^(\d+)/) {
- $code = sprintf("%d", $1);
- } elsif (/^ ?(\S.*)/ ) { # up to three spaces because of formatting errors in the source
- $name = $1;
- next if (/^\s*\(?\s*unassigned/i);
- $name =~ s/\s+$//;
- $name =~ s/ \((formerly .*)\)/\t# $1/;
- $body .= "\n$code\t$name";
- } elsif (/\(last updated/i) {
- $last_updated = $_;
- } elsif (/^ *End of Document/) {
- $end_of_document = 1;
- }
-}
-
-die "\"End of Document\" not found. Truncated source file?" unless ($end_of_document);
-
-open OUT, "> enterprises.tsv";
-
-print OUT <<"_SMINMPEC";
-#
-# generated from https://www.iana.org/assignments/enterprise-numbers/enterprise-numbers
-# run "tools/make-sminmpec.pl [infile]" to regenerate
-#
-# The format used here is: <NUMERICAL_ID><SPACE><NAME>
-# Where SPACE can be any sequence of spaces and tabs.
-#
-# $last_updated
-$body
-_SMINMPEC
-
-close OUT;
diff --git a/tools/make-sminmpec.py b/tools/make-sminmpec.py
new file mode 100755
index 0000000000..c6cfaeca00
--- /dev/null
+++ b/tools/make-sminmpec.py
@@ -0,0 +1,88 @@
+#!/usr/bin/env python3
+# create the enterprises.tsv file from
+# https://www.iana.org/assignments/enterprise-numbers/enterprise-numbers
+# or an offline copy
+#
+# Copyright 2022 by Moshe Kaplan
+# Based on make-sminmpec.pl by Gerald Combs
+#
+# Wireshark - Network traffic analyzer
+# By Gerald Combs <gerald@wireshark.org>
+# Copyright 2004 Gerald Combs
+#
+# SPDX-License-Identifier: GPL-2.0-or-later
+
+import argparse
+import re
+import urllib.request
+
+
+ENTERPRISE_NUMBERS_URL = "https://www.iana.org/assignments/enterprise-numbers/enterprise-numbers"
+
+ENTERPRISES_HEADER = """\
+#
+# generated from https://www.iana.org/assignments/enterprise-numbers/enterprise-numbers
+# run "tools/make-sminmpec.py [infile] outfile" to regenerate
+#
+# The format used here is: <NUMERICAL_ID><SPACE><NAME>
+# Where SPACE can be any sequence of spaces and tabs.
+#
+"""
+
+# An entry starts with its decimal number at the beginning of a line.
+DECIMAL_PATTERN = r"^(\d+)"
+# up to three spaces because of formatting errors in the source
+# NOTE(review): written with an explicit quantifier so the leading-space
+# count cannot be lost to whitespace collapsing; assumes organization lines
+# are indented by two (occasionally three) spaces - confirm against upstream.
+ORGANIZATION_PATTERN = r"^ {2,3}(\S.*)"
+# A "(formerly ...)" remark after the name; rewritten as a trailing comment.
+FORMERLY_PATTERN = r" \((formerly .*)\)"
+
+
+def generate_enterprise_files(file_content):
+    """Convert the text of the enterprise-numbers list into enterprises.tsv text.
+
+    We only care about the "Decimal" and "Organization" of each entry,
+    not the contact or email.
+
+    Args:
+        file_content: Full text of the enterprise-numbers file, as one string.
+
+    Returns:
+        A string made of ENTERPRISES_HEADER, a "# <last updated line>"
+        comment followed by an empty line, and then one
+        "<decimal><TAB><organization>" line per entry that was kept.
+
+    Raises:
+        Exception: If no line containing "End of Document" was seen, which
+            suggests the input was truncated.
+    """
+    org_lines = []
+    # Same placeholder make-sminmpec.pl used when the input carries no
+    # "last updated" line, so the generated comment is never a bare "# ".
+    last_updated = "(last updated ???)"
+    end_seen = False
+    # Number of the entry currently being read; None until the first one.
+    decimal = None
+    for line in file_content.splitlines():
+        decimal_match = re.match(DECIMAL_PATTERN, line)
+        if decimal_match:
+            decimal = decimal_match.group(1)
+        elif decimal is not None and re.match(ORGANIZATION_PATTERN, line):
+            # The "decimal is not None" guard keeps an organization-shaped
+            # line that precedes every number from raising UnboundLocalError;
+            # such a line falls through to the checks below instead.
+            organization = line.strip()
+            if organization.lower() == "unassigned":
+                continue
+            # Move a "(formerly ...)" remark into a tab-separated comment.
+            organization = re.sub(FORMERLY_PATTERN, r"\t# \1", organization)
+            org_lines.append(decimal + "\t" + organization)
+        elif "last updated" in line.lower():
+            last_updated = line
+        elif "end of document" in line.lower():
+            end_seen = True
+
+    if not end_seen:
+        raise Exception('"End of Document" not found. Truncated source file?')
+
+    last_updated_line = "# " + last_updated + "\n\n"
+    return ENTERPRISES_HEADER + last_updated_line + "\n".join(org_lines) + "\n"
+
+
+def main():
+    """Create the enterprises.tsv file named on the command line.
+
+    Command line: [infile] outfile. When infile is given it is read as
+    UTF-8 text; otherwise the list is downloaded from ENTERPRISE_NUMBERS_URL.
+    The converted content is written to outfile as UTF-8.
+
+    Raises:
+        Exception: If the download completes with a status other than 200.
+    """
+    parser = argparse.ArgumentParser(description="Create the enterprises.tsv file.")
+    parser.add_argument('infile', nargs='?')
+    parser.add_argument('outfile', nargs=1)
+    parsed_args = parser.parse_args()
+
+    if parsed_args.infile:
+        with open(parsed_args.infile, encoding='utf-8') as fh:
+            data = fh.read()
+    else:
+        with urllib.request.urlopen(ENTERPRISE_NUMBERS_URL) as f:
+            if f.status != 200:
+                # f.status is an integer; without str() building this
+                # message raised TypeError and hid the real failure.
+                raise Exception("request for " + ENTERPRISE_NUMBERS_URL + " failed with result code " + str(f.status))
+            data = f.read().decode('utf-8')
+
+    enterprises_content = generate_enterprise_files(data)
+    # nargs=1 makes argparse store outfile as a one-element list.
+    with open(parsed_args.outfile[0], encoding='utf-8', mode='w') as fh:
+        fh.write(enterprises_content)
+
+
+if __name__ == "__main__":
+    main()