aboutsummaryrefslogtreecommitdiffstats
path: root/tools/make-services.py
blob: 1782d873648e3e5df95394b5bfeca8bd65aaa8a7 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
#!/usr/bin/env python3
#
# Parses the CSV version of the IANA Service Name and Transport Protocol Port Number Registry
# and generates a services(5) file.
#
# Wireshark - Network traffic analyzer
# By Gerald Combs <gerald@wireshark.org>
# Copyright 2013 Gerald Combs
#
# SPDX-License-Identifier: GPL-2.0-or-later

# Location of the IANA registry in CSV form; used when no URL is given on
# the command line.
iana_svc_url = 'https://www.iana.org/assignments/service-names-port-numbers/service-names-port-numbers.csv'

# Usage text; exit_msg() prints it to stderr.
__doc__ = '''\
Usage: make-services.py [url]

url defaults to
    %s
''' % iana_svc_url

import sys
import getopt
import csv
import re
import collections
import urllib.request, urllib.error, urllib.parse
import codecs

# Name of the generated output file (opened relative to the working directory).
services_file = 'services'

# Regular expressions for service names that are left out of the output.
exclude_services = ['^spr-itunes', '^spl-itunes', '^shilp']

# Lower bound on the number of registry rows; fewer rows is treated as a
# broken download. Size was ~ 14800 on 2017-07-20
min_source_lines = 14000

def parse_port(port_str):
    """Convert a registry port field into a tuple of integers.

    A single port such as '80' yields ``(80,)`` and a range such as
    '8000-8010' yields ``(8000, 8010)``. Any other input (empty string,
    non-numeric text, more than one dash) yields the empty tuple.
    """
    pieces = port_str.split('-')
    if len(pieces) not in (1, 2):
        return ()
    try:
        return tuple(int(piece) for piece in pieces)
    except ValueError:
        return ()

def port_to_str(port):
    """Render a port tuple as text: 'low-high' for a pair, else the first item."""
    if len(port) != 2:
        return str(port[0])
    return '-'.join(str(number) for number in port)

def parse_rows(svc_fd):
    """Parse the IANA registry CSV into a mapping of ports to services.

    Args:
        svc_fd: Iterable of text lines containing the CSV data, header row
            first (an open text file or a decoded HTTP response).

    Returns:
        A dict keyed by the port tuple produced by parse_port(). Each value
        is an OrderedDict mapping a service name (at most 15 characters,
        blanks replaced by '-') to a list ``[description, proto, ...]``
        where description is a string or None.

    Rows that are too short to hold all required columns, or that lack a
    service name, a parsable port or a protocol, are skipped, as are services
    matching exclude_services. For each port a protocol is recorded only
    once; the first row naming it wins.

    Calls exit_msg() when fewer than min_source_lines usable rows were read.
    """
    port_reader = csv.reader(svc_fd)

    # An empty input has no header row; fall through to the row-count check
    # at the end instead of leaking a bare StopIteration.
    headers = next(port_reader, [])

    def column(name, fallback):
        # Fallbacks are the header positions as of 2013-08-06.
        try:
            return headers.index(name)
        except ValueError:
            return fallback

    sn_pos = column('Service Name', 0)
    pn_pos = column('Port Number', 1)
    tp_pos = column('Transport Protocol', 2)
    desc_pos = column('Description', 3)
    min_columns = max(sn_pos, pn_pos, tp_pos, desc_pos) + 1

    # Build the exclusion pattern once instead of on every row. An empty
    # exclude list must not become the empty pattern, which matches anything.
    exclude_re = None
    if exclude_services:
        exclude_re = re.compile('|'.join(exclude_services))

    services_map = {}
    count = 0

    for row in port_reader:
        # csv.reader yields [] for blank lines; indexing such a row (or a
        # truncated one) would raise IndexError.
        if len(row) < min_columns:
            continue
        count += 1

        service = row[sn_pos]
        port = parse_port(row[pn_pos])
        proto = row[tp_pos]
        description = row[desc_pos]

        if not service or not port or not proto:
            continue

        if exclude_re is not None and exclude_re.search(service):
            continue

        # max 15 chars
        service = service[:15].rstrip()

        # replace blanks (for some non-standard long names)
        service = service.replace(" ", "-")

        description = description.replace("\n", "")
        description = re.sub("IANA assigned this well-formed service .+$", "", description)
        description = re.sub("  +", " ", description)
        description = description.strip()
        # A description that merely repeats the service name adds nothing.
        if description == service or description == service.replace("-", " "):
            description = None

        if port not in services_map:
            services_map[port] = collections.OrderedDict()
        port_services = services_map[port]

        # Remove some duplicates (first entry wins). Slot 0 of each entry is
        # the description, so only the protocol slots are compared.
        if any(proto in entry[1:] for entry in port_services.values()):
            continue

        if service not in port_services:
            port_services[service] = [description]
        port_services[service].append(proto)

    if count < min_source_lines:
        exit_msg('Not enough parsed data')

    return services_map

def write_body(d, f):
    """Write one services(5) line per service to f, in ascending port order.

    Args:
        d: Mapping of port tuple -> mapping of service name ->
            ``[description, proto, ...]``.
        f: Object with a write() method that receives each finished line.
    """
    for port in sorted(d):
        for name, entry in d[port].items():
            description = entry[0]
            port_field = port_to_str(port) + "/" + "/".join(entry[1:])

            # Names shorter than 8 characters get a second tab so the port
            # column lines up.
            name_pad = "\t" * (1 + abs((15 - len(name)) // 8))
            pieces = [name, name_pad, port_field]

            if description:
                # Same alignment trick for the trailing comment column.
                pieces.append("\t\t" if len(port_field) < 8 else "\t")
                pieces.append("# " + description)

            pieces.append("\n")
            f.write("".join(pieces))

def exit_msg(msg=None, status=1):
    """Print an optional message plus the usage text to stderr, then exit.

    Args:
        msg: Text shown before the usage text; omitted when None.
        status: Process exit status handed to sys.exit().
    """
    err = sys.stderr
    if msg is not None:
        err.write(msg + '\n\n')
    err.write(__doc__ + '\n')
    sys.exit(status)

def main(argv):
    """Fetch the service registry, parse it and write the services file.

    Args:
        argv: Command-line arguments without the program name. The first
            positional argument, if any, is the URL or local path of the CSV
            registry; otherwise iana_svc_url is used. ``-h``/``--help``
            prints the usage text and exits with status 0.

    The result is written to services_file. Exits through exit_msg() on
    invalid options or when the source cannot be opened.
    """
    if sys.version_info[0] < 3:
        print("This requires Python 3")
        sys.exit(2)

    try:
        opts, args = getopt.getopt(argv, "h", ["help"])
    except getopt.GetoptError:
        exit_msg()
    for opt, _ in opts:
        if opt in ("-h", "--help"):
            exit_msg(None, 0)

    # Use getopt's positional arguments rather than raw argv so that an
    # option terminator such as "--" is never mistaken for the URL.
    svc_url = args[0] if args else iana_svc_url

    try:
        if svc_url.startswith('http'):
            req = urllib.request.urlopen(svc_url)
            svc_fd = codecs.getreader('utf8')(req)
        else:
            # Decode local copies the same way as the downloaded registry
            # instead of depending on the locale's default encoding.
            svc_fd = open(svc_url, encoding='utf-8')
    except Exception:
        exit_msg('Error opening ' + svc_url)

    # Close the source as soon as parsing is done, even if parsing fails.
    with svc_fd:
        body = parse_rows(svc_fd)

    # Parsing happens before the output is opened, so a failed run does not
    # truncate an existing services file. The header always names the
    # canonical IANA URL, not the source actually read.
    with open(services_file, 'w', encoding='utf-8') as out:
        out.write('''\
# This is a local copy of the IANA port-numbers file.
#
# Wireshark uses it to resolve port numbers into human readable
# service names, e.g. TCP port 80 -> http.
#
# It is subject to copyright and being used with IANA's permission:
# https://www.wireshark.org/lists/wireshark-dev/200708/msg00160.html
#
# The original file can be found at:
# %s
#
# The format is the same as that used for services(5). It is allowed to merge
# identical protocols, for example:
#   foo 64/tcp
#   foo 64/udp
# becomes
#   foo 64/tcp/udp
#

''' % (iana_svc_url))

        write_body(body, out)

# Script entry point: pass the arguments after the program name to main()
# and use its return value as the process exit status.
if __name__ == "__main__":
    exit_status = main(sys.argv[1:])
    sys.exit(exit_status)