From 66719c9b6a17f15848f4e7151b0f6cbf9d37f9ed Mon Sep 17 00:00:00 2001 From: Hadriel Kaplan Date: Thu, 20 Mar 2014 17:56:21 -0400 Subject: Add way for Lua file reader to save state per file read/write ops Lua can create a file reader/writer, to open new capture file formats or write to new ones. To save local state, it can save things in Lua itself; but since there can be multiple open files at the same time (for example during a reload), the Lua script won't know for which file and state its read/write functions are being invoked for. To remedy this, and also provide a convenient way to store such state, this commit adds the ability for a Lua script to store a Lua table in the wtap/wtap_dumper's priv data member, just like C-code-based reader/writers do. Change-Id: Ifc9e0d5f0379accee56f2a04b6080238670fec52 Reviewed-on: https://code.wireshark.org/review/766 Reviewed-by: Hadriel Kaplan Reviewed-by: Anders Broman --- test/lua/acme_file.lua | 305 ++++++++++++++++++------------- test/lua/pcap_file.lua | 482 +++++++++++++++++++++++++++++++++++-------------- 2 files changed, 526 insertions(+), 261 deletions(-) (limited to 'test/lua') diff --git a/test/lua/acme_file.lua b/test/lua/acme_file.lua index 29a325e1d2..1f87fc640b 100644 --- a/test/lua/acme_file.lua +++ b/test/lua/acme_file.lua @@ -200,57 +200,84 @@ end local ALWAYS_UDP = true -local fh = FileHandler.new("Oracle Acme Packet logs", "acme", "A file reader for Oracle Acme Packet message logs such as sipmsg.log","rs") - - --- there are certain things we have to create fake state/data for, because they don't exist in the log file --- for example to create IP headers we have to cerate fake identification field values, and to create --- timestamps we have to guess the year (and in some cases month/day as well), and for TCP we have --- to create fake conneciton info, such as sequence numbers. 
We can't simply have a global static variable holding --- such things, because Wireshark reads the file sequentially at first, but then calls seek_read for random --- packets again and we don't want to re-create the fake info again because it will be wrong. So we need to --- create it for each packet and remember what we created for each packet, so that seek_read gets the same values. --- We could store the variables in a big table, keyed by the specific header info line for each one; but instead we'll --- key it off of the file position number, since read() sets it for Wireshark and seek_read() gets it from Wireshark. --- So we'll have a set of global statics used during read(), but the actual per-packet values will be stored in --- a table indexed/keyed by the file position number. A separate table holds TCP peer connection info as described later. - --- the following local table holds global (to this file) static variables that need to be reset every new file read -local statics = { ["ip_ident"] = 0, ["tyear"] = 0, ["tmonth"] = 0, ["tmin"] = 0, ["tmin"] = 0, ["tsec"] = 0, ["tmilli"] = 0, ["nstime"] = NSTime() } - --- the following table holds per-packet info --- the key index will be a number - the file position - but it won't be an array type table (too sparse). --- Each packets entry is a table holding the "static" variables for that packet; this sub-table will be --- an array style instead of hashmap, to reduce size/performance --- This table needs to be cleared whenever the file is closed/opened. 
-local packets = {} --- the indeces for the variable sub-tables +local fh = FileHandler.new("Oracle Acme Packet logs", "acme", + "A file reader for Oracle Acme Packet message logs such as sipmsg.log","rs") + + +-- There are certain things we have to create fake state/data for, because they +-- don't exist in the log file; for example, to create IP headers we have to create +-- fake identification field values, and to create timestamps we have to guess the +-- year (and in some cases month/day as well), and for TCP we have to create fake +-- connection info, such as sequence numbers. We can't simply have a global static +-- variable holding such things, because Wireshark reads the file sequentially at +-- first, but then calls seek_read for random packets again and we don't want to +-- re-create the fake info again because it will be wrong. So we need to create it +-- for each packet and remember what we created for each packet, so that seek_read +-- gets the same values. We could store the variables in a big table, keyed by the +-- specific header info line for each one; but instead we'll key it off of the file +-- position number, since read() sets it for Wireshark and seek_read() gets it from +-- Wireshark. So we'll have a set of global statics used during read(), but the +-- actual per-packet values will be stored in a table indexed/keyed by the file +-- position number. A separate table holds TCP peer connection info as described +-- later. + +-- I said above that this state is "global", but really it can't be global to this +-- whole script file, because more than one file can be opened for reading at the +-- same time. For example, if the user presses the reload button, the capture file +-- will be opened for reading before the previous (same) one is closed. So we have +-- to store state per-file. The good news is Wireshark gives us a convenient way to +-- do that, using the CaptureInfo.private_table attribute/member.
We can save a Lua +-- table with whatever contents we want, to this private_table member, and get it +-- later during the other read/seek_read/close function calls. + +-- So to store this per-file state, we're going to use Lua class objects. They're +-- just Lua tables that have functions and metafunctions and can be treated like +-- objects in terms of syntax/behavior. + +local State = {} +local State_mt = { __index = State } + +function State.new() + local new_class = { -- the new instance + -- stuff we need to keep track of to create fake info + ip_ident = 0, + tyear = 0, + tmonth = 0, + tmin = 0, + tsec = 0, + tmilli = 0, + nstime = NSTime(), + -- the following table holds per-packet info + -- the key index will be a number - the file position - but it won't be an array type table (too sparse). + -- Each packet's entry is a table holding the "static" variables for that packet; this sub-table will be + -- an array style instead of hashmap, to reduce size/performance + -- This table needs to be cleared whenever the file is closed/opened.
+ tcb = {}, + + } + setmetatable( new_class, State_mt ) -- all instances share the same metatable + return new_class +end + +-- the indeces for the State.packets{} variable sub-tables local IP_IDENT = 1 local TTIME = 2 local LOCAL_SEQ = 3 local REMOTE_SEQ = 4 --- the following local table holds TCP peer "connection" info, which is basically --- TCP control block (TCB) type information; this is needed to create and keep track --- of fake TCP sockets/headers for messages that went over TCP, for example for fake --- sequence number info. --- The key index for this is the local+remote ip:port strings concatenated. --- The value is a sub-table, array style, holding the most recent sequence numbers. --- This whole table needs to be cleared whenever the file is closed/opened. -local tcb = {} --- the indeces for the sub-tables +-- the indeces for the State.tcb{} sub-tables local TLOCAL_SEQ = 1 local TREMOTE_SEQ = 2 -local function reset_state() - tcb = {} - packets = {} - for name, v in pairs(statics) do - statics[name] = 0 - end - statics.nstime = NSTime() -end - -- helper functions local char = string.char local floor = math.floor @@ -335,7 +362,7 @@ local function get_timezone() end local timezone = get_timezone() -local function get_timestamp(line, file_position, seeking) +function State:get_timestamp(line, file_position, seeking) local i, line_pos, month, day, hour, min, sec, milli = line:find(header_time_pattern) if not month then return @@ -343,7 +370,7 @@ local function get_timestamp(line, file_position, seeking) if seeking then -- we've seen this packet before, just go get the saved timestamp - sec = packets[file_position][TTIME] + sec = self.packets[file_position][TTIME] if not sec then dprint("failed to get saved timestamp for packet at position:", file_position) return @@ -377,7 +404,7 @@ local function get_timestamp(line, file_position, seeking) -- so we're going to check the current system month, and if it's less than the log file's then we'll -- assume the 
log file started last year; if the system month is larger or equal, then we'll assume the log -- file is of this year. We only do this checking once per file. - if statics.tyear == 0 then + if self.tyear == 0 then local curr_year, curr_month = tonumber(os.date("%Y")), tonumber(os.date("%m")) if curr_month < month then -- use last year @@ -385,16 +412,16 @@ local function get_timestamp(line, file_position, seeking) curr_year = curr_year - 1 end end - statics.tyear = curr_year + self.tyear = curr_year end -- if this message's month is less than previous message's, then year wrapped - if month < statics.tmonth then - statics.tyear = statics.tyear + 1 + if month < self.tmonth then + self.tyear = self.tyear + 1 end - statics.tmonth = month + self.tmonth = month - local timet = os.time({ ["year"] = statics.tyear, ["month"] = month, ["day"] = day, ["hour"] = hour, ["min"] = min, ["sec"] = sec }) + local timet = os.time({ ["year"] = self.tyear, ["month"] = month, ["day"] = day, ["hour"] = hour, ["min"] = min, ["sec"] = sec }) if not timet then dprint("timestamp conversion failed") end @@ -402,25 +429,25 @@ local function get_timestamp(line, file_position, seeking) timet = timet + timezone -- make an NSTime - statics.nstime = NSTime(timet, milli * 1000000) - packets[file_position][TTIME] = statics.nstime + self.nstime = NSTime(timet, milli * 1000000) + self.packets[file_position][TTIME] = self.nstime timet = timet + (milli/1000) dprint2("found time of ", os.date("%c",timet), " with value=",timet) - return statics.nstime, line_pos + return self.nstime, line_pos end -- get_tail_time() gets a fictitous timestamp starting from 19:00:00 on Dec 31, 1969, and incrementing based -- on the minutes/secs/millisecs seen (i.e., if the minute wrapped then hour increases by 1, etc.). 
-- this is needed for tail'ed log files, since they don't show month/day/hour -local function get_tail_time(line, file_position, seeking) +function State:get_tail_time(line, file_position, seeking) local i, line_pos, min, sec, milli = line:find(header_tail_time_pattern) if not min then return end if seeking then -- we've seen this packet before, just go get the saved timestamp - sec = packets[file_position][TTIME] + sec = self.packets[file_position][TTIME] if not sec then dprint("failed to get saved timestamp for packet at position:", file_position) return @@ -438,7 +465,7 @@ local function get_tail_time(line, file_position, seeking) end -- get difference in time - local tmin, tsec, tmilli, nstime = statics.tmin, statics.tsec, statics.tmilli, statics.nstime + local tmin, tsec, tmilli, nstime = self.tmin, self.tsec, self.tmilli, self.nstime local ttime = nstime.secs -- min, sec, milli are what the log says this tail'ed packet is @@ -452,11 +479,11 @@ local function get_tail_time(line, file_position, seeking) else ttime = ttime + (((min * 60) + sec) - ((tmin * 60) + tsec)) end - statics.tmin, statics.tsec, statics.tmilli = min, sec, milli - statics.nstime = NSTime(ttime, milli * 1000000) - packets[file_position][TTIME] = statics.nstime + self.tmin, self.tsec, self.tmilli = min, sec, milli + self.nstime = NSTime(ttime, milli * 1000000) + self.packets[file_position][TTIME] = self.nstime - return statics.nstime, line_pos + return self.nstime, line_pos end local hexbin = { @@ -606,8 +633,9 @@ local TCP = 20 local Packet = {} local Packet_mt = { __index = Packet } -function Packet:new(timestamp, direction, source_ip, source_port, dest_ip, dest_port, ptype, ttype, file_position) +function Packet.new(state, timestamp, direction, source_ip, source_port, dest_ip, dest_port, ptype, ttype, file_position) local new_class = { -- the new instance + ["state"] = state, ["timestamp"] = timestamp, ["direction"] = direction, ["source_ip"] = source_ip, @@ -693,6 +721,7 @@ function 
Packet:get_ascii_data(file, line, bufftbl, index, only_newline) bufftbl[index-1] = nil end + dprint2("Packet:get_ascii_data() returning", bufflen) return bufflen end @@ -703,8 +732,8 @@ local RawPacket = {} local RawPacket_mt = { __index = RawPacket } setmetatable( RawPacket, Packet_mt ) -- make RawPacket inherit from Packet -function RawPacket:new(...) - local new_class = Packet:new(...) -- the new instance +function RawPacket.new(...) + local new_class = Packet.new(...) -- the new instance setmetatable( new_class, RawPacket_mt ) -- all instances share the same metatable return new_class end @@ -779,8 +808,8 @@ local DataPacket = {} local DataPacket_mt = { __index = DataPacket } setmetatable( DataPacket, Packet_mt ) -- make Dataacket inherit from Packet -function DataPacket:new(...) - local new_class = Packet:new(...) -- the new instance +function DataPacket.new(...) + local new_class = Packet.new(...) -- the new instance setmetatable( new_class, DataPacket_mt ) -- all instances share the same metatable return new_class end @@ -796,16 +825,16 @@ function DataPacket:build_ipv4_hdr(bufflen, proto, seeking) -- figure out the ip identification value local ip_ident if seeking then - ip_ident = packets[self.file_position][IP_IDENT] + ip_ident = self.state.packets[self.file_position][IP_IDENT] else -- increment ident value - statics.ip_ident = statics.ip_ident + 1 - if statics.ip_ident == 65536 then - statics.ip_ident = 1 + self.state.ip_ident = self.state.ip_ident + 1 + if self.state.ip_ident == 65536 then + self.state.ip_ident = 1 end - ip_ident = statics.ip_ident + ip_ident = self.state.ip_ident -- save it for future seeking - packets[self.file_position][IP_IDENT] = ip_ident + self.state.packets[self.file_position][IP_IDENT] = ip_ident end -- use a table to concatenate as it's slightly faster that way @@ -907,45 +936,45 @@ function DataPacket:build_tcp_hdr(bufflen, bufftbl, seeking) local local_seq, remote_seq if seeking then - local_seq = 
packets[self.file_position][LOCAL_SEQ] - remote_seq = packets[self.file_position][REMOTE_SEQ] + local_seq = self.state.packets[self.file_position][LOCAL_SEQ] + remote_seq = self.state.packets[self.file_position][REMOTE_SEQ] else -- find socket/tcb info for this "stream", create if not found - if not tcb[self.tcbkey] then + if not self.state.tcb[self.tcbkey] then -- create them - tcb[self.tcbkey] = {} + self.state.tcb[self.tcbkey] = {} local_seq = 1 remote_seq = 1 - packets[self.file_position][LOCAL_SEQ] = 1 - packets[self.file_position][REMOTE_SEQ] = 1 + self.state.packets[self.file_position][LOCAL_SEQ] = 1 + self.state.packets[self.file_position][REMOTE_SEQ] = 1 -- set tcb to next sequence numbers, so that the correct "side" -- acknowledges receiving these bytes if self.direction == SENT then -- this packet is being sent, so local sequence increases next time - tcb[self.tcbkey][TLOCAL_SEQ] = bufflen+1 - tcb[self.tcbkey][TREMOTE_SEQ] = 1 + self.state.tcb[self.tcbkey][TLOCAL_SEQ] = bufflen+1 + self.state.tcb[self.tcbkey][TREMOTE_SEQ] = 1 else -- this packet is being received, so remote sequence increases next time -- and local side will acknowldge it next time - tcb[self.tcbkey][TLOCAL_SEQ] = 1 - tcb[self.tcbkey][TREMOTE_SEQ] = bufflen+1 + self.state.tcb[self.tcbkey][TLOCAL_SEQ] = 1 + self.state.tcb[self.tcbkey][TREMOTE_SEQ] = bufflen+1 end else -- stream already exists, so send the current tcb seqs and update for next time if self.direction == SENT then -- this packet is being sent, so local sequence increases next time - local_seq = tcb[self.tcbkey][TLOCAL_SEQ] - remote_seq = tcb[self.tcbkey][TREMOTE_SEQ] - tcb[self.tcbkey][TLOCAL_SEQ] = local_seq + bufflen + local_seq = self.state.tcb[self.tcbkey][TLOCAL_SEQ] + remote_seq = self.state.tcb[self.tcbkey][TREMOTE_SEQ] + self.state.tcb[self.tcbkey][TLOCAL_SEQ] = local_seq + bufflen else -- this packet is being received, so the "local" seq number of the packet is the remote's seq really - local_seq = 
tcb[self.tcbkey][TREMOTE_SEQ] - remote_seq = tcb[self.tcbkey][TLOCAL_SEQ] + local_seq = self.state.tcb[self.tcbkey][TREMOTE_SEQ] + remote_seq = self.state.tcb[self.tcbkey][TLOCAL_SEQ] -- and remote seq needs to increase next time (remember local_seq is TREMOTE_SEQ) - tcb[self.tcbkey][TREMOTE_SEQ] = local_seq + bufflen + self.state.tcb[self.tcbkey][TREMOTE_SEQ] = local_seq + bufflen end - packets[self.file_position][LOCAL_SEQ] = local_seq - packets[self.file_position][REMOTE_SEQ] = remote_seq + self.state.packets[self.file_position][LOCAL_SEQ] = local_seq + self.state.packets[self.file_position][REMOTE_SEQ] = remote_seq end end @@ -966,7 +995,7 @@ function DataPacket:build_tcp_hdr(bufflen, bufftbl, seeking) end function DataPacket:build_packet(bufftbl, bufflen, seeking) - dprint2("DataPacket:build_packet() called") + dprint2("DataPacket:build_packet() called with ptype=",self.ptype) if self.ptype == IPv4 then if self.ttype == UDP then bufftbl[2] = self:build_udp_hdr(bufflen) @@ -1021,8 +1050,8 @@ local BinPacket = {} local BinPacket_mt = { __index = BinPacket } setmetatable( BinPacket, DataPacket_mt ) -- make BinPacket inherit from DataPacket -function BinPacket:new(...) - local new_class = DataPacket:new(...) -- the new instance +function BinPacket.new(...) + local new_class = DataPacket.new(...) -- the new instance setmetatable( new_class, BinPacket_mt ) -- all instances share the same metatable return new_class end @@ -1067,8 +1096,8 @@ local DnsPacket = {} local DnsPacket_mt = { __index = DnsPacket } setmetatable( DnsPacket, BinPacket_mt ) -- make DnsPacket inherit from BinPacket -function DnsPacket:new(...) - local new_class = BinPacket:new(...) -- the new instance +function DnsPacket.new(...) + local new_class = BinPacket.new(...) 
-- the new instance setmetatable( new_class, DnsPacket_mt ) -- all instances share the same metatable return new_class end @@ -1099,8 +1128,8 @@ local AsciiPacket = {} local AsciiPacket_mt = { __index = AsciiPacket } setmetatable( AsciiPacket, DataPacket_mt ) -- make AsciiPacket inherit from DataPacket -function AsciiPacket:new(...) - local new_class = DataPacket:new(...) -- the new instance +function AsciiPacket.new(...) + local new_class = DataPacket.new(...) -- the new instance setmetatable( new_class, AsciiPacket_mt ) -- all instances share the same metatable return new_class end @@ -1148,24 +1177,24 @@ end -- this is from a tail'ed log output: -- 52:22.434 On [0:0]205.152.56.211:5060 received from 205.152.56.75:5060 local loopback_pattern = "^127%.0%.0%.%d+$" -local function parse_header(file, line, file_position, seeking) +local function parse_header(state, file, line, file_position, seeking) if seeking then -- verify we've seen this packet before - if not packets[file_position] then + if not state.packets[file_position] then dprint("parse_header: packet at file position ", file_position, " not saved previously") return end else -- first time through, create sub-table for the packet - packets[file_position] = {} + state.packets[file_position] = {} end -- get time info, and line match ending position - local timestamp, line_pos = get_timestamp(line, file_position, seeking) + local timestamp, line_pos = state:get_timestamp(line, file_position, seeking) if not timestamp then -- see if it's a tail'ed log instead - timestamp, line_pos = get_tail_time(line, file_position, seeking) + timestamp, line_pos = state:get_tail_time(line, file_position, seeking) end if not timestamp then @@ -1233,7 +1262,11 @@ local function parse_header(file, line, file_position, seeking) packet_class = get_packet_class(line) file:seek("set", position) -- go back - local packet = packet_class:new(timestamp, direction, source_ip, source_port, dest_ip, dest_port, ptype, ttype, file_position) 
+ dprint2("parse_header calling packet_class.new with:", + tostring(timestamp), direction, source_ip, source_port, + dest_ip, dest_port, ptype, ttype, file_position) + + local packet = packet_class.new(state, timestamp, direction, source_ip, source_port, dest_ip, dest_port, ptype, ttype, file_position) if not packet then dprint("parse_header: parser failed to create Packet object") end @@ -1251,40 +1284,61 @@ end -- file handling functions for Wireshark to use -- The read_open is called by Wireshark once per file, to see if the file is this reader's type. +-- It passes in (1) a File and (2) CaptureInfo object to this function -- Since there is no exact magic sequence to search for, we have to use heuristics to guess if the file -- is our type or not, which we do by parsing a message header. -- Since Wireshark uses the file cursor position for future reading of this file, we also have to seek back to the beginning -- so that our normal read() function works correctly. local function read_open(file, capture) + dprint2("read_open called") -- save current position to return later local position = file:seek() + local line = file:read() if not line then return false end + dprint2("read_open: got this line begin:\n'", line, "'") line, position = skip_ahead(file, line, position) if not line then return false end + dprint2("read_open: got this line after skip:\n'", line, "', with position=", position) - if parse_header(file, line, position) then + local state = State.new() + + if parse_header(state, file, line, position) then + dprint2("read_open success") + file:seek("set",position) + capture.time_precision = wtap_filetypes.TSPREC_MSEC -- for millisecond precision capture.encap = wtap.RAW_IP -- whole file is raw IP format capture.snapshot_length = 0 -- unknown snaplen capture.comment = "Oracle Acme Packet SBC message log" capture.os = "VxWorks or Linux" capture.hardware = "Oracle Acme Packet SBC" - -- reset static variables - reset_state() + + -- reset state variables + 
capture.private_table = State.new() + + dprint2("read_open returning true") return true end + dprint2("read_open returning false") return false end ---------------------------------------- -- this is used by both read() and seek_read() -local function read_common(funcname, file, frame, position, seeking) +local function read_common(funcname, file, capture, frame, position, seeking) + dprint2(funcname, "read_common called") + local state = capture.private_table + + if not state then + dprint(funcname, "error getting capture state") + return false + end local line = file:read() if not line then @@ -1302,7 +1356,7 @@ local function read_common(funcname, file, frame, position, seeking) end dprint2(funcname, ": parsing line='", line, "'") - local phdr = parse_header(file, line, position, seeking) + local phdr = parse_header(state, file, line, position, seeking) if not phdr then dprint(funcname, "failed to parse header") return false @@ -1317,19 +1371,22 @@ local function read_common(funcname, file, frame, position, seeking) dprint(funcname, "failed to set Wireshark packet header info") return end + + dprint2(funcname, "read_common returning position") return position end ---------------------------------------- -- Wireshark/tshark calls read() for each frame/record in the file --- It passes in a File object and FrameInfo object to this function +-- It passes in (1) a File, (2) CaptureInfo, and (3) a FrameInfo object to this function -- It expects in return the file offset position the record starts at, -- or nil/false if there's an error or end-of-file is reached. 
-- The offset position is used later: wireshark remembers it and gives -- it to seek_read() at various random times -local function read(file, frame) +local function read(file, capture, frame) + dprint2("read called") local position = file:seek() - position = read_common("read", file, frame, position) + position = read_common("read", file, capture, frame, position) if not position then if file:read(0) ~= nil then dprint("read failed to call read_common") @@ -1343,11 +1400,12 @@ end ---------------------------------------- -- Wireshark/tshark calls seek_read() for each frame/record in the file, at random times --- It passes in to this function a File object, FrameInfo object, and the offset position number +-- It passes in (1) File, (2) CaptureInfo, (3) FrameInfo, and (4) the offset position number -- It expects in return true for successful parsing, or nil/false if there's an error. -local function seek_read(file, frame, offset) +local function seek_read(file, capture, frame, offset) + dprint2("seek_read called") file:seek("set",offset) - if not read_common("seek_read", file, frame, offset, true) then + if not read_common("seek_read", file, capture, frame, offset, true) then dprint("seek_read failed to call read_common") return false end @@ -1356,19 +1414,24 @@ end ---------------------------------------- -- Wireshark/tshark calls read_close() when it's closing the file completely +-- It passes in (1) a File and (2) CaptureInfo object to this function -- this is a good opportunity to clean up any state you may have created during --- file reading. (in our case there *is* state to reset) -local function read_close(file) - reset_state() +-- file reading. +-- In our case there *is* state to reset, but we only saved it in +-- the capture.private_table, so Wireshark will clean it up for us. 
+local function read_close(file, capture) + dprint2("read_close called") return true end ---------------------------------------- -- An often unused function, Wireshark calls this when the sequential walk-through is over --- (i.e., no more calls to read(), only to seek_read()). So we'll clear the TCB table --- here to free up memory; this is undoubtedly unecessary, but good practice. -local function seq_read_close(file) - tcb = {} +-- (i.e., no more calls to read(), only to seek_read()). +-- It passes in (1) a File and (2) CaptureInfo object to this function +-- In our case there *is* some state to reset, but we only saved it in +-- the capture.private_table, so Wireshark will clean it up for us. +local function seq_read_close(file, capture) + dprint2("seq_read_close called") return true end diff --git a/test/lua/pcap_file.lua b/test/lua/pcap_file.lua index ca684e318b..87e8a57cee 100644 --- a/test/lua/pcap_file.lua +++ b/test/lua/pcap_file.lua @@ -12,6 +12,18 @@ --]] -------------------------------------------------------------------------------- +-- do not modify this table +local debug = { + DISABLED = 0, + LEVEL_1 = 1, + LEVEL_2 = 2 +} + +-- set this DEBUG to debug.LEVEL_1 to enable printing debug info +-- set it to debug.LEVEL_2 to enable really verbose printing +local DEBUG = debug.LEVEL_1 + + local wireshark_name = "Wireshark" if not GUI_ENABLED then wireshark_name = "Tshark" @@ -28,35 +40,51 @@ end -- technically we should be able to do this with 'require', but Struct is a built-in assert(Struct.unpack, wireshark_name ..
" does not have the Struct library!") --- debug printer, set DEBUG to true to enable printing debug info --- set DEBUG2 to true to enable really verbose printing -local DEBUG, DEBUG2 = false, false +-------------------------------------------------------------------------------- +-- early definitions +-- throughout most of this file I try to pre-declare things to help ease +-- reading it and following the logic flow, but some things just have to be done +-- before others, so this sections has such things that cannot be avoided +-------------------------------------------------------------------------------- + +-- first some variable declarations for functions we'll define later +local parse_file_header, parse_rec_header, read_common + +-- these will be set inside of parse_file_header(), but we're declaring them up here +local default_settings = +{ + debug = DEBUG, + corrected_magic = 0xa1b2c3d4, + version_major = 2, + version_minor = 4, + timezone = 0, + sigfigs = 0, + read_snaplen = 0, -- the snaplen we read from file + snaplen = 0, -- the snaplen we use (limited by WTAP_MAX_PACKET_SIZE) + linktype = -1, -- the raw linktype number in the file header + wtap_type = wtap_encaps.UNKNOWN, -- the mapped internal wtap number based on linktype + endianess = ENC_BIG_ENDIAN, + time_precision = wtap_filetypes.TSPREC_USEC, + rec_hdr_len = 16, -- default size of record header + rec_hdr_patt = "I4 I4 I4 I4", -- pattern for Struct to use + num_rec_fields = 4, -- number of vars in pattern +} local dprint = function() end local dprint2 = function() end -if DEBUG or DEBUG2 then - dprint = function(...) - print(table.concat({"Lua:", ...}," ")) - end +local function reset_debug() + if default_settings.debug > debug.DISABLED then + dprint = function(...) 
+ print(table.concat({"Lua:", ...}," ")) + end - if DEBUG2 then - dprint2 = dprint + if default_settings.debug > debug.LEVEL_1 then + dprint2 = dprint + end end end - ----------------------------------------- --- to make it easier to read this file, we'll define some of the functions --- later on, but we need them earlier, so we "declare" them here -local parse_file_header, parse_rec_header, read_common - - --- these will be set inside of parse_file_header(), but we're declaring them up here -local VERSION_MAJOR = 2 -local VERSION_MINOR = 4 -local TIMEZONE = 0 -local SIGFIGS = 0 -local SNAPLEN = 0 -local ENCAP_TYPE = wtap.UNKNOWN +-- call it now +reset_debug() -------------------------------------------------------------------------------- -- file reader handling functions for Wireshark to use @@ -68,18 +96,18 @@ local ENCAP_TYPE = wtap.UNKNOWN -- It expects in return either nil or false to mean it's not our file type, or true if it is -- In our case what this means is we figure out if the file has the magic header, and get the -- endianess of the file, and the encapsulation type of its frames/records --- Since Wireshark uses the file cursor position for future reading of this file, we also have to seek back to the beginning --- so that our normal read() function works correctly. 
local function read_open(file, capture) dprint2("read_open() called") - -- save current position to return later - local position = file:seek() + local file_settings = parse_file_header(file) - if parse_file_header(file) then + if file_settings then dprint2("read_open: success, file is for us") + -- save our state + capture.private_table = file_settings + -- if the file is for us, we MUST set the file position cursor to -- where we want the first call to read() function to get it the next time -- for example if we checked a few records to be sure it's or type @@ -89,9 +117,9 @@ local function read_open(file, capture) --file:seek("set",position) -- these we can also set per record later during read operations - capture.time_precision = wtap_filetypes.TSPREC_USEC -- for microsecond precision - capture.encap = ENCAP_TYPE -- this was updated by parse_file_header() - capture.snapshot_length = SNAPLEN -- also updated by parse_file_header() + capture.time_precision = file_settings.time_precision + capture.encap = file_settings.wtap_type + capture.snapshot_length = file_settings.snaplen return true end @@ -99,26 +127,24 @@ local function read_open(file, capture) dprint2("read_open: file not for us") -- if it's not for us, wireshark will reset the file position itself - -- but we might as well do it too, in case that behavior ever changes - file:seek("set",position) return false end ---------------------------------------- -- Wireshark/tshark calls read() for each frame/record in the file --- It passes in a File object and FrameInfo object to this function +-- It passes in (1) a File, (2) CaptureInfo, and (3) FrameInfo object to this function -- It expects in return the file offset position the record starts at, -- or nil/false if there's an error or end-of-file is reached. 
-- The offset position is used later: wireshark remembers it and gives -- it to seek_read() at various random times -local function read(file, frame) +local function read(file, capture, frame) dprint2("read() called") -- call our common reader function local position = file:seek() - if not read_common("read", file, frame) then + if not read_common("read", file, capture, frame) then -- this isnt' actually an error, because it might just mean we reached end-of-file -- so let's test for that (read(0) is a special case in Lua, see Lua docs) if file:read(0) ~= nil then @@ -137,15 +163,15 @@ end ---------------------------------------- -- Wireshark/tshark calls seek_read() for each frame/record in the file, at random times --- It passes in to this function a File object, FrameInfo object, and the offset position number +-- It passes in (1) a File, (2) CaptureInfo, (3) FrameInfo object, and the offset position number -- It expects in return true for successful parsing, or nil/false if there's an error. -local function seek_read(file, frame, offset) +local function seek_read(file, capture, frame, offset) dprint2("seek_read() called") -- first move to the right position in the file file:seek("set",offset) - if not read_common("seek_read", file, frame) then + if not read_common("seek_read", file, capture, frame) then dprint("seek_read: failed to call read_common") return false end @@ -155,26 +181,23 @@ end ---------------------------------------- -- Wireshark/tshark calls read_close() when it's closing the file completely +-- It passes in (1) a File and (2) CaptureInfo object to this function -- this is a good opportunity to clean up any state you may have created during -- file reading. (in our case there's no real state) -local function read_close(file) +local function read_close(file, capture) dprint2("read_close() called") - -- we don't really have to reset these, but just to show what you might do in this function... 
- VERSION_MAJOR = 2 - VERSION_MINOR = 4 - TIMEZONE = 0 - SIGFIGS = 0 - SNAPLEN = 0 - ENCAP_TYPE = wtap.UNKNOWN + -- we don't really have to reset anything, because we used the + -- capture.private_table and wireshark clears it for us after this function return true end ---------------------------------------- -- An often unused function, Wireshark calls this when the sequential walk-through is over -- (i.e., no more calls to read(), only to seek_read()). +-- It passes in (1) a File and (2) CaptureInfo object to this function -- This gives you a chance to clean up any state you used during read() calls, but remember -- that there will be calls to seek_read() after this (in Wireshark, though not Tshark) -local function seq_read_close(file) +local function seq_read_close(file, capture) dprint2("First pass of read() calls are over, but there may be seek_read() calls after this") return true end @@ -216,6 +239,7 @@ local pcap2wtap = { [9] = wtap_encaps.PPP, [101] = wtap_encaps.RAW_IP, [105] = wtap_encaps.IEEE_802_11, + [127] = wtap_encaps.IEEE_802_11_RADIOTAP, [140] = wtap_encaps.MTP2, [141] = wtap_encaps.MTP3, [143] = wtap_encaps.DOCSIS, @@ -253,32 +277,136 @@ local function wtap2pcap(encap) end ---------------------------------------- --- the pcap magic field: 0xA1B2C3D4, of both endianess -local MAGIC = 0xa1b2c3d4 -local SWAPPED_MAGIC = 0xd4c3b2a1 - -- here are the "structs" we're going to parse, of the various records in a pcap file -- these pattern string gets used in calls to Struct.unpack() -- -- we will prepend a '<' or '>' later, once we figure out what endian-ess the files are in -- +-- this is a constant for minimum we need to read before we figure out the filetype +local FILE_HDR_LEN = 24 -- a pcap file header struct -- this is: magic, version_major, version_minor, timezone, sigfigs, snaplen, encap type -local FILE_HEADER = "I4 I2 I2 i4 I4 I4 I4" -local FILE_HDR_LEN = Struct.size(FILE_HEADER) - --- a pcap record header struct --- this is: time_sec, 
time_usec, capture_len, original_len -local REC_HEADER = "I4 I4 I4 I4" -local REC_HDR_LEN = Struct.size(REC_HEADER) -local NUM_REC_FIELDS = 4 +local FILE_HEADER_PATT = "I4 I2 I2 i4 I4 I4 I4" +-- it's too bad Struct doesn't have a way to get the number of vars the pattern holds +-- another thing to add to my to-do list? +local NUM_HDR_FIELDS = 7 -- these will hold the '<'/'>' prepended version of above -local file_header, rec_header +--local file_header, rec_header -- snaplen/caplen can't be bigger than this local WTAP_MAX_PACKET_SIZE = 65535 +---------------------------------------- +-- different pcap file types have different magic values +-- we need to know various things about them for various functions +-- in this script, so this table holds all the info +-- +-- See default_settings table above for the defaults used if this table +-- doesn't override them. +-- +-- Arguably, these magic types represent different "Protocols" to dissect later, +-- but this script treats them all as "pcapfile" protocol. 
+-- +-- From this table, we'll auto-create a value-string table for file header magic field +local magic_spells = +{ + normal = + { + magic = 0xa1b2c3d4, + name = "Normal (Big-endian)", + }, + swapped = + { + magic = 0xd4c3b2a1, + name = "Swapped Normal (Little-endian)", + endianess = ENC_LITTLE_ENDIAN, + }, + modified = + { + -- this is for a ss991029 patched format only + magic = 0xa1b2cd34, + name = "Modified", + rec_hdr_len = 24, + rec_hdr_patt = "I4I4I4I4 I4 I2 I1 I1", + num_rec_fields = 8, + }, + swapped_modified = + { + -- this is for a ss991029 patched format only + magic = 0x34cdb2a1, + name = "Swapped Modified", + rec_hdr_len = 24, + rec_hdr_patt = "I4I4I4I4 I4 I2 I1 I1", + num_rec_fields = 8, + endianess = ENC_LITTLE_ENDIAN, + }, + nsecs = + { + magic = 0xa1b23c4d, + name = "Nanosecond", + time_precision = wtap_filetypes.TSPREC_NSEC, + }, + swapped_nsecs = + { + magic = 0x4d3cb2a1, + name = "Swapped Nanosecond", + endianess = ENC_LITTLE_ENDIAN, + time_precision = wtap_filetypes.TSPREC_NSEC, + }, +} + +-- create a magic-to-spell entry table from above magic_spells table +-- so we can find them faster during file read operations +-- we could just add them right back into spells table, but this is cleaner +local magic_values = {} +for k,t in pairs(magic_spells) do + magic_values[t.magic] = t +end + +-- the function which makes a copy of the default settings per file +local function new_settings() + dprint2("creating new file_settings") + local file_settings = {} + for k,v in pairs(default_settings) do + file_settings[k] = v + end + return file_settings +end + +-- set the file_settings that the magic value defines in magic_values +local function set_magic_file_settings(magic) + local t = magic_values[magic] + if not t then + dprint("set_magic_file_settings: did not find magic settings for:",magic) + return false + end + + local file_settings = new_settings() + + -- the magic_values/spells table uses the same key names, so this is easy + for k,v in pairs(t) 
do + file_settings[k] = v + end + + -- based on endianess, set the file_header and rec_header + -- and determine corrected_magic + if file_settings.endianess == ENC_BIG_ENDIAN then + file_settings.file_hdr_patt = '>' .. FILE_HEADER_PATT + file_settings.rec_hdr_patt = '>' .. file_settings.rec_hdr_patt + file_settings.corrected_magic = magic + else + file_settings.file_hdr_patt = '<' .. FILE_HEADER_PATT + file_settings.rec_hdr_patt = '<' .. file_settings.rec_hdr_patt + local m = Struct.pack(">I4", magic) + file_settings.corrected_magic = Struct.unpack("I4", line) + + local file_settings = set_magic_file_settings(magic) + + if not file_settings then + dprint("magic was: '", magic, "', so not a known pcap file?") return false end - local nettype + -- this is: magic, version_major, version_minor, timezone, sigfigs, snaplen, encap type + local fields = { Struct.unpack(FILE_HEADER_PATT, line) } - magic, VERSION_MAJOR, VERSION_MINOR, TIMEZONE, SIGFIGS, SNAPLEN, nettype = Struct.unpack(file_header, line) + -- sanity check; also note that Struct.unpack() returns the fields plus + -- a number of where in the line it stopped reading (ie, the end in this case) + -- so we got back number of fields + 1 + if #fields ~= NUM_HDR_FIELDS + 1 then + -- this should never happen, since we already told file:read() to grab enough bytes + dprint("parse_file_header: failed to read the file header") + return nil + end - if not magic then - dprint("parse_file_header: failed to unpack header struct") - return false + -- fields[1] is the magic, which we already parsed and saved before, but just to be sure + -- our endianess is set right, we validate what we got is what we expect now that + -- endianess has been corrected + if fields[1] ~= file_settings.corrected_magic then + dprint ("parse_file_header: endianess screwed up? 
Got:'", fields[1], + "', but wanted:", file_settings.corrected_magic) + return nil end - dprint("parse_file_header: got magic=",magic, ", major version=",VERSION_MAJOR, ", minor=",VERSION_MINOR, - ", timezone=",TIMEZONE, ", sigfigs=",SIGFIGS, "snaplen=",SNAPLEN, ", nettype =",nettype) + file_settings.version_major = fields[2] + file_settings.version_minor = fields[3] + file_settings.timezone = fields[4] + file_settings.sigfigs = fields[5] + file_settings.read_snaplen = fields[6] + file_settings.linktype = fields[7] -- wireshark only supports version 2.0 and later - if VERSION_MAJOR < 2 then + if fields[2] < 2 then dprint("got version =",VERSION_MAJOR,"but only version 2 or greater supported") return false end -- convert pcap file interface type to wtap number type - ENCAP_TYPE = pcap2wtap[nettype] - if not ENCAP_TYPE then - dprint("file nettype",nettype,"couldn't be mapped to wireshark wtap type") + file_settings.wtap_type = pcap2wtap[file_settings.linktype] + if not file_settings.wtap_type then + dprint("file nettype", file_settings.linktype, + "couldn't be mapped to wireshark wtap type") return false end - - if SNAPLEN > WTAP_MAX_PACKET_SIZE then - SNAPLEN = WTAP_MAX_PACKET_SIZE + file_settings.snaplen = file_settings.read_snaplen + if file_settings.snaplen > WTAP_MAX_PACKET_SIZE then + file_settings.snaplen = WTAP_MAX_PACKET_SIZE end + dprint2("read_file_header: got magic='", magic, + "', major version='", file_settings.version_major, + "', minor='", file_settings.version_minor, + "', timezone='", file_settings.timezone, + "', sigfigs='", file_settings.sigfigs, + "', read_snaplen='", file_settings.read_snaplen, + "', snaplen='", file_settings.snaplen, + "', nettype ='", file_settings.linktype, + "', wtap ='", file_settings.wtap_type) + --ok, it's a pcap file dprint2("parse_file_header: success") - return true + return file_settings end ---------------------------------------- -- this is used by both read() and seek_read() -- the calling function to this should 
have already set the file position correctly -read_common = function(funcname, file, frame) +read_common = function(funcname, file, capture, frame) dprint2(funcname,": read_common() called") + -- get the state info + local file_settings = capture.private_table + -- first parse the record header, which will set the FrameInfo fields - if not parse_rec_header(funcname, file, frame) then + if not parse_rec_header(funcname, file, file_settings, frame) then dprint2(funcname, ": read_common: hit end of file or error") return false end - frame.encap = ENCAP_TYPE + frame.encap = file_settings.wtap_type -- now we need to get the packet bytes from the file record into the frame... -- we *could* read them into a string using file:read(numbytes), and then @@ -380,51 +532,56 @@ end ---------------------------------------- -- the function to parse individual records -parse_rec_header = function(funcname, file, frame) +parse_rec_header = function(funcname, file, file_settings, frame) dprint2(funcname,": parse_rec_header() called") - local line = file:read(REC_HDR_LEN) + local line = file:read(file_settings.rec_hdr_len) -- it's ok for us to not be able to read it, if it's end of file if not line then return false end -- this is: time_sec, time_usec, capture_len, original_len - local fields = { Struct.unpack(rec_header, line) } + local fields = { Struct.unpack(file_settings.rec_hdr_patt, line) } -- sanity check; also note that Struct.unpack() returns the fields plus -- a number of where in the line it stopped reading (ie, the end in this case) -- so we got back number of fields + 1 - if #fields ~= NUM_REC_FIELDS + 1 then - dprint(funcname, ": parse_rec_header: failed to read the record header") + if #fields ~= file_settings.num_rec_fields + 1 then + dprint(funcname, ": parse_rec_header: failed to read the record header, got:", + #fields, ", expected:", file_settings.num_rec_fields) return nil end - -- we could just do this: - --frame.time = fields[1] + (fields[2] / 1000000) - -- but 
Lua numbers are doubles, which lose precision in the fractional part - -- so we use a NSTime() object instead; remember though that an NSTime takes - -- nanoseconds for its second arg, and pcap's are only microseconds, so *1000 - frame.time = NSTime(fields[1], fields[2]*1000) + local nsecs = fields[2] + + if file_settings.time_precision == wtap_filetypes.TSPREC_USEC then + nsecs = nsecs * 1000 + elseif file_settings.time_precision == wtap_filetypes.TSPREC_MSEC then + nsecs = nsecs * 1000000 + end + + frame.time = NSTime(fields[1], nsecs) + + local caplen, origlen = fields[3], fields[4] -- sanity check, verify captured length isn't more than original length - if fields[3] > fields[4] then - dprint("captured length of",fields[3],"is bigger than original length of",fields[4]) - -- swap them - local caplen = fields[3] - fields[3] = fields[4] - fields[4] = caplen + if caplen > origlen then + dprint("captured length of", caplen, "is bigger than original length of", origlen) + -- swap them, a cool Lua ability + caplen, origlen = origlen, caplen end - if fields[3] > WTAP_MAX_PACKET_SIZE then - dprint("Got a captured_length of",fields[3],"which is too big") - return nil + if caplen > WTAP_MAX_PACKET_SIZE then + dprint("Got a captured_length of", caplen, "which is too big") + caplen = WTAP_MAX_PACKET_SIZE end - frame.captured_length = fields[3] - frame.original_length = fields[4] + frame.captured_length = caplen + frame.original_length = origlen frame.flags = wtap_presence_flags.TS + wtap_presence_flags.CAP_LEN -- for timestamp|cap_len + dprint2(funcname,": parse_rec_header() returning") return true end @@ -446,20 +603,38 @@ local canwrite = { -- etc., etc. } --- we can't reuse the variables we used in the reader, because this script might be sued to both --- open a file for reading and write it out, at the same time, so we prepend 'W_' for the writer's --- versions. 
Normally I'd put this type of stuff in a class table and just create a new instance, --- but I didn't want to confuse people with Lua class models in this script -local W_VERSION_MAJOR = 2 -local W_VERSION_MINOR = 4 -local W_TIMEZONE = 0 -local W_SIGFIGS = 0 -local W_SNAPLEN = 0 -local W_ENCAP_TYPE = wtap.UNKNOWN --- write out things in little-endian order -local w_file_header = "<" .. FILE_HEADER -local w_rec_header = "<" .. REC_HEADER -local TSPRECISION = wtap_filetypes.TSPREC_USEC +-- we can't reuse the variables we used in the reader, because this script might be used to both +-- open a file for reading and write it out, at the same time, so we cerate another file_settings +-- instance. +-- set the file_settings for the little-endian version in magic_spells +local function create_writer_file_settings() + dprint2("create_writer_file_settings called") + local t = magic_spells.swapped + + local file_settings = new_settings() + + -- the magic_values/spells table uses the same key names, so this is easy + for k,v in pairs(t) do + file_settings[k] = v + end + + -- based on endianess, set the file_header and rec_header + -- and determine corrected_magic + if file_settings.endianess == ENC_BIG_ENDIAN then + file_settings.file_hdr_patt = '>' .. FILE_HEADER_PATT + file_settings.rec_hdr_patt = '>' .. file_settings.rec_hdr_patt + file_settings.corrected_magic = file_settings.magic + else + file_settings.file_hdr_patt = '<' .. FILE_HEADER_PATT + file_settings.rec_hdr_patt = '<' .. file_settings.rec_hdr_patt + local m = Struct.pack(">I4", file_settings.magic) + file_settings.corrected_magic = Struct.unpack("