Rietveld Code Review Tool
Help | Bug tracker | Discussion group | Source code | Sign in
(1359)

Unified Diff: plaso/parsers/trendmicroav.py

Issue 335560043: [plaso] Add parser and formatter for Trend Micro antivirus. (Closed)
Patch Set: Add parser and formatter for Trend Micro antivirus. Created 6 years ago
Use n/p to move between diff chunks; N/P to move between comments. Please Sign in to add in-line comments.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « plaso/parsers/__init__.py ('k') | test_data/pccnt35.log » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: plaso/parsers/trendmicroav.py
diff --git a/plaso/parsers/trendmicroav.py b/plaso/parsers/trendmicroav.py
new file mode 100644
index 0000000000000000000000000000000000000000..2fff207b7057fb135fedaae8d3561bef7d5acb2a
--- /dev/null
+++ b/plaso/parsers/trendmicroav.py
@@ -0,0 +1,246 @@
+# -*- coding: utf-8 -*-
+"""Parser for Trend Micro Antivirus logs.
+
+Trend Micro uses two log files to track the scans (both manual/scheduled and
+real-time) and the web reputation (network scan/filtering).
+
+Currently only the first log is supported.
+"""
+
+from __future__ import unicode_literals
+
+from dfdatetime import definitions as dfdatetime_definitions
+from dfdatetime import posix_time as dfdatetime_posix_time
+from dfdatetime import time_elements as dfdatetime_time_elements
+
+from plaso.containers import events
+from plaso.containers import time_events
+from plaso.lib import errors
+from plaso.lib import definitions
+from plaso.formatters import trendmicroav as formatter
+from plaso.parsers import dsv_parser
+from plaso.parsers import manager
+
+
+class TrendMicroAVEventData(events.EventData):
+ """Trend Micro AV Log event data.
+
+ All attributes are initialized to None and are filled in by the parser
+ from the columns of a single log line.
+
+ Attributes:
+ action (int): value of the "action" log column, stored by the parser as
+ an integer.
+ filename (str): value of the "filename" log column.
+ path (str): value of the "path" log column.
+ scan_type (int): value of the "scan_type" log column, stored by the
+ parser as an integer.
+ threat (str): value of the "threat" log column.
+ """
+
+ DATA_TYPE = 'av:trendmicro:scan'
+
+ def __init__(self):
+ """Initializes event data."""
+ super(TrendMicroAVEventData, self).__init__(data_type=self.DATA_TYPE)
+ self.threat = None
+ self.action = None
+ self.path = None
+ self.filename = None
+ self.scan_type = None
+
+
+# pylint: disable=abstract-method
+class TrendMicroBaseParser(dsv_parser.DSVParser):
+  """Common code for parsing Trend Micro log files.
+
+  The file format is reminiscent of CSV, but is not quite the same; the
+  delimiter is a three-character sequence and there is no provision for
+  quoting or escaping.
+  """
+
+  DELIMITER = '<;>'
+
+  # Subclasses must define an integer MIN_COLUMNS value.
+  MIN_COLUMNS = None
+
+  def _CreateDictReader(self, parser_mediator, line_reader):
+    """Iterates over the log lines and provides a reader for the values.
+
+    Each line is decoded, stripped of surrounding whitespace and split on
+    DELIMITER. The resulting values are paired with COLUMNS in order; when a
+    line has fewer values than COLUMNS, the trailing columns are absent from
+    the yielded dictionary.
+
+    Args:
+      parser_mediator (ParserMediator): mediates interactions between parsers
+          and other components, such as storage and dfvfs.
+      line_reader (iter): yields each line in the log file.
+
+    Yields:
+      dict[str, str]: column values keyed by column header.
+
+    Raises:
+      UnableToParseFile: if a line cannot be decoded with the parser encoding
+          or if its number of values is smaller than MIN_COLUMNS or larger
+          than the number of COLUMNS.
+    """
+    for line in line_reader:
+      try:
+        line = line.decode(self._encoding)
+      except UnicodeDecodeError as exception:
+        # Use the !s conversion: exception objects do not support the "s"
+        # format specification, which raises TypeError on Python 3 and
+        # would hide the actual decoding problem.
+        raise errors.UnableToParseFile(
+            'Unexpected binary content in file: {0!s}'.format(exception))
+
+      values = line.strip().split(self.DELIMITER)
+      number_of_values = len(values)
+
+      if number_of_values < self.MIN_COLUMNS:
+        raise errors.UnableToParseFile(
+            'Expected at least {0:d} values, found {1:d}'.format(
+                self.MIN_COLUMNS, number_of_values))
+
+      if number_of_values > len(self.COLUMNS):
+        raise errors.UnableToParseFile(
+            'Expected at most {0:d} values, found {1:d}'.format(
+                len(self.COLUMNS), number_of_values))
+
+      yield dict(zip(self.COLUMNS, values))
+
+  def _ParseTimestamp(self, parser_mediator, row):
+    """Provides a timestamp for the given row.
+
+    If the Trend Micro log comes from a version that provides a Unix timestamp,
+    use that directly; it provides the advantages of UTC and of second
+    precision. Otherwise fall back onto the local-timezone date and time.
+
+    An extraction error is produced for every value that cannot be parsed. An
+    invalid Unix timestamp does not prevent the fallback onto the date and
+    time columns.
+
+    Args:
+      parser_mediator (ParserMediator): mediates interactions between parsers
+          and other components, such as storage and dfvfs.
+      row (dict[str, str]): fields of a single row, as specified in COLUMNS.
+
+    Returns:
+      dfdatetime.interface.DateTimeValues: the parsed timestamp or None if
+          neither the Unix timestamp nor the date and time could be parsed.
+    """
+    if 'timestamp' in row:
+      try:
+        return dfdatetime_posix_time.PosixTime(timestamp=int(row['timestamp']))
+      except ValueError as exception:
+        # The exception is formatted with !s since exception objects do not
+        # support the "s" format specification on Python 3.
+        parser_mediator.ProduceExtractionError(
+            'Log line has a timestamp field: [{0:s}], but it is invalid: {1!s}'
+            .format(repr(row['timestamp']), exception))
+
+    # The Unix timestamp is not available; parse the local date and time.
+    try:
+      return self._ConvertToTimestamp(row['date'], row['time'])
+    except ValueError as exception:
+      parser_mediator.ProduceExtractionError(
+          'Unable to parse time string: [{0:s} {1:s}] with error {2!s}'
+          .format(repr(row['date']), repr(row['time']), exception))
+
+    return None
+
+  def _ConvertToTimestamp(self, date, time):
+    """Converts date and time strings into a timestamp.
+
+    Recent versions of Office Scan write a log field with a Unix timestamp.
+    Older versions may not write this field; their logs only provide a date and
+    a time expressed in the local time zone. This function handles the latter
+    case.
+
+    Args:
+      date (str): date as an 8-character string in the YYYYMMDD format.
+      time (str): time as a 3 or 4-character string in the [H]HMM format.
+          Other lengths, such as a 6-character HHMMSS string, are rejected.
+
+    Returns:
+      dfdatetime.time_elements.TimeElements: the parsed timestamp, flagged as
+          local time with a precision of 1 minute.
+
+    Raises:
+      ValueError: if the date/time values cannot be parsed.
+    """
+    # Check that the strings have the correct length.
+    if len(date) != 8:
+      raise ValueError('date has wrong length: len({0!s}) != 8'.format(
+          repr(date)))
+    if len(time) not in (3, 4):
+      raise ValueError('time has wrong length: len({0!s}) not in (3, 4)'.format(
+          repr(time)))
+
+    # Extract the date.
+    year = int(date[:4])
+    month = int(date[4:6])
+    day = int(date[6:8])
+
+    # Extract the time. Note that a single-digit hour value has no leading
+    # zero, hence the minutes are taken from the end of the string.
+    hour = int(time[:-2])
+    minutes = int(time[-2:])
+
+    # The log does not provide seconds in this format.
+    time_elements_tuple = (year, month, day, hour, minutes, 0)
+    date_time = dfdatetime_time_elements.TimeElements(
+        time_elements_tuple=time_elements_tuple)
+    date_time.is_local_time = True
+    date_time.precision = dfdatetime_definitions.PRECISION_1_MINUTE
+
+    return date_time
+
+class OfficeScanVirusDetectionParser(TrendMicroBaseParser):
+  """Parses the Trend Micro Office Scan Virus Detection Log."""
+
+  NAME = 'trendmicro_vd'
+  DESCRIPTION = 'Parser for Trend Micro Office Scan Virus Detection log files.'
+
+  COLUMNS = [
+      'date', 'time', 'threat', 'action', 'scan_type', 'unused1',
+      'path', 'filename', 'unused2', 'timestamp', 'unused3', 'unused4']
+  MIN_COLUMNS = 8
+
+  def __init__(self, *args, **kwargs):
+    """Initializes the parser.
+
+    The TrendMicro AV writes a text logfile encoded in the CP1252 charset;
+    unless otherwise specified, the parser class needs to know this.
+
+    Args:
+      args (list): positional arguments passed on to the parent class.
+      kwargs (dict): keyword arguments passed on to the parent class; the
+          "encoding" keyword defaults to "cp1252" when not provided.
+    """
+    kwargs.setdefault('encoding', 'cp1252')
+    super(OfficeScanVirusDetectionParser, self).__init__(*args, **kwargs)
+
+  def _ParseIntegerField(self, parser_mediator, row, name):
+    """Converts the value of a column into an integer.
+
+    Args:
+      parser_mediator (ParserMediator): mediates interactions between parsers
+          and other components, such as storage and dfvfs.
+      row (dict[str, str]): fields of a single row, as specified in COLUMNS.
+      name (str): name of the column to convert.
+
+    Returns:
+      int: integer value of the column or None if the value is not a valid
+          integer, in which case an extraction error is produced.
+    """
+    value = row[name]
+    try:
+      return int(value)
+    except (TypeError, ValueError):
+      parser_mediator.ProduceExtractionError(
+          'Unable to parse {0:s} value: {1:s}'.format(name, repr(value)))
+    return None
+
+  def ParseRow(self, parser_mediator, row_offset, row):
+    """Parses a line of the log file and produces events.
+
+    No event is produced when the row has no usable timestamp. A malformed
+    action or scan type does not abort parsing; an extraction error is
+    produced and the corresponding event data attribute is left as None.
+
+    Args:
+      parser_mediator (ParserMediator): mediates interactions between parsers
+          and other components, such as storage and dfvfs.
+      row_offset (int): line number of the row.
+      row (dict[str, str]): fields of a single row, as specified in COLUMNS.
+    """
+    timestamp = self._ParseTimestamp(parser_mediator, row)
+
+    if timestamp is None:
+      return
+
+    event_data = TrendMicroAVEventData()
+    event_data.offset = row_offset
+    event_data.threat = row['threat']
+    event_data.action = self._ParseIntegerField(parser_mediator, row, 'action')
+    event_data.path = row['path']
+    event_data.filename = row['filename']
+    event_data.scan_type = self._ParseIntegerField(
+        parser_mediator, row, 'scan_type')
+
+    event = time_events.DateTimeValuesEvent(
+        timestamp, definitions.TIME_DESCRIPTION_WRITTEN)
+    parser_mediator.ProduceEventWithEventData(event, event_data)
+
+  def VerifyRow(self, parser_mediator, row):
+    """Verifies if a line of the file is in the expected format.
+
+    Args:
+      parser_mediator (ParserMediator): mediates interactions between parsers
+          and other components, such as storage and dfvfs.
+      row (dict[str, str]): fields of a single row, as specified in COLUMNS.
+
+    Returns:
+      bool: True if this is the correct parser, False otherwise.
+    """
+    if len(row) < self.MIN_COLUMNS:
+      return False
+
+    # Check the date format!
+    # If it doesn't parse, then this isn't a Trend Micro AV log. Note that
+    # _ConvertToTimestamp signals a failure by raising ValueError, it never
+    # returns None.
+    try:
+      self._ConvertToTimestamp(row['date'], row['time'])
+    except (TypeError, ValueError):
+      return False
+
+    # Check that the action value is plausible.
+    try:
+      action = int(row['action'])
+    except (TypeError, ValueError):
+      return False
+
+    # All checks passed if the action is a known scan result.
+    return action in formatter.SCAN_RESULTS
+
+
+# Register the Office Scan virus detection parser with the parsers manager;
+# this module-level call runs when the module is imported.
+manager.ParsersManager.RegisterParser(OfficeScanVirusDetectionParser)
« no previous file with comments | « plaso/parsers/__init__.py ('k') | test_data/pccnt35.log » ('j') | no next file with comments »

Powered by Google App Engine
RSS Feeds Recent Issues | This issue
This is Rietveld f62528b