LEFT | RIGHT |
(no file at all) | |
# -*- coding: utf-8 -*-
2 """Parser for SCCM Logs.""" | 2 """Parser for SCCM Logs.""" |
3 | 3 |
4 from __future__ import unicode_literals | 4 from __future__ import unicode_literals |
5 | 5 |
6 import re | 6 import re |
| 7 |
| 8 from dfdatetime import time_elements as dfdatetime_time_elements |
7 | 9 |
8 import pyparsing | 10 import pyparsing |
9 | 11 |
10 from plaso.containers import events | 12 from plaso.containers import events |
11 from plaso.containers import time_events | 13 from plaso.containers import time_events |
12 from plaso.lib import errors | 14 from plaso.lib import errors |
13 from plaso.lib import definitions | 15 from plaso.lib import definitions |
14 from plaso.lib import timelib | |
15 from plaso.parsers import manager | 16 from plaso.parsers import manager |
16 from plaso.parsers import text_parser | 17 from plaso.parsers import text_parser |
17 | 18 |
18 | 19 |
19 class SCCMLogEventData(events.EventData): | 20 class SCCMLogEventData(events.EventData): |
20 """SCCM log event data. | 21 """SCCM log event data. |
21 | 22 |
22 Attributes: | 23 Attributes: |
23 component (str): component. | 24 component (str): component. |
24 text (str): text. | 25 text (str): text. |
(...skipping 14 matching lines...) Expand all Loading... |
# Short parser name and human readable description of the parser.
NAME = 'sccm'
DESCRIPTION = 'Parser for SCCM logs files.'

# The "utf-8-sig" codec skips a leading UTF-8 byte order mark when decoding.
_ENCODING = 'utf-8-sig'

# Increasing the buffer size as SCCM messages are commonly well larger
# than the default value.
BUFFER_SIZE = 16384

# Placeholder value; reassigned with the actual (key, grammar) pairs once
# the grammars have been defined further down.
LINE_STRUCTURES = []

# Local shorthands for the digit expressions shared via text_parser.
_FOUR_DIGITS = text_parser.PyparsingConstants.FOUR_DIGITS
_ONE_OR_TWO_DIGITS = text_parser.PyparsingConstants.ONE_OR_TWO_DIGITS
54 | 53 |
# PyParsing Components used to construct grammars for parsing lines.
# Every component that carries a value of interest is given a results name
# so that it can be read back from the parsed structure by that name.
_PARSING_COMPONENTS = {
    # Markers that enclose the message text of a log entry.
    'msg_left_delimiter': pyparsing.Literal('<![LOG['),
    'msg_right_delimiter': pyparsing.Literal(']LOG]!><time="'),
    'year': _FOUR_DIGITS.setResultsName('year'),
    'month': _ONE_OR_TWO_DIGITS.setResultsName('month'),
    'day': _ONE_OR_TWO_DIGITS.setResultsName('day'),
    # Between 3 and 7 digits, kept as a string so its length is preserved.
    'fraction_of_second': pyparsing.Regex(r'\d{3,7}'). setResultsName(
        'fraction_of_second'),
    # A sign followed by exactly three digits, for example "-330".
    'utc_offset_minutes': pyparsing.Regex(r'[-+]\d{3}').setResultsName(
        'utc_offset_minutes'),
    'date_prefix': pyparsing.Literal('" date="'). setResultsName(
        'date_prefix'),
    'component_prefix': pyparsing.Literal('" component="').setResultsName(
        'component_prefix'),
    'component': pyparsing.Word(pyparsing.alphanums).setResultsName(
        'component'),
    # Non-greedy match of everything up to, but not including, the right
    # delimiter; re.DOTALL lets the message text span multiple lines.
    'text': pyparsing.Regex(
        r'.*?(?=(]LOG]!><time="))', re.DOTALL).setResultsName('text'),
    # Non-greedy match of everything up to, but not including, the left
    # delimiter of the next log entry.
    'line_remainder': pyparsing.Regex(
        r'.*?(?=(\<!\[LOG\[))', re.DOTALL).setResultsName('line_remainder'),
    # Remainder of the current line, used by the "at_end" line structures.
    'lastline_remainder': pyparsing.restOfLine.setResultsName(
        'lastline_remainder'),
    'hour': _ONE_OR_TWO_DIGITS.setResultsName('hour'),
    'minute': text_parser.PyparsingConstants.TWO_DIGITS.setResultsName(
        'minute'),
    'second': text_parser.PyparsingConstants.TWO_DIGITS.setResultsName(
        'second')}
82 | 82 |
# Base grammar for individual log event lines.
# Matches, in order: the delimited message text, the time as
# hour:minute:second.fraction, the date as month-day-year and the component.
# This variant has no UTC offset after the fraction of second.
LINE_GRAMMAR_BASE = (
    _PARSING_COMPONENTS['msg_left_delimiter'] +
    _PARSING_COMPONENTS['text'] +
    _PARSING_COMPONENTS['msg_right_delimiter'] +
    _PARSING_COMPONENTS['hour'] +
    pyparsing.Suppress(':') + _PARSING_COMPONENTS['minute'] +
    pyparsing.Suppress(':') + _PARSING_COMPONENTS['second'] +
    pyparsing.Suppress('.') + _PARSING_COMPONENTS['fraction_of_second'] +
    _PARSING_COMPONENTS['date_prefix'] + _PARSING_COMPONENTS['month'] +
    pyparsing.Suppress('-') + _PARSING_COMPONENTS['day'] +
    pyparsing.Suppress('-') + _PARSING_COMPONENTS['year'] +
    _PARSING_COMPONENTS['component_prefix'] +
    _PARSING_COMPONENTS['component'])
97 | 97 |
# Grammar for individual log event lines with a minutes offset from UTC.
# Identical to the base grammar except that a signed three digit offset,
# for example "-330", directly follows the fraction of second.
LINE_GRAMMAR_OFFSET = (
    _PARSING_COMPONENTS['msg_left_delimiter'] +
    _PARSING_COMPONENTS['text'] +
    _PARSING_COMPONENTS['msg_right_delimiter'] +
    _PARSING_COMPONENTS['hour'] +
    pyparsing.Suppress(':') + _PARSING_COMPONENTS['minute'] +
    pyparsing.Suppress(':') + _PARSING_COMPONENTS['second'] +
    pyparsing.Suppress('.') + _PARSING_COMPONENTS['fraction_of_second'] +
    _PARSING_COMPONENTS['utc_offset_minutes'] +
    _PARSING_COMPONENTS['date_prefix'] + _PARSING_COMPONENTS['month'] +
    pyparsing.Suppress('-') + _PARSING_COMPONENTS['day'] +
    pyparsing.Suppress('-') + _PARSING_COMPONENTS['year'] +
    _PARSING_COMPONENTS['component_prefix'] +
    _PARSING_COMPONENTS['component'])
113 | 113 |
# Pairs of (key, grammar). The keys are the structure names that ParseRecord
# accepts. The plain variants consume everything up to the start of the next
# log entry; the "at_end" variants consume the rest of the line instead,
# presumably for an entry that is not followed by another one.
LINE_STRUCTURES = [
    ('log_entry',
     LINE_GRAMMAR_BASE + _PARSING_COMPONENTS['line_remainder']),
    ('log_entry_at_end',
     LINE_GRAMMAR_BASE +_PARSING_COMPONENTS['lastline_remainder'] +
     pyparsing.lineEnd),
    ('log_entry_offset',
     LINE_GRAMMAR_OFFSET + _PARSING_COMPONENTS['line_remainder']),
    ('log_entry_offset_at_end',
     LINE_GRAMMAR_OFFSET + _PARSING_COMPONENTS['lastline_remainder'] +
     pyparsing.lineEnd)]
125 | 125 |
| 126 def _GetISO8601String(self, structure): |
| 127 """Retrieves an ISO8601 date time string from the structure. |
| 128 |
| 129 The date and time values in the SCCM log are formatted as: |
| 130 time="19:33:19.766-330" date="11-28-2014" |
| 131 |
| 132 Args: |
| 133 structure (pyparsing.ParseResults): structure of tokens derived from |
| 134 a line of a text file. |
| 135 |
| 136 Returns: |
| 137 str: ISO 8601 date time string. |
| 138 |
| 139 Raises: |
| 140 ValueError: if the structure cannot be converted into a date time string. |
| 141 """ |
| 142 fraction_of_second_length = len(structure.fraction_of_second) |
| 143 if fraction_of_second_length not in (3, 6, 7): |
| 144 raise ValueError( |
| 145 'unsupported time fraction of second length: {0:d}'.format( |
| 146 fraction_of_second_length)) |
| 147 |
| 148 try: |
| 149 fraction_of_second = int(structure.fraction_of_second, 10) |
| 150 except (TypeError, ValueError) as exception: |
| 151 raise ValueError( |
| 152 'unable to determine fraction of second with error: {0!s}'.format( |
| 153 exception)) |
| 154 |
| 155 # TODO: improve precision support, but for now ignore the 100ns precision. |
| 156 if fraction_of_second_length == 7: |
| 157 fraction_of_second, _ = divmod(fraction_of_second, 10) |
| 158 |
| 159 date_time_string = '{0:04d}-{1:02d}-{2:02d}T{3:02d}:{4:02d}:{5:02d}'.format( |
| 160 structure.year, structure.month, structure.day, structure.hour, |
| 161 structure.minute, structure.second) |
| 162 |
| 163 if fraction_of_second_length > 0: |
| 164 date_time_string = '{0:s}.{1:d}'.format( |
| 165 date_time_string, fraction_of_second) |
| 166 |
| 167 utc_offset_minutes = structure.get('utc_offset_minutes', None) |
| 168 if utc_offset_minutes is not None: |
| 169 try: |
| 170 time_zone_offset = int(utc_offset_minutes[1:], 10) |
| 171 except (IndexError, ValueError) as exception: |
| 172 raise ValueError( |
| 173 'Unable to parse time zone offset with error: {0!s}.'.format( |
| 174 exception)) |
| 175 |
| 176 time_zone_hours, time_zone_minutes = divmod(time_zone_offset, 60) |
| 177 date_time_string = '{0:s}{1:s}{2:02d}:{3:02d}'.format( |
| 178 date_time_string, utc_offset_minutes[0], time_zone_hours, |
| 179 time_zone_minutes) |
| 180 |
| 181 return date_time_string |
| 182 |
def ParseRecord(self, parser_mediator, key, structure):
    """Parses a log record structure and produces an SCCM log event.

    An extraction error is reported, and no event is produced, when the
    date and time values of the record cannot be converted.

    Args:
      parser_mediator (ParserMediator): mediates interactions between parsers
          and other components, such as storage and dfvfs.
      key (str): name of the parsed structure.
      structure (pyparsing.ParseResults): structure of tokens derived from
          a line of a text file.

    Raises:
      ParseError: when the structure type is unknown.
    """
    if key not in (
            'log_entry', 'log_entry_at_end', 'log_entry_offset',
            'log_entry_offset_at_end'):
        raise errors.ParseError(
            'Unable to parse record, unknown structure: {0:s}'.format(key))

    try:
        date_time_string = self._GetISO8601String(structure)
    except ValueError as exception:
        parser_mediator.ProduceExtractionError(
            'unable to determine date time string with error: {0!s}'.format(
                exception))
        # Without a date time string there is nothing left to parse;
        # continuing would reference an unassigned local variable.
        return

    # _GetISO8601String only succeeds for a fraction of second of 3, 6 or 7
    # digits: 3 digits are milliseconds, the others are stored with
    # microsecond precision.
    if len(structure.fraction_of_second) == 3:
        date_time = dfdatetime_time_elements.TimeElementsInMilliseconds()
    else:
        date_time = dfdatetime_time_elements.TimeElementsInMicroseconds()

    try:
        date_time.CopyFromStringISO8601(date_time_string)
    except ValueError as exception:
        parser_mediator.ProduceExtractionError(
            'unable to parse date time value: {0:s} with error: {1!s}'.format(
                date_time_string, exception))
        return

    event_data = SCCMLogEventData()
    event_data.component = structure.component
    # TODO: pass line number to offset or remove.
    event_data.offset = 0
    event_data.text = structure.text

    event = time_events.DateTimeValuesEvent(
        date_time, definitions.TIME_DESCRIPTION_WRITTEN)
    parser_mediator.ProduceEventWithEventData(event, event_data)
198 | 232 |
def VerifyStructure(self, parser_mediator, lines):
    """Checks whether the content looks like an SCCM log file.

    Args:
      parser_mediator (ParserMediator): mediates interactions between parsers
          and other components, such as storage and dfvfs.
      lines (str): one or more lines from the text file.

    Returns:
      bool: True if this is the correct parser, False otherwise.
    """
    left_delimiter = self._PARSING_COMPONENTS['msg_left_delimiter']

    # A log file can start with a partial event, so the first character
    # (post-BOM) is not necessarily the start of an entry. Hence the text
    # of the left delimiter is searched for anywhere in lines.
    return left_delimiter.match in lines
218 | 252 |
219 | 253 |
# Register the SCCM parser class with the parsers manager.
manager.ParsersManager.RegisterParser(SCCMParser)
LEFT | RIGHT |