OLD | NEW |
1 # -*- coding: utf-8 -*- | 1 # -*- coding: utf-8 -*- |
2 """The extractor class definitions. | 2 """The extractor class definitions. |
3 | 3 |
4 An extractor is a class used to extract information from "raw" data. | 4 An extractor is a class used to extract information from "raw" data. |
5 """ | 5 """ |
6 | 6 |
7 from __future__ import unicode_literals | 7 from __future__ import unicode_literals |
8 | 8 |
9 import copy | 9 import copy |
10 import hashlib | 10 import hashlib |
(...skipping 10 matching lines...) Expand all Loading... |
21 from plaso.parsers import interface as parsers_interface | 21 from plaso.parsers import interface as parsers_interface |
22 from plaso.parsers import manager as parsers_manager | 22 from plaso.parsers import manager as parsers_manager |
23 | 23 |
24 | 24 |
25 class EventExtractor(object): | 25 class EventExtractor(object): |
26 """Event extractor. | 26 """Event extractor. |
27 | 27 |
28 An event extractor extracts events from event sources. | 28 An event extractor extracts events from event sources. |
29 """ | 29 """ |
30 | 30 |
| 31 _PARSE_RESULT_FAILURE = 1 |
| 32 _PARSE_RESULT_SUCCESS = 2 |
| 33 _PARSE_RESULT_UNSUPPORTED = 3 |
| 34 |
31 def __init__(self, parser_filter_expression=None): | 35 def __init__(self, parser_filter_expression=None): |
32 """Initializes an event extractor. | 36 """Initializes an event extractor. |
33 | 37 |
34 Args: | 38 Args: |
35 parser_filter_expression (Optional[str]): the parser filter expression, | 39 parser_filter_expression (Optional[str]): the parser filter expression, |
36 None represents all parsers and plugins. | 40 None represents all parsers and plugins. |
37 | 41 |
38 The parser filter expression is a comma separated value string that | 42 The parser filter expression is a comma separated value string that |
39 denotes a list of parser names to include and/or exclude. Each entry | 43 denotes a list of parser names to include and/or exclude. Each entry |
40 can have the value of: | 44 can have the value of: |
(...skipping 133 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
174 | 178 |
175 Args: | 179 Args: |
176 parser_mediator (ParserMediator): parser mediator. | 180 parser_mediator (ParserMediator): parser mediator. |
177 parser (BaseParser): parser. | 181 parser (BaseParser): parser. |
178 file_entry (dfvfs.FileEntry): file entry. | 182 file_entry (dfvfs.FileEntry): file entry. |
179 file_object (Optional[file]): file-like object to parse. | 183 file_object (Optional[file]): file-like object to parse. |
180 If not set the parser will use the parser mediator to open | 184 If not set the parser will use the parser mediator to open |
181 the file entry's default data stream as a file-like object. | 185 the file entry's default data stream as a file-like object. |
182 | 186 |
183 Returns: | 187 Returns: |
184 bool: False if the file could not be parsed and UnableToParseFile | 188 int: parse result which is _PARSE_RESULT_FAILURE if the file entry |
185 was raised. | 189 could not be parsed, _PARSE_RESULT_SUCCESS if the file entry |
| 190 was successfully parsed or _PARSE_RESULT_UNSUPPORTED when |
| 191 UnableToParseFile was raised. |
186 | 192 |
187 Raises: | 193 Raises: |
188 TypeError: if parser object is not a supported parser type. | 194 TypeError: if parser object is not a supported parser type. |
189 """ | 195 """ |
190 if not isinstance(parser, ( | 196 if not isinstance(parser, ( |
191 parsers_interface.FileEntryParser, parsers_interface.FileObjectParser)): | 197 parsers_interface.FileEntryParser, parsers_interface.FileObjectParser)): |
192 raise TypeError('Unsupported parser object type.') | 198 raise TypeError('Unsupported parser object type.') |
193 | 199 |
194 parser_mediator.ClearParserChain() | 200 parser_mediator.ClearParserChain() |
195 | 201 |
196 reference_count = ( | 202 reference_count = ( |
197 parser_mediator.resolver_context.GetFileObjectReferenceCount( | 203 parser_mediator.resolver_context.GetFileObjectReferenceCount( |
198 file_entry.path_spec)) | 204 file_entry.path_spec)) |
199 | 205 |
200 if self._parsers_profiler: | 206 if self._parsers_profiler: |
201 self._parsers_profiler.StartTiming(parser.NAME) | 207 self._parsers_profiler.StartTiming(parser.NAME) |
202 | 208 |
203 result = True | |
204 try: | 209 try: |
205 if isinstance(parser, parsers_interface.FileEntryParser): | 210 if isinstance(parser, parsers_interface.FileEntryParser): |
206 parser.Parse(parser_mediator) | 211 parser.Parse(parser_mediator) |
207 elif isinstance(parser, parsers_interface.FileObjectParser): | 212 elif isinstance(parser, parsers_interface.FileObjectParser): |
208 parser.Parse(parser_mediator, file_object) | 213 parser.Parse(parser_mediator, file_object) |
| 214 result = self._PARSE_RESULT_SUCCESS |
209 | 215 |
210 # We catch IOError so we can determine the parser that generated the error. | 216 # We catch IOError so we can determine the parser that generated the error. |
211 except (IOError, dfvfs_errors.BackEndError) as exception: | 217 except (IOError, dfvfs_errors.BackEndError) as exception: |
212 display_name = parser_mediator.GetDisplayName(file_entry) | 218 display_name = parser_mediator.GetDisplayName(file_entry) |
213 logger.warning( | 219 logger.warning( |
214 '{0:s} unable to parse file: {1:s} with error: {2!s}'.format( | 220 '{0:s} unable to parse file: {1:s} with error: {2!s}'.format( |
215 parser.NAME, display_name, exception)) | 221 parser.NAME, display_name, exception)) |
| 222 result = self._PARSE_RESULT_FAILURE |
216 | 223 |
217 except errors.UnableToParseFile as exception: | 224 except errors.UnableToParseFile as exception: |
218 display_name = parser_mediator.GetDisplayName(file_entry) | 225 display_name = parser_mediator.GetDisplayName(file_entry) |
219 logger.debug( | 226 logger.debug( |
220 '{0:s} unable to parse file: {1:s} with error: {2!s}'.format( | 227 '{0:s} unable to parse file: {1:s} with error: {2!s}'.format( |
221 parser.NAME, display_name, exception)) | 228 parser.NAME, display_name, exception)) |
222 result = False | 229 result = self._PARSE_RESULT_UNSUPPORTED |
223 | 230 |
224 finally: | 231 finally: |
225 if self._parsers_profiler: | 232 if self._parsers_profiler: |
226 self._parsers_profiler.StopTiming(parser.NAME) | 233 self._parsers_profiler.StopTiming(parser.NAME) |
227 | 234 |
228 new_reference_count = ( | 235 new_reference_count = ( |
229 parser_mediator.resolver_context.GetFileObjectReferenceCount( | 236 parser_mediator.resolver_context.GetFileObjectReferenceCount( |
230 file_entry.path_spec)) | 237 file_entry.path_spec)) |
231 if reference_count != new_reference_count: | 238 if reference_count != new_reference_count: |
232 display_name = parser_mediator.GetDisplayName(file_entry) | 239 display_name = parser_mediator.GetDisplayName(file_entry) |
233 logger.warning(( | 240 logger.warning(( |
234 '[{0:s}] did not explicitly close file-object for file: ' | 241 '[{0:s}] did not explicitly close file-object for file: ' |
235 '{1:s}.').format(parser.NAME, display_name)) | 242 '{1:s}.').format(parser.NAME, display_name)) |
236 | 243 |
237 return result | 244 return result |
238 | 245 |
239 def _ParserFileEntryWithParsers( | 246 def _ParserFileEntryWithParsers( |
240 self, parser_mediator, parser_names, file_entry, file_object=None): | 247 self, parser_mediator, parser_names, file_entry, file_object=None): |
241 """Parses a file entry with specific parsers. | 248 """Parses a file entry with specific parsers. |
242 | 249 |
243 Args: | 250 Args: |
244 parser_mediator (ParserMediator): parser mediator. | 251 parser_mediator (ParserMediator): parser mediator. |
245 parser_names (list[str]): names of parsers. | 252 parser_names (list[str]): names of parsers. |
246 file_entry (dfvfs.FileEntry): file entry. | 253 file_entry (dfvfs.FileEntry): file entry. |
247 file_object (Optional[file]): file-like object to parse. | 254 file_object (Optional[file]): file-like object to parse. |
248 If not set the parser will use the parser mediator to open | 255 If not set the parser will use the parser mediator to open |
249 the file entry's default data stream as a file-like object. | 256 the file entry's default data stream as a file-like object. |
250 | 257 |
251 Returns: | 258 Returns: |
252 bool: False if the file could not be parsed and UnableToParseFile | 259 int: parse result which is _PARSE_RESULT_FAILURE if the file entry |
253 was raised. | 260 could not be parsed, _PARSE_RESULT_SUCCESS if the file entry |
| 261 was successfully parsed or _PARSE_RESULT_UNSUPPORTED when |
| 262 UnableToParseFile was raised or no parser names were provided. |
254 | 263 |
255 Raises: | 264 Raises: |
256 RuntimeError: if the parser object is missing. | 265 RuntimeError: if the parser object is missing. |
257 """ | 266 """ |
| 267 parse_results = self._PARSE_RESULT_UNSUPPORTED |
258 for parser_name in parser_names: | 268 for parser_name in parser_names: |
259 parser = self._parsers.get(parser_name, None) | 269 parser = self._parsers.get(parser_name, None) |
260 if not parser: | 270 if not parser: |
261 raise RuntimeError( | 271 raise RuntimeError( |
262 'Parser object missing for parser: {0:s}'.format(parser_name)) | 272 'Parser object missing for parser: {0:s}'.format(parser_name)) |
263 | 273 |
264 if parser.FILTERS: | 274 if parser.FILTERS: |
265 if not self._CheckParserCanProcessFileEntry(parser, file_entry): | 275 if not self._CheckParserCanProcessFileEntry(parser, file_entry): |
| 276 parse_results = self._PARSE_RESULT_SUCCESS |
266 continue | 277 continue |
267 | 278 |
268 display_name = parser_mediator.GetDisplayName(file_entry) | 279 display_name = parser_mediator.GetDisplayName(file_entry) |
269 logger.debug(( | 280 logger.debug(( |
270 '[ParseDataStream] parsing file: {0:s} with parser: ' | 281 '[ParserFileEntryWithParsers] parsing file: {0:s} with parser: ' |
271 '{1:s}').format(display_name, parser_name)) | 282 '{1:s}').format(display_name, parser_name)) |
272 | 283 |
273 self._ParseFileEntryWithParser( | 284 parse_result = self._ParseFileEntryWithParser( |
274 parser_mediator, parser, file_entry, file_object=file_object) | 285 parser_mediator, parser, file_entry, file_object=file_object) |
| 286 if parse_result == self._PARSE_RESULT_FAILURE: |
| 287 return self._PARSE_RESULT_FAILURE |
| 288 |
| 289 elif parse_result == self._PARSE_RESULT_SUCCESS: |
| 290 parse_results = self._PARSE_RESULT_SUCCESS |
| 291 |
| 292 return parse_results |
275 | 293 |
276 def ParseDataStream(self, parser_mediator, file_entry, data_stream_name): | 294 def ParseDataStream(self, parser_mediator, file_entry, data_stream_name): |
277 """Parses a data stream of a file entry with the enabled parsers. | 295 """Parses a data stream of a file entry with the enabled parsers. |
278 | 296 |
279 Args: | 297 Args: |
280 parser_mediator (ParserMediator): parser mediator. | 298 parser_mediator (ParserMediator): parser mediator. |
281 file_entry (dfvfs.FileEntry): file entry. | 299 file_entry (dfvfs.FileEntry): file entry. |
282 data_stream_name (str): data stream name. | 300 data_stream_name (str): data stream name. |
283 | 301 |
284 Raises: | 302 Raises: |
285 RuntimeError: if the file-like object or the parser object is missing. | 303 RuntimeError: if the file-like object or the parser object is missing. |
286 """ | 304 """ |
287 file_object = file_entry.GetFileObject(data_stream_name=data_stream_name) | 305 file_object = file_entry.GetFileObject(data_stream_name=data_stream_name) |
288 if not file_object: | 306 if not file_object: |
289 raise RuntimeError( | 307 raise RuntimeError( |
290 'Unable to retrieve file-like object from file entry.') | 308 'Unable to retrieve file-like object from file entry.') |
291 | 309 |
292 try: | 310 try: |
293 parser_names = self._GetSignatureMatchParserNames(file_object) | 311 parser_names = self._GetSignatureMatchParserNames(file_object) |
294 if not parser_names: | 312 |
295 result = False | 313 parse_with_non_sigscan_parsers = True |
296 else: | 314 if parser_names: |
297 result = self._ParserFileEntryWithParsers( | 315 parse_result = self._ParserFileEntryWithParsers( |
298 parser_mediator, parser_names, file_entry, file_object=file_object) | 316 parser_mediator, parser_names, file_entry, file_object=file_object) |
| 317 if parse_result in ( |
| 318 self._PARSE_RESULT_FAILURE, self._PARSE_RESULT_SUCCESS): |
| 319 parse_with_non_sigscan_parsers = False |
299 | 320 |
300 if not result: | 321 if parse_with_non_sigscan_parsers: |
301 self._ParserFileEntryWithParsers( | 322 self._ParserFileEntryWithParsers( |
302 parser_mediator, self._non_sigscan_parser_names, file_entry, | 323 parser_mediator, self._non_sigscan_parser_names, file_entry, |
303 file_object=file_object) | 324 file_object=file_object) |
304 | 325 |
305 finally: | 326 finally: |
306 file_object.close() | 327 file_object.close() |
307 | 328 |
308 def ParseFileEntryMetadata(self, parser_mediator, file_entry): | 329 def ParseFileEntryMetadata(self, parser_mediator, file_entry): |
309 """Parses the file entry metadata e.g. file system data. | 330 """Parses the file entry metadata e.g. file system data. |
310 | 331 |
(...skipping 313 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
624 | 645 |
625 Yields: | 646 Yields: |
626 dfvfs.PathSpec: path specification of a file entry found in the source. | 647 dfvfs.PathSpec: path specification of a file entry found in the source. |
627 """ | 648 """ |
628 for path_spec in path_specs: | 649 for path_spec in path_specs: |
629 for extracted_path_spec in self._ExtractPathSpecs( | 650 for extracted_path_spec in self._ExtractPathSpecs( |
630 path_spec, find_specs=find_specs, | 651 path_spec, find_specs=find_specs, |
631 recurse_file_system=recurse_file_system, | 652 recurse_file_system=recurse_file_system, |
632 resolver_context=resolver_context): | 653 resolver_context=resolver_context): |
633 yield extracted_path_spec | 654 yield extracted_path_spec |
OLD | NEW |