OLD | NEW |
1 # -*- coding: utf-8 -*- | 1 # -*- coding: utf-8 -*- |
2 """The extraction CLI tool.""" | 2 """The extraction CLI tool.""" |
3 | 3 |
4 import yara | |
5 | |
6 from artifacts import errors as artifacts_errors | |
7 from artifacts import reader as artifacts_reader | |
8 from artifacts import registry as artifacts_registry | |
9 | |
10 # The following import makes sure the analyzers are registered. | 4 # The following import makes sure the analyzers are registered. |
11 from plaso import analyzers # pylint: disable=unused-import | 5 from plaso import analyzers # pylint: disable=unused-import |
12 | 6 |
13 # The following import makes sure the parsers are registered. | 7 # The following import makes sure the parsers are registered. |
14 from plaso import parsers # pylint: disable=unused-import | 8 from plaso import parsers # pylint: disable=unused-import |
15 | 9 |
16 from plaso.analyzers.hashers import manager as hashers_manager | 10 from plaso.analyzers.hashers import manager as hashers_manager |
17 from plaso.cli import storage_media_tool | 11 from plaso.cli import storage_media_tool |
18 from plaso.cli import views | 12 from plaso.cli import views |
19 from plaso.lib import definitions | 13 from plaso.cli.helpers import manager as helpers_manager |
20 from plaso.lib import errors | 14 from plaso.lib import errors |
21 from plaso.lib import py2to3 | 15 from plaso.lib import py2to3 |
22 from plaso.parsers import manager as parsers_manager | 16 from plaso.parsers import manager as parsers_manager |
23 from plaso.parsers import presets as parsers_presets | 17 from plaso.parsers import presets as parsers_presets |
24 | 18 |
25 | 19 |
26 class ExtractionTool(storage_media_tool.StorageMediaTool): | 20 class ExtractionTool(storage_media_tool.StorageMediaTool): |
27 """Class that implements an extraction CLI tool. | 21 """Extraction CLI tool. |
28 | 22 |
29 Attributes: | 23 Attributes: |
30 list_hashers (bool): True if the hashers should be listed. | 24 list_hashers (bool): True if the hashers should be listed. |
31 list_parsers_and_plugins (bool): True if the parsers and plugins should | 25 list_parsers_and_plugins (bool): True if the parsers and plugins should |
32 be listed. | 26 be listed. |
33 """ | 27 """ |
34 | 28 |
35 # Approximately 250 MB of queued items per worker. | 29 # Approximately 250 MB of queued items per worker. |
36 _DEFAULT_QUEUE_SIZE = 125000 | 30 _DEFAULT_QUEUE_SIZE = 125000 |
37 | 31 |
38 # Enable the SHA256 hasher by default. | 32 # Enable the SHA256 hasher by default. |
39 _DEFAULT_HASHER_STRING = u'sha256' | 33 _DEFAULT_HASHER_STRING = u'sha256' |
40 | 34 |
41 _BYTES_IN_A_MIB = 1024 * 1024 | 35 _BYTES_IN_A_MIB = 1024 * 1024 |
42 | 36 |
43 def __init__(self, input_reader=None, output_writer=None): | 37 def __init__(self, input_reader=None, output_writer=None): |
44 """Initializes the CLI tool object. | 38 """Initializes a CLI tool. |
45 | 39 |
46 Args: | 40 Args: |
47 input_reader (Optional[InputReader]): input reader, where None indicates | 41 input_reader (Optional[InputReader]): input reader, where None indicates |
48 that the stdin input reader should be used. | 42 that the stdin input reader should be used. |
49 output_writer (Optional[OutputWriter]): output writer, where None | 43 output_writer (Optional[OutputWriter]): output writer, where None |
50 indicates that the stdout output writer should be used. | 44 indicates that the stdout output writer should be used. |
51 """ | 45 """ |
52 super(ExtractionTool, self).__init__( | 46 super(ExtractionTool, self).__init__( |
53 input_reader=input_reader, output_writer=output_writer) | 47 input_reader=input_reader, output_writer=output_writer) |
54 self._artifacts_registry = None | 48 self._artifacts_registry = None |
55 self._buffer_size = 0 | 49 self._buffer_size = 0 |
56 self._force_preprocessing = False | 50 self._force_preprocessing = False |
57 self._hashers_manager = hashers_manager.HashersManager | 51 self._hashers_manager = hashers_manager.HashersManager |
58 self._hasher_names_string = None | 52 self._hasher_names_string = None |
59 self._mount_path = None | 53 self._mount_path = None |
60 self._operating_system = None | 54 self._operating_system = None |
61 self._output_module = None | 55 self._output_module = None |
62 self._parser_filter_expression = None | 56 self._parser_filter_expression = None |
63 self._parsers_manager = parsers_manager.ParsersManager | 57 self._parsers_manager = parsers_manager.ParsersManager |
64 self._preferred_year = None | 58 self._preferred_year = None |
65 self._process_archives = False | 59 self._process_archives = False |
66 self._process_compressed_streams = True | 60 self._process_compressed_streams = True |
67 self._queue_size = self._DEFAULT_QUEUE_SIZE | 61 self._queue_size = self._DEFAULT_QUEUE_SIZE |
68 self._single_process_mode = False | 62 self._single_process_mode = False |
69 self._storage_serializer_format = definitions.SERIALIZER_FORMAT_JSON | |
70 self._text_prepend = None | 63 self._text_prepend = None |
71 self._yara_rules_string = None | 64 self._yara_rules_string = None |
72 | 65 |
73 self.list_hashers = False | 66 self.list_hashers = False |
74 self.list_parsers_and_plugins = False | 67 self.list_parsers_and_plugins = False |
75 | 68 |
76 def _GetParserPresetsInformation(self): | 69 def _GetParserPresetsInformation(self): |
77 """Retrieves the parser presets information. | 70 """Retrieves the parser presets information. |
78 | 71 |
79 Returns: | 72 Returns: |
80 list[tuple]: contains: | 73 list[tuple]: contains: |
81 | 74 |
82 str: parser preset name | 75 str: parser preset name |
83 str: parsers names corresponding to the preset | 76 str: parsers names corresponding to the preset |
84 """ | 77 """ |
85 parser_presets_information = [] | 78 parser_presets_information = [] |
86 for preset_name, parser_names in sorted(parsers_presets.CATEGORIES.items()): | 79 for preset_name, parser_names in sorted(parsers_presets.CATEGORIES.items()): |
87 parser_presets_information.append((preset_name, u', '.join(parser_names))) | 80 parser_presets_information.append((preset_name, u', '.join(parser_names))) |
88 | 81 |
89 return parser_presets_information | 82 return parser_presets_information |
90 | 83 |
91 def _ParseArtifactDefinitionsOption(self, options): | |
92 """Parses the artifact definitions option. | |
93 | |
94 Args: | |
95 options (argparse.Namespace): command line arguments. | |
96 | |
97 Raises: | |
98 BadConfigOption: if the options are invalid. | |
99 """ | |
100 path = getattr(options, u'artifact_definitions_path', None) | |
101 if not path: | |
102 return | |
103 | |
104 self._artifacts_registry = artifacts_registry.ArtifactDefinitionsRegistry() | |
105 reader = artifacts_reader.YamlArtifactsReader() | |
106 | |
107 try: | |
108 self._artifacts_registry.ReadFromDirectory(reader, path) | |
109 | |
110 except (KeyError, artifacts_errors.FormatError) as exception: | |
111 raise errors.BadConfigObject(( | |
112 u'Unable to read artifact definitions from: {0:s} with error: ' | |
113 u'{1!s}').format(path, exception)) | |
114 | |
115 def _ParseExtractionOptions(self, options): | 84 def _ParseExtractionOptions(self, options): |
116 """Parses the extraction options. | 85 """Parses the extraction options. |
117 | 86 |
118 Args: | 87 Args: |
119 options (argparse.Namespace): command line arguments. | 88 options (argparse.Namespace): command line arguments. |
120 | 89 |
121 Raises: | 90 Raises: |
122 BadConfigOption: if the options are invalid. | 91 BadConfigOption: if the options are invalid. |
123 """ | 92 """ |
124 self._ParseArtifactDefinitionsOption(options) | |
125 | |
126 self._hasher_names_string = getattr( | 93 self._hasher_names_string = getattr( |
127 options, u'hashers', self._DEFAULT_HASHER_STRING) | 94 options, u'hashers', self._DEFAULT_HASHER_STRING) |
128 if isinstance(self._hasher_names_string, py2to3.STRING_TYPES): | 95 if isinstance(self._hasher_names_string, py2to3.STRING_TYPES): |
129 if self._hasher_names_string.lower() == u'list': | 96 if self._hasher_names_string.lower() == u'list': |
130 self.list_hashers = True | 97 self.list_hashers = True |
131 | 98 |
132 parser_filter_expression = self.ParseStringOption( | 99 parser_filter_expression = self.ParseStringOption( |
133 options, u'parsers', default_value=u'') | 100 options, u'parsers', default_value=u'') |
134 self._parser_filter_expression = parser_filter_expression.replace( | 101 self._parser_filter_expression = parser_filter_expression.replace( |
135 u'\\', u'/') | 102 u'\\', u'/') |
136 | 103 |
137 if (isinstance(self._parser_filter_expression, py2to3.STRING_TYPES) and | 104 if (isinstance(self._parser_filter_expression, py2to3.STRING_TYPES) and |
138 self._parser_filter_expression.lower() == u'list'): | 105 self._parser_filter_expression.lower() == u'list'): |
139 self.list_parsers_and_plugins = True | 106 self.list_parsers_and_plugins = True |
140 | 107 |
141 self._force_preprocessing = getattr(options, u'preprocess', False) | 108 self._force_preprocessing = getattr(options, u'preprocess', False) |
142 | 109 |
143 self._preferred_year = self.ParseNumericOption(options, u'preferred_year') | 110 self._preferred_year = self.ParseNumericOption(options, u'preferred_year') |
144 | 111 |
145 self._process_archives = getattr(options, u'process_archives', False) | 112 self._process_archives = getattr(options, u'process_archives', False) |
146 self._process_compressed_streams = getattr( | 113 self._process_compressed_streams = getattr( |
147 options, u'process_compressed_streams', True) | 114 options, u'process_compressed_streams', True) |
148 | 115 |
149 self._ParseYaraRulesOption(options) | 116 helpers_manager.ArgumentHelperManager.ParseOptions( |
| 117 options, self, names=[u'artifact_definitions', u'yara_rules']) |
150 | 118 |
151 def _ParsePerformanceOptions(self, options): | 119 def _ParsePerformanceOptions(self, options): |
152 """Parses the performance options. | 120 """Parses the performance options. |
153 | 121 |
154 Args: | 122 Args: |
155 options (argparse.Namespace): command line arguments. | 123 options (argparse.Namespace): command line arguments. |
156 | 124 |
157 Raises: | 125 Raises: |
158 BadConfigOption: if the options are invalid. | 126 BadConfigOption: if the options are invalid. |
159 """ | 127 """ |
160 self._buffer_size = getattr(options, u'buffer_size', 0) | 128 self._buffer_size = getattr(options, u'buffer_size', 0) |
161 if self._buffer_size: | 129 if self._buffer_size: |
162 # TODO: turn this into a generic function that supports more size | 130 # TODO: turn this into a generic function that supports more size |
163 # suffixes both MB and MiB and also that does not allow m as a valid | 131 # suffixes both MB and MiB and also that does not allow m as a valid |
164 # indicator for MiB since m represents milli not Mega. | 132 # indicator for MiB since m represents milli not Mega. |
165 try: | 133 try: |
166 if self._buffer_size[-1].lower() == u'm': | 134 if self._buffer_size[-1].lower() == u'm': |
167 self._buffer_size = int(self._buffer_size[:-1], 10) | 135 self._buffer_size = int(self._buffer_size[:-1], 10) |
168 self._buffer_size *= self._BYTES_IN_A_MIB | 136 self._buffer_size *= self._BYTES_IN_A_MIB |
169 else: | 137 else: |
170 self._buffer_size = int(self._buffer_size, 10) | 138 self._buffer_size = int(self._buffer_size, 10) |
171 except ValueError: | 139 except ValueError: |
172 raise errors.BadConfigOption( | 140 raise errors.BadConfigOption( |
173 u'Invalid buffer size: {0:s}.'.format(self._buffer_size)) | 141 u'Invalid buffer size: {0:s}.'.format(self._buffer_size)) |
174 | 142 |
175 self._queue_size = self.ParseNumericOption(options, u'queue_size') | 143 self._queue_size = self.ParseNumericOption(options, u'queue_size') |
176 | 144 |
177 def _ParseStorageOptions(self, options): | |
178 """Parses the storage options. | |
179 | |
180 Args: | |
181 options (argparse.Namespace): command line arguments. | |
182 | |
183 Raises: | |
184 BadConfigOption: if the options are invalid. | |
185 """ | |
186 serializer_format = getattr( | |
187 options, u'serializer_format', definitions.SERIALIZER_FORMAT_JSON) | |
188 if serializer_format not in definitions.SERIALIZER_FORMATS: | |
189 raise errors.BadConfigOption( | |
190 u'Unsupported storage serializer format: {0:s}.'.format( | |
191 serializer_format)) | |
192 self._storage_serializer_format = serializer_format | |
193 | |
194 def _ParseYaraRulesOption(self, options): | |
195 """Parses the yara rules option. | |
196 | |
197 Args: | |
198 options (argparse.Namespace): command line arguments. | |
199 | |
200 Raises: | |
201 BadConfigOption: if the options are invalid. | |
202 """ | |
203 path = getattr(options, u'yara_rules_path', None) | |
204 if not path: | |
205 return | |
206 | |
207 try: | |
208 with open(path, 'rb') as rules_file: | |
209 self._yara_rules_string = rules_file.read() | |
210 | |
211 except IOError as exception: | |
212 raise errors.BadConfigObject( | |
213 u'Unable to read Yara rules file: {0:s} with error: {1!s}'.format( | |
214 path, exception)) | |
215 | |
216 try: | |
217 # We try to parse the rules here, to check that the definitions are | |
218 # valid. We then pass the string definitions along to the workers, so | |
219 # that they don't need read access to the rules file. | |
220 yara.compile(source=self._yara_rules_string) | |
221 | |
222 except yara.Error as exception: | |
223 raise errors.BadConfigObject( | |
224 u'Unable to parse Yara rules in: {0:s} with error: {1!s}'.format( | |
225 path, exception)) | |
226 | |
227 def AddExtractionOptions(self, argument_group): | 145 def AddExtractionOptions(self, argument_group): |
228 """Adds the extraction options to the argument group. | 146 """Adds the extraction options to the argument group. |
229 | 147 |
230 Args: | 148 Args: |
231 argument_group (argparse._ArgumentGroup): argparse argument group. | 149 argument_group (argparse._ArgumentGroup): argparse argument group. |
232 """ | 150 """ |
233 argument_group.add_argument( | 151 helpers_manager.ArgumentHelperManager.AddCommandLineArguments( |
234 u'--artifact_definitions', u'--artifact-definitions', | 152 argument_group, names=[u'artifact_definitions']) |
235 dest=u'artifact_definitions_path', type=str, metavar=u'PATH', | |
236 action=u'store', help=( | |
237 u'Path to a directory containing artifact definitions. Artifact ' | |
238 u'definitions can be used to describe and quickly collect data ' | |
239 u'data of interest, such as specific files or Windows Registry ' | |
240 u'keys.')) | |
241 | 153 |
242 argument_group.add_argument( | 154 argument_group.add_argument( |
243 u'--hashers', dest=u'hashers', type=str, action=u'store', | 155 u'--hashers', dest=u'hashers', type=str, action=u'store', |
244 default=self._DEFAULT_HASHER_STRING, metavar=u'HASHER_LIST', help=( | 156 default=self._DEFAULT_HASHER_STRING, metavar=u'HASHER_LIST', help=( |
245 u'Define a list of hashers to use by the tool. This is a comma ' | 157 u'Define a list of hashers to use by the tool. This is a comma ' |
246 u'separated list where each entry is the name of a hasher, such as ' | 158 u'separated list where each entry is the name of a hasher, such as ' |
247 u'"md5,sha256". "all" indicates that all hashers should be ' | 159 u'"md5,sha256". "all" indicates that all hashers should be ' |
248 u'enabled. "none" disables all hashers. Use "--hashers list" or ' | 160 u'enabled. "none" disables all hashers. Use "--hashers list" or ' |
249 u'"--info" to list the available hashers.')) | 161 u'"--info" to list the available hashers.')) |
250 | 162 |
(...skipping 34 matching lines...) |
285 u'archive.tar and archive.zip. This can make processing ' | 197 u'archive.tar and archive.zip. This can make processing ' |
286 u'significantly slower.')) | 198 u'significantly slower.')) |
287 | 199 |
288 argument_group.add_argument( | 200 argument_group.add_argument( |
289 u'--skip_compressed_streams', u'--skip-compressed-streams', | 201 u'--skip_compressed_streams', u'--skip-compressed-streams', |
290 dest=u'process_compressed_streams', action=u'store_false', default=True, | 202 dest=u'process_compressed_streams', action=u'store_false', default=True, |
291 help=( | 203 help=( |
292 u'Skip processing file content within compressed streams, such as ' | 204 u'Skip processing file content within compressed streams, such as ' |
293 u'syslog.gz and syslog.bz2.')) | 205 u'syslog.gz and syslog.bz2.')) |
294 | 206 |
295 argument_group.add_argument( | 207 helpers_manager.ArgumentHelperManager.AddCommandLineArguments( |
296 u'--yara_rules', u'--yara-rules', dest=u'yara_rules_path', | 208 argument_group, names=[u'yara_rules']) |
297 type=str, metavar=u'PATH', action=u'store', help=( | |
298 u'Path to a file containing Yara rules definitions.')) | |
299 | 209 |
300 def AddPerformanceOptions(self, argument_group): | 210 def AddPerformanceOptions(self, argument_group): |
301 """Adds the performance options to the argument group. | 211 """Adds the performance options to the argument group. |
302 | 212 |
303 Args: | 213 Args: |
304 argument_group (argparse._ArgumentGroup): argparse argument group. | 214 argument_group (argparse._ArgumentGroup): argparse argument group. |
305 """ | 215 """ |
306 argument_group.add_argument( | 216 argument_group.add_argument( |
307 u'--buffer_size', u'--buffer-size', u'--bs', dest=u'buffer_size', | 217 u'--buffer_size', u'--buffer-size', u'--bs', dest=u'buffer_size', |
308 action=u'store', default=0, help=( | 218 action=u'store', default=0, help=( |
(...skipping 43 matching lines...) |
352 table_view.Write(self._output_writer) | 262 table_view.Write(self._output_writer) |
353 | 263 |
354 presets_information = self._GetParserPresetsInformation() | 264 presets_information = self._GetParserPresetsInformation() |
355 | 265 |
356 table_view = views.ViewsFactory.GetTableView( | 266 table_view = views.ViewsFactory.GetTableView( |
357 self._views_format_type, column_names=[u'Name', u'Parsers and plugins'], | 267 self._views_format_type, column_names=[u'Name', u'Parsers and plugins'], |
358 title=u'Parser presets') | 268 title=u'Parser presets') |
359 for name, description in sorted(presets_information): | 269 for name, description in sorted(presets_information): |
360 table_view.AddRow([name, description]) | 270 table_view.AddRow([name, description]) |
361 table_view.Write(self._output_writer) | 271 table_view.Write(self._output_writer) |
362 | |
363 def ParseOptions(self, options): | |
364 """Parses tool specific options. | |
365 | |
366 Args: | |
367 options (argparse.Namespace): command line arguments. | |
368 | |
369 Raises: | |
370 BadConfigOption: if the options are invalid. | |
371 """ | |
372 super(ExtractionTool, self).ParseOptions(options) | |
373 self._ParseDataLocationOption(options) | |
374 self._ParseFilterOptions(options) | |
375 self._ParsePerformanceOptions(options) | |
376 self._ParseStorageOptions(options) | |
OLD | NEW |