OLD | NEW |
| (Empty) |
1 # -*- coding: utf-8 -*- | |
2 """The extraction front-end.""" | |
3 | |
4 import logging | |
5 | |
6 from dfvfs.lib import definitions as dfvfs_definitions | |
7 from dfvfs.resolver import context | |
8 | |
9 from plaso import parsers # pylint: disable=unused-import | |
10 | |
11 from plaso.containers import sessions | |
12 from plaso.engine import single_process | |
13 from plaso.frontend import frontend | |
14 from plaso.frontend import utils | |
15 from plaso.multi_processing import task_engine as multi_process_engine | |
16 from plaso.parsers import manager as parsers_manager | |
17 | |
18 | |
class ExtractionFrontend(frontend.Frontend):
    """Class that implements an extraction front-end."""

    _DEFAULT_PROFILING_SAMPLE_RATE = 1000

    # Source types for which ProcessSources runs preprocessing even when
    # force_preprocessing is not set.
    _SOURCE_TYPES_TO_PREPROCESS = frozenset([
        dfvfs_definitions.SOURCE_TYPE_DIRECTORY,
        dfvfs_definitions.SOURCE_TYPE_STORAGE_MEDIA_DEVICE,
        dfvfs_definitions.SOURCE_TYPE_STORAGE_MEDIA_IMAGE])

    def __init__(self):
        """Initializes the front-end object."""
        super(ExtractionFrontend, self).__init__()
        self._collection_process = None
        self._debug_mode = False
        # TODO: remove after testing.
        self._experimental = False
        self._filter_expression = None
        self._filter_object = None
        self._mount_path = None
        self._profiling_directory = None
        self._profiling_sample_rate = self._DEFAULT_PROFILING_SAMPLE_RATE
        self._profiling_type = u'all'
        self._resolver_context = context.Context()
        self._text_prepend = None

    def _CreateEngine(self, single_process_mode, use_zeromq=True):
        """Creates an engine based on the front end settings.

        Args:
          single_process_mode (bool): True if the front-end should run in
              single process mode.
          use_zeromq (Optional[bool]): True if ZeroMQ should be used for
              queuing. Only passed to the multi process engine.

        Returns:
          BaseEngine: engine.
        """
        if single_process_mode:
            return single_process.SingleProcessEngine()

        return multi_process_engine.TaskMultiProcessEngine(
            use_zeromq=use_zeromq)

    def _PreprocessSources(self, engine, source_path_specs):
        """Preprocesses the sources.

        An IOError raised by the engine is logged and not propagated, so
        the caller continues without the preprocessing results.

        Args:
          engine (BaseEngine): engine to preprocess the sources.
          source_path_specs (list[dfvfs.PathSpec]): path specifications of
              the sources to process.
        """
        logging.debug(u'Starting preprocessing.')

        try:
            engine.PreprocessSources(
                source_path_specs, resolver_context=self._resolver_context)

        except IOError as exception:
            # Use the !s conversion: formatting an exception object with
            # the "s" format specification raises TypeError on Python 3.
            logging.error(u'Unable to preprocess with error: {0!s}'.format(
                exception))
            return

        logging.debug(u'Preprocessing done.')

    def CreateSession(
            self, command_line_arguments=None, filter_file=None,
            preferred_encoding=u'utf-8', preferred_time_zone=None,
            preferred_year=None):
        """Creates a session attribute container.

        The filter expression and debug mode of the session are taken from
        the front-end settings.

        Args:
          command_line_arguments (Optional[str]): the command line
              arguments.
          filter_file (Optional[str]): path to a file with find
              specifications.
          preferred_encoding (Optional[str]): preferred encoding.
          preferred_time_zone (Optional[str]): preferred time zone.
          preferred_year (Optional[int]): preferred year.

        Returns:
          Session: session attribute container.
        """
        session = sessions.Session()

        session.command_line_arguments = command_line_arguments
        session.filter_expression = self._filter_expression
        session.filter_file = filter_file
        session.debug_mode = self._debug_mode
        session.preferred_encoding = preferred_encoding
        session.preferred_time_zone = preferred_time_zone
        session.preferred_year = preferred_year

        return session

    def ProcessSources(
            self, session, storage_writer, source_path_specs, source_type,
            processing_configuration, enable_sigsegv_handler=False,
            force_preprocessing=False, number_of_extraction_workers=0,
            single_process_mode=False, status_update_callback=None,
            use_zeromq=True, worker_memory_limit=None):
        """Processes the sources.

        Args:
          session (Session): session the storage changes are part of.
          storage_writer (StorageWriter): storage writer.
          source_path_specs (list[dfvfs.PathSpec]): path specifications of
              the sources to process.
          source_type (str): the dfVFS source type definition.
          processing_configuration (ProcessingConfiguration): processing
              configuration.
          enable_sigsegv_handler (Optional[bool]): True if the SIGSEGV
              handler should be enabled.
          force_preprocessing (Optional[bool]): True if preprocessing should
              be forced.
          number_of_extraction_workers (Optional[int]): number of extraction
              workers to run. If 0, the number will be selected
              automatically.
          single_process_mode (Optional[bool]): True if the front-end should
              run in single process mode.
          status_update_callback (Optional[function]): callback function for
              status updates.
          use_zeromq (Optional[bool]): True if ZeroMQ should be used for
              queuing.
          worker_memory_limit (Optional[int]): maximum amount of memory a
              worker is allowed to consume, where None represents 2 GiB.

        Returns:
          ProcessingStatus: processing status or None.

        Raises:
          SourceScannerError: if the source scanner could not find a
              supported file system.
          UserAbort: if the user initiated an abort.
        """
        if source_type == dfvfs_definitions.SOURCE_TYPE_FILE:
            # No need to multi process a single file source.
            single_process_mode = True

        engine = self._CreateEngine(
            single_process_mode, use_zeromq=use_zeromq)

        # If the source is a directory or a storage media image
        # run pre-processing.
        if (force_preprocessing or
                source_type in self._SOURCE_TYPES_TO_PREPROCESS):
            self._PreprocessSources(engine, source_path_specs)

        if not processing_configuration.parser_filter_expression:
            # No parser filter was configured: fall back to the preset that
            # matches the operating system values in the knowledge base.
            operating_system = engine.knowledge_base.GetValue(
                u'operating_system')
            operating_system_product = engine.knowledge_base.GetValue(
                u'operating_system_product')
            operating_system_version = engine.knowledge_base.GetValue(
                u'operating_system_version')
            parser_filter_expression = (
                parsers_manager.ParsersManager.GetPresetForOperatingSystem(
                    operating_system, operating_system_product,
                    operating_system_version))

            if parser_filter_expression:
                logging.info(
                    u'Parser filter expression changed to: {0:s}'.format(
                        parser_filter_expression))

            processing_configuration.parser_filter_expression = (
                parser_filter_expression)

        # Record the effective parser selection in the session.
        session.enabled_parser_names = list(
            parsers_manager.ParsersManager.GetParserAndPluginNames(
                parser_filter_expression=(
                    processing_configuration.parser_filter_expression)))
        session.parser_filter_expression = (
            processing_configuration.parser_filter_expression)

        if session.preferred_time_zone:
            try:
                engine.knowledge_base.SetTimeZone(
                    session.preferred_time_zone)
            except ValueError:
                # Keep whatever time zone the knowledge base already has.
                logging.warning(
                    u'Unsupported time zone: {0:s}, defaulting to '
                    u'{1:s}'.format(
                        session.preferred_time_zone,
                        engine.knowledge_base.time_zone.zone))

        filter_find_specs = None
        if processing_configuration.filter_file:
            environment_variables = (
                engine.knowledge_base.GetEnvironmentVariables())
            filter_find_specs = utils.BuildFindSpecsFromFile(
                processing_configuration.filter_file,
                environment_variables=environment_variables)

        # The single and multi process engines take different
        # ProcessSources arguments, hence the two separate calls.
        if single_process_mode:
            logging.debug(u'Starting extraction in single process mode.')

            processing_status = engine.ProcessSources(
                source_path_specs, storage_writer, self._resolver_context,
                processing_configuration,
                filter_find_specs=filter_find_specs,
                status_update_callback=status_update_callback)

        else:
            logging.debug(u'Starting extraction in multi process mode.')

            processing_status = engine.ProcessSources(
                session.identifier, source_path_specs, storage_writer,
                processing_configuration,
                enable_sigsegv_handler=enable_sigsegv_handler,
                filter_find_specs=filter_find_specs,
                number_of_worker_processes=number_of_extraction_workers,
                status_update_callback=status_update_callback,
                worker_memory_limit=worker_memory_limit)

        return processing_status
OLD | NEW |