OLD | NEW |
1 # -*- coding: utf-8 -*- | 1 # -*- coding: utf-8 -*- |
2 """ZIP-based storage. | 2 """ZIP-based storage. |
3 | 3 |
4 The ZIP-based storage can be described as a collection of storage files | 4 The ZIP-based storage can be described as a collection of storage files |
5 (named streams) bundled in a single ZIP archive file. | 5 (named streams) bundled in a single ZIP archive file. |
6 | 6 |
7 There are multiple types of streams: | 7 There are multiple types of streams: |
8 * error_data.# | 8 * error_data.# |
9 The error data streams contain the serialized error objects. | 9 The error data streams contain the serialized error objects. |
10 * error_index.# | 10 * error_index.# |
11 The error index streams contain the stream offset to the serialized | 11 The error index streams contain the stream offset to the serialized |
12 error objects. | 12 error objects. |
13 * event_data.# | 13 * event_data.# |
14 The event data streams contain the serialized events. | 14 The event data streams contain the serialized events. |
15 * event_index.# | 15 * event_index.# |
16 The event index streams contain the stream offset to the serialized | 16 The event index streams contain the stream offset to the serialized |
17 events. | 17 events. |
18 * event_source_data.# | 18 * event_source_data.# |
19 The event source data streams contain the serialized event source objects. | 19 The event source data streams contain the serialized event source objects. |
20 * event_source_index.# | 20 * event_source_index.# |
21 The event source index streams contain the stream offset to the serialized | 21 The event source index streams contain the stream offset to the serialized |
22 event source objects. | 22 event source objects. |
23 * event_tag_data.# | 23 * event_tag_data.# |
24 The event tag data streams contain the serialized event tag objects. | 24 The event tag data streams contain the serialized event tag objects. |
25 * event_timestamps.# | 25 * event_timestamps.# |
26 The event timestamps streams contain the timestamp of the serialized | 26 The event timestamps streams contain the timestamp of the serialized |
27 events. | 27 events. |
| 28 * event_values_data.# |
| 29 The event values streams contain the serialized event data objects. |
28 * metadata.txt | 30 * metadata.txt |
29 Stream that contains the storage metadata. | 31 Stream that contains the storage metadata. |
30 * preprocess.# | 32 * preprocess.# |
31 Stream that contains the preprocessing information. | 33 Stream that contains the preprocessing information. |
32 Only applies to session-based storage. | 34 Only applies to session-based storage. |
33 * session_completion.# | 35 * session_completion.# |
34 Stream that contains information about the completion of a session. | 36 Stream that contains information about the completion of a session. |
35 Only applies to session-based storage. | 37 Only applies to session-based storage. |
36 * session_start.# | 38 * session_start.# |
37 Stream that contains information about the start of a session. | 39 Stream that contains information about the start of a session. |
(...skipping 89 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
127 import ConfigParser as configparser | 129 import ConfigParser as configparser |
128 except ImportError: | 130 except ImportError: |
129 import configparser # pylint: disable=import-error | 131 import configparser # pylint: disable=import-error |
130 | 132 |
131 import construct | 133 import construct |
132 | 134 |
133 from plaso.containers import sessions | 135 from plaso.containers import sessions |
134 from plaso.lib import definitions | 136 from plaso.lib import definitions |
135 from plaso.lib import platform_specific | 137 from plaso.lib import platform_specific |
136 from plaso.serializer import json_serializer | 138 from plaso.serializer import json_serializer |
| 139 from plaso.storage import event_heaps |
137 from plaso.storage import identifiers | 140 from plaso.storage import identifiers |
138 from plaso.storage import interface | 141 from plaso.storage import interface |
139 from plaso.storage import gzip_file | 142 from plaso.storage import gzip_file |
140 | 143 |
141 | 144 |
142 class _EventsHeap(object): | 145 class _SerializedEventHeap(object): |
143 """Events heap.""" | 146 """Serialized event heap. |
144 | |
145 def __init__(self): | |
146 """Initializes an events heap.""" | |
147 super(_EventsHeap, self).__init__() | |
148 self._heap = [] | |
149 | |
150 @property | |
151 def number_of_events(self): | |
152 """int: number of serialized events on the heap.""" | |
153 return len(self._heap) | |
154 | |
155 def PeekEvent(self): | |
156 """Retrieves the first event from the heap without removing it. | |
157 | |
158 Returns: | |
159 tuple: contains: | |
160 | |
161 EventObject: event or None. | |
162 int: number of the stream or None. | |
163 """ | |
164 try: | |
165 _, stream_number, _, event = self._heap[0] | |
166 return event, stream_number | |
167 | |
168 except IndexError: | |
169 return None, None | |
170 | |
171 def PopEvent(self): | |
172 """Retrieves and removes the first event from the heap. | |
173 | |
174 Returns: | |
175 tuple: contains: | |
176 | |
177 EventObject: event or None. | |
178 int: number of the stream or None. | |
179 """ | |
180 try: | |
181 _, stream_number, _, event = heapq.heappop(self._heap) | |
182 return event, stream_number | |
183 | |
184 except IndexError: | |
185 return None, None | |
186 | |
187 def PushEvent(self, event): | |
188 """Pushes an event onto the heap. | |
189 | |
190 Args: | |
191 event (EventObject): event. | |
192 """ | |
193 event_identifier = event.GetIdentifier() | |
194 heap_values = ( | |
195 event.timestamp, event_identifier.stream_number, | |
196 event_identifier.entry_index, event) | |
197 heapq.heappush(self._heap, heap_values) | |
198 | |
199 | |
200 class _SerializedEventsHeap(object): | |
201 """Serialized events heap. | |
202 | 147 |
203 Attributes: | 148 Attributes: |
204 data_size (int): total data size of the serialized events on the heap. | 149 data_size (int): total data size of the serialized events on the heap. |
205 """ | 150 """ |
206 | 151 |
207 def __init__(self): | 152 def __init__(self): |
208 """Initializes a serialized events heap.""" | 153 """Initializes a serialized event heap.""" |
209 super(_SerializedEventsHeap, self).__init__() | 154 super(_SerializedEventHeap, self).__init__() |
210 self._heap = [] | 155 self._heap = [] |
211 self.data_size = 0 | 156 self.data_size = 0 |
212 | 157 |
213 @property | 158 @property |
214 def number_of_events(self): | 159 def number_of_events(self): |
215 """int: number of serialized events on the heap.""" | 160 """int: number of serialized events on the heap.""" |
216 return len(self._heap) | 161 return len(self._heap) |
217 | 162 |
218 def Empty(self): | 163 def Empty(self): |
219 """Empties the heap.""" | 164 """Empties the heap.""" |
(...skipping 36 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
256 | 201 |
257 _DATA_ENTRY = construct.Struct( | 202 _DATA_ENTRY = construct.Struct( |
258 u'data_entry', | 203 u'data_entry', |
259 construct.ULInt32(u'size')) | 204 construct.ULInt32(u'size')) |
260 _DATA_ENTRY_SIZE = _DATA_ENTRY.sizeof() | 205 _DATA_ENTRY_SIZE = _DATA_ENTRY.sizeof() |
261 | 206 |
262 # The default maximum serialized data size (40 MiB). | 207 # The default maximum serialized data size (40 MiB). |
263 DEFAULT_MAXIMUM_DATA_SIZE = 40 * 1024 * 1024 | 208 DEFAULT_MAXIMUM_DATA_SIZE = 40 * 1024 * 1024 |
264 | 209 |
265 def __init__( | 210 def __init__( |
266 self, zip_file, storage_file_path, stream_name, | 211 self, zip_file, temporary_path, stream_name, |
267 maximum_data_size=DEFAULT_MAXIMUM_DATA_SIZE): | 212 maximum_data_size=DEFAULT_MAXIMUM_DATA_SIZE): |
268 """Initializes a serialized data stream. | 213 """Initializes a serialized data stream. |
269 | 214 |
270 Args: | 215 Args: |
271 zip_file (zipfile.ZipFile): ZIP file that contains the stream. | 216 zip_file (zipfile.ZipFile): ZIP file that contains the stream. |
272 storage_file_path (str): path to the storage file. | 217 temporary_path (str): temporary path. |
273 stream_name (str): name of the stream. | 218 stream_name (str): name of the stream. |
274 maximum_data_size (Optional[int]): maximum data size of the stream. | 219 maximum_data_size (Optional[int]): maximum data size of the stream. |
275 """ | 220 """ |
276 super(_SerializedDataStream, self).__init__() | 221 super(_SerializedDataStream, self).__init__() |
277 self._entry_index = 0 | 222 self._entry_index = 0 |
278 self._file_object = None | 223 self._file_object = None |
279 self._maximum_data_size = maximum_data_size | 224 self._maximum_data_size = maximum_data_size |
280 self._path = os.path.dirname(os.path.abspath(storage_file_path)) | |
281 self._stream_name = stream_name | 225 self._stream_name = stream_name |
282 self._stream_offset = 0 | 226 self._stream_offset = 0 |
| 227 self._stream_file_path = None |
| 228 self._temporary_path = temporary_path |
283 self._zip_file = zip_file | 229 self._zip_file = zip_file |
284 | 230 |
| 231 def __del__(self): |
| 232 """Clean up.""" |
| 233 if hasattr(self, u'_stream_file_path'): |
| 234 if self._stream_file_path and os.path.exists(self._stream_file_path): |
| 235 try: |
| 236 os.remove(self._stream_file_path) |
| 237 except (IOError, OSError): |
| 238 pass |
| 239 |
285 @property | 240 @property |
286 def entry_index(self): | 241 def entry_index(self): |
287 """int: entry index.""" | 242 """int: entry index.""" |
288 return self._entry_index | 243 return self._entry_index |
289 | 244 |
290 def _OpenFileObject(self): | 245 def _OpenFileObject(self): |
291 """Opens the file-like object (instance of ZipExtFile). | 246 """Opens the file-like object (instance of ZipExtFile). |
292 | 247 |
293 Raises: | 248 Raises: |
294 IOError: if the file-like object cannot be opened. | 249 IOError: if the file-like object cannot be opened. |
295 """ | 250 """ |
296 try: | 251 try: |
297 self._file_object = self._zip_file.open(self._stream_name, mode='r') | 252 self._zip_file.extract(self._stream_name, self._temporary_path) |
298 except KeyError as exception: | 253 except KeyError as exception: |
299 raise IOError( | 254 raise IOError( |
300 u'Unable to open stream with error: {0:s}'.format(exception)) | 255 u'Unable to open stream with error: {0:s}'.format(exception)) |
301 | 256 |
302 self._stream_offset = 0 | 257 self._stream_file_path = os.path.join( |
303 | 258 self._temporary_path, self._stream_name) |
304 def _ReOpenFileObject(self): | 259 self._file_object = open(self._stream_file_path, 'rb') |
305 """Reopens the file-like object (instance of ZipExtFile).""" | |
306 if self._file_object: | |
307 self._file_object.close() | |
308 self._file_object = None | |
309 | |
310 self._file_object = self._zip_file.open(self._stream_name, mode='r') | |
311 self._stream_offset = 0 | |
312 | 260 |
313 def ReadEntry(self): | 261 def ReadEntry(self): |
314 """Reads an entry from the data stream. | 262 """Reads an entry from the data stream. |
315 | 263 |
316 Returns: | 264 Returns: |
317 bytes: data or None if no data remaining. | 265 bytes: data or None if no data remaining. |
318 | 266 |
319 Raises: | 267 Raises: |
320 IOError: if the entry cannot be read. | 268 IOError: if the entry cannot be read. |
321 """ | 269 """ |
322 if not self._file_object: | 270 if not self._file_object: |
323 self._OpenFileObject() | 271 self._OpenFileObject() |
324 | 272 |
| 273 self._file_object.seek(self._stream_offset, os.SEEK_SET) |
| 274 |
325 data = self._file_object.read(self._DATA_ENTRY_SIZE) | 275 data = self._file_object.read(self._DATA_ENTRY_SIZE) |
326 if not data: | 276 if not data: |
327 return | 277 return |
328 | 278 |
329 try: | 279 try: |
330 data_entry = self._DATA_ENTRY.parse(data) | 280 data_entry = self._DATA_ENTRY.parse(data) |
331 except construct.FieldError as exception: | 281 except construct.FieldError as exception: |
332 raise IOError(u'Unable to read data entry with error: {0:s}'.format( | 282 raise IOError(u'Unable to read data entry with error: {0:s}'.format( |
333 exception)) | 283 exception)) |
334 | 284 |
335 if data_entry.size > self._maximum_data_size: | 285 if data_entry.size > self._maximum_data_size: |
336 raise IOError(u'Unable to read data entry size value out of bounds.') | 286 raise IOError(u'Unable to read data entry size value out of bounds.') |
337 | 287 |
338 data = self._file_object.read(data_entry.size) | 288 data = self._file_object.read(data_entry.size) |
339 if len(data) != data_entry.size: | 289 if len(data) != data_entry.size: |
340 raise IOError(u'Unable to read data.') | 290 raise IOError(u'Unable to read data.') |
341 | 291 |
342 self._stream_offset += self._DATA_ENTRY_SIZE + data_entry.size | 292 self._stream_offset += self._DATA_ENTRY_SIZE + data_entry.size |
343 self._entry_index += 1 | 293 self._entry_index += 1 |
344 | 294 |
345 return data | 295 return data |
346 | 296 |
347 def SeekEntryAtOffset(self, entry_index, stream_offset): | 297 def SeekEntryAtOffset(self, entry_index, stream_offset): |
348 """Seeks a specific serialized data stream entry at a specific offset. | 298 """Seeks a specific serialized data stream entry at a specific offset. |
349 | 299 |
350 Args: | 300 Args: |
351 entry_index (int): serialized data stream entry index. | 301 entry_index (int): serialized data stream entry index. |
352 stream_offset (int): data stream offset. | 302 stream_offset (int): data stream offset. |
353 """ | 303 """ |
354 if not self._file_object: | |
355 self._OpenFileObject() | |
356 | |
357 if stream_offset < self._stream_offset: | |
358 # Since zipfile.ZipExtFile is not seekable we need to close the stream | |
359 # and reopen it to fake a seek. | |
360 self._ReOpenFileObject() | |
361 | |
362 skip_read_size = stream_offset | |
363 else: | |
364 skip_read_size = stream_offset - self._stream_offset | |
365 | |
366 if skip_read_size > 0: | |
367 # Since zipfile.ZipExtFile is not seekable we need to read up to | |
368 # the stream offset. | |
369 self._file_object.read(skip_read_size) | |
370 self._stream_offset += skip_read_size | |
371 | |
372 self._entry_index = entry_index | 304 self._entry_index = entry_index |
| 305 self._stream_offset = stream_offset |
373 | 306 |
374 def WriteAbort(self): | 307 def WriteAbort(self): |
375 """Aborts the write of a serialized data stream.""" | 308 """Aborts the write of a serialized data stream.""" |
376 if self._file_object: | 309 if self._file_object: |
377 self._file_object.close() | 310 self._file_object.close() |
378 self._file_object = None | 311 self._file_object = None |
379 | 312 |
380 if os.path.exists(self._stream_name): | 313 if os.path.exists(self._stream_name): |
381 os.remove(self._stream_name) | 314 os.remove(self._stream_name) |
382 | 315 |
(...skipping 40 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
423 """ | 356 """ |
424 if not self._file_object: | 357 if not self._file_object: |
425 raise IOError(u'Unable to write to closed serialized data stream.') | 358 raise IOError(u'Unable to write to closed serialized data stream.') |
426 | 359 |
427 offset = self._file_object.tell() | 360 offset = self._file_object.tell() |
428 self._file_object.close() | 361 self._file_object.close() |
429 self._file_object = None | 362 self._file_object = None |
430 | 363 |
431 current_working_directory = os.getcwd() | 364 current_working_directory = os.getcwd() |
432 try: | 365 try: |
433 os.chdir(self._path) | 366 os.chdir(self._temporary_path) |
434 self._zip_file.write(self._stream_name) | 367 self._zip_file.write(self._stream_name) |
435 finally: | 368 finally: |
436 os.remove(self._stream_name) | 369 os.remove(self._stream_name) |
437 os.chdir(current_working_directory) | 370 os.chdir(current_working_directory) |
438 | 371 |
439 return offset | 372 return offset |
440 | 373 |
441 def WriteInitialize(self): | 374 def WriteInitialize(self): |
442 """Initializes the write of a serialized data stream. | 375 """Initializes the write of a serialized data stream. |
443 | 376 |
444 Creates a temporary file to store the serialized data. | 377 Creates a temporary file to store the serialized data. |
445 | 378 |
446 Returns: | 379 Returns: |
447 int: offset of the entry within the temporary file. | 380 int: offset of the entry within the temporary file. |
448 | 381 |
449 Raises: | 382 Raises: |
450 IOError: if the serialized data stream is already opened or | 383 IOError: if the serialized data stream is already opened or |
451 cannot be written. | 384 cannot be written. |
452 """ | 385 """ |
453 if self._file_object: | 386 if self._file_object: |
454 raise IOError(u'Serialized data stream already opened.') | 387 raise IOError(u'Serialized data stream already opened.') |
455 | 388 |
456 stream_file_path = os.path.join(self._path, self._stream_name) | 389 stream_file_path = os.path.join(self._temporary_path, self._stream_name) |
457 self._file_object = open(stream_file_path, 'wb') | 390 self._file_object = open(stream_file_path, 'wb') |
458 if platform_specific.PlatformIsWindows(): | 391 if platform_specific.PlatformIsWindows(): |
459 file_handle = self._file_object.fileno() | 392 file_handle = self._file_object.fileno() |
460 platform_specific.DisableWindowsFileHandleInheritance(file_handle) | 393 platform_specific.DisableWindowsFileHandleInheritance(file_handle) |
461 | 394 |
462 return self._file_object.tell() | 395 return self._file_object.tell() |
463 | 396 |
464 | 397 |
465 class _SerializedDataOffsetTable(object): | 398 class _SerializedDataOffsetTable(object): |
466 """Serialized data offset table.""" | 399 """Serialized data offset table.""" |
(...skipping 248 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
715 | 648 |
716 Attributes: | 649 Attributes: |
717 format_version (int): storage format version. | 650 format_version (int): storage format version. |
718 serialization_format (str): serialization format. | 651 serialization_format (str): serialization format. |
719 storage_type (str): storage type. | 652 storage_type (str): storage type. |
720 """ | 653 """ |
721 | 654 |
722 NEXT_AVAILABLE_ENTRY = -1 | 655 NEXT_AVAILABLE_ENTRY = -1 |
723 | 656 |
724 # The format version. | 657 # The format version. |
725 _FORMAT_VERSION = 20170121 | 658 _FORMAT_VERSION = 20170707 |
726 | 659 |
727 # The earliest format version, stored in-file, that this class | 660 # The earliest format version, stored in-file, that this class |
728 # is able to read. | 661 # is able to read. |
729 _COMPATIBLE_FORMAT_VERSION = 20170121 | 662 _COMPATIBLE_FORMAT_VERSION = 20170121 |
730 | 663 |
731 # The maximum buffer size of serialized data before triggering | 664 # The maximum buffer size of serialized data before triggering |
732 # a flush to disk (64 MiB). | 665 # a flush to disk (64 MiB). |
733 _MAXIMUM_BUFFER_SIZE = 64 * 1024 * 1024 | 666 _MAXIMUM_BUFFER_SIZE = 64 * 1024 * 1024 |
734 | 667 |
| 668 # The maximum number of cached streams. |
| 669 _MAXIMUM_NUMBER_OF_CACHED_STREAMS = 16 |
| 670 |
735 # The maximum number of cached tables. | 671 # The maximum number of cached tables. |
736 _MAXIMUM_NUMBER_OF_CACHED_TABLES = 5 | 672 _MAXIMUM_NUMBER_OF_CACHED_TABLES = 16 |
737 | 673 |
738 # The maximum serialized report size (32 MiB). | 674 # The maximum serialized report size (32 MiB). |
739 _MAXIMUM_SERIALIZED_REPORT_SIZE = 32 * 1024 * 1024 | 675 _MAXIMUM_SERIALIZED_REPORT_SIZE = 32 * 1024 * 1024 |
740 | 676 |
741 _MAXIMUM_NUMBER_OF_LOCKED_FILE_ATTEMPTS = 5 | 677 _MAXIMUM_NUMBER_OF_LOCKED_FILE_ATTEMPTS = 5 |
742 _LOCKED_FILE_SLEEP_TIME = 0.5 | 678 _LOCKED_FILE_SLEEP_TIME = 0.5 |
743 | 679 |
| 680 _STREAM_NAME_PREFIXES = { |
| 681 u'extraction_error': u'error', |
| 682 u'event': u'event', |
| 683 u'event_data': u'event_values', |
| 684 u'event_source': u'event_source', |
| 685 u'event_tag': u'event_tag'} |
| 686 |
744 def __init__( | 687 def __init__( |
745 self, maximum_buffer_size=0, | 688 self, maximum_buffer_size=0, |
746 storage_type=definitions.STORAGE_TYPE_SESSION): | 689 storage_type=definitions.STORAGE_TYPE_SESSION): |
747 """Initializes a ZIP-based storage file. | 690 """Initializes a ZIP-based storage file. |
748 | 691 |
749 Args: | 692 Args: |
750 maximum_buffer_size (Optional[int]): | 693 maximum_buffer_size (Optional[int]): |
751 maximum size of a single storage stream. A value of 0 indicates | 694 maximum size of a single storage stream. A value of 0 indicates |
752 the limit is _MAXIMUM_BUFFER_SIZE. | 695 the limit is _MAXIMUM_BUFFER_SIZE. |
753 storage_type (Optional[str]): storage type. | 696 storage_type (Optional[str]): storage type. |
754 | 697 |
755 Raises: | 698 Raises: |
756 ValueError: if the maximum buffer size value is out of bounds. | 699 ValueError: if the maximum buffer size value is out of bounds. |
757 """ | 700 """ |
758 if (maximum_buffer_size < 0 or | 701 if (maximum_buffer_size < 0 or |
759 maximum_buffer_size > self._MAXIMUM_BUFFER_SIZE): | 702 maximum_buffer_size > self._MAXIMUM_BUFFER_SIZE): |
760 raise ValueError(u'Maximum buffer size value out of bounds.') | 703 raise ValueError(u'Maximum buffer size value out of bounds.') |
761 | 704 |
762 if not maximum_buffer_size: | 705 if not maximum_buffer_size: |
763 maximum_buffer_size = self._MAXIMUM_BUFFER_SIZE | 706 maximum_buffer_size = self._MAXIMUM_BUFFER_SIZE |
764 | 707 |
765 super(ZIPStorageFile, self).__init__() | 708 super(ZIPStorageFile, self).__init__() |
766 self._analysis_report_stream_number = 0 | 709 self._analysis_report_stream_number = 0 |
767 self._event_offset_tables = {} | |
768 self._event_offset_tables_lfu = [] | |
769 self._event_stream_number = 1 | |
770 self._event_streams = {} | |
771 self._event_source_offset_tables = {} | |
772 self._event_source_offset_tables_lfu = [] | |
773 self._event_source_streams = {} | |
774 self._event_sources_in_stream = [] | 710 self._event_sources_in_stream = [] |
775 self._event_tag_index = None | 711 self._event_tag_index = None |
776 self._event_tag_offset_tables = {} | |
777 self._event_tag_offset_tables_lfu = [] | |
778 self._event_tag_streams = {} | |
779 self._event_timestamp_tables = {} | 712 self._event_timestamp_tables = {} |
780 self._event_timestamp_tables_lfu = [] | 713 self._event_timestamp_tables_lfu = [] |
781 self._event_heap = None | 714 self._event_heap = None |
782 self._last_preprocess = 0 | 715 self._last_preprocess = 0 |
783 self._last_session = 0 | 716 self._last_session = 0 |
| 717 self._last_stream_numbers = {} |
784 self._last_task = 0 | 718 self._last_task = 0 |
785 self._maximum_buffer_size = maximum_buffer_size | 719 self._maximum_buffer_size = maximum_buffer_size |
| 720 self._offset_tables = {} |
| 721 self._offset_tables_lfu = [] |
| 722 self._path = None |
| 723 self._serialized_event_heap = _SerializedEventHeap() |
786 self._serialized_event_tags = [] | 724 self._serialized_event_tags = [] |
787 self._serialized_event_tags_size = 0 | 725 self._serialized_event_tags_size = 0 |
788 self._serialized_events_heap = _SerializedEventsHeap() | 726 self._streams = {} |
789 self._stream_numbers = {} | 727 self._streams_lfu = [] |
790 self._path = None | 728 self._temporary_path = None |
791 self._zipfile = None | 729 self._zipfile = None |
792 self._zipfile_path = None | 730 self._zipfile_path = None |
793 | 731 |
794 self.format_version = self._FORMAT_VERSION | 732 self.format_version = self._FORMAT_VERSION |
795 self.serialization_format = definitions.SERIALIZER_FORMAT_JSON | 733 self.serialization_format = definitions.SERIALIZER_FORMAT_JSON |
796 self.storage_type = storage_type | 734 self.storage_type = storage_type |
797 | 735 |
| 736 def __del__(self): |
| 737 """Clean up.""" |
| 738 if hasattr(self, u'_temporary_path'): |
| 739 if self._temporary_path and os.path.exists(self._temporary_path): |
| 740 try: |
| 741 os.rmdir(self._temporary_path) |
| 742 self._temporary_path = None |
| 743 except (IOError, OSError): |
| 744 pass |
| 745 |
798 def _AddAttributeContainer(self, container_type, attribute_container): | 746 def _AddAttributeContainer(self, container_type, attribute_container): |
799 """Adds an atttibute container. | 747 """Adds an atttibute container. |
800 | 748 |
801 Args: | 749 Args: |
802 container_type (str): attribute container type. | 750 container_type (str): attribute container type. |
803 attribute_container (AttributeContainer): attribute container. | 751 attribute_container (AttributeContainer): attribute container. |
804 | |
805 Raises: | |
806 IOError: when the storage file is closed or read-only or | |
807 if the attribute container cannot be serialized. | |
808 """ | 752 """ |
809 if not self._is_open: | |
810 raise IOError(u'Unable to write to closed storage file.') | |
811 | |
812 if self._read_only: | |
813 raise IOError(u'Unable to write to read-only storage file.') | |
814 | |
815 container_list = self._GetSerializedAttributeContainerList( | 753 container_list = self._GetSerializedAttributeContainerList( |
816 container_type) | 754 container_type) |
817 | 755 |
818 stream_number = self._stream_numbers[container_type] | 756 stream_number = self._last_stream_numbers[container_type] |
819 identifier = identifiers.SerializedStreamIdentifier( | 757 identifier = identifiers.SerializedStreamIdentifier( |
820 stream_number, container_list.number_of_attribute_containers) | 758 stream_number, container_list.number_of_attribute_containers) |
821 attribute_container.SetIdentifier(identifier) | 759 attribute_container.SetIdentifier(identifier) |
822 | 760 |
823 # We try to serialize the error first, so we can skip some | |
824 # processing if it is invalid. | |
825 serialized_data = self._SerializeAttributeContainer(attribute_container) | 761 serialized_data = self._SerializeAttributeContainer(attribute_container) |
826 | 762 |
827 container_list.PushAttributeContainer(serialized_data) | 763 container_list.PushAttributeContainer(serialized_data) |
828 | 764 |
829 if container_list.data_size > self._maximum_buffer_size: | 765 if container_list.data_size > self._maximum_buffer_size: |
830 self._WriteSerializedAttributeContainerList(container_type) | 766 self._WriteSerializedAttributeContainerList(container_type) |
831 | 767 |
832 def _BuildEventTagIndex(self): | 768 def _AddSerializedEvent(self, event): |
833 """Builds the event tag index. | 769 """Adds an serialized event. |
| 770 |
| 771 Args: |
| 772 event (EventObject): event. |
834 | 773 |
835 Raises: | 774 Raises: |
836 IOError: if a stream is missing. | 775 IOError: if the event cannot be serialized. |
837 """ | 776 """ |
| 777 identifier = identifiers.SerializedStreamIdentifier( |
| 778 self._last_stream_numbers[u'event'], |
| 779 self._serialized_event_heap.number_of_events) |
| 780 event.SetIdentifier(identifier) |
| 781 |
| 782 serialized_data = self._SerializeAttributeContainer(event) |
| 783 |
| 784 self._serialized_event_heap.PushEvent(event.timestamp, serialized_data) |
| 785 |
| 786 if self._serialized_event_heap.data_size > self._maximum_buffer_size: |
| 787 self._WriteSerializedEvents() |
| 788 |
| 789 def _BuildEventTagIndex(self): |
| 790 """Builds the event tag index.""" |
838 self._event_tag_index = {} | 791 self._event_tag_index = {} |
839 for event_tag in self.GetEventTags(): | 792 for event_tag in self.GetEventTags(): |
840 event_identifier = event_tag.GetEventIdentifier() | 793 event_identifier = event_tag.GetEventIdentifier() |
841 lookup_key = event_identifier.CopyToString() | 794 lookup_key = event_identifier.CopyToString() |
842 self._event_tag_index[lookup_key] = event_tag.GetIdentifier() | 795 self._event_tag_index[lookup_key] = event_tag.GetIdentifier() |
843 | 796 |
| 797 @classmethod |
| 798 def _CheckStorageMetadata(cls, storage_metadata): |
| 799 """Checks the storage metadata. |
| 800 |
| 801 Args: |
| 802 storage_metadata (_StorageMetadata): storage metadata. |
| 803 |
| 804 Raises: |
| 805 IOError: if the format version or the serializer format is not supported. |
| 806 """ |
| 807 if not storage_metadata.format_version: |
| 808 raise IOError(u'Missing format version.') |
| 809 |
| 810 if storage_metadata.format_version < cls._COMPATIBLE_FORMAT_VERSION: |
| 811 raise IOError( |
| 812 u'Format version: {0:d} is too old and no longer supported.'.format( |
| 813 storage_metadata.format_version)) |
| 814 |
| 815 if storage_metadata.format_version > cls._FORMAT_VERSION: |
| 816 raise IOError( |
| 817 u'Format version: {0:d} is too new and not yet supported.'.format( |
| 818 storage_metadata.format_version)) |
| 819 |
| 820 serialization_format = storage_metadata.serialization_format |
| 821 if serialization_format != definitions.SERIALIZER_FORMAT_JSON: |
| 822 raise IOError(u'Unsupported serialization format: {0:s}'.format( |
| 823 serialization_format)) |
| 824 |
| 825 if storage_metadata.storage_type not in definitions.STORAGE_TYPES: |
| 826 raise IOError(u'Unsupported storage type: {0:s}'.format( |
| 827 storage_metadata.storage_type)) |
| 828 |
844 def _FillEventHeapFromStream(self, stream_number): | 829 def _FillEventHeapFromStream(self, stream_number): |
845 """Fills the event heap with the next events from the stream. | 830 """Fills the event heap with the next events from the stream. |
846 | 831 |
847 This function will read events starting at the current stream entry that | 832 This function will read events starting at the current stream entry that |
848 have the same timestamp and adds them to the heap. This ensures that the | 833 have the same timestamp and adds them to the heap. This ensures that the |
849 sorting order of events with the same timestamp is consistent. | 834 sorting order of events with the same timestamp is consistent. |
850 | 835 |
851 Except for the last event, all newly added events will have the same | 836 Except for the last event, all newly added events will have the same |
852 timestamp. | 837 timestamp. |
853 | 838 |
854 Args: | 839 Args: |
855 stream_number (int): serialized data stream number. | 840 stream_number (int): serialized data stream number. |
856 """ | 841 """ |
857 event = self._GetEvent(stream_number) | 842 event = self._GetEvent(stream_number) |
858 if not event: | 843 if not event: |
859 return | 844 return |
860 | 845 |
861 self._event_heap.PushEvent(event) | 846 self._event_heap.PushEvent(event) |
862 | 847 |
863 reference_timestamp = event.timestamp | 848 reference_timestamp = event.timestamp |
864 while event.timestamp == reference_timestamp: | 849 while event.timestamp == reference_timestamp: |
865 event = self._GetEvent(stream_number) | 850 event = self._GetEvent(stream_number) |
866 if not event: | 851 if not event: |
867 break | 852 break |
868 | 853 |
869 self._event_heap.PushEvent(event) | 854 self._event_heap.PushEvent(event) |
870 | 855 |
| 856 def _GetAttributeContainer( |
| 857 self, container_type, stream_number, entry_index=NEXT_AVAILABLE_ENTRY): |
| 858 """Reads an attribute container from a specific stream. |
| 859 |
| 860 Args: |
| 861 container_type (str): attribute container type. |
| 862 stream_number (int): number of the serialized event source stream. |
| 863 entry_index (Optional[int]): number of the serialized event source |
| 864 within the stream, where NEXT_AVAILABLE_ENTRY represents the next |
| 865 available event |
| 866 source. |
| 867 |
| 868 Returns: |
| 869 AttributeContainer: attribute container or None if not available. |
| 870 """ |
| 871 serialized_data, entry_index = self._GetSerializedAttributeContainerData( |
| 872 container_type, stream_number, entry_index=entry_index) |
| 873 if not serialized_data: |
| 874 return |
| 875 |
| 876 attribute_container = self._DeserializeAttributeContainer( |
| 877 container_type, serialized_data) |
| 878 |
| 879 if attribute_container: |
| 880 identifier = identifiers.SerializedStreamIdentifier( |
| 881 stream_number, entry_index) |
| 882 attribute_container.SetIdentifier(identifier) |
| 883 |
| 884 return attribute_container |
| 885 |
| 886 def _GetAttributeContainerByIndex(self, container_type, index): |
| 887 """Retrieves a specific attribute container. |
| 888 |
| 889 Args: |
| 890 container_type (str): attribute container type. |
| 891 index (int): attribute container index. |
| 892 |
| 893 Returns: |
| 894 AttributeContainer: attribute container or None if not available. |
| 895 """ |
| 896 last_stream_number = self._last_stream_numbers[container_type] |
| 897 |
| 898 stream_number = 1 |
| 899 while stream_number < last_stream_number: |
| 900 if stream_number <= len(self._event_sources_in_stream): |
| 901 number_of_entries = self._event_sources_in_stream[ |
| 902 stream_number - 1] |
| 903 |
| 904 else: |
| 905 offset_table = self._GetSerializedDataOffsetTable( |
| 906 container_type, stream_number) |
| 907 number_of_entries = offset_table.number_of_offsets |
| 908 self._event_sources_in_stream.append(number_of_entries) |
| 909 |
| 910 if index < number_of_entries: |
| 911 break |
| 912 |
| 913 index -= number_of_entries |
| 914 stream_number += 1 |
| 915 |
| 916 if stream_number < last_stream_number: |
| 917 stream_name = u'{0:s}_data.{1:06}'.format( |
| 918 self._STREAM_NAME_PREFIXES[container_type], stream_number) |
| 919 if not self._HasStream(stream_name): |
| 920 raise IOError(u'No such stream: {0:s}'.format(stream_name)) |
| 921 |
| 922 offset_table = self._GetSerializedDataOffsetTable( |
| 923 container_type, stream_number) |
| 924 stream_offset = offset_table.GetOffset(index) |
| 925 |
| 926 data_stream = _SerializedDataStream( |
| 927 self._zipfile, self._temporary_path, stream_name) |
| 928 data_stream.SeekEntryAtOffset(index, stream_offset) |
| 929 |
| 930 attribute_container = self._ReadAttributeContainerFromStreamEntry( |
| 931 data_stream, container_type) |
| 932 |
| 933 if attribute_container: |
| 934 identifier = identifiers.SerializedStreamIdentifier( |
| 935 stream_number, index) |
| 936 attribute_container.SetIdentifier(identifier) |
| 937 |
| 938 return attribute_container |
| 939 |
| 940 serialized_data = self._GetSerializedAttributeContainerByIndex( |
| 941 container_type, index) |
| 942 attribute_container = self._DeserializeAttributeContainer( |
| 943 container_type, serialized_data) |
| 944 |
| 945 if attribute_container: |
| 946 identifier = identifiers.SerializedStreamIdentifier( |
| 947 stream_number, index) |
| 948 attribute_container.SetIdentifier(identifier) |
| 949 |
| 950 return attribute_container |
| 951 |
| 952 def _GetAttributeContainers(self, container_type): |
| 953 """Retrieves attribute containers. |
| 954 |
| 955 Args: |
| 956 container_type (str): attribute container type. |
| 957 |
| 958 Yields: |
| 959 AttributeContainer: attribute container. |
| 960 |
| 961 Raises: |
| 962 IOError: if a stream is missing. |
| 963 """ |
| 964 last_stream_number = self._last_stream_numbers[container_type] |
| 965 for stream_number in range(1, last_stream_number): |
| 966 stream_name = u'{0:s}_data.{1:06}'.format( |
| 967 self._STREAM_NAME_PREFIXES[container_type], stream_number) |
| 968 if not self._HasStream(stream_name): |
| 969 raise IOError(u'No such stream: {0:s}'.format(stream_name)) |
| 970 |
| 971 data_stream = _SerializedDataStream( |
| 972 self._zipfile, self._temporary_path, stream_name) |
| 973 |
| 974 generator = self._ReadAttributeContainersFromStream( |
| 975 data_stream, container_type) |
| 976 for entry_index, attribute_container in enumerate(generator): |
| 977 identifier = identifiers.SerializedStreamIdentifier( |
| 978 stream_number, entry_index) |
| 979 attribute_container.SetIdentifier(identifier) |
| 980 yield attribute_container |
| 981 |
871 def _GetEvent(self, stream_number, entry_index=NEXT_AVAILABLE_ENTRY): | 982 def _GetEvent(self, stream_number, entry_index=NEXT_AVAILABLE_ENTRY): |
872 """Reads an event from a specific stream. | 983 """Reads an event from a specific stream. |
873 | 984 |
874 Args: | 985 Args: |
875 stream_number (int): number of the serialized event stream. | 986 stream_number (int): number of the serialized event stream. |
876 entry_index (Optional[int]): number of the serialized event within | 987 entry_index (Optional[int]): number of the serialized event within |
877 the stream, where NEXT_AVAILABLE_ENTRY represents the next available | 988 the stream, where NEXT_AVAILABLE_ENTRY represents the next available |
878 event. | 989 event. |
879 | 990 |
880 Returns: | 991 Returns: |
881 EventObject: event or None. | 992 EventObject: an event or None if not available. |
882 """ | 993 """ |
883 event_data, entry_index = self._GetEventSerializedData( | 994 event_data, entry_index = self._GetEventSerializedData( |
884 stream_number, entry_index=entry_index) | 995 stream_number, entry_index=entry_index) |
885 if not event_data: | 996 if not event_data: |
886 return | 997 return |
887 | 998 |
888 event = self._DeserializeAttributeContainer(u'event', event_data) | 999 event = self._DeserializeAttributeContainer(u'event', event_data) |
889 if event: | 1000 if not event: |
890 event_identifier = identifiers.SerializedStreamIdentifier( | 1001 return |
891 stream_number, entry_index) | 1002 |
892 event.SetIdentifier(event_identifier) | 1003 event_identifier = identifiers.SerializedStreamIdentifier( |
| 1004 stream_number, entry_index) |
| 1005 event.SetIdentifier(event_identifier) |
| 1006 |
| 1007 if (hasattr(event, u'event_data_stream_number') and |
| 1008 hasattr(event, u'event_data_entry_index')): |
| 1009 event_data_identifier = identifiers.SerializedStreamIdentifier( |
| 1010 event.event_data_stream_number, event.event_data_entry_index) |
| 1011 event.SetEventDataIdentifier(event_data_identifier) |
| 1012 |
| 1013 del event.event_data_stream_number |
| 1014 del event.event_data_entry_index |
| 1015 |
893 return event | 1016 return event |
894 | 1017 |
895 def _GetEventSerializedData( | 1018 def _GetEventSerializedData( |
896 self, stream_number, entry_index=NEXT_AVAILABLE_ENTRY): | 1019 self, stream_number, entry_index=NEXT_AVAILABLE_ENTRY): |
897 """Retrieves specific event serialized data. | 1020 """Retrieves specific event serialized data. |
898 | 1021 |
899 By default the first available entry in the specific serialized stream | 1022 By default the first available entry in the specific serialized stream |
900 is read, however any entry can be read using the index stream. | 1023 is read, however any entry can be read using the index stream. |
901 | 1024 |
902 Args: | 1025 Args: |
(...skipping 11 matching lines...) Expand all Loading... |
914 Raises: | 1037 Raises: |
915 IOError: if the stream cannot be opened. | 1038 IOError: if the stream cannot be opened. |
916 ValueError: if the stream number or entry index is out of bounds. | 1039 ValueError: if the stream number or entry index is out of bounds. |
917 """ | 1040 """ |
918 if stream_number is None: | 1041 if stream_number is None: |
919 raise ValueError(u'Invalid stream number.') | 1042 raise ValueError(u'Invalid stream number.') |
920 | 1043 |
921 if entry_index is None: | 1044 if entry_index is None: |
922 raise ValueError(u'Invalid entry index.') | 1045 raise ValueError(u'Invalid entry index.') |
923 | 1046 |
924 if stream_number < 1 or stream_number > self._event_stream_number: | 1047 if stream_number < 1 or stream_number > self._last_stream_numbers[u'event']: |
925 raise ValueError(u'Stream number: {0:d} out of bounds.'.format( | 1048 raise ValueError(u'Stream number: {0:d} out of bounds.'.format( |
926 stream_number)) | 1049 stream_number)) |
927 | 1050 |
928 if entry_index < self.NEXT_AVAILABLE_ENTRY: | 1051 if entry_index < self.NEXT_AVAILABLE_ENTRY: |
929 raise ValueError(u'Entry index: {0:d} out of bounds.'.format( | 1052 raise ValueError(u'Entry index: {0:d} out of bounds.'.format( |
930 entry_index)) | 1053 entry_index)) |
931 | 1054 |
932 try: | 1055 try: |
933 data_stream = self._GetSerializedEventStream(stream_number) | 1056 data_stream = self._GetSerializedDataStream(u'event', stream_number) |
934 except IOError as exception: | 1057 except IOError as exception: |
935 logging.error(( | 1058 logging.error(( |
936 u'Unable to retrieve serialized data steam: {0:d} ' | 1059 u'Unable to retrieve serialized data steam: {0:d} ' |
937 u'with error: {1:s}.').format(stream_number, exception)) | 1060 u'with error: {1:s}.').format(stream_number, exception)) |
938 return None, None | 1061 return None, None |
939 | 1062 |
940 if entry_index >= 0: | 1063 if entry_index >= 0: |
941 try: | 1064 try: |
942 offset_table = self._GetSerializedEventOffsetTable(stream_number) | 1065 offset_table = self._GetSerializedDataOffsetTable( |
| 1066 u'event', stream_number) |
943 stream_offset = offset_table.GetOffset(entry_index) | 1067 stream_offset = offset_table.GetOffset(entry_index) |
944 except (IndexError, IOError): | 1068 except (IndexError, IOError): |
945 logging.error(( | 1069 logging.error(( |
946 u'Unable to read entry index: {0:d} from serialized data stream: ' | 1070 u'Unable to read entry index: {0:d} from serialized data stream: ' |
947 u'{1:d}').format(entry_index, stream_number)) | 1071 u'{1:d}').format(entry_index, stream_number)) |
948 return None, None | 1072 return None, None |
949 | 1073 |
950 data_stream.SeekEntryAtOffset(entry_index, stream_offset) | 1074 data_stream.SeekEntryAtOffset(entry_index, stream_offset) |
951 | 1075 |
952 event_entry_index = data_stream.entry_index | 1076 event_entry_index = data_stream.entry_index |
(...skipping 13 matching lines...) Expand all Loading... |
966 Args: | 1090 Args: |
967 stream_number (int): number of the serialized event source stream. | 1091 stream_number (int): number of the serialized event source stream. |
968 entry_index (Optional[int]): number of the serialized event source | 1092 entry_index (Optional[int]): number of the serialized event source |
969 within the stream, where NEXT_AVAILABLE_ENTRY represents the next | 1093 within the stream, where NEXT_AVAILABLE_ENTRY represents the next |
970 available event | 1094 available event |
971 source. | 1095 source. |
972 | 1096 |
973 Returns: | 1097 Returns: |
974 EventSource: event source or None. | 1098 EventSource: event source or None. |
975 """ | 1099 """ |
976 event_source_data, entry_index = self._GetEventSourceSerializedData( | 1100 event_source_data, entry_index = self._GetSerializedAttributeContainerData( |
977 stream_number, entry_index=entry_index) | 1101 u'event_source', stream_number, entry_index=entry_index) |
978 if not event_source_data: | 1102 if not event_source_data: |
979 return | 1103 return |
980 | 1104 |
981 event_source = self._DeserializeAttributeContainer( | 1105 event_source = self._DeserializeAttributeContainer( |
982 u'event_source', event_source_data) | 1106 u'event_source', event_source_data) |
983 if event_source: | 1107 if event_source: |
984 event_source_identifier = identifiers.SerializedStreamIdentifier( | 1108 event_source_identifier = identifiers.SerializedStreamIdentifier( |
985 stream_number, entry_index) | 1109 stream_number, entry_index) |
986 event_source.SetIdentifier(event_source_identifier) | 1110 event_source.SetIdentifier(event_source_identifier) |
987 return event_source | 1111 return event_source |
988 | 1112 |
989 def _GetEventSourceSerializedData( | |
990 self, stream_number, entry_index=NEXT_AVAILABLE_ENTRY): | |
991 """Retrieves specific event source serialized data. | |
992 | |
993 By default the first available entry in the specific serialized stream | |
994 is read, however any entry can be read using the index stream. | |
995 | |
996 Args: | |
997 stream_number (int): number of the serialized event source stream. | |
998 entry_index (Optional[int]): number of the serialized event source | |
999 within the stream, where NEXT_AVAILABLE_ENTRY represents the next | |
1000 available event source. | |
1001 | |
1002 Returns: | |
1003 tuple: contains: | |
1004 | |
1005 bytes: event source serialized data. | |
1006 int: entry index of the event source within the stream. | |
1007 | |
1008 Raises: | |
1009 IOError: if the stream cannot be opened. | |
1010 ValueError: if the stream number or entry index is out of bounds. | |
1011 """ | |
1012 event_source_stream_number = self._stream_numbers[u'event_source'] | |
1013 | |
1014 if stream_number < 1 or stream_number > event_source_stream_number: | |
1015 raise ValueError(u'Stream number out of bounds.') | |
1016 | |
1017 if entry_index < self.NEXT_AVAILABLE_ENTRY: | |
1018 raise ValueError(u'Entry index out of bounds.') | |
1019 | |
1020 if stream_number == event_source_stream_number: | |
1021 if entry_index < 0: | |
1022 raise ValueError(u'Entry index out of bounds.') | |
1023 | |
1024 event_source_data = self._GetSerializedAttributeContainerByIndex( | |
1025 u'event_source', entry_index) | |
1026 return event_source_data, entry_index | |
1027 | |
1028 try: | |
1029 data_stream = self._GetSerializedEventSourceStream(stream_number) | |
1030 except IOError as exception: | |
1031 logging.error(( | |
1032 u'Unable to retrieve serialized data steam: {0:d} ' | |
1033 u'with error: {1:s}.').format(stream_number, exception)) | |
1034 return None, None | |
1035 | |
1036 if entry_index >= 0: | |
1037 try: | |
1038 offset_table = self._GetSerializedEventSourceOffsetTable(stream_number) | |
1039 stream_offset = offset_table.GetOffset(entry_index) | |
1040 except (IOError, IndexError): | |
1041 logging.error(( | |
1042 u'Unable to read entry index: {0:d} from serialized data stream: ' | |
1043 u'{1:d}').format(entry_index, stream_number)) | |
1044 return None, None | |
1045 | |
1046 data_stream.SeekEntryAtOffset(entry_index, stream_offset) | |
1047 | |
1048 event_source_entry_index = data_stream.entry_index | |
1049 try: | |
1050 event_source_data = data_stream.ReadEntry() | |
1051 except IOError as exception: | |
1052 logging.error(( | |
1053 u'Unable to read entry from serialized data steam: {0:d} ' | |
1054 u'with error: {1:s}.').format(stream_number, exception)) | |
1055 return None, None | |
1056 | |
1057 return event_source_data, event_source_entry_index | |
1058 | |
1059 def _GetEventTag(self, stream_number, entry_index=NEXT_AVAILABLE_ENTRY): | 1113 def _GetEventTag(self, stream_number, entry_index=NEXT_AVAILABLE_ENTRY): |
1060 """Reads an event tag from a specific stream. | 1114 """Reads an event tag from a specific stream. |
1061 | 1115 |
1062 Args: | 1116 Args: |
1063 stream_number (int): number of the serialized event tag stream. | 1117 stream_number (int): number of the serialized event tag stream. |
1064 entry_index (Optional[int]): number of the serialized event tag | 1118 entry_index (Optional[int]): number of the serialized event tag |
1065 within the stream, where NEXT_AVAILABLE_ENTRY represents | 1119 within the stream, where NEXT_AVAILABLE_ENTRY represents |
1066 the next available event tag. | 1120 the next available event tag. |
1067 | 1121 |
1068 Returns: | 1122 Returns: |
1069 EventTag: event tag or None. | 1123 EventTag: event tag or None. |
1070 """ | 1124 """ |
1071 event_tag_data, entry_index = self._GetEventTagSerializedData( | 1125 event_tag_data, entry_index = self._GetSerializedAttributeContainerData( |
1072 stream_number, entry_index=entry_index) | 1126 u'event_tag', stream_number, entry_index=entry_index) |
1073 if not event_tag_data: | 1127 if not event_tag_data: |
1074 return | 1128 return |
1075 | 1129 |
1076 event_tag = self._DeserializeAttributeContainer( | 1130 event_tag = self._DeserializeAttributeContainer( |
1077 u'event_tag', event_tag_data) | 1131 u'event_tag', event_tag_data) |
1078 if event_tag: | 1132 if event_tag: |
1079 event_tag_identifier = identifiers.SerializedStreamIdentifier( | 1133 event_tag_identifier = identifiers.SerializedStreamIdentifier( |
1080 stream_number, entry_index) | 1134 stream_number, entry_index) |
1081 event_tag.SetIdentifier(event_tag_identifier) | 1135 event_tag.SetIdentifier(event_tag_identifier) |
1082 | 1136 |
1083 event_identifier = identifiers.SerializedStreamIdentifier( | 1137 event_identifier = identifiers.SerializedStreamIdentifier( |
1084 event_tag.event_stream_number, event_tag.event_entry_index) | 1138 event_tag.event_stream_number, event_tag.event_entry_index) |
1085 event_tag.SetEventIdentifier(event_identifier) | 1139 event_tag.SetEventIdentifier(event_identifier) |
1086 | 1140 |
| 1141 del event_tag.event_stream_number |
| 1142 del event_tag.event_entry_index |
| 1143 |
1087 return event_tag | 1144 return event_tag |
1088 | 1145 |
1089 def _GetEventTagByIdentifier(self, event_identifier): | 1146 def _GetEventTagByIdentifier(self, event_identifier): |
1090 """Retrieves an event tag by the event identifier. | 1147 """Retrieves an event tag by the event identifier. |
1091 | 1148 |
1092 Args: | 1149 Args: |
1093 event_identifier (AttributeContainerIdentifier): event attribute | 1150 event_identifier (AttributeContainerIdentifier): event attribute |
1094 container identifier. | 1151 container identifier. |
1095 | 1152 |
1096 Returns: | 1153 Returns: |
1097 EventTag: event tag or None. | 1154 EventTag: event tag or None. |
1098 | 1155 |
1099 Raises: | 1156 Raises: |
1100 IOError: if the event tag data stream cannot be opened. | 1157 IOError: if the event tag data stream cannot be opened. |
1101 """ | 1158 """ |
1102 if not self._event_tag_index: | 1159 if not self._event_tag_index: |
1103 self._BuildEventTagIndex() | 1160 self._BuildEventTagIndex() |
1104 | 1161 |
1105 lookup_key = event_identifier.CopyToString() | 1162 lookup_key = event_identifier.CopyToString() |
1106 event_tag_identifier = self._event_tag_index.get(lookup_key, None) | 1163 event_tag_identifier = self._event_tag_index.get(lookup_key, None) |
1107 if not event_tag_identifier: | 1164 if not event_tag_identifier: |
1108 return | 1165 return |
1109 | 1166 |
1110 return self._GetEventTag( | 1167 return self._GetEventTag( |
1111 event_tag_identifier.stream_number, | 1168 event_tag_identifier.stream_number, |
1112 entry_index=event_tag_identifier.entry_index) | 1169 entry_index=event_tag_identifier.entry_index) |
1113 | 1170 |
1114 def _GetEventTagSerializedData( | 1171 def _GetLastStreamNumber(self, stream_name_prefix): |
1115 self, stream_number, entry_index=NEXT_AVAILABLE_ENTRY): | 1172 """Retrieves the last stream number. |
1116 """Retrieves specific event tag serialized data. | 1173 |
| 1174 Args: |
| 1175 stream_name_prefix (str): stream name prefix. |
| 1176 |
| 1177 Returns: |
| 1178 int: last stream number. |
| 1179 |
| 1180 Raises: |
| 1181 IOError: if the stream number format is not supported. |
| 1182 """ |
| 1183 last_stream_number = 0 |
| 1184 for stream_name in self._GetStreamNames(): |
| 1185 if stream_name.startswith(stream_name_prefix): |
| 1186 _, _, stream_number = stream_name.partition(u'.') |
| 1187 |
| 1188 try: |
| 1189 stream_number = int(stream_number, 10) |
| 1190 except ValueError: |
| 1191 raise IOError( |
| 1192 u'Unsupported stream number: {0:s}'.format(stream_number)) |
| 1193 |
| 1194 if stream_number > last_stream_number: |
| 1195 last_stream_number = stream_number |
| 1196 |
| 1197 return last_stream_number + 1 |
| 1198 |
| 1199 def _GetSerializedAttributeContainerData( |
| 1200 self, container_type, stream_number, entry_index=NEXT_AVAILABLE_ENTRY): |
| 1201 """Retrieves the serialized data of a specific attribute container. |
1117 | 1202 |
1118 By default the first available entry in the specific serialized stream | 1203 By default the first available entry in the specific serialized stream |
1119 is read, however any entry can be read using the index stream. | 1204 is read, however any entry can be read using the index stream. |
1120 | 1205 |
1121 Args: | 1206 Args: |
1122 stream_number (int): number of the serialized event tag stream. | 1207 container_type (str): attribute container type. |
1123 entry_index (Optional[int]): number of the serialized event tag | 1208 stream_number (int): number of the serialized attribute container stream. |
1124 within the stream, where NEXT_AVAILABLE_ENTRY represents | 1209 entry_index (Optional[int]): number of the serialized attribute container |
1125 the next available event tag. | 1210 within the stream, where NEXT_AVAILABLE_ENTRY represents the next |
| 1211 available attribute container. |
1126 | 1212 |
1127 Returns: | 1213 Returns: |
1128 tuple: contains: | 1214 tuple: contains: |
1129 | 1215 |
1130 bytes: event tag serialized data. | 1216 bytes: attribute container serialized data. |
1131 int: entry index of the event tag within the stream. | 1217 int: entry index of the attribute container within the stream. |
1132 | 1218 |
1133 Raises: | 1219 Raises: |
1134 IOError: if the stream cannot be opened. | 1220 IOError: if the stream cannot be opened. |
1135 ValueError: if the stream number or entry index is out of bounds. | 1221 ValueError: if the stream number or entry index is out of bounds. |
1136 """ | 1222 """ |
1137 event_tag_stream_number = self._stream_numbers[u'event_tag'] | 1223 last_stream_number = self._last_stream_numbers[container_type] |
1138 | 1224 |
1139 if stream_number < 1 or stream_number > event_tag_stream_number: | 1225 if stream_number < 1 or stream_number > last_stream_number: |
1140 raise ValueError(u'Stream number out of bounds.') | 1226 raise ValueError(u'Stream number out of bounds.') |
1141 | 1227 |
1142 if entry_index < self.NEXT_AVAILABLE_ENTRY: | 1228 if entry_index < self.NEXT_AVAILABLE_ENTRY: |
1143 raise ValueError(u'Entry index out of bounds.') | 1229 raise ValueError(u'Entry index out of bounds.') |
1144 | 1230 |
1145 if stream_number == event_tag_stream_number: | 1231 if stream_number == last_stream_number: |
1146 if entry_index < 0: | 1232 if entry_index < 0: |
1147 raise ValueError(u'Entry index out of bounds.') | 1233 raise ValueError(u'Entry index out of bounds.') |
1148 | 1234 |
1149 event_tag_data = self._GetSerializedAttributeContainerByIndex( | 1235 serialized_data = self._GetSerializedAttributeContainerByIndex( |
1150 u'event_tag', entry_index) | 1236 container_type, entry_index) |
1151 return event_tag_data, entry_index | 1237 return serialized_data, entry_index |
1152 | 1238 |
1153 try: | 1239 try: |
1154 data_stream = self._GetSerializedEventTagStream(stream_number) | 1240 data_stream = self._GetSerializedDataStream(container_type, stream_number) |
1155 except IOError as exception: | 1241 except IOError as exception: |
1156 logging.error(( | 1242 logging.error(( |
1157 u'Unable to retrieve serialized data steam: {0:d} ' | 1243 u'Unable to retrieve serialized data steam: {0:d} ' |
1158 u'with error: {1:s}.').format(stream_number, exception)) | 1244 u'with error: {1:s}.').format(stream_number, exception)) |
1159 return None, None | 1245 return None, None |
1160 | 1246 |
1161 if entry_index >= 0: | 1247 if entry_index >= 0: |
1162 try: | 1248 try: |
1163 offset_table = self._GetSerializedEventTagOffsetTable(stream_number) | 1249 offset_table = self._GetSerializedDataOffsetTable( |
| 1250 container_type, stream_number) |
1164 stream_offset = offset_table.GetOffset(entry_index) | 1251 stream_offset = offset_table.GetOffset(entry_index) |
1165 except (IOError, IndexError): | 1252 except (IOError, IndexError): |
1166 logging.error(( | 1253 logging.error(( |
1167 u'Unable to read entry index: {0:d} from serialized data stream: ' | 1254 u'Unable to read entry index: {0:d} from serialized data stream: ' |
1168 u'{1:d}').format(entry_index, stream_number)) | 1255 u'{1:d}').format(entry_index, stream_number)) |
1169 return None, None | 1256 return None, None |
1170 | 1257 |
1171 data_stream.SeekEntryAtOffset(entry_index, stream_offset) | 1258 data_stream.SeekEntryAtOffset(entry_index, stream_offset) |
1172 | 1259 |
1173 event_tag_entry_index = data_stream.entry_index | 1260 data_stream_entry_index = data_stream.entry_index |
1174 try: | 1261 try: |
1175 event_tag_data = data_stream.ReadEntry() | 1262 serialized_data = data_stream.ReadEntry() |
1176 except IOError as exception: | 1263 except IOError as exception: |
1177 logging.error(( | 1264 logging.error(( |
1178 u'Unable to read entry from serialized data steam: {0:d} ' | 1265 u'Unable to read entry from serialized data steam: {0:d} ' |
1179 u'with error: {1:s}.').format(stream_number, exception)) | 1266 u'with error: {1:s}.').format(stream_number, exception)) |
1180 return None, None | 1267 return None, None |
1181 | 1268 |
1182 return event_tag_data, event_tag_entry_index | 1269 return serialized_data, data_stream_entry_index |
1183 | 1270 |
1184 def _GetLastStreamNumber(self, stream_name_prefix): | 1271 def _GetSerializedDataStream(self, container_type, stream_number): |
1185 """Retrieves the last stream number. | |
1186 | |
1187 Args: | |
1188 stream_name_prefix (str): stream name prefix. | |
1189 | |
1190 Returns: | |
1191 int: last stream number. | |
1192 | |
1193 Raises: | |
1194 IOError: if the stream number format is not supported. | |
1195 """ | |
1196 last_stream_number = 0 | |
1197 for stream_name in self._GetStreamNames(): | |
1198 if stream_name.startswith(stream_name_prefix): | |
1199 _, _, stream_number = stream_name.partition(u'.') | |
1200 | |
1201 try: | |
1202 stream_number = int(stream_number, 10) | |
1203 except ValueError: | |
1204 raise IOError( | |
1205 u'Unsupported stream number: {0:s}'.format(stream_number)) | |
1206 | |
1207 if stream_number > last_stream_number: | |
1208 last_stream_number = stream_number | |
1209 | |
1210 return last_stream_number + 1 | |
1211 | |
1212 def _GetSerializedDataStream( | |
1213 self, streams_cache, stream_name_prefix, stream_number): | |
1214 """Retrieves the serialized data stream. | 1272 """Retrieves the serialized data stream. |
1215 | 1273 |
1216 Args: | 1274 Args: |
1217 streams_cache (dict): streams cache. | 1275 container_type (str): attribute container type. |
1218 stream_name_prefix (str): stream name prefix. | |
1219 stream_number (int): number of the stream. | 1276 stream_number (int): number of the stream. |
1220 | 1277 |
1221 Returns: | 1278 Returns: |
1222 _SerializedDataStream: serialized data stream. | 1279 _SerializedDataStream: serialized data stream. |
1223 | 1280 |
1224 Raises: | 1281 Raises: |
1225 IOError: if the stream cannot be opened. | 1282 IOError: if the stream cannot be opened. |
1226 """ | 1283 """ |
1227 data_stream = streams_cache.get(stream_number, None) | 1284 lookup_key = u'{0:s}.{1:d}'.format(container_type, stream_number) |
| 1285 |
| 1286 data_stream = self._streams.get(lookup_key, None) |
1228 if not data_stream: | 1287 if not data_stream: |
1229 stream_name = u'{0:s}.{1:06d}'.format(stream_name_prefix, stream_number) | 1288 stream_name = u'{0:s}_data.{1:06d}'.format( |
| 1289 self._STREAM_NAME_PREFIXES[container_type], stream_number) |
1230 if not self._HasStream(stream_name): | 1290 if not self._HasStream(stream_name): |
1231 raise IOError(u'No such stream: {0:s}'.format(stream_name)) | 1291 raise IOError(u'No such stream: {0:s}'.format(stream_name)) |
1232 | 1292 |
1233 data_stream = _SerializedDataStream( | 1293 data_stream = _SerializedDataStream( |
1234 self._zipfile, self._zipfile_path, stream_name) | 1294 self._zipfile, self._temporary_path, stream_name) |
1235 streams_cache[stream_number] = data_stream | 1295 |
| 1296 number_of_tables = len(self._streams) |
| 1297 if number_of_tables >= self._MAXIMUM_NUMBER_OF_CACHED_STREAMS: |
| 1298 lfu_lookup_key = self._streams_lfu.pop() |
| 1299 del self._streams[lfu_lookup_key] |
| 1300 |
| 1301 self._streams[lookup_key] = data_stream |
| 1302 |
| 1303 if lookup_key in self._streams_lfu: |
| 1304 lfu_index = self._streams_lfu.index(lookup_key) |
| 1305 self._streams_lfu.pop(lfu_index) |
| 1306 |
| 1307 self._streams_lfu.append(lookup_key) |
1236 | 1308 |
1237 return data_stream | 1309 return data_stream |
1238 | 1310 |
1239 def _GetSerializedDataOffsetTable( | 1311 def _GetSerializedDataOffsetTable(self, container_type, stream_number): |
1240 self, offset_tables_cache, offset_tables_lfu, stream_name_prefix, | |
1241 stream_number): | |
1242 """Retrieves the serialized data offset table. | 1312 """Retrieves the serialized data offset table. |
1243 | 1313 |
1244 Args: | 1314 Args: |
1245 offset_tables_cache (dict): offset tables cache. | 1315 container_type (str): attribute container type. |
1246 offset_tables_lfu (list[_SerializedDataOffsetTable]): least frequently | |
1247 used (LFU) offset tables. | |
1248 stream_name_prefix (str): stream name prefix. | |
1249 stream_number (int): number of the stream. | 1316 stream_number (int): number of the stream. |
1250 | 1317 |
1251 Returns: | 1318 Returns: |
1252 _SerializedDataOffsetTable: serialized data offset table. | 1319 _SerializedDataOffsetTable: serialized data offset table. |
1253 | 1320 |
1254 Raises: | 1321 Raises: |
1255 IOError: if the stream cannot be opened. | 1322 IOError: if the stream cannot be opened. |
1256 """ | 1323 """ |
1257 offset_table = offset_tables_cache.get(stream_number, None) | 1324 lookup_key = u'{0:s}.{1:d}'.format(container_type, stream_number) |
| 1325 |
| 1326 offset_table = self._offset_tables.get(lookup_key, None) |
1258 if not offset_table: | 1327 if not offset_table: |
1259 stream_name = u'{0:s}.{1:06d}'.format(stream_name_prefix, stream_number) | 1328 stream_name = u'{0:s}_index.{1:06d}'.format( |
| 1329 self._STREAM_NAME_PREFIXES[container_type], stream_number) |
1260 if not self._HasStream(stream_name): | 1330 if not self._HasStream(stream_name): |
1261 raise IOError(u'No such stream: {0:s}'.format(stream_name)) | 1331 raise IOError(u'No such stream: {0:s}'.format(stream_name)) |
1262 | 1332 |
1263 offset_table = _SerializedDataOffsetTable(self._zipfile, stream_name) | 1333 offset_table = _SerializedDataOffsetTable(self._zipfile, stream_name) |
1264 offset_table.Read() | 1334 offset_table.Read() |
1265 | 1335 |
1266 number_of_tables = len(offset_tables_cache) | 1336 number_of_tables = len(self._offset_tables) |
1267 if number_of_tables >= self._MAXIMUM_NUMBER_OF_CACHED_TABLES: | 1337 if number_of_tables >= self._MAXIMUM_NUMBER_OF_CACHED_TABLES: |
1268 lfu_stream_number = offset_tables_lfu.pop() | 1338 lfu_lookup_key = self._offset_tables_lfu.pop() |
1269 del offset_tables_cache[lfu_stream_number] | 1339 del self._offset_tables[lfu_lookup_key] |
1270 | 1340 |
1271 offset_tables_cache[stream_number] = offset_table | 1341 self._offset_tables[lookup_key] = offset_table |
1272 | 1342 |
1273 if stream_number in offset_tables_lfu: | 1343 if lookup_key in self._offset_tables_lfu: |
1274 lfu_index = offset_tables_lfu.index(stream_number) | 1344 lfu_index = self._offset_tables_lfu.index(lookup_key) |
1275 offset_tables_lfu.pop(lfu_index) | 1345 self._offset_tables_lfu.pop(lfu_index) |
1276 | 1346 |
1277 offset_tables_lfu.append(stream_number) | 1347 self._offset_tables_lfu.append(lookup_key) |
1278 | 1348 |
1279 return offset_table | 1349 return offset_table |
1280 | 1350 |
1281 def _GetSerializedDataStreamNumbers(self, stream_name_prefix): | 1351 def _GetSerializedDataStreamNumbers(self, stream_name_prefix): |
1282 """Retrieves the available serialized data stream numbers. | 1352 """Retrieves the available serialized data stream numbers. |
1283 | 1353 |
1284 Args: | 1354 Args: |
1285 stream_name_prefix (str): stream name prefix. | 1355 stream_name_prefix (str): stream name prefix. |
1286 | 1356 |
1287 Returns: | 1357 Returns: |
1288 list[int]: available serialized data stream numbers sorted numerically. | 1358 list[int]: available serialized data stream numbers sorted numerically. |
1289 """ | 1359 """ |
1290 stream_numbers = [] | 1360 stream_numbers = [] |
1291 for stream_name in self._zipfile.namelist(): | 1361 for stream_name in self._zipfile.namelist(): |
1292 if not stream_name.startswith(stream_name_prefix): | 1362 if not stream_name.startswith(stream_name_prefix): |
1293 continue | 1363 continue |
1294 | 1364 |
1295 _, _, stream_number = stream_name.partition(u'.') | 1365 _, _, stream_number = stream_name.partition(u'.') |
1296 try: | 1366 try: |
1297 stream_number = int(stream_number, 10) | 1367 stream_number = int(stream_number, 10) |
1298 stream_numbers.append(stream_number) | 1368 stream_numbers.append(stream_number) |
1299 except ValueError: | 1369 except ValueError: |
1300 logging.error( | 1370 logging.error( |
1301 u'Unable to determine stream number from stream: {0:s}'.format( | 1371 u'Unable to determine stream number from stream: {0:s}'.format( |
1302 stream_name)) | 1372 stream_name)) |
1303 | 1373 |
1304 return sorted(stream_numbers) | 1374 return sorted(stream_numbers) |
1305 | 1375 |
1306 def _GetSerializedEventOffsetTable(self, stream_number): | |
1307 """Retrieves the serialized event stream offset table. | |
1308 | |
1309 Args: | |
1310 stream_number (int): number of the stream. | |
1311 | |
1312 Returns: | |
1313 _SerializedDataOffsetTable: serialized data offset table. | |
1314 | |
1315 Raises: | |
1316 IOError: if the stream cannot be opened. | |
1317 """ | |
1318 return self._GetSerializedDataOffsetTable( | |
1319 self._event_offset_tables, self._event_offset_tables_lfu, | |
1320 u'event_index', stream_number) | |
1321 | |
1322 def _GetSerializedEventSourceOffsetTable(self, stream_number): | |
1323 """Retrieves the serialized event source stream offset table. | |
1324 | |
1325 Args: | |
1326 stream_number (int): number of the stream. | |
1327 | |
1328 Returns: | |
1329 _SerializedDataOffsetTable: serialized data offset table. | |
1330 | |
1331 Raises: | |
1332 IOError: if the stream cannot be opened. | |
1333 """ | |
1334 return self._GetSerializedDataOffsetTable( | |
1335 self._event_source_offset_tables, self._event_source_offset_tables_lfu, | |
1336 u'event_source_index', stream_number) | |
1337 | |
1338 def _GetSerializedEventSourceStream(self, stream_number): | |
1339 """Retrieves the serialized event source stream. | |
1340 | |
1341 Args: | |
1342 stream_number (int): number of the stream. | |
1343 | |
1344 Returns: | |
1345 _SerializedDataStream: serialized data stream. | |
1346 | |
1347 Raises: | |
1348 IOError: if the stream cannot be opened. | |
1349 """ | |
1350 return self._GetSerializedDataStream( | |
1351 self._event_source_streams, u'event_source_data', stream_number) | |
1352 | |
1353 def _GetSerializedEventTagStream(self, stream_number): | |
1354 """Retrieves the serialized event tag stream. | |
1355 | |
1356 Args: | |
1357 stream_number (int): number of the stream. | |
1358 | |
1359 Returns: | |
1360 _SerializedDataStream: serialized data stream. | |
1361 | |
1362 Raises: | |
1363 IOError: if the stream cannot be opened. | |
1364 """ | |
1365 return self._GetSerializedDataStream( | |
1366 self._event_tag_streams, u'event_tag_data', stream_number) | |
1367 | |
1368 def _GetSerializedEventStream(self, stream_number): | |
1369 """Retrieves the serialized event stream. | |
1370 | |
1371 Args: | |
1372 stream_number (int): number of the stream. | |
1373 | |
1374 Returns: | |
1375 _SerializedDataStream: serialized data stream. | |
1376 | |
1377 Raises: | |
1378 IOError: if the stream cannot be opened. | |
1379 """ | |
1380 return self._GetSerializedDataStream( | |
1381 self._event_streams, u'event_data', stream_number) | |
1382 | |
1383 def _GetSerializedEventSourceStreamNumbers(self): | |
1384 """Retrieves the available serialized event source stream numbers. | |
1385 | |
1386 Returns: | |
1387 list[int]: available serialized data stream numbers sorted numerically. | |
1388 """ | |
1389 return self._GetSerializedDataStreamNumbers(u'event_source_data.') | |
1390 | |
1391 def _GetSerializedEventStreamNumbers(self): | |
1392 """Retrieves the available serialized event stream numbers. | |
1393 | |
1394 Returns: | |
1395 list[int]: available serialized data stream numbers sorted numerically. | |
1396 """ | |
1397 return self._GetSerializedDataStreamNumbers(u'event_data.') | |
1398 | |
1399 def _GetSerializedEventTagOffsetTable(self, stream_number): | |
1400 """Retrieves the serialized event tag stream offset table. | |
1401 | |
1402 Args: | |
1403 stream_number (int): number of the stream. | |
1404 | |
1405 Returns: | |
1406 _SerializedDataOffsetTable: serialized data offset table. | |
1407 | |
1408 Raises: | |
1409 IOError: if the stream cannot be opened. | |
1410 """ | |
1411 return self._GetSerializedDataOffsetTable( | |
1412 self._event_tag_offset_tables, self._event_tag_offset_tables_lfu, | |
1413 u'event_tag_index', stream_number) | |
1414 | |
1415 def _GetSerializedEventTimestampTable(self, stream_number): | 1376 def _GetSerializedEventTimestampTable(self, stream_number): |
1416 """Retrieves the serialized event stream timestamp table. | 1377 """Retrieves the serialized event stream timestamp table. |
1417 | 1378 |
1418 Args: | 1379 Args: |
1419 stream_number (int): number of the stream. | 1380 stream_number (int): number of the stream. |
1420 | 1381 |
1421 Returns: | 1382 Returns: |
1422 _SerializedDataTimestampTable: serialized data timestamp table. | 1383 _SerializedDataTimestampTable: serialized data timestamp table. |
1423 | 1384 |
1424 Raises: | 1385 Raises: |
(...skipping 85 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1510 def _InitializeMergeBuffer(self, time_range=None): | 1471 def _InitializeMergeBuffer(self, time_range=None): |
1511 """Initializes the events into the merge buffer. | 1472 """Initializes the events into the merge buffer. |
1512 | 1473 |
1513 This function fills the merge buffer with the first relevant event | 1474 This function fills the merge buffer with the first relevant event |
1514 from each stream. | 1475 from each stream. |
1515 | 1476 |
1516 Args: | 1477 Args: |
1517 time_range (Optional[TimeRange]): time range used to filter events | 1478 time_range (Optional[TimeRange]): time range used to filter events |
1518 that fall in a specific period. | 1479 that fall in a specific period. |
1519 """ | 1480 """ |
1520 self._event_heap = _EventsHeap() | 1481 self._event_heap = event_heaps.SerializedStreamEventHeap() |
1521 | 1482 |
1522 number_range = self._GetSerializedEventStreamNumbers() | 1483 number_range = self._GetSerializedDataStreamNumbers(u'event_data.') |
1523 for stream_number in number_range: | 1484 for stream_number in number_range: |
1524 entry_index = self.NEXT_AVAILABLE_ENTRY | 1485 entry_index = self.NEXT_AVAILABLE_ENTRY |
1525 if time_range: | 1486 if time_range: |
1526 stream_name = u'event_timestamps.{0:06d}'.format(stream_number) | 1487 stream_name = u'event_timestamps.{0:06d}'.format(stream_number) |
1527 if self._HasStream(stream_name): | 1488 if self._HasStream(stream_name): |
1528 try: | 1489 try: |
1529 timestamp_table = self._GetSerializedEventTimestampTable( | 1490 timestamp_table = self._GetSerializedEventTimestampTable( |
1530 stream_number) | 1491 stream_number) |
1531 except IOError as exception: | 1492 except IOError as exception: |
1532 logging.error(( | 1493 logging.error(( |
(...skipping 45 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1578 stored_serialization_format = self._ReadSerializerStream() | 1539 stored_serialization_format = self._ReadSerializerStream() |
1579 if stored_serialization_format: | 1540 if stored_serialization_format: |
1580 self.serialization_format = stored_serialization_format | 1541 self.serialization_format = stored_serialization_format |
1581 | 1542 |
1582 if self.serialization_format != definitions.SERIALIZER_FORMAT_JSON: | 1543 if self.serialization_format != definitions.SERIALIZER_FORMAT_JSON: |
1583 raise IOError(u'Unsupported serialization format: {0:s}'.format( | 1544 raise IOError(u'Unsupported serialization format: {0:s}'.format( |
1584 self.serialization_format)) | 1545 self.serialization_format)) |
1585 | 1546 |
1586 self._serializer = json_serializer.JSONAttributeContainerSerializer | 1547 self._serializer = json_serializer.JSONAttributeContainerSerializer |
1587 | 1548 |
1588 # TODO: create a single function to determin last stream numbers. | 1549 for container_type, stream_name_prefix in ( |
1589 self._stream_numbers[u'extraction_error'] = self._GetLastStreamNumber( | 1550 self._STREAM_NAME_PREFIXES.items()): |
1590 u'error_data.') | 1551 stream_name_prefix = u'{0:s}_data.'.format(stream_name_prefix) |
1591 self._event_stream_number = self._GetLastStreamNumber(u'event_data.') | 1552 self._last_stream_numbers[container_type] = self._GetLastStreamNumber( |
1592 | 1553 stream_name_prefix) |
1593 self._stream_numbers[u'event_source'] = self._GetLastStreamNumber( | |
1594 u'event_source_data.') | |
1595 self._stream_numbers[u'event_tag'] = self._GetLastStreamNumber( | |
1596 u'event_tag_data.') | |
1597 | 1554 |
1598 self._analysis_report_stream_number = self._GetLastStreamNumber( | 1555 self._analysis_report_stream_number = self._GetLastStreamNumber( |
1599 u'analysis_report_data.') | 1556 u'analysis_report_data.') |
1600 self._last_preprocess = self._GetLastStreamNumber(u'preprocess.') | 1557 self._last_preprocess = self._GetLastStreamNumber(u'preprocess.') |
1601 | 1558 |
1602 last_session_start = self._GetLastStreamNumber(u'session_start.') | 1559 last_session_start = self._GetLastStreamNumber(u'session_start.') |
1603 last_session_completion = self._GetLastStreamNumber(u'session_completion.') | 1560 last_session_completion = self._GetLastStreamNumber(u'session_completion.') |
1604 | 1561 |
1605 # TODO: handle open sessions. | 1562 # TODO: handle open sessions. |
1606 if last_session_start != last_session_completion: | 1563 if last_session_start != last_session_completion: |
(...skipping 20 matching lines...) Expand all Loading... |
1627 Returns: | 1584 Returns: |
1628 zipfile.ZipExtFile: stream file-like object or None. | 1585 zipfile.ZipExtFile: stream file-like object or None. |
1629 """ | 1586 """ |
1630 try: | 1587 try: |
1631 return self._zipfile.open(stream_name, mode=access_mode) | 1588 return self._zipfile.open(stream_name, mode=access_mode) |
1632 except KeyError: | 1589 except KeyError: |
1633 return | 1590 return |
1634 | 1591 |
1635 def _OpenWrite(self): | 1592 def _OpenWrite(self): |
1636 """Opens the storage file for writing.""" | 1593 """Opens the storage file for writing.""" |
1637 if self._event_stream_number == 1: | 1594 if self._last_stream_numbers[u'event'] == 1: |
1638 self._WriteStorageMetadata() | 1595 self._WriteStorageMetadata() |
1639 | 1596 |
1640 def _OpenZIPFile(self, path, read_only): | 1597 def _OpenZIPFile(self, path, read_only): |
1641 """Opens the ZIP file. | 1598 """Opens the ZIP file. |
1642 | 1599 |
1643 Args: | 1600 Args: |
1644 path (str): path to the ZIP file. | 1601 path (str): path to the ZIP file. |
1645 read_only (bool): True if the file should be opened in read-only mode. | 1602 read_only (bool): True if the file should be opened in read-only mode. |
1646 | 1603 |
1647 Raises: | 1604 Raises: |
1648 IOError: if the ZIP file is already opened or if the ZIP file cannot | 1605 IOError: if the ZIP file is already opened or if the ZIP file cannot |
1649 be opened. | 1606 be opened. |
1650 """ | 1607 """ |
1651 if self._zipfile: | 1608 if self._zipfile: |
1652 raise IOError(u'ZIP file already opened.') | 1609 raise IOError(u'ZIP file already opened.') |
1653 | 1610 |
| 1611 # Create a temporary directory to prevent multiple ZIP storage |
| 1612 # files in the same directory conflicting with each other. |
| 1613 path = os.path.abspath(path) |
| 1614 directory_name = os.path.dirname(path) |
| 1615 self._temporary_path = tempfile.mkdtemp(dir=directory_name) |
| 1616 |
1654 if read_only: | 1617 if read_only: |
1655 access_mode = 'r' | 1618 access_mode = 'r' |
1656 | 1619 |
1657 zipfile_path = path | 1620 zipfile_path = path |
| 1621 |
1658 else: | 1622 else: |
1659 access_mode = 'a' | 1623 access_mode = 'a' |
1660 | 1624 |
1661 # Create a temporary directory to prevent multiple ZIP storage | |
1662 # files in the same directory conflicting with each other. | |
1663 directory_name = os.path.dirname(path) | |
1664 basename = os.path.basename(path) | 1625 basename = os.path.basename(path) |
1665 directory_name = tempfile.mkdtemp(dir=directory_name) | 1626 zipfile_path = os.path.join(self._temporary_path, basename) |
1666 zipfile_path = os.path.join(directory_name, basename) | |
1667 | 1627 |
1668 if os.path.exists(path): | 1628 if os.path.exists(path): |
1669 for attempt in range(1, self._MAXIMUM_NUMBER_OF_LOCKED_FILE_ATTEMPTS): | 1629 for attempt in range(1, self._MAXIMUM_NUMBER_OF_LOCKED_FILE_ATTEMPTS): |
1670 try: | 1630 try: |
1671 os.rename(path, zipfile_path) | 1631 os.rename(path, zipfile_path) |
1672 break | 1632 break |
1673 | 1633 |
1674 except OSError: | 1634 except OSError: |
1675 if attempt == self._MAXIMUM_NUMBER_OF_LOCKED_FILE_ATTEMPTS: | 1635 if attempt == self._MAXIMUM_NUMBER_OF_LOCKED_FILE_ATTEMPTS: |
1676 raise | 1636 raise |
(...skipping 78 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1755 Returns: | 1715 Returns: |
1756 bool: True if the storage metadata was read. | 1716 bool: True if the storage metadata was read. |
1757 | 1717 |
1758 Raises: | 1718 Raises: |
1759 IOError: if the format version or the serializer format is not supported. | 1719 IOError: if the format version or the serializer format is not supported. |
1760 """ | 1720 """ |
1761 stream_name = u'metadata.txt' | 1721 stream_name = u'metadata.txt' |
1762 if not self._HasStream(stream_name): | 1722 if not self._HasStream(stream_name): |
1763 return False | 1723 return False |
1764 | 1724 |
| 1725 stream_data = self._ReadStream(stream_name) |
| 1726 |
1765 storage_metadata_reader = _StorageMetadataReader() | 1727 storage_metadata_reader = _StorageMetadataReader() |
1766 stream_data = self._ReadStream(stream_name) | |
1767 storage_metadata = storage_metadata_reader.Read(stream_data) | 1728 storage_metadata = storage_metadata_reader.Read(stream_data) |
1768 | 1729 |
1769 if not storage_metadata.format_version: | 1730 ZIPStorageFile._CheckStorageMetadata(storage_metadata) |
1770 raise IOError(u'Missing format version.') | |
1771 | |
1772 if storage_metadata.format_version < self._COMPATIBLE_FORMAT_VERSION: | |
1773 raise IOError( | |
1774 u'Format version: {0:d} is too old and no longer supported.'.format( | |
1775 storage_metadata.format_version)) | |
1776 | |
1777 if storage_metadata.format_version > self._FORMAT_VERSION: | |
1778 raise IOError( | |
1779 u'Format version: {0:d} is too new and not yet supported.'.format( | |
1780 storage_metadata.format_version)) | |
1781 | |
1782 serialization_format = storage_metadata.serialization_format | |
1783 if serialization_format != definitions.SERIALIZER_FORMAT_JSON: | |
1784 raise IOError(u'Unsupported serialization format: {0:s}'.format( | |
1785 serialization_format)) | |
1786 | |
1787 if storage_metadata.storage_type not in definitions.STORAGE_TYPES: | |
1788 raise IOError(u'Unsupported storage type: {0:s}'.format( | |
1789 storage_metadata.storage_type)) | |
1790 | 1731 |
1791 self.format_version = storage_metadata.format_version | 1732 self.format_version = storage_metadata.format_version |
1792 self.serialization_format = serialization_format | 1733 self.serialization_format = storage_metadata.serialization_format |
1793 self.storage_type = storage_metadata.storage_type | 1734 self.storage_type = storage_metadata.storage_type |
1794 | 1735 |
1795 return True | 1736 return True |
1796 | 1737 |
1797 def _ReadStream(self, stream_name): | 1738 def _ReadStream(self, stream_name): |
1798 """Reads data from a stream. | 1739 """Reads data from a stream. |
1799 | 1740 |
1800 Args: | 1741 Args: |
1801 stream_name (str): name of the stream. | 1742 stream_name (str): name of the stream. |
1802 | 1743 |
1803 Returns: | 1744 Returns: |
1804 bytes: data of the stream. | 1745 bytes: data of the stream. |
1805 """ | 1746 """ |
1806 file_object = self._OpenStream(stream_name) | 1747 file_object = self._OpenStream(stream_name) |
1807 if not file_object: | 1748 if not file_object: |
1808 return b'' | 1749 return b'' |
1809 | 1750 |
1810 try: | 1751 try: |
1811 data = file_object.read() | 1752 data = file_object.read() |
1812 finally: | 1753 finally: |
1813 file_object.close() | 1754 file_object.close() |
1814 | 1755 |
1815 return data | 1756 return data |
1816 | 1757 |
| 1758 def _UpdateEventTagIndex(self, event_tag): |
| 1759 """Builds the event tag index. |
| 1760 |
| 1761 Args: |
| 1762 event_tag (EventTag): event tag. |
| 1763 """ |
| 1764 event_identifier = event_tag.GetEventIdentifier() |
| 1765 |
| 1766 lookup_key = event_identifier.CopyToString() |
| 1767 self._event_tag_index[lookup_key] = event_tag.GetIdentifier() |
| 1768 |
1817 def _WriteSerializedAttributeContainerList(self, container_type): | 1769 def _WriteSerializedAttributeContainerList(self, container_type): |
1818 """Writes a serialized attribute container list. | 1770 """Writes a serialized attribute container list. |
1819 | 1771 |
1820 Args: | 1772 Args: |
1821 container_type (str): attribute container type. | 1773 container_type (str): attribute container type. |
1822 """ | 1774 """ |
1823 container_list = self._GetSerializedAttributeContainerList(container_type) | 1775 container_list = self._GetSerializedAttributeContainerList(container_type) |
1824 if not container_list.data_size: | 1776 if not container_list.data_size: |
1825 return | 1777 return |
1826 | 1778 |
1827 stream_number = self._stream_numbers[container_type] | 1779 stream_name_prefix = self._STREAM_NAME_PREFIXES.get(container_type) |
| 1780 stream_number = self._last_stream_numbers[container_type] |
1828 | 1781 |
1829 stream_name = u'{0:s}_index.{1:06d}'.format(container_type, stream_number) | 1782 stream_name = u'{0:s}_index.{1:06d}'.format( |
| 1783 stream_name_prefix, stream_number) |
1830 offset_table = _SerializedDataOffsetTable(self._zipfile, stream_name) | 1784 offset_table = _SerializedDataOffsetTable(self._zipfile, stream_name) |
1831 | 1785 |
1832 stream_name = u'{0:s}_data.{1:06d}'.format(container_type, stream_number) | 1786 stream_name = u'{0:s}_data.{1:06d}'.format( |
| 1787 stream_name_prefix, stream_number) |
1833 data_stream = _SerializedDataStream( | 1788 data_stream = _SerializedDataStream( |
1834 self._zipfile, self._zipfile_path, stream_name) | 1789 self._zipfile, self._temporary_path, stream_name) |
1835 | 1790 |
1836 if self._serializers_profiler: | 1791 if self._serializers_profiler: |
1837 self._serializers_profiler.StartTiming(u'write') | 1792 self._serializers_profiler.StartTiming(u'write') |
1838 | 1793 |
1839 entry_data_offset = data_stream.WriteInitialize() | 1794 entry_data_offset = data_stream.WriteInitialize() |
1840 | 1795 |
1841 try: | 1796 try: |
1842 for _ in range(container_list.number_of_attribute_containers): | 1797 for _ in range(container_list.number_of_attribute_containers): |
1843 serialized_data = container_list.PopAttributeContainer() | 1798 serialized_data = container_list.PopAttributeContainer() |
1844 | 1799 |
1845 offset_table.AddOffset(entry_data_offset) | 1800 offset_table.AddOffset(entry_data_offset) |
1846 | 1801 |
1847 entry_data_offset = data_stream.WriteEntry(serialized_data) | 1802 entry_data_offset = data_stream.WriteEntry(serialized_data) |
1848 | 1803 |
1849 except: | 1804 except: |
1850 data_stream.WriteAbort() | 1805 data_stream.WriteAbort() |
1851 | 1806 |
1852 if self._serializers_profiler: | 1807 if self._serializers_profiler: |
1853 self._serializers_profiler.StopTiming(u'write') | 1808 self._serializers_profiler.StopTiming(u'write') |
1854 | 1809 |
1855 raise | 1810 raise |
1856 | 1811 |
1857 offset_table.Write() | 1812 offset_table.Write() |
1858 data_stream.WriteFinalize() | 1813 data_stream.WriteFinalize() |
1859 | 1814 |
1860 if self._serializers_profiler: | 1815 if self._serializers_profiler: |
1861 self._serializers_profiler.StopTiming(u'write') | 1816 self._serializers_profiler.StopTiming(u'write') |
1862 | 1817 |
1863 self._stream_numbers[container_type] = stream_number + 1 | 1818 self._last_stream_numbers[container_type] = stream_number + 1 |
1864 | 1819 |
1865 container_list.Empty() | 1820 container_list.Empty() |
1866 | 1821 |
1867 def _WriteSerializedEvents(self): | 1822 def _WriteSerializedEvents(self): |
1868 """Writes the serialized events.""" | 1823 """Writes the serialized events.""" |
1869 if not self._serialized_events_heap.data_size: | 1824 if not self._serialized_event_heap.data_size: |
1870 return | 1825 return |
1871 | 1826 |
1872 self._WriteSerializedEventsHeap( | 1827 self._WriteSerializedEventHeap( |
1873 self._serialized_events_heap, self._event_stream_number) | 1828 self._serialized_event_heap, self._last_stream_numbers[u'event']) |
1874 | 1829 |
1875 self._event_stream_number += 1 | 1830 self._last_stream_numbers[u'event'] += 1 |
1876 self._serialized_events_heap.Empty() | 1831 self._serialized_event_heap.Empty() |
1877 | 1832 |
1878 def _WriteSerializedEventsHeap(self, serialized_events_heap, stream_number): | 1833 def _WriteSerializedEventHeap(self, serialized_event_heap, stream_number): |
1879 """Writes the contents of an serialized events heap. | 1834 """Writes the contents of an serialized event heap. |
1880 | 1835 |
1881 Args: | 1836 Args: |
1882 serialized_events_heap(_SerializedEventsHeap): serialized events heap. | 1837 serialized_event_heap(_SerializedEventHeap): serialized event heap. |
1883 stream_number(int): stream number. | 1838 stream_number(int): stream number. |
1884 """ | 1839 """ |
1885 stream_name = u'event_index.{0:06d}'.format(stream_number) | 1840 stream_name = u'event_index.{0:06d}'.format(stream_number) |
1886 offset_table = _SerializedDataOffsetTable(self._zipfile, stream_name) | 1841 offset_table = _SerializedDataOffsetTable(self._zipfile, stream_name) |
1887 | 1842 |
1888 stream_name = u'event_timestamps.{0:06d}'.format(stream_number) | 1843 stream_name = u'event_timestamps.{0:06d}'.format(stream_number) |
1889 timestamp_table = _SerializedDataTimestampTable(self._zipfile, stream_name) | 1844 timestamp_table = _SerializedDataTimestampTable(self._zipfile, stream_name) |
1890 | 1845 |
1891 stream_name = u'event_data.{0:06d}'.format(stream_number) | 1846 stream_name = u'event_data.{0:06d}'.format(stream_number) |
1892 data_stream = _SerializedDataStream( | 1847 data_stream = _SerializedDataStream( |
1893 self._zipfile, self._zipfile_path, stream_name) | 1848 self._zipfile, self._temporary_path, stream_name) |
1894 | 1849 |
1895 if self._serializers_profiler: | 1850 if self._serializers_profiler: |
1896 self._serializers_profiler.StartTiming(u'write') | 1851 self._serializers_profiler.StartTiming(u'write') |
1897 | 1852 |
1898 entry_data_offset = data_stream.WriteInitialize() | 1853 entry_data_offset = data_stream.WriteInitialize() |
1899 | 1854 |
1900 try: | 1855 try: |
1901 for _ in range(serialized_events_heap.number_of_events): | 1856 for _ in range(serialized_event_heap.number_of_events): |
1902 timestamp, entry_data = serialized_events_heap.PopEvent() | 1857 timestamp, entry_data = serialized_event_heap.PopEvent() |
1903 | 1858 |
1904 timestamp_table.AddTimestamp(timestamp) | 1859 timestamp_table.AddTimestamp(timestamp) |
1905 offset_table.AddOffset(entry_data_offset) | 1860 offset_table.AddOffset(entry_data_offset) |
1906 | 1861 |
1907 entry_data_offset = data_stream.WriteEntry(entry_data) | 1862 entry_data_offset = data_stream.WriteEntry(entry_data) |
1908 | 1863 |
1909 except: | 1864 except: |
1910 data_stream.WriteAbort() | 1865 data_stream.WriteAbort() |
1911 | 1866 |
1912 if self._serializers_profiler: | 1867 if self._serializers_profiler: |
(...skipping 24 matching lines...) Expand all Loading... |
1937 | 1892 |
1938 stream_name = u'session_completion.{0:06d}'.format(self._last_session) | 1893 stream_name = u'session_completion.{0:06d}'.format(self._last_session) |
1939 if self._HasStream(stream_name): | 1894 if self._HasStream(stream_name): |
1940 raise IOError(u'Session completion: {0:06d} already exists.'.format( | 1895 raise IOError(u'Session completion: {0:06d} already exists.'.format( |
1941 self._last_session)) | 1896 self._last_session)) |
1942 | 1897 |
1943 session_completion_data = self._SerializeAttributeContainer( | 1898 session_completion_data = self._SerializeAttributeContainer( |
1944 session_completion) | 1899 session_completion) |
1945 | 1900 |
1946 data_stream = _SerializedDataStream( | 1901 data_stream = _SerializedDataStream( |
1947 self._zipfile, self._zipfile_path, stream_name) | 1902 self._zipfile, self._temporary_path, stream_name) |
1948 data_stream.WriteInitialize() | 1903 data_stream.WriteInitialize() |
1949 data_stream.WriteEntry(session_completion_data) | 1904 data_stream.WriteEntry(session_completion_data) |
1950 data_stream.WriteFinalize() | 1905 data_stream.WriteFinalize() |
1951 | 1906 |
1952 def _WriteSessionStart(self, session_start): | 1907 def _WriteSessionStart(self, session_start): |
1953 """Writes a session start attribute container | 1908 """Writes a session start attribute container |
1954 | 1909 |
1955 Args: | 1910 Args: |
1956 session_start (SessionStart): session start attribute container. | 1911 session_start (SessionStart): session start attribute container. |
1957 | 1912 |
1958 Raises: | 1913 Raises: |
1959 IOError: if the storage type does not support writing a session | 1914 IOError: if the storage type does not support writing a session |
1960 start or the session start already exists. | 1915 start or the session start already exists. |
1961 """ | 1916 """ |
1962 if self.storage_type != definitions.STORAGE_TYPE_SESSION: | 1917 if self.storage_type != definitions.STORAGE_TYPE_SESSION: |
1963 raise IOError(u'Session completion not supported by storage type.') | 1918 raise IOError(u'Session completion not supported by storage type.') |
1964 | 1919 |
1965 stream_name = u'session_start.{0:06d}'.format(self._last_session) | 1920 stream_name = u'session_start.{0:06d}'.format(self._last_session) |
1966 if self._HasStream(stream_name): | 1921 if self._HasStream(stream_name): |
1967 raise IOError(u'Session start: {0:06d} already exists.'.format( | 1922 raise IOError(u'Session start: {0:06d} already exists.'.format( |
1968 self._last_session)) | 1923 self._last_session)) |
1969 | 1924 |
1970 session_start_data = self._SerializeAttributeContainer(session_start) | 1925 session_start_data = self._SerializeAttributeContainer(session_start) |
1971 | 1926 |
1972 data_stream = _SerializedDataStream( | 1927 data_stream = _SerializedDataStream( |
1973 self._zipfile, self._zipfile_path, stream_name) | 1928 self._zipfile, self._temporary_path, stream_name) |
1974 data_stream.WriteInitialize() | 1929 data_stream.WriteInitialize() |
1975 data_stream.WriteEntry(session_start_data) | 1930 data_stream.WriteEntry(session_start_data) |
1976 data_stream.WriteFinalize() | 1931 data_stream.WriteFinalize() |
1977 | 1932 |
1978 def _WriteStorageMetadata(self): | 1933 def _WriteStorageMetadata(self): |
1979 """Writes the storage metadata.""" | 1934 """Writes the storage metadata.""" |
1980 stream_name = u'metadata.txt' | 1935 stream_name = u'metadata.txt' |
1981 if self._HasStream(stream_name): | 1936 if self._HasStream(stream_name): |
1982 return | 1937 return |
1983 | 1938 |
(...skipping 36 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
2020 raise IOError(u'Task completion not supported by storage type.') | 1975 raise IOError(u'Task completion not supported by storage type.') |
2021 | 1976 |
2022 stream_name = u'task_completion.{0:06d}'.format(self._last_task) | 1977 stream_name = u'task_completion.{0:06d}'.format(self._last_task) |
2023 if self._HasStream(stream_name): | 1978 if self._HasStream(stream_name): |
2024 raise IOError(u'Task completion: {0:06d} already exists.'.format( | 1979 raise IOError(u'Task completion: {0:06d} already exists.'.format( |
2025 self._last_task)) | 1980 self._last_task)) |
2026 | 1981 |
2027 task_completion_data = self._SerializeAttributeContainer(task_completion) | 1982 task_completion_data = self._SerializeAttributeContainer(task_completion) |
2028 | 1983 |
2029 data_stream = _SerializedDataStream( | 1984 data_stream = _SerializedDataStream( |
2030 self._zipfile, self._zipfile_path, stream_name) | 1985 self._zipfile, self._temporary_path, stream_name) |
2031 data_stream.WriteInitialize() | 1986 data_stream.WriteInitialize() |
2032 data_stream.WriteEntry(task_completion_data) | 1987 data_stream.WriteEntry(task_completion_data) |
2033 data_stream.WriteFinalize() | 1988 data_stream.WriteFinalize() |
2034 | 1989 |
2035 def _WriteTaskStart(self, task_start): | 1990 def _WriteTaskStart(self, task_start): |
2036 """Writes a task start attribute container. | 1991 """Writes a task start attribute container. |
2037 | 1992 |
2038 Args: | 1993 Args: |
2039 task_start (TaskStart): task start attribute container. | 1994 task_start (TaskStart): task start attribute container. |
2040 | 1995 |
2041 Raises: | 1996 Raises: |
2042 IOError: if the storage type does not support writing a task start | 1997 IOError: if the storage type does not support writing a task start |
2043 or the task start already exists. | 1998 or the task start already exists. |
2044 """ | 1999 """ |
2045 if self.storage_type != definitions.STORAGE_TYPE_TASK: | 2000 if self.storage_type != definitions.STORAGE_TYPE_TASK: |
2046 raise IOError(u'Task start not supported by storage type.') | 2001 raise IOError(u'Task start not supported by storage type.') |
2047 | 2002 |
2048 stream_name = u'task_start.{0:06d}'.format(self._last_task) | 2003 stream_name = u'task_start.{0:06d}'.format(self._last_task) |
2049 if self._HasStream(stream_name): | 2004 if self._HasStream(stream_name): |
2050 raise IOError(u'Task start: {0:06d} already exists.'.format( | 2005 raise IOError(u'Task start: {0:06d} already exists.'.format( |
2051 self._last_task)) | 2006 self._last_task)) |
2052 | 2007 |
2053 task_start_data = self._SerializeAttributeContainer(task_start) | 2008 task_start_data = self._SerializeAttributeContainer(task_start) |
2054 | 2009 |
2055 data_stream = _SerializedDataStream( | 2010 data_stream = _SerializedDataStream( |
2056 self._zipfile, self._zipfile_path, stream_name) | 2011 self._zipfile, self._temporary_path, stream_name) |
2057 data_stream.WriteInitialize() | 2012 data_stream.WriteInitialize() |
2058 data_stream.WriteEntry(task_start_data) | 2013 data_stream.WriteEntry(task_start_data) |
2059 data_stream.WriteFinalize() | 2014 data_stream.WriteFinalize() |
2060 | 2015 |
2061 def AddAnalysisReport(self, analysis_report): | 2016 def AddAnalysisReport(self, analysis_report): |
2062 """Adds an analysis report. | 2017 """Adds an analysis report. |
2063 | 2018 |
2064 Args: | 2019 Args: |
2065 analysis_report (AnalysisReport): analysis report. | 2020 analysis_report (AnalysisReport): analysis report. |
2066 | 2021 |
2067 Raises: | 2022 Raises: |
2068 IOError: when the storage file is closed or read-only. | 2023 IOError: when the storage file is closed or read-only or |
| 2024 if the analysis report cannot be serialized. |
2069 """ | 2025 """ |
2070 if not self._is_open: | 2026 self._RaiseIfNotWritable() |
2071 raise IOError(u'Unable to write to closed storage file.') | |
2072 | |
2073 if self._read_only: | |
2074 raise IOError(u'Unable to write to read-only storage file.') | |
2075 | 2027 |
2076 analysis_report_identifier = identifiers.SerializedStreamIdentifier( | 2028 analysis_report_identifier = identifiers.SerializedStreamIdentifier( |
2077 self._analysis_report_stream_number, 0) | 2029 self._analysis_report_stream_number, 0) |
2078 analysis_report.SetIdentifier(analysis_report_identifier) | 2030 analysis_report.SetIdentifier(analysis_report_identifier) |
2079 | 2031 |
2080 stream_name = u'analysis_report_data.{0:06}'.format( | 2032 stream_name = u'analysis_report_data.{0:06}'.format( |
2081 self._analysis_report_stream_number) | 2033 self._analysis_report_stream_number) |
2082 | 2034 |
2083 serialized_report = self._SerializeAttributeContainer(analysis_report) | 2035 serialized_report = self._SerializeAttributeContainer(analysis_report) |
2084 | 2036 |
2085 data_stream = _SerializedDataStream( | 2037 data_stream = _SerializedDataStream( |
2086 self._zipfile, self._zipfile_path, stream_name) | 2038 self._zipfile, self._temporary_path, stream_name) |
2087 data_stream.WriteInitialize() | 2039 data_stream.WriteInitialize() |
2088 data_stream.WriteEntry(serialized_report) | 2040 data_stream.WriteEntry(serialized_report) |
2089 data_stream.WriteFinalize() | 2041 data_stream.WriteFinalize() |
2090 | 2042 |
2091 self._analysis_report_stream_number += 1 | 2043 self._analysis_report_stream_number += 1 |
2092 | 2044 |
2093 def AddError(self, error): | 2045 def AddError(self, error): |
2094 """Adds an error. | 2046 """Adds an error. |
2095 | 2047 |
2096 Args: | 2048 Args: |
2097 error (ExtractionError): error. | 2049 error (ExtractionError): error. |
2098 | 2050 |
2099 Raises: | 2051 Raises: |
2100 IOError: when the storage file is closed or read-only or | 2052 IOError: when the storage file is closed or read-only or |
2101 if the error cannot be serialized. | 2053 if the error cannot be serialized. |
2102 """ | 2054 """ |
| 2055 self._RaiseIfNotWritable() |
| 2056 |
2103 self._AddAttributeContainer(u'extraction_error', error) | 2057 self._AddAttributeContainer(u'extraction_error', error) |
2104 | 2058 |
2105 def AddEvent(self, event): | 2059 def AddEvent(self, event): |
2106 """Adds an event. | 2060 """Adds an event. |
2107 | 2061 |
2108 Args: | 2062 Args: |
2109 event (EventObject): event. | 2063 event (EventObject): event. |
2110 | 2064 |
2111 Raises: | 2065 Raises: |
2112 IOError: when the storage file is closed or read-only or | 2066 IOError: when the storage file is closed or read-only or |
2113 if the event cannot be serialized. | 2067 if the event cannot be serialized. |
2114 """ | 2068 """ |
2115 if not self._is_open: | 2069 self._RaiseIfNotWritable() |
2116 raise IOError(u'Unable to write to closed storage file.') | |
2117 | 2070 |
2118 if self._read_only: | 2071 # TODO: change to no longer allow event_data_identifier is None |
2119 raise IOError(u'Unable to write to read-only storage file.') | 2072 # after refactoring every parser to generate event data. |
| 2073 event_data_identifier = event.GetEventDataIdentifier() |
| 2074 if event_data_identifier: |
| 2075 if not isinstance( |
| 2076 event_data_identifier, identifiers.SerializedStreamIdentifier): |
| 2077 raise IOError(u'Unsupported event data identifier type: {0:s}'.format( |
| 2078 type(event_data_identifier))) |
2120 | 2079 |
2121 event_identifier = identifiers.SerializedStreamIdentifier( | 2080 event.event_data_stream_number = event_data_identifier.stream_number |
2122 self._event_stream_number, | 2081 event.event_data_entry_index = event_data_identifier.entry_index |
2123 self._serialized_events_heap.number_of_events) | |
2124 event.SetIdentifier(event_identifier) | |
2125 | 2082 |
2126 # We try to serialize the event first, so we can skip some | 2083 self._AddSerializedEvent(event) |
2127 # processing if it is invalid. | |
2128 event_data = self._SerializeAttributeContainer(event) | |
2129 | 2084 |
2130 self._serialized_events_heap.PushEvent(event.timestamp, event_data) | 2085 def AddEventData(self, event_data): |
| 2086 """Adds event data. |
2131 | 2087 |
2132 if self._serialized_events_heap.data_size > self._maximum_buffer_size: | 2088 Args: |
2133 self._WriteSerializedEvents() | 2089 event_data (EventData): event data. |
| 2090 """ |
| 2091 self._RaiseIfNotWritable() |
| 2092 |
| 2093 self._AddAttributeContainer(u'event_data', event_data) |
2134 | 2094 |
2135 def AddEventSource(self, event_source): | 2095 def AddEventSource(self, event_source): |
2136 """Adds an event source. | 2096 """Adds an event source. |
2137 | 2097 |
2138 Args: | 2098 Args: |
2139 event_source (EventSource): event source. | 2099 event_source (EventSource): event source. |
2140 | 2100 |
2141 Raises: | 2101 Raises: |
2142 IOError: when the storage file is closed or read-only or | 2102 IOError: when the storage file is closed or read-only or |
2143 if the event source cannot be serialized. | 2103 if the event source cannot be serialized. |
2144 """ | 2104 """ |
| 2105 self._RaiseIfNotWritable() |
| 2106 |
2145 self._AddAttributeContainer(u'event_source', event_source) | 2107 self._AddAttributeContainer(u'event_source', event_source) |
2146 | 2108 |
2147 def AddEventTag(self, event_tag): | 2109 def AddEventTag(self, event_tag): |
2148 """Adds an event tag. | 2110 """Adds an event tag. |
2149 | 2111 |
2150 If the event referenced by the tag is already tagged, the comment | 2112 If the event referenced by the tag is already tagged, the comment |
2151 and labels will be appended to the existing tag. | 2113 and labels will be appended to the existing tag. |
2152 | 2114 |
2153 Args: | 2115 Args: |
2154 event_tag (EventTag): event tag. | 2116 event_tag (EventTag): event tag. |
2155 | 2117 |
2156 Raises: | 2118 Raises: |
2157 IOError: when the storage file is closed or read-only or | 2119 IOError: when the storage file is closed or read-only or |
2158 if the event tag cannot be serialized or | 2120 if the event tag cannot be serialized or |
2159 if the event tag event identifier type is not supported. | 2121 if the event tag event identifier type is not supported. |
2160 """ | 2122 """ |
2161 if not self._is_open: | 2123 self._RaiseIfNotWritable() |
2162 raise IOError(u'Unable to write to closed storage file.') | |
2163 | |
2164 if self._read_only: | |
2165 raise IOError(u'Unable to write to read-only storage file.') | |
2166 | 2124 |
2167 event_identifier = event_tag.GetEventIdentifier() | 2125 event_identifier = event_tag.GetEventIdentifier() |
2168 if not isinstance( | 2126 if not isinstance( |
2169 event_identifier, identifiers.SerializedStreamIdentifier): | 2127 event_identifier, identifiers.SerializedStreamIdentifier): |
2170 raise IOError(u'Unsupported event identifier type: {0:s}'.format( | 2128 raise IOError(u'Unsupported event identifier type: {0:s}'.format( |
2171 type(event_identifier))) | 2129 type(event_identifier))) |
2172 | 2130 |
2173 container_list = self._GetSerializedAttributeContainerList(u'event_tag') | 2131 event_tag.event_stream_number = event_identifier.stream_number |
2174 | 2132 event_tag.event_entry_index = event_identifier.entry_index |
2175 stream_number = self._stream_numbers[u'event_tag'] | |
2176 event_tag_identifier = identifiers.SerializedStreamIdentifier( | |
2177 stream_number, container_list.number_of_attribute_containers) | |
2178 event_tag.SetIdentifier(event_tag_identifier) | |
2179 | 2133 |
2180 # Check if the event has already been tagged on a previous occasion, | 2134 # Check if the event has already been tagged on a previous occasion, |
2181 # we need to append the event tag any existing event tag. | 2135 # we need to append the event tag any existing event tag. |
2182 stored_event_tag = self._GetEventTagByIdentifier(event_identifier) | 2136 stored_event_tag = self._GetEventTagByIdentifier(event_identifier) |
2183 | |
2184 if stored_event_tag: | 2137 if stored_event_tag: |
2185 event_tag.AddComment(stored_event_tag.comment) | 2138 event_tag.AddComment(stored_event_tag.comment) |
2186 event_tag.AddLabels(stored_event_tag.labels) | 2139 event_tag.AddLabels(stored_event_tag.labels) |
2187 | 2140 |
2188 event_tag.event_stream_number = event_identifier.stream_number | 2141 self._AddAttributeContainer(u'event_tag', event_tag) |
2189 event_tag.event_entry_index = event_identifier.entry_index | |
2190 | 2142 |
2191 # TODO: update event tag index value. | 2143 self._UpdateEventTagIndex(event_tag) |
2192 lookup_key = event_identifier.CopyToString() | |
2193 self._event_tag_index[lookup_key] = event_tag_identifier | |
2194 | |
2195 # We try to serialize the event tag first, so we can skip some | |
2196 # processing if it is invalid. | |
2197 event_tag_data = self._SerializeAttributeContainer(event_tag) | |
2198 | |
2199 container_list.PushAttributeContainer(event_tag_data) | |
2200 | |
2201 if container_list.data_size > self._maximum_buffer_size: | |
2202 self._WriteSerializedAttributeContainerList(u'event_tag') | |
2203 | 2144 |
2204 def AddEventTags(self, event_tags): | 2145 def AddEventTags(self, event_tags): |
2205 """Adds event tags. | 2146 """Adds event tags. |
2206 | 2147 |
2207 Args: | 2148 Args: |
2208 event_tags (list[EventTag]): event tags. | 2149 event_tags (list[EventTag]): event tags. |
2209 | 2150 |
2210 Raises: | 2151 Raises: |
2211 IOError: when the storage file is closed or read-only or | 2152 IOError: when the storage file is closed or read-only or |
2212 if the stream cannot be opened. | 2153 if the event tags cannot be serialized. |
2213 """ | 2154 """ |
2214 if not self._is_open: | 2155 self._RaiseIfNotWritable() |
2215 raise IOError(u'Unable to write to closed storage file.') | |
2216 | |
2217 if self._read_only: | |
2218 raise IOError(u'Unable to write to read-only storage file.') | |
2219 | 2156 |
2220 for event_tag in event_tags: | 2157 for event_tag in event_tags: |
2221 self.AddEventTag(event_tag) | 2158 self.AddEventTag(event_tag) |
2222 | 2159 |
2223 @classmethod | 2160 @classmethod |
2224 def CheckSupportedFormat(cls, path): | 2161 def CheckSupportedFormat(cls, path): |
2225 """Checks is the storage file format is supported. | 2162 """Checks is the storage file format is supported. |
2226 | 2163 |
2227 Args: | 2164 Args: |
2228 path (str): path to the storage file. | 2165 path (str): path to the storage file. |
2229 | 2166 |
2230 Returns: | 2167 Returns: |
2231 bool: True if the format is supported. | 2168 bool: True if the format is supported. |
2232 """ | 2169 """ |
2233 try: | 2170 try: |
2234 zip_file = zipfile.ZipFile( | 2171 zip_file = zipfile.ZipFile( |
2235 path, mode=u'r', compression=zipfile.ZIP_DEFLATED, allowZip64=True) | 2172 path, mode=u'r', compression=zipfile.ZIP_DEFLATED, allowZip64=True) |
2236 | 2173 |
2237 # TODO: check metadata. | 2174 with zip_file.open(u'metadata.txt', mode=u'r') as file_object: |
| 2175 stream_data = file_object.read() |
| 2176 |
| 2177 storage_metadata_reader = _StorageMetadataReader() |
| 2178 storage_metadata = storage_metadata_reader.Read(stream_data) |
| 2179 |
| 2180 cls._CheckStorageMetadata(storage_metadata) |
2238 | 2181 |
2239 zip_file.close() | 2182 zip_file.close() |
2240 result = True | 2183 result = True |
2241 | 2184 |
2242 except zipfile.BadZipfile: | 2185 except (IOError, KeyError, zipfile.BadZipfile): |
2243 result = False | 2186 result = False |
2244 | 2187 |
2245 return result | 2188 return result |
2246 | 2189 |
2247 def Close(self): | 2190 def Close(self): |
2248 """Closes the storage file. | 2191 """Closes the storage file. |
2249 | 2192 |
2250 Buffered attribute containers are written to file. | 2193 Buffered attribute containers are written to file. |
2251 | 2194 |
2252 Raises: | 2195 Raises: |
2253 IOError: if the storage file is already closed, | 2196 IOError: if the storage file is already closed, |
2254 if the event source cannot be serialized or | 2197 if the event source cannot be serialized or |
2255 if the storage file cannot be closed. | 2198 if the storage file cannot be closed. |
2256 """ | 2199 """ |
2257 if not self._is_open: | 2200 if not self._is_open: |
2258 raise IOError(u'Storage file already closed.') | 2201 raise IOError(u'Storage file already closed.') |
2259 | 2202 |
2260 if not self._read_only: | 2203 if not self._read_only: |
2261 self.Flush() | 2204 self.Flush() |
2262 | 2205 |
2263 if self._serializers_profiler: | 2206 if self._serializers_profiler: |
2264 self._serializers_profiler.Write() | 2207 self._serializers_profiler.Write() |
2265 | 2208 |
2266 # Make sure to flush the caches so that zipfile can be closed and freed. | 2209 # Make sure to flush the caches so that zipfile can be closed and freed. |
2267 # Otherwise on Windows the ZIP file remains locked and cannot be renamed. | 2210 # Otherwise on Windows the ZIP file remains locked and cannot be renamed. |
2268 | 2211 |
2269 self._event_offset_tables = {} | 2212 self._offset_tables = {} |
2270 self._event_offset_tables_lfu = [] | 2213 self._offset_tables_lfu = [] |
2271 self._event_streams = {} | |
2272 | 2214 |
2273 self._event_source_offset_tables = [] | 2215 self._streams = {} |
2274 self._event_source_offset_tables_lfu = [] | 2216 self._streams_lfu = [] |
2275 self._event_source_streams = {} | |
2276 | |
2277 self._event_tag_offset_tables = [] | |
2278 self._event_tag_offset_tables_lfu = [] | |
2279 self._event_tag_streams = {} | |
2280 | 2217 |
2281 self._event_timestamp_tables = {} | 2218 self._event_timestamp_tables = {} |
2282 self._event_timestamp_tables_lfu = [] | 2219 self._event_timestamp_tables_lfu = [] |
2283 | 2220 |
2284 self._zipfile.close() | 2221 self._zipfile.close() |
2285 self._zipfile = None | 2222 self._zipfile = None |
2286 self._is_open = False | 2223 self._is_open = False |
2287 | 2224 |
2288 file_renamed = False | 2225 file_renamed = False |
2289 if self._path != self._zipfile_path and os.path.exists(self._zipfile_path): | 2226 if self._path != self._zipfile_path and os.path.exists(self._zipfile_path): |
2290 # On Windows the file can sometimes be still in use and we have to wait. | 2227 # On Windows the file can sometimes be still in use and we have to wait. |
2291 for attempt in range(1, self._MAXIMUM_NUMBER_OF_LOCKED_FILE_ATTEMPTS): | 2228 for attempt in range(1, self._MAXIMUM_NUMBER_OF_LOCKED_FILE_ATTEMPTS): |
2292 try: | 2229 try: |
2293 os.rename(self._zipfile_path, self._path) | 2230 os.rename(self._zipfile_path, self._path) |
2294 file_renamed = True | 2231 file_renamed = True |
2295 break | 2232 break |
2296 | 2233 |
2297 except OSError: | 2234 except OSError: |
2298 if attempt == self._MAXIMUM_NUMBER_OF_LOCKED_FILE_ATTEMPTS: | 2235 if attempt == self._MAXIMUM_NUMBER_OF_LOCKED_FILE_ATTEMPTS: |
2299 raise | 2236 raise |
2300 time.sleep(self._LOCKED_FILE_SLEEP_TIME) | 2237 time.sleep(self._LOCKED_FILE_SLEEP_TIME) |
2301 | 2238 |
2302 if file_renamed: | |
2303 directory_name = os.path.dirname(self._zipfile_path) | |
2304 os.rmdir(directory_name) | |
2305 | |
2306 self._path = None | 2239 self._path = None |
2307 self._zipfile_path = None | 2240 self._zipfile_path = None |
2308 | 2241 |
2309 if self._path != self._zipfile_path and not file_renamed: | 2242 if self._path != self._zipfile_path and not file_renamed: |
2310 raise IOError(u'Unable to close storage file.') | 2243 raise IOError(u'Unable to close storage file.') |
2311 | 2244 |
2312 def Flush(self): | 2245 def Flush(self): |
2313 """Forces the serialized attribute containers to be written to file. | 2246 """Forces the serialized attribute containers to be written to file. |
2314 | 2247 |
2315 Raises: | 2248 Raises: |
2316 IOError: when trying to write to a closed storage file or | 2249 IOError: when trying to write to a closed storage file or |
2317 if the event source cannot be serialized. | 2250 if the event source cannot be serialized. |
2318 """ | 2251 """ |
2319 if not self._is_open: | 2252 if not self._is_open: |
2320 raise IOError(u'Unable to flush a closed storage file.') | 2253 raise IOError(u'Unable to flush a closed storage file.') |
2321 | 2254 |
2322 if not self._read_only: | 2255 if not self._read_only: |
2323 self._WriteSerializedAttributeContainerList(u'event_source') | 2256 self._WriteSerializedAttributeContainerList(u'event_source') |
| 2257 self._WriteSerializedAttributeContainerList(u'event_data') |
2324 self._WriteSerializedEvents() | 2258 self._WriteSerializedEvents() |
2325 self._WriteSerializedAttributeContainerList(u'event_tag') | 2259 self._WriteSerializedAttributeContainerList(u'event_tag') |
2326 self._WriteSerializedAttributeContainerList(u'extraction_error') | 2260 self._WriteSerializedAttributeContainerList(u'extraction_error') |
2327 | 2261 |
2328 def GetAnalysisReports(self): | 2262 def GetAnalysisReports(self): |
2329 """Retrieves the analysis reports. | 2263 """Retrieves the analysis reports. |
2330 | 2264 |
2331 Yields: | 2265 Yields: |
2332 AnalysisReport: analysis report. | 2266 AnalysisReport: analysis report. |
2333 | 2267 |
2334 Raises: | 2268 Raises: |
2335 IOError: if the stream cannot be opened. | 2269 IOError: if the stream cannot be opened. |
2336 """ | 2270 """ |
2337 for stream_name in self._GetStreamNames(): | 2271 for stream_name in self._GetStreamNames(): |
2338 if not stream_name.startswith(u'analysis_report_data.'): | 2272 if not stream_name.startswith(u'analysis_report_data.'): |
2339 continue | 2273 continue |
2340 | 2274 |
2341 data_stream = _SerializedDataStream( | 2275 data_stream = _SerializedDataStream( |
2342 self._zipfile, self._zipfile_path, stream_name) | 2276 self._zipfile, self._temporary_path, stream_name) |
2343 | 2277 |
2344 for analysis_report in self._ReadAttributeContainersFromStream( | 2278 for analysis_report in self._ReadAttributeContainersFromStream( |
2345 data_stream, u'analysis_report'): | 2279 data_stream, u'analysis_report'): |
2346 # TODO: add SetIdentifier. | 2280 # TODO: add SetIdentifier. |
2347 yield analysis_report | 2281 yield analysis_report |
2348 | 2282 |
2349 def GetErrors(self): | 2283 def GetErrors(self): |
2350 """Retrieves the errors. | 2284 """Retrieves the errors. |
2351 | 2285 |
2352 Yields: | 2286 Returns: |
2353 ExtractionError: error. | 2287 generator(ExtractionError): error generator. |
| 2288 """ |
| 2289 return self._GetAttributeContainers(u'extraction_error') |
2354 | 2290 |
2355 Raises: | 2291 def GetEventData(self): |
2356 IOError: if a stream is missing. | 2292 """Retrieves the event data. |
| 2293 |
| 2294 Returns: |
| 2295 generator(EventData): event data generator. |
2357 """ | 2296 """ |
2358 error_stream_number = self._stream_numbers[u'extraction_error'] | 2297 return self._GetAttributeContainers(u'event_data') |
2359 | 2298 |
2360 for stream_number in range(1, error_stream_number): | 2299 def GetEventDataByIdentifier(self, identifier): |
2361 stream_name = u'error_data.{0:06}'.format(stream_number) | 2300 """Retrieves specific event data. |
2362 if not self._HasStream(stream_name): | |
2363 raise IOError(u'No such stream: {0:s}'.format(stream_name)) | |
2364 | 2301 |
2365 data_stream = _SerializedDataStream( | 2302 Args: |
2366 self._zipfile, self._zipfile_path, stream_name) | 2303 identifier (SerializedStreamIdentifier): event data identifier. |
2367 | 2304 |
2368 generator = self._ReadAttributeContainersFromStream(data_stream, u'error') | 2305 Returns: |
2369 for entry_index, error in enumerate(generator): | 2306 EventData: event data or None if not available. |
2370 error_identifier = identifiers.SerializedStreamIdentifier( | 2307 """ |
2371 stream_number, entry_index) | 2308 return self._GetAttributeContainer( |
2372 error.SetIdentifier(error_identifier) | 2309 u'event_data', identifier.stream_number, |
2373 yield error | 2310 entry_index=identifier.entry_index) |
2374 | 2311 |
2375 def GetEvents(self): | 2312 def GetEvents(self): |
2376 """Retrieves the events. | 2313 """Retrieves the events. |
2377 | 2314 |
2378 Yields: | 2315 Yields: |
2379 EventObject: event. | 2316 EventObject: event. |
2380 """ | 2317 """ |
2381 for stream_number in range(1, self._event_stream_number): | 2318 for stream_number in range(1, self._last_stream_numbers[u'event']): |
2382 stream_name = u'event_data.{0:06}'.format(stream_number) | 2319 stream_name = u'event_data.{0:06}'.format(stream_number) |
2383 if not self._HasStream(stream_name): | 2320 if not self._HasStream(stream_name): |
2384 raise IOError(u'No such stream: {0:s}'.format(stream_name)) | 2321 raise IOError(u'No such stream: {0:s}'.format(stream_name)) |
2385 | 2322 |
2386 data_stream = _SerializedDataStream( | 2323 data_stream = _SerializedDataStream( |
2387 self._zipfile, self._zipfile_path, stream_name) | 2324 self._zipfile, self._temporary_path, stream_name) |
2388 | 2325 |
2389 generator = self._ReadAttributeContainersFromStream( | 2326 generator = self._ReadAttributeContainersFromStream( |
2390 data_stream, u'event') | 2327 data_stream, u'event') |
2391 for entry_index, event in enumerate(generator): | 2328 for entry_index, event in enumerate(generator): |
2392 event_identifier = identifiers.SerializedStreamIdentifier( | 2329 event_identifier = identifiers.SerializedStreamIdentifier( |
2393 stream_number, entry_index) | 2330 stream_number, entry_index) |
2394 event.SetIdentifier(event_identifier) | 2331 event.SetIdentifier(event_identifier) |
| 2332 |
| 2333 if (hasattr(event, u'event_data_stream_number') and |
| 2334 hasattr(event, u'event_data_entry_index')): |
| 2335 event_data_identifier = identifiers.SerializedStreamIdentifier( |
| 2336 event.event_data_stream_number, event.event_data_entry_index) |
| 2337 event.SetEventDataIdentifier(event_data_identifier) |
| 2338 |
| 2339 del event.event_data_stream_number |
| 2340 del event.event_data_entry_index |
| 2341 |
2395 yield event | 2342 yield event |
2396 | 2343 |
2397 def GetEventSourceByIndex(self, index): | 2344 def GetEventSourceByIndex(self, index): |
2398 """Retrieves a specific event source. | 2345 """Retrieves a specific event source. |
2399 | 2346 |
2400 Args: | 2347 Args: |
2401 index (int): event source index. | 2348 index (int): event source index. |
2402 | 2349 |
2403 Returns: | 2350 Returns: |
2404 EventSource: event source. | 2351 EventSource: event source. |
2405 | |
2406 Raises: | |
2407 IOError: if a stream is missing. | |
2408 """ | 2352 """ |
2409 event_source_stream_number = self._stream_numbers[u'event_source'] | 2353 return self._GetAttributeContainerByIndex(u'event_source', index) |
2410 | |
2411 stream_number = 1 | |
2412 while stream_number < event_source_stream_number: | |
2413 if stream_number <= len(self._event_sources_in_stream): | |
2414 number_of_event_sources = self._event_sources_in_stream[ | |
2415 stream_number - 1] | |
2416 | |
2417 else: | |
2418 offset_table = self._GetSerializedEventSourceOffsetTable(stream_number) | |
2419 number_of_event_sources = offset_table.number_of_offsets | |
2420 self._event_sources_in_stream.append(number_of_event_sources) | |
2421 | |
2422 if index < number_of_event_sources: | |
2423 break | |
2424 | |
2425 index -= number_of_event_sources | |
2426 stream_number += 1 | |
2427 | |
2428 if stream_number < event_source_stream_number: | |
2429 stream_name = u'event_source_data.{0:06}'.format(stream_number) | |
2430 if not self._HasStream(stream_name): | |
2431 raise IOError(u'No such stream: {0:s}'.format(stream_name)) | |
2432 | |
2433 offset_table = self._GetSerializedEventSourceOffsetTable(stream_number) | |
2434 stream_offset = offset_table.GetOffset(index) | |
2435 | |
2436 data_stream = _SerializedDataStream( | |
2437 self._zipfile, self._zipfile_path, stream_name) | |
2438 data_stream.SeekEntryAtOffset(index, stream_offset) | |
2439 | |
2440 event_source = self._ReadAttributeContainerFromStreamEntry( | |
2441 data_stream, u'event_source') | |
2442 if event_source: | |
2443 event_source_identifier = identifiers.SerializedStreamIdentifier( | |
2444 stream_number, index) | |
2445 event_source.SetIdentifier(event_source_identifier) | |
2446 return event_source | |
2447 | |
2448 serialized_data = self._GetSerializedAttributeContainerByIndex( | |
2449 u'event_source', index) | |
2450 event_source = self._DeserializeAttributeContainer( | |
2451 u'event_source', serialized_data) | |
2452 if event_source: | |
2453 event_source_identifier = identifiers.SerializedStreamIdentifier( | |
2454 stream_number, index) | |
2455 event_source.SetIdentifier(event_source_identifier) | |
2456 return event_source | |
2457 | 2354 |
2458 def GetEventSources(self): | 2355 def GetEventSources(self): |
2459 """Retrieves the event sources. | 2356 """Retrieves the event sources. |
2460 | 2357 |
2461 Yields: | 2358 Returns: |
2462 EventSource: event source. | 2359 generator(EventSource): event source generator. |
2463 | |
2464 Raises: | |
2465 IOError: if a stream is missing. | |
2466 """ | 2360 """ |
2467 event_source_stream_number = self._stream_numbers[u'event_source'] | 2361 return self._GetAttributeContainers(u'event_source') |
2468 | |
2469 for stream_number in range(1, event_source_stream_number): | |
2470 stream_name = u'event_source_data.{0:06}'.format(stream_number) | |
2471 if not self._HasStream(stream_name): | |
2472 raise IOError(u'No such stream: {0:s}'.format(stream_name)) | |
2473 | |
2474 data_stream = _SerializedDataStream( | |
2475 self._zipfile, self._zipfile_path, stream_name) | |
2476 | |
2477 generator = self._ReadAttributeContainersFromStream( | |
2478 data_stream, u'event_source') | |
2479 for entry_index, event_source in enumerate(generator): | |
2480 event_source_identifier = identifiers.SerializedStreamIdentifier( | |
2481 stream_number, entry_index) | |
2482 event_source.SetIdentifier(event_source_identifier) | |
2483 yield event_source | |
2484 | 2362 |
2485 def GetEventTags(self): | 2363 def GetEventTags(self): |
2486 """Retrieves the event tags. | 2364 """Retrieves the event tags. |
2487 | 2365 |
2488 Yields: | 2366 Returns: |
2489 EventTag: event tag. | 2367 generator(EventTag): event tag generator. |
| 2368 """ |
| 2369 for event_tag in self._GetAttributeContainers(u'event_tag'): |
| 2370 event_identifier = identifiers.SerializedStreamIdentifier( |
| 2371 event_tag.event_stream_number, event_tag.event_entry_index) |
| 2372 event_tag.SetEventIdentifier(event_identifier) |
2490 | 2373 |
2491 Raises: | 2374 del event_tag.event_stream_number |
2492 IOError: if a stream is missing. | 2375 del event_tag.event_entry_index |
2493 """ | |
2494 event_tag_stream_number = self._stream_numbers[u'event_tag'] | |
2495 | 2376 |
2496 for stream_number in range(1, event_tag_stream_number): | 2377 yield event_tag |
2497 stream_name = u'event_tag_data.{0:06}'.format(stream_number) | |
2498 if not self._HasStream(stream_name): | |
2499 raise IOError(u'No such stream: {0:s}'.format(stream_name)) | |
2500 | |
2501 data_stream = _SerializedDataStream( | |
2502 self._zipfile, self._zipfile_path, stream_name) | |
2503 | |
2504 generator = self._ReadAttributeContainersFromStream( | |
2505 data_stream, u'event_tag') | |
2506 for entry_index, event_tag in enumerate(generator): | |
2507 event_tag_identifier = identifiers.SerializedStreamIdentifier( | |
2508 stream_number, entry_index) | |
2509 event_tag.SetIdentifier(event_tag_identifier) | |
2510 | |
2511 event_identifier = identifiers.SerializedStreamIdentifier( | |
2512 event_tag.event_stream_number, event_tag.event_entry_index) | |
2513 event_tag.SetEventIdentifier(event_identifier) | |
2514 | |
2515 yield event_tag | |
2516 | 2378 |
2517 def GetNumberOfAnalysisReports(self): | 2379 def GetNumberOfAnalysisReports(self): |
2518 """Retrieves the number analysis reports. | 2380 """Retrieves the number analysis reports. |
2519 | 2381 |
2520 Returns: | 2382 Returns: |
2521 int: number of analysis reports. | 2383 int: number of analysis reports. |
2522 """ | 2384 """ |
2523 return self._analysis_report_stream_number - 1 | 2385 return self._analysis_report_stream_number - 1 |
2524 | 2386 |
2525 def GetNumberOfEventSources(self): | 2387 def GetNumberOfEventSources(self): |
2526 """Retrieves the number event sources. | 2388 """Retrieves the number event sources. |
2527 | 2389 |
2528 Returns: | 2390 Returns: |
2529 int: number of event sources. | 2391 int: number of event sources. |
2530 """ | 2392 """ |
2531 event_source_stream_number = self._stream_numbers[u'event_source'] | 2393 event_source_stream_number = self._last_stream_numbers[u'event_source'] |
2532 | 2394 |
2533 number_of_event_sources = 0 | 2395 number_of_event_sources = 0 |
2534 for stream_number in range(1, event_source_stream_number): | 2396 for stream_number in range(1, event_source_stream_number): |
2535 offset_table = self._GetSerializedEventSourceOffsetTable(stream_number) | 2397 offset_table = self._GetSerializedDataOffsetTable( |
| 2398 u'event_source', stream_number) |
| 2399 |
2536 number_of_event_sources += offset_table.number_of_offsets | 2400 number_of_event_sources += offset_table.number_of_offsets |
2537 | 2401 |
2538 number_of_event_sources += self._GetNumberOfSerializedAttributeContainers( | 2402 number_of_event_sources += self._GetNumberOfSerializedAttributeContainers( |
2539 u'event_sources') | 2403 u'event_sources') |
2540 return number_of_event_sources | 2404 return number_of_event_sources |
2541 | 2405 |
2542 def GetSessions(self): | 2406 def GetSessions(self): |
2543 """Retrieves the sessions. | 2407 """Retrieves the sessions. |
2544 | 2408 |
2545 Yields: | 2409 Yields: |
2546 Session: session attribute container. | 2410 Session: session attribute container. |
2547 | 2411 |
2548 Raises: | 2412 Raises: |
2549 IOError: if a stream is missing or there is a mismatch in session | 2413 IOError: if a stream is missing or there is a mismatch in session |
2550 identifiers between the session start and completion attribute | 2414 identifiers between the session start and completion attribute |
2551 containers. | 2415 containers. |
2552 """ | 2416 """ |
2553 for stream_number in range(1, self._last_session): | 2417 for stream_number in range(1, self._last_session): |
2554 stream_name = u'session_start.{0:06d}'.format(stream_number) | 2418 stream_name = u'session_start.{0:06d}'.format(stream_number) |
2555 if not self._HasStream(stream_name): | 2419 if not self._HasStream(stream_name): |
2556 raise IOError(u'No such stream: {0:s}'.format(stream_name)) | 2420 raise IOError(u'No such stream: {0:s}'.format(stream_name)) |
2557 | 2421 |
2558 data_stream = _SerializedDataStream( | 2422 data_stream = _SerializedDataStream( |
2559 self._zipfile, self._zipfile_path, stream_name) | 2423 self._zipfile, self._temporary_path, stream_name) |
2560 | 2424 |
2561 session_start = self._ReadAttributeContainerFromStreamEntry( | 2425 session_start = self._ReadAttributeContainerFromStreamEntry( |
2562 data_stream, u'session_start') | 2426 data_stream, u'session_start') |
2563 | 2427 |
2564 session_completion = None | 2428 session_completion = None |
2565 stream_name = u'session_completion.{0:06d}'.format(stream_number) | 2429 stream_name = u'session_completion.{0:06d}'.format(stream_number) |
2566 if self._HasStream(stream_name): | 2430 if self._HasStream(stream_name): |
2567 data_stream = _SerializedDataStream( | 2431 data_stream = _SerializedDataStream( |
2568 self._zipfile, self._zipfile_path, stream_name) | 2432 self._zipfile, self._temporary_path, stream_name) |
2569 | 2433 |
2570 session_completion = self._ReadAttributeContainerFromStreamEntry( | 2434 session_completion = self._ReadAttributeContainerFromStreamEntry( |
2571 data_stream, u'session_completion') | 2435 data_stream, u'session_completion') |
2572 | 2436 |
2573 session = sessions.Session() | 2437 session = sessions.Session() |
2574 session.CopyAttributesFromSessionStart(session_start) | 2438 session.CopyAttributesFromSessionStart(session_start) |
2575 if session_completion: | 2439 if session_completion: |
2576 try: | 2440 try: |
2577 session.CopyAttributesFromSessionCompletion(session_completion) | 2441 session.CopyAttributesFromSessionCompletion(session_completion) |
2578 except ValueError: | 2442 except ValueError: |
(...skipping 88 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
2667 Args: | 2531 Args: |
2668 knowledge_base (KnowledgeBase): is used to store the preprocessing | 2532 knowledge_base (KnowledgeBase): is used to store the preprocessing |
2669 information. | 2533 information. |
2670 """ | 2534 """ |
2671 for stream_number in range(1, self._last_preprocess): | 2535 for stream_number in range(1, self._last_preprocess): |
2672 stream_name = u'preprocess.{0:06d}'.format(stream_number) | 2536 stream_name = u'preprocess.{0:06d}'.format(stream_number) |
2673 if not self._HasStream(stream_name): | 2537 if not self._HasStream(stream_name): |
2674 raise IOError(u'No such stream: {0:s}'.format(stream_name)) | 2538 raise IOError(u'No such stream: {0:s}'.format(stream_name)) |
2675 | 2539 |
2676 data_stream = _SerializedDataStream( | 2540 data_stream = _SerializedDataStream( |
2677 self._zipfile, self._zipfile_path, stream_name) | 2541 self._zipfile, self._temporary_path, stream_name) |
2678 | 2542 |
2679 system_configuration = self._ReadAttributeContainerFromStreamEntry( | 2543 system_configuration = self._ReadAttributeContainerFromStreamEntry( |
2680 data_stream, u'preprocess') | 2544 data_stream, u'preprocess') |
2681 | 2545 |
2682 # TODO: replace stream_number by session_identifier. | 2546 # TODO: replace stream_number by session_identifier. |
2683 knowledge_base.ReadSystemConfigurationArtifact( | 2547 knowledge_base.ReadSystemConfigurationArtifact( |
2684 system_configuration, session_identifier=stream_number) | 2548 system_configuration, session_identifier=stream_number) |
2685 | 2549 |
2686 def WritePreprocessingInformation(self, knowledge_base): | 2550 def WritePreprocessingInformation(self, knowledge_base): |
2687 """Writes preprocessing information. | 2551 """Writes preprocessing information. |
2688 | 2552 |
2689 Args: | 2553 Args: |
2690 knowledge_base (KnowledgeBase): contains the preprocessing information. | 2554 knowledge_base (KnowledgeBase): contains the preprocessing information. |
2691 | 2555 |
2692 Raises: | 2556 Raises: |
2693 IOError: if the storage type does not support writing preprocess | 2557 IOError: if the storage type does not support writing preprocess |
2694 information or the storage file is closed or read-only or | 2558 information or the storage file is closed or read-only or |
2695 if the preprocess information stream already exists. | 2559 if the preprocess information stream already exists. |
2696 """ | 2560 """ |
2697 if not self._is_open: | 2561 self._RaiseIfNotWritable() |
2698 raise IOError(u'Unable to write to closed storage file.') | |
2699 | |
2700 if self._read_only: | |
2701 raise IOError(u'Unable to write to read-only storage file.') | |
2702 | 2562 |
2703 if self.storage_type != definitions.STORAGE_TYPE_SESSION: | 2563 if self.storage_type != definitions.STORAGE_TYPE_SESSION: |
2704 raise IOError(u'Preprocess information not supported by storage type.') | 2564 raise IOError(u'Preprocess information not supported by storage type.') |
2705 | 2565 |
2706 stream_name = u'preprocess.{0:06d}'.format(self._last_preprocess) | 2566 stream_name = u'preprocess.{0:06d}'.format(self._last_preprocess) |
2707 if self._HasStream(stream_name): | 2567 if self._HasStream(stream_name): |
2708 raise IOError(u'preprocess information: {0:06d} already exists.'.format( | 2568 raise IOError(u'preprocess information: {0:06d} already exists.'.format( |
2709 self._last_preprocess)) | 2569 self._last_preprocess)) |
2710 | 2570 |
2711 system_configuration = knowledge_base.GetSystemConfigurationArtifact() | 2571 system_configuration = knowledge_base.GetSystemConfigurationArtifact() |
2712 | 2572 |
2713 preprocess_data = self._SerializeAttributeContainer(system_configuration) | 2573 preprocess_data = self._SerializeAttributeContainer(system_configuration) |
2714 | 2574 |
2715 data_stream = _SerializedDataStream( | 2575 data_stream = _SerializedDataStream( |
2716 self._zipfile, self._zipfile_path, stream_name) | 2576 self._zipfile, self._temporary_path, stream_name) |
2717 data_stream.WriteInitialize() | 2577 data_stream.WriteInitialize() |
2718 data_stream.WriteEntry(preprocess_data) | 2578 data_stream.WriteEntry(preprocess_data) |
2719 data_stream.WriteFinalize() | 2579 data_stream.WriteFinalize() |
2720 | 2580 |
2721 def WriteSessionCompletion(self, session_completion): | 2581 def WriteSessionCompletion(self, session_completion): |
2722 """Writes session completion information. | 2582 """Writes session completion information. |
2723 | 2583 |
2724 Args: | 2584 Args: |
2725 session_completion (SessionCompletion): session completion information. | 2585 session_completion (SessionCompletion): session completion information. |
2726 | 2586 |
2727 Raises: | 2587 Raises: |
2728 IOError: when the storage file is closed or read-only. | 2588 IOError: when the storage file is closed or read-only. |
2729 """ | 2589 """ |
2730 if not self._is_open: | 2590 self._RaiseIfNotWritable() |
2731 raise IOError(u'Unable to write to closed storage file.') | |
2732 | |
2733 if self._read_only: | |
2734 raise IOError(u'Unable to write to read-only storage file.') | |
2735 | 2591 |
2736 self.Flush() | 2592 self.Flush() |
2737 | 2593 |
2738 self._WriteSessionCompletion(session_completion) | 2594 self._WriteSessionCompletion(session_completion) |
2739 self._last_session += 1 | 2595 self._last_session += 1 |
2740 | 2596 |
2741 def WriteSessionStart(self, session_start): | 2597 def WriteSessionStart(self, session_start): |
2742 """Writes session start information. | 2598 """Writes session start information. |
2743 | 2599 |
2744 Args: | 2600 Args: |
2745 session_start (SessionStart): session start information. | 2601 session_start (SessionStart): session start information. |
2746 | 2602 |
2747 Raises: | 2603 Raises: |
2748 IOError: when the storage file is closed or read-only. | 2604 IOError: when the storage file is closed or read-only. |
2749 """ | 2605 """ |
2750 if not self._is_open: | 2606 self._RaiseIfNotWritable() |
2751 raise IOError(u'Unable to write to closed storage file.') | |
2752 | |
2753 if self._read_only: | |
2754 raise IOError(u'Unable to write to read-only storage file.') | |
2755 | 2607 |
2756 self._WriteSessionStart(session_start) | 2608 self._WriteSessionStart(session_start) |
2757 | 2609 |
2758 def WriteTaskCompletion(self, task_completion): | 2610 def WriteTaskCompletion(self, task_completion): |
2759 """Writes task completion information. | 2611 """Writes task completion information. |
2760 | 2612 |
2761 Args: | 2613 Args: |
2762 task_completion (TaskCompletion): task completion information. | 2614 task_completion (TaskCompletion): task completion information. |
2763 | 2615 |
2764 Raises: | 2616 Raises: |
2765 IOError: when the storage file is closed or read-only. | 2617 IOError: when the storage file is closed or read-only. |
2766 """ | 2618 """ |
2767 if not self._is_open: | 2619 self._RaiseIfNotWritable() |
2768 raise IOError(u'Unable to write to closed storage file.') | |
2769 | |
2770 if self._read_only: | |
2771 raise IOError(u'Unable to write to read-only storage file.') | |
2772 | 2620 |
2773 self.Flush() | 2621 self.Flush() |
2774 | 2622 |
2775 self._WriteTaskCompletion(task_completion) | 2623 self._WriteTaskCompletion(task_completion) |
2776 | 2624 |
2777 def WriteTaskStart(self, task_start): | 2625 def WriteTaskStart(self, task_start): |
2778 """Writes task start information. | 2626 """Writes task start information. |
2779 | 2627 |
2780 Args: | 2628 Args: |
2781 task_start (TaskStart): task start information. | 2629 task_start (TaskStart): task start information. |
2782 | 2630 |
2783 Raises: | 2631 Raises: |
2784 IOError: when the storage file is closed or read-only. | 2632 IOError: when the storage file is closed or read-only. |
2785 """ | 2633 """ |
2786 if not self._is_open: | 2634 self._RaiseIfNotWritable() |
2787 raise IOError(u'Unable to write to closed storage file.') | |
2788 | |
2789 if self._read_only: | |
2790 raise IOError(u'Unable to write to read-only storage file.') | |
2791 | 2635 |
2792 self._WriteTaskStart(task_start) | 2636 self._WriteTaskStart(task_start) |
2793 | 2637 |
2794 | 2638 |
2795 class ZIPStorageFileReader(interface.FileStorageReader): | 2639 class ZIPStorageFileReader(interface.FileStorageReader): |
2796 """ZIP-based storage file reader.""" | 2640 """ZIP-based storage file reader.""" |
2797 | 2641 |
2798 def __init__(self, path): | 2642 def __init__(self, path): |
2799 """Initializes a storage reader. | 2643 """Initializes a storage reader. |
2800 | 2644 |
(...skipping 54 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
2855 Args: | 2699 Args: |
2856 path (str): path to the storage file. | 2700 path (str): path to the storage file. |
2857 task (Task): task. | 2701 task (Task): task. |
2858 | 2702 |
2859 Returns: | 2703 Returns: |
2860 StorageWriter: storage writer. | 2704 StorageWriter: storage writer. |
2861 """ | 2705 """ |
2862 return ZIPStorageFileWriter( | 2706 return ZIPStorageFileWriter( |
2863 self._session, path, buffer_size=self._buffer_size, | 2707 self._session, path, buffer_size=self._buffer_size, |
2864 storage_type=definitions.STORAGE_TYPE_TASK, task=task) | 2708 storage_type=definitions.STORAGE_TYPE_TASK, task=task) |
OLD | NEW |