Rietveld Code Review Tool
Help | Bug tracker | Discussion group | Source code | Sign in
(1)

Delta Between Two Patch Sets: MoinMoin/storage/middleware/indexing.py

Issue 6423063: userheads
Left Patch Set: Created 12 years, 8 months ago
Right Patch Set: Created 12 years, 8 months ago
Left:
Right:
Use n/p to move between diff chunks; N/P to move between comments. Please Sign in to add in-line comments.
Jump to:
Left: Side by side diff | Download
Right: Side by side diff | Download
LEFT RIGHT
1 # Copyright: 2011 MoinMoin:RonnyPfannschmidt 1 # Copyright: 2011 MoinMoin:RonnyPfannschmidt
2 # Copyright: 2011 MoinMoin:ThomasWaldmann 2 # Copyright: 2011 MoinMoin:ThomasWaldmann
3 # Copyright: 2011 MoinMoin:MichaelMayorov 3 # Copyright: 2011 MoinMoin:MichaelMayorov
4 # License: GNU GPL v2 (or any later version), see LICENSE.txt for details. 4 # License: GNU GPL v2 (or any later version), see LICENSE.txt for details.
5 5
6 """ 6 """
7 MoinMoin - indexing middleware 7 MoinMoin - indexing middleware
8 8
9 The backends and stores moin uses are rather simple, it is mostly just a 9 The backends and stores moin uses are rather simple, it is mostly just a
10 unsorted / unordered bunch of revisions (meta and data) with iteration. 10 unsorted / unordered bunch of revisions (meta and data) with iteration.
(...skipping 61 matching lines...) Expand 10 before | Expand all | Expand 10 after
72 from whoosh.query import Every, Term 72 from whoosh.query import Every, Term
73 from whoosh.sorting import FieldFacet 73 from whoosh.sorting import FieldFacet
74 74
75 from MoinMoin import log 75 from MoinMoin import log
76 logging = log.getLogger(__name__) 76 logging = log.getLogger(__name__)
77 77
78 from MoinMoin.config import WIKINAME, NAME, NAME_EXACT, MTIME, CONTENTTYPE, TAGS, \ 78 from MoinMoin.config import WIKINAME, NAME, NAME_EXACT, MTIME, CONTENTTYPE, TAGS, \
79 LANGUAGE, USERID, ADDRESS, HOSTNAME, SIZE, ACTION, COMMENT, SUMMARY, \ 79 LANGUAGE, USERID, ADDRESS, HOSTNAME, SIZE, ACTION, COMMENT, SUMMARY, \
80 CONTENT, EXTERNALLINKS, ITEMLINKS, ITEMTRANSCLUSIONS, ACL, EMAIL, OPENID, \ 80 CONTENT, EXTERNALLINKS, ITEMLINKS, ITEMTRANSCLUSIONS, ACL, EMAIL, OPENID, \
81 ITEMID, REVID, CURRENT, PARENTID, \ 81 ITEMID, REVID, CURRENT, PARENTID, \
82 LATEST_REVS, ALL_REVS, BRANCHES, USERHEADS, \ 82 ALL_REVS, BRANCHES, \
83 CONTENTTYPE_USER, \ 83 CONTENTTYPE_USER, \
84 BRANCH_ID, BRANCH_ITEMID, BRANCH_NAME, BRANCH_REVID, \ 84 BRANCH_ID, BRANCH_ITEMID, BRANCH_SRC, BRANCH_DST, \
85 BRANCH_TYPE, \ 85 BRANCH_TYPE, MASTER_BRANCH, \
86 UH_ID, UH_ITEMID, UH_USER, UH_POINTER 86 UH_ID, UH_ITEMID, UH_USER, UH_POINTER, \
87 BRANCH, TAG, USERHEAD
88
87 from MoinMoin.constants import keys 89 from MoinMoin.constants import keys
88 90
89 from MoinMoin import user 91 from MoinMoin import user
90 from MoinMoin.search.analyzers import item_name_analyzer, MimeTokenizer, AclTokenizer 92 from MoinMoin.search.analyzers import item_name_analyzer, MimeTokenizer, AclTokenizer
91 from MoinMoin.themes import utctimestamp 93 from MoinMoin.themes import utctimestamp
92 from MoinMoin.util.crypto import make_uuid 94 from MoinMoin.util.crypto import make_uuid, UUID_LEN
93 from MoinMoin.storage.middleware.validation import ContentMetaSchema, UserMetaSchema 95 from MoinMoin.storage.middleware.validation import ContentMetaSchema, UserMetaSchema
94 from MoinMoin.storage.error import NoSuchItemError, ItemAlreadyExistsError 96 from MoinMoin.storage.error import NoSuchItemError, ItemAlreadyExistsError
95 97
96 98
97 INDEXES = [LATEST_REVS, ALL_REVS, BRANCHES, USERHEADS] 99 INDEXES = [ALL_REVS, BRANCHES]
98 100
99 101
100 def backend_to_index(meta, content, schema, wikiname): 102 def backend_to_index(meta, content, schema, wikiname):
101 """ 103 """
102 Convert backend metadata/data to a whoosh document. 104 Convert backend metadata/data to a whoosh document.
103 105
104 :param meta: revision meta from moin backend 106 :param meta: revision meta from moin backend
105 :param content: revision data converted to indexable content 107 :param content: revision data converted to indexable content
106 :param schema: whoosh schema 108 :param schema: whoosh schema
107 :param wikiname: interwikiname of this wiki 109 :param wikiname: interwikiname of this wiki
(...skipping 163 matching lines...) Expand 10 before | Expand all | Expand 10 after
271 273
272 # TODO: because there is no "latest_revs" anymore, everything is dumped 274 # TODO: because there is no "latest_revs" anymore, everything is dumped
273 # into "all_revs". Some parts should be moved to branches and userheads 275 # into "all_revs". Some parts should be moved to branches and userheads
274 all_revs_fields = dict() 276 all_revs_fields = dict()
275 all_revs_fields.update(**latest_revs_fields) 277 all_revs_fields.update(**latest_revs_fields)
276 all_revs_fields[ITEMID] = ID(stored=True) 278 all_revs_fields[ITEMID] = ID(stored=True)
277 279
278 branches_fields = { 280 branches_fields = {
279 BRANCH_ID: ID(unique=True, stored=True), 281 BRANCH_ID: ID(unique=True, stored=True),
280 BRANCH_ITEMID: ID(stored=True), 282 BRANCH_ITEMID: ID(stored=True),
281 BRANCH_NAME: ID(stored=True),
282 BRANCH_REVID: ID(stored=True),
283 BRANCH_TYPE: ID(stored=True), 283 BRANCH_TYPE: ID(stored=True),
284 BRANCH_SRC: ID(stored=True),
285 BRANCH_DST: ID(stored=True),
284 } 286 }
285 287
286 userheads_fields = {
287 UH_ID: ID(unique=True, stored=True),
288 UH_ITEMID: ID(stored=True),
289 UH_USER: ID(stored=True),
290 UH_POINTER: ID(stored=True),
291 }
292
293 latest_revisions_schema = Schema(**latest_revs_fields)
294 all_revisions_schema = Schema(**all_revs_fields) 288 all_revisions_schema = Schema(**all_revs_fields)
295 branches_schema = Schema(**branches_fields) 289 branches_schema = Schema(**branches_fields)
296 userheads_schema = Schema(**userheads_fields)
297 290
298 # Define dynamic fields 291 # Define dynamic fields
299 dynamic_fields = [("*_id", ID(stored=True)), 292 dynamic_fields = [("*_id", ID(stored=True)),
300 ("*_text", TEXT(stored=True)), 293 ("*_text", TEXT(stored=True)),
301 ("*_keyword", KEYWORD(stored=True)), 294 ("*_keyword", KEYWORD(stored=True)),
302 ("*_numeric", NUMERIC(stored=True)), 295 ("*_numeric", NUMERIC(stored=True)),
303 ("*_datetime", DATETIME(stored=True)), 296 ("*_datetime", DATETIME(stored=True)),
304 ("*_boolean", BOOLEAN(stored=True)), 297 ("*_boolean", BOOLEAN(stored=True)),
305 ] 298 ]
306 299
307 # Adding dynamic fields to schemas 300 # Adding dynamic fields to schemas
308 for glob, field_type in dynamic_fields: 301 for glob, field_type in dynamic_fields:
309 latest_revisions_schema.add(glob, field_type, glob=True)
310 all_revisions_schema.add(glob, field_type, glob=True) 302 all_revisions_schema.add(glob, field_type, glob=True)
311 303
312 # schemas are needed by query parser and for index creation 304 # schemas are needed by query parser and for index creation
313 self.schemas[ALL_REVS] = all_revisions_schema 305 self.schemas[ALL_REVS] = all_revisions_schema
314 self.schemas[LATEST_REVS] = latest_revisions_schema
315 self.schemas[BRANCHES] = branches_schema 306 self.schemas[BRANCHES] = branches_schema
316 self.schemas[USERHEADS] = userheads_schema
317 307
318 # what fields could whoosh result documents have (no matter whether all revs index 308 # what fields could whoosh result documents have (no matter whether all revs index
319 # or latest revs index): 309 # or latest revs index):
320 self.common_fields = set(latest_revs_fields.keys()) & set(all_revs_fields.keys()) 310 self.common_fields = set(latest_revs_fields.keys()) & set(all_revs_fields.keys())
321 311
322 def open(self): 312 def open(self):
323 """ 313 """
324 Open all indexes. 314 Open all indexes.
325 """ 315 """
326 index_dir = self.index_dir 316 index_dir = self.index_dir
(...skipping 70 matching lines...) Expand 10 before | Expand all | Expand 10 after
397 else: 387 else:
398 writer = self.ix[type].writer() 388 writer = self.ix[type].writer()
399 with writer as writer: 389 with writer as writer:
400 writer.update_document(**doc) 390 writer.update_document(**doc)
401 391
402 def remove_revision(self, revid, async=True): 392 def remove_revision(self, revid, async=True):
403 """ 393 """
404 Remove a single revision from indexes. 394 Remove a single revision from indexes.
405 """ 395 """
406 # get branches with the revision which will be removed 396 # get branches with the revision which will be removed
407 with self.ix[LATEST_REVS].searcher() as searcher: 397 with self.ix[ALL_REVS].searcher() as searcher:
408 revision_to_remove = searcher.document(revid=revid) 398 revision_to_remove = searcher.document(revid=revid)
409 if revision_to_remove: 399 if revision_to_remove:
410 parent_revid = revision_to_remove.get(PARENTID, None) 400 parent_revid = revision_to_remove.get(PARENTID, None)
411 with self.ix[BRANCHES].searcher() as searcher: 401 with self.ix[BRANCHES].searcher() as searcher:
412 branches_docs = searcher.documents(revid=revid) 402 branches_docs = searcher.documents(**{BRANCH_DST: revid})
413 with self.ix[BRANCHES].writer() as writer: 403 with self.ix[BRANCHES].writer() as writer:
414 for branch_doc in branches_docs: 404 for branch_doc in branches_docs:
415 branch_doc[BRANCH_REVID] = parent_revid 405 branch_doc[BRANCH_DST] = parent_revid
416 writer.update_document(**branch_doc) 406 writer.update_document(**branch_doc)
417 if async: 407 if async:
418 writer = AsyncWriter(self.ix[ALL_REVS]) 408 writer = AsyncWriter(self.ix[ALL_REVS])
419 else: 409 else:
420 writer = self.ix[ALL_REVS].writer() 410 writer = self.ix[ALL_REVS].writer()
421 with writer as writer: 411 with writer as writer:
422 writer.delete_by_term(REVID, revid) 412 writer.delete_by_term(REVID, revid)
423 413
424 def _modify_branches_index(self, index, schema, wikiname, branches, 414 def _modify_branches_index(self, index, schema, wikiname, data, mode='add',
425 mode='add', procs=1, limitmb=256):
Reimar Bauer 2012/07/20 14:16:34 unicode type
426 return self._modify_special_index(index, schema, wikiname, branches,
427 BRANCH_ID, self.backend.retrieve_branc h,
428 mode, procs, limitmb)
429 def _modify_userheads_index(self, index, schema, wikiname, userheads,
430 mode='add', procs=1, limitmb=256):
431 return self._modify_special_index(index, schema, wikiname, userheads,
432 UH_ID, self.backend.retrieve_userhead,
433 mode, procs, limitmb)
434
435 def _modify_special_index(self, index, schema, wikiname, data, id_field,
436 retrieve_function, mode='add',
437 procs=1, limitmb=256): 415 procs=1, limitmb=256):
438 """ 416 """
439 modify special index, containing simple (non-typed) elements like 417 modify special index, containing simple (non-typed) elements like
440 branches and userheads. 418 branches and userheads.
441 """ 419 """
442 with index.writer(procs=procs, limitmb=limitmb) as writer: 420 with index.writer(procs=procs, limitmb=limitmb) as writer:
443 for dataid in data: 421 for dataid in data:
444 if mode in ['add', 'update', ]: 422 if mode in ['add', 'update', ]:
445 entry = retrieve_function(dataid) 423 entry = self.backend.retrieve_branch(dataid)
446 doc = dict([(str(key), value) 424 doc = dict([(str(key), value)
447 for key, value in entry.items() 425 for key, value in entry.items()
448 if key in schema]) 426 if key in schema])
449 if mode == 'update': 427 if mode == 'update':
450 writer.update_document(**doc) 428 writer.update_document(**doc)
451 elif mode == 'add': 429 elif mode == 'add':
452 writer.add_document(**doc) 430 writer.add_document(**doc)
453 elif mode == 'delete': 431 elif mode == 'delete':
454 writer.delete_by_term(id_field, dataid) 432 writer.delete_by_term(BRANCH_ID, dataid)
455 else: 433 else:
456 raise ValueError("mode must be 'update', 'add' or 'delete', not '{0}'".format(mode)) 434 raise ValueError("mode must be 'update', 'add' or 'delete', not '{0}'".format(mode))
457 435
458 def _modify_index(self, index, schema, wikiname, revids, mode='add', procs=1, limitmb=256): 436 def _modify_index(self, index, schema, wikiname, revids, mode='add', procs=1, limitmb=256):
459 """ 437 """
460 modify index contents - add, update, delete the indexed documents for all given revids 438 modify index contents - add, update, delete the indexed documents for all given revids
461 439
462 Note: mode == 'add' is faster but you need to make sure to not create duplicate 440 Note: mode == 'add' is faster but you need to make sure to not create duplicate
463 documents in the index. 441 documents in the index.
464 """ 442 """
(...skipping 42 matching lines...) Expand 10 before | Expand all | Expand 10 after
507 """ 485 """
508 index_dir = self.index_dir_tmp if tmp else self.index_dir 486 index_dir = self.index_dir_tmp if tmp else self.index_dir
509 index = open_dir(index_dir, indexname=ALL_REVS) 487 index = open_dir(index_dir, indexname=ALL_REVS)
510 try: 488 try:
511 # build an index of all we have (so we know what we have) 489 # build an index of all we have (so we know what we have)
512 all_revids = self.backend # the backend is an iterator over all revids 490 all_revids = self.backend # the backend is an iterator over all revids
513 self._modify_index(index, self.schemas[ALL_REVS], self.wikiname, all_revids, 'add', procs, limitmb) 491 self._modify_index(index, self.schemas[ALL_REVS], self.wikiname, all_revids, 'add', procs, limitmb)
514 latest_names_revids = self._find_latest_names_revids(index) 492 latest_names_revids = self._find_latest_names_revids(index)
515 finally: 493 finally:
516 index.close() 494 index.close()
517 # branches and userheads indexes 495 # branches and userheads index
518 index = open_dir(index_dir, indexname=BRANCHES) 496 index = open_dir(index_dir, indexname=BRANCHES)
519 try: 497 try:
520 self._modify_branches_index(index, BRANCHES, self.wikiname, 498 self._modify_branches_index(index, self.schemas[BRANCHES],
ThomasJWaldmann 2012/07/20 13:14:29 you give BRANCHES to schema? that's a little stra
breton 2012/07/21 02:28:57 right, there should be self.schemas[BRANCHES]. //
521 self.backend.branches, 'add', 499 self.wikiname, self.backend.branches,
522 procs, limitmb) 500 'add', procs, limitmb)
523 finally:
524 index.close()
525 index = open_dir(index_dir, indexname=BRANCHES)
ThomasJWaldmann 2012/07/20 13:14:29 same indexname again?
breton 2012/07/21 02:28:57 thanks, fixed. Should definitely return back to th
526 try:
527 self._modify_userheads_index(index, BRANCHES, self.wikiname,
528 self.backend.userheads, 'add',
529 procs, limitmb)
530 finally: 501 finally:
531 index.close() 502 index.close()
532 503
533 def update(self, tmp=False): 504 def update(self, tmp=False):
534 """ 505 """
535 Make sure index reflects current backend state, add missing stuff, remove outdated stuff. 506 Make sure index reflects current backend state, add missing stuff, remove outdated stuff.
536 507
537 This is intended to be used: 508 This is intended to be used:
538 * after a full rebuild that was done at tmp location 509 * after a full rebuild that was done at tmp location
539 * after wiki is made read-only or taken offline 510 * after wiki is made read-only or taken offline
(...skipping 22 matching lines...) Expand all
562 del_revids = [(revids_mountpoints[revid], revid) for revid in del_re vids] 533 del_revids = [(revids_mountpoints[revid], revid) for revid in del_re vids]
563 self._modify_index(index_all, self.schemas[ALL_REVS], self.wikiname, add_revids, 'add') 534 self._modify_index(index_all, self.schemas[ALL_REVS], self.wikiname, add_revids, 'add')
564 self._modify_index(index_all, self.schemas[ALL_REVS], self.wikiname, del_revids, 'delete') 535 self._modify_index(index_all, self.schemas[ALL_REVS], self.wikiname, del_revids, 'delete')
565 finally: 536 finally:
566 index_all.close() 537 index_all.close()
567 index_branches = open_dir(index_dir, indexname=BRANCHES) 538 index_branches = open_dir(index_dir, indexname=BRANCHES)
568 try: 539 try:
569 # now update BRANCHES index: 540 # now update BRANCHES index:
570 with index_branches.searcher() as searcher: 541 with index_branches.searcher() as searcher:
571 ix_branchids = set(doc[BRANCH_ID] for doc in searcher.all_stored _fields()) 542 ix_branchids = set(doc[BRANCH_ID] for doc in searcher.all_stored _fields())
572 backend_branchids = set(branchid for branch in self.backend.branches ) 543 backend_branchids = set(branch for branch in self.backend.branches)
573 add_branchids = backend_branchids - ix_branchids 544 add_branchids = backend_branchids - ix_branchids
574 del_branchids = ix_branchids - backend_branchids 545 del_branchids = ix_branchids - backend_branchids
575 changed = changed or add_branchids or del_branchids 546 changed = changed or add_branchids or del_branchids
576 self._modify_branches_index(index_branches, self.schemas[BRANCHES], 547 self._modify_branches_index(index_branches, self.schemas[BRANCHES],
577 self.wikiname, add_branchids, 'add') 548 self.wikiname, add_branchids, 'add')
578 self._modify_branches_index(index_branches, self.schemas[BRANCHES], 549 self._modify_branches_index(index_branches, self.schemas[BRANCHES],
579 self.wikiname, del_branchids, 'delete') 550 self.wikiname, del_branchids, 'delete')
580 finally: 551 finally:
581 index_branches.close() 552 index_branches.close()
582 index_userheads = open_dir(index_dir, indexname=USERHEADS)
583 try:
584 # now update userheads index:
585 with index_userheads.searcher() as searcher:
586 ix_userheadids = set(doc[UH_ID] for doc in searcher.all_stored_f ields())
587 backend_userheadids = set(userheadid for userhead in self.backend.us erheads)
588 add_userheadids = backend_userheadids - ix_userheadids
589 del_userheadids = ix_userheadids - backend_userheadids
590 changed = changed or add_userheadids or del_userheadids
591 self._modify_userhead_index(index_userheads, self.schemas[USERHEADS] ,
592 self.wikiname, add_userheadids, 'add')
593 self._modify_userhead_index(index_userheads, self.schemas[USERHEADS] ,
594 self.wikiname, del_userheadids, 'delete' )
595 finally:
596 index_branches.close()
597 return changed 553 return changed
598 554
599 def optimize_backend(self): 555 def optimize_backend(self):
600 """ 556 """
601 Optimize backend / collect garbage to safe space: 557 Optimize backend / collect garbage to safe space:
602 558
603 * deleted items: destroy them? use a deleted_max_age? 559 * deleted items: destroy them? use a deleted_max_age?
604 * user profiles: only keep latest revision? 560 * user profiles: only keep latest revision?
605 * normal wiki items: keep by max_revisions_count / max_age 561 * normal wiki items: keep by max_revisions_count / max_age
606 * deduplicate data (determine dataids with same hash, fix references to point to one of them) 562 * deduplicate data (determine dataids with same hash, fix references to point to one of them)
607 * remove unreferenced dataids (destroyed revisions, deduplicated stuff) 563 * remove unreferenced dataids (destroyed revisions, deduplicated stuff)
608 """ 564 """
609 # TODO 565 # TODO
610 566
611 def optimize_index(self, tmp=False): 567 def optimize_index(self, tmp=False):
612 """ 568 """
613 Optimize whoosh index. 569 Optimize whoosh index.
614 """ 570 """
615 index_dir = self.index_dir_tmp if tmp else self.index_dir 571 index_dir = self.index_dir_tmp if tmp else self.index_dir
616 for name in INDEXES: 572 for name in INDEXES:
617 ix = open_dir(index_dir, indexname=name) 573 ix = open_dir(index_dir, indexname=name)
618 try: 574 try:
619 ix.optimize() 575 ix.optimize()
620 finally: 576 finally:
621 ix.close() 577 ix.close()
622 578
623 def dump(self, tmp=False, idx_name=LATEST_REVS): 579 def dump(self, tmp=False, idx_name=ALL_REVS):
624 """ 580 """
625 Yield key/value tuple lists for all documents in the indexes, fields sor ted. 581 Yield key/value tuple lists for all documents in the indexes, fields sor ted.
626 """ 582 """
627 index_dir = self.index_dir_tmp if tmp else self.index_dir 583 index_dir = self.index_dir_tmp if tmp else self.index_dir
628 ix = open_dir(index_dir, indexname=idx_name) 584 ix = open_dir(index_dir, indexname=idx_name)
629 try: 585 try:
630 with ix.searcher() as searcher: 586 with ix.searcher() as searcher:
631 for doc in searcher.all_stored_fields(): 587 for doc in searcher.all_stored_fields():
632 name = doc.pop(NAME, u"") 588 name = doc.pop(NAME, u"")
633 content = doc.pop(CONTENT, u"") 589 content = doc.pop(CONTENT, u"")
634 yield [(NAME, name), ] + sorted(doc.items()) + [(CONTENT, co ntent), ] 590 yield [(NAME, name), ] + sorted(doc.items()) + [(CONTENT, co ntent), ]
635 finally: 591 finally:
636 ix.close() 592 ix.close()
637 593
638 def query_parser(self, default_fields, idx_name=LATEST_REVS): 594 def query_parser(self, default_fields, idx_name=ALL_REVS):
639 """ 595 """
640 Build a query parser for a list of default fields. 596 Build a query parser for a list of default fields.
641 """ 597 """
642 schema = self.schemas[idx_name] 598 schema = self.schemas[idx_name]
643 if len(default_fields) > 1: 599 if len(default_fields) > 1:
644 qp = MultifieldParser(default_fields, schema=schema) 600 qp = MultifieldParser(default_fields, schema=schema)
645 elif len(default_fields) == 1: 601 elif len(default_fields) == 1:
646 qp = QueryParser(default_fields[0], schema=schema) 602 qp = QueryParser(default_fields[0], schema=schema)
647 else: 603 else:
648 raise ValueError("default_fields list must at least contain one fiel d name") 604 raise ValueError("default_fields list must at least contain one fiel d name")
649 qp.add_plugin(RegexPlugin()) 605 qp.add_plugin(RegexPlugin())
650 def username_pseudo_field(node): 606 def username_pseudo_field(node):
651 username = node.text 607 username = node.text
652 users = user.search_users(**{NAME_EXACT: username}) 608 users = user.search_users(**{NAME_EXACT: username})
653 if users: 609 if users:
654 userid = users[0].meta['userid'] 610 userid = users[0].meta['userid']
655 node = WordNode(userid) 611 node = WordNode(userid)
656 node.set_fieldname("userid") 612 node.set_fieldname("userid")
657 return node 613 return node
658 return node 614 return node
659 qp.add_plugin(PseudoFieldPlugin({'username': username_pseudo_field})) 615 qp.add_plugin(PseudoFieldPlugin({'username': username_pseudo_field}))
660 return qp 616 return qp
661 617
662 def search(self, q, idx_name=LATEST_REVS, **kw): 618 def search(self, q, idx_name=ALL_REVS, **kw):
663 """ 619 """
664 Search with query q, yield Revisions. 620 Search with query q, yield Revisions.
665 """ 621 """
666 with self.ix[idx_name].searcher() as searcher: 622 with self.ix[idx_name].searcher() as searcher:
667 # Note: callers must consume everything we yield, so the for loop 623 # Note: callers must consume everything we yield, so the for loop
668 # ends and the "with" is left to close the index files. 624 # ends and the "with" is left to close the index files.
669 for hit in searcher.search(q, **kw): 625 for hit in searcher.search(q, **kw):
670 doc = hit.fields() 626 doc = hit.fields()
671 latest_doc = doc if idx_name == LATEST_REVS else None 627 item = Item(self, doc=None, itemid=doc[ITEMID])
672 item = Item(self, doc=latest_doc, itemid=doc[ITEMID])
673 yield item.get_revision(doc[REVID], doc=doc) 628 yield item.get_revision(doc[REVID], doc=doc)
674 629
675 def search_page(self, q, idx_name=LATEST_REVS, pagenum=1, pagelen=10, **kw): 630 def search_page(self, q, idx_name=ALL_REVS, pagenum=1, pagelen=10, **kw):
676 """ 631 """
677 Same as search, but with paging support. 632 Same as search, but with paging support.
678 """ 633 """
679 with self.ix[idx_name].searcher() as searcher: 634 with self.ix[idx_name].searcher() as searcher:
680 # Note: callers must consume everything we yield, so the for loop 635 # Note: callers must consume everything we yield, so the for loop
681 # ends and the "with" is left to close the index files. 636 # ends and the "with" is left to close the index files.
682 for hit in searcher.search_page(q, pagenum, pagelen=pagelen, **kw): 637 for hit in searcher.search_page(q, pagenum, pagelen=pagelen, **kw):
683 doc = hit.fields() 638 doc = hit.fields()
684 latest_doc = doc if idx_name == LATEST_REVS else None 639 item = Item(self, latest_doc=None, itemid=doc[ITEMID])
685 item = Item(self, latest_doc=latest_doc, itemid=doc[ITEMID])
686 yield item.get_revision(doc[REVID], doc=doc) 640 yield item.get_revision(doc[REVID], doc=doc)
687 641
688 def documents(self, idx_name=LATEST_REVS, **kw): 642 def documents(self, idx_name=ALL_REVS, **kw):
689 """ 643 """
690 Yield Revisions matching the kw args. 644 Yield Revisions matching the kw args.
691 """ 645 """
692 for doc in self._documents(idx_name, **kw): 646 for doc in self._documents(idx_name, **kw):
693 latest_doc = doc if idx_name == LATEST_REVS else None 647 item = Item(self, doc=None, itemid=doc[ITEMID])
694 item = Item(self, latest_doc=latest_doc, itemid=doc[ITEMID])
695 yield item.get_revision(doc[REVID], doc=doc) 648 yield item.get_revision(doc[REVID], doc=doc)
696 649
697 def _documents(self, idx_name=LATEST_REVS, **kw): 650 def _documents(self, idx_name=ALL_REVS, **kw):
698 """ 651 """
699 Yield documents matching the kw args (internal use only). 652 Yield documents matching the kw args (internal use only).
700 653
701 If no kw args are given, this yields all documents. 654 If no kw args are given, this yields all documents.
702 """ 655 """
703 with self.ix[idx_name].searcher() as searcher: 656 with self.ix[idx_name].searcher() as searcher:
704 # Note: callers must consume everything we yield, so the for loop 657 # Note: callers must consume everything we yield, so the for loop
705 # ends and the "with" is left to close the index files. 658 # ends and the "with" is left to close the index files.
706 for doc in searcher.documents(**kw): 659 for doc in searcher.documents(**kw):
707 yield doc 660 yield doc
708 661
709 def document(self, idx_name=LATEST_REVS, **kw): 662 def document(self, idx_name=ALL_REVS, **kw):
710 """ 663 """
711 Return a Revision matching the kw args. 664 Return a Revision matching the kw args.
712 """ 665 """
713 doc = self._document(idx_name, **kw) 666 doc = self._document(idx_name, **kw)
714 if doc: 667 if doc:
715 latest_doc = doc if idx_name == LATEST_REVS else None 668 item = Item(self, latest_doc=None, itemid=doc[ITEMID])
716 item = Item(self, latest_doc=latest_doc, itemid=doc[ITEMID])
717 return item.get_revision(doc[REVID], doc=doc) 669 return item.get_revision(doc[REVID], doc=doc)
718 670
719 def _document(self, idx_name=ALL_REVS, **kw): 671 def _document(self, idx_name=ALL_REVS, **kw):
720 """ 672 """
721 Return a document matching the kw args (internal use only). 673 Return a document matching the kw args (internal use only).
722 """ 674 """
723 with self.ix[idx_name].searcher() as searcher: 675 with self.ix[idx_name].searcher() as searcher:
724 return searcher.document(**kw) 676 return searcher.document(**kw)
725 677
726 def has_item(self, name): 678 def has_item(self, name):
(...skipping 26 matching lines...) Expand all
753 705
754 def existing_item(self, **query): 706 def existing_item(self, **query):
755 """ 707 """
756 Return item identified by query (must be an existing item). 708 Return item identified by query (must be an existing item).
757 709
758 :kwargs **query: e.g. name_exact=u"Foo" or itemid="..." or ... 710 :kwargs **query: e.g. name_exact=u"Foo" or itemid="..." or ...
759 (must be a unique fieldname=value for the latest-revs i ndex) 711 (must be a unique fieldname=value for the latest-revs i ndex)
760 """ 712 """
761 return Item.existing(self, **query) 713 return Item.existing(self, **query)
762 714
763 def get_users_branch(self): 715 def generate_branchname(self, itemid):
ThomasJWaldmann 2012/08/07 20:29:51 what exactly means itemid here?
764 """ 716 """
765 Return a branchname by looking up the userhead of the current user 717 Return a generated vacant branchname
766 """ 718 """
767 user = flaskg.user 719 i = 0
768 if user.name == 'anonymous': 720 while True:
769 return 'master' 721 branchname = u'branch' + unicode(i)
770 else: 722 q = {BRANCH_DST: itemid, BRANCH_SRC: branchname}
771 userhead = self._document(idx_name=USERHEADS, userid=user.itemid) 723 branch = self._document(idx_name=BRANCHES, **{BRANCH_DST: itemid})
ThomasJWaldmann 2012/07/20 13:14:29 this is not the userhead, but an indexed document.
breton 2012/07/21 02:28:57 in fact, that's exactly the userhead, as defined o
breton 2012/07/21 09:58:15 Also, I forgot to use itemid here. I think this fu
ThomasJWaldmann 2012/08/07 20:29:51 i don't understand itemid usage here.
772 return userhead[UH_POINTER] 724 if not branch:
725 return branchname
726 else:
727 i += 1
773 728
774 729
775 class Item(object): 730 class Item(object):
776 def __init__(self, indexer, doc=None, **query): 731 def __init__(self, indexer, doc=None, **query):
777 """ 732 """
778 :param indexer: indexer middleware instance 733 :param indexer: indexer middleware instance
779 :param doc: if caller already has a latest-revs index whoosh document 734 :param doc: if caller already has a latest-revs index whoosh document
780 it can be given there, to avoid us fetching same doc again 735 it can be given there, to avoid us fetching same doc again
781 from the index 736 from the index
782 :kwargs **query: any unique fieldname=value for the latest-revs index, e .g.: 737 :kwargs **query: any unique fieldname=value for the latest-revs index, e .g.:
(...skipping 54 matching lines...) Expand 10 before | Expand all | Expand 10 after
837 if self: 792 if self:
838 for rev in self.indexer.documents(idx_name=ALL_REVS, itemid=self.ite mid): 793 for rev in self.indexer.documents(idx_name=ALL_REVS, itemid=self.ite mid):
839 yield rev 794 yield rev
840 795
841 def __getitem__(self, revid): 796 def __getitem__(self, revid):
842 """ 797 """
843 Get Revision with revision id <revid>. 798 Get Revision with revision id <revid>.
844 """ 799 """
845 return Revision(self, revid) 800 return Revision(self, revid)
846 801
847 def get_revision(self, revid, doc=None): 802 def get_revision(self, revid, branch=None, doc=None):
848 """ 803 """
849 Similar to item[revid], but you can optionally give an already existing 804 Similar to item[revid], but you can optionally give an already existing
850 whoosh result document for the given revid to avoid backend accesses for some use cases. 805 whoosh result document for the given revid to avoid backend accesses for some use cases.
851 """ 806 """
852 return Revision(self, revid, doc) 807 return Revision(self, revid, branch, doc)
853 808
854 def get_head_revid_by_branch(self, branchname): 809 def get_head_revid_by_branch(self, branchname):
855 if branchname.startswith('$') and len(branchname) == len(make_uuid()) + 1: 810 if branchname.startswith(u'$') and len(branchname) == UUID_LEN + 1:
eSyr 2012/07/20 12:51:16 check against \$[0-9a-fA-F]{8}-([0-9a-fA-F]{4}-){3
ThomasJWaldmann 2012/07/20 13:14:29 IIRC we have a constant for this length, so you do
breton 2012/07/21 02:28:57 I don't think it'll be very helpful. Also, sane pe
breton 2012/07/21 02:28:57 fixed
ThomasJWaldmann 2012/07/22 10:45:28 well, maybe a function get_uuid(s) would be good,
856 return branchname[1:] # is a revid already 811 return branchname[1:] # is a revid already
857 else: 812 else:
858 branch = self.indexer._document(idx_name=BRANCHES, 813 q = {BRANCH_SRC: branchname or MASTER_BRANCH,
859 name=branchname or 'master', 814 BRANCH_ITEMID: self.itemid}
860 itemid=self.itemid) 815 branch = self.indexer._document(idx_name=BRANCHES, **q)
861 if branch: 816 if branch:
862 return branch[BRANCH_REVID] 817 return branch[BRANCH_DST]
ThomasJWaldmann 2012/07/20 13:14:29 see comment for line 771 - this is an indexed docu
863 # no branch retrieved from index, checking default 818 # no branch retrieved from index, checking default
ThomasJWaldmann 2012/07/20 13:14:29 the question here is whether we should just fall b
breton 2012/07/21 02:28:57 as we decided on EP:83
864 branch = self.indexer._document(idx_name=BRANCHES, 819 q = {BRANCH_SRC: MASTER_BRANCH, BRANCH_ITEMID: self.itemid}
865 name='master', 820 branch = self.indexer._document(idx_name=BRANCHES, **q)
866 itemid=self.itemid)
867 if branch: 821 if branch:
868 return branch[BRANCH_REVID] 822 return branch[BRANCH_DST]
869 # still no branch; using the first found 823 # still no branch; using the first found
ThomasJWaldmann 2012/07/20 13:14:29 see above.
870 return self.indexer._document(idx_name=ALL_REVS, 824 return self.indexer._document(idx_name=ALL_REVS,
871 itemid=self.itemid)[REVID] 825 **{BRANCH_ITEMID: self.itemid})[REVID]
872
873 def get_revision_by_branch(self, branch):
874 return Revision(self, self.get_head_revid_by_branch(branch))
875 826
876 def preprocess(self, meta, data): 827 def preprocess(self, meta, data):
877 """ 828 """
878 preprocess a revision before it gets stored and put into index. 829 preprocess a revision before it gets stored and put into index.
879 """ 830 """
880 content = convert_to_indexable(meta, data, is_new=True) 831 content = convert_to_indexable(meta, data, is_new=True)
881 return meta, data, content 832 return meta, data, content
882 833
883 def store_revision(self, meta, data, branch, overwrite=False, 834 def store_revision(self, meta, data, branch, overwrite=False,
884 trusted=False, # True for loading a serialized representation or other trusted sources 835 trusted=False, # True for loading a serialized representation or other trusted sources
(...skipping 71 matching lines...) Expand 10 before | Expand all | Expand 10 after
956 revid = meta.get(REVID) 907 revid = meta.get(REVID)
957 if revid is not None and revid in backend: 908 if revid is not None and revid in backend:
958 raise ValueError('need overwrite=True to overwrite existing revisions') 909 raise ValueError('need overwrite=True to overwrite existing revisions')
959 meta, data, content = self.preprocess(meta, data) 910 meta, data, content = self.preprocess(meta, data)
960 data.seek(0) # rewind file 911 data.seek(0) # rewind file
961 revid = backend.store(meta, data) 912 revid = backend.store(meta, data)
962 meta[REVID] = revid 913 meta[REVID] = revid
963 self.indexer.index_revision(meta, content) 914 self.indexer.index_revision(meta, content)
964 # having saved revid, formate and create/update branch 915 # having saved revid, formate and create/update branch
965 if not branch: 916 if not branch:
966 branch = u'master' 917 # check if a branch with revid=meta[PARENTID] exists
ThomasJWaldmann 2012/07/20 13:14:29 sometimes you use 'master', sometimes u'master'.
breton 2012/07/21 02:28:57 Agreed. Add it to constants.keys?
ThomasJWaldmann 2012/07/22 10:45:28 yes.
967 branch_doc = self.indexer._document(idx_name=BRANCHES, name=branch, 918 # if yes, we should use it. Else generate some name.
968 itemid=self.itemid) 919 # meta[PARENTID] is a list, so if there are more than 1 parents
920 # we should generate a name anyway.
921 if len(meta[PARENTID]) > 1:
922 branch = self.indexer.generate_branchname(self.itemid)
923 else:
924 q = {BRANCH_DST: meta[PARENTID][0]}
925 branch_doc = self.indexer._document(idx_name=BRANCHES, **q)
926 if not branch_doc:
927 branch = self.indexer.generate_branchname(self.itemid)
928 else:
929 branch = branch_doc[BRANCH_SRC]
930 q = {BRANCH_SRC: branch, BRANCH_ITEMID: self.itemid}
931 branch_doc = self.indexer._document(idx_name=BRANCHES, **q)
969 branch_state = { 932 branch_state = {
970 BRANCH_ITEMID: self.itemid, 933 BRANCH_ITEMID: self.itemid,
971 BRANCH_NAME: branch, 934 BRANCH_SRC: branch,
972 } 935 }
973 if not branch_doc: 936 if not branch_doc:
974 branch_state[BRANCH_TYPE] = u'branch' 937 branch_state[BRANCH_TYPE] = BRANCH
ThomasJWaldmann 2012/07/20 13:14:29 same here.
975 branch_state[BRANCH_ID] = make_uuid() 938 branch_state[BRANCH_ID] = make_uuid()
976 else: 939 else:
977 branch_state[BRANCH_TYPE] = branch_doc.get(BRANCH_TYPE, u'branch') 940 branch_state[BRANCH_TYPE] = branch_doc.get(BRANCH_TYPE, BRANCH)
978 branch_state[BRANCH_ID] = branch_doc.get(BRANCH_ID, make_uuid()) 941 branch_state[BRANCH_ID] = branch_doc.get(BRANCH_ID, make_uuid())
979 branch_state[BRANCH_REVID] = revid 942 branch_state[BRANCH_DST] = revid
980 branchid = backend.store_branch(branch_state) 943 branchid = backend.store_branch(branch_state, meta[NAME])
981 self.indexer.index_special(branch_state, BRANCHES) 944 self.indexer.index_special(branch_state, BRANCHES)
982 # userheads 945 if flaskg.user.valid:
ThomasJWaldmann 2012/07/20 13:14:29 maybe try to avoid pointless comments. putting com
breton 2012/07/21 02:28:57 just dividing the code into sections
983 if flaskg.user.name != 'anonymous': 946 userhead_doc = self.indexer.document(idx_name=BRANCHES,
984 userhead_doc = self.indexer._document(idx_name=USERHEADS, 947 **{UH_USER: userid,
985 userid=userid, 948 UH_ITEMID: self.itemid})
986 itemid=self.itemid)
987 userhead_state = dict() 949 userhead_state = dict()
988 if not userhead_doc: 950 if not userhead_doc:
989 userhead_state[UH_ITEMID] = self.itemid 951 userhead_state[UH_ITEMID] = self.itemid
990 userhead_state[UH_USER] = flaskg.user 952 userhead_state[UH_USER] = userid
ThomasJWaldmann 2012/07/20 13:14:29 this is the complete User object, i doubt you want
breton 2012/07/21 02:28:57 thanks, fixed.
953 userhead_state[BRANCH_TYPE] = USERHEAD
991 else: 954 else:
992 userhead_state.update(userhead_doc) 955 userhead_state.update(userhead_doc)
993 userhead_state[UH_POINTER] = branch_state[BRANCH_NAME] 956 userhead_state[UH_POINTER] = branch_state[BRANCH_SRC]
994 userheadid = backend.store_userhead(userhead_state) 957 userheadid = backend.store_userhead(userhead_state, meta[NAME])
995 self.indexer.index_special(userhead_state, USERHEADS) 958 self.indexer.index_special(userhead_state, BRANCHES)
996 959
997 return Revision(self, revid) 960 return Revision(self, revid)
998 961
999 def store_all_revisions(self, meta, data): 962 def store_all_revisions(self, meta, data):
1000 """ 963 """
1001 Store over all revisions of this item. 964 Store over all revisions of this item.
1002 """ 965 """
1003 for rev in self.iter_revs(): 966 for rev in self.iter_revs():
1004 meta[REVID] = rev.revid 967 meta[REVID] = rev.revid
1005 self.store_revision(meta, data, overwrite=True) 968 self.store_revision(meta, data, overwrite=True)
1006 969
1007 def destroy_revision(self, revid): 970 def destroy_revision(self, revid):
1008 """ 971 """
1009 Destroy revision <revid>. 972 Destroy revision <revid>.
1010 """ 973 """
1011 rev = Revision(self, revid) 974 rev = Revision(self, revid)
1012 self.backend.remove(rev.name, revid) 975 self.backend.remove(rev.name, revid)
1013 self.indexer.remove_revision(revid) 976 self.indexer.remove_revision(revid)
1014 977
1015 def destroy_all_revisions(self): 978 def destroy_all_revisions(self):
1016 """ 979 """
1017 Destroy all revisions of this item. 980 Destroy all revisions of this item.
1018 """ 981 """
1019 for rev in self.iter_revs(): 982 for rev in self.iter_revs():
1020 self.destroy_revision(rev.revid) 983 self.destroy_revision(rev.revid)
984
985 def get_users_branch(self):
986 """
987 Return a branchname by looking up the userhead of the current user
988 """
989 try:
990 # XXX: now users are stored the same way as the content is. During
991 # login a user needs to be retrieved for password comparison,
992 # which requires a call of this function. But during login
993 # flaskg.user is not set yet. This below is a little workaround.
994 # Consider removal after users are coded in a sanier way
995 user = flaskg.user
996 userid = user.itemid
997 except AttributeError:
998 # user's meta and data are ALWAYS in master branch.
999 return MASTER_BRANCH
1000
1001 if user.name == 'anonymous':
1002 return MASTER_BRANCH
1003 else:
1004 q = {UH_USER: userid, UH_ITEMID: self.itemid,BRANCH_TYPE: USERHEAD}
ThomasJWaldmann 2012/08/07 20:29:51 a blank missing
1005 userhead = self.indexer._document(idx_name=BRANCHES, **q)
1006 if not userhead:
1007 return MASTER_BRANCH
1008 else:
1009 return userhead[UH_POINTER]
1010
1021 1011
1022 1012
1023 class Revision(object): 1013 class Revision(object):
1024 """ 1014 """
1025 An existing revision (exists in the backend). 1015 An existing revision (exists in the backend).
1026 """ 1016 """
1027 def __init__(self, item, revid=None, doc=None, branch=None): 1017 def __init__(self, item, revid=None, branch=None, doc=None):
1028 is_current = revid == CURRENT 1018 is_current = revid == CURRENT
1019 self.users_branch = item.get_users_branch()
1029 if doc is None: 1020 if doc is None:
1030 if not revid or is_current: 1021 if revid and revid != CURRENT:
1031 # get current user's branchname 1022 pass
ThomasJWaldmann 2012/07/20 13:14:29 see above
1032 branchname = item.indexer.get_users_branch() 1023 elif branch:
1033 if not branchname: 1024 revid = item.get_head_revid_by_branch(branch)
1034 branchname = 'master' 1025 else:
ThomasJWaldmann 2012/07/20 13:14:29 maybe do that in get_users_branch()?
breton 2012/07/21 02:28:57 in fact, I don't see any reason to do it. Removed.
1035 revid = item.get_head_revid_by_branch(branchname) 1026 branch = self.users_branch
1036 doc = item.indexer._document(idx_name=ALL_REVS, revid=revid) 1027 revid = item.get_head_revid_by_branch(branch)
1028 doc = item.indexer._document(idx_name=ALL_REVS, **{REVID: revid})
1037 if doc is None: 1029 if doc is None:
1038 raise KeyError 1030 raise KeyError
1039 if is_current: 1031 if is_current:
1040 revid = doc.get(REVID) 1032 revid = doc.get(REVID)
1041 if revid is None: 1033 if revid is None:
1042 raise KeyError 1034 raise KeyError
1043 self.item = item 1035 self.item = item
1044 self.revid = revid 1036 self.revid = revid
1045 self.backend = item.backend 1037 self.backend = item.backend
1046 self._doc = doc 1038 self._doc = doc
1047 self.meta = Meta(self, self._doc) 1039 self.meta = Meta(self, self._doc)
1048 self._data = None 1040 self._data = None
1041 real_branch = item.indexer._document(idx_name=BRANCHES,
1042 **{BRANCH_DST: revid})
1043 if real_branch:··
1044 self.not_on_userhead = real_branch[BRANCH_SRC] == self.users_branch
1045 self.real_branch = real_branch[BRANCH_SRC]
1046 else:
1047 self.not_on_userhead = True
1048 self.real_branch = None
1049
1049 # Note: this does not immediately raise a KeyError for non-existing revs any more 1050 # Note: this does not immediately raise a KeyError for non-existing revs any more
1050 # If you access data or meta, it will, though. 1051 # If you access data or meta, it will, though.
1051 1052
1052 @property 1053 @property
1053 def name(self): 1054 def name(self):
1054 return self.meta.get(NAME, 'DoesNotExist') 1055 return self.meta.get(NAME, 'DoesNotExist')
1055 1056
1056 def _load(self): 1057 def _load(self):
1057 meta, data = self.backend.retrieve(self._doc[NAME], self.revid) # raises KeyError if rev does not exist 1058 meta, data = self.backend.retrieve(self._doc[NAME], self.revid) # raises KeyError if rev does not exist
1058 self.meta = Meta(self, self._doc, meta) 1059 self.meta = Meta(self, self._doc, meta)
(...skipping 60 matching lines...) Expand 10 before | Expand all | Expand 10 after
1119 def __cmp__(self, other): 1120 def __cmp__(self, other):
1120 if self[REVID] == other[REVID]: 1121 if self[REVID] == other[REVID]:
1121 return 0 1122 return 0
1122 return cmp(self[MTIME], other[MTIME]) 1123 return cmp(self[MTIME], other[MTIME])
1123 1124
1124 def __len__(self): 1125 def __len__(self):
1125 return 0 # XXX 1126 return 0 # XXX
1126 1127
1127 def __repr__(self): 1128 def __repr__(self):
1128 return "Meta _doc: {0!r} _meta: {1!r}".format(self._doc, self._meta) 1129 return "Meta _doc: {0!r} _meta: {1!r}".format(self._doc, self._meta)
1129 1130
ThomasJWaldmann 2012/08/07 20:29:51 in general, code is hard to follow/understand, i s
LEFTRIGHT

Powered by Google App Engine
RSS Feeds Recent Issues | This issue
This is Rietveld f62528b