Index: MoinMoin/storage/middleware/indexing.py |
=================================================================== |
--- a/MoinMoin/storage/middleware/indexing.py |
+++ b/MoinMoin/storage/middleware/indexing.py |
@@ -79,22 +79,24 @@ |
LANGUAGE, USERID, ADDRESS, HOSTNAME, SIZE, ACTION, COMMENT, SUMMARY, \ |
CONTENT, EXTERNALLINKS, ITEMLINKS, ITEMTRANSCLUSIONS, ACL, EMAIL, OPENID, \ |
ITEMID, REVID, CURRENT, PARENTID, \ |
- LATEST_REVS, ALL_REVS, BRANCHES, USERHEADS, \ |
+ ALL_REVS, BRANCHES, \ |
CONTENTTYPE_USER, \ |
- BRANCH_ID, BRANCH_ITEMID, BRANCH_NAME, BRANCH_REVID, \ |
- BRANCH_TYPE, \ |
- UH_ID, UH_ITEMID, UH_USER, UH_POINTER |
+ BRANCH_ID, BRANCH_ITEMID, BRANCH_SRC, BRANCH_DST, \ |
+ BRANCH_TYPE, MASTER_BRANCH, \ |
+ UH_ID, UH_ITEMID, UH_USER, UH_POINTER, \ |
+ BRANCH, TAG, USERHEAD |
+ |
from MoinMoin.constants import keys |
from MoinMoin import user |
from MoinMoin.search.analyzers import item_name_analyzer, MimeTokenizer, AclTokenizer |
from MoinMoin.themes import utctimestamp |
-from MoinMoin.util.crypto import make_uuid |
+from MoinMoin.util.crypto import make_uuid, UUID_LEN |
from MoinMoin.storage.middleware.validation import ContentMetaSchema, UserMetaSchema |
from MoinMoin.storage.error import NoSuchItemError, ItemAlreadyExistsError |
-INDEXES = [LATEST_REVS, ALL_REVS, BRANCHES, USERHEADS] |
+INDEXES = [ALL_REVS, BRANCHES] |
def backend_to_index(meta, content, schema, wikiname): |
@@ -278,22 +280,13 @@ |
branches_fields = { |
BRANCH_ID: ID(unique=True, stored=True), |
BRANCH_ITEMID: ID(stored=True), |
- BRANCH_NAME: ID(stored=True), |
- BRANCH_REVID: ID(stored=True), |
BRANCH_TYPE: ID(stored=True), |
+ BRANCH_SRC: ID(stored=True), |
+ BRANCH_DST: ID(stored=True), |
} |
- userheads_fields = { |
- UH_ID: ID(unique=True, stored=True), |
- UH_ITEMID: ID(stored=True), |
- UH_USER: ID(stored=True), |
- UH_POINTER: ID(stored=True), |
- } |
- |
- latest_revisions_schema = Schema(**latest_revs_fields) |
all_revisions_schema = Schema(**all_revs_fields) |
branches_schema = Schema(**branches_fields) |
- userheads_schema = Schema(**userheads_fields) |
# Define dynamic fields |
dynamic_fields = [("*_id", ID(stored=True)), |
@@ -306,14 +299,11 @@ |
# Adding dynamic fields to schemas |
for glob, field_type in dynamic_fields: |
- latest_revisions_schema.add(glob, field_type, glob=True) |
all_revisions_schema.add(glob, field_type, glob=True) |
# schemas are needed by query parser and for index creation |
self.schemas[ALL_REVS] = all_revisions_schema |
- self.schemas[LATEST_REVS] = latest_revisions_schema |
self.schemas[BRANCHES] = branches_schema |
- self.schemas[USERHEADS] = userheads_schema |
# what fields could whoosh result documents have (no matter whether all revs index |
# or latest revs index): |
@@ -404,16 +394,16 @@ |
Remove a single revision from indexes. |
""" |
# get branches with the revision which will be removed |
- with self.ix[LATEST_REVS].searcher() as searcher: |
+ with self.ix[ALL_REVS].searcher() as searcher: |
revision_to_remove = searcher.document(revid=revid) |
if revision_to_remove: |
parent_revid = revision_to_remove.get(PARENTID, None) |
with self.ix[BRANCHES].searcher() as searcher: |
- branches_docs = searcher.documents(revid=revid) |
- with self.ix[BRANCHES].writer() as writer: |
- for branch_doc in branches_docs: |
- branch_doc[BRANCH_REVID] = parent_revid |
- writer.update_document(**branch_doc) |
+ branches_docs = searcher.documents(**{BRANCH_DST: revid}) |
+ with self.ix[BRANCHES].writer() as writer: |
+ for branch_doc in branches_docs: |
+ branch_doc[BRANCH_DST] = parent_revid |
+ writer.update_document(**branch_doc) |
if async: |
writer = AsyncWriter(self.ix[ALL_REVS]) |
else: |
@@ -421,19 +411,7 @@ |
with writer as writer: |
writer.delete_by_term(REVID, revid) |
- def _modify_branches_index(self, index, schema, wikiname, branches, |
- mode='add', procs=1, limitmb=256): |
- return self._modify_special_index(index, schema, wikiname, branches, |
- BRANCH_ID, self.backend.retrieve_branch, |
- mode, procs, limitmb) |
- def _modify_userheads_index(self, index, schema, wikiname, userheads, |
- mode='add', procs=1, limitmb=256): |
- return self._modify_special_index(index, schema, wikiname, userheads, |
- UH_ID, self.backend.retrieve_userhead, |
- mode, procs, limitmb) |
- |
- def _modify_special_index(self, index, schema, wikiname, data, id_field, |
- retrieve_function, mode='add', |
+ def _modify_branches_index(self, index, schema, wikiname, data, mode='add', |
procs=1, limitmb=256): |
""" |
modify special index, containing simple (non-typed) elements like |
@@ -442,7 +420,7 @@ |
with index.writer(procs=procs, limitmb=limitmb) as writer: |
for dataid in data: |
if mode in ['add', 'update', ]: |
- entry = retrieve_function(dataid) |
+ entry = self.backend.retrieve_branch(dataid) |
doc = dict([(str(key), value) |
for key, value in entry.items() |
if key in schema]) |
@@ -451,7 +429,7 @@ |
elif mode == 'add': |
writer.add_document(**doc) |
elif mode == 'delete': |
- writer.delete_by_term(id_field, dataid) |
+ writer.delete_by_term(BRANCH_ID, dataid) |
else: |
raise ValueError("mode must be 'update', 'add' or 'delete', not '{0}'".format(mode)) |
@@ -514,19 +492,12 @@ |
latest_names_revids = self._find_latest_names_revids(index) |
finally: |
index.close() |
- # branches and userheads indexes |
+ # branches and userheads index |
index = open_dir(index_dir, indexname=BRANCHES) |
try: |
- self._modify_branches_index(index, BRANCHES, self.wikiname, |
- self.backend.branches, 'add', |
- procs, limitmb) |
- finally: |
- index.close() |
- index = open_dir(index_dir, indexname=BRANCHES) |
- try: |
- self._modify_userheads_index(index, BRANCHES, self.wikiname, |
- self.backend.userheads, 'add', |
- procs, limitmb) |
+ self._modify_branches_index(index, self.schemas[BRANCHES], |
+ self.wikiname, self.backend.branches, |
+ 'add', procs, limitmb) |
finally: |
index.close() |
@@ -569,7 +540,7 @@ |
# now update BRANCHES index: |
with index_branches.searcher() as searcher: |
ix_branchids = set(doc[BRANCH_ID] for doc in searcher.all_stored_fields()) |
- backend_branchids = set(branchid for branch in self.backend.branches) |
+ backend_branchids = set(branch for branch in self.backend.branches) |
add_branchids = backend_branchids - ix_branchids |
del_branchids = ix_branchids - backend_branchids |
changed = changed or add_branchids or del_branchids |
@@ -579,21 +550,6 @@ |
self.wikiname, del_branchids, 'delete') |
finally: |
index_branches.close() |
- index_userheads = open_dir(index_dir, indexname=USERHEADS) |
- try: |
- # now update userheads index: |
- with index_userheads.searcher() as searcher: |
- ix_userheadids = set(doc[UH_ID] for doc in searcher.all_stored_fields()) |
- backend_userheadids = set(userheadid for userhead in self.backend.userheads) |
- add_userheadids = backend_userheadids - ix_userheadids |
- del_userheadids = ix_userheadids - backend_userheadids |
- changed = changed or add_userheadids or del_userheadids |
- self._modify_userhead_index(index_userheads, self.schemas[USERHEADS], |
- self.wikiname, add_userheadids, 'add') |
- self._modify_userhead_index(index_userheads, self.schemas[USERHEADS], |
- self.wikiname, del_userheadids, 'delete') |
- finally: |
- index_branches.close() |
return changed |
def optimize_backend(self): |
@@ -620,7 +576,7 @@ |
finally: |
ix.close() |
- def dump(self, tmp=False, idx_name=LATEST_REVS): |
+ def dump(self, tmp=False, idx_name=ALL_REVS): |
""" |
Yield key/value tuple lists for all documents in the indexes, fields sorted. |
""" |
@@ -635,7 +591,7 @@ |
finally: |
ix.close() |
- def query_parser(self, default_fields, idx_name=LATEST_REVS): |
+ def query_parser(self, default_fields, idx_name=ALL_REVS): |
""" |
Build a query parser for a list of default fields. |
""" |
@@ -659,7 +615,7 @@ |
qp.add_plugin(PseudoFieldPlugin({'username': username_pseudo_field})) |
return qp |
- def search(self, q, idx_name=LATEST_REVS, **kw): |
+ def search(self, q, idx_name=ALL_REVS, **kw): |
""" |
Search with query q, yield Revisions. |
""" |
@@ -668,11 +624,10 @@ |
# ends and the "with" is left to close the index files. |
for hit in searcher.search(q, **kw): |
doc = hit.fields() |
- latest_doc = doc if idx_name == LATEST_REVS else None |
- item = Item(self, doc=latest_doc, itemid=doc[ITEMID]) |
+ item = Item(self, doc=None, itemid=doc[ITEMID]) |
yield item.get_revision(doc[REVID], doc=doc) |
- def search_page(self, q, idx_name=LATEST_REVS, pagenum=1, pagelen=10, **kw): |
+ def search_page(self, q, idx_name=ALL_REVS, pagenum=1, pagelen=10, **kw): |
""" |
Same as search, but with paging support. |
""" |
@@ -681,20 +636,18 @@ |
# ends and the "with" is left to close the index files. |
for hit in searcher.search_page(q, pagenum, pagelen=pagelen, **kw): |
doc = hit.fields() |
- latest_doc = doc if idx_name == LATEST_REVS else None |
- item = Item(self, latest_doc=latest_doc, itemid=doc[ITEMID]) |
+ item = Item(self, latest_doc=None, itemid=doc[ITEMID]) |
yield item.get_revision(doc[REVID], doc=doc) |
- def documents(self, idx_name=LATEST_REVS, **kw): |
+ def documents(self, idx_name=ALL_REVS, **kw): |
""" |
Yield Revisions matching the kw args. |
""" |
for doc in self._documents(idx_name, **kw): |
- latest_doc = doc if idx_name == LATEST_REVS else None |
- item = Item(self, latest_doc=latest_doc, itemid=doc[ITEMID]) |
+ item = Item(self, doc=None, itemid=doc[ITEMID]) |
yield item.get_revision(doc[REVID], doc=doc) |
- def _documents(self, idx_name=LATEST_REVS, **kw): |
+ def _documents(self, idx_name=ALL_REVS, **kw): |
""" |
Yield documents matching the kw args (internal use only). |
@@ -706,14 +659,13 @@ |
for doc in searcher.documents(**kw): |
yield doc |
- def document(self, idx_name=LATEST_REVS, **kw): |
+ def document(self, idx_name=ALL_REVS, **kw): |
""" |
Return a Revision matching the kw args. |
""" |
doc = self._document(idx_name, **kw) |
if doc: |
- latest_doc = doc if idx_name == LATEST_REVS else None |
- item = Item(self, latest_doc=latest_doc, itemid=doc[ITEMID]) |
+ item = Item(self, latest_doc=None, itemid=doc[ITEMID]) |
return item.get_revision(doc[REVID], doc=doc) |
def _document(self, idx_name=ALL_REVS, **kw): |
@@ -760,16 +712,19 @@ |
""" |
return Item.existing(self, **query) |
- def get_users_branch(self): |
+ def generate_branchname(self, itemid): |
""" |
- Return a branchname by looking up the userhead of the current user |
+        Return a generated branch name that is not yet used by any branch of the item given by itemid. |
""" |
- user = flaskg.user |
- if user.name == 'anonymous': |
- return 'master' |
- else: |
- userhead = self._document(idx_name=USERHEADS, userid=user.itemid) |
- return userhead[UH_POINTER] |
+        i = 0 |
+        while True: |
+            branchname = u'branch' + unicode(i) |
+            # vacant means: no branch of this item uses that name yet |
+            q = {BRANCH_ITEMID: itemid, BRANCH_SRC: branchname} |
+            branch = self._document(idx_name=BRANCHES, **q) |
+            if not branch: |
+                return branchname |
+            else: |
+                i += 1 |
class Item(object): |
@@ -844,34 +799,30 @@ |
""" |
return Revision(self, revid) |
- def get_revision(self, revid, doc=None): |
+ def get_revision(self, revid, branch=None, doc=None): |
""" |
Similar to item[revid], but you can optionally give an already existing |
whoosh result document for the given revid to avoid backend accesses for some use cases. |
""" |
- return Revision(self, revid, doc) |
+ return Revision(self, revid, branch, doc) |
def get_head_revid_by_branch(self, branchname): |
- if branchname.startswith('$') and len(branchname) == len(make_uuid()) + 1: |
+ if branchname.startswith(u'$') and len(branchname) == UUID_LEN + 1: |
return branchname[1:] # is a revid already |
else: |
- branch = self.indexer._document(idx_name=BRANCHES, |
- name=branchname or 'master', |
- itemid=self.itemid) |
+ q = {BRANCH_SRC: branchname or MASTER_BRANCH, |
+ BRANCH_ITEMID: self.itemid} |
+ branch = self.indexer._document(idx_name=BRANCHES, **q) |
if branch: |
- return branch[BRANCH_REVID] |
+ return branch[BRANCH_DST] |
# no branch retrieved from index, checking default |
- branch = self.indexer._document(idx_name=BRANCHES, |
- name='master', |
- itemid=self.itemid) |
+ q = {BRANCH_SRC: MASTER_BRANCH, BRANCH_ITEMID: self.itemid} |
+ branch = self.indexer._document(idx_name=BRANCHES, **q) |
if branch: |
- return branch[BRANCH_REVID] |
+ return branch[BRANCH_DST] |
# still no branch; using the first found |
return self.indexer._document(idx_name=ALL_REVS, |
- itemid=self.itemid)[REVID] |
- |
- def get_revision_by_branch(self, branch): |
- return Revision(self, self.get_head_revid_by_branch(branch)) |
+ **{BRANCH_ITEMID: self.itemid})[REVID] |
def preprocess(self, meta, data): |
""" |
@@ -963,37 +914,49 @@ |
self.indexer.index_revision(meta, content) |
# having saved revid, formate and create/update branch |
if not branch: |
- branch = u'master' |
- branch_doc = self.indexer._document(idx_name=BRANCHES, name=branch, |
- itemid=self.itemid) |
+ # check if a branch with revid=meta[PARENTID] exists |
+ # if yes, we should use it. Else generate some name. |
+ # meta[PARENTID] is a list, so if there are more than 1 parents |
+ # we should generate a name anyway. |
+ if len(meta[PARENTID]) > 1: |
+ branch = self.indexer.generate_branchname(self.itemid) |
+ else: |
+ q = {BRANCH_DST: meta[PARENTID][0]} |
+ branch_doc = self.indexer._document(idx_name=BRANCHES, **q) |
+ if not branch_doc: |
+ branch = self.indexer.generate_branchname(self.itemid) |
+ else: |
+ branch = branch_doc[BRANCH_SRC] |
+ q = {BRANCH_SRC: branch, BRANCH_ITEMID: self.itemid} |
+ branch_doc = self.indexer._document(idx_name=BRANCHES, **q) |
branch_state = { |
BRANCH_ITEMID: self.itemid, |
- BRANCH_NAME: branch, |
+ BRANCH_SRC: branch, |
} |
if not branch_doc: |
- branch_state[BRANCH_TYPE] = u'branch' |
+ branch_state[BRANCH_TYPE] = BRANCH |
branch_state[BRANCH_ID] = make_uuid() |
else: |
- branch_state[BRANCH_TYPE] = branch_doc.get(BRANCH_TYPE, u'branch') |
+ branch_state[BRANCH_TYPE] = branch_doc.get(BRANCH_TYPE, BRANCH) |
branch_state[BRANCH_ID] = branch_doc.get(BRANCH_ID, make_uuid()) |
- branch_state[BRANCH_REVID] = revid |
- branchid = backend.store_branch(branch_state) |
+ branch_state[BRANCH_DST] = revid |
+ branchid = backend.store_branch(branch_state, meta[NAME]) |
self.indexer.index_special(branch_state, BRANCHES) |
- # userheads |
- if flaskg.user.name != 'anonymous': |
- userhead_doc = self.indexer._document(idx_name=USERHEADS, |
- userid=userid, |
- itemid=self.itemid) |
+ if flaskg.user.valid: |
+            userhead_doc = self.indexer._document( |
+                idx_name=BRANCHES, **{UH_USER: userid, UH_ITEMID: self.itemid}) |
userhead_state = dict() |
if not userhead_doc: |
userhead_state[UH_ITEMID] = self.itemid |
- userhead_state[UH_USER] = flaskg.user |
+ userhead_state[UH_USER] = userid |
+ userhead_state[BRANCH_TYPE] = USERHEAD |
else: |
userhead_state.update(userhead_doc) |
- userhead_state[UH_POINTER] = branch_state[BRANCH_NAME] |
- userheadid = backend.store_userhead(userhead_state) |
- self.indexer.index_special(userhead_state, USERHEADS) |
- |
+ userhead_state[UH_POINTER] = branch_state[BRANCH_SRC] |
+ userheadid = backend.store_userhead(userhead_state, meta[NAME]) |
+ self.indexer.index_special(userhead_state, BRANCHES) |
+ |
return Revision(self, revid) |
def store_all_revisions(self, meta, data): |
@@ -1019,21 +982,50 @@ |
for rev in self.iter_revs(): |
self.destroy_revision(rev.revid) |
+ def get_users_branch(self): |
+ """ |
+ Return a branchname by looking up the userhead of the current user |
+ """ |
+ try: |
+ # XXX: now users are stored the same way as the content is. During |
+ # login a user needs to be retrieved for password comparison, |
+ # which requires a call of this function. But during login |
+            # flaskg.user is not set yet. The code below is a small workaround. |
+            # Consider removing it once users are handled in a saner way. |
+ user = flaskg.user |
+ userid = user.itemid |
+ except AttributeError: |
+ # user's meta and data are ALWAYS in master branch. |
+ return MASTER_BRANCH |
+ |
+ if user.name == 'anonymous': |
+ return MASTER_BRANCH |
+ else: |
+            q = {UH_USER: userid, UH_ITEMID: self.itemid, BRANCH_TYPE: USERHEAD} |
+ userhead = self.indexer._document(idx_name=BRANCHES, **q) |
+ if not userhead: |
+ return MASTER_BRANCH |
+ else: |
+ return userhead[UH_POINTER] |
+ |
+ |
class Revision(object): |
""" |
An existing revision (exists in the backend). |
""" |
- def __init__(self, item, revid=None, doc=None, branch=None): |
+ def __init__(self, item, revid=None, branch=None, doc=None): |
is_current = revid == CURRENT |
+ self.users_branch = item.get_users_branch() |
if doc is None: |
- if not revid or is_current: |
- # get current user's branchname |
- branchname = item.indexer.get_users_branch() |
- if not branchname: |
- branchname = 'master' |
- revid = item.get_head_revid_by_branch(branchname) |
- doc = item.indexer._document(idx_name=ALL_REVS, revid=revid) |
+            if not revid or is_current: |
+                branch = branch or self.users_branch |
+                revid = item.get_head_revid_by_branch(branch) |
+ doc = item.indexer._document(idx_name=ALL_REVS, **{REVID: revid}) |
if doc is None: |
raise KeyError |
if is_current: |
@@ -1046,6 +1038,15 @@ |
self._doc = doc |
self.meta = Meta(self, self._doc) |
self._data = None |
+ real_branch = item.indexer._document(idx_name=BRANCHES, |
+ **{BRANCH_DST: revid}) |
+        if real_branch: |
+            # not on the userhead unless this revision heads the user's current branch |
+            self.not_on_userhead = real_branch[BRANCH_SRC] != self.users_branch |
+            self.real_branch = real_branch[BRANCH_SRC] |
+        else: |
+            self.not_on_userhead = True |
+            self.real_branch = None |
+ |
# Note: this does not immediately raise a KeyError for non-existing revs any more |
# If you access data or meta, it will, though. |
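
For orientation, here is a small self-contained sketch (not part of the patch) of the data model this change introduces: the single BRANCHES index now holds both branch documents (BRANCH_SRC = branch name, BRANCH_DST = head revision id) and userhead documents (UH_USER, UH_POINTER = branch name the user last stored to), distinguished by BRANCH_TYPE. The plain dicts and literal key strings below are illustrative stand-ins for the whoosh index and the BRANCH_*/UH_* constants from MoinMoin.constants.keys (whose actual string values are not shown in this hunk); only the lookup logic mirrors get_users_branch() and get_head_revid_by_branch() above.

# Illustrative sketch only: plain dicts stand in for the whoosh BRANCHES index;
# the literal keys are stand-ins for the BRANCH_*/UH_* constants used in the patch.
MASTER_BRANCH = u'master'

branches_index = [
    # a branch document: maps a branch name (src) to its head revision (dst)
    {'branch_type': u'branch', 'branch_itemid': u'item-1',
     'branch_src': MASTER_BRANCH, 'branch_dst': u'rev-3'},
    # a userhead document: remembers which branch a user last stored to
    {'branch_type': u'userhead', 'uh_itemid': u'item-1',
     'uh_user': u'user-42', 'uh_pointer': MASTER_BRANCH},
]


def users_branch(itemid, userid):
    """Branch name the given user currently points at for this item."""
    for doc in branches_index:
        if (doc.get('branch_type') == u'userhead'
                and doc.get('uh_itemid') == itemid
                and doc.get('uh_user') == userid):
            return doc['uh_pointer']
    return MASTER_BRANCH  # anonymous or unknown users read master


def head_revid(itemid, branchname):
    """Head revision id of a branch, falling back to master."""
    for name in (branchname, MASTER_BRANCH):
        for doc in branches_index:
            if (doc.get('branch_type') == u'branch'
                    and doc.get('branch_itemid') == itemid
                    and doc.get('branch_src') == name):
                return doc['branch_dst']
    return None  # caller falls back to any revision of the item


print(head_revid(u'item-1', users_branch(u'item-1', u'user-42')))  # -> rev-3

Keeping userheads as just another document type in the BRANCHES index (tagged via BRANCH_TYPE) is what allows the patch to drop the separate USERHEADS and LATEST_REVS indexes entirely.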