Index: MoinMoin/storage/middleware/indexing.py |
=================================================================== |
--- a/MoinMoin/storage/middleware/indexing.py |
+++ b/MoinMoin/storage/middleware/indexing.py |
@@ -79,22 +79,24 @@ |
LANGUAGE, USERID, ADDRESS, HOSTNAME, SIZE, ACTION, COMMENT, SUMMARY, \ |
CONTENT, EXTERNALLINKS, ITEMLINKS, ITEMTRANSCLUSIONS, ACL, EMAIL, OPENID, \ |
ITEMID, REVID, CURRENT, PARENTID, \ |
- LATEST_REVS, ALL_REVS, BRANCHES, USERHEADS, \ |
+ ALL_REVS, BRANCHES, \ |
CONTENTTYPE_USER, \ |
- BRANCH_ID, BRANCH_ITEMID, BRANCH_NAME, BRANCH_REVID, \ |
- BRANCH_TYPE, \ |
- UH_ID, UH_ITEMID, UH_USER, UH_POINTER |
+ BRANCH_ID, BRANCH_ITEMID, BRANCH_SRC, BRANCH_DST, \ |
+ BRANCH_TYPE, MASTER_BRANCH, \ |
+ UH_ID, UH_ITEMID, UH_USER, UH_POINTER, \ |
+ BRANCH, TAG, USERHEAD |
+ |
from MoinMoin.constants import keys |
from MoinMoin import user |
from MoinMoin.search.analyzers import item_name_analyzer, MimeTokenizer, AclTokenizer |
from MoinMoin.themes import utctimestamp |
-from MoinMoin.util.crypto import make_uuid |
+from MoinMoin.util.crypto import make_uuid, UUID_LEN |
from MoinMoin.storage.middleware.validation import ContentMetaSchema, UserMetaSchema |
from MoinMoin.storage.error import NoSuchItemError, ItemAlreadyExistsError |
-INDEXES = [LATEST_REVS, ALL_REVS, BRANCHES, USERHEADS] |
+INDEXES = [ALL_REVS, BRANCHES] |
def backend_to_index(meta, content, schema, wikiname): |
@@ -278,22 +280,13 @@ |
branches_fields = { |
BRANCH_ID: ID(unique=True, stored=True), |
BRANCH_ITEMID: ID(stored=True), |
- BRANCH_NAME: ID(stored=True), |
- BRANCH_REVID: ID(stored=True), |
BRANCH_TYPE: ID(stored=True), |
+ BRANCH_SRC: ID(stored=True), |
+ BRANCH_DST: ID(stored=True), |
} |
- userheads_fields = { |
- UH_ID: ID(unique=True, stored=True), |
- UH_ITEMID: ID(stored=True), |
- UH_USER: ID(stored=True), |
- UH_POINTER: ID(stored=True), |
- } |
- |
- latest_revisions_schema = Schema(**latest_revs_fields) |
all_revisions_schema = Schema(**all_revs_fields) |
branches_schema = Schema(**branches_fields) |
- userheads_schema = Schema(**userheads_fields) |
# Define dynamic fields |
dynamic_fields = [("*_id", ID(stored=True)), |
@@ -306,14 +299,11 @@ |
# Adding dynamic fields to schemas |
for glob, field_type in dynamic_fields: |
- latest_revisions_schema.add(glob, field_type, glob=True) |
all_revisions_schema.add(glob, field_type, glob=True) |
# schemas are needed by query parser and for index creation |
self.schemas[ALL_REVS] = all_revisions_schema |
- self.schemas[LATEST_REVS] = latest_revisions_schema |
self.schemas[BRANCHES] = branches_schema |
- self.schemas[USERHEADS] = userheads_schema |
# what fields could whoosh result documents have (no matter whether all revs index |
# or latest revs index): |
@@ -404,16 +394,16 @@ |
Remove a single revision from indexes. |
""" |
# get branches with the revision which will be removed |
- with self.ix[LATEST_REVS].searcher() as searcher: |
+ with self.ix[ALL_REVS].searcher() as searcher: |
revision_to_remove = searcher.document(revid=revid) |
if revision_to_remove: |
parent_revid = revision_to_remove.get(PARENTID, None) |
with self.ix[BRANCHES].searcher() as searcher: |
- branches_docs = searcher.documents(revid=revid) |
- with self.ix[BRANCHES].writer() as writer: |
- for branch_doc in branches_docs: |
- branch_doc[BRANCH_REVID] = parent_revid |
- writer.update_document(**branch_doc) |
+ branches_docs = searcher.documents(**{BRANCH_DST: revid}) |
+ with self.ix[BRANCHES].writer() as writer: |
+ for branch_doc in branches_docs: |
+ branch_doc[BRANCH_DST] = parent_revid |
+ writer.update_document(**branch_doc) |
if async: |
writer = AsyncWriter(self.ix[ALL_REVS]) |
else: |
@@ -421,19 +411,7 @@ |
with writer as writer: |
writer.delete_by_term(REVID, revid) |
- def _modify_branches_index(self, index, schema, wikiname, branches, |
- mode='add', procs=1, limitmb=256): |
- return self._modify_special_index(index, schema, wikiname, branches, |
- BRANCH_ID, self.backend.retrieve_branch, |
- mode, procs, limitmb) |
- def _modify_userheads_index(self, index, schema, wikiname, userheads, |
- mode='add', procs=1, limitmb=256): |
- return self._modify_special_index(index, schema, wikiname, userheads, |
- UH_ID, self.backend.retrieve_userhead, |
- mode, procs, limitmb) |
- |
- def _modify_special_index(self, index, schema, wikiname, data, id_field, |
- retrieve_function, mode='add', |
+ def _modify_branches_index(self, index, schema, wikiname, data, mode='add', |
procs=1, limitmb=256): |
""" |
modify special index, containing simple (non-typed) elements like |
@@ -442,7 +420,7 @@ |
with index.writer(procs=procs, limitmb=limitmb) as writer: |
for dataid in data: |
if mode in ['add', 'update', ]: |
- entry = retrieve_function(dataid) |
+ entry = self.backend.retrieve_branch(dataid) |
doc = dict([(str(key), value) |
for key, value in entry.items() |
if key in schema]) |
@@ -451,7 +429,7 @@ |
elif mode == 'add': |
writer.add_document(**doc) |
elif mode == 'delete': |
- writer.delete_by_term(id_field, dataid) |
+ writer.delete_by_term(BRANCH_ID, dataid) |
else: |
raise ValueError("mode must be 'update', 'add' or 'delete', not '{0}'".format(mode)) |
@@ -514,19 +492,12 @@ |
latest_names_revids = self._find_latest_names_revids(index) |
finally: |
index.close() |
- # branches and userheads indexes |
+ # branches and userheads index |
index = open_dir(index_dir, indexname=BRANCHES) |
try: |
- self._modify_branches_index(index, BRANCHES, self.wikiname, |
- self.backend.branches, 'add', |
- procs, limitmb) |
- finally: |
- index.close() |
- index = open_dir(index_dir, indexname=BRANCHES) |
- try: |
- self._modify_userheads_index(index, BRANCHES, self.wikiname, |
- self.backend.userheads, 'add', |
- procs, limitmb) |
+ self._modify_branches_index(index, self.schemas[BRANCHES], |
+ self.wikiname, self.backend.branches, |
+ 'add', procs, limitmb) |
finally: |
index.close() |
@@ -569,7 +540,7 @@ |
# now update BRANCHES index: |
with index_branches.searcher() as searcher: |
ix_branchids = set(doc[BRANCH_ID] for doc in searcher.all_stored_fields()) |
- backend_branchids = set(branchid for branch in self.backend.branches) |
+ backend_branchids = set(branch for branch in self.backend.branches) |
add_branchids = backend_branchids - ix_branchids |
del_branchids = ix_branchids - backend_branchids |
changed = changed or add_branchids or del_branchids |
@@ -579,21 +550,6 @@ |
self.wikiname, del_branchids, 'delete') |
finally: |
index_branches.close() |
- index_userheads = open_dir(index_dir, indexname=USERHEADS) |
- try: |
- # now update userheads index: |
- with index_userheads.searcher() as searcher: |
- ix_userheadids = set(doc[UH_ID] for doc in searcher.all_stored_fields()) |
- backend_userheadids = set(userheadid for userhead in self.backend.userheads) |
- add_userheadids = backend_userheadids - ix_userheadids |
- del_userheadids = ix_userheadids - backend_userheadids |
- changed = changed or add_userheadids or del_userheadids |
- self._modify_userhead_index(index_userheads, self.schemas[USERHEADS], |
- self.wikiname, add_userheadids, 'add') |
- self._modify_userhead_index(index_userheads, self.schemas[USERHEADS], |
- self.wikiname, del_userheadids, 'delete') |
- finally: |
- index_branches.close() |
return changed |
def optimize_backend(self): |
@@ -620,7 +576,7 @@ |
finally: |
ix.close() |
- def dump(self, tmp=False, idx_name=LATEST_REVS): |
+ def dump(self, tmp=False, idx_name=ALL_REVS): |
""" |
Yield key/value tuple lists for all documents in the indexes, fields sorted. |
""" |
@@ -635,7 +591,7 @@ |
finally: |
ix.close() |
- def query_parser(self, default_fields, idx_name=LATEST_REVS): |
+ def query_parser(self, default_fields, idx_name=ALL_REVS): |
""" |
Build a query parser for a list of default fields. |
""" |
@@ -659,7 +615,7 @@ |
qp.add_plugin(PseudoFieldPlugin({'username': username_pseudo_field})) |
return qp |
- def search(self, q, idx_name=LATEST_REVS, **kw): |
+ def search(self, q, idx_name=ALL_REVS, **kw): |
""" |
Search with query q, yield Revisions. |
""" |
@@ -668,11 +624,10 @@ |
# ends and the "with" is left to close the index files. |
for hit in searcher.search(q, **kw): |
doc = hit.fields() |
- latest_doc = doc if idx_name == LATEST_REVS else None |
- item = Item(self, doc=latest_doc, itemid=doc[ITEMID]) |
+ item = Item(self, doc=None, itemid=doc[ITEMID]) |
yield item.get_revision(doc[REVID], doc=doc) |
- def search_page(self, q, idx_name=LATEST_REVS, pagenum=1, pagelen=10, **kw): |
+ def search_page(self, q, idx_name=ALL_REVS, pagenum=1, pagelen=10, **kw): |
""" |
Same as search, but with paging support. |
""" |
@@ -681,20 +636,18 @@ |
# ends and the "with" is left to close the index files. |
for hit in searcher.search_page(q, pagenum, pagelen=pagelen, **kw): |
doc = hit.fields() |
- latest_doc = doc if idx_name == LATEST_REVS else None |
- item = Item(self, latest_doc=latest_doc, itemid=doc[ITEMID]) |
+ item = Item(self, latest_doc=None, itemid=doc[ITEMID]) |
yield item.get_revision(doc[REVID], doc=doc) |
- def documents(self, idx_name=LATEST_REVS, **kw): |
+ def documents(self, idx_name=ALL_REVS, **kw): |
""" |
Yield Revisions matching the kw args. |
""" |
for doc in self._documents(idx_name, **kw): |
- latest_doc = doc if idx_name == LATEST_REVS else None |
- item = Item(self, latest_doc=latest_doc, itemid=doc[ITEMID]) |
+ item = Item(self, doc=None, itemid=doc[ITEMID]) |
yield item.get_revision(doc[REVID], doc=doc) |
- def _documents(self, idx_name=LATEST_REVS, **kw): |
+ def _documents(self, idx_name=ALL_REVS, **kw): |
""" |
Yield documents matching the kw args (internal use only). |
@@ -706,14 +659,13 @@ |
for doc in searcher.documents(**kw): |
yield doc |
- def document(self, idx_name=LATEST_REVS, **kw): |
+ def document(self, idx_name=ALL_REVS, **kw): |
""" |
Return a Revision matching the kw args. |
""" |
doc = self._document(idx_name, **kw) |
if doc: |
- latest_doc = doc if idx_name == LATEST_REVS else None |
- item = Item(self, latest_doc=latest_doc, itemid=doc[ITEMID]) |
+ item = Item(self, latest_doc=None, itemid=doc[ITEMID]) |
return item.get_revision(doc[REVID], doc=doc) |
def _document(self, idx_name=ALL_REVS, **kw): |
@@ -760,16 +712,19 @@ |
""" |
return Item.existing(self, **query) |
- def get_users_branch(self): |
+ def generate_branchname(self, itemid): |
""" |
- Return a branchname by looking up the userhead of the current user |
+        Return a generated branch name that is not yet used by any branch of the item given by itemid. |
""" |
- user = flaskg.user |
- if user.name == 'anonymous': |
- return 'master' |
- else: |
- userhead = self._document(idx_name=USERHEADS, userid=user.itemid) |
- return userhead[UH_POINTER] |
+        i = 0 |
+        while True: |
+            branchname = u'branch' + unicode(i) |
+            # vacant means: no branch of this item uses that name yet |
+            q = {BRANCH_ITEMID: itemid, BRANCH_SRC: branchname} |
+            branch = self._document(idx_name=BRANCHES, **q) |
+            if not branch: |
+                return branchname |
+            else: |
+                i += 1 |
class Item(object): |
@@ -844,34 +799,30 @@ |
""" |
return Revision(self, revid) |
- def get_revision(self, revid, doc=None): |
+ def get_revision(self, revid, branch=None, doc=None): |
""" |
Similar to item[revid], but you can optionally give an already existing |
whoosh result document for the given revid to avoid backend accesses for some use cases. |
""" |
- return Revision(self, revid, doc) |
+ return Revision(self, revid, branch, doc) |
def get_head_revid_by_branch(self, branchname): |
- if branchname.startswith('$') and len(branchname) == len(make_uuid()) + 1: |
+ if branchname.startswith(u'$') and len(branchname) == UUID_LEN + 1: |
return branchname[1:] # is a revid already |
else: |
- branch = self.indexer._document(idx_name=BRANCHES, |
- name=branchname or 'master', |
- itemid=self.itemid) |
+ q = {BRANCH_SRC: branchname or MASTER_BRANCH, |
+ BRANCH_ITEMID: self.itemid} |
+ branch = self.indexer._document(idx_name=BRANCHES, **q) |
if branch: |
- return branch[BRANCH_REVID] |
+ return branch[BRANCH_DST] |
# no branch retrieved from index, checking default |
- branch = self.indexer._document(idx_name=BRANCHES, |
- name='master', |
- itemid=self.itemid) |
+ q = {BRANCH_SRC: MASTER_BRANCH, BRANCH_ITEMID: self.itemid} |
+ branch = self.indexer._document(idx_name=BRANCHES, **q) |
if branch: |
- return branch[BRANCH_REVID] |
+ return branch[BRANCH_DST] |
# still no branch; using the first found |
return self.indexer._document(idx_name=ALL_REVS, |
- itemid=self.itemid)[REVID] |
- |
- def get_revision_by_branch(self, branch): |
- return Revision(self, self.get_head_revid_by_branch(branch)) |
+ **{BRANCH_ITEMID: self.itemid})[REVID] |
def preprocess(self, meta, data): |
""" |
@@ -963,37 +914,49 @@ |
self.indexer.index_revision(meta, content) |
# having saved revid, formate and create/update branch |
if not branch: |
- branch = u'master' |
- branch_doc = self.indexer._document(idx_name=BRANCHES, name=branch, |
- itemid=self.itemid) |
+ # check if a branch with revid=meta[PARENTID] exists |
+ # if yes, we should use it. Else generate some name. |
+ # meta[PARENTID] is a list, so if there are more than 1 parents |
+ # we should generate a name anyway. |
+ if len(meta[PARENTID]) > 1: |
+ branch = self.indexer.generate_branchname(self.itemid) |
+ else: |
+ q = {BRANCH_DST: meta[PARENTID][0]} |
+ branch_doc = self.indexer._document(idx_name=BRANCHES, **q) |
+ if not branch_doc: |
+ branch = self.indexer.generate_branchname(self.itemid) |
+ else: |
+ branch = branch_doc[BRANCH_SRC] |
+ q = {BRANCH_SRC: branch, BRANCH_ITEMID: self.itemid} |
+ branch_doc = self.indexer._document(idx_name=BRANCHES, **q) |
branch_state = { |
BRANCH_ITEMID: self.itemid, |
- BRANCH_NAME: branch, |
+ BRANCH_SRC: branch, |
} |
if not branch_doc: |
- branch_state[BRANCH_TYPE] = u'branch' |
+ branch_state[BRANCH_TYPE] = BRANCH |
branch_state[BRANCH_ID] = make_uuid() |
else: |
- branch_state[BRANCH_TYPE] = branch_doc.get(BRANCH_TYPE, u'branch') |
+ branch_state[BRANCH_TYPE] = branch_doc.get(BRANCH_TYPE, BRANCH) |
branch_state[BRANCH_ID] = branch_doc.get(BRANCH_ID, make_uuid()) |
- branch_state[BRANCH_REVID] = revid |
- branchid = backend.store_branch(branch_state) |
+ branch_state[BRANCH_DST] = revid |
+ branchid = backend.store_branch(branch_state, meta[NAME]) |
self.indexer.index_special(branch_state, BRANCHES) |
- # userheads |
- if flaskg.user.name != 'anonymous': |
- userhead_doc = self.indexer._document(idx_name=USERHEADS, |
- userid=userid, |
- itemid=self.itemid) |
+ if flaskg.user.valid: |
+            userhead_doc = self.indexer._document( |
+                idx_name=BRANCHES, **{UH_USER: userid, UH_ITEMID: self.itemid}) |
userhead_state = dict() |
if not userhead_doc: |
userhead_state[UH_ITEMID] = self.itemid |
- userhead_state[UH_USER] = flaskg.user |
+ userhead_state[UH_USER] = userid |
+ userhead_state[BRANCH_TYPE] = USERHEAD |
else: |
userhead_state.update(userhead_doc) |
- userhead_state[UH_POINTER] = branch_state[BRANCH_NAME] |
- userheadid = backend.store_userhead(userhead_state) |
- self.indexer.index_special(userhead_state, USERHEADS) |
- |
+ userhead_state[UH_POINTER] = branch_state[BRANCH_SRC] |
+ userheadid = backend.store_userhead(userhead_state, meta[NAME]) |
+ self.indexer.index_special(userhead_state, BRANCHES) |
+ |
return Revision(self, revid) |
def store_all_revisions(self, meta, data): |
@@ -1019,21 +982,50 @@ |
for rev in self.iter_revs(): |
self.destroy_revision(rev.revid) |
+ def get_users_branch(self): |
+ """ |
+ Return a branchname by looking up the userhead of the current user |
+ """ |
+ try: |
+ # XXX: now users are stored the same way as the content is. During |
+ # login a user needs to be retrieved for password comparison, |
+ # which requires a call of this function. But during login |
+            # flaskg.user is not set yet. The code below is a small workaround. |
+            # Consider removing it once users are handled in a saner way. |
+ user = flaskg.user |
+ userid = user.itemid |
+ except AttributeError: |
+ # user's meta and data are ALWAYS in master branch. |
+ return MASTER_BRANCH |
+ |
+ if user.name == 'anonymous': |
+ return MASTER_BRANCH |
+ else: |
+            q = {UH_USER: userid, UH_ITEMID: self.itemid, BRANCH_TYPE: USERHEAD} |
+ userhead = self.indexer._document(idx_name=BRANCHES, **q) |
+ if not userhead: |
+ return MASTER_BRANCH |
+ else: |
+ return userhead[UH_POINTER] |
+ |
+ |
class Revision(object): |
""" |
An existing revision (exists in the backend). |
""" |
- def __init__(self, item, revid=None, doc=None, branch=None): |
+ def __init__(self, item, revid=None, branch=None, doc=None): |
is_current = revid == CURRENT |
+ self.users_branch = item.get_users_branch() |
if doc is None: |
- if not revid or is_current: |
- # get current user's branchname |
- branchname = item.indexer.get_users_branch() |
- if not branchname: |
- branchname = 'master' |
- revid = item.get_head_revid_by_branch(branchname) |
- doc = item.indexer._document(idx_name=ALL_REVS, revid=revid) |
+            if not revid or is_current: |
+                branch = branch or self.users_branch |
+                revid = item.get_head_revid_by_branch(branch) |
+ doc = item.indexer._document(idx_name=ALL_REVS, **{REVID: revid}) |
if doc is None: |
raise KeyError |
if is_current: |
@@ -1046,6 +1038,15 @@ |
self._doc = doc |
self.meta = Meta(self, self._doc) |
self._data = None |
+ real_branch = item.indexer._document(idx_name=BRANCHES, |
+ **{BRANCH_DST: revid}) |
+        if real_branch: |
+            # not on the userhead unless this revision heads the user's current branch |
+            self.not_on_userhead = real_branch[BRANCH_SRC] != self.users_branch |
+            self.real_branch = real_branch[BRANCH_SRC] |
+        else: |
+            self.not_on_userhead = True |
+            self.real_branch = None |
+ |
# Note: this does not immediately raise a KeyError for non-existing revs any more |
# If you access data or meta, it will, though. |
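
For orientation, here is a small self-contained sketch (not part of the patch) of the data model this change introduces: the single BRANCHES index now holds both branch documents (BRANCH_SRC = branch name, BRANCH_DST = head revision id) and userhead documents (UH_USER, UH_POINTER = branch name the user last stored to), distinguished by BRANCH_TYPE. The plain dicts and literal key strings below are illustrative stand-ins for the whoosh index and the BRANCH_*/UH_* constants from MoinMoin.constants.keys (whose actual string values are not shown in this hunk); only the lookup logic mirrors get_users_branch() and get_head_revid_by_branch() above.

# Illustrative sketch only: plain dicts stand in for the whoosh BRANCHES index;
# the literal keys are stand-ins for the BRANCH_*/UH_* constants used in the patch.
MASTER_BRANCH = u'master'

branches_index = [
    # a branch document: maps a branch name (src) to its head revision (dst)
    {'branch_type': u'branch', 'branch_itemid': u'item-1',
     'branch_src': MASTER_BRANCH, 'branch_dst': u'rev-3'},
    # a userhead document: remembers which branch a user last stored to
    {'branch_type': u'userhead', 'uh_itemid': u'item-1',
     'uh_user': u'user-42', 'uh_pointer': MASTER_BRANCH},
]


def users_branch(itemid, userid):
    """Branch name the given user currently points at for this item."""
    for doc in branches_index:
        if (doc.get('branch_type') == u'userhead'
                and doc.get('uh_itemid') == itemid
                and doc.get('uh_user') == userid):
            return doc['uh_pointer']
    return MASTER_BRANCH  # anonymous or unknown users read master


def head_revid(itemid, branchname):
    """Head revision id of a branch, falling back to master."""
    for name in (branchname, MASTER_BRANCH):
        for doc in branches_index:
            if (doc.get('branch_type') == u'branch'
                    and doc.get('branch_itemid') == itemid
                    and doc.get('branch_src') == name):
                return doc['branch_dst']
    return None  # caller falls back to any revision of the item


print(head_revid(u'item-1', users_branch(u'item-1', u'user-42')))  # -> rev-3

Keeping userheads as just another document type in the BRANCHES index (tagged via BRANCH_TYPE) is what allows the patch to drop the separate USERHEADS and LATEST_REVS indexes entirely.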