📄 bplustreelong.py
字号:
"""
Bplustree mapping fixed length strings (byte sequences) to longs (seek positions in file indexed).
"Next leaf pointer" is not used since it increases the chance of file corruption on failure.
All modifications are "shadowed" until a flush of all modifications succeeds. Modifications are
"hardened" when the header record is rewritten with a new root. This design trades a few "unneeded"
buffer writes for lower likelihood of file corruption.
"""
import string, BufferFile, codecs, types
class BplusTreeException(RuntimeError):
"problem in b-plus tree"
class BplusTreeKeyMissing(KeyError):
"key not found in b-plus tree"
class BplusTreeBadKeyValue(ValueError):
"bad type for bplustree value"
VERSION = 0
ENCODER = codecs.getencoder("utf_8")
DECODER = codecs.getdecoder("utf_8")
NULLBUFFERNUMBER = -1
NONLEAF = 0
LEAF = 1
FREE = 2
NONFREE = (LEAF, NONLEAF)
HEADERPREFIX = string.join(map(chr, [98, 112, 78, 98, 112]), "")
INVARIANTCULTUREID = 127
def SetupFromExistingStream(fromfile, StartSeek=0):
result = BplusTreeLong(fromfile, 33, 33, StartSeek)
result.readHeader()
result.buffers = BufferFile.SetupFromExistingStream(fromfile, StartSeek+result.headersize)
if (result.buffers.buffersize!=result.buffersize):
raise BplusTreeException, "inner and outer buffer sizes should match "+repr(
(result.buffers.buffersize, result.buffersize))
if result.rootSeek!=NULLBUFFERNUMBER:
result.root = BplusNode(result, None, None, True)
result.root.LoadFromBuffer(result.rootSeek)
return result
def InitializeInStream(fromfile, KeyLength, NodeSize,
CultureId=INVARIANTCULTUREID, StartSeek=0):
result = BplusTreeLong(fromfile, NodeSize, KeyLength, StartSeek, CultureId)
result.setHeader()
result.buffers = BufferFile.InitializeBufferFileInStream(fromfile, result.buffersize, StartSeek+result.headersize)
return result
class BplusTreeLong:
FifoLimit = 100
headersize = len(HEADERPREFIX) + 1 + BufferFile.INTSTORAGE*3 + BufferFile.LONGSTORAGE*2
freeHeadSeek = NULLBUFFERNUMBER
root = None
rootSeek = NULLBUFFERNUMBER
TerminalNodeCount = 0
LowerTerminalNodeCount = 0
buffers = None # BufferFile, must be set externally
#committing = False # debug
def __init__(this, fromfile, NodeSize, KeyLength, StartSeek=0, CultureId=INVARIANTCULTUREID):
if (CultureId!=INVARIANTCULTUREID):
raise BplusTreeException, "only invariant culture supported in this python implementation"
this.fromfile = fromfile
this.NodeSize = NodeSize
this.seekStart = StartSeek
this.KeyLength = KeyLength
this.MaxKeyLength = this.KeyLength - BufferFile.SHORTSTORAGE
this.FreeBuffersOnAbort = {}
this.FreeBuffersOnCommit = {}
this.IdToTerminalNode = {}
this.TerminalNodeToId = {}
this.SanityCheck()
def KeyOk(this, key):
"return translation from unicode if the key is ok, else None"
(encoded, dummy) = ENCODER(key)
if len(encoded)>this.MaxKeyLength:
#print repr(encoded), "exceeds max length", len(encoded), ">", this.MaxKeyLength
return None
return encoded
def toHtml(this, mapLong=None):
L = []
if this.root is None:
L.append("<br>EMPTY<br>")
else:
L.append("<br>root seek="+repr(this.rootSeek))
this.root.AsHtml(L, mapLong)
return string.join(L, "\n")
def Shutdown(this):
this.fromfile.flush()
this.fromfile.close()
def SanityCheck(this, strong=False):
if (this.NodeSize<2):
raise BplusTreeException, "node size must be larger than 2 "+repr(this.NodeSize)
if (this.KeyLength<5):
raise BplusTreeException, "key length must be larger than 5"
if (this.seekStart<0):
raise BplusTreeException, "start seek cannot be negative"
# compute buffer size
keystorage = this.KeyLength+BufferFile.SHORTSTORAGE
this.buffersize = 1+BufferFile.LONGSTORAGE + (keystorage + BufferFile.LONGSTORAGE)*this.NodeSize
this.MaxKeyLength = this.KeyLength - BufferFile.SHORTSTORAGE
if (strong):
this.Recover(False)
# check that deferred allocations are not marked free
for buffernumber in this.FreeBuffersOnAbort.keys():
indicator = this.buffers.getBuffer(buffernumber, 1)
mark = ord(indicator)
if mark not in NONFREE:
raise BplusTreeException, "free on abort buffer without nonfree mark"
for buffernumber in this.FreeBuffersOnCommit.keys():
indicator = this.buffers.getBuffer(buffernumber, 1)
mark = ord(indicator)
if mark not in NONFREE:
raise BplusTreeException, "free on commit buffer without nonfree mark"
# end of sanity check
def Recover(this, CorrectErrors=True):
visited = {}
if this.root!=None:
# check all reachable nodes
this.root.SanityCheck(visited)
# traverse the free list
freebuffernumber = this.freeHeadSeek
while freebuffernumber!=NULLBUFFERNUMBER:
if visited.has_key(freebuffernumber):
raise BplusTreeException, "free buffer visited twice "+repr(freebuffernumber)
visited[freebuffernumber] = freebuffernumber
freebuffernumber = this.parseFreeBuffer(freebuffernumber)
# determine missing buffers
Missing = {}
maxbuffer = this.buffers.nextBufferNumber()
for i in xrange(maxbuffer):
if not visited.has_key(i):
Missing[i] = i
# ... less free on commit buffers
for tobefreed in this.FreeBuffersOnCommit.keys():
del Missing[tobefreed]
if (CorrectErrors):
for missingbuffer in Missing.keys():
this.deallocateBuffer(missingbuffer)
elif len(Missing)>0:
raise BplusTreeException, "found %s unreachable buffers %s" % (len(Missing), Missing)
def SetFootPrintLimit(this, limit):
if limit<5:
raise BplusTreeException, "footprint must be larger than 5"
this.FifoLimit = limit
def RemoveKey(this, key):
theroot = this.root
if theroot is None:
raise BplusTreeKeyMissing, "tree is empty, cannot delete"
(smallest, MergeMe) = this.root.Delete(key)
if (MergeMe and (not theroot.isLeaf) and theroot.SizeInUse()==0):
this.root = this.root.FirstChild()
this.rootSeek = this.root.makeRoot()
#this.MaterializedChildNodes[0] = None
theroot.Free()
def __getitem__(this, key):
# autoconvert 8-bit keys to unicode
if type(key) is types.StringType:
(key, dummy) = DECODER(key)
valueFound = this.ContainsKey(key)
if valueFound==None:
#print this.toHtml()
raise BplusTreeKeyMissing, "key not found "+repr(key)
return valueFound[0]
def __setitem__(this, key, value):
# autoconvert 8-bit keys to unicode
if type(key) is types.StringType:
(key, dummy) = DECODER(key)
if this.KeyOk(key) is None:
raise BplusTreeBadKeyValue, "key too large "+repr(key)
rootinit = False
if this.root is None:
this.root = BplusNode(this, None, None, True)
rootinit = True
(dummy, splitString, SplitNode) = this.root.Insert(key, value)
if SplitNode is not None:
# split of root, make new root
rootinit = True
oldRoot = this.root
this.root = BinaryRoot(oldRoot, splitString, SplitNode, this)
if rootinit:
this.rootSeek = this.root.DumpToFreshBuffer();
this.ShrinkFootprint()
def FirstKey(this):
result = None
# empty string is smallest possible key
if (this.root!=None):
if this.ContainsKey(u""):
result = u"";
else:
result = this.root.FindNextKey(u"")
this.ShrinkFootprint()
return result
def NextKey(this, AfterThisKey):
# autoconvert 8-bit keys to unicode
if type(AfterThisKey) is types.StringType:
(AfterThisKey, dummy) = DECODER(AfterThisKey)
result = this.root.FindNextKey(AfterThisKey)
this.ShrinkFootprint()
return result
def ContainsKey(this, key):
"return None or (value,) found for key"
# autoconvert 8-bit keys to unicode
if type(key) is types.StringType:
(key, dummy) = DECODER(key)
result = None
if (this.root is not None):
result = this.root.FindMatch(key)
this.ShrinkFootprint()
return result
def Get(this, key, defaultValue):
# autoconvert 8-bit keys to unicode
if type(key) is types.StringType:
(key, dummy) = DECODER(key)
valueFound = this.ContainsKey(key)
if valueFound==None:
return defaultValue
return valueFound[0]
Set = __setitem__
def Commit(this):
#this.committing = True # debug
if (this.root is not None):
this.rootSeek = this.root.Invalidate(False)
this.fromfile.flush()
# commit the new root
this.setHeader()
this.fromfile.flush()
# free buffers no longer in use
tofree = this.FreeBuffersOnCommit.keys()
tofree.sort()
tofree.reverse()
for buffernumber in tofree:
this.deallocateBuffer(buffernumber)
# store new freelist head
this.setHeader()
this.fromfile.flush()
this.ResetBookkeeping()
#this.committing = False # debug
def Abort(this):
# destroy any materialized portion of the tree
if this.root is not None:
#print "destroying root for abort<br>"
this.root.Destroy()
this.root.owner = this
this.root.Clear()
#this.root = None
tofree = this.FreeBuffersOnAbort.keys()
for buffernumber in tofree:
this.deallocateBuffer(buffernumber)
freehead = this.freeHeadSeek
# reread header except for freelist head
this.readHeader()
if this.rootSeek==NULLBUFFERNUMBER:
this.root = None
else:
this.root = BplusNode(this, None, None, True)
this.root.LoadFromBuffer(this.rootSeek)
#print "<br>read header<br>", this.toHtml()
this.ResetBookkeeping()
this.freeHeadSeek = freehead
this.setHeader() # store new freelist head
this.fromfile.flush()
def ResetBookkeeping(this):
this.FreeBuffersOnAbort = {}
this.FreeBuffersOnCommit = {}
this.IdToTerminalNode = {}
this.TerminalNodeToId = {}
def allocateBuffer(this):
if this.freeHeadSeek==NULLBUFFERNUMBER:
#print "<br>allocating next buffer number"
return this.buffers.nextBufferNumber()
#print "<br>allocating free head"
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -