📄 pk_rangekey.cxx
字号:
/* * Copyright (C) 1998, 1999, 2001, Jonathan S. Shapiro. * * This file is part of the EROS Operating System. * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation; either version 2, * or (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */#include <kerninc/kernel.hxx>#include <kerninc/Check.hxx>#include <kerninc/Key.hxx>#include <kerninc/Invocation.hxx>#include <kerninc/BlockDev.hxx>#include <kerninc/ObjectCache.hxx>#include <disk/DiskNode.hxx>#include <disk/DiskFrame.hxx>#include <eros/Invoke.h>#include <eros/StdKeyType.h>#include <eros/RangeKey.h>/* * There is a problem with range keys that is pretty well * fundamental. In a nutshell, the key representation only gives us * 112 bits of storage (3*32 + 16 in keyInfo), and we need to * represent 128 bits of information (base and bound). * * In the final analysis, there are really only two viable solutions: * * 1. You can restrict the range of valid OIDs to something that fits. * This means 56 bits/oid. Since 4 bits (minimum) are used for the * object index, this would give us 2^52 page frames, or (assuming * 4k pages) 2^64 bytes of addressable storage. * * 2. You can restrict range keys to a representable subrange, and * require some applications to manage multiple range keys. This * would give base=64 bits and length=48 bits, using the * keyInfo field for the extra 16 bits of the length. At the * moment, I'm restricting things to 32 bits of representable range * and avoiding the keyInfo field. * * 3. You can introduce a "superrange" capability that uses a * distinctive key type to actually cover the full range. * * I have chosen to combine options (2) and (3), but this results in a * secondary problem: the "find first subrange" operation may someday * actually find a subrange whose length cannot be represented in 48 * bits, and it's not immediately clear what it should do in this * case. I have chosen to implement "find first subrange" as "find * first reportable subrange". If the first subrange is 2^32 objects * or larger, then the reported first subrange will be of length * (2^32-1). The caller can recognize this case by performing: * * top = range_query(range_key); * (base, bound) = find_first_subrange(range_key); * new_range_key = make_subrange(range_key, bound, top); * (base2, bound2) = find_first_subrange(new_range_key); * * check if base2 == 0. * * I cannot imagine this actually arising much in the next several * years, though current (year 2001) disk drives are currently * implementing on the close order of 2^25 pages. This takes a minimum * of 2^29 oids (16 bits for object index), so I expect we will blow * past the 2^32 oid limit within the next year or two and I will need * to eat into the keyInfo field. I'm deferring that primarily for * time reasons. *//* #define DEBUG_RANGEKEY */voidRangeKey(Invocation& inv){ OID start = inv.key->rk.oid; OID end = inv.key->rk.oid + inv.key->rk.count; if (inv.key->keyType == KT_PrimeRange) { start = 0ll; end = (UINT64_MAX * EROS_OBJECTS_PER_FRAME) + EROS_NODES_PER_FRAME; } else if (inv.key->keyType == KT_PhysRange) { start = OID_RESERVED_PHYSRANGE; end = (UINT64_MAX * EROS_OBJECTS_PER_FRAME); } range_off_t range = end - start; switch(inv.entry.code) { case OC_KeyType: COMMIT_POINT(); /* Notice that this returns AKT_Range for both the prime range key * and the more conventional range key. This is actually correct, * because both keys obey identical interfaces. The prime range * key can be distinguished by the fact that it's reported range * from OC_RANGE_QUERY is large. */ inv.exit.code = RC_OK; inv.exit.w1 = AKT_Range; return; case OC_Range_Query: { COMMIT_POINT(); inv.exit.w1 = range; inv.exit.w2 = (fixreg_t) (range >> 32); inv.exit.code = RC_OK; return; } case OC_Range_Next_Subrange: { COMMIT_POINT(); OID subStart; OID subEnd; OID startOffset = inv.entry.w2; startOffset <<= 32; startOffset |= inv.entry.w1; startOffset += start; if (startOffset >= end) { inv.exit.code = RC_Range_RangeErr; return; } /* FIX: This only works for 32-bit subranges */ ObjectCache::FindFirstSubrange(startOffset, end, subStart, subEnd); range = subEnd - subStart; if (range >= (uint64_t) UINT32_MAX) range = UINT32_MAX; subStart -= start; inv.exit.w1 = subStart; inv.exit.w2 = (fixreg_t) (subStart >> 32); inv.exit.w3 = range; inv.exit.code = RC_OK; return; } case OC_Range_Make_Subrange: { COMMIT_POINT(); /* This implementation allows for 64 bit offsets, but only 32 * bit limits, which is nuts! */ OID newStart = inv.entry.w2; newStart <<= 32; newStart |= inv.entry.w1; newStart += start; /* This is not an issue with the broken interface, but with the * 48 bit length representation and a fixed interface we could * get caught here by a representable bounds problem. */ OID newLen = inv.entry.w3; newLen = min(newLen, UINT32_MAX); OID newEnd = newLen + newStart; /* REMEMBER: malicious arithmetic might wrap! */ if ((newStart < start) || (newStart >= end) || (newEnd <= start) || (newEnd > end)) { inv.exit.code = RC_Range_RangeErr; return; } Key* key = inv.exit.pKey[0]; inv.flags |= INV_EXITKEY0; if (key) { /* Unchain the old key so we can overwrite it... */ if (key) key->NH_Unchain(); key->InitType(KT_Range); key->rk.oid = newStart; key->rk.count = newLen; } inv.exit.code = RC_OK; return; } case OC_Range_Identify: { /* Key to identify is in slot 0 */ Key& key = *inv.entry.key[0]; key.Prepare(); COMMIT_POINT(); inv.exit.code = RC_OK; if (key.IsType(KT_Page) == false && key.IsType(KT_Node) == false) { inv.exit.code = RC_Range_RangeErr; return; }#if 0 /* There is no reason why these should fail to identify. */ if (key.keyData & SEGMODE_ATTRIBUTE_MASK) { inv.exit.code = RC_RequestError; return; }#endif OID oid = key.GetKeyOid(); if ( oid < start ) inv.exit.code = RC_Range_RangeErr; else if ( oid >= end ) inv.exit.code = RC_Range_RangeErr; else range = oid - start; /* ALERT: output convention depends on register size! */ if (key.IsType(KT_Page)) inv.exit.w1 = OT_DataPage; else if (key.IsType(KT_Node)) inv.exit.w1 = OT_Node; else inv.exit.w1 = OT_INVALID; /* FIX: there is a register size assumption here! */ assert (sizeof(range) == sizeof(uint64_t)); assert (sizeof(inv.exit.w2) == sizeof(uint32_t) || sizeof(inv.exit.w2) == sizeof(uint64_t)); inv.exit.w2 = range; if (sizeof(inv.exit.w2) != sizeof(range_off_t)) inv.exit.w3 = (range >> 32); else inv.exit.w3 = 0; return; } case OC_Range_Rescind: { Key& key = *inv.entry.key[0]; key.Prepare(); inv.exit.code = RC_OK; if (key.IsType(KT_Page) == false && key.IsType(KT_Node) == false) { inv.exit.code = RC_Range_RangeErr; COMMIT_POINT(); return; } if (key.keyData & SEGMODE_ATTRIBUTE_MASK) { inv.exit.code = RC_RequestError; COMMIT_POINT(); return; } OID oid = key.GetKeyOid(); #ifdef DEBUG_PAGERANGEKEY dprintf(true, "Rescinding page OID 0x%08x%08x\n", (uint32_t) (oid>>32), (uint32_t) oid);#endif if ( oid < start ) { inv.exit.code = RC_Range_RangeErr; COMMIT_POINT(); return; } else if ( oid >= end ) { inv.exit.code = RC_Range_RangeErr; COMMIT_POINT(); return; } ObjectHeader* pObject = key.GetObjectPtr(); pObject->FlushIfCkpt(); pObject->MakeObjectDirty(); COMMIT_POINT(); pObject->Rescind();#ifdef DBG_WILD_PTR if (dbg_wild_ptr) Check::Consistency("Object rescind()");#endif return; } case OC_Range_WaitObKey: case OC_Range_GetObKey: { inv.exit.code = RC_OK; /* set the exit code */ uint64_t offset = (((uint64_t) inv.entry.w3) << 32) | ((uint64_t) inv.entry.w2); /* Figure out the OID for the new key: */ OID oid = start + offset; if (oid > end) { /* comparing ranges: INclusive */ dprintf(true, "oid 0x%X top 0x%X\n", oid, end); inv.exit.code = RC_Range_RangeErr; return; } uint32_t obNdx = offset % EROS_OBJECTS_PER_FRAME; ObjectHeader *pObj = 0; Key* key = inv.exit.pKey[0]; inv.flags |= INV_EXITKEY0; if ((inv.entry.code == OC_Range_GetObKey) && !ObjectCache::HaveSource(oid)) { COMMIT_POINT(); inv.exit.code = RC_Range_RangeErr; return; } switch(inv.entry.w1) { case OT_DataPage: { if (obNdx != 0) { inv.exit.code = RC_Range_RangeErr; return; } uint8_t kt = KT_Page; /* If we don't get the object back, it's because of bad frame * type: */ pObj = ObjectCache::GetObject(oid, ObType::PtDataPage, 0, false); assert(inv.CanCommit()); assert(pObj); COMMIT_POINT(); /* It's definitely an object key. Pin the object it names. */ pObj->TransLock(); if (key) { /* Unchain the old key so we can overwrite it... */ if (key) key->NH_Unchain(); key->InitType(kt); key->SetPrepared(); key->keyData = EROS_PAGE_BLSS; } break; } case OT_Node: if (obNdx >= DISK_NODES_PER_PAGE) { inv.exit.code = RC_Range_RangeErr; return; } /* If we don't get the object back, it's because of bad frame * type: */ pObj = ObjectCache::GetObject(oid, ObType::NtUnprepared, 0, false); assert(inv.CanCommit()); assert(pObj); COMMIT_POINT(); /* It's definitely an object key. Pin the object it names. */ pObj->TransLock(); if (key) { /* Unchain the old key so we can overwrite it... */ if (key) key->NH_Unchain(); key->InitType(KT_Node); key->SetPrepared(); } break; } if (key) { key->ok.pObj = pObj; /* Link as next key after object */ key->ok.pObj = pObj; key->ok.next = pObj->kr.next; key->ok.prev = (KeyRing *) pObj; pObj->kr.next = (KeyRing*) key; key->ok.next->prev = (KeyRing*) key; }#ifdef DEBUG_PAGERANGEKEY dprintf(true, "pObject is 0x%08x\n", pObj);#endif return; } case OC_Range_Compare: { Key& key = *inv.entry.key[0]; key.Prepare(); COMMIT_POINT(); inv.exit.code = RC_OK; inv.exit.w1 = 0; /* no overlap until proven otherwise */ if (!key.IsType(KT_Range) && !key.IsType(KT_PrimeRange)) return; /* RC_OK in this case probably wrong thing. */ OID kstart = key.rk.oid; OID kend = key.rk.oid + key.rk.count; if (key.keyType == KT_PrimeRange) { kstart = 0ll; kend = UINT64_MAX; } if (kstart >= end || kend <= start) return; /* They overlap; need to figure out how. */ inv.exit.w1 = 1; inv.exit.w3 = 0; if (kstart < start) { inv.exit.w1 = 3; inv.exit.w2 = (fixreg_t) (start - kstart); if (sizeof(inv.exit.w2) == sizeof(uint32_t)) /* 32 bit system */ inv.exit.w3 = (fixreg_t) ((start - kstart) >> 32); } else if (kstart == start) { inv.exit.w1 = 1; inv.exit.w2 = 0; } else { inv.exit.w1 = 2; inv.exit.w2 = (fixreg_t) (kstart - start); if (sizeof(inv.exit.w2) == sizeof(uint32_t)) /* 32 bit system */ inv.exit.w3 = (fixreg_t) ((kstart - start) >> 32); } return; } default: COMMIT_POINT(); inv.exit.code = RC_UnknownRequest; return; } return;}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -