📄 cache.cpp
字号:
/* * =========================================================================== * PRODUCTION $Log: cache.cpp,v $ * PRODUCTION Revision 1000.3 2004/06/01 19:35:08 gouriano * PRODUCTION PRODUCTION: UPGRADED [GCC34_MSVC7] Dev-tree R6.21 * PRODUCTION * =========================================================================== *//* $Id: cache.cpp,v 1000.3 2004/06/01 19:35:08 gouriano Exp $ * =========================================================================== * * PUBLIC DOMAIN NOTICE * National Center for Biotechnology Information * * This software/database is a "United States Government Work" under the * terms of the United States Copyright Act. It was written as part of * the author's official duties as a United States Government employee and * thus cannot be copyrighted. This software/database is freely available * to the public for use. The National Library of Medicine and the U.S. * Government have not placed any restriction on its use or reproduction. * * Although all reasonable efforts have been taken to ensure the accuracy * and reliability of the software and data, the NLM and the U.S. * Government do not and cannot warrant the performance or results that * may be obtained by using this software or data. The NLM and the U.S. * Government disclaim all warranties, express or implied, including * warranties of performance, merchantability or fitness for any particular * purpose. * * Please cite the author in any work or product based on this material. * * =========================================================================== * * Author: Vladimir Soussov, Michael Domrachev * * File Description: * NCBI Taxonomy information retreival library caching implementation * */#include <ncbi_pch.hpp>#include <objects/taxon1/taxon1.hpp>#include "cache.hpp"#include <vector>#include <algorithm>BEGIN_NCBI_SCOPEBEGIN_objects_SCOPECOrgRefCache::COrgRefCache( CTaxon1& host ) : m_host( host ), m_nCacheCapacity( 10 ){ return;}boolCOrgRefCache::Init( unsigned nCapacity ){ CTaxon1_req req; CTaxon1_resp resp; req.SetMaxtaxid(); if( m_host.SendRequest( req, resp ) ) { if( resp.IsMaxtaxid() ) { // Correct response, return object m_nMaxTaxId = resp.GetMaxtaxid(); m_nMaxTaxId += m_nMaxTaxId/10; m_ppEntries = new CTaxon1Node*[m_nMaxTaxId]; memset( m_ppEntries, '\0', m_nMaxTaxId*sizeof(*m_ppEntries) ); } else { // Internal: wrong respond type m_host.SetLastError( "Response type is not Maxtaxid" ); return false; } } else { return false; } CTaxon1_name* pNode = ( new CTaxon1_name ); pNode->SetTaxid( 1 ); pNode->SetOname().assign("root"); pNode->SetCde( 0x40000000 ); // Gene bank hidden CTaxon1Node* pRoot = new CTaxon1Node( CRef<CTaxon1_name>(pNode) ); m_tPartTree.SetRoot( pRoot ); SetIndexEntry( 1, pRoot ); if( nCapacity != 0 ) { m_nCacheCapacity = nCapacity; } InitRanks(); InitDivisions(); return true;}boolCOrgRefCache::Lookup( int tax_id, CTaxon1Node** ppNode ){ if( (unsigned)tax_id < m_nMaxTaxId ) { *ppNode = m_ppEntries[tax_id]; } else { *ppNode = NULL; } return *ppNode != NULL;}boolCOrgRefCache::LookupAndAdd( int tax_id, CTaxon1Node** ppData ){ *ppData = NULL; if( (unsigned)tax_id < m_nMaxTaxId ) { CTaxon1Node* pNode = ( m_ppEntries[tax_id] ); if( pNode ) { *ppData = pNode; return true; } else { // Add the entry from server CTaxon1_req req; CTaxon1_resp resp; req.SetTaxalineage( tax_id ); if( m_host.SendRequest( req, resp ) ) { if( resp.IsTaxalineage() ) { // Correct response, return object list< CRef<CTaxon1_name> >& lLin = resp.SetTaxalineage(); CTaxon1Node* pParent = 0; pNode = 0; // Check if this is a secondary node if( lLin.front()->GetTaxid() != tax_id ) { // Secondary node, try to get primary from index pNode = m_ppEntries[ lLin.front()->GetTaxid() ]; } if( !pNode ) { list< CRef< CTaxon1_name > >::reverse_iterator i; // Fill in storage for( i = lLin.rbegin(); i != lLin.rend(); ++i ) { if( !m_ppEntries[ (*i)->GetTaxid() ] ) { // Create node break; } else { pParent = m_ppEntries[ (*i)->GetTaxid() ]; } } // Create tree iterator CTreeIterator* pIt = ( m_tPartTree.GetIterator() ); if( !pParent ) { pParent = static_cast<CTaxon1Node*>(pIt->GetNode()); } pIt->GoNode( pParent ); for( ; i != lLin.rend(); ++i ) { pNode = new CTaxon1Node(*i); m_ppEntries[ pNode->GetTaxId() ] = pNode; pIt->AddChild( pNode ); pIt->GoNode( pNode ); } } else { // Store secondary in index m_ppEntries[ tax_id ] = pNode; } _ASSERT( pNode ); *ppData = pNode; return true; } else { // Internal: wrong respond type m_host.SetLastError( "Unable to get node lineage:\ Response type is not Taxalineage" ); return false; } } } } return false;}boolCOrgRefCache::LookupAndInsert( int tax_id, CTaxon1_data** ppData ){ CTaxon1Node* pNode = ( NULL ); *ppData = NULL; if( LookupAndAdd( tax_id, &pNode ) && pNode ) { SCacheEntry* pEntry = ( pNode->GetEntry() ); if( !pEntry ) { if( !Insert1( *pNode ) ) return false; pEntry = pNode->GetEntry(); } else { m_lCache.remove( pEntry ); m_lCache.push_front( pEntry ); } *ppData = pEntry->GetData1(); return true; } return false;}boolCOrgRefCache::LookupAndInsert( int tax_id, CTaxon2_data** ppData ){ CTaxon1Node* pNode = ( NULL ); *ppData = NULL; if( LookupAndAdd( tax_id, &pNode ) && pNode ) { SCacheEntry* pEntry = ( pNode->GetEntry() ); if( !pEntry ) { if( !Insert2( *pNode ) ) return false; pEntry = pNode->GetEntry(); } else { m_lCache.remove( pEntry ); m_lCache.push_front( pEntry ); } *ppData = pEntry->GetData2(); return true; } return false;}boolCOrgRefCache::Lookup( int tax_id, CTaxon1_data** ppData ){ if( (unsigned)tax_id < m_nMaxTaxId ) { CTaxon1Node* pNode = ( m_ppEntries[tax_id] ); SCacheEntry* pEntry; if( pNode && (pEntry=pNode->GetEntry()) ) { // Move in the list m_lCache.remove( pEntry ); m_lCache.push_front( pEntry ); *ppData = pEntry->GetData1(); return true; } } *ppData = NULL; return false;}boolCOrgRefCache::Lookup( int tax_id, CTaxon2_data** ppData ){ if( (unsigned)tax_id < m_nMaxTaxId ) { CTaxon1Node* pNode = ( m_ppEntries[tax_id] ); SCacheEntry* pEntry; if( pNode && (pEntry=pNode->GetEntry()) ) { // Move in the list m_lCache.remove( pEntry ); m_lCache.push_front( pEntry ); *ppData = pEntry->GetData2(); return true; } } *ppData = NULL; return false;}bools_BuildLineage( string& str, CTaxon1Node* pNode, unsigned sz, int sp_rank ){ if( !pNode->IsRoot() ) { if( pNode->GetRank() > sp_rank-1 ) { s_BuildLineage( str, pNode->GetParent(), 0, sp_rank ); return false; } else { if( pNode->IsGenBankHidden() ) { return s_BuildLineage( str, pNode->GetParent(), sz, sp_rank ); } bool bCont; bCont=s_BuildLineage( str, pNode->GetParent(), sz+pNode->GetName().size()+2, sp_rank ); if( bCont ) { str.append( pNode->GetName() ); if( sz != 0 ) { str.append( "; " ); } } return bCont; } } else { str.reserve( sz ); } return true;}string::size_types_AfterPrefix( const string& str1, const string& prefix ){ string::size_type pos(0); if( NStr::StartsWith( str1, prefix ) ) { pos += prefix.size(); } return str1.find_first_not_of( " \t\n\r", pos );}static const char s_achSubsp[] = "subsp.";static const char s_achSsp[] = "ssp.";static const char s_achF_Sp[] = "f. sp.";static const char s_achFSp[] = "f.sp.";static const char s_achStr[] = "str.";static const char s_achSubstr[] = "substr.";static const char s_achVar[] = "var.";static const char s_achSv[] = "sv.";static const char s_achCv[] = "cv.";static const char s_achPv[] = "pv.";static const char s_achBv[] = "bv.";static const char s_achF[] = "f.";static const char s_achFo[] = "fo.";static const char s_achGrp[] = "grp.";struct SSubtypeAbbr { const char* m_pchAbbr; size_t m_nAbbrLen; COrgMod::ESubtype m_eSubtype;};static SSubtypeAbbr s_aSubtypes[] = { { s_achSubsp, sizeof(s_achSubsp)-1, COrgMod::eSubtype_sub_species }, { s_achSsp, sizeof(s_achSsp)-1, COrgMod::eSubtype_sub_species }, { s_achF_Sp, sizeof(s_achF_Sp)-1, COrgMod::eSubtype_forma_specialis }, { s_achFSp, sizeof(s_achFSp)-1, COrgMod::eSubtype_forma_specialis }, { s_achStr, sizeof(s_achStr)-1, COrgMod::eSubtype_strain }, { s_achSubstr,sizeof(s_achSubstr)-1,COrgMod::eSubtype_substrain }, { s_achVar, sizeof(s_achVar)-1, COrgMod::eSubtype_variety }, { s_achSv, sizeof(s_achSv)-1, COrgMod::eSubtype_serovar }, { s_achCv, sizeof(s_achCv)-1, COrgMod::eSubtype_cultivar }, { s_achPv, sizeof(s_achPv)-1, COrgMod::eSubtype_pathovar }, { s_achBv, sizeof(s_achBv)-1, COrgMod::eSubtype_biovar }, { s_achF, sizeof(s_achF)-1, COrgMod::eSubtype_forma }, { s_achFo, sizeof(s_achFo)-1, COrgMod::eSubtype_forma }, { s_achGrp, sizeof(s_achGrp)-1, COrgMod::eSubtype_group }, { NULL, 0, COrgMod::eSubtype_other }};static ints_NofTokens( const string& s ){ int nof = 0; char first, last, c; int bracket_level, token; if( !s.empty() ) { string::size_type pos = 0; while( pos < s.size() ) { bracket_level= 0; token = 0; do { // Skip heading white space first= s[pos++]; } while( (isspace(first) || iscntrl(first)) && pos < s.size() ); switch( first ) { case '"': last= '"'; break; case '(': last= ')'; break; case '{': last= '}'; break; case '[': last= ']'; break; default: last= 0; break; } for(; pos < s.size(); ++pos) { c = s[pos]; if( !isalnum(c) ) { if( last == 0 ) { if( first == c ) { ++bracket_level; } if( last == c && (!bracket_level--) ) { ++pos; break; } } else { if( c == '.' || isspace(c) || iscntrl(c) ) { ++pos; break; } } } else { token = 1; } } nof += token; } } return nof;}COrgMod::ESubtypeCOrgRefCache::GetSubtypeFromName( string& sName ){ static const string s_sSubspCf( "subsp. cf." ); static const string s_sSubspAff( "subsp. aff." ); string::size_type pos; if( sName.find('.') == string::npos ) { return COrgMod::eSubtype_other; } /* ignore subsp. cf. and subsp. aff. */ if( NStr::FindNoCase( sName, s_sSubspCf ) != string::npos ) { return COrgMod::eSubtype_other; } if( NStr::FindNoCase( sName, s_sSubspAff ) != string::npos ) { return COrgMod::eSubtype_other; } /* check for subsp */ SSubtypeAbbr* pSubtypeAbbr = &s_aSubtypes[0]; while( pSubtypeAbbr->m_eSubtype != COrgMod::eSubtype_other ) { if( (pos=NStr::FindNoCase( sName, string(pSubtypeAbbr->m_pchAbbr, pSubtypeAbbr->m_nAbbrLen) )) != NPOS ) { sName.erase( pos, pSubtypeAbbr->m_nAbbrLen ); sName = NStr::TruncateSpaces( sName, NStr::eTrunc_Begin ); if( pSubtypeAbbr->m_eSubtype == COrgMod::eSubtype_sub_species && s_NofTokens( sName ) != 1 ) { break; // Return other } return pSubtypeAbbr->m_eSubtype; } ++pSubtypeAbbr; } return COrgMod::eSubtype_other;}boolCOrgRefCache::BuildOrgModifier( CTaxon1Node* pNode, COrgName& on, CTaxon1Node* pParent ){ CTaxon1Node* pTmp; CRef<COrgMod> pMod( new COrgMod ); if( !pParent && !pNode->IsRoot() ) { pTmp = pNode->GetParent(); while( !pTmp->IsRoot() ) { int prank = pTmp->GetRank(); if((prank == GetSubspeciesRank()) || (prank == GetSpeciesRank()) || (prank == GetGenusRank())) { pParent = pTmp; break; } pTmp = pTmp->GetParent(); } } string::size_type pos = 0; if( pParent ) { // Get rid of parent prefix pos = s_AfterPrefix( pNode->GetName(), pParent->GetName() ); } pMod->SetSubname().assign( pNode->GetName(), pos, pNode->GetName().size()-pos ); pMod->SetSubtype( GetSubtypeFromName( pMod->SetSubname() ) ); if( pMod->GetSubtype() == COrgMod_Base::eSubtype_sub_species && pNode->GetRank() != GetSubspeciesRank() ) { pMod->SetSubtype( COrgMod_Base::eSubtype_other ); } if( pMod->GetSubtype() == COrgMod_Base::eSubtype_other ) { int rank = pNode->GetRank(); if( rank == GetSubspeciesRank() ) { if( s_NofTokens( pNode->GetName() ) == 3 ) { // Assign only "Name1 ssp. Name2" or "Name1 subsp. Name2" pMod->SetSubtype( COrgMod_Base::eSubtype_sub_species ); } } else if( rank == GetVarietyRank() ) { pMod->SetSubtype( COrgMod_Base::eSubtype_variety ); } else if( rank == GetFormaRank() ) { pMod->SetSubtype( COrgMod_Base::eSubtype_forma ); } else if( pParent && pParent->GetRank() == GetSubspeciesRank() ) { pMod->SetSubtype( COrgMod_Base::eSubtype_strain ); } else { // Do not insert invalid modifier return false; } } // Store it into list on.SetMod().push_back( pMod ); return true;}boolCOrgRefCache::SetBinomialName( CTaxon1Node& node, COrgName& on ){ CTaxon1Node* pSpec = ( NULL ); CTaxon1Node* pSubspec = ( NULL ); CTaxon1Node* pGenus = ( NULL );
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -