📄 taxon1.cpp
字号:
/*
 * ===========================================================================
 * PRODUCTION $Log: taxon1.cpp,v $
 * PRODUCTION Revision 1000.3 2004/06/01 19:35:15 gouriano
 * PRODUCTION PRODUCTION: UPGRADED [GCC34_MSVC7] Dev-tree R6.25
 * PRODUCTION
 * ===========================================================================
 */
/* $Id: taxon1.cpp,v 1000.3 2004/06/01 19:35:15 gouriano Exp $
 * ===========================================================================
 *
 * PUBLIC DOMAIN NOTICE
 * National Center for Biotechnology Information
 *
 * This software/database is a "United States Government Work" under the
 * terms of the United States Copyright Act. It was written as part of
 * the author's official duties as a United States Government employee and
 * thus cannot be copyrighted. This software/database is freely available
 * to the public for use. The National Library of Medicine and the U.S.
 * Government have not placed any restriction on its use or reproduction.
 *
 * Although all reasonable efforts have been taken to ensure the accuracy
 * and reliability of the software and data, the NLM and the U.S.
 * Government do not and cannot warrant the performance or results that
 * may be obtained by using this software or data. The NLM and the U.S.
 * Government disclaim all warranties, express or implied, including
 * warranties of performance, merchantability or fitness for any particular
 * purpose.
 *
 * Please cite the author in any work or product based on this material.
 *
 * ===========================================================================
 *
 * Author: Vladimir Soussov, Michael Domrachev
 *
 * File Description:
 *     NCBI Taxonomy information retrieval library implementation
 *
 */

#include <ncbi_pch.hpp>
#include <corelib/ncbistr.hpp>
#include <objects/taxon1/taxon1.hpp>
#include <objects/seqfeat/seqfeat__.hpp>
#include <connect/ncbi_conn_stream.hpp>
#include <serial/serial.hpp>
#include <serial/enumvalues.hpp>
#include <serial/objistr.hpp>
#include <serial/objostr.hpp>

#include <algorithm>

#include "cache.hpp"

BEGIN_NCBI_SCOPE
BEGIN_objects_SCOPE // namespace ncbi::objects::

// Error text reported by GetById() when the tax id is not positive.
static const char s_achInvalTaxid[] = "Invalid tax id specified";


// Construct a disconnected client: no server stream, no object streams,
// no cache.  Init() must succeed before the object can serve requests.
CTaxon1::CTaxon1()
    : m_pServer(NULL),
      m_pOut(NULL),
      m_pIn(NULL),
      m_plCache(NULL),
      m_bWithSynonyms(false)
{
}


// Release everything the object owns (see Reset()).
CTaxon1::~CTaxon1()
{
    Reset();
}


// Clear the last error and destroy the object streams, the server stream
// and the cache, leaving all four pointers NULL.
// The object streams are deleted before the server stream they were
// opened on.
void
CTaxon1::Reset()
{
    SetLastError(NULL);
    delete m_pIn;
    delete m_pOut;
    delete m_pServer;
    m_pIn = NULL;
    m_pOut = NULL;
    m_pServer = NULL;
    delete m_plCache;
    m_plCache = NULL;
}


// Initialize with the default timeout { 120, 0 } (presumably 120 seconds;
// see STimeout) and whatever defaults the header declares for the
// remaining Init() arguments.
bool
CTaxon1::Init(void)
{
    static const STimeout def_timeout = { 120, 0 };
    return CTaxon1::Init(&def_timeout);
}


// Initialize with the default timeout { 120, 0 }, 5 reconnect attempts
// and the given cache capacity.
bool
CTaxon1::Init(unsigned cache_capacity)
{
    static const STimeout def_timeout = { 120, 0 };
    return CTaxon1::Init(&def_timeout, 5, cache_capacity);
}


// Connect to the taxonomy service and set up the local cache.
//
// timeout            - connection timeout; it is copied into the object,
//                      NULL is stored as "no timeout pointer" (0).
// reconnect_attempts - stored in m_nReconnectAttempts.
// cache_capacity     - forwarded to COrgRefCache::Init().
//
// The service name defaults to "TaxService" and may be overridden by the
// environment variables NI_TAXONOMY_SERVICE_NAME or, if that one is not
// set, NI_SERVICE_NAME_TAXONOMY.
//
// Returns true on success.  On any failure (already initialized, request
// failure, wrong response type, cache initialization failure, exception)
// returns false; unless the object was already initialized, the stream
// members are destroyed and reset to NULL.
bool
CTaxon1::Init(const STimeout* timeout, unsigned reconnect_attempts,
              unsigned cache_capacity)
{
    SetLastError(NULL);
    if( m_pServer ) { // Already inited
        SetLastError( "ERROR: Init(): Already initialized" );
        return false;
    }
    try {
        // Open connection to Taxonomy service
        CTaxon1_req req;
        CTaxon1_resp resp;

        if ( timeout ) {
            m_timeout_value = *timeout;
            m_timeout = &m_timeout_value;
        } else {
            m_timeout = 0;
        }

        m_nReconnectAttempts = reconnect_attempts;
        m_pchService = "TaxService";
        const char* tmp;
        if( ( (tmp=getenv("NI_TAXONOMY_SERVICE_NAME")) != NULL ) ||
            ( (tmp=getenv("NI_SERVICE_NAME_TAXONOMY")) != NULL ) ) {
            m_pchService = tmp;
        }

        // The connection stream is declared first so that, should an
        // exception unwind this scope, it is destroyed last: the object
        // streams below are opened on *pServer and must not outlive it.
        // This is the same order the manual clean-up at the end of this
        // function uses (m_pIn, m_pOut, then m_pServer).
        auto_ptr<CConn_ServiceStream>
            pServer( new CConn_ServiceStream(m_pchService, fSERV_Any,
                                             0, 0, m_timeout) );
        auto_ptr<CObjectOStream> pOut;
        auto_ptr<CObjectIStream> pIn;

#ifdef USE_TEXT_ASN
        m_eDataFormat = eSerial_AsnText;
#else
        m_eDataFormat = eSerial_AsnBinary;
#endif
        pOut.reset( CObjectOStream::Open(m_eDataFormat, *pServer) );
        pIn.reset( CObjectIStream::Open(m_eDataFormat, *pServer) );

        req.SetInit();

        // From here on the members own the streams; the clean-up code
        // after the try block takes care of them on failure.
        m_pServer = pServer.release();
        m_pIn = pIn.release();
        m_pOut = pOut.release();

        if( SendRequest( req, resp ) ) {
            if( resp.IsInit() ) {
                // Init is done
                m_plCache = new COrgRefCache( *this );
                if( m_plCache->Init( cache_capacity ) ) {
                    return true;
                }
                delete m_plCache;
                m_plCache = NULL;
            } else { // Set error
                SetLastError( "ERROR: Response type is not Init" );
            }
        }
    } catch( const exception& e ) {
        SetLastError( e.what() );
    }
    // Clean streams
    delete m_pIn;
    delete m_pOut;
    delete m_pServer;
    m_pIn = NULL;
    m_pOut = NULL;
    m_pServer = NULL;
    return false;
}


// If connected, send a Fini request to the service (recording an error
// when the response is not a Fini response), then release all resources
// via Reset().  Note that Reset() clears the last error again.
void
CTaxon1::Fini(void)
{
    SetLastError(NULL);
    if( m_pServer ) {
        CTaxon1_req req;
        CTaxon1_resp resp;

        req.SetFini();

        if( SendRequest( req, resp ) ) {
            if( !resp.IsFini() ) {
                SetLastError( "Response type is not Fini" );
            }
        }
    }
    Reset();
}


// Return a freshly allocated copy of the taxonomy data for tax_id.
//
// A null CRef is returned (with the last error set) when tax_id is not
// positive or when the object has no cache, i.e. Init() has not
// succeeded.  A null CRef is also returned when the cache lookup does
// not produce data.
CRef< CTaxon2_data >
CTaxon1::GetById(int tax_id)
{
    SetLastError(NULL);
    if( tax_id > 0 ) {
        if( !m_plCache ) {
            // The cache only exists between a successful Init() and
            // Reset(); without it there is nothing to look the id up in.
            SetLastError( "ERROR: GetById(): Not initialized" );
            return CRef<CTaxon2_data>(NULL);
        }
        // Check if this taxon is in cache
        CTaxon2_data* pData = 0;
        if( m_plCache->LookupAndInsert( tax_id, &pData ) && pData ) {
            // Hand out a copy so the caller cannot modify the cached object
            CTaxon2_data* pNewData = new CTaxon2_data();
            SerialAssign<CTaxon2_data>( *pNewData, *pData );
            return CRef<CTaxon2_data>(pNewData);
        }
    } else {
        SetLastError( s_achInvalTaxid );
    }
    return CRef<CTaxon2_data>(NULL);
}


// Predicate: matches a modifier that has the same subtype and the same
// canonized subname as the modifier given to SetModToMatch().
class PFindMod {
public:
    PFindMod() : m_nType( 0 ) {}

    // Remember the subtype and canonized subname of 'mod'.
    void SetModToMatch( const CRef< COrgMod >& mod ) {
        // CanonizeName() appends to its output argument, so the name kept
        // from a previous call must be dropped first.
        m_sName.erase();
        CanonizeName( mod->GetSubname(), m_sName );
        m_nType = mod->GetSubtype();
    }

    bool operator()( const CRef< COrgMod >& mod ) const {
        if( m_nType == mod->GetSubtype() ) {
            string sCanoName;
            CanonizeName( mod->GetSubname(), sCanoName );
            return ( sCanoName == m_sName );
        }
        return false;
    }

    // Append to 'out' a canonical form of 'in': lower-cased, without
    // leading and trailing whitespace, and with every internal run of
    // whitespace replaced by a single blank.
    void CanonizeName( const string& in, string& out ) const {
        bool bSpace = true;   // currently inside a run of whitespace
        char prevc = '\0';    // character pending output, '\0' if none
        for( size_t i = 0; i < in.size(); ++i ) {
            // <cctype> functions require a value representable as
            // unsigned char (or EOF); a plain negative char is undefined.
            const bool bIsSpace =
                isspace( static_cast<unsigned char>( in[i] ) ) != 0;
            if( bSpace ) {
                if( !bIsSpace ) {
                    bSpace = false;
                    if( prevc )
                        out += x_ToLower( prevc );
                    prevc = in[i];
                }
            } else {
                if( prevc )
                    out += x_ToLower( prevc );
                if( bIsSpace ) {
                    prevc = ' ';
                    bSpace = true;
                } else {
                    prevc = in[i];
                }
            }
        }
        // A pending blank at the end is trailing whitespace: drop it
        if( prevc && prevc != ' ' )
            out += x_ToLower( prevc );
    }

private:
    // Lower-case a single character without passing a negative value
    // to tolower().
    static char x_ToLower( char c ) {
        return static_cast<char>( tolower( static_cast<unsigned char>( c ) ) );
    }

    string m_sName;
    int    m_nType;
};


// Predicate over destination modifiers: tells whether a destination
// modifier conflicts with a source modifier of the type given to
// SetTypeToMatch().
class PFindConflict {
public:
    PFindConflict() : m_nType( 0 ), m_bSubSpecType( false ) {}

    // Remember the source modifier type; strain and variety are flagged
    // as "subspecies-level" types.
    void SetTypeToMatch( int type ) {
        m_nType = type;
        switch( type ) {
        case COrgMod::eSubtype_strain:
        case COrgMod::eSubtype_variety:
            //case COrgMod::eSubtype_sub_species:
            m_bSubSpecType = true;
            break;
        default:
            m_bSubSpecType = false;
            break;
        }
    }

    // Conflict when the types are equal, when the destination modifier is
    // 'other', or when the destination modifier is one of the types listed
    // below and the type to match is flagged as subspecies-level.
    bool operator()( const CRef< COrgMod >& mod ) const {
        // mod is the destination modifier
        if( m_nType == mod->GetSubtype() ) {
            return true;
        }
        switch( mod->GetSubtype() ) {
        case COrgMod::eSubtype_strain:
        case COrgMod::eSubtype_substrain:
        case COrgMod::eSubtype_type:
        case COrgMod::eSubtype_subtype:
        case COrgMod::eSubtype_variety:
        case COrgMod::eSubtype_serotype:
        case COrgMod::eSubtype_serogroup:
        case COrgMod::eSubtype_serovar:
        case COrgMod::eSubtype_cultivar:
        case COrgMod::eSubtype_pathovar:
        case COrgMod::eSubtype_chemovar:
        case COrgMod::eSubtype_biovar:
        case COrgMod::eSubtype_biotype:
        case COrgMod::eSubtype_group:
        case COrgMod::eSubtype_subgroup:
        case COrgMod::eSubtype_isolate:
            // case COrgMod::eSubtype_sub_species:
            return m_bSubSpecType;
            // if( (m_nType >= 2 && m_nType <= 17) ) return 1;
        case COrgMod::eSubtype_other:
            return true;
        default:
            break;
        }
        return false;
    }

private:
    int  m_nType;
    bool m_bSubSpecType;
};


// Predicate: matches a modifier whose subtype equals the given type.
class PFindModByType {
public:
    PFindModByType( int type ) : m_nType( type ) {}

    bool operator()( const CRef< COrgMod >& mod ) const {
        return ( m_nType == mod->GetSubtype() );
    }

private:
    int m_nType;
};


// Predicate: matches a 'synonym' or 'anamorph' modifier whose subname
// equals the given taxname, ignoring case.
// The taxname is held by reference, so the string passed to the
// constructor must outlive the predicate and every copy of it.
class PRemoveSynAnamorph {
public:
    PRemoveSynAnamorph( const string& sTaxname ) : m_sName( sTaxname ) {}

    bool operator()( const CRef< COrgMod >& mod ) const {
        switch( mod->GetSubtype() ) {
        case COrgMod::eSubtype_synonym:
        case COrgMod::eSubtype_anamorph:
            return (NStr::CompareNocase( m_sName,
                                         mod->GetSubname() ) == 0);
        default:
            break;
        }
        return false;
    }

private:
    const string& m_sName;
};


void
CTaxon1::OrgRefAdjust( COrg_ref& inp_orgRef, const COrg_ref& db_orgRef,
                       int tax_id )
{
    inp_orgRef.ResetCommon();
    inp_orgRef.ResetSyn();

    // fill-up inp_orgRef based on db_orgRef
    inp_orgRef.SetTaxname( db_orgRef.GetTaxname() );
    if( db_orgRef.IsSetCommon() ) {
        inp_orgRef.SetCommon( db_orgRef.GetCommon() );
    }
    // Set tax id
    inp_orgRef.SetTaxId( tax_id );
    // copy the synonym list
    if( m_bWithSynonyms && db_orgRef.IsSetSyn() ) {
        inp_orgRef.SetSyn() = db_orgRef.GetSyn();
    }

    // copy orgname
    COrgName& on = inp_orgRef.SetOrgname();

    // Copy the orgname
    on.SetName().Assign( db_orgRef.GetOrgname().GetName() );

    bool bHasMod = on.IsSetMod();
    const COrgName::TMod& lSrcMod = db_orgRef.GetOrgname().GetMod();
    COrgName::TMod& lDstMod = on.SetMod();

    if( bHasMod ) { // Merge modifiers
        // Find and remove gb_xxx modifiers
        // tc2proc.c: CleanOrgName
        // Service stuff
        CTaxon1_req req;
        CTaxon1_resp resp;
        CRef<CTaxon1_info> pModInfo( new CTaxon1_info() );

        PushDiagPostPrefix( "Taxon1::OrgRefAdjust" );

        for( COrgName::TMod::iterator i = lDstMod.begin();
             i != lDstMod.end(); ) {
            switch( (*i)->GetSubtype() ) {
            case COrgMod::eSubtype_gb_acronym:
            case COrgMod::eSubtype_gb_anamorph:
            case COrgMod::eSubtype_gb_synonym:
                i = lDstMod.erase( i );
                break;
            default: // Check the modifier validity
                if( (*i)->CanGetSubname() && (*i)->CanGetSubtype() &&
                    !(*i)->GetSubname().empty() &&
                    (*i)->GetSubtype() != 0 ) {
                    pModInfo->SetIval1( tax_id );
                    pModInfo->SetIval2( (*i)->GetSubtype() );
                    pModInfo->SetSval( (*i)->GetSubname() );

                    req.SetGetorgmod( *pModInfo );
                    try {
                        if( SendRequest( req, resp ) ) {
                            if( !resp.IsGetorgmod() ) { // error
                                ERR_POST( "Response type is not Getorgmod" );
                            } else {
                                if( resp.GetGetorgmod().size() > 0 ) {
                                    CRef<CTaxon1_info> pInfo
                                        = resp.GetGetorgmod().front();
                                    if( pInfo->GetIval1() == tax_id ) {
                                        if( pInfo->GetIval2() == 0 ) {
                                            // Modifier is wrong (probably, hidden)
                                            i = lDstMod.erase( i );
                                            continue;
                                        } else {
(*i)->SetSubname( pInfo->GetSval() ); (*i)->SetSubtype( COrgMod::TSubtype( pInfo->GetIval2() ) ); } } else if( pInfo->GetIval1() != 0 ) { // Another redirection occurred // leave modifier but issue warning NCBI_NS_NCBI::CNcbiDiag(eDiag_Warning) << "OrgMod type=" << COrgMod::GetTypeInfo_enum_ESubtype() ->FindName( (*i)->GetSubtype(), true ) << " name='" << (*i)->GetSubname() << "' causing illegal redirection" << NCBI_NS_NCBI::Endm; } } } } else if( resp.IsError() && resp.GetError().GetLevel() != CTaxon1_error::eLevel_none ) { string sErr; resp.GetError().GetErrorText( sErr ); ERR_POST( sErr ); } } catch( exception& e ) { ERR_POST( e.what() ); } } ++i; break; } } PopDiagPostPrefix(); PFindConflict predConflict; for( COrgName::TMod::const_iterator i = lSrcMod.begin(); i != lSrcMod.end(); ++i ) { predConflict.SetTypeToMatch( (*i)->GetSubtype() ); if( (*i)->GetSubtype() != COrgMod::eSubtype_other ) { if( find_if( lDstMod.begin(), lDstMod.end(), predConflict ) == lDstMod.end() ) { CRef<COrgMod> pMod( new COrgMod() ); pMod->Assign( *(*i) ); lDstMod.push_back( pMod ); } } } } else { // Copy modifiers CRef<COrgMod> pMod; for( COrgName::TMod::const_iterator i = lSrcMod.begin(); i != lSrcMod.end(); ++i ) { switch( (*i)->GetSubtype() ) { case COrgMod::eSubtype_gb_acronym: case COrgMod::eSubtype_gb_anamorph: case COrgMod::eSubtype_gb_synonym: pMod.Reset( new COrgMod() ); pMod->Assign( *(*i) ); lDstMod.push_back( pMod ); default: break; } } // Remove 'other' modifiers PFindModByType fmbt( COrgMod::eSubtype_other ); remove_if( lDstMod.begin(), lDstMod.end(), fmbt ); } // Remove 'synonym' or 'anamorph' it if coincides with taxname PRemoveSynAnamorph rsa( inp_orgRef.GetTaxname() ); remove_if( lDstMod.begin(), lDstMod.end(), rsa ); // Reset destination modifiers if empty if( lDstMod.size() == 0 ) { on.ResetMod(); } // Copy lineage if( db_orgRef.GetOrgname().IsSetLineage() ) { on.SetLineage() = db_orgRef.GetOrgname().GetLineage(); } else { on.ResetLineage(); } if( 
db_orgRef.GetOrgname().IsSetGcode() ) { on.SetGcode( db_orgRef.GetOrgname().GetGcode() ); } else {
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -