📄 taxon1.hpp
字号:
/* * =========================================================================== * PRODUCTION $Log: taxon1.hpp,v $ * PRODUCTION Revision 1000.1 2004/04/12 17:24:21 gouriano * PRODUCTION PRODUCTION: UPGRADED [CATCHUP_003] Dev-tree R1.16 * PRODUCTION * =========================================================================== */#ifndef NCBI_TAXON1_HPP#define NCBI_TAXON1_HPP/* $Id: taxon1.hpp,v 1000.1 2004/04/12 17:24:21 gouriano Exp $ * =========================================================================== * * PUBLIC DOMAIN NOTICE * National Center for Biotechnology Information * * This software/database is a "United States Government Work" under the * terms of the United States Copyright Act. It was written as part of * the author's official duties as a United States Government employee and * thus cannot be copyrighted. This software/database is freely available * to the public for use. The National Library of Medicine and the U.S. * Government have not placed any restriction on its use or reproduction. * * Although all reasonable efforts have been taken to ensure the accuracy * and reliability of the software and data, the NLM and the U.S. * Government do not and cannot warrant the performance or results that * may be obtained by using this software or data. The NLM and the U.S. * Government disclaim all warranties, express or implied, including * warranties of performance, merchantability or fitness for any particular * purpose. * * Please cite the author in any work or product based on this material. * * =========================================================================== * * Author: Vladimir Soussov, Michael Domrachev * * File Description: * NCBI Taxonomy information retreival library * */#include <objects/taxon1/taxon1__.hpp>#include <objects/seqfeat/seqfeat__.hpp>#include <serial/serialdef.hpp>#include <connect/ncbi_types.h>#include <corelib/ncbi_limits.hpp>#include <list>#include <vector>#include <map>BEGIN_NCBI_SCOPEclass CObjectOStream;class CConn_ServiceStream;BEGIN_objects_SCOPEclass COrgRefCache;class ITaxon1Node;class ITreeIterator;class NCBI_TAXON1_EXPORT CTaxon1 {public: typedef list< string > TNameList; typedef vector< int > TTaxIdList; CTaxon1(); ~CTaxon1(); //--------------------------------------------- // Taxon1 server init // Returns: TRUE - OK // FALSE - Can't open connection to taxonomy service /// bool Init(void); // default: 120 sec timeout, 5 reconnect attempts, // cache for 10 org-refs bool Init(unsigned cache_capacity); bool Init(const STimeout* timeout, unsigned reconnect_attempts=5, unsigned cache_capacity=10); //--------------------------------------------- // Taxon1 server fini (closes connection, frees memory) /// void Fini(void); //--------------------------------------------- // Get organism by tax_id // Returns: pointer to Taxon2Data if organism exists // NULL - if tax_id wrong // // NOTE: // Caller gets his own copy of Taxon2Data structure. /// CRef< CTaxon2_data > GetById(int tax_id); //---------------------------------------------- // Get organism by OrgRef // Returns: pointer to Taxon2Data if organism exists // NULL - if no such organism in taxonomy database // // NOTE: // 1. These functions uses the following data from inp_orgRef to find // organism in taxonomy database. It uses taxname first. If no organism // was found (or multiple nodes found) then it tryes to find organism // using common name. If nothing found, then it tryes to find organism // using synonyms. Lookup never uses tax_id to find organism. // 2. LookupMerge function modifies given OrgRef to correspond to the // found one and returns constant pointer to the Taxon2Data structure // stored internally. /// CRef< CTaxon2_data > Lookup(const COrg_ref& inp_orgRef); CConstRef< CTaxon2_data > LookupMerge(COrg_ref& inp_orgRef); //----------------------------------------------- // Get tax_id by OrgRef // Returns: tax_id - if organism found // 0 - no organism found // -tax_id - if multiple nodes found // (where -tax_id is id of one of the nodes) // NOTE: // This function uses the same information from inp_orgRef as Lookup /// int GetTaxIdByOrgRef(const COrg_ref& inp_orgRef); enum EOrgRefStatus { eStatus_Ok = 0, eStatus_WrongTaxId = 0x001, eStatus_WrongGC = 0x002, eStatus_WrongMGC = 0x004, eStatus_NoOrgname = 0x008, eStatus_WrongTaxname = 0x010, eStatus_WrongLineage = 0x020, eStatus_WrongCommonName = 0x040, eStatus_WrongOrgname = 0x080, eStatus_WrongDivision = 0x100, eStatus_WrongOrgmod = 0x200 }; typedef unsigned TOrgRefStatus; //----------------------------------------------- // Checks whether OrgRef is valid // Returns: false on any error, stat_out filled with status flags // (see above) /// bool CheckOrgRef( const COrg_ref& orgRef, TOrgRefStatus& stat_out ); enum ESearch { eSearch_Exact, eSearch_TokenSet, eSearch_WildCard, // shell-style wildcards, i.e. *,?,[] eSearch_Phonetic }; //---------------------------------------------- // Get tax_id by organism name // Returns: tax_id - if organism found // 0 - no organism found // -tax_id - if multiple nodes found // (where -tax_id is id of one of the nodes) /// int GetTaxIdByName(const string& orgname); //---------------------------------------------- // Get tax_id by organism "unique" name // Returns: tax_id - if organism found // 0 - no organism found // -tax_id - if multiple nodes found // (where -tax_id is id of one of the nodes) /// int FindTaxIdByName(const string& orgname); //---------------------------------------------- // Get tax_id by organism name using fancy search modes. If given a pointer // to the list of names then it'll return all found names (one name per // tax id). Previous content of name_list_out will be destroyed. // Returns: tax_id - if organism found // 0 - no organism found // -1 - if multiple nodes found /// int SearchTaxIdByName(const string& orgname, ESearch mode = eSearch_TokenSet, list< CRef< CTaxon1_name > >* name_list_out = NULL); //---------------------------------------------- // Get ALL tax_id by organism name // Returns: number of organisms found, id list appended with found tax ids /// int GetAllTaxIdByName(const string& orgname, TTaxIdList& lIds); //---------------------------------------------- // Get organism by tax_id // Returns: pointer to OrgRef structure if organism found // NULL - if no such organism in taxonomy database // NOTE: // This function does not make a copy of OrgRef structure but returns // pointer to internally stored OrgRef. /// CConstRef< COrg_ref > GetOrgRef(int tax_id, bool& is_species, bool& is_uncultured, string& blast_name); //--------------------------------------------- // Set mode for synonyms in OrgRef // Returns: previous mode // NOTE: // Default value: false (do not copy synonyms to the new OrgRef) /// bool SetSynonyms(bool on_off); //--------------------------------------------- // Get parent tax_id // Returns: tax_id of parent node or 0 if error // NOTE: // Root of the tree has tax_id of 1 /// int GetParent(int id_tax); //--------------------------------------------- // Get genus tax_id (id_tax should be below genus) // Returns: tax_id of genus or -1 if error or no genus in the lineage /// int GetGenus(int id_tax); //--------------------------------------------- // Get superkingdom tax_id (id_tax should be below superkingdom) // Returns: tax_id of superkingdom // or -1 if error or no superkingdom in the lineage /// int GetSuperkingdom(int id_tax); //--------------------------------------------- // Get taxids for all children of specified node. // Returns: number of children, id list appended with found tax ids /// int GetChildren(int id_tax, TTaxIdList& children_ids); //--------------------------------------------- // Get genetic code name by genetic code id /// bool GetGCName(short gc_id, string& gc_name_out ); //--------------------------------------------- // Get taxonomic rank name by rank id /// bool GetRankName(short rank_id, string& rank_name_out ); //--------------------------------------------- // Get taxonomic division name by division id /// bool GetDivisionName(short div_id, string& div_name_out ); //--------------------------------------------- // Get taxonomic name class name by name class id /// bool GetNameClass(short nameclass_id, string& class_name_out ); //--------------------------------------------- // Get the nearest common ancestor for two nodes // Returns: id of this ancestor (id == 1 means that root node only is // ancestor) int Join(int taxid1, int taxid2); //--------------------------------------------- // Get all names for tax_id // Returns: number of names, name list appended with ogranism's names // NOTE: // If unique is true then only unique names will be stored /// int GetAllNames(int tax_id, TNameList& lNames, bool unique); //--------------------------------------------- // Find out is taxonomy lookup system alive or not // Returns: TRUE - alive // FALSE - dead /// bool IsAlive(void); //-------------------------------------------------- // Get tax_id for given gi // Returns: // true if ok // false if error // tax_id_out contains: // tax_id if found // 0 if not found /// bool GetTaxId4GI(int gi, int& tax_id_out); //-------------------------------------------------- // Get "blast" name for id // Returns: false if some error (blast_name_out not changed) // true if Ok // blast_name_out contains first blast name at or above // this node in the lineage or empty if there is no blast // name above /// bool GetBlastName(int tax_id, string& blast_name_out); //-------------------------------------------------- // Get error message after latest erroneous operation // Returns: error message, or empty string if no error occurred /// const string& GetLastError() const { return m_sLastError; } //-------------------------------------------------- // This function constructs minimal common tree from the given tax id // set (ids_in) treated as tree's leaves. It then returns a residue of // this tree node set and the given tax id set in ids_out. // Returns: false if some error // true if Ok /// bool GetPopsetJoin( const TTaxIdList& ids_in, TTaxIdList& ids_out ); //-------------------------------------------------- // This function updates cached partial tree and insures that node // with given tax_id and all its ancestors will present in this tree. // Returns: false if error // true if Ok, *ppNode is pointing to the node /// bool LoadNode( int tax_id, const ITaxon1Node** ppNode = NULL ) { return LoadSubtreeEx( tax_id, 0, ppNode ); } //-------------------------------------------------- // This function updates cached partial tree and insures that node // with given tax_id and all its ancestors and immediate children (if any) // will present in this tree. // Returns: false if error // true if Ok, *ppNode is pointing to the subtree root /// bool LoadChildren( int tax_id, const ITaxon1Node** ppNode = NULL ) { return LoadSubtreeEx( tax_id, 1, ppNode ); } //-------------------------------------------------- // This function updates cached partial tree and insures that all nodes // from subtree with given tax_id as a root will present in this tree. // Returns: false if error // true if Ok, *ppNode is pointing to the subtree root /// bool LoadSubtree( int tax_id, const ITaxon1Node** ppNode = NULL ) { return LoadSubtreeEx( tax_id, -1, ppNode ); } enum EIteratorMode { eIteratorMode_FullTree, // Iterator in this mode traverses all tree nodes eIteratorMode_LeavesBranches, // traverses only leaves and branches eIteratorMode_Best, // leaves and branches plus nodes right below branches eIteratorMode_Blast, // nodes with non-empty blast names eIteratorMode_Default = eIteratorMode_FullTree }; //-------------------------------------------------- // This function returnes an iterator of a cached partial tree positioned // at the tree root. Please note that the tree is PARTIAL. To traverse the // full taxonomy tree invoke LoadSubtree(1) first.
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -