reference_item.cpp
来自「ncbi源码」· C++ 代码 · 共 1,302 行 · 第 1/3 页
CPP
1,302 行
/*
 * ===========================================================================
 * PRODUCTION $Log: reference_item.cpp,v $
 * PRODUCTION Revision 1000.2  2004/06/01 19:45:23  gouriano
 * PRODUCTION PRODUCTION: UPGRADED [GCC34_MSVC7] Dev-tree R1.18
 * PRODUCTION
 * ===========================================================================
 */
/*  $Id: reference_item.cpp,v 1000.2 2004/06/01 19:45:23 gouriano Exp $
* ===========================================================================
*
*                            PUBLIC DOMAIN NOTICE
*               National Center for Biotechnology Information
*
*  This software/database is a "United States Government Work" under the
*  terms of the United States Copyright Act.  It was written as part of
*  the author's official duties as a United States Government employee and
*  thus cannot be copyrighted.  This software/database is freely available
*  to the public for use. The National Library of Medicine and the U.S.
*  Government have not placed any restriction on its use or reproduction.
*
*  Although all reasonable efforts have been taken to ensure the accuracy
*  and reliability of the software and data, the NLM and the U.S.
*  Government do not and cannot warrant the performance or results that
*  may be obtained by using this software or data. The NLM and the U.S.
*  Government disclaim all warranties, express or implied, including
*  warranties of performance, merchantability or fitness for any particular
*  purpose.
*
*  Please cite the author in any work or product based on this material.
*
* ===========================================================================
*
* Author:  Aaron Ucko, NCBI
*          Mati Shomrat
*
* File Description:
*   flat-file generator -- bibliographic references
*
* ===========================================================================
*/
#include <ncbi_pch.hpp>
#include <corelib/ncbistd.hpp>
#include <serial/iterator.hpp>
#include <util/static_set.hpp>

#include <objects/biblio/biblio__.hpp>
#include <objects/general/Name_std.hpp>
#include <objects/general/Person_id.hpp>
#include <objects/medline/Medline_entry.hpp>
#include <objects/pub/Pub.hpp>
#include <objects/pub/Pub_equiv.hpp>
#include <objects/pub/Pub_set.hpp>
#include <objects/seqloc/Patent_seq_id.hpp>
#include <objects/seq/Bioseq.hpp>
#include <objects/seq/Seqdesc.hpp>
#include <objects/seqfeat/Seq_feat.hpp>
#include <objects/seqfeat/SeqFeatData.hpp>
#include <objects/biblio/Imprint.hpp>
#include <objmgr/util/sequence.hpp>

#include <algorithm>

#include <objtools/format/text_ostream.hpp>
#include <objtools/format/formatter.hpp>
#include <objtools/format/items/reference_item.hpp>
#include <objtools/format/context.hpp>
#include "utils.hpp"


BEGIN_NCBI_SCOPE
BEGIN_SCOPE(objects)

USING_SCOPE(sequence);


/////////////////////////////////////////////////////////////////////////////
//
// LessEqual - predicate class for sorting references
//
// Used as the comparison object for the two sort() calls in
// CReferenceItem::Rearrange().  The constructor and operator() are defined
// outside this part of the file.

class LessEqual
{
public:
    // Rearrange() passes serial_first = false for its first sort and
    // serial_first = true for its second one; is_refseq comes from
    // CBioseqContext::IsRefSeq().
    LessEqual(bool serial_first, bool is_refseq);
    bool operator()(const CRef<CReferenceItem>& ref1, const CRef<CReferenceItem>& ref2);
private:
    bool m_SerialFirst;
    bool m_IsRefSeq;
};


/////////////////////////////////////////////////////////////////////////////

// Render an affiliation as a single line of text.
//
// A string affiliation is copied verbatim.  A structured affiliation is
// assembled from whichever of its fields are set, in the order
// div, affil, street, city, sub, postal code, country.  Fields are joined
// with ", ", except the postal code, which is joined with a single space.
//
// @param affil   affiliation to format
// @param result  receives the formatted text (previous content is discarded)
void CReferenceItem::FormatAffil(const CAffil& affil, string& result)
{
    if (affil.IsStr()) {
        result = affil.GetStr();
    } else {
        result.erase();
        const CAffil::C_Std& std = affil.GetStd();
        if (std.IsSetDiv()) {
            result = std.GetDiv();
        }
        if (std.IsSetAffil()) {
            if (!result.empty()) {
                result += ", ";
            }
            result += std.GetAffil();
        }
        if (std.IsSetStreet()) {
            if (!result.empty()) {
                result += ", ";
            }
            result += std.GetStreet();
        }
        if (std.IsSetCity()) {
            if (!result.empty()) {
                result += ", ";
            }
            result += std.GetCity();
        }
        if (std.IsSetSub()) {
            if (!result.empty()) {
                result += ", ";
            }
            result += std.GetSub();
        }
        if (std.IsSetPostal_code()) {
            // the postal code is separated by a blank, not by a comma
            if (!result.empty()) {
                result += " ";
            }
            result += std.GetPostal_code();
        }
        if (std.IsSetCountry()) {
            if (!result.empty()) {
                result += ", ";
            }
            result += std.GetCountry();
        }
    }
}


// Restore redundant leading digits of the second number of a page range
// if needed, e.g. "123-45" becomes "123-145".
//
// Nothing is changed when the string contains only digits, contains no
// hyphen after the first number, or when the second number already has at
// least as many digits as the first.
//
// @param pages  page range, modified in place
static void s_FixPages(string& pages)
{
    // number of leading digits == length of the first number
    SIZE_TYPE digits1 = pages.find_first_not_of("0123456789");
    if ( digits1 != NPOS ) {
        SIZE_TYPE hyphen = pages.find('-', digits1);
        if ( hyphen != NPOS ) {
            SIZE_TYPE digits2 = pages.find_first_not_of("0123456789", hyphen + 1);
            if ( digits2 == NPOS ) {
                // The second number runs to the end of the string.  Without
                // this, the subtraction below wraps around and the common
                // "123-45" form is never expanded.
                digits2 = pages.size();
            }
            // convert position into the number of digits after the hyphen
            digits2 -= hyphen + 1;
            if ( digits2 < digits1 ) {
                // lengths of the tail portions
                SIZE_TYPE len1 = hyphen + digits2 - digits1;
                SIZE_TYPE len2 = pages.size() - hyphen - 1;
                // c_str() keeps the pointer arithmetic valid even when an
                // offset equals size() (e.g. a trailing hyphen).
                const char* text = pages.c_str();
                int x = NStr::strncasecmp(text + (digits1 - digits2),
                                          text + hyphen + 1,
                                          min(len1, len2));
                if ( x > 0  ||  (x == 0  &&  len1 >= len2) ) {
                    // the range would not be increasing -- leave it alone
                    // complain?
                } else {
                    // prefix the second number with the digits it is missing
                    pages.insert(hyphen + 1, pages, 0, digits1 - digits2);
                }
            }
        }
    }
}


// Build a reference from a Pub descriptor.
//
// The reference location is the context location, passed through the
// context's mapper when one is present.
CReferenceItem::CReferenceItem(const CSeqdesc& desc, CBioseqContext& ctx) :
    CFlatItem(&ctx), m_PMID(0), m_MUID(0), m_Category(eUnknown), m_Serial(0),
    m_JustUids(false), m_Prepub(CImprint::ePrepub_other)
{
    _ASSERT(desc.IsPub());

    x_SetObject(desc.GetPub());
    m_Pubdesc.Reset(&(desc.GetPub()));

    if ( ctx.GetMapper() != 0 ) {
        m_Loc.Reset(ctx.GetMapper()->Map(ctx.GetLocation()));
    } else {
        m_Loc.Reset(&ctx.GetLocation());
    }

    x_GatherInfo(ctx);
}


// Build a reference from a Pub feature.
//
// The reference location is the feature location, passed through the
// context's mapper when one is present.
//
// NOTE(review): unlike the other two constructors, this one does not call
// x_GatherInfo(ctx) -- confirm whether that is intentional.
CReferenceItem::CReferenceItem(const CSeq_feat& feat, CBioseqContext& ctx) :
    CFlatItem(&ctx), m_PMID(0), m_MUID(0), m_Category(eUnknown), m_Serial(0),
    m_JustUids(false), m_Prepub(CImprint::ePrepub_other)
{
    _ASSERT(feat.GetData().IsPub());

    x_SetObject(feat);
    m_Pubdesc.Reset(&(feat.GetData().GetPub()));

    if ( ctx.GetMapper() != 0 ) {
        m_Loc.Reset(ctx.GetMapper()->Map(feat.GetLocation()));
    } else {
        m_Loc.Reset(&(feat.GetLocation()));
    }
}


// Build a reference directly from a Pubdesc.
//
// @param pub  publication descriptor
// @param ctx  bioseq context
// @param loc  reference location; when null the context location is used.
//             Either way the location is passed through the context's
//             mapper when one is present.
CReferenceItem::CReferenceItem(const CPubdesc& pub, CBioseqContext& ctx, const CSeq_loc* loc) :
    CFlatItem(&ctx), m_Pubdesc(&pub), m_Loc(loc), m_PMID(0), m_MUID(0),
    m_Category(eUnknown), m_Serial(0), m_JustUids(false),
    m_Prepub(CImprint::ePrepub_other)
{
    x_SetObject(pub);

    if ( !m_Loc ) {
        m_Loc.Reset(&ctx.GetLocation());
    }
    if ( ctx.GetMapper() != 0 ) {
        m_Loc.Reset(ctx.GetMapper()->Map(*m_Loc));
    }

    x_GatherInfo(ctx);
}


// Replace the location this reference applies to.
void CReferenceItem::SetLoc(const CConstRef<CSeq_loc>& loc)
{
    m_Loc = loc;
}


// Remove references that must not appear on their own and fold duplicates
// into the preceding reference.
//
// A reference is removed when it carries just UIDs, when it has no authors
// (except for EMBL patent records), or when it has the same non-zero PMID
// as the previously kept reference.  Null entries are dropped as well.
//
// Positions are tracked by index rather than by iterator so that erasing
// an element never leaves a stale iterator behind.
//
// @param refs  references to process, modified in place
// @param ctx   bioseq context of the references
static void s_MergeDuplicates(CReferenceItem::TReferences& refs, CBioseqContext& ctx)
{
    if ( refs.size() < 2 ) {
        return;
    }

    typedef CReferenceItem::TReferences::size_type TIndex;

    TIndex curr = 0;
    TIndex prev = 0;  // last kept reference; equal to curr while none exists

    while ( curr < refs.size() ) {
        if ( !refs[curr] ) {
            // Drop the null entry and re-examine whatever moved into its
            // place; it may be another null entry or the end of the list.
            refs.erase(refs.begin() + curr);
            continue;
        }
        _ASSERT(refs[curr]);

        bool remove = false;
        bool merge  = true;

        const CReferenceItem& curr_ref = *refs[curr];
        if ( curr_ref.JustUids() ) {
            remove = true;
        } else {
            // EMBL patent records do not need author or title - A29528.1
            // do not allow no author reference to appear by itself - U07000.1
            if ( !(ctx.IsEMBL()  &&  ctx.IsPatent())  &&
                 curr_ref.GetAuthors() == 0 ) {
                remove = true;
                merge  = false;
            }
        }

        if ( (prev != curr)  &&  refs[prev].NotEmpty() ) {
            const CReferenceItem& prev_ref = *refs[prev];
            if ( curr_ref.GetPMID() == prev_ref.GetPMID()  &&
                 curr_ref.GetPMID() != 0 ) {
                remove = true;
            }
            if ( remove  &&
                 prev_ref.GetReftype() == CPubdesc::eReftype_seq  &&
                 curr_ref.GetReftype() != CPubdesc::eReftype_seq ) {
                // real range trumps sites
                merge = false;
            }
            if ( prev_ref.GetLoc() == 0 ) {
                merge = false;
            }
        } else {
            // there is no previous reference to merge into
            merge = false;
        }

        if ( remove ) {
            CConstRef<CSeq_loc> merged_loc;
            if ( merge  &&  (curr_ref.GetLoc() != 0) ) {
                merged_loc.Reset(SeqLocMerge(ctx.GetHandle(),
                                             *refs[prev]->GetLoc(),
                                             *curr_ref.GetLoc(),
                                             fFuseAbutting));
            }
            // NOTE(review): when no merge took place this stores a null
            // location in the previous reference (or in the entry about to
            // be erased when prev == curr) -- confirm this is intended.
            refs[prev]->SetLoc(merged_loc);
            // the next element slides into position `curr`
            refs.erase(refs.begin() + curr);
        } else {
            prev = curr;
            ++curr;
        }
    }
}


// Put the references into their final order and number them.
//
// The references are sorted, duplicates are merged, the list is sorted
// again (this time with the serial-first flag set) and finally every
// reference is given a serial number starting at 1.
//
// @param refs  references to rearrange, modified in place
// @param ctx   bioseq context of the references
void CReferenceItem::Rearrange(TReferences& refs, CBioseqContext& ctx)
{
    {{
        sort(refs.begin(), refs.end(), LessEqual(false, ctx.IsRefSeq()));
    }}

    {{
        // merge duplicate references
        s_MergeDuplicates(refs, ctx);
    }}

    {{
        // !!! add submit reference
    }}

    {{
        // re-sort, take serial number into consideration.
        sort(refs.begin(), refs.end(), LessEqual(true, ctx.IsRefSeq()));
    }}

    // assign final serial numbers
    size_t size = refs.size();
    for ( size_t i = 0;  i < size;  ++i ) {
        refs[i]->m_Serial = i + 1;
    }
}


// Hand this item to the formatter for output.
void CReferenceItem::Format(IFormatter& formatter, IFlatTextOStream& text_os) const
{
    formatter.FormatReference(*this, text_os);
}


// Check whether any publication of a pub set refers to this reference.
//
// @return true if `ps` is of the Pub variant and at least one of its
//         publications matches (see x_Matches), false otherwise
bool CReferenceItem::Matches(const CPub_set& ps) const
{
    if ( !ps.IsPub() ) {
        return false;
    }

    ITERATE (CPub_set::TPub, it, ps.GetPub()) {
        if ( x_Matches(**it) ) {
            return true;
        }
    }
    return false;
}


// Check whether a single publication refers to this reference.
//
// MUID and PMID publications are compared by id, an Equiv publication
// matches if any of its members does, and every other kind is compared by
// its content label against this reference's unique string.
bool CReferenceItem::x_Matches(const CPub& pub) const
{
    switch ( pub.Which() ) {
    case CPub::e_Muid:
        return pub.GetMuid() == GetMUID();

    case CPub::e_Pmid:
        return pub.GetPmid() == GetPMID();

    case CPub::e_Equiv:
        ITERATE (CPub::TEquiv::Tdata, it, pub.GetEquiv().Get()) {
            if ( x_Matches(**it) ) {
                return true;
            }
        }
        break;

    default:
        {
            if ( !m_UniqueStr.empty() ) {
                string unique;
                pub.GetLabel(&unique, CPub::eContent, true);

                // ignore a trailing '>' and compare no more characters
                // than the unique string has
                size_t len = unique.length();
                if ( len > 0  &&  unique[len - 1] == '>' ) {
                    --len;
                }
                len = min(len, m_UniqueStr.length());
                unique.resize(len);
                if ( NStr::StartsWith(m_UniqueStr, unique, NStr::eNocase) ) {
                    return true;
                }
            }
            break;
        }
    }

    return false;
}


// Collect the reference data from the publication descriptor.
//
// Each publication of the descriptor is passed to x_Init(), the unique
// string is taken from the first publication that is not skipped, the
// gathered data is cleaned up and, for GenBank formats, the remark is
// gathered too.
void CReferenceItem::x_GatherInfo(CBioseqContext& ctx)
{
    if ( !m_Pubdesc->CanGetPub() ) {
        x_SetSkip();
    }

    if ( ctx.GetSubmitBlock() != 0 ) {
        m_Title = "Direct Submission";
        m_Category = eSubmission;
    }

    const CPub_equiv::Tdata& pubs = m_Pubdesc->GetPub().Get();

    // Iterator to the last publication.  `end()--` would yield end()
    // itself, which made the `it != last` test below always true.
    CPub_equiv::Tdata::const_iterator last = pubs.end();
    if ( !pubs.empty() ) {
        --last;
    }

    ITERATE (CPub_equiv::Tdata, it, pubs) {
        x_Init(**it, ctx);

        // set unique str
        // skip over just serial number
        if ( (*it)->IsGen()  &&  it != last ) {
            const CCit_gen& gen = (*it)->GetGen();

            if ( !gen.CanGetCit()  ||
                 !NStr::StartsWith(gen.GetCit(), "BackBone id_pub", NStr::eNocase) ) {
                if ( !gen.CanGetCit()      &&
                     !gen.CanGetJournal()  &&
                     !gen.CanGetDate()     &&
                     gen.CanGetSerial_number()  &&
                     gen.GetSerial_number() > 0 ) {
                    continue;
                }
            }
        }
        if ( m_UniqueStr.empty() ) {
            (*it)->GetLabel(&m_UniqueStr, CPub::eContent, true);
        }
    }

    x_CleanData();

    // gather Genbank specific fields (formats: Genbank, GBSeq, DDBJ)
    if ( ctx.IsGenbankFormat() ) {
        x_GatherRemark(ctx);
    }
}
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?