reference_item.cpp

来自「ncbi源码」· C++ 代码 · 共 1,302 行 · 第 1/3 页

CPP
1,302
字号
/*
 * ===========================================================================
 * PRODUCTION $Log: reference_item.cpp,v $
 * PRODUCTION Revision 1000.2  2004/06/01 19:45:23  gouriano
 * PRODUCTION PRODUCTION: UPGRADED [GCC34_MSVC7] Dev-tree R1.18
 * PRODUCTION
 * ===========================================================================
 */

/*  $Id: reference_item.cpp,v 1000.2 2004/06/01 19:45:23 gouriano Exp $
* ===========================================================================
*
*                            PUBLIC DOMAIN NOTICE
*               National Center for Biotechnology Information
*
*  This software/database is a "United States Government Work" under the
*  terms of the United States Copyright Act.  It was written as part of
*  the author's official duties as a United States Government employee and
*  thus cannot be copyrighted.  This software/database is freely available
*  to the public for use. The National Library of Medicine and the U.S.
*  Government have not placed any restriction on its use or reproduction.
*
*  Although all reasonable efforts have been taken to ensure the accuracy
*  and reliability of the software and data, the NLM and the U.S.
*  Government do not and cannot warrant the performance or results that
*  may be obtained by using this software or data. The NLM and the U.S.
*  Government disclaim all warranties, express or implied, including
*  warranties of performance, merchantability or fitness for any particular
*  purpose.
*
*  Please cite the author in any work or product based on this material.
*
* ===========================================================================
*
* Author:  Aaron Ucko, NCBI
*          Mati Shomrat
*
* File Description:
*   flat-file generator -- bibliographic references
*
* ===========================================================================
*/
#include <ncbi_pch.hpp>
#include <corelib/ncbistd.hpp>
#include <serial/iterator.hpp>
#include <util/static_set.hpp>
#include <objects/biblio/biblio__.hpp>
#include <objects/general/Name_std.hpp>
#include <objects/general/Person_id.hpp>
#include <objects/medline/Medline_entry.hpp>
#include <objects/pub/Pub.hpp>
#include <objects/pub/Pub_equiv.hpp>
#include <objects/pub/Pub_set.hpp>
#include <objects/seqloc/Patent_seq_id.hpp>
#include <objects/seq/Bioseq.hpp>
#include <objects/seq/Seqdesc.hpp>
#include <objects/seqfeat/Seq_feat.hpp>
#include <objects/seqfeat/SeqFeatData.hpp>
#include <objects/biblio/Imprint.hpp>
#include <objmgr/util/sequence.hpp>
#include <algorithm>
#include <objtools/format/text_ostream.hpp>
#include <objtools/format/formatter.hpp>
#include <objtools/format/items/reference_item.hpp>
#include <objtools/format/context.hpp>
#include "utils.hpp"


BEGIN_NCBI_SCOPE
BEGIN_SCOPE(objects)
USING_SCOPE(sequence);


/////////////////////////////////////////////////////////////////////////////
//
// LessEqual - predicate class for sorting references
//
// Used by CReferenceItem::Rearrange() as the comparator handed to sort().
// Only the declaration lives here; the constructor and operator() are
// defined outside this portion of the file.
// NOTE(review): sort() needs a strict weak ordering, so despite the name
// operator() must return false for equivalent items -- confirm against the
// definition.

class LessEqual
{
public:
    // serial_first : Rearrange() passes false for the first sort and true
    //                for the second ("take serial number into
    //                consideration").
    // is_refseq    : Rearrange() passes ctx.IsRefSeq().
    LessEqual(bool serial_first, bool is_refseq);
    bool operator()(const CRef<CReferenceItem>& ref1,
                    const CRef<CReferenceItem>& ref2);

private:
    bool m_SerialFirst;
    bool m_IsRefSeq;
};


/////////////////////////////////////////////////////////////////////////////


// Append 'field' to 'result', preceded by 'sep' unless 'result' is empty.
static void s_AppendAffilField(string&       result,
                               const string& field,
                               const char*   sep)
{
    if ( !result.empty() ) {
        result += sep;
    }
    result += field;
}


// Render an affiliation as a single line of text.
//
// A plain-string affiliation is copied verbatim.  A structured one is
// assembled from whichever of its fields are set, in the order
//   div, affil, street, city, sub, postal code, country
// Fields are joined with ", ", except the postal code, which is joined to
// the preceding text with a single space.
//
// affil  : affiliation to format
// result : receives the formatted text (previous contents are discarded)
void CReferenceItem::FormatAffil(const CAffil& affil, string& result)
{
    if ( affil.IsStr() ) {
        result = affil.GetStr();
        return;
    }

    result.erase();
    const CAffil::C_Std& std_affil = affil.GetStd();

    if ( std_affil.IsSetDiv() ) {
        s_AppendAffilField(result, std_affil.GetDiv(), ", ");
    }
    if ( std_affil.IsSetAffil() ) {
        s_AppendAffilField(result, std_affil.GetAffil(), ", ");
    }
    if ( std_affil.IsSetStreet() ) {
        s_AppendAffilField(result, std_affil.GetStreet(), ", ");
    }
    if ( std_affil.IsSetCity() ) {
        s_AppendAffilField(result, std_affil.GetCity(), ", ");
    }
    if ( std_affil.IsSetSub() ) {
        s_AppendAffilField(result, std_affil.GetSub(), ", ");
    }
    if ( std_affil.IsSetPostal_code() ) {
        s_AppendAffilField(result, std_affil.GetPostal_code(), " ");
    }
    if ( std_affil.IsSetCountry() ) {
        s_AppendAffilField(result, std_affil.GetCountry(), ", ");
    }
}


// Restore redundant leading digits of the second number of a page range
// if needed, e.g. "123-45" becomes "123-145".
//
// The string is left untouched when it contains only digits, has no hyphen
// after the first number, has nothing after the hyphen, when the second
// number is already at least as long as the first, or when the tail of the
// first number does not compare lower than the second number.
static void s_FixPages(string& pages)
{
    static const char* const kDigits = "0123456789";

    // Length of the leading run of digits (the first page number).
    SIZE_TYPE digits1 = pages.find_first_not_of(kDigits);
    if ( digits1 == NPOS ) {
        return;
    }

    SIZE_TYPE hyphen = pages.find('-', digits1);
    if ( hyphen == NPOS  ||  hyphen + 1 >= pages.size() ) {
        return;
    }

    // Length of the run of digits following the hyphen (the second page
    // number).  When that run extends to the end of the string the search
    // yields NPOS; it must then be measured against the string's end --
    // subtracting from NPOS directly wraps around and made this function a
    // no-op for the most common input form ("123-45").
    SIZE_TYPE digits2 = pages.find_first_not_of(kDigits, hyphen + 1);
    if ( digits2 == NPOS ) {
        digits2 = pages.size();
    }
    digits2 -= hyphen + 1;

    if ( digits2 >= digits1 ) {
        return;
    }

    // lengths of the tail portions
    SIZE_TYPE len1 = hyphen + digits2 - digits1;
    SIZE_TYPE len2 = pages.size() - hyphen - 1;
    int x = NStr::strncasecmp(&pages[digits1 - digits2],
                              &pages[hyphen + 1],
                              min(len1, len2));
    if ( x > 0  ||  (x == 0  &&  len1 >= len2) ) {
        // complain?
    } else {
        // Prefix the second number with the digits it is missing.
        pages.insert(hyphen + 1, pages, 0, digits1 - digits2);
    }
}


// Build a reference from a Pub descriptor.
// The item's location is the context's location, passed through the
// context's mapper when one is present.
CReferenceItem::CReferenceItem(const CSeqdesc& desc, CBioseqContext& ctx) :
    CFlatItem(&ctx), m_PMID(0), m_MUID(0), m_Category(eUnknown), m_Serial(0),
    m_JustUids(false), m_Prepub(CImprint::ePrepub_other)
{
    _ASSERT(desc.IsPub());

    x_SetObject(desc.GetPub());
    m_Pubdesc.Reset(&(desc.GetPub()));

    if ( ctx.GetMapper() != 0 ) {
        m_Loc.Reset(ctx.GetMapper()->Map(ctx.GetLocation()));
    } else {
        m_Loc.Reset(&ctx.GetLocation());
    }

    x_GatherInfo(ctx);
}


// Build a reference from a Pub feature.
// The item's location is the feature's location, passed through the
// context's mapper when one is present.
CReferenceItem::CReferenceItem(const CSeq_feat& feat, CBioseqContext& ctx) :
    CFlatItem(&ctx), m_PMID(0), m_MUID(0), m_Category(eUnknown), m_Serial(0),
    m_JustUids(false), m_Prepub(CImprint::ePrepub_other)
{
    _ASSERT(feat.GetData().IsPub());

    x_SetObject(feat);
    m_Pubdesc.Reset(&(feat.GetData().GetPub()));

    if ( ctx.GetMapper() != 0 ) {
        m_Loc.Reset(ctx.GetMapper()->Map(feat.GetLocation()));
    } else {
        m_Loc.Reset(&(feat.GetLocation()));
    }

    // The other two constructors populate the item from m_Pubdesc here;
    // without this call a feature-based reference was left unpopulated.
    x_GatherInfo(ctx);
}


// Build a reference directly from a Pubdesc.
//
// loc : location to report for the reference; when null the context's
//       location is used.  Either way the location is passed through the
//       context's mapper when one is present.
CReferenceItem::CReferenceItem(const CPubdesc& pub, CBioseqContext& ctx,
                               const CSeq_loc* loc) :
    CFlatItem(&ctx), m_Pubdesc(&pub), m_Loc(loc), m_PMID(0), m_MUID(0),
    m_Category(eUnknown), m_Serial(0), m_JustUids(false),
    m_Prepub(CImprint::ePrepub_other)
{
    x_SetObject(pub);

    if ( !m_Loc ) {
        m_Loc.Reset(&ctx.GetLocation());
    }
    if ( ctx.GetMapper() != 0 ) {
        m_Loc.Reset(ctx.GetMapper()->Map(*m_Loc));
    }

    x_GatherInfo(ctx);
}


// Replace the location associated with this reference.
void CReferenceItem::SetLoc(const CConstRef<CSeq_loc>& loc)
{
    m_Loc = loc;
}


// Drop references that should not be listed on their own, folding their
// location into the preceding kept reference where that is allowed.
//
// A reference is removed when
//   - it is an empty (null) entry,
//   - it carries just UIDs,
//   - it has no authors (except on EMBL patent records), or
//   - it has the same non-zero PMID as the preceding kept reference.
//
// refs : list to clean up in place; Rearrange() sorts it beforehand so
//        that duplicates are adjacent
// ctx  : supplies the record type checks and the bioseq handle used for
//        merging locations
static void s_MergeDuplicates(CReferenceItem::TReferences& refs,
                              CBioseqContext& ctx)
{
    if ( refs.size() < 2 ) {
        return;
    }

    CReferenceItem::TReferences::iterator curr = refs.begin();
    // Last reference that was kept; equal to curr until one has been kept.
    CReferenceItem::TReferences::iterator prev = curr;

    while ( curr != refs.end() ) {
        if ( !*curr ) {
            // Erasing invalidates an iterator to the erased element, so
            // re-seat prev when it designates the very same position.
            const bool prev_is_curr = (prev == curr);
            curr = refs.erase(curr);
            if ( prev_is_curr ) {
                prev = curr;
            }
            // Re-test from the top: the next entry may be null as well, or
            // the end of the list may have been reached.
            continue;
        }
        _ASSERT(*curr);

        bool remove = false;
        bool merge  = true;

        const CReferenceItem& curr_ref = **curr;
        if ( curr_ref.JustUids() ) {
            remove = true;
        } else {
            // EMBL patent records do not need author or title - A29528.1
            // do not allow no author reference to appear by itself - U07000.1
            if ( !(ctx.IsEMBL()  &&  ctx.IsPatent())  &&
                 curr_ref.GetAuthors() == 0 ) {
                remove = true;
                merge = false;
            }
        }

        if ( (prev != curr)  &&  prev->NotEmpty() ) {
            const CReferenceItem& prev_ref = **prev;
            if ( curr_ref.GetPMID() == prev_ref.GetPMID()  &&
                 curr_ref.GetPMID() != 0 ) {
                remove = true;
            }
            if ( remove  &&
                 prev_ref.GetReftype() == CPubdesc::eReftype_seq  &&
                 curr_ref.GetReftype() != CPubdesc::eReftype_seq ) {
                // real range trumps sites
                merge = false;
            }
            if ( prev_ref.GetLoc() == 0 ) {
                merge = false;
            }
        } else {
            // Nothing has been kept yet, so there is nothing to merge into.
            merge = false;
        }

        if ( !remove ) {
            prev = curr;
            ++curr;
            continue;
        }

        // Only touch the kept reference's location when merging is allowed.
        // Updating it unconditionally wiped the location whenever merging
        // had been vetoed above (e.g. "real range trumps sites").
        if ( merge ) {
            // As before, when the removed reference has no location of its
            // own the kept reference's location is reset to null.
            CConstRef<CSeq_loc> merged_loc;
            if ( curr_ref.GetLoc() != 0 ) {
                merged_loc.Reset(SeqLocMerge(ctx.GetHandle(),
                    *(*prev)->GetLoc(), *curr_ref.GetLoc(), fFuseAbutting));
            }
            (*prev)->SetLoc(merged_loc);
        }

        const bool prev_is_curr = (prev == curr);
        curr = refs.erase(curr);
        if ( prev_is_curr ) {
            prev = curr;
        }
    }
}


// Put the references of a record into their final order:
// sort, merge duplicates, sort again with the serial number taken into
// consideration, then number the survivors 1..N.
void CReferenceItem::Rearrange(TReferences& refs, CBioseqContext& ctx)
{
    // first pass: order the references so duplicates can be merged
    sort(refs.begin(), refs.end(), LessEqual(false, ctx.IsRefSeq()));

    // merge duplicate references
    s_MergeDuplicates(refs, ctx);

    // !!! add submit reference

    // re-sort, take serial number into consideration.
    sort(refs.begin(), refs.end(), LessEqual(true, ctx.IsRefSeq()));

    // assign final serial numbers
    const size_t count = refs.size();
    for ( size_t idx = 0;  idx < count;  ++idx ) {
        refs[idx]->m_Serial = idx + 1;
    }
}


// Hand this item to the formatter's reference-specific entry point.
void CReferenceItem::Format(IFormatter& formatter,
                            IFlatTextOStream& text_os) const
{
    formatter.FormatReference(*this, text_os);
}


// Return true if any publication in the set matches this reference.
// Only the "pub" variant of CPub_set is examined; every other variant
// yields false.
bool CReferenceItem::Matches(const CPub_set& ps) const
{
    if ( ps.IsPub() ) {
        ITERATE (CPub_set::TPub, pub_it, ps.GetPub()) {
            if ( x_Matches(**pub_it) ) {
                return true;
            }
        }
    }
    return false;
}


// Return true if 'pub' refers to the same publication as this reference.
//
// MUID and PMID publications are compared by identifier, an equivalence
// set matches when any of its members does, and every other kind is
// compared through its content label against m_UniqueStr.
bool CReferenceItem::x_Matches(const CPub& pub) const
{
    switch ( pub.Which() ) {
    case CPub::e_Muid:
        return pub.GetMuid() == GetMUID();

    case CPub::e_Pmid:
        return pub.GetPmid() == GetPMID();

    case CPub::e_Equiv:
        ITERATE (CPub::TEquiv::Tdata, member, pub.GetEquiv().Get()) {
            if ( x_Matches(**member) ) {
                return true;
            }
        }
        break;

    default:
        if ( !m_UniqueStr.empty() ) {
            string label;
            pub.GetLabel(&label, CPub::eContent, true);

            // Ignore a trailing '>' and never compare more characters
            // than m_UniqueStr holds.
            size_t cmp_len = label.length();
            if ( cmp_len > 0  &&  label[cmp_len - 1] == '>' ) {
                --cmp_len;
            }
            cmp_len = min(cmp_len, m_UniqueStr.length());
            label.resize(cmp_len);

            if ( NStr::StartsWith(m_UniqueStr, label, NStr::eNocase) ) {
                return true;
            }
        }
        break;
    }

    return false;
}


// Populate this item from m_Pubdesc.
//
// Each publication of the equivalence set is handed to x_Init(), and
// m_UniqueStr is taken from the first publication that is not skipped.
// A Cit-gen that carries nothing but a positive serial number is skipped
// for that purpose unless it is the last publication of the set.
void CReferenceItem::x_GatherInfo(CBioseqContext& ctx)
{
    if ( !m_Pubdesc->CanGetPub() ) {
        // NOTE(review): processing carries on and still calls GetPub()
        // below, exactly as before -- confirm whether an early return is
        // wanted here.
        x_SetSkip();
    }

    if ( ctx.GetSubmitBlock() != 0 ) {
        m_Title = "Direct Submission";
        m_Category = eSubmission;
    }

    const CPub_equiv::Tdata& pubs = m_Pubdesc->GetPub().Get();

    // Iterator to the final publication.  The previous "end()--" was a
    // post-decrement on a temporary and therefore yielded end() itself,
    // so the "it != last" test below could never be false.
    CPub_equiv::Tdata::const_iterator last = pubs.end();
    if ( !pubs.empty() ) {
        --last;
    }

    ITERATE (CPub_equiv::Tdata, it, pubs) {
        x_Init(**it, ctx);

        // set unique str
        // skip over just serial number
        if ( (*it)->IsGen()  &&  it != last ) {
            const CCit_gen& gen = (*it)->GetGen();
            if ( !gen.CanGetCit()  ||
                 !NStr::StartsWith(gen.GetCit(), "BackBone id_pub",
                                   NStr::eNocase) ) {
                if ( !gen.CanGetCit()  &&
                     !gen.CanGetJournal()  &&
                     !gen.CanGetDate()  &&
                     gen.CanGetSerial_number()  &&
                     gen.GetSerial_number() > 0 ) {
                    continue;
                }
            }
        }

        if ( m_UniqueStr.empty() ) {
            (*it)->GetLabel(&m_UniqueStr, CPub::eContent, true);
        }
    }

    x_CleanData();

    // gather Genbank specific fields (formats: Genbank, GBSeq, DDBJ)
    if ( ctx.IsGenbankFormat() ) {
        x_GatherRemark(ctx);
    }
}

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?