📄 alnread.c
字号:
/* * =========================================================================== * PRODUCTION $Log: alnread.c,v $ * PRODUCTION Revision 1000.1 2004/06/01 19:41:15 gouriano * PRODUCTION PRODUCTION: UPGRADED [GCC34_MSVC7] Dev-tree R1.10 * PRODUCTION * =========================================================================== *//* * $Id: alnread.c,v 1000.1 2004/06/01 19:41:15 gouriano Exp $ * * =========================================================================== * * PUBLIC DOMAIN NOTICE * National Center for Biotechnology Information * * This software/database is a "United States Government Work" under the * terms of the United States Copyright Act. It was written as part of * the author's official duties as a United States Government employee and * thus cannot be copyrighted. This software/database is freely available * to the public for use. The National Library of Medicine and the U.S. * Government have not placed any restriction on its use or reproduction. * * Although all reasonable efforts have been taken to ensure the accuracy * and reliability of the software and data, the NLM and the U.S. * Government do not and cannot warrant the performance or results that * may be obtained by using this software or data. The NLM and the U.S. * Government disclaim all warranties, express or implied, including * warranties of performance, merchantability or fitness for any particular * purpose. * * Please cite the author in any work or product based on this material. 
* * =========================================================================== * * Authors: Colleen Bollin * */#include <util/creaders/alnread.h>#include <stdio.h>#include <stdlib.h>#include <string.h>#include <ctype.h>static const int kMaxPrintedIntLen = 10;#define MAX_PRINTED_INT_LEN_PLUS_ONE 11typedef enum { eTrue = -1, eFalse = 0} EBool;/* structures used internally */typedef struct SLineInfo { char * data; int line_num; int line_offset; EBool delete_me; struct SLineInfo * next;} SLineInfo, * TLineInfoPtr;typedef struct SLineInfoReader { TLineInfoPtr first_line; TLineInfoPtr curr_line; char * curr_line_pos; int data_pos;} SLineInfoReader, * TLineInfoReaderPtr;typedef struct SIntLink { int ival; struct SIntLink * next;} SIntLink, * TIntLinkPtr;typedef struct SStringCount { char * string; int num_appearances; TIntLinkPtr line_numbers; struct SStringCount * next;} SStringCount, * TStringCountPtr;typedef struct SSizeInfo { int size_value; int num_appearances; struct SSizeInfo * next;} SSizeInfo, * TSizeInfoPtr;typedef struct SLengthList { TSizeInfoPtr lengthrepeats; int num_appearances; struct SLengthList * next;} SLengthListData, * SLengthListPtr; typedef struct SCommentLoc { char * start; char * end; struct SCommentLoc * next;} SCommentLoc, * TCommentLocPtr;typedef struct SBracketedCommentList { TLineInfoPtr comment_lines; struct SBracketedCommentList * next;} SBracketedCommentList, * TBracketedCommentListPtr;typedef struct SAlignRawSeq { char * id; TLineInfoPtr sequence_data; TIntLinkPtr id_lines; struct SAlignRawSeq * next;} SAlignRawSeq, * TAlignRawSeqPtr;typedef struct SAlignFileRaw { TLineInfoPtr line_list; TLineInfoPtr organisms; TAlignRawSeqPtr sequences; int num_organisms; TLineInfoPtr deflines; int num_deflines; EBool marked_ids; int block_size; TIntLinkPtr offset_list; FReportErrorFunction report_error; void * report_error_userdata; char * alphabet; int expected_num_sequence; int expected_sequence_len; int num_segments;} SAlignRawFileData, * 
SAlignRawFilePtr;/* These functions are used for storing and transmitting information * about errors encountered while reading the alignment data. *//* This function allocates memory for a new error structure and populates * the structure with default values. * The new structure will be added to the end of the linked list of error * structures pointed to by list. */extern TErrorInfoPtr ErrorInfoNew (TErrorInfoPtr list){ TErrorInfoPtr eip, last; eip = (TErrorInfoPtr) malloc ( sizeof (SErrorInfo)); if (eip == NULL) { return NULL; } eip->category = eAlnErr_Unknown; eip->line_num = -1; eip->id = NULL; eip->message = NULL; eip->next = NULL; last = list; while (last != NULL && last->next != NULL) { last = last->next; } if (last != NULL) { last->next = eip; } return eip;}/* This function recursively frees the memory associated with a list of * error structures as well as the member variables of the error structures. */extern void ErrorInfoFree (TErrorInfoPtr eip){ if (eip == NULL) { return; } ErrorInfoFree (eip->next); free (eip->id); free (eip->message); free (eip);}/* This function creates and sends an error message regarding a NEXUS comment * character. */static void s_ReportCharCommentError (char * expected, char seen, char * val_name, FReportErrorFunction errfunc, void * errdata){ TErrorInfoPtr eip; const char * errformat = "Specified %s character does not match NEXUS" " comment in file (specified %s, comment %c)"; if (errfunc == NULL || val_name == NULL || expected == NULL) { return; } eip = ErrorInfoNew (NULL); if (eip != NULL) { eip->category = eAlnErr_BadFormat; eip->message = (char *) malloc (strlen (errformat) + strlen (val_name) + strlen (expected) + 2); if (eip->message != NULL) { sprintf (eip->message, errformat, val_name, expected, seen); } errfunc (eip, errdata); }}/* This function creates and sends an error message regarding a character * that is unexpected in sequence data. 
*/static void s_ReportBadCharError (char * id, char bad_char, int num_bad, int offset, int line_number, char * reason, FReportErrorFunction errfunc, void * errdata){ TErrorInfoPtr eip; const char * err_format = "%d bad characters (%c) found at position %d (%s)."; if (errfunc == NULL || num_bad == 0 || bad_char == 0 || reason == NULL) { return; } eip = ErrorInfoNew (NULL); if (eip == NULL) { return; } eip->category = eAlnErr_BadData; if (id != NULL) eip->id = strdup (id); eip->line_num = line_number; eip->message = (char *) malloc (strlen (err_format) + 2 * kMaxPrintedIntLen + strlen (reason) + 3); if (eip->message != NULL) { sprintf (eip->message, err_format, num_bad, bad_char, offset, reason); } errfunc (eip, errdata);} /* This function creates and sends an error message regarding an ID that * was found in the wrong location. */static void s_ReportInconsistentID (char * id, int line_number, FReportErrorFunction report_error, void * report_error_userdata){ TErrorInfoPtr eip; if (report_error == NULL) { return; } eip = ErrorInfoNew (NULL); if (eip == NULL) { return; } eip->category = eAlnErr_BadFormat; eip->id = strdup (id); eip->line_num = line_number; eip->message = strdup ("Found unexpected ID"); report_error (eip, report_error_userdata);}/* This function creates and sends an error message regarding a line * of sequence data that was expected to have a different length. 
*/static void s_ReportInconsistentBlockLine (char * id, int line_number, FReportErrorFunction report_error, void * report_error_userdata){ TErrorInfoPtr eip; if (report_error == NULL) { return; } eip = ErrorInfoNew (NULL); if (eip == NULL) { return; } eip->category = eAlnErr_BadFormat; eip->id = strdup (id); eip->line_num = line_number; eip->message = strdup ("Inconsistent block line formatting"); report_error (eip, report_error_userdata);}/* This function creates and sends an error message regarding mismatched * definition lines */static voids_ReportDefinitionLineMismatch(FReportErrorFunction report_error, void * report_error_userdata){ TErrorInfoPtr eip; if (report_error == NULL) { return; } eip = ErrorInfoNew (NULL); if (eip == NULL) { return; } eip->category = eAlnErr_BadData; eip->message = strdup ("Mismatched definition lines"); report_error (eip, report_error_userdata);}/* This function recursively creates and sends an error message * regarding the number of times items in list appear. 
*/static void s_ReportDefinitionLines (TStringCountPtr list, FReportErrorFunction report_error, void * report_error_userdata){ TErrorInfoPtr eip; const char * err_null_format = "Null definition line occurs %d times"; const char * err_format = "Definition line %s occurs %d times"; if (list == NULL || report_error == NULL) { return; } eip = ErrorInfoNew (NULL); if (eip == NULL) { return; } eip->category = eAlnErr_BadData; if (list->string == NULL) { eip->message = malloc (strlen (err_null_format) + kMaxPrintedIntLen + 1); if (eip->message != NULL) { sprintf (eip->message, err_null_format, list->num_appearances); } } else { eip->message = malloc (strlen (err_format) + strlen (list->string) + kMaxPrintedIntLen + 1); if (eip->message != NULL) { sprintf (eip->message, err_format, list->string, list->num_appearances); } } report_error (eip, report_error_userdata); s_ReportDefinitionLines (list->next, report_error, report_error_userdata);} /* This function creates and sends an error message regarding a line of * sequence data that was expected to be a different length. */static void s_ReportLineLengthError (char * id, TLineInfoPtr lip, int expected_length, FReportErrorFunction report_error, void * report_error_userdata){ TErrorInfoPtr eip; char * msg; const char * format = "Expected line length %d, actual length %d"; int len; if (lip == NULL || report_error == NULL) { return; } eip = ErrorInfoNew (NULL); if (eip == NULL) { return; } eip->category = eAlnErr_BadFormat; eip->id = strdup (id); eip->line_num = lip->line_num; msg = (char *) malloc (strlen (format) + kMaxPrintedIntLen + 1); if (msg != NULL) { if (lip->data == NULL) { len = 0; } else { len = strlen (lip->data); } sprintf (msg, format, expected_length, len); eip->message = msg; } report_error (eip, report_error_userdata);}/* This function creates and sends an error message regarding a block of * sequence data that was expected to contain more lines. 
*/
/*   id                    - sequence ID copied into the error; may be NULL,
 *                           in which case the error's id is left NULL
 *   line_num              - stored in the error's line_num
 *   expected_num          - number of lines expected in the block
 *   actual_num            - number of lines found
 *   report_error          - callback that receives the error structure
 *   report_error_userdata - passed through to report_error unchanged
 * Nothing is reported if report_error is NULL or the error structure
 * cannot be allocated.  If only the message buffer cannot be allocated,
 * the error is still sent with a NULL message.
 */
static void s_ReportBlockLengthError
(char *               id,
 int                  line_num,
 int                  expected_num,
 int                  actual_num,
 FReportErrorFunction report_error,
 void *               report_error_userdata)
{
    TErrorInfoPtr eip;
    const char *  err_format = "Expected %d lines in block, found %d";

    if (report_error == NULL) {
        return;
    }
    eip = ErrorInfoNew (NULL);
    if (eip == NULL) {
        return;
    }
    eip->category = eAlnErr_BadFormat;
    /* strdup must not be handed a NULL pointer. */
    if (id != NULL) {
        eip->id = strdup (id);
    }
    eip->line_num = line_num;
    /* Room for the two printed ints and the terminator. */
    eip->message = malloc (strlen (err_format) + 2 * kMaxPrintedIntLen + 1);
    if (eip->message != NULL) {
        sprintf (eip->message, err_format, expected_num, actual_num);
    }
    report_error (eip, report_error_userdata);
}


/* This function creates and sends an error message regarding missing
 * sequence data.
 */
static void
s_ReportMissingSequenceData
(char *               id,
 FReportErrorFunction report_error,
 void *               report_error_userdata)
{
    TErrorInfoPtr eip;

    if (report_error == NULL) {
        return;
    }
    eip = ErrorInfoNew (NULL);
    if (eip == NULL) {
        return;
    }
    eip->category = eAlnErr_Fatal;
    eip->id = strdup (id);
    eip->message = strdup ("No data found");
    report_error (eip, report_error_userdata);
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -