⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 apparsecat.c

📁 数据挖掘经典的hierarchial clustering algorithm
💻 C
📖 第 1 页 / 共 2 页
字号:
/*
  ========================================================================
  DEVise Data Visualization Software
  (c) Copyright 1992-1996
  By the DEVise Development Group
  Madison, Wisconsin
  All Rights Reserved.
  ========================================================================

  Under no circumstances is this software to be copied, distributed,
  or altered in any way without prior permission from the DEVise
  Development Group.
*/

/*
  Module for reading physical and logical schemas.
 */

/*
  $Id: ApParseCat.c,v 1.10 1996/10/10 16:45:16 wenger Exp $

  $Log: ApParseCat.c,v $
  Revision 1.10  1996/10/10 16:45:16  wenger
  Changed function names, etc., in ApParseCat.c to get rid of name clashes
  when Donko puts transformation engine code into DEVise.

  Revision 1.9  1996/08/27 19:06:58  flisakow
  Added ifdef's around some information printf's.

  Revision 1.8  1996/08/15 19:54:46  wenger
  Added 'pure' targets for attrproj and devread; fixed some dynamic
  memory problems.  Found some bugs while demo'ing for soils science
  people.

  Revision 1.7  1996/07/01 20:36:59  jussi
  Minor changes to reflect the new TDataAscii/TDataBinary constructor
  interface.

  Revision 1.6  1996/06/27 18:11:59  wenger
  Re-integrated most of the attribute projection code (most importantly,
  all of the TData code) into the main code base (reduced the number of
  modules used only in attribute projection).

  Revision 1.5  1996/06/17 20:01:03  wenger
  First version of 'show' program for dumping projections to stdout.

  Revision 1.4  1996/06/07 19:40:34  wenger
  Integrated some of the special attribute projection sources back
  into the regular Devise sources.

  Revision 1.3  1996/04/30 15:31:37  wenger
  Attrproj code now reads records via TData object; interface to Birch
  code now in place (but not fully functional).

  Revision 1.2  1996/04/25 19:25:10  wenger
  Attribute projection code can now parse a schema, and create the
  corresponding TData object.

  Revision 1.1  1996/04/22 18:01:47  wenger
  First version of "attribute projection" code.  The parser (with
  the exception of instantiating any TData) compiles and runs.

*/

#include <stdio.h>

#include "ApParseCat.h"
#include "AttrList.h"
#include "GroupDir.h"
#include "Parse.h"
#include "ApInit.h"
#include "DeviseTypes.h"
#include "TDataAsciiInterp.h"
#include "TDataBinaryInterp.h"
#include "Util.h"

//#define DEBUG

/* Group directory object, allocated during static initialization.
 * It is not referenced in the portion of the file visible here. */
static GroupDir *gdir = new GroupDir();

/* Size, in bytes, of the line buffer used when reading a catalog file. */
#define LINESIZE 512

/* Number of attributes inserted into 'attrs' so far; ParseAttr also passes
 * it to InsertAttr as the number of the attribute being added. */
static int numAttrs          = 0;
/* Attribute list under construction; created by ParseAttr when the first
 * attribute is parsed (0 until then). */
static AttrList *attrs       = 0;

/* NOTE(review): presumably the current line number in the catalog file
 * being parsed -- not used in the visible code, confirm against the rest
 * of the file. */
static int _line = 0;

/*------------------------------------------------------------------------------
 * function: SetVal
 * Convert the text in valstr to a value of type valtype and store it in the
 * matching field of aval.
 *
 *   aval    - destination; only the field selected by valtype is written.
 *   valstr  - textual form of the value.
 *   valtype - selects both the conversion and the destination field.
 *
 * For StringAttr the stored pointer is a fresh copy made by CopyString.
 * NOTE(review): nothing visible here releases that copy -- confirm who owns it.
 * An unrecognized valtype is reported on stderr and ends the program through
 * Exit::DoExit(2).
 */
static void
SetVal(AttrVal *aval, char *valstr, AttrType valtype)
{
  switch(valtype) {
    case IntAttr:
      aval->intVal = atoi(valstr);
      break;
    case FloatAttr:
      aval->floatVal = atof(valstr);
      break;
    case DoubleAttr:
      aval->doubleVal = atof(valstr);
      break;
    case StringAttr:
      aval->strVal = CopyString(valstr);
      break;
    case DateAttr: {
      /* The parse result is deliberately ignored (best effort), so start
       * from a defined value: if ParseFloatDate leaves its output untouched
       * on failure, dateVal becomes 0 instead of an indeterminate value. */
      double parsedDate = 0.0;
      (void)ParseFloatDate(valstr, parsedDate);
      aval->dateVal = (time_t)parsedDate;
      break;
    }
    default:
      fprintf(stderr,"unknown attr value\n");
      Exit::DoExit(2);
      break;
    }
}

#ifndef NO_GEN_CLASS_INFO

/* Capacity of the generator registry below. */
const int MAX_GENCLASSINFO = 20;
/* Number of entries of _genClasses currently in use. */
static int _numGenClass = 0;

/* Registry pairing a data source name with the TData class generator
 * registered for it. Entries are appended by RegisterGenClassInfo and
 * searched by FindGenClass. */
static struct {
  char *source;            /* source name, compared with strcmp on lookup */
  GenClassInfo *genInfo;   /* generator registered for that source */
} _genClasses[MAX_GENCLASSINFO];

/*------------------------------------------------------------------------------
 * function: RegisterGenClassInfo
 * Append a (source, generator) pair to the generator registry.
 *
 *   source - name of the data source; the pointer itself is stored, no copy
 *            is made, so it must stay valid for as long as lookups happen.
 *   gen    - TData class generator to associate with that source.
 *
 * When the registry already holds MAX_GENCLASSINFO entries, an error is
 * printed and Exit::DoExit(1) is called.
 */
void
RegisterGenClassInfo(char *source, GenClassInfo *gen)
{
  if (_numGenClass == MAX_GENCLASSINFO) {
    fprintf(stderr, "too many interpreted TData class generator\n");
    Exit::DoExit(1);
  }

  /* Fill the next free slot, then publish it by bumping the count. */
  const int slot = _numGenClass;
  _genClasses[slot].source = source;
  _genClasses[slot].genInfo = gen;
  _numGenClass = slot + 1;
}

/*------------------------------------------------------------------------------
 * function: FindGenClass
 * Look up the TData class generator registered for a source name.
 *
 *   source - source name to search for (exact strcmp match).
 *
 * Returns the generator of the first matching registry entry. If no entry
 * matches, an error is printed and Exit::DoExit(1) is called.
 */
static GenClassInfo *
FindGenClass(char *source)
{
  int idx = 0;
  while (idx < _numGenClass) {
    if (!strcmp(_genClasses[idx].source, source)) {
      return _genClasses[idx].genInfo;
    }
    idx++;
  }

  fprintf(stderr,"Can't find TData generator for input source %s\n",source);
  Exit::DoExit(1);

  /* Only here so the function has a return value on every path. */
  return 0;
}
#endif

/*------------------------------------------------------------------------------
 * function: ParseChar
 * Decode the character described by the start of instr into c.
 *
 *   instr - either a plain character (taken literally) or one of the escape
 *           sequences \n, \r, \t, \' . Only the first character (or the
 *           first two, for an escape) is examined.
 *   c     - receives the decoded character on success; untouched on failure.
 *
 * Returns true on success. A backslash followed by anything else is reported
 * on stderr and false is returned.
 */
static Boolean
ParseChar(char *instr, char &c)
{
  /* No backslash: the first character stands for itself. */
  if (instr[0] != '\\') {
    c = instr[0];
    return true;
  }

  /* Backslash: the second character selects the escape. */
  switch (instr[1]) {
  case 'n':
    c = '\n';
    return true;
  case 'r':
    c = '\r';
    return true;
  case 't':
    c = '\t';
    return true;
  case '\'':
    c = '\'';
    return true;
  default:
    break;
  }

  fprintf(stderr, "ParseCat: invalid separator %s\n", instr);
  return false;
}

/* Separator characters collected by ParseSeparator. */
const int MAX_SEPARATORS = 50;
static char separators[MAX_SEPARATORS];
static int numSeparators;

/*------------------------------------------------------------------------------
 * function: ParseSeparator
 * Decode a list of separator characters into the file-scope 'separators'
 * array and record how many there are in 'numSeparators'.
 *
 *   numArgs - number of entries in args.
 *   args    - args[0] is skipped (presumably the directive keyword -- confirm
 *             against the caller); args[1..numArgs-1] are each decoded with
 *             ParseChar.
 *
 * Returns true on success. Returns false if there are more than
 * MAX_SEPARATORS separators or if any of them cannot be decoded; in the
 * latter case 'separators' may be partly overwritten while 'numSeparators'
 * keeps its previous value.
 */
static Boolean
ParseSeparator(int numArgs, char **args)
{
  /* Number of separator tokens; never negative, even for an empty list. */
  const int count = (numArgs > 0) ? numArgs - 1 : 0;

  /* The array holds exactly MAX_SEPARATORS entries, so that many is fine. */
  if (count > MAX_SEPARATORS) {
    fprintf(stderr, "ParseCat: too many separators, max = %d\n",
            MAX_SEPARATORS);
    return false;
  }

  for(int i = 0; i < count; i++) {
    if (!ParseChar(args[i + 1], separators[i]))
      return false;
  }

  numSeparators = count;
  return true;
}


/* Whitespace characters collected by ParseWhiteSpace; the array shares its
 * capacity (MAX_SEPARATORS) with the separator list. */
static char whitespaces[MAX_SEPARATORS];
static int numWhitespace;

/*------------------------------------------------------------------------------
 * function: ParseWhiteSpace
 * Decode a list of whitespace characters into the file-scope 'whitespaces'
 * array and record how many there are in 'numWhitespace'.
 *
 *   numArgs - number of entries in args.
 *   args    - args[0] is skipped (presumably the directive keyword -- confirm
 *             against the caller); args[1..numArgs-1] are each decoded with
 *             ParseChar.
 *
 * Returns true on success. Returns false if there are more than
 * MAX_SEPARATORS whitespace characters or if any of them cannot be decoded;
 * in the latter case 'whitespaces' may be partly overwritten while
 * 'numWhitespace' keeps its previous value.
 */
static Boolean
ParseWhiteSpace(int numArgs, char **args)
{
  /* Number of whitespace tokens; never negative, even for an empty list. */
  const int count = (numArgs > 0) ? numArgs - 1 : 0;

  /* The array holds exactly MAX_SEPARATORS entries, so that many is fine. */
  if (count > MAX_SEPARATORS) {
    fprintf(stderr, "ParseCat: too many whitespace characters, max = %d\n",
            MAX_SEPARATORS);
    return false;
  }

  for(int i = 0; i < count; i++) {
    if (!ParseChar(args[i + 1], whitespaces[i]))
      return false;
  }

  numWhitespace = count;
  return true;
}

/*------------------------------------------------------------------------------
 * function: ParseAttr
 * Parse one attribute definition and append the attribute to the file-scope
 * attribute list 'attrs' (created here on first use).
 *
 * Accepted token layout in args:
 *
 *   [sorted] attr|compattr <name> <type> [<length>] [= <value>]
 *            [hi <value>] [lo <value>]
 *
 *   - <type> is one of int, double, float, date, string; <length> is
 *     required for string and not accepted for the other types.
 *   - 'hi' must come before 'lo' when both are given.
 *   - Any leading keyword other than 'attr' marks the attribute as composite.
 *
 * Parameters:
 *   numArgs     - in/out: number of tokens in args; decremented by one when
 *                 a leading 'sorted' keyword is consumed.
 *   args        - the tokens of the definition line.
 *   recSize     - in/out: running record size. It is first rounded up to a
 *                 multiple of the size associated with the attribute's type,
 *                 passed to InsertAttr, and then advanced by the attribute's
 *                 length.
 *   hasFileType - whether a file type has been seen; needed to create the
 *                 attribute list for the first attribute.
 *   fileType    - file type handed to the AttrList constructor.
 *
 * Returns StatusOk when the attribute was added, StatusFailed (after printing
 * a message to stderr) on any syntax error or when no file type is known yet.
 * On failure neither the attribute list contents, numAttrs nor recSize are
 * changed.
 */
static DevStatus
ParseAttr(
        int &   numArgs,
        char ** args,
        int &   recSize,
        Boolean hasFileType,
        char *  fileType)
{
  /* Optional leading 'sorted' keyword. */
  Boolean isSorted = false;
  if (strcmp(args[0], "sorted") == 0) {
    /* Check that a second token exists before inspecting it. */
    if (numArgs < 2 ||
        (strcmp(args[1], "attr") && strcmp(args[1], "compattr"))) {
      fprintf(stderr,"'sorted' must be followed by 'attr' or 'compattr'\n");
      return StatusFailed;
    }
    isSorted = true;
    /* Drop the 'sorted' token so the remaining layout is uniform. */
    args = &args[1];
    numArgs--;
  }

  Boolean isComposite = strcmp(args[0], "attr") ? true : false;

  if (numArgs < 3) {
    fprintf(stderr,"attr needs at least 3 args\n");
    return StatusFailed;
  }

  /* Attribute type, stored length, and index of the first token after the
   * type (and after the length, for strings). */
  AttrType attrType;
  int attrLength;
  int attrNum = 3;
  char *typeName = args[2];

  if (strcmp(typeName, "int") == 0) {
    attrType = IntAttr;
    attrLength = sizeof(int);
  } else if (strcmp(typeName, "double") == 0) {
    attrType = DoubleAttr;
    attrLength = sizeof(double);
  } else if (strcmp(typeName, "float") == 0) {
    attrType = FloatAttr;
    attrLength = sizeof(float);
  } else if (strcmp(typeName, "date") == 0) {
    attrType = DateAttr;
    attrLength = sizeof(long);
  } else if (strcmp(typeName, "string") == 0) {
    attrType = StringAttr;
    if (numArgs < 4) {
      fprintf(stderr,"string attr needs length\n");
      return StatusFailed;
    }
    attrLength = atoi(args[3]);
    attrNum = 4;
  } else {
    fprintf(stderr,"unknown type %s\n",args[2]);
    return StatusFailed;
  }

  Boolean hasMatchVal = false;
  Boolean hasHi = false;
  Boolean hasLo = false;
  /* Only meaningful when the corresponding has* flag is set. */
  AttrVal matchVal, hiVal, loVal;

  /* Optional "= <value>". */
  if ((attrNum < numArgs) && (strcmp(args[attrNum], "=") == 0)) {
    attrNum++;
    if (attrNum > numArgs - 1) {
      fprintf(stderr,"expecting default value after '='\n");
      return StatusFailed;
    }
    hasMatchVal = true;
    SetVal(&matchVal, args[attrNum], attrType);
    attrNum++;
  }

  /* Optional "hi <value>". */
  if ((attrNum < numArgs) && (strcmp(args[attrNum], "hi") == 0)) {
    hasHi = true;
    attrNum++;
    if (attrNum >= numArgs) {
      fprintf(stderr, "Expecting value after keyword hi\n");
      return StatusFailed;
    }
    SetVal(&hiVal, args[attrNum], attrType);
    attrNum++;
  }

  /* Optional "lo <value>". */
  if ((attrNum < numArgs) && (strcmp(args[attrNum], "lo") == 0)) {
    hasLo = true;
    attrNum++;
    if (attrNum >= numArgs) {
      fprintf(stderr, "Expecting value after keyword lo\n");
      return StatusFailed;
    }
    SetVal(&loVal, args[attrNum], attrType);
    attrNum++;
  }

  /* Whatever is left is neither '=', 'hi' nor 'lo' in a valid position. */
  if (attrNum < numArgs) {
    fprintf(stderr, "Unrecognized chars in an attribute definition line\n");
    return StatusFailed;
  }

  /* The attribute list is created lazily and needs the file type. */
  if (attrs == NULL) {
    if (!hasFileType) {
      fprintf(stderr,"no file type yet\n");
      return StatusFailed;
    }
    attrs = new AttrList(fileType);
  }

  /* Size the record position is rounded to for this type. */
  int roundAmount = 0;
  switch(attrType) {
    case FloatAttr:
      roundAmount = sizeof(float);
      break;
    case DoubleAttr:
      roundAmount = sizeof(double);
      break;
    case StringAttr:
      roundAmount = sizeof(char);
      break;
    case DateAttr:
      roundAmount = sizeof(time_t);
      break;
    case IntAttr:
      roundAmount = sizeof(int);
      break;
    default:
      fprintf(stderr,"ParseCat: don't know type\n");
      Exit::DoExit(2);
  }

  /* Round recSize up to the next multiple of roundAmount if needed. */
  if (recSize % roundAmount != 0) {
    recSize = (recSize / roundAmount + 1) * roundAmount;
  }

  /* The name is copied only once nothing can fail any more, so no error
   * path has to release it. */
  char *attrName = CopyString(args[1]);

  attrs->InsertAttr(numAttrs, attrName, recSize,
                    attrLength, attrType, hasMatchVal,
                    &matchVal, isComposite, isSorted,
                    hasHi, &hiVal, hasLo, &loVal);
  numAttrs++;
  recSize += attrLength;

  /* NOTE(review): if CopyString allocates with new[], this should be
   * 'delete [] attrName' -- confirm against Util. */
  delete attrName;

  return StatusOk;
}

/*------------------------------------------------------------------------------
 * function: ParseCatPhysical
 * Read and parse a physical schema from a catalog file.
 * physicalOnly should be true if only a physical schema (not a physical
 * schema and a logical schema) is being read.
 */
static char *
ParseCatPhysical(char *catFile, char *dataFile, Boolean physicalOnly,
        TData *&tDataP)
{
        FILE *file= NULL;
        Boolean hasSource = false;
        char *source = 0; /* source of data. Which interpreter we use depends
                             on this */

        char buf[LINESIZE];
        Boolean hasFileType = false;

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -