⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 apparsecat2.c

📁 数据挖掘经典的hierarchial clustering algorithm
💻 C
📖 第 1 页 / 共 2 页
字号:
        char buf[LINESIZE];
        Boolean hasFileType = false;
        Boolean hasSeparator = false;
        Boolean hasWhitespace = false;
        Boolean hasComment = false;

        Boolean isAscii = false;
        Boolean GLoad = true;
        char *fileType = 0;
        int numArgs;
        char **args;
        int recSize = 0;
        char *sep = 0;
        int numSep = 0;
        char *commentString = 0;
        Group *currgrp = NULL;

#if 0
        if (attrs != NULL) delete attrs;
#endif
        attrs = NULL;
        numAttrs = 0;

        /*
        printf("opening file %s\n", catFile);
        */
        file = fopen(catFile, "r");
        if (file == NULL){
                fprintf(stderr,"ParseCat: can't open file %s\n", catFile);
                goto error;
        }
        _line = 0;
        while ((fgets(buf,LINESIZE, file) != NULL) && strcmp(buf, "endSchema\n"))
        {
                StripTrailingNewline(buf);

                _line++;
                /*
                printf("getting line %s\n", buf);
                */
                if (buf[0] == '#' || buf[0] == '\n' || buf[0] == '\r')
                        continue;
                Parse(buf,numArgs, args);
                if (numArgs == 0)
                        continue;

#ifdef DEBUG
                printf("parse: ");
                for(int ind = 0; ind < numArgs; ind++)
                  printf("'%s' ", args[ind]);
                printf("\n");
#endif

                if (strcmp(args[0],"end")== 0)
                {
                        break;
                }
                else if (strcmp(args[0],"source") == 0)
                {
                        source = CopyString(args[1]);
                        hasSource = true;
                }
                else if (strcmp(args[0],"separator")== 0)
                {
                        /* parse separator */
                        hasSeparator = ParseSeparator(numArgs, args);
                        if (!hasSeparator){
                                fprintf(stderr,"can't parse separator\n");
                                goto error;
                        }
                }
                else if (strcmp(args[0],"whitespace")== 0)
                {
                        /* parse whitespace */
                        hasWhitespace = ParseWhiteSpace(numArgs, args);
                }
                else if (strcmp(args[0],"comment") == 0)
                {
                        if (numArgs != 2){
                                fprintf(stderr,"can't parse comment string\n");
                                goto error;
                        }
                        hasComment = true;
                        commentString = CopyString(args[1]);
                }
                else if (strcmp(args[0],"type") == 0)
                {
                        if (numArgs != 3)
                        {
                                fprintf(stderr,"can't parse file type need 3 args\n");
                                goto error;
                        }
                        if (strcmp(args[2],"ascii") == 0)
                        {
                                isAscii = true;
                        }
                        else if (strcmp(args[2],"binary") == 0)
                        {
                                isAscii = false;
                        }
                        else
                        {
                                fprintf(stderr,"don't know file type %s, must be ascii or binary", args[2]);
                                goto error;
                        }
                        fileType = CopyString(args[1]);
                        hasFileType = true;
                        if (physicalOnly)
                        {
                                /* Let's add the schema name to the directory now */
                                /* First check if the schema is already loaded, in
                                   which case we do nothing more */
                                if (gdir->find_entry(StripPath(catFile)))
                                {
                                  GLoad = false;
                                }
                                else
                                {
#ifdef    DEBUG
                                  printf("Adding schema %s to directory\n", StripPath(catFile));
#endif
                                  gdir->add_entry(StripPath(catFile));
                                  GLoad = true;
                                }
                        }
                }
                else if (strcmp(args[0],"attr") == 0 ||
                           strcmp(args[0],"compattr") == 0 ||
                           strcmp(args[0],"sorted") == 0)
                {
                        if (ParseAttr(numArgs, args, recSize, hasFileType, fileType) !=
                                StatusOk) goto error;
                }
                else if (physicalOnly && !strcmp(args[0], "group"))
                {
                  if (GLoad) {
                      if (!currgrp)             /* Top level */
                      {
                        currgrp = new Group(args[1], NULL, TOPGRP);
                        gdir->add_topgrp(StripPath(catFile), currgrp);
                      }
                      else
                        currgrp = currgrp->insert_group(args[1]);
                    }
                }
                else if (physicalOnly && !strcmp(args[0], "item"))
                {
                  if (GLoad)
                  {
                      currgrp->insert_item(args[1]);
                  }
                }
                else if (physicalOnly && !strcmp(args[0], "endgroup"))
                {
                  if (GLoad)
                  {
                      if (!currgrp)
                      {
                        fprintf(stderr, "Group begins and ends not matched\n");
                        goto error;
                      }
                      currgrp = currgrp->parent_group();
                    }
                }
                else
                {
                fprintf(stderr,"ParseCat: unknown command %s\n", args[0]);
                goto error;
                }
        }

        /* round record size */
        if (recSize/8*8 != recSize){
                /* round to rounding boundaries */
                recSize = (recSize/8+1)*8;
        }

        if (!hasFileType ){
                fprintf(stderr,"ParseCat: no file type specified\n");
                goto error;
        }

        if (numAttrs == 0){
                fprintf(stderr,"ParseCat: no attribute specified\n");
                goto error;
        }

        int i,j;

        if (physicalOnly)
        {
        /* If no group has been defined, create a default group */
        if (GLoad && (gdir->num_topgrp(StripPath(catFile)) == 0))
        {
          Group *newgrp = new Group("__default", NULL, TOPGRP);
          gdir->add_topgrp(StripPath(catFile), newgrp);
          for (i=0; i < numAttrs; i++) {
            AttrInfo *iInfo = attrs->Get(i);
            if (iInfo->type != StringAttr)
              newgrp->insert_item(iInfo->name);
          }
        }
        }

        /* test attribute names */
        for (i=0 ; i < numAttrs-1;i++) {
                AttrInfo *iInfo = attrs->Get(i);
                if (strcmp(iInfo->name,"recId") == 0){
                        fprintf(stderr,"attribute name 'recId' is reserved\n");
                        goto error;
                }
                for (j=i+1; j < numAttrs; j++){
                        AttrInfo *jInfo = attrs->Get(j);
                        if (strcmp(iInfo->name,jInfo->name)== 0){
                                fprintf(stderr,"ParseCat:duplicate attribute name %s\n",
                                        iInfo->name);
                                goto error;
                        }
                }
        }

        if (isAscii) {
          if (hasSeparator && hasWhitespace){
            fprintf(stderr,"can't specify both whitespace and separator\n");
            goto error;
          }
          if (!(hasSeparator || hasWhitespace)){
            fprintf(stderr,"must specify either whitespace or separator\n");
            goto error;
          }
        }

        if (hasSeparator) {
          sep = new char[numSeparators];
          for (i=0; i < numSeparators; i++){
            sep[i] = separators[i];
          }
          numSep = numSeparators;
        }
        if (hasWhitespace) {
          sep = new char[numWhitespace];
          for (i=0; i < numWhitespace; i++){
            sep[i] = whitespaces[i];
            }
          numSep = numWhitespace;
        }

        if (!hasComment)
          commentString = "#";

        if (hasSource)
        {
#ifndef NO_GEN_CLASS_INFO
                if (physicalOnly)
                {
                        printf("source: %s\n",source);
                }
                else
                {
                        printf("schema: %s\n",source);
                }
                GenClassInfo *genInfo = FindGenClass(source);
                ControlPanel::RegisterClass(
                        genInfo->Gen(source, isAscii, fileType,
                        attrs, recSize,sep, numSep, hasSeparator, commentString),
                        true);
#else
                fprintf(stderr, "Illegal token 'source' in schema\n");
                Exit::DoExit(1);
#endif
        }
        else
        {
                // strdups because TData destructor will try to free type
                // strings -- make sure they're dynamic.
                if (isAscii) {
#ifdef    DEBUG
                  printf("default source, recSize %d\n",recSize);
#endif

#ifdef   USE_SEQ
                  tDataP = new TDataSeqAsciiInterp(catFile, strdup("UNIXFILE"),
                    dataFile, recSize, attrs, sep, numSep,
                    hasSeparator, commentString);
#else
                  tDataP = new TDataAsciiInterp(catFile, strdup("UNIXFILE"),
                    dataFile, recSize, attrs, sep, numSep,
                    hasSeparator, commentString);
#endif
                }
                else
                {
#ifdef    DEBUG
                  printf("default binary source, recSize %d\n",recSize);
#endif
                  // Note: the second use of recSize is for the physical
                  // record size.  This needs to get changed.  RKW 96/06/27.
                  tDataP = new TDataBinaryInterp(catFile, strdup("UNIXFILE"), dataFile,
                        recSize, recSize/*TEMP*/, attrs);
                }
        }

        fclose(file);

        if (Init::PrintTDataAttr()) attrs->Print();
        return fileType;

error:
        if (file != NULL) fclose(file);

        if (attrs != NULL) delete attrs;
        fprintf(stderr,"error at line %d\n", _line);
        return NULL;
}

/*------------------------------------------------------------------------------
 * function: ParseCatLogical
 * Read and parse a logical schema from a catalog file.
 *
 * catFile: path of the logical schema file.  Its StripPath() name is the
 *          key under which the groups are registered in gdir.
 * sname:   handed back unchanged to the caller on success.
 *
 * Returns sname on success.  Returns NULL, after printing a diagnostic and
 * the offending line number to stderr, when the file cannot be opened or
 * read, a command is unknown or lacks its name argument, an "item" appears
 * outside of any group, or an "endgroup" has no matching "group".
 *
 * The first line of the file is consumed without being parsed as a command.
 * Every later line must be empty, start with '#', or be one of
 * "group <name>", "item <name>" or "endgroup".
 */
static char *
ParseCatLogical(char *catFile, char *sname)
{
  Group *currgrp = NULL;
  FILE *file = NULL;
  Boolean GLoad = true;
  char buf[LINESIZE];
  int numArgs;
  char **args;

  file = fopen(catFile, "r");
  if (file == NULL) {
    fprintf(stderr,"ParseCat: can't open file %s\n", catFile);
    goto error;
  }
  _line = 0;

  /* Read the first line first.  It is not a group command, so it is only
     skipped here; a file with no first line at all cannot be a schema. */
  if (fgets(buf, LINESIZE, file) == NULL) {
    fprintf(stderr,"ParseCat: can't read first line of %s\n", catFile);
    goto error;
  }
  /* Count the skipped line so reported line numbers match the file. */
  _line = 1;

  /* Let's add the group name to the directory now */
  /* The groups for a particular logical schema are identified by the
     schema file name. This is because the type name of the physical schema
     is not a unique identifier - several logical schemas may use the same
     physical schema */
  /* First check if the schema is already loaded, in
     which case we do nothing more */

  if (gdir->find_entry(StripPath(catFile)))
    GLoad = false;
  else
  {
    printf("Adding schema %s to directory \n", StripPath(catFile));
    gdir->add_entry(StripPath(catFile));
    GLoad = true;
  }

  while (fgets(buf, LINESIZE, file) != NULL) {
    StripTrailingNewline(buf);
    _line++;

    /* Skip comment lines and empty lines. */
    if (buf[0] == '#' || buf[0] == '\n' || buf[0] == '\r')
      continue;
    Parse(buf, numArgs, args);
    if (numArgs == 0)
      continue;

#ifdef DEBUG
    printf("parse: ");
    for(int ind = 0; ind < numArgs; ind++)
      printf("'%s' ", args[ind]);
    printf("\n");
#endif

    if (strcmp(args[0], "group") == 0)
    {
      /* args[1] is the group name; make sure it is really there. */
      if (numArgs < 2) {
        fprintf(stderr, "ParseCat: group needs a name\n");
        goto error;
      }
      if (GLoad) {
        if (!currgrp)               /* Top level */
        {
          currgrp = new Group(args[1], NULL, TOPGRP);
          gdir->add_topgrp(StripPath(catFile), currgrp);
        }
        else
          currgrp = currgrp->insert_group(args[1]);
      }
    }
    else if (strcmp(args[0], "item") == 0)
    {
      /* args[1] is the item name; make sure it is really there. */
      if (numArgs < 2) {
        fprintf(stderr, "ParseCat: item needs a name\n");
        goto error;
      }
      if (GLoad) {
        /* An item can only be inserted into an open group. */
        if (!currgrp) {
          fprintf(stderr, "ParseCat: item %s is not inside a group\n",
                  args[1]);
          goto error;
        }
        currgrp->insert_item(args[1]);
      }
    }
    else if (strcmp(args[0], "endgroup") == 0)
    {
      if (GLoad) {
        if (!currgrp)
        {
          fprintf(stderr, "Group begins and ends not matched\n");
          goto error;
        }
        /* Closing a group makes its parent the current group again. */
        currgrp = currgrp->parent_group();
      }
    }
    else {
      fprintf(stderr,"ParseCat: unknown command %s\n", args[0]);
      goto error;
    }
  }

  /* If no group has been defined, create a default group holding every
     attribute that is not a string attribute. */
  if (GLoad && (gdir->num_topgrp(StripPath(catFile)) == 0))
  {
    Group *newgrp = new Group("__default", NULL, TOPGRP);
    gdir->add_topgrp(StripPath(catFile), newgrp);
    for(int i = 0; i < numAttrs; i++) {
      AttrInfo *iInfo = attrs->Get(i);
      if (iInfo->type != StringAttr)
        newgrp->insert_item(iInfo->name);
    }
  }

  fclose(file);

  return sname;

 error:
  if (file != NULL)
    fclose(file);

  fprintf(stderr,"error at line %d\n", _line);
  return NULL;
}

/*------------------------------------------------------------------------------
 * function: ApParseCat
 * Read and parse a schema file.
 *
 * catFile:  path of the schema file to parse.
 * dataFile: passed through to ParseCatPhysical.
 * tDataP:   passed through (by reference) to ParseCatPhysical.
 *
 * If the first whitespace-delimited token of catFile is "physical", the
 * next token names a physical schema file: that file is parsed with
 * ParseCatPhysical (third argument false) and, if that succeeds, catFile
 * itself is parsed with ParseCatLogical.  Otherwise catFile is parsed with
 * ParseCatPhysical (third argument true).
 *
 * Returns the result of the last parser that ran, or NULL if catFile cannot
 * be opened, names no physical schema file, or the physical schema fails to
 * parse.
 */
char *
ApParseCat(char *catFile, char *dataFile, TData *&tDataP)
{
  FILE *fp = fopen(catFile, "r");
  if (!fp)
  {
    fprintf(stderr,"ParseCat: can't open file %s\n", catFile);
    return NULL;
  }

  // The %99s field widths below must stay one less than sizeof(buf) so an
  // over-long token can never write past the end of the buffer.
  char buf[100];

  if (fscanf(fp, "%99s", buf) != 1 || strcmp(buf, "physical"))
  {
    // Not a logical schema: the file is a physical schema in its own right.
    fclose(fp);
    return ParseCatPhysical(catFile, dataFile, true, tDataP);
  }

  // Read in the file name of the physical schema.
  int gotName = fscanf(fp, "%99s", buf);
  fclose(fp);
  if (gotName != 1)
  {
    // Without this check buf would still hold "physical" and be used as a
    // file name.
    fprintf(stderr,"ParseCat: no physical schema file named in %s\n",
            catFile);
    return NULL;
  }

  char *sname = ParseCatPhysical(buf, dataFile, false, tDataP);
  if (!sname)
  {
    // The physical schema did not parse; do not go on to register groups
    // for a logical schema that has nothing underneath it.
    return NULL;
  }

  return ParseCatLogical(catFile, sname);
}

/*------------------------------------------------------------------------------
 * function: ApParseSchema
 * Parse a schema from buffer(s).
 *
 * Placeholder: none of the three arguments is examined and the result is
 * always NULL.
 */
char *
ApParseSchema(char *schemaName, char *physSchema, char *logSchema)
{
        /* The parameters are deliberately unused. */
        (void) schemaName;
        (void) physSchema;
        (void) logSchema;

        return NULL;
}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -