📄 apparsecat.c

📁 Solaris环境下的数据挖掘算法：birch聚类算法。该算法适用于对大量数据的挖掘。
💻 C
📖 第 1 页 / 共 2 页
字号:
上一页 12
	Boolean hasSeparator = false;	Boolean hasWhitespace = false;	Boolean hasComment = false;	Boolean isAscii = false;	Boolean GLoad = true;	char *fileType = 0;	int numArgs;	char **args;	int recSize = 0;	char *sep = 0;	int numSep = 0;	char *commentString = 0;	Group *currgrp = NULL;#if 0	if (attrs != NULL) delete attrs;#endif	attrs = NULL;	numAttrs = 0;	/*	printf("opening file %s\n", catFile);	*/	file = fopen(catFile, "r");	if (file == NULL){		fprintf(stderr,"ParseCat: can't open file %s\n", catFile);		goto error;	}	_line = 0;	while ((fgets(buf,LINESIZE, file) != NULL) && strcmp(buf, "endSchema\n"))	{		StripTrailingNewline(buf);		_line++;		/*		printf("getting line %s\n", buf);		*/		if (buf[0] == '#' || buf[0] == '\n' || buf[0] == '\r')			continue;		Parse(buf,numArgs, args);		if (numArgs == 0)			continue;#ifdef DEBUG		printf("parse: ");		for(int ind = 0; ind < numArgs; ind++)		  printf("'%s' ", args[ind]);		printf("\n");#endif		if (strcmp(args[0],"end")== 0)		{			break;		}		else if (strcmp(args[0],"source") == 0)		{			source = CopyString(args[1]);			hasSource = true;		}		else if (strcmp(args[0],"separator")== 0)		{			/* parse separator */			hasSeparator = ParseSeparator(numArgs, args);			if (!hasSeparator){				fprintf(stderr,"can't parse separator\n");				goto error;			}		}		else if (strcmp(args[0],"whitespace")== 0)		{			/* parse separator */			hasWhitespace = ParseWhiteSpace(numArgs, args);		}		else if (strcmp(args[0],"comment") == 0)		{			if (numArgs != 2){				fprintf(stderr,"can't parse comment string\n");				goto error;			}			hasComment = true;			commentString = CopyString(args[1]);		}		else if (strcmp(args[0],"type") == 0)		{			if (numArgs != 3)			{				fprintf(stderr,"can't parse file type need 3 args\n");				goto error;			}			if (strcmp(args[2],"ascii") == 0)			{				isAscii = true;			}			else if (strcmp(args[2],"binary") == 0)			{				isAscii = false;			}			else			{				fprintf(stderr,"don't know file type %s, must be ascii or binary", args[2]);				goto error;			}			fileType = CopyString(args[1]);			hasFileType = true;			if (physicalOnly)			{				/* Let's add the schema name to the directory now */				/* First check if the schema is already loaded, in				   which case we do nothing more */				if (gdir->find_entry(StripPath(catFile)))				{				  GLoad = false;				}				else				{#ifdef    DEBUG				  printf("Adding schema %s to directory\n", StripPath(catFile));#endif				  gdir->add_entry(StripPath(catFile));				  GLoad = true;				}			}		}		else if (strcmp(args[0],"attr") == 0 ||			   strcmp(args[0],"compattr") == 0 ||			   strcmp(args[0],"sorted") == 0)		{			if (ParseAttr(numArgs, args, recSize, hasFileType, fileType) !=				StatusOk) goto error;		}		else if (physicalOnly && !strcmp(args[0], "group"))		{		  if (GLoad) {		      if (!currgrp)		/* Top level */		      {			currgrp = new Group(args[1], NULL, TOPGRP);			gdir->add_topgrp(StripPath(catFile), currgrp);		      }		      else			currgrp = currgrp->insert_group(args[1]);		    }		}		else if (physicalOnly && !strcmp(args[0], "item"))		{		  if (GLoad)		  {		      currgrp->insert_item(args[1]);		  }		}		else if (physicalOnly && !strcmp(args[0], "endgroup"))		{		  if (GLoad)		  {		      if (!currgrp)		      {			fprintf(stderr, "Group begins and ends not matched\n");			goto error;		      }		      currgrp = currgrp->parent_group();		    }		}		else		{	    	fprintf(stderr,"ParseCat: unknown command %s\n", args[0]);	    	goto error;		}	}	/* round record size */	if (recSize/8*8 != recSize){		/* round to rounding boundaries */		recSize = (recSize/8+1)*8;	}	if (!hasFileType ){		fprintf(stderr,"ParseCat: no file type specified\n");		goto error;	}	if (numAttrs == 0){		fprintf(stderr,"ParseCat: no attribute specified\n");		goto error;	}	int i,j;	if (physicalOnly)	{	/* If no group has been defined, create a default group */	if (GLoad && (gdir->num_topgrp(StripPath(catFile)) == 0))	{	  Group *newgrp = new Group("__default", NULL, TOPGRP);	  gdir->add_topgrp(StripPath(catFile), newgrp);	  for (i=0; i < numAttrs; i++) {	    AttrInfo *iInfo = attrs->Get(i);	    if (iInfo->type != StringAttr)	      newgrp->insert_item(iInfo->name);	  }	}	}	/* test attribute names */	for (i=0 ; i < numAttrs-1;i++) {		AttrInfo *iInfo = attrs->Get(i);		if (strcmp(iInfo->name,"recId") == 0){			fprintf(stderr,"attribute name 'recId' is reserved\n");			goto error;		}		for (j=i+1; j < numAttrs; j++){			AttrInfo *jInfo = attrs->Get(j);			if (strcmp(iInfo->name,jInfo->name)== 0){				fprintf(stderr,"ParseCat:duplicate attribute name %s\n",					iInfo->name);				goto error;			}		}	}	if (isAscii) {	  if (hasSeparator && hasWhitespace){	    fprintf(stderr,"can't specify both whitespace and separator\n");	    goto error;	  }	  if (!(hasSeparator || hasWhitespace)){	    fprintf(stderr,"must specify either whitespace or separator\n");	    goto error;	  }	}	if (hasSeparator) {	  sep = new char[numSeparators];	  for (i=0; i < numSeparators; i++){	    sep[i] = separators[i];	  }	  numSep = numSeparators;	}	if (hasWhitespace) {	  sep = new char[numWhitespace];	  for (i=0; i < numWhitespace; i++){	    sep[i] = whitespaces[i];	    }	  numSep = numWhitespace;	}		if (!hasComment)	  commentString = "#";	  	if (hasSource)	{#ifndef NO_GEN_CLASS_INFO		if (physicalOnly)		{			printf("source: %s\n",source);		}		else		{			printf("schema: %s\n",source);		}		GenClassInfo *genInfo = FindGenClass(source);		ControlPanel::RegisterClass(			genInfo->Gen(source, isAscii, fileType,			attrs, recSize,sep, numSep, hasSeparator, commentString),			true);#else		fprintf(stderr, "Illegal token 'source' in schema\n");		Exit::DoExit(1);#endif	}	else	{		// strdups because TData destructor will try to free type		// strings -- make sure they're dynamic.		if (isAscii) {#ifdef    DEBUG		  printf("default source, recSize %d\n",recSize);#endif		  tDataP = new TDataAsciiInterp(catFile, strdup("UNIXFILE"), dataFile,			recSize, attrs, sep, numSep, hasSeparator, commentString);		}		else		{#ifdef    DEBUG		  printf("default binary source, recSize %d\n",recSize);#endif		  // Note: the second use of recSize is for the physical		  // record size.  This needs to get changed.  RKW 96/06/27.		  tDataP = new TDataBinaryInterp(catFile, strdup("UNIXFILE"), dataFile,			recSize, recSize/*TEMP*/, attrs);		}	}	fclose(file);	if (Init::PrintTDataAttr()) attrs->Print();	return fileType;error:	if (file != NULL) fclose(file);	if (attrs != NULL) delete attrs;	fprintf(stderr,"error at line %d\n", _line);	return NULL;}/*------------------------------------------------------------------------------ * function: ParseCatLogical * Read and parse a logical schema from a catalog file. */static char *ParseCatLogical(char *catFile, char *sname){  Group *currgrp = NULL;  FILE *file= NULL;  Boolean GLoad = true;  char buf[LINESIZE];  int numArgs;  char **args;  file = fopen(catFile, "r");  if (file == NULL) {    fprintf(stderr,"ParseCat: can't open file %s\n", catFile);    goto error;  }  _line = 0;    /* read the first line first */  fgets(buf, LINESIZE, file);    /* Let's add the group name to the directory now */  /* The groups for a particular logical schema are identified by the      schema file name. This is bcos the type name of the physical schema     is not a unique identifier - several logical schemas may use the same     physical schema */  /* First check if the schema is already loaded, in     which case we do nothing more */  if (gdir->find_entry(StripPath(catFile)))    GLoad = false;  else  {    printf("Adding schema %s to directory \n", StripPath(catFile));    gdir->add_entry(StripPath(catFile));    GLoad = true;  }   while (fgets(buf,LINESIZE, file) != NULL) {	  StripTrailingNewline(buf);            _line++;      /*	 printf("getting line %s\n", buf);	 */      if (buf[0] == '#' || buf[0] == '\n' || buf[0] == '\r')	continue;      Parse(buf,numArgs, args);      if (numArgs == 0)	continue;     #ifdef DEBUG      printf("parse: ");      for(int ind = 0; ind < numArgs; ind++)	printf("'%s' ", args[ind]);      printf("\n");#endif      if (strcmp(args[0], "group") == 0)      {	if (GLoad) {	    if (!currgrp)		/* Top level */	    {	      currgrp = new Group(args[1], NULL, TOPGRP);	      gdir->add_topgrp(StripPath(catFile), currgrp);	    }	    else	      currgrp = currgrp->insert_group(args[1]);	  }      }      else if (strcmp(args[0], "item") == 0)      {	if (GLoad) {	    currgrp->insert_item(args[1]);	}      }      else if (strcmp(args[0], "endgroup") == 0)      {	if (GLoad) {	    if (!currgrp)	    {	      fprintf(stderr, "Group begins and ends not matched\n");	      goto error;	    }	    currgrp = currgrp->parent_group();	  }      }      else {	  fprintf(stderr,"ParseCat: unknown command %s\n", args[0]);	  goto error;      }  }  /* If no group has been defined, create a default group */  if (GLoad && (gdir->num_topgrp(StripPath(catFile)) == 0))  {    Group *newgrp = new Group("__default", NULL, TOPGRP);    gdir->add_topgrp(StripPath(catFile), newgrp);    for(int i = 0; i < numAttrs; i++) {      AttrInfo *iInfo = attrs->Get(i);      if (iInfo->type != StringAttr)	newgrp->insert_item(iInfo->name);    }  }  fclose(file);  return sname; error:  if (file != NULL)    fclose(file);    fprintf(stderr,"error at line %d\n", _line);  return NULL;}/*------------------------------------------------------------------------------ * function: ApParseCat * Read and parse a schema file. */char *ApParseCat(char *catFile, char *dataFile, TData *&tDataP) {  // Check the first line of catFile - if it is "physical abc",  // call ParseCatPhysical(abc, false) and then ParseCatLogical(catFile)  // Otherwise, simply call ParseCatPhysical(catFile, true).  char *	result = NULL;  FILE *fp = fopen(catFile, "r");  if (!fp)  {    fprintf(stderr,"ParseCat: can't open file %s\n", catFile);  }  else  {    char buf[100];    if (fscanf(fp, "%s", buf) != 1 || strcmp(buf, "physical"))	{      fclose(fp);      result = ParseCatPhysical(catFile, dataFile, true, tDataP);    }	else	{      // Read in the file name      fscanf(fp, "%s", buf);      fclose(fp);      char *sname;      if (!(sname = ParseCatPhysical(buf, dataFile, false, tDataP)))	  {		result = NULL;	  }      result = ParseCatLogical(catFile, sname);	}  }  return result;}/*------------------------------------------------------------------------------ * function: ApParseSchema * Parse a schema from buffer(s). */char *ApParseSchema(char *schemaName, char *physSchema, char *logSchema){	char *		result = NULL;	return result;}
上一页 12
💿 文件大小 1290 K
👤 上传用户 GUAIGUAICHENGTI
📂 所属分类人工智能/神经网络
🏷️ 相关标签

#Solaris #birch #环境 #数据挖掘算法
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -