xmlwf.c

来自「ARM上的如果你对底层感兴趣」· C语言 代码 · 共 751 行 · 第 1/2 页

C
751
字号
  if (publicId)
    ftprintf(fp, T(" public=\"%s\""), publicId);
  if (systemId) {
    fputts(T(" system=\""), fp);
    characterData(fp, systemId, tcslen(systemId));
    puttc(T('"'), fp);
  }
  metaLocation(parser);
  fputts(T("/>\n"), fp);
}

/* Argument bundle handed to processFile through the opaque pointer that
   filemap forwards to its callback. */
typedef struct {
  XML_Parser parser;  /* parser that is fed the file contents */
  int *retPtr;        /* processFile stores 1 here on success, 0 on a parse error */
} PROCESS_ARGS;

/* Reports the parser's current error for `filename`.

   When the error code has a message, a line of the form
   "file:line:column: message" is written to stdout; otherwise the raw
   code is written to stderr. */
static
void reportError(XML_Parser parser, const XML_Char *filename)
{
  int code = XML_GetErrorCode(parser);
  const XML_Char *message = XML_ErrorString(code);
  /* The explicit casts make each argument match its conversion (%d and
     %ld) exactly, whatever integer type the position accessors return;
     a mismatched variadic argument is undefined behaviour. */
  if (message)
    ftprintf(stdout, T("%s:%d:%ld: %s\n"),
             filename,
             (int)XML_GetErrorLineNumber(parser),
             (long)XML_GetErrorColumnNumber(parser),
             message);
  else
    ftprintf(stderr, T("%s: (unknown message %d)\n"), filename, code);
}

static
void processFile(const void *data, size_t size, const XML_Char *filename, void *args)
{
  XML_Parser parser = ((PROCESS_ARGS *)args)->parser;
  int *retPtr = ((PROCESS_ARGS *)args)->retPtr;
  if (!XML_Parse(parser, data, size, 1)) {
    reportError(parser, filename);
    *retPtr = 0;
  }
  else
    *retPtr = 1;
}

/* Returns 1 if c lies in 'a'..'z' or 'A'..'Z', otherwise 0. */
static
int isAsciiLetter(XML_Char c)
{
  if (c >= T('a') && c <= T('z'))
    return 1;
  return c >= T('A') && c <= T('Z');
}

/* Resolves `systemId` against `base`.

   `systemId` itself is returned (and *toFree is left null) when there is
   no base, when systemId starts with '/', or, on WIN32, when it starts
   with a backslash or a drive letter followed by ':'.  Otherwise a new
   string is allocated: the part of `base` up to and including its last
   path separator, followed by `systemId`.  That buffer is returned and
   also stored in *toFree so the caller can free() it.  If the allocation
   fails, `systemId` is returned unchanged and *toFree is null. */
static
const XML_Char *resolveSystemId(const XML_Char *base, const XML_Char *systemId, XML_Char **toFree)
{
  XML_Char *joined;
  XML_Char *tail;
  XML_Char *sep;

  *toFree = 0;
  if (!base)
    return systemId;
  if (*systemId == T('/'))
    return systemId;
#ifdef WIN32
  if (*systemId == T('\\'))
    return systemId;
  if (isAsciiLetter(systemId[0]) && systemId[1] == T(':'))
    return systemId;
#endif

  /* Room for both strings plus a terminator (one spare character). */
  joined = (XML_Char *)malloc((tcslen(base) + tcslen(systemId) + 2)*sizeof(XML_Char));
  *toFree = joined;
  if (!joined)
    return systemId;

  tcscpy(joined, base);
  /* Find where the file-name part of base begins; systemId overwrites it. */
  tail = joined;
  sep = tcsrchr(tail, T('/'));
  if (sep)
    tail = sep + 1;
#ifdef WIN32
  /* Only a backslash after the last '/' can move the cut point further. */
  sep = tcsrchr(tail, T('\\'));
  if (sep)
    tail = sep + 1;
#endif
  tcscpy(tail, systemId);
  return joined;
}

/* External-entity handler that parses the referenced entity through
   filemap.

   A child parser is created for `context`, the entity's file name is
   resolved from `base` and `systemId`, and the file is handed to
   processFile via filemap.  Returns 1 if the entity parsed successfully
   and 0 on any failure (child parser could not be created, base could not
   be set, file could not be mapped, or the entity had a parse error).
   `publicId` is not used. */
static
int externalEntityRefFilemap(XML_Parser parser,
                             const XML_Char *context,
                             const XML_Char *base,
                             const XML_Char *systemId,
                             const XML_Char *publicId)
{
  /* Start from failure so a path that never reaches processFile cannot
     return an indeterminate value. */
  int result = 0;
  XML_Char *s;
  const XML_Char *filename;
  PROCESS_ARGS args;
  XML_Parser entParser = XML_ExternalEntityParserCreate(parser, context, 0);

  /* Without a child parser there is nothing to parse with; returning 0
     tells the calling parser that entity handling failed. */
  if (!entParser)
    return 0;
  args.retPtr = &result;
  args.parser = entParser;
  filename = resolveSystemId(base, systemId, &s);
  if (!XML_SetBase(entParser, filename))
    ftprintf(stderr, T("%s: out of memory\n"), filename);
  else if (!filemap(filename, processFile, &args))
    result = 0;
  free(s);  /* s is null when resolveSystemId did not allocate */
  XML_ParserFree(entParser);
  return result;
}

/* Parses `filename` by reading it in READ_SIZE chunks straight into the
   parser's own buffer.

   Returns 1 once end of file has been reached and every chunk, including
   the final empty one, was accepted by the parser.  Returns 0 if the file
   cannot be opened, the parser cannot supply a buffer, a read fails, or
   the parser reports an error; each of those cases prints a diagnostic. */
static
int processStream(const XML_Char *filename, XML_Parser parser)
{
  int status = 0;
  int fd = topen(filename, O_BINARY|O_RDONLY);
  if (fd < 0) {
    tperror(filename);
    return 0;
  }
  for (;;) {
    int nread;
    char *buf = XML_GetBuffer(parser, READ_SIZE);
    if (!buf) {
      ftprintf(stderr, T("%s: out of memory\n"), filename);
      break;
    }
    nread = read(fd, buf, READ_SIZE);
    if (nread < 0) {
      /* Report before the descriptor is closed below. */
      tperror(filename);
      break;
    }
    /* A zero-length read marks end of input and is passed on as the
       final chunk. */
    if (!XML_ParseBuffer(parser, nread, nread == 0)) {
      reportError(parser, filename);
      break;
    }
    if (nread == 0) {
      status = 1;
      break;
    }
  }
  close(fd);
  return status;
}

/* External-entity handler that parses the referenced entity with
   processStream.

   A child parser is created for `context`, the entity's file name is
   resolved from `base` and `systemId`, and the file is streamed into the
   child parser.  Returns processStream's result, or 0 if the child parser
   could not be created or its base could not be set.  `publicId` is not
   used. */
static
int externalEntityRefStream(XML_Parser parser,
                            const XML_Char *context,
                            const XML_Char *base,
                            const XML_Char *systemId,
                            const XML_Char *publicId)
{
  XML_Char *s;
  const XML_Char *filename;
  int ret = 0;
  XML_Parser entParser = XML_ExternalEntityParserCreate(parser, context, 0);

  /* Without a child parser there is nothing to parse with; returning 0
     tells the calling parser that entity handling failed. */
  if (!entParser)
    return 0;
  filename = resolveSystemId(base, systemId, &s);
  if (XML_SetBase(entParser, filename))
    ret = processStream(filename, entParser);
  else
    ftprintf(stderr, T("%s: out of memory\n"), filename);
  free(s);  /* s is null when resolveSystemId did not allocate */
  XML_ParserFree(entParser);
  return ret;
}

/* Convert callback installed by unknownEncoding: `data` points to the int
   code page number, which is forwarded to codepageConvert together
   with `p`. */
static
int unknownEncodingConvert(void *data, const char *p)
{
  const int *codePage = (const int *)data;
  return codepageConvert(*codePage, p);
}

/* Unknown-encoding handler for names of the form "windows-NNNN".

   `name` must start with "windows-" (each letter may independently be
   upper or lower case) and the rest must consist solely of decimal digits
   forming a number below 0x10000; an empty digit string yields code
   page 0.  If codepageMap accepts that code page, `info` is filled in: its
   map by codepageMap, its convert callback, free as the release function,
   and a heap-allocated int holding the code page as its data.

   Returns 1 on success and 0 if the name does not match, the code page
   is rejected, or the allocation fails.  `userData` is not used. */
static
int unknownEncoding(void *userData,
                    const XML_Char *name,
                    XML_Encoding *info)
{
  static const XML_Char prefixL[] = T("windows-");
  static const XML_Char prefixU[] = T("WINDOWS-");
  int pos = 0;
  int cp = 0;
  int *cpStore;

  /* Match the prefix, accepting either case at each position. */
  while (prefixU[pos]) {
    if (name[pos] != prefixU[pos] && name[pos] != prefixL[pos])
      return 0;
    pos++;
  }

  /* Accumulate the decimal code page number that follows the prefix. */
  while (name[pos]) {
    XML_Char ch = name[pos++];
    if (ch < T('0') || ch > T('9'))
      return 0;
    cp = cp * 10 + (int)(ch - T('0'));
    if (cp >= 0x10000)
      return 0;
  }

  if (!codepageMap(cp, info->map))
    return 0;
  info->convert = unknownEncodingConvert;
  /* The code page could instead be cast to a void * directly, which
     would make the release function unnecessary. */
  info->release = free;
  cpStore = (int *)malloc(sizeof(int));
  info->data = cpStore;
  if (!cpStore)
    return 0;
  *cpStore = cp;
  return 1;
}

/* Prints the command-line synopsis for `prog` to stderr and terminates
   the process with exit status 1.  The synopsis lists every option the
   argument parser in tmain accepts, including -m and -c. */
static
void usage(const XML_Char *prog)
{
  ftprintf(stderr, T("usage: %s [-n] [-r] [-w] [-x] [-m] [-c] [-d output-dir] [-e encoding] file ...\n"), prog);
  exit(1);
}

/* Program entry point: parses the command line, then parses every file
   named on it.

   Options (an argument starting with '-'):
     -r        read files with processStream instead of filemap
     -n        create a namespace-aware parser (resets the output type)
     -x        install an external entity handler
     -w        install the "windows-NNNN" unknown-encoding handler
     -m        output type 'm' (meta handlers, wrapped in <document>)
     -c        output type 'c' (default/markup handlers)
     -d dir    write output for each input file into dir
     -e enc    encoding name passed to the parser constructor
     --        end of options
   Several flags may share one argument, but only in the order
   r, n, x, w, m, c, optionally followed by d or e, which take the rest
   of the argument or the next argument as their value.

   Output handlers are installed only when -d is given; without it each
   file is just parsed and errors are reported.  An output file whose
   input failed to parse is removed.

   Calls usage() (which exits) on a malformed command line or when no
   file is given, and exits with status 1 if an output file cannot be
   opened or memory runs out.  Otherwise returns 0, regardless of whether
   individual files parsed successfully. */
int tmain(int argc, XML_Char **argv)
{
  int i;
  const XML_Char *outputDir = 0;
  const XML_Char *encoding = 0;
  int useFilemap = 1;
  int processExternalEntities = 0;
  int windowsCodePages = 0;
  int outputType = 0;
  int useNamespaces = 0;

#ifdef _MSC_VER
  _CrtSetDbgFlag(_CRTDBG_ALLOC_MEM_DF|_CRTDBG_LEAK_CHECK_DF);
#endif

  i = 1;
  while (i < argc && argv[i][0] == T('-')) {
    int j;
    /* A bare "--" ends option processing. */
    if (argv[i][1] == T('-') && argv[i][2] == T('\0')) {
      i++;
      break;
    }
    j = 1;
    if (argv[i][j] == T('r')) {
      useFilemap = 0;
      j++;
    }
    if (argv[i][j] == T('n')) {
      useNamespaces = 1;
      outputType = 0;
      j++;
    }
    if (argv[i][j] == T('x')) {
      processExternalEntities = 1;
      j++;
    }
    if (argv[i][j] == T('w')) {
      windowsCodePages = 1;
      j++;
    }
    if (argv[i][j] == T('m')) {
      outputType = 'm';
      useNamespaces = 0;
      j++;
    }
    if (argv[i][j] == T('c')) {
      outputType = 'c';
      useNamespaces = 0;
      j++;
    }
    if (argv[i][j] == T('d')) {
      /* Value is either the rest of this argument or the next one. */
      if (argv[i][j + 1] == T('\0')) {
        if (++i == argc)
          usage(argv[0]);
        outputDir = argv[i];
      }
      else
        outputDir = argv[i] + j + 1;
      i++;
    }
    else if (argv[i][j] == T('e')) {
      if (argv[i][j + 1] == T('\0')) {
        if (++i == argc)
          usage(argv[0]);
        encoding = argv[i];
      }
      else
        encoding = argv[i] + j + 1;
      i++;
    }
    /* End of argument is fine only if at least one flag was consumed. */
    else if (argv[i][j] == T('\0') && j > 1)
      i++;
    else
      usage(argv[0]);
  }
  if (i == argc)
    usage(argv[0]);
  for (; i < argc; i++) {
    FILE *fp = 0;
    XML_Char *outName = 0;
    int result;
    XML_Parser parser;
    if (useNamespaces)
      parser = XML_ParserCreateNS(encoding, NSSEP);
    else
      parser = XML_ParserCreate(encoding);
    /* Every call below dereferences the parser, so a failed creation
       must stop here. */
    if (!parser) {
      ftprintf(stderr, T("%s: out of memory\n"), argv[0]);
      exit(1);
    }
    if (outputDir) {
      /* The output file is named after the last path component of the
         input file. */
      const XML_Char *file = argv[i];
      if (tcsrchr(file, T('/')))
        file = tcsrchr(file, T('/')) + 1;
#ifdef WIN32
      if (tcsrchr(file, T('\\')))
        file = tcsrchr(file, T('\\')) + 1;
#endif
      outName = malloc((tcslen(outputDir) + tcslen(file) + 2) * sizeof(XML_Char));
      if (!outName) {
        ftprintf(stderr, T("%s: out of memory\n"), argv[0]);
        exit(1);
      }
      tcscpy(outName, outputDir);
      tcscat(outName, T("/"));
      tcscat(outName, file);
      fp = tfopen(outName, T("wb"));
      if (!fp) {
        tperror(outName);
        exit(1);
      }
      setvbuf(fp, NULL, _IOFBF, 16384);
#ifdef XML_UNICODE
      /* Start wide-character output with U+FEFF. */
      puttc(0xFEFF, fp);
#endif
      XML_SetUserData(parser, fp);
      switch (outputType) {
      case 'm':
        XML_UseParserAsHandlerArg(parser);
        fputts(T("<document>\n"), fp);
        XML_SetElementHandler(parser, metaStartElement, metaEndElement);
        XML_SetProcessingInstructionHandler(parser, metaProcessingInstruction);
        XML_SetCommentHandler(parser, metaComment);
        XML_SetCharacterDataHandler(parser, metaCharacterData);
        XML_SetUnparsedEntityDeclHandler(parser, metaUnparsedEntityDecl);
        XML_SetNotationDeclHandler(parser, metaNotationDecl);
        break;
      case 'c':
        XML_UseParserAsHandlerArg(parser);
        XML_SetDefaultHandler(parser, markup);
        XML_SetElementHandler(parser, defaultStartElement, defaultEndElement);
        XML_SetCharacterDataHandler(parser, defaultCharacterData);
        XML_SetProcessingInstructionHandler(parser, defaultProcessingInstruction);
        break;
      default:
        if (useNamespaces)
          XML_SetElementHandler(parser, startElementNS, endElementNS);
        else
          XML_SetElementHandler(parser, startElement, endElement);
        XML_SetCharacterDataHandler(parser, characterData);
        XML_SetProcessingInstructionHandler(parser, processingInstruction);
        break;
      }
    }
    if (windowsCodePages)
      XML_SetUnknownEncodingHandler(parser, unknownEncoding, 0);
    if (!XML_SetBase(parser, argv[i])) {
      ftprintf(stderr, T("%s: out of memory\n"), argv[0]);
      exit(1);
    }
    if (processExternalEntities)
      XML_SetExternalEntityRefHandler(parser,
                                      useFilemap
                                      ? externalEntityRefFilemap
                                      : externalEntityRefStream);
    if (useFilemap) {
      PROCESS_ARGS args;
      args.retPtr = &result;
      args.parser = parser;
      if (!filemap(argv[i], processFile, &args))
        result = 0;
    }
    else
      result = processStream(argv[i], parser);
    if (outputDir) {
      if (outputType == 'm')
        fputts(T("</document>\n"), fp);
      fclose(fp);
      /* Do not leave partial output behind for a file that failed. */
      if (!result)
        tremove(outName);
      free(outName);
    }
    XML_ParserFree(parser);
  }
  return 0;
}

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?