📄 textfile.cpp

📁 目前最快速的字符串排序算法
💻 CPP
字号:
/* Copyright 2007 Stefan Webb

This file is part of Burstsort.

Burstsort is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.

Burstsort is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with Burstsort; if not, write to the Free Software
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA */

#include "main.h"
#include <string.h>

/* Constructors and Destructors */
/* Default constructor: creates an object that holds no text yet. */
Textfile::Textfile() {
	// A null text pointer marks "nothing loaded"; close() and the
	// destructor both test for it.
	this->text = NULL;
}

/* Constructs the object and immediately loads the given file.
   file: path handed to open().
   The value returned by open() is discarded; use gettext() to obtain the
   loaded text. */
Textfile::Textfile(wchar_t* file) {
	// Give text a defined value before open() runs, so the object can be
	// destroyed safely even if open() leaves without assigning it.
	text = 0;
	open(file);
}

/* Destructor: hands the text buffer, if any, back to the heap. */
Textfile::~Textfile() {
	// free() does nothing when given a null pointer, so no separate
	// null test is required here.
	free(text);
}

/* Classifies a file buffer by the byte-order mark at its start.
   src: buffer to inspect. At most the first three bytes are read, and
        src[1] / src[2] are only read once the preceding bytes already
        match the start of a mark.
   Returns: UTF8 for EF BB BF, Unicode for FF FE, UnicodeBE for FE FF,
            and ASCII when none of these marks is present. */
ENCODING Textfile::detectencoding(unsigned char* src) {
	switch (src[0]) {
	case 0xEF:
		if (src[1] == 0xBB && src[2] == 0xBF)
			return UTF8;
		break;
	case 0xFF:
		if (src[1] == 0xFE)
			return Unicode;
		break;
	case 0xFE:
		if (src[1] == 0xFF)
			return UnicodeBE;
		break;
	default:
		break;
	}
	// No recognised mark: treat the data as plain single-byte text.
	return ASCII;
}

/* Opens a file, decodes it according to its byte-order mark and returns the
   contents as a null-terminated wide string.

   filename: path of the file to read; it is opened with _wfopen in binary
             mode.
   Returns:  the decoded text, which is also stored in the text member, or 0
             when the file cannot be opened, its size cannot be determined,
             or a buffer cannot be allocated.

   On success the stored pointer is always the start of a malloc()
   allocation, so close() and the destructor may pass it to free().

   NOTE: text left over from an earlier open() is not released here, because
   this function cannot assume that text holds a valid pointer on entry.
   Call close() first when reusing an object. */
wchar_t* Textfile::open(wchar_t* filename) {
	// Every failure path below leaves the object in the "no text" state.
	text = 0;
	if (!filename)
		return 0;

	// Open file, get filesize
	FILE* file = _wfopen(filename, L"rb");
	if (!file)
		return 0;
	// NOTE(review): fsize is a project helper; a negative result is treated
	// as failure here -- confirm against its definition.
	int filesize = fsize(file);
	if (filesize < 0) {
		fclose(file);
		return 0;
	}

	// Read contents of the file, then close it. Two spare bytes hold the
	// wide-character terminator.
	char* filestring = (char*) malloc(filesize + 2);
	if (!filestring) {
		fclose(file);
		return 0;
	}
	int bytesread = (int) fread(filestring, 1, filesize, file);
	fclose(file);

	// Zero everything that was not filled by the read. This terminates the
	// data even after a short read and keeps the BOM test below from
	// looking at uninitialised bytes.
	memset(filestring + bytesread, 0, (filesize + 2) - bytesread);

	// Detect encoding and if necessary convert it
	ENCODING filetype = detectencoding((unsigned char*) filestring);
	if (filetype == Unicode) {
		// Slide the payload and its terminator over the 2-byte BOM so that
		// text is the base of the allocation rather than an interior
		// pointer that must never reach free().
		// The cast assumes wchar_t is 16 bits wide, as the data is UTF-16.
		memmove(filestring, filestring + 2, filesize);
		text = (wchar_t*) filestring;
	} else if (filetype == ASCII) {
		// ASCIItoUnicode frees filestring itself once it has produced a
		// result; when it returns 0 the buffer is still ours to release.
		text = ASCIItoUnicode((unsigned char*) filestring, filesize);
		if (!text)
			free(filestring);
	} else if (filetype == UTF8) {
		// The converter allocates its own output; skip the 3-byte BOM and
		// release the raw file buffer afterwards.
		text = UTF8toUnicode((unsigned char*) (filestring + 3));
		free(filestring);
	} else {
		// Big-endian data is byte-swapped in place, so drop the BOM first
		// for the same reason as in the little-endian case.
		memmove(filestring, filestring + 2, filesize);
		text = UnicodeBEtoUnicode((unsigned char*) filestring);
	}
	return text;
}

/* Releases the loaded text, if any, and resets the pointer so that the
   object is back in the "nothing loaded" state. */
void Textfile::close() {
	// Nothing loaded: nothing to do.
	if (!text)
		return;

	free(text);
	text = 0;
}

/* Returns a pointer to the string of the file */
wchar_t* Textfile::gettext() {
	return text;
}

/* Decodes the UTF-8 sequence that starts at p (p[0] must not be 0).
   Stores the decoded code point in *codepoint and returns the number of
   bytes consumed. A malformed or truncated sequence consumes exactly one
   byte and yields U+FFFD, so the caller can never step over the
   terminating NUL. */
static int decodeUTF8Sequence(const unsigned char* p, unsigned long* codepoint) {
	const unsigned char lead = p[0];
	unsigned long value;
	int length;

	if (lead < 0x80) {                    // 0xxxxxxx: single byte
		*codepoint = lead;
		return 1;
	} else if ((lead & 0xE0) == 0xC0) {   // 110xxxxx: two bytes
		length = 2;
		value = lead & 0x1F;
	} else if ((lead & 0xF0) == 0xE0) {   // 1110xxxx: three bytes
		length = 3;
		value = lead & 0x0F;
	} else if ((lead & 0xF8) == 0xF0) {   // 11110xxx: four bytes
		length = 4;
		value = lead & 0x07;
	} else {                              // stray continuation or invalid lead
		*codepoint = 0xFFFD;
		return 1;
	}

	for (int i = 1; i < length; i++) {
		// Every trailing byte must look like 10xxxxxx. A NUL fails this
		// test, which stops the scan before the end of the string.
		if ((p[i] & 0xC0) != 0x80) {
			*codepoint = 0xFFFD;
			return 1;
		}
		value = (value << 6) | (p[i] & 0x3F);
	}

	// Values beyond the Unicode range cannot be represented.
	*codepoint = (value > 0x10FFFF) ? 0xFFFD : value;
	return length;
}

/* Converts a null-terminated UTF-8 string to a newly allocated wide string.

   utf8:    null-terminated UTF-8 input; it is only read, never freed.
   Returns: a malloc()ed, null-terminated wchar_t string that the caller
            must free(), or NULL if utf8 is NULL or the allocation fails.

   Malformed input bytes are replaced by U+FFFD. Code points above U+FFFF
   are written as a surrogate pair when wchar_t is 16 bits wide and as a
   single unit otherwise. Overlong encodings are not rejected. */
wchar_t* Textfile::UTF8toUnicode (unsigned char *utf8) {
	if (utf8 == NULL)
		return NULL;

	const bool needsSurrogates = (sizeof(wchar_t) == 2);
	unsigned long codepoint = 0;

	/* first calculate the size of the target string */
	size_t units = 0;
	for (const unsigned char* p = utf8; *p; ) {
		p += decodeUTF8Sequence(p, &codepoint);
		units += (needsSurrogates && codepoint > 0xFFFF) ? 2 : 1;
	}

	wchar_t* out = (wchar_t*) malloc((units + 1) * sizeof(wchar_t));
	if (out == NULL)
		return NULL;

	/* second pass: decode again, this time storing the result */
	size_t out_index = 0;
	for (const unsigned char* p = utf8; *p; ) {
		p += decodeUTF8Sequence(p, &codepoint);
		if (needsSurrogates && codepoint > 0xFFFF) {
			// Split into a high/low surrogate pair.
			codepoint -= 0x10000;
			out[out_index++] = (wchar_t) (0xD800 + (codepoint >> 10));
			out[out_index++] = (wchar_t) (0xDC00 + (codepoint & 0x3FF));
		} else {
			out[out_index++] = (wchar_t) codepoint;
		}
	}
	out[out_index] = 0;
	return out;
}

/* Converts a single-byte string to a newly allocated wide string.

   ascii:   null-terminated input bytes. On success this buffer is freed by
            this function, so the caller must not use or free it afterwards.
   size:    maximum number of bytes to convert; the output holds size + 1
            wide characters.
   Returns: a malloc()ed, null-terminated wchar_t string that the caller
            must free(), or NULL if ascii is NULL, size is negative or the
            allocation fails. Whenever NULL is returned, ascii has NOT been
            freed and still belongs to the caller.

   Each byte is widened to the wchar_t with the same numeric value. */
wchar_t* Textfile::ASCIItoUnicode (unsigned char* ascii, int size) {
	if (ascii == NULL || size < 0)
		return NULL;

	wchar_t* out = (wchar_t*) malloc(((size_t) size + 1) * sizeof(wchar_t));
	if (out == NULL)
		return NULL;

	// Stop at the terminator or after size bytes, whichever comes first, so
	// an input whose NUL lies beyond size cannot overrun the output buffer.
	int index = 0;
	while (index < size && ascii[index]) {
		// unsigned char -> wchar_t keeps the value unchanged
		out[index] = ascii[index];
		index++;
	}
	out[index] = 0;

	free(ascii);
	return out;
}

/* Converts big-endian two-byte text to the opposite byte order, in place.

   unicodebe: buffer of two-byte units, ended by a unit whose two bytes are
              both zero. The buffer itself is modified.
   Returns:   the same address that was passed in, cast to wchar_t*; no new
              memory is allocated. */
wchar_t* Textfile::UnicodeBEtoUnicode (unsigned char* unicodebe) {
	// Walk the buffer one two-byte unit at a time and exchange the bytes of
	// each unit until the all-zero terminator unit is reached.
	for (unsigned char* unit = unicodebe; unit[0] != 0 || unit[1] != 0; unit += 2) {
		const unsigned char first = unit[0];
		unit[0] = unit[1];
		unit[1] = first;
	}
	return (wchar_t*) unicodebe;
}
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -