wstr_util.c.svn-base

来自「SumatraPDF是一款小型开源的pdf阅读工具。虽然玲珑小巧(只有800多K」· SVN-BASE 代码 · 共 592 行

SVN-BASE
592
字号
/* Written by Krzysztof Kowalczyk (http://blog.kowalczyk.info)
   The author disclaims copyright to this source code. */

/* The most basic things, including string handling functions */

#include "base_util.h"
#include "wstr_util.h"

#include <str_strsafe.h>
#include <assert.h>

/* Append 'src' to the NUL-terminated string in 'dest'. 'dst_cch_size' is the
   total capacity of 'dest' in characters, including the terminating NUL.
   Returns 'dest' if all of 'src' fit. Returns NULL if an argument is NULL,
   if 'dest' has no room left, or if 'src' had to be truncated; in the
   truncation case 'dest' holds as much of 'src' as fits and is still
   NUL-terminated. */
WCHAR * wstr_cat_s(WCHAR * dest, size_t dst_cch_size, const WCHAR * src)
{
    size_t dest_len;
    size_t src_len;
    size_t room;

    if (!dest || !src)
        return NULL;

    dest_len = wcslen(dest);
    /* at least one slot must remain for the terminating NUL */
    if (dest_len >= dst_cch_size)
        return NULL;

    room = dst_cch_size - dest_len - 1;
    src_len = wcslen(src);
    if (src_len > room) {
        /* does not fit: keep what fits, terminate, and report failure */
        memcpy(dest + dest_len, src, room * sizeof(WCHAR));
        dest[dest_len + room] = 0;
        return NULL;
    }

    /* copy the characters together with the terminating NUL of 'src' */
    memcpy(dest + dest_len, src, (src_len + 1) * sizeof(WCHAR));
    return dest;
}

/* Append exactly 'src_cch_size' characters from 'src' to the NUL-terminated
   string in 'dst'. 'dst_cch_size' is the total capacity of 'dst' in
   characters, including the terminating NUL.
   Returns 'dst' on success. Returns NULL, leaving 'dst' unchanged, when the
   result (plus terminator) would not fit or when a required pointer is NULL. */
WCHAR * wstr_catn_s(WCHAR *dst, size_t dst_cch_size, const WCHAR *src, size_t src_cch_size)
{
    size_t len;

    if (!dst || (!src && src_cch_size > 0))
        return NULL;

    len = wcslen(dst);
    /* same test as dst_cch_size > len + src_cch_size, but written so the
       addition cannot wrap around */
    if (len >= dst_cch_size || src_cch_size >= dst_cch_size - len)
        return NULL;

    if (src_cch_size > 0)
        memcpy(dst + len, src, src_cch_size * sizeof *src);
    /* terminate after the appended characters, not at the old end */
    dst[len + src_cch_size] = 0;
    return dst;
}

/* Concatenate up to four strings into a newly allocated string.
   Any of the arguments may be NULL, in which case it is treated as empty.
   Returns NULL if the allocation fails.
   The result comes from zmalloc(); presumably the caller releases it with
   free() -- confirm against zmalloc's definition. */
WCHAR *wstr_cat4(const WCHAR *str1, const WCHAR *str2, const WCHAR *str3, const WCHAR *str4)
{
    WCHAR *str;
    WCHAR *tmp;
    size_t str1_len = 0;
    size_t str2_len = 0;
    size_t str3_len = 0;
    size_t str4_len = 0;

    if (str1)
        str1_len = wstrlen(str1);
    if (str2)
        str2_len = wstrlen(str2);
    if (str3)
        str3_len = wstrlen(str3);
    if (str4)
        str4_len = wstrlen(str4);

    /* +1 for the terminating NUL */
    str = (WCHAR*)zmalloc((str1_len + str2_len + str3_len + str4_len + 1)*sizeof(WCHAR));
    if (!str)
        return NULL;

    tmp = str;
    if (str1) {
        memcpy(tmp, str1, str1_len*sizeof(WCHAR));
        tmp += str1_len;
    }
    if (str2) {
        memcpy(tmp, str2, str2_len*sizeof(WCHAR));
        tmp += str2_len;
    }
    if (str3) {
        memcpy(tmp, str3, str3_len*sizeof(WCHAR));
        tmp += str3_len;
    }
    if (str4) {
        /* copy str4 using its own length */
        memcpy(tmp, str4, str4_len*sizeof(WCHAR));
        tmp += str4_len;
    }
    /* terminate explicitly instead of relying on the allocator */
    *tmp = 0;
    return str;
}

/* Concatenate three strings into a newly allocated string by delegating to
   wstr_cat4() with the fourth argument set to NULL. */
WCHAR *wstr_cat3(const WCHAR *str1, const WCHAR *str2, const WCHAR *str3)
{
    WCHAR *joined = wstr_cat4(str1, str2, str3, NULL);
    return joined;
}

/* Concatenate two strings into a newly allocated string by delegating to
   wstr_cat4() with the third and fourth arguments set to NULL. */
WCHAR *wstr_cat(const WCHAR *str1, const WCHAR *str2)
{
    WCHAR *joined = wstr_cat4(str1, str2, NULL, NULL);
    return joined;
}

/* Duplicate the first 'str_len_cch' characters of 'str' into a newly
   malloc()ed, NUL-terminated buffer. The caller must guarantee that 'str'
   holds at least 'str_len_cch' characters.
   Returns NULL if 'str' is NULL, 'str_len_cch' is negative, or the
   allocation fails. Caller needs to free() the result. */
WCHAR *wstr_dupn(const WCHAR *str, int str_len_cch)
{
    WCHAR *copy;
    size_t len;

    /* a negative count would turn into a huge size_t below */
    if (!str || str_len_cch < 0)
        return NULL;

    /* do the size arithmetic in size_t so that len + 1 cannot overflow int */
    len = (size_t)str_len_cch;
    copy = (WCHAR*)malloc((len+1)*sizeof(WCHAR));
    if (!copy)
        return NULL;
    memcpy(copy, str, len*sizeof(WCHAR));
    copy[len] = 0;
    return copy;
}

/* Return a newly allocated copy of 'str', produced by wstr_cat4() with the
   remaining three arguments set to NULL. */
WCHAR *wstr_dup(const WCHAR *str)
{
    WCHAR *copy = wstr_cat4(str, NULL, NULL, NULL);
    return copy;
}

/* Copy up to 'src_cch_size' characters from 'src' into 'dst', whose capacity
   is 'dst_cch_size' characters including the terminating NUL. The result is
   always NUL-terminated when 'dst_cch_size' is positive.
   Returns TRUE if all 'src_cch_size' characters were copied, FALSE if the
   copy was truncated. With no destination space (size <= 0) nothing is
   written and the result is TRUE only when there was nothing to copy. */
int wstr_copyn(WCHAR *dst, int dst_cch_size, const WCHAR *src, int src_cch_size)
{
    WCHAR *end;

    /* check the size before doing any pointer arithmetic or writing */
    if (dst_cch_size <= 0)
        return (0 == src_cch_size) ? TRUE : FALSE;

    /* last slot is reserved for the terminating NUL */
    end = dst + dst_cch_size - 1;
    while ((dst < end) && (src_cch_size > 0)) {
        *dst++ = *src++;
        --src_cch_size;
    }
    *dst = 0;
    return (0 == src_cch_size) ? TRUE : FALSE;
}

/* Copy the NUL-terminated string 'src' into 'dst', whose capacity is
   'dst_cch_size' characters including the terminating NUL. The result is
   always NUL-terminated when 'dst_cch_size' is positive.
   Returns TRUE if all of 'src' was copied, FALSE if it was truncated or if
   there is no destination space (size <= 0, nothing is written). */
int wstr_copy(WCHAR *dst, int dst_cch_size, const WCHAR *src)
{
    WCHAR *end;

    /* check the size before doing any pointer arithmetic or writing */
    if (dst_cch_size <= 0)
        return FALSE;

    /* last slot is reserved for the terminating NUL */
    end = dst + dst_cch_size - 1;
    while ((dst < end) && *src) {
        *dst++ = *src++;
    }
    *dst = 0;
    /* 'src' rests on its terminator only if everything was copied */
    return (0 == *src) ? TRUE : FALSE;
}

/* Case-sensitive equality test. Two NULL pointers compare equal; a NULL and
   a non-NULL pointer do not. Returns TRUE or FALSE. */
int wstr_eq(const WCHAR *str1, const WCHAR *str2)
{
    if (str1 && str2)
        return (0 == wcscmp(str1, str2)) ? TRUE : FALSE;

    /* at least one side is NULL: equal only when both are */
    return (str1 == str2) ? TRUE : FALSE;
}

/* Case-insensitive equality test (via _wcsicmp). Two NULL pointers compare
   equal; a NULL and a non-NULL pointer do not. Returns TRUE or FALSE. */
int wstr_ieq(const WCHAR *str1, const WCHAR *str2)
{
    if (str1 && str2)
        return (0 == _wcsicmp(str1, str2)) ? TRUE : FALSE;

    /* at least one side is NULL: equal only when both are */
    return (str1 == str2) ? TRUE : FALSE;
}

/* Return TRUE if 'str' begins with 'txt' (case-sensitive), FALSE otherwise.
   Two NULL pointers count as a match; a single NULL does not. */
int  wstr_startswith(const WCHAR *str, const WCHAR *txt)
{
    size_t prefix_len;

    if (!str || !txt)
        return (!str && !txt) ? TRUE : FALSE;

    prefix_len = wcslen(txt);
    return (0 == wcsncmp(str, txt, prefix_len)) ? TRUE : FALSE;
}

/* Return TRUE if 'txt' ends with 'end', compared case-insensitively through
   wstr_ieq(). Returns FALSE if either pointer is NULL or 'end' is longer
   than 'txt'. */
int wstr_endswithi(const WCHAR *txt, const WCHAR *end)
{
    size_t txt_len;
    size_t end_len;

    if (!txt || !end)
        return FALSE;

    txt_len = wstrlen(txt);
    end_len = wstrlen(end);
    if (txt_len < end_len)
        return FALSE;

    /* compare the tail of 'txt' that has the same length as 'end' */
    return wstr_ieq(txt + txt_len - end_len, end) ? TRUE : FALSE;
}

/* Return TRUE if 'str' begins with 'txt', ignoring case (via _wcsnicmp),
   FALSE otherwise. Two NULL pointers count as a match; a single NULL does
   not. */
int  wstr_startswithi(const WCHAR *str, const WCHAR *txt)
{
    size_t prefix_len;

    if (!str || !txt)
        return (!str && !txt) ? TRUE : FALSE;

    prefix_len = wcslen(txt);
    return (0 == _wcsnicmp(str, txt, prefix_len)) ? TRUE : FALSE;
}

/* Return TRUE if 'str' is NULL or points at an empty string, else FALSE. */
int wstr_empty(const WCHAR *str)
{
    return (!str || (0 == *str)) ? TRUE : FALSE;
}

/* Write the two upper-case hex digits of the low byte of 'c' into
   buffer[0] and buffer[1]. No terminator is written.
   Only the low 8 bits are used: each nibble is masked so the lookup can
   never index past the 16-entry digit table, whatever the value of 'c'. */
static void wchar_to_hex(WCHAR c, WCHAR* buffer)
{
    static const WCHAR digits[] = L"0123456789ABCDEF";
    buffer[0] = digits[(c >> 4) & 0x0F];
    buffer[1] = digits[c & 0x0F];
}

/* Return TRUE if character 'c' occurs in the NUL-terminated string 'str'
   before its terminator, FALSE otherwise. 'str' must not be NULL. */
int wstr_contains(const WCHAR *str, WCHAR c)
{
    const WCHAR *pos;

    for (pos = str; *pos; ++pos) {
        if (*pos == c)
            return TRUE;
    }
    return FALSE;
}

static int wchar_is_ws(char c)
{
    switch (c) {
        case ' ':
        case '\t':
        case '\r':
        case '\n':
            return TRUE;
    }
    return FALSE;
}

/* Advance '*txtInOut' past any leading whitespace, as classified by
   wchar_is_ws(). Does nothing if 'txtInOut' or '*txtInOut' is NULL. */
void wstr_skip_ws(WCHAR **txtInOut)
{
    WCHAR *pos;

    if (!txtInOut || !*txtInOut)
        return;

    for (pos = *txtInOut; wchar_is_ws(*pos); ++pos) {
        /* nothing to do, just advancing */
    }
    *txtInOut = pos;
}

/* Punctuation characters that are passed through unencoded */
#define WCHAR_URL_DONT_ENCODE L"-_.!~*'()"

/* Return FALSE if 'c' may appear in a URL as-is (ASCII letter, ASCII digit,
   or one of WCHAR_URL_DONT_ENCODE); TRUE if it has to be percent-encoded. */
int wchar_needs_url_encode(WCHAR c)
{
    int is_lower = (c >= L'a') && (c <= L'z');
    int is_upper = (c >= L'A') && (c <= L'Z');
    int is_digit = (c >= L'0') && (c <= L'9');

    if (is_lower || is_upper || is_digit)
        return FALSE;
    return wstr_contains(WCHAR_URL_DONT_ENCODE, c) ? FALSE : TRUE;
}

/* Return a newly malloc()ed, URL-encoded copy of 'str'. Characters for which
   wchar_needs_url_encode() returns TRUE are written as '%' followed by the
   two hex digits produced by wchar_to_hex(); all others are copied as-is.
   A space is therefore written as "%20".
   Returns NULL if 'str' is NULL or empty, or if the allocation fails.
   Caller needs to free() the result.
   NOTE(review): only two hex digits are emitted per character, so characters
   above 0xFF cannot be represented faithfully; a complete fix would encode
   the UTF-8 bytes instead. */
WCHAR *wstr_url_encode(const WCHAR *str)
{
    WCHAR *         encoded;
    WCHAR *         result;
    size_t          res_len = 0;
    const WCHAR *   tmp;

    if (!str)
        return NULL;

    /* first pass: compute the length of the encoded string */
    for (tmp = str; *tmp; tmp++) {
        if (wchar_needs_url_encode(*tmp))
            res_len += 3;
        else
            ++res_len;
    }
    if (0 == res_len)
        return NULL;

    encoded = (WCHAR*)malloc((res_len+1)*sizeof(WCHAR));
    if (!encoded)
        return NULL;

    /* second pass: emit the encoded characters */
    result = encoded;
    for (tmp = str; *tmp; tmp++) {
        if (wchar_needs_url_encode(*tmp)) {
            *encoded++ = L'%';
            wchar_to_hex(*tmp, encoded);
            encoded += 2;
        } else {
            /* no special case for ' ' here: a space always needs encoding,
               so it never reaches this branch */
            *encoded++ = *tmp;
        }
    }
    *encoded = 0;
    return result;
}

/* printf-style formatting into a newly allocated wide string.
   Formatting is first attempted in a 256-character stack buffer; if the
   output does not fit, progressively larger heap buffers are tried.
   Returns NULL on allocation failure or (non-GNUC build) on a formatting
   error other than "buffer too small".
   The caller owns the result; presumably it is released with free() --
   the small-string path goes through wstr_dup(), confirm its allocator. */
WCHAR *wstr_printf(const WCHAR *format, ...)
{
    va_list     args;
    WCHAR       message[256];
    WCHAR  *    buf;
    size_t      bufCchSize;
    int         fits;

    buf = &(message[0]);
    bufCchSize = sizeof(message)/sizeof(message[0]);

    for (;;)
    {
        /* a va_list must not be reused after a v*printf call has consumed
           it, so restart it for every attempt */
        va_start(args, format);
    #ifdef __GNUC__
        {
            int written = vsnwprintf(buf, bufCchSize, format, args);
            /* a negative result is treated as "buffer too small" */
            fits = (written >= 0) && ((size_t)written < bufCchSize);
        }
    #else
        {
            HRESULT hr = StringCchVPrintfW(buf, bufCchSize, format, args);
            fits = (S_OK == hr);
            if (!fits && (STRSAFE_E_INSUFFICIENT_BUFFER != hr))
            {
                /* any error other than buffer not big enough:
                   a) should not happen
                   b) means we give up */
                va_end(args);
                assert(FALSE);
                goto Error;
            }
        }
    #endif
        va_end(args);
        if (fits)
            break;

        /* we have to make the buffer bigger. The algorithm used to calculate
           the new size is arbitrary (aka. educated guess) */
        if (buf != &(message[0]))
            free(buf);
        if (bufCchSize < 4*1024)
            bufCchSize += bufCchSize;
        else
            bufCchSize += 1024;
        buf = (WCHAR *)malloc(bufCchSize*sizeof(WCHAR));
        if (NULL == buf)
            goto Error;
    }

    /* the result still lives in the stack buffer: hand out a heap copy */
    if (buf == &(message[0]))
        return wstr_dup(buf);

    /* otherwise the heap buffer itself is the result */
    return buf;
Error:
    if (buf != &(message[0]))
        free((void*)buf);

    return NULL;
}


/* Locate the first occurrence of 'c' in the NUL-terminated string 'txt'.
   Returns a pointer to that character, or NULL if the terminator is reached
   first. Searching for 0 yields a pointer to the terminator itself. */
const WCHAR *wstr_find_char(const WCHAR *txt, WCHAR c)
{
    for (;; ++txt) {
        /* the match test comes first so that c == 0 finds the terminator */
        if (c == *txt)
            return txt;
        if (0 == *txt)
            return NULL;
    }
}

/* A simplistic (and potentially wrong) conversion from ascii to unicode by
   setting unicode character value to ascii code, without taking encoding
   into account. 
   TODO: This is a band-aid and all callers should be changed
   to use the right conversion, eventually.
   The caller needs to free() return value.
*/
WCHAR *str_to_wstr_simplistic(const char *s)
{
    WCHAR *tmp;
    WCHAR *ret;

    assert(s);
    ret = (WCHAR*)malloc(sizeof(WCHAR)*(strlen(s)+1));
    if (!ret)
        return NULL;
    tmp = ret;
    while (*s) {
        *tmp++ = *s++;
    }
    *tmp = 0;
    return ret;
}

/* Convert the NUL-terminated wide string 'txt' to a newly malloc()ed,
   NUL-terminated UTF-8 string.
   Returns NULL if 'txt' is NULL, if the conversion fails, or if the
   allocation fails. Caller needs to free() the result. */
char *wstr_to_utf8(const WCHAR *txt)
{
    char *res;
    int requiredBufSize;

    if (!txt)
        return NULL;

    /* first call: ask for the size in bytes, including the terminator */
    requiredBufSize = WideCharToMultiByte(CP_UTF8, 0, txt, -1, NULL, 0, NULL, NULL);
    if (requiredBufSize <= 0)
        return NULL;

    res = (char*)malloc(requiredBufSize);
    if (!res)
        return NULL;

    /* second call: do the conversion; 0 signals failure */
    if (0 == WideCharToMultiByte(CP_UTF8, 0, txt, -1, res, requiredBufSize, NULL, NULL)) {
        free(res);
        return NULL;
    }
    return res;
}

/* Convert the NUL-terminated UTF-8 string 'utf8' to a newly malloc()ed,
   NUL-terminated wide string.
   Returns NULL if 'utf8' is NULL, if the conversion fails, or if the
   allocation fails. Caller needs to free() the result. */
WCHAR *utf8_to_wstr(const char *utf8)
{
    WCHAR *res;
    int requiredBufSize;

    if (!utf8)
        return NULL;

    /* first call: ask for the size in characters, including the terminator */
    requiredBufSize = MultiByteToWideChar(CP_UTF8, 0, utf8, -1, NULL, 0);
    if (requiredBufSize <= 0)
        return NULL;

    res = (WCHAR*)malloc(requiredBufSize * sizeof(WCHAR));
    if (!res)
        return NULL;

    /* second call: do the conversion; 0 signals failure */
    if (0 == MultiByteToWideChar(CP_UTF8, 0, utf8, -1, res, requiredBufSize)) {
        free(res);
        return NULL;
    }
    return res;
}

/* Extract a double-quoted string starting at '*txt'.
   '*txt' is expected to point at the opening '"' (asserted only; the first
   character is skipped regardless). The sequence \" is un-escaped to a
   single '"'; every other backslash is copied verbatim.
   Returns a malloc()ed, NUL-terminated copy of the content between the
   quotes, or NULL if 'txt' or '*txt' is NULL or the allocation fails.
   On success '*txt' is moved just past the closing quote, or onto the
   terminating NUL if no closing quote was found. On failure '*txt' is left
   unchanged. */
static WCHAR *wstr_parse_quoted(WCHAR **txt)
{
    WCHAR *     strStart;
    WCHAR *     strCopy;
    WCHAR *     cur;
    WCHAR *     dst;
    WCHAR       c;
    size_t      len;

    assert(txt);
    if (!txt) return NULL;
    strStart = *txt;
    assert(strStart);
    if (!strStart) return NULL;

    assert('"' == *strStart);
    /* TODO: rewrite as 2-phase logic so that counting and copying are always in sync */
    /* step over the opening quote */
    ++strStart;
    cur = strStart;
    len = 0;
    /* pass 1: count the characters of the un-escaped result */
    for (;;) {
        c = *cur;
        if ((0 == c) || ('"' == c))
            break;
        if ('\\' == c) {
            /* TODO: should I un-escape more than '"' ?
               I used to un-escape '\' as well, but it wasn't right and
               files with UNC path like "\\foo\file.pdf" failed to load */
            if ('"' == cur[1]) {
                /* an escaped quote takes two input characters but only
                   one output character */
                ++cur;
                c = *cur;
            }
        }
        ++cur;
        ++len;
    }

    /* +1 for the terminating NUL */
    strCopy = (WCHAR *)malloc(sizeof(WCHAR)*(len+1));
    if (!strCopy)
        return NULL;

    cur = strStart;
    dst = strCopy;
    /* pass 2: copy, applying the same un-escaping rule as pass 1 */
    for (;;) {
        c = *cur;
        if (0 == c)
            break;
        if ('"' == c) {
            /* closing quote: consume it and stop */
            ++cur;
            break;
        }
        if ('\\' == c) {
            /* TODO: should I un-escape more than '"' ?
               I used to un-escape '\' as well, but it wasn't right and
               files with UNC path like "\\foo\file.pdf" failed to load */
            if ('"' == cur[1]) {
                /* drop the backslash, keep the quote */
                ++cur;
                c = *cur;
            }
        }
        *dst++ = c;
        ++cur;
    }
    *dst = 0;
    /* report back to the caller how far parsing got */
    *txt = cur;
    return strCopy;
}

/* Return TRUE if 'c' is a space, tab, carriage return, line feed or the
   string terminator (0); FALSE otherwise. */
static int wchar_is_ws_or_zero(WCHAR c)
{
    if ((0 == c) || (' ' == c) || ('\t' == c) || ('\r' == c) || ('\n' == c))
        return TRUE;
    return FALSE;
}

/* Extract an unquoted token starting at '*txt'. The token runs up to, but
   not including, the first character for which wchar_is_ws_or_zero() is
   TRUE. Returns a copy made by wstr_dupn(), or NULL if '*txt' is NULL.
   '*txt' is advanced to the character that ended the token. */
static WCHAR *wstr_parse_non_quoted(WCHAR **txt)
{
    WCHAR * token_start;
    WCHAR * token_end;
    WCHAR * token;
    size_t  token_len;

    token_start = *txt;
    assert(token_start);
    if (!token_start) return NULL;
    assert('"' != *token_start);

    /* find the end of the token */
    token_end = token_start;
    while (!wchar_is_ws_or_zero(*token_end))
        ++token_end;

    token_len = token_end - token_start;
    assert(token_len > 0);
    token = wstr_dupn(token_start, token_len);
    *txt = token_end;
    return token;
}

/* '*txt' points at a path that is either:
  - quoted: starts with '"', ends with '"', and every '"' that is part of
    the name is escaped with '\'
  - unquoted: starts with anything other than '"' and ends at whitespace or
    end of string (0)
  Leading whitespace is skipped, then the quoted or unquoted path is
  extracted and '*txt' is advanced past what was consumed.
  Returns NULL if 'txt' or '*txt' is NULL, if only whitespace remains (in
  these cases '*txt' is left unchanged), or if the underlying parser fails.
  Caller needs to free() the result. */
WCHAR *wstr_parse_possibly_quoted(WCHAR **txt)
{
    WCHAR *  pos;
    WCHAR *  parsed;

    if (!txt || !*txt)
        return NULL;

    pos = *txt;
    wstr_skip_ws(&pos);
    if (0 == *pos)
        return NULL;

    /* the first non-whitespace character selects the parser */
    parsed = (L'"' == *pos) ? wstr_parse_quoted(&pos)
                            : wstr_parse_non_quoted(&pos);
    *txt = pos;
    return parsed;
}

/* Convert one hexadecimal digit character (0-9, a-f, A-F) to its numeric
   value 0..15. Returns -1 for any other character. */
static int hex_wchar_to_num(WCHAR c)
{
    int value = -1;

    if ((c >= '0') && (c <= '9'))
        value = c - '0';
    else if ((c >= 'a') && (c <= 'f'))
        value = c - 'a' + 10;
    else if ((c >= 'A') && (c <= 'F'))
        value = c - 'A' + 10;
    return value;
}

/* Decode the two hexadecimal digits at '*txt' into a value 0..255 and
   advance '*txt' past them. Returns -1, leaving '*txt' unchanged, if 'txt'
   is NULL or either character is not a hex digit. */
int hex_wstr_decode_byte(const WCHAR **txt)
{
    const WCHAR *pos;
    int hi, lo;

    if (!txt)
        return -1;

    pos = *txt;
    hi = hex_wchar_to_num(pos[0]);
    if (-1 == hi)
        return -1;
    /* the second character is only read once the first one is valid */
    lo = hex_wchar_to_num(pos[1]);
    if (-1 == lo)
        return -1;

    *txt = pos + 2;
    return (16 * hi) + lo;
}

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?