⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 normalize.xs

📁 source of perl for linux application,
💻 XS
📖 第 1 页 / 共 2 页
字号:
MODULE = Unicode::Normalize	PACKAGE = Unicode::Normalize

 # decompose(src [, compat])
 #   Returns a new UTF-8-flagged SV built from the output of
 #   pv_utf8_decompose() applied to src.  The truth value of compat is
 #   forwarded as that helper's final (bool) argument -- presumably it
 #   selects compatibility rather than canonical decomposition; confirm
 #   against the helper's definition.
SV*
decompose(src, compat = &PL_sv_no)
    SV * src
    SV * compat
  PROTOTYPE: $;$
  PREINIT:
    SV* dst;
    U8 *s, *d, *dend;
    STRLEN slen, dlen;
  CODE:
    s = (U8*)sv_2pvunicode(src,&slen);
    dst = newSVpvn("", 0);
    dlen = slen;
    New(0, d, dlen+1, U8);
    /* d is passed by address; presumably the helper may reallocate it
       when the output outgrows dlen -- TODO confirm */
    dend = pv_utf8_decompose(s, slen, &d, dlen, (bool)SvTRUE(compat));
    sv_setpvn(dst, (char *)d, dend - d);
    SvUTF8_on(dst);
    Safefree(d);
    RETVAL = dst;
  OUTPUT:
    RETVAL

 # reorder(src)
 #   Returns a new UTF-8-flagged SV filled in place by pv_utf8_reorder().
 #   The destination buffer is grown to slen + UTF8_MAXLEN + 1 bytes before
 #   the helper writes into it.
SV*
reorder(src)
    SV * src
  PROTOTYPE: $
  PREINIT:
    SV* dst;
    U8 *s, *d, *dend;
    STRLEN slen, dlen;
  CODE:
    s = (U8*)sv_2pvunicode(src,&slen);
    dst = newSVpvn("", 0);
    dlen = slen + UTF8_MAXLEN;
    d = (U8*)SvGROW(dst,dlen+1);
    SvUTF8_on(dst);
    dend = pv_utf8_reorder(s, slen, d, dlen);
    *dend = '\0';
    SvCUR_set(dst, dend - d);
    RETVAL = dst;
  OUTPUT:
    RETVAL

 # compose(src) / composeContiguous(src)
 #   Returns a new UTF-8-flagged SV filled in place by pv_utf8_compose().
 #   The alias index (0 for compose, 1 for composeContiguous) is forwarded
 #   as the helper's final (bool) argument.
SV*
compose(src)
    SV * src
  PROTOTYPE: $
  ALIAS:
    composeContiguous = 1
  PREINIT:
    SV* dst;
    U8 *s, *d, *dend;
    STRLEN slen, dlen;
  CODE:
    s = (U8*)sv_2pvunicode(src,&slen);
    dst = newSVpvn("", 0);
    dlen = slen + UTF8_MAXLEN;
    d = (U8*)SvGROW(dst,dlen+1);
    SvUTF8_on(dst);
    dend = pv_utf8_compose(s, slen, d, dlen, (bool)ix);
    *dend = '\0';
    SvCUR_set(dst, dend - d);
    RETVAL = dst;
  OUTPUT:
    RETVAL

 # NFD(src) / NFKD(src)
 #   Two passes: pv_utf8_decompose() into a temporary buffer t, then
 #   pv_utf8_reorder() from t into the returned SV.  The alias index
 #   (0 for NFD, 1 for NFKD) is the decompose helper's final (bool) argument.
SV*
NFD(src)
    SV * src
  PROTOTYPE: $
  ALIAS:
    NFKD = 1
  PREINIT:
    SV *dst;
    U8 *s, *t, *tend, *d, *dend;
    STRLEN slen, tlen, dlen;
  CODE:
    /* decompose */
    s = (U8*)sv_2pvunicode(src,&slen);
    tlen = slen;
    New(0, t, tlen+1, U8);
    tend = pv_utf8_decompose(s, slen, &t, tlen, (bool)ix);
    *tend = '\0';
    tlen = tend - t; /* no longer know real tlen */

    /* reorder */
    dst = newSVpvn("", 0);
    dlen = tlen + UTF8_MAXLEN;
    d = (U8*)SvGROW(dst,dlen+1);
    SvUTF8_on(dst);
    dend = pv_utf8_reorder(t, tlen, d, dlen);
    *dend = '\0';
    SvCUR_set(dst, dend - d);

    /* return */
    Safefree(t);
    RETVAL = dst;
  OUTPUT:
    RETVAL

 # NFC(src) / NFKC(src) / FCC(src)
 #   Three passes: decompose into t, reorder into u, compose into the
 #   returned SV.  The decompose helper's final argument is true only for
 #   NFKC (ix == 1); the compose helper's final argument is true only for
 #   FCC (ix == 2).
SV*
NFC(src)
    SV * src
  PROTOTYPE: $
  ALIAS:
    NFKC = 1
    FCC  = 2
  PREINIT:
    SV *dst;
    U8 *s, *t, *tend, *u, *uend, *d, *dend;
    STRLEN slen, tlen, ulen, dlen;
  CODE:
    /* decompose */
    s = (U8*)sv_2pvunicode(src,&slen);
    tlen = slen;
    New(0, t, tlen+1, U8);
    tend = pv_utf8_decompose(s, slen, &t, tlen, (bool)(ix==1));
    *tend = '\0';
    tlen = tend - t; /* no longer know real tlen */

    /* reorder */
    ulen = tlen + UTF8_MAXLEN;
    New(0, u, ulen+1, U8);
    uend = pv_utf8_reorder(t, tlen, u, ulen);
    *uend = '\0';
    ulen = uend - u;

    /* compose */
    dst = newSVpvn("", 0);
    dlen = ulen + UTF8_MAXLEN;
    d = (U8*)SvGROW(dst,dlen+1);
    SvUTF8_on(dst);
    dend = pv_utf8_compose(u, ulen, d, dlen, (bool)(ix==2));
    *dend = '\0';
    SvCUR_set(dst, dend - d);

    /* return */
    Safefree(t);
    Safefree(u);
    RETVAL = dst;
  OUTPUT:
    RETVAL

 # checkNFD(src) / checkNFKD(src)
 #   Scans src one code point at a time and returns a boolean SV.  The
 #   result is false as soon as a non-zero combining class is lower than
 #   the previous code point's class, or the code point is a Hangul
 #   syllable, or it has a mapping from dec_canonical() (checkNFD) or
 #   dec_compat() (checkNFKD).  Croaks when the decoder reports a zero
 #   length.
SV*
checkNFD(src)
    SV * src
  PROTOTYPE: $
  ALIAS:
    checkNFKD = 1
  PREINIT:
    STRLEN srclen, retlen;
    U8 *s, *e, *p, curCC, preCC;
    bool result = TRUE;
  CODE:
    s = (U8*)sv_2pvunicode(src,&srclen);
    e = s + srclen;

    preCC = 0;
    for (p = s; p < e; p += retlen) {
        UV uv = utf8n_to_uvuni(p, e - p, &retlen, AllowAnyUTF);
        if (!retlen)
            croak(ErrRetlenIsZero, "checkNFD or -NFKD");

        curCC = getCombinClass(uv);
        if (preCC > curCC && curCC != 0) { /* canonical ordering violated */
            result = FALSE;
            break;
        }
        if (Hangul_IsS(uv) || (ix ? dec_compat(uv) : dec_canonical(uv))) {
            result = FALSE;
            break;
        }
        preCC = curCC;
    }
    RETVAL = boolSV(result);
  OUTPUT:
    RETVAL

 # checkNFC(src) / checkNFKC(src)
 #   Scans src one code point at a time.  Returns a false SV on the first
 #   definite violation, undef when no violation was found but at least one
 #   code point satisfied isComp2nd() (the MAYBE case), and a true SV
 #   otherwise.  Croaks when the decoder reports a zero length.
SV*
checkNFC(src)
    SV * src
  PROTOTYPE: $
  ALIAS:
    checkNFKC = 1
  PREINIT:
    STRLEN srclen, retlen;
    U8 *s, *e, *p, curCC, preCC;
    bool result = TRUE;
    bool isMAYBE = FALSE;
  CODE:
    s = (U8*)sv_2pvunicode(src,&srclen);
    e = s + srclen;

    preCC = 0;
    for (p = s; p < e; p += retlen) {
        UV uv = utf8n_to_uvuni(p, e - p, &retlen, AllowAnyUTF);
        if (!retlen)
            croak(ErrRetlenIsZero, "checkNFC or -NFKC");

        curCC = getCombinClass(uv);
        if (preCC > curCC && curCC != 0) { /* canonical ordering violated */
            result = FALSE;
            break;
        }

        /* get NFC/NFKC property */
        if (Hangul_IsS(uv)) /* Hangul syllables are canonical composites */
            ; /* YES */
        else if (isExclusion(uv) || isSingleton(uv) || isNonStDecomp(uv)) {
            result = FALSE;
            break;
        }
        else if (isComp2nd(uv))
            isMAYBE = TRUE;
        else if (ix) {
            char *canon, *compat;
          /* NFKC_NO when having compatibility mapping. */
            canon  = (char *) dec_canonical(uv);
            compat = (char *) dec_compat(uv);
            /* a compat mapping identical to the canonical one does not count */
            if (compat && !(canon && strEQ(canon, compat))) {
                result = FALSE;
                break;
            }
        } /* end of get NFC/NFKC property */

        preCC = curCC;
    }
    if (isMAYBE && result) /* NO precedes MAYBE */
        XSRETURN_UNDEF;
    RETVAL = boolSV(result);
  OUTPUT:
    RETVAL

 # checkFCD(src) / checkFCC(src)
 #   Scans src one code point at a time.  For a code point with a
 #   dec_canonical() mapping, the combining class compared against the
 #   previous one is taken from the first code point of that mapping, and
 #   the class carried forward is taken from its last code point.  With the
 #   checkFCC alias (ix == 1) the exclusion / singleton / non-starter
 #   decomposition tests and the isComp2nd() MAYBE test are applied as
 #   well.  Returns false, undef (MAYBE) or true like checkNFC.
SV*
checkFCD(src)
    SV * src
  PROTOTYPE: $
  ALIAS:
    checkFCC = 1
  PREINIT:
    STRLEN srclen, retlen;
    U8 *s, *e, *p, curCC, preCC;
    bool result = TRUE;
    bool isMAYBE = FALSE;
  CODE:
    s = (U8*)sv_2pvunicode(src,&srclen);
    e = s + srclen;

    preCC = 0;
    for (p = s; p < e; p += retlen) {
        U8 *sCan;
        UV uvLead;
        STRLEN canlen = 0;
        UV uv = utf8n_to_uvuni(p, e - p, &retlen, AllowAnyUTF);
        if (!retlen)
            croak(ErrRetlenIsZero, "checkFCD or -FCC");

        sCan = (U8*) dec_canonical(uv);

        if (sCan) {
            STRLEN canret;
            /* the mapping is treated as a NUL-terminated string */
            canlen = (STRLEN)strlen((char *) sCan);
            uvLead = utf8n_to_uvuni(sCan, canlen, &canret, AllowAnyUTF);
            if (!canret)
                croak(ErrRetlenIsZero, "checkFCD or -FCC");
        }
        else {
            uvLead = uv;
        }

        curCC = getCombinClass(uvLead);

        if (curCC != 0 && curCC < preCC) { /* canonical ordering violated */
            result = FALSE;
            break;
        }

        if (ix) {
            if (isExclusion(uv) || isSingleton(uv) || isNonStDecomp(uv)) {
                result = FALSE;
                break;
            }
            else if (isComp2nd(uv))
                isMAYBE = TRUE;
        }

        if (sCan) {
            STRLEN canret;
            UV uvTrail;
            U8* eCan = sCan + canlen;
            /* step back one character from the end of the mapping */
            U8* pCan = utf8_hop(eCan, -1);
            if (pCan < sCan)
                croak(ErrHopBeforeStart);
            uvTrail = utf8n_to_uvuni(pCan, eCan - pCan, &canret, AllowAnyUTF);
            if (!canret)
                croak(ErrRetlenIsZero, "checkFCD or -FCC");
            preCC = getCombinClass(uvTrail);
        }
        else {
            preCC = curCC;
        }
    }
    if (isMAYBE && result) /* NO precedes MAYBE */
        XSRETURN_UNDEF;
    RETVAL = boolSV(result);
  OUTPUT:
    RETVAL

 # The next five XSUBs have no CODE: section, so xsubpp generates a plain
 # call to the C function of the same name and returns its value.
U8
getCombinClass(uv)
    UV uv
  PROTOTYPE: $

bool
isExclusion(uv)
    UV uv
  PROTOTYPE: $

bool
isSingleton(uv)
    UV uv
  PROTOTYPE: $

bool
isNonStDecomp(uv)
    UV uv
  PROTOTYPE: $

 # isNFC_MAYBE and isNFKC_MAYBE are further Perl-level names for isComp2nd;
 # the alias index is not consulted.
bool
isComp2nd(uv)
    UV uv
  PROTOTYPE: $
  ALIAS:
    isNFC_MAYBE  = 1
    isNFKC_MAYBE = 2

 # isNFD_NO(uv) / isNFKD_NO(uv)
 #   True when uv is a Hangul syllable or has a mapping from dec_canonical()
 #   (isNFD_NO) or dec_compat() (isNFKD_NO).
SV*
isNFD_NO(uv)
    UV uv
  PROTOTYPE: $
  ALIAS:
    isNFKD_NO = 1
  PREINIT:
    bool result = FALSE;
  CODE:
    if (Hangul_IsS(uv) || (ix ? dec_compat(uv) : dec_canonical(uv)))
        result = TRUE; /* NFD_NO or NFKD_NO */
    RETVAL = boolSV(result);
  OUTPUT:
    RETVAL

 # isComp_Ex(uv) / isNFC_NO(uv) / isNFKC_NO(uv)
 #   True when uv satisfies isExclusion(), isSingleton() or isNonStDecomp().
 #   For isNFKC_NO (ix == 1) it is additionally true when uv has a
 #   dec_compat() mapping that is absent from, or differs from, its
 #   dec_canonical() mapping.
SV*
isComp_Ex(uv)
    UV uv
  PROTOTYPE: $
  ALIAS:
    isNFC_NO  = 0
    isNFKC_NO = 1
  PREINIT:
    bool result = FALSE;
  CODE:
    if (isExclusion(uv) || isSingleton(uv) || isNonStDecomp(uv))
        result = TRUE; /* NFC_NO or NFKC_NO */
    else if (ix) {
        char *canon, *compat;
        canon  = (char *) dec_canonical(uv);
        compat = (char *) dec_compat(uv);
        if (compat && (!canon || strNE(canon, compat)))
            result = TRUE; /* NFC_NO or NFKC_NO */
    }
    RETVAL = boolSV(result);
  OUTPUT:
    RETVAL

 # getComposite(uv, uv2)
 #   Returns composite_uv(uv, uv2) as an unsigned-integer SV, or undef when
 #   that helper returns 0.
SV*
getComposite(uv, uv2)
    UV uv
    UV uv2
  PROTOTYPE: $$
  PREINIT:
    UV composite;
  CODE:
    composite = composite_uv(uv, uv2);
    RETVAL = composite ? newSVuv(composite) : &PL_sv_undef;
  OUTPUT:
    RETVAL

 # getCanon(uv) / getCompat(uv)
 #   Returns the mapping of uv as a UTF-8-flagged string: for a Hangul
 #   syllable the bytes written by pv_cat_decompHangul(), otherwise the
 #   string from dec_canonical() (getCanon) or dec_compat() (getCompat).
 #   Returns undef when there is no such mapping.
SV*
getCanon(uv)
    UV uv
  PROTOTYPE: $
  ALIAS:
    getCompat = 1
  CODE:
    if (Hangul_IsS(uv)) {
        /* room for three encoded characters plus one spare byte */
        U8 tmp[3 * UTF8_MAXLEN + 1];
        U8 *t = tmp;
        U8 *e = pv_cat_decompHangul(t, uv);
        RETVAL = newSVpvn((char *)t, e - t);
    } else {
        U8* rstr = ix ? dec_compat(uv) : dec_canonical(uv);

        if (!rstr)
            XSRETURN_UNDEF;
        RETVAL = newSVpvn((char *)rstr, strlen((char *)rstr));
    }
    SvUTF8_on(RETVAL);
  OUTPUT:
    RETVAL

 # splitOnLastStarter(src)
 #   Walks backwards from the end of src until a code point whose combining
 #   class is 0 is found (or the start is reached) and pushes two
 #   UTF-8-flagged mortal strings: the part before that position and the
 #   part from that position to the end.
void
splitOnLastStarter(src)
    SV * src
  PREINIT:
    SV *svp;
    STRLEN srclen;
    U8 *s, *e, *p;
  PPCODE:
    s = (U8*)sv_2pvunicode(src,&srclen);
    e = s + srclen;

    p = e;
    while (s < p) {
        UV uv;
        p = utf8_hop(p, -1);
        if (p < s)
            croak(ErrHopBeforeStart);
        uv = utf8n_to_uvuni(p, e - p, NULL, AllowAnyUTF);
        if (getCombinClass(uv) == 0) /* Last Starter found */
            break;
    }

    svp = sv_2mortal(newSVpvn((char*)s, p - s));
    SvUTF8_on(svp);
    XPUSHs(svp);

    svp = sv_2mortal(newSVpvn((char*)p, e - p));
    SvUTF8_on(svp);
    XPUSHs(svp);

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -