📄 util.c
字号:
/* * super.c * * Copyright (C) 1997 Martin von L鰓is * * The utf8 routines are copied from Python wstrop module, * Copyright (C) 1997 Martin von L鰓is */#include <errno.h>#include "ntfs.h"/* Converts a single wide character to a sequence of utf8 bytes. Returns the number of bytes, or 0 on error. */static intto_utf8(ntfs_u16 c,unsigned char* buf){ if(c==0) return 0; /* No support for embedded 0 runes */ if(c<0x80){ if(buf)buf[0]=c; return 1; } if(c<0x800){ if(buf){ buf[0] = 0xc0 | (c>>6); buf[1] = 0x80 | (c & 0x3f); } return 2; } if(c<0x10000){ if(buf){ buf[0] = 0xe0 | (c>>12); buf[1] = 0x80 | ((c>>6) & 0x3f); buf[2] = 0x80 | (c & 0x3f); } return 3; } /* We don't support characters above 0xFFFF in NTFS */ return 0;}/* Decodes a sequence of utf8 bytes into a single wide character. Returns the number of bytes consumed, or 0 on error */static intfrom_utf8(const unsigned char* str,ntfs_u16 *c){ int l=0,i; if(*str<0x80){ *c = *str; return 1; } if(*str<0xc0) /* lead byte must not be 10xxxxxx */ return 0; /* is c0 a possible lead byte? */ if(*str<0xe0){ /* 110xxxxx */ *c = *str & 0x1f; l=2; }else if(*str<0xf0){ /* 1110xxxx */ *c = *str & 0xf; l=3; }else if(*str<0xf8){ /* 11110xxx */ *c = *str & 7; l=4; }else /* We don't support characters above 0xFFFF in NTFS */ return 0; for(i=1;i<l;i++){ /* all other bytes must be 10xxxxxx */ if((str[i] & 0xc0) != 0x80) return 0; *c <<= 6; *c |= str[i] & 0x3f; } return l;}/* Converts wide string to UTF-8. Expects two in- and two out-parameters. Returns 0 on success, or error code. The caller has to free the result string. There is no support for UTF-16, yet */int ntfs_dupuni2utf8(ntfs_u16* in, int in_len,char **out,int *out_len){ int i,tmp; int len8; unsigned char *result; /* count the length of the resulting UTF-8 */ for(i=len8=0;i<in_len;i++){ tmp=to_utf8(in[i],0); if(!tmp) /* invalid character */ return EILSEQ; len8+=tmp; } *out=result=ntfs_malloc(len8+1); /* allow for zero-termination */ if(!result) return ENOMEM; result[len8]='\0'; *out_len=len8; for(i=len8=0;i<in_len;i++) len8+=to_utf8(in[i],result+len8); return 0;}/* Converts an UTF-8 sequence to a wide string. Same conventions as the previous function */int ntfs_duputf82uni(unsigned char* in, int in_len,ntfs_u16** out,int *out_len){ int i,tmp; int len16; ntfs_u16* result; ntfs_u16 wtmp; for(i=len16=0;i<in_len;i+=tmp,len16++){ tmp=from_utf8(in+i,&wtmp); if(!tmp) return EILSEQ; } *out=result=ntfs_malloc(2*(len16+1)); if(!result) return ENOMEM; result[len16]=0; *out_len=len16; for(i=len16=0;i<in_len;i+=tmp,len16++) tmp=from_utf8(in+i,result+len16); return 0;}/* See above. Produces ISO-8859-1 from wide strings */int ntfs_dupuni288591(ntfs_u16* in,int in_len,char** out,int *out_len){ int i; char *result; /* check for characters out of range */ for(i=0;i<in_len;i++) if(in[i]>=256) return EILSEQ; *out=result=ntfs_malloc(in_len+1); if(!result) return ENOMEM; result[in_len]='\0'; *out_len=in_len; for(i=0;i<in_len;i++) result[i]=in[i]; return 0;}/* See above */int ntfs_dup885912uni(unsigned char* in,int in_len,ntfs_u16 **out,int *out_len){ int i; ntfs_u16* result; *out=result=ntfs_malloc(2*in_len); if(!result) return ENOMEM; *out_len=in_len; for(i=0;i<in_len;i++) result[i]=in[i]; return 0;}/* Encodings dispatcher */int ntfs_encodeuni(ntfs_volume *vol,ntfs_u16 *in, int in_len, char **out, int *out_len){ switch(vol->nct){ case nct_utf8: return ntfs_dupuni2utf8(in,in_len,out,out_len); case nct_iso8859_1:return ntfs_dupuni288591(in,in_len,out,out_len); default:return EINVAL; /* unknown encoding */ }}int ntfs_decodeuni(ntfs_volume *vol,char *in, int in_len, ntfs_u16 **out, int *out_len){ switch(vol->nct){ case nct_utf8: return ntfs_duputf82uni(in,in_len,out,out_len); case nct_iso8859_1:return ntfs_dup885912uni(in,in_len,out,out_len); default:return EINVAL; }}/* * Local variables: * c-file-style: "linux" * End: */
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -