📄 mimeutility.java
字号:
* it is returned as-is. If the string contains non US-ASCII
 * characters, it is first character-encoded using the platform's
 * default charset, then transfer-encoded using either the B or
 * Q encoding. The resulting bytes are then returned as a Unicode
 * string containing only ASCII characters. <p>
 *
 * Note that this method should be used to encode only
 * "unstructured" RFC 822 headers. <p>
 *
 * Example of usage:
 * <p><blockquote><pre>
 *
 *  MimePart part = ...
 *  String rawvalue = "FooBar Mailer, Japanese version 1.1";
 *  try {
 *    // If we know for sure that rawvalue contains only US-ASCII
 *    // characters, we can skip the encoding part
 *    part.setHeader("X-mailer", MimeUtility.encodeText(rawvalue));
 *  } catch (UnsupportedEncodingException e) {
 *    // encoding failure
 *  } catch (MessagingException me) {
 *    // setHeader() failure
 *  }
 *
 * </pre></blockquote><p>
 *
 * @param   text    Unicode string
 * @return  Unicode string containing only US-ASCII characters
 * @exception UnsupportedEncodingException if the encoding fails
 */
public static String encodeText(String text)
        throws UnsupportedEncodingException {
    // Delegate to the three-argument overload with neither a charset
    // nor a transfer encoding specified (both null).
    return encodeText(text, null, null);
}

/**
 * Encode a RFC 822 "text" token into mail-safe form as per
 * RFC 2047. <p>
 *
 * The given Unicode string is examined for non US-ASCII
 * characters. If the string contains only US-ASCII characters,
 * it is returned as-is. If the string contains non US-ASCII
 * characters, it is first character-encoded using the specified
 * charset, then transfer-encoded using either the B or Q encoding.
 * The resulting bytes are then returned as a Unicode string
 * containing only ASCII characters. <p>
 *
 * Note that this method should be used to encode only
 * "unstructured" RFC 822 headers.
 *
 * @param   text    the header value
 * @param   charset the charset. If this parameter is null, the
 *              platform's default charset is used.
 * @param   encoding the encoding to be used. Currently supported
 *              values are "B" and "Q".
If this parameter is null, then * the "Q" encoding is used if most of characters to be * encoded are in the ASCII charset, otherwise "B" encoding * is used. * @return Unicode string containing only US-ASCII characters */ public static String encodeText(String text, String charset, String encoding) throws UnsupportedEncodingException { return encodeWord(text, charset, encoding, false); } /** * Decode "unstructured" headers, that is, headers that are defined * as '*text' as per RFC 822. <p> * * The string is decoded using the algorithm specified in * RFC 2047, Section 6.1. If the charset-conversion fails * for any sequence, an UnsupportedEncodingException is thrown. * If the String is not an RFC 2047 style encoded header, it is * returned as-is <p> * * Example of usage: * <p><blockquote><pre> * * MimePart part = ... * String rawvalue = null; * String value = null; * try { * if ((rawvalue = part.getHeader("X-mailer")[0]) != null) * value = MimeUtility.decodeText(rawvalue); * } catch (UnsupportedEncodingException e) { * // Don't care * value = rawvalue; * } catch (MessagingException me) { } * * return value; * * </pre></blockquote><p> * * @param etext the possibly encoded value * @exception UnsupportedEncodingException if the charset * conversion failed. */ public static String decodeText(String etext) throws UnsupportedEncodingException { /* * We look for sequences separated by "linear-white-space". * (as per RFC 2047, Section 6.1) * RFC 822 defines "linear-white-space" as SPACE | HT | CR | NL. */ String lwsp = " \t\n\r"; StringTokenizer st; /* * First, lets do a quick run thru the string and check * whether the sequence "=?" exists at all. If none exists, * we know there are no encoded-words in here and we can just * return the string as-is, without suffering thru the later * decoding logic. * This handles the most common case of unencoded headers * efficiently. */ if (etext.indexOf("=?") == -1) return etext; // Encoded words found. Start decoding ... 
st = new StringTokenizer(etext, lwsp, true); StringBuffer sb = new StringBuffer(); // decode buffer StringBuffer wsb = new StringBuffer(); // white space buffer boolean prevWasEncoded = false; while (st.hasMoreTokens()) { char c; String s = st.nextToken(); // If whitespace, append it to the whitespace buffer if (((c = s.charAt(0)) == ' ') || (c == '\t') || (c == '\r') || (c == '\n')) wsb.append(c); else { // Check if token is an 'encoded-word' .. String word; try { word = decodeWord(s); // Yes, this IS an 'encoded-word'. if (!prevWasEncoded && wsb.length() > 0) { // if the previous word was also encoded, we // should ignore the collected whitespace. Else // we include the whitespace as well. sb.append(wsb); } prevWasEncoded = true; } catch (ParseException pex) { // This is NOT an 'encoded-word'. word = s; // possibly decode inner encoded words if (!decodeStrict) { String dword = decodeInnerWords(word); if (dword != word) { // if a different String object was returned, // decoding was done. if (prevWasEncoded && word.startsWith("=?")) { // encoded followed by encoded, // throw away whitespace between } else { // include collected whitespace .. if (wsb.length() > 0) sb.append(wsb); } // did original end with encoded? prevWasEncoded = word.endsWith("?="); word = dword; } else { // include collected whitespace .. if (wsb.length() > 0) sb.append(wsb); prevWasEncoded = false; } } else { // include collected whitespace .. if (wsb.length() > 0) sb.append(wsb); prevWasEncoded = false; } } sb.append(word); // append the actual word wsb.setLength(0); // reset wsb for reuse } } sb.append(wsb); // append trailing whitespace return sb.toString(); } /** * Encode a RFC 822 "word" token into mail-safe form as per * RFC 2047. <p> * * The given Unicode string is examined for non US-ASCII * characters. If the string contains only US-ASCII characters, * it is returned as-is. 
* If the string contains non US-ASCII
 * characters, it is first character-encoded using the platform's
 * default charset, then transfer-encoded using either the B or
 * Q encoding. The resulting bytes are then returned as a Unicode
 * string containing only ASCII characters. <p>
 *
 * This method is meant to be used when creating RFC 822 "phrases".
 * The InternetAddress class, for example, uses this to encode
 * its 'phrase' component.
 *
 * @param   word    Unicode string
 * @return  Unicode string containing only US-ASCII characters
 * @exception UnsupportedEncodingException if the encoding fails
 */
public static String encodeWord(String word)
        throws UnsupportedEncodingException {
    // Delegate to the three-argument overload with neither a charset
    // nor a transfer encoding specified (both null).
    return encodeWord(word, null, null);
}

/**
 * Encode a RFC 822 "word" token into mail-safe form as per
 * RFC 2047. <p>
 *
 * The given Unicode string is examined for non US-ASCII
 * characters. If the string contains only US-ASCII characters,
 * it is returned as-is. If the string contains non US-ASCII
 * characters, it is first character-encoded using the specified
 * charset, then transfer-encoded using either the B or Q encoding.
 * The resulting bytes are then returned as a Unicode string
 * containing only ASCII characters. <p>
 *
 * @param   word    Unicode string
 * @param   charset the MIME charset
 * @param   encoding the encoding to be used. Currently supported
 *              values are "B" and "Q". If this parameter is null, then
 *              the "Q" encoding is used if most of characters to be
 *              encoded are in the ASCII charset, otherwise "B" encoding
 *              is used.
 * @return  Unicode string containing only US-ASCII characters
 * @exception UnsupportedEncodingException if the encoding fails
 */
public static String encodeWord(String word, String charset,
                                String encoding)
        throws UnsupportedEncodingException {
    // 'true' = we are encoding an RFC 822 "word" token.
    return encodeWord(word, charset, encoding, true);
}

/*
 * Encode the given string.
The parameter 'encodingWord' should * be true if a RFC 822 "word" token is being encoded and false if a * RFC 822 "text" token is being encoded. This is because the * "Q" encoding defined in RFC 2047 has more restrictions when * encoding "word" tokens. (Sigh) */ private static String encodeWord(String string, String charset, String encoding, boolean encodingWord) throws UnsupportedEncodingException { // If 'string' contains only US-ASCII characters, just // return it. int ascii = checkAscii(string); if (ascii == ALL_ASCII) return string; // Else, apply the specified charset conversion. String jcharset; if (charset == null) { // use default charset jcharset = getDefaultJavaCharset(); // the java charset charset = getDefaultMIMECharset(); // the MIME equivalent } else // MIME charset -> java charset jcharset = javaCharset(charset); // If no transfer-encoding is specified, figure one out. if (encoding == null) { if (ascii != MOSTLY_NONASCII) encoding = "Q"; else encoding = "B"; } boolean b64; if (encoding.equalsIgnoreCase("B")) b64 = true; else if (encoding.equalsIgnoreCase("Q")) b64 = false; else throw new UnsupportedEncodingException( "Unknown transfer encoding: " + encoding); StringBuffer outb = new StringBuffer(); // the output buffer doEncode(string, b64, jcharset, // As per RFC 2047, size of an encoded string should not // exceed 75 bytes. // 7 = size of "=?", '?', 'B'/'Q', '?', "?=" 75 - 7 - charset.length(), // the available space "=?" + charset + "?" + encoding + "?", // prefix true, encodingWord, outb); return outb.toString(); } private static void doEncode(String string, boolean b64, String jcharset, int avail, String prefix, boolean first, boolean encodingWord, StringBuffer buf) throws UnsupportedEncodingException { // First find out what the length of the encoded version of // 'string' would be. 
byte[] bytes = string.getBytes(jcharset); int len; if (b64) // "B" encoding len = BEncoderStream.encodedLength(bytes); else // "Q" len = QEncoderStream.encodedLength(bytes, encodingWord); int size; if ((len > avail) && ((size = string.length()) > 1)) { // If the length is greater than 'avail', split 'string' // into two and recurse. doEncode(string.substring(0, size/2), b64, jcharset, avail, prefix, first, encodingWord, buf); doEncode(string.substring(size/2, size), b64, jcharset, avail, prefix, false, encodingWord, buf); } else { // length <= than 'avail'. Encode the given string ByteArrayOutputStream os = new ByteArrayOutputStream(); OutputStream eos; // the encoder if (b64) // "B" encoding eos = new BEncoderStream(os); else // "Q" encoding eos = new QEncoderStream(os, encodingWord); try { // do the encoding eos.write(bytes); eos.close(); } catch (IOException ioex) { } byte[] encodedBytes = os.toByteArray(); // the encoded stuff // Now write out the encoded (all ASCII) bytes into our // StringBuffer if (!first) // not the first line of this sequence if (foldEncodedWords) buf.append("\r\n "); // start a continuation line else buf.append(" "); // line will be folded later buf.append(prefix); for (int i = 0; i < encodedBytes.length; i++) buf.append((char)encodedBytes[i]); buf.append("?="); // terminate the current sequence } } /** * The string is parsed using the rules in RFC 2047 for parsing * an "encoded-word". If the parse fails, a ParseException is * thrown. Otherwise, it is transfer-decoded, and then * charset-converted into Unicode. If the charset-conversion * fails, an UnsupportedEncodingException is thrown.<p> * * @param eword the encoded value * @exception ParseException if the string is not an * encoded-word as per RFC 2047. * @exception UnsupportedEncodingException if the charset * conversion failed. 
*/ public static String decodeWord(String eword) throws ParseException, UnsupportedEncodingException { if (!eword.startsWith("=?")) // not an encoded word throw new ParseException( "encoded word does not start with \"=?\": " + eword); // get charset int start = 2; int pos; if ((pos = eword.indexOf('?', start)) == -1) throw new ParseException( "encoded word does not include charset: " + eword); String charset = javaCharset(eword.substring(start, pos)); // get encoding start = pos+1; if ((pos = eword.indexOf('?', start)) == -1) throw new ParseException( "encoded word does not include encoding: " + eword); String encoding = eword.substring(start, pos); // get encoded-sequence start = pos+1; if ((pos = eword.indexOf("?=", start)) == -1) throw new ParseException( "encoded word does not end with \"?=\": " + eword); /* * XXX - should include this, but leaving it out for compatibility... * if (decodeStrict && pos != eword.length() - 2) throw new ParseException( "encoded word does not end with \"?=\": " + eword);); */ String word = eword.substring(start, pos);
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -