⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 archiveutils.java

📁 高性能分词算法
💻 JAVA
📖 第 1 页 / 共 2 页
字号:
     * timestamp: E.g. '20010909014640' or '20010101' or '1970'.     * @return A date.     * @see #secondsSinceEpoch(String)     * @throws ParseException      */    public static Date getSecondsSinceEpoch(String timestamp)    throws ParseException {        if (timestamp.length() < 14) {            if (timestamp.length() < 10 && (timestamp.length() % 2) == 1) {                throw new IllegalArgumentException("Must have year, " +                    "month, date, hour or second granularity: " + timestamp);            }            if (timestamp.length() == 4) {                // Add first month and first date.                timestamp = timestamp + "01010000";            }            if (timestamp.length() == 6) {                // Add a date of the first.                timestamp = timestamp + "010000";            }            if (timestamp.length() < 14) {                timestamp = timestamp +                    ArchiveUtils.padTo("", 14 - timestamp.length(), '0');            }        }        return ArchiveUtils.parse14DigitDate(timestamp);    }        /**     * @param i Integer to add prefix of zeros too.  If passed     * 2005, will return the String <code>0000002005</code>. String     * width is the width of Integer.MAX_VALUE as a string (10     * digits).     * @return Padded String version of <code>i</code>.     */    public static String zeroPadInteger(int i) {        return ArchiveUtils.padTo(Integer.toString(i),                MAX_INT_CHAR_WIDTH, '0');    }    /**      * Convert an <code>int</code> to a <code>String</code>, and pad it to     * <code>pad</code> spaces.     * @param i the int     * @param pad the width to pad to.     * @return String w/ padding.     */    public static String padTo(final int i, final int pad) {        String n = Integer.toString(i);        return padTo(n, pad);    }        /**      * Pad the given <code>String</code> to <code>pad</code> characters wide     * by pre-pending spaces.  <code>s</code> should not be <code>null</code>.     * If <code>s</code> is already wider than <code>pad</code> no change is     * done.     *     * @param s the String to pad     * @param pad the width to pad to.     * @return String w/ padding.     */    public static String padTo(final String s, final int pad) {        return padTo(s, pad, DEFAULT_PAD_CHAR);    }    /**      * Pad the given <code>String</code> to <code>pad</code> characters wide     * by pre-pending <code>padChar</code>.     *      * <code>s</code> should not be <code>null</code>. If <code>s</code> is     * already wider than <code>pad</code> no change is done.     *     * @param s the String to pad     * @param pad the width to pad to.     * @param padChar The pad character to use.     * @return String w/ padding.     */    public static String padTo(final String s, final int pad,            final char padChar) {        String result = s;        int l = s.length();        if (l < pad) {            StringBuffer sb = new StringBuffer(pad);            while(l < pad) {                sb.append(padChar);                l++;            }            sb.append(s);            result = sb.toString();        }        return result;    }    /** check that two byte arrays are equal.  They may be <code>null</code>.     *     * @param lhs a byte array     * @param rhs another byte array.     * @return <code>true</code> if they are both equal (or both     * <code>null</code>)     */    public static boolean byteArrayEquals(final byte[] lhs, final byte[] rhs) {        if (lhs == null && rhs != null || lhs != null && rhs == null) {            return false;        }        if (lhs==rhs) {            return true;        }        if (lhs.length != rhs.length) {            return false;        }        for(int i = 0; i<lhs.length; i++) {            if (lhs[i]!=rhs[i]) {                return false;            }        }        return true;    }    /**     * Converts a double to a string.     * @param val The double to convert     * @param precision How many characters to include after '.'     * @return the double as a string.     */    public static String doubleToString(double val, int maxFractionDigits){        return doubleToString(val, maxFractionDigits, 0);    }    private static String doubleToString(double val, int maxFractionDigits, int minFractionDigits) {        NumberFormat f = NumberFormat.getNumberInstance(Locale.US);         f.setMaximumFractionDigits(maxFractionDigits);        f.setMinimumFractionDigits(minFractionDigits);        return f.format(val);     }    /**     * Takes a byte size and formats it for display with 'friendly' units.      * <p>     * This involves converting it to the largest unit      * (of B, KB, MB, GB, TB) for which the amount will be > 1.     * <p>     * Additionally, at least 2 significant digits are always displayed.      * <p>     * Displays as bytes (B): 0-1023     * Displays as kilobytes (KB): 1024 - 2097151 (~2Mb)     * Displays as megabytes (MB): 2097152 - 4294967295 (~4Gb)     * Displays as gigabytes (GB): 4294967296 - infinity     * <p>     * Negative numbers will be returned as '0 B'.     *     * @param amount the amount of bytes     * @return A string containing the amount, properly formated.     */    public static String formatBytesForDisplay(long amount) {        double displayAmount = (double) amount;        int unitPowerOf1024 = 0;         if(amount <= 0){            return "0 B";        }                while(displayAmount>=1024 && unitPowerOf1024 < 4) {            displayAmount = displayAmount / 1024;            unitPowerOf1024++;        }                // TODO: get didactic, make these KiB, MiB, GiB, TiB        final String[] units = { " B", " KB", " MB", " GB", " TB" };                // ensure at least 2 significant digits (#.#) for small displayValues        int fractionDigits = (displayAmount < 10) ? 1 : 0;         return doubleToString(displayAmount, fractionDigits, fractionDigits)                    + units[unitPowerOf1024];    }    /**     * Convert milliseconds value to a human-readable duration     * @param time     * @return Human readable string version of passed <code>time</code>     */    public static String formatMillisecondsToConventional(long time) {        return formatMillisecondsToConventional(time,true);    }        /**     * Convert milliseconds value to a human-readable duration     * @param time     * @param toMs whether to print to the ms     * @return Human readable string version of passed <code>time</code>     */    public static String formatMillisecondsToConventional(long time, boolean toMs) {        StringBuffer sb = new StringBuffer();        if(time<0) {            sb.append("-");        }        long absTime = Math.abs(time);        if(!toMs && absTime < 1000) {            return "0s";        }        if(absTime > DAY_IN_MS) {            // days            sb.append(absTime / DAY_IN_MS + "d");            absTime = absTime % DAY_IN_MS;        }        if (absTime > HOUR_IN_MS) {            //got hours.            sb.append(absTime / HOUR_IN_MS + "h");            absTime = absTime % HOUR_IN_MS;        }        if (absTime > 60000) {            sb.append(absTime / 60000 + "m");            absTime = absTime % 60000;        }        if (absTime > 1000) {            sb.append(absTime / 1000 + "s");            absTime = absTime % 1000;        }        if(toMs) {            sb.append(absTime + "ms");        }        return sb.toString();    }    /**     * Generate a long UID based on the given class and version number.     * Using this instead of the default will assume serialization     * compatibility across class changes unless version number is     * intentionally bumped.     *     * @param class1     * @param version     * @return UID based off class and version number.     */    public static long classnameBasedUID(Class class1, int version) {        String callingClassname = class1.getName();        return (long)callingClassname.hashCode() << 32 + version;    }        /**     * Copy the raw bytes of a long into a byte array, starting at     * the specified offset.     *      * @param l     * @param array     * @param offset     */    public static void longIntoByteArray(long l, byte[] array, int offset) {        int i, shift;                          for(i = 0, shift = 56; i < 8; i++, shift -= 8)        array[offset+i] = (byte)(0xFF & (l >> shift));    }        public static long byteArrayIntoLong(byte [] bytearray) {        return byteArrayIntoLong(bytearray, 0);    }        /**     * Byte array into long.     * @param bytearray Array to convert to a long.     * @param offset Offset into array at which we start decoding the long.     * @return Long made of the bytes of <code>array</code> beginning at     * offset <code>offset</code>.     * @see #longIntoByteArray(long, byte[], int)     */    public static long byteArrayIntoLong(byte [] bytearray,            int offset) {        long result = 0;        for (int i = offset; i < 8 /*Bytes in long*/; i++) {            result = (result << 8 /*Bits in byte*/) |                (0xff & (byte)(bytearray[i] & 0xff));        }        return result;    }    /**     * Given a string that may be a plain host or host/path (without     * URI scheme), add an implied http:// if necessary.      *      * @param u string to evaluate     * @return string with http:// added if no scheme already present     */    public static String addImpliedHttpIfNecessary(String u) {        if(u.indexOf(':') == -1 || u.indexOf('.') < u.indexOf(':')) {            // No scheme present; prepend "http://"            u = "http://" + u;        }        return u;    }    /**     * Verify that the array begins with the prefix.      *      * @param array     * @param prefix     * @return true if array is identical to prefix for the first prefix.length     * positions      */    public static boolean startsWith(byte[] array, byte[] prefix) {        if(prefix.length>array.length) {            return false;        }        for(int i = 0; i < prefix.length; i++) {            if(array[i]!=prefix[i]) {                return false;             }        }        return true;     }    /**     * Utility method to get a String singleLineReport from Reporter     * @param rep  Reporter to get singleLineReport from     * @return String of report     */    public static String singleLineReport(Reporter rep) {        StringWriter sw = new StringWriter();        PrintWriter pw = new PrintWriter(sw);        try {            rep.singleLineReportTo(pw);        } catch (IOException e) {            // not really possible            e.printStackTrace();        }        pw.flush();        return sw.toString();    }    /**     * Compose the requested report into a String. DANGEROUS IF REPORT     * CAN BE LARGE.     *      * @param rep Reported     * @param name String name of report to compose     * @return String of report     */    public static String writeReportToString(Reporter rep, String name) {        StringWriter sw = new StringWriter();        PrintWriter pw = new PrintWriter(sw);        rep.reportTo(name,pw);        pw.flush();        return sw.toString();    }        public static Set<String> TLDS;        static {        TLDS = new HashSet<String>();        // from http://data.iana.org/TLD/tlds-alpha-by-domain.txt        // # Version 2008071601, Last Updated Thu Jul 17 08:07:01 2008 UTC        String[] tldsArray = { "AC", "AD", "AE", "AERO", "AF", "AG", "AI",                "AL", "AM", "AN", "AO", "AQ", "AR", "ARPA", "AS", "ASIA", "AT",                "AU", "AW", "AX", "AZ", "BA", "BB", "BD", "BE", "BF", "BG",                "BH", "BI", "BIZ", "BJ", "BM", "BN", "BO", "BR", "BS", "BT",                "BV", "BW", "BY", "BZ", "CA", "CAT", "CC", "CD", "CF", "CG",                "CH", "CI", "CK", "CL", "CM", "CN", "CO", "COM", "COOP", "CR",                "CU", "CV", "CX", "CY", "CZ", "DE", "DJ", "DK", "DM", "DO",                "DZ", "EC", "EDU", "EE", "EG", "ER", "ES", "ET", "EU", "FI",                "FJ", "FK", "FM", "FO", "FR", "GA", "GB", "GD", "GE", "GF",                "GG", "GH", "GI", "GL", "GM", "GN", "GOV", "GP", "GQ", "GR",                "GS", "GT", "GU", "GW", "GY", "HK", "HM", "HN", "HR", "HT",                "HU", "ID", "IE", "IL", "IM", "IN", "INFO", "INT", "IO", "IQ",                "IR", "IS", "IT", "JE", "JM", "JO", "JOBS", "JP", "KE", "KG",                "KH", "KI", "KM", "KN", "KP", "KR", "KW", "KY", "KZ", "LA",                "LB", "LC", "LI", "LK", "LR", "LS", "LT", "LU", "LV", "LY",                "MA", "MC", "MD", "ME", "MG", "MH", "MIL", "MK", "ML", "MM",                "MN", "MO", "MOBI", "MP", "MQ", "MR", "MS", "MT", "MU",                "MUSEUM", "MV", "MW", "MX", "MY", "MZ", "NA", "NAME", "NC",                "NE", "NET", "NF", "NG", "NI", "NL", "NO", "NP", "NR", "NU",                "NZ", "OM", "ORG", "PA", "PE", "PF", "PG", "PH", "PK", "PL",                "PM", "PN", "PR", "PRO", "PS", "PT", "PW", "PY", "QA", "RE",                "RO", "RS", "RU", "RW", "SA", "SB", "SC", "SD", "SE", "SG",                "SH", "SI", "SJ", "SK", "SL", "SM", "SN", "SO", "SR", "ST",                "SU", "SV", "SY", "SZ", "TC", "TD", "TEL", "TF", "TG", "TH",                "TJ", "TK", "TL", "TM", "TN", "TO", "TP", "TR", "TRAVEL", "TT",                "TV", "TW", "TZ", "UA", "UG", "UK", "US", "UY", "UZ", "VA",                "VC", "VE", "VG", "VI", "VN", "VU", "WF", "WS", "XN--0ZWM56D",                "XN--11B5BS3A9AJ6G", "XN--80AKHBYKNJ4F", "XN--9T4B11YI5A",                "XN--DEBA0AD", "XN--G6W251D", "XN--HGBK6AJ7F53BBA",                "XN--HLCJ6AYA9ESC7A", "XN--JXALPDLP", "XN--KGBECHTV",                "XN--ZCKZAH", "YE", "YT", "YU", "ZA", "ZM", "ZW" };        TLDS.addAll(Arrays.asList(tldsArray));    }    /**     * Return whether the given string represents a known      * top-level-domain (like "com", "org", etc.) per IANA     * as of 2008071601.      *      * @param dom candidate string     * @return boolean true if recognized as TLD     */    public static boolean isTld(String dom) {        return TLDS.contains(dom.toUpperCase());    }}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -