📄 importreaddata.java
字号:
//if found white space characters so far, the following if will be true if ((positionOfNonWhiteSpaceCharInFront + 1) == totalCharsSoFar && ((!foundStartDelimiter) && (!foundStartAndStopDelimiters) )) { char currentChar = currentToken[positionOfNonWhiteSpaceCharInFront]; if (//currentChar == '\t' || //currentChar == '\r' || alc: why isn't this included? // alc: BTW, \r and \n should be replaced // or amended with the first char of line.separator... //currentChar == '\n' || //currentChar == ' ') { // use String.trim()'s definition of whitespace. // i18n - check for whitespace - avoid doing a hard coded character // check and use the isWhitespace method to cover all the Unicode // options Character.isWhitespace(currentChar) == true) { if ((recordSepStartNotWhite || (currentChar != recordSeparatorChar0)) && (fieldSepStartNotWhite || (currentChar != fieldSeparatorChar0))) //disregard if whitespace char is same as separator first char positionOfNonWhiteSpaceCharInFront++; } } } //look for white spaces from the back towards the stop delimiter position. //If there was no startdelimite & stopdelimiter combination, then we start from the back //all the way to the beginning and stop when we find non-white char //positionOfNonWhiteSpaceCharInBack keeps the count of whitespaces at the back private void checkForWhiteSpaceInBack() { boolean onlyWhiteSpaceSoFar = true; positionOfNonWhiteSpaceCharInBack = 0; for (int i = totalCharsSoFar; (i > stopDelimiterPosition) && onlyWhiteSpaceSoFar; i--) { char currentChar = currentToken[i]; // replace test on \t,\n,' ' with String.trim's definition of white space // i18n - check for whitespace - avoid doing a hard coded character // check and use the isWhitespace method to cover all the Unicode // options if (Character.isWhitespace(currentChar)==true) { if ((recordSepStartNotWhite || (currentChar != recordSeparatorChar0)) && (fieldSepStartNotWhite || (currentChar != fieldSeparatorChar0))) //disregard if whitespace char is same as separator first char positionOfNonWhiteSpaceCharInBack++; } else onlyWhiteSpaceSoFar = false; } } //keep looking for field and record separators simultaneously because we don't yet //know how many columns make up a row in this data file. Stop as soon as we get //the record separator which is indicated by a return value of true from this function boolean readTokensUntilEndOfRecord() throws Exception { int nextChar; int fieldSeparatorIndex = 0; int recordSeparatorIndex = 0; fieldStopDelimiterIndex = 0; fieldStartDelimiterIndex = 0; totalCharsSoFar = 0; //at the start of every new token, make white space in front count 0 positionOfNonWhiteSpaceCharInFront = 0; foundStartDelimiter = false; foundStartAndStopDelimiters = false; numberOfCharsReadSoFar = 0; while (true) { nextChar = bufferedReader.read(); if (nextChar == -1) return true; numberOfCharsReadSoFar++; //read the character into the token holder. If token holder reaches it's capacity, //double it's capacity currentToken[totalCharsSoFar++] = (char)nextChar; //check if character read is white space char in front checkForWhiteSpaceInFront(); if (totalCharsSoFar == currentTokenMaxSize) { currentTokenMaxSize = currentTokenMaxSize * 2; char[] tempArray = new char[currentTokenMaxSize]; System.arraycopy(currentToken, 0, tempArray, 0, totalCharsSoFar); currentToken = tempArray; } //see if we can find fieldSeparator fieldSeparatorIndex = lookForPassedSeparator(fieldSeparator, fieldSeparatorIndex, fieldSeparatorLength, nextChar, false); //every time we find a column separator, the return false will indicate that count //this token as column data value and keep lookin for more tokens or record //separator if (fieldSeparatorIndex == -1) return false; //if found start delimiter, then don't look for record separator, just look for //end delimiter if (!foundStartDelimiter ) { //see if we can find recordSeparator recordSeparatorIndex = lookForPassedSeparator(recordSeparator, recordSeparatorIndex, recordSeparatorLength, nextChar, true); if (recordSeparatorIndex == -1) return true; } } } //if not inside a start delimiter, then look for the delimiter passed //else look for stop delimiter first. //this routine returns -1 if it finds field delimiter or record delimiter private int lookForPassedSeparator(char[] delimiter, int delimiterIndex, int delimiterLength, int nextChar, boolean lookForRecordSeperator) throws IOException { //foundStartDelimiter will be false if we haven't found a start delimiter yet //if we haven't found startdelimiter, then we look for both start delimiter //and passed delimiter(which can be field or record delimiter). If we do find //start delimiter, then we only look for stop delimiter and not the passed delimiter. if (!foundStartDelimiter ) { //look for start delimiter only if it's length is non-zero and only if haven't already //found it at all so far. if (fieldStartDelimiterLength != 0 && (!foundStartAndStopDelimiters) ) { //the code inside following if will be executed only if we have gone past all the //white characters in the front. if (totalCharsSoFar != positionOfNonWhiteSpaceCharInFront && (totalCharsSoFar - positionOfNonWhiteSpaceCharInFront) <= fieldStartDelimiterLength) { //After getting rid of white spaces in front, look for the start delimiter. If //found, set foundStartDelimiter flag. if (nextChar == fieldStartDelimiter[fieldStartDelimiterIndex]){ fieldStartDelimiterIndex++; if (fieldStartDelimiterIndex == fieldStartDelimiterLength) { foundStartDelimiter = true; //since characters read so far are same as start delimiters, discard those chars totalCharsSoFar = 0; positionOfNonWhiteSpaceCharInFront = 0; return 0; } } else { //found a mismatch for the start delimiter //see if found match for more than one char of this start delimiter before the //current mismatch, if so check the remaining chars agains //eg if stop delimiter is xa and data is xxa if (fieldStartDelimiterIndex > 0) { reCheckRestOfTheCharacters(totalCharsSoFar-fieldStartDelimiterIndex, fieldStartDelimiter, fieldStartDelimiterLength); } } } } /*look for typical record seperators line feed (\n), a carriage return * (\r) or a carriage return followed by line feed (\r\n) */ if(lookForRecordSeperator) { if(nextChar == '\r' || nextChar == '\n') { recordSeparatorChar0 = (char) nextChar; if(nextChar == '\r' ) { //omot the line feed character if it exists in the stream omitLineFeed(); } totalCharsSoFar = totalCharsSoFar - 1 ; return -1; } return delimiterIndex; } //look for passed delimiter if (nextChar == delimiter[delimiterIndex]) { delimiterIndex++; if (delimiterIndex == delimiterLength) { //found passed delimiter totalCharsSoFar = totalCharsSoFar - delimiterLength; return -1; } return delimiterIndex; //this number of chars of delimiter have exact match so far } else { //found a mismatch for the delimiter //see if found match for more than one char of this delimiter before the //current mismatch, if so check the remaining chars agains //eg if delimiter is xa and data is xxa if (delimiterIndex > 0) return(reCheckRestOfTheCharacters(totalCharsSoFar-delimiterIndex, delimiter, delimiterLength)); } } else { //see if we can find fieldStopDelimiter if (nextChar == fieldStopDelimiter[fieldStopDelimiterIndex]) { fieldStopDelimiterIndex++; if (fieldStopDelimiterIndex == fieldStopDelimiterLength) { boolean skipped = skipDoubleDelimiters(fieldStopDelimiter); if(!skipped) { foundStartDelimiter = false; //found stop delimiter, discard the chars corresponding to stop delimiter totalCharsSoFar = totalCharsSoFar - fieldStopDelimiterLength; //following is to take care of a case like "aa"aa This will result in an //error. Also a case like "aa" will truncate it to just aa stopDelimiterPosition = totalCharsSoFar; //following is used to distinguish between empty string ,"", and null string ,, foundStartAndStopDelimiters = true; }else { fieldStopDelimiterIndex =0 ; } return 0; } return 0; } else { //found a mismatch for the stop delimiter //see if found match for more than one char of this stop delimiter before the //current mismatch, if so check the remaining chars agains //eg if stop delimiter is xa and data is xxa if (fieldStopDelimiterIndex > 0) { reCheckRestOfTheCharacters(totalCharsSoFar-fieldStopDelimiterIndex, fieldStopDelimiter, fieldStopDelimiterLength); return 0; } } } return 0; } //If after finding a few matching characters for a delimiter, find a mismatch, //restart the matching process from character next to the one from which you //were in the process of finding the matching pattern private int reCheckRestOfTheCharacters(int startFrom, char[] delimiter, int delimiterLength) { int delimiterIndex = 0; // alc: need to test delim of abab with abaabab // if delimIndex resets to 0, i probably needs to reset to // (an ever increasing) startFrom=startFrom+1, not stay where it is for (int i = startFrom; i<totalCharsSoFar; i++) { if (currentToken[i] == delimiter[delimiterIndex]) delimiterIndex++; else delimiterIndex = 0; } return delimiterIndex; } /* * skips the duplicate delimeter characters inserd character stringd ata * to get the original string. In Double Delimter recognigation Delimiter * Format strings are written with a duplicate delimeter if a delimiter is * found inside the data while exporting. * For example with double quote(") as character delimiter * * "What a ""nice""day!" * * will be imported as: * * What a "nice"day! * * In the case of export, the rule applies in reverse. For example, * * I am 6"tall. * * will be exported to a file as: * * "I am 6""tall." */ private boolean skipDoubleDelimiters(char [] characterDelimiter) throws IOException { boolean skipped = true; int cDelLength = characterDelimiter.length ; bufferedReader.mark(cDelLength); for(int i = 0 ; i < cDelLength ; i++) { int nextChar = bufferedReader.read(); if(nextChar != characterDelimiter[i]) { //not a double delimter case bufferedReader.reset(); skipped = false; break; } } return skipped; } //omit the line feed character(\n) private void omitLineFeed() throws IOException { bufferedReader.mark(1); int nextChar = bufferedReader.read(); if(nextChar != '\n') { //not a Line Feed bufferedReader.reset(); } } /**returns the number of the current row */ int getCurrentRowNumber() {
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -