htmlrepairer.java

来自「linux下建立JAVA虚拟机的源码KAFFE」· Java 代码 · 共 675 行 · 第 1/2 页

JAVA
675
字号
	       break;
	    }
	 }


	 if (!isAtomTag) {

	    // check whether this open tag is equal to the topmost
	    // entry on the stack; if yes, emit a close tag first
	    
	    // corrects stuff like '<tr><td>...<td>...');
	    if (!tagStack.isEmpty() && tagStack.peek().equals(tagName)) {
	       printWarning("Inserting </"+tagName+">");
	       output.append("</"+tagName+">");
	       tagStack.pop();
	    }
	    else {
	       processKnownChildTags(tagName, tagStack, output);
	    }

	    // otherwise, we assume there are no close tags required 
	    // before this open tag.
	    tagStack.push(tagName);

	    output.append("<"+tagName+newAttributes+">");
	 }
	 else {
	    output.append("<"+tagName+newAttributes+"/>");
	 }
      }
   }

   /**
    *  Closes every open tag that lies above the nearest legal parent
    *  of the given tag on the tag stack.
    *
    *  The tag is looked up in tagInfoMap. If it is known there, the
    *  stack is searched from the top downwards for the first entry
    *  accepted by TagInfo.isLegalParentTag(). All entries above that
    *  one are popped; for each, a close tag is appended to the
    *  output and a warning is printed.
    *
    *  @param tagName   name of the open tag about to be emitted
    *  @param tagStack  stack of currently open tag names (Strings)
    *  @param output    buffer receiving the inserted close tags
    *  @return  true if a legal parent was found on the stack; false
    *           if the tag is not in tagInfoMap or no stack entry is a
    *           legal parent (the stack is left untouched then)
    */
   private boolean processKnownChildTags(String tagName, Stack tagStack, StringBuffer output) {

      TagInfo tagInfo = (TagInfo)tagInfoMap.get(tagName);
      if (null == tagInfo) {
         return false;
      }

      // Search from the top of the stack so that the innermost legal
      // parent wins. Picking the outermost one would also close
      // nested structures of the same kind (e.g. a table inside a
      // table cell).
      int parentIndex = -1;
      for (int i = tagStack.size() - 1; i >= 0; --i) {
         if (tagInfo.isLegalParentTag((String)tagStack.elementAt(i))) {
            parentIndex = i;
            break;
         }
      }
      if (parentIndex < 0) {
         return false;
      }

      // Pop by position instead of comparing String references, so
      // the result does not depend on whether equal tag names happen
      // to be the same String instance.
      while (tagStack.size() - 1 > parentIndex) {
         String poppedTagName = (String)tagStack.pop();
         output.append("</"+poppedTagName+">");
         printWarning("Inserting </"+poppedTagName+">");
      }
      return true;
   }

   /**
    *  Empties the tag stack, topmost entry first. For each popped
    *  tag name a warning is printed and the matching close tag is
    *  appended to the output buffer.
    */
   private void flush() {

      for (int remaining = tagStack.size(); remaining > 0; --remaining) {
         String openTag = (String)tagStack.pop();
         String closeTag = "</"+openTag+">";
         printWarning("Inserting "+closeTag);
         output.append(closeTag);
      }
   }

   /**
    *  Takes an HTML fragment and returns a repaired equivalent.
    *
    *  The text is scanned character by character. Runs of character
    *  data are handed to haveText() and the content of each tag
    *  (everything between '&lt;' and '&gt;') is handed to
    *  haveStartOrEndTag(). Stray '&lt;', '&gt;' and '&amp;'
    *  characters that do not start a tag or an entity reference are
    *  replaced by the corresponding entities.
    *
    *  The output buffer is cleared at the start of the call and its
    *  content is returned at the end.
    *
    *  Currently, the returned String is not guaranteed to be valid.
    *  In particular there are no checks on the tag names, attribute
    *  names and entity names.
    *
    *  @param text  the HTML fragment to repair; must not be null
    *  @return  the content of the output buffer after the whole
    *           text has been processed
    */
   public String getWellformedHTML(String text) {

      final int STATE_INITIAL         = 1;
      final int STATE_TAG_START       = 2;
      final int STATE_TAG             = 3;
      final int STATE_TAG_DOUBLEQUOTE = 4;
      final int STATE_TAG_SINGLEQUOTE = 5;
      final int STATE_AMP             = 6;

      int state = STATE_INITIAL;
      output.setLength(0);

      StringBuffer buf = new StringBuffer();
      char[] textAsChars = text.toCharArray();

      // The loop runs one step past the last character; that extra
      // step feeds -1 (end of input) to the state machine so pending
      // data can be flushed.
      for (int i=0, ilim=textAsChars.length+1; i<ilim; ++i) {
         int c;

         if (i<textAsChars.length) {
            c = textAsChars[i];
         }
         else {
            c = -1;
         }

         switch (state) {

         case STATE_INITIAL:
            if ('<'==c) {
               state = STATE_TAG_START;
               if (buf.length()>0) {
                  haveText(buf.toString());
                  buf.setLength(0);
               }
            }
            else if ('>'==c) {
               // assume this is a greater-than sign
               buf.append("&gt;");
            }
            else if ('&'==c) {
               state = STATE_AMP;
            }
            else if (-1==c) {
               if (buf.length()>0) {
                  haveText(buf.toString());
                  buf.setLength(0);
               }
            }
            else {
               buf.append((char)c);
            }
            break;

         case STATE_AMP:
            if ('<'==c) {
               buf.append("&amp;");
               state = STATE_TAG_START;
               if (buf.length()>0) {
                  haveText(buf.toString());
                  buf.setLength(0);
               }
            }
            else if ('>'==c) {
               // assume this is a greater-than sign
               buf.append("&amp;");
               buf.append("&gt;");
               state = STATE_INITIAL;
            }
            else if ('&'==c) {
               buf.append("&amp;");
               buf.append("&amp;");
               state = STATE_INITIAL;
            }
            else if (-1==c) {
               buf.append("&amp;");
               haveText(buf.toString());
               buf.setLength(0);
               state = STATE_INITIAL;
            }
            else {
               // peek forward and see whether this is a valid entity.
               boolean isEntity = false;
               if ('#'==c) {
                  // character reference; the digits are not checked
                  isEntity = true;
               }
               else if (Character.isLetter((char)c)) {
                  for (int i2=i+1; i2<textAsChars.length; i2++) {
                     char next = textAsChars[i2];
                     if (';' == next) {
                        isEntity = true;
                        break;
                     }
                     else if (!Character.isLetter(next)
                              && !Character.isDigit(next)
                              && ".-_:".indexOf(next) < 0
                              //&& !isCombiningChar(next)  // FIXME
                              //&& !isExtender(next)       // FIXME
                              ) {
                        // a non-name character before any ';' means
                        // this cannot be an entity reference
                        break;
                     }
                  }
               }

               // not a valid entity reference: assume &amp;
               buf.append(isEntity ? "&" : "&amp;");
               buf.append((char)c);
               state = STATE_INITIAL;
            }
            break;

         case STATE_TAG_START:
            if (-1==c) {
               // input ends right after '<': assume a less-sign and
               // do not emit the end-of-input marker as a character
               haveText("&lt;");
               state = STATE_INITIAL;
            }
            else if (" \t\r\n".indexOf(c)>=0) {
               // assume this is a less-sign; the cast keeps the
               // whitespace character instead of its numeric code
               haveText("&lt;"+(char)c);
               state = STATE_INITIAL;
            }
            else if ('/'==c) {
               buf.append((char)c);
               state = STATE_TAG;
            }
            else if ('<'==c) {
               // assume this is a less-sign
               haveText("&lt;&lt;");
               state = STATE_INITIAL;
            }
            else if ('>'==c) {
               // assume this is a less-sign
               haveText("&lt;&gt;");
               state = STATE_INITIAL;
            }
            else if (!Character.isLetter((char)c)) {
               // assume this is a less-sign
               haveText("&lt;"+(char)c);
               state = STATE_INITIAL;
            }
            else {
               buf.append((char)c);
               state = STATE_TAG;
            }
            break;

         case STATE_TAG:
            if ('\"'==c) {
               buf.append((char)c);
               state = STATE_TAG_DOUBLEQUOTE;
            }
            else if ('\''==c) {
               buf.append((char)c);
               state = STATE_TAG_SINGLEQUOTE;
            }
            else if ('>'==c) {
               state = STATE_INITIAL;
               haveStartOrEndTag(buf.toString());
               buf.setLength(0);
            }
            else if ('<'==c) {
               // missing greater-than sign: finish the current tag
               // and stay in STATE_TAG to collect the next one
               haveStartOrEndTag(buf.toString());
               buf.setLength(0);
            }
            else if (-1==c) {
               printWarning("Unclosed tag at end-of-comment: <"+buf);
               haveStartOrEndTag(buf.toString());
               buf.setLength(0);
            }
            else {
               buf.append((char)c);
            }
            break;

         case STATE_TAG_DOUBLEQUOTE:
            if ('\"'==c) {
               buf.append((char)c);
               state = STATE_TAG;
            }
            else if (-1==c) {
               printWarning("Unclosed attribute value at end-of-comment.");
               // supply the missing closing quote
               haveStartOrEndTag(buf.toString()+"\"");
            }
            else {
               buf.append((char)c);
            }
            break;

         case STATE_TAG_SINGLEQUOTE:
            if ('\''==c) {
               buf.append((char)c);
               state = STATE_TAG;
            }
            else if (-1==c) {
               printWarning("Unclosed attribute value at end-of-comment.");
               // supply the missing closing quote
               haveStartOrEndTag(buf.toString()+"'");
            }
            else {
               buf.append((char)c);
            }
            break;
         }
      }

      return output.toString();
   }

   /**
    *  Builds a description of the current context for warnings.
    *
    *  @return  the qualified type name of contextClass, followed by
    *           "." and the string form of contextMember if one is
    *           set; null if contextClass is null
    */
   private String getContext() {
      if (null == contextClass) {
         return null;
      }

      StringBuffer description = new StringBuffer();
      description.append(contextClass.qualifiedTypeName());
      if (null != contextMember) {
         description.append('.').append(contextMember.toString());
      }
      return description.toString();
   }

   /**
    *  Forwards a warning to warningReporter, prefixed with the
    *  current context.
    *
    *  Nothing is reported if warningReporter is null or noWarn is
    *  set. When getContext() returns null the warning is attributed
    *  to the overview page.
    *
    *  @param msg  the warning text
    */
   private void printWarning(String msg) {
      if (null == warningReporter || noWarn) {
         return;
      }

      // compute the context once and reuse it for the message
      String context = getContext();
      if (null != context) {
         warningReporter.printWarning("In "+context+": "+msg);
      }
      else {
         warningReporter.printWarning("In overview page: "+msg);
      }
   }

   /**
    *  Clears the output buffer, closes all tags still pending on
    *  the tag stack via flush(), and returns what flush() wrote.
    *
    *  @return  the content of the output buffer after flushing
    */
   public String terminateText() {
      output.setLength(0);
      flush();

      String closingTags = output.toString();
      return closingTags;
   }
}

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?