代码拉取完成,页面将自动刷新
diff -Nru boilerpipe-1.2.0/pom.xml boilerpipe-1.2.0-gil/pom.xml
--- boilerpipe-1.2.0/pom.xml 2013-10-11 11:54:23.418310128 +0200
+++ boilerpipe-1.2.0-gil/pom.xml 2013-10-11 11:51:51.334701196 +0200
@@ -32,4 +32,13 @@
<name>Christian Kohlschütter</name>
</developer>
</developers>
+
+ <dependencies>
+ <dependency>
+ <groupId>net.sourceforge.nekohtml</groupId>
+ <artifactId>nekohtml</artifactId>
+ <version>1.9.14</version>
+ </dependency>
+ </dependencies>
+
</project>
diff -Nru boilerpipe-1.2.0/src/main/org/cyberneko/html/HTMLElements.java boilerpipe-1.2.0-gil/src/main/org/cyberneko/html/HTMLElements.java
--- boilerpipe-1.2.0/src/main/org/cyberneko/html/HTMLElements.java 2010-12-16 11:30:06.000000000 +0100
+++ boilerpipe-1.2.0-gil/src/main/org/cyberneko/html/HTMLElements.java 1970-01-01 01:00:00.000000000 +0100
@@ -1,794 +0,0 @@
-/*
- * Copyright 2002-2009 Andy Clark, Marc Guillemot
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.cyberneko.html;
-
-/**
- * Collection of HTML element information.
- *
- * @author Andy Clark
- * @author Ahmed Ashour
- * @author Marc Guillemot
- *
- * @version $Id: HTMLElements.java,v 1.12 2005/02/14 07:16:59 andyc Exp $
- */
-public class HTMLElements {
-
- //
- // Constants
- //
-
- // element codes
-
- // NOTE: The element codes *must* start with 0 and increment in
- // sequence. The parent and closes references depends on
- // this assumption. -Ac
-
- public static final short A = 0;
- public static final short ABBR = A+1;
- public static final short ACRONYM = ABBR+1;
- public static final short ADDRESS = ACRONYM+1;
- public static final short APPLET = ADDRESS+1;
- public static final short AREA = APPLET+1;
- public static final short B = AREA+1;
- public static final short BASE = B+1;
- public static final short BASEFONT = BASE+1;
- public static final short BDO = BASEFONT+1;
- public static final short BGSOUND = BDO+1;
- public static final short BIG = BGSOUND+1;
- public static final short BLINK = BIG+1;
- public static final short BLOCKQUOTE = BLINK+1;
- public static final short BODY = BLOCKQUOTE+1;
- public static final short BR = BODY+1;
- public static final short BUTTON = BR+1;
- public static final short CAPTION = BUTTON+1;
- public static final short CENTER = CAPTION+1;
- public static final short CITE = CENTER+1;
- public static final short CODE = CITE+1;
- public static final short COL = CODE+1;
- public static final short COLGROUP = COL+1;
- public static final short COMMENT = COLGROUP+1;
- public static final short DEL = COMMENT+1;
- public static final short DFN = DEL+1;
- public static final short DIR = DFN+1;
- public static final short DIV = DIR+1;
- public static final short DD = DIV+1;
- public static final short DL = DD+1;
- public static final short DT = DL+1;
- public static final short EM = DT+1;
- public static final short EMBED = EM+1;
- public static final short FIELDSET = EMBED+1;
- public static final short FONT = FIELDSET+1;
- public static final short FORM = FONT+1;
- public static final short FRAME = FORM+1;
- public static final short FRAMESET = FRAME+1;
- public static final short H1 = FRAMESET+1;
- public static final short H2 = H1+1;
- public static final short H3 = H2+1;
- public static final short H4 = H3+1;
- public static final short H5 = H4+1;
- public static final short H6 = H5+1;
- public static final short HEAD = H6+1;
- public static final short HR = HEAD+1;
- public static final short HTML = HR+1;
- public static final short I = HTML+1;
- public static final short IFRAME = I+1;
- public static final short ILAYER = IFRAME+1;
- public static final short IMG = ILAYER+1;
- public static final short INPUT = IMG+1;
- public static final short INS = INPUT+1;
- public static final short ISINDEX = INS+1;
- public static final short KBD = ISINDEX+1;
- public static final short KEYGEN = KBD+1;
- public static final short LABEL = KEYGEN+1;
- public static final short LAYER = LABEL+1;
- public static final short LEGEND = LAYER+1;
- public static final short LI = LEGEND+1;
- public static final short LINK = LI+1;
- public static final short LISTING = LINK+1;
- public static final short MAP = LISTING+1;
- public static final short MARQUEE = MAP+1;
- public static final short MENU = MARQUEE+1;
- public static final short META = MENU+1;
- public static final short MULTICOL = META+1;
- public static final short NEXTID = MULTICOL+1;
- public static final short NOBR = NEXTID+1;
- public static final short NOEMBED = NOBR+1;
- public static final short NOFRAMES = NOEMBED+1;
- public static final short NOLAYER = NOFRAMES+1;
- public static final short NOSCRIPT = NOLAYER+1;
- public static final short OBJECT = NOSCRIPT+1;
- public static final short OL = OBJECT+1;
- public static final short OPTION = OL+1;
- public static final short OPTGROUP = OPTION+1;
- public static final short P = OPTGROUP+1;
- public static final short PARAM = P+1;
- public static final short PLAINTEXT = PARAM+1;
- public static final short PRE = PLAINTEXT+1;
- public static final short Q = PRE+1;
- public static final short RB = Q+1;
- public static final short RBC = RB+1;
- public static final short RP = RBC+1;
- public static final short RT = RP+1;
- public static final short RTC = RT+1;
- public static final short RUBY = RTC+1;
- public static final short S = RUBY+1;
- public static final short SAMP = S+1;
- public static final short SCRIPT = SAMP+1;
- public static final short SELECT = SCRIPT+1;
- public static final short SMALL = SELECT+1;
- public static final short SOUND = SMALL+1;
- public static final short SPACER = SOUND+1;
- public static final short SPAN = SPACER+1;
- public static final short STRIKE = SPAN+1;
- public static final short STRONG = STRIKE+1;
- public static final short STYLE = STRONG+1;
- public static final short SUB = STYLE+1;
- public static final short SUP = SUB+1;
- public static final short TABLE = SUP+1;
- public static final short TBODY = TABLE+1;
- public static final short TD = TBODY+1;
- public static final short TEXTAREA = TD+1;
- public static final short TFOOT = TEXTAREA+1;
- public static final short TH = TFOOT+1;
- public static final short THEAD = TH+1;
- public static final short TITLE = THEAD+1;
- public static final short TR = TITLE+1;
- public static final short TT = TR+1;
- public static final short U = TT+1;
- public static final short UL = U+1;
- public static final short VAR = UL+1;
- public static final short WBR = VAR+1;
- public static final short XML = WBR+1;
- public static final short XMP = XML+1;
- public static final short UNKNOWN = XMP+1;
-
- // information
-
- /** Element information organized by first letter. */
- protected static final Element[][] ELEMENTS_ARRAY = new Element[26][];
-
- /** Element information as a contiguous list. */
- protected static final ElementList ELEMENTS = new ElementList();
-
- /** No such element. */
- public static final Element NO_SUCH_ELEMENT = new Element(UNKNOWN, "", Element.CONTAINER, new short[]{BODY,HEAD}/*HTML*/, null);
-
- //
- // Static initializer
- //
-
- /**
- * Initializes the element information.
- * <p>
- * <strong>Note:</strong>
- * The <code>getElement</code> method requires that the HTML elements
- * are added to the list in alphabetical order. If new elements are
- * added, then they <em>must</em> be inserted in alphabetical order.
- */
- static {
- // <!ENTITY % heading "H1|H2|H3|H4|H5|H6">
- // <!ENTITY % fontstyle "TT | I | B | BIG | SMALL">
- // <!ENTITY % phrase "EM | STRONG | DFN | CODE | SAMP | KBD | VAR | CITE | ABBR | ACRONYM" >
- // <!ENTITY % special "A | IMG | OBJECT | BR | SCRIPT | MAP | Q | SUB | SUP | SPAN | BDO">
- // <!ENTITY % formctrl "INPUT | SELECT | TEXTAREA | LABEL | BUTTON">
- // <!ENTITY % inline "#PCDATA | %fontstyle; | %phrase; | %special; | %formctrl;">
- // <!ENTITY % block "P | %heading; | %list; | %preformatted; | DL | DIV | NOSCRIPT | BLOCKQUOTE | FORM | HR | TABLE | FIELDSET | ADDRESS">
- // <!ENTITY % flow "%block; | %inline;">
-
- // initialize array of element information
- ELEMENTS_ARRAY['A'-'A'] = new Element[] {
- // A - - (%inline;)* -(A)
- new Element(A, "A", Element.INLINE, BODY, new short[] {A}),
- // ABBR - - (%inline;)*
- new Element(ABBR, "ABBR", Element.INLINE, BODY, null),
- // ACRONYM - - (%inline;)*
- new Element(ACRONYM, "ACRONYM", Element.INLINE, BODY, null),
- // ADDRESS - - (%inline;)*
- new Element(ADDRESS, "ADDRESS", Element.BLOCK, BODY, null),
- // APPLET
- new Element(APPLET, "APPLET", 0, BODY, null),
- // AREA - O EMPTY
- new Element(AREA, "AREA", Element.EMPTY, MAP, null),
- };
- ELEMENTS_ARRAY['B'-'A'] = new Element[] {
- // B - - (%inline;)*
- new Element(B, "B", Element.INLINE, BODY, null),
- // BASE - O EMPTY
- new Element(BASE, "BASE", Element.EMPTY, HEAD, null),
- // BASEFONT
- new Element(BASEFONT, "BASEFONT", 0, HEAD, null),
- // BDO - - (%inline;)*
- new Element(BDO, "BDO", Element.INLINE, BODY, null),
- // BGSOUND
- new Element(BGSOUND, "BGSOUND", Element.EMPTY, HEAD, null),
- // BIG - - (%inline;)*
- new Element(BIG, "BIG", Element.INLINE, BODY, null),
- // BLINK
- new Element(BLINK, "BLINK", Element.INLINE, BODY, null),
- // BLOCKQUOTE - - (%block;|SCRIPT)+
- new Element(BLOCKQUOTE, "BLOCKQUOTE", Element.BLOCK, BODY, new short[]{P}),
- // BODY O O (%block;|SCRIPT)+ +(INS|DEL)
- new Element(BODY, "BODY", Element.CONTAINER, HTML, new short[]{HEAD}),
- // BR - O EMPTY
- new Element(BR, "BR", Element.EMPTY, BODY, null),
- // BUTTON - - (%flow;)* -(A|%formctrl;|FORM|FIELDSET)
- new Element(BUTTON, "BUTTON", 0, BODY, null),
- };
- ELEMENTS_ARRAY['C'-'A'] = new Element[] {
- // CAPTION - - (%inline;)*
- new Element(CAPTION, "CAPTION", Element.INLINE, TABLE, null),
- // CENTER,
- new Element(CENTER, "CENTER", 0, BODY, null),
- // CITE - - (%inline;)*
- new Element(CITE, "CITE", Element.INLINE, BODY, null),
- // CODE - - (%inline;)*
- new Element(CODE, "CODE", Element.INLINE, BODY, null),
- // COL - O EMPTY
- new Element(COL, "COL", Element.EMPTY, TABLE, null),
- // COLGROUP - O (COL)*
- new Element(COLGROUP, "COLGROUP", 0, TABLE, new short[]{COL,COLGROUP}),
- // COMMENT
- new Element(COMMENT, "COMMENT", Element.SPECIAL, HTML, null),
- };
- ELEMENTS_ARRAY['D'-'A'] = new Element[] {
- // DEL - - (%flow;)*
- new Element(DEL, "DEL", 0, BODY, null),
- // DFN - - (%inline;)*
- new Element(DFN, "DFN", Element.INLINE, BODY, null),
- // DIR
- new Element(DIR, "DIR", 0, BODY, null),
- // DIV - - (%flow;)*
- new Element(DIV, "DIV", Element.BLOCK, BODY, new short[]{P}),
- // DD - O (%flow;)*
- new Element(DD, "DD", 0, DL, new short[]{DT,DD}),
- // DL - - (DT|DD)+
- new Element(DL, "DL", Element.BLOCK, BODY, null),
- // DT - O (%inline;)*
- new Element(DT, "DT", 0, DL, new short[]{DT,DD}),
- };
- ELEMENTS_ARRAY['E'-'A'] = new Element[] {
- // EM - - (%inline;)*
- new Element(EM, "EM", Element.INLINE, BODY, null),
- // EMBED
- new Element(EMBED, "EMBED", 0, BODY, null),
- };
- ELEMENTS_ARRAY['F'-'A'] = new Element[] {
- // FIELDSET - - (#PCDATA,LEGEND,(%flow;)*)
- new Element(FIELDSET, "FIELDSET", 0, BODY, null),
- // FONT
- new Element(FONT, "FONT", Element.CONTAINER, BODY, null),
- // FORM - - (%block;|SCRIPT)+ -(FORM)
- new Element(FORM, "FORM", Element.CONTAINER, new short[]{BODY,TD,DIV}, new short[]{BUTTON,P}),
- // FRAME - O EMPTY
- new Element(FRAME, "FRAME", Element.EMPTY, FRAMESET, null),
- // FRAMESET - - ((FRAMESET|FRAME)+ & NOFRAMES?)
- new Element(FRAMESET, "FRAMESET", 0, HTML, null),
- };
- ELEMENTS_ARRAY['H'-'A'] = new Element[] {
- // (H1|H2|H3|H4|H5|H6) - - (%inline;)*
- new Element(H1, "H1", Element.BLOCK, new short[]{BODY,A}, new short[]{H1,H2,H3,H4,H5,H6,P}),
- new Element(H2, "H2", Element.BLOCK, new short[]{BODY,A}, new short[]{H1,H2,H3,H4,H5,H6,P}),
- new Element(H3, "H3", Element.BLOCK, new short[]{BODY,A}, new short[]{H1,H2,H3,H4,H5,H6,P}),
- new Element(H4, "H4", Element.BLOCK, new short[]{BODY,A}, new short[]{H1,H2,H3,H4,H5,H6,P}),
- new Element(H5, "H5", Element.BLOCK, new short[]{BODY,A}, new short[]{H1,H2,H3,H4,H5,H6,P}),
- new Element(H6, "H6", Element.BLOCK, new short[]{BODY,A}, new short[]{H1,H2,H3,H4,H5,H6,P}),
- // HEAD O O (%head.content;) +(%head.misc;)
- new Element(HEAD, "HEAD", 0, HTML, null),
- // HR - O EMPTY
- new Element(HR, "HR", Element.EMPTY, BODY, new short[]{P}),
- // HTML O O (%html.content;)
- new Element(HTML, "HTML", 0, null, null),
- };
- ELEMENTS_ARRAY['I'-'A'] = new Element[] {
- // I - - (%inline;)*
- new Element(I, "I", Element.INLINE, BODY, null),
- // IFRAME
- new Element(IFRAME, "IFRAME", Element.BLOCK, BODY, null),
- // ILAYER
- new Element(ILAYER, "ILAYER", Element.BLOCK, BODY, null),
- // IMG - O EMPTY
- new Element(IMG, "IMG", Element.EMPTY, BODY, null),
- // INPUT - O EMPTY
- new Element(INPUT, "INPUT", Element.EMPTY, BODY, null),
- // INS - - (%flow;)*
- new Element(INS, "INS", 0, BODY, null),
- // ISINDEX
- new Element(ISINDEX, "ISINDEX", 0, HEAD, null),
- };
- ELEMENTS_ARRAY['K'-'A'] = new Element[] {
- // KBD - - (%inline;)*
- new Element(KBD, "KBD", Element.INLINE, BODY, null),
- // KEYGEN
- new Element(KEYGEN, "KEYGEN", 0, BODY, null),
- };
- ELEMENTS_ARRAY['L'-'A'] = new Element[] {
- // LABEL - - (%inline;)* -(LABEL)
- new Element(LABEL, "LABEL", 0, BODY, null),
- // LAYER
- new Element(LAYER, "LAYER", Element.BLOCK, BODY, null),
- // LEGEND - - (%inline;)*
- new Element(LEGEND, "LEGEND", Element.INLINE, FIELDSET, null),
- // LI - O (%flow;)*
- new Element(LI, "LI", 0, new short[]{BODY,UL,OL}, new short[]{LI}),
- // LINK - O EMPTY
- new Element(LINK, "LINK", Element.EMPTY, HEAD, null),
- // LISTING
- new Element(LISTING, "LISTING", 0, BODY, null),
- };
- ELEMENTS_ARRAY['M'-'A'] = new Element[] {
- // MAP - - ((%block;) | AREA)+
- new Element(MAP, "MAP", Element.INLINE, BODY, null),
- // MARQUEE
- new Element(MARQUEE, "MARQUEE", 0, BODY, null),
- // MENU
- new Element(MENU, "MENU", 0, BODY, null),
- // META - O EMPTY
- new Element(META, "META", Element.EMPTY, HEAD, new short[]{STYLE,TITLE}),
- // MULTICOL
- new Element(MULTICOL, "MULTICOL", 0, BODY, null),
- };
- ELEMENTS_ARRAY['N'-'A'] = new Element[] {
- // NEXTID
- new Element(NEXTID, "NEXTID", Element.EMPTY, BODY, null),
- // NOBR
- new Element(NOBR, "NOBR", Element.INLINE, BODY, null),
- // NOEMBED
- new Element(NOEMBED, "NOEMBED", 0, BODY, null),
- // NOFRAMES - - (BODY) -(NOFRAMES)
- new Element(NOFRAMES, "NOFRAMES", 0, FRAMESET, null),
- // NOLAYER
- new Element(NOLAYER, "NOLAYER", 0, BODY, null),
- // NOSCRIPT - - (%block;)+
- new Element(NOSCRIPT, "NOSCRIPT", 0, new short[]{BODY}, null),
- };
- ELEMENTS_ARRAY['O'-'A'] = new Element[] {
- // OBJECT - - (PARAM | %flow;)*
- new Element(OBJECT, "OBJECT", 0, BODY, null),
- // OL - - (LI)+
- new Element(OL, "OL", Element.BLOCK, BODY, null),
- // OPTGROUP - - (OPTION)+
- new Element(OPTGROUP, "OPTGROUP", 0, SELECT, new short[]{OPTION}),
- // OPTION - O (#PCDATA)
- new Element(OPTION, "OPTION", 0, SELECT, new short[]{OPTION}),
- };
- ELEMENTS_ARRAY['P'-'A'] = new Element[] {
- // P - O (%inline;)*
- new Element(P, "P", Element.CONTAINER, BODY, new short[]{P}),
- // PARAM - O EMPTY
- new Element(PARAM, "PARAM", Element.EMPTY, new short[]{OBJECT,APPLET}, null),
- // PLAINTEXT
- new Element(PLAINTEXT, "PLAINTEXT", Element.SPECIAL, BODY, null),
- // PRE - - (%inline;)* -(%pre.exclusion;)
- new Element(PRE, "PRE", 0, BODY, null),
- };
- ELEMENTS_ARRAY['Q'-'A'] = new Element[] {
- // Q - - (%inline;)*
- new Element(Q, "Q", Element.INLINE, BODY, null),
- };
- ELEMENTS_ARRAY['R'-'A'] = new Element[] {
- // RB
- new Element(RB, "RB", Element.INLINE, RUBY, new short[]{RB}),
- // RBC
- new Element(RBC, "RBC", 0, RUBY, null),
- // RP
- new Element(RP, "RP", Element.INLINE, RUBY, new short[]{RB}),
- // RT
- new Element(RT, "RT", Element.INLINE, RUBY, new short[]{RB,RP}),
- // RTC
- new Element(RTC, "RTC", 0, RUBY, new short[]{RBC}),
- // RUBY
- new Element(RUBY, "RUBY", 0, BODY, new short[]{RUBY}),
- };
- ELEMENTS_ARRAY['S'-'A'] = new Element[] {
- // S
- new Element(S, "S", 0, BODY, null),
- // SAMP - - (%inline;)*
- new Element(SAMP, "SAMP", Element.INLINE, BODY, null),
- // SCRIPT - - %Script;
- new Element(SCRIPT, "SCRIPT", Element.SPECIAL, new short[]{HEAD,BODY}, null),
- // SELECT - - (OPTGROUP|OPTION)+
- new Element(SELECT, "SELECT", Element.CONTAINER, BODY, new short[]{SELECT}),
- // SMALL - - (%inline;)*
- new Element(SMALL, "SMALL", Element.INLINE, BODY, null),
- // SOUND
- new Element(SOUND, "SOUND", Element.EMPTY, HEAD, null),
- // SPACER
- new Element(SPACER, "SPACER", Element.EMPTY, BODY, null),
- // SPAN - - (%inline;)*
- new Element(SPAN, "SPAN", Element.CONTAINER, BODY, null),
- // STRIKE
- new Element(STRIKE, "STRIKE", Element.INLINE, BODY, null),
- // STRONG - - (%inline;)*
- new Element(STRONG, "STRONG", Element.INLINE, BODY, null),
- // STYLE - - %StyleSheet;
- new Element(STYLE, "STYLE", Element.SPECIAL, new short[]{HEAD,BODY}, new short[]{STYLE,TITLE,META}),
- // SUB - - (%inline;)*
- new Element(SUB, "SUB", Element.INLINE, BODY, null),
- // SUP - - (%inline;)*
- new Element(SUP, "SUP", Element.INLINE, BODY, null),
- };
- ELEMENTS_ARRAY['T'-'A'] = new Element[] {
- // TABLE - - (CAPTION?, (COL*|COLGROUP*), THEAD?, TFOOT?, TBODY+)
- new Element(TABLE, "TABLE", Element.BLOCK|Element.CONTAINER, BODY, null),
- // TBODY O O (TR)+
- new Element(TBODY, "TBODY", 0, TABLE, new short[]{THEAD,TD,TH,TR,COLGROUP}),
- // TD - O (%flow;)*
- new Element(TD, "TD", Element.CONTAINER, TR, TABLE, new short[]{TD,TH}),
- // TEXTAREA - - (#PCDATA)
- new Element(TEXTAREA, "TEXTAREA", Element.SPECIAL, BODY, null),
- // TFOOT - O (TR)+
- new Element(TFOOT, "TFOOT", 0, TABLE, new short[]{THEAD,TBODY,TD,TH,TR}),
- // TH - O (%flow;)*
- new Element(TH, "TH", Element.CONTAINER, TR, TABLE, new short[]{TD,TH}),
- // THEAD - O (TR)+
- new Element(THEAD, "THEAD", 0, TABLE, new short[]{COLGROUP}),
- // TITLE - - (#PCDATA) -(%head.misc;)
- new Element(TITLE, "TITLE", Element.SPECIAL, new short[]{HEAD,BODY}, null),
- // TR - O (TH|TD)+
- new Element(TR, "TR", Element.BLOCK, new short[]{TBODY, THEAD, TFOOT}, TABLE, new short[]{TD,TH,TR,COLGROUP}),
- // TT - - (%inline;)*
- new Element(TT, "TT", Element.INLINE, BODY, null),
- };
- ELEMENTS_ARRAY['U'-'A'] = new Element[] {
- // U,
- new Element(U, "U", Element.INLINE, BODY, null),
- // UL - - (LI)+
- new Element(UL, "UL", Element.BLOCK, BODY, null),
- };
- ELEMENTS_ARRAY['V'-'A'] = new Element[] {
- // VAR - - (%inline;)*
- new Element(VAR, "VAR", Element.INLINE, BODY, null),
- };
- ELEMENTS_ARRAY['W'-'A'] = new Element[] {
- // WBR
- new Element(WBR, "WBR", Element.EMPTY, BODY, null),
- };
- ELEMENTS_ARRAY['X'-'A'] = new Element[] {
- // XML
- new Element(XML, "XML", 0, BODY, null),
- // XMP
- new Element(XMP, "XMP", Element.SPECIAL, BODY, null),
- };
-
- // keep contiguous list of elements for lookups by code
- for (int i = 0; i < ELEMENTS_ARRAY.length; i++) {
- Element[] elements = ELEMENTS_ARRAY[i];
- if (elements != null) {
- for (int j = 0; j < elements.length; j++) {
- Element element = elements[j];
- ELEMENTS.addElement(element);
- }
- }
- }
- ELEMENTS.addElement(NO_SUCH_ELEMENT);
-
- // initialize cross references to parent elements
- for (int i = 0; i < ELEMENTS.size; i++) {
- Element element = ELEMENTS.data[i];
- if (element.parentCodes != null) {
- element.parent = new Element[element.parentCodes.length];
- for (int j = 0; j < element.parentCodes.length; j++) {
- element.parent[j] = ELEMENTS.data[element.parentCodes[j]];
- }
- element.parentCodes = null;
- }
- }
-
- } // <clinit>()
-
- //
- // Public static methods
- //
-
- /**
- * Returns the element information for the specified element code.
- *
- * @param code The element code.
- */
- public static final Element getElement(short code) {
- return ELEMENTS.data[code];
- } // getElement(short):Element
-
- /**
- * Returns the element information for the specified element name.
- *
- * @param ename The element name.
- */
- public static final Element getElement(String ename) {
- return getElement(ename, NO_SUCH_ELEMENT);
- } // getElement(String):Element
-
- /**
- * Returns the element information for the specified element name.
- *
- * @param ename The element name.
- * @param element The default element to return if not found.
- */
- public static final Element getElement(String ename, Element element) {
-
- if (ename.length() > 0) {
- int c = ename.charAt(0);
- if (c >= 'a' && c <= 'z') {
- c = 'A' + c - 'a';
- }
- if (c >= 'A' && c <= 'Z') {
- Element[] elements = ELEMENTS_ARRAY[c - 'A'];
- if (elements != null) {
- for (int i = 0; i < elements.length; i++) {
- Element elem = elements[i];
- if (elem.name.equalsIgnoreCase(ename)) {
- return elem;
- }
- }
- }
- }
- }
- return element;
-
- } // getElement(String):Element
-
- //
- // Classes
- //
-
- /**
- * Element information.
- *
- * @author Andy Clark
- */
- public static class Element {
-
- //
- // Constants
- //
-
- /** Inline element. */
- public static final int INLINE = 0x01;
-
- /** Block element. */
- public static final int BLOCK = 0x02;
-
- /** Empty element. */
- public static final int EMPTY = 0x04;
-
- /** Container element. */
- public static final int CONTAINER = 0x08;
-
- /** Special element. */
- public static final int SPECIAL = 0x10;
-
- //
- // Data
- //
-
- /** The element code. */
- public short code;
-
- /** The element name. */
- public String name;
-
- /** Informational flags. */
- public int flags;
-
- /** Parent elements. */
- public short[] parentCodes;
-
- /** Parent elements. */
- public Element[] parent;
-
- /** The bounding element code. */
- public short bounds;
-
- /** List of elements this element can close. */
- public short[] closes;
-
- /** If set to true, then this element may not be nested, example: "A" **/
- boolean nestable = true;
-
- //
- // Constructors
- //
-
- /**
- * Constructs an element object.
- *
- * @param code The element code.
- * @param name The element name.
- * @param flags Informational flags
- * @param parent Natural closing parent name.
- * @param closes List of elements this element can close.
- */
- public Element(short code, String name, int flags,
- short parent, short[] closes) {
- this(code, name, flags, new short[]{parent}, (short)-1, closes);
- } // <init>(short,String,int,short,short[]);
-
- /**
- * Constructs an element object.
- *
- * @param code The element code.
- * @param name The element name.
- * @param flags Informational flags
- * @param parent Natural closing parent name.
- * @param closes List of elements this element can close.
- */
- public Element(short code, String name, int flags,
- short parent, short bounds, short[] closes) {
- this(code, name, flags, new short[]{parent}, bounds, closes);
- } // <init>(short,String,int,short,short,short[])
-
- /**
- * Constructs an element object.
- *
- * @param code The element code.
- * @param name The element name.
- * @param flags Informational flags
- * @param parents Natural closing parent names.
- * @param closes List of elements this element can close.
- */
- public Element(short code, String name, int flags,
- short[] parents, short[] closes) {
- this(code, name, flags, parents, (short)-1, closes);
- } // <init>(short,String,int,short[],short[])
-
- /**
- * Constructs an element object.
- *
- * @param code The element code.
- * @param name The element name.
- * @param flags Informational flags
- * @param parents Natural closing parent names.
- * @param closes List of elements this element can close.
- */
- public Element(short code, String name, int flags,
- short[] parents, short bounds, short[] closes) {
- this.code = code;
- this.name = name;
- this.flags = flags;
- this.parentCodes = parents;
- this.parent = null;
- this.bounds = bounds;
- this.closes = closes;
- if(closes != null) {
- for(int i=0;i<closes.length;i++) {
- if(closes[i] == code) {
- this.nestable = false;
- break;
- }
- }
- }
- } // <init>(short,String,int,short[],short,short[])
-
- //
- // Public methods
- //
-
- /** Returns true if this element is an inline element. */
- public final boolean isInline() {
- return (flags & INLINE) != 0;
- } // isInline():boolean
-
- /** Returns true if this element is a block element. */
- public final boolean isBlock() {
- return (flags & BLOCK) != 0;
- } // isBlock():boolean
-
- /** Returns true if this element is an empty element. */
- public final boolean isEmpty() {
- return (flags & EMPTY) != 0;
- } // isEmpty():boolean
-
- /** Returns true if this element is a container element. */
- public final boolean isContainer() {
- return (flags & CONTAINER) != 0;
- } // isContainer():boolean
-
- /**
- * Returns true if this element is special -- if its content
- * should be parsed ignoring markup.
- */
- public final boolean isSpecial() {
- return (flags & SPECIAL) != 0;
- } // isSpecial():boolean
-
- /**
- * Returns true if this element can close the specified Element.
- *
- * @param tag The element.
- */
- public boolean closes(short tag) {
-
- if (closes != null) {
- for (int i = 0; i < closes.length; i++) {
- if (closes[i] == tag) {
- return true;
- }
- }
- }
- return false;
-
- } // closes(short):boolean
-
- //
- // Object methods
- //
-
- /** Returns a hash code for this object. */
- public int hashCode() {
- return name.hashCode();
- } // hashCode():int
-
- /** Returns true if the objects are equal. */
- public boolean equals(Object o) {
- return name.equals(o);
- } // equals(Object):boolean
-
- /**
- * Provides a simple representation to make debugging easier
- */
- public String toString() {
- return super.toString() + "(name=" + name + ")";
- }
-
- /**
- * Indicates if the provided element is an accepted parent of current element
- * @param element the element to test for "paternity"
- * @return <code>true</code> if <code>element</code> belongs to the {@link #parent}
- */
- public boolean isParent(final Element element) {
- if (parent == null)
- return false;
- else {
- for (int i=0; i<parent.length; ++i) {
- if (element.code == parent[i].code)
- return true;
- }
- }
- return false;
- }
- } // class Element
-
- /** Unsynchronized list of elements. */
- public static class ElementList {
-
- //
- // Data
- //
-
- /** The size of the list. */
- public int size;
-
- /** The data in the list. */
- public Element[] data = new Element[120];
-
- //
- // Public methods
- //
-
- /** Adds an element to list, resizing if necessary. */
- public void addElement(Element element) {
- if (size == data.length) {
- Element[] newarray = new Element[size + 20];
- System.arraycopy(data, 0, newarray, 0, size);
- data = newarray;
- }
- data[size++] = element;
- } // addElement(Element)
-
- } // class Element
-
-} // class HTMLElements
diff -Nru boilerpipe-1.2.0/src/main/org/cyberneko/html/HTMLTagBalancer.java boilerpipe-1.2.0-gil/src/main/org/cyberneko/html/HTMLTagBalancer.java
--- boilerpipe-1.2.0/src/main/org/cyberneko/html/HTMLTagBalancer.java 2010-12-16 11:30:06.000000000 +0100
+++ boilerpipe-1.2.0-gil/src/main/org/cyberneko/html/HTMLTagBalancer.java 1970-01-01 01:00:00.000000000 +0100
@@ -1,1409 +0,0 @@
-/*
- * Copyright 2002-2009 Andy Clark, Marc Guillemot
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.cyberneko.html;
-
-import java.util.ArrayList;
-import java.util.List;
-import org.apache.xerces.util.XMLAttributesImpl;
-import org.apache.xerces.xni.Augmentations;
-import org.apache.xerces.xni.NamespaceContext;
-import org.apache.xerces.xni.QName;
-import org.apache.xerces.xni.XMLAttributes;
-import org.apache.xerces.xni.XMLDocumentHandler;
-import org.apache.xerces.xni.XMLLocator;
-import org.apache.xerces.xni.XMLResourceIdentifier;
-import org.apache.xerces.xni.XMLString;
-import org.apache.xerces.xni.XNIException;
-import org.apache.xerces.xni.parser.XMLComponentManager;
-import org.apache.xerces.xni.parser.XMLConfigurationException;
-import org.apache.xerces.xni.parser.XMLDocumentFilter;
-import org.apache.xerces.xni.parser.XMLDocumentSource;
-import org.cyberneko.html.HTMLElements.Element;
-import org.cyberneko.html.filters.NamespaceBinder;
-import org.cyberneko.html.xercesbridge.XercesBridge;
-
-/**
- * Balances tags in an HTML document. This component receives document events
- * and tries to correct many common mistakes that human (and computer) HTML
- * document authors make. This tag balancer can:
- * <ul>
- * <li>add missing parent elements;
- * <li>automatically close elements with optional end tags; and
- * <li>handle mis-matched inline element tags.
- * </ul>
- * <p>
- * This component recognizes the following features:
- * <ul>
- * <li>http://cyberneko.org/html/features/augmentations
- * <li>http://cyberneko.org/html/features/report-errors
- * <li>http://cyberneko.org/html/features/balance-tags/document-fragment
- * <li>http://cyberneko.org/html/features/balance-tags/ignore-outside-content
- * </ul>
- * <p>
- * This component recognizes the following properties:
- * <ul>
- * <li>http://cyberneko.org/html/properties/names/elems
- * <li>http://cyberneko.org/html/properties/names/attrs
- * <li>http://cyberneko.org/html/properties/error-reporter
- * <li>http://cyberneko.org/html/properties/balance-tags/current-stack
- * </ul>
- *
- * @see HTMLElements
- *
- * @author Andy Clark
- * @author Marc Guillemot
- *
- * @version $Id: HTMLTagBalancer.java,v 1.20 2005/02/14 04:06:22 andyc Exp $
- */
-public class HTMLTagBalancer
- implements XMLDocumentFilter, HTMLComponent {
-
- //
- // Constants
- //
-
- // features
-
- /** Namespaces. */
- protected static final String NAMESPACES = "http://xml.org/sax/features/namespaces";
-
- /** Include infoset augmentations. */
- protected static final String AUGMENTATIONS = "http://cyberneko.org/html/features/augmentations";
-
- /** Report errors. */
- protected static final String REPORT_ERRORS = "http://cyberneko.org/html/features/report-errors";
-
- /** Document fragment balancing only (deprecated). */
- protected static final String DOCUMENT_FRAGMENT_DEPRECATED = "http://cyberneko.org/html/features/document-fragment";
-
- /** Document fragment balancing only. */
- protected static final String DOCUMENT_FRAGMENT = "http://cyberneko.org/html/features/balance-tags/document-fragment";
-
- /** Ignore outside content. */
- protected static final String IGNORE_OUTSIDE_CONTENT = "http://cyberneko.org/html/features/balance-tags/ignore-outside-content";
-
- /** Recognized features. */
- private static final String[] RECOGNIZED_FEATURES = {
- NAMESPACES,
- AUGMENTATIONS,
- REPORT_ERRORS,
- DOCUMENT_FRAGMENT_DEPRECATED,
- DOCUMENT_FRAGMENT,
- IGNORE_OUTSIDE_CONTENT,
- };
-
- /** Recognized features defaults. */
- private static final Boolean[] RECOGNIZED_FEATURES_DEFAULTS = {
- null,
- null,
- null,
- null,
- Boolean.FALSE,
- Boolean.FALSE,
- };
-
- // properties
-
- /** Modify HTML element names: { "upper", "lower", "default" }. */
- protected static final String NAMES_ELEMS = "http://cyberneko.org/html/properties/names/elems";
-
- /** Modify HTML attribute names: { "upper", "lower", "default" }. */
- protected static final String NAMES_ATTRS = "http://cyberneko.org/html/properties/names/attrs";
-
- /** Error reporter. */
- protected static final String ERROR_REPORTER = "http://cyberneko.org/html/properties/error-reporter";
-
- /**
- * <font color="red">EXPERIMENTAL: may change in next release</font><br/>
- * Name of the property holding the stack of elements in which context a document fragment should be parsed.
- **/
- public static final String FRAGMENT_CONTEXT_STACK = "http://cyberneko.org/html/properties/balance-tags/fragment-context-stack";
-
- /** Recognized properties. */
- private static final String[] RECOGNIZED_PROPERTIES = {
- NAMES_ELEMS,
- NAMES_ATTRS,
- ERROR_REPORTER,
- FRAGMENT_CONTEXT_STACK,
- };
-
- /** Recognized properties defaults. */
- private static final Object[] RECOGNIZED_PROPERTIES_DEFAULTS = {
- null,
- null,
- null,
- null,
- };
-
- // modify HTML names
-
- /** Don't modify HTML names. */
- protected static final short NAMES_NO_CHANGE = 0;
-
- /** Match HTML element names. */
- protected static final short NAMES_MATCH = 0;
-
- /** Uppercase HTML names. */
- protected static final short NAMES_UPPERCASE = 1;
-
- /** Lowercase HTML names. */
- protected static final short NAMES_LOWERCASE = 2;
-
- // static vars
-
- /** Synthesized event info item. */
- protected static final HTMLEventInfo SYNTHESIZED_ITEM =
- new HTMLEventInfo.SynthesizedItem();
-
- //
- // Data
- //
-
- // features
-
- /** Namespaces. */
- protected boolean fNamespaces;
-
- /** Include infoset augmentations. */
- protected boolean fAugmentations;
-
- /** Report errors. */
- protected boolean fReportErrors;
-
- /** Document fragment balancing only. */
- protected boolean fDocumentFragment;
-
- /** Ignore outside content. */
- protected boolean fIgnoreOutsideContent;
-
- // properties
-
- /** Modify HTML element names. */
- protected short fNamesElems;
-
- /** Modify HTML attribute names. */
- protected short fNamesAttrs;
-
- /** Error reporter. */
- protected HTMLErrorReporter fErrorReporter;
-
- // connections
-
- /** The document source. */
- protected XMLDocumentSource fDocumentSource;
-
- /** The document handler. */
- protected XMLDocumentHandler fDocumentHandler;
-
- // state
-
- /** The element stack. */
- protected final InfoStack fElementStack = new InfoStack();
-
- /** The inline stack. */
- protected final InfoStack fInlineStack = new InfoStack();
-
- /** True if seen anything. Important for xml declaration. */
- protected boolean fSeenAnything;
-
- /** True if root element has been seen. */
- protected boolean fSeenDoctype;
-
- /** True if root element has been seen. */
- protected boolean fSeenRootElement;
-
- /**
- * True if seen the end of the document element. In other words,
- * this variable is set to false <em>until</em> the end </HTML>
- * tag is seen (or synthesized). This is used to ensure that
- * extraneous events after the end of the document element do not
- * make the document stream ill-formed.
- */
- protected boolean fSeenRootElementEnd;
-
- /** True if seen <head< element. */
- protected boolean fSeenHeadElement;
-
- /** True if seen <body< element. */
- protected boolean fSeenBodyElement;
-
- /** True if a form is in the stack (allow to discard opening of nested forms) */
- protected boolean fOpenedForm;
-
- // temp vars
-
- /** A qualified name. */
- private final QName fQName = new QName();
-
- /** Empty attributes. */
- private final XMLAttributes fEmptyAttrs = new XMLAttributesImpl();
-
- /** Augmentations. */
- private final HTMLAugmentations fInfosetAugs = new HTMLAugmentations();
-
- protected HTMLTagBalancingListener tagBalancingListener;
- private LostText lostText_ = new LostText();
-
- private boolean forcedStartElement_ = false;
- private boolean forcedEndElement_ = false;
-
- /**
- * Stack of elements determining the context in which a document fragment should be parsed
- */
- private QName[] fragmentContextStack_ = null;
- private int fragmentContextStackSize_ = 0; // not 0 only when a fragment is parsed and fragmentContextStack_ is set
-
- private List/*ElementEntry*/ endElementsBuffer_ = new ArrayList();
-
- //
- // HTMLComponent methods
- //
-
- /** Returns the default state for a feature. */
- public Boolean getFeatureDefault(String featureId) {
- int length = RECOGNIZED_FEATURES != null ? RECOGNIZED_FEATURES.length : 0;
- for (int i = 0; i < length; i++) {
- if (RECOGNIZED_FEATURES[i].equals(featureId)) {
- return RECOGNIZED_FEATURES_DEFAULTS[i];
- }
- }
- return null;
- } // getFeatureDefault(String):Boolean
-
- /** Returns the default state for a property. */
- public Object getPropertyDefault(String propertyId) {
- int length = RECOGNIZED_PROPERTIES != null ? RECOGNIZED_PROPERTIES.length : 0;
- for (int i = 0; i < length; i++) {
- if (RECOGNIZED_PROPERTIES[i].equals(propertyId)) {
- return RECOGNIZED_PROPERTIES_DEFAULTS[i];
- }
- }
- return null;
- } // getPropertyDefault(String):Object
-
- //
- // XMLComponent methods
- //
-
- /** Returns recognized features. */
- public String[] getRecognizedFeatures() {
- return RECOGNIZED_FEATURES;
- } // getRecognizedFeatures():String[]
-
- /** Returns recognized properties. */
- public String[] getRecognizedProperties() {
- return RECOGNIZED_PROPERTIES;
- } // getRecognizedProperties():String[]
-
- /** Resets the component. */
- public void reset(XMLComponentManager manager)
- throws XMLConfigurationException {
-
- // get features
- fNamespaces = manager.getFeature(NAMESPACES);
- fAugmentations = manager.getFeature(AUGMENTATIONS);
- fReportErrors = manager.getFeature(REPORT_ERRORS);
- fDocumentFragment = manager.getFeature(DOCUMENT_FRAGMENT) ||
- manager.getFeature(DOCUMENT_FRAGMENT_DEPRECATED);
- fIgnoreOutsideContent = manager.getFeature(IGNORE_OUTSIDE_CONTENT);
-
- // get properties
- fNamesElems = getNamesValue(String.valueOf(manager.getProperty(NAMES_ELEMS)));
- fNamesAttrs = getNamesValue(String.valueOf(manager.getProperty(NAMES_ATTRS)));
- fErrorReporter = (HTMLErrorReporter)manager.getProperty(ERROR_REPORTER);
-
- fragmentContextStack_ = (QName[]) manager.getProperty(FRAGMENT_CONTEXT_STACK);
-
- } // reset(XMLComponentManager)
-
- /** Sets a feature. */
- public void setFeature(String featureId, boolean state)
- throws XMLConfigurationException {
-
- if (featureId.equals(AUGMENTATIONS)) {
- fAugmentations = state;
- return;
- }
- if (featureId.equals(REPORT_ERRORS)) {
- fReportErrors = state;
- return;
- }
- if (featureId.equals(IGNORE_OUTSIDE_CONTENT)) {
- fIgnoreOutsideContent = state;
- return;
- }
-
- } // setFeature(String,boolean)
-
- /** Sets a property. */
- public void setProperty(String propertyId, Object value)
- throws XMLConfigurationException {
-
- if (propertyId.equals(NAMES_ELEMS)) {
- fNamesElems = getNamesValue(String.valueOf(value));
- return;
- }
-
- if (propertyId.equals(NAMES_ATTRS)) {
- fNamesAttrs = getNamesValue(String.valueOf(value));
- return;
- }
-
- } // setProperty(String,Object)
-
- //
- // XMLDocumentSource methods
- //
-
- /** Sets the document handler. */
- public void setDocumentHandler(XMLDocumentHandler handler) {
- fDocumentHandler = handler;
- } // setDocumentHandler(XMLDocumentHandler)
-
- // @since Xerces 2.1.0
-
- /** Returns the document handler. */
- public XMLDocumentHandler getDocumentHandler() {
- return fDocumentHandler;
- } // getDocumentHandler():XMLDocumentHandler
-
- //
- // XMLDocumentHandler methods
- //
-
- // since Xerces-J 2.2.0
-
- /** Start document. */
- public void startDocument(XMLLocator locator, String encoding,
- NamespaceContext nscontext, Augmentations augs)
- throws XNIException {
-
- // reset state
- fElementStack.top = 0;
- if (fragmentContextStack_ != null) {
- fragmentContextStackSize_ = fragmentContextStack_.length;
- for (int i=0; i<fragmentContextStack_.length; ++i) {
- final QName name = fragmentContextStack_[i];
- final Element elt = HTMLElements.getElement(name.localpart);
- fElementStack.push(new Info(elt, name));
- }
-
- }
- else {
- fragmentContextStackSize_ = 0;
- }
- fSeenAnything = false;
- fSeenDoctype = false;
- fSeenRootElement = false;
- fSeenRootElementEnd = false;
- fSeenHeadElement = false;
- fSeenBodyElement = false;
-
-
- // pass on event
- if (fDocumentHandler != null) {
- XercesBridge.getInstance().XMLDocumentHandler_startDocument(fDocumentHandler, locator, encoding, nscontext, augs);
- }
-
- } // startDocument(XMLLocator,String,Augmentations)
-
- // old methods
-
- /** XML declaration. */
- public void xmlDecl(String version, String encoding, String standalone,
- Augmentations augs) throws XNIException {
- if (!fSeenAnything && fDocumentHandler != null) {
- fDocumentHandler.xmlDecl(version, encoding, standalone, augs);
- }
- } // xmlDecl(String,String,String,Augmentations)
-
- /** Doctype declaration. */
- public void doctypeDecl(String rootElementName, String publicId, String systemId,
- Augmentations augs) throws XNIException {
- fSeenAnything = true;
- if (fReportErrors) {
- if (fSeenRootElement) {
- fErrorReporter.reportError("HTML2010", null);
- }
- else if (fSeenDoctype) {
- fErrorReporter.reportError("HTML2011", null);
- }
- }
- if (!fSeenRootElement && !fSeenDoctype) {
- fSeenDoctype = true;
- if (fDocumentHandler != null) {
- fDocumentHandler.doctypeDecl(rootElementName, publicId, systemId, augs);
- }
- }
- } // doctypeDecl(String,String,String,Augmentations)
-
- /** End document. */
- public void endDocument(Augmentations augs) throws XNIException {
-
- // </body> and </html> have been buffered to consider outside content
- fIgnoreOutsideContent = true; // endElement should not ignore the elements passed from buffer
- consumeBufferedEndElements();
-
- // handle empty document
- if (!fSeenRootElement && !fDocumentFragment) {
- if (fReportErrors) {
- fErrorReporter.reportError("HTML2000", null);
- }
- if (fDocumentHandler != null) {
- fSeenRootElementEnd = false;
- forceStartBody(); // will force <html> and <head></head>
- final String body = modifyName("body", fNamesElems);
- fQName.setValues(null, body, body, null);
- callEndElement(fQName, synthesizedAugs());
-
- final String ename = modifyName("html", fNamesElems);
- fQName.setValues(null, ename, ename, null);
- callEndElement(fQName, synthesizedAugs());
- }
- }
-
- // pop all remaining elements
- else {
- int length = fElementStack.top - fragmentContextStackSize_;
- for (int i = 0; i < length; i++) {
- Info info = fElementStack.pop();
- if (fReportErrors) {
- String ename = info.qname.rawname;
- fErrorReporter.reportWarning("HTML2001", new Object[]{ename});
- }
- if (fDocumentHandler != null) {
- callEndElement(info.qname, synthesizedAugs());
- }
- }
- }
-
- // call handler
- if (fDocumentHandler != null) {
- fDocumentHandler.endDocument(augs);
- }
-
- } // endDocument(Augmentations)
-
- /**
- * Consume elements that have been buffered, like </body></html> that are first consumed
- * at the end of document
- */
- private void consumeBufferedEndElements() {
- final List toConsume = new ArrayList(endElementsBuffer_);
- endElementsBuffer_.clear();
- for (int i=0; i<toConsume.size(); ++i) {
- final ElementEntry entry = (ElementEntry) toConsume.get(i);
- forcedEndElement_ = true;
- endElement(entry.name_, entry.augs_);
- }
- endElementsBuffer_.clear();
- }
-
- /** Comment. */
- public void comment(XMLString text, Augmentations augs) throws XNIException {
- fSeenAnything = true;
- consumeEarlyTextIfNeeded();
- if (fDocumentHandler != null) {
- fDocumentHandler.comment(text, augs);
- }
- } // comment(XMLString,Augmentations)
-
- private void consumeEarlyTextIfNeeded() {
- if (!lostText_.isEmpty()) {
- if (!fSeenBodyElement) {
- forceStartBody();
- }
- lostText_.refeed(this);
- }
- }
-
- /** Processing instruction. */
- public void processingInstruction(String target, XMLString data,
- Augmentations augs) throws XNIException {
- fSeenAnything = true;
- consumeEarlyTextIfNeeded();
- if (fDocumentHandler != null) {
- fDocumentHandler.processingInstruction(target, data, augs);
- }
- } // processingInstruction(String,XMLString,Augmentations)
-
- /** Start element. */
- public void startElement(final QName elem, XMLAttributes attrs, final Augmentations augs)
- throws XNIException {
- fSeenAnything = true;
-
- final boolean isForcedCreation = forcedStartElement_;
- forcedStartElement_ = false;
-
- // check for end of document
- if (fSeenRootElementEnd) {
- notifyDiscardedStartElement(elem, attrs, augs);
- return;
- }
-
- // get element information
- final HTMLElements.Element element = getElement(elem);
- final short elementCode = element.code;
-
- // the creation of some elements like TABLE or SELECT can't be forced. Any others?
- if (isForcedCreation && (elementCode == HTMLElements.TABLE || elementCode == HTMLElements.SELECT)) {
- return; // don't accept creation
- }
-
- // ignore multiple html, head, body elements
- if (fSeenRootElement && elementCode == HTMLElements.HTML) {
- notifyDiscardedStartElement(elem, attrs, augs);
- return;
- }
- if (elementCode == HTMLElements.HEAD) {
- if (fSeenHeadElement) {
- notifyDiscardedStartElement(elem, attrs, augs);
- return;
- }
- fSeenHeadElement = true;
- }
- else if (elementCode == HTMLElements.FRAMESET) {
- consumeBufferedEndElements(); // </head> (if any) has been buffered
- }
- else if (elementCode == HTMLElements.BODY) {
- // create <head></head> if none was present
- if (!fSeenHeadElement) {
- final QName head = createQName("head");
- forceStartElement(head, null, synthesizedAugs());
- endElement(head, synthesizedAugs());
- }
- consumeBufferedEndElements(); // </head> (if any) has been buffered
-
- if (fSeenBodyElement) {
- notifyDiscardedStartElement(elem, attrs, augs);
- return;
- }
- fSeenBodyElement = true;
- }
- else if (elementCode == HTMLElements.FORM) {
- if (fOpenedForm) {
- notifyDiscardedStartElement(elem, attrs, augs);
- return;
- }
- fOpenedForm = true;
- }
- else if (elementCode == HTMLElements.UNKNOWN) {
- consumeBufferedEndElements();
- }
-
- // check proper parent
- if (element.parent != null) {
- if (!fSeenRootElement && !fDocumentFragment) {
- String pname = element.parent[0].name;
- pname = modifyName(pname, fNamesElems);
- if (fReportErrors) {
- String ename = elem.rawname;
- fErrorReporter.reportWarning("HTML2002", new Object[]{ename,pname});
- }
- final QName qname = new QName(null, pname, pname, null);
- final boolean parentCreated = forceStartElement(qname, null, synthesizedAugs());
- if (!parentCreated) {
- if (!isForcedCreation) {
- notifyDiscardedStartElement(elem, attrs, augs);
- }
- return;
- }
- }
- else {
- HTMLElements.Element preferedParent = element.parent[0];
- if (preferedParent.code != HTMLElements.HEAD || (!fSeenBodyElement && !fDocumentFragment)) {
- int depth = getParentDepth(element.parent, element.bounds);
- if (depth == -1) { // no parent found
- final String pname = modifyName(preferedParent.name, fNamesElems);
- final QName qname = new QName(null, pname, pname, null);
- if (fReportErrors) {
- String ename = elem.rawname;
- fErrorReporter.reportWarning("HTML2004", new Object[]{ename,pname});
- }
- final boolean parentCreated = forceStartElement(qname, null, synthesizedAugs());
- if (!parentCreated) {
- if (!isForcedCreation) {
- notifyDiscardedStartElement(elem, attrs, augs);
- }
- return;
- }
- }
- }
- }
- }
-
- // if block element, save immediate parent inline elements
- int depth = 0;
- if (element.flags == 0) {
- int length = fElementStack.top;
- fInlineStack.top = 0;
- for (int i = length - 1; i >= 0; i--) {
- Info info = fElementStack.data[i];
- if (!info.element.isInline()) {
- break;
- }
- fInlineStack.push(info);
- endElement(info.qname, synthesizedAugs());
- }
- depth = fInlineStack.top;
- }
-
- // close previous elements
- // all elements close a <script>
- // in head, no element has children
- if ((fElementStack.top > 1
- && (fElementStack.peek().element.code == HTMLElements.SCRIPT))
- || fElementStack.top > 2 && fElementStack.data[fElementStack.top-2].element.code == HTMLElements.HEAD) {
- final Info info = fElementStack.pop();
- if (fDocumentHandler != null) {
- callEndElement(info.qname, synthesizedAugs());
- }
- }
- if (element.closes != null) {
- int length = fElementStack.top;
- for (int i = length - 1; i >= 0; i--) {
- Info info = fElementStack.data[i];
-
- // does it close the element we're looking at?
- if (element.closes(info.element.code)) {
- if (fReportErrors) {
- String ename = elem.rawname;
- String iname = info.qname.rawname;
- fErrorReporter.reportWarning("HTML2005", new Object[]{ename,iname});
- }
- for (int j = length - 1; j >= i; j--) {
- info = fElementStack.pop();
- if (fDocumentHandler != null) {
- // PATCH: Marc-Andr� Morissette
- callEndElement(info.qname, synthesizedAugs());
- }
- }
- length = i;
- continue;
- }
-
- // should we stop searching?
- if(element.nestable) {
- if (info.element.isBlock() || element.isParent(info.element)) {
- break;
- }
- }
- }
- }
- // TODO: investigate if only table is special here
- // table closes all opened inline elements
- else if (elementCode == HTMLElements.TABLE) {
- for (int i=fElementStack.top-1; i >= 0; i--) {
- final Info info = fElementStack.data[i];
- if (!info.element.isInline()) {
- break;
- }
- endElement(info.qname, synthesizedAugs());
- }
- }
-
- // call handler
- fSeenRootElement = true;
- if (element != null && element.isEmpty()) {
- if (attrs == null) {
- attrs = emptyAttributes();
- }
- if (fDocumentHandler != null) {
- fDocumentHandler.emptyElement(elem, attrs, augs);
- }
- }
- else {
- boolean inline = element != null && element.isInline();
- fElementStack.push(new Info(element, elem, inline ? attrs : null));
- if (attrs == null) {
- attrs = emptyAttributes();
- }
- if (fDocumentHandler != null) {
- callStartElement(elem, attrs, augs);
- }
- }
-
- // re-open inline elements
- for (int i = 0; i < depth; i++) {
- Info info = fInlineStack.pop();
- forceStartElement(info.qname, info.attributes, synthesizedAugs());
- }
-
- if (elementCode == HTMLElements.BODY) {
- lostText_.refeed(this);
- }
- } // startElement(QName,XMLAttributes,Augmentations)
-
- /**
- * Forces an element start, taking care to set the information to allow startElement to "see" that's
- * the element has been forced.
- * @return <code>true</code> if creation could be done (TABLE's creation for instance can't be forced)
- */
- private boolean forceStartElement(final QName elem, XMLAttributes attrs, final Augmentations augs)
- throws XNIException {
-
- forcedStartElement_ = true;
- startElement(elem, attrs, augs);
-
- return fElementStack.top > 0 && elem.equals(fElementStack.peek().qname);
- }
-
- private QName createQName(String tagName) {
- tagName = modifyName(tagName, fNamesElems);
- return new QName(null, tagName, tagName, NamespaceBinder.XHTML_1_0_URI);
- }
-
- /** Empty element. */
- public void emptyElement(final QName element, XMLAttributes attrs, Augmentations augs)
- throws XNIException {
- startElement(element, attrs, augs);
- // browser ignore the closing indication for non empty tags like <form .../> but not for unknown element
- final HTMLElements.Element elem = getElement(element);
- if (elem.isEmpty() || elem.code == HTMLElements.UNKNOWN) {
- endElement(element, augs);
- }
- } // emptyElement(QName,XMLAttributes,Augmentations)
-
- /** Start entity. */
- public void startGeneralEntity(String name,
- XMLResourceIdentifier id,
- String encoding,
- Augmentations augs) throws XNIException {
- fSeenAnything = true;
-
- // check for end of document
- if (fSeenRootElementEnd) {
- return;
- }
-
- // insert body, if needed
- if (!fDocumentFragment) {
- boolean insertBody = !fSeenRootElement;
- if (!insertBody) {
- Info info = fElementStack.peek();
- if (info.element.code == HTMLElements.HEAD ||
- info.element.code == HTMLElements.HTML) {
- String hname = modifyName("head", fNamesElems);
- String bname = modifyName("body", fNamesElems);
- if (fReportErrors) {
- fErrorReporter.reportWarning("HTML2009", new Object[]{hname,bname});
- }
- fQName.setValues(null, hname, hname, null);
- endElement(fQName, synthesizedAugs());
- insertBody = true;
- }
- }
- if (insertBody) {
- forceStartBody();
- }
- }
-
- // call handler
- if (fDocumentHandler != null) {
- fDocumentHandler.startGeneralEntity(name, id, encoding, augs);
- }
-
- } // startGeneralEntity(String,XMLResourceIdentifier,String,Augmentations)
-
- /**
- * Generates a missing <body> (which creates missing <head> when needed)
- */
- private void forceStartBody() {
- final QName body = createQName("body");
- if (fReportErrors) {
- fErrorReporter.reportWarning("HTML2006", new Object[]{body.localpart});
- }
- forceStartElement(body, null, synthesizedAugs());
- }
-
- /** Text declaration. */
- public void textDecl(String version, String encoding, Augmentations augs)
- throws XNIException {
- fSeenAnything = true;
-
- // check for end of document
- if (fSeenRootElementEnd) {
- return;
- }
-
- // call handler
- if (fDocumentHandler != null) {
- fDocumentHandler.textDecl(version, encoding, augs);
- }
-
- } // textDecl(String,String,Augmentations)
-
- /** End entity. */
- public void endGeneralEntity(String name, Augmentations augs) throws XNIException {
-
- // check for end of document
- if (fSeenRootElementEnd) {
- return;
- }
-
- // call handler
- if (fDocumentHandler != null) {
- fDocumentHandler.endGeneralEntity(name, augs);
- }
-
- } // endGeneralEntity(String,Augmentations)
-
- /** Start CDATA section. */
- public void startCDATA(Augmentations augs) throws XNIException {
- fSeenAnything = true;
-
- consumeEarlyTextIfNeeded();
-
- // check for end of document
- if (fSeenRootElementEnd) {
- return;
- }
-
- // call handler
- if (fDocumentHandler != null) {
- fDocumentHandler.startCDATA(augs);
- }
-
- } // startCDATA(Augmentations)
-
- /** End CDATA section. */
- public void endCDATA(Augmentations augs) throws XNIException {
-
- // check for end of document
- if (fSeenRootElementEnd) {
- return;
- }
-
- // call handler
- if (fDocumentHandler != null) {
- fDocumentHandler.endCDATA(augs);
- }
-
- } // endCDATA(Augmentations)
-
- /** Characters. */
- public void characters(final XMLString text, final Augmentations augs) throws XNIException {
- // check for end of document
- if (fSeenRootElementEnd) {
- return;
- }
-
- if (fElementStack.top == 0 && !fDocumentFragment) {
- // character before first opening tag
- lostText_.add(text, augs);
- return;
- }
-
- // is this text whitespace?
- boolean whitespace = true;
- for (int i = 0; i < text.length; i++) {
- if (!Character.isWhitespace(text.ch[text.offset + i])) {
- whitespace = false;
- break;
- }
- }
-
- if (!fDocumentFragment) {
- // handle bare characters
- if (!fSeenRootElement) {
- if (whitespace) {
- return;
- }
- forceStartBody();
- }
-
- if (whitespace && (fElementStack.top < 2 || endElementsBuffer_.size() == 1)) {
- // ignore spaces directly within <html>
- return;
- }
-
- // handle character content in head
- // NOTE: This frequently happens when the document looks like:
- // <title>Title</title>
- // And here's some text.
- else if (!whitespace) {
- Info info = fElementStack.peek();
- if (info.element.code == HTMLElements.HEAD ||
- info.element.code == HTMLElements.HTML) {
- String hname = modifyName("head", fNamesElems);
- String bname = modifyName("body", fNamesElems);
- if (fReportErrors) {
- fErrorReporter.reportWarning("HTML2009", new Object[]{hname,bname});
- }
- forceStartBody();
- }
- }
- }
-
- // call handler
- if (fDocumentHandler != null) {
- fDocumentHandler.characters(text, augs);
- }
-
- } // characters(XMLString,Augmentations)
-
- /** Ignorable whitespace. */
- public void ignorableWhitespace(XMLString text, Augmentations augs)
- throws XNIException {
- characters(text, augs);
- } // ignorableWhitespace(XMLString,Augmentations)
-
- /** End element. */
- public void endElement(final QName element, final Augmentations augs) throws XNIException {
- final boolean forcedEndElement = forcedEndElement_;
- // is there anything to do?
- if (fSeenRootElementEnd) {
- notifyDiscardedEndElement(element, augs);
- return;
- }
-
- // get element information
- HTMLElements.Element elem = getElement(element);
-
- // if we consider outside content, just buffer </body> and </html> to consider them at the very end
- if (!fIgnoreOutsideContent &&
- (elem.code == HTMLElements.BODY || elem.code == HTMLElements.HTML)) {
- endElementsBuffer_.add(new ElementEntry(element, augs));
- return;
- }
-
- // check for end of document
- if (elem.code == HTMLElements.HTML) {
- fSeenRootElementEnd = true;
- }
- else if (elem.code == HTMLElements.FORM) {
- fOpenedForm = false;
- }
- else if (elem.code == HTMLElements.HEAD && !forcedEndElement) {
- // consume </head> first when <body> is reached to retrieve content lost between </head> and <body>
- endElementsBuffer_.add(new ElementEntry(element, augs));
- return;
- }
-
-
- // empty element
- int depth = getElementDepth(elem);
- if (depth == -1) {
- if (elem.code == HTMLElements.P) {
- forceStartElement(element, emptyAttributes(), synthesizedAugs());
- endElement(element, augs);
- }
- else if (!elem.isEmpty()) {
- notifyDiscardedEndElement(element, augs);
- }
- return;
- }
-
- // find unbalanced inline elements
- if (depth > 1 && elem.isInline()) {
- final int size = fElementStack.top;
- fInlineStack.top = 0;
- for (int i = 0; i < depth - 1; i++) {
- final Info info = fElementStack.data[size - i - 1];
- final HTMLElements.Element pelem = info.element;
-
- if (pelem.isInline() || pelem.code == HTMLElements.FONT) { // TODO: investigate if only FONT
- // NOTE: I don't have to make a copy of the info because
- // it will just be popped off of the element stack
- // as soon as we close it, anyway.
- fInlineStack.push(info);
- }
- }
- }
-
- // close children up to appropriate element
- for (int i = 0; i < depth; i++) {
- Info info = fElementStack.pop();
-
- if (fReportErrors && i < depth - 1) {
- String ename = modifyName(element.rawname, fNamesElems);
- String iname = info.qname.rawname;
- fErrorReporter.reportWarning("HTML2007", new Object[]{ename,iname});
- }
- if (fDocumentHandler != null) {
- // PATCH: Marc-Andr\u00e8 Morissette
- callEndElement(info.qname, i < depth - 1 ? synthesizedAugs() : augs);
- }
- }
-
- // re-open inline elements
- if (depth > 1) {
- int size = fInlineStack.top;
- for (int i = 0; i < size; i++) {
- Info info = (Info)fInlineStack.pop();
- XMLAttributes attributes = info.attributes;
- if (fReportErrors) {
- String iname = info.qname.rawname;
- fErrorReporter.reportWarning("HTML2008", new Object[]{iname});
- }
- forceStartElement(info.qname, attributes, synthesizedAugs());
- }
- }
-
- } // endElement(QName,Augmentations)
-
- // @since Xerces 2.1.0
-
- /** Sets the document source. */
- public void setDocumentSource(XMLDocumentSource source) {
- fDocumentSource = source;
- } // setDocumentSource(XMLDocumentSource)
-
- /** Returns the document source. */
- public XMLDocumentSource getDocumentSource() {
- return fDocumentSource;
- } // getDocumentSource():XMLDocumentSource
-
- // removed since Xerces-J 2.3.0
-
- /** Start document. */
- public void startDocument(XMLLocator locator, String encoding, Augmentations augs)
- throws XNIException {
- startDocument(locator, encoding, null, augs);
- } // startDocument(XMLLocator,String,Augmentations)
-
- /** Start prefix mapping. */
- public void startPrefixMapping(String prefix, String uri, Augmentations augs)
- throws XNIException {
-
- // check for end of document
- if (fSeenRootElementEnd) {
- return;
- }
-
- // call handler
- if (fDocumentHandler != null) {
- XercesBridge.getInstance().XMLDocumentHandler_startPrefixMapping(fDocumentHandler, prefix, uri, augs);
- }
-
- } // startPrefixMapping(String,String,Augmentations)
-
- /** End prefix mapping. */
- public void endPrefixMapping(String prefix, Augmentations augs)
- throws XNIException {
-
- // check for end of document
- if (fSeenRootElementEnd) {
- return;
- }
-
- // call handler
- if (fDocumentHandler != null) {
- XercesBridge.getInstance().XMLDocumentHandler_endPrefixMapping(fDocumentHandler, prefix, augs);
- }
-
- } // endPrefixMapping(String,Augmentations)
-
- //
- // Protected methods
- //
-
- /** Returns an HTML element. */
- protected HTMLElements.Element getElement(final QName elementName) {
- String name = elementName.rawname;
- if (fNamespaces && NamespaceBinder.XHTML_1_0_URI.equals(elementName.uri)) {
- int index = name.indexOf(':');
- if (index != -1) {
- name = name.substring(index+1);
- }
- }
- return HTMLElements.getElement(name);
- } // getElement(String):HTMLElements.Element
-
- /** Call document handler start element. */
- protected final void callStartElement(QName element, XMLAttributes attrs,
- Augmentations augs)
- throws XNIException {
- fDocumentHandler.startElement(element, attrs, augs);
- } // callStartElement(QName,XMLAttributes,Augmentations)
-
- /** Call document handler end element. */
- protected final void callEndElement(QName element, Augmentations augs)
- throws XNIException {
- fDocumentHandler.endElement(element, augs);
- } // callEndElement(QName,Augmentations)
-
- /**
- * Returns the depth of the open tag associated with the specified
- * element name or -1 if no matching element is found.
- *
- * @param element The element.
- */
- protected final int getElementDepth(HTMLElements.Element element) {
- final boolean container = element.isContainer();
- int depth = -1;
- for (int i = fElementStack.top - 1; i >=fragmentContextStackSize_; i--) {
- Info info = fElementStack.data[i];
- if (info.element.code == element.code) {
- depth = fElementStack.top - i;
- break;
- }
- if (!container && (element.nestable && info.element.isBlock())) {
- break;
- }
- }
- return depth;
- } // getElementDepth(HTMLElements.Element)
-
- /**
- * Returns the depth of the open tag associated with the specified
- * element parent names or -1 if no matching element is found.
- *
- * @param parents The parent elements.
- */
- protected int getParentDepth(HTMLElements.Element[] parents, short bounds) {
- if (parents != null) {
- for (int i = fElementStack.top - 1; i >= 0; i--) {
- Info info = fElementStack.data[i];
- if (info.element.code == bounds) {
- break;
- }
- for (int j = 0; j < parents.length; j++) {
- if (info.element.code == parents[j].code) {
- return fElementStack.top - i;
- }
- }
- }
- }
- return -1;
- } // getParentDepth(HTMLElements.Element[],short):int
-
- /** Returns a set of empty attributes. */
- protected final XMLAttributes emptyAttributes() {
- fEmptyAttrs.removeAllAttributes();
- return fEmptyAttrs;
- } // emptyAttributes():XMLAttributes
-
- /** Returns an augmentations object with a synthesized item added. */
- protected final Augmentations synthesizedAugs() {
- HTMLAugmentations augs = null;
- if (fAugmentations) {
- augs = fInfosetAugs;
- augs.removeAllItems();
- augs.putItem(AUGMENTATIONS, SYNTHESIZED_ITEM);
- }
- return augs;
- } // synthesizedAugs():Augmentations
-
- //
- // Protected static methods
- //
-
- /** Modifies the given name based on the specified mode. */
- protected static final String modifyName(String name, short mode) {
- switch (mode) {
- case NAMES_UPPERCASE: return name.toUpperCase();
- case NAMES_LOWERCASE: return name.toLowerCase();
- }
- return name;
- } // modifyName(String,short):String
-
- /**
- * Converts HTML names string value to constant value.
- *
- * @see #NAMES_NO_CHANGE
- * @see #NAMES_LOWERCASE
- * @see #NAMES_UPPERCASE
- */
- protected static final short getNamesValue(String value) {
- if (value.equals("lower")) {
- return NAMES_LOWERCASE;
- }
- if (value.equals("upper")) {
- return NAMES_UPPERCASE;
- }
- return NAMES_NO_CHANGE;
- } // getNamesValue(String):short
-
- //
- // Classes
- //
-
- /**
- * Element info for each start element. This information is used when
- * closing unbalanced inline elements. For example:
- * <pre>
- * <i>unbalanced <b>HTML</i> content</b>
- * </pre>
- * <p>
- * It seems that it is a waste of processing and memory to copy the
- * attributes for every start element even if there are no unbalanced
- * inline elements in the document. However, if the attributes are
- * <em>not</em> saved, then important attributes such as style
- * information would be lost.
- *
- * @author Andy Clark
- */
- public static class Info {
-
- //
- // Data
- //
-
- /** The element. */
- public HTMLElements.Element element;
-
- /** The element qualified name. */
- public QName qname;
-
- /** The element attributes. */
- public XMLAttributes attributes;
-
- //
- // Constructors
- //
-
- /**
- * Creates an element information object.
- * <p>
- * <strong>Note:</strong>
- * This constructor makes a copy of the element information.
- *
- * @param element The element qualified name.
- */
- public Info(HTMLElements.Element element, QName qname) {
- this(element, qname, null);
- } // <init>(HTMLElements.Element,QName)
-
- /**
- * Creates an element information object.
- * <p>
- * <strong>Note:</strong>
- * This constructor makes a copy of the element information.
- *
- * @param element The element qualified name.
- * @param attributes The element attributes.
- */
- public Info(HTMLElements.Element element,
- QName qname, XMLAttributes attributes) {
- this.element = element;
- this.qname = new QName(qname);
- if (attributes != null) {
- int length = attributes.getLength();
- if (length > 0) {
- QName aqname = new QName();
- XMLAttributes newattrs = new XMLAttributesImpl();
- for (int i = 0; i < length; i++) {
- attributes.getName(i, aqname);
- String type = attributes.getType(i);
- String value = attributes.getValue(i);
- String nonNormalizedValue = attributes.getNonNormalizedValue(i);
- boolean specified = attributes.isSpecified(i);
- newattrs.addAttribute(aqname, type, value);
- newattrs.setNonNormalizedValue(i, nonNormalizedValue);
- newattrs.setSpecified(i, specified);
- }
- this.attributes = newattrs;
- }
- }
- } // <init>(HTMLElements.Element,QName,XMLAttributes)
-
- /**
- * Simple representation to make debugging easier
- */
- public String toString() {
- return super.toString() + qname;
- }
- } // class Info
-
- /** Unsynchronized stack of element information. */
- public static class InfoStack {
-
- //
- // Data
- //
-
- /** The top of the stack. */
- public int top;
-
- /** The stack data. */
- public Info[] data = new Info[10];
-
- //
- // Public methods
- //
-
- /** Pushes element information onto the stack. */
- public void push(Info info) {
- if (top == data.length) {
- Info[] newarray = new Info[top + 10];
- System.arraycopy(data, 0, newarray, 0, top);
- data = newarray;
- }
- data[top++] = info;
- } // push(Info)
-
- /** Peeks at the top of the stack. */
- public Info peek() {
- return data[top-1];
- } // peek():Info
-
- /** Pops the top item off of the stack. */
- public Info pop() {
- return data[--top];
- } // pop():Info
-
- /**
- * Simple representation to make debugging easier
- */
- public String toString() {
- final StringBuffer sb = new StringBuffer("InfoStack(");
- for (int i=top-1; i>=0; --i) {
- sb.append(data[i]);
- if (i != 0)
- sb.append(", ");
- }
- sb.append(")");
- return sb.toString();
- }
-
-
- } // class InfoStack
-
- void setTagBalancingListener(final HTMLTagBalancingListener tagBalancingListener) {
- this.tagBalancingListener = tagBalancingListener;
- }
-
- /**
- * Notifies the tagBalancingListener (if any) of an ignored start element
- */
- private void notifyDiscardedStartElement(final QName elem, final XMLAttributes attrs,
- final Augmentations augs) {
- if (tagBalancingListener != null)
- tagBalancingListener.ignoredStartElement(elem, attrs, augs);
- }
-
- /**
- * Notifies the tagBalancingListener (if any) of an ignored end element
- */
- private void notifyDiscardedEndElement(final QName element, final Augmentations augs) {
- if (tagBalancingListener != null)
- tagBalancingListener.ignoredEndElement(element, augs);
- }
-
- /**
- * Structure to hold information about an element placed in buffer to be comsumed later
- */
- static class ElementEntry {
- private final QName name_;
- private final Augmentations augs_;
- ElementEntry(final QName element, final Augmentations augs) {
- name_ = new QName(element);
- augs_ = (augs == null) ? null : new HTMLAugmentations(augs);
- }
- }
-} // class HTMLTagBalancer
此处可能存在不合适展示的内容,页面不予展示。您可通过相关编辑功能自查并修改。
如您确认内容无涉及 不当用语 / 纯广告导流 / 暴力 / 低俗色情 / 侵权 / 盗版 / 虚假 / 无价值内容或违法国家有关法律法规的内容,可点击提交进行申诉,我们将尽快为您处理。