7 Star 0 Fork 5

src-openEuler/boilerpipe

加入 Gitee
与超过 1200万 开发者一起发现、参与优秀开源项目,私有仓库也完全免费 :)
免费加入
文件
该仓库未声明开源许可证文件(LICENSE),使用请关注具体项目描述及其代码上游依赖。
克隆/下载
boilerpipe-1.2.0-nekohtml-patch 81.34 KB
一键复制 编辑 原始数据 按行查看 历史
small_leek 提交于 5年前 . package init
12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148114911501151115211531154115511561157115811591160116111621163116411651166116711681169117011711172117311741175117611771178117911801181118211831184118511861187118811891190119111921193119411951196119711981199120012011202120312041205120612071208120912101211121212131214121512161217121812191220122112221223122412251226122712281229123012311232123312341235123612371238123912401241124212431244124512461247124812491250125112521253125412551256125712581259126012611262126312641265126612671268126912701271127212731274127512761277127812791280128112821283128412851286128712881289129012911292129312941295129612971298129913001301130213031304130513061307130813091310131113121313131413151316131713181319132013211322132313241325132613271328132913301331133213331334133513361337133813391340134113421343134413451346134713481349135013511352135313541355135613571358135913601361136213631364136513661367136813691370137113721373137413751376137713781379138013811382138313841385138613871388138913901391139213931394139513961397139813991400140114021403140414051406140714081409141014111412141314141415141614171418141914201421142214231424142514261427142814291430143114321433143414351436143714381439144014411442144314441445144614471448144914501451145214531454145514561457145814591460146114621463146414651466146714681469147014711472147314741475147614771478147914801481148214831484148514861487148814891490149114921493149414951496149714981499150015011502150315041505150615071508150915101511151215131514151515161517151815191520152115221523152415251526152715281529153015311532153315341535153615371538153915401541154215431544154515461547154815491550155115521553155415551556155715581559156015611562156315641565156615671568156915701571157215731574157515761577157815791580158115821583158415851586158715881589159015911592159315941595159615971598159916001601160216031604160516061607160816091610161116121613161416151616161716181619162016211622162316241625162616271628162916301631163216331634163516361637163816391640164116421643164416451646164716481649165016511652165316541655165616571658165916601661166216631664166516661667166816691670167116721673167416751676167716781679168016811682168316841685168616871688168916901691169216931694169516961697169816991700170117021703170417051706170717081709171017111712171317141715171617171718171917201721172217231724172517261727172817291730173117321733173417351736173717381739174017411742174317441745174617471748174917501751175217531754175517561757175817591760176117621763176417651766176717681769177017711772177317741775177617771778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228
diff -Nru boilerpipe-1.2.0/pom.xml boilerpipe-1.2.0-gil/pom.xml
--- boilerpipe-1.2.0/pom.xml 2013-10-11 11:54:23.418310128 +0200
+++ boilerpipe-1.2.0-gil/pom.xml 2013-10-11 11:51:51.334701196 +0200
@@ -32,4 +32,13 @@
<name>Christian Kohlschütter</name>
</developer>
</developers>
+
+ <dependencies>
+ <dependency>
+ <groupId>net.sourceforge.nekohtml</groupId>
+ <artifactId>nekohtml</artifactId>
+ <version>1.9.14</version>
+ </dependency>
+ </dependencies>
+
</project>
diff -Nru boilerpipe-1.2.0/src/main/org/cyberneko/html/HTMLElements.java boilerpipe-1.2.0-gil/src/main/org/cyberneko/html/HTMLElements.java
--- boilerpipe-1.2.0/src/main/org/cyberneko/html/HTMLElements.java 2010-12-16 11:30:06.000000000 +0100
+++ boilerpipe-1.2.0-gil/src/main/org/cyberneko/html/HTMLElements.java 1970-01-01 01:00:00.000000000 +0100
@@ -1,794 +0,0 @@
-/*
- * Copyright 2002-2009 Andy Clark, Marc Guillemot
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.cyberneko.html;
-
-/**
- * Collection of HTML element information.
- *
- * @author Andy Clark
- * @author Ahmed Ashour
- * @author Marc Guillemot
- *
- * @version $Id: HTMLElements.java,v 1.12 2005/02/14 07:16:59 andyc Exp $
- */
-public class HTMLElements {
-
- //
- // Constants
- //
-
- // element codes
-
- // NOTE: The element codes *must* start with 0 and increment in
- // sequence. The parent and closes references depends on
- // this assumption. -Ac
-
- public static final short A = 0;
- public static final short ABBR = A+1;
- public static final short ACRONYM = ABBR+1;
- public static final short ADDRESS = ACRONYM+1;
- public static final short APPLET = ADDRESS+1;
- public static final short AREA = APPLET+1;
- public static final short B = AREA+1;
- public static final short BASE = B+1;
- public static final short BASEFONT = BASE+1;
- public static final short BDO = BASEFONT+1;
- public static final short BGSOUND = BDO+1;
- public static final short BIG = BGSOUND+1;
- public static final short BLINK = BIG+1;
- public static final short BLOCKQUOTE = BLINK+1;
- public static final short BODY = BLOCKQUOTE+1;
- public static final short BR = BODY+1;
- public static final short BUTTON = BR+1;
- public static final short CAPTION = BUTTON+1;
- public static final short CENTER = CAPTION+1;
- public static final short CITE = CENTER+1;
- public static final short CODE = CITE+1;
- public static final short COL = CODE+1;
- public static final short COLGROUP = COL+1;
- public static final short COMMENT = COLGROUP+1;
- public static final short DEL = COMMENT+1;
- public static final short DFN = DEL+1;
- public static final short DIR = DFN+1;
- public static final short DIV = DIR+1;
- public static final short DD = DIV+1;
- public static final short DL = DD+1;
- public static final short DT = DL+1;
- public static final short EM = DT+1;
- public static final short EMBED = EM+1;
- public static final short FIELDSET = EMBED+1;
- public static final short FONT = FIELDSET+1;
- public static final short FORM = FONT+1;
- public static final short FRAME = FORM+1;
- public static final short FRAMESET = FRAME+1;
- public static final short H1 = FRAMESET+1;
- public static final short H2 = H1+1;
- public static final short H3 = H2+1;
- public static final short H4 = H3+1;
- public static final short H5 = H4+1;
- public static final short H6 = H5+1;
- public static final short HEAD = H6+1;
- public static final short HR = HEAD+1;
- public static final short HTML = HR+1;
- public static final short I = HTML+1;
- public static final short IFRAME = I+1;
- public static final short ILAYER = IFRAME+1;
- public static final short IMG = ILAYER+1;
- public static final short INPUT = IMG+1;
- public static final short INS = INPUT+1;
- public static final short ISINDEX = INS+1;
- public static final short KBD = ISINDEX+1;
- public static final short KEYGEN = KBD+1;
- public static final short LABEL = KEYGEN+1;
- public static final short LAYER = LABEL+1;
- public static final short LEGEND = LAYER+1;
- public static final short LI = LEGEND+1;
- public static final short LINK = LI+1;
- public static final short LISTING = LINK+1;
- public static final short MAP = LISTING+1;
- public static final short MARQUEE = MAP+1;
- public static final short MENU = MARQUEE+1;
- public static final short META = MENU+1;
- public static final short MULTICOL = META+1;
- public static final short NEXTID = MULTICOL+1;
- public static final short NOBR = NEXTID+1;
- public static final short NOEMBED = NOBR+1;
- public static final short NOFRAMES = NOEMBED+1;
- public static final short NOLAYER = NOFRAMES+1;
- public static final short NOSCRIPT = NOLAYER+1;
- public static final short OBJECT = NOSCRIPT+1;
- public static final short OL = OBJECT+1;
- public static final short OPTION = OL+1;
- public static final short OPTGROUP = OPTION+1;
- public static final short P = OPTGROUP+1;
- public static final short PARAM = P+1;
- public static final short PLAINTEXT = PARAM+1;
- public static final short PRE = PLAINTEXT+1;
- public static final short Q = PRE+1;
- public static final short RB = Q+1;
- public static final short RBC = RB+1;
- public static final short RP = RBC+1;
- public static final short RT = RP+1;
- public static final short RTC = RT+1;
- public static final short RUBY = RTC+1;
- public static final short S = RUBY+1;
- public static final short SAMP = S+1;
- public static final short SCRIPT = SAMP+1;
- public static final short SELECT = SCRIPT+1;
- public static final short SMALL = SELECT+1;
- public static final short SOUND = SMALL+1;
- public static final short SPACER = SOUND+1;
- public static final short SPAN = SPACER+1;
- public static final short STRIKE = SPAN+1;
- public static final short STRONG = STRIKE+1;
- public static final short STYLE = STRONG+1;
- public static final short SUB = STYLE+1;
- public static final short SUP = SUB+1;
- public static final short TABLE = SUP+1;
- public static final short TBODY = TABLE+1;
- public static final short TD = TBODY+1;
- public static final short TEXTAREA = TD+1;
- public static final short TFOOT = TEXTAREA+1;
- public static final short TH = TFOOT+1;
- public static final short THEAD = TH+1;
- public static final short TITLE = THEAD+1;
- public static final short TR = TITLE+1;
- public static final short TT = TR+1;
- public static final short U = TT+1;
- public static final short UL = U+1;
- public static final short VAR = UL+1;
- public static final short WBR = VAR+1;
- public static final short XML = WBR+1;
- public static final short XMP = XML+1;
- public static final short UNKNOWN = XMP+1;
-
- // information
-
- /** Element information organized by first letter. */
- protected static final Element[][] ELEMENTS_ARRAY = new Element[26][];
-
- /** Element information as a contiguous list. */
- protected static final ElementList ELEMENTS = new ElementList();
-
- /** No such element. */
- public static final Element NO_SUCH_ELEMENT = new Element(UNKNOWN, "", Element.CONTAINER, new short[]{BODY,HEAD}/*HTML*/, null);
-
- //
- // Static initializer
- //
-
- /**
- * Initializes the element information.
- * <p>
- * <strong>Note:</strong>
- * The <code>getElement</code> method requires that the HTML elements
- * are added to the list in alphabetical order. If new elements are
- * added, then they <em>must</em> be inserted in alphabetical order.
- */
- static {
- // <!ENTITY % heading "H1|H2|H3|H4|H5|H6">
- // <!ENTITY % fontstyle "TT | I | B | BIG | SMALL">
- // <!ENTITY % phrase "EM | STRONG | DFN | CODE | SAMP | KBD | VAR | CITE | ABBR | ACRONYM" >
- // <!ENTITY % special "A | IMG | OBJECT | BR | SCRIPT | MAP | Q | SUB | SUP | SPAN | BDO">
- // <!ENTITY % formctrl "INPUT | SELECT | TEXTAREA | LABEL | BUTTON">
- // <!ENTITY % inline "#PCDATA | %fontstyle; | %phrase; | %special; | %formctrl;">
- // <!ENTITY % block "P | %heading; | %list; | %preformatted; | DL | DIV | NOSCRIPT | BLOCKQUOTE | FORM | HR | TABLE | FIELDSET | ADDRESS">
- // <!ENTITY % flow "%block; | %inline;">
-
- // initialize array of element information
- ELEMENTS_ARRAY['A'-'A'] = new Element[] {
- // A - - (%inline;)* -(A)
- new Element(A, "A", Element.INLINE, BODY, new short[] {A}),
- // ABBR - - (%inline;)*
- new Element(ABBR, "ABBR", Element.INLINE, BODY, null),
- // ACRONYM - - (%inline;)*
- new Element(ACRONYM, "ACRONYM", Element.INLINE, BODY, null),
- // ADDRESS - - (%inline;)*
- new Element(ADDRESS, "ADDRESS", Element.BLOCK, BODY, null),
- // APPLET
- new Element(APPLET, "APPLET", 0, BODY, null),
- // AREA - O EMPTY
- new Element(AREA, "AREA", Element.EMPTY, MAP, null),
- };
- ELEMENTS_ARRAY['B'-'A'] = new Element[] {
- // B - - (%inline;)*
- new Element(B, "B", Element.INLINE, BODY, null),
- // BASE - O EMPTY
- new Element(BASE, "BASE", Element.EMPTY, HEAD, null),
- // BASEFONT
- new Element(BASEFONT, "BASEFONT", 0, HEAD, null),
- // BDO - - (%inline;)*
- new Element(BDO, "BDO", Element.INLINE, BODY, null),
- // BGSOUND
- new Element(BGSOUND, "BGSOUND", Element.EMPTY, HEAD, null),
- // BIG - - (%inline;)*
- new Element(BIG, "BIG", Element.INLINE, BODY, null),
- // BLINK
- new Element(BLINK, "BLINK", Element.INLINE, BODY, null),
- // BLOCKQUOTE - - (%block;|SCRIPT)+
- new Element(BLOCKQUOTE, "BLOCKQUOTE", Element.BLOCK, BODY, new short[]{P}),
- // BODY O O (%block;|SCRIPT)+ +(INS|DEL)
- new Element(BODY, "BODY", Element.CONTAINER, HTML, new short[]{HEAD}),
- // BR - O EMPTY
- new Element(BR, "BR", Element.EMPTY, BODY, null),
- // BUTTON - - (%flow;)* -(A|%formctrl;|FORM|FIELDSET)
- new Element(BUTTON, "BUTTON", 0, BODY, null),
- };
- ELEMENTS_ARRAY['C'-'A'] = new Element[] {
- // CAPTION - - (%inline;)*
- new Element(CAPTION, "CAPTION", Element.INLINE, TABLE, null),
- // CENTER,
- new Element(CENTER, "CENTER", 0, BODY, null),
- // CITE - - (%inline;)*
- new Element(CITE, "CITE", Element.INLINE, BODY, null),
- // CODE - - (%inline;)*
- new Element(CODE, "CODE", Element.INLINE, BODY, null),
- // COL - O EMPTY
- new Element(COL, "COL", Element.EMPTY, TABLE, null),
- // COLGROUP - O (COL)*
- new Element(COLGROUP, "COLGROUP", 0, TABLE, new short[]{COL,COLGROUP}),
- // COMMENT
- new Element(COMMENT, "COMMENT", Element.SPECIAL, HTML, null),
- };
- ELEMENTS_ARRAY['D'-'A'] = new Element[] {
- // DEL - - (%flow;)*
- new Element(DEL, "DEL", 0, BODY, null),
- // DFN - - (%inline;)*
- new Element(DFN, "DFN", Element.INLINE, BODY, null),
- // DIR
- new Element(DIR, "DIR", 0, BODY, null),
- // DIV - - (%flow;)*
- new Element(DIV, "DIV", Element.BLOCK, BODY, new short[]{P}),
- // DD - O (%flow;)*
- new Element(DD, "DD", 0, DL, new short[]{DT,DD}),
- // DL - - (DT|DD)+
- new Element(DL, "DL", Element.BLOCK, BODY, null),
- // DT - O (%inline;)*
- new Element(DT, "DT", 0, DL, new short[]{DT,DD}),
- };
- ELEMENTS_ARRAY['E'-'A'] = new Element[] {
- // EM - - (%inline;)*
- new Element(EM, "EM", Element.INLINE, BODY, null),
- // EMBED
- new Element(EMBED, "EMBED", 0, BODY, null),
- };
- ELEMENTS_ARRAY['F'-'A'] = new Element[] {
- // FIELDSET - - (#PCDATA,LEGEND,(%flow;)*)
- new Element(FIELDSET, "FIELDSET", 0, BODY, null),
- // FONT
- new Element(FONT, "FONT", Element.CONTAINER, BODY, null),
- // FORM - - (%block;|SCRIPT)+ -(FORM)
- new Element(FORM, "FORM", Element.CONTAINER, new short[]{BODY,TD,DIV}, new short[]{BUTTON,P}),
- // FRAME - O EMPTY
- new Element(FRAME, "FRAME", Element.EMPTY, FRAMESET, null),
- // FRAMESET - - ((FRAMESET|FRAME)+ & NOFRAMES?)
- new Element(FRAMESET, "FRAMESET", 0, HTML, null),
- };
- ELEMENTS_ARRAY['H'-'A'] = new Element[] {
- // (H1|H2|H3|H4|H5|H6) - - (%inline;)*
- new Element(H1, "H1", Element.BLOCK, new short[]{BODY,A}, new short[]{H1,H2,H3,H4,H5,H6,P}),
- new Element(H2, "H2", Element.BLOCK, new short[]{BODY,A}, new short[]{H1,H2,H3,H4,H5,H6,P}),
- new Element(H3, "H3", Element.BLOCK, new short[]{BODY,A}, new short[]{H1,H2,H3,H4,H5,H6,P}),
- new Element(H4, "H4", Element.BLOCK, new short[]{BODY,A}, new short[]{H1,H2,H3,H4,H5,H6,P}),
- new Element(H5, "H5", Element.BLOCK, new short[]{BODY,A}, new short[]{H1,H2,H3,H4,H5,H6,P}),
- new Element(H6, "H6", Element.BLOCK, new short[]{BODY,A}, new short[]{H1,H2,H3,H4,H5,H6,P}),
- // HEAD O O (%head.content;) +(%head.misc;)
- new Element(HEAD, "HEAD", 0, HTML, null),
- // HR - O EMPTY
- new Element(HR, "HR", Element.EMPTY, BODY, new short[]{P}),
- // HTML O O (%html.content;)
- new Element(HTML, "HTML", 0, null, null),
- };
- ELEMENTS_ARRAY['I'-'A'] = new Element[] {
- // I - - (%inline;)*
- new Element(I, "I", Element.INLINE, BODY, null),
- // IFRAME
- new Element(IFRAME, "IFRAME", Element.BLOCK, BODY, null),
- // ILAYER
- new Element(ILAYER, "ILAYER", Element.BLOCK, BODY, null),
- // IMG - O EMPTY
- new Element(IMG, "IMG", Element.EMPTY, BODY, null),
- // INPUT - O EMPTY
- new Element(INPUT, "INPUT", Element.EMPTY, BODY, null),
- // INS - - (%flow;)*
- new Element(INS, "INS", 0, BODY, null),
- // ISINDEX
- new Element(ISINDEX, "ISINDEX", 0, HEAD, null),
- };
- ELEMENTS_ARRAY['K'-'A'] = new Element[] {
- // KBD - - (%inline;)*
- new Element(KBD, "KBD", Element.INLINE, BODY, null),
- // KEYGEN
- new Element(KEYGEN, "KEYGEN", 0, BODY, null),
- };
- ELEMENTS_ARRAY['L'-'A'] = new Element[] {
- // LABEL - - (%inline;)* -(LABEL)
- new Element(LABEL, "LABEL", 0, BODY, null),
- // LAYER
- new Element(LAYER, "LAYER", Element.BLOCK, BODY, null),
- // LEGEND - - (%inline;)*
- new Element(LEGEND, "LEGEND", Element.INLINE, FIELDSET, null),
- // LI - O (%flow;)*
- new Element(LI, "LI", 0, new short[]{BODY,UL,OL}, new short[]{LI}),
- // LINK - O EMPTY
- new Element(LINK, "LINK", Element.EMPTY, HEAD, null),
- // LISTING
- new Element(LISTING, "LISTING", 0, BODY, null),
- };
- ELEMENTS_ARRAY['M'-'A'] = new Element[] {
- // MAP - - ((%block;) | AREA)+
- new Element(MAP, "MAP", Element.INLINE, BODY, null),
- // MARQUEE
- new Element(MARQUEE, "MARQUEE", 0, BODY, null),
- // MENU
- new Element(MENU, "MENU", 0, BODY, null),
- // META - O EMPTY
- new Element(META, "META", Element.EMPTY, HEAD, new short[]{STYLE,TITLE}),
- // MULTICOL
- new Element(MULTICOL, "MULTICOL", 0, BODY, null),
- };
- ELEMENTS_ARRAY['N'-'A'] = new Element[] {
- // NEXTID
- new Element(NEXTID, "NEXTID", Element.EMPTY, BODY, null),
- // NOBR
- new Element(NOBR, "NOBR", Element.INLINE, BODY, null),
- // NOEMBED
- new Element(NOEMBED, "NOEMBED", 0, BODY, null),
- // NOFRAMES - - (BODY) -(NOFRAMES)
- new Element(NOFRAMES, "NOFRAMES", 0, FRAMESET, null),
- // NOLAYER
- new Element(NOLAYER, "NOLAYER", 0, BODY, null),
- // NOSCRIPT - - (%block;)+
- new Element(NOSCRIPT, "NOSCRIPT", 0, new short[]{BODY}, null),
- };
- ELEMENTS_ARRAY['O'-'A'] = new Element[] {
- // OBJECT - - (PARAM | %flow;)*
- new Element(OBJECT, "OBJECT", 0, BODY, null),
- // OL - - (LI)+
- new Element(OL, "OL", Element.BLOCK, BODY, null),
- // OPTGROUP - - (OPTION)+
- new Element(OPTGROUP, "OPTGROUP", 0, SELECT, new short[]{OPTION}),
- // OPTION - O (#PCDATA)
- new Element(OPTION, "OPTION", 0, SELECT, new short[]{OPTION}),
- };
- ELEMENTS_ARRAY['P'-'A'] = new Element[] {
- // P - O (%inline;)*
- new Element(P, "P", Element.CONTAINER, BODY, new short[]{P}),
- // PARAM - O EMPTY
- new Element(PARAM, "PARAM", Element.EMPTY, new short[]{OBJECT,APPLET}, null),
- // PLAINTEXT
- new Element(PLAINTEXT, "PLAINTEXT", Element.SPECIAL, BODY, null),
- // PRE - - (%inline;)* -(%pre.exclusion;)
- new Element(PRE, "PRE", 0, BODY, null),
- };
- ELEMENTS_ARRAY['Q'-'A'] = new Element[] {
- // Q - - (%inline;)*
- new Element(Q, "Q", Element.INLINE, BODY, null),
- };
- ELEMENTS_ARRAY['R'-'A'] = new Element[] {
- // RB
- new Element(RB, "RB", Element.INLINE, RUBY, new short[]{RB}),
- // RBC
- new Element(RBC, "RBC", 0, RUBY, null),
- // RP
- new Element(RP, "RP", Element.INLINE, RUBY, new short[]{RB}),
- // RT
- new Element(RT, "RT", Element.INLINE, RUBY, new short[]{RB,RP}),
- // RTC
- new Element(RTC, "RTC", 0, RUBY, new short[]{RBC}),
- // RUBY
- new Element(RUBY, "RUBY", 0, BODY, new short[]{RUBY}),
- };
- ELEMENTS_ARRAY['S'-'A'] = new Element[] {
- // S
- new Element(S, "S", 0, BODY, null),
- // SAMP - - (%inline;)*
- new Element(SAMP, "SAMP", Element.INLINE, BODY, null),
- // SCRIPT - - %Script;
- new Element(SCRIPT, "SCRIPT", Element.SPECIAL, new short[]{HEAD,BODY}, null),
- // SELECT - - (OPTGROUP|OPTION)+
- new Element(SELECT, "SELECT", Element.CONTAINER, BODY, new short[]{SELECT}),
- // SMALL - - (%inline;)*
- new Element(SMALL, "SMALL", Element.INLINE, BODY, null),
- // SOUND
- new Element(SOUND, "SOUND", Element.EMPTY, HEAD, null),
- // SPACER
- new Element(SPACER, "SPACER", Element.EMPTY, BODY, null),
- // SPAN - - (%inline;)*
- new Element(SPAN, "SPAN", Element.CONTAINER, BODY, null),
- // STRIKE
- new Element(STRIKE, "STRIKE", Element.INLINE, BODY, null),
- // STRONG - - (%inline;)*
- new Element(STRONG, "STRONG", Element.INLINE, BODY, null),
- // STYLE - - %StyleSheet;
- new Element(STYLE, "STYLE", Element.SPECIAL, new short[]{HEAD,BODY}, new short[]{STYLE,TITLE,META}),
- // SUB - - (%inline;)*
- new Element(SUB, "SUB", Element.INLINE, BODY, null),
- // SUP - - (%inline;)*
- new Element(SUP, "SUP", Element.INLINE, BODY, null),
- };
- ELEMENTS_ARRAY['T'-'A'] = new Element[] {
- // TABLE - - (CAPTION?, (COL*|COLGROUP*), THEAD?, TFOOT?, TBODY+)
- new Element(TABLE, "TABLE", Element.BLOCK|Element.CONTAINER, BODY, null),
- // TBODY O O (TR)+
- new Element(TBODY, "TBODY", 0, TABLE, new short[]{THEAD,TD,TH,TR,COLGROUP}),
- // TD - O (%flow;)*
- new Element(TD, "TD", Element.CONTAINER, TR, TABLE, new short[]{TD,TH}),
- // TEXTAREA - - (#PCDATA)
- new Element(TEXTAREA, "TEXTAREA", Element.SPECIAL, BODY, null),
- // TFOOT - O (TR)+
- new Element(TFOOT, "TFOOT", 0, TABLE, new short[]{THEAD,TBODY,TD,TH,TR}),
- // TH - O (%flow;)*
- new Element(TH, "TH", Element.CONTAINER, TR, TABLE, new short[]{TD,TH}),
- // THEAD - O (TR)+
- new Element(THEAD, "THEAD", 0, TABLE, new short[]{COLGROUP}),
- // TITLE - - (#PCDATA) -(%head.misc;)
- new Element(TITLE, "TITLE", Element.SPECIAL, new short[]{HEAD,BODY}, null),
- // TR - O (TH|TD)+
- new Element(TR, "TR", Element.BLOCK, new short[]{TBODY, THEAD, TFOOT}, TABLE, new short[]{TD,TH,TR,COLGROUP}),
- // TT - - (%inline;)*
- new Element(TT, "TT", Element.INLINE, BODY, null),
- };
- ELEMENTS_ARRAY['U'-'A'] = new Element[] {
- // U,
- new Element(U, "U", Element.INLINE, BODY, null),
- // UL - - (LI)+
- new Element(UL, "UL", Element.BLOCK, BODY, null),
- };
- ELEMENTS_ARRAY['V'-'A'] = new Element[] {
- // VAR - - (%inline;)*
- new Element(VAR, "VAR", Element.INLINE, BODY, null),
- };
- ELEMENTS_ARRAY['W'-'A'] = new Element[] {
- // WBR
- new Element(WBR, "WBR", Element.EMPTY, BODY, null),
- };
- ELEMENTS_ARRAY['X'-'A'] = new Element[] {
- // XML
- new Element(XML, "XML", 0, BODY, null),
- // XMP
- new Element(XMP, "XMP", Element.SPECIAL, BODY, null),
- };
-
- // keep contiguous list of elements for lookups by code
- for (int i = 0; i < ELEMENTS_ARRAY.length; i++) {
- Element[] elements = ELEMENTS_ARRAY[i];
- if (elements != null) {
- for (int j = 0; j < elements.length; j++) {
- Element element = elements[j];
- ELEMENTS.addElement(element);
- }
- }
- }
- ELEMENTS.addElement(NO_SUCH_ELEMENT);
-
- // initialize cross references to parent elements
- for (int i = 0; i < ELEMENTS.size; i++) {
- Element element = ELEMENTS.data[i];
- if (element.parentCodes != null) {
- element.parent = new Element[element.parentCodes.length];
- for (int j = 0; j < element.parentCodes.length; j++) {
- element.parent[j] = ELEMENTS.data[element.parentCodes[j]];
- }
- element.parentCodes = null;
- }
- }
-
- } // <clinit>()
-
- //
- // Public static methods
- //
-
- /**
- * Returns the element information for the specified element code.
- *
- * @param code The element code.
- */
- public static final Element getElement(short code) {
- return ELEMENTS.data[code];
- } // getElement(short):Element
-
- /**
- * Returns the element information for the specified element name.
- *
- * @param ename The element name.
- */
- public static final Element getElement(String ename) {
- return getElement(ename, NO_SUCH_ELEMENT);
- } // getElement(String):Element
-
- /**
- * Returns the element information for the specified element name.
- *
- * @param ename The element name.
- * @param element The default element to return if not found.
- */
- public static final Element getElement(String ename, Element element) {
-
- if (ename.length() > 0) {
- int c = ename.charAt(0);
- if (c >= 'a' && c <= 'z') {
- c = 'A' + c - 'a';
- }
- if (c >= 'A' && c <= 'Z') {
- Element[] elements = ELEMENTS_ARRAY[c - 'A'];
- if (elements != null) {
- for (int i = 0; i < elements.length; i++) {
- Element elem = elements[i];
- if (elem.name.equalsIgnoreCase(ename)) {
- return elem;
- }
- }
- }
- }
- }
- return element;
-
- } // getElement(String):Element
-
- //
- // Classes
- //
-
- /**
- * Element information.
- *
- * @author Andy Clark
- */
- public static class Element {
-
- //
- // Constants
- //
-
- /** Inline element. */
- public static final int INLINE = 0x01;
-
- /** Block element. */
- public static final int BLOCK = 0x02;
-
- /** Empty element. */
- public static final int EMPTY = 0x04;
-
- /** Container element. */
- public static final int CONTAINER = 0x08;
-
- /** Special element. */
- public static final int SPECIAL = 0x10;
-
- //
- // Data
- //
-
- /** The element code. */
- public short code;
-
- /** The element name. */
- public String name;
-
- /** Informational flags. */
- public int flags;
-
- /** Parent elements. */
- public short[] parentCodes;
-
- /** Parent elements. */
- public Element[] parent;
-
- /** The bounding element code. */
- public short bounds;
-
- /** List of elements this element can close. */
- public short[] closes;
-
- /** If set to true, then this element may not be nested, example: "A" **/
- boolean nestable = true;
-
- //
- // Constructors
- //
-
- /**
- * Constructs an element object.
- *
- * @param code The element code.
- * @param name The element name.
- * @param flags Informational flags
- * @param parent Natural closing parent name.
- * @param closes List of elements this element can close.
- */
- public Element(short code, String name, int flags,
- short parent, short[] closes) {
- this(code, name, flags, new short[]{parent}, (short)-1, closes);
- } // <init>(short,String,int,short,short[]);
-
- /**
- * Constructs an element object.
- *
- * @param code The element code.
- * @param name The element name.
- * @param flags Informational flags
- * @param parent Natural closing parent name.
- * @param closes List of elements this element can close.
- */
- public Element(short code, String name, int flags,
- short parent, short bounds, short[] closes) {
- this(code, name, flags, new short[]{parent}, bounds, closes);
- } // <init>(short,String,int,short,short,short[])
-
- /**
- * Constructs an element object.
- *
- * @param code The element code.
- * @param name The element name.
- * @param flags Informational flags
- * @param parents Natural closing parent names.
- * @param closes List of elements this element can close.
- */
- public Element(short code, String name, int flags,
- short[] parents, short[] closes) {
- this(code, name, flags, parents, (short)-1, closes);
- } // <init>(short,String,int,short[],short[])
-
- /**
- * Constructs an element object.
- *
- * @param code The element code.
- * @param name The element name.
- * @param flags Informational flags
- * @param parents Natural closing parent names.
- * @param closes List of elements this element can close.
- */
- public Element(short code, String name, int flags,
- short[] parents, short bounds, short[] closes) {
- this.code = code;
- this.name = name;
- this.flags = flags;
- this.parentCodes = parents;
- this.parent = null;
- this.bounds = bounds;
- this.closes = closes;
- if(closes != null) {
- for(int i=0;i<closes.length;i++) {
- if(closes[i] == code) {
- this.nestable = false;
- break;
- }
- }
- }
- } // <init>(short,String,int,short[],short,short[])
-
- //
- // Public methods
- //
-
- /** Returns true if this element is an inline element. */
- public final boolean isInline() {
- return (flags & INLINE) != 0;
- } // isInline():boolean
-
- /** Returns true if this element is a block element. */
- public final boolean isBlock() {
- return (flags & BLOCK) != 0;
- } // isBlock():boolean
-
- /** Returns true if this element is an empty element. */
- public final boolean isEmpty() {
- return (flags & EMPTY) != 0;
- } // isEmpty():boolean
-
- /** Returns true if this element is a container element. */
- public final boolean isContainer() {
- return (flags & CONTAINER) != 0;
- } // isContainer():boolean
-
- /**
- * Returns true if this element is special -- if its content
- * should be parsed ignoring markup.
- */
- public final boolean isSpecial() {
- return (flags & SPECIAL) != 0;
- } // isSpecial():boolean
-
- /**
- * Returns true if this element can close the specified Element.
- *
- * @param tag The element.
- */
- public boolean closes(short tag) {
-
- if (closes != null) {
- for (int i = 0; i < closes.length; i++) {
- if (closes[i] == tag) {
- return true;
- }
- }
- }
- return false;
-
- } // closes(short):boolean
-
- //
- // Object methods
- //
-
- /** Returns a hash code for this object. */
- public int hashCode() {
- return name.hashCode();
- } // hashCode():int
-
- /** Returns true if the objects are equal. */
- public boolean equals(Object o) {
- return name.equals(o);
- } // equals(Object):boolean
-
- /**
- * Provides a simple representation to make debugging easier
- */
- public String toString() {
- return super.toString() + "(name=" + name + ")";
- }
-
- /**
- * Indicates if the provided element is an accepted parent of current element
- * @param element the element to test for "paternity"
- * @return <code>true</code> if <code>element</code> belongs to the {@link #parent}
- */
- public boolean isParent(final Element element) {
- if (parent == null)
- return false;
- else {
- for (int i=0; i<parent.length; ++i) {
- if (element.code == parent[i].code)
- return true;
- }
- }
- return false;
- }
- } // class Element
-
- /** Unsynchronized list of elements. */
- public static class ElementList {
-
- //
- // Data
- //
-
- /** The size of the list. */
- public int size;
-
- /** The data in the list. */
- public Element[] data = new Element[120];
-
- //
- // Public methods
- //
-
- /** Adds an element to list, resizing if necessary. */
- public void addElement(Element element) {
- if (size == data.length) {
- Element[] newarray = new Element[size + 20];
- System.arraycopy(data, 0, newarray, 0, size);
- data = newarray;
- }
- data[size++] = element;
- } // addElement(Element)
-
- } // class Element
-
-} // class HTMLElements
diff -Nru boilerpipe-1.2.0/src/main/org/cyberneko/html/HTMLTagBalancer.java boilerpipe-1.2.0-gil/src/main/org/cyberneko/html/HTMLTagBalancer.java
--- boilerpipe-1.2.0/src/main/org/cyberneko/html/HTMLTagBalancer.java 2010-12-16 11:30:06.000000000 +0100
+++ boilerpipe-1.2.0-gil/src/main/org/cyberneko/html/HTMLTagBalancer.java 1970-01-01 01:00:00.000000000 +0100
@@ -1,1409 +0,0 @@
-/*
- * Copyright 2002-2009 Andy Clark, Marc Guillemot
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.cyberneko.html;
-
-import java.util.ArrayList;
-import java.util.List;
-import org.apache.xerces.util.XMLAttributesImpl;
-import org.apache.xerces.xni.Augmentations;
-import org.apache.xerces.xni.NamespaceContext;
-import org.apache.xerces.xni.QName;
-import org.apache.xerces.xni.XMLAttributes;
-import org.apache.xerces.xni.XMLDocumentHandler;
-import org.apache.xerces.xni.XMLLocator;
-import org.apache.xerces.xni.XMLResourceIdentifier;
-import org.apache.xerces.xni.XMLString;
-import org.apache.xerces.xni.XNIException;
-import org.apache.xerces.xni.parser.XMLComponentManager;
-import org.apache.xerces.xni.parser.XMLConfigurationException;
-import org.apache.xerces.xni.parser.XMLDocumentFilter;
-import org.apache.xerces.xni.parser.XMLDocumentSource;
-import org.cyberneko.html.HTMLElements.Element;
-import org.cyberneko.html.filters.NamespaceBinder;
-import org.cyberneko.html.xercesbridge.XercesBridge;
-
-/**
- * Balances tags in an HTML document. This component receives document events
- * and tries to correct many common mistakes that human (and computer) HTML
- * document authors make. This tag balancer can:
- * <ul>
- * <li>add missing parent elements;
- * <li>automatically close elements with optional end tags; and
- * <li>handle mis-matched inline element tags.
- * </ul>
- * <p>
- * This component recognizes the following features:
- * <ul>
- * <li>http://cyberneko.org/html/features/augmentations
- * <li>http://cyberneko.org/html/features/report-errors
- * <li>http://cyberneko.org/html/features/balance-tags/document-fragment
- * <li>http://cyberneko.org/html/features/balance-tags/ignore-outside-content
- * </ul>
- * <p>
- * This component recognizes the following properties:
- * <ul>
- * <li>http://cyberneko.org/html/properties/names/elems
- * <li>http://cyberneko.org/html/properties/names/attrs
- * <li>http://cyberneko.org/html/properties/error-reporter
- * <li>http://cyberneko.org/html/properties/balance-tags/current-stack
- * </ul>
- *
- * @see HTMLElements
- *
- * @author Andy Clark
- * @author Marc Guillemot
- *
- * @version $Id: HTMLTagBalancer.java,v 1.20 2005/02/14 04:06:22 andyc Exp $
- */
-public class HTMLTagBalancer
- implements XMLDocumentFilter, HTMLComponent {
-
- //
- // Constants
- //
-
- // features
-
- /** Namespaces. */
- protected static final String NAMESPACES = "http://xml.org/sax/features/namespaces";
-
- /** Include infoset augmentations. */
- protected static final String AUGMENTATIONS = "http://cyberneko.org/html/features/augmentations";
-
- /** Report errors. */
- protected static final String REPORT_ERRORS = "http://cyberneko.org/html/features/report-errors";
-
- /** Document fragment balancing only (deprecated). */
- protected static final String DOCUMENT_FRAGMENT_DEPRECATED = "http://cyberneko.org/html/features/document-fragment";
-
- /** Document fragment balancing only. */
- protected static final String DOCUMENT_FRAGMENT = "http://cyberneko.org/html/features/balance-tags/document-fragment";
-
- /** Ignore outside content. */
- protected static final String IGNORE_OUTSIDE_CONTENT = "http://cyberneko.org/html/features/balance-tags/ignore-outside-content";
-
- /** Recognized features. */
- private static final String[] RECOGNIZED_FEATURES = {
- NAMESPACES,
- AUGMENTATIONS,
- REPORT_ERRORS,
- DOCUMENT_FRAGMENT_DEPRECATED,
- DOCUMENT_FRAGMENT,
- IGNORE_OUTSIDE_CONTENT,
- };
-
- /** Recognized features defaults. */
- private static final Boolean[] RECOGNIZED_FEATURES_DEFAULTS = {
- null,
- null,
- null,
- null,
- Boolean.FALSE,
- Boolean.FALSE,
- };
-
- // properties
-
- /** Modify HTML element names: { "upper", "lower", "default" }. */
- protected static final String NAMES_ELEMS = "http://cyberneko.org/html/properties/names/elems";
-
- /** Modify HTML attribute names: { "upper", "lower", "default" }. */
- protected static final String NAMES_ATTRS = "http://cyberneko.org/html/properties/names/attrs";
-
- /** Error reporter. */
- protected static final String ERROR_REPORTER = "http://cyberneko.org/html/properties/error-reporter";
-
- /**
- * <font color="red">EXPERIMENTAL: may change in next release</font><br/>
- * Name of the property holding the stack of elements in which context a document fragment should be parsed.
- **/
- public static final String FRAGMENT_CONTEXT_STACK = "http://cyberneko.org/html/properties/balance-tags/fragment-context-stack";
-
- /** Recognized properties. */
- private static final String[] RECOGNIZED_PROPERTIES = {
- NAMES_ELEMS,
- NAMES_ATTRS,
- ERROR_REPORTER,
- FRAGMENT_CONTEXT_STACK,
- };
-
- /** Recognized properties defaults. */
- private static final Object[] RECOGNIZED_PROPERTIES_DEFAULTS = {
- null,
- null,
- null,
- null,
- };
-
- // modify HTML names
-
- /** Don't modify HTML names. */
- protected static final short NAMES_NO_CHANGE = 0;
-
- /** Match HTML element names. */
- protected static final short NAMES_MATCH = 0;
-
- /** Uppercase HTML names. */
- protected static final short NAMES_UPPERCASE = 1;
-
- /** Lowercase HTML names. */
- protected static final short NAMES_LOWERCASE = 2;
-
- // static vars
-
- /** Synthesized event info item. */
- protected static final HTMLEventInfo SYNTHESIZED_ITEM =
- new HTMLEventInfo.SynthesizedItem();
-
- //
- // Data
- //
-
- // features
-
- /** Namespaces. */
- protected boolean fNamespaces;
-
- /** Include infoset augmentations. */
- protected boolean fAugmentations;
-
- /** Report errors. */
- protected boolean fReportErrors;
-
- /** Document fragment balancing only. */
- protected boolean fDocumentFragment;
-
- /** Ignore outside content. */
- protected boolean fIgnoreOutsideContent;
-
- // properties
-
- /** Modify HTML element names. */
- protected short fNamesElems;
-
- /** Modify HTML attribute names. */
- protected short fNamesAttrs;
-
- /** Error reporter. */
- protected HTMLErrorReporter fErrorReporter;
-
- // connections
-
- /** The document source. */
- protected XMLDocumentSource fDocumentSource;
-
- /** The document handler. */
- protected XMLDocumentHandler fDocumentHandler;
-
- // state
-
- /** The element stack. */
- protected final InfoStack fElementStack = new InfoStack();
-
- /** The inline stack. */
- protected final InfoStack fInlineStack = new InfoStack();
-
- /** True if seen anything. Important for xml declaration. */
- protected boolean fSeenAnything;
-
- /** True if root element has been seen. */
- protected boolean fSeenDoctype;
-
- /** True if root element has been seen. */
- protected boolean fSeenRootElement;
-
- /**
- * True if seen the end of the document element. In other words,
- * this variable is set to false <em>until</em> the end &lt;/HTML&gt;
- * tag is seen (or synthesized). This is used to ensure that
- * extraneous events after the end of the document element do not
- * make the document stream ill-formed.
- */
- protected boolean fSeenRootElementEnd;
-
- /** True if seen &lt;head&lt; element. */
- protected boolean fSeenHeadElement;
-
- /** True if seen &lt;body&lt; element. */
- protected boolean fSeenBodyElement;
-
- /** True if a form is in the stack (allow to discard opening of nested forms) */
- protected boolean fOpenedForm;
-
- // temp vars
-
- /** A qualified name. */
- private final QName fQName = new QName();
-
- /** Empty attributes. */
- private final XMLAttributes fEmptyAttrs = new XMLAttributesImpl();
-
- /** Augmentations. */
- private final HTMLAugmentations fInfosetAugs = new HTMLAugmentations();
-
- protected HTMLTagBalancingListener tagBalancingListener;
- private LostText lostText_ = new LostText();
-
- private boolean forcedStartElement_ = false;
- private boolean forcedEndElement_ = false;
-
- /**
- * Stack of elements determining the context in which a document fragment should be parsed
- */
- private QName[] fragmentContextStack_ = null;
- private int fragmentContextStackSize_ = 0; // not 0 only when a fragment is parsed and fragmentContextStack_ is set
-
- private List/*ElementEntry*/ endElementsBuffer_ = new ArrayList();
-
- //
- // HTMLComponent methods
- //
-
- /** Returns the default state for a feature. */
- public Boolean getFeatureDefault(String featureId) {
- int length = RECOGNIZED_FEATURES != null ? RECOGNIZED_FEATURES.length : 0;
- for (int i = 0; i < length; i++) {
- if (RECOGNIZED_FEATURES[i].equals(featureId)) {
- return RECOGNIZED_FEATURES_DEFAULTS[i];
- }
- }
- return null;
- } // getFeatureDefault(String):Boolean
-
- /** Returns the default state for a property. */
- public Object getPropertyDefault(String propertyId) {
- int length = RECOGNIZED_PROPERTIES != null ? RECOGNIZED_PROPERTIES.length : 0;
- for (int i = 0; i < length; i++) {
- if (RECOGNIZED_PROPERTIES[i].equals(propertyId)) {
- return RECOGNIZED_PROPERTIES_DEFAULTS[i];
- }
- }
- return null;
- } // getPropertyDefault(String):Object
-
- //
- // XMLComponent methods
- //
-
- /** Returns recognized features. */
- public String[] getRecognizedFeatures() {
- return RECOGNIZED_FEATURES;
- } // getRecognizedFeatures():String[]
-
- /** Returns recognized properties. */
- public String[] getRecognizedProperties() {
- return RECOGNIZED_PROPERTIES;
- } // getRecognizedProperties():String[]
-
- /** Resets the component. */
- public void reset(XMLComponentManager manager)
- throws XMLConfigurationException {
-
- // get features
- fNamespaces = manager.getFeature(NAMESPACES);
- fAugmentations = manager.getFeature(AUGMENTATIONS);
- fReportErrors = manager.getFeature(REPORT_ERRORS);
- fDocumentFragment = manager.getFeature(DOCUMENT_FRAGMENT) ||
- manager.getFeature(DOCUMENT_FRAGMENT_DEPRECATED);
- fIgnoreOutsideContent = manager.getFeature(IGNORE_OUTSIDE_CONTENT);
-
- // get properties
- fNamesElems = getNamesValue(String.valueOf(manager.getProperty(NAMES_ELEMS)));
- fNamesAttrs = getNamesValue(String.valueOf(manager.getProperty(NAMES_ATTRS)));
- fErrorReporter = (HTMLErrorReporter)manager.getProperty(ERROR_REPORTER);
-
- fragmentContextStack_ = (QName[]) manager.getProperty(FRAGMENT_CONTEXT_STACK);
-
- } // reset(XMLComponentManager)
-
- /** Sets a feature. */
- public void setFeature(String featureId, boolean state)
- throws XMLConfigurationException {
-
- if (featureId.equals(AUGMENTATIONS)) {
- fAugmentations = state;
- return;
- }
- if (featureId.equals(REPORT_ERRORS)) {
- fReportErrors = state;
- return;
- }
- if (featureId.equals(IGNORE_OUTSIDE_CONTENT)) {
- fIgnoreOutsideContent = state;
- return;
- }
-
- } // setFeature(String,boolean)
-
- /** Sets a property. */
- public void setProperty(String propertyId, Object value)
- throws XMLConfigurationException {
-
- if (propertyId.equals(NAMES_ELEMS)) {
- fNamesElems = getNamesValue(String.valueOf(value));
- return;
- }
-
- if (propertyId.equals(NAMES_ATTRS)) {
- fNamesAttrs = getNamesValue(String.valueOf(value));
- return;
- }
-
- } // setProperty(String,Object)
-
- //
- // XMLDocumentSource methods
- //
-
- /** Sets the document handler. */
- public void setDocumentHandler(XMLDocumentHandler handler) {
- fDocumentHandler = handler;
- } // setDocumentHandler(XMLDocumentHandler)
-
- // @since Xerces 2.1.0
-
- /** Returns the document handler. */
- public XMLDocumentHandler getDocumentHandler() {
- return fDocumentHandler;
- } // getDocumentHandler():XMLDocumentHandler
-
- //
- // XMLDocumentHandler methods
- //
-
- // since Xerces-J 2.2.0
-
- /** Start document. */
- public void startDocument(XMLLocator locator, String encoding,
- NamespaceContext nscontext, Augmentations augs)
- throws XNIException {
-
- // reset state
- fElementStack.top = 0;
- if (fragmentContextStack_ != null) {
- fragmentContextStackSize_ = fragmentContextStack_.length;
- for (int i=0; i<fragmentContextStack_.length; ++i) {
- final QName name = fragmentContextStack_[i];
- final Element elt = HTMLElements.getElement(name.localpart);
- fElementStack.push(new Info(elt, name));
- }
-
- }
- else {
- fragmentContextStackSize_ = 0;
- }
- fSeenAnything = false;
- fSeenDoctype = false;
- fSeenRootElement = false;
- fSeenRootElementEnd = false;
- fSeenHeadElement = false;
- fSeenBodyElement = false;
-
-
- // pass on event
- if (fDocumentHandler != null) {
- XercesBridge.getInstance().XMLDocumentHandler_startDocument(fDocumentHandler, locator, encoding, nscontext, augs);
- }
-
- } // startDocument(XMLLocator,String,Augmentations)
-
- // old methods
-
- /** XML declaration. */
- public void xmlDecl(String version, String encoding, String standalone,
- Augmentations augs) throws XNIException {
- if (!fSeenAnything && fDocumentHandler != null) {
- fDocumentHandler.xmlDecl(version, encoding, standalone, augs);
- }
- } // xmlDecl(String,String,String,Augmentations)
-
- /** Doctype declaration. */
- public void doctypeDecl(String rootElementName, String publicId, String systemId,
- Augmentations augs) throws XNIException {
- fSeenAnything = true;
- if (fReportErrors) {
- if (fSeenRootElement) {
- fErrorReporter.reportError("HTML2010", null);
- }
- else if (fSeenDoctype) {
- fErrorReporter.reportError("HTML2011", null);
- }
- }
- if (!fSeenRootElement && !fSeenDoctype) {
- fSeenDoctype = true;
- if (fDocumentHandler != null) {
- fDocumentHandler.doctypeDecl(rootElementName, publicId, systemId, augs);
- }
- }
- } // doctypeDecl(String,String,String,Augmentations)
-
- /** End document. */
- public void endDocument(Augmentations augs) throws XNIException {
-
- // </body> and </html> have been buffered to consider outside content
- fIgnoreOutsideContent = true; // endElement should not ignore the elements passed from buffer
- consumeBufferedEndElements();
-
- // handle empty document
- if (!fSeenRootElement && !fDocumentFragment) {
- if (fReportErrors) {
- fErrorReporter.reportError("HTML2000", null);
- }
- if (fDocumentHandler != null) {
- fSeenRootElementEnd = false;
- forceStartBody(); // will force <html> and <head></head>
- final String body = modifyName("body", fNamesElems);
- fQName.setValues(null, body, body, null);
- callEndElement(fQName, synthesizedAugs());
-
- final String ename = modifyName("html", fNamesElems);
- fQName.setValues(null, ename, ename, null);
- callEndElement(fQName, synthesizedAugs());
- }
- }
-
- // pop all remaining elements
- else {
- int length = fElementStack.top - fragmentContextStackSize_;
- for (int i = 0; i < length; i++) {
- Info info = fElementStack.pop();
- if (fReportErrors) {
- String ename = info.qname.rawname;
- fErrorReporter.reportWarning("HTML2001", new Object[]{ename});
- }
- if (fDocumentHandler != null) {
- callEndElement(info.qname, synthesizedAugs());
- }
- }
- }
-
- // call handler
- if (fDocumentHandler != null) {
- fDocumentHandler.endDocument(augs);
- }
-
- } // endDocument(Augmentations)
-
- /**
- * Consume elements that have been buffered, like </body></html> that are first consumed
- * at the end of document
- */
- private void consumeBufferedEndElements() {
- final List toConsume = new ArrayList(endElementsBuffer_);
- endElementsBuffer_.clear();
- for (int i=0; i<toConsume.size(); ++i) {
- final ElementEntry entry = (ElementEntry) toConsume.get(i);
- forcedEndElement_ = true;
- endElement(entry.name_, entry.augs_);
- }
- endElementsBuffer_.clear();
- }
-
- /** Comment. */
- public void comment(XMLString text, Augmentations augs) throws XNIException {
- fSeenAnything = true;
- consumeEarlyTextIfNeeded();
- if (fDocumentHandler != null) {
- fDocumentHandler.comment(text, augs);
- }
- } // comment(XMLString,Augmentations)
-
- private void consumeEarlyTextIfNeeded() {
- if (!lostText_.isEmpty()) {
- if (!fSeenBodyElement) {
- forceStartBody();
- }
- lostText_.refeed(this);
- }
- }
-
- /** Processing instruction. */
- public void processingInstruction(String target, XMLString data,
- Augmentations augs) throws XNIException {
- fSeenAnything = true;
- consumeEarlyTextIfNeeded();
- if (fDocumentHandler != null) {
- fDocumentHandler.processingInstruction(target, data, augs);
- }
- } // processingInstruction(String,XMLString,Augmentations)
-
- /** Start element. */
- public void startElement(final QName elem, XMLAttributes attrs, final Augmentations augs)
- throws XNIException {
- fSeenAnything = true;
-
- final boolean isForcedCreation = forcedStartElement_;
- forcedStartElement_ = false;
-
- // check for end of document
- if (fSeenRootElementEnd) {
- notifyDiscardedStartElement(elem, attrs, augs);
- return;
- }
-
- // get element information
- final HTMLElements.Element element = getElement(elem);
- final short elementCode = element.code;
-
- // the creation of some elements like TABLE or SELECT can't be forced. Any others?
- if (isForcedCreation && (elementCode == HTMLElements.TABLE || elementCode == HTMLElements.SELECT)) {
- return; // don't accept creation
- }
-
- // ignore multiple html, head, body elements
- if (fSeenRootElement && elementCode == HTMLElements.HTML) {
- notifyDiscardedStartElement(elem, attrs, augs);
- return;
- }
- if (elementCode == HTMLElements.HEAD) {
- if (fSeenHeadElement) {
- notifyDiscardedStartElement(elem, attrs, augs);
- return;
- }
- fSeenHeadElement = true;
- }
- else if (elementCode == HTMLElements.FRAMESET) {
- consumeBufferedEndElements(); // </head> (if any) has been buffered
- }
- else if (elementCode == HTMLElements.BODY) {
- // create <head></head> if none was present
- if (!fSeenHeadElement) {
- final QName head = createQName("head");
- forceStartElement(head, null, synthesizedAugs());
- endElement(head, synthesizedAugs());
- }
- consumeBufferedEndElements(); // </head> (if any) has been buffered
-
- if (fSeenBodyElement) {
- notifyDiscardedStartElement(elem, attrs, augs);
- return;
- }
- fSeenBodyElement = true;
- }
- else if (elementCode == HTMLElements.FORM) {
- if (fOpenedForm) {
- notifyDiscardedStartElement(elem, attrs, augs);
- return;
- }
- fOpenedForm = true;
- }
- else if (elementCode == HTMLElements.UNKNOWN) {
- consumeBufferedEndElements();
- }
-
- // check proper parent
- if (element.parent != null) {
- if (!fSeenRootElement && !fDocumentFragment) {
- String pname = element.parent[0].name;
- pname = modifyName(pname, fNamesElems);
- if (fReportErrors) {
- String ename = elem.rawname;
- fErrorReporter.reportWarning("HTML2002", new Object[]{ename,pname});
- }
- final QName qname = new QName(null, pname, pname, null);
- final boolean parentCreated = forceStartElement(qname, null, synthesizedAugs());
- if (!parentCreated) {
- if (!isForcedCreation) {
- notifyDiscardedStartElement(elem, attrs, augs);
- }
- return;
- }
- }
- else {
- HTMLElements.Element preferedParent = element.parent[0];
- if (preferedParent.code != HTMLElements.HEAD || (!fSeenBodyElement && !fDocumentFragment)) {
- int depth = getParentDepth(element.parent, element.bounds);
- if (depth == -1) { // no parent found
- final String pname = modifyName(preferedParent.name, fNamesElems);
- final QName qname = new QName(null, pname, pname, null);
- if (fReportErrors) {
- String ename = elem.rawname;
- fErrorReporter.reportWarning("HTML2004", new Object[]{ename,pname});
- }
- final boolean parentCreated = forceStartElement(qname, null, synthesizedAugs());
- if (!parentCreated) {
- if (!isForcedCreation) {
- notifyDiscardedStartElement(elem, attrs, augs);
- }
- return;
- }
- }
- }
- }
- }
-
- // if block element, save immediate parent inline elements
- int depth = 0;
- if (element.flags == 0) {
- int length = fElementStack.top;
- fInlineStack.top = 0;
- for (int i = length - 1; i >= 0; i--) {
- Info info = fElementStack.data[i];
- if (!info.element.isInline()) {
- break;
- }
- fInlineStack.push(info);
- endElement(info.qname, synthesizedAugs());
- }
- depth = fInlineStack.top;
- }
-
- // close previous elements
- // all elements close a <script>
- // in head, no element has children
- if ((fElementStack.top > 1
- && (fElementStack.peek().element.code == HTMLElements.SCRIPT))
- || fElementStack.top > 2 && fElementStack.data[fElementStack.top-2].element.code == HTMLElements.HEAD) {
- final Info info = fElementStack.pop();
- if (fDocumentHandler != null) {
- callEndElement(info.qname, synthesizedAugs());
- }
- }
- if (element.closes != null) {
- int length = fElementStack.top;
- for (int i = length - 1; i >= 0; i--) {
- Info info = fElementStack.data[i];
-
- // does it close the element we're looking at?
- if (element.closes(info.element.code)) {
- if (fReportErrors) {
- String ename = elem.rawname;
- String iname = info.qname.rawname;
- fErrorReporter.reportWarning("HTML2005", new Object[]{ename,iname});
- }
- for (int j = length - 1; j >= i; j--) {
- info = fElementStack.pop();
- if (fDocumentHandler != null) {
- // PATCH: Marc-Andr� Morissette
- callEndElement(info.qname, synthesizedAugs());
- }
- }
- length = i;
- continue;
- }
-
- // should we stop searching?
- if(element.nestable) {
- if (info.element.isBlock() || element.isParent(info.element)) {
- break;
- }
- }
- }
- }
- // TODO: investigate if only table is special here
- // table closes all opened inline elements
- else if (elementCode == HTMLElements.TABLE) {
- for (int i=fElementStack.top-1; i >= 0; i--) {
- final Info info = fElementStack.data[i];
- if (!info.element.isInline()) {
- break;
- }
- endElement(info.qname, synthesizedAugs());
- }
- }
-
- // call handler
- fSeenRootElement = true;
- if (element != null && element.isEmpty()) {
- if (attrs == null) {
- attrs = emptyAttributes();
- }
- if (fDocumentHandler != null) {
- fDocumentHandler.emptyElement(elem, attrs, augs);
- }
- }
- else {
- boolean inline = element != null && element.isInline();
- fElementStack.push(new Info(element, elem, inline ? attrs : null));
- if (attrs == null) {
- attrs = emptyAttributes();
- }
- if (fDocumentHandler != null) {
- callStartElement(elem, attrs, augs);
- }
- }
-
- // re-open inline elements
- for (int i = 0; i < depth; i++) {
- Info info = fInlineStack.pop();
- forceStartElement(info.qname, info.attributes, synthesizedAugs());
- }
-
- if (elementCode == HTMLElements.BODY) {
- lostText_.refeed(this);
- }
- } // startElement(QName,XMLAttributes,Augmentations)
-
- /**
- * Forces an element start, taking care to set the information to allow startElement to "see" that's
- * the element has been forced.
- * @return <code>true</code> if creation could be done (TABLE's creation for instance can't be forced)
- */
- private boolean forceStartElement(final QName elem, XMLAttributes attrs, final Augmentations augs)
- throws XNIException {
-
- forcedStartElement_ = true;
- startElement(elem, attrs, augs);
-
- return fElementStack.top > 0 && elem.equals(fElementStack.peek().qname);
- }
-
- private QName createQName(String tagName) {
- tagName = modifyName(tagName, fNamesElems);
- return new QName(null, tagName, tagName, NamespaceBinder.XHTML_1_0_URI);
- }
-
- /** Empty element. */
- public void emptyElement(final QName element, XMLAttributes attrs, Augmentations augs)
- throws XNIException {
- startElement(element, attrs, augs);
- // browser ignore the closing indication for non empty tags like <form .../> but not for unknown element
- final HTMLElements.Element elem = getElement(element);
- if (elem.isEmpty() || elem.code == HTMLElements.UNKNOWN) {
- endElement(element, augs);
- }
- } // emptyElement(QName,XMLAttributes,Augmentations)
-
- /** Start entity. */
- public void startGeneralEntity(String name,
- XMLResourceIdentifier id,
- String encoding,
- Augmentations augs) throws XNIException {
- fSeenAnything = true;
-
- // check for end of document
- if (fSeenRootElementEnd) {
- return;
- }
-
- // insert body, if needed
- if (!fDocumentFragment) {
- boolean insertBody = !fSeenRootElement;
- if (!insertBody) {
- Info info = fElementStack.peek();
- if (info.element.code == HTMLElements.HEAD ||
- info.element.code == HTMLElements.HTML) {
- String hname = modifyName("head", fNamesElems);
- String bname = modifyName("body", fNamesElems);
- if (fReportErrors) {
- fErrorReporter.reportWarning("HTML2009", new Object[]{hname,bname});
- }
- fQName.setValues(null, hname, hname, null);
- endElement(fQName, synthesizedAugs());
- insertBody = true;
- }
- }
- if (insertBody) {
- forceStartBody();
- }
- }
-
- // call handler
- if (fDocumentHandler != null) {
- fDocumentHandler.startGeneralEntity(name, id, encoding, augs);
- }
-
- } // startGeneralEntity(String,XMLResourceIdentifier,String,Augmentations)
-
- /**
- * Generates a missing <body> (which creates missing <head> when needed)
- */
- private void forceStartBody() {
- final QName body = createQName("body");
- if (fReportErrors) {
- fErrorReporter.reportWarning("HTML2006", new Object[]{body.localpart});
- }
- forceStartElement(body, null, synthesizedAugs());
- }
-
- /** Text declaration. */
- public void textDecl(String version, String encoding, Augmentations augs)
- throws XNIException {
- fSeenAnything = true;
-
- // check for end of document
- if (fSeenRootElementEnd) {
- return;
- }
-
- // call handler
- if (fDocumentHandler != null) {
- fDocumentHandler.textDecl(version, encoding, augs);
- }
-
- } // textDecl(String,String,Augmentations)
-
- /** End entity. */
- public void endGeneralEntity(String name, Augmentations augs) throws XNIException {
-
- // check for end of document
- if (fSeenRootElementEnd) {
- return;
- }
-
- // call handler
- if (fDocumentHandler != null) {
- fDocumentHandler.endGeneralEntity(name, augs);
- }
-
- } // endGeneralEntity(String,Augmentations)
-
- /** Start CDATA section. */
- public void startCDATA(Augmentations augs) throws XNIException {
- fSeenAnything = true;
-
- consumeEarlyTextIfNeeded();
-
- // check for end of document
- if (fSeenRootElementEnd) {
- return;
- }
-
- // call handler
- if (fDocumentHandler != null) {
- fDocumentHandler.startCDATA(augs);
- }
-
- } // startCDATA(Augmentations)
-
- /** End CDATA section. */
- public void endCDATA(Augmentations augs) throws XNIException {
-
- // check for end of document
- if (fSeenRootElementEnd) {
- return;
- }
-
- // call handler
- if (fDocumentHandler != null) {
- fDocumentHandler.endCDATA(augs);
- }
-
- } // endCDATA(Augmentations)
-
- /** Characters. */
- public void characters(final XMLString text, final Augmentations augs) throws XNIException {
- // check for end of document
- if (fSeenRootElementEnd) {
- return;
- }
-
- if (fElementStack.top == 0 && !fDocumentFragment) {
- // character before first opening tag
- lostText_.add(text, augs);
- return;
- }
-
- // is this text whitespace?
- boolean whitespace = true;
- for (int i = 0; i < text.length; i++) {
- if (!Character.isWhitespace(text.ch[text.offset + i])) {
- whitespace = false;
- break;
- }
- }
-
- if (!fDocumentFragment) {
- // handle bare characters
- if (!fSeenRootElement) {
- if (whitespace) {
- return;
- }
- forceStartBody();
- }
-
- if (whitespace && (fElementStack.top < 2 || endElementsBuffer_.size() == 1)) {
- // ignore spaces directly within <html>
- return;
- }
-
- // handle character content in head
- // NOTE: This frequently happens when the document looks like:
- // <title>Title</title>
- // And here's some text.
- else if (!whitespace) {
- Info info = fElementStack.peek();
- if (info.element.code == HTMLElements.HEAD ||
- info.element.code == HTMLElements.HTML) {
- String hname = modifyName("head", fNamesElems);
- String bname = modifyName("body", fNamesElems);
- if (fReportErrors) {
- fErrorReporter.reportWarning("HTML2009", new Object[]{hname,bname});
- }
- forceStartBody();
- }
- }
- }
-
- // call handler
- if (fDocumentHandler != null) {
- fDocumentHandler.characters(text, augs);
- }
-
- } // characters(XMLString,Augmentations)
-
- /** Ignorable whitespace. */
- public void ignorableWhitespace(XMLString text, Augmentations augs)
- throws XNIException {
- characters(text, augs);
- } // ignorableWhitespace(XMLString,Augmentations)
-
- /** End element. */
- public void endElement(final QName element, final Augmentations augs) throws XNIException {
- final boolean forcedEndElement = forcedEndElement_;
- // is there anything to do?
- if (fSeenRootElementEnd) {
- notifyDiscardedEndElement(element, augs);
- return;
- }
-
- // get element information
- HTMLElements.Element elem = getElement(element);
-
- // if we consider outside content, just buffer </body> and </html> to consider them at the very end
- if (!fIgnoreOutsideContent &&
- (elem.code == HTMLElements.BODY || elem.code == HTMLElements.HTML)) {
- endElementsBuffer_.add(new ElementEntry(element, augs));
- return;
- }
-
- // check for end of document
- if (elem.code == HTMLElements.HTML) {
- fSeenRootElementEnd = true;
- }
- else if (elem.code == HTMLElements.FORM) {
- fOpenedForm = false;
- }
- else if (elem.code == HTMLElements.HEAD && !forcedEndElement) {
- // consume </head> first when <body> is reached to retrieve content lost between </head> and <body>
- endElementsBuffer_.add(new ElementEntry(element, augs));
- return;
- }
-
-
- // empty element
- int depth = getElementDepth(elem);
- if (depth == -1) {
- if (elem.code == HTMLElements.P) {
- forceStartElement(element, emptyAttributes(), synthesizedAugs());
- endElement(element, augs);
- }
- else if (!elem.isEmpty()) {
- notifyDiscardedEndElement(element, augs);
- }
- return;
- }
-
- // find unbalanced inline elements
- if (depth > 1 && elem.isInline()) {
- final int size = fElementStack.top;
- fInlineStack.top = 0;
- for (int i = 0; i < depth - 1; i++) {
- final Info info = fElementStack.data[size - i - 1];
- final HTMLElements.Element pelem = info.element;
-
- if (pelem.isInline() || pelem.code == HTMLElements.FONT) { // TODO: investigate if only FONT
- // NOTE: I don't have to make a copy of the info because
- // it will just be popped off of the element stack
- // as soon as we close it, anyway.
- fInlineStack.push(info);
- }
- }
- }
-
- // close children up to appropriate element
- for (int i = 0; i < depth; i++) {
- Info info = fElementStack.pop();
-
- if (fReportErrors && i < depth - 1) {
- String ename = modifyName(element.rawname, fNamesElems);
- String iname = info.qname.rawname;
- fErrorReporter.reportWarning("HTML2007", new Object[]{ename,iname});
- }
- if (fDocumentHandler != null) {
- // PATCH: Marc-Andr\u00e8 Morissette
- callEndElement(info.qname, i < depth - 1 ? synthesizedAugs() : augs);
- }
- }
-
- // re-open inline elements
- if (depth > 1) {
- int size = fInlineStack.top;
- for (int i = 0; i < size; i++) {
- Info info = (Info)fInlineStack.pop();
- XMLAttributes attributes = info.attributes;
- if (fReportErrors) {
- String iname = info.qname.rawname;
- fErrorReporter.reportWarning("HTML2008", new Object[]{iname});
- }
- forceStartElement(info.qname, attributes, synthesizedAugs());
- }
- }
-
- } // endElement(QName,Augmentations)
-
- // @since Xerces 2.1.0
-
- /** Sets the document source. */
- public void setDocumentSource(XMLDocumentSource source) {
- fDocumentSource = source;
- } // setDocumentSource(XMLDocumentSource)
-
- /** Returns the document source. */
- public XMLDocumentSource getDocumentSource() {
- return fDocumentSource;
- } // getDocumentSource():XMLDocumentSource
-
- // removed since Xerces-J 2.3.0
-
- /** Start document. */
- public void startDocument(XMLLocator locator, String encoding, Augmentations augs)
- throws XNIException {
- startDocument(locator, encoding, null, augs);
- } // startDocument(XMLLocator,String,Augmentations)
-
- /** Start prefix mapping. */
- public void startPrefixMapping(String prefix, String uri, Augmentations augs)
- throws XNIException {
-
- // check for end of document
- if (fSeenRootElementEnd) {
- return;
- }
-
- // call handler
- if (fDocumentHandler != null) {
- XercesBridge.getInstance().XMLDocumentHandler_startPrefixMapping(fDocumentHandler, prefix, uri, augs);
- }
-
- } // startPrefixMapping(String,String,Augmentations)
-
- /** End prefix mapping. */
- public void endPrefixMapping(String prefix, Augmentations augs)
- throws XNIException {
-
- // check for end of document
- if (fSeenRootElementEnd) {
- return;
- }
-
- // call handler
- if (fDocumentHandler != null) {
- XercesBridge.getInstance().XMLDocumentHandler_endPrefixMapping(fDocumentHandler, prefix, augs);
- }
-
- } // endPrefixMapping(String,Augmentations)
-
- //
- // Protected methods
- //
-
- /** Returns an HTML element. */
- protected HTMLElements.Element getElement(final QName elementName) {
- String name = elementName.rawname;
- if (fNamespaces && NamespaceBinder.XHTML_1_0_URI.equals(elementName.uri)) {
- int index = name.indexOf(':');
- if (index != -1) {
- name = name.substring(index+1);
- }
- }
- return HTMLElements.getElement(name);
- } // getElement(String):HTMLElements.Element
-
- /** Call document handler start element. */
- protected final void callStartElement(QName element, XMLAttributes attrs,
- Augmentations augs)
- throws XNIException {
- fDocumentHandler.startElement(element, attrs, augs);
- } // callStartElement(QName,XMLAttributes,Augmentations)
-
- /** Call document handler end element. */
- protected final void callEndElement(QName element, Augmentations augs)
- throws XNIException {
- fDocumentHandler.endElement(element, augs);
- } // callEndElement(QName,Augmentations)
-
- /**
- * Returns the depth of the open tag associated with the specified
- * element name or -1 if no matching element is found.
- *
- * @param element The element.
- */
- protected final int getElementDepth(HTMLElements.Element element) {
- final boolean container = element.isContainer();
- int depth = -1;
- for (int i = fElementStack.top - 1; i >=fragmentContextStackSize_; i--) {
- Info info = fElementStack.data[i];
- if (info.element.code == element.code) {
- depth = fElementStack.top - i;
- break;
- }
- if (!container && (element.nestable && info.element.isBlock())) {
- break;
- }
- }
- return depth;
- } // getElementDepth(HTMLElements.Element)
-
- /**
- * Returns the depth of the open tag associated with the specified
- * element parent names or -1 if no matching element is found.
- *
- * @param parents The parent elements.
- */
- protected int getParentDepth(HTMLElements.Element[] parents, short bounds) {
- if (parents != null) {
- for (int i = fElementStack.top - 1; i >= 0; i--) {
- Info info = fElementStack.data[i];
- if (info.element.code == bounds) {
- break;
- }
- for (int j = 0; j < parents.length; j++) {
- if (info.element.code == parents[j].code) {
- return fElementStack.top - i;
- }
- }
- }
- }
- return -1;
- } // getParentDepth(HTMLElements.Element[],short):int
-
- /** Returns a set of empty attributes. */
- protected final XMLAttributes emptyAttributes() {
- fEmptyAttrs.removeAllAttributes();
- return fEmptyAttrs;
- } // emptyAttributes():XMLAttributes
-
- /** Returns an augmentations object with a synthesized item added. */
- protected final Augmentations synthesizedAugs() {
- HTMLAugmentations augs = null;
- if (fAugmentations) {
- augs = fInfosetAugs;
- augs.removeAllItems();
- augs.putItem(AUGMENTATIONS, SYNTHESIZED_ITEM);
- }
- return augs;
- } // synthesizedAugs():Augmentations
-
- //
- // Protected static methods
- //
-
- /** Modifies the given name based on the specified mode. */
- protected static final String modifyName(String name, short mode) {
- switch (mode) {
- case NAMES_UPPERCASE: return name.toUpperCase();
- case NAMES_LOWERCASE: return name.toLowerCase();
- }
- return name;
- } // modifyName(String,short):String
-
- /**
- * Converts HTML names string value to constant value.
- *
- * @see #NAMES_NO_CHANGE
- * @see #NAMES_LOWERCASE
- * @see #NAMES_UPPERCASE
- */
- protected static final short getNamesValue(String value) {
- if (value.equals("lower")) {
- return NAMES_LOWERCASE;
- }
- if (value.equals("upper")) {
- return NAMES_UPPERCASE;
- }
- return NAMES_NO_CHANGE;
- } // getNamesValue(String):short
-
- //
- // Classes
- //
-
- /**
- * Element info for each start element. This information is used when
- * closing unbalanced inline elements. For example:
- * <pre>
- * &lt;i>unbalanced &lt;b>HTML&lt;/i> content&lt;/b>
- * </pre>
- * <p>
- * It seems that it is a waste of processing and memory to copy the
- * attributes for every start element even if there are no unbalanced
- * inline elements in the document. However, if the attributes are
- * <em>not</em> saved, then important attributes such as style
- * information would be lost.
- *
- * @author Andy Clark
- */
- public static class Info {
-
- //
- // Data
- //
-
- /** The element. */
- public HTMLElements.Element element;
-
- /** The element qualified name. */
- public QName qname;
-
- /** The element attributes. */
- public XMLAttributes attributes;
-
- //
- // Constructors
- //
-
- /**
- * Creates an element information object.
- * <p>
- * <strong>Note:</strong>
- * This constructor makes a copy of the element information.
- *
- * @param element The element qualified name.
- */
- public Info(HTMLElements.Element element, QName qname) {
- this(element, qname, null);
- } // <init>(HTMLElements.Element,QName)
-
- /**
- * Creates an element information object.
- * <p>
- * <strong>Note:</strong>
- * This constructor makes a copy of the element information.
- *
- * @param element The element qualified name.
- * @param attributes The element attributes.
- */
- public Info(HTMLElements.Element element,
- QName qname, XMLAttributes attributes) {
- this.element = element;
- this.qname = new QName(qname);
- if (attributes != null) {
- int length = attributes.getLength();
- if (length > 0) {
- QName aqname = new QName();
- XMLAttributes newattrs = new XMLAttributesImpl();
- for (int i = 0; i < length; i++) {
- attributes.getName(i, aqname);
- String type = attributes.getType(i);
- String value = attributes.getValue(i);
- String nonNormalizedValue = attributes.getNonNormalizedValue(i);
- boolean specified = attributes.isSpecified(i);
- newattrs.addAttribute(aqname, type, value);
- newattrs.setNonNormalizedValue(i, nonNormalizedValue);
- newattrs.setSpecified(i, specified);
- }
- this.attributes = newattrs;
- }
- }
- } // <init>(HTMLElements.Element,QName,XMLAttributes)
-
- /**
- * Simple representation to make debugging easier
- */
- public String toString() {
- return super.toString() + qname;
- }
- } // class Info
-
- /** Unsynchronized stack of element information. */
- public static class InfoStack {
-
- //
- // Data
- //
-
- /** The top of the stack. */
- public int top;
-
- /** The stack data. */
- public Info[] data = new Info[10];
-
- //
- // Public methods
- //
-
- /** Pushes element information onto the stack. */
- public void push(Info info) {
- if (top == data.length) {
- Info[] newarray = new Info[top + 10];
- System.arraycopy(data, 0, newarray, 0, top);
- data = newarray;
- }
- data[top++] = info;
- } // push(Info)
-
- /** Peeks at the top of the stack. */
- public Info peek() {
- return data[top-1];
- } // peek():Info
-
- /** Pops the top item off of the stack. */
- public Info pop() {
- return data[--top];
- } // pop():Info
-
- /**
- * Simple representation to make debugging easier
- */
- public String toString() {
- final StringBuffer sb = new StringBuffer("InfoStack(");
- for (int i=top-1; i>=0; --i) {
- sb.append(data[i]);
- if (i != 0)
- sb.append(", ");
- }
- sb.append(")");
- return sb.toString();
- }
-
-
- } // class InfoStack
-
- void setTagBalancingListener(final HTMLTagBalancingListener tagBalancingListener) {
- this.tagBalancingListener = tagBalancingListener;
- }
-
- /**
- * Notifies the tagBalancingListener (if any) of an ignored start element
- */
- private void notifyDiscardedStartElement(final QName elem, final XMLAttributes attrs,
- final Augmentations augs) {
- if (tagBalancingListener != null)
- tagBalancingListener.ignoredStartElement(elem, attrs, augs);
- }
-
- /**
- * Notifies the tagBalancingListener (if any) of an ignored end element
- */
- private void notifyDiscardedEndElement(final QName element, final Augmentations augs) {
- if (tagBalancingListener != null)
- tagBalancingListener.ignoredEndElement(element, augs);
- }
-
- /**
- * Structure to hold information about an element placed in buffer to be comsumed later
- */
- static class ElementEntry {
- private final QName name_;
- private final Augmentations augs_;
- ElementEntry(final QName element, final Augmentations augs) {
- name_ = new QName(element);
- augs_ = (augs == null) ? null : new HTMLAugmentations(augs);
- }
- }
-} // class HTMLTagBalancer
Loading...
马建仓 AI 助手
尝试更多
代码解读
代码找茬
代码优化
1
https://gitee.com/src-openeuler/boilerpipe.git
git@gitee.com:src-openeuler/boilerpipe.git
src-openeuler
boilerpipe
boilerpipe
openEuler-24.09

搜索帮助