org.apache.lucene.demo.html
Class HTMLParser

java.lang.Object
  extended by org.apache.lucene.demo.html.HTMLParser
All Implemented Interfaces:
HTMLParserConstants

public class HTMLParser
extends java.lang.Object
implements HTMLParserConstants


Nested Class Summary
(package private) static class HTMLParser.JJCalls
           
private static class HTMLParser.LookaheadSuccess
           
private  class HTMLParser.MyPipedInputStream
           
 
Field Summary
(package private)  boolean afterSpace
           
(package private)  boolean afterTag
           
(package private)  java.lang.String currentMetaContent
           
(package private)  java.lang.String currentMetaTag
           
(package private)  java.lang.String eol
           
(package private)  boolean inMetaTag
           
(package private)  boolean inStyle
           
(package private)  boolean inTitle
           
private  HTMLParser.JJCalls[] jj_2_rtns
           
private  int jj_endpos
           
private  java.util.Vector jj_expentries
           
private  int[] jj_expentry
           
private  int jj_gc
           
private  int jj_gen
           
(package private)  SimpleCharStream jj_input_stream
           
private  int jj_kind
           
private  int jj_la
           
private  int[] jj_la1
           
private static int[] jj_la1_0
           
private  Token jj_lastpos
           
private  int[] jj_lasttokens
           
private  HTMLParser.LookaheadSuccess jj_ls
           
 Token jj_nt
           
private  int jj_ntk
           
private  boolean jj_rescan
           
private  Token jj_scanpos
           
private  boolean jj_semLA
           
(package private)  int length
           
 boolean lookingAhead
           
(package private)  java.util.Properties metaTags
           
(package private)  java.io.Reader pipeIn
           
private  HTMLParser.MyPipedInputStream pipeInStream
           
(package private)  java.io.Writer pipeOut
           
private  java.io.PipedOutputStream pipeOutStream
           
(package private)  java.lang.StringBuffer summary
           
static int SUMMARY_LENGTH
           
(package private)  java.lang.StringBuffer title
           
(package private)  boolean titleComplete
           
 Token token
           
 HTMLParserTokenManager token_source
           
 
Fields inherited from interface org.apache.lucene.demo.html.HTMLParserConstants
AfterEquals, ArgEquals, ArgName, ArgQuote1, ArgQuote2, ArgValue, CloseQuote1, CloseQuote2, Comment1, Comment2, CommentEnd1, CommentEnd2, CommentText1, CommentText2, DeclName, DEFAULT, Entity, EOF, LET, NUM, Punct, Quote1Text, Quote2Text, ScriptEnd, ScriptStart, ScriptText, SP, Space, TagEnd, TagName, tokenImage, WithinComment1, WithinComment2, WithinQuote1, WithinQuote2, WithinScript, WithinTag, Word
 
Constructor Summary
HTMLParser(java.io.File file)
           
HTMLParser(HTMLParserTokenManager tm)
           
HTMLParser(java.io.InputStream stream)
           
HTMLParser(java.io.Reader stream)
           
 
Method Summary
(package private)  void addMetaTag()
           
(package private)  void addSpace()
           
(package private)  void addText(java.lang.String text)
           
(package private)  void addToSummary(java.lang.String text)
           
 Token ArgValue()
           
 void CommentTag()
           
 Token Decl()
           
 void disable_tracing()
           
 void enable_tracing()
           
 ParseException generateParseException()
           
 java.util.Properties getMetaTags()
           
 Token getNextToken()
           
 java.io.Reader getReader()
           
 java.lang.String getSummary()
           
 java.lang.String getTitle()
           
 Token getToken(int index)
           
 void HTMLDocument()
           
private  boolean jj_2_1(int xla)
           
private  boolean jj_2_2(int xla)
           
private  boolean jj_3_1()
           
private  boolean jj_3_2()
           
private  void jj_add_error_token(int kind, int pos)
           
private  Token jj_consume_token(int kind)
           
private static void jj_la1_0()
           
private  int jj_ntk()
           
private  void jj_rescan_token()
           
private  void jj_save(int index, int xla)
           
private  boolean jj_scan_token(int kind)
           
 void ReInit(HTMLParserTokenManager tm)
           
 void ReInit(java.io.InputStream stream)
           
 void ReInit(java.io.Reader stream)
           
 void ScriptTag()
           
 void Tag()
           
 
Methods inherited from class java.lang.Object
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
 

Field Detail

SUMMARY_LENGTH

public static int SUMMARY_LENGTH

title

java.lang.StringBuffer title

summary

java.lang.StringBuffer summary

metaTags

java.util.Properties metaTags

currentMetaTag

java.lang.String currentMetaTag

currentMetaContent

java.lang.String currentMetaContent

length

int length

titleComplete

boolean titleComplete

inTitle

boolean inTitle

inMetaTag

boolean inMetaTag

inStyle

boolean inStyle

afterTag

boolean afterTag

afterSpace

boolean afterSpace

eol

java.lang.String eol

pipeIn

java.io.Reader pipeIn

pipeOut

java.io.Writer pipeOut

pipeInStream

private HTMLParser.MyPipedInputStream pipeInStream

pipeOutStream

private java.io.PipedOutputStream pipeOutStream

token_source

public HTMLParserTokenManager token_source

jj_input_stream

SimpleCharStream jj_input_stream

token

public Token token

jj_nt

public Token jj_nt

jj_ntk

private int jj_ntk

jj_scanpos

private Token jj_scanpos

jj_lastpos

private Token jj_lastpos

jj_la

private int jj_la

lookingAhead

public boolean lookingAhead

jj_semLA

private boolean jj_semLA

jj_gen

private int jj_gen

jj_la1

private final int[] jj_la1

jj_la1_0

private static int[] jj_la1_0

jj_2_rtns

private final HTMLParser.JJCalls[] jj_2_rtns

jj_rescan

private boolean jj_rescan

jj_gc

private int jj_gc

jj_ls

private final HTMLParser.LookaheadSuccess jj_ls

jj_expentries

private java.util.Vector jj_expentries

jj_expentry

private int[] jj_expentry

jj_kind

private int jj_kind

jj_lasttokens

private int[] jj_lasttokens

jj_endpos

private int jj_endpos

Constructor Detail

HTMLParser

public HTMLParser(java.io.File file)
           throws java.io.FileNotFoundException
Throws:
java.io.FileNotFoundException

HTMLParser

public HTMLParser(java.io.InputStream stream)

HTMLParser

public HTMLParser(java.io.Reader stream)

HTMLParser

public HTMLParser(HTMLParserTokenManager tm)

Method Detail

getTitle

public java.lang.String getTitle()
                          throws java.io.IOException,
                                 java.lang.InterruptedException
Throws:
java.io.IOException
java.lang.InterruptedException

getMetaTags

public java.util.Properties getMetaTags()
                                 throws java.io.IOException,
                                        java.lang.InterruptedException
Throws:
java.io.IOException
java.lang.InterruptedException

getSummary

public java.lang.String getSummary()
                            throws java.io.IOException,
                                   java.lang.InterruptedException
Throws:
java.io.IOException
java.lang.InterruptedException

getReader

public java.io.Reader getReader()
                         throws java.io.IOException
Throws:
java.io.IOException

addToSummary

void addToSummary(java.lang.String text)

addText

void addText(java.lang.String text)
       throws java.io.IOException
Throws:
java.io.IOException

addMetaTag

void addMetaTag()
          throws java.io.IOException
Throws:
java.io.IOException

addSpace

void addSpace()
        throws java.io.IOException
Throws:
java.io.IOException

HTMLDocument

public final void HTMLDocument()
                        throws ParseException,
                               java.io.IOException
Throws:
ParseException
java.io.IOException

Tag

public final void Tag()
               throws ParseException,
                      java.io.IOException
Throws:
ParseException
java.io.IOException

ArgValue

public final Token ArgValue()
                     throws ParseException
Throws:
ParseException

Decl

public final Token Decl()
                 throws ParseException
Throws:
ParseException

CommentTag

public final void CommentTag()
                      throws ParseException
Throws:
ParseException

ScriptTag

public final void ScriptTag()
                     throws ParseException
Throws:
ParseException

jj_2_1

private final boolean jj_2_1(int xla)

jj_2_2

private final boolean jj_2_2(int xla)

jj_3_1

private final boolean jj_3_1()

jj_3_2

private final boolean jj_3_2()

jj_la1_0

private static void jj_la1_0()

ReInit

public void ReInit(java.io.InputStream stream)

ReInit

public void ReInit(java.io.Reader stream)

ReInit

public void ReInit(HTMLParserTokenManager tm)

jj_consume_token

private final Token jj_consume_token(int kind)
                              throws ParseException
Throws:
ParseException

jj_scan_token

private final boolean jj_scan_token(int kind)

getNextToken

public final Token getNextToken()

getToken

public final Token getToken(int index)

jj_ntk

private final int jj_ntk()

jj_add_error_token

private void jj_add_error_token(int kind,
                                int pos)

generateParseException

public ParseException generateParseException()

enable_tracing

public final void enable_tracing()

disable_tracing

public final void disable_tracing()

jj_rescan_token

private final void jj_rescan_token()

jj_save

private final void jj_save(int index,
                           int xla)