AUGMENTATIONS
protected static final String AUGMENTATIONS
Include infoset augmentations.
CDATA_SECTIONS
public static final String CDATA_SECTIONS
Scan CDATA sections.
DEBUG_CALLBACKS
protected static final boolean DEBUG_CALLBACKS
Set to true to debug callbacks.
DEFAULT_BUFFER_SIZE
protected static final int DEFAULT_BUFFER_SIZE
Default buffer size.
DEFAULT_ENCODING
protected static final String DEFAULT_ENCODING
Default encoding.
DOCTYPE_PUBID
protected static final String DOCTYPE_PUBID
Doctype declaration public identifier.
DOCTYPE_SYSID
protected static final String DOCTYPE_SYSID
Doctype declaration system identifier.
ERROR_REPORTER
protected static final String ERROR_REPORTER
Error reporter.
FIX_MSWINDOWS_REFS
public static final String FIX_MSWINDOWS_REFS
Fix Microsoft Windows® character entity references.
HTML_4_01_FRAMESET_PUBID
public static final String HTML_4_01_FRAMESET_PUBID
HTML 4.01 frameset public identifier ("-//W3C//DTD HTML 4.01 Frameset//EN").
HTML_4_01_FRAMESET_SYSID
public static final String HTML_4_01_FRAMESET_SYSID
HTML 4.01 frameset system identifier ("http://www.w3.org/TR/html4/frameset.dtd").
HTML_4_01_STRICT_PUBID
public static final String HTML_4_01_STRICT_PUBID
HTML 4.01 strict public identifier ("-//W3C//DTD HTML 4.01//EN").
HTML_4_01_STRICT_SYSID
public static final String HTML_4_01_STRICT_SYSID
HTML 4.01 strict system identifier ("http://www.w3.org/TR/html4/strict.dtd").
HTML_4_01_TRANSITIONAL_PUBID
public static final String HTML_4_01_TRANSITIONAL_PUBID
HTML 4.01 transitional public identifier ("-//W3C//DTD HTML 4.01 Transitional//EN").
HTML_4_01_TRANSITIONAL_SYSID
public static final String HTML_4_01_TRANSITIONAL_SYSID
HTML 4.01 transitional system identifier ("http://www.w3.org/TR/html4/loose.dtd").
IGNORE_SPECIFIED_CHARSET
public static final String IGNORE_SPECIFIED_CHARSET
Ignore the charset specified in the <meta http-equiv='Content-Type'
content='text/html;charset=…'> tag.
INSERT_DOCTYPE
public static final String INSERT_DOCTYPE
Insert document type declaration.
NAMES_ATTRS
protected static final String NAMES_ATTRS
Modify HTML attribute names: { "upper", "lower", "default" }.
NAMES_ELEMS
protected static final String NAMES_ELEMS
Modify HTML element names: { "upper", "lower", "default" }.
NAMES_LOWERCASE
protected static final short NAMES_LOWERCASE
Lowercase HTML names.
NAMES_NO_CHANGE
protected static final short NAMES_NO_CHANGE
Don't modify HTML names.
NAMES_UPPERCASE
protected static final short NAMES_UPPERCASE
Uppercase HTML names.
NOTIFY_CHAR_REFS
public static final String NOTIFY_CHAR_REFS
Notify handler of character entity references (e.g. &#32;, &#x20;, etc.).
NOTIFY_HTML_BUILTIN_REFS
public static final String NOTIFY_HTML_BUILTIN_REFS
Notify handler of built-in entity references (e.g. &nobr;,
&copy;, etc.).
Note: This includes the five pre-defined XML general entities.
NOTIFY_XML_BUILTIN_REFS
public static final String NOTIFY_XML_BUILTIN_REFS
Notify handler of built-in entity references (e.g. &amp;,
&lt;, etc.).
Note:
This only applies to the five pre-defined XML general entities.
Specifically, "amp", "lt", "gt", "quot", and "apos". This is done
for compatibility with the Xerces feature.
To be notified of the built-in entity references in HTML, set the
http://cyberneko.org/html/features/scanner/notify-builtin-refs
feature to true.
OVERRIDE_DOCTYPE
public static final String OVERRIDE_DOCTYPE
Override doctype declaration public and system identifiers.
REPORT_ERRORS
protected static final String REPORT_ERRORS
Report errors.
SCRIPT_STRIP_CDATA_DELIMS
public static final String SCRIPT_STRIP_CDATA_DELIMS
Strip XHTML CDATA delimiters ("<![CDATA[" and "]]>") from
SCRIPT tag contents.
SCRIPT_STRIP_COMMENT_DELIMS
public static final String SCRIPT_STRIP_COMMENT_DELIMS
Strip HTML comment delimiters ("<!--" and
"-->") from SCRIPT tag contents.
STATE_CONTENT
protected static final short STATE_CONTENT
State: content.
STATE_END_DOCUMENT
protected static final short STATE_END_DOCUMENT
State: end document.
STATE_MARKUP_BRACKET
protected static final short STATE_MARKUP_BRACKET
State: markup bracket.
STATE_START_DOCUMENT
protected static final short STATE_START_DOCUMENT
State: start document.
STYLE_STRIP_CDATA_DELIMS
public static final String STYLE_STRIP_CDATA_DELIMS
Strip XHTML CDATA delimiters ("<![CDATA[" and "]]>") from
STYLE tag contents.
STYLE_STRIP_COMMENT_DELIMS
public static final String STYLE_STRIP_COMMENT_DELIMS
Strip HTML comment delimiters ("<!--" and
"-->") from STYLE tag contents.
SYNTHESIZED_ITEM
protected static final HTMLEventInfo SYNTHESIZED_ITEM
Synthesized event info item.
fAugmentations
protected boolean fAugmentations
Augmentations.
fBeginColumnNumber
protected int fBeginColumnNumber
Beginning column number.
fBeginLineNumber
protected int fBeginLineNumber
Beginning line number.
fCDATASections
protected boolean fCDATASections
CDATA sections.
fCurrentEntityStack
protected final Stack fCurrentEntityStack
The current entity stack.
fDefaultIANAEncoding
protected String fDefaultIANAEncoding
Default encoding.
fDoctypePubid
protected String fDoctypePubid
Doctype declaration public identifier.
fDoctypeSysid
protected String fDoctypeSysid
Doctype declaration system identifier.
fDocumentHandler
protected XMLDocumentHandler fDocumentHandler
The document handler.
fElementCount
protected int fElementCount
Element count.
fElementDepth
protected int fElementDepth
Element depth.
fEndColumnNumber
protected int fEndColumnNumber
Ending column number.
fEndLineNumber
protected int fEndLineNumber
Ending line number.
fFixWindowsCharRefs
protected boolean fFixWindowsCharRefs
Fix Microsoft Windows® character entity references.
fIANAEncoding
protected String fIANAEncoding
Auto-detected IANA encoding.
fIgnoreSpecifiedCharset
protected boolean fIgnoreSpecifiedCharset
Ignore specified character set.
fInsertDoctype
protected boolean fInsertDoctype
Insert document type declaration.
fIso8859Encoding
protected boolean fIso8859Encoding
True if the encoding matches "ISO-8859-*".
fJavaEncoding
protected String fJavaEncoding
Auto-detected Java encoding.
fNamesAttrs
protected short fNamesAttrs
Modify HTML attribute names.
fNamesElems
protected short fNamesElems
Modify HTML element names.
fNotifyCharRefs
protected boolean fNotifyCharRefs
Notify character entity references.
fNotifyHtmlBuiltinRefs
protected boolean fNotifyHtmlBuiltinRefs
Notify HTML built-in general entity references.
fNotifyXmlBuiltinRefs
protected boolean fNotifyXmlBuiltinRefs
Notify XML built-in general entity references.
fOverrideDoctype
protected boolean fOverrideDoctype
Override doctype declaration public and system identifiers.
fReportErrors
protected boolean fReportErrors
Report errors.
fScannerState
protected short fScannerState
The current scanner state.
fScriptStripCDATADelims
protected boolean fScriptStripCDATADelims
Strip CDATA delimiters from SCRIPT tags.
fScriptStripCommentDelims
protected boolean fScriptStripCommentDelims
Strip comment delimiters from SCRIPT tags.
fSpecialScanner
protected HTMLScanner.SpecialScanner fSpecialScanner
Special scanner used for elements whose content needs to be scanned
as plain text, ignoring markup such as elements and entity references.
For example: <SCRIPT> and <COMMENT>.
fString
protected final XMLString fString
String.
fStringBuffer
protected final XMLStringBuffer fStringBuffer
String buffer.
fStyleStripCDATADelims
protected boolean fStyleStripCDATADelims
Strip CDATA delimiters from STYLE tags.
fStyleStripCommentDelims
protected boolean fStyleStripCommentDelims
Strip comment delimiters from STYLE tags.
builtinXmlRef
protected static boolean builtinXmlRef(String name)
Returns true if the name is a built-in XML general entity reference.
cleanup
public void cleanup(boolean closeall)
Cleans up used resources. For example, if scanning is terminated
early, then this method ensures all remaining open streams are
closed.
closeall
- Close all streams, including the original.
This is used in cases when the application has
opened the original document stream and should
be responsible for closing it.
expandSystemId
public static String expandSystemId(String systemId,
String baseSystemId)
Expands a system id and returns the system id as a URI, if
it can be expanded. A return value of null means that the
identifier is already expanded. An exception thrown
indicates a failure to expand the id.
systemId
- The systemId to be expanded.
- Returns the URI string representing the expanded system
identifier. A null value indicates that the given
system identifier is already expanded.
fixURI
protected static String fixURI(String str)
Fixes a platform dependent filename to standard URI form.
- Returns the fixed URI string.
fixWindowsCharacter
protected int fixWindowsCharacter(int origChar)
getBaseSystemId
public String getBaseSystemId()
Returns the base system identifier.
getCharacterOffset
public int getCharacterOffset()
Returns the current character offset.
getColumnNumber
public int getColumnNumber()
Returns the current column number.
getDocumentHandler
public XMLDocumentHandler getDocumentHandler()
Returns the document handler.
getEncoding
public String getEncoding()
Returns the encoding.
getExpandedSystemId
public String getExpandedSystemId()
Returns the expanded system identifier.
getFeatureDefault
public Boolean getFeatureDefault(String featureId)
Returns the default state for a feature.
Specified by: getFeatureDefault in interface HTMLComponent
getLineNumber
public int getLineNumber()
Returns the current line number.
getLiteralSystemId
public String getLiteralSystemId()
Returns the literal system identifier.
getNamesValue
protected static final short getNamesValue(String value)
Converts HTML names string value to constant value.
getPropertyDefault
public Object getPropertyDefault(String propertyId)
Returns the default state for a property.
Specified by: getPropertyDefault in interface HTMLComponent
getPublicId
public String getPublicId()
Returns the public identifier.
getRecognizedFeatures
public String[] getRecognizedFeatures()
Returns recognized features.
getRecognizedProperties
public String[] getRecognizedProperties()
Returns recognized properties.
getValue
protected static String getValue(XMLAttributes attrs,
String aname)
Returns the value of the specified attribute, ignoring case.
getXMLVersion
public String getXMLVersion()
Returns the XML version.
load
protected int load(int offset)
throws IOException
Loads a new chunk of data into the buffer and returns the number of
characters loaded or -1 if no additional characters were loaded.
offset
- The offset at which new characters should be loaded.
locationAugs
protected final Augmentations locationAugs()
Returns an augmentations object with a location item added.
modifyName
protected static final String modifyName(String name,
short mode)
Modifies the given name based on the specified mode.
pushInputSource
public void pushInputSource(XMLInputSource inputSource)
Pushes an input source onto the current entity stack. This
enables the scanner to transparently scan new content (e.g.
the output written by an embedded script). At the end of the
current entity, the scanner returns where it left off at the
time this entity source was pushed.
Note:
This functionality is experimental at this time and is
subject to change in future releases of NekoHTML.
inputSource
- The new input source to start scanning.
read
protected int read()
throws IOException
Reads a single character.
reset
public void reset(XMLComponentManager manager)
throws XMLConfigurationException
Resets the component.
resourceId
protected final XMLResourceIdentifier resourceId()
Returns an empty resource identifier.
scanDoctype
protected void scanDoctype()
throws IOException
Scans a DOCTYPE line.
scanDocument
public boolean scanDocument(boolean complete)
throws XNIException,
IOException
Scans the document.
scanEntityRef
protected int scanEntityRef(XMLStringBuffer str,
boolean content)
throws IOException
Scans an entity reference.
scanLiteral
protected String scanLiteral()
throws IOException
Scans a quoted literal.
scanName
protected String scanName()
throws IOException
Scans a name.
setDocumentHandler
public void setDocumentHandler(XMLDocumentHandler handler)
Sets the document handler.
setFeature
public void setFeature(String featureId,
boolean state)
throws XMLConfigurationException
Sets a feature.
setInputSource
public void setInputSource(XMLInputSource source)
throws IOException
Sets the input source.
setProperty
public void setProperty(String propertyId,
Object value)
throws XMLConfigurationException
Sets a property.
setScannerState
protected void setScannerState(short state)
Sets the scanner state.
skip
protected boolean skip(String s,
boolean caseSensitive)
throws IOException
Returns true if the specified text is present and is skipped.
skipMarkup
protected boolean skipMarkup(boolean balance)
throws IOException
Skips markup.
skipNewlines
protected int skipNewlines()
throws IOException
Skips newlines and returns the number of newlines skipped.
skipNewlines
protected int skipNewlines(int maxlines)
throws IOException
Skips newlines and returns the number of newlines skipped.
skipSpaces
protected boolean skipSpaces()
throws IOException
Skips whitespace.
synthesizedAugs
protected final Augmentations synthesizedAugs()
Returns an augmentations object with a synthesized item added.