http://swpat.ffii.org/
Action against software patents
http://www.gnome.org/
Gnome2 Logo
http://www.w3.org/Status
W3C Logo
http://www.redhat.com/
Red Hat Logo
http://xmlsoft.org/
Made with Libxml2 Logo
Module HTMLparser from libxml2
API Menu
../index.html
Main Menu
../docs.html
Developer Menu
../examples/index.html
Code Examples
index.html
API Menu
libxml-parser.html
Parser API
libxml-tree.html
Tree API
libxml-xmlreader.html
Reader API
../guidelines.html
XML Guidelines
../ChangeLog.html
ChangeLog
API Indexes
../APIchunk0.html
Alphabetic
../APIconstructors.html
Constructors
../APIfunctions.html
Functions/Types
../APIfiles.html
Modules
../APIsymbols.html
Symbols
Related links
http://mail.gnome.org/archives/xml/
Mail archive
http://xmlsoft.org/XSLT/
XSLT libxslt
http://phd.cs.unibo.it/gdome2/
DOM gdome2
http://www.aleksey.com/xmlsec/
XML-DSig xmlsec
ftp://xmlsoft.org/
FTP
http://www.zlatkovic.com/projects/libxml/
Windows binaries
http://www.blastwave.org/packages.php/libxml2
Solaris binaries
http://www.explain.com.au/oss/libxml2xslt.html
MacOsX binaries
http://libxmlplusplus.sourceforge.net/
C++ bindings
http://www.zend.com/php5/articles/php5-xmlphp.php#Heading4
PHP bindings
http://sourceforge.net/projects/libxml2-pas/
Pascal bindings
http://rubyforge.org/projects/xml-tools/
Ruby bindings
http://tclxml.sourceforge.net/
Tcl bindings
http://bugzilla.gnome.org/buglist.cgi?product=libxml2
Bug Tracker
libxml-DOCBparser.html
Prev
libxml-DOCBparser.html
DOCBparser
index.html
Up
index.html
API documentation
../index.html
Home
../index.html
The XML C parser and toolkit of Gnome
libxml-HTMLtree.html
HTMLtree
libxml-HTMLtree.html
Next
This module implements an HTML 4.0 non-verifying parser with an API compatible with that of the XML parser. It should be able to parse "real world" HTML, even if severely broken from a specification point of view.
Table of Contents
#define
#htmlDefaultSubelement
htmlDefaultSubelement
#define
#htmlElementAllowedHereDesc
htmlElementAllowedHereDesc
#define
#htmlRequiredAttrs
htmlRequiredAttrs
Typedef
libxml-tree.html#xmlDocPtr
xmlDocPtr
htmlDocPtr
Structure
#htmlElemDesc
htmlElemDesc
struct _htmlElemDesc
Typedef
libxml-HTMLparser.html#htmlElemDesc
htmlElemDesc
*
htmlElemDescPtr
Structure
#htmlEntityDesc
htmlEntityDesc
struct _htmlEntityDesc
Typedef
libxml-HTMLparser.html#htmlEntityDesc
htmlEntityDesc
*
htmlEntityDescPtr
Typedef
libxml-tree.html#xmlNodePtr
xmlNodePtr
htmlNodePtr
Typedef
libxml-tree.html#xmlParserCtxt
xmlParserCtxt
htmlParserCtxt
Typedef
libxml-tree.html#xmlParserCtxtPtr
xmlParserCtxtPtr
htmlParserCtxtPtr
Typedef
libxml-tree.html#xmlParserInput
xmlParserInput
htmlParserInput
Typedef
libxml-tree.html#xmlParserInputPtr
xmlParserInputPtr
htmlParserInputPtr
Typedef
libxml-parser.html#xmlParserNodeInfo
xmlParserNodeInfo
htmlParserNodeInfo
Enum
#htmlParserOption
htmlParserOption
Typedef
libxml-tree.html#xmlSAXHandler
xmlSAXHandler
htmlSAXHandler
Typedef
libxml-tree.html#xmlSAXHandlerPtr
xmlSAXHandlerPtr
htmlSAXHandlerPtr
Enum
#htmlStatus
htmlStatus
int
#UTF8ToHtml
UTF8ToHtml
(unsigned char * out,
int * outlen,
const unsigned char * in,
int * inlen)
libxml-HTMLparser.html#htmlStatus
htmlStatus
#htmlAttrAllowed
htmlAttrAllowed
(const
libxml-HTMLparser.html#htmlElemDesc
htmlElemDesc
* elt,
const
libxml-xmlstring.html#xmlChar
xmlChar
* attr,
int legacy)
int
#htmlAutoCloseTag
htmlAutoCloseTag
(
libxml-HTMLparser.html#htmlDocPtr
htmlDocPtr
doc,
const
libxml-xmlstring.html#xmlChar
xmlChar
* name,
libxml-HTMLparser.html#htmlNodePtr
htmlNodePtr
elem)
libxml-HTMLparser.html#htmlParserCtxtPtr
htmlParserCtxtPtr
#htmlCreateMemoryParserCtxt
htmlCreateMemoryParserCtxt
(const char * buffer,
int size)
libxml-HTMLparser.html#htmlParserCtxtPtr
htmlParserCtxtPtr
#htmlCreatePushParserCtxt
htmlCreatePushParserCtxt
(
libxml-HTMLparser.html#htmlSAXHandlerPtr
htmlSAXHandlerPtr
sax,
void * user_data,
const char * chunk,
int size,
const char * filename,
libxml-encoding.html#xmlCharEncoding
xmlCharEncoding
enc)
libxml-HTMLparser.html#htmlDocPtr
htmlDocPtr
#htmlCtxtReadDoc
htmlCtxtReadDoc
(
libxml-HTMLparser.html#htmlParserCtxtPtr
htmlParserCtxtPtr
ctxt,
const
libxml-xmlstring.html#xmlChar
xmlChar
* cur,
const char * URL,
const char * encoding,
int options)
libxml-HTMLparser.html#htmlDocPtr
htmlDocPtr
#htmlCtxtReadFd
htmlCtxtReadFd
(
libxml-HTMLparser.html#htmlParserCtxtPtr
htmlParserCtxtPtr
ctxt,
int fd,
const char * URL,
const char * encoding,
int options)
libxml-HTMLparser.html#htmlDocPtr
htmlDocPtr
#htmlCtxtReadFile
htmlCtxtReadFile
(
libxml-HTMLparser.html#htmlParserCtxtPtr
htmlParserCtxtPtr
ctxt,
const char * filename,
const char * encoding,
int options)
libxml-HTMLparser.html#htmlDocPtr
htmlDocPtr
#htmlCtxtReadIO
htmlCtxtReadIO
(
libxml-HTMLparser.html#htmlParserCtxtPtr
htmlParserCtxtPtr
ctxt,
libxml-xmlIO.html#xmlInputReadCallback
xmlInputReadCallback
ioread,
libxml-xmlIO.html#xmlInputCloseCallback
xmlInputCloseCallback
ioclose,
void * ioctx,
const char * URL,
const char * encoding,
int options)
libxml-HTMLparser.html#htmlDocPtr
htmlDocPtr
#htmlCtxtReadMemory
htmlCtxtReadMemory
(
libxml-HTMLparser.html#htmlParserCtxtPtr
htmlParserCtxtPtr
ctxt,
const char * buffer,
int size,
const char * URL,
const char * encoding,
int options)
void
#htmlCtxtReset
htmlCtxtReset
(
libxml-HTMLparser.html#htmlParserCtxtPtr
htmlParserCtxtPtr
ctxt)
int
#htmlCtxtUseOptions
htmlCtxtUseOptions
(
libxml-HTMLparser.html#htmlParserCtxtPtr
htmlParserCtxtPtr
ctxt,
int options)
int
#htmlElementAllowedHere
htmlElementAllowedHere
(const
libxml-HTMLparser.html#htmlElemDesc
htmlElemDesc
* parent,
const
libxml-xmlstring.html#xmlChar
xmlChar
* elt)
libxml-HTMLparser.html#htmlStatus
htmlStatus
#htmlElementStatusHere
htmlElementStatusHere
(const
libxml-HTMLparser.html#htmlElemDesc
htmlElemDesc
* parent,
const
libxml-HTMLparser.html#htmlElemDesc
htmlElemDesc
* elt)
int
#htmlEncodeEntities
htmlEncodeEntities
(unsigned char * out,
int * outlen,
const unsigned char * in,
int * inlen,
int quoteChar)
const
libxml-HTMLparser.html#htmlEntityDesc
htmlEntityDesc
*
#htmlEntityLookup
htmlEntityLookup
(const
libxml-xmlstring.html#xmlChar
xmlChar
* name)
const
libxml-HTMLparser.html#htmlEntityDesc
htmlEntityDesc
*
#htmlEntityValueLookup
htmlEntityValueLookup
(unsigned int value)
void
#htmlFreeParserCtxt
htmlFreeParserCtxt
(
libxml-HTMLparser.html#htmlParserCtxtPtr
htmlParserCtxtPtr
ctxt)
int
#htmlHandleOmittedElem
htmlHandleOmittedElem
(int val)
int
#htmlIsAutoClosed
htmlIsAutoClosed
(
libxml-HTMLparser.html#htmlDocPtr
htmlDocPtr
doc,
libxml-HTMLparser.html#htmlNodePtr
htmlNodePtr
elem)
int
#htmlIsScriptAttribute
htmlIsScriptAttribute
(const
libxml-xmlstring.html#xmlChar
xmlChar
* name)
libxml-HTMLparser.html#htmlStatus
htmlStatus
#htmlNodeStatus
htmlNodeStatus
(const
libxml-HTMLparser.html#htmlNodePtr
htmlNodePtr
node,
int legacy)
int
#htmlParseCharRef
htmlParseCharRef
(
libxml-HTMLparser.html#htmlParserCtxtPtr
htmlParserCtxtPtr
ctxt)
int
#htmlParseChunk
htmlParseChunk
(
libxml-HTMLparser.html#htmlParserCtxtPtr
htmlParserCtxtPtr
ctxt,
const char * chunk,
int size,
int terminate)
libxml-HTMLparser.html#htmlDocPtr
htmlDocPtr
#htmlParseDoc
htmlParseDoc
(
libxml-xmlstring.html#xmlChar
xmlChar
* cur,
const char * encoding)
int
#htmlParseDocument
htmlParseDocument
(
libxml-HTMLparser.html#htmlParserCtxtPtr
htmlParserCtxtPtr
ctxt)
void
#htmlParseElement
htmlParseElement
(
libxml-HTMLparser.html#htmlParserCtxtPtr
htmlParserCtxtPtr
ctxt)
const
libxml-HTMLparser.html#htmlEntityDesc
htmlEntityDesc
*
#htmlParseEntityRef
htmlParseEntityRef
(
libxml-HTMLparser.html#htmlParserCtxtPtr
htmlParserCtxtPtr
ctxt,
const
libxml-xmlstring.html#xmlChar
xmlChar
** str)
libxml-HTMLparser.html#htmlDocPtr
htmlDocPtr
#htmlParseFile
htmlParseFile
(const char * filename,
const char * encoding)
libxml-HTMLparser.html#htmlDocPtr
htmlDocPtr
#htmlReadDoc
htmlReadDoc
(const
libxml-xmlstring.html#xmlChar
xmlChar
* cur,
const char * URL,
const char * encoding,
int options)
libxml-HTMLparser.html#htmlDocPtr
htmlDocPtr
#htmlReadFd
htmlReadFd
(int fd,
const char * URL,
const char * encoding,
int options)
libxml-HTMLparser.html#htmlDocPtr
htmlDocPtr
#htmlReadFile
htmlReadFile
(const char * filename,
const char * encoding,
int options)
libxml-HTMLparser.html#htmlDocPtr
htmlDocPtr
#htmlReadIO
htmlReadIO
(
libxml-xmlIO.html#xmlInputReadCallback
xmlInputReadCallback
ioread,
libxml-xmlIO.html#xmlInputCloseCallback
xmlInputCloseCallback
ioclose,
void * ioctx,
const char * URL,
const char * encoding,
int options)
libxml-HTMLparser.html#htmlDocPtr
htmlDocPtr
#htmlReadMemory
htmlReadMemory
(const char * buffer,
int size,
const char * URL,
const char * encoding,
int options)
libxml-HTMLparser.html#htmlDocPtr
htmlDocPtr
#htmlSAXParseDoc
htmlSAXParseDoc
(
libxml-xmlstring.html#xmlChar
xmlChar
* cur,
const char * encoding,
libxml-HTMLparser.html#htmlSAXHandlerPtr
htmlSAXHandlerPtr
sax,
void * userData)
libxml-HTMLparser.html#htmlDocPtr
htmlDocPtr
#htmlSAXParseFile
htmlSAXParseFile
(const char * filename,
const char * encoding,
libxml-HTMLparser.html#htmlSAXHandlerPtr
htmlSAXHandlerPtr
sax,
void * userData)
const
libxml-HTMLparser.html#htmlElemDesc
htmlElemDesc
*
#htmlTagLookup
htmlTagLookup
(const
libxml-xmlstring.html#xmlChar
xmlChar
* tag)
Description
Macro: htmlDefaultSubelement
#define htmlDefaultSubelement
Returns the default subelement for this element
Macro: htmlElementAllowedHereDesc
#define htmlElementAllowedHereDesc
Checks whether an HTML element description may be a direct child of the specified element. Returns 1 if allowed; 0 otherwise.
Macro: htmlRequiredAttrs
#define htmlRequiredAttrs
Returns the attributes required for the specified element.
Structure htmlElemDesc
Structure htmlElemDesc
struct _htmlElemDesc {
const char *	name	: The tag name
char	startTag	: Whether the start tag can be implied
char	endTag	: Whether the end tag can be implied
char	saveEndTag	: Whether the end tag should be saved
char	empty	: Is this an empty element ?
char	depr	: Is this a deprecated element ?
char	dtd	: 1: only in Loose DTD, 2: only Frameset
char	isinline	: is this a block 0 or inline 1 element
const char *	desc	: the description (NRK Jan.2003: new fields follow)
const char **	subelts	: allowed sub-elements of this element
const char *	defaultsubelt	: subelement for suggested auto-repair if necessary, or NULL
const char **	attrs_opt	: Optional Attributes
const char **	attrs_depr	: Additional deprecated attributes
const char **	attrs_req	: Required attributes
}
Structure htmlEntityDesc
Structure htmlEntityDesc
struct _htmlEntityDesc {
unsigned int	value	: the UNICODE value for the character
const char *	name	: The entity name
const char *	desc	: the description
}
Enum
htmlParserOption
Enum htmlParserOption {
HTML_PARSE_NOERROR
= 32 : suppress error reports
HTML_PARSE_NOWARNING
= 64 : suppress warning reports
HTML_PARSE_PEDANTIC
= 128 : pedantic error reporting
HTML_PARSE_NOBLANKS
= 256 : remove blank nodes
HTML_PARSE_NONET
= 2048 : Forbid network access
}
Enum
htmlStatus
Enum htmlStatus {
HTML_NA
= 0 : something we don't check at all
HTML_INVALID
= 1
HTML_DEPRECATED
= 2
HTML_VALID
= 4
HTML_REQUIRED
= 12 : VALID bit set so ( &
libxml-HTMLparser.html#HTML_VALID
HTML_VALID
) is TRUE
}
Function: UTF8ToHtml
int	UTF8ToHtml			(unsigned char * out,
int * outlen,
const unsigned char * in,
int * inlen)
Take a block of UTF-8 chars in and try to convert it to an ASCII plus HTML entities block of chars out.
out
:
a pointer to an array of bytes to store the result
outlen
:
the length of @out
in
:
a pointer to an array of UTF-8 chars
inlen
:
the length of @in
Returns
:
0 if success, -2 if the transcoding fails, or -1 otherwise. The value of @inlen after return is the number of octets consumed if the return value is positive, else unpredictable. The value of @outlen after return is the number of octets produced.
Function: htmlAttrAllowed
libxml-HTMLparser.html#htmlStatus
htmlStatus
htmlAttrAllowed		(const
libxml-HTMLparser.html#htmlElemDesc
htmlElemDesc
* elt,
const
libxml-xmlstring.html#xmlChar
xmlChar
* attr,
int legacy)
Checks whether an
libxml-SAX.html#attribute
attribute
is valid for an element. Has full knowledge of Required and Deprecated attributes.
elt
:
HTML element
attr
:
HTML
libxml-SAX.html#attribute
attribute
legacy
:
whether to allow deprecated attributes
Returns
:
one of HTML_REQUIRED, HTML_VALID, HTML_DEPRECATED,
libxml-HTMLparser.html#HTML_INVALID
HTML_INVALID
Function: htmlAutoCloseTag
int	htmlAutoCloseTag		(
libxml-HTMLparser.html#htmlDocPtr
htmlDocPtr
doc,
const
libxml-xmlstring.html#xmlChar
xmlChar
* name,
libxml-HTMLparser.html#htmlNodePtr
htmlNodePtr
elem)
The HTML DTD allows a tag to implicitly close other tags. The list is kept in htmlStartClose array. This function checks if the element or one of its children would autoclose the given tag.
doc
:
the HTML document
name
:
The tag name
elem
:
the HTML element
Returns
:
1 if autoclose, 0 otherwise
Function: htmlCreateMemoryParserCtxt
libxml-HTMLparser.html#htmlParserCtxtPtr
htmlParserCtxtPtr
htmlCreateMemoryParserCtxt	(const char * buffer,
int size)
Create a parser context for an HTML in-memory document.
buffer
:
a pointer to a char array
size
:
the size of the array
Returns
:
the new parser context or NULL
Function: htmlCreatePushParserCtxt
libxml-HTMLparser.html#htmlParserCtxtPtr
htmlParserCtxtPtr
htmlCreatePushParserCtxt	(
libxml-HTMLparser.html#htmlSAXHandlerPtr
htmlSAXHandlerPtr
sax,
void * user_data,
const char * chunk,
int size,
const char * filename,
libxml-encoding.html#xmlCharEncoding
xmlCharEncoding
enc)
Create a parser context for using the HTML parser in push mode. The value of @filename is used for fetching external entities and error/warning reports.
sax
:
a SAX handler
user_data
:
The user data returned on SAX callbacks
chunk
:
a pointer to an array of chars
size
:
number of chars in the array
filename
:
an optional file name or URI
enc
:
an optional encoding
Returns
:
the new parser context or NULL
Function: htmlCtxtReadDoc
libxml-HTMLparser.html#htmlDocPtr
htmlDocPtr
htmlCtxtReadDoc		(
libxml-HTMLparser.html#htmlParserCtxtPtr
htmlParserCtxtPtr
ctxt,
const
libxml-xmlstring.html#xmlChar
xmlChar
* cur,
const char * URL,
const char * encoding,
int options)
Parse an HTML in-memory document and build a tree. This reuses the existing @ctxt parser context.
ctxt
:
an HTML parser context
cur
:
a pointer to a zero terminated string
URL
:
the base URL to use for the document
encoding
:
the document encoding, or NULL
options
:
a combination of htmlParserOption(s)
Returns
:
the resulting document tree
Function: htmlCtxtReadFd
libxml-HTMLparser.html#htmlDocPtr
htmlDocPtr
htmlCtxtReadFd		(
libxml-HTMLparser.html#htmlParserCtxtPtr
htmlParserCtxtPtr
ctxt,
int fd,
const char * URL,
const char * encoding,
int options)
Parse an HTML document from a file descriptor and build a tree. This reuses the existing @ctxt parser context.
ctxt
:
an HTML parser context
fd
:
an open file descriptor
URL
:
the base URL to use for the document
encoding
:
the document encoding, or NULL
options
:
a combination of htmlParserOption(s)
Returns
:
the resulting document tree
Function: htmlCtxtReadFile
libxml-HTMLparser.html#htmlDocPtr
htmlDocPtr
htmlCtxtReadFile	(
libxml-HTMLparser.html#htmlParserCtxtPtr
htmlParserCtxtPtr
ctxt,
const char * filename,
const char * encoding,
int options)
Parse an HTML file from the filesystem or the network. This reuses the existing @ctxt parser context.
ctxt
:
an HTML parser context
filename
:
a file or URL
encoding
:
the document encoding, or NULL
options
:
a combination of htmlParserOption(s)
Returns
:
the resulting document tree
Function: htmlCtxtReadIO
libxml-HTMLparser.html#htmlDocPtr
htmlDocPtr
htmlCtxtReadIO		(
libxml-HTMLparser.html#htmlParserCtxtPtr
htmlParserCtxtPtr
ctxt,
libxml-xmlIO.html#xmlInputReadCallback
xmlInputReadCallback
ioread,
libxml-xmlIO.html#xmlInputCloseCallback
xmlInputCloseCallback
ioclose,
void * ioctx,
const char * URL,
const char * encoding,
int options)
parse an HTML document from I/O functions and source and build a tree. This reuses the existing @ctxt parser context
ctxt
:
an HTML parser context
ioread
:
an I/O read function
ioclose
:
an I/O close function
ioctx
:
an I/O handler
URL
:
the base URL to use for the document
encoding
:
the document encoding, or NULL
options
:
a combination of htmlParserOption(s)
Returns
:
the resulting document tree
Function: htmlCtxtReadMemory
libxml-HTMLparser.html#htmlDocPtr
htmlDocPtr
htmlCtxtReadMemory	(
libxml-HTMLparser.html#htmlParserCtxtPtr
htmlParserCtxtPtr
ctxt,
const char * buffer,
int size,
const char * URL,
const char * encoding,
int options)
Parse an HTML in-memory document and build a tree. This reuses the existing @ctxt parser context.
ctxt
:
an HTML parser context
buffer
:
a pointer to a char array
size
:
the size of the array
URL
:
the base URL to use for the document
encoding
:
the document encoding, or NULL
options
:
a combination of htmlParserOption(s)
Returns
:
the resulting document tree
Function: htmlCtxtReset
void	htmlCtxtReset			(
libxml-HTMLparser.html#htmlParserCtxtPtr
htmlParserCtxtPtr
ctxt)
Reset a parser context
ctxt
:
an HTML parser context
Function: htmlCtxtUseOptions
int	htmlCtxtUseOptions		(
libxml-HTMLparser.html#htmlParserCtxtPtr
htmlParserCtxtPtr
ctxt,
int options)
Applies the options to the parser context
ctxt
:
an HTML parser context
options
:
a combination of htmlParserOption(s)
Returns
:
0 in case of success, the set of unknown or unimplemented options in case of error.
Function: htmlElementAllowedHere
int	htmlElementAllowedHere		(const
libxml-HTMLparser.html#htmlElemDesc
htmlElemDesc
* parent,
const
libxml-xmlstring.html#xmlChar
xmlChar
* elt)
Checks whether an HTML element may be a direct child of a parent element. Note - doesn't check for deprecated elements
parent
:
HTML parent element
elt
:
HTML element
Returns
:
1 if allowed; 0 otherwise.
Function: htmlElementStatusHere
libxml-HTMLparser.html#htmlStatus
htmlStatus
htmlElementStatusHere	(const
libxml-HTMLparser.html#htmlElemDesc
htmlElemDesc
* parent,
const
libxml-HTMLparser.html#htmlElemDesc
htmlElemDesc
* elt)
Checks whether an HTML element may be a direct child of a parent element, and if so whether it is valid or deprecated.
parent
:
HTML parent element
elt
:
HTML element
Returns
:
one of HTML_VALID, HTML_DEPRECATED,
libxml-HTMLparser.html#HTML_INVALID
HTML_INVALID
Function: htmlEncodeEntities
int	htmlEncodeEntities		(unsigned char * out,
int * outlen,
const unsigned char * in,
int * inlen,
int quoteChar)
Take a block of UTF-8 chars in and try to convert it to an ASCII plus HTML entities block of chars out.
out
:
a pointer to an array of bytes to store the result
outlen
:
the length of @out
in
:
a pointer to an array of UTF-8 chars
inlen
:
the length of @in
quoteChar
:
the quote character to escape (' or ") or zero.
Returns
:
0 if success, -2 if the transcoding fails, or -1 otherwise. The value of @inlen after return is the number of octets consumed if the return value is positive, else unpredictable. The value of @outlen after return is the number of octets produced.
Function: htmlEntityLookup
const
libxml-HTMLparser.html#htmlEntityDesc
htmlEntityDesc
*	htmlEntityLookup	(const
libxml-xmlstring.html#xmlChar
xmlChar
* name)
Look up the given entity in EntitiesTable. TODO: the linear scan is really ugly; a hash table is really needed.
name
:
the entity name
Returns
:
the associated
libxml-HTMLparser.html#htmlEntityDescPtr
htmlEntityDescPtr
if found, NULL otherwise.
Function: htmlEntityValueLookup
const
libxml-HTMLparser.html#htmlEntityDesc
htmlEntityDesc
*	htmlEntityValueLookup	(unsigned int value)
Look up the given entity in EntitiesTable. TODO: the linear scan is really ugly; a hash table is really needed.
value
:
the entity's unicode value
Returns
:
the associated
libxml-HTMLparser.html#htmlEntityDescPtr
htmlEntityDescPtr
if found, NULL otherwise.
Function: htmlFreeParserCtxt
void	htmlFreeParserCtxt		(
libxml-HTMLparser.html#htmlParserCtxtPtr
htmlParserCtxtPtr
ctxt)
Free all the memory used by a parser context. However the parsed document in ctxt->myDoc is not freed.
ctxt
:
an HTML parser context
Function: htmlHandleOmittedElem
int	htmlHandleOmittedElem		(int val)
Set and return the previous value for handling HTML omitted tags.
val
:
int 0 or 1
Returns
:
the previous value: 0 for no handling, 1 for auto insertion.
Function: htmlIsAutoClosed
int	htmlIsAutoClosed		(
libxml-HTMLparser.html#htmlDocPtr
htmlDocPtr
doc,
libxml-HTMLparser.html#htmlNodePtr
htmlNodePtr
elem)
The HTML DTD allows a tag to implicitly close other tags. The list is kept in htmlStartClose array. This function checks if a tag is autoclosed by one of its children.
doc
:
the HTML document
elem
:
the HTML element
Returns
:
1 if autoclosed, 0 otherwise
Function: htmlIsScriptAttribute
int	htmlIsScriptAttribute		(const
libxml-xmlstring.html#xmlChar
xmlChar
* name)
Check if an
libxml-SAX.html#attribute
attribute
is of content type Script
name
:
an
libxml-SAX.html#attribute
attribute
name
Returns
:
1 if the
libxml-SAX.html#attribute
attribute
is a script, 0 otherwise
Function: htmlNodeStatus
libxml-HTMLparser.html#htmlStatus
htmlStatus
htmlNodeStatus		(const
libxml-HTMLparser.html#htmlNodePtr
htmlNodePtr
node,
int legacy)
Checks whether the tree node is valid. Experimental (the author only uses the HTML enhancements in a SAX parser)
node
:
an
libxml-HTMLparser.html#htmlNodePtr
htmlNodePtr
in a tree
legacy
:
whether to allow deprecated elements (YES is faster here for Element nodes)
Returns
:
for Element nodes, a return from
libxml-HTMLparser.html#htmlElementAllowedHere
htmlElementAllowedHere
(if legacy allowed) or
libxml-HTMLparser.html#htmlElementStatusHere
htmlElementStatusHere
(otherwise). For Attribute nodes, a return from
libxml-HTMLparser.html#htmlAttrAllowed
htmlAttrAllowed
. For other nodes,
libxml-HTMLparser.html#HTML_NA
HTML_NA
(no checks performed)
Function: htmlParseCharRef
int	htmlParseCharRef		(
libxml-HTMLparser.html#htmlParserCtxtPtr
htmlParserCtxtPtr
ctxt)
parse Reference declarations [66] CharRef ::= '&#' [0-9]+ ';' | '&#x' [0-9a-fA-F]+ ';'
ctxt
:
an HTML parser context
Returns
:
the value parsed (as an int)
Function: htmlParseChunk
int	htmlParseChunk			(
libxml-HTMLparser.html#htmlParserCtxtPtr
htmlParserCtxtPtr
ctxt,
const char * chunk,
int size,
int terminate)
Parse a Chunk of memory
ctxt
:
an HTML parser context
chunk
:
a char array
size
:
the size in bytes of the chunk
terminate
:
last chunk indicator
Returns
:
zero if no error, the
libxml-xmlerror.html#xmlParserErrors
xmlParserErrors
otherwise.
Function: htmlParseDoc
libxml-HTMLparser.html#htmlDocPtr
htmlDocPtr
htmlParseDoc		(
libxml-xmlstring.html#xmlChar
xmlChar
* cur,
const char * encoding)
parse an HTML in-memory document and build a tree.
cur
:
a pointer to an array of
libxml-xmlstring.html#xmlChar
xmlChar
encoding
:
a free form C string describing the HTML document encoding, or NULL
Returns
:
the resulting document tree
Function: htmlParseDocument
int	htmlParseDocument		(
libxml-HTMLparser.html#htmlParserCtxtPtr
htmlParserCtxtPtr
ctxt)
parse an HTML document (and build a tree if using the standard SAX interface).
ctxt
:
an HTML parser context
Returns
:
0 on success, -1 in case of error. The parser context is augmented as a result of the parsing.
Function: htmlParseElement
void	htmlParseElement		(
libxml-HTMLparser.html#htmlParserCtxtPtr
htmlParserCtxtPtr
ctxt)
parse an HTML element, this is highly recursive [39] element ::= EmptyElemTag | STag content ETag [41] Attribute ::= Name Eq AttValue
ctxt
:
an HTML parser context
Function: htmlParseEntityRef
const
libxml-HTMLparser.html#htmlEntityDesc
htmlEntityDesc
*	htmlParseEntityRef	(
libxml-HTMLparser.html#htmlParserCtxtPtr
htmlParserCtxtPtr
ctxt,
const
libxml-xmlstring.html#xmlChar
xmlChar
** str)
Parse an HTML ENTITY reference [68] EntityRef ::= '&' Name ';'
ctxt
:
an HTML parser context
str
:
location to store the entity name
Returns
:
the associated
libxml-HTMLparser.html#htmlEntityDescPtr
htmlEntityDescPtr
if found, or NULL otherwise. If non-NULL, *str will have to be freed by the caller.
Function: htmlParseFile
libxml-HTMLparser.html#htmlDocPtr
htmlDocPtr
htmlParseFile		(const char * filename,
const char * encoding)
parse an HTML file and build a tree. Automatic support for ZLIB/Compress compressed document is provided by default if found at compile-time.
filename
:
the filename
encoding
:
a free form C string describing the HTML document encoding, or NULL
Returns
:
the resulting document tree
Function: htmlReadDoc
libxml-HTMLparser.html#htmlDocPtr
htmlDocPtr
htmlReadDoc		(const
libxml-xmlstring.html#xmlChar
xmlChar
* cur,
const char * URL,
const char * encoding,
int options)
Parse an HTML in-memory document and build a tree.
cur
:
a pointer to a zero terminated string
URL
:
the base URL to use for the document
encoding
:
the document encoding, or NULL
options
:
a combination of htmlParserOption(s)
Returns
:
the resulting document tree
Function: htmlReadFd
libxml-HTMLparser.html#htmlDocPtr
htmlDocPtr
htmlReadFd		(int fd,
const char * URL,
const char * encoding,
int options)
Parse an HTML document from a file descriptor and build a tree.
fd
:
an open file descriptor
URL
:
the base URL to use for the document
encoding
:
the document encoding, or NULL
options
:
a combination of htmlParserOption(s)
Returns
:
the resulting document tree
Function: htmlReadFile
libxml-HTMLparser.html#htmlDocPtr
htmlDocPtr
htmlReadFile		(const char * filename,
const char * encoding,
int options)
Parse an HTML file from the filesystem or the network.
filename
:
a file or URL
encoding
:
the document encoding, or NULL
options
:
a combination of htmlParserOption(s)
Returns
:
the resulting document tree
Function: htmlReadIO
libxml-HTMLparser.html#htmlDocPtr
htmlDocPtr
htmlReadIO		(
libxml-xmlIO.html#xmlInputReadCallback
xmlInputReadCallback
ioread,
libxml-xmlIO.html#xmlInputCloseCallback
xmlInputCloseCallback
ioclose,
void * ioctx,
const char * URL,
const char * encoding,
int options)
parse an HTML document from I/O functions and source and build a tree.
ioread
:
an I/O read function
ioclose
:
an I/O close function
ioctx
:
an I/O handler
URL
:
the base URL to use for the document
encoding
:
the document encoding, or NULL
options
:
a combination of htmlParserOption(s)
Returns
:
the resulting document tree
Function: htmlReadMemory
libxml-HTMLparser.html#htmlDocPtr
htmlDocPtr
htmlReadMemory		(const char * buffer,
int size,
const char * URL,
const char * encoding,
int options)
Parse an HTML in-memory document and build a tree.
buffer
:
a pointer to a char array
size
:
the size of the array
URL
:
the base URL to use for the document
encoding
:
the document encoding, or NULL
options
:
a combination of htmlParserOption(s)
Returns
:
the resulting document tree
Function: htmlSAXParseDoc
libxml-HTMLparser.html#htmlDocPtr
htmlDocPtr
htmlSAXParseDoc		(
libxml-xmlstring.html#xmlChar
xmlChar
* cur,
const char * encoding,
libxml-HTMLparser.html#htmlSAXHandlerPtr
htmlSAXHandlerPtr
sax,
void * userData)
Parse an HTML in-memory document. If sax is not NULL, use the SAX callbacks to handle parse events. If sax is NULL, fallback to the default DOM behavior and return a tree.
cur
:
a pointer to an array of
libxml-xmlstring.html#xmlChar
xmlChar
encoding
:
a free form C string describing the HTML document encoding, or NULL
sax
:
the SAX handler block
userData
:
if using SAX, this pointer will be provided on callbacks.
Returns
:
the resulting document tree unless SAX is NULL or the document is not well formed.
Function: htmlSAXParseFile
libxml-HTMLparser.html#htmlDocPtr
htmlDocPtr
htmlSAXParseFile	(const char * filename,
const char * encoding,
libxml-HTMLparser.html#htmlSAXHandlerPtr
htmlSAXHandlerPtr
sax,
void * userData)
Parse an HTML file and build a tree. Automatic support for ZLIB/Compress compressed documents is provided by default if found at compile-time. It uses the given SAX function block to handle the parsing callbacks. If sax is NULL, fall back to the default DOM tree building routines.
filename
:
the filename
encoding
:
a free form C string describing the HTML document encoding, or NULL
sax
:
the SAX handler block
userData
:
if using SAX, this pointer will be provided on callbacks.
Returns
:
the resulting document tree unless SAX is NULL or the document is not well formed.
Function: htmlTagLookup
const
libxml-HTMLparser.html#htmlElemDesc
htmlElemDesc
*	htmlTagLookup	(const
libxml-xmlstring.html#xmlChar
xmlChar
* tag)
Lookup the HTML tag in the ElementTable
tag
:
The tag name in lowercase
Returns
:
the related
libxml-HTMLparser.html#htmlElemDescPtr
htmlElemDescPtr
or NULL if not found.
../bugs.html
Daniel Veillard
