http://swpat.ffii.org/Action against software patents http://www.gnome.org/Gnome2 Logo http://www.w3.org/StatusW3C Logo http://www.redhat.com/Red Hat Logo http://xmlsoft.org/Made with Libxml2 Logo 
Module HTMLparser from libxml2
API Menu ../index.htmlMain Menu ../docs.htmlDeveloper Menu ../examples/index.htmlCode Examples index.htmlAPI Menu libxml-parser.htmlParser API libxml-tree.htmlTree API libxml-xmlreader.htmlReader API ../guidelines.htmlXML Guidelines ../ChangeLog.htmlChangeLog API Indexes ../APIchunk0.htmlAlphabetic ../APIconstructors.htmlConstructors ../APIfunctions.htmlFunctions/Types ../APIfiles.htmlModules ../APIsymbols.htmlSymbols Related links http://mail.gnome.org/archives/xml/Mail archive http://xmlsoft.org/XSLT/XSLT libxslt http://phd.cs.unibo.it/gdome2/DOM gdome2 http://www.aleksey.com/xmlsec/XML-DSig xmlsec ftp://xmlsoft.org/FTP http://www.zlatkovic.com/projects/libxml/Windows binaries http://www.blastwave.org/packages.php/libxml2Solaris binaries http://www.explain.com.au/oss/libxml2xslt.htmlMacOsX binaries http://libxmlplusplus.sourceforge.net/C++ bindings http://www.zend.com/php5/articles/php5-xmlphp.php#Heading4PHP bindings http://sourceforge.net/projects/libxml2-pas/Pascal bindings http://rubyforge.org/projects/xml-tools/Ruby bindings http://tclxml.sourceforge.net/Tcl bindings http://bugzilla.gnome.org/buglist.cgi?product=libxml2Bug Tracker libxml-DOCBparser.htmlPrev libxml-DOCBparser.htmlDOCBparser index.htmlUp index.htmlAPI documentation ../index.htmlHome ../index.htmlThe XML C parser and toolkit of Gnome libxml-HTMLtree.htmlHTMLtree libxml-HTMLtree.htmlNext this module implements an HTML 4.0 non-verifying parser with API compatible with the XML parser ones. It should be able to parse "real world" HTML, even if severely broken from a specification point of view. 
Table of Contents
#define #htmlDefaultSubelementhtmlDefaultSubelement #define #htmlElementAllowedHereDeschtmlElementAllowedHereDesc #define #htmlRequiredAttrshtmlRequiredAttrs Typedef libxml-tree.html#xmlDocPtrxmlDocPtr  htmlDocPtr Structure #htmlElemDeschtmlElemDesc struct _htmlElemDesc
Typedef libxml-HTMLparser.html#htmlElemDeschtmlElemDesc  * htmlElemDescPtr Structure #htmlEntityDeschtmlEntityDesc struct _htmlEntityDesc
Typedef libxml-HTMLparser.html#htmlEntityDeschtmlEntityDesc  * htmlEntityDescPtr Typedef libxml-tree.html#xmlNodePtrxmlNodePtr  htmlNodePtr Typedef libxml-tree.html#xmlParserCtxtxmlParserCtxt  htmlParserCtxt Typedef libxml-tree.html#xmlParserCtxtPtrxmlParserCtxtPtr  htmlParserCtxtPtr Typedef libxml-tree.html#xmlParserInputxmlParserInput  htmlParserInput Typedef libxml-tree.html#xmlParserInputPtrxmlParserInputPtr  htmlParserInputPtr Typedef libxml-parser.html#xmlParserNodeInfoxmlParserNodeInfo  htmlParserNodeInfo Enum #htmlParserOptionhtmlParserOption Typedef libxml-tree.html#xmlSAXHandlerxmlSAXHandler  htmlSAXHandler Typedef libxml-tree.html#xmlSAXHandlerPtrxmlSAXHandlerPtr  htmlSAXHandlerPtr Enum #htmlStatushtmlStatus int	#UTF8ToHtmlUTF8ToHtml 			(unsigned char * out, 					 int * outlen, 					 const unsigned char * in, 					 int * inlen) libxml-HTMLparser.html#htmlStatushtmlStatus 	#htmlAttrAllowedhtmlAttrAllowed 		(const libxml-HTMLparser.html#htmlElemDeschtmlElemDesc  * elt, 					 const libxml-xmlstring.html#xmlCharxmlChar  * attr, 					 int legacy)int	#htmlAutoCloseTaghtmlAutoCloseTag 		( libxml-HTMLparser.html#htmlDocPtrhtmlDocPtr  doc, 					 const libxml-xmlstring.html#xmlCharxmlChar  * name, 					 libxml-HTMLparser.html#htmlNodePtrhtmlNodePtr  elem) libxml-HTMLparser.html#htmlParserCtxtPtrhtmlParserCtxtPtr 	#htmlCreateMemoryParserCtxthtmlCreateMemoryParserCtxt 	(const char * buffer, 							 int size) libxml-HTMLparser.html#htmlParserCtxtPtrhtmlParserCtxtPtr 	#htmlCreatePushParserCtxthtmlCreatePushParserCtxt 	( libxml-HTMLparser.html#htmlSAXHandlerPtrhtmlSAXHandlerPtr  sax, 							 void * user_data, 							 const char * chunk, 							 int size, 							 const char * filename, 							 libxml-encoding.html#xmlCharEncodingxmlCharEncoding  enc) libxml-HTMLparser.html#htmlDocPtrhtmlDocPtr 	#htmlCtxtReadDochtmlCtxtReadDoc 		( libxml-HTMLparser.html#htmlParserCtxtPtrhtmlParserCtxtPtr  ctxt, 					 const libxml-xmlstring.html#xmlCharxmlChar  * cur, 					 const char * 
URL, 					 const char * encoding, 					 int options) libxml-HTMLparser.html#htmlDocPtrhtmlDocPtr 	#htmlCtxtReadFdhtmlCtxtReadFd 		( libxml-HTMLparser.html#htmlParserCtxtPtrhtmlParserCtxtPtr  ctxt, 					 int fd, 					 const char * URL, 					 const char * encoding, 					 int options) libxml-HTMLparser.html#htmlDocPtrhtmlDocPtr 	#htmlCtxtReadFilehtmlCtxtReadFile 	( libxml-HTMLparser.html#htmlParserCtxtPtrhtmlParserCtxtPtr  ctxt, 					 const char * filename, 					 const char * encoding, 					 int options) libxml-HTMLparser.html#htmlDocPtrhtmlDocPtr 	#htmlCtxtReadIOhtmlCtxtReadIO 		( libxml-HTMLparser.html#htmlParserCtxtPtrhtmlParserCtxtPtr  ctxt, 					 libxml-xmlIO.html#xmlInputReadCallbackxmlInputReadCallback  ioread, 					 libxml-xmlIO.html#xmlInputCloseCallbackxmlInputCloseCallback  ioclose, 					 void * ioctx, 					 const char * URL, 					 const char * encoding, 					 int options) libxml-HTMLparser.html#htmlDocPtrhtmlDocPtr 	#htmlCtxtReadMemoryhtmlCtxtReadMemory 	( libxml-HTMLparser.html#htmlParserCtxtPtrhtmlParserCtxtPtr  ctxt, 					 const char * buffer, 					 int size, 					 const char * URL, 					 const char * encoding, 					 int options)void	#htmlCtxtResethtmlCtxtReset 			( libxml-HTMLparser.html#htmlParserCtxtPtrhtmlParserCtxtPtr  ctxt)int	#htmlCtxtUseOptionshtmlCtxtUseOptions 		( libxml-HTMLparser.html#htmlParserCtxtPtrhtmlParserCtxtPtr  ctxt, 					 int options)int	#htmlElementAllowedHerehtmlElementAllowedHere 		(const libxml-HTMLparser.html#htmlElemDeschtmlElemDesc  * parent, 					 const libxml-xmlstring.html#xmlCharxmlChar  * elt) libxml-HTMLparser.html#htmlStatushtmlStatus 	#htmlElementStatusHerehtmlElementStatusHere 	(const libxml-HTMLparser.html#htmlElemDeschtmlElemDesc  * parent, 					 const libxml-HTMLparser.html#htmlElemDeschtmlElemDesc  * elt)int	#htmlEncodeEntitieshtmlEncodeEntities 		(unsigned char * out, 					 int * outlen, 					 const unsigned char * in, 					 int * inlen, 					 int quoteChar)const 
libxml-HTMLparser.html#htmlEntityDeschtmlEntityDesc  *	#htmlEntityLookuphtmlEntityLookup 	(const libxml-xmlstring.html#xmlCharxmlChar  * name)const libxml-HTMLparser.html#htmlEntityDeschtmlEntityDesc  *	#htmlEntityValueLookuphtmlEntityValueLookup 	(unsigned int value)void	#htmlFreeParserCtxthtmlFreeParserCtxt 		( libxml-HTMLparser.html#htmlParserCtxtPtrhtmlParserCtxtPtr  ctxt)int	#htmlHandleOmittedElemhtmlHandleOmittedElem 		(int val)int	#htmlIsAutoClosedhtmlIsAutoClosed 		( libxml-HTMLparser.html#htmlDocPtrhtmlDocPtr  doc, 					 libxml-HTMLparser.html#htmlNodePtrhtmlNodePtr  elem)int	#htmlIsScriptAttributehtmlIsScriptAttribute 		(const libxml-xmlstring.html#xmlCharxmlChar  * name) libxml-HTMLparser.html#htmlStatushtmlStatus 	#htmlNodeStatushtmlNodeStatus 		(const libxml-HTMLparser.html#htmlNodePtrhtmlNodePtr  node, 					 int legacy)int	#htmlParseCharRefhtmlParseCharRef 		( libxml-HTMLparser.html#htmlParserCtxtPtrhtmlParserCtxtPtr  ctxt)int	#htmlParseChunkhtmlParseChunk 			( libxml-HTMLparser.html#htmlParserCtxtPtrhtmlParserCtxtPtr  ctxt, 					 const char * chunk, 					 int size, 					 int terminate) libxml-HTMLparser.html#htmlDocPtrhtmlDocPtr 	#htmlParseDochtmlParseDoc 		( libxml-xmlstring.html#xmlCharxmlChar  * cur, 					 const char * encoding)int	#htmlParseDocumenthtmlParseDocument 		( libxml-HTMLparser.html#htmlParserCtxtPtrhtmlParserCtxtPtr  ctxt)void	#htmlParseElementhtmlParseElement 		( libxml-HTMLparser.html#htmlParserCtxtPtrhtmlParserCtxtPtr  ctxt)const libxml-HTMLparser.html#htmlEntityDeschtmlEntityDesc  *	#htmlParseEntityRefhtmlParseEntityRef 	( libxml-HTMLparser.html#htmlParserCtxtPtrhtmlParserCtxtPtr  ctxt, 						 const libxml-xmlstring.html#xmlCharxmlChar  ** str) libxml-HTMLparser.html#htmlDocPtrhtmlDocPtr 	#htmlParseFilehtmlParseFile 		(const char * filename, 					 const char * encoding) libxml-HTMLparser.html#htmlDocPtrhtmlDocPtr 	#htmlReadDochtmlReadDoc 		(const libxml-xmlstring.html#xmlCharxmlChar  * cur, 					 const char * URL, 					 const char 
* encoding, 					 int options) libxml-HTMLparser.html#htmlDocPtrhtmlDocPtr 	#htmlReadFdhtmlReadFd 		(int fd, 					 const char * URL, 					 const char * encoding, 					 int options) libxml-HTMLparser.html#htmlDocPtrhtmlDocPtr 	#htmlReadFilehtmlReadFile 		(const char * filename, 					 const char * encoding, 					 int options) libxml-HTMLparser.html#htmlDocPtrhtmlDocPtr 	#htmlReadIOhtmlReadIO 		( libxml-xmlIO.html#xmlInputReadCallbackxmlInputReadCallback  ioread, 					 libxml-xmlIO.html#xmlInputCloseCallbackxmlInputCloseCallback  ioclose, 					 void * ioctx, 					 const char * URL, 					 const char * encoding, 					 int options) libxml-HTMLparser.html#htmlDocPtrhtmlDocPtr 	#htmlReadMemoryhtmlReadMemory 		(const char * buffer, 					 int size, 					 const char * URL, 					 const char * encoding, 					 int options) libxml-HTMLparser.html#htmlDocPtrhtmlDocPtr 	#htmlSAXParseDochtmlSAXParseDoc 		( libxml-xmlstring.html#xmlCharxmlChar  * cur, 					 const char * encoding, 					 libxml-HTMLparser.html#htmlSAXHandlerPtrhtmlSAXHandlerPtr  sax, 					 void * userData) libxml-HTMLparser.html#htmlDocPtrhtmlDocPtr 	#htmlSAXParseFilehtmlSAXParseFile 	(const char * filename, 					 const char * encoding, 					 libxml-HTMLparser.html#htmlSAXHandlerPtrhtmlSAXHandlerPtr  sax, 					 void * userData)const libxml-HTMLparser.html#htmlElemDeschtmlElemDesc  *	#htmlTagLookuphtmlTagLookup 	(const libxml-xmlstring.html#xmlCharxmlChar  * tag)Description
Macro: htmlDefaultSubelement
#define htmlDefaultSubelementReturns the default subelement for this element
Macro: htmlElementAllowedHereDesc
#define htmlElementAllowedHereDescChecks whether an HTML element description may be a direct child of the specified element. Returns 1 if allowed; 0 otherwise.
Macro: htmlRequiredAttrs
#define htmlRequiredAttrsReturns the attributes required for the specified element.
Structure htmlElemDesc 
Structure htmlElemDescstruct _htmlElemDesc {
    const char *	name	: The tag name
    char	startTag	: Whether the start tag can be implied
    char	endTag	: Whether the end tag can be implied
    char	saveEndTag	: Whether the end tag should be saved
    char	empty	: Is this an empty element ?
    char	depr	: Is this a deprecated element ?
    char	dtd	: 1: only in Loose DTD, 2: only Frameset
    char	isinline	: is this a block 0 or inline 1 element
    const char *	desc	: the description (NRK Jan.2003: the fields below are new fields encapsulating HTML structure)
    const char **	subelts	: allowed sub-elements of this element
    const char *	defaultsubelt	: subelement for suggested auto-repair if necessary, or NULL
    const char **	attrs_opt	: Optional Attributes
    const char **	attrs_depr	: Additional deprecated attributes
    const char **	attrs_req	: Required attributes
}
Structure htmlEntityDesc 
Structure htmlEntityDescstruct _htmlEntityDesc {
    unsigned int	value	: the UNICODE value for the character
    const char *	name	: The entity name
    const char *	desc	: the description
}
Enum htmlParserOption 
Enum htmlParserOption {
    
HTML_PARSE_RECOVER  = 1 : Relaxed parsing
    
HTML_PARSE_NOERROR  = 32 : suppress error reports
    
HTML_PARSE_NOWARNING  = 64 : suppress warning reports
    
HTML_PARSE_PEDANTIC  = 128 : pedantic error reporting
    
HTML_PARSE_NOBLANKS  = 256 : remove blank nodes
    
HTML_PARSE_NONET  = 2048 : Forbid network access
    
HTML_PARSE_COMPACT  = 65536 : compact small text nodes
}
Enum htmlStatus 
Enum htmlStatus {
    
HTML_NA  = 0 : something we don't check at all
    
HTML_INVALID  = 1
    
HTML_DEPRECATED  = 2
    
HTML_VALID  = 4
    
HTML_REQUIRED  = 12 : VALID bit set so ( & libxml-HTMLparser.html#HTML_VALIDHTML_VALID  ) is TRUE
}
Function: UTF8ToHtml
int	UTF8ToHtml			(unsigned char * out, 					 int * outlen, 					 const unsigned char * in, 					 int * inlen)Take a block of UTF-8 chars in and try to convert it to an ASCII plus HTML entities block of chars out.
out: a pointer to an array of bytes to store the result outlen: the length of @out in: a pointer to an array of UTF-8 chars inlen: the length of @in Returns: 0 if success, -2 if the transcoding fails, or -1 otherwise. The value of @inlen after return is the number of octets consumed if the call succeeds (return value 0), else unpredictable. The value of @outlen after return is the number of octets produced. Function: htmlAttrAllowed
libxml-HTMLparser.html#htmlStatushtmlStatus 	htmlAttrAllowed		(const libxml-HTMLparser.html#htmlElemDeschtmlElemDesc  * elt, 					 const libxml-xmlstring.html#xmlCharxmlChar  * attr, 					 int legacy)Checks whether an libxml-SAX.html#attributeattribute  is valid for an element Has full knowledge of Required and Deprecated attributes
elt: HTML element attr: HTML libxml-SAX.html#attributeattribute legacy: whether to allow deprecated attributes Returns: one of HTML_REQUIRED, HTML_VALID, HTML_DEPRECATED, libxml-HTMLparser.html#HTML_INVALIDHTML_INVALID Function: htmlAutoCloseTag
int	htmlAutoCloseTag		( libxml-HTMLparser.html#htmlDocPtrhtmlDocPtr  doc, 					 const libxml-xmlstring.html#xmlCharxmlChar  * name, 					 libxml-HTMLparser.html#htmlNodePtrhtmlNodePtr  elem)The HTML DTD allows a tag to implicitly close other tags. The list is kept in htmlStartClose array. This function checks if the element or one of its children would autoclose the given tag.
doc: the HTML document name: The tag name elem: the HTML element Returns: 1 if autoclose, 0 otherwise Function: htmlCreateMemoryParserCtxt
libxml-HTMLparser.html#htmlParserCtxtPtrhtmlParserCtxtPtr 	htmlCreateMemoryParserCtxt	(const char * buffer, 							 int size)Create a parser context for an HTML in-memory document.
buffer: a pointer to a char array size: the size of the array Returns: the new parser context or NULL Function: htmlCreatePushParserCtxt
libxml-HTMLparser.html#htmlParserCtxtPtrhtmlParserCtxtPtr 	htmlCreatePushParserCtxt	( libxml-HTMLparser.html#htmlSAXHandlerPtrhtmlSAXHandlerPtr  sax, 							 void * user_data, 							 const char * chunk, 							 int size, 							 const char * filename, 							 libxml-encoding.html#xmlCharEncodingxmlCharEncoding  enc)Create a parser context for using the HTML parser in push mode The value of @filename is used for fetching external entities and error/warning reports.
sax: a SAX handler user_data: The user data returned on SAX callbacks chunk: a pointer to an array of chars size: number of chars in the array filename: an optional file name or URI enc: an optional encoding Returns: the new parser context or NULL Function: htmlCtxtReadDoc
libxml-HTMLparser.html#htmlDocPtrhtmlDocPtr 	htmlCtxtReadDoc		( libxml-HTMLparser.html#htmlParserCtxtPtrhtmlParserCtxtPtr  ctxt, 					 const libxml-xmlstring.html#xmlCharxmlChar  * cur, 					 const char * URL, 					 const char * encoding, 					 int options)parse an HTML in-memory document and build a tree. This reuses the existing @ctxt parser context.
ctxt: an HTML parser context cur: a pointer to a zero terminated string URL: the base URL to use for the document encoding: the document encoding, or NULL options: a combination of htmlParserOption(s) Returns: the resulting document tree Function: htmlCtxtReadFd
libxml-HTMLparser.html#htmlDocPtrhtmlDocPtr 	htmlCtxtReadFd		( libxml-HTMLparser.html#htmlParserCtxtPtrhtmlParserCtxtPtr  ctxt, 					 int fd, 					 const char * URL, 					 const char * encoding, 					 int options)parse an HTML document from a file descriptor and build a tree. This reuses the existing @ctxt parser context.
ctxt: an HTML parser context fd: an open file descriptor URL: the base URL to use for the document encoding: the document encoding, or NULL options: a combination of htmlParserOption(s) Returns: the resulting document tree Function: htmlCtxtReadFile
libxml-HTMLparser.html#htmlDocPtrhtmlDocPtr 	htmlCtxtReadFile	( libxml-HTMLparser.html#htmlParserCtxtPtrhtmlParserCtxtPtr  ctxt, 					 const char * filename, 					 const char * encoding, 					 int options)parse an HTML file from the filesystem or the network. This reuses the existing @ctxt parser context.
ctxt: an HTML parser context filename: a file or URL encoding: the document encoding, or NULL options: a combination of htmlParserOption(s) Returns: the resulting document tree Function: htmlCtxtReadIO
libxml-HTMLparser.html#htmlDocPtrhtmlDocPtr 	htmlCtxtReadIO		( libxml-HTMLparser.html#htmlParserCtxtPtrhtmlParserCtxtPtr  ctxt, 					 libxml-xmlIO.html#xmlInputReadCallbackxmlInputReadCallback  ioread, 					 libxml-xmlIO.html#xmlInputCloseCallbackxmlInputCloseCallback  ioclose, 					 void * ioctx, 					 const char * URL, 					 const char * encoding, 					 int options)parse an HTML document from I/O functions and source and build a tree. This reuses the existing @ctxt parser context
ctxt: an HTML parser context ioread: an I/O read function ioclose: an I/O close function ioctx: an I/O handler URL: the base URL to use for the document encoding: the document encoding, or NULL options: a combination of htmlParserOption(s) Returns: the resulting document tree Function: htmlCtxtReadMemory
libxml-HTMLparser.html#htmlDocPtrhtmlDocPtr 	htmlCtxtReadMemory	( libxml-HTMLparser.html#htmlParserCtxtPtrhtmlParserCtxtPtr  ctxt, 					 const char * buffer, 					 int size, 					 const char * URL, 					 const char * encoding, 					 int options)parse an HTML in-memory document and build a tree. This reuses the existing @ctxt parser context.
ctxt: an HTML parser context buffer: a pointer to a char array size: the size of the array URL: the base URL to use for the document encoding: the document encoding, or NULL options: a combination of htmlParserOption(s) Returns: the resulting document tree Function: htmlCtxtReset
void	htmlCtxtReset			( libxml-HTMLparser.html#htmlParserCtxtPtrhtmlParserCtxtPtr  ctxt)Reset a parser context
ctxt: an HTML parser context Function: htmlCtxtUseOptions
int	htmlCtxtUseOptions		( libxml-HTMLparser.html#htmlParserCtxtPtrhtmlParserCtxtPtr  ctxt, 					 int options)Applies the options to the parser context
ctxt: an HTML parser context options: a combination of htmlParserOption(s) Returns: 0 in case of success, the set of unknown or unimplemented options in case of error. Function: htmlElementAllowedHere
int	htmlElementAllowedHere		(const libxml-HTMLparser.html#htmlElemDeschtmlElemDesc  * parent, 					 const libxml-xmlstring.html#xmlCharxmlChar  * elt)Checks whether an HTML element may be a direct child of a parent element. Note - doesn't check for deprecated elements
parent: HTML parent element elt: HTML element Returns: 1 if allowed; 0 otherwise. Function: htmlElementStatusHere
libxml-HTMLparser.html#htmlStatushtmlStatus 	htmlElementStatusHere	(const libxml-HTMLparser.html#htmlElemDeschtmlElemDesc  * parent, 					 const libxml-HTMLparser.html#htmlElemDeschtmlElemDesc  * elt)Checks whether an HTML element may be a direct child of a parent element, and if so whether it is valid or deprecated.
parent: HTML parent element elt: HTML element Returns: one of HTML_VALID, HTML_DEPRECATED, libxml-HTMLparser.html#HTML_INVALIDHTML_INVALID Function: htmlEncodeEntities
int	htmlEncodeEntities		(unsigned char * out, 					 int * outlen, 					 const unsigned char * in, 					 int * inlen, 					 int quoteChar)Take a block of UTF-8 chars in and try to convert it to an ASCII plus HTML entities block of chars out.
out: a pointer to an array of bytes to store the result outlen: the length of @out in: a pointer to an array of UTF-8 chars inlen: the length of @in quoteChar: the quote character to escape (' or ") or zero. Returns: 0 if success, -2 if the transcoding fails, or -1 otherwise. The value of @inlen after return is the number of octets consumed if the call succeeds (return value 0), else unpredictable. The value of @outlen after return is the number of octets produced. Function: htmlEntityLookup
const libxml-HTMLparser.html#htmlEntityDeschtmlEntityDesc  *	htmlEntityLookup	(const libxml-xmlstring.html#xmlCharxmlChar  * name)Look up the given entity in EntitiesTable. TODO: the linear scan is really ugly; a hash table is really needed.
name: the entity name Returns: the associated libxml-HTMLparser.html#htmlEntityDescPtrhtmlEntityDescPtr  if found, NULL otherwise. Function: htmlEntityValueLookup
const libxml-HTMLparser.html#htmlEntityDeschtmlEntityDesc  *	htmlEntityValueLookup	(unsigned int value)Look up the given entity in EntitiesTable. TODO: the linear scan is really ugly; a hash table is really needed.
value: the entity's unicode value Returns: the associated libxml-HTMLparser.html#htmlEntityDescPtrhtmlEntityDescPtr  if found, NULL otherwise. Function: htmlFreeParserCtxt
void	htmlFreeParserCtxt		( libxml-HTMLparser.html#htmlParserCtxtPtrhtmlParserCtxtPtr  ctxt)Free all the memory used by a parser context. However the parsed document in ctxt->myDoc is not freed.
ctxt: an HTML parser context Function: htmlHandleOmittedElem
int	htmlHandleOmittedElem		(int val)Set and return the previous value for handling HTML omitted tags.
val: int 0 or 1 Returns: the previous value: 0 for no handling, 1 for auto insertion. Function: htmlIsAutoClosed
int	htmlIsAutoClosed		( libxml-HTMLparser.html#htmlDocPtrhtmlDocPtr  doc, 					 libxml-HTMLparser.html#htmlNodePtrhtmlNodePtr  elem)The HTML DTD allows a tag to implicitly close other tags. The list is kept in htmlStartClose array. This function checks if a tag is autoclosed by one of its children.
doc: the HTML document elem: the HTML element Returns: 1 if autoclosed, 0 otherwise Function: htmlIsScriptAttribute
int	htmlIsScriptAttribute		(const libxml-xmlstring.html#xmlCharxmlChar  * name)Check if an libxml-SAX.html#attributeattribute  is of content type Script
name: an libxml-SAX.html#attributeattribute  name Returns: 1 if the libxml-SAX.html#attributeattribute  is a script, 0 otherwise Function: htmlNodeStatus
libxml-HTMLparser.html#htmlStatushtmlStatus 	htmlNodeStatus		(const libxml-HTMLparser.html#htmlNodePtrhtmlNodePtr  node, 					 int legacy)Checks whether the tree node is valid. Experimental (the author only uses the HTML enhancements in a SAX parser)
node: an libxml-HTMLparser.html#htmlNodePtrhtmlNodePtr  in a tree legacy: whether to allow deprecated elements (YES is faster here for Element nodes) Returns: for Element nodes, a return from libxml-HTMLparser.html#htmlElementAllowedHerehtmlElementAllowedHere  (if legacy allowed) or libxml-HTMLparser.html#htmlElementStatusHerehtmlElementStatusHere  (otherwise). for Attribute nodes, a return from libxml-HTMLparser.html#htmlAttrAllowedhtmlAttrAllowed  for other nodes, libxml-HTMLparser.html#HTML_NAHTML_NA  (no checks performed) Function: htmlParseCharRef
int	htmlParseCharRef		( libxml-HTMLparser.html#htmlParserCtxtPtrhtmlParserCtxtPtr  ctxt)parse Reference declarations [66] CharRef ::= '&#' [0-9]+ ';' | '&#x' [0-9a-fA-F]+ ';'
ctxt: an HTML parser context Returns: the value parsed (as an int) Function: htmlParseChunk
int	htmlParseChunk			( libxml-HTMLparser.html#htmlParserCtxtPtrhtmlParserCtxtPtr  ctxt, 					 const char * chunk, 					 int size, 					 int terminate)Parse a Chunk of memory
ctxt: an HTML parser context chunk: a char array size: the size in bytes of the chunk terminate: last chunk indicator Returns: zero if no error, the libxml-xmlerror.html#xmlParserErrorsxmlParserErrors  otherwise. Function: htmlParseDoc
libxml-HTMLparser.html#htmlDocPtrhtmlDocPtr 	htmlParseDoc		( libxml-xmlstring.html#xmlCharxmlChar  * cur, 					 const char * encoding)parse an HTML in-memory document and build a tree.
cur: a pointer to an array of libxml-xmlstring.html#xmlCharxmlChar encoding: a free form C string describing the HTML document encoding, or NULL Returns: the resulting document tree Function: htmlParseDocument
int	htmlParseDocument		( libxml-HTMLparser.html#htmlParserCtxtPtrhtmlParserCtxtPtr  ctxt)parse an HTML document (and build a tree if using the standard SAX interface).
ctxt: an HTML parser context Returns: 0 on success, -1 in case of error. The parser context is augmented as a result of the parsing. Function: htmlParseElement
void	htmlParseElement		( libxml-HTMLparser.html#htmlParserCtxtPtrhtmlParserCtxtPtr  ctxt)parse an HTML element, this is highly recursive [39] element ::= EmptyElemTag | STag content ETag [41] Attribute ::= Name Eq AttValue
ctxt: an HTML parser context Function: htmlParseEntityRef
const libxml-HTMLparser.html#htmlEntityDeschtmlEntityDesc  *	htmlParseEntityRef	( libxml-HTMLparser.html#htmlParserCtxtPtrhtmlParserCtxtPtr  ctxt, 						 const libxml-xmlstring.html#xmlCharxmlChar  ** str)parse an HTML entity reference [68] EntityRef ::= '&' Name ';'
ctxt: an HTML parser context str: location to store the entity name Returns: the associated libxml-HTMLparser.html#htmlEntityDescPtrhtmlEntityDescPtr  if found, or NULL otherwise, if non-NULL *str will have to be freed by the caller. Function: htmlParseFile
libxml-HTMLparser.html#htmlDocPtrhtmlDocPtr 	htmlParseFile		(const char * filename, 					 const char * encoding)parse an HTML file and build a tree. Automatic support for ZLIB/Compress compressed document is provided by default if found at compile-time.
filename: the filename encoding: a free form C string describing the HTML document encoding, or NULL Returns: the resulting document tree Function: htmlReadDoc
libxml-HTMLparser.html#htmlDocPtrhtmlDocPtr 	htmlReadDoc		(const libxml-xmlstring.html#xmlCharxmlChar  * cur, 					 const char * URL, 					 const char * encoding, 					 int options)parse an HTML in-memory document and build a tree.
cur: a pointer to a zero terminated string URL: the base URL to use for the document encoding: the document encoding, or NULL options: a combination of htmlParserOption(s) Returns: the resulting document tree Function: htmlReadFd
libxml-HTMLparser.html#htmlDocPtrhtmlDocPtr 	htmlReadFd		(int fd, 					 const char * URL, 					 const char * encoding, 					 int options)parse an HTML document from a file descriptor and build a tree.
fd: an open file descriptor URL: the base URL to use for the document encoding: the document encoding, or NULL options: a combination of htmlParserOption(s) Returns: the resulting document tree Function: htmlReadFile
libxml-HTMLparser.html#htmlDocPtrhtmlDocPtr 	htmlReadFile		(const char * filename, 					 const char * encoding, 					 int options)parse an HTML file from the filesystem or the network.
filename: a file or URL encoding: the document encoding, or NULL options: a combination of htmlParserOption(s) Returns: the resulting document tree Function: htmlReadIO
libxml-HTMLparser.html#htmlDocPtrhtmlDocPtr 	htmlReadIO		( libxml-xmlIO.html#xmlInputReadCallbackxmlInputReadCallback  ioread, 					 libxml-xmlIO.html#xmlInputCloseCallbackxmlInputCloseCallback  ioclose, 					 void * ioctx, 					 const char * URL, 					 const char * encoding, 					 int options)parse an HTML document from I/O functions and source and build a tree.
ioread: an I/O read function ioclose: an I/O close function ioctx: an I/O handler URL: the base URL to use for the document encoding: the document encoding, or NULL options: a combination of htmlParserOption(s) Returns: the resulting document tree Function: htmlReadMemory
libxml-HTMLparser.html#htmlDocPtrhtmlDocPtr 	htmlReadMemory		(const char * buffer, 					 int size, 					 const char * URL, 					 const char * encoding, 					 int options)parse an HTML in-memory document and build a tree.
buffer: a pointer to a char array size: the size of the array URL: the base URL to use for the document encoding: the document encoding, or NULL options: a combination of htmlParserOption(s) Returns: the resulting document tree Function: htmlSAXParseDoc
libxml-HTMLparser.html#htmlDocPtrhtmlDocPtr 	htmlSAXParseDoc		( libxml-xmlstring.html#xmlCharxmlChar  * cur, 					 const char * encoding, 					 libxml-HTMLparser.html#htmlSAXHandlerPtrhtmlSAXHandlerPtr  sax, 					 void * userData)Parse an HTML in-memory document. If sax is not NULL, use the SAX callbacks to handle parse events. If sax is NULL, fallback to the default DOM behavior and return a tree.
cur: a pointer to an array of libxml-xmlstring.html#xmlCharxmlChar encoding: a free form C string describing the HTML document encoding, or NULL sax: the SAX handler block userData: if using SAX, this pointer will be provided on callbacks. Returns: the resulting document tree unless SAX is NULL or the document is not well formed. Function: htmlSAXParseFile
libxml-HTMLparser.html#htmlDocPtrhtmlDocPtr 	htmlSAXParseFile	(const char * filename, 					 const char * encoding, 					 libxml-HTMLparser.html#htmlSAXHandlerPtrhtmlSAXHandlerPtr  sax, 					 void * userData)parse an HTML file and build a tree. Automatic support for ZLIB/Compress compressed document is provided by default if found at compile-time. It uses the given SAX function block to handle the parsing callbacks. If sax is NULL, fallback to the default DOM tree building routines.
filename: the filename encoding: a free form C string describing the HTML document encoding, or NULL sax: the SAX handler block userData: if using SAX, this pointer will be provided on callbacks. Returns: the resulting document tree unless SAX is NULL or the document is not well formed. Function: htmlTagLookup
const libxml-HTMLparser.html#htmlElemDeschtmlElemDesc  *	htmlTagLookup	(const libxml-xmlstring.html#xmlCharxmlChar  * tag)Lookup the HTML tag in the ElementTable
tag: The tag name in lowercase Returns: the related libxml-HTMLparser.html#htmlElemDescPtrhtmlElemDescPtr  or NULL if not found. ../bugs.htmlDaniel Veillard 
