mirror of
https://github.com/php/php-src.git
synced 2025-08-15 21:48:51 +02:00
Adding the tidy extension to PECL
This commit is contained in:
parent
de7f7658cf
commit
2fb97cdf95
18 changed files with 2707 additions and 0 deletions
2
ext/tidy/CREDITS
Normal file
2
ext/tidy/CREDITS
Normal file
|
@ -0,0 +1,2 @@
|
|||
Tidy
|
||||
John Coggeshall
|
154
ext/tidy/README_TIDY
Normal file
154
ext/tidy/README_TIDY
Normal file
|
@ -0,0 +1,154 @@
|
|||
|
||||
README FOR ext/tidy by John Coggeshall <john@php.net>
|
||||
|
||||
Tidy Version: 0.5b
|
||||
|
||||
Tidy is an extension based on Libtidy (http://tidy.sf.net/) and allows a PHP developer
|
||||
to clean, repair, and traverse HTML, XHTML, and XML documents -- including ones with
|
||||
embedded scripting languages such as PHP or ASP within them using OO constructs.
|
||||
|
||||
The Tidy extension has two separate APIs, one for general parsing, cleaning, and
|
||||
repairing and another for document traversal. The general API is provided below:
|
||||
|
||||
tidy_create() Initialize and return a tidy document resource
|
||||
tidy_parse_file($tidy, $file) Parse the document stored in $file
|
||||
tidy_parse_string($tidy, $str) Parse the string stored in $str
|
||||
|
||||
tidy_clean_repair($tidy) Clean and repair the document
|
||||
tidy_diagnose($tidy) Diagnose a parsed document
|
||||
|
||||
tidy_setopt($tidy, $opt, $val) Set a configuration option $opt to $val
|
||||
tidy_getopt($tidy, $opt) Retrieve a configuration option
|
||||
|
||||
** note: $opt is a string representing the option. Right now the only
|
||||
source of these options is the LibTidy source.. eventually I'll document
|
||||
them officially -- see the src/config.c file in the tidy source **
|
||||
|
||||
tidy_get_output($tidy) Return the cleaned tidy HTML as a string
|
||||
tidy_get_error_buffer($tidy) Return a log of the errors and warnings
|
||||
returned by tidy
|
||||
|
||||
tidy_get_release() Return the Libtidy release date
|
||||
tidy_get_status($tidy) Return the status of the document
|
||||
tidy_get_html_ver($tidy) Return the major HTML version detected for
|
||||
the document;
|
||||
|
||||
tidy_is_xhtml($tidy) Determines if the document is XHTML
|
||||
tidy_is_xml($tidy) Determines if the document is a generic XML
|
||||
|
||||
tidy_error_count($tidy) Returns the number of errors in the document
|
||||
tidy_warning_count($tidy) Returns the number of warnings in the document
|
||||
tidy_access_count($tidy) Returns the number of accessibility-related
|
||||
warnings in the document.
|
||||
tidy_config_count($tidy) Returns the number of configuration errors found
|
||||
|
||||
tidy_load_config($tidy, $file) Loads the specified configuration file
|
||||
tidy_load_config_enc($tidy,
|
||||
$file,
|
||||
$enc) Loads the specified config file using the specified
|
||||
character encoding
|
||||
tidy_set_encoding($tidy, $enc) Sets the current character encoding for the document
|
||||
tidy_save_config($tidy, $file) Saves the current config to $file
|
||||
|
||||
|
||||
Beyond these general-purpose API functions, Tidy also supports the following
|
||||
functions which are used to retrieve an object for document traversal:
|
||||
|
||||
tidy_get_root($tidy) Returns an object starting at the root of the
|
||||
document
|
||||
tidy_get_head($tidy) Returns an object starting at the <HEAD> tag
|
||||
tidy_get_html($tidy) Returns an object starting at the <HTML> tag
|
||||
tidy_get_body($tidy) Returns an object starting at the <BODY> tag
|
||||
|
||||
All Navigation of the specified document is done via the PHP5 object constructs.
|
||||
There are two types of objects which Tidy can create. The first is TidyNode, which
|
||||
represents HTML Tags, Text, and more (see the TidyNode_Type Constants). The second
|
||||
is TidyAttr, which represents an attribute within an HTML tag (TidyNode). The
|
||||
functionality of these objects is represented by the following schema:
|
||||
|
||||
class TidyNode {
|
||||
|
||||
public $name; // name of node (i.e. HEAD)
|
||||
public $value; // value of node (everything between tags)
|
||||
public $type; // type of node (text, php, asp, etc.)
|
||||
public $id; // id of node (i.e. TIDY_TAG_HEAD)
|
||||
|
||||
public $line; // line # of node in source
|
||||
public $column; // column # of node in source
|
||||
|
||||
public $html_ver; // HTML version (0,1,2,3,4)
|
||||
|
||||
public $attribs; // an array of attributes (see TidyAttr)
|
||||
public $children; // an array of child nodes
|
||||
|
||||
function has_siblings(); // any sibling nodes?
|
||||
function has_children(); // any child nodes?
|
||||
function has_parent(); // have a parent?
|
||||
|
||||
function is_comment(); // is node a comment?
|
||||
function is_xhtml(); // is document XHTML?
|
||||
function is_xml(); // is document generic XML (not HTML/XHTML)
|
||||
function is_text(); // is node text?
|
||||
function is_html(); // is node an HTML tag?
|
||||
|
||||
function is_jste(); // is jste block?
|
||||
function is_asp(); // is Microsoft ASP block?
|
||||
function is_php(); // is PHP block?
|
||||
|
||||
function next(); // returns next node
|
||||
function prev(); // returns prev node
|
||||
function parent(); // returns parent node
|
||||
function child(); // returns first child node
|
||||
|
||||
/* Searches for a particular attribute in the current node based
|
||||
on node ID. If found returns a TidyAttr object for it */
|
||||
function get_attr_type($attr_id);
|
||||
|
||||
/*
|
||||
|
||||
NOT YET IMPLEMENTED
|
||||
|
||||
Recursively traverses the tree from the current node and returns
|
||||
an array of attributes matching the node ID/attr ID pair
|
||||
|
||||
Useful for pulling out things like links:
|
||||
foreach($body->fetch_attrs(TIDY_TAG_A, TIDY_ATTR_HREF) as $link) {
|
||||
echo "Link : {$link->value}\n";
|
||||
}
|
||||
*/
|
||||
|
||||
function fetch_attrs($node_id, $attr_id);
|
||||
|
||||
/*
|
||||
|
||||
NOT YET IMPLEMENTED
|
||||
|
||||
Recursively traverses the tree from the current node and returns
|
||||
an array of nodes matching the node ID
|
||||
|
||||
Useful for pulling out tables, etc (echos the HTML for every
|
||||
<TABLE> block)
|
||||
|
||||
foreach($body->fetch_nodes(TIDY_TAG_TABLE) as $table) {
|
||||
|
||||
echo $table->value;
|
||||
|
||||
}
|
||||
*/
|
||||
function fetch_nodes($node_id);
|
||||
}
|
||||
|
||||
class TidyAttr {
|
||||
|
||||
public $name; // attribute name i.e. HREF
|
||||
public $value; // attribute value
|
||||
public $id; // attribute id i.e. TIDY_ATTR_HREF
|
||||
|
||||
function next(); // returns next attribute in tag
|
||||
function tag(); // returns the tag node associated with attribute
|
||||
}
|
||||
|
||||
Examples of using these objects to navigate the tree can be found in the examples/
|
||||
directory (I suggest looking at urlgrab.php and dumpit.php)
|
||||
|
||||
E-mail thoughts, suggestions, patches, etc. to <john@php.net>
|
4
ext/tidy/TODO
Normal file
4
ext/tidy/TODO
Normal file
|
@ -0,0 +1,4 @@
|
|||
TODO
|
||||
|
||||
- Implement fetch_attrs(), fetch_nodes() methods
|
||||
- Fix any memleaks (some may be purely ZE2 related)
|
35
ext/tidy/config.m4
Normal file
35
ext/tidy/config.m4
Normal file
|
@ -0,0 +1,35 @@
|
|||
dnl
dnl $Id$
dnl

PHP_ARG_WITH(tidy,for TIDY support,
[  --with-tidy[=DIR]      Include TIDY support])

if test "$PHP_TIDY" != "no"; then
  PHP_NEW_EXTENSION(tidy, tidy.c, $ext_shared)

  dnl Search the user-supplied prefix if one was given, otherwise fall
  dnl back to the usual install prefixes.
  if test "$PHP_TIDY" != "yes"; then
    TIDY_SEARCH_DIRS=$PHP_TIDY
  else
    TIDY_SEARCH_DIRS="/usr/local /usr"
  fi

  dnl Stop at the first prefix that provides tidy.h so that prefixes
  dnl listed earlier take precedence over later ones. Without the break
  dnl the last matching prefix would silently win.
  for i in $TIDY_SEARCH_DIRS; do
    if test -f $i/include/tidy/tidy.h; then
      TIDY_DIR=$i
      TIDY_INCDIR=$i/include/tidy
      break
    elif test -f $i/include/tidy.h; then
      TIDY_DIR=$i
      TIDY_INCDIR=$i/include
      break
    fi
  done

  if test -z "$TIDY_DIR"; then
    AC_MSG_ERROR(Cannot find libtidy)
  fi

  TIDY_LIBDIR=$TIDY_DIR/lib

  AC_DEFINE(HAVE_TIDY,1,[ ])
  PHP_SUBST(TIDY_SHARED_LIBADD)
  PHP_ADD_LIBRARY_WITH_PATH(tidy, $TIDY_LIBDIR, TIDY_SHARED_LIBADD)
  PHP_ADD_INCLUDE($TIDY_INCDIR)
fi
|
40
ext/tidy/examples/cleanhtml.php
Normal file
40
ext/tidy/examples/cleanhtml.php
Normal file
|
@ -0,0 +1,40 @@
|
|||
<?php

/*
 * cleanhtml.php
 *
 * Cleans and repairs an HTML, XHTML, PHP, ASP, etc. document and prints
 * the result. The document is read from the file named by the first
 * command-line argument, or from standard input when no argument is given.
 *
 * By: John Coggeshall <john@php.net>
 *
 * Usage: php cleanhtml.php [filename]
 *
 */

$tidy = tidy_create();

/* Pick the input source: a named file, or else standard input */
if(isset($_SERVER['argv'][1])) {
    tidy_parse_file($tidy, $_SERVER['argv'][1]);
} else {
    tidy_parse_string($tidy, file_get_contents("php://stdin"));
}

tidy_clean_repair($tidy);

/* Report the diagnostics ahead of the cleaned document, if there are any */
$has_problems = tidy_warning_count($tidy) || tidy_error_count($tidy);

if($has_problems) {
    echo "\n\nThe following errors or warnings occured:\n",
         tidy_get_error_buffer($tidy),
         "\n";
}

echo tidy_get_output($tidy);

?>
|
||||
|
||||
|
||||
|
||||
|
94
ext/tidy/examples/dumpit.php
Normal file
94
ext/tidy/examples/dumpit.php
Normal file
|
@ -0,0 +1,94 @@
|
|||
<?php
|
||||
/*
|
||||
* dumpit.php
|
||||
*
|
||||
* a command-line script which dumps the given HTML, PHP, ASP, XHTML, etc.
|
||||
* file as it is represented in the document model.
|
||||
*
|
||||
* By: John Coggeshall <john@php.net>
|
||||
*
|
||||
* Usage: php dumpit.php <filename>
|
||||
*/
|
||||
|
||||
|
||||
/* A file name is required: stop with a usage message instead of handing
   an undefined argument to tidy_parse_file(). */
if(!isset($_SERVER['argv'][1])) {
    file_put_contents("php://stderr", "Usage: php dumpit.php <filename>\n");
    exit(1);
}

$tidy = tidy_create();
tidy_parse_file($tidy, $_SERVER['argv'][1]);

/* Optionally you can do this here if you want to fix up the document */

/* tidy_clean_repair($tidy); */

/* Dump the whole document, starting at its root node */
$tree = tidy_get_root($tidy);
dump_tree($tree);
echo "\n";
|
||||
|
||||
/*
 * Translate a TIDY_NODETYPE_* constant into a human-readable label.
 * A value matching none of the known node types yields "Unknown Node".
 */
function node_type($type) {

    /* Checked top to bottom with a loose comparison; first match wins */
    $labels = array(
        array(TIDY_NODETYPE_ROOT,     "Root Node"),
        array(TIDY_NODETYPE_DOCTYPE,  "DocType Node"),
        array(TIDY_NODETYPE_COMMENT,  "Comment Node"),
        array(TIDY_NODETYPE_PROCINS,  "ProcIns Node"),
        array(TIDY_NODETYPE_TEXT,     "Text Node"),
        array(TIDY_NODETYPE_START,    "Start Node"),
        array(TIDY_NODETYPE_END,      "End Node"),
        array(TIDY_NODETYPE_STARTEND, "Start/End Node"),
        array(TIDY_NODETYPE_CDATA,    "CDATA Node"),
        array(TIDY_NODETYPE_SECTION,  "Section Node"),
        array(TIDY_NODETYPE_ASP,      "ASP Source Code Node"),
        array(TIDY_NODETYPE_PHP,      "PHP Source Code Node"),
        array(TIDY_NODETYPE_JSTE,     "JSTE Source Code"),
        array(TIDY_NODETYPE_XMLDECL,  "XML Declaration Node"),
    );

    foreach($labels as $entry) {
        if($type == $entry[0]) {
            return $entry[1];
        }
    }

    return "Unknown Node";
}
|
||||
|
||||
/*
 * Print $string preceded by $indent single-space characters.
 * An $indent of zero or less prints $string with no padding.
 */
function do_leaf($string, $indent) {
    /* Build the left padding one space at a time */
    $padding = "";
    $remaining = $indent;
    while($remaining > 0) {
        $padding .= " ";
        $remaining--;
    }

    echo $padding;
    echo $string;
}
|
||||
|
||||
/*
 * Print $node and everything beneath it as an indented ASCII tree.
 *
 * $indent is the number of leading spaces for this level; each level of
 * children is printed three spaces deeper. A false-y $node prints nothing.
 */
function dump_tree($node, $indent = 0) {
    if(!$node) {
        return;
    }

    /* Upper-cased, trimmed node name, with a placeholder when it is empty */
    $label = trim(strtoupper($node->name));
    if(empty($label)) {
        $label = "[EMPTY]";
    }

    /* Heading line: the node name plus a readable node type */
    do_leaf(" + $label (".node_type($node->type).")\n", $indent);

    /* Text nodes are generated by start/end tags and hold the text in
       between, i.e. <B>foo</B> creates a text node whose $node->value
       equals 'foo' */
    if(TIDY_NODETYPE_TEXT == $node->type) {
        do_leaf(" |\n", $indent);
        do_leaf(" +---- Value: '{$node->value}'\n", $indent);
    }

    /* List every attribute of this node together with its value */
    if(count($node->attribs)) {
        do_leaf(" |\n", $indent);
        do_leaf(" +---- Attributes\n", $indent);

        foreach($node->attribs as $attr) {
            do_leaf(" +--{$attr->name}\n", $indent);
            do_leaf(" | +-- Value: {$attr->value}\n", $indent);
        }
    }

    /* Nothing below this node: done */
    if(!$node->has_children()) {
        return;
    }

    /* Recurse into the children, one level deeper */
    foreach($node->children as $kid) {
        dump_tree($kid, $indent + 3);
    }
}
|
||||
|
||||
|
||||
?>
|
63
ext/tidy/examples/urlgrab.php
Normal file
63
ext/tidy/examples/urlgrab.php
Normal file
|
@ -0,0 +1,63 @@
|
|||
<?php
|
||||
|
||||
/*
|
||||
* urlgrab.php
|
||||
*
|
||||
* A simple command-line utility to extract all of the URLS contained
|
||||
* within <A HREF> tags from a document.
|
||||
*
|
||||
* By: John Coggeshall <john@php.net>
|
||||
*
|
||||
* Usage: php urlgrab.php <file>
|
||||
*
|
||||
*/
|
||||
|
||||
/* A file name is required: stop with a usage message instead of handing
   an undefined argument to tidy_parse_file(). */
if(!isset($_SERVER['argv'][1])) {
    file_put_contents("php://stderr", "Usage: php urlgrab.php <file>\n");
    exit(1);
}

/* Create a Tidy Resource */
$tidy = tidy_create();

/* Parse the document */
tidy_parse_file($tidy, $_SERVER['argv'][1]);

/* Fix up the document */
tidy_clean_repair($tidy);

/* Get an object representing everything from the <HTML> tag in */
$html = tidy_get_html($tidy);

/* Traverse the document tree */
print_r(get_links($html));
|
||||
|
||||
/*
 * Collect the HREF value of every <A> tag found at or below $node.
 *
 * Returns a flat array of the values: the node's own HREF first (if any),
 * followed by those of its children in order.
 */
function get_links($node) {
    $found = array();

    /* An <A> tag contributes its own HREF, when it has one */
    if($node->id == TIDY_TAG_A) {
        $href = $node->get_attr_type(TIDY_ATTR_HREF);

        if($href) {
            $found[] = $href->value;
        }
    }

    /* No children: nothing further to descend into */
    if(!$node->has_children()) {
        return $found;
    }

    /* Recurse into each child and append whatever it found */
    foreach($node->children as $child) {
        $found = array_merge($found, get_links($child));
    }

    return $found;
}
|
||||
|
||||
?>
|
55
ext/tidy/package.xml
Normal file
55
ext/tidy/package.xml
Normal file
|
@ -0,0 +1,55 @@
|
|||
<?xml version="1.0" encoding="ISO-8859-1" ?>
|
||||
<!DOCTYPE package SYSTEM "../pear/package.dtd">
|
||||
<package>
|
||||
<name>Tidy</name>
|
||||
<summary>Tidy HTML Repairing and Parsing</summary>
|
||||
<maintainers>
|
||||
<maintainer>
|
||||
<user>john</user>
|
||||
<name>John Coggeshall</name>
|
||||
<email>john@php.net</email>
|
||||
<role>lead</role>
|
||||
</maintainer>
|
||||
</maintainers>
|
||||
<description>
|
||||
Tidy is a binding for the Tidy HTML clean and repair utility which
|
||||
allows you to not only clean and otherwise manipulate HTML documents,
|
||||
but also traverse the document tree using the Zend Engine 2 OO semantics.
|
||||
|
||||
</description>
|
||||
<license>PHP</license>
|
||||
<release>
|
||||
<state>stable</state>
|
||||
<version>0.5</version>
|
||||
<date>TBA</date>
|
||||
<notes>
|
||||
</notes>
|
||||
<filelist>
|
||||
<file role="src" name="config.m4"/>
|
||||
<file role="src" name="tidy.c"/>
|
||||
<file role="src" name="php_tidy.h"/>
|
||||
|
||||
<file role="doc" name="CREDITS"/>
|
||||
<file role="doc" name="README_TIDY"/>
|
||||
<file role="doc" name="TODO"/>
|
||||
<file role="doc" name="examples/cleanhtml.php"/>
|
||||
<file role="doc" name="examples/dumpit.php"/>
|
||||
<file role="doc" name="examples/urlgrab.php"/>
|
||||
|
||||
<file role="test" name="tests/001.phpt"/>
|
||||
<file role="test" name="tests/002.phpt"/>
|
||||
<file role="test" name="tests/003.phpt"/>
|
||||
<file role="test" name="tests/004.phpt"/>
|
||||
<file role="test" name="tests/005.phpt"/>
|
||||
<file role="test" name="tests/005.html"/>
|
||||
<file role="test" name="tests/006.phpt"/>
|
||||
<file role="test" name="tests/007.phpt"/>
|
||||
</filelist>
|
||||
<deps>
|
||||
<dep type="php" rel="ge">5.0.0</dep>
|
||||
</deps>
|
||||
</release>
|
||||
</package>
|
||||
<!--
|
||||
vim:et:ts=1:sw=1
|
||||
-->
|
200
ext/tidy/php_tidy.h
Normal file
200
ext/tidy/php_tidy.h
Normal file
|
@ -0,0 +1,200 @@
|
|||
/*
|
||||
+----------------------------------------------------------------------+
|
||||
| PHP Version 4 |
|
||||
+----------------------------------------------------------------------+
|
||||
| Copyright (c) 1997-2003 The PHP Group |
|
||||
+----------------------------------------------------------------------+
|
||||
| This source file is subject to version 3.0 of the PHP license, |
|
||||
| that is bundled with this package in the file LICENSE, and is |
|
||||
| available through the world-wide-web at the following url: |
|
||||
| http://www.php.net/license/3_0.txt. |
|
||||
| If you did not receive a copy of the PHP license and are unable to |
|
||||
| obtain it through the world-wide-web, please send a note to |
|
||||
| license@php.net so we can mail you a copy immediately. |
|
||||
+----------------------------------------------------------------------+
|
||||
| Author: John Coggeshall <john@php.net> |
|
||||
+----------------------------------------------------------------------+
|
||||
*/
|
||||
|
||||
/* $Id$ */
|
||||
|
||||
#ifndef PHP_TIDY_H
|
||||
#define PHP_TIDY_H
|
||||
|
||||
extern zend_module_entry tidy_module_entry;
|
||||
#define phpext_tidy_ptr &tidy_module_entry
|
||||
|
||||
#ifdef PHP_WIN32
|
||||
#define PHP_TIDY_API __declspec(dllexport)
|
||||
#else
|
||||
#define PHP_TIDY_API
|
||||
#endif
|
||||
|
||||
#ifdef ZTS
|
||||
#include "TSRM.h"
|
||||
#endif
|
||||
|
||||
#include "tidyenum.h"
|
||||
#include "tidy.h"
|
||||
#include "buffio.h"
|
||||
|
||||
#ifdef ZTS
|
||||
#define TIDY_G(v) TSRMG(tidy_globals_id, zend_tidy_globals *, v)
|
||||
#else
|
||||
#define TIDY_G(v) (tidy_globals.v)
|
||||
#endif
|
||||
|
||||
#ifndef TRUE
|
||||
#define TRUE 1
|
||||
#define FALSE 0
|
||||
#endif
|
||||
|
||||
/*
 * Store a boolean FALSE / TRUE in the zval pointed to by __t.
 *
 * Each macro expands to a single statement (do { ... } while (0)) so both
 * assignments stay together when the macro is the body of an unbraced
 * if/else. The argument is parenthesized so that expressions such as *pp
 * can be passed safely. Callers terminate the invocation with ';' as before.
 */
#define TIDY_RV_FALSE(__t) do { (__t)->type = IS_BOOL; (__t)->value.lval = FALSE; } while (0)
#define TIDY_RV_TRUE(__t) do { (__t)->type = IS_BOOL; (__t)->value.lval = TRUE; } while (0)
|
||||
|
||||
#define TIDY_IS_TN_PROP(_p) zend_hash_exists(TIDY_G(tn_prop), #_p, strlen(#_p))
|
||||
#define TIDY_IS_TA_PROP(_p) zend_hash_exists(TIDY_G(ta_prop), #_p, strlen(#_p))
|
||||
|
||||
#define REMOVE_NEWLINE(_z) _z->value.str.val[_z->value.str.len-1] = '\0';
|
||||
|
||||
#define TIDY_TAG_CONST(tag) REGISTER_LONG_CONSTANT("TIDY_TAG_" #tag, TidyTag_##tag, CONST_CS | CONST_PERSISTENT)
|
||||
#define TIDY_ATTR_CONST(attr) REGISTER_LONG_CONSTANT("TIDY_ATTR_" #attr, TidyAttr_##attr, CONST_CS | CONST_PERSISTENT)
|
||||
#define TIDY_NODE_CONST(name, type) REGISTER_LONG_CONSTANT("TIDY_NODETYPE_" #name, TidyNode_##type, CONST_CS | CONST_PERSISTENT)
|
||||
|
||||
#define PHP_IS_TIDYUNDEF 0
|
||||
#define PHP_IS_TIDYNODE 1
|
||||
#define PHP_IS_TIDYATTR 2
|
||||
|
||||
|
||||
struct _PHPTidyDoc {
|
||||
|
||||
TidyDoc doc;
|
||||
TidyBuffer *errbuf;
|
||||
zend_bool parsed;
|
||||
};
|
||||
|
||||
typedef struct _PHPTidyDoc PHPTidyDoc;
|
||||
typedef struct _PHPTidyObj PHPTidyObj;
|
||||
|
||||
struct _PHPTidyObj {
|
||||
zend_object obj;
|
||||
TidyNode node;
|
||||
TidyAttr attr;
|
||||
PHPTidyDoc *tdoc;
|
||||
unsigned int type;
|
||||
};
|
||||
|
||||
|
||||
PHP_MINIT_FUNCTION(tidy);
|
||||
PHP_MSHUTDOWN_FUNCTION(tidy);
|
||||
PHP_RINIT_FUNCTION(tidy);
|
||||
PHP_RSHUTDOWN_FUNCTION(tidy);
|
||||
PHP_MINFO_FUNCTION(tidy);
|
||||
|
||||
PHP_FUNCTION(tidy_create);
|
||||
PHP_FUNCTION(tidy_setopt);
|
||||
PHP_FUNCTION(tidy_getopt);
|
||||
PHP_FUNCTION(tidy_parse_string);
|
||||
PHP_FUNCTION(tidy_parse_file);
|
||||
PHP_FUNCTION(tidy_clean_repair);
|
||||
PHP_FUNCTION(tidy_diagnose);
|
||||
PHP_FUNCTION(tidy_get_output);
|
||||
PHP_FUNCTION(tidy_get_error_buffer);
|
||||
PHP_FUNCTION(tidy_get_release);
|
||||
PHP_FUNCTION(tidy_get_status);
|
||||
PHP_FUNCTION(tidy_get_html_ver);
|
||||
PHP_FUNCTION(tidy_is_xhtml);
|
||||
PHP_FUNCTION(tidy_is_xml);
|
||||
PHP_FUNCTION(tidy_error_count);
|
||||
PHP_FUNCTION(tidy_warning_count);
|
||||
PHP_FUNCTION(tidy_access_count);
|
||||
PHP_FUNCTION(tidy_config_count);
|
||||
PHP_FUNCTION(tidy_load_config);
|
||||
PHP_FUNCTION(tidy_load_config_enc);
|
||||
PHP_FUNCTION(tidy_set_encoding);
|
||||
PHP_FUNCTION(tidy_save_config);
|
||||
|
||||
PHP_FUNCTION(tidy_get_root);
|
||||
PHP_FUNCTION(tidy_get_html);
|
||||
PHP_FUNCTION(tidy_get_head);
|
||||
PHP_FUNCTION(tidy_get_body);
|
||||
|
||||
static void php_tidy_obj_clone(void *, void ** TSRMLS_DC);
|
||||
static void php_tidy_obj_dtor(void *, zend_object_handle TSRMLS_DC);
|
||||
|
||||
zend_object_value php_tidy_create_obj(zend_class_entry * TSRMLS_DC);
|
||||
|
||||
/* object handlers */
|
||||
zval * tidy_property_read(zval *object, zval *member, zend_bool silent TSRMLS_DC);
|
||||
void tidy_property_write(zval *obj, zval *member, zval *value TSRMLS_DC);
|
||||
zval ** tidy_property_get_ptr(zval *obj, zval *member TSRMLS_DC);
|
||||
zval * tidy_object_get(zval *property TSRMLS_DC);
|
||||
void tidy_object_set(zval **property, zval *value TSRMLS_DC);
|
||||
int tidy_property_exists(zval *object, zval *member, int check_empty TSRMLS_DC);
|
||||
void tidy_property_delete(zval *obj, zval *member TSRMLS_DC);
|
||||
HashTable * tidy_get_properties(zval *object TSRMLS_DC);
|
||||
union _zend_function * tidy_get_method(zval *obj, char *method, int method_len TSRMLS_DC);
|
||||
int tidy_call_method(char *method, INTERNAL_FUNCTION_PARAMETERS);
|
||||
union _zend_function * tidy_get_constructor(zval *obj TSRMLS_DC);
|
||||
zend_class_entry * tidy_get_class_entry(zval *obj TSRMLS_DC);
|
||||
int tidy_get_class_name(zval *obj, char **class_name, zend_uint *name_len, int parent TSRMLS_DC);
|
||||
int tidy_objects_compare(zval *obj_one, zval *obj_two TSRMLS_DC);
|
||||
void tidy_object_cast(zval *readobj, zval *writeobj, int type, int should_free TSRMLS_DC);
|
||||
|
||||
zend_bool _php_tidy_attr_call_method(char *method, INTERNAL_FUNCTION_PARAMETERS);
|
||||
zend_bool _php_tidy_node_call_method(char *method, INTERNAL_FUNCTION_PARAMETERS);
|
||||
void _php_tidy_init_prop_hashtables();
|
||||
|
||||
/* resource dtor */
|
||||
void dtor_TidyDoc(zend_rsrc_list_entry * TSRMLS_DC);
|
||||
|
||||
/* constant register helpers */
|
||||
void _php_tidy_register_nodetypes(INIT_FUNC_ARGS);
|
||||
void _php_tidy_register_tags(INIT_FUNC_ARGS);
|
||||
void _php_tidy_register_attributes(INIT_FUNC_ARGS);
|
||||
|
||||
/* Callbacks for hooking Tidy Memory alloc into e*alloc */
|
||||
void * _php_tidy_mem_alloc(size_t size);
|
||||
void * _php_tidy_mem_realloc(void *mem, size_t newsize);
|
||||
void _php_tidy_mem_free(void *mem);
|
||||
void _php_tidy_mem_panic(ctmbstr errmsg);
|
||||
|
||||
ZEND_BEGIN_MODULE_GLOBALS(tidy)
|
||||
ZEND_END_MODULE_GLOBALS(tidy)
|
||||
|
||||
static zend_object_handlers php_tidy_object_handlers = {
|
||||
ZEND_OBJECTS_STORE_HANDLERS,
|
||||
tidy_property_read,
|
||||
tidy_property_write,
|
||||
NULL,
|
||||
NULL,
|
||||
tidy_property_get_ptr,
|
||||
tidy_property_get_ptr,
|
||||
tidy_object_get,
|
||||
tidy_object_set,
|
||||
tidy_property_exists,
|
||||
tidy_property_delete,
|
||||
tidy_get_properties,
|
||||
tidy_get_method,
|
||||
tidy_call_method,
|
||||
tidy_get_constructor,
|
||||
tidy_get_class_entry,
|
||||
tidy_get_class_name,
|
||||
tidy_objects_compare,
|
||||
tidy_object_cast
|
||||
};
|
||||
|
||||
|
||||
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
/*
|
||||
* Local variables:
|
||||
* tab-width: 4
|
||||
* c-basic-offset: 4
|
||||
* End:
|
||||
* vim600: noet sw=4 ts=4 fdm=marker
|
||||
* vim<600: noet sw=4 ts=4
|
||||
*/
|
24
ext/tidy/tests/001.phpt
Normal file
24
ext/tidy/tests/001.phpt
Normal file
|
@ -0,0 +1,24 @@
|
|||
--TEST--
|
||||
Check for tidy presence
|
||||
--SKIPIF--
|
||||
<?php if (!extension_loaded("Tidy")) print "skip"; ?>
|
||||
--POST--
|
||||
--GET--
|
||||
--INI--
|
||||
--FILE--
|
||||
<?php
|
||||
echo "tidy extension is available";
|
||||
/*
|
||||
you can add regression tests for your extension here
|
||||
|
||||
the output of your test code has to be equal to the
|
||||
text in the --EXPECT-- section below for the tests
|
||||
to pass, differences between the output and the
|
||||
expected text are interpreted as failure
|
||||
|
||||
see php4/README.TESTING for further information on
|
||||
writing regression tests
|
||||
*/
|
||||
?>
|
||||
--EXPECT--
|
||||
tidy extension is available
|
25
ext/tidy/tests/002.phpt
Normal file
25
ext/tidy/tests/002.phpt
Normal file
|
@ -0,0 +1,25 @@
|
|||
--TEST--
|
||||
tidy_parse_string()
|
||||
--SKIPIF--
|
||||
<?php if (!extension_loaded("Tidy")) print "skip"; ?>
|
||||
--POST--
|
||||
--GET--
|
||||
--INI--
|
||||
--FILE--
|
||||
<?php
|
||||
|
||||
$tidy = tidy_create();
|
||||
|
||||
tidy_parse_string($tidy, "<HTML></HTML>");
|
||||
|
||||
echo tidy_get_output($tidy);
|
||||
|
||||
?>
|
||||
--EXPECT--
|
||||
<html>
|
||||
<head>
|
||||
<title></title>
|
||||
</head>
|
||||
<body>
|
||||
</body>
|
||||
</html>
|
27
ext/tidy/tests/003.phpt
Normal file
27
ext/tidy/tests/003.phpt
Normal file
|
@ -0,0 +1,27 @@
|
|||
--TEST--
|
||||
tidy_clean_repair()
|
||||
--SKIPIF--
|
||||
<?php if (!extension_loaded("Tidy")) print "skip"; ?>
|
||||
--POST--
|
||||
--GET--
|
||||
--INI--
|
||||
--FILE--
|
||||
<?php
|
||||
|
||||
$tidy = tidy_create();
|
||||
|
||||
tidy_parse_string($tidy, "<HTML></HTML>");
|
||||
tidy_clean_repair($tidy);
|
||||
|
||||
echo tidy_get_output($tidy);
|
||||
|
||||
?>
|
||||
--EXPECT--
|
||||
<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 3.2//EN">
|
||||
<html>
|
||||
<head>
|
||||
<title></title>
|
||||
</head>
|
||||
<body>
|
||||
</body>
|
||||
</html>
|
24
ext/tidy/tests/004.phpt
Normal file
24
ext/tidy/tests/004.phpt
Normal file
|
@ -0,0 +1,24 @@
|
|||
--TEST--
|
||||
tidy_diagnose()
|
||||
--SKIPIF--
|
||||
<?php if (!extension_loaded("Tidy")) print "skip"; ?>
|
||||
--POST--
|
||||
--GET--
|
||||
--INI--
|
||||
--FILE--
|
||||
<?php
|
||||
|
||||
$tidy = tidy_create();
|
||||
|
||||
tidy_parse_string($tidy, "<HTML></HTML>");
|
||||
tidy_diagnose($tidy);
|
||||
echo tidy_get_error_buffer($tidy);
|
||||
|
||||
?>
|
||||
--EXPECT--
|
||||
|
||||
line 1 column 1 - Warning: missing <!DOCTYPE> declaration
|
||||
line 1 column 7 - Warning: discarding unexpected </html>
|
||||
line 1 column 14 - Warning: inserting missing 'title' element
|
||||
Info: Document content looks like HTML 3.2
|
||||
3 warnings, 0 errors were found!
|
1
ext/tidy/tests/005.html
Normal file
1
ext/tidy/tests/005.html
Normal file
|
@ -0,0 +1 @@
|
|||
<HTML></HTML>
|
25
ext/tidy/tests/005.phpt
Normal file
25
ext/tidy/tests/005.phpt
Normal file
|
@ -0,0 +1,25 @@
|
|||
--TEST--
|
||||
tidy_parse_file()
|
||||
--SKIPIF--
|
||||
<?php if (!extension_loaded("Tidy")) print "skip"; ?>
|
||||
--POST--
|
||||
--GET--
|
||||
--INI--
|
||||
--FILE--
|
||||
<?php

$tidy = tidy_create();

/* Locate the fixture relative to this test file so the test does not
   depend on the directory the test runner was started from. */
tidy_parse_file($tidy, dirname(__FILE__)."/005.html");

echo tidy_get_output($tidy);

?>
|
||||
--EXPECT--
|
||||
<html>
|
||||
<head>
|
||||
<title></title>
|
||||
</head>
|
||||
<body>
|
||||
</body>
|
||||
</html>
|
23
ext/tidy/tests/006.phpt
Normal file
23
ext/tidy/tests/006.phpt
Normal file
|
@ -0,0 +1,23 @@
|
|||
--TEST--
|
||||
Verbose tidy_get_error_buffer()
|
||||
--SKIPIF--
|
||||
<?php if (!extension_loaded("Tidy")) print "skip"; ?>
|
||||
--POST--
|
||||
--GET--
|
||||
--INI--
|
||||
--FILE--
|
||||
<?php
|
||||
|
||||
$tidy = tidy_create();
|
||||
|
||||
tidy_parse_string($tidy, "<HTML><asd asdf></HTML>");
|
||||
|
||||
echo tidy_get_error_buffer($tidy, true);
|
||||
|
||||
?>
|
||||
--EXPECT--
|
||||
line 1 column 1 - Warning: missing <!DOCTYPE> declaration
|
||||
line 1 column 7 - Error: <asd> is not recognized!
|
||||
line 1 column 7 - Warning: discarding unexpected <asd>
|
||||
line 1 column 17 - Warning: discarding unexpected </html>
|
||||
line 1 column 7 - Warning: inserting missing 'title' element
|
37
ext/tidy/tests/007.phpt
Normal file
37
ext/tidy/tests/007.phpt
Normal file
|
@ -0,0 +1,37 @@
|
|||
--TEST--
|
||||
Verbose tidy_setopt() / tidy_getopt()
|
||||
--SKIPIF--
|
||||
<?php if (!extension_loaded("Tidy")) print "skip"; ?>
|
||||
--POST--
|
||||
--GET--
|
||||
--INI--
|
||||
--FILE--
|
||||
<?php
|
||||
|
||||
$tidy = tidy_create();
|
||||
echo "Current Value of 'tidy-mark': ";
|
||||
var_dump(tidy_getopt($tidy, "tidy-mark"));
|
||||
tidy_setopt($tidy, "tidy-mark", true);
|
||||
echo "\nNew Value of 'tidy-mark': ";
|
||||
var_dump(tidy_getopt($tidy, "tidy-mark"));
|
||||
echo "Current Value of 'error-file': ";
|
||||
var_dump(tidy_getopt($tidy, "error-file"));
|
||||
tidy_setopt($tidy, "error-file", "foobar");
|
||||
echo "\nNew Value of 'error-file': ";
|
||||
var_dump(tidy_getopt($tidy, "error-file"));
|
||||
echo "Current Value of 'tab-size': ";
|
||||
var_dump(tidy_getopt($tidy, "tab-size"));
|
||||
tidy_setopt($tidy, "tab-size", 10);
|
||||
echo "\nNew Value of 'tab-size': ";
|
||||
var_dump(tidy_getopt($tidy, "tab-size"));
|
||||
?>
|
||||
--EXPECT--
|
||||
Current Value of 'tidy-mark': bool(false)
|
||||
|
||||
New Value of 'tidy-mark': bool(true)
|
||||
Current Value of 'error-file': string(0) ""
|
||||
|
||||
New Value of 'error-file': string(6) "foobar"
|
||||
Current Value of 'tab-size': int(8)
|
||||
|
||||
New Value of 'tab-size': int(10)
|
1874
ext/tidy/tidy.c
Normal file
1874
ext/tidy/tidy.c
Normal file
File diff suppressed because it is too large
Load diff
Loading…
Add table
Add a link
Reference in a new issue