1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186
|
<?php
class XmlTypeCheck {
    /**
     * Will be set to true or false to indicate whether the file is
     * well-formed XML. Note that this doesn't check schema validity.
     * A file that is missing or cannot be opened/read is reported as
     * not well-formed.
     */
    public $wellFormed = false;

    /**
     * Will be set to true if the optional element filter (or the optional
     * processing instruction handler) returned a match at some point.
     */
    public $filterMatch = false;

    /**
     * Name of the document's root element, including any namespace
     * as an expanded URL.
     */
    public $rootElement = '';

    /**
     * Optional callback invoked from elementClose() with the element name,
     * its attributes and its collected text contents.
     * Declared explicitly (it used to be created dynamically by the
     * constructor); kept public so existing outside reads keep working.
     */
    public $filterCallback = null;

    /**
     * A stack of strings containing the data of each xml element as it's processed. Append
     * data to the top string of the stack, then pop off the string and process it when the
     * element is closed.
     */
    protected $elementData = array();

    /**
     * A stack of element names and attributes, as we process them.
     */
    protected $elementDataContext = array();

    /**
     * Current depth of the data stack.
     */
    protected $stackDepth = 0;

    /**
     * Additional parsing options
     */
    private $parserOptions = array(
        'processing_instruction_handler' => '',
    );

    /**
     * Parses the given file immediately; results are available afterwards in
     * $wellFormed, $filterMatch and $rootElement.
     *
     * @param $file string filename
     * @param $filterCallback callable (optional)
     *        Function to call to do additional custom validity checks from the
     *        SAX element handler event. This gives you access to the element
     *        namespace, name, attributes, and text contents.
     *        Filter should return 'true' to toggle on $this->filterMatch
     * @param array $options list of additional parsing options:
     *        processing_instruction_handler: Callback for xml_set_processing_instruction_handler
     */
    public function __construct( $file, $filterCallback=null, $options=array() ) {
        $this->filterCallback = $filterCallback;
        $this->parserOptions = array_merge( $this->parserOptions, $options );
        $this->run( $file );
    }

    /**
     * Get the root element. Simple accessor to $rootElement
     *
     * @return string
     */
    public function getRootElement() {
        return $this->rootElement;
    }

    /**
     * Create a namespace-aware parser, wire up the handlers, parse the file
     * and record the outcome in $this->wellFormed.
     *
     * @param $fname string path of the file to check
     */
    private function run( $fname ) {
        $parser = xml_parser_create_ns( 'UTF-8' );

        // case folding violates XML standard, turn it off
        xml_parser_set_option( $parser, XML_OPTION_CASE_FOLDING, false );

        xml_set_element_handler( $parser, array( $this, 'rootElementOpen' ), false );

        if ( $this->parserOptions['processing_instruction_handler'] ) {
            xml_set_processing_instruction_handler(
                $parser,
                array( $this, 'processingInstructionHandler' )
            );
        }

        // Only a file that was actually read and parsed without error counts
        // as well-formed; a missing or unreadable file does not.
        $this->wellFormed = $this->parseFile( $parser, $fname );

        xml_parser_free( $parser );
    }

    /**
     * Feed the file to the parser in 32 KiB chunks.
     *
     * @param $parser the XML parser created by run()
     * @param $fname string path of the file to read
     * @return bool true if the whole file was parsed without error
     */
    private function parseFile( $parser, $fname ) {
        if ( !file_exists( $fname ) ) {
            return false;
        }

        $file = fopen( $fname, "rb" );
        if ( !$file ) {
            return false;
        }

        $ok = true;
        do {
            $chunk = fread( $file, 32768 );
            if ( $chunk === false ) {
                // Read error: stop here, otherwise feof() may never become
                // true and the loop would not terminate.
                $ok = false;
                break;
            }

            if ( !xml_parse( $parser, $chunk, feof( $file ) ) ) {
                // XML isn't well-formed!
                $ok = false;
                break;
            }
        } while ( !feof( $file ) );

        fclose( $file );

        return $ok;
    }

    /**
     * Handler for the first opening tag: records the root element name and
     * then either installs the full element/character-data handlers (when a
     * filter callback is set) or disables element handling altogether.
     *
     * @param $parser
     * @param $name string expanded name of the root element
     * @param $attribs array attributes of the root element
     */
    private function rootElementOpen( $parser, $name, $attribs ) {
        $this->rootElement = $name;

        if ( is_callable( $this->filterCallback ) ) {
            xml_set_element_handler(
                $parser,
                array( $this, 'elementOpen' ),
                array( $this, 'elementClose' )
            );
            xml_set_character_data_handler( $parser, array( $this, 'elementData' ) );
            // The root element itself must go through the filter as well
            $this->elementOpen( $parser, $name, $attribs );
        } else {
            // We only need the first open element
            xml_set_element_handler( $parser, false, false );
        }
    }

    /**
     * Push a new element (name, attributes and an empty data buffer) onto
     * the stacks.
     *
     * @param $parser
     * @param $name
     * @param $attribs
     */
    private function elementOpen( $parser, $name, $attribs ) {
        $this->elementDataContext[] = array( $name, $attribs );
        $this->elementData[] = '';
        $this->stackDepth++;
    }

    /**
     * Pop the element that is being closed off the stacks and run the filter
     * callback on its name, attributes and collected text.
     *
     * @param $parser
     * @param $name
     */
    private function elementClose( $parser, $name ) {
        list( $name, $attribs ) = array_pop( $this->elementDataContext );
        $data = array_pop( $this->elementData );
        $this->stackDepth--;

        if ( call_user_func(
            $this->filterCallback,
            $name,
            $attribs,
            $data
        ) ) {
            // Filter hit!
            $this->filterMatch = true;
        }
    }

    /**
     * Append character data to the buffer of the innermost open element.
     * Each chunk is trimmed individually before it is appended.
     *
     * @param $parser
     * @param $data
     */
    private function elementData( $parser, $data ) {
        // xml_set_character_data_handler breaks the data on & characters, so
        // we collect any data here, and we'll run the callback in elementClose
        if ( $this->stackDepth > 0 ) {
            // Guard: with no open element there is no buffer to append to
            $this->elementData[ $this->stackDepth - 1 ] .= trim( $data );
        }
    }

    /**
     * Forward a processing instruction to the user-supplied handler; a truthy
     * return value counts as a filter match.
     *
     * @param $parser
     * @param $target
     * @param $data
     */
    private function processingInstructionHandler( $parser, $target, $data ) {
        if ( call_user_func( $this->parserOptions['processing_instruction_handler'], $target, $data ) ) {
            // Filter hit!
            $this->filterMatch = true;
        }
    }
}
|