Mar
03
Useful PHP class for parsing HTML
Filed Under (Code) by Mystalia on 03-03-2009
Tagged Under : Code, domdocument, html, htmlparse, parse, php, tutorial, xml
I’m hoping this is commented well enough.
/**
 * Small helper around DOMDocument for pulling inner HTML and attribute
 * values out of an HTML string.
 *
 * Every Get* method parses the given markup from scratch and returns a plain
 * list (0-indexed array) of strings, in document order. Input that is empty
 * or cannot be parsed yields an empty array.
 */
class HTMLParse
{
    /**
     * Serialize the children of a node as HTML (DOMNode has no innerHTML).
     *
     * @param DOMNode $node Node whose children should be serialized.
     *
     * @return string Markup exactly as DOMDocument::saveHTML() produces it;
     *                it is not trimmed here (the Get* methods trim it).
     */
    public function innerHTML($node)
    {
        // Deep-copy every child into a scratch document and dump that
        // document, so the wrapping tag of $node itself is left out.
        $doc = new DOMDocument();
        foreach ($node->childNodes as $child) {
            $doc->appendChild($doc->importNode($child, true));
        }

        return $doc->saveHTML();
    }

    /**
     * Get the trimmed inner HTML of every <$tag> element in $input.
     *
     * Note: the parameter order is ($tag, $input), unlike the other methods
     * of this class; it is kept that way for backward compatibility.
     *
     * @param string $tag   Tag name to look for.
     * @param string $input HTML source.
     *
     * @return array List of inner-HTML strings.
     */
    public function GetInnerArray($tag, $input)
    {
        $result = array();
        foreach ($this->findElements($input, $tag) as $element) {
            $result[] = trim($this->innerHTML($element));
        }

        return $result;
    }

    /**
     * Get the trimmed inner HTML of every <$tag> element whose $attribute
     * equals $value.
     *
     * @param string $input     HTML source.
     * @param string $tag       Tag name to look for.
     * @param string $attribute Attribute to test.
     * @param string $value     Required attribute value (compared as a string).
     *
     * @return array List of inner-HTML strings.
     */
    public function GetInnerArrayFilter($input, $tag, $attribute, $value)
    {
        $result = array();
        foreach ($this->findElements($input, $tag) as $element) {
            if ($this->attributeEquals($element, $attribute, $value)) {
                $result[] = trim($this->innerHTML($element));
            }
        }

        return $result;
    }

    /**
     * Get the value of $attribute from every <$tag> element in $input.
     *
     * Elements that lack the attribute contribute an empty string, because
     * that is what DOMElement::getAttribute() returns for them.
     *
     * @param string $input     HTML source.
     * @param string $tag       Tag name to look for.
     * @param string $attribute Attribute to read.
     *
     * @return array List of attribute values.
     */
    public function GetTagAttribute($input, $tag, $attribute)
    {
        $result = array();
        foreach ($this->findElements($input, $tag) as $element) {
            $result[] = $element->getAttribute($attribute);
        }

        return $result;
    }

    /**
     * Get the value of $attribute from every <$tag> element whose $qattrib
     * equals $value.
     *
     * @param string $input     HTML source.
     * @param string $tag       Tag name to look for.
     * @param string $attribute Attribute to read.
     * @param string $qattrib   Attribute to test.
     * @param string $value     Required value of $qattrib (compared as a string).
     *
     * @return array List of attribute values.
     */
    public function GetTagAttributeFilter($input, $tag, $attribute, $qattrib, $value)
    {
        $result = array();
        foreach ($this->findElements($input, $tag) as $element) {
            if ($this->attributeEquals($element, $qattrib, $value)) {
                $result[] = $element->getAttribute($attribute);
            }
        }

        return $result;
    }

    /**
     * Parse $input and return all <$tag> elements in document order.
     *
     * @param string $input HTML source.
     * @param string $tag   Tag name to look for.
     *
     * @return array List of DOMElement objects; empty when $input is blank
     *               or cannot be parsed.
     */
    private function findElements($input, $tag)
    {
        $input = (string) $input;

        // loadHTML() rejects an empty string (a warning on older PHP, a
        // ValueError on PHP 8 that "@" cannot silence), so bail out early.
        if (trim($input) === '') {
            return array();
        }

        // Real-world HTML is rarely valid; collect libxml's complaints
        // internally instead of emitting warnings, then restore whatever
        // error mode the caller had configured.
        $previous = libxml_use_internal_errors(true);
        $doc = new DOMDocument();
        $loaded = $doc->loadHTML($input);
        if (!$previous) {
            // Only discard the buffer when the caller was not collecting
            // errors themselves.
            libxml_clear_errors();
        }
        libxml_use_internal_errors($previous);

        if ($loaded === false) {
            return array();
        }

        // Copy the live node list into a plain array; the nodes keep their
        // owner document alive after $doc goes out of scope.
        return iterator_to_array($doc->getElementsByTagName($tag), false);
    }

    /**
     * Tell whether an element's attribute has exactly the given value.
     *
     * The comparison is a strict string comparison so that numeric-looking
     * values such as "1e3" and "1000" are not treated as equal.
     *
     * @param DOMElement $element   Element to inspect.
     * @param string     $attribute Attribute name.
     * @param mixed      $value     Expected value; cast to string first.
     *
     * @return bool
     */
    private function attributeEquals($element, $attribute, $value)
    {
        return $element->getAttribute($attribute) === (string) $value;
    }
}
If you would like help with this class, please comment!
If you have an idea to improve this class, please comment!