Useful PHP class for parsing HTML

Filed Under (Code) by Mystalia on 03-03-2009

Tagged Under : , , , , , , ,

I’m hoping this is commented well enough.

01
/**
 * Small helper around DOMDocument for pulling inner HTML and attribute
 * values out of an HTML string.
 *
 * Every Get* method parses the given HTML string, selects elements by tag
 * name and returns a plain, zero-indexed array of strings in document order.
 *
 * Note the argument order: GetInnerArray() takes ($tag, $input), while the
 * other Get* methods take $input first. This is kept as-is for existing
 * callers.
 */
class HTMLParse
{
  /**
   * Return the serialized HTML of all children of a node (its "inner HTML").
   *
   * Each child is serialized through the document that owns it, so no
   * temporary document or node copy is needed.
   *
   * @param DOMNode $node Node whose children should be serialized.
   * @return string Concatenated HTML of the child nodes; '' when there are none.
   */
  public function innerHTML($node)
  {
    // A DOMDocument has no ownerDocument; it is its own serializer.
    $owner = ($node instanceof DOMDocument) ? $node : $node->ownerDocument;

    $html = '';
    foreach ($node->childNodes as $child) {
      $html .= $owner->saveHTML($child);
    }

    return $html;
  }

  /**
   * Get the trimmed inner HTML of every $tag element found in $input.
   *
   * @param string $tag   Tag name to select (e.g. 'p').
   * @param string $input HTML source to parse.
   * @return array List of trimmed inner-HTML strings, one per matching element.
   */
  public function GetInnerArray($tag, $input)
  {
    $doc = $this->loadDocument($input);

    $results = array();
    foreach ($doc->getElementsByTagName($tag) as $element) {
      $results[] = trim($this->innerHTML($element));
    }

    return $results;
  }

  /**
   * Get the trimmed inner HTML of every $tag element whose $attribute
   * equals $value.
   *
   * The comparison is an exact string match. getAttribute() yields '' for a
   * missing attribute, so passing '' as $value also selects elements that
   * do not carry the attribute at all.
   *
   * @param string $input     HTML source to parse.
   * @param string $tag       Tag name to select.
   * @param string $attribute Attribute to test on each element.
   * @param string $value     Required attribute value.
   * @return array List of trimmed inner-HTML strings of the matching elements.
   */
  public function GetInnerArrayFilter($input, $tag, $attribute, $value)
  {
    $doc = $this->loadDocument($input);
    $wanted = (string) $value;

    $results = array();
    foreach ($doc->getElementsByTagName($tag) as $element) {
      // Strict string comparison: "1e3" must not match "1000".
      if ($element->getAttribute($attribute) === $wanted) {
        $results[] = trim($this->innerHTML($element));
      }
    }

    return $results;
  }

  /**
   * Get the value of $attribute from every $tag element in $input.
   *
   * Elements without the attribute contribute '' (the value getAttribute()
   * returns for a missing attribute).
   *
   * @param string $input     HTML source to parse.
   * @param string $tag       Tag name to select.
   * @param string $attribute Attribute whose value is collected.
   * @return array List of attribute values, one per matching element.
   */
  public function GetTagAttribute($input, $tag, $attribute)
  {
    $doc = $this->loadDocument($input);

    $results = array();
    foreach ($doc->getElementsByTagName($tag) as $element) {
      $results[] = $element->getAttribute($attribute);
    }

    return $results;
  }

  /**
   * Get the value of $attribute from every $tag element whose $qattrib
   * attribute equals $value (exact string match).
   *
   * @param string $input     HTML source to parse.
   * @param string $tag       Tag name to select.
   * @param string $attribute Attribute whose value is collected.
   * @param string $qattrib   Attribute used to filter the elements.
   * @param string $value     Required value of $qattrib.
   * @return array List of $attribute values of the matching elements.
   */
  public function GetTagAttributeFilter($input, $tag, $attribute, $qattrib, $value)
  {
    $doc = $this->loadDocument($input);
    $wanted = (string) $value;

    $results = array();
    foreach ($doc->getElementsByTagName($tag) as $element) {
      // Strict string comparison: "1e3" must not match "1000".
      if ($element->getAttribute($qattrib) === $wanted) {
        $results[] = $element->getAttribute($attribute);
      }
    }

    return $results;
  }

  /**
   * Parse an HTML string into a DOMDocument without emitting parser warnings.
   *
   * Empty input yields an empty document instead of being handed to
   * loadHTML(), which rejects an empty string on PHP 8.
   *
   * @param string $input HTML source to parse.
   * @return DOMDocument The parsed (possibly empty) document.
   */
  private function loadDocument($input)
  {
    $doc = new DOMDocument();
    $html = (string) $input;
    if ($html === '') {
      return $doc;
    }

    // Collect libxml errors internally rather than silencing with "@",
    // then restore whatever error mode the caller had configured.
    $previous = libxml_use_internal_errors(true);
    $doc->loadHTML($html);
    if (!$previous) {
      // We enabled buffering ourselves, so drop what we buffered.
      libxml_clear_errors();
    }
    libxml_use_internal_errors($previous);

    return $doc;
  }
}
78
?> 

If you would like help with this class, please comment!
If you have an idea to improve this class, please comment!