Useful HTMLParse Class for PHP

Filed Under (Code) by Mystalia on 11-04-2009

Tagged Under : , , , , , , ,

/**
 * Small helper around DOMDocument for pulling the inner HTML or the
 * attribute values of elements out of an HTML string.
 *
 * Every Get* method parses its input from scratch and returns a plain,
 * zero-indexed array of strings in document order. Empty or unparseable
 * input yields an empty array.
 */
class HTMLParse
{
  /**
   * The missing innerHTML function: serialize the children of a node.
   *
   * Every child is deep-copied into a scratch DOMDocument, which is then
   * serialized with saveHTML(). The result is returned untrimmed.
   *
   * @param DOMNode $node Node whose children should be serialized.
   * @return string HTML markup of the node's children.
   */
  public function innerHTML($node)
  {
    $scratch = new DOMDocument();
    foreach ($node->childNodes as $child)
    {
      // importNode() is required because $child belongs to another document.
      $scratch->appendChild($scratch->importNode($child, true));
    }
    return $scratch->saveHTML();
  }

  /**
   * Get the trimmed inner HTML of every <$tag> element in the source.
   *
   * Note the argument order: unlike the other Get* methods, the tag comes
   * first here. It is kept that way for backward compatibility.
   *
   * @param string $tag   Tag name to look for.
   * @param string $input HTML source.
   * @return array List of inner-HTML strings.
   */
  public function GetInnerArray($tag, $input)
  {
    $stringarr = array();
    foreach ($this->findElements($input, $tag) as $row)
    {
      $stringarr[] = trim($this->innerHTML($row));
    }
    return $stringarr;
  }

  /**
   * Get the trimmed inner HTML of every <$tag> element whose attribute
   * $attribute equals $value.
   *
   * @param string $input     HTML source.
   * @param string $tag       Tag name to look for.
   * @param string $attribute Attribute to test.
   * @param string $value     Required attribute value (compared as a string).
   * @return array List of inner-HTML strings.
   */
  public function GetInnerArrayFilter($input, $tag, $attribute, $value)
  {
    $stringarr = array();
    foreach ($this->findElements($input, $tag) as $row)
    {
      if ($this->attributeEquals($row, $attribute, $value))
      {
        $stringarr[] = trim($this->innerHTML($row));
      }
    }
    return $stringarr;
  }

  /**
   * Get the value of $attribute for every <$tag> element.
   *
   * Elements that lack the attribute contribute an empty string, because
   * that is what DOMElement::getAttribute() returns for them.
   *
   * @param string $input     HTML source.
   * @param string $tag       Tag name to look for.
   * @param string $attribute Attribute to read.
   * @return array List of attribute values.
   */
  public function GetTagAttribute($input, $tag, $attribute)
  {
    $stringarr = array();
    foreach ($this->findElements($input, $tag) as $row)
    {
      $stringarr[] = $row->getAttribute($attribute);
    }
    return $stringarr;
  }

  /**
   * Get the value of $attribute for every <$tag> element whose attribute
   * $qattrib equals $value.
   *
   * @param string $input     HTML source.
   * @param string $tag       Tag name to look for.
   * @param string $attribute Attribute to read.
   * @param string $qattrib   Attribute to test.
   * @param string $value     Required value of $qattrib (compared as a string).
   * @return array List of attribute values.
   */
  public function GetTagAttributeFilter($input, $tag, $attribute, $qattrib, $value)
  {
    $stringarr = array();
    foreach ($this->findElements($input, $tag) as $row)
    {
      if ($this->attributeEquals($row, $qattrib, $value))
      {
        $stringarr[] = $row->getAttribute($attribute);
      }
    }
    return $stringarr;
  }

  /**
   * Parse $input as HTML and collect every element named $tag.
   *
   * @param string $input HTML source.
   * @param string $tag   Tag name to look for.
   * @return array List of DOMElement objects in document order.
   */
  private function findElements($input, $tag)
  {
    // loadHTML() rejects an empty source (a ValueError on PHP 8, which the
    // @ operator cannot silence), and there is nothing to match in it anyway.
    if ((string) $input === '')
    {
      return array();
    }

    // Collect parser complaints about sloppy markup internally instead of
    // hiding every possible error behind @.
    $previous = libxml_use_internal_errors(true);
    $doc = new DOMDocument();
    $loaded = $doc->loadHTML($input);
    if (!$previous)
    {
      // Only discard the buffer when the caller was not collecting errors.
      libxml_clear_errors();
    }
    libxml_use_internal_errors($previous);

    $elements = array();
    if ($loaded !== false)
    {
      foreach ($doc->getElementsByTagName($tag) as $element)
      {
        $elements[] = $element;
      }
    }
    return $elements;
  }

  /**
   * Tell whether an element's attribute is exactly the given value.
   *
   * The comparison is a strict string comparison, so numeric look-alikes
   * such as "1.0" and "1" (or any text and the integer 0) no longer match.
   *
   * @param DOMElement $element   Element to inspect.
   * @param string     $attribute Attribute name.
   * @param mixed      $value     Expected value; cast to string first.
   * @return bool
   */
  private function attributeEquals($element, $attribute, $value)
  {
    return $element->getAttribute($attribute) === (string) $value;
  }
}
?> 

Mostly written by me.

Useful PHP class for parsing HTML

Filed Under (Code) by Mystalia on 03-03-2009

Tagged Under : , , , , , , ,

I’m hoping this is commented well enough.

/**
 * Small helper around DOMDocument for pulling the inner HTML or the
 * attribute values of elements out of an HTML string.
 *
 * Every Get* method parses its input from scratch and returns a plain,
 * zero-indexed array of strings in document order. Empty or unparseable
 * input yields an empty array.
 */
class HTMLParse
{
  /**
   * Missing InnerHTML function: serialize the children of a node.
   *
   * Every child is deep-copied into a scratch DOMDocument, which is then
   * serialized with saveHTML(). The result is returned untrimmed.
   *
   * @param DOMNode $node Node whose children should be serialized.
   * @return string HTML markup of the node's children.
   */
  public function innerHTML($node)
  {
    $scratch = new DOMDocument();
    foreach ($node->childNodes as $child)
    {
      // importNode() is required because $child belongs to another document.
      $scratch->appendChild($scratch->importNode($child, true));
    }
    return $scratch->saveHTML();
  }

  /**
   * Get the trimmed inner HTML of every <$tag> element in the source.
   *
   * Note the argument order: unlike the other Get* methods, the tag comes
   * first here. It is kept that way for backward compatibility.
   *
   * @param string $tag   Tag name to look for.
   * @param string $input HTML source.
   * @return array List of inner-HTML strings.
   */
  public function GetInnerArray($tag, $input)
  {
    $stringarr = array();
    foreach ($this->findElements($input, $tag) as $row)
    {
      $stringarr[] = trim($this->innerHTML($row));
    }
    return $stringarr;
  }

  /**
   * Get the trimmed inner HTML of every <$tag> element whose attribute
   * $attribute equals $value.
   *
   * @param string $input     HTML source.
   * @param string $tag       Tag name to look for.
   * @param string $attribute Attribute to test.
   * @param string $value     Required attribute value (compared as a string).
   * @return array List of inner-HTML strings.
   */
  public function GetInnerArrayFilter($input, $tag, $attribute, $value)
  {
    $stringarr = array();
    foreach ($this->findElements($input, $tag) as $row)
    {
      if ($this->attributeEquals($row, $attribute, $value))
      {
        $stringarr[] = trim($this->innerHTML($row));
      }
    }
    return $stringarr;
  }

  /**
   * Get the value of $attribute for every <$tag> element.
   *
   * Elements that lack the attribute contribute an empty string, because
   * that is what DOMElement::getAttribute() returns for them.
   *
   * @param string $input     HTML source.
   * @param string $tag       Tag name to look for.
   * @param string $attribute Attribute to read.
   * @return array List of attribute values.
   */
  public function GetTagAttribute($input, $tag, $attribute)
  {
    $stringarr = array();
    foreach ($this->findElements($input, $tag) as $row)
    {
      $stringarr[] = $row->getAttribute($attribute);
    }
    return $stringarr;
  }

  /**
   * Get the value of $attribute for every <$tag> element whose attribute
   * $qattrib equals $value.
   *
   * @param string $input     HTML source.
   * @param string $tag       Tag name to look for.
   * @param string $attribute Attribute to read.
   * @param string $qattrib   Attribute to test.
   * @param string $value     Required value of $qattrib (compared as a string).
   * @return array List of attribute values.
   */
  public function GetTagAttributeFilter($input, $tag, $attribute, $qattrib, $value)
  {
    $stringarr = array();
    foreach ($this->findElements($input, $tag) as $row)
    {
      if ($this->attributeEquals($row, $qattrib, $value))
      {
        $stringarr[] = $row->getAttribute($attribute);
      }
    }
    return $stringarr;
  }

  /**
   * Parse $input as HTML and collect every element named $tag.
   *
   * @param string $input HTML source.
   * @param string $tag   Tag name to look for.
   * @return array List of DOMElement objects in document order.
   */
  private function findElements($input, $tag)
  {
    // loadHTML() rejects an empty source (a ValueError on PHP 8, which the
    // @ operator cannot silence), and there is nothing to match in it anyway.
    if ((string) $input === '')
    {
      return array();
    }

    // Collect parser complaints about sloppy markup internally instead of
    // hiding every possible error behind @.
    $previous = libxml_use_internal_errors(true);
    $doc = new DOMDocument();
    $loaded = $doc->loadHTML($input);
    if (!$previous)
    {
      // Only discard the buffer when the caller was not collecting errors.
      libxml_clear_errors();
    }
    libxml_use_internal_errors($previous);

    $elements = array();
    if ($loaded !== false)
    {
      foreach ($doc->getElementsByTagName($tag) as $element)
      {
        $elements[] = $element;
      }
    }
    return $elements;
  }

  /**
   * Tell whether an element's attribute is exactly the given value.
   *
   * The comparison is a strict string comparison, so numeric look-alikes
   * such as "1.0" and "1" (or any text and the integer 0) no longer match.
   *
   * @param DOMElement $element   Element to inspect.
   * @param string     $attribute Attribute name.
   * @param mixed      $value     Expected value; cast to string first.
   * @return bool
   */
  private function attributeEquals($element, $attribute, $value)
  {
    return $element->getAttribute($attribute) === (string) $value;
  }
}
?>

If you would like help with this class, please comment!
If you have an idea to improve this class, please comment!

Plain Google Results in PHP

Filed Under (Code) by Mystalia on 26-05-2008

Tagged Under : , ,


/**
 * Print the result URLs of a Google search as plain text.
 *
 * Fetches the first 100 results for $term, then breaks the returned HTML
 * into lines so that every "http..." run and every double quote starts a
 * new line. A line is echoed when it does not start with
 * http://www.google.com, does not start with a double quote, and ends
 * with a slash.
 *
 * Nothing is printed when the page cannot be fetched.
 *
 * @param string $term Raw (not URL-encoded) search term.
 * @return void
 */
function PlainResults($term)
{
// Encode the term so spaces, '&', '#' and the like cannot corrupt the query string.
$content = file_get_contents('http://www.google.com/search?num=100&safe=off&q=' . urlencode($term));
if ($content !== false)
{
// Push every URL and every double quote to the start of its own line.
$content = str_replace("http", "\nhttp", $content);
$content = str_replace("\"", "\n\"", $content);

// Walk every line; a blank line in the page must not end the scan early.
foreach (explode("\n", $content) as $line)
{
$temp = trim($line);
if(substr($temp, 0, 21) !== "http://www.google.com" and substr($temp, 0, 1) !== "\"" and substr($temp, -1) === "/")
{
// NOTE(review): the line break inside this string may be where the blog
// renderer swallowed a "<br />" tag - confirm against the original post.
echo "$temp
\n";
}
}
}
}

This script outputs Google's directory results as plain URLs.