Filed Under (Code) by Mystalia on 11-04-2009
/**
 * Small helper around DOMDocument for pulling inner HTML and attribute
 * values out of an HTML string by tag name.
 *
 * Every Get* method parses the given markup from scratch and returns a plain
 * list of strings in document order. Empty input yields an empty list.
 */
class HTMLParse
{
    /**
     * Serialise the children of a DOM node to HTML (the "missing" innerHTML).
     *
     * Each child is deep-copied into a scratch document which is then saved
     * with saveHTML(). The Get* methods of this class trim() the result.
     *
     * @param DOMNode $node Node whose children should be serialised.
     * @return string|false Whatever DOMDocument::saveHTML() returns for the copy.
     */
    public function innerHTML($node)
    {
        $scratch = new DOMDocument();
        foreach ($node->childNodes as $child) {
            $scratch->appendChild($scratch->importNode($child, true));
        }

        return $scratch->saveHTML();
    }

    /**
     * Get the trimmed inner HTML of every <$tag> element in $input.
     *
     * Note the argument order ($tag first) differs from the other methods;
     * it is kept as-is for existing callers.
     *
     * @param string $tag   Tag name to search for.
     * @param string $input HTML source.
     * @return array List of inner-HTML strings.
     */
    public function GetInnerArray($tag, $input)
    {
        $stringarr = array();
        foreach ($this->findElements($input, $tag) as $row) {
            $stringarr[] = trim($this->innerHTML($row));
        }

        return $stringarr;
    }

    /**
     * Get the trimmed inner HTML of every <$tag> element whose $attribute
     * equals $value.
     *
     * @param string $input     HTML source.
     * @param string $tag       Tag name to search for.
     * @param string $attribute Attribute to test.
     * @param string $value     Required attribute value (compared as a string).
     * @return array List of inner-HTML strings.
     */
    public function GetInnerArrayFilter($input, $tag, $attribute, $value)
    {
        $stringarr = array();
        foreach ($this->findElements($input, $tag) as $row) {
            if ($this->attributeEquals($row, $attribute, $value)) {
                $stringarr[] = trim($this->innerHTML($row));
            }
        }

        return $stringarr;
    }

    /**
     * Get the value of $attribute from every <$tag> element in $input.
     *
     * Elements without the attribute contribute whatever getAttribute()
     * returns for a missing attribute.
     *
     * @param string $input     HTML source.
     * @param string $tag       Tag name to search for.
     * @param string $attribute Attribute to read.
     * @return array List of attribute values.
     */
    public function GetTagAttribute($input, $tag, $attribute)
    {
        $stringarr = array();
        foreach ($this->findElements($input, $tag) as $row) {
            $stringarr[] = $row->getAttribute($attribute);
        }

        return $stringarr;
    }

    /**
     * Get the value of $attribute from every <$tag> element whose $qattrib
     * equals $value.
     *
     * @param string $input     HTML source.
     * @param string $tag       Tag name to search for.
     * @param string $attribute Attribute to read.
     * @param string $qattrib   Attribute to test.
     * @param string $value     Required value of $qattrib (compared as a string).
     * @return array List of attribute values.
     */
    public function GetTagAttributeFilter($input, $tag, $attribute, $qattrib, $value)
    {
        $stringarr = array();
        foreach ($this->findElements($input, $tag) as $row) {
            if ($this->attributeEquals($row, $qattrib, $value)) {
                $stringarr[] = $row->getAttribute($attribute);
            }
        }

        return $stringarr;
    }

    /**
     * Parse $input as HTML and return all <$tag> elements in document order.
     *
     * @param string $input HTML source.
     * @param string $tag   Tag name to search for.
     * @return array List of DOMElement objects; empty when nothing can be parsed.
     */
    private function findElements($input, $tag)
    {
        $input = (string) $input;
        if ($input === '') {
            // loadHTML() rejects an empty string (a ValueError on PHP 8, which
            // "@" cannot silence), so answer "no elements" up front.
            return array();
        }

        $doc = new DOMDocument();

        // Real-world HTML is rarely valid; buffer libxml's complaints instead
        // of hiding every possible error behind "@".
        $previous = libxml_use_internal_errors(true);
        $loaded = $doc->loadHTML($input);
        if (!$previous) {
            // Only discard the buffer when we were the ones who enabled it.
            libxml_clear_errors();
        }
        libxml_use_internal_errors($previous);

        if ($loaded === false) {
            return array();
        }

        $elements = array();
        foreach ($doc->getElementsByTagName($tag) as $element) {
            $elements[] = $element;
        }

        return $elements;
    }

    /**
     * Test whether an element's attribute is exactly $value.
     *
     * A strict string comparison is used on purpose: with "==" PHP compares
     * numeric-looking strings as numbers, so "1e1" would match "10".
     *
     * @param DOMElement $element   Element to inspect.
     * @param string     $attribute Attribute name.
     * @param mixed      $value     Expected value; cast to string before comparing.
     * @return bool
     */
    private function attributeEquals($element, $attribute, $value)
    {
        return $element->getAttribute($attribute) === (string) $value;
    }
}
?> |
|
Mostly written by me.
Filed Under (Code) by Mystalia on 03-03-2009
I’m hoping this is commented well enough.
/**
 * Small helper around DOMDocument for pulling inner HTML and attribute
 * values out of an HTML string by tag name.
 *
 * Every Get* method parses the given markup from scratch and returns a plain
 * list of strings in document order. Empty input yields an empty list.
 */
class HTMLParse
{
    /**
     * Serialise the children of a DOM node to HTML (the "missing" innerHTML).
     *
     * Each child is deep-copied into a scratch document which is then saved
     * with saveHTML(). The Get* methods of this class trim() the result.
     *
     * @param DOMNode $node Node whose children should be serialised.
     * @return string|false Whatever DOMDocument::saveHTML() returns for the copy.
     */
    public function innerHTML($node)
    {
        $scratch = new DOMDocument();
        foreach ($node->childNodes as $child) {
            $scratch->appendChild($scratch->importNode($child, true));
        }

        return $scratch->saveHTML();
    }

    /**
     * Get the trimmed inner HTML of every <$tag> element in $input.
     *
     * Note the argument order ($tag first) differs from the other methods;
     * it is kept as-is for existing callers.
     *
     * @param string $tag   Tag name to search for.
     * @param string $input HTML source.
     * @return array List of inner-HTML strings.
     */
    public function GetInnerArray($tag, $input)
    {
        $stringarr = array();
        foreach ($this->findElements($input, $tag) as $row) {
            $stringarr[] = trim($this->innerHTML($row));
        }

        return $stringarr;
    }

    /**
     * Get the trimmed inner HTML of every <$tag> element whose $attribute
     * equals $value.
     *
     * @param string $input     HTML source.
     * @param string $tag       Tag name to search for.
     * @param string $attribute Attribute to test.
     * @param string $value     Required attribute value (compared as a string).
     * @return array List of inner-HTML strings.
     */
    public function GetInnerArrayFilter($input, $tag, $attribute, $value)
    {
        $stringarr = array();
        foreach ($this->findElements($input, $tag) as $row) {
            if ($this->attributeEquals($row, $attribute, $value)) {
                $stringarr[] = trim($this->innerHTML($row));
            }
        }

        return $stringarr;
    }

    /**
     * Get the value of $attribute from every <$tag> element in $input.
     *
     * Elements without the attribute contribute whatever getAttribute()
     * returns for a missing attribute.
     *
     * @param string $input     HTML source.
     * @param string $tag       Tag name to search for.
     * @param string $attribute Attribute to read.
     * @return array List of attribute values.
     */
    public function GetTagAttribute($input, $tag, $attribute)
    {
        $stringarr = array();
        foreach ($this->findElements($input, $tag) as $row) {
            $stringarr[] = $row->getAttribute($attribute);
        }

        return $stringarr;
    }

    /**
     * Get the value of $attribute from every <$tag> element whose $qattrib
     * equals $value.
     *
     * @param string $input     HTML source.
     * @param string $tag       Tag name to search for.
     * @param string $attribute Attribute to read.
     * @param string $qattrib   Attribute to test.
     * @param string $value     Required value of $qattrib (compared as a string).
     * @return array List of attribute values.
     */
    public function GetTagAttributeFilter($input, $tag, $attribute, $qattrib, $value)
    {
        $stringarr = array();
        foreach ($this->findElements($input, $tag) as $row) {
            if ($this->attributeEquals($row, $qattrib, $value)) {
                $stringarr[] = $row->getAttribute($attribute);
            }
        }

        return $stringarr;
    }

    /**
     * Parse $input as HTML and return all <$tag> elements in document order.
     *
     * @param string $input HTML source.
     * @param string $tag   Tag name to search for.
     * @return array List of DOMElement objects; empty when nothing can be parsed.
     */
    private function findElements($input, $tag)
    {
        $input = (string) $input;
        if ($input === '') {
            // loadHTML() rejects an empty string (a ValueError on PHP 8, which
            // "@" cannot silence), so answer "no elements" up front.
            return array();
        }

        $doc = new DOMDocument();

        // Real-world HTML is rarely valid; buffer libxml's complaints instead
        // of hiding every possible error behind "@".
        $previous = libxml_use_internal_errors(true);
        $loaded = $doc->loadHTML($input);
        if (!$previous) {
            // Only discard the buffer when we were the ones who enabled it.
            libxml_clear_errors();
        }
        libxml_use_internal_errors($previous);

        if ($loaded === false) {
            return array();
        }

        $elements = array();
        foreach ($doc->getElementsByTagName($tag) as $element) {
            $elements[] = $element;
        }

        return $elements;
    }

    /**
     * Test whether an element's attribute is exactly $value.
     *
     * A strict string comparison is used on purpose: with "==" PHP compares
     * numeric-looking strings as numbers, so "1e1" would match "10".
     *
     * @param DOMElement $element   Element to inspect.
     * @param string     $attribute Attribute name.
     * @param mixed      $value     Expected value; cast to string before comparing.
     * @return bool
     */
    private function attributeEquals($element, $attribute, $value)
    {
        return $element->getAttribute($attribute) === (string) $value;
    }
}
| 78 | ?> |
If you would like help with this class, please comment!
If you have an idea to improve this class, please comment!
Filed Under (Code) by Mystalia on 26-05-2008
/**
 * Fetch a Google search results page for $term and echo the URLs found in it.
 *
 * The page is split so that every "http" occurrence and every double quote
 * starts a new line. A line is printed when, after trimming, it
 *   - does not start with "http://www.google.com",
 *   - does not start with a double quote, and
 *   - ends with "/".
 *
 * Nothing is printed when the page cannot be fetched.
 *
 * @param string $term Raw (not yet URL-encoded) search term.
 * @return void
 */
function PlainResults($term)
{
    // Encode the term so spaces, "&", "#" etc. cannot break the query string.
    $content = file_get_contents('http://www.google.com/search?num=100&safe=off&q=' . urlencode($term));
    if ($content === false) {
        return;
    }

    // Force every URL and every quote onto the start of its own line, so a
    // URL inside href="..." becomes a line that is cut off at the closing quote.
    $content = str_replace("http", "\nhttp", $content);
    $content = str_replace("\"", "\n\"", $content);

    // Walk every line. The earlier index loop stopped at the first blank line
    // and ran off the end of the array when there was none.
    foreach (explode("\n", $content) as $line) {
        $temp = trim($line);
        if ($temp === '') {
            continue;
        }

        $isGoogleLink    = substr($temp, 0, 21) === "http://www.google.com";
        $startsWithQuote = substr($temp, 0, 1) === "\"";
        $endsWithSlash   = substr($temp, -1) === "/";

        if (!$isGoogleLink && !$startsWithQuote && $endsWithSlash) {
            // NOTE(review): the published listing breaks this string literal
            // across two lines; an HTML line-break tag may have been lost when
            // the page was rendered - confirm the intended separator.
            echo "$temp\n\n";
        }
    }
}
This script will output Google's directory results as just plain URLs.