-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathItemPropExtractor.php
91 lines (66 loc) · 2.64 KB
/
ItemPropExtractor.php
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
<?php
/*****
*
* HTML Itemprop-Attribute Extractor
* 2015 by Strauss Manuel
* www.strzlee.tk
*
* Fetches a web page, extracts its itemprop attributes and optionally saves them as a JSON file.
* Returns a list of single-pair arrays, an associative array, or false if the HTTP response code is not 200.
*
* array ItemPropExtractor::fetch(string URL, [string FILENAME], [boolean ASSOC-ARRAY])
*
* Example:
*
* $props = ItemPropExtractor::fetch('http://www.jausentest.at/detail/235/', 'props.json', true);
*
* Result: array { ["photo"]=> ""
* ["rating"]=> ""
* ["name"]=> "Wimmer Stubn"
* ["address"]=> "4751 Dorf an der Pram, Thalling 2"
* ["tel"]=> "07764/20068"
* ["geo"]=> " "
* ["latitude"]=> ""
* ["longitude"]=> ""
* ["url"]=> "http://www.jausentest.at/#detail/235" }
*
*****/
class ItemPropExtractor
{
    /** Name of the HTML attribute whose value becomes the key of each result entry. */
    private static $extract = 'itemprop';

    /**
     * Downloads a page over HTTP and collects every element carrying the
     * configured attribute (see self::$extract).
     *
     * @param string       $target     URL to download.
     * @param string|false $exportFile Optional path; when given, the result is
     *                                 additionally written there as JSON.
     * @param bool         $assocArray true  => array(attrValue => text); a later
     *                                          element with the same attribute
     *                                          value overwrites an earlier one.
     *                                 false => list of array(attrValue => text),
     *                                          one entry per element, in document
     *                                          order.
     *
     * @return array|false The collected entries (possibly empty), or false when
     *                     the transfer failed or the HTTP status was not 200.
     */
    public static function fetch($target, $exportFile = false, $assocArray = false)
    {
        $html = self::download($target);
        if ($html === false) {
            return false;
        }

        $nodes = self::parse($html, $assocArray);

        // Compare explicitly instead of relying on truthiness, so that a file
        // literally named "0" is still written.
        if (is_string($exportFile) && $exportFile !== '') {
            // Export is best effort: a failed write does not discard the result.
            self::export($nodes, $exportFile);
        }

        return $nodes;
    }

    /**
     * Extracts the attribute/text pairs from an HTML string that has already
     * been obtained (no network access involved).
     *
     * NOTE(review): DOMDocument::loadHTML() is documented to assume ISO-8859-1
     * when the markup declares no charset; UTF-8 pages lacking a charset
     * declaration may yield garbled text - confirm whether callers care.
     *
     * @param string $html       Markup to scan.
     * @param bool   $assocArray Same meaning as in fetch().
     *
     * @return array Collected entries; empty when the input is blank, cannot be
     *               parsed, or contains no matching element.
     */
    public static function parse($html, $assocArray = false)
    {
        $nodes = array();

        // loadHTML() rejects an empty source (ValueError on PHP 8, warning on
        // PHP 7), so blank input is answered directly.
        if (!is_string($html) || trim($html) === '') {
            return $nodes;
        }

        // Silence markup warnings only for the duration of the parse, then put
        // the caller's libxml setting back and drop the buffered errors.
        $previous = libxml_use_internal_errors(true);
        $document = new DOMDocument();
        $loaded = $document->loadHTML($html);
        libxml_clear_errors();
        libxml_use_internal_errors($previous);

        if ($loaded === false) {
            return $nodes;
        }

        $xpath = new DOMXPath($document);
        $matches = $xpath->query('//*[@' . self::$extract . ']');
        if ($matches === false) {
            return $nodes;
        }

        foreach ($matches as $element) {
            $key = $element->getAttribute(self::$extract);
            if ($assocArray) {
                $nodes[$key] = $element->nodeValue;
            } else {
                $nodes[] = array($key => $element->nodeValue);
            }
        }

        return $nodes;
    }

    /**
     * Performs the HTTP GET and returns the response body.
     *
     * @param string $target URL to download.
     *
     * @return string|false Body on HTTP 200, false on any transfer error or
     *                      other status code.
     */
    private static function download($target)
    {
        $handle = curl_init();
        if ($handle === false) {
            return false;
        }

        curl_setopt_array($handle, array(
            CURLOPT_URL            => $target,
            CURLOPT_REFERER        => 'http://www.google.com',
            CURLOPT_USERAGENT      => 'Mozilla/5.0',
            CURLOPT_HEADER         => false,
            CURLOPT_RETURNTRANSFER => true,
            CURLOPT_TIMEOUT        => 10,
        ));

        $body = curl_exec($handle);
        $status = curl_getinfo($handle, CURLINFO_HTTP_CODE);

        // Since PHP 8.0 curl handles are objects and curl_close() has no
        // effect; only older runtimes need the explicit release.
        if (PHP_VERSION_ID < 80000) {
            curl_close($handle);
        }

        // curl_exec() yields false when the transfer itself failed, which can
        // happen even after a 200 status line was received (e.g. timeout
        // mid-body).
        if ($body === false || (int) $status !== 200) {
            return false;
        }

        return $body;
    }

    /**
     * Writes the entries to a file as JSON, replacing any existing content.
     *
     * @param array  $nodes      Entries to serialise.
     * @param string $exportFile Destination path.
     *
     * @return bool true when the file was written, false otherwise.
     */
    private static function export(array $nodes, $exportFile)
    {
        $json = json_encode($nodes);
        if ($json === false) {
            trigger_error(
                'ItemPropExtractor: JSON encoding failed: ' . json_last_error_msg(),
                E_USER_WARNING
            );

            return false;
        }

        // file_put_contents() opens, writes and closes in one call and raises
        // its own warning if the path is not writable.
        return file_put_contents($exportFile, $json) !== false;
    }
}