blob: 845f5cb21fb494c48d48ae1f385c0e605c5623f5 (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
|
<?php
/**
* Open Graph class that extracts open graph urls from a given string.
*
* @author Jan-Hendrik Willms <tleilax+studip@gmail.com>
* @license GPL2 or any later version
* @since Stud.IP 3.4
*/
class OpenGraph
{
    /**
     * Extracts urls and their according open graph infos from a given string.
     *
     * An empty collection is returned when the OPENGRAPH_ENABLE config entry
     * is falsy or when the given string is empty. Otherwise the urls are
     * extracted either from html or from plain text, depending on what
     * Studip\Markup::isHtml() reports for the string. Every url that yields
     * an OpenGraphURL which is not yet part of the collection is stored and
     * added to the collection.
     *
     * @param string|null $string Text to extract urls and open graph infos from
     * @return OpenGraphURLCollection containing the extracted urls
     */
    public static function extract(?string $string): OpenGraphURLCollection
    {
        $collection = new OpenGraphURLCollection();

        if (!Config::get()->OPENGRAPH_ENABLE || !$string) {
            return $collection;
        }

        if (Studip\Markup::isHtml($string)) {
            $urls = self::extractUrlsFromHtml($string);
        } else {
            $urls = self::extractUrlsFromText($string);
        }

        foreach ($urls as $url) {
            $og_url = OpenGraphURL::fromURL($url);

            // Skip urls that yield no object and urls already in the collection
            if ($og_url && !$collection->find($og_url->id)) {
                $og_url->store();
                $collection[] = $og_url;
            }
        }

        return $collection;
    }

    /**
     * Removes empty urls and urls that isLinkIntern() accepts from the given
     * list (presumably links pointing back into this installation - see
     * isLinkIntern() for the exact rule).
     *
     * The keys of the given array are preserved, so the result may have gaps.
     *
     * @param string[] $urls Urls to filter
     * @return string[] Remaining urls, indexed by their original keys
     */
    public static function filterURLs(array $urls): array
    {
        return array_filter($urls, static function (string $url): bool {
            return $url && !isLinkIntern($url);
        });
    }

    /**
     * Extracts all urls from a plain (Stud.IP markup) text.
     *
     * The urls are detected using the start pattern of the "links" markup
     * rule as provided by StudipCoreFormat; the second capture group of each
     * match is taken as the url.
     *
     * @param string $text Text to extract the urls from
     * @return string[] List of non-empty, non-internal urls (consecutively indexed)
     */
    public static function extractUrlsFromText(string $text): array
    {
        $regexp = StudipCoreFormat::getStudipMarkup('links')['start'];
        preg_match_all('/' . $regexp . '/ums', $text, $matches, PREG_SET_ORDER);

        $urls = array_column($matches, 2);

        // array_filter() keeps the original keys, reindex to return a real list
        return array_values(self::filterURLs($urls));
    }

    /**
     * Extracts the href targets of all anchor elements from a html string.
     *
     * @param string $html Html to extract the urls from (assumed to be UTF-8
     *                     encoded, as is all other Stud.IP content)
     * @return string[] List of non-empty, non-internal urls (consecutively indexed)
     */
    public static function extractUrlsFromHtml(string $html): array
    {
        // DOMDocument::loadHTML() rejects empty input (ValueError on PHP 8,
        // a warning on older versions) and blank input cannot contain links.
        if (trim($html) === '') {
            return [];
        }

        $document = new DOMDocument();

        // Without a declared charset libxml decodes the markup as ISO-8859-1
        // which garbles non-ASCII characters inside of urls. Declaring the
        // charset up front makes the parser read the input as UTF-8.
        $document->loadHTML(
            '<meta http-equiv="Content-Type" content="text/html; charset=utf-8">' . $html,
            LIBXML_NOWARNING | LIBXML_NOERROR
        );

        $urls = [];
        foreach ($document->getElementsByTagName('a') as $element) {
            if ($element->hasAttribute('href')) {
                $urls[] = $element->getAttribute('href');
            }
        }

        // array_filter() keeps the original keys, reindex to return a real list
        return array_values(self::filterURLs($urls));
    }
}
|