aboutsummaryrefslogtreecommitdiff
path: root/lib/classes/OpenGraph.php
blob: 845f5cb21fb494c48d48ae1f385c0e605c5623f5 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
<?php
/**
 * Open Graph class that extracts open graph urls from a given string.
 *
 * @author  Jan-Hendrik Willms <tleilax+studip@gmail.com>
 * @license GPL2 or any later version
 * @since   Stud.IP 3.4
 */
class OpenGraph
{
    /**
     * Extracts urls and their according open graph infos from a given string.
     *
     * Nothing is extracted when the OPENGRAPH_ENABLE configuration setting is
     * off or when the string is empty. Depending on the result of
     * Studip\Markup::isHtml(), the urls are either read from the anchors of
     * the html or matched by the Stud.IP link markup. Every url that
     * OpenGraphURL::fromURL() resolves and that is not yet part of the
     * collection is stored and appended.
     *
     * @param string|null $string Text to extract urls and open graph infos from
     * @return OpenGraphURLCollection containing the extracted urls
     */
    public static function extract(?string $string): OpenGraphURLCollection
    {
        $collection = new OpenGraphURLCollection();

        if (!Config::get()->OPENGRAPH_ENABLE || !$string) {
            return $collection;
        }

        $urls = Studip\Markup::isHtml($string)
              ? self::extractUrlsFromHtml($string)
              : self::extractUrlsFromText($string);

        // The same url may be linked several times; resolve each distinct
        // url only once instead of relying solely on the collection check.
        foreach (array_unique($urls) as $url) {
            $og_url = OpenGraphURL::fromURL($url);
            if ($og_url && !$collection->find($og_url->id)) {
                $og_url->store();

                $collection[] = $og_url;
            }
        }

        return $collection;
    }

    /**
     * Removes all entries from a list of urls that are empty or for which
     * isLinkIntern() returns a truthy value.
     *
     * The keys of the remaining entries are preserved (array_filter()).
     *
     * @param string[] $urls Urls to filter
     * @return string[] Remaining urls, still indexed by their original keys
     */
    public static function filterURLs(array $urls): array
    {
        return array_filter($urls, function (string $url): bool {
            if (!$url) {
                return false;
            }

            return !isLinkIntern($url);
        });
    }

    /**
     * Extracts urls from a plain text using the "links" rule of the Stud.IP
     * core markup.
     *
     * The url is taken from the second capture group of every match and the
     * result is passed through filterURLs().
     *
     * @param string $text Text to search for urls
     * @return string[] Filtered urls found in the text
     */
    public static function extractUrlsFromText(string $text): array
    {
        $regexp = StudipCoreFormat::getStudipMarkup('links')['start'];

        // preg_match_all() yields 0 for "no match" and false on error;
        // in both cases there is nothing to extract.
        if (!preg_match_all('/' . $regexp . '/ums', $text, $matches, PREG_SET_ORDER)) {
            return [];
        }

        return self::filterURLs(array_column($matches, 2));
    }

    /**
     * Extracts urls from the href attributes of all anchors in a piece of
     * html. Anchors without a href attribute are skipped and the result is
     * passed through filterURLs().
     *
     * @param string $html Html to search for anchors
     * @return string[] Filtered urls found in the html
     */
    public static function extractUrlsFromHtml(string $html): array
    {
        // DOMDocument::loadHTML() refuses an empty source (ValueError as of
        // PHP 8) and blank markup cannot contain any anchors anyway.
        if (trim($html) === '') {
            return [];
        }

        // Without an explicit hint libxml does not reliably decode the markup
        // as UTF-8, which garbles non-ASCII characters in href attributes.
        // NOTE(review): this assumes the passed html is UTF-8 encoded.
        $document = new DOMDocument();
        $document->loadHTML(
            '<?xml encoding="UTF-8">' . $html,
            LIBXML_NOWARNING | LIBXML_NOERROR
        );

        $urls = [];
        foreach ($document->getElementsByTagName('a') as $anchor) {
            if ($anchor->hasAttribute('href')) {
                $urls[] = $anchor->getAttribute('href');
            }
        }

        return self::filterURLs($urls);
    }
}