diff options
| author | Philipp Schüttlöffel <schuettloeffel@zqs.uni-hannover.de> | 2024-09-24 10:53:31 +0200 |
|---|---|---|
| committer | Philipp Schüttlöffel <schuettloeffel@zqs.uni-hannover.de> | 2024-09-24 10:53:31 +0200 |
| commit | 4459dd7917f4d1c34f40bb68f0e991e9c3d53e4c (patch) | |
| tree | 5c07151ae61276d334e88f6309c30d439a85c12e /lib/classes/OpenGraph.php | |
| parent | da0022e5c1abbf9825ae76debaabdff7e8623bb4 (diff) | |
| parent | 97a188592c679890a25c37ab78463add76a52ff7 (diff) | |
Merge branch 'main' into issue-3911 (issue-3911)
Diffstat (limited to 'lib/classes/OpenGraph.php')
| -rw-r--r-- | lib/classes/OpenGraph.php | 81 |
1 file changed, 59 insertions, 22 deletions
```diff
diff --git a/lib/classes/OpenGraph.php b/lib/classes/OpenGraph.php
index 75326aa..845f5cb 100644
--- a/lib/classes/OpenGraph.php
+++ b/lib/classes/OpenGraph.php
@@ -11,34 +11,71 @@ class OpenGraph
     /**
      * Extracts urls and their according open graph infos from a given string
      *
-     * @param String $string Text to extract urls and open graph infos from
+     * @param string|null $string Text to extract urls and open graph infos from
      * @return OpenGraphURLCollection containing the extracted urls
      */
-    public static function extract($string)
+    public static function extract(?string $string): OpenGraphURLCollection
     {
-        $collection = new OpenGraphURLCollection;
-
-        if (Config::get()->OPENGRAPH_ENABLE) {
-            $regexp = StudipCoreFormat::getStudipMarkup('links')['start'];
-            $matched = preg_match_all('/' . $regexp . '/ums', $string, $matches, PREG_SET_ORDER);
-            foreach ($matches as $match) {
-                $url = $match[2];
-
-                if (!$url) {
-                    continue;
-                }
-
-                if (!isLinkIntern($url)) {
-                    $og_url = OpenGraphURL::fromURL($url);
-                    if ($og_url && !$collection->find($og_url->id)) {
-                        $og_url->store();
-
-                        $collection[] = $og_url;
-                    }
-                }
+        $collection = new OpenGraphURLCollection();
+
+        if (!Config::get()->OPENGRAPH_ENABLE || !$string) {
+            return $collection;
+        }
+
+        if (Studip\Markup::isHtml($string)) {
+            $urls = self::extractUrlsFromHtml($string);
+        } else {
+            $urls = self::extractUrlsFromText($string);
+        }
+
+        foreach ($urls as $url) {
+            $og_url = OpenGraphURL::fromURL($url);
+            if ($og_url && !$collection->find($og_url->id)) {
+                $og_url->store();
+
+                $collection[] = $og_url;
             }
         }
 
         return $collection;
     }
+
+    public static function filterURLs(array $urls): array
+    {
+        return array_filter($urls, function (string $url): bool {
+            if (!$url) {
+                return false;
+            }
+
+            return !isLinkIntern($url);
+        });
+    }
+
+    public static function extractUrlsFromText(string $text): array
+    {
+        $regexp = StudipCoreFormat::getStudipMarkup('links')['start'];
+        preg_match_all('/' . $regexp . '/ums', $text, $matches, PREG_SET_ORDER);
+        $urls = array_column($matches, 2);
+
+        return self::filterURLs($urls);
+    }
+
+    public static function extractUrlsFromHtml(string $html): array
+    {
+        $document = new DOMDocument();
+        $document->loadHTML($html, LIBXML_NOWARNING | LIBXML_NOERROR);
+
+        $elements = $document->getElementsByTagName('a');
+
+        $urls = [];
+        foreach ($elements as $element) {
+            if (!$element->hasAttribute('href')) {
+                continue;
+            }
+
+            $urls[] = $element->getAttribute('href');
+        }
+
+        return self::filterURLs($urls);
+    }
 }
```
