aboutsummaryrefslogtreecommitdiff
path: root/lib/classes/OpenGraph.php
diff options
context:
space:
mode:
author Philipp Schüttlöffel <schuettloeffel@zqs.uni-hannover.de> 2024-09-24 10:53:31 +0200
committer Philipp Schüttlöffel <schuettloeffel@zqs.uni-hannover.de> 2024-09-24 10:53:31 +0200
commit 4459dd7917f4d1c34f40bb68f0e991e9c3d53e4c (patch)
tree 5c07151ae61276d334e88f6309c30d439a85c12e /lib/classes/OpenGraph.php
parent da0022e5c1abbf9825ae76debaabdff7e8623bb4 (diff)
parent 97a188592c679890a25c37ab78463add76a52ff7 (diff)
Merge branch 'main' into issue-3911 [issue-3911]
Diffstat (limited to 'lib/classes/OpenGraph.php')
-rw-r--r-- lib/classes/OpenGraph.php 81
1 files changed, 59 insertions, 22 deletions
diff --git a/lib/classes/OpenGraph.php b/lib/classes/OpenGraph.php
index 75326aa..845f5cb 100644
--- a/lib/classes/OpenGraph.php
+++ b/lib/classes/OpenGraph.php
@@ -11,34 +11,71 @@ class OpenGraph
/**
* Extracts urls and their according open graph infos from a given string
*
+ * Yields an empty collection when the OPENGRAPH_ENABLE config option is
+ * off or the given string is falsy (null, '' or '0'). Otherwise the urls
+ * are taken from extractUrlsFromHtml() if Studip\Markup::isHtml() accepts
+ * the string and from extractUrlsFromText() if not. Every url is handed to
+ * OpenGraphURL::fromURL(); a result is stored and appended unless find()
+ * on the collection already returns an entry for its id.
+ *
- * @param String $string Text to extract urls and open graph infos from
+ * @param string|null $string Text to extract urls and open graph infos from
* @return OpenGraphURLCollection containing the extracted urls
*/
- public static function extract($string)
+ public static function extract(?string $string): OpenGraphURLCollection
{
- $collection = new OpenGraphURLCollection;
-
- if (Config::get()->OPENGRAPH_ENABLE) {
- $regexp = StudipCoreFormat::getStudipMarkup('links')['start'];
- $matched = preg_match_all('/' . $regexp . '/ums', $string, $matches, PREG_SET_ORDER);
- foreach ($matches as $match) {
- $url = $match[2];
-
- if (!$url) {
- continue;
- }
-
- if (!isLinkIntern($url)) {
- $og_url = OpenGraphURL::fromURL($url);
- if ($og_url && !$collection->find($og_url->id)) {
- $og_url->store();
-
- $collection[] = $og_url;
- }
- }
+ $collection = new OpenGraphURLCollection();
+
+ if (!Config::get()->OPENGRAPH_ENABLE || !$string) {
+ return $collection;
+ }
+
+ if (Studip\Markup::isHtml($string)) {
+ $urls = self::extractUrlsFromHtml($string);
+ } else {
+ $urls = self::extractUrlsFromText($string);
+ }
+
+ foreach ($urls as $url) {
+ $og_url = OpenGraphURL::fromURL($url);
+ if ($og_url && !$collection->find($og_url->id)) {
+ $og_url->store();
+
+ $collection[] = $og_url;
}
}
return $collection;
}
+
+ /**
+  * Reduces a list of url candidates to the urls worth resolving.
+  *
+  * Drops falsy entries (such as empty strings) and every url that
+  * isLinkIntern() reports as internal. Repeated urls are collapsed to
+  * their first occurrence and the result is reindexed, so callers always
+  * receive a gapless list.
+  *
+  * @param string[] $urls Url candidates
+  * @return string[] Remaining urls in their original order
+  */
+ public static function filterURLs(array $urls): array
+ {
+     $remaining = array_filter($urls, static function (string $url): bool {
+         return $url && !isLinkIntern($url);
+     });
+
+     // array_filter() keeps the input keys and a repeated url would only
+     // trigger a redundant lookup later on, so deduplicate and reindex.
+     return array_values(array_unique($remaining));
+ }
+
+ /**
+  * Collects the urls that appear as link markup in a plain text.
+  *
+  * Matching uses the 'start' pattern of the 'links' rule obtained from
+  * StudipCoreFormat; the url is read from capture group 2 of each match.
+  *
+  * @param string $text Text to scan
+  * @return string[] Urls that passed filterURLs()
+  */
+ public static function extractUrlsFromText(string $text): array
+ {
+     $pattern = '/' . StudipCoreFormat::getStudipMarkup('links')['start'] . '/ums';
+
+     $found = [];
+     preg_match_all($pattern, $text, $found, PREG_SET_ORDER);
+
+     $candidates = [];
+     foreach ($found as $groups) {
+         // Skip matches that carry no second capture group
+         if (array_key_exists(2, $groups)) {
+             $candidates[] = $groups[2];
+         }
+     }
+
+     return self::filterURLs($candidates);
+ }
+
+ /**
+  * Collects the urls referenced by the links of an html fragment.
+  *
+  * Reads the href attribute of every <a> element and passes the values
+  * through filterURLs(). Blank input and markup that cannot be loaded
+  * yield an empty list.
+  *
+  * @param string $html Html to scan
+  * @return string[] Urls that passed filterURLs()
+  */
+ public static function extractUrlsFromHtml(string $html): array
+ {
+     // DOMDocument::loadHTML() rejects an empty string (ValueError as of
+     // PHP 8), so bail out before parsing.
+     if (trim($html) === '') {
+         return [];
+     }
+
+     $document = new DOMDocument();
+     // Without an encoding hint libxml does not read the markup as utf-8,
+     // which garbles non-ascii characters in the extracted urls.
+     $loaded = $document->loadHTML(
+         '<?xml encoding="UTF-8">' . $html,
+         LIBXML_NOWARNING | LIBXML_NOERROR
+     );
+     if ($loaded === false) {
+         return [];
+     }
+
+     $urls = [];
+     foreach ($document->getElementsByTagName('a') as $element) {
+         if ($element->hasAttribute('href')) {
+             $urls[] = $element->getAttribute('href');
+         }
+     }
+
+     return self::filterURLs($urls);
+ }
}