aboutsummaryrefslogtreecommitdiff
path: root/lib/models/OpenGraphURL.php
diff options
context:
space:
mode:
authorPhilipp Schüttlöffel <schuettloeffel@zqs.uni-hannover.de>2024-09-24 10:53:31 +0200
committerPhilipp Schüttlöffel <schuettloeffel@zqs.uni-hannover.de>2024-09-24 10:53:31 +0200
commit4459dd7917f4d1c34f40bb68f0e991e9c3d53e4c (patch)
tree5c07151ae61276d334e88f6309c30d439a85c12e /lib/models/OpenGraphURL.php
parentda0022e5c1abbf9825ae76debaabdff7e8623bb4 (diff)
parent97a188592c679890a25c37ab78463add76a52ff7 (diff)
Merge branch 'main' into issue-3911issue-3911
Diffstat (limited to 'lib/models/OpenGraphURL.php')
-rw-r--r--lib/models/OpenGraphURL.php326
1 files changed, 326 insertions, 0 deletions
diff --git a/lib/models/OpenGraphURL.php b/lib/models/OpenGraphURL.php
new file mode 100644
index 0000000..29e5718
--- /dev/null
+++ b/lib/models/OpenGraphURL.php
@@ -0,0 +1,326 @@
+<?php
+/*
+ * Copyright (C) 2013 - Rasmus Fuhse <fuhse@data-quest.de>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of
+ * the License, or (at your option) any later version.
+ */
+
+/**
+ * A model class to handle the database table "opengraphdata", fetch data from
+ * an Opengraph-URL and render a fitting box with the opengraph information to
+ * the user.
+ *
+ * @property int $id alias column for opengraph_id
+ * @property int $opengraph_id database column
+ * @property string $hash database column
+ * @property string $url database column
+ * @property int|null $is_opengraph database column
+ * @property string|null $title database column
+ * @property string|null $image database column
+ * @property string|null $description database column
+ * @property string|null $type database column
+ * @property JSONArrayObject $data database column
+ * @property int $last_update database column
+ * @property int $chdate database column
+ * @property int $mkdate database column
+ */
+class OpenGraphURL extends SimpleORMap
+{
+ const EXPIRES_DURATION = 86400; // = 24 * 60 * 60
+
+ /**
+ * Configures this model.
+ *
+ * @param Array $config Configuration array
+ */
+ protected static function configure($config = [])
+ {
+ $config['db_table'] = 'opengraphdata';
+
+ $config['serialized_fields']['data'] = JSONArrayObject::class;
+
+ parent::configure($config);
+ }
+
+ /**
+ * Create an instance of this model given url. Differs from findOneByURL
+ * insofar that it will return a new object with the given url set
+ * instead of null.
+ *
+ * @param String $url URL to find
+ * @return OpenGraphURL Either existing instance or a new instance for
+ * the given url
+ */
+ public static function fromURL($url)
+ {
+ $og = self::findOneByUrl($url);
+ if (!$og) {
+ $og = new self();
+ $og->url = $url;
+ }
+ return $og;
+ }
+
+ /**
+ * Specialized findOneByURL function that uses the hash to find the
+ * appropriate record instead.
+ *
+ * @param string $url URL to find record for
+ * @return mixed instance of OpenGraphURL if available, null otherwise
+ */
+ public static function findOneByURL($url)
+ {
+ return self::findOneByHash(md5($url));
+ }
+
+ /**
+ * Constructor of the object. Provides a fallback if a url is passed
+ * instead of the usually expected numeric id in order to not break
+ * backward compatibility.
+ * But this constructor will fail miserably if a url is passed that
+ * is not in the database. This was chosen by design to encourage the
+ * correct use of an id.
+ *
+ * @param mixed $id Numeric id, existing url or null
+ */
+ public function __construct($id = null)
+ {
+ // Try to find matching id when an url is passed instead of an id.
+ // This is to ensure that no legacy code will immediately break.
+ if ($id !== null && !ctype_digit($id)) {
+ $temp = self::findOneByUrl($id);
+ if ($temp) {
+ $id = $temp->id;
+ }
+ }
+
+ parent::__construct($id);
+ }
+
+ /**
+ * Sets value of a column. Overwritten so that the hash is also set when
+ * the url is set.
+ *
+ * @param string $field
+ * @param string $value
+ * @return string
+ * @see SimpleORMap::setValue
+ */
+ public function setValue($field, $value)
+ {
+ $ret = parent::setValue($field, $value);
+
+ if ($field === 'url') {
+ $this->content['hash'] = md5($value);
+ }
+
+ return $ret;
+ }
+
+ /**
+ * Stores the object and fetches the opengraph information when either
+ * the object is new or outdated.
+ *
+ * @return int Number of updated records
+ */
+ public function store()
+ {
+ if ($this->isNew() || $this->last_update < time() - self::EXPIRES_DURATION) {
+ // Store last update timestamp BEFORE fetching so another thread
+ // will not fetch again
+ $this->last_update = time();
+ parent::store();
+
+ $this->fetch();
+ }
+
+ return parent::store();
+ }
+
+ /**
+ * Fetches information from the url by getting the contents of the
+ * webpage, parse the webpage and extract the information from the
+ * opengraph meta-tags.
+ * If the site doesn't have any opengraph-metatags it is in fact no
+ * opengraph node and thus no data will be stored in the database.
+ * Only $url['is_opengraph'] === '0' indicates that the site is no
+ * opengraph node at all.
+ *
+ * @todo The combination of FileManager::fetchURLMetadata() and the following request
+ * leads to two requests for the open graph data. This should
+ * be fixed due to performance reasons.
+ */
+ public function fetch()
+ {
+ if (!Config::get()->OPENGRAPH_ENABLE) {
+ return;
+ }
+
+ $isOpenGraph = false;
+
+ $response = FileManager::fetchURLMetadata($this['url']);
+ if ((int)$response['response_code'] === 200 && isset($response['Content-Type']) && mb_strpos($response['Content-Type'],'html') !== false) {
+ if (preg_match('/(?<=charset=)[^;]*/i', $response['Content-Type'], $match)) {
+ $currentEncoding = trim($match[0], '"');
+ } else {
+ $currentEncoding = 'UTF-8';
+ }
+
+ $context = get_default_http_stream_context($this['url']);
+ stream_context_set_option($context, [
+ 'http' => [
+ 'method' => 'GET',
+ 'header' => sprintf("User-Agent: Stud.IP v%s OpenGraph Parser\r\n", $GLOBALS['SOFTWARE_VERSION']),
+ ],
+ ]);
+
+ $content = @file_get_contents($this['url'], false, $context);
+
+ if ($content === false) {
+ return;
+ }
+
+ $content = mb_encode_numericentity($content, [0x80, 0xffff, 0, 0xffff], $currentEncoding);
+ $old_libxml_error = libxml_use_internal_errors(true);
+ $doc = new DOMDocument();
+ $doc->loadHTML($content);
+ libxml_use_internal_errors($old_libxml_error);
+
+ $metatags = $doc->getElementsByTagName('meta');
+ $reservedTags = ['url', 'chdate', 'mkdate', 'last_update', 'is_opengraph', 'data'];
+ $ogTags = [];
+ $data = [];
+ foreach ($metatags as $tag) {
+ $key = false;
+ if ($tag->hasAttribute('property')
+ && mb_strpos($tag->getAttribute('property'), 'og:') === 0)
+ {
+ $key = mb_strtolower(mb_substr($tag->getAttribute('property'), 3));
+ }
+ if (!$key && $tag->hasAttribute('name')
+ && mb_strpos($tag->getAttribute('name'), 'og:') === 0)
+ {
+ $key = mb_strtolower(mb_substr($tag->getAttribute('name'), 3));
+ }
+ if ($key) {
+ $content = $tag->getAttribute('content');
+ $data[] = ['og:'.$key => $content];
+ $ogTags[$key] = $content;
+ $isOpenGraph = true;
+ }
+ }
+ foreach ($ogTags as $key => $tag) {
+ if ($this->isField($key) && !in_array($key, $reservedTags)) {
+ $this[$key] = $tag;
+ }
+ }
+ if (empty($this['title']) && $isOpenGraph) {
+ $titles = $doc->getElementsByTagName('title');
+ if ($titles->length > 0) {
+ $this['title'] = $titles->item(0)->textContent;
+ }
+ }
+ if (empty($this['description']) && $isOpenGraph) {
+ foreach ($metatags as $tag) {
+ if (mb_stripos($tag->getAttribute('name'), "description") !== false
+ || mb_stripos($tag->getAttribute('property'), "description") !== false)
+ {
+ $this['description'] = $tag->getAttribute('content');
+ }
+ }
+ }
+ $this['data'] = $data;
+ }
+
+ $this['is_opengraph'] = (int) $isOpenGraph;
+ }
+
+ /**
+ * Renders a small box with the information of the opengraph url. Used in
+ * blubber and in the forum.
+ *
+ * @return string html output of the box.
+ */
+ public function render()
+ {
+ if (!Config::get()->OPENGRAPH_ENABLE || !$this->getValue('is_opengraph')) {
+ return '';
+ }
+ $template = $GLOBALS['template_factory']->open('shared/opengraphinfo_wide.php');
+ $template->og = $this;
+ return $template->render();
+ }
+
+ /**
+ * Returns an array with all audiofiles that are provided by the opengraph-node.
+ * Each array-entry is an array itself with the url as first parameter and the
+ * content-type (important for <audio/> tags) as the second.
+ *
+ * @return array(array($url, $content_type), ...)
+ */
+ public function getAudioFiles()
+ {
+ return $this->getMediaFiles('audio');
+ }
+
+ /**
+ * Returns an array with all videofiles that are provided by the opengraph-node.
+ * Each array-entry is an array itself with the url as first parameter and the
+ * content-type (important for <video/> tags) as the second.
+ *
+ * @return array(array($url, $content_type), ...)
+ */
+ public function getVideoFiles()
+ {
+ return $this->getMediaFiles('video');
+ }
+
+ /**
+ * Returns an array with all mediafiles that are provided by the opengraph-node.
+ * Each array-entry is an array itself with the url as first parameter and the
+ * content-type (important for <audio/> or <video/> tags) as the second.
+ *
+ * @param string $type "audio" or "video"
+ * @return array(array($url, $content_type), ...)
+ */
+ protected function getMediaFiles($type)
+ {
+ $files = [];
+ $media = [];
+ $secure_media = [];
+ $media_types = [];
+ foreach ($this['data'] as $meta) {
+ foreach ($meta as $key => $value) {
+ switch ($key) {
+ case "og:{$type}:url":
+ case "og:{$type}":
+ $media[] = $value;
+ break;
+ case "og:{$type}:secure_url":
+ $secure_media[] = $value;
+ break;
+ case "og:{$type}:type":
+ $media_types[] = $value;
+ break;
+ }
+ }
+ }
+ if (
+ isset($_SERVER['HTTPS'])
+ && $_SERVER['HTTPS'] === 'on'
+ && count($secure_media) > 0
+ ) {
+ foreach ($secure_media as $index => $url) {
+ $files[] = [$url, $media_types[$index]];
+ }
+ } else {
+ foreach ($media as $index => $url) {
+ $files[] = [$url, $media_types[$index]];
+ }
+ }
+ return $files;
+ }
+}