1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
|
<?php
namespace FilesSearch;
use DBManager;
use Log;
use PDOException;
/**
* The FilesIndexManager is responsible for creating the fulltext
* index of all or just single files.
*
* @license GPL2 or any later version
*
* @since Stud.IP 4.1
*/
class FilesIndexManager
{
// max number of seconds creating the index is allowed to
const TIME_LIMIT = 3600;
// rating of a file's name in the index
const RATING_FILE_REF_NAME = 0.9;
// rating of a file's description in the index
const RATING_FILE_REF_DESCRIPTION = 0.4;
// rating of a file's author's name in the index
const RATING_FILE_AUTHOR = 0.5;
// rating of a course name in the index
const RATING_RANGE_COURSE_NAME = 0.1;
// rating of a institute name in the index
const RATING_RANGE_INSTITUTE_NAME = 0.1;
private static $log;
private static $verbose = false;
/**
* (Re-)Create the fulltext index of all files or a single file.
*
* @param FileRef $fileRef optional; the file ref to (re-)index or
* otherwise all files will be indexed
* @param array $options optional; an array of options, currently
* only `verbose` is used - resulting in a
* verbose description of what the
* FilesIndexManager is doing
*
* @return int the number of seconds the indexing took
*/
public static function sqlIndex(\FileRef $fileRef = null, array $options = [])
{
if (isset($options['verbose'])) {
self::$verbose = $options['verbose'];
}
set_time_limit(self::TIME_LIMIT);
$dbm = DBManager::get();
$time = time();
self::log('### Indexing started');
try {
// Purge DB
$dbm->query('DROP TABLE IF EXISTS
files_search_index_temp,
files_search_index_old,
files_search_attributes_temp,
files_search_attributes_old');
self::log('Database purged');
// Create temporary tables
$dbm->query('CREATE TABLE files_search_index_temp LIKE files_search_index');
$dbm->query('CREATE TABLE files_search_attributes_temp LIKE files_search_attributes');
self::log('Temporary tables created');
if (isset($fileRef)) {
self::log(sprintf('Index file %s', $fileRef->id));
self::indexFile($fileRef);
} else {
self::log('Indexing files');
self::indexFiles();
}
self::log('Finished indexing');
// Swap tables
$dbm->query('RENAME TABLE
files_search_index TO files_search_index_old,
files_search_attributes TO files_search_attributes_old,
files_search_index_temp TO files_search_index,
files_search_attributes_temp TO files_search_attributes');
self::log('Tables swapped');
// Drop old index
$dbm->query('DROP TABLE files_search_index_old, files_search_attributes_old');
self::log('Old tables dropped');
$runtime = time() - $time;
self::log(sprintf('FINISHED! Runtime: %0d:%02d', floor($runtime / 60), $runtime % 60));
// Return runtime
return $runtime;
// In case of mysql error imediately abort
} catch (PDOException $e) {
self::log('MySQL Error occured!');
self::log($e->getMessage());
var_dump($e);
self::log('Aborting');
throw $e;
}
}
/**
* This method indexes a single file.
*
* @param FileRef $fileRef the file to index
*/
public static function indexFile(\FileRef $fileRef)
{
self::fillAttributes($fileRef);
self::fillIndex($fileRef);
}
/**
* This method indexes all files.
*/
public static function indexFiles()
{
self::fillAttributes();
self::fillIndex();
}
/**
* This method indexes the direct children of a folder.
*
* @param Folder $folder the folder to index
*/
public static function indexFolder(\Folder $folder)
{
$folder->file_refs->each(function (\FileRef $fileRef) {
self::indexFile($fileRef);
});
}
/**
* This method drops all indexes of direct children of a folder.
*
* @param Folder $folder the folder to drop indexes of
*/
public static function dropIndexForFolder(\Folder $folder)
{
$folder->file_refs->each(function (\FileRef $fileRef) {
self::dropIndexForFile($fileRef);
});
}
/**
* This method drops the index of a single file.
*
* @param FileRef the file whose index shall be dropped
*/
public static function dropIndexForFile(\FileRef $fileRef)
{
DBManager::get()->execute(
'DELETE FROM files_search_index WHERE file_ref_id = :filerefid',
[':filerefid' => $fileRef->id]
);
DBManager::get()->execute(
'DELETE FROM files_search_attributes WHERE id = :filerefid',
[':filerefid' => $fileRef->id]
);
}
// Helpers
/**
* This method creates an index by using an SQL statement, some
* params and an optional FileRef instance.
*
* The SQL statement is executed using the params. If the index
* shall be created for a single FileRef, you have to specify it.
*
* @param string $sql the SQL statement
* @param array $params the params to be used in the (prepared) SQL statement
* @param FileRef $fileRef optional; if the index should be
* created for this FileRef only
*/
private static function createIndex($sql, $params, \FileRef $fileRef = null)
{
$table = isset($fileRef) ? 'files_search_index' : 'files_search_index_temp';
$query = sprintf('INSERT INTO %s (file_ref_id, text, relevance) %s', $table, $sql);
DBManager::get()->execute($query, $params);
}
private static function relevance($base, $modifier)
{
// 31556926 is the number of seconds in one year
return "POW( $base , ((UNIX_TIMESTAMP() - CAST($modifier AS SIGNED)) / 31556926)) AS relevance";
}
/**
* Logs an indexing event in the index.log file.
*
* @param type $info
*/
private static function log($info)
{
if (!self::$verbose) {
return;
}
if (!self::$log) {
self::$log = self::createLogger();
}
self::$log->info(self::class.': '.$info);
}
/**
* @SuppressWarnings(PHPMD.Superglobals)
*/
private static function createLogger()
{
@unlink($GLOBALS['TMP_PATH'].'/files_index.log');
Log::set('filesindexlog', $GLOBALS['TMP_PATH'].'/files_index.log');
return Log::get('filesindexlog');
}
/**
* This method fills the attributes table.
*
* If you do not specify a single FileRef instance, all the
* fileRefs in the database will be used. Otherwise the attributes
* table is filled with the attributes of that FileRef instance.
*
* @param FileRef $fileRef optional; if the attributes should be
* filled for this FileRef only
*/
private static function fillAttributes(\FileRef $fileRef = null)
{
if (isset($fileRef)) {
$table = 'files_search_attributes';
$where['sql'] = 'WHERE file_refs.id = :filerefid';
$where['params'][':filerefid'] = $fileRef->id;
} else {
$table = 'files_search_attributes_temp';
$where = ['sql' => '', 'params' => []];
}
$query = sprintf('
INSERT INTO %s
(id, file_ref_user_id, file_ref_mkdate, file_ref_chdate,
folder_id, folder_range_id, folder_range_type, folder_type,
course_status, semester_start, semester_end)
SELECT
file_refs.id,
file_refs.user_id,
file_refs.mkdate,
file_refs.chdate,
folders.id as folder_id,
folders.range_id AS folder_range_id,
folders.range_type AS folder_range_type,
folders.folder_type,
seminare.status AS course_status,
MIN(semester_data.beginn) AS semester_start,
MIN(semester_data.ende) AS semester_end
FROM file_refs
JOIN folders ON (file_refs.folder_id = folders.id)
LEFT JOIN seminare ON (folders.range_type = \'course\' AND folders.range_id = seminare.Seminar_id)
LEFT JOIN semester_courses ON (seminare.Seminar_id = semester_courses.course_id)
LEFT JOIN semester_data ON (semester_courses.semester_id = semester_data.semester_id)
%s
GROUP BY file_refs.id
', $table, $where['sql']);
DBManager::get()->execute($query, $where['params']);
}
private static function fillIndex(\FileRef $fileRef = null)
{
if (isset($fileRef)) {
$whereCondition = 'WHERE file_refs.id = :filerefid';
$whereParams = [':filerefid' => $fileRef->id];
} else {
$whereCondition = '';
$whereParams = [];
}
// titel
self::createIndex(
sprintf(
'SELECT file_refs.id, file_refs.name, %s FROM file_refs %s',
self::relevance(
self::RATING_FILE_REF_NAME,
'file_refs.chdate'
),
$whereCondition
),
$whereParams,
$fileRef
);
// beschreibung
self::createIndex(
sprintf(
'SELECT file_refs.id, file_refs.description, %s FROM file_refs %s',
self::relevance(
self::RATING_FILE_REF_DESCRIPTION,
'file_refs.chdate'
),
$whereCondition
),
$whereParams,
$fileRef
);
// name des autors
self::createIndex(
sprintf(
'SELECT file_refs.id,
CONCAT(auth_user_md5.Nachname,\' \', auth_user_md5.Vorname, \' \',
auth_user_md5.username), %s
FROM file_refs
JOIN auth_user_md5 ON (auth_user_md5.user_id = file_refs.user_id) %s',
self::relevance(
self::RATING_FILE_AUTHOR,
'file_refs.chdate'
),
$whereCondition
),
$whereParams,
$fileRef
);
$withRangeType = function ($rangeType) use ($whereCondition) {
return sprintf(
' %s %s ',
empty($whereCondition) ? 'WHERE' : $whereCondition.' AND ',
sprintf('folders.range_type = \'%s\'', $rangeType)
);
};
// name der veranstaltung
self::createIndex(
sprintf(
'SELECT file_refs.id, seminare.Name, %s
FROM file_refs
JOIN folders ON (file_refs.folder_id = folders.id)
JOIN seminare ON (folders.range_id = seminare.Seminar_id)
%s',
self::relevance(
self::RATING_RANGE_COURSE_NAME,
'file_refs.chdate'
),
$withRangeType('course')
),
$whereParams,
$fileRef
);
// name der einrichtungen
self::createIndex(
sprintf(
'SELECT file_refs.id, Institute.Name, %s
FROM file_refs
JOIN folders ON (file_refs.folder_id = folders.id)
JOIN Institute ON (folders.range_id = Institute.Institut_id)
%s',
self::relevance(
self::RATING_RANGE_INSTITUTE_NAME,
'file_refs.chdate'
),
$withRangeType('institute')
),
$whereParams,
$fileRef
);
}
}
|