This repository has been archived by the owner on Jan 27, 2023. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 38
/
Copy pathbatchreindex.php
74 lines (53 loc) · 1.95 KB
/
batchreindex.php
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
<?php
include_once 'data.php';
include_once 'functions.php';
// End the session and store its data before anything else happens.
// NOTE(review): presumably done so the session is not kept open during the
// long-running re-indexing below - confirm against the rest of the application.
session_write_close();

// Introduction screen: when the "intro" query parameter is set to a non-empty,
// truthy value, print the confirmation dialog and stop here, so this request
// never reaches the re-indexing code further down.
if (isset($_GET['intro']) && $_GET['intro']):
?>
<div class="item-sticker ui-widget-content ui-corner-all" style="margin:auto;margin-top:100px;width:340px">
<div class="ui-dialog-titlebar ui-state-default ui-corner-top" style="border:0;text-align:center">
Batch PDF re-indexing
</div>
<div class="separator" style="margin:0"></div>
<div class="alternating_row ui-corner-bottom" style="padding:4px 12px;overflow:auto;">
<p style="text-align:justify">
This tool will re-extract text from all existing PDF files. Beware, the process
can take several hours, if the number of files is large.
</p>
</div>
<div class="separator" style="margin:0"></div>
<div class="alternating_row ui-corner-bottom" style="padding:4px 7px;overflow:auto;vertical-align: middle">
<button>Start</button>
</div>
</div>
<?php
    // The dialog is the whole response for an intro request.
    exit;
endif;
// Script: run recordFulltext() on every stored PDF and print an HTML error log.

// NOTE(review): $order is never read in this file. It is kept because a function
// from the included files could read it through `global`; confirm that nothing
// does before removing it.
$order = array("\r\n", "\n", "\r");

echo '<div style="padding:1em 2em">';
echo '<h3>Batch PDF re-indexing.</h3>';
echo '<h4>Error log:</h4>';

// PDFs are matched exactly two directory levels below IL_PDF_PATH, each level
// being a single digit: <IL_PDF_PATH>/<0-9>/<0-9>/*.pdf
$pattern = IL_PDF_PATH . DIRECTORY_SEPARATOR . '[0-9]' . DIRECTORY_SEPARATOR . '[0-9]' . DIRECTORY_SEPARATOR . '*.pdf';

foreach (new GlobIterator($pattern) as $pdf) {

    // Restart the execution timer BEFORE working on a file, so that every file,
    // including the very first one, gets a full 60 seconds. Previously the call
    // came after the work, leaving the first file on the default time limit.
    set_time_limit(60);

    $file_path = $pdf->getPathname();
    $file_name = $pdf->getFilename();

    // The numeric id is the file name with its ".pdf" suffix removed.
    $file_id = intval(basename($file_name, '.pdf'));

    // Extract text from PDF. Any non-empty value returned by recordFulltext() is
    // treated as a message to report.
    if (is_readable($file_path)) {
        $message = recordFulltext($file_id, $file_name);
    } else {
        // The glob has just listed this file, so it exists; it merely cannot be read.
        $message = 'File is not readable.';
    }

    if (!empty($message)) {
        // The path comes from the filesystem, so escape it before embedding it in
        // HTML. ENT_SUBSTITUTE keeps paths with invalid UTF-8 from turning into
        // an empty string.
        // NOTE(review): the message is printed unescaped, exactly as before,
        // because recordFulltext() may return markup - escape it as well once it
        // is confirmed to be plain text.
        echo htmlspecialchars($file_path, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8')
            . '<br><b>' . $message . '</b><br>';
    }
}

echo '<br>All done.';
echo '</div>';
// The closing PHP tag is left out on purpose: nothing follows this code, and
// omitting it prevents stray trailing whitespace from being sent to the client.