Skip to content

Commit

Permalink
tec: Replace Link.url_lookup by url_hash
Browse files Browse the repository at this point in the history
This column is using a more performant index type (hash index) and is
generated with sha256, which should generate more efficient indexes.

btree index is used on the idx_links_user_id_url_hash index because hash
index cannot be used with multiple columns.
  • Loading branch information
marienfressinaud committed Jun 13, 2024
1 parent c45d8bd commit fd1fe71
Show file tree
Hide file tree
Showing 16 changed files with 99 additions and 91 deletions.
2 changes: 1 addition & 1 deletion src/controllers/Feeds.php
Original file line number Diff line number Diff line change
Expand Up @@ -110,7 +110,7 @@ public function create(Request $request): Response

$default_link = models\Link::findBy([
'user_id' => $support_user->id,
'url_lookup' => utils\Belt::removeScheme($url),
'url_hash' => models\Link::hashUrl($url),
]);
if (!$default_link) {
$default_link = new models\Link($url, $support_user->id, false);
Expand Down
2 changes: 1 addition & 1 deletion src/controllers/Links.php
Original file line number Diff line number Diff line change
Expand Up @@ -347,7 +347,7 @@ public function create(Request $request): Response

$existing_link = models\Link::findBy([
'user_id' => $user->id,
'url_lookup' => utils\Belt::removeScheme($link->url),
'url_hash' => models\Link::hashUrl($link->url),
]);
if ($existing_link) {
$link = $existing_link;
Expand Down
6 changes: 3 additions & 3 deletions src/controllers/collections/Links.php
Original file line number Diff line number Diff line change
Expand Up @@ -120,9 +120,9 @@ public function create(Request $request): Response

$existing_link = models\Link::findBy([
'user_id' => $user->id,
// Can't use $link->url_lookup since it's a calculated property,
// generated in database (and the link is not yet saved).
'url_lookup' => utils\Belt::removeScheme($link->url),
// Can't use $link->url_hash directly since it's a calculated
// property, generated in database (and the link is not yet saved).
'url_hash' => models\Link::hashUrl($link->url),
]);

if ($existing_link) {
Expand Down
4 changes: 2 additions & 2 deletions src/controllers/links/Collections.php
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ public function index(Request $request): Response
} else {
$existing_link = models\Link::findBy([
'user_id' => $user->id,
'url_lookup' => utils\Belt::removeScheme($link->url),
'url_hash' => models\Link::hashUrl($link->url),
]);
if ($existing_link) {
$link = $existing_link;
Expand All @@ -77,7 +77,7 @@ public function index(Request $request): Response
$shared_collections = utils\Sorter::localeSort($shared_collections, 'name');
$collections_by_others = models\Collection::listWritableContainingNotOwnedLinkWithUrl(
$user->id,
$link->url_lookup,
$link->url_hash,
);
$collections_by_others = utils\Sorter::localeSort($collections_by_others, 'name');

Expand Down
6 changes: 3 additions & 3 deletions src/controllers/links/Searches.php
Original file line number Diff line number Diff line change
Expand Up @@ -41,11 +41,11 @@ public function show(Request $request): Response

$existing_link = models\Link::findComputedBy([
'user_id' => $user->id,
'url_lookup' => utils\Belt::removeScheme($url),
'url_hash' => models\Link::hashUrl($url),
], ['number_comments']);
$default_link = models\Link::findBy([
'user_id' => $support_user->id,
'url_lookup' => utils\Belt::removeScheme($url),
'url_hash' => models\Link::hashUrl($url),
'is_hidden' => 0,
]);

Expand Down Expand Up @@ -109,7 +109,7 @@ public function create(Request $request): Response

$default_link = models\Link::findBy([
'user_id' => $support_user->id,
'url_lookup' => utils\Belt::removeScheme($url),
'url_hash' => models\Link::hashUrl($url),
]);
if (!$default_link) {
$default_link = new models\Link($url, $support_user->id, false);
Expand Down
61 changes: 61 additions & 0 deletions src/migrations/Migration202406130002AddLinksUrlHash.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
<?php

namespace App\migrations;

class Migration202406130002AddLinksUrlHash
{
public function migrate(): bool
{
$database = \Minz\Database::get();

$database->exec(<<<'SQL'
CREATE EXTENSION IF NOT EXISTS pgcrypto;
ALTER TABLE links
ADD COLUMN url_hash TEXT GENERATED ALWAYS AS (encode(digest(url, 'sha256'), 'hex')) STORED;
DROP INDEX idx_links_user_id_url;
DROP INDEX idx_links_url;
ALTER TABLE links
DROP COLUMN url_lookup;
CREATE INDEX idx_links_user_id_url_hash ON links USING btree(user_id, url_hash);
CREATE INDEX idx_links_url_hash ON links USING hash(url_hash);
DROP FUNCTION simplify_url;
SQL);

return true;
}

public function rollback(): bool
{
$database = \Minz\Database::get();

$database->exec(<<<'SQL'
CREATE FUNCTION simplify_url(url TEXT)
RETURNS TEXT
IMMUTABLE
RETURNS NULL ON NULL INPUT
AS $$ SELECT substring(url from 'https?://(.+)') $$
LANGUAGE SQL;
ALTER TABLE links
ADD COLUMN url_lookup TEXT GENERATED ALWAYS AS (simplify_url(url)) STORED;
DROP INDEX idx_links_user_id_url_hash;
DROP INDEX idx_links_url_hash;
ALTER TABLE links
DROP COLUMN url_hash;
CREATE INDEX idx_links_user_id_url ON links(user_id, url_lookup);
CREATE INDEX idx_links_url ON links(url_lookup);
DROP EXTENSION IF EXISTS pgcrypto;
SQL);

return true;
}
}
4 changes: 2 additions & 2 deletions src/models/Collection.php
Original file line number Diff line number Diff line change
Expand Up @@ -232,12 +232,12 @@ public function links(array $selected_computed_props = [], array $options = []):
* Return a link from this collection with the given URL and not owned by
* the given user.
*/
public function linkNotOwnedByUrl(string $user_id, string $url_lookup): ?Link
public function linkNotOwnedByUrl(string $user_id, string $url_hash): ?Link
{
return Link::findNotOwnedByCollectionIdAndUrl(
$user_id,
$this->id,
$url_lookup,
$url_hash,
);
}

Expand Down
7 changes: 6 additions & 1 deletion src/models/Link.php
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,7 @@ class Link
public string $search_index;

#[Database\Column(computed: true)]
public string $url_lookup;
public string $url_hash;

public function __construct(string $url, string $user_id, bool $is_hidden)
{
Expand Down Expand Up @@ -291,4 +291,9 @@ public function tagUri(): string
$date = $this->created_at->format('Y-m-d');
return "tag:{$host},{$date}:links/{$this->id}";
}

public static function hashUrl(string $url): string
{
return hash('sha256', $url);
}
}
4 changes: 2 additions & 2 deletions src/models/User.php
Original file line number Diff line number Diff line change
Expand Up @@ -405,10 +405,10 @@ public function obtainLinks(array $links): array
// Complete the owned_links list with links owned by the user, from the
// database.
$urls = array_column($not_owned_links, 'url');
$urls_lookup = array_map(['\App\utils\Belt', 'removeScheme'], $urls);
$urls_hashes = array_map(['\App\models\Link', 'hashUrl'], $urls);
$related_links = Link::listBy([
'user_id' => $this->id,
'url_lookup' => $urls_lookup,
'url_hash' => $urls_hashes,
]);
$owned_links = array_merge($owned_links, $related_links);

Expand Down
6 changes: 3 additions & 3 deletions src/models/dao/Collection.php
Original file line number Diff line number Diff line change
Expand Up @@ -418,7 +418,7 @@ public static function existsForUserIdAndLinkId(string $user_id, string $link_id
*
* @return self[]
*/
public static function listWritableContainingNotOwnedLinkWithUrl(string $user_id, string $url_lookup): array
public static function listWritableContainingNotOwnedLinkWithUrl(string $user_id, string $url_hash): array
{
$sql = <<<'SQL'
SELECT c.*
Expand All @@ -428,7 +428,7 @@ public static function listWritableContainingNotOwnedLinkWithUrl(string $user_id
AND lc.link_id = l.id
AND l.user_id != :user_id
AND l.url_lookup = :url_lookup
AND l.url_hash = :url_hash
AND (
c.user_id = :user_id OR EXISTS (
Expand All @@ -445,7 +445,7 @@ public static function listWritableContainingNotOwnedLinkWithUrl(string $user_id
$statement = $database->prepare($sql);
$statement->execute([
':user_id' => $user_id,
':url_lookup' => $url_lookup,
':url_hash' => $url_hash,
]);

return self::fromDatabaseRows($statement->fetchAll());
Expand Down
15 changes: 8 additions & 7 deletions src/models/dao/Link.php
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
namespace App\models\dao;

use Minz\Database;
use App\models;

/**
* Represent a link in database.
Expand Down Expand Up @@ -375,7 +376,7 @@ public static function isUrlInBookmarksOfUserId(string $user_id, string $url): b
FROM links l, collections c, links_to_collections lc
WHERE l.user_id = :user_id
AND l.url_lookup = simplify_url(:url)
AND l.url_hash = :url_hash
AND c.type = 'bookmarks'
Expand All @@ -387,7 +388,7 @@ public static function isUrlInBookmarksOfUserId(string $user_id, string $url): b
$statement = $database->prepare($sql);
$statement->execute([
':user_id' => $user_id,
':url' => $url,
':url_hash' => models\Link::hashUrl($url),
]);

return (bool) $statement->fetchColumn();
Expand All @@ -403,7 +404,7 @@ public static function isUrlReadByUserId(string $user_id, string $url): bool
FROM links l, collections c, links_to_collections lc
WHERE l.user_id = :user_id
AND l.url_lookup = simplify_url(:url)
AND l.url_hash = :url_hash
AND c.type = 'read'
Expand All @@ -415,7 +416,7 @@ public static function isUrlReadByUserId(string $user_id, string $url): bool
$statement = $database->prepare($sql);
$statement->execute([
':user_id' => $user_id,
':url' => $url,
':url_hash' => models\Link::hashUrl($url),
]);

return (bool) $statement->fetchColumn();
Expand All @@ -427,7 +428,7 @@ public static function isUrlReadByUserId(string $user_id, string $url): bool
public static function findNotOwnedByCollectionIdAndUrl(
string $user_id,
string $collection_id,
string $url_lookup,
string $url_hash,
): ?self {
$sql = <<<SQL
SELECT l.*
Expand All @@ -436,7 +437,7 @@ public static function findNotOwnedByCollectionIdAndUrl(
WHERE lc.link_id = l.id
AND lc.collection_id = :collection_id
AND l.url_lookup = :url_lookup
AND l.url_hash = :url_hash
AND l.user_id != :user_id
SQL;

Expand All @@ -445,7 +446,7 @@ public static function findNotOwnedByCollectionIdAndUrl(
$statement->execute([
':user_id' => $user_id,
':collection_id' => $collection_id,
':url_lookup' => $url_lookup,
':url_hash' => $url_hash,
]);

$result = $statement->fetch();
Expand Down
4 changes: 2 additions & 2 deletions src/models/dao/links/NewsQueries.php
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ public static function listFromFollowedCollectionsForNews(string $user_id): arra

$sql = <<<SQL
WITH excluded_links AS (
SELECT l_exclude.id, l_exclude.url_lookup
SELECT l_exclude.id, l_exclude.url_hash
FROM links l_exclude, collections c_exclude, links_to_collections lc_exclude
WHERE c_exclude.user_id = :user_id
Expand All @@ -57,7 +57,7 @@ public static function listFromFollowedCollectionsForNews(string $user_id): arra
FROM collections c, links_to_collections lc, followed_collections fc, links l
LEFT JOIN excluded_links
ON excluded_links.url_lookup = l.url_lookup
ON excluded_links.url_hash = l.url_hash
WHERE fc.user_id = :user_id
AND fc.collection_id = lc.collection_id
Expand Down
13 changes: 4 additions & 9 deletions src/schema.sql
Original file line number Diff line number Diff line change
@@ -1,9 +1,4 @@
CREATE FUNCTION simplify_url(url TEXT)
RETURNS TEXT
IMMUTABLE
RETURNS NULL ON NULL INPUT
AS $$ SELECT substring(url from 'https?://(.+)') $$
LANGUAGE SQL;
CREATE EXTENSION IF NOT EXISTS pgcrypto;

CREATE TABLE jobs (
id SERIAL PRIMARY KEY,
Expand Down Expand Up @@ -164,13 +159,13 @@ CREATE TABLE links (
group_by_source BOOLEAN NOT NULL DEFAULT false,

search_index TSVECTOR GENERATED ALWAYS AS (to_tsvector('french', title || ' ' || url)) STORED,
url_lookup TEXT GENERATED ALWAYS AS (simplify_url(url)) STORED,
url_hash TEXT GENERATED ALWAYS AS (encode(digest(url, 'sha256'), 'hex')) STORED,

user_id TEXT REFERENCES users ON DELETE CASCADE ON UPDATE CASCADE
);

CREATE INDEX idx_links_user_id_url ON links(user_id, url_lookup);
CREATE INDEX idx_links_url ON links(url_lookup);
CREATE INDEX idx_links_user_id_url_hash ON links USING btree(user_id, url_hash);
CREATE INDEX idx_links_url_hash ON links USING hash(url_hash);
CREATE INDEX idx_links_fetched_at ON links(fetched_at) WHERE fetched_at IS NULL;
CREATE INDEX idx_links_fetched_code ON links(fetched_code) WHERE fetched_code < 200 OR fetched_code >= 300;
CREATE INDEX idx_links_image_filename ON links(image_filename) WHERE image_filename IS NOT NULL;
Expand Down
18 changes: 0 additions & 18 deletions src/utils/Belt.php
Original file line number Diff line number Diff line change
Expand Up @@ -71,24 +71,6 @@ public static function host(string $url): string
}
}

/**
* Remove the scheme (http:// or https://) from a URL.
*/
public static function removeScheme(string $url): string
{
if (!$url) {
return '';
}

$url_no_scheme = preg_replace('#^https?://(.*)#', '\1', $url);

if ($url_no_scheme === null) {
return $url;
}

return $url_no_scheme;
}

/**
* Return a subpath from a file name (used for media files).
*
Expand Down
2 changes: 1 addition & 1 deletion src/views/collections/_collections_by_others.phtml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
<ul class="list--collections-others list list--no-style">
<?php foreach ($collections as $collection): ?>
<?php $other_link = $collection->linkNotOwnedByUrl($current_user->id, $link->url_lookup); ?>
<?php $other_link = $collection->linkNotOwnedByUrl($current_user->id, $link->url_hash); ?>
<?php $owner = $other_link->owner(); ?>
<li class="list__item" data-link-id="<?= $other_link->id ?>">
<img class="avatar" src="<?= url_avatar($owner->avatar_filename) ?>" alt="" />
Expand Down
Loading

0 comments on commit fd1fe71

Please sign in to comment.