-
Notifications
You must be signed in to change notification settings - Fork 11.3k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
indexer-alt: wal_obj_types pipeline (#20116)
## Description Adds the concurrent pipeline that writes the write-ahead log for `sum_obj_types`. It re-uses the `process` implementation from `sum_obj_types` and then writes it into an append-only table. Note that today: - The pipelines are completely independent, which means the WAL pipeline redoes the processing work of the summary pipeline (this is presumably not an issue because the repeated work is not particularly heavy). - This change does not include the pruning necessary to keep this table's size in check (in practice it should only be a couple of gigs in size). This will come in a follow-up PR. ## Test plan Run the indexer and cross check the live-object set calculated from the write-ahead log with the summary: ``` sui$ cargo run -p sui-indexer-alt --release -- \ --database-url "postgres://postgres:postgrespw@localhost:5432/sui_indexer_alt" \ indexer --remote-store-url https://checkpoints.mainnet.sui.io \ --last-checkpoint 5000 ``` ``` sui_indexer_alt=# SELECT COUNT(*) FROM sum_obj_types; count ------- 592 (1 row) ^ sui_indexer_alt=# SELECT COUNT(*) FROM ( SELECT DISTINCT ON (object_id) * FROM wal_obj_types ORDER BY object_id, object_version DESC ) o WHERE o.owner_kind IS NOT NULL; count ------- 592 (1 row) ``` ## Stack - #20089 - #20114 --- ## Release notes Check each box that your changes affect. If none of the boxes relate to your changes, release notes aren't required. For each box you select, include information after the relevant heading that describes the impact of your changes that a user might notice and any actions they must take to implement updates. - [ ] Protocol: - [ ] Nodes (Validators and Full nodes): - [ ] Indexer: - [ ] JSON-RPC: - [ ] GraphQL: - [ ] CLI: - [ ] Rust SDK: - [ ] REST API:
- Loading branch information
Showing
9 changed files
with
189 additions
and
3 deletions.
There are no files selected for viewing
1 change: 1 addition & 0 deletions
1
crates/sui-indexer-alt/migrations/2024-10-30-214852_wal_obj_types/down.sql
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
-- Down migration: removes the `wal_obj_types` write-ahead log table; a no-op if the table is absent. | ||
DROP TABLE IF EXISTS wal_obj_types; |
76 changes: 76 additions & 0 deletions
76
crates/sui-indexer-alt/migrations/2024-10-30-214852_wal_obj_types/up.sql
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,76 @@ | ||
-- Write-ahead log for `sum_obj_types`. | ||
-- | ||
-- It contains the same columns and indices as `sum_obj_types`, but with the | ||
-- following changes: | ||
-- | ||
-- - A `cp_sequence_number` column (and an index on it), to support pruning by | ||
-- checkpoint. | ||
-- | ||
-- - The primary key includes the version, as the table may contain multiple | ||
-- versions per object ID. | ||
-- | ||
-- - The `owner_kind` column is nullable, because this table also tracks | ||
-- deleted and wrapped objects (where all the fields except the ID, version, | ||
-- and checkpoint are NULL). | ||
-- | ||
-- - There is an additional index on ID and version for querying the latest | ||
-- version of every object. | ||
-- | ||
-- This table is used in conjunction with `sum_obj_types` to support consistent | ||
-- live object set queries: `sum_obj_types` holds the state of the live object | ||
-- set at some checkpoint `C < T` where `T` is the tip of the chain, and | ||
-- `wal_obj_types` stores all the updates and deletes between `C` and `T`. | ||
-- | ||
-- To reconstruct the live object set at some snapshot checkpoint `S` | ||
-- between `C` and `T`, a query can be constructed that starts with the set | ||
-- from `sum_obj_types` and adds updates in `wal_obj_types` from | ||
-- `cp_sequence_number <= S`. | ||
-- | ||
-- See `up.sql` for the original `sum_obj_types` table for documentation on | ||
-- columns. | ||
CREATE TABLE IF NOT EXISTS wal_obj_types | ||
( | ||
-- Identity of the object version this row describes; always populated. | ||
object_id BYTEA NOT NULL, | ||
object_version BIGINT NOT NULL, | ||
-- Owner and type fields. All of them are NULL when the row records a | ||
-- deleted or wrapped object; individual fields may also be NULL for a | ||
-- live object (see the `sum_obj_types` migration for column semantics). | ||
owner_kind SMALLINT, | ||
owner_id BYTEA, | ||
package BYTEA, | ||
module TEXT, | ||
name TEXT, | ||
instantiation BYTEA, | ||
-- Checkpoint that produced this row; always populated so that rows can | ||
-- be pruned by checkpoint. | ||
cp_sequence_number BIGINT NOT NULL, | ||
-- The version is part of the key because the table may hold several | ||
-- versions of the same object ID. | ||
PRIMARY KEY (object_id, object_version) | ||
); | ||
|
||
-- Supports pruning (and range scans) by checkpoint. | ||
CREATE INDEX IF NOT EXISTS wal_obj_types_cp_sequence_number | ||
ON wal_obj_types (cp_sequence_number); | ||
|
||
-- Intended for finding the latest version of every object. | ||
-- NOTE(review): these are the same columns, in the same order, as the | ||
-- table's PRIMARY KEY (object_id, object_version). PostgreSQL already backs | ||
-- a primary key with a unique index, so this index looks redundant -- confirm | ||
-- before dropping it in a follow-up migration. | ||
CREATE INDEX IF NOT EXISTS wal_obj_types_version | ||
ON wal_obj_types (object_id, object_version); | ||
|
||
-- Filter by owner. Like every index below, it ends in | ||
-- (object_id, object_version). | ||
CREATE INDEX IF NOT EXISTS wal_obj_types_owner | ||
ON wal_obj_types (owner_kind, owner_id, object_id, object_version); | ||
|
||
-- Filter by type, at increasing levels of specificity: | ||
-- package, then module, then name, then full instantiation. | ||
CREATE INDEX IF NOT EXISTS wal_obj_types_pkg | ||
ON wal_obj_types (package, object_id, object_version); | ||
|
||
CREATE INDEX IF NOT EXISTS wal_obj_types_mod | ||
ON wal_obj_types (package, module, object_id, object_version); | ||
|
||
CREATE INDEX IF NOT EXISTS wal_obj_types_name | ||
ON wal_obj_types (package, module, name, object_id, object_version); | ||
|
||
CREATE INDEX IF NOT EXISTS wal_obj_types_inst | ||
ON wal_obj_types (package, module, name, instantiation, object_id, object_version); | ||
|
||
-- Filter by owner and type together, at the same four levels of type | ||
-- specificity as above. | ||
CREATE INDEX IF NOT EXISTS wal_obj_types_owner_pkg | ||
ON wal_obj_types (owner_kind, owner_id, package, object_id, object_version); | ||
|
||
CREATE INDEX IF NOT EXISTS wal_obj_types_owner_mod | ||
ON wal_obj_types (owner_kind, owner_id, package, module, object_id, object_version); | ||
|
||
CREATE INDEX IF NOT EXISTS wal_obj_types_owner_name | ||
ON wal_obj_types (owner_kind, owner_id, package, module, name, object_id, object_version); | ||
|
||
CREATE INDEX IF NOT EXISTS wal_obj_types_owner_inst | ||
ON wal_obj_types (owner_kind, owner_id, package, module, name, instantiation, object_id, object_version); |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,62 @@ | ||
// Copyright (c) Mysten Labs, Inc. | ||
// SPDX-License-Identifier: Apache-2.0 | ||
|
||
use std::sync::Arc; | ||
|
||
use anyhow::Result; | ||
use diesel_async::RunQueryDsl; | ||
use sui_types::full_checkpoint_content::CheckpointData; | ||
|
||
use crate::{ | ||
db, | ||
models::objects::{StoredObjectUpdate, StoredSumObjType, StoredWalObjType}, | ||
pipeline::{concurrent::Handler, Processor}, | ||
schema::wal_obj_types, | ||
}; | ||
|
||
use super::sum_obj_types::SumObjTypes; | ||
|
||
/// Pipeline that appends object updates to the `wal_obj_types` table, the | ||
/// write-ahead log for `sum_obj_types`. | ||
/// | ||
/// This is a field-less marker type: all behavior lives in its `Processor` | ||
/// and `Handler` implementations. | ||
pub struct WalObjTypes; | ||
|
||
/// Processing stage: turns a checkpoint into the object updates to be logged. | ||
impl Processor for WalObjTypes { | ||
/// Pipeline name; matches the name of the table this pipeline writes to. | ||
const NAME: &'static str = "wal_obj_types"; | ||
|
||
/// Same value type as the `sum_obj_types` pipeline, so its processing | ||
/// logic can be reused unchanged. | ||
type Value = StoredObjectUpdate<StoredSumObjType>; | ||
|
||
/// Delegates to `SumObjTypes::process` and returns its result as-is. | ||
/// | ||
/// The two pipelines run independently, so the work done here repeats | ||
/// what the summary pipeline does for the same checkpoint. | ||
fn process(checkpoint: &Arc<CheckpointData>) -> Result<Vec<Self::Value>> { | ||
SumObjTypes::process(checkpoint) | ||
} | ||
} | ||
|
||
/// Commit stage: writes processed updates into the `wal_obj_types` table. | ||
#[async_trait::async_trait] | ||
impl Handler for WalObjTypes { | ||
// Row-count thresholds used by the concurrent pipeline framework. | ||
// NOTE(review): their exact meaning is defined on the `Handler` trait, | ||
// which is not visible here -- confirm against the trait docs. | ||
const MIN_EAGER_ROWS: usize = 100; | ||
const MAX_CHUNK_ROWS: usize = 1000; | ||
const MAX_PENDING_ROWS: usize = 10000; | ||
|
||
/// Converts each update into a `StoredWalObjType` row and inserts the | ||
/// whole batch into `wal_obj_types` in one statement. | ||
/// | ||
/// Rows that conflict with an existing row are skipped instead of | ||
/// failing the insert. Returns the count reported by `execute`. | ||
/// | ||
/// # Errors | ||
/// | ||
/// Propagates any error returned by the database insert. | ||
async fn commit(values: &[Self::Value], conn: &mut db::Connection<'_>) -> Result<usize> { | ||
let values: Vec<_> = values | ||
.iter() | ||
.map(|value| StoredWalObjType { | ||
object_id: value.object_id.to_vec(), | ||
// NOTE(review): `as` does not range-check; presumably the source | ||
// is a u64 that always fits in an i64 -- confirm. | ||
object_version: value.object_version as i64, | ||
|
||
// `update` is optional: when it is absent, every owner and type | ||
// field below is stored as NULL. | ||
owner_kind: value.update.as_ref().map(|o| o.owner_kind), | ||
owner_id: value.update.as_ref().and_then(|o| o.owner_id.clone()), | ||
|
||
package: value.update.as_ref().and_then(|o| o.package.clone()), | ||
module: value.update.as_ref().and_then(|o| o.module.clone()), | ||
name: value.update.as_ref().and_then(|o| o.name.clone()), | ||
instantiation: value.update.as_ref().and_then(|o| o.instantiation.clone()), | ||
|
||
// NOTE(review): same unchecked cast as `object_version` above. | ||
cp_sequence_number: value.cp_sequence_number as i64, | ||
}) | ||
.collect(); | ||
|
||
// Conflicting rows are left untouched, so committing the same values | ||
// again does not raise an error. | ||
Ok(diesel::insert_into(wal_obj_types::table) | ||
.values(&values) | ||
.on_conflict_do_nothing() | ||
.execute(conn) | ||
.await?) | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters