Skip to content

Commit

Permalink
indexer-alt: wal_obj_types pipeline (#20116)
Browse files Browse the repository at this point in the history
## Description

Adds the concurrent pipeline that writes the write-ahead log for
`sum_obj_types`. It re-uses the `process` implementation from
`sum_obj_types` and then writes it into an append-only table.

Note that today:

- The pipelines are completely independent, which means the WAL pipeline
redoes the processing work of the summary pipeline (this is presumably
not an issue because the repeated work is not particularly heavy).
- This change does not include the pruning necessary to keep this
table's size in check (in practice it should only be a couple of gigs in
size). This will come in a follow-up PR.

## Test plan

Run the indexer and cross check the live-object set calculated from the
write-ahead log with the summary:

```
sui$ cargo run -p sui-indexer-alt --release --                                   \
  --database-url "postgres://postgres:postgrespw@localhost:5432/sui_indexer_alt" \
  indexer --remote-store-url https://checkpoints.mainnet.sui.io                  \
  --last-checkpoint 5000
```

```
sui_indexer_alt=# SELECT COUNT(*) FROM sum_obj_types;
 count
-------
   592
(1 row)

                                                  ^
sui_indexer_alt=# SELECT
    COUNT(*)
FROM (
    SELECT DISTINCT ON (object_id)
        *
    FROM
        wal_obj_types
    ORDER BY
        object_id,
        object_version DESC
) o
WHERE
    o.owner_kind IS NOT NULL;
 count
-------
   592
(1 row)
```

## Stack

- #20089 
- #20114 

---

## Release notes

Check each box that your changes affect. If none of the boxes relate to
your changes, release notes aren't required.

For each box you select, include information after the relevant heading
that describes the impact of your changes that a user might notice and
any actions they must take to implement updates.

- [ ] Protocol: 
- [ ] Nodes (Validators and Full nodes): 
- [ ] Indexer: 
- [ ] JSON-RPC: 
- [ ] GraphQL: 
- [ ] CLI: 
- [ ] Rust SDK:
- [ ] REST API:
  • Loading branch information
amnn committed Nov 1, 2024
1 parent 7de0241 commit 71f1faa
Show file tree
Hide file tree
Showing 9 changed files with 189 additions and 3 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
-- Drops the `wal_obj_types` table; `IF EXISTS` keeps the statement from
-- failing when the table is not there.
DROP TABLE IF EXISTS wal_obj_types;
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
-- Write-ahead log for `sum_obj_types`.
--
-- It contains the same columns and indices as `sum_obj_types`, but with the
-- following changes:
--
-- - A `cp_sequence_number` column (and an index on it), to support pruning by
-- checkpoint.
--
-- - The primary key includes the version, as the table may contain multiple
-- versions per object ID.
--
-- - The `owner_kind` column is nullable, because this table also tracks
-- deleted and wrapped objects (where all the fields except the ID, version,
-- and checkpoint are NULL).
--
-- - There is an additional index on ID and version for querying the latest
-- version of every object.
--
-- This table is used in conjunction with `sum_obj_types` to support consistent
-- live object set queries: `sum_obj_types` holds the state of the live object
-- set at some checkpoint `C < T` where `T` is the tip of the chain, and
-- `wal_obj_types` stores all the updates and deletes between `C` and `T`.
--
-- To reconstruct the live object set at some snapshot checkpoint `S`
-- between `C` and `T`, a query can be constructed that starts with the set
-- from `sum_obj_types` and adds updates in `wal_obj_types` with
-- `cp_sequence_number <= S`.
--
-- See `up.sql` for the original `sum_obj_types` table for documentation on
-- columns.
CREATE TABLE IF NOT EXISTS wal_obj_types
(
object_id BYTEA NOT NULL,
object_version BIGINT NOT NULL,
-- NULL when the object was deleted or wrapped at this version; in that case
-- every column from here through `instantiation` is NULL as well.
owner_kind SMALLINT,
owner_id BYTEA,
package BYTEA,
module TEXT,
name TEXT,
instantiation BYTEA,
-- Checkpoint that produced this row; used to prune the log by checkpoint.
cp_sequence_number BIGINT NOT NULL,
-- The version is part of the key because one object ID can have several
-- versions in this table.
PRIMARY KEY (object_id, object_version)
);

-- Supports pruning by checkpoint.
CREATE INDEX IF NOT EXISTS wal_obj_types_cp_sequence_number
ON wal_obj_types (cp_sequence_number);

-- Intended for querying the latest version of every object.
-- NOTE(review): this lists the same columns, in the same order, as the primary
-- key, and PostgreSQL already backs a primary key with a unique index --
-- confirm whether this extra index is actually needed.
CREATE INDEX IF NOT EXISTS wal_obj_types_version
ON wal_obj_types (object_id, object_version);

-- The remaining indices cover combinations of owner (`owner_kind`, `owner_id`)
-- and progressively longer type prefixes (`package`, `module`, `name`,
-- `instantiation`). Every one of them ends with `object_id, object_version`.
-- Presumably they mirror the filter indices on `sum_obj_types` -- confirm
-- against that table's migration.

-- Owner only.
CREATE INDEX IF NOT EXISTS wal_obj_types_owner
ON wal_obj_types (owner_kind, owner_id, object_id, object_version);

-- Type prefix only, from package down to full instantiation.
CREATE INDEX IF NOT EXISTS wal_obj_types_pkg
ON wal_obj_types (package, object_id, object_version);

CREATE INDEX IF NOT EXISTS wal_obj_types_mod
ON wal_obj_types (package, module, object_id, object_version);

CREATE INDEX IF NOT EXISTS wal_obj_types_name
ON wal_obj_types (package, module, name, object_id, object_version);

CREATE INDEX IF NOT EXISTS wal_obj_types_inst
ON wal_obj_types (package, module, name, instantiation, object_id, object_version);

-- Owner followed by a type prefix, from package down to full instantiation.
CREATE INDEX IF NOT EXISTS wal_obj_types_owner_pkg
ON wal_obj_types (owner_kind, owner_id, package, object_id, object_version);

CREATE INDEX IF NOT EXISTS wal_obj_types_owner_mod
ON wal_obj_types (owner_kind, owner_id, package, module, object_id, object_version);

CREATE INDEX IF NOT EXISTS wal_obj_types_owner_name
ON wal_obj_types (owner_kind, owner_id, package, module, name, object_id, object_version);

CREATE INDEX IF NOT EXISTS wal_obj_types_owner_inst
ON wal_obj_types (owner_kind, owner_id, package, module, name, instantiation, object_id, object_version);
1 change: 1 addition & 0 deletions crates/sui-indexer-alt/src/handlers/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,3 +11,4 @@ pub mod sum_coin_balances;
pub mod sum_obj_types;
pub mod tx_affected_objects;
pub mod tx_balance_changes;
pub mod wal_obj_types;
9 changes: 8 additions & 1 deletion crates/sui-indexer-alt/src/handlers/sum_coin_balances.rs
Original file line number Diff line number Diff line change
Expand Up @@ -37,8 +37,13 @@ impl Processor for SumCoinBalances {
type Value = StoredObjectUpdate<StoredSumCoinBalance>;

fn process(checkpoint: &Arc<CheckpointData>) -> anyhow::Result<Vec<Self::Value>> {
let CheckpointData { transactions, .. } = checkpoint.as_ref();
let CheckpointData {
transactions,
checkpoint_summary,
..
} = checkpoint.as_ref();

let cp_sequence_number = checkpoint_summary.sequence_number;
let mut values: BTreeMap<ObjectID, Self::Value> = BTreeMap::new();
let mut coin_types: BTreeMap<ObjectID, Vec<u8>> = BTreeMap::new();

Expand Down Expand Up @@ -78,6 +83,7 @@ impl Processor for SumCoinBalances {
entry.insert(StoredObjectUpdate {
object_id,
object_version,
cp_sequence_number,
update: None,
});
}
Expand Down Expand Up @@ -111,6 +117,7 @@ impl Processor for SumCoinBalances {
entry.insert(StoredObjectUpdate {
object_id,
object_version,
cp_sequence_number,
update: Some(StoredSumCoinBalance {
object_id: object_id.to_vec(),
object_version: object_version as i64,
Expand Down
9 changes: 8 additions & 1 deletion crates/sui-indexer-alt/src/handlers/sum_obj_types.rs
Original file line number Diff line number Diff line change
Expand Up @@ -37,8 +37,13 @@ impl Processor for SumObjTypes {
type Value = StoredObjectUpdate<StoredSumObjType>;

fn process(checkpoint: &Arc<CheckpointData>) -> anyhow::Result<Vec<Self::Value>> {
let CheckpointData { transactions, .. } = checkpoint.as_ref();
let CheckpointData {
transactions,
checkpoint_summary,
..
} = checkpoint.as_ref();

let cp_sequence_number = checkpoint_summary.sequence_number;
let mut values: BTreeMap<ObjectID, Self::Value> = BTreeMap::new();

// Iterate over transactions in reverse so we see the latest version of each object first.
Expand All @@ -63,6 +68,7 @@ impl Processor for SumObjTypes {
entry.insert(StoredObjectUpdate {
object_id,
object_version,
cp_sequence_number,
update: None,
});
}
Expand All @@ -83,6 +89,7 @@ impl Processor for SumObjTypes {
entry.insert(StoredObjectUpdate {
object_id,
object_version,
cp_sequence_number,
update: Some(StoredSumObjType {
object_id: object_id.to_vec(),
object_version: object_version as i64,
Expand Down
62 changes: 62 additions & 0 deletions crates/sui-indexer-alt/src/handlers/wal_obj_types.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
// Copyright (c) Mysten Labs, Inc.
// SPDX-License-Identifier: Apache-2.0

use std::sync::Arc;

use anyhow::Result;
use diesel_async::RunQueryDsl;
use sui_types::full_checkpoint_content::CheckpointData;

use crate::{
db,
models::objects::{StoredObjectUpdate, StoredSumObjType, StoredWalObjType},
pipeline::{concurrent::Handler, Processor},
schema::wal_obj_types,
};

use super::sum_obj_types::SumObjTypes;

/// Marker type for the `wal_obj_types` pipeline, which writes the write-ahead
/// log for `sum_obj_types`. It carries no data; all behaviour lives in its
/// `Processor` and `Handler` implementations.
pub struct WalObjTypes;

impl Processor for WalObjTypes {
const NAME: &'static str = "wal_obj_types";

type Value = StoredObjectUpdate<StoredSumObjType>;

fn process(checkpoint: &Arc<CheckpointData>) -> Result<Vec<Self::Value>> {
SumObjTypes::process(checkpoint)
}
}

#[async_trait::async_trait]
impl Handler for WalObjTypes {
    // Row-count thresholds read by the pipeline framework; presumably they
    // control batching of writes -- see the `Handler` trait for their exact
    // meaning.
    const MIN_EAGER_ROWS: usize = 100;
    const MAX_CHUNK_ROWS: usize = 1000;
    const MAX_PENDING_ROWS: usize = 10000;

    /// Writes a batch of object updates to `wal_obj_types`.
    ///
    /// Each update becomes one row keyed by object ID and version. When the
    /// update carries no new state (`update` is `None`, i.e. the object was
    /// deleted or wrapped), every owner and type column is stored as NULL.
    ///
    /// Rows that conflict with an existing row are skipped, so re-committing
    /// the same values does not fail. Returns the row count reported by the
    /// insert statement.
    async fn commit(values: &[Self::Value], conn: &mut db::Connection<'_>) -> Result<usize> {
        let mut rows = Vec::with_capacity(values.len());

        for entry in values {
            // `Option<&StoredSumObjType>` is `Copy`, so borrow it once and
            // reuse it for every column derived from the update.
            let update = entry.update.as_ref();

            rows.push(StoredWalObjType {
                object_id: entry.object_id.to_vec(),
                object_version: entry.object_version as i64,

                owner_kind: update.map(|o| o.owner_kind),
                owner_id: update.and_then(|o| o.owner_id.clone()),

                package: update.and_then(|o| o.package.clone()),
                module: update.and_then(|o| o.module.clone()),
                name: update.and_then(|o| o.name.clone()),
                instantiation: update.and_then(|o| o.instantiation.clone()),

                cp_sequence_number: entry.cp_sequence_number as i64,
            });
        }

        let inserted = diesel::insert_into(wal_obj_types::table)
            .values(&rows)
            .on_conflict_do_nothing()
            .execute(conn)
            .await?;

        Ok(inserted)
    }
}
2 changes: 2 additions & 0 deletions crates/sui-indexer-alt/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ use sui_indexer_alt::{
kv_objects::KvObjects, kv_transactions::KvTransactions, obj_versions::ObjVersions,
sum_coin_balances::SumCoinBalances, sum_obj_types::SumObjTypes,
tx_affected_objects::TxAffectedObjects, tx_balance_changes::TxBalanceChanges,
wal_obj_types::WalObjTypes,
},
Indexer,
};
Expand Down Expand Up @@ -43,6 +44,7 @@ async fn main() -> Result<()> {
indexer.concurrent_pipeline::<ObjVersions>().await?;
indexer.concurrent_pipeline::<TxAffectedObjects>().await?;
indexer.concurrent_pipeline::<TxBalanceChanges>().await?;
indexer.concurrent_pipeline::<WalObjTypes>().await?;
indexer.sequential_pipeline::<SumCoinBalances>(lag).await?;
indexer.sequential_pipeline::<SumObjTypes>(lag).await?;

Expand Down
17 changes: 16 additions & 1 deletion crates/sui-indexer-alt/src/models/objects.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ use diesel::{
use sui_field_count::FieldCount;
use sui_types::base_types::ObjectID;

use crate::schema::{kv_objects, obj_versions, sum_coin_balances, sum_obj_types};
use crate::schema::{kv_objects, obj_versions, sum_coin_balances, sum_obj_types, wal_obj_types};

#[derive(Insertable, Debug, Clone, FieldCount)]
#[diesel(table_name = kv_objects, primary_key(object_id, object_version))]
Expand All @@ -32,6 +32,7 @@ pub struct StoredObjVersion {
pub struct StoredObjectUpdate<T> {
pub object_id: ObjectID,
pub object_version: u64,
pub cp_sequence_number: u64,
/// `None` means the object was deleted or wrapped at this version, `Some(x)` means it was
/// changed to `x`.
pub update: Option<T>,
Expand Down Expand Up @@ -70,6 +71,20 @@ pub struct StoredSumObjType {
pub instantiation: Option<Vec<u8>>,
}

/// A row of the `wal_obj_types` table: one version of one object, tagged with
/// the checkpoint it was recorded at.
///
/// The owner and type fields are all optional; they are all `None` when the
/// row records an object that was deleted or wrapped at this version.
// NOTE(review): the `kv_objects` model in this file also derives `FieldCount`,
// but this struct does not -- confirm whether that is intentional.
#[derive(Insertable, Debug, Clone)]
#[diesel(table_name = wal_obj_types, primary_key(object_id, object_version))]
pub struct StoredWalObjType {
pub object_id: Vec<u8>,
pub object_version: i64,
pub owner_kind: Option<StoredOwnerKind>,
pub owner_id: Option<Vec<u8>>,
pub package: Option<Vec<u8>>,
pub module: Option<String>,
pub name: Option<String>,
pub instantiation: Option<Vec<u8>>,
// Sequence number of the checkpoint this row was recorded at.
pub cp_sequence_number: i64,
}

impl<DB: Backend> serialize::ToSql<SmallInt, DB> for StoredOwnerKind
where
i16: serialize::ToSql<SmallInt, DB>,
Expand Down
15 changes: 15 additions & 0 deletions crates/sui-indexer-alt/src/schema.rs
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,20 @@ diesel::table! {
}
}

// Diesel schema for the `wal_obj_types` table, keyed by
// (`object_id`, `object_version`). The `Nullable` columns are the ones
// declared without `NOT NULL` in the table's migration.
diesel::table! {
wal_obj_types (object_id, object_version) {
object_id -> Bytea,
object_version -> Int8,
owner_kind -> Nullable<Int2>,
owner_id -> Nullable<Bytea>,
package -> Nullable<Bytea>,
module -> Nullable<Text>,
name -> Nullable<Text>,
instantiation -> Nullable<Bytea>,
cp_sequence_number -> Int8,
}
}

diesel::table! {
watermarks (pipeline) {
pipeline -> Text,
Expand All @@ -120,5 +134,6 @@ diesel::allow_tables_to_appear_in_same_query!(
sum_obj_types,
tx_affected_objects,
tx_balance_changes,
wal_obj_types,
watermarks,
);

0 comments on commit 71f1faa

Please sign in to comment.