Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

indexer-alt: wal_obj_types pipeline #20116

Merged
merged 1 commit into from
Nov 1, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
-- Reverses the creation of `wal_obj_types`. `IF EXISTS` makes the statement a
-- no-op when the table is already absent.
DROP TABLE IF EXISTS wal_obj_types;
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
-- Write-ahead log for `sum_obj_types`.
--
-- It contains the same columns and indices as `sum_obj_types`, but with the
-- following changes:
--
-- - A `cp_sequence_number` column (and an index on it), to support pruning by
-- checkpoint.
--
-- - The primary key includes the version, as the table may contain multiple
-- versions per object ID.
--
-- - The `owner_kind` column is nullable, because this table also tracks
-- deleted and wrapped objects (where all the fields except the ID, version,
-- and checkpoint are NULL).
--
-- - There is an additional index on ID and version for querying the latest
-- version of every object.
--
-- This table is used in conjunction with `sum_obj_types` to support consistent
-- live object set queries: `sum_obj_types` holds the state of the live object
-- set at some checkpoint `C < T` where `T` is the tip of the chain, and
-- `wal_obj_types` stores all the updates and deletes between `C` and `T`.
Comment on lines +19 to +22
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

sum_obj_types is objects_snapshot and wal_obj_types is objects_history, kind of?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That's right -- but without object contents, or coin info.

--
-- To reconstruct the live object set at some snapshot checkpoint `S`
-- between `C` and `T`, a query can be constructed that starts with the set
-- from `sum_obj_types` and adds updates in `wal_obj_types` from
-- `cp_sequence_number <= S`.
--
-- See `up.sql` for the original `sum_obj_types` table for documentation on
-- columns.
-- One row per (object ID, version) pair. Only the ID, version and checkpoint
-- are mandatory; the owner and type columns are left NULL for rows that record
-- a deleted or wrapped object.
CREATE TABLE IF NOT EXISTS wal_obj_types
(
object_id BYTEA NOT NULL,
object_version BIGINT NOT NULL,
-- Nullable in this table (see the header above): NULL marks a deletion/wrap.
owner_kind SMALLINT,
owner_id BYTEA,
-- Type of the object, split into its components.
package BYTEA,
module TEXT,
name TEXT,
instantiation BYTEA,
-- Checkpoint that produced this row; used to prune the log by checkpoint.
cp_sequence_number BIGINT NOT NULL,
-- The version is part of the key because several versions of the same object
-- may be present at once.
PRIMARY KEY (object_id, object_version)
);

-- Supports pruning by checkpoint.
CREATE INDEX IF NOT EXISTS wal_obj_types_cp_sequence_number
ON wal_obj_types (cp_sequence_number);

-- Intended for querying the latest version of every object.
-- NOTE(review): this index has the same columns, in the same order, as the
-- PRIMARY KEY above, and PostgreSQL already creates a unique index to back a
-- primary key -- so this one looks redundant. Confirm before relying on it or
-- dropping it in a follow-up migration.
CREATE INDEX IF NOT EXISTS wal_obj_types_version
ON wal_obj_types (object_id, object_version);

-- The remaining indices all end in (object_id, object_version) and differ only
-- in their leading filter columns: owner, then progressively longer prefixes of
-- the type (package, module, name, instantiation), then owner combined with
-- each of those type prefixes.

-- Filter by owner.
CREATE INDEX IF NOT EXISTS wal_obj_types_owner
ON wal_obj_types (owner_kind, owner_id, object_id, object_version);

-- Filter by type: package only.
CREATE INDEX IF NOT EXISTS wal_obj_types_pkg
ON wal_obj_types (package, object_id, object_version);

-- Filter by type: package and module.
CREATE INDEX IF NOT EXISTS wal_obj_types_mod
ON wal_obj_types (package, module, object_id, object_version);

-- Filter by type: package, module and name.
CREATE INDEX IF NOT EXISTS wal_obj_types_name
ON wal_obj_types (package, module, name, object_id, object_version);

-- Filter by type: fully instantiated type.
CREATE INDEX IF NOT EXISTS wal_obj_types_inst
ON wal_obj_types (package, module, name, instantiation, object_id, object_version);

-- Filter by owner and package.
CREATE INDEX IF NOT EXISTS wal_obj_types_owner_pkg
ON wal_obj_types (owner_kind, owner_id, package, object_id, object_version);

-- Filter by owner, package and module.
CREATE INDEX IF NOT EXISTS wal_obj_types_owner_mod
ON wal_obj_types (owner_kind, owner_id, package, module, object_id, object_version);

-- Filter by owner, package, module and name.
CREATE INDEX IF NOT EXISTS wal_obj_types_owner_name
ON wal_obj_types (owner_kind, owner_id, package, module, name, object_id, object_version);

-- Filter by owner and fully instantiated type.
CREATE INDEX IF NOT EXISTS wal_obj_types_owner_inst
ON wal_obj_types (owner_kind, owner_id, package, module, name, instantiation, object_id, object_version);
1 change: 1 addition & 0 deletions crates/sui-indexer-alt/src/handlers/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,3 +11,4 @@ pub mod sum_coin_balances;
pub mod sum_obj_types;
pub mod tx_affected_objects;
pub mod tx_balance_changes;
pub mod wal_obj_types;
9 changes: 8 additions & 1 deletion crates/sui-indexer-alt/src/handlers/sum_coin_balances.rs
Original file line number Diff line number Diff line change
Expand Up @@ -37,8 +37,13 @@ impl Processor for SumCoinBalances {
type Value = StoredObjectUpdate<StoredSumCoinBalance>;

fn process(checkpoint: &Arc<CheckpointData>) -> anyhow::Result<Vec<Self::Value>> {
let CheckpointData { transactions, .. } = checkpoint.as_ref();
let CheckpointData {
transactions,
checkpoint_summary,
..
} = checkpoint.as_ref();

let cp_sequence_number = checkpoint_summary.sequence_number;
let mut values: BTreeMap<ObjectID, Self::Value> = BTreeMap::new();
let mut coin_types: BTreeMap<ObjectID, Vec<u8>> = BTreeMap::new();

Expand Down Expand Up @@ -78,6 +83,7 @@ impl Processor for SumCoinBalances {
entry.insert(StoredObjectUpdate {
object_id,
object_version,
cp_sequence_number,
update: None,
});
}
Expand Down Expand Up @@ -111,6 +117,7 @@ impl Processor for SumCoinBalances {
entry.insert(StoredObjectUpdate {
object_id,
object_version,
cp_sequence_number,
update: Some(StoredSumCoinBalance {
object_id: object_id.to_vec(),
object_version: object_version as i64,
Expand Down
9 changes: 8 additions & 1 deletion crates/sui-indexer-alt/src/handlers/sum_obj_types.rs
Original file line number Diff line number Diff line change
Expand Up @@ -37,8 +37,13 @@ impl Processor for SumObjTypes {
type Value = StoredObjectUpdate<StoredSumObjType>;

fn process(checkpoint: &Arc<CheckpointData>) -> anyhow::Result<Vec<Self::Value>> {
let CheckpointData { transactions, .. } = checkpoint.as_ref();
let CheckpointData {
transactions,
checkpoint_summary,
..
} = checkpoint.as_ref();

let cp_sequence_number = checkpoint_summary.sequence_number;
let mut values: BTreeMap<ObjectID, Self::Value> = BTreeMap::new();

// Iterate over transactions in reverse so we see the latest version of each object first.
Expand All @@ -63,6 +68,7 @@ impl Processor for SumObjTypes {
entry.insert(StoredObjectUpdate {
object_id,
object_version,
cp_sequence_number,
update: None,
});
}
Expand All @@ -83,6 +89,7 @@ impl Processor for SumObjTypes {
entry.insert(StoredObjectUpdate {
object_id,
object_version,
cp_sequence_number,
update: Some(StoredSumObjType {
object_id: object_id.to_vec(),
object_version: object_version as i64,
Expand Down
62 changes: 62 additions & 0 deletions crates/sui-indexer-alt/src/handlers/wal_obj_types.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
// Copyright (c) Mysten Labs, Inc.
// SPDX-License-Identifier: Apache-2.0

use std::sync::Arc;

use anyhow::Result;
use diesel_async::RunQueryDsl;
use sui_types::full_checkpoint_content::CheckpointData;

use crate::{
db,
models::objects::{StoredObjectUpdate, StoredSumObjType, StoredWalObjType},
pipeline::{concurrent::Handler, Processor},
schema::wal_obj_types,
};

use super::sum_obj_types::SumObjTypes;

/// Marker type for the `wal_obj_types` pipeline. It carries no data; the
/// pipeline's behavior lives in its `Processor` and `Handler` implementations.
pub struct WalObjTypes;

// Processing side of the pipeline: turns a checkpoint into object updates.
impl Processor for WalObjTypes {
/// Name of this pipeline (identical to the name of the table it writes to).
const NAME: &'static str = "wal_obj_types";

/// Same value type as the `SumObjTypes` pipeline: an object ID, version and
/// checkpoint, plus an optional payload (`None` when there is no new state to
/// record for the object).
type Value = StoredObjectUpdate<StoredSumObjType>;

/// Extracts the object updates from `checkpoint` by delegating to
/// `SumObjTypes::process`, so both pipelines are fed identical values. Any
/// error from that call is returned unchanged.
fn process(checkpoint: &Arc<CheckpointData>) -> Result<Vec<Self::Value>> {
SumObjTypes::process(checkpoint)
}
}

#[async_trait::async_trait]
impl Handler for WalObjTypes {
const MIN_EAGER_ROWS: usize = 100;
const MAX_CHUNK_ROWS: usize = 1000;
const MAX_PENDING_ROWS: usize = 10000;

async fn commit(values: &[Self::Value], conn: &mut db::Connection<'_>) -> Result<usize> {
let values: Vec<_> = values
.iter()
.map(|value| StoredWalObjType {
object_id: value.object_id.to_vec(),
object_version: value.object_version as i64,

owner_kind: value.update.as_ref().map(|o| o.owner_kind),
owner_id: value.update.as_ref().and_then(|o| o.owner_id.clone()),

package: value.update.as_ref().and_then(|o| o.package.clone()),
module: value.update.as_ref().and_then(|o| o.module.clone()),
name: value.update.as_ref().and_then(|o| o.name.clone()),
instantiation: value.update.as_ref().and_then(|o| o.instantiation.clone()),

cp_sequence_number: value.cp_sequence_number as i64,
})
.collect();

Ok(diesel::insert_into(wal_obj_types::table)
.values(&values)
.on_conflict_do_nothing()
.execute(conn)
.await?)
}
}
2 changes: 2 additions & 0 deletions crates/sui-indexer-alt/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ use sui_indexer_alt::{
kv_objects::KvObjects, kv_transactions::KvTransactions, obj_versions::ObjVersions,
sum_coin_balances::SumCoinBalances, sum_obj_types::SumObjTypes,
tx_affected_objects::TxAffectedObjects, tx_balance_changes::TxBalanceChanges,
wal_obj_types::WalObjTypes,
},
Indexer,
};
Expand Down Expand Up @@ -43,6 +44,7 @@ async fn main() -> Result<()> {
indexer.concurrent_pipeline::<ObjVersions>().await?;
indexer.concurrent_pipeline::<TxAffectedObjects>().await?;
indexer.concurrent_pipeline::<TxBalanceChanges>().await?;
indexer.concurrent_pipeline::<WalObjTypes>().await?;
indexer.sequential_pipeline::<SumCoinBalances>(lag).await?;
indexer.sequential_pipeline::<SumObjTypes>(lag).await?;

Expand Down
17 changes: 16 additions & 1 deletion crates/sui-indexer-alt/src/models/objects.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ use diesel::{
};
use sui_types::base_types::ObjectID;

use crate::schema::{kv_objects, obj_versions, sum_coin_balances, sum_obj_types};
use crate::schema::{kv_objects, obj_versions, sum_coin_balances, sum_obj_types, wal_obj_types};

#[derive(Insertable, Debug, Clone)]
#[diesel(table_name = kv_objects, primary_key(object_id, object_version))]
Expand All @@ -31,6 +31,7 @@ pub struct StoredObjVersion {
pub struct StoredObjectUpdate<T> {
pub object_id: ObjectID,
pub object_version: u64,
pub cp_sequence_number: u64,
/// `None` means the object was deleted or wrapped at this version, `Some(x)` means it was
/// changed to `x`.
pub update: Option<T>,
Expand Down Expand Up @@ -69,6 +70,20 @@ pub struct StoredSumObjType {
pub instantiation: Option<Vec<u8>>,
}

/// Insertable row of the `wal_obj_types` table.
///
/// The owner and type fields are all `None` for a row that records an object
/// with no new state (its source update carried no payload). Apart from
/// `owner_kind`, they may also be `None` individually when the payload itself
/// leaves them unset.
#[derive(Insertable, Debug, Clone)]
#[diesel(table_name = wal_obj_types, primary_key(object_id, object_version))]
pub struct StoredWalObjType {
/// Raw bytes of the object's ID; first half of the primary key.
pub object_id: Vec<u8>,
/// Version of the object this row describes; second half of the primary key.
pub object_version: i64,
/// Kind of owner, when the object has recorded state at this version.
pub owner_kind: Option<StoredOwnerKind>,
/// Raw bytes identifying the owner, if any.
pub owner_id: Option<Vec<u8>>,
/// Components of the object's type.
pub package: Option<Vec<u8>>,
pub module: Option<String>,
pub name: Option<String>,
pub instantiation: Option<Vec<u8>>,
/// Sequence number of the checkpoint that produced this row.
pub cp_sequence_number: i64,
}

impl<DB: Backend> serialize::ToSql<SmallInt, DB> for StoredOwnerKind
where
i16: serialize::ToSql<SmallInt, DB>,
Expand Down
15 changes: 15 additions & 0 deletions crates/sui-indexer-alt/src/schema.rs
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,20 @@ diesel::table! {
}
}

// Diesel schema for the `wal_obj_types` table. The key is the composite
// (object_id, object_version); every column other than the key and
// `cp_sequence_number` is nullable.
diesel::table! {
wal_obj_types (object_id, object_version) {
object_id -> Bytea,
object_version -> Int8,
owner_kind -> Nullable<Int2>,
owner_id -> Nullable<Bytea>,
package -> Nullable<Bytea>,
module -> Nullable<Text>,
name -> Nullable<Text>,
instantiation -> Nullable<Bytea>,
cp_sequence_number -> Int8,
}
}

diesel::table! {
watermarks (pipeline) {
pipeline -> Text,
Expand All @@ -120,5 +134,6 @@ diesel::allow_tables_to_appear_in_same_query!(
sum_obj_types,
tx_affected_objects,
tx_balance_changes,
wal_obj_types,
watermarks,
);
Loading