From c38aba14b4c3bb15d47de0e0bd78a2de9c6fe65d Mon Sep 17 00:00:00 2001 From: Tianyu Du Date: Wed, 2 Jun 2021 16:46:29 -0700 Subject: [PATCH 01/66] update gitignore --- .gitignore | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index 927fb084..a37445a5 100644 --- a/.gitignore +++ b/.gitignore @@ -2,4 +2,5 @@ run/datasets/data/ **/__pycache__/ **/.ipynb_checkpoints -.idea/ \ No newline at end of file +.idea/ +.vscode/settings.json From 961140f79fc259fbef0355b34c0bf0cde8ecf947 Mon Sep 17 00:00:00 2001 From: Tianyu Du Date: Thu, 3 Jun 2021 14:47:00 -0700 Subject: [PATCH 02/66] make copy. --- graphgym/models/head_mem.py | 130 ++++++++++++++++ graphgym/models/layer_recurrent.py | 238 +++++++++++++++++++++++++++++ 2 files changed, 368 insertions(+) create mode 100644 graphgym/models/head_mem.py create mode 100644 graphgym/models/layer_recurrent.py diff --git a/graphgym/models/head_mem.py b/graphgym/models/head_mem.py new file mode 100644 index 00000000..3114cc72 --- /dev/null +++ b/graphgym/models/head_mem.py @@ -0,0 +1,130 @@ +""" GNN heads are the last layer of a GNN right before loss computation. + +They are constructed in the init function of the gnn.GNN. +""" + +import torch +import torch.nn as nn + +from graphgym.config import cfg +from graphgym.models.layer import MLP +from graphgym.models.pooling import pooling_dict + +from graphgym.contrib.head import * +import graphgym.register as register + + +########### Head ############ + +class GNNNodeHead(nn.Module): + '''Head of GNN, node prediction''' + + def __init__(self, dim_in, dim_out): + super(GNNNodeHead, self).__init__() + self.layer_post_mp = MLP(dim_in, dim_out, + num_layers=cfg.gnn.layers_post_mp, bias=True) + + def _apply_index(self, batch): + if batch.node_label_index.shape[0] == batch.node_label.shape[0]: + return batch.node_feature[batch.node_label_index], batch.node_label + else: + return batch.node_feature[batch.node_label_index], \ + batch.node_label[batch.node_label_index] + + def forward(self, batch): + batch = self.layer_post_mp(batch) + pred, label = self._apply_index(batch) + return pred, label + + +class GNNEdgeHead(nn.Module): + '''Head of GNN, edge prediction''' + + def __init__(self, dim_in, dim_out): + ''' Head of Edge and link prediction models. + + Args: + dim_out: output dimension. For binary prediction, dim_out=1. + ''' + # Use dim_in for graph conv, since link prediction dim_out could be + # binary + # E.g. 
if decoder='dot', link probability is dot product between + # node embeddings, of dimension dim_in + super(GNNEdgeHead, self).__init__() + # module to decode edges from node embeddings + + if cfg.model.edge_decoding == 'concat': + self.layer_post_mp = MLP(dim_in * 2, dim_out, + num_layers=cfg.gnn.layers_post_mp, + bias=True) + # requires parameter + self.decode_module = lambda v1, v2: \ + self.layer_post_mp(torch.cat((v1, v2), dim=-1)) + else: + if dim_out > 1: + raise ValueError( + 'Binary edge decoding ({})is used for multi-class ' + 'edge/link prediction.'.format(cfg.model.edge_decoding)) + self.layer_post_mp = MLP(dim_in, dim_in, + num_layers=cfg.gnn.layers_post_mp, + bias=True) + if cfg.model.edge_decoding == 'dot': + self.decode_module = lambda v1, v2: torch.sum(v1 * v2, dim=-1) + elif cfg.model.edge_decoding == 'cosine_similarity': + self.decode_module = nn.CosineSimilarity(dim=-1) + else: + raise ValueError('Unknown edge decoding {}.'.format( + cfg.model.edge_decoding)) + + def _apply_index(self, batch): + return batch.node_feature[batch.edge_label_index], \ + batch.edge_label + + def forward(self, batch): + if cfg.model.edge_decoding != 'concat': + batch = self.layer_post_mp(batch) + pred, label = self._apply_index(batch) + nodes_first = pred[0] + nodes_second = pred[1] + pred = self.decode_module(nodes_first, nodes_second) + return pred, label + + +class GNNGraphHead(nn.Module): + '''Head of GNN, graph prediction + + The optional post_mp layer (specified by cfg.gnn.post_mp) is used + to transform the pooled embedding using an MLP. + ''' + + def __init__(self, dim_in, dim_out): + super(GNNGraphHead, self).__init__() + # todo: PostMP before or after global pooling + self.layer_post_mp = MLP(dim_in, dim_out, + num_layers=cfg.gnn.layers_post_mp, bias=True) + self.pooling_fun = pooling_dict[cfg.model.graph_pooling] + + def _apply_index(self, batch): + return batch.graph_feature, batch.graph_label + + def forward(self, batch): + if cfg.dataset.transform == 'ego': + graph_emb = self.pooling_fun(batch.node_feature, batch.batch, + batch.node_id_index) + else: + graph_emb = self.pooling_fun(batch.node_feature, batch.batch) + graph_emb = self.layer_post_mp(graph_emb) + batch.graph_feature = graph_emb + pred, label = self._apply_index(batch) + return pred, label + + +# Head models for external interface +head_dict = { + 'node': GNNNodeHead, + 'edge': GNNEdgeHead, + 'link_pred': GNNEdgeHead, + 'graph': GNNGraphHead +} + +head_dict = {**register.head_dict, **head_dict} diff --git a/graphgym/models/layer_recurrent.py b/graphgym/models/layer_recurrent.py new file mode 100644 index 00000000..df60700e --- /dev/null +++ b/graphgym/models/layer_recurrent.py @@ -0,0 +1,238 @@ +import torch +import torch.nn as nn +import torch.nn.functional as F +import torch_geometric as pyg + +from graphgym.config import cfg +from graphgym.models.act import act_dict +from graphgym.contrib.layer.generalconv import (GeneralConvLayer, + GeneralEdgeConvLayer) + +from graphgym.contrib.layer import * +import graphgym.register as register + + +## General classes +class GeneralLayer(nn.Module): + '''General wrapper for layers''' + + def __init__(self, name, dim_in, dim_out, has_act=True, has_bn=True, + has_l2norm=False, **kwargs): + super(GeneralLayer, self).__init__() + self.has_l2norm = has_l2norm + has_bn = has_bn and cfg.gnn.batchnorm + self.layer = layer_dict[name](dim_in, dim_out, + bias=not has_bn, **kwargs) + layer_wrapper = [] + if has_bn: + layer_wrapper.append(nn.BatchNorm1d( + dim_out, eps=cfg.bn.eps, 
momentum=cfg.bn.mom)) + if cfg.gnn.dropout > 0: + layer_wrapper.append(nn.Dropout( + p=cfg.gnn.dropout, inplace=cfg.mem.inplace)) + if has_act: + layer_wrapper.append(act_dict[cfg.gnn.act]) + self.post_layer = nn.Sequential(*layer_wrapper) + + def forward(self, batch): + batch = self.layer(batch) + if isinstance(batch, torch.Tensor): + batch = self.post_layer(batch) + if self.has_l2norm: + batch = F.normalize(batch, p=2, dim=1) + else: + batch.node_feature = self.post_layer(batch.node_feature) + if self.has_l2norm: + batch.node_feature = F.normalize(batch.node_feature, p=2, dim=1) + return batch + + +class GeneralMultiLayer(nn.Module): + '''General wrapper for stack of layers''' + + def __init__(self, name, num_layers, dim_in, dim_out, dim_inner=None, + final_act=True, **kwargs): + super(GeneralMultiLayer, self).__init__() + dim_inner = dim_in if dim_inner is None else dim_inner + for i in range(num_layers): + d_in = dim_in if i == 0 else dim_inner + d_out = dim_out if i == num_layers - 1 else dim_inner + has_act = final_act if i == num_layers - 1 else True + layer = GeneralLayer(name, d_in, d_out, has_act, **kwargs) + self.add_module('Layer_{}'.format(i), layer) + + def forward(self, batch): + for layer in self.children(): + batch = layer(batch) + return batch + + +## Core basic layers +# Input: batch; Output: batch +class Linear(nn.Module): + def __init__(self, dim_in, dim_out, bias=False, **kwargs): + super(Linear, self).__init__() + self.model = nn.Linear(dim_in, dim_out, bias=bias) + + def forward(self, batch): + if isinstance(batch, torch.Tensor): + batch = self.model(batch) + else: + batch.node_feature = self.model(batch.node_feature) + return batch + + +class BatchNorm1dNode(nn.Module): + '''General wrapper for layers''' + + def __init__(self, dim_in): + super(BatchNorm1dNode, self).__init__() + self.bn = nn.BatchNorm1d(dim_in, eps=cfg.bn.eps, momentum=cfg.bn.mom) + + def forward(self, batch): + batch.node_feature = self.bn(batch.node_feature) + return batch + + +class BatchNorm1dEdge(nn.Module): + '''General wrapper for layers''' + + def __init__(self, dim_in): + super(BatchNorm1dEdge, self).__init__() + self.bn = nn.BatchNorm1d(dim_in, eps=cfg.bn.eps, momentum=cfg.bn.mom) + + def forward(self, batch): + batch.edge_feature = self.bn(batch.edge_feature) + return batch + + +class MLP(nn.Module): + def __init__(self, dim_in, dim_out, bias=True, dim_inner=None, + num_layers=2, **kwargs): + ''' + Note: MLP works for 0 layers + ''' + super(MLP, self).__init__() + dim_inner = dim_in if dim_inner is None else dim_inner + layers = [] + if num_layers > 1: + layers.append( + GeneralMultiLayer('linear', num_layers - 1, dim_in, dim_inner, + dim_inner, final_act=True)) + layers.append(Linear(dim_inner, dim_out, bias)) + else: + layers.append(Linear(dim_in, dim_out, bias)) + self.model = nn.Sequential(*layers) + + def forward(self, batch): + if isinstance(batch, torch.Tensor): + batch = self.model(batch) + else: + batch.node_feature = self.model(batch.node_feature) + return batch + + +class GCNConv(nn.Module): + def __init__(self, dim_in, dim_out, bias=False, **kwargs): + super(GCNConv, self).__init__() + self.model = pyg.nn.GCNConv(dim_in, dim_out, bias=bias) + + def forward(self, batch): + batch.node_feature = self.model(batch.node_feature, batch.edge_index) + return batch + + +class SAGEConv(nn.Module): + def __init__(self, dim_in, dim_out, bias=False, **kwargs): + super(SAGEConv, self).__init__() + self.model = pyg.nn.SAGEConv(dim_in, dim_out, bias=bias, concat=True) + + def forward(self, 
batch): + batch.node_feature = self.model(batch.node_feature, batch.edge_index) + return batch + + +class GATConv(nn.Module): + def __init__(self, dim_in, dim_out, bias=False, **kwargs): + super(GATConv, self).__init__() + self.model = pyg.nn.GATConv(dim_in, dim_out, bias=bias) + + def forward(self, batch): + batch.node_feature = self.model(batch.node_feature, batch.edge_index) + return batch + + +class GINConv(nn.Module): + def __init__(self, dim_in, dim_out, bias=False, **kwargs): + super(GINConv, self).__init__() + gin_nn = nn.Sequential(nn.Linear(dim_in, dim_out), nn.ReLU(), + nn.Linear(dim_out, dim_out)) + self.model = pyg.nn.GINConv(gin_nn) + + def forward(self, batch): + batch.node_feature = self.model(batch.node_feature, batch.edge_index) + return batch + + +class SplineConv(nn.Module): + def __init__(self, dim_in, dim_out, bias=False, **kwargs): + super(SplineConv, self).__init__() + self.model = pyg.nn.SplineConv(dim_in, dim_out, + dim=1, kernel_size=2, bias=bias) + + def forward(self, batch): + batch.node_feature = self.model(batch.node_feature, batch.edge_index, + batch.edge_feature) + return batch + + +class GeneralConv(nn.Module): + def __init__(self, dim_in, dim_out, bias=False, **kwargs): + super(GeneralConv, self).__init__() + self.model = GeneralConvLayer(dim_in, dim_out, bias=bias) + + def forward(self, batch): + batch.node_feature = self.model(batch.node_feature, batch.edge_index) + return batch + + +class GeneralEdgeConv(nn.Module): + def __init__(self, dim_in, dim_out, bias=False, **kwargs): + super(GeneralEdgeConv, self).__init__() + self.model = GeneralEdgeConvLayer(dim_in, dim_out, bias=bias) + + def forward(self, batch): + batch.node_feature = self.model(batch.node_feature, batch.edge_index, + edge_feature=batch.edge_feature) + return batch + + +class GeneralSampleEdgeConv(nn.Module): + def __init__(self, dim_in, dim_out, bias=False, **kwargs): + super(GeneralSampleEdgeConv, self).__init__() + self.model = GeneralEdgeConvLayer(dim_in, dim_out, bias=bias) + + def forward(self, batch): + edge_mask = torch.rand(batch.edge_index.shape[1]) < cfg.gnn.keep_edge + edge_index = batch.edge_index[:, edge_mask] + edge_feature = batch.edge_feature[edge_mask, :] + batch.node_feature = self.model(batch.node_feature, edge_index, + edge_feature=edge_feature) + return batch + + +layer_dict = { + 'linear': Linear, + 'mlp': MLP, + 'gcnconv': GCNConv, + 'sageconv': SAGEConv, + 'gatconv': GATConv, + 'splineconv': SplineConv, + 'ginconv': GINConv, + 'generalconv': GeneralConv, + 'generaledgeconv': GeneralEdgeConv, + 'generalsampleedgeconv': GeneralSampleEdgeConv, +} + +# register additional convs +layer_dict = {**register.layer_dict, **layer_dict} From 2d57c1278ed7c07116a1cdd66dcf3274463da0d3 Mon Sep 17 00:00:00 2001 From: Tianyu Du Date: Thu, 3 Jun 2021 15:04:28 -0700 Subject: [PATCH 03/66] add config file for roland. --- graphgym/contrib/config/roland.py | 204 ++++++++++++++++++++++++++++++ 1 file changed, 204 insertions(+) create mode 100644 graphgym/contrib/config/roland.py diff --git a/graphgym/contrib/config/roland.py b/graphgym/contrib/config/roland.py new file mode 100644 index 00000000..a30c38fe --- /dev/null +++ b/graphgym/contrib/config/roland.py @@ -0,0 +1,204 @@ +from yacs.config import CfgNode as CN + +from graphgym.register import register_config + + +def set_cfg_roland(cfg): + r''' + This function sets the default config value for customized options + :return: customized configuration use by the experiment. 
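    Illustrative usage sketch (not part of the original patch; assumes the
    registered hook has not already been applied by GraphGym itself):

        from graphgym.config import cfg   # GraphGym's global CfgNode
        set_cfg_roland(cfg)               # attach the ROLAND defaults below
        cfg.gnn.embed_update_method = 'gru'   # per-experiment override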
+ ''' + + # ----------------------------------------------------------------------- # + # Customized options + # ----------------------------------------------------------------------- # + # Method to update node embedding from old node embedding and new node features. + # Options: 'moving_average', 'masked_gru', 'gru' + # moving average: new embedding = r * old + (1-r) * node_feature. + # gru: new embedding = GRU(node_feature, old_embedding). + # masked_gru: only apply GRU to active nodes. + cfg.gnn.embed_update_method = 'moving_average' + # what kind of GRU kernel to use if GRU is required for embedding updating. + cfg.gnn.gru_kernel = 'linear' + # how many layers to use in the MLP updater. + # default: 1, use a simple linear layer. + cfg.gnn.mlp_update_layers = 2 + + # For meta-learning. + cfg.meta = CN() + # Whether to do meta-learning via initialization moving average. + # Default to False. + cfg.meta.is_meta = False + + # choose between 'moving_average' and 'online_mean' + cfg.meta.method = 'moving_average' + # For online mean: + # new_mean = (n-1)/n * old_mean + 1/n * new_value. + # where *_mean corresponds to W_init. + + # Weight used in moving average for model parameters. + # After fine-tuning the model in period t and get model M[t], + # Set W_init = (1-alpha) * W_init + alpha * M[t]. + # For the next period, use W_init as the initialization for fine-tune + # Set cfg.meta.alpha = 1.0 to recover the original algorithm. + cfg.meta.alpha = 0.9 + + # Use to identify experiments. + cfg.remark = '' + # Experimental Features, use this name space to save all controls for + # experimental features. + cfg.experimental = CN() + + # How many negative edges for each node to compute rank-based evaluation + # metrics such as MRR and recall at K. + # E.g., if multiplier = 1000 and a node has 3 positive edges, then we + # compute the MRR using 1000 randomly generated negative edges + # + 3 existing positive edges. + cfg.experimental.rank_eval_multiplier = 1000 + + # Only use the first n snapshots (time periods) to train the model. + # Empirically, the model learns rich dynamics from only a few periods. + # Set to -1 if using all snapshots. + cfg.experimental.restrict_training_set = -1 + + # Whether to visualize edge attention of GNN layer after training. + cfg.experimental.visualize_gnn_layer = False + + cfg.train.tbptt_freq = 5 + + cfg.train.internal_validation_tolerance = 5 + + # Computing MRR is slow in the baseline setting. + # Only start to compute MRR in the test set range after certain time. + cfg.train.start_compute_mrr = 0 + + # How to handle node features in AS dataset. + # available: ['one', 'one_hot_id', 'one_hot_degree_global', 'one_hot_degree_local'] + cfg.dataset.AS_node_feature = 'one' + + # ----------------------------------------------------------------------- # + # Additional dataset option for the BSI dataset. + # ----------------------------------------------------------------------- # + # Method used to sample negative edges for edge_label_index. + # 'uniform': all non-existing edges have same probability of being sampled + # as negative edges. + # 'src': non-existing edges from high-degree nodes are more likely to be + # sampled as negative edges. + # 'dest': non-existing edges pointed to high-degree nodes are more likely + # to be sampled as negative edges. + cfg.dataset.negative_sample_weight = 'uniform' + + # whether to load heterogeneous graphs. + cfg.dataset.is_hetero = False + + # where to put type information. 'append' or 'graph_attribute'. 
+ cfg.dataset.type_info_loc = 'append' + + # whether to look for and load cached graph. By default (load_cache=False) + # the loader loads the raw tsv file from disk and + cfg.dataset.load_cache = False + + cfg.dataset.premade_datasets = 'fresh' + + cfg.dataset.include_node_features = False + + # 'chronological_temporal' or 'default'. + # 'chronological_temporal': only for temporal graphs, for example, + # the first 80% snapshots are for training, then subsequent 10% snapshots + # are for validation and the last 10% snapshots are for testing. + cfg.dataset.split_method = 'default' + + cfg.gnn.skip_connection = 'none' # {'none', 'identity', 'affine'} + # ----------------------------------------------------------------------- # + # Customized options + # ----------------------------------------------------------------------- # + + # example argument group + cfg.transaction = CN() + + # whether use snapshot + cfg.transaction.snapshot = False + + # snapshot split method 1: number of snapshots + # split dataset into fixed number of snapshots. + cfg.transaction.snapshot_num = 100 + + # snapshot split method 2: snapshot frequency + # e.g., one snapshot contains transactions within 1 day. + cfg.transaction.snapshot_freq = 'D' + + cfg.transaction.check_snapshot = False + + # how to use transaction history + # full or rolling + cfg.transaction.history = 'full' + + + # type of loss: supervised / meta + cfg.transaction.loss = 'meta' + + # feature dim for int edge features + cfg.transaction.feature_int_dim = 32 + cfg.transaction.feature_edge_int_num = [50, 8, 252, 252, 3, 3] + cfg.transaction.feature_node_int_num = [0] + + # feature dim for amount (float) edge feature + cfg.transaction.feature_amount_dim = 64 + + # feature dim for time (float) edge feature + cfg.transaction.feature_time_dim = 64 + + # + cfg.transaction.node_feature = 'raw' + + # how many days look into the future + cfg.transaction.horizon = 1 + + # prediction mode for the task; 'before' or 'after' + cfg.transaction.pred_mode = 'before' + + # number of periods to be captured. + # set to a list of integers if wish to use pre-defined periodicity. + # e.g., [1,7,28,31,...] etc. + cfg.transaction.time_enc_periods = [1] + + # if 'enc_before_diff': attention weight = diff(enc(t1), enc(t2)) + # if 'diff_before_enc': attention weight = enc(t1 - t2) + cfg.transaction.time_enc_mode = 'enc_before_diff' + + # how to compute the keep ratio while updating the recurrent GNN. + # the update ratio (for each node) is a function of its degree in [0, t) + # and its degree in snapshot t. + cfg.transaction.keep_ratio = 'linear' + + cfg.metric = CN() + # how to compute MRR. + # available: f = 'min', 'max', 'mean'. + # Step 1: get the p* = f(scores of positive edges) + # Step 2: compute the rank r of p* among all negative edges. + # Step 3: RR = 1 / rank. + # Step 4: average over all users. + # expected MRR(min) <= MRR(mean) <= MRR(max). + cfg.metric.mrr_method = 'max' + + # Specs for the link prediction task using BSI dataset. + # All units are days. + cfg.link_pred_spec = CN() + + # The period of `today`'s increase: how often the system is making forecast. + # E.g., when = 1, + # the system forecasts transactions in upcoming 7 days for everyday. + # One training epoch loops over + # {Jan-1-2020, Jan-2-2020, Jan-3-2020..., Dec-31-2020} + # When = 7, the system makes prediction every week. + # E.g., the system forecasts transactions in upcoming 7 days + # on every Monday. 
+ cfg.link_pred_spec.forecast_frequency = 1 + + # How many days into the future the model is trained to predict. + # The model forecasts transactions in (today, today + forecast_horizon]. + # NOTE: forecast_horizon should >= forecast_frequency to cover all days. + cfg.link_pred_spec.forecast_horizon = 7 + + +register_config('roland', set_cfg_roland) From 2774bbd1b63902b7085afeb96c22ee4a34461144 Mon Sep 17 00:00:00 2001 From: Tianyu Du Date: Thu, 3 Jun 2021 19:33:59 -0700 Subject: [PATCH 04/66] add register for embedding update module. --- graphgym/register.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/graphgym/register.py b/graphgym/register.py index 32d64b6d..cfa95ade 100644 --- a/graphgym/register.py +++ b/graphgym/register.py @@ -33,6 +33,10 @@ def register_head(key, module): def register_layer(key, module): register(key, module, layer_dict) +update_dict = {} +def register_update(key, module): + register(key, module, update_dict) + pooling_dict = {} def register_pooling(key, module): register(key, module, pooling_dict) From 015b86cbfb2a635746b468837688577d774add78 Mon Sep 17 00:00:00 2001 From: Tianyu Du Date: Thu, 3 Jun 2021 19:52:12 -0700 Subject: [PATCH 05/66] add embedding update modules. --- graphgym/models/update.py | 188 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 188 insertions(+) create mode 100644 graphgym/models/update.py diff --git a/graphgym/models/update.py b/graphgym/models/update.py new file mode 100644 index 00000000..df99d3aa --- /dev/null +++ b/graphgym/models/update.py @@ -0,0 +1,188 @@ +"""Embedding update modules for dynamic graphs.""" +import graphgym.register as register +import torch +import torch.nn as nn +from graphgym.models.layer import MLP + + +class MovingAverageUpdater(nn.Module): + """ + Moving average updater for node embeddings, + let h[l, t] denote all nodes' embedding at the l-th layer at snapshot t. + + h[l,t] = KeepRatio * h[l,t-1] + (1-KeepRatio) * h[l-1,t] + + where the precomputed KeepRatio at current snapshot t is node-specific, + which depends on the node's degree in all snapshots before t and nodes's + degree in snapshot at time t. + """ + + def __init__(self, dim_in: int, dim_out: int, layer_id: int) -> None: + self.layer_id = layer_id + super(MovingAverageUpdater, self).__init__() + + def forward(self, batch): + # TODO: check if boardcasting is correct. + H_prev = batch.node_states[self.layer_id] + X = batch.node_feature + H_new = H_prev * batch.keep_ratio + X * (1.0 - batch.keep_ratio) + batch.node_states[self.layer_id] = H_new + return batch + + +class MLPUpdater(nn.Module): + """ + Node embedding update block using simple MLP. + + h[l,t] = MLP(concat(h[l,t-1],h[l-1,t])) + """ + + def __init__(self, dim_in: int, dim_out: int, layer_id: int, + num_layers: int): + """ + Args: + dim_in (int): dimension of h[l-1, t]. + dim_out (int): dimension of h[l, t-1], node embedding dimension of + the current layer level. + layer_id (int): the index of current layer in multi-layer setting. + num_layers (int): number of layers in MLP. + """ + super(MLPUpdater, self).__init__() + self.layer_id = layer_id + # FIXME: + # assert num_layers > 1, 'There is a problem with layer=1 now, pending fix.' 
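        # Sizing note: forward() concatenates the previous hidden state
        # h[l, t-1] (width dim_out) with the current layer input h[l-1, t]
        # (width dim_in), so the MLP below maps dim_in + dim_out -> dim_out.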
+ self.mlp = MLP(dim_in=dim_in + dim_out, dim_out=dim_out, + num_layers=num_layers) + + def forward(self, batch): + H_prev = batch.node_states[self.layer_id] + X = batch.node_feature + concat = torch.cat((H_prev, X), axis=1) + H_new = self.mlp(concat) + batch.node_states[self.layer_id] = H_new + return batch + + +class GRUUpdater(nn.Module): + """ + Node embedding update block using standard GRU. + + h[l,t] = GRU(h[l,t-1], h[l-1,t]) + """ + def __init__(self, dim_in: int, dim_out: int, layer_id: int): + # dim_in (dim of X): dimension of input node_feature. + # dim_out (dim of H): dimension of previous and current hidden states. + # forward(X, H) --> H. + super(GRUUpdater, self).__init__() + self.layer_id = layer_id + self.GRU_Z = nn.Sequential( + nn.Linear(dim_in + dim_out, dim_out, bias=True), + nn.Sigmoid()) + # reset gate. + self.GRU_R = nn.Sequential( + nn.Linear(dim_in + dim_out, dim_out, bias=True), + nn.Sigmoid()) + # new embedding gate. + self.GRU_H_Tilde = nn.Sequential( + nn.Linear(dim_in + dim_out, dim_out, bias=True), + nn.Tanh()) + + def forward(self, batch): + H_prev = batch.node_states[self.layer_id] + X = batch.node_feature + Z = self.GRU_Z(torch.cat([X, H_prev], dim=1)) + R = self.GRU_R(torch.cat([X, H_prev], dim=1)) + H_tilde = self.GRU_H_Tilde(torch.cat([X, R * H_prev], dim=1)) + H_gru = Z * H_prev + (1 - Z) * H_tilde + batch.node_states[self.layer_id] = H_gru + return batch + + +# class MaskedGRUUpdater(nn.Module): +# """ +# Node embedding update block using standard GRU. + +# h[l,t] = GRU(h[l,t-1], h[l-1,t]) +# """ +# def __init__(self, dim_in: int, dim_out: int, layer_id: int): +# # dim_in (dim of X): dimension of input node_feature. +# # dim_out (dim of H): dimension of previous and current hidden states. +# # forward(X, H) --> H. +# super(MaskedGRUUpdater, self).__init__() +# self.layer_id = layer_id +# self.GRU_Z = nn.Sequential( +# nn.Linear(dim_in + dim_out, dim_out, bias=True), +# nn.Sigmoid()) +# # reset gate. +# self.GRU_R = nn.Sequential( +# nn.Linear(dim_in + dim_out, dim_out, bias=True), +# nn.Sigmoid()) +# # new embedding gate. +# self.GRU_H_Tilde = nn.Sequential( +# nn.Linear(dim_in + dim_out, dim_out, bias=True), +# nn.Tanh()) + +# def forward(self, batch): +# H_prev = batch.node_states[self.layer_id] +# X = batch.node_feature +# Z = self.GRU_Z(torch.cat([X, H_prev], dim=1)) +# R = self.GRU_R(torch.cat([X, H_prev], dim=1)) +# H_tilde = self.GRU_H_Tilde(torch.cat([X, R * H_prev], dim=1)) +# H_gru = Z * H_prev + (1 - Z) * H_tilde + +# # Update for active nodes only, use output from GRU. +# keep_mask = (batch.node_degree_new == 0) +# H_out = H_gru +# # Reset inactive nodes' embedding. +# H_out[keep_mask, :] = H_prev[keep_mask, :] + +# batch.node_states[self.layer_id] = H_out +# return batch + + +# class MovingAverageGRUUpdater(nn.Module): +# """ +# Node embedding update block using standard GRU. + +# h[l,t] = GRU(h[l,t-1], h[l-1,t]) +# """ +# def __init__(self, dim_in: int, dim_out: int, layer_id: int): +# # dim_in (dim of X): dimension of input node_feature. +# # dim_out (dim of H): dimension of previous and current hidden states. +# # forward(X, H) --> H. +# super(GRUUpdater, self).__init__() +# self.layer_id = layer_id +# self.GRU_Z = nn.Sequential( +# nn.Linear(dim_in + dim_out, dim_out, bias=True), +# nn.Sigmoid()) +# # reset gate. +# self.GRU_R = nn.Sequential( +# nn.Linear(dim_in + dim_out, dim_out, bias=True), +# nn.Sigmoid()) +# # new embedding gate. 
+# self.GRU_H_Tilde = nn.Sequential( +# nn.Linear(dim_in + dim_out, dim_out, bias=True), +# nn.Tanh()) + +# def forward(self, batch): +# H_prev = batch.node_states[self.layer_id] +# X = batch.node_feature +# Z = self.GRU_Z(torch.cat([X, H_prev], dim=1)) +# R = self.GRU_R(torch.cat([X, H_prev], dim=1)) +# H_tilde = self.GRU_H_Tilde(torch.cat([X, R * H_prev], dim=1)) +# H_gru = Z * H_prev + (1 - Z) * H_tilde + +# H_out = H_prev * batch.keep_ratio + H_gru * (1 - batch.keep_ratio) + +# batch.node_states[self.layer_id] = H_out +# return batch + + +update_dict = { + 'moving_average': MovingAverageUpdater, + 'mlp': MLPUpdater, + 'gru': GRUUpdater +} + +# merge additional update modules in register.update_dict. +update_dict = {**register.update_dict, **update_dict} From 158d859cd4201a25253c90bafbc3cd34ce056454 Mon Sep 17 00:00:00 2001 From: Tianyu Du Date: Thu, 3 Jun 2021 19:53:02 -0700 Subject: [PATCH 06/66] add config for roland. --- graphgym/contrib/config/roland.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/graphgym/contrib/config/roland.py b/graphgym/contrib/config/roland.py index a30c38fe..cf00a50b 100644 --- a/graphgym/contrib/config/roland.py +++ b/graphgym/contrib/config/roland.py @@ -18,8 +18,7 @@ def set_cfg_roland(cfg): # gru: new embedding = GRU(node_feature, old_embedding). # masked_gru: only apply GRU to active nodes. cfg.gnn.embed_update_method = 'moving_average' - # what kind of GRU kernel to use if GRU is required for embedding updating. - cfg.gnn.gru_kernel = 'linear' + # how many layers to use in the MLP updater. # default: 1, use a simple linear layer. cfg.gnn.mlp_update_layers = 2 @@ -31,7 +30,7 @@ def set_cfg_roland(cfg): cfg.meta.is_meta = False # choose between 'moving_average' and 'online_mean' - cfg.meta.method = 'moving_average' + cfg.meta.method = 'moving_average' # TODO: remove, only use moving_average. # For online mean: # new_mean = (n-1)/n * old_mean + 1/n * new_value. # where *_mean corresponds to W_init. From e7467484037faacc137963b8044e414f77eea0be Mon Sep 17 00:00:00 2001 From: Tianyu Du Date: Sat, 5 Jun 2021 15:41:20 -0700 Subject: [PATCH 07/66] add residual edge convolution --- graphgym/contrib/layer/residual_edge_conv.py | 137 +++++++++++++++++++ 1 file changed, 137 insertions(+) create mode 100644 graphgym/contrib/layer/residual_edge_conv.py diff --git a/graphgym/contrib/layer/residual_edge_conv.py b/graphgym/contrib/layer/residual_edge_conv.py new file mode 100644 index 00000000..e3fa28d7 --- /dev/null +++ b/graphgym/contrib/layer/residual_edge_conv.py @@ -0,0 +1,137 @@ +import torch +import torch.nn as nn +from torch.nn import Parameter +from torch_geometric.nn.conv import MessagePassing +from torch_geometric.nn.inits import zeros +from torch_geometric.utils import add_remaining_self_loops +from torch_scatter import scatter_add + +from graphgym.config import cfg +from graphgym.register import register_layer + + +class ResidualEdgeConvLayer(MessagePassing): + r''' + A general GNN layer with arbitrary edge features and self residual + connections. 
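    The residual term is selected by cfg.gnn.skip_connection:
        'affine'   -> output = aggregated messages + Linear(x)
        'identity' -> output = aggregated messages + x
                      (requires in_channels == out_channels)
        'none'     -> output = aggregated messages only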
+ ''' + + def __init__(self, in_channels: int, out_channels: int, + improved: bool = False, cached: bool = False, bias: bool = True, + **kwargs): + super(ResidualEdgeConvLayer, self).__init__(aggr=cfg.gnn.agg, **kwargs) + + self.in_channels = in_channels + self.out_channels = out_channels + self.improved = improved + self.cached = cached + self.normalize = cfg.gnn.normalize_adj + self.msg_direction = cfg.gnn.msg_direction + + if self.msg_direction == 'single': + self.linear_msg = nn.Linear(in_channels + cfg.dataset.edge_dim, + out_channels, bias=False) + elif self.msg_direction == 'both': + self.linear_msg = nn.Linear(in_channels * 2 + cfg.dataset.edge_dim, + out_channels, bias=False) + else: + raise ValueError + + if cfg.gnn.skip_connection == 'affine': + self.linear_skip = nn.Linear(in_channels, out_channels, bias=True) + elif cfg.gnn.skip_connection == 'identity': + assert self.in_channels == self.out_channels + + if bias: + self.bias = Parameter(torch.Tensor(out_channels)) + else: + self.register_parameter('bias', None) + + self.reset_parameters() + + def reset_parameters(self): + zeros(self.bias) + self.cached_result = None + self.cached_num_edges = None + + @staticmethod + def norm(edge_index, num_nodes, edge_weight=None, improved=False, + dtype=None): + if edge_weight is None: + edge_weight = torch.ones((edge_index.size(1),), dtype=dtype, + device=edge_index.device) + + fill_value = 1 if not improved else 2 + edge_index, edge_weight = add_remaining_self_loops( + edge_index, edge_weight, fill_value, num_nodes) + + row, col = edge_index + deg = scatter_add(edge_weight, row, dim=0, dim_size=num_nodes) + deg_inv_sqrt = deg.pow(-0.5) + deg_inv_sqrt[deg_inv_sqrt == float('inf')] = 0 + + return edge_index, deg_inv_sqrt[row] * edge_weight * deg_inv_sqrt[col] + + def forward(self, x, edge_index, edge_weight=None, edge_feature=None): + if self.cached and self.cached_result is not None: + if edge_index.size(1) != self.cached_num_edges: + raise RuntimeError( + 'Cached {} number of edges, but found {}. 
Please ' + 'disable the caching behavior of this layer by removing ' + 'the `cached=True` argument in its constructor.'.format( + self.cached_num_edges, edge_index.size(1))) + + if not self.cached or self.cached_result is None: + self.cached_num_edges = edge_index.size(1) + if self.normalize: + edge_index, norm = self.norm(edge_index, x.size(self.node_dim), + edge_weight, self.improved, + x.dtype) + else: + norm = edge_weight + self.cached_result = edge_index, norm + + edge_index, norm = self.cached_result + if cfg.gnn.skip_connection == 'affine': + skip_x = self.linear_skip(x) + elif cfg.gnn.skip_connection == 'identity': + skip_x = x + else: + skip_x = 0.0 + return self.propagate(edge_index, x=x, norm=norm, + edge_feature=edge_feature) + skip_x + + def message(self, x_i, x_j, norm, edge_feature): + if self.msg_direction == 'both': + x_j = torch.cat((x_i, x_j, edge_feature), dim=-1) + elif self.msg_direction == 'single': + x_j = torch.cat((x_j, edge_feature), dim=-1) + else: + raise ValueError + x_j = self.linear_msg(x_j) + return norm.view(-1, 1) * x_j if norm is not None else x_j + + def update(self, aggr_out): + if self.bias is not None: + aggr_out = aggr_out + self.bias + return aggr_out + + def __repr__(self): + return '{}({}, {})'.format(self.__class__.__name__, self.in_channels, + self.out_channels) + + +class ResidualEdgeConv(nn.Module): + '''Wrapper for residual edge conv layer''' + + def __init__(self, dim_in, dim_out, bias=False, **kwargs): + super(ResidualEdgeConv, self).__init__() + self.model = ResidualEdgeConvLayer(dim_in, dim_out, bias=bias) + + def forward(self, batch): + batch.node_feature = self.model(batch.node_feature, batch.edge_index, + edge_feature=batch.edge_feature) + return batch + + +register_layer('residual_edge_conv', ResidualEdgeConv) From 6d0cf0327d6fcbc9b85c57a840408c0f93b7a439 Mon Sep 17 00:00:00 2001 From: Tianyu Du Date: Sat, 5 Jun 2021 15:42:36 -0700 Subject: [PATCH 08/66] update --- graphgym/contrib/layer/residual_edge_conv.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/graphgym/contrib/layer/residual_edge_conv.py b/graphgym/contrib/layer/residual_edge_conv.py index e3fa28d7..dc7a7ec0 100644 --- a/graphgym/contrib/layer/residual_edge_conv.py +++ b/graphgym/contrib/layer/residual_edge_conv.py @@ -11,10 +11,10 @@ class ResidualEdgeConvLayer(MessagePassing): - r''' + """ A general GNN layer with arbitrary edge features and self residual connections. - ''' + """ def __init__(self, in_channels: int, out_channels: int, improved: bool = False, cached: bool = False, bias: bool = True, From 55c384e63f9555a7476784158cd10648a1753cb4 Mon Sep 17 00:00:00 2001 From: Tianyu Du Date: Sat, 5 Jun 2021 15:48:15 -0700 Subject: [PATCH 09/66] add flie --- .../contrib/loader/dynamic_graph_utils.py | 80 +++++++++++++++++++ 1 file changed, 80 insertions(+) create mode 100644 graphgym/contrib/loader/dynamic_graph_utils.py diff --git a/graphgym/contrib/loader/dynamic_graph_utils.py b/graphgym/contrib/loader/dynamic_graph_utils.py new file mode 100644 index 00000000..098b3f5e --- /dev/null +++ b/graphgym/contrib/loader/dynamic_graph_utils.py @@ -0,0 +1,80 @@ +""" +Helper functions and utilities for dynamic graphs. + +Mar. 31, 2021. 
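Illustrative usage sketch (assumes `g_all` is a DeepSNAP Graph whose
`edge_time` attribute holds unix timestamps):

    weekly_snapshots = make_graph_snapshot(g_all, snapshot_freq='W')
    edges_per_week = [g.edge_time.shape[0] for g in weekly_snapshots]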
+""" +import numpy as np +import pandas as pd +from deepsnap.graph import Graph +from typing import List + + +def make_graph_snapshot(g_all: Graph, + snapshot_freq: str, + is_hetero: bool=False) -> List[Graph]: + """ + Constructs a list of graph snapshots based from g_all using g_all.edge_time + and provided snapshot_freq (frequency on calendar). + + Args: + g_all: the entire graph object, g_all must have a edge_time attribute, + g_all.edge_time consists of unix timestamp of edge time. + snapshot_freq: snapshot frequency, must be one of + 'D': daily, 'W': weekly, and 'M': monthly. + is_hetero: whether the graph is heterogeneous. + + Return: + A list of graph object, each graph snapshot has edge level information + (edge_feature, edge_time, etc) of only edges in that time period. + However, every graph snapshot has the same and full node level + information (node_feature, node_type, etc). + """ + # Arg check. + if not hasattr(g_all, 'edge_time'): + raise KeyError('Temporal graph needs to have edge_time attribute.') + + if snapshot_freq.upper() not in ['D', 'W', 'M']: + raise ValueError(f'Unsupported snapshot freq: {snapshot_freq}.') + + snapshot_freq = snapshot_freq.upper() + t = g_all.edge_time.numpy().astype(np.int64) # all timestamps. + + period_split = pd.DataFrame( + {'Timestamp': t, 'TransactionTime': pd.to_datetime(t, unit='s')}, + index=range(len(g_all.edge_time)) + ) + + freq_map = {'D': '%j', # day of year. + 'W': '%W', # week of year. + 'M': '%m'} # month of year. + + period_split['Year'] = period_split['TransactionTime'].dt.strftime( + '%Y').astype(int) + + period_split['SubYearFlag'] = period_split['TransactionTime'].dt.strftime( + freq_map[snapshot_freq]).astype(int) + + period2id = period_split.groupby(['Year', 'SubYearFlag']).indices + # e.g., dictionary w/ key = (2021, 3) and val = array(edge IDs). + + periods = sorted(list(period2id.keys())) # ascending order. + # alternatively, sorted(..., key=lambda x: x[0] + x[1]/1000). + snapshot_list = list() + for p in periods: + # unique IDs of edges in this period. + period_members = period2id[p] + + g_incr = Graph( + node_feature=g_all.node_feature, + edge_feature=g_all.edge_feature[period_members, :], + edge_index=g_all.edge_index[:, period_members], + edge_time=g_all.edge_time[period_members], + directed=g_all.directed, + list_n_type=g_all.list_n_type if is_hetero else None, + list_e_type=g_all.list_e_type if is_hetero else None, + ) + if is_hetero and hasattr(g_all, 'node_type'): + g_incr.node_type = g_all.node_type + g_incr.edge_type = g_all.edge_type[period_members] + snapshot_list.append(g_incr) + return snapshot_list From 505e017a6f20ad1893fd509d8a63f3a69f76c35a Mon Sep 17 00:00:00 2001 From: Tianyu Du Date: Sat, 5 Jun 2021 15:53:48 -0700 Subject: [PATCH 10/66] add method make_graph_snapshot_by_seconds --- .../contrib/loader/dynamic_graph_utils.py | 25 ++++++++++++++++++- 1 file changed, 24 insertions(+), 1 deletion(-) diff --git a/graphgym/contrib/loader/dynamic_graph_utils.py b/graphgym/contrib/loader/dynamic_graph_utils.py index 098b3f5e..448401ea 100644 --- a/graphgym/contrib/loader/dynamic_graph_utils.py +++ b/graphgym/contrib/loader/dynamic_graph_utils.py @@ -3,10 +3,12 @@ Mar. 31, 2021. 
""" +from typing import List + import numpy as np import pandas as pd +import torch from deepsnap.graph import Graph -from typing import List def make_graph_snapshot(g_all: Graph, @@ -78,3 +80,24 @@ def make_graph_snapshot(g_all: Graph, g_incr.edge_type = g_all.edge_type[period_members] snapshot_list.append(g_incr) return snapshot_list + + +def make_graph_snapshot_by_seconds(g_all: Graph, + freq_sec: int) -> List[Graph]: + """ + Split the entire graph into snapshots by frequency in terms of seconds. + """ + split_criterion = g_all.edge_time // freq_sec + groups = torch.sort(torch.unique(split_criterion))[0] + snapshot_list = list() + for t in groups: + period_members = (split_criterion == t) + g_incr = Graph( + node_feature=g_all.node_feature, + edge_feature=g_all.edge_feature[period_members, :], + edge_index=g_all.edge_index[:, period_members], + edge_time=g_all.edge_time[period_members], + directed=g_all.directed + ) + snapshot_list.append(g_incr) + return snapshot_list From c22f1a22a5d2ced5eacabe838b5fbb60838d7d77 Mon Sep 17 00:00:00 2001 From: Tianyu Du Date: Sat, 5 Jun 2021 15:54:49 -0700 Subject: [PATCH 11/66] add loader for UCI message. --- graphgym/contrib/loader/roland_ucimsg.py | 112 +++++++++++++++++++++++ 1 file changed, 112 insertions(+) create mode 100644 graphgym/contrib/loader/roland_ucimsg.py diff --git a/graphgym/contrib/loader/roland_ucimsg.py b/graphgym/contrib/loader/roland_ucimsg.py new file mode 100644 index 00000000..6912be75 --- /dev/null +++ b/graphgym/contrib/loader/roland_ucimsg.py @@ -0,0 +1,112 @@ +""" +Loader for the CollegeMsg temporal network. + +For more information: https://snap.stanford.edu/data/CollegeMsg.html + +Mar. 31, 2021 +""" +import os +from typing import List, Union + +import deepsnap +import numpy as np +import pandas as pd +import torch +from deepsnap.graph import Graph +from sklearn.preprocessing import MinMaxScaler + +from graphgym.config import cfg +import graphgym.contrib.loader.dynamic_graph_utils as utils +from graphgym.register import register_loader + + +def load_single_dataset(dataset_dir: str) -> Graph: + df_trans = pd.read_csv(dataset_dir, sep=' ', header=None) + df_trans.columns = ['SRC', 'DST', 'TIMESTAMP'] + assert not np.any(pd.isna(df_trans).values) + df_trans.reset_index(drop=True, inplace=True) + + # Node IDs of this dataset start from 1, re-index to 0-based. 
+ df_trans['SRC'] -= 1 + df_trans['DST'] -= 1 + + print('num of edges:', len(df_trans)) + print('num of nodes:', np.max(df_trans[['SRC', 'DST']].values) + 1) + + time_scaler = MinMaxScaler((0, 2)) + df_trans['TimestampScaled'] = time_scaler.fit_transform( + df_trans['TIMESTAMP'].values.reshape(-1, 1)) + + edge_feature = torch.Tensor( + df_trans[['TimestampScaled']].values).view(-1, 1) + edge_index = torch.Tensor( + df_trans[['SRC', 'DST']].values.transpose()).long() # (2, E) + num_nodes = torch.max(edge_index) + 1 + + node_feature = torch.ones(num_nodes, 1) + + print('feature_node_int_num: ', node_feature.max() + 1) + + edge_time = torch.FloatTensor(df_trans['TIMESTAMP'].values) + + graph = Graph( + node_feature=node_feature, + edge_feature=edge_feature, + edge_index=edge_index, + edge_time=edge_time, + directed=True + ) + + return graph + + +def load_snapshots(dataset_dir: str, + snapshot: bool = True, + snapshot_freq: str = None + ) -> Union[deepsnap.graph.Graph, + List[deepsnap.graph.Graph]]: + g_all = load_single_dataset(dataset_dir) + if not snapshot: + return g_all + if snapshot_freq.upper() not in ['D', 'W', 'M']: + # format: '1200000s' + assert snapshot_freq.endswith('s') + freq = int(snapshot_freq.strip('s')) + snapshot_list = utils.make_graph_snapshot_by_seconds(g_all, freq) + else: + snapshot_list = utils.make_graph_snapshot(g_all, snapshot_freq, + is_hetero=False) + + num_nodes = g_all.edge_index.max() + 1 + + for g_snapshot in snapshot_list: + g_snapshot.node_states = [0 for _ in range(cfg.gnn.layers_mp)] + g_snapshot.node_cells = [0 for _ in range(cfg.gnn.layers_mp)] + g_snapshot.node_degree_existing = torch.zeros(num_nodes) + + return snapshot_list + + +def load_uci_dataset(format, name, dataset_dir): + if format == 'uci_message': + graphs = load_snapshots(os.path.join(dataset_dir, name), + snapshot=cfg.transaction.snapshot, + snapshot_freq=cfg.transaction.snapshot_freq) + if cfg.dataset.split_method == 'chronological_temporal': + # return graphs with enough number of edges. + filtered_graphs = list() + for g in graphs: + if g.num_edges >= 2: + filtered_graphs.append(g) + return filtered_graphs + else: + # The default split (80-10-10) requires at least 10 edges each + # snapshot. 
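            # Keeping only snapshots with at least 10 edges ensures the
            # 80/10/10 per-snapshot edge split leaves at least one edge for
            # validation and one for testing.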
+ filtered_graphs = list() + for g in graphs: + if g.num_edges >= 10: + filtered_graphs.append(g) + return filtered_graphs + + +register_loader('roland_uci_message', load_uci_dataset) From 238c4772722383944934fbb5d11671cee0d35e2a Mon Sep 17 00:00:00 2001 From: Tianyu Du Date: Sat, 5 Jun 2021 15:57:39 -0700 Subject: [PATCH 12/66] remove print --- graphgym/contrib/loader/roland_ucimsg.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/graphgym/contrib/loader/roland_ucimsg.py b/graphgym/contrib/loader/roland_ucimsg.py index 6912be75..6ac2b9fc 100644 --- a/graphgym/contrib/loader/roland_ucimsg.py +++ b/graphgym/contrib/loader/roland_ucimsg.py @@ -45,8 +45,6 @@ def load_single_dataset(dataset_dir: str) -> Graph: node_feature = torch.ones(num_nodes, 1) - print('feature_node_int_num: ', node_feature.max() + 1) - edge_time = torch.FloatTensor(df_trans['TIMESTAMP'].values) graph = Graph( From 5c05fa481f61af8fe74215afe96d0b39b4039a50 Mon Sep 17 00:00:00 2001 From: Tianyu Du Date: Sat, 5 Jun 2021 16:57:57 -0700 Subject: [PATCH 13/66] add loader --- graphgym/contrib/loader/roland_as.py | 167 +++++++++++++++++++++++ graphgym/contrib/loader/roland_btc.py | 182 ++++++++++++++++++++++++++ 2 files changed, 349 insertions(+) create mode 100644 graphgym/contrib/loader/roland_as.py create mode 100644 graphgym/contrib/loader/roland_btc.py diff --git a/graphgym/contrib/loader/roland_as.py b/graphgym/contrib/loader/roland_as.py new file mode 100644 index 00000000..4cab81ad --- /dev/null +++ b/graphgym/contrib/loader/roland_as.py @@ -0,0 +1,167 @@ +""" +Loader for the Autonomous systems AS-733 dataset. +""" +import os +from datetime import datetime +from typing import List + +import numpy as np +import pandas as pd +import torch +from deepsnap.graph import Graph +from graphgym.config import cfg +from graphgym.register import register_loader +from sklearn.preprocessing import OrdinalEncoder +from tqdm import tqdm + + +def make_graph_snapshot(g_all: Graph, snapshot_freq: str) -> List[Graph]: + t = g_all.edge_time.numpy().astype(np.int64) + snapshot_freq = snapshot_freq.upper() + + period_split = pd.DataFrame( + {'Timestamp': t, + 'TransactionTime': pd.to_datetime(t, unit='s')}, + index=range(len(g_all.edge_time))) + + freq_map = {'D': '%j', # day of year. + 'W': '%W', # week of year. + 'M': '%m' # month of year. + } + + period_split['Year'] = period_split['TransactionTime'].dt.strftime( + '%Y').astype(int) + + period_split['SubYearFlag'] = period_split['TransactionTime'].dt.strftime( + freq_map[snapshot_freq]).astype(int) + + period2id = period_split.groupby(['Year', 'SubYearFlag']).indices + + periods = sorted(list(period2id.keys())) + snapshot_list = list() + + for p in periods: + # unique IDs of edges in this period. 
+ period_members = period2id[p] + assert np.all(period_members == np.unique(period_members)) + + g_incr = Graph( + node_feature=g_all.node_feature, + edge_feature=g_all.edge_feature[period_members, :], + edge_index=g_all.edge_index[:, period_members], + edge_time=g_all.edge_time[period_members], + directed=g_all.directed + ) + snapshot_list.append(g_incr) + + snapshot_list.sort(key=lambda x: torch.min(x.edge_time)) + + return snapshot_list + + +def file2timestamp(file_name): + t = file_name.strip('.txt').strip('as') + ts = int(datetime.strptime(t, '%Y%m%d').timestamp()) + return ts + + +def load_generic_dataset(format, name, dataset_dir): + if format == 'as': + all_files = [x for x in sorted(os.listdir(dataset_dir)) + if (x.startswith('as') and x.endswith('.txt'))] + assert len(all_files) == 733 + assert all(x.endswith('.txt') for x in all_files) + + edge_index_lst, edge_time_lst = list(), list() + all_files = sorted(all_files) + # if cfg.train.mode in ['baseline', 'baseline_v2', 'live_update_fixed_split']: + # # The baseline setting in EvolveGCN paper only uses 100 snapshots. + # all_files = all_files[:100] + for graph_file in tqdm(all_files): + today = file2timestamp(graph_file) + graph_file = os.path.join(dataset_dir, graph_file) + + src, dst = list(), list() + with open(graph_file, 'r') as f: + for line in f.readlines(): + if line.startswith('#'): + continue + line = line.strip('\n') + v1, v2 = line.split('\t') + src.append(int(v1)) + dst.append(int(v2)) + + edge_index = np.stack((src, dst)) + edge_index_lst.append(edge_index) + + edge_time = np.ones(edge_index.shape[1]) * today + edge_time_lst.append(edge_time) + + edge_index_raw = np.concatenate(edge_index_lst, axis=1).astype(int) + + num_nodes = len(np.unique(edge_index_raw)) + + # encode node indices to consecutive integers. + node_indices = np.sort(np.unique(edge_index_raw)) + enc = OrdinalEncoder(categories=[node_indices, node_indices]) + edge_index = enc.fit_transform(edge_index_raw.transpose()).transpose() + edge_index = torch.Tensor(edge_index).long() + edge_time = torch.Tensor(np.concatenate(edge_time_lst)) + + # Use scaled datetime as edge_feature. + scale = edge_time.max() - edge_time.min() + base = edge_time.min() + scaled_edge_time = 2 * (edge_time.clone() - base) / scale + + assert cfg.dataset.AS_node_feature in ['one', 'one_hot_id', + 'one_hot_degree_global', + 'one_hot_degree_local'] + + if cfg.dataset.AS_node_feature == 'one': + node_feature = torch.ones(num_nodes, 1) + elif cfg.dataset.AS_node_feature == 'one_hot_id': + # One hot encoding the node ID. + node_feature = torch.Tensor(np.eye(num_nodes)) + elif cfg.dataset.AS_node_feature == 'one_hot_degree_global': + # undirected graph, use only out degree. + _, node_degree = torch.unique(edge_index[0], sorted=True, + return_counts=True) + node_feature = np.zeros((num_nodes, node_degree.max() + 1)) + node_feature[np.arange(num_nodes), node_degree] = 1 + # 1 ~ 63748 degrees, but only 710 possible levels, exclude all zero + # columns. 
+ non_zero_cols = (node_feature.sum(axis=0) > 0) + node_feature = node_feature[:, non_zero_cols] + node_feature = torch.Tensor(node_feature) + else: + raise NotImplementedError + + g_all = Graph( + node_feature=node_feature, + edge_feature=scaled_edge_time.reshape(-1, 1), + edge_index=edge_index, + edge_time=edge_time, + directed=True + ) + + snapshot_list = make_graph_snapshot(g_all, + cfg.transaction.snapshot_freq) + + for g_snapshot in snapshot_list: + g_snapshot.node_states = [0 for _ in range(cfg.gnn.layers_mp)] + g_snapshot.node_cells = [0 for _ in range(cfg.gnn.layers_mp)] + g_snapshot.node_degree_existing = torch.zeros(num_nodes) + + if cfg.dataset.split_method == 'chronological_temporal': + return snapshot_list + else: + # The default split (80-10-10) requires at least 10 edges each + # snapshot. + filtered_graphs = list() + for g in tqdm(snapshot_list): + if g.num_edges >= 10: + filtered_graphs.append(g) + return filtered_graphs + + +register_loader('roland_as', load_generic_dataset) diff --git a/graphgym/contrib/loader/roland_btc.py b/graphgym/contrib/loader/roland_btc.py new file mode 100644 index 00000000..58a9884d --- /dev/null +++ b/graphgym/contrib/loader/roland_btc.py @@ -0,0 +1,182 @@ +""" +Data loader for bitcoin datasets. +Mar. 27, 2021 +""" +import os +from typing import List, Union + +import deepsnap +import graphgym.contrib.loader.dynamic_graph_utils as utils +import numpy as np +import pandas as pd +import torch +from deepsnap.graph import Graph +from graphgym.config import cfg +from graphgym.register import register_loader +from sklearn.preprocessing import MinMaxScaler, OrdinalEncoder + + +def load_single_dataset(dataset_dir: str) -> Graph: + df_trans = pd.read_csv(dataset_dir, sep=',', header=None, index_col=None) + df_trans.columns = ['SOURCE', 'TARGET', 'RATING', 'TIME'] + # NOTE: 'SOURCE' and 'TARGET' are not consecutive. + num_nodes = len( + pd.unique(df_trans[['SOURCE', 'TARGET']].to_numpy().ravel())) + + # bitcoin OTC contains decimal numbers, round them. + df_trans['TIME'] = df_trans['TIME'].astype(np.int).astype(np.float) + assert not np.any(pd.isna(df_trans).values) + + time_scaler = MinMaxScaler((0, 2)) + df_trans['TimestampScaled'] = time_scaler.fit_transform( + df_trans['TIME'].values.reshape(-1, 1)) + + edge_feature = torch.Tensor( + df_trans[['RATING', 'TimestampScaled']].values) # (E, edge_dim) + # SOURCE and TARGET IDs are already encoded in the csv file. + # edge_index = torch.Tensor( + # df_trans[['SOURCE', 'TARGET']].values.transpose()).long() # (2, E) + + node_indices = np.sort( + pd.unique(df_trans[['SOURCE', 'TARGET']].to_numpy().ravel())) + enc = OrdinalEncoder(categories=[node_indices, node_indices]) + raw_edges = df_trans[['SOURCE', 'TARGET']].values + edge_index = enc.fit_transform(raw_edges).transpose() + edge_index = torch.LongTensor(edge_index) + + # num_nodes = torch.max(edge_index) + 1 + # Use dummy node features. + node_feature = torch.ones(num_nodes, 1).float() + + edge_time = torch.FloatTensor(df_trans['TIME'].values) + + # TODO: add option here. 
+ # if cfg.train.mode in ['baseline', 'baseline_v2', 'live_update_fixed_split']: + # edge_feature = torch.cat((edge_feature, edge_feature.clone()), dim=0) + # reversed_idx = torch.stack([edge_index[1], edge_index[0]]).clone() + # edge_index = torch.cat((edge_index, reversed_idx), dim=1) + # edge_time = torch.cat((edge_time, edge_time.clone())) + + graph = Graph( + node_feature=node_feature, + edge_feature=edge_feature, + edge_index=edge_index, + edge_time=edge_time, + directed=True + ) + return graph + + +# def make_graph_snapshot(g_all: Graph, snapshot_freq: str) -> List[Graph]: +# t = g_all.edge_time.numpy().astype(np.int64) +# snapshot_freq = snapshot_freq.upper() + +# period_split = pd.DataFrame( +# {'Timestamp': t, +# 'TransactionTime': pd.to_datetime(t, unit='s')}, +# index=range(len(g_all.edge_time))) + +# freq_map = {'D': '%j', # day of year. +# 'W': '%W', # week of year. +# 'M': '%m' # month of year. +# } + +# period_split['Year'] = period_split['TransactionTime'].dt.strftime( +# '%Y').astype(int) + +# period_split['SubYearFlag'] = period_split['TransactionTime'].dt.strftime( +# freq_map[snapshot_freq]).astype(int) + +# period2id = period_split.groupby(['Year', 'SubYearFlag']).indices + +# periods = sorted(list(period2id.keys())) +# snapshot_list = list() + +# for p in periods: +# # unique IDs of edges in this period. +# period_members = period2id[p] +# assert np.all(period_members == np.unique(period_members)) + +# g_incr = Graph( +# node_feature=g_all.node_feature, +# edge_feature=g_all.edge_feature[period_members, :], +# edge_index=g_all.edge_index[:, period_members], +# edge_time=g_all.edge_time[period_members], +# directed=g_all.directed +# ) +# snapshot_list.append(g_incr) + +# snapshot_list.sort(key=lambda x: torch.min(x.edge_time)) + +# return snapshot_list + + +# def split_by_seconds(g_all, freq_sec: int): +# # Split the entire graph into snapshots. +# split_criterion = g_all.edge_time // freq_sec +# groups = torch.sort(torch.unique(split_criterion))[0] +# snapshot_list = list() +# for t in groups: +# period_members = (split_criterion == t) +# g_incr = Graph( +# node_feature=g_all.node_feature, +# edge_feature=g_all.edge_feature[period_members, :], +# edge_index=g_all.edge_index[:, period_members], +# edge_time=g_all.edge_time[period_members], +# directed=g_all.directed +# ) +# snapshot_list.append(g_incr) +# return snapshot_list + +# TODO: merge these two method. +def load_snapshots(dataset_dir: str, + snapshot: bool = True, + snapshot_freq: str = None + ) -> Union[deepsnap.graph.Graph, + List[deepsnap.graph.Graph]]: + g_all = load_single_dataset(dataset_dir) + if not snapshot: + return g_all + + if snapshot_freq.upper() not in ['D', 'W', 'M']: + # format: '1200000s' + # assume split by seconds (timestamp) as in EvolveGCN paper. + freq = int(snapshot_freq.strip('s')) + snapshot_list = utils.make_graph_snapshot_by_seconds(g_all, freq) + else: + snapshot_list = utils.make_graph_snapshot(g_all, snapshot_freq) + num_nodes = g_all.edge_index.max() + 1 + + for g_snapshot in snapshot_list: + g_snapshot.node_states = [0 for _ in range(cfg.gnn.layers_mp)] + g_snapshot.node_cells = [0 for _ in range(cfg.gnn.layers_mp)] + g_snapshot.node_degree_existing = torch.zeros(num_nodes) + + # check snapshots ordering. 
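    # Snapshots must be strictly ordered in time: the latest edge of
    # snapshot i has to precede the earliest edge of snapshot i + 1, so the
    # assertion below also rules out overlapping time ranges.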
+ prev_end = -1 + for g in snapshot_list: + start, end = torch.min(g.edge_time), torch.max(g.edge_time) + assert prev_end < start <= end + prev_end = end + + return snapshot_list + + +def load_btc_dataset(format: str, name: str, dataset_dir: str): + if format == 'bitcoin': + graphs = load_snapshots(os.path.join(dataset_dir, name), + snapshot=cfg.transaction.snapshot, + snapshot_freq=cfg.transaction.snapshot_freq) + if cfg.dataset.split_method == 'chronological_temporal': + return graphs + else: + # The default split (80-10-10) requires at least 10 edges each + # snapshot. + filtered_graphs = list() + for g in graphs: + if g.num_edges >= 10: + filtered_graphs.append(g) + return filtered_graphs + + +register_loader('roland_btc', load_btc_dataset) From c60761fdd0d14bc4373fdc8dd37ea5c10941c478 Mon Sep 17 00:00:00 2001 From: Tianyu Du Date: Sat, 5 Jun 2021 17:01:14 -0700 Subject: [PATCH 14/66] add --- graphgym/contrib/loader/roland_reddit.py | 174 +++++++++++++++++++++++ 1 file changed, 174 insertions(+) create mode 100644 graphgym/contrib/loader/roland_reddit.py diff --git a/graphgym/contrib/loader/roland_reddit.py b/graphgym/contrib/loader/roland_reddit.py new file mode 100644 index 00000000..37d7e66d --- /dev/null +++ b/graphgym/contrib/loader/roland_reddit.py @@ -0,0 +1,174 @@ +import os +from typing import List, Union + +import dask.dataframe as dd +import deepsnap +import graphgym.contrib.loader.dynamic_graph_utils as utils +import numpy as np +import pandas as pd +import torch +from dask_ml.preprocessing import OrdinalEncoder +from deepsnap.graph import Graph +from graphgym.config import cfg +from graphgym.register import register_loader +from sklearn.preprocessing import MinMaxScaler + + +def load_single_dataset(dataset_dir: str) -> Graph: + df_trans = dd.read_csv(dataset_dir, sep='\t', low_memory=False) + df_trans = df_trans.compute() + assert not np.any(pd.isna(df_trans).values) + df_trans.reset_index(drop=True, inplace=True) # required for dask. + + # Encode src and dst node IDs. + # get unique values of src and dst. + unique_subreddits = pd.unique( + df_trans[['SOURCE_SUBREDDIT', 'TARGET_SUBREDDIT']].to_numpy().ravel()) + unique_subreddits = np.sort(unique_subreddits) + cate_type = pd.api.types.CategoricalDtype(categories=unique_subreddits, + ordered=True) + df_trans['SOURCE_SUBREDDIT'] = df_trans['SOURCE_SUBREDDIT'].astype( + cate_type) + df_trans['TARGET_SUBREDDIT'] = df_trans['TARGET_SUBREDDIT'].astype( + cate_type) + enc = OrdinalEncoder(columns=['SOURCE_SUBREDDIT', 'TARGET_SUBREDDIT']) + df_encoded = enc.fit_transform(df_trans) + df_encoded.reset_index(drop=True, inplace=True) + + # Add node feature from the embedding dataset. + node_embedding_dir = os.path.join(cfg.dataset.dir, + 'web-redditEmbeddings-subreddits.csv') + + # index: subreddit name, values: embedding. + df_node = pd.read_csv(node_embedding_dir, header=None, index_col=0) + + # ordinal encoding follows order in unique_subreddits. + # df_encoded['SOURCE_SUBREDDIT'] contains encoded integral values. + # unique_subreddits[df_encoded['SOURCE_SUBREDDIT']] + # tries to reverse encoded_integer --> original subreddit name. + # check if recovered sub-reddit name matched the raw data. + for col in ['SOURCE_SUBREDDIT', 'TARGET_SUBREDDIT']: + assert all(unique_subreddits[df_encoded[col]] == df_trans[col]) + + num_nodes = len(cate_type.categories) + node_feature = torch.ones(size=(num_nodes, 300)) + # for nodes without precomputed embedding, use the average value. 
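    # np.mean over the full (num_embedded_subreddits, 300) matrix is a single
    # scalar, so every node starts from the same constant vector; rows for
    # subreddits with a precomputed embedding are overwritten in the loop below.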
+ node_feature = node_feature * np.mean(df_node.values) + + # cate_type.categories[i] is encoded to i, by construction. + for i, subreddit in enumerate(cate_type.categories): + if subreddit in df_node.index: + embedding = df_node.loc[subreddit] + node_feature[i, :] = torch.Tensor(embedding.values) + + # Original format: df['TIMESTAMP'][0] = '2013-12-31 16:39:18' + # Convert to unix timestamp (integers). + df_encoded['TIMESTAMP'] = pd.to_datetime(df_encoded['TIMESTAMP'], + format='%Y-%m-%d %H:%M:%S') + df_encoded['TIMESTAMP'] = (df_encoded['TIMESTAMP'] - pd.Timestamp( + '1970-01-01')) // pd.Timedelta('1s') # now integers. + + # Scale edge time. + time_scaler = MinMaxScaler((0, 2)) + df_encoded['TimestampScaled'] = time_scaler.fit_transform( + df_encoded['TIMESTAMP'].values.reshape(-1, 1)) + + # Link sentimental representation (86-dimension). + # comma-separated string: '3.1,5.1,0.0,...' + senti_str_lst = df_encoded['PROPERTIES'].values + edge_senti_embedding = [x.split(',') for x in senti_str_lst] + edge_senti_embedding = np.array(edge_senti_embedding).astype(np.float32) + # (E, 86) + + ef = df_encoded[['TimestampScaled', 'LINK_SENTIMENT']].values + edge_feature = np.concatenate([ef, edge_senti_embedding], axis=1) + edge_feature = torch.Tensor(edge_feature).float() # (E, 88) + + edge_index = torch.Tensor( + df_encoded[['SOURCE_SUBREDDIT', + 'TARGET_SUBREDDIT']].values.transpose()).long() # (2, E) + num_nodes = torch.max(edge_index) + 1 + + edge_time = torch.FloatTensor(df_encoded['TIMESTAMP'].values) + + graph = Graph( + node_feature=node_feature, + edge_feature=edge_feature, + edge_index=edge_index, + edge_time=edge_time, + directed=True + ) + + return graph + + +# def make_graph_snapshot(g_all: Graph, snapshot_freq: str) -> list: +# t = g_all.edge_time.numpy().astype(np.int64) +# snapshot_freq = snapshot_freq.upper() + +# period_split = pd.DataFrame( +# {'Timestamp': t, +# 'TransactionTime': pd.to_datetime(t, unit='s')}, +# index=range(len(g_all.edge_time))) + +# freq_map = {'D': '%j', # day of year. +# 'W': '%W', # week of year. +# 'M': '%m' # month of year. +# } + +# period_split['Year'] = period_split['TransactionTime'].dt.strftime( +# '%Y').astype(int) + +# period_split['SubYearFlag'] = period_split['TransactionTime'].dt.strftime( +# freq_map[snapshot_freq]).astype(int) + +# period2id = period_split.groupby(['Year', 'SubYearFlag']).indices +# # e.g., dictionary w/ key = (2021, 3) and val = array(edges). + +# periods = sorted(list(period2id.keys())) +# snapshot_list = list() +# for p in periods: +# # unique IDs of edges in this period. 
+# period_members = period2id[p] +# assert np.all(period_members == np.unique(period_members)) + +# g_incr = Graph( +# node_feature=g_all.node_feature, +# edge_feature=g_all.edge_feature[period_members, :], +# edge_index=g_all.edge_index[:, period_members], +# edge_time=g_all.edge_time[period_members], +# directed=g_all.directed +# ) +# snapshot_list.append(g_incr) +# return snapshot_list + + +def load_generic(dataset_dir: str, + snapshot: bool = True, + snapshot_freq: str = None + ) -> Union[deepsnap.graph.Graph, + List[deepsnap.graph.Graph]]: + g_all = load_single_dataset(dataset_dir) + if not snapshot: + return g_all + else: + snapshot_list = utils.make_graph_snapshot(g_all, snapshot_freq) + num_nodes = g_all.edge_index.max() + 1 + + for g_snapshot in snapshot_list: + g_snapshot.node_states = [0 for _ in range(cfg.gnn.layers_mp)] + g_snapshot.node_cells = [0 for _ in range(cfg.gnn.layers_mp)] + g_snapshot.node_degree_existing = torch.zeros(num_nodes) + + return snapshot_list + + +def load_generic_dataset(format, name, dataset_dir): + if format == 'reddit_hyperlink': + graphs = load_generic(os.path.join(dataset_dir, name), + snapshot=cfg.transaction.snapshot, + snapshot_freq=cfg.transaction.snapshot_freq) + return graphs + + +register_loader('roland_reddit_hyperlink', load_generic_dataset) From e4c8173a102965caffff479fd86706dbd7eacc11 Mon Sep 17 00:00:00 2001 From: Tianyu Du Date: Sat, 5 Jun 2021 17:03:18 -0700 Subject: [PATCH 15/66] add --- graphgym/contrib/loader/roland_bsi_v3.py | 339 +++++++++++++++++++++++ 1 file changed, 339 insertions(+) create mode 100644 graphgym/contrib/loader/roland_bsi_v3.py diff --git a/graphgym/contrib/loader/roland_bsi_v3.py b/graphgym/contrib/loader/roland_bsi_v3.py new file mode 100644 index 00000000..93683931 --- /dev/null +++ b/graphgym/contrib/loader/roland_bsi_v3.py @@ -0,0 +1,339 @@ +""" +A refined version for loading the roland dataset. This version has the +following key points: + +(1) Node's features are determined by their first transaction, so that + payer and payee information are no longer included as a edge features. + + Node features include: + company identity, bank, country, region, Skd, SkdL1, SkdL2, Skis, + SkisL1, SkisL2. + +(2) edge features include: # system, currency, scaled amount (EUR), and + scaled timestamp. + +Mar. 31, 2021 +""" +import os +from typing import List, Union + +import dask.dataframe as dd +import deepsnap +import graphgym.contrib.loader.dynamic_graph_utils as utils +import numpy as np +import pandas as pd +import torch +from dask_ml.preprocessing import OrdinalEncoder +from deepsnap.graph import Graph +from graphgym.config import cfg +from graphgym.register import register_loader +from sklearn.preprocessing import MinMaxScaler +from sklearn.preprocessing import OrdinalEncoder as SkOrdinalEncoder + +# ============================================================================= +# Configure and instantiate the loader here. +# ============================================================================= +# Required for all graphs. +SRC_NODE: str = 'Payer' +DST_NODE: str = 'Payee' +TIMESTAMP: str = 'Timestamp' +AMOUNT: str = 'AmountEUR' + +# Categorical columns are SRC_NODE+var and DST_NODE+var. +# columns: SRC_NODE + NODE_CATE_VARS, DST_NODE + NODE_CATE_VARS, EDGE_CATE_VARS +# will be encoded using ordinal encoder. +# Note that '' corresponds to columns SRC_NODE and DST_NODE. 
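+# For example, the entry 'Bank' refers to the columns 'PayerBank' and
+# 'PayeeBank', while the empty string '' refers to the 'Payer' and 'Payee'
+# columns themselves.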
+NODE_CATE_VARS: List[str] = ['', 'Bank', 'Country', 'Region', 'Skd', 'SkdL1', + 'SkdL2', 'Skis', 'SkisL1', 'SkisL2'] +EDGE_CATE_VARS: List[str] = ['# System', 'Currency'] + +# contents of graph.edge_feature +EDGE_FEATURE_COLS: List[str] = [AMOUNT, 'TimestampScaled'] +# contents of graph.node_feature +NODE_FEATURE_LIST: List[str] = ['Bank', 'Country', 'Region', 'SkdL1', 'SkisL1'] + +# Required for heterogeneous graphs only. +# Node and edge features used to define node and edge type in hete GNN. +NODE_TYPE_DEFN: List[str] = ['Country'] +EDGE_TYPE_DEFN: List[str] = ['# System'] + + +# Required for graphs with node features only. + +def get_node_feature(df: pd.DataFrame) -> pd.DataFrame: + """Extract node features from a transaction dataset. + """ + temp = list() + for p in [SRC_NODE, DST_NODE]: + # require ['Payer', 'PayerBank', 'PayerCountry', ...] + cols = [p] + [p + var for var in NODE_FEATURE_LIST] + relevant = df[cols].copy() + # rename to ['Company', 'Bank', 'Country', ...] + relevant.columns = ['Company'] + NODE_FEATURE_LIST + temp.append(relevant) + df_char = pd.concat(temp, axis=0) + + # get company's information based on its first occurrence. + df_char = df_char.groupby('Company').first() + return df_char[NODE_FEATURE_LIST] + + +def construct_additional_features(df: pd.DataFrame) -> pd.DataFrame: + """ + Constructs additional features of the transaction dataset. + """ + # for p in ('Payer', 'Payee'): + # # %% Location of companies. + # mask = (df[p + 'Country'] != 'SI') + # out_of_country = np.empty(len(df), dtype=object) + # out_of_country[mask] = 'OutOfCountry' + # out_of_country[~mask] = 'InCountry' + # df[p + 'OutOfCountry'] = out_of_country + # + # mask = (df['PayerCountry'] != df['PayeeCountry']) + # missing_mask = np.logical_or(df['PayerCountry'] == 'missing', + # df['PayeeCountry'] == 'missing') + # cross_country = np.empty(len(df), dtype=object) + # cross_country[mask] = 'CrossCountry' + # cross_country[~mask] = 'WithinCountry' + # cross_country[missing_mask] = 'Missing' + # df['CrossCountry'] = cross_country + # + # amount_level = np.empty(len(df), dtype=object) + # mask_small = df['AmountEUR'] < 500 + # mask_medium = np.logical_and(df['AmountEUR'] >= 500, + # df['AmountEUR'] < 1000) + # mask_large = df['AmountEUR'] >= 1000 + # amount_level[mask_small] = '$<500' + # amount_level[mask_medium] = '500<=$<1k' + # amount_level[mask_large] = '$>=1k' + # + # df['AmountLevel'] = amount_level + return df + + +def load_single_dataset(dataset_dir: str, is_hetero: bool = True, + type_info_loc: str = 'append' + ) -> Graph: + """ + Loads a single graph object from tsv file. + + Args: + dataset_dir: the path of tsv file to be loaded. + is_hetero: whether to load heterogeneous graph. + type_info_loc: 'append' or 'graph_attribute'. + + Returns: + graph: a (homogenous) deepsnap graph object. + """ + # Load dataset using dask for fast parallel loading. + df_trans = dd.read_csv(dataset_dir, sep='\t', low_memory=False) + df_trans = df_trans.fillna('missing') + df_trans = df_trans.compute() + df_trans = construct_additional_features(df_trans) + df_trans.reset_index(drop=True, inplace=True) # necessary for dask. + + # a unique values of node-level categorical variables. + node_cat_uniques = dict() # Dict[str, np.ndarray of str] + for var in NODE_CATE_VARS: # for each node level categorical variable. + # get unique values of this categorical variable. 
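+        # (values are pooled over the payer and payee columns so that the same
+        # company receives the same integer code in either role.)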
+ relevant = df_trans[[SRC_NODE + var, DST_NODE + var]] + unique_var = pd.unique(relevant.to_numpy().ravel()) + node_cat_uniques[var] = np.sort(unique_var) + # convert corresponding columns into pandas categorical variables. + cate_type = pd.api.types.CategoricalDtype( + categories=node_cat_uniques[var], ordered=True) + for p in ['Payer', 'Payee']: + df_trans[p + var] = df_trans[p + var].astype(cate_type) + + # Convert edge level categorical variables. + for var in EDGE_CATE_VARS: + unique_var = np.sort(pd.unique(df_trans[[var]].to_numpy().ravel())) + cate_type = pd.api.types.CategoricalDtype(categories=unique_var, + ordered=True) + df_trans[var] = df_trans[var].astype(cate_type) + + # Encoding categorical variables, the dask_ml.OrdinalEncoder only modify + # and encode columns of categorical dtype. + enc = OrdinalEncoder() + df_encoded = enc.fit_transform(df_trans) + df_encoded.reset_index(drop=True, inplace=True) + print('Columns encoded to ordinal:') + print(list(enc.categorical_columns_)) + + # Scaling transaction amounts. + scaler = MinMaxScaler((0, 2)) + df_encoded[AMOUNT] = scaler.fit_transform( + df_encoded[AMOUNT].values.reshape(-1, 1)) + + # Scaling timestamps. + time_scaler = MinMaxScaler((0, 2)) + df_encoded['TimestampScaled'] = time_scaler.fit_transform( + df_encoded[TIMESTAMP].values.reshape(-1, 1)) + + # Prepare for output. + edge_feature = torch.Tensor(df_encoded[EDGE_FEATURE_COLS].values) + + print('feature_edge_int_num', + [int(torch.max(edge_feature[:, i])) + 1 + for i in range(len(EDGE_FEATURE_COLS) - 2)]) + + edge_index = torch.Tensor( + df_encoded[[SRC_NODE, DST_NODE]].values.transpose()).long() # (2, E) + num_nodes = torch.max(edge_index) + 1 + assert num_nodes == len(node_cat_uniques['']) + + df_node_info = get_node_feature(df_encoded) + print(df_node_info.shape) + node_feature = torch.Tensor(df_node_info.astype(float).values) + + cfg.transaction.feature_node_int_num = [ + int(torch.max(node_feature[:, i])) + 1 + for i in range(len(NODE_FEATURE_LIST)) + ] + + print('feature_node_int_num: ', + [int(torch.max(node_feature[:, i])) + 1 + for i in range(len(NODE_FEATURE_LIST))]) + + edge_time = torch.FloatTensor(df_encoded[TIMESTAMP].values) + + graph = Graph( + node_feature=node_feature, + edge_feature=edge_feature, + edge_index=edge_index, + edge_time=edge_time, + directed=True + ) + + if is_hetero: + # Construct node type signatures. E.g., 'USA--CA' for country + region. + df_node_info['NodeType'] = df_node_info[NODE_TYPE_DEFN[0]].astype(str) + for var in NODE_TYPE_DEFN[1:]: + df_node_info['NodeType'] += ('--' + df_node_info[var].astype(str)) + + node_type_enc = SkOrdinalEncoder() + # The sklearn ordinal encoder transforms numpy array instead. + node_type_int = node_type_enc.fit_transform( + df_node_info['NodeType'].values.reshape(-1, 1)) + node_type_int = torch.FloatTensor(node_type_int) + + # Construct edge type signatures. 
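+        # (same scheme as NodeType: the values of the EDGE_TYPE_DEFN columns
+        # are joined with '--' and then ordinally encoded.)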
+ df_trans['EdgeType'] = df_trans[EDGE_TYPE_DEFN[0]].astype(str) + for var in EDGE_TYPE_DEFN[1:]: + df_trans['EdgeType'] += ('--' + df_trans[var].astype(str)) + + edge_type_enc = SkOrdinalEncoder() + edge_type_int = edge_type_enc.fit_transform( + df_trans['EdgeType'].values.reshape(-1, 1)) + edge_type_int = torch.FloatTensor(edge_type_int) + + if type_info_loc == 'append': + graph.edge_feature = torch.cat((graph.edge_feature, edge_type_int), + dim=1) + graph.node_feature = torch.cat((graph.node_feature, node_type_int), + dim=1) + elif type_info_loc == 'graph_attribute': + graph.node_type = node_type_int.reshape(-1, ) + graph.edge_type = edge_type_int.reshape(-1, ) + else: + raise ValueError(f'Unsupported type info loc: {type_info_loc}') + + # add a list of unique types for reference. + graph.list_n_type = node_type_int.unique().long() + graph.list_e_type = edge_type_int.unique().long() + + return graph + + +# def make_graph_snapshot(g_all: Graph, +# snapshot_freq: str, +# is_hetero: bool = True) -> list: +# """ +# Constructs a list of graph snapshots (Graph or HeteroGraph) based +# on g_all and snapshot_freq. +# +# Args: +# g_all: the entire homogenous graph. +# snapshot_freq: snapshot frequency. +# is_hetero: if make heterogeneous graphs. +# """ +# t = g_all.edge_time.numpy().astype(np.int64) +# snapshot_freq = snapshot_freq.upper() +# +# period_split = pd.DataFrame( +# {'Timestamp': t, +# 'TransactionTime': pd.to_datetime(t, unit='s')}, +# index=range(len(g_all.edge_time))) +# +# freq_map = {'D': '%j', # day of year. +# 'W': '%W', # week of year. +# 'M': '%m' # month of year. +# } +# +# period_split['Year'] = period_split['TransactionTime'].dt.strftime( +# '%Y').astype(int) +# +# period_split['SubYearFlag'] = period_split['TransactionTime'].dt.strftime( +# freq_map[snapshot_freq]).astype(int) +# +# period2id = period_split.groupby(['Year', 'SubYearFlag']).indices +# # e.g., dictionary w/ key = (2021, 3) and val = array(edges). +# +# periods = sorted(list(period2id.keys())) # ascending order. +# # alternatively, sorted(..., key=lambda x: x[0] + x[1]/1000). +# snapshot_list = list() +# for p in periods: +# # unique IDs of edges in this period. 
+# period_members = period2id[p] +# +# g_incr = Graph( +# node_feature=g_all.node_feature, +# edge_feature=g_all.edge_feature[period_members, :], +# edge_index=g_all.edge_index[:, period_members], +# edge_time=g_all.edge_time[period_members], +# directed=g_all.directed, +# list_n_type=g_all.list_n_type if is_hetero else None, +# list_e_type=g_all.list_e_type if is_hetero else None, +# ) +# if is_hetero and hasattr(g_all, 'node_type'): +# g_incr.node_type = g_all.node_type +# g_incr.edge_type = g_all.edge_type[period_members] +# snapshot_list.append(g_incr) +# return snapshot_list + + +def load_generic(dataset_dir: str, + snapshot: bool = True, + snapshot_freq: str = None, + is_hetero: bool = False, + type_info_loc: str = 'graph_attribute' + ) -> Union[deepsnap.graph.Graph, List[deepsnap.graph.Graph]]: + g_all = load_single_dataset(dataset_dir, is_hetero=is_hetero, + type_info_loc=type_info_loc) + if not snapshot: + return g_all + else: + snapshot_list = utils.make_graph_snapshot(g_all, snapshot_freq, is_hetero) + num_nodes = g_all.edge_index.max() + 1 + + for g_snapshot in snapshot_list: + g_snapshot.node_states = [0 for _ in range(cfg.gnn.layers_mp)] + g_snapshot.node_cells = [0 for _ in range(cfg.gnn.layers_mp)] + g_snapshot.node_degree_existing = torch.zeros(num_nodes) + + return snapshot_list + + +def load_generic_dataset(format, name, dataset_dir): + if format == 'roland_bsi_general': + dataset_dir = os.path.join(dataset_dir, name) + graphs = load_generic(dataset_dir, + snapshot=cfg.transaction.snapshot, + snapshot_freq=cfg.transaction.snapshot_freq, + is_hetero=cfg.dataset.is_hetero, + type_info_loc=cfg.dataset.type_info_loc) + return graphs + + +register_loader('roland_bsi_v3', load_generic_dataset) From 13ff46c168aaea49262c36db135912cc728c6143 Mon Sep 17 00:00:00 2001 From: Tianyu Du Date: Sat, 5 Jun 2021 17:04:06 -0700 Subject: [PATCH 16/66] rename --- graphgym/contrib/loader/{roland_bsi_v3.py => roland.py} | 1 + 1 file changed, 1 insertion(+) rename graphgym/contrib/loader/{roland_bsi_v3.py => roland.py} (99%) diff --git a/graphgym/contrib/loader/roland_bsi_v3.py b/graphgym/contrib/loader/roland.py similarity index 99% rename from graphgym/contrib/loader/roland_bsi_v3.py rename to graphgym/contrib/loader/roland.py index 93683931..0e640e77 100644 --- a/graphgym/contrib/loader/roland_bsi_v3.py +++ b/graphgym/contrib/loader/roland.py @@ -336,4 +336,5 @@ def load_generic_dataset(format, name, dataset_dir): return graphs +# TODO: change name. register_loader('roland_bsi_v3', load_generic_dataset) From e9c71f178312fe6ef05ab5dd3aadfd038abf9b63 Mon Sep 17 00:00:00 2001 From: Tianyu Du Date: Sat, 5 Jun 2021 17:28:50 -0700 Subject: [PATCH 17/66] add --- graphgym/contrib/train/train_utils.py | 444 ++++++++++++++++++++++++++ 1 file changed, 444 insertions(+) create mode 100644 graphgym/contrib/train/train_utils.py diff --git a/graphgym/contrib/train/train_utils.py b/graphgym/contrib/train/train_utils.py new file mode 100644 index 00000000..bfc5100d --- /dev/null +++ b/graphgym/contrib/train/train_utils.py @@ -0,0 +1,444 @@ +""" +Metrics, other utility, and helper functions. 
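+
+Includes node-embedding keep-ratio schemes, negative-edge sampling, and
+rank-based evaluation metrics (MRR and recall at K) for link prediction.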
+""" +from typing import Dict, List, Optional + +import deepsnap +import numpy as np +import torch +from graphgym.config import cfg +from graphgym.loss import compute_loss +from torch_scatter import scatter_max, scatter_mean, scatter_min +from tqdm import tqdm + + +def get_keep_ratio(existing: torch.Tensor, new: torch.Tensor, + mode: str='linear') -> torch.Tensor: + """ + Get the keep ratio for individual nodes to update node embeddings. + Specifically: + state[v,t] = state[v,t-1]*keep_ratio + new_feature[v,t]*(1-keep_ratio) + + Args: + existing: a tensor of nodes' degrees in G[0], G[1], ..., G[t-1]. + new: a tensor of nodes' degrees in G[t]. + mode: how to compute the keep_ratio. + + Returns: + A tensor with shape (num_nodes,) valued in [0, 1]. + """ + if mode == 'constant': + # This scheme is equivalent to exponential decaying. + ratio = torch.ones_like(existing) + # node observed for the first time, keep_ratio = 0. + ratio[torch.logical_and(existing == 0, new > 0)] = 0 + # take convex combination of old and new embeddings. + # 1/2 can be changed to other values. + ratio[torch.logical_and(existing > 0, new > 0)] = 1 / 2 + # inactive nodes have keep ratio 1, embeddings don't change. + elif mode == 'linear': + # The original method proposed by Jiaxuan. + ratio = existing / (existing + new + 1e-6) + # Following methods aim to shrink the weight of existing + # degrees, help to ensure non-trivial embedding update when the graph + # is large and history is long. + elif mode == 'log': + ratio = torch.log(existing + 1) / ( + torch.log(existing + 1) + new + 1e-6) + elif mode == 'sqrt': + ratio = torch.sqrt(existing) / (torch.sqrt(existing) + new + 1e-6) + else: + raise NotImplementedError(f'Mode {mode} is not supported.') + return ratio + + +def size_of(batch: deepsnap.graph.Graph) -> int: + """Computes how much memory a batch has consumed.""" + total_byte = 0 + for k, v in batch.__dict__.items(): + if isinstance(v, torch.Tensor): + total_byte += v.element_size() * v.nelement() + elif isinstance(v, list): # for node_states. + for sub_v in v: + if isinstance(sub_v, torch.Tensor): + total_byte += sub_v.element_size() * sub_v.nelement() + + return total_byte / (1024 ** 2) # MiB. + + +def move_batch_to_device(batch: deepsnap.graph.Graph, + device: str) -> deepsnap.graph.Graph: + """Moves and collects everything in the batch to the target device.""" + device = torch.device(device) + # This handles node_feature, edge_feature, etc. + batch = batch.to(device) + + for layer in range(len(batch.node_states)): + if torch.is_tensor(batch.node_states[layer]): + batch.node_states[layer] = batch.node_states[layer].to(device) + + if hasattr(batch, 'node_cells'): + # node_cells exist only for LSTM type RNNs. + for layer in range(len(batch.node_cells)): + if torch.is_tensor(batch.node_cells[layer]): + batch.node_cells[layer] = batch.node_cells[layer].to(device) + + return batch + + +def edge_index_difference(edge_include: torch.LongTensor, + edge_except: torch.LongTensor, + num_nodes: int) -> torch.LongTensor: + """Set difference operator, return edges in edge_all but not + in edge_except. + + Args: + edge_all (torch.LongTensor): (2, E1) tensor of edge indices. + edge_except (torch.LongTensor): (2, E2) tensor of edge indices to be + excluded from edge_all. + num_nodes (int): total number of nodes. + + Returns: + torch.LongTensor: Edge indices in edge_include but not in edge_except. + """ + # flatten (i, j) edge representations. 
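+    # (an edge (i, j) is mapped to the unique integer i * num_nodes + j, e.g.
+    # with num_nodes = 10 the edge (2, 3) becomes 23, so the set difference
+    # reduces to a 1-D membership test with np.isin.)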
+ idx_include = edge_include[0] * num_nodes + edge_include[1] + idx_except = edge_except[0] * num_nodes + edge_except[1] + # filter out edges in idx_except. + mask = torch.from_numpy(np.isin(idx_include, idx_except)).to(torch.bool) + idx_kept = idx_include[~mask] + i = idx_kept // num_nodes + j = idx_kept % num_nodes + return torch.stack([i, j], dim=0).long() + + +def gen_negative_edges(edge_index: torch.LongTensor, + num_neg_per_node: int, + num_nodes: int) -> torch.LongTensor: + """Generates a fixed number of negative edges for each node. + + Args: + edge_index (torch.LongTensor): (2, E) array of positive edges. + num_neg_per_node (int): 'approximate' number of negative edges generated + for each source node in edge_index. + num_nodes (int): total number of nodes. + + Returns: + torch.LongTensor: approximate num_nodes * num_neg_per_node + negative edges. + """ + src_lst = torch.unique(edge_index[0]) # get unique senders. + num_neg_per_node = int(1.2 * num_neg_per_node) # add some redundancy. + i = src_lst.repeat_interleave(num_neg_per_node) + j = torch.Tensor(np.random.choice(num_nodes, len(i), replace=True)) + # candidates for negative edges, X candidates from each src. + candidates = torch.stack([i, j], dim=0).long() + # filter out positive edges in candidate. + neg_edge_index = edge_index_difference(candidates, edge_index.to('cpu'), + num_nodes) + return neg_edge_index + + +def compute_src_mrr_and_recall(edge_label_index: torch.LongTensor, + edge_label: torch.LongTensor, + pred_score: torch.Tensor, + recall_k_lst: List[int], + mrr_top_k: Optional[int] = None + ) -> (float, Dict[int, float]): + """ + Computes source-based MRR and recall at K for each source node in + edge_label_index. + + Args: + edge_label_index: combination of positive and negative edges. + edge_label: label of edges in edge_label_index. + pred_score: P(E=positive) for each edge in edge_label_index. + recall_k_lst: to report recall at k for all k in this list. + mrr_top_k: calculating MRR for each source node using mean(1/rank) for + k positive edges with the highest pred_score. Set to None to use + all positive edges. + """ + assert edge_label_index.shape[1] == len(edge_label) == len(pred_score) + + src_lst = torch.unique(edge_label_index[0]) # source nodes to consider. + # edge_label_index were constructed by adding negative edges to every + # node in edge_index[0], thus every node in src_lst has at least one + # positive edge in edge_label_index. + # I.e., src_lst == torch.unique(edge_label_index[0][edge_label == 1]) + + node_level_mrr = [] # store MRR for each node. + node_recall_at = dict((k, []) for k in recall_k_lst) + for src in tqdm(src_lst, leave=False, desc='Node level MRR/Recall'): + # get positive/negative edges emitted from src node. + self_mask = (edge_label_index[0] == src) + self_label = edge_label[self_mask] + self_pred_score = pred_score[self_mask] + + # Alternative implementation. + best = torch.max(self_pred_score[self_label == 1]) + rank = torch.sum(self_pred_score[self_label == 0] >= best) + 1 + # print(pos_edge_rank[0], true, torch.sum(label == 0)) + mrr = float(1 / rank) + node_level_mrr.append(mrr) # mrr for this node. + + for k in recall_k_lst: + recall = _calculate_recall_at_k(self_pred_score, self_label, k) + node_recall_at[k].append(recall) + + # Average over all nodes. 
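+    # (macro averaging: every source node contributes equally, regardless of
+    # how many positive edges it emits.)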
+ macro_recall = dict((k, np.mean(v)) for (k, v) in node_recall_at.items()) + macro_mrr = float(np.mean(node_level_mrr)) + return macro_mrr, macro_recall + + +def _calculate_recall_at_k(pred_score: torch.Tensor, + label: torch.Tensor, + k: int) -> int: + """Computes whether the score of the most confident positive edge is + within the highest k scores. I.e., whether the most confident + positive edge beats at least k most confident negative edges. + + Args: + pred_score: a tensor of scores of predictions. + label: a tensor of labels. + k: get whether successful recall at k. + + Returns: + an indicator whether there is a successful recall at rank k. + """ + neg_score = pred_score[label == 0] + if len(neg_score) == 0: + return 0 + best_pos_score = torch.max(pred_score[label == 1]) + rank = torch.sum(neg_score >= best_pos_score) + 1 + return int(rank <= k) + + +@torch.no_grad() +def fast_batch_mrr_and_recall(edge_label_index: torch.Tensor, + edge_label: torch.Tensor, + pred_score: torch.Tensor, + num_neg_per_node: int, + num_nodes: int + ) -> (float, Dict[int, float]): + """ + A vectorized implementation to compute average rank-based metrics over + all source nodes. + + Args: + edge_label_index: + edge_label: + pred_score: P(edge i is positive) from the model. + num_neg_per_node: number of negative edges per node. + num_nodes: total number of nodes in the graph. + """ + # start = datetime.now() + + # A list of source nodes to consider. + src_lst = torch.unique(edge_label_index[0], sorted=True) + num_users = len(src_lst) + + edge_pos = edge_label_index[:, edge_label == 1] + edge_neg = edge_label_index[:, edge_label == 0] + + # By construction, negative edge index should be sorted by their src nodes. + assert torch.all(edge_neg[0].sort()[0] == edge_neg[0]) + + # Prediction scores of all positive and negative edges. + p_pos = pred_score[edge_label == 1] + p_neg = pred_score[edge_label == 0] + + # For each player src, compute the highest score among all positive edges + # from src. + # We want to compute the rank of this edge. + # Construct an interval of model's performance. + if cfg.metric.mrr_method == 'mean': + best_p_pos = scatter_mean(src=p_pos, index=edge_pos[0], + dim_size=num_nodes) + elif cfg.metric.mrr_method == 'min': + best_p_pos, _ = scatter_min(src=p_pos, index=edge_pos[0], + dim_size=num_nodes) + else: + # The default setting, consider the rank of the most confident edge. + best_p_pos, _ = scatter_max(src=p_pos, index=edge_pos[0], + dim_size=num_nodes) + # best_p_pos has shape (num_nodes), for nodes not in src_lst has value 0. + best_p_pos_by_user = best_p_pos[src_lst] + + # Sanity check. + # src_lst_2, inverse = torch.unique(edge_pos[0], return_inverse=True) + # best_p_pos, _ = scatter_max(p_pos, inverse) + # assert torch.all(best_p_pos_by_user == best_p_pos) + + uni, counts = torch.unique(edge_neg[0], sorted=True, return_counts=True) + # assert torch.all(counts >= num_neg_per_node) + # assert torch.all(uni == src_lst) + # note: edge_neg (src, dst) are sorted by src. + # find index of first occurrence of each src in edge_neg[0]. + # neg edges[0], [1,1,...1, 2, 2, ... 2, 3, ..] + first_occ_idx = torch.cumsum(counts, dim=0) - counts + add = torch.arange(num_neg_per_node, device=first_occ_idx.device) + + # take the first 100 negative edges from each src. 
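+    # (more precisely, the first num_neg_per_node negative edges per source.
+    # Illustrative toy example: if counts = [3, 2] and num_neg_per_node = 2,
+    # then first_occ_idx = [0, 3], add = [0, 1], and
+    # score_idx = [[0, 1], [3, 4]], one row per source node.)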
+ score_idx = first_occ_idx.view(-1, 1) + add.view(1, -1) + + assert torch.all(edge_neg[0][score_idx].float().std(axis=1) == 0) + # Z = edge_neg[0][first_occ_idx - 1] + # A = edge_neg[0][first_occ_idx] + # B = edge_neg[0][first_occ_idx + 1] + # assert torch.all(Z != A) + # assert torch.all(B == A) + + p_neg_by_user = p_neg[score_idx] # (num_users, num_neg_per_node) + compare = (p_neg_by_user >= best_p_pos_by_user.view(num_users, 1)).float() + assert compare.shape == (num_users, num_neg_per_node) + # compare[i, j], for node i, the j-th negative edge's score > p_best. + + # counts 1 + how many negative edge from src has higher score than p_best. + # if there's no such negative edge, rank is 1. + rank_by_user = compare.sum(axis=1) + 1 # (num_users,) + assert rank_by_user.shape == (num_users,) + + mrr = float(torch.mean(1 / rank_by_user)) + # print(f'MRR={mrr}, time taken: {datetime.now() - start}') + # computes recall at k as well + recall_at = dict() + for k in [1, 3, 10]: + recall_at[k] = float((rank_by_user <= k).float().mean()) + + return mrr, recall_at + + +@torch.no_grad() +def report_rank_based_eval(eval_batch, model, num_neg_per_node: int = 1000): + if num_neg_per_node == -1: + # Do not report rank-based metrics, used in debug mode. + return 0, 0, 0, 0 + # Get positive edge indices. + edge_index = eval_batch.edge_label_index[:, eval_batch.edge_label == 1] + edge_index = edge_index.to('cpu') + + neg_edge_index = gen_negative_edges(edge_index, num_neg_per_node, + num_nodes=eval_batch.num_nodes) + + new_edge_label_index = torch.cat((edge_index, neg_edge_index), + dim=1).long() + new_edge_label = torch.cat((torch.ones(edge_index.shape[1]), + torch.zeros(neg_edge_index.shape[1]) + ), dim=0).long() + + # Construct evaluation samples. + eval_batch.edge_label_index = new_edge_label_index + eval_batch.edge_label = new_edge_label + + eval_batch.to(torch.device(cfg.device)) + # move state to gpu + for layer in range(len(eval_batch.node_states)): + if torch.is_tensor(eval_batch.node_states[layer]): + eval_batch.node_states[layer] = eval_batch.node_states[layer].to( + torch.device(cfg.device)) + pred, true = model(eval_batch) + loss, pred_score = compute_loss(pred, true) + + mrr, recall_at = fast_batch_mrr_and_recall(eval_batch.edge_label_index, + eval_batch.edge_label, + pred_score, + num_neg_per_node, + eval_batch.num_nodes) + + # return mrr, 0, 0, 0 + # + # mrr_old, recall_at_old = compute_src_mrr_and_recall( + # eval_batch.edge_label_index, + # eval_batch.edge_label, + # pred_score, + # recall_k_lst=[1, 3, 10], + # mrr_top_k=1) + # + # print(f'Old MRR: {mrr_old: 0.6f}, new MRR {mrr: 0.6f}') + # print( + # f'Old Recall@1: {recall_at_old[1]: 0.6f}, new Recall@1 {recall_at[1]: 0.6f}') + # print( + # f'Old Recall@3: {recall_at_old[3]: 0.6f}, new Recall@3 {recall_at[3]: 0.6f}') + # print( + # f'Old Recall@10: {recall_at_old[10]: 0.6f}, new Recall@10 {recall_at[10]: 0.6f}') + + return mrr, recall_at[1], recall_at[3], recall_at[10] + + +def get_row_MRR(probs, true_classes): + existing_mask = true_classes == 1 + # descending in probability for all edge predictions. + ordered_indices = np.flip(probs.argsort()) + # indicators of positive/negative, in prob desc order. + ordered_existing_mask = existing_mask[ordered_indices] + # [1, 2, ... ][ordered_existing_mask] + # prob rank of positive edges. + existing_ranks = np.arange(1, true_classes.shape[0] + 1, + dtype=np.float)[ordered_existing_mask] + # average 1/rank of positive edges. 
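+    # (equivalent to np.mean(1.0 / existing_ranks).)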
+ MRR = (1 / existing_ranks).sum() / existing_ranks.shape[0] + return MRR + + +@torch.no_grad() +def report_baseline_MRR(eval_batch, model): + # Get positive edge indices. + edge_index = eval_batch.edge_label_index[:, eval_batch.edge_label == 1] + edge_index = edge_index.to('cpu') + num_nodes = eval_batch.num_nodes + src_of_pos_edges = torch.unique(edge_index[0]).numpy() + + all_edges_idx = np.arange(num_nodes) + all_edges_idx = np.array(np.meshgrid(all_edges_idx, + all_edges_idx)).reshape(2, -1) + all_edges_idx = torch.LongTensor(all_edges_idx) + # Get all O(N^2) negative edges. + neg_edge_index = edge_index_difference( + all_edges_idx, edge_index, num_nodes) + # Only keep negative edges share src node with some positive edges. + mask = np.isin(neg_edge_index[0], src_of_pos_edges) + neg_edge_index = neg_edge_index[:, mask] + + new_edge_label_index = torch.cat((edge_index, neg_edge_index), + dim=1).long() + new_edge_label = torch.cat((torch.ones(edge_index.shape[1]), + torch.zeros(neg_edge_index.shape[1]) + ), dim=0).long() + + # Construct evaluation samples. + eval_batch.edge_label_index = new_edge_label_index + eval_batch.edge_label = new_edge_label + + eval_batch.to(torch.device(cfg.device)) + # move state to gpu + for layer in range(len(eval_batch.node_states)): + if torch.is_tensor(eval_batch.node_states[layer]): + eval_batch.node_states[layer] = eval_batch.node_states[layer].to( + torch.device(cfg.device)) + pred, true = model(eval_batch) + loss, pred_score = compute_loss(pred, true) + + probs = pred_score.cpu().numpy().squeeze() + true = true.cpu().numpy() + + xi = new_edge_label_index[0].cpu().numpy() + xj = new_edge_label_index[1].cpu().numpy() + # pred_matrix = coo_matrix((probs, (xi, xj))).toarray() + # true_matrix = coo_matrix((true, (xi, xj))).toarray() + + row_MRRs = [] + for src in src_of_pos_edges: + mask = np.argwhere(xi == src) + pred_row = probs.take(mask).squeeze() + true_row = true.take(mask).squeeze() + row_MRRs.append(get_row_MRR(pred_row, true_row)) + + # for i, pred_row in enumerate(pred_matrix): + # #check if there are any existing edges + # # only evaluate senders with existing edge (of course). + # if np.isin(1, true_matrix[i]): + # row_MRRs.append(get_row_MRR(pred_row, true_matrix[i])) + + avg_MRR = torch.tensor(row_MRRs).mean() + return float(avg_MRR) From 55f9e765eb76a664f2bba7a176e1d17ea3f27c0c Mon Sep 17 00:00:00 2001 From: Tianyu Du Date: Sun, 6 Jun 2021 03:37:21 -0700 Subject: [PATCH 18/66] move mrr_num_negative_edges to metric field. --- graphgym/contrib/config/roland.py | 28 ++++++++++++++++------------ 1 file changed, 16 insertions(+), 12 deletions(-) diff --git a/graphgym/contrib/config/roland.py b/graphgym/contrib/config/roland.py index cf00a50b..62d5b3c3 100644 --- a/graphgym/contrib/config/roland.py +++ b/graphgym/contrib/config/roland.py @@ -12,6 +12,7 @@ def set_cfg_roland(cfg): # ----------------------------------------------------------------------- # # Customized options # ----------------------------------------------------------------------- # + # TODO: add documentation. # Method to update node embedding from old node embedding and new node features. # Options: 'moving_average', 'masked_gru', 'gru' # moving average: new embedding = r * old + (1-r) * node_feature. @@ -46,19 +47,13 @@ def set_cfg_roland(cfg): cfg.remark = '' # Experimental Features, use this name space to save all controls for # experimental features. 
- cfg.experimental = CN() - - # How many negative edges for each node to compute rank-based evaluation - # metrics such as MRR and recall at K. - # E.g., if multiplier = 1000 and a node has 3 positive edges, then we - # compute the MRR using 1000 randomly generated negative edges - # + 3 existing positive edges. - cfg.experimental.rank_eval_multiplier = 1000 + # TODO: consider remove experiment field. + # cfg.experimental = CN() # Only use the first n snapshots (time periods) to train the model. # Empirically, the model learns rich dynamics from only a few periods. # Set to -1 if using all snapshots. - cfg.experimental.restrict_training_set = -1 + # cfg.experimental.restrict_training_set = -1 # Whether to visualize edge attention of GNN layer after training. cfg.experimental.visualize_gnn_layer = False @@ -171,6 +166,14 @@ def set_cfg_roland(cfg): cfg.transaction.keep_ratio = 'linear' cfg.metric = CN() + # How many negative edges for each node to compute rank-based evaluation + # metrics such as MRR and recall at K. + # E.g., if multiplier = 1000 and a node has 3 positive edges, then we + # compute the MRR using 1000 randomly generated negative edges + # + 3 existing positive edges. + # Use 100 ~ 1000 for fast and reliable results. + cfg.metric.mrr_num_negative_edges = 1000 + # how to compute MRR. # available: f = 'min', 'max', 'mean'. # Step 1: get the p* = f(scores of positive edges) @@ -180,9 +183,10 @@ def set_cfg_roland(cfg): # expected MRR(min) <= MRR(mean) <= MRR(max). cfg.metric.mrr_method = 'max' + # TODO: consider remove link_pred_spec field. # Specs for the link prediction task using BSI dataset. # All units are days. - cfg.link_pred_spec = CN() + # cfg.link_pred_spec = CN() # The period of `today`'s increase: how often the system is making forecast. # E.g., when = 1, @@ -192,12 +196,12 @@ def set_cfg_roland(cfg): # When = 7, the system makes prediction every week. # E.g., the system forecasts transactions in upcoming 7 days # on every Monday. - cfg.link_pred_spec.forecast_frequency = 1 + # cfg.link_pred_spec.forecast_frequency = 1 # How many days into the future the model is trained to predict. # The model forecasts transactions in (today, today + forecast_horizon]. # NOTE: forecast_horizon should >= forecast_frequency to cover all days. - cfg.link_pred_spec.forecast_horizon = 7 + # cfg.link_pred_spec.forecast_horizon = 7 register_config('roland', set_cfg_roland) From c014e687900005a827841304f24300eff859e885 Mon Sep 17 00:00:00 2001 From: Tianyu Du Date: Sun, 6 Jun 2021 03:40:18 -0700 Subject: [PATCH 19/66] update --- graphgym/contrib/train/train_utils.py | 394 +++++++++++++++----------- 1 file changed, 225 insertions(+), 169 deletions(-) diff --git a/graphgym/contrib/train/train_utils.py b/graphgym/contrib/train/train_utils.py index bfc5100d..dad12ff7 100644 --- a/graphgym/contrib/train/train_utils.py +++ b/graphgym/contrib/train/train_utils.py @@ -1,19 +1,19 @@ """ Metrics, other utility, and helper functions. """ -from typing import Dict, List, Optional - import deepsnap import numpy as np import torch from graphgym.config import cfg from graphgym.loss import compute_loss from torch_scatter import scatter_max, scatter_mean, scatter_min -from tqdm import tqdm +# TODO: proof-read this file. +# TODO: remove comments. 
-def get_keep_ratio(existing: torch.Tensor, new: torch.Tensor, - mode: str='linear') -> torch.Tensor: +def get_keep_ratio(existing: torch.Tensor, + new: torch.Tensor, + mode: str = 'linear') -> torch.Tensor: """ Get the keep ratio for individual nodes to update node embeddings. Specifically: @@ -78,7 +78,7 @@ def move_batch_to_device(batch: deepsnap.graph.Graph, batch.node_states[layer] = batch.node_states[layer].to(device) if hasattr(batch, 'node_cells'): - # node_cells exist only for LSTM type RNNs. + # node_cells exist only for LSTM type RNNs. for layer in range(len(batch.node_cells)): if torch.is_tensor(batch.node_cells[layer]): batch.node_cells[layer] = batch.node_cells[layer].to(device) @@ -122,7 +122,7 @@ def gen_negative_edges(edge_index: torch.LongTensor, num_neg_per_node (int): 'approximate' number of negative edges generated for each source node in edge_index. num_nodes (int): total number of nodes. - + Returns: torch.LongTensor: approximate num_nodes * num_neg_per_node negative edges. @@ -139,91 +139,91 @@ def gen_negative_edges(edge_index: torch.LongTensor, return neg_edge_index -def compute_src_mrr_and_recall(edge_label_index: torch.LongTensor, - edge_label: torch.LongTensor, - pred_score: torch.Tensor, - recall_k_lst: List[int], - mrr_top_k: Optional[int] = None - ) -> (float, Dict[int, float]): - """ - Computes source-based MRR and recall at K for each source node in - edge_label_index. - - Args: - edge_label_index: combination of positive and negative edges. - edge_label: label of edges in edge_label_index. - pred_score: P(E=positive) for each edge in edge_label_index. - recall_k_lst: to report recall at k for all k in this list. - mrr_top_k: calculating MRR for each source node using mean(1/rank) for - k positive edges with the highest pred_score. Set to None to use - all positive edges. - """ - assert edge_label_index.shape[1] == len(edge_label) == len(pred_score) - - src_lst = torch.unique(edge_label_index[0]) # source nodes to consider. - # edge_label_index were constructed by adding negative edges to every - # node in edge_index[0], thus every node in src_lst has at least one - # positive edge in edge_label_index. - # I.e., src_lst == torch.unique(edge_label_index[0][edge_label == 1]) - - node_level_mrr = [] # store MRR for each node. - node_recall_at = dict((k, []) for k in recall_k_lst) - for src in tqdm(src_lst, leave=False, desc='Node level MRR/Recall'): - # get positive/negative edges emitted from src node. - self_mask = (edge_label_index[0] == src) - self_label = edge_label[self_mask] - self_pred_score = pred_score[self_mask] - - # Alternative implementation. - best = torch.max(self_pred_score[self_label == 1]) - rank = torch.sum(self_pred_score[self_label == 0] >= best) + 1 - # print(pos_edge_rank[0], true, torch.sum(label == 0)) - mrr = float(1 / rank) - node_level_mrr.append(mrr) # mrr for this node. - - for k in recall_k_lst: - recall = _calculate_recall_at_k(self_pred_score, self_label, k) - node_recall_at[k].append(recall) - - # Average over all nodes. - macro_recall = dict((k, np.mean(v)) for (k, v) in node_recall_at.items()) - macro_mrr = float(np.mean(node_level_mrr)) - return macro_mrr, macro_recall - - -def _calculate_recall_at_k(pred_score: torch.Tensor, - label: torch.Tensor, - k: int) -> int: - """Computes whether the score of the most confident positive edge is - within the highest k scores. I.e., whether the most confident - positive edge beats at least k most confident negative edges. - - Args: - pred_score: a tensor of scores of predictions. 
- label: a tensor of labels. - k: get whether successful recall at k. - - Returns: - an indicator whether there is a successful recall at rank k. - """ - neg_score = pred_score[label == 0] - if len(neg_score) == 0: - return 0 - best_pos_score = torch.max(pred_score[label == 1]) - rank = torch.sum(neg_score >= best_pos_score) + 1 - return int(rank <= k) +# def compute_src_mrr_and_recall(edge_label_index: torch.LongTensor, +# edge_label: torch.LongTensor, +# pred_score: torch.Tensor, +# recall_k_lst: List[int], +# mrr_top_k: Optional[int] = None +# ) -> (float, Dict[int, float]): +# """ +# Computes source-based MRR and recall at K for each source node in +# edge_label_index. + +# Args: +# edge_label_index: combination of positive and negative edges. +# edge_label: label of edges in edge_label_index. +# pred_score: P(E=positive) for each edge in edge_label_index. +# recall_k_lst: to report recall at k for all k in this list. +# mrr_top_k: calculating MRR for each source node using mean(1/rank) for +# k positive edges with the highest pred_score. Set to None to use +# all positive edges. +# """ +# assert edge_label_index.shape[1] == len(edge_label) == len(pred_score) + +# src_lst = torch.unique(edge_label_index[0]) # source nodes to consider. +# # edge_label_index were constructed by adding negative edges to every +# # node in edge_index[0], thus every node in src_lst has at least one +# # positive edge in edge_label_index. +# # I.e., src_lst == torch.unique(edge_label_index[0][edge_label == 1]) + +# node_level_mrr = [] # store MRR for each node. +# node_recall_at = dict((k, []) for k in recall_k_lst) +# for src in tqdm(src_lst, leave=False, desc='Node level MRR/Recall'): +# # get positive/negative edges emitted from src node. +# self_mask = (edge_label_index[0] == src) +# self_label = edge_label[self_mask] +# self_pred_score = pred_score[self_mask] + +# # Alternative implementation. +# best = torch.max(self_pred_score[self_label == 1]) +# rank = torch.sum(self_pred_score[self_label == 0] >= best) + 1 +# # print(pos_edge_rank[0], true, torch.sum(label == 0)) +# mrr = float(1 / rank) +# node_level_mrr.append(mrr) # mrr for this node. + +# for k in recall_k_lst: +# recall = _calculate_recall_at_k(self_pred_score, self_label, k) +# node_recall_at[k].append(recall) + +# # Average over all nodes. +# macro_recall = dict((k, np.mean(v)) for (k, v) in node_recall_at.items()) +# macro_mrr = float(np.mean(node_level_mrr)) +# return macro_mrr, macro_recall + + +# def _calculate_recall_at_k(pred_score: torch.Tensor, +# label: torch.Tensor, +# k: int) -> int: +# """Computes whether the score of the most confident positive edge is +# within the highest k scores. I.e., whether the most confident +# positive edge beats at least k most confident negative edges. + +# Args: +# pred_score: a tensor of scores of predictions. +# label: a tensor of labels. +# k: get whether successful recall at k. + +# Returns: +# an indicator whether there is a successful recall at rank k. 
+# """ +# neg_score = pred_score[label == 0] +# if len(neg_score) == 0: +# return 0 +# best_pos_score = torch.max(pred_score[label == 1]) +# rank = torch.sum(neg_score >= best_pos_score) + 1 +# return int(rank <= k) @torch.no_grad() -def fast_batch_mrr_and_recall(edge_label_index: torch.Tensor, - edge_label: torch.Tensor, - pred_score: torch.Tensor, - num_neg_per_node: int, - num_nodes: int - ) -> (float, Dict[int, float]): +def fast_batch_mrr(edge_label_index: torch.Tensor, + edge_label: torch.Tensor, + pred_score: torch.Tensor, + num_neg_per_node: int, + num_nodes: int, + method: str) -> float: """ A vectorized implementation to compute average rank-based metrics over - all source nodes. + all source nodes. Args: edge_label_index: @@ -231,9 +231,10 @@ def fast_batch_mrr_and_recall(edge_label_index: torch.Tensor, pred_score: P(edge i is positive) from the model. num_neg_per_node: number of negative edges per node. num_nodes: total number of nodes in the graph. - """ - # start = datetime.now() + Returns: + the MRR for all nodes. + """ # A list of source nodes to consider. src_lst = torch.unique(edge_label_index[0], sorted=True) num_users = len(src_lst) @@ -252,27 +253,22 @@ def fast_batch_mrr_and_recall(edge_label_index: torch.Tensor, # from src. # We want to compute the rank of this edge. # Construct an interval of model's performance. - if cfg.metric.mrr_method == 'mean': + if method == 'mean': best_p_pos = scatter_mean(src=p_pos, index=edge_pos[0], dim_size=num_nodes) - elif cfg.metric.mrr_method == 'min': + elif method == 'min': best_p_pos, _ = scatter_min(src=p_pos, index=edge_pos[0], dim_size=num_nodes) - else: + elif method == 'max': # The default setting, consider the rank of the most confident edge. best_p_pos, _ = scatter_max(src=p_pos, index=edge_pos[0], dim_size=num_nodes) + else: + raise ValueError(f'Unrecognized method: {method}.') # best_p_pos has shape (num_nodes), for nodes not in src_lst has value 0. best_p_pos_by_user = best_p_pos[src_lst] - # Sanity check. - # src_lst_2, inverse = torch.unique(edge_pos[0], return_inverse=True) - # best_p_pos, _ = scatter_max(p_pos, inverse) - # assert torch.all(best_p_pos_by_user == best_p_pos) - uni, counts = torch.unique(edge_neg[0], sorted=True, return_counts=True) - # assert torch.all(counts >= num_neg_per_node) - # assert torch.all(uni == src_lst) # note: edge_neg (src, dst) are sorted by src. # find index of first occurrence of each src in edge_neg[0]. # neg edges[0], [1,1,...1, 2, 2, ... 2, 3, ..] 
@@ -283,11 +279,6 @@ def fast_batch_mrr_and_recall(edge_label_index: torch.Tensor, score_idx = first_occ_idx.view(-1, 1) + add.view(1, -1) assert torch.all(edge_neg[0][score_idx].float().std(axis=1) == 0) - # Z = edge_neg[0][first_occ_idx - 1] - # A = edge_neg[0][first_occ_idx] - # B = edge_neg[0][first_occ_idx + 1] - # assert torch.all(Z != A) - # assert torch.all(B == A) p_neg_by_user = p_neg[score_idx] # (num_users, num_neg_per_node) compare = (p_neg_by_user >= best_p_pos_by_user.view(num_users, 1)).float() @@ -300,70 +291,66 @@ def fast_batch_mrr_and_recall(edge_label_index: torch.Tensor, assert rank_by_user.shape == (num_users,) mrr = float(torch.mean(1 / rank_by_user)) - # print(f'MRR={mrr}, time taken: {datetime.now() - start}') - # computes recall at k as well - recall_at = dict() - for k in [1, 3, 10]: - recall_at[k] = float((rank_by_user <= k).float().mean()) - - return mrr, recall_at - - -@torch.no_grad() -def report_rank_based_eval(eval_batch, model, num_neg_per_node: int = 1000): - if num_neg_per_node == -1: - # Do not report rank-based metrics, used in debug mode. - return 0, 0, 0, 0 - # Get positive edge indices. - edge_index = eval_batch.edge_label_index[:, eval_batch.edge_label == 1] - edge_index = edge_index.to('cpu') - - neg_edge_index = gen_negative_edges(edge_index, num_neg_per_node, - num_nodes=eval_batch.num_nodes) - - new_edge_label_index = torch.cat((edge_index, neg_edge_index), - dim=1).long() - new_edge_label = torch.cat((torch.ones(edge_index.shape[1]), - torch.zeros(neg_edge_index.shape[1]) - ), dim=0).long() - - # Construct evaluation samples. - eval_batch.edge_label_index = new_edge_label_index - eval_batch.edge_label = new_edge_label - - eval_batch.to(torch.device(cfg.device)) - # move state to gpu - for layer in range(len(eval_batch.node_states)): - if torch.is_tensor(eval_batch.node_states[layer]): - eval_batch.node_states[layer] = eval_batch.node_states[layer].to( - torch.device(cfg.device)) - pred, true = model(eval_batch) - loss, pred_score = compute_loss(pred, true) - - mrr, recall_at = fast_batch_mrr_and_recall(eval_batch.edge_label_index, - eval_batch.edge_label, - pred_score, - num_neg_per_node, - eval_batch.num_nodes) - - # return mrr, 0, 0, 0 - # - # mrr_old, recall_at_old = compute_src_mrr_and_recall( - # eval_batch.edge_label_index, - # eval_batch.edge_label, - # pred_score, - # recall_k_lst=[1, 3, 10], - # mrr_top_k=1) - # - # print(f'Old MRR: {mrr_old: 0.6f}, new MRR {mrr: 0.6f}') - # print( - # f'Old Recall@1: {recall_at_old[1]: 0.6f}, new Recall@1 {recall_at[1]: 0.6f}') - # print( - # f'Old Recall@3: {recall_at_old[3]: 0.6f}, new Recall@3 {recall_at[3]: 0.6f}') - # print( - # f'Old Recall@10: {recall_at_old[10]: 0.6f}, new Recall@10 {recall_at[10]: 0.6f}') - - return mrr, recall_at[1], recall_at[3], recall_at[10] + return mrr + + +# @torch.no_grad() +# def report_rank_based_eval(eval_batch, model, method: str, +# num_neg_per_node: int=1000): +# if num_neg_per_node == -1: +# # Do not report rank-based metrics, used in debug mode. +# return 0, 0, 0, 0 +# # Get positive edge indices. +# edge_index = eval_batch.edge_label_index[:, eval_batch.edge_label == 1] +# edge_index = edge_index.to('cpu') + +# neg_edge_index = gen_negative_edges(edge_index, num_neg_per_node, +# num_nodes=eval_batch.num_nodes) + +# new_edge_label_index = torch.cat((edge_index, neg_edge_index), +# dim=1).long() +# new_edge_label = torch.cat((torch.ones(edge_index.shape[1]), +# torch.zeros(neg_edge_index.shape[1]) +# ), dim=0).long() + +# # Construct evaluation samples. 
+# eval_batch.edge_label_index = new_edge_label_index +# eval_batch.edge_label = new_edge_label + +# eval_batch.to(torch.device(cfg.device)) +# # move state to gpu +# for layer in range(len(eval_batch.node_states)): +# if torch.is_tensor(eval_batch.node_states[layer]): +# eval_batch.node_states[layer] = eval_batch.node_states[layer].to( +# torch.device(cfg.device)) +# pred, true = model(eval_batch) +# loss, pred_score = compute_loss(pred, true) + +# mrr, recall_at = fast_batch_mrr_and_recall(eval_batch.edge_label_index, +# eval_batch.edge_label, +# pred_score, +# num_neg_per_node, +# eval_batch.num_nodes, +# method) + +# # return mrr, 0, 0, 0 +# # +# # mrr_old, recall_at_old = compute_src_mrr_and_recall( +# # eval_batch.edge_label_index, +# # eval_batch.edge_label, +# # pred_score, +# # recall_k_lst=[1, 3, 10], +# # mrr_top_k=1) +# # +# # print(f'Old MRR: {mrr_old: 0.6f}, new MRR {mrr: 0.6f}') +# # print( +# # f'Old Recall@1: {recall_at_old[1]: 0.6f}, new Recall@1 {recall_at[1]: 0.6f}') +# # print( +# # f'Old Recall@3: {recall_at_old[3]: 0.6f}, new Recall@3 {recall_at[3]: 0.6f}') +# # print( +# # f'Old Recall@10: {recall_at_old[10]: 0.6f}, new Recall@10 {recall_at[10]: 0.6f}') + +# return mrr, recall_at[1], recall_at[3], recall_at[10] def get_row_MRR(probs, true_classes): @@ -382,7 +369,8 @@ def get_row_MRR(probs, true_classes): @torch.no_grad() -def report_baseline_MRR(eval_batch, model): +def report_baseline_MRR(eval_batch: deepsnap.graph.Graph, + model: torch.nn.Module) -> float: # Get positive edge indices. edge_index = eval_batch.edge_label_index[:, eval_batch.edge_label == 1] edge_index = edge_index.to('cpu') @@ -442,3 +430,71 @@ def report_baseline_MRR(eval_batch, model): avg_MRR = torch.tensor(row_MRRs).mean() return float(avg_MRR) + + +def compute_MRR(eval_batch: deepsnap.graph.Graph, + model: torch.nn.Module, + num_neg_per_node: int, + method: str) -> float: + """Computes the MRR score on the evaluation batch. + + Args: + eval_batch (deepsnap.graph.Graph): a graph snapshot. + model (torch.nn.Module): a GNN model for this graph snapshot + num_neg_per_node (int): how many negative edges per node required for + computing the MRR score. + For example, if num_neg_per_node = 1000, this method firstly + sample 1,000 negative edges for each source node, and compute the + average rank of positive edges from each source node among these + 1,000 sampled negative edges. + Setting num_neg_per_node = -1 to use all possible negative edges. + method (str): {'min', 'mean', 'max', 'all'} + All methods firstly compute MRR for each source node, and then + average MRRs over all source nodes. + For each source node v, + let P denote scores of all positive edges from v, the rank() + operator computes the rank among all negative edges from v. + 'min' computes 1/rank(min(P)) + 'mean' computes 1/rank(mean(P)) + 'max' computes 1/rank(max(P)) + 'all' computes mean[1/rank(x) for x in P] + """ + if method == 'all': + # NOTE: this method requires iterating over all nodes, which is slow. + assert num_neg_per_node == -1 + return report_baseline_MRR(eval_batch, model) + else: + assert num_neg_per_node > 0 + # Sample negative edges for each node. 
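+        # (num_neg_per_node sampled negatives per source node approximate the
+        # full ranking; cfg.metric.mrr_num_negative_edges configures this.)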
+ edge_index = eval_batch.edge_label_index[:, eval_batch.edge_label == 1] + edge_index = edge_index.to('cpu') + + neg_edge_index = gen_negative_edges(edge_index, num_neg_per_node, + num_nodes=eval_batch.num_nodes) + + new_edge_label_index = torch.cat((edge_index, neg_edge_index), + dim=1).long() + new_edge_label = torch.cat((torch.ones(edge_index.shape[1]), + torch.zeros(neg_edge_index.shape[1]) + ), dim=0).long() + + # Construct evaluation samples. + eval_batch.edge_label_index = new_edge_label_index + eval_batch.edge_label = new_edge_label + + eval_batch.to(torch.device(cfg.device)) + # move state to gpu + for layer in range(len(eval_batch.node_states)): + if torch.is_tensor(eval_batch.node_states[layer]): + eval_batch.node_states[layer] = eval_batch.node_states[layer].to( + torch.device(cfg.device)) + pred, true = model(eval_batch) + loss, pred_score = compute_loss(pred, true) + + mrr = fast_batch_mrr(eval_batch.edge_label_index, + eval_batch.edge_label, + pred_score, + num_neg_per_node, + eval_batch.num_nodes, + method) + return mrr From 2c9ba8c406b58490d593527b4eed26a23d0dff79 Mon Sep 17 00:00:00 2001 From: Tianyu Du Date: Sun, 6 Jun 2021 03:42:00 -0700 Subject: [PATCH 20/66] add file --- graphgym/contrib/train/train_live_update.py | 324 ++++++++++++++++++++ 1 file changed, 324 insertions(+) create mode 100644 graphgym/contrib/train/train_live_update.py diff --git a/graphgym/contrib/train/train_live_update.py b/graphgym/contrib/train/train_live_update.py new file mode 100644 index 00000000..75d0dad6 --- /dev/null +++ b/graphgym/contrib/train/train_live_update.py @@ -0,0 +1,324 @@ +""" +The ROLAND training pipeline with live-update. +""" +import copy +import datetime +import logging +import os +from typing import Dict, List, Optional, Tuple + +import deepsnap +import numpy as np +import torch +from graphgym.checkpoint import clean_ckpt +from graphgym.config import cfg +from graphgym.contrib.train import train_utils +from graphgym.loss import compute_loss +from graphgym.optimizer import create_optimizer, create_scheduler +from graphgym.register import register_train +from graphgym.utils.io import makedirs_rm_exist +from graphgym.utils.stats import node_degree +from torch.utils.tensorboard import SummaryWriter +from tqdm import tqdm + + +@torch.no_grad() +def average_state_dict(dict1: dict, dict2: dict, weight: float) -> dict: + """ + Average two model.state_dict() objects, + ut = (1-w)*dict1 + w*dict2 + when dict1, dict2 are model_dicts, this method updates the meta-model. + """ + assert 0 <= weight <= 1 + d1 = copy.deepcopy(dict1) + d2 = copy.deepcopy(dict2) + out = dict() + for key in d1.keys(): + assert isinstance(d1[key], torch.Tensor) + param1 = d1[key].detach().clone() + assert isinstance(d2[key], torch.Tensor) + param2 = d2[key].detach().clone() + out[key] = (1 - weight) * param1 + weight * param2 + return out + + +def precompute_edge_degree_info(dataset: deepsnap.dataset.GraphDataset): + """Pre-computes edge_degree_existing, edge_degree_new and keep ratio + at each snapshot. Inplace modifications. + """ + # Assume all graph snapshots have the same number of nodes. + num_nodes = dataset[0].node_feature.shape[0] + for t in range(len(dataset)): + if t == 0: + # No previous edges for any nodes. 
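+            # (for t > 0 the existing degree accumulates:
+            # node_degree_existing[t] = node_degree_existing[t-1]
+            #                           + node_degree_new[t-1].)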
+ dataset[t].node_degree_existing = torch.zeros(num_nodes) + else: + dataset[t].node_degree_existing \ + = dataset[t - 1].node_degree_existing \ + + dataset[t - 1].node_degree_new + + dataset[t].node_degree_new = node_degree(dataset[t].edge_index, + n=num_nodes) + + dataset[t].keep_ratio = train_utils.get_keep_ratio( + existing=dataset[t].node_degree_existing, + new=dataset[t].node_degree_new, + mode=cfg.transaction.keep_ratio) + dataset[t].keep_ratio = dataset[t].keep_ratio.unsqueeze(-1) + + +@torch.no_grad() +def get_task_batch(dataset: deepsnap.dataset.GraphDataset, + today: int, tomorrow: int, + prev_node_states: Optional[Dict[str, List[torch.Tensor]]] + ) -> deepsnap.graph.Graph: + """ + Construct batch required for the task (today, tomorrow). As defined in + batch's get_item method (used to get edge_label and get_label_index), + edge_label and edge_label_index returned would be different everytime + get_task_batch() is called. + + Moreover, copy node-memories (node_states and node_cells) to the batch. + """ + assert today < tomorrow < len(dataset) + # Get edges for message passing and prediction task. + batch = dataset[today].clone() + batch.edge_label = dataset[tomorrow].edge_label.clone() + batch.edge_label_index = dataset[tomorrow].edge_label_index.clone() + + # Copy previous memory to the batch. + if prev_node_states is not None: + for key, val in prev_node_states.items(): + copied = [x.detach().clone() for x in val] + setattr(batch, key, copied) + + batch = train_utils.move_batch_to_device(batch, cfg.device) + return batch + + +@torch.no_grad() +def update_node_states(model, dataset, task: Tuple[int, int], + prev_node_states: Optional[ + Dict[str, List[torch.Tensor]]] + ) -> Dict[str, List[torch.Tensor]]: + """Perform the provided task and keep track of the latest node_states. + + Example: task = (t, t+1), + the prev_node_states contains node embeddings at time (t-1). + the model perform task (t, t+1): + Input: (node embedding at t - 1, edges at t). + Output: possible transactions at t+1. + the model also generates node embeddings at t. + + after doing task (t, t+1), node_states contains information + from snapshot t. + """ + today, tomorrow = task + batch = get_task_batch(dataset, today, tomorrow, prev_node_states).clone() + # Let the model modify batch.node_states (and batch.node_cells). + _, _ = model(batch) + # Collect the updated node states. + out = dict() + out['node_states'] = [x.detach().clone() for x in batch.node_states] + if isinstance(batch.node_cells[0], torch.Tensor): + out['node_cells'] = [x.detach().clone() for x in batch.node_cells] + + return out + + +def train_step(model, optimizer, scheduler, dataset, + task: Tuple[int, int], + prev_node_states: Optional[Dict[str, torch.Tensor]] + ) -> dict: + """ + After receiving ground truth from a particular task, update the model by + performing back-propagation. + For example, on day t, the ground truth of task (t-1, t) has been revealed, + train the model using G[t-1] for message passing and label[t] as target. 
+ """ + optimizer.zero_grad() + torch.cuda.empty_cache() + + today, tomorrow = task + model.train() + batch = get_task_batch(dataset, today, tomorrow, prev_node_states).clone() + + pred, true = model(batch) + loss, pred_score = compute_loss(pred, true) + loss.backward() + optimizer.step() + + scheduler.step() + return {'loss': loss} + + +@torch.no_grad() +def evaluate_step(model, dataset, task: Tuple[int, int], + prev_node_states: Optional[Dict[str, List[torch.Tensor]]], + fast: bool = False) -> dict: + """ + Evaluate model's performance on task = (today, tomorrow) + where today and tomorrow are integers indexing snapshots. + """ + today, tomorrow = task + model.eval() + batch = get_task_batch(dataset, today, tomorrow, prev_node_states).clone() + + pred, true = model(batch) + loss, pred_score = compute_loss(pred, true) + + if fast: + # skip MRR calculation for internal validation. + return {'loss': loss.item()} + + mrr_batch = get_task_batch(dataset, today, tomorrow, + prev_node_states).clone() + + mrr, rck1, rck3, rck10 = train_utils.report_rank_based_eval( + mrr_batch, model, + num_neg_per_node=cfg.metric.mrr_num_negative_edges) + + return {'loss': loss.item(), 'mrr': mrr, 'rck1': rck1, 'rck3': rck3, + 'rck10': rck10} + + +def train_live_update(loggers, loaders, model, optimizer, scheduler, datasets, + **kwargs): + + for dataset in datasets: + # Sometimes edge degree info is already included in dataset. + if not hasattr(dataset[0], 'keep_ratio'): + precompute_edge_degree_info(dataset) + + if cfg.dataset.premade_datasets == 'fresh_save_cache': + if not os.path.exists(f'{cfg.dataset.dir}/cache/'): + os.mkdir(f'{cfg.dataset.dir}/cache/') + cache_path = '{}/cache/cached_datasets_{}_{}_{}.pt'.format( + cfg.dataset.dir, cfg.dataset.format.replace('.tsv', ''), + cfg.transaction.snapshot_freq, + datetime.now().strftime('%Y_%m_%d__%H_%M_%S') + ) + torch.save(datasets, cache_path) + + num_splits = len(loggers) # train/val/test splits. + # range for today in (today, tomorrow) task pairs. + task_range = range(len(datasets[0]) - cfg.transaction.horizon) + + t = datetime.datetime.now().strftime('%b%d_%H-%M-%S') + + # directory to store tensorboard files of this run. + out_dir = cfg.out_dir.replace('/', '\\') + # dir to store all run outputs for the entire batch. + run_dir = 'runs_' + cfg.remark + + print(f'Tensorboard directory: {out_dir}') + # If tensorboard directory exists, this config is in the re-run phase + # of run_batch, replace logs of previous runs with the new one. + makedirs_rm_exist(f'./{run_dir}/{out_dir}') + writer = SummaryWriter(f'./{run_dir}/{out_dir}') + + # save a copy of configuration for later identifications. + with open(f'./{run_dir}/{out_dir}/config.yaml', 'w') as f: + cfg.dump(stream=f) + + prev_node_states = None # no previous state on day 0. + # {'node_states': [Tensor, Tensor], 'node_cells: [Tensor, Tensor]} + + model_init = None # for meta-learning only, a model.state_dict() object. + + for t in tqdm(task_range, desc='snapshot', leave=True): + # current task: t --> t+1. + # (1) Evaluate model's performance on this task, at this time, the + # model has seen no information on t+1, this evaluation is fair. + for i in range(1, num_splits): + perf = evaluate_step(model, datasets[i], (t, t + 1), + prev_node_states) + + writer.add_scalars('val' if i == 1 else 'test', perf, t) + + # (2) Reveal the ground truth of task (t, t+1) and update the model + # to prepare for the next task. + del optimizer, scheduler # use new optimizers. 
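+        # (Recreating the optimizer and scheduler gives every snapshot a fresh
+        #  optimizer state and learning-rate schedule, so fine-tuning on task
+        #  (t, t+1) does not inherit momentum accumulated on earlier tasks.)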
+ optimizer = create_optimizer(model.parameters()) + scheduler = create_scheduler(optimizer) + + # best model's validation loss, training epochs, and state_dict. + best_model = {'val_loss': np.inf, 'train_epoch': 0, 'state': None} + # keep track of how long we have NOT update the best model. + best_model_unchanged = 0 + # after not updating the best model for `tol` epochs, stop. + tol = cfg.train.internal_validation_tolerance + + # internal training loop (intra-snapshot cross-validation). + # choose the best model using current validation set, prepare for + # next task. + + if cfg.meta.is_meta and (model_init is not None): + # For meta-learning, start fine-tuning from the pre-computed + # initialization weight. + model.load_state_dict(copy.deepcopy(model_init)) + + for i in tqdm(range(cfg.optim.max_epoch + 1), desc='live update', + leave=True): + # Start with the un-trained model (i = 0), evaluate the model. + internal_val_perf = evaluate_step(model, datasets[1], + (t, t + 1), + prev_node_states, fast=True) + val_loss = internal_val_perf['loss'] + + if val_loss < best_model['val_loss']: + # replace the best model with the current model. + best_model = {'val_loss': val_loss, 'train_epoch': i, + 'state': copy.deepcopy(model.state_dict())} + best_model_unchanged = 0 + else: + # the current best model has dominated for these epochs. + best_model_unchanged += 1 + + # if (i >= 2 * tol) and (best_model_unchanged >= tol): + if best_model_unchanged >= tol: + # If the best model has not been updated for a while, stop. + break + else: + # Otherwise, keep training. + train_perf = train_step(model, optimizer, scheduler, + datasets[0], (t, t + 1), + prev_node_states) + writer.add_scalars('train', train_perf, t) + + writer.add_scalar('internal_best_val', best_model['val_loss'], t) + writer.add_scalar('best epoch', best_model['train_epoch'], t) + + # (3) Actually perform the update on training set to get node_states + # contains information up to time t. + # Use the best model selected from intra-snapshot cross-validation. + model.load_state_dict(best_model['state']) + + if cfg.meta.is_meta: # update meta-learning's initialization weights. + if model_init is None: # for the first task. + model_init = copy.deepcopy(best_model['state']) + else: # for subsequent task, update init. + if cfg.meta.method == 'moving_average': + new_weight = cfg.meta.alpha + elif cfg.meta.method == 'online_mean': + new_weight = 1 / (t + 1) # for t=1, the second item, 1/2. + else: + raise ValueError(f'Invalid method: {cfg.meta.method}') + + # (1-new_weight)*model_init + new_weight*best_model. 
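+                # Worked example (hypothetical t): with 'online_mean',
+                # model_init stays the plain average of all selected models,
+                # e.g. at t=2:
+                #   2/3 * (M[0] + M[1]) / 2 + 1/3 * M[2] = (M[0]+M[1]+M[2]) / 3.
+                # With 'moving_average', the model selected k tasks ago keeps
+                # weight alpha * (1-alpha)**k (the very first one (1-alpha)**t).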
+ model_init = average_state_dict(model_init, + best_model['state'], + new_weight) + + prev_node_states = update_node_states(model, datasets[0], (t, t + 1), + prev_node_states) + + writer.close() + + if cfg.train.ckpt_clean: + clean_ckpt() + + logging.info('Task done, results saved in {}'.format(cfg.out_dir)) + + +register_train('live_update', train_live_update) From 0cd303c7ca7db64c9b40fb6c082c9c80d6268d0a Mon Sep 17 00:00:00 2001 From: Tianyu Du Date: Sun, 6 Jun 2021 16:22:37 -0700 Subject: [PATCH 21/66] update --- graphgym/contrib/train/train_live_update.py | 88 +++++++++++---------- 1 file changed, 46 insertions(+), 42 deletions(-) diff --git a/graphgym/contrib/train/train_live_update.py b/graphgym/contrib/train/train_live_update.py index 75d0dad6..d54d5ece 100644 --- a/graphgym/contrib/train/train_live_update.py +++ b/graphgym/contrib/train/train_live_update.py @@ -4,7 +4,6 @@ import copy import datetime import logging -import os from typing import Dict, List, Optional, Tuple import deepsnap @@ -42,6 +41,7 @@ def average_state_dict(dict1: dict, dict2: dict, weight: float) -> dict: return out +@torch.no_grad() def precompute_edge_degree_info(dataset: deepsnap.dataset.GraphDataset): """Pre-computes edge_degree_existing, edge_degree_new and keep ratio at each snapshot. Inplace modifications. @@ -53,6 +53,7 @@ def precompute_edge_degree_info(dataset: deepsnap.dataset.GraphDataset): # No previous edges for any nodes. dataset[t].node_degree_existing = torch.zeros(num_nodes) else: + # degree[ deepsnap.graph.Graph: """ - Construct batch required for the task (today, tomorrow). As defined in - batch's get_item method (used to get edge_label and get_label_index), - edge_label and edge_label_index returned would be different everytime - get_task_batch() is called. + Construct batch required for the task (today, tomorrow). + For current implementation, we use tomorrow = today + 1. + As defined in batch's get_item method (used to get edge_label and + get_label_index), edge_label and edge_label_index returned would be + different everytime get_task_batch() is called. Moreover, copy node-memories (node_states and node_cells) to the batch. + + Lastly, this method moves the created task batch to the appropriate device. """ assert today < tomorrow < len(dataset) # Get edges for message passing and prediction task. @@ -116,10 +120,12 @@ def update_node_states(model, dataset, task: Tuple[int, int], today, tomorrow = task batch = get_task_batch(dataset, today, tomorrow, prev_node_states).clone() # Let the model modify batch.node_states (and batch.node_cells). - _, _ = model(batch) + # This operation does not track gradient, so should not affect back-prop. + _, _ = model(batch) # Inplace modification on batch. # Collect the updated node states. out = dict() out['node_states'] = [x.detach().clone() for x in batch.node_states] + # If node cells are also used. if isinstance(batch.node_cells[0], torch.Tensor): out['node_cells'] = [x.detach().clone() for x in batch.node_cells] @@ -129,7 +135,7 @@ def update_node_states(model, dataset, task: Tuple[int, int], def train_step(model, optimizer, scheduler, dataset, task: Tuple[int, int], prev_node_states: Optional[Dict[str, torch.Tensor]] - ) -> dict: + ) -> Dict[str, float]: """ After receiving ground truth from a particular task, update the model by performing back-propagation. 
@@ -149,13 +155,13 @@ def train_step(model, optimizer, scheduler, dataset, optimizer.step() scheduler.step() - return {'loss': loss} + return {'loss': loss.item()} @torch.no_grad() def evaluate_step(model, dataset, task: Tuple[int, int], prev_node_states: Optional[Dict[str, List[torch.Tensor]]], - fast: bool = False) -> dict: + fast: bool=False) -> Dict[str, float]: """ Evaluate model's performance on task = (today, tomorrow) where today and tomorrow are integers indexing snapshots. @@ -174,12 +180,13 @@ def evaluate_step(model, dataset, task: Tuple[int, int], mrr_batch = get_task_batch(dataset, today, tomorrow, prev_node_states).clone() - mrr, rck1, rck3, rck10 = train_utils.report_rank_based_eval( - mrr_batch, model, - num_neg_per_node=cfg.metric.mrr_num_negative_edges) + mrr = train_utils.compute_MRR( + mrr_batch, + model, + num_neg_per_node=cfg.metric.mrr_num_negative_edges, + method=cfg.metric.mrr_method) - return {'loss': loss.item(), 'mrr': mrr, 'rck1': rck1, 'rck3': rck3, - 'rck10': rck10} + return {'loss': loss.item(), 'mrr': mrr} def train_live_update(loggers, loaders, model, optimizer, scheduler, datasets, @@ -190,15 +197,15 @@ def train_live_update(loggers, loaders, model, optimizer, scheduler, datasets, if not hasattr(dataset[0], 'keep_ratio'): precompute_edge_degree_info(dataset) - if cfg.dataset.premade_datasets == 'fresh_save_cache': - if not os.path.exists(f'{cfg.dataset.dir}/cache/'): - os.mkdir(f'{cfg.dataset.dir}/cache/') - cache_path = '{}/cache/cached_datasets_{}_{}_{}.pt'.format( - cfg.dataset.dir, cfg.dataset.format.replace('.tsv', ''), - cfg.transaction.snapshot_freq, - datetime.now().strftime('%Y_%m_%d__%H_%M_%S') - ) - torch.save(datasets, cache_path) + # if cfg.dataset.premade_datasets == 'fresh_save_cache': + # if not os.path.exists(f'{cfg.dataset.dir}/cache/'): + # os.mkdir(f'{cfg.dataset.dir}/cache/') + # cache_path = '{}/cache/cached_datasets_{}_{}_{}.pt'.format( + # cfg.dataset.dir, cfg.dataset.format.replace('.tsv', ''), + # cfg.transaction.snapshot_freq, + # datetime.now().strftime('%Y_%m_%d__%H_%M_%S') + # ) + # torch.save(datasets, cache_path) num_splits = len(loggers) # train/val/test splits. # range for today in (today, tomorrow) task pairs. @@ -224,13 +231,17 @@ def train_live_update(loggers, loaders, model, optimizer, scheduler, datasets, prev_node_states = None # no previous state on day 0. # {'node_states': [Tensor, Tensor], 'node_cells: [Tensor, Tensor]} - model_init = None # for meta-learning only, a model.state_dict() object. + model_meta = None # the state_dict() object of the meta-model. + + # TODO: How to incorporate logger? - for t in tqdm(task_range, desc='snapshot', leave=True): + for t in tqdm(task_range, desc='Snapshot'): # current task: t --> t+1. # (1) Evaluate model's performance on this task, at this time, the # model has seen no information on t+1, this evaluation is fair. + # TODO: modify here to predict on all edges? for i in range(1, num_splits): + # Validation and test edges. perf = evaluate_step(model, datasets[i], (t, t + 1), prev_node_states) @@ -253,13 +264,13 @@ def train_live_update(loggers, loaders, model, optimizer, scheduler, datasets, # choose the best model using current validation set, prepare for # next task. - if cfg.meta.is_meta and (model_init is not None): - # For meta-learning, start fine-tuning from the pre-computed - # initialization weight. - model.load_state_dict(copy.deepcopy(model_init)) + if cfg.meta.is_meta and (model_meta is not None): + # For meta-learning, start fine-tuning from the meta-model. 
+ model.load_state_dict(copy.deepcopy(model_meta)) + # Internal training loop. for i in tqdm(range(cfg.optim.max_epoch + 1), desc='live update', - leave=True): + leave=False): # Start with the un-trained model (i = 0), evaluate the model. internal_val_perf = evaluate_step(model, datasets[1], (t, t + 1), @@ -295,20 +306,13 @@ def train_live_update(loggers, loaders, model, optimizer, scheduler, datasets, model.load_state_dict(best_model['state']) if cfg.meta.is_meta: # update meta-learning's initialization weights. - if model_init is None: # for the first task. - model_init = copy.deepcopy(best_model['state']) + if model_meta is None: # for the first task. + model_meta = copy.deepcopy(best_model['state']) else: # for subsequent task, update init. - if cfg.meta.method == 'moving_average': - new_weight = cfg.meta.alpha - elif cfg.meta.method == 'online_mean': - new_weight = 1 / (t + 1) # for t=1, the second item, 1/2. - else: - raise ValueError(f'Invalid method: {cfg.meta.method}') - - # (1-new_weight)*model_init + new_weight*best_model. - model_init = average_state_dict(model_init, + # (1-alpha)*model_meta + alpha*best_model. + model_meta = average_state_dict(model_meta, best_model['state'], - new_weight) + cfg.meta.alpha) prev_node_states = update_node_states(model, datasets[0], (t, t + 1), prev_node_states) From bc1b0bc4adf0c92b0284e2ca3e2a1031073683d3 Mon Sep 17 00:00:00 2001 From: Tianyu Du Date: Sun, 6 Jun 2021 16:28:42 -0700 Subject: [PATCH 22/66] add roland feature encoder. --- graphgym/contrib/feature_encoder/roland.py | 115 +++++++++++++++++++++ 1 file changed, 115 insertions(+) create mode 100644 graphgym/contrib/feature_encoder/roland.py diff --git a/graphgym/contrib/feature_encoder/roland.py b/graphgym/contrib/feature_encoder/roland.py new file mode 100644 index 00000000..6090843e --- /dev/null +++ b/graphgym/contrib/feature_encoder/roland.py @@ -0,0 +1,115 @@ +import deepsnap +import torch +import torch.nn as nn +from graphgym.config import cfg +from graphgym.register import register_edge_encoder, register_node_encoder + + +class TransactionEdgeEncoder(torch.nn.Module): + r"""A module that encodes edge features in the transaction graph. + + Example: + TransactionEdgeEncoder( + (embedding_list): ModuleList( + (0): Embedding(50, 32) # The first integral edge feature has 50 unique values. + # convert this integral feature to 32 dimensional embedding. + (1): Embedding(8, 32) + (2): Embedding(252, 32) + (3): Embedding(252, 32) + ) + (linear_amount): Linear(in_features=1, out_features=64, bias=True) + (linear_time): Linear(in_features=1, out_features=64, bias=True) + ) + + Initial edge feature dimension = 6 + Final edge embedding dimension = 32 + 32 + 32 + 32 + 64 + 64 = 256 + """ + + def __init__(self, emb_dim: int): + # emb_dim is not used here. + super(TransactionEdgeEncoder, self).__init__() + + self.embedding_list = torch.nn.ModuleList() + # Note: feature_edge_int_num[i] = len(torch.unique(graph.edge_feature[:, i])) + # where i-th edge features are integral. + for num in cfg.transaction.feature_edge_int_num: + emb = torch.nn.Embedding(num, cfg.transaction.feature_int_dim) + torch.nn.init.xavier_uniform_(emb.weight.data) + self.embedding_list.append(emb) + + # Embed non-integral features. 
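+        # (With the example configuration in the docstring above, the final
+        #  edge embedding dimension is
+        #  len(feature_edge_int_num) * feature_int_dim
+        #      + feature_amount_dim + feature_time_dim = 4*32 + 64 + 64 = 256.)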
+ self.linear_amount = nn.Linear(1, cfg.transaction.feature_amount_dim) + self.linear_time = nn.Linear(1, cfg.transaction.feature_time_dim) + # update edge_dim + cfg.dataset.edge_dim = len(cfg.transaction.feature_edge_int_num) \ + * cfg.transaction.feature_int_dim \ + + cfg.transaction.feature_amount_dim \ + + cfg.transaction.feature_time_dim + + def forward(self, batch: deepsnap.batch.Batch) -> deepsnap.batch.Batch: + edge_embedding = [] + for i in range(len(self.embedding_list)): + edge_embedding.append( + self.embedding_list[i](batch.edge_feature[:, i].long()) + ) + # By default, edge_feature[:, -2] contains edge amount, + # edge_feature[:, -1] contains edge time. + edge_embedding.append( + self.linear_amount(batch.edge_feature[:, -2].view(-1, 1)) + ) + edge_embedding.append( + self.linear_time(batch.edge_feature[:, -1].view(-1, 1)) + ) + batch.edge_feature = torch.cat(edge_embedding, dim=1) + return batch + + +register_edge_encoder('roland', TransactionEdgeEncoder) + + +class TransactionNodeEncoder(torch.nn.Module): + r"""A module that encodes node features in the transaction graph. + + Parameters: + num_classes - the number of classes for the embedding mapping to learn + + Example: + 3 unique values for the first integral node feature. + 3 unique values for the second integral node feature. + + cfg.transaction.feature_node_int_num = [3, 3] + cfg.transaction.feature_int_dim = 32 + + TransactionNodeEncoder( + (embedding_list): ModuleList( + (0): Embedding(3, 32) # embed the first node feature to 32-dimensional space. + (1): Embedding(3, 32) # embed the second node feature to 32-dimensional space. + ) + ) + + Initial node feature dimension = 2 + Final node embedding dimension = 32 + 32 = 256 + """ + + def __init__(self, emb_dim: int, num_classes=None): + super(TransactionNodeEncoder, self).__init__() + self.embedding_list = torch.nn.ModuleList() + for i, num in enumerate(cfg.transaction.feature_node_int_num): + emb = torch.nn.Embedding(num, cfg.transaction.feature_int_dim) + torch.nn.init.xavier_uniform_(emb.weight.data) + self.embedding_list.append(emb) + # update encoder_dim + cfg.dataset.encoder_dim = len(cfg.transaction.feature_node_int_num) \ + * cfg.transaction.feature_int_dim + + def forward(self, batch: deepsnap.batch.Batch) -> deepsnap.batch.Batch: + node_embedding = [] + for i in range(len(self.embedding_list)): + node_embedding.append( + self.embedding_list[i](batch.node_feature[:, i].long()) + ) + batch.node_feature = torch.cat(node_embedding, dim=1) + return batch + + +register_node_encoder('roland', TransactionNodeEncoder) From 57984c1500a5d56356f9cf518d55731bd60db1bd Mon Sep 17 00:00:00 2001 From: Tianyu Du Date: Sun, 6 Jun 2021 16:46:23 -0700 Subject: [PATCH 23/66] update config --- graphgym/contrib/config/roland.py | 9 --------- 1 file changed, 9 deletions(-) diff --git a/graphgym/contrib/config/roland.py b/graphgym/contrib/config/roland.py index 62d5b3c3..69bc7d27 100644 --- a/graphgym/contrib/config/roland.py +++ b/graphgym/contrib/config/roland.py @@ -30,12 +30,6 @@ def set_cfg_roland(cfg): # Default to False. cfg.meta.is_meta = False - # choose between 'moving_average' and 'online_mean' - cfg.meta.method = 'moving_average' # TODO: remove, only use moving_average. - # For online mean: - # new_mean = (n-1)/n * old_mean + 1/n * new_value. - # where *_mean corresponds to W_init. - # Weight used in moving average for model parameters. # After fine-tuning the model in period t and get model M[t], # Set W_init = (1-alpha) * W_init + alpha * M[t]. 
@@ -55,9 +49,6 @@ def set_cfg_roland(cfg): # Set to -1 if using all snapshots. # cfg.experimental.restrict_training_set = -1 - # Whether to visualize edge attention of GNN layer after training. - cfg.experimental.visualize_gnn_layer = False - cfg.train.tbptt_freq = 5 cfg.train.internal_validation_tolerance = 5 From 4efdd0cf4fb330198eee203506e2d94c92607333 Mon Sep 17 00:00:00 2001 From: Tianyu Du Date: Sun, 6 Jun 2021 16:46:39 -0700 Subject: [PATCH 24/66] Add general LinearEdgeEncoder. --- graphgym/contrib/feature_encoder/roland.py | 31 ++++++++++++++++++++-- 1 file changed, 29 insertions(+), 2 deletions(-) diff --git a/graphgym/contrib/feature_encoder/roland.py b/graphgym/contrib/feature_encoder/roland.py index 6090843e..5a3b4898 100644 --- a/graphgym/contrib/feature_encoder/roland.py +++ b/graphgym/contrib/feature_encoder/roland.py @@ -6,7 +6,7 @@ class TransactionEdgeEncoder(torch.nn.Module): - r"""A module that encodes edge features in the transaction graph. + """A module that encodes edge features in the transaction graph. Example: TransactionEdgeEncoder( @@ -68,7 +68,7 @@ def forward(self, batch: deepsnap.batch.Batch) -> deepsnap.batch.Batch: class TransactionNodeEncoder(torch.nn.Module): - r"""A module that encodes node features in the transaction graph. + """A module that encodes node features in the transaction graph. Parameters: num_classes - the number of classes for the embedding mapping to learn @@ -113,3 +113,30 @@ def forward(self, batch: deepsnap.batch.Batch) -> deepsnap.batch.Batch: register_node_encoder('roland', TransactionNodeEncoder) + + +class LinearEdgeEncoder(torch.nn.Module): + """ + Basic edge encoder for temporal graphs, this encoder does not assume edge dim, + this encoder uses linear layers to contract/expand raw edge features to + dimension cfg.transaction.feature_amount_dim + feature_time_dim for consistency. + """ + def __init__(self, emb_dim: int): + # emb_dim is not used here. + super(LinearEdgeEncoder, self).__init__() + # For consistency, for non-transaction datasets with only timestamp, + # we use the feature amount dimension + time dimension to generate + # the same dimension as transaction datasets. + # TODO: change to feature_time_dim only for better naming? + expected_dim = cfg.transaction.feature_amount_dim \ + + cfg.transaction.feature_time_dim + + self.linear = nn.Linear(cfg.dataset.edge_dim, expected_dim) + cfg.dataset.edge_dim = expected_dim + + def forward(self, batch: deepsnap.batch.Batch) -> deepsnap.batch.Batch: + batch.edge_feature = self.linear(batch.edge_feature) + return batch + + +register_edge_encoder('roland_general', LinearEdgeEncoder) From bc8f2c73ef5fa0a00a466ffed85a61036b48fcfc Mon Sep 17 00:00:00 2001 From: Tianyu Du Date: Sun, 6 Jun 2021 17:00:05 -0700 Subject: [PATCH 25/66] move average_state_dict and precompute_edge_degree_info to utils. 
--- graphgym/contrib/train/train_live_update.py | 59 +++------------------ graphgym/contrib/train/train_utils.py | 54 ++++++++++++++++++- 2 files changed, 58 insertions(+), 55 deletions(-) diff --git a/graphgym/contrib/train/train_live_update.py b/graphgym/contrib/train/train_live_update.py index d54d5ece..60555125 100644 --- a/graphgym/contrib/train/train_live_update.py +++ b/graphgym/contrib/train/train_live_update.py @@ -21,53 +21,6 @@ from tqdm import tqdm -@torch.no_grad() -def average_state_dict(dict1: dict, dict2: dict, weight: float) -> dict: - """ - Average two model.state_dict() objects, - ut = (1-w)*dict1 + w*dict2 - when dict1, dict2 are model_dicts, this method updates the meta-model. - """ - assert 0 <= weight <= 1 - d1 = copy.deepcopy(dict1) - d2 = copy.deepcopy(dict2) - out = dict() - for key in d1.keys(): - assert isinstance(d1[key], torch.Tensor) - param1 = d1[key].detach().clone() - assert isinstance(d2[key], torch.Tensor) - param2 = d2[key].detach().clone() - out[key] = (1 - weight) * param1 + weight * param2 - return out - - -@torch.no_grad() -def precompute_edge_degree_info(dataset: deepsnap.dataset.GraphDataset): - """Pre-computes edge_degree_existing, edge_degree_new and keep ratio - at each snapshot. Inplace modifications. - """ - # Assume all graph snapshots have the same number of nodes. - num_nodes = dataset[0].node_feature.shape[0] - for t in range(len(dataset)): - if t == 0: - # No previous edges for any nodes. - dataset[t].node_degree_existing = torch.zeros(num_nodes) - else: - # degree[ Dict[str, float]: + fast: bool = False) -> Dict[str, float]: """ Evaluate model's performance on task = (today, tomorrow) where today and tomorrow are integers indexing snapshots. @@ -195,7 +148,7 @@ def train_live_update(loggers, loaders, model, optimizer, scheduler, datasets, for dataset in datasets: # Sometimes edge degree info is already included in dataset. if not hasattr(dataset[0], 'keep_ratio'): - precompute_edge_degree_info(dataset) + train_utils.precompute_edge_degree_info(dataset) # if cfg.dataset.premade_datasets == 'fresh_save_cache': # if not os.path.exists(f'{cfg.dataset.dir}/cache/'): @@ -310,9 +263,9 @@ def train_live_update(loggers, loaders, model, optimizer, scheduler, datasets, model_meta = copy.deepcopy(best_model['state']) else: # for subsequent task, update init. # (1-alpha)*model_meta + alpha*best_model. - model_meta = average_state_dict(model_meta, - best_model['state'], - cfg.meta.alpha) + model_meta = train_utils.average_state_dict(model_meta, + best_model['state'], + cfg.meta.alpha) prev_node_states = update_node_states(model, datasets[0], (t, t + 1), prev_node_states) diff --git a/graphgym/contrib/train/train_utils.py b/graphgym/contrib/train/train_utils.py index dad12ff7..6caf7ffd 100644 --- a/graphgym/contrib/train/train_utils.py +++ b/graphgym/contrib/train/train_utils.py @@ -1,14 +1,37 @@ """ Metrics, other utility, and helper functions. """ +# TODO: proof-read this file. +# TODO: remove comments. +import copy + import deepsnap import numpy as np import torch from graphgym.config import cfg from graphgym.loss import compute_loss +from graphgym.utils.stats import node_degree from torch_scatter import scatter_max, scatter_mean, scatter_min -# TODO: proof-read this file. -# TODO: remove comments. + + +@torch.no_grad() +def average_state_dict(dict1: dict, dict2: dict, weight: float) -> dict: + """ + Average two model.state_dict() objects, + ut = (1-w)*dict1 + w*dict2 + when dict1, dict2 are model_dicts, this method updates the meta-model. 
+ """ + assert 0 <= weight <= 1 + d1 = copy.deepcopy(dict1) + d2 = copy.deepcopy(dict2) + out = dict() + for key in d1.keys(): + assert isinstance(d1[key], torch.Tensor) + param1 = d1[key].detach().clone() + assert isinstance(d2[key], torch.Tensor) + param2 = d2[key].detach().clone() + out[key] = (1 - weight) * param1 + weight * param2 + return out def get_keep_ratio(existing: torch.Tensor, @@ -52,6 +75,33 @@ def get_keep_ratio(existing: torch.Tensor, return ratio +@torch.no_grad() +def precompute_edge_degree_info(dataset: deepsnap.dataset.GraphDataset): + """Pre-computes edge_degree_existing, edge_degree_new and keep ratio + at each snapshot. Inplace modifications. + """ + # Assume all graph snapshots have the same number of nodes. + num_nodes = dataset[0].node_feature.shape[0] + for t in range(len(dataset)): + if t == 0: + # No previous edges for any nodes. + dataset[t].node_degree_existing = torch.zeros(num_nodes) + else: + # degree[ int: """Computes how much memory a batch has consumed.""" total_byte = 0 From 7bba9d776df5901e6433d0748e76daacb4adf627 Mon Sep 17 00:00:00 2001 From: Tianyu Du Date: Sun, 6 Jun 2021 17:36:20 -0700 Subject: [PATCH 26/66] add training pipeline. --- .../contrib/train/train_live_update_bptt.py | 269 ++++++++++++++++++ 1 file changed, 269 insertions(+) create mode 100644 graphgym/contrib/train/train_live_update_bptt.py diff --git a/graphgym/contrib/train/train_live_update_bptt.py b/graphgym/contrib/train/train_live_update_bptt.py new file mode 100644 index 00000000..e07b8d95 --- /dev/null +++ b/graphgym/contrib/train/train_live_update_bptt.py @@ -0,0 +1,269 @@ +""" +The baseline training (non-incremental) training for live-update scheme. +NOTE: this setup requires extensive GPU memory and could lead to OOM error. +""" +import copy +import datetime +import logging +import os +from typing import Dict, List, Optional, Tuple + +import deepsnap +import numpy as np +import torch +from graphgym.checkpoint import clean_ckpt +from graphgym.config import cfg +from graphgym.contrib.train import train_utils +from graphgym.loss import compute_loss +from graphgym.optimizer import create_optimizer, create_scheduler +from graphgym.register import register_train +from graphgym.utils.io import makedirs_rm_exist +from graphgym.utils.stats import node_degree +from torch.utils.tensorboard import SummaryWriter +from tqdm import tqdm + + +@torch.no_grad() +def get_task_batch(dataset: deepsnap.dataset.GraphDataset, + today: int, tomorrow: int, + prev_node_states: Optional[Dict[str, List[torch.Tensor]]] + ) -> deepsnap.graph.Graph: + """ + Construct batch required for the task (today, tomorrow). As defined in + batch's get_item method (used to get edge_label and get_label_index), + edge_label and edge_label_index returned would be different everytime + get_task_batch() is called. + + Moreover, copy node-memories (node_states and node_cells) to the batch. + """ + assert today < tomorrow < len(dataset) + # Get edges for message passing and prediction task. + batch = dataset[today].clone() + batch.edge_label = dataset[tomorrow].edge_label.clone() + batch.edge_label_index = dataset[tomorrow].edge_label_index.clone() + + # Copy previous memory to the batch. 
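+    # (The copies below are detached, so no gradient ever flows across
+    #  snapshot boundaries through the stored node memories.)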
+ if prev_node_states is not None: + for key, val in prev_node_states.items(): + copied = [x.detach().clone() for x in val] + setattr(batch, key, copied) + + batch = train_utils.move_batch_to_device(batch, cfg.device) + return batch + + +@torch.no_grad() +def update_node_states(model, dataset, task: Tuple[int, int], + prev_node_states: Optional[ + Dict[str, List[torch.Tensor]]] + ) -> Dict[str, List[torch.Tensor]]: + """Perform the provided task and keep track of the latest node_states. + + Example: task = (t, t+1), + the prev_node_states contains node embeddings at time (t-1). + the model perform task (t, t+1): + Input: (node embedding at t - 1, edges at t). + Output: possible transactions at t+1. + the model also generates node embeddings at t. + + after doing task (t, t+1), node_states contains information + from snapshot t. + """ + today, tomorrow = task + batch = get_task_batch(dataset, today, tomorrow, prev_node_states).clone() + # Let the model modify batch.node_states (and batch.node_cells). + _, _ = model(batch) + # Collect the updated node states. + out = dict() + out['node_states'] = [x.detach().clone() for x in batch.node_states] + if isinstance(batch.node_cells[0], torch.Tensor): + out['node_cells'] = [x.detach().clone() for x in batch.node_cells] + + return out + + +def train_step(model, optimizer, scheduler, dataset, + task: Tuple[int, int]) -> dict: + """ + After receiving ground truth from a particular task, update the model by + performing back-propagation. + For example, on day t, the ground truth of task (t-1, t) has been revealed, + train the model using G[t-1] for message passing and label[t] as target. + """ + optimizer.zero_grad() + torch.cuda.empty_cache() + model.train() + + today, _ = task + + # get loss over time. + total_loss_over_time = torch.tensor(0.0).to(torch.device(cfg.device)) + # iterate from the beginning to compute node_states. + for t in range(today + 1): # (0, 1), (1, 2), ..., (today, today+1). + # perform task (t, t+1), use information up to tomorrow. + new_batch = get_task_batch(dataset, t, t + 1, None).clone() + if t > 0: # manually inherit node states and node cells for LSTM. + new_batch.node_states = batch.node_states + new_batch.node_cells = batch.node_cells + batch = new_batch + pred, true = model(batch) + loss, _ = compute_loss(pred, true) + if t > today - cfg.train.tbptt_freq: + # Perform the truncated version, only accumulate loss for recent + # snapshots. + total_loss_over_time += loss + # get average loss over time. + total_loss_over_time /= (today + 1) + # perform back-prop through time. + total_loss_over_time.backward() + optimizer.step() + + scheduler.step() + return {'loss': total_loss_over_time} + + +@torch.no_grad() +def evaluate_step(model, dataset, task: Tuple[int, int], fast: bool = False + ) -> dict: + """ + Evaluate model's performance on task = (today, tomorrow) + where today and tomorrow are integers indexing snapshots. + """ + today, tomorrow = task + model.eval() + + # Run forward pass to get the latest node states. + for t in range(today): # (0, 1), (1, 2), ...(today-1, today) + # Iterative through snapshots in the past, up to (today-1, today) + new_batch = get_task_batch(dataset, t, t + 1, None).clone() + if t > 0: + new_batch.node_states = batch.node_states + new_batch.node_cells = batch.node_cells + batch = new_batch + # forward pass to update node_states in batch. + _, _ = model(batch) + + # Evaluation. 
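+    # (Node states are not cached across calls in this baseline: the loop
+    #  above replays snapshots 0..today-1 to rebuild them on every call,
+    #  which is what makes this non-incremental setup slow and memory-heavy.)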
+ # (today, today+1) + cur_batch = get_task_batch(dataset, today, tomorrow, None).clone() + if today > 0: + cur_batch.node_states = copy.deepcopy(batch.node_states) + cur_batch.node_cells = copy.deepcopy(batch.node_cells) + + pred, true = model(cur_batch) + loss, _ = compute_loss(pred, true) + + if fast: + # skip MRR calculation for internal validation. + return {'loss': loss.item()} + + mrr_batch = get_task_batch(dataset, today, tomorrow, None).clone() + if today > 0: + mrr_batch.node_states = copy.deepcopy(batch.node_states) + mrr_batch.node_cells = copy.deepcopy(batch.node_cells) + + mrr = train_utils.compute_MRR( + mrr_batch, + model, + num_neg_per_node=cfg.metric.mrr_num_negative_edges, + method=cfg.metric.mrr_method) + + return {'loss': loss.item(), 'mrr': mrr} + + +def train_live_update_bptt(loggers, loaders, model, optimizer, scheduler, datasets, + **kwargs): + for dataset in datasets: + # Sometimes edge degree info is already included in dataset. + if not hasattr(dataset[0], 'keep_ratio'): + train_utils.precompute_edge_degree_info(dataset) + + num_splits = len(loggers) # train/val/test splits. + # range for today in (today, tomorrow) task pairs. + task_range = range(len(datasets[0]) - cfg.transaction.horizon) + + t = datetime.datetime.now().strftime('%b%d_%H-%M-%S') + + # directory to store tensorboard files of this run. + out_dir = cfg.out_dir.replace('/', '\\') + # dir to store all run outputs for the entire batch. + run_dir = 'runs_' + cfg.remark + + print(f'Tensorboard directory: {out_dir}') + # If tensorboard directory exists, this config is in the re-run phase + # of run_batch, replace logs of previous runs with the new one. + makedirs_rm_exist(f'./{run_dir}/{out_dir}') + writer = SummaryWriter(f'./{run_dir}/{out_dir}') + + # save a copy of configuration for later identifications. + with open(f'./{run_dir}/{out_dir}/config.yaml', 'w') as f: + cfg.dump(stream=f) + + for t in tqdm(task_range, desc='Snapshot'): + # current task: t --> t+1. + # (1) Evaluate model's performance on this task, at this time, the + # model has seen no information on t+1, this evaluation is fair. + for i in range(1, num_splits): + perf = evaluate_step(model, datasets[i], (t, t + 1), fast=False) + + writer.add_scalars('val' if i == 1 else 'test', perf, t) + + # (2) Reveal the ground truth of task (t, t+1) and update the model + # to prepare for the next task. + del optimizer, scheduler # use new optimizers. + optimizer = create_optimizer(model.parameters()) + scheduler = create_scheduler(optimizer) + + # best model's validation loss, training epochs, and state_dict. + best_model = {'val_loss': np.inf, 'train_epoch': 0, 'state': None} + # keep track of how long we have NOT update the best model. + best_model_unchanged = 0 + # after not updating the best model for `tol` epochs, stop. + tol = cfg.train.internal_validation_tolerance + + # internal training loop (intra-snapshot cross-validation). + # choose the best model using current validation set, prepare for + # next task. + + for i in tqdm(range(cfg.optim.max_epoch + 1), desc='live update', + leave=False): + # Start with the un-trained model (i = 0), evaluate the model. + internal_val_perf = evaluate_step(model, datasets[1], + (t, t + 1), fast=True) + val_loss = internal_val_perf['loss'] + + if val_loss < best_model['val_loss']: + # replace the best model with the current model. 
+ best_model = {'val_loss': val_loss, 'train_epoch': i, + 'state': copy.deepcopy(model.state_dict())} + best_model_unchanged = 0 + else: + # the current best model has dominated for these epochs. + best_model_unchanged += 1 + + if best_model_unchanged >= tol: + # If the best model has not been updated for a while, stop. + break + else: + # Otherwise, keep training. + train_perf = train_step(model, optimizer, scheduler, + datasets[0], (t, t + 1)) + writer.add_scalars('train', train_perf, t) + + writer.add_scalar('internal_best_val', best_model['val_loss'], t) + writer.add_scalar('best epoch', best_model['train_epoch'], t) + + # (3) Actually perform the update on training set to get node_states + # contains information up to time t. + # Use the best model selected from intra-snapshot cross-validation. + model.load_state_dict(best_model['state']) + + writer.close() + + if cfg.train.ckpt_clean: + clean_ckpt() + + logging.info('Task done, results saved in {}'.format(cfg.out_dir)) + + +register_train('live_update_baseline', train_live_update_bptt) From 0a87698962c7088f683677fe0608c14495e99e54 Mon Sep 17 00:00:00 2001 From: Tianyu Du Date: Sun, 6 Jun 2021 17:51:16 -0700 Subject: [PATCH 27/66] add pipeline --- .../train/train_live_update_fixed_split.py | 281 ++++++++++++++++++ 1 file changed, 281 insertions(+) create mode 100644 graphgym/contrib/train/train_live_update_fixed_split.py diff --git a/graphgym/contrib/train/train_live_update_fixed_split.py b/graphgym/contrib/train/train_live_update_fixed_split.py new file mode 100644 index 00000000..cde96628 --- /dev/null +++ b/graphgym/contrib/train/train_live_update_fixed_split.py @@ -0,0 +1,281 @@ +""" +A pipeline training model using live-update scheme but only evaluates the model +using the last 10% of snapshots, which is the same as conventional chronological +data splitting method. +""" +import copy +import datetime +import logging +import os +from typing import Dict, List, Optional, Tuple + +import deepsnap +import numpy as np +import torch +from graphgym.checkpoint import clean_ckpt +from graphgym.config import cfg +from graphgym.contrib.train import train_utils +from graphgym.loss import compute_loss +from graphgym.optimizer import create_optimizer, create_scheduler +from graphgym.register import register_train +from graphgym.utils.io import makedirs_rm_exist +from torch.utils.tensorboard import SummaryWriter +from tqdm import tqdm + + +@torch.no_grad() +def get_task_batch(dataset: deepsnap.dataset.GraphDataset, + today: int, tomorrow: int, + prev_node_states: Optional[Dict[str, List[torch.Tensor]]] + ) -> deepsnap.graph.Graph: + """ + Construct batch required for the task (today, tomorrow). As defined in + batch's get_item method (used to get edge_label and get_label_index), + edge_label and edge_label_index returned would be different everytime + get_task_batch() is called. + + Moreover, copy node-memories (node_states and node_cells) to the batch. + """ + assert today < tomorrow < len(dataset) + # Get edges for message passing and prediction task. + batch = dataset[today].clone() + batch.edge_label = dataset[tomorrow].edge_label.clone() + batch.edge_label_index = dataset[tomorrow].edge_label_index.clone() + + # Copy previous memory to the batch. 
+ if prev_node_states is not None: + for key, val in prev_node_states.items(): + copied = [x.detach().clone() for x in val] + setattr(batch, key, copied) + + batch = train_utils.move_batch_to_device(batch, cfg.device) + return batch + + +@torch.no_grad() +def update_node_states(model, dataset, task: Tuple[int, int], + prev_node_states: Optional[ + Dict[str, List[torch.Tensor]]] + ) -> Dict[str, List[torch.Tensor]]: + """Perform the provided task and keep track of the latest node_states. + + Example: task = (t, t+1), + the prev_node_states contains node embeddings at time (t-1). + the model perform task (t, t+1): + Input: (node embedding at t - 1, edges at t). + Output: possible transactions at t+1. + the model also generates node embeddings at t. + + after doing task (t, t+1), node_states contains information + from snapshot t. + """ + today, tomorrow = task + batch = get_task_batch(dataset, today, tomorrow, prev_node_states).clone() + # Let the model modify batch.node_states (and batch.node_cells). + _, _ = model(batch) + # Collect the updated node states. + out = dict() + out['node_states'] = [x.detach().clone() for x in batch.node_states] + if isinstance(batch.node_cells[0], torch.Tensor): + out['node_cells'] = [x.detach().clone() for x in batch.node_cells] + + return out + + +def train_step(model, optimizer, scheduler, dataset, + task: Tuple[int, int], + prev_node_states: Optional[Dict[str, torch.Tensor]] + ) -> dict: + """ + After receiving ground truth from a particular task, update the model by + performing back-propagation. + For example, on day t, the ground truth of task (t-1, t) has been revealed, + train the model using G[t-1] for message passing and label[t] as target. + """ + optimizer.zero_grad() + torch.cuda.empty_cache() + + today, tomorrow = task + model.train() + batch = get_task_batch(dataset, today, tomorrow, prev_node_states).clone() + + pred, true = model(batch) + loss, pred_score = compute_loss(pred, true) + loss.backward() + optimizer.step() + + scheduler.step() + return {'loss': loss} + + +@torch.no_grad() +def evaluate_step(model, dataset, task: Tuple[int, int], + prev_node_states: Optional[Dict[str, List[torch.Tensor]]], + fast: bool = False) -> dict: + """ + Evaluate model's performance on task = (today, tomorrow) + where today and tomorrow are integers indexing snapshots. + """ + today, tomorrow = task + model.eval() + batch = get_task_batch(dataset, today, tomorrow, prev_node_states).clone() + + pred, true = model(batch) + loss, pred_score = compute_loss(pred, true) + + if fast: + # skip MRR calculation for internal validation. + return {'loss': loss.item()} + + mrr_batch = get_task_batch(dataset, today, tomorrow, + prev_node_states).clone() + + mrr = train_utils.compute_MRR(mrr_batch, model, -1, 'all') + + return {'loss': loss.item(), 'mrr': mrr} + + +def train_live_update(loggers, loaders, model, optimizer, scheduler, datasets, + **kwargs): + + for dataset in datasets: + # Sometimes edge degree info is already included in dataset. 
+ if not hasattr(dataset[0], 'keep_ratio'): + train_utils.precompute_edge_degree_info(dataset) + + if cfg.dataset.premade_datasets == 'fresh_save_cache': + if not os.path.exists(f'{cfg.dataset.dir}/cache/'): + os.mkdir(f'{cfg.dataset.dir}/cache/') + cache_path = '{}/cache/cached_datasets_{}_{}_{}.pt'.format( + cfg.dataset.dir, cfg.dataset.format.replace('.tsv', ''), + cfg.transaction.snapshot_freq, + datetime.now().strftime('%Y_%m_%d__%H_%M_%S') + ) + torch.save(datasets, cache_path) + + num_splits = len(loggers) # train/val/test splits. + # range for today in (today, tomorrow) task pairs. + task_range = range(len(datasets[0]) - cfg.transaction.horizon) + + t = datetime.datetime.now().strftime('%b%d_%H-%M-%S') + + # directory to store tensorboard files of this run. + out_dir = cfg.out_dir.replace('/', '\\') + # dir to store all run outputs for the entire batch. + run_dir = 'runs_' + cfg.remark + + print(f'Tensorboard directory: {out_dir}') + # If tensorboard directory exists, this config is in the re-run phase + # of run_batch, replace logs of previous runs with the new one. + makedirs_rm_exist(f'./{run_dir}/{out_dir}') + writer = SummaryWriter(f'./{run_dir}/{out_dir}') + + # save a copy of configuration for later identifications. + with open(f'./{run_dir}/{out_dir}/config.yaml', 'w') as f: + cfg.dump(stream=f) + + prev_node_states = None # no previous state on day 0. + # {'node_states': [Tensor, Tensor], 'node_cells: [Tensor, Tensor]} + + model_init = None # for meta-learning only, a model.state_dict() object. + for t in tqdm(task_range, desc='snapshot', leave=True): + # current task: t --> t+1. + # (1) Evaluate model's performance on this task, at this time, the + # model has seen no information on t+1, this evaluation is fair. + # Only evaluate the performance within the test set split region. + # Test snapshots are indexed [cfg.train.start_compute_mrr, end]. + perf = evaluate_step(model, datasets[2], (t, t + 1), + prev_node_states, fast=t < cfg.train.start_compute_mrr) + + writer.add_scalars('test', perf, t) + + # (2) Reveal the ground truth of task (t, t+1) and update the model + # to prepare for the next task. + del optimizer, scheduler # use new optimizers. + optimizer = create_optimizer(model.parameters()) + scheduler = create_scheduler(optimizer) + + # best model's validation loss, training epochs, and state_dict. + # The untrained model is the default best model. + best_model = {'val_loss': np.inf, 'train_epoch': 0, + 'state': copy.deepcopy(model.state_dict())} + # keep track of how long we have NOT update the best model. + best_model_unchanged = 0 + # after not updating the best model for `tol` epochs, stop. + tol = cfg.train.internal_validation_tolerance + + # internal training loop (intra-snapshot cross-validation). + # choose the best model using current validation set, prepare for + # next task. + + if cfg.meta.is_meta and (model_init is not None): + # For meta-learning, start fine-tuning from the pre-computed + # initialization weight. + model.load_state_dict(copy.deepcopy(model_init)) + + for i in tqdm(range(cfg.optim.max_epoch + 1), desc='live update', + leave=True): + # Start with the un-trained model (i = 0), evaluate the model. + internal_val_perf = evaluate_step(model, datasets[1], + (t, t + 1), + prev_node_states, fast=True) + val_loss = internal_val_perf['loss'] + + if val_loss < best_model['val_loss']: + # replace the best model with the current model. 
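+                # (Keep a deepcopy of the weights and reset the counter so the
+                #  tolerance check below starts counting from zero again.)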
+ best_model = {'val_loss': val_loss, 'train_epoch': i, + 'state': copy.deepcopy(model.state_dict())} + best_model_unchanged = 0 + else: + # the current best model has dominated for these epochs. + best_model_unchanged += 1 + + # if (i >= 2 * tol) and (best_model_unchanged >= tol): + if best_model_unchanged >= tol: + # If the best model has not been updated for a while, stop. + break + else: + # Otherwise, keep training. + train_perf = train_step(model, optimizer, scheduler, + datasets[0], (t, t + 1), + prev_node_states) + writer.add_scalars('train', train_perf, t) + + writer.add_scalar('internal_best_val', best_model['val_loss'], t) + writer.add_scalar('best epoch', best_model['train_epoch'], t) + + # (3) Actually perform the update on training set to get node_states + # contains information up to time t. + # Use the best model selected from intra-snapshot cross-validation. + # if best_model['state'] is None: + # breakpoint() + model.load_state_dict(best_model['state']) + + if cfg.meta.is_meta: # update meta-learning's initialization weights. + if model_init is None: # for the first task. + model_init = copy.deepcopy(best_model['state']) + else: # for subsequent task, update init. + if cfg.meta.method == 'moving_average': + new_weight = cfg.meta.alpha + elif cfg.meta.method == 'online_mean': + new_weight = 1 / (t + 1) # for t=1, the second item, 1/2. + else: + raise ValueError(f'Invalid method: {cfg.meta.method}') + + # (1-new_weight)*model_init + new_weight*best_model. + model_init = train_utils.average_state_dict(model_init, + best_model['state'], + new_weight) + + prev_node_states = update_node_states(model, datasets[0], (t, t + 1), + prev_node_states) + + writer.close() + + if cfg.train.ckpt_clean: + clean_ckpt() + + logging.info('Task done, results saved in {}'.format(cfg.out_dir)) + + +register_train('live_update_fixed_split', train_live_update) From 34cc91d4ac5ebb1f83c951c6708b79f6b22ca405 Mon Sep 17 00:00:00 2001 From: Tianyu Du Date: Sun, 6 Jun 2021 18:15:27 -0700 Subject: [PATCH 28/66] add --- .../contrib/head/head_large_prediction.py | 109 ++++++++++++++++++ 1 file changed, 109 insertions(+) create mode 100644 graphgym/contrib/head/head_large_prediction.py diff --git a/graphgym/contrib/head/head_large_prediction.py b/graphgym/contrib/head/head_large_prediction.py new file mode 100644 index 00000000..3b7e99c9 --- /dev/null +++ b/graphgym/contrib/head/head_large_prediction.py @@ -0,0 +1,109 @@ +""" +An improved version of graphgym.models.head.GNNEdgeHead. This head handles +large link prediction tasks by splitting them into chunks to avoid OOM errors. +This is particular useful for computing MRR when a large amount of memory is +needed. + +(Not implemented yet) Alternatively, one may implement head for MRR by all +prediction task to CPU, by doing so, we need sepearate heads for training and +inference (training requires everything including head to be on GPU). +""" +import torch +import torch.nn as nn +from graphgym.config import cfg +from graphgym.models.layer import MLP +from graphgym.register import register_head + + +class LargeGNNEdgeHead(nn.Module): + def __init__(self, dim_in: int, dim_out: int): + # Use dim_in for graph conv, since link prediction dim_out could be + # binary + # E.g. if decoder='dot', link probability is dot product between + # node embeddings, of dimension dim_in + super(LargeGNNEdgeHead, self).__init__() + # module to decode edges from node embeddings + + if cfg.model.edge_decoding == 'concat': + # Only use node features. 
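+            # (The MLP input is dim_in * 2 because the source and destination
+            #  node embeddings are concatenated; the 'edgeconcat' branch below
+            #  additionally appends the raw edge feature, hence
+            #  dim_in * 2 + cfg.dataset.edge_dim.)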
+ self.layer_post_mp = MLP(dim_in * 2, dim_out, + num_layers=cfg.gnn.layers_post_mp, + bias=True) + # requires parameter + self.decode_module = lambda v1, v2: \ + self.layer_post_mp(torch.cat((v1, v2), dim=-1)) + elif cfg.model.edge_decoding == 'edgeconcat': + # Use both node and edge features. + self.layer_post_mp = MLP(dim_in * 2 + cfg.dataset.edge_dim, dim_out, + num_layers=cfg.gnn.layers_post_mp, + bias=True) + # requires parameter + self.decode_module = lambda v1, v2, edge: \ + self.layer_post_mp(torch.cat((v1, v2, edge), dim=-1)) + else: + if dim_out > 1: + raise ValueError( + 'Binary edge decoding ({})is used for multi-class ' + 'edge/link prediction.'.format(cfg.model.edge_decoding)) + self.layer_post_mp = MLP(dim_in, dim_in, + num_layers=cfg.gnn.layers_post_mp, + bias=True) + if cfg.model.edge_decoding == 'dot': + self.decode_module = lambda v1, v2: torch.sum(v1 * v2, dim=-1) + elif cfg.model.edge_decoding == 'cosine_similarity': + self.decode_module = nn.CosineSimilarity(dim=-1) + else: + raise ValueError('Unknown edge decoding {}.'.format( + cfg.model.edge_decoding)) + + def _apply_index(self, batch): + return batch.node_feature[batch.edge_label_index], \ + batch.edge_label + + def forward_pred(self, batch): + # TODO: consider moving this to config. + predict_batch_size = 500000 # depends on GPU memroy size. + num_pred = len(batch.edge_label) + label = batch.edge_label + if num_pred >= predict_batch_size: + # for large prediction tasks, split into chunks. + num_chunks = num_pred // predict_batch_size + 1 + edge_label_index_chunks = torch.chunk( + batch.edge_label_index, num_chunks, dim=1) + gathered_pred = list() + + for edge_label_index in edge_label_index_chunks: + pred = batch.node_feature[edge_label_index] + # node features of the source node of each edge. + nodes_first = pred[0] + nodes_second = pred[1] + if cfg.model.edge_decoding == 'edgeconcat': + raise NotImplementedError + else: + pred = self.decode_module(nodes_first, nodes_second) + gathered_pred.append(pred) + + pred = torch.cat(gathered_pred) + else: + pred, label = self._apply_index(batch) + # node features of the source node of each edge. + nodes_first = pred[0] + nodes_second = pred[1] + if cfg.model.edge_decoding == 'edgeconcat': + edge_feature = torch.index_select( + batch.edge_feature, 0, batch.edge_split_index) + pred = self.decode_module( + nodes_first, nodes_second, edge_feature) + else: + pred = self.decode_module(nodes_first, nodes_second) + return pred, label + + def forward(self, batch): + if cfg.model.edge_decoding != 'concat' and \ + cfg.model.edge_decoding != 'edgeconcat': + batch = self.layer_post_mp(batch) + pred, label = self.forward_pred(batch) + return pred, label + + +register_head('link_pred_large', LargeGNNEdgeHead) From 4826e76b5f7e9810aa9ea0370e7fb58f9ac17f4b Mon Sep 17 00:00:00 2001 From: Tianyu Du Date: Sun, 6 Jun 2021 18:56:47 -0700 Subject: [PATCH 29/66] update config file. --- graphgym/contrib/config/roland.py | 106 +++++++++++++++--------------- 1 file changed, 52 insertions(+), 54 deletions(-) diff --git a/graphgym/contrib/config/roland.py b/graphgym/contrib/config/roland.py index 69bc7d27..58a32bac 100644 --- a/graphgym/contrib/config/roland.py +++ b/graphgym/contrib/config/roland.py @@ -4,30 +4,43 @@ def set_cfg_roland(cfg): - r''' + """ This function sets the default config value for customized options :return: customized configuration use by the experiment. 
- ''' + """ # ----------------------------------------------------------------------- # # Customized options # ----------------------------------------------------------------------- # - # TODO: add documentation. + + # Use to identify experiments, tensorboard will be saved to this path. + # Options: any string. + cfg.remark = '' + + # ----------------------------------------------------------------------- # + # Additional GNN options. + # ----------------------------------------------------------------------- # # Method to update node embedding from old node embedding and new node features. - # Options: 'moving_average', 'masked_gru', 'gru' - # moving average: new embedding = r * old + (1-r) * node_feature. - # gru: new embedding = GRU(node_feature, old_embedding). - # masked_gru: only apply GRU to active nodes. + # Options: {'moving_average', 'mlp', 'gru'} cfg.gnn.embed_update_method = 'moving_average' - # how many layers to use in the MLP updater. - # default: 1, use a simple linear layer. + # How many layers to use in the MLP updater. + # Options: integers >= 1. + # NOTE: there is a known issue when set to 1, use >= 2 for now. + # Only effective when cfg.gnn.embed_update_method == 'mlp'. cfg.gnn.mlp_update_layers = 2 - + + # What kind of skip-connection to use. + # Options: {'none', 'identity', 'affine'}. + cfg.gnn.skip_connection = 'none' + + # ----------------------------------------------------------------------- # + # Meta-Learning options. + # ----------------------------------------------------------------------- # # For meta-learning. cfg.meta = CN() # Whether to do meta-learning via initialization moving average. - # Default to False. + # Options: {True, False} cfg.meta.is_meta = False # Weight used in moving average for model parameters. @@ -35,36 +48,36 @@ def set_cfg_roland(cfg): # Set W_init = (1-alpha) * W_init + alpha * M[t]. # For the next period, use W_init as the initialization for fine-tune # Set cfg.meta.alpha = 1.0 to recover the original algorithm. + # Options: float between 0.0 and 1.0. cfg.meta.alpha = 0.9 - # Use to identify experiments. - cfg.remark = '' - # Experimental Features, use this name space to save all controls for - # experimental features. - # TODO: consider remove experiment field. - # cfg.experimental = CN() - - # Only use the first n snapshots (time periods) to train the model. - # Empirically, the model learns rich dynamics from only a few periods. - # Set to -1 if using all snapshots. - # cfg.experimental.restrict_training_set = -1 - - cfg.train.tbptt_freq = 5 + # ----------------------------------------------------------------------- # + # Additional GNN options. + # ----------------------------------------------------------------------- # + # How many snapshots for the truncated back-propagation. + # Set to a very large integer to use full-back-prop-through-time + # Options: integers >= 1. + cfg.train.tbptt_freq = 10 + # Early stopping tolerance in live-update. + # Options: integers >= 1. cfg.train.internal_validation_tolerance = 5 # Computing MRR is slow in the baseline setting. # Only start to compute MRR in the test set range after certain time. + # Options: integers >= 0. cfg.train.start_compute_mrr = 0 - - # How to handle node features in AS dataset. - # available: ['one', 'one_hot_id', 'one_hot_degree_global', 'one_hot_degree_local'] - cfg.dataset.AS_node_feature = 'one' # ----------------------------------------------------------------------- # - # Additional dataset option for the BSI dataset. + # Additional dataset options. 
# ----------------------------------------------------------------------- # + + # How to handle node features in AS-733 dataset. + # Options: ['one', 'one_hot_id', 'one_hot_degree_global'] + cfg.dataset.AS_node_feature = 'one' + # Method used to sample negative edges for edge_label_index. + # Options: # 'uniform': all non-existing edges have same probability of being sampled # as negative edges. # 'src': non-existing edges from high-degree nodes are more likely to be @@ -73,10 +86,13 @@ def set_cfg_roland(cfg): # to be sampled as negative edges. cfg.dataset.negative_sample_weight = 'uniform' - # whether to load heterogeneous graphs. + # Whether to load dataset as heterogeneous graphs. + # Options: {True, False}. cfg.dataset.is_hetero = False - # where to put type information. 'append' or 'graph_attribute'. + # Where to put type information. + # Options: {'append', 'graph_attribute'}. + # Only effective if cfg.dataset.is_hetero == True. cfg.dataset.type_info_loc = 'append' # whether to look for and load cached graph. By default (load_cache=False) @@ -93,9 +109,8 @@ def set_cfg_roland(cfg): # are for validation and the last 10% snapshots are for testing. cfg.dataset.split_method = 'default' - cfg.gnn.skip_connection = 'none' # {'none', 'identity', 'affine'} # ----------------------------------------------------------------------- # - # Customized options + # Customized options: `transaction` for ROLAND dynamic graphs. # ----------------------------------------------------------------------- # # example argument group @@ -118,7 +133,6 @@ def set_cfg_roland(cfg): # full or rolling cfg.transaction.history = 'full' - # type of loss: supervised / meta cfg.transaction.loss = 'meta' @@ -156,6 +170,10 @@ def set_cfg_roland(cfg): # and its degree in snapshot t. cfg.transaction.keep_ratio = 'linear' + # ----------------------------------------------------------------------- # + # Customized options: metrics. + # ----------------------------------------------------------------------- # + cfg.metric = CN() # How many negative edges for each node to compute rank-based evaluation # metrics such as MRR and recall at K. @@ -174,25 +192,5 @@ def set_cfg_roland(cfg): # expected MRR(min) <= MRR(mean) <= MRR(max). cfg.metric.mrr_method = 'max' - # TODO: consider remove link_pred_spec field. - # Specs for the link prediction task using BSI dataset. - # All units are days. - # cfg.link_pred_spec = CN() - - # The period of `today`'s increase: how often the system is making forecast. - # E.g., when = 1, - # the system forecasts transactions in upcoming 7 days for everyday. - # One training epoch loops over - # {Jan-1-2020, Jan-2-2020, Jan-3-2020..., Dec-31-2020} - # When = 7, the system makes prediction every week. - # E.g., the system forecasts transactions in upcoming 7 days - # on every Monday. - # cfg.link_pred_spec.forecast_frequency = 1 - - # How many days into the future the model is trained to predict. - # The model forecasts transactions in (today, today + forecast_horizon]. - # NOTE: forecast_horizon should >= forecast_frequency to cover all days. - # cfg.link_pred_spec.forecast_horizon = 7 - register_config('roland', set_cfg_roland) From 22db15d28b2594251b09cab68dd4f0b7b3a6140c Mon Sep 17 00:00:00 2001 From: Tianyu Du Date: Sun, 6 Jun 2021 18:57:17 -0700 Subject: [PATCH 30/66] remove unused 'one_hot_degree_local' option. 
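The option was unused; the AS-733 loader now accepts only 'one', 'one_hot_id',
and 'one_hot_degree_global' for cfg.dataset.AS_node_feature, matching the
option list documented in graphgym/contrib/config/roland.py.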
--- graphgym/contrib/loader/roland_as.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/graphgym/contrib/loader/roland_as.py b/graphgym/contrib/loader/roland_as.py index 4cab81ad..bcf3b7a1 100644 --- a/graphgym/contrib/loader/roland_as.py +++ b/graphgym/contrib/loader/roland_as.py @@ -114,8 +114,7 @@ def load_generic_dataset(format, name, dataset_dir): scaled_edge_time = 2 * (edge_time.clone() - base) / scale assert cfg.dataset.AS_node_feature in ['one', 'one_hot_id', - 'one_hot_degree_global', - 'one_hot_degree_local'] + 'one_hot_degree_global'] if cfg.dataset.AS_node_feature == 'one': node_feature = torch.ones(num_nodes, 1) From a40de8651c2c7a473189a9d14657e717bef21c2f Mon Sep 17 00:00:00 2001 From: Tianyu Du Date: Sun, 6 Jun 2021 18:59:58 -0700 Subject: [PATCH 31/66] copy stats utility from GraphGym_dev --- graphgym/utils/stats.py | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) create mode 100644 graphgym/utils/stats.py diff --git a/graphgym/utils/stats.py b/graphgym/utils/stats.py new file mode 100644 index 00000000..f8bb0e29 --- /dev/null +++ b/graphgym/utils/stats.py @@ -0,0 +1,26 @@ +import torch + + +def node_degree(edge_index, n=None, mode='in'): + if mode == 'in': + index = edge_index[0, :] + elif mode == 'out': + index = edge_index[1, :] + else: + index = edge_index.flatten() + n = edge_index.max() + 1 if n is None else n + degree = torch.zeros(n) + ones = torch.ones(index.shape[0]) + return degree.scatter_add_(0, index, ones) + + + + + + + +# edge_index = torch.tensor([[1, 2, 3, 4], [2, 3, 4, 5]]) + +# print(compute_degree(edge_index, mode='in')) +# print(compute_degree(edge_index, mode='out')) +# print(compute_degree(edge_index, mode='both')) From 11162c4187e40e005bd7b6149d907a56533d5b8c Mon Sep 17 00:00:00 2001 From: Tianyu Du Date: Sun, 6 Jun 2021 19:24:43 -0700 Subject: [PATCH 32/66] add loader template for dynamic graphs. --- graphgym/contrib/loader/roland_template.py | 62 ++++++++++++++++++++++ 1 file changed, 62 insertions(+) create mode 100644 graphgym/contrib/loader/roland_template.py diff --git a/graphgym/contrib/loader/roland_template.py b/graphgym/contrib/loader/roland_template.py new file mode 100644 index 00000000..901cf329 --- /dev/null +++ b/graphgym/contrib/loader/roland_template.py @@ -0,0 +1,62 @@ +""" +A generic loader for the roland project, modify this template to build +loaders for other financial transaction datasets and dynamic graphs. +NOTE: this script is the trimmed version for homogenous graphs only. +Mar. 22, 2021. +# Search for TODO in this file. +""" +import os +from typing import List + +import deepsnap +import graphgym.contrib.loader.dynamic_graph_utils as utils +from deepsnap.graph import Graph +from graphgym.config import cfg +from graphgym.register import register_loader + + +def load_single_dataset(dataset_dir: str) -> Graph: + # TODO: Load your data here. + node_feature = None # (num_nodes, *) + edge_feature = None # (num_edges, *) + edge_index = None # (2, num_edges) + # edge time should be unix timestmap integers. + edge_time = None # (num_edges) + + graph = Graph( + node_feature=node_feature, + edge_feature=edge_feature, + edge_index=edge_index, + edge_time=edge_time, + directed=True + ) + + return graph + + +def load_generic_dataset(format: str, name: str, dataset_dir: str + ) -> List[deepsnap.graph.Graph]: + """Load the dataset as a list of graph snapshots. + + Args: + format (str): format of dataset. + name (str): file name of dataset. 
+ dataset_dir (str): path of dataset, do NOT include the file name, use + the parent directory of dataset file. + + Returns: + List[deepsnap.graph.Graph]: a list of graph snapshots. + """ + # TODO: change the format name. + if format == 'generic': + dataset_dir = os.path.join(dataset_dir, name) + g_all = load_single_dataset(dataset_dir) + snapshot_list = utils.make_graph_snapshot( + g_all, + snapshot_freq=cfg.transaction.snapshot_freq, + is_hetero=cfg.dataset.is_hetero) + return snapshot_list + + +# TODO: don't forget to register the loader. +register_loader('roland_generic', load_generic_dataset) From 71eb77f68292094bad633c970061229a84385d64 Mon Sep 17 00:00:00 2001 From: Tianyu Du Date: Sun, 6 Jun 2021 19:24:53 -0700 Subject: [PATCH 33/66] remove --- graphgym/models/head_mem.py | 130 ------------------------------------ 1 file changed, 130 deletions(-) delete mode 100644 graphgym/models/head_mem.py diff --git a/graphgym/models/head_mem.py b/graphgym/models/head_mem.py deleted file mode 100644 index 3114cc72..00000000 --- a/graphgym/models/head_mem.py +++ /dev/null @@ -1,130 +0,0 @@ -""" GNN heads are the last layer of a GNN right before loss computation. - -They are constructed in the init function of the gnn.GNN. -""" - -import torch -import torch.nn as nn - -from graphgym.config import cfg -from graphgym.models.layer import MLP -from graphgym.models.pooling import pooling_dict - -from graphgym.contrib.head import * -import graphgym.register as register - - -########### Head ############ - -class GNNNodeHead(nn.Module): - '''Head of GNN, node prediction''' - - def __init__(self, dim_in, dim_out): - super(GNNNodeHead, self).__init__() - self.layer_post_mp = MLP(dim_in, dim_out, - num_layers=cfg.gnn.layers_post_mp, bias=True) - - def _apply_index(self, batch): - if batch.node_label_index.shape[0] == batch.node_label.shape[0]: - return batch.node_feature[batch.node_label_index], batch.node_label - else: - return batch.node_feature[batch.node_label_index], \ - batch.node_label[batch.node_label_index] - - def forward(self, batch): - batch = self.layer_post_mp(batch) - pred, label = self._apply_index(batch) - return pred, label - - -class GNNEdgeHead(nn.Module): - '''Head of GNN, edge prediction''' - - def __init__(self, dim_in, dim_out): - ''' Head of Edge and link prediction models. - - Args: - dim_out: output dimension. For binary prediction, dim_out=1. - ''' - # Use dim_in for graph conv, since link prediction dim_out could be - # binary - # E.g. 
if decoder='dot', link probability is dot product between - # node embeddings, of dimension dim_in - super(GNNEdgeHead, self).__init__() - # module to decode edges from node embeddings - - if cfg.model.edge_decoding == 'concat': - self.layer_post_mp = MLP(dim_in * 2, dim_out, - num_layers=cfg.gnn.layers_post_mp, - bias=True) - # requires parameter - self.decode_module = lambda v1, v2: \ - self.layer_post_mp(torch.cat((v1, v2), dim=-1)) - else: - if dim_out > 1: - raise ValueError( - 'Binary edge decoding ({})is used for multi-class ' - 'edge/link prediction.'.format(cfg.model.edge_decoding)) - self.layer_post_mp = MLP(dim_in, dim_in, - num_layers=cfg.gnn.layers_post_mp, - bias=True) - if cfg.model.edge_decoding == 'dot': - self.decode_module = lambda v1, v2: torch.sum(v1 * v2, dim=-1) - elif cfg.model.edge_decoding == 'cosine_similarity': - self.decode_module = nn.CosineSimilarity(dim=-1) - else: - raise ValueError('Unknown edge decoding {}.'.format( - cfg.model.edge_decoding)) - - def _apply_index(self, batch): - return batch.node_feature[batch.edge_label_index], \ - batch.edge_label - - def forward(self, batch): - if cfg.model.edge_decoding != 'concat': - batch = self.layer_post_mp(batch) - pred, label = self._apply_index(batch) - nodes_first = pred[0] - nodes_second = pred[1] - pred = self.decode_module(nodes_first, nodes_second) - return pred, label - - -class GNNGraphHead(nn.Module): - '''Head of GNN, graph prediction - - The optional post_mp layer (specified by cfg.gnn.post_mp) is used - to transform the pooled embedding using an MLP. - ''' - - def __init__(self, dim_in, dim_out): - super(GNNGraphHead, self).__init__() - # todo: PostMP before or after global pooling - self.layer_post_mp = MLP(dim_in, dim_out, - num_layers=cfg.gnn.layers_post_mp, bias=True) - self.pooling_fun = pooling_dict[cfg.model.graph_pooling] - - def _apply_index(self, batch): - return batch.graph_feature, batch.graph_label - - def forward(self, batch): - if cfg.dataset.transform == 'ego': - graph_emb = self.pooling_fun(batch.node_feature, batch.batch, - batch.node_id_index) - else: - graph_emb = self.pooling_fun(batch.node_feature, batch.batch) - graph_emb = self.layer_post_mp(graph_emb) - batch.graph_feature = graph_emb - pred, label = self._apply_index(batch) - return pred, label - - -# Head models for external interface -head_dict = { - 'node': GNNNodeHead, - 'edge': GNNEdgeHead, - 'link_pred': GNNEdgeHead, - 'graph': GNNGraphHead -} - -head_dict = {**register.head_dict, **head_dict} From ffc2cc9d565609b4927f6dc7a96d367c9b3b9ced Mon Sep 17 00:00:00 2001 From: Tianyu Du Date: Sun, 6 Jun 2021 20:03:58 -0700 Subject: [PATCH 34/66] add GNN recurrent layer --- graphgym/contrib/network/gnn_recurrent.py | 110 ++++++++++++++++++++++ 1 file changed, 110 insertions(+) create mode 100644 graphgym/contrib/network/gnn_recurrent.py diff --git a/graphgym/contrib/network/gnn_recurrent.py b/graphgym/contrib/network/gnn_recurrent.py new file mode 100644 index 00000000..1d0c562c --- /dev/null +++ b/graphgym/contrib/network/gnn_recurrent.py @@ -0,0 +1,110 @@ +import graphgym.register as register +import torch.nn as nn +import torch.nn.functional as F +from graphgym.config import cfg +from graphgym.contrib.stage import * +from graphgym.init import init_weights +from graphgym.models.act import act_dict +from graphgym.models.feature_augment import Preprocess +from graphgym.models.feature_encoder import (edge_encoder_dict, + node_encoder_dict) +from graphgym.models.head import head_dict +from graphgym.models.layer import 
(BatchNorm1dEdge, BatchNorm1dNode, + GeneralMultiLayer, layer_dict) +from graphgym.models.layer_recurrent import RecurrentGraphLayer +from graphgym.register import register_network + + +def GNNLayer(dim_in: int, dim_out: int, has_act: bool=True, layer_id: int=0): + # General constructor for GNN layer. + return RecurrentGraphLayer(cfg.gnn.layer_type, dim_in, dim_out, + has_act, layer_id=layer_id) + + +def GNNPreMP(dim_in, dim_out): + r'''Constructs preprocessing layers: dim_in --> dim_out --> dim_out --> ... --> dim_out''' + return GeneralMultiLayer('linear', cfg.gnn.layers_pre_mp, + dim_in, dim_out, dim_inner=dim_out, + final_act=True) + + +class GNNStackStage(nn.Module): + def __init__(self, dim_in, dim_out, num_layers): + super(GNNStackStage, self).__init__() + for i in range(num_layers): + d_in = dim_in if i == 0 else dim_out + layer = GNNLayer(d_in, dim_out, layer_id=i) + self.add_module('layer{}'.format(i), layer) + self.dim_out = dim_out + + def forward(self, batch): + for layer in self.children(): + batch = layer(batch) + if cfg.gnn.l2norm: + batch.node_feature = F.normalize(batch.node_feature, p=2, dim=-1) + return batch + + +stage_dict = { + 'stack': GNNStackStage, +} + +stage_dict = {**register.stage_dict, **stage_dict} + + +class GNNRecurrent(nn.Module): + r'''The General GNN model''' + + def __init__(self, dim_in, dim_out, **kwargs): + r'''Initializes the GNN model. + + Args: + dim_in, dim_out: dimensions of in and out channels. + Parameters: + node_encoding_classes - For integer features, gives the number + of possible integer features to map. + ''' + super(GNNRecurrent, self).__init__() + # Stage: inter-layer connections. + GNNStage = stage_dict[cfg.gnn.stage_type] + # Head: prediction head, the final layer. + GNNHead = head_dict[cfg.dataset.task] + + if cfg.dataset.node_encoder: + # Encode integer node features via nn.Embeddings + NodeEncoder = node_encoder_dict[cfg.dataset.node_encoder_name] + self.node_encoder = NodeEncoder(cfg.dataset.encoder_dim) + if cfg.dataset.node_encoder_bn: + self.node_encoder_bn = BatchNorm1dNode(cfg.dataset.encoder_dim) + # Update dim_in to reflect the new dimension fo the node features + dim_in = cfg.dataset.encoder_dim + + if cfg.dataset.edge_encoder: + # Encode integer edge features via nn.Embeddings + EdgeEncoder = edge_encoder_dict[cfg.dataset.edge_encoder_name] + self.edge_encoder = EdgeEncoder(cfg.dataset.encoder_dim) + if cfg.dataset.edge_encoder_bn: + self.edge_encoder_bn = BatchNorm1dEdge(cfg.dataset.edge_dim) + + self.preprocess = Preprocess(dim_in) + d_in = self.preprocess.dim_out + + if cfg.gnn.layers_pre_mp > 0: + self.pre_mp = GNNPreMP(d_in, cfg.gnn.dim_inner) + d_in = cfg.gnn.dim_inner + if cfg.gnn.layers_mp >= 1: + self.mp = GNNStage(dim_in=d_in, + dim_out=cfg.gnn.dim_inner, + num_layers=cfg.gnn.layers_mp) + d_in = self.mp.dim_out + self.post_mp = GNNHead(dim_in=d_in, dim_out=dim_out) + + self.apply(init_weights) + + def forward(self, batch): + for module in self.children(): + batch = module(batch) + return batch + + +register_network('gnn_recurrent', GNNRecurrent) From 61563973aaa9874dc74fd341ea1635fe2a282ad6 Mon Sep 17 00:00:00 2001 From: Tianyu Du Date: Sun, 6 Jun 2021 20:04:27 -0700 Subject: [PATCH 35/66] add gnn recurrent layer. 
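RecurrentGraphLayer (used by the 'gnn_recurrent' network) wraps a standard
message-passing layer together with an embedding updater so that, per layer l
and snapshot t, h[l, t] = Update(h[l, t-1], GNN(h[l-1, t])). The updater is
selected by cfg.gnn.embed_update_method ('moving_average', 'mlp', or 'gru').

For orientation only, a minimal moving-average updater compatible with the
update_dict[...](dim_in, dim_out, layer_id) constructor call could look like
the sketch below; the actual implementations live in graphgym/models/update.py
and may differ, and the per-node keep_ratio attribute is an assumption based
on cfg.transaction.keep_ratio:

    import torch.nn as nn

    class MovingAverageUpdater(nn.Module):
        # Illustrative sketch, not the code in graphgym/models/update.py.
        def __init__(self, dim_in, dim_out, layer_id):
            super().__init__()
            # dim_in/dim_out are unused in this simple variant; kept only to
            # match the update_dict constructor signature.
            self.layer_id = layer_id

        def forward(self, batch):
            # Assumed: keep_ratio is a per-node weight in [0, 1] derived from
            # node degrees (see cfg.transaction.keep_ratio).
            keep = batch.keep_ratio.view(-1, 1)
            prev = batch.node_states[self.layer_id]
            # new state = keep * old state + (1 - keep) * GNN output.
            batch.node_states[self.layer_id] = \
                keep * prev + (1 - keep) * batch.node_feature
            return batch

The 'gru' variant replaces this convex combination with
new_state = GRU(node_feature, old_state).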
--- graphgym/models/layer_recurrent.py | 281 +++++++---------------------- 1 file changed, 68 insertions(+), 213 deletions(-) diff --git a/graphgym/models/layer_recurrent.py b/graphgym/models/layer_recurrent.py index df60700e..50e4cfde 100644 --- a/graphgym/models/layer_recurrent.py +++ b/graphgym/models/layer_recurrent.py @@ -1,26 +1,55 @@ +''' +This file contains wrapper layers and constructors for dynamic/recurrent GNNs. +''' +from graphgym.register import register_layer import torch import torch.nn as nn import torch.nn.functional as F -import torch_geometric as pyg - from graphgym.config import cfg from graphgym.models.act import act_dict -from graphgym.contrib.layer.generalconv import (GeneralConvLayer, - GeneralEdgeConvLayer) - -from graphgym.contrib.layer import * -import graphgym.register as register - - -## General classes -class GeneralLayer(nn.Module): - '''General wrapper for layers''' - - def __init__(self, name, dim_in, dim_out, has_act=True, has_bn=True, - has_l2norm=False, **kwargs): - super(GeneralLayer, self).__init__() +from graphgym.models.layer import layer_dict +from graphgym.models.update import update_dict + + +class RecurrentGraphLayer(nn.Module): + ''' + The recurrent graph layer for snapshot-based dynamic graphs. + This layer requires + (1): a GNN block for message passing. + (2): a node embedding/state update module. + + This layer updates node embedding as the following: + h[l, t] = Update(h[l, t-1], GNN(h[l-1, t])). + + This layer corresponds to a particular l-th layer in multi-layer setting, + the layer id is specified by 'id' in '__init__'. + ''' + def __init__(self, name: str, dim_in: int, dim_out: int, has_act: bool=True, + has_bn: bool=True, has_l2norm: bool=False, layer_id: int=0, + **kwargs): + ''' + Args: + name (str): The name of GNN layer to use for message-passing. + dim_in (int): Dimension of input node feature. + dim_out (int): Dimension of updated embedding. + has_act (bool, optional): Whether to after message passing. + Defaults to True. + has_bn (bool, optional): Whether add batch normalization for + node embedding. Defaults to True. + has_l2norm (bool, optional): Whether to add L2-normalization for + message passing result. Defaults to False. + layer_id (int, optional): The layer id in multi-layer setting. + Defaults to 0. + ''' + super(RecurrentGraphLayer, self).__init__() self.has_l2norm = has_l2norm + if layer_id < 0: + raise ValueError(f'layer_id must be non-negative, got {layer_id}.') + self.layer_id = layer_id has_bn = has_bn and cfg.gnn.batchnorm + self.dim_in = dim_in + self.dim_out = dim_out + # Construct the internal GNN layer. self.layer = layer_dict[name](dim_in, dim_out, bias=not has_bn, **kwargs) layer_wrapper = [] @@ -33,206 +62,32 @@ def __init__(self, name, dim_in, dim_out, has_act=True, has_bn=True, if has_act: layer_wrapper.append(act_dict[cfg.gnn.act]) self.post_layer = nn.Sequential(*layer_wrapper) + # self.update = self.construct_update_block(self.dim_in, self.dim_out, + # self.layer_id) + self.update = update_dict[cfg.gnn.embed_update_method](self.dim_in, + self.dim_out, + self.layer_id) + + def _init_hidden_state(self, batch): + # Initialize all node-states to zero. + if not isinstance(batch.node_states[self.layer_id], torch.Tensor): + batch.node_states[self.layer_id] = torch.zeros( + batch.node_feature.shape[0], self.dim_out).to( + batch.node_feature.device) def forward(self, batch): + # Message passing. 
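+        # The wrapped GNN consumes node_feature from the current snapshot;
+        # its output is batch-normed/activated by post_layer, folded into
+        # node_states[layer_id] by the update module, and written back to
+        # node_feature as this layer's output.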
batch = self.layer(batch) - if isinstance(batch, torch.Tensor): - batch = self.post_layer(batch) - if self.has_l2norm: - batch = F.normalize(batch, p=2, dim=1) - else: - batch.node_feature = self.post_layer(batch.node_feature) - if self.has_l2norm: - batch.node_feature = F.normalize(batch.node_feature, p=2, dim=1) - return batch - - -class GeneralMultiLayer(nn.Module): - '''General wrapper for stack of layers''' - - def __init__(self, name, num_layers, dim_in, dim_out, dim_inner=None, - final_act=True, **kwargs): - super(GeneralMultiLayer, self).__init__() - dim_inner = dim_in if dim_inner is None else dim_inner - for i in range(num_layers): - d_in = dim_in if i == 0 else dim_inner - d_out = dim_out if i == num_layers - 1 else dim_inner - has_act = final_act if i == num_layers - 1 else True - layer = GeneralLayer(name, d_in, d_out, has_act, **kwargs) - self.add_module('Layer_{}'.format(i), layer) - - def forward(self, batch): - for layer in self.children(): - batch = layer(batch) - return batch - - -## Core basic layers -# Input: batch; Output: batch -class Linear(nn.Module): - def __init__(self, dim_in, dim_out, bias=False, **kwargs): - super(Linear, self).__init__() - self.model = nn.Linear(dim_in, dim_out, bias=bias) - - def forward(self, batch): - if isinstance(batch, torch.Tensor): - batch = self.model(batch) - else: - batch.node_feature = self.model(batch.node_feature) - return batch - - -class BatchNorm1dNode(nn.Module): - '''General wrapper for layers''' - - def __init__(self, dim_in): - super(BatchNorm1dNode, self).__init__() - self.bn = nn.BatchNorm1d(dim_in, eps=cfg.bn.eps, momentum=cfg.bn.mom) - - def forward(self, batch): - batch.node_feature = self.bn(batch.node_feature) - return batch - - -class BatchNorm1dEdge(nn.Module): - '''General wrapper for layers''' - - def __init__(self, dim_in): - super(BatchNorm1dEdge, self).__init__() - self.bn = nn.BatchNorm1d(dim_in, eps=cfg.bn.eps, momentum=cfg.bn.mom) - - def forward(self, batch): - batch.edge_feature = self.bn(batch.edge_feature) - return batch - - -class MLP(nn.Module): - def __init__(self, dim_in, dim_out, bias=True, dim_inner=None, - num_layers=2, **kwargs): - ''' - Note: MLP works for 0 layers - ''' - super(MLP, self).__init__() - dim_inner = dim_in if dim_inner is None else dim_inner - layers = [] - if num_layers > 1: - layers.append( - GeneralMultiLayer('linear', num_layers - 1, dim_in, dim_inner, - dim_inner, final_act=True)) - layers.append(Linear(dim_inner, dim_out, bias)) - else: - layers.append(Linear(dim_in, dim_out, bias)) - self.model = nn.Sequential(*layers) - - def forward(self, batch): - if isinstance(batch, torch.Tensor): - batch = self.model(batch) - else: - batch.node_feature = self.model(batch.node_feature) - return batch - - -class GCNConv(nn.Module): - def __init__(self, dim_in, dim_out, bias=False, **kwargs): - super(GCNConv, self).__init__() - self.model = pyg.nn.GCNConv(dim_in, dim_out, bias=bias) - - def forward(self, batch): - batch.node_feature = self.model(batch.node_feature, batch.edge_index) - return batch - - -class SAGEConv(nn.Module): - def __init__(self, dim_in, dim_out, bias=False, **kwargs): - super(SAGEConv, self).__init__() - self.model = pyg.nn.SAGEConv(dim_in, dim_out, bias=bias, concat=True) - - def forward(self, batch): - batch.node_feature = self.model(batch.node_feature, batch.edge_index) - return batch - - -class GATConv(nn.Module): - def __init__(self, dim_in, dim_out, bias=False, **kwargs): - super(GATConv, self).__init__() - self.model = pyg.nn.GATConv(dim_in, dim_out, 
bias=bias) - - def forward(self, batch): - batch.node_feature = self.model(batch.node_feature, batch.edge_index) - return batch - - -class GINConv(nn.Module): - def __init__(self, dim_in, dim_out, bias=False, **kwargs): - super(GINConv, self).__init__() - gin_nn = nn.Sequential(nn.Linear(dim_in, dim_out), nn.ReLU(), - nn.Linear(dim_out, dim_out)) - self.model = pyg.nn.GINConv(gin_nn) - - def forward(self, batch): - batch.node_feature = self.model(batch.node_feature, batch.edge_index) + batch.node_feature = self.post_layer(batch.node_feature) + if self.has_l2norm: + batch.node_feature = F.normalize(batch.node_feature, p=2, dim=1) + + self._init_hidden_state(batch) + # Compute output from updater block. + batch = self.update(batch) + # batch.node_states[self.layer_id] = node_states_new + batch.node_feature = batch.node_states[self.layer_id] return batch -class SplineConv(nn.Module): - def __init__(self, dim_in, dim_out, bias=False, **kwargs): - super(SplineConv, self).__init__() - self.model = pyg.nn.SplineConv(dim_in, dim_out, - dim=1, kernel_size=2, bias=bias) - - def forward(self, batch): - batch.node_feature = self.model(batch.node_feature, batch.edge_index, - batch.edge_feature) - return batch - - -class GeneralConv(nn.Module): - def __init__(self, dim_in, dim_out, bias=False, **kwargs): - super(GeneralConv, self).__init__() - self.model = GeneralConvLayer(dim_in, dim_out, bias=bias) - - def forward(self, batch): - batch.node_feature = self.model(batch.node_feature, batch.edge_index) - return batch - - -class GeneralEdgeConv(nn.Module): - def __init__(self, dim_in, dim_out, bias=False, **kwargs): - super(GeneralEdgeConv, self).__init__() - self.model = GeneralEdgeConvLayer(dim_in, dim_out, bias=bias) - - def forward(self, batch): - batch.node_feature = self.model(batch.node_feature, batch.edge_index, - edge_feature=batch.edge_feature) - return batch - - -class GeneralSampleEdgeConv(nn.Module): - def __init__(self, dim_in, dim_out, bias=False, **kwargs): - super(GeneralSampleEdgeConv, self).__init__() - self.model = GeneralEdgeConvLayer(dim_in, dim_out, bias=bias) - - def forward(self, batch): - edge_mask = torch.rand(batch.edge_index.shape[1]) < cfg.gnn.keep_edge - edge_index = batch.edge_index[:, edge_mask] - edge_feature = batch.edge_feature[edge_mask, :] - batch.node_feature = self.model(batch.node_feature, edge_index, - edge_feature=edge_feature) - return batch - - -layer_dict = { - 'linear': Linear, - 'mlp': MLP, - 'gcnconv': GCNConv, - 'sageconv': SAGEConv, - 'gatconv': GATConv, - 'splineconv': SplineConv, - 'ginconv': GINConv, - 'generalconv': GeneralConv, - 'generaledgeconv': GeneralEdgeConv, - 'generalsampleedgeconv': GeneralSampleEdgeConv, -} - -# register additional convs -layer_dict = {**register.layer_dict, **layer_dict} +register_layer('recurrent_graph_layer', RecurrentGraphLayer) From 57fd6eb0f1d8791524cfcedbf3f0699ac44dbd97 Mon Sep 17 00:00:00 2001 From: Tianyu Du Date: Sun, 6 Jun 2021 21:28:54 -0700 Subject: [PATCH 36/66] add cfg.dataset.link_pred_all_edges option --- graphgym/loader.py | 29 ++++++++++++++++++++--------- 1 file changed, 20 insertions(+), 9 deletions(-) diff --git a/graphgym/loader.py b/graphgym/loader.py index c4e810ac..402e8062 100644 --- a/graphgym/loader.py +++ b/graphgym/loader.py @@ -166,12 +166,23 @@ def transform_before_split(dataset): return dataset -def transform_after_split(datasets): +def transform_after_split(datasets, dataset): ''' Dataset transformation after train/val/test split :param dataset: A list of DeepSNAP dataset objects :return: 
A list of transformed DeepSNAP dataset objects ''' + if cfg.dataset.link_pred_all_edges: + for t in range(len(datasets[2])): + g = datasets[2].graphs[t] + neg = g.negative_sampling(dataset[t].edge_index, + dataset[t].num_nodes, + dataset[t].edge_index.shape[1]) + pos = dataset[t].edge_index + g.edge_label_index = torch.cat((neg, pos), dim=1) + g.edge_label = torch.cat((torch.zeros(neg.shape[1]), + torch.ones(pos.shape[1]))) + if cfg.dataset.transform == 'ego': for split_dataset in datasets: split_dataset.apply_transform(ego_nets, @@ -190,18 +201,18 @@ def transform_after_split(datasets): def create_dataset(): - ## Load dataset + # Load dataset time1 = time.time() if cfg.dataset.format == 'OGB': graphs, splits = load_dataset() else: graphs = load_dataset() - ## Filter graphs + # Filter graphs time2 = time.time() min_node = filter_graphs() - ## Create whole dataset + # Create whole dataset dataset = GraphDataset( graphs, task=cfg.dataset.task, @@ -211,10 +222,10 @@ def create_dataset(): resample_disjoint=cfg.dataset.resample_disjoint, minimum_node_per_graph=min_node) - ## Transform the whole dataset + # Transform the whole dataset dataset = transform_before_split(dataset) - ## Split dataset + # Split dataset time3 = time.time() # Use custom data splits if cfg.dataset.format == 'OGB': @@ -232,14 +243,14 @@ def create_dataset(): for i in range(1, len(datasets)): dataset.edge_negative_sampling_ratio = 1 - ## Transform each split dataset + # Transform each split dataset time4 = time.time() - datasets = transform_after_split(datasets) + datasets = transform_after_split(datasets, dataset) time5 = time.time() logging.info('Load: {:.4}s, Before split: {:.4}s, ' 'Split: {:.4}s, After split: {:.4}s'.format( - time2 - time1, time3 - time2, time4 - time3, time5 - time4)) + time2 - time1, time3 - time2, time4 - time3, time5 - time4)) return datasets From f61cffb4a38646b70b68274e4c59cedc16733600 Mon Sep 17 00:00:00 2001 From: Tianyu Du Date: Sun, 6 Jun 2021 21:29:01 -0700 Subject: [PATCH 37/66] add cfg.dataset.link_pred_all_edges --- graphgym/contrib/config/roland.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/graphgym/contrib/config/roland.py b/graphgym/contrib/config/roland.py index 58a32bac..1e4f242b 100644 --- a/graphgym/contrib/config/roland.py +++ b/graphgym/contrib/config/roland.py @@ -109,6 +109,8 @@ def set_cfg_roland(cfg): # are for validation and the last 10% snapshots are for testing. cfg.dataset.split_method = 'default' + # In the case of live-update, whether to predict all edges at time t+1. + cfg.dataset.link_pred_all_edges = False # ----------------------------------------------------------------------- # # Customized options: `transaction` for ROLAND dynamic graphs. 
# ----------------------------------------------------------------------- # From 910274449a89e39ee6a56fa3f121efe648107be3 Mon Sep 17 00:00:00 2001 From: Tianyu Du Date: Sun, 6 Jun 2021 21:30:33 -0700 Subject: [PATCH 38/66] remove unused training scheme --- .../contrib/train/train_live_update_bptt.py | 269 ----------------- .../train/train_live_update_fixed_split.py | 281 ------------------ 2 files changed, 550 deletions(-) delete mode 100644 graphgym/contrib/train/train_live_update_bptt.py delete mode 100644 graphgym/contrib/train/train_live_update_fixed_split.py diff --git a/graphgym/contrib/train/train_live_update_bptt.py b/graphgym/contrib/train/train_live_update_bptt.py deleted file mode 100644 index e07b8d95..00000000 --- a/graphgym/contrib/train/train_live_update_bptt.py +++ /dev/null @@ -1,269 +0,0 @@ -""" -The baseline training (non-incremental) training for live-update scheme. -NOTE: this setup requires extensive GPU memory and could lead to OOM error. -""" -import copy -import datetime -import logging -import os -from typing import Dict, List, Optional, Tuple - -import deepsnap -import numpy as np -import torch -from graphgym.checkpoint import clean_ckpt -from graphgym.config import cfg -from graphgym.contrib.train import train_utils -from graphgym.loss import compute_loss -from graphgym.optimizer import create_optimizer, create_scheduler -from graphgym.register import register_train -from graphgym.utils.io import makedirs_rm_exist -from graphgym.utils.stats import node_degree -from torch.utils.tensorboard import SummaryWriter -from tqdm import tqdm - - -@torch.no_grad() -def get_task_batch(dataset: deepsnap.dataset.GraphDataset, - today: int, tomorrow: int, - prev_node_states: Optional[Dict[str, List[torch.Tensor]]] - ) -> deepsnap.graph.Graph: - """ - Construct batch required for the task (today, tomorrow). As defined in - batch's get_item method (used to get edge_label and get_label_index), - edge_label and edge_label_index returned would be different everytime - get_task_batch() is called. - - Moreover, copy node-memories (node_states and node_cells) to the batch. - """ - assert today < tomorrow < len(dataset) - # Get edges for message passing and prediction task. - batch = dataset[today].clone() - batch.edge_label = dataset[tomorrow].edge_label.clone() - batch.edge_label_index = dataset[tomorrow].edge_label_index.clone() - - # Copy previous memory to the batch. - if prev_node_states is not None: - for key, val in prev_node_states.items(): - copied = [x.detach().clone() for x in val] - setattr(batch, key, copied) - - batch = train_utils.move_batch_to_device(batch, cfg.device) - return batch - - -@torch.no_grad() -def update_node_states(model, dataset, task: Tuple[int, int], - prev_node_states: Optional[ - Dict[str, List[torch.Tensor]]] - ) -> Dict[str, List[torch.Tensor]]: - """Perform the provided task and keep track of the latest node_states. - - Example: task = (t, t+1), - the prev_node_states contains node embeddings at time (t-1). - the model perform task (t, t+1): - Input: (node embedding at t - 1, edges at t). - Output: possible transactions at t+1. - the model also generates node embeddings at t. - - after doing task (t, t+1), node_states contains information - from snapshot t. - """ - today, tomorrow = task - batch = get_task_batch(dataset, today, tomorrow, prev_node_states).clone() - # Let the model modify batch.node_states (and batch.node_cells). - _, _ = model(batch) - # Collect the updated node states. 
- out = dict() - out['node_states'] = [x.detach().clone() for x in batch.node_states] - if isinstance(batch.node_cells[0], torch.Tensor): - out['node_cells'] = [x.detach().clone() for x in batch.node_cells] - - return out - - -def train_step(model, optimizer, scheduler, dataset, - task: Tuple[int, int]) -> dict: - """ - After receiving ground truth from a particular task, update the model by - performing back-propagation. - For example, on day t, the ground truth of task (t-1, t) has been revealed, - train the model using G[t-1] for message passing and label[t] as target. - """ - optimizer.zero_grad() - torch.cuda.empty_cache() - model.train() - - today, _ = task - - # get loss over time. - total_loss_over_time = torch.tensor(0.0).to(torch.device(cfg.device)) - # iterate from the beginning to compute node_states. - for t in range(today + 1): # (0, 1), (1, 2), ..., (today, today+1). - # perform task (t, t+1), use information up to tomorrow. - new_batch = get_task_batch(dataset, t, t + 1, None).clone() - if t > 0: # manually inherit node states and node cells for LSTM. - new_batch.node_states = batch.node_states - new_batch.node_cells = batch.node_cells - batch = new_batch - pred, true = model(batch) - loss, _ = compute_loss(pred, true) - if t > today - cfg.train.tbptt_freq: - # Perform the truncated version, only accumulate loss for recent - # snapshots. - total_loss_over_time += loss - # get average loss over time. - total_loss_over_time /= (today + 1) - # perform back-prop through time. - total_loss_over_time.backward() - optimizer.step() - - scheduler.step() - return {'loss': total_loss_over_time} - - -@torch.no_grad() -def evaluate_step(model, dataset, task: Tuple[int, int], fast: bool = False - ) -> dict: - """ - Evaluate model's performance on task = (today, tomorrow) - where today and tomorrow are integers indexing snapshots. - """ - today, tomorrow = task - model.eval() - - # Run forward pass to get the latest node states. - for t in range(today): # (0, 1), (1, 2), ...(today-1, today) - # Iterative through snapshots in the past, up to (today-1, today) - new_batch = get_task_batch(dataset, t, t + 1, None).clone() - if t > 0: - new_batch.node_states = batch.node_states - new_batch.node_cells = batch.node_cells - batch = new_batch - # forward pass to update node_states in batch. - _, _ = model(batch) - - # Evaluation. - # (today, today+1) - cur_batch = get_task_batch(dataset, today, tomorrow, None).clone() - if today > 0: - cur_batch.node_states = copy.deepcopy(batch.node_states) - cur_batch.node_cells = copy.deepcopy(batch.node_cells) - - pred, true = model(cur_batch) - loss, _ = compute_loss(pred, true) - - if fast: - # skip MRR calculation for internal validation. - return {'loss': loss.item()} - - mrr_batch = get_task_batch(dataset, today, tomorrow, None).clone() - if today > 0: - mrr_batch.node_states = copy.deepcopy(batch.node_states) - mrr_batch.node_cells = copy.deepcopy(batch.node_cells) - - mrr = train_utils.compute_MRR( - mrr_batch, - model, - num_neg_per_node=cfg.metric.mrr_num_negative_edges, - method=cfg.metric.mrr_method) - - return {'loss': loss.item(), 'mrr': mrr} - - -def train_live_update_bptt(loggers, loaders, model, optimizer, scheduler, datasets, - **kwargs): - for dataset in datasets: - # Sometimes edge degree info is already included in dataset. - if not hasattr(dataset[0], 'keep_ratio'): - train_utils.precompute_edge_degree_info(dataset) - - num_splits = len(loggers) # train/val/test splits. - # range for today in (today, tomorrow) task pairs. 
- task_range = range(len(datasets[0]) - cfg.transaction.horizon) - - t = datetime.datetime.now().strftime('%b%d_%H-%M-%S') - - # directory to store tensorboard files of this run. - out_dir = cfg.out_dir.replace('/', '\\') - # dir to store all run outputs for the entire batch. - run_dir = 'runs_' + cfg.remark - - print(f'Tensorboard directory: {out_dir}') - # If tensorboard directory exists, this config is in the re-run phase - # of run_batch, replace logs of previous runs with the new one. - makedirs_rm_exist(f'./{run_dir}/{out_dir}') - writer = SummaryWriter(f'./{run_dir}/{out_dir}') - - # save a copy of configuration for later identifications. - with open(f'./{run_dir}/{out_dir}/config.yaml', 'w') as f: - cfg.dump(stream=f) - - for t in tqdm(task_range, desc='Snapshot'): - # current task: t --> t+1. - # (1) Evaluate model's performance on this task, at this time, the - # model has seen no information on t+1, this evaluation is fair. - for i in range(1, num_splits): - perf = evaluate_step(model, datasets[i], (t, t + 1), fast=False) - - writer.add_scalars('val' if i == 1 else 'test', perf, t) - - # (2) Reveal the ground truth of task (t, t+1) and update the model - # to prepare for the next task. - del optimizer, scheduler # use new optimizers. - optimizer = create_optimizer(model.parameters()) - scheduler = create_scheduler(optimizer) - - # best model's validation loss, training epochs, and state_dict. - best_model = {'val_loss': np.inf, 'train_epoch': 0, 'state': None} - # keep track of how long we have NOT update the best model. - best_model_unchanged = 0 - # after not updating the best model for `tol` epochs, stop. - tol = cfg.train.internal_validation_tolerance - - # internal training loop (intra-snapshot cross-validation). - # choose the best model using current validation set, prepare for - # next task. - - for i in tqdm(range(cfg.optim.max_epoch + 1), desc='live update', - leave=False): - # Start with the un-trained model (i = 0), evaluate the model. - internal_val_perf = evaluate_step(model, datasets[1], - (t, t + 1), fast=True) - val_loss = internal_val_perf['loss'] - - if val_loss < best_model['val_loss']: - # replace the best model with the current model. - best_model = {'val_loss': val_loss, 'train_epoch': i, - 'state': copy.deepcopy(model.state_dict())} - best_model_unchanged = 0 - else: - # the current best model has dominated for these epochs. - best_model_unchanged += 1 - - if best_model_unchanged >= tol: - # If the best model has not been updated for a while, stop. - break - else: - # Otherwise, keep training. - train_perf = train_step(model, optimizer, scheduler, - datasets[0], (t, t + 1)) - writer.add_scalars('train', train_perf, t) - - writer.add_scalar('internal_best_val', best_model['val_loss'], t) - writer.add_scalar('best epoch', best_model['train_epoch'], t) - - # (3) Actually perform the update on training set to get node_states - # contains information up to time t. - # Use the best model selected from intra-snapshot cross-validation. 
- model.load_state_dict(best_model['state']) - - writer.close() - - if cfg.train.ckpt_clean: - clean_ckpt() - - logging.info('Task done, results saved in {}'.format(cfg.out_dir)) - - -register_train('live_update_baseline', train_live_update_bptt) diff --git a/graphgym/contrib/train/train_live_update_fixed_split.py b/graphgym/contrib/train/train_live_update_fixed_split.py deleted file mode 100644 index cde96628..00000000 --- a/graphgym/contrib/train/train_live_update_fixed_split.py +++ /dev/null @@ -1,281 +0,0 @@ -""" -A pipeline training model using live-update scheme but only evaluates the model -using the last 10% of snapshots, which is the same as conventional chronological -data splitting method. -""" -import copy -import datetime -import logging -import os -from typing import Dict, List, Optional, Tuple - -import deepsnap -import numpy as np -import torch -from graphgym.checkpoint import clean_ckpt -from graphgym.config import cfg -from graphgym.contrib.train import train_utils -from graphgym.loss import compute_loss -from graphgym.optimizer import create_optimizer, create_scheduler -from graphgym.register import register_train -from graphgym.utils.io import makedirs_rm_exist -from torch.utils.tensorboard import SummaryWriter -from tqdm import tqdm - - -@torch.no_grad() -def get_task_batch(dataset: deepsnap.dataset.GraphDataset, - today: int, tomorrow: int, - prev_node_states: Optional[Dict[str, List[torch.Tensor]]] - ) -> deepsnap.graph.Graph: - """ - Construct batch required for the task (today, tomorrow). As defined in - batch's get_item method (used to get edge_label and get_label_index), - edge_label and edge_label_index returned would be different everytime - get_task_batch() is called. - - Moreover, copy node-memories (node_states and node_cells) to the batch. - """ - assert today < tomorrow < len(dataset) - # Get edges for message passing and prediction task. - batch = dataset[today].clone() - batch.edge_label = dataset[tomorrow].edge_label.clone() - batch.edge_label_index = dataset[tomorrow].edge_label_index.clone() - - # Copy previous memory to the batch. - if prev_node_states is not None: - for key, val in prev_node_states.items(): - copied = [x.detach().clone() for x in val] - setattr(batch, key, copied) - - batch = train_utils.move_batch_to_device(batch, cfg.device) - return batch - - -@torch.no_grad() -def update_node_states(model, dataset, task: Tuple[int, int], - prev_node_states: Optional[ - Dict[str, List[torch.Tensor]]] - ) -> Dict[str, List[torch.Tensor]]: - """Perform the provided task and keep track of the latest node_states. - - Example: task = (t, t+1), - the prev_node_states contains node embeddings at time (t-1). - the model perform task (t, t+1): - Input: (node embedding at t - 1, edges at t). - Output: possible transactions at t+1. - the model also generates node embeddings at t. - - after doing task (t, t+1), node_states contains information - from snapshot t. - """ - today, tomorrow = task - batch = get_task_batch(dataset, today, tomorrow, prev_node_states).clone() - # Let the model modify batch.node_states (and batch.node_cells). - _, _ = model(batch) - # Collect the updated node states. 
- out = dict() - out['node_states'] = [x.detach().clone() for x in batch.node_states] - if isinstance(batch.node_cells[0], torch.Tensor): - out['node_cells'] = [x.detach().clone() for x in batch.node_cells] - - return out - - -def train_step(model, optimizer, scheduler, dataset, - task: Tuple[int, int], - prev_node_states: Optional[Dict[str, torch.Tensor]] - ) -> dict: - """ - After receiving ground truth from a particular task, update the model by - performing back-propagation. - For example, on day t, the ground truth of task (t-1, t) has been revealed, - train the model using G[t-1] for message passing and label[t] as target. - """ - optimizer.zero_grad() - torch.cuda.empty_cache() - - today, tomorrow = task - model.train() - batch = get_task_batch(dataset, today, tomorrow, prev_node_states).clone() - - pred, true = model(batch) - loss, pred_score = compute_loss(pred, true) - loss.backward() - optimizer.step() - - scheduler.step() - return {'loss': loss} - - -@torch.no_grad() -def evaluate_step(model, dataset, task: Tuple[int, int], - prev_node_states: Optional[Dict[str, List[torch.Tensor]]], - fast: bool = False) -> dict: - """ - Evaluate model's performance on task = (today, tomorrow) - where today and tomorrow are integers indexing snapshots. - """ - today, tomorrow = task - model.eval() - batch = get_task_batch(dataset, today, tomorrow, prev_node_states).clone() - - pred, true = model(batch) - loss, pred_score = compute_loss(pred, true) - - if fast: - # skip MRR calculation for internal validation. - return {'loss': loss.item()} - - mrr_batch = get_task_batch(dataset, today, tomorrow, - prev_node_states).clone() - - mrr = train_utils.compute_MRR(mrr_batch, model, -1, 'all') - - return {'loss': loss.item(), 'mrr': mrr} - - -def train_live_update(loggers, loaders, model, optimizer, scheduler, datasets, - **kwargs): - - for dataset in datasets: - # Sometimes edge degree info is already included in dataset. - if not hasattr(dataset[0], 'keep_ratio'): - train_utils.precompute_edge_degree_info(dataset) - - if cfg.dataset.premade_datasets == 'fresh_save_cache': - if not os.path.exists(f'{cfg.dataset.dir}/cache/'): - os.mkdir(f'{cfg.dataset.dir}/cache/') - cache_path = '{}/cache/cached_datasets_{}_{}_{}.pt'.format( - cfg.dataset.dir, cfg.dataset.format.replace('.tsv', ''), - cfg.transaction.snapshot_freq, - datetime.now().strftime('%Y_%m_%d__%H_%M_%S') - ) - torch.save(datasets, cache_path) - - num_splits = len(loggers) # train/val/test splits. - # range for today in (today, tomorrow) task pairs. - task_range = range(len(datasets[0]) - cfg.transaction.horizon) - - t = datetime.datetime.now().strftime('%b%d_%H-%M-%S') - - # directory to store tensorboard files of this run. - out_dir = cfg.out_dir.replace('/', '\\') - # dir to store all run outputs for the entire batch. - run_dir = 'runs_' + cfg.remark - - print(f'Tensorboard directory: {out_dir}') - # If tensorboard directory exists, this config is in the re-run phase - # of run_batch, replace logs of previous runs with the new one. - makedirs_rm_exist(f'./{run_dir}/{out_dir}') - writer = SummaryWriter(f'./{run_dir}/{out_dir}') - - # save a copy of configuration for later identifications. - with open(f'./{run_dir}/{out_dir}/config.yaml', 'w') as f: - cfg.dump(stream=f) - - prev_node_states = None # no previous state on day 0. - # {'node_states': [Tensor, Tensor], 'node_cells: [Tensor, Tensor]} - - model_init = None # for meta-learning only, a model.state_dict() object. 
- for t in tqdm(task_range, desc='snapshot', leave=True): - # current task: t --> t+1. - # (1) Evaluate model's performance on this task, at this time, the - # model has seen no information on t+1, this evaluation is fair. - # Only evaluate the performance within the test set split region. - # Test snapshots are indexed [cfg.train.start_compute_mrr, end]. - perf = evaluate_step(model, datasets[2], (t, t + 1), - prev_node_states, fast=t < cfg.train.start_compute_mrr) - - writer.add_scalars('test', perf, t) - - # (2) Reveal the ground truth of task (t, t+1) and update the model - # to prepare for the next task. - del optimizer, scheduler # use new optimizers. - optimizer = create_optimizer(model.parameters()) - scheduler = create_scheduler(optimizer) - - # best model's validation loss, training epochs, and state_dict. - # The untrained model is the default best model. - best_model = {'val_loss': np.inf, 'train_epoch': 0, - 'state': copy.deepcopy(model.state_dict())} - # keep track of how long we have NOT update the best model. - best_model_unchanged = 0 - # after not updating the best model for `tol` epochs, stop. - tol = cfg.train.internal_validation_tolerance - - # internal training loop (intra-snapshot cross-validation). - # choose the best model using current validation set, prepare for - # next task. - - if cfg.meta.is_meta and (model_init is not None): - # For meta-learning, start fine-tuning from the pre-computed - # initialization weight. - model.load_state_dict(copy.deepcopy(model_init)) - - for i in tqdm(range(cfg.optim.max_epoch + 1), desc='live update', - leave=True): - # Start with the un-trained model (i = 0), evaluate the model. - internal_val_perf = evaluate_step(model, datasets[1], - (t, t + 1), - prev_node_states, fast=True) - val_loss = internal_val_perf['loss'] - - if val_loss < best_model['val_loss']: - # replace the best model with the current model. - best_model = {'val_loss': val_loss, 'train_epoch': i, - 'state': copy.deepcopy(model.state_dict())} - best_model_unchanged = 0 - else: - # the current best model has dominated for these epochs. - best_model_unchanged += 1 - - # if (i >= 2 * tol) and (best_model_unchanged >= tol): - if best_model_unchanged >= tol: - # If the best model has not been updated for a while, stop. - break - else: - # Otherwise, keep training. - train_perf = train_step(model, optimizer, scheduler, - datasets[0], (t, t + 1), - prev_node_states) - writer.add_scalars('train', train_perf, t) - - writer.add_scalar('internal_best_val', best_model['val_loss'], t) - writer.add_scalar('best epoch', best_model['train_epoch'], t) - - # (3) Actually perform the update on training set to get node_states - # contains information up to time t. - # Use the best model selected from intra-snapshot cross-validation. - # if best_model['state'] is None: - # breakpoint() - model.load_state_dict(best_model['state']) - - if cfg.meta.is_meta: # update meta-learning's initialization weights. - if model_init is None: # for the first task. - model_init = copy.deepcopy(best_model['state']) - else: # for subsequent task, update init. - if cfg.meta.method == 'moving_average': - new_weight = cfg.meta.alpha - elif cfg.meta.method == 'online_mean': - new_weight = 1 / (t + 1) # for t=1, the second item, 1/2. - else: - raise ValueError(f'Invalid method: {cfg.meta.method}') - - # (1-new_weight)*model_init + new_weight*best_model. 
- model_init = train_utils.average_state_dict(model_init, - best_model['state'], - new_weight) - - prev_node_states = update_node_states(model, datasets[0], (t, t + 1), - prev_node_states) - - writer.close() - - if cfg.train.ckpt_clean: - clean_ckpt() - - logging.info('Task done, results saved in {}'.format(cfg.out_dir)) - - -register_train('live_update_fixed_split', train_live_update) From 9f7a5fdd0b2d8bb1527637513a91206daed6c3e2 Mon Sep 17 00:00:00 2001 From: Tianyu Du Date: Mon, 7 Jun 2021 00:14:48 -0700 Subject: [PATCH 39/66] add template for homogenous graphs. --- graphgym/contrib/loader/roland_template.py | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/graphgym/contrib/loader/roland_template.py b/graphgym/contrib/loader/roland_template.py index 901cf329..b576029e 100644 --- a/graphgym/contrib/loader/roland_template.py +++ b/graphgym/contrib/loader/roland_template.py @@ -10,18 +10,25 @@ import deepsnap import graphgym.contrib.loader.dynamic_graph_utils as utils +import torch from deepsnap.graph import Graph from graphgym.config import cfg from graphgym.register import register_loader def load_single_dataset(dataset_dir: str) -> Graph: - # TODO: Load your data here. - node_feature = None # (num_nodes, *) - edge_feature = None # (num_edges, *) - edge_index = None # (2, num_edges) + # TODO: Load your data from dataset_dir here. + # Example: + num_nodes = 500 + num_node_feature = 16 + num_edges = 10000 + num_edge_feature = 32 + node_feature = torch.rand((num_nodes, num_node_feature)) + edge_feature = torch.rand((num_edges, num_edge_feature)) + edge_index = torch.randint(0, num_nodes - 1, (2, num_edges)) # edge time should be unix timestmap integers. - edge_time = None # (num_edges) + # random generate timestamps from 2021-05-01 to 2021-06-01 + edge_time = torch.randint(1619852450, 1622530850, (num_edges,)).sort()[0] graph = Graph( node_feature=node_feature, @@ -48,7 +55,7 @@ def load_generic_dataset(format: str, name: str, dataset_dir: str List[deepsnap.graph.Graph]: a list of graph snapshots. """ # TODO: change the format name. - if format == 'generic': + if format == 'YOUR_FORMAT_NAME_HERE': dataset_dir = os.path.join(dataset_dir, name) g_all = load_single_dataset(dataset_dir) snapshot_list = utils.make_graph_snapshot( @@ -59,4 +66,4 @@ def load_generic_dataset(format: str, name: str, dataset_dir: str # TODO: don't forget to register the loader. -register_loader('roland_generic', load_generic_dataset) +register_loader('YOUR_LOADER_NAME_HERE', load_generic_dataset) From f0b72f78e6fc2eee9d49bcfe6fa8110fbc8c1412 Mon Sep 17 00:00:00 2001 From: Tianyu Du Date: Mon, 7 Jun 2021 00:21:48 -0700 Subject: [PATCH 40/66] rename layer and don't register. --- graphgym/models/layer_recurrent.py | 22 +++++++++------------- 1 file changed, 9 insertions(+), 13 deletions(-) diff --git a/graphgym/models/layer_recurrent.py b/graphgym/models/layer_recurrent.py index 50e4cfde..c64f039c 100644 --- a/graphgym/models/layer_recurrent.py +++ b/graphgym/models/layer_recurrent.py @@ -1,6 +1,6 @@ -''' +""" This file contains wrapper layers and constructors for dynamic/recurrent GNNs. -''' +""" from graphgym.register import register_layer import torch import torch.nn as nn @@ -11,8 +11,8 @@ from graphgym.models.update import update_dict -class RecurrentGraphLayer(nn.Module): - ''' +class GeneralRecurrentLayer(nn.Module): + """ The recurrent graph layer for snapshot-based dynamic graphs. This layer requires (1): a GNN block for message passing. 
@@ -23,11 +23,11 @@ class RecurrentGraphLayer(nn.Module): This layer corresponds to a particular l-th layer in multi-layer setting, the layer id is specified by 'id' in '__init__'. - ''' + """ def __init__(self, name: str, dim_in: int, dim_out: int, has_act: bool=True, has_bn: bool=True, has_l2norm: bool=False, layer_id: int=0, **kwargs): - ''' + """ Args: name (str): The name of GNN layer to use for message-passing. dim_in (int): Dimension of input node feature. @@ -40,8 +40,8 @@ def __init__(self, name: str, dim_in: int, dim_out: int, has_act: bool=True, message passing result. Defaults to False. layer_id (int, optional): The layer id in multi-layer setting. Defaults to 0. - ''' - super(RecurrentGraphLayer, self).__init__() + """ + super(GeneralRecurrentLayer, self).__init__() self.has_l2norm = has_l2norm if layer_id < 0: raise ValueError(f'layer_id must be non-negative, got {layer_id}.') @@ -62,8 +62,7 @@ def __init__(self, name: str, dim_in: int, dim_out: int, has_act: bool=True, if has_act: layer_wrapper.append(act_dict[cfg.gnn.act]) self.post_layer = nn.Sequential(*layer_wrapper) - # self.update = self.construct_update_block(self.dim_in, self.dim_out, - # self.layer_id) + self.update = update_dict[cfg.gnn.embed_update_method](self.dim_in, self.dim_out, self.layer_id) @@ -88,6 +87,3 @@ def forward(self, batch): # batch.node_states[self.layer_id] = node_states_new batch.node_feature = batch.node_states[self.layer_id] return batch - - -register_layer('recurrent_graph_layer', RecurrentGraphLayer) From 7045cebe66bd499891eec62495dd4dde14a73441 Mon Sep 17 00:00:00 2001 From: Tianyu Du Date: Mon, 7 Jun 2021 02:36:33 -0700 Subject: [PATCH 41/66] rename --- .../{head_large_prediction.py => scalable_link_pred.py} | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) rename graphgym/contrib/head/{head_large_prediction.py => scalable_link_pred.py} (94%) diff --git a/graphgym/contrib/head/head_large_prediction.py b/graphgym/contrib/head/scalable_link_pred.py similarity index 94% rename from graphgym/contrib/head/head_large_prediction.py rename to graphgym/contrib/head/scalable_link_pred.py index 3b7e99c9..c2a08b06 100644 --- a/graphgym/contrib/head/head_large_prediction.py +++ b/graphgym/contrib/head/scalable_link_pred.py @@ -15,13 +15,13 @@ from graphgym.register import register_head -class LargeGNNEdgeHead(nn.Module): +class ScalableLinkPred(nn.Module): def __init__(self, dim_in: int, dim_out: int): # Use dim_in for graph conv, since link prediction dim_out could be # binary # E.g. if decoder='dot', link probability is dot product between # node embeddings, of dimension dim_in - super(LargeGNNEdgeHead, self).__init__() + super(ScalableLinkPred, self).__init__() # module to decode edges from node embeddings if cfg.model.edge_decoding == 'concat': @@ -61,8 +61,7 @@ def _apply_index(self, batch): batch.edge_label def forward_pred(self, batch): - # TODO: consider moving this to config. - predict_batch_size = 500000 # depends on GPU memroy size. 
+ predict_batch_size = cfg.metric.link_pred_batch_size num_pred = len(batch.edge_label) label = batch.edge_label if num_pred >= predict_batch_size: @@ -106,4 +105,4 @@ def forward(self, batch): return pred, label -register_head('link_pred_large', LargeGNNEdgeHead) +register_head('scalable_link_pred', ScalableLinkPred) From 6ef2747595ce83e9b7329a816222442cacdc90ea Mon Sep 17 00:00:00 2001 From: Tianyu Du Date: Mon, 7 Jun 2021 02:37:55 -0700 Subject: [PATCH 42/66] add cfg.gnn.link_pred_batch_size --- graphgym/contrib/config/roland.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/graphgym/contrib/config/roland.py b/graphgym/contrib/config/roland.py index 1e4f242b..99b5ced5 100644 --- a/graphgym/contrib/config/roland.py +++ b/graphgym/contrib/config/roland.py @@ -34,6 +34,9 @@ def set_cfg_roland(cfg): # Options: {'none', 'identity', 'affine'}. cfg.gnn.skip_connection = 'none' + # The bath size while making link prediction, useful when number of negative + # edges is huge, use a smaller number depends on GPU memroy size.. + cfg.gnn.link_pred_batch_size = 500000 # ----------------------------------------------------------------------- # # Meta-Learning options. # ----------------------------------------------------------------------- # From 267e11dfb2d7d39cb92563e9f2f1cf3b4b7fb0ac Mon Sep 17 00:00:00 2001 From: Tianyu Du Date: Mon, 7 Jun 2021 02:38:22 -0700 Subject: [PATCH 43/66] fix naming --- graphgym/contrib/head/scalable_link_pred.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/graphgym/contrib/head/scalable_link_pred.py b/graphgym/contrib/head/scalable_link_pred.py index c2a08b06..6ea875a6 100644 --- a/graphgym/contrib/head/scalable_link_pred.py +++ b/graphgym/contrib/head/scalable_link_pred.py @@ -61,7 +61,7 @@ def _apply_index(self, batch): batch.edge_label def forward_pred(self, batch): - predict_batch_size = cfg.metric.link_pred_batch_size + predict_batch_size = cfg.gnn.link_pred_batch_size num_pred = len(batch.edge_label) label = batch.edge_label if num_pred >= predict_batch_size: From bf28bf13a9796b1e3fafd6cff4d2df6502d13f28 Mon Sep 17 00:00:00 2001 From: Tianyu Du Date: Mon, 7 Jun 2021 12:40:52 -0700 Subject: [PATCH 44/66] add roland loader. --- graphgym/contrib/loader/roland.py | 492 ++++++++++++++++++++++-------- 1 file changed, 357 insertions(+), 135 deletions(-) diff --git a/graphgym/contrib/loader/roland.py b/graphgym/contrib/loader/roland.py index 0e640e77..ed38f30b 100644 --- a/graphgym/contrib/loader/roland.py +++ b/graphgym/contrib/loader/roland.py @@ -1,37 +1,113 @@ """ -A refined version for loading the roland dataset. This version has the -following key points: - -(1) Node's features are determined by their first transaction, so that - payer and payee information are no longer included as a edge features. - - Node features include: - company identity, bank, country, region, Skd, SkdL1, SkdL2, Skis, - SkisL1, SkisL2. - -(2) edge features include: # system, currency, scaled amount (EUR), and - scaled timestamp. - -Mar. 31, 2021 +One single loader for the roland project. 
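+Datasets handled here include the AS-733 autonomous-systems snapshots and the
+BSI-SVT transaction data.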
""" import os -from typing import List, Union +from datetime import datetime +from typing import List import dask.dataframe as dd -import deepsnap import graphgym.contrib.loader.dynamic_graph_utils as utils import numpy as np import pandas as pd import torch -from dask_ml.preprocessing import OrdinalEncoder +from dask_ml.preprocessing import OrdinalEncoder as DaskOrdinalEncoder from deepsnap.graph import Graph from graphgym.config import cfg from graphgym.register import register_loader from sklearn.preprocessing import MinMaxScaler from sklearn.preprocessing import OrdinalEncoder as SkOrdinalEncoder +from tqdm import tqdm # ============================================================================= -# Configure and instantiate the loader here. +# AS-733 Dataset. +# ============================================================================= + + +def load_AS_dataset(dataset_dir: str) -> Graph: + all_files = [x for x in sorted(os.listdir(dataset_dir)) + if (x.startswith('as') and x.endswith('.txt'))] + assert len(all_files) == 733 + assert all(x.endswith('.txt') for x in all_files) + + def file2timestamp(file_name: str) -> int: + t = file_name.strip('.txt').strip('as') + ts = int(datetime.strptime(t, '%Y%m%d').timestamp()) + return ts + + edge_index_lst, edge_time_lst = list(), list() + all_files = sorted(all_files) + + for graph_file in tqdm(all_files): + today = file2timestamp(graph_file) + graph_file = os.path.join(dataset_dir, graph_file) + + src, dst = list(), list() + with open(graph_file, 'r') as f: + for line in f.readlines(): + if line.startswith('#'): + continue + line = line.strip('\n') + v1, v2 = line.split('\t') + src.append(int(v1)) + dst.append(int(v2)) + + edge_index = np.stack((src, dst)) + edge_index_lst.append(edge_index) + + edge_time = np.ones(edge_index.shape[1]) * today + edge_time_lst.append(edge_time) + + edge_index_raw = np.concatenate(edge_index_lst, axis=1).astype(int) + + num_nodes = len(np.unique(edge_index_raw)) + + # encode node indices to consecutive integers. + node_indices = np.sort(np.unique(edge_index_raw)) + enc = SkOrdinalEncoder(categories=[node_indices, node_indices]) + edge_index = enc.fit_transform(edge_index_raw.transpose()).transpose() + edge_index = torch.Tensor(edge_index).long() + edge_time = torch.Tensor(np.concatenate(edge_time_lst)) + + # Use scaled datetime as edge_feature. + scale = edge_time.max() - edge_time.min() + base = edge_time.min() + scaled_edge_time = 2 * (edge_time.clone() - base) / scale + + assert cfg.dataset.AS_node_feature in ['one', 'one_hot_id', + 'one_hot_degree_global'] + + if cfg.dataset.AS_node_feature == 'one': + node_feature = torch.ones(num_nodes, 1) + elif cfg.dataset.AS_node_feature == 'one_hot_id': + # One hot encoding the node ID. + node_feature = torch.Tensor(np.eye(num_nodes)) + elif cfg.dataset.AS_node_feature == 'one_hot_degree_global': + # undirected graph, use only out degree. + _, node_degree = torch.unique(edge_index[0], sorted=True, + return_counts=True) + node_feature = np.zeros((num_nodes, node_degree.max() + 1)) + node_feature[np.arange(num_nodes), node_degree] = 1 + # 1 ~ 63748 degrees, but only 710 possible levels, exclude all zero + # columns. 
+ non_zero_cols = (node_feature.sum(axis=0) > 0) + node_feature = node_feature[:, non_zero_cols] + node_feature = torch.Tensor(node_feature) + else: + raise NotImplementedError + + g_all = Graph( + node_feature=node_feature, + edge_feature=scaled_edge_time.reshape(-1, 1), + edge_index=edge_index, + edge_time=edge_time, + directed=True + ) + + return g_all + + +# ============================================================================= +# BSI-SVT Dataset # ============================================================================= # Required for all graphs. SRC_NODE: str = 'Payer' @@ -82,39 +158,39 @@ def construct_additional_features(df: pd.DataFrame) -> pd.DataFrame: """ Constructs additional features of the transaction dataset. """ - # for p in ('Payer', 'Payee'): - # # %% Location of companies. - # mask = (df[p + 'Country'] != 'SI') - # out_of_country = np.empty(len(df), dtype=object) - # out_of_country[mask] = 'OutOfCountry' - # out_of_country[~mask] = 'InCountry' - # df[p + 'OutOfCountry'] = out_of_country - # - # mask = (df['PayerCountry'] != df['PayeeCountry']) - # missing_mask = np.logical_or(df['PayerCountry'] == 'missing', - # df['PayeeCountry'] == 'missing') - # cross_country = np.empty(len(df), dtype=object) - # cross_country[mask] = 'CrossCountry' - # cross_country[~mask] = 'WithinCountry' - # cross_country[missing_mask] = 'Missing' - # df['CrossCountry'] = cross_country - # - # amount_level = np.empty(len(df), dtype=object) - # mask_small = df['AmountEUR'] < 500 - # mask_medium = np.logical_and(df['AmountEUR'] >= 500, - # df['AmountEUR'] < 1000) - # mask_large = df['AmountEUR'] >= 1000 - # amount_level[mask_small] = '$<500' - # amount_level[mask_medium] = '500<=$<1k' - # amount_level[mask_large] = '$>=1k' - # - # df['AmountLevel'] = amount_level + for p in ('Payer', 'Payee'): + # %% Location of companies. + mask = (df[p + 'Country'] != 'SI') + out_of_country = np.empty(len(df), dtype=object) + out_of_country[mask] = 'OutOfCountry' + out_of_country[~mask] = 'InCountry' + df[p + 'OutOfCountry'] = out_of_country + + mask = (df['PayerCountry'] != df['PayeeCountry']) + missing_mask = np.logical_or(df['PayerCountry'] == 'missing', + df['PayeeCountry'] == 'missing') + cross_country = np.empty(len(df), dtype=object) + cross_country[mask] = 'CrossCountry' + cross_country[~mask] = 'WithinCountry' + cross_country[missing_mask] = 'Missing' + df['CrossCountry'] = cross_country + + amount_level = np.empty(len(df), dtype=object) + mask_small = df['AmountEUR'] < 500 + mask_medium = np.logical_and(df['AmountEUR'] >= 500, + df['AmountEUR'] < 1000) + mask_large = df['AmountEUR'] >= 1000 + amount_level[mask_small] = '$<500' + amount_level[mask_medium] = '500<=$<1k' + amount_level[mask_large] = '$>=1k' + + df['AmountLevel'] = amount_level return df -def load_single_dataset(dataset_dir: str, is_hetero: bool = True, - type_info_loc: str = 'append' - ) -> Graph: +def load_bsi_dataset(dataset_dir: str, is_hetero: bool = False, + type_info_loc: str = 'append' + ) -> Graph: """ Loads a single graph object from tsv file. @@ -130,7 +206,8 @@ def load_single_dataset(dataset_dir: str, is_hetero: bool = True, df_trans = dd.read_csv(dataset_dir, sep='\t', low_memory=False) df_trans = df_trans.fillna('missing') df_trans = df_trans.compute() - df_trans = construct_additional_features(df_trans) + if is_hetero: + df_trans = construct_additional_features(df_trans) df_trans.reset_index(drop=True, inplace=True) # necessary for dask. # a unique values of node-level categorical variables. 
@@ -155,7 +232,7 @@ def load_single_dataset(dataset_dir: str, is_hetero: bool = True, # Encoding categorical variables, the dask_ml.OrdinalEncoder only modify # and encode columns of categorical dtype. - enc = OrdinalEncoder() + enc = DaskOrdinalEncoder() df_encoded = enc.fit_transform(df_trans) df_encoded.reset_index(drop=True, inplace=True) print('Columns encoded to ordinal:') @@ -174,9 +251,10 @@ def load_single_dataset(dataset_dir: str, is_hetero: bool = True, # Prepare for output. edge_feature = torch.Tensor(df_encoded[EDGE_FEATURE_COLS].values) - print('feature_edge_int_num', - [int(torch.max(edge_feature[:, i])) + 1 - for i in range(len(EDGE_FEATURE_COLS) - 2)]) + feature_edge_int_num = [int(torch.max(edge_feature[:, i])) + 1 + for i in range(len(EDGE_FEATURE_COLS) - 2)] + cfg.transaction.feature_edge_int_num = feature_edge_int_num + print('feature_edge_int_num', feature_edge_int_num) edge_index = torch.Tensor( df_encoded[[SRC_NODE, DST_NODE]].values.transpose()).long() # (2, E) @@ -245,76 +323,226 @@ def load_single_dataset(dataset_dir: str, is_hetero: bool = True, return graph +# ============================================================================= +# Bitcoin Dataset. +# ============================================================================= + + +def load_bitcoin_dataset(dataset_dir: str) -> Graph: + df_trans = pd.read_csv(dataset_dir, sep=',', header=None, index_col=None) + df_trans.columns = ['SOURCE', 'TARGET', 'RATING', 'TIME'] + # NOTE: 'SOURCE' and 'TARGET' are not consecutive. + num_nodes = len( + pd.unique(df_trans[['SOURCE', 'TARGET']].to_numpy().ravel())) + + # bitcoin OTC contains decimal numbers, round them. + df_trans['TIME'] = df_trans['TIME'].astype(np.int).astype(np.float) + assert not np.any(pd.isna(df_trans).values) + + time_scaler = MinMaxScaler((0, 2)) + df_trans['TimestampScaled'] = time_scaler.fit_transform( + df_trans['TIME'].values.reshape(-1, 1)) + + edge_feature = torch.Tensor( + df_trans[['RATING', 'TimestampScaled']].values) # (E, edge_dim) + + node_indices = np.sort( + pd.unique(df_trans[['SOURCE', 'TARGET']].to_numpy().ravel())) + enc = SkOrdinalEncoder(categories=[node_indices, node_indices]) + raw_edges = df_trans[['SOURCE', 'TARGET']].values + edge_index = enc.fit_transform(raw_edges).transpose() + edge_index = torch.LongTensor(edge_index) + + # num_nodes = torch.max(edge_index) + 1 + # Use dummy node features. + node_feature = torch.ones(num_nodes, 1).float() + + edge_time = torch.FloatTensor(df_trans['TIME'].values) + + graph = Graph( + node_feature=node_feature, + edge_feature=edge_feature, + edge_index=edge_index, + edge_time=edge_time, + directed=True + ) + return graph + + +# ============================================================================= +# Reddit Dataset. +# ============================================================================= + + +def load_reddit_dataset(dataset_dir: str) -> Graph: + df_trans = dd.read_csv(dataset_dir, sep='\t', low_memory=False) + df_trans = df_trans.compute() + assert not np.any(pd.isna(df_trans).values) + df_trans.reset_index(drop=True, inplace=True) # required for dask. + + # Encode src and dst node IDs. + # get unique values of src and dst. 
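+    # SOURCE and TARGET share one categorical vocabulary so that the same
+    # sub-reddit maps to the same integer node ID in either column.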
+ unique_subreddits = pd.unique( + df_trans[['SOURCE_SUBREDDIT', 'TARGET_SUBREDDIT']].to_numpy().ravel()) + unique_subreddits = np.sort(unique_subreddits) + cate_type = pd.api.types.CategoricalDtype(categories=unique_subreddits, + ordered=True) + df_trans['SOURCE_SUBREDDIT'] = df_trans['SOURCE_SUBREDDIT'].astype( + cate_type) + df_trans['TARGET_SUBREDDIT'] = df_trans['TARGET_SUBREDDIT'].astype( + cate_type) + enc = DaskOrdinalEncoder(columns=['SOURCE_SUBREDDIT', 'TARGET_SUBREDDIT']) + df_encoded = enc.fit_transform(df_trans) + df_encoded.reset_index(drop=True, inplace=True) + + # Add node feature from the embedding dataset. + node_embedding_dir = os.path.join(cfg.dataset.dir, + 'web-redditEmbeddings-subreddits.csv') + + # index: subreddit name, values: embedding. + df_node = pd.read_csv(node_embedding_dir, header=None, index_col=0) + + # ordinal encoding follows order in unique_subreddits. + # df_encoded['SOURCE_SUBREDDIT'] contains encoded integral values. + # unique_subreddits[df_encoded['SOURCE_SUBREDDIT']] + # tries to reverse encoded_integer --> original subreddit name. + # check if recovered sub-reddit name matched the raw data. + for col in ['SOURCE_SUBREDDIT', 'TARGET_SUBREDDIT']: + assert all(unique_subreddits[df_encoded[col]] == df_trans[col]) + + num_nodes = len(cate_type.categories) + node_feature = torch.ones(size=(num_nodes, 300)) + # for nodes without precomputed embedding, use the average value. + node_feature = node_feature * np.mean(df_node.values) + + # cate_type.categories[i] is encoded to i, by construction. + for i, subreddit in enumerate(cate_type.categories): + if subreddit in df_node.index: + embedding = df_node.loc[subreddit] + node_feature[i, :] = torch.Tensor(embedding.values) + + # Original format: df['TIMESTAMP'][0] = '2013-12-31 16:39:18' + # Convert to unix timestamp (integers). + df_encoded['TIMESTAMP'] = pd.to_datetime(df_encoded['TIMESTAMP'], + format='%Y-%m-%d %H:%M:%S') + df_encoded['TIMESTAMP'] = (df_encoded['TIMESTAMP'] - pd.Timestamp( + '1970-01-01')) // pd.Timedelta('1s') # now integers. + + # Scale edge time. + time_scaler = MinMaxScaler((0, 2)) + df_encoded['TimestampScaled'] = time_scaler.fit_transform( + df_encoded['TIMESTAMP'].values.reshape(-1, 1)) + + # Link sentimental representation (86-dimension). + # comma-separated string: '3.1,5.1,0.0,...' + senti_str_lst = df_encoded['PROPERTIES'].values + edge_senti_embedding = [x.split(',') for x in senti_str_lst] + edge_senti_embedding = np.array(edge_senti_embedding).astype(np.float32) + # (E, 86) + + ef = df_encoded[['TimestampScaled', 'LINK_SENTIMENT']].values + edge_feature = np.concatenate([ef, edge_senti_embedding], axis=1) + edge_feature = torch.Tensor(edge_feature).float() # (E, 88) + + edge_index = torch.Tensor( + df_encoded[['SOURCE_SUBREDDIT', + 'TARGET_SUBREDDIT']].values.transpose()).long() # (2, E) + num_nodes = torch.max(edge_index) + 1 + + edge_time = torch.FloatTensor(df_encoded['TIMESTAMP'].values) + + graph = Graph( + node_feature=node_feature, + edge_feature=edge_feature, + edge_index=edge_index, + edge_time=edge_time, + directed=True + ) + + return graph + + +# ============================================================================= +# College Message Dataset. 
+# ============================================================================= + + +def load_college_message_dataset(dataset_dir: str) -> Graph: + df_trans = pd.read_csv(dataset_dir, sep=' ', header=None) + df_trans.columns = ['SRC', 'DST', 'TIMESTAMP'] + assert not np.any(pd.isna(df_trans).values) + df_trans.reset_index(drop=True, inplace=True) + + # Node IDs of this dataset start from 1, re-index to 0-based. + df_trans['SRC'] -= 1 + df_trans['DST'] -= 1 + + print('num of edges:', len(df_trans)) + print('num of nodes:', np.max(df_trans[['SRC', 'DST']].values) + 1) + + time_scaler = MinMaxScaler((0, 2)) + df_trans['TimestampScaled'] = time_scaler.fit_transform( + df_trans['TIMESTAMP'].values.reshape(-1, 1)) + + edge_feature = torch.Tensor( + df_trans[['TimestampScaled']].values).view(-1, 1) + edge_index = torch.Tensor( + df_trans[['SRC', 'DST']].values.transpose()).long() # (2, E) + num_nodes = torch.max(edge_index) + 1 + + node_feature = torch.ones(num_nodes, 1) + + edge_time = torch.FloatTensor(df_trans['TIMESTAMP'].values) + + graph = Graph( + node_feature=node_feature, + edge_feature=edge_feature, + edge_index=edge_index, + edge_time=edge_time, + directed=True + ) + + return graph + + +def load_roland_dataset(format: str, name: str, dataset_dir: str + ) -> List[Graph]: + if format == 'roland': + # Load the entire graph from specified dataset. + if name in ['AS-733']: + g_all = load_AS_dataset(os.path.join(dataset_dir, name)) + elif name in ['bsi_svt_2008.tsv']: + # NOTE: only BSI dataset supports hetero graph. + g_all = load_bsi_dataset(os.path.join(dataset_dir, name), + is_hetero=cfg.dataset.is_hetero, + type_info_loc=cfg.dataset.type_info_loc) + elif name in ['bitcoinotc.csv', 'bitcoinalpha.csv']: + g_all = load_bitcoin_dataset(os.path.join(dataset_dir, name)) + elif name in ['reddit-body.tsv', 'reddit-title.tsv']: + g_all = load_reddit_dataset(os.path.join(dataset_dir, name)) + elif name in ['CollegeMsg.txt']: + g_all = load_college_message_dataset( + os.path.join(dataset_dir, name)) + else: + raise ValueError(f'Unsupported filename') + + # Make the graph snapshots. + snapshot_freq = cfg.transaction.snapshot_freq + if snapshot_freq.upper() in ['D', 'W', 'M']: + # Split snapshot using calendar frequency. + snapshot_list = utils.make_graph_snapshot(g_all, + snapshot_freq, + cfg.dataset.is_hetero) + elif snapshot_freq.endswith('s'): + # Split using frequency in terms of seconds. + assert snapshot_freq.endswith('s') + snapshot_freq = int(snapshot_freq.strip('s')) + assert not cfg.dataset.is_hetero, 'Hetero graph is not supported.' + snapshot_list = utils.make_graph_snapshot_by_seconds(g_all, + snapshot_freq) + else: + raise ValueError(f'Unsupported frequency type: {snapshot_freq}') -# def make_graph_snapshot(g_all: Graph, -# snapshot_freq: str, -# is_hetero: bool = True) -> list: -# """ -# Constructs a list of graph snapshots (Graph or HeteroGraph) based -# on g_all and snapshot_freq. -# -# Args: -# g_all: the entire homogenous graph. -# snapshot_freq: snapshot frequency. -# is_hetero: if make heterogeneous graphs. -# """ -# t = g_all.edge_time.numpy().astype(np.int64) -# snapshot_freq = snapshot_freq.upper() -# -# period_split = pd.DataFrame( -# {'Timestamp': t, -# 'TransactionTime': pd.to_datetime(t, unit='s')}, -# index=range(len(g_all.edge_time))) -# -# freq_map = {'D': '%j', # day of year. -# 'W': '%W', # week of year. -# 'M': '%m' # month of year. 
-# } -# -# period_split['Year'] = period_split['TransactionTime'].dt.strftime( -# '%Y').astype(int) -# -# period_split['SubYearFlag'] = period_split['TransactionTime'].dt.strftime( -# freq_map[snapshot_freq]).astype(int) -# -# period2id = period_split.groupby(['Year', 'SubYearFlag']).indices -# # e.g., dictionary w/ key = (2021, 3) and val = array(edges). -# -# periods = sorted(list(period2id.keys())) # ascending order. -# # alternatively, sorted(..., key=lambda x: x[0] + x[1]/1000). -# snapshot_list = list() -# for p in periods: -# # unique IDs of edges in this period. -# period_members = period2id[p] -# -# g_incr = Graph( -# node_feature=g_all.node_feature, -# edge_feature=g_all.edge_feature[period_members, :], -# edge_index=g_all.edge_index[:, period_members], -# edge_time=g_all.edge_time[period_members], -# directed=g_all.directed, -# list_n_type=g_all.list_n_type if is_hetero else None, -# list_e_type=g_all.list_e_type if is_hetero else None, -# ) -# if is_hetero and hasattr(g_all, 'node_type'): -# g_incr.node_type = g_all.node_type -# g_incr.edge_type = g_all.edge_type[period_members] -# snapshot_list.append(g_incr) -# return snapshot_list - - -def load_generic(dataset_dir: str, - snapshot: bool = True, - snapshot_freq: str = None, - is_hetero: bool = False, - type_info_loc: str = 'graph_attribute' - ) -> Union[deepsnap.graph.Graph, List[deepsnap.graph.Graph]]: - g_all = load_single_dataset(dataset_dir, is_hetero=is_hetero, - type_info_loc=type_info_loc) - if not snapshot: - return g_all - else: - snapshot_list = utils.make_graph_snapshot(g_all, snapshot_freq, is_hetero) num_nodes = g_all.edge_index.max() + 1 for g_snapshot in snapshot_list: @@ -322,19 +550,13 @@ def load_generic(dataset_dir: str, g_snapshot.node_cells = [0 for _ in range(cfg.gnn.layers_mp)] g_snapshot.node_degree_existing = torch.zeros(num_nodes) - return snapshot_list - + # Filter small snapshots. + filtered_graphs = list() + for g in snapshot_list: + if g.num_edges >= 10: + filtered_graphs.append(g) -def load_generic_dataset(format, name, dataset_dir): - if format == 'roland_bsi_general': - dataset_dir = os.path.join(dataset_dir, name) - graphs = load_generic(dataset_dir, - snapshot=cfg.transaction.snapshot, - snapshot_freq=cfg.transaction.snapshot_freq, - is_hetero=cfg.dataset.is_hetero, - type_info_loc=cfg.dataset.type_info_loc) - return graphs + return filtered_graphs -# TODO: change name. -register_loader('roland_bsi_v3', load_generic_dataset) +register_loader('roland', load_roland_dataset) From 7d71297249f9b75032ae43051fdc8fa4eb4cddc3 Mon Sep 17 00:00:00 2001 From: Tianyu Du Date: Mon, 7 Jun 2021 12:42:56 -0700 Subject: [PATCH 45/66] update gitignore --- .gitignore | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.gitignore b/.gitignore index a37445a5..4b747366 100644 --- a/.gitignore +++ b/.gitignore @@ -1,5 +1,7 @@ **/data_dir/ run/datasets/data/ +run/results/ +run/runs_*/ **/__pycache__/ **/.ipynb_checkpoints .idea/ From 9e6336f231d2dbb78d973767dedb31a0adb054ca Mon Sep 17 00:00:00 2001 From: Tianyu Du Date: Mon, 7 Jun 2021 12:45:10 -0700 Subject: [PATCH 46/66] add example yamls. 
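Each YAML describes one live-update experiment and is consumed by the
main_dynamic.py entry point added in the next patch, e.g.

    python3 main_dynamic.py --cfg configs/ROLAND/roland_gru_ucimsg.yaml --repeat 1

The same invocations are collected later in run/run_roland_single.sh.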
--- run/configs/ROLAND/roland_gru_as733.yaml | 70 ++++++++++++++++++ run/configs/ROLAND/roland_gru_btcalpha.yaml | 70 ++++++++++++++++++ run/configs/ROLAND/roland_gru_btcotc.yaml | 70 ++++++++++++++++++ run/configs/ROLAND/roland_gru_redditbody.yaml | 70 ++++++++++++++++++ .../ROLAND/roland_gru_reddittitle.yaml | 70 ++++++++++++++++++ run/configs/ROLAND/roland_gru_ucimsg.yaml | 70 ++++++++++++++++++ run/configs/ROLAND/roland_mlp_bsisvt.yaml | 71 +++++++++++++++++++ 7 files changed, 491 insertions(+) create mode 100644 run/configs/ROLAND/roland_gru_as733.yaml create mode 100644 run/configs/ROLAND/roland_gru_btcalpha.yaml create mode 100644 run/configs/ROLAND/roland_gru_btcotc.yaml create mode 100644 run/configs/ROLAND/roland_gru_redditbody.yaml create mode 100644 run/configs/ROLAND/roland_gru_reddittitle.yaml create mode 100644 run/configs/ROLAND/roland_gru_ucimsg.yaml create mode 100644 run/configs/ROLAND/roland_mlp_bsisvt.yaml diff --git a/run/configs/ROLAND/roland_gru_as733.yaml b/run/configs/ROLAND/roland_gru_as733.yaml new file mode 100644 index 00000000..16901c0a --- /dev/null +++ b/run/configs/ROLAND/roland_gru_as733.yaml @@ -0,0 +1,70 @@ +remark: live_update +out_dir: results +device: auto +metric: + mrr_method: max + mrr_num_negative_edges: 1000 +dataset: + format: roland + name: AS-733 + is_hetero: False + dir: /home/tianyudu/Data/all_datasets + task: link_pred + task_type: classification + transductive: True + split: [0.8, 0.1, 0.1] + augment_feature: [] + augment_feature_dims: [0] + edge_encoder: True + edge_encoder_name: roland_general + edge_dim: 1 + node_encoder: False + link_pred_all_edges: False +transaction: + keep_ratio: linear + snapshot: True + snapshot_freq: D + check_snapshot: False + history: rolling + horizon: 1 + pred_mode: at + loss: supervised + feature_int_dim: 16 + feature_edge_int_num: [] + feature_node_int_num: [] + feature_amount_dim: 16 + feature_time_dim: 16 +train: + batch_size: 32 + eval_period: 20 + ckpt_period: 400 + mode: live_update + internal_validation_tolerance: 5 +meta: + is_meta: True + alpha: 0.5 +model: + type: gnn_recurrent + loss_fun: cross_entropy + edge_decoding: concat +gnn: + embed_update_method: gru + layers_pre_mp: 2 + layers_mp: 2 + layers_post_mp: 2 + dim_inner: 128 + mlp_update_layers: 2 + layer_type: residual_edge_conv + skip_connection: affine + stage_type: stack + batchnorm: True + act: prelu + dropout: 0.0 + agg: add + att_heads: 1 + normalize_adj: False + msg_direction: both +optim: + optimizer: adam + base_lr: 0.03 + max_epoch: 100 \ No newline at end of file diff --git a/run/configs/ROLAND/roland_gru_btcalpha.yaml b/run/configs/ROLAND/roland_gru_btcalpha.yaml new file mode 100644 index 00000000..5b5ed5e7 --- /dev/null +++ b/run/configs/ROLAND/roland_gru_btcalpha.yaml @@ -0,0 +1,70 @@ +remark: live_update +out_dir: results +device: auto +metric: + mrr_method: max + mrr_num_negative_edges: 1000 +dataset: + format: roland + name: bitcoinalpha.csv + is_hetero: False + dir: /home/tianyudu/Data/all_datasets + task: link_pred + task_type: classification + transductive: True + split: [0.8, 0.1, 0.1] + augment_feature: [] + augment_feature_dims: [0] + edge_encoder: True + edge_encoder_name: roland_general + edge_dim: 2 + node_encoder: False + link_pred_all_edges: False +transaction: + keep_ratio: linear + snapshot: True + snapshot_freq: W + check_snapshot: False + history: rolling + horizon: 1 + pred_mode: at + loss: supervised + feature_int_dim: 16 + feature_edge_int_num: [] + feature_node_int_num: [] + feature_amount_dim: 16 + 
feature_time_dim: 16 +train: + batch_size: 32 + eval_period: 20 + ckpt_period: 400 + mode: live_update + internal_validation_tolerance: 5 +meta: + is_meta: True + alpha: 0.8 +model: + type: gnn_recurrent + loss_fun: cross_entropy + edge_decoding: concat +gnn: + embed_update_method: gru + layers_pre_mp: 2 + layers_mp: 2 + layers_post_mp: 2 + dim_inner: 64 + mlp_update_layers: 2 + layer_type: residual_edge_conv + skip_connection: affine + stage_type: stack + batchnorm: False + act: prelu + dropout: 0.0 + agg: add + att_heads: 1 + normalize_adj: False + msg_direction: both +optim: + optimizer: adam + base_lr: 0.003 + max_epoch: 100 \ No newline at end of file diff --git a/run/configs/ROLAND/roland_gru_btcotc.yaml b/run/configs/ROLAND/roland_gru_btcotc.yaml new file mode 100644 index 00000000..fafc3cb1 --- /dev/null +++ b/run/configs/ROLAND/roland_gru_btcotc.yaml @@ -0,0 +1,70 @@ +remark: live_update +out_dir: results +device: auto +metric: + mrr_method: max + mrr_num_negative_edges: 1000 +dataset: + format: roland + name: bitcoinotc.csv + is_hetero: False + dir: /home/tianyudu/Data/all_datasets + task: link_pred + task_type: classification + transductive: True + split: [0.8, 0.1, 0.1] + augment_feature: [] + augment_feature_dims: [0] + edge_encoder: True + edge_encoder_name: roland_general + edge_dim: 2 + node_encoder: False + link_pred_all_edges: False +transaction: + keep_ratio: linear + snapshot: True + snapshot_freq: W + check_snapshot: False + history: rolling + horizon: 1 + pred_mode: at + loss: supervised + feature_int_dim: 16 + feature_edge_int_num: [] + feature_node_int_num: [] + feature_amount_dim: 16 + feature_time_dim: 16 +train: + batch_size: 32 + eval_period: 20 + ckpt_period: 400 + mode: live_update + internal_validation_tolerance: 5 +meta: + is_meta: True + alpha: 0.9 +model: + type: gnn_recurrent + loss_fun: cross_entropy + edge_decoding: concat +gnn: + embed_update_method: gru + layers_pre_mp: 2 + layers_mp: 4 + layers_post_mp: 2 + dim_inner: 64 + mlp_update_layers: 2 + layer_type: residual_edge_conv + skip_connection: affine + stage_type: stack + batchnorm: False + act: prelu + dropout: 0.0 + agg: add + att_heads: 1 + normalize_adj: False + msg_direction: both +optim: + optimizer: adam + base_lr: 0.003 + max_epoch: 100 \ No newline at end of file diff --git a/run/configs/ROLAND/roland_gru_redditbody.yaml b/run/configs/ROLAND/roland_gru_redditbody.yaml new file mode 100644 index 00000000..ee2da59a --- /dev/null +++ b/run/configs/ROLAND/roland_gru_redditbody.yaml @@ -0,0 +1,70 @@ +remark: live_update +out_dir: results +device: auto +metric: + mrr_method: max + mrr_num_negative_edges: 1000 +dataset: + format: roland + name: reddit-body.tsv + is_hetero: False + dir: /home/tianyudu/Data/all_datasets + task: link_pred + task_type: classification + transductive: True + split: [0.8, 0.1, 0.1] + augment_feature: [] + augment_feature_dims: [0] + edge_encoder: True + edge_encoder_name: roland_general + edge_dim: 88 + node_encoder: False + link_pred_all_edges: False +transaction: + keep_ratio: linear + snapshot: True + snapshot_freq: W + check_snapshot: False + history: rolling + horizon: 1 + pred_mode: at + loss: supervised + feature_int_dim: 16 + feature_edge_int_num: [] + feature_node_int_num: [] + feature_amount_dim: 16 + feature_time_dim: 16 +train: + batch_size: 32 + eval_period: 20 + ckpt_period: 400 + mode: live_update + internal_validation_tolerance: 5 +meta: + is_meta: True + alpha: 0.5 +model: + type: gnn_recurrent + loss_fun: cross_entropy + edge_decoding: concat +gnn: + 
embed_update_method: gru + layers_pre_mp: 2 + layers_mp: 2 + layers_post_mp: 2 + dim_inner: 64 + mlp_update_layers: 2 + layer_type: residual_edge_conv + skip_connection: affine + stage_type: stack + batchnorm: True + act: prelu + dropout: 0.0 + agg: add + att_heads: 1 + normalize_adj: False + msg_direction: both +optim: + optimizer: adam + base_lr: 0.003 + max_epoch: 100 \ No newline at end of file diff --git a/run/configs/ROLAND/roland_gru_reddittitle.yaml b/run/configs/ROLAND/roland_gru_reddittitle.yaml new file mode 100644 index 00000000..e48519fd --- /dev/null +++ b/run/configs/ROLAND/roland_gru_reddittitle.yaml @@ -0,0 +1,70 @@ +remark: live_update +out_dir: results +device: auto +metric: + mrr_method: max + mrr_num_negative_edges: 1000 +dataset: + format: roland + name: reddit-title.tsv + is_hetero: False + dir: /home/tianyudu/Data/all_datasets + task: link_pred + task_type: classification + transductive: True + split: [0.8, 0.1, 0.1] + augment_feature: [] + augment_feature_dims: [0] + edge_encoder: True + edge_encoder_name: roland_general + edge_dim: 88 + node_encoder: False + link_pred_all_edges: False +transaction: + keep_ratio: linear + snapshot: True + snapshot_freq: W + check_snapshot: False + history: rolling + horizon: 1 + pred_mode: at + loss: supervised + feature_int_dim: 16 + feature_edge_int_num: [] + feature_node_int_num: [] + feature_amount_dim: 16 + feature_time_dim: 16 +train: + batch_size: 32 + eval_period: 20 + ckpt_period: 400 + mode: live_update + internal_validation_tolerance: 5 +meta: + is_meta: True + alpha: 0.1 +model: + type: gnn_recurrent + loss_fun: cross_entropy + edge_decoding: concat +gnn: + embed_update_method: gru + layers_pre_mp: 2 + layers_mp: 6 + layers_post_mp: 2 + dim_inner: 128 + mlp_update_layers: 2 + layer_type: residual_edge_conv + skip_connection: affine + stage_type: stack + batchnorm: True + act: prelu + dropout: 0.0 + agg: add + att_heads: 1 + normalize_adj: False + msg_direction: both +optim: + optimizer: adam + base_lr: 0.003 + max_epoch: 100 \ No newline at end of file diff --git a/run/configs/ROLAND/roland_gru_ucimsg.yaml b/run/configs/ROLAND/roland_gru_ucimsg.yaml new file mode 100644 index 00000000..441cb0c9 --- /dev/null +++ b/run/configs/ROLAND/roland_gru_ucimsg.yaml @@ -0,0 +1,70 @@ +remark: live_update +out_dir: results +device: auto +metric: + mrr_method: max + mrr_num_negative_edges: 1000 +dataset: + format: roland + name: CollegeMsg.txt + is_hetero: False + dir: /home/tianyudu/Data/all_datasets + task: link_pred + task_type: classification + transductive: True + split: [0.8, 0.1, 0.1] + augment_feature: [] + augment_feature_dims: [0] + edge_encoder: True + edge_encoder_name: roland_general + edge_dim: 1 + node_encoder: False + link_pred_all_edges: False +transaction: + keep_ratio: linear + snapshot: True + snapshot_freq: W + check_snapshot: False + history: rolling + horizon: 1 + pred_mode: at + loss: supervised + feature_int_dim: 16 + feature_edge_int_num: [] + feature_node_int_num: [] + feature_amount_dim: 16 + feature_time_dim: 16 +train: + batch_size: 32 + eval_period: 20 + ckpt_period: 400 + mode: live_update + internal_validation_tolerance: 5 +meta: + is_meta: True + alpha: 0.5 +model: + type: gnn_recurrent + loss_fun: cross_entropy + edge_decoding: concat +gnn: + embed_update_method: gru + layers_pre_mp: 2 + layers_mp: 8 + layers_post_mp: 2 + dim_inner: 64 + mlp_update_layers: 2 + layer_type: residual_edge_conv + skip_connection: affine + stage_type: stack + batchnorm: True + act: prelu + dropout: 0.0 + agg: add + 
att_heads: 1 + normalize_adj: False + msg_direction: both +optim: + optimizer: adam + base_lr: 0.01 + max_epoch: 100 \ No newline at end of file diff --git a/run/configs/ROLAND/roland_mlp_bsisvt.yaml b/run/configs/ROLAND/roland_mlp_bsisvt.yaml new file mode 100644 index 00000000..35949287 --- /dev/null +++ b/run/configs/ROLAND/roland_mlp_bsisvt.yaml @@ -0,0 +1,71 @@ +remark: live_update +out_dir: results +device: auto +metric: + mrr_method: max + mrr_num_negative_edges: 1000 +dataset: + format: roland + name: bsi_svt_2008.tsv + is_hetero: False + dir: /home/tianyudu/Data/all_datasets + task: link_pred + task_type: classification + transductive: True + split: [0.8, 0.1, 0.1] + augment_feature: [] + augment_feature_dims: [0] + edge_encoder: True + edge_encoder_name: roland + edge_dim: 2 + node_encoder: True + node_encoder_name: roland + link_pred_all_edges: False +transaction: + keep_ratio: linear + snapshot: True + snapshot_freq: W + check_snapshot: False + history: rolling + horizon: 1 + pred_mode: at + loss: supervised + feature_int_dim: 16 + feature_edge_int_num: [] + feature_node_int_num: [1018, 33, 13, 23, 5] + feature_amount_dim: 16 + feature_time_dim: 16 +train: + batch_size: 32 + eval_period: 20 + ckpt_period: 400 + mode: live_update + internal_validation_tolerance: 5 +meta: + is_meta: True + alpha: 0.4 +model: + type: gnn_recurrent + loss_fun: cross_entropy + edge_decoding: concat +gnn: + embed_update_method: gru + layers_pre_mp: 2 + layers_mp: 4 + layers_post_mp: 2 + dim_inner: 128 + mlp_update_layers: 2 + layer_type: residual_edge_conv + skip_connection: affine + stage_type: stack + batchnorm: True + act: prelu + dropout: 0.0 + agg: add + att_heads: 1 + normalize_adj: False + msg_direction: both +optim: + optimizer: adam + base_lr: 0.003 + max_epoch: 100 \ No newline at end of file From 998dae07d6c9941f114131be02d3e0fac184adec Mon Sep 17 00:00:00 2001 From: Tianyu Du Date: Mon, 7 Jun 2021 12:56:02 -0700 Subject: [PATCH 47/66] add training script for dynamic prediction tasks (lvie-update) --- run/main_dynamic.py | 71 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 71 insertions(+) create mode 100644 run/main_dynamic.py diff --git a/run/main_dynamic.py b/run/main_dynamic.py new file mode 100644 index 00000000..cb52e1a3 --- /dev/null +++ b/run/main_dynamic.py @@ -0,0 +1,71 @@ +import logging +import os +import random +import warnings +from datetime import datetime +from itertools import product + +import numpy as np +import torch +from graphgym.cmd_args import parse_args +from graphgym.config import (assert_cfg, cfg, dump_cfg, get_parent_dir, + update_out_dir) +from graphgym.contrib.train import * +from graphgym.loader import create_dataset, create_loader +from graphgym.logger import create_logger, setup_printing +from graphgym.model_builder import create_model +from graphgym.optimizer import create_optimizer, create_scheduler +from graphgym.register import train_dict +from graphgym.train import train +from graphgym.utils.agg_runs import agg_runs +from graphgym.utils.comp_budget import params_count +from graphgym.utils.device import auto_select_device + +os.environ['MPLCONFIGDIR'] = "/tmp" + + +if __name__ == '__main__': + # Load cmd line args + args = parse_args() + # Repeat for different random seeds + for i in range(args.repeat): + # Load config file + cfg.merge_from_file(args.cfg_file) + cfg.merge_from_list(args.opts) + assert_cfg(cfg) + # Set Pytorch environment + torch.set_num_threads(cfg.num_threads) + out_dir_parent = cfg.out_dir + cfg.seed = i + 1 + 
random.seed(cfg.seed) + np.random.seed(cfg.seed) + torch.manual_seed(cfg.seed) + update_out_dir(out_dir_parent, args.cfg_file) + dump_cfg(cfg) + setup_printing() + auto_select_device() + + # Set learning environment + datasets = create_dataset() + + cfg.dataset.num_nodes = datasets[0][0].num_nodes + loaders = create_loader(datasets) + meters = create_logger(datasets, loaders) + + model = create_model(datasets) + # breakpoint() + optimizer = create_optimizer(model.parameters()) + scheduler = create_scheduler(optimizer) + # Print model info + logging.info(model) + logging.info(cfg) + cfg.params = params_count(model) + logging.info('Num parameters: {}'.format(cfg.params)) + # Start training + if cfg.train.mode == 'live_update': + train_dict[cfg.train.mode]( + meters, loaders, model, optimizer, scheduler, datasets=datasets) + + # When being launched in batch mode, mark a yaml as done + if args.mark_done: + os.rename(args.cfg_file, '{}_done'.format(args.cfg_file)) From 89c5197fdcd4155c89bff2d132af3658413e6817 Mon Sep 17 00:00:00 2001 From: Tianyu Du Date: Mon, 7 Jun 2021 12:58:58 -0700 Subject: [PATCH 48/66] remove comments --- graphgym/utils/stats.py | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/graphgym/utils/stats.py b/graphgym/utils/stats.py index f8bb0e29..4b97b3b6 100644 --- a/graphgym/utils/stats.py +++ b/graphgym/utils/stats.py @@ -12,15 +12,3 @@ def node_degree(edge_index, n=None, mode='in'): degree = torch.zeros(n) ones = torch.ones(index.shape[0]) return degree.scatter_add_(0, index, ones) - - - - - - - -# edge_index = torch.tensor([[1, 2, 3, 4], [2, 3, 4, 5]]) - -# print(compute_degree(edge_index, mode='in')) -# print(compute_degree(edge_index, mode='out')) -# print(compute_degree(edge_index, mode='both')) From 93f7ff367aa2d6ba66d4a02ac2f520d2ccf422d2 Mon Sep 17 00:00:00 2001 From: Tianyu Du Date: Mon, 7 Jun 2021 13:22:18 -0700 Subject: [PATCH 49/66] add template for hetero graphs --- .../contrib/loader/roland_template_hetero.py | 95 +++++++++++++++++++ 1 file changed, 95 insertions(+) create mode 100644 graphgym/contrib/loader/roland_template_hetero.py diff --git a/graphgym/contrib/loader/roland_template_hetero.py b/graphgym/contrib/loader/roland_template_hetero.py new file mode 100644 index 00000000..1573a9ba --- /dev/null +++ b/graphgym/contrib/loader/roland_template_hetero.py @@ -0,0 +1,95 @@ +""" +A generic loader for the roland project, modify this template to build +loaders for other financial transaction datasets and dynamic graphs. +NOTE: this script is the trimmed version for homogenous graphs only. +Mar. 22, 2021. +# Search for TODO in this file. +""" +import os +from typing import List + +import deepsnap +import graphgym.contrib.loader.dynamic_graph_utils as utils +import torch +from deepsnap.graph import Graph +from graphgym.config import cfg +from graphgym.register import register_loader + + +def load_single_hetero_dataset(dataset_dir: str, type_info_loc: str) -> Graph: + # TODO: Load your data from dataset_dir here. + # Example: + num_nodes = 500 + num_node_feature = 16 + num_edges = 10000 + num_edge_feature = 32 + node_feature = torch.rand((num_nodes, num_node_feature)) + edge_feature = torch.rand((num_edges, num_edge_feature)) + edge_index = torch.randint(0, num_nodes - 1, (2, num_edges)) + # edge time should be unix timestmap integers. 
+ # random generate timestamps from 2021-05-01 to 2021-06-01 + edge_time = torch.randint(1619852450, 1622530850, (num_edges,)).sort()[0] + + graph = Graph( + node_feature=node_feature, + edge_feature=edge_feature, + edge_index=edge_index, + edge_time=edge_time, + directed=True + ) + + # TODO: additional operations required for heterogeneous graphs. + # Assume there are 3 types of edges. + num_edge_types = 3 + edge_type_int = torch.randint(0, num_edge_types - 1, (num_edges,)).float() + # Assume there are 5 types of nodes. + num_node_types = 5 + node_type_int = torch.randint(0, num_node_types - 1, (num_nodes,)).float() + + if type_info_loc == 'append': + graph.edge_feature = torch.cat((graph.edge_feature, edge_type_int), + dim=1) + graph.node_feature = torch.cat((graph.node_feature, node_type_int), + dim=1) + elif type_info_loc == 'graph_attribute': + graph.node_type = node_type_int.reshape(-1, ) + graph.edge_type = edge_type_int.reshape(-1, ) + else: + raise ValueError(f'Unsupported type info loc: {type_info_loc}') + + # add a list of unique types for reference. + graph.list_n_type = node_type_int.unique().long() + graph.list_e_type = edge_type_int.unique().long() + + return graph + + +def load_generic_dataset(format: str, name: str, dataset_dir: str + ) -> List[deepsnap.graph.Graph]: + """Load the dataset as a list of graph snapshots. + + Args: + format (str): format of dataset. + name (str): file name of dataset. + dataset_dir (str): path of dataset, do NOT include the file name, use + the parent directory of dataset file. + + Returns: + List[deepsnap.graph.Graph]: a list of graph snapshots. + """ + # TODO: change the format name. + if format == 'YOUR_HETERO_FORMAT_NAME_HERE': + assert cfg.dataset.is_hetero + dataset_dir = os.path.join(dataset_dir, name) + g_all = load_single_hetero_dataset( + dataset_dir, + type_info_loc=cfg.dataset.type_info_loc) + snapshot_list = utils.make_graph_snapshot( + g_all, + snapshot_freq=cfg.transaction.snapshot_freq, + is_hetero=cfg.dataset.is_hetero) + return snapshot_list + + +# TODO: don't forget to register the loader. +register_loader('YOUR_HETERO_LOADER_NAME_HERE', load_generic_dataset) From ddf58c68f1a73fe4920ffe924b4bf513490a206e Mon Sep 17 00:00:00 2001 From: Tianyu Du Date: Mon, 7 Jun 2021 13:22:32 -0700 Subject: [PATCH 50/66] rename --- graphgym/contrib/network/gnn_recurrent.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/graphgym/contrib/network/gnn_recurrent.py b/graphgym/contrib/network/gnn_recurrent.py index 1d0c562c..85667d63 100644 --- a/graphgym/contrib/network/gnn_recurrent.py +++ b/graphgym/contrib/network/gnn_recurrent.py @@ -4,21 +4,20 @@ from graphgym.config import cfg from graphgym.contrib.stage import * from graphgym.init import init_weights -from graphgym.models.act import act_dict from graphgym.models.feature_augment import Preprocess from graphgym.models.feature_encoder import (edge_encoder_dict, node_encoder_dict) from graphgym.models.head import head_dict from graphgym.models.layer import (BatchNorm1dEdge, BatchNorm1dNode, GeneralMultiLayer, layer_dict) -from graphgym.models.layer_recurrent import RecurrentGraphLayer +from graphgym.models.layer_recurrent import GeneralRecurrentLayer from graphgym.register import register_network -def GNNLayer(dim_in: int, dim_out: int, has_act: bool=True, layer_id: int=0): +def GNNLayer(dim_in: int, dim_out: int, has_act: bool = True, layer_id: int = 0): # General constructor for GNN layer. 
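+    # GeneralRecurrentLayer reads its hidden state from
+    # batch.node_states[layer_id], so every message-passing layer is
+    # constructed with its own layer_id.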
- return RecurrentGraphLayer(cfg.gnn.layer_type, dim_in, dim_out, - has_act, layer_id=layer_id) + return GeneralRecurrentLayer(cfg.gnn.layer_type, dim_in, dim_out, + has_act, layer_id=layer_id) def GNNPreMP(dim_in, dim_out): From 2ea233c58ca3ffa6688310f6d2e2becbc735889c Mon Sep 17 00:00:00 2001 From: Tianyu Du Date: Mon, 7 Jun 2021 13:22:45 -0700 Subject: [PATCH 51/66] remove comment --- graphgym/contrib/train/train_live_update.py | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/graphgym/contrib/train/train_live_update.py b/graphgym/contrib/train/train_live_update.py index 60555125..aa8a91fd 100644 --- a/graphgym/contrib/train/train_live_update.py +++ b/graphgym/contrib/train/train_live_update.py @@ -150,16 +150,6 @@ def train_live_update(loggers, loaders, model, optimizer, scheduler, datasets, if not hasattr(dataset[0], 'keep_ratio'): train_utils.precompute_edge_degree_info(dataset) - # if cfg.dataset.premade_datasets == 'fresh_save_cache': - # if not os.path.exists(f'{cfg.dataset.dir}/cache/'): - # os.mkdir(f'{cfg.dataset.dir}/cache/') - # cache_path = '{}/cache/cached_datasets_{}_{}_{}.pt'.format( - # cfg.dataset.dir, cfg.dataset.format.replace('.tsv', ''), - # cfg.transaction.snapshot_freq, - # datetime.now().strftime('%Y_%m_%d__%H_%M_%S') - # ) - # torch.save(datasets, cache_path) - num_splits = len(loggers) # train/val/test splits. # range for today in (today, tomorrow) task pairs. task_range = range(len(datasets[0]) - cfg.transaction.horizon) From c03bb12fc6049a3ee9f4da73d129042d7b67e9de Mon Sep 17 00:00:00 2001 From: Tianyu Du Date: Mon, 7 Jun 2021 13:23:01 -0700 Subject: [PATCH 52/66] add example run files for ROLAND --- run/run_roland_single.sh | 13 +++++++++++++ 1 file changed, 13 insertions(+) create mode 100644 run/run_roland_single.sh diff --git a/run/run_roland_single.sh b/run/run_roland_single.sh new file mode 100644 index 00000000..b10c5533 --- /dev/null +++ b/run/run_roland_single.sh @@ -0,0 +1,13 @@ +#!/usr/bin/env bash + +# python3 main_dynamic.py --cfg configs/ROLAND/roland_gru_btcalpha.yaml --repeat 1 + +# python3 main_dynamic.py --cfg configs/ROLAND/roland_gru_btcotc.yaml --repeat 1 + +# python3 main_dynamic.py --cfg configs/ROLAND/roland_gru_ucimsg.yaml --repeat 1 + +# python3 main_dynamic.py --cfg configs/ROLAND/roland_gru_reddittitle.yaml --repeat 1 + +# python3 main_dynamic.py --cfg configs/ROLAND/roland_gru_redditbody.yaml --repeat 1 + +python3 main_dynamic.py --cfg configs/ROLAND/roland_mlp_bsisvt.yaml --repeat 1 \ No newline at end of file From 190f49f3ddcc494d777bb8a030fa6682d324c558 Mon Sep 17 00:00:00 2001 From: Tianyu Du Date: Mon, 7 Jun 2021 15:25:49 -0700 Subject: [PATCH 53/66] add readme --- ROLAND_README.md | 65 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 65 insertions(+) create mode 100644 ROLAND_README.md diff --git a/ROLAND_README.md b/ROLAND_README.md new file mode 100644 index 00000000..3d052c99 --- /dev/null +++ b/ROLAND_README.md @@ -0,0 +1,65 @@ +# Use case: ROLAND: Graph Neural Networks for Dynamic Graphs +Code associated with the ROLAND project. + + +## TODO: add figures to illustrate the ROLAND framework. + +## Datasets +Most of datasets are used in our paper can be found at `https://snap.stanford.edu/data/index.html`. 
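+The commands below download them into `./all_datasets/`, unzip the archives,
+and rename the files to the names expected by the dataset loaders: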
+ +```bash +mkdir ./all_datasets/ +cd ./all_datasets +wget 'https://snap.stanford.edu/data/soc-sign-bitcoinotc.csv.gz' +wget 'https://snap.stanford.edu/data/soc-sign-bitcoinalpha.csv.gz' +wget 'https://snap.stanford.edu/data/as-733.tar.gz' +wget 'https://snap.stanford.edu/data/CollegeMsg.txt.gz' +wget 'https://snap.stanford.edu/data/soc-redditHyperlinks-body.tsv' +wget 'https://snap.stanford.edu/data/soc-redditHyperlinks-title.tsv' +wget 'http://snap.stanford.edu/data/web-redditEmbeddings-subreddits.csv' + +# Unzip files +gunzip CollegeMsg.txt.gz +gunzip soc-sign-bitcoinalpha.csv.gz +gunzip soc-sign-bitcoinotc.csv.gz +tar xf ./as-733.tar.gz + +# Rename files. +mv ./soc-sign-bitcoinotc.csv ./bitcoinotc.csv +mv ./soc-sign-bitcoinalpha.csv ./bitcoinalpha.csv + +mv ./soc-redditHyperlinks-body.tsv ./reddit-body.tsv +mv ./soc-redditHyperlinks-title.tsv ./reddit-title.tsv +``` +## Examples of ROLAND Use Cases +See `./run/run_roland_single.sh` for experiments on all datasets. +To run link-prediction task on `CollegeMsg.txt` dataset: +```bash +cd ./run +python3 main_dynamic.py --cfg configs/ROLAND/roland_gru_ucimsg.yaml --repeat 1 +``` +To explore training result: +```bash +cd ./run +tensorboard --logdir=./runs_live_update --port=6006 +``` + +## Examples on Homogenous Graph Snapshots +Prediction for BitCoin transactions. + +```bash +TODO: add yaml file. +``` + +## Examples on Heterogenous Graph Snapshots +TODO. + +## How to Load Your Own Dataset +Please refer to `./graphgym/contrib/loader/roland_template.py` and `./graphgym/contrib/loader/roland_template_hetero.py` for examples of building loaders. + +## Data Structures for Snapshot-Based Dynamic Graphs + + +## Grid Search +`./run/grids/ROLAND/` +`./` From 18bb3004927d7980ffc19bc7b844b3a320796966 Mon Sep 17 00:00:00 2001 From: Tianyu Du Date: Mon, 7 Jun 2021 15:30:42 -0700 Subject: [PATCH 54/66] add --- ROLAND_README.md | 31 +++++++++++++++---------------- 1 file changed, 15 insertions(+), 16 deletions(-) diff --git a/ROLAND_README.md b/ROLAND_README.md index 3d052c99..7c688ab4 100644 --- a/ROLAND_README.md +++ b/ROLAND_README.md @@ -32,34 +32,33 @@ mv ./soc-redditHyperlinks-body.tsv ./reddit-body.tsv mv ./soc-redditHyperlinks-title.tsv ./reddit-title.tsv ``` ## Examples of ROLAND Use Cases -See `./run/run_roland_single.sh` for experiments on all datasets. +The ROLAND project focuses on link-predictions for homogenous dynamic graphs. To run link-prediction task on `CollegeMsg.txt` dataset: ```bash cd ./run python3 main_dynamic.py --cfg configs/ROLAND/roland_gru_ucimsg.yaml --repeat 1 ``` -To explore training result: +For other datasets: ```bash -cd ./run -tensorboard --logdir=./runs_live_update --port=6006 -``` +python3 main_dynamic.py --cfg configs/ROLAND/roland_gru_btcalpha.yaml --repeat 1 -## Examples on Homogenous Graph Snapshots -Prediction for BitCoin transactions. +python3 main_dynamic.py --cfg configs/ROLAND/roland_gru_btcotc.yaml --repeat 1 +python3 main_dynamic.py --cfg configs/ROLAND/roland_gru_ucimsg.yaml --repeat 1 + +python3 main_dynamic.py --cfg configs/ROLAND/roland_gru_reddittitle.yaml --repeat 1 + +python3 main_dynamic.py --cfg configs/ROLAND/roland_gru_redditbody.yaml --repeat 1 +``` + +To explore training result: ```bash -TODO: add yaml file. +cd ./run +tensorboard --logdir=./runs_live_update --port=6006 ``` ## Examples on Heterogenous Graph Snapshots -TODO. 
+`Under development` ## How to Load Your Own Dataset Please refer to `./graphgym/contrib/loader/roland_template.py` and `./graphgym/contrib/loader/roland_template_hetero.py` for examples of building loaders. - -## Data Structures for Snapshot-Based Dynamic Graphs - - -## Grid Search -`./run/grids/ROLAND/` -`./` From 5a42cb67138b1700e196ca92e28fbdf811beee4d Mon Sep 17 00:00:00 2001 From: Tianyu Du Date: Wed, 9 Jun 2021 22:22:09 -0700 Subject: [PATCH 55/66] remove type_info_loc config --- graphgym/contrib/config/roland.py | 5 ----- graphgym/contrib/loader/roland.py | 20 ++++--------------- .../contrib/loader/roland_template_hetero.py | 18 ++++------------- 3 files changed, 8 insertions(+), 35 deletions(-) diff --git a/graphgym/contrib/config/roland.py b/graphgym/contrib/config/roland.py index 99b5ced5..af6da3fc 100644 --- a/graphgym/contrib/config/roland.py +++ b/graphgym/contrib/config/roland.py @@ -93,11 +93,6 @@ def set_cfg_roland(cfg): # Options: {True, False}. cfg.dataset.is_hetero = False - # Where to put type information. - # Options: {'append', 'graph_attribute'}. - # Only effective if cfg.dataset.is_hetero == True. - cfg.dataset.type_info_loc = 'append' - # whether to look for and load cached graph. By default (load_cache=False) # the loader loads the raw tsv file from disk and cfg.dataset.load_cache = False diff --git a/graphgym/contrib/loader/roland.py b/graphgym/contrib/loader/roland.py index ed38f30b..77e09fe1 100644 --- a/graphgym/contrib/loader/roland.py +++ b/graphgym/contrib/loader/roland.py @@ -188,16 +188,13 @@ def construct_additional_features(df: pd.DataFrame) -> pd.DataFrame: return df -def load_bsi_dataset(dataset_dir: str, is_hetero: bool = False, - type_info_loc: str = 'append' - ) -> Graph: +def load_bsi_dataset(dataset_dir: str, is_hetero: bool = False) -> Graph: """ Loads a single graph object from tsv file. Args: dataset_dir: the path of tsv file to be loaded. is_hetero: whether to load heterogeneous graph. - type_info_loc: 'append' or 'graph_attribute'. Returns: graph: a (homogenous) deepsnap graph object. @@ -306,16 +303,8 @@ def load_bsi_dataset(dataset_dir: str, is_hetero: bool = False, df_trans['EdgeType'].values.reshape(-1, 1)) edge_type_int = torch.FloatTensor(edge_type_int) - if type_info_loc == 'append': - graph.edge_feature = torch.cat((graph.edge_feature, edge_type_int), - dim=1) - graph.node_feature = torch.cat((graph.node_feature, node_type_int), - dim=1) - elif type_info_loc == 'graph_attribute': - graph.node_type = node_type_int.reshape(-1, ) - graph.edge_type = edge_type_int.reshape(-1, ) - else: - raise ValueError(f'Unsupported type info loc: {type_info_loc}') + graph.node_type = node_type_int.reshape(-1,) + graph.edge_type = edge_type_int.reshape(-1,) # add a list of unique types for reference. graph.list_n_type = node_type_int.unique().long() @@ -514,8 +503,7 @@ def load_roland_dataset(format: str, name: str, dataset_dir: str elif name in ['bsi_svt_2008.tsv']: # NOTE: only BSI dataset supports hetero graph. 
g_all = load_bsi_dataset(os.path.join(dataset_dir, name), - is_hetero=cfg.dataset.is_hetero, - type_info_loc=cfg.dataset.type_info_loc) + is_hetero=cfg.dataset.is_hetero) elif name in ['bitcoinotc.csv', 'bitcoinalpha.csv']: g_all = load_bitcoin_dataset(os.path.join(dataset_dir, name)) elif name in ['reddit-body.tsv', 'reddit-title.tsv']: diff --git a/graphgym/contrib/loader/roland_template_hetero.py b/graphgym/contrib/loader/roland_template_hetero.py index 1573a9ba..77bc3049 100644 --- a/graphgym/contrib/loader/roland_template_hetero.py +++ b/graphgym/contrib/loader/roland_template_hetero.py @@ -16,7 +16,7 @@ from graphgym.register import register_loader -def load_single_hetero_dataset(dataset_dir: str, type_info_loc: str) -> Graph: +def load_single_hetero_dataset(dataset_dir: str) -> Graph: # TODO: Load your data from dataset_dir here. # Example: num_nodes = 500 @@ -46,16 +46,8 @@ def load_single_hetero_dataset(dataset_dir: str, type_info_loc: str) -> Graph: num_node_types = 5 node_type_int = torch.randint(0, num_node_types - 1, (num_nodes,)).float() - if type_info_loc == 'append': - graph.edge_feature = torch.cat((graph.edge_feature, edge_type_int), - dim=1) - graph.node_feature = torch.cat((graph.node_feature, node_type_int), - dim=1) - elif type_info_loc == 'graph_attribute': - graph.node_type = node_type_int.reshape(-1, ) - graph.edge_type = edge_type_int.reshape(-1, ) - else: - raise ValueError(f'Unsupported type info loc: {type_info_loc}') + graph.node_type = node_type_int.reshape(-1,) + graph.edge_type = edge_type_int.reshape(-1,) # add a list of unique types for reference. graph.list_n_type = node_type_int.unique().long() @@ -81,9 +73,7 @@ def load_generic_dataset(format: str, name: str, dataset_dir: str if format == 'YOUR_HETERO_FORMAT_NAME_HERE': assert cfg.dataset.is_hetero dataset_dir = os.path.join(dataset_dir, name) - g_all = load_single_hetero_dataset( - dataset_dir, - type_info_loc=cfg.dataset.type_info_loc) + g_all = load_single_hetero_dataset(dataset_dir) snapshot_list = utils.make_graph_snapshot( g_all, snapshot_freq=cfg.transaction.snapshot_freq, From c77ec978adbaa276e5622c768f6a41ef58c58ebc Mon Sep 17 00:00:00 2001 From: Tianyu Du Date: Thu, 10 Jun 2021 00:20:36 -0700 Subject: [PATCH 56/66] update loader --- graphgym/contrib/loader/roland.py | 6 +++--- graphgym/contrib/loader/roland_template_hetero.py | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/graphgym/contrib/loader/roland.py b/graphgym/contrib/loader/roland.py index 77e09fe1..ffa8d94d 100644 --- a/graphgym/contrib/loader/roland.py +++ b/graphgym/contrib/loader/roland.py @@ -131,7 +131,7 @@ def file2timestamp(file_name: str) -> int: # Required for heterogeneous graphs only. # Node and edge features used to define node and edge type in hete GNN. NODE_TYPE_DEFN: List[str] = ['Country'] -EDGE_TYPE_DEFN: List[str] = ['# System'] +EDGE_TYPE_DEFN: List[str] = ['# System', 'AmountLevel'] # Required for graphs with node features only. @@ -303,8 +303,8 @@ def load_bsi_dataset(dataset_dir: str, is_hetero: bool = False) -> Graph: df_trans['EdgeType'].values.reshape(-1, 1)) edge_type_int = torch.FloatTensor(edge_type_int) - graph.node_type = node_type_int.reshape(-1,) - graph.edge_type = edge_type_int.reshape(-1,) + graph.node_type = node_type_int.reshape(-1,).long() + graph.edge_type = edge_type_int.reshape(-1,).long() # add a list of unique types for reference. 
graph.list_n_type = node_type_int.unique().long() diff --git a/graphgym/contrib/loader/roland_template_hetero.py b/graphgym/contrib/loader/roland_template_hetero.py index 77bc3049..fe202a33 100644 --- a/graphgym/contrib/loader/roland_template_hetero.py +++ b/graphgym/contrib/loader/roland_template_hetero.py @@ -46,8 +46,8 @@ def load_single_hetero_dataset(dataset_dir: str) -> Graph: num_node_types = 5 node_type_int = torch.randint(0, num_node_types - 1, (num_nodes,)).float() - graph.node_type = node_type_int.reshape(-1,) - graph.edge_type = edge_type_int.reshape(-1,) + graph.node_type = node_type_int.reshape(-1,).long() + graph.edge_type = edge_type_int.reshape(-1,).long() # add a list of unique types for reference. graph.list_n_type = node_type_int.unique().long() From f0579b4cb109114755c0188d15091f2ba3924444 Mon Sep 17 00:00:00 2001 From: Tianyu Du Date: Thu, 10 Jun 2021 00:21:12 -0700 Subject: [PATCH 57/66] remove comments --- graphgym/contrib/train/train_utils.py | 88 ++------------------------- 1 file changed, 4 insertions(+), 84 deletions(-) diff --git a/graphgym/contrib/train/train_utils.py b/graphgym/contrib/train/train_utils.py index 6caf7ffd..9117ee76 100644 --- a/graphgym/contrib/train/train_utils.py +++ b/graphgym/contrib/train/train_utils.py @@ -189,81 +189,6 @@ def gen_negative_edges(edge_index: torch.LongTensor, return neg_edge_index -# def compute_src_mrr_and_recall(edge_label_index: torch.LongTensor, -# edge_label: torch.LongTensor, -# pred_score: torch.Tensor, -# recall_k_lst: List[int], -# mrr_top_k: Optional[int] = None -# ) -> (float, Dict[int, float]): -# """ -# Computes source-based MRR and recall at K for each source node in -# edge_label_index. - -# Args: -# edge_label_index: combination of positive and negative edges. -# edge_label: label of edges in edge_label_index. -# pred_score: P(E=positive) for each edge in edge_label_index. -# recall_k_lst: to report recall at k for all k in this list. -# mrr_top_k: calculating MRR for each source node using mean(1/rank) for -# k positive edges with the highest pred_score. Set to None to use -# all positive edges. -# """ -# assert edge_label_index.shape[1] == len(edge_label) == len(pred_score) - -# src_lst = torch.unique(edge_label_index[0]) # source nodes to consider. -# # edge_label_index were constructed by adding negative edges to every -# # node in edge_index[0], thus every node in src_lst has at least one -# # positive edge in edge_label_index. -# # I.e., src_lst == torch.unique(edge_label_index[0][edge_label == 1]) - -# node_level_mrr = [] # store MRR for each node. -# node_recall_at = dict((k, []) for k in recall_k_lst) -# for src in tqdm(src_lst, leave=False, desc='Node level MRR/Recall'): -# # get positive/negative edges emitted from src node. -# self_mask = (edge_label_index[0] == src) -# self_label = edge_label[self_mask] -# self_pred_score = pred_score[self_mask] - -# # Alternative implementation. -# best = torch.max(self_pred_score[self_label == 1]) -# rank = torch.sum(self_pred_score[self_label == 0] >= best) + 1 -# # print(pos_edge_rank[0], true, torch.sum(label == 0)) -# mrr = float(1 / rank) -# node_level_mrr.append(mrr) # mrr for this node. - -# for k in recall_k_lst: -# recall = _calculate_recall_at_k(self_pred_score, self_label, k) -# node_recall_at[k].append(recall) - -# # Average over all nodes. 
-# macro_recall = dict((k, np.mean(v)) for (k, v) in node_recall_at.items()) -# macro_mrr = float(np.mean(node_level_mrr)) -# return macro_mrr, macro_recall - - -# def _calculate_recall_at_k(pred_score: torch.Tensor, -# label: torch.Tensor, -# k: int) -> int: -# """Computes whether the score of the most confident positive edge is -# within the highest k scores. I.e., whether the most confident -# positive edge beats at least k most confident negative edges. - -# Args: -# pred_score: a tensor of scores of predictions. -# label: a tensor of labels. -# k: get whether successful recall at k. - -# Returns: -# an indicator whether there is a successful recall at rank k. -# """ -# neg_score = pred_score[label == 0] -# if len(neg_score) == 0: -# return 0 -# best_pos_score = torch.max(pred_score[label == 1]) -# rank = torch.sum(neg_score >= best_pos_score) + 1 -# return int(rank <= k) - - @torch.no_grad() def fast_batch_mrr(edge_label_index: torch.Tensor, edge_label: torch.Tensor, @@ -343,6 +268,7 @@ def fast_batch_mrr(edge_label_index: torch.Tensor, mrr = float(torch.mean(1 / rank_by_user)) return mrr +# TODO: get recall at k back. # @torch.no_grad() # def report_rank_based_eval(eval_batch, model, method: str, @@ -419,8 +345,8 @@ def get_row_MRR(probs, true_classes): @torch.no_grad() -def report_baseline_MRR(eval_batch: deepsnap.graph.Graph, - model: torch.nn.Module) -> float: +def report_MRR_all(eval_batch: deepsnap.graph.Graph, + model: torch.nn.Module) -> float: # Get positive edge indices. edge_index = eval_batch.edge_label_index[:, eval_batch.edge_label == 1] edge_index = edge_index.to('cpu') @@ -472,12 +398,6 @@ def report_baseline_MRR(eval_batch: deepsnap.graph.Graph, true_row = true.take(mask).squeeze() row_MRRs.append(get_row_MRR(pred_row, true_row)) - # for i, pred_row in enumerate(pred_matrix): - # #check if there are any existing edges - # # only evaluate senders with existing edge (of course). - # if np.isin(1, true_matrix[i]): - # row_MRRs.append(get_row_MRR(pred_row, true_matrix[i])) - avg_MRR = torch.tensor(row_MRRs).mean() return float(avg_MRR) @@ -512,7 +432,7 @@ def compute_MRR(eval_batch: deepsnap.graph.Graph, if method == 'all': # NOTE: this method requires iterating over all nodes, which is slow. assert num_neg_per_node == -1 - return report_baseline_MRR(eval_batch, model) + return report_MRR_all(eval_batch, model) else: assert num_neg_per_node > 0 # Sample negative edges for each node. From 6eabfa9a0625ec261ad11a3344d8964b4706afd6 Mon Sep 17 00:00:00 2001 From: Tianyu Du Date: Thu, 10 Jun 2021 00:21:30 -0700 Subject: [PATCH 58/66] Add example config yaml for hetero GNN --- run/configs/ROLAND/roland_hetero.yaml | 70 +++++++++++++++++++++++++++ 1 file changed, 70 insertions(+) create mode 100644 run/configs/ROLAND/roland_hetero.yaml diff --git a/run/configs/ROLAND/roland_hetero.yaml b/run/configs/ROLAND/roland_hetero.yaml new file mode 100644 index 00000000..23bf9feb --- /dev/null +++ b/run/configs/ROLAND/roland_hetero.yaml @@ -0,0 +1,70 @@ +remark: live_update +out_dir: results +device: auto +metric: + mrr_method: max + mrr_num_negative_edges: 1000 +dataset: + format: roland + name: bsi_svt_2008.tsv + is_hetero: True + dir: /home/tianyudu/Data/all_datasets + task: link_pred # edge, node. 
+ task_type: classification + transductive: True + split: [0.8, 0.1, 0.1] + augment_feature: [] + augment_feature_dims: [0] + edge_encoder: True + edge_encoder_name: roland_general + edge_dim: 1 + node_encoder: False + link_pred_all_edges: False +transaction: + keep_ratio: linear + snapshot: True + snapshot_freq: W + check_snapshot: False + history: rolling + horizon: 1 + pred_mode: at + loss: supervised + feature_int_dim: 16 + feature_edge_int_num: [] + feature_node_int_num: [] + feature_amount_dim: 16 # * + feature_time_dim: 16 # * +train: + batch_size: 32 + eval_period: 20 + ckpt_period: 400 + mode: live_update + internal_validation_tolerance: 5 # * +meta: + is_meta: True + alpha: 0.5 # * +model: + type: gnn_recurrent + loss_fun: cross_entropy + edge_decoding: concat +gnn: + embed_update_method: gru # * + layers_pre_mp: 2 # * + layers_mp: 8 # * + layers_post_mp: 2 # * + dim_inner: 64 # * + mlp_update_layers: 2 # * + layer_type: residual_edge_conv # * + skip_connection: affine # * + stage_type: stack + batchnorm: True # * + act: prelu + dropout: 0.0 + agg: add # * + att_heads: 1 + normalize_adj: False + msg_direction: both # * +optim: + optimizer: adam + base_lr: 0.01 # * + max_epoch: 100 \ No newline at end of file From 2466f5c87c522dc68b7d4e09bf94cf2e3750a166 Mon Sep 17 00:00:00 2001 From: Tianyu Du Date: Mon, 14 Jun 2021 13:52:39 -0700 Subject: [PATCH 59/66] add examples --- run/grids/ROLAND/example_grid.txt | 7 +++++++ run/run_roland_batch.sh | 21 +++++++++++++++++++++ 2 files changed, 28 insertions(+) create mode 100644 run/grids/ROLAND/example_grid.txt create mode 100644 run/run_roland_batch.sh diff --git a/run/grids/ROLAND/example_grid.txt b/run/grids/ROLAND/example_grid.txt new file mode 100644 index 00000000..d2c3291f --- /dev/null +++ b/run/grids/ROLAND/example_grid.txt @@ -0,0 +1,7 @@ +meta.is_meta is_meta [True] +meta.alpha alpha [0.2,0.4,0.6,0.8,1.0] +gnn.skip_connection skip ['affine','identity','none'] +gnn.embed_update_method update ['gru'] +gnn.layers_mp mp [2,3] +gnn.batchnorm bn [True] +optim.base_lr lr [0.003,0.01,0.03] \ No newline at end of file diff --git a/run/run_roland_batch.sh b/run/run_roland_batch.sh new file mode 100644 index 00000000..398fa830 --- /dev/null +++ b/run/run_roland_batch.sh @@ -0,0 +1,21 @@ +#!/usr/bin/env bash + +CONFIG=roland_gru_ucimsg +GRID=example_grid +REPEAT=3 +MAX_JOBS=10 +SLEEP=1 + +python configs_gen.py --config configs/ROLAND/${CONFIG}.yaml \ + --grid grids/ROLAND/${GRID}.txt \ + --out_dir configs +# run batch of configs +# Args: config_dir, num of repeats, max jobs running, sleep time +bash parallel.sh configs/${CONFIG}_grid_${GRID} $REPEAT $MAX_JOBS $SLEEP +# rerun missed / stopped experiments +bash parallel.sh configs/${CONFIG}_grid_${GRID} $REPEAT $MAX_JOBS $SLEEP +# rerun missed / stopped experiments +bash parallel.sh configs/${CONFIG}_grid_${GRID} $REPEAT $MAX_JOBS $SLEEP + +# aggregate results for the batch +python agg_batch.py --dir results/${CONFIG}_grid_${GRID} From f89fbbfd278161da974e12f6150401f065673846 Mon Sep 17 00:00:00 2001 From: Tianyu Du Date: Mon, 14 Jun 2021 14:08:46 -0700 Subject: [PATCH 60/66] update markdown --- ROLAND_README.md | 60 ++++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 51 insertions(+), 9 deletions(-) diff --git a/ROLAND_README.md b/ROLAND_README.md index 7c688ab4..4b7390e5 100644 --- a/ROLAND_README.md +++ b/ROLAND_README.md @@ -1,13 +1,17 @@ -# Use case: ROLAND: Graph Neural Networks for Dynamic Graphs -Code associated with the ROLAND project. 
+# ROLAND: Graph Neural Networks for Dynamic Graphs +This repository contains the code associated with the ROLAND project, built on top of the GraphGym framework. +You can first walk through the *how-to* sections to run experiments on existing +public datasets. +After understanding how to run and analyze experiments, you can read through the *development topics* to run our pipeline on your own datasets. ## TODO: add figures to illustrate the ROLAND framework. -## Datasets +## How to Download Datasets Most of datasets are used in our paper can be found at `https://snap.stanford.edu/data/index.html`. ```bash +# Or use your own dataset directory. mkdir ./all_datasets/ cd ./all_datasets wget 'https://snap.stanford.edu/data/soc-sign-bitcoinotc.csv.gz' @@ -24,16 +28,23 @@ gunzip soc-sign-bitcoinalpha.csv.gz gunzip soc-sign-bitcoinotc.csv.gz tar xf ./as-733.tar.gz -# Rename files. +# Rename files; this step is required by our loader. +# You can leave the web-redditEmbeddings-subreddits.csv file unchanged. mv ./soc-sign-bitcoinotc.csv ./bitcoinotc.csv mv ./soc-sign-bitcoinalpha.csv ./bitcoinalpha.csv mv ./soc-redditHyperlinks-body.tsv ./reddit-body.tsv mv ./soc-redditHyperlinks-title.tsv ./reddit-title.tsv ``` -## Examples of ROLAND Use Cases +You should expect 740 files, including the zipped `as-733.tar.gz`, when checking with `ls | wc -l`. +The total disk space required is approximately 950 MiB. +## How to Run Single Experiments from Our Paper +**WARNING**: for each `yaml` file in `./run/configs/ROLAND`, you need to update the `dataset.dir` field to the correct path of the datasets downloaded above. + The ROLAND project focuses on link-predictions for homogenous dynamic graphs. -To run link-prediction task on `CollegeMsg.txt` dataset: +Here we demonstrate example runs using the datasets downloaded above. + +To run the link-prediction task on the `CollegeMsg.txt` dataset with default settings: ```bash cd ./run python3 main_dynamic.py --cfg configs/ROLAND/roland_gru_ucimsg.yaml --repeat 1 @@ -50,15 +61,46 @@ python3 main_dynamic.py --cfg configs/ROLAND/roland_gru_reddittitle.yaml --repea python3 main_dynamic.py --cfg configs/ROLAND/roland_gru_redditbody.yaml --repeat 1 ``` +The `--repeat` argument controls the number of random seeds used for each experiment. For example, setting `--repeat 3` runs each experiment three times with three different random seeds. To explore training result: ```bash cd ./run tensorboard --logdir=./runs_live_update --port=6006 ``` +**WARNING** The x-axis of plots in tensorboard is **not** epochs, they are snapshot IDs (e.g., the $i^{th}$ day or the $i^{th}$ week) instead. ## Examples on Heterogenous Graph Snapshots -`Under development` +```bash +Under development. +``` + +## How to Run Grid Search / Batch Experiments +To run grid search / batch experiments, one needs a `main.py` file, a `base_config.yaml`, and a `grid.txt` file. The main and config files are the same as in the single experiment setup above. +Suppose one wants to do link prediction on the `CollegeMsg.txt` dataset with configurations from `configs/ROLAND/roland_gru_ucimsg.yaml` and, in addition, wants to try out (1) *different numbers of GNN message passing layers* and (2) *different learning rates*. +In this case, one can use the following grid file: +```text +# grid.txt, lines starting with # are comments. +gnn.layers_mp mp [2,3,4,5] +optim.base_lr lr [0.003,0.01,0.03] +``` +**WARNING**: the format of each line is crucial: `NAME_IN_YAML SHORT_ALIAS LIST_OF_VALUES` (three fields separated by spaces), and there should **not** be any space inside the list of values.
+ +The `grid.txt` above will generate $4\times 3=12$ different configurations by modifying `gnn.layers_mp` and `optim.base_lr` to the respective levels in the base config file `roland_gru_ucimsg.yaml`. + +Please see `./run/grids/ROLAND/example_grid.txt` for a complete example of a grid-search text file. + +To run the experiment using `example_grid.txt`: +```bash +bash ./run_roland_batch.sh +``` +## How to Export Tensorboard Results to CSV +We provide a simple script to aggregate results from a batch of tensorboard files; please feel free to look into `tabulate_events.py` and modify it. +```bash +# Usage: python3 ./tabulate_events.py <log_dir> <output_csv> +python3 ./tabulate_events.py ./live_update ./out.csv +``` -## How to Load Your Own Dataset -Please refer to `./graphgym/contrib/loader/roland_template.py` and `./graphgym/contrib/loader/roland_template_hetero.py` for examples of building loaders. +## Development Topic: Use Your Own Dataset +We provide two templates for constructing your own datasets; please refer to +(1) `./graphgym/contrib/loader/roland_template.py` and (2) `./graphgym/contrib/loader/roland_template_hetero.py` for examples of building loaders. From f97532188c2a1b71f170ac9aa464fbc5c6de7168 Mon Sep 17 00:00:00 2001 From: Tianyu Du Date: Mon, 14 Jun 2021 14:08:56 -0700 Subject: [PATCH 61/66] add file --- run/tabulate_events.py | 116 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 116 insertions(+) create mode 100644 run/tabulate_events.py diff --git a/run/tabulate_events.py b/run/tabulate_events.py new file mode 100644 index 00000000..42e640cb --- /dev/null +++ b/run/tabulate_events.py @@ -0,0 +1,116 @@ +""" +A simple utility that generates performance reports for different models on +different datasets. + +This script works for the live-update scheme only; use graphgym's native analysis +tools for the rolling/fixed-split scheme. +""" +import os +import sys +from typing import List + +import numpy as np +import pandas as pd +import yaml +from tensorboard.backend.event_processing.event_accumulator import \ + EventAccumulator +from tqdm import tqdm + + +def squeeze_dict(old_dict: dict) -> dict: + """Squeezes nested dictionary keys. + Example: old_dict['key1'] = {'key2': 'hello'}. + will generate new_dict['key1.key2'] = 'hello'. + """ + new_dict = dict() + for k1 in old_dict.keys(): + if isinstance(old_dict[k1], dict): + for k2 in old_dict[k1].keys(): + new_key = k1 + '.' + k2 + new_dict[new_key] = old_dict[k1][k2] + else: + new_dict[k1] = old_dict[k1] + return new_dict + + +def tabulate_events(logdir: str, variables: List[str]) -> pd.DataFrame: + """ + Generates a pandas dataframe which contains experiments (runs) as its rows; + the returned dataframe contains columns: + (1) File name/path of that run. + (2) Fields required in `variables' from corresponding config.yaml. + (3) Test and validation set performance (MRR and Recall at k). + """ + all_runs = list() + count = 0 # count number of experiment runs processed. + + for run_dir in tqdm(os.listdir(logdir)): + if run_dir.startswith('.'): + # Ignore hidden files. + continue + + if not os.path.isdir(os.path.join(logdir, run_dir)): + # Ignore other things such as generated tables. + print(run_dir) + continue + + count += 1 + + config_dir = os.path.join(logdir, run_dir, 'config.yaml') + with open(config_dir) as file: + config = yaml.full_load(file) + config = squeeze_dict(config) + + current_run = {'run': run_dir} + for var in variables: + # record required variables in config.yaml.
+ current_run[var] = config[var] + + # for metric in ['test_mrr', 'test_rck1', 'test_rck3', 'test_rck10', + # 'test_loss', + # 'val_mrr', 'val_rck1', 'val_rck3', 'val_rck10', + # 'val_loss']: + for metric in ['test_mrr']: + event_path = os.path.join(logdir, run_dir, metric) + # print(f'Processing event file {event_path}') + + ea = EventAccumulator(event_path).Reload() + + tag_values = [] + steps = [] + + x = 'test' if metric.startswith('test') else 'val' + for event in ea.Scalars(x): + # Each (value, step) corresponds to a (value, snapshot). + tag_values.append(event.value) + steps.append(event.step) + + current_run['average_' + metric] = np.mean(tag_values) + # current_run: one row in the aggregated dataset. + all_runs.append(current_run) + print(f'exported {count} experiments.') + return pd.DataFrame(all_runs) + + +if __name__ == '__main__': + # 1. directory of baseline experiment set. + # 2. directory of fine-tuning experiment, our model + all datasets. + # 3. directory of output tables and files. + path, out_dir = sys.argv[1], sys.argv[2] + # fields from config.yaml to be included as columns, + # doesn't hurt to add more columns. + variables = ['dataset.format', 'dataset.name', + 'dataset.AS_node_feature', + 'gnn.layer_type', 'gnn.batchnorm', 'gnn.layers_mp', + 'gnn.layers_post_mp', + 'gnn.layers_pre_mp', + 'gnn.skip_connection', 'gnn.embed_update_method', + 'optim.base_lr', + 'transaction.feature_int_dim', + 'gnn.agg', 'train.mode', + 'gnn.msg_direction', + 'train.internal_validation_tolerance', 'gnn.dim_inner', + 'meta.is_meta', 'meta.method', 'meta.alpha', + 'transaction.snapshot_freq', 'gnn.embed_update_method'] + df = tabulate_events(path, variables) + df.to_csv(out_dir) From 0671c0bbd6ad2df3a8aabeef1400ec5764cf65d0 Mon Sep 17 00:00:00 2001 From: Tianyu Du Date: Tue, 15 Jun 2021 13:34:25 -0700 Subject: [PATCH 62/66] clean up comments --- graphgym/models/update.py | 80 --------------------------------------- 1 file changed, 80 deletions(-) diff --git a/graphgym/models/update.py b/graphgym/models/update.py index df99d3aa..0d3a6520 100644 --- a/graphgym/models/update.py +++ b/graphgym/models/update.py @@ -98,86 +98,6 @@ def forward(self, batch): return batch -# class MaskedGRUUpdater(nn.Module): -# """ -# Node embedding update block using standard GRU. - -# h[l,t] = GRU(h[l,t-1], h[l-1,t]) -# """ -# def __init__(self, dim_in: int, dim_out: int, layer_id: int): -# # dim_in (dim of X): dimension of input node_feature. -# # dim_out (dim of H): dimension of previous and current hidden states. -# # forward(X, H) --> H. -# super(MaskedGRUUpdater, self).__init__() -# self.layer_id = layer_id -# self.GRU_Z = nn.Sequential( -# nn.Linear(dim_in + dim_out, dim_out, bias=True), -# nn.Sigmoid()) -# # reset gate. -# self.GRU_R = nn.Sequential( -# nn.Linear(dim_in + dim_out, dim_out, bias=True), -# nn.Sigmoid()) -# # new embedding gate. -# self.GRU_H_Tilde = nn.Sequential( -# nn.Linear(dim_in + dim_out, dim_out, bias=True), -# nn.Tanh()) - -# def forward(self, batch): -# H_prev = batch.node_states[self.layer_id] -# X = batch.node_feature -# Z = self.GRU_Z(torch.cat([X, H_prev], dim=1)) -# R = self.GRU_R(torch.cat([X, H_prev], dim=1)) -# H_tilde = self.GRU_H_Tilde(torch.cat([X, R * H_prev], dim=1)) -# H_gru = Z * H_prev + (1 - Z) * H_tilde - -# # Update for active nodes only, use output from GRU. -# keep_mask = (batch.node_degree_new == 0) -# H_out = H_gru -# # Reset inactive nodes' embedding. 
-# H_out[keep_mask, :] = H_prev[keep_mask, :] - -# batch.node_states[self.layer_id] = H_out -# return batch - - -# class MovingAverageGRUUpdater(nn.Module): -# """ -# Node embedding update block using standard GRU. - -# h[l,t] = GRU(h[l,t-1], h[l-1,t]) -# """ -# def __init__(self, dim_in: int, dim_out: int, layer_id: int): -# # dim_in (dim of X): dimension of input node_feature. -# # dim_out (dim of H): dimension of previous and current hidden states. -# # forward(X, H) --> H. -# super(GRUUpdater, self).__init__() -# self.layer_id = layer_id -# self.GRU_Z = nn.Sequential( -# nn.Linear(dim_in + dim_out, dim_out, bias=True), -# nn.Sigmoid()) -# # reset gate. -# self.GRU_R = nn.Sequential( -# nn.Linear(dim_in + dim_out, dim_out, bias=True), -# nn.Sigmoid()) -# # new embedding gate. -# self.GRU_H_Tilde = nn.Sequential( -# nn.Linear(dim_in + dim_out, dim_out, bias=True), -# nn.Tanh()) - -# def forward(self, batch): -# H_prev = batch.node_states[self.layer_id] -# X = batch.node_feature -# Z = self.GRU_Z(torch.cat([X, H_prev], dim=1)) -# R = self.GRU_R(torch.cat([X, H_prev], dim=1)) -# H_tilde = self.GRU_H_Tilde(torch.cat([X, R * H_prev], dim=1)) -# H_gru = Z * H_prev + (1 - Z) * H_tilde - -# H_out = H_prev * batch.keep_ratio + H_gru * (1 - batch.keep_ratio) - -# batch.node_states[self.layer_id] = H_out -# return batch - - update_dict = { 'moving_average': MovingAverageUpdater, 'mlp': MLPUpdater, From 71e0086b72b288bf0fdbe21b5f27bed9db644f29 Mon Sep 17 00:00:00 2001 From: Tianyu Du Date: Tue, 15 Jun 2021 13:34:57 -0700 Subject: [PATCH 63/66] cleanup comments --- graphgym/contrib/train/train_utils.py | 60 --------------------------- 1 file changed, 60 deletions(-) diff --git a/graphgym/contrib/train/train_utils.py b/graphgym/contrib/train/train_utils.py index 9117ee76..5784147e 100644 --- a/graphgym/contrib/train/train_utils.py +++ b/graphgym/contrib/train/train_utils.py @@ -268,66 +268,6 @@ def fast_batch_mrr(edge_label_index: torch.Tensor, mrr = float(torch.mean(1 / rank_by_user)) return mrr -# TODO: get recall at k back. - -# @torch.no_grad() -# def report_rank_based_eval(eval_batch, model, method: str, -# num_neg_per_node: int=1000): -# if num_neg_per_node == -1: -# # Do not report rank-based metrics, used in debug mode. -# return 0, 0, 0, 0 -# # Get positive edge indices. -# edge_index = eval_batch.edge_label_index[:, eval_batch.edge_label == 1] -# edge_index = edge_index.to('cpu') - -# neg_edge_index = gen_negative_edges(edge_index, num_neg_per_node, -# num_nodes=eval_batch.num_nodes) - -# new_edge_label_index = torch.cat((edge_index, neg_edge_index), -# dim=1).long() -# new_edge_label = torch.cat((torch.ones(edge_index.shape[1]), -# torch.zeros(neg_edge_index.shape[1]) -# ), dim=0).long() - -# # Construct evaluation samples. 
-# eval_batch.edge_label_index = new_edge_label_index -# eval_batch.edge_label = new_edge_label - -# eval_batch.to(torch.device(cfg.device)) -# # move state to gpu -# for layer in range(len(eval_batch.node_states)): -# if torch.is_tensor(eval_batch.node_states[layer]): -# eval_batch.node_states[layer] = eval_batch.node_states[layer].to( -# torch.device(cfg.device)) -# pred, true = model(eval_batch) -# loss, pred_score = compute_loss(pred, true) - -# mrr, recall_at = fast_batch_mrr_and_recall(eval_batch.edge_label_index, -# eval_batch.edge_label, -# pred_score, -# num_neg_per_node, -# eval_batch.num_nodes, -# method) - -# # return mrr, 0, 0, 0 -# # -# # mrr_old, recall_at_old = compute_src_mrr_and_recall( -# # eval_batch.edge_label_index, -# # eval_batch.edge_label, -# # pred_score, -# # recall_k_lst=[1, 3, 10], -# # mrr_top_k=1) -# # -# # print(f'Old MRR: {mrr_old: 0.6f}, new MRR {mrr: 0.6f}') -# # print( -# # f'Old Recall@1: {recall_at_old[1]: 0.6f}, new Recall@1 {recall_at[1]: 0.6f}') -# # print( -# # f'Old Recall@3: {recall_at_old[3]: 0.6f}, new Recall@3 {recall_at[3]: 0.6f}') -# # print( -# # f'Old Recall@10: {recall_at_old[10]: 0.6f}, new Recall@10 {recall_at[10]: 0.6f}') - -# return mrr, recall_at[1], recall_at[3], recall_at[10] - def get_row_MRR(probs, true_classes): existing_mask = true_classes == 1 From bd0527a0b7a7ee9b72ff74ad72e075c6ff5be05e Mon Sep 17 00:00:00 2001 From: Tianyu Du Date: Tue, 15 Jun 2021 13:35:56 -0700 Subject: [PATCH 64/66] remove loaders --- graphgym/contrib/loader/roland_as.py | 166 --------------------- graphgym/contrib/loader/roland_btc.py | 182 ----------------------- graphgym/contrib/loader/roland_reddit.py | 174 ---------------------- graphgym/contrib/loader/roland_ucimsg.py | 110 -------------- 4 files changed, 632 deletions(-) delete mode 100644 graphgym/contrib/loader/roland_as.py delete mode 100644 graphgym/contrib/loader/roland_btc.py delete mode 100644 graphgym/contrib/loader/roland_reddit.py delete mode 100644 graphgym/contrib/loader/roland_ucimsg.py diff --git a/graphgym/contrib/loader/roland_as.py b/graphgym/contrib/loader/roland_as.py deleted file mode 100644 index bcf3b7a1..00000000 --- a/graphgym/contrib/loader/roland_as.py +++ /dev/null @@ -1,166 +0,0 @@ -""" -Loader for the Autonomous systems AS-733 dataset. -""" -import os -from datetime import datetime -from typing import List - -import numpy as np -import pandas as pd -import torch -from deepsnap.graph import Graph -from graphgym.config import cfg -from graphgym.register import register_loader -from sklearn.preprocessing import OrdinalEncoder -from tqdm import tqdm - - -def make_graph_snapshot(g_all: Graph, snapshot_freq: str) -> List[Graph]: - t = g_all.edge_time.numpy().astype(np.int64) - snapshot_freq = snapshot_freq.upper() - - period_split = pd.DataFrame( - {'Timestamp': t, - 'TransactionTime': pd.to_datetime(t, unit='s')}, - index=range(len(g_all.edge_time))) - - freq_map = {'D': '%j', # day of year. - 'W': '%W', # week of year. - 'M': '%m' # month of year. - } - - period_split['Year'] = period_split['TransactionTime'].dt.strftime( - '%Y').astype(int) - - period_split['SubYearFlag'] = period_split['TransactionTime'].dt.strftime( - freq_map[snapshot_freq]).astype(int) - - period2id = period_split.groupby(['Year', 'SubYearFlag']).indices - - periods = sorted(list(period2id.keys())) - snapshot_list = list() - - for p in periods: - # unique IDs of edges in this period. 
- period_members = period2id[p] - assert np.all(period_members == np.unique(period_members)) - - g_incr = Graph( - node_feature=g_all.node_feature, - edge_feature=g_all.edge_feature[period_members, :], - edge_index=g_all.edge_index[:, period_members], - edge_time=g_all.edge_time[period_members], - directed=g_all.directed - ) - snapshot_list.append(g_incr) - - snapshot_list.sort(key=lambda x: torch.min(x.edge_time)) - - return snapshot_list - - -def file2timestamp(file_name): - t = file_name.strip('.txt').strip('as') - ts = int(datetime.strptime(t, '%Y%m%d').timestamp()) - return ts - - -def load_generic_dataset(format, name, dataset_dir): - if format == 'as': - all_files = [x for x in sorted(os.listdir(dataset_dir)) - if (x.startswith('as') and x.endswith('.txt'))] - assert len(all_files) == 733 - assert all(x.endswith('.txt') for x in all_files) - - edge_index_lst, edge_time_lst = list(), list() - all_files = sorted(all_files) - # if cfg.train.mode in ['baseline', 'baseline_v2', 'live_update_fixed_split']: - # # The baseline setting in EvolveGCN paper only uses 100 snapshots. - # all_files = all_files[:100] - for graph_file in tqdm(all_files): - today = file2timestamp(graph_file) - graph_file = os.path.join(dataset_dir, graph_file) - - src, dst = list(), list() - with open(graph_file, 'r') as f: - for line in f.readlines(): - if line.startswith('#'): - continue - line = line.strip('\n') - v1, v2 = line.split('\t') - src.append(int(v1)) - dst.append(int(v2)) - - edge_index = np.stack((src, dst)) - edge_index_lst.append(edge_index) - - edge_time = np.ones(edge_index.shape[1]) * today - edge_time_lst.append(edge_time) - - edge_index_raw = np.concatenate(edge_index_lst, axis=1).astype(int) - - num_nodes = len(np.unique(edge_index_raw)) - - # encode node indices to consecutive integers. - node_indices = np.sort(np.unique(edge_index_raw)) - enc = OrdinalEncoder(categories=[node_indices, node_indices]) - edge_index = enc.fit_transform(edge_index_raw.transpose()).transpose() - edge_index = torch.Tensor(edge_index).long() - edge_time = torch.Tensor(np.concatenate(edge_time_lst)) - - # Use scaled datetime as edge_feature. - scale = edge_time.max() - edge_time.min() - base = edge_time.min() - scaled_edge_time = 2 * (edge_time.clone() - base) / scale - - assert cfg.dataset.AS_node_feature in ['one', 'one_hot_id', - 'one_hot_degree_global'] - - if cfg.dataset.AS_node_feature == 'one': - node_feature = torch.ones(num_nodes, 1) - elif cfg.dataset.AS_node_feature == 'one_hot_id': - # One hot encoding the node ID. - node_feature = torch.Tensor(np.eye(num_nodes)) - elif cfg.dataset.AS_node_feature == 'one_hot_degree_global': - # undirected graph, use only out degree. - _, node_degree = torch.unique(edge_index[0], sorted=True, - return_counts=True) - node_feature = np.zeros((num_nodes, node_degree.max() + 1)) - node_feature[np.arange(num_nodes), node_degree] = 1 - # 1 ~ 63748 degrees, but only 710 possible levels, exclude all zero - # columns. 
- non_zero_cols = (node_feature.sum(axis=0) > 0) - node_feature = node_feature[:, non_zero_cols] - node_feature = torch.Tensor(node_feature) - else: - raise NotImplementedError - - g_all = Graph( - node_feature=node_feature, - edge_feature=scaled_edge_time.reshape(-1, 1), - edge_index=edge_index, - edge_time=edge_time, - directed=True - ) - - snapshot_list = make_graph_snapshot(g_all, - cfg.transaction.snapshot_freq) - - for g_snapshot in snapshot_list: - g_snapshot.node_states = [0 for _ in range(cfg.gnn.layers_mp)] - g_snapshot.node_cells = [0 for _ in range(cfg.gnn.layers_mp)] - g_snapshot.node_degree_existing = torch.zeros(num_nodes) - - if cfg.dataset.split_method == 'chronological_temporal': - return snapshot_list - else: - # The default split (80-10-10) requires at least 10 edges each - # snapshot. - filtered_graphs = list() - for g in tqdm(snapshot_list): - if g.num_edges >= 10: - filtered_graphs.append(g) - return filtered_graphs - - -register_loader('roland_as', load_generic_dataset) diff --git a/graphgym/contrib/loader/roland_btc.py b/graphgym/contrib/loader/roland_btc.py deleted file mode 100644 index 58a9884d..00000000 --- a/graphgym/contrib/loader/roland_btc.py +++ /dev/null @@ -1,182 +0,0 @@ -""" -Data loader for bitcoin datasets. -Mar. 27, 2021 -""" -import os -from typing import List, Union - -import deepsnap -import graphgym.contrib.loader.dynamic_graph_utils as utils -import numpy as np -import pandas as pd -import torch -from deepsnap.graph import Graph -from graphgym.config import cfg -from graphgym.register import register_loader -from sklearn.preprocessing import MinMaxScaler, OrdinalEncoder - - -def load_single_dataset(dataset_dir: str) -> Graph: - df_trans = pd.read_csv(dataset_dir, sep=',', header=None, index_col=None) - df_trans.columns = ['SOURCE', 'TARGET', 'RATING', 'TIME'] - # NOTE: 'SOURCE' and 'TARGET' are not consecutive. - num_nodes = len( - pd.unique(df_trans[['SOURCE', 'TARGET']].to_numpy().ravel())) - - # bitcoin OTC contains decimal numbers, round them. - df_trans['TIME'] = df_trans['TIME'].astype(np.int).astype(np.float) - assert not np.any(pd.isna(df_trans).values) - - time_scaler = MinMaxScaler((0, 2)) - df_trans['TimestampScaled'] = time_scaler.fit_transform( - df_trans['TIME'].values.reshape(-1, 1)) - - edge_feature = torch.Tensor( - df_trans[['RATING', 'TimestampScaled']].values) # (E, edge_dim) - # SOURCE and TARGET IDs are already encoded in the csv file. - # edge_index = torch.Tensor( - # df_trans[['SOURCE', 'TARGET']].values.transpose()).long() # (2, E) - - node_indices = np.sort( - pd.unique(df_trans[['SOURCE', 'TARGET']].to_numpy().ravel())) - enc = OrdinalEncoder(categories=[node_indices, node_indices]) - raw_edges = df_trans[['SOURCE', 'TARGET']].values - edge_index = enc.fit_transform(raw_edges).transpose() - edge_index = torch.LongTensor(edge_index) - - # num_nodes = torch.max(edge_index) + 1 - # Use dummy node features. - node_feature = torch.ones(num_nodes, 1).float() - - edge_time = torch.FloatTensor(df_trans['TIME'].values) - - # TODO: add option here. 
- # if cfg.train.mode in ['baseline', 'baseline_v2', 'live_update_fixed_split']: - # edge_feature = torch.cat((edge_feature, edge_feature.clone()), dim=0) - # reversed_idx = torch.stack([edge_index[1], edge_index[0]]).clone() - # edge_index = torch.cat((edge_index, reversed_idx), dim=1) - # edge_time = torch.cat((edge_time, edge_time.clone())) - - graph = Graph( - node_feature=node_feature, - edge_feature=edge_feature, - edge_index=edge_index, - edge_time=edge_time, - directed=True - ) - return graph - - -# def make_graph_snapshot(g_all: Graph, snapshot_freq: str) -> List[Graph]: -# t = g_all.edge_time.numpy().astype(np.int64) -# snapshot_freq = snapshot_freq.upper() - -# period_split = pd.DataFrame( -# {'Timestamp': t, -# 'TransactionTime': pd.to_datetime(t, unit='s')}, -# index=range(len(g_all.edge_time))) - -# freq_map = {'D': '%j', # day of year. -# 'W': '%W', # week of year. -# 'M': '%m' # month of year. -# } - -# period_split['Year'] = period_split['TransactionTime'].dt.strftime( -# '%Y').astype(int) - -# period_split['SubYearFlag'] = period_split['TransactionTime'].dt.strftime( -# freq_map[snapshot_freq]).astype(int) - -# period2id = period_split.groupby(['Year', 'SubYearFlag']).indices - -# periods = sorted(list(period2id.keys())) -# snapshot_list = list() - -# for p in periods: -# # unique IDs of edges in this period. -# period_members = period2id[p] -# assert np.all(period_members == np.unique(period_members)) - -# g_incr = Graph( -# node_feature=g_all.node_feature, -# edge_feature=g_all.edge_feature[period_members, :], -# edge_index=g_all.edge_index[:, period_members], -# edge_time=g_all.edge_time[period_members], -# directed=g_all.directed -# ) -# snapshot_list.append(g_incr) - -# snapshot_list.sort(key=lambda x: torch.min(x.edge_time)) - -# return snapshot_list - - -# def split_by_seconds(g_all, freq_sec: int): -# # Split the entire graph into snapshots. -# split_criterion = g_all.edge_time // freq_sec -# groups = torch.sort(torch.unique(split_criterion))[0] -# snapshot_list = list() -# for t in groups: -# period_members = (split_criterion == t) -# g_incr = Graph( -# node_feature=g_all.node_feature, -# edge_feature=g_all.edge_feature[period_members, :], -# edge_index=g_all.edge_index[:, period_members], -# edge_time=g_all.edge_time[period_members], -# directed=g_all.directed -# ) -# snapshot_list.append(g_incr) -# return snapshot_list - -# TODO: merge these two method. -def load_snapshots(dataset_dir: str, - snapshot: bool = True, - snapshot_freq: str = None - ) -> Union[deepsnap.graph.Graph, - List[deepsnap.graph.Graph]]: - g_all = load_single_dataset(dataset_dir) - if not snapshot: - return g_all - - if snapshot_freq.upper() not in ['D', 'W', 'M']: - # format: '1200000s' - # assume split by seconds (timestamp) as in EvolveGCN paper. - freq = int(snapshot_freq.strip('s')) - snapshot_list = utils.make_graph_snapshot_by_seconds(g_all, freq) - else: - snapshot_list = utils.make_graph_snapshot(g_all, snapshot_freq) - num_nodes = g_all.edge_index.max() + 1 - - for g_snapshot in snapshot_list: - g_snapshot.node_states = [0 for _ in range(cfg.gnn.layers_mp)] - g_snapshot.node_cells = [0 for _ in range(cfg.gnn.layers_mp)] - g_snapshot.node_degree_existing = torch.zeros(num_nodes) - - # check snapshots ordering. 
- prev_end = -1 - for g in snapshot_list: - start, end = torch.min(g.edge_time), torch.max(g.edge_time) - assert prev_end < start <= end - prev_end = end - - return snapshot_list - - -def load_btc_dataset(format: str, name: str, dataset_dir: str): - if format == 'bitcoin': - graphs = load_snapshots(os.path.join(dataset_dir, name), - snapshot=cfg.transaction.snapshot, - snapshot_freq=cfg.transaction.snapshot_freq) - if cfg.dataset.split_method == 'chronological_temporal': - return graphs - else: - # The default split (80-10-10) requires at least 10 edges each - # snapshot. - filtered_graphs = list() - for g in graphs: - if g.num_edges >= 10: - filtered_graphs.append(g) - return filtered_graphs - - -register_loader('roland_btc', load_btc_dataset) diff --git a/graphgym/contrib/loader/roland_reddit.py b/graphgym/contrib/loader/roland_reddit.py deleted file mode 100644 index 37d7e66d..00000000 --- a/graphgym/contrib/loader/roland_reddit.py +++ /dev/null @@ -1,174 +0,0 @@ -import os -from typing import List, Union - -import dask.dataframe as dd -import deepsnap -import graphgym.contrib.loader.dynamic_graph_utils as utils -import numpy as np -import pandas as pd -import torch -from dask_ml.preprocessing import OrdinalEncoder -from deepsnap.graph import Graph -from graphgym.config import cfg -from graphgym.register import register_loader -from sklearn.preprocessing import MinMaxScaler - - -def load_single_dataset(dataset_dir: str) -> Graph: - df_trans = dd.read_csv(dataset_dir, sep='\t', low_memory=False) - df_trans = df_trans.compute() - assert not np.any(pd.isna(df_trans).values) - df_trans.reset_index(drop=True, inplace=True) # required for dask. - - # Encode src and dst node IDs. - # get unique values of src and dst. - unique_subreddits = pd.unique( - df_trans[['SOURCE_SUBREDDIT', 'TARGET_SUBREDDIT']].to_numpy().ravel()) - unique_subreddits = np.sort(unique_subreddits) - cate_type = pd.api.types.CategoricalDtype(categories=unique_subreddits, - ordered=True) - df_trans['SOURCE_SUBREDDIT'] = df_trans['SOURCE_SUBREDDIT'].astype( - cate_type) - df_trans['TARGET_SUBREDDIT'] = df_trans['TARGET_SUBREDDIT'].astype( - cate_type) - enc = OrdinalEncoder(columns=['SOURCE_SUBREDDIT', 'TARGET_SUBREDDIT']) - df_encoded = enc.fit_transform(df_trans) - df_encoded.reset_index(drop=True, inplace=True) - - # Add node feature from the embedding dataset. - node_embedding_dir = os.path.join(cfg.dataset.dir, - 'web-redditEmbeddings-subreddits.csv') - - # index: subreddit name, values: embedding. - df_node = pd.read_csv(node_embedding_dir, header=None, index_col=0) - - # ordinal encoding follows order in unique_subreddits. - # df_encoded['SOURCE_SUBREDDIT'] contains encoded integral values. - # unique_subreddits[df_encoded['SOURCE_SUBREDDIT']] - # tries to reverse encoded_integer --> original subreddit name. - # check if recovered sub-reddit name matched the raw data. - for col in ['SOURCE_SUBREDDIT', 'TARGET_SUBREDDIT']: - assert all(unique_subreddits[df_encoded[col]] == df_trans[col]) - - num_nodes = len(cate_type.categories) - node_feature = torch.ones(size=(num_nodes, 300)) - # for nodes without precomputed embedding, use the average value. - node_feature = node_feature * np.mean(df_node.values) - - # cate_type.categories[i] is encoded to i, by construction. 
- for i, subreddit in enumerate(cate_type.categories): - if subreddit in df_node.index: - embedding = df_node.loc[subreddit] - node_feature[i, :] = torch.Tensor(embedding.values) - - # Original format: df['TIMESTAMP'][0] = '2013-12-31 16:39:18' - # Convert to unix timestamp (integers). - df_encoded['TIMESTAMP'] = pd.to_datetime(df_encoded['TIMESTAMP'], - format='%Y-%m-%d %H:%M:%S') - df_encoded['TIMESTAMP'] = (df_encoded['TIMESTAMP'] - pd.Timestamp( - '1970-01-01')) // pd.Timedelta('1s') # now integers. - - # Scale edge time. - time_scaler = MinMaxScaler((0, 2)) - df_encoded['TimestampScaled'] = time_scaler.fit_transform( - df_encoded['TIMESTAMP'].values.reshape(-1, 1)) - - # Link sentimental representation (86-dimension). - # comma-separated string: '3.1,5.1,0.0,...' - senti_str_lst = df_encoded['PROPERTIES'].values - edge_senti_embedding = [x.split(',') for x in senti_str_lst] - edge_senti_embedding = np.array(edge_senti_embedding).astype(np.float32) - # (E, 86) - - ef = df_encoded[['TimestampScaled', 'LINK_SENTIMENT']].values - edge_feature = np.concatenate([ef, edge_senti_embedding], axis=1) - edge_feature = torch.Tensor(edge_feature).float() # (E, 88) - - edge_index = torch.Tensor( - df_encoded[['SOURCE_SUBREDDIT', - 'TARGET_SUBREDDIT']].values.transpose()).long() # (2, E) - num_nodes = torch.max(edge_index) + 1 - - edge_time = torch.FloatTensor(df_encoded['TIMESTAMP'].values) - - graph = Graph( - node_feature=node_feature, - edge_feature=edge_feature, - edge_index=edge_index, - edge_time=edge_time, - directed=True - ) - - return graph - - -# def make_graph_snapshot(g_all: Graph, snapshot_freq: str) -> list: -# t = g_all.edge_time.numpy().astype(np.int64) -# snapshot_freq = snapshot_freq.upper() - -# period_split = pd.DataFrame( -# {'Timestamp': t, -# 'TransactionTime': pd.to_datetime(t, unit='s')}, -# index=range(len(g_all.edge_time))) - -# freq_map = {'D': '%j', # day of year. -# 'W': '%W', # week of year. -# 'M': '%m' # month of year. -# } - -# period_split['Year'] = period_split['TransactionTime'].dt.strftime( -# '%Y').astype(int) - -# period_split['SubYearFlag'] = period_split['TransactionTime'].dt.strftime( -# freq_map[snapshot_freq]).astype(int) - -# period2id = period_split.groupby(['Year', 'SubYearFlag']).indices -# # e.g., dictionary w/ key = (2021, 3) and val = array(edges). - -# periods = sorted(list(period2id.keys())) -# snapshot_list = list() -# for p in periods: -# # unique IDs of edges in this period. 
-# period_members = period2id[p] -# assert np.all(period_members == np.unique(period_members)) - -# g_incr = Graph( -# node_feature=g_all.node_feature, -# edge_feature=g_all.edge_feature[period_members, :], -# edge_index=g_all.edge_index[:, period_members], -# edge_time=g_all.edge_time[period_members], -# directed=g_all.directed -# ) -# snapshot_list.append(g_incr) -# return snapshot_list - - -def load_generic(dataset_dir: str, - snapshot: bool = True, - snapshot_freq: str = None - ) -> Union[deepsnap.graph.Graph, - List[deepsnap.graph.Graph]]: - g_all = load_single_dataset(dataset_dir) - if not snapshot: - return g_all - else: - snapshot_list = utils.make_graph_snapshot(g_all, snapshot_freq) - num_nodes = g_all.edge_index.max() + 1 - - for g_snapshot in snapshot_list: - g_snapshot.node_states = [0 for _ in range(cfg.gnn.layers_mp)] - g_snapshot.node_cells = [0 for _ in range(cfg.gnn.layers_mp)] - g_snapshot.node_degree_existing = torch.zeros(num_nodes) - - return snapshot_list - - -def load_generic_dataset(format, name, dataset_dir): - if format == 'reddit_hyperlink': - graphs = load_generic(os.path.join(dataset_dir, name), - snapshot=cfg.transaction.snapshot, - snapshot_freq=cfg.transaction.snapshot_freq) - return graphs - - -register_loader('roland_reddit_hyperlink', load_generic_dataset) diff --git a/graphgym/contrib/loader/roland_ucimsg.py b/graphgym/contrib/loader/roland_ucimsg.py deleted file mode 100644 index 6ac2b9fc..00000000 --- a/graphgym/contrib/loader/roland_ucimsg.py +++ /dev/null @@ -1,110 +0,0 @@ -""" -Loader for the CollegeMsg temporal network. - -For more information: https://snap.stanford.edu/data/CollegeMsg.html - -Mar. 31, 2021 -""" -import os -from typing import List, Union - -import deepsnap -import numpy as np -import pandas as pd -import torch -from deepsnap.graph import Graph -from sklearn.preprocessing import MinMaxScaler - -from graphgym.config import cfg -import graphgym.contrib.loader.dynamic_graph_utils as utils -from graphgym.register import register_loader - - -def load_single_dataset(dataset_dir: str) -> Graph: - df_trans = pd.read_csv(dataset_dir, sep=' ', header=None) - df_trans.columns = ['SRC', 'DST', 'TIMESTAMP'] - assert not np.any(pd.isna(df_trans).values) - df_trans.reset_index(drop=True, inplace=True) - - # Node IDs of this dataset start from 1, re-index to 0-based. 
- df_trans['SRC'] -= 1 - df_trans['DST'] -= 1 - - print('num of edges:', len(df_trans)) - print('num of nodes:', np.max(df_trans[['SRC', 'DST']].values) + 1) - - time_scaler = MinMaxScaler((0, 2)) - df_trans['TimestampScaled'] = time_scaler.fit_transform( - df_trans['TIMESTAMP'].values.reshape(-1, 1)) - - edge_feature = torch.Tensor( - df_trans[['TimestampScaled']].values).view(-1, 1) - edge_index = torch.Tensor( - df_trans[['SRC', 'DST']].values.transpose()).long() # (2, E) - num_nodes = torch.max(edge_index) + 1 - - node_feature = torch.ones(num_nodes, 1) - - edge_time = torch.FloatTensor(df_trans['TIMESTAMP'].values) - - graph = Graph( - node_feature=node_feature, - edge_feature=edge_feature, - edge_index=edge_index, - edge_time=edge_time, - directed=True - ) - - return graph - - -def load_snapshots(dataset_dir: str, - snapshot: bool = True, - snapshot_freq: str = None - ) -> Union[deepsnap.graph.Graph, - List[deepsnap.graph.Graph]]: - g_all = load_single_dataset(dataset_dir) - if not snapshot: - return g_all - if snapshot_freq.upper() not in ['D', 'W', 'M']: - # format: '1200000s' - assert snapshot_freq.endswith('s') - freq = int(snapshot_freq.strip('s')) - snapshot_list = utils.make_graph_snapshot_by_seconds(g_all, freq) - else: - snapshot_list = utils.make_graph_snapshot(g_all, snapshot_freq, - is_hetero=False) - - num_nodes = g_all.edge_index.max() + 1 - - for g_snapshot in snapshot_list: - g_snapshot.node_states = [0 for _ in range(cfg.gnn.layers_mp)] - g_snapshot.node_cells = [0 for _ in range(cfg.gnn.layers_mp)] - g_snapshot.node_degree_existing = torch.zeros(num_nodes) - - return snapshot_list - - -def load_uci_dataset(format, name, dataset_dir): - if format == 'uci_message': - graphs = load_snapshots(os.path.join(dataset_dir, name), - snapshot=cfg.transaction.snapshot, - snapshot_freq=cfg.transaction.snapshot_freq) - if cfg.dataset.split_method == 'chronological_temporal': - # return graphs with enough number of edges. - filtered_graphs = list() - for g in graphs: - if g.num_edges >= 2: - filtered_graphs.append(g) - return filtered_graphs - else: - # The default split (80-10-10) requires at least 10 edges each - # snapshot. - filtered_graphs = list() - for g in graphs: - if g.num_edges >= 10: - filtered_graphs.append(g) - return filtered_graphs - - -register_loader('roland_uci_message', load_uci_dataset) From 5f4f45fa8b6a9ccff56ff251404238ed6ba7e26a Mon Sep 17 00:00:00 2001 From: Tianyu Du Date: Tue, 15 Jun 2021 13:41:44 -0700 Subject: [PATCH 65/66] comment out under-development part. --- ROLAND_README.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/ROLAND_README.md b/ROLAND_README.md index 4b7390e5..933d02d2 100644 --- a/ROLAND_README.md +++ b/ROLAND_README.md @@ -5,7 +5,7 @@ public datasets. After understanding how to run and analyze experiments, you can read through the *development topics* to run our -## TODO: add figures to illustrate the ROLAND framework. + ## How to Download Datasets Most of datasets are used in our paper can be found at `https://snap.stanford.edu/data/index.html`. @@ -70,10 +70,10 @@ tensorboard --logdir=./runs_live_update --port=6006 ``` **WARNING** The x-axis of plots in tensorboard is **not** epochs, they are snapshot IDs (e.g., the $i^{th}$ day or the $i^{th}$ week) instead. -## Examples on Heterogenous Graph Snapshots + ## How to Run Grid Search / Batch Experiments To run grid search / batch experiments, one needs a `main.py` file, a `base_config.yaml`, and a `grid.txt` file. 
The main and config files are the same as in the single experiment setup above. From c6e35a20b3e687a9f7fdbfbb8229966e42123d75 Mon Sep 17 00:00:00 2001 From: Tianyu Du Date: Tue, 15 Jun 2021 14:00:24 -0700 Subject: [PATCH 66/66] update requirements.txt --- requirements.txt | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/requirements.txt b/requirements.txt index f2111231..0c1a0d8c 100644 --- a/requirements.txt +++ b/requirements.txt @@ -4,11 +4,15 @@ torch torch-scatter torch-geometric deepsnap +dask_ml +dask[complete] ogb numpy -pandas +pandas>=1.0 scipy scikit-learn matplotlib seaborn -notebook \ No newline at end of file +notebook +tensorboard +tqdm \ No newline at end of file