From 556160eef7efa773a0797d8c75f88cd24a8439de Mon Sep 17 00:00:00 2001 From: xpai Date: Tue, 17 Dec 2024 21:55:00 +0800 Subject: [PATCH] - Fix TransAct bug (#132) - Set polars <= 1.0.0 - Remove group_id from feature_map.json --- CHANGELOG.md | 9 +++++++-- README.md | 4 ++-- fuxictr/features.py | 4 +--- model_zoo/LongCTR/ETA/config/dataset_config.yaml | 8 ++++---- model_zoo/LongCTR/SDIM/config/dataset_config.yaml | 8 ++++---- model_zoo/TransAct/src/TransAct.py | 4 ++-- requirements.txt | 2 +- 7 files changed, 21 insertions(+), 18 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index ca7959c..e7d7626 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,11 @@ [Doing] Add support for saving pb file, exporting embeddings [Doing] Add support of multi-gpu training +**FuxiCTR v2.3.6, 2024-12-28** ++ [Fix] Fix init_weights() for PretrainedEmbedding by modifying embedding_initializer ([#126](https://github.com/reczoo/FuxiCTR/issues/126)) ++ [Fix] Fix get_mask issue when num_heads > 1 ([#130](https://github.com/reczoo/FuxiCTR/issues/130)) ++ [Fix] Fix TransAct error when number of sequence features > 2 ([#132](https://github.com/reczoo/FuxiCTR/issues/132)) + **FuxiCTR v2.3.5, 2024-11-06** + [Fix] Fix get_inputs() bug ([#115](https://github.com/reczoo/FuxiCTR/issues/115)) @@ -112,8 +117,8 @@ ### FuxiCTR v1.2 **FuxiCTR v1.2.2, 2022-07-03** -+ [Fix] Fix bug in EDCN #29 -+ [Fix] Fix MultiHeadAttention bug #30 ++ [Fix] Fix bug in EDCN ([#29](https://github.com/reczoo/FuxiCTR/issues/29)) ++ [Fix] Fix MultiHeadAttention bug ([#30](https://github.com/reczoo/FuxiCTR/issues/30)) **FuxiCTR v1.2.1, 2022-06-12** + [Fix] Fix layernorm bug in MaskNet diff --git a/README.md b/README.md index f3e368e..7866d48 100644 --- a/README.md +++ b/README.md @@ -81,8 +81,8 @@ Click-through rate (CTR) prediction is a critical task for various industrial ap | 44 | DLP-KDD'19 | [BST](./model_zoo/BST) | [Behavior Sequence Transformer for E-commerce Recommendation in Alibaba](https://arxiv.org/abs/1905.06874) :triangular_flag_on_post:**Alibaba** | [:arrow_upper_right:](https://github.com/reczoo/BARS/tree/main/ranking/ctr/BST) | `torch` | | 45 | CIKM'20 | [DMIN](./model_zoo/DMIN) | [Deep Multi-Interest Network for Click-through Rate Prediction](https://dl.acm.org/doi/10.1145/3340531.3412092) :triangular_flag_on_post:**Alibaba** | [:arrow_upper_right:](https://github.com/reczoo/BARS/tree/main/ranking/ctr/DMIN) | `torch` | | 46 | AAAI'20 | [DMR](./model_zoo/DMR) | [Deep Match to Rank Model for Personalized Click-Through Rate Prediction](https://ojs.aaai.org/index.php/AAAI/article/view/5346) :triangular_flag_on_post:**Alibaba** | [:arrow_upper_right:](https://github.com/reczoo/BARS/tree/main/ranking/ctr/DMR) | `torch` | -| 47 | DLP-KDD'22 | [ETA](./model_zoo/ETA) | [Efficient Long Sequential User Data Modeling for Click-Through Rate Prediction](https://arxiv.org/abs/2209.12212) :triangular_flag_on_post:**Alibaba** | | `torch` | -| 48 | CIKM'22 | [SDIM](./model_zoo/SDIM) | [Sampling Is All You Need on Modeling Long-Term User Behaviors for CTR Prediction](https://arxiv.org/abs/2205.10249) :triangular_flag_on_post:**Meituan** | | `torch` | +| 47 | DLP-KDD'22 | [ETA](./model_zoo/LongCTR/ETA) | [Efficient Long Sequential User Data Modeling for Click-Through Rate Prediction](https://arxiv.org/abs/2209.12212) :triangular_flag_on_post:**Alibaba** | | `torch` | +| 48 | CIKM'22 | [SDIM](./model_zoo/LongCTR/SDIM) | [Sampling Is All You Need on Modeling Long-Term User Behaviors for CTR Prediction](https://arxiv.org/abs/2205.10249) :triangular_flag_on_post:**Meituan** | | `torch` | | 49 | KDD'23 | [TransAct](./model_zoo/TransAct) | [TransAct: Transformer-based Realtime User Action Model for Recommendation at Pinterest](https://arxiv.org/abs/2306.00248) :triangular_flag_on_post:**Pinterest** | [:arrow_upper_right:](https://github.com/reczoo/BARS/tree/main/ranking/ctr/TransAct) | `torch` | |:open_file_folder: **Dynamic Weight Network**| | 50 | NeurIPS'22 | [APG](./model_zoo/APG) | [APG: Adaptive Parameter Generation Network for Click-Through Rate Prediction](https://arxiv.org/abs/2203.16218) :triangular_flag_on_post:**Alibaba** | [:arrow_upper_right:](https://github.com/reczoo/BARS/tree/main/ranking/ctr/APG) | `torch` | diff --git a/fuxictr/features.py b/fuxictr/features.py index 42eba79..11b4903 100644 --- a/fuxictr/features.py +++ b/fuxictr/features.py @@ -45,7 +45,7 @@ def load(self, json_file, params): self.labels = feature_map.get("labels", []) self.total_features = feature_map.get("total_features", 0) self.input_length = feature_map.get("input_length", 0) - self.group_id = feature_map.get("group_id", None) + self.group_id = params.get("group_id", None) self.default_emb_dim = params.get("embedding_dim", None) self.features = OrderedDict((k, v) for x in feature_map["features"] for k, v in x.items()) self.num_fields = self.get_num_fields() @@ -74,8 +74,6 @@ def save(self, json_file): feature_map["total_features"] = self.total_features feature_map["input_length"] = self.input_length feature_map["labels"] = self.labels - if self.group_id is not None: - feature_map["group_id"] = self.group_id feature_map["features"] = [{k: v} for k, v in self.features.items()] with open(json_file, "w") as fd: json.dump(feature_map, fd, indent=4) diff --git a/model_zoo/LongCTR/ETA/config/dataset_config.yaml b/model_zoo/LongCTR/ETA/config/dataset_config.yaml index 16c1bc3..938149b 100644 --- a/model_zoo/LongCTR/ETA/config/dataset_config.yaml +++ b/model_zoo/LongCTR/ETA/config/dataset_config.yaml @@ -1,8 +1,8 @@ ### Tiny data for tests only tiny_seq: - data_root: ../../data/ + data_root: ../../../data/ data_format: npz - train_data: ../../data/tiny_seq/train.npz - valid_data: ../../data/tiny_seq/valid.npz - test_data: ../../data/tiny_seq/test.npz + train_data: ../../../data/tiny_seq/train.npz + valid_data: ../../../data/tiny_seq/valid.npz + test_data: ../../../data/tiny_seq/test.npz diff --git a/model_zoo/LongCTR/SDIM/config/dataset_config.yaml b/model_zoo/LongCTR/SDIM/config/dataset_config.yaml index 16c1bc3..938149b 100644 --- a/model_zoo/LongCTR/SDIM/config/dataset_config.yaml +++ b/model_zoo/LongCTR/SDIM/config/dataset_config.yaml @@ -1,8 +1,8 @@ ### Tiny data for tests only tiny_seq: - data_root: ../../data/ + data_root: ../../../data/ data_format: npz - train_data: ../../data/tiny_seq/train.npz - valid_data: ../../data/tiny_seq/valid.npz - test_data: ../../data/tiny_seq/test.npz + train_data: ../../../data/tiny_seq/train.npz + valid_data: ../../../data/tiny_seq/valid.npz + test_data: ../../../data/tiny_seq/test.npz diff --git a/model_zoo/TransAct/src/TransAct.py b/model_zoo/TransAct/src/TransAct.py index f58aba7..3e45ef3 100644 --- a/model_zoo/TransAct/src/TransAct.py +++ b/model_zoo/TransAct/src/TransAct.py @@ -122,7 +122,6 @@ def __init__(self, else embedding_dim ) transformer_in_dim = seq_emb_dim + target_emb_dim - seq_out_dim += (first_k_cols + int(concat_max_pool)) * transformer_in_dim self.transformer_encoders.append( TransActTransformer(transformer_in_dim, dim_feedforward=dim_feedforward, @@ -134,7 +133,8 @@ def __init__(self, first_k_cols=first_k_cols, concat_max_pool=concat_max_pool) ) - dcn_in_dim = feature_map.sum_emb_out_dim() + seq_out_dim - seq_emb_dim + seq_out_dim += (first_k_cols + int(concat_max_pool)) * transformer_in_dim - seq_emb_dim + dcn_in_dim = feature_map.sum_emb_out_dim() + seq_out_dim self.crossnet = CrossNetV2(dcn_in_dim, dcn_cross_layers) self.parallel_dnn = MLP_Block(input_dim=dcn_in_dim, output_dim=None, # output hidden layer diff --git a/requirements.txt b/requirements.txt index 9e00700..9ccf9aa 100644 --- a/requirements.txt +++ b/requirements.txt @@ -6,4 +6,4 @@ numpy h5py tqdm pyarrow -polars +polars<=1.0.0