update graph info in readme

tddschn · Oct 31, 2022 · b5d8fbc · b5d8fbc
1 parent c1c4b35
commit b5d8fbc
Show file tree

Hide file tree

Showing 3 changed files with 31 additions and 21 deletions.
diff --git a/README.md b/README.md
@@ -280,20 +280,28 @@ The `er_*` Erdos-Renyi random graphs are generated with `eg.erdos_renyi_P()`, av
 
 
 
+
+
 <!-- BEGIN DATASET TABLE -->
 
 | Dataset Name | nodes | edges | is_directed | average_degree | density | type |
 |-----|-----|-----|-----|-----|-----|-----|
 | [cheminformatics](https://networkrepository.com/ENZYMES-g1.php) | 37 | 168 | True | 9.08108108108108 | 0.12612612612612611 | easygraph.classes.directed_graph.DiGraph |
 | [eco](https://networkrepository.com/econ-mahindas.php) | 1258 | 7619 | False | 12.112877583465819 | 0.009636338570776308 | networkx.classes.graph.Graph |
 | [bio](https://networkrepository.com/bio-yeast.php) | 1458 | 1948 | False | 2.672153635116598 | 0.0018340107310340413 | easygraph.classes.graph.Graph |
+| road_sampled | 2075 | 1132 | False | 1.0910843373493977 | 0.0005260773082687548 | networkx.classes.graph.Graph |
+| coauthorship_sampled | 4340 | 6398 | False | 2.9483870967741934 | 0.0006795084343798557 | networkx.classes.graph.Graph |
 | [uspowergrid](https://toreopsahl.com/datasets/#uspowergrid) | 4941 | 6594 | False | 2.66909532483303 | 0.0005403026973346214 | networkx.classes.graph.Graph |
-| [enron](https://snap.stanford.edu/data/email-Enron.html) | 36692 | 183831 | False | 10.020222391802028 | 0.00027309755503535 | networkx.classes.graph.Graph |
-| [pgp](https://github.com/tddschn/easygraph-bench/blob/master/dataset/pgp/pgp.xml) | 39796 | 301498 | True | 15.15217609810031 | 0.00019037788790175037 | networkx.classes.digraph.DiGraph |
-| [pgp_undirected](https://github.com/tddschn/easygraph-bench/blob/master/dataset/pgp/pgp_undirected.xml) | 39796 | 197150 | False | 9.908030957885215 | 0.00024897677994434515 | networkx.classes.graph.Graph |
-| [road](https://networkrepository.com/road-usa.php) | 129164 | 165435 | False | 2.5616270787525934 | 1.9832514564949666e-05 | easygraph.classes.graph.Graph |
-| [amazon](https://snap.stanford.edu/data/amazon0302.html) | 262111 | 1234877 | True | 9.42254998836371 | 1.7974419114806206e-05 | networkx.classes.digraph.DiGraph |
-| [coauthorship](https://github.com/chenyang03/co-authorship-network) | 402392 | 1234019 | False | 6.1334171653512 | 1.5242431280399412e-05 | networkx.classes.graph.Graph |
+| pgp_sampled | 6465 | 18906 | True | 5.848723897911833 | 0.00045240747972709105 | networkx.classes.digraph.DiGraph |
+| pgp_undirected_sampled | 8781 | 51939 | False | 11.829859924837718 | 0.0013473644561318586 | networkx.classes.graph.Graph |
+| enron_sampled | 9301 | 79905 | False | 17.182023438339964 | 0.001847529401972039 | networkx.classes.graph.Graph |
+| [enron](https://github.com/tddschn/easygraph-bench/blob/master/dataset/sampled) | 36692 | 183831 | False | 10.020222391802028 | 0.00027309755503535 | networkx.classes.graph.Graph |
+| [pgp](https://github.com/tddschn/easygraph-bench/blob/master/dataset/sampled) | 39796 | 301498 | True | 15.15217609810031 | 0.00019037788790175037 | networkx.classes.digraph.DiGraph |
+| [pgp_undirected](https://github.com/tddschn/easygraph-bench/blob/master/dataset/sampled) | 39796 | 197150 | False | 9.908030957885215 | 0.00024897677994434515 | networkx.classes.graph.Graph |
+| [road](https://github.com/tddschn/easygraph-bench/blob/master/dataset/sampled) | 129164 | 165435 | False | 2.5616270787525934 | 1.9832514564949666e-05 | easygraph.classes.graph.Graph |
+| [amazon](https://github.com/tddschn/easygraph-bench/blob/master/dataset/sampled) | 262111 | 1234877 | True | 9.42254998836371 | 1.7974419114806206e-05 | networkx.classes.digraph.DiGraph |
+| amazon_sampled | 262111 | 1234877 | True | 9.42254998836371 | 1.7974419114806206e-05 | networkx.classes.digraph.DiGraph |
+| [coauthorship](https://github.com/tddschn/easygraph-bench/blob/master/dataset/sampled) | 402392 | 1234019 | False | 6.1334171653512 | 1.5242431280399412e-05 | networkx.classes.graph.Graph |
 | [google](https://snap.stanford.edu/data/web-Google.html) | 875713 | 5105039 | True | 11.659160021605253 | 6.656960291514363e-06 | networkx.classes.digraph.DiGraph |
 | [pokec](https://snap.stanford.edu/data/soc-Pokec.html) | 1632803 | 30622564 | True | 37.50919614919865 | 1.148614349725155e-05 | networkx.classes.digraph.DiGraph |
 | [er_500](https://github.com/tddschn/easygraph-bench/blob/master/dataset_loaders.py) | 500 | 2511 | False | 10.044 | 0.020128256513026053 | easygraph.classes.graph.Graph |
@@ -303,8 +311,6 @@ The `er_*` Erdos-Renyi random graphs are generated with `eg.erdos_renyi_P()`, av
 
 
 <!-- END DATASET TABLE -->
-## Results
-
 - Machine: MacBookPro16,2 (Mid-2020 MacBook Pro, Intel i7-1068NG7 (8) @ 2.30GHz, 16GB RAM)
 - OS: macOS Monterey 12.6 21G115 x86_64
 - python: Python 3.10.5 | packaged by conda-forge | (main, Jun 14 2022, 07:03:09) [Clang 13.0.1 ] on darwin

diff --git a/config.py b/config.py
@@ -169,6 +169,18 @@
     x.removeprefix('load_') for x in random_erdos_renyi_graphs_load_function_names
 ]
 
+# for sampling graphs
+sampled_graph_dir = DATASET_DIR / 'sampled'
+default_target_node_number = 10000
+sampled_graph_dataset_names = sorted(
+    (x.stem for x in sorted(sampled_graph_dir.glob('*.edgelist'))),
+    key=lambda dataset_name: get_dataset_list_sorted_by_nodes_and_edges().index(
+        dataset_name
+    )
+    if dataset_name in get_dataset_list_sorted_by_nodes_and_edges()
+    else 1000000,
+)
+
 easygraph_multipcoessing_methods = [
     'laplacian',
     'betweenness_centrality',
@@ -216,6 +228,10 @@
         er_dataset_name: 'https://github.com/tddschn/easygraph-bench/blob/master/dataset_loaders.py'
         for er_dataset_name in random_erdos_renyi_dataset_names
     },
+    **{
+        sampled_graph_dataset_name: 'https://github.com/tddschn/easygraph-bench/blob/master/dataset/sampled'
+        for sampled_graph_dataset_name in sampled_graph_dataset_names
+    },
 }
 
 
@@ -301,15 +317,3 @@ def read_profile_preparation_code() -> dict[str, str]:
 er_dataset_names_for_paper_multiprocessing = [
     f'er_{x}' for x in (500, 1000, 5000, 10000)
 ]
-
-# for sampling graphs
-sampled_graph_dir = DATASET_DIR / 'sampled'
-default_target_node_number = 10000
-sampled_graph_dataset_names = sorted(
-    (x.stem for x in sorted(sampled_graph_dir.glob('*.edgelist'))),
-    key=lambda dataset_name: get_dataset_list_sorted_by_nodes_and_edges().index(
-        dataset_name
-    )
-    if dataset_name in get_dataset_list_sorted_by_nodes_and_edges()
-    else 1000000,
-)
diff --git a/gen_graph_info_markdown_table.py b/gen_graph_info_markdown_table.py
@@ -30,7 +30,7 @@ def get_markdown_content() -> str:
         else:
             data.append({'Dataset Name': graph_name, **graph_info})
 
-    sorted_data = sorted(data, key=lambda x: ((n := x['Dataset Name']).startswith('er_') or n.startswith('[er_'), x['nodes'], n.endswith('_directed')))  # type: ignore
+    sorted_data = sorted(data, key=lambda x: ((n := x['Dataset Name']).startswith('er_') or n.startswith('[er_'), x['nodes'], 'sampled' in n, n.endswith('_directed')))  # type: ignore
     # from icecream import ic
 
     # ic(sorted_data)