Les Miserables Benchmark#
This benchmark uses NetworkX’s built-in nx.les_miserables_graph() and
applies a strict score-then-filter workflow for every method:
Run a scoring method that returns the full graph with edge attributes.
Apply a filter (threshold_filter, fraction_filter, or boolean_filter).
Report edge counts only from the filtered graph.
If the filtered graph has the same edge count as the original graph, issue a warning and re-test the method parameters.
Setup#
import warnings
import networkx as nx
import networkx_backbone as nb
G = nx.les_miserables_graph()
G_unweighted = nx.Graph()
G_unweighted.add_nodes_from(G.nodes(data=True))
G_unweighted.add_edges_from(G.edges())
ORIGINAL_EDGES = G.number_of_edges() # 254
def warn_if_no_reduction(name, filtered):
if filtered.number_of_edges() == ORIGINAL_EDGES:
warnings.warn(
f"{name}: filtered graph has the same number of edges as "
f"the original ({ORIGINAL_EDGES}). Re-test and validate.",
UserWarning,
)
Filtered Edge Counts#
All counts below are after explicit filtering.
Function |
Filter Applied |
Edges After Filter |
|---|---|---|
|
247 |
|
|
98 |
|
|
70 |
|
|
254 |
|
|
109 |
|
|
109 |
|
|
157 |
|
|
113 |
|
|
127 |
|
|
69 |
|
|
127 |
|
|
237 |
|
|
76 |
|
|
163 |
|
|
118 |
|
|
109 |
|
|
22 |
|
|
72 |
|
|
162 |
|
|
76 |
|
top 30% by |
76 |
|
top 30% by |
76 |
|
top 30% by |
76 |
|
top 30% by |
76 |
|
top 30% by |
76 |
|
top 30% by |
76 |
|
top 30% by |
76 |
|
top 30% by |
76 |
|
top 30% by |
76 |
|
top 30% by |
76 |
|
top 30% by |
76 |
|
top 30% by |
76 |
|
|
5 |
|
|
136 |
|
|
136 |
|
|
135 |
Reproducibility Script#
import warnings
import networkx as nx
import networkx_backbone as nb
G = nx.les_miserables_graph()
G_unweighted = nx.Graph()
G_unweighted.add_nodes_from(G.nodes(data=True))
G_unweighted.add_edges_from(G.edges())
original_edges = G.number_of_edges()
methods = [
("disparity_filter", lambda: nb.threshold_filter(nb.disparity_filter(G), "disparity_pvalue", 0.05, mode="below")),
("noise_corrected_filter", lambda: nb.threshold_filter(nb.noise_corrected_filter(G), "nc_score", 2.0, mode="above")),
("marginal_likelihood_filter", lambda: nb.threshold_filter(nb.marginal_likelihood_filter(G), "ml_pvalue", 0.05, mode="below")),
("ecm_filter", lambda: nb.threshold_filter(nb.ecm_filter(G), "ecm_pvalue", 0.05, mode="below")),
("lans_filter", lambda: nb.threshold_filter(nb.lans_filter(G), "lans_pvalue", 0.05, mode="below")),
("multiple_linkage_analysis(alpha=0.05)", lambda: nb.boolean_filter(nb.multiple_linkage_analysis(G, alpha=0.05), "mla_keep")),
("global_threshold_filter(threshold=2)", lambda: nb.boolean_filter(nb.global_threshold_filter(G, threshold=2), "global_threshold_keep")),
("strongest_n_ties(n=2)", lambda: nb.boolean_filter(nb.strongest_n_ties(G, n=2), "strongest_n_ties_keep")),
("global_sparsification(s=0.5)", lambda: nb.boolean_filter(nb.global_sparsification(G, s=0.5), "global_sparsification_keep")),
("primary_linkage_analysis", lambda: nb.boolean_filter(nb.primary_linkage_analysis(G), "primary_linkage_keep")),
("edge_betweenness_filter(s=0.5)", lambda: nb.boolean_filter(nb.edge_betweenness_filter(G, s=0.5), "edge_betweenness_keep")),
("node_degree_filter(min_degree=2)", lambda: nb.boolean_filter(nb.node_degree_filter(G, min_degree=2), "node_degree_keep")),
("high_salience_skeleton", lambda: nb.threshold_filter(nb.high_salience_skeleton(G), "salience", 0.5, mode="above")),
("metric_backbone", lambda: nb.boolean_filter(nb.metric_backbone(G), "metric_keep")),
("ultrametric_backbone", lambda: nb.boolean_filter(nb.ultrametric_backbone(G), "ultrametric_keep")),
("doubly_stochastic_filter", lambda: nb.threshold_filter(nb.doubly_stochastic_filter(G), "ds_weight", 0.1, mode="above")),
("h_backbone", lambda: nb.boolean_filter(nb.h_backbone(G), "h_backbone_keep")),
("modularity_backbone", lambda: nb.boolean_filter(nb.modularity_backbone(G), "modularity_keep")),
("planar_maximally_filtered_graph", lambda: nb.boolean_filter(nb.planar_maximally_filtered_graph(G), "pmfg_keep")),
("maximum_spanning_tree_backbone", lambda: nb.boolean_filter(nb.maximum_spanning_tree_backbone(G), "mst_keep")),
("neighborhood_overlap", lambda: nb.fraction_filter(nb.neighborhood_overlap(G), "overlap", 0.3, ascending=False)),
("jaccard_backbone", lambda: nb.fraction_filter(nb.jaccard_backbone(G), "jaccard", 0.3, ascending=False)),
("dice_backbone", lambda: nb.fraction_filter(nb.dice_backbone(G), "dice", 0.3, ascending=False)),
("cosine_backbone", lambda: nb.fraction_filter(nb.cosine_backbone(G), "cosine", 0.3, ascending=False)),
("hub_promoted_index", lambda: nb.fraction_filter(nb.hub_promoted_index(G), "hpi", 0.3, ascending=False)),
("hub_depressed_index", lambda: nb.fraction_filter(nb.hub_depressed_index(G), "hdi", 0.3, ascending=False)),
("lhn_local_index", lambda: nb.fraction_filter(nb.lhn_local_index(G), "lhn_local", 0.3, ascending=False)),
("preferential_attachment_score", lambda: nb.fraction_filter(nb.preferential_attachment_score(G), "pa", 0.3, ascending=False)),
("adamic_adar_index", lambda: nb.fraction_filter(nb.adamic_adar_index(G), "adamic_adar", 0.3, ascending=False)),
("resource_allocation_index", lambda: nb.fraction_filter(nb.resource_allocation_index(G), "resource_allocation", 0.3, ascending=False)),
("graph_distance_proximity", lambda: nb.fraction_filter(nb.graph_distance_proximity(G), "dist", 0.3, ascending=False)),
("local_path_index", lambda: nb.fraction_filter(nb.local_path_index(G), "lp", 0.3, ascending=False)),
("glab_filter", lambda: nb.threshold_filter(nb.glab_filter(G), "glab_pvalue", 0.05, mode="below")),
("sparsify", lambda: nb.boolean_filter(nb.sparsify(G_unweighted), "sparsify_keep")),
("lspar", lambda: nb.boolean_filter(nb.lspar(G_unweighted), "sparsify_keep")),
("local_degree", lambda: nb.boolean_filter(nb.local_degree(G_unweighted), "sparsify_keep")),
]
print("original_edges", original_edges)
for name, fn in methods:
filtered = fn()
if filtered.number_of_edges() == original_edges:
warnings.warn(
f"{name}: filtered graph has the same number of edges as "
f"the original ({original_edges}). Re-test and validate.",
UserWarning,
)
print(name, filtered.number_of_edges())