Source code for dynamix.generators.barabasi.divide

from __future__ import print_function, division, unicode_literals, absolute_import, generators
# -*- coding: utf-8 -*-
"""
Created on Tue May 13 17:27:40 2014

@author: Alex
"""
from .tools import *
import networkx as nx
from operator import itemgetter


[docs]def random_divide(g, n1):
    """Splits the set of nodes randomly and creates two new subgraphs following the Barabási-Albert model.

    Randomly chooses n1 nodes of graph g to create a new graph using the Barabási-Albert model.
    All other nodes will be used for the creation of the second subgraph following the same procedure.

    Parameters
    ----------
    g : Graph
        Graph which will be splitted
    n1 : int
        Number of nodes used for subgraph 1

    Returns
    -------
    g1 : Graph
        Subgraph 1 of g with n1 nodes
    g2 : Graph
        Subgraph 2 of g with len(g.nodes())-n1 nodes

    Notes
    -----
    n2 = len(g.nodes()) - n1
    """
    m = estimate_m(g)
    nodes = g.nodes()

    #get random list of nodes 1, returns a set
    nodes_1 = random_subset(nodes, n1)
    #use set operation
    nodes_2 = list(set(nodes)-nodes_1)

    #convert to list and shuffle for random order
    nodes_1 = list(nodes_1)
    random.shuffle(nodes_1)
    random.shuffle(nodes_2)

    #create barabasi-albert-graphs
    g1 = barabasi_graph(nodes_1, m)
    g2 = barabasi_graph(nodes_2, m)
    return g1, g2


[docs]def random_subgraph_divide(g, n1):
    """Splits the set of nodes randomly, preserves edges of the two groups and creates two new subgraphs
    following the Barabási-Albert model.

    Randomly chooses n1 nodes of graph g to create a new graph using the Barabási-Albert model through
    adding edges to the induced subgraph. All other nodes and their intra-group-edges will be used for
    the creation of the second subgraph following the same procedure. The repair-operator is used for
    completing the graphs.

    Parameters
    ----------
    g : Graph
        Graph which will be splitted
    n1 : int
        Number of nodes used for subgraph 1

    Returns
    -------
    g1 : Graph
        Subgraph 1 of g with n1 nodes
    g2 : Graph
        Subgraph 2 of g with len(g.nodes())-n1 nodes

    Notes
    -----
    n2 = len(g.nodes()) - n1
    """
    nodes_1 = random_subset(g.nodes(), n1)
    g1 = g.subgraph(nodes_1)
    g2 = g.subgraph(set(g.nodes())-nodes_1)

    m1 = estimate_m(g1)
    m2 = estimate_m(g2)
    repair_graph(g1, m1)
    repair_graph(g2, m2)
    return g1, g2


[docs]def node_degree_divide_a(g, n1):
    """Splits the set of nodes by their node degree order and creates two new subgraphs
    following the Barabási-Albert model.

    Strategy A:
    Chooses n1 highest ranked nodes (by node degree) of graph g to create a new graph using
    the Barabási-Albert model Nodes will be added in order of their rank. All other nodes
    will be used for the creation of the second subgraph following the same procedure.

    Parameters
    ----------
    g : Graph
        Graph which will be splitted
    n1 : int
        Number of nodes used for subgraph 1

    Returns
    -------
    g1 : Graph
        Subgraph 1 of g with n1 nodes
    g2 : Graph
        Subgraph 2 of g with len(g.nodes())-n1 nodes

    Notes
    -----
    n2 = len(g.nodes()) - n1
    """
    m = estimate_m(g)

    #divide nodelist
    sorted_nodes = sort_nodes_by_degree(g)
    nodes_1 = sorted_nodes[:n1]
    nodes_2 = sorted_nodes[n1:]

    #create barabasi-albert-graphs
    g1 = barabasi_graph(nodes_1, m)
    g2 = barabasi_graph(nodes_2, m)
    return g1, g2


[docs]def node_degree_divide_b(g, n1):
    """Splits the set of nodes by their node degree order and creates two new subgraphs
    following the Barabási-Albert model.

    Strategy B:
    Sorts nodes in order of their node degree. Chooses nodes of n1 and n2 in alternating
    order to create both subgraphs using the Barabási-Albert model. Nodes will be added in
    order of their rank.

    Parameters
    ----------
    g : Graph
        Graph which will be splitted
    n1 : int
        Number of nodes used for subgraph 1

    Returns
    -------
    g1 : Graph
        Subgraph 1 of g with n1 nodes
    g2 : Graph
        Subgraph 2 of g with len(g.nodes())-n1 nodes

    Notes
    -----
    n2 = len(g.nodes()) - n1
    """
    m = estimate_m(g)

    #sort nodes by their node degree
    sorted_nodes = sort_nodes_by_degree(g)

    #alternating way to produce indices
    f = lambda a, b: [i*b//a + b//(2*a) for i in range(a)]
    #indice list, try to equally space the indices
    ind = f(n1, len(sorted_nodes))
    nodes_1 = itemgetter(*ind)(sorted_nodes)
    nodes_1_set = set(nodes_1)
    nodes_2 = [i for i in sorted_nodes if i not in nodes_1_set]

    g1 = barabasi_graph(nodes_1, m)
    g2 = barabasi_graph(nodes_2, m)
    return g1, g2


[docs]def maximum_cut_split(g, n1):
    """ not documented yet

    Parameters
    ----------
    g : Graph
        Graph which will be splitted
    n1 : int
        Size of Subgraph 1

    Returns
    -------
    g1 : Graph
        Subgraph 1 of g
    g2 : Graph
        Subgraph 2 of g

    Notes
    -----
    n2 = len(g.nodes()) - n1
    """

    nodes = g.nodes()
    #reset edge weights to zero
    for s, targets in g.edge.items():
        for target in targets.keys():
            targets[target]["path_lenght_weight"] = len(nodes)**2+1
            #set size = nr_of_nodes + 1 for maximum spanning tree using the mst function
            #e['weight'] = len(nodes)+1

    #traverse through all node pairs
    for source in nodes:
        for target in nodes:
            if source == target:
                continue
            #search for shortest paths
            paths = nx.all_shortest_paths(g, source, target)
            paths = [x for x in paths]
            nr_of_paths = len(paths)
            #print("nr_of_paths", nr_of_paths)
            for path in paths:
                for edge in list(zip(path[::1], path[1::1])):
                    #increase edge weights for minimum spanning tree
                    g[edge[0]][edge[1]]['path_lenght_weight'] -= 1/nr_of_paths
                    #reduce edge weights for maximum spanning tree using the mst function
                    #g[edge[0]][edge[1]]['weight'] -= 1/nr_of_paths

    #create minimum spanning tree
    tree = nx.minimum_spanning_tree(g, "path_lenght_weight")

    #--- search for subtree with size of n1 or n2 ---#
    #subtree size of every leave = 1
    leaves = [n for n, d in tree.degree().items() if d == 1]
    subtrees = {n: [n] for n in leaves}

    #check subtree sizes for every node
    while len(subtrees) < len(nodes):
        #print("subtrees", subtrees)
        newdict = subtrees.copy()
        for n in g.nodes():
            if n not in subtrees:
                subtrees_to_merge = [subtrees[e] for e in tree.edge[n] if e in subtrees]
                if len(subtrees_to_merge) >= tree.degree()[n]-1:
                    newdict[n] = list(chain([n], chain(*[trees for trees in subtrees_to_merge])))
        subtrees = newdict

    #print("subgraph sizes", subtrees)

    #search for subtree with size of approximately n1
    n2 = len(g.nodes())-n1
    size_error = {}
    perfect = None
    for n in subtrees:
        error_n1 = abs(len(subtrees[n]) - n1)
        error_n2 = abs(len(subtrees[n]) - n2)
        size_error[n] = (error_n1, error_n2)
        if error_n1 == 0 or error_n2 == 0:
            perfect = (n, size_error[n])
            break

    #if not succesfull, search for better error value by leaving one tree out
    if perfect:
        best_error = perfect
    else:
        #try lowest error value
        best_error = min(size_error.items(), key=lambda x: min(x[1]))

    #print("error_values:", size_error)
    #print("best_index", best_error)
    if best_error[1].index(min(best_error[1])) == 0:
        #print("best error achieved by using subtree for building g1")
        g1 = g.subgraph(subtrees[best_error[0]])
        g2 = g.subgraph(set(g.nodes())-set(subtrees[best_error[0]]))
    else:
        #print("best error achieved by leaving out the subtree for building g1")
        g2 = g.subgraph(subtrees[best_error[0]])
        g1 = g.subgraph(set(g.nodes())-set(subtrees[best_error[0]]))

    """
    plt.clf()
    plt.subplot(2, 2, 1)
    plt.title("Graph")
    nx.draw(g, node_size=500, with_labels=True)

    plt.subplot(2, 2, 2)
    plt.title("Tree")
    nx.draw(tree, node_size=500, with_labels=True)

    plt.subplot(2, 2, 3)
    plt.title("Graph 1 before repair")
    nx.draw(g1, node_size=500, with_labels=True)

    plt.subplot(2, 2, 4)
    plt.title("Graph 2 before repair")
    nx.draw(g2, node_size=500, with_labels=True)
    plt.show()
    """

    #repair both graphs
    m = estimate_m(g)
    repair_graph(g1, m)
    repair_graph(g2, m)
    print("nodes_1", len(g1.nodes()))
    print("nodes_2", len(g2.nodes()))

    return g1, g2



[docs]def subgraph_expansion_divide(g, n1):
    """ Creates a new graph by choosing the node with the lowest node degree and iteratively
    expanding the subgraph using the neighborhood of the current subgraph.

    Takes the node with the lowest node degree as starting point for the subgraph creation.
    Iteratively extends the subgraph by adding the neighbors of the last added node to the
    subgraph. Repeat this process till subgraph 1 consists of n1 nodes. All nodes not added
    to subgraph 1 will be used to create subgraph 2

    Both graphs need to be repaired afterwards.

    Parameters
    ----------
    g : Graph
        Graph which will be splitted
    n1 : int
        Number of nodes used for subgraph 1

    Returns
    -------
    g1 : Graph
        Subgraph 1 of g with n1 nodes
    g2 : Graph
        Subgraph 2 of g with len(g.nodes())-n1 nodes

    Notes
    -----
    n2 = len(g.nodes()) - n1
    """
    m = estimate_m(g)

    sorted_nodes = sort_nodes_by_degree(g)

    nodes_1 = set()

    #nodes_to_add will be used as queue
    node_queue = sorted_nodes[-1:]
    node_set = set(node_queue)

    while len(nodes_1) < n1:
        #add node first in queue to nodes_1
        actual_node = node_queue.pop(0)
        nodes_1.add(actual_node)

        #next line can be improved for sure
        new_nodes_to_add = tuple(x for x in list(g.edge[actual_node].keys()) if x not in node_set)
        node_queue.extend(new_nodes_to_add)
        node_set.add(new_nodes_to_add)

    #build subgraph with nodes from nodes_1 and second subgraph from all other nodes
    #these do not have to be a real barabasi-albert-graph
    g1 = g.subgraph(list(nodes_1))
    nodes_2 = list(set(g.nodes()) - nodes_1)
    g2 = g.subgraph(nodes_2)

    #print("nodes_1", nodes_1)
    #print("nodes_2", nodes_2)

    #repair both graphs
    repair_graph(g1, m)
    repair_graph(g2, m)
    return g1, g2


if __name__ == "__main__":

    G = barabasi_graph(["" + str(i) for i in range(1000)], 3)
    nodes = sort_nodes_by_degree(G)
    #print("sorted_node_list", nodes)

    #A1, A2 = random_divide(G, 20)
    #B1, B2 = priority_split(G, 20)
    #C1, C2 = node_degree_divide_b(G, 20)
    #compare_merge(G, C1, C2)

    """
    for i in range(21):
        D1, D2 = subgraph_expansion_divide(G, i)

    plt.clf()
    plt.subplot(3, 1, 1)
    plt.title("Graph ")
    nx.draw(G, node_size=500, with_labels=True)

    plt.subplot(3, 1, 2)
    plt.title("Graph 2")
    nx.draw(D1, node_size=500, with_labels=True)

    plt.subplot(3, 1, 3)
    plt.title("Graph 2")
    nx.draw(D2, node_size=500, with_labels=True)
    plt.show()
    """
    E1, E2 = maximum_cut_split(G, 500)
    #for i in range(21):
    #    F1, F2 = random_subgraph_divide(G, i)

    """
    plt.clf()
    plt.subplot(3, 2, 1)
    plt.title("Graph 1")
    nx.draw(G1, node_size=500, with_labels=True)

    plt.subplot(3, 2, 2)
    plt.title("Degree Distribution Graph 1")
    plt.plot(nx.degree_histogram(G1))

    plt.subplot(3, 2, 3)
    plt.title("Graph 2")
    nx.draw(G2, node_size=500, with_labels=True)

    plt.subplot(3, 2, 4)
    plt.title("Degree Distribution Graph 2")
    plt.plot(nx.degree_histogram(G2))

    plt.subplot(3, 2, 5)
    plt.title("mergedGraph")
    nx.draw(priorityGraph, node_size=500, with_labels=True)

    plt.subplot(3, 2, 6)
    plt.title("Degree Distribution Merged Graph")
    plt.plot(nx.degree_histogram(priorityGraph))
    plt.show()
    """

    #print("\nRandom Split")
    #compare_divide(G, A1, A2)
    #print("\nPriority Split A")
    #compare_divide(G, B1, B2)
    #print("\nPriority Split B")
    #compare_divide(G, C1, C2)
    #print("\nSubgraph Expansion Split")
    #compare_divide(G, D1, D2)