# **Laboratory 3** : visualization with igraph

## Setup

### Install the libraries

In [None]:
!pip install python-igraph
!pip install cairocffi

### Import libraries

In [None]:
import pandas as pd
import itertools
import networkx as nx
import igraph as ig
import ast
import cairocffi as cairo
import random
import math

## Step 1: Create graph from data

### 1. Create networkx graph from weighted edgelist

In [None]:
!cat edges.csv

In [None]:
# Build the networkx graph from the comma-separated weighted edge list.
friendship_nw = nx.read_weighted_edgelist(
    "edges.csv",
    delimiter=",",
)

### 2. Change into igraph

In [None]:
# Convert the networkx graph into an igraph Graph; the node names end up in
# the "_nx_name" vertex attribute (used for the labels further below).
g = ig.Graph.from_networkx(friendship_nw)

In [None]:
# Print the attribute dictionary of every vertex, followed by every edge.
for element in itertools.chain(g.vs, g.es):
    print(element.attributes())

## Step 2: Plotting the graph

In [None]:
# Draw the graph without passing any visual settings.
ig.plot(g)

### Setting plot properties

In [None]:
# Names of all colors known to igraph (the keys of its known_colors mapping).
colors = list(ig.drawing.colors.known_colors)
# Use the names of the networkx nodes as vertex labels.
g.vs["label"] = g.vs["_nx_name"]

visual_style = {
    # node size
    "vertex_size": 20,
    # node color: one distinct random color per vertex
    "vertex_color": random.sample(colors, g.vcount()),
    # node label
    "vertex_label": g.vs["label"],
    # node label color
    "vertex_label_color": "black",
    # node label size grows with the vertex degree
    "vertex_label_size": [3 * degree for degree in g.degree()],
    # edge thickness grows with the edge weight
    "edge_width": [0.14 * int(weight) for weight in g.es["weight"]],
    # bounding box
    "bbox": (500, 500),
    # margin
    "margin": 20,
}

ig.plot(g, **visual_style)

### Choosing a layout

In [None]:
# Use layout algorithms.
# The default is "auto", which selects a layout algorithm automatically based on
# the size and connectedness of the graph. For this graph it is one of the
# "force" algorithms (fr, kk - they all give similar results).

# Spacing between grid cells and size of one grid cell, in pixels.
colsep, rowsep = 40, 40
width, height = 300, 300

visual_style = {
    # node size
    "vertex_size": 20,
    # node color (random, repetitions allowed)
    "vertex_color": [random.choice(colors) for _ in g.vs()],
    # node label
    "vertex_label": g.vs["label"],
    # node label color
    "vertex_label_color": "black",
    # node label size
    "vertex_label_size": [3 * degree for degree in g.degree()],
    # edge thickness
    "edge_width": [0.14 * int(weight) for weight in g.es["weight"]],
}

# Layouts to compare, listed row by row for a grid with `grid_cols` columns.
layouts = ["fr", "kk", "circle", "tree"]
grid_cols = 2
grid_rows = -(-len(layouts) // grid_cols)  # ceiling division

# ig.Plot (the class) creates an empty canvas that is saved to "plot.png";
# ig.plot (the function) would treat the file name as the object to draw.
plot = ig.Plot("plot.png", bbox=(grid_cols * width, grid_rows * height), background="white")

for position, layout_name in enumerate(layouts):
    row, col = divmod(position, grid_cols)
    # Bounding box of this grid cell, shrunk by half the separation on each side.
    cell_bbox = (
        colsep / 2 + width * col,
        rowsep / 2 + height * row,
        -colsep / 2 + width * (col + 1),
        -rowsep / 2 + height * (row + 1),
    )
    plot.add(g, bbox=cell_bbox, layout=layout_name, **visual_style)

plot

## Step 3: Saving the plot

### As PDF, PNG or SVG

In [None]:
# Display the current style dictionary.
visual_style

In [None]:
# Passing a target file name makes igraph write the drawing to that file.
ig.plot(g, target="friendship_network.pdf", **visual_style)

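### Alternative: `Graph.write_svg`

The keyword arguments accepted by `write_svg` differ from those of `ig.plot`; see
https://igraph.org/python/doc/igraph.Graph-class.html#write_svg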

In [None]:
# Graph.write_svg() uses its own keyword names (colors, labels, font_size, ...)
# instead of the vertex_*/edge_* names understood by ig.plot().
visual_style = {
    # node size
    "vertex_size": 20,
    # node color: the same color for every vertex
    "colors": ["pink"] * g.vcount(),
    # node label
    "labels": g.vs["label"],
    # node label size
    "font_size": "14",
    # edge thickness
    "edge_stroke_widths": [0.14 * int(weight) for weight in g.es["weight"]],
    # size of the drawing
    "width": 500,
    "height": 500,
}

# Give the file an explicit .svg extension so viewers recognise the format.
g.write_svg("friendship_network.svg", **visual_style)


## Network properties visualization

### 1. Create networkx graph with node and edge information

In [None]:
!cat edges_numericalid.csv

In [None]:
# Edges: semicolon-separated table with Source, Target and Weight columns.
edges = pd.read_csv('edges_numericalid.csv', sep=";")
friendship_nw_prop = nx.from_pandas_edgelist(edges, 'Source', 'Target', ['Weight'])

# Nodes: whitespace-separated table. sep=r"\s+" is the documented replacement
# for the deprecated delim_whitespace=True option.
nodes = pd.read_csv('nodes.csv', header=0, sep=r"\s+")
# Turn the table into (node id, attribute dict) pairs, the form that
# add_nodes_from() accepts for attaching attributes to nodes.
nodes = nodes.set_index('Id').to_dict('index').items()

friendship_nw_prop.add_nodes_from(nodes)
print(friendship_nw_prop.nodes(data=True))
print(friendship_nw_prop.edges(data=True))


### 2. Change into igraph

In [None]:
# Convert the attributed networkx graph into an igraph Graph.
g_prop = ig.Graph.from_networkx(friendship_nw_prop)

# Print the attribute dictionary of every vertex, followed by every edge.
for element in itertools.chain(g_prop.vs, g_prop.es):
    print(element.attributes())

### 3. Color nodes according to properties

In [None]:
# Map country names to vertex fill colors.
colors_dict = {
    "Bosnia and Herzegovina": "blue",
    "Italy": "darkgreen",
    "Germany": "yellow",
    "Austria": "red",
    "The Netherlands": "White",
}

visual_style = {
    # node size
    "vertex_size": 20,
    # node color, looked up from the "Country" vertex attribute
    "vertex_color": [colors_dict[country] for country in g_prop.vs["Country"]],
    # node label
    "vertex_label": g_prop.vs["Label"],
    # label size: proportional to the degree, fixed at 7 for degrees up to 2
    "vertex_label_size": [3 * degree if degree > 2 else 7 for degree in g_prop.degree()],
    # edge thickness
    "edge_width": [0.1 * int(weight) for weight in g_prop.es["Weight"]],
    # bounding box
    "bbox": (500, 500),
    # margin
    "margin": 20,
}
ig.plot(g_prop, **visual_style)


### 4. Look at network properties

#### Size

In [None]:
# GRAPH ORDER = NO OF NODES

nodes = g_prop.vs()
edges = g_prop.es()

order = len(nodes)
print("Graph order:", order)

# GRAPH SIZE = NO OF EDGES

print("Graph size:", len(edges))

# DENSITY - HOW CONNECTED ARE THE NODES? NO OF EDGES/NO OF POSSIBLE EDGES

# Derive the number of possible edges from the actual graph instead of
# hard-coding it: N*(N-1) ordered pairs when directed, half of that otherwise.
if g_prop.is_directed():
    formula, possible_edges = "N*(N-1)", order * (order - 1)
else:
    formula, possible_edges = "N*(N-1)/2", order * (order - 1) // 2
print("Number of possible edges (" + formula + "):", possible_edges)
print("Graph density:", g_prop.density())


ig.plot(g_prop, **visual_style)

#### Connectedness

In [None]:
# CONNECTEDNESS - EASIEST TO INSPECT VISUALLY

connected_answer = "yes" if g_prop.is_connected() else "no"
print("Is the graph connected:", connected_answer)

# IF NOT, YOU CAN LIST THE ISOLATED COMPONENTS
for component in g_prop.components():
    print(component)

#### Node degrees

In [None]:
# CONNECTIVITY - HOW WELL CONNECTED A NODE IS

print("Node degrees:")

for node in g_prop.vs():
    print("  ", node["Label"], "degree:", node.degree())

# SET SIZE PROP TO DEGREE (degrees up to 3 keep the fixed size 20)
visual_style["vertex_size"] = [4 * degree if degree > 3 else 20 for degree in g_prop.degree()]

# COLOR NEAREST NEIGHBORS OF A CERTAIN NODE
node_name = "Lejla"
matching_indices = [node.index for node in nodes if node["Label"] == node_name]
if not matching_indices:
    # Fail with a clear message instead of a NameError on node_index below.
    raise ValueError(f"No node labelled {node_name!r} in the graph")
# If several nodes share the label, the last one is used.
node_index = matching_indices[-1]

# With order=1 the neighborhood is the node itself plus its direct neighbors.
neighbors = set(g_prop.neighborhood(node_index, order=1))
visual_style["vertex_color"] = ["fuchsia" if node.index in neighbors else "grey" for node in nodes]
visual_style["edge_color"] = "darkgrey"

ig.plot(g_prop, **visual_style)


#### Network diameter

In [None]:
# DIAMETER - HOW FAR ARE THE TWO MOST DISTANT NODES

print("Network diameter:", g_prop.diameter(directed=False))
# Vertex indices along one path whose length equals the diameter.
d = g_prop.get_diameter()
# CONSECUTIVE VERTEX PAIRS = THE EDGES OF THE DIAMETER PATH
diameter_path = list(zip(d, d[1:]))
# GET EDGE IDS IN THE DIAMETER PATH
diameter_edges = g_prop.get_eids(pairs=diameter_path, directed=False)

# COLOR THE DIAMETER PATH
# Membership is tested against the vertex indices in `d`; testing against the
# (source, target) pairs in `diameter_path` would never match a vertex index.
diameter_vertex_set = set(d)
diameter_edge_set = set(diameter_edges)
visual_style["vertex_color"] = ["red" if node.index in diameter_vertex_set else "white" for node in nodes]
visual_style["edge_color"] = ["red" if edge.index in diameter_edge_set else "black" for edge in edges]


print("Path between most distant nodes:", [nodes[index]["Label"] for index in d])

# AVERAGE PATH LENGTH - HOW CLOSE ARE THE NODES TO EACH OTHER ON AVERAGE

print("Average path length:", g_prop.average_path_length(directed=False))

ig.plot(g_prop, **visual_style)


#### Shortest paths

In [None]:
# SHORTEST PATHS
# get_shortest_paths() yields exactly one path per target vertex, in vertex
# order, so the per-node lists line up with the vertices of the graph.
# (get_all_shortest_paths() may return several paths for the same target.)
shortest_paths = {}
for node in nodes:
    shortest_paths[node["Label"]] = [
        [nodes[v]["Label"] for v in path]
        for path in g_prop.get_shortest_paths(node.index)
    ]

# SHOW DISTANCES OF SHORTEST PATHS BETWEEN ONE NODE AND EVERYBODY ELSE

node_name = "Lejla"
print("Shortest paths between", node_name, "and other nodes", shortest_paths[node_name])

# A path of k vertices has k-1 edges; an empty path is recorded as None
# (presumably an unreachable target - confirm against the igraph docs).
distances = [len(path) - 1 if path else None for path in shortest_paths[node_name]]

visual_style["vertex_color"] = ["pink" if node["Label"] == node_name else "grey" for node in nodes]
visual_style["edge_color"] = "darkgrey"

# Label every vertex with its distance from the chosen node.
visual_style["vertex_label"] = ["inf" if dist is None else str(dist) for dist in distances]

ig.plot(g_prop, **visual_style)

#### Clustering coefficient

In [None]:
# GLOBAL OR LOCAL CLUSTERING COEFFICIENT - GENERAL INDICATION OF THE GRAPH'S TENDENCY TO BE ORGANISED INTO CLUSTERS

# GLOBAL CC - NUMBER OF CLOSED TRIPLETS/NUMBER OF POSSIBLE TRIPLETS

global_cc = g_prop.transitivity_undirected()
print("Global clustering coefficient", global_cc)

# LOCAL CC - ARE THE NEIGHBOURS OF THE NODES ALSO CONNECTED?

print("Local clustering components:")
local_ccs = g_prop.transitivity_local_undirected()

# Report every coefficient and accumulate the ones that are not NaN.
sum_cc = 0
for node, local_cc in zip(nodes, local_ccs):
    print("   Local clustering coefficient of node", node["Label"], ":", local_cc)
    if math.isnan(local_cc):
        continue
    sum_cc += local_cc


# AVERAGE CC (NaN entries add nothing to the sum but still count in the divisor)

print("Average clustering component", sum_cc / g_prop.vcount())


#### Communities

In [None]:
# CLUSTERS, COMMUNITIES, GROUPS - WHAT ARE THE NODES THAT ARE MORE CONNECTED TO EACH OTHER THAN TO OTHER NODES
# check out the different community algorithms and pick the one you'd like

communities = g_prop.community_optimal_modularity()
# One distinct random color per community.
community_colors = random.sample(colors, len(communities))
node_colors = {}

print("Communities in the network:")

for community, community_color in zip(communities, community_colors):
    print("  ", [nodes[member]["Label"] for member in community])
    for member in community:
        node_colors[member] = community_color

visual_style["vertex_color"] = [node_colors[node.index] for node in nodes]
# The labels must come from the graph being plotted (g_prop), not from the
# separately built graph g.
visual_style["vertex_label"] = g_prop.vs["Label"]

ig.plot(g_prop, **visual_style)

#### Cliques

In [None]:
# CLIQUES - TOTALLY CONNECTED SUBCOMPONENTS OF GRAPH

#print("Cliques in graph")
#for clique in g_prop.cliques():
#    print("   ",[nodes[node]["Label"] for node in clique])

print("Maximal cliques in graph")
maximal_cliques = g_prop.maximal_cliques()
for clique in maximal_cliques:
    print("   ", [nodes[member]["Label"] for member in clique])

# largest_cliques() returns a list of cliques (the code below indexes it with
# [0]), so the labels are looked up clique by clique, member by member.
largest_clique = g_prop.largest_cliques()

print("Largest clique", [[nodes[member]["Label"] for member in clique] for clique in largest_clique])

# Highlight the first of the largest cliques (nothing if there is none).
highlighted = set(largest_clique[0]) if largest_clique else set()
visual_style["vertex_color"] = ["pink" if node.index in highlighted else "grey" for node in nodes]


ig.plot(g_prop, **visual_style)


#### Centralities

In [None]:
# BETWEENNESS - BEING A BRIDGE BETWEEN NODES; BETWEENNESS CENTRALITY: NUMBER OF SHORTEST PATHS THROUGH A NODE


print("Betweenness centrality:")
betweenness = g_prop.betweenness(directed=False)
# Pair each value with its own vertex. Looking the vertex up with
# betweenness.index(bc) returns the first vertex holding that value, which
# mislabels every vertex that shares a value with an earlier one.
for node, bc in zip(nodes, betweenness):
    print("   Betweeness centrality of", node["Label"], ":", bc)


# CLOSENESS - BEING IN THE MIDDLE OF A NETWORK

print("Closeness centrality:")
closeness = g_prop.closeness()
for node, cc in zip(nodes, closeness):
    print("   Closeness centrality of", node["Label"], ":", cc)


# PRESTIGE (EIGENVECTOR CENTRALITY) - BEING CLOSE TO WELL CONNECTED NODES

print("Eigenvector centrality:")
eigenvector_centralities = g_prop.eigenvector_centrality()
for node, ec in zip(nodes, eigenvector_centralities):
    print("   Eigenvector centrality of", node["Label"], ":", ec)

visual_style["vertex_label"] = g_prop.vs["Label"]
# Vertex size scales with eigenvector centrality.
visual_style["vertex_size"] = [50 * ec for ec in eigenvector_centralities]
# Vertices with a positive betweenness are drawn in maroon.
visual_style["vertex_color"] = ["maroon" if bc > 0.0 else "white" for bc in betweenness]
visual_style["edge_color"] = "grey"


ig.plot(g_prop, **visual_style)


### Visualizing the network from Lab 2

In [None]:
import itertools

In [None]:
# Load the stored Reddit posts (semicolon-separated file).
reddit_network = pd.read_csv(
    "reddit_posts_stored-2.csv",
    sep=";",
)

In [None]:
# Display the loaded table.
reddit_network

In [None]:
# List the distinct subreddits that occur in the data.
reddit_network["subreddit"].unique()

In [None]:
# Map each author pair to the number of subreddits both authors posted in.
# A pair is stored under one orientation only: once (a, b) is a key, the
# reversed pair (b, a) is always skipped.
network = {}
for _, posts in reddit_network.groupby("subreddit"):
    authors = set(posts["author"])
    for first, second in itertools.product(authors, repeat=2):
        if not first != second or (second, first) in network:
            continue
        network[(first, second)] = network.get((first, second), 0) + 1

In [None]:
# One row per key of `network` (the author pairs); the single column holds
# the count stored for that pair.
network_df = pd.DataFrame.from_dict(network, orient="index")

In [None]:
# Move the pairs out of the index into a regular column and name both columns.
network_df = network_df.reset_index()
network_df.columns = ["pair", "weight"]
# Heaviest pairs first, then renumber the rows from 0.
network_df = network_df.sort_values(by="weight", ascending=False)
network_df = network_df.reset_index(drop=True)
network_df

In [None]:
# To get a weighted graph we need a list of 3-element tuples (u, v, w), where
# u and v are nodes and w is a number representing the weight.
# Edges can be filtered by weight by appending a condition to the
# comprehension, e.g. `if weight > 1`.
up_weighted = [(u, v, weight) for (u, v), weight in network.items()]

G = nx.Graph()
G.add_weighted_edges_from(up_weighted)

In [None]:
# Number of nodes, then number of edges.
print(G.number_of_nodes())
print(G.number_of_edges())

In [None]:
colors = ig.drawing.colors.known_colors
color_names = list(colors.keys())

g = ig.Graph.from_networkx(G)
g.vs["label"] = g.vs["_nx_name"]

degrees = g.degree()
# One random color per vertex; used for the label (the vertex shape is hidden).
label_colors = [random.choice(color_names) for _ in g.vs()]

visual_style = {
    # node size
    "vertex_size": 20,
    # node label color
    "vertex_label_color": label_colors,
    # node label
    "vertex_label": g.vs["label"],
    # node shape: draw only the labels
    "vertex_shape": "hidden",
    # label size grows with the vertex degree
    "vertex_label_size": [0.02 * degree for degree in degrees],
    # Edge properties need one value per EDGE. Each edge takes the color and
    # the degree-based width of its source vertex; passing the per-vertex
    # lists directly would pair edge i with the unrelated vertex i.
    "edge_color": [label_colors[edge.source] for edge in g.es],
    "edge_width": [0.001 * degrees[edge.source] for edge in g.es],
    # bounding box
    "bbox": (1024, 1024),
    # margin
    "margin": 10,
    # layout
    "layout": "fr",
    # background
    "background": "black",
}

ig.plot(g, "worldcup.png", **visual_style)

## References

https://igraph.org/python/doc/igraph

https://igraph.org/python/doc/tutorial/tutorial.html