adjacency_list = {
    "v1": ["v2", "v3", "v5"],
    "v2": ["v1", "v3"],
    "v3": ["v1", "v2", "v4", "v5"],
    "v4": ["v3"],
    "v5": ["v1", "v3", "v5"],
    "v6": [],
}


import matplotlib.pyplot as plt
import networkx as nx
from networkx.drawing.nx_agraph import to_agraph 

from IPython.display import Image, display

# Just a little helper function to plot graphs, see section on NetworkX below for further information.
def plot_graph(graph):
    # Detect weights for edges and set them as labels
    has_weights = False
    if any(map(lambda edge: "weight" in graph.edges[edge], graph.edges)):
        for edge in graph.edges:
            edge_attrs = graph.edges[edge]
            weight = edge_attrs.get("weight", None)
            edge_attrs["label"] = weight
            has_weights = True
    
    # First, try graphviz backup as it produces cleaner plots
    # It requires both 'graphviz' and 'pygraphviz' installed.
    try:
        a = to_agraph(graph)
        a.layout('dot') 
        return display(Image(a.draw(format="png")))
    # Fallback to matplotlib backend if graphviz isn't accessible.
    except Exception:
        
        fig, ax = plt.subplots()
        pos=nx.spring_layout(graph)
        nx.draw_networkx(graph, pos=pos, ax=ax)
        if has_weights:
            labels = nx.get_edge_attributes(graph,'weight')
            nx.draw_networkx_edge_labels(graph, pos=pos, edge_labels=labels, ax=ax)
        fig.tight_layout()
        plt.show()


g = nx.Graph(adjacency_list)
plot_graph(g)


adjacency_list_weighted = {
    "v1": [("v2", 2), ("v3", 4), ("v5", 1)],
    "v2": [("v1", 2), ("v3", 3)],
    "v3": [("v1", 4), ("v2", 3), ("v4", 2), ("v5", 3)],
    "v4": [("v3", 2)],
    "v5": [("v1", 1), ("v3", 3)],
    "v6": [],
}


# NetworkX expects weighted graphs in a slightly different format, so we have to convert our data...
g = nx.Graph(
    {
        node: {edge[0]: {"weight": edge[1], "label": edge[1]} for edge in edges}
        for node, edges in adjacency_list_weighted.items()
    }
)
plot_graph(g)


from typing import Dict
from collections import OrderedDict

def dijkstra(graph: Dict[str, Dict[str, int]], start: str, verbose: bool = False):
    # Mark all nodes as unvisited
    unvisited_nodes = set(graph.keys())
    # Set distances to all nodes except start to infinity
    distances = {n: float("inf") if n != start else 0 for n in unvisited_nodes}
    shortest_paths = OrderedDict()
    # Iterate over all nodes.
    if verbose:
        print(f"Running Dijkstra-algorithm to find the shortest paths from '{start}' to all other nodes")
        graph_as_dicts = {node: {neighbor: {"label": weight} for neighbor, weight in neighbors.items()} for node, neighbors in graph.items()}
        plot_graph(nx.DiGraph(graph_as_dicts))
    while unvisited_nodes:
        current_node = next(iter(sorted(unvisited_nodes, key=distances.get)))
        if verbose: print(f"Current-Node is '{current_node}'. Current distance from '{current_node}' to '{start}' is {distances[current_node]}")
        for neighbor, distance in graph[current_node].items():
            if verbose: print(f"Checking neighbor '{neighbor}' of current node '{current_node}'")
            old_distance = distances[neighbor]
            new_distance = distances[current_node] + distance
            if verbose:
                print(
                    f"Old distance to '{neighbor}'= {old_distance}; New distance to {neighbor} via '{current_node}'={new_distance}"
                )
            if old_distance > new_distance:
                distances[neighbor] = new_distance
                shortest_paths[neighbor] = current_node
                if verbose: print(f"Setting distance to '{neighbor}' via '{current_node}' as new minimal distance")
        unvisited_nodes.discard(current_node)
        if verbose: print(f"Finished checking all neighbors of '{current_node}'; marking it as visited")
    
    # The algorithm is finished, but we also decode the shortest paths into a more readable format
    decoded_shortest_paths = []
    for to_node, from_node in shortest_paths.items():
        if from_node == start:
            decoded_shortest_paths.append([from_node, to_node])
        else:
            path = [to_node, from_node]
            while True:
                from_node = shortest_paths[from_node]
                path.append(from_node)
                if from_node == start:
                    break
            decoded_shortest_paths.append(path[::-1])
    return distances, decoded_shortest_paths


graph = {
    "a": {"b": 1, "d": 4},
    "b": {"c": 1},
    "c": {"d": 1},
    "d": {},
    "e": {}
}

dijkstra(graph=graph, start="a", verbose=True)

Running Dijkstra-algorithm to find the shortest paths from 'a' to all other nodes

Current-Node is 'a'. Current distance from 'a' to 'a' is 0
Checking neighbor 'b' of current node 'a'
Old distance to 'b'= inf; New distance to b via 'a'=1
Setting distance to 'b' via 'a' as new minimal distance
Checking neighbor 'd' of current node 'a'
Old distance to 'd'= inf; New distance to d via 'a'=4
Setting distance to 'd' via 'a' as new minimal distance
Finished checking all neighbors of 'a'; marking it as visited
Current-Node is 'b'. Current distance from 'b' to 'a' is 1
Checking neighbor 'c' of current node 'b'
Old distance to 'c'= inf; New distance to c via 'b'=2
Setting distance to 'c' via 'b' as new minimal distance
Finished checking all neighbors of 'b'; marking it as visited
Current-Node is 'c'. Current distance from 'c' to 'a' is 2
Checking neighbor 'd' of current node 'c'
Old distance to 'd'= 4; New distance to d via 'c'=3
Setting distance to 'd' via 'c' as new minimal distance
Finished checking all neighbors of 'c'; marking it as visited
Current-Node is 'd'. Current distance from 'd' to 'a' is 3
Finished checking all neighbors of 'd'; marking it as visited
Current-Node is 'e'. Current distance from 'e' to 'a' is inf
Finished checking all neighbors of 'e'; marking it as visited

({'c': 2, 'a': 0, 'b': 1, 'e': inf, 'd': 3},
 [['a', 'b'], ['a', 'b', 'c', 'd'], ['a', 'b', 'c']])


# Run dijkstra on more complex randomly generated graph
nx_graph = nx.cycle_graph(8)
g = nx.to_dict_of_dicts(nx_graph, edge_data=1)
dijkstra(g, start=2, verbose=True)

Running Dijkstra-algorithm to find the shortest paths from '2' to all other nodes

Current-Node is '2'. Current distance from '2' to '2' is 0
Checking neighbor '1' of current node '2'
Old distance to '1'= inf; New distance to 1 via '2'=1
Setting distance to '1' via '2' as new minimal distance
Checking neighbor '3' of current node '2'
Old distance to '3'= inf; New distance to 3 via '2'=1
Setting distance to '3' via '2' as new minimal distance
Finished checking all neighbors of '2'; marking it as visited
Current-Node is '1'. Current distance from '1' to '2' is 1
Checking neighbor '0' of current node '1'
Old distance to '0'= inf; New distance to 0 via '1'=2
Setting distance to '0' via '1' as new minimal distance
Checking neighbor '2' of current node '1'
Old distance to '2'= 0; New distance to 2 via '1'=2
Finished checking all neighbors of '1'; marking it as visited
Current-Node is '3'. Current distance from '3' to '2' is 1
Checking neighbor '2' of current node '3'
Old distance to '2'= 0; New distance to 2 via '3'=2
Checking neighbor '4' of current node '3'
Old distance to '4'= inf; New distance to 4 via '3'=2
Setting distance to '4' via '3' as new minimal distance
Finished checking all neighbors of '3'; marking it as visited
Current-Node is '0'. Current distance from '0' to '2' is 2
Checking neighbor '1' of current node '0'
Old distance to '1'= 1; New distance to 1 via '0'=3
Checking neighbor '7' of current node '0'
Old distance to '7'= inf; New distance to 7 via '0'=3
Setting distance to '7' via '0' as new minimal distance
Finished checking all neighbors of '0'; marking it as visited
Current-Node is '4'. Current distance from '4' to '2' is 2
Checking neighbor '3' of current node '4'
Old distance to '3'= 1; New distance to 3 via '4'=3
Checking neighbor '5' of current node '4'
Old distance to '5'= inf; New distance to 5 via '4'=3
Setting distance to '5' via '4' as new minimal distance
Finished checking all neighbors of '4'; marking it as visited
Current-Node is '5'. Current distance from '5' to '2' is 3
Checking neighbor '4' of current node '5'
Old distance to '4'= 2; New distance to 4 via '5'=4
Checking neighbor '6' of current node '5'
Old distance to '6'= inf; New distance to 6 via '5'=4
Setting distance to '6' via '5' as new minimal distance
Finished checking all neighbors of '5'; marking it as visited
Current-Node is '7'. Current distance from '7' to '2' is 3
Checking neighbor '6' of current node '7'
Old distance to '6'= 4; New distance to 6 via '7'=4
Checking neighbor '0' of current node '7'
Old distance to '0'= 2; New distance to 0 via '7'=4
Finished checking all neighbors of '7'; marking it as visited
Current-Node is '6'. Current distance from '6' to '2' is 4
Checking neighbor '5' of current node '6'
Old distance to '5'= 3; New distance to 5 via '6'=5
Checking neighbor '7' of current node '6'
Old distance to '7'= 3; New distance to 7 via '6'=5
Finished checking all neighbors of '6'; marking it as visited

({0: 2, 1: 1, 2: 0, 3: 1, 4: 2, 5: 3, 6: 4, 7: 3},
 [[2, 1],
  [2, 3],
  [2, 1, 0],
  [2, 3, 4],
  [2, 1, 0, 7],
  [2, 3, 4, 5],
  [2, 3, 4, 5, 6]])

$ pip install networkx


# Undirected graphs
undirected_graph = nx.Graph()

# Directed graphs
directed_graph = nx.DiGraph()


plot_graph(undirected_graph)
plot_graph(directed_graph)


directed_graph.add_node("a")
directed_graph.add_node("b")
directed_graph.add_edge("a", "b")
plot_graph(directed_graph)


directed_graph.add_nodes_from(["c", "d"])
directed_graph.add_edges_from([("a", "c"), ("d", "c")])
plot_graph(directed_graph)


adjacency_list = {
    "a": ["b"],
    "b": ["c"],
    "c": ["a"]
}
graph_a = nx.DiGraph(adjacency_list)
plot_graph(graph_a)


import pandas as pd

adjacency_matrix = pd.DataFrame([
        [1, 1, 1],
        [0, 1, 0],
        [0, 0, 0.5]
    ],
    columns=["a", "b", "c"],
    index=["a", "b", "c"]
)
adjacency_matrix


graph_b = nx.from_pandas_adjacency(adjacency_matrix, create_using=nx.DiGraph)
plot_graph(graph_b)
graph_b.edges[("a", "b")]

{'weight': 1.0, 'label': 1.0}


graph_b.edges[("a", "c")], graph_b.edges[("c", "c")]

({'weight': 1.0, 'label': 1.0}, {'weight': 0.5, 'label': 0.5})


graph_b.nodes["a"]["meta_character_name"] = "Zelda"
graph_b.nodes["a"]

{'meta_character_name': 'Zelda'}


plot_graph(directed_graph)
directed_graph.edges[("a", "b")]["weight"] = 42
plot_graph(directed_graph)


nx.draw_networkx(directed_graph)


pos = nx.spring_layout(directed_graph)
nx.draw_networkx(directed_graph, pos=pos)
labels = nx.get_edge_attributes(directed_graph, "weight")
nx.draw_networkx_edge_labels(graph, pos=pos, edge_labels=labels)

{('a', 'b'): Text(-0.61795602649711, 0.12391630967032419, '42')}


kite_graph = nx.krackhardt_kite_graph()
plot_graph(kite_graph)


nx.centrality.degree_centrality(kite_graph)

{0: 0.4444444444444444,
 1: 0.4444444444444444,
 2: 0.3333333333333333,
 3: 0.6666666666666666,
 4: 0.3333333333333333,
 5: 0.5555555555555556,
 6: 0.5555555555555556,
 7: 0.3333333333333333,
 8: 0.2222222222222222,
 9: 0.1111111111111111}


nx.centrality.betweenness_centrality(kite_graph)

{0: 0.023148148148148143,
 1: 0.023148148148148143,
 2: 0.0,
 3: 0.10185185185185183,
 4: 0.0,
 5: 0.23148148148148148,
 6: 0.23148148148148148,
 7: 0.38888888888888884,
 8: 0.2222222222222222,
 9: 0.0}


nx.centrality.closeness_centrality(kite_graph)

{0: 0.5294117647058824,
 1: 0.5294117647058824,
 2: 0.5,
 3: 0.6,
 4: 0.5,
 5: 0.6428571428571429,
 6: 0.6428571428571429,
 7: 0.6,
 8: 0.42857142857142855,
 9: 0.3103448275862069}


nx.dijkstra_path(kite_graph, y

  File "/var/folders/tx/s3wb7bvx4blgb8xnlhfxvw7h0000gn/T/ipykernel_10468/2110222276.py", line 1
    nx.dijkstra_path(kite_graph, y
                                  ^
SyntaxError: unexpected EOF while parsing


dict(nx.all_pairs_dijkstra(kite_graph))

{0: ({0: 0, 1: 1, 2: 1, 3: 1, 5: 1, 4: 2, 6: 2, 7: 2, 8: 3, 9: 4},
  {0: [0],
   1: [0, 1],
   2: [0, 2],
   3: [0, 3],
   5: [0, 5],
   4: [0, 1, 4],
   6: [0, 1, 6],
   7: [0, 5, 7],
   8: [0, 5, 7, 8],
   9: [0, 5, 7, 8, 9]}),
 1: ({1: 0, 0: 1, 3: 1, 4: 1, 6: 1, 2: 2, 5: 2, 7: 2, 8: 3, 9: 4},
  {1: [1],
   0: [1, 0],
   3: [1, 3],
   4: [1, 4],
   6: [1, 6],
   2: [1, 0, 2],
   5: [1, 0, 5],
   7: [1, 6, 7],
   8: [1, 6, 7, 8],
   9: [1, 6, 7, 8, 9]}),
 2: ({2: 0, 0: 1, 3: 1, 5: 1, 1: 2, 4: 2, 6: 2, 7: 2, 8: 3, 9: 4},
  {2: [2],
   0: [2, 0],
   3: [2, 3],
   5: [2, 5],
   1: [2, 0, 1],
   4: [2, 3, 4],
   6: [2, 3, 6],
   7: [2, 5, 7],
   8: [2, 5, 7, 8],
   9: [2, 5, 7, 8, 9]}),
 3: ({3: 0, 0: 1, 1: 1, 2: 1, 4: 1, 5: 1, 6: 1, 7: 2, 8: 3, 9: 4},
  {3: [3],
   0: [3, 0],
   1: [3, 1],
   2: [3, 2],
   4: [3, 4],
   5: [3, 5],
   6: [3, 6],
   7: [3, 5, 7],
   8: [3, 5, 7, 8],
   9: [3, 5, 7, 8, 9]}),
 4: ({4: 0, 1: 1, 3: 1, 6: 1, 0: 2, 2: 2, 5: 2, 7: 2, 8: 3, 9: 4},
  {4: [4],
   1: [4, 1],
   3: [4, 3],
   6: [4, 6],
   0: [4, 1, 0],
   2: [4, 3, 2],
   5: [4, 3, 5],
   7: [4, 6, 7],
   8: [4, 6, 7, 8],
   9: [4, 6, 7, 8, 9]}),
 5: ({5: 0, 0: 1, 2: 1, 3: 1, 6: 1, 7: 1, 1: 2, 4: 2, 8: 2, 9: 3},
  {5: [5],
   0: [5, 0],
   2: [5, 2],
   3: [5, 3],
   6: [5, 6],
   7: [5, 7],
   1: [5, 0, 1],
   4: [5, 3, 4],
   8: [5, 7, 8],
   9: [5, 7, 8, 9]}),
 6: ({6: 0, 1: 1, 3: 1, 4: 1, 5: 1, 7: 1, 0: 2, 2: 2, 8: 2, 9: 3},
  {6: [6],
   1: [6, 1],
   3: [6, 3],
   4: [6, 4],
   5: [6, 5],
   7: [6, 7],
   0: [6, 1, 0],
   2: [6, 3, 2],
   8: [6, 7, 8],
   9: [6, 7, 8, 9]}),
 7: ({7: 0, 5: 1, 6: 1, 8: 1, 0: 2, 2: 2, 3: 2, 1: 2, 4: 2, 9: 2},
  {7: [7],
   5: [7, 5],
   6: [7, 6],
   8: [7, 8],
   0: [7, 5, 0],
   2: [7, 5, 2],
   3: [7, 5, 3],
   1: [7, 6, 1],
   4: [7, 6, 4],
   9: [7, 8, 9]}),
 8: ({8: 0, 7: 1, 9: 1, 5: 2, 6: 2, 0: 3, 2: 3, 3: 3, 1: 3, 4: 3},
  {8: [8],
   7: [8, 7],
   9: [8, 9],
   5: [8, 7, 5],
   6: [8, 7, 6],
   0: [8, 7, 5, 0],
   2: [8, 7, 5, 2],
   3: [8, 7, 5, 3],
   1: [8, 7, 6, 1],
   4: [8, 7, 6, 4]}),
 9: ({9: 0, 8: 1, 7: 2, 5: 3, 6: 3, 0: 4, 2: 4, 3: 4, 1: 4, 4: 4},
  {9: [9],
   8: [9, 8],
   7: [9, 8, 7],
   5: [9, 8, 7, 5],
   6: [9, 8, 7, 6],
   0: [9, 8, 7, 5, 0],
   2: [9, 8, 7, 5, 2],
   3: [9, 8, 7, 5, 3],
   1: [9, 8, 7, 6, 1],
   4: [9, 8, 7, 6, 4]})}


g = nx.read_graphml("/Users/lennartkeller/Downloads/ger000173-krueger-die-geistlichen-auf-dem-lande.graphml")
plot_graph(g)

	Eve	Alice	Bob
Eve	1	1	1
Alice	0	1	1
Bob	0	1	1

Data Science for Humanities 2¶

Session: Networks¶

Summer term 23¶

Prof. Goran Glavaš, Lennart Keller¶

Why study networks?¶

Example: Character networks in German Dramas¶

Example: Relationships among German nobles during the Middle Ages¶

Example: Digital Editions¶

Graphs: Describing networks formally¶

Undirected graphs¶

Reflexives graphs¶

Weighted Graphs¶

Graphs: Basics¶

Walk¶

Trail¶

Path¶

Cycle¶

Graph: Representations¶

Adjacency list¶

Adjacency matrix¶

Graph analysis: Node importance¶

Degree centrality¶

Closeness centrality¶

Betweeness centrality¶

Graph: Shortest distance - Dijkstra's Algorithm¶

NetworkX: Working with graphs in Python¶

Creating graphs¶

Creating graphs from scratch¶

Creating graphs from other data-structures¶

Assigning metadata and weights to graphs¶

Visualizing graphs¶

Addendum: Beware of graph visualization¶

Analyzing graphs¶

Project: Analyze character constellations in a Drama of your choice¶

	a	b	c
a	1	1	1.0
b	0	1	0.0
c	0	0	0.5