84 lines
2.1 KiB
84 lines
2.1 KiB
// Data-parallel training step across two GPUs, drawn as a dataflow graph:
// initialization -> per-GPU forward/backward -> gradient AllReduce ->
// per-GPU optimizer update.
// Nodes declared with shape=box are drawn as boxes; all other nodes use the
// default shape. NOTE(review): boxes look like operators and default-shaped
// nodes like variables, but w_grad (fc_g_0 / fc_g_1) is also a box — confirm
// the intended convention.
digraph G {
  // Stage 1: the startup program initializes W on GPU0, which is then
  // broadcast to GPU1.
  subgraph cluster_init {
    label="Initialization"
    startup_program [label="startup", shape=box]
    node_w_g0 [label="W\nGPU0"]
    startup_program -> node_w_g0 [label="Initialize"]
    node_w_g1 [label="W\nGPU1"]
    node_w_g0 -> node_w_g1 [label="broadcast"]
  }

  // Stage 2: each GPU runs the same forward/backward chain on its own copy
  // of W. The two inner clusters are structurally identical.
  subgraph cluster_train {
    label="forward_backward"

    subgraph cluster_gpu0 {
      label="GPU0"
      fc_0 [label="fc\nGPU0", shape=box]
      hidden_0 [label="hidden\nGPU0"]
      // node_w_g0 is declared in cluster_init; only the edge is added here.
      node_w_g0 -> fc_0
      fc_0 -> hidden_0
      loss0 [label="loss\nGPU0"]
      hidden_0 -> loss0 [label="many ops omitted"]
      // scale_loss_gradient has no incoming edge: it is the source of the
      // backward pass and emits the initial loss gradient.
      scale_loss_0 [label="scale_loss_gradient\nGPU0", shape=box]
      loss_g0 [label="loss_grad\nGPU0"]
      scale_loss_0 -> loss_g0

      // w_grad is fed by the loss, its gradient, and the hidden activation.
      fc_g_0 [label="w_grad\nGPU0", shape=box]
      loss0 -> fc_g_0
      loss_g0 -> fc_g_0
      hidden_0 -> fc_g_0
    }

    subgraph cluster_gpu1 {
      label="GPU1"
      fc_1 [label="fc\nGPU1", shape=box]
      hidden_1 [label="hidden\nGPU1"]
      // node_w_g1 is declared in cluster_init; only the edge is added here.
      node_w_g1 -> fc_1
      fc_1 -> hidden_1
      loss1 [label="loss\nGPU1"]
      hidden_1 -> loss1 [label="many ops omitted"]
      scale_loss_1 [label="scale_loss_gradient\nGPU1", shape=box]
      loss_g1 [label="loss_grad\nGPU1"]
      scale_loss_1 -> loss_g1

      fc_g_1 [label="w_grad\nGPU1", shape=box]
      loss1 -> fc_g_1
      loss_g1 -> fc_g_1
      hidden_1 -> fc_g_1
    }
  }

  // Stage 3: per-GPU weight gradients are merged by a single AllReduce node,
  // which hands a merged gradient back to each GPU.
  all_reduce_w [label="Merge Gradients(AllReduce)", shape=box]
  fc_g_0 -> all_reduce_w
  fc_g_1 -> all_reduce_w

  fc_g_0_merged [label="w_grad\nMerged\nGPU0"]
  fc_g_1_merged [label="w_grad\nMerged\nGPU1"]
  all_reduce_w -> fc_g_0_merged
  all_reduce_w -> fc_g_1_merged

  // Stage 4: each GPU applies SGD to its local W using the merged gradient.
  subgraph cluster_optimization {
    label="Optimization"

    subgraph cluster_opt_gpu0 {
      label="GPU0"
      sgd_0 [label="SGD Op\nGPU0", shape=box]

      fc_g_0_merged -> sgd_0
      node_w_g0 -> sgd_0
      optimized_w_0 [label="Optimized W\nGPU0"]
      sgd_0 -> optimized_w_0
    }

    subgraph cluster_opt_gpu1 {
      label="GPU1"
      sgd_1 [label="SGD Op\nGPU1", shape=box]

      fc_g_1_merged -> sgd_1
      node_w_g1 -> sgd_1
      // Label fixed: this is GPU1's updated weight (was mislabeled "GPU0").
      optimized_w_1 [label="Optimized W\nGPU1"]
      sgd_1 -> optimized_w_1
    }
  }
}
|