# multi-head self-attention layer matmul shapes
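#
# Each uncommented line below is a matmul problem descriptor (read here as
# benchdnn-style syntax; an assumption, not stated in this file):
#   mb = batch count = minibatch * num_heads
#   m, n, k = GEMM dimensions
#   trailing n"..." = problem name; its :N suffix presumably gives the
#   per-inference repeat count (number of encoder layers)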
# mb = 1, num_heads = 12, hidden_size = 768, t_x = t_y = 128
mb12m128n128k64n"encoder:QK_matmul:12"
mb12m128n64k128n"encoder:WV_matmul:12"

# mb = 128, num_heads = 12, hidden_size = 768, t_x = t_y = 128
mb1536m128n128k64n"encoder:QK_matmul:12"
mb1536m128n64k128n"encoder:WV_matmul:12"

# mb = 128, num_heads = 16, hidden_size = 1024, t_x = t_y = 128
#mb2048m128n128k64n"encoder:QK_matmul:24"
#mb2048m128n64k128n"encoder:WV_matmul:24"
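
# Worked shape derivation for the second (active) config above, with
# head_dim = hidden_size / num_heads:
#   batch: mb * num_heads = 128 * 12 = 1536
#   head_dim: 768 / 12 = 64
#   QK_matmul (Q x K^T):          m = t_x = 128, n = t_y = 128, k = head_dim = 64
#   WV_matmul (attn weights x V): m = t_x = 128, n = head_dim = 64, k = t_y = 128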
