# Incorporates all meaningful shapes, with bs and ld settings included.
# bs > 1 is applicable only when K is divisible by the K-block size.
# ldb is set to 16 so that B blocking over the N dimension is handled correctly.

# Options are stateful: each --bs / --ld value stays in effect for every
# --batch line that follows, until it is overridden.

# Sweep bs over 1, 7 and 16 for the batches below.
--bs=1,7,16

# No-tail shapes, run with the ld setting left untouched by this file.
--batch=shapes_2d_no_tail_f32

# Only the middle ld field is given (16); the first and third fields are left
# empty. Per the header comment this is ldb, used for the N-tail shapes.
--ld=:16:
--batch=shapes_2d_tail_n_f32
# Clear the ld setting again so the following batches do not inherit ldb=16.
--ld=

# Drop back to bs=1 for the remaining batches.
# NOTE(review): presumably because the K-tail / big-K shapes do not satisfy
# the "K divisible by K-block size" requirement for bs > 1 - confirm against
# the shape files.
--bs=1

--batch=shapes_2d_tail_k_f32
--batch=shapes_2d_big_k_f32

# Same ld setting as for the N-tail batch above (middle field = 16), applied
# to the batches that combine a K variant with an N tail.
--ld=:16:
--batch=shapes_2d_tail_k_tail_n_f32
--batch=shapes_2d_big_k_tail_n_f32
