; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -mattr=+v -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32,LMULMAX2,LMULMAX2-RV32
; RUN: llc -mtriple=riscv64 -mattr=+v -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64,LMULMAX2,LMULMAX2-RV64
; RUN: llc -mtriple=riscv32 -mattr=+v -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32,LMULMAX1,LMULMAX1-RV32
; RUN: llc -mtriple=riscv64 -mattr=+v -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64,LMULMAX1,LMULMAX1-RV64

; Element-wise add of two <16 x i8> vectors loaded from %x and %y; the sum is
; stored back to %x. The checks expect a single vadd.vv at VL=16, e8, m1.
define void @add_v16i8(ptr %x, ptr %y) {
; CHECK-LABEL: add_v16i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT:    vle8.v v8, (a0)
; CHECK-NEXT:    vle8.v v9, (a1)
; CHECK-NEXT:    vadd.vv v8, v8, v9
; CHECK-NEXT:    vse8.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <16 x i8>, ptr %x
  %b = load <16 x i8>, ptr %y
  %c = add <16 x i8> %a, %b
  store <16 x i8> %c, ptr %x
  ret void
}

; Element-wise add of two <8 x i16> vectors loaded from %x and %y; the sum is
; stored back to %x. The checks expect a single vadd.vv at VL=8, e16, m1.
define void @add_v8i16(ptr %x, ptr %y) {
; CHECK-LABEL: add_v8i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0)
; CHECK-NEXT:    vle16.v v9, (a1)
; CHECK-NEXT:    vadd.vv v8, v8, v9
; CHECK-NEXT:    vse16.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <8 x i16>, ptr %x
  %b = load <8 x i16>, ptr %y
  %c = add <8 x i16> %a, %b
  store <8 x i16> %c, ptr %x
  ret void
}

; Element-wise add of two <6 x i16> vectors (non-power-of-two element count)
; loaded from %x and %y; the sum is stored back to %x. The checks expect VL=6
; for the loads and the store, but VL=8 around the vadd.vv itself.
define void @add_v6i16(ptr %x, ptr %y) {
; CHECK-LABEL: add_v6i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0)
; CHECK-NEXT:    vle16.v v9, (a1)
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT:    vadd.vv v8, v8, v9
; CHECK-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
; CHECK-NEXT:    vse16.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <6 x i16>, ptr %x
  %b = load <6 x i16>, ptr %y
  %c = add <6 x i16> %a, %b
  store <6 x i16> %c, ptr %x
  ret void
}

; Element-wise add of two <4 x i32> vectors loaded from %x and %y; the sum is
; stored back to %x. The checks expect a single vadd.vv at VL=4, e32, m1.
define void @add_v4i32(ptr %x, ptr %y) {
; CHECK-LABEL: add_v4i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    vle32.v v9, (a1)
; CHECK-NEXT:    vadd.vv v8, v8, v9
; CHECK-NEXT:    vse32.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <4 x i32>, ptr %x
  %b = load <4 x i32>, ptr %y
  %c = add <4 x i32> %a, %b
  store <4 x i32> %c, ptr %x
  ret void
}

; Element-wise add of two <2 x i64> vectors loaded from %x and %y; the sum is
; stored back to %x. The checks expect a single vadd.vv at VL=2, e64, m1.
define void @add_v2i64(ptr %x, ptr %y) {
; CHECK-LABEL: add_v2i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; CHECK-NEXT:    vle64.v v8, (a0)
; CHECK-NEXT:    vle64.v v9, (a1)
; CHECK-NEXT:    vadd.vv v8, v8, v9
; CHECK-NEXT:    vse64.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <2 x i64>, ptr %x
  %b = load <2 x i64>, ptr %y
  %c = add <2 x i64> %a, %b
  store <2 x i64> %c, ptr %x
  ret void
}

; Element-wise subtract (%x - %y) of two <16 x i8> vectors; the difference is
; stored back to %x. The checks expect a single vsub.vv at VL=16, e8, m1.
define void @sub_v16i8(ptr %x, ptr %y) {
; CHECK-LABEL: sub_v16i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT:    vle8.v v8, (a0)
; CHECK-NEXT:    vle8.v v9, (a1)
; CHECK-NEXT:    vsub.vv v8, v8, v9
; CHECK-NEXT:    vse8.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <16 x i8>, ptr %x
  %b = load <16 x i8>, ptr %y
  %c = sub <16 x i8> %a, %b
  store <16 x i8> %c, ptr %x
  ret void
}

; Element-wise subtract (%x - %y) of two <8 x i16> vectors; the difference is
; stored back to %x. The checks expect a single vsub.vv at VL=8, e16, m1.
define void @sub_v8i16(ptr %x, ptr %y) {
; CHECK-LABEL: sub_v8i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0)
; CHECK-NEXT:    vle16.v v9, (a1)
; CHECK-NEXT:    vsub.vv v8, v8, v9
; CHECK-NEXT:    vse16.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <8 x i16>, ptr %x
  %b = load <8 x i16>, ptr %y
  %c = sub <8 x i16> %a, %b
  store <8 x i16> %c, ptr %x
  ret void
}

; Element-wise subtract (%x - %y) of two <6 x i16> vectors (non-power-of-two
; element count); the difference is stored back to %x. The checks expect VL=6
; for the loads and the store, but VL=8 around the vsub.vv itself.
define void @sub_v6i16(ptr %x, ptr %y) {
; CHECK-LABEL: sub_v6i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0)
; CHECK-NEXT:    vle16.v v9, (a1)
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT:    vsub.vv v8, v8, v9
; CHECK-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
; CHECK-NEXT:    vse16.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <6 x i16>, ptr %x
  %b = load <6 x i16>, ptr %y
  %c = sub <6 x i16> %a, %b
  store <6 x i16> %c, ptr %x
  ret void
}

; Element-wise subtract (%x - %y) of two <4 x i32> vectors; the difference is
; stored back to %x. The checks expect a single vsub.vv at VL=4, e32, m1.
define void @sub_v4i32(ptr %x, ptr %y) {
; CHECK-LABEL: sub_v4i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    vle32.v v9, (a1)
; CHECK-NEXT:    vsub.vv v8, v8, v9
; CHECK-NEXT:    vse32.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <4 x i32>, ptr %x
  %b = load <4 x i32>, ptr %y
  %c = sub <4 x i32> %a, %b
  store <4 x i32> %c, ptr %x
  ret void
}

; Element-wise subtract (%x - %y) of two <2 x i64> vectors; the difference is
; stored back to %x. The checks expect a single vsub.vv at VL=2, e64, m1.
define void @sub_v2i64(ptr %x, ptr %y) {
; CHECK-LABEL: sub_v2i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; CHECK-NEXT:    vle64.v v8, (a0)
; CHECK-NEXT:    vle64.v v9, (a1)
; CHECK-NEXT:    vsub.vv v8, v8, v9
; CHECK-NEXT:    vse64.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <2 x i64>, ptr %x
  %b = load <2 x i64>, ptr %y
  %c = sub <2 x i64> %a, %b
  store <2 x i64> %c, ptr %x
  ret void
}

; Element-wise multiply of two <16 x i8> vectors loaded from %x and %y; the
; product is stored back to %x. The checks expect a single vmul.vv at VL=16,
; e8, m1.
define void @mul_v16i8(ptr %x, ptr %y) {
; CHECK-LABEL: mul_v16i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT:    vle8.v v8, (a0)
; CHECK-NEXT:    vle8.v v9, (a1)
; CHECK-NEXT:    vmul.vv v8, v8, v9
; CHECK-NEXT:    vse8.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <16 x i8>, ptr %x
  %b = load <16 x i8>, ptr %y
  %c = mul <16 x i8> %a, %b
  store <16 x i8> %c, ptr %x
  ret void
}

; Element-wise multiply of two <8 x i16> vectors loaded from %x and %y; the
; product is stored back to %x. The checks expect a single vmul.vv at VL=8,
; e16, m1.
define void @mul_v8i16(ptr %x, ptr %y) {
; CHECK-LABEL: mul_v8i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0)
; CHECK-NEXT:    vle16.v v9, (a1)
; CHECK-NEXT:    vmul.vv v8, v8, v9
; CHECK-NEXT:    vse16.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <8 x i16>, ptr %x
  %b = load <8 x i16>, ptr %y
  %c = mul <8 x i16> %a, %b
  store <8 x i16> %c, ptr %x
  ret void
}

; Element-wise multiply of two <6 x i16> vectors (non-power-of-two element
; count); the product is stored back to %x. The checks expect VL=6 for the
; loads and the store, but VL=8 around the vmul.vv itself.
define void @mul_v6i16(ptr %x, ptr %y) {
; CHECK-LABEL: mul_v6i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0)
; CHECK-NEXT:    vle16.v v9, (a1)
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT:    vmul.vv v8, v8, v9
; CHECK-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
; CHECK-NEXT:    vse16.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <6 x i16>, ptr %x
  %b = load <6 x i16>, ptr %y
  %c = mul <6 x i16> %a, %b
  store <6 x i16> %c, ptr %x
  ret void
}

; Element-wise multiply of two <4 x i32> vectors loaded from %x and %y; the
; product is stored back to %x. The checks expect a single vmul.vv at VL=4,
; e32, m1.
define void @mul_v4i32(ptr %x, ptr %y) {
; CHECK-LABEL: mul_v4i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    vle32.v v9, (a1)
; CHECK-NEXT:    vmul.vv v8, v8, v9
; CHECK-NEXT:    vse32.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <4 x i32>, ptr %x
  %b = load <4 x i32>, ptr %y
  %c = mul <4 x i32> %a, %b
  store <4 x i32> %c, ptr %x
  ret void
}

; Element-wise multiply of two <2 x i64> vectors loaded from %x and %y; the
; product is stored back to %x. The checks expect a single vmul.vv at VL=2,
; e64, m1.
define void @mul_v2i64(ptr %x, ptr %y) {
; CHECK-LABEL: mul_v2i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; CHECK-NEXT:    vle64.v v8, (a0)
; CHECK-NEXT:    vle64.v v9, (a1)
; CHECK-NEXT:    vmul.vv v8, v8, v9
; CHECK-NEXT:    vse64.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <2 x i64>, ptr %x
  %b = load <2 x i64>, ptr %y
  %c = mul <2 x i64> %a, %b
  store <2 x i64> %c, ptr %x
  ret void
}

; Bitwise AND of two <16 x i8> vectors loaded from %x and %y; the result is
; stored back to %x. The checks expect a single vand.vv at VL=16, e8, m1.
define void @and_v16i8(ptr %x, ptr %y) {
; CHECK-LABEL: and_v16i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT:    vle8.v v8, (a0)
; CHECK-NEXT:    vle8.v v9, (a1)
; CHECK-NEXT:    vand.vv v8, v8, v9
; CHECK-NEXT:    vse8.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <16 x i8>, ptr %x
  %b = load <16 x i8>, ptr %y
  %c = and <16 x i8> %a, %b
  store <16 x i8> %c, ptr %x
  ret void
}

; Bitwise AND of two <8 x i16> vectors loaded from %x and %y; the result is
; stored back to %x. The checks expect a single vand.vv at VL=8, e16, m1.
define void @and_v8i16(ptr %x, ptr %y) {
; CHECK-LABEL: and_v8i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0)
; CHECK-NEXT:    vle16.v v9, (a1)
; CHECK-NEXT:    vand.vv v8, v8, v9
; CHECK-NEXT:    vse16.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <8 x i16>, ptr %x
  %b = load <8 x i16>, ptr %y
  %c = and <8 x i16> %a, %b
  store <8 x i16> %c, ptr %x
  ret void
}

; Bitwise AND of two <6 x i16> vectors (non-power-of-two element count); the
; result is stored back to %x. The checks expect VL=6 for the loads and the
; store, but VL=8 around the vand.vv itself.
define void @and_v6i16(ptr %x, ptr %y) {
; CHECK-LABEL: and_v6i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0)
; CHECK-NEXT:    vle16.v v9, (a1)
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT:    vand.vv v8, v8, v9
; CHECK-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
; CHECK-NEXT:    vse16.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <6 x i16>, ptr %x
  %b = load <6 x i16>, ptr %y
  %c = and <6 x i16> %a, %b
  store <6 x i16> %c, ptr %x
  ret void
}

; Bitwise AND of two <4 x i32> vectors loaded from %x and %y; the result is
; stored back to %x. The checks expect a single vand.vv at VL=4, e32, m1.
define void @and_v4i32(ptr %x, ptr %y) {
; CHECK-LABEL: and_v4i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    vle32.v v9, (a1)
; CHECK-NEXT:    vand.vv v8, v8, v9
; CHECK-NEXT:    vse32.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <4 x i32>, ptr %x
  %b = load <4 x i32>, ptr %y
  %c = and <4 x i32> %a, %b
  store <4 x i32> %c, ptr %x
  ret void
}

; Bitwise AND of two <2 x i64> vectors loaded from %x and %y; the result is
; stored back to %x. The checks expect a single vand.vv at VL=2, e64, m1.
define void @and_v2i64(ptr %x, ptr %y) {
; CHECK-LABEL: and_v2i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; CHECK-NEXT:    vle64.v v8, (a0)
; CHECK-NEXT:    vle64.v v9, (a1)
; CHECK-NEXT:    vand.vv v8, v8, v9
; CHECK-NEXT:    vse64.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <2 x i64>, ptr %x
  %b = load <2 x i64>, ptr %y
  %c = and <2 x i64> %a, %b
  store <2 x i64> %c, ptr %x
  ret void
}

; Bitwise OR of two <16 x i8> vectors loaded from %x and %y; the result is
; stored back to %x. The checks expect a single vor.vv at VL=16, e8, m1.
define void @or_v16i8(ptr %x, ptr %y) {
; CHECK-LABEL: or_v16i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT:    vle8.v v8, (a0)
; CHECK-NEXT:    vle8.v v9, (a1)
; CHECK-NEXT:    vor.vv v8, v8, v9
; CHECK-NEXT:    vse8.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <16 x i8>, ptr %x
  %b = load <16 x i8>, ptr %y
  %c = or <16 x i8> %a, %b
  store <16 x i8> %c, ptr %x
  ret void
}

; Bitwise OR of two <8 x i16> vectors loaded from %x and %y; the result is
; stored back to %x. The checks expect a single vor.vv at VL=8, e16, m1.
define void @or_v8i16(ptr %x, ptr %y) {
; CHECK-LABEL: or_v8i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0)
; CHECK-NEXT:    vle16.v v9, (a1)
; CHECK-NEXT:    vor.vv v8, v8, v9
; CHECK-NEXT:    vse16.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <8 x i16>, ptr %x
  %b = load <8 x i16>, ptr %y
  %c = or <8 x i16> %a, %b
  store <8 x i16> %c, ptr %x
  ret void
}

; Bitwise OR of two <6 x i16> vectors (non-power-of-two element count); the
; result is stored back to %x. The checks expect VL=6 for the loads and the
; store, but VL=8 around the vor.vv itself.
define void @or_v6i16(ptr %x, ptr %y) {
; CHECK-LABEL: or_v6i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0)
; CHECK-NEXT:    vle16.v v9, (a1)
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT:    vor.vv v8, v8, v9
; CHECK-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
; CHECK-NEXT:    vse16.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <6 x i16>, ptr %x
  %b = load <6 x i16>, ptr %y
  %c = or <6 x i16> %a, %b
  store <6 x i16> %c, ptr %x
  ret void
}

; Bitwise OR of two <4 x i32> vectors loaded from %x and %y; the result is
; stored back to %x. The checks expect a single vor.vv at VL=4, e32, m1.
define void @or_v4i32(ptr %x, ptr %y) {
; CHECK-LABEL: or_v4i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    vle32.v v9, (a1)
; CHECK-NEXT:    vor.vv v8, v8, v9
; CHECK-NEXT:    vse32.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <4 x i32>, ptr %x
  %b = load <4 x i32>, ptr %y
  %c = or <4 x i32> %a, %b
  store <4 x i32> %c, ptr %x
  ret void
}

; Bitwise OR of two <2 x i64> vectors loaded from %x and %y; the result is
; stored back to %x. The checks expect a single vor.vv at VL=2, e64, m1.
define void @or_v2i64(ptr %x, ptr %y) {
; CHECK-LABEL: or_v2i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; CHECK-NEXT:    vle64.v v8, (a0)
; CHECK-NEXT:    vle64.v v9, (a1)
; CHECK-NEXT:    vor.vv v8, v8, v9
; CHECK-NEXT:    vse64.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <2 x i64>, ptr %x
  %b = load <2 x i64>, ptr %y
  %c = or <2 x i64> %a, %b
  store <2 x i64> %c, ptr %x
  ret void
}

; Bitwise XOR of two <16 x i8> vectors loaded from %x and %y; the result is
; stored back to %x. The checks expect a single vxor.vv at VL=16, e8, m1.
define void @xor_v16i8(ptr %x, ptr %y) {
; CHECK-LABEL: xor_v16i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT:    vle8.v v8, (a0)
; CHECK-NEXT:    vle8.v v9, (a1)
; CHECK-NEXT:    vxor.vv v8, v8, v9
; CHECK-NEXT:    vse8.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <16 x i8>, ptr %x
  %b = load <16 x i8>, ptr %y
  %c = xor <16 x i8> %a, %b
  store <16 x i8> %c, ptr %x
  ret void
}

; Bitwise XOR of two <8 x i16> vectors loaded from %x and %y; the result is
; stored back to %x. The checks expect a single vxor.vv at VL=8, e16, m1.
define void @xor_v8i16(ptr %x, ptr %y) {
; CHECK-LABEL: xor_v8i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0)
; CHECK-NEXT:    vle16.v v9, (a1)
; CHECK-NEXT:    vxor.vv v8, v8, v9
; CHECK-NEXT:    vse16.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <8 x i16>, ptr %x
  %b = load <8 x i16>, ptr %y
  %c = xor <8 x i16> %a, %b
  store <8 x i16> %c, ptr %x
  ret void
}

; Bitwise XOR of two <6 x i16> vectors (non-power-of-two element count); the
; result is stored back to %x. The checks expect VL=6 for the loads and the
; store, but VL=8 around the vxor.vv itself.
define void @xor_v6i16(ptr %x, ptr %y) {
; CHECK-LABEL: xor_v6i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0)
; CHECK-NEXT:    vle16.v v9, (a1)
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT:    vxor.vv v8, v8, v9
; CHECK-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
; CHECK-NEXT:    vse16.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <6 x i16>, ptr %x
  %b = load <6 x i16>, ptr %y
  %c = xor <6 x i16> %a, %b
  store <6 x i16> %c, ptr %x
  ret void
}

; Bitwise XOR of two <4 x i32> vectors loaded from %x and %y; the result is
; stored back to %x. The checks expect a single vxor.vv at VL=4, e32, m1.
define void @xor_v4i32(ptr %x, ptr %y) {
; CHECK-LABEL: xor_v4i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    vle32.v v9, (a1)
; CHECK-NEXT:    vxor.vv v8, v8, v9
; CHECK-NEXT:    vse32.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <4 x i32>, ptr %x
  %b = load <4 x i32>, ptr %y
  %c = xor <4 x i32> %a, %b
  store <4 x i32> %c, ptr %x
  ret void
}

; Bitwise XOR of two <2 x i64> vectors loaded from %x and %y; the result is
; stored back to %x. The checks expect a single vxor.vv at VL=2, e64, m1.
define void @xor_v2i64(ptr %x, ptr %y) {
; CHECK-LABEL: xor_v2i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; CHECK-NEXT:    vle64.v v8, (a0)
; CHECK-NEXT:    vle64.v v9, (a1)
; CHECK-NEXT:    vxor.vv v8, v8, v9
; CHECK-NEXT:    vse64.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <2 x i64>, ptr %x
  %b = load <2 x i64>, ptr %y
  %c = xor <2 x i64> %a, %b
  store <2 x i64> %c, ptr %x
  ret void
}

; Logical right shift of the <16 x i8> vector at %x by the per-element amounts
; loaded from %y; the result is stored back to %x. The checks expect a single
; vsrl.vv at VL=16, e8, m1.
define void @lshr_v16i8(ptr %x, ptr %y) {
; CHECK-LABEL: lshr_v16i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT:    vle8.v v8, (a0)
; CHECK-NEXT:    vle8.v v9, (a1)
; CHECK-NEXT:    vsrl.vv v8, v8, v9
; CHECK-NEXT:    vse8.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <16 x i8>, ptr %x
  %b = load <16 x i8>, ptr %y
  %c = lshr <16 x i8> %a, %b
  store <16 x i8> %c, ptr %x
  ret void
}

; Logical right shift of the <8 x i16> vector at %x by the per-element amounts
; loaded from %y; the result is stored back to %x. The checks expect a single
; vsrl.vv at VL=8, e16, m1.
define void @lshr_v8i16(ptr %x, ptr %y) {
; CHECK-LABEL: lshr_v8i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0)
; CHECK-NEXT:    vle16.v v9, (a1)
; CHECK-NEXT:    vsrl.vv v8, v8, v9
; CHECK-NEXT:    vse16.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <8 x i16>, ptr %x
  %b = load <8 x i16>, ptr %y
  %c = lshr <8 x i16> %a, %b
  store <8 x i16> %c, ptr %x
  ret void
}

; Logical right shift of the <6 x i16> vector at %x (non-power-of-two element
; count) by the per-element amounts from %y; the result is stored back to %x.
; The checks expect VL=6 for the loads and the store, but VL=8 around the
; vsrl.vv itself.
define void @lshr_v6i16(ptr %x, ptr %y) {
; CHECK-LABEL: lshr_v6i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0)
; CHECK-NEXT:    vle16.v v9, (a1)
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT:    vsrl.vv v8, v8, v9
; CHECK-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
; CHECK-NEXT:    vse16.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <6 x i16>, ptr %x
  %b = load <6 x i16>, ptr %y
  %c = lshr <6 x i16> %a, %b
  store <6 x i16> %c, ptr %x
  ret void
}

; Logical right shift of the <4 x i32> vector at %x by the per-element amounts
; loaded from %y; the result is stored back to %x. The checks expect a single
; vsrl.vv at VL=4, e32, m1.
define void @lshr_v4i32(ptr %x, ptr %y) {
; CHECK-LABEL: lshr_v4i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    vle32.v v9, (a1)
; CHECK-NEXT:    vsrl.vv v8, v8, v9
; CHECK-NEXT:    vse32.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <4 x i32>, ptr %x
  %b = load <4 x i32>, ptr %y
  %c = lshr <4 x i32> %a, %b
  store <4 x i32> %c, ptr %x
  ret void
}

; Logical right shift of the <2 x i64> vector at %x by the per-element amounts
; loaded from %y; the result is stored back to %x. The checks expect a single
; vsrl.vv at VL=2, e64, m1.
define void @lshr_v2i64(ptr %x, ptr %y) {
; CHECK-LABEL: lshr_v2i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; CHECK-NEXT:    vle64.v v8, (a0)
; CHECK-NEXT:    vle64.v v9, (a1)
; CHECK-NEXT:    vsrl.vv v8, v8, v9
; CHECK-NEXT:    vse64.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <2 x i64>, ptr %x
  %b = load <2 x i64>, ptr %y
  %c = lshr <2 x i64> %a, %b
  store <2 x i64> %c, ptr %x
  ret void
}

; Arithmetic right shift of the <16 x i8> vector at %x by the per-element
; amounts loaded from %y; the result is stored back to %x. The checks expect a
; single vsra.vv at VL=16, e8, m1.
define void @ashr_v16i8(ptr %x, ptr %y) {
; CHECK-LABEL: ashr_v16i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT:    vle8.v v8, (a0)
; CHECK-NEXT:    vle8.v v9, (a1)
; CHECK-NEXT:    vsra.vv v8, v8, v9
; CHECK-NEXT:    vse8.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <16 x i8>, ptr %x
  %b = load <16 x i8>, ptr %y
  %c = ashr <16 x i8> %a, %b
  store <16 x i8> %c, ptr %x
  ret void
}

; Arithmetic right shift of the <8 x i16> vector at %x by the per-element
; amounts loaded from %y; the result is stored back to %x. The checks expect a
; single vsra.vv at VL=8, e16, m1.
define void @ashr_v8i16(ptr %x, ptr %y) {
; CHECK-LABEL: ashr_v8i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0)
; CHECK-NEXT:    vle16.v v9, (a1)
; CHECK-NEXT:    vsra.vv v8, v8, v9
; CHECK-NEXT:    vse16.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <8 x i16>, ptr %x
  %b = load <8 x i16>, ptr %y
  %c = ashr <8 x i16> %a, %b
  store <8 x i16> %c, ptr %x
  ret void
}

; Arithmetic right shift of the <6 x i16> vector at %x (non-power-of-two
; element count) by the per-element amounts from %y; the result is stored back
; to %x. The checks expect VL=6 for the loads and the store, but VL=8 around
; the vsra.vv itself.
define void @ashr_v6i16(ptr %x, ptr %y) {
; CHECK-LABEL: ashr_v6i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0)
; CHECK-NEXT:    vle16.v v9, (a1)
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT:    vsra.vv v8, v8, v9
; CHECK-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
; CHECK-NEXT:    vse16.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <6 x i16>, ptr %x
  %b = load <6 x i16>, ptr %y
  %c = ashr <6 x i16> %a, %b
  store <6 x i16> %c, ptr %x
  ret void
}

; Arithmetic right shift of the <4 x i32> vector at %x by the per-element
; amounts loaded from %y; the result is stored back to %x. The checks expect a
; single vsra.vv at VL=4, e32, m1.
define void @ashr_v4i32(ptr %x, ptr %y) {
; CHECK-LABEL: ashr_v4i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    vle32.v v9, (a1)
; CHECK-NEXT:    vsra.vv v8, v8, v9
; CHECK-NEXT:    vse32.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <4 x i32>, ptr %x
  %b = load <4 x i32>, ptr %y
  %c = ashr <4 x i32> %a, %b
  store <4 x i32> %c, ptr %x
  ret void
}

; Arithmetic right shift of the <2 x i64> vector at %x by the per-element
; amounts loaded from %y; the result is stored back to %x. The checks expect a
; single vsra.vv at VL=2, e64, m1.
define void @ashr_v2i64(ptr %x, ptr %y) {
; CHECK-LABEL: ashr_v2i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; CHECK-NEXT:    vle64.v v8, (a0)
; CHECK-NEXT:    vle64.v v9, (a1)
; CHECK-NEXT:    vsra.vv v8, v8, v9
; CHECK-NEXT:    vse64.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <2 x i64>, ptr %x
  %b = load <2 x i64>, ptr %y
  %c = ashr <2 x i64> %a, %b
  store <2 x i64> %c, ptr %x
  ret void
}

; Left shift of the <16 x i8> vector at %x by the per-element amounts loaded
; from %y; the result is stored back to %x. The checks expect a single vsll.vv
; at VL=16, e8, m1.
define void @shl_v16i8(ptr %x, ptr %y) {
; CHECK-LABEL: shl_v16i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT:    vle8.v v8, (a0)
; CHECK-NEXT:    vle8.v v9, (a1)
; CHECK-NEXT:    vsll.vv v8, v8, v9
; CHECK-NEXT:    vse8.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <16 x i8>, ptr %x
  %b = load <16 x i8>, ptr %y
  %c = shl <16 x i8> %a, %b
  store <16 x i8> %c, ptr %x
  ret void
}

; Left shift of the <8 x i16> vector at %x by the per-element amounts loaded
; from %y; the result is stored back to %x. The checks expect a single vsll.vv
; at VL=8, e16, m1.
define void @shl_v8i16(ptr %x, ptr %y) {
; CHECK-LABEL: shl_v8i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0)
; CHECK-NEXT:    vle16.v v9, (a1)
; CHECK-NEXT:    vsll.vv v8, v8, v9
; CHECK-NEXT:    vse16.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <8 x i16>, ptr %x
  %b = load <8 x i16>, ptr %y
  %c = shl <8 x i16> %a, %b
  store <8 x i16> %c, ptr %x
  ret void
}

; Left shift of the <6 x i16> vector at %x (non-power-of-two element count) by
; the per-element amounts from %y; the result is stored back to %x. The checks
; expect VL=6 for the loads and the store, but VL=8 around the vsll.vv itself.
define void @shl_v6i16(ptr %x, ptr %y) {
; CHECK-LABEL: shl_v6i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0)
; CHECK-NEXT:    vle16.v v9, (a1)
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT:    vsll.vv v8, v8, v9
; CHECK-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
; CHECK-NEXT:    vse16.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <6 x i16>, ptr %x
  %b = load <6 x i16>, ptr %y
  %c = shl <6 x i16> %a, %b
  store <6 x i16> %c, ptr %x
  ret void
}

; Left shift of the <4 x i32> vector at %x by the per-element amounts loaded
; from %y; the result is stored back to %x. The checks expect a single vsll.vv
; at VL=4, e32, m1.
define void @shl_v4i32(ptr %x, ptr %y) {
; CHECK-LABEL: shl_v4i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    vle32.v v9, (a1)
; CHECK-NEXT:    vsll.vv v8, v8, v9
; CHECK-NEXT:    vse32.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <4 x i32>, ptr %x
  %b = load <4 x i32>, ptr %y
  %c = shl <4 x i32> %a, %b
  store <4 x i32> %c, ptr %x
  ret void
}

; Left shift of the <2 x i64> vector at %x by the per-element amounts loaded
; from %y; the result is stored back to %x. The checks expect a single vsll.vv
; at VL=2, e64, m1.
define void @shl_v2i64(ptr %x, ptr %y) {
; CHECK-LABEL: shl_v2i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; CHECK-NEXT:    vle64.v v8, (a0)
; CHECK-NEXT:    vle64.v v9, (a1)
; CHECK-NEXT:    vsll.vv v8, v8, v9
; CHECK-NEXT:    vse64.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <2 x i64>, ptr %x
  %b = load <2 x i64>, ptr %y
  %c = shl <2 x i64> %a, %b
  store <2 x i64> %c, ptr %x
  ret void
}

; Signed element-wise division (%x / %y) of two <16 x i8> vectors; the quotient
; is stored back to %x. The checks expect a single vdiv.vv at VL=16, e8, m1.
define void @sdiv_v16i8(ptr %x, ptr %y) {
; CHECK-LABEL: sdiv_v16i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT:    vle8.v v8, (a0)
; CHECK-NEXT:    vle8.v v9, (a1)
; CHECK-NEXT:    vdiv.vv v8, v8, v9
; CHECK-NEXT:    vse8.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <16 x i8>, ptr %x
  %b = load <16 x i8>, ptr %y
  %c = sdiv <16 x i8> %a, %b
  store <16 x i8> %c, ptr %x
  ret void
}

; Signed element-wise division (%x / %y) of two <8 x i16> vectors; the quotient
; is stored back to %x. The checks expect a single vdiv.vv at VL=8, e16, m1.
define void @sdiv_v8i16(ptr %x, ptr %y) {
; CHECK-LABEL: sdiv_v8i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0)
; CHECK-NEXT:    vle16.v v9, (a1)
; CHECK-NEXT:    vdiv.vv v8, v8, v9
; CHECK-NEXT:    vse16.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <8 x i16>, ptr %x
  %b = load <8 x i16>, ptr %y
  %c = sdiv <8 x i16> %a, %b
  store <8 x i16> %c, ptr %x
  ret void
}

; Signed element-wise division (%x / %y) of two <6 x i16> vectors; the quotient
; is stored back to %x. Unlike the add/sub/logic v6i16 cases, the checks expect
; the operation to be split: elements 4..5 are extracted with vslidedown.vi and
; divided at VL=2 (mf4), elements 0..3 are divided at VL=4 (mf2), and the two
; results are recombined with vslideup.vi before the VL=6 store.
; NOTE(review): presumably the split keeps the padding lanes out of the divide;
; confirm against the RISC-V lowering code.
define void @sdiv_v6i16(ptr %x, ptr %y) {
; CHECK-LABEL: sdiv_v6i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
; CHECK-NEXT:    vle16.v v8, (a1)
; CHECK-NEXT:    vle16.v v9, (a0)
; CHECK-NEXT:    vsetivli zero, 2, e16, m1, ta, ma
; CHECK-NEXT:    vslidedown.vi v10, v8, 4
; CHECK-NEXT:    vslidedown.vi v11, v9, 4
; CHECK-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
; CHECK-NEXT:    vdiv.vv v10, v11, v10
; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
; CHECK-NEXT:    vdiv.vv v8, v9, v8
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT:    vslideup.vi v8, v10, 4
; CHECK-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
; CHECK-NEXT:    vse16.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <6 x i16>, ptr %x
  %b = load <6 x i16>, ptr %y
  %c = sdiv <6 x i16> %a, %b
  store <6 x i16> %c, ptr %x
  ret void
}

; Signed element-wise division (%x / %y) of two <4 x i32> vectors; the quotient
; is stored back to %x. The checks expect a single vdiv.vv at VL=4, e32, m1.
define void @sdiv_v4i32(ptr %x, ptr %y) {
; CHECK-LABEL: sdiv_v4i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    vle32.v v9, (a1)
; CHECK-NEXT:    vdiv.vv v8, v8, v9
; CHECK-NEXT:    vse32.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <4 x i32>, ptr %x
  %b = load <4 x i32>, ptr %y
  %c = sdiv <4 x i32> %a, %b
  store <4 x i32> %c, ptr %x
  ret void
}

; Signed element-wise division (%x / %y) of two <2 x i64> vectors; the quotient
; is stored back to %x. The checks expect a single vdiv.vv at VL=2, e64, m1.
define void @sdiv_v2i64(ptr %x, ptr %y) {
; CHECK-LABEL: sdiv_v2i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; CHECK-NEXT:    vle64.v v8, (a0)
; CHECK-NEXT:    vle64.v v9, (a1)
; CHECK-NEXT:    vdiv.vv v8, v8, v9
; CHECK-NEXT:    vse64.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <2 x i64>, ptr %x
  %b = load <2 x i64>, ptr %y
  %c = sdiv <2 x i64> %a, %b
  store <2 x i64> %c, ptr %x
  ret void
}

; Signed element-wise remainder (%x rem %y) of two <16 x i8> vectors; the
; result is stored back to %x. The checks expect a single vrem.vv at VL=16,
; e8, m1.
define void @srem_v16i8(ptr %x, ptr %y) {
; CHECK-LABEL: srem_v16i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT:    vle8.v v8, (a0)
; CHECK-NEXT:    vle8.v v9, (a1)
; CHECK-NEXT:    vrem.vv v8, v8, v9
; CHECK-NEXT:    vse8.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <16 x i8>, ptr %x
  %b = load <16 x i8>, ptr %y
  %c = srem <16 x i8> %a, %b
  store <16 x i8> %c, ptr %x
  ret void
}

; Signed element-wise remainder (%x rem %y) of two <8 x i16> vectors; the
; result is stored back to %x. The checks expect a single vrem.vv at VL=8,
; e16, m1.
define void @srem_v8i16(ptr %x, ptr %y) {
; CHECK-LABEL: srem_v8i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0)
; CHECK-NEXT:    vle16.v v9, (a1)
; CHECK-NEXT:    vrem.vv v8, v8, v9
; CHECK-NEXT:    vse16.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <8 x i16>, ptr %x
  %b = load <8 x i16>, ptr %y
  %c = srem <8 x i16> %a, %b
  store <8 x i16> %c, ptr %x
  ret void
}

; Signed element-wise remainder (%x rem %y) of two <6 x i16> vectors; the
; result is stored back to %x. The checks expect the operation to be split:
; elements 4..5 are extracted with vslidedown.vi and processed at VL=2 (mf4),
; elements 0..3 are processed at VL=4 (mf2), and the two results are recombined
; with vslideup.vi before the VL=6 store.
; NOTE(review): presumably the split keeps the padding lanes out of the
; remainder; confirm against the RISC-V lowering code.
define void @srem_v6i16(ptr %x, ptr %y) {
; CHECK-LABEL: srem_v6i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
; CHECK-NEXT:    vle16.v v8, (a1)
; CHECK-NEXT:    vle16.v v9, (a0)
; CHECK-NEXT:    vsetivli zero, 2, e16, m1, ta, ma
; CHECK-NEXT:    vslidedown.vi v10, v8, 4
; CHECK-NEXT:    vslidedown.vi v11, v9, 4
; CHECK-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
; CHECK-NEXT:    vrem.vv v10, v11, v10
; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
; CHECK-NEXT:    vrem.vv v8, v9, v8
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT:    vslideup.vi v8, v10, 4
; CHECK-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
; CHECK-NEXT:    vse16.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <6 x i16>, ptr %x
  %b = load <6 x i16>, ptr %y
  %c = srem <6 x i16> %a, %b
  store <6 x i16> %c, ptr %x
  ret void
}

; Signed element-wise remainder (%x rem %y) of two <4 x i32> vectors; the
; result is stored back to %x. The checks expect a single vrem.vv at VL=4,
; e32, m1.
define void @srem_v4i32(ptr %x, ptr %y) {
; CHECK-LABEL: srem_v4i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    vle32.v v9, (a1)
; CHECK-NEXT:    vrem.vv v8, v8, v9
; CHECK-NEXT:    vse32.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <4 x i32>, ptr %x
  %b = load <4 x i32>, ptr %y
  %c = srem <4 x i32> %a, %b
  store <4 x i32> %c, ptr %x
  ret void
}

; Signed element-wise remainder (%x rem %y) of two <2 x i64> vectors; the
; result is stored back to %x. The checks expect a single vrem.vv at VL=2,
; e64, m1.
define void @srem_v2i64(ptr %x, ptr %y) {
; CHECK-LABEL: srem_v2i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; CHECK-NEXT:    vle64.v v8, (a0)
; CHECK-NEXT:    vle64.v v9, (a1)
; CHECK-NEXT:    vrem.vv v8, v8, v9
; CHECK-NEXT:    vse64.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <2 x i64>, ptr %x
  %b = load <2 x i64>, ptr %y
  %c = srem <2 x i64> %a, %b
  store <2 x i64> %c, ptr %x
  ret void
}

; Unsigned element-wise division (%x / %y) of two <16 x i8> vectors; the
; quotient is stored back to %x. The checks expect a single vdivu.vv at VL=16,
; e8, m1.
define void @udiv_v16i8(ptr %x, ptr %y) {
; CHECK-LABEL: udiv_v16i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT:    vle8.v v8, (a0)
; CHECK-NEXT:    vle8.v v9, (a1)
; CHECK-NEXT:    vdivu.vv v8, v8, v9
; CHECK-NEXT:    vse8.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <16 x i8>, ptr %x
  %b = load <16 x i8>, ptr %y
  %c = udiv <16 x i8> %a, %b
  store <16 x i8> %c, ptr %x
  ret void
}

; Unsigned element-wise division (%x / %y) of two <8 x i16> vectors; the
; quotient is stored back to %x. The checks expect a single vdivu.vv at VL=8,
; e16, m1.
define void @udiv_v8i16(ptr %x, ptr %y) {
; CHECK-LABEL: udiv_v8i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0)
; CHECK-NEXT:    vle16.v v9, (a1)
; CHECK-NEXT:    vdivu.vv v8, v8, v9
; CHECK-NEXT:    vse16.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <8 x i16>, ptr %x
  %b = load <8 x i16>, ptr %y
  %c = udiv <8 x i16> %a, %b
  store <8 x i16> %c, ptr %x
  ret void
}

; Unsigned element-wise division (%x / %y) of two <6 x i16> vectors; the
; quotient is stored back to %x. The checks expect the operation to be split:
; elements 4..5 are extracted with vslidedown.vi and divided at VL=2 (mf4),
; elements 0..3 are divided at VL=4 (mf2), and the two results are recombined
; with vslideup.vi before the VL=6 store.
; NOTE(review): presumably the split keeps the padding lanes out of the divide;
; confirm against the RISC-V lowering code.
define void @udiv_v6i16(ptr %x, ptr %y) {
; CHECK-LABEL: udiv_v6i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
; CHECK-NEXT:    vle16.v v8, (a1)
; CHECK-NEXT:    vle16.v v9, (a0)
; CHECK-NEXT:    vsetivli zero, 2, e16, m1, ta, ma
; CHECK-NEXT:    vslidedown.vi v10, v8, 4
; CHECK-NEXT:    vslidedown.vi v11, v9, 4
; CHECK-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
; CHECK-NEXT:    vdivu.vv v10, v11, v10
; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
; CHECK-NEXT:    vdivu.vv v8, v9, v8
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT:    vslideup.vi v8, v10, 4
; CHECK-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
; CHECK-NEXT:    vse16.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <6 x i16>, ptr %x
  %b = load <6 x i16>, ptr %y
  %c = udiv <6 x i16> %a, %b
  store <6 x i16> %c, ptr %x
  ret void
}

; Unsigned element-wise division (%x / %y) of two <4 x i32> vectors; the
; quotient is stored back to %x. The checks expect a single vdivu.vv at VL=4,
; e32, m1.
define void @udiv_v4i32(ptr %x, ptr %y) {
; CHECK-LABEL: udiv_v4i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    vle32.v v9, (a1)
; CHECK-NEXT:    vdivu.vv v8, v8, v9
; CHECK-NEXT:    vse32.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <4 x i32>, ptr %x
  %b = load <4 x i32>, ptr %y
  %c = udiv <4 x i32> %a, %b
  store <4 x i32> %c, ptr %x
  ret void
}

; Unsigned element-wise division (%x / %y) of two <2 x i64> vectors; the
; quotient is stored back to %x. The checks expect a single vdivu.vv at VL=2,
; e64, m1.
define void @udiv_v2i64(ptr %x, ptr %y) {
; CHECK-LABEL: udiv_v2i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; CHECK-NEXT:    vle64.v v8, (a0)
; CHECK-NEXT:    vle64.v v9, (a1)
; CHECK-NEXT:    vdivu.vv v8, v8, v9
; CHECK-NEXT:    vse64.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <2 x i64>, ptr %x
  %b = load <2 x i64>, ptr %y
  %c = udiv <2 x i64> %a, %b
  store <2 x i64> %c, ptr %x
  ret void
}

; Unsigned element-wise remainder (%x rem %y) of two <16 x i8> vectors; the
; result is stored back to %x. The checks expect a single vremu.vv at VL=16,
; e8, m1.
define void @urem_v16i8(ptr %x, ptr %y) {
; CHECK-LABEL: urem_v16i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT:    vle8.v v8, (a0)
; CHECK-NEXT:    vle8.v v9, (a1)
; CHECK-NEXT:    vremu.vv v8, v8, v9
; CHECK-NEXT:    vse8.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <16 x i8>, ptr %x
  %b = load <16 x i8>, ptr %y
  %c = urem <16 x i8> %a, %b
  store <16 x i8> %c, ptr %x
  ret void
}

; Unsigned element-wise remainder (%x rem %y) of two <8 x i16> vectors; the
; result is stored back to %x. The checks expect a single vremu.vv at VL=8,
; e16, m1.
define void @urem_v8i16(ptr %x, ptr %y) {
; CHECK-LABEL: urem_v8i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0)
; CHECK-NEXT:    vle16.v v9, (a1)
; CHECK-NEXT:    vremu.vv v8, v8, v9
; CHECK-NEXT:    vse16.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <8 x i16>, ptr %x
  %b = load <8 x i16>, ptr %y
  %c = urem <8 x i16> %a, %b
  store <8 x i16> %c, ptr %x
  ret void
}

; Unsigned element-wise remainder (%x rem %y) of two <6 x i16> vectors; the
; result is stored back to %x. The checks expect the operation to be split:
; elements 4..5 are extracted with vslidedown.vi and processed at VL=2 (mf4),
; elements 0..3 are processed at VL=4 (mf2), and the two results are recombined
; with vslideup.vi before the VL=6 store.
; NOTE(review): presumably the split keeps the padding lanes out of the
; remainder; confirm against the RISC-V lowering code.
define void @urem_v6i16(ptr %x, ptr %y) {
; CHECK-LABEL: urem_v6i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
; CHECK-NEXT:    vle16.v v8, (a1)
; CHECK-NEXT:    vle16.v v9, (a0)
; CHECK-NEXT:    vsetivli zero, 2, e16, m1, ta, ma
; CHECK-NEXT:    vslidedown.vi v10, v8, 4
; CHECK-NEXT:    vslidedown.vi v11, v9, 4
; CHECK-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
; CHECK-NEXT:    vremu.vv v10, v11, v10
; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
; CHECK-NEXT:    vremu.vv v8, v9, v8
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT:    vslideup.vi v8, v10, 4
; CHECK-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
; CHECK-NEXT:    vse16.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <6 x i16>, ptr %x
  %b = load <6 x i16>, ptr %y
  %c = urem <6 x i16> %a, %b
  store <6 x i16> %c, ptr %x
  ret void
}

; Unsigned element-wise remainder (%x rem %y) of two <4 x i32> vectors; the
; result is stored back to %x. The checks expect a single vremu.vv at VL=4,
; e32, m1.
define void @urem_v4i32(ptr %x, ptr %y) {
; CHECK-LABEL: urem_v4i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    vle32.v v9, (a1)
; CHECK-NEXT:    vremu.vv v8, v8, v9
; CHECK-NEXT:    vse32.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <4 x i32>, ptr %x
  %b = load <4 x i32>, ptr %y
  %c = urem <4 x i32> %a, %b
  store <4 x i32> %c, ptr %x
  ret void
}

; Unsigned element-wise remainder (%x rem %y) of two <2 x i64> vectors; the
; result is stored back to %x. The checks expect a single vremu.vv at VL=2,
; e64, m1.
define void @urem_v2i64(ptr %x, ptr %y) {
; CHECK-LABEL: urem_v2i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; CHECK-NEXT:    vle64.v v8, (a0)
; CHECK-NEXT:    vle64.v v9, (a1)
; CHECK-NEXT:    vremu.vv v8, v8, v9
; CHECK-NEXT:    vse64.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <2 x i64>, ptr %x
  %b = load <2 x i64>, ptr %y
  %c = urem <2 x i64> %a, %b
  store <2 x i64> %c, ptr %x
  ret void
}

; Unsigned division of the <16 x i8> vector at %x by a vector of differing
; constants (9..25, skipping 16); the quotient is stored back to %x. The checks
; contain no vdivu: they expect two vmulhu.vv (one against a vector loaded from
; the constant pool entry .LCPI65_0, one against a vector holding -128 in
; selected lanes) combined with vsrl/vsub/vadd steps. The per-lane shift
; amounts are assembled by writing scalar bit masks into v0 with vmv.s.x at
; e16 and applying them with vmerge at e8.
define void @mulhu_v16i8(ptr %x) {
; CHECK-LABEL: mulhu_v16i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
; CHECK-NEXT:    vle8.v v8, (a0)
; CHECK-NEXT:    lui a1, 3
; CHECK-NEXT:    addi a1, a1, -2044
; CHECK-NEXT:    vmv.s.x v0, a1
; CHECK-NEXT:    vsetvli zero, zero, e8, m1, ta, ma
; CHECK-NEXT:    vmv.v.i v9, 0
; CHECK-NEXT:    li a1, -128
; CHECK-NEXT:    vmerge.vxm v10, v9, a1, v0
; CHECK-NEXT:    lui a1, 1
; CHECK-NEXT:    addi a2, a1, 32
; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
; CHECK-NEXT:    vmv.s.x v0, a2
; CHECK-NEXT:    vsetvli zero, zero, e8, m1, ta, ma
; CHECK-NEXT:    lui a2, %hi(.LCPI65_0)
; CHECK-NEXT:    addi a2, a2, %lo(.LCPI65_0)
; CHECK-NEXT:    vle8.v v11, (a2)
; CHECK-NEXT:    vmerge.vim v9, v9, 1, v0
; CHECK-NEXT:    vsrl.vv v9, v8, v9
; CHECK-NEXT:    vmulhu.vv v9, v9, v11
; CHECK-NEXT:    vsub.vv v8, v8, v9
; CHECK-NEXT:    vmulhu.vv v8, v8, v10
; CHECK-NEXT:    vadd.vv v8, v8, v9
; CHECK-NEXT:    li a2, 513
; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
; CHECK-NEXT:    vmv.s.x v0, a2
; CHECK-NEXT:    vsetvli zero, zero, e8, m1, ta, ma
; CHECK-NEXT:    vmv.v.i v9, 4
; CHECK-NEXT:    vmerge.vim v9, v9, 1, v0
; CHECK-NEXT:    addi a1, a1, 78
; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
; CHECK-NEXT:    vmv.s.x v0, a1
; CHECK-NEXT:    vsetvli zero, zero, e8, m1, ta, ma
; CHECK-NEXT:    vmerge.vim v9, v9, 3, v0
; CHECK-NEXT:    lui a1, 8
; CHECK-NEXT:    addi a1, a1, 304
; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
; CHECK-NEXT:    vmv.s.x v0, a1
; CHECK-NEXT:    vsetvli zero, zero, e8, m1, ta, ma
; CHECK-NEXT:    vmerge.vim v9, v9, 2, v0
; CHECK-NEXT:    vsrl.vv v8, v8, v9
; CHECK-NEXT:    vse8.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <16 x i8>, ptr %x
  %b = udiv <16 x i8> %a, <i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 17, i8 18, i8 19, i8 20, i8 21, i8 22, i8 23, i8 24, i8 25>
  store <16 x i8> %b, ptr %x
  ret void
}

; udiv of a <8 x i16> vector by the constants 7, 9, 10, 11, 12, 13, 14, 15.
; The expected output uses the vsrl.vv / vmulhu.vv / vsub.vv / vmulhu.vv /
; vadd.vv / vsrl.vv sequence instead of vdivu. One operand vector comes from
; the constant pool (.LCPI66_0); the others are built with vmv.v.i, vmv.s.x,
; vmerge.vim and tail-undisturbed vslideup.vi at VL=7.
define void @mulhu_v8i16(ptr %x) {
; CHECK-LABEL: mulhu_v8i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0)
; CHECK-NEXT:    vmv.v.i v9, 0
; CHECK-NEXT:    lui a1, 1048568
; CHECK-NEXT:    vsetvli zero, zero, e16, m1, tu, ma
; CHECK-NEXT:    vmv.v.i v10, 0
; CHECK-NEXT:    vmv.s.x v10, a1
; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
; CHECK-NEXT:    vmv.v.i v11, 1
; CHECK-NEXT:    vsetivli zero, 7, e16, m1, tu, ma
; CHECK-NEXT:    vslideup.vi v9, v11, 6
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT:    lui a1, %hi(.LCPI66_0)
; CHECK-NEXT:    addi a1, a1, %lo(.LCPI66_0)
; CHECK-NEXT:    vle16.v v12, (a1)
; CHECK-NEXT:    vsrl.vv v9, v8, v9
; CHECK-NEXT:    vmulhu.vv v9, v9, v12
; CHECK-NEXT:    vsub.vv v8, v8, v9
; CHECK-NEXT:    vmulhu.vv v8, v8, v10
; CHECK-NEXT:    vadd.vv v8, v8, v9
; CHECK-NEXT:    li a1, 33
; CHECK-NEXT:    vmv.s.x v0, a1
; CHECK-NEXT:    vmv.v.i v9, 3
; CHECK-NEXT:    vmerge.vim v9, v9, 2, v0
; CHECK-NEXT:    vsetivli zero, 7, e16, m1, tu, ma
; CHECK-NEXT:    vslideup.vi v9, v11, 6
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT:    vsrl.vv v8, v8, v9
; CHECK-NEXT:    vse16.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <8 x i16>, ptr %x
  %b = udiv <8 x i16> %a, <i16 7, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>
  store <8 x i16> %b, ptr %x
  ret void
}

; udiv of a <6 x i16> vector by the constants 7, 9, 10, 11, 12, 13.
; NOTE(review): despite the "mulhu" name, the expected output divides with
; vdivu.vv rather than vmulhu. Elements 4..5 are slid down and divided at
; VL=2, the low four elements are divided at VL=4, and the two parts are
; rejoined with vslideup.vi before the VL=6 store. Confirm this is the
; intended codegen for the non-power-of-two case.
define void @mulhu_v6i16(ptr %x) {
; CHECK-LABEL: mulhu_v6i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0)
; CHECK-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
; CHECK-NEXT:    vid.v v9
; CHECK-NEXT:    vadd.vi v9, v9, 12
; CHECK-NEXT:    vsetivli zero, 2, e16, m1, ta, ma
; CHECK-NEXT:    vslidedown.vi v10, v8, 4
; CHECK-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
; CHECK-NEXT:    vdivu.vv v9, v10, v9
; CHECK-NEXT:    lui a1, 45217
; CHECK-NEXT:    addi a1, a1, -1785
; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
; CHECK-NEXT:    vmv.s.x v10, a1
; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
; CHECK-NEXT:    vsext.vf2 v11, v10
; CHECK-NEXT:    vdivu.vv v8, v8, v11
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT:    vslideup.vi v8, v9, 4
; CHECK-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
; CHECK-NEXT:    vse16.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <6 x i16>, ptr %x
  %b = udiv <6 x i16> %a, <i16 7, i16 9, i16 10, i16 11, i16 12, i16 13>
  store <6 x i16> %b, ptr %x
  ret void
}

; udiv of a <4 x i32> vector by the constants 5, 6, 7, 9.
; Expected output is vmulhu.vv / vsub.vv / vmulhu.vv / vadd.vv followed by a
; per-element vsrl.vv. The first multiplier vector is loaded from the constant
; pool (.LCPI68_0), the second is built with vmv.s.x + vslideup.vi, and the
; shift vector is widened with vsext.vf4 from a scalar written by vmv.s.x.
define void @mulhu_v4i32(ptr %x) {
; CHECK-LABEL: mulhu_v4i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    lui a1, 524288
; CHECK-NEXT:    vmv.s.x v9, a1
; CHECK-NEXT:    vmv.v.i v10, 0
; CHECK-NEXT:    vsetivli zero, 3, e32, m1, tu, ma
; CHECK-NEXT:    vslideup.vi v10, v9, 2
; CHECK-NEXT:    lui a1, %hi(.LCPI68_0)
; CHECK-NEXT:    addi a1, a1, %lo(.LCPI68_0)
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vle32.v v9, (a1)
; CHECK-NEXT:    vmulhu.vv v9, v8, v9
; CHECK-NEXT:    vsub.vv v8, v8, v9
; CHECK-NEXT:    vmulhu.vv v8, v8, v10
; CHECK-NEXT:    vadd.vv v8, v8, v9
; CHECK-NEXT:    lui a1, 4128
; CHECK-NEXT:    addi a1, a1, 514
; CHECK-NEXT:    vmv.s.x v9, a1
; CHECK-NEXT:    vsext.vf4 v10, v9
; CHECK-NEXT:    vsrl.vv v8, v8, v10
; CHECK-NEXT:    vse32.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <4 x i32>, ptr %x
  %b = udiv <4 x i32> %a, <i32 5, i32 6, i32 7, i32 9>
  store <4 x i32> %b, ptr %x
  ret void
}

; udiv of a <2 x i64> vector by the constants 3 and 5. The expected code
; differs per target, so the assertions are split between the riscv32 and
; riscv64 check prefixes. Both end in vmulhu.vv followed by vsrl.vv.
;  - riscv32 loads the vmulhu.vv operand from the constant pool (.LCPI69_0)
;    as four e32 elements and builds the shift vector with vmv.s.x plus
;    vsext.vf4.
;  - riscv64 materializes the multipliers in scalar registers (vmv.v.x, then
;    a tail-undisturbed vmv.s.x into element 0) and builds the shift vector
;    with vid.v plus vadd.vi.
define void @mulhu_v2i64(ptr %x) {
; RV32-LABEL: mulhu_v2i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; RV32-NEXT:    vle64.v v8, (a0)
; RV32-NEXT:    lui a1, %hi(.LCPI69_0)
; RV32-NEXT:    addi a1, a1, %lo(.LCPI69_0)
; RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; RV32-NEXT:    vle32.v v9, (a1)
; RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; RV32-NEXT:    vmulhu.vv v8, v8, v9
; RV32-NEXT:    lui a1, 32
; RV32-NEXT:    addi a1, a1, 1
; RV32-NEXT:    vmv.s.x v9, a1
; RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; RV32-NEXT:    vsext.vf4 v10, v9
; RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; RV32-NEXT:    vsrl.vv v8, v8, v10
; RV32-NEXT:    vse64.v v8, (a0)
; RV32-NEXT:    ret
;
; RV64-LABEL: mulhu_v2i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; RV64-NEXT:    vle64.v v8, (a0)
; RV64-NEXT:    lui a1, 838861
; RV64-NEXT:    addiw a1, a1, -819
; RV64-NEXT:    slli a2, a1, 32
; RV64-NEXT:    add a1, a1, a2
; RV64-NEXT:    vmv.v.x v9, a1
; RV64-NEXT:    lui a1, 699051
; RV64-NEXT:    addiw a1, a1, -1365
; RV64-NEXT:    slli a2, a1, 32
; RV64-NEXT:    add a1, a1, a2
; RV64-NEXT:    vsetvli zero, zero, e64, m1, tu, ma
; RV64-NEXT:    vmv.s.x v9, a1
; RV64-NEXT:    vsetvli zero, zero, e64, m1, ta, ma
; RV64-NEXT:    vmulhu.vv v8, v8, v9
; RV64-NEXT:    vid.v v9
; RV64-NEXT:    vadd.vi v9, v9, 1
; RV64-NEXT:    vsrl.vv v8, v8, v9
; RV64-NEXT:    vse64.v v8, (a0)
; RV64-NEXT:    ret
  %a = load <2 x i64>, ptr %x
  %b = udiv <2 x i64> %a, <i64 3, i64 5>
  store <2 x i64> %b, ptr %x
  ret void
}

; NOTE(review): the name says "mulhs", but the IR performs an unsigned udiv by
; a vector mixing i8 -9 and i8 9, and the expected output uses vmulhu.vv plus
; vsrl.vv. Confirm whether sdiv was intended; changing the opcode requires
; regenerating the assertions below.
; The multiplier vector and the shift vector are each selected per element
; with a vmerge under the same v0 mask.
define void @mulhs_v16i8(ptr %x) {
; CHECK-LABEL: mulhs_v16i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT:    vle8.v v8, (a0)
; CHECK-NEXT:    li a1, -123
; CHECK-NEXT:    vmv.v.x v9, a1
; CHECK-NEXT:    lui a1, 5
; CHECK-NEXT:    addi a1, a1, -1452
; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
; CHECK-NEXT:    vmv.s.x v0, a1
; CHECK-NEXT:    li a1, 57
; CHECK-NEXT:    vsetvli zero, zero, e8, m1, ta, ma
; CHECK-NEXT:    vmerge.vxm v9, v9, a1, v0
; CHECK-NEXT:    vmulhu.vv v8, v8, v9
; CHECK-NEXT:    vmv.v.i v9, 7
; CHECK-NEXT:    vmerge.vim v9, v9, 1, v0
; CHECK-NEXT:    vsrl.vv v8, v8, v9
; CHECK-NEXT:    vse8.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <16 x i8>, ptr %x
  %b = udiv <16 x i8> %a, <i8 -9, i8 -9, i8 9, i8 -9, i8 9, i8 -9, i8 9, i8 -9, i8 -9, i8 9, i8 -9, i8 9, i8 -9, i8 -9, i8 9, i8 -9>
  store <16 x i8> %b, ptr %x
  ret void
}

; sdiv of a <8 x i16> vector by a mix of 7 and -7. Expected output is a
; multiplier vector chosen per element with vmerge.vxm under a v0 mask,
; vmulh.vv, an arithmetic shift right by 1 (vsra.vi), and an add of the top
; bit extracted with vsrl.vi by 15.
define void @mulhs_v8i16(ptr %x) {
; CHECK-LABEL: mulhs_v8i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0)
; CHECK-NEXT:    lui a1, 5
; CHECK-NEXT:    addi a1, a1, -1755
; CHECK-NEXT:    vmv.v.x v9, a1
; CHECK-NEXT:    li a1, 105
; CHECK-NEXT:    vmv.s.x v0, a1
; CHECK-NEXT:    lui a1, 1048571
; CHECK-NEXT:    addi a1, a1, 1755
; CHECK-NEXT:    vmerge.vxm v9, v9, a1, v0
; CHECK-NEXT:    vmulh.vv v8, v8, v9
; CHECK-NEXT:    vsra.vi v8, v8, 1
; CHECK-NEXT:    vsrl.vi v9, v8, 15
; CHECK-NEXT:    vadd.vv v8, v8, v9
; CHECK-NEXT:    vse16.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <8 x i16>, ptr %x
  %b = sdiv <8 x i16> %a, <i16 -7, i16 7, i16 7, i16 -7, i16 7, i16 -7, i16 -7, i16 7>
  store <8 x i16> %b, ptr %x
  ret void
}

; sdiv of a <6 x i16> vector by a mix of 7 and -7.
; NOTE(review): despite the "mulhs" name, the expected output divides with
; vdiv.vv rather than vmulh. Elements 4..5 are slid down and divided at VL=2
; (divisors built with vid.v + vmadd.vx), the low four elements are divided
; at VL=4 (divisors widened with vsext.vf2), and the two parts are rejoined
; with vslideup.vi before the VL=6 store. Confirm this is the intended
; codegen for the non-power-of-two case.
define void @mulhs_v6i16(ptr %x) {
; CHECK-LABEL: mulhs_v6i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0)
; CHECK-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
; CHECK-NEXT:    vmv.v.i v9, 7
; CHECK-NEXT:    vid.v v10
; CHECK-NEXT:    li a1, -14
; CHECK-NEXT:    vmadd.vx v10, a1, v9
; CHECK-NEXT:    vsetivli zero, 2, e16, m1, ta, ma
; CHECK-NEXT:    vslidedown.vi v9, v8, 4
; CHECK-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
; CHECK-NEXT:    vdiv.vv v9, v9, v10
; CHECK-NEXT:    lui a1, 1020016
; CHECK-NEXT:    addi a1, a1, 2041
; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
; CHECK-NEXT:    vmv.s.x v10, a1
; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
; CHECK-NEXT:    vsext.vf2 v11, v10
; CHECK-NEXT:    vdiv.vv v8, v8, v11
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT:    vslideup.vi v8, v9, 4
; CHECK-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
; CHECK-NEXT:    vse16.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <6 x i16>, ptr %x
  %b = sdiv <6 x i16> %a, <i16 -7, i16 7, i16 7, i16 -7, i16 7, i16 -7>
  store <6 x i16> %b, ptr %x
  ret void
}

; sdiv of a <4 x i32> vector by alternating -5 and 5. Both targets expect
; vmulh.vv, then a vsra.vi by 1 and a vsrl.vi by 31 (emitted in opposite
; orders on the two targets) whose results are added with vadd.vv.
; The multiplier vector is built differently per target. riscv32 uses
; vmv.v.x plus vmerge.vxm under a v0 mask, while riscv64 uses a zero-stride
; vlse64.v from the constant pool (.LCPI73_0).
define void @mulhs_v4i32(ptr %x) {
; RV32-LABEL: mulhs_v4i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; RV32-NEXT:    vle32.v v8, (a0)
; RV32-NEXT:    lui a1, 419430
; RV32-NEXT:    addi a1, a1, 1639
; RV32-NEXT:    vmv.v.x v9, a1
; RV32-NEXT:    vmv.v.i v0, 5
; RV32-NEXT:    lui a1, 629146
; RV32-NEXT:    addi a1, a1, -1639
; RV32-NEXT:    vmerge.vxm v9, v9, a1, v0
; RV32-NEXT:    vmulh.vv v8, v8, v9
; RV32-NEXT:    vsrl.vi v9, v8, 31
; RV32-NEXT:    vsra.vi v8, v8, 1
; RV32-NEXT:    vadd.vv v8, v8, v9
; RV32-NEXT:    vse32.v v8, (a0)
; RV32-NEXT:    ret
;
; RV64-LABEL: mulhs_v4i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; RV64-NEXT:    vle32.v v8, (a0)
; RV64-NEXT:    lui a1, %hi(.LCPI73_0)
; RV64-NEXT:    addi a1, a1, %lo(.LCPI73_0)
; RV64-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; RV64-NEXT:    vlse64.v v9, (a1), zero
; RV64-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; RV64-NEXT:    vmulh.vv v8, v8, v9
; RV64-NEXT:    vsra.vi v8, v8, 1
; RV64-NEXT:    vsrl.vi v9, v8, 31
; RV64-NEXT:    vadd.vv v8, v8, v9
; RV64-NEXT:    vse32.v v8, (a0)
; RV64-NEXT:    ret
  %a = load <4 x i32>, ptr %x
  %b = sdiv <4 x i32> %a, <i32 -5, i32 5, i32 -5, i32 5>
  store <4 x i32> %b, ptr %x
  ret void
}

; sdiv of a <2 x i64> vector by the constants 3 and -3. Both targets expect
; vmulh.vv, a vmadd.vv that combines the dividend with a factor vector
; derived from vid.v and vrsub.vi, then a vsrl.vx by 63 and a per-element
; vsra.vv whose results are added with vadd.vv.
; riscv32 builds its e64 operands through e32 views (vmv.v.x / vmv.s.x,
; vid.v + vsrl.vi, vsext.vf4). riscv64 loads one multiplier from the constant
; pool (.LCPI74_0) with a scalar ld and reuses the vid.v result as the
; vsra.vv shift vector.
define void @mulhs_v2i64(ptr %x) {
; RV32-LABEL: mulhs_v2i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; RV32-NEXT:    vle64.v v8, (a0)
; RV32-NEXT:    lui a1, 349525
; RV32-NEXT:    addi a2, a1, 1365
; RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; RV32-NEXT:    vmv.v.x v9, a2
; RV32-NEXT:    addi a1, a1, 1366
; RV32-NEXT:    vsetvli zero, zero, e32, m1, tu, ma
; RV32-NEXT:    vmv.s.x v9, a1
; RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; RV32-NEXT:    vmulh.vv v9, v8, v9
; RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; RV32-NEXT:    vid.v v10
; RV32-NEXT:    vsrl.vi v10, v10, 1
; RV32-NEXT:    vrsub.vi v10, v10, 0
; RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; RV32-NEXT:    vmadd.vv v10, v8, v9
; RV32-NEXT:    li a1, 63
; RV32-NEXT:    vsrl.vx v8, v10, a1
; RV32-NEXT:    lui a1, 16
; RV32-NEXT:    vmv.s.x v9, a1
; RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; RV32-NEXT:    vsext.vf4 v11, v9
; RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; RV32-NEXT:    vsra.vv v9, v10, v11
; RV32-NEXT:    vadd.vv v8, v9, v8
; RV32-NEXT:    vse64.v v8, (a0)
; RV32-NEXT:    ret
;
; RV64-LABEL: mulhs_v2i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; RV64-NEXT:    lui a1, 349525
; RV64-NEXT:    addiw a1, a1, 1365
; RV64-NEXT:    slli a2, a1, 32
; RV64-NEXT:    add a1, a1, a2
; RV64-NEXT:    lui a2, %hi(.LCPI74_0)
; RV64-NEXT:    ld a2, %lo(.LCPI74_0)(a2)
; RV64-NEXT:    vle64.v v8, (a0)
; RV64-NEXT:    vmv.v.x v9, a1
; RV64-NEXT:    vsetvli zero, zero, e64, m1, tu, ma
; RV64-NEXT:    vmv.s.x v9, a2
; RV64-NEXT:    vsetvli zero, zero, e64, m1, ta, ma
; RV64-NEXT:    vmulh.vv v9, v8, v9
; RV64-NEXT:    vid.v v10
; RV64-NEXT:    vrsub.vi v11, v10, 0
; RV64-NEXT:    vmadd.vv v11, v8, v9
; RV64-NEXT:    li a1, 63
; RV64-NEXT:    vsrl.vx v8, v11, a1
; RV64-NEXT:    vsra.vv v9, v11, v10
; RV64-NEXT:    vadd.vv v8, v9, v8
; RV64-NEXT:    vse64.v v8, (a0)
; RV64-NEXT:    ret
  %a = load <2 x i64>, ptr %x
  %b = sdiv <2 x i64> %a, <i64 3, i64 -3>
  store <2 x i64> %b, ptr %x
  ret void
}

; Signed minimum written as icmp slt + select on two <16 x i8> vectors loaded
; from %x and %y, with the result stored back through %x. The expected output
; is a single vmin.vv.
define void @smin_v16i8(ptr %x, ptr %y) {
; CHECK-LABEL: smin_v16i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT:    vle8.v v8, (a0)
; CHECK-NEXT:    vle8.v v9, (a1)
; CHECK-NEXT:    vmin.vv v8, v8, v9
; CHECK-NEXT:    vse8.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <16 x i8>, ptr %x
  %b = load <16 x i8>, ptr %y
  %cc = icmp slt <16 x i8> %a, %b
  %c = select <16 x i1> %cc, <16 x i8> %a, <16 x i8> %b
  store <16 x i8> %c, ptr %x
  ret void
}

; Signed minimum written as icmp slt + select on two <8 x i16> vectors loaded
; from %x and %y, with the result stored back through %x. The expected output
; is a single vmin.vv.
define void @smin_v8i16(ptr %x, ptr %y) {
; CHECK-LABEL: smin_v8i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0)
; CHECK-NEXT:    vle16.v v9, (a1)
; CHECK-NEXT:    vmin.vv v8, v8, v9
; CHECK-NEXT:    vse16.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <8 x i16>, ptr %x
  %b = load <8 x i16>, ptr %y
  %cc = icmp slt <8 x i16> %a, %b
  %c = select <8 x i1> %cc, <8 x i16> %a, <8 x i16> %b
  store <8 x i16> %c, ptr %x
  ret void
}

; Signed minimum written as icmp slt + select on two <6 x i16> vectors.
; The expected output loads and stores with VL=6 but runs vmin.vv with VL=8,
; which is why vsetivli is re-issued around the arithmetic instruction.
define void @smin_v6i16(ptr %x, ptr %y) {
; CHECK-LABEL: smin_v6i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0)
; CHECK-NEXT:    vle16.v v9, (a1)
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT:    vmin.vv v8, v8, v9
; CHECK-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
; CHECK-NEXT:    vse16.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <6 x i16>, ptr %x
  %b = load <6 x i16>, ptr %y
  %cc = icmp slt <6 x i16> %a, %b
  %c = select <6 x i1> %cc, <6 x i16> %a, <6 x i16> %b
  store <6 x i16> %c, ptr %x
  ret void
}

; Signed minimum written as icmp slt + select on two <4 x i32> vectors loaded
; from %x and %y, with the result stored back through %x. The expected output
; is a single vmin.vv.
define void @smin_v4i32(ptr %x, ptr %y) {
; CHECK-LABEL: smin_v4i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    vle32.v v9, (a1)
; CHECK-NEXT:    vmin.vv v8, v8, v9
; CHECK-NEXT:    vse32.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <4 x i32>, ptr %x
  %b = load <4 x i32>, ptr %y
  %cc = icmp slt <4 x i32> %a, %b
  %c = select <4 x i1> %cc, <4 x i32> %a, <4 x i32> %b
  store <4 x i32> %c, ptr %x
  ret void
}

; Signed minimum written as icmp slt + select on two <2 x i64> vectors loaded
; from %x and %y, with the result stored back through %x. The expected output
; is a single vmin.vv.
define void @smin_v2i64(ptr %x, ptr %y) {
; CHECK-LABEL: smin_v2i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; CHECK-NEXT:    vle64.v v8, (a0)
; CHECK-NEXT:    vle64.v v9, (a1)
; CHECK-NEXT:    vmin.vv v8, v8, v9
; CHECK-NEXT:    vse64.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <2 x i64>, ptr %x
  %b = load <2 x i64>, ptr %y
  %cc = icmp slt <2 x i64> %a, %b
  %c = select <2 x i1> %cc, <2 x i64> %a, <2 x i64> %b
  store <2 x i64> %c, ptr %x
  ret void
}

; llvm.smin of a <16 x i8> vector with a splat of the scalar %y (built with
; insertelement + shufflevector) as the second operand. The expected output
; uses the vector-scalar form vmin.vx with a1 and no explicit splat.
define void @smin_vx_v16i8(ptr %x, i8 %y) {
; CHECK-LABEL: smin_vx_v16i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT:    vle8.v v8, (a0)
; CHECK-NEXT:    vmin.vx v8, v8, a1
; CHECK-NEXT:    vse8.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <16 x i8>, ptr %x
  %b = insertelement <16 x i8> poison, i8 %y, i32 0
  %c = shufflevector <16 x i8> %b, <16 x i8> poison, <16 x i32> zeroinitializer
  %d = call <16 x i8> @llvm.smin.v16i8(<16 x i8> %a, <16 x i8> %c)
  store <16 x i8> %d, ptr %x
  ret void
}
declare <16 x i8> @llvm.smin.v16i8(<16 x i8>, <16 x i8>)

; llvm.smin of a <8 x i16> vector with a splat of the scalar %y (built with
; insertelement + shufflevector) as the second operand. The expected output
; uses the vector-scalar form vmin.vx with a1 and no explicit splat.
define void @smin_vx_v8i16(ptr %x, i16 %y) {
; CHECK-LABEL: smin_vx_v8i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0)
; CHECK-NEXT:    vmin.vx v8, v8, a1
; CHECK-NEXT:    vse16.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <8 x i16>, ptr %x
  %b = insertelement <8 x i16> poison, i16 %y, i32 0
  %c = shufflevector <8 x i16> %b, <8 x i16> poison, <8 x i32> zeroinitializer
  %d = call <8 x i16> @llvm.smin.v8i16(<8 x i16> %a, <8 x i16> %c)
  store <8 x i16> %d, ptr %x
  ret void
}
declare <8 x i16> @llvm.smin.v8i16(<8 x i16>, <8 x i16>)

; llvm.smin of a <6 x i16> vector with a splat of the scalar %y as the second
; operand. The expected output loads and stores with VL=6 but runs vmin.vx
; with VL=8, which is why vsetivli is re-issued around it.
define void @smin_vx_v6i16(ptr %x, i16 %y) {
; CHECK-LABEL: smin_vx_v6i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0)
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT:    vmin.vx v8, v8, a1
; CHECK-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
; CHECK-NEXT:    vse16.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <6 x i16>, ptr %x
  %b = insertelement <6 x i16> poison, i16 %y, i32 0
  %c = shufflevector <6 x i16> %b, <6 x i16> poison, <6 x i32> zeroinitializer
  %d = call <6 x i16> @llvm.smin.v6i16(<6 x i16> %a, <6 x i16> %c)
  store <6 x i16> %d, ptr %x
  ret void
}
declare <6 x i16> @llvm.smin.v6i16(<6 x i16>, <6 x i16>)

; llvm.smin of a <4 x i32> vector with a splat of the scalar %y (built with
; insertelement + shufflevector) as the second operand. The expected output
; uses the vector-scalar form vmin.vx with a1 and no explicit splat.
define void @smin_vx_v4i32(ptr %x, i32 %y) {
; CHECK-LABEL: smin_vx_v4i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    vmin.vx v8, v8, a1
; CHECK-NEXT:    vse32.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <4 x i32>, ptr %x
  %b = insertelement <4 x i32> poison, i32 %y, i32 0
  %c = shufflevector <4 x i32> %b, <4 x i32> poison, <4 x i32> zeroinitializer
  %d = call <4 x i32> @llvm.smin.v4i32(<4 x i32> %a, <4 x i32> %c)
  store <4 x i32> %d, ptr %x
  ret void
}
declare <4 x i32> @llvm.smin.v4i32(<4 x i32>, <4 x i32>)

; Same as the vx variant but with the splat of %y as the FIRST operand of
; llvm.smin on <16 x i8>. The expected output is still vmin.vx with the
; vector in v8 and the scalar in a1.
define void @smin_xv_v16i8(ptr %x, i8 %y) {
; CHECK-LABEL: smin_xv_v16i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT:    vle8.v v8, (a0)
; CHECK-NEXT:    vmin.vx v8, v8, a1
; CHECK-NEXT:    vse8.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <16 x i8>, ptr %x
  %b = insertelement <16 x i8> poison, i8 %y, i32 0
  %c = shufflevector <16 x i8> %b, <16 x i8> poison, <16 x i32> zeroinitializer
  %d = call <16 x i8> @llvm.smin.v16i8(<16 x i8> %c, <16 x i8> %a)
  store <16 x i8> %d, ptr %x
  ret void
}

; Same as the vx variant but with the splat of %y as the FIRST operand of
; llvm.smin on <8 x i16>. The expected output is still vmin.vx with the
; vector in v8 and the scalar in a1.
define void @smin_xv_v8i16(ptr %x, i16 %y) {
; CHECK-LABEL: smin_xv_v8i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0)
; CHECK-NEXT:    vmin.vx v8, v8, a1
; CHECK-NEXT:    vse16.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <8 x i16>, ptr %x
  %b = insertelement <8 x i16> poison, i16 %y, i32 0
  %c = shufflevector <8 x i16> %b, <8 x i16> poison, <8 x i32> zeroinitializer
  %d = call <8 x i16> @llvm.smin.v8i16(<8 x i16> %c, <8 x i16> %a)
  store <8 x i16> %d, ptr %x
  ret void
}

; llvm.smin on <6 x i16> with the splat of %y as the FIRST operand. The
; expected output is vmin.vx executed with VL=8 between a VL=6 load and a
; VL=6 store.
define void @smin_xv_v6i16(ptr %x, i16 %y) {
; CHECK-LABEL: smin_xv_v6i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0)
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT:    vmin.vx v8, v8, a1
; CHECK-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
; CHECK-NEXT:    vse16.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <6 x i16>, ptr %x
  %b = insertelement <6 x i16> poison, i16 %y, i32 0
  %c = shufflevector <6 x i16> %b, <6 x i16> poison, <6 x i32> zeroinitializer
  %d = call <6 x i16> @llvm.smin.v6i16(<6 x i16> %c, <6 x i16> %a)
  store <6 x i16> %d, ptr %x
  ret void
}

; Same as the vx variant but with the splat of %y as the FIRST operand of
; llvm.smin on <4 x i32>. The expected output is still vmin.vx with the
; vector in v8 and the scalar in a1.
define void @smin_xv_v4i32(ptr %x, i32 %y) {
; CHECK-LABEL: smin_xv_v4i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    vmin.vx v8, v8, a1
; CHECK-NEXT:    vse32.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <4 x i32>, ptr %x
  %b = insertelement <4 x i32> poison, i32 %y, i32 0
  %c = shufflevector <4 x i32> %b, <4 x i32> poison, <4 x i32> zeroinitializer
  %d = call <4 x i32> @llvm.smin.v4i32(<4 x i32> %c, <4 x i32> %a)
  store <4 x i32> %d, ptr %x
  ret void
}

; Signed maximum written as icmp sgt + select on two <16 x i8> vectors loaded
; from %x and %y, with the result stored back through %x. The expected output
; is a single vmax.vv.
define void @smax_v16i8(ptr %x, ptr %y) {
; CHECK-LABEL: smax_v16i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT:    vle8.v v8, (a0)
; CHECK-NEXT:    vle8.v v9, (a1)
; CHECK-NEXT:    vmax.vv v8, v8, v9
; CHECK-NEXT:    vse8.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <16 x i8>, ptr %x
  %b = load <16 x i8>, ptr %y
  %cc = icmp sgt <16 x i8> %a, %b
  %c = select <16 x i1> %cc, <16 x i8> %a, <16 x i8> %b
  store <16 x i8> %c, ptr %x
  ret void
}

; Signed maximum written as icmp sgt + select on two <8 x i16> vectors loaded
; from %x and %y, with the result stored back through %x. The expected output
; is a single vmax.vv.
define void @smax_v8i16(ptr %x, ptr %y) {
; CHECK-LABEL: smax_v8i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0)
; CHECK-NEXT:    vle16.v v9, (a1)
; CHECK-NEXT:    vmax.vv v8, v8, v9
; CHECK-NEXT:    vse16.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <8 x i16>, ptr %x
  %b = load <8 x i16>, ptr %y
  %cc = icmp sgt <8 x i16> %a, %b
  %c = select <8 x i1> %cc, <8 x i16> %a, <8 x i16> %b
  store <8 x i16> %c, ptr %x
  ret void
}

; Signed maximum written as icmp sgt + select on two <6 x i16> vectors.
; The expected output loads and stores with VL=6 but runs vmax.vv with VL=8,
; which is why vsetivli is re-issued around the arithmetic instruction.
define void @smax_v6i16(ptr %x, ptr %y) {
; CHECK-LABEL: smax_v6i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0)
; CHECK-NEXT:    vle16.v v9, (a1)
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT:    vmax.vv v8, v8, v9
; CHECK-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
; CHECK-NEXT:    vse16.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <6 x i16>, ptr %x
  %b = load <6 x i16>, ptr %y
  %cc = icmp sgt <6 x i16> %a, %b
  %c = select <6 x i1> %cc, <6 x i16> %a, <6 x i16> %b
  store <6 x i16> %c, ptr %x
  ret void
}

; Signed maximum written as icmp sgt + select on two <4 x i32> vectors loaded
; from %x and %y, with the result stored back through %x. The expected output
; is a single vmax.vv.
define void @smax_v4i32(ptr %x, ptr %y) {
; CHECK-LABEL: smax_v4i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    vle32.v v9, (a1)
; CHECK-NEXT:    vmax.vv v8, v8, v9
; CHECK-NEXT:    vse32.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <4 x i32>, ptr %x
  %b = load <4 x i32>, ptr %y
  %cc = icmp sgt <4 x i32> %a, %b
  %c = select <4 x i1> %cc, <4 x i32> %a, <4 x i32> %b
  store <4 x i32> %c, ptr %x
  ret void
}

; Signed maximum written as icmp sgt + select on two <2 x i64> vectors loaded
; from %x and %y, with the result stored back through %x. The expected output
; is a single vmax.vv.
define void @smax_v2i64(ptr %x, ptr %y) {
; CHECK-LABEL: smax_v2i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; CHECK-NEXT:    vle64.v v8, (a0)
; CHECK-NEXT:    vle64.v v9, (a1)
; CHECK-NEXT:    vmax.vv v8, v8, v9
; CHECK-NEXT:    vse64.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <2 x i64>, ptr %x
  %b = load <2 x i64>, ptr %y
  %cc = icmp sgt <2 x i64> %a, %b
  %c = select <2 x i1> %cc, <2 x i64> %a, <2 x i64> %b
  store <2 x i64> %c, ptr %x
  ret void
}

; llvm.smax of a <16 x i8> vector with a splat of the scalar %y (built with
; insertelement + shufflevector) as the second operand. The expected output
; uses the vector-scalar form vmax.vx with a1 and no explicit splat.
define void @smax_vx_v16i8(ptr %x, i8 %y) {
; CHECK-LABEL: smax_vx_v16i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT:    vle8.v v8, (a0)
; CHECK-NEXT:    vmax.vx v8, v8, a1
; CHECK-NEXT:    vse8.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <16 x i8>, ptr %x
  %b = insertelement <16 x i8> poison, i8 %y, i32 0
  %c = shufflevector <16 x i8> %b, <16 x i8> poison, <16 x i32> zeroinitializer
  %d = call <16 x i8> @llvm.smax.v16i8(<16 x i8> %a, <16 x i8> %c)
  store <16 x i8> %d, ptr %x
  ret void
}
declare <16 x i8> @llvm.smax.v16i8(<16 x i8>, <16 x i8>)

; llvm.smax of a <8 x i16> vector with a splat of the scalar %y (built with
; insertelement + shufflevector) as the second operand. The expected output
; uses the vector-scalar form vmax.vx with a1 and no explicit splat.
define void @smax_vx_v8i16(ptr %x, i16 %y) {
; CHECK-LABEL: smax_vx_v8i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0)
; CHECK-NEXT:    vmax.vx v8, v8, a1
; CHECK-NEXT:    vse16.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <8 x i16>, ptr %x
  %b = insertelement <8 x i16> poison, i16 %y, i32 0
  %c = shufflevector <8 x i16> %b, <8 x i16> poison, <8 x i32> zeroinitializer
  %d = call <8 x i16> @llvm.smax.v8i16(<8 x i16> %a, <8 x i16> %c)
  store <8 x i16> %d, ptr %x
  ret void
}
declare <8 x i16> @llvm.smax.v8i16(<8 x i16>, <8 x i16>)

; llvm.smax of a <6 x i16> vector with a splat of the scalar %y as the second
; operand. The expected output loads and stores with VL=6 but runs vmax.vx
; with VL=8, which is why vsetivli is re-issued around it.
define void @smax_vx_v6i16(ptr %x, i16 %y) {
; CHECK-LABEL: smax_vx_v6i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0)
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT:    vmax.vx v8, v8, a1
; CHECK-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
; CHECK-NEXT:    vse16.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <6 x i16>, ptr %x
  %b = insertelement <6 x i16> poison, i16 %y, i32 0
  %c = shufflevector <6 x i16> %b, <6 x i16> poison, <6 x i32> zeroinitializer
  %d = call <6 x i16> @llvm.smax.v6i16(<6 x i16> %a, <6 x i16> %c)
  store <6 x i16> %d, ptr %x
  ret void
}
declare <6 x i16> @llvm.smax.v6i16(<6 x i16>, <6 x i16>)

; llvm.smax of a <4 x i32> vector with a splat of the scalar %y (built with
; insertelement + shufflevector) as the second operand. The expected output
; uses the vector-scalar form vmax.vx with a1 and no explicit splat.
define void @smax_vx_v4i32(ptr %x, i32 %y) {
; CHECK-LABEL: smax_vx_v4i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    vmax.vx v8, v8, a1
; CHECK-NEXT:    vse32.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <4 x i32>, ptr %x
  %b = insertelement <4 x i32> poison, i32 %y, i32 0
  %c = shufflevector <4 x i32> %b, <4 x i32> poison, <4 x i32> zeroinitializer
  %d = call <4 x i32> @llvm.smax.v4i32(<4 x i32> %a, <4 x i32> %c)
  store <4 x i32> %d, ptr %x
  ret void
}
declare <4 x i32> @llvm.smax.v4i32(<4 x i32>, <4 x i32>)

; Same as the vx variant but with the splat of %y as the FIRST operand of
; llvm.smax on <16 x i8>. The expected output is still vmax.vx with the
; vector in v8 and the scalar in a1.
define void @smax_xv_v16i8(ptr %x, i8 %y) {
; CHECK-LABEL: smax_xv_v16i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT:    vle8.v v8, (a0)
; CHECK-NEXT:    vmax.vx v8, v8, a1
; CHECK-NEXT:    vse8.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <16 x i8>, ptr %x
  %b = insertelement <16 x i8> poison, i8 %y, i32 0
  %c = shufflevector <16 x i8> %b, <16 x i8> poison, <16 x i32> zeroinitializer
  %d = call <16 x i8> @llvm.smax.v16i8(<16 x i8> %c, <16 x i8> %a)
  store <16 x i8> %d, ptr %x
  ret void
}

; Same as the vx variant but with the splat of %y as the FIRST operand of
; llvm.smax on <8 x i16>. The expected output is still vmax.vx with the
; vector in v8 and the scalar in a1.
define void @smax_xv_v8i16(ptr %x, i16 %y) {
; CHECK-LABEL: smax_xv_v8i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0)
; CHECK-NEXT:    vmax.vx v8, v8, a1
; CHECK-NEXT:    vse16.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <8 x i16>, ptr %x
  %b = insertelement <8 x i16> poison, i16 %y, i32 0
  %c = shufflevector <8 x i16> %b, <8 x i16> poison, <8 x i32> zeroinitializer
  %d = call <8 x i16> @llvm.smax.v8i16(<8 x i16> %c, <8 x i16> %a)
  store <8 x i16> %d, ptr %x
  ret void
}

; llvm.smax on <6 x i16> with the splat of %y as the FIRST operand. The
; expected output is vmax.vx executed with VL=8 between a VL=6 load and a
; VL=6 store.
define void @smax_xv_v6i16(ptr %x, i16 %y) {
; CHECK-LABEL: smax_xv_v6i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0)
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT:    vmax.vx v8, v8, a1
; CHECK-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
; CHECK-NEXT:    vse16.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <6 x i16>, ptr %x
  %b = insertelement <6 x i16> poison, i16 %y, i32 0
  %c = shufflevector <6 x i16> %b, <6 x i16> poison, <6 x i32> zeroinitializer
  %d = call <6 x i16> @llvm.smax.v6i16(<6 x i16> %c, <6 x i16> %a)
  store <6 x i16> %d, ptr %x
  ret void
}

; Same as the vx variant but with the splat of %y as the FIRST operand of
; llvm.smax on <4 x i32>. The expected output is still vmax.vx with the
; vector in v8 and the scalar in a1.
define void @smax_xv_v4i32(ptr %x, i32 %y) {
; CHECK-LABEL: smax_xv_v4i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    vmax.vx v8, v8, a1
; CHECK-NEXT:    vse32.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <4 x i32>, ptr %x
  %b = insertelement <4 x i32> poison, i32 %y, i32 0
  %c = shufflevector <4 x i32> %b, <4 x i32> poison, <4 x i32> zeroinitializer
  %d = call <4 x i32> @llvm.smax.v4i32(<4 x i32> %c, <4 x i32> %a)
  store <4 x i32> %d, ptr %x
  ret void
}

; Unsigned minimum written as icmp ult + select on two <16 x i8> vectors
; loaded from %x and %y, with the result stored back through %x. The expected
; output is a single vminu.vv.
define void @umin_v16i8(ptr %x, ptr %y) {
; CHECK-LABEL: umin_v16i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT:    vle8.v v8, (a0)
; CHECK-NEXT:    vle8.v v9, (a1)
; CHECK-NEXT:    vminu.vv v8, v8, v9
; CHECK-NEXT:    vse8.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <16 x i8>, ptr %x
  %b = load <16 x i8>, ptr %y
  %cc = icmp ult <16 x i8> %a, %b
  %c = select <16 x i1> %cc, <16 x i8> %a, <16 x i8> %b
  store <16 x i8> %c, ptr %x
  ret void
}

; Unsigned minimum written as icmp ult + select on two <8 x i16> vectors
; loaded from %x and %y, with the result stored back through %x. The expected
; output is a single vminu.vv.
define void @umin_v8i16(ptr %x, ptr %y) {
; CHECK-LABEL: umin_v8i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0)
; CHECK-NEXT:    vle16.v v9, (a1)
; CHECK-NEXT:    vminu.vv v8, v8, v9
; CHECK-NEXT:    vse16.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <8 x i16>, ptr %x
  %b = load <8 x i16>, ptr %y
  %cc = icmp ult <8 x i16> %a, %b
  %c = select <8 x i1> %cc, <8 x i16> %a, <8 x i16> %b
  store <8 x i16> %c, ptr %x
  ret void
}

; Unsigned minimum written as icmp ult + select on two <6 x i16> vectors.
; The expected output loads and stores with VL=6 but runs vminu.vv with VL=8,
; which is why vsetivli is re-issued around the arithmetic instruction.
define void @umin_v6i16(ptr %x, ptr %y) {
; CHECK-LABEL: umin_v6i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0)
; CHECK-NEXT:    vle16.v v9, (a1)
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT:    vminu.vv v8, v8, v9
; CHECK-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
; CHECK-NEXT:    vse16.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <6 x i16>, ptr %x
  %b = load <6 x i16>, ptr %y
  %cc = icmp ult <6 x i16> %a, %b
  %c = select <6 x i1> %cc, <6 x i16> %a, <6 x i16> %b
  store <6 x i16> %c, ptr %x
  ret void
}

; Unsigned minimum written as icmp ult + select on two <4 x i32> vectors
; loaded from %x and %y, with the result stored back through %x. The expected
; output is a single vminu.vv.
define void @umin_v4i32(ptr %x, ptr %y) {
; CHECK-LABEL: umin_v4i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    vle32.v v9, (a1)
; CHECK-NEXT:    vminu.vv v8, v8, v9
; CHECK-NEXT:    vse32.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <4 x i32>, ptr %x
  %b = load <4 x i32>, ptr %y
  %cc = icmp ult <4 x i32> %a, %b
  %c = select <4 x i1> %cc, <4 x i32> %a, <4 x i32> %b
  store <4 x i32> %c, ptr %x
  ret void
}

; Unsigned minimum written as icmp ult + select on two <2 x i64> vectors
; loaded from %x and %y, with the result stored back through %x. The expected
; output is a single vminu.vv.
define void @umin_v2i64(ptr %x, ptr %y) {
; CHECK-LABEL: umin_v2i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; CHECK-NEXT:    vle64.v v8, (a0)
; CHECK-NEXT:    vle64.v v9, (a1)
; CHECK-NEXT:    vminu.vv v8, v8, v9
; CHECK-NEXT:    vse64.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <2 x i64>, ptr %x
  %b = load <2 x i64>, ptr %y
  %cc = icmp ult <2 x i64> %a, %b
  %c = select <2 x i1> %cc, <2 x i64> %a, <2 x i64> %b
  store <2 x i64> %c, ptr %x
  ret void
}

; llvm.umin of a <16 x i8> vector with a splat of the scalar %y (built with
; insertelement + shufflevector) as the second operand. The expected output
; uses the vector-scalar form vminu.vx with a1 and no explicit splat.
define void @umin_vx_v16i8(ptr %x, i8 %y) {
; CHECK-LABEL: umin_vx_v16i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT:    vle8.v v8, (a0)
; CHECK-NEXT:    vminu.vx v8, v8, a1
; CHECK-NEXT:    vse8.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <16 x i8>, ptr %x
  %b = insertelement <16 x i8> poison, i8 %y, i32 0
  %c = shufflevector <16 x i8> %b, <16 x i8> poison, <16 x i32> zeroinitializer
  %d = call <16 x i8> @llvm.umin.v16i8(<16 x i8> %a, <16 x i8> %c)
  store <16 x i8> %d, ptr %x
  ret void
}
declare <16 x i8> @llvm.umin.v16i8(<16 x i8>, <16 x i8>)

; llvm.umin of a <8 x i16> vector with a splat of the scalar %y (built with
; insertelement + shufflevector) as the second operand. The expected output
; uses the vector-scalar form vminu.vx with a1 and no explicit splat.
define void @umin_vx_v8i16(ptr %x, i16 %y) {
; CHECK-LABEL: umin_vx_v8i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0)
; CHECK-NEXT:    vminu.vx v8, v8, a1
; CHECK-NEXT:    vse16.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <8 x i16>, ptr %x
  %b = insertelement <8 x i16> poison, i16 %y, i32 0
  %c = shufflevector <8 x i16> %b, <8 x i16> poison, <8 x i32> zeroinitializer
  %d = call <8 x i16> @llvm.umin.v8i16(<8 x i16> %a, <8 x i16> %c)
  store <8 x i16> %d, ptr %x
  ret void
}
declare <8 x i16> @llvm.umin.v8i16(<8 x i16>, <8 x i16>)

; llvm.umin of a <6 x i16> vector with a splat of the scalar %y as the second
; operand. The expected output loads and stores with VL=6 but runs vminu.vx
; with VL=8, which is why vsetivli is re-issued around it.
define void @umin_vx_v6i16(ptr %x, i16 %y) {
; CHECK-LABEL: umin_vx_v6i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0)
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT:    vminu.vx v8, v8, a1
; CHECK-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
; CHECK-NEXT:    vse16.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <6 x i16>, ptr %x
  %b = insertelement <6 x i16> poison, i16 %y, i32 0
  %c = shufflevector <6 x i16> %b, <6 x i16> poison, <6 x i32> zeroinitializer
  %d = call <6 x i16> @llvm.umin.v6i16(<6 x i16> %a, <6 x i16> %c)
  store <6 x i16> %d, ptr %x
  ret void
}
declare <6 x i16> @llvm.umin.v6i16(<6 x i16>, <6 x i16>)

; Unsigned min of the <4 x i32> loaded from %x against a splat of scalar %y,
; stored back to %x. The assertions expect one vminu.vx at e32/m1.
define void @umin_vx_v4i32(ptr %x, i32 %y) {
; CHECK-LABEL: umin_vx_v4i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    vminu.vx v8, v8, a1
; CHECK-NEXT:    vse32.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <4 x i32>, ptr %x
  %b = insertelement <4 x i32> poison, i32 %y, i32 0
  %c = shufflevector <4 x i32> %b, <4 x i32> poison, <4 x i32> zeroinitializer
  %d = call <4 x i32> @llvm.umin.v4i32(<4 x i32> %a, <4 x i32> %c)
  store <4 x i32> %d, ptr %x
  ret void
}
declare <4 x i32> @llvm.umin.v4i32(<4 x i32>, <4 x i32>)

; Commuted form of the vx test: the splat of %y is passed as the FIRST umin
; operand and the loaded <16 x i8> as the second. The expected output is the
; same vminu.vx with the vector as the first source and a1 as the scalar.
define void @umin_xv_v16i8(ptr %x, i8 %y) {
; CHECK-LABEL: umin_xv_v16i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT:    vle8.v v8, (a0)
; CHECK-NEXT:    vminu.vx v8, v8, a1
; CHECK-NEXT:    vse8.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <16 x i8>, ptr %x
  %b = insertelement <16 x i8> poison, i8 %y, i32 0
  %c = shufflevector <16 x i8> %b, <16 x i8> poison, <16 x i32> zeroinitializer
  %d = call <16 x i8> @llvm.umin.v16i8(<16 x i8> %c, <16 x i8> %a)
  store <16 x i8> %d, ptr %x
  ret void
}

; Commuted umin on <8 x i16>: splat of %y is the first intrinsic operand, the
; vector loaded from %x the second. The assertions still expect vminu.vx with
; the scalar in a1.
define void @umin_xv_v8i16(ptr %x, i16 %y) {
; CHECK-LABEL: umin_xv_v8i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0)
; CHECK-NEXT:    vminu.vx v8, v8, a1
; CHECK-NEXT:    vse16.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <8 x i16>, ptr %x
  %b = insertelement <8 x i16> poison, i16 %y, i32 0
  %c = shufflevector <8 x i16> %b, <8 x i16> poison, <8 x i32> zeroinitializer
  %d = call <8 x i16> @llvm.umin.v8i16(<8 x i16> %c, <8 x i16> %a)
  store <8 x i16> %d, ptr %x
  ret void
}

; Commuted umin on the non-power-of-two <6 x i16> type: splat of %y first,
; loaded vector second. As in the vx form, the expected output loads/stores
; with VL=6 and performs the vminu.vx with VL=8.
define void @umin_xv_v6i16(ptr %x, i16 %y) {
; CHECK-LABEL: umin_xv_v6i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0)
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT:    vminu.vx v8, v8, a1
; CHECK-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
; CHECK-NEXT:    vse16.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <6 x i16>, ptr %x
  %b = insertelement <6 x i16> poison, i16 %y, i32 0
  %c = shufflevector <6 x i16> %b, <6 x i16> poison, <6 x i32> zeroinitializer
  %d = call <6 x i16> @llvm.umin.v6i16(<6 x i16> %c, <6 x i16> %a)
  store <6 x i16> %d, ptr %x
  ret void
}

; Commuted umin on <4 x i32>: splat of %y is the first intrinsic operand, the
; vector loaded from %x the second. Expected to select vminu.vx at e32/m1.
define void @umin_xv_v4i32(ptr %x, i32 %y) {
; CHECK-LABEL: umin_xv_v4i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    vminu.vx v8, v8, a1
; CHECK-NEXT:    vse32.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <4 x i32>, ptr %x
  %b = insertelement <4 x i32> poison, i32 %y, i32 0
  %c = shufflevector <4 x i32> %b, <4 x i32> poison, <4 x i32> zeroinitializer
  %d = call <4 x i32> @llvm.umin.v4i32(<4 x i32> %c, <4 x i32> %a)
  store <4 x i32> %d, ptr %x
  ret void
}

; Unsigned max of two <16 x i8> vectors written as icmp ugt + select rather
; than an intrinsic call; the result is stored back to %x. The assertions
; expect the compare/select pair to become a single vmaxu.vv.
define void @umax_v16i8(ptr %x, ptr %y) {
; CHECK-LABEL: umax_v16i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT:    vle8.v v8, (a0)
; CHECK-NEXT:    vle8.v v9, (a1)
; CHECK-NEXT:    vmaxu.vv v8, v8, v9
; CHECK-NEXT:    vse8.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <16 x i8>, ptr %x
  %b = load <16 x i8>, ptr %y
  %cc = icmp ugt <16 x i8> %a, %b
  %c = select <16 x i1> %cc, <16 x i8> %a, <16 x i8> %b
  store <16 x i8> %c, ptr %x
  ret void
}

; Unsigned max of two <8 x i16> vectors via icmp ugt + select, stored back to
; %x. Expected to lower to a single vmaxu.vv at e16/m1.
define void @umax_v8i16(ptr %x, ptr %y) {
; CHECK-LABEL: umax_v8i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0)
; CHECK-NEXT:    vle16.v v9, (a1)
; CHECK-NEXT:    vmaxu.vv v8, v8, v9
; CHECK-NEXT:    vse16.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <8 x i16>, ptr %x
  %b = load <8 x i16>, ptr %y
  %cc = icmp ugt <8 x i16> %a, %b
  %c = select <8 x i1> %cc, <8 x i16> %a, <8 x i16> %b
  store <8 x i16> %c, ptr %x
  ret void
}

; Unsigned max of two non-power-of-two <6 x i16> vectors via icmp ugt +
; select, stored back to %x. The expected output loads and stores with VL=6
; but executes the vmaxu.vv with VL=8.
define void @umax_v6i16(ptr %x, ptr %y) {
; CHECK-LABEL: umax_v6i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0)
; CHECK-NEXT:    vle16.v v9, (a1)
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT:    vmaxu.vv v8, v8, v9
; CHECK-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
; CHECK-NEXT:    vse16.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <6 x i16>, ptr %x
  %b = load <6 x i16>, ptr %y
  %cc = icmp ugt <6 x i16> %a, %b
  %c = select <6 x i1> %cc, <6 x i16> %a, <6 x i16> %b
  store <6 x i16> %c, ptr %x
  ret void
}

; Unsigned max of two <4 x i32> vectors via icmp ugt + select, stored back to
; %x. Expected to lower to a single vmaxu.vv at e32/m1.
define void @umax_v4i32(ptr %x, ptr %y) {
; CHECK-LABEL: umax_v4i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    vle32.v v9, (a1)
; CHECK-NEXT:    vmaxu.vv v8, v8, v9
; CHECK-NEXT:    vse32.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <4 x i32>, ptr %x
  %b = load <4 x i32>, ptr %y
  %cc = icmp ugt <4 x i32> %a, %b
  %c = select <4 x i1> %cc, <4 x i32> %a, <4 x i32> %b
  store <4 x i32> %c, ptr %x
  ret void
}

; Unsigned max of two <2 x i64> vectors via icmp ugt + select, stored back to
; %x. The same e64/m1 vmaxu.vv sequence is expected for every RUN
; configuration (common prefix).
define void @umax_v2i64(ptr %x, ptr %y) {
; CHECK-LABEL: umax_v2i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; CHECK-NEXT:    vle64.v v8, (a0)
; CHECK-NEXT:    vle64.v v9, (a1)
; CHECK-NEXT:    vmaxu.vv v8, v8, v9
; CHECK-NEXT:    vse64.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <2 x i64>, ptr %x
  %b = load <2 x i64>, ptr %y
  %cc = icmp ugt <2 x i64> %a, %b
  %c = select <2 x i1> %cc, <2 x i64> %a, <2 x i64> %b
  store <2 x i64> %c, ptr %x
  ret void
}

; Unsigned max of the <16 x i8> loaded from %x against scalar %y splatted to
; every lane (insertelement into lane 0 + all-zero shuffle mask); the result
; is stored back to %x. The assertions expect the splat to fold into the
; scalar operand of a single vmaxu.vx.
define void @umax_vx_v16i8(ptr %x, i8 %y) {
; CHECK-LABEL: umax_vx_v16i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT:    vle8.v v8, (a0)
; CHECK-NEXT:    vmaxu.vx v8, v8, a1
; CHECK-NEXT:    vse8.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <16 x i8>, ptr %x
  %b = insertelement <16 x i8> poison, i8 %y, i32 0
  %c = shufflevector <16 x i8> %b, <16 x i8> poison, <16 x i32> zeroinitializer
  %d = call <16 x i8> @llvm.umax.v16i8(<16 x i8> %a, <16 x i8> %c)
  store <16 x i8> %d, ptr %x
  ret void
}
declare <16 x i8> @llvm.umax.v16i8(<16 x i8>, <16 x i8>)

; Unsigned max of the <8 x i16> loaded from %x against a splat of scalar %y,
; stored back to %x. The assertions expect one vmaxu.vx at e16/m1 with the
; scalar taken directly from a1.
define void @umax_vx_v8i16(ptr %x, i16 %y) {
; CHECK-LABEL: umax_vx_v8i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0)
; CHECK-NEXT:    vmaxu.vx v8, v8, a1
; CHECK-NEXT:    vse16.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <8 x i16>, ptr %x
  %b = insertelement <8 x i16> poison, i16 %y, i32 0
  %c = shufflevector <8 x i16> %b, <8 x i16> poison, <8 x i32> zeroinitializer
  %d = call <8 x i16> @llvm.umax.v8i16(<8 x i16> %a, <8 x i16> %c)
  store <8 x i16> %d, ptr %x
  ret void
}
declare <8 x i16> @llvm.umax.v8i16(<8 x i16>, <8 x i16>)

; Non-power-of-two variant: unsigned max of a <6 x i16> from %x against a
; splat of scalar %y, stored back to %x. In the expected output the load and
; store run with VL=6 while the vmaxu.vx itself runs with VL=8.
define void @umax_vx_v6i16(ptr %x, i16 %y) {
; CHECK-LABEL: umax_vx_v6i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0)
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT:    vmaxu.vx v8, v8, a1
; CHECK-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
; CHECK-NEXT:    vse16.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <6 x i16>, ptr %x
  %b = insertelement <6 x i16> poison, i16 %y, i32 0
  %c = shufflevector <6 x i16> %b, <6 x i16> poison, <6 x i32> zeroinitializer
  %d = call <6 x i16> @llvm.umax.v6i16(<6 x i16> %a, <6 x i16> %c)
  store <6 x i16> %d, ptr %x
  ret void
}
declare <6 x i16> @llvm.umax.v6i16(<6 x i16>, <6 x i16>)

; Unsigned max of the <4 x i32> loaded from %x against a splat of scalar %y,
; stored back to %x. The assertions expect one vmaxu.vx at e32/m1.
define void @umax_vx_v4i32(ptr %x, i32 %y) {
; CHECK-LABEL: umax_vx_v4i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    vmaxu.vx v8, v8, a1
; CHECK-NEXT:    vse32.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <4 x i32>, ptr %x
  %b = insertelement <4 x i32> poison, i32 %y, i32 0
  %c = shufflevector <4 x i32> %b, <4 x i32> poison, <4 x i32> zeroinitializer
  %d = call <4 x i32> @llvm.umax.v4i32(<4 x i32> %a, <4 x i32> %c)
  store <4 x i32> %d, ptr %x
  ret void
}
declare <4 x i32> @llvm.umax.v4i32(<4 x i32>, <4 x i32>)

; Commuted form of the vx test: the splat of %y is passed as the FIRST umax
; operand and the loaded <16 x i8> as the second. The expected output is the
; same vmaxu.vx with the vector as the first source and a1 as the scalar.
define void @umax_xv_v16i8(ptr %x, i8 %y) {
; CHECK-LABEL: umax_xv_v16i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT:    vle8.v v8, (a0)
; CHECK-NEXT:    vmaxu.vx v8, v8, a1
; CHECK-NEXT:    vse8.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <16 x i8>, ptr %x
  %b = insertelement <16 x i8> poison, i8 %y, i32 0
  %c = shufflevector <16 x i8> %b, <16 x i8> poison, <16 x i32> zeroinitializer
  %d = call <16 x i8> @llvm.umax.v16i8(<16 x i8> %c, <16 x i8> %a)
  store <16 x i8> %d, ptr %x
  ret void
}

; Commuted umax on <8 x i16>: splat of %y is the first intrinsic operand, the
; vector loaded from %x the second. The assertions still expect vmaxu.vx with
; the scalar in a1.
define void @umax_xv_v8i16(ptr %x, i16 %y) {
; CHECK-LABEL: umax_xv_v8i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0)
; CHECK-NEXT:    vmaxu.vx v8, v8, a1
; CHECK-NEXT:    vse16.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <8 x i16>, ptr %x
  %b = insertelement <8 x i16> poison, i16 %y, i32 0
  %c = shufflevector <8 x i16> %b, <8 x i16> poison, <8 x i32> zeroinitializer
  %d = call <8 x i16> @llvm.umax.v8i16(<8 x i16> %c, <8 x i16> %a)
  store <8 x i16> %d, ptr %x
  ret void
}

; Commuted umax on the non-power-of-two <6 x i16> type: splat of %y first,
; loaded vector second. As in the vx form, the expected output loads/stores
; with VL=6 and performs the vmaxu.vx with VL=8.
define void @umax_xv_v6i16(ptr %x, i16 %y) {
; CHECK-LABEL: umax_xv_v6i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0)
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT:    vmaxu.vx v8, v8, a1
; CHECK-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
; CHECK-NEXT:    vse16.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <6 x i16>, ptr %x
  %b = insertelement <6 x i16> poison, i16 %y, i32 0
  %c = shufflevector <6 x i16> %b, <6 x i16> poison, <6 x i32> zeroinitializer
  %d = call <6 x i16> @llvm.umax.v6i16(<6 x i16> %c, <6 x i16> %a)
  store <6 x i16> %d, ptr %x
  ret void
}

; Commuted umax on <4 x i32>: splat of %y is the first intrinsic operand, the
; vector loaded from %x the second. Expected to select vmaxu.vx at e32/m1.
define void @umax_xv_v4i32(ptr %x, i32 %y) {
; CHECK-LABEL: umax_xv_v4i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    vmaxu.vx v8, v8, a1
; CHECK-NEXT:    vse32.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <4 x i32>, ptr %x
  %b = insertelement <4 x i32> poison, i32 %y, i32 0
  %c = shufflevector <4 x i32> %b, <4 x i32> poison, <4 x i32> zeroinitializer
  %d = call <4 x i32> @llvm.umax.v4i32(<4 x i32> %c, <4 x i32> %a)
  store <4 x i32> %d, ptr %x
  ret void
}

; Element-wise add of two <32 x i8> vectors, result stored back to %x.
; With LMUL max 2 the assertions expect one m2 sequence, with VL=32
; materialised by li and passed to vsetvli. With LMUL max 1 they expect the
; vector split into two m1 halves, the upper half addressed at byte offset 16;
; the RV32 and RV64 expectations differ only in scratch-register use and load
; order.
define void @add_v32i8(ptr %x, ptr %y) {
; LMULMAX2-LABEL: add_v32i8:
; LMULMAX2:       # %bb.0:
; LMULMAX2-NEXT:    li a2, 32
; LMULMAX2-NEXT:    vsetvli zero, a2, e8, m2, ta, ma
; LMULMAX2-NEXT:    vle8.v v8, (a0)
; LMULMAX2-NEXT:    vle8.v v10, (a1)
; LMULMAX2-NEXT:    vadd.vv v8, v8, v10
; LMULMAX2-NEXT:    vse8.v v8, (a0)
; LMULMAX2-NEXT:    ret
;
; LMULMAX1-RV32-LABEL: add_v32i8:
; LMULMAX1-RV32:       # %bb.0:
; LMULMAX1-RV32-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
; LMULMAX1-RV32-NEXT:    vle8.v v8, (a0)
; LMULMAX1-RV32-NEXT:    addi a2, a0, 16
; LMULMAX1-RV32-NEXT:    vle8.v v9, (a2)
; LMULMAX1-RV32-NEXT:    addi a3, a1, 16
; LMULMAX1-RV32-NEXT:    vle8.v v10, (a3)
; LMULMAX1-RV32-NEXT:    vle8.v v11, (a1)
; LMULMAX1-RV32-NEXT:    vadd.vv v9, v9, v10
; LMULMAX1-RV32-NEXT:    vadd.vv v8, v8, v11
; LMULMAX1-RV32-NEXT:    vse8.v v8, (a0)
; LMULMAX1-RV32-NEXT:    vse8.v v9, (a2)
; LMULMAX1-RV32-NEXT:    ret
;
; LMULMAX1-RV64-LABEL: add_v32i8:
; LMULMAX1-RV64:       # %bb.0:
; LMULMAX1-RV64-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
; LMULMAX1-RV64-NEXT:    vle8.v v8, (a0)
; LMULMAX1-RV64-NEXT:    addi a2, a1, 16
; LMULMAX1-RV64-NEXT:    vle8.v v9, (a2)
; LMULMAX1-RV64-NEXT:    addi a2, a0, 16
; LMULMAX1-RV64-NEXT:    vle8.v v10, (a2)
; LMULMAX1-RV64-NEXT:    vle8.v v11, (a1)
; LMULMAX1-RV64-NEXT:    vadd.vv v9, v10, v9
; LMULMAX1-RV64-NEXT:    vadd.vv v8, v8, v11
; LMULMAX1-RV64-NEXT:    vse8.v v8, (a0)
; LMULMAX1-RV64-NEXT:    vse8.v v9, (a2)
; LMULMAX1-RV64-NEXT:    ret
  %a = load <32 x i8>, ptr %x
  %b = load <32 x i8>, ptr %y
  %c = add <32 x i8> %a, %b
  store <32 x i8> %c, ptr %x
  ret void
}

; Element-wise add of two <16 x i16> vectors, result stored back to %x.
; LMUL max 2 expects a single e16/m2 vadd.vv with VL=16; LMUL max 1 expects
; two e16/m1 halves of 8 elements each, the upper half at byte offset 16.
define void @add_v16i16(ptr %x, ptr %y) {
; LMULMAX2-LABEL: add_v16i16:
; LMULMAX2:       # %bb.0:
; LMULMAX2-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
; LMULMAX2-NEXT:    vle16.v v8, (a0)
; LMULMAX2-NEXT:    vle16.v v10, (a1)
; LMULMAX2-NEXT:    vadd.vv v8, v8, v10
; LMULMAX2-NEXT:    vse16.v v8, (a0)
; LMULMAX2-NEXT:    ret
;
; LMULMAX1-RV32-LABEL: add_v16i16:
; LMULMAX1-RV32:       # %bb.0:
; LMULMAX1-RV32-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; LMULMAX1-RV32-NEXT:    vle16.v v8, (a0)
; LMULMAX1-RV32-NEXT:    addi a2, a0, 16
; LMULMAX1-RV32-NEXT:    vle16.v v9, (a2)
; LMULMAX1-RV32-NEXT:    addi a3, a1, 16
; LMULMAX1-RV32-NEXT:    vle16.v v10, (a3)
; LMULMAX1-RV32-NEXT:    vle16.v v11, (a1)
; LMULMAX1-RV32-NEXT:    vadd.vv v9, v9, v10
; LMULMAX1-RV32-NEXT:    vadd.vv v8, v8, v11
; LMULMAX1-RV32-NEXT:    vse16.v v8, (a0)
; LMULMAX1-RV32-NEXT:    vse16.v v9, (a2)
; LMULMAX1-RV32-NEXT:    ret
;
; LMULMAX1-RV64-LABEL: add_v16i16:
; LMULMAX1-RV64:       # %bb.0:
; LMULMAX1-RV64-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; LMULMAX1-RV64-NEXT:    vle16.v v8, (a0)
; LMULMAX1-RV64-NEXT:    addi a2, a1, 16
; LMULMAX1-RV64-NEXT:    vle16.v v9, (a2)
; LMULMAX1-RV64-NEXT:    addi a2, a0, 16
; LMULMAX1-RV64-NEXT:    vle16.v v10, (a2)
; LMULMAX1-RV64-NEXT:    vle16.v v11, (a1)
; LMULMAX1-RV64-NEXT:    vadd.vv v9, v10, v9
; LMULMAX1-RV64-NEXT:    vadd.vv v8, v8, v11
; LMULMAX1-RV64-NEXT:    vse16.v v8, (a0)
; LMULMAX1-RV64-NEXT:    vse16.v v9, (a2)
; LMULMAX1-RV64-NEXT:    ret
  %a = load <16 x i16>, ptr %x
  %b = load <16 x i16>, ptr %y
  %c = add <16 x i16> %a, %b
  store <16 x i16> %c, ptr %x
  ret void
}

; Element-wise add of two <8 x i32> vectors, result stored back to %x.
; LMUL max 2 expects a single e32/m2 vadd.vv with VL=8; LMUL max 1 expects
; two e32/m1 halves of 4 elements each, the upper half at byte offset 16.
define void @add_v8i32(ptr %x, ptr %y) {
; LMULMAX2-LABEL: add_v8i32:
; LMULMAX2:       # %bb.0:
; LMULMAX2-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; LMULMAX2-NEXT:    vle32.v v8, (a0)
; LMULMAX2-NEXT:    vle32.v v10, (a1)
; LMULMAX2-NEXT:    vadd.vv v8, v8, v10
; LMULMAX2-NEXT:    vse32.v v8, (a0)
; LMULMAX2-NEXT:    ret
;
; LMULMAX1-RV32-LABEL: add_v8i32:
; LMULMAX1-RV32:       # %bb.0:
; LMULMAX1-RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; LMULMAX1-RV32-NEXT:    vle32.v v8, (a0)
; LMULMAX1-RV32-NEXT:    addi a2, a0, 16
; LMULMAX1-RV32-NEXT:    vle32.v v9, (a2)
; LMULMAX1-RV32-NEXT:    addi a3, a1, 16
; LMULMAX1-RV32-NEXT:    vle32.v v10, (a3)
; LMULMAX1-RV32-NEXT:    vle32.v v11, (a1)
; LMULMAX1-RV32-NEXT:    vadd.vv v9, v9, v10
; LMULMAX1-RV32-NEXT:    vadd.vv v8, v8, v11
; LMULMAX1-RV32-NEXT:    vse32.v v8, (a0)
; LMULMAX1-RV32-NEXT:    vse32.v v9, (a2)
; LMULMAX1-RV32-NEXT:    ret
;
; LMULMAX1-RV64-LABEL: add_v8i32:
; LMULMAX1-RV64:       # %bb.0:
; LMULMAX1-RV64-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; LMULMAX1-RV64-NEXT:    vle32.v v8, (a0)
; LMULMAX1-RV64-NEXT:    addi a2, a1, 16
; LMULMAX1-RV64-NEXT:    vle32.v v9, (a2)
; LMULMAX1-RV64-NEXT:    addi a2, a0, 16
; LMULMAX1-RV64-NEXT:    vle32.v v10, (a2)
; LMULMAX1-RV64-NEXT:    vle32.v v11, (a1)
; LMULMAX1-RV64-NEXT:    vadd.vv v9, v10, v9
; LMULMAX1-RV64-NEXT:    vadd.vv v8, v8, v11
; LMULMAX1-RV64-NEXT:    vse32.v v8, (a0)
; LMULMAX1-RV64-NEXT:    vse32.v v9, (a2)
; LMULMAX1-RV64-NEXT:    ret
  %a = load <8 x i32>, ptr %x
  %b = load <8 x i32>, ptr %y
  %c = add <8 x i32> %a, %b
  store <8 x i32> %c, ptr %x
  ret void
}

; Element-wise add of two non-power-of-two <6 x i32> vectors, result stored
; back to %x. The three expected sequences handle the 2-element tail
; differently:
;  - LMUL max 2: load/store with VL=6 at m2, add with VL=8.
;  - LMUL max 1, RV32: the low 4 elements are added and stored at VL=4, then
;    the tail is added and stored with VL=2 at e32/mf2.
;  - LMUL max 1, RV64: both halves are added at VL=4 and the tail is stored as
;    a single e64 element (VL=1).
define void @add_v6i32(ptr %x, ptr %y) {
; LMULMAX2-LABEL: add_v6i32:
; LMULMAX2:       # %bb.0:
; LMULMAX2-NEXT:    vsetivli zero, 6, e32, m2, ta, ma
; LMULMAX2-NEXT:    vle32.v v8, (a0)
; LMULMAX2-NEXT:    vle32.v v10, (a1)
; LMULMAX2-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; LMULMAX2-NEXT:    vadd.vv v8, v8, v10
; LMULMAX2-NEXT:    vsetivli zero, 6, e32, m2, ta, ma
; LMULMAX2-NEXT:    vse32.v v8, (a0)
; LMULMAX2-NEXT:    ret
;
; LMULMAX1-RV32-LABEL: add_v6i32:
; LMULMAX1-RV32:       # %bb.0:
; LMULMAX1-RV32-NEXT:    addi a2, a0, 16
; LMULMAX1-RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; LMULMAX1-RV32-NEXT:    vle32.v v8, (a0)
; LMULMAX1-RV32-NEXT:    vle32.v v9, (a1)
; LMULMAX1-RV32-NEXT:    vle32.v v10, (a2)
; LMULMAX1-RV32-NEXT:    addi a1, a1, 16
; LMULMAX1-RV32-NEXT:    vle32.v v11, (a1)
; LMULMAX1-RV32-NEXT:    vadd.vv v8, v8, v9
; LMULMAX1-RV32-NEXT:    vse32.v v8, (a0)
; LMULMAX1-RV32-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
; LMULMAX1-RV32-NEXT:    vadd.vv v8, v10, v11
; LMULMAX1-RV32-NEXT:    vse32.v v8, (a2)
; LMULMAX1-RV32-NEXT:    ret
;
; LMULMAX1-RV64-LABEL: add_v6i32:
; LMULMAX1-RV64:       # %bb.0:
; LMULMAX1-RV64-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; LMULMAX1-RV64-NEXT:    vle32.v v8, (a0)
; LMULMAX1-RV64-NEXT:    addi a2, a1, 16
; LMULMAX1-RV64-NEXT:    vle32.v v9, (a2)
; LMULMAX1-RV64-NEXT:    addi a2, a0, 16
; LMULMAX1-RV64-NEXT:    vle32.v v10, (a2)
; LMULMAX1-RV64-NEXT:    vle32.v v11, (a1)
; LMULMAX1-RV64-NEXT:    vadd.vv v9, v10, v9
; LMULMAX1-RV64-NEXT:    vadd.vv v8, v8, v11
; LMULMAX1-RV64-NEXT:    vse32.v v8, (a0)
; LMULMAX1-RV64-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
; LMULMAX1-RV64-NEXT:    vse64.v v9, (a2)
; LMULMAX1-RV64-NEXT:    ret
  %a = load <6 x i32>, ptr %x
  %b = load <6 x i32>, ptr %y
  %c = add <6 x i32> %a, %b
  store <6 x i32> %c, ptr %x
  ret void
}

; Element-wise add of two <4 x i64> vectors, result stored back to %x.
; LMUL max 2 expects a single e64/m2 vadd.vv with VL=4; LMUL max 1 expects
; two e64/m1 halves of 2 elements each, the upper half at byte offset 16.
define void @add_v4i64(ptr %x, ptr %y) {
; LMULMAX2-LABEL: add_v4i64:
; LMULMAX2:       # %bb.0:
; LMULMAX2-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; LMULMAX2-NEXT:    vle64.v v8, (a0)
; LMULMAX2-NEXT:    vle64.v v10, (a1)
; LMULMAX2-NEXT:    vadd.vv v8, v8, v10
; LMULMAX2-NEXT:    vse64.v v8, (a0)
; LMULMAX2-NEXT:    ret
;
; LMULMAX1-RV32-LABEL: add_v4i64:
; LMULMAX1-RV32:       # %bb.0:
; LMULMAX1-RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; LMULMAX1-RV32-NEXT:    vle64.v v8, (a0)
; LMULMAX1-RV32-NEXT:    addi a2, a0, 16
; LMULMAX1-RV32-NEXT:    vle64.v v9, (a2)
; LMULMAX1-RV32-NEXT:    addi a3, a1, 16
; LMULMAX1-RV32-NEXT:    vle64.v v10, (a3)
; LMULMAX1-RV32-NEXT:    vle64.v v11, (a1)
; LMULMAX1-RV32-NEXT:    vadd.vv v9, v9, v10
; LMULMAX1-RV32-NEXT:    vadd.vv v8, v8, v11
; LMULMAX1-RV32-NEXT:    vse64.v v8, (a0)
; LMULMAX1-RV32-NEXT:    vse64.v v9, (a2)
; LMULMAX1-RV32-NEXT:    ret
;
; LMULMAX1-RV64-LABEL: add_v4i64:
; LMULMAX1-RV64:       # %bb.0:
; LMULMAX1-RV64-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; LMULMAX1-RV64-NEXT:    vle64.v v8, (a0)
; LMULMAX1-RV64-NEXT:    addi a2, a1, 16
; LMULMAX1-RV64-NEXT:    vle64.v v9, (a2)
; LMULMAX1-RV64-NEXT:    addi a2, a0, 16
; LMULMAX1-RV64-NEXT:    vle64.v v10, (a2)
; LMULMAX1-RV64-NEXT:    vle64.v v11, (a1)
; LMULMAX1-RV64-NEXT:    vadd.vv v9, v10, v9
; LMULMAX1-RV64-NEXT:    vadd.vv v8, v8, v11
; LMULMAX1-RV64-NEXT:    vse64.v v8, (a0)
; LMULMAX1-RV64-NEXT:    vse64.v v9, (a2)
; LMULMAX1-RV64-NEXT:    ret
  %a = load <4 x i64>, ptr %x
  %b = load <4 x i64>, ptr %y
  %c = add <4 x i64> %a, %b
  store <4 x i64> %c, ptr %x
  ret void
}

; Element-wise subtract (%x - %y) of two <32 x i8> vectors, result stored back
; to %x. LMUL max 2 expects one e8/m2 vsub.vv with VL=32 set via li + vsetvli;
; LMUL max 1 expects two m1 halves, the upper half at byte offset 16. In both
; LMUL-max-1 expectations the first vsub.vv source is the register loaded from
; %x (minuend), even though the RV64 sequence loads the %y half first.
define void @sub_v32i8(ptr %x, ptr %y) {
; LMULMAX2-LABEL: sub_v32i8:
; LMULMAX2:       # %bb.0:
; LMULMAX2-NEXT:    li a2, 32
; LMULMAX2-NEXT:    vsetvli zero, a2, e8, m2, ta, ma
; LMULMAX2-NEXT:    vle8.v v8, (a0)
; LMULMAX2-NEXT:    vle8.v v10, (a1)
; LMULMAX2-NEXT:    vsub.vv v8, v8, v10
; LMULMAX2-NEXT:    vse8.v v8, (a0)
; LMULMAX2-NEXT:    ret
;
; LMULMAX1-RV32-LABEL: sub_v32i8:
; LMULMAX1-RV32:       # %bb.0:
; LMULMAX1-RV32-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
; LMULMAX1-RV32-NEXT:    vle8.v v8, (a0)
; LMULMAX1-RV32-NEXT:    addi a2, a0, 16
; LMULMAX1-RV32-NEXT:    vle8.v v9, (a2)
; LMULMAX1-RV32-NEXT:    addi a3, a1, 16
; LMULMAX1-RV32-NEXT:    vle8.v v10, (a3)
; LMULMAX1-RV32-NEXT:    vle8.v v11, (a1)
; LMULMAX1-RV32-NEXT:    vsub.vv v9, v9, v10
; LMULMAX1-RV32-NEXT:    vsub.vv v8, v8, v11
; LMULMAX1-RV32-NEXT:    vse8.v v8, (a0)
; LMULMAX1-RV32-NEXT:    vse8.v v9, (a2)
; LMULMAX1-RV32-NEXT:    ret
;
; LMULMAX1-RV64-LABEL: sub_v32i8:
; LMULMAX1-RV64:       # %bb.0:
; LMULMAX1-RV64-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
; LMULMAX1-RV64-NEXT:    vle8.v v8, (a0)
; LMULMAX1-RV64-NEXT:    addi a2, a1, 16
; LMULMAX1-RV64-NEXT:    vle8.v v9, (a2)
; LMULMAX1-RV64-NEXT:    addi a2, a0, 16
; LMULMAX1-RV64-NEXT:    vle8.v v10, (a2)
; LMULMAX1-RV64-NEXT:    vle8.v v11, (a1)
; LMULMAX1-RV64-NEXT:    vsub.vv v9, v10, v9
; LMULMAX1-RV64-NEXT:    vsub.vv v8, v8, v11
; LMULMAX1-RV64-NEXT:    vse8.v v8, (a0)
; LMULMAX1-RV64-NEXT:    vse8.v v9, (a2)
; LMULMAX1-RV64-NEXT:    ret
  %a = load <32 x i8>, ptr %x
  %b = load <32 x i8>, ptr %y
  %c = sub <32 x i8> %a, %b
  store <32 x i8> %c, ptr %x
  ret void
}

; Element-wise subtract (%x - %y) of two <16 x i16> vectors, result stored
; back to %x. LMUL max 2 expects a single e16/m2 vsub.vv with VL=16; LMUL
; max 1 expects two e16/m1 halves with the upper half at byte offset 16, the
; %x half always being the first vsub.vv source.
define void @sub_v16i16(ptr %x, ptr %y) {
; LMULMAX2-LABEL: sub_v16i16:
; LMULMAX2:       # %bb.0:
; LMULMAX2-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
; LMULMAX2-NEXT:    vle16.v v8, (a0)
; LMULMAX2-NEXT:    vle16.v v10, (a1)
; LMULMAX2-NEXT:    vsub.vv v8, v8, v10
; LMULMAX2-NEXT:    vse16.v v8, (a0)
; LMULMAX2-NEXT:    ret
;
; LMULMAX1-RV32-LABEL: sub_v16i16:
; LMULMAX1-RV32:       # %bb.0:
; LMULMAX1-RV32-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; LMULMAX1-RV32-NEXT:    vle16.v v8, (a0)
; LMULMAX1-RV32-NEXT:    addi a2, a0, 16
; LMULMAX1-RV32-NEXT:    vle16.v v9, (a2)
; LMULMAX1-RV32-NEXT:    addi a3, a1, 16
; LMULMAX1-RV32-NEXT:    vle16.v v10, (a3)
; LMULMAX1-RV32-NEXT:    vle16.v v11, (a1)
; LMULMAX1-RV32-NEXT:    vsub.vv v9, v9, v10
; LMULMAX1-RV32-NEXT:    vsub.vv v8, v8, v11
; LMULMAX1-RV32-NEXT:    vse16.v v8, (a0)
; LMULMAX1-RV32-NEXT:    vse16.v v9, (a2)
; LMULMAX1-RV32-NEXT:    ret
;
; LMULMAX1-RV64-LABEL: sub_v16i16:
; LMULMAX1-RV64:       # %bb.0:
; LMULMAX1-RV64-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; LMULMAX1-RV64-NEXT:    vle16.v v8, (a0)
; LMULMAX1-RV64-NEXT:    addi a2, a1, 16
; LMULMAX1-RV64-NEXT:    vle16.v v9, (a2)
; LMULMAX1-RV64-NEXT:    addi a2, a0, 16
; LMULMAX1-RV64-NEXT:    vle16.v v10, (a2)
; LMULMAX1-RV64-NEXT:    vle16.v v11, (a1)
; LMULMAX1-RV64-NEXT:    vsub.vv v9, v10, v9
; LMULMAX1-RV64-NEXT:    vsub.vv v8, v8, v11
; LMULMAX1-RV64-NEXT:    vse16.v v8, (a0)
; LMULMAX1-RV64-NEXT:    vse16.v v9, (a2)
; LMULMAX1-RV64-NEXT:    ret
  %a = load <16 x i16>, ptr %x
  %b = load <16 x i16>, ptr %y
  %c = sub <16 x i16> %a, %b
  store <16 x i16> %c, ptr %x
  ret void
}

; Element-wise subtract (%x - %y) of two <8 x i32> vectors, result stored back
; to %x. LMUL max 2 expects a single e32/m2 vsub.vv with VL=8; LMUL max 1
; expects two e32/m1 halves with the upper half at byte offset 16, the %x half
; always being the first vsub.vv source.
define void @sub_v8i32(ptr %x, ptr %y) {
; LMULMAX2-LABEL: sub_v8i32:
; LMULMAX2:       # %bb.0:
; LMULMAX2-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; LMULMAX2-NEXT:    vle32.v v8, (a0)
; LMULMAX2-NEXT:    vle32.v v10, (a1)
; LMULMAX2-NEXT:    vsub.vv v8, v8, v10
; LMULMAX2-NEXT:    vse32.v v8, (a0)
; LMULMAX2-NEXT:    ret
;
; LMULMAX1-RV32-LABEL: sub_v8i32:
; LMULMAX1-RV32:       # %bb.0:
; LMULMAX1-RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; LMULMAX1-RV32-NEXT:    vle32.v v8, (a0)
; LMULMAX1-RV32-NEXT:    addi a2, a0, 16
; LMULMAX1-RV32-NEXT:    vle32.v v9, (a2)
; LMULMAX1-RV32-NEXT:    addi a3, a1, 16
; LMULMAX1-RV32-NEXT:    vle32.v v10, (a3)
; LMULMAX1-RV32-NEXT:    vle32.v v11, (a1)
; LMULMAX1-RV32-NEXT:    vsub.vv v9, v9, v10
; LMULMAX1-RV32-NEXT:    vsub.vv v8, v8, v11
; LMULMAX1-RV32-NEXT:    vse32.v v8, (a0)
; LMULMAX1-RV32-NEXT:    vse32.v v9, (a2)
; LMULMAX1-RV32-NEXT:    ret
;
; LMULMAX1-RV64-LABEL: sub_v8i32:
; LMULMAX1-RV64:       # %bb.0:
; LMULMAX1-RV64-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; LMULMAX1-RV64-NEXT:    vle32.v v8, (a0)
; LMULMAX1-RV64-NEXT:    addi a2, a1, 16
; LMULMAX1-RV64-NEXT:    vle32.v v9, (a2)
; LMULMAX1-RV64-NEXT:    addi a2, a0, 16
; LMULMAX1-RV64-NEXT:    vle32.v v10, (a2)
; LMULMAX1-RV64-NEXT:    vle32.v v11, (a1)
; LMULMAX1-RV64-NEXT:    vsub.vv v9, v10, v9
; LMULMAX1-RV64-NEXT:    vsub.vv v8, v8, v11
; LMULMAX1-RV64-NEXT:    vse32.v v8, (a0)
; LMULMAX1-RV64-NEXT:    vse32.v v9, (a2)
; LMULMAX1-RV64-NEXT:    ret
  %a = load <8 x i32>, ptr %x
  %b = load <8 x i32>, ptr %y
  %c = sub <8 x i32> %a, %b
  store <8 x i32> %c, ptr %x
  ret void
}

; Element-wise subtract (%x - %y) of two <4 x i64> vectors, result stored back
; to %x. LMUL max 2 expects a single e64/m2 vsub.vv with VL=4; LMUL max 1
; expects two e64/m1 halves with the upper half at byte offset 16, the %x half
; always being the first vsub.vv source.
define void @sub_v4i64(ptr %x, ptr %y) {
; LMULMAX2-LABEL: sub_v4i64:
; LMULMAX2:       # %bb.0:
; LMULMAX2-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; LMULMAX2-NEXT:    vle64.v v8, (a0)
; LMULMAX2-NEXT:    vle64.v v10, (a1)
; LMULMAX2-NEXT:    vsub.vv v8, v8, v10
; LMULMAX2-NEXT:    vse64.v v8, (a0)
; LMULMAX2-NEXT:    ret
;
; LMULMAX1-RV32-LABEL: sub_v4i64:
; LMULMAX1-RV32:       # %bb.0:
; LMULMAX1-RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; LMULMAX1-RV32-NEXT:    vle64.v v8, (a0)
; LMULMAX1-RV32-NEXT:    addi a2, a0, 16
; LMULMAX1-RV32-NEXT:    vle64.v v9, (a2)
; LMULMAX1-RV32-NEXT:    addi a3, a1, 16
; LMULMAX1-RV32-NEXT:    vle64.v v10, (a3)
; LMULMAX1-RV32-NEXT:    vle64.v v11, (a1)
; LMULMAX1-RV32-NEXT:    vsub.vv v9, v9, v10
; LMULMAX1-RV32-NEXT:    vsub.vv v8, v8, v11
; LMULMAX1-RV32-NEXT:    vse64.v v8, (a0)
; LMULMAX1-RV32-NEXT:    vse64.v v9, (a2)
; LMULMAX1-RV32-NEXT:    ret
;
; LMULMAX1-RV64-LABEL: sub_v4i64:
; LMULMAX1-RV64:       # %bb.0:
; LMULMAX1-RV64-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; LMULMAX1-RV64-NEXT:    vle64.v v8, (a0)
; LMULMAX1-RV64-NEXT:    addi a2, a1, 16
; LMULMAX1-RV64-NEXT:    vle64.v v9, (a2)
; LMULMAX1-RV64-NEXT:    addi a2, a0, 16
; LMULMAX1-RV64-NEXT:    vle64.v v10, (a2)
; LMULMAX1-RV64-NEXT:    vle64.v v11, (a1)
; LMULMAX1-RV64-NEXT:    vsub.vv v9, v10, v9
; LMULMAX1-RV64-NEXT:    vsub.vv v8, v8, v11
; LMULMAX1-RV64-NEXT:    vse64.v v8, (a0)
; LMULMAX1-RV64-NEXT:    vse64.v v9, (a2)
; LMULMAX1-RV64-NEXT:    ret
  %a = load <4 x i64>, ptr %x
  %b = load <4 x i64>, ptr %y
  %c = sub <4 x i64> %a, %b
  store <4 x i64> %c, ptr %x
  ret void
}

; Element-wise multiply of two <32 x i8> vectors, result stored back to %x.
; LMUL max 2 expects one e8/m2 vmul.vv with VL=32 set via li + vsetvli; LMUL
; max 1 expects two e8/m1 halves of 16 elements, the upper half at byte
; offset 16.
define void @mul_v32i8(ptr %x, ptr %y) {
; LMULMAX2-LABEL: mul_v32i8:
; LMULMAX2:       # %bb.0:
; LMULMAX2-NEXT:    li a2, 32
; LMULMAX2-NEXT:    vsetvli zero, a2, e8, m2, ta, ma
; LMULMAX2-NEXT:    vle8.v v8, (a0)
; LMULMAX2-NEXT:    vle8.v v10, (a1)
; LMULMAX2-NEXT:    vmul.vv v8, v8, v10
; LMULMAX2-NEXT:    vse8.v v8, (a0)
; LMULMAX2-NEXT:    ret
;
; LMULMAX1-RV32-LABEL: mul_v32i8:
; LMULMAX1-RV32:       # %bb.0:
; LMULMAX1-RV32-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
; LMULMAX1-RV32-NEXT:    vle8.v v8, (a0)
; LMULMAX1-RV32-NEXT:    addi a2, a0, 16
; LMULMAX1-RV32-NEXT:    vle8.v v9, (a2)
; LMULMAX1-RV32-NEXT:    addi a3, a1, 16
; LMULMAX1-RV32-NEXT:    vle8.v v10, (a3)
; LMULMAX1-RV32-NEXT:    vle8.v v11, (a1)
; LMULMAX1-RV32-NEXT:    vmul.vv v9, v9, v10
; LMULMAX1-RV32-NEXT:    vmul.vv v8, v8, v11
; LMULMAX1-RV32-NEXT:    vse8.v v8, (a0)
; LMULMAX1-RV32-NEXT:    vse8.v v9, (a2)
; LMULMAX1-RV32-NEXT:    ret
;
; LMULMAX1-RV64-LABEL: mul_v32i8:
; LMULMAX1-RV64:       # %bb.0:
; LMULMAX1-RV64-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
; LMULMAX1-RV64-NEXT:    vle8.v v8, (a0)
; LMULMAX1-RV64-NEXT:    addi a2, a1, 16
; LMULMAX1-RV64-NEXT:    vle8.v v9, (a2)
; LMULMAX1-RV64-NEXT:    addi a2, a0, 16
; LMULMAX1-RV64-NEXT:    vle8.v v10, (a2)
; LMULMAX1-RV64-NEXT:    vle8.v v11, (a1)
; LMULMAX1-RV64-NEXT:    vmul.vv v9, v10, v9
; LMULMAX1-RV64-NEXT:    vmul.vv v8, v8, v11
; LMULMAX1-RV64-NEXT:    vse8.v v8, (a0)
; LMULMAX1-RV64-NEXT:    vse8.v v9, (a2)
; LMULMAX1-RV64-NEXT:    ret
  %a = load <32 x i8>, ptr %x
  %b = load <32 x i8>, ptr %y
  %c = mul <32 x i8> %a, %b
  store <32 x i8> %c, ptr %x
  ret void
}

; Element-wise multiply of two <16 x i16> vectors, result stored back to %x.
; LMUL max 2 expects a single e16/m2 vmul.vv with VL=16; LMUL max 1 expects
; two e16/m1 halves of 8 elements each, the upper half at byte offset 16.
define void @mul_v16i16(ptr %x, ptr %y) {
; LMULMAX2-LABEL: mul_v16i16:
; LMULMAX2:       # %bb.0:
; LMULMAX2-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
; LMULMAX2-NEXT:    vle16.v v8, (a0)
; LMULMAX2-NEXT:    vle16.v v10, (a1)
; LMULMAX2-NEXT:    vmul.vv v8, v8, v10
; LMULMAX2-NEXT:    vse16.v v8, (a0)
; LMULMAX2-NEXT:    ret
;
; LMULMAX1-RV32-LABEL: mul_v16i16:
; LMULMAX1-RV32:       # %bb.0:
; LMULMAX1-RV32-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; LMULMAX1-RV32-NEXT:    vle16.v v8, (a0)
; LMULMAX1-RV32-NEXT:    addi a2, a0, 16
; LMULMAX1-RV32-NEXT:    vle16.v v9, (a2)
; LMULMAX1-RV32-NEXT:    addi a3, a1, 16
; LMULMAX1-RV32-NEXT:    vle16.v v10, (a3)
; LMULMAX1-RV32-NEXT:    vle16.v v11, (a1)
; LMULMAX1-RV32-NEXT:    vmul.vv v9, v9, v10
; LMULMAX1-RV32-NEXT:    vmul.vv v8, v8, v11
; LMULMAX1-RV32-NEXT:    vse16.v v8, (a0)
; LMULMAX1-RV32-NEXT:    vse16.v v9, (a2)
; LMULMAX1-RV32-NEXT:    ret
;
; LMULMAX1-RV64-LABEL: mul_v16i16:
; LMULMAX1-RV64:       # %bb.0:
; LMULMAX1-RV64-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; LMULMAX1-RV64-NEXT:    vle16.v v8, (a0)
; LMULMAX1-RV64-NEXT:    addi a2, a1, 16
; LMULMAX1-RV64-NEXT:    vle16.v v9, (a2)
; LMULMAX1-RV64-NEXT:    addi a2, a0, 16
; LMULMAX1-RV64-NEXT:    vle16.v v10, (a2)
; LMULMAX1-RV64-NEXT:    vle16.v v11, (a1)
; LMULMAX1-RV64-NEXT:    vmul.vv v9, v10, v9
; LMULMAX1-RV64-NEXT:    vmul.vv v8, v8, v11
; LMULMAX1-RV64-NEXT:    vse16.v v8, (a0)
; LMULMAX1-RV64-NEXT:    vse16.v v9, (a2)
; LMULMAX1-RV64-NEXT:    ret
  %a = load <16 x i16>, ptr %x
  %b = load <16 x i16>, ptr %y
  %c = mul <16 x i16> %a, %b
  store <16 x i16> %c, ptr %x
  ret void
}

; Element-wise multiply of two <8 x i32> vectors, result stored back to %x.
; LMUL max 2 expects a single e32/m2 vmul.vv with VL=8; LMUL max 1 expects
; two e32/m1 halves of 4 elements each, the upper half at byte offset 16.
define void @mul_v8i32(ptr %x, ptr %y) {
; LMULMAX2-LABEL: mul_v8i32:
; LMULMAX2:       # %bb.0:
; LMULMAX2-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; LMULMAX2-NEXT:    vle32.v v8, (a0)
; LMULMAX2-NEXT:    vle32.v v10, (a1)
; LMULMAX2-NEXT:    vmul.vv v8, v8, v10
; LMULMAX2-NEXT:    vse32.v v8, (a0)
; LMULMAX2-NEXT:    ret
;
; LMULMAX1-RV32-LABEL: mul_v8i32:
; LMULMAX1-RV32:       # %bb.0:
; LMULMAX1-RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; LMULMAX1-RV32-NEXT:    vle32.v v8, (a0)
; LMULMAX1-RV32-NEXT:    addi a2, a0, 16
; LMULMAX1-RV32-NEXT:    vle32.v v9, (a2)
; LMULMAX1-RV32-NEXT:    addi a3, a1, 16
; LMULMAX1-RV32-NEXT:    vle32.v v10, (a3)
; LMULMAX1-RV32-NEXT:    vle32.v v11, (a1)
; LMULMAX1-RV32-NEXT:    vmul.vv v9, v9, v10
; LMULMAX1-RV32-NEXT:    vmul.vv v8, v8, v11
; LMULMAX1-RV32-NEXT:    vse32.v v8, (a0)
; LMULMAX1-RV32-NEXT:    vse32.v v9, (a2)
; LMULMAX1-RV32-NEXT:    ret
;
; LMULMAX1-RV64-LABEL: mul_v8i32:
; LMULMAX1-RV64:       # %bb.0:
; LMULMAX1-RV64-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; LMULMAX1-RV64-NEXT:    vle32.v v8, (a0)
; LMULMAX1-RV64-NEXT:    addi a2, a1, 16
; LMULMAX1-RV64-NEXT:    vle32.v v9, (a2)
; LMULMAX1-RV64-NEXT:    addi a2, a0, 16
; LMULMAX1-RV64-NEXT:    vle32.v v10, (a2)
; LMULMAX1-RV64-NEXT:    vle32.v v11, (a1)
; LMULMAX1-RV64-NEXT:    vmul.vv v9, v10, v9
; LMULMAX1-RV64-NEXT:    vmul.vv v8, v8, v11
; LMULMAX1-RV64-NEXT:    vse32.v v8, (a0)
; LMULMAX1-RV64-NEXT:    vse32.v v9, (a2)
; LMULMAX1-RV64-NEXT:    ret
  %a = load <8 x i32>, ptr %x
  %b = load <8 x i32>, ptr %y
  %c = mul <8 x i32> %a, %b
  store <8 x i32> %c, ptr %x
  ret void
}

; Element-wise multiply of two <4 x i64> vectors, result stored back to %x.
; LMUL max 2 expects a single e64/m2 vmul.vv with VL=4; LMUL max 1 expects
; two e64/m1 halves of 2 elements each, the upper half at byte offset 16.
define void @mul_v4i64(ptr %x, ptr %y) {
; LMULMAX2-LABEL: mul_v4i64:
; LMULMAX2:       # %bb.0:
; LMULMAX2-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; LMULMAX2-NEXT:    vle64.v v8, (a0)
; LMULMAX2-NEXT:    vle64.v v10, (a1)
; LMULMAX2-NEXT:    vmul.vv v8, v8, v10
; LMULMAX2-NEXT:    vse64.v v8, (a0)
; LMULMAX2-NEXT:    ret
;
; LMULMAX1-RV32-LABEL: mul_v4i64:
; LMULMAX1-RV32:       # %bb.0:
; LMULMAX1-RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; LMULMAX1-RV32-NEXT:    vle64.v v8, (a0)
; LMULMAX1-RV32-NEXT:    addi a2, a0, 16
; LMULMAX1-RV32-NEXT:    vle64.v v9, (a2)
; LMULMAX1-RV32-NEXT:    addi a3, a1, 16
; LMULMAX1-RV32-NEXT:    vle64.v v10, (a3)
; LMULMAX1-RV32-NEXT:    vle64.v v11, (a1)
; LMULMAX1-RV32-NEXT:    vmul.vv v9, v9, v10
; LMULMAX1-RV32-NEXT:    vmul.vv v8, v8, v11
; LMULMAX1-RV32-NEXT:    vse64.v v8, (a0)
; LMULMAX1-RV32-NEXT:    vse64.v v9, (a2)
; LMULMAX1-RV32-NEXT:    ret
;
; LMULMAX1-RV64-LABEL: mul_v4i64:
; LMULMAX1-RV64:       # %bb.0:
; LMULMAX1-RV64-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; LMULMAX1-RV64-NEXT:    vle64.v v8, (a0)
; LMULMAX1-RV64-NEXT:    addi a2, a1, 16
; LMULMAX1-RV64-NEXT:    vle64.v v9, (a2)
; LMULMAX1-RV64-NEXT:    addi a2, a0, 16
; LMULMAX1-RV64-NEXT:    vle64.v v10, (a2)
; LMULMAX1-RV64-NEXT:    vle64.v v11, (a1)
; LMULMAX1-RV64-NEXT:    vmul.vv v9, v10, v9
; LMULMAX1-RV64-NEXT:    vmul.vv v8, v8, v11
; LMULMAX1-RV64-NEXT:    vse64.v v8, (a0)
; LMULMAX1-RV64-NEXT:    vse64.v v9, (a2)
; LMULMAX1-RV64-NEXT:    ret
  %a = load <4 x i64>, ptr %x
  %b = load <4 x i64>, ptr %y
  %c = mul <4 x i64> %a, %b
  store <4 x i64> %c, ptr %x
  ret void
}

; Bitwise AND of two <32 x i8> vectors, result stored back to %x.
; LMUL max 2 expects one e8/m2 vand.vv with VL=32 set via li + vsetvli; LMUL
; max 1 expects two e8/m1 halves of 16 elements, the upper half at byte
; offset 16.
define void @and_v32i8(ptr %x, ptr %y) {
; LMULMAX2-LABEL: and_v32i8:
; LMULMAX2:       # %bb.0:
; LMULMAX2-NEXT:    li a2, 32
; LMULMAX2-NEXT:    vsetvli zero, a2, e8, m2, ta, ma
; LMULMAX2-NEXT:    vle8.v v8, (a0)
; LMULMAX2-NEXT:    vle8.v v10, (a1)
; LMULMAX2-NEXT:    vand.vv v8, v8, v10
; LMULMAX2-NEXT:    vse8.v v8, (a0)
; LMULMAX2-NEXT:    ret
;
; LMULMAX1-RV32-LABEL: and_v32i8:
; LMULMAX1-RV32:       # %bb.0:
; LMULMAX1-RV32-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
; LMULMAX1-RV32-NEXT:    vle8.v v8, (a0)
; LMULMAX1-RV32-NEXT:    addi a2, a0, 16
; LMULMAX1-RV32-NEXT:    vle8.v v9, (a2)
; LMULMAX1-RV32-NEXT:    addi a3, a1, 16
; LMULMAX1-RV32-NEXT:    vle8.v v10, (a3)
; LMULMAX1-RV32-NEXT:    vle8.v v11, (a1)
; LMULMAX1-RV32-NEXT:    vand.vv v9, v9, v10
; LMULMAX1-RV32-NEXT:    vand.vv v8, v8, v11
; LMULMAX1-RV32-NEXT:    vse8.v v8, (a0)
; LMULMAX1-RV32-NEXT:    vse8.v v9, (a2)
; LMULMAX1-RV32-NEXT:    ret
;
; LMULMAX1-RV64-LABEL: and_v32i8:
; LMULMAX1-RV64:       # %bb.0:
; LMULMAX1-RV64-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
; LMULMAX1-RV64-NEXT:    vle8.v v8, (a0)
; LMULMAX1-RV64-NEXT:    addi a2, a1, 16
; LMULMAX1-RV64-NEXT:    vle8.v v9, (a2)
; LMULMAX1-RV64-NEXT:    addi a2, a0, 16
; LMULMAX1-RV64-NEXT:    vle8.v v10, (a2)
; LMULMAX1-RV64-NEXT:    vle8.v v11, (a1)
; LMULMAX1-RV64-NEXT:    vand.vv v9, v10, v9
; LMULMAX1-RV64-NEXT:    vand.vv v8, v8, v11
; LMULMAX1-RV64-NEXT:    vse8.v v8, (a0)
; LMULMAX1-RV64-NEXT:    vse8.v v9, (a2)
; LMULMAX1-RV64-NEXT:    ret
  %a = load <32 x i8>, ptr %x
  %b = load <32 x i8>, ptr %y
  %c = and <32 x i8> %a, %b
  store <32 x i8> %c, ptr %x
  ret void
}

; Lane-wise bitwise AND of two <16 x i16> vectors. The operands are loaded
; from %x and %y and the result is stored back through %x.
; The lmul-max=2 runs expect a single e16/m2 vand.vv over 16 elements. The
; lmul-max=1 runs expect two e16/m1 halves of 8 elements each, the upper half
; at byte offset 16 from each pointer. riscv64 reuses a2 for both offset
; addresses and loads the %y upper half first, hence the v10, v9 source order
; in its upper-half vand.vv.
define void @and_v16i16(ptr %x, ptr %y) {
; LMULMAX2-LABEL: and_v16i16:
; LMULMAX2:       # %bb.0:
; LMULMAX2-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
; LMULMAX2-NEXT:    vle16.v v8, (a0)
; LMULMAX2-NEXT:    vle16.v v10, (a1)
; LMULMAX2-NEXT:    vand.vv v8, v8, v10
; LMULMAX2-NEXT:    vse16.v v8, (a0)
; LMULMAX2-NEXT:    ret
;
; LMULMAX1-RV32-LABEL: and_v16i16:
; LMULMAX1-RV32:       # %bb.0:
; LMULMAX1-RV32-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; LMULMAX1-RV32-NEXT:    vle16.v v8, (a0)
; LMULMAX1-RV32-NEXT:    addi a2, a0, 16
; LMULMAX1-RV32-NEXT:    vle16.v v9, (a2)
; LMULMAX1-RV32-NEXT:    addi a3, a1, 16
; LMULMAX1-RV32-NEXT:    vle16.v v10, (a3)
; LMULMAX1-RV32-NEXT:    vle16.v v11, (a1)
; LMULMAX1-RV32-NEXT:    vand.vv v9, v9, v10
; LMULMAX1-RV32-NEXT:    vand.vv v8, v8, v11
; LMULMAX1-RV32-NEXT:    vse16.v v8, (a0)
; LMULMAX1-RV32-NEXT:    vse16.v v9, (a2)
; LMULMAX1-RV32-NEXT:    ret
;
; LMULMAX1-RV64-LABEL: and_v16i16:
; LMULMAX1-RV64:       # %bb.0:
; LMULMAX1-RV64-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; LMULMAX1-RV64-NEXT:    vle16.v v8, (a0)
; LMULMAX1-RV64-NEXT:    addi a2, a1, 16
; LMULMAX1-RV64-NEXT:    vle16.v v9, (a2)
; LMULMAX1-RV64-NEXT:    addi a2, a0, 16
; LMULMAX1-RV64-NEXT:    vle16.v v10, (a2)
; LMULMAX1-RV64-NEXT:    vle16.v v11, (a1)
; LMULMAX1-RV64-NEXT:    vand.vv v9, v10, v9
; LMULMAX1-RV64-NEXT:    vand.vv v8, v8, v11
; LMULMAX1-RV64-NEXT:    vse16.v v8, (a0)
; LMULMAX1-RV64-NEXT:    vse16.v v9, (a2)
; LMULMAX1-RV64-NEXT:    ret
  %a = load <16 x i16>, ptr %x
  %b = load <16 x i16>, ptr %y
  %c = and <16 x i16> %a, %b
  store <16 x i16> %c, ptr %x
  ret void
}

; Lane-wise bitwise AND of two <8 x i32> vectors. The operands are loaded from
; %x and %y and the result is stored back through %x.
; The lmul-max=2 runs expect a single e32/m2 vand.vv over 8 elements. The
; lmul-max=1 runs expect two e32/m1 halves of 4 elements each, the upper half
; at byte offset 16 from each pointer. riscv64 reuses a2 for both offset
; addresses and loads the %y upper half first, hence the v10, v9 source order
; in its upper-half vand.vv.
define void @and_v8i32(ptr %x, ptr %y) {
; LMULMAX2-LABEL: and_v8i32:
; LMULMAX2:       # %bb.0:
; LMULMAX2-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; LMULMAX2-NEXT:    vle32.v v8, (a0)
; LMULMAX2-NEXT:    vle32.v v10, (a1)
; LMULMAX2-NEXT:    vand.vv v8, v8, v10
; LMULMAX2-NEXT:    vse32.v v8, (a0)
; LMULMAX2-NEXT:    ret
;
; LMULMAX1-RV32-LABEL: and_v8i32:
; LMULMAX1-RV32:       # %bb.0:
; LMULMAX1-RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; LMULMAX1-RV32-NEXT:    vle32.v v8, (a0)
; LMULMAX1-RV32-NEXT:    addi a2, a0, 16
; LMULMAX1-RV32-NEXT:    vle32.v v9, (a2)
; LMULMAX1-RV32-NEXT:    addi a3, a1, 16
; LMULMAX1-RV32-NEXT:    vle32.v v10, (a3)
; LMULMAX1-RV32-NEXT:    vle32.v v11, (a1)
; LMULMAX1-RV32-NEXT:    vand.vv v9, v9, v10
; LMULMAX1-RV32-NEXT:    vand.vv v8, v8, v11
; LMULMAX1-RV32-NEXT:    vse32.v v8, (a0)
; LMULMAX1-RV32-NEXT:    vse32.v v9, (a2)
; LMULMAX1-RV32-NEXT:    ret
;
; LMULMAX1-RV64-LABEL: and_v8i32:
; LMULMAX1-RV64:       # %bb.0:
; LMULMAX1-RV64-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; LMULMAX1-RV64-NEXT:    vle32.v v8, (a0)
; LMULMAX1-RV64-NEXT:    addi a2, a1, 16
; LMULMAX1-RV64-NEXT:    vle32.v v9, (a2)
; LMULMAX1-RV64-NEXT:    addi a2, a0, 16
; LMULMAX1-RV64-NEXT:    vle32.v v10, (a2)
; LMULMAX1-RV64-NEXT:    vle32.v v11, (a1)
; LMULMAX1-RV64-NEXT:    vand.vv v9, v10, v9
; LMULMAX1-RV64-NEXT:    vand.vv v8, v8, v11
; LMULMAX1-RV64-NEXT:    vse32.v v8, (a0)
; LMULMAX1-RV64-NEXT:    vse32.v v9, (a2)
; LMULMAX1-RV64-NEXT:    ret
  %a = load <8 x i32>, ptr %x
  %b = load <8 x i32>, ptr %y
  %c = and <8 x i32> %a, %b
  store <8 x i32> %c, ptr %x
  ret void
}

; Lane-wise bitwise AND of two <4 x i64> vectors. The operands are loaded from
; %x and %y and the result is stored back through %x.
; The lmul-max=2 runs expect a single e64/m2 vand.vv over 4 elements. The
; lmul-max=1 runs expect two e64/m1 halves of 2 elements each, the upper half
; at byte offset 16 from each pointer. riscv64 reuses a2 for both offset
; addresses and loads the %y upper half first, hence the v10, v9 source order
; in its upper-half vand.vv.
define void @and_v4i64(ptr %x, ptr %y) {
; LMULMAX2-LABEL: and_v4i64:
; LMULMAX2:       # %bb.0:
; LMULMAX2-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; LMULMAX2-NEXT:    vle64.v v8, (a0)
; LMULMAX2-NEXT:    vle64.v v10, (a1)
; LMULMAX2-NEXT:    vand.vv v8, v8, v10
; LMULMAX2-NEXT:    vse64.v v8, (a0)
; LMULMAX2-NEXT:    ret
;
; LMULMAX1-RV32-LABEL: and_v4i64:
; LMULMAX1-RV32:       # %bb.0:
; LMULMAX1-RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; LMULMAX1-RV32-NEXT:    vle64.v v8, (a0)
; LMULMAX1-RV32-NEXT:    addi a2, a0, 16
; LMULMAX1-RV32-NEXT:    vle64.v v9, (a2)
; LMULMAX1-RV32-NEXT:    addi a3, a1, 16
; LMULMAX1-RV32-NEXT:    vle64.v v10, (a3)
; LMULMAX1-RV32-NEXT:    vle64.v v11, (a1)
; LMULMAX1-RV32-NEXT:    vand.vv v9, v9, v10
; LMULMAX1-RV32-NEXT:    vand.vv v8, v8, v11
; LMULMAX1-RV32-NEXT:    vse64.v v8, (a0)
; LMULMAX1-RV32-NEXT:    vse64.v v9, (a2)
; LMULMAX1-RV32-NEXT:    ret
;
; LMULMAX1-RV64-LABEL: and_v4i64:
; LMULMAX1-RV64:       # %bb.0:
; LMULMAX1-RV64-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; LMULMAX1-RV64-NEXT:    vle64.v v8, (a0)
; LMULMAX1-RV64-NEXT:    addi a2, a1, 16
; LMULMAX1-RV64-NEXT:    vle64.v v9, (a2)
; LMULMAX1-RV64-NEXT:    addi a2, a0, 16
; LMULMAX1-RV64-NEXT:    vle64.v v10, (a2)
; LMULMAX1-RV64-NEXT:    vle64.v v11, (a1)
; LMULMAX1-RV64-NEXT:    vand.vv v9, v10, v9
; LMULMAX1-RV64-NEXT:    vand.vv v8, v8, v11
; LMULMAX1-RV64-NEXT:    vse64.v v8, (a0)
; LMULMAX1-RV64-NEXT:    vse64.v v9, (a2)
; LMULMAX1-RV64-NEXT:    ret
  %a = load <4 x i64>, ptr %x
  %b = load <4 x i64>, ptr %y
  %c = and <4 x i64> %a, %b
  store <4 x i64> %c, ptr %x
  ret void
}

; Lane-wise bitwise OR of two <32 x i8> vectors. The operands are loaded from
; %x and %y and the result is stored back through %x.
; The lmul-max=2 runs expect a single e8/m2 vor.vv over 32 elements, with the
; element count set up by li a2, 32 plus vsetvli rather than a vsetivli
; immediate (presumably because 32 is outside the vsetivli immediate range).
; The lmul-max=1 runs expect two e8/m1 halves of 16 elements each, the upper
; half at byte offset 16 from each pointer. riscv64 reuses a2 for both offset
; addresses and loads the %y upper half first, hence the v10, v9 source order
; in its upper-half vor.vv.
define void @or_v32i8(ptr %x, ptr %y) {
; LMULMAX2-LABEL: or_v32i8:
; LMULMAX2:       # %bb.0:
; LMULMAX2-NEXT:    li a2, 32
; LMULMAX2-NEXT:    vsetvli zero, a2, e8, m2, ta, ma
; LMULMAX2-NEXT:    vle8.v v8, (a0)
; LMULMAX2-NEXT:    vle8.v v10, (a1)
; LMULMAX2-NEXT:    vor.vv v8, v8, v10
; LMULMAX2-NEXT:    vse8.v v8, (a0)
; LMULMAX2-NEXT:    ret
;
; LMULMAX1-RV32-LABEL: or_v32i8:
; LMULMAX1-RV32:       # %bb.0:
; LMULMAX1-RV32-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
; LMULMAX1-RV32-NEXT:    vle8.v v8, (a0)
; LMULMAX1-RV32-NEXT:    addi a2, a0, 16
; LMULMAX1-RV32-NEXT:    vle8.v v9, (a2)
; LMULMAX1-RV32-NEXT:    addi a3, a1, 16
; LMULMAX1-RV32-NEXT:    vle8.v v10, (a3)
; LMULMAX1-RV32-NEXT:    vle8.v v11, (a1)
; LMULMAX1-RV32-NEXT:    vor.vv v9, v9, v10
; LMULMAX1-RV32-NEXT:    vor.vv v8, v8, v11
; LMULMAX1-RV32-NEXT:    vse8.v v8, (a0)
; LMULMAX1-RV32-NEXT:    vse8.v v9, (a2)
; LMULMAX1-RV32-NEXT:    ret
;
; LMULMAX1-RV64-LABEL: or_v32i8:
; LMULMAX1-RV64:       # %bb.0:
; LMULMAX1-RV64-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
; LMULMAX1-RV64-NEXT:    vle8.v v8, (a0)
; LMULMAX1-RV64-NEXT:    addi a2, a1, 16
; LMULMAX1-RV64-NEXT:    vle8.v v9, (a2)
; LMULMAX1-RV64-NEXT:    addi a2, a0, 16
; LMULMAX1-RV64-NEXT:    vle8.v v10, (a2)
; LMULMAX1-RV64-NEXT:    vle8.v v11, (a1)
; LMULMAX1-RV64-NEXT:    vor.vv v9, v10, v9
; LMULMAX1-RV64-NEXT:    vor.vv v8, v8, v11
; LMULMAX1-RV64-NEXT:    vse8.v v8, (a0)
; LMULMAX1-RV64-NEXT:    vse8.v v9, (a2)
; LMULMAX1-RV64-NEXT:    ret
  %a = load <32 x i8>, ptr %x
  %b = load <32 x i8>, ptr %y
  %c = or <32 x i8> %a, %b
  store <32 x i8> %c, ptr %x
  ret void
}

; Lane-wise bitwise OR of two <16 x i16> vectors. The operands are loaded from
; %x and %y and the result is stored back through %x.
; The lmul-max=2 runs expect a single e16/m2 vor.vv over 16 elements. The
; lmul-max=1 runs expect two e16/m1 halves of 8 elements each, the upper half
; at byte offset 16 from each pointer. riscv64 reuses a2 for both offset
; addresses and loads the %y upper half first, hence the v10, v9 source order
; in its upper-half vor.vv.
define void @or_v16i16(ptr %x, ptr %y) {
; LMULMAX2-LABEL: or_v16i16:
; LMULMAX2:       # %bb.0:
; LMULMAX2-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
; LMULMAX2-NEXT:    vle16.v v8, (a0)
; LMULMAX2-NEXT:    vle16.v v10, (a1)
; LMULMAX2-NEXT:    vor.vv v8, v8, v10
; LMULMAX2-NEXT:    vse16.v v8, (a0)
; LMULMAX2-NEXT:    ret
;
; LMULMAX1-RV32-LABEL: or_v16i16:
; LMULMAX1-RV32:       # %bb.0:
; LMULMAX1-RV32-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; LMULMAX1-RV32-NEXT:    vle16.v v8, (a0)
; LMULMAX1-RV32-NEXT:    addi a2, a0, 16
; LMULMAX1-RV32-NEXT:    vle16.v v9, (a2)
; LMULMAX1-RV32-NEXT:    addi a3, a1, 16
; LMULMAX1-RV32-NEXT:    vle16.v v10, (a3)
; LMULMAX1-RV32-NEXT:    vle16.v v11, (a1)
; LMULMAX1-RV32-NEXT:    vor.vv v9, v9, v10
; LMULMAX1-RV32-NEXT:    vor.vv v8, v8, v11
; LMULMAX1-RV32-NEXT:    vse16.v v8, (a0)
; LMULMAX1-RV32-NEXT:    vse16.v v9, (a2)
; LMULMAX1-RV32-NEXT:    ret
;
; LMULMAX1-RV64-LABEL: or_v16i16:
; LMULMAX1-RV64:       # %bb.0:
; LMULMAX1-RV64-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; LMULMAX1-RV64-NEXT:    vle16.v v8, (a0)
; LMULMAX1-RV64-NEXT:    addi a2, a1, 16
; LMULMAX1-RV64-NEXT:    vle16.v v9, (a2)
; LMULMAX1-RV64-NEXT:    addi a2, a0, 16
; LMULMAX1-RV64-NEXT:    vle16.v v10, (a2)
; LMULMAX1-RV64-NEXT:    vle16.v v11, (a1)
; LMULMAX1-RV64-NEXT:    vor.vv v9, v10, v9
; LMULMAX1-RV64-NEXT:    vor.vv v8, v8, v11
; LMULMAX1-RV64-NEXT:    vse16.v v8, (a0)
; LMULMAX1-RV64-NEXT:    vse16.v v9, (a2)
; LMULMAX1-RV64-NEXT:    ret
  %a = load <16 x i16>, ptr %x
  %b = load <16 x i16>, ptr %y
  %c = or <16 x i16> %a, %b
  store <16 x i16> %c, ptr %x
  ret void
}

; Lane-wise bitwise OR of two <8 x i32> vectors. The operands are loaded from
; %x and %y and the result is stored back through %x.
; The lmul-max=2 runs expect a single e32/m2 vor.vv over 8 elements. The
; lmul-max=1 runs expect two e32/m1 halves of 4 elements each, the upper half
; at byte offset 16 from each pointer. riscv64 reuses a2 for both offset
; addresses and loads the %y upper half first, hence the v10, v9 source order
; in its upper-half vor.vv.
define void @or_v8i32(ptr %x, ptr %y) {
; LMULMAX2-LABEL: or_v8i32:
; LMULMAX2:       # %bb.0:
; LMULMAX2-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; LMULMAX2-NEXT:    vle32.v v8, (a0)
; LMULMAX2-NEXT:    vle32.v v10, (a1)
; LMULMAX2-NEXT:    vor.vv v8, v8, v10
; LMULMAX2-NEXT:    vse32.v v8, (a0)
; LMULMAX2-NEXT:    ret
;
; LMULMAX1-RV32-LABEL: or_v8i32:
; LMULMAX1-RV32:       # %bb.0:
; LMULMAX1-RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; LMULMAX1-RV32-NEXT:    vle32.v v8, (a0)
; LMULMAX1-RV32-NEXT:    addi a2, a0, 16
; LMULMAX1-RV32-NEXT:    vle32.v v9, (a2)
; LMULMAX1-RV32-NEXT:    addi a3, a1, 16
; LMULMAX1-RV32-NEXT:    vle32.v v10, (a3)
; LMULMAX1-RV32-NEXT:    vle32.v v11, (a1)
; LMULMAX1-RV32-NEXT:    vor.vv v9, v9, v10
; LMULMAX1-RV32-NEXT:    vor.vv v8, v8, v11
; LMULMAX1-RV32-NEXT:    vse32.v v8, (a0)
; LMULMAX1-RV32-NEXT:    vse32.v v9, (a2)
; LMULMAX1-RV32-NEXT:    ret
;
; LMULMAX1-RV64-LABEL: or_v8i32:
; LMULMAX1-RV64:       # %bb.0:
; LMULMAX1-RV64-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; LMULMAX1-RV64-NEXT:    vle32.v v8, (a0)
; LMULMAX1-RV64-NEXT:    addi a2, a1, 16
; LMULMAX1-RV64-NEXT:    vle32.v v9, (a2)
; LMULMAX1-RV64-NEXT:    addi a2, a0, 16
; LMULMAX1-RV64-NEXT:    vle32.v v10, (a2)
; LMULMAX1-RV64-NEXT:    vle32.v v11, (a1)
; LMULMAX1-RV64-NEXT:    vor.vv v9, v10, v9
; LMULMAX1-RV64-NEXT:    vor.vv v8, v8, v11
; LMULMAX1-RV64-NEXT:    vse32.v v8, (a0)
; LMULMAX1-RV64-NEXT:    vse32.v v9, (a2)
; LMULMAX1-RV64-NEXT:    ret
  %a = load <8 x i32>, ptr %x
  %b = load <8 x i32>, ptr %y
  %c = or <8 x i32> %a, %b
  store <8 x i32> %c, ptr %x
  ret void
}

; Lane-wise bitwise OR of two <4 x i64> vectors. The operands are loaded from
; %x and %y and the result is stored back through %x.
; The lmul-max=2 runs expect a single e64/m2 vor.vv over 4 elements. The
; lmul-max=1 runs expect two e64/m1 halves of 2 elements each, the upper half
; at byte offset 16 from each pointer. riscv64 reuses a2 for both offset
; addresses and loads the %y upper half first, hence the v10, v9 source order
; in its upper-half vor.vv.
define void @or_v4i64(ptr %x, ptr %y) {
; LMULMAX2-LABEL: or_v4i64:
; LMULMAX2:       # %bb.0:
; LMULMAX2-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; LMULMAX2-NEXT:    vle64.v v8, (a0)
; LMULMAX2-NEXT:    vle64.v v10, (a1)
; LMULMAX2-NEXT:    vor.vv v8, v8, v10
; LMULMAX2-NEXT:    vse64.v v8, (a0)
; LMULMAX2-NEXT:    ret
;
; LMULMAX1-RV32-LABEL: or_v4i64:
; LMULMAX1-RV32:       # %bb.0:
; LMULMAX1-RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; LMULMAX1-RV32-NEXT:    vle64.v v8, (a0)
; LMULMAX1-RV32-NEXT:    addi a2, a0, 16
; LMULMAX1-RV32-NEXT:    vle64.v v9, (a2)
; LMULMAX1-RV32-NEXT:    addi a3, a1, 16
; LMULMAX1-RV32-NEXT:    vle64.v v10, (a3)
; LMULMAX1-RV32-NEXT:    vle64.v v11, (a1)
; LMULMAX1-RV32-NEXT:    vor.vv v9, v9, v10
; LMULMAX1-RV32-NEXT:    vor.vv v8, v8, v11
; LMULMAX1-RV32-NEXT:    vse64.v v8, (a0)
; LMULMAX1-RV32-NEXT:    vse64.v v9, (a2)
; LMULMAX1-RV32-NEXT:    ret
;
; LMULMAX1-RV64-LABEL: or_v4i64:
; LMULMAX1-RV64:       # %bb.0:
; LMULMAX1-RV64-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; LMULMAX1-RV64-NEXT:    vle64.v v8, (a0)
; LMULMAX1-RV64-NEXT:    addi a2, a1, 16
; LMULMAX1-RV64-NEXT:    vle64.v v9, (a2)
; LMULMAX1-RV64-NEXT:    addi a2, a0, 16
; LMULMAX1-RV64-NEXT:    vle64.v v10, (a2)
; LMULMAX1-RV64-NEXT:    vle64.v v11, (a1)
; LMULMAX1-RV64-NEXT:    vor.vv v9, v10, v9
; LMULMAX1-RV64-NEXT:    vor.vv v8, v8, v11
; LMULMAX1-RV64-NEXT:    vse64.v v8, (a0)
; LMULMAX1-RV64-NEXT:    vse64.v v9, (a2)
; LMULMAX1-RV64-NEXT:    ret
  %a = load <4 x i64>, ptr %x
  %b = load <4 x i64>, ptr %y
  %c = or <4 x i64> %a, %b
  store <4 x i64> %c, ptr %x
  ret void
}

; Lane-wise bitwise XOR of two <32 x i8> vectors. The operands are loaded from
; %x and %y and the result is stored back through %x.
; The lmul-max=2 runs expect a single e8/m2 vxor.vv over 32 elements, with the
; element count set up by li a2, 32 plus vsetvli rather than a vsetivli
; immediate (presumably because 32 is outside the vsetivli immediate range).
; The lmul-max=1 runs expect two e8/m1 halves of 16 elements each, the upper
; half at byte offset 16 from each pointer. riscv64 reuses a2 for both offset
; addresses and loads the %y upper half first, hence the v10, v9 source order
; in its upper-half vxor.vv.
define void @xor_v32i8(ptr %x, ptr %y) {
; LMULMAX2-LABEL: xor_v32i8:
; LMULMAX2:       # %bb.0:
; LMULMAX2-NEXT:    li a2, 32
; LMULMAX2-NEXT:    vsetvli zero, a2, e8, m2, ta, ma
; LMULMAX2-NEXT:    vle8.v v8, (a0)
; LMULMAX2-NEXT:    vle8.v v10, (a1)
; LMULMAX2-NEXT:    vxor.vv v8, v8, v10
; LMULMAX2-NEXT:    vse8.v v8, (a0)
; LMULMAX2-NEXT:    ret
;
; LMULMAX1-RV32-LABEL: xor_v32i8:
; LMULMAX1-RV32:       # %bb.0:
; LMULMAX1-RV32-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
; LMULMAX1-RV32-NEXT:    vle8.v v8, (a0)
; LMULMAX1-RV32-NEXT:    addi a2, a0, 16
; LMULMAX1-RV32-NEXT:    vle8.v v9, (a2)
; LMULMAX1-RV32-NEXT:    addi a3, a1, 16
; LMULMAX1-RV32-NEXT:    vle8.v v10, (a3)
; LMULMAX1-RV32-NEXT:    vle8.v v11, (a1)
; LMULMAX1-RV32-NEXT:    vxor.vv v9, v9, v10
; LMULMAX1-RV32-NEXT:    vxor.vv v8, v8, v11
; LMULMAX1-RV32-NEXT:    vse8.v v8, (a0)
; LMULMAX1-RV32-NEXT:    vse8.v v9, (a2)
; LMULMAX1-RV32-NEXT:    ret
;
; LMULMAX1-RV64-LABEL: xor_v32i8:
; LMULMAX1-RV64:       # %bb.0:
; LMULMAX1-RV64-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
; LMULMAX1-RV64-NEXT:    vle8.v v8, (a0)
; LMULMAX1-RV64-NEXT:    addi a2, a1, 16
; LMULMAX1-RV64-NEXT:    vle8.v v9, (a2)
; LMULMAX1-RV64-NEXT:    addi a2, a0, 16
; LMULMAX1-RV64-NEXT:    vle8.v v10, (a2)
; LMULMAX1-RV64-NEXT:    vle8.v v11, (a1)
; LMULMAX1-RV64-NEXT:    vxor.vv v9, v10, v9
; LMULMAX1-RV64-NEXT:    vxor.vv v8, v8, v11
; LMULMAX1-RV64-NEXT:    vse8.v v8, (a0)
; LMULMAX1-RV64-NEXT:    vse8.v v9, (a2)
; LMULMAX1-RV64-NEXT:    ret
  %a = load <32 x i8>, ptr %x
  %b = load <32 x i8>, ptr %y
  %c = xor <32 x i8> %a, %b
  store <32 x i8> %c, ptr %x
  ret void
}

; Lane-wise bitwise XOR of two <16 x i16> vectors. The operands are loaded
; from %x and %y and the result is stored back through %x.
; The lmul-max=2 runs expect a single e16/m2 vxor.vv over 16 elements. The
; lmul-max=1 runs expect two e16/m1 halves of 8 elements each, the upper half
; at byte offset 16 from each pointer. riscv64 reuses a2 for both offset
; addresses and loads the %y upper half first, hence the v10, v9 source order
; in its upper-half vxor.vv.
define void @xor_v16i16(ptr %x, ptr %y) {
; LMULMAX2-LABEL: xor_v16i16:
; LMULMAX2:       # %bb.0:
; LMULMAX2-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
; LMULMAX2-NEXT:    vle16.v v8, (a0)
; LMULMAX2-NEXT:    vle16.v v10, (a1)
; LMULMAX2-NEXT:    vxor.vv v8, v8, v10
; LMULMAX2-NEXT:    vse16.v v8, (a0)
; LMULMAX2-NEXT:    ret
;
; LMULMAX1-RV32-LABEL: xor_v16i16:
; LMULMAX1-RV32:       # %bb.0:
; LMULMAX1-RV32-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; LMULMAX1-RV32-NEXT:    vle16.v v8, (a0)
; LMULMAX1-RV32-NEXT:    addi a2, a0, 16
; LMULMAX1-RV32-NEXT:    vle16.v v9, (a2)
; LMULMAX1-RV32-NEXT:    addi a3, a1, 16
; LMULMAX1-RV32-NEXT:    vle16.v v10, (a3)
; LMULMAX1-RV32-NEXT:    vle16.v v11, (a1)
; LMULMAX1-RV32-NEXT:    vxor.vv v9, v9, v10
; LMULMAX1-RV32-NEXT:    vxor.vv v8, v8, v11
; LMULMAX1-RV32-NEXT:    vse16.v v8, (a0)
; LMULMAX1-RV32-NEXT:    vse16.v v9, (a2)
; LMULMAX1-RV32-NEXT:    ret
;
; LMULMAX1-RV64-LABEL: xor_v16i16:
; LMULMAX1-RV64:       # %bb.0:
; LMULMAX1-RV64-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; LMULMAX1-RV64-NEXT:    vle16.v v8, (a0)
; LMULMAX1-RV64-NEXT:    addi a2, a1, 16
; LMULMAX1-RV64-NEXT:    vle16.v v9, (a2)
; LMULMAX1-RV64-NEXT:    addi a2, a0, 16
; LMULMAX1-RV64-NEXT:    vle16.v v10, (a2)
; LMULMAX1-RV64-NEXT:    vle16.v v11, (a1)
; LMULMAX1-RV64-NEXT:    vxor.vv v9, v10, v9
; LMULMAX1-RV64-NEXT:    vxor.vv v8, v8, v11
; LMULMAX1-RV64-NEXT:    vse16.v v8, (a0)
; LMULMAX1-RV64-NEXT:    vse16.v v9, (a2)
; LMULMAX1-RV64-NEXT:    ret
  %a = load <16 x i16>, ptr %x
  %b = load <16 x i16>, ptr %y
  %c = xor <16 x i16> %a, %b
  store <16 x i16> %c, ptr %x
  ret void
}

; Lane-wise bitwise XOR of two <8 x i32> vectors. The operands are loaded from
; %x and %y and the result is stored back through %x.
; The lmul-max=2 runs expect a single e32/m2 vxor.vv over 8 elements. The
; lmul-max=1 runs expect two e32/m1 halves of 4 elements each, the upper half
; at byte offset 16 from each pointer. riscv64 reuses a2 for both offset
; addresses and loads the %y upper half first, hence the v10, v9 source order
; in its upper-half vxor.vv.
define void @xor_v8i32(ptr %x, ptr %y) {
; LMULMAX2-LABEL: xor_v8i32:
; LMULMAX2:       # %bb.0:
; LMULMAX2-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; LMULMAX2-NEXT:    vle32.v v8, (a0)
; LMULMAX2-NEXT:    vle32.v v10, (a1)
; LMULMAX2-NEXT:    vxor.vv v8, v8, v10
; LMULMAX2-NEXT:    vse32.v v8, (a0)
; LMULMAX2-NEXT:    ret
;
; LMULMAX1-RV32-LABEL: xor_v8i32:
; LMULMAX1-RV32:       # %bb.0:
; LMULMAX1-RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; LMULMAX1-RV32-NEXT:    vle32.v v8, (a0)
; LMULMAX1-RV32-NEXT:    addi a2, a0, 16
; LMULMAX1-RV32-NEXT:    vle32.v v9, (a2)
; LMULMAX1-RV32-NEXT:    addi a3, a1, 16
; LMULMAX1-RV32-NEXT:    vle32.v v10, (a3)
; LMULMAX1-RV32-NEXT:    vle32.v v11, (a1)
; LMULMAX1-RV32-NEXT:    vxor.vv v9, v9, v10
; LMULMAX1-RV32-NEXT:    vxor.vv v8, v8, v11
; LMULMAX1-RV32-NEXT:    vse32.v v8, (a0)
; LMULMAX1-RV32-NEXT:    vse32.v v9, (a2)
; LMULMAX1-RV32-NEXT:    ret
;
; LMULMAX1-RV64-LABEL: xor_v8i32:
; LMULMAX1-RV64:       # %bb.0:
; LMULMAX1-RV64-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; LMULMAX1-RV64-NEXT:    vle32.v v8, (a0)
; LMULMAX1-RV64-NEXT:    addi a2, a1, 16
; LMULMAX1-RV64-NEXT:    vle32.v v9, (a2)
; LMULMAX1-RV64-NEXT:    addi a2, a0, 16
; LMULMAX1-RV64-NEXT:    vle32.v v10, (a2)
; LMULMAX1-RV64-NEXT:    vle32.v v11, (a1)
; LMULMAX1-RV64-NEXT:    vxor.vv v9, v10, v9
; LMULMAX1-RV64-NEXT:    vxor.vv v8, v8, v11
; LMULMAX1-RV64-NEXT:    vse32.v v8, (a0)
; LMULMAX1-RV64-NEXT:    vse32.v v9, (a2)
; LMULMAX1-RV64-NEXT:    ret
  %a = load <8 x i32>, ptr %x
  %b = load <8 x i32>, ptr %y
  %c = xor <8 x i32> %a, %b
  store <8 x i32> %c, ptr %x
  ret void
}

; Lane-wise bitwise XOR of two <4 x i64> vectors. The operands are loaded from
; %x and %y and the result is stored back through %x.
; The lmul-max=2 runs expect a single e64/m2 vxor.vv over 4 elements. The
; lmul-max=1 runs expect two e64/m1 halves of 2 elements each, the upper half
; at byte offset 16 from each pointer. riscv64 reuses a2 for both offset
; addresses and loads the %y upper half first, hence the v10, v9 source order
; in its upper-half vxor.vv.
define void @xor_v4i64(ptr %x, ptr %y) {
; LMULMAX2-LABEL: xor_v4i64:
; LMULMAX2:       # %bb.0:
; LMULMAX2-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; LMULMAX2-NEXT:    vle64.v v8, (a0)
; LMULMAX2-NEXT:    vle64.v v10, (a1)
; LMULMAX2-NEXT:    vxor.vv v8, v8, v10
; LMULMAX2-NEXT:    vse64.v v8, (a0)
; LMULMAX2-NEXT:    ret
;
; LMULMAX1-RV32-LABEL: xor_v4i64:
; LMULMAX1-RV32:       # %bb.0:
; LMULMAX1-RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; LMULMAX1-RV32-NEXT:    vle64.v v8, (a0)
; LMULMAX1-RV32-NEXT:    addi a2, a0, 16
; LMULMAX1-RV32-NEXT:    vle64.v v9, (a2)
; LMULMAX1-RV32-NEXT:    addi a3, a1, 16
; LMULMAX1-RV32-NEXT:    vle64.v v10, (a3)
; LMULMAX1-RV32-NEXT:    vle64.v v11, (a1)
; LMULMAX1-RV32-NEXT:    vxor.vv v9, v9, v10
; LMULMAX1-RV32-NEXT:    vxor.vv v8, v8, v11
; LMULMAX1-RV32-NEXT:    vse64.v v8, (a0)
; LMULMAX1-RV32-NEXT:    vse64.v v9, (a2)
; LMULMAX1-RV32-NEXT:    ret
;
; LMULMAX1-RV64-LABEL: xor_v4i64:
; LMULMAX1-RV64:       # %bb.0:
; LMULMAX1-RV64-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; LMULMAX1-RV64-NEXT:    vle64.v v8, (a0)
; LMULMAX1-RV64-NEXT:    addi a2, a1, 16
; LMULMAX1-RV64-NEXT:    vle64.v v9, (a2)
; LMULMAX1-RV64-NEXT:    addi a2, a0, 16
; LMULMAX1-RV64-NEXT:    vle64.v v10, (a2)
; LMULMAX1-RV64-NEXT:    vle64.v v11, (a1)
; LMULMAX1-RV64-NEXT:    vxor.vv v9, v10, v9
; LMULMAX1-RV64-NEXT:    vxor.vv v8, v8, v11
; LMULMAX1-RV64-NEXT:    vse64.v v8, (a0)
; LMULMAX1-RV64-NEXT:    vse64.v v9, (a2)
; LMULMAX1-RV64-NEXT:    ret
  %a = load <4 x i64>, ptr %x
  %b = load <4 x i64>, ptr %y
  %c = xor <4 x i64> %a, %b
  store <4 x i64> %c, ptr %x
  ret void
}

; Lane-wise logical right shift on <32 x i8>. The vector loaded from %x is
; shifted by the per-lane amounts loaded from %y and the result is stored back
; through %x.
; The lmul-max=2 runs expect a single e8/m2 vsrl.vv over 32 elements, with the
; element count set up by li a2, 32 plus vsetvli rather than a vsetivli
; immediate (presumably because 32 is outside the vsetivli immediate range).
; The lmul-max=1 runs expect two e8/m1 halves of 16 elements each, the upper
; half at byte offset 16 from each pointer. On riscv64 the %y upper half lands
; in v9 and the %x upper half in v10, so the upper-half vsrl.vv reads v10, v9
; and the %x data is still the first vector source, as in the riscv32 output.
define void @lshr_v32i8(ptr %x, ptr %y) {
; LMULMAX2-LABEL: lshr_v32i8:
; LMULMAX2:       # %bb.0:
; LMULMAX2-NEXT:    li a2, 32
; LMULMAX2-NEXT:    vsetvli zero, a2, e8, m2, ta, ma
; LMULMAX2-NEXT:    vle8.v v8, (a0)
; LMULMAX2-NEXT:    vle8.v v10, (a1)
; LMULMAX2-NEXT:    vsrl.vv v8, v8, v10
; LMULMAX2-NEXT:    vse8.v v8, (a0)
; LMULMAX2-NEXT:    ret
;
; LMULMAX1-RV32-LABEL: lshr_v32i8:
; LMULMAX1-RV32:       # %bb.0:
; LMULMAX1-RV32-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
; LMULMAX1-RV32-NEXT:    vle8.v v8, (a0)
; LMULMAX1-RV32-NEXT:    addi a2, a0, 16
; LMULMAX1-RV32-NEXT:    vle8.v v9, (a2)
; LMULMAX1-RV32-NEXT:    addi a3, a1, 16
; LMULMAX1-RV32-NEXT:    vle8.v v10, (a3)
; LMULMAX1-RV32-NEXT:    vle8.v v11, (a1)
; LMULMAX1-RV32-NEXT:    vsrl.vv v9, v9, v10
; LMULMAX1-RV32-NEXT:    vsrl.vv v8, v8, v11
; LMULMAX1-RV32-NEXT:    vse8.v v8, (a0)
; LMULMAX1-RV32-NEXT:    vse8.v v9, (a2)
; LMULMAX1-RV32-NEXT:    ret
;
; LMULMAX1-RV64-LABEL: lshr_v32i8:
; LMULMAX1-RV64:       # %bb.0:
; LMULMAX1-RV64-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
; LMULMAX1-RV64-NEXT:    vle8.v v8, (a0)
; LMULMAX1-RV64-NEXT:    addi a2, a1, 16
; LMULMAX1-RV64-NEXT:    vle8.v v9, (a2)
; LMULMAX1-RV64-NEXT:    addi a2, a0, 16
; LMULMAX1-RV64-NEXT:    vle8.v v10, (a2)
; LMULMAX1-RV64-NEXT:    vle8.v v11, (a1)
; LMULMAX1-RV64-NEXT:    vsrl.vv v9, v10, v9
; LMULMAX1-RV64-NEXT:    vsrl.vv v8, v8, v11
; LMULMAX1-RV64-NEXT:    vse8.v v8, (a0)
; LMULMAX1-RV64-NEXT:    vse8.v v9, (a2)
; LMULMAX1-RV64-NEXT:    ret
  %a = load <32 x i8>, ptr %x
  %b = load <32 x i8>, ptr %y
  %c = lshr <32 x i8> %a, %b
  store <32 x i8> %c, ptr %x
  ret void
}

; Lane-wise logical right shift on <16 x i16>. The vector loaded from %x is
; shifted by the per-lane amounts loaded from %y and the result is stored back
; through %x.
; The lmul-max=2 runs expect a single e16/m2 vsrl.vv over 16 elements. The
; lmul-max=1 runs expect two e16/m1 halves of 8 elements each, the upper half
; at byte offset 16 from each pointer. On riscv64 the %y upper half lands in
; v9 and the %x upper half in v10, so the upper-half vsrl.vv reads v10, v9 and
; the %x data is still the first vector source, as in the riscv32 output.
define void @lshr_v16i16(ptr %x, ptr %y) {
; LMULMAX2-LABEL: lshr_v16i16:
; LMULMAX2:       # %bb.0:
; LMULMAX2-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
; LMULMAX2-NEXT:    vle16.v v8, (a0)
; LMULMAX2-NEXT:    vle16.v v10, (a1)
; LMULMAX2-NEXT:    vsrl.vv v8, v8, v10
; LMULMAX2-NEXT:    vse16.v v8, (a0)
; LMULMAX2-NEXT:    ret
;
; LMULMAX1-RV32-LABEL: lshr_v16i16:
; LMULMAX1-RV32:       # %bb.0:
; LMULMAX1-RV32-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; LMULMAX1-RV32-NEXT:    vle16.v v8, (a0)
; LMULMAX1-RV32-NEXT:    addi a2, a0, 16
; LMULMAX1-RV32-NEXT:    vle16.v v9, (a2)
; LMULMAX1-RV32-NEXT:    addi a3, a1, 16
; LMULMAX1-RV32-NEXT:    vle16.v v10, (a3)
; LMULMAX1-RV32-NEXT:    vle16.v v11, (a1)
; LMULMAX1-RV32-NEXT:    vsrl.vv v9, v9, v10
; LMULMAX1-RV32-NEXT:    vsrl.vv v8, v8, v11
; LMULMAX1-RV32-NEXT:    vse16.v v8, (a0)
; LMULMAX1-RV32-NEXT:    vse16.v v9, (a2)
; LMULMAX1-RV32-NEXT:    ret
;
; LMULMAX1-RV64-LABEL: lshr_v16i16:
; LMULMAX1-RV64:       # %bb.0:
; LMULMAX1-RV64-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; LMULMAX1-RV64-NEXT:    vle16.v v8, (a0)
; LMULMAX1-RV64-NEXT:    addi a2, a1, 16
; LMULMAX1-RV64-NEXT:    vle16.v v9, (a2)
; LMULMAX1-RV64-NEXT:    addi a2, a0, 16
; LMULMAX1-RV64-NEXT:    vle16.v v10, (a2)
; LMULMAX1-RV64-NEXT:    vle16.v v11, (a1)
; LMULMAX1-RV64-NEXT:    vsrl.vv v9, v10, v9
; LMULMAX1-RV64-NEXT:    vsrl.vv v8, v8, v11
; LMULMAX1-RV64-NEXT:    vse16.v v8, (a0)
; LMULMAX1-RV64-NEXT:    vse16.v v9, (a2)
; LMULMAX1-RV64-NEXT:    ret
  %a = load <16 x i16>, ptr %x
  %b = load <16 x i16>, ptr %y
  %c = lshr <16 x i16> %a, %b
  store <16 x i16> %c, ptr %x
  ret void
}

; Lane-wise logical right shift on <8 x i32>. The vector loaded from %x is
; shifted by the per-lane amounts loaded from %y and the result is stored back
; through %x.
; The lmul-max=2 runs expect a single e32/m2 vsrl.vv over 8 elements. The
; lmul-max=1 runs expect two e32/m1 halves of 4 elements each, the upper half
; at byte offset 16 from each pointer. On riscv64 the %y upper half lands in
; v9 and the %x upper half in v10, so the upper-half vsrl.vv reads v10, v9 and
; the %x data is still the first vector source, as in the riscv32 output.
define void @lshr_v8i32(ptr %x, ptr %y) {
; LMULMAX2-LABEL: lshr_v8i32:
; LMULMAX2:       # %bb.0:
; LMULMAX2-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; LMULMAX2-NEXT:    vle32.v v8, (a0)
; LMULMAX2-NEXT:    vle32.v v10, (a1)
; LMULMAX2-NEXT:    vsrl.vv v8, v8, v10
; LMULMAX2-NEXT:    vse32.v v8, (a0)
; LMULMAX2-NEXT:    ret
;
; LMULMAX1-RV32-LABEL: lshr_v8i32:
; LMULMAX1-RV32:       # %bb.0:
; LMULMAX1-RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; LMULMAX1-RV32-NEXT:    vle32.v v8, (a0)
; LMULMAX1-RV32-NEXT:    addi a2, a0, 16
; LMULMAX1-RV32-NEXT:    vle32.v v9, (a2)
; LMULMAX1-RV32-NEXT:    addi a3, a1, 16
; LMULMAX1-RV32-NEXT:    vle32.v v10, (a3)
; LMULMAX1-RV32-NEXT:    vle32.v v11, (a1)
; LMULMAX1-RV32-NEXT:    vsrl.vv v9, v9, v10
; LMULMAX1-RV32-NEXT:    vsrl.vv v8, v8, v11
; LMULMAX1-RV32-NEXT:    vse32.v v8, (a0)
; LMULMAX1-RV32-NEXT:    vse32.v v9, (a2)
; LMULMAX1-RV32-NEXT:    ret
;
; LMULMAX1-RV64-LABEL: lshr_v8i32:
; LMULMAX1-RV64:       # %bb.0:
; LMULMAX1-RV64-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; LMULMAX1-RV64-NEXT:    vle32.v v8, (a0)
; LMULMAX1-RV64-NEXT:    addi a2, a1, 16
; LMULMAX1-RV64-NEXT:    vle32.v v9, (a2)
; LMULMAX1-RV64-NEXT:    addi a2, a0, 16
; LMULMAX1-RV64-NEXT:    vle32.v v10, (a2)
; LMULMAX1-RV64-NEXT:    vle32.v v11, (a1)
; LMULMAX1-RV64-NEXT:    vsrl.vv v9, v10, v9
; LMULMAX1-RV64-NEXT:    vsrl.vv v8, v8, v11
; LMULMAX1-RV64-NEXT:    vse32.v v8, (a0)
; LMULMAX1-RV64-NEXT:    vse32.v v9, (a2)
; LMULMAX1-RV64-NEXT:    ret
  %a = load <8 x i32>, ptr %x
  %b = load <8 x i32>, ptr %y
  %c = lshr <8 x i32> %a, %b
  store <8 x i32> %c, ptr %x
  ret void
}

; Lane-wise logical right shift on <4 x i64>. The vector loaded from %x is
; shifted by the per-lane amounts loaded from %y and the result is stored back
; through %x.
; The lmul-max=2 runs expect a single e64/m2 vsrl.vv over 4 elements. The
; lmul-max=1 runs expect two e64/m1 halves of 2 elements each, the upper half
; at byte offset 16 from each pointer. On riscv64 the %y upper half lands in
; v9 and the %x upper half in v10, so the upper-half vsrl.vv reads v10, v9 and
; the %x data is still the first vector source, as in the riscv32 output.
define void @lshr_v4i64(ptr %x, ptr %y) {
; LMULMAX2-LABEL: lshr_v4i64:
; LMULMAX2:       # %bb.0:
; LMULMAX2-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; LMULMAX2-NEXT:    vle64.v v8, (a0)
; LMULMAX2-NEXT:    vle64.v v10, (a1)
; LMULMAX2-NEXT:    vsrl.vv v8, v8, v10
; LMULMAX2-NEXT:    vse64.v v8, (a0)
; LMULMAX2-NEXT:    ret
;
; LMULMAX1-RV32-LABEL: lshr_v4i64:
; LMULMAX1-RV32:       # %bb.0:
; LMULMAX1-RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; LMULMAX1-RV32-NEXT:    vle64.v v8, (a0)
; LMULMAX1-RV32-NEXT:    addi a2, a0, 16
; LMULMAX1-RV32-NEXT:    vle64.v v9, (a2)
; LMULMAX1-RV32-NEXT:    addi a3, a1, 16
; LMULMAX1-RV32-NEXT:    vle64.v v10, (a3)
; LMULMAX1-RV32-NEXT:    vle64.v v11, (a1)
; LMULMAX1-RV32-NEXT:    vsrl.vv v9, v9, v10
; LMULMAX1-RV32-NEXT:    vsrl.vv v8, v8, v11
; LMULMAX1-RV32-NEXT:    vse64.v v8, (a0)
; LMULMAX1-RV32-NEXT:    vse64.v v9, (a2)
; LMULMAX1-RV32-NEXT:    ret
;
; LMULMAX1-RV64-LABEL: lshr_v4i64:
; LMULMAX1-RV64:       # %bb.0:
; LMULMAX1-RV64-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; LMULMAX1-RV64-NEXT:    vle64.v v8, (a0)
; LMULMAX1-RV64-NEXT:    addi a2, a1, 16
; LMULMAX1-RV64-NEXT:    vle64.v v9, (a2)
; LMULMAX1-RV64-NEXT:    addi a2, a0, 16
; LMULMAX1-RV64-NEXT:    vle64.v v10, (a2)
; LMULMAX1-RV64-NEXT:    vle64.v v11, (a1)
; LMULMAX1-RV64-NEXT:    vsrl.vv v9, v10, v9
; LMULMAX1-RV64-NEXT:    vsrl.vv v8, v8, v11
; LMULMAX1-RV64-NEXT:    vse64.v v8, (a0)
; LMULMAX1-RV64-NEXT:    vse64.v v9, (a2)
; LMULMAX1-RV64-NEXT:    ret
  %a = load <4 x i64>, ptr %x
  %b = load <4 x i64>, ptr %y
  %c = lshr <4 x i64> %a, %b
  store <4 x i64> %c, ptr %x
  ret void
}

; Lane-wise arithmetic right shift on <32 x i8>. The vector loaded from %x is
; shifted by the per-lane amounts loaded from %y and the result is stored back
; through %x.
; The lmul-max=2 runs expect a single e8/m2 vsra.vv over 32 elements, with the
; element count set up by li a2, 32 plus vsetvli rather than a vsetivli
; immediate (presumably because 32 is outside the vsetivli immediate range).
; The lmul-max=1 runs expect two e8/m1 halves of 16 elements each, the upper
; half at byte offset 16 from each pointer. On riscv64 the %y upper half lands
; in v9 and the %x upper half in v10, so the upper-half vsra.vv reads v10, v9
; and the %x data is still the first vector source, as in the riscv32 output.
define void @ashr_v32i8(ptr %x, ptr %y) {
; LMULMAX2-LABEL: ashr_v32i8:
; LMULMAX2:       # %bb.0:
; LMULMAX2-NEXT:    li a2, 32
; LMULMAX2-NEXT:    vsetvli zero, a2, e8, m2, ta, ma
; LMULMAX2-NEXT:    vle8.v v8, (a0)
; LMULMAX2-NEXT:    vle8.v v10, (a1)
; LMULMAX2-NEXT:    vsra.vv v8, v8, v10
; LMULMAX2-NEXT:    vse8.v v8, (a0)
; LMULMAX2-NEXT:    ret
;
; LMULMAX1-RV32-LABEL: ashr_v32i8:
; LMULMAX1-RV32:       # %bb.0:
; LMULMAX1-RV32-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
; LMULMAX1-RV32-NEXT:    vle8.v v8, (a0)
; LMULMAX1-RV32-NEXT:    addi a2, a0, 16
; LMULMAX1-RV32-NEXT:    vle8.v v9, (a2)
; LMULMAX1-RV32-NEXT:    addi a3, a1, 16
; LMULMAX1-RV32-NEXT:    vle8.v v10, (a3)
; LMULMAX1-RV32-NEXT:    vle8.v v11, (a1)
; LMULMAX1-RV32-NEXT:    vsra.vv v9, v9, v10
; LMULMAX1-RV32-NEXT:    vsra.vv v8, v8, v11
; LMULMAX1-RV32-NEXT:    vse8.v v8, (a0)
; LMULMAX1-RV32-NEXT:    vse8.v v9, (a2)
; LMULMAX1-RV32-NEXT:    ret
;
; LMULMAX1-RV64-LABEL: ashr_v32i8:
; LMULMAX1-RV64:       # %bb.0:
; LMULMAX1-RV64-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
; LMULMAX1-RV64-NEXT:    vle8.v v8, (a0)
; LMULMAX1-RV64-NEXT:    addi a2, a1, 16
; LMULMAX1-RV64-NEXT:    vle8.v v9, (a2)
; LMULMAX1-RV64-NEXT:    addi a2, a0, 16
; LMULMAX1-RV64-NEXT:    vle8.v v10, (a2)
; LMULMAX1-RV64-NEXT:    vle8.v v11, (a1)
; LMULMAX1-RV64-NEXT:    vsra.vv v9, v10, v9
; LMULMAX1-RV64-NEXT:    vsra.vv v8, v8, v11
; LMULMAX1-RV64-NEXT:    vse8.v v8, (a0)
; LMULMAX1-RV64-NEXT:    vse8.v v9, (a2)
; LMULMAX1-RV64-NEXT:    ret
  %a = load <32 x i8>, ptr %x
  %b = load <32 x i8>, ptr %y
  %c = ashr <32 x i8> %a, %b
  store <32 x i8> %c, ptr %x
  ret void
}

; Lane-wise arithmetic right shift on <16 x i16>. The vector loaded from %x is
; shifted by the per-lane amounts loaded from %y and the result is stored back
; through %x.
; The lmul-max=2 runs expect a single e16/m2 vsra.vv over 16 elements. The
; lmul-max=1 runs expect two e16/m1 halves of 8 elements each, the upper half
; at byte offset 16 from each pointer. On riscv64 the %y upper half lands in
; v9 and the %x upper half in v10, so the upper-half vsra.vv reads v10, v9 and
; the %x data is still the first vector source, as in the riscv32 output.
define void @ashr_v16i16(ptr %x, ptr %y) {
; LMULMAX2-LABEL: ashr_v16i16:
; LMULMAX2:       # %bb.0:
; LMULMAX2-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
; LMULMAX2-NEXT:    vle16.v v8, (a0)
; LMULMAX2-NEXT:    vle16.v v10, (a1)
; LMULMAX2-NEXT:    vsra.vv v8, v8, v10
; LMULMAX2-NEXT:    vse16.v v8, (a0)
; LMULMAX2-NEXT:    ret
;
; LMULMAX1-RV32-LABEL: ashr_v16i16:
; LMULMAX1-RV32:       # %bb.0:
; LMULMAX1-RV32-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; LMULMAX1-RV32-NEXT:    vle16.v v8, (a0)
; LMULMAX1-RV32-NEXT:    addi a2, a0, 16
; LMULMAX1-RV32-NEXT:    vle16.v v9, (a2)
; LMULMAX1-RV32-NEXT:    addi a3, a1, 16
; LMULMAX1-RV32-NEXT:    vle16.v v10, (a3)
; LMULMAX1-RV32-NEXT:    vle16.v v11, (a1)
; LMULMAX1-RV32-NEXT:    vsra.vv v9, v9, v10
; LMULMAX1-RV32-NEXT:    vsra.vv v8, v8, v11
; LMULMAX1-RV32-NEXT:    vse16.v v8, (a0)
; LMULMAX1-RV32-NEXT:    vse16.v v9, (a2)
; LMULMAX1-RV32-NEXT:    ret
;
; LMULMAX1-RV64-LABEL: ashr_v16i16:
; LMULMAX1-RV64:       # %bb.0:
; LMULMAX1-RV64-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; LMULMAX1-RV64-NEXT:    vle16.v v8, (a0)
; LMULMAX1-RV64-NEXT:    addi a2, a1, 16
; LMULMAX1-RV64-NEXT:    vle16.v v9, (a2)
; LMULMAX1-RV64-NEXT:    addi a2, a0, 16
; LMULMAX1-RV64-NEXT:    vle16.v v10, (a2)
; LMULMAX1-RV64-NEXT:    vle16.v v11, (a1)
; LMULMAX1-RV64-NEXT:    vsra.vv v9, v10, v9
; LMULMAX1-RV64-NEXT:    vsra.vv v8, v8, v11
; LMULMAX1-RV64-NEXT:    vse16.v v8, (a0)
; LMULMAX1-RV64-NEXT:    vse16.v v9, (a2)
; LMULMAX1-RV64-NEXT:    ret
  %a = load <16 x i16>, ptr %x
  %b = load <16 x i16>, ptr %y
  %c = ashr <16 x i16> %a, %b
  store <16 x i16> %c, ptr %x
  ret void
}

; Lane-wise arithmetic right shift on <8 x i32>. The vector loaded from %x is
; shifted by the per-lane amounts loaded from %y and the result is stored back
; through %x.
; The lmul-max=2 runs expect a single e32/m2 vsra.vv over 8 elements. The
; lmul-max=1 runs expect two e32/m1 halves of 4 elements each, the upper half
; at byte offset 16 from each pointer. On riscv64 the %y upper half lands in
; v9 and the %x upper half in v10, so the upper-half vsra.vv reads v10, v9 and
; the %x data is still the first vector source, as in the riscv32 output.
define void @ashr_v8i32(ptr %x, ptr %y) {
; LMULMAX2-LABEL: ashr_v8i32:
; LMULMAX2:       # %bb.0:
; LMULMAX2-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; LMULMAX2-NEXT:    vle32.v v8, (a0)
; LMULMAX2-NEXT:    vle32.v v10, (a1)
; LMULMAX2-NEXT:    vsra.vv v8, v8, v10
; LMULMAX2-NEXT:    vse32.v v8, (a0)
; LMULMAX2-NEXT:    ret
;
; LMULMAX1-RV32-LABEL: ashr_v8i32:
; LMULMAX1-RV32:       # %bb.0:
; LMULMAX1-RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; LMULMAX1-RV32-NEXT:    vle32.v v8, (a0)
; LMULMAX1-RV32-NEXT:    addi a2, a0, 16
; LMULMAX1-RV32-NEXT:    vle32.v v9, (a2)
; LMULMAX1-RV32-NEXT:    addi a3, a1, 16
; LMULMAX1-RV32-NEXT:    vle32.v v10, (a3)
; LMULMAX1-RV32-NEXT:    vle32.v v11, (a1)
; LMULMAX1-RV32-NEXT:    vsra.vv v9, v9, v10
; LMULMAX1-RV32-NEXT:    vsra.vv v8, v8, v11
; LMULMAX1-RV32-NEXT:    vse32.v v8, (a0)
; LMULMAX1-RV32-NEXT:    vse32.v v9, (a2)
; LMULMAX1-RV32-NEXT:    ret
;
; LMULMAX1-RV64-LABEL: ashr_v8i32:
; LMULMAX1-RV64:       # %bb.0:
; LMULMAX1-RV64-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; LMULMAX1-RV64-NEXT:    vle32.v v8, (a0)
; LMULMAX1-RV64-NEXT:    addi a2, a1, 16
; LMULMAX1-RV64-NEXT:    vle32.v v9, (a2)
; LMULMAX1-RV64-NEXT:    addi a2, a0, 16
; LMULMAX1-RV64-NEXT:    vle32.v v10, (a2)
; LMULMAX1-RV64-NEXT:    vle32.v v11, (a1)
; LMULMAX1-RV64-NEXT:    vsra.vv v9, v10, v9
; LMULMAX1-RV64-NEXT:    vsra.vv v8, v8, v11
; LMULMAX1-RV64-NEXT:    vse32.v v8, (a0)
; LMULMAX1-RV64-NEXT:    vse32.v v9, (a2)
; LMULMAX1-RV64-NEXT:    ret
  %a = load <8 x i32>, ptr %x
  %b = load <8 x i32>, ptr %y
  %c = ashr <8 x i32> %a, %b
  store <8 x i32> %c, ptr %x
  ret void
}

; Lane-wise arithmetic right shift on <4 x i64>. The vector loaded from %x is
; shifted by the per-lane amounts loaded from %y and the result is stored back
; through %x.
; The lmul-max=2 runs expect a single e64/m2 vsra.vv over 4 elements. The
; lmul-max=1 runs expect two e64/m1 halves of 2 elements each, the upper half
; at byte offset 16 from each pointer. On riscv64 the %y upper half lands in
; v9 and the %x upper half in v10, so the upper-half vsra.vv reads v10, v9 and
; the %x data is still the first vector source, as in the riscv32 output.
define void @ashr_v4i64(ptr %x, ptr %y) {
; LMULMAX2-LABEL: ashr_v4i64:
; LMULMAX2:       # %bb.0:
; LMULMAX2-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; LMULMAX2-NEXT:    vle64.v v8, (a0)
; LMULMAX2-NEXT:    vle64.v v10, (a1)
; LMULMAX2-NEXT:    vsra.vv v8, v8, v10
; LMULMAX2-NEXT:    vse64.v v8, (a0)
; LMULMAX2-NEXT:    ret
;
; LMULMAX1-RV32-LABEL: ashr_v4i64:
; LMULMAX1-RV32:       # %bb.0:
; LMULMAX1-RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; LMULMAX1-RV32-NEXT:    vle64.v v8, (a0)
; LMULMAX1-RV32-NEXT:    addi a2, a0, 16
; LMULMAX1-RV32-NEXT:    vle64.v v9, (a2)
; LMULMAX1-RV32-NEXT:    addi a3, a1, 16
; LMULMAX1-RV32-NEXT:    vle64.v v10, (a3)
; LMULMAX1-RV32-NEXT:    vle64.v v11, (a1)
; LMULMAX1-RV32-NEXT:    vsra.vv v9, v9, v10
; LMULMAX1-RV32-NEXT:    vsra.vv v8, v8, v11
; LMULMAX1-RV32-NEXT:    vse64.v v8, (a0)
; LMULMAX1-RV32-NEXT:    vse64.v v9, (a2)
; LMULMAX1-RV32-NEXT:    ret
;
; LMULMAX1-RV64-LABEL: ashr_v4i64:
; LMULMAX1-RV64:       # %bb.0:
; LMULMAX1-RV64-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; LMULMAX1-RV64-NEXT:    vle64.v v8, (a0)
; LMULMAX1-RV64-NEXT:    addi a2, a1, 16
; LMULMAX1-RV64-NEXT:    vle64.v v9, (a2)
; LMULMAX1-RV64-NEXT:    addi a2, a0, 16
; LMULMAX1-RV64-NEXT:    vle64.v v10, (a2)
; LMULMAX1-RV64-NEXT:    vle64.v v11, (a1)
; LMULMAX1-RV64-NEXT:    vsra.vv v9, v10, v9
; LMULMAX1-RV64-NEXT:    vsra.vv v8, v8, v11
; LMULMAX1-RV64-NEXT:    vse64.v v8, (a0)
; LMULMAX1-RV64-NEXT:    vse64.v v9, (a2)
; LMULMAX1-RV64-NEXT:    ret
  %a = load <4 x i64>, ptr %x
  %b = load <4 x i64>, ptr %y
  %c = ashr <4 x i64> %a, %b
  store <4 x i64> %c, ptr %x
  ret void
}

; Element-wise left shift of two <32 x i8> vectors: %x holds the values, %y
; the per-element shift amounts, and the result is stored back over %x.
; Expected codegen: with lmul-max=2 a single m2 vsll.vv, where VL=32 is
; materialized with li + vsetvli instead of vsetivli (presumably because 32
; does not fit the 5-bit vsetivli immediate); with lmul-max=1 the operation is
; split into two m1 halves of 16 elements at byte offsets 0 and 16. The
; riscv32 and riscv64 expectations for the split differ only in register
; assignment and the order of the high-half loads.
define void @shl_v32i8(ptr %x, ptr %y) {
; LMULMAX2-LABEL: shl_v32i8:
; LMULMAX2:       # %bb.0:
; LMULMAX2-NEXT:    li a2, 32
; LMULMAX2-NEXT:    vsetvli zero, a2, e8, m2, ta, ma
; LMULMAX2-NEXT:    vle8.v v8, (a0)
; LMULMAX2-NEXT:    vle8.v v10, (a1)
; LMULMAX2-NEXT:    vsll.vv v8, v8, v10
; LMULMAX2-NEXT:    vse8.v v8, (a0)
; LMULMAX2-NEXT:    ret
;
; LMULMAX1-RV32-LABEL: shl_v32i8:
; LMULMAX1-RV32:       # %bb.0:
; LMULMAX1-RV32-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
; LMULMAX1-RV32-NEXT:    vle8.v v8, (a0)
; LMULMAX1-RV32-NEXT:    addi a2, a0, 16
; LMULMAX1-RV32-NEXT:    vle8.v v9, (a2)
; LMULMAX1-RV32-NEXT:    addi a3, a1, 16
; LMULMAX1-RV32-NEXT:    vle8.v v10, (a3)
; LMULMAX1-RV32-NEXT:    vle8.v v11, (a1)
; LMULMAX1-RV32-NEXT:    vsll.vv v9, v9, v10
; LMULMAX1-RV32-NEXT:    vsll.vv v8, v8, v11
; LMULMAX1-RV32-NEXT:    vse8.v v8, (a0)
; LMULMAX1-RV32-NEXT:    vse8.v v9, (a2)
; LMULMAX1-RV32-NEXT:    ret
;
; LMULMAX1-RV64-LABEL: shl_v32i8:
; LMULMAX1-RV64:       # %bb.0:
; LMULMAX1-RV64-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
; LMULMAX1-RV64-NEXT:    vle8.v v8, (a0)
; LMULMAX1-RV64-NEXT:    addi a2, a1, 16
; LMULMAX1-RV64-NEXT:    vle8.v v9, (a2)
; LMULMAX1-RV64-NEXT:    addi a2, a0, 16
; LMULMAX1-RV64-NEXT:    vle8.v v10, (a2)
; LMULMAX1-RV64-NEXT:    vle8.v v11, (a1)
; LMULMAX1-RV64-NEXT:    vsll.vv v9, v10, v9
; LMULMAX1-RV64-NEXT:    vsll.vv v8, v8, v11
; LMULMAX1-RV64-NEXT:    vse8.v v8, (a0)
; LMULMAX1-RV64-NEXT:    vse8.v v9, (a2)
; LMULMAX1-RV64-NEXT:    ret
  %a = load <32 x i8>, ptr %x
  %b = load <32 x i8>, ptr %y
  %c = shl <32 x i8> %a, %b
  store <32 x i8> %c, ptr %x
  ret void
}

; Element-wise left shift of two <16 x i16> vectors: %x holds the values, %y
; the per-element shift amounts, and the result is stored back over %x.
; Expected codegen: with lmul-max=2 a single m2 vsll.vv at VL=16; with
; lmul-max=1 the operation is split into two m1 halves of 8 elements at byte
; offsets 0 and 16. The riscv32 and riscv64 expectations for the split differ
; only in register assignment and the order of the high-half loads.
define void @shl_v16i16(ptr %x, ptr %y) {
; LMULMAX2-LABEL: shl_v16i16:
; LMULMAX2:       # %bb.0:
; LMULMAX2-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
; LMULMAX2-NEXT:    vle16.v v8, (a0)
; LMULMAX2-NEXT:    vle16.v v10, (a1)
; LMULMAX2-NEXT:    vsll.vv v8, v8, v10
; LMULMAX2-NEXT:    vse16.v v8, (a0)
; LMULMAX2-NEXT:    ret
;
; LMULMAX1-RV32-LABEL: shl_v16i16:
; LMULMAX1-RV32:       # %bb.0:
; LMULMAX1-RV32-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; LMULMAX1-RV32-NEXT:    vle16.v v8, (a0)
; LMULMAX1-RV32-NEXT:    addi a2, a0, 16
; LMULMAX1-RV32-NEXT:    vle16.v v9, (a2)
; LMULMAX1-RV32-NEXT:    addi a3, a1, 16
; LMULMAX1-RV32-NEXT:    vle16.v v10, (a3)
; LMULMAX1-RV32-NEXT:    vle16.v v11, (a1)
; LMULMAX1-RV32-NEXT:    vsll.vv v9, v9, v10
; LMULMAX1-RV32-NEXT:    vsll.vv v8, v8, v11
; LMULMAX1-RV32-NEXT:    vse16.v v8, (a0)
; LMULMAX1-RV32-NEXT:    vse16.v v9, (a2)
; LMULMAX1-RV32-NEXT:    ret
;
; LMULMAX1-RV64-LABEL: shl_v16i16:
; LMULMAX1-RV64:       # %bb.0:
; LMULMAX1-RV64-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; LMULMAX1-RV64-NEXT:    vle16.v v8, (a0)
; LMULMAX1-RV64-NEXT:    addi a2, a1, 16
; LMULMAX1-RV64-NEXT:    vle16.v v9, (a2)
; LMULMAX1-RV64-NEXT:    addi a2, a0, 16
; LMULMAX1-RV64-NEXT:    vle16.v v10, (a2)
; LMULMAX1-RV64-NEXT:    vle16.v v11, (a1)
; LMULMAX1-RV64-NEXT:    vsll.vv v9, v10, v9
; LMULMAX1-RV64-NEXT:    vsll.vv v8, v8, v11
; LMULMAX1-RV64-NEXT:    vse16.v v8, (a0)
; LMULMAX1-RV64-NEXT:    vse16.v v9, (a2)
; LMULMAX1-RV64-NEXT:    ret
  %a = load <16 x i16>, ptr %x
  %b = load <16 x i16>, ptr %y
  %c = shl <16 x i16> %a, %b
  store <16 x i16> %c, ptr %x
  ret void
}

; Element-wise left shift of two <8 x i32> vectors: %x holds the values, %y
; the per-element shift amounts, and the result is stored back over %x.
; Expected codegen: with lmul-max=2 a single m2 vsll.vv at VL=8; with
; lmul-max=1 the operation is split into two m1 halves of 4 elements at byte
; offsets 0 and 16. The riscv32 and riscv64 expectations for the split differ
; only in register assignment and the order of the high-half loads.
define void @shl_v8i32(ptr %x, ptr %y) {
; LMULMAX2-LABEL: shl_v8i32:
; LMULMAX2:       # %bb.0:
; LMULMAX2-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; LMULMAX2-NEXT:    vle32.v v8, (a0)
; LMULMAX2-NEXT:    vle32.v v10, (a1)
; LMULMAX2-NEXT:    vsll.vv v8, v8, v10
; LMULMAX2-NEXT:    vse32.v v8, (a0)
; LMULMAX2-NEXT:    ret
;
; LMULMAX1-RV32-LABEL: shl_v8i32:
; LMULMAX1-RV32:       # %bb.0:
; LMULMAX1-RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; LMULMAX1-RV32-NEXT:    vle32.v v8, (a0)
; LMULMAX1-RV32-NEXT:    addi a2, a0, 16
; LMULMAX1-RV32-NEXT:    vle32.v v9, (a2)
; LMULMAX1-RV32-NEXT:    addi a3, a1, 16
; LMULMAX1-RV32-NEXT:    vle32.v v10, (a3)
; LMULMAX1-RV32-NEXT:    vle32.v v11, (a1)
; LMULMAX1-RV32-NEXT:    vsll.vv v9, v9, v10
; LMULMAX1-RV32-NEXT:    vsll.vv v8, v8, v11
; LMULMAX1-RV32-NEXT:    vse32.v v8, (a0)
; LMULMAX1-RV32-NEXT:    vse32.v v9, (a2)
; LMULMAX1-RV32-NEXT:    ret
;
; LMULMAX1-RV64-LABEL: shl_v8i32:
; LMULMAX1-RV64:       # %bb.0:
; LMULMAX1-RV64-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; LMULMAX1-RV64-NEXT:    vle32.v v8, (a0)
; LMULMAX1-RV64-NEXT:    addi a2, a1, 16
; LMULMAX1-RV64-NEXT:    vle32.v v9, (a2)
; LMULMAX1-RV64-NEXT:    addi a2, a0, 16
; LMULMAX1-RV64-NEXT:    vle32.v v10, (a2)
; LMULMAX1-RV64-NEXT:    vle32.v v11, (a1)
; LMULMAX1-RV64-NEXT:    vsll.vv v9, v10, v9
; LMULMAX1-RV64-NEXT:    vsll.vv v8, v8, v11
; LMULMAX1-RV64-NEXT:    vse32.v v8, (a0)
; LMULMAX1-RV64-NEXT:    vse32.v v9, (a2)
; LMULMAX1-RV64-NEXT:    ret
  %a = load <8 x i32>, ptr %x
  %b = load <8 x i32>, ptr %y
  %c = shl <8 x i32> %a, %b
  store <8 x i32> %c, ptr %x
  ret void
}

; Element-wise left shift of two <4 x i64> vectors: %x holds the values, %y
; the per-element shift amounts, and the result is stored back over %x.
; Expected codegen: with lmul-max=2 a single m2 vsll.vv at VL=4; with
; lmul-max=1 the operation is split into two m1 halves of 2 elements at byte
; offsets 0 and 16. The riscv32 and riscv64 expectations for the split differ
; only in register assignment and the order of the high-half loads.
define void @shl_v4i64(ptr %x, ptr %y) {
; LMULMAX2-LABEL: shl_v4i64:
; LMULMAX2:       # %bb.0:
; LMULMAX2-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; LMULMAX2-NEXT:    vle64.v v8, (a0)
; LMULMAX2-NEXT:    vle64.v v10, (a1)
; LMULMAX2-NEXT:    vsll.vv v8, v8, v10
; LMULMAX2-NEXT:    vse64.v v8, (a0)
; LMULMAX2-NEXT:    ret
;
; LMULMAX1-RV32-LABEL: shl_v4i64:
; LMULMAX1-RV32:       # %bb.0:
; LMULMAX1-RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; LMULMAX1-RV32-NEXT:    vle64.v v8, (a0)
; LMULMAX1-RV32-NEXT:    addi a2, a0, 16
; LMULMAX1-RV32-NEXT:    vle64.v v9, (a2)
; LMULMAX1-RV32-NEXT:    addi a3, a1, 16
; LMULMAX1-RV32-NEXT:    vle64.v v10, (a3)
; LMULMAX1-RV32-NEXT:    vle64.v v11, (a1)
; LMULMAX1-RV32-NEXT:    vsll.vv v9, v9, v10
; LMULMAX1-RV32-NEXT:    vsll.vv v8, v8, v11
; LMULMAX1-RV32-NEXT:    vse64.v v8, (a0)
; LMULMAX1-RV32-NEXT:    vse64.v v9, (a2)
; LMULMAX1-RV32-NEXT:    ret
;
; LMULMAX1-RV64-LABEL: shl_v4i64:
; LMULMAX1-RV64:       # %bb.0:
; LMULMAX1-RV64-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; LMULMAX1-RV64-NEXT:    vle64.v v8, (a0)
; LMULMAX1-RV64-NEXT:    addi a2, a1, 16
; LMULMAX1-RV64-NEXT:    vle64.v v9, (a2)
; LMULMAX1-RV64-NEXT:    addi a2, a0, 16
; LMULMAX1-RV64-NEXT:    vle64.v v10, (a2)
; LMULMAX1-RV64-NEXT:    vle64.v v11, (a1)
; LMULMAX1-RV64-NEXT:    vsll.vv v9, v10, v9
; LMULMAX1-RV64-NEXT:    vsll.vv v8, v8, v11
; LMULMAX1-RV64-NEXT:    vse64.v v8, (a0)
; LMULMAX1-RV64-NEXT:    vse64.v v9, (a2)
; LMULMAX1-RV64-NEXT:    ret
  %a = load <4 x i64>, ptr %x
  %b = load <4 x i64>, ptr %y
  %c = shl <4 x i64> %a, %b
  store <4 x i64> %c, ptr %x
  ret void
}

; Element-wise signed division of two <32 x i8> vectors: the dividend is
; loaded from %x, the divisor from %y, and the quotient is stored back over
; %x.
; Expected codegen: with lmul-max=2 a single m2 vdiv.vv, where VL=32 is
; materialized with li + vsetvli instead of vsetivli (presumably because 32
; does not fit the 5-bit vsetivli immediate); with lmul-max=1 the operation is
; split into two m1 halves of 16 elements at byte offsets 0 and 16. The
; riscv32 and riscv64 expectations for the split differ only in register
; assignment and the order of the high-half loads.
define void @sdiv_v32i8(ptr %x, ptr %y) {
; LMULMAX2-LABEL: sdiv_v32i8:
; LMULMAX2:       # %bb.0:
; LMULMAX2-NEXT:    li a2, 32
; LMULMAX2-NEXT:    vsetvli zero, a2, e8, m2, ta, ma
; LMULMAX2-NEXT:    vle8.v v8, (a0)
; LMULMAX2-NEXT:    vle8.v v10, (a1)
; LMULMAX2-NEXT:    vdiv.vv v8, v8, v10
; LMULMAX2-NEXT:    vse8.v v8, (a0)
; LMULMAX2-NEXT:    ret
;
; LMULMAX1-RV32-LABEL: sdiv_v32i8:
; LMULMAX1-RV32:       # %bb.0:
; LMULMAX1-RV32-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
; LMULMAX1-RV32-NEXT:    vle8.v v8, (a0)
; LMULMAX1-RV32-NEXT:    addi a2, a0, 16
; LMULMAX1-RV32-NEXT:    vle8.v v9, (a2)
; LMULMAX1-RV32-NEXT:    addi a3, a1, 16
; LMULMAX1-RV32-NEXT:    vle8.v v10, (a3)
; LMULMAX1-RV32-NEXT:    vle8.v v11, (a1)
; LMULMAX1-RV32-NEXT:    vdiv.vv v9, v9, v10
; LMULMAX1-RV32-NEXT:    vdiv.vv v8, v8, v11
; LMULMAX1-RV32-NEXT:    vse8.v v8, (a0)
; LMULMAX1-RV32-NEXT:    vse8.v v9, (a2)
; LMULMAX1-RV32-NEXT:    ret
;
; LMULMAX1-RV64-LABEL: sdiv_v32i8:
; LMULMAX1-RV64:       # %bb.0:
; LMULMAX1-RV64-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
; LMULMAX1-RV64-NEXT:    vle8.v v8, (a0)
; LMULMAX1-RV64-NEXT:    addi a2, a1, 16
; LMULMAX1-RV64-NEXT:    vle8.v v9, (a2)
; LMULMAX1-RV64-NEXT:    addi a2, a0, 16
; LMULMAX1-RV64-NEXT:    vle8.v v10, (a2)
; LMULMAX1-RV64-NEXT:    vle8.v v11, (a1)
; LMULMAX1-RV64-NEXT:    vdiv.vv v9, v10, v9
; LMULMAX1-RV64-NEXT:    vdiv.vv v8, v8, v11
; LMULMAX1-RV64-NEXT:    vse8.v v8, (a0)
; LMULMAX1-RV64-NEXT:    vse8.v v9, (a2)
; LMULMAX1-RV64-NEXT:    ret
  %a = load <32 x i8>, ptr %x
  %b = load <32 x i8>, ptr %y
  %c = sdiv <32 x i8> %a, %b
  store <32 x i8> %c, ptr %x
  ret void
}

; Element-wise signed division of two <16 x i16> vectors: the dividend is
; loaded from %x, the divisor from %y, and the quotient is stored back over
; %x.
; Expected codegen: with lmul-max=2 a single m2 vdiv.vv at VL=16; with
; lmul-max=1 the operation is split into two m1 halves of 8 elements at byte
; offsets 0 and 16. The riscv32 and riscv64 expectations for the split differ
; only in register assignment and the order of the high-half loads.
define void @sdiv_v16i16(ptr %x, ptr %y) {
; LMULMAX2-LABEL: sdiv_v16i16:
; LMULMAX2:       # %bb.0:
; LMULMAX2-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
; LMULMAX2-NEXT:    vle16.v v8, (a0)
; LMULMAX2-NEXT:    vle16.v v10, (a1)
; LMULMAX2-NEXT:    vdiv.vv v8, v8, v10
; LMULMAX2-NEXT:    vse16.v v8, (a0)
; LMULMAX2-NEXT:    ret
;
; LMULMAX1-RV32-LABEL: sdiv_v16i16:
; LMULMAX1-RV32:       # %bb.0:
; LMULMAX1-RV32-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; LMULMAX1-RV32-NEXT:    vle16.v v8, (a0)
; LMULMAX1-RV32-NEXT:    addi a2, a0, 16
; LMULMAX1-RV32-NEXT:    vle16.v v9, (a2)
; LMULMAX1-RV32-NEXT:    addi a3, a1, 16
; LMULMAX1-RV32-NEXT:    vle16.v v10, (a3)
; LMULMAX1-RV32-NEXT:    vle16.v v11, (a1)
; LMULMAX1-RV32-NEXT:    vdiv.vv v9, v9, v10
; LMULMAX1-RV32-NEXT:    vdiv.vv v8, v8, v11
; LMULMAX1-RV32-NEXT:    vse16.v v8, (a0)
; LMULMAX1-RV32-NEXT:    vse16.v v9, (a2)
; LMULMAX1-RV32-NEXT:    ret
;
; LMULMAX1-RV64-LABEL: sdiv_v16i16:
; LMULMAX1-RV64:       # %bb.0:
; LMULMAX1-RV64-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; LMULMAX1-RV64-NEXT:    vle16.v v8, (a0)
; LMULMAX1-RV64-NEXT:    addi a2, a1, 16
; LMULMAX1-RV64-NEXT:    vle16.v v9, (a2)
; LMULMAX1-RV64-NEXT:    addi a2, a0, 16
; LMULMAX1-RV64-NEXT:    vle16.v v10, (a2)
; LMULMAX1-RV64-NEXT:    vle16.v v11, (a1)
; LMULMAX1-RV64-NEXT:    vdiv.vv v9, v10, v9
; LMULMAX1-RV64-NEXT:    vdiv.vv v8, v8, v11
; LMULMAX1-RV64-NEXT:    vse16.v v8, (a0)
; LMULMAX1-RV64-NEXT:    vse16.v v9, (a2)
; LMULMAX1-RV64-NEXT:    ret
  %a = load <16 x i16>, ptr %x
  %b = load <16 x i16>, ptr %y
  %c = sdiv <16 x i16> %a, %b
  store <16 x i16> %c, ptr %x
  ret void
}

; Element-wise signed division of two <8 x i32> vectors: the dividend is
; loaded from %x, the divisor from %y, and the quotient is stored back over
; %x.
; Expected codegen: with lmul-max=2 a single m2 vdiv.vv at VL=8; with
; lmul-max=1 the operation is split into two m1 halves of 4 elements at byte
; offsets 0 and 16. The riscv32 and riscv64 expectations for the split differ
; only in register assignment and the order of the high-half loads.
define void @sdiv_v8i32(ptr %x, ptr %y) {
; LMULMAX2-LABEL: sdiv_v8i32:
; LMULMAX2:       # %bb.0:
; LMULMAX2-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; LMULMAX2-NEXT:    vle32.v v8, (a0)
; LMULMAX2-NEXT:    vle32.v v10, (a1)
; LMULMAX2-NEXT:    vdiv.vv v8, v8, v10
; LMULMAX2-NEXT:    vse32.v v8, (a0)
; LMULMAX2-NEXT:    ret
;
; LMULMAX1-RV32-LABEL: sdiv_v8i32:
; LMULMAX1-RV32:       # %bb.0:
; LMULMAX1-RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; LMULMAX1-RV32-NEXT:    vle32.v v8, (a0)
; LMULMAX1-RV32-NEXT:    addi a2, a0, 16
; LMULMAX1-RV32-NEXT:    vle32.v v9, (a2)
; LMULMAX1-RV32-NEXT:    addi a3, a1, 16
; LMULMAX1-RV32-NEXT:    vle32.v v10, (a3)
; LMULMAX1-RV32-NEXT:    vle32.v v11, (a1)
; LMULMAX1-RV32-NEXT:    vdiv.vv v9, v9, v10
; LMULMAX1-RV32-NEXT:    vdiv.vv v8, v8, v11
; LMULMAX1-RV32-NEXT:    vse32.v v8, (a0)
; LMULMAX1-RV32-NEXT:    vse32.v v9, (a2)
; LMULMAX1-RV32-NEXT:    ret
;
; LMULMAX1-RV64-LABEL: sdiv_v8i32:
; LMULMAX1-RV64:       # %bb.0:
; LMULMAX1-RV64-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; LMULMAX1-RV64-NEXT:    vle32.v v8, (a0)
; LMULMAX1-RV64-NEXT:    addi a2, a1, 16
; LMULMAX1-RV64-NEXT:    vle32.v v9, (a2)
; LMULMAX1-RV64-NEXT:    addi a2, a0, 16
; LMULMAX1-RV64-NEXT:    vle32.v v10, (a2)
; LMULMAX1-RV64-NEXT:    vle32.v v11, (a1)
; LMULMAX1-RV64-NEXT:    vdiv.vv v9, v10, v9
; LMULMAX1-RV64-NEXT:    vdiv.vv v8, v8, v11
; LMULMAX1-RV64-NEXT:    vse32.v v8, (a0)
; LMULMAX1-RV64-NEXT:    vse32.v v9, (a2)
; LMULMAX1-RV64-NEXT:    ret
  %a = load <8 x i32>, ptr %x
  %b = load <8 x i32>, ptr %y
  %c = sdiv <8 x i32> %a, %b
  store <8 x i32> %c, ptr %x
  ret void
}

; Element-wise signed division of two <4 x i64> vectors: the dividend is
; loaded from %x, the divisor from %y, and the quotient is stored back over
; %x.
; Expected codegen: with lmul-max=2 a single m2 vdiv.vv at VL=4; with
; lmul-max=1 the operation is split into two m1 halves of 2 elements at byte
; offsets 0 and 16. The riscv32 and riscv64 expectations for the split differ
; only in register assignment and the order of the high-half loads.
define void @sdiv_v4i64(ptr %x, ptr %y) {
; LMULMAX2-LABEL: sdiv_v4i64:
; LMULMAX2:       # %bb.0:
; LMULMAX2-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; LMULMAX2-NEXT:    vle64.v v8, (a0)
; LMULMAX2-NEXT:    vle64.v v10, (a1)
; LMULMAX2-NEXT:    vdiv.vv v8, v8, v10
; LMULMAX2-NEXT:    vse64.v v8, (a0)
; LMULMAX2-NEXT:    ret
;
; LMULMAX1-RV32-LABEL: sdiv_v4i64:
; LMULMAX1-RV32:       # %bb.0:
; LMULMAX1-RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; LMULMAX1-RV32-NEXT:    vle64.v v8, (a0)
; LMULMAX1-RV32-NEXT:    addi a2, a0, 16
; LMULMAX1-RV32-NEXT:    vle64.v v9, (a2)
; LMULMAX1-RV32-NEXT:    addi a3, a1, 16
; LMULMAX1-RV32-NEXT:    vle64.v v10, (a3)
; LMULMAX1-RV32-NEXT:    vle64.v v11, (a1)
; LMULMAX1-RV32-NEXT:    vdiv.vv v9, v9, v10
; LMULMAX1-RV32-NEXT:    vdiv.vv v8, v8, v11
; LMULMAX1-RV32-NEXT:    vse64.v v8, (a0)
; LMULMAX1-RV32-NEXT:    vse64.v v9, (a2)
; LMULMAX1-RV32-NEXT:    ret
;
; LMULMAX1-RV64-LABEL: sdiv_v4i64:
; LMULMAX1-RV64:       # %bb.0:
; LMULMAX1-RV64-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; LMULMAX1-RV64-NEXT:    vle64.v v8, (a0)
; LMULMAX1-RV64-NEXT:    addi a2, a1, 16
; LMULMAX1-RV64-NEXT:    vle64.v v9, (a2)
; LMULMAX1-RV64-NEXT:    addi a2, a0, 16
; LMULMAX1-RV64-NEXT:    vle64.v v10, (a2)
; LMULMAX1-RV64-NEXT:    vle64.v v11, (a1)
; LMULMAX1-RV64-NEXT:    vdiv.vv v9, v10, v9
; LMULMAX1-RV64-NEXT:    vdiv.vv v8, v8, v11
; LMULMAX1-RV64-NEXT:    vse64.v v8, (a0)
; LMULMAX1-RV64-NEXT:    vse64.v v9, (a2)
; LMULMAX1-RV64-NEXT:    ret
  %a = load <4 x i64>, ptr %x
  %b = load <4 x i64>, ptr %y
  %c = sdiv <4 x i64> %a, %b
  store <4 x i64> %c, ptr %x
  ret void
}

; Element-wise signed remainder of two <32 x i8> vectors: the dividend is
; loaded from %x, the divisor from %y, and the remainder is stored back over
; %x.
; Expected codegen: with lmul-max=2 a single m2 vrem.vv, where VL=32 is
; materialized with li + vsetvli instead of vsetivli (presumably because 32
; does not fit the 5-bit vsetivli immediate); with lmul-max=1 the operation is
; split into two m1 halves of 16 elements at byte offsets 0 and 16. The
; riscv32 and riscv64 expectations for the split differ only in register
; assignment and the order of the high-half loads.
define void @srem_v32i8(ptr %x, ptr %y) {
; LMULMAX2-LABEL: srem_v32i8:
; LMULMAX2:       # %bb.0:
; LMULMAX2-NEXT:    li a2, 32
; LMULMAX2-NEXT:    vsetvli zero, a2, e8, m2, ta, ma
; LMULMAX2-NEXT:    vle8.v v8, (a0)
; LMULMAX2-NEXT:    vle8.v v10, (a1)
; LMULMAX2-NEXT:    vrem.vv v8, v8, v10
; LMULMAX2-NEXT:    vse8.v v8, (a0)
; LMULMAX2-NEXT:    ret
;
; LMULMAX1-RV32-LABEL: srem_v32i8:
; LMULMAX1-RV32:       # %bb.0:
; LMULMAX1-RV32-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
; LMULMAX1-RV32-NEXT:    vle8.v v8, (a0)
; LMULMAX1-RV32-NEXT:    addi a2, a0, 16
; LMULMAX1-RV32-NEXT:    vle8.v v9, (a2)
; LMULMAX1-RV32-NEXT:    addi a3, a1, 16
; LMULMAX1-RV32-NEXT:    vle8.v v10, (a3)
; LMULMAX1-RV32-NEXT:    vle8.v v11, (a1)
; LMULMAX1-RV32-NEXT:    vrem.vv v9, v9, v10
; LMULMAX1-RV32-NEXT:    vrem.vv v8, v8, v11
; LMULMAX1-RV32-NEXT:    vse8.v v8, (a0)
; LMULMAX1-RV32-NEXT:    vse8.v v9, (a2)
; LMULMAX1-RV32-NEXT:    ret
;
; LMULMAX1-RV64-LABEL: srem_v32i8:
; LMULMAX1-RV64:       # %bb.0:
; LMULMAX1-RV64-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
; LMULMAX1-RV64-NEXT:    vle8.v v8, (a0)
; LMULMAX1-RV64-NEXT:    addi a2, a1, 16
; LMULMAX1-RV64-NEXT:    vle8.v v9, (a2)
; LMULMAX1-RV64-NEXT:    addi a2, a0, 16
; LMULMAX1-RV64-NEXT:    vle8.v v10, (a2)
; LMULMAX1-RV64-NEXT:    vle8.v v11, (a1)
; LMULMAX1-RV64-NEXT:    vrem.vv v9, v10, v9
; LMULMAX1-RV64-NEXT:    vrem.vv v8, v8, v11
; LMULMAX1-RV64-NEXT:    vse8.v v8, (a0)
; LMULMAX1-RV64-NEXT:    vse8.v v9, (a2)
; LMULMAX1-RV64-NEXT:    ret
  %a = load <32 x i8>, ptr %x
  %b = load <32 x i8>, ptr %y
  %c = srem <32 x i8> %a, %b
  store <32 x i8> %c, ptr %x
  ret void
}

; Element-wise signed remainder of two <16 x i16> vectors: the dividend is
; loaded from %x, the divisor from %y, and the remainder is stored back over
; %x.
; Expected codegen: with lmul-max=2 a single m2 vrem.vv at VL=16; with
; lmul-max=1 the operation is split into two m1 halves of 8 elements at byte
; offsets 0 and 16. The riscv32 and riscv64 expectations for the split differ
; only in register assignment and the order of the high-half loads.
define void @srem_v16i16(ptr %x, ptr %y) {
; LMULMAX2-LABEL: srem_v16i16:
; LMULMAX2:       # %bb.0:
; LMULMAX2-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
; LMULMAX2-NEXT:    vle16.v v8, (a0)
; LMULMAX2-NEXT:    vle16.v v10, (a1)
; LMULMAX2-NEXT:    vrem.vv v8, v8, v10
; LMULMAX2-NEXT:    vse16.v v8, (a0)
; LMULMAX2-NEXT:    ret
;
; LMULMAX1-RV32-LABEL: srem_v16i16:
; LMULMAX1-RV32:       # %bb.0:
; LMULMAX1-RV32-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; LMULMAX1-RV32-NEXT:    vle16.v v8, (a0)
; LMULMAX1-RV32-NEXT:    addi a2, a0, 16
; LMULMAX1-RV32-NEXT:    vle16.v v9, (a2)
; LMULMAX1-RV32-NEXT:    addi a3, a1, 16
; LMULMAX1-RV32-NEXT:    vle16.v v10, (a3)
; LMULMAX1-RV32-NEXT:    vle16.v v11, (a1)
; LMULMAX1-RV32-NEXT:    vrem.vv v9, v9, v10
; LMULMAX1-RV32-NEXT:    vrem.vv v8, v8, v11
; LMULMAX1-RV32-NEXT:    vse16.v v8, (a0)
; LMULMAX1-RV32-NEXT:    vse16.v v9, (a2)
; LMULMAX1-RV32-NEXT:    ret
;
; LMULMAX1-RV64-LABEL: srem_v16i16:
; LMULMAX1-RV64:       # %bb.0:
; LMULMAX1-RV64-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; LMULMAX1-RV64-NEXT:    vle16.v v8, (a0)
; LMULMAX1-RV64-NEXT:    addi a2, a1, 16
; LMULMAX1-RV64-NEXT:    vle16.v v9, (a2)
; LMULMAX1-RV64-NEXT:    addi a2, a0, 16
; LMULMAX1-RV64-NEXT:    vle16.v v10, (a2)
; LMULMAX1-RV64-NEXT:    vle16.v v11, (a1)
; LMULMAX1-RV64-NEXT:    vrem.vv v9, v10, v9
; LMULMAX1-RV64-NEXT:    vrem.vv v8, v8, v11
; LMULMAX1-RV64-NEXT:    vse16.v v8, (a0)
; LMULMAX1-RV64-NEXT:    vse16.v v9, (a2)
; LMULMAX1-RV64-NEXT:    ret
  %a = load <16 x i16>, ptr %x
  %b = load <16 x i16>, ptr %y
  %c = srem <16 x i16> %a, %b
  store <16 x i16> %c, ptr %x
  ret void
}

; Element-wise signed remainder of two <8 x i32> vectors: the dividend is
; loaded from %x, the divisor from %y, and the remainder is stored back over
; %x.
; Expected codegen: with lmul-max=2 a single m2 vrem.vv at VL=8; with
; lmul-max=1 the operation is split into two m1 halves of 4 elements at byte
; offsets 0 and 16. The riscv32 and riscv64 expectations for the split differ
; only in register assignment and the order of the high-half loads.
define void @srem_v8i32(ptr %x, ptr %y) {
; LMULMAX2-LABEL: srem_v8i32:
; LMULMAX2:       # %bb.0:
; LMULMAX2-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; LMULMAX2-NEXT:    vle32.v v8, (a0)
; LMULMAX2-NEXT:    vle32.v v10, (a1)
; LMULMAX2-NEXT:    vrem.vv v8, v8, v10
; LMULMAX2-NEXT:    vse32.v v8, (a0)
; LMULMAX2-NEXT:    ret
;
; LMULMAX1-RV32-LABEL: srem_v8i32:
; LMULMAX1-RV32:       # %bb.0:
; LMULMAX1-RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; LMULMAX1-RV32-NEXT:    vle32.v v8, (a0)
; LMULMAX1-RV32-NEXT:    addi a2, a0, 16
; LMULMAX1-RV32-NEXT:    vle32.v v9, (a2)
; LMULMAX1-RV32-NEXT:    addi a3, a1, 16
; LMULMAX1-RV32-NEXT:    vle32.v v10, (a3)
; LMULMAX1-RV32-NEXT:    vle32.v v11, (a1)
; LMULMAX1-RV32-NEXT:    vrem.vv v9, v9, v10
; LMULMAX1-RV32-NEXT:    vrem.vv v8, v8, v11
; LMULMAX1-RV32-NEXT:    vse32.v v8, (a0)
; LMULMAX1-RV32-NEXT:    vse32.v v9, (a2)
; LMULMAX1-RV32-NEXT:    ret
;
; LMULMAX1-RV64-LABEL: srem_v8i32:
; LMULMAX1-RV64:       # %bb.0:
; LMULMAX1-RV64-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; LMULMAX1-RV64-NEXT:    vle32.v v8, (a0)
; LMULMAX1-RV64-NEXT:    addi a2, a1, 16
; LMULMAX1-RV64-NEXT:    vle32.v v9, (a2)
; LMULMAX1-RV64-NEXT:    addi a2, a0, 16
; LMULMAX1-RV64-NEXT:    vle32.v v10, (a2)
; LMULMAX1-RV64-NEXT:    vle32.v v11, (a1)
; LMULMAX1-RV64-NEXT:    vrem.vv v9, v10, v9
; LMULMAX1-RV64-NEXT:    vrem.vv v8, v8, v11
; LMULMAX1-RV64-NEXT:    vse32.v v8, (a0)
; LMULMAX1-RV64-NEXT:    vse32.v v9, (a2)
; LMULMAX1-RV64-NEXT:    ret
  %a = load <8 x i32>, ptr %x
  %b = load <8 x i32>, ptr %y
  %c = srem <8 x i32> %a, %b
  store <8 x i32> %c, ptr %x
  ret void
}

; Element-wise signed remainder of two <4 x i64> vectors: the dividend is
; loaded from %x, the divisor from %y, and the remainder is stored back over
; %x.
; Expected codegen: with lmul-max=2 a single m2 vrem.vv at VL=4; with
; lmul-max=1 the operation is split into two m1 halves of 2 elements at byte
; offsets 0 and 16. The riscv32 and riscv64 expectations for the split differ
; only in register assignment and the order of the high-half loads.
define void @srem_v4i64(ptr %x, ptr %y) {
; LMULMAX2-LABEL: srem_v4i64:
; LMULMAX2:       # %bb.0:
; LMULMAX2-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; LMULMAX2-NEXT:    vle64.v v8, (a0)
; LMULMAX2-NEXT:    vle64.v v10, (a1)
; LMULMAX2-NEXT:    vrem.vv v8, v8, v10
; LMULMAX2-NEXT:    vse64.v v8, (a0)
; LMULMAX2-NEXT:    ret
;
; LMULMAX1-RV32-LABEL: srem_v4i64:
; LMULMAX1-RV32:       # %bb.0:
; LMULMAX1-RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; LMULMAX1-RV32-NEXT:    vle64.v v8, (a0)
; LMULMAX1-RV32-NEXT:    addi a2, a0, 16
; LMULMAX1-RV32-NEXT:    vle64.v v9, (a2)
; LMULMAX1-RV32-NEXT:    addi a3, a1, 16
; LMULMAX1-RV32-NEXT:    vle64.v v10, (a3)
; LMULMAX1-RV32-NEXT:    vle64.v v11, (a1)
; LMULMAX1-RV32-NEXT:    vrem.vv v9, v9, v10
; LMULMAX1-RV32-NEXT:    vrem.vv v8, v8, v11
; LMULMAX1-RV32-NEXT:    vse64.v v8, (a0)
; LMULMAX1-RV32-NEXT:    vse64.v v9, (a2)
; LMULMAX1-RV32-NEXT:    ret
;
; LMULMAX1-RV64-LABEL: srem_v4i64:
; LMULMAX1-RV64:       # %bb.0:
; LMULMAX1-RV64-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; LMULMAX1-RV64-NEXT:    vle64.v v8, (a0)
; LMULMAX1-RV64-NEXT:    addi a2, a1, 16
; LMULMAX1-RV64-NEXT:    vle64.v v9, (a2)
; LMULMAX1-RV64-NEXT:    addi a2, a0, 16
; LMULMAX1-RV64-NEXT:    vle64.v v10, (a2)
; LMULMAX1-RV64-NEXT:    vle64.v v11, (a1)
; LMULMAX1-RV64-NEXT:    vrem.vv v9, v10, v9
; LMULMAX1-RV64-NEXT:    vrem.vv v8, v8, v11
; LMULMAX1-RV64-NEXT:    vse64.v v8, (a0)
; LMULMAX1-RV64-NEXT:    vse64.v v9, (a2)
; LMULMAX1-RV64-NEXT:    ret
  %a = load <4 x i64>, ptr %x
  %b = load <4 x i64>, ptr %y
  %c = srem <4 x i64> %a, %b
  store <4 x i64> %c, ptr %x
  ret void
}

; Element-wise unsigned division of two <32 x i8> vectors: the dividend is
; loaded from %x, the divisor from %y, and the quotient is stored back over
; %x.
; Expected codegen: with lmul-max=2 a single m2 vdivu.vv, where VL=32 is
; materialized with li + vsetvli instead of vsetivli (presumably because 32
; does not fit the 5-bit vsetivli immediate); with lmul-max=1 the operation is
; split into two m1 halves of 16 elements at byte offsets 0 and 16. The
; riscv32 and riscv64 expectations for the split differ only in register
; assignment and the order of the high-half loads.
define void @udiv_v32i8(ptr %x, ptr %y) {
; LMULMAX2-LABEL: udiv_v32i8:
; LMULMAX2:       # %bb.0:
; LMULMAX2-NEXT:    li a2, 32
; LMULMAX2-NEXT:    vsetvli zero, a2, e8, m2, ta, ma
; LMULMAX2-NEXT:    vle8.v v8, (a0)
; LMULMAX2-NEXT:    vle8.v v10, (a1)
; LMULMAX2-NEXT:    vdivu.vv v8, v8, v10
; LMULMAX2-NEXT:    vse8.v v8, (a0)
; LMULMAX2-NEXT:    ret
;
; LMULMAX1-RV32-LABEL: udiv_v32i8:
; LMULMAX1-RV32:       # %bb.0:
; LMULMAX1-RV32-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
; LMULMAX1-RV32-NEXT:    vle8.v v8, (a0)
; LMULMAX1-RV32-NEXT:    addi a2, a0, 16
; LMULMAX1-RV32-NEXT:    vle8.v v9, (a2)
; LMULMAX1-RV32-NEXT:    addi a3, a1, 16
; LMULMAX1-RV32-NEXT:    vle8.v v10, (a3)
; LMULMAX1-RV32-NEXT:    vle8.v v11, (a1)
; LMULMAX1-RV32-NEXT:    vdivu.vv v9, v9, v10
; LMULMAX1-RV32-NEXT:    vdivu.vv v8, v8, v11
; LMULMAX1-RV32-NEXT:    vse8.v v8, (a0)
; LMULMAX1-RV32-NEXT:    vse8.v v9, (a2)
; LMULMAX1-RV32-NEXT:    ret
;
; LMULMAX1-RV64-LABEL: udiv_v32i8:
; LMULMAX1-RV64:       # %bb.0:
; LMULMAX1-RV64-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
; LMULMAX1-RV64-NEXT:    vle8.v v8, (a0)
; LMULMAX1-RV64-NEXT:    addi a2, a1, 16
; LMULMAX1-RV64-NEXT:    vle8.v v9, (a2)
; LMULMAX1-RV64-NEXT:    addi a2, a0, 16
; LMULMAX1-RV64-NEXT:    vle8.v v10, (a2)
; LMULMAX1-RV64-NEXT:    vle8.v v11, (a1)
; LMULMAX1-RV64-NEXT:    vdivu.vv v9, v10, v9
; LMULMAX1-RV64-NEXT:    vdivu.vv v8, v8, v11
; LMULMAX1-RV64-NEXT:    vse8.v v8, (a0)
; LMULMAX1-RV64-NEXT:    vse8.v v9, (a2)
; LMULMAX1-RV64-NEXT:    ret
  %a = load <32 x i8>, ptr %x
  %b = load <32 x i8>, ptr %y
  %c = udiv <32 x i8> %a, %b
  store <32 x i8> %c, ptr %x
  ret void
}

; Element-wise unsigned division of two <16 x i16> vectors: the dividend is
; loaded from %x, the divisor from %y, and the quotient is stored back over
; %x.
; Expected codegen: with lmul-max=2 a single m2 vdivu.vv at VL=16; with
; lmul-max=1 the operation is split into two m1 halves of 8 elements at byte
; offsets 0 and 16. The riscv32 and riscv64 expectations for the split differ
; only in register assignment and the order of the high-half loads.
define void @udiv_v16i16(ptr %x, ptr %y) {
; LMULMAX2-LABEL: udiv_v16i16:
; LMULMAX2:       # %bb.0:
; LMULMAX2-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
; LMULMAX2-NEXT:    vle16.v v8, (a0)
; LMULMAX2-NEXT:    vle16.v v10, (a1)
; LMULMAX2-NEXT:    vdivu.vv v8, v8, v10
; LMULMAX2-NEXT:    vse16.v v8, (a0)
; LMULMAX2-NEXT:    ret
;
; LMULMAX1-RV32-LABEL: udiv_v16i16:
; LMULMAX1-RV32:       # %bb.0:
; LMULMAX1-RV32-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; LMULMAX1-RV32-NEXT:    vle16.v v8, (a0)
; LMULMAX1-RV32-NEXT:    addi a2, a0, 16
; LMULMAX1-RV32-NEXT:    vle16.v v9, (a2)
; LMULMAX1-RV32-NEXT:    addi a3, a1, 16
; LMULMAX1-RV32-NEXT:    vle16.v v10, (a3)
; LMULMAX1-RV32-NEXT:    vle16.v v11, (a1)
; LMULMAX1-RV32-NEXT:    vdivu.vv v9, v9, v10
; LMULMAX1-RV32-NEXT:    vdivu.vv v8, v8, v11
; LMULMAX1-RV32-NEXT:    vse16.v v8, (a0)
; LMULMAX1-RV32-NEXT:    vse16.v v9, (a2)
; LMULMAX1-RV32-NEXT:    ret
;
; LMULMAX1-RV64-LABEL: udiv_v16i16:
; LMULMAX1-RV64:       # %bb.0:
; LMULMAX1-RV64-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; LMULMAX1-RV64-NEXT:    vle16.v v8, (a0)
; LMULMAX1-RV64-NEXT:    addi a2, a1, 16
; LMULMAX1-RV64-NEXT:    vle16.v v9, (a2)
; LMULMAX1-RV64-NEXT:    addi a2, a0, 16
; LMULMAX1-RV64-NEXT:    vle16.v v10, (a2)
; LMULMAX1-RV64-NEXT:    vle16.v v11, (a1)
; LMULMAX1-RV64-NEXT:    vdivu.vv v9, v10, v9
; LMULMAX1-RV64-NEXT:    vdivu.vv v8, v8, v11
; LMULMAX1-RV64-NEXT:    vse16.v v8, (a0)
; LMULMAX1-RV64-NEXT:    vse16.v v9, (a2)
; LMULMAX1-RV64-NEXT:    ret
  %a = load <16 x i16>, ptr %x
  %b = load <16 x i16>, ptr %y
  %c = udiv <16 x i16> %a, %b
  store <16 x i16> %c, ptr %x
  ret void
}

; Element-wise unsigned division of two <8 x i32> vectors: the dividend is
; loaded from %x, the divisor from %y, and the quotient is stored back over
; %x.
; Expected codegen: with lmul-max=2 a single m2 vdivu.vv at VL=8; with
; lmul-max=1 the operation is split into two m1 halves of 4 elements at byte
; offsets 0 and 16. The riscv32 and riscv64 expectations for the split differ
; only in register assignment and the order of the high-half loads.
define void @udiv_v8i32(ptr %x, ptr %y) {
; LMULMAX2-LABEL: udiv_v8i32:
; LMULMAX2:       # %bb.0:
; LMULMAX2-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; LMULMAX2-NEXT:    vle32.v v8, (a0)
; LMULMAX2-NEXT:    vle32.v v10, (a1)
; LMULMAX2-NEXT:    vdivu.vv v8, v8, v10
; LMULMAX2-NEXT:    vse32.v v8, (a0)
; LMULMAX2-NEXT:    ret
;
; LMULMAX1-RV32-LABEL: udiv_v8i32:
; LMULMAX1-RV32:       # %bb.0:
; LMULMAX1-RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; LMULMAX1-RV32-NEXT:    vle32.v v8, (a0)
; LMULMAX1-RV32-NEXT:    addi a2, a0, 16
; LMULMAX1-RV32-NEXT:    vle32.v v9, (a2)
; LMULMAX1-RV32-NEXT:    addi a3, a1, 16
; LMULMAX1-RV32-NEXT:    vle32.v v10, (a3)
; LMULMAX1-RV32-NEXT:    vle32.v v11, (a1)
; LMULMAX1-RV32-NEXT:    vdivu.vv v9, v9, v10
; LMULMAX1-RV32-NEXT:    vdivu.vv v8, v8, v11
; LMULMAX1-RV32-NEXT:    vse32.v v8, (a0)
; LMULMAX1-RV32-NEXT:    vse32.v v9, (a2)
; LMULMAX1-RV32-NEXT:    ret
;
; LMULMAX1-RV64-LABEL: udiv_v8i32:
; LMULMAX1-RV64:       # %bb.0:
; LMULMAX1-RV64-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; LMULMAX1-RV64-NEXT:    vle32.v v8, (a0)
; LMULMAX1-RV64-NEXT:    addi a2, a1, 16
; LMULMAX1-RV64-NEXT:    vle32.v v9, (a2)
; LMULMAX1-RV64-NEXT:    addi a2, a0, 16
; LMULMAX1-RV64-NEXT:    vle32.v v10, (a2)
; LMULMAX1-RV64-NEXT:    vle32.v v11, (a1)
; LMULMAX1-RV64-NEXT:    vdivu.vv v9, v10, v9
; LMULMAX1-RV64-NEXT:    vdivu.vv v8, v8, v11
; LMULMAX1-RV64-NEXT:    vse32.v v8, (a0)
; LMULMAX1-RV64-NEXT:    vse32.v v9, (a2)
; LMULMAX1-RV64-NEXT:    ret
  %a = load <8 x i32>, ptr %x
  %b = load <8 x i32>, ptr %y
  %c = udiv <8 x i32> %a, %b
  store <8 x i32> %c, ptr %x
  ret void
}

; Element-wise unsigned division of two <4 x i64> vectors: the dividend is
; loaded from %x, the divisor from %y, and the quotient is stored back over
; %x.
; Expected codegen: with lmul-max=2 a single m2 vdivu.vv at VL=4; with
; lmul-max=1 the operation is split into two m1 halves of 2 elements at byte
; offsets 0 and 16. The riscv32 and riscv64 expectations for the split differ
; only in register assignment and the order of the high-half loads.
define void @udiv_v4i64(ptr %x, ptr %y) {
; LMULMAX2-LABEL: udiv_v4i64:
; LMULMAX2:       # %bb.0:
; LMULMAX2-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; LMULMAX2-NEXT:    vle64.v v8, (a0)
; LMULMAX2-NEXT:    vle64.v v10, (a1)
; LMULMAX2-NEXT:    vdivu.vv v8, v8, v10
; LMULMAX2-NEXT:    vse64.v v8, (a0)
; LMULMAX2-NEXT:    ret
;
; LMULMAX1-RV32-LABEL: udiv_v4i64:
; LMULMAX1-RV32:       # %bb.0:
; LMULMAX1-RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; LMULMAX1-RV32-NEXT:    vle64.v v8, (a0)
; LMULMAX1-RV32-NEXT:    addi a2, a0, 16
; LMULMAX1-RV32-NEXT:    vle64.v v9, (a2)
; LMULMAX1-RV32-NEXT:    addi a3, a1, 16
; LMULMAX1-RV32-NEXT:    vle64.v v10, (a3)
; LMULMAX1-RV32-NEXT:    vle64.v v11, (a1)
; LMULMAX1-RV32-NEXT:    vdivu.vv v9, v9, v10
; LMULMAX1-RV32-NEXT:    vdivu.vv v8, v8, v11
; LMULMAX1-RV32-NEXT:    vse64.v v8, (a0)
; LMULMAX1-RV32-NEXT:    vse64.v v9, (a2)
; LMULMAX1-RV32-NEXT:    ret
;
; LMULMAX1-RV64-LABEL: udiv_v4i64:
; LMULMAX1-RV64:       # %bb.0:
; LMULMAX1-RV64-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; LMULMAX1-RV64-NEXT:    vle64.v v8, (a0)
; LMULMAX1-RV64-NEXT:    addi a2, a1, 16
; LMULMAX1-RV64-NEXT:    vle64.v v9, (a2)
; LMULMAX1-RV64-NEXT:    addi a2, a0, 16
; LMULMAX1-RV64-NEXT:    vle64.v v10, (a2)
; LMULMAX1-RV64-NEXT:    vle64.v v11, (a1)
; LMULMAX1-RV64-NEXT:    vdivu.vv v9, v10, v9
; LMULMAX1-RV64-NEXT:    vdivu.vv v8, v8, v11
; LMULMAX1-RV64-NEXT:    vse64.v v8, (a0)
; LMULMAX1-RV64-NEXT:    vse64.v v9, (a2)
; LMULMAX1-RV64-NEXT:    ret
  %a = load <4 x i64>, ptr %x
  %b = load <4 x i64>, ptr %y
  %c = udiv <4 x i64> %a, %b
  store <4 x i64> %c, ptr %x
  ret void
}

; Element-wise unsigned remainder of two <32 x i8> vectors: the dividend is
; loaded from %x, the divisor from %y, and the remainder is stored back over
; %x.
; Expected codegen: with lmul-max=2 a single m2 vremu.vv, where VL=32 is
; materialized with li + vsetvli instead of vsetivli (presumably because 32
; does not fit the 5-bit vsetivli immediate); with lmul-max=1 the operation is
; split into two m1 halves of 16 elements at byte offsets 0 and 16. The
; riscv32 and riscv64 expectations for the split differ only in register
; assignment and the order of the high-half loads.
define void @urem_v32i8(ptr %x, ptr %y) {
; LMULMAX2-LABEL: urem_v32i8:
; LMULMAX2:       # %bb.0:
; LMULMAX2-NEXT:    li a2, 32
; LMULMAX2-NEXT:    vsetvli zero, a2, e8, m2, ta, ma
; LMULMAX2-NEXT:    vle8.v v8, (a0)
; LMULMAX2-NEXT:    vle8.v v10, (a1)
; LMULMAX2-NEXT:    vremu.vv v8, v8, v10
; LMULMAX2-NEXT:    vse8.v v8, (a0)
; LMULMAX2-NEXT:    ret
;
; LMULMAX1-RV32-LABEL: urem_v32i8:
; LMULMAX1-RV32:       # %bb.0:
; LMULMAX1-RV32-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
; LMULMAX1-RV32-NEXT:    vle8.v v8, (a0)
; LMULMAX1-RV32-NEXT:    addi a2, a0, 16
; LMULMAX1-RV32-NEXT:    vle8.v v9, (a2)
; LMULMAX1-RV32-NEXT:    addi a3, a1, 16
; LMULMAX1-RV32-NEXT:    vle8.v v10, (a3)
; LMULMAX1-RV32-NEXT:    vle8.v v11, (a1)
; LMULMAX1-RV32-NEXT:    vremu.vv v9, v9, v10
; LMULMAX1-RV32-NEXT:    vremu.vv v8, v8, v11
; LMULMAX1-RV32-NEXT:    vse8.v v8, (a0)
; LMULMAX1-RV32-NEXT:    vse8.v v9, (a2)
; LMULMAX1-RV32-NEXT:    ret
;
; LMULMAX1-RV64-LABEL: urem_v32i8:
; LMULMAX1-RV64:       # %bb.0:
; LMULMAX1-RV64-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
; LMULMAX1-RV64-NEXT:    vle8.v v8, (a0)
; LMULMAX1-RV64-NEXT:    addi a2, a1, 16
; LMULMAX1-RV64-NEXT:    vle8.v v9, (a2)
; LMULMAX1-RV64-NEXT:    addi a2, a0, 16
; LMULMAX1-RV64-NEXT:    vle8.v v10, (a2)
; LMULMAX1-RV64-NEXT:    vle8.v v11, (a1)
; LMULMAX1-RV64-NEXT:    vremu.vv v9, v10, v9
; LMULMAX1-RV64-NEXT:    vremu.vv v8, v8, v11
; LMULMAX1-RV64-NEXT:    vse8.v v8, (a0)
; LMULMAX1-RV64-NEXT:    vse8.v v9, (a2)
; LMULMAX1-RV64-NEXT:    ret
  %a = load <32 x i8>, ptr %x
  %b = load <32 x i8>, ptr %y
  %c = urem <32 x i8> %a, %b
  store <32 x i8> %c, ptr %x
  ret void
}

; Element-wise unsigned remainder of two <16 x i16> vectors: the dividend is
; loaded from %x, the divisor from %y, and the remainder is stored back over
; %x.
; Expected codegen: with lmul-max=2 a single m2 vremu.vv at VL=16; with
; lmul-max=1 the operation is split into two m1 halves of 8 elements at byte
; offsets 0 and 16. The riscv32 and riscv64 expectations for the split differ
; only in register assignment and the order of the high-half loads.
define void @urem_v16i16(ptr %x, ptr %y) {
; LMULMAX2-LABEL: urem_v16i16:
; LMULMAX2:       # %bb.0:
; LMULMAX2-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
; LMULMAX2-NEXT:    vle16.v v8, (a0)
; LMULMAX2-NEXT:    vle16.v v10, (a1)
; LMULMAX2-NEXT:    vremu.vv v8, v8, v10
; LMULMAX2-NEXT:    vse16.v v8, (a0)
; LMULMAX2-NEXT:    ret
;
; LMULMAX1-RV32-LABEL: urem_v16i16:
; LMULMAX1-RV32:       # %bb.0:
; LMULMAX1-RV32-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; LMULMAX1-RV32-NEXT:    vle16.v v8, (a0)
; LMULMAX1-RV32-NEXT:    addi a2, a0, 16
; LMULMAX1-RV32-NEXT:    vle16.v v9, (a2)
; LMULMAX1-RV32-NEXT:    addi a3, a1, 16
; LMULMAX1-RV32-NEXT:    vle16.v v10, (a3)
; LMULMAX1-RV32-NEXT:    vle16.v v11, (a1)
; LMULMAX1-RV32-NEXT:    vremu.vv v9, v9, v10
; LMULMAX1-RV32-NEXT:    vremu.vv v8, v8, v11
; LMULMAX1-RV32-NEXT:    vse16.v v8, (a0)
; LMULMAX1-RV32-NEXT:    vse16.v v9, (a2)
; LMULMAX1-RV32-NEXT:    ret
;
; LMULMAX1-RV64-LABEL: urem_v16i16:
; LMULMAX1-RV64:       # %bb.0:
; LMULMAX1-RV64-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; LMULMAX1-RV64-NEXT:    vle16.v v8, (a0)
; LMULMAX1-RV64-NEXT:    addi a2, a1, 16
; LMULMAX1-RV64-NEXT:    vle16.v v9, (a2)
; LMULMAX1-RV64-NEXT:    addi a2, a0, 16
; LMULMAX1-RV64-NEXT:    vle16.v v10, (a2)
; LMULMAX1-RV64-NEXT:    vle16.v v11, (a1)
; LMULMAX1-RV64-NEXT:    vremu.vv v9, v10, v9
; LMULMAX1-RV64-NEXT:    vremu.vv v8, v8, v11
; LMULMAX1-RV64-NEXT:    vse16.v v8, (a0)
; LMULMAX1-RV64-NEXT:    vse16.v v9, (a2)
; LMULMAX1-RV64-NEXT:    ret
  %a = load <16 x i16>, ptr %x
  %b = load <16 x i16>, ptr %y
  %c = urem <16 x i16> %a, %b
  store <16 x i16> %c, ptr %x
  ret void
}

; Element-wise unsigned remainder of two <8 x i32> vectors: the dividend is
; loaded from %x, the divisor from %y, and the remainder is stored back over
; %x.
; Expected codegen: with lmul-max=2 a single m2 vremu.vv at VL=8; with
; lmul-max=1 the operation is split into two m1 halves of 4 elements at byte
; offsets 0 and 16. The riscv32 and riscv64 expectations for the split differ
; only in register assignment and the order of the high-half loads.
define void @urem_v8i32(ptr %x, ptr %y) {
; LMULMAX2-LABEL: urem_v8i32:
; LMULMAX2:       # %bb.0:
; LMULMAX2-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; LMULMAX2-NEXT:    vle32.v v8, (a0)
; LMULMAX2-NEXT:    vle32.v v10, (a1)
; LMULMAX2-NEXT:    vremu.vv v8, v8, v10
; LMULMAX2-NEXT:    vse32.v v8, (a0)
; LMULMAX2-NEXT:    ret
;
; LMULMAX1-RV32-LABEL: urem_v8i32:
; LMULMAX1-RV32:       # %bb.0:
; LMULMAX1-RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; LMULMAX1-RV32-NEXT:    vle32.v v8, (a0)
; LMULMAX1-RV32-NEXT:    addi a2, a0, 16
; LMULMAX1-RV32-NEXT:    vle32.v v9, (a2)
; LMULMAX1-RV32-NEXT:    addi a3, a1, 16
; LMULMAX1-RV32-NEXT:    vle32.v v10, (a3)
; LMULMAX1-RV32-NEXT:    vle32.v v11, (a1)
; LMULMAX1-RV32-NEXT:    vremu.vv v9, v9, v10
; LMULMAX1-RV32-NEXT:    vremu.vv v8, v8, v11
; LMULMAX1-RV32-NEXT:    vse32.v v8, (a0)
; LMULMAX1-RV32-NEXT:    vse32.v v9, (a2)
; LMULMAX1-RV32-NEXT:    ret
;
; LMULMAX1-RV64-LABEL: urem_v8i32:
; LMULMAX1-RV64:       # %bb.0:
; LMULMAX1-RV64-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; LMULMAX1-RV64-NEXT:    vle32.v v8, (a0)
; LMULMAX1-RV64-NEXT:    addi a2, a1, 16
; LMULMAX1-RV64-NEXT:    vle32.v v9, (a2)
; LMULMAX1-RV64-NEXT:    addi a2, a0, 16
; LMULMAX1-RV64-NEXT:    vle32.v v10, (a2)
; LMULMAX1-RV64-NEXT:    vle32.v v11, (a1)
; LMULMAX1-RV64-NEXT:    vremu.vv v9, v10, v9
; LMULMAX1-RV64-NEXT:    vremu.vv v8, v8, v11
; LMULMAX1-RV64-NEXT:    vse32.v v8, (a0)
; LMULMAX1-RV64-NEXT:    vse32.v v9, (a2)
; LMULMAX1-RV64-NEXT:    ret
  %a = load <8 x i32>, ptr %x
  %b = load <8 x i32>, ptr %y
  %c = urem <8 x i32> %a, %b
  store <8 x i32> %c, ptr %x
  ret void
}

; Element-wise unsigned remainder of two <4 x i64> vectors: the dividend is
; loaded from %x, the divisor from %y, and the remainder is stored back over
; %x.
; Expected codegen: with lmul-max=2 a single m2 vremu.vv at VL=4; with
; lmul-max=1 the operation is split into two m1 halves of 2 elements at byte
; offsets 0 and 16. The riscv32 and riscv64 expectations for the split differ
; only in register assignment and the order of the high-half loads.
define void @urem_v4i64(ptr %x, ptr %y) {
; LMULMAX2-LABEL: urem_v4i64:
; LMULMAX2:       # %bb.0:
; LMULMAX2-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; LMULMAX2-NEXT:    vle64.v v8, (a0)
; LMULMAX2-NEXT:    vle64.v v10, (a1)
; LMULMAX2-NEXT:    vremu.vv v8, v8, v10
; LMULMAX2-NEXT:    vse64.v v8, (a0)
; LMULMAX2-NEXT:    ret
;
; LMULMAX1-RV32-LABEL: urem_v4i64:
; LMULMAX1-RV32:       # %bb.0:
; LMULMAX1-RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; LMULMAX1-RV32-NEXT:    vle64.v v8, (a0)
; LMULMAX1-RV32-NEXT:    addi a2, a0, 16
; LMULMAX1-RV32-NEXT:    vle64.v v9, (a2)
; LMULMAX1-RV32-NEXT:    addi a3, a1, 16
; LMULMAX1-RV32-NEXT:    vle64.v v10, (a3)
; LMULMAX1-RV32-NEXT:    vle64.v v11, (a1)
; LMULMAX1-RV32-NEXT:    vremu.vv v9, v9, v10
; LMULMAX1-RV32-NEXT:    vremu.vv v8, v8, v11
; LMULMAX1-RV32-NEXT:    vse64.v v8, (a0)
; LMULMAX1-RV32-NEXT:    vse64.v v9, (a2)
; LMULMAX1-RV32-NEXT:    ret
;
; LMULMAX1-RV64-LABEL: urem_v4i64:
; LMULMAX1-RV64:       # %bb.0:
; LMULMAX1-RV64-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; LMULMAX1-RV64-NEXT:    vle64.v v8, (a0)
; LMULMAX1-RV64-NEXT:    addi a2, a1, 16
; LMULMAX1-RV64-NEXT:    vle64.v v9, (a2)
; LMULMAX1-RV64-NEXT:    addi a2, a0, 16
; LMULMAX1-RV64-NEXT:    vle64.v v10, (a2)
; LMULMAX1-RV64-NEXT:    vle64.v v11, (a1)
; LMULMAX1-RV64-NEXT:    vremu.vv v9, v10, v9
; LMULMAX1-RV64-NEXT:    vremu.vv v8, v8, v11
; LMULMAX1-RV64-NEXT:    vse64.v v8, (a0)
; LMULMAX1-RV64-NEXT:    vse64.v v9, (a2)
; LMULMAX1-RV64-NEXT:    ret
  %a = load <4 x i64>, ptr %x
  %b = load <4 x i64>, ptr %y
  %c = urem <4 x i64> %a, %b
  store <4 x i64> %c, ptr %x
  ret void
}

; <4 x i64> add where both loads happen in the entry block and the add/store
; happen in a separate "compute" block reached by an unconditional branch.
; The expected output is a single straight-line block: one m2 vadd.vv with
; lmul-max=2, or two m1 vadd.vv halves (byte offset 16) with lmul-max=1.
; NOTE(review): the name suggests this guards subvector extraction of values
; defined in a different basic block than their use - confirm against the
; change that introduced the test.
define void @extract_v4i64(ptr %x, ptr %y) {
; LMULMAX2-LABEL: extract_v4i64:
; LMULMAX2:       # %bb.0:
; LMULMAX2-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; LMULMAX2-NEXT:    vle64.v v8, (a0)
; LMULMAX2-NEXT:    vle64.v v10, (a1)
; LMULMAX2-NEXT:    vadd.vv v8, v8, v10
; LMULMAX2-NEXT:    vse64.v v8, (a0)
; LMULMAX2-NEXT:    ret
;
; LMULMAX1-LABEL: extract_v4i64:
; LMULMAX1:       # %bb.0:
; LMULMAX1-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; LMULMAX1-NEXT:    vle64.v v8, (a0)
; LMULMAX1-NEXT:    addi a2, a0, 16
; LMULMAX1-NEXT:    vle64.v v9, (a2)
; LMULMAX1-NEXT:    vle64.v v10, (a1)
; LMULMAX1-NEXT:    addi a1, a1, 16
; LMULMAX1-NEXT:    vle64.v v11, (a1)
; LMULMAX1-NEXT:    vadd.vv v9, v9, v11
; LMULMAX1-NEXT:    vadd.vv v8, v8, v10
; LMULMAX1-NEXT:    vse64.v v8, (a0)
; LMULMAX1-NEXT:    vse64.v v9, (a2)
; LMULMAX1-NEXT:    ret
  %a = load <4 x i64>, ptr %x
  %b = load <4 x i64>, ptr %y
  br label %"compute"
"compute":
  %c = add <4 x i64> %a, %b
  store <4 x i64> %c, ptr %x
  ret void
}

; Unsigned division of a <32 x i8> vector (loaded from and stored back to %x)
; by a non-uniform constant vector; the same 16 divisors are repeated in both
; halves.
; With lmul-max=2 the expected code contains no divide: it is a vmulhu.vv
; based sequence with per-element multipliers loaded from a constant pool and
; per-element shift amounts built with mask merges.
; With lmul-max=1 the expected code keeps vdivu.vv, reusing one 16-byte
; constant-pool divisor vector for both halves.
define void @mulhu_v32i8(ptr %x) {
; LMULMAX2-LABEL: mulhu_v32i8:
; LMULMAX2:       # %bb.0:
; LMULMAX2-NEXT:    li a1, 32
; LMULMAX2-NEXT:    vsetvli zero, a1, e8, m2, ta, ma
; LMULMAX2-NEXT:    vle8.v v8, (a0)
; LMULMAX2-NEXT:    vmv.v.i v10, 0
; LMULMAX2-NEXT:    lui a1, 163907
; LMULMAX2-NEXT:    addi a1, a1, -2044
; LMULMAX2-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
; LMULMAX2-NEXT:    vmv.s.x v0, a1
; LMULMAX2-NEXT:    li a1, -128
; LMULMAX2-NEXT:    vsetvli zero, zero, e8, m2, ta, ma
; LMULMAX2-NEXT:    vmerge.vxm v12, v10, a1, v0
; LMULMAX2-NEXT:    lui a1, 66049
; LMULMAX2-NEXT:    addi a1, a1, 32
; LMULMAX2-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
; LMULMAX2-NEXT:    vmv.s.x v0, a1
; LMULMAX2-NEXT:    vsetvli zero, zero, e8, m2, ta, ma
; LMULMAX2-NEXT:    lui a1, %hi(.LCPI181_0)
; LMULMAX2-NEXT:    addi a1, a1, %lo(.LCPI181_0)
; LMULMAX2-NEXT:    vle8.v v14, (a1)
; LMULMAX2-NEXT:    vmerge.vim v10, v10, 1, v0
; LMULMAX2-NEXT:    vsrl.vv v10, v8, v10
; LMULMAX2-NEXT:    vmulhu.vv v10, v10, v14
; LMULMAX2-NEXT:    vsub.vv v8, v8, v10
; LMULMAX2-NEXT:    vmulhu.vv v8, v8, v12
; LMULMAX2-NEXT:    vadd.vv v8, v8, v10
; LMULMAX2-NEXT:    vmv.v.i v10, 4
; LMULMAX2-NEXT:    lui a1, 8208
; LMULMAX2-NEXT:    addi a1, a1, 513
; LMULMAX2-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
; LMULMAX2-NEXT:    vmv.s.x v0, a1
; LMULMAX2-NEXT:    vsetvli zero, zero, e8, m2, ta, ma
; LMULMAX2-NEXT:    vmerge.vim v10, v10, 1, v0
; LMULMAX2-NEXT:    lui a1, 66785
; LMULMAX2-NEXT:    addi a1, a1, 78
; LMULMAX2-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
; LMULMAX2-NEXT:    vmv.s.x v0, a1
; LMULMAX2-NEXT:    vsetvli zero, zero, e8, m2, ta, ma
; LMULMAX2-NEXT:    vmerge.vim v10, v10, 3, v0
; LMULMAX2-NEXT:    lui a1, 529160
; LMULMAX2-NEXT:    addi a1, a1, 304
; LMULMAX2-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
; LMULMAX2-NEXT:    vmv.s.x v0, a1
; LMULMAX2-NEXT:    vsetvli zero, zero, e8, m2, ta, ma
; LMULMAX2-NEXT:    vmerge.vim v10, v10, 2, v0
; LMULMAX2-NEXT:    vsrl.vv v8, v8, v10
; LMULMAX2-NEXT:    vse8.v v8, (a0)
; LMULMAX2-NEXT:    ret
;
; LMULMAX1-LABEL: mulhu_v32i8:
; LMULMAX1:       # %bb.0:
; LMULMAX1-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
; LMULMAX1-NEXT:    addi a1, a0, 16
; LMULMAX1-NEXT:    vle8.v v8, (a1)
; LMULMAX1-NEXT:    lui a2, %hi(.LCPI181_0)
; LMULMAX1-NEXT:    addi a2, a2, %lo(.LCPI181_0)
; LMULMAX1-NEXT:    vle8.v v9, (a2)
; LMULMAX1-NEXT:    vle8.v v10, (a0)
; LMULMAX1-NEXT:    vdivu.vv v8, v8, v9
; LMULMAX1-NEXT:    vdivu.vv v9, v10, v9
; LMULMAX1-NEXT:    vse8.v v9, (a0)
; LMULMAX1-NEXT:    vse8.v v8, (a1)
; LMULMAX1-NEXT:    ret
  %a = load <32 x i8>, ptr %x
  %b = udiv <32 x i8> %a, <i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 17, i8 18, i8 19, i8 20, i8 21, i8 22, i8 23, i8 24, i8 25, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 17, i8 18, i8 19, i8 20, i8 21, i8 22, i8 23, i8 24, i8 25>
  store <32 x i8> %b, ptr %x
  ret void
}

; Unsigned division of a <16 x i16> vector (loaded from and stored back to
; %x) by a non-uniform constant vector; the same 8 divisors are repeated in
; both halves.
; With lmul-max=2 the expected code is a vmulhu.vv based sequence with no
; divide; riscv32 and riscv64 differ in how the per-element shift amounts are
; materialised (mask merges versus 64-bit splat / strided load) before being
; sign-extended with vsext.vf2.
; With lmul-max=1 the expected code keeps vdivu.vv, reusing one constant-pool
; divisor vector for both halves.
define void @mulhu_v16i16(ptr %x) {
; LMULMAX2-RV32-LABEL: mulhu_v16i16:
; LMULMAX2-RV32:       # %bb.0:
; LMULMAX2-RV32-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
; LMULMAX2-RV32-NEXT:    vle16.v v10, (a0)
; LMULMAX2-RV32-NEXT:    li a1, 257
; LMULMAX2-RV32-NEXT:    vmv.s.x v0, a1
; LMULMAX2-RV32-NEXT:    vmv.v.i v8, 0
; LMULMAX2-RV32-NEXT:    lui a1, 1048568
; LMULMAX2-RV32-NEXT:    vmerge.vxm v12, v8, a1, v0
; LMULMAX2-RV32-NEXT:    lui a1, 4
; LMULMAX2-RV32-NEXT:    addi a1, a1, 64
; LMULMAX2-RV32-NEXT:    vmv.s.x v8, a1
; LMULMAX2-RV32-NEXT:    vsetvli zero, zero, e8, m1, ta, ma
; LMULMAX2-RV32-NEXT:    vmv.v.i v9, 0
; LMULMAX2-RV32-NEXT:    vmv1r.v v0, v8
; LMULMAX2-RV32-NEXT:    vmerge.vim v9, v9, 1, v0
; LMULMAX2-RV32-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
; LMULMAX2-RV32-NEXT:    lui a1, %hi(.LCPI182_0)
; LMULMAX2-RV32-NEXT:    addi a1, a1, %lo(.LCPI182_0)
; LMULMAX2-RV32-NEXT:    vle16.v v14, (a1)
; LMULMAX2-RV32-NEXT:    vsext.vf2 v16, v9
; LMULMAX2-RV32-NEXT:    vsrl.vv v16, v10, v16
; LMULMAX2-RV32-NEXT:    vmulhu.vv v14, v16, v14
; LMULMAX2-RV32-NEXT:    vsub.vv v10, v10, v14
; LMULMAX2-RV32-NEXT:    vmulhu.vv v10, v10, v12
; LMULMAX2-RV32-NEXT:    vadd.vv v10, v10, v14
; LMULMAX2-RV32-NEXT:    lui a1, 2
; LMULMAX2-RV32-NEXT:    addi a1, a1, 289
; LMULMAX2-RV32-NEXT:    vmv.s.x v0, a1
; LMULMAX2-RV32-NEXT:    vsetvli zero, zero, e8, m1, ta, ma
; LMULMAX2-RV32-NEXT:    vmv.v.i v9, 3
; LMULMAX2-RV32-NEXT:    vmerge.vim v9, v9, 2, v0
; LMULMAX2-RV32-NEXT:    vmv1r.v v0, v8
; LMULMAX2-RV32-NEXT:    vmerge.vim v8, v9, 1, v0
; LMULMAX2-RV32-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
; LMULMAX2-RV32-NEXT:    vsext.vf2 v12, v8
; LMULMAX2-RV32-NEXT:    vsrl.vv v8, v10, v12
; LMULMAX2-RV32-NEXT:    vse16.v v8, (a0)
; LMULMAX2-RV32-NEXT:    ret
;
; LMULMAX2-RV64-LABEL: mulhu_v16i16:
; LMULMAX2-RV64:       # %bb.0:
; LMULMAX2-RV64-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
; LMULMAX2-RV64-NEXT:    vle16.v v8, (a0)
; LMULMAX2-RV64-NEXT:    li a1, 257
; LMULMAX2-RV64-NEXT:    vmv.s.x v0, a1
; LMULMAX2-RV64-NEXT:    vmv.v.i v10, 0
; LMULMAX2-RV64-NEXT:    lui a1, 1048568
; LMULMAX2-RV64-NEXT:    vmerge.vxm v10, v10, a1, v0
; LMULMAX2-RV64-NEXT:    li a1, 1
; LMULMAX2-RV64-NEXT:    slli a1, a1, 48
; LMULMAX2-RV64-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; LMULMAX2-RV64-NEXT:    vmv.v.x v12, a1
; LMULMAX2-RV64-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
; LMULMAX2-RV64-NEXT:    lui a1, %hi(.LCPI182_0)
; LMULMAX2-RV64-NEXT:    addi a1, a1, %lo(.LCPI182_0)
; LMULMAX2-RV64-NEXT:    vle16.v v14, (a1)
; LMULMAX2-RV64-NEXT:    vsext.vf2 v16, v12
; LMULMAX2-RV64-NEXT:    vsrl.vv v12, v8, v16
; LMULMAX2-RV64-NEXT:    vmulhu.vv v12, v12, v14
; LMULMAX2-RV64-NEXT:    vsub.vv v8, v8, v12
; LMULMAX2-RV64-NEXT:    vmulhu.vv v8, v8, v10
; LMULMAX2-RV64-NEXT:    vadd.vv v8, v8, v12
; LMULMAX2-RV64-NEXT:    lui a1, %hi(.LCPI182_1)
; LMULMAX2-RV64-NEXT:    addi a1, a1, %lo(.LCPI182_1)
; LMULMAX2-RV64-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; LMULMAX2-RV64-NEXT:    vlse64.v v10, (a1), zero
; LMULMAX2-RV64-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
; LMULMAX2-RV64-NEXT:    vsext.vf2 v12, v10
; LMULMAX2-RV64-NEXT:    vsrl.vv v8, v8, v12
; LMULMAX2-RV64-NEXT:    vse16.v v8, (a0)
; LMULMAX2-RV64-NEXT:    ret
;
; LMULMAX1-LABEL: mulhu_v16i16:
; LMULMAX1:       # %bb.0:
; LMULMAX1-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; LMULMAX1-NEXT:    addi a1, a0, 16
; LMULMAX1-NEXT:    vle16.v v8, (a1)
; LMULMAX1-NEXT:    lui a2, %hi(.LCPI182_0)
; LMULMAX1-NEXT:    addi a2, a2, %lo(.LCPI182_0)
; LMULMAX1-NEXT:    vle16.v v9, (a2)
; LMULMAX1-NEXT:    vle16.v v10, (a0)
; LMULMAX1-NEXT:    vdivu.vv v8, v8, v9
; LMULMAX1-NEXT:    vdivu.vv v9, v10, v9
; LMULMAX1-NEXT:    vse16.v v9, (a0)
; LMULMAX1-NEXT:    vse16.v v8, (a1)
; LMULMAX1-NEXT:    ret
  %a = load <16 x i16>, ptr %x
  %b = udiv <16 x i16> %a, <i16 7, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15, i16 7, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>
  store <16 x i16> %b, ptr %x
  ret void
}

; Unsigned division of a <8 x i32> vector (loaded from and stored back to %x)
; by the constant pattern <5, 6, 7, 9> repeated twice.
; With lmul-max=2 the expected code is a vmulhu.vv based sequence; the
; per-element shift amounts are packed as bytes into one scalar
; (lui 4128 / addi 514) and widened with vsext.vf4.
; With lmul-max=1 the two targets diverge: riscv32 expects the vmulhu.vv
; sequence applied to each m1 half, while riscv64 expects vdivu.vv with the
; divisors packed as bytes into one scalar (lui 36976 / addi 1541) and
; widened with vsext.vf4.
define void @mulhu_v8i32(ptr %x) {
; LMULMAX2-LABEL: mulhu_v8i32:
; LMULMAX2:       # %bb.0:
; LMULMAX2-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; LMULMAX2-NEXT:    vle32.v v8, (a0)
; LMULMAX2-NEXT:    li a1, 68
; LMULMAX2-NEXT:    vmv.s.x v0, a1
; LMULMAX2-NEXT:    lui a1, %hi(.LCPI183_0)
; LMULMAX2-NEXT:    addi a1, a1, %lo(.LCPI183_0)
; LMULMAX2-NEXT:    vle32.v v10, (a1)
; LMULMAX2-NEXT:    vmv.v.i v12, 0
; LMULMAX2-NEXT:    lui a1, 524288
; LMULMAX2-NEXT:    vmerge.vxm v12, v12, a1, v0
; LMULMAX2-NEXT:    vmulhu.vv v10, v8, v10
; LMULMAX2-NEXT:    vsub.vv v8, v8, v10
; LMULMAX2-NEXT:    vmulhu.vv v8, v8, v12
; LMULMAX2-NEXT:    vadd.vv v8, v8, v10
; LMULMAX2-NEXT:    lui a1, 4128
; LMULMAX2-NEXT:    addi a1, a1, 514
; LMULMAX2-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
; LMULMAX2-NEXT:    vmv.v.x v10, a1
; LMULMAX2-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; LMULMAX2-NEXT:    vsext.vf4 v12, v10
; LMULMAX2-NEXT:    vsrl.vv v8, v8, v12
; LMULMAX2-NEXT:    vse32.v v8, (a0)
; LMULMAX2-NEXT:    ret
;
; LMULMAX1-RV32-LABEL: mulhu_v8i32:
; LMULMAX1-RV32:       # %bb.0:
; LMULMAX1-RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; LMULMAX1-RV32-NEXT:    vle32.v v8, (a0)
; LMULMAX1-RV32-NEXT:    addi a1, a0, 16
; LMULMAX1-RV32-NEXT:    vle32.v v9, (a1)
; LMULMAX1-RV32-NEXT:    lui a2, 524288
; LMULMAX1-RV32-NEXT:    vmv.s.x v10, a2
; LMULMAX1-RV32-NEXT:    vmv.v.i v11, 0
; LMULMAX1-RV32-NEXT:    vsetivli zero, 3, e32, m1, tu, ma
; LMULMAX1-RV32-NEXT:    vslideup.vi v11, v10, 2
; LMULMAX1-RV32-NEXT:    lui a2, %hi(.LCPI183_0)
; LMULMAX1-RV32-NEXT:    addi a2, a2, %lo(.LCPI183_0)
; LMULMAX1-RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; LMULMAX1-RV32-NEXT:    vle32.v v10, (a2)
; LMULMAX1-RV32-NEXT:    vmulhu.vv v12, v9, v10
; LMULMAX1-RV32-NEXT:    vsub.vv v9, v9, v12
; LMULMAX1-RV32-NEXT:    vmulhu.vv v9, v9, v11
; LMULMAX1-RV32-NEXT:    vadd.vv v9, v9, v12
; LMULMAX1-RV32-NEXT:    lui a2, 4128
; LMULMAX1-RV32-NEXT:    addi a2, a2, 514
; LMULMAX1-RV32-NEXT:    vmv.s.x v12, a2
; LMULMAX1-RV32-NEXT:    vsext.vf4 v13, v12
; LMULMAX1-RV32-NEXT:    vsrl.vv v9, v9, v13
; LMULMAX1-RV32-NEXT:    vmulhu.vv v10, v8, v10
; LMULMAX1-RV32-NEXT:    vsub.vv v8, v8, v10
; LMULMAX1-RV32-NEXT:    vmulhu.vv v8, v8, v11
; LMULMAX1-RV32-NEXT:    vadd.vv v8, v8, v10
; LMULMAX1-RV32-NEXT:    vsrl.vv v8, v8, v13
; LMULMAX1-RV32-NEXT:    vse32.v v8, (a0)
; LMULMAX1-RV32-NEXT:    vse32.v v9, (a1)
; LMULMAX1-RV32-NEXT:    ret
;
; LMULMAX1-RV64-LABEL: mulhu_v8i32:
; LMULMAX1-RV64:       # %bb.0:
; LMULMAX1-RV64-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; LMULMAX1-RV64-NEXT:    vle32.v v8, (a0)
; LMULMAX1-RV64-NEXT:    addi a1, a0, 16
; LMULMAX1-RV64-NEXT:    vle32.v v9, (a1)
; LMULMAX1-RV64-NEXT:    lui a2, 36976
; LMULMAX1-RV64-NEXT:    addi a2, a2, 1541
; LMULMAX1-RV64-NEXT:    vmv.s.x v10, a2
; LMULMAX1-RV64-NEXT:    vsext.vf4 v11, v10
; LMULMAX1-RV64-NEXT:    vdivu.vv v9, v9, v11
; LMULMAX1-RV64-NEXT:    vdivu.vv v8, v8, v11
; LMULMAX1-RV64-NEXT:    vse32.v v8, (a0)
; LMULMAX1-RV64-NEXT:    vse32.v v9, (a1)
; LMULMAX1-RV64-NEXT:    ret
  %a = load <8 x i32>, ptr %x
  %b = udiv <8 x i32> %a, <i32 5, i32 6, i32 7, i32 9, i32 5, i32 6, i32 7, i32 9>
  store <8 x i32> %b, ptr %x
  ret void
}

; Unsigned division of a <4 x i64> vector (loaded from and stored back to %x)
; by the constant vector <3, 5, 7, 9>.
; With lmul-max=2 both targets expect a vmulhu.vv based sequence; riscv32
; builds its 64-bit constants through e32 views of the registers, riscv64
; packs the shift amounts as bytes (lui 12320 / addi 513) and widens them
; with vsext.vf8.
; With lmul-max=1 the two targets diverge: riscv32 expects vdivu.vv on each
; m1 half with the divisors materialised from packed scalars, while riscv64
; expects a vmulhu.vv sequence per half with shift amounts derived from vid.v.
define void @mulhu_v4i64(ptr %x) {
; LMULMAX2-RV32-LABEL: mulhu_v4i64:
; LMULMAX2-RV32:       # %bb.0:
; LMULMAX2-RV32-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; LMULMAX2-RV32-NEXT:    vle64.v v8, (a0)
; LMULMAX2-RV32-NEXT:    lui a1, %hi(.LCPI184_0)
; LMULMAX2-RV32-NEXT:    addi a1, a1, %lo(.LCPI184_0)
; LMULMAX2-RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; LMULMAX2-RV32-NEXT:    vle32.v v10, (a1)
; LMULMAX2-RV32-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; LMULMAX2-RV32-NEXT:    vmulhu.vv v10, v8, v10
; LMULMAX2-RV32-NEXT:    vsub.vv v8, v8, v10
; LMULMAX2-RV32-NEXT:    lui a1, 524288
; LMULMAX2-RV32-NEXT:    vmv.s.x v12, a1
; LMULMAX2-RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; LMULMAX2-RV32-NEXT:    vmv.v.i v14, 0
; LMULMAX2-RV32-NEXT:    vsetivli zero, 6, e32, m2, tu, ma
; LMULMAX2-RV32-NEXT:    vslideup.vi v14, v12, 5
; LMULMAX2-RV32-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; LMULMAX2-RV32-NEXT:    vmulhu.vv v8, v8, v14
; LMULMAX2-RV32-NEXT:    vadd.vv v8, v8, v10
; LMULMAX2-RV32-NEXT:    lui a1, %hi(.LCPI184_1)
; LMULMAX2-RV32-NEXT:    addi a1, a1, %lo(.LCPI184_1)
; LMULMAX2-RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; LMULMAX2-RV32-NEXT:    vle8.v v10, (a1)
; LMULMAX2-RV32-NEXT:    vsext.vf4 v12, v10
; LMULMAX2-RV32-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; LMULMAX2-RV32-NEXT:    vsrl.vv v8, v8, v12
; LMULMAX2-RV32-NEXT:    vse64.v v8, (a0)
; LMULMAX2-RV32-NEXT:    ret
;
; LMULMAX2-RV64-LABEL: mulhu_v4i64:
; LMULMAX2-RV64:       # %bb.0:
; LMULMAX2-RV64-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; LMULMAX2-RV64-NEXT:    vle64.v v8, (a0)
; LMULMAX2-RV64-NEXT:    li a1, -1
; LMULMAX2-RV64-NEXT:    slli a1, a1, 63
; LMULMAX2-RV64-NEXT:    vmv.s.x v10, a1
; LMULMAX2-RV64-NEXT:    vmv.v.i v12, 0
; LMULMAX2-RV64-NEXT:    vsetivli zero, 3, e64, m2, tu, ma
; LMULMAX2-RV64-NEXT:    vslideup.vi v12, v10, 2
; LMULMAX2-RV64-NEXT:    lui a1, %hi(.LCPI184_0)
; LMULMAX2-RV64-NEXT:    addi a1, a1, %lo(.LCPI184_0)
; LMULMAX2-RV64-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; LMULMAX2-RV64-NEXT:    vle64.v v10, (a1)
; LMULMAX2-RV64-NEXT:    vmulhu.vv v10, v8, v10
; LMULMAX2-RV64-NEXT:    vsub.vv v8, v8, v10
; LMULMAX2-RV64-NEXT:    vmulhu.vv v8, v8, v12
; LMULMAX2-RV64-NEXT:    vadd.vv v8, v8, v10
; LMULMAX2-RV64-NEXT:    lui a1, 12320
; LMULMAX2-RV64-NEXT:    addi a1, a1, 513
; LMULMAX2-RV64-NEXT:    vmv.s.x v10, a1
; LMULMAX2-RV64-NEXT:    vsext.vf8 v12, v10
; LMULMAX2-RV64-NEXT:    vsrl.vv v8, v8, v12
; LMULMAX2-RV64-NEXT:    vse64.v v8, (a0)
; LMULMAX2-RV64-NEXT:    ret
;
; LMULMAX1-RV32-LABEL: mulhu_v4i64:
; LMULMAX1-RV32:       # %bb.0:
; LMULMAX1-RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; LMULMAX1-RV32-NEXT:    vle64.v v8, (a0)
; LMULMAX1-RV32-NEXT:    addi a1, a0, 16
; LMULMAX1-RV32-NEXT:    vle64.v v9, (a1)
; LMULMAX1-RV32-NEXT:    lui a2, 144
; LMULMAX1-RV32-NEXT:    addi a2, a2, 7
; LMULMAX1-RV32-NEXT:    vmv.s.x v10, a2
; LMULMAX1-RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; LMULMAX1-RV32-NEXT:    vsext.vf4 v11, v10
; LMULMAX1-RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; LMULMAX1-RV32-NEXT:    vdivu.vv v9, v9, v11
; LMULMAX1-RV32-NEXT:    lui a2, 80
; LMULMAX1-RV32-NEXT:    addi a2, a2, 3
; LMULMAX1-RV32-NEXT:    vmv.s.x v10, a2
; LMULMAX1-RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; LMULMAX1-RV32-NEXT:    vsext.vf4 v11, v10
; LMULMAX1-RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; LMULMAX1-RV32-NEXT:    vdivu.vv v8, v8, v11
; LMULMAX1-RV32-NEXT:    vse64.v v8, (a0)
; LMULMAX1-RV32-NEXT:    vse64.v v9, (a1)
; LMULMAX1-RV32-NEXT:    ret
;
; LMULMAX1-RV64-LABEL: mulhu_v4i64:
; LMULMAX1-RV64:       # %bb.0:
; LMULMAX1-RV64-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; LMULMAX1-RV64-NEXT:    vle64.v v8, (a0)
; LMULMAX1-RV64-NEXT:    addi a1, a0, 16
; LMULMAX1-RV64-NEXT:    vle64.v v9, (a1)
; LMULMAX1-RV64-NEXT:    vmv.v.i v10, 0
; LMULMAX1-RV64-NEXT:    li a2, -1
; LMULMAX1-RV64-NEXT:    slli a2, a2, 63
; LMULMAX1-RV64-NEXT:    vsetvli zero, zero, e64, m1, tu, ma
; LMULMAX1-RV64-NEXT:    vmv.s.x v10, a2
; LMULMAX1-RV64-NEXT:    lui a2, %hi(.LCPI184_0)
; LMULMAX1-RV64-NEXT:    addi a2, a2, %lo(.LCPI184_0)
; LMULMAX1-RV64-NEXT:    vsetvli zero, zero, e64, m1, ta, ma
; LMULMAX1-RV64-NEXT:    vlse64.v v11, (a2), zero
; LMULMAX1-RV64-NEXT:    lui a2, %hi(.LCPI184_1)
; LMULMAX1-RV64-NEXT:    ld a2, %lo(.LCPI184_1)(a2)
; LMULMAX1-RV64-NEXT:    vsetvli zero, zero, e64, m1, tu, ma
; LMULMAX1-RV64-NEXT:    vmv.s.x v11, a2
; LMULMAX1-RV64-NEXT:    vsetvli zero, zero, e64, m1, ta, ma
; LMULMAX1-RV64-NEXT:    vmulhu.vv v11, v9, v11
; LMULMAX1-RV64-NEXT:    vsub.vv v9, v9, v11
; LMULMAX1-RV64-NEXT:    vmulhu.vv v9, v9, v10
; LMULMAX1-RV64-NEXT:    vadd.vv v9, v9, v11
; LMULMAX1-RV64-NEXT:    vid.v v10
; LMULMAX1-RV64-NEXT:    vadd.vi v11, v10, 2
; LMULMAX1-RV64-NEXT:    vsrl.vv v9, v9, v11
; LMULMAX1-RV64-NEXT:    lui a2, 838861
; LMULMAX1-RV64-NEXT:    addiw a2, a2, -819
; LMULMAX1-RV64-NEXT:    slli a3, a2, 32
; LMULMAX1-RV64-NEXT:    add a2, a2, a3
; LMULMAX1-RV64-NEXT:    vmv.v.x v11, a2
; LMULMAX1-RV64-NEXT:    lui a2, 699051
; LMULMAX1-RV64-NEXT:    addiw a2, a2, -1365
; LMULMAX1-RV64-NEXT:    slli a3, a2, 32
; LMULMAX1-RV64-NEXT:    add a2, a2, a3
; LMULMAX1-RV64-NEXT:    vsetvli zero, zero, e64, m1, tu, ma
; LMULMAX1-RV64-NEXT:    vmv.s.x v11, a2
; LMULMAX1-RV64-NEXT:    vsetvli zero, zero, e64, m1, ta, ma
; LMULMAX1-RV64-NEXT:    vmulhu.vv v8, v8, v11
; LMULMAX1-RV64-NEXT:    vadd.vi v10, v10, 1
; LMULMAX1-RV64-NEXT:    vsrl.vv v8, v8, v10
; LMULMAX1-RV64-NEXT:    vse64.v v8, (a0)
; LMULMAX1-RV64-NEXT:    vse64.v v9, (a1)
; LMULMAX1-RV64-NEXT:    ret
  %a = load <4 x i64>, ptr %x
  %b = udiv <4 x i64> %a, <i64 3, i64 5, i64 7, i64 9>
  store <4 x i64> %b, ptr %x
  ret void
}

; Division of a <32 x i8> vector (loaded from and stored back to %x) by a
; constant vector whose elements are either 9 or -9; the same 16-element
; pattern is repeated in both halves.
; NOTE(review): despite the "mulhs" name, the IR below uses udiv, and the
; expected output is correspondingly unsigned (vmulhu.vv + vsrl.vv with
; lmul-max=2, vdivu.vv with lmul-max=1). Switching the IR to sdiv would
; require regenerating the assertions with update_llc_test_checks.py -
; confirm whether the unsigned form is intentional before changing it.
define void @mulhs_v32i8(ptr %x) {
; LMULMAX2-LABEL: mulhs_v32i8:
; LMULMAX2:       # %bb.0:
; LMULMAX2-NEXT:    li a1, 32
; LMULMAX2-NEXT:    vsetvli zero, a1, e8, m2, ta, ma
; LMULMAX2-NEXT:    vle8.v v8, (a0)
; LMULMAX2-NEXT:    vmv.v.i v10, 7
; LMULMAX2-NEXT:    lui a1, 304453
; LMULMAX2-NEXT:    addi a1, a1, -1452
; LMULMAX2-NEXT:    vsetvli zero, zero, e32, m8, ta, ma
; LMULMAX2-NEXT:    vmv.s.x v0, a1
; LMULMAX2-NEXT:    vsetvli zero, zero, e8, m2, ta, ma
; LMULMAX2-NEXT:    vmerge.vim v10, v10, 1, v0
; LMULMAX2-NEXT:    li a1, -123
; LMULMAX2-NEXT:    vmv.v.x v12, a1
; LMULMAX2-NEXT:    li a1, 57
; LMULMAX2-NEXT:    vmerge.vxm v12, v12, a1, v0
; LMULMAX2-NEXT:    vmulhu.vv v8, v8, v12
; LMULMAX2-NEXT:    vsrl.vv v8, v8, v10
; LMULMAX2-NEXT:    vse8.v v8, (a0)
; LMULMAX2-NEXT:    ret
;
; LMULMAX1-LABEL: mulhs_v32i8:
; LMULMAX1:       # %bb.0:
; LMULMAX1-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
; LMULMAX1-NEXT:    vle8.v v8, (a0)
; LMULMAX1-NEXT:    addi a1, a0, 16
; LMULMAX1-NEXT:    vle8.v v9, (a1)
; LMULMAX1-NEXT:    lui a2, 5
; LMULMAX1-NEXT:    addi a2, a2, -1452
; LMULMAX1-NEXT:    vmv.s.x v0, a2
; LMULMAX1-NEXT:    vsetvli zero, zero, e8, m1, ta, ma
; LMULMAX1-NEXT:    vmv.v.i v10, -9
; LMULMAX1-NEXT:    vmerge.vim v10, v10, 9, v0
; LMULMAX1-NEXT:    vdivu.vv v9, v9, v10
; LMULMAX1-NEXT:    vdivu.vv v8, v8, v10
; LMULMAX1-NEXT:    vse8.v v8, (a0)
; LMULMAX1-NEXT:    vse8.v v9, (a1)
; LMULMAX1-NEXT:    ret
  %a = load <32 x i8>, ptr %x
  ; Unsigned divide: the i8 -9 elements are treated as 247 here.
  %b = udiv <32 x i8> %a, <i8 -9, i8 -9, i8 9, i8 -9, i8 9, i8 -9, i8 9, i8 -9, i8 -9, i8 9, i8 -9, i8 9, i8 -9, i8 -9, i8 9, i8 -9, i8 -9, i8 -9, i8 9, i8 -9, i8 9, i8 -9, i8 9, i8 -9, i8 -9, i8 9, i8 -9, i8 9, i8 -9, i8 -9, i8 9, i8 -9>
  store <32 x i8> %b, ptr %x
  ret void
}

; Signed division of a <16 x i16> vector (loaded from and stored back to %x)
; by a constant vector whose elements are either 7 or -7; the same 8-element
; pattern is repeated in both halves.
; With lmul-max=2 the expected code replaces the divide with vmulh.vv by a
; per-element multiplier (selected with a mask merge), an arithmetic shift
; right by 1, and an add of the sign bit (vsrl.vi by 15).
; With lmul-max=1 the expected code keeps vdiv.vv, building the +/-7 divisor
; vector with vmv.v.i and vmerge.vim under mask 105.
define void @mulhs_v16i16(ptr %x) {
; LMULMAX2-LABEL: mulhs_v16i16:
; LMULMAX2:       # %bb.0:
; LMULMAX2-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
; LMULMAX2-NEXT:    vle16.v v8, (a0)
; LMULMAX2-NEXT:    lui a1, 5
; LMULMAX2-NEXT:    addi a1, a1, -1755
; LMULMAX2-NEXT:    vmv.v.x v10, a1
; LMULMAX2-NEXT:    lui a1, 7
; LMULMAX2-NEXT:    addi a1, a1, -1687
; LMULMAX2-NEXT:    vmv.s.x v0, a1
; LMULMAX2-NEXT:    lui a1, 1048571
; LMULMAX2-NEXT:    addi a1, a1, 1755
; LMULMAX2-NEXT:    vmerge.vxm v10, v10, a1, v0
; LMULMAX2-NEXT:    vmulh.vv v8, v8, v10
; LMULMAX2-NEXT:    vsra.vi v8, v8, 1
; LMULMAX2-NEXT:    vsrl.vi v10, v8, 15
; LMULMAX2-NEXT:    vadd.vv v8, v8, v10
; LMULMAX2-NEXT:    vse16.v v8, (a0)
; LMULMAX2-NEXT:    ret
;
; LMULMAX1-LABEL: mulhs_v16i16:
; LMULMAX1:       # %bb.0:
; LMULMAX1-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; LMULMAX1-NEXT:    vle16.v v8, (a0)
; LMULMAX1-NEXT:    addi a1, a0, 16
; LMULMAX1-NEXT:    vle16.v v9, (a1)
; LMULMAX1-NEXT:    li a2, 105
; LMULMAX1-NEXT:    vmv.s.x v0, a2
; LMULMAX1-NEXT:    vmv.v.i v10, 7
; LMULMAX1-NEXT:    vmerge.vim v10, v10, -7, v0
; LMULMAX1-NEXT:    vdiv.vv v9, v9, v10
; LMULMAX1-NEXT:    vdiv.vv v8, v8, v10
; LMULMAX1-NEXT:    vse16.v v8, (a0)
; LMULMAX1-NEXT:    vse16.v v9, (a1)
; LMULMAX1-NEXT:    ret
  %a = load <16 x i16>, ptr %x
  %b = sdiv <16 x i16> %a, <i16 -7, i16 7, i16 7, i16 -7, i16 7, i16 -7, i16 -7, i16 7, i16 -7, i16 7, i16 7, i16 -7, i16 7, i16 -7, i16 -7, i16 7>
  store <16 x i16> %b, ptr %x
  ret void
}

; Signed division of a <8 x i32> vector (loaded from and stored back to %x)
; by the alternating constant vector <-5, 5, -5, 5, ...>.
; With lmul-max=2 both targets expect vmulh.vv followed by a shift right by 1
; and an add of the sign bit; riscv32 builds the multiplier vector with a
; mask merge, riscv64 loads a 64-bit pair with a zero-stride vlse64.v.
; With lmul-max=1 the two targets diverge: riscv32 expects the vmulh.vv
; sequence per m1 half, while riscv64 expects vdiv.vv with the {-5, 5} pair
; materialised as one 64-bit scalar and splatted at e64.
define void @mulhs_v8i32(ptr %x) {
; LMULMAX2-RV32-LABEL: mulhs_v8i32:
; LMULMAX2-RV32:       # %bb.0:
; LMULMAX2-RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; LMULMAX2-RV32-NEXT:    vle32.v v8, (a0)
; LMULMAX2-RV32-NEXT:    lui a1, 419430
; LMULMAX2-RV32-NEXT:    addi a1, a1, 1639
; LMULMAX2-RV32-NEXT:    vmv.v.x v10, a1
; LMULMAX2-RV32-NEXT:    li a1, 85
; LMULMAX2-RV32-NEXT:    vmv.s.x v0, a1
; LMULMAX2-RV32-NEXT:    lui a1, 629146
; LMULMAX2-RV32-NEXT:    addi a1, a1, -1639
; LMULMAX2-RV32-NEXT:    vmerge.vxm v10, v10, a1, v0
; LMULMAX2-RV32-NEXT:    vmulh.vv v8, v8, v10
; LMULMAX2-RV32-NEXT:    vsrl.vi v10, v8, 31
; LMULMAX2-RV32-NEXT:    vsra.vi v8, v8, 1
; LMULMAX2-RV32-NEXT:    vadd.vv v8, v8, v10
; LMULMAX2-RV32-NEXT:    vse32.v v8, (a0)
; LMULMAX2-RV32-NEXT:    ret
;
; LMULMAX2-RV64-LABEL: mulhs_v8i32:
; LMULMAX2-RV64:       # %bb.0:
; LMULMAX2-RV64-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; LMULMAX2-RV64-NEXT:    vle32.v v8, (a0)
; LMULMAX2-RV64-NEXT:    lui a1, %hi(.LCPI187_0)
; LMULMAX2-RV64-NEXT:    addi a1, a1, %lo(.LCPI187_0)
; LMULMAX2-RV64-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; LMULMAX2-RV64-NEXT:    vlse64.v v10, (a1), zero
; LMULMAX2-RV64-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; LMULMAX2-RV64-NEXT:    vmulh.vv v8, v8, v10
; LMULMAX2-RV64-NEXT:    vsra.vi v8, v8, 1
; LMULMAX2-RV64-NEXT:    vsrl.vi v10, v8, 31
; LMULMAX2-RV64-NEXT:    vadd.vv v8, v8, v10
; LMULMAX2-RV64-NEXT:    vse32.v v8, (a0)
; LMULMAX2-RV64-NEXT:    ret
;
; LMULMAX1-RV32-LABEL: mulhs_v8i32:
; LMULMAX1-RV32:       # %bb.0:
; LMULMAX1-RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; LMULMAX1-RV32-NEXT:    vle32.v v8, (a0)
; LMULMAX1-RV32-NEXT:    addi a1, a0, 16
; LMULMAX1-RV32-NEXT:    vle32.v v9, (a1)
; LMULMAX1-RV32-NEXT:    lui a2, 419430
; LMULMAX1-RV32-NEXT:    addi a2, a2, 1639
; LMULMAX1-RV32-NEXT:    vmv.v.x v10, a2
; LMULMAX1-RV32-NEXT:    vmv.v.i v0, 5
; LMULMAX1-RV32-NEXT:    lui a2, 629146
; LMULMAX1-RV32-NEXT:    addi a2, a2, -1639
; LMULMAX1-RV32-NEXT:    vmerge.vxm v10, v10, a2, v0
; LMULMAX1-RV32-NEXT:    vmulh.vv v9, v9, v10
; LMULMAX1-RV32-NEXT:    vsrl.vi v11, v9, 31
; LMULMAX1-RV32-NEXT:    vsra.vi v9, v9, 1
; LMULMAX1-RV32-NEXT:    vadd.vv v9, v9, v11
; LMULMAX1-RV32-NEXT:    vmulh.vv v8, v8, v10
; LMULMAX1-RV32-NEXT:    vsrl.vi v10, v8, 31
; LMULMAX1-RV32-NEXT:    vsra.vi v8, v8, 1
; LMULMAX1-RV32-NEXT:    vadd.vv v8, v8, v10
; LMULMAX1-RV32-NEXT:    vse32.v v8, (a0)
; LMULMAX1-RV32-NEXT:    vse32.v v9, (a1)
; LMULMAX1-RV32-NEXT:    ret
;
; LMULMAX1-RV64-LABEL: mulhs_v8i32:
; LMULMAX1-RV64:       # %bb.0:
; LMULMAX1-RV64-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; LMULMAX1-RV64-NEXT:    vle32.v v8, (a0)
; LMULMAX1-RV64-NEXT:    addi a1, a0, 16
; LMULMAX1-RV64-NEXT:    vle32.v v9, (a1)
; LMULMAX1-RV64-NEXT:    li a2, 3
; LMULMAX1-RV64-NEXT:    slli a2, a2, 33
; LMULMAX1-RV64-NEXT:    addi a2, a2, -5
; LMULMAX1-RV64-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; LMULMAX1-RV64-NEXT:    vmv.v.x v10, a2
; LMULMAX1-RV64-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; LMULMAX1-RV64-NEXT:    vdiv.vv v9, v9, v10
; LMULMAX1-RV64-NEXT:    vdiv.vv v8, v8, v10
; LMULMAX1-RV64-NEXT:    vse32.v v8, (a0)
; LMULMAX1-RV64-NEXT:    vse32.v v9, (a1)
; LMULMAX1-RV64-NEXT:    ret
  %a = load <8 x i32>, ptr %x
  %b = sdiv <8 x i32> %a, <i32 -5, i32 5, i32 -5, i32 5, i32 -5, i32 5, i32 -5, i32 5>
  store <8 x i32> %b, ptr %x
  ret void
}

; Signed division of a <4 x i64> vector (loaded from and stored back to %x)
; by the alternating constant vector <3, -3, 3, -3>.
; With lmul-max=2 both targets expect vmulh.vv, a vmadd.vv that folds in the
; numerator scaled by a per-element 0/-1 factor, a per-element arithmetic
; shift (vsra.vv), and an add of the sign bit (vsrl.vx by 63). riscv32 builds
; its 64-bit constants through e32 views; riscv64 packs the small per-element
; values as bytes and widens them with vsext.vf8.
; With lmul-max=1 the two targets diverge: riscv32 expects vdiv.vv with the
; {3, -3} pair widened from a packed scalar, while riscv64 expects a
; vmulh.vv / vmacc.vv sequence per m1 half with factors derived from vid.v.
define void @mulhs_v4i64(ptr %x) {
; LMULMAX2-RV32-LABEL: mulhs_v4i64:
; LMULMAX2-RV32:       # %bb.0:
; LMULMAX2-RV32-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; LMULMAX2-RV32-NEXT:    vle64.v v8, (a0)
; LMULMAX2-RV32-NEXT:    lui a1, 349525
; LMULMAX2-RV32-NEXT:    addi a2, a1, 1365
; LMULMAX2-RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; LMULMAX2-RV32-NEXT:    vmv.v.x v10, a2
; LMULMAX2-RV32-NEXT:    li a2, 17
; LMULMAX2-RV32-NEXT:    vmv.s.x v0, a2
; LMULMAX2-RV32-NEXT:    addi a1, a1, 1366
; LMULMAX2-RV32-NEXT:    vmerge.vxm v10, v10, a1, v0
; LMULMAX2-RV32-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; LMULMAX2-RV32-NEXT:    vmulh.vv v10, v8, v10
; LMULMAX2-RV32-NEXT:    lui a1, 1048560
; LMULMAX2-RV32-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
; LMULMAX2-RV32-NEXT:    vmv.v.x v12, a1
; LMULMAX2-RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; LMULMAX2-RV32-NEXT:    vsext.vf4 v14, v12
; LMULMAX2-RV32-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; LMULMAX2-RV32-NEXT:    vmadd.vv v14, v8, v10
; LMULMAX2-RV32-NEXT:    li a1, 63
; LMULMAX2-RV32-NEXT:    vsrl.vx v8, v14, a1
; LMULMAX2-RV32-NEXT:    lui a1, 16
; LMULMAX2-RV32-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
; LMULMAX2-RV32-NEXT:    vmv.v.x v10, a1
; LMULMAX2-RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; LMULMAX2-RV32-NEXT:    vsext.vf4 v12, v10
; LMULMAX2-RV32-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; LMULMAX2-RV32-NEXT:    vsra.vv v10, v14, v12
; LMULMAX2-RV32-NEXT:    vadd.vv v8, v10, v8
; LMULMAX2-RV32-NEXT:    vse64.v v8, (a0)
; LMULMAX2-RV32-NEXT:    ret
;
; LMULMAX2-RV64-LABEL: mulhs_v4i64:
; LMULMAX2-RV64:       # %bb.0:
; LMULMAX2-RV64-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; LMULMAX2-RV64-NEXT:    vle64.v v8, (a0)
; LMULMAX2-RV64-NEXT:    lui a1, 349525
; LMULMAX2-RV64-NEXT:    addiw a1, a1, 1365
; LMULMAX2-RV64-NEXT:    slli a2, a1, 32
; LMULMAX2-RV64-NEXT:    add a1, a1, a2
; LMULMAX2-RV64-NEXT:    vmv.v.x v10, a1
; LMULMAX2-RV64-NEXT:    lui a1, %hi(.LCPI188_0)
; LMULMAX2-RV64-NEXT:    ld a1, %lo(.LCPI188_0)(a1)
; LMULMAX2-RV64-NEXT:    vsetivli zero, 1, e8, mf8, ta, ma
; LMULMAX2-RV64-NEXT:    vmv.v.i v0, 5
; LMULMAX2-RV64-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; LMULMAX2-RV64-NEXT:    vmerge.vxm v10, v10, a1, v0
; LMULMAX2-RV64-NEXT:    vmulh.vv v10, v8, v10
; LMULMAX2-RV64-NEXT:    lui a1, 1044496
; LMULMAX2-RV64-NEXT:    addi a1, a1, -256
; LMULMAX2-RV64-NEXT:    vmv.s.x v12, a1
; LMULMAX2-RV64-NEXT:    vsext.vf8 v14, v12
; LMULMAX2-RV64-NEXT:    vmadd.vv v14, v8, v10
; LMULMAX2-RV64-NEXT:    li a1, 63
; LMULMAX2-RV64-NEXT:    vsrl.vx v8, v14, a1
; LMULMAX2-RV64-NEXT:    lui a1, 4096
; LMULMAX2-RV64-NEXT:    addi a1, a1, 256
; LMULMAX2-RV64-NEXT:    vmv.s.x v10, a1
; LMULMAX2-RV64-NEXT:    vsext.vf8 v12, v10
; LMULMAX2-RV64-NEXT:    vsra.vv v10, v14, v12
; LMULMAX2-RV64-NEXT:    vadd.vv v8, v10, v8
; LMULMAX2-RV64-NEXT:    vse64.v v8, (a0)
; LMULMAX2-RV64-NEXT:    ret
;
; LMULMAX1-RV32-LABEL: mulhs_v4i64:
; LMULMAX1-RV32:       # %bb.0:
; LMULMAX1-RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; LMULMAX1-RV32-NEXT:    vle64.v v8, (a0)
; LMULMAX1-RV32-NEXT:    addi a1, a0, 16
; LMULMAX1-RV32-NEXT:    vle64.v v9, (a1)
; LMULMAX1-RV32-NEXT:    lui a2, 1048528
; LMULMAX1-RV32-NEXT:    addi a2, a2, 3
; LMULMAX1-RV32-NEXT:    vmv.s.x v10, a2
; LMULMAX1-RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; LMULMAX1-RV32-NEXT:    vsext.vf4 v11, v10
; LMULMAX1-RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; LMULMAX1-RV32-NEXT:    vdiv.vv v9, v9, v11
; LMULMAX1-RV32-NEXT:    vdiv.vv v8, v8, v11
; LMULMAX1-RV32-NEXT:    vse64.v v8, (a0)
; LMULMAX1-RV32-NEXT:    vse64.v v9, (a1)
; LMULMAX1-RV32-NEXT:    ret
;
; LMULMAX1-RV64-LABEL: mulhs_v4i64:
; LMULMAX1-RV64:       # %bb.0:
; LMULMAX1-RV64-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; LMULMAX1-RV64-NEXT:    vle64.v v8, (a0)
; LMULMAX1-RV64-NEXT:    addi a1, a0, 16
; LMULMAX1-RV64-NEXT:    lui a2, 349525
; LMULMAX1-RV64-NEXT:    addiw a2, a2, 1365
; LMULMAX1-RV64-NEXT:    slli a3, a2, 32
; LMULMAX1-RV64-NEXT:    add a2, a2, a3
; LMULMAX1-RV64-NEXT:    lui a3, %hi(.LCPI188_0)
; LMULMAX1-RV64-NEXT:    ld a3, %lo(.LCPI188_0)(a3)
; LMULMAX1-RV64-NEXT:    vle64.v v9, (a1)
; LMULMAX1-RV64-NEXT:    vmv.v.x v10, a2
; LMULMAX1-RV64-NEXT:    vsetvli zero, zero, e64, m1, tu, ma
; LMULMAX1-RV64-NEXT:    vmv.s.x v10, a3
; LMULMAX1-RV64-NEXT:    vsetvli zero, zero, e64, m1, ta, ma
; LMULMAX1-RV64-NEXT:    vmulh.vv v11, v9, v10
; LMULMAX1-RV64-NEXT:    vid.v v12
; LMULMAX1-RV64-NEXT:    vrsub.vi v13, v12, 0
; LMULMAX1-RV64-NEXT:    vmacc.vv v11, v13, v9
; LMULMAX1-RV64-NEXT:    li a2, 63
; LMULMAX1-RV64-NEXT:    vsrl.vx v9, v11, a2
; LMULMAX1-RV64-NEXT:    vsra.vv v11, v11, v12
; LMULMAX1-RV64-NEXT:    vadd.vv v9, v11, v9
; LMULMAX1-RV64-NEXT:    vmulh.vv v10, v8, v10
; LMULMAX1-RV64-NEXT:    vmacc.vv v10, v8, v13
; LMULMAX1-RV64-NEXT:    vsrl.vx v8, v10, a2
; LMULMAX1-RV64-NEXT:    vsra.vv v10, v10, v12
; LMULMAX1-RV64-NEXT:    vadd.vv v8, v10, v8
; LMULMAX1-RV64-NEXT:    vse64.v v8, (a0)
; LMULMAX1-RV64-NEXT:    vse64.v v9, (a1)
; LMULMAX1-RV64-NEXT:    ret
  %a = load <4 x i64>, ptr %x
  %b = sdiv <4 x i64> %a, <i64 3, i64 -3, i64 3, i64 -3>
  store <4 x i64> %b, ptr %x
  ret void
}

; Signed minimum of two <32 x i8> vectors loaded from %x and %y, written as
; an icmp slt + select pair; the result is stored back to %x.
; The compare/select pair is expected to become vmin.vv: a single m2
; instruction with lmul-max=2 (VL of 32 is loaded through a register because
; it is set with vsetvli rather than vsetivli), or two m1 halves at byte
; offset 16 with lmul-max=1.
define void @smin_v32i8(ptr %x, ptr %y) {
; LMULMAX2-LABEL: smin_v32i8:
; LMULMAX2:       # %bb.0:
; LMULMAX2-NEXT:    li a2, 32
; LMULMAX2-NEXT:    vsetvli zero, a2, e8, m2, ta, ma
; LMULMAX2-NEXT:    vle8.v v8, (a0)
; LMULMAX2-NEXT:    vle8.v v10, (a1)
; LMULMAX2-NEXT:    vmin.vv v8, v8, v10
; LMULMAX2-NEXT:    vse8.v v8, (a0)
; LMULMAX2-NEXT:    ret
;
; LMULMAX1-RV32-LABEL: smin_v32i8:
; LMULMAX1-RV32:       # %bb.0:
; LMULMAX1-RV32-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
; LMULMAX1-RV32-NEXT:    vle8.v v8, (a0)
; LMULMAX1-RV32-NEXT:    addi a2, a0, 16
; LMULMAX1-RV32-NEXT:    vle8.v v9, (a2)
; LMULMAX1-RV32-NEXT:    addi a3, a1, 16
; LMULMAX1-RV32-NEXT:    vle8.v v10, (a3)
; LMULMAX1-RV32-NEXT:    vle8.v v11, (a1)
; LMULMAX1-RV32-NEXT:    vmin.vv v9, v9, v10
; LMULMAX1-RV32-NEXT:    vmin.vv v8, v8, v11
; LMULMAX1-RV32-NEXT:    vse8.v v8, (a0)
; LMULMAX1-RV32-NEXT:    vse8.v v9, (a2)
; LMULMAX1-RV32-NEXT:    ret
;
; LMULMAX1-RV64-LABEL: smin_v32i8:
; LMULMAX1-RV64:       # %bb.0:
; LMULMAX1-RV64-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
; LMULMAX1-RV64-NEXT:    vle8.v v8, (a0)
; LMULMAX1-RV64-NEXT:    addi a2, a1, 16
; LMULMAX1-RV64-NEXT:    vle8.v v9, (a2)
; LMULMAX1-RV64-NEXT:    addi a2, a0, 16
; LMULMAX1-RV64-NEXT:    vle8.v v10, (a2)
; LMULMAX1-RV64-NEXT:    vle8.v v11, (a1)
; LMULMAX1-RV64-NEXT:    vmin.vv v9, v10, v9
; LMULMAX1-RV64-NEXT:    vmin.vv v8, v8, v11
; LMULMAX1-RV64-NEXT:    vse8.v v8, (a0)
; LMULMAX1-RV64-NEXT:    vse8.v v9, (a2)
; LMULMAX1-RV64-NEXT:    ret
  %a = load <32 x i8>, ptr %x
  %b = load <32 x i8>, ptr %y
  %cc = icmp slt <32 x i8> %a, %b
  %c = select <32 x i1> %cc, <32 x i8> %a, <32 x i8> %b
  store <32 x i8> %c, ptr %x
  ret void
}

; Signed minimum of two <16 x i16> vectors loaded from %x and %y, written as
; an icmp slt + select pair; the result is stored back to %x.
; The compare/select pair is expected to become vmin.vv: a single m2
; instruction with lmul-max=2, or two m1 halves at byte offset 16 with
; lmul-max=1 (riscv32 and riscv64 differ only in address-register usage and
; operand order).
define void @smin_v16i16(ptr %x, ptr %y) {
; LMULMAX2-LABEL: smin_v16i16:
; LMULMAX2:       # %bb.0:
; LMULMAX2-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
; LMULMAX2-NEXT:    vle16.v v8, (a0)
; LMULMAX2-NEXT:    vle16.v v10, (a1)
; LMULMAX2-NEXT:    vmin.vv v8, v8, v10
; LMULMAX2-NEXT:    vse16.v v8, (a0)
; LMULMAX2-NEXT:    ret
;
; LMULMAX1-RV32-LABEL: smin_v16i16:
; LMULMAX1-RV32:       # %bb.0:
; LMULMAX1-RV32-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; LMULMAX1-RV32-NEXT:    vle16.v v8, (a0)
; LMULMAX1-RV32-NEXT:    addi a2, a0, 16
; LMULMAX1-RV32-NEXT:    vle16.v v9, (a2)
; LMULMAX1-RV32-NEXT:    addi a3, a1, 16
; LMULMAX1-RV32-NEXT:    vle16.v v10, (a3)
; LMULMAX1-RV32-NEXT:    vle16.v v11, (a1)
; LMULMAX1-RV32-NEXT:    vmin.vv v9, v9, v10
; LMULMAX1-RV32-NEXT:    vmin.vv v8, v8, v11
; LMULMAX1-RV32-NEXT:    vse16.v v8, (a0)
; LMULMAX1-RV32-NEXT:    vse16.v v9, (a2)
; LMULMAX1-RV32-NEXT:    ret
;
; LMULMAX1-RV64-LABEL: smin_v16i16:
; LMULMAX1-RV64:       # %bb.0:
; LMULMAX1-RV64-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; LMULMAX1-RV64-NEXT:    vle16.v v8, (a0)
; LMULMAX1-RV64-NEXT:    addi a2, a1, 16
; LMULMAX1-RV64-NEXT:    vle16.v v9, (a2)
; LMULMAX1-RV64-NEXT:    addi a2, a0, 16
; LMULMAX1-RV64-NEXT:    vle16.v v10, (a2)
; LMULMAX1-RV64-NEXT:    vle16.v v11, (a1)
; LMULMAX1-RV64-NEXT:    vmin.vv v9, v10, v9
; LMULMAX1-RV64-NEXT:    vmin.vv v8, v8, v11
; LMULMAX1-RV64-NEXT:    vse16.v v8, (a0)
; LMULMAX1-RV64-NEXT:    vse16.v v9, (a2)
; LMULMAX1-RV64-NEXT:    ret
  %a = load <16 x i16>, ptr %x
  %b = load <16 x i16>, ptr %y
  %cc = icmp slt <16 x i16> %a, %b
  %c = select <16 x i1> %cc, <16 x i16> %a, <16 x i16> %b
  store <16 x i16> %c, ptr %x
  ret void
}

; Signed minimum of two <8 x i32> vectors, written as icmp slt + select, with
; the result stored back through %x. LMULMAX2 expects a single m2 vmin.vv; both
; LMULMAX1 prefixes expect two m1 vmin.vv ops, one per 16-byte half.
define void @smin_v8i32(ptr %x, ptr %y) {
; LMULMAX2-LABEL: smin_v8i32:
; LMULMAX2:       # %bb.0:
; LMULMAX2-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; LMULMAX2-NEXT:    vle32.v v8, (a0)
; LMULMAX2-NEXT:    vle32.v v10, (a1)
; LMULMAX2-NEXT:    vmin.vv v8, v8, v10
; LMULMAX2-NEXT:    vse32.v v8, (a0)
; LMULMAX2-NEXT:    ret
;
; LMULMAX1-RV32-LABEL: smin_v8i32:
; LMULMAX1-RV32:       # %bb.0:
; LMULMAX1-RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; LMULMAX1-RV32-NEXT:    vle32.v v8, (a0)
; LMULMAX1-RV32-NEXT:    addi a2, a0, 16
; LMULMAX1-RV32-NEXT:    vle32.v v9, (a2)
; LMULMAX1-RV32-NEXT:    addi a3, a1, 16
; LMULMAX1-RV32-NEXT:    vle32.v v10, (a3)
; LMULMAX1-RV32-NEXT:    vle32.v v11, (a1)
; LMULMAX1-RV32-NEXT:    vmin.vv v9, v9, v10
; LMULMAX1-RV32-NEXT:    vmin.vv v8, v8, v11
; LMULMAX1-RV32-NEXT:    vse32.v v8, (a0)
; LMULMAX1-RV32-NEXT:    vse32.v v9, (a2)
; LMULMAX1-RV32-NEXT:    ret
;
; LMULMAX1-RV64-LABEL: smin_v8i32:
; LMULMAX1-RV64:       # %bb.0:
; LMULMAX1-RV64-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; LMULMAX1-RV64-NEXT:    vle32.v v8, (a0)
; LMULMAX1-RV64-NEXT:    addi a2, a1, 16
; LMULMAX1-RV64-NEXT:    vle32.v v9, (a2)
; LMULMAX1-RV64-NEXT:    addi a2, a0, 16
; LMULMAX1-RV64-NEXT:    vle32.v v10, (a2)
; LMULMAX1-RV64-NEXT:    vle32.v v11, (a1)
; LMULMAX1-RV64-NEXT:    vmin.vv v9, v10, v9
; LMULMAX1-RV64-NEXT:    vmin.vv v8, v8, v11
; LMULMAX1-RV64-NEXT:    vse32.v v8, (a0)
; LMULMAX1-RV64-NEXT:    vse32.v v9, (a2)
; LMULMAX1-RV64-NEXT:    ret
  %a = load <8 x i32>, ptr %x
  %b = load <8 x i32>, ptr %y
  %cc = icmp slt <8 x i32> %a, %b
  %c = select <8 x i1> %cc, <8 x i32> %a, <8 x i32> %b
  store <8 x i32> %c, ptr %x
  ret void
}

; Signed minimum of two <4 x i64> vectors, written as icmp slt + select, with
; the result stored back through %x. LMULMAX2 expects a single m2 vmin.vv; both
; LMULMAX1 prefixes expect two m1 vmin.vv ops, one per 16-byte half.
define void @smin_v4i64(ptr %x, ptr %y) {
; LMULMAX2-LABEL: smin_v4i64:
; LMULMAX2:       # %bb.0:
; LMULMAX2-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; LMULMAX2-NEXT:    vle64.v v8, (a0)
; LMULMAX2-NEXT:    vle64.v v10, (a1)
; LMULMAX2-NEXT:    vmin.vv v8, v8, v10
; LMULMAX2-NEXT:    vse64.v v8, (a0)
; LMULMAX2-NEXT:    ret
;
; LMULMAX1-RV32-LABEL: smin_v4i64:
; LMULMAX1-RV32:       # %bb.0:
; LMULMAX1-RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; LMULMAX1-RV32-NEXT:    vle64.v v8, (a0)
; LMULMAX1-RV32-NEXT:    addi a2, a0, 16
; LMULMAX1-RV32-NEXT:    vle64.v v9, (a2)
; LMULMAX1-RV32-NEXT:    addi a3, a1, 16
; LMULMAX1-RV32-NEXT:    vle64.v v10, (a3)
; LMULMAX1-RV32-NEXT:    vle64.v v11, (a1)
; LMULMAX1-RV32-NEXT:    vmin.vv v9, v9, v10
; LMULMAX1-RV32-NEXT:    vmin.vv v8, v8, v11
; LMULMAX1-RV32-NEXT:    vse64.v v8, (a0)
; LMULMAX1-RV32-NEXT:    vse64.v v9, (a2)
; LMULMAX1-RV32-NEXT:    ret
;
; LMULMAX1-RV64-LABEL: smin_v4i64:
; LMULMAX1-RV64:       # %bb.0:
; LMULMAX1-RV64-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; LMULMAX1-RV64-NEXT:    vle64.v v8, (a0)
; LMULMAX1-RV64-NEXT:    addi a2, a1, 16
; LMULMAX1-RV64-NEXT:    vle64.v v9, (a2)
; LMULMAX1-RV64-NEXT:    addi a2, a0, 16
; LMULMAX1-RV64-NEXT:    vle64.v v10, (a2)
; LMULMAX1-RV64-NEXT:    vle64.v v11, (a1)
; LMULMAX1-RV64-NEXT:    vmin.vv v9, v10, v9
; LMULMAX1-RV64-NEXT:    vmin.vv v8, v8, v11
; LMULMAX1-RV64-NEXT:    vse64.v v8, (a0)
; LMULMAX1-RV64-NEXT:    vse64.v v9, (a2)
; LMULMAX1-RV64-NEXT:    ret
  %a = load <4 x i64>, ptr %x
  %b = load <4 x i64>, ptr %y
  %cc = icmp slt <4 x i64> %a, %b
  %c = select <4 x i1> %cc, <4 x i64> %a, <4 x i64> %b
  store <4 x i64> %c, ptr %x
  ret void
}

; Signed maximum of two <32 x i8> vectors, written as icmp sgt + select, with
; the result stored back through %x. LMULMAX2 expects a single m2 vmax.vv with
; the AVL of 32 loaded by li and passed to vsetvli; both LMULMAX1 prefixes
; expect two m1 vmax.vv ops, one per 16-byte half.
define void @smax_v32i8(ptr %x, ptr %y) {
; LMULMAX2-LABEL: smax_v32i8:
; LMULMAX2:       # %bb.0:
; LMULMAX2-NEXT:    li a2, 32
; LMULMAX2-NEXT:    vsetvli zero, a2, e8, m2, ta, ma
; LMULMAX2-NEXT:    vle8.v v8, (a0)
; LMULMAX2-NEXT:    vle8.v v10, (a1)
; LMULMAX2-NEXT:    vmax.vv v8, v8, v10
; LMULMAX2-NEXT:    vse8.v v8, (a0)
; LMULMAX2-NEXT:    ret
;
; LMULMAX1-RV32-LABEL: smax_v32i8:
; LMULMAX1-RV32:       # %bb.0:
; LMULMAX1-RV32-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
; LMULMAX1-RV32-NEXT:    vle8.v v8, (a0)
; LMULMAX1-RV32-NEXT:    addi a2, a0, 16
; LMULMAX1-RV32-NEXT:    vle8.v v9, (a2)
; LMULMAX1-RV32-NEXT:    addi a3, a1, 16
; LMULMAX1-RV32-NEXT:    vle8.v v10, (a3)
; LMULMAX1-RV32-NEXT:    vle8.v v11, (a1)
; LMULMAX1-RV32-NEXT:    vmax.vv v9, v9, v10
; LMULMAX1-RV32-NEXT:    vmax.vv v8, v8, v11
; LMULMAX1-RV32-NEXT:    vse8.v v8, (a0)
; LMULMAX1-RV32-NEXT:    vse8.v v9, (a2)
; LMULMAX1-RV32-NEXT:    ret
;
; LMULMAX1-RV64-LABEL: smax_v32i8:
; LMULMAX1-RV64:       # %bb.0:
; LMULMAX1-RV64-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
; LMULMAX1-RV64-NEXT:    vle8.v v8, (a0)
; LMULMAX1-RV64-NEXT:    addi a2, a1, 16
; LMULMAX1-RV64-NEXT:    vle8.v v9, (a2)
; LMULMAX1-RV64-NEXT:    addi a2, a0, 16
; LMULMAX1-RV64-NEXT:    vle8.v v10, (a2)
; LMULMAX1-RV64-NEXT:    vle8.v v11, (a1)
; LMULMAX1-RV64-NEXT:    vmax.vv v9, v10, v9
; LMULMAX1-RV64-NEXT:    vmax.vv v8, v8, v11
; LMULMAX1-RV64-NEXT:    vse8.v v8, (a0)
; LMULMAX1-RV64-NEXT:    vse8.v v9, (a2)
; LMULMAX1-RV64-NEXT:    ret
  %a = load <32 x i8>, ptr %x
  %b = load <32 x i8>, ptr %y
  %cc = icmp sgt <32 x i8> %a, %b
  %c = select <32 x i1> %cc, <32 x i8> %a, <32 x i8> %b
  store <32 x i8> %c, ptr %x
  ret void
}

; Signed maximum of two <16 x i16> vectors, written as icmp sgt + select, with
; the result stored back through %x. LMULMAX2 expects a single m2 vmax.vv; both
; LMULMAX1 prefixes expect two m1 vmax.vv ops, one per 16-byte half.
define void @smax_v16i16(ptr %x, ptr %y) {
; LMULMAX2-LABEL: smax_v16i16:
; LMULMAX2:       # %bb.0:
; LMULMAX2-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
; LMULMAX2-NEXT:    vle16.v v8, (a0)
; LMULMAX2-NEXT:    vle16.v v10, (a1)
; LMULMAX2-NEXT:    vmax.vv v8, v8, v10
; LMULMAX2-NEXT:    vse16.v v8, (a0)
; LMULMAX2-NEXT:    ret
;
; LMULMAX1-RV32-LABEL: smax_v16i16:
; LMULMAX1-RV32:       # %bb.0:
; LMULMAX1-RV32-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; LMULMAX1-RV32-NEXT:    vle16.v v8, (a0)
; LMULMAX1-RV32-NEXT:    addi a2, a0, 16
; LMULMAX1-RV32-NEXT:    vle16.v v9, (a2)
; LMULMAX1-RV32-NEXT:    addi a3, a1, 16
; LMULMAX1-RV32-NEXT:    vle16.v v10, (a3)
; LMULMAX1-RV32-NEXT:    vle16.v v11, (a1)
; LMULMAX1-RV32-NEXT:    vmax.vv v9, v9, v10
; LMULMAX1-RV32-NEXT:    vmax.vv v8, v8, v11
; LMULMAX1-RV32-NEXT:    vse16.v v8, (a0)
; LMULMAX1-RV32-NEXT:    vse16.v v9, (a2)
; LMULMAX1-RV32-NEXT:    ret
;
; LMULMAX1-RV64-LABEL: smax_v16i16:
; LMULMAX1-RV64:       # %bb.0:
; LMULMAX1-RV64-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; LMULMAX1-RV64-NEXT:    vle16.v v8, (a0)
; LMULMAX1-RV64-NEXT:    addi a2, a1, 16
; LMULMAX1-RV64-NEXT:    vle16.v v9, (a2)
; LMULMAX1-RV64-NEXT:    addi a2, a0, 16
; LMULMAX1-RV64-NEXT:    vle16.v v10, (a2)
; LMULMAX1-RV64-NEXT:    vle16.v v11, (a1)
; LMULMAX1-RV64-NEXT:    vmax.vv v9, v10, v9
; LMULMAX1-RV64-NEXT:    vmax.vv v8, v8, v11
; LMULMAX1-RV64-NEXT:    vse16.v v8, (a0)
; LMULMAX1-RV64-NEXT:    vse16.v v9, (a2)
; LMULMAX1-RV64-NEXT:    ret
  %a = load <16 x i16>, ptr %x
  %b = load <16 x i16>, ptr %y
  %cc = icmp sgt <16 x i16> %a, %b
  %c = select <16 x i1> %cc, <16 x i16> %a, <16 x i16> %b
  store <16 x i16> %c, ptr %x
  ret void
}

; Signed maximum of two <8 x i32> vectors, written as icmp sgt + select, with
; the result stored back through %x. LMULMAX2 expects a single m2 vmax.vv; both
; LMULMAX1 prefixes expect two m1 vmax.vv ops, one per 16-byte half.
define void @smax_v8i32(ptr %x, ptr %y) {
; LMULMAX2-LABEL: smax_v8i32:
; LMULMAX2:       # %bb.0:
; LMULMAX2-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; LMULMAX2-NEXT:    vle32.v v8, (a0)
; LMULMAX2-NEXT:    vle32.v v10, (a1)
; LMULMAX2-NEXT:    vmax.vv v8, v8, v10
; LMULMAX2-NEXT:    vse32.v v8, (a0)
; LMULMAX2-NEXT:    ret
;
; LMULMAX1-RV32-LABEL: smax_v8i32:
; LMULMAX1-RV32:       # %bb.0:
; LMULMAX1-RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; LMULMAX1-RV32-NEXT:    vle32.v v8, (a0)
; LMULMAX1-RV32-NEXT:    addi a2, a0, 16
; LMULMAX1-RV32-NEXT:    vle32.v v9, (a2)
; LMULMAX1-RV32-NEXT:    addi a3, a1, 16
; LMULMAX1-RV32-NEXT:    vle32.v v10, (a3)
; LMULMAX1-RV32-NEXT:    vle32.v v11, (a1)
; LMULMAX1-RV32-NEXT:    vmax.vv v9, v9, v10
; LMULMAX1-RV32-NEXT:    vmax.vv v8, v8, v11
; LMULMAX1-RV32-NEXT:    vse32.v v8, (a0)
; LMULMAX1-RV32-NEXT:    vse32.v v9, (a2)
; LMULMAX1-RV32-NEXT:    ret
;
; LMULMAX1-RV64-LABEL: smax_v8i32:
; LMULMAX1-RV64:       # %bb.0:
; LMULMAX1-RV64-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; LMULMAX1-RV64-NEXT:    vle32.v v8, (a0)
; LMULMAX1-RV64-NEXT:    addi a2, a1, 16
; LMULMAX1-RV64-NEXT:    vle32.v v9, (a2)
; LMULMAX1-RV64-NEXT:    addi a2, a0, 16
; LMULMAX1-RV64-NEXT:    vle32.v v10, (a2)
; LMULMAX1-RV64-NEXT:    vle32.v v11, (a1)
; LMULMAX1-RV64-NEXT:    vmax.vv v9, v10, v9
; LMULMAX1-RV64-NEXT:    vmax.vv v8, v8, v11
; LMULMAX1-RV64-NEXT:    vse32.v v8, (a0)
; LMULMAX1-RV64-NEXT:    vse32.v v9, (a2)
; LMULMAX1-RV64-NEXT:    ret
  %a = load <8 x i32>, ptr %x
  %b = load <8 x i32>, ptr %y
  %cc = icmp sgt <8 x i32> %a, %b
  %c = select <8 x i1> %cc, <8 x i32> %a, <8 x i32> %b
  store <8 x i32> %c, ptr %x
  ret void
}

; Signed maximum of two <4 x i64> vectors, written as icmp sgt + select, with
; the result stored back through %x. LMULMAX2 expects a single m2 vmax.vv; both
; LMULMAX1 prefixes expect two m1 vmax.vv ops, one per 16-byte half.
define void @smax_v4i64(ptr %x, ptr %y) {
; LMULMAX2-LABEL: smax_v4i64:
; LMULMAX2:       # %bb.0:
; LMULMAX2-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; LMULMAX2-NEXT:    vle64.v v8, (a0)
; LMULMAX2-NEXT:    vle64.v v10, (a1)
; LMULMAX2-NEXT:    vmax.vv v8, v8, v10
; LMULMAX2-NEXT:    vse64.v v8, (a0)
; LMULMAX2-NEXT:    ret
;
; LMULMAX1-RV32-LABEL: smax_v4i64:
; LMULMAX1-RV32:       # %bb.0:
; LMULMAX1-RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; LMULMAX1-RV32-NEXT:    vle64.v v8, (a0)
; LMULMAX1-RV32-NEXT:    addi a2, a0, 16
; LMULMAX1-RV32-NEXT:    vle64.v v9, (a2)
; LMULMAX1-RV32-NEXT:    addi a3, a1, 16
; LMULMAX1-RV32-NEXT:    vle64.v v10, (a3)
; LMULMAX1-RV32-NEXT:    vle64.v v11, (a1)
; LMULMAX1-RV32-NEXT:    vmax.vv v9, v9, v10
; LMULMAX1-RV32-NEXT:    vmax.vv v8, v8, v11
; LMULMAX1-RV32-NEXT:    vse64.v v8, (a0)
; LMULMAX1-RV32-NEXT:    vse64.v v9, (a2)
; LMULMAX1-RV32-NEXT:    ret
;
; LMULMAX1-RV64-LABEL: smax_v4i64:
; LMULMAX1-RV64:       # %bb.0:
; LMULMAX1-RV64-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; LMULMAX1-RV64-NEXT:    vle64.v v8, (a0)
; LMULMAX1-RV64-NEXT:    addi a2, a1, 16
; LMULMAX1-RV64-NEXT:    vle64.v v9, (a2)
; LMULMAX1-RV64-NEXT:    addi a2, a0, 16
; LMULMAX1-RV64-NEXT:    vle64.v v10, (a2)
; LMULMAX1-RV64-NEXT:    vle64.v v11, (a1)
; LMULMAX1-RV64-NEXT:    vmax.vv v9, v10, v9
; LMULMAX1-RV64-NEXT:    vmax.vv v8, v8, v11
; LMULMAX1-RV64-NEXT:    vse64.v v8, (a0)
; LMULMAX1-RV64-NEXT:    vse64.v v9, (a2)
; LMULMAX1-RV64-NEXT:    ret
  %a = load <4 x i64>, ptr %x
  %b = load <4 x i64>, ptr %y
  %cc = icmp sgt <4 x i64> %a, %b
  %c = select <4 x i1> %cc, <4 x i64> %a, <4 x i64> %b
  store <4 x i64> %c, ptr %x
  ret void
}

; Unsigned minimum of two <32 x i8> vectors, written as icmp ult + select, with
; the result stored back through %x. LMULMAX2 expects a single m2 vminu.vv with
; the AVL of 32 loaded by li and passed to vsetvli; both LMULMAX1 prefixes
; expect two m1 vminu.vv ops, one per 16-byte half.
define void @umin_v32i8(ptr %x, ptr %y) {
; LMULMAX2-LABEL: umin_v32i8:
; LMULMAX2:       # %bb.0:
; LMULMAX2-NEXT:    li a2, 32
; LMULMAX2-NEXT:    vsetvli zero, a2, e8, m2, ta, ma
; LMULMAX2-NEXT:    vle8.v v8, (a0)
; LMULMAX2-NEXT:    vle8.v v10, (a1)
; LMULMAX2-NEXT:    vminu.vv v8, v8, v10
; LMULMAX2-NEXT:    vse8.v v8, (a0)
; LMULMAX2-NEXT:    ret
;
; LMULMAX1-RV32-LABEL: umin_v32i8:
; LMULMAX1-RV32:       # %bb.0:
; LMULMAX1-RV32-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
; LMULMAX1-RV32-NEXT:    vle8.v v8, (a0)
; LMULMAX1-RV32-NEXT:    addi a2, a0, 16
; LMULMAX1-RV32-NEXT:    vle8.v v9, (a2)
; LMULMAX1-RV32-NEXT:    addi a3, a1, 16
; LMULMAX1-RV32-NEXT:    vle8.v v10, (a3)
; LMULMAX1-RV32-NEXT:    vle8.v v11, (a1)
; LMULMAX1-RV32-NEXT:    vminu.vv v9, v9, v10
; LMULMAX1-RV32-NEXT:    vminu.vv v8, v8, v11
; LMULMAX1-RV32-NEXT:    vse8.v v8, (a0)
; LMULMAX1-RV32-NEXT:    vse8.v v9, (a2)
; LMULMAX1-RV32-NEXT:    ret
;
; LMULMAX1-RV64-LABEL: umin_v32i8:
; LMULMAX1-RV64:       # %bb.0:
; LMULMAX1-RV64-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
; LMULMAX1-RV64-NEXT:    vle8.v v8, (a0)
; LMULMAX1-RV64-NEXT:    addi a2, a1, 16
; LMULMAX1-RV64-NEXT:    vle8.v v9, (a2)
; LMULMAX1-RV64-NEXT:    addi a2, a0, 16
; LMULMAX1-RV64-NEXT:    vle8.v v10, (a2)
; LMULMAX1-RV64-NEXT:    vle8.v v11, (a1)
; LMULMAX1-RV64-NEXT:    vminu.vv v9, v10, v9
; LMULMAX1-RV64-NEXT:    vminu.vv v8, v8, v11
; LMULMAX1-RV64-NEXT:    vse8.v v8, (a0)
; LMULMAX1-RV64-NEXT:    vse8.v v9, (a2)
; LMULMAX1-RV64-NEXT:    ret
  %a = load <32 x i8>, ptr %x
  %b = load <32 x i8>, ptr %y
  %cc = icmp ult <32 x i8> %a, %b
  %c = select <32 x i1> %cc, <32 x i8> %a, <32 x i8> %b
  store <32 x i8> %c, ptr %x
  ret void
}

; Unsigned minimum of two <16 x i16> vectors, written as icmp ult + select,
; with the result stored back through %x. LMULMAX2 expects a single m2
; vminu.vv; both LMULMAX1 prefixes expect two m1 vminu.vv ops, one per 16-byte
; half.
define void @umin_v16i16(ptr %x, ptr %y) {
; LMULMAX2-LABEL: umin_v16i16:
; LMULMAX2:       # %bb.0:
; LMULMAX2-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
; LMULMAX2-NEXT:    vle16.v v8, (a0)
; LMULMAX2-NEXT:    vle16.v v10, (a1)
; LMULMAX2-NEXT:    vminu.vv v8, v8, v10
; LMULMAX2-NEXT:    vse16.v v8, (a0)
; LMULMAX2-NEXT:    ret
;
; LMULMAX1-RV32-LABEL: umin_v16i16:
; LMULMAX1-RV32:       # %bb.0:
; LMULMAX1-RV32-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; LMULMAX1-RV32-NEXT:    vle16.v v8, (a0)
; LMULMAX1-RV32-NEXT:    addi a2, a0, 16
; LMULMAX1-RV32-NEXT:    vle16.v v9, (a2)
; LMULMAX1-RV32-NEXT:    addi a3, a1, 16
; LMULMAX1-RV32-NEXT:    vle16.v v10, (a3)
; LMULMAX1-RV32-NEXT:    vle16.v v11, (a1)
; LMULMAX1-RV32-NEXT:    vminu.vv v9, v9, v10
; LMULMAX1-RV32-NEXT:    vminu.vv v8, v8, v11
; LMULMAX1-RV32-NEXT:    vse16.v v8, (a0)
; LMULMAX1-RV32-NEXT:    vse16.v v9, (a2)
; LMULMAX1-RV32-NEXT:    ret
;
; LMULMAX1-RV64-LABEL: umin_v16i16:
; LMULMAX1-RV64:       # %bb.0:
; LMULMAX1-RV64-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; LMULMAX1-RV64-NEXT:    vle16.v v8, (a0)
; LMULMAX1-RV64-NEXT:    addi a2, a1, 16
; LMULMAX1-RV64-NEXT:    vle16.v v9, (a2)
; LMULMAX1-RV64-NEXT:    addi a2, a0, 16
; LMULMAX1-RV64-NEXT:    vle16.v v10, (a2)
; LMULMAX1-RV64-NEXT:    vle16.v v11, (a1)
; LMULMAX1-RV64-NEXT:    vminu.vv v9, v10, v9
; LMULMAX1-RV64-NEXT:    vminu.vv v8, v8, v11
; LMULMAX1-RV64-NEXT:    vse16.v v8, (a0)
; LMULMAX1-RV64-NEXT:    vse16.v v9, (a2)
; LMULMAX1-RV64-NEXT:    ret
  %a = load <16 x i16>, ptr %x
  %b = load <16 x i16>, ptr %y
  %cc = icmp ult <16 x i16> %a, %b
  %c = select <16 x i1> %cc, <16 x i16> %a, <16 x i16> %b
  store <16 x i16> %c, ptr %x
  ret void
}

; Unsigned minimum of two <8 x i32> vectors, written as icmp ult + select, with
; the result stored back through %x. LMULMAX2 expects a single m2 vminu.vv;
; both LMULMAX1 prefixes expect two m1 vminu.vv ops, one per 16-byte half.
define void @umin_v8i32(ptr %x, ptr %y) {
; LMULMAX2-LABEL: umin_v8i32:
; LMULMAX2:       # %bb.0:
; LMULMAX2-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; LMULMAX2-NEXT:    vle32.v v8, (a0)
; LMULMAX2-NEXT:    vle32.v v10, (a1)
; LMULMAX2-NEXT:    vminu.vv v8, v8, v10
; LMULMAX2-NEXT:    vse32.v v8, (a0)
; LMULMAX2-NEXT:    ret
;
; LMULMAX1-RV32-LABEL: umin_v8i32:
; LMULMAX1-RV32:       # %bb.0:
; LMULMAX1-RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; LMULMAX1-RV32-NEXT:    vle32.v v8, (a0)
; LMULMAX1-RV32-NEXT:    addi a2, a0, 16
; LMULMAX1-RV32-NEXT:    vle32.v v9, (a2)
; LMULMAX1-RV32-NEXT:    addi a3, a1, 16
; LMULMAX1-RV32-NEXT:    vle32.v v10, (a3)
; LMULMAX1-RV32-NEXT:    vle32.v v11, (a1)
; LMULMAX1-RV32-NEXT:    vminu.vv v9, v9, v10
; LMULMAX1-RV32-NEXT:    vminu.vv v8, v8, v11
; LMULMAX1-RV32-NEXT:    vse32.v v8, (a0)
; LMULMAX1-RV32-NEXT:    vse32.v v9, (a2)
; LMULMAX1-RV32-NEXT:    ret
;
; LMULMAX1-RV64-LABEL: umin_v8i32:
; LMULMAX1-RV64:       # %bb.0:
; LMULMAX1-RV64-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; LMULMAX1-RV64-NEXT:    vle32.v v8, (a0)
; LMULMAX1-RV64-NEXT:    addi a2, a1, 16
; LMULMAX1-RV64-NEXT:    vle32.v v9, (a2)
; LMULMAX1-RV64-NEXT:    addi a2, a0, 16
; LMULMAX1-RV64-NEXT:    vle32.v v10, (a2)
; LMULMAX1-RV64-NEXT:    vle32.v v11, (a1)
; LMULMAX1-RV64-NEXT:    vminu.vv v9, v10, v9
; LMULMAX1-RV64-NEXT:    vminu.vv v8, v8, v11
; LMULMAX1-RV64-NEXT:    vse32.v v8, (a0)
; LMULMAX1-RV64-NEXT:    vse32.v v9, (a2)
; LMULMAX1-RV64-NEXT:    ret
  %a = load <8 x i32>, ptr %x
  %b = load <8 x i32>, ptr %y
  %cc = icmp ult <8 x i32> %a, %b
  %c = select <8 x i1> %cc, <8 x i32> %a, <8 x i32> %b
  store <8 x i32> %c, ptr %x
  ret void
}

; Unsigned minimum of two <4 x i64> vectors, written as icmp ult + select, with
; the result stored back through %x. LMULMAX2 expects a single m2 vminu.vv;
; both LMULMAX1 prefixes expect two m1 vminu.vv ops, one per 16-byte half.
define void @umin_v4i64(ptr %x, ptr %y) {
; LMULMAX2-LABEL: umin_v4i64:
; LMULMAX2:       # %bb.0:
; LMULMAX2-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; LMULMAX2-NEXT:    vle64.v v8, (a0)
; LMULMAX2-NEXT:    vle64.v v10, (a1)
; LMULMAX2-NEXT:    vminu.vv v8, v8, v10
; LMULMAX2-NEXT:    vse64.v v8, (a0)
; LMULMAX2-NEXT:    ret
;
; LMULMAX1-RV32-LABEL: umin_v4i64:
; LMULMAX1-RV32:       # %bb.0:
; LMULMAX1-RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; LMULMAX1-RV32-NEXT:    vle64.v v8, (a0)
; LMULMAX1-RV32-NEXT:    addi a2, a0, 16
; LMULMAX1-RV32-NEXT:    vle64.v v9, (a2)
; LMULMAX1-RV32-NEXT:    addi a3, a1, 16
; LMULMAX1-RV32-NEXT:    vle64.v v10, (a3)
; LMULMAX1-RV32-NEXT:    vle64.v v11, (a1)
; LMULMAX1-RV32-NEXT:    vminu.vv v9, v9, v10
; LMULMAX1-RV32-NEXT:    vminu.vv v8, v8, v11
; LMULMAX1-RV32-NEXT:    vse64.v v8, (a0)
; LMULMAX1-RV32-NEXT:    vse64.v v9, (a2)
; LMULMAX1-RV32-NEXT:    ret
;
; LMULMAX1-RV64-LABEL: umin_v4i64:
; LMULMAX1-RV64:       # %bb.0:
; LMULMAX1-RV64-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; LMULMAX1-RV64-NEXT:    vle64.v v8, (a0)
; LMULMAX1-RV64-NEXT:    addi a2, a1, 16
; LMULMAX1-RV64-NEXT:    vle64.v v9, (a2)
; LMULMAX1-RV64-NEXT:    addi a2, a0, 16
; LMULMAX1-RV64-NEXT:    vle64.v v10, (a2)
; LMULMAX1-RV64-NEXT:    vle64.v v11, (a1)
; LMULMAX1-RV64-NEXT:    vminu.vv v9, v10, v9
; LMULMAX1-RV64-NEXT:    vminu.vv v8, v8, v11
; LMULMAX1-RV64-NEXT:    vse64.v v8, (a0)
; LMULMAX1-RV64-NEXT:    vse64.v v9, (a2)
; LMULMAX1-RV64-NEXT:    ret
  %a = load <4 x i64>, ptr %x
  %b = load <4 x i64>, ptr %y
  %cc = icmp ult <4 x i64> %a, %b
  %c = select <4 x i1> %cc, <4 x i64> %a, <4 x i64> %b
  store <4 x i64> %c, ptr %x
  ret void
}

; Unsigned maximum of two <32 x i8> vectors, written as icmp ugt + select, with
; the result stored back through %x. LMULMAX2 expects a single m2 vmaxu.vv with
; the AVL of 32 loaded by li and passed to vsetvli; both LMULMAX1 prefixes
; expect two m1 vmaxu.vv ops, one per 16-byte half.
define void @umax_v32i8(ptr %x, ptr %y) {
; LMULMAX2-LABEL: umax_v32i8:
; LMULMAX2:       # %bb.0:
; LMULMAX2-NEXT:    li a2, 32
; LMULMAX2-NEXT:    vsetvli zero, a2, e8, m2, ta, ma
; LMULMAX2-NEXT:    vle8.v v8, (a0)
; LMULMAX2-NEXT:    vle8.v v10, (a1)
; LMULMAX2-NEXT:    vmaxu.vv v8, v8, v10
; LMULMAX2-NEXT:    vse8.v v8, (a0)
; LMULMAX2-NEXT:    ret
;
; LMULMAX1-RV32-LABEL: umax_v32i8:
; LMULMAX1-RV32:       # %bb.0:
; LMULMAX1-RV32-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
; LMULMAX1-RV32-NEXT:    vle8.v v8, (a0)
; LMULMAX1-RV32-NEXT:    addi a2, a0, 16
; LMULMAX1-RV32-NEXT:    vle8.v v9, (a2)
; LMULMAX1-RV32-NEXT:    addi a3, a1, 16
; LMULMAX1-RV32-NEXT:    vle8.v v10, (a3)
; LMULMAX1-RV32-NEXT:    vle8.v v11, (a1)
; LMULMAX1-RV32-NEXT:    vmaxu.vv v9, v9, v10
; LMULMAX1-RV32-NEXT:    vmaxu.vv v8, v8, v11
; LMULMAX1-RV32-NEXT:    vse8.v v8, (a0)
; LMULMAX1-RV32-NEXT:    vse8.v v9, (a2)
; LMULMAX1-RV32-NEXT:    ret
;
; LMULMAX1-RV64-LABEL: umax_v32i8:
; LMULMAX1-RV64:       # %bb.0:
; LMULMAX1-RV64-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
; LMULMAX1-RV64-NEXT:    vle8.v v8, (a0)
; LMULMAX1-RV64-NEXT:    addi a2, a1, 16
; LMULMAX1-RV64-NEXT:    vle8.v v9, (a2)
; LMULMAX1-RV64-NEXT:    addi a2, a0, 16
; LMULMAX1-RV64-NEXT:    vle8.v v10, (a2)
; LMULMAX1-RV64-NEXT:    vle8.v v11, (a1)
; LMULMAX1-RV64-NEXT:    vmaxu.vv v9, v10, v9
; LMULMAX1-RV64-NEXT:    vmaxu.vv v8, v8, v11
; LMULMAX1-RV64-NEXT:    vse8.v v8, (a0)
; LMULMAX1-RV64-NEXT:    vse8.v v9, (a2)
; LMULMAX1-RV64-NEXT:    ret
  %a = load <32 x i8>, ptr %x
  %b = load <32 x i8>, ptr %y
  %cc = icmp ugt <32 x i8> %a, %b
  %c = select <32 x i1> %cc, <32 x i8> %a, <32 x i8> %b
  store <32 x i8> %c, ptr %x
  ret void
}

; Unsigned maximum of two <16 x i16> vectors, written as icmp ugt + select,
; with the result stored back through %x. LMULMAX2 expects a single m2
; vmaxu.vv; both LMULMAX1 prefixes expect two m1 vmaxu.vv ops, one per 16-byte
; half.
define void @umax_v16i16(ptr %x, ptr %y) {
; LMULMAX2-LABEL: umax_v16i16:
; LMULMAX2:       # %bb.0:
; LMULMAX2-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
; LMULMAX2-NEXT:    vle16.v v8, (a0)
; LMULMAX2-NEXT:    vle16.v v10, (a1)
; LMULMAX2-NEXT:    vmaxu.vv v8, v8, v10
; LMULMAX2-NEXT:    vse16.v v8, (a0)
; LMULMAX2-NEXT:    ret
;
; LMULMAX1-RV32-LABEL: umax_v16i16:
; LMULMAX1-RV32:       # %bb.0:
; LMULMAX1-RV32-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; LMULMAX1-RV32-NEXT:    vle16.v v8, (a0)
; LMULMAX1-RV32-NEXT:    addi a2, a0, 16
; LMULMAX1-RV32-NEXT:    vle16.v v9, (a2)
; LMULMAX1-RV32-NEXT:    addi a3, a1, 16
; LMULMAX1-RV32-NEXT:    vle16.v v10, (a3)
; LMULMAX1-RV32-NEXT:    vle16.v v11, (a1)
; LMULMAX1-RV32-NEXT:    vmaxu.vv v9, v9, v10
; LMULMAX1-RV32-NEXT:    vmaxu.vv v8, v8, v11
; LMULMAX1-RV32-NEXT:    vse16.v v8, (a0)
; LMULMAX1-RV32-NEXT:    vse16.v v9, (a2)
; LMULMAX1-RV32-NEXT:    ret
;
; LMULMAX1-RV64-LABEL: umax_v16i16:
; LMULMAX1-RV64:       # %bb.0:
; LMULMAX1-RV64-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; LMULMAX1-RV64-NEXT:    vle16.v v8, (a0)
; LMULMAX1-RV64-NEXT:    addi a2, a1, 16
; LMULMAX1-RV64-NEXT:    vle16.v v9, (a2)
; LMULMAX1-RV64-NEXT:    addi a2, a0, 16
; LMULMAX1-RV64-NEXT:    vle16.v v10, (a2)
; LMULMAX1-RV64-NEXT:    vle16.v v11, (a1)
; LMULMAX1-RV64-NEXT:    vmaxu.vv v9, v10, v9
; LMULMAX1-RV64-NEXT:    vmaxu.vv v8, v8, v11
; LMULMAX1-RV64-NEXT:    vse16.v v8, (a0)
; LMULMAX1-RV64-NEXT:    vse16.v v9, (a2)
; LMULMAX1-RV64-NEXT:    ret
  %a = load <16 x i16>, ptr %x
  %b = load <16 x i16>, ptr %y
  %cc = icmp ugt <16 x i16> %a, %b
  %c = select <16 x i1> %cc, <16 x i16> %a, <16 x i16> %b
  store <16 x i16> %c, ptr %x
  ret void
}

; Unsigned maximum of two <8 x i32> vectors, written as icmp ugt + select, with
; the result stored back through %x. LMULMAX2 expects a single m2 vmaxu.vv;
; both LMULMAX1 prefixes expect two m1 vmaxu.vv ops, one per 16-byte half.
define void @umax_v8i32(ptr %x, ptr %y) {
; LMULMAX2-LABEL: umax_v8i32:
; LMULMAX2:       # %bb.0:
; LMULMAX2-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; LMULMAX2-NEXT:    vle32.v v8, (a0)
; LMULMAX2-NEXT:    vle32.v v10, (a1)
; LMULMAX2-NEXT:    vmaxu.vv v8, v8, v10
; LMULMAX2-NEXT:    vse32.v v8, (a0)
; LMULMAX2-NEXT:    ret
;
; LMULMAX1-RV32-LABEL: umax_v8i32:
; LMULMAX1-RV32:       # %bb.0:
; LMULMAX1-RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; LMULMAX1-RV32-NEXT:    vle32.v v8, (a0)
; LMULMAX1-RV32-NEXT:    addi a2, a0, 16
; LMULMAX1-RV32-NEXT:    vle32.v v9, (a2)
; LMULMAX1-RV32-NEXT:    addi a3, a1, 16
; LMULMAX1-RV32-NEXT:    vle32.v v10, (a3)
; LMULMAX1-RV32-NEXT:    vle32.v v11, (a1)
; LMULMAX1-RV32-NEXT:    vmaxu.vv v9, v9, v10
; LMULMAX1-RV32-NEXT:    vmaxu.vv v8, v8, v11
; LMULMAX1-RV32-NEXT:    vse32.v v8, (a0)
; LMULMAX1-RV32-NEXT:    vse32.v v9, (a2)
; LMULMAX1-RV32-NEXT:    ret
;
; LMULMAX1-RV64-LABEL: umax_v8i32:
; LMULMAX1-RV64:       # %bb.0:
; LMULMAX1-RV64-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; LMULMAX1-RV64-NEXT:    vle32.v v8, (a0)
; LMULMAX1-RV64-NEXT:    addi a2, a1, 16
; LMULMAX1-RV64-NEXT:    vle32.v v9, (a2)
; LMULMAX1-RV64-NEXT:    addi a2, a0, 16
; LMULMAX1-RV64-NEXT:    vle32.v v10, (a2)
; LMULMAX1-RV64-NEXT:    vle32.v v11, (a1)
; LMULMAX1-RV64-NEXT:    vmaxu.vv v9, v10, v9
; LMULMAX1-RV64-NEXT:    vmaxu.vv v8, v8, v11
; LMULMAX1-RV64-NEXT:    vse32.v v8, (a0)
; LMULMAX1-RV64-NEXT:    vse32.v v9, (a2)
; LMULMAX1-RV64-NEXT:    ret
  %a = load <8 x i32>, ptr %x
  %b = load <8 x i32>, ptr %y
  %cc = icmp ugt <8 x i32> %a, %b
  %c = select <8 x i1> %cc, <8 x i32> %a, <8 x i32> %b
  store <8 x i32> %c, ptr %x
  ret void
}

; Unsigned maximum of two <4 x i64> vectors, written as icmp ugt + select, with
; the result stored back through %x. LMULMAX2 expects a single m2 vmaxu.vv;
; both LMULMAX1 prefixes expect two m1 vmaxu.vv ops, one per 16-byte half.
define void @umax_v4i64(ptr %x, ptr %y) {
; LMULMAX2-LABEL: umax_v4i64:
; LMULMAX2:       # %bb.0:
; LMULMAX2-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; LMULMAX2-NEXT:    vle64.v v8, (a0)
; LMULMAX2-NEXT:    vle64.v v10, (a1)
; LMULMAX2-NEXT:    vmaxu.vv v8, v8, v10
; LMULMAX2-NEXT:    vse64.v v8, (a0)
; LMULMAX2-NEXT:    ret
;
; LMULMAX1-RV32-LABEL: umax_v4i64:
; LMULMAX1-RV32:       # %bb.0:
; LMULMAX1-RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; LMULMAX1-RV32-NEXT:    vle64.v v8, (a0)
; LMULMAX1-RV32-NEXT:    addi a2, a0, 16
; LMULMAX1-RV32-NEXT:    vle64.v v9, (a2)
; LMULMAX1-RV32-NEXT:    addi a3, a1, 16
; LMULMAX1-RV32-NEXT:    vle64.v v10, (a3)
; LMULMAX1-RV32-NEXT:    vle64.v v11, (a1)
; LMULMAX1-RV32-NEXT:    vmaxu.vv v9, v9, v10
; LMULMAX1-RV32-NEXT:    vmaxu.vv v8, v8, v11
; LMULMAX1-RV32-NEXT:    vse64.v v8, (a0)
; LMULMAX1-RV32-NEXT:    vse64.v v9, (a2)
; LMULMAX1-RV32-NEXT:    ret
;
; LMULMAX1-RV64-LABEL: umax_v4i64:
; LMULMAX1-RV64:       # %bb.0:
; LMULMAX1-RV64-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; LMULMAX1-RV64-NEXT:    vle64.v v8, (a0)
; LMULMAX1-RV64-NEXT:    addi a2, a1, 16
; LMULMAX1-RV64-NEXT:    vle64.v v9, (a2)
; LMULMAX1-RV64-NEXT:    addi a2, a0, 16
; LMULMAX1-RV64-NEXT:    vle64.v v10, (a2)
; LMULMAX1-RV64-NEXT:    vle64.v v11, (a1)
; LMULMAX1-RV64-NEXT:    vmaxu.vv v9, v10, v9
; LMULMAX1-RV64-NEXT:    vmaxu.vv v8, v8, v11
; LMULMAX1-RV64-NEXT:    vse64.v v8, (a0)
; LMULMAX1-RV64-NEXT:    vse64.v v9, (a2)
; LMULMAX1-RV64-NEXT:    ret
  %a = load <4 x i64>, ptr %x
  %b = load <4 x i64>, ptr %y
  %cc = icmp ugt <4 x i64> %a, %b
  %c = select <4 x i1> %cc, <4 x i64> %a, <4 x i64> %b
  store <4 x i64> %c, ptr %x
  ret void
}

; Adds a splat of the constant -1 (insertelement + zero-mask shufflevector) to
; a loaded <16 x i8> and stores the sum back through %x. The shared CHECK
; prefix expects the constant folded into vadd.vi as immediate -1.
define void @add_vi_v16i8(ptr %x) {
; CHECK-LABEL: add_vi_v16i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT:    vle8.v v8, (a0)
; CHECK-NEXT:    vadd.vi v8, v8, -1
; CHECK-NEXT:    vse8.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <16 x i8>, ptr %x
  %b = insertelement <16 x i8> poison, i8 -1, i32 0
  %c = shufflevector <16 x i8> %b, <16 x i8> poison, <16 x i32> zeroinitializer
  %d = add <16 x i8> %a, %c
  store <16 x i8> %d, ptr %x
  ret void
}

; Adds a splat of the constant -1 (insertelement + zero-mask shufflevector) to
; a loaded <8 x i16> and stores the sum back through %x. The shared CHECK
; prefix expects the constant folded into vadd.vi as immediate -1.
define void @add_vi_v8i16(ptr %x) {
; CHECK-LABEL: add_vi_v8i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0)
; CHECK-NEXT:    vadd.vi v8, v8, -1
; CHECK-NEXT:    vse16.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <8 x i16>, ptr %x
  %b = insertelement <8 x i16> poison, i16 -1, i32 0
  %c = shufflevector <8 x i16> %b, <8 x i16> poison, <8 x i32> zeroinitializer
  %d = add <8 x i16> %a, %c
  store <8 x i16> %d, ptr %x
  ret void
}

; Adds a splat of the constant -1 (insertelement + zero-mask shufflevector) to
; a loaded <4 x i32> and stores the sum back through %x. The shared CHECK
; prefix expects the constant folded into vadd.vi as immediate -1.
define void @add_vi_v4i32(ptr %x) {
; CHECK-LABEL: add_vi_v4i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    vadd.vi v8, v8, -1
; CHECK-NEXT:    vse32.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <4 x i32>, ptr %x
  %b = insertelement <4 x i32> poison, i32 -1, i32 0
  %c = shufflevector <4 x i32> %b, <4 x i32> poison, <4 x i32> zeroinitializer
  %d = add <4 x i32> %a, %c
  store <4 x i32> %d, ptr %x
  ret void
}

; Adds a splat of the constant -1 (insertelement + zero-mask shufflevector) to
; a loaded <2 x i64> and stores the sum back through %x. The shared CHECK
; prefix expects the constant folded into vadd.vi as immediate -1.
define void @add_vi_v2i64(ptr %x) {
; CHECK-LABEL: add_vi_v2i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; CHECK-NEXT:    vle64.v v8, (a0)
; CHECK-NEXT:    vadd.vi v8, v8, -1
; CHECK-NEXT:    vse64.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <2 x i64>, ptr %x
  %b = insertelement <2 x i64> poison, i64 -1, i32 0
  %c = shufflevector <2 x i64> %b, <2 x i64> poison, <2 x i32> zeroinitializer
  %d = add <2 x i64> %a, %c
  store <2 x i64> %d, ptr %x
  ret void
}

; Commuted form of the add-immediate test, with the splat of constant 1 as the
; left operand of the <16 x i8> add. The shared CHECK prefix still expects
; vadd.vi with the loaded vector as source and immediate 1.
define void @add_iv_v16i8(ptr %x) {
; CHECK-LABEL: add_iv_v16i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT:    vle8.v v8, (a0)
; CHECK-NEXT:    vadd.vi v8, v8, 1
; CHECK-NEXT:    vse8.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <16 x i8>, ptr %x
  %b = insertelement <16 x i8> poison, i8 1, i32 0
  %c = shufflevector <16 x i8> %b, <16 x i8> poison, <16 x i32> zeroinitializer
  %d = add <16 x i8> %c, %a
  store <16 x i8> %d, ptr %x
  ret void
}

; Commuted form of the add-immediate test, with the splat of constant 1 as the
; left operand of the <8 x i16> add. The shared CHECK prefix still expects
; vadd.vi with the loaded vector as source and immediate 1.
define void @add_iv_v8i16(ptr %x) {
; CHECK-LABEL: add_iv_v8i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0)
; CHECK-NEXT:    vadd.vi v8, v8, 1
; CHECK-NEXT:    vse16.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <8 x i16>, ptr %x
  %b = insertelement <8 x i16> poison, i16 1, i32 0
  %c = shufflevector <8 x i16> %b, <8 x i16> poison, <8 x i32> zeroinitializer
  %d = add <8 x i16> %c, %a
  store <8 x i16> %d, ptr %x
  ret void
}

; Commuted form of the add-immediate test, with the splat of constant 1 as the
; left operand of the <4 x i32> add. The shared CHECK prefix still expects
; vadd.vi with the loaded vector as source and immediate 1.
define void @add_iv_v4i32(ptr %x) {
; CHECK-LABEL: add_iv_v4i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    vadd.vi v8, v8, 1
; CHECK-NEXT:    vse32.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <4 x i32>, ptr %x
  %b = insertelement <4 x i32> poison, i32 1, i32 0
  %c = shufflevector <4 x i32> %b, <4 x i32> poison, <4 x i32> zeroinitializer
  %d = add <4 x i32> %c, %a
  store <4 x i32> %d, ptr %x
  ret void
}

; Commuted form of the add-immediate test, with the splat of constant 1 as the
; left operand of the <2 x i64> add. The shared CHECK prefix still expects
; vadd.vi with the loaded vector as source and immediate 1.
define void @add_iv_v2i64(ptr %x) {
; CHECK-LABEL: add_iv_v2i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; CHECK-NEXT:    vle64.v v8, (a0)
; CHECK-NEXT:    vadd.vi v8, v8, 1
; CHECK-NEXT:    vse64.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <2 x i64>, ptr %x
  %b = insertelement <2 x i64> poison, i64 1, i32 0
  %c = shufflevector <2 x i64> %b, <2 x i64> poison, <2 x i32> zeroinitializer
  %d = add <2 x i64> %c, %a
  store <2 x i64> %d, ptr %x
  ret void
}

; Adds a splat of the scalar argument %y to a loaded <16 x i8> and stores the
; sum back through %x. The shared CHECK prefix expects the splat folded into
; vadd.vx, taking the scalar directly from a1.
define void @add_vx_v16i8(ptr %x, i8 %y) {
; CHECK-LABEL: add_vx_v16i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT:    vle8.v v8, (a0)
; CHECK-NEXT:    vadd.vx v8, v8, a1
; CHECK-NEXT:    vse8.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <16 x i8>, ptr %x
  %b = insertelement <16 x i8> poison, i8 %y, i32 0
  %c = shufflevector <16 x i8> %b, <16 x i8> poison, <16 x i32> zeroinitializer
  %d = add <16 x i8> %a, %c
  store <16 x i8> %d, ptr %x
  ret void
}

; Adds a splat of the scalar argument %y to a loaded <8 x i16> and stores the
; sum back through %x. The shared CHECK prefix expects the splat folded into
; vadd.vx, taking the scalar directly from a1.
define void @add_vx_v8i16(ptr %x, i16 %y) {
; CHECK-LABEL: add_vx_v8i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0)
; CHECK-NEXT:    vadd.vx v8, v8, a1
; CHECK-NEXT:    vse16.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <8 x i16>, ptr %x
  %b = insertelement <8 x i16> poison, i16 %y, i32 0
  %c = shufflevector <8 x i16> %b, <8 x i16> poison, <8 x i32> zeroinitializer
  %d = add <8 x i16> %a, %c
  store <8 x i16> %d, ptr %x
  ret void
}

; Adds a splat of the scalar argument %y to a loaded <4 x i32> and stores the
; sum back through %x. The shared CHECK prefix expects the splat folded into
; vadd.vx, taking the scalar directly from a1.
define void @add_vx_v4i32(ptr %x, i32 %y) {
; CHECK-LABEL: add_vx_v4i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    vadd.vx v8, v8, a1
; CHECK-NEXT:    vse32.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <4 x i32>, ptr %x
  %b = insertelement <4 x i32> poison, i32 %y, i32 0
  %c = shufflevector <4 x i32> %b, <4 x i32> poison, <4 x i32> zeroinitializer
  %d = add <4 x i32> %a, %c
  store <4 x i32> %d, ptr %x
  ret void
}

; Commuted form of the add-scalar test, with the splat of %y as the left
; operand of the <16 x i8> add. The shared CHECK prefix still expects vadd.vx
; with the loaded vector as source and the scalar in a1.
define void @add_xv_v16i8(ptr %x, i8 %y) {
; CHECK-LABEL: add_xv_v16i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT:    vle8.v v8, (a0)
; CHECK-NEXT:    vadd.vx v8, v8, a1
; CHECK-NEXT:    vse8.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <16 x i8>, ptr %x
  %b = insertelement <16 x i8> poison, i8 %y, i32 0
  %c = shufflevector <16 x i8> %b, <16 x i8> poison, <16 x i32> zeroinitializer
  %d = add <16 x i8> %c, %a
  store <16 x i8> %d, ptr %x
  ret void
}

; Commuted form of the add-scalar test, with the splat of %y as the left
; operand of the <8 x i16> add. The shared CHECK prefix still expects vadd.vx
; with the loaded vector as source and the scalar in a1.
define void @add_xv_v8i16(ptr %x, i16 %y) {
; CHECK-LABEL: add_xv_v8i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0)
; CHECK-NEXT:    vadd.vx v8, v8, a1
; CHECK-NEXT:    vse16.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <8 x i16>, ptr %x
  %b = insertelement <8 x i16> poison, i16 %y, i32 0
  %c = shufflevector <8 x i16> %b, <8 x i16> poison, <8 x i32> zeroinitializer
  %d = add <8 x i16> %c, %a
  store <8 x i16> %d, ptr %x
  ret void
}

; Commuted form of the add-scalar test, with the splat of %y as the left
; operand of the <4 x i32> add. The shared CHECK prefix still expects vadd.vx
; with the loaded vector as source and the scalar in a1.
define void @add_xv_v4i32(ptr %x, i32 %y) {
; CHECK-LABEL: add_xv_v4i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    vadd.vx v8, v8, a1
; CHECK-NEXT:    vse32.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <4 x i32>, ptr %x
  %b = insertelement <4 x i32> poison, i32 %y, i32 0
  %c = shufflevector <4 x i32> %b, <4 x i32> poison, <4 x i32> zeroinitializer
  %d = add <4 x i32> %c, %a
  store <4 x i32> %d, ptr %x
  ret void
}

; Subtracts a splat of the constant -1 from a loaded <16 x i8> and stores the
; difference back through %x. The shared CHECK prefix expects the constant to
; be materialized in a scalar register (li a1, -1) and consumed by vsub.vx.
define void @sub_vi_v16i8(ptr %x) {
; CHECK-LABEL: sub_vi_v16i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT:    vle8.v v8, (a0)
; CHECK-NEXT:    li a1, -1
; CHECK-NEXT:    vsub.vx v8, v8, a1
; CHECK-NEXT:    vse8.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <16 x i8>, ptr %x
  %b = insertelement <16 x i8> poison, i8 -1, i32 0
  %c = shufflevector <16 x i8> %b, <16 x i8> poison, <16 x i32> zeroinitializer
  %d = sub <16 x i8> %a, %c
  store <16 x i8> %d, ptr %x
  ret void
}

; Vector minus constant for <8 x i16>. A splat of -1 is subtracted from the
; loaded vector. The expected assembly materializes -1 in a1 with li and then
; subtracts it using vsub.vx.
define void @sub_vi_v8i16(ptr %x) {
; CHECK-LABEL: sub_vi_v8i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0)
; CHECK-NEXT:    li a1, -1
; CHECK-NEXT:    vsub.vx v8, v8, a1
; CHECK-NEXT:    vse16.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <8 x i16>, ptr %x
  %b = insertelement <8 x i16> poison, i16 -1, i32 0
  %c = shufflevector <8 x i16> %b, <8 x i16> poison, <8 x i32> zeroinitializer
  %d = sub <8 x i16> %a, %c
  store <8 x i16> %d, ptr %x
  ret void
}

; Vector minus constant for <4 x i32>. A splat of -1 is subtracted from the
; loaded vector. The expected assembly materializes -1 in a1 with li and then
; subtracts it using vsub.vx.
define void @sub_vi_v4i32(ptr %x) {
; CHECK-LABEL: sub_vi_v4i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    li a1, -1
; CHECK-NEXT:    vsub.vx v8, v8, a1
; CHECK-NEXT:    vse32.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <4 x i32>, ptr %x
  %b = insertelement <4 x i32> poison, i32 -1, i32 0
  %c = shufflevector <4 x i32> %b, <4 x i32> poison, <4 x i32> zeroinitializer
  %d = sub <4 x i32> %a, %c
  store <4 x i32> %d, ptr %x
  ret void
}

; Vector minus constant for <2 x i64>. A splat of -1 is subtracted from the
; loaded vector. The expected assembly materializes -1 in a1 with li and then
; subtracts it using vsub.vx. The same shared expectation is used for every
; configuration in the file header, 32-bit targets included.
define void @sub_vi_v2i64(ptr %x) {
; CHECK-LABEL: sub_vi_v2i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; CHECK-NEXT:    vle64.v v8, (a0)
; CHECK-NEXT:    li a1, -1
; CHECK-NEXT:    vsub.vx v8, v8, a1
; CHECK-NEXT:    vse64.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <2 x i64>, ptr %x
  %b = insertelement <2 x i64> poison, i64 -1, i32 0
  %c = shufflevector <2 x i64> %b, <2 x i64> poison, <2 x i32> zeroinitializer
  %d = sub <2 x i64> %a, %c
  store <2 x i64> %d, ptr %x
  ret void
}

; Constant minus vector for <16 x i8>. A splat of 1 is the first operand of
; the sub and the loaded vector is the second. The expected assembly uses the
; reverse-subtract immediate form vrsub.vi with immediate 1.
define void @sub_iv_v16i8(ptr %x) {
; CHECK-LABEL: sub_iv_v16i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT:    vle8.v v8, (a0)
; CHECK-NEXT:    vrsub.vi v8, v8, 1
; CHECK-NEXT:    vse8.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <16 x i8>, ptr %x
  %b = insertelement <16 x i8> poison, i8 1, i32 0
  %c = shufflevector <16 x i8> %b, <16 x i8> poison, <16 x i32> zeroinitializer
  %d = sub <16 x i8> %c, %a
  store <16 x i8> %d, ptr %x
  ret void
}

; Constant minus vector for <8 x i16>. A splat of 1 is the first operand of
; the sub and the loaded vector is the second. The expected assembly uses the
; reverse-subtract immediate form vrsub.vi with immediate 1.
define void @sub_iv_v8i16(ptr %x) {
; CHECK-LABEL: sub_iv_v8i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0)
; CHECK-NEXT:    vrsub.vi v8, v8, 1
; CHECK-NEXT:    vse16.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <8 x i16>, ptr %x
  %b = insertelement <8 x i16> poison, i16 1, i32 0
  %c = shufflevector <8 x i16> %b, <8 x i16> poison, <8 x i32> zeroinitializer
  %d = sub <8 x i16> %c, %a
  store <8 x i16> %d, ptr %x
  ret void
}

; Constant minus vector for <4 x i32>. A splat of 1 is the first operand of
; the sub and the loaded vector is the second. The expected assembly uses the
; reverse-subtract immediate form vrsub.vi with immediate 1.
define void @sub_iv_v4i32(ptr %x) {
; CHECK-LABEL: sub_iv_v4i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    vrsub.vi v8, v8, 1
; CHECK-NEXT:    vse32.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <4 x i32>, ptr %x
  %b = insertelement <4 x i32> poison, i32 1, i32 0
  %c = shufflevector <4 x i32> %b, <4 x i32> poison, <4 x i32> zeroinitializer
  %d = sub <4 x i32> %c, %a
  store <4 x i32> %d, ptr %x
  ret void
}

; Constant minus vector for <2 x i64>. A splat of 1 is the first operand of
; the sub and the loaded vector is the second. The expected assembly uses the
; reverse-subtract immediate form vrsub.vi with immediate 1.
define void @sub_iv_v2i64(ptr %x) {
; CHECK-LABEL: sub_iv_v2i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; CHECK-NEXT:    vle64.v v8, (a0)
; CHECK-NEXT:    vrsub.vi v8, v8, 1
; CHECK-NEXT:    vse64.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <2 x i64>, ptr %x
  %b = insertelement <2 x i64> poison, i64 1, i32 0
  %c = shufflevector <2 x i64> %b, <2 x i64> poison, <2 x i32> zeroinitializer
  %d = sub <2 x i64> %c, %a
  store <2 x i64> %d, ptr %x
  ret void
}

; Vector minus scalar for <16 x i8>. The i8 argument %y is broadcast to every
; lane and subtracted from the loaded vector. The expected assembly folds the
; broadcast into one vsub.vx.
define void @sub_vx_v16i8(ptr %x, i8 %y) {
; CHECK-LABEL: sub_vx_v16i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT:    vle8.v v8, (a0)
; CHECK-NEXT:    vsub.vx v8, v8, a1
; CHECK-NEXT:    vse8.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <16 x i8>, ptr %x
  %b = insertelement <16 x i8> poison, i8 %y, i32 0
  %c = shufflevector <16 x i8> %b, <16 x i8> poison, <16 x i32> zeroinitializer
  %d = sub <16 x i8> %a, %c
  store <16 x i8> %d, ptr %x
  ret void
}

; Vector minus scalar for <8 x i16>. The i16 argument %y is broadcast to every
; lane and subtracted from the loaded vector. The expected assembly folds the
; broadcast into one vsub.vx.
define void @sub_vx_v8i16(ptr %x, i16 %y) {
; CHECK-LABEL: sub_vx_v8i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0)
; CHECK-NEXT:    vsub.vx v8, v8, a1
; CHECK-NEXT:    vse16.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <8 x i16>, ptr %x
  %b = insertelement <8 x i16> poison, i16 %y, i32 0
  %c = shufflevector <8 x i16> %b, <8 x i16> poison, <8 x i32> zeroinitializer
  %d = sub <8 x i16> %a, %c
  store <8 x i16> %d, ptr %x
  ret void
}

; Vector minus scalar for <4 x i32>. The i32 argument %y is broadcast to every
; lane and subtracted from the loaded vector. The expected assembly folds the
; broadcast into one vsub.vx.
define void @sub_vx_v4i32(ptr %x, i32 %y) {
; CHECK-LABEL: sub_vx_v4i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    vsub.vx v8, v8, a1
; CHECK-NEXT:    vse32.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <4 x i32>, ptr %x
  %b = insertelement <4 x i32> poison, i32 %y, i32 0
  %c = shufflevector <4 x i32> %b, <4 x i32> poison, <4 x i32> zeroinitializer
  %d = sub <4 x i32> %a, %c
  store <4 x i32> %d, ptr %x
  ret void
}

; Scalar minus vector for <16 x i8>. The broadcast of %y is the first operand
; of the sub and the loaded vector is the second. The expected assembly uses
; the reverse-subtract scalar form vrsub.vx.
define void @sub_xv_v16i8(ptr %x, i8 %y) {
; CHECK-LABEL: sub_xv_v16i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT:    vle8.v v8, (a0)
; CHECK-NEXT:    vrsub.vx v8, v8, a1
; CHECK-NEXT:    vse8.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <16 x i8>, ptr %x
  %b = insertelement <16 x i8> poison, i8 %y, i32 0
  %c = shufflevector <16 x i8> %b, <16 x i8> poison, <16 x i32> zeroinitializer
  %d = sub <16 x i8> %c, %a
  store <16 x i8> %d, ptr %x
  ret void
}

; Scalar minus vector for <8 x i16>. The broadcast of %y is the first operand
; of the sub and the loaded vector is the second. The expected assembly uses
; the reverse-subtract scalar form vrsub.vx.
define void @sub_xv_v8i16(ptr %x, i16 %y) {
; CHECK-LABEL: sub_xv_v8i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0)
; CHECK-NEXT:    vrsub.vx v8, v8, a1
; CHECK-NEXT:    vse16.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <8 x i16>, ptr %x
  %b = insertelement <8 x i16> poison, i16 %y, i32 0
  %c = shufflevector <8 x i16> %b, <8 x i16> poison, <8 x i32> zeroinitializer
  %d = sub <8 x i16> %c, %a
  store <8 x i16> %d, ptr %x
  ret void
}

; Scalar minus vector for <4 x i32>. The broadcast of %y is the first operand
; of the sub and the loaded vector is the second. The expected assembly uses
; the reverse-subtract scalar form vrsub.vx.
define void @sub_xv_v4i32(ptr %x, i32 %y) {
; CHECK-LABEL: sub_xv_v4i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    vrsub.vx v8, v8, a1
; CHECK-NEXT:    vse32.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <4 x i32>, ptr %x
  %b = insertelement <4 x i32> poison, i32 %y, i32 0
  %c = shufflevector <4 x i32> %b, <4 x i32> poison, <4 x i32> zeroinitializer
  %d = sub <4 x i32> %c, %a
  store <4 x i32> %d, ptr %x
  ret void
}

; Vector times scalar for <16 x i8>. The loaded vector is the first operand of
; the mul and the broadcast of %y is the second. The expected assembly folds
; the broadcast into one vmul.vx.
define void @mul_vx_v16i8(ptr %x, i8 %y) {
; CHECK-LABEL: mul_vx_v16i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT:    vle8.v v8, (a0)
; CHECK-NEXT:    vmul.vx v8, v8, a1
; CHECK-NEXT:    vse8.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <16 x i8>, ptr %x
  %b = insertelement <16 x i8> poison, i8 %y, i32 0
  %c = shufflevector <16 x i8> %b, <16 x i8> poison, <16 x i32> zeroinitializer
  %d = mul <16 x i8> %a, %c
  store <16 x i8> %d, ptr %x
  ret void
}

; Vector times scalar for <8 x i16>. The loaded vector is the first operand of
; the mul and the broadcast of %y is the second. The expected assembly folds
; the broadcast into one vmul.vx.
define void @mul_vx_v8i16(ptr %x, i16 %y) {
; CHECK-LABEL: mul_vx_v8i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0)
; CHECK-NEXT:    vmul.vx v8, v8, a1
; CHECK-NEXT:    vse16.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <8 x i16>, ptr %x
  %b = insertelement <8 x i16> poison, i16 %y, i32 0
  %c = shufflevector <8 x i16> %b, <8 x i16> poison, <8 x i32> zeroinitializer
  %d = mul <8 x i16> %a, %c
  store <8 x i16> %d, ptr %x
  ret void
}

; Vector times scalar for <4 x i32>. The loaded vector is the first operand of
; the mul and the broadcast of %y is the second. The expected assembly folds
; the broadcast into one vmul.vx.
define void @mul_vx_v4i32(ptr %x, i32 %y) {
; CHECK-LABEL: mul_vx_v4i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    vmul.vx v8, v8, a1
; CHECK-NEXT:    vse32.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <4 x i32>, ptr %x
  %b = insertelement <4 x i32> poison, i32 %y, i32 0
  %c = shufflevector <4 x i32> %b, <4 x i32> poison, <4 x i32> zeroinitializer
  %d = mul <4 x i32> %a, %c
  store <4 x i32> %d, ptr %x
  ret void
}

; Scalar times vector for <16 x i8>. Same as mul_vx_v16i8 but with the
; broadcast of %y as the first operand of the mul. The expected assembly is
; the same single vmul.vx.
define void @mul_xv_v16i8(ptr %x, i8 %y) {
; CHECK-LABEL: mul_xv_v16i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT:    vle8.v v8, (a0)
; CHECK-NEXT:    vmul.vx v8, v8, a1
; CHECK-NEXT:    vse8.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <16 x i8>, ptr %x
  %b = insertelement <16 x i8> poison, i8 %y, i32 0
  %c = shufflevector <16 x i8> %b, <16 x i8> poison, <16 x i32> zeroinitializer
  %d = mul <16 x i8> %c, %a
  store <16 x i8> %d, ptr %x
  ret void
}

; Scalar times vector for <8 x i16>. Same as mul_vx_v8i16 but with the
; broadcast of %y as the first operand of the mul. The expected assembly is
; the same single vmul.vx.
define void @mul_xv_v8i16(ptr %x, i16 %y) {
; CHECK-LABEL: mul_xv_v8i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0)
; CHECK-NEXT:    vmul.vx v8, v8, a1
; CHECK-NEXT:    vse16.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <8 x i16>, ptr %x
  %b = insertelement <8 x i16> poison, i16 %y, i32 0
  %c = shufflevector <8 x i16> %b, <8 x i16> poison, <8 x i32> zeroinitializer
  %d = mul <8 x i16> %c, %a
  store <8 x i16> %d, ptr %x
  ret void
}

; Scalar times vector for <4 x i32>. Same as mul_vx_v4i32 but with the
; broadcast of %y as the first operand of the mul. The expected assembly is
; the same single vmul.vx.
define void @mul_xv_v4i32(ptr %x, i32 %y) {
; CHECK-LABEL: mul_xv_v4i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    vmul.vx v8, v8, a1
; CHECK-NEXT:    vse32.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <4 x i32>, ptr %x
  %b = insertelement <4 x i32> poison, i32 %y, i32 0
  %c = shufflevector <4 x i32> %b, <4 x i32> poison, <4 x i32> zeroinitializer
  %d = mul <4 x i32> %c, %a
  store <4 x i32> %d, ptr %x
  ret void
}

; Vector AND constant for <16 x i8>. The loaded vector is ANDed with a splat
; of -2 as the second operand. The expected assembly encodes the constant
; directly as the immediate of vand.vi.
define void @and_vi_v16i8(ptr %x) {
; CHECK-LABEL: and_vi_v16i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT:    vle8.v v8, (a0)
; CHECK-NEXT:    vand.vi v8, v8, -2
; CHECK-NEXT:    vse8.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <16 x i8>, ptr %x
  %b = insertelement <16 x i8> poison, i8 -2, i32 0
  %c = shufflevector <16 x i8> %b, <16 x i8> poison, <16 x i32> zeroinitializer
  %d = and <16 x i8> %a, %c
  store <16 x i8> %d, ptr %x
  ret void
}

; Vector AND constant for <8 x i16>. The loaded vector is ANDed with a splat
; of -2 as the second operand. The expected assembly encodes the constant
; directly as the immediate of vand.vi.
define void @and_vi_v8i16(ptr %x) {
; CHECK-LABEL: and_vi_v8i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0)
; CHECK-NEXT:    vand.vi v8, v8, -2
; CHECK-NEXT:    vse16.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <8 x i16>, ptr %x
  %b = insertelement <8 x i16> poison, i16 -2, i32 0
  %c = shufflevector <8 x i16> %b, <8 x i16> poison, <8 x i32> zeroinitializer
  %d = and <8 x i16> %a, %c
  store <8 x i16> %d, ptr %x
  ret void
}

; Vector AND constant for <4 x i32>. The loaded vector is ANDed with a splat
; of -2 as the second operand. The expected assembly encodes the constant
; directly as the immediate of vand.vi.
define void @and_vi_v4i32(ptr %x) {
; CHECK-LABEL: and_vi_v4i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    vand.vi v8, v8, -2
; CHECK-NEXT:    vse32.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <4 x i32>, ptr %x
  %b = insertelement <4 x i32> poison, i32 -2, i32 0
  %c = shufflevector <4 x i32> %b, <4 x i32> poison, <4 x i32> zeroinitializer
  %d = and <4 x i32> %a, %c
  store <4 x i32> %d, ptr %x
  ret void
}

; Vector AND constant for <2 x i64>. The loaded vector is ANDed with a splat
; of -2 as the second operand. The expected assembly encodes the constant
; directly as the immediate of vand.vi.
define void @and_vi_v2i64(ptr %x) {
; CHECK-LABEL: and_vi_v2i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; CHECK-NEXT:    vle64.v v8, (a0)
; CHECK-NEXT:    vand.vi v8, v8, -2
; CHECK-NEXT:    vse64.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <2 x i64>, ptr %x
  %b = insertelement <2 x i64> poison, i64 -2, i32 0
  %c = shufflevector <2 x i64> %b, <2 x i64> poison, <2 x i32> zeroinitializer
  %d = and <2 x i64> %a, %c
  store <2 x i64> %d, ptr %x
  ret void
}

; Constant AND vector for <16 x i8>. A splat of 1 is the first operand of the
; and, with the loaded vector second. The expected assembly still uses
; vand.vi with the vector as source and 1 as the immediate.
define void @and_iv_v16i8(ptr %x) {
; CHECK-LABEL: and_iv_v16i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT:    vle8.v v8, (a0)
; CHECK-NEXT:    vand.vi v8, v8, 1
; CHECK-NEXT:    vse8.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <16 x i8>, ptr %x
  %b = insertelement <16 x i8> poison, i8 1, i32 0
  %c = shufflevector <16 x i8> %b, <16 x i8> poison, <16 x i32> zeroinitializer
  %d = and <16 x i8> %c, %a
  store <16 x i8> %d, ptr %x
  ret void
}

; Constant AND vector for <8 x i16>. A splat of 1 is the first operand of the
; and, with the loaded vector second. The expected assembly still uses
; vand.vi with the vector as source and 1 as the immediate.
define void @and_iv_v8i16(ptr %x) {
; CHECK-LABEL: and_iv_v8i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0)
; CHECK-NEXT:    vand.vi v8, v8, 1
; CHECK-NEXT:    vse16.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <8 x i16>, ptr %x
  %b = insertelement <8 x i16> poison, i16 1, i32 0
  %c = shufflevector <8 x i16> %b, <8 x i16> poison, <8 x i32> zeroinitializer
  %d = and <8 x i16> %c, %a
  store <8 x i16> %d, ptr %x
  ret void
}

; Constant AND vector for <4 x i32>. A splat of 1 is the first operand of the
; and, with the loaded vector second. The expected assembly still uses
; vand.vi with the vector as source and 1 as the immediate.
define void @and_iv_v4i32(ptr %x) {
; CHECK-LABEL: and_iv_v4i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    vand.vi v8, v8, 1
; CHECK-NEXT:    vse32.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <4 x i32>, ptr %x
  %b = insertelement <4 x i32> poison, i32 1, i32 0
  %c = shufflevector <4 x i32> %b, <4 x i32> poison, <4 x i32> zeroinitializer
  %d = and <4 x i32> %c, %a
  store <4 x i32> %d, ptr %x
  ret void
}

; Constant AND vector for <2 x i64>. A splat of 1 is the first operand of the
; and, with the loaded vector second. The expected assembly still uses
; vand.vi with the vector as source and 1 as the immediate.
define void @and_iv_v2i64(ptr %x) {
; CHECK-LABEL: and_iv_v2i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; CHECK-NEXT:    vle64.v v8, (a0)
; CHECK-NEXT:    vand.vi v8, v8, 1
; CHECK-NEXT:    vse64.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <2 x i64>, ptr %x
  %b = insertelement <2 x i64> poison, i64 1, i32 0
  %c = shufflevector <2 x i64> %b, <2 x i64> poison, <2 x i32> zeroinitializer
  %d = and <2 x i64> %c, %a
  store <2 x i64> %d, ptr %x
  ret void
}

; Vector AND scalar for <16 x i8>. The loaded vector is ANDed with a broadcast
; of the i8 argument %y as the second operand. The expected assembly folds the
; broadcast into one vand.vx.
define void @and_vx_v16i8(ptr %x, i8 %y) {
; CHECK-LABEL: and_vx_v16i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT:    vle8.v v8, (a0)
; CHECK-NEXT:    vand.vx v8, v8, a1
; CHECK-NEXT:    vse8.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <16 x i8>, ptr %x
  %b = insertelement <16 x i8> poison, i8 %y, i32 0
  %c = shufflevector <16 x i8> %b, <16 x i8> poison, <16 x i32> zeroinitializer
  %d = and <16 x i8> %a, %c
  store <16 x i8> %d, ptr %x
  ret void
}

; Vector AND scalar for <8 x i16>. The loaded vector is ANDed with a broadcast
; of the i16 argument %y as the second operand. The expected assembly folds
; the broadcast into one vand.vx.
define void @and_vx_v8i16(ptr %x, i16 %y) {
; CHECK-LABEL: and_vx_v8i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0)
; CHECK-NEXT:    vand.vx v8, v8, a1
; CHECK-NEXT:    vse16.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <8 x i16>, ptr %x
  %b = insertelement <8 x i16> poison, i16 %y, i32 0
  %c = shufflevector <8 x i16> %b, <8 x i16> poison, <8 x i32> zeroinitializer
  %d = and <8 x i16> %a, %c
  store <8 x i16> %d, ptr %x
  ret void
}

; Vector AND scalar for <4 x i32>. The loaded vector is ANDed with a broadcast
; of the i32 argument %y as the second operand. The expected assembly folds
; the broadcast into one vand.vx.
define void @and_vx_v4i32(ptr %x, i32 %y) {
; CHECK-LABEL: and_vx_v4i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    vand.vx v8, v8, a1
; CHECK-NEXT:    vse32.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <4 x i32>, ptr %x
  %b = insertelement <4 x i32> poison, i32 %y, i32 0
  %c = shufflevector <4 x i32> %b, <4 x i32> poison, <4 x i32> zeroinitializer
  %d = and <4 x i32> %a, %c
  store <4 x i32> %d, ptr %x
  ret void
}

; Scalar AND vector for <16 x i8>. Same as and_vx_v16i8 but with the broadcast
; of %y as the first operand of the and. The expected assembly is the same
; single vand.vx.
define void @and_xv_v16i8(ptr %x, i8 %y) {
; CHECK-LABEL: and_xv_v16i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT:    vle8.v v8, (a0)
; CHECK-NEXT:    vand.vx v8, v8, a1
; CHECK-NEXT:    vse8.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <16 x i8>, ptr %x
  %b = insertelement <16 x i8> poison, i8 %y, i32 0
  %c = shufflevector <16 x i8> %b, <16 x i8> poison, <16 x i32> zeroinitializer
  %d = and <16 x i8> %c, %a
  store <16 x i8> %d, ptr %x
  ret void
}

; Scalar AND vector for <8 x i16>. Same as and_vx_v8i16 but with the broadcast
; of %y as the first operand of the and. The expected assembly is the same
; single vand.vx.
define void @and_xv_v8i16(ptr %x, i16 %y) {
; CHECK-LABEL: and_xv_v8i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0)
; CHECK-NEXT:    vand.vx v8, v8, a1
; CHECK-NEXT:    vse16.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <8 x i16>, ptr %x
  %b = insertelement <8 x i16> poison, i16 %y, i32 0
  %c = shufflevector <8 x i16> %b, <8 x i16> poison, <8 x i32> zeroinitializer
  %d = and <8 x i16> %c, %a
  store <8 x i16> %d, ptr %x
  ret void
}

; Scalar AND vector for <4 x i32>. Same as and_vx_v4i32 but with the broadcast
; of %y as the first operand of the and. The expected assembly is the same
; single vand.vx.
define void @and_xv_v4i32(ptr %x, i32 %y) {
; CHECK-LABEL: and_xv_v4i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    vand.vx v8, v8, a1
; CHECK-NEXT:    vse32.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <4 x i32>, ptr %x
  %b = insertelement <4 x i32> poison, i32 %y, i32 0
  %c = shufflevector <4 x i32> %b, <4 x i32> poison, <4 x i32> zeroinitializer
  %d = and <4 x i32> %c, %a
  store <4 x i32> %d, ptr %x
  ret void
}

; Vector OR constant for <16 x i8>. The loaded vector is ORed with a splat of
; -2 as the second operand. The expected assembly encodes the constant
; directly as the immediate of vor.vi.
define void @or_vi_v16i8(ptr %x) {
; CHECK-LABEL: or_vi_v16i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT:    vle8.v v8, (a0)
; CHECK-NEXT:    vor.vi v8, v8, -2
; CHECK-NEXT:    vse8.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <16 x i8>, ptr %x
  %b = insertelement <16 x i8> poison, i8 -2, i32 0
  %c = shufflevector <16 x i8> %b, <16 x i8> poison, <16 x i32> zeroinitializer
  %d = or <16 x i8> %a, %c
  store <16 x i8> %d, ptr %x
  ret void
}

; Vector OR constant for <8 x i16>. The loaded vector is ORed with a splat of
; -2 as the second operand. The expected assembly encodes the constant
; directly as the immediate of vor.vi.
define void @or_vi_v8i16(ptr %x) {
; CHECK-LABEL: or_vi_v8i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0)
; CHECK-NEXT:    vor.vi v8, v8, -2
; CHECK-NEXT:    vse16.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <8 x i16>, ptr %x
  %b = insertelement <8 x i16> poison, i16 -2, i32 0
  %c = shufflevector <8 x i16> %b, <8 x i16> poison, <8 x i32> zeroinitializer
  %d = or <8 x i16> %a, %c
  store <8 x i16> %d, ptr %x
  ret void
}

; Vector OR constant for <4 x i32>. The loaded vector is ORed with a splat of
; -2 as the second operand. The expected assembly encodes the constant
; directly as the immediate of vor.vi.
define void @or_vi_v4i32(ptr %x) {
; CHECK-LABEL: or_vi_v4i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    vor.vi v8, v8, -2
; CHECK-NEXT:    vse32.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <4 x i32>, ptr %x
  %b = insertelement <4 x i32> poison, i32 -2, i32 0
  %c = shufflevector <4 x i32> %b, <4 x i32> poison, <4 x i32> zeroinitializer
  %d = or <4 x i32> %a, %c
  store <4 x i32> %d, ptr %x
  ret void
}

; Vector OR constant for <2 x i64>. The loaded vector is ORed with a splat of
; -2 as the second operand. The expected assembly encodes the constant
; directly as the immediate of vor.vi.
define void @or_vi_v2i64(ptr %x) {
; CHECK-LABEL: or_vi_v2i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; CHECK-NEXT:    vle64.v v8, (a0)
; CHECK-NEXT:    vor.vi v8, v8, -2
; CHECK-NEXT:    vse64.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <2 x i64>, ptr %x
  %b = insertelement <2 x i64> poison, i64 -2, i32 0
  %c = shufflevector <2 x i64> %b, <2 x i64> poison, <2 x i32> zeroinitializer
  %d = or <2 x i64> %a, %c
  store <2 x i64> %d, ptr %x
  ret void
}

; Constant OR vector for <16 x i8>. A splat of 1 is the first operand of the
; or, with the loaded vector second. The expected assembly still uses vor.vi
; with the vector as source and 1 as the immediate.
define void @or_iv_v16i8(ptr %x) {
; CHECK-LABEL: or_iv_v16i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT:    vle8.v v8, (a0)
; CHECK-NEXT:    vor.vi v8, v8, 1
; CHECK-NEXT:    vse8.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <16 x i8>, ptr %x
  %b = insertelement <16 x i8> poison, i8 1, i32 0
  %c = shufflevector <16 x i8> %b, <16 x i8> poison, <16 x i32> zeroinitializer
  %d = or <16 x i8> %c, %a
  store <16 x i8> %d, ptr %x
  ret void
}

; Constant OR vector for <8 x i16>. A splat of 1 is the first operand of the
; or, with the loaded vector second. The expected assembly still uses vor.vi
; with the vector as source and 1 as the immediate.
define void @or_iv_v8i16(ptr %x) {
; CHECK-LABEL: or_iv_v8i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0)
; CHECK-NEXT:    vor.vi v8, v8, 1
; CHECK-NEXT:    vse16.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <8 x i16>, ptr %x
  %b = insertelement <8 x i16> poison, i16 1, i32 0
  %c = shufflevector <8 x i16> %b, <8 x i16> poison, <8 x i32> zeroinitializer
  %d = or <8 x i16> %c, %a
  store <8 x i16> %d, ptr %x
  ret void
}

; Constant OR vector for <4 x i32>. A splat of 1 is the first operand of the
; or, with the loaded vector second. The expected assembly still uses vor.vi
; with the vector as source and 1 as the immediate.
define void @or_iv_v4i32(ptr %x) {
; CHECK-LABEL: or_iv_v4i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    vor.vi v8, v8, 1
; CHECK-NEXT:    vse32.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <4 x i32>, ptr %x
  %b = insertelement <4 x i32> poison, i32 1, i32 0
  %c = shufflevector <4 x i32> %b, <4 x i32> poison, <4 x i32> zeroinitializer
  %d = or <4 x i32> %c, %a
  store <4 x i32> %d, ptr %x
  ret void
}

; Constant OR vector for <2 x i64>. A splat of 1 is the first operand of the
; or, with the loaded vector second. The expected assembly still uses vor.vi
; with the vector as source and 1 as the immediate.
define void @or_iv_v2i64(ptr %x) {
; CHECK-LABEL: or_iv_v2i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; CHECK-NEXT:    vle64.v v8, (a0)
; CHECK-NEXT:    vor.vi v8, v8, 1
; CHECK-NEXT:    vse64.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <2 x i64>, ptr %x
  %b = insertelement <2 x i64> poison, i64 1, i32 0
  %c = shufflevector <2 x i64> %b, <2 x i64> poison, <2 x i32> zeroinitializer
  %d = or <2 x i64> %c, %a
  store <2 x i64> %d, ptr %x
  ret void
}

; Vector OR scalar for <16 x i8>. The loaded vector is ORed with a broadcast
; of the i8 argument %y as the second operand. The expected assembly folds the
; broadcast into one vor.vx.
define void @or_vx_v16i8(ptr %x, i8 %y) {
; CHECK-LABEL: or_vx_v16i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT:    vle8.v v8, (a0)
; CHECK-NEXT:    vor.vx v8, v8, a1
; CHECK-NEXT:    vse8.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <16 x i8>, ptr %x
  %b = insertelement <16 x i8> poison, i8 %y, i32 0
  %c = shufflevector <16 x i8> %b, <16 x i8> poison, <16 x i32> zeroinitializer
  %d = or <16 x i8> %a, %c
  store <16 x i8> %d, ptr %x
  ret void
}

; Vector OR scalar for <8 x i16>. The loaded vector is ORed with a broadcast
; of the i16 argument %y as the second operand. The expected assembly folds
; the broadcast into one vor.vx.
define void @or_vx_v8i16(ptr %x, i16 %y) {
; CHECK-LABEL: or_vx_v8i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0)
; CHECK-NEXT:    vor.vx v8, v8, a1
; CHECK-NEXT:    vse16.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <8 x i16>, ptr %x
  %b = insertelement <8 x i16> poison, i16 %y, i32 0
  %c = shufflevector <8 x i16> %b, <8 x i16> poison, <8 x i32> zeroinitializer
  %d = or <8 x i16> %a, %c
  store <8 x i16> %d, ptr %x
  ret void
}

; Vector OR scalar for <4 x i32>. The loaded vector is ORed with a broadcast
; of the i32 argument %y as the second operand. The expected assembly folds
; the broadcast into one vor.vx.
define void @or_vx_v4i32(ptr %x, i32 %y) {
; CHECK-LABEL: or_vx_v4i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    vor.vx v8, v8, a1
; CHECK-NEXT:    vse32.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <4 x i32>, ptr %x
  %b = insertelement <4 x i32> poison, i32 %y, i32 0
  %c = shufflevector <4 x i32> %b, <4 x i32> poison, <4 x i32> zeroinitializer
  %d = or <4 x i32> %a, %c
  store <4 x i32> %d, ptr %x
  ret void
}

; Scalar OR vector for <16 x i8>. Same as or_vx_v16i8 but with the broadcast
; of %y as the first operand of the or. The expected assembly is the same
; single vor.vx.
define void @or_xv_v16i8(ptr %x, i8 %y) {
; CHECK-LABEL: or_xv_v16i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT:    vle8.v v8, (a0)
; CHECK-NEXT:    vor.vx v8, v8, a1
; CHECK-NEXT:    vse8.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <16 x i8>, ptr %x
  %b = insertelement <16 x i8> poison, i8 %y, i32 0
  %c = shufflevector <16 x i8> %b, <16 x i8> poison, <16 x i32> zeroinitializer
  %d = or <16 x i8> %c, %a
  store <16 x i8> %d, ptr %x
  ret void
}

; Scalar OR vector for <8 x i16>. Same as or_vx_v8i16 but with the broadcast
; of %y as the first operand of the or. The expected assembly is the same
; single vor.vx.
define void @or_xv_v8i16(ptr %x, i16 %y) {
; CHECK-LABEL: or_xv_v8i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0)
; CHECK-NEXT:    vor.vx v8, v8, a1
; CHECK-NEXT:    vse16.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <8 x i16>, ptr %x
  %b = insertelement <8 x i16> poison, i16 %y, i32 0
  %c = shufflevector <8 x i16> %b, <8 x i16> poison, <8 x i32> zeroinitializer
  %d = or <8 x i16> %c, %a
  store <8 x i16> %d, ptr %x
  ret void
}

; Scalar OR vector for <4 x i32>. Same as or_vx_v4i32 but with the broadcast
; of %y as the first operand of the or. The expected assembly is the same
; single vor.vx.
define void @or_xv_v4i32(ptr %x, i32 %y) {
; CHECK-LABEL: or_xv_v4i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    vor.vx v8, v8, a1
; CHECK-NEXT:    vse32.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <4 x i32>, ptr %x
  %b = insertelement <4 x i32> poison, i32 %y, i32 0
  %c = shufflevector <4 x i32> %b, <4 x i32> poison, <4 x i32> zeroinitializer
  %d = or <4 x i32> %c, %a
  store <4 x i32> %d, ptr %x
  ret void
}

; Vector XOR all-ones for <16 x i8>. The loaded vector is XORed with a splat
; of -1, which is a bitwise complement of every lane. The expected assembly
; prints this as vnot.v rather than an explicit vxor.vi.
define void @xor_vi_v16i8(ptr %x) {
; CHECK-LABEL: xor_vi_v16i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT:    vle8.v v8, (a0)
; CHECK-NEXT:    vnot.v v8, v8
; CHECK-NEXT:    vse8.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <16 x i8>, ptr %x
  %b = insertelement <16 x i8> poison, i8 -1, i32 0
  %c = shufflevector <16 x i8> %b, <16 x i8> poison, <16 x i32> zeroinitializer
  %d = xor <16 x i8> %a, %c
  store <16 x i8> %d, ptr %x
  ret void
}

; Vector XOR all-ones for <8 x i16>. The loaded vector is XORed with a splat
; of -1, which is a bitwise complement of every lane. The expected assembly
; prints this as vnot.v rather than an explicit vxor.vi.
define void @xor_vi_v8i16(ptr %x) {
; CHECK-LABEL: xor_vi_v8i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0)
; CHECK-NEXT:    vnot.v v8, v8
; CHECK-NEXT:    vse16.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <8 x i16>, ptr %x
  %b = insertelement <8 x i16> poison, i16 -1, i32 0
  %c = shufflevector <8 x i16> %b, <8 x i16> poison, <8 x i32> zeroinitializer
  %d = xor <8 x i16> %a, %c
  store <8 x i16> %d, ptr %x
  ret void
}

; Vector XOR all-ones for <4 x i32>. The loaded vector is XORed with a splat
; of -1, which is a bitwise complement of every lane. The expected assembly
; prints this as vnot.v rather than an explicit vxor.vi.
define void @xor_vi_v4i32(ptr %x) {
; CHECK-LABEL: xor_vi_v4i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    vnot.v v8, v8
; CHECK-NEXT:    vse32.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <4 x i32>, ptr %x
  %b = insertelement <4 x i32> poison, i32 -1, i32 0
  %c = shufflevector <4 x i32> %b, <4 x i32> poison, <4 x i32> zeroinitializer
  %d = xor <4 x i32> %a, %c
  store <4 x i32> %d, ptr %x
  ret void
}

; Vector XOR all-ones for <2 x i64>. The loaded vector is XORed with a splat
; of -1, which is a bitwise complement of every lane. The expected assembly
; prints this as vnot.v rather than an explicit vxor.vi.
define void @xor_vi_v2i64(ptr %x) {
; CHECK-LABEL: xor_vi_v2i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; CHECK-NEXT:    vle64.v v8, (a0)
; CHECK-NEXT:    vnot.v v8, v8
; CHECK-NEXT:    vse64.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <2 x i64>, ptr %x
  %b = insertelement <2 x i64> poison, i64 -1, i32 0
  %c = shufflevector <2 x i64> %b, <2 x i64> poison, <2 x i32> zeroinitializer
  %d = xor <2 x i64> %a, %c
  store <2 x i64> %d, ptr %x
  ret void
}

; Constant XOR vector for <16 x i8>. A splat of 1 is the first operand of the
; xor, with the loaded vector second. The expected assembly uses vxor.vi with
; the vector as source and 1 as the immediate.
define void @xor_iv_v16i8(ptr %x) {
; CHECK-LABEL: xor_iv_v16i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT:    vle8.v v8, (a0)
; CHECK-NEXT:    vxor.vi v8, v8, 1
; CHECK-NEXT:    vse8.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <16 x i8>, ptr %x
  %b = insertelement <16 x i8> poison, i8 1, i32 0
  %c = shufflevector <16 x i8> %b, <16 x i8> poison, <16 x i32> zeroinitializer
  %d = xor <16 x i8> %c, %a
  store <16 x i8> %d, ptr %x
  ret void
}

; Constant XOR vector for <8 x i16>. A splat of 1 is the first operand of the
; xor, with the loaded vector second. The expected assembly uses vxor.vi with
; the vector as source and 1 as the immediate.
define void @xor_iv_v8i16(ptr %x) {
; CHECK-LABEL: xor_iv_v8i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0)
; CHECK-NEXT:    vxor.vi v8, v8, 1
; CHECK-NEXT:    vse16.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <8 x i16>, ptr %x
  %b = insertelement <8 x i16> poison, i16 1, i32 0
  %c = shufflevector <8 x i16> %b, <8 x i16> poison, <8 x i32> zeroinitializer
  %d = xor <8 x i16> %c, %a
  store <8 x i16> %d, ptr %x
  ret void
}

; Constant XOR vector for <4 x i32>. A splat of 1 is the first operand of the
; xor, with the loaded vector second. The expected assembly uses vxor.vi with
; the vector as source and 1 as the immediate.
define void @xor_iv_v4i32(ptr %x) {
; CHECK-LABEL: xor_iv_v4i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    vxor.vi v8, v8, 1
; CHECK-NEXT:    vse32.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <4 x i32>, ptr %x
  %b = insertelement <4 x i32> poison, i32 1, i32 0
  %c = shufflevector <4 x i32> %b, <4 x i32> poison, <4 x i32> zeroinitializer
  %d = xor <4 x i32> %c, %a
  store <4 x i32> %d, ptr %x
  ret void
}

; Constant XOR vector for <2 x i64>. A splat of 1 is the first operand of the
; xor, with the loaded vector second. The expected assembly uses vxor.vi with
; the vector as source and 1 as the immediate.
define void @xor_iv_v2i64(ptr %x) {
; CHECK-LABEL: xor_iv_v2i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; CHECK-NEXT:    vle64.v v8, (a0)
; CHECK-NEXT:    vxor.vi v8, v8, 1
; CHECK-NEXT:    vse64.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <2 x i64>, ptr %x
  %b = insertelement <2 x i64> poison, i64 1, i32 0
  %c = shufflevector <2 x i64> %b, <2 x i64> poison, <2 x i32> zeroinitializer
  %d = xor <2 x i64> %c, %a
  store <2 x i64> %d, ptr %x
  ret void
}

; Vector XOR scalar for <16 x i8>. The loaded vector is XORed with a broadcast
; of the i8 argument %y as the second operand. The expected assembly folds the
; broadcast into one vxor.vx.
define void @xor_vx_v16i8(ptr %x, i8 %y) {
; CHECK-LABEL: xor_vx_v16i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT:    vle8.v v8, (a0)
; CHECK-NEXT:    vxor.vx v8, v8, a1
; CHECK-NEXT:    vse8.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <16 x i8>, ptr %x
  %b = insertelement <16 x i8> poison, i8 %y, i32 0
  %c = shufflevector <16 x i8> %b, <16 x i8> poison, <16 x i32> zeroinitializer
  %d = xor <16 x i8> %a, %c
  store <16 x i8> %d, ptr %x
  ret void
}

; Vector XOR scalar for <8 x i16>. The loaded vector is XORed with a broadcast
; of the i16 argument %y as the second operand. The expected assembly folds
; the broadcast into one vxor.vx.
define void @xor_vx_v8i16(ptr %x, i16 %y) {
; CHECK-LABEL: xor_vx_v8i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0)
; CHECK-NEXT:    vxor.vx v8, v8, a1
; CHECK-NEXT:    vse16.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <8 x i16>, ptr %x
  %b = insertelement <8 x i16> poison, i16 %y, i32 0
  %c = shufflevector <8 x i16> %b, <8 x i16> poison, <8 x i32> zeroinitializer
  %d = xor <8 x i16> %a, %c
  store <8 x i16> %d, ptr %x
  ret void
}

; Vector XOR scalar for <4 x i32>. The loaded vector is XORed with a broadcast
; of the i32 argument %y as the second operand. The expected assembly folds
; the broadcast into one vxor.vx.
define void @xor_vx_v4i32(ptr %x, i32 %y) {
; CHECK-LABEL: xor_vx_v4i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    vxor.vx v8, v8, a1
; CHECK-NEXT:    vse32.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <4 x i32>, ptr %x
  %b = insertelement <4 x i32> poison, i32 %y, i32 0
  %c = shufflevector <4 x i32> %b, <4 x i32> poison, <4 x i32> zeroinitializer
  %d = xor <4 x i32> %a, %c
  store <4 x i32> %d, ptr %x
  ret void
}

; Scalar XOR vector for <16 x i8>. Same as xor_vx_v16i8 but with the broadcast
; of %y as the first operand of the xor. The expected assembly is the same
; single vxor.vx.
define void @xor_xv_v16i8(ptr %x, i8 %y) {
; CHECK-LABEL: xor_xv_v16i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT:    vle8.v v8, (a0)
; CHECK-NEXT:    vxor.vx v8, v8, a1
; CHECK-NEXT:    vse8.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <16 x i8>, ptr %x
  %b = insertelement <16 x i8> poison, i8 %y, i32 0
  %c = shufflevector <16 x i8> %b, <16 x i8> poison, <16 x i32> zeroinitializer
  %d = xor <16 x i8> %c, %a
  store <16 x i8> %d, ptr %x
  ret void
}

; Scalar XOR vector for <8 x i16>. Same as xor_vx_v8i16 but with the broadcast
; of %y as the first operand of the xor. The expected assembly is the same
; single vxor.vx.
define void @xor_xv_v8i16(ptr %x, i16 %y) {
; CHECK-LABEL: xor_xv_v8i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0)
; CHECK-NEXT:    vxor.vx v8, v8, a1
; CHECK-NEXT:    vse16.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <8 x i16>, ptr %x
  %b = insertelement <8 x i16> poison, i16 %y, i32 0
  %c = shufflevector <8 x i16> %b, <8 x i16> poison, <8 x i32> zeroinitializer
  %d = xor <8 x i16> %c, %a
  store <8 x i16> %d, ptr %x
  ret void
}

; Scalar XOR vector for <4 x i32>. Same as xor_vx_v4i32 but with the broadcast
; of %y as the first operand of the xor. The expected assembly is the same
; single vxor.vx.
define void @xor_xv_v4i32(ptr %x, i32 %y) {
; CHECK-LABEL: xor_xv_v4i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    vxor.vx v8, v8, a1
; CHECK-NEXT:    vse32.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <4 x i32>, ptr %x
  %b = insertelement <4 x i32> poison, i32 %y, i32 0
  %c = shufflevector <4 x i32> %b, <4 x i32> poison, <4 x i32> zeroinitializer
  %d = xor <4 x i32> %c, %a
  store <4 x i32> %d, ptr %x
  ret void
}

; Logical shift right by a constant for <16 x i8>. Every lane is shifted by 7,
; which is the element width minus one. The expected assembly encodes the
; shift amount as the immediate of vsrl.vi.
define void @lshr_vi_v16i8(ptr %x) {
; CHECK-LABEL: lshr_vi_v16i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT:    vle8.v v8, (a0)
; CHECK-NEXT:    vsrl.vi v8, v8, 7
; CHECK-NEXT:    vse8.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <16 x i8>, ptr %x
  %b = insertelement <16 x i8> poison, i8 7, i32 0
  %c = shufflevector <16 x i8> %b, <16 x i8> poison, <16 x i32> zeroinitializer
  %d = lshr <16 x i8> %a, %c
  store <16 x i8> %d, ptr %x
  ret void
}

; Logical shift right by a constant for <8 x i16>. Every lane is shifted by
; 15, which is the element width minus one. The expected assembly encodes the
; shift amount as the immediate of vsrl.vi.
define void @lshr_vi_v8i16(ptr %x) {
; CHECK-LABEL: lshr_vi_v8i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0)
; CHECK-NEXT:    vsrl.vi v8, v8, 15
; CHECK-NEXT:    vse16.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <8 x i16>, ptr %x
  %b = insertelement <8 x i16> poison, i16 15, i32 0
  %c = shufflevector <8 x i16> %b, <8 x i16> poison, <8 x i32> zeroinitializer
  %d = lshr <8 x i16> %a, %c
  store <8 x i16> %d, ptr %x
  ret void
}

; Logical shift right by a constant for <4 x i32>. Every lane is shifted by
; 31, which is the element width minus one. The expected assembly encodes the
; shift amount as the immediate of vsrl.vi.
define void @lshr_vi_v4i32(ptr %x) {
; CHECK-LABEL: lshr_vi_v4i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    vsrl.vi v8, v8, 31
; CHECK-NEXT:    vse32.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <4 x i32>, ptr %x
  %b = insertelement <4 x i32> poison, i32 31, i32 0
  %c = shufflevector <4 x i32> %b, <4 x i32> poison, <4 x i32> zeroinitializer
  %d = lshr <4 x i32> %a, %c
  store <4 x i32> %d, ptr %x
  ret void
}

; Logical right shift of <2 x i64> by a splat of the constant 31; the shift
; amount is expected to be encoded as the immediate of vsrl.vi.
define void @lshr_vi_v2i64(ptr %x) {
; CHECK-LABEL: lshr_vi_v2i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; CHECK-NEXT:    vle64.v v8, (a0)
; CHECK-NEXT:    vsrl.vi v8, v8, 31
; CHECK-NEXT:    vse64.v v8, (a0)
; CHECK-NEXT:    ret
  %vec = load <2 x i64>, ptr %x
  %head = insertelement <2 x i64> poison, i64 31, i32 0
  %amt = shufflevector <2 x i64> %head, <2 x i64> poison, <2 x i32> zeroinitializer
  %res = lshr <2 x i64> %vec, %amt
  store <2 x i64> %res, ptr %x
  ret void
}

; Logical right shift of <16 x i8> by the splatted scalar argument %y; the
; expected code is vsrl.vx with the amount taken from a1.
define void @lshr_vx_v16i8(ptr %x, i8 %y) {
; CHECK-LABEL: lshr_vx_v16i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT:    vle8.v v8, (a0)
; CHECK-NEXT:    vsrl.vx v8, v8, a1
; CHECK-NEXT:    vse8.v v8, (a0)
; CHECK-NEXT:    ret
  %vec = load <16 x i8>, ptr %x
  %head = insertelement <16 x i8> poison, i8 %y, i32 0
  %amt = shufflevector <16 x i8> %head, <16 x i8> poison, <16 x i32> zeroinitializer
  %res = lshr <16 x i8> %vec, %amt
  store <16 x i8> %res, ptr %x
  ret void
}

; Logical right shift of <8 x i16> by the splatted scalar argument %y; the
; expected code is vsrl.vx with the amount taken from a1.
define void @lshr_vx_v8i16(ptr %x, i16 %y) {
; CHECK-LABEL: lshr_vx_v8i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0)
; CHECK-NEXT:    vsrl.vx v8, v8, a1
; CHECK-NEXT:    vse16.v v8, (a0)
; CHECK-NEXT:    ret
  %vec = load <8 x i16>, ptr %x
  %head = insertelement <8 x i16> poison, i16 %y, i32 0
  %amt = shufflevector <8 x i16> %head, <8 x i16> poison, <8 x i32> zeroinitializer
  %res = lshr <8 x i16> %vec, %amt
  store <8 x i16> %res, ptr %x
  ret void
}

; Logical right shift of <4 x i32> by the splatted scalar argument %y; the
; expected code is vsrl.vx with the amount taken from a1.
define void @lshr_vx_v4i32(ptr %x, i32 %y) {
; CHECK-LABEL: lshr_vx_v4i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    vsrl.vx v8, v8, a1
; CHECK-NEXT:    vse32.v v8, (a0)
; CHECK-NEXT:    ret
  %vec = load <4 x i32>, ptr %x
  %head = insertelement <4 x i32> poison, i32 %y, i32 0
  %amt = shufflevector <4 x i32> %head, <4 x i32> poison, <4 x i32> zeroinitializer
  %res = lshr <4 x i32> %vec, %amt
  store <4 x i32> %res, ptr %x
  ret void
}

; Arithmetic right shift of <16 x i8> by a splat of the constant 7; the shift
; amount is expected to be encoded as the immediate of vsra.vi.
define void @ashr_vi_v16i8(ptr %x) {
; CHECK-LABEL: ashr_vi_v16i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT:    vle8.v v8, (a0)
; CHECK-NEXT:    vsra.vi v8, v8, 7
; CHECK-NEXT:    vse8.v v8, (a0)
; CHECK-NEXT:    ret
  %vec = load <16 x i8>, ptr %x
  %head = insertelement <16 x i8> poison, i8 7, i32 0
  %amt = shufflevector <16 x i8> %head, <16 x i8> poison, <16 x i32> zeroinitializer
  %res = ashr <16 x i8> %vec, %amt
  store <16 x i8> %res, ptr %x
  ret void
}

; Arithmetic right shift of <8 x i16> by a splat of the constant 15; the shift
; amount is expected to be encoded as the immediate of vsra.vi.
define void @ashr_vi_v8i16(ptr %x) {
; CHECK-LABEL: ashr_vi_v8i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0)
; CHECK-NEXT:    vsra.vi v8, v8, 15
; CHECK-NEXT:    vse16.v v8, (a0)
; CHECK-NEXT:    ret
  %vec = load <8 x i16>, ptr %x
  %head = insertelement <8 x i16> poison, i16 15, i32 0
  %amt = shufflevector <8 x i16> %head, <8 x i16> poison, <8 x i32> zeroinitializer
  %res = ashr <8 x i16> %vec, %amt
  store <8 x i16> %res, ptr %x
  ret void
}

; Arithmetic right shift of <4 x i32> by a splat of the constant 31; the shift
; amount is expected to be encoded as the immediate of vsra.vi.
define void @ashr_vi_v4i32(ptr %x) {
; CHECK-LABEL: ashr_vi_v4i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    vsra.vi v8, v8, 31
; CHECK-NEXT:    vse32.v v8, (a0)
; CHECK-NEXT:    ret
  %vec = load <4 x i32>, ptr %x
  %head = insertelement <4 x i32> poison, i32 31, i32 0
  %amt = shufflevector <4 x i32> %head, <4 x i32> poison, <4 x i32> zeroinitializer
  %res = ashr <4 x i32> %vec, %amt
  store <4 x i32> %res, ptr %x
  ret void
}

; Arithmetic right shift of <2 x i64> by a splat of the constant 31; the shift
; amount is expected to be encoded as the immediate of vsra.vi.
define void @ashr_vi_v2i64(ptr %x) {
; CHECK-LABEL: ashr_vi_v2i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; CHECK-NEXT:    vle64.v v8, (a0)
; CHECK-NEXT:    vsra.vi v8, v8, 31
; CHECK-NEXT:    vse64.v v8, (a0)
; CHECK-NEXT:    ret
  %vec = load <2 x i64>, ptr %x
  %head = insertelement <2 x i64> poison, i64 31, i32 0
  %amt = shufflevector <2 x i64> %head, <2 x i64> poison, <2 x i32> zeroinitializer
  %res = ashr <2 x i64> %vec, %amt
  store <2 x i64> %res, ptr %x
  ret void
}

; Arithmetic right shift of <16 x i8> by the splatted scalar argument %y; the
; expected code is vsra.vx with the amount taken from a1.
define void @ashr_vx_v16i8(ptr %x, i8 %y) {
; CHECK-LABEL: ashr_vx_v16i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT:    vle8.v v8, (a0)
; CHECK-NEXT:    vsra.vx v8, v8, a1
; CHECK-NEXT:    vse8.v v8, (a0)
; CHECK-NEXT:    ret
  %vec = load <16 x i8>, ptr %x
  %head = insertelement <16 x i8> poison, i8 %y, i32 0
  %amt = shufflevector <16 x i8> %head, <16 x i8> poison, <16 x i32> zeroinitializer
  %res = ashr <16 x i8> %vec, %amt
  store <16 x i8> %res, ptr %x
  ret void
}

; Arithmetic right shift of <8 x i16> by the splatted scalar argument %y; the
; expected code is vsra.vx with the amount taken from a1.
define void @ashr_vx_v8i16(ptr %x, i16 %y) {
; CHECK-LABEL: ashr_vx_v8i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0)
; CHECK-NEXT:    vsra.vx v8, v8, a1
; CHECK-NEXT:    vse16.v v8, (a0)
; CHECK-NEXT:    ret
  %vec = load <8 x i16>, ptr %x
  %head = insertelement <8 x i16> poison, i16 %y, i32 0
  %amt = shufflevector <8 x i16> %head, <8 x i16> poison, <8 x i32> zeroinitializer
  %res = ashr <8 x i16> %vec, %amt
  store <8 x i16> %res, ptr %x
  ret void
}

; Arithmetic right shift of <4 x i32> by the splatted scalar argument %y; the
; expected code is vsra.vx with the amount taken from a1.
define void @ashr_vx_v4i32(ptr %x, i32 %y) {
; CHECK-LABEL: ashr_vx_v4i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    vsra.vx v8, v8, a1
; CHECK-NEXT:    vse32.v v8, (a0)
; CHECK-NEXT:    ret
  %vec = load <4 x i32>, ptr %x
  %head = insertelement <4 x i32> poison, i32 %y, i32 0
  %amt = shufflevector <4 x i32> %head, <4 x i32> poison, <4 x i32> zeroinitializer
  %res = ashr <4 x i32> %vec, %amt
  store <4 x i32> %res, ptr %x
  ret void
}

; Left shift of <16 x i8> by a splat of the constant 7; the shift amount is
; expected to be encoded as the immediate of vsll.vi.
define void @shl_vi_v16i8(ptr %x) {
; CHECK-LABEL: shl_vi_v16i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT:    vle8.v v8, (a0)
; CHECK-NEXT:    vsll.vi v8, v8, 7
; CHECK-NEXT:    vse8.v v8, (a0)
; CHECK-NEXT:    ret
  %vec = load <16 x i8>, ptr %x
  %head = insertelement <16 x i8> poison, i8 7, i32 0
  %amt = shufflevector <16 x i8> %head, <16 x i8> poison, <16 x i32> zeroinitializer
  %res = shl <16 x i8> %vec, %amt
  store <16 x i8> %res, ptr %x
  ret void
}

; Left shift of <8 x i16> by a splat of the constant 15; the shift amount is
; expected to be encoded as the immediate of vsll.vi.
define void @shl_vi_v8i16(ptr %x) {
; CHECK-LABEL: shl_vi_v8i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0)
; CHECK-NEXT:    vsll.vi v8, v8, 15
; CHECK-NEXT:    vse16.v v8, (a0)
; CHECK-NEXT:    ret
  %vec = load <8 x i16>, ptr %x
  %head = insertelement <8 x i16> poison, i16 15, i32 0
  %amt = shufflevector <8 x i16> %head, <8 x i16> poison, <8 x i32> zeroinitializer
  %res = shl <8 x i16> %vec, %amt
  store <8 x i16> %res, ptr %x
  ret void
}

; Left shift of <4 x i32> by a splat of the constant 31; the shift amount is
; expected to be encoded as the immediate of vsll.vi.
define void @shl_vi_v4i32(ptr %x) {
; CHECK-LABEL: shl_vi_v4i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    vsll.vi v8, v8, 31
; CHECK-NEXT:    vse32.v v8, (a0)
; CHECK-NEXT:    ret
  %vec = load <4 x i32>, ptr %x
  %head = insertelement <4 x i32> poison, i32 31, i32 0
  %amt = shufflevector <4 x i32> %head, <4 x i32> poison, <4 x i32> zeroinitializer
  %res = shl <4 x i32> %vec, %amt
  store <4 x i32> %res, ptr %x
  ret void
}

; Left shift of <2 x i64> by a splat of the constant 31; the shift amount is
; expected to be encoded as the immediate of vsll.vi.
define void @shl_vi_v2i64(ptr %x) {
; CHECK-LABEL: shl_vi_v2i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; CHECK-NEXT:    vle64.v v8, (a0)
; CHECK-NEXT:    vsll.vi v8, v8, 31
; CHECK-NEXT:    vse64.v v8, (a0)
; CHECK-NEXT:    ret
  %vec = load <2 x i64>, ptr %x
  %head = insertelement <2 x i64> poison, i64 31, i32 0
  %amt = shufflevector <2 x i64> %head, <2 x i64> poison, <2 x i32> zeroinitializer
  %res = shl <2 x i64> %vec, %amt
  store <2 x i64> %res, ptr %x
  ret void
}

; Left shift of <16 x i8> by the splatted scalar argument %y; the expected code
; is vsll.vx with the amount taken from a1.
define void @shl_vx_v16i8(ptr %x, i8 %y) {
; CHECK-LABEL: shl_vx_v16i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT:    vle8.v v8, (a0)
; CHECK-NEXT:    vsll.vx v8, v8, a1
; CHECK-NEXT:    vse8.v v8, (a0)
; CHECK-NEXT:    ret
  %vec = load <16 x i8>, ptr %x
  %head = insertelement <16 x i8> poison, i8 %y, i32 0
  %amt = shufflevector <16 x i8> %head, <16 x i8> poison, <16 x i32> zeroinitializer
  %res = shl <16 x i8> %vec, %amt
  store <16 x i8> %res, ptr %x
  ret void
}

; Left shift of <8 x i16> by the splatted scalar argument %y; the expected code
; is vsll.vx with the amount taken from a1.
define void @shl_vx_v8i16(ptr %x, i16 %y) {
; CHECK-LABEL: shl_vx_v8i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0)
; CHECK-NEXT:    vsll.vx v8, v8, a1
; CHECK-NEXT:    vse16.v v8, (a0)
; CHECK-NEXT:    ret
  %vec = load <8 x i16>, ptr %x
  %head = insertelement <8 x i16> poison, i16 %y, i32 0
  %amt = shufflevector <8 x i16> %head, <8 x i16> poison, <8 x i32> zeroinitializer
  %res = shl <8 x i16> %vec, %amt
  store <8 x i16> %res, ptr %x
  ret void
}

; Left shift of <4 x i32> by the splatted scalar argument %y; the expected code
; is vsll.vx with the amount taken from a1.
define void @shl_vx_v4i32(ptr %x, i32 %y) {
; CHECK-LABEL: shl_vx_v4i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    vsll.vx v8, v8, a1
; CHECK-NEXT:    vse32.v v8, (a0)
; CHECK-NEXT:    ret
  %vec = load <4 x i32>, ptr %x
  %head = insertelement <4 x i32> poison, i32 %y, i32 0
  %amt = shufflevector <4 x i32> %head, <4 x i32> poison, <4 x i32> zeroinitializer
  %res = shl <4 x i32> %vec, %amt
  store <4 x i32> %res, ptr %x
  ret void
}

; Signed division of <16 x i8> by the splatted scalar argument %y; the expected
; code is vdiv.vx with the divisor taken from a1.
define void @sdiv_vx_v16i8(ptr %x, i8 %y) {
; CHECK-LABEL: sdiv_vx_v16i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT:    vle8.v v8, (a0)
; CHECK-NEXT:    vdiv.vx v8, v8, a1
; CHECK-NEXT:    vse8.v v8, (a0)
; CHECK-NEXT:    ret
  %vec = load <16 x i8>, ptr %x
  %head = insertelement <16 x i8> poison, i8 %y, i32 0
  %divisor = shufflevector <16 x i8> %head, <16 x i8> poison, <16 x i32> zeroinitializer
  %quot = sdiv <16 x i8> %vec, %divisor
  store <16 x i8> %quot, ptr %x
  ret void
}

; Signed division of <8 x i16> by the splatted scalar argument %y; the expected
; code is vdiv.vx with the divisor taken from a1.
define void @sdiv_vx_v8i16(ptr %x, i16 %y) {
; CHECK-LABEL: sdiv_vx_v8i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0)
; CHECK-NEXT:    vdiv.vx v8, v8, a1
; CHECK-NEXT:    vse16.v v8, (a0)
; CHECK-NEXT:    ret
  %vec = load <8 x i16>, ptr %x
  %head = insertelement <8 x i16> poison, i16 %y, i32 0
  %divisor = shufflevector <8 x i16> %head, <8 x i16> poison, <8 x i32> zeroinitializer
  %quot = sdiv <8 x i16> %vec, %divisor
  store <8 x i16> %quot, ptr %x
  ret void
}

; Signed division of <4 x i32> by the splatted scalar argument %y; the expected
; code is vdiv.vx with the divisor taken from a1.
define void @sdiv_vx_v4i32(ptr %x, i32 %y) {
; CHECK-LABEL: sdiv_vx_v4i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    vdiv.vx v8, v8, a1
; CHECK-NEXT:    vse32.v v8, (a0)
; CHECK-NEXT:    ret
  %vec = load <4 x i32>, ptr %x
  %head = insertelement <4 x i32> poison, i32 %y, i32 0
  %divisor = shufflevector <4 x i32> %head, <4 x i32> poison, <4 x i32> zeroinitializer
  %quot = sdiv <4 x i32> %vec, %divisor
  store <4 x i32> %quot, ptr %x
  ret void
}

; Signed remainder of <16 x i8> by the splatted scalar argument %y; the
; expected code is vrem.vx with the divisor taken from a1.
define void @srem_vx_v16i8(ptr %x, i8 %y) {
; CHECK-LABEL: srem_vx_v16i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT:    vle8.v v8, (a0)
; CHECK-NEXT:    vrem.vx v8, v8, a1
; CHECK-NEXT:    vse8.v v8, (a0)
; CHECK-NEXT:    ret
  %vec = load <16 x i8>, ptr %x
  %head = insertelement <16 x i8> poison, i8 %y, i32 0
  %divisor = shufflevector <16 x i8> %head, <16 x i8> poison, <16 x i32> zeroinitializer
  %rem = srem <16 x i8> %vec, %divisor
  store <16 x i8> %rem, ptr %x
  ret void
}

; Signed remainder of <8 x i16> by the splatted scalar argument %y; the
; expected code is vrem.vx with the divisor taken from a1.
define void @srem_vx_v8i16(ptr %x, i16 %y) {
; CHECK-LABEL: srem_vx_v8i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0)
; CHECK-NEXT:    vrem.vx v8, v8, a1
; CHECK-NEXT:    vse16.v v8, (a0)
; CHECK-NEXT:    ret
  %vec = load <8 x i16>, ptr %x
  %head = insertelement <8 x i16> poison, i16 %y, i32 0
  %divisor = shufflevector <8 x i16> %head, <8 x i16> poison, <8 x i32> zeroinitializer
  %rem = srem <8 x i16> %vec, %divisor
  store <8 x i16> %rem, ptr %x
  ret void
}

; Signed remainder of <4 x i32> by the splatted scalar argument %y; the
; expected code is vrem.vx with the divisor taken from a1.
define void @srem_vx_v4i32(ptr %x, i32 %y) {
; CHECK-LABEL: srem_vx_v4i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    vrem.vx v8, v8, a1
; CHECK-NEXT:    vse32.v v8, (a0)
; CHECK-NEXT:    ret
  %vec = load <4 x i32>, ptr %x
  %head = insertelement <4 x i32> poison, i32 %y, i32 0
  %divisor = shufflevector <4 x i32> %head, <4 x i32> poison, <4 x i32> zeroinitializer
  %rem = srem <4 x i32> %vec, %divisor
  store <4 x i32> %rem, ptr %x
  ret void
}

; Unsigned division of <16 x i8> by the splatted scalar argument %y; the
; expected code is vdivu.vx with the divisor taken from a1.
define void @udiv_vx_v16i8(ptr %x, i8 %y) {
; CHECK-LABEL: udiv_vx_v16i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT:    vle8.v v8, (a0)
; CHECK-NEXT:    vdivu.vx v8, v8, a1
; CHECK-NEXT:    vse8.v v8, (a0)
; CHECK-NEXT:    ret
  %vec = load <16 x i8>, ptr %x
  %head = insertelement <16 x i8> poison, i8 %y, i32 0
  %divisor = shufflevector <16 x i8> %head, <16 x i8> poison, <16 x i32> zeroinitializer
  %quot = udiv <16 x i8> %vec, %divisor
  store <16 x i8> %quot, ptr %x
  ret void
}

; Unsigned division of <8 x i16> by the splatted scalar argument %y; the
; expected code is vdivu.vx with the divisor taken from a1.
define void @udiv_vx_v8i16(ptr %x, i16 %y) {
; CHECK-LABEL: udiv_vx_v8i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0)
; CHECK-NEXT:    vdivu.vx v8, v8, a1
; CHECK-NEXT:    vse16.v v8, (a0)
; CHECK-NEXT:    ret
  %vec = load <8 x i16>, ptr %x
  %head = insertelement <8 x i16> poison, i16 %y, i32 0
  %divisor = shufflevector <8 x i16> %head, <8 x i16> poison, <8 x i32> zeroinitializer
  %quot = udiv <8 x i16> %vec, %divisor
  store <8 x i16> %quot, ptr %x
  ret void
}

; Unsigned division of <4 x i32> by the splatted scalar argument %y; the
; expected code is vdivu.vx with the divisor taken from a1.
define void @udiv_vx_v4i32(ptr %x, i32 %y) {
; CHECK-LABEL: udiv_vx_v4i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    vdivu.vx v8, v8, a1
; CHECK-NEXT:    vse32.v v8, (a0)
; CHECK-NEXT:    ret
  %vec = load <4 x i32>, ptr %x
  %head = insertelement <4 x i32> poison, i32 %y, i32 0
  %divisor = shufflevector <4 x i32> %head, <4 x i32> poison, <4 x i32> zeroinitializer
  %quot = udiv <4 x i32> %vec, %divisor
  store <4 x i32> %quot, ptr %x
  ret void
}

; Unsigned remainder of <16 x i8> by the splatted scalar argument %y; the
; expected code is vremu.vx with the divisor taken from a1.
define void @urem_vx_v16i8(ptr %x, i8 %y) {
; CHECK-LABEL: urem_vx_v16i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT:    vle8.v v8, (a0)
; CHECK-NEXT:    vremu.vx v8, v8, a1
; CHECK-NEXT:    vse8.v v8, (a0)
; CHECK-NEXT:    ret
  %vec = load <16 x i8>, ptr %x
  %head = insertelement <16 x i8> poison, i8 %y, i32 0
  %divisor = shufflevector <16 x i8> %head, <16 x i8> poison, <16 x i32> zeroinitializer
  %rem = urem <16 x i8> %vec, %divisor
  store <16 x i8> %rem, ptr %x
  ret void
}

; Unsigned remainder of <8 x i16> by the splatted scalar argument %y; the
; expected code is vremu.vx with the divisor taken from a1.
define void @urem_vx_v8i16(ptr %x, i16 %y) {
; CHECK-LABEL: urem_vx_v8i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0)
; CHECK-NEXT:    vremu.vx v8, v8, a1
; CHECK-NEXT:    vse16.v v8, (a0)
; CHECK-NEXT:    ret
  %vec = load <8 x i16>, ptr %x
  %head = insertelement <8 x i16> poison, i16 %y, i32 0
  %divisor = shufflevector <8 x i16> %head, <8 x i16> poison, <8 x i32> zeroinitializer
  %rem = urem <8 x i16> %vec, %divisor
  store <8 x i16> %rem, ptr %x
  ret void
}

; Unsigned remainder of <4 x i32> by the splatted scalar argument %y; the
; expected code is vremu.vx with the divisor taken from a1.
define void @urem_vx_v4i32(ptr %x, i32 %y) {
; CHECK-LABEL: urem_vx_v4i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    vremu.vx v8, v8, a1
; CHECK-NEXT:    vse32.v v8, (a0)
; CHECK-NEXT:    ret
  %vec = load <4 x i32>, ptr %x
  %head = insertelement <4 x i32> poison, i32 %y, i32 0
  %divisor = shufflevector <4 x i32> %head, <4 x i32> poison, <4 x i32> zeroinitializer
  %rem = urem <4 x i32> %vec, %divisor
  store <4 x i32> %rem, ptr %x
  ret void
}

; Unsigned division of <16 x i8> by the constant 9. The expected lowering
; multiplies by 57 with vmulhu.vx and then shifts right by 1.
define void @mulhu_vx_v16i8(ptr %x) {
; CHECK-LABEL: mulhu_vx_v16i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT:    vle8.v v8, (a0)
; CHECK-NEXT:    li a1, 57
; CHECK-NEXT:    vmulhu.vx v8, v8, a1
; CHECK-NEXT:    vsrl.vi v8, v8, 1
; CHECK-NEXT:    vse8.v v8, (a0)
; CHECK-NEXT:    ret
  %vec = load <16 x i8>, ptr %x
  %quot = udiv <16 x i8> %vec, <i8 9, i8 9, i8 9, i8 9, i8 9, i8 9, i8 9, i8 9, i8 9, i8 9, i8 9, i8 9, i8 9, i8 9, i8 9, i8 9>
  store <16 x i8> %quot, ptr %x
  ret void
}

; Unsigned division of <8 x i16> by the constant 7. The expected lowering is a
; vmulhu.vx followed by a subtract, shift, add and final shift sequence.
define void @mulhu_vx_v8i16(ptr %x) {
; CHECK-LABEL: mulhu_vx_v8i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0)
; CHECK-NEXT:    lui a1, 2
; CHECK-NEXT:    addi a1, a1, 1171
; CHECK-NEXT:    vmulhu.vx v9, v8, a1
; CHECK-NEXT:    vsub.vv v8, v8, v9
; CHECK-NEXT:    vsrl.vi v8, v8, 1
; CHECK-NEXT:    vadd.vv v8, v8, v9
; CHECK-NEXT:    vsrl.vi v8, v8, 2
; CHECK-NEXT:    vse16.v v8, (a0)
; CHECK-NEXT:    ret
  %vec = load <8 x i16>, ptr %x
  %quot = udiv <8 x i16> %vec, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
  store <8 x i16> %quot, ptr %x
  ret void
}

; Unsigned division of <4 x i32> by the constant 5. The expected lowering
; materializes the multiplier with lui/addi, applies vmulhu.vx and shifts
; right by 2.
define void @mulhu_vx_v4i32(ptr %x) {
; CHECK-LABEL: mulhu_vx_v4i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    lui a1, 838861
; CHECK-NEXT:    addi a1, a1, -819
; CHECK-NEXT:    vmulhu.vx v8, v8, a1
; CHECK-NEXT:    vsrl.vi v8, v8, 2
; CHECK-NEXT:    vse32.v v8, (a0)
; CHECK-NEXT:    ret
  %vec = load <4 x i32>, ptr %x
  %quot = udiv <4 x i32> %vec, <i32 5, i32 5, i32 5, i32 5>
  store <4 x i32> %quot, ptr %x
  ret void
}

; Unsigned division of <2 x i64> by the constant 3. The RV32 expectation stores
; the 64-bit multiplier to the stack, splats it with a zero-stride vlse64.v and
; uses vmulhu.vv; the RV64 expectation builds it in a1 and uses vmulhu.vx.
define void @mulhu_vx_v2i64(ptr %x) {
; RV32-LABEL: mulhu_vx_v2i64:
; RV32:       # %bb.0:
; RV32-NEXT:    addi sp, sp, -16
; RV32-NEXT:    .cfi_def_cfa_offset 16
; RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; RV32-NEXT:    vle64.v v8, (a0)
; RV32-NEXT:    lui a1, 699051
; RV32-NEXT:    addi a2, a1, -1366
; RV32-NEXT:    sw a2, 12(sp)
; RV32-NEXT:    addi a1, a1, -1365
; RV32-NEXT:    sw a1, 8(sp)
; RV32-NEXT:    addi a1, sp, 8
; RV32-NEXT:    vlse64.v v9, (a1), zero
; RV32-NEXT:    vmulhu.vv v8, v8, v9
; RV32-NEXT:    vsrl.vi v8, v8, 1
; RV32-NEXT:    vse64.v v8, (a0)
; RV32-NEXT:    addi sp, sp, 16
; RV32-NEXT:    ret
;
; RV64-LABEL: mulhu_vx_v2i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; RV64-NEXT:    vle64.v v8, (a0)
; RV64-NEXT:    lui a1, 699051
; RV64-NEXT:    addiw a1, a1, -1365
; RV64-NEXT:    slli a2, a1, 32
; RV64-NEXT:    add a1, a1, a2
; RV64-NEXT:    vmulhu.vx v8, v8, a1
; RV64-NEXT:    vsrl.vi v8, v8, 1
; RV64-NEXT:    vse64.v v8, (a0)
; RV64-NEXT:    ret
  %vec = load <2 x i64>, ptr %x
  %quot = udiv <2 x i64> %vec, <i64 3, i64 3>
  store <2 x i64> %quot, ptr %x
  ret void
}

; Signed division of <16 x i8> by the splat constant -9. This uses sdiv so that
; the signed high-multiply (vmulh.vx) expansion is exercised for i8 elements,
; in line with the other mulhs_vx_* tests; a udiv here would only repeat the
; vmulhu.vx coverage of the mulhu_vx_* tests.
; NOTE(review): the expected sequence below was derived by hand (multiplier
; -57, arithmetic shift by 1, then adding the sign bit) - re-run
; utils/update_llc_test_checks.py to confirm it against llc.
define void @mulhs_vx_v16i8(ptr %x) {
; CHECK-LABEL: mulhs_vx_v16i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT:    vle8.v v8, (a0)
; CHECK-NEXT:    li a1, -57
; CHECK-NEXT:    vmulh.vx v8, v8, a1
; CHECK-NEXT:    vsra.vi v8, v8, 1
; CHECK-NEXT:    vsrl.vi v9, v8, 7
; CHECK-NEXT:    vadd.vv v8, v8, v9
; CHECK-NEXT:    vse8.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <16 x i8>, ptr %x
  %b = sdiv <16 x i8> %a, <i8 -9, i8 -9, i8 -9, i8 -9, i8 -9, i8 -9, i8 -9, i8 -9, i8 -9, i8 -9, i8 -9, i8 -9, i8 -9, i8 -9, i8 -9, i8 -9>
  store <16 x i8> %b, ptr %x
  ret void
}

; Signed division of <8 x i16> by the constant 7. The expected lowering is a
; vmulh.vx, an arithmetic shift by 1, and an add of the sign bit extracted with
; vsrl.vi by 15.
define void @mulhs_vx_v8i16(ptr %x) {
; CHECK-LABEL: mulhs_vx_v8i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0)
; CHECK-NEXT:    lui a1, 5
; CHECK-NEXT:    addi a1, a1, -1755
; CHECK-NEXT:    vmulh.vx v8, v8, a1
; CHECK-NEXT:    vsra.vi v8, v8, 1
; CHECK-NEXT:    vsrl.vi v9, v8, 15
; CHECK-NEXT:    vadd.vv v8, v8, v9
; CHECK-NEXT:    vse16.v v8, (a0)
; CHECK-NEXT:    ret
  %vec = load <8 x i16>, ptr %x
  %quot = sdiv <8 x i16> %vec, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
  store <8 x i16> %quot, ptr %x
  ret void
}

; Signed division of <4 x i32> by the constant -5. Both targets are expected to
; use vmulh.vx followed by a shift and a sign-bit add; the RV32 and RV64
; sequences differ only in the order of the vsrl.vi and vsra.vi.
define void @mulhs_vx_v4i32(ptr %x) {
; RV32-LABEL: mulhs_vx_v4i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; RV32-NEXT:    vle32.v v8, (a0)
; RV32-NEXT:    lui a1, 629146
; RV32-NEXT:    addi a1, a1, -1639
; RV32-NEXT:    vmulh.vx v8, v8, a1
; RV32-NEXT:    vsrl.vi v9, v8, 31
; RV32-NEXT:    vsra.vi v8, v8, 1
; RV32-NEXT:    vadd.vv v8, v8, v9
; RV32-NEXT:    vse32.v v8, (a0)
; RV32-NEXT:    ret
;
; RV64-LABEL: mulhs_vx_v4i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; RV64-NEXT:    vle32.v v8, (a0)
; RV64-NEXT:    lui a1, 629146
; RV64-NEXT:    addi a1, a1, -1639
; RV64-NEXT:    vmulh.vx v8, v8, a1
; RV64-NEXT:    vsra.vi v8, v8, 1
; RV64-NEXT:    vsrl.vi v9, v8, 31
; RV64-NEXT:    vadd.vv v8, v8, v9
; RV64-NEXT:    vse32.v v8, (a0)
; RV64-NEXT:    ret
  %vec = load <4 x i32>, ptr %x
  %quot = sdiv <4 x i32> %vec, <i32 -5, i32 -5, i32 -5, i32 -5>
  store <4 x i32> %quot, ptr %x
  ret void
}

; Signed division of <2 x i64> by the constant 3. The RV32 expectation stores
; the multiplier to the stack, splats it with vlse64.v and uses vmulh.vv; the
; RV64 expectation loads it from a constant-pool entry and uses vmulh.vx. Both
; then add the sign bit obtained with vsrl.vx by 63.
define void @mulhs_vx_v2i64(ptr %x) {
; RV32-LABEL: mulhs_vx_v2i64:
; RV32:       # %bb.0:
; RV32-NEXT:    addi sp, sp, -16
; RV32-NEXT:    .cfi_def_cfa_offset 16
; RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; RV32-NEXT:    vle64.v v8, (a0)
; RV32-NEXT:    lui a1, 349525
; RV32-NEXT:    addi a2, a1, 1365
; RV32-NEXT:    sw a2, 12(sp)
; RV32-NEXT:    addi a1, a1, 1366
; RV32-NEXT:    sw a1, 8(sp)
; RV32-NEXT:    addi a1, sp, 8
; RV32-NEXT:    vlse64.v v9, (a1), zero
; RV32-NEXT:    vmulh.vv v8, v8, v9
; RV32-NEXT:    li a1, 63
; RV32-NEXT:    vsrl.vx v9, v8, a1
; RV32-NEXT:    vadd.vv v8, v8, v9
; RV32-NEXT:    vse64.v v8, (a0)
; RV32-NEXT:    addi sp, sp, 16
; RV32-NEXT:    ret
;
; RV64-LABEL: mulhs_vx_v2i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; RV64-NEXT:    vle64.v v8, (a0)
; RV64-NEXT:    lui a1, %hi(.LCPI321_0)
; RV64-NEXT:    ld a1, %lo(.LCPI321_0)(a1)
; RV64-NEXT:    vmulh.vx v8, v8, a1
; RV64-NEXT:    li a1, 63
; RV64-NEXT:    vsrl.vx v9, v8, a1
; RV64-NEXT:    vadd.vv v8, v8, v9
; RV64-NEXT:    vse64.v v8, (a0)
; RV64-NEXT:    ret
  %vec = load <2 x i64>, ptr %x
  %quot = sdiv <2 x i64> %vec, <i64 3, i64 3>
  store <2 x i64> %quot, ptr %x
  ret void
}

; Computes (a + 1) * y on <2 x i64>, with the incremented value as the first
; mul operand. The expected code is a single vmadd.vv with %y (v8) as both
; source operands.
define void @madd_vv_v2i64(ptr %x, <2 x i64> %y) {
; CHECK-LABEL: madd_vv_v2i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; CHECK-NEXT:    vle64.v v9, (a0)
; CHECK-NEXT:    vmadd.vv v9, v8, v8
; CHECK-NEXT:    vse64.v v9, (a0)
; CHECK-NEXT:    ret
  %vec = load <2 x i64>, ptr %x
  %inc = add <2 x i64> %vec, <i64 1, i64 1>
  %prod = mul <2 x i64> %inc, %y
  store <2 x i64> %prod, ptr %x
  ret void
}

; Same computation as (a + 1) * y on <2 x i64>, but with the mul operands
; commuted (%y first). The expected code is still a single vmadd.vv.
define void @madd_vv_v2i64_2(ptr %x, <2 x i64> %y) {
; CHECK-LABEL: madd_vv_v2i64_2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; CHECK-NEXT:    vle64.v v9, (a0)
; CHECK-NEXT:    vmadd.vv v9, v8, v8
; CHECK-NEXT:    vse64.v v9, (a0)
; CHECK-NEXT:    ret
  %vec = load <2 x i64>, ptr %x
  %inc = add <2 x i64> %vec, <i64 1, i64 1>
  %prod = mul <2 x i64> %y, %inc
  store <2 x i64> %prod, ptr %x
  ret void
}

; Computes (1 - a) * y on <2 x i64>, with the difference as the first mul
; operand. The expected code is a single vnmsub.vv with %y (v8) as both source
; operands.
define void @msub_vv_v2i64(ptr %x, <2 x i64> %y) {
; CHECK-LABEL: msub_vv_v2i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; CHECK-NEXT:    vle64.v v9, (a0)
; CHECK-NEXT:    vnmsub.vv v9, v8, v8
; CHECK-NEXT:    vse64.v v9, (a0)
; CHECK-NEXT:    ret
  %vec = load <2 x i64>, ptr %x
  %diff = sub <2 x i64> <i64 1, i64 1>, %vec
  %prod = mul <2 x i64> %diff, %y
  store <2 x i64> %prod, ptr %x
  ret void
}

; Same computation as (1 - a) * y on <2 x i64>, but with the mul operands
; commuted (%y first). The expected code is still a single vnmsub.vv.
define void @msub_vv_v2i64_2(ptr %x, <2 x i64> %y) {
; CHECK-LABEL: msub_vv_v2i64_2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; CHECK-NEXT:    vle64.v v9, (a0)
; CHECK-NEXT:    vnmsub.vv v9, v8, v8
; CHECK-NEXT:    vse64.v v9, (a0)
; CHECK-NEXT:    ret
  %vec = load <2 x i64>, ptr %x
  %diff = sub <2 x i64> <i64 1, i64 1>, %vec
  %prod = mul <2 x i64> %y, %diff
  store <2 x i64> %prod, ptr %x
  ret void
}
