//
// Copyright (c) 2020, 2025, Oracle and/or its affiliates. All rights reserved.
// Copyright (c) 2020, 2023, Arm Limited. All rights reserved.
// Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved.
// Copyright (c) 2023, 2025, Rivos Inc. All rights reserved.
// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
//
// This code is free software; you can redistribute it and/or modify it
// under the terms of the GNU General Public License version 2 only, as
// published by the Free Software Foundation.
//
// This code is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
// FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
// version 2 for more details (a copy is included in the LICENSE file that
// accompanied this code).
//
// You should have received a copy of the GNU General Public License version
// 2 along with this work; if not, write to the Free Software Foundation,
// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
//
// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
// or visit www.oracle.com if you need additional information or have any
// questions.
//
//

// RISCV Vector Extension Architecture Description File

// Memory operand class used by the vector load/store rules in this file
// (loadV, storeV). Its only member is the `indirect` operand; those rules
// read just the base register of the operand.
opclass vmemA(indirect);

source %{

  // Emit a vector load (vlex_v) or store (vsex_v) between vector register
  // `reg` and the memory addressed by `base`.
  //
  //   masm          - assembler the instructions are emitted into
  //   is_store      - true emits a store of `reg`, false emits a load into `reg`
  //   bt            - element type; selects the SEW and is passed to vsetvli_helper
  //   vector_length - element count passed to vsetvli_helper
  //   vm            - Assembler::unmasked (default) or Assembler::v0_t
  static void loadStore(C2_MacroAssembler* masm, bool is_store,
                        VectorRegister reg, BasicType bt, Register base,
                        uint vector_length, Assembler::VectorMask vm = Assembler::unmasked) {
    const Assembler::SEW elem_width = Assembler::elemtype_to_sew(bt);
    __ vsetvli_helper(bt, vector_length);

    if (is_store) {
      __ vsex_v(reg, base, elem_width, vm);
      return;
    }

    // A masked load first clears the destination register, presumably so
    // that lanes not selected by v0 end up as zero -- TODO confirm.
    const bool masked_load = (vm == Assembler::v0_t);
    if (masked_load) {
      __ vxor_vv(reg, reg, reg);
    }
    __ vlex_v(reg, base, elem_width, vm);
  }

  // Auto-vectorization uses exactly the same support rules as explicit
  // vector nodes: the query is forwarded to match_rule_supported_vector.
  bool Matcher::match_rule_supported_auto_vectorization(int opcode, int vlen, BasicType bt) {
    const bool supported = match_rule_supported_vector(opcode, vlen, bt);
    return supported;
  }

  // Identify extra cases that we might want to provide match rules for vector nodes
  // and other intrinsics guarded with vector length (vlen) and element type (bt).
  //
  // Returns false when UseRVV is off, when the opcode has no match rule at all, or
  // when the (bt, vlen) combination is not a supported vector size. Otherwise the
  // switch below applies per-opcode restrictions (extension flags, vlen bounds,
  // element types); any opcode without a dedicated case is reported as supported.
  bool Matcher::match_rule_supported_vector(int opcode, int vlen, BasicType bt) {
    if (!UseRVV) {
      return false;
    }

    if (!match_rule_supported(opcode) || !vector_size_supported(bt, vlen)) {
      return false;
    }

    switch (opcode) {
      case Op_VectorMaskLastTrue:
        if (!UseZbb || vlen > XLEN) {
          return false;
        }
        break;
      case Op_VectorMaskToLong:
      case Op_VectorLongToMask:
        if (vlen > XLEN) {
          return false;
        }
        break;
      case Op_CountTrailingZerosV:
      case Op_CountLeadingZerosV:
      case Op_PopCountVL:
      case Op_PopCountVI:
      case Op_ReverseBytesV:
      case Op_ReverseV:
        return UseZvbb;
      case Op_RotateLeftV:
      case Op_RotateRightV:
        if (bt != T_INT && bt != T_LONG) {
          return false;
        }
        return UseZvbb;
      case Op_LoadVectorGather:
      case Op_LoadVectorGatherMasked:
      case Op_StoreVectorScatter:
      case Op_StoreVectorScatterMasked:
        if (is_subword_type(bt)) {
          return false;
        }
        break;
      case Op_VectorLoadShuffle:
      case Op_VectorRearrange:
        // vlen >= 4 is required, because min vector size for byte is 4 on riscv,
        // VectorLoadShuffle is from byte to X, so it requires vlen >= 4.
        // VectorRearrange depends on VectorLoadShuffle, so it also requires vlen >= 4.
        if (vlen < 4) {
          return false;
        }
        break;
      case Op_MulReductionVI:
      case Op_MulReductionVL:
        // When vlen < 4, our log2(vlen) implementation does not help to gain performance improvement.
        if (vlen < 4) {
          return false;
        }
        // Must not fall through into the half-float cases below: integer/long
        // multiply reductions do not depend on UseZvfh.
        break;
      case Op_VectorCastHF2F:
      case Op_VectorCastF2HF:
      case Op_AddVHF:
      case Op_DivVHF:
      case Op_MaxVHF:
      case Op_MinVHF:
      case Op_MulVHF:
      case Op_SqrtVHF:
      case Op_SubVHF:
        return UseZvfh;
      case Op_FmaVHF:
        return UseZvfh && UseFMA;
      case Op_FmaVF:
      case Op_FmaVD:
        return UseFMA;

      // For float, current tests show a performance gain when vlen >= 8 but a
      // regression when vlen == 4, so this intrinsic is only enabled when vlen >= 8.
      // For double, current tests show that even with vlen == 4 there is still some
      // regression. Although there is no hardware to verify it, the trend of the
      // performance data on hardware with vlen == 2 and vlen == 4 suggests that
      // vlen == 8 should bring a gain rather than a regression for double as well,
      // so this intrinsic is only enabled when vlen >= 8.
      case Op_RoundVF:
      case Op_RoundVD:
        return vlen >= 8;

      default:
        break;
    }
    return true;
  }

  // Masked (predicated) counterpart of match_rule_supported_vector.
  // Rejects everything when UseRVV is off, rejects opcodes that have no masked
  // form, and otherwise defers to the unmasked support check.
  bool Matcher::match_rule_supported_vector_masked(int opcode, int vlen, BasicType bt) {
    // There is no masked version of selectFrom two vector, i.e. selectFrom(av, bv, mask) in vector API.
    const bool has_no_masked_form = (opcode == Op_SelectFromTwoVector);
    if (!UseRVV || has_no_masked_form) {
      return false;
    }
    return match_rule_supported_vector(opcode, vlen, bt);
  }

  // Always answers false on this platform; `node` and `vt` are not inspected.
  bool Matcher::vector_needs_partial_operations(Node* node, const TypeVect* vt) {
    return false;
  }

  // Always answers false on this platform, regardless of element type and
  // vector length.
  bool Matcher::vector_rearrange_requires_load_shuffle(BasicType elem_bt, int vlen) {
    return false;
  }
%}

// All VEC instructions

// vector load/store
// Unmasked vector load: fills dst from the address held in the base register
// of mem, using the shared loadStore helper.
instruct loadV(vReg dst, vmemA mem) %{
  match(Set dst (LoadVector mem));
  format %{ "loadV $dst, $mem\t# vector (rvv)" %}
  ins_encode %{
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    const uint vlen = Matcher::vector_length(this);
    Register base_reg = as_Register($mem$$base);
    loadStore(masm, false, as_VectorRegister($dst$$reg), elem_bt, base_reg, vlen);
  %}
  ins_pipe(pipe_slow);
%}

// Unmasked vector store: writes src to the address held in the base register
// of mem. Element type and length are taken from the src operand.
instruct storeV(vReg src, vmemA mem) %{
  match(Set mem (StoreVector mem src));
  format %{ "storeV $mem, $src\t# vector (rvv)" %}
  ins_encode %{
    const BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);
    const uint vlen = Matcher::vector_length(this, $src);
    Register base_reg = as_Register($mem$$base);
    loadStore(masm, true, as_VectorRegister($src$$reg), elem_bt, base_reg, vlen);
  %}
  ins_pipe(pipe_slow);
%}

// vector load mask

// Builds a mask register from a boolean vector by comparing each element of
// src for not-equal against the zero register (vmsne_vx with zr).
instruct vloadmask(vRegMask dst, vReg src) %{
  match(Set dst (VectorLoadMask src));
  format %{ "vloadmask $dst, $src" %}
  ins_encode %{
    const uint vlen = Matcher::vector_length(this);
    VectorRegister mask_vr = as_VectorRegister($dst$$reg);
    VectorRegister bool_vr = as_VectorRegister($src$$reg);
    __ vsetvli_helper(T_BOOLEAN, vlen);
    __ vmsne_vx(mask_vr, bool_vr, zr);
  %}
  ins_pipe(pipe_slow);
%}

// Predicated form of vloadmask: the not-equal-to-zero compare is issued under
// mask v0 (Assembler::v0_t).
instruct vloadmask_masked(vRegMask dst, vReg src, vRegMask_V0 v0) %{
  match(Set dst (VectorLoadMask src v0));
  format %{ "vloadmask_masked $dst, $src, $v0" %}
  ins_encode %{
    const uint vlen = Matcher::vector_length(this);
    VectorRegister mask_vr = as_VectorRegister($dst$$reg);
    VectorRegister bool_vr = as_VectorRegister($src$$reg);
    __ vsetvli_helper(T_BOOLEAN, vlen);
    __ vmsne_vx(mask_vr, bool_vr, zr, Assembler::v0_t);
  %}
  ins_pipe(pipe_slow);
%}

// vector store mask

// Expands the mask in v0 into a boolean vector: dst is first zeroed with
// vxor_vv and then merged with the immediate 1 via vmerge_vim (which
// presumably selects 1 in the lanes where v0 is set -- TODO confirm).
// The size operand only appears in the format string.
instruct vstoremask(vReg dst, vRegMask_V0 v0, immI size) %{
  match(Set dst (VectorStoreMask v0 size));
  format %{ "vstoremask $dst, V0 # elem size is $size byte[s]" %}
  ins_encode %{
    const uint vlen = Matcher::vector_length(this);
    VectorRegister bool_vr = as_VectorRegister($dst$$reg);
    __ vsetvli_helper(T_BOOLEAN, vlen);
    __ vxor_vv(bool_vr, bool_vr, bool_vr);
    __ vmerge_vim(bool_vr, bool_vr, 1);
  %}
  ins_pipe(pipe_slow);
%}

// vector mask compare

// Integral vector compare (byte/short/int/long elements) producing a mask.
// The comparison kind is the constant cond, handed to compare_integral_v.
instruct vmaskcmp(vRegMask dst, vReg src1, vReg src2, immI cond) %{
  predicate(Matcher::vector_element_basic_type(n) == T_BYTE ||
            Matcher::vector_element_basic_type(n) == T_SHORT ||
            Matcher::vector_element_basic_type(n) == T_INT ||
            Matcher::vector_element_basic_type(n) == T_LONG);
  match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
  format %{ "vmaskcmp $dst, $src1, $src2, $cond" %}
  ins_encode %{
    VectorRegister mask_vr = as_VectorRegister($dst$$reg);
    VectorRegister lhs_vr  = as_VectorRegister($src1$$reg);
    VectorRegister rhs_vr  = as_VectorRegister($src2$$reg);
    const int cond_code = (int)($cond$$constant);
    __ compare_integral_v(mask_vr, lhs_vr, rhs_vr, cond_code,
                          Matcher::vector_element_basic_type(this),
                          Matcher::vector_length(this));
  %}
  ins_pipe(pipe_slow);
%}

// Predicated integral vector compare: same as vmaskcmp but issued under
// mask v0. dst is declared TEMP_DEF.
instruct vmaskcmp_masked(vRegMask dst, vReg src1, vReg src2, immI cond, vRegMask_V0 v0) %{
  predicate(Matcher::vector_element_basic_type(n) == T_BYTE ||
            Matcher::vector_element_basic_type(n) == T_SHORT ||
            Matcher::vector_element_basic_type(n) == T_INT ||
            Matcher::vector_element_basic_type(n) == T_LONG);
  match(Set dst (VectorMaskCmp (Binary src1 src2) (Binary cond v0)));
  effect(TEMP_DEF dst);
  format %{ "vmaskcmp_masked $dst, $src1, $src2, $cond, $v0" %}
  ins_encode %{
    VectorRegister mask_vr = as_VectorRegister($dst$$reg);
    VectorRegister lhs_vr  = as_VectorRegister($src1$$reg);
    VectorRegister rhs_vr  = as_VectorRegister($src2$$reg);
    const int cond_code = (int)($cond$$constant);
    __ compare_integral_v(mask_vr, lhs_vr, rhs_vr, cond_code,
                          Matcher::vector_element_basic_type(this),
                          Matcher::vector_length(this), Assembler::v0_t);
  %}
  ins_pipe(pipe_slow);
%}

// vector mask float compare

// Floating-point vector compare (float/double elements) producing a mask.
// The comparison kind is the constant cond, handed to compare_fp_v.
instruct vmaskcmp_fp(vRegMask dst, vReg src1, vReg src2, immI cond) %{
  predicate(Matcher::vector_element_basic_type(n) == T_FLOAT ||
            Matcher::vector_element_basic_type(n) == T_DOUBLE);
  match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
  format %{ "vmaskcmp_fp $dst, $src1, $src2, $cond" %}
  ins_encode %{
    VectorRegister mask_vr = as_VectorRegister($dst$$reg);
    VectorRegister lhs_vr  = as_VectorRegister($src1$$reg);
    VectorRegister rhs_vr  = as_VectorRegister($src2$$reg);
    const int cond_code = (int)($cond$$constant);
    __ compare_fp_v(mask_vr, lhs_vr, rhs_vr, cond_code,
                    Matcher::vector_element_basic_type(this),
                    Matcher::vector_length(this));
  %}
  ins_pipe(pipe_slow);
%}

// Predicated floating-point vector compare: same as vmaskcmp_fp but issued
// under mask v0. dst is declared TEMP_DEF.
instruct vmaskcmp_fp_masked(vRegMask dst, vReg src1, vReg src2, immI cond, vRegMask_V0 v0) %{
  predicate(Matcher::vector_element_basic_type(n) == T_FLOAT ||
            Matcher::vector_element_basic_type(n) == T_DOUBLE);
  match(Set dst (VectorMaskCmp (Binary src1 src2) (Binary cond v0)));
  effect(TEMP_DEF dst);
  format %{ "vmaskcmp_fp_masked $dst, $src1, $src2, $cond, $v0" %}
  ins_encode %{
    VectorRegister mask_vr = as_VectorRegister($dst$$reg);
    VectorRegister lhs_vr  = as_VectorRegister($src1$$reg);
    VectorRegister rhs_vr  = as_VectorRegister($src2$$reg);
    const int cond_code = (int)($cond$$constant);
    __ compare_fp_v(mask_vr, lhs_vr, rhs_vr, cond_code,
                    Matcher::vector_element_basic_type(this),
                    Matcher::vector_length(this), Assembler::v0_t);
  %}
  ins_pipe(pipe_slow);
%}

// vector abs

// Integer vector absolute value. tmp receives the reverse-subtract of src
// from immediate 0 (vrsub_vi), then dst is the element-wise vmax_vv of tmp
// and src.
instruct vabs(vReg dst, vReg src, vReg tmp) %{
  match(Set dst (AbsVB src));
  match(Set dst (AbsVS src));
  match(Set dst (AbsVI src));
  match(Set dst (AbsVL src));
  effect(TEMP tmp);
  format %{ "vabs $dst, $src\t# KILL $tmp" %}
  ins_encode %{
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    VectorRegister dst_vr = as_VectorRegister($dst$$reg);
    VectorRegister src_vr = as_VectorRegister($src$$reg);
    VectorRegister neg_vr = as_VectorRegister($tmp$$reg);
    __ vsetvli_helper(elem_bt, Matcher::vector_length(this));
    __ vrsub_vi(neg_vr, src_vr, 0);
    __ vmax_vv(dst_vr, neg_vr, src_vr);
  %}
  ins_pipe(pipe_slow);
%}

// Floating-point vector absolute value (float/double) via vfabs_v.
instruct vabs_fp(vReg dst, vReg src) %{
  match(Set dst (AbsVF src));
  match(Set dst (AbsVD src));
  format %{ "vabs_fp $dst, $src" %}
  ins_encode %{
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    VectorRegister dst_vr = as_VectorRegister($dst$$reg);
    VectorRegister src_vr = as_VectorRegister($src$$reg);
    __ vsetvli_helper(elem_bt, Matcher::vector_length(this));
    __ vfabs_v(dst_vr, src_vr);
  %}
  ins_pipe(pipe_slow);
%}

// vector abs - predicated

// Predicated integer vector absolute value, in place on dst_src. Both the
// reverse-subtract into tmp and the following vmax_vv run under mask v0.
instruct vabs_masked(vReg dst_src, vRegMask_V0 v0, vReg tmp) %{
  match(Set dst_src (AbsVB dst_src v0));
  match(Set dst_src (AbsVS dst_src v0));
  match(Set dst_src (AbsVI dst_src v0));
  match(Set dst_src (AbsVL dst_src v0));
  effect(TEMP tmp);
  format %{ "vabs_masked $dst_src, $dst_src, $v0\t# KILL $tmp" %}
  ins_encode %{
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    VectorRegister val_vr = as_VectorRegister($dst_src$$reg);
    VectorRegister neg_vr = as_VectorRegister($tmp$$reg);
    __ vsetvli_helper(elem_bt, Matcher::vector_length(this));
    __ vrsub_vi(neg_vr, val_vr, 0, Assembler::v0_t);
    __ vmax_vv(val_vr, neg_vr, val_vr, Assembler::v0_t);
  %}
  ins_pipe(pipe_slow);
%}

// Predicated floating-point vector absolute value, in place on dst_src,
// issued under mask v0.
instruct vabs_fp_masked(vReg dst_src, vRegMask_V0 v0) %{
  match(Set dst_src (AbsVF dst_src v0));
  match(Set dst_src (AbsVD dst_src v0));
  format %{ "vabs_fp_masked $dst_src, $dst_src, $v0" %}
  ins_encode %{
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    VectorRegister val_vr = as_VectorRegister($dst_src$$reg);
    __ vsetvli_helper(elem_bt, Matcher::vector_length(this));
    __ vfabs_v(val_vr, val_vr, Assembler::v0_t);
  %}
  ins_pipe(pipe_slow);
%}

// vector add

// Integer vector-vector add for byte/short/int/long elements (vadd_vv).
instruct vadd(vReg dst, vReg src1, vReg src2) %{
  match(Set dst (AddVB src1 src2));
  match(Set dst (AddVS src1 src2));
  match(Set dst (AddVI src1 src2));
  match(Set dst (AddVL src1 src2));
  format %{ "vadd $dst, $src1, $src2" %}
  ins_encode %{
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    VectorRegister dst_vr = as_VectorRegister($dst$$reg);
    VectorRegister lhs_vr = as_VectorRegister($src1$$reg);
    VectorRegister rhs_vr = as_VectorRegister($src2$$reg);
    __ vsetvli_helper(elem_bt, Matcher::vector_length(this));
    __ vadd_vv(dst_vr, lhs_vr, rhs_vr);
  %}
  ins_pipe(pipe_slow);
%}

// Half-precision float vector add (vfadd_vv with T_SHORT element setup).
// Asserts that UseZvfh is on and that the node element type is T_SHORT.
instruct vadd_hfp(vReg dst, vReg src1, vReg src2) %{
  match(Set dst (AddVHF src1 src2));
  format %{ "vadd_hfp $dst, $src1, $src2" %}
  ins_encode %{
    assert(UseZvfh, "must");
    assert(Matcher::vector_element_basic_type(this) == T_SHORT, "must");
    VectorRegister dst_vr = as_VectorRegister($dst$$reg);
    VectorRegister lhs_vr = as_VectorRegister($src1$$reg);
    VectorRegister rhs_vr = as_VectorRegister($src2$$reg);
    __ vsetvli_helper(T_SHORT, Matcher::vector_length(this));
    __ vfadd_vv(dst_vr, lhs_vr, rhs_vr);
  %}
  ins_pipe(pipe_slow);
%}

// Floating-point vector-vector add for float/double elements (vfadd_vv).
instruct vadd_fp(vReg dst, vReg src1, vReg src2) %{
  match(Set dst (AddVF src1 src2));
  match(Set dst (AddVD src1 src2));
  format %{ "vadd_fp $dst, $src1, $src2" %}
  ins_encode %{
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    VectorRegister dst_vr = as_VectorRegister($dst$$reg);
    VectorRegister lhs_vr = as_VectorRegister($src1$$reg);
    VectorRegister rhs_vr = as_VectorRegister($src2$$reg);
    __ vsetvli_helper(elem_bt, Matcher::vector_length(this));
    __ vfadd_vv(dst_vr, lhs_vr, rhs_vr);
  %}
  ins_pipe(pipe_slow);
%}

// vector add - predicated

// Predicated integer vector-vector add. dst_src1 is both the first source
// and the destination; the add is issued under mask v0.
instruct vadd_masked(vReg dst_src1, vReg src2, vRegMask_V0 v0) %{
  match(Set dst_src1 (AddVB (Binary dst_src1 src2) v0));
  match(Set dst_src1 (AddVS (Binary dst_src1 src2) v0));
  match(Set dst_src1 (AddVI (Binary dst_src1 src2) v0));
  match(Set dst_src1 (AddVL (Binary dst_src1 src2) v0));
  format %{ "vadd_masked $dst_src1, $dst_src1, $src2, $v0" %}
  ins_encode %{
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    VectorRegister acc_vr = as_VectorRegister($dst_src1$$reg);
    VectorRegister rhs_vr = as_VectorRegister($src2$$reg);
    __ vsetvli_helper(elem_bt, Matcher::vector_length(this));
    __ vadd_vv(acc_vr, acc_vr, rhs_vr, Assembler::v0_t);
  %}
  ins_pipe(pipe_slow);
%}

// Predicated floating-point vector-vector add. dst_src1 is both the first
// source and the destination; the add is issued under mask v0.
instruct vadd_fp_masked(vReg dst_src1, vReg src2, vRegMask_V0 v0) %{
  match(Set dst_src1 (AddVF (Binary dst_src1 src2) v0));
  match(Set dst_src1 (AddVD (Binary dst_src1 src2) v0));
  format %{ "vadd_fp_masked $dst_src1, $dst_src1, $src2, $v0" %}
  ins_encode %{
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    VectorRegister acc_vr = as_VectorRegister($dst_src1$$reg);
    VectorRegister rhs_vr = as_VectorRegister($src2$$reg);
    __ vsetvli_helper(elem_bt, Matcher::vector_length(this));
    __ vfadd_vv(acc_vr, acc_vr, rhs_vr, Assembler::v0_t);
  %}
  ins_pipe(pipe_slow);
%}

// vector-immediate add (unpredicated)

// Integer vector add of a replicated immI5 constant for byte/short/int
// elements (vadd_vi).
instruct vadd_vi(vReg dst, vReg src1, immI5 con) %{
  match(Set dst (AddVB src1 (Replicate con)));
  match(Set dst (AddVS src1 (Replicate con)));
  match(Set dst (AddVI src1 (Replicate con)));
  format %{ "vadd_vi $dst, $src1, $con" %}
  ins_encode %{
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    VectorRegister dst_vr = as_VectorRegister($dst$$reg);
    VectorRegister lhs_vr = as_VectorRegister($src1$$reg);
    __ vsetvli_helper(elem_bt, Matcher::vector_length(this));
    __ vadd_vi(dst_vr, lhs_vr, $con$$constant);
  %}
  ins_pipe(pipe_slow);
%}

// Long vector add of a replicated immL5 constant (vadd_vi with T_LONG setup).
instruct vaddL_vi(vReg dst, vReg src1, immL5 con) %{
  match(Set dst (AddVL src1 (Replicate con)));
  format %{ "vaddL_vi $dst, $src1, $con" %}
  ins_encode %{
    VectorRegister dst_vr = as_VectorRegister($dst$$reg);
    VectorRegister lhs_vr = as_VectorRegister($src1$$reg);
    __ vsetvli_helper(T_LONG, Matcher::vector_length(this));
    __ vadd_vi(dst_vr, lhs_vr, $con$$constant);
  %}
  ins_pipe(pipe_slow);
%}

// vector-scalar add (unpredicated)

// Integer vector add of a replicated scalar register for byte/short/int
// elements (vadd_vx).
instruct vadd_vx(vReg dst, vReg src1, iRegIorL2I src2) %{
  match(Set dst (AddVB src1 (Replicate src2)));
  match(Set dst (AddVS src1 (Replicate src2)));
  match(Set dst (AddVI src1 (Replicate src2)));
  format %{ "vadd_vx $dst, $src1, $src2" %}
  ins_encode %{
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    VectorRegister dst_vr = as_VectorRegister($dst$$reg);
    VectorRegister lhs_vr = as_VectorRegister($src1$$reg);
    Register scalar_reg = as_Register($src2$$reg);
    __ vsetvli_helper(elem_bt, Matcher::vector_length(this));
    __ vadd_vx(dst_vr, lhs_vr, scalar_reg);
  %}
  ins_pipe(pipe_slow);
%}

// Long vector add of a replicated scalar register (vadd_vx with T_LONG setup).
instruct vaddL_vx(vReg dst, vReg src1, iRegL src2) %{
  match(Set dst (AddVL src1 (Replicate src2)));
  format %{ "vaddL_vx $dst, $src1, $src2" %}
  ins_encode %{
    VectorRegister dst_vr = as_VectorRegister($dst$$reg);
    VectorRegister lhs_vr = as_VectorRegister($src1$$reg);
    Register scalar_reg = as_Register($src2$$reg);
    __ vsetvli_helper(T_LONG, Matcher::vector_length(this));
    __ vadd_vx(dst_vr, lhs_vr, scalar_reg);
  %}
  ins_pipe(pipe_slow);
%}

// vector-immediate add (predicated)

// Predicated integer vector add of a replicated immI5 constant, in place on
// dst_src, issued under mask v0.
instruct vadd_vi_masked(vReg dst_src, immI5 con, vRegMask_V0 v0) %{
  match(Set dst_src (AddVB (Binary dst_src (Replicate con)) v0));
  match(Set dst_src (AddVS (Binary dst_src (Replicate con)) v0));
  match(Set dst_src (AddVI (Binary dst_src (Replicate con)) v0));
  format %{ "vadd_vi_masked $dst_src, $dst_src, $con, $v0" %}
  ins_encode %{
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    VectorRegister acc_vr = as_VectorRegister($dst_src$$reg);
    __ vsetvli_helper(elem_bt, Matcher::vector_length(this));
    __ vadd_vi(acc_vr, acc_vr, $con$$constant, Assembler::v0_t);
  %}
  ins_pipe(pipe_slow);
%}

// Predicated long vector add of a replicated immL5 constant, in place on
// dst_src, issued under mask v0.
instruct vaddL_vi_masked(vReg dst_src, immL5 con, vRegMask_V0 v0) %{
  match(Set dst_src (AddVL (Binary dst_src (Replicate con)) v0));
  format %{ "vaddL_vi_masked $dst_src, $dst_src, $con, $v0" %}
  ins_encode %{
    VectorRegister acc_vr = as_VectorRegister($dst_src$$reg);
    __ vsetvli_helper(T_LONG, Matcher::vector_length(this));
    __ vadd_vi(acc_vr, acc_vr, $con$$constant, Assembler::v0_t);
  %}
  ins_pipe(pipe_slow);
%}

// vector-scalar add (predicated)

// Predicated integer vector add of a replicated scalar register, in place on
// dst_src, issued under mask v0.
instruct vadd_vx_masked(vReg dst_src, iRegIorL2I src2, vRegMask_V0 v0) %{
  match(Set dst_src (AddVB (Binary dst_src (Replicate src2)) v0));
  match(Set dst_src (AddVS (Binary dst_src (Replicate src2)) v0));
  match(Set dst_src (AddVI (Binary dst_src (Replicate src2)) v0));
  format %{ "vadd_vx_masked $dst_src, $dst_src, $src2, $v0" %}
  ins_encode %{
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    VectorRegister acc_vr = as_VectorRegister($dst_src$$reg);
    Register scalar_reg = as_Register($src2$$reg);
    __ vsetvli_helper(elem_bt, Matcher::vector_length(this));
    __ vadd_vx(acc_vr, acc_vr, scalar_reg, Assembler::v0_t);
  %}
  ins_pipe(pipe_slow);
%}

// Predicated long vector add of a replicated scalar register, in place on
// dst_src, issued under mask v0.
instruct vaddL_vx_masked(vReg dst_src, iRegL src2, vRegMask_V0 v0) %{
  match(Set dst_src (AddVL (Binary dst_src (Replicate src2)) v0));
  format %{ "vaddL_vx_masked $dst_src, $dst_src, $src2, $v0" %}
  ins_encode %{
    VectorRegister acc_vr = as_VectorRegister($dst_src$$reg);
    Register scalar_reg = as_Register($src2$$reg);
    __ vsetvli_helper(T_LONG, Matcher::vector_length(this));
    __ vadd_vx(acc_vr, acc_vr, scalar_reg, Assembler::v0_t);
  %}
  ins_pipe(pipe_slow);
%}

// vector sub

// Integer vector-vector subtract for byte/short/int/long elements (vsub_vv).
instruct vsub(vReg dst, vReg src1, vReg src2) %{
  match(Set dst (SubVB src1 src2));
  match(Set dst (SubVS src1 src2));
  match(Set dst (SubVI src1 src2));
  match(Set dst (SubVL src1 src2));
  format %{ "vsub $dst, $src1, $src2" %}
  ins_encode %{
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    VectorRegister dst_vr = as_VectorRegister($dst$$reg);
    VectorRegister lhs_vr = as_VectorRegister($src1$$reg);
    VectorRegister rhs_vr = as_VectorRegister($src2$$reg);
    __ vsetvli_helper(elem_bt, Matcher::vector_length(this));
    __ vsub_vv(dst_vr, lhs_vr, rhs_vr);
  %}
  ins_pipe(pipe_slow);
%}

// Half-precision float vector subtract (vfsub_vv with T_SHORT element setup).
// Asserts that UseZvfh is on and that the node element type is T_SHORT.
instruct vsub_hfp(vReg dst, vReg src1, vReg src2) %{
  match(Set dst (SubVHF src1 src2));
  format %{ "vsub_hfp $dst, $src1, $src2" %}
  ins_encode %{
    assert(UseZvfh, "must");
    assert(Matcher::vector_element_basic_type(this) == T_SHORT, "must");
    VectorRegister dst_vr = as_VectorRegister($dst$$reg);
    VectorRegister lhs_vr = as_VectorRegister($src1$$reg);
    VectorRegister rhs_vr = as_VectorRegister($src2$$reg);
    __ vsetvli_helper(T_SHORT, Matcher::vector_length(this));
    __ vfsub_vv(dst_vr, lhs_vr, rhs_vr);
  %}
  ins_pipe(pipe_slow);
%}

// Floating-point vector-vector subtract for float/double elements (vfsub_vv).
instruct vsub_fp(vReg dst, vReg src1, vReg src2) %{
  match(Set dst (SubVF src1 src2));
  match(Set dst (SubVD src1 src2));
  format %{ "vsub_fp $dst, $src1, $src2" %}
  ins_encode %{
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    VectorRegister dst_vr = as_VectorRegister($dst$$reg);
    VectorRegister lhs_vr = as_VectorRegister($src1$$reg);
    VectorRegister rhs_vr = as_VectorRegister($src2$$reg);
    __ vsetvli_helper(elem_bt, Matcher::vector_length(this));
    __ vfsub_vv(dst_vr, lhs_vr, rhs_vr);
  %}
  ins_pipe(pipe_slow);
%}

// vector sub - predicated

// Predicated integer vector-vector subtract. dst_src1 is both the first
// source and the destination; the subtract is issued under mask v0.
instruct vsub_masked(vReg dst_src1, vReg src2, vRegMask_V0 v0) %{
  match(Set dst_src1 (SubVB (Binary dst_src1 src2) v0));
  match(Set dst_src1 (SubVS (Binary dst_src1 src2) v0));
  match(Set dst_src1 (SubVI (Binary dst_src1 src2) v0));
  match(Set dst_src1 (SubVL (Binary dst_src1 src2) v0));
  format %{ "vsub_masked $dst_src1, $dst_src1, $src2, $v0" %}
  ins_encode %{
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    VectorRegister acc_vr = as_VectorRegister($dst_src1$$reg);
    VectorRegister rhs_vr = as_VectorRegister($src2$$reg);
    __ vsetvli_helper(elem_bt, Matcher::vector_length(this));
    __ vsub_vv(acc_vr, acc_vr, rhs_vr, Assembler::v0_t);
  %}
  ins_pipe(pipe_slow);
%}

// Predicated floating-point vector-vector subtract. dst_src1 is both the
// first source and the destination; the subtract is issued under mask v0.
instruct vsub_fp_masked(vReg dst_src1, vReg src2, vRegMask_V0 v0) %{
  match(Set dst_src1 (SubVF (Binary dst_src1 src2) v0));
  match(Set dst_src1 (SubVD (Binary dst_src1 src2) v0));
  format %{ "vsub_fp_masked $dst_src1, $dst_src1, $src2, $v0" %}
  ins_encode %{
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    VectorRegister acc_vr = as_VectorRegister($dst_src1$$reg);
    VectorRegister rhs_vr = as_VectorRegister($src2$$reg);
    __ vsetvli_helper(elem_bt, Matcher::vector_length(this));
    __ vfsub_vv(acc_vr, acc_vr, rhs_vr, Assembler::v0_t);
  %}
  ins_pipe(pipe_slow);
%}

// vector-scalar sub (unpredicated)

// Integer vector subtract of a replicated scalar register for byte/short/int
// elements (vsub_vx).
instruct vsub_vx(vReg dst, vReg src1, iRegIorL2I src2) %{
  match(Set dst (SubVB src1 (Replicate src2)));
  match(Set dst (SubVS src1 (Replicate src2)));
  match(Set dst (SubVI src1 (Replicate src2)));
  format %{ "vsub_vx $dst, $src1, $src2" %}
  ins_encode %{
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    VectorRegister dst_vr = as_VectorRegister($dst$$reg);
    VectorRegister lhs_vr = as_VectorRegister($src1$$reg);
    Register scalar_reg = as_Register($src2$$reg);
    __ vsetvli_helper(elem_bt, Matcher::vector_length(this));
    __ vsub_vx(dst_vr, lhs_vr, scalar_reg);
  %}
  ins_pipe(pipe_slow);
%}

// Long vector subtract of a replicated scalar register (vsub_vx with T_LONG
// setup).
instruct vsubL_vx(vReg dst, vReg src1, iRegL src2) %{
  match(Set dst (SubVL src1 (Replicate src2)));
  format %{ "vsubL_vx $dst, $src1, $src2" %}
  ins_encode %{
    VectorRegister dst_vr = as_VectorRegister($dst$$reg);
    VectorRegister lhs_vr = as_VectorRegister($src1$$reg);
    Register scalar_reg = as_Register($src2$$reg);
    __ vsetvli_helper(T_LONG, Matcher::vector_length(this));
    __ vsub_vx(dst_vr, lhs_vr, scalar_reg);
  %}
  ins_pipe(pipe_slow);
%}

// vector-scalar sub (predicated)

// Predicated integer vector subtract of a replicated scalar register, in
// place on dst_src, issued under mask v0.
instruct vsub_vx_masked(vReg dst_src, iRegIorL2I src2, vRegMask_V0 v0) %{
  match(Set dst_src (SubVB (Binary dst_src (Replicate src2)) v0));
  match(Set dst_src (SubVS (Binary dst_src (Replicate src2)) v0));
  match(Set dst_src (SubVI (Binary dst_src (Replicate src2)) v0));
  format %{ "vsub_vx_masked $dst_src, $dst_src, $src2, $v0" %}
  ins_encode %{
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    VectorRegister acc_vr = as_VectorRegister($dst_src$$reg);
    Register scalar_reg = as_Register($src2$$reg);
    __ vsetvli_helper(elem_bt, Matcher::vector_length(this));
    __ vsub_vx(acc_vr, acc_vr, scalar_reg, Assembler::v0_t);
  %}
  ins_pipe(pipe_slow);
%}

// Predicated long vector subtract of a replicated scalar register, in place
// on dst_src, issued under mask v0.
instruct vsubL_vx_masked(vReg dst_src, iRegL src2, vRegMask_V0 v0) %{
  match(Set dst_src (SubVL (Binary dst_src (Replicate src2)) v0));
  format %{ "vsubL_vx_masked $dst_src, $dst_src, $src2, $v0" %}
  ins_encode %{
    VectorRegister acc_vr = as_VectorRegister($dst_src$$reg);
    Register scalar_reg = as_Register($src2$$reg);
    __ vsetvli_helper(T_LONG, Matcher::vector_length(this));
    __ vsub_vx(acc_vr, acc_vr, scalar_reg, Assembler::v0_t);
  %}
  ins_pipe(pipe_slow);
%}

// -------- vector saturating integer operations

// vector saturating signed integer addition

// Signed saturating vector add (vsadd_vv). Selected only for
// SaturatingVector nodes that are not unsigned.
instruct vsadd(vReg dst, vReg src1, vReg src2) %{
  predicate(n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned());
  match(Set dst (SaturatingAddV src1 src2));
  format %{ "vsadd $dst, $src1, $src2" %}
  ins_encode %{
    BasicType bt = Matcher::vector_element_basic_type(this);
    assert(is_integral_type(bt), "unsupported type");
    VectorRegister dst_vr = as_VectorRegister($dst$$reg);
    VectorRegister lhs_vr = as_VectorRegister($src1$$reg);
    VectorRegister rhs_vr = as_VectorRegister($src2$$reg);
    __ vsetvli_helper(bt, Matcher::vector_length(this));
    __ vsadd_vv(dst_vr, lhs_vr, rhs_vr);
  %}
  ins_pipe(pipe_slow);
%}

// vector saturating unsigned integer addition

// Unsigned saturating vector add (vsaddu_vv). Selected only for
// SaturatingVector nodes that are unsigned.
instruct vsaddu(vReg dst, vReg src1, vReg src2) %{
  predicate(n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned());
  match(Set dst (SaturatingAddV src1 src2));
  format %{ "vsaddu $dst, $src1, $src2" %}
  ins_encode %{
    BasicType bt = Matcher::vector_element_basic_type(this);
    assert(is_integral_type(bt), "unsupported type");
    VectorRegister dst_vr = as_VectorRegister($dst$$reg);
    VectorRegister lhs_vr = as_VectorRegister($src1$$reg);
    VectorRegister rhs_vr = as_VectorRegister($src2$$reg);
    __ vsetvli_helper(bt, Matcher::vector_length(this));
    __ vsaddu_vv(dst_vr, lhs_vr, rhs_vr);
  %}
  ins_pipe(pipe_slow);
%}

// vector saturating signed integer addition (predicated)

// Predicated signed saturating vector add, in place on dst_src, issued under
// mask v0.
instruct vsadd_masked(vReg dst_src, vReg src1, vRegMask_V0 v0) %{
  predicate(n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned());
  match(Set dst_src (SaturatingAddV (Binary dst_src src1) v0));
  format %{ "vsadd_masked $dst_src, $dst_src, $src1, $v0" %}
  ins_encode %{
    BasicType bt = Matcher::vector_element_basic_type(this);
    assert(is_integral_type(bt), "unsupported type");
    VectorRegister acc_vr = as_VectorRegister($dst_src$$reg);
    VectorRegister rhs_vr = as_VectorRegister($src1$$reg);
    __ vsetvli_helper(bt, Matcher::vector_length(this));
    __ vsadd_vv(acc_vr, acc_vr, rhs_vr, Assembler::v0_t);
  %}
  ins_pipe(pipe_slow);
%}

// vector saturating unsigned integer addition (predicated)

// Predicated unsigned saturating vector add, in place on dst_src, issued
// under mask v0.
instruct vsaddu_masked(vReg dst_src, vReg src1, vRegMask_V0 v0) %{
  predicate(n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned());
  match(Set dst_src (SaturatingAddV (Binary dst_src src1) v0));
  format %{ "vsaddu_masked $dst_src, $dst_src, $src1, $v0" %}
  ins_encode %{
    BasicType bt = Matcher::vector_element_basic_type(this);
    assert(is_integral_type(bt), "unsupported type");
    VectorRegister acc_vr = as_VectorRegister($dst_src$$reg);
    VectorRegister rhs_vr = as_VectorRegister($src1$$reg);
    __ vsetvli_helper(bt, Matcher::vector_length(this));
    __ vsaddu_vv(acc_vr, acc_vr, rhs_vr, Assembler::v0_t);
  %}
  ins_pipe(pipe_slow);
%}

// vector saturating signed integer subtraction

// Signed saturating vector subtract (vssub_vv). Selected only for
// SaturatingVector nodes that are not unsigned.
instruct vssub(vReg dst, vReg src1, vReg src2) %{
  predicate(n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned());
  match(Set dst (SaturatingSubV src1 src2));
  format %{ "vssub $dst, $src1, $src2" %}
  ins_encode %{
    BasicType bt = Matcher::vector_element_basic_type(this);
    assert(is_integral_type(bt), "unsupported type");
    VectorRegister dst_vr = as_VectorRegister($dst$$reg);
    VectorRegister lhs_vr = as_VectorRegister($src1$$reg);
    VectorRegister rhs_vr = as_VectorRegister($src2$$reg);
    __ vsetvli_helper(bt, Matcher::vector_length(this));
    __ vssub_vv(dst_vr, lhs_vr, rhs_vr);
  %}
  ins_pipe(pipe_slow);
%}

// vector saturating unsigned integer subtraction

// Unsigned saturating vector subtract (vssubu_vv). Selected only for
// SaturatingVector nodes that are unsigned.
instruct vssubu(vReg dst, vReg src1, vReg src2) %{
  predicate(n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned());
  match(Set dst (SaturatingSubV src1 src2));
  format %{ "vssubu $dst, $src1, $src2" %}
  ins_encode %{
    BasicType bt = Matcher::vector_element_basic_type(this);
    assert(is_integral_type(bt), "unsupported type");
    VectorRegister dst_vr = as_VectorRegister($dst$$reg);
    VectorRegister lhs_vr = as_VectorRegister($src1$$reg);
    VectorRegister rhs_vr = as_VectorRegister($src2$$reg);
    __ vsetvli_helper(bt, Matcher::vector_length(this));
    __ vssubu_vv(dst_vr, lhs_vr, rhs_vr);
  %}
  ins_pipe(pipe_slow);
%}

// vector saturating signed integer subtraction (predicated)

// Predicated signed saturating vector subtract, in place on dst_src, issued
// under mask v0.
instruct vssub_masked(vReg dst_src, vReg src1, vRegMask_V0 v0) %{
  predicate(n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned());
  match(Set dst_src (SaturatingSubV (Binary dst_src src1) v0));
  format %{ "vssub_masked $dst_src, $dst_src, $src1, $v0" %}
  ins_encode %{
    BasicType bt = Matcher::vector_element_basic_type(this);
    assert(is_integral_type(bt), "unsupported type");
    VectorRegister acc_vr = as_VectorRegister($dst_src$$reg);
    VectorRegister rhs_vr = as_VectorRegister($src1$$reg);
    __ vsetvli_helper(bt, Matcher::vector_length(this));
    __ vssub_vv(acc_vr, acc_vr, rhs_vr, Assembler::v0_t);
  %}
  ins_pipe(pipe_slow);
%}

// vector saturating unsigned integer subtraction (predicated)

// Predicated unsigned saturating vector subtract, in place on dst_src,
// issued under mask v0.
instruct vssubu_masked(vReg dst_src, vReg src1, vRegMask_V0 v0) %{
  predicate(n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned());
  match(Set dst_src (SaturatingSubV (Binary dst_src src1) v0));
  format %{ "vssubu_masked $dst_src, $dst_src, $src1, $v0" %}
  ins_encode %{
    BasicType bt = Matcher::vector_element_basic_type(this);
    assert(is_integral_type(bt), "unsupported type");
    VectorRegister acc_vr = as_VectorRegister($dst_src$$reg);
    VectorRegister rhs_vr = as_VectorRegister($src1$$reg);
    __ vsetvli_helper(bt, Matcher::vector_length(this));
    __ vssubu_vv(acc_vr, acc_vr, rhs_vr, Assembler::v0_t);
  %}
  ins_pipe(pipe_slow);
%}

// vector and

// Bitwise vector-vector AND (vand_vv); the element type of the node is used
// for the vsetvli setup.
instruct vand(vReg dst, vReg src1, vReg src2) %{
  match(Set dst (AndV src1 src2));
  format %{ "vand $dst, $src1, $src2" %}
  ins_encode %{
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    VectorRegister dst_vr = as_VectorRegister($dst$$reg);
    VectorRegister lhs_vr = as_VectorRegister($src1$$reg);
    VectorRegister rhs_vr = as_VectorRegister($src2$$reg);
    __ vsetvli_helper(elem_bt, Matcher::vector_length(this));
    __ vand_vv(dst_vr, lhs_vr, rhs_vr);
  %}
  ins_pipe(pipe_slow);
%}

// vector and - predicated

// Predicated bitwise vector-vector AND. dst_src1 is both the first source
// and the destination; the operation is issued under mask v0.
instruct vand_masked(vReg dst_src1, vReg src2, vRegMask_V0 v0) %{
  match(Set dst_src1 (AndV (Binary dst_src1 src2) v0));
  format %{ "vand_masked $dst_src1, $dst_src1, $src2, $v0" %}
  ins_encode %{
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    VectorRegister acc_vr = as_VectorRegister($dst_src1$$reg);
    VectorRegister rhs_vr = as_VectorRegister($src2$$reg);
    __ vsetvli_helper(elem_bt, Matcher::vector_length(this));
    __ vand_vv(acc_vr, acc_vr, rhs_vr, Assembler::v0_t);
  %}
  ins_pipe(pipe_slow);
%}

// vector-immediate and (unpredicated)

// Bitwise vector AND with a replicated immI5 constant; restricted by the
// predicate to byte/short/int element types (vand_vi).
instruct vand_vi(vReg dst, vReg src1, immI5 con) %{
  predicate(Matcher::vector_element_basic_type(n) == T_BYTE ||
            Matcher::vector_element_basic_type(n) == T_SHORT ||
            Matcher::vector_element_basic_type(n) == T_INT);
  match(Set dst (AndV src1 (Replicate con)));
  format %{ "vand_vi $dst, $src1, $con" %}
  ins_encode %{
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    VectorRegister dst_vr = as_VectorRegister($dst$$reg);
    VectorRegister lhs_vr = as_VectorRegister($src1$$reg);
    __ vsetvli_helper(elem_bt, Matcher::vector_length(this));
    __ vand_vi(dst_vr, lhs_vr, $con$$constant);
  %}
  ins_pipe(pipe_slow);
%}

// Bitwise vector AND with a replicated immL5 constant; restricted by the
// predicate to long elements (vand_vi with T_LONG setup).
instruct vandL_vi(vReg dst, vReg src1, immL5 con) %{
  predicate(Matcher::vector_element_basic_type(n) == T_LONG);
  match(Set dst (AndV src1 (Replicate con)));
  format %{ "vandL_vi $dst, $src1, $con" %}
  ins_encode %{
    VectorRegister dst_vr = as_VectorRegister($dst$$reg);
    VectorRegister lhs_vr = as_VectorRegister($src1$$reg);
    __ vsetvli_helper(T_LONG, Matcher::vector_length(this));
    __ vand_vi(dst_vr, lhs_vr, $con$$constant);
  %}
  ins_pipe(pipe_slow);
%}

// vector-scalar and (unpredicated)

instruct vand_vx(vReg dst, vReg src1, iRegIorL2I src2) %{
  predicate(Matcher::vector_element_basic_type(n) == T_BYTE ||
            Matcher::vector_element_basic_type(n) == T_SHORT ||
            Matcher::vector_element_basic_type(n) == T_INT);
  match(Set dst (AndV src1 (Replicate src2)));
  format %{ "vand_vx $dst, $src1, $src2" %}
  ins_encode %{
    // AndV of src1 with a replicated scalar register, emitted as vand_vx.
    VectorRegister vd = as_VectorRegister($dst$$reg);
    VectorRegister vs = as_VectorRegister($src1$$reg);
    Register scalar = as_Register($src2$$reg);
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    __ vsetvli_helper(elem_bt, Matcher::vector_length(this));
    __ vand_vx(vd, vs, scalar);
  %}
  ins_pipe(pipe_slow);
%}

instruct vandL_vx(vReg dst, vReg src1, iRegL src2) %{
  predicate(Matcher::vector_element_basic_type(n) == T_LONG);
  match(Set dst (AndV src1 (Replicate src2)));
  format %{ "vandL_vx $dst, $src1, $src2" %}
  ins_encode %{
    // T_LONG variant of vand_vx; the scalar operand is an iRegL.
    VectorRegister vd = as_VectorRegister($dst$$reg);
    VectorRegister vs = as_VectorRegister($src1$$reg);
    Register scalar = as_Register($src2$$reg);
    __ vsetvli_helper(T_LONG, Matcher::vector_length(this));
    __ vand_vx(vd, vs, scalar);
  %}
  ins_pipe(pipe_slow);
%}

// vector-immediate and (predicated)

instruct vand_vi_masked(vReg dst_src, immI5 con, vRegMask_V0 v0) %{
  predicate(Matcher::vector_element_basic_type(n) == T_BYTE ||
            Matcher::vector_element_basic_type(n) == T_SHORT ||
            Matcher::vector_element_basic_type(n) == T_INT);
  match(Set dst_src (AndV (Binary dst_src (Replicate con)) v0));
  format %{ "vand_vi_masked $dst_src, $dst_src, $con, $v0" %}
  ins_encode %{
    // In-place vand_vi on dst_src, emitted with Assembler::v0_t.
    VectorRegister acc = as_VectorRegister($dst_src$$reg);
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    __ vsetvli_helper(elem_bt, Matcher::vector_length(this));
    __ vand_vi(acc, acc, $con$$constant, Assembler::v0_t);
  %}
  ins_pipe(pipe_slow);
%}

instruct vandL_vi_masked(vReg dst_src, immL5 con, vRegMask_V0 v0) %{
  predicate(Matcher::vector_element_basic_type(n) == T_LONG);
  match(Set dst_src (AndV (Binary dst_src (Replicate con)) v0));
  format %{ "vandL_vi_masked $dst_src, $dst_src, $con, $v0" %}
  ins_encode %{
    // T_LONG variant of the in-place masked vand_vi.
    VectorRegister acc = as_VectorRegister($dst_src$$reg);
    __ vsetvli_helper(T_LONG, Matcher::vector_length(this));
    __ vand_vi(acc, acc, $con$$constant, Assembler::v0_t);
  %}
  ins_pipe(pipe_slow);
%}

// vector-scalar and (predicated)

instruct vand_vx_masked(vReg dst_src, iRegIorL2I src, vRegMask_V0 v0) %{
  predicate(Matcher::vector_element_basic_type(n) == T_BYTE ||
            Matcher::vector_element_basic_type(n) == T_SHORT ||
            Matcher::vector_element_basic_type(n) == T_INT);
  match(Set dst_src (AndV (Binary dst_src (Replicate src)) v0));
  format %{ "vand_vx_masked $dst_src, $dst_src, $src, $v0" %}
  ins_encode %{
    // In-place vand_vx on dst_src with a scalar register, emitted with v0_t.
    VectorRegister acc = as_VectorRegister($dst_src$$reg);
    Register scalar = as_Register($src$$reg);
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    __ vsetvli_helper(elem_bt, Matcher::vector_length(this));
    __ vand_vx(acc, acc, scalar, Assembler::v0_t);
  %}
  ins_pipe(pipe_slow);
%}

instruct vandL_vx_masked(vReg dst_src, iRegL src, vRegMask_V0 v0) %{
  predicate(Matcher::vector_element_basic_type(n) == T_LONG);
  match(Set dst_src (AndV (Binary dst_src (Replicate src)) v0));
  format %{ "vandL_vx_masked $dst_src, $dst_src, $src, $v0" %}
  ins_encode %{
    // T_LONG variant of the in-place masked vand_vx.
    VectorRegister acc = as_VectorRegister($dst_src$$reg);
    Register scalar = as_Register($src$$reg);
    __ vsetvli_helper(T_LONG, Matcher::vector_length(this));
    __ vand_vx(acc, acc, scalar, Assembler::v0_t);
  %}
  ins_pipe(pipe_slow);
%}

// vector or

instruct vor(vReg dst, vReg src1, vReg src2) %{
  match(Set dst (OrV src1 src2));
  format %{ "vor $dst, $src1, $src2" %}
  ins_encode %{
    // OrV of two vector registers, emitted as vor_vv.
    VectorRegister vd = as_VectorRegister($dst$$reg);
    VectorRegister lhs = as_VectorRegister($src1$$reg);
    VectorRegister rhs = as_VectorRegister($src2$$reg);
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    __ vsetvli_helper(elem_bt, Matcher::vector_length(this));
    __ vor_vv(vd, lhs, rhs);
  %}
  ins_pipe(pipe_slow);
%}

// vector or - predicated

instruct vor_masked(vReg dst_src1, vReg src2, vRegMask_V0 v0) %{
  match(Set dst_src1 (OrV (Binary dst_src1 src2) v0));
  format %{ "vor_masked $dst_src1, $dst_src1, $src2, $v0" %}
  ins_encode %{
    // dst_src1 is passed as both destination and first source; the op is
    // emitted with Assembler::v0_t.
    VectorRegister acc = as_VectorRegister($dst_src1$$reg);
    VectorRegister rhs = as_VectorRegister($src2$$reg);
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    __ vsetvli_helper(elem_bt, Matcher::vector_length(this));
    __ vor_vv(acc, acc, rhs, Assembler::v0_t);
  %}
  ins_pipe(pipe_slow);
%}

// vector-immediate or (unpredicated)

instruct vor_vi(vReg dst, vReg src1, immI5 con) %{
  predicate(Matcher::vector_element_basic_type(n) == T_BYTE ||
            Matcher::vector_element_basic_type(n) == T_SHORT ||
            Matcher::vector_element_basic_type(n) == T_INT);
  match(Set dst (OrV src1 (Replicate con)));
  format %{ "vor_vi $dst, $src1, $con" %}
  ins_encode %{
    // OrV of src1 with a replicated immI5 constant, emitted as vor_vi.
    VectorRegister vd = as_VectorRegister($dst$$reg);
    VectorRegister vs = as_VectorRegister($src1$$reg);
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    __ vsetvli_helper(elem_bt, Matcher::vector_length(this));
    __ vor_vi(vd, vs, $con$$constant);
  %}
  ins_pipe(pipe_slow);
%}

instruct vorL_vi(vReg dst, vReg src1, immL5 con) %{
  predicate(Matcher::vector_element_basic_type(n) == T_LONG);
  match(Set dst (OrV src1 (Replicate con)));
  format %{ "vorL_vi $dst, $src1, $con" %}
  ins_encode %{
    // T_LONG variant of vor_vi; the element type is fixed by the predicate.
    VectorRegister vd = as_VectorRegister($dst$$reg);
    VectorRegister vs = as_VectorRegister($src1$$reg);
    __ vsetvli_helper(T_LONG, Matcher::vector_length(this));
    __ vor_vi(vd, vs, $con$$constant);
  %}
  ins_pipe(pipe_slow);
%}

// vector-scalar or (unpredicated)

instruct vor_vx(vReg dst, vReg src1, iRegIorL2I src2) %{
  predicate(Matcher::vector_element_basic_type(n) == T_BYTE ||
            Matcher::vector_element_basic_type(n) == T_SHORT ||
            Matcher::vector_element_basic_type(n) == T_INT);
  match(Set dst (OrV src1 (Replicate src2)));
  format %{ "vor_vx $dst, $src1, $src2" %}
  ins_encode %{
    // OrV of src1 with a replicated scalar register, emitted as vor_vx.
    VectorRegister vd = as_VectorRegister($dst$$reg);
    VectorRegister vs = as_VectorRegister($src1$$reg);
    Register scalar = as_Register($src2$$reg);
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    __ vsetvli_helper(elem_bt, Matcher::vector_length(this));
    __ vor_vx(vd, vs, scalar);
  %}
  ins_pipe(pipe_slow);
%}

instruct vorL_vx(vReg dst, vReg src1, iRegL src2) %{
  predicate(Matcher::vector_element_basic_type(n) == T_LONG);
  match(Set dst (OrV src1 (Replicate src2)));
  format %{ "vorL_vx $dst, $src1, $src2" %}
  ins_encode %{
    // T_LONG variant of vor_vx; the scalar operand is an iRegL.
    VectorRegister vd = as_VectorRegister($dst$$reg);
    VectorRegister vs = as_VectorRegister($src1$$reg);
    Register scalar = as_Register($src2$$reg);
    __ vsetvli_helper(T_LONG, Matcher::vector_length(this));
    __ vor_vx(vd, vs, scalar);
  %}
  ins_pipe(pipe_slow);
%}

// vector-immediate or (predicated)

instruct vor_vi_masked(vReg dst_src, immI5 con, vRegMask_V0 v0) %{
  predicate(Matcher::vector_element_basic_type(n) == T_BYTE ||
            Matcher::vector_element_basic_type(n) == T_SHORT ||
            Matcher::vector_element_basic_type(n) == T_INT);
  match(Set dst_src (OrV (Binary dst_src (Replicate con)) v0));
  format %{ "vor_vi_masked $dst_src, $dst_src, $con, $v0" %}
  ins_encode %{
    // In-place vor_vi on dst_src, emitted with Assembler::v0_t.
    VectorRegister acc = as_VectorRegister($dst_src$$reg);
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    __ vsetvli_helper(elem_bt, Matcher::vector_length(this));
    __ vor_vi(acc, acc, $con$$constant, Assembler::v0_t);
  %}
  ins_pipe(pipe_slow);
%}

instruct vorL_vi_masked(vReg dst_src, immL5 con, vRegMask_V0 v0) %{
  predicate(Matcher::vector_element_basic_type(n) == T_LONG);
  match(Set dst_src (OrV (Binary dst_src (Replicate con)) v0));
  format %{ "vorL_vi_masked $dst_src, $dst_src, $con, $v0" %}
  ins_encode %{
    // T_LONG variant of the in-place masked vor_vi.
    VectorRegister acc = as_VectorRegister($dst_src$$reg);
    __ vsetvli_helper(T_LONG, Matcher::vector_length(this));
    __ vor_vi(acc, acc, $con$$constant, Assembler::v0_t);
  %}
  ins_pipe(pipe_slow);
%}

// vector-scalar or (predicated)

instruct vor_vx_masked(vReg dst_src, iRegIorL2I src, vRegMask_V0 v0) %{
  predicate(Matcher::vector_element_basic_type(n) == T_BYTE ||
            Matcher::vector_element_basic_type(n) == T_SHORT ||
            Matcher::vector_element_basic_type(n) == T_INT);
  match(Set dst_src (OrV (Binary dst_src (Replicate src)) v0));
  format %{ "vor_vx_masked $dst_src, $dst_src, $src, $v0" %}
  ins_encode %{
    // In-place vor_vx on dst_src with a scalar register, emitted with v0_t.
    VectorRegister acc = as_VectorRegister($dst_src$$reg);
    Register scalar = as_Register($src$$reg);
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    __ vsetvli_helper(elem_bt, Matcher::vector_length(this));
    __ vor_vx(acc, acc, scalar, Assembler::v0_t);
  %}
  ins_pipe(pipe_slow);
%}

instruct vorL_vx_masked(vReg dst_src, iRegL src, vRegMask_V0 v0) %{
  predicate(Matcher::vector_element_basic_type(n) == T_LONG);
  match(Set dst_src (OrV (Binary dst_src (Replicate src)) v0));
  format %{ "vorL_vx_masked $dst_src, $dst_src, $src, $v0" %}
  ins_encode %{
    // T_LONG variant of the in-place masked vor_vx.
    VectorRegister acc = as_VectorRegister($dst_src$$reg);
    Register scalar = as_Register($src$$reg);
    __ vsetvli_helper(T_LONG, Matcher::vector_length(this));
    __ vor_vx(acc, acc, scalar, Assembler::v0_t);
  %}
  ins_pipe(pipe_slow);
%}

// vector xor

instruct vxor(vReg dst, vReg src1, vReg src2) %{
  match(Set dst (XorV src1 src2));
  format %{ "vxor $dst, $src1, $src2" %}
  ins_encode %{
    // XorV of two vector registers, emitted as vxor_vv.
    VectorRegister vd = as_VectorRegister($dst$$reg);
    VectorRegister lhs = as_VectorRegister($src1$$reg);
    VectorRegister rhs = as_VectorRegister($src2$$reg);
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    __ vsetvli_helper(elem_bt, Matcher::vector_length(this));
    __ vxor_vv(vd, lhs, rhs);
  %}
  ins_pipe(pipe_slow);
%}

// vector xor - predicated

instruct vxor_masked(vReg dst_src1, vReg src2, vRegMask_V0 v0) %{
  match(Set dst_src1 (XorV (Binary dst_src1 src2) v0));
  format %{ "vxor_masked $dst_src1, $dst_src1, $src2, $v0" %}
  ins_encode %{
    // dst_src1 is passed as both destination and first source; the op is
    // emitted with Assembler::v0_t.
    VectorRegister acc = as_VectorRegister($dst_src1$$reg);
    VectorRegister rhs = as_VectorRegister($src2$$reg);
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    __ vsetvli_helper(elem_bt, Matcher::vector_length(this));
    __ vxor_vv(acc, acc, rhs, Assembler::v0_t);
  %}
  ins_pipe(pipe_slow);
%}

// vector-immediate xor (unpredicated)

instruct vxor_vi(vReg dst, vReg src1, immI5 con) %{
  predicate(Matcher::vector_element_basic_type(n) == T_BYTE ||
            Matcher::vector_element_basic_type(n) == T_SHORT ||
            Matcher::vector_element_basic_type(n) == T_INT);
  match(Set dst (XorV src1 (Replicate con)));
  format %{ "vxor_vi $dst, $src1, $con" %}
  ins_encode %{
    // XorV of src1 with a replicated immI5 constant, emitted as vxor_vi.
    VectorRegister vd = as_VectorRegister($dst$$reg);
    VectorRegister vs = as_VectorRegister($src1$$reg);
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    __ vsetvli_helper(elem_bt, Matcher::vector_length(this));
    __ vxor_vi(vd, vs, $con$$constant);
  %}
  ins_pipe(pipe_slow);
%}

instruct vxorL_vi(vReg dst, vReg src1, immL5 con) %{
  predicate(Matcher::vector_element_basic_type(n) == T_LONG);
  match(Set dst (XorV src1 (Replicate con)));
  format %{ "vxorL_vi $dst, $src1, $con" %}
  ins_encode %{
    // T_LONG variant of vxor_vi; the element type is fixed by the predicate.
    VectorRegister vd = as_VectorRegister($dst$$reg);
    VectorRegister vs = as_VectorRegister($src1$$reg);
    __ vsetvli_helper(T_LONG, Matcher::vector_length(this));
    __ vxor_vi(vd, vs, $con$$constant);
  %}
  ins_pipe(pipe_slow);
%}

// vector-scalar xor (unpredicated)

instruct vxor_vx(vReg dst, vReg src1, iRegIorL2I src2) %{
  predicate(Matcher::vector_element_basic_type(n) == T_BYTE ||
            Matcher::vector_element_basic_type(n) == T_SHORT ||
            Matcher::vector_element_basic_type(n) == T_INT);
  match(Set dst (XorV src1 (Replicate src2)));
  format %{ "vxor_vx $dst, $src1, $src2" %}
  ins_encode %{
    // XorV of src1 with a replicated scalar register, emitted as vxor_vx.
    VectorRegister vd = as_VectorRegister($dst$$reg);
    VectorRegister vs = as_VectorRegister($src1$$reg);
    Register scalar = as_Register($src2$$reg);
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    __ vsetvli_helper(elem_bt, Matcher::vector_length(this));
    __ vxor_vx(vd, vs, scalar);
  %}
  ins_pipe(pipe_slow);
%}

instruct vxorL_vx(vReg dst, vReg src1, iRegL src2) %{
  predicate(Matcher::vector_element_basic_type(n) == T_LONG);
  match(Set dst (XorV src1 (Replicate src2)));
  format %{ "vxorL_vx $dst, $src1, $src2" %}
  ins_encode %{
    // T_LONG variant of vxor_vx; the scalar operand is an iRegL.
    VectorRegister vd = as_VectorRegister($dst$$reg);
    VectorRegister vs = as_VectorRegister($src1$$reg);
    Register scalar = as_Register($src2$$reg);
    __ vsetvli_helper(T_LONG, Matcher::vector_length(this));
    __ vxor_vx(vd, vs, scalar);
  %}
  ins_pipe(pipe_slow);
%}

// vector-immediate xor (predicated)

instruct vxor_vi_masked(vReg dst_src, immI5 con, vRegMask_V0 v0) %{
  predicate(Matcher::vector_element_basic_type(n) == T_BYTE ||
            Matcher::vector_element_basic_type(n) == T_SHORT ||
            Matcher::vector_element_basic_type(n) == T_INT);
  match(Set dst_src (XorV (Binary dst_src (Replicate con)) v0));
  format %{ "vxor_vi_masked $dst_src, $dst_src, $con, $v0" %}
  ins_encode %{
    // In-place vxor_vi on dst_src, emitted with Assembler::v0_t.
    VectorRegister acc = as_VectorRegister($dst_src$$reg);
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    __ vsetvli_helper(elem_bt, Matcher::vector_length(this));
    __ vxor_vi(acc, acc, $con$$constant, Assembler::v0_t);
  %}
  ins_pipe(pipe_slow);
%}

instruct vxorL_vi_masked(vReg dst_src, immL5 con, vRegMask_V0 v0) %{
  predicate(Matcher::vector_element_basic_type(n) == T_LONG);
  match(Set dst_src (XorV (Binary dst_src (Replicate con)) v0));
  format %{ "vxorL_vi_masked $dst_src, $dst_src, $con, $v0" %}
  ins_encode %{
    // T_LONG variant of the in-place masked vxor_vi.
    VectorRegister acc = as_VectorRegister($dst_src$$reg);
    __ vsetvli_helper(T_LONG, Matcher::vector_length(this));
    __ vxor_vi(acc, acc, $con$$constant, Assembler::v0_t);
  %}
  ins_pipe(pipe_slow);
%}

// vector-scalar xor (predicated)

instruct vxor_vx_masked(vReg dst_src, iRegIorL2I src, vRegMask_V0 v0) %{
  predicate(Matcher::vector_element_basic_type(n) == T_BYTE ||
            Matcher::vector_element_basic_type(n) == T_SHORT ||
            Matcher::vector_element_basic_type(n) == T_INT);
  match(Set dst_src (XorV (Binary dst_src (Replicate src)) v0));
  format %{ "vxor_vx_masked $dst_src, $dst_src, $src, $v0" %}
  ins_encode %{
    // In-place vxor_vx on dst_src with a scalar register, emitted with v0_t.
    VectorRegister acc = as_VectorRegister($dst_src$$reg);
    Register scalar = as_Register($src$$reg);
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    __ vsetvli_helper(elem_bt, Matcher::vector_length(this));
    __ vxor_vx(acc, acc, scalar, Assembler::v0_t);
  %}
  ins_pipe(pipe_slow);
%}

instruct vxorL_vx_masked(vReg dst_src, iRegL src, vRegMask_V0 v0) %{
  predicate(Matcher::vector_element_basic_type(n) == T_LONG);
  match(Set dst_src (XorV (Binary dst_src (Replicate src)) v0));
  format %{ "vxorL_vx_masked $dst_src, $dst_src, $src, $v0" %}
  ins_encode %{
    // T_LONG variant of the in-place masked vxor_vx.
    VectorRegister acc = as_VectorRegister($dst_src$$reg);
    Register scalar = as_Register($src$$reg);
    __ vsetvli_helper(T_LONG, Matcher::vector_length(this));
    __ vxor_vx(acc, acc, scalar, Assembler::v0_t);
  %}
  ins_pipe(pipe_slow);
%}

// ------------------------------ Vector and not -----------------------------------

// vector and not

instruct vand_notB(vReg dst, vReg src1, vReg src2, immI_M1 m1) %{
  predicate(UseZvbb && Matcher::vector_element_basic_type(n) == T_BYTE);
  match(Set dst (AndV src1 (XorV src2 (Replicate m1))));
  format %{ "vand_notB $dst, $src1, $src2" %}
  ins_encode %{
    // The AndV/XorV pattern with replicated m1 collapses into one vandn_vv;
    // m1 itself is not used by the encoding.
    VectorRegister vd = as_VectorRegister($dst$$reg);
    VectorRegister kept = as_VectorRegister($src1$$reg);
    VectorRegister inverted = as_VectorRegister($src2$$reg);
    __ vsetvli_helper(T_BYTE, Matcher::vector_length(this));
    __ vandn_vv(vd, kept, inverted);
  %}
  ins_pipe(pipe_slow);
%}

instruct vand_notS(vReg dst, vReg src1, vReg src2, immI_M1 m1) %{
  predicate(UseZvbb && Matcher::vector_element_basic_type(n) == T_SHORT);
  match(Set dst (AndV src1 (XorV src2 (Replicate m1))));
  format %{ "vand_notS $dst, $src1, $src2" %}
  ins_encode %{
    // T_SHORT variant: the matched pattern is emitted as a single vandn_vv.
    VectorRegister vd = as_VectorRegister($dst$$reg);
    VectorRegister kept = as_VectorRegister($src1$$reg);
    VectorRegister inverted = as_VectorRegister($src2$$reg);
    __ vsetvli_helper(T_SHORT, Matcher::vector_length(this));
    __ vandn_vv(vd, kept, inverted);
  %}
  ins_pipe(pipe_slow);
%}

instruct vand_notI(vReg dst, vReg src1, vReg src2, immI_M1 m1) %{
  predicate(UseZvbb && Matcher::vector_element_basic_type(n) == T_INT);
  match(Set dst (AndV src1 (XorV src2 (Replicate m1))));
  format %{ "vand_notI $dst, $src1, $src2" %}
  ins_encode %{
    // T_INT variant: the matched pattern is emitted as a single vandn_vv.
    VectorRegister vd = as_VectorRegister($dst$$reg);
    VectorRegister kept = as_VectorRegister($src1$$reg);
    VectorRegister inverted = as_VectorRegister($src2$$reg);
    __ vsetvli_helper(T_INT, Matcher::vector_length(this));
    __ vandn_vv(vd, kept, inverted);
  %}
  ins_pipe(pipe_slow);
%}

instruct vand_notL(vReg dst, vReg src1, vReg src2, immL_M1 m1) %{
  predicate(UseZvbb && Matcher::vector_element_basic_type(n) == T_LONG);
  match(Set dst (AndV src1 (XorV src2 (Replicate m1))));
  format %{ "vand_notL $dst, $src1, $src2" %}
  ins_encode %{
    // T_LONG variant (m1 is immL_M1): emitted as a single vandn_vv.
    VectorRegister vd = as_VectorRegister($dst$$reg);
    VectorRegister kept = as_VectorRegister($src1$$reg);
    VectorRegister inverted = as_VectorRegister($src2$$reg);
    __ vsetvli_helper(T_LONG, Matcher::vector_length(this));
    __ vandn_vv(vd, kept, inverted);
  %}
  ins_pipe(pipe_slow);
%}

instruct vand_notB_masked(vReg dst_src1, vReg src2, immI_M1 m1, vRegMask_V0 v0) %{
  predicate(UseZvbb && Matcher::vector_element_basic_type(n) == T_BYTE);
  match(Set dst_src1 (AndV (Binary dst_src1 (XorV src2 (Replicate m1))) v0));
  format %{ "vand_notB_masked $dst_src1, $dst_src1, $src2, $v0" %}
  ins_encode %{
    // In-place vandn_vv on dst_src1, emitted with Assembler::v0_t.
    VectorRegister acc = as_VectorRegister($dst_src1$$reg);
    VectorRegister inverted = as_VectorRegister($src2$$reg);
    __ vsetvli_helper(T_BYTE, Matcher::vector_length(this));
    __ vandn_vv(acc, acc, inverted, Assembler::v0_t);
  %}
  ins_pipe(pipe_slow);
%}

instruct vand_notS_masked(vReg dst_src1, vReg src2, immI_M1 m1, vRegMask_V0 v0) %{
  predicate(UseZvbb && Matcher::vector_element_basic_type(n) == T_SHORT);
  match(Set dst_src1 (AndV (Binary dst_src1 (XorV src2 (Replicate m1))) v0));
  format %{ "vand_notS_masked $dst_src1, $dst_src1, $src2, $v0" %}
  ins_encode %{
    // T_SHORT variant of the in-place masked vandn_vv.
    VectorRegister acc = as_VectorRegister($dst_src1$$reg);
    VectorRegister inverted = as_VectorRegister($src2$$reg);
    __ vsetvli_helper(T_SHORT, Matcher::vector_length(this));
    __ vandn_vv(acc, acc, inverted, Assembler::v0_t);
  %}
  ins_pipe(pipe_slow);
%}

instruct vand_notI_masked(vReg dst_src1, vReg src2, immI_M1 m1, vRegMask_V0 v0) %{
  predicate(UseZvbb && Matcher::vector_element_basic_type(n) == T_INT);
  match(Set dst_src1 (AndV (Binary dst_src1 (XorV src2 (Replicate m1))) v0));
  format %{ "vand_notI_masked $dst_src1, $dst_src1, $src2, $v0" %}
  ins_encode %{
    // T_INT variant of the in-place masked vandn_vv.
    VectorRegister acc = as_VectorRegister($dst_src1$$reg);
    VectorRegister inverted = as_VectorRegister($src2$$reg);
    __ vsetvli_helper(T_INT, Matcher::vector_length(this));
    __ vandn_vv(acc, acc, inverted, Assembler::v0_t);
  %}
  ins_pipe(pipe_slow);
%}

instruct vand_notL_masked(vReg dst_src1, vReg src2, immL_M1 m1, vRegMask_V0 v0) %{
  predicate(UseZvbb && Matcher::vector_element_basic_type(n) == T_LONG);
  match(Set dst_src1 (AndV (Binary dst_src1 (XorV src2 (Replicate m1))) v0));
  format %{ "vand_notL_masked $dst_src1, $dst_src1, $src2, $v0" %}
  ins_encode %{
    // T_LONG variant (m1 is immL_M1) of the in-place masked vandn_vv.
    VectorRegister acc = as_VectorRegister($dst_src1$$reg);
    VectorRegister inverted = as_VectorRegister($src2$$reg);
    __ vsetvli_helper(T_LONG, Matcher::vector_length(this));
    __ vandn_vv(acc, acc, inverted, Assembler::v0_t);
  %}
  ins_pipe(pipe_slow);
%}

instruct vand_notB_vx(vReg dst, vReg src1, iRegIorL2I src2, immI_M1 m1) %{
  predicate(UseZvbb && Matcher::vector_element_basic_type(n) == T_BYTE);
  match(Set dst (AndV src1 (Replicate (XorI src2 m1))));
  format %{ "vand_notB_vx $dst, $src1, $src2" %}
  ins_encode %{
    // The scalar XorI with m1 is folded away: src2 is handed directly to
    // vandn_vx and m1 is not used by the encoding.
    VectorRegister vd = as_VectorRegister($dst$$reg);
    VectorRegister kept = as_VectorRegister($src1$$reg);
    Register scalar = as_Register($src2$$reg);
    __ vsetvli_helper(T_BYTE, Matcher::vector_length(this));
    __ vandn_vx(vd, kept, scalar);
  %}
  ins_pipe(pipe_slow);
%}

instruct vand_notS_vx(vReg dst, vReg src1, iRegIorL2I src2, immI_M1 m1) %{
  predicate(UseZvbb && Matcher::vector_element_basic_type(n) == T_SHORT);
  match(Set dst (AndV src1 (Replicate (XorI src2 m1))));
  format %{ "vand_notS_vx $dst, $src1, $src2" %}
  ins_encode %{
    // T_SHORT variant: src2 is passed straight to vandn_vx.
    VectorRegister vd = as_VectorRegister($dst$$reg);
    VectorRegister kept = as_VectorRegister($src1$$reg);
    Register scalar = as_Register($src2$$reg);
    __ vsetvli_helper(T_SHORT, Matcher::vector_length(this));
    __ vandn_vx(vd, kept, scalar);
  %}
  ins_pipe(pipe_slow);
%}

instruct vand_notI_vx(vReg dst, vReg src1, iRegIorL2I src2, immI_M1 m1) %{
  predicate(UseZvbb && Matcher::vector_element_basic_type(n) == T_INT);
  match(Set dst (AndV src1 (Replicate (XorI src2 m1))));
  format %{ "vand_notI_vx $dst, $src1, $src2" %}
  ins_encode %{
    // T_INT variant: src2 is passed straight to vandn_vx.
    VectorRegister vd = as_VectorRegister($dst$$reg);
    VectorRegister kept = as_VectorRegister($src1$$reg);
    Register scalar = as_Register($src2$$reg);
    __ vsetvli_helper(T_INT, Matcher::vector_length(this));
    __ vandn_vx(vd, kept, scalar);
  %}
  ins_pipe(pipe_slow);
%}

instruct vand_notL_vx(vReg dst, vReg src1, iRegL src2, immL_M1 m1) %{
  predicate(UseZvbb && Matcher::vector_element_basic_type(n) == T_LONG);
  match(Set dst (AndV src1 (Replicate (XorL src2 m1))));
  format %{ "vand_notL_vx $dst, $src1, $src2" %}
  ins_encode %{
    // T_LONG variant: matches XorL with an immL_M1 and emits vandn_vx.
    VectorRegister vd = as_VectorRegister($dst$$reg);
    VectorRegister kept = as_VectorRegister($src1$$reg);
    Register scalar = as_Register($src2$$reg);
    __ vsetvli_helper(T_LONG, Matcher::vector_length(this));
    __ vandn_vx(vd, kept, scalar);
  %}
  ins_pipe(pipe_slow);
%}

instruct vand_notB_vx_masked(vReg dst_src1, iRegIorL2I src2, immI_M1 m1, vRegMask_V0 v0) %{
  predicate(UseZvbb && Matcher::vector_element_basic_type(n) == T_BYTE);
  match(Set dst_src1 (AndV (Binary dst_src1 (Replicate (XorI src2 m1))) v0));
  format %{ "vand_notB_vx_masked $dst_src1, $dst_src1, $src2, $v0" %}
  ins_encode %{
    // In-place vandn_vx on dst_src1, emitted with Assembler::v0_t.
    VectorRegister acc = as_VectorRegister($dst_src1$$reg);
    Register scalar = as_Register($src2$$reg);
    __ vsetvli_helper(T_BYTE, Matcher::vector_length(this));
    __ vandn_vx(acc, acc, scalar, Assembler::v0_t);
  %}
  ins_pipe(pipe_slow);
%}

instruct vand_notS_vx_masked(vReg dst_src1, iRegIorL2I src2, immI_M1 m1, vRegMask_V0 v0) %{
  predicate(UseZvbb && Matcher::vector_element_basic_type(n) == T_SHORT);
  match(Set dst_src1 (AndV (Binary dst_src1 (Replicate (XorI src2 m1))) v0));
  format %{ "vand_notS_vx_masked $dst_src1, $dst_src1, $src2, $v0" %}
  ins_encode %{
    // T_SHORT variant of the in-place masked vandn_vx.
    VectorRegister acc = as_VectorRegister($dst_src1$$reg);
    Register scalar = as_Register($src2$$reg);
    __ vsetvli_helper(T_SHORT, Matcher::vector_length(this));
    __ vandn_vx(acc, acc, scalar, Assembler::v0_t);
  %}
  ins_pipe(pipe_slow);
%}

instruct vand_notI_vx_masked(vReg dst_src1, iRegIorL2I src2, immI_M1 m1, vRegMask_V0 v0) %{
  predicate(UseZvbb && Matcher::vector_element_basic_type(n) == T_INT);
  match(Set dst_src1 (AndV (Binary dst_src1 (Replicate (XorI src2 m1))) v0));
  format %{ "vand_notI_vx_masked $dst_src1, $dst_src1, $src2, $v0" %}
  ins_encode %{
    // T_INT variant of the in-place masked vandn_vx.
    VectorRegister acc = as_VectorRegister($dst_src1$$reg);
    Register scalar = as_Register($src2$$reg);
    __ vsetvli_helper(T_INT, Matcher::vector_length(this));
    __ vandn_vx(acc, acc, scalar, Assembler::v0_t);
  %}
  ins_pipe(pipe_slow);
%}

instruct vand_notL_vx_masked(vReg dst_src1, iRegL src2, immL_M1 m1, vRegMask_V0 v0) %{
  predicate(UseZvbb && Matcher::vector_element_basic_type(n) == T_LONG);
  match(Set dst_src1 (AndV (Binary dst_src1 (Replicate (XorL src2 m1))) v0));
  format %{ "vand_notL_vx_masked $dst_src1, $dst_src1, $src2, $v0" %}
  ins_encode %{
    // T_LONG variant (XorL with immL_M1) of the in-place masked vandn_vx.
    VectorRegister acc = as_VectorRegister($dst_src1$$reg);
    Register scalar = as_Register($src2$$reg);
    __ vsetvli_helper(T_LONG, Matcher::vector_length(this));
    __ vandn_vx(acc, acc, scalar, Assembler::v0_t);
  %}
  ins_pipe(pipe_slow);
%}

// ------------------------------ Vector not -----------------------------------

// vector not

instruct vnot(vReg dst, vReg src, immI_M1 m1) %{
  predicate(Matcher::vector_element_basic_type(n) == T_BYTE ||
            Matcher::vector_element_basic_type(n) == T_SHORT ||
            Matcher::vector_element_basic_type(n) == T_INT);
  match(Set dst (XorV src (Replicate m1)));
  format %{ "vnot $dst, $src" %}
  ins_encode %{
    // XorV with a replicated m1 is emitted as vxor_vi with the literal -1.
    VectorRegister vd = as_VectorRegister($dst$$reg);
    VectorRegister vs = as_VectorRegister($src$$reg);
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    __ vsetvli_helper(elem_bt, Matcher::vector_length(this));
    __ vxor_vi(vd, vs, -1);
  %}
  ins_pipe(pipe_slow);
%}

instruct vnotL(vReg dst, vReg src, immL_M1 m1) %{
  predicate(Matcher::vector_element_basic_type(n) == T_LONG);
  match(Set dst (XorV src (Replicate m1)));
  format %{ "vnotL $dst, $src" %}
  ins_encode %{
    // T_LONG variant (m1 is immL_M1): emitted as vxor_vi with the literal -1.
    VectorRegister vd = as_VectorRegister($dst$$reg);
    VectorRegister vs = as_VectorRegister($src$$reg);
    __ vsetvli_helper(T_LONG, Matcher::vector_length(this));
    __ vxor_vi(vd, vs, -1);
  %}
  ins_pipe(pipe_slow);
%}

// vector not - predicated

instruct vnot_masked(vReg dst_src, immI_M1 m1, vRegMask_V0 v0) %{
  predicate(Matcher::vector_element_basic_type(n) == T_BYTE ||
            Matcher::vector_element_basic_type(n) == T_SHORT ||
            Matcher::vector_element_basic_type(n) == T_INT);
  match(Set dst_src (XorV (Binary dst_src (Replicate m1)) v0));
  format %{ "vnot_masked $dst_src, $dst_src, $v0" %}
  ins_encode %{
    // In-place vxor_vi with the literal -1, emitted with Assembler::v0_t.
    VectorRegister acc = as_VectorRegister($dst_src$$reg);
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    __ vsetvli_helper(elem_bt, Matcher::vector_length(this));
    __ vxor_vi(acc, acc, -1, Assembler::v0_t);
  %}
  ins_pipe(pipe_slow);
%}

// Masked bitwise NOT for T_LONG vectors, matched as XorV of dst_src with a
// replicated all-ones constant under mask v0.
// The constant operand is immL_M1 (a long -1), consistent with vnotL,
// vand_notL and vand_notL_masked: the predicate restricts this rule to
// T_LONG elements, so an int immediate (immI_M1) would not correspond to the
// long constant being replicated.
instruct vnotL_masked(vReg dst_src, immL_M1 m1, vRegMask_V0 v0) %{
  predicate(Matcher::vector_element_basic_type(n) == T_LONG);
  match(Set dst_src (XorV (Binary dst_src (Replicate m1)) v0));
  format %{ "vnotL_masked $dst_src, $dst_src, $v0" %}
  ins_encode %{
    // In-place vxor_vi with the literal -1, emitted with Assembler::v0_t.
    __ vsetvli_helper(T_LONG, Matcher::vector_length(this));
    __ vxor_vi(as_VectorRegister($dst_src$$reg),
               as_VectorRegister($dst_src$$reg),
               -1, Assembler::v0_t);
  %}
  ins_pipe(pipe_slow);
%}

// vector float div

instruct vdiv_hfp(vReg dst, vReg src1, vReg src2) %{
  match(Set dst (DivVHF src1 src2));
  format %{ "vdiv_hfp $dst, $src1, $src2" %}
  ins_encode %{
    assert(UseZvfh, "must");
    assert(Matcher::vector_element_basic_type(this) == T_SHORT, "must");
    // DivVHF is emitted as vfdiv_vv with the element type set to T_SHORT.
    VectorRegister vd = as_VectorRegister($dst$$reg);
    VectorRegister dividend = as_VectorRegister($src1$$reg);
    VectorRegister divisor = as_VectorRegister($src2$$reg);
    __ vsetvli_helper(T_SHORT, Matcher::vector_length(this));
    __ vfdiv_vv(vd, dividend, divisor);
  %}
  ins_pipe(pipe_slow);
%}

instruct vdiv_fp(vReg dst, vReg src1, vReg src2) %{
  match(Set dst (DivVF src1 src2));
  match(Set dst (DivVD src1 src2));
  format %{ "vdiv_fp $dst, $src1, $src2" %}
  ins_encode %{
    // Shared by DivVF and DivVD; the element type comes from this node.
    VectorRegister vd = as_VectorRegister($dst$$reg);
    VectorRegister dividend = as_VectorRegister($src1$$reg);
    VectorRegister divisor = as_VectorRegister($src2$$reg);
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    __ vsetvli_helper(elem_bt, Matcher::vector_length(this));
    __ vfdiv_vv(vd, dividend, divisor);
  %}
  ins_pipe(pipe_slow);
%}

// vector float div - predicated

instruct vdiv_fp_masked(vReg dst_src1, vReg src2, vRegMask_V0 v0) %{
  match(Set dst_src1 (DivVF (Binary dst_src1 src2) v0));
  match(Set dst_src1 (DivVD (Binary dst_src1 src2) v0));
  format %{ "vdiv_fp_masked $dst_src1, $dst_src1, $src2, $v0" %}
  ins_encode %{
    // In-place vfdiv_vv on dst_src1 for DivVF/DivVD, emitted with v0_t.
    VectorRegister acc = as_VectorRegister($dst_src1$$reg);
    VectorRegister divisor = as_VectorRegister($src2$$reg);
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    __ vsetvli_helper(elem_bt, Matcher::vector_length(this));
    __ vfdiv_vv(acc, acc, divisor, Assembler::v0_t);
  %}
  ins_pipe(pipe_slow);
%}

// vector integer max/min

instruct vmax(vReg dst, vReg src1, vReg src2) %{
  predicate(Matcher::vector_element_basic_type(n) != T_FLOAT &&
            Matcher::vector_element_basic_type(n) != T_DOUBLE);
  match(Set dst (MaxV src1 src2));
  format %{ "vmax $dst, $src1, $src2" %}
  ins_encode %{
    // MaxV for element types other than T_FLOAT/T_DOUBLE, emitted as vmax_vv.
    VectorRegister vd = as_VectorRegister($dst$$reg);
    VectorRegister lhs = as_VectorRegister($src1$$reg);
    VectorRegister rhs = as_VectorRegister($src2$$reg);
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    __ vsetvli_helper(elem_bt, Matcher::vector_length(this));
    __ vmax_vv(vd, lhs, rhs);
  %}
  ins_pipe(pipe_slow);
%}

instruct vmin(vReg dst, vReg src1, vReg src2) %{
  predicate(Matcher::vector_element_basic_type(n) != T_FLOAT &&
            Matcher::vector_element_basic_type(n) != T_DOUBLE);
  match(Set dst (MinV src1 src2));
  format %{ "vmin $dst, $src1, $src2" %}
  ins_encode %{
    // MinV for element types other than T_FLOAT/T_DOUBLE, emitted as vmin_vv.
    VectorRegister vd = as_VectorRegister($dst$$reg);
    VectorRegister lhs = as_VectorRegister($src1$$reg);
    VectorRegister rhs = as_VectorRegister($src2$$reg);
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    __ vsetvli_helper(elem_bt, Matcher::vector_length(this));
    __ vmin_vv(vd, lhs, rhs);
  %}
  ins_pipe(pipe_slow);
%}

// vector integer max/min - predicated

instruct vmax_masked(vReg dst_src1, vReg src2, vRegMask_V0 v0) %{
  predicate(Matcher::vector_element_basic_type(n) != T_FLOAT &&
            Matcher::vector_element_basic_type(n) != T_DOUBLE);
  match(Set dst_src1 (MaxV (Binary dst_src1 src2) v0));
  format %{ "vmax_masked $dst_src1, $dst_src1, $src2, $v0" %}
  ins_encode %{
    // In-place vmax_vv on dst_src1, emitted with Assembler::v0_t.
    VectorRegister acc = as_VectorRegister($dst_src1$$reg);
    VectorRegister rhs = as_VectorRegister($src2$$reg);
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    __ vsetvli_helper(elem_bt, Matcher::vector_length(this));
    __ vmax_vv(acc, acc, rhs, Assembler::v0_t);
  %}
  ins_pipe(pipe_slow);
%}

instruct vmin_masked(vReg dst_src1, vReg src2, vRegMask_V0 v0) %{
  predicate(Matcher::vector_element_basic_type(n) != T_FLOAT &&
            Matcher::vector_element_basic_type(n) != T_DOUBLE);
  match(Set dst_src1 (MinV (Binary dst_src1 src2) v0));
  format %{ "vmin_masked $dst_src1, $dst_src1, $src2, $v0" %}
  ins_encode %{
    // In-place vmin_vv on dst_src1, emitted with Assembler::v0_t.
    VectorRegister acc = as_VectorRegister($dst_src1$$reg);
    VectorRegister rhs = as_VectorRegister($src2$$reg);
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    __ vsetvli_helper(elem_bt, Matcher::vector_length(this));
    __ vmin_vv(acc, acc, rhs, Assembler::v0_t);
  %}
  ins_pipe(pipe_slow);
%}

// vector unsigned integer max/min

instruct vmaxu(vReg dst, vReg src1, vReg src2) %{
  match(Set dst (UMaxV src1 src2));
  format %{ "vmaxu $dst, $src1, $src2" %}
  ins_encode %{
    // UMaxV is only expected for integral element types (asserted below).
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    assert(is_integral_type(elem_bt), "unsupported type");
    VectorRegister vd = as_VectorRegister($dst$$reg);
    VectorRegister lhs = as_VectorRegister($src1$$reg);
    VectorRegister rhs = as_VectorRegister($src2$$reg);
    __ vsetvli_helper(elem_bt, Matcher::vector_length(this));
    __ vmaxu_vv(vd, lhs, rhs);
  %}
  ins_pipe(pipe_slow);
%}

instruct vminu(vReg dst, vReg src1, vReg src2) %{
  match(Set dst (UMinV src1 src2));
  format %{ "vminu $dst, $src1, $src2" %}
  ins_encode %{
    // UMinV is only expected for integral element types (asserted below).
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    assert(is_integral_type(elem_bt), "unsupported type");
    VectorRegister vd = as_VectorRegister($dst$$reg);
    VectorRegister lhs = as_VectorRegister($src1$$reg);
    VectorRegister rhs = as_VectorRegister($src2$$reg);
    __ vsetvli_helper(elem_bt, Matcher::vector_length(this));
    __ vminu_vv(vd, lhs, rhs);
  %}
  ins_pipe(pipe_slow);
%}

// vector unsigned integer max/min - predicated

instruct vmaxu_masked(vReg dst_src1, vReg src2, vRegMask_V0 v0) %{
  match(Set dst_src1 (UMaxV (Binary dst_src1 src2) v0));
  format %{ "vmaxu_masked $dst_src1, $dst_src1, $src2, $v0" %}
  ins_encode %{
    // In-place vmaxu_vv on dst_src1 with Assembler::v0_t; integral types only.
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    assert(is_integral_type(elem_bt), "unsupported type");
    VectorRegister acc = as_VectorRegister($dst_src1$$reg);
    VectorRegister rhs = as_VectorRegister($src2$$reg);
    __ vsetvli_helper(elem_bt, Matcher::vector_length(this));
    __ vmaxu_vv(acc, acc, rhs, Assembler::v0_t);
  %}
  ins_pipe(pipe_slow);
%}

instruct vminu_masked(vReg dst_src1, vReg src2, vRegMask_V0 v0) %{
  match(Set dst_src1 (UMinV (Binary dst_src1 src2) v0));
  format %{ "vminu_masked $dst_src1, $dst_src1, $src2, $v0" %}
  ins_encode %{
    // In-place vminu_vv on dst_src1 with Assembler::v0_t; integral types only.
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    assert(is_integral_type(elem_bt), "unsupported type");
    VectorRegister acc = as_VectorRegister($dst_src1$$reg);
    VectorRegister rhs = as_VectorRegister($src2$$reg);
    __ vsetvli_helper(elem_bt, Matcher::vector_length(this));
    __ vminu_vv(acc, acc, rhs, Assembler::v0_t);
  %}
  ins_pipe(pipe_slow);
%}

// vector floating-point max/min (half precision)

// MaxVHF: half-precision maximum, delegated to minmax_fp_v with T_SHORT lanes.
// dst is TEMP_DEF and v0 is reserved as a temp by the effect clause.
instruct vmax_hfp(vReg dst, vReg src1, vReg src2, vRegMask_V0 v0) %{
  match(Set dst (MaxVHF src1 src2));
  effect(TEMP_DEF dst, TEMP v0);
  format %{ "vmax_hfp $dst, $src1, $src2" %}
  ins_encode %{
    assert(UseZvfh, "must");
    assert(Matcher::vector_element_basic_type(this) == T_SHORT, "must");

    const uint num_elems = Matcher::vector_length(this);
    VectorRegister result = as_VectorRegister($dst$$reg);
    VectorRegister lhs    = as_VectorRegister($src1$$reg);
    VectorRegister rhs    = as_VectorRegister($src2$$reg);
    __ minmax_fp_v(result, lhs, rhs, T_SHORT, false /* is_min */, num_elems);
  %}
  ins_pipe(pipe_slow);
%}

// MinVHF: half-precision minimum, delegated to minmax_fp_v with T_SHORT lanes.
// dst is TEMP_DEF and v0 is reserved as a temp by the effect clause.
instruct vmin_hfp(vReg dst, vReg src1, vReg src2, vRegMask_V0 v0) %{
  match(Set dst (MinVHF src1 src2));
  effect(TEMP_DEF dst, TEMP v0);
  format %{ "vmin_hfp $dst, $src1, $src2" %}
  ins_encode %{
    assert(UseZvfh, "must");
    assert(Matcher::vector_element_basic_type(this) == T_SHORT, "must");

    const uint num_elems = Matcher::vector_length(this);
    VectorRegister result = as_VectorRegister($dst$$reg);
    VectorRegister lhs    = as_VectorRegister($src1$$reg);
    VectorRegister rhs    = as_VectorRegister($src2$$reg);
    __ minmax_fp_v(result, lhs, rhs, T_SHORT, true /* is_min */, num_elems);
  %}
  ins_pipe(pipe_slow);
%}

// vector floating-point max/min

// MaxV restricted to T_FLOAT / T_DOUBLE lanes; the sequence is produced by minmax_fp_v.
// dst is TEMP_DEF and v0 is reserved as a temp by the effect clause.
instruct vmax_fp(vReg dst, vReg src1, vReg src2, vRegMask_V0 v0) %{
  predicate(Matcher::vector_element_basic_type(n) == T_FLOAT ||
            Matcher::vector_element_basic_type(n) == T_DOUBLE);
  match(Set dst (MaxV src1 src2));
  effect(TEMP_DEF dst, TEMP v0);
  format %{ "vmax_fp $dst, $src1, $src2" %}
  ins_encode %{
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    const uint num_elems    = Matcher::vector_length(this);

    VectorRegister result = as_VectorRegister($dst$$reg);
    VectorRegister lhs    = as_VectorRegister($src1$$reg);
    VectorRegister rhs    = as_VectorRegister($src2$$reg);
    __ minmax_fp_v(result, lhs, rhs, elem_bt, false /* is_min */, num_elems);
  %}
  ins_pipe(pipe_slow);
%}

// MinV restricted to T_FLOAT / T_DOUBLE lanes; the sequence is produced by minmax_fp_v.
// dst is TEMP_DEF and v0 is reserved as a temp by the effect clause.
instruct vmin_fp(vReg dst, vReg src1, vReg src2, vRegMask_V0 v0) %{
  predicate(Matcher::vector_element_basic_type(n) == T_FLOAT ||
            Matcher::vector_element_basic_type(n) == T_DOUBLE);
  match(Set dst (MinV src1 src2));
  effect(TEMP_DEF dst, TEMP v0);
  format %{ "vmin_fp $dst, $src1, $src2" %}
  ins_encode %{
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    const uint num_elems    = Matcher::vector_length(this);

    VectorRegister result = as_VectorRegister($dst$$reg);
    VectorRegister lhs    = as_VectorRegister($src1$$reg);
    VectorRegister rhs    = as_VectorRegister($src2$$reg);
    __ minmax_fp_v(result, lhs, rhs, elem_bt, true /* is_min */, num_elems);
  %}
  ins_pipe(pipe_slow);
%}

// vector floating-point max/min - predicated

// Masked MaxV for T_FLOAT / T_DOUBLE lanes, delegated to minmax_fp_masked_v.
// The mask arrives in an arbitrary mask register (vmask); tmp1, tmp2 and v0 are
// declared as temps and are killed by this rule.
instruct vmax_fp_masked(vReg dst_src1, vReg src2, vRegMask vmask, vReg tmp1, vReg tmp2, vRegMask_V0 v0) %{
  predicate(Matcher::vector_element_basic_type(n) == T_FLOAT ||
            Matcher::vector_element_basic_type(n) == T_DOUBLE);
  match(Set dst_src1 (MaxV (Binary dst_src1 src2) vmask));
  effect(TEMP_DEF dst_src1, TEMP tmp1, TEMP tmp2, TEMP v0);
  format %{ "vmax_fp_masked $dst_src1, $dst_src1, $src2, $vmask\t# KILL $tmp1, $tmp2, $v0" %}
  ins_encode %{
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    const uint num_elems    = Matcher::vector_length(this);

    VectorRegister inout    = as_VectorRegister($dst_src1$$reg);
    VectorRegister rhs      = as_VectorRegister($src2$$reg);
    VectorRegister lane_sel = as_VectorRegister($vmask$$reg);
    VectorRegister scratch1 = as_VectorRegister($tmp1$$reg);
    VectorRegister scratch2 = as_VectorRegister($tmp2$$reg);
    __ minmax_fp_masked_v(inout, inout, rhs, lane_sel, scratch1, scratch2,
                          elem_bt, false /* is_min */, num_elems);
  %}
  ins_pipe(pipe_slow);
%}

// Masked MinV for T_FLOAT / T_DOUBLE lanes, delegated to minmax_fp_masked_v.
// The mask arrives in an arbitrary mask register (vmask); tmp1, tmp2 and v0 are
// declared as temps and are killed by this rule.
instruct vmin_fp_masked(vReg dst_src1, vReg src2, vRegMask vmask, vReg tmp1, vReg tmp2, vRegMask_V0 v0) %{
  predicate(Matcher::vector_element_basic_type(n) == T_FLOAT ||
            Matcher::vector_element_basic_type(n) == T_DOUBLE);
  match(Set dst_src1 (MinV (Binary dst_src1 src2) vmask));
  effect(TEMP_DEF dst_src1, TEMP tmp1, TEMP tmp2, TEMP v0);
  format %{ "vmin_fp_masked $dst_src1, $dst_src1, $src2, $vmask\t# KILL $tmp1, $tmp2, $v0" %}
  ins_encode %{
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    const uint num_elems    = Matcher::vector_length(this);

    VectorRegister inout    = as_VectorRegister($dst_src1$$reg);
    VectorRegister rhs      = as_VectorRegister($src2$$reg);
    VectorRegister lane_sel = as_VectorRegister($vmask$$reg);
    VectorRegister scratch1 = as_VectorRegister($tmp1$$reg);
    VectorRegister scratch2 = as_VectorRegister($tmp2$$reg);
    __ minmax_fp_masked_v(inout, inout, rhs, lane_sel, scratch1, scratch2,
                          elem_bt, true /* is_min */, num_elems);
  %}
  ins_pipe(pipe_slow);
%}

// vector fmla

// Half-precision fused multiply-add: dst_src1 = src2 * src3 + dst_src1
instruct vhfmla(vReg dst_src1, vReg src2, vReg src3) %{
  match(Set dst_src1 (FmaVHF dst_src1 (Binary src2 src3)));
  format %{ "vhfmla $dst_src1, $dst_src1, $src2, $src3" %}
  ins_encode %{
    assert(UseFMA, "Needs FMA instructions support.");
    assert(UseZvfh, "must");
    assert(Matcher::vector_element_basic_type(this) == T_SHORT, "must");
    // The element type is asserted to be T_SHORT, so configure with it directly.
    __ vsetvli_helper(T_SHORT, Matcher::vector_length(this));

    VectorRegister acc   = as_VectorRegister($dst_src1$$reg);
    VectorRegister mul_a = as_VectorRegister($src2$$reg);
    VectorRegister mul_b = as_VectorRegister($src3$$reg);
    __ vfmacc_vv(acc, mul_a, mul_b);
  %}
  ins_pipe(pipe_slow);
%}

// Float/double fused multiply-add: dst_src1 = src2 * src3 + dst_src1
instruct vfmla(vReg dst_src1, vReg src2, vReg src3) %{
  match(Set dst_src1 (FmaVF dst_src1 (Binary src2 src3)));
  match(Set dst_src1 (FmaVD dst_src1 (Binary src2 src3)));
  format %{ "vfmla $dst_src1, $dst_src1, $src2, $src3" %}
  ins_encode %{
    assert(UseFMA, "Needs FMA instructions support.");
    // One rule serves both FmaVF and FmaVD, so the element type is read from the node.
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    __ vsetvli_helper(elem_bt, Matcher::vector_length(this));

    VectorRegister acc   = as_VectorRegister($dst_src1$$reg);
    VectorRegister mul_a = as_VectorRegister($src2$$reg);
    VectorRegister mul_b = as_VectorRegister($src3$$reg);
    __ vfmacc_vv(acc, mul_a, mul_b);
  %}
  ins_pipe(pipe_slow);
%}

// vector fmadd - predicated
// dst_src1 = dst_src1 * src2 + src3

// Masked by v0. dst_src1 is one multiplicand and also receives the result;
// src3 is the addend.
instruct vfmadd_masked(vReg dst_src1, vReg src2, vReg src3, vRegMask_V0 v0) %{
  match(Set dst_src1 (FmaVF (Binary dst_src1 src2) (Binary src3 v0)));
  match(Set dst_src1 (FmaVD (Binary dst_src1 src2) (Binary src3 v0)));
  format %{ "vfmadd_masked $dst_src1, $dst_src1, $src2, $src3, $v0" %}
  ins_encode %{
    assert(UseFMA, "Needs FMA instructions support.");
    // One rule serves both FmaVF and FmaVD, so the element type is read from the node.
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    __ vsetvli_helper(elem_bt, Matcher::vector_length(this));

    VectorRegister inout  = as_VectorRegister($dst_src1$$reg);
    VectorRegister mul_b  = as_VectorRegister($src2$$reg);
    VectorRegister addend = as_VectorRegister($src3$$reg);
    __ vfmadd_vv(inout, mul_b, addend, Assembler::v0_t);
  %}
  ins_pipe(pipe_slow);
%}

// vector fmls

// Float multiply-subtract from accumulator: dst_src1 = src2 * (-src3) + dst_src1
// Note: "(-src2) * src3 + dst_src1" has been idealized to "src3 * (-src2) + dst_src1"
instruct vfmlsF(vReg dst_src1, vReg src2, vReg src3) %{
  match(Set dst_src1 (FmaVF dst_src1 (Binary src2 (NegVF src3))));
  format %{ "vfmlsF $dst_src1, $dst_src1, $src2, $src3" %}
  ins_encode %{
    assert(UseFMA, "Needs FMA instructions support.");
    __ vsetvli_helper(T_FLOAT, Matcher::vector_length(this));

    // The NegVF in the match pattern is folded into the vfnmsac_vv instruction.
    VectorRegister acc   = as_VectorRegister($dst_src1$$reg);
    VectorRegister mul_a = as_VectorRegister($src2$$reg);
    VectorRegister mul_b = as_VectorRegister($src3$$reg);
    __ vfnmsac_vv(acc, mul_a, mul_b);
  %}
  ins_pipe(pipe_slow);
%}

// Double multiply-subtract from accumulator: dst_src1 = src2 * (-src3) + dst_src1
// Note: "(-src2) * src3 + dst_src1" has been idealized to "src3 * (-src2) + dst_src1"
instruct vfmlsD(vReg dst_src1, vReg src2, vReg src3) %{
  match(Set dst_src1 (FmaVD dst_src1 (Binary src2 (NegVD src3))));
  format %{ "vfmlsD $dst_src1, $dst_src1, $src2, $src3" %}
  ins_encode %{
    assert(UseFMA, "Needs FMA instructions support.");
    __ vsetvli_helper(T_DOUBLE, Matcher::vector_length(this));

    // The NegVD in the match pattern is folded into the vfnmsac_vv instruction.
    VectorRegister acc   = as_VectorRegister($dst_src1$$reg);
    VectorRegister mul_a = as_VectorRegister($src2$$reg);
    VectorRegister mul_b = as_VectorRegister($src3$$reg);
    __ vfnmsac_vv(acc, mul_a, mul_b);
  %}
  ins_pipe(pipe_slow);
%}

// vector fnmsub - predicated

// Masked by v0: dst_src1 = dst_src1 * (-src2) + src3
instruct vfnmsub_masked(vReg dst_src1, vReg src2, vReg src3, vRegMask_V0 v0) %{
  match(Set dst_src1 (FmaVF (Binary dst_src1 (NegVF src2)) (Binary src3 v0)));
  match(Set dst_src1 (FmaVD (Binary dst_src1 (NegVD src2)) (Binary src3 v0)));
  format %{ "vfnmsub_masked $dst_src1, $dst_src1, $src2, $src3, $v0" %}
  ins_encode %{
    assert(UseFMA, "Needs FMA instructions support.");
    // One rule serves both FmaVF and FmaVD, so the element type is read from the node.
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    __ vsetvli_helper(elem_bt, Matcher::vector_length(this));

    VectorRegister inout  = as_VectorRegister($dst_src1$$reg);
    VectorRegister mul_b  = as_VectorRegister($src2$$reg);
    VectorRegister addend = as_VectorRegister($src3$$reg);
    __ vfnmsub_vv(inout, mul_b, addend, Assembler::v0_t);
  %}
  ins_pipe(pipe_slow);
%}

// vector fnmla

// Float negated multiply-add: dst_src1 = src2 * (-src3) - dst_src1
// Note: "(-src2) * src3 - dst_src1" has been idealized to "src3 * (-src2) - dst_src1"
instruct vfnmlaF(vReg dst_src1, vReg src2, vReg src3) %{
  match(Set dst_src1 (FmaVF (NegVF dst_src1) (Binary src2 (NegVF src3))));
  format %{ "vfnmlaF $dst_src1, $dst_src1, $src2, $src3" %}
  ins_encode %{
    assert(UseFMA, "Needs FMA instructions support.");
    __ vsetvli_helper(T_FLOAT, Matcher::vector_length(this));

    // Both NegVF nodes in the match pattern are folded into vfnmacc_vv.
    VectorRegister acc   = as_VectorRegister($dst_src1$$reg);
    VectorRegister mul_a = as_VectorRegister($src2$$reg);
    VectorRegister mul_b = as_VectorRegister($src3$$reg);
    __ vfnmacc_vv(acc, mul_a, mul_b);
  %}
  ins_pipe(pipe_slow);
%}

// Double negated multiply-add: dst_src1 = src2 * (-src3) - dst_src1
// Note: "(-src2) * src3 - dst_src1" has been idealized to "src3 * (-src2) - dst_src1"
instruct vfnmlaD(vReg dst_src1, vReg src2, vReg src3) %{
  match(Set dst_src1 (FmaVD (NegVD dst_src1) (Binary src2 (NegVD src3))));
  format %{ "vfnmlaD $dst_src1, $dst_src1, $src2, $src3" %}
  ins_encode %{
    assert(UseFMA, "Needs FMA instructions support.");
    __ vsetvli_helper(T_DOUBLE, Matcher::vector_length(this));

    // Both NegVD nodes in the match pattern are folded into vfnmacc_vv.
    VectorRegister acc   = as_VectorRegister($dst_src1$$reg);
    VectorRegister mul_a = as_VectorRegister($src2$$reg);
    VectorRegister mul_b = as_VectorRegister($src3$$reg);
    __ vfnmacc_vv(acc, mul_a, mul_b);
  %}
  ins_pipe(pipe_slow);
%}

// vector fnmadd - predicated

// Masked by v0: dst_src1 = dst_src1 * (-src2) - src3
instruct vfnmadd_masked(vReg dst_src1, vReg src2, vReg src3, vRegMask_V0 v0) %{
  match(Set dst_src1 (FmaVF (Binary dst_src1 (NegVF src2)) (Binary (NegVF src3) v0)));
  match(Set dst_src1 (FmaVD (Binary dst_src1 (NegVD src2)) (Binary (NegVD src3) v0)));
  format %{ "vfnmadd_masked $dst_src1, $dst_src1, $src2, $src3, $v0" %}
  ins_encode %{
    assert(UseFMA, "Needs FMA instructions support.");
    // One rule serves both FmaVF and FmaVD, so the element type is read from the node.
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    __ vsetvli_helper(elem_bt, Matcher::vector_length(this));

    VectorRegister inout  = as_VectorRegister($dst_src1$$reg);
    VectorRegister mul_b  = as_VectorRegister($src2$$reg);
    VectorRegister addend = as_VectorRegister($src3$$reg);
    __ vfnmadd_vv(inout, mul_b, addend, Assembler::v0_t);
  %}
  ins_pipe(pipe_slow);
%}

// vector fnmls

// Float multiply minus accumulator: dst_src1 = src2 * src3 - dst_src1
instruct vfnmlsF(vReg dst_src1, vReg src2, vReg src3) %{
  match(Set dst_src1 (FmaVF (NegVF dst_src1) (Binary src2 src3)));
  format %{ "vfnmlsF $dst_src1, $dst_src1, $src2, $src3" %}
  ins_encode %{
    assert(UseFMA, "Needs FMA instructions support.");
    __ vsetvli_helper(T_FLOAT, Matcher::vector_length(this));

    // The NegVF on the accumulator is folded into vfmsac_vv.
    VectorRegister acc   = as_VectorRegister($dst_src1$$reg);
    VectorRegister mul_a = as_VectorRegister($src2$$reg);
    VectorRegister mul_b = as_VectorRegister($src3$$reg);
    __ vfmsac_vv(acc, mul_a, mul_b);
  %}
  ins_pipe(pipe_slow);
%}

// Double multiply minus accumulator: dst_src1 = src2 * src3 - dst_src1
instruct vfnmlsD(vReg dst_src1, vReg src2, vReg src3) %{
  match(Set dst_src1 (FmaVD (NegVD dst_src1) (Binary src2 src3)));
  format %{ "vfnmlsD $dst_src1, $dst_src1, $src2, $src3" %}
  ins_encode %{
    assert(UseFMA, "Needs FMA instructions support.");
    __ vsetvli_helper(T_DOUBLE, Matcher::vector_length(this));

    // The NegVD on the accumulator is folded into vfmsac_vv.
    VectorRegister acc   = as_VectorRegister($dst_src1$$reg);
    VectorRegister mul_a = as_VectorRegister($src2$$reg);
    VectorRegister mul_b = as_VectorRegister($src3$$reg);
    __ vfmsac_vv(acc, mul_a, mul_b);
  %}
  ins_pipe(pipe_slow);
%}

// vector fmsub - predicated

// Masked by v0: dst_src1 = dst_src1 * src2 - src3
instruct vfmsub_masked(vReg dst_src1, vReg src2, vReg src3, vRegMask_V0 v0) %{
  match(Set dst_src1 (FmaVF (Binary dst_src1 src2) (Binary (NegVF src3) v0)));
  match(Set dst_src1 (FmaVD (Binary dst_src1 src2) (Binary (NegVD src3) v0)));
  format %{ "vfmsub_masked $dst_src1, $dst_src1, $src2, $src3, $v0" %}
  ins_encode %{
    assert(UseFMA, "Needs FMA instructions support.");
    // One rule serves both FmaVF and FmaVD, so the element type is read from the node.
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    __ vsetvli_helper(elem_bt, Matcher::vector_length(this));

    VectorRegister inout  = as_VectorRegister($dst_src1$$reg);
    VectorRegister mul_b  = as_VectorRegister($src2$$reg);
    VectorRegister addend = as_VectorRegister($src3$$reg);
    __ vfmsub_vv(inout, mul_b, addend, Assembler::v0_t);
  %}
  ins_pipe(pipe_slow);
%}

// vector mla

// Integer multiply-accumulate (byte/short/int/long): dst_src1 = dst_src1 + src2 * src3
instruct vmla(vReg dst_src1, vReg src2, vReg src3) %{
  match(Set dst_src1 (AddVB dst_src1 (MulVB src2 src3)));
  match(Set dst_src1 (AddVS dst_src1 (MulVS src2 src3)));
  match(Set dst_src1 (AddVI dst_src1 (MulVI src2 src3)));
  match(Set dst_src1 (AddVL dst_src1 (MulVL src2 src3)));
  format %{ "vmla $dst_src1, $dst_src1, $src2, $src3" %}
  ins_encode %{
    // Four element widths share this rule, so the type is read from the node.
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    __ vsetvli_helper(elem_bt, Matcher::vector_length(this));

    VectorRegister acc   = as_VectorRegister($dst_src1$$reg);
    VectorRegister mul_a = as_VectorRegister($src2$$reg);
    VectorRegister mul_b = as_VectorRegister($src3$$reg);
    __ vmacc_vv(acc, mul_a, mul_b);
  %}
  ins_pipe(pipe_slow);
%}

// vector mla - predicated

// Integer multiply-accumulate under mask v0; dst_src1 is the accumulator.
instruct vmla_masked(vReg dst_src1, vReg src2, vReg src3, vRegMask_V0 v0) %{
  match(Set dst_src1 (AddVB (Binary dst_src1 (MulVB src2 src3)) v0));
  match(Set dst_src1 (AddVS (Binary dst_src1 (MulVS src2 src3)) v0));
  match(Set dst_src1 (AddVI (Binary dst_src1 (MulVI src2 src3)) v0));
  match(Set dst_src1 (AddVL (Binary dst_src1 (MulVL src2 src3)) v0));
  format %{ "vmla_masked $dst_src1, $dst_src1, $src2, $src3, $v0" %}
  ins_encode %{
    // Four element widths share this rule, so the type is read from the node.
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    __ vsetvli_helper(elem_bt, Matcher::vector_length(this));

    VectorRegister acc   = as_VectorRegister($dst_src1$$reg);
    VectorRegister mul_a = as_VectorRegister($src2$$reg);
    VectorRegister mul_b = as_VectorRegister($src3$$reg);
    __ vmacc_vv(acc, mul_a, mul_b, Assembler::v0_t);
  %}
  ins_pipe(pipe_slow);
%}

// vector mls

// Integer multiply-subtract (byte/short/int/long): dst_src1 = dst_src1 - src2 * src3
instruct vmls(vReg dst_src1, vReg src2, vReg src3) %{
  match(Set dst_src1 (SubVB dst_src1 (MulVB src2 src3)));
  match(Set dst_src1 (SubVS dst_src1 (MulVS src2 src3)));
  match(Set dst_src1 (SubVI dst_src1 (MulVI src2 src3)));
  match(Set dst_src1 (SubVL dst_src1 (MulVL src2 src3)));
  format %{ "vmls $dst_src1, $dst_src1, $src2, $src3" %}
  ins_encode %{
    // Four element widths share this rule, so the type is read from the node.
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    __ vsetvli_helper(elem_bt, Matcher::vector_length(this));

    VectorRegister acc   = as_VectorRegister($dst_src1$$reg);
    VectorRegister mul_a = as_VectorRegister($src2$$reg);
    VectorRegister mul_b = as_VectorRegister($src3$$reg);
    __ vnmsac_vv(acc, mul_a, mul_b);
  %}
  ins_pipe(pipe_slow);
%}

// vector mls - predicated

// Integer multiply-subtract under mask v0; dst_src1 is the accumulator.
instruct vmls_masked(vReg dst_src1, vReg src2, vReg src3, vRegMask_V0 v0) %{
  match(Set dst_src1 (SubVB (Binary dst_src1 (MulVB src2 src3)) v0));
  match(Set dst_src1 (SubVS (Binary dst_src1 (MulVS src2 src3)) v0));
  match(Set dst_src1 (SubVI (Binary dst_src1 (MulVI src2 src3)) v0));
  match(Set dst_src1 (SubVL (Binary dst_src1 (MulVL src2 src3)) v0));
  format %{ "vmls_masked $dst_src1, $dst_src1, $src2, $src3, $v0" %}
  ins_encode %{
    // Four element widths share this rule, so the type is read from the node.
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    __ vsetvli_helper(elem_bt, Matcher::vector_length(this));

    VectorRegister acc   = as_VectorRegister($dst_src1$$reg);
    VectorRegister mul_a = as_VectorRegister($src2$$reg);
    VectorRegister mul_b = as_VectorRegister($src3$$reg);
    __ vnmsac_vv(acc, mul_a, mul_b, Assembler::v0_t);
  %}
  ins_pipe(pipe_slow);
%}

// vector mul

// Integer vector-vector multiply for byte/short/int/long lanes, emitted as vmul_vv.
instruct vmul(vReg dst, vReg src1, vReg src2) %{
  match(Set dst (MulVB src1 src2));
  match(Set dst (MulVS src1 src2));
  match(Set dst (MulVI src1 src2));
  match(Set dst (MulVL src1 src2));
  format %{ "vmul $dst, $src1, $src2" %}
  ins_encode %{
    // Four element widths share this rule, so the type is read from the node.
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    __ vsetvli_helper(elem_bt, Matcher::vector_length(this));

    VectorRegister result = as_VectorRegister($dst$$reg);
    VectorRegister lhs    = as_VectorRegister($src1$$reg);
    VectorRegister rhs    = as_VectorRegister($src2$$reg);
    __ vmul_vv(result, lhs, rhs);
  %}
  ins_pipe(pipe_slow);
%}

// Half-precision vector-vector multiply (MulVHF), emitted as vfmul_vv on T_SHORT lanes.
instruct vmul_hfp(vReg dst, vReg src1, vReg src2) %{
  match(Set dst (MulVHF src1 src2));
  format %{ "vmul_hfp $dst, $src1, $src2" %}
  ins_encode %{
    assert(UseZvfh, "must");
    assert(Matcher::vector_element_basic_type(this) == T_SHORT, "must");
    // The element type is asserted to be T_SHORT, so configure with it directly.
    __ vsetvli_helper(T_SHORT, Matcher::vector_length(this));

    VectorRegister result = as_VectorRegister($dst$$reg);
    VectorRegister lhs    = as_VectorRegister($src1$$reg);
    VectorRegister rhs    = as_VectorRegister($src2$$reg);
    __ vfmul_vv(result, lhs, rhs);
  %}
  ins_pipe(pipe_slow);
%}

// Float/double vector-vector multiply (MulVF / MulVD), emitted as vfmul_vv.
instruct vmul_fp(vReg dst, vReg src1, vReg src2) %{
  match(Set dst (MulVF src1 src2));
  match(Set dst (MulVD src1 src2));
  format %{ "vmul_fp $dst, $src1, $src2" %}
  ins_encode %{
    // Both precisions share this rule, so the type is read from the node.
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    __ vsetvli_helper(elem_bt, Matcher::vector_length(this));

    VectorRegister result = as_VectorRegister($dst$$reg);
    VectorRegister lhs    = as_VectorRegister($src1$$reg);
    VectorRegister rhs    = as_VectorRegister($src2$$reg);
    __ vfmul_vv(result, lhs, rhs);
  %}
  ins_pipe(pipe_slow);
%}

// vector mul - predicated

// Integer vector-vector multiply under mask v0; dst_src1 is input and destination.
instruct vmul_masked(vReg dst_src1, vReg src2, vRegMask_V0 v0) %{
  match(Set dst_src1 (MulVB (Binary dst_src1 src2) v0));
  match(Set dst_src1 (MulVS (Binary dst_src1 src2) v0));
  match(Set dst_src1 (MulVI (Binary dst_src1 src2) v0));
  match(Set dst_src1 (MulVL (Binary dst_src1 src2) v0));
  format %{ "vmul_masked $dst_src1, $dst_src1, $src2, $v0" %}
  ins_encode %{
    // Four element widths share this rule, so the type is read from the node.
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    __ vsetvli_helper(elem_bt, Matcher::vector_length(this));

    VectorRegister inout = as_VectorRegister($dst_src1$$reg);
    VectorRegister rhs   = as_VectorRegister($src2$$reg);
    __ vmul_vv(inout, inout, rhs, Assembler::v0_t);
  %}
  ins_pipe(pipe_slow);
%}

// Float/double vector-vector multiply under mask v0; dst_src1 is input and destination.
instruct vmul_fp_masked(vReg dst_src1, vReg src2, vRegMask_V0 v0) %{
  match(Set dst_src1 (MulVF (Binary dst_src1 src2) v0));
  match(Set dst_src1 (MulVD (Binary dst_src1 src2) v0));
  format %{ "vmul_fp_masked $dst_src1, $dst_src1, $src2, $v0" %}
  ins_encode %{
    // Both precisions share this rule, so the type is read from the node.
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    __ vsetvli_helper(elem_bt, Matcher::vector_length(this));

    VectorRegister inout = as_VectorRegister($dst_src1$$reg);
    VectorRegister rhs   = as_VectorRegister($src2$$reg);
    __ vfmul_vv(inout, inout, rhs, Assembler::v0_t);
  %}
  ins_pipe(pipe_slow);
%}

// vector-scalar mul (unpredicated)

// Vector-scalar multiply for byte/short/int lanes: the Replicate of the scalar
// src2 is matched away and the general-purpose register is passed to vmul_vx.
instruct vmul_vx(vReg dst, vReg src1, iRegIorL2I src2) %{
  match(Set dst (MulVB src1 (Replicate src2)));
  match(Set dst (MulVS src1 (Replicate src2)));
  match(Set dst (MulVI src1 (Replicate src2)));
  format %{ "vmul_vx $dst, $src1, $src2" %}
  ins_encode %{
    // Three element widths share this rule, so the type is read from the node.
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    __ vsetvli_helper(elem_bt, Matcher::vector_length(this));

    VectorRegister result = as_VectorRegister($dst$$reg);
    VectorRegister vec_in = as_VectorRegister($src1$$reg);
    Register scalar_in    = as_Register($src2$$reg);
    __ vmul_vx(result, vec_in, scalar_in);
  %}
  ins_pipe(pipe_slow);
%}

// Vector-scalar multiply for long lanes: the scalar src2 is a long register and
// the element type is fixed to T_LONG.
instruct vmulL_vx(vReg dst, vReg src1, iRegL src2) %{
  match(Set dst (MulVL src1 (Replicate src2)));
  format %{ "vmulL_vx $dst, $src1, $src2" %}
  ins_encode %{
    __ vsetvli_helper(T_LONG, Matcher::vector_length(this));

    VectorRegister result = as_VectorRegister($dst$$reg);
    VectorRegister vec_in = as_VectorRegister($src1$$reg);
    Register scalar_in    = as_Register($src2$$reg);
    __ vmul_vx(result, vec_in, scalar_in);
  %}
  ins_pipe(pipe_slow);
%}

// vector-scalar mul (predicated)

// Vector-scalar multiply under mask v0 for byte/short/int lanes; dst_src is the
// vector input and the destination.
instruct vmul_vx_masked(vReg dst_src, iRegIorL2I src2, vRegMask_V0 v0) %{
  match(Set dst_src (MulVB (Binary dst_src (Replicate src2)) v0));
  match(Set dst_src (MulVS (Binary dst_src (Replicate src2)) v0));
  match(Set dst_src (MulVI (Binary dst_src (Replicate src2)) v0));
  format %{ "vmul_vx_masked $dst_src, $dst_src, $src2, $v0" %}
  ins_encode %{
    // Three element widths share this rule, so the type is read from the node.
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    __ vsetvli_helper(elem_bt, Matcher::vector_length(this));

    VectorRegister inout = as_VectorRegister($dst_src$$reg);
    Register scalar_in   = as_Register($src2$$reg);
    __ vmul_vx(inout, inout, scalar_in, Assembler::v0_t);
  %}
  ins_pipe(pipe_slow);
%}

// Vector-scalar multiply under mask v0 for long lanes; dst_src is the vector
// input and the destination.
instruct vmulL_vx_masked(vReg dst_src, iRegL src2, vRegMask_V0 v0) %{
  match(Set dst_src (MulVL (Binary dst_src (Replicate src2)) v0));
  format %{ "vmulL_vx_masked $dst_src, $dst_src, $src2, $v0" %}
  ins_encode %{
    __ vsetvli_helper(T_LONG, Matcher::vector_length(this));

    VectorRegister inout = as_VectorRegister($dst_src$$reg);
    Register scalar_in   = as_Register($src2$$reg);
    __ vmul_vx(inout, inout, scalar_in, Assembler::v0_t);
  %}
  ins_pipe(pipe_slow);
%}

// vector neg

// Integer vector negate (NegVI / NegVL), emitted through vneg_v.
instruct vneg(vReg dst, vReg src) %{
  match(Set dst (NegVI src));
  match(Set dst (NegVL src));
  format %{ "vneg $dst, $src" %}
  ins_encode %{
    // The element type is read from the node rather than from the opcode.
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    __ vsetvli_helper(elem_bt, Matcher::vector_length(this));

    VectorRegister result = as_VectorRegister($dst$$reg);
    VectorRegister input  = as_VectorRegister($src$$reg);
    __ vneg_v(result, input);
  %}
  ins_pipe(pipe_slow);
%}

// vector neg - predicated

// Integer vector negate under mask v0; dst_src is negated in place.
instruct vneg_masked(vReg dst_src, vRegMask_V0 v0) %{
  match(Set dst_src (NegVI dst_src v0));
  match(Set dst_src (NegVL dst_src v0));
  format %{ "vneg_masked $dst_src, $dst_src, $v0" %}
  ins_encode %{
    // The element type is read from the node rather than from the opcode.
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    __ vsetvli_helper(elem_bt, Matcher::vector_length(this));

    VectorRegister inout = as_VectorRegister($dst_src$$reg);
    __ vneg_v(inout, inout, Assembler::v0_t);
  %}
  ins_pipe(pipe_slow);
%}

// vector fneg

// Float/double vector negate (NegVF / NegVD), emitted through vfneg_v.
instruct vfneg(vReg dst, vReg src) %{
  match(Set dst (NegVF src));
  match(Set dst (NegVD src));
  format %{ "vfneg $dst, $src" %}
  ins_encode %{
    // Both precisions share this rule, so the type is read from the node.
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    __ vsetvli_helper(elem_bt, Matcher::vector_length(this));

    VectorRegister result = as_VectorRegister($dst$$reg);
    VectorRegister input  = as_VectorRegister($src$$reg);
    __ vfneg_v(result, input);
  %}
  ins_pipe(pipe_slow);
%}

// vector fneg - predicated

// Float/double vector negate under mask v0; dst_src is negated in place.
instruct vfneg_masked(vReg dst_src, vRegMask_V0 v0) %{
  match(Set dst_src (NegVF dst_src v0));
  match(Set dst_src (NegVD dst_src v0));
  format %{ "vfneg_masked $dst_src, $dst_src, $v0" %}
  ins_encode %{
    // Both precisions share this rule, so the type is read from the node.
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    __ vsetvli_helper(elem_bt, Matcher::vector_length(this));

    VectorRegister inout = as_VectorRegister($dst_src$$reg);
    __ vfneg_v(inout, inout, Assembler::v0_t);
  %}
  ins_pipe(pipe_slow);
%}

// vector and reduction

// AndReductionV for T_BYTE / T_SHORT / T_INT vectors. The work is delegated to
// reduce_integral_v; tmp is a vector temp that the rule kills.
instruct reduce_and(iRegINoSp dst, iRegIorL2I src1, vReg src2, vReg tmp) %{
  predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE ||
            Matcher::vector_element_basic_type(n->in(2)) == T_SHORT ||
            Matcher::vector_element_basic_type(n->in(2)) == T_INT);
  match(Set dst (AndReductionV src1 src2));
  effect(TEMP tmp);
  format %{ "reduce_and $dst, $src1, $src2\t# KILL $tmp" %}
  ins_encode %{
    // Element type and lane count describe the vector operand src2, not the scalar result.
    const BasicType elem_bt = Matcher::vector_element_basic_type(this, $src2);
    const uint num_elems    = Matcher::vector_length(this, $src2);

    VectorRegister vec_in  = as_VectorRegister($src2$$reg);
    VectorRegister scratch = as_VectorRegister($tmp$$reg);
    __ reduce_integral_v($dst$$Register, $src1$$Register, vec_in, scratch,
                         this->ideal_Opcode(), elem_bt, num_elems);
  %}
  ins_pipe(pipe_slow);
%}

// AndReductionV for T_LONG vectors; scalar input and result live in long registers.
// The work is delegated to reduce_integral_v; tmp is a vector temp that the rule kills.
instruct reduce_andL(iRegLNoSp dst, iRegL src1, vReg src2, vReg tmp) %{
  predicate(Matcher::vector_element_basic_type(n->in(2)) == T_LONG);
  match(Set dst (AndReductionV src1 src2));
  effect(TEMP tmp);
  format %{ "reduce_andL $dst, $src1, $src2\t# KILL $tmp" %}
  ins_encode %{
    // Element type and lane count describe the vector operand src2.
    const BasicType elem_bt = Matcher::vector_element_basic_type(this, $src2);
    const uint num_elems    = Matcher::vector_length(this, $src2);

    VectorRegister vec_in  = as_VectorRegister($src2$$reg);
    VectorRegister scratch = as_VectorRegister($tmp$$reg);
    __ reduce_integral_v($dst$$Register, $src1$$Register, vec_in, scratch,
                         this->ideal_Opcode(), elem_bt, num_elems);
  %}
  ins_pipe(pipe_slow);
%}

// vector and reduction - predicated

// AndReductionV under mask v0 for T_BYTE / T_SHORT / T_INT vectors, delegated to
// reduce_integral_v; tmp is a vector temp that the rule kills.
instruct reduce_and_masked(iRegINoSp dst, iRegIorL2I src1, vReg src2, vRegMask_V0 v0, vReg tmp) %{
  predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE ||
            Matcher::vector_element_basic_type(n->in(2)) == T_SHORT ||
            Matcher::vector_element_basic_type(n->in(2)) == T_INT);
  match(Set dst (AndReductionV (Binary src1 src2) v0));
  effect(TEMP tmp);
  format %{ "reduce_and_masked $dst, $src1, $src2, $v0\t# KILL $tmp" %}
  ins_encode %{
    // Element type and lane count describe the vector operand src2, not the scalar result.
    const BasicType elem_bt = Matcher::vector_element_basic_type(this, $src2);
    const uint num_elems    = Matcher::vector_length(this, $src2);

    VectorRegister vec_in  = as_VectorRegister($src2$$reg);
    VectorRegister scratch = as_VectorRegister($tmp$$reg);
    __ reduce_integral_v($dst$$Register, $src1$$Register, vec_in, scratch,
                         this->ideal_Opcode(), elem_bt, num_elems, Assembler::v0_t);
  %}
  ins_pipe(pipe_slow);
%}

// AndReductionV under mask v0 for T_LONG vectors, delegated to reduce_integral_v;
// tmp is a vector temp that the rule kills.
instruct reduce_andL_masked(iRegLNoSp dst, iRegL src1, vReg src2, vRegMask_V0 v0, vReg tmp) %{
  predicate(Matcher::vector_element_basic_type(n->in(2)) == T_LONG);
  match(Set dst (AndReductionV (Binary src1 src2) v0));
  effect(TEMP tmp);
  format %{ "reduce_andL_masked $dst, $src1, $src2, $v0\t# KILL $tmp" %}
  ins_encode %{
    // Element type and lane count describe the vector operand src2.
    const BasicType elem_bt = Matcher::vector_element_basic_type(this, $src2);
    const uint num_elems    = Matcher::vector_length(this, $src2);

    VectorRegister vec_in  = as_VectorRegister($src2$$reg);
    VectorRegister scratch = as_VectorRegister($tmp$$reg);
    __ reduce_integral_v($dst$$Register, $src1$$Register, vec_in, scratch,
                         this->ideal_Opcode(), elem_bt, num_elems, Assembler::v0_t);
  %}
  ins_pipe(pipe_slow);
%}

// vector or reduction

// OrReductionV for T_BYTE / T_SHORT / T_INT vectors. The work is delegated to
// reduce_integral_v; tmp is a vector temp that the rule kills.
instruct reduce_or(iRegINoSp dst, iRegIorL2I src1, vReg src2, vReg tmp) %{
  predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE ||
            Matcher::vector_element_basic_type(n->in(2)) == T_SHORT ||
            Matcher::vector_element_basic_type(n->in(2)) == T_INT);
  match(Set dst (OrReductionV src1 src2));
  effect(TEMP tmp);
  format %{ "reduce_or $dst, $src1, $src2\t# KILL $tmp" %}
  ins_encode %{
    // Element type and lane count describe the vector operand src2, not the scalar result.
    const BasicType elem_bt = Matcher::vector_element_basic_type(this, $src2);
    const uint num_elems    = Matcher::vector_length(this, $src2);

    VectorRegister vec_in  = as_VectorRegister($src2$$reg);
    VectorRegister scratch = as_VectorRegister($tmp$$reg);
    __ reduce_integral_v($dst$$Register, $src1$$Register, vec_in, scratch,
                         this->ideal_Opcode(), elem_bt, num_elems);
  %}
  ins_pipe(pipe_slow);
%}

// OrReductionV for T_LONG vectors; scalar input and result live in long registers.
// The work is delegated to reduce_integral_v; tmp is a vector temp that the rule kills.
instruct reduce_orL(iRegLNoSp dst, iRegL src1, vReg src2, vReg tmp) %{
  predicate(Matcher::vector_element_basic_type(n->in(2)) == T_LONG);
  match(Set dst (OrReductionV src1 src2));
  effect(TEMP tmp);
  format %{ "reduce_orL $dst, $src1, $src2\t# KILL $tmp" %}
  ins_encode %{
    // Element type and lane count describe the vector operand src2.
    const BasicType elem_bt = Matcher::vector_element_basic_type(this, $src2);
    const uint num_elems    = Matcher::vector_length(this, $src2);

    VectorRegister vec_in  = as_VectorRegister($src2$$reg);
    VectorRegister scratch = as_VectorRegister($tmp$$reg);
    __ reduce_integral_v($dst$$Register, $src1$$Register, vec_in, scratch,
                         this->ideal_Opcode(), elem_bt, num_elems);
  %}
  ins_pipe(pipe_slow);
%}

// vector or reduction - predicated

// OrReductionV under mask v0 for T_BYTE / T_SHORT / T_INT vectors, delegated to
// reduce_integral_v; tmp is a vector temp that the rule kills.
instruct reduce_or_masked(iRegINoSp dst, iRegIorL2I src1, vReg src2, vRegMask_V0 v0, vReg tmp) %{
  predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE ||
            Matcher::vector_element_basic_type(n->in(2)) == T_SHORT ||
            Matcher::vector_element_basic_type(n->in(2)) == T_INT);
  match(Set dst (OrReductionV (Binary src1 src2) v0));
  effect(TEMP tmp);
  format %{ "reduce_or_masked $dst, $src1, $src2, $v0\t# KILL $tmp" %}
  ins_encode %{
    // Element type and lane count describe the vector operand src2, not the scalar result.
    const BasicType elem_bt = Matcher::vector_element_basic_type(this, $src2);
    const uint num_elems    = Matcher::vector_length(this, $src2);

    VectorRegister vec_in  = as_VectorRegister($src2$$reg);
    VectorRegister scratch = as_VectorRegister($tmp$$reg);
    __ reduce_integral_v($dst$$Register, $src1$$Register, vec_in, scratch,
                         this->ideal_Opcode(), elem_bt, num_elems, Assembler::v0_t);
  %}
  ins_pipe(pipe_slow);
%}

// OrReductionV under mask v0 for T_LONG vectors, delegated to reduce_integral_v;
// tmp is a vector temp that the rule kills.
instruct reduce_orL_masked(iRegLNoSp dst, iRegL src1, vReg src2, vRegMask_V0 v0, vReg tmp) %{
  predicate(Matcher::vector_element_basic_type(n->in(2)) == T_LONG);
  match(Set dst (OrReductionV (Binary src1 src2) v0));
  effect(TEMP tmp);
  format %{ "reduce_orL_masked $dst, $src1, $src2, $v0\t# KILL $tmp" %}
  ins_encode %{
    // Element type and lane count describe the vector operand src2.
    const BasicType elem_bt = Matcher::vector_element_basic_type(this, $src2);
    const uint num_elems    = Matcher::vector_length(this, $src2);

    VectorRegister vec_in  = as_VectorRegister($src2$$reg);
    VectorRegister scratch = as_VectorRegister($tmp$$reg);
    __ reduce_integral_v($dst$$Register, $src1$$Register, vec_in, scratch,
                         this->ideal_Opcode(), elem_bt, num_elems, Assembler::v0_t);
  %}
  ins_pipe(pipe_slow);
%}

// vector xor reduction

// XorReductionV for T_BYTE / T_SHORT / T_INT vectors. The work is delegated to
// reduce_integral_v; tmp is a vector temp that the rule kills.
instruct reduce_xor(iRegINoSp dst, iRegIorL2I src1, vReg src2, vReg tmp) %{
  predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE ||
            Matcher::vector_element_basic_type(n->in(2)) == T_SHORT ||
            Matcher::vector_element_basic_type(n->in(2)) == T_INT);
  match(Set dst (XorReductionV src1 src2));
  effect(TEMP tmp);
  format %{ "reduce_xor $dst, $src1, $src2\t# KILL $tmp" %}
  ins_encode %{
    // Element type and lane count describe the vector operand src2, not the scalar result.
    const BasicType elem_bt = Matcher::vector_element_basic_type(this, $src2);
    const uint num_elems    = Matcher::vector_length(this, $src2);

    VectorRegister vec_in  = as_VectorRegister($src2$$reg);
    VectorRegister scratch = as_VectorRegister($tmp$$reg);
    __ reduce_integral_v($dst$$Register, $src1$$Register, vec_in, scratch,
                         this->ideal_Opcode(), elem_bt, num_elems);
  %}
  ins_pipe(pipe_slow);
%}

// XorReductionV for T_LONG vectors; scalar input and result live in long registers.
// The work is delegated to reduce_integral_v; tmp is a vector temp that the rule kills.
instruct reduce_xorL(iRegLNoSp dst, iRegL src1, vReg src2, vReg tmp) %{
  predicate(Matcher::vector_element_basic_type(n->in(2)) == T_LONG);
  match(Set dst (XorReductionV src1 src2));
  effect(TEMP tmp);
  format %{ "reduce_xorL $dst, $src1, $src2\t# KILL $tmp" %}
  ins_encode %{
    // Element type and lane count describe the vector operand src2.
    const BasicType elem_bt = Matcher::vector_element_basic_type(this, $src2);
    const uint num_elems    = Matcher::vector_length(this, $src2);

    VectorRegister vec_in  = as_VectorRegister($src2$$reg);
    VectorRegister scratch = as_VectorRegister($tmp$$reg);
    __ reduce_integral_v($dst$$Register, $src1$$Register, vec_in, scratch,
                         this->ideal_Opcode(), elem_bt, num_elems);
  %}
  ins_pipe(pipe_slow);
%}

// vector xor reduction - predicated

// XorReductionV under mask v0 for T_BYTE / T_SHORT / T_INT vectors, delegated to
// reduce_integral_v; tmp is a vector temp that the rule kills.
instruct reduce_xor_masked(iRegINoSp dst, iRegIorL2I src1, vReg src2, vRegMask_V0 v0, vReg tmp) %{
  predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE ||
            Matcher::vector_element_basic_type(n->in(2)) == T_SHORT ||
            Matcher::vector_element_basic_type(n->in(2)) == T_INT);
  match(Set dst (XorReductionV (Binary src1 src2) v0));
  effect(TEMP tmp);
  format %{ "reduce_xor_masked $dst, $src1, $src2, $v0\t# KILL $tmp" %}
  ins_encode %{
    // Element type and lane count describe the vector operand src2, not the scalar result.
    const BasicType elem_bt = Matcher::vector_element_basic_type(this, $src2);
    const uint num_elems    = Matcher::vector_length(this, $src2);

    VectorRegister vec_in  = as_VectorRegister($src2$$reg);
    VectorRegister scratch = as_VectorRegister($tmp$$reg);
    __ reduce_integral_v($dst$$Register, $src1$$Register, vec_in, scratch,
                         this->ideal_Opcode(), elem_bt, num_elems, Assembler::v0_t);
  %}
  ins_pipe(pipe_slow);
%}

// XorReductionV under mask v0 for T_LONG vectors, delegated to reduce_integral_v;
// tmp is a vector temp that the rule kills.
instruct reduce_xorL_masked(iRegLNoSp dst, iRegL src1, vReg src2, vRegMask_V0 v0, vReg tmp) %{
  predicate(Matcher::vector_element_basic_type(n->in(2)) == T_LONG);
  match(Set dst (XorReductionV (Binary src1 src2) v0));
  effect(TEMP tmp);
  format %{ "reduce_xorL_masked $dst, $src1, $src2, $v0\t# KILL $tmp" %}
  ins_encode %{
    // Element type and lane count describe the vector operand src2.
    const BasicType elem_bt = Matcher::vector_element_basic_type(this, $src2);
    const uint num_elems    = Matcher::vector_length(this, $src2);

    VectorRegister vec_in  = as_VectorRegister($src2$$reg);
    VectorRegister scratch = as_VectorRegister($tmp$$reg);
    __ reduce_integral_v($dst$$Register, $src1$$Register, vec_in, scratch,
                         this->ideal_Opcode(), elem_bt, num_elems, Assembler::v0_t);
  %}
  ins_pipe(pipe_slow);
%}

// vector add reduction

// AddReductionVI for T_BYTE / T_SHORT / T_INT vectors. The work is delegated to
// reduce_integral_v; tmp is a vector temp that the rule kills.
instruct reduce_add(iRegINoSp dst, iRegIorL2I src1, vReg src2, vReg tmp) %{
  predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE ||
            Matcher::vector_element_basic_type(n->in(2)) == T_SHORT ||
            Matcher::vector_element_basic_type(n->in(2)) == T_INT);
  match(Set dst (AddReductionVI src1 src2));
  effect(TEMP tmp);
  format %{ "reduce_add $dst, $src1, $src2\t# KILL $tmp" %}
  ins_encode %{
    // Element type and lane count describe the vector operand src2, not the scalar result.
    const BasicType elem_bt = Matcher::vector_element_basic_type(this, $src2);
    const uint num_elems    = Matcher::vector_length(this, $src2);

    VectorRegister vec_in  = as_VectorRegister($src2$$reg);
    VectorRegister scratch = as_VectorRegister($tmp$$reg);
    __ reduce_integral_v($dst$$Register, $src1$$Register, vec_in, scratch,
                         this->ideal_Opcode(), elem_bt, num_elems);
  %}
  ins_pipe(pipe_slow);
%}

// AddReductionVL for T_LONG vectors; scalar input and result live in long registers.
// The work is delegated to reduce_integral_v; tmp is a vector temp that the rule kills.
instruct reduce_addL(iRegLNoSp dst, iRegL src1, vReg src2, vReg tmp) %{
  predicate(Matcher::vector_element_basic_type(n->in(2)) == T_LONG);
  match(Set dst (AddReductionVL src1 src2));
  effect(TEMP tmp);
  format %{ "reduce_addL $dst, $src1, $src2\t# KILL $tmp" %}
  ins_encode %{
    // Element type and lane count describe the vector operand src2.
    const BasicType elem_bt = Matcher::vector_element_basic_type(this, $src2);
    const uint num_elems    = Matcher::vector_length(this, $src2);

    VectorRegister vec_in  = as_VectorRegister($src2$$reg);
    VectorRegister scratch = as_VectorRegister($tmp$$reg);
    __ reduce_integral_v($dst$$Register, $src1$$Register, vec_in, scratch,
                         this->ideal_Opcode(), elem_bt, num_elems);
  %}
  ins_pipe(pipe_slow);
%}

// Distinguish two cases based on requires_strict_order
// 1. Non strictly-ordered AddReductionVF/D. For example, AddReductionVF/D
//    generated by Vector API. It is more beneficial performance-wise to do
//    an unordered FP reduction sum (vfredusum.vs).
// 2. Strictly-ordered AddReductionVF/D. For example, AddReductionVF/D
//    generated by auto-vectorization. Must do an ordered FP reduction sum
//    (vfredosum.vs).

// Float add reduction for nodes that require strict ordering (vfredosum.vs).
instruct reduce_addF_ordered(fRegF dst, fRegF src1, vReg src2, vReg tmp) %{
  predicate(n->as_Reduction()->requires_strict_order());
  match(Set dst (AddReductionVF src1 src2));
  effect(TEMP tmp);
  format %{ "reduce_addF_ordered $dst, $src1, $src2\t# KILL $tmp" %}
  ins_encode %{
    const uint vlen = Matcher::vector_length(this, $src2);
    VectorRegister acc_v = as_VectorRegister($tmp$$reg);
    VectorRegister src_v = as_VectorRegister($src2$$reg);
    __ vsetvli_helper(T_FLOAT, vlen);
    // Move the scalar input into the accumulator, reduce, then move the result out.
    __ vfmv_s_f(acc_v, $src1$$FloatRegister);
    __ vfredosum_vs(acc_v, src_v, acc_v);
    __ vfmv_f_s($dst$$FloatRegister, acc_v);
  %}
  ins_pipe(pipe_slow);
%}

// Float add reduction for nodes that do not require strict ordering (vfredusum.vs).
instruct reduce_addF_unordered(fRegF dst, fRegF src1, vReg src2, vReg tmp) %{
  predicate(!n->as_Reduction()->requires_strict_order());
  match(Set dst (AddReductionVF src1 src2));
  effect(TEMP tmp);
  format %{ "reduce_addF_unordered $dst, $src1, $src2\t# KILL $tmp" %}
  ins_encode %{
    const uint vlen = Matcher::vector_length(this, $src2);
    VectorRegister acc_v = as_VectorRegister($tmp$$reg);
    VectorRegister src_v = as_VectorRegister($src2$$reg);
    __ vsetvli_helper(T_FLOAT, vlen);
    // Move the scalar input into the accumulator, reduce, then move the result out.
    __ vfmv_s_f(acc_v, $src1$$FloatRegister);
    __ vfredusum_vs(acc_v, src_v, acc_v);
    __ vfmv_f_s($dst$$FloatRegister, acc_v);
  %}
  ins_pipe(pipe_slow);
%}

// Double add reduction for nodes that require strict ordering (vfredosum.vs).
instruct reduce_addD_ordered(fRegD dst, fRegD src1, vReg src2, vReg tmp) %{
  predicate(n->as_Reduction()->requires_strict_order());
  match(Set dst (AddReductionVD src1 src2));
  effect(TEMP tmp);
  format %{ "reduce_addD_ordered $dst, $src1, $src2\t# KILL $tmp" %}
  ins_encode %{
    const uint vlen = Matcher::vector_length(this, $src2);
    VectorRegister acc_v = as_VectorRegister($tmp$$reg);
    VectorRegister src_v = as_VectorRegister($src2$$reg);
    __ vsetvli_helper(T_DOUBLE, vlen);
    // Move the scalar input into the accumulator, reduce, then move the result out.
    __ vfmv_s_f(acc_v, $src1$$FloatRegister);
    __ vfredosum_vs(acc_v, src_v, acc_v);
    __ vfmv_f_s($dst$$FloatRegister, acc_v);
  %}
  ins_pipe(pipe_slow);
%}

// Double add reduction for nodes that do not require strict ordering (vfredusum.vs).
instruct reduce_addD_unordered(fRegD dst, fRegD src1, vReg src2, vReg tmp) %{
  predicate(!n->as_Reduction()->requires_strict_order());
  match(Set dst (AddReductionVD src1 src2));
  effect(TEMP tmp);
  format %{ "reduce_addD_unordered $dst, $src1, $src2\t# KILL $tmp" %}
  ins_encode %{
    const uint vlen = Matcher::vector_length(this, $src2);
    VectorRegister acc_v = as_VectorRegister($tmp$$reg);
    VectorRegister src_v = as_VectorRegister($src2$$reg);
    __ vsetvli_helper(T_DOUBLE, vlen);
    // Move the scalar input into the accumulator, reduce, then move the result out.
    __ vfmv_s_f(acc_v, $src1$$FloatRegister);
    __ vfredusum_vs(acc_v, src_v, acc_v);
    __ vfmv_f_s($dst$$FloatRegister, acc_v);
  %}
  ins_pipe(pipe_slow);
%}

// vector add reduction - predicated

// Predicated integer add reduction (byte/short/int elements); the mask operand is v0
// and the reduction helper is invoked with Assembler::v0_t.
instruct reduce_add_masked(iRegINoSp dst, iRegIorL2I src1, vReg src2, vRegMask_V0 v0, vReg tmp) %{
  predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE ||
            Matcher::vector_element_basic_type(n->in(2)) == T_SHORT ||
            Matcher::vector_element_basic_type(n->in(2)) == T_INT);
  match(Set dst (AddReductionVI (Binary src1 src2) v0));
  effect(TEMP tmp);
  format %{ "reduce_add_masked $dst, $src1, $src2, $v0\t# KILL $tmp" %}
  ins_encode %{
    const BasicType elem_bt = Matcher::vector_element_basic_type(this, $src2);
    const uint vlen = Matcher::vector_length(this, $src2);
    VectorRegister src_v = as_VectorRegister($src2$$reg);
    VectorRegister tmp_v = as_VectorRegister($tmp$$reg);
    __ reduce_integral_v($dst$$Register, $src1$$Register, src_v, tmp_v,
                         this->ideal_Opcode(), elem_bt, vlen, Assembler::v0_t);
  %}
  ins_pipe(pipe_slow);
%}

// Predicated long add reduction; the mask operand is v0.
instruct reduce_addL_masked(iRegLNoSp dst, iRegL src1, vReg src2, vRegMask_V0 v0, vReg tmp) %{
  predicate(Matcher::vector_element_basic_type(n->in(2)) == T_LONG);
  match(Set dst (AddReductionVL (Binary src1 src2) v0));
  effect(TEMP tmp);
  format %{ "reduce_addL_masked $dst, $src1, $src2, $v0\t# KILL $tmp" %}
  ins_encode %{
    const BasicType elem_bt = Matcher::vector_element_basic_type(this, $src2);
    const uint vlen = Matcher::vector_length(this, $src2);
    VectorRegister src_v = as_VectorRegister($src2$$reg);
    VectorRegister tmp_v = as_VectorRegister($tmp$$reg);
    __ reduce_integral_v($dst$$Register, $src1$$Register, src_v, tmp_v,
                         this->ideal_Opcode(), elem_bt, vlen, Assembler::v0_t);
  %}
  ins_pipe(pipe_slow);
%}

// Predicated float add reduction. This rule has no strict-order predicate and always
// emits the ordered sum (vfredosum.vs) under the v0 mask.
instruct reduce_addF_masked(fRegF dst, fRegF src1, vReg src2, vRegMask_V0 v0, vReg tmp) %{
  match(Set dst (AddReductionVF (Binary src1 src2) v0));
  effect(TEMP tmp);
  format %{ "reduce_addF_masked $dst, $src1, $src2, $v0\t# KILL $tmp" %}
  ins_encode %{
    const uint vlen = Matcher::vector_length(this, $src2);
    VectorRegister acc_v = as_VectorRegister($tmp$$reg);
    VectorRegister src_v = as_VectorRegister($src2$$reg);
    __ vsetvli_helper(T_FLOAT, vlen);
    // Move the scalar input into the accumulator, reduce under mask, then move the result out.
    __ vfmv_s_f(acc_v, $src1$$FloatRegister);
    __ vfredosum_vs(acc_v, src_v, acc_v, Assembler::v0_t);
    __ vfmv_f_s($dst$$FloatRegister, acc_v);
  %}
  ins_pipe(pipe_slow);
%}

// Predicated double add reduction. This rule has no strict-order predicate and always
// emits the ordered sum (vfredosum.vs) under the v0 mask.
instruct reduce_addD_masked(fRegD dst, fRegD src1, vReg src2, vRegMask_V0 v0, vReg tmp) %{
  match(Set dst (AddReductionVD (Binary src1 src2) v0));
  effect(TEMP tmp);
  format %{ "reduce_addD_masked $dst, $src1, $src2, $v0\t# KILL $tmp" %}
  ins_encode %{
    const uint vlen = Matcher::vector_length(this, $src2);
    VectorRegister acc_v = as_VectorRegister($tmp$$reg);
    VectorRegister src_v = as_VectorRegister($src2$$reg);
    __ vsetvli_helper(T_DOUBLE, vlen);
    // Move the scalar input into the accumulator, reduce under mask, then move the result out.
    __ vfmv_s_f(acc_v, $src1$$FloatRegister);
    __ vfredosum_vs(acc_v, src_v, acc_v, Assembler::v0_t);
    __ vfmv_f_s($dst$$FloatRegister, acc_v);
  %}
  ins_pipe(pipe_slow);
%}

// vector integer max reduction

// Integer max reduction for vectors with byte, short or int elements.
instruct vreduce_max(iRegINoSp dst, iRegIorL2I src1, vReg src2, vReg tmp) %{
  predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE ||
            Matcher::vector_element_basic_type(n->in(2)) == T_SHORT ||
            Matcher::vector_element_basic_type(n->in(2)) == T_INT);
  match(Set dst (MaxReductionV src1 src2));
  effect(TEMP tmp);
  format %{ "vreduce_max $dst, $src1, $src2\t# KILL $tmp" %}
  ins_encode %{
    const BasicType elem_bt = Matcher::vector_element_basic_type(this, $src2);
    const uint vlen = Matcher::vector_length(this, $src2);
    VectorRegister src_v = as_VectorRegister($src2$$reg);
    VectorRegister tmp_v = as_VectorRegister($tmp$$reg);
    __ reduce_integral_v($dst$$Register, $src1$$Register, src_v, tmp_v,
                         this->ideal_Opcode(), elem_bt, vlen);
  %}
  ins_pipe(pipe_slow);
%}

// Long max reduction (T_LONG elements, long scalar registers).
instruct vreduce_maxL(iRegLNoSp dst, iRegL src1, vReg src2, vReg tmp) %{
  predicate(Matcher::vector_element_basic_type(n->in(2)) == T_LONG);
  match(Set dst (MaxReductionV src1 src2));
  effect(TEMP tmp);
  format %{ "vreduce_maxL $dst, $src1, $src2\t# KILL $tmp" %}
  ins_encode %{
    const BasicType elem_bt = Matcher::vector_element_basic_type(this, $src2);
    const uint vlen = Matcher::vector_length(this, $src2);
    VectorRegister src_v = as_VectorRegister($src2$$reg);
    VectorRegister tmp_v = as_VectorRegister($tmp$$reg);
    __ reduce_integral_v($dst$$Register, $src1$$Register, src_v, tmp_v,
                         this->ideal_Opcode(), elem_bt, vlen);
  %}
  ins_pipe(pipe_slow);
%}

// vector integer max reduction - predicated

// Predicated integer max reduction (byte/short/int elements); the mask operand is v0.
instruct vreduce_max_masked(iRegINoSp dst, iRegIorL2I src1, vReg src2, vRegMask_V0 v0, vReg tmp) %{
  predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE ||
            Matcher::vector_element_basic_type(n->in(2)) == T_SHORT ||
            Matcher::vector_element_basic_type(n->in(2)) == T_INT);
  match(Set dst (MaxReductionV (Binary src1 src2) v0));
  effect(TEMP tmp);
  format %{ "vreduce_max_masked $dst, $src1, $src2, $v0\t# KILL $tmp" %}
  ins_encode %{
    const BasicType elem_bt = Matcher::vector_element_basic_type(this, $src2);
    const uint vlen = Matcher::vector_length(this, $src2);
    VectorRegister src_v = as_VectorRegister($src2$$reg);
    VectorRegister tmp_v = as_VectorRegister($tmp$$reg);
    __ reduce_integral_v($dst$$Register, $src1$$Register, src_v, tmp_v,
                         this->ideal_Opcode(), elem_bt, vlen, Assembler::v0_t);
  %}
  ins_pipe(pipe_slow);
%}

// Predicated long max reduction; the mask operand is v0.
instruct vreduce_maxL_masked(iRegLNoSp dst, iRegL src1, vReg src2, vRegMask_V0 v0, vReg tmp) %{
  predicate(Matcher::vector_element_basic_type(n->in(2)) == T_LONG);
  match(Set dst (MaxReductionV (Binary src1 src2) v0));
  effect(TEMP tmp);
  format %{ "vreduce_maxL_masked $dst, $src1, $src2, $v0\t# KILL $tmp" %}
  ins_encode %{
    const BasicType elem_bt = Matcher::vector_element_basic_type(this, $src2);
    const uint vlen = Matcher::vector_length(this, $src2);
    VectorRegister src_v = as_VectorRegister($src2$$reg);
    VectorRegister tmp_v = as_VectorRegister($tmp$$reg);
    __ reduce_integral_v($dst$$Register, $src1$$Register, src_v, tmp_v,
                         this->ideal_Opcode(), elem_bt, vlen, Assembler::v0_t);
  %}
  ins_pipe(pipe_slow);
%}

// vector integer min reduction

// Integer min reduction for vectors with byte, short or int elements.
instruct vreduce_min(iRegINoSp dst, iRegIorL2I src1, vReg src2, vReg tmp) %{
  predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE ||
            Matcher::vector_element_basic_type(n->in(2)) == T_SHORT ||
            Matcher::vector_element_basic_type(n->in(2)) == T_INT);
  match(Set dst (MinReductionV src1 src2));
  effect(TEMP tmp);
  format %{ "vreduce_min $dst, $src1, $src2\t# KILL $tmp" %}
  ins_encode %{
    const BasicType elem_bt = Matcher::vector_element_basic_type(this, $src2);
    const uint vlen = Matcher::vector_length(this, $src2);
    VectorRegister src_v = as_VectorRegister($src2$$reg);
    VectorRegister tmp_v = as_VectorRegister($tmp$$reg);
    __ reduce_integral_v($dst$$Register, $src1$$Register, src_v, tmp_v,
                         this->ideal_Opcode(), elem_bt, vlen);
  %}
  ins_pipe(pipe_slow);
%}

// Long min reduction (T_LONG elements, long scalar registers).
instruct vreduce_minL(iRegLNoSp dst, iRegL src1, vReg src2, vReg tmp) %{
  predicate(Matcher::vector_element_basic_type(n->in(2)) == T_LONG);
  match(Set dst (MinReductionV src1 src2));
  effect(TEMP tmp);
  format %{ "vreduce_minL $dst, $src1, $src2\t# KILL $tmp" %}
  ins_encode %{
    const BasicType elem_bt = Matcher::vector_element_basic_type(this, $src2);
    const uint vlen = Matcher::vector_length(this, $src2);
    VectorRegister src_v = as_VectorRegister($src2$$reg);
    VectorRegister tmp_v = as_VectorRegister($tmp$$reg);
    __ reduce_integral_v($dst$$Register, $src1$$Register, src_v, tmp_v,
                         this->ideal_Opcode(), elem_bt, vlen);
  %}
  ins_pipe(pipe_slow);
%}

// vector integer min reduction - predicated

// Predicated integer min reduction (byte/short/int elements); the mask operand is v0.
instruct vreduce_min_masked(iRegINoSp dst, iRegIorL2I src1, vReg src2, vRegMask_V0 v0, vReg tmp) %{
  predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE ||
            Matcher::vector_element_basic_type(n->in(2)) == T_SHORT ||
            Matcher::vector_element_basic_type(n->in(2)) == T_INT);
  match(Set dst (MinReductionV (Binary src1 src2) v0));
  effect(TEMP tmp);
  format %{ "vreduce_min_masked $dst, $src1, $src2, $v0\t# KILL $tmp" %}
  ins_encode %{
    const BasicType elem_bt = Matcher::vector_element_basic_type(this, $src2);
    const uint vlen = Matcher::vector_length(this, $src2);
    VectorRegister src_v = as_VectorRegister($src2$$reg);
    VectorRegister tmp_v = as_VectorRegister($tmp$$reg);
    __ reduce_integral_v($dst$$Register, $src1$$Register, src_v, tmp_v,
                         this->ideal_Opcode(), elem_bt, vlen, Assembler::v0_t);
  %}
  ins_pipe(pipe_slow);
%}

// Predicated long min reduction; the mask operand is v0.
instruct vreduce_minL_masked(iRegLNoSp dst, iRegL src1, vReg src2, vRegMask_V0 v0, vReg tmp) %{
  predicate(Matcher::vector_element_basic_type(n->in(2)) == T_LONG);
  match(Set dst (MinReductionV (Binary src1 src2) v0));
  effect(TEMP tmp);
  format %{ "vreduce_minL_masked $dst, $src1, $src2, $v0\t# KILL $tmp" %}
  ins_encode %{
    const BasicType elem_bt = Matcher::vector_element_basic_type(this, $src2);
    const uint vlen = Matcher::vector_length(this, $src2);
    VectorRegister src_v = as_VectorRegister($src2$$reg);
    VectorRegister tmp_v = as_VectorRegister($tmp$$reg);
    __ reduce_integral_v($dst$$Register, $src1$$Register, src_v, tmp_v,
                         this->ideal_Opcode(), elem_bt, vlen, Assembler::v0_t);
  %}
  ins_pipe(pipe_slow);
%}

// vector float max reduction

// Float max reduction; delegates to reduce_minmax_fp_v with two scratch vector registers.
instruct vreduce_maxF(fRegF dst, fRegF src1, vReg src2, vReg tmp1, vReg tmp2) %{
  predicate(Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT);
  match(Set dst (MaxReductionV src1 src2));
  effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2);
  format %{ "vreduce_maxF $dst, $src1, $src2, $tmp1, $tmp2" %}
  ins_encode %{
    const uint vlen = Matcher::vector_length(this, $src2);
    VectorRegister src_v  = as_VectorRegister($src2$$reg);
    VectorRegister tmp1_v = as_VectorRegister($tmp1$$reg);
    VectorRegister tmp2_v = as_VectorRegister($tmp2$$reg);
    __ reduce_minmax_fp_v($dst$$FloatRegister, $src1$$FloatRegister, src_v,
                          tmp1_v, tmp2_v,
                          false /* is_double */, false /* is_min */, vlen);
  %}
  ins_pipe(pipe_slow);
%}

// Double max reduction; delegates to reduce_minmax_fp_v with two scratch vector registers.
instruct vreduce_maxD(fRegD dst, fRegD src1, vReg src2, vReg tmp1, vReg tmp2) %{
  predicate(Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE);
  match(Set dst (MaxReductionV src1 src2));
  effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2);
  format %{ "vreduce_maxD $dst, $src1, $src2, $tmp1, $tmp2" %}
  ins_encode %{
    const uint vlen = Matcher::vector_length(this, $src2);
    VectorRegister src_v  = as_VectorRegister($src2$$reg);
    VectorRegister tmp1_v = as_VectorRegister($tmp1$$reg);
    VectorRegister tmp2_v = as_VectorRegister($tmp2$$reg);
    __ reduce_minmax_fp_v($dst$$FloatRegister, $src1$$FloatRegister, src_v,
                          tmp1_v, tmp2_v,
                          true /* is_double */, false /* is_min */, vlen);
  %}
  ins_pipe(pipe_slow);
%}

// vector float max reduction - predicated

// Predicated float max reduction; the mask operand is v0.
instruct vreduce_maxF_masked(fRegF dst, fRegF src1, vReg src2, vRegMask_V0 v0, vReg tmp1, vReg tmp2) %{
  predicate(Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT);
  match(Set dst (MaxReductionV (Binary src1 src2) v0));
  effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2);
  format %{ "vreduce_maxF_masked $dst, $src1, $src2, $v0\t# KILL $tmp1, $tmp2" %}
  ins_encode %{
    const uint vlen = Matcher::vector_length(this, $src2);
    VectorRegister src_v  = as_VectorRegister($src2$$reg);
    VectorRegister tmp1_v = as_VectorRegister($tmp1$$reg);
    VectorRegister tmp2_v = as_VectorRegister($tmp2$$reg);
    __ reduce_minmax_fp_v($dst$$FloatRegister, $src1$$FloatRegister, src_v,
                          tmp1_v, tmp2_v,
                          false /* is_double */, false /* is_min */,
                          vlen, Assembler::v0_t);
  %}
  ins_pipe(pipe_slow);
%}

// Predicated double max reduction; the mask operand is v0.
instruct vreduce_maxD_masked(fRegD dst, fRegD src1, vReg src2, vRegMask_V0 v0, vReg tmp1, vReg tmp2) %{
  predicate(Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE);
  match(Set dst (MaxReductionV (Binary src1 src2) v0));
  effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2);
  format %{ "vreduce_maxD_masked $dst, $src1, $src2, $v0\t# KILL $tmp1, $tmp2" %}
  ins_encode %{
    const uint vlen = Matcher::vector_length(this, $src2);
    VectorRegister src_v  = as_VectorRegister($src2$$reg);
    VectorRegister tmp1_v = as_VectorRegister($tmp1$$reg);
    VectorRegister tmp2_v = as_VectorRegister($tmp2$$reg);
    __ reduce_minmax_fp_v($dst$$FloatRegister, $src1$$FloatRegister, src_v,
                          tmp1_v, tmp2_v,
                          true /* is_double */, false /* is_min */,
                          vlen, Assembler::v0_t);
  %}
  ins_pipe(pipe_slow);
%}

// vector float min reduction

// Float min reduction; delegates to reduce_minmax_fp_v with two scratch vector registers.
instruct vreduce_minF(fRegF dst, fRegF src1, vReg src2, vReg tmp1, vReg tmp2) %{
  predicate(Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT);
  match(Set dst (MinReductionV src1 src2));
  effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2);
  format %{ "vreduce_minF $dst, $src1, $src2, $tmp1, $tmp2" %}
  ins_encode %{
    const uint vlen = Matcher::vector_length(this, $src2);
    VectorRegister src_v  = as_VectorRegister($src2$$reg);
    VectorRegister tmp1_v = as_VectorRegister($tmp1$$reg);
    VectorRegister tmp2_v = as_VectorRegister($tmp2$$reg);
    __ reduce_minmax_fp_v($dst$$FloatRegister, $src1$$FloatRegister, src_v,
                          tmp1_v, tmp2_v,
                          false /* is_double */, true /* is_min */, vlen);
  %}
  ins_pipe(pipe_slow);
%}

// Double min reduction; delegates to reduce_minmax_fp_v with two scratch vector registers.
instruct vreduce_minD(fRegD dst, fRegD src1, vReg src2, vReg tmp1, vReg tmp2) %{
  predicate(Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE);
  match(Set dst (MinReductionV src1 src2));
  effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2);
  format %{ "vreduce_minD $dst, $src1, $src2, $tmp1, $tmp2" %}
  ins_encode %{
    const uint vlen = Matcher::vector_length(this, $src2);
    VectorRegister src_v  = as_VectorRegister($src2$$reg);
    VectorRegister tmp1_v = as_VectorRegister($tmp1$$reg);
    VectorRegister tmp2_v = as_VectorRegister($tmp2$$reg);
    __ reduce_minmax_fp_v($dst$$FloatRegister, $src1$$FloatRegister, src_v,
                          tmp1_v, tmp2_v,
                          true /* is_double */, true /* is_min */, vlen);
  %}
  ins_pipe(pipe_slow);
%}

// vector float min reduction - predicated

// Predicated float min reduction; the mask operand is v0.
instruct vreduce_minF_masked(fRegF dst, fRegF src1, vReg src2, vRegMask_V0 v0, vReg tmp1, vReg tmp2) %{
  predicate(Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT);
  match(Set dst (MinReductionV (Binary src1 src2) v0));
  effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2);
  format %{ "vreduce_minF_masked $dst, $src1, $src2, $v0\t# KILL $tmp1, $tmp2" %}
  ins_encode %{
    const uint vlen = Matcher::vector_length(this, $src2);
    VectorRegister src_v  = as_VectorRegister($src2$$reg);
    VectorRegister tmp1_v = as_VectorRegister($tmp1$$reg);
    VectorRegister tmp2_v = as_VectorRegister($tmp2$$reg);
    __ reduce_minmax_fp_v($dst$$FloatRegister, $src1$$FloatRegister, src_v,
                          tmp1_v, tmp2_v,
                          false /* is_double */, true /* is_min */,
                          vlen, Assembler::v0_t);
  %}
  ins_pipe(pipe_slow);
%}

// Predicated double min reduction; the mask operand is v0.
instruct vreduce_minD_masked(fRegD dst, fRegD src1, vReg src2, vRegMask_V0 v0, vReg tmp1, vReg tmp2) %{
  predicate(Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE);
  match(Set dst (MinReductionV (Binary src1 src2) v0));
  effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2);
  format %{ "vreduce_minD_masked $dst, $src1, $src2, $v0\t# KILL $tmp1, $tmp2" %}
  ins_encode %{
    const uint vlen = Matcher::vector_length(this, $src2);
    VectorRegister src_v  = as_VectorRegister($src2$$reg);
    VectorRegister tmp1_v = as_VectorRegister($tmp1$$reg);
    VectorRegister tmp2_v = as_VectorRegister($tmp2$$reg);
    __ reduce_minmax_fp_v($dst$$FloatRegister, $src1$$FloatRegister, src_v,
                          tmp1_v, tmp2_v,
                          true /* is_double */, true /* is_min */,
                          vlen, Assembler::v0_t);
  %}
  ins_pipe(pipe_slow);
%}


// ------------------------------ Vector reduction mul -------------------------

// Integer multiply reduction; delegates to reduce_mul_integral_v with two scratch
// vector registers. The element type is taken from the vector input at encode time.
instruct reduce_mulI(iRegINoSp dst, iRegIorL2I isrc, vReg vsrc,
                     vReg tmp1, vReg tmp2) %{
  match(Set dst (MulReductionVI isrc vsrc));
  effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2);
  format %{ "reduce_mulI $dst, $isrc, $vsrc\t" %}
  ins_encode %{
    const BasicType elem_bt = Matcher::vector_element_basic_type(this, $vsrc);
    const uint vlen = Matcher::vector_length(this, $vsrc);
    VectorRegister in_v   = as_VectorRegister($vsrc$$reg);
    VectorRegister tmp1_v = as_VectorRegister($tmp1$$reg);
    VectorRegister tmp2_v = as_VectorRegister($tmp2$$reg);
    __ reduce_mul_integral_v($dst$$Register, $isrc$$Register, in_v,
                             tmp1_v, tmp2_v, elem_bt, vlen);
  %}
  ins_pipe(pipe_slow);
%}

// Predicated integer multiply reduction; the mask operand is v0.
instruct reduce_mulI_masked(iRegINoSp dst, iRegIorL2I isrc, vReg vsrc,
                            vRegMask_V0 v0, vReg tmp1, vReg tmp2) %{
  match(Set dst (MulReductionVI (Binary isrc vsrc) v0));
  effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2);
  format %{ "reduce_mulI_masked $dst, $isrc, $vsrc, $v0\t" %}
  ins_encode %{
    const BasicType elem_bt = Matcher::vector_element_basic_type(this, $vsrc);
    const uint vlen = Matcher::vector_length(this, $vsrc);
    VectorRegister in_v   = as_VectorRegister($vsrc$$reg);
    VectorRegister tmp1_v = as_VectorRegister($tmp1$$reg);
    VectorRegister tmp2_v = as_VectorRegister($tmp2$$reg);
    __ reduce_mul_integral_v($dst$$Register, $isrc$$Register, in_v,
                             tmp1_v, tmp2_v, elem_bt, vlen,
                             Assembler::v0_t);
  %}
  ins_pipe(pipe_slow);
%}

// Long multiply reduction; delegates to reduce_mul_integral_v with two scratch
// vector registers.
instruct reduce_mulL(iRegLNoSp dst, iRegL isrc, vReg vsrc,
                     vReg tmp1, vReg tmp2) %{
  match(Set dst (MulReductionVL isrc vsrc));
  effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2);
  format %{ "reduce_mulL $dst, $isrc, $vsrc\t" %}
  ins_encode %{
    const BasicType elem_bt = Matcher::vector_element_basic_type(this, $vsrc);
    const uint vlen = Matcher::vector_length(this, $vsrc);
    VectorRegister in_v   = as_VectorRegister($vsrc$$reg);
    VectorRegister tmp1_v = as_VectorRegister($tmp1$$reg);
    VectorRegister tmp2_v = as_VectorRegister($tmp2$$reg);
    __ reduce_mul_integral_v($dst$$Register, $isrc$$Register, in_v,
                             tmp1_v, tmp2_v, elem_bt, vlen);
  %}
  ins_pipe(pipe_slow);
%}

// Predicated long multiply reduction; the mask operand is v0.
instruct reduce_mulL_masked(iRegLNoSp dst, iRegL isrc, vReg vsrc,
                            vRegMask_V0 v0, vReg tmp1, vReg tmp2) %{
  match(Set dst (MulReductionVL (Binary isrc vsrc) v0));
  effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2);
  format %{ "reduce_mulL_masked $dst, $isrc, $vsrc, $v0\t" %}
  ins_encode %{
    const BasicType elem_bt = Matcher::vector_element_basic_type(this, $vsrc);
    const uint vlen = Matcher::vector_length(this, $vsrc);
    VectorRegister in_v   = as_VectorRegister($vsrc$$reg);
    VectorRegister tmp1_v = as_VectorRegister($tmp1$$reg);
    VectorRegister tmp2_v = as_VectorRegister($tmp2$$reg);
    __ reduce_mul_integral_v($dst$$Register, $isrc$$Register, in_v,
                             tmp1_v, tmp2_v, elem_bt, vlen,
                             Assembler::v0_t);
  %}
  ins_pipe(pipe_slow);
%}

// vector replicate

// Broadcast an integer scalar register into a non-long integral vector (vmv.v.x).
instruct replicate(vReg dst, iRegIorL2I src) %{
  predicate(Matcher::is_non_long_integral_vector(n));
  match(Set dst (Replicate src));
  format %{ "replicate $dst, $src" %}
  ins_encode %{
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    const uint vlen = Matcher::vector_length(this);
    __ vsetvli_helper(elem_bt, vlen);
    VectorRegister dst_v = as_VectorRegister($dst$$reg);
    __ vmv_v_x(dst_v, as_Register($src$$reg));
  %}
  ins_pipe(pipe_slow);
%}

// Broadcast a long scalar register into a T_LONG vector (vmv.v.x).
instruct replicateL(vReg dst, iRegL src) %{
  predicate(Matcher::vector_element_basic_type(n) == T_LONG);
  match(Set dst (Replicate src));
  format %{ "replicateL $dst, $src" %}
  ins_encode %{
    const uint vlen = Matcher::vector_length(this);
    __ vsetvli_helper(T_LONG, vlen);
    VectorRegister dst_v = as_VectorRegister($dst$$reg);
    __ vmv_v_x(dst_v, as_Register($src$$reg));
  %}
  ins_pipe(pipe_slow);
%}

// Broadcast a 5-bit immediate into a non-long integral vector (vmv.v.i).
instruct replicate_imm5(vReg dst, immI5 con) %{
  predicate(Matcher::is_non_long_integral_vector(n));
  match(Set dst (Replicate con));
  format %{ "replicate_imm5 $dst, $con" %}
  ins_encode %{
    BasicType bt = Matcher::vector_element_basic_type(this);
    // vsetvli_helper takes the number of elements, not the size in bytes; passing the
    // byte length would request too many elements for short and int vectors.
    __ vsetvli_helper(bt, Matcher::vector_length(this));
    __ vmv_v_i(as_VectorRegister($dst$$reg), $con$$constant);
  %}
  ins_pipe(pipe_slow);
%}

// Broadcast a 5-bit long immediate into a T_LONG vector (vmv.v.i).
instruct replicateL_imm5(vReg dst, immL5 con) %{
  predicate(Matcher::vector_element_basic_type(n) == T_LONG);
  match(Set dst (Replicate con));
  format %{ "replicateL_imm5 $dst, $con" %}
  ins_encode %{
    const uint vlen = Matcher::vector_length(this);
    __ vsetvli_helper(T_LONG, vlen);
    VectorRegister dst_v = as_VectorRegister($dst$$reg);
    __ vmv_v_i(dst_v, $con$$constant);
  %}
  ins_pipe(pipe_slow);
%}

// Broadcast a value held in a float register into a vector with 16-bit (T_SHORT)
// elements using vfmv.v.f; asserts that UseZvfh is enabled.
instruct replicateHF(vReg dst, fRegF src) %{
  predicate(Matcher::vector_element_basic_type(n) == T_SHORT);
  match(Set dst (Replicate src));
  format %{ "replicateHF $dst, $src" %}
  ins_encode %{
    assert(UseZvfh, "must");
    const uint vlen = Matcher::vector_length(this);
    __ vsetvli_helper(T_SHORT, vlen);
    VectorRegister dst_v = as_VectorRegister($dst$$reg);
    __ vfmv_v_f(dst_v, $src$$FloatRegister);
  %}
  ins_pipe(pipe_slow);
%}

// Broadcast a float register into a T_FLOAT vector (vfmv.v.f).
instruct replicateF(vReg dst, fRegF src) %{
  predicate(Matcher::vector_element_basic_type(n) == T_FLOAT);
  match(Set dst (Replicate src));
  format %{ "replicateF $dst, $src" %}
  ins_encode %{
    const uint vlen = Matcher::vector_length(this);
    __ vsetvli_helper(T_FLOAT, vlen);
    VectorRegister dst_v = as_VectorRegister($dst$$reg);
    __ vfmv_v_f(dst_v, $src$$FloatRegister);
  %}
  ins_pipe(pipe_slow);
%}

// Broadcast a double register into a T_DOUBLE vector (vfmv.v.f).
instruct replicateD(vReg dst, fRegD src) %{
  predicate(Matcher::vector_element_basic_type(n) == T_DOUBLE);
  match(Set dst (Replicate src));
  format %{ "replicateD $dst, $src" %}
  ins_encode %{
    const uint vlen = Matcher::vector_length(this);
    __ vsetvli_helper(T_DOUBLE, vlen);
    VectorRegister dst_v = as_VectorRegister($dst$$reg);
    __ vfmv_v_f(dst_v, $src$$FloatRegister);
  %}
  ins_pipe(pipe_slow);
%}

// vector shift
//
// Following shift instruct's are shared by vectorization (in SLP, superword.cpp) and Vector API.
//
// Shift behaviour in vectorization is defined by java language spec, which includes:
//  1. "If the promoted type of the left-hand operand is int, then only the five lowest-order bits of
//      the right-hand operand are used as the shift distance. It is as if the right-hand operand were
//      subjected to a bitwise logical AND operator & (§15.22.1) with the mask value 0x1f (0b11111).
//      The shift distance actually used is therefore always in the range 0 to 31, inclusive."
//  2. similarly, for long "with the mask value 0x3f (0b111111)"
// check https://docs.oracle.com/javase/specs/jls/se21/html/jls-15.html#jls-15.19 for details.
//
// Shift behaviour in Vector API is defined as:
//   e.g. for ASHR, "a>>(n&(ESIZE*8-1))"
//   this behaviour is the same as that of the shift instructions in the RISC-V vector extension.
// check https://docs.oracle.com/en/java/javase/21/docs/api/jdk.incubator.vector/jdk/incubator/vector/VectorOperators.html#ASHR
// and https://github.com/riscv/riscv-v-spec/blob/master/v-spec.adoc#116-vector-single-width-shift-instructions for details.
//
// Despite the difference between these 2 behaviours, the same shift instruct's of byte and short are
// still shared between vectorization and Vector API. The way it works is hidden inside the implementation
// of vectorization and Vector API:
//  1. when doing optimization vectorization masks the shift value with "(BitsPerInt - 1)" or "(BitsPerLong - 1)"
//  2. in Vector API, shift value is masked with SHIFT_MASK (e.g. for ByteVector it's "Byte.SIZE - 1")
//
// Were it not for this pre-processing of the shift value in vectorization and in Vector API respectively,
// the intrinsic behaviour would differ (e.g. for a byte shift value of 16) and the two could not share
// the same instruct here, as vectorization requires x >> 16, but Vector API requires x >> (16 & 7).

// Arithmetic right shift of byte lanes by a per-lane count vector.
// Counts may exceed the lane width (see the vector shift notes earlier in
// this file), so lanes whose count is above BitsPerByte - 1 are shifted by
// BitsPerByte - 1 instead. v0 serves as a scratch mask to pick the case.
instruct vasrB(vReg dst, vReg src, vReg shift, vRegMask_V0 v0) %{
  match(Set dst (RShiftVB src shift));
  effect(TEMP_DEF dst, TEMP v0);
  format %{ "vasrB $dst, $src, $shift" %}
  ins_encode %{
    const VectorRegister dst_v  = as_VectorRegister($dst$$reg);
    const VectorRegister src_v  = as_VectorRegister($src$$reg);
    const VectorRegister cnt_v  = as_VectorRegister($shift$$reg);
    const VectorRegister mask_v = as_VectorRegister($v0$$reg);
    const int max_cnt = BitsPerByte - 1;
    __ vsetvli_helper(T_BYTE, Matcher::vector_length(this));
    // mask := lanes with count > max_cnt; those lanes receive src >> max_cnt
    __ vmsgtu_vi(mask_v, cnt_v, max_cnt);
    __ vsra_vi(dst_v, src_v, max_cnt, Assembler::v0_t);
    // invert the mask; the remaining lanes receive src >> count
    __ vmnot_m(mask_v, mask_v);
    __ vsra_vv(dst_v, src_v, cnt_v, Assembler::v0_t);
  %}
  ins_pipe(pipe_slow);
%}

// Arithmetic right shift of short lanes by a per-lane count vector.
// Counts may exceed the lane width (see the vector shift notes earlier in
// this file), so lanes whose count is above BitsPerShort - 1 are shifted by
// BitsPerShort - 1 instead. v0 serves as a scratch mask to pick the case.
instruct vasrS(vReg dst, vReg src, vReg shift, vRegMask_V0 v0) %{
  match(Set dst (RShiftVS src shift));
  effect(TEMP_DEF dst, TEMP v0);
  format %{ "vasrS $dst, $src, $shift" %}
  ins_encode %{
    const VectorRegister dst_v  = as_VectorRegister($dst$$reg);
    const VectorRegister src_v  = as_VectorRegister($src$$reg);
    const VectorRegister cnt_v  = as_VectorRegister($shift$$reg);
    const VectorRegister mask_v = as_VectorRegister($v0$$reg);
    const int max_cnt = BitsPerShort - 1;
    __ vsetvli_helper(T_SHORT, Matcher::vector_length(this));
    // mask := lanes with count > max_cnt; those lanes receive src >> max_cnt
    __ vmsgtu_vi(mask_v, cnt_v, max_cnt);
    __ vsra_vi(dst_v, src_v, max_cnt, Assembler::v0_t);
    // invert the mask; the remaining lanes receive src >> count
    __ vmnot_m(mask_v, mask_v);
    __ vsra_vv(dst_v, src_v, cnt_v, Assembler::v0_t);
  %}
  ins_pipe(pipe_slow);
%}

// Arithmetic right shift of int lanes by a per-lane count vector.
instruct vasrI(vReg dst, vReg src, vReg shift) %{
  match(Set dst (RShiftVI src shift));
  format %{ "vasrI $dst, $src, $shift" %}
  ins_encode %{
    const VectorRegister dst_v = as_VectorRegister($dst$$reg);
    const VectorRegister src_v = as_VectorRegister($src$$reg);
    const VectorRegister cnt_v = as_VectorRegister($shift$$reg);
    __ vsetvli_helper(T_INT, Matcher::vector_length(this));
    __ vsra_vv(dst_v, src_v, cnt_v);
  %}
  ins_pipe(pipe_slow);
%}

// Arithmetic right shift of long lanes by a per-lane count vector.
instruct vasrL(vReg dst, vReg src, vReg shift) %{
  match(Set dst (RShiftVL src shift));
  format %{ "vasrL $dst, $src, $shift" %}
  ins_encode %{
    const VectorRegister dst_v = as_VectorRegister($dst$$reg);
    const VectorRegister src_v = as_VectorRegister($src$$reg);
    const VectorRegister cnt_v = as_VectorRegister($shift$$reg);
    __ vsetvli_helper(T_LONG, Matcher::vector_length(this));
    __ vsra_vv(dst_v, src_v, cnt_v);
  %}
  ins_pipe(pipe_slow);
%}

// Masked arithmetic right shift of byte lanes by a per-lane count vector.
// The final shift is predicated on vmask (copied into v0 first).
// NOTE(review): the clamped counts are written back into the register of the
// shift operand, i.e. the shift input is overwritten by this encoding.
instruct vasrB_masked(vReg dst_src, vReg shift, vRegMask vmask, vRegMask_V0 v0) %{
  match(Set dst_src (RShiftVB (Binary dst_src shift) vmask));
  effect(TEMP_DEF dst_src, TEMP v0);
  format %{ "vasrB_masked $dst_src, $dst_src, $shift, $vmask\t# KILL $v0" %}
  ins_encode %{
    const VectorRegister val_v  = as_VectorRegister($dst_src$$reg);
    const VectorRegister cnt_v  = as_VectorRegister($shift$$reg);
    const VectorRegister pred_v = as_VectorRegister($vmask$$reg);
    const VectorRegister v0_v   = as_VectorRegister($v0$$reg);
    const int max_cnt = BitsPerByte - 1;
    __ vsetvli_helper(T_BYTE, Matcher::vector_length(this));
    // v0 := lanes with count > max_cnt
    __ vmsgtu_vi(v0_v, cnt_v, max_cnt);
    // replace the count of those lanes with max_cnt (in place)
    __ vmerge_vim(cnt_v, cnt_v, max_cnt);
    // bring the requested mask into v0, then shift under it
    __ vmv1r_v(v0_v, pred_v);
    __ vsra_vv(val_v, val_v, cnt_v, Assembler::v0_t);
  %}
  ins_pipe(pipe_slow);
%}

// Masked arithmetic right shift of short lanes by a per-lane count vector.
// The final shift is predicated on vmask (copied into v0 first).
// NOTE(review): the clamped counts are written back into the register of the
// shift operand, i.e. the shift input is overwritten by this encoding.
instruct vasrS_masked(vReg dst_src, vReg shift, vRegMask vmask, vRegMask_V0 v0) %{
  match(Set dst_src (RShiftVS (Binary dst_src shift) vmask));
  effect(TEMP_DEF dst_src, TEMP v0);
  format %{ "vasrS_masked $dst_src, $dst_src, $shift, $vmask\t# KILL $v0" %}
  ins_encode %{
    const VectorRegister val_v  = as_VectorRegister($dst_src$$reg);
    const VectorRegister cnt_v  = as_VectorRegister($shift$$reg);
    const VectorRegister pred_v = as_VectorRegister($vmask$$reg);
    const VectorRegister v0_v   = as_VectorRegister($v0$$reg);
    const int max_cnt = BitsPerShort - 1;
    __ vsetvli_helper(T_SHORT, Matcher::vector_length(this));
    // v0 := lanes with count > max_cnt
    __ vmsgtu_vi(v0_v, cnt_v, max_cnt);
    // replace the count of those lanes with max_cnt (in place)
    __ vmerge_vim(cnt_v, cnt_v, max_cnt);
    // bring the requested mask into v0, then shift under it
    __ vmv1r_v(v0_v, pred_v);
    __ vsra_vv(val_v, val_v, cnt_v, Assembler::v0_t);
  %}
  ins_pipe(pipe_slow);
%}

// Masked arithmetic right shift of int lanes by a per-lane count vector;
// the shift is predicated on v0 and updates dst_src in place.
instruct vasrI_masked(vReg dst_src, vReg shift, vRegMask_V0 v0) %{
  match(Set dst_src (RShiftVI (Binary dst_src shift) v0));
  effect(TEMP_DEF dst_src);
  format %{ "vasrI_masked $dst_src, $dst_src, $shift, $v0" %}
  ins_encode %{
    const VectorRegister val_v = as_VectorRegister($dst_src$$reg);
    const VectorRegister cnt_v = as_VectorRegister($shift$$reg);
    __ vsetvli_helper(T_INT, Matcher::vector_length(this));
    __ vsra_vv(val_v, val_v, cnt_v, Assembler::v0_t);
  %}
  ins_pipe(pipe_slow);
%}

// Masked arithmetic right shift of long lanes by a per-lane count vector;
// the shift is predicated on v0 and updates dst_src in place.
instruct vasrL_masked(vReg dst_src, vReg shift, vRegMask_V0 v0) %{
  match(Set dst_src (RShiftVL (Binary dst_src shift) v0));
  effect(TEMP_DEF dst_src);
  format %{ "vasrL_masked $dst_src, $dst_src, $shift, $v0" %}
  ins_encode %{
    const VectorRegister val_v = as_VectorRegister($dst_src$$reg);
    const VectorRegister cnt_v = as_VectorRegister($shift$$reg);
    __ vsetvli_helper(T_LONG, Matcher::vector_length(this));
    __ vsra_vv(val_v, val_v, cnt_v, Assembler::v0_t);
  %}
  ins_pipe(pipe_slow);
%}

// Logical left shift of byte lanes by a per-lane count vector.
// Lanes whose count is above BitsPerByte - 1 are zeroed; all other lanes are
// shifted by their count. v0 serves as a scratch mask to pick the case.
instruct vlslB(vReg dst, vReg src, vReg shift, vRegMask_V0 v0) %{
  match(Set dst (LShiftVB src shift));
  effect(TEMP_DEF dst, TEMP v0);
  format %{ "vlslB $dst, $src, $shift" %}
  ins_encode %{
    const VectorRegister dst_v  = as_VectorRegister($dst$$reg);
    const VectorRegister src_v  = as_VectorRegister($src$$reg);
    const VectorRegister cnt_v  = as_VectorRegister($shift$$reg);
    const VectorRegister mask_v = as_VectorRegister($v0$$reg);
    __ vsetvli_helper(T_BYTE, Matcher::vector_length(this));
    // mask := lanes with count > BitsPerByte - 1; zero them (src ^ src)
    __ vmsgtu_vi(mask_v, cnt_v, BitsPerByte - 1);
    __ vxor_vv(dst_v, src_v, src_v, Assembler::v0_t);
    // invert the mask and shift the remaining lanes
    __ vmnot_m(mask_v, mask_v);
    __ vsll_vv(dst_v, src_v, cnt_v, Assembler::v0_t);
  %}
  ins_pipe(pipe_slow);
%}

// Logical left shift of short lanes by a per-lane count vector.
// Lanes whose count is above BitsPerShort - 1 are zeroed; all other lanes are
// shifted by their count. v0 serves as a scratch mask to pick the case.
instruct vlslS(vReg dst, vReg src, vReg shift, vRegMask_V0 v0) %{
  match(Set dst (LShiftVS src shift));
  effect(TEMP_DEF dst, TEMP v0);
  format %{ "vlslS $dst, $src, $shift" %}
  ins_encode %{
    const VectorRegister dst_v  = as_VectorRegister($dst$$reg);
    const VectorRegister src_v  = as_VectorRegister($src$$reg);
    const VectorRegister cnt_v  = as_VectorRegister($shift$$reg);
    const VectorRegister mask_v = as_VectorRegister($v0$$reg);
    __ vsetvli_helper(T_SHORT, Matcher::vector_length(this));
    // mask := lanes with count > BitsPerShort - 1; zero them (src ^ src)
    __ vmsgtu_vi(mask_v, cnt_v, BitsPerShort - 1);
    __ vxor_vv(dst_v, src_v, src_v, Assembler::v0_t);
    // invert the mask and shift the remaining lanes
    __ vmnot_m(mask_v, mask_v);
    __ vsll_vv(dst_v, src_v, cnt_v, Assembler::v0_t);
  %}
  ins_pipe(pipe_slow);
%}

// Logical left shift of int lanes by a per-lane count vector.
instruct vlslI(vReg dst, vReg src, vReg shift) %{
  match(Set dst (LShiftVI src shift));
  format %{ "vlslI $dst, $src, $shift" %}
  ins_encode %{
    const VectorRegister dst_v = as_VectorRegister($dst$$reg);
    const VectorRegister src_v = as_VectorRegister($src$$reg);
    const VectorRegister cnt_v = as_VectorRegister($shift$$reg);
    __ vsetvli_helper(T_INT, Matcher::vector_length(this));
    __ vsll_vv(dst_v, src_v, cnt_v);
  %}
  ins_pipe(pipe_slow);
%}

// Logical left shift of long lanes by a per-lane count vector.
instruct vlslL(vReg dst, vReg src, vReg shift) %{
  match(Set dst (LShiftVL src shift));
  format %{ "vlslL $dst, $src, $shift" %}
  ins_encode %{
    const VectorRegister dst_v = as_VectorRegister($dst$$reg);
    const VectorRegister src_v = as_VectorRegister($src$$reg);
    const VectorRegister cnt_v = as_VectorRegister($shift$$reg);
    __ vsetvli_helper(T_LONG, Matcher::vector_length(this));
    __ vsll_vv(dst_v, src_v, cnt_v);
  %}
  ins_pipe(pipe_slow);
%}

// Masked logical left shift of byte lanes by a per-lane count vector.
// Lanes that are enabled in vmask and whose count is above BitsPerByte - 1
// are zeroed first; the shift itself is then predicated on vmask.
instruct vlslB_masked(vReg dst_src, vReg shift, vRegMask vmask, vRegMask_V0 v0) %{
  match(Set dst_src (LShiftVB (Binary dst_src shift) vmask));
  effect(TEMP_DEF dst_src, TEMP v0);
  format %{ "vlslB_masked $dst_src, $dst_src, $shift, $vmask\t# KILL $v0" %}
  ins_encode %{
    const VectorRegister val_v  = as_VectorRegister($dst_src$$reg);
    const VectorRegister cnt_v  = as_VectorRegister($shift$$reg);
    const VectorRegister pred_v = as_VectorRegister($vmask$$reg);
    const VectorRegister v0_v   = as_VectorRegister($v0$$reg);
    __ vsetvli_helper(T_BYTE, Matcher::vector_length(this));
    // v0 := (count > BitsPerByte - 1) & vmask; zero those lanes (val ^ val)
    __ vmsgtu_vi(v0_v, cnt_v, BitsPerByte - 1);
    __ vmand_mm(v0_v, v0_v, pred_v);
    __ vxor_vv(val_v, val_v, val_v, Assembler::v0_t);
    // bring the requested mask into v0, then shift under it
    __ vmv1r_v(v0_v, pred_v);
    __ vsll_vv(val_v, val_v, cnt_v, Assembler::v0_t);
  %}
  ins_pipe(pipe_slow);
%}

// Masked logical left shift of short lanes by a per-lane count vector.
// Lanes that are enabled in vmask and whose count is above BitsPerShort - 1
// are zeroed first; the shift itself is then predicated on vmask.
instruct vlslS_masked(vReg dst_src, vReg shift, vRegMask vmask, vRegMask_V0 v0) %{
  match(Set dst_src (LShiftVS (Binary dst_src shift) vmask));
  effect(TEMP_DEF dst_src, TEMP v0);
  format %{ "vlslS_masked $dst_src, $dst_src, $shift, $vmask\t# KILL $v0" %}
  ins_encode %{
    const VectorRegister val_v  = as_VectorRegister($dst_src$$reg);
    const VectorRegister cnt_v  = as_VectorRegister($shift$$reg);
    const VectorRegister pred_v = as_VectorRegister($vmask$$reg);
    const VectorRegister v0_v   = as_VectorRegister($v0$$reg);
    __ vsetvli_helper(T_SHORT, Matcher::vector_length(this));
    // v0 := (count > BitsPerShort - 1) & vmask; zero those lanes (val ^ val)
    __ vmsgtu_vi(v0_v, cnt_v, BitsPerShort - 1);
    __ vmand_mm(v0_v, v0_v, pred_v);
    __ vxor_vv(val_v, val_v, val_v, Assembler::v0_t);
    // bring the requested mask into v0, then shift under it
    __ vmv1r_v(v0_v, pred_v);
    __ vsll_vv(val_v, val_v, cnt_v, Assembler::v0_t);
  %}
  ins_pipe(pipe_slow);
%}

// Masked logical left shift of int lanes by a per-lane count vector;
// the shift is predicated on v0 and updates dst_src in place.
instruct vlslI_masked(vReg dst_src, vReg shift, vRegMask_V0 v0) %{
  match(Set dst_src (LShiftVI (Binary dst_src shift) v0));
  effect(TEMP_DEF dst_src);
  format %{ "vlslI_masked $dst_src, $dst_src, $shift, $v0" %}
  ins_encode %{
    const VectorRegister val_v = as_VectorRegister($dst_src$$reg);
    const VectorRegister cnt_v = as_VectorRegister($shift$$reg);
    __ vsetvli_helper(T_INT, Matcher::vector_length(this));
    __ vsll_vv(val_v, val_v, cnt_v, Assembler::v0_t);
  %}
  ins_pipe(pipe_slow);
%}

// Masked logical left shift of long lanes by a per-lane count vector;
// the shift is predicated on v0 and updates dst_src in place.
instruct vlslL_masked(vReg dst_src, vReg shift, vRegMask_V0 v0) %{
  match(Set dst_src (LShiftVL (Binary dst_src shift) v0));
  effect(TEMP_DEF dst_src);
  format %{ "vlslL_masked $dst_src, $dst_src, $shift, $v0" %}
  ins_encode %{
    const VectorRegister val_v = as_VectorRegister($dst_src$$reg);
    const VectorRegister cnt_v = as_VectorRegister($shift$$reg);
    __ vsetvli_helper(T_LONG, Matcher::vector_length(this));
    __ vsll_vv(val_v, val_v, cnt_v, Assembler::v0_t);
  %}
  ins_pipe(pipe_slow);
%}

// Logical right shift of byte lanes by a per-lane count vector.
// Lanes whose count is above BitsPerByte - 1 are zeroed; all other lanes are
// shifted by their count. v0 serves as a scratch mask to pick the case.
instruct vlsrB(vReg dst, vReg src, vReg shift, vRegMask_V0 v0) %{
  match(Set dst (URShiftVB src shift));
  effect(TEMP_DEF dst, TEMP v0);
  format %{ "vlsrB $dst, $src, $shift" %}
  ins_encode %{
    const VectorRegister dst_v  = as_VectorRegister($dst$$reg);
    const VectorRegister src_v  = as_VectorRegister($src$$reg);
    const VectorRegister cnt_v  = as_VectorRegister($shift$$reg);
    const VectorRegister mask_v = as_VectorRegister($v0$$reg);
    __ vsetvli_helper(T_BYTE, Matcher::vector_length(this));
    // mask := lanes with count > BitsPerByte - 1; zero them (src ^ src)
    __ vmsgtu_vi(mask_v, cnt_v, BitsPerByte - 1);
    __ vxor_vv(dst_v, src_v, src_v, Assembler::v0_t);
    // invert the mask and shift the remaining lanes
    __ vmnot_m(mask_v, mask_v);
    __ vsrl_vv(dst_v, src_v, cnt_v, Assembler::v0_t);
  %}
  ins_pipe(pipe_slow);
%}

// Logical right shift of short lanes by a per-lane count vector.
// Lanes whose count is above BitsPerShort - 1 are zeroed; all other lanes are
// shifted by their count. v0 serves as a scratch mask to pick the case.
instruct vlsrS(vReg dst, vReg src, vReg shift, vRegMask_V0 v0) %{
  match(Set dst (URShiftVS src shift));
  effect(TEMP_DEF dst, TEMP v0);
  format %{ "vlsrS $dst, $src, $shift" %}
  ins_encode %{
    const VectorRegister dst_v  = as_VectorRegister($dst$$reg);
    const VectorRegister src_v  = as_VectorRegister($src$$reg);
    const VectorRegister cnt_v  = as_VectorRegister($shift$$reg);
    const VectorRegister mask_v = as_VectorRegister($v0$$reg);
    __ vsetvli_helper(T_SHORT, Matcher::vector_length(this));
    // mask := lanes with count > BitsPerShort - 1; zero them (src ^ src)
    __ vmsgtu_vi(mask_v, cnt_v, BitsPerShort - 1);
    __ vxor_vv(dst_v, src_v, src_v, Assembler::v0_t);
    // invert the mask and shift the remaining lanes
    __ vmnot_m(mask_v, mask_v);
    __ vsrl_vv(dst_v, src_v, cnt_v, Assembler::v0_t);
  %}
  ins_pipe(pipe_slow);
%}

// Logical right shift of int lanes by a per-lane count vector.
instruct vlsrI(vReg dst, vReg src, vReg shift) %{
  match(Set dst (URShiftVI src shift));
  format %{ "vlsrI $dst, $src, $shift" %}
  ins_encode %{
    const VectorRegister dst_v = as_VectorRegister($dst$$reg);
    const VectorRegister src_v = as_VectorRegister($src$$reg);
    const VectorRegister cnt_v = as_VectorRegister($shift$$reg);
    __ vsetvli_helper(T_INT, Matcher::vector_length(this));
    __ vsrl_vv(dst_v, src_v, cnt_v);
  %}
  ins_pipe(pipe_slow);
%}

// Logical right shift of long lanes by a per-lane count vector.
instruct vlsrL(vReg dst, vReg src, vReg shift) %{
  match(Set dst (URShiftVL src shift));
  format %{ "vlsrL $dst, $src, $shift" %}
  ins_encode %{
    const VectorRegister dst_v = as_VectorRegister($dst$$reg);
    const VectorRegister src_v = as_VectorRegister($src$$reg);
    const VectorRegister cnt_v = as_VectorRegister($shift$$reg);
    __ vsetvli_helper(T_LONG, Matcher::vector_length(this));
    __ vsrl_vv(dst_v, src_v, cnt_v);
  %}
  ins_pipe(pipe_slow);
%}

// Masked logical right shift of byte lanes by a per-lane count vector.
// Lanes that are enabled in vmask and whose count is above BitsPerByte - 1
// are zeroed first; the shift itself is then predicated on vmask.
instruct vlsrB_masked(vReg dst_src, vReg shift, vRegMask vmask, vRegMask_V0 v0) %{
  match(Set dst_src (URShiftVB (Binary dst_src shift) vmask));
  effect(TEMP_DEF dst_src, TEMP v0);
  format %{ "vlsrB_masked $dst_src, $dst_src, $shift, $vmask\t# KILL $v0" %}
  ins_encode %{
    const VectorRegister val_v  = as_VectorRegister($dst_src$$reg);
    const VectorRegister cnt_v  = as_VectorRegister($shift$$reg);
    const VectorRegister pred_v = as_VectorRegister($vmask$$reg);
    const VectorRegister v0_v   = as_VectorRegister($v0$$reg);
    __ vsetvli_helper(T_BYTE, Matcher::vector_length(this));
    // v0 := (count > BitsPerByte - 1) & vmask; zero those lanes (val ^ val)
    __ vmsgtu_vi(v0_v, cnt_v, BitsPerByte - 1);
    __ vmand_mm(v0_v, v0_v, pred_v);
    __ vxor_vv(val_v, val_v, val_v, Assembler::v0_t);
    // bring the requested mask into v0, then shift under it
    __ vmv1r_v(v0_v, pred_v);
    __ vsrl_vv(val_v, val_v, cnt_v, Assembler::v0_t);
  %}
  ins_pipe(pipe_slow);
%}

// Masked logical right shift of short lanes by a per-lane count vector.
// Lanes that are enabled in vmask and whose count is above BitsPerShort - 1
// are zeroed first; the shift itself is then predicated on vmask.
instruct vlsrS_masked(vReg dst_src, vReg shift, vRegMask vmask, vRegMask_V0 v0) %{
  match(Set dst_src (URShiftVS (Binary dst_src shift) vmask));
  effect(TEMP_DEF dst_src, TEMP v0);
  format %{ "vlsrS_masked $dst_src, $dst_src, $shift, $vmask\t# KILL $v0" %}
  ins_encode %{
    const VectorRegister val_v  = as_VectorRegister($dst_src$$reg);
    const VectorRegister cnt_v  = as_VectorRegister($shift$$reg);
    const VectorRegister pred_v = as_VectorRegister($vmask$$reg);
    const VectorRegister v0_v   = as_VectorRegister($v0$$reg);
    __ vsetvli_helper(T_SHORT, Matcher::vector_length(this));
    // v0 := (count > BitsPerShort - 1) & vmask; zero those lanes (val ^ val)
    __ vmsgtu_vi(v0_v, cnt_v, BitsPerShort - 1);
    __ vmand_mm(v0_v, v0_v, pred_v);
    __ vxor_vv(val_v, val_v, val_v, Assembler::v0_t);
    // bring the requested mask into v0, then shift under it
    __ vmv1r_v(v0_v, pred_v);
    __ vsrl_vv(val_v, val_v, cnt_v, Assembler::v0_t);
  %}
  ins_pipe(pipe_slow);
%}

// Masked logical right shift of int lanes by a per-lane count vector;
// the shift is predicated on v0 and updates dst_src in place.
instruct vlsrI_masked(vReg dst_src, vReg shift, vRegMask_V0 v0) %{
  match(Set dst_src (URShiftVI (Binary dst_src shift) v0));
  effect(TEMP_DEF dst_src);
  format %{ "vlsrI_masked $dst_src, $dst_src, $shift, $v0" %}
  ins_encode %{
    const VectorRegister val_v = as_VectorRegister($dst_src$$reg);
    const VectorRegister cnt_v = as_VectorRegister($shift$$reg);
    __ vsetvli_helper(T_INT, Matcher::vector_length(this));
    __ vsrl_vv(val_v, val_v, cnt_v, Assembler::v0_t);
  %}
  ins_pipe(pipe_slow);
%}

// Masked logical right shift of long lanes by a per-lane count vector;
// the shift is predicated on v0 and updates dst_src in place.
instruct vlsrL_masked(vReg dst_src, vReg shift, vRegMask_V0 v0) %{
  match(Set dst_src (URShiftVL (Binary dst_src shift) v0));
  effect(TEMP_DEF dst_src);
  format %{ "vlsrL_masked $dst_src, $dst_src, $shift, $v0" %}
  ins_encode %{
    const VectorRegister val_v = as_VectorRegister($dst_src$$reg);
    const VectorRegister cnt_v = as_VectorRegister($shift$$reg);
    __ vsetvli_helper(T_LONG, Matcher::vector_length(this));
    __ vsrl_vv(val_v, val_v, cnt_v, Assembler::v0_t);
  %}
  ins_pipe(pipe_slow);
%}

// Arithmetic right shift of byte lanes by a constant count.
// The constant is reduced to its low five bits; zero becomes a register copy
// and anything of BitsPerByte or more is clamped to BitsPerByte - 1.
instruct vasrB_vi(vReg dst, vReg src, immI shift) %{
  match(Set dst (RShiftVB src (RShiftCntV shift)));
  format %{ "vasrB_vi $dst, $src, $shift" %}
  ins_encode %{
    const VectorRegister dst_v = as_VectorRegister($dst$$reg);
    const VectorRegister src_v = as_VectorRegister($src$$reg);
    const uint32_t amount = (unsigned)$shift$$constant & 0x1f;
    __ vsetvli_helper(T_BYTE, Matcher::vector_length(this));
    if (amount != 0) {
      const uint32_t clamped = (amount >= BitsPerByte) ? BitsPerByte - 1 : amount;
      __ vsra_vi(dst_v, src_v, clamped);
    } else {
      // nothing to shift: dst = src | src
      __ vor_vv(dst_v, src_v, src_v);
    }
  %}
  ins_pipe(pipe_slow);
%}

// Arithmetic right shift of short lanes by a constant count.
// The constant is reduced to its low five bits; zero becomes a register copy
// and anything of BitsPerShort or more is clamped to BitsPerShort - 1.
instruct vasrS_vi(vReg dst, vReg src, immI shift) %{
  match(Set dst (RShiftVS src (RShiftCntV shift)));
  format %{ "vasrS_vi $dst, $src, $shift" %}
  ins_encode %{
    const VectorRegister dst_v = as_VectorRegister($dst$$reg);
    const VectorRegister src_v = as_VectorRegister($src$$reg);
    const uint32_t amount = (unsigned)$shift$$constant & 0x1f;
    __ vsetvli_helper(T_SHORT, Matcher::vector_length(this));
    if (amount != 0) {
      const uint32_t clamped = (amount >= BitsPerShort) ? BitsPerShort - 1 : amount;
      __ vsra_vi(dst_v, src_v, clamped);
    } else {
      // nothing to shift: dst = src | src
      __ vor_vv(dst_v, src_v, src_v);
    }
  %}
  ins_pipe(pipe_slow);
%}

// Arithmetic right shift of int lanes by a constant count (low five bits of
// the constant); a zero count becomes a register copy.
instruct vasrI_vi(vReg dst, vReg src, immI shift) %{
  match(Set dst (RShiftVI src (RShiftCntV shift)));
  format %{ "vasrI_vi $dst, $src, $shift" %}
  ins_encode %{
    const VectorRegister dst_v = as_VectorRegister($dst$$reg);
    const VectorRegister src_v = as_VectorRegister($src$$reg);
    const uint32_t amount = (unsigned)$shift$$constant & 0x1f;
    __ vsetvli_helper(T_INT, Matcher::vector_length(this));
    if (amount != 0) {
      __ vsra_vi(dst_v, src_v, amount);
    } else {
      // nothing to shift: dst = src | src
      __ vor_vv(dst_v, src_v, src_v);
    }
  %}
  ins_pipe(pipe_slow);
%}

// Arithmetic right shift of long lanes by a constant count.
// The predicate only admits constants whose low six bits are below 32, so
// masking with 0x1f in the encoding keeps the same value.
instruct vasrL_vi(vReg dst, vReg src, immI shift) %{
  predicate((n->in(2)->in(1)->get_int() & 0x3f) < 32);
  match(Set dst (RShiftVL src (RShiftCntV shift)));
  format %{ "vasrL_vi $dst, $src, $shift" %}
  ins_encode %{
    const VectorRegister dst_v = as_VectorRegister($dst$$reg);
    const VectorRegister src_v = as_VectorRegister($src$$reg);
    const uint32_t amount = (unsigned)$shift$$constant & 0x1f;
    __ vsetvli_helper(T_LONG, Matcher::vector_length(this));
    if (amount != 0) {
      __ vsra_vi(dst_v, src_v, amount);
    } else {
      // nothing to shift: dst = src | src
      __ vor_vv(dst_v, src_v, src_v);
    }
  %}
  ins_pipe(pipe_slow);
%}

// Masked arithmetic right shift of byte lanes by a constant count, in place.
// A zero count emits no code; counts of BitsPerByte or more are clamped to
// BitsPerByte - 1.
instruct vasrB_vi_masked(vReg dst_src, immI shift, vRegMask_V0 v0) %{
  match(Set dst_src (RShiftVB (Binary dst_src (RShiftCntV shift)) v0));
  format %{ "vasrB_vi_masked $dst_src, $dst_src, $shift, $v0" %}
  ins_encode %{
    const uint32_t amount = (unsigned)$shift$$constant & 0x1f;
    if (amount != 0) {
      const VectorRegister val_v = as_VectorRegister($dst_src$$reg);
      const uint32_t clamped = (amount >= BitsPerByte) ? BitsPerByte - 1 : amount;
      __ vsetvli_helper(T_BYTE, Matcher::vector_length(this));
      __ vsra_vi(val_v, val_v, clamped, Assembler::v0_t);
    }
  %}
  ins_pipe(pipe_slow);
%}

// Masked arithmetic right shift of short lanes by a constant count, in place.
// A zero count emits no code; counts of BitsPerShort or more are clamped to
// BitsPerShort - 1.
instruct vasrS_vi_masked(vReg dst_src, immI shift, vRegMask_V0 v0) %{
  match(Set dst_src (RShiftVS (Binary dst_src (RShiftCntV shift)) v0));
  format %{ "vasrS_vi_masked $dst_src, $dst_src, $shift, $v0" %}
  ins_encode %{
    const uint32_t amount = (unsigned)$shift$$constant & 0x1f;
    if (amount != 0) {
      const VectorRegister val_v = as_VectorRegister($dst_src$$reg);
      const uint32_t clamped = (amount >= BitsPerShort) ? BitsPerShort - 1 : amount;
      __ vsetvli_helper(T_SHORT, Matcher::vector_length(this));
      __ vsra_vi(val_v, val_v, clamped, Assembler::v0_t);
    }
  %}
  ins_pipe(pipe_slow);
%}

// Masked arithmetic right shift of int lanes by a constant count, in place.
// A zero count emits no code.
instruct vasrI_vi_masked(vReg dst_src, immI shift, vRegMask_V0 v0) %{
  match(Set dst_src (RShiftVI (Binary dst_src (RShiftCntV shift)) v0));
  format %{ "vasrI_vi_masked $dst_src, $dst_src, $shift, $v0" %}
  ins_encode %{
    const uint32_t amount = (unsigned)$shift$$constant & 0x1f;
    if (amount != 0) {
      const VectorRegister val_v = as_VectorRegister($dst_src$$reg);
      __ vsetvli_helper(T_INT, Matcher::vector_length(this));
      __ vsra_vi(val_v, val_v, amount, Assembler::v0_t);
    }
  %}
  ins_pipe(pipe_slow);
%}

// Masked arithmetic right shift of long lanes by a constant count, in place.
// The predicate only admits constants whose low six bits are below 32.
// A zero count emits no code.
instruct vasrL_vi_masked(vReg dst_src, immI shift, vRegMask_V0 v0) %{
  predicate((n->in(1)->in(2)->in(1)->get_int() & 0x3f) < 32);
  match(Set dst_src (RShiftVL (Binary dst_src (RShiftCntV shift)) v0));
  format %{ "vasrL_vi_masked $dst_src, $dst_src, $shift, $v0" %}
  ins_encode %{
    const uint32_t amount = (unsigned)$shift$$constant & 0x1f;
    if (amount != 0) {
      const VectorRegister val_v = as_VectorRegister($dst_src$$reg);
      __ vsetvli_helper(T_LONG, Matcher::vector_length(this));
      __ vsra_vi(val_v, val_v, amount, Assembler::v0_t);
    }
  %}
  ins_pipe(pipe_slow);
%}

// Logical right shift of byte lanes by a constant count (low five bits).
// Zero becomes a register copy; BitsPerByte or more yields an all-zero result.
instruct vlsrB_vi(vReg dst, vReg src, immI shift) %{
  match(Set dst (URShiftVB src (RShiftCntV shift)));
  format %{ "vlsrB_vi $dst, $src, $shift" %}
  ins_encode %{
    const VectorRegister dst_v = as_VectorRegister($dst$$reg);
    const VectorRegister src_v = as_VectorRegister($src$$reg);
    const uint32_t amount = (unsigned)$shift$$constant & 0x1f;
    __ vsetvli_helper(T_BYTE, Matcher::vector_length(this));
    if (amount == 0) {
      // nothing to shift: dst = src | src
      __ vor_vv(dst_v, src_v, src_v);
    } else if (amount >= BitsPerByte) {
      // every bit is shifted out: dst = src ^ src
      __ vxor_vv(dst_v, src_v, src_v);
    } else {
      __ vsrl_vi(dst_v, src_v, amount);
    }
  %}
  ins_pipe(pipe_slow);
%}

// Logical right shift of short lanes by a constant count (low five bits).
// Zero becomes a register copy; BitsPerShort or more yields an all-zero result.
instruct vlsrS_vi(vReg dst, vReg src, immI shift) %{
  match(Set dst (URShiftVS src (RShiftCntV shift)));
  format %{ "vlsrS_vi $dst, $src, $shift" %}
  ins_encode %{
    const VectorRegister dst_v = as_VectorRegister($dst$$reg);
    const VectorRegister src_v = as_VectorRegister($src$$reg);
    const uint32_t amount = (unsigned)$shift$$constant & 0x1f;
    __ vsetvli_helper(T_SHORT, Matcher::vector_length(this));
    if (amount == 0) {
      // nothing to shift: dst = src | src
      __ vor_vv(dst_v, src_v, src_v);
    } else if (amount >= BitsPerShort) {
      // every bit is shifted out: dst = src ^ src
      __ vxor_vv(dst_v, src_v, src_v);
    } else {
      __ vsrl_vi(dst_v, src_v, amount);
    }
  %}
  ins_pipe(pipe_slow);
%}

// Logical right shift of int lanes by a constant count (low five bits of
// the constant); a zero count becomes a register copy.
instruct vlsrI_vi(vReg dst, vReg src, immI shift) %{
  match(Set dst (URShiftVI src (RShiftCntV shift)));
  format %{ "vlsrI_vi $dst, $src, $shift" %}
  ins_encode %{
    const VectorRegister dst_v = as_VectorRegister($dst$$reg);
    const VectorRegister src_v = as_VectorRegister($src$$reg);
    const uint32_t amount = (unsigned)$shift$$constant & 0x1f;
    __ vsetvli_helper(T_INT, Matcher::vector_length(this));
    if (amount != 0) {
      __ vsrl_vi(dst_v, src_v, amount);
    } else {
      // nothing to shift: dst = src | src
      __ vor_vv(dst_v, src_v, src_v);
    }
  %}
  ins_pipe(pipe_slow);
%}

// Logical right shift of long lanes by a constant count.
// The predicate only admits constants whose low six bits are below 32, so
// masking with 0x1f in the encoding keeps the same value.
instruct vlsrL_vi(vReg dst, vReg src, immI shift) %{
  predicate((n->in(2)->in(1)->get_int() & 0x3f) < 32);
  match(Set dst (URShiftVL src (RShiftCntV shift)));
  format %{ "vlsrL_vi $dst, $src, $shift" %}
  ins_encode %{
    const VectorRegister dst_v = as_VectorRegister($dst$$reg);
    const VectorRegister src_v = as_VectorRegister($src$$reg);
    const uint32_t amount = (unsigned)$shift$$constant & 0x1f;
    __ vsetvli_helper(T_LONG, Matcher::vector_length(this));
    if (amount != 0) {
      __ vsrl_vi(dst_v, src_v, amount);
    } else {
      // nothing to shift: dst = src | src
      __ vor_vv(dst_v, src_v, src_v);
    }
  %}
  ins_pipe(pipe_slow);
%}

// Masked logical right shift of byte lanes by a constant count, in place.
// A zero count emits no code; a count of BitsPerByte or more zeroes the
// lanes selected by v0.
instruct vlsrB_vi_masked(vReg dst_src, immI shift, vRegMask_V0 v0) %{
  match(Set dst_src (URShiftVB (Binary dst_src (RShiftCntV shift)) v0));
  format %{ "vlsrB_vi_masked $dst_src, $dst_src, $shift, $v0" %}
  ins_encode %{
    const uint32_t amount = (unsigned)$shift$$constant & 0x1f;
    if (amount != 0) {
      const VectorRegister val_v = as_VectorRegister($dst_src$$reg);
      __ vsetvli_helper(T_BYTE, Matcher::vector_length(this));
      if (amount < BitsPerByte) {
        __ vsrl_vi(val_v, val_v, amount, Assembler::v0_t);
      } else {
        // every bit is shifted out: val = val ^ val on the selected lanes
        __ vxor_vv(val_v, val_v, val_v, Assembler::v0_t);
      }
    }
  %}
  ins_pipe(pipe_slow);
%}

// Masked logical right shift of short lanes by a constant count, in place.
// A zero count emits no code; a count of BitsPerShort or more zeroes the
// lanes selected by v0.
instruct vlsrS_vi_masked(vReg dst_src, immI shift, vRegMask_V0 v0) %{
  match(Set dst_src (URShiftVS (Binary dst_src (RShiftCntV shift)) v0));
  format %{ "vlsrS_vi_masked $dst_src, $dst_src, $shift, $v0" %}
  ins_encode %{
    const uint32_t amount = (unsigned)$shift$$constant & 0x1f;
    if (amount != 0) {
      const VectorRegister val_v = as_VectorRegister($dst_src$$reg);
      __ vsetvli_helper(T_SHORT, Matcher::vector_length(this));
      if (amount < BitsPerShort) {
        __ vsrl_vi(val_v, val_v, amount, Assembler::v0_t);
      } else {
        // every bit is shifted out: val = val ^ val on the selected lanes
        __ vxor_vv(val_v, val_v, val_v, Assembler::v0_t);
      }
    }
  %}
  ins_pipe(pipe_slow);
%}

// Masked logical right shift of int lanes by a constant count, in place.
// A zero count emits no code.
instruct vlsrI_vi_masked(vReg dst_src, immI shift, vRegMask_V0 v0) %{
  match(Set dst_src (URShiftVI (Binary dst_src (RShiftCntV shift)) v0));
  format %{ "vlsrI_vi_masked $dst_src, $dst_src, $shift, $v0" %}
  ins_encode %{
    const uint32_t amount = (unsigned)$shift$$constant & 0x1f;
    if (amount != 0) {
      const VectorRegister val_v = as_VectorRegister($dst_src$$reg);
      __ vsetvli_helper(T_INT, Matcher::vector_length(this));
      __ vsrl_vi(val_v, val_v, amount, Assembler::v0_t);
    }
  %}
  ins_pipe(pipe_slow);
%}

// Masked logical right shift of long lanes by a constant count, in place.
// The predicate only admits constants whose low six bits are below 32.
// A zero count emits no code.
instruct vlsrL_vi_masked(vReg dst_src, immI shift, vRegMask_V0 v0) %{
  predicate((n->in(1)->in(2)->in(1)->get_int() & 0x3f) < 32);
  match(Set dst_src (URShiftVL (Binary dst_src (RShiftCntV shift)) v0));
  format %{ "vlsrL_vi_masked $dst_src, $dst_src, $shift, $v0" %}
  ins_encode %{
    const uint32_t amount = (unsigned)$shift$$constant & 0x1f;
    if (amount != 0) {
      const VectorRegister val_v = as_VectorRegister($dst_src$$reg);
      __ vsetvli_helper(T_LONG, Matcher::vector_length(this));
      __ vsrl_vi(val_v, val_v, amount, Assembler::v0_t);
    }
  %}
  ins_pipe(pipe_slow);
%}

// Logical left shift of byte lanes by a constant count (low five bits).
// A count of BitsPerByte or more yields an all-zero result.
instruct vlslB_vi(vReg dst, vReg src, immI shift) %{
  match(Set dst (LShiftVB src (LShiftCntV shift)));
  format %{ "vlslB_vi $dst, $src, $shift" %}
  ins_encode %{
    const VectorRegister dst_v = as_VectorRegister($dst$$reg);
    const VectorRegister src_v = as_VectorRegister($src$$reg);
    const uint32_t amount = (unsigned)$shift$$constant & 0x1f;
    __ vsetvli_helper(T_BYTE, Matcher::vector_length(this));
    if (amount < BitsPerByte) {
      __ vsll_vi(dst_v, src_v, amount);
    } else {
      // every bit is shifted out: dst = src ^ src
      __ vxor_vv(dst_v, src_v, src_v);
    }
  %}
  ins_pipe(pipe_slow);
%}

// Logical left shift of short lanes by a constant count (low five bits).
// A count of BitsPerShort or more yields an all-zero result.
instruct vlslS_vi(vReg dst, vReg src, immI shift) %{
  match(Set dst (LShiftVS src (LShiftCntV shift)));
  format %{ "vlslS_vi $dst, $src, $shift" %}
  ins_encode %{
    const VectorRegister dst_v = as_VectorRegister($dst$$reg);
    const VectorRegister src_v = as_VectorRegister($src$$reg);
    const uint32_t amount = (unsigned)$shift$$constant & 0x1f;
    __ vsetvli_helper(T_SHORT, Matcher::vector_length(this));
    if (amount < BitsPerShort) {
      __ vsll_vi(dst_v, src_v, amount);
    } else {
      // every bit is shifted out: dst = src ^ src
      __ vxor_vv(dst_v, src_v, src_v);
    }
  %}
  ins_pipe(pipe_slow);
%}

// Logical left shift of int lanes by a constant count (low five bits of
// the constant).
instruct vlslI_vi(vReg dst, vReg src, immI shift) %{
  match(Set dst (LShiftVI src (LShiftCntV shift)));
  format %{ "vlslI_vi $dst, $src, $shift" %}
  ins_encode %{
    const VectorRegister dst_v = as_VectorRegister($dst$$reg);
    const VectorRegister src_v = as_VectorRegister($src$$reg);
    const uint32_t amount = (unsigned)$shift$$constant & 0x1f;
    __ vsetvli_helper(T_INT, Matcher::vector_length(this));
    __ vsll_vi(dst_v, src_v, amount);
  %}
  ins_pipe(pipe_slow);
%}

// Logical left shift of long lanes by a constant count.
// The predicate only admits constants whose low six bits are below 32, so
// masking with 0x1f in the encoding keeps the same value.
instruct vlslL_vi(vReg dst, vReg src, immI shift) %{
  predicate((n->in(2)->in(1)->get_int() & 0x3f) < 32);
  match(Set dst (LShiftVL src (LShiftCntV shift)));
  format %{ "vlslL_vi $dst, $src, $shift" %}
  ins_encode %{
    const VectorRegister dst_v = as_VectorRegister($dst$$reg);
    const VectorRegister src_v = as_VectorRegister($src$$reg);
    const uint32_t amount = (unsigned)$shift$$constant & 0x1f;
    __ vsetvli_helper(T_LONG, Matcher::vector_length(this));
    __ vsll_vi(dst_v, src_v, amount);
  %}
  ins_pipe(pipe_slow);
%}

// Masked logical left shift of byte lanes by a constant count, in place.
// A count of BitsPerByte or more zeroes the lanes selected by v0.
instruct vlslB_vi_masked(vReg dst_src, immI shift, vRegMask_V0 v0) %{
  match(Set dst_src (LShiftVB (Binary dst_src (LShiftCntV shift)) v0));
  format %{ "vlslB_vi_masked $dst_src, $dst_src, $shift, $v0" %}
  ins_encode %{
    const VectorRegister val_v = as_VectorRegister($dst_src$$reg);
    const uint32_t amount = (unsigned)$shift$$constant & 0x1f;
    __ vsetvli_helper(T_BYTE, Matcher::vector_length(this));
    if (amount < BitsPerByte) {
      __ vsll_vi(val_v, val_v, amount, Assembler::v0_t);
    } else {
      // every bit is shifted out: val = val ^ val on the selected lanes
      __ vxor_vv(val_v, val_v, val_v, Assembler::v0_t);
    }
  %}
  ins_pipe(pipe_slow);
%}

// Masked logical left shift of short lanes by a constant count, in place.
// A count of BitsPerShort or more zeroes the lanes selected by v0.
instruct vlslS_vi_masked(vReg dst_src, immI shift, vRegMask_V0 v0) %{
  match(Set dst_src (LShiftVS (Binary dst_src (LShiftCntV shift)) v0));
  format %{ "vlslS_vi_masked $dst_src, $dst_src, $shift, $v0" %}
  ins_encode %{
    const VectorRegister val_v = as_VectorRegister($dst_src$$reg);
    const uint32_t amount = (unsigned)$shift$$constant & 0x1f;
    __ vsetvli_helper(T_SHORT, Matcher::vector_length(this));
    if (amount < BitsPerShort) {
      __ vsll_vi(val_v, val_v, amount, Assembler::v0_t);
    } else {
      // every bit is shifted out: val = val ^ val on the selected lanes
      __ vxor_vv(val_v, val_v, val_v, Assembler::v0_t);
    }
  %}
  ins_pipe(pipe_slow);
%}

// Masked logical left shift of int lanes by a constant count (low five bits
// of the constant), in place and predicated on v0.
instruct vlslI_vi_masked(vReg dst_src, immI shift, vRegMask_V0 v0) %{
  match(Set dst_src (LShiftVI (Binary dst_src (LShiftCntV shift)) v0));
  format %{ "vlslI_vi_masked $dst_src, $dst_src, $shift, $v0" %}
  ins_encode %{
    const VectorRegister val_v = as_VectorRegister($dst_src$$reg);
    const uint32_t amount = (unsigned)$shift$$constant & 0x1f;
    __ vsetvli_helper(T_INT, Matcher::vector_length(this));
    __ vsll_vi(val_v, val_v, amount, Assembler::v0_t);
  %}
  ins_pipe(pipe_slow);
%}

// Masked logical left shift of long lanes by a constant count, in place and
// predicated on v0. The predicate only admits constants whose low six bits
// are below 32.
instruct vlslL_vi_masked(vReg dst_src, immI shift, vRegMask_V0 v0) %{
  predicate((n->in(1)->in(2)->in(1)->get_int() & 0x3f) < 32);
  match(Set dst_src (LShiftVL (Binary dst_src (LShiftCntV shift)) v0));
  format %{ "vlslL_vi_masked $dst_src, $dst_src, $shift, $v0" %}
  ins_encode %{
    const VectorRegister val_v = as_VectorRegister($dst_src$$reg);
    const uint32_t amount = (unsigned)$shift$$constant & 0x1f;
    __ vsetvli_helper(T_LONG, Matcher::vector_length(this));
    __ vsll_vi(val_v, val_v, amount, Assembler::v0_t);
  %}
  ins_pipe(pipe_slow);
%}

// vector shift count

// Materialize a scalar shift count as a vector (used for both left and right
// shift counts) by copying the register value into every lane of dst.
instruct vshiftcnt(vReg dst, iRegIorL2I cnt) %{
  match(Set dst (LShiftCntV cnt));
  match(Set dst (RShiftCntV cnt));
  format %{ "vshiftcnt $dst, $cnt" %}
  ins_encode %{
    const VectorRegister dst_v = as_VectorRegister($dst$$reg);
    const Register cnt_r = as_Register($cnt$$reg);
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    __ vsetvli_helper(elem_bt, Matcher::vector_length(this));
    __ vmv_v_x(dst_v, cnt_r);
  %}
  ins_pipe(pipe_slow);
%}

// --------------------------------- Vector Rotation ----------------------------------
// Rotate right

// Rotate each lane of src right by the matching lane of shift; the element
// type is taken from the node.
instruct vrotate_right(vReg dst, vReg src, vReg shift) %{
  match(Set dst (RotateRightV src shift));
  format %{ "vrotate_right $dst, $src, $shift\t" %}
  ins_encode %{
    const VectorRegister dst_v = as_VectorRegister($dst$$reg);
    const VectorRegister src_v = as_VectorRegister($src$$reg);
    const VectorRegister cnt_v = as_VectorRegister($shift$$reg);
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    __ vsetvli_helper(elem_bt, Matcher::vector_length(this));
    __ vror_vv(dst_v, src_v, cnt_v);
  %}
  ins_pipe(pipe_slow);
%}

// Rotate each lane of src right by a scalar count held in a general register.
// Only the low log2(SEW) bits of the count are used; all other bits are ignored.
instruct vrotate_right_vx(vReg dst, vReg src, iRegIorL2I shift) %{
  match(Set dst (RotateRightV src (Replicate shift)));
  format %{ "vrotate_right_vx $dst, $src, $shift\t" %}
  ins_encode %{
    const VectorRegister dst_v = as_VectorRegister($dst$$reg);
    const VectorRegister src_v = as_VectorRegister($src$$reg);
    const Register cnt_r = as_Register($shift$$reg);
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    __ vsetvli_helper(elem_bt, Matcher::vector_length(this));
    __ vror_vx(dst_v, src_v, cnt_r);
  %}
  ins_pipe(pipe_slow);
%}

// Rotate each lane of src right by a constant count. The constant is reduced
// modulo the element width in bits before it is encoded.
instruct vrotate_right_vi(vReg dst, vReg src, immI shift) %{
  match(Set dst (RotateRightV src shift));
  format %{ "vrotate_right_vi $dst, $src, $shift\t" %}
  ins_encode %{
    BasicType bt = Matcher::vector_element_basic_type(this);
    uint32_t bits = type2aelembytes(bt) * 8;
    uint32_t con = (unsigned)$shift$$constant & (bits - 1);
    __ vsetvli_helper(bt, Matcher::vector_length(this));
    if (con == 0) {
      // dst and src are separate operands, so a zero rotation must still
      // define dst: copy src (src | src), as the constant shift rules do.
      __ vor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg),
                as_VectorRegister($src$$reg));
      return;
    }
    __ vror_vi(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), con);
  %}
  ins_pipe(pipe_slow);
%}

// Rotate right - masked

// Masked element-wise rotate right, performed in place on dst_src under
// the v0 mask.
instruct vrotate_right_masked(vReg dst_src, vReg shift, vRegMask_V0 v0) %{
  match(Set dst_src (RotateRightV (Binary dst_src shift) v0));
  format %{ "vrotate_right_masked $dst_src, $dst_src, $shift, $v0\t" %}
  ins_encode %{
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    VectorRegister inout_vec = as_VectorRegister($dst_src$$reg);
    VectorRegister amount_vec = as_VectorRegister($shift$$reg);
    __ vsetvli_helper(elem_bt, Matcher::vector_length(this));
    __ vror_vv(inout_vec, inout_vec, amount_vec, Assembler::v0_t);
  %}
  ins_pipe(pipe_slow);
%}

// Masked rotate right by a replicated scalar amount, in place on dst_src.
// The hardware uses only the low log2(SEW) bits of the shift value; the
// remaining bits are ignored.
instruct vrotate_right_vx_masked(vReg dst_src, iRegIorL2I shift, vRegMask_V0 v0) %{
  match(Set dst_src (RotateRightV (Binary dst_src (Replicate shift)) v0));
  format %{ "vrotate_right_vx_masked $dst_src, $dst_src, $shift, $v0\t" %}
  ins_encode %{
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    VectorRegister inout_vec = as_VectorRegister($dst_src$$reg);
    Register amount_gpr = as_Register($shift$$reg);
    __ vsetvli_helper(elem_bt, Matcher::vector_length(this));
    __ vror_vx(inout_vec, inout_vec, amount_gpr, Assembler::v0_t);
  %}
  ins_pipe(pipe_slow);
%}

// Masked rotate right by an immediate amount, in place on dst_src.
// Nothing is emitted when the amount is a multiple of the element width,
// since source and destination are the same register.
instruct vrotate_right_vi_masked(vReg dst_src, immI shift, vRegMask_V0 v0) %{
  match(Set dst_src (RotateRightV (Binary dst_src shift) v0));
  format %{ "vrotate_right_vi_masked $dst_src, $dst_src, $shift, $v0\t" %}
  ins_encode %{
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    const uint32_t elem_bits = type2aelembytes(elem_bt) * 8;
    const uint32_t amount = (unsigned)$shift$$constant & (elem_bits - 1);
    if (amount != 0) {
      VectorRegister inout_vec = as_VectorRegister($dst_src$$reg);
      __ vsetvli_helper(elem_bt, Matcher::vector_length(this));
      __ vror_vi(inout_vec, inout_vec, amount, Assembler::v0_t);
    }
  %}
  ins_pipe(pipe_slow);
%}

// Rotate left

// Element-wise rotate left of src by the per-element amounts in shift.
instruct vrotate_left(vReg dst, vReg src, vReg shift) %{
  match(Set dst (RotateLeftV src shift));
  format %{ "vrotate_left $dst, $src, $shift\t" %}
  ins_encode %{
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    VectorRegister result_vec = as_VectorRegister($dst$$reg);
    VectorRegister value_vec = as_VectorRegister($src$$reg);
    VectorRegister amount_vec = as_VectorRegister($shift$$reg);
    __ vsetvli_helper(elem_bt, Matcher::vector_length(this));
    __ vrol_vv(result_vec, value_vec, amount_vec);
  %}
  ins_pipe(pipe_slow);
%}

// Rotate left by a replicated scalar amount.
// The hardware uses only the low log2(SEW) bits of the shift value; the
// remaining bits are ignored.
instruct vrotate_left_vx(vReg dst, vReg src, iRegIorL2I shift) %{
  match(Set dst (RotateLeftV src (Replicate shift)));
  format %{ "vrotate_left_vx $dst, $src, $shift\t" %}
  ins_encode %{
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    VectorRegister result_vec = as_VectorRegister($dst$$reg);
    VectorRegister value_vec = as_VectorRegister($src$$reg);
    Register amount_gpr = as_Register($shift$$reg);
    __ vsetvli_helper(elem_bt, Matcher::vector_length(this));
    __ vrol_vx(result_vec, value_vec, amount_gpr);
  %}
  ins_pipe(pipe_slow);
%}

// Rotate left by an immediate amount.
// This rule encodes the rotate as vror.vi with the complementary amount:
// rotating left by con equals rotating right by (bits - con) modulo bits.
// A zero amount may only be skipped when dst and src are the same register;
// otherwise dst would be left unwritten, so a vror.vi with immediate 0 is
// emitted to copy src into dst.
instruct vrotate_left_vi(vReg dst, vReg src, immI shift) %{
  match(Set dst (RotateLeftV src shift));
  format %{ "vrotate_left_vi $dst, $src, $shift\t" %}
  ins_encode %{
    BasicType bt = Matcher::vector_element_basic_type(this);
    uint32_t bits = type2aelembytes(bt) * 8;
    // Keep only the bits that select a rotation within one element.
    uint32_t con = (unsigned)$shift$$constant & (bits - 1);
    VectorRegister dst_reg = as_VectorRegister($dst$$reg);
    VectorRegister src_reg = as_VectorRegister($src$$reg);
    if (con == 0 && dst_reg == src_reg) {
      // In-place rotate by zero: nothing to do.
      return;
    }
    __ vsetvli_helper(bt, Matcher::vector_length(this));
    // Masking again keeps a zero rotate at 0 rather than at bits, which
    // would not be a valid rotate amount for the element width.
    con = (bits - con) & (bits - 1);
    __ vror_vi(dst_reg, src_reg, con);
  %}
  ins_pipe(pipe_slow);
%}

// Rotate left - masked

// Masked element-wise rotate left, performed in place on dst_src under
// the v0 mask.
instruct vrotate_left_masked(vReg dst_src, vReg shift, vRegMask_V0 v0) %{
  match(Set dst_src (RotateLeftV (Binary dst_src shift) v0));
  format %{ "vrotate_left_masked $dst_src, $dst_src, $shift, $v0\t" %}
  ins_encode %{
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    VectorRegister inout_vec = as_VectorRegister($dst_src$$reg);
    VectorRegister amount_vec = as_VectorRegister($shift$$reg);
    __ vsetvli_helper(elem_bt, Matcher::vector_length(this));
    __ vrol_vv(inout_vec, inout_vec, amount_vec, Assembler::v0_t);
  %}
  ins_pipe(pipe_slow);
%}

// Masked rotate left by a replicated scalar amount, in place on dst_src.
// The hardware uses only the low log2(SEW) bits of the shift value; the
// remaining bits are ignored.
instruct vrotate_left_vx_masked(vReg dst_src, iRegIorL2I shift, vRegMask_V0 v0) %{
  match(Set dst_src (RotateLeftV (Binary dst_src (Replicate shift)) v0));
  format %{ "vrotate_left_vx_masked $dst_src, $dst_src, $shift, $v0\t" %}
  ins_encode %{
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    VectorRegister inout_vec = as_VectorRegister($dst_src$$reg);
    Register amount_gpr = as_Register($shift$$reg);
    __ vsetvli_helper(elem_bt, Matcher::vector_length(this));
    __ vrol_vx(inout_vec, inout_vec, amount_gpr, Assembler::v0_t);
  %}
  ins_pipe(pipe_slow);
%}

// Masked rotate left by an immediate amount, in place on dst_src.
// Encoded as a rotate right by the complementary amount. Nothing is emitted
// when the amount is a multiple of the element width, since source and
// destination are the same register.
instruct vrotate_left_vi_masked(vReg dst_src, immI shift, vRegMask_V0 v0) %{
  match(Set dst_src (RotateLeftV (Binary dst_src shift) v0));
  format %{ "vrotate_left_vi_masked $dst_src, $dst_src, $shift, $v0\t" %}
  ins_encode %{
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    const uint32_t elem_bits = type2aelembytes(elem_bt) * 8;
    const uint32_t left_amount = (unsigned)$shift$$constant & (elem_bits - 1);
    if (left_amount != 0) {
      VectorRegister inout_vec = as_VectorRegister($dst_src$$reg);
      __ vsetvli_helper(elem_bt, Matcher::vector_length(this));
      // rol by k is ror by (elem_bits - k)
      const uint32_t right_amount = elem_bits - left_amount;
      __ vror_vi(inout_vec, inout_vec, right_amount, Assembler::v0_t);
    }
  %}
  ins_pipe(pipe_slow);
%}

// vector sqrt

// Half-precision vector square root (SqrtVHF). Elements are 16 bits wide,
// so the vector type is configured with T_SHORT.
instruct vsqrt_hfp(vReg dst, vReg src) %{
  match(Set dst (SqrtVHF src));
  format %{ "vsqrt_hfp $dst, $src" %}
  ins_encode %{
    assert(UseZvfh, "must");
    assert(Matcher::vector_element_basic_type(this) == T_SHORT, "must");
    VectorRegister result_vec = as_VectorRegister($dst$$reg);
    VectorRegister value_vec = as_VectorRegister($src$$reg);
    __ vsetvli_helper(T_SHORT, Matcher::vector_length(this));
    __ vfsqrt_v(result_vec, value_vec);
  %}
  ins_pipe(pipe_slow);
%}

// Single- and double-precision vector square root (SqrtVF / SqrtVD).
instruct vsqrt_fp(vReg dst, vReg src) %{
  match(Set dst (SqrtVF src));
  match(Set dst (SqrtVD src));
  format %{ "vsqrt_fp $dst, $src" %}
  ins_encode %{
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    VectorRegister result_vec = as_VectorRegister($dst$$reg);
    VectorRegister value_vec = as_VectorRegister($src$$reg);
    __ vsetvli_helper(elem_bt, Matcher::vector_length(this));
    __ vfsqrt_v(result_vec, value_vec);
  %}
  ins_pipe(pipe_slow);
%}

// vector sqrt - predicated

// Masked single/double-precision vector square root, in place on dst_src
// under the v0 mask.
instruct vsqrt_fp_masked(vReg dst_src, vRegMask_V0 v0) %{
  match(Set dst_src (SqrtVF dst_src v0));
  match(Set dst_src (SqrtVD dst_src v0));
  format %{ "vsqrt_fp_masked $dst_src, $dst_src, $v0" %}
  ins_encode %{
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    VectorRegister inout_vec = as_VectorRegister($dst_src$$reg);
    __ vsetvli_helper(elem_bt, Matcher::vector_length(this));
    __ vfsqrt_v(inout_vec, inout_vec, Assembler::v0_t);
  %}
  ins_pipe(pipe_slow);
%}

// RVV StrEquals intrinsic for the LL encoding. Inputs are pinned to fixed
// registers and consumed; V2-V5 and the flags are clobbered.
instruct vstring_equalsL(iRegP_R11 str1, iRegP_R13 str2, iRegI_R14 cnt,
                         iRegI_R10 result, vReg_V2 v2,
                         vReg_V3 v3, vReg_V4 v4, vReg_V5 v5, rFlagsReg cr)
%{
  predicate(UseRVV && ((StrEqualsNode*)n)->encoding() == StrIntrinsicNode::LL);
  match(Set result (StrEquals (Binary str1 str2) cnt));
  effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt, TEMP v2, TEMP v3, TEMP v4, TEMP v5, KILL cr);

  format %{ "String Equals $str1, $str2, $cnt -> $result\t#@string_equalsL" %}
  ins_encode %{
    // Count is in 8-bit bytes; non-Compact chars are 16 bits.
    Register lhs = $str1$$Register;
    Register rhs = $str2$$Register;
    Register res = $result$$Register;
    Register byte_cnt = $cnt$$Register;
    __ string_equals_v(lhs, rhs, res, byte_cnt);
  %}
  ins_pipe(pipe_class_memory);
%}

// RVV AryEq intrinsic for the LL encoding (byte arrays). Both array pointers
// are consumed; tmp, V2-V5 and the flags are clobbered.
instruct varray_equalsB(iRegP_R11 ary1, iRegP_R12 ary2, iRegI_R10 result,
                        vReg_V2 v2, vReg_V3 v3, vReg_V4 v4, vReg_V5 v5, iRegP_R28 tmp, rFlagsReg cr)
%{
  predicate(UseRVV && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL);
  match(Set result (AryEq ary1 ary2));
  effect(KILL tmp, USE_KILL ary1, USE_KILL ary2, TEMP v2, TEMP v3, TEMP v4, TEMP v5, KILL cr);

  format %{ "Array Equals $ary1, $ary2 -> $result\t#@array_equalsB // KILL $tmp" %}
  ins_encode %{
    // Trailing argument 1: presumably the element size in bytes for the
    // LL (byte) encoding -- confirm against arrays_equals_v.
    __ arrays_equals_v($ary1$$Register, $ary2$$Register,
                       $result$$Register, $tmp$$Register, 1);
  %}
  ins_pipe(pipe_class_memory);
%}

// RVV AryEq intrinsic for the UU encoding (char arrays). Both array pointers
// are consumed; tmp, V2-V5 and the flags are clobbered.
instruct varray_equalsC(iRegP_R11 ary1, iRegP_R12 ary2, iRegI_R10 result,
                        vReg_V2 v2, vReg_V3 v3, vReg_V4 v4, vReg_V5 v5, iRegP_R28 tmp, rFlagsReg cr)
%{
  predicate(UseRVV && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
  match(Set result (AryEq ary1 ary2));
  effect(KILL tmp, USE_KILL ary1, USE_KILL ary2, TEMP v2, TEMP v3, TEMP v4, TEMP v5, KILL cr);

  format %{ "Array Equals $ary1, $ary2 -> $result\t#@array_equalsC // KILL $tmp" %}
  ins_encode %{
    // Trailing argument 2: presumably the element size in bytes for the
    // UU (char) encoding -- confirm against arrays_equals_v.
    __ arrays_equals_v($ary1$$Register, $ary2$$Register,
                       $result$$Register, $tmp$$Register, 2);
  %}
  ins_pipe(pipe_class_memory);
%}

// RVV StrComp intrinsic for the UU encoding, selected only when
// MaxVectorSize is 16 bytes. Reserves V4-V11 as temporaries.
instruct vstring_compareU_128b(iRegP_R11 str1, iRegI_R12 cnt1, iRegP_R13 str2, iRegI_R14 cnt2,
                          iRegI_R10 result, vReg_V4 v4, vReg_V5 v5, vReg_V6 v6, vReg_V7 v7,
                          vReg_V8 v8, vReg_V9 v9, vReg_V10 v10, vReg_V11 v11,
                          iRegP_R28 tmp1, iRegL_R29 tmp2)
%{
  predicate(UseRVV && ((StrCompNode *)n)->encoding() == StrIntrinsicNode::UU &&
            MaxVectorSize == 16);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(KILL tmp1, KILL tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2,
        TEMP v4, TEMP v5, TEMP v6, TEMP v7, TEMP v8, TEMP v9, TEMP v10, TEMP v11);

  format %{ "String Compare $str1, $cnt1, $str2, $cnt2 -> $result\t#@string_compareU" %}
  ins_encode %{
    // Count is in 8-bit bytes; non-Compact chars are 16 bits.
    Register lhs = $str1$$Register;
    Register rhs = $str2$$Register;
    Register lhs_cnt = $cnt1$$Register;
    Register rhs_cnt = $cnt2$$Register;
    Register res = $result$$Register;
    Register scratch1 = $tmp1$$Register;
    Register scratch2 = $tmp2$$Register;
    __ string_compare_v(lhs, rhs, lhs_cnt, rhs_cnt, res,
                        scratch1, scratch2, StrIntrinsicNode::UU);
  %}
  ins_pipe(pipe_class_memory);
%}

// RVV StrComp intrinsic for the UU encoding, selected when MaxVectorSize
// is larger than 16 bytes. Reserves V2-V5 as temporaries.
instruct vstring_compareU(iRegP_R11 str1, iRegI_R12 cnt1, iRegP_R13 str2, iRegI_R14 cnt2,
                          iRegI_R10 result, vReg_V2 v2, vReg_V3 v3, vReg_V4 v4, vReg_V5 v5,
                          iRegP_R28 tmp1, iRegL_R29 tmp2)
%{
  predicate(UseRVV && ((StrCompNode *)n)->encoding() == StrIntrinsicNode::UU &&
            MaxVectorSize > 16);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(KILL tmp1, KILL tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2,
        TEMP v2, TEMP v3, TEMP v4, TEMP v5);

  format %{ "String Compare $str1, $cnt1, $str2, $cnt2 -> $result\t#@string_compareU" %}
  ins_encode %{
    // Count is in 8-bit bytes; non-Compact chars are 16 bits.
    Register lhs = $str1$$Register;
    Register rhs = $str2$$Register;
    Register lhs_cnt = $cnt1$$Register;
    Register rhs_cnt = $cnt2$$Register;
    Register res = $result$$Register;
    Register scratch1 = $tmp1$$Register;
    Register scratch2 = $tmp2$$Register;
    __ string_compare_v(lhs, rhs, lhs_cnt, rhs_cnt, res,
                        scratch1, scratch2, StrIntrinsicNode::UU);
  %}
  ins_pipe(pipe_class_memory);
%}

// RVV StrComp intrinsic for the LL encoding. Reserves V2-V5 as temporaries.
instruct vstring_compareL(iRegP_R11 str1, iRegI_R12 cnt1, iRegP_R13 str2, iRegI_R14 cnt2,
                          iRegI_R10 result, vReg_V2 v2, vReg_V3 v3, vReg_V4 v4, vReg_V5 v5,
                          iRegP_R28 tmp1, iRegL_R29 tmp2)
%{
  predicate(UseRVV && ((StrCompNode *)n)->encoding() == StrIntrinsicNode::LL);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(KILL tmp1, KILL tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2,
        TEMP v2, TEMP v3, TEMP v4, TEMP v5);

  format %{ "String Compare $str1, $cnt1, $str2, $cnt2 -> $result\t#@string_compareL" %}
  ins_encode %{
    Register lhs = $str1$$Register;
    Register rhs = $str2$$Register;
    Register lhs_cnt = $cnt1$$Register;
    Register rhs_cnt = $cnt2$$Register;
    Register res = $result$$Register;
    Register scratch1 = $tmp1$$Register;
    Register scratch2 = $tmp2$$Register;
    __ string_compare_v(lhs, rhs, lhs_cnt, rhs_cnt, res,
                        scratch1, scratch2, StrIntrinsicNode::LL);
  %}
  ins_pipe(pipe_class_memory);
%}

// RVV StrComp intrinsic for the mixed UL encoding. Reserves V4-V11 as
// temporaries.
instruct vstring_compareUL(iRegP_R11 str1, iRegI_R12 cnt1, iRegP_R13 str2, iRegI_R14 cnt2,
                           iRegI_R10 result, vReg_V4 v4, vReg_V5 v5, vReg_V6 v6, vReg_V7 v7,
                           vReg_V8 v8, vReg_V9 v9, vReg_V10 v10, vReg_V11 v11,
                           iRegP_R28 tmp1, iRegL_R29 tmp2)
%{
  predicate(UseRVV && ((StrCompNode *)n)->encoding() == StrIntrinsicNode::UL);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(KILL tmp1, KILL tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2,
         TEMP v4, TEMP v5, TEMP v6, TEMP v7, TEMP v8, TEMP v9, TEMP v10, TEMP v11);

  format %{"String Compare $str1, $cnt1, $str2, $cnt2 -> $result\t#@string_compareUL" %}
  ins_encode %{
    Register lhs = $str1$$Register;
    Register rhs = $str2$$Register;
    Register lhs_cnt = $cnt1$$Register;
    Register rhs_cnt = $cnt2$$Register;
    Register res = $result$$Register;
    Register scratch1 = $tmp1$$Register;
    Register scratch2 = $tmp2$$Register;
    __ string_compare_v(lhs, rhs, lhs_cnt, rhs_cnt, res,
                        scratch1, scratch2, StrIntrinsicNode::UL);
  %}
  ins_pipe(pipe_class_memory);
%}
// RVV StrComp intrinsic for the mixed LU encoding. Reserves V4-V11 as
// temporaries.
instruct vstring_compareLU(iRegP_R11 str1, iRegI_R12 cnt1, iRegP_R13 str2, iRegI_R14 cnt2,
                           iRegI_R10 result, vReg_V4 v4, vReg_V5 v5, vReg_V6 v6, vReg_V7 v7,
                           vReg_V8 v8, vReg_V9 v9, vReg_V10 v10, vReg_V11 v11,
                           iRegP_R28 tmp1, iRegL_R29 tmp2)
%{
  predicate(UseRVV && ((StrCompNode *)n)->encoding() == StrIntrinsicNode::LU);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(KILL tmp1, KILL tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2,
         TEMP v4, TEMP v5, TEMP v6, TEMP v7, TEMP v8, TEMP v9, TEMP v10, TEMP v11);

  format %{ "String Compare $str1, $cnt1, $str2, $cnt2 -> $result\t#@string_compareLU" %}
  ins_encode %{
    Register lhs = $str1$$Register;
    Register rhs = $str2$$Register;
    Register lhs_cnt = $cnt1$$Register;
    Register rhs_cnt = $cnt2$$Register;
    Register res = $result$$Register;
    Register scratch1 = $tmp1$$Register;
    Register scratch2 = $tmp2$$Register;
    __ string_compare_v(lhs, rhs, lhs_cnt, rhs_cnt, res,
                        scratch1, scratch2, StrIntrinsicNode::LU);
  %}
  ins_pipe(pipe_class_memory);
%}

// Fast byte[] to char[] inflation (StrInflatedCopy) using RVV.
// src, dst and len are consumed; V4-V7 and tmp are scratch.
instruct vstring_inflate(Universe dummy, iRegP_R10 src, iRegP_R11 dst, iRegI_R12 len,
                         vReg_V4 v4, vReg_V5 v5, vReg_V6 v6, vReg_V7 v7, iRegLNoSp tmp)
%{
  predicate(UseRVV);
  match(Set dummy (StrInflatedCopy src (Binary dst len)));
  effect(TEMP v4, TEMP v5, TEMP v6, TEMP v7, TEMP tmp, USE_KILL src, USE_KILL dst, USE_KILL len);

  format %{ "String Inflate $src,$dst" %}
  ins_encode %{
    Register from = $src$$Register;
    Register to = $dst$$Register;
    Register count = $len$$Register;
    Register scratch = $tmp$$Register;
    __ byte_array_inflate_v(from, to, count, scratch);
  %}
  ins_pipe(pipe_class_memory);
%}

// Encode char[] to byte[] in ISO_8859_1 using RVV. Selected when the
// EncodeISOArray node is not the ASCII variant.
instruct vencode_iso_array(iRegP_R12 src, iRegP_R11 dst, iRegI_R13 len, iRegI_R10 result,
                           vReg_V1 v1, vReg_V2 v2, vReg_V3 v3, vRegMask_V0 v0, iRegLNoSp tmp)
%{
  predicate(UseRVV && !((EncodeISOArrayNode*)n)->is_ascii());
  match(Set result (EncodeISOArray src (Binary dst len)));
  effect(TEMP_DEF result, USE_KILL src, USE_KILL dst, USE_KILL len,
         TEMP v0, TEMP v1, TEMP v2, TEMP v3, TEMP tmp);

  format %{ "Encode ISO array $src, $dst, $len -> $result # KILL $src, $dst, $len, $tmp, V0-V3" %}
  ins_encode %{
    Register from = $src$$Register;
    Register to = $dst$$Register;
    Register count = $len$$Register;
    Register res = $result$$Register;
    Register scratch = $tmp$$Register;
    __ encode_iso_array_v(from, to, count, res, scratch, false /* ascii */);
  %}
  ins_pipe(pipe_class_memory);
%}

// ASCII variant of the EncodeISOArray intrinsic using RVV. Shares the
// encode_iso_array_v helper, with the ascii flag set.
instruct vencode_ascii_array(iRegP_R12 src, iRegP_R11 dst, iRegI_R13 len, iRegI_R10 result,
                             vReg_V1 v1, vReg_V2 v2, vReg_V3 v3, vRegMask_V0 v0, iRegLNoSp tmp)
%{
  predicate(UseRVV && ((EncodeISOArrayNode*)n)->is_ascii());
  match(Set result (EncodeISOArray src (Binary dst len)));
  effect(TEMP_DEF result, USE_KILL src, USE_KILL dst, USE_KILL len,
         TEMP v0, TEMP v1, TEMP v2, TEMP v3, TEMP tmp);

  format %{ "Encode ASCII array $src, $dst, $len -> $result # KILL $src, $dst, $len, $tmp, V0-V3" %}
  ins_encode %{
    Register from = $src$$Register;
    Register to = $dst$$Register;
    Register count = $len$$Register;
    Register res = $result$$Register;
    Register scratch = $tmp$$Register;
    __ encode_iso_array_v(from, to, count, res, scratch, true /* ascii */);
  %}
  ins_pipe(pipe_class_memory);
%}

// Fast char[] to byte[] compression (StrCompressedCopy) using RVV.
// src, dst and len are consumed; V0-V3 and tmp are scratch.
instruct vstring_compress(iRegP_R12 src, iRegP_R11 dst, iRegI_R13 len, iRegI_R10 result,
                          vReg_V1 v1, vReg_V2 v2, vReg_V3 v3, vRegMask_V0 v0, iRegLNoSp tmp)
%{
  predicate(UseRVV);
  match(Set result (StrCompressedCopy src (Binary dst len)));
  effect(TEMP_DEF result, USE_KILL src, USE_KILL dst, USE_KILL len,
         TEMP v0, TEMP v1, TEMP v2, TEMP v3, TEMP tmp);

  format %{ "String Compress $src,$dst -> $result    // KILL R11, R12, R13" %}
  ins_encode %{
    Register from = $src$$Register;
    Register to = $dst$$Register;
    Register count = $len$$Register;
    Register res = $result$$Register;
    Register scratch = $tmp$$Register;
    __ char_array_compress_v(from, to, count, res, scratch);
  %}
  ins_pipe(pipe_class_memory);
%}

// RVV CountPositives intrinsic over a byte[].
// ary and len are consumed; V4-V7 and tmp are scratch.
instruct vcount_positives(iRegP_R11 ary, iRegI_R12 len, iRegI_R10 result,
                          vReg_V4 v4, vReg_V5 v5, vReg_V6 v6, vReg_V7 v7, iRegLNoSp tmp)
%{
  predicate(UseRVV);
  match(Set result (CountPositives ary len));
  effect(TEMP_DEF result, USE_KILL ary, USE_KILL len, TEMP v4, TEMP v5, TEMP v6, TEMP v7, TEMP tmp);

  format %{ "count positives byte[] $ary, $len -> $result" %}
  ins_encode %{
    Register bytes = $ary$$Register;
    Register count = $len$$Register;
    Register res = $result$$Register;
    Register scratch = $tmp$$Register;
    __ count_positives_v(bytes, count, res, scratch);
  %}

  ins_pipe(pipe_class_memory);
%}

// RVV StrIndexOfChar intrinsic for the U (UTF-16) encoding.
// Calls string_indexof_char_v with isL set to false.
instruct vstringU_indexof_char(iRegP_R11 str1, iRegI_R12 cnt1, iRegI_R13 ch,
                               iRegI_R10 result, iRegINoSp tmp1, iRegINoSp tmp2,
                               vReg_V4 v4, vReg_V5 v5, vReg_V6 v6, vReg_V7 v7)
%{
  predicate(UseRVV && (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::U));
  match(Set result (StrIndexOfChar (Binary str1 cnt1) ch));
  effect(TEMP_DEF result, USE_KILL str1, USE_KILL cnt1, USE_KILL ch,
         TEMP tmp1, TEMP tmp2, TEMP v4, TEMP v5, TEMP v6, TEMP v7);

  format %{ "StringUTF16 IndexOf char[] $str1, $cnt1, $ch -> $result" %}

  ins_encode %{
    Register haystack = $str1$$Register;
    Register count = $cnt1$$Register;
    Register needle = $ch$$Register;
    Register res = $result$$Register;
    Register scratch1 = $tmp1$$Register;
    Register scratch2 = $tmp2$$Register;
    __ string_indexof_char_v(haystack, count, needle, res,
                             scratch1, scratch2, false /* isL */);
  %}

  ins_pipe(pipe_class_memory);
%}

// RVV StrIndexOfChar intrinsic for the L (Latin1) encoding.
// Calls string_indexof_char_v with isL set to true.
instruct vstringL_indexof_char(iRegP_R11 str1, iRegI_R12 cnt1, iRegI_R13 ch,
                               iRegI_R10 result, iRegINoSp tmp1, iRegINoSp tmp2,
                               vReg_V4 v4, vReg_V5 v5, vReg_V6 v6, vReg_V7 v7)
%{
  predicate(UseRVV && (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::L));
  match(Set result (StrIndexOfChar (Binary str1 cnt1) ch));
  effect(TEMP_DEF result, USE_KILL str1, USE_KILL cnt1, USE_KILL ch,
         TEMP tmp1, TEMP tmp2, TEMP v4, TEMP v5, TEMP v6, TEMP v7);

  format %{ "StringLatin1 IndexOf char[] $str1, $cnt1, $ch -> $result" %}

  ins_encode %{
    Register haystack = $str1$$Register;
    Register count = $cnt1$$Register;
    Register needle = $ch$$Register;
    Register res = $result$$Register;
    Register scratch1 = $tmp1$$Register;
    Register scratch2 = $tmp2$$Register;
    __ string_indexof_char_v(haystack, count, needle, res,
                             scratch1, scratch2, true /* isL */);
  %}

  ins_pipe(pipe_class_memory);
%}

// Clearing of an array (ClearArray) using RVV. Selected only when RVV is
// enabled and block zeroing is not in use. cnt and base are consumed;
// V4-V7 are scratch.
instruct vclearArray_reg_reg(iRegL_R29 cnt, iRegP_R28 base, Universe dummy,
                             vReg_V4 v4, vReg_V5 v5, vReg_V6 v6, vReg_V7 v7)
%{
  predicate(!UseBlockZeroing && UseRVV);
  match(Set dummy (ClearArray cnt base));
  effect(USE_KILL cnt, USE_KILL base, TEMP v4, TEMP v5, TEMP v6, TEMP v7);

  format %{ "ClearArray $cnt, $base\t#@clearArray_reg_reg" %}

  ins_encode %{
    Register start = $base$$Register;
    Register count = $cnt$$Register;
    __ clear_array_v(start, count);
  %}

  ins_pipe(pipe_class_memory);
%}

// Vector Load Const: fill dst with the iota index sequence via vid.v.
// For floating-point element types the integer indices are then converted
// in place to floating point.
instruct vloadcon(vReg dst, immI0 src) %{
  match(Set dst (VectorLoadConst src));
  format %{ "vloadcon $dst\t# generate iota indices" %}
  ins_encode %{
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    VectorRegister iota_vec = as_VectorRegister($dst$$reg);
    __ vsetvli_helper(elem_bt, Matcher::vector_length(this));
    __ vid_v(iota_vec);
    const bool needs_fp = is_floating_point_type(elem_bt);
    if (needs_fp) {
      __ vfcvt_f_x_v(iota_vec, iota_vec);
    }
  %}
  ins_pipe(pipe_slow);
%}

// VectorMaskGen from an int length (matched through ConvI2L): builds the
// element indices with vid.v, then sets each mask bit whose index is
// unsigned-less-than src.
instruct vmask_gen_I(vRegMask dst, iRegI src) %{
  match(Set dst (VectorMaskGen (ConvI2L src)));
  format %{ "vmask_gen_I $dst, $src" %}
  ins_encode %{
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    VectorRegister mask_vec = as_VectorRegister($dst$$reg);
    Register limit = $src$$Register;
    __ vsetvli_helper(elem_bt, Matcher::vector_length(this));
    __ vid_v(mask_vec);
    __ vmsltu_vx(mask_vec, mask_vec, limit);
  %}
  ins_pipe(pipe_slow);
%}

// VectorMaskGen from a long length: builds the element indices with vid.v,
// then sets each mask bit whose index is unsigned-less-than src.
instruct vmask_gen_L(vRegMask dst, iRegL src) %{
  match(Set dst (VectorMaskGen src));
  format %{ "vmask_gen_L $dst, $src" %}
  ins_encode %{
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    VectorRegister mask_vec = as_VectorRegister($dst$$reg);
    Register limit = $src$$Register;
    __ vsetvli_helper(elem_bt, Matcher::vector_length(this));
    __ vid_v(mask_vec);
    __ vmsltu_vx(mask_vec, mask_vec, limit);
  %}
  ins_pipe(pipe_slow);
%}

// VectorMaskGen from a constant length. Matches only when the constant is
// at most 16 or equals the vector length. Zero clears the mask, the full
// vector length sets it, and anything else compares the element indices
// against (constant - 1) with an unsigned less-or-equal.
instruct vmask_gen_imm(vRegMask dst, immL con) %{
  predicate(n->in(1)->get_long() <= 16 ||
            n->in(1)->get_long() == Matcher::vector_length(n));
  match(Set dst (VectorMaskGen con));
  format %{ "vmask_gen_imm $dst, $con" %}
  ins_encode %{
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    const uint lane_cnt = Matcher::vector_length(this);
    const uint active_cnt = (uint)($con$$constant);
    VectorRegister mask_vec = as_VectorRegister($dst$$reg);
    __ vsetvli_helper(elem_bt, lane_cnt);
    if (active_cnt == 0) {
      __ vmclr_m(mask_vec);
    } else if (active_cnt == lane_cnt) {
      __ vmset_m(mask_vec);
    } else {
      assert(active_cnt < lane_cnt, "unsupported input lane_cnt");
      __ vid_v(mask_vec);
      __ vmsleu_vi(mask_vec, mask_vec, active_cnt - 1);
    }
  %}
  ins_pipe(pipe_slow);
%}

// MaskAll from an int constant: 0 clears every mask bit, -1 sets every bit.
// Any other constant is rejected by the assert.
instruct vmaskAll_immI(vRegMask dst, immI src) %{
  match(Set dst (MaskAll src));
  format %{ "vmaskAll_immI $dst, $src" %}
  ins_encode %{
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    VectorRegister mask_vec = as_VectorRegister($dst$$reg);
    __ vsetvli_helper(elem_bt, Matcher::vector_length(this));
    int con = (int)$src$$constant;
    if (con != 0) {
      assert(con == -1, "invalid constant value for mask");
      __ vmset_m(mask_vec);
    } else {
      __ vmclr_m(mask_vec);
    }
  %}
  ins_pipe(pipe_slow);
%}

// MaskAll from an int register: splat src across dst, then set each mask
// bit where the splatted element is not equal to zero.
instruct vmaskAllI(vRegMask dst, iRegIorL2I src) %{
  match(Set dst (MaskAll src));
  format %{ "vmaskAllI $dst, $src" %}
  ins_encode %{
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    VectorRegister mask_vec = as_VectorRegister($dst$$reg);
    Register value = as_Register($src$$reg);
    __ vsetvli_helper(elem_bt, Matcher::vector_length(this));
    __ vmv_v_x(mask_vec, value);
    __ vmsne_vx(mask_vec, mask_vec, zr);
  %}
  ins_pipe(pipe_slow);
%}

// MaskAll from a long constant: 0 clears every mask bit, -1 sets every bit.
// Any other constant is rejected by the assert.
instruct vmaskAll_immL(vRegMask dst, immL src) %{
  match(Set dst (MaskAll src));
  format %{ "vmaskAll_immL $dst, $src" %}
  ins_encode %{
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    VectorRegister mask_vec = as_VectorRegister($dst$$reg);
    __ vsetvli_helper(elem_bt, Matcher::vector_length(this));
    long con = (long)$src$$constant;
    if (con != 0) {
      assert(con == -1, "invalid constant value for mask");
      __ vmset_m(mask_vec);
    } else {
      __ vmclr_m(mask_vec);
    }
  %}
  ins_pipe(pipe_slow);
%}

// MaskAll from a long register: splat src across dst, then set each mask
// bit where the splatted element is not equal to zero.
instruct vmaskAllL(vRegMask dst, iRegL src) %{
  match(Set dst (MaskAll src));
  format %{ "vmaskAllL $dst, $src" %}
  ins_encode %{
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    VectorRegister mask_vec = as_VectorRegister($dst$$reg);
    Register value = as_Register($src$$reg);
    __ vsetvli_helper(elem_bt, Matcher::vector_length(this));
    __ vmv_v_x(mask_vec, value);
    __ vmsne_vx(mask_vec, mask_vec, zr);
  %}
  ins_pipe(pipe_slow);
%}

// ------------------------------ Vector mask basic OPs ------------------------

// vector mask logical ops: and/or/xor

// Mask-register logical AND (AndVMask).
instruct vmask_and(vRegMask dst, vRegMask src1, vRegMask src2) %{
  match(Set dst (AndVMask src1 src2));
  format %{ "vmask_and $dst, $src1, $src2" %}
  ins_encode %{
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    VectorRegister out_mask = as_VectorRegister($dst$$reg);
    VectorRegister lhs_mask = as_VectorRegister($src1$$reg);
    VectorRegister rhs_mask = as_VectorRegister($src2$$reg);
    __ vsetvli_helper(elem_bt, Matcher::vector_length(this));
    __ vmand_mm(out_mask, lhs_mask, rhs_mask);
  %}
  ins_pipe(pipe_slow);
%}

// Mask-register logical OR (OrVMask).
instruct vmask_or(vRegMask dst, vRegMask src1, vRegMask src2) %{
  match(Set dst (OrVMask src1 src2));
  format %{ "vmask_or $dst, $src1, $src2" %}
  ins_encode %{
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    VectorRegister out_mask = as_VectorRegister($dst$$reg);
    VectorRegister lhs_mask = as_VectorRegister($src1$$reg);
    VectorRegister rhs_mask = as_VectorRegister($src2$$reg);
    __ vsetvli_helper(elem_bt, Matcher::vector_length(this));
    __ vmor_mm(out_mask, lhs_mask, rhs_mask);
  %}
  ins_pipe(pipe_slow);
%}

// Mask-register logical XOR (XorVMask).
instruct vmask_xor(vRegMask dst, vRegMask src1, vRegMask src2) %{
  match(Set dst (XorVMask src1 src2));
  format %{ "vmask_xor $dst, $src1, $src2" %}
  ins_encode %{
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    VectorRegister out_mask = as_VectorRegister($dst$$reg);
    VectorRegister lhs_mask = as_VectorRegister($src1$$reg);
    VectorRegister rhs_mask = as_VectorRegister($src2$$reg);
    __ vsetvli_helper(elem_bt, Matcher::vector_length(this));
    __ vmxor_mm(out_mask, lhs_mask, rhs_mask);
  %}
  ins_pipe(pipe_slow);
%}

// VectorMaskCast performed in place: source and destination share one
// register, so the rule has zero cost and emits no instructions.
instruct vmaskcast(vRegMask dst_src) %{
  match(Set dst_src (VectorMaskCast dst_src));
  ins_cost(0);
  format %{ "vmaskcast $dst_src, $dst_src\t# do nothing" %}
  ins_encode(/* nothing to emit */);
  ins_pipe(pipe_class_empty);
%}

// vector load/store - predicated

// Masked vector load (LoadVectorMasked) through the shared loadStore helper,
// using v0 as the mask.
instruct loadV_masked(vReg dst, vmemA mem, vRegMask_V0 v0) %{
  match(Set dst (LoadVectorMasked mem v0));
  format %{ "loadV_masked $dst, $mem, $v0" %}
  ins_encode %{
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    const uint lane_cnt = Matcher::vector_length(this);
    VectorRegister loaded_vec = as_VectorRegister($dst$$reg);
    Register addr = as_Register($mem$$base);
    loadStore(masm, false /* is_store */, loaded_vec, elem_bt, addr,
              lane_cnt, Assembler::v0_t);
  %}
  ins_pipe(pipe_slow);
%}

// Masked vector store (StoreVectorMasked) through the shared loadStore
// helper, using v0 as the mask. Element type and length are taken from the
// src operand rather than from the store node itself.
instruct storeV_masked(vReg src, vmemA mem, vRegMask_V0 v0) %{
  match(Set mem (StoreVectorMasked mem (Binary src v0)));
  format %{ "storeV_masked $mem, $src, $v0" %}
  ins_encode %{
    const BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);
    const uint lane_cnt = Matcher::vector_length(this, $src);
    VectorRegister stored_vec = as_VectorRegister($src$$reg);
    Register addr = as_Register($mem$$base);
    loadStore(masm, true /* is_store */, stored_vec, elem_bt, addr,
              lane_cnt, Assembler::v0_t);
  %}
  ins_pipe(pipe_slow);
%}

// ------------------------------ Vector blend ---------------------------------

// VectorBlend of src1 and src2 under the v0 mask, encoded as one vmerge.vvm.
instruct vblend(vReg dst, vReg src1, vReg src2, vRegMask_V0 v0) %{
  match(Set dst (VectorBlend (Binary src1 src2) v0));
  format %{ "vblend $dst, $src1, $src2, v0" %}
  ins_encode %{
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    VectorRegister result_vec = as_VectorRegister($dst$$reg);
    VectorRegister first_vec = as_VectorRegister($src1$$reg);
    VectorRegister second_vec = as_VectorRegister($src2$$reg);
    __ vsetvli_helper(elem_bt, Matcher::vector_length(this));
    __ vmerge_vvm(result_vec, first_vec, second_vec);
  %}
  ins_pipe(pipe_slow);
%}

// ------------------------------ Vector cast ----------------------------------

// VectorCastB2X, VectorUCastB2X

// VectorCastB2X: signed byte to any wider element type.
// Integer targets are a plain sign extension. Floating-point targets first
// sign-extend to the integer type of the same width (T_INT for T_FLOAT,
// T_LONG otherwise) and then convert that integer to floating point in place.
instruct vcvtBtoX(vReg dst, vReg src) %{
  match(Set dst (VectorCastB2X src));
  effect(TEMP_DEF dst);
  format %{ "vcvtBtoX $dst, $src" %}
  ins_encode %{
    const BasicType to_bt = Matcher::vector_element_basic_type(this);
    const bool to_fp = is_floating_point_type(to_bt);
    VectorRegister result_vec = as_VectorRegister($dst$$reg);
    VectorRegister value_vec = as_VectorRegister($src$$reg);
    // Integer type the bytes are widened to before any fp conversion.
    const BasicType widened_bt = to_fp ? (to_bt == T_FLOAT ? T_INT : T_LONG) : to_bt;
    __ integer_extend_v(result_vec, widened_bt, Matcher::vector_length(this),
                        value_vec, T_BYTE, true /* is_signed */);
    if (to_fp) {
      __ vfcvt_f_x_v(result_vec, result_vec);
    }
  %}
  ins_pipe(pipe_slow);
%}

// VectorUCastB2X: zero-extend bytes to short, int or long elements.
instruct vcvtUBtoX(vReg dst, vReg src) %{
  predicate(Matcher::vector_element_basic_type(n) == T_SHORT ||
            Matcher::vector_element_basic_type(n) == T_INT ||
            Matcher::vector_element_basic_type(n) == T_LONG);
  match(Set dst (VectorUCastB2X src));
  effect(TEMP_DEF dst);
  format %{ "vcvtUBtoX $dst, $src" %}
  ins_encode %{
    const BasicType to_bt = Matcher::vector_element_basic_type(this);
    VectorRegister result_vec = as_VectorRegister($dst$$reg);
    VectorRegister value_vec = as_VectorRegister($src$$reg);
    __ integer_extend_v(result_vec, to_bt, Matcher::vector_length(this),
                        value_vec, T_BYTE, false /* is_signed */);
  %}
  ins_pipe(pipe_slow);
%}

// VectorCastS2X, VectorUCastS2X

// VectorCastS2X with a byte result: narrow short elements to bytes.
instruct vcvtStoB(vReg dst, vReg src) %{
  predicate(Matcher::vector_element_basic_type(n) == T_BYTE);
  match(Set dst (VectorCastS2X src));
  format %{ "vcvtStoB $dst, $src" %}
  ins_encode %{
    VectorRegister result_vec = as_VectorRegister($dst$$reg);
    VectorRegister value_vec = as_VectorRegister($src$$reg);
    const uint lane_cnt = Matcher::vector_length(this);
    __ integer_narrow_v(result_vec, T_BYTE, lane_cnt, value_vec, T_SHORT);
  %}
  ins_pipe(pipe_slow);
%}

// VectorCastS2X with an int or long result: sign-extend short elements.
instruct vcvtStoX(vReg dst, vReg src) %{
  predicate(Matcher::vector_element_basic_type(n) == T_INT ||
            Matcher::vector_element_basic_type(n) == T_LONG);
  match(Set dst (VectorCastS2X src));
  effect(TEMP_DEF dst);
  format %{ "vcvtStoX $dst, $src" %}
  ins_encode %{
    const BasicType to_bt = Matcher::vector_element_basic_type(this);
    VectorRegister result_vec = as_VectorRegister($dst$$reg);
    VectorRegister value_vec = as_VectorRegister($src$$reg);
    __ integer_extend_v(result_vec, to_bt, Matcher::vector_length(this),
                        value_vec, T_SHORT, true /* is_signed */);
  %}
  ins_pipe(pipe_slow);
%}

// VectorCastS2X with a float or double result: sign-extend the shorts to the
// integer type of the same width as the target (T_INT for T_FLOAT, T_LONG
// otherwise), then convert that integer to floating point in place.
instruct vcvtStoX_fp(vReg dst, vReg src) %{
  predicate(Matcher::vector_element_basic_type(n) == T_FLOAT ||
            Matcher::vector_element_basic_type(n) == T_DOUBLE);
  match(Set dst (VectorCastS2X src));
  effect(TEMP_DEF dst);
  format %{ "vcvtStoX_fp $dst, $src" %}
  ins_encode %{
    const BasicType fp_bt = Matcher::vector_element_basic_type(this);
    const BasicType widened_bt = (fp_bt == T_FLOAT) ? T_INT : T_LONG;
    const uint lane_cnt = Matcher::vector_length(this);
    VectorRegister result_vec = as_VectorRegister($dst$$reg);
    VectorRegister value_vec = as_VectorRegister($src$$reg);
    __ integer_extend_v(result_vec, widened_bt, lane_cnt,
                        value_vec, T_SHORT, true /* is_signed */);
    __ vsetvli_helper(fp_bt, lane_cnt);
    __ vfcvt_f_x_v(result_vec, result_vec);
  %}
  ins_pipe(pipe_slow);
%}


// VectorUCastS2X: zero-extend short elements to int or long.
instruct vcvtUStoX(vReg dst, vReg src) %{
  predicate(Matcher::vector_element_basic_type(n) == T_INT ||
            Matcher::vector_element_basic_type(n) == T_LONG);
  match(Set dst (VectorUCastS2X src));
  effect(TEMP_DEF dst);
  format %{ "vcvtUStoX $dst, $src" %}
  ins_encode %{
    const BasicType to_bt = Matcher::vector_element_basic_type(this);
    VectorRegister result_vec = as_VectorRegister($dst$$reg);
    VectorRegister value_vec = as_VectorRegister($src$$reg);
    __ integer_extend_v(result_vec, to_bt, Matcher::vector_length(this),
                        value_vec, T_SHORT, false /* is_signed */);
  %}
  ins_pipe(pipe_slow);
%}

// VectorCastI2X, VectorUCastI2X

// VectorCastI2X with a byte or short result: narrow int elements.
instruct vcvtItoX_narrow(vReg dst, vReg src) %{
  predicate((Matcher::vector_element_basic_type(n) == T_BYTE ||
             Matcher::vector_element_basic_type(n) == T_SHORT));
  match(Set dst (VectorCastI2X src));
  format %{ "vcvtItoX_narrow $dst, $src" %}
  ins_encode %{
    const BasicType to_bt = Matcher::vector_element_basic_type(this);
    const uint lane_cnt = Matcher::vector_length(this);
    VectorRegister result_vec = as_VectorRegister($dst$$reg);
    VectorRegister value_vec = as_VectorRegister($src$$reg);
    __ integer_narrow_v(result_vec, to_bt, lane_cnt, value_vec, T_INT);
  %}
  ins_pipe(pipe_slow);
%}

// VectorCastI2X with a long result: sign-extend int elements.
instruct vcvtItoL(vReg dst, vReg src) %{
  predicate(Matcher::vector_element_basic_type(n) == T_LONG);
  match(Set dst (VectorCastI2X src));
  effect(TEMP_DEF dst);
  format %{ "vcvtItoL $dst, $src" %}
  ins_encode %{
    VectorRegister result_vec = as_VectorRegister($dst$$reg);
    VectorRegister value_vec = as_VectorRegister($src$$reg);
    const uint lane_cnt = Matcher::vector_length(this);
    __ integer_extend_v(result_vec, T_LONG, lane_cnt,
                        value_vec, T_INT, true /* is_signed */);
  %}
  ins_pipe(pipe_slow);
%}

// VectorUCastI2X with a long result: zero-extend int elements.
instruct vcvtUItoL(vReg dst, vReg src) %{
  predicate(Matcher::vector_element_basic_type(n) == T_LONG);
  match(Set dst (VectorUCastI2X src));
  effect(TEMP_DEF dst);
  format %{ "vcvtUItoL $dst, $src" %}
  ins_encode %{
    VectorRegister result_vec = as_VectorRegister($dst$$reg);
    VectorRegister value_vec = as_VectorRegister($src$$reg);
    const uint lane_cnt = Matcher::vector_length(this);
    __ integer_extend_v(result_vec, T_LONG, lane_cnt,
                        value_vec, T_INT, false /* is_signed */);
  %}
  ins_pipe(pipe_slow);
%}

// VectorCastI2X with a float result: same-width signed int to float convert.
instruct vcvtItoF(vReg dst, vReg src) %{
  predicate(Matcher::vector_element_basic_type(n) == T_FLOAT);
  match(Set dst (VectorCastI2X src));
  format %{ "vcvtItoF $dst, $src" %}
  ins_encode %{
    VectorRegister result_vec = as_VectorRegister($dst$$reg);
    VectorRegister value_vec = as_VectorRegister($src$$reg);
    __ vsetvli_helper(T_FLOAT, Matcher::vector_length(this));
    __ vfcvt_f_x_v(result_vec, value_vec);
  %}
  ins_pipe(pipe_slow);
%}

// VectorCastI2X with a double result: widening signed int to double convert.
// The vector type is configured for the int source elements at LMUL mf2
// before the widening instruction is issued.
instruct vcvtItoD(vReg dst, vReg src) %{
  predicate(Matcher::vector_element_basic_type(n) == T_DOUBLE);
  match(Set dst (VectorCastI2X src));
  effect(TEMP_DEF dst);
  format %{ "vcvtItoD $dst, $src" %}
  ins_encode %{
    VectorRegister result_vec = as_VectorRegister($dst$$reg);
    VectorRegister value_vec = as_VectorRegister($src$$reg);
    const uint lane_cnt = Matcher::vector_length(this);
    __ vsetvli_helper(T_INT, lane_cnt, Assembler::mf2);
    __ vfwcvt_f_x_v(result_vec, value_vec);
  %}
  ins_pipe(pipe_slow);
%}

// VectorCastL2X

// VectorCastL2X with a T_BYTE, T_SHORT or T_INT result: long lanes are
// narrowed to the result element type.
instruct vcvtLtoI(vReg dst, vReg src) %{
  predicate(Matcher::vector_element_basic_type(n) == T_BYTE ||
            Matcher::vector_element_basic_type(n) == T_SHORT ||
            Matcher::vector_element_basic_type(n) == T_INT);
  match(Set dst (VectorCastL2X src));
  format %{ "vcvtLtoI $dst, $src" %}
  ins_encode %{
    VectorRegister dst_reg = as_VectorRegister($dst$$reg);
    VectorRegister src_reg = as_VectorRegister($src$$reg);
    BasicType result_bt = Matcher::vector_element_basic_type(this);
    uint lane_count = Matcher::vector_length(this);
    // T_LONG source lanes -> result_bt destination lanes.
    __ integer_narrow_v(dst_reg, result_bt, lane_count, src_reg, T_LONG);
  %}
  ins_pipe(pipe_slow);
%}

// VectorCastL2X with a T_FLOAT result: the vector unit is configured for the
// T_FLOAT result lanes (LMUL mf2) and the vfncvt conversion reads the long source.
instruct vcvtLtoF(vReg dst, vReg src) %{
  predicate(Matcher::vector_element_basic_type(n) == T_FLOAT);
  match(Set dst (VectorCastL2X src));
  format %{ "vcvtLtoF $dst, $src" %}
  ins_encode %{
    VectorRegister dst_reg = as_VectorRegister($dst$$reg);
    VectorRegister src_reg = as_VectorRegister($src$$reg);
    uint lane_count = Matcher::vector_length(this);
    __ vsetvli_helper(T_FLOAT, lane_count, Assembler::mf2);
    __ vfncvt_f_x_w(dst_reg, src_reg);
  %}
  ins_pipe(pipe_slow);
%}

// VectorCastL2X with a T_DOUBLE result: integer-to-float conversion at
// T_DOUBLE element width.
instruct vcvtLtoD(vReg dst, vReg src) %{
  predicate(Matcher::vector_element_basic_type(n) == T_DOUBLE);
  match(Set dst (VectorCastL2X src));
  format %{ "vcvtLtoD $dst, $src" %}
  ins_encode %{
    VectorRegister dst_reg = as_VectorRegister($dst$$reg);
    VectorRegister src_reg = as_VectorRegister($src$$reg);
    uint lane_count = Matcher::vector_length(this);
    __ vsetvli_helper(T_DOUBLE, lane_count);
    __ vfcvt_f_x_v(dst_reg, src_reg);
  %}
  ins_pipe(pipe_slow);
%}

// VectorCastF2X

// VectorCastF2X with a T_BYTE or T_SHORT result: float lanes are first
// converted to int lanes in dst, then dst is narrowed in place to the result
// element type. v0 is reserved as a temporary for the conversion helper.
instruct vcvtFtoX_narrow(vReg dst, vReg src, vRegMask_V0 v0) %{
  predicate(Matcher::vector_element_basic_type(n) == T_BYTE ||
            Matcher::vector_element_basic_type(n) == T_SHORT);
  match(Set dst (VectorCastF2X src));
  effect(TEMP_DEF dst, TEMP v0);
  format %{ "vcvtFtoX_narrow $dst, $src" %}
  ins_encode %{
    VectorRegister dst_reg = as_VectorRegister($dst$$reg);
    VectorRegister src_reg = as_VectorRegister($src$$reg);
    BasicType result_bt = Matcher::vector_element_basic_type(this);
    uint lane_count = Matcher::vector_length(this);
    // Step 1: float -> int at T_FLOAT element width.
    __ vsetvli_helper(T_FLOAT, lane_count);
    __ vfcvt_rtz_x_f_v_safe(dst_reg, src_reg);
    // Step 2: int -> byte/short, reading and writing dst.
    __ integer_narrow_v(dst_reg, result_bt, lane_count, dst_reg, T_INT);
  %}
  ins_pipe(pipe_slow);
%}

// VectorCastF2X with a T_INT result: float-to-int conversion at T_FLOAT
// element width. v0 is reserved as a temporary for the conversion helper.
instruct vcvtFtoI(vReg dst, vReg src, vRegMask_V0 v0) %{
  predicate(Matcher::vector_element_basic_type(n) == T_INT);
  match(Set dst (VectorCastF2X src));
  effect(TEMP_DEF dst, TEMP v0);
  format %{ "vcvtFtoI $dst, $src" %}
  ins_encode %{
    VectorRegister dst_reg = as_VectorRegister($dst$$reg);
    VectorRegister src_reg = as_VectorRegister($src$$reg);
    uint lane_count = Matcher::vector_length(this);
    __ vsetvli_helper(T_FLOAT, lane_count);
    __ vfcvt_rtz_x_f_v_safe(dst_reg, src_reg);
  %}
  ins_pipe(pipe_slow);
%}

// VectorCastF2X with a T_LONG result. dst is zeroed first, then only the lanes
// whose source compares equal to itself are converted, so lanes failing that
// self-comparison (i.e. NaN inputs) keep the zero.
instruct vcvtFtoL(vReg dst, vReg src, vRegMask_V0 v0) %{
  predicate(Matcher::vector_element_basic_type(n) == T_LONG);
  match(Set dst (VectorCastF2X src));
  effect(TEMP_DEF dst, TEMP v0);
  format %{ "vcvtFtoL $dst, $src" %}
  ins_encode %{
    VectorRegister dst_reg  = as_VectorRegister($dst$$reg);
    VectorRegister src_reg  = as_VectorRegister($src$$reg);
    VectorRegister mask_reg = as_VectorRegister($v0$$reg);
    uint lane_count = Matcher::vector_length(this);
    // Clear the whole destination at T_LONG element width.
    __ vsetvli_helper(T_LONG, lane_count);
    __ vxor_vv(dst_reg, dst_reg, dst_reg);
    // Switch to the T_FLOAT source lanes (LMUL mf2) for the compare and the
    // masked conversion.
    __ vsetvli_helper(T_FLOAT, lane_count, Assembler::mf2);
    __ vmfeq_vv(mask_reg, src_reg, src_reg);
    __ vfwcvt_rtz_x_f_v(dst_reg, src_reg, Assembler::v0_t);
  %}
  ins_pipe(pipe_slow);
%}

// VectorCastF2X with a T_DOUBLE result: the vector unit is configured for the
// T_FLOAT source lanes (LMUL mf2) and the vfwcvt float-to-float conversion
// produces the result.
instruct vcvtFtoD(vReg dst, vReg src) %{
  predicate(Matcher::vector_element_basic_type(n) == T_DOUBLE);
  match(Set dst (VectorCastF2X src));
  effect(TEMP_DEF dst);
  format %{ "vcvtFtoD $dst, $src" %}
  ins_encode %{
    VectorRegister dst_reg = as_VectorRegister($dst$$reg);
    VectorRegister src_reg = as_VectorRegister($src$$reg);
    uint lane_count = Matcher::vector_length(this);
    __ vsetvli_helper(T_FLOAT, lane_count, Assembler::mf2);
    __ vfwcvt_f_f_v(dst_reg, src_reg);
  %}
  ins_pipe(pipe_slow);
%}

// VectorCastD2X

// VectorCastD2X with a T_BYTE, T_SHORT or T_INT result. Lanes whose source
// compares equal to itself are converted to int; the remaining lanes (NaN
// inputs) keep the zero written beforehand. For byte/short results the int
// lanes are then narrowed in place.
instruct vcvtDtoX_narrow(vReg dst, vReg src, vRegMask_V0 v0) %{
  predicate(Matcher::vector_element_basic_type(n) == T_BYTE ||
            Matcher::vector_element_basic_type(n) == T_SHORT ||
            Matcher::vector_element_basic_type(n) == T_INT);
  match(Set dst (VectorCastD2X src));
  effect(TEMP_DEF dst, TEMP v0);
  format %{ "vcvtDtoX_narrow $dst, $src" %}
  ins_encode %{
    VectorRegister dst_reg  = as_VectorRegister($dst$$reg);
    VectorRegister src_reg  = as_VectorRegister($src$$reg);
    VectorRegister mask_reg = as_VectorRegister($v0$$reg);
    uint lane_count = Matcher::vector_length(this);
    // Build the mask at T_DOUBLE element width.
    __ vsetvli_helper(T_DOUBLE, lane_count);
    __ vmfeq_vv(mask_reg, src_reg, src_reg);
    // Zero dst and do the masked conversion at T_INT element width (LMUL mf2).
    __ vsetvli_helper(T_INT, lane_count, Assembler::mf2);
    __ vxor_vv(dst_reg, dst_reg, dst_reg);
    __ vfncvt_rtz_x_f_w(dst_reg, src_reg, Assembler::v0_t);
    BasicType result_bt = Matcher::vector_element_basic_type(this);
    const bool narrower_than_int = (result_bt == T_BYTE || result_bt == T_SHORT);
    if (narrower_than_int) {
      __ integer_narrow_v(dst_reg, result_bt, lane_count, dst_reg, T_INT);
    }
  %}
  ins_pipe(pipe_slow);
%}

// VectorCastD2X with a T_LONG result: float-to-integer conversion with the
// vector unit configured for T_LONG lanes. v0 is reserved as a temporary for
// the conversion helper.
instruct vcvtDtoL(vReg dst, vReg src, vRegMask_V0 v0) %{
  predicate(Matcher::vector_element_basic_type(n) == T_LONG);
  match(Set dst (VectorCastD2X src));
  effect(TEMP_DEF dst, TEMP v0);
  format %{ "vcvtDtoL $dst, $src" %}
  ins_encode %{
    VectorRegister dst_reg = as_VectorRegister($dst$$reg);
    VectorRegister src_reg = as_VectorRegister($src$$reg);
    uint lane_count = Matcher::vector_length(this);
    __ vsetvli_helper(T_LONG, lane_count);
    __ vfcvt_rtz_x_f_v_safe(dst_reg, src_reg);
  %}
  ins_pipe(pipe_slow);
%}

// VectorCastD2X with a T_FLOAT result: the vector unit is configured for the
// T_FLOAT result lanes (LMUL mf2) and the vfncvt float-to-float conversion
// reads the double source.
instruct vcvtDtoF(vReg dst, vReg src) %{
  predicate(Matcher::vector_element_basic_type(n) == T_FLOAT);
  match(Set dst (VectorCastD2X src));
  format %{ "vcvtDtoF $dst, $src" %}
  ins_encode %{
    VectorRegister dst_reg = as_VectorRegister($dst$$reg);
    VectorRegister src_reg = as_VectorRegister($src$$reg);
    uint lane_count = Matcher::vector_length(this);
    __ vsetvli_helper(T_FLOAT, lane_count, Assembler::mf2);
    __ vfncvt_f_f_w(dst_reg, src_reg);
  %}
  ins_pipe(pipe_slow);
%}

// ------------------------------ Vector reinterpret ---------------------------

// VectorReinterpret where input and result occupy the same number of bytes.
// The operand is matched in place (dst_src), the cost is zero and no code is
// emitted.
instruct reinterpret(vReg dst_src) %{
  predicate(Matcher::vector_length_in_bytes(n) == Matcher::vector_length_in_bytes(n->in(1)));
  match(Set dst_src (VectorReinterpret dst_src));
  ins_cost(0);
  format %{ "# reinterpret $dst_src, $dst_src\t# do nothing" %}
  ins_encode %{
    // empty
  %}
  ins_pipe(pipe_class_empty);
%}

// VectorReinterpret where input and result differ in byte size. The whole
// destination is zeroed, then min(source bytes, destination bytes) bytes are
// copied from src, so any destination bytes beyond the source stay zero.
instruct reinterpretResize(vReg dst, vReg src) %{
  predicate(Matcher::vector_length_in_bytes(n) != Matcher::vector_length_in_bytes(n->in(1)));
  match(Set dst (VectorReinterpret src));
  effect(TEMP_DEF dst);
  format %{ "reinterpretResize $dst, $src" %}
  ins_encode %{
    VectorRegister dst_reg = as_VectorRegister($dst$$reg);
    VectorRegister src_reg = as_VectorRegister($src$$reg);
    uint src_bytes = Matcher::vector_length_in_bytes(this, $src);
    uint dst_bytes = Matcher::vector_length_in_bytes(this);
    // Number of bytes to move: the smaller of the two sizes.
    uint copy_bytes = dst_bytes;
    if (src_bytes < dst_bytes) {
      copy_bytes = src_bytes;
    }
    assert(src_bytes <= MaxVectorSize && dst_bytes <= MaxVectorSize,
           "invalid vector length");
    BasicType result_bt = Matcher::vector_element_basic_type(this);
    // Zero every lane of the result type.
    __ vsetvli_helper(result_bt, Matcher::vector_length(this));
    __ vxor_vv(dst_reg, dst_reg, dst_reg);
    // Byte-wise copy of the overlapping prefix.
    __ vsetvli_helper(T_BYTE, copy_bytes);
    __ vmv_v_v(dst_reg, src_reg);
  %}
  ins_pipe(pipe_slow);
%}

// vector mask reinterpret

// Mask VectorReinterpret where input and result have the same lane count and
// the same byte size. The mask is matched in place (dst_src), the cost is zero
// and the encoding is empty.
instruct vmask_reinterpret_same_esize(vRegMask dst_src) %{
  predicate(Matcher::vector_length(n) == Matcher::vector_length(n->in(1)) &&
            Matcher::vector_length_in_bytes(n) == Matcher::vector_length_in_bytes(n->in(1)));
  match(Set dst_src (VectorReinterpret dst_src));
  ins_cost(0);
  format %{ "vmask_reinterpret_same_esize $dst_src, $dst_src\t# do nothing" %}
  ins_encode(/* empty encoding */);
  ins_pipe(pipe_class_empty);
%}

// Mask VectorReinterpret where the lane count changes but the byte size does
// not. The source mask (in v0) is expanded into a vector of 0 / -1 lanes at
// the source element type, and that vector is then compared against -1 at the
// result element type to form the new mask.
instruct vmask_reinterpret_diff_esize(vRegMask dst, vRegMask_V0 src, vReg tmp) %{
  predicate(Matcher::vector_length(n) != Matcher::vector_length(n->in(1)) &&
            Matcher::vector_length_in_bytes(n) == Matcher::vector_length_in_bytes(n->in(1)));
  match(Set dst (VectorReinterpret src));
  effect(TEMP tmp);
  format %{ "vmask_reinterpret_diff_esize $dst, $src\t# KILL $tmp" %}
  ins_encode %{
    VectorRegister dst_reg = as_VectorRegister($dst$$reg);
    VectorRegister tmp_reg = as_VectorRegister($tmp$$reg);
    // Materialise the source mask at the source element type.
    BasicType src_bt = Matcher::vector_element_basic_type(this, $src);
    uint src_lanes = Matcher::vector_length(this, $src);
    __ vsetvli_helper(src_bt, src_lanes);
    __ vxor_vv(tmp_reg, tmp_reg, tmp_reg);
    __ vmerge_vim(tmp_reg, tmp_reg, -1);
    // Re-read the same bits at the result element type.
    BasicType dst_bt = Matcher::vector_element_basic_type(this);
    uint dst_lanes = Matcher::vector_length(this);
    __ vsetvli_helper(dst_bt, dst_lanes);
    __ vmseq_vi(dst_reg, tmp_reg, -1);
  %}
  ins_pipe(pipe_slow);
%}

// ------------------------------ Vector selectFrom -----------------------------

// SelectFromTwoVector: every result lane is picked from src1 or src2 according
// to the matching lane of index.
//   1. Gather from src1 with the raw index.
//   2. Build a mask in v0 of the lanes whose index is (unsigned) greater than
//      vlen - 1, and compute index - vlen into tmp for those lanes.
//   3. Under that mask, gather from src2 with the adjusted index, overwriting
//      the corresponding lanes of dst.
// When vlen - 1 fits a signed 5-bit immediate the immediate instruction forms
// are used; otherwise the constants are materialised in t0.
instruct select_from_two_vectors(vReg dst, vReg src1, vReg src2, vReg index, vRegMask_V0 v0, vReg tmp) %{
  match(Set dst (SelectFromTwoVector (Binary index src1) src2));
  effect(TEMP_DEF dst, TEMP v0, TEMP tmp);
  format %{ "select_from_two_vectors $dst, $src1, $src2, $index" %}
  ins_encode %{
    BasicType bt = Matcher::vector_element_basic_type(this);
    // Hold the lane count as a signed int. Matcher::vector_length() returns an
    // unsigned value, so negating it directly yields 2^32 - vlen rather than
    // -vlen; once widened to a 64-bit scalar that is not the intended addend
    // for 64-bit lanes.
    const int vlen = (int)Matcher::vector_length(this);
    __ vsetvli_helper(bt, vlen);
    __ vrgather_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src1$$reg),
                   as_VectorRegister($index$$reg));
    bool use_imm = __ is_simm5(vlen - 1);
    if (use_imm) {
      __ vmsgtu_vi(as_VectorRegister($v0$$reg), as_VectorRegister($index$$reg), vlen - 1);
      __ vadd_vi(as_VectorRegister($tmp$$reg), as_VectorRegister($index$$reg),
                 -vlen, Assembler::v0_t);
    } else {
      __ mv(t0, vlen - 1);
      __ vmsgtu_vx(as_VectorRegister($v0$$reg), as_VectorRegister($index$$reg), t0);
      __ mv(t0, -vlen);
      __ vadd_vx(as_VectorRegister($tmp$$reg), as_VectorRegister($index$$reg), t0, Assembler::v0_t);
    }
    __ vrgather_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src2$$reg),
                   as_VectorRegister($tmp$$reg), Assembler::v0_t);
  %}
  ins_pipe(pipe_slow);
%}

// ------------------------------ Vector rearrange -----------------------------

// VectorRearrange (unmasked): gather lanes of src using shuffle as the index
// vector.
instruct rearrange(vReg dst, vReg src, vReg shuffle) %{
  match(Set dst (VectorRearrange src shuffle));
  effect(TEMP_DEF dst);
  format %{ "rearrange $dst, $src, $shuffle" %}
  ins_encode %{
    VectorRegister dst_reg     = as_VectorRegister($dst$$reg);
    VectorRegister src_reg     = as_VectorRegister($src$$reg);
    VectorRegister shuffle_reg = as_VectorRegister($shuffle$$reg);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    uint lane_count = Matcher::vector_length(this);
    __ vsetvli_helper(elem_bt, lane_count);
    __ vrgather_vv(dst_reg, src_reg, shuffle_reg);
  %}
  ins_pipe(pipe_slow);
%}

// VectorRearrange under a mask in v0: dst is zeroed first, then the gather is
// issued with the v0 mask.
instruct rearrange_masked(vReg dst, vReg src, vReg shuffle, vRegMask_V0 v0) %{
  match(Set dst (VectorRearrange (Binary src shuffle) v0));
  effect(TEMP_DEF dst);
  format %{ "rearrange_masked $dst, $src, $shuffle, $v0" %}
  ins_encode %{
    VectorRegister dst_reg     = as_VectorRegister($dst$$reg);
    VectorRegister src_reg     = as_VectorRegister($src$$reg);
    VectorRegister shuffle_reg = as_VectorRegister($shuffle$$reg);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    uint lane_count = Matcher::vector_length(this);
    __ vsetvli_helper(elem_bt, lane_count);
    __ vxor_vv(dst_reg, dst_reg, dst_reg);
    __ vrgather_vv(dst_reg, src_reg, shuffle_reg, Assembler::v0_t);
  %}
  ins_pipe(pipe_slow);
%}

// ------------------------------ Vector extract ---------------------------------

// ExtractB / ExtractS / ExtractI: read the lane at constant index idx of src
// into an integer register. The element type is taken from the source vector.
instruct extract(iRegINoSp dst, vReg src, immI idx, vReg tmp)
%{
  match(Set dst (ExtractB src idx));
  match(Set dst (ExtractS src idx));
  match(Set dst (ExtractI src idx));
  effect(TEMP tmp);
  format %{ "extract $dst, $src, $idx\t# KILL $tmp" %}
  ins_encode %{
    VectorRegister src_reg = as_VectorRegister($src$$reg);
    VectorRegister tmp_reg = as_VectorRegister($tmp$$reg);
    BasicType src_bt = Matcher::vector_element_basic_type(this, $src);
    int lane = (int)($idx$$constant);
    __ extract_v($dst$$Register, src_reg, src_bt, lane, tmp_reg);
  %}
  ins_pipe(pipe_slow);
%}

// ExtractL: read the T_LONG lane at constant index idx of src into a long
// register.
instruct extractL(iRegLNoSp dst, vReg src, immI idx, vReg tmp)
%{
  match(Set dst (ExtractL src idx));
  effect(TEMP tmp);
  format %{ "extractL $dst, $src, $idx\t# KILL $tmp" %}
  ins_encode %{
    VectorRegister src_reg = as_VectorRegister($src$$reg);
    VectorRegister tmp_reg = as_VectorRegister($tmp$$reg);
    int lane = (int)($idx$$constant);
    __ extract_v($dst$$Register, src_reg, T_LONG, lane, tmp_reg);
  %}
  ins_pipe(pipe_slow);
%}


// ExtractF: read the T_FLOAT lane at constant index idx of src into a float
// register.
instruct extractF(fRegF dst, vReg src, immI idx, vReg tmp)
%{
  match(Set dst (ExtractF src idx));
  effect(TEMP tmp);
  format %{ "extractF $dst, $src, $idx\t# KILL $tmp" %}
  ins_encode %{
    VectorRegister src_reg = as_VectorRegister($src$$reg);
    VectorRegister tmp_reg = as_VectorRegister($tmp$$reg);
    int lane = (int)($idx$$constant);
    __ extract_fp_v($dst$$FloatRegister, src_reg, T_FLOAT, lane, tmp_reg);
  %}
  ins_pipe(pipe_slow);
%}

// ExtractD: read the T_DOUBLE lane at constant index idx of src into a double
// register.
instruct extractD(fRegD dst, vReg src, immI idx, vReg tmp)
%{
  match(Set dst (ExtractD src idx));
  effect(TEMP tmp);
  format %{ "extractD $dst, $src, $idx\t# KILL $tmp" %}
  ins_encode %{
    VectorRegister src_reg = as_VectorRegister($src$$reg);
    VectorRegister tmp_reg = as_VectorRegister($tmp$$reg);
    int lane = (int)($idx$$constant);
    __ extract_fp_v($dst$$FloatRegister, src_reg, T_DOUBLE, lane, tmp_reg);
  %}
  ins_pipe(pipe_slow);
%}

// ------------------------------ Compress/Expand Operations -------------------

// CompressM: the result mask has its first N lanes set, where N is the number
// of set bits in src. Implemented by comparing the lane ids (vid) against the
// population count held in t0.
instruct mcompress(vRegMask dst, vRegMask src, vReg tmp) %{
  match(Set dst (CompressM src));
  effect(TEMP tmp);
  format %{ "mcompress $dst, $src\t# KILL $tmp" %}
  ins_encode %{
    VectorRegister dst_reg = as_VectorRegister($dst$$reg);
    VectorRegister src_reg = as_VectorRegister($src$$reg);
    VectorRegister ids_reg = as_VectorRegister($tmp$$reg);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    uint lane_count = Matcher::vector_length(this);
    __ vsetvli_helper(elem_bt, lane_count);
    __ vid_v(ids_reg);
    __ vcpop_m(t0, src_reg);
    // dst lane is set where lane id < popcount (unsigned compare).
    __ vmsltu_vx(dst_reg, ids_reg, t0);
  %}
  ins_pipe(pipe_slow);
%}

// CompressV: dst is zeroed, then the lanes of src selected by the v0 mask are
// compressed into dst.
instruct vcompress(vReg dst, vReg src, vRegMask_V0 v0) %{
  match(Set dst (CompressV src v0));
  effect(TEMP_DEF dst);
  format %{ "vcompress $dst, $src, $v0" %}
  ins_encode %{
    VectorRegister dst_reg  = as_VectorRegister($dst$$reg);
    VectorRegister src_reg  = as_VectorRegister($src$$reg);
    VectorRegister mask_reg = as_VectorRegister($v0$$reg);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    uint lane_count = Matcher::vector_length(this);
    __ vsetvli_helper(elem_bt, lane_count);
    __ vxor_vv(dst_reg, dst_reg, dst_reg);
    __ vcompress_vm(dst_reg, src_reg, mask_reg);
  %}
  ins_pipe(pipe_slow);
%}

// ExpandV: viota over the v0 mask produces the per-lane source indices in tmp;
// dst is zeroed and then filled by a gather from src that is masked by v0.
instruct vexpand(vReg dst, vReg src, vRegMask_V0 v0, vReg tmp) %{
  match(Set dst (ExpandV src v0));
  effect(TEMP_DEF dst, TEMP tmp);
  format %{ "vexpand $dst, $src, $v0\t# KILL $tmp" %}
  ins_encode %{
    VectorRegister dst_reg   = as_VectorRegister($dst$$reg);
    VectorRegister src_reg   = as_VectorRegister($src$$reg);
    VectorRegister mask_reg  = as_VectorRegister($v0$$reg);
    VectorRegister index_reg = as_VectorRegister($tmp$$reg);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    uint lane_count = Matcher::vector_length(this);
    __ vsetvli_helper(elem_bt, lane_count);
    __ viota_m(index_reg, mask_reg);
    __ vxor_vv(dst_reg, dst_reg, dst_reg);
    __ vrgather_vv(dst_reg, src_reg, index_reg, Assembler::v0_t);
  %}
  ins_pipe(pipe_slow);
%}

// ------------------------------ Vector signum --------------------------------

// Vector Math.signum

// SignumVF / SignumVD: dst is both the input and the result. Only the "one"
// operand is handed to the macro-assembler helper; "zero" is matched but not
// used by the encoding. v0 is reserved as a temporary.
instruct vsignum_reg(vReg dst, vReg zero, vReg one, vRegMask_V0 v0) %{
  match(Set dst (SignumVF dst (Binary zero one)));
  match(Set dst (SignumVD dst (Binary zero one)));
  effect(TEMP_DEF dst, TEMP v0);
  format %{ "vsignum $dst, $dst\t" %}
  ins_encode %{
    VectorRegister dst_reg = as_VectorRegister($dst$$reg);
    VectorRegister one_reg = as_VectorRegister($one$$reg);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    uint lane_count = Matcher::vector_length(this);
    __ signum_fp_v(dst_reg, one_reg, elem_bt, lane_count);
  %}
  ins_pipe(pipe_slow);
%}

// ---------------- Round float/double Vector Operations ----------------

// RoundVF: delegates to the java_round_float_v helper, which gets a scalar
// float temporary; v0 is reserved as a temporary as well.
instruct vround_f(vReg dst, vReg src, fRegF tmp, vRegMask_V0 v0) %{
  match(Set dst (RoundVF src));
  effect(TEMP_DEF dst, TEMP tmp, TEMP v0);
  format %{ "java_round_float_v $dst, $src\t" %}
  ins_encode %{
    VectorRegister dst_reg = as_VectorRegister($dst$$reg);
    VectorRegister src_reg = as_VectorRegister($src$$reg);
    FloatRegister ftmp_reg = as_FloatRegister($tmp$$reg);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    uint lane_count = Matcher::vector_length(this);
    __ java_round_float_v(dst_reg, src_reg, ftmp_reg, elem_bt, lane_count);
  %}
  ins_pipe(pipe_slow);
%}

// RoundVD: delegates to the java_round_double_v helper, which gets a scalar
// double temporary; v0 is reserved as a temporary as well.
instruct vround_d(vReg dst, vReg src, fRegD tmp, vRegMask_V0 v0) %{
  match(Set dst (RoundVD src));
  effect(TEMP_DEF dst, TEMP tmp, TEMP v0);
  format %{ "java_round_double_v $dst, $src\t" %}
  ins_encode %{
    VectorRegister dst_reg = as_VectorRegister($dst$$reg);
    VectorRegister src_reg = as_VectorRegister($src$$reg);
    FloatRegister ftmp_reg = as_FloatRegister($tmp$$reg);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    uint lane_count = Matcher::vector_length(this);
    __ java_round_double_v(dst_reg, src_reg, ftmp_reg, elem_bt, lane_count);
  %}
  ins_pipe(pipe_slow);
%}

// -------------------------------- Reverse Bits Vector Operations ------------------------

// ReverseV under a mask in v0: bit reversal applied in place to dst_src.
instruct vreverse_masked(vReg dst_src, vRegMask_V0 v0) %{
  match(Set dst_src (ReverseV dst_src v0));
  format %{ "vreverse_masked $dst_src, $dst_src, v0" %}
  ins_encode %{
    VectorRegister inout_reg = as_VectorRegister($dst_src$$reg);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    uint lane_count = Matcher::vector_length(this);
    __ vsetvli_helper(elem_bt, lane_count);
    __ vbrev_v(inout_reg, inout_reg, Assembler::v0_t);
  %}
  ins_pipe(pipe_slow);
%}

// ReverseV (unmasked): bit reversal of src into dst.
instruct vreverse(vReg dst, vReg src) %{
  match(Set dst (ReverseV src));
  format %{ "vreverse $dst, $src" %}
  ins_encode %{
    VectorRegister dst_reg = as_VectorRegister($dst$$reg);
    VectorRegister src_reg = as_VectorRegister($src$$reg);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    uint lane_count = Matcher::vector_length(this);
    __ vsetvli_helper(elem_bt, lane_count);
    __ vbrev_v(dst_reg, src_reg);
  %}
  ins_pipe(pipe_slow);
%}

// -------------------------------- Reverse Bytes Vector Operations ------------------------

// ReverseBytesV under a mask in v0: byte reversal applied in place to dst_src.
instruct vreverse_bytes_masked(vReg dst_src, vRegMask_V0 v0) %{
  match(Set dst_src (ReverseBytesV dst_src v0));
  format %{ "vreverse_bytes_masked $dst_src, $dst_src, v0" %}
  ins_encode %{
    VectorRegister inout_reg = as_VectorRegister($dst_src$$reg);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    uint lane_count = Matcher::vector_length(this);
    __ vsetvli_helper(elem_bt, lane_count);
    __ vrev8_v(inout_reg, inout_reg, Assembler::v0_t);
  %}
  ins_pipe(pipe_slow);
%}

// ReverseBytesV (unmasked): byte reversal of src into dst.
instruct vreverse_bytes(vReg dst, vReg src) %{
  match(Set dst (ReverseBytesV src));
  format %{ "vreverse_bytes $dst, $src" %}
  ins_encode %{
    VectorRegister dst_reg = as_VectorRegister($dst$$reg);
    VectorRegister src_reg = as_VectorRegister($src$$reg);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    uint lane_count = Matcher::vector_length(this);
    __ vsetvli_helper(elem_bt, lane_count);
    __ vrev8_v(dst_reg, src_reg);
  %}
  ins_pipe(pipe_slow);
%}

// ---------------- Convert Half Floating to Floating Vector Operations ----------------

// half precision -> single

// VectorCastHF2F with a T_FLOAT result: delegates to float16_to_float_v.
// v0 is reserved as a temporary.
instruct vconvHF2F(vReg dst, vReg src, vRegMask_V0 v0) %{
  predicate(Matcher::vector_element_basic_type(n) == T_FLOAT);
  match(Set dst (VectorCastHF2F src));
  effect(TEMP_DEF dst, TEMP v0);
  format %{ "vfwcvt.f.f.v $dst, $src\t# convert half to single precision" %}
  ins_encode %{
    VectorRegister dst_reg = as_VectorRegister($dst$$reg);
    VectorRegister src_reg = as_VectorRegister($src$$reg);
    uint lane_count = Matcher::vector_length(this);
    __ float16_to_float_v(dst_reg, src_reg, lane_count);
  %}
  ins_pipe(pipe_slow);
%}

// single precision -> half

// VectorCastF2HF with a T_SHORT result: delegates to float_to_float16_v, which
// is given a vector temporary and an integer temporary. v0 is reserved as a
// temporary as well.
instruct vconvF2HF(vReg dst, vReg src, vReg vtmp, vRegMask_V0 v0, iRegINoSp tmp) %{
  predicate(Matcher::vector_element_basic_type(n) == T_SHORT);
  match(Set dst (VectorCastF2HF src));
  effect(TEMP_DEF dst, TEMP v0, TEMP vtmp, TEMP tmp);
  format %{ "vfncvt.f.f.w $dst, $src\t# convert single to half precision" %}
  ins_encode %{
    VectorRegister dst_reg  = as_VectorRegister($dst$$reg);
    VectorRegister src_reg  = as_VectorRegister($src$$reg);
    VectorRegister vtmp_reg = as_VectorRegister($vtmp$$reg);
    uint lane_count = Matcher::vector_length(this);
    __ float_to_float16_v(dst_reg, src_reg, vtmp_reg, $tmp$$Register, lane_count);
  %}
  ins_pipe(pipe_slow);
%}


// ------------------------------ Popcount vector ------------------------------

// PopCountVI / PopCountVL under a mask in v0: population count applied in
// place to dst_src.
instruct vpopcount_masked(vReg dst_src, vRegMask_V0 v0) %{
  match(Set dst_src (PopCountVI dst_src v0));
  match(Set dst_src (PopCountVL dst_src v0));
  format %{ "vcpop_v $dst_src, $dst_src, $v0\t# vcpop_v with mask" %}
  ins_encode %{
    VectorRegister inout_reg = as_VectorRegister($dst_src$$reg);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    uint lane_count = Matcher::vector_length(this);
    __ vsetvli_helper(elem_bt, lane_count);
    __ vcpop_v(inout_reg, inout_reg, Assembler::v0_t);
  %}
  ins_pipe(pipe_slow);
%}

// PopCountVI / PopCountVL (unmasked): population count of src into dst.
instruct vpopcount(vReg dst, vReg src) %{
  match(Set dst (PopCountVI src));
  match(Set dst (PopCountVL src));
  format %{ "vcpop_v $dst, $src\t# vcpop_v without mask" %}
  ins_encode %{
    VectorRegister dst_reg = as_VectorRegister($dst$$reg);
    VectorRegister src_reg = as_VectorRegister($src$$reg);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    uint lane_count = Matcher::vector_length(this);
    __ vsetvli_helper(elem_bt, lane_count);
    __ vcpop_v(dst_reg, src_reg);
  %}
  ins_pipe(pipe_slow);
%}

// ------------------------------ CountLeadingZerosV --------------------------

// CountLeadingZerosV under a mask in v0: applied in place to dst_src.
instruct vcountLeadingZeros_masked(vReg dst_src, vRegMask_V0 v0) %{
  match(Set dst_src (CountLeadingZerosV dst_src v0));
  format %{ "vcount_leading_zeros_masked $dst_src, $dst_src, v0" %}
  ins_encode %{
    VectorRegister inout_reg = as_VectorRegister($dst_src$$reg);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    uint lane_count = Matcher::vector_length(this);
    __ vsetvli_helper(elem_bt, lane_count);
    __ vclz_v(inout_reg, inout_reg, Assembler::v0_t);
  %}
  ins_pipe(pipe_slow);
%}

// CountLeadingZerosV (unmasked): leading-zero count of src into dst.
instruct vcountLeadingZeros(vReg dst, vReg src) %{
  match(Set dst (CountLeadingZerosV src));
  format %{ "vcount_leading_zeros $dst, $src" %}
  ins_encode %{
    VectorRegister dst_reg = as_VectorRegister($dst$$reg);
    VectorRegister src_reg = as_VectorRegister($src$$reg);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    uint lane_count = Matcher::vector_length(this);
    __ vsetvli_helper(elem_bt, lane_count);
    __ vclz_v(dst_reg, src_reg);
  %}
  ins_pipe(pipe_slow);
%}

// ------------------------------ CountTrailingZerosV --------------------------

// CountTrailingZerosV under a mask in v0: applied in place to dst_src.
instruct vcountTrailingZeros_masked(vReg dst_src, vRegMask_V0 v0) %{
  match(Set dst_src (CountTrailingZerosV dst_src v0));
  format %{ "vcount_trailing_zeros_masked $dst_src, $dst_src, v0" %}
  ins_encode %{
    VectorRegister inout_reg = as_VectorRegister($dst_src$$reg);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    uint lane_count = Matcher::vector_length(this);
    __ vsetvli_helper(elem_bt, lane_count);
    __ vctz_v(inout_reg, inout_reg, Assembler::v0_t);
  %}
  ins_pipe(pipe_slow);
%}

// CountTrailingZerosV (unmasked): trailing-zero count of src into dst.
instruct vcountTrailingZeros(vReg dst, vReg src) %{
  match(Set dst (CountTrailingZerosV src));
  format %{ "vcount_trailing_zeros $dst, $src" %}
  ins_encode %{
    VectorRegister dst_reg = as_VectorRegister($dst$$reg);
    VectorRegister src_reg = as_VectorRegister($src$$reg);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    uint lane_count = Matcher::vector_length(this);
    __ vsetvli_helper(elem_bt, lane_count);
    __ vctz_v(dst_reg, src_reg);
  %}
  ins_pipe(pipe_slow);
%}

// ------------------------------ Vector Load Gather ---------------------------

// LoadVectorGather for 4-byte elements. The indices are shifted left by the
// numeric SEW encoding (presumably log2 of the element size in bytes) to form
// offsets, which are built in dst and then consumed by the indexed load that
// overwrites dst.
instruct gather_loadS(vReg dst, indirect mem, vReg idx) %{
  predicate(type2aelembytes(Matcher::vector_element_basic_type(n)) == 4);
  match(Set dst (LoadVectorGather mem idx));
  effect(TEMP_DEF dst);
  format %{ "gather_loadS $dst, $mem, $idx" %}
  ins_encode %{
    VectorRegister dst_reg   = as_VectorRegister($dst$$reg);
    VectorRegister index_reg = as_VectorRegister($idx$$reg);
    Register base_reg = as_Register($mem$$base);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    Assembler::SEW elem_sew = Assembler::elemtype_to_sew(elem_bt);
    __ vsetvli_helper(elem_bt, Matcher::vector_length(this));
    __ vsll_vi(dst_reg, index_reg, (int)elem_sew);
    __ vluxei32_v(dst_reg, base_reg, dst_reg);
  %}
  ins_pipe(pipe_slow);
%}

// LoadVectorGather for 8-byte elements. The indices are zero-extended (vf2) to
// the element width, shifted left by the numeric SEW encoding (presumably log2
// of the element size in bytes) to form offsets in dst, and then consumed by
// the indexed load that overwrites dst.
instruct gather_loadD(vReg dst, indirect mem, vReg idx) %{
  predicate(type2aelembytes(Matcher::vector_element_basic_type(n)) == 8);
  match(Set dst (LoadVectorGather mem idx));
  effect(TEMP_DEF dst);
  format %{ "gather_loadD $dst, $mem, $idx" %}
  ins_encode %{
    VectorRegister dst_reg   = as_VectorRegister($dst$$reg);
    VectorRegister index_reg = as_VectorRegister($idx$$reg);
    Register base_reg = as_Register($mem$$base);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    Assembler::SEW elem_sew = Assembler::elemtype_to_sew(elem_bt);
    __ vsetvli_helper(elem_bt, Matcher::vector_length(this));
    __ vzext_vf2(dst_reg, index_reg);
    __ vsll_vi(dst_reg, dst_reg, (int)elem_sew);
    __ vluxei64_v(dst_reg, base_reg, dst_reg);
  %}
  ins_pipe(pipe_slow);
%}

// LoadVectorGatherMasked for 4-byte elements. Offsets are built in tmp, dst is
// zeroed, and the indexed load is issued under the v0 mask.
instruct gather_loadS_masked(vReg dst, indirect mem, vReg idx, vRegMask_V0 v0, vReg tmp) %{
  predicate(type2aelembytes(Matcher::vector_element_basic_type(n)) == 4);
  match(Set dst (LoadVectorGatherMasked mem (Binary idx v0)));
  effect(TEMP_DEF dst, TEMP tmp);
  format %{ "gather_loadS_masked $dst, $mem, $idx, $v0\t# KILL $tmp" %}
  ins_encode %{
    VectorRegister dst_reg    = as_VectorRegister($dst$$reg);
    VectorRegister index_reg  = as_VectorRegister($idx$$reg);
    VectorRegister offset_reg = as_VectorRegister($tmp$$reg);
    Register base_reg = as_Register($mem$$base);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    Assembler::SEW elem_sew = Assembler::elemtype_to_sew(elem_bt);
    __ vsetvli_helper(elem_bt, Matcher::vector_length(this));
    __ vsll_vi(offset_reg, index_reg, (int)elem_sew);
    __ vxor_vv(dst_reg, dst_reg, dst_reg);
    __ vluxei32_v(dst_reg, base_reg, offset_reg, Assembler::v0_t);
  %}
  ins_pipe(pipe_slow);
%}

// LoadVectorGatherMasked for 8-byte elements. Indices are zero-extended (vf2)
// and shifted into offsets in tmp, dst is zeroed, and the indexed load is
// issued under the v0 mask.
instruct gather_loadD_masked(vReg dst, indirect mem, vReg idx, vRegMask_V0 v0, vReg tmp) %{
  predicate(type2aelembytes(Matcher::vector_element_basic_type(n)) == 8);
  match(Set dst (LoadVectorGatherMasked mem (Binary idx v0)));
  effect(TEMP_DEF dst, TEMP tmp);
  format %{ "gather_loadD_masked $dst, $mem, $idx, $v0\t# KILL $tmp" %}
  ins_encode %{
    VectorRegister dst_reg    = as_VectorRegister($dst$$reg);
    VectorRegister index_reg  = as_VectorRegister($idx$$reg);
    VectorRegister offset_reg = as_VectorRegister($tmp$$reg);
    Register base_reg = as_Register($mem$$base);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    Assembler::SEW elem_sew = Assembler::elemtype_to_sew(elem_bt);
    __ vsetvli_helper(elem_bt, Matcher::vector_length(this));
    __ vzext_vf2(offset_reg, index_reg);
    __ vsll_vi(offset_reg, offset_reg, (int)elem_sew);
    __ vxor_vv(dst_reg, dst_reg, dst_reg);
    __ vluxei64_v(dst_reg, base_reg, offset_reg, Assembler::v0_t);
  %}
  ins_pipe(pipe_slow);
%}

// ------------------------------ Vector Store Scatter -------------------------

// StoreVectorScatter for 4-byte elements. Element type and lane count come
// from the src operand; offsets are built in tmp by shifting the indices.
instruct scatter_storeS(indirect mem, vReg src, vReg idx, vReg tmp) %{
  predicate(type2aelembytes(Matcher::vector_element_basic_type(n->in(3)->in(1))) == 4);
  match(Set mem (StoreVectorScatter mem (Binary src idx)));
  effect(TEMP tmp);
  format %{ "scatter_storeS $mem, $idx, $src\t# KILL $tmp" %}
  ins_encode %{
    VectorRegister value_reg  = as_VectorRegister($src$$reg);
    VectorRegister index_reg  = as_VectorRegister($idx$$reg);
    VectorRegister offset_reg = as_VectorRegister($tmp$$reg);
    Register base_reg = as_Register($mem$$base);
    BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);
    Assembler::SEW elem_sew = Assembler::elemtype_to_sew(elem_bt);
    __ vsetvli_helper(elem_bt, Matcher::vector_length(this, $src));
    __ vsll_vi(offset_reg, index_reg, (int)elem_sew);
    __ vsuxei32_v(value_reg, base_reg, offset_reg);
  %}
  ins_pipe(pipe_slow);
%}

// StoreVectorScatter for 8-byte elements. Element type and lane count come
// from the src operand; indices are zero-extended (vf2) and shifted into
// offsets in tmp.
instruct scatter_storeD(indirect mem, vReg src, vReg idx, vReg tmp) %{
  predicate(type2aelembytes(Matcher::vector_element_basic_type(n->in(3)->in(1))) == 8);
  match(Set mem (StoreVectorScatter mem (Binary src idx)));
  effect(TEMP tmp);
  format %{ "scatter_storeD $mem, $idx, $src\t# KILL $tmp" %}
  ins_encode %{
    VectorRegister value_reg  = as_VectorRegister($src$$reg);
    VectorRegister index_reg  = as_VectorRegister($idx$$reg);
    VectorRegister offset_reg = as_VectorRegister($tmp$$reg);
    Register base_reg = as_Register($mem$$base);
    BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);
    Assembler::SEW elem_sew = Assembler::elemtype_to_sew(elem_bt);
    __ vsetvli_helper(elem_bt, Matcher::vector_length(this, $src));
    __ vzext_vf2(offset_reg, index_reg);
    __ vsll_vi(offset_reg, offset_reg, (int)elem_sew);
    __ vsuxei64_v(value_reg, base_reg, offset_reg);
  %}
  ins_pipe(pipe_slow);
%}

// StoreVectorScatterMasked for 4-byte elements. Same offset computation as the
// unmasked form; the indexed store is issued under the v0 mask.
instruct scatter_storeS_masked(indirect mem, vReg src, vReg idx, vRegMask_V0 v0, vReg tmp) %{
  predicate(type2aelembytes(Matcher::vector_element_basic_type(n->in(3)->in(1))) == 4);
  match(Set mem (StoreVectorScatterMasked mem (Binary src (Binary idx v0))));
  effect(TEMP tmp);
  format %{ "scatter_storeS_masked $mem, $idx, $src, $v0\t# KILL $tmp" %}
  ins_encode %{
    VectorRegister value_reg  = as_VectorRegister($src$$reg);
    VectorRegister index_reg  = as_VectorRegister($idx$$reg);
    VectorRegister offset_reg = as_VectorRegister($tmp$$reg);
    Register base_reg = as_Register($mem$$base);
    BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);
    Assembler::SEW elem_sew = Assembler::elemtype_to_sew(elem_bt);
    __ vsetvli_helper(elem_bt, Matcher::vector_length(this, $src));
    __ vsll_vi(offset_reg, index_reg, (int)elem_sew);
    __ vsuxei32_v(value_reg, base_reg, offset_reg, Assembler::v0_t);
  %}
  ins_pipe(pipe_slow);
%}

// StoreVectorScatterMasked for 8-byte elements. Same offset computation as the
// unmasked form; the indexed store is issued under the v0 mask.
instruct scatter_storeD_masked(indirect mem, vReg src, vReg idx, vRegMask_V0 v0, vReg tmp) %{
  predicate(type2aelembytes(Matcher::vector_element_basic_type(n->in(3)->in(1))) == 8);
  match(Set mem (StoreVectorScatterMasked mem (Binary src (Binary idx v0))));
  effect(TEMP tmp);
  format %{ "scatter_storeD_masked $mem, $idx, $src, $v0\t# KILL $tmp" %}
  ins_encode %{
    VectorRegister value_reg  = as_VectorRegister($src$$reg);
    VectorRegister index_reg  = as_VectorRegister($idx$$reg);
    VectorRegister offset_reg = as_VectorRegister($tmp$$reg);
    Register base_reg = as_Register($mem$$base);
    BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);
    Assembler::SEW elem_sew = Assembler::elemtype_to_sew(elem_bt);
    __ vsetvli_helper(elem_bt, Matcher::vector_length(this, $src));
    __ vzext_vf2(offset_reg, index_reg);
    __ vsll_vi(offset_reg, offset_reg, (int)elem_sew);
    __ vsuxei64_v(value_reg, base_reg, offset_reg, Assembler::v0_t);
  %}
  ins_pipe(pipe_slow);
%}

// ------------------------------ Populate Index to a Vector -------------------

// PopulateIndex: lane i of dst receives src1 + src2 * i.
// dst is first splatted with src1, tmp receives the lane ids (vid), and a
// multiply-accumulate adds src2 * tmp on top of dst.
instruct populateindex(vReg dst, iRegIorL2I src1, iRegIorL2I src2, vReg tmp) %{
  match(Set dst (PopulateIndex src1 src2));
  effect(TEMP_DEF dst, TEMP tmp);
  format %{ "populateindex $dst, $src1, $src2\t# KILL $tmp" %}
  ins_encode %{
    BasicType bt = Matcher::vector_element_basic_type(this);
    __ vsetvli_helper(bt, Matcher::vector_length(this));
    __ vmv_v_x(as_VectorRegister($dst$$reg), as_Register($src1$$reg));
    __ vid_v(as_VectorRegister($tmp$$reg));
    __ vmacc_vx(as_VectorRegister($dst$$reg), as_Register($src2$$reg), as_VectorRegister($tmp$$reg));
  %}
  ins_pipe(pipe_slow);
%}

// ------------------------------ Vector insert --------------------------------

// BYTE, SHORT, INT

// VectorInsert for T_BYTE / T_SHORT / T_INT with a constant index below 32.
// The lane ids are biased by -16 and compared with idx - 16, presumably so the
// compare immediate stays within the signed 5-bit range for indices 0..31.
// The resulting single-lane mask in v0 drives a merge of val into src.
instruct insert_index_lt32(vReg dst, vReg src, iRegIorL2I val, immI idx, vRegMask_V0 v0) %{
  predicate(n->in(2)->get_int() < 32 &&
            (Matcher::vector_element_basic_type(n) == T_BYTE ||
             Matcher::vector_element_basic_type(n) == T_SHORT ||
             Matcher::vector_element_basic_type(n) == T_INT));
  match(Set dst (VectorInsert (Binary src val) idx));
  effect(TEMP v0);
  format %{ "insert_index_lt32 $dst, $src, $val, $idx" %}
  ins_encode %{
    VectorRegister dst_reg  = as_VectorRegister($dst$$reg);
    VectorRegister src_reg  = as_VectorRegister($src$$reg);
    VectorRegister mask_reg = as_VectorRegister($v0$$reg);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    int biased_lane = (int)($idx$$constant) - 16;
    __ vsetvli_helper(elem_bt, Matcher::vector_length(this));
    __ vid_v(mask_reg);
    __ vadd_vi(mask_reg, mask_reg, -16);
    __ vmseq_vi(mask_reg, mask_reg, biased_lane);
    __ vmerge_vxm(dst_reg, src_reg, $val$$Register);
  %}
  ins_pipe(pipe_slow);
%}

// VectorInsert for T_BYTE / T_SHORT / T_INT with an index of 32 or more.
// The index arrives in a register: it is splatted into tmp and compared with
// the lane ids to form a single-lane mask in v0, which drives a merge of val
// into src.
instruct insert_index(vReg dst, vReg src, iRegIorL2I val, iRegIorL2I idx, vReg tmp, vRegMask_V0 v0) %{
  predicate(n->in(2)->get_int() >= 32 &&
            (Matcher::vector_element_basic_type(n) == T_BYTE ||
             Matcher::vector_element_basic_type(n) == T_SHORT ||
             Matcher::vector_element_basic_type(n) == T_INT));
  match(Set dst (VectorInsert (Binary src val) idx));
  effect(TEMP tmp, TEMP v0);
  format %{ "insert_index $dst, $src, $val, $idx\t# KILL $tmp" %}
  ins_encode %{
    VectorRegister dst_reg   = as_VectorRegister($dst$$reg);
    VectorRegister src_reg   = as_VectorRegister($src$$reg);
    VectorRegister mask_reg  = as_VectorRegister($v0$$reg);
    VectorRegister splat_reg = as_VectorRegister($tmp$$reg);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    __ vsetvli_helper(elem_bt, Matcher::vector_length(this));
    __ vid_v(mask_reg);
    __ vmv_v_x(splat_reg, $idx$$Register);
    __ vmseq_vv(mask_reg, mask_reg, splat_reg);
    __ vmerge_vxm(dst_reg, src_reg, $val$$Register);
  %}
  ins_pipe(pipe_slow);
%}

// LONG

// Insert the long scalar val into lane idx of a LONG vector when the constant
// index is below 32. v0 is clobbered: it holds the lane mask that selects the
// single lane to be replaced.
instruct insertL_index_lt32(vReg dst, vReg src, iRegL val, immI idx, vRegMask_V0 v0) %{
  predicate(n->in(2)->get_int() < 32 &&
            (Matcher::vector_element_basic_type(n) == T_LONG));
  match(Set dst (VectorInsert (Binary src val) idx));
  effect(TEMP v0);
  format %{ "insertL_index_lt32 $dst, $src, $val, $idx" %}
  ins_encode %{
    VectorRegister lane_mask = as_VectorRegister($v0$$reg);
    // Lane ids and the index are both biased by -16, so the compared immediate
    // stays within the signed 5-bit range [-16, 15] for a non-negative index
    // below 32.
    int biased_idx = (int)($idx$$constant) - 16;
    __ vsetvli_helper(Matcher::vector_element_basic_type(this), Matcher::vector_length(this));
    // lane_mask[i] = i - 16
    __ vid_v(lane_mask);
    __ vadd_vi(lane_mask, lane_mask, -16);
    // Only the lane whose biased id equals the biased index is selected.
    __ vmseq_vi(lane_mask, lane_mask, biased_idx);
    // Selected lane takes val, all other lanes are copied from src.
    __ vmerge_vxm(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), $val$$Register);
  %}
  ins_pipe(pipe_slow);
%}

// Insert the long scalar val into lane idx of a LONG vector when the index is
// 32 or larger. The index arrives in a general purpose register and is
// broadcast into tmp so it can be compared against the lane ids.
// Both tmp and v0 are clobbered.
instruct insertL_index(vReg dst, vReg src, iRegL val, iRegIorL2I idx, vReg tmp, vRegMask_V0 v0) %{
  predicate(n->in(2)->get_int() >= 32 &&
            (Matcher::vector_element_basic_type(n) == T_LONG));
  match(Set dst (VectorInsert (Binary src val) idx));
  effect(TEMP tmp, TEMP v0);
  format %{ "insertL_index $dst, $src, $val, $idx\t# KILL $tmp" %}
  ins_encode %{
    VectorRegister lane_mask = as_VectorRegister($v0$$reg);
    VectorRegister idx_splat = as_VectorRegister($tmp$$reg);
    __ vsetvli_helper(Matcher::vector_element_basic_type(this), Matcher::vector_length(this));
    // lane_mask[i] = i
    __ vid_v(lane_mask);
    // Every lane of idx_splat holds the requested index.
    __ vmv_v_x(idx_splat, $idx$$Register);
    // Only the lane whose id equals the index is selected.
    __ vmseq_vv(lane_mask, lane_mask, idx_splat);
    // Selected lane takes val, all other lanes are copied from src.
    __ vmerge_vxm(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), $val$$Register);
  %}
  ins_pipe(pipe_slow);
%}

// FLOAT

// Insert the float scalar val into lane idx of a FLOAT vector when the
// constant index is below 32. v0 is clobbered: it holds the lane mask that
// selects the single lane to be replaced.
instruct insertF_index_lt32(vReg dst, vReg src, fRegF val, immI idx, vRegMask_V0 v0) %{
  predicate(n->in(2)->get_int() < 32 &&
            (Matcher::vector_element_basic_type(n) == T_FLOAT));
  match(Set dst (VectorInsert (Binary src val) idx));
  effect(TEMP v0);
  format %{ "insertF_index_lt32 $dst, $src, $val, $idx" %}
  ins_encode %{
    VectorRegister lane_mask = as_VectorRegister($v0$$reg);
    // Lane ids and the index are both biased by -16, so the compared immediate
    // stays within the signed 5-bit range [-16, 15] for a non-negative index
    // below 32.
    int biased_idx = (int)($idx$$constant) - 16;
    __ vsetvli_helper(T_FLOAT, Matcher::vector_length(this));
    // lane_mask[i] = i - 16
    __ vid_v(lane_mask);
    __ vadd_vi(lane_mask, lane_mask, -16);
    // Only the lane whose biased id equals the biased index is selected.
    __ vmseq_vi(lane_mask, lane_mask, biased_idx);
    // Selected lane takes val, all other lanes are copied from src.
    __ vfmerge_vfm(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), $val$$FloatRegister);
  %}
  ins_pipe(pipe_slow);
%}

// Insert the float scalar val into lane idx of a FLOAT vector when the index
// is 32 or larger. The index arrives in a general purpose register and is
// broadcast into tmp so it can be compared against the lane ids.
// Both tmp and v0 are clobbered.
instruct insertF_index(vReg dst, vReg src, fRegF val, iRegIorL2I idx, vReg tmp, vRegMask_V0 v0) %{
  predicate(n->in(2)->get_int() >= 32 &&
            (Matcher::vector_element_basic_type(n) == T_FLOAT));
  match(Set dst (VectorInsert (Binary src val) idx));
  effect(TEMP tmp, TEMP v0);
  format %{ "insertF_index $dst, $src, $val, $idx\t# KILL $tmp" %}
  ins_encode %{
    VectorRegister lane_mask = as_VectorRegister($v0$$reg);
    VectorRegister idx_splat = as_VectorRegister($tmp$$reg);
    __ vsetvli_helper(T_FLOAT, Matcher::vector_length(this));
    // lane_mask[i] = i
    __ vid_v(lane_mask);
    // Every lane of idx_splat holds the requested index.
    __ vmv_v_x(idx_splat, $idx$$Register);
    // Only the lane whose id equals the index is selected.
    __ vmseq_vv(lane_mask, lane_mask, idx_splat);
    // Selected lane takes val, all other lanes are copied from src.
    __ vfmerge_vfm(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), $val$$FloatRegister);
  %}
  ins_pipe(pipe_slow);
%}

// DOUBLE

// Insert the double scalar val into lane idx of a DOUBLE vector when the
// constant index is below 32. v0 is clobbered: it holds the lane mask that
// selects the single lane to be replaced.
instruct insertD_index_lt32(vReg dst, vReg src, fRegD val, immI idx, vRegMask_V0 v0) %{
  predicate(n->in(2)->get_int() < 32 &&
            (Matcher::vector_element_basic_type(n) == T_DOUBLE));
  match(Set dst (VectorInsert (Binary src val) idx));
  effect(TEMP v0);
  format %{ "insertD_index_lt32 $dst, $src, $val, $idx" %}
  ins_encode %{
    VectorRegister lane_mask = as_VectorRegister($v0$$reg);
    // Lane ids and the index are both biased by -16, so the compared immediate
    // stays within the signed 5-bit range [-16, 15] for a non-negative index
    // below 32.
    int biased_idx = (int)($idx$$constant) - 16;
    __ vsetvli_helper(T_DOUBLE, Matcher::vector_length(this));
    // lane_mask[i] = i - 16
    __ vid_v(lane_mask);
    __ vadd_vi(lane_mask, lane_mask, -16);
    // Only the lane whose biased id equals the biased index is selected.
    __ vmseq_vi(lane_mask, lane_mask, biased_idx);
    // Selected lane takes val, all other lanes are copied from src.
    __ vfmerge_vfm(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), $val$$FloatRegister);
  %}
  ins_pipe(pipe_slow);
%}

// Insert the double scalar val into lane idx of a DOUBLE vector when the index
// is 32 or larger. The index arrives in a general purpose register and is
// broadcast into tmp so it can be compared against the lane ids.
// Both tmp and v0 are clobbered.
instruct insertD_index(vReg dst, vReg src, fRegD val, iRegIorL2I idx, vReg tmp, vRegMask_V0 v0) %{
  predicate(n->in(2)->get_int() >= 32 &&
            (Matcher::vector_element_basic_type(n) == T_DOUBLE));
  match(Set dst (VectorInsert (Binary src val) idx));
  effect(TEMP tmp, TEMP v0);
  format %{ "insertD_index $dst, $src, $val, $idx\t# KILL $tmp" %}
  ins_encode %{
    VectorRegister lane_mask = as_VectorRegister($v0$$reg);
    VectorRegister idx_splat = as_VectorRegister($tmp$$reg);
    __ vsetvli_helper(T_DOUBLE, Matcher::vector_length(this));
    // lane_mask[i] = i
    __ vid_v(lane_mask);
    // Every lane of idx_splat holds the requested index.
    __ vmv_v_x(idx_splat, $idx$$Register);
    // Only the lane whose id equals the index is selected.
    __ vmseq_vv(lane_mask, lane_mask, idx_splat);
    // Selected lane takes val, all other lanes are copied from src.
    __ vfmerge_vfm(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), $val$$FloatRegister);
  %}
  ins_pipe(pipe_slow);
%}

// ------------------------------ Vector mask reductions -----------------------

// true count

// Count the set lanes of the mask src and place the count in dst.
instruct vmask_truecount(iRegINoSp dst, vRegMask src) %{
  match(Set dst (VectorMaskTrueCount src));
  format %{ "vmask_truecount $dst, $src" %}
  ins_encode %{
    // Element type and lane count are taken from the mask input, not from
    // this node, whose result is a scalar.
    BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);
    uint lane_cnt = Matcher::vector_length(this, $src);
    __ vsetvli_helper(elem_bt, lane_cnt);
    // Population count over the active mask bits.
    __ vcpop_m($dst$$Register, as_VectorRegister($src$$reg));
  %}
  ins_pipe(pipe_slow);
%}

// first true

// Return the index of the first mask lane that is set, or vector length if none of
// them are set.

// Compute the index of the first set lane of the mask src into dst.
// tmp is clobbered.
instruct vmask_firsttrue(iRegINoSp dst, vRegMask src, vRegMask tmp) %{
  match(Set dst (VectorMaskFirstTrue src));
  effect(TEMP tmp);
  format %{ "vmask_firsttrue $dst, $src\t# KILL $tmp" %}
  ins_encode %{
    VectorRegister before_first = as_VectorRegister($tmp$$reg);
    BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);
    uint lane_cnt = Matcher::vector_length(this, $src);
    __ vsetvli_helper(elem_bt, lane_cnt);
    // Set-before-first: marks every lane that precedes the first set lane, or
    // every lane when no lane is set.
    __ vmsbf_m(before_first, as_VectorRegister($src$$reg));
    // The number of marked lanes is the index of the first set lane, or the
    // vector length when there is none.
    __ vcpop_m($dst$$Register, before_first);
  %}
  ins_pipe(pipe_slow);
%}

// last true

// Return the index of the last mask lane that is set, or -1 if none of
// them are set.

// Compute the index of the last set lane of the mask src into dst, or -1 when
// no lane is set. The mask bits are moved to a scalar register and scanned
// with clz, so the encoding asserts UseZbb and a vector length of at most XLEN.
// t0 is used as scratch.
instruct vmask_lasttrue(iRegINoSp dst, vRegMask src) %{
  match(Set dst (VectorMaskLastTrue src));
  format %{ "vmask_lasttrue $dst, $src" %}
  ins_encode %{
    Register result = $dst$$Register;
    uint lane_cnt = Matcher::vector_length(this, $src);
    assert(UseZbb && lane_cnt <= XLEN, "precondition");
    // Read the mask register as a single 64-bit element.
    __ vsetvli_helper(T_LONG, 1);
    __ vmv_x_s(result, as_VectorRegister($src$$reg));
    if (lane_cnt != XLEN) {
      // Clear the bits above the vector length: shift them out on the left,
      // then shift zeros back in.
      uint unused_bits = XLEN - lane_cnt;
      __ slli(result, result, unused_bits);
      __ srli(result, result, unused_bits);
    }
    // result = (XLEN - 1) - leading_zeros(mask)
    __ clz(result, result);
    __ mv(t0, XLEN - 1);
    __ sub(result, t0, result);
  %}
  ins_pipe(pipe_slow);
%}

// tolong

// Move the bits of the mask src into the long dst, one bit per lane, with all
// bits at or above the vector length cleared. The encoding asserts a vector
// length of at most XLEN.
instruct vmask_tolong(iRegLNoSp dst, vRegMask src) %{
  match(Set dst (VectorMaskToLong src));
  format %{ "vmask_tolong $dst, $src" %}
  ins_encode %{
    Register result = $dst$$Register;
    uint lane_cnt = Matcher::vector_length(this, $src);
    assert(lane_cnt <= XLEN, "precondition");
    // Read the mask register as a single 64-bit element.
    __ vsetvli_helper(T_LONG, 1);
    __ vmv_x_s(result, as_VectorRegister($src$$reg));
    if (lane_cnt != XLEN) {
      // Clear the bits above the vector length: shift them out on the left,
      // then shift zeros back in.
      uint unused_bits = XLEN - lane_cnt;
      __ slli(result, result, unused_bits);
      __ srli(result, result, unused_bits);
    }
  %}
  ins_pipe(pipe_slow);
%}

// fromlong

// Move the long src into the mask register dst. The encoding asserts a vector
// length of at most XLEN.
instruct vmask_fromlong(vRegMask dst, iRegL src) %{
  match(Set dst (VectorLongToMask src));
  format %{ "vmask_fromlong $dst, $src" %}
  ins_encode %{
    uint lane_cnt = Matcher::vector_length(this);
    assert(lane_cnt <= XLEN, "precondition");
    // Write the scalar into the mask register as a single 64-bit element.
    __ vsetvli_helper(T_LONG, 1);
    __ vmv_s_x(as_VectorRegister($dst$$reg), $src$$Register);
  %}
  ins_pipe(pipe_slow);
%}

// ------------------------------ VectorTest -----------------------------------

// anytrue

// Not matched. Condition is negated and value zero is moved to the right side in CMoveINode::Ideal.

// instruct cmovI_vtest_anytrue(iRegINoSp dst, cmpOp cop, vRegMask op1, vRegMask op2, immI0 zero, immI_1 one) %{
//   predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::ne &&
//             static_cast<const VectorTestNode*>(n->in(1)->in(2))->get_predicate() == BoolTest::ne);
//   match(Set dst (CMoveI (Binary cop (VectorTest op1 op2)) (Binary zero one)));
//   format %{ "CMove $dst, (vectortest $cop $op1 $op2), zero, one\t#@cmovI_vtest_anytrue"  %}
//   ins_encode %{
//     BasicType bt = Matcher::vector_element_basic_type(this, $op1);
//     uint vector_length = Matcher::vector_length(this, $op1);
//     __ vsetvli_helper(bt, vector_length);
//     __ vcpop_m($dst$$Register, as_VectorRegister($op1$$reg));
//     __ snez($dst$$Register, $dst$$Register);
//   %}
//   ins_pipe(pipe_slow);
// %}

// CMoveI over a VectorTest with the ne (anytrue) predicate under a Bool eq
// test, selecting between the constants one and zero.
// The encoding produces dst = (number of set lanes in op1 != 0); op2 is not
// read by the encoding.
instruct cmovI_vtest_anytrue_negate(iRegINoSp dst, cmpOp cop, vRegMask op1, vRegMask op2, immI0 zero, immI_1 one) %{
  predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::eq &&
            static_cast<const VectorTestNode*>(n->in(1)->in(2))->get_predicate() == BoolTest::ne);
  match(Set dst (CMoveI (Binary cop (VectorTest op1 op2)) (Binary one zero)));
  format %{ "CMove $dst, (vectortest $cop $op1 $op2), zero, one\t#@cmovI_vtest_anytrue_negate"  %}
  ins_encode %{
    Register result = $dst$$Register;
    uint lane_cnt = Matcher::vector_length(this, $op1);
    BasicType elem_bt = Matcher::vector_element_basic_type(this, $op1);
    __ vsetvli_helper(elem_bt, lane_cnt);
    // Count the set lanes, then collapse any non-zero count to 1.
    __ vcpop_m(result, as_VectorRegister($op1$$reg));
    __ snez(result, result);
  %}
  ins_pipe(pipe_slow);
%}

// alltrue

// Not matched. Condition is negated and value zero is moved to the right side in CMoveINode::Ideal.

// instruct cmovI_vtest_alltrue(iRegINoSp dst, cmpOp cop, vRegMask op1, vRegMask op2, immI0 zero, immI_1 one) %{
//   predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::eq &&
//             static_cast<const VectorTestNode*>(n->in(1)->in(2))->get_predicate() == BoolTest::overflow);
//   match(Set dst (CMoveI (Binary cop (VectorTest op1 op2)) (Binary zero one)));
//   format %{ "CMove $dst, (vectortest $cop $op1 $op2), zero, one\t#@cmovI_vtest_alltrue"  %}
//   ins_encode %{
//     BasicType bt = Matcher::vector_element_basic_type(this, $op1);
//     uint vector_length = Matcher::vector_length(this, $op1);
//     __ vsetvli_helper(bt, vector_length);
//     __ vcpop_m($dst$$Register, as_VectorRegister($op1$$reg));
//     __ sub($dst$$Register, $dst$$Register, vector_length);
//     __ seqz($dst$$Register, $dst$$Register);
//   %}
//   ins_pipe(pipe_slow);
// %}

// CMoveI over a VectorTest with the overflow (alltrue) predicate under a Bool
// ne test, selecting between the constants one and zero.
// The encoding produces dst = (number of set lanes in op1 == vector length);
// op2 is not read by the encoding.
instruct cmovI_vtest_alltrue_negate(iRegINoSp dst, cmpOp cop, vRegMask op1, vRegMask op2, immI0 zero, immI_1 one) %{
  predicate(n->in(1)->in(1)->as_Bool()->_test._test == BoolTest::ne &&
            static_cast<const VectorTestNode*>(n->in(1)->in(2))->get_predicate() == BoolTest::overflow);
  match(Set dst (CMoveI (Binary cop (VectorTest op1 op2)) (Binary one zero)));
  format %{ "CMove $dst, (vectortest $cop $op1 $op2), zero, one\t#@cmovI_vtest_alltrue_negate"  %}
  ins_encode %{
    Register result = $dst$$Register;
    uint lane_cnt = Matcher::vector_length(this, $op1);
    BasicType elem_bt = Matcher::vector_element_basic_type(this, $op1);
    __ vsetvli_helper(elem_bt, lane_cnt);
    // Count the set lanes; the difference to the lane count is zero exactly
    // when every lane is set.
    __ vcpop_m(result, as_VectorRegister($op1$$reg));
    __ sub(result, result, lane_cnt);
    __ seqz(result, result);
  %}
  ins_pipe(pipe_slow);
%}

// anytrue

// Conditional branch on a VectorTest with the ne (anytrue) predicate.
// The number of set lanes in op1 is computed into t0 and compared against zero
// with the eq/ne condition cop; op2 is not read by the encoding.
instruct vtest_anytrue_branch(cmpOpEqNe cop, vRegMask op1, vRegMask op2, label lbl) %{
  predicate(static_cast<const VectorTestNode*>(n->in(2))->get_predicate() == BoolTest::ne);
  match(If cop (VectorTest op1 op2));
  effect(USE lbl);
  format %{ "b$cop (vectortest ne $op1, $op2) $lbl\t#@vtest_anytrue_branch" %}
  ins_encode %{
    BasicType elem_bt = Matcher::vector_element_basic_type(this, $op1);
    uint lane_cnt = Matcher::vector_length(this, $op1);
    __ vsetvli_helper(elem_bt, lane_cnt);
    // t0 = number of set lanes
    __ vcpop_m(t0, as_VectorRegister($op1$$reg));
    // Always emitted as a far branch.
    __ enc_cmpEqNe_imm0_branch($cop$$cmpcode, t0, *($lbl$$label), /* is_far */ true);
  %}
  ins_pipe(pipe_slow);
%}

// alltrue

// Conditional branch on a VectorTest with the overflow (alltrue) predicate.
// t0 receives (number of set lanes in op1) - (vector length), which is zero
// exactly when every lane is set, and is compared against zero with the eq/ne
// condition cop; op2 is not read by the encoding.
instruct vtest_alltrue_branch(cmpOpEqNe cop, vRegMask op1, vRegMask op2, label lbl) %{
  predicate(static_cast<const VectorTestNode*>(n->in(2))->get_predicate() == BoolTest::overflow);
  match(If cop (VectorTest op1 op2));
  effect(USE lbl);
  format %{ "b$cop (vectortest overflow $op1, $op2) $lbl\t#@vtest_alltrue_branch" %}
  ins_encode %{
    BasicType elem_bt = Matcher::vector_element_basic_type(this, $op1);
    uint lane_cnt = Matcher::vector_length(this, $op1);
    __ vsetvli_helper(elem_bt, lane_cnt);
    // t0 = number of set lanes - lane count
    __ vcpop_m(t0, as_VectorRegister($op1$$reg));
    __ sub(t0, t0, lane_cnt);
    // Always emitted as a far branch.
    __ enc_cmpEqNe_imm0_branch($cop$$cmpcode, t0, *($lbl$$label), /* is_far */ true);
  %}
  ins_pipe(pipe_slow);
%}
